├── .github
└── workflows
│ └── on-push.yml
├── .gitignore
├── .pre-commit-config.yaml
├── .readthedocs.yml
├── LICENSE
├── README.rst
├── cospar
├── __init__.py
├── datasets.py
├── help_functions
│ ├── __init__.py
│ ├── _docs.py
│ └── _help_functions_CoSpar.py
├── hf.py
├── logging.py
├── pl.py
├── plotting
│ ├── __init__.py
│ ├── _clone.py
│ ├── _gene.py
│ ├── _map.py
│ └── _utils.py
├── pp.py
├── preprocessing
│ ├── __init__.py
│ └── _preprocessing.py
├── settings.py
├── simulate.py
├── tl.py
├── tmap
│ ├── __init__.py
│ ├── _tmap_core.py
│ ├── _utils.py
│ ├── map_reconstruction.py
│ └── optimal_transport.py
└── tool
│ ├── __init__.py
│ ├── _clone.py
│ ├── _gene.py
│ ├── _map.py
│ └── _utils.py
├── docs
├── Makefile
├── make.bat
├── requirements.txt
└── source
│ ├── .ipynb_checkpoints
│ ├── 20201121-Bifurcation_model_staticBC-CoSapr_basic-checkpoint.ipynb
│ ├── 20210120-Bifurcation_model_dynamic_barcoding-checkpoint.ipynb
│ ├── 20210120-Bifurcation_model_static_barcoding-checkpoint.ipynb
│ ├── 20210121_all_hematopoietic_data-checkpoint.ipynb
│ ├── 20210121_cospar_tutorial-checkpoint.ipynb
│ ├── 20210121_lung_data-checkpoint.ipynb
│ ├── 20210121_reprogramming_data_merge_tags-checkpoint.ipynb
│ ├── 20210121_reprogramming_data_no_merge_tags-checkpoint.ipynb
│ ├── 20210121_reprogramming_dynamic_barcoding-checkpoint.ipynb
│ └── 20210121_subsampled_hematopoietic_data-checkpoint.ipynb
│ ├── 20210120_bifurcation_model_static_barcoding.ipynb
│ ├── 20210121_all_hematopoietic_data_v3.ipynb
│ ├── 20210121_lung_data_v2.ipynb
│ ├── 20210121_reprogramming_static_barcoding_v2.ipynb
│ ├── 20210602_loading_data.ipynb
│ ├── 20211010_clonal_analysis.ipynb
│ ├── 20211010_map_analysis.ipynb
│ ├── 20211010_map_inference.ipynb
│ ├── 20211010_preprocessing.ipynb
│ ├── 20220402_simulate_differentiation.ipynb
│ ├── _ext
│ └── edit_on_github.py
│ ├── _static
│ ├── colab-badge.svg
│ ├── custom.css
│ └── nbviewer-badge.svg
│ ├── _templates
│ └── autosummary
│ │ ├── base.rst
│ │ └── class.rst
│ ├── about.rst
│ ├── api.rst
│ ├── conf.py
│ ├── getting_started.rst
│ ├── index.rst
│ ├── installation.rst
│ └── release_note.rst
├── environment.yml
├── pypi.rst
├── pyproject.toml
├── requirements.txt
├── setup.py
└── tests
├── __init__.py
├── context.py
├── data
├── cell_id.txt
├── clonal_data_in_table_format.txt
└── test_adata_preprocessed.h5ad
└── test_all.py
/.github/workflows/on-push.yml:
--------------------------------------------------------------------------------
1 | name: on-push
2 | on:
3 | push:
4 | branches:
5 | - master
6 | tags:
7 | - '*'
8 | pull_request:
9 | branches:
10 | - master
11 | workflow_dispatch:
12 |
13 | jobs:
14 | test-cospar:
15 | runs-on: ${{ matrix.os }}
16 | strategy:
17 | fail-fast: false
18 | matrix:
19 | os: ["ubuntu-latest"]
20 | steps:
21 | - uses: actions/checkout@v2
22 | - name: cache conda
23 | uses: actions/cache@v2
24 | env:
25 | CACHE_NUMBER: 1 # bump to reset cache
26 | with:
27 | path: ~/conda_pkgs_dir
28 | key: ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{ hashFiles('environment.yml') }}
29 | - uses: conda-incubator/setup-miniconda@v2
30 | with:
31 | miniconda-version: "latest"
32 | channels: conda-forge,bioconda,defaults
33 | auto-update-conda: false
34 | activate-environment: cospar
35 | environment-file: environment.yml
36 | use-only-tar-bz2: true
37 | mamba-version: "*"
38 | - name: Install cospar
39 | shell: bash -l {0}
40 | run: |
41 | python setup.py install
42 | - name: Run pre-commit
43 | uses: pre-commit/action@v2.0.0
44 | - name: Test cospar
45 | shell: bash -l {0}
46 | run: |
47 | pytest -v --cov=cospar --cov-report=term-missing
48 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | build/
2 | dist/
3 | cospar.egg-info/
4 | docs/build/
5 | docs/source/cospar.*
6 | /**/.DS_Store
7 | /**/*.py[cod]
8 | /**/__pycache__
9 | .eggs/
10 | tests/output/
11 | .coverage*
12 | .pytest_cache
13 | /**/.ipynb_checkpoints
14 | /**/__MACOSX/
15 | docs/source/*cospar*/
16 | docs/source/test_data*
17 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | - repo: https://github.com/pre-commit/pre-commit-hooks
3 | rev: v3.2.0
4 | hooks:
5 | - id: trailing-whitespace
6 | - id: end-of-file-fixer
7 | - id: check-yaml
8 | - id: check-added-large-files
9 | args: ['--maxkb=10000']
10 | - repo: https://github.com/psf/black
11 | rev: 22.3.0
12 | hooks:
13 | - id: black
14 | language_version: python3
15 | - repo: https://github.com/timothycrosley/isort
16 | rev: 5.8.0
17 | hooks:
18 | - id: isort
19 | args: ["--profile", "black"]
20 |
--------------------------------------------------------------------------------
/.readthedocs.yml:
--------------------------------------------------------------------------------
1 | build:
2 | image: latest
3 |
4 | # Build documentation in the docs/ directory with Sphinx
5 | sphinx:
6 | configuration: docs/source/conf.py
7 |
8 | python:
9 | version: 3.8
10 | install:
11 | - requirements: docs/requirements.txt
12 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 | Copyright (c) 2020 Shou-Wen Wang
3 |
4 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
5 |
6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
7 |
8 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
9 |
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 | |PyPI| |PyPIDownloads| |Docs|
2 |
3 |
4 | CoSpar - dynamic inference by integrating state and lineage information
5 | =======================================================================
6 |
7 | 🔴 🔴 Notice: Package relocation 🔴 🔴
8 | -------------------------------------
9 | Effective on April 1st 2023, Shou-Wen Wang is leaving the Klein lab to start `his own group at Westlake University `_, and he will no longer maintain this repository. Further development of CoSpar will continue in his own lab under this repository `https://github.com/ShouWenWang-Lab/cospar `_. Please **reach out there for any issues related to CoSpar**.
10 |
11 | =======================================================================
12 |
13 | .. image:: https://user-images.githubusercontent.com/4595786/104988296-b987ce00-59e5-11eb-8dbe-a463b355a9fd.png
14 | :width: 300px
15 | :align: left
16 |
17 | **CoSpar** is a toolkit for dynamic inference from lineage-traced single cells.
18 | The methods are based on
19 | `Wang et al. Nat. Biotech. (2022) `_.
20 |
21 | Dynamic inference based on single-cell state measurement alone requires serious simplifications. On the other hand, direct dynamic measurement via lineage tracing only captures partial information and its interpretation is challenging. CoSpar integrates both state and lineage information to infer a finite-time transition map of a development/differentiation system. It gains superior robustness and accuracy by exploiting both the local coherence and sparsity of differentiation transitions, i.e., neighboring initial states share similar yet sparse fate outcomes. Building around the anndata_ object, CoSpar provides an integrated analysis framework for datasets with both state and lineage information. When only state information is available, CoSpar also improves upon existing dynamic inference methods by imposing sparsity and coherence. It offers essential toolkits for analyzing lineage data, state information, or their integration.
22 |
23 | See ``_ for documentation and tutorials.
24 |
25 | Recorded talks
26 | --------------
27 | `Jun 1: Single-Cell Data Science 2022 `_. This is a 20-min short talk focusing more on the utility of CoSpar: `talk video `_
28 |
29 | `Oct 19, 2022: Invited MIA talk at Broad Institute `_. This is a one-hour talk focusing on the Machine Learning part of CoSpar: `talk video `_. The talk slides can be found `here `_.
30 |
31 | Reference
32 | ---------
33 | `S.-W. Wang*, M. Herriges, K. Hurley, D. Kotton, A. M. Klein*, CoSpar identifies early cell fate biases from single cell transcriptomic and lineage information, Nat. Biotech. (2022) `_. [* corresponding authors]
34 |
35 | Support
36 | -------
37 | Feel free to submit an `issue `_
38 | or send us an `email `_.
39 | Your help to improve CoSpar is highly appreciated.
40 |
41 |
42 |
43 | .. _anndata: https://anndata.readthedocs.io
44 |
45 | .. |PyPI| image:: https://img.shields.io/pypi/v/cospar.svg
46 | :target: https://pypi.org/project/cospar
47 |
48 | .. |PyPIDownloads| image:: https://pepy.tech/badge/cospar
49 | :target: https://pepy.tech/project/cospar
50 |
51 | .. |Docs| image:: https://readthedocs.org/projects/cospar/badge/?version=latest
52 | :target: https://cospar.readthedocs.io
53 |
--------------------------------------------------------------------------------
/cospar/__init__.py:
--------------------------------------------------------------------------------
1 | """CoSpar - dynamic inference by integrating transcriptome and lineage information"""
2 |
3 | __version__ = "0.3.0"
4 | from . import datasets, hf, logging, pl, pp, settings, simulate, tl, tmap
5 |
--------------------------------------------------------------------------------
/cospar/datasets.py:
--------------------------------------------------------------------------------
1 | import os
2 | from pathlib import Path, PurePath
3 |
4 | from scanpy import read
5 |
6 | from . import logging as logg
7 | from . import settings
8 |
9 | url_prefix_0 = "https://kleintools.hms.harvard.edu/tools/downloads/cospar"
10 |
11 |
def synthetic_bifurcation(data_des="bifur"):
    """
    Load the synthetic clonal dataset with static barcoding.

    The data simulate a differentiation process over a bifurcation fork.
    Cells are barcoded in the beginning, and the barcodes remain unchanged.
    Clones are resampled over time in the simulation, like the experimental
    design used to obtain the hematopoietic dataset or the reprogramming
    dataset. The dataset has two time points.

    Parameters
    ----------
    data_des: `str`
        A key to label this dataset.

    Returns
    -------
    The result of :func:`load_data_core` for ``bifur_adata_preprocessed.h5ad``.
    """
    # data_name='bifurcation_static_BC_adata_preprocessed.h5ad'
    return load_data_core(
        settings.data_path,
        settings.figure_path,
        "bifur_adata_preprocessed.h5ad",
        data_des,
    )
33 |
34 |
def raw_data_for_import_exercise():
    """
    Fetch the test dataset that demonstrates how to import your own data.

    ``test_data.zip`` is looked up relative to the current working directory
    and downloaded when it is not there yet; the archive is then extracted
    into the current working directory. Nothing is returned.
    """
    import zipfile

    archive = Path("test_data.zip")
    _check_datafile_present_and_download(
        archive,
        backup_url="https://github.com/ShouWenWang-Lab/cospar/files/12036732/test_data.zip",
    )

    with zipfile.ZipFile("test_data.zip", "r") as bundle:
        bundle.extractall()
50 |
51 |
52 | # def synthetic_bifurcation_dynamic_BC(data_des='bifur_conBC'):
53 | # """
54 | # Synthetic clonal dataset with dynamic barcoding.
55 |
56 | # We simulated a differentiation process over a bifurcation fork.
57 | # Cells are barcoded, and the barcodes could accumulate mutations, which we call
58 | # `dynamic barcoding`. In the simulation, we resample clones over time,
59 | # like the experimental design to obtain the hematopoietic dataset
60 | # or the reprogramming dataset. The dataset has two time points.
61 |
62 | # Parameters
63 | # ----------
64 | # data_des: `str`
65 | # A key to label this dataset.
66 | # """
67 |
68 | # data_path=settings.data_path
69 | # figure_path=settings.figure_path
70 | # data_name='bifurcation_dynamic_BC_adata_preprocessed.h5ad'
71 | # return load_data_core(data_path,figure_path,data_name,data_des)
72 |
73 |
def reprogramming(data_des="CellTagging"):
    """
    Load the reprogramming dataset from

    * Biddy, B. A. et al. `Single-cell mapping of lineage and identity in direct reprogramming`. Nature 564, 219–224 (2018).

    Both the clones and the state measurements cover multiple time points.
    The cells are barcoded over 3 rounds during the entire differentiation
    process, and the barcodes on day 0, day 3, and day 13 can be assembled
    into a clonal ID in several ways. Three variants are provided:

    * Concatenate barcodes on day 0 and day 13, as in the original
      analysis (adata.obsm['X_clone_Concat_D0D3'], the default).
      NOTE(review): the key name ``D0D3`` suggests day 0 and day 3 -- confirm
      which days are meant;

    * Concatenate barcodes on day 0, day 3, and day 13 (adata.obsm['X_clone_Concat_D0D3D13']);

    * No concatenation; each cell has up to 3 barcodes (adata.obsm['X_clone_NonConcat_D0D3D13']).

    The last choice keeps the nested clonal structure in the data.
    Any of these clonal arrangements can be selected for downstream analysis
    by setting, e.g.,
    `adata_orig.obsm['X_clone']=adata_orig.obsm['X_clone_Concat_D0D3']`.
    The three clonal arrangements give very similar fate prediction.

    Parameters
    ----------
    data_des: `str`
        A key to label this dataset.

    Returns
    -------
    The result of :func:`load_data_core` for
    ``CellTagging_adata_preprocessed.h5ad``.
    """
    return load_data_core(
        settings.data_path,
        settings.figure_path,
        "CellTagging_adata_preprocessed.h5ad",
        data_des,
    )
107 |
108 |
def reprogramming_Day0_3_28(data_des="Reprog_128"):
    """
    Load the day 0 / day 3 / day 28 reprogramming dataset from

    * Biddy, B. A. et al. `Single-cell mapping of lineage and identity in direct reprogramming`. Nature 564, 219–224 (2018).

    This dataset has time points on day 0, day 3, and day 28. Only day 28 has
    clonal information. The cells are barcoded over 3 rounds during the
    entire differentiation process. Among the possible ways to assemble the
    barcodes on day 0, day 3, and day 13 into a clonal ID, this dataset
    concatenates barcodes on day 0 and day 13, as in the original analysis.

    Parameters
    ----------
    data_des: `str`
        A key to label this dataset.

    Returns
    -------
    The result of :func:`load_data_core` for
    ``Reprog_128_D0D3_adata_preprocessed.h5ad``.
    """
    return load_data_core(
        settings.data_path,
        settings.figure_path,
        "Reprog_128_D0D3_adata_preprocessed.h5ad",
        data_des,
    )
132 |
133 |
134 | # def reprogramming_static_BC(data_des='CellTagging'):
135 | # """
136 | # The reprogramming dataset from
137 |
138 | # * Biddy, B. A. et al. `Single-cell mapping of lineage and identity in direct reprogramming`. Nature 564, 219–224 (2018).
139 |
140 | # This dataset has multiple time points for both the clones and the state measurements.
141 |
142 | # The cells are barcoded over 3 rounds during the entire differentiation process.
143 | # We combine up to 3 tags from the same cell into a single clonal label in representing
144 | # the X_clone matrix. In this representation, each cell has at most one clonal label.
145 | # Effectively, we convert the barcodes into static labels that do not carry temporal information.
146 |
147 | # Parameters
148 | # ----------
149 | # data_des: `str`
150 | # A key to label this dataset.
151 | # """
152 |
153 | # data_path=settings.data_path
154 | # figure_path=settings.figure_path
155 | # data_name='CellTagging_ConcatenateClone_adata_preprocessed.h5ad'
156 | # return load_data_core(data_path,figure_path,data_name,data_des)
157 |
158 | # def reprogramming_dynamic_BC(data_des='CellTagging_NoConcat'):
159 | # """
160 | # The reprogramming dataset from
161 |
162 | # * Biddy, B. A. et al. `Single-cell mapping of lineage and identity in direct reprogramming`. Nature 564, 219–224 (2018).
163 |
164 | # This dataset has multiple time points for both the clones and the state measurements.
165 |
166 | # The cells are barcoded over 3 rounds during the entire differentiation process.
167 | # We treat barcode tags from each round as independent clonal label here. In this
168 | # representation, each cell can have multiple clonal labels at different time points.
169 |
170 | # Parameters
171 | # ----------
172 | # data_des: `str`
173 | # A key to label this dataset.
174 | # """
175 |
176 | # data_path=settings.data_path
177 | # figure_path=settings.figure_path
178 | # data_name='CellTagging_NoConcat_adata_preprocessed.h5ad'
179 | # return load_data_core(data_path,figure_path,data_name,data_des)
180 |
181 |
def lung(data_des="Lung"):
    """
    Load the direct lung differentiation dataset from

    * Hurley, K. et al. Cell Stem Cell (2020) doi:10.1016/j.stem.2019.12.009.

    The state manifold covers multiple time points, but the clonal
    observation is available at a single time point only, day 27.

    Parameters
    ----------
    data_des: `str`
        A key to label this dataset.

    Returns
    -------
    The result of :func:`load_data_core` for
    ``Lung_pos17_21_D27_adata_preprocessed.h5ad``.
    """
    return load_data_core(
        settings.data_path,
        settings.figure_path,
        "Lung_pos17_21_D27_adata_preprocessed.h5ad",
        data_des,
    )
201 |
202 |
def hematopoiesis(data_des="LARRY"):
    """
    Load the hematopoiesis data set from

    * Weinreb, C., Rodriguez-Fraticelli, A., Camargo, F. D. & Klein, A. M. Science 367, (2020)

    This dataset has 3 time points for both the clonal and state
    measurements. It only contains cells with clonal labels (~50000 cells).
    Running the whole pipeline for the first time could take several hours
    on a standard personal computer.

    Parameters
    ----------
    data_des: `str`
        A key to label this dataset.

    Returns
    -------
    The result of :func:`load_data_core` for ``LARRY_adata_preprocessed.h5ad``.
    """
    return load_data_core(
        settings.data_path,
        settings.figure_path,
        "LARRY_adata_preprocessed.h5ad",
        data_des,
    )
223 |
224 |
def hematopoiesis_130K(data_des="LARRY"):
    """
    Load the complete (~130K cells) hematopoiesis data set from

    * Weinreb, C., Rodriguez-Fraticelli, A., Camargo, F. D. & Klein, A. M. Science 367, (2020)

    This dataset has 3 time points for both the clonal and state
    measurements. It includes cells with or without clonal labels, for a
    total of ~130K cells.

    Parameters
    ----------
    data_des: `str`
        A key to label this dataset.

    Returns
    -------
    The result of :func:`load_data_core` for
    ``Complete_LARRY_dataset_adata_preprocessed.h5ad``.
    """
    return load_data_core(
        settings.data_path,
        settings.figure_path,
        "Complete_LARRY_dataset_adata_preprocessed.h5ad",
        data_des,
    )
244 |
245 |
def hematopoiesis_Gata1_states(data_des="LARRY_Gata1_lineage"):
    """
    Load the Gata1-lineage subset of the hematopoiesis data set from

    * Weinreb, C., Rodriguez-Fraticelli, A., Camargo, F. D. & Klein, A. M. Science 367, (2020)

    This dataset includes non-clonally-labeled states that express Gata1.
    In total, it has ~38K cells.

    Parameters
    ----------
    data_des: `str`
        A key to label this dataset.

    Returns
    -------
    The result of :func:`load_data_core` for
    ``LARRY_Gata1_lineage_adata_preprocessed.h5ad``.
    """
    return load_data_core(
        settings.data_path,
        settings.figure_path,
        "LARRY_Gata1_lineage_adata_preprocessed.h5ad",
        data_des,
    )
264 |
265 |
def hematopoiesis_subsampled(data_des="LARRY_sp500_ranking1"):
    """
    Load the top 15% most heterogeneous clones of the hematopoiesis data set from

    * Weinreb, C., Rodriguez-Fraticelli, A., Camargo, F. D. & Klein, A. M. Science 367, (2020)

    This dataset has 3 time points for both the clones and the state
    measurements. This sub-sampled data better illustrates the power of
    CoSpar in robustly inferring differentiation dynamics from a noisy clonal
    dataset. It is also smaller and thus much faster to analyze.

    Parameters
    ----------
    data_des: `str`
        A key to label this dataset.

    Returns
    -------
    The result of :func:`load_data_core` for
    ``LARRY_sp500_ranking1_adata_preprocessed.h5ad``.
    """
    return load_data_core(
        settings.data_path,
        settings.figure_path,
        "LARRY_sp500_ranking1_adata_preprocessed.h5ad",
        data_des,
    )
287 |
288 |
def load_data_core(
    data_path, figure_path, data_name, data_des, url_prefix=url_prefix_0
):
    """
    Make sure a dataset file is available locally and read it.

    The data directory and the figure directory are created when missing.
    The file ``data_path/data_name`` is downloaded from
    ``{url_prefix}/{data_name}`` unless it already exists, and is then read
    with :func:`scanpy.read`.

    Parameters
    ----------
    data_path: `str` or `Path`
        Directory in which the dataset file is stored.
    figure_path: `str` or `Path`
        Directory for figures; only created here, not otherwise used.
    data_name: `str`
        File name of the dataset.
    data_des: `str`
        A key to label this dataset; stored as ``adata.uns["data_des"]``
        (wrapped in a one-element list).
    url_prefix: `str`
        Base URL the file is downloaded from.

    Returns
    -------
    The object returned by ``read`` with ``uns["data_des"]`` set, or ``None``
    when the availability check reports that the file is missing.
    """
    url = f"{url_prefix}/{data_name}"
    path = Path(data_path) / data_name
    figure_path = Path(figure_path)

    # exist_ok=True: another process may create the directory between the
    # is_dir() check and mkdir(); that must not abort the load.
    if not path.parent.is_dir():
        logg.info(f"creating directory {path.parent} for saving data")
        path.parent.mkdir(parents=True, exist_ok=True)

    if not figure_path.is_dir():
        logg.info(f"creating directory {figure_path} for saving figures")
        figure_path.mkdir(parents=True, exist_ok=True)

    if not _check_datafile_present_and_download(path, backup_url=url):
        logg.error("Error, files do not exist")
        return None

    adata = read(path)
    adata.uns["data_des"] = [str(data_des)]
    return adata
316 |
317 |
def _check_datafile_present_and_download(path, backup_url=None):
    """Check whether the file is present, otherwise download.

    Parameters
    ----------
    path: `str` or `Path`
        Location of the file.
    backup_url: `str`, optional (default: None)
        URL to download the file from when it is missing.

    Returns
    -------
    ``True`` when the file already exists or the download completed;
    ``False`` when the file is missing and no `backup_url` was given.
    Errors raised by the download propagate to the caller.
    """
    path = Path(path)
    if path.is_file():
        return True
    if backup_url is None:
        return False
    logg.info(
        f"try downloading from url\n{backup_url}\n"
        "... this may take a while but only happens once"
    )
    if not path.parent.is_dir():
        logg.info(f"creating directory {path.parent} for saving data")
        # exist_ok=True: the directory may appear between the check and the
        # creation (e.g. a concurrent download); that is not an error.
        path.parent.mkdir(parents=True, exist_ok=True)

    _download(backup_url, path)
    return True
335 |
336 |
def _download(url: str, path: Path):
    """Stream `url` into the file `path`, showing a tqdm progress bar.

    The response is copied in 8 KiB chunks. If anything goes wrong, including
    a keyboard interrupt, a partially written file at `path` is removed and
    the exception is re-raised.
    """
    try:
        # ipywidgets is imported but not otherwise used here; when it is
        # missing, the plain tqdm bar is used instead of tqdm.auto.
        import ipywidgets
        from tqdm.auto import tqdm
    except ImportError:
        from tqdm import tqdm

    from urllib.request import Request, urlopen

    chunk_size = 1024 * 8

    try:
        with urlopen(Request(url, headers={"User-agent": "scanpy-user"})) as resp:
            declared_size = resp.info().get("content-length", None)
            expected = None if declared_size is None else int(declared_size)
            with tqdm(
                unit="B",
                unit_scale=True,
                miniters=1,
                unit_divisor=1024,
                total=expected,
            ) as bar, path.open("wb") as sink:
                # read() returns b"" at end of stream, which ends the loop
                for chunk in iter(lambda: resp.read(chunk_size), b""):
                    sink.write(chunk)
                    bar.update(len(chunk))

    except (KeyboardInterrupt, Exception):
        # Make sure file doesn’t exist half-downloaded
        if path.is_file():
            path.unlink()
        raise
371 |
--------------------------------------------------------------------------------
/cospar/help_functions/__init__.py:
--------------------------------------------------------------------------------
1 | from ._help_functions_CoSpar import *
2 |
--------------------------------------------------------------------------------
/cospar/help_functions/_docs.py:
--------------------------------------------------------------------------------
1 | from textwrap import dedent
2 |
3 |
def _doc_params(**kwds):
    """\
    Build a decorator that fills ``{name}`` placeholders in a docstring.

    Docstrings should start with "\\" in the first line for proper formatting.

    The decorated object's docstring is dedented and formatted with `kwds`;
    the untouched docstring is kept in ``__orig_doc__``. Objects without a
    docstring (e.g. when Python runs with ``-OO``) are returned with
    ``__doc__`` still ``None`` instead of raising.
    """

    def dec(obj):
        doc = obj.__doc__
        obj.__orig_doc__ = doc
        if doc is not None:
            obj.__doc__ = dedent(doc).format_map(kwds)
        return obj

    return dec
15 |
16 |
17 | selected_fates = """\
18 | selected_fates: `list`
19 | List of cluster ids consistent with adata.obs['state_info'].
20 | It allows a nested structure. If so, we merge clusters within
21 | each sub-list into a mega-fate cluster.\
22 | """
23 |
24 | map_source = """\
25 | source: `str`
26 | The transition map to be used for plotting: {'transition_map',
27 | 'intraclone_transition_map',...}. The actual available
28 | map depends on adata itself, which can be accessed at adata.uns['available_map']\
29 | """
30 |
# The backslash of ``\mathcal`` is doubled: these are not raw strings (the
# trailing "\" line continuations rely on that), and "\m" is an invalid
# escape sequence that newer Python versions warn about.
map_backward = """\
map_backward: `bool`, optional (default: True)
    If `map_backward=True`, show fate properties of initial cell states :math:`i`;
    otherwise, show progenitor properties of later cell states :math:`j`.
    This is used for building the fate map :math:`P_i(\\mathcal{C})`. See :func:`.fate_map`.\
"""

fate_method = """\
method: `str`, optional (default: 'norm-sum')
    Method to obtain the fate probability map :math:`P_i(\\mathcal{C})` towards a set
    of states annotated with fate :math:`\\mathcal{C}`. Available options:
    {'sum', 'norm-sum'}. See :func:`.fate_map`.\
"""
44 |
45 | sum_fate_prob_thresh = """\
46 | sum_fate_prob_thresh: `float`, optional (default: 0.05)
47 | The fate bias of a state is plotted only when it has a cumulative fate
48 | probability to the combined cluster (A+B) larger than this threshold,
49 | i.e., P(i->A)+P(i->B) > sum_fate_prob_thresh.\
50 | """
51 |
52 | selected_times = """\
53 | selected_times: `list`, optional (default: all)
54 | A list of time points to further restrict the cell states to plot.
55 | The default choice is not to constrain the cell states to show.\
56 | """
57 |
58 | all_source = """\
59 | source: `str`
60 | Choices: {'X_clone', 'transition_map',
61 | 'intraclone_transition_map',...}. If set to be 'clone', use only the clonal
62 | information. If set to be any of the precomputed transition map, use the
63 | transition map to compute the fate coupling. The actual available
64 | map depends on adata itself, which can be accessed at adata.uns['available_map']\
65 | """
66 |
67 |
68 | rename_fates = """\
69 | rename_fates: `list`, optional (default: None)
70 | Provide new names in substitution of names in selected_fates.
71 | For this to be effective, the new name list needs to have names
72 | in exact correspondence to those in the old list.\
73 | """
74 |
75 |
76 | background = """\
77 | background: `bool`, optional (default: True)
78 | If true, plot all cell states (t1+t2) in grey as the background.\
79 | """
80 |
81 | show_histogram = """\
82 | show_histogram: `bool`, optional (default: False)
83 | If true, show the distribution of inferred fate probability.\
84 | """
85 |
86 | plot_target_state = """\
87 | plot_target_state: `bool`, optional (default: True)
88 | If true, highlight the target clusters as defined in selected_fates.\
89 | """
90 |
91 | color_bar = """\
92 | color_bar: `bool`, optional (default: True)
93 | plot the color bar if True.\
94 | """
95 |
96 | auto_color_scale = """\
97 | auto_color_scale:
98 | True: automatically rescale the color range to match the value range.\
99 | """
100 |
101 | target_transparency = """\
102 | target_transparency: `float`, optional (default: 0.2)
103 | It controls the transparency of the plotted target cell states,
104 | for visual effect. Range: [0,1].\
105 | """
106 |
107 | figure_index = """\
108 | figure_index: `str`, optional (default: '')
109 | String index used to annotate the filename of saved figures. Used to distinguish plots from different conditions.\
110 | """
111 |
112 | mask = """\
113 | mask: `np.array`, optional (default: None)
114 | A boolean array for available cell states. It should have the same length as adata.shape[0].
115 | Especially useful to constrain the states to show fate bias.\
116 | """
117 |
118 | color_map = """\
119 | color_map:
120 | The color map (a matplotlib.pyplot.cm object) to visualize the result.\
121 | """
122 |
--------------------------------------------------------------------------------
/cospar/hf.py:
--------------------------------------------------------------------------------
1 | from .help_functions import *
2 |
--------------------------------------------------------------------------------
/cospar/logging.py:
--------------------------------------------------------------------------------
1 | """Logging and Profiling
2 | """
3 |
4 | from datetime import datetime
5 | from platform import python_version
6 | from sys import stdout
7 | from time import time as get_time
8 |
9 | from anndata.logging import get_memory_usage, print_memory_usage
10 |
11 | from . import settings
12 |
13 | _VERBOSITY_LEVELS_FROM_STRINGS = {"error": 0, "warn": 1, "info": 2, "hint": 3}
14 |
15 |
def info(*parts, **options):
    """Forward the message to :func:`msg` at the ``"info"`` verbosity level."""
    return msg(*parts, v="info", **options)
18 |
19 |
def error(*parts, **options):
    """Forward the message, prefixed with ``"Error:"``, to :func:`msg` at the ``"error"`` level."""
    return msg("Error:", *parts, v="error", **options)
23 |
24 |
def warn(*parts, **options):
    """Forward the message, prefixed with ``"WARNING:"``, to :func:`msg` at the ``"warn"`` level."""
    return msg("WARNING:", *parts, v="warn", **options)
28 |
29 |
def hint(*parts, **options):
    """Forward the message to :func:`msg` at the ``"hint"`` verbosity level."""
    return msg(*parts, v="hint", **options)
32 |
33 |
def _settings_verbosity_greater_or_equal_than(v):
    """Return whether ``settings.verbosity`` is at least the numeric level `v`.

    A string verbosity ('error', 'warn', 'info', 'hint') is first translated
    to its numeric level; any other value is compared as is.
    """
    configured = settings.verbosity
    if isinstance(configured, str):
        configured = _VERBOSITY_LEVELS_FROM_STRINGS[configured]
    return configured >= v
40 |
41 |
def msg(
    *msg,
    v=None,
    time=False,
    memory=False,
    reset=False,
    end="\n",
    no_indent=False,
    t=None,
    m=None,
    r=None,
):
    """Write message to logging output.

    Nothing is written unless ``settings.verbosity`` is at least `v`.
    Output goes through ``_write_log``: standard output by default, or the
    file named by ``settings.logfile`` when that is not empty.

    Parameters
    ----------
    *msg :
        Message parts, handled like the arguments of ``print()``.
    v : {'error', 'warn', 'info', 'hint'} or int, (default: 4)
        0/'error', 1/'warn', 2/'info', 3/'hint', 4, 5, 6...
    time, t : bool, optional (default: False)
        Append the time passed since the previous mark to the message;
        restart the clock.
    memory, m : bool, optional (default: False)
        Print memory information. With ``memory`` alone (no ``time``) only
        the memory usage is written, not the message itself.
    reset, r : bool, optional (default: False)
        Reset the stored time and (best-effort) memory reference points.
    end : str (default: '\\n')
        Same meaning as in builtin ``print()`` function.
    no_indent : bool (default: False)
        Do not indent for ``v >= 4``.
    """
    # variable shortcuts
    if t is not None:
        time = t
    if m is not None:
        memory = m
    if r is not None:
        reset = r
    if v is None:
        v = 4
    if isinstance(v, str):
        v = _VERBOSITY_LEVELS_FROM_STRINGS[v]
    if v == 3:  # insert "--> " before hints
        msg = ("-->",) + msg
    if v >= 4 and not no_indent:
        msg = (" ",) + msg
    if _settings_verbosity_greater_or_equal_than(v):
        if not time and not memory and len(msg) > 0:
            _write_log(*msg, end=end)
        if reset:
            try:
                settings._previous_memory_usage, _ = get_memory_usage()
            except Exception:
                # Memory bookkeeping is best-effort; do not swallow
                # KeyboardInterrupt/SystemExit as a bare ``except`` would.
                pass
            settings._previous_time = get_time()
        if time:
            elapsed = get_passed_time()
            msg = msg + (f"({_sec_to_str(elapsed)})",)
            _write_log(*msg, end=end)
        if memory:
            _write_log(get_memory_usage(), end=end)


m = msg
104 |
105 |
def _write_log(*msg, end="\n"):
    """Write message to log output, ignoring the verbosity level.

    This is the most basic logging function. When ``settings.logfile`` is
    the empty string the message is printed; otherwise every part is
    formatted as a string followed by one space and the result, plus `end`,
    is appended to that file.

    Parameters
    ----------
    *msg :
        One or more arguments to be formatted as string. Same behavior as
        the print function.
    end : str (default: newline)
        Text written after the message.
    """
    from .settings import logfile

    if logfile == "":
        print(*msg, end=end)
        return

    line = "".join(f"{piece} " for piece in msg)
    with open(logfile, "a") as handle:
        handle.write(line + end)
125 |
126 |
def _sec_to_str(t, show_microseconds=False):
    """Format a duration given in seconds as ``H:MM:SS``.

    Parameters
    ----------
    t : int or float
        Time in seconds.
    show_microseconds : bool (default: False)
        If true, keep the fractional part as two extra digits
        (``H:MM:SS.cc``); otherwise it is cut off.
    """
    # Work in hundredths of a second and peel off one unit at a time.
    whole_seconds, hundredths = divmod(t * 100, 100)
    whole_minutes, seconds = divmod(whole_seconds, 60)
    hours, minutes = divmod(whole_minutes, 60)

    formatted = "%d:%02d:%02d.%02d" % (hours, minutes, seconds, hundredths)
    if show_microseconds:
        return formatted
    # Drop the trailing ".cc".
    return formatted[:-3]
140 |
141 |
def get_passed_time():
    """Return the time elapsed since the last checkpoint and move the checkpoint.

    The checkpoint is ``settings._previous_time``; it is overwritten with the
    current ``get_time()`` value, so the next call measures from this moment.
    """
    current = get_time()
    delta = current - settings._previous_time
    settings._previous_time = current
    return delta
147 |
148 |
def print_passed_time():
    """Return the time since the last checkpoint, formatted by ``_sec_to_str``.

    Despite the name nothing is printed; the formatted string is returned.
    """
    elapsed = get_passed_time()
    return _sec_to_str(elapsed)
151 |
152 |
def timeout(func, args=(), timeout_duration=2, default=None, **kwargs):
    """Run ``func`` in a worker thread and stop waiting after a time limit.

    Parameters
    ----------
    func : callable
        Function to execute.
    args : tuple (default: ())
        Positional arguments passed to ``func``.
    timeout_duration : float (default: 2)
        Maximum number of seconds to wait for ``func`` to finish.
    default :
        Value returned when ``func`` raises or does not finish in time.
    **kwargs :
        Keyword arguments passed to ``func``.

    Returns
    -------
    The return value of ``func``, or ``default`` if it raised an exception or
    the time limit was exceeded.

    Notes
    -----
    The worker cannot be cancelled. It is started as a daemon thread so that
    a call which never returns does not keep the interpreter alive at exit.
    """
    import threading

    class InterruptableThread(threading.Thread):
        def __init__(self):
            super().__init__(daemon=True)
            self.result = default

        def run(self):
            try:
                self.result = func(*args, **kwargs)
            except Exception:
                # Deliberate best effort: on failure the default is returned.
                pass

    it = InterruptableThread()
    it.start()
    it.join(timeout_duration)
    return it.result
174 |
175 |
def get_latest_pypi_version():
    """Ask ``pip search`` for the latest cospar version on PyPI.

    Returns
    -------
    str
        The last whitespace-separated token of the ``pip search cospar``
        output, or ``"0.0.0"`` when the lookup fails for any reason (offline,
        ``pip`` not on the PATH, non-zero exit status, empty output).

    Notes
    -----
    NOTE(review): PyPI has disabled the search endpoint that ``pip search``
    relies on, so this lookup is expected to fall back to ``"0.0.0"``.
    Consider replacing it with a query of the PyPI JSON API.
    """
    from subprocess import DEVNULL, CalledProcessError, check_output

    try:  # needs to work offline as well
        # stderr is silenced because failure is an expected outcome here.
        result = check_output(["pip", "search", "cospar"], stderr=DEVNULL)
        return result.split()[-1].decode()
    except (CalledProcessError, OSError, IndexError, UnicodeDecodeError):
        return "0.0.0"
184 |
185 |
def _version_tuple(version):
    """Turn a version string such as ``'0.10.2.dev3'`` into ``(0, 10, 2)``."""
    import re

    release = str(version).split(".dev")[0]
    numbers = []
    for piece in release.split("."):
        digits = re.match(r"\d+", piece)
        if digits is None:
            break
        numbers.append(int(digits.group()))
    return tuple(numbers)


def check_if_latest_version():
    """Warn when a newer cospar release than the installed one is found.

    The latest version is looked up through ``get_latest_pypi_version`` with
    a two-second limit; if that fails the lookup yields ``"0.0.0"`` and no
    warning is issued. Any ``.dev`` suffix is ignored in the comparison.
    """
    from . import __version__

    latest_version = timeout(
        get_latest_pypi_version, timeout_duration=2, default="0.0.0"
    )
    # Compare numerically: as plain strings "0.10.0" would sort before "0.9.0".
    installed = _version_tuple(__version__)
    if installed and installed < _version_tuple(latest_version):
        warn(
            "There is a newer cospar version available on PyPI:\n",
            "Your version: \t\t",
            __version__,
            "\nLatest version: \t",
            latest_version,
        )
200 |
201 |
def print_version():
    """Log the cospar version, Python version and current date.

    Afterwards ``check_if_latest_version`` is called to look for a newer
    release.
    """
    from . import __version__

    banner = (
        f"Running cospar {__version__} "
        f"(python {python_version()}) on {get_date_string()}."
    )
    _write_log(banner)
    check_if_latest_version()
210 |
211 |
def print_versions():
    """Log ``name==version`` for cospar and a fixed list of related packages.

    Packages that cannot be imported or expose no ``__version__`` are
    skipped. All entries go on one line, followed by a check for a newer
    cospar release.
    """
    packages = (
        "cospar",
        "scanpy",
        "anndata",
        "loompy",
        "numpy",
        "scipy",
        "matplotlib",
        "sklearn",
        "pandas",
    )
    for entry in packages:
        # An entry may be an (import name, display name) pair.
        if isinstance(entry, tuple):
            import_name, display_name = entry[0], entry[1]
        else:
            import_name = display_name = entry
        try:
            version = __import__(import_name).__version__
            _write_log(f"{display_name}=={version}", end="  ")
        except (ImportError, AttributeError):
            continue
    _write_log("")
    check_if_latest_version()
233 |
234 |
def get_date_string():
    """Return the current local date and time as ``YYYY-MM-DD HH:MM``."""
    current = datetime.now()
    return current.strftime("%Y-%m-%d %H:%M")
237 |
238 |
def switch_verbosity(mode="on", module=None):
    """Temporarily silence, override or restore a package's verbosity.

    Parameters
    ----------
    mode : {'on', 'off'} or non-string value (default: 'on')
        'off' saves the current ``settings.verbosity`` in
        ``settings.tmp_verbosity`` and sets the verbosity to 0.
        'on' restores the saved value and deletes ``tmp_verbosity``; it does
        nothing when no value was saved.
        Any non-string value is installed as the new verbosity after saving
        the current one. Other strings are ignored.
    module : str, optional (default: None)
        Name of the package whose ``settings`` should be changed (for example
        'scanpy'). None selects this package.
    """
    if module is None:
        from . import settings
    else:
        # ``exec("from ... import settings")`` cannot bind a local variable
        # inside a function in Python 3, so resolve the object explicitly.
        from importlib import import_module

        package = import_module(module)
        try:
            settings = package.settings
        except AttributeError:
            # Same fallback as ``from package import settings``: a submodule.
            settings = import_module(f"{module}.settings")

    if mode == "on" and hasattr(settings, "tmp_verbosity"):
        settings.verbosity = settings.tmp_verbosity
        del settings.tmp_verbosity

    elif mode == "off":
        settings.tmp_verbosity = settings.verbosity
        settings.verbosity = 0

    elif not isinstance(mode, str):
        settings.tmp_verbosity = settings.verbosity
        settings.verbosity = mode
258 |
259 |
class ProgressReporter:
    """Throttled percentage display written to ``stdout``.

    Parameters
    ----------
    total : int
        Number of ``update`` calls that corresponds to 100%.
    interval : float (default: 3)
        Minimum time between two refreshes, in the unit of ``get_time()``.

    Nothing is written unless ``settings.verbosity`` is greater than 1.
    """

    def __init__(self, total, interval=3):
        self.total = total
        self.interval = interval
        self.count = 0
        self.timestamp = get_time()

    def update(self):
        """Count one finished step and refresh the display when it is due."""
        self.count += 1
        if settings.verbosity <= 1:
            return
        # Refresh when the interval has passed or on the very last step.
        due = get_time() - self.timestamp > self.interval
        if not (due or self.count == self.total):
            return
        self.timestamp = get_time()
        percent = int(self.count * 100 / self.total)
        stdout.write(f"\r... {percent}%")
        stdout.flush()

    def finish(self):
        """Move the cursor back to the start of the progress line."""
        if settings.verbosity <= 1:
            return
        stdout.write("\r")
        stdout.flush()
281 |
282 |
def profiler(command, filename="profile.stats", n_stats=10):
    """Profile a Python command with cProfile and print the top entries.

    The raw statistics are stored in ``filename`` and then printed sorted by
    internal time, with directory names stripped from the file paths.

    Stats can be visualized with `!snakeviz profile.stats`.

    Parameters
    ----------
    command: str
        Command string to be executed.
    filename: str
        Name under which to store the stats.
    n_stats: int or None
        Number of top stats to show. A falsy value shows all of them.

    Returns
    -------
    The ``pstats.Stats`` object returned by ``print_stats``.
    """
    import cProfile
    import pstats

    cProfile.run(command, filename)

    report = pstats.Stats(filename)
    report.strip_dirs()
    report.sort_stats("time")

    # An empty dict is a restriction that filters nothing.
    restriction = n_stats if n_stats else {}
    return report.print_stats(restriction)
306 |
--------------------------------------------------------------------------------
/cospar/pl.py:
--------------------------------------------------------------------------------
1 | from .plotting import *
2 |
--------------------------------------------------------------------------------
/cospar/plotting/__init__.py:
--------------------------------------------------------------------------------
1 | from cospar.plotting._clone import *
2 | from cospar.plotting._gene import *
3 | from cospar.plotting._map import *
4 | from cospar.plotting._utils import *
5 |
--------------------------------------------------------------------------------
/cospar/plotting/_clone.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import numpy as np
4 | import pandas as pd
5 | import scipy.sparse as ssp
6 | import scipy.stats as stats
7 | import seaborn as sns
8 | import statsmodels.sandbox.stats.multicomp
9 | from ete3 import Tree
10 | from matplotlib import pyplot as plt
11 | from numpy.lib.twodim_base import tril_indices
12 | from scipy.cluster import hierarchy
13 |
14 | # from plotnine import *
15 | from sklearn.manifold import SpectralEmbedding
16 |
17 | from cospar import tool as tl
18 | from cospar.plotting import _utils as pl_util
19 |
20 | from .. import help_functions as hf
21 | from .. import logging as logg
22 | from .. import settings
23 |
24 |
def barcode_heatmap(
    adata,
    selected_times=None,
    selected_fates=None,
    color_bar=True,
    rename_fates=None,
    normalize=False,
    binarize=False,
    log_transform=False,
    fig_width=4,
    fig_height=6,
    figure_index="",
    plot=True,
    pseudocount=10 ** (-10),
    order_map_x=False,
    order_map_y=False,
    fate_normalize_source="X_clone",
    select_clones_with_fates: list = None,
    select_clones_without_fates: list = None,
    select_clones_mode: str = "or",
    **kwargs,
):
    """
    Plot barcode heatmap among different fate clusters.

    We select clonal measurements at the chosen time points and show the
    corresponding heatmap among the selected fate clusters.

    Parameters
    ----------
    adata: :class:`~anndata.AnnData` object
    selected_times: `list`, optional (default: None)
        Time points to select the cell states.
    selected_fates: `list`, optional (default: all)
        List of fate clusters to use. If set to be [], use all.
    color_bar: `bool`, optional (default: True)
        Plot color bar.
    rename_fates: `list`, optional (default: None)
        Provide new names in substitution of names in selected_fates.
        For this to be effective, the new name list needs to have names
        in exact correspondence to those in the old list.
    normalize:
        To perform cluster-wise then clone-wise normalization
    binarize: `bool`
        Binarize the coarse-grained barcode count matrix, just for the purpose of plotting.
    log_transform: `bool`, optional (default: False)
        If true, perform a log transform. This is needed when the data
        matrix has entries varying by several order of magnitude.
    fig_width: `float`, optional (default: 4)
        Figure width.
    fig_height: `float`, optional (default: 6)
        Figure height.
    figure_index: `str`, optional (default: '')
        If not empty, it is appended (after an underscore) to the name of
        the saved figure file.
    plot: `bool`
        True: plot the result. False, suppress the plot.
    pseudocount: `float`
        Pseudocount for the heatmap (needed for ordering the map)
    order_map_x: `bool`
        Whether to re-order the x coordinate of the matrix or not
    order_map_y: `bool`
        Whether to re-order the y coordinate of the matrix or not
    fate_normalize_source:
        Source for cluster-wise normalization: {'X_clone','state_info'}. 'X_clone': directly row-normalize coarse_X_clone; 'state_info': compute each cluster size directly, and then normalize coarse_X_clone. The latter method is useful if we have single-cell resolution for each fate.
    select_clones_with_fates: list = None,
        Select clones that labels fates from this list.
    select_clones_without_fates: list = None,
        Exclude clones that labels fates from this list.
    select_clones_mode: str = {'or','and'}
        Logic rule for selection.
    **kwargs:
        Passed on to both :func:`cospar.tl.coarse_grain_clone_over_cell_clusters`
        and the heatmap plotting helper. 'x_ticks' defaults to the fate names.

    Returns
    -------
    The heatmap axis when `plot` is True, otherwise None.
    The coarse-grained X_clone matrix and the selected clusters are stored at
    adata.uns['barcode_heatmap']. The coarse-grained X_clone keeps all clones and maintains their ordering.
    """

    data_des = adata.uns["data_des"][-1]
    data_des = f"{data_des}_clonal"
    figure_path = settings.figure_path

    coarse_X_clone, mega_cluster_list = tl.coarse_grain_clone_over_cell_clusters(
        adata,
        selected_times=selected_times,
        selected_fates=selected_fates,
        normalize=normalize,
        fate_normalize_source=fate_normalize_source,
        select_clones_with_fates=select_clones_with_fates,
        select_clones_without_fates=select_clones_without_fates,
        select_clones_mode=select_clones_mode,
        **kwargs,
    )

    if rename_fates is None:
        rename_fates = mega_cluster_list

    if len(rename_fates) != len(mega_cluster_list):
        logg.warn(
            "rename_fates does not have the same length as selected_fates, thus not used."
        )
        rename_fates = mega_cluster_list

    if "x_ticks" not in kwargs:
        kwargs["x_ticks"] = rename_fates

    coarse_X_clone_new = pl_util.custom_hierachical_ordering(
        np.arange(coarse_X_clone.shape[0]), coarse_X_clone
    )
    # The stored matrix is the un-reordered one; the reordered copy is only
    # used for plotting.
    adata.uns["barcode_heatmap"] = {
        "coarse_X_clone": coarse_X_clone,
        "fate_names": rename_fates,
    }
    logg.info("Data saved at adata.uns['barcode_heatmap']")
    if plot:
        if binarize:
            final_matrix = coarse_X_clone_new > 0
            color_bar_label = "Binarized barcode count"
        else:
            final_matrix = coarse_X_clone_new
            color_bar_label = "Barcode count"

        if normalize:
            color_bar_label += " (normalized)"

        # Only clones with at least one count are shown.
        clone_idx = final_matrix.sum(0) > 0
        ax = pl_util.heatmap(
            final_matrix[:, clone_idx].T + pseudocount,
            order_map_x=order_map_x,
            order_map_y=order_map_y,
            color_bar_label=color_bar_label,
            log_transform=log_transform,
            fig_width=fig_width,
            fig_height=fig_height,
            color_bar=color_bar,
            **kwargs,
        )
        plt.title(f"{np.sum(clone_idx)} clones")

        plt.tight_layout()
        if figure_index != "":
            # Was a no-op comparison (``==``); the separator is now applied.
            figure_index = f"_{figure_index}"
        plt.savefig(
            os.path.join(
                figure_path,
                f"{data_des}_barcode_heatmap{figure_index}.{settings.file_format_figs}",
            )
        )
        return ax
171 |
172 |
def clonal_fates_across_time(adata, selected_times, **kwargs):
    """
    Jitter plot of the number of fates per clone at two time points.

    Parameters
    ----------
    adata: :class:`~anndata.AnnData` object
    selected_times: `list`
        Exactly two time points; the first goes on the x axis and the second
        on the y axis.
    **kwargs:
        Passed on to the jitter plotting helper.

    Returns
    -------
    Results updated at adata.uns["clonal_fates_across_time"]
    """
    if len(selected_times) != 2:
        raise ValueError("selected_times must be a list with two values")

    def _fates_per_clone(time_point):
        # barcode_heatmap stores its result in adata.uns; read it right away
        # because the next call overwrites it.
        barcode_heatmap(
            adata,
            selected_times=time_point,
            color_bar=True,
            log_transform=False,
            plot=False,
        )
        return (adata.uns["barcode_heatmap"]["coarse_X_clone"] > 0).sum(0)

    t_first, t_second = selected_times[0], selected_times[1]
    clonal_fates_t1 = _fates_per_clone(t_first)
    clonal_fates_t2 = _fates_per_clone(t_second)

    pl_util.jitter(clonal_fates_t1, clonal_fates_t2, **kwargs)
    plt.xlabel(f"Number of fates per clone (t={t_first})")
    plt.ylabel(f"Number of fates per clone (t={t_second})")

    # NOTE(review): this reads the first entry of data_des, whereas
    # barcode_heatmap reads the last one -- confirm that this is intended.
    data_des = adata.uns["data_des"][0]
    file_name = f"{data_des}_barcode_coupling_across_time.{settings.file_format_figs}"
    plt.savefig(os.path.join(settings.figure_path, file_name))

    adata.uns["clonal_fates_across_time"] = {
        "clonal_fates_t1": clonal_fates_t1,
        "clonal_fates_t2": clonal_fates_t2,
    }
    logg.info("Data saved at adata.uns['clonal_fates_across_time']")
221 |
222 |
def clones_on_manifold(
    adata,
    selected_clone_list=[0],
    color_list=["red", "blue", "purple", "green", "cyan", "black"],
    selected_times=None,
    title=True,
    clone_markersize=12,
    clone_markeredgewidth=1,
    markeredgecolor="black",
    **kwargs,
):
    """
    Plot clones on top of state embedding.

    One figure is created and saved per selected clone.

    Parameters
    ----------
    adata: :class:`~anndata.AnnData` object
    selected_clone_list: `list`
        List of selected clone ID's.
    color_list: `list`, optional (default: ['red','blue','purple','green','cyan','black'])
        The list of color that defines color at respective time points.
        Colors are reused cyclically if there are more time points.
    selected_times: `list`, optional (default: all)
        Select time points to show corresponding states. If set to be [], use all states.
    title: `bool`, optional (default: True)
        If true, show the clone id as panel title.
    clone_markersize: `int`, optional (default: 12)
        Clone marker size
    clone_markeredgewidth: `int`, optional (default: 1)
        Edge width for clone marker
    markeredgecolor: `str`, optional (default: 'black')
        Edge color for clone marker
    **kwargs:
        Passed on to the `ax.plot` call that draws the clone markers.
    """
    # The list defaults are never modified in place, so sharing them between
    # calls is harmless.

    fig_width = settings.fig_width
    fig_height = settings.fig_height
    point_size = settings.fig_point_size
    x_emb = adata.obsm["X_emb"][:, 0]
    y_emb = adata.obsm["X_emb"][:, 1]
    data_des = adata.uns["data_des"][-1]
    figure_path = settings.figure_path
    X_clone = adata.obsm["X_clone"]
    time_info = np.array(adata.obs["time_info"])

    # use only valid time points
    sp_idx = hf.selecting_cells_by_time_points(time_info, selected_times)
    selected_times = np.sort(list(set(time_info[sp_idx])))

    selected_clone_list = np.array(selected_clone_list)
    full_id_list = np.arange(X_clone.shape[1])
    # np.isin replaces the deprecated np.in1d.
    valid_idx = np.isin(full_id_list, selected_clone_list)
    if np.sum(valid_idx) < len(selected_clone_list):
        logg.error(
            f"Valid id range is (0,{X_clone.shape[1]-1}). Please use a smaller ID!"
        )
        selected_clone_list = full_id_list[valid_idx]

    if len(selected_clone_list) == 0:
        logg.error("No valid states selected.")
    else:
        # Cells at any selected time point form the grey background; the mask
        # is the same for every clone.
        background_idx = np.isin(time_info, selected_times)
        for my_id in selected_clone_list:
            fig = plt.figure(figsize=(fig_width, fig_height))
            ax = plt.subplot(1, 1, 1)

            pl_util.customized_embedding(
                x_emb[background_idx],
                y_emb[background_idx],
                np.zeros(len(y_emb[background_idx])),
                ax=ax,
                point_size=point_size,
            )

            # ``toarray`` works on every scipy sparse type, unlike ``.A``.
            idx_clone = X_clone[:, my_id].toarray().flatten() > 0
            for j, time_point in enumerate(selected_times):
                idx = (time_info == time_point) & idx_clone
                ax.plot(
                    x_emb[idx],
                    y_emb[idx],
                    ".",
                    color=color_list[j % len(color_list)],
                    markersize=clone_markersize,
                    markeredgecolor=markeredgecolor,
                    markeredgewidth=clone_markeredgewidth,
                    **kwargs,
                )

            if title:
                ax.set_title(f"ID: {my_id}")

            fig.savefig(
                os.path.join(
                    figure_path,
                    f"{data_des}_different_clones_{my_id}.{settings.file_format_figs}",
                )
            )
321 |
322 |
def clonal_fate_bias(adata, show_histogram=True, FDR=0.05):
    """
    Plot clonal fate bias towards a cluster.

    The results should be pre-computed from :func:`cospar.tl.clonal_fate_bias`

    Parameters
    ----------
    adata: :class:`~anndata.AnnData` object
    show_histogram: `bool`, optional (default: True)
        If true, also plot a histogram of the clonal fraction in the
        selected fates.
    FDR: `float`, optional (default: 0.05)
        False-discovery rate after the Benjamini-Hochberg correction. It is
        drawn as a horizontal line at -log10(FDR).

    Raises
    ------
    ValueError
        If adata.uns has no 'clonal_fate_bias' entry.
    """

    if "clonal_fate_bias" not in adata.uns.keys():
        raise ValueError(
            "clonal_fate_bias has not been computed. Run cs.tl.clonal_fate_bias first"
        )

    result = adata.uns["clonal_fate_bias"]
    fate_bias = result["Fate_bias"]
    target_fraction_array = result["clonal_fraction_in_target_fate"]

    figsize = (settings.fig_width, settings.fig_height)
    data_des = adata.uns["data_des"][-1]
    figure_path = settings.figure_path
    FDR_threshold = -np.log10(FDR)

    # Panel 1: fate bias of each clone with the FDR threshold line.
    fig = plt.figure(figsize=figsize)
    ax = plt.subplot(1, 1, 1)
    clone_index = np.arange(len(fate_bias))
    ax.plot(clone_index, fate_bias, ".", color="blue", markersize=5)
    ax.plot(
        clone_index,
        np.zeros(len(fate_bias)) + FDR_threshold,
        "-.",
        color="grey",
        markersize=5,
        label=f"FDR={FDR}",
    )
    for side in ("top", "right"):
        ax.spines[side].set_visible(False)
    ax.set_ylabel("Clonal fate bias")
    ax.set_xlabel("Clonal index")
    ax.legend()
    fig.tight_layout()
    bias_file = f"{data_des}_clonal_fate_bias.{settings.file_format_figs}"
    fig.savefig(os.path.join(figure_path, bias_file))

    if not show_histogram:
        return

    # Panel 2: distribution of the clonal fraction in the selected fates.
    fig = plt.figure(figsize=figsize)
    ax = plt.subplot(1, 1, 1)
    ax.hist(target_fraction_array, color="#2ca02c", density=True)
    ax.set_xlim([0, 1])
    ax.set_xlabel("Clonal fraction in selected fates")
    ax.set_ylabel("Density")
    for side in ("top", "right"):
        ax.spines[side].set_visible(False)
    ax.set_title(f"Average: {np.mean(target_fraction_array):.2f}")
    fig.tight_layout()
    fraction_file = f"{data_des}_observed_clonal_fraction.{settings.file_format_figs}"
    fig.savefig(os.path.join(figure_path, fraction_file))
395 |
396 |
def clonal_reports(adata, selected_times=None, **kwargs):
    """
    Report the statistics of the clonal data.

    It includes the statistics for clone size, and the barcode number per cell.
    For every time point a summary is printed and two histograms are drawn.

    Parameters
    ----------
    adata: :class:`~anndata.AnnData` object
    selected_times: `list`, optional (default: None)
        Time points used to select the cells that enter the report.
    **kwargs:
        Passed on to both `seaborn.histplot` calls.
    """

    time_info = np.array(adata.obs["time_info"])
    sp_idx = hf.selecting_cells_by_time_points(time_info, selected_times)
    adata_1 = adata[sp_idx]
    persistent_clone_ids = tl.identify_persistent_clones(adata_1)
    X_clone = adata_1.obsm["X_clone"]
    total_clone_N = X_clone.shape[1]
    print(
        f"  Clones observed across selected times: {len(persistent_clone_ids)} (out of {total_clone_N} clones)"
    )

    # Sorted so that the report order does not depend on set iteration order.
    for x in sorted(set(adata_1.obs["time_info"])):
        print(f"---------t={x}---------")
        adata_sp = adata_1[adata_1.obs["time_info"] == x]
        X_clone = adata_sp.obsm["X_clone"]
        # np.asarray(...).ravel() flattens both np.matrix and ndarray sums.
        clone_size = np.asarray(X_clone.sum(0)).ravel()
        clonal_bc_number = np.asarray(X_clone.sum(1)).ravel()
        clonal_cells_N = np.sum(clonal_bc_number > 0)
        total_N = X_clone.shape[0]
        total_clone_N = X_clone.shape[1]
        useful_clone_N = np.sum(clone_size > 0)
        print(f"    Cells with barcode: {clonal_cells_N} (out of {total_N} cells)")
        print(
            f"    Barcodes with cells: {useful_clone_N} (out of {total_clone_N} clones)"
        )

        fig, axs = plt.subplots(1, 2, figsize=(8, 3.5))
        ax = sns.histplot(clone_size[clone_size > 0], ax=axs[0], **kwargs)
        ax.set_xlabel("Clone size")
        ax.set_ylabel("Count")

        ax = sns.histplot(clonal_bc_number[clonal_bc_number > 0], ax=axs[1], **kwargs)
        ax.set_xlabel("Clonal barcode number per cell")
        ax.set_ylabel("Count")
        fig.suptitle(f"Time={x}")
438 |
--------------------------------------------------------------------------------
/cospar/pp.py:
--------------------------------------------------------------------------------
1 | from .preprocessing import *
2 |
--------------------------------------------------------------------------------
/cospar/preprocessing/__init__.py:
--------------------------------------------------------------------------------
1 | from ._preprocessing import *
2 |
--------------------------------------------------------------------------------
/cospar/settings.py:
--------------------------------------------------------------------------------
1 | """Settings
2 | """
3 |
verbosity = 3
"""Verbosity level (0=errors, 1=warnings, 2=info, 3=hints)
"""

data_path = "data"
"""Directory where adata is stored (default 'data').
"""

figure_path = "figure"
"""Directory where plots are saved (default 'figure').
"""

file_format_figs = "pdf"
"""File format for saving figures.
For example 'png', 'pdf' or 'svg'. Many other formats work as well (see
`matplotlib.pyplot.savefig`).
"""

# Default figure width and height, and default point size for embeddings.
# All three can be overridden through ``set_figure_params``.
fig_width = 4
fig_height = 3.5
fig_point_size = 2


logfile = ""
"""Name of logfile. By default is set to '' and writes to standard output."""
29 |
30 | # --------------------------------------------------------------------------------
31 | # Functions
32 | # --------------------------------------------------------------------------------
33 |
34 | import warnings
35 |
36 | from cycler import cycler
37 | from matplotlib import cbook, cm, colors, rcParams
38 |
39 | # from cospar import help_functions as hf
40 | # from . import help_functions as hf
41 |
# ``cbook.mplDeprecation`` was only an alias of MatplotlibDeprecationWarning
# and is no longer available in recent matplotlib releases; prefer the public
# name and fall back to the alias for old installations.
try:
    from matplotlib import MatplotlibDeprecationWarning as _MplDeprecationWarning
except ImportError:
    _MplDeprecationWarning = cbook.mplDeprecation

warnings.filterwarnings("ignore", category=_MplDeprecationWarning)
43 |
44 |
def set_rcParams_cospar(fontsize=12, color_map=None, frameon=None):
    """Apply the cospar look to ``matplotlib.rcParams``.

    Parameters
    ----------
    fontsize : number (default: 12)
        Base font size. Legend, axis-label and tick-label sizes are set to
        0.92 times this value.
    color_map : str, optional (default: None)
        Default image colormap; 'Reds' is used when None.
    frameon : bool, optional (default: None)
        Stored in the module-level ``_frameon``; None is treated as False.
    """
    # Available keys are described at
    # https://matplotlib.org/stable/tutorials/introductory/customizing.html
    global _frameon

    labelsize = 0.92 * fontsize
    cmap_name = color_map if color_map is not None else "Reds"

    rcParams.update(
        {
            # dpi options (mpl default: 100, 100)
            "figure.dpi": 100,
            "savefig.dpi": 150,
            # figure
            "figure.figsize": (6, 4),
            # lines (defaults: 1.5, 6, 1)
            "lines.linewidth": 1.5,
            "lines.markersize": 6,
            "lines.markeredgewidth": 1,
            # font
            "font.sans-serif": [
                "Arial",
                "Helvetica",
                "DejaVu Sans",
                "Bitstream Vera Sans",
                "sans-serif",
            ],
            # font sizes (mpl default: 10, medium, large, medium)
            "font.size": fontsize,
            "legend.fontsize": labelsize,
            "axes.titlesize": fontsize,
            "axes.labelsize": labelsize,
            # legend (mpl default: 1, 1, 2, 0.8)
            "legend.numpoints": 1,
            "legend.scatterpoints": 1,
            "legend.handlelength": 0.5,
            "legend.handletextpad": 0.4,
            "pdf.fonttype": 42,
            # axes
            "axes.linewidth": 0.8,
            "axes.edgecolor": "black",
            "axes.facecolor": "white",
            # ticks (mpl default: k, k, medium, medium)
            "xtick.color": "k",
            "ytick.color": "k",
            "xtick.labelsize": labelsize,
            "ytick.labelsize": labelsize,
            # axes grid (mpl default: False, #b0b0b0)
            "axes.grid": False,
            "grid.color": ".8",
            # color map
            "image.cmap": cmap_name,
            # spines
            "axes.spines.right": False,
            "axes.spines.top": False,
        }
    )

    # frame (mpl default: True)
    _frameon = frameon if frameon is not None else False
119 |
120 |
121 | # def set_up_plotting(fontsize):
122 | # """
123 | # Change matplotlib setting for beautiful plots.
124 | # """
125 |
126 | # plt.rc('font', family='sans-serif')
127 | # plt.rcParams['font.sans-serif']=['Helvetica']
128 | # plt.rc('xtick',labelsize=12) #14
129 | # plt.rc('ytick', labelsize=12) #14
130 | # #plt.rc('font', weight='bold')
131 | # plt.rc('font', weight='regular')
132 | # plt.rcParams.update({'font.size': fontsize}) #16
133 | # #plt.rcParams['axes.labelweight'] = 'bold'
134 | # plt.rcParams['axes.labelweight'] = 'regular'
135 | # #plt.rcParams['pdf.fonttype'] = 42 #make the figure editable, this comes with a heavy cost of file size
136 |
137 |
def set_figure_params(
    style="cospar",
    dpi=100,
    dpi_save=300,
    frameon=None,
    vector_friendly=True,
    transparent=True,
    fontsize=14,
    figsize=None,
    pointsize=2,
    color_map=None,
    facecolor=None,
    format="pdf",
    ipython_format="png2x",
):
    """Set resolution/size, styling and format of figures.

    Arguments
    ---------
    style : `str` (default: 'cospar')
        Init default values for ``matplotlib.rcParams`` suited for `cospar`.
        Any other value leaves the current matplotlib style untouched.
    dpi : `int` (default: 100)
        Resolution of rendered figures - affects the size of figures in notebooks.
        Not changed if `None`.
    dpi_save : `int` (default: 300)
        Resolution of saved figures. This should typically be higher to achieve
        publication quality. Not changed if `None`.
    frameon : `bool` (default: `None`)
        Add frames and axes labels to scatter plots.
    vector_friendly : `bool` (default: `True`)
        Plot scatter plots using `png` backend even when exporting as `pdf` or `svg`.
    transparent : `bool` (default: `True`)
        Save figures with transparent back ground. Sets
        `rcParams['savefig.transparent']`.
    fontsize : `int` (default: 14)
        Set the fontsize for several `rcParams` entries.
    figsize: `[float, float]` (default: `None`)
        Width and height for default figure size. Also stored in the
        module-level `fig_width` and `fig_height`.
    pointsize : `float` (default: 2)
        Stored in the module-level `fig_point_size`.
    color_map : `str` (default: `None`)
        Convenience method for setting the default color map.
    facecolor : `str` (default: `None`)
        Sets backgrounds `rcParams['figure.facecolor']`
        and `rcParams['axes.facecolor']` to `facecolor`.
    format : {'png', 'pdf', 'svg', etc.} (default: 'pdf')
        This sets the default format for saving figures: `file_format_figs`.
    ipython_format : list of `str` (default: 'png2x')
        Only concerns the notebook/IPython environment; see
        `IPython.core.display.set_matplotlib_formats` for more details.
    """
    global _rcParams_style, _vector_friendly, file_format_figs
    global fig_width, fig_height, fig_point_size

    try:
        import IPython

        if isinstance(ipython_format, str):
            ipython_format = [ipython_format]
        IPython.display.set_matplotlib_formats(*ipython_format)
    except Exception:
        # Best effort: IPython may be missing or may reject the format.
        # KeyboardInterrupt and SystemExit are no longer swallowed.
        pass

    _rcParams_style = style
    _vector_friendly = vector_friendly
    file_format_figs = format
    if transparent is not None:
        rcParams["savefig.transparent"] = transparent
    if facecolor is not None:
        rcParams["figure.facecolor"] = facecolor
        rcParams["axes.facecolor"] = facecolor
    if style == "cospar":
        set_rcParams_cospar(fontsize=fontsize, color_map=color_map, frameon=frameon)
    # Overwrite style options if given
    if figsize is not None:
        rcParams["figure.figsize"] = figsize
        fig_width = figsize[0]
        fig_height = figsize[1]
    if dpi is not None:
        rcParams["figure.dpi"] = dpi
    if dpi_save is not None:
        rcParams["savefig.dpi"] = dpi_save

    fig_point_size = pointsize
224 |
225 | # hf.set_up_folders()
226 |
227 |
def set_rcParams_defaults():
    """Restore all ``matplotlib.rcParams`` entries to matplotlib's defaults."""
    import matplotlib

    rcParams.update(matplotlib.rcParamsDefault)
233 |
234 |
def _set_start_time():
    """Return the current time as given by ``time.time()``."""
    import time as _clock

    return _clock.time()


_start = _set_start_time()
"""Time when the settings module is first imported."""

_previous_time = _start
"""Variable for timing program parts."""
246 |
--------------------------------------------------------------------------------
/cospar/tl.py:
--------------------------------------------------------------------------------
1 | from .tool import *
2 |
--------------------------------------------------------------------------------
/cospar/tmap/__init__.py:
--------------------------------------------------------------------------------
1 | from .map_reconstruction import *
2 | from .optimal_transport import *
3 |
--------------------------------------------------------------------------------
/cospar/tmap/_utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import time
3 |
4 | import numpy as np
5 | import pandas as pd
6 | import scanpy as sc
7 | import scipy.sparse as ssp
8 |
9 | from .. import help_functions as hf
10 | from .. import logging as logg
11 | from .. import plotting as pl
12 | from .. import settings
13 | from .. import tool as tl
14 |
15 |
16 | def generate_similarity_matrix(
17 | adata,
18 | file_name,
19 | round_of_smooth=10,
20 | neighbor_N=20,
21 | beta=0.1,
22 | truncation_threshold=0.001,
23 | save_subset=True,
24 | use_existing_KNN_graph=False,
25 | compute_new_Smatrix=False,
26 | ):
27 | """
28 | Generate similarity matrix (Smatrix) through graph diffusion
29 |
30 | It generates the similarity matrix via iterative graph diffusion.
31 | Similarity matrix from each round of diffusion will be saved, after truncation
32 | to promote sparsity and save space. If save_subset is activated, only save
33 | Smatrix for smooth rounds at the multiples of 5 (like 5,10,15,...). If a Smatrix is pre-computed,
34 | it will be loaded directly if compute_new_Smatrix=False.
35 |
36 | Parameters
37 | ----------
38 | adata: :class:`~anndata.AnnData` object
39 | file_name: str
40 | Filename to load pre-computed similarity matrix or save the newly
41 | computed similarity matrix.
42 | round_of_smooth: `int`, optional (default: 10)
43 | The rounds of graph diffusion.
44 | neighbor_N: `int`, optional (default: 20)
45 | Neighbor number for constructing the KNN graph, using the UMAP method.
46 | beta: `float`, option (default: 0.1)
47 | Probability to stay at the origin in a unit diffusion step, in the range [0,1]
48 | truncation_threshold: `float`, optional (default: 0.001)
49 | At each iteration, truncate the similarity matrix using
50 | truncation_threshold. This promotes the sparsity of the matrix,
51 | thus the speed of computation. We set the truncation threshold to be small,
52 | to guarantee accracy.
53 | save_subset: `bool`, optional (default: True)
54 | If true, save only Smatrix at smooth round [5,10,15,...];
55 | Otherwise, save Smatrix at each round.
56 | use_existing_KNN_graph: `bool`, optional (default: False)
57 | If true and adata.obsp['connectivities'], use the existing knn graph to build
58 | the similarity matrix, regardless of neighbor_N.
59 | compute_new_Smatrix: `bool`, optional (default: False)
60 | If true, compute a new Smatrix, even if there is pre-computed Smatrix with the
61 | same parameterization.
62 |
63 | Returns
64 | -------
65 | similarity_matrix: `sp.spmatrix`
66 | """
67 |
68 | if os.path.exists(file_name + f"_SM{round_of_smooth}.npz") and (
69 | not compute_new_Smatrix
70 | ):
71 |
72 | logg.hint("Compute similarity matrix: load existing data")
73 | similarity_matrix = ssp.load_npz(file_name + f"_SM{round_of_smooth}.npz")
74 | else: # compute now
75 |
76 | logg.hint(f"Compute similarity matrix: computing new; beta={beta}")
77 |
78 | # add a step to compute PCA in case this is not computed
79 |
80 | if (not use_existing_KNN_graph) or ("connectivities" not in adata.obsp.keys()):
81 | # here, we assume that adata already has pre-computed PCA
82 | sc.pp.neighbors(adata, n_neighbors=neighbor_N)
83 | else:
84 | logg.hint(
85 | "Use existing KNN graph at adata.obsp['connectivities'] for generating the smooth matrix"
86 | )
87 | adjacency_matrix = adata.obsp["connectivities"]
88 |
89 | ############## The new method
90 | adjacency_matrix = (adjacency_matrix + adjacency_matrix.T) / 2
91 | ##############
92 |
93 | adjacency_matrix = hf.sparse_rowwise_multiply(
94 | adjacency_matrix, 1 / adjacency_matrix.sum(1).A.squeeze()
95 | )
96 | nrow = adata.shape[0]
97 | similarity_matrix = ssp.lil_matrix((nrow, nrow))
98 | similarity_matrix.setdiag(np.ones(nrow))
99 | transpose_A = adjacency_matrix.T
100 |
101 | if round_of_smooth == 0:
102 | SM = 0
103 | similarity_matrix = ssp.csr_matrix(similarity_matrix)
104 | ssp.save_npz(file_name + f"_SM{SM}.npz", similarity_matrix)
105 |
106 | for iRound in range(round_of_smooth):
107 | SM = iRound + 1
108 |
109 | logg.info("Smooth round:", SM)
110 | t = time.time()
111 | similarity_matrix = (
112 | beta * similarity_matrix + (1 - beta) * transpose_A * similarity_matrix
113 | )
114 | # similarity_matrix =beta*similarity_matrix+(1-beta)*similarity_matrix*adjacency_matrix
115 | # similarity_matrix_array.append(similarity_matrix)
116 |
117 | logg.hint("Time elapsed:", time.time() - t)
118 |
119 | t = time.time()
120 | sparsity_frac = (similarity_matrix > 0).sum() / (
121 | similarity_matrix.shape[0] * similarity_matrix.shape[1]
122 | )
123 | if sparsity_frac >= 0.1:
124 | # similarity_matrix_truncate=similarity_matrix
125 | # similarity_matrix_truncate_array.append(similarity_matrix_truncate)
126 |
127 | logg.hint(f"Orignal sparsity={sparsity_frac}, Thresholding")
128 | similarity_matrix = hf.matrix_row_or_column_thresholding(
129 | similarity_matrix, truncation_threshold
130 | )
131 | sparsity_frac_2 = (similarity_matrix > 0).sum() / (
132 | similarity_matrix.shape[0] * similarity_matrix.shape[1]
133 | )
134 | # similarity_matrix_truncate_array.append(similarity_matrix_truncate)
135 |
136 | logg.hint(f"Final sparsity={sparsity_frac_2}")
137 |
138 | logg.info(
139 | f"similarity matrix truncated (Smooth round={SM}): ",
140 | time.time() - t,
141 | )
142 |
143 | # logg.info("Save the matrix")
144 | # file_name=f'data/20200221_truncated_similarity_matrix_SM{round_of_smooth}_kNN{neighbor_N}_Truncate{str(truncation_threshold)[2:]}.npz'
145 | similarity_matrix = ssp.csr_matrix(similarity_matrix)
146 |
147 | ############## The new method
148 | # similarity_matrix=similarity_matrix.T.copy()
149 | ##############
150 |
151 | if save_subset:
152 | if SM % 5 == 0: # save when SM=5,10,15,20,...
153 |
154 | logg.hint("Save the matrix at every 5 rounds")
155 | ssp.save_npz(file_name + f"_SM{SM}.npz", similarity_matrix)
156 | else: # save all
157 |
158 | logg.hint("Save the matrix at every round")
159 | ssp.save_npz(file_name + f"_SM{SM}.npz", similarity_matrix)
160 |
161 | return similarity_matrix
162 |
163 |
def generate_initial_similarity(similarity_matrix, initial_index_0, initial_index_1):
    """
    Extract Smatrix at t1 from the full Smatrix

    Rows are selected with `initial_index_0` and columns with
    `initial_index_1`; a sparse result is converted to a dense array.

    Parameters
    ----------
    similarity_matrix: `np.array` or `sp.spmatrix`
        full Smatrix
    initial_index_0: `list`
        list of selected t1-cell id among all cells (t1+t2)
    initial_index_1: `list`
        list of selected t1-cell id among all cells (t1+t2)
        It can be the same as initial_index_0. In the case that they are different,
        initial_index_1 is a subset of cells that correspond to multi-time clones,
        while initial_index_0 may be all cells at t1.

    Returns
    -------
    initial Smatrix: `np.array`
    """

    t = time.time()
    initial_similarity = similarity_matrix[initial_index_0][:, initial_index_1]
    if ssp.issparse(initial_similarity):
        # toarray() exists on both sparse matrices and sparse arrays,
        # whereas the `.A` shortcut is not available on sparse arrays.
        initial_similarity = initial_similarity.toarray()

    logg.hint("Time elapsed: ", time.time() - t)
    return initial_similarity
193 |
194 |
def generate_final_similarity(similarity_matrix, final_index_0, final_index_1):
    """
    Extract Smatrix at t2 from the full Smatrix

    The full Smatrix is transposed first; rows of the transpose are then
    selected with `final_index_0` and columns with `final_index_1`.
    A sparse result is converted to a dense array.

    Parameters
    ----------
    similarity_matrix: `np.array` or `sp.spmatrix`
        full Smatrix
    final_index_0: `list`
        list of selected t2-cell id among all cells (t1+t2)
    final_index_1: `list`
        list of selected t2-cell id among all cells (t1+t2)
        It can be the same as final_index_0. In the case that they are different,
        final_index_0 is a subset of cells that correspond to multi-time clones,
        while final_index_1 may be all cells at t2.

    Returns
    -------
    final Smatrix: `np.array`
    """

    t = time.time()
    final_similarity = similarity_matrix.T[final_index_0][:, final_index_1]
    if ssp.issparse(final_similarity):
        # toarray() exists on both sparse matrices and sparse arrays,
        # whereas the `.A` shortcut is not available on sparse arrays.
        final_similarity = final_similarity.toarray()

    logg.hint("Time elapsed: ", time.time() - t)
    return final_similarity
224 |
225 |
def select_time_points(
    adata_orig, time_point=["day_1", "day_2"], extend_Tmap_space=False
):
    """
    Select barcoded cells at given time points for Tmap inference.

    Select cells at given time points, and prepare the right data structure
    for running core cospar function to infer the Tmap.

    Parameters
    ----------
    adata_orig: original :class:`~anndata.AnnData` object
        Reads `obs['time_info']`, `obsm['X_clone']` and `uns['data_des']`
        (and `obsm['X_emb']` when verbosity is at least 3).
    time_point: `list` optional (default: ['day_1','day_2'])
        Require at least two time points, arranged in ascending order.
        An empty list selects all time points found in `obs['time_info']`,
        sorted with `np.sort`.
    extend_Tmap_space: `bool` optional (default: `False`)
        If true, the initial states for Tmap will include all states at initial time points,
        and the later states for Tmap will include all states at later time points.
        Otherwise, the initial and later state
        space of the Tmap will be restricted to cells with multi-time clonal information
        alone. The latter case speeds up the computation, which is recommended.

    Returns
    -------
    Subsampled :class:`~anndata.AnnData` object, or `None` (after logging an
    error) when fewer than two time points are selected.
    """

    time_info_orig = np.array(adata_orig.obs["time_info"])
    clone_annot_orig = adata_orig.obsm["X_clone"]
    if len(time_point) == 0:  # use all clonally labelled cell states
        time_point = np.sort(
            list(set(time_info_orig))
        )  # this automatic ordering may not work

    if len(time_point) < 2:
        logg.error("Must select more than 1 time point!")
        return None

    def _clones_with_cells(row_selector):
        # Boolean vector over clones: True where the selected cells carry the
        # clone. Summing the matrix directly avoids building a dense
        # cell-by-clone array just to take column sums.
        return np.asarray(clone_annot_orig[row_selector].sum(0)).ravel() > 0

    # Clone annotation restricted to each selected time point.
    At = [
        ssp.csr_matrix(clone_annot_orig[time_info_orig == time_0])
        for time_0 in time_point
    ]

    ### Day t - t+1
    Clonal_cell_ID_FOR_t = []
    for j in range(len(time_point) - 1):
        # cells at time j sharing at least one clone with a cell at time j+1
        idx_t = np.array((At[j] * At[j + 1].T).sum(1) > 0).flatten()
        time_index_t = time_info_orig == time_point[j]
        temp = np.nonzero(time_index_t)[0][idx_t]
        Clonal_cell_ID_FOR_t.append(
            temp
        )  # this index is in the original space, without sampling etc

        logg.hint(
            f"Clonal cell fraction (day {time_point[j]}-{time_point[j+1]}):",
            len(temp) / np.sum(time_index_t),
        )

    ### Day t+1 - t
    Clonal_cell_ID_BACK_t = []
    for j in range(len(time_point) - 1):
        # cells at time j+1 sharing at least one clone with a cell at time j
        idx_t = np.array((At[j + 1] * At[j].T).sum(1) > 0).flatten()
        time_index_t = time_info_orig == time_point[j + 1]
        temp = np.nonzero(time_index_t)[0][idx_t]
        Clonal_cell_ID_BACK_t.append(
            temp
        )  # this index is in the original space, without sampling etc

        logg.hint(
            f"Clonal cell fraction (day {time_point[j+1]}-{time_point[j]}):",
            len(temp) / np.sum(time_index_t),
        )

    for j in range(len(time_point) - 1):
        logg.hint(
            f"Numer of cells that are clonally related -- day {time_point[j]}: {len(Clonal_cell_ID_FOR_t[j])} and day {time_point[j+1]}: {len(Clonal_cell_ID_BACK_t[j])}"
        )

    # flatten the list
    flatten_clonal_cell_ID_FOR = np.array(
        [sub_item for item in Clonal_cell_ID_FOR_t for sub_item in item]
    )
    flatten_clonal_cell_ID_BACK = np.array(
        [sub_item for item in Clonal_cell_ID_BACK_t for sub_item in item]
    )
    valid_clone_N_FOR = np.sum(_clones_with_cells(flatten_clonal_cell_ID_FOR))

    logg.info(f"Number of multi-time clones post selection: {valid_clone_N_FOR}")

    ###################### select initial and later cell states

    if extend_Tmap_space:
        # every cell at all but the last time point
        old_Tmap_cell_id_t1 = []
        for t_temp in time_point[:-1]:
            old_Tmap_cell_id_t1 = old_Tmap_cell_id_t1 + list(
                np.nonzero(time_info_orig == t_temp)[0]
            )
        old_Tmap_cell_id_t1 = np.array(old_Tmap_cell_id_t1)

        # every cell at all but the first time point
        old_Tmap_cell_id_t2 = []
        for t_temp in time_point[1:]:
            old_Tmap_cell_id_t2 = old_Tmap_cell_id_t2 + list(
                np.nonzero(time_info_orig == t_temp)[0]
            )
        old_Tmap_cell_id_t2 = np.array(old_Tmap_cell_id_t2)

    else:
        old_Tmap_cell_id_t1 = flatten_clonal_cell_ID_FOR
        old_Tmap_cell_id_t2 = flatten_clonal_cell_ID_BACK

    old_clonal_cell_id_t1 = flatten_clonal_cell_ID_FOR
    old_clonal_cell_id_t2 = flatten_clonal_cell_ID_BACK
    ########################

    # Sorted, de-duplicated ids (original space) of all cells kept in the output.
    sp_id = np.sort(list(set(list(old_Tmap_cell_id_t1) + list(old_Tmap_cell_id_t2))))
    sp_idx = np.zeros(clone_annot_orig.shape[0], dtype=bool)
    sp_idx[sp_id] = True

    Tmap_cell_id_t1 = hf.converting_id_from_fullSpace_to_subSpace(
        old_Tmap_cell_id_t1, sp_id
    )[0]
    clonal_cell_id_t1 = hf.converting_id_from_fullSpace_to_subSpace(
        old_clonal_cell_id_t1, sp_id
    )[0]
    clonal_cell_id_t2 = hf.converting_id_from_fullSpace_to_subSpace(
        old_clonal_cell_id_t2, sp_id
    )[0]
    Tmap_cell_id_t2 = hf.converting_id_from_fullSpace_to_subSpace(
        old_Tmap_cell_id_t2, sp_id
    )[0]

    Clonal_cell_ID_FOR_t_new = [
        hf.converting_id_from_fullSpace_to_subSpace(temp_id_list, sp_id)[0]
        for temp_id_list in Clonal_cell_ID_FOR_t
    ]
    Clonal_cell_ID_BACK_t_new = [
        hf.converting_id_from_fullSpace_to_subSpace(temp_id_list, sp_id)[0]
        for temp_id_list in Clonal_cell_ID_BACK_t
    ]

    sp_id_0 = np.sort(list(old_clonal_cell_id_t1) + list(old_clonal_cell_id_t2))
    sp_idx_0 = np.zeros(clone_annot_orig.shape[0], dtype=bool)
    sp_idx_0[sp_id_0] = True

    # Keep only the clones carried by at least one multi-time clonal cell.
    barcode_id = np.nonzero(_clones_with_cells(sp_idx_0))[0]
    clone_annot = clone_annot_orig[sp_idx][:, barcode_id]

    adata = adata_orig[sp_idx]
    adata.obsm["X_clone"] = clone_annot
    adata.uns["clonal_cell_id_t1"] = clonal_cell_id_t1
    adata.uns["clonal_cell_id_t2"] = clonal_cell_id_t2
    adata.uns["Tmap_cell_id_t1"] = Tmap_cell_id_t1
    adata.uns["Tmap_cell_id_t2"] = Tmap_cell_id_t2
    adata.uns["multiTime_cell_id_t1"] = Clonal_cell_ID_FOR_t_new
    adata.uns["multiTime_cell_id_t2"] = Clonal_cell_ID_BACK_t_new
    adata.uns["proportion"] = np.ones(len(time_point) - 1)
    adata.uns["sp_idx"] = sp_idx

    data_des_orig = adata_orig.uns["data_des"][0]
    data_des_0 = adata_orig.uns["data_des"][-1]
    time_label = "t" + "".join(f"*{x}" for x in time_point)

    data_des = (
        data_des_0
        + f"_MultiTimeClone_FullSpace{int(extend_Tmap_space)}_{time_label}"
    )
    adata.uns["data_des"] = [data_des_orig, data_des]

    if logg._settings_verbosity_greater_or_equal_than(3):
        N_cell, N_clone = clone_annot.shape
        logg.info(f"Cell number={N_cell}, Clone number={N_clone}")
        x_emb = adata.obsm["X_emb"][:, 0]
        y_emb = adata.obsm["X_emb"][:, 1]
        pl.customized_embedding(x_emb, y_emb, -x_emb)

    logg.hint(f"clonal_cell_id_t1: {len(clonal_cell_id_t1)}")
    logg.hint(f"Tmap_cell_id_t1: {len(Tmap_cell_id_t1)}")
    return adata
422 |
--------------------------------------------------------------------------------
/cospar/tmap/optimal_transport.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | This module is borrowed from Waddington-OT
4 | https://github.com/broadinstitute/wot/blob/master/wot/ot/optimal_transport.py
5 | """
6 |
7 | # import logging # this is a buildin package from Python
8 |
9 | import numpy as np
10 |
11 | from .. import logging as logg
12 |
13 | # logger = logging.getLogger('wot')
14 |
15 |
16 | # def compute_transport_matrix(solver, **params):
17 | # """
18 | # Compute the optimal transport with stabilized numerics.
19 | # Args:
20 | # G: Growth (absolute)
21 | # solver: transport_stablev2 or optimal_transport_duality_gap
22 | # growth_iters:
23 | # """
24 |
25 | # import gc
26 | # G = params['G']
27 | # growth_iters = params['growth_iters']
28 | # learned_growth = []
29 | # for i in range(growth_iters):
30 | # if i == 0:
31 | # row_sums = G
32 | # else:
33 | # row_sums = tmap.sum(axis=1) # / tmap.shape[1]
34 | # params['G'] = row_sums
35 | # learned_growth.append(row_sums)
36 | # tmap = solver(**params)
37 | # gc.collect()
38 |
39 | # return tmap, learned_growth
40 |
41 |
42 | # @ Lénaïc Chizat 2015 - optimal transport
def fdiv(l, x, p, dx):
    """Return `l` times the `dx`-weighted sum of `x*log(x/p) - x + p`."""
    integrand = x * (np.log(x / p)) - x + p
    weighted_total = np.sum(dx * integrand)
    return l * weighted_total
45 |
46 |
def fdivstar(l, u, p, dx):
    """Return `l` times the `(p*dx)`-weighted sum of `exp(u/l) - 1`."""
    weights = p * dx
    growth = np.exp(u / l) - 1
    return l * np.sum(weights * growth)
49 |
50 |
def primal(C, K, R, dx, dy, p, q, a, b, epsilon, lambda1, lambda2):
    """
    Evaluate the primal objective for the coupling `R`.

    The value is the sum of two `fdiv` terms (row marginal `R.dy` against `p`
    weighted by `lambda1`, column marginal `R^T.dx` against `q` weighted by
    `lambda2`) and an entropy-plus-transport-cost term divided by
    `len(p) * len(q)`. `a` and `b` are accepted for signature parity with
    :func:`dual` but are not used here.
    """
    n_pairs = len(p) * len(q)
    # log(0) = -inf is expected for zero entries of R; silence that warning.
    with np.errstate(divide="ignore"):
        source_penalty = fdiv(lambda1, np.dot(R, dy), p, dx)
        target_penalty = fdiv(lambda2, np.dot(R.T, dx), q, dy)
        entropy_term = epsilon * np.sum(R * np.nan_to_num(np.log(R)) - R + K)
        transport_cost = np.sum(R * C)
        return (
            source_penalty
            + target_penalty
            + (entropy_term + transport_cost) / n_pairs
        )
63 |
64 |
def dual(C, K, R, dx, dy, p, q, a, b, epsilon, lambda1, lambda2):
    """
    Evaluate the dual objective for the scaling variables `a` and `b`.

    The value is minus the two `fdivstar` terms (evaluated at
    `-epsilon*log(a)` and `-epsilon*log(b)`) minus
    `epsilon * sum(R - K) / (len(p) * len(q))`. `C` is accepted for signature
    parity with :func:`primal` but is not used here.
    """
    n_pairs = len(p) * len(q)
    source_conjugate = fdivstar(lambda1, -epsilon * np.log(a), p, dx)
    target_conjugate = fdivstar(lambda2, -epsilon * np.log(b), q, dy)
    entropy_term = epsilon * np.sum(R - K) / n_pairs
    return -source_conjugate - target_conjugate - entropy_term
75 |
76 |
77 | # end @ Lénaïc Chizat
78 |
79 |
def optimal_transport_duality_gap(
    C,
    G,
    lambda1,
    lambda2,
    epsilon,
    batch_size,
    tolerance,
    tau,
    epsilon0,
    max_iter,
    **ignored
):
    """
    Compute the optimal transport with stabilized numerics.

    The regularization is annealed geometrically from `epsilon0` down to
    `epsilon` over a fixed number of stages. Only the last stage checks the
    real duality gap against `tolerance`; earlier stages stop once the dual
    variables change by less than 1e-6 (relative). Unless `max_iter` is hit,
    the returned map therefore has a duality gap of at most `tolerance`.
    The method is twice faster than the :func:`.transport_stablev2`


    Parameters
    ----------
    C : 2-D ndarray
        The cost matrix. C[i][j] is the cost to transport cell i to cell j
    G : 1-D array_like
        Growth value for input cells.
    lambda1 : float
        Regularization parameter for the marginal constraint on p
    lambda2 : float
        Regularization parameter for the marginal constraint on q
    epsilon : float
        Entropy regularization parameter.
    batch_size : int
        Number of iterations to perform between each duality gap check
    tolerance : float
        Upper bound on the duality gap that the resulting transport map must guarantee.
    tau : float
        Threshold at which to perform numerical stabilization
    epsilon0 : float
        Starting value for exponentially-decreasing epsilon
    max_iter : int
        Maximum number of iterations. Print a warning and return if it is reached, even without convergence.
    **ignored
        Extra keyword arguments are accepted and discarded.

    Returns
    -------
    transport_map : 2-D ndarray
        The entropy-regularized unbalanced transport map
    """
    C = np.asarray(C, dtype=np.float64)
    epsilon_scalings = 5
    # Factor such that epsilon0 * scale_factor**(-k) sweeps the annealing stages.
    scale_factor = np.exp(-np.log(epsilon) / epsilon_scalings)

    I, J = C.shape
    dx, dy = np.ones(I) / I, np.ones(J) / J

    p = G
    q = np.ones(C.shape[1]) * np.average(G)

    u, v = np.zeros(I), np.zeros(J)
    a, b = np.ones(I), np.ones(J)

    epsilon_i = epsilon0 * scale_factor
    current_iter = 0

    def stabilized_kernel():
        # Gibbs kernel with the absorbed potentials u, v at the current epsilon.
        return np.exp((np.array([u]).T - C + np.array([v])) / epsilon_i)

    def coupling():
        # Transport plan implied by the current kernel and scalings.
        return (K.T * a).T * b

    for e in range(epsilon_scalings + 1):
        final_stage = e == epsilon_scalings
        duality_gap = np.inf

        # absorb the scalings of the previous stage, then lower epsilon
        u = u + epsilon_i * np.log(a)
        v = v + epsilon_i * np.log(b)
        epsilon_i = epsilon_i / scale_factor

        _K = np.exp(-C / epsilon_i)
        alpha1 = lambda1 / (lambda1 + epsilon_i)
        alpha2 = lambda2 / (lambda2 + epsilon_i)
        K = stabilized_kernel()
        a, b = np.ones(I), np.ones(J)
        old_a, old_b = a, b
        threshold = tolerance if final_stage else 1e-6
        iterations_per_check = batch_size if final_stage else 5

        while duality_gap > threshold:
            for _ in range(iterations_per_check):
                current_iter += 1
                old_a, old_b = a, b
                a = (p / (K.dot(np.multiply(b, dy)))) ** alpha1 * np.exp(
                    -u / (lambda1 + epsilon_i)
                )
                b = (q / (K.T.dot(np.multiply(a, dx)))) ** alpha2 * np.exp(
                    -v / (lambda2 + epsilon_i)
                )

                # stabilization: fold large scalings into u, v and reset them
                if max(max(abs(a)), max(abs(b))) > tau:
                    u = u + epsilon_i * np.log(a)
                    v = v + epsilon_i * np.log(b)  # absorb
                    K = stabilized_kernel()
                    a, b = np.ones(I), np.ones(J)

                if current_iter >= max_iter:
                    logg.warn(
                        "Reached max_iter with duality gap still above threshold. Returning"
                    )
                    # NOTE(review): unlike the regular return at the end of
                    # this function, this plan is not divided by C.shape[1]
                    # -- confirm whether that is intended.
                    return coupling()

            # The real dual variables. a and b are only the stabilized variables
            _a = a * np.exp(u / epsilon_i)
            _b = b * np.exp(v / epsilon_i)

            if final_stage:
                R = coupling()
                pri = primal(
                    C, _K, R, dx, dy, p, q, _a, _b, epsilon_i, lambda1, lambda2
                )
                dua = dual(C, _K, R, dx, dy, p, q, _a, _b, epsilon_i, lambda1, lambda2)
                duality_gap = (pri - dua) / abs(pri)
            else:
                # Skip the duality gap for the first epsilon scalings and use
                # the relative change of the dual variables instead.
                change_a = np.linalg.norm(_a - old_a * np.exp(u / epsilon_i)) / (
                    1 + np.linalg.norm(_a)
                )
                change_b = np.linalg.norm(_b - old_b * np.exp(v / epsilon_i)) / (
                    1 + np.linalg.norm(_b)
                )
                duality_gap = max(change_a, change_b)

    if np.isnan(duality_gap):
        # Logged instead of raised, so the (possibly invalid) map is still returned.
        logg.error(
            "Overflow encountered in duality gap computation, please report this incident"
        )
    return R / C.shape[1]
206 |
207 |
def transport_stablev2(
    C,
    lambda1,
    lambda2,
    epsilon,
    scaling_iter,
    G,
    tau,
    epsilon0,
    extra_iter,
    inner_iter_max,
    **ignored
):
    """
    Compute the optimal transport with stabilized numerics.

    Parameters
    ----------
    C : 2-D ndarray
        Cost matrix to transport cell i to cell j.
    lambda1 : float
        Regularization parameter for the marginal constraint for p.
    lambda2 : float
        Regularization parameter for the marginal constraint for q.
    epsilon : float
        Entropy parameter (final value when annealing is active).
    scaling_iter : int
        Number of scaling iterations.
    G : 1-D array_like
        Growth value for input cells.
    tau : float or None
        Threshold above which the scalings are absorbed into the potentials.
        `None` disables both this stabilization and the epsilon annealing;
        the iterations then run at the fixed `epsilon`.
    epsilon0 : float
        Starting value of epsilon when annealing is active (`tau` not None).
    extra_iter : int
        Number of additional scaling iterations run after the main loop.
    inner_iter_max : int
        Number of iterations between two epsilon updates when annealing.
    **ignored
        Extra keyword arguments are accepted and discarded.

    Returns
    -------
    2-D ndarray
        The transport map, divided by the number of columns of `C`.
    """

    warm_start = tau is not None
    epsilon_final = epsilon

    def get_reg(n):  # exponential decreasing
        return (epsilon0 - epsilon_final) * np.exp(-n) + epsilon_final

    def stabilized_kernel():
        # Gibbs kernel with the absorbed potentials u, v at the current epsilon.
        return np.exp((np.array([u]).T - C + np.array([v])) / epsilon_i)

    epsilon_i = epsilon0 if warm_start else epsilon
    dx = np.ones(C.shape[0]) / C.shape[0]
    dy = np.ones(C.shape[1]) / C.shape[1]

    p = G
    q = np.ones(C.shape[1]) * np.average(G)

    u = np.zeros(len(p))
    v = np.zeros(len(q))
    # `a` must exist even when both loops below run zero times.
    a = np.ones(len(p))
    b = np.ones(len(q))
    K = np.exp(-C / epsilon_i)

    alpha1 = lambda1 / (lambda1 + epsilon_i)
    alpha2 = lambda2 / (lambda2 + epsilon_i)
    epsilon_index = 0
    iterations_since_epsilon_adjusted = 0

    for _ in range(scaling_iter):
        # scaling iteration
        a = (p / (K.dot(np.multiply(b, dy)))) ** alpha1 * np.exp(
            -u / (lambda1 + epsilon_i)
        )
        b = (q / (K.T.dot(np.multiply(a, dx)))) ** alpha2 * np.exp(
            -v / (lambda2 + epsilon_i)
        )

        # stabilization (skipped when tau is None: comparing a float with
        # None would raise TypeError)
        iterations_since_epsilon_adjusted += 1
        if tau is not None and max(max(abs(a)), max(abs(b))) > tau:
            u = u + epsilon_i * np.log(a)
            v = v + epsilon_i * np.log(b)  # absorb
            K = stabilized_kernel()
            a = np.ones(len(p))
            b = np.ones(len(q))

        if warm_start and iterations_since_epsilon_adjusted == inner_iter_max:
            # move to the next, smaller epsilon and absorb the scalings
            epsilon_index += 1
            iterations_since_epsilon_adjusted = 0
            u = u + epsilon_i * np.log(a)
            v = v + epsilon_i * np.log(b)  # absorb
            epsilon_i = get_reg(epsilon_index)
            alpha1 = lambda1 / (lambda1 + epsilon_i)
            alpha2 = lambda2 / (lambda2 + epsilon_i)
            K = stabilized_kernel()
            a = np.ones(len(p))
            b = np.ones(len(q))

    for _ in range(extra_iter):
        a = (p / (K.dot(np.multiply(b, dy)))) ** alpha1 * np.exp(
            -u / (lambda1 + epsilon_i)
        )
        b = (q / (K.T.dot(np.multiply(a, dx)))) ** alpha2 * np.exp(
            -v / (lambda2 + epsilon_i)
        )

    R = (K.T * a).T * b

    return R / C.shape[1]
296 |
--------------------------------------------------------------------------------
/cospar/tool/__init__.py:
--------------------------------------------------------------------------------
1 | from cospar.tool._clone import *
2 | from cospar.tool._gene import *
3 | from cospar.tool._map import *
4 | from cospar.tool._utils import *
5 |
--------------------------------------------------------------------------------
/cospar/tool/_gene.py:
--------------------------------------------------------------------------------
1 | import os
2 | import time
3 | from logging import raiseExceptions
4 |
5 | import numpy as np
6 | import pandas as pd
7 | import scipy.sparse as ssp
8 | import scipy.stats as stats
9 | import statsmodels.sandbox.stats.multicomp
10 | from ete3 import Tree
11 | from matplotlib import pyplot as plt
12 | from scipy.cluster import hierarchy
13 |
14 | # from plotnine import *
15 | from sklearn.manifold import SpectralEmbedding
16 |
17 | from cospar.tool import _utils as tl_util
18 |
19 | from .. import help_functions as hf
20 | from .. import logging as logg
21 | from .. import settings
22 |
23 |
def differential_genes(
    adata,
    cell_group_A=None,
    cell_group_B=None,
    FDR_cutoff=0.05,
    sort_by="ratio",
    min_frac_expr=0.05,
    pseudocount=1,
):
    """
    Perform differential gene expression analysis between two cell groups.

    We use Wilcoxon rank-sum test to calculate P values, followed by
    Benjamini-Hochberg correction.

    Parameters
    ----------
    adata: :class:`~anndata.AnnData` object
        Need to contain gene expression matrix and `adata.obs['state_info']`.
    cell_group_A: `str` or `np.array`, optional (default: None)
        Either a cluster name among adata.obs['state_info'], or a boolean
        array of the size adata.shape[0] for defining population A.
        Leaving it as None selects no cell and raises a `ValueError`.
    cell_group_B: `str` or `np.array`, optional (default: None)
        Either a cluster name among adata.obs['state_info'], or a boolean
        array of the size adata.shape[0] for defining population B.
        Leaving it as None selects no cell and raises a `ValueError`.
    FDR_cutoff: `float`, optional (default: 0.05)
        Cut off for the corrected Pvalue of each gene. Only genes below this
        cutoff will be returned.
    sort_by: `str`, optional (default: 'ratio')
        The key to sort the differentially expressed genes. The key can be: 'ratio' or 'Qvalue'.
    min_frac_expr: `float`, optional (default: 0.05)
        Minimum expression fraction among selected states for a
        gene to be considered for DGE analysis.
    pseudocount: `int`, optional (default: 1)
        pseudo count for taking the gene expression ratio between the two groups

    Returns
    -------
    diff_gene_A: `pd.DataFrame`
        Genes with `Qvalue < FDR_cutoff` and positive `ratio`. With
        sort_by='ratio' the largest ratio comes first; with sort_by='Qvalue'
        the smallest Q value comes first.
    diff_gene_B: `pd.DataFrame`
        Genes with `Qvalue < FDR_cutoff` and negative `ratio`. With
        sort_by='ratio' the most negative ratio comes first; with
        sort_by='Qvalue' the smallest Q value comes first.

    Raises
    ------
    ValueError
        If `sort_by` is invalid, if a group name is not found in
        adata.obs['state_info'], or if either group selects no cell.
    """

    if sort_by not in ["ratio", "Qvalue"]:
        raise ValueError(f"sort_by must be among {['ratio','Qvalue']}")

    state_info = np.array(adata.obs["state_info"])
    known_states = set(state_info)
    selections = []
    for cell_group_X in (cell_group_A, cell_group_B):
        # isinstance also accepts str subclasses such as np.str_
        if isinstance(cell_group_X, str):
            if cell_group_X not in known_states:
                raise ValueError(
                    "cell_group_A (or B) should be either a cluster name among adata.obs['state_info'] or a boolean array of size adata.shape[0]."
                )
            group_idx = state_info == cell_group_X
        else:
            group_idx = np.array(cell_group_X).astype("bool")

        selections.append(group_idx)

    if (np.sum(selections[0]) == 0) or (np.sum(selections[1]) == 0):
        raise ValueError("Group A or B has zero selected cell states.")

    dge = hf.get_dge_SW(
        adata,
        selections[0],
        selections[1],
        min_frac_expr=min_frac_expr,
        pseudocount=pseudocount,
    )

    # Group A: a large ratio is more differential, but a small Q value is more
    # significant, so the sort direction depends on the key.
    ranked_A = dge.sort_values(by=sort_by, ascending=(sort_by == "Qvalue"))
    diff_gene_A = ranked_A[
        (ranked_A["Qvalue"] < FDR_cutoff) & (ranked_A["ratio"] > 0)
    ].reset_index()

    # Group B: most negative ratio, or smallest Q value, first.
    ranked_B = dge.sort_values(by=sort_by, ascending=True)
    diff_gene_B = ranked_B[
        (ranked_B["Qvalue"] < FDR_cutoff) & (ranked_B["ratio"] < 0)
    ].reset_index()

    return diff_gene_A, diff_gene_B
118 |
119 |
def identify_TF_and_surface_marker(
    gene_list,
    species="mouse",
    go_term_keywards=[
        "cell surface",
        "cell cycle",
        "regulation of transcription",
        "DNA-binding transcription factor activity",
        "regulation of transcription by RNA polymerase II",
    ],
):
    """
    From an input gene list, return the go term and annotation for each gene,
    and further select the genes whose GO term name exactly matches one of
    `go_term_keywards` (by default, terms related to TF or cell surface protein).

    This queries Biomart through `gseapy` and therefore needs network access.

    Parameters
    ----------
    gene_list: `list`
        Gene names, used as the `external_gene_name` filter of the query.
    species: `str`, optional (default: 'mouse')
        Either 'mouse' or 'human'; selects the Ensembl dataset to query.
    go_term_keywards: `list`, optional
        GO term names to select. A gene is reported once per matching term.

    Returns
    ------
    results:
        Full annotation for each gene (rows with missing values dropped)
    df_anno
        `pd.DataFrame` with columns `gene` and `annotation`; only includes
        genes annotated with one of `go_term_keywards`

    Raises
    ------
    ValueError
        If `species` is neither 'mouse' nor 'human'.
    """

    if species not in ("mouse", "human"):
        raise ValueError("species must be either mouse or human")
    # mmusculus_gene_ensembl: Mouse genes; hsapiens_gene_ensembl: Human genes
    dataset = "mmusculus_gene_ensembl" if species == "mouse" else "hsapiens_gene_ensembl"

    from gseapy.parser import Biomart

    bm = Biomart()
    # Only the query itself is needed. The mart/dataset/attribute/filter
    # listings fetched previously were never used and each cost a network
    # round-trip.
    results = bm.query(
        dataset=dataset,
        attributes=[
            "ensembl_gene_id",
            "external_gene_name",
            "namespace_1003",
            "name_1006",
        ],
        filters={"external_gene_name": gene_list},
    )
    results = results.dropna()

    df_list = []
    for term in go_term_keywards:
        # unique genes whose GO term name equals this keyword
        tmp_genes = list(
            set(results.loc[results["name_1006"] == term, "external_gene_name"])
        )
        df_tmp = pd.DataFrame({"gene": tmp_genes})
        df_tmp["annotation"] = term
        df_list.append(df_tmp)

    if df_list:
        df_anno = pd.concat(df_list, ignore_index=True)
    else:
        # pd.concat raises on an empty list; return an empty table instead
        df_anno = pd.DataFrame({"gene": [], "annotation": []})
    return results, df_anno
191 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | SPHINXPROJ = scvelo
8 | SOURCEDIR = source
9 | BUILDDIR = build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
@ECHO OFF

pushd %~dp0

REM Command file for Sphinx documentation
REM Usage: make.bat <sphinx-builder>   (no argument prints the Sphinx help)

if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=source
set BUILDDIR=build
REM Project name; previously left as "scvelo" from the template this file was copied from.
set SPHINXPROJ=cospar

if "%1" == "" goto help

REM Probe for sphinx-build; errorlevel 9009 means "command not found".
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.http://sphinx-doc.org/
	exit /b 1
)

%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
goto end

:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%

:end
popd
37 |
--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy>=1.19.4
2 | scipy>=1.5.4
3 | scikit-learn>=0.23.2
4 | scanpy>=1.6.0
5 | pandas>=1.1.4
6 | statsmodels==0.13.2
7 | plotnine>=0.7.1
8 | matplotlib>=3.3.3
9 | fastcluster>=1.1.26 # used to generate the clustered heat map of barcodes
10 | anndata>=0.7.5
11 | numba>=0.52.0 # related to issues of GPUipatch error
12 | scikit-misc>=0.1.3 # used for loess smoothing
13 | leidenalg>=0.7.0
14 | ete3>=3.1.2
15 | ipywidgets
16 |
17 | # Just until rtd.org understands pyproject.toml
18 | setuptools
19 | setuptools_scm
20 | typing_extensions
21 | importlib_metadata
22 | sphinx_rtd_theme>=0.3
23 | sphinx_autodoc_typehints<=1.6
24 | Jinja2<3.1
25 |
26 | # converting notebooks to html
27 | ipykernel
28 | sphinx==3.5.4
29 | nbsphinx==0.8.0
30 |
--------------------------------------------------------------------------------
/docs/source/_ext/edit_on_github.py:
--------------------------------------------------------------------------------
1 | """
2 | Loosely based on gist.github.com/MantasVaitkunas/7c16de233812adcb7028
3 | """
4 |
5 | import os
6 | import warnings
7 |
8 | __licence__ = "BSD (3 clause)"
9 |
10 |
def get_github_repo(app, path):
    """Choose the GitHub repository and docs prefix for a source document.

    Notebook sources (paths ending in ``.ipynb``) are attributed to
    ``app.config.github_nb_repo`` with the prefix ``"/"``; every other
    document is attributed to ``app.config.github_repo`` with the prefix
    ``"/docs/source/"``.

    Returns
    -------
    tuple
        ``(repo, conf_py_path)``.
    """
    config = app.config
    is_notebook = path.endswith(".ipynb")
    if not is_notebook:
        return config.github_repo, "/docs/source/"
    return config.github_nb_repo, "/"
15 |
16 |
def html_page_context(app, pagename, templatename, context, doctree):
    """Fill the template context with the "Edit on GitHub" settings.

    Connected to Sphinx's ``html-page-context`` event. Only pages rendered
    with ``page.html`` are handled. The values are the ones read by
    ``sphinx_rtd_theme`` (``display_github``, ``github_user``,
    ``github_version``, ``github_repo``, ``conf_py_path``).

    Parameters
    ----------
    app
        The Sphinx application; ``app.config.github_repo`` and
        ``app.config.github_nb_repo`` are read, and the latter is filled in
        with ``"<github_repo>/docs/source"`` when it is empty.
    pagename, templatename
        Supplied by Sphinx; only ``templatename`` is inspected.
    context
        Template context dictionary, updated in place.
    doctree
        Document tree of the page; its ``source`` attribute locates the file.
        May be ``None`` for pages generated from a template alone.
    """
    if templatename != "page.html":
        return

    if not app.config.github_repo:
        warnings.warn("`github_repo` not specified")
        return

    if not app.config.github_nb_repo:
        nb_repo = f"{app.config.github_repo}/docs/source"
        # The message must be an f-string, otherwise "{nb_repo}" is printed literally.
        warnings.warn(f"`github_nb_repo` not specified. Setting to `{nb_repo}`")
        app.config.github_nb_repo = nb_repo

    if doctree is None:
        # No source document behind this page, so there is nothing to link to.
        return

    path = os.path.relpath(doctree.get("source"), app.builder.srcdir)
    repo, conf_py_path = get_github_repo(app, path)

    # For sphinx_rtd_theme.
    context["display_github"] = True
    context["github_user"] = "ShouWenWang-Lab"
    context["github_version"] = "master"
    context["github_repo"] = repo
    context["conf_py_path"] = conf_py_path
39 |
40 |
def setup(app):
    """Sphinx extension entry point.

    Declares the ``github_nb_repo`` and ``github_repo`` configuration values
    (both defaulting to the empty string) and hooks ``html_page_context``
    into the ``html-page-context`` event.
    """
    # (option name, rebuild flag) pairs, registered in this order.
    options = (("github_nb_repo", False), ("github_repo", True))
    for option, rebuild in options:
        app.add_config_value(option, "", rebuild)
    app.connect("html-page-context", html_page_context)
45 |
--------------------------------------------------------------------------------
/docs/source/_static/colab-badge.svg:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/docs/source/_static/custom.css:
--------------------------------------------------------------------------------
1 | /* ReadTheDocs theme colors */
2 |
3 | .wy-nav-top { background-color: #404040 }
4 | .wy-nav-content { max-width: 950px }
5 | .wy-side-nav-search { background-color: transparent }
6 | .wy-side-nav-search input[type="text"] { border-width: 0 }
7 |
8 |
9 | /* Custom classes */
10 | .small { font-size:40% }
11 | .smaller, .pr { font-size:70% }
12 |
13 |
14 | /* Custom classes with bootstrap buttons */
15 |
16 | .tutorial,
17 | .tutorial:visited,
18 | .tutorial:hover
19 | {
20 | /* text-decoration: underline; */
21 | font-weight: bold;
22 | padding: 2px 5px;
23 | white-space: nowrap;
24 | max-width: 100%;
25 | background: #EF3270;
26 | border: solid 1px #EF3270;
27 | border-radius: .25rem;
28 | font-size: 75%;
29 | /* font-family: SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier New",Courier,monospace; */
30 | color: #404040;
31 | overflow-x: auto;
32 | box-sizing: border-box;
33 | }
34 |
35 |
36 | /* Formatting of RTD markup: rubrics and sidebars and admonitions */
37 |
38 | /* rubric */
39 | .rst-content p.rubric {
40 | margin-bottom: 6px;
41 | font-weight: normal;
42 | }
43 | .rst-content p.rubric::after { content: ":" }
44 |
45 | /* sidebar */
46 | .rst-content .sidebar {
47 | /* margin: 0px 0px 0px 12px; */
48 | padding-bottom: 0px;
49 | }
50 | .rst-content .sidebar p {
51 | margin-bottom: 12px;
52 | }
53 | .rst-content .sidebar p,
54 | .rst-content .sidebar ul,
55 | .rst-content .sidebar dl {
56 | font-size: 13px;
57 | }
58 |
59 | /* less space after bullet lists in admonitions like warnings and notes */
60 | .rst-content .section .admonition ul {
61 | margin-bottom: 6px;
62 | }
63 |
64 |
65 | /* Code: literals and links */
66 |
67 | .rst-content tt.literal,
68 | .rst-content code.literal {
69 | color: #404040;
70 | }
/* slim font weight for non-link code */
.rst-content tt:not(.xref),
.rst-content code:not(.xref),
.rst-content *:not(a) > tt.xref,
.rst-content *:not(a) > code.xref {
    font-weight: normal;
}
/* NOTE(review): this selector list previously ended in a dangling comma with
   no declaration block, so it merged into the `.rst-content .annotation` rule
   below. Linked code is assumed to be meant to stay bold - confirm. */
.rst-content a > tt.xref,
.rst-content a > code.xref,
.rst-content dl:not(.docutils) a > tt.xref {
    font-weight: bold;
}
79 |
80 |
81 | /* Just one box for annotation code for a less noisy look */
82 |
83 | .rst-content .annotation {
84 | padding: 2px 5px;
85 | background-color: white;
86 | border: 1px solid #e1e4e5;
87 | }
88 | .rst-content .annotation tt,
89 | .rst-content .annotation code {
90 | padding: 0 0;
91 | background-color: transparent;
92 | border: 0 solid transparent;
93 | }
94 |
95 |
96 | /* Parameter lists */
97 |
98 | .rst-content dl:not(.docutils) dl dt {
99 | /* mimick numpydoc’s blockquote style */
100 | font-weight: normal;
101 | background: none transparent;
102 | border-left: none;
103 | margin: 0 0 12px;
104 | padding: 3px 0 0;
105 | font-size: 100%;
106 | }
107 |
108 | .rst-content dl:not(.docutils) dl dt code {
109 | font-size: 100%;
110 | font-weight: normal;
111 | background: none transparent;
112 | border: none;
113 | padding: 0 2px;
114 | }
115 |
116 | .rst-content dl:not(.docutils) dl dt a.reference>code {
117 | text-decoration: underline;
118 | }
119 |
120 | /* Mimick rubric style used for other headings */
121 | .rst-content dl:not(.docutils) dl > dt {
122 | font-weight: bold;
123 | background: none transparent;
124 | border-left: none;
125 | margin: 0 0 12px;
126 | padding: 3px 0 0;
127 | font-size: 100%;
128 | }
129 | /* Parameters contain parts and don’t need bold font */
130 | .rst-content dl.field-list dl > dt { font-weight: unset }
131 | /* Add colon between return tuple element name and type */
132 | .rst-content dl:not(.docutils) dl > dt .classifier::before { content: ' : ' }
133 |
134 | /* Function headers */
135 |
136 | .rst-content dl:not(.docutils) dt {
137 | background: #edf0f2;
138 | color: #404040;
139 | border-top: solid 3px #343131;
140 | }
141 |
142 | .rst-content .section ul li p:last-child {
143 | margin-bottom: 0;
144 | margin-top: 0;
145 | }
146 |
147 | /* Adjust width of navigation bar on mobile */
148 | @media screen and (max-width: 768px) {
149 | .header-bar {
150 | display: none;
151 | }
152 |
153 | .wy-nav-content-wrap {
154 | margin-left: 0px;
155 | }
156 |
157 | .wy-nav-side {
158 | width: 300px;
159 | }
160 |
161 | .wy-nav-side.shift {
162 | max-width: 320px;
163 | }
164 |
165 | /* Fix sidebar adjust */
166 | .rst-versions {
167 | width: 40%;
168 | max-width: 320px;
169 | }
170 | }
171 |
172 | /* Handle landscape */
173 | @media screen and (min-width: 377px) {
174 | .wy-nav-content-wrap.shift {
175 | left: 320px;
176 | }
177 | }
178 |
179 | /* make height responsive for notebook figures */
180 | .rst-content .image-reference img {
181 | max-width: 100% !important;
182 | height: auto !important;
183 | }
184 |
--------------------------------------------------------------------------------
/docs/source/_static/nbviewer-badge.svg:
--------------------------------------------------------------------------------
1 |
2 |
18 |
20 |
21 |
23 | image/svg+xml
24 |
26 |
27 |
28 |
29 |
30 |
32 |
35 |
38 |
41 |
44 |
47 |
50 |
53 |
56 |
59 |
62 |
65 |
68 |
69 |
89 |
93 |
98 |
102 |
103 |
105 |
111 |
112 |
115 |
119 |
123 |
127 |
128 |
133 |
141 |
148 |
156 | Open in nbviewer
159 |
160 |
167 | Open in nbviewer
172 |
173 |
183 |
184 |
188 |
193 |
197 |
201 |
205 |
210 |
214 |
223 |
224 |
225 |
230 |
234 |
243 |
244 |
245 |
246 |
247 |
248 |
249 |
250 |
--------------------------------------------------------------------------------
/docs/source/_templates/autosummary/base.rst:
--------------------------------------------------------------------------------
1 | :github_url: {{ fullname | modurl }}
2 |
3 | {{ fullname | api_image }}
4 |
5 | {% extends "!autosummary/base.rst" %}
6 |
7 | .. http://www.sphinx-doc.org/en/stable/ext/autosummary.html#customizing-templates
8 |
--------------------------------------------------------------------------------
/docs/source/_templates/autosummary/class.rst:
--------------------------------------------------------------------------------
1 | :github_url: {{ fullname | modurl }}
2 |
3 | {{ fullname | escape | underline}}
4 |
5 | .. currentmodule:: {{ module }}
6 |
7 | .. add toctree option to make autodoc generate the pages
8 |
9 | .. autoclass:: {{ objname }}
10 |
11 | {% block attributes %}
12 | {% if attributes %}
13 | .. rubric:: Attributes
14 |
15 | .. autosummary::
16 | :toctree: .
17 | {% for item in attributes %}
18 | ~{{ fullname }}.{{ item }}
19 | {%- endfor %}
20 | {% endif %}
21 | {% endblock %}
22 |
23 | {% block methods %}
24 | {% if methods %}
25 | .. rubric:: Methods
26 |
27 | .. autosummary::
28 | :toctree: .
29 | {% for item in methods %}
30 | {%- if item != '__init__' %}
31 | ~{{ fullname }}.{{ item }}
32 | {%- endif -%}
33 | {%- endfor %}
34 | {% endif %}
35 | {% endblock %}
36 |
--------------------------------------------------------------------------------
/docs/source/about.rst:
--------------------------------------------------------------------------------
1 | About CoSpar
2 | ------------
3 |
4 | The following information is adapted from `Wang et al. Nat. Biotech. (2022) <https://www.nature.com/articles/s41587-022-01209-1>`_.
5 | High-throughput single-cell measurements have enabled unbiased studies of development and differentiation, leading to numerous methods for dynamic inference. However, single-cell RNA sequencing (scRNA-seq) data alone does not fully constrain the differentiation dynamics, and existing methods inevitably operate under simplified assumptions. In parallel, the lineage information of individual cells can be profiled simultaneously along with their transcriptome by using a heritable and expressible DNA barcode as a lineage tracer (an approach we call lineage-tracing scRNA-seq, or LT-scSeq). The barcode may remain static or evolve over time.
6 |
7 |
8 | However, the lineage data can be challenging to analyze. These challenges include stochastic differentiation and variable expansion of clones; cell loss during analysis; barcode homoplasy, wherein cells acquire the same barcode despite not having a lineage relationship; access to clones only at a single time point; and clonal dispersion due to a lag time between labeling cells and the first sampling (the lag time is necessary to allow the clone to grow large enough for resampling).
9 |
10 |
11 | CoSpar, developed by `Wang et al. Nat. Biotech. (2022) <https://www.nature.com/articles/s41587-022-01209-1>`_, is among the first tools to perform dynamic inference by integrating state and lineage information. It solves for the transition probability map from cell states at an earlier time point to states at a later time point. It achieves accuracy and robustness by learning a sparse and coherent transition map, where neighboring initial states share similar yet sparse fate outcomes. Built upon the finite-time transition map, CoSpar can 1) infer fate potential of early states; 2) detect early fate bias (thus, fate boundary) among a heterogeneous progenitor population; 3) identify putative driver genes for fate bifurcation; 4) infer fate coupling or hierarchy; 5) visualize gene expression dynamics along an inferred differentiation trajectory. CoSpar also provides several methods to analyze clonal data by itself, including the clonal coupling between fate clusters and the bias of a clone towards a given fate, etc. We envision CoSpar to be a platform to integrate key methods needed to analyze data with both state and lineage information.
12 |
13 | .. image:: https://user-images.githubusercontent.com/4595786/113746452-56e4cb00-96d4-11eb-8278-0aac0469ba9d.png
14 | :width: 1000px
15 | :align: center
16 | (Copyright: Nature Biotechnology)
17 |
18 | Coherent sparse optimization
19 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
20 |
21 | One formalization of dynamic inference is to identify a transition map, a matrix :math:`T_{ij} (t_1,t_2)`, which describes the probability of a cell, initially in some state :math:`i` at time :math:`t_1`, giving rise to progeny in a state :math:`j` at time :math:`t_2`. We define :math:`T_{ij} (t_1,t_2)` specifically as the fraction of progeny from state :math:`i` that occupy state :math:`j`. This transition matrix averages the effects of cell division, loss, and differentiation, but it nonetheless proves useful for several applications.
22 |
23 |
24 | We make two reasonable assumptions about the nature of biological dynamics to constrain inference of the transition map. We assume the map to be a sparse matrix, since most cells can access just a few states during an experiment. And we assume the map to be locally coherent, meaning that cells in similar states should share similar fate outcomes. These constraints together force transition maps to be parsimonious and smooth, which we reasoned would make them robust to practical sources of noise in LT-scSeq experiments. As inputs, CoSpar requires a barcode-by-cell matrix :math:`I(t)` that encodes the clonal information at time :math:`t`, and a data matrix for observed cell states (e.g. from scRNA-seq). Clonal data may have nested structure reflecting subclonal labeling.
25 |
26 | CoSpar is formulated assuming that we have information on the same clones at more than one time point. More often, one might observe clones at only a later time point :math:`t_2`. For these cases inference is not fully constrained: one must learn both the transition map :math:`T` and the initial clonal data :math:`I(t_1)`. We approximate a solution additionally constrained by a minimum global transport cost. We show that this approach is robust to initialization in tested datasets. Finally, coherence and sparsity provide reasonable constraints to the simpler problem of predicting dynamics from state heterogeneity alone without lineage data. We extended CoSpar to this case. Thus, CoSpar is flexible to different experimental designs, as summarized by the above figure. Our core algorithms are illustrated below.
27 |
28 |
29 | .. image:: https://user-images.githubusercontent.com/4595786/113746670-93b0c200-96d4-11eb-89c0-d1e7d72383e7.png
30 | :width: 1000px
31 | :align: center
32 | (Copyright: Nature Biotechnology)
33 |
34 | Below, we formalize the coherent sparse optimization by which CoSpar infers the transition map.
35 |
36 | In a model of stochastic differentiation, cells in a clone are distributed across states with a time-dependent density vector :math:`\vec{P}(t)`. A transition map :math:`T` directly links clonal density profiles :math:`\vec{P}(t_{1,2})` between time points:
37 |
38 | .. math::
39 | \begin{equation}
40 | P_i(t_2 )= \sum_j P_j(t_1 )T_{ji}(t_1,t_2), \quad \quad \quad \text{Eq. (1)}
41 | \end{equation}
42 |
43 | From multiple clonal observations, our goal is to learn :math:`T`. To do so, we consider each observed cell transcriptome as a distinct state (:math:`\vec{P}(t)\in R^{N_t}` for :math:`N_t` cells profiled at time :math:`t`), and introduce :math:`S(t)\in R^{N_t\times N_t}` as a matrix of cell-cell similarity over all observed cell states, including those lacking clonal information. Denoting :math:`I(t)\in \{0,1\}^{M\times N_t}` as a clone-by-cell matrix of :math:`M` clonal barcodes, the density profiles of observed clones :math:`P(t)\in R^{M\times N_t}` are estimated as :math:`P(t)\approx I(t)S(t)`. In matrix form, the constraint in Eq. (1) from all observed clones then becomes :math:`P(t_2)\approx P(t_1)T(t_1,t_2)`.
44 |
45 |
46 | Since the matrices :math:`P(t_{1,2})` are determined directly from data, with enough information :math:`T(t_1,t_2)` could be learnt by matrix inversion. However, in most cases, the number of clones is far less than the number of states. To constrain the map, we require that: 1) :math:`T` is a sparse matrix; 2) :math:`T` is locally coherent; and 3) :math:`T` is a non-negative matrix. With these requirements, the inference becomes an optimization problem:
47 |
48 | .. math::
49 | \begin{equation}
50 | \min_{T} ||T||_1+\alpha ||LT||_2, \; \text{s.t.} \; ||P(t_2)- P(t_1) T(t_1,t_2)||_{2}\le\epsilon;\; T\ge 0; \text{Normalization}.
51 | \end{equation}
52 |
53 | Here, :math:`||T||_1` quantifies the sparsity of the matrix :math:`T` through its :math:`\ell_1` norm, while :math:`||LT||_2` quantifies the local coherence of :math:`T` (:math:`L` is the Laplacian of the cell state similarity graph, and :math:`LT` is the local divergence). The remaining constraints enforce the observed clonal dynamics, non-negativity of :math:`T`, and map normalization, respectively. At :math:`\alpha=0`, the minimization takes the form of Lasso, an algorithm for compressed sensing. Our formulation extends compressed sensing from vectors to matrices and additionally enforces local coherence. The local coherence extension is reminiscent of the fused Lasso problem.
54 | An iterative, heuristic approach solves the CoSpar optimization efficiently, replacing :math:`(\alpha,\epsilon)` with parameters that explicitly control coherence and sparsity. See `Wang et al. Nat. Biotech. (2022) <https://www.nature.com/articles/s41587-022-01209-1>`_ for a detailed exposition of the method and its implementation.
55 |
--------------------------------------------------------------------------------
/docs/source/api.rst:
--------------------------------------------------------------------------------
1 | .. automodule:: cospar
2 |
3 | API
4 | ===
5 |
6 | .. include::
7 |
8 | Import CoSpar as::
9 |
10 | import cospar as cs
11 |
12 |
13 | CoSpar is built around the :class:`~anndata.AnnData` object (usually called `adata`). For each cell, we store its RNA count matrix at ``adata.X``, the gene names at ``adata.var_names``, time information at ``adata.obs['time_info']``, state annotation at ``adata.obs['state_info']``, clonal information at ``adata.obsm['X_clone']``, and 2-d embedding at ``adata.obsm['X_emb']``.
14 |
15 |
16 | Once the :class:`~anndata.AnnData` object is initialized via :func:`cs.pp.initialize_adata_object`, the typical flow of analysis is to 1) perform preprocessing and dimension reduction (``cs.pp.*``); 2) visualize and analyze clonal data alone (``cs.pl.*``); 3) infer transition map (``cs.tmap.*``); and 4) analyze inferred map (``cs.tl.*``) and then visualize the results with the plotting functions (``cs.pl.*``). Typically, each ``cs.tl.*`` function has a corresponding ``cs.pl.*`` function. We also provide several built-in datasets (``cs.datasets.*``) and miscellaneous functions to assist with the analysis (``cs.hf.*``). See :doc:`tutorial ` for details.
17 |
18 |
19 |
20 | Preprocessing
21 | -------------
22 |
23 | .. autosummary::
24 | :toctree: .
25 |
26 | pp.initialize_adata_object
27 | pp.get_highly_variable_genes
28 | pp.remove_cell_cycle_correlated_genes
29 | pp.get_X_pca
30 | pp.get_X_emb
31 | pp.get_X_clone
32 | pp.get_state_info
33 | pp.refine_state_info_by_marker_genes
34 | pp.refine_state_info_by_leiden_clustering
35 |
36 |
37 |
38 |
39 | Transition map inference
40 | ------------------------
41 |
42 |
43 | .. autosummary::
44 | :toctree: .
45 |
46 | tmap.infer_Tmap_from_multitime_clones
47 | tmap.infer_Tmap_from_one_time_clones
48 | tmap.infer_Tmap_from_state_info_alone
49 | tmap.infer_Tmap_from_clonal_info_alone
50 |
51 |
52 | Analysis
53 | ----------
54 |
55 | .. autosummary::
56 | :toctree: .
57 |
58 | tl.clonal_fate_bias
59 | tl.fate_biased_clones
60 | tl.fate_coupling
61 | tl.fate_hierarchy
62 | tl.fate_map
63 | tl.fate_potency
64 | tl.fate_bias
65 | tl.progenitor
66 | tl.iterative_differentiation
67 | tl.differential_genes
68 |
69 |
70 | Plotting
71 | ---------
72 |
73 |
74 | **Clone analysis** (clone visualization, clustering etc.)
75 |
76 | .. autosummary::
77 | :toctree: .
78 |
79 | pl.clones_on_manifold
80 | pl.barcode_heatmap
81 | pl.clonal_fate_bias
82 | pl.fate_coupling
83 | pl.fate_hierarchy
84 | pl.clonal_fates_across_time
85 | pl.clonal_reports
86 | pl.visualize_tree
87 |
88 |
89 |
90 | **Transition map analysis** (fate bias etc.)
91 |
92 | .. autosummary::
93 | :toctree: .
94 |
95 | pl.single_cell_transition
96 | pl.fate_map
97 | pl.fate_potency
98 | pl.fate_bias
99 | pl.progenitor
100 | pl.iterative_differentiation
101 | pl.gene_expression_dynamics
102 | pl.fate_coupling
103 | pl.fate_hierarchy
104 |
105 | **General**
106 |
107 | .. autosummary::
108 | :toctree: .
109 |
110 | pl.embedding
111 | pl.embedding_genes
112 | pl.gene_expression_on_manifold
113 | pl.gene_expression_heatmap
114 | settings.set_figure_params
115 |
116 |
117 | Datasets
118 | --------
119 |
120 | .. autosummary::
121 | :toctree: .
122 |
123 | datasets.hematopoiesis
124 | datasets.hematopoiesis_130K
125 | datasets.hematopoiesis_subsampled
126 | datasets.hematopoiesis_Gata1_states
127 | datasets.lung
128 | datasets.reprogramming
129 | datasets.reprogramming_Day0_3_28
130 | datasets.synthetic_bifurcation
131 |
132 | Help functions
133 | --------------
134 |
135 | .. autosummary::
136 | :toctree: .
137 |
138 | hf.read
139 | hf.save_map
140 | hf.save_preprocessed_adata
141 | hf.check_adata_structure
142 | hf.check_available_choices
143 | hf.update_time_ordering
144 | hf.update_data_description
145 | tl.get_normalized_covariance
146 | hf.get_X_clone_with_reference_ordering
147 |
148 |
149 | Simulations
150 | -----------
151 |
152 | .. autosummary::
153 | :toctree: .
154 |
155 | simulate.linear_differentiation_model
156 | simulate.bifurcation_model
157 | simulate.quantify_correlation_with_ground_truth_fate_bias_BifurcationModel
158 | simulate.quantify_transition_peak_TPR_LinearDifferentiation
159 |
--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
1 | import inspect
2 | import logging
3 | import os
4 | import sys
5 | from datetime import datetime
6 | from pathlib import Path
7 | from typing import Mapping, Optional, Union
8 |
9 | from sphinx.application import Sphinx
10 | from sphinx.ext import autosummary
11 |
# remove PyCharm’s old six module
# If ``six`` is already imported when this file runs, print the current
# ``sys.path``, remove every entry that mentions PyCharm and "helpers", and
# forget the imported module so that a later ``import six`` is resolved
# against the cleaned path.
if "six" in sys.modules:
    print(*sys.path, sep="\n")
    # Iterate over a copy because entries are removed from sys.path in the loop.
    for pypath in list(sys.path):
        if any(p in pypath for p in ["PyCharm", "pycharm"]) and "helpers" in pypath:
            sys.path.remove(pypath)
    del sys.modules["six"]
19 |
20 | import matplotlib # noqa
21 |
22 | matplotlib.use("agg")
23 |
24 | HERE = Path(__file__).parent
25 | sys.path.insert(0, f"{HERE.parent.parent}")
26 | sys.path.insert(0, os.path.abspath("_ext"))
27 | import cospar
28 |
29 | logger = logging.getLogger(__name__)
30 |
31 |
32 | # -- General configuration ------------------------------------------------
33 |
34 | needs_sphinx = "1.7"
35 |
36 | extensions = [
37 | "sphinx.ext.autodoc",
38 | "sphinx.ext.doctest",
39 | "sphinx.ext.coverage",
40 | "sphinx.ext.mathjax",
41 | "sphinx.ext.autosummary",
42 | "sphinx.ext.napoleon",
43 | "sphinx.ext.intersphinx",
44 | "sphinx.ext.githubpages",
45 | "sphinx_autodoc_typehints",
46 | "nbsphinx",
47 | "edit_on_github",
48 | ]
49 |
50 |
51 | # Generate the API documentation when building
52 | autosummary_generate = True
53 | napoleon_google_docstring = False
54 | napoleon_numpy_docstring = True
55 | napoleon_include_init_with_doc = False
56 | napoleon_use_rtype = False
57 | napoleon_custom_sections = [("Params", "Parameters")]
58 |
59 | intersphinx_mapping = dict(
60 | python=("https://docs.python.org/3", None),
61 | anndata=("https://anndata.readthedocs.io/en/latest/", None),
62 | scanpy=("https://scanpy.readthedocs.io/en/latest/", None),
63 | cospar=("https://cospar.readthedocs.io/en/latest/", None),
64 | cellrank=("https://cellrank.readthedocs.io/en/latest/", None),
65 | )
66 |
67 | templates_path = ["_templates"]
68 | source_suffix = [".rst", ".ipynb"]
69 | master_doc = "index"
70 |
71 | # General information about the project.
72 | project = "CoSpar"
73 | author = "Shou-Wen Wang"
74 | title = "CoSpar - dynamic inference by integrating state and lineage information"
75 | copyright = f"{datetime.now():%Y}, {author}"
76 |
77 | version = cospar.__version__.replace(".dirty", "")
78 | release = version
79 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
80 | pygments_style = "sphinx"
81 | todo_include_todos = False
82 |
83 | # # Add notebooks prolog to Google Colab and nbviewer
84 | # nbsphinx_prolog = r"""
85 | # {% set docname = 'github/theislab/cospar_notebooks/blob/master/' + env.doc2path(env.docname, base=None) %}
86 | # .. raw:: html
87 |
88 | #
94 | # """
95 |
96 | # -- Options for HTML output ----------------------------------------------
97 |
98 | html_theme = "sphinx_rtd_theme"
99 | html_theme_options = dict(navigation_depth=1, titles_only=True)
100 | github_repo = "cospar"
101 | github_nb_repo = "cospar_notebooks"
102 | html_static_path = ["_static"]
103 |
104 |
105 | def setup(app):
106 | app.add_stylesheet("custom.css")
107 |
108 |
109 | # -- Options for other output ------------------------------------------
110 |
111 | htmlhelp_basename = "cospardoc"
112 | title_doc = f"{project} documentation"
113 |
114 | latex_documents = [(master_doc, f"{project}.tex", title_doc, author, "manual")]
115 | man_pages = [(master_doc, project, title_doc, [author], 1)]
116 | texinfo_documents = [
117 | (master_doc, project, title_doc, author, project, title, "Miscellaneous")
118 | ]
119 |
120 |
121 | # -- generate_options override ------------------------------------------
122 |
123 |
def process_generate_options(app: Sphinx):
    """Generate autosummary stub pages, including imported members.

    Installed below as a replacement for
    ``sphinx.ext.autosummary.process_generate_options``; it differs from a
    plain call by passing ``imported_members=True`` to
    ``generate_autosummary_docs``.

    ``app.config.autosummary_generate`` is either a collection of source
    files or a truthy flag; in the latter case every found document whose
    source file exists is scanned. Nothing is generated when the setting is
    falsy or no reST suffix can be determined.
    """
    genfiles = app.config.autosummary_generate

    if genfiles and not hasattr(genfiles, "__len__"):
        # A bare flag (e.g. ``True``): scan all documents known to the environment.
        env = app.builder.env
        genfiles = [
            env.doc2path(x, base=None)
            for x in env.found_docs
            if Path(env.doc2path(x)).is_file()
        ]
    if not genfiles:
        return

    from sphinx.ext.autosummary.generate import generate_autosummary_docs

    # ``source_suffix`` may be a list of suffixes or a mapping keyed by suffix;
    # ``list()`` yields the suffixes in both cases, so indexing below is safe.
    suffixes = list(app.config.source_suffix)
    known_suffixes = tuple(suffixes)
    # Append the first configured suffix to entries given without one.
    genfiles = [
        genfile if genfile.endswith(known_suffixes) else genfile + suffixes[0]
        for genfile in genfiles
    ]

    suffix = autosummary.get_rst_suffix(app)
    if suffix is None:
        return

    generate_autosummary_docs(
        genfiles,
        builder=app.builder,
        warn=logger.warning,
        info=logger.info,
        suffix=suffix,
        base_path=app.srcdir,
        imported_members=True,
        app=app,
    )
159 |
160 |
161 | autosummary.process_generate_options = process_generate_options
162 |
163 |
164 | # -- GitHub URLs for class and method pages ------------------------------------------
165 |
166 |
def get_obj_module(qualname):
    """Resolve a dotted name to ``(object, defining module)``.

    Trailing components are stripped from ``qualname`` until the remaining
    prefix is a key of ``sys.modules``. The last stripped component is looked
    up on that module and the one stripped before it (if any) is looked up on
    the result. The returned module is the one named by the first looked-up
    object's ``__module__``. When ``qualname`` itself is an imported module,
    the object is ``None``.
    """
    module_name, class_name, attr_name = qualname, None, None
    while module_name not in sys.modules:
        # Shift: the previously stripped component becomes the attribute.
        attr_name = class_name
        module_name, class_name = module_name.rsplit(".", 1)

    if not class_name:
        return None, sys.modules[module_name]

    # retrieve object and find original module name
    owner = getattr(sys.modules[module_name], class_name)
    module_name = owner.__module__
    target = owner if not attr_name else getattr(owner, attr_name)
    return target, sys.modules[module_name]
185 |
186 |
def get_linenos(obj):
    """Get an object’s first and last source line numbers.

    Returns ``(start, end)`` (1-based, inclusive), or ``(None, None)`` when
    the source cannot be located: ``inspect.getsourcelines`` raises
    ``TypeError`` for built-ins and unsupported objects, and ``OSError`` when
    the source code cannot be retrieved.
    """
    try:
        lines, start = inspect.getsourcelines(obj)
    except (TypeError, OSError):
        return None, None
    else:
        return start, start + len(lines) - 1
195 |
196 |
197 | # set project_dir: project/docs/source/conf.py/../../.. → project/
198 | project_dir = Path(__file__).parent.parent.parent
199 | github_url_cospar = "https://github.com/ShouWenWang-Lab/cospar/tree/master"
200 | # github_url_read_loom = "https://github.com/theislab/anndata/tree/master/anndata"
201 | github_url_read = "https://github.com/theislab/scanpy/tree/master"
202 | github_url_scanpy = "https://github.com/theislab/scanpy/tree/master/scanpy"
203 | from pathlib import PurePosixPath
204 |
205 |
def modurl(qualname):
    """Get the full GitHub URL for some object’s qualname.

    The object's module file is expressed relative to ``project_dir`` and
    appended to ``github_url_cospar``. When the module lives outside the
    project, the URL base is chosen from the qualname instead ("read_loom",
    then "read", otherwise the scanpy URL) and only the last two path
    components of the module file are used. A ``#Lstart-Lend`` fragment is
    added when the object's line numbers are known.
    """
    obj, module = get_obj_module(qualname)
    github_url = github_url_cospar
    try:
        path = PurePosixPath(Path(module.__file__).resolve().relative_to(project_dir))
    except ValueError:
        # trying to document something from another package
        if "read_loom" in qualname:
            # ``github_url_read_loom`` is commented out at module level; fall
            # back to the generic reader URL instead of raising NameError.
            github_url = globals().get("github_url_read_loom", github_url_read)
        elif "read" in qualname:
            github_url = github_url_read
        else:
            github_url = github_url_scanpy
        path = "/".join(module.__file__.split("/")[-2:])
    start, end = get_linenos(obj)
    fragment = f"#L{start}-L{end}" if start and end else ""
    return f"{github_url}/{path}{fragment}"
225 |
226 |
def api_image(qualname: str) -> Optional[str]:
    """Return an ``image`` directive for ``<qualname>.png`` if that file exists.

    The image is looked up next to this file. The candidate path and whether
    it exists are printed. An empty string is returned when there is no such
    file.
    """
    image = Path(__file__).parent / f"{qualname}.png"
    print(image, image.is_file())
    if not image.is_file():
        return ""
    return f".. image:: {image.name}\n :width: 200\n :align: right"
235 |
236 |
237 | # modify the default filters
238 | from jinja2.defaults import DEFAULT_FILTERS
239 |
240 | DEFAULT_FILTERS.update(modurl=modurl, api_image=api_image)
241 |
242 | # -- Override some classnames in autodoc --------------------------------------------
243 |
244 | import sphinx_autodoc_typehints
245 |
246 | qualname_overrides = {
247 | "anndata.base.AnnData": "anndata.AnnData",
248 | "cospar.pl.scatter": "cospar.plotting.scatter",
249 | }
250 |
251 | fa_orig = sphinx_autodoc_typehints.format_annotation
252 |
253 |
def format_annotation(annotation):
    """Format a type annotation for the docs.

    Unions are rendered as a comma-separated list of their formatted members,
    ``Mapping`` as a link to :class:`typing.Mapping`, and classes listed in
    ``qualname_overrides`` as a link to the overriding name. Everything else
    is delegated to the original ``sphinx_autodoc_typehints`` formatter.
    """
    origin = getattr(annotation, "__origin__", None)

    if origin is Union or hasattr(annotation, "__union_params__"):
        # ``__union_params__`` takes precedence over ``__args__`` when present and non-empty.
        members = getattr(annotation, "__union_params__", None)
        if not members:
            members = getattr(annotation, "__args__", None)
        return ", ".join(map(format_annotation, members))

    # NOTE(review): this identity check may not match subscripted ``Mapping[...]``
    # annotations on newer Python versions - confirm.
    if origin is Mapping:
        return ":class:`~typing.Mapping`"

    if inspect.isclass(annotation):
        full_name = f"{annotation.__module__}.{annotation.__qualname__}"
        replacement = qualname_overrides.get(full_name)
        if replacement is not None:
            return f":py:class:`~{replacement}`"

    return fa_orig(annotation)
270 |
271 |
272 | sphinx_autodoc_typehints.format_annotation = format_annotation
273 |
274 |
275 | # -- Prettier Param docs --------------------------------------------
276 |
277 | from typing import Dict, List, Tuple
278 |
279 | from docutils import nodes
280 | from sphinx import addnodes
281 | from sphinx.domains.python import PyObject, PyTypedField
282 | from sphinx.environment import BuildEnvironment
283 |
284 |
class PrettyTypedField(PyTypedField):
    """Typed doc field that renders its items as a definition list.

    Each documented item becomes a definition-list entry whose term is the
    item name, followed by `` : `` and its type when one is known, and whose
    definition is the item's description.
    """

    # Container node used when more than one item is rendered.
    list_type = nodes.definition_list

    def make_field(
        self,
        types: Dict[str, List[nodes.Node]],
        domain: str,
        items: Tuple[str, List[nodes.inline]],
        env: BuildEnvironment = None,
    ) -> nodes.field:
        """Build the field node (name plus body) for ``items``.

        ``types`` maps item names to their type nodes; entries are popped as
        they are consumed. ``domain`` and ``env`` are forwarded to
        ``make_xrefs`` when cross-references are created.
        """

        def makerefs(rolename, name, node):
            # Cross-reference ``name`` with the given role, wrapped in ``node``.
            return self.make_xrefs(rolename, domain, name, node, env=env)

        def handle_item(
            fieldarg: str, content: List[nodes.inline]
        ) -> nodes.definition_list_item:
            # Term: the item name, optionally followed by " : <type>".
            head = nodes.term()
            head += makerefs(self.rolename, fieldarg, addnodes.literal_strong)
            fieldtype = types.pop(fieldarg, None)
            if fieldtype is not None:
                head += nodes.Text(" : ")
                if len(fieldtype) == 1 and isinstance(fieldtype[0], nodes.Text):
                    # A single plain-text type is turned into a cross-reference.
                    (text_node,) = fieldtype  # type: nodes.Text
                    head += makerefs(
                        self.typerolename, text_node.astext(), addnodes.literal_emphasis
                    )
                else:
                    # Anything else is inserted as the nodes it already is.
                    head += fieldtype

            # Definition: the item's description wrapped in one paragraph.
            body_content = nodes.paragraph("", "", *content)
            body = nodes.definition("", body_content)

            return nodes.definition_list_item("", head, body)

        fieldname = nodes.field_name("", self.label)
        if len(items) == 1 and self.can_collapse:
            # A single collapsible item is emitted without the list container.
            fieldarg, content = items[0]
            bodynode = handle_item(fieldarg, content)
        else:
            bodynode = self.list_type()
            for fieldarg, content in items:
                bodynode += handle_item(fieldarg, content)
        fieldbody = nodes.field_body("", bodynode)
        return nodes.field("", fieldname, fieldbody)
329 |
330 |
# Replace matching field types with ours: every ``PyTypedField`` registered on
# ``PyObject`` is rebuilt as a ``PrettyTypedField`` with the same settings,
# while all other field types are kept as they are.
PyObject.doc_field_types = [
    PrettyTypedField(
        ft.name,
        names=ft.names,
        typenames=ft.typenames,
        label=ft.label,
        rolename=ft.rolename,
        typerolename=ft.typerolename,
        can_collapse=ft.can_collapse,
    )
    if isinstance(ft, PyTypedField)
    else ft
    for ft in PyObject.doc_field_types
]
346 |
--------------------------------------------------------------------------------
/docs/source/getting_started.rst:
--------------------------------------------------------------------------------
1 | Getting Started
2 | ---------------
3 |
4 | Here, we explain the basics of using CoSpar. CoSpar requires that the count matrix is **not log-transformed**. This is specifically assumed in selecting highly variable genes, in computing PCA, and in the HighVar method for initializing the joint optimization using a single clonal time point. CoSpar also assumes that the dataset has more than one time point. However, if you have only a snapshot, you can still manually cluster the cells into more than one time point to use CoSpar.
5 |
6 | First, import CoSpar with::
7 |
8 | import cospar as cs
9 |
10 | For better visualization you can change the matplotlib settings to our defaults with::
11 |
12 | cs.settings.set_figure_params()
13 |
14 | If you want to adjust parameters for a particular plot, just pass the parameters into this function.
15 |
16 |
17 | The workflow of CoSpar is summarized by the following illustration:
18 |
19 |
20 | .. image:: https://user-images.githubusercontent.com/4595786/145308761-a6532c6b-ac5b-4457-a00e-4a0f3972a360.png
21 | :width: 1000px
22 | :align: center
23 |
24 | Also, below is a summary of the main analyses after we infer the transition map, and its connection with the mathematical formulation in `Wang et al. Nat. Biotech. (2022) <https://www.nature.com/articles/s41587-022-01209-1>`_.
25 |
26 | .. image:: https://user-images.githubusercontent.com/4595786/161853386-04126382-6a9a-4817-b6a8-e5e950977357.jpg
27 | :width: 1000px
28 | :align: center
29 |
30 | Initialization
31 | ''''''''''''''
32 | Given the gene expression matrix, clonal matrix, and other information, initialize the anndata object using::
33 |
34 | adata_orig = cs.pp.initialize_adata_object(adata=None,**params)
35 |
36 | The :class:`~anndata.AnnData` object ``adata_orig`` stores the count matrix (``adata_orig.X``), gene names (``adata_orig.var_names``), and temporal annotation of cells (``adata_orig.obs['time_info']``). Optionally, you can also provide the clonal matrix ``X_clone``, selected PCA matrix ``X_pca``, the embedding matrix ``X_emb``, and the state annotation ``state_info``, which will be stored at ``adata_orig.obsm['X_clone']``, ``adata_orig.obsm['X_pca']``, ``adata_orig.obsm['X_emb']``, and ``adata_orig.obs['state_info']``, respectively.
37 |
38 | If an adata object is provided as an input, the initialization function will try to automatically generate the correct data structure, and all annotations associated with the provided adata will remain intact. You can add new annotations to supplement or override existing annotations in the adata object.
39 |
40 |
41 | .. raw:: html
42 |
43 |
44 |
45 | If you do not have a dataset yet, you can still play around using one of the built-in datasets, e.g.::
46 |
47 | adata_orig = cs.datasets.hematopoiesis_subsampled()
48 |
49 |
50 |
51 | Preprocessing & dimension reduction
52 | '''''''''''''''''''''''''''''''''''
53 | Assuming basic quality control (excluding cells with low read count etc.) has been done, we provide basic preprocessing (gene selection and normalization) and dimension reduction related analysis (PCA, UMAP embedding etc.) at ``cs.pp.*``::
54 |
55 | cs.pp.get_highly_variable_genes(adata_orig,**params)
56 | cs.pp.remove_cell_cycle_correlated_genes(adata_orig,**params)
57 | cs.pp.get_X_pca(adata_orig,**params)
58 | cs.pp.get_X_emb(adata_orig,**params)
59 | cs.pp.get_state_info(adata_orig,**params)
60 | cs.pp.get_X_clone(adata_orig,**params)
61 |
62 | The first step ``get_highly_variable_genes`` also includes count matrix normalization. The second step, which is optional but recommended, removes cell cycle correlated genes among the selected highly variable genes. In ``get_X_pca``, we apply z-score transformation for each gene expression before computing the PCA. In ``get_X_emb``, we simply use the umap function from :mod:`~scanpy`. With ``get_state_info``, we extract state information using leiden clustering implemented in :mod:`~scanpy`.
63 | In ``get_X_clone``, we facilitate the conversion of the raw clonal data into a cell-by-clone matrix. As mentioned before, this preprocessing assumes that the count matrix is not log-transformed.
64 |
65 |
66 |
67 |
68 | Basic clonal analysis
69 | ''''''''''''''''''''''
70 | We provide a few plotting functions to help visually explore the clonal data before any downstream analysis. You can visualize clones on the state manifold directly::
71 |
72 | cs.pl.clones_on_manifold(adata_orig,**params)
73 |
74 | You can generate the barcode heatmap across given clusters to inspect clonal behavior::
75 |
76 | cs.pl.barcode_heatmap(adata_orig,**params)
77 |
78 | You can quantify the clonal coupling across different fate clusters::
79 |
80 | cs.tl.fate_coupling(adata_orig,source='X_clone',**params)
81 | cs.pl.fate_coupling(adata_orig,source='X_clone',**params)
82 |
83 | Strong coupling implies the existence of bi-potent or multi-potent cell states at the time of barcoding. You can visualize the fate hierarchy by a simple neighbor-joining method::
84 |
85 | cs.tl.fate_hierarchy(adata_orig,source='X_clone',**params)
86 | cs.pl.fate_hierarchy(adata_orig,source='X_clone',**params)
87 |
88 | Finally, you can infer the fate bias :math:`-log_{10}(P_{value})` of each clone towards a designated fate cluster::
89 |
90 | cs.pl.clonal_fate_bias(adata_orig,**params)
91 |
92 | A biased clone towards this cluster has a statistically significant cell fraction within or outside this cluster.
93 |
94 |
95 |
96 |
97 | Transition map inference
98 | ''''''''''''''''''''''''
99 | The core of the software is efficient and robust inference of a transition map by integrating state and clonal information. If the dataset has multiple clonal time points, you can run::
100 |
101 | adata=cs.tmap.infer_Tmap_from_multitime_clones(adata_orig,clonal_time_points=None,later_time_point=None,**params)
102 |
103 | It subsamples the input data at selected time points and computes the transition map, stored at ``adata.uns['transition_map']`` and ``adata.uns['intraclone_transition_map']``, with the latter restricted to intra-clone transitions. Depending on ``later_time_point``, it has two modes of inference:
104 |
105 | 1) When ``later_time_point=None``, it infers a transition map between neighboring time points. For example, for clonal_time_points=['day1', 'day2', 'day3'], it computes transitions for pairs ('day1', 'day2') and ('day2', 'day3'), but not for ('day1', 'day3').
106 |
107 | 2) If ``later_time_point`` is specified, it generates a transition map between this time point and each of the earlier time points. In the previous example, if ``later_time_point=='day3'``, we infer transitions for pairs ('day1', 'day3') and ('day2', 'day3'). This applies to the following map inference functions.
108 |
109 |
110 | -------------------------------------
111 |
112 | If the dataset has only one clonal time point, you can run::
113 |
114 | adata=cs.tmap.infer_Tmap_from_one_time_clones(adata_orig,initial_time_points=None, later_time_point=None,initialize_method='OT',**params)
115 |
116 | which jointly optimizes the transition map and the initial clonal structure. It requires initializing the transition map using state information alone. We provide two methods for such initialization: 1) ``OT`` for using the standard optimal transport approach; 2) ``HighVar`` for a customized approach, assuming that cells similar in gene expression across time points share clonal origin. For the ``OT`` method, if you wish to utilize the growth rate information as in Waddington-OT, you can directly pass the growth rate estimate for each cell to the input AnnData object at ``adata_orig.obs["cell_growth_rate"]``. Depending on the choice, the initialized map is stored at ``adata.uns['OT_transition_map']`` or ``adata.uns['HighVar_transition_map']``. The final product is stored at ``adata.uns['transition_map']``.
117 |
118 | ``HighVar`` converts highly variable genes into pseudo multi-time clones and infers a putative map with coherent sparse optimization. We find the ``HighVar`` method performs better than the ``OT`` method, especially when there are large differentiation effects over the observed time window, or batch effects.
119 |
120 | If ``initial_time_points`` and ``later_time_point`` are not specified, a map with transitions from all time points to the last time point is generated.
121 |
122 | -------------------------------------
123 |
124 | If you do not have any clonal information, you can still run::
125 |
126 | adata=cs.tmap.infer_Tmap_from_state_info_alone(adata_orig,initial_time_points=None,later_time_point=None,initialize_method='OT',**params)
127 |
128 | It is the same as ``cs.tmap.infer_Tmap_from_one_time_clones`` except that we assume pseudo clonal data where each cell at the later time point occupies a unique clone.
129 |
130 | -------------------------------------
131 |
132 | We also provide simple methods that infer transition map from clonal information alone::
133 |
134 | adata=cs.tmap.infer_Tmap_from_clonal_info_alone(adata_orig,clonal_time_points=None,later_time_point=None,**params)
135 |
136 | The result is stored at ``adata.uns['clonal_transition_map']``.
137 |
138 | Analysis and visualization
139 | ''''''''''''''''''''''''''
140 |
141 | Finally, each of the computed transition maps can be explored on state embedding at the single-cell level using a variety of analysis and plotting functions. There are some common parameters: 1) ``source``, for choosing one of the pre-computed transition maps (or the raw clonal data) for analysis; 2) ``selected_fates``, for visualizing the fate bias towards/against given fate clusters; 3) ``map_backward``, for analyzing forward or backward transitions; 4) ``method``, for different methods in fate probability analysis. See :doc:`CoSpar basics <20210121_cospar_tutorial_v2>` for more details.
142 |
143 |
144 | Below, we frame the task in the language of analyzing backward transitions for convenience. To see where a cell came from, run::
145 |
146 | cs.pl.single_cell_transition(adata,**params)
147 |
148 | To visualize the fate probability of initial cell states, run::
149 |
150 | cs.tl.fate_map(adata,**params)
151 | cs.pl.fate_map(adata,**params)
152 |
153 | To infer the fate bias of initial cell states between two fate clusters, run::
154 |
155 | cs.tl.fate_bias(adata,**params)
156 | cs.pl.fate_bias(adata,**params)
157 |
158 | To infer the dynamic trajectory towards given fate clusters, run::
159 |
160 | cs.tl.progenitor(adata,**params)
161 | cs.pl.progenitor(adata,**params)
162 |
163 | or, alternatively if you have data with multiple clonal time points, run::
164 |
165 | cs.tl.iterative_differentiation(adata,**params)
166 | cs.pl.iterative_differentiation(adata,**params)
167 |
168 | The first method (``cs.tl.progenitor``) assumes two input fate clusters and infers each trajectory by thresholding the corresponding fate bias. The second method (``cs.tl.iterative_differentiation``) infers the trajectory by iteratively tracing a selected fate cluster all the way back to its putative origin at the initial time point. For both methods, the inferred trajectory for each fate will be saved at ``adata.obs[f'diff_trajectory_{source}_{fate_name}']``, and we can explore the gene expression dynamics along this trajectory using::
169 |
170 | cs.pl.gene_expression_dynamics(adata,**params)
171 |
172 | Additionally, the first method (``cs.tl.progenitor``) exports the selected ancestor states of the selected fate clusters at ``adata.obs[f'progenitor_{source}_{fate_name}']``, which can be used to infer the driver genes for fate bifurcation by running::
173 |
174 | cs.pl.differential_genes(adata,**params)
175 |
176 |
177 | If there are multiple mature fate clusters, you can infer their differentiation coupling from the fate probabilities of initial cells or the raw clonal matrix by::
178 |
179 | cs.tl.fate_coupling(adata,source='transition_map',**params)
180 | cs.pl.fate_coupling(adata,source='transition_map',**params)
181 |
182 | You can also infer the fate hierarchy from::
183 |
184 | cs.tl.fate_hierarchy(adata,source='transition_map',**params)
185 | cs.pl.fate_hierarchy(adata,source='transition_map',**params)
186 |
--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
1 | |PyPI| |PyPIDownloads| |Docs|
2 |
3 | CoSpar - dynamic inference by integrating state and lineage information
4 | =======================================================================
5 |
6 | .. image:: https://user-images.githubusercontent.com/4595786/104988296-b987ce00-59e5-11eb-8dbe-a463b355a9fd.png
7 | :width: 300px
8 | :align: left
9 |
10 | **CoSpar** is a toolkit for dynamic inference from lineage-traced single cells. |br|
11 | The methods are based on
12 | `Wang et al. Nat. Biotech. (2022) `_.
13 |
14 | Dynamic inference based on single-cell state measurement alone requires serious simplifications. On the other hand, direct dynamic measurement via lineage tracing only captures partial information and its interpretation is challenging. CoSpar integrates both state and lineage information to infer a finite-time transition map of a development/differentiation system. It gains superior robustness and accuracy by exploiting both the local coherence and sparsity of differentiation transitions, i.e., neighboring initial states share similar yet sparse fate outcomes. Building around the :class:`~anndata.AnnData` object, CoSpar provides an integrated analysis framework for datasets with both state and lineage information. When only state information is available, CoSpar also improves upon existing dynamic inference methods by imposing sparsity and coherence. It offers essential toolkits for analyzing lineage data, state information, or their integration.
15 |
16 | CoSpar's key applications
17 | ^^^^^^^^^^^^^^^^^^^^^^^^^
18 | - infer transition maps from lineage data, state measurements, or their integration.
19 | - predict the fate bias of progenitor cells.
20 | - order cells along a differentiation trajectory leading to a given cell fate.
21 | - predict gene expression dynamics along a trajectory.
22 | - predict genes whose expression correlates with future fate outcome.
23 | - generate a putative fate hierarchy, ordering fates by their lineage distances.
24 |
25 | Package development relocation
26 | ------------------------------
27 | Effective on April 1st 2023, Shou-Wen Wang is leaving the Klein lab to start `his own group at Westlake University `_, and he will no longer maintain this repository. Further development of CoSpar will continue in his own lab under this repository `https://github.com/ShouWenWang-Lab/cospar <https://github.com/ShouWenWang-Lab/cospar>`_. Please reach out there for any issues related to CoSpar.
28 |
29 | Recorded Talks
30 | ^^^^^^^^^^^^^^
31 | - `Jun 1: Single-Cell Data Science 2022 `_. This is a 20-min short talk focusing more on the utility of CoSpar: `talk video `_
32 |
33 | - `Oct 19, 2022: Invited MIA talk at Broad Institute `_. This is a one-hour talk focusing on the Machine Learning part of CoSpar: `talk video `_. The talk slides can be found `here `_.
34 |
35 |
36 | .. Upcoming talks
37 | .. ^^^^^^^^^^^^^^
38 | .. - `Sep 15: Temporal Single-Cell Analysis (SCOG) `_
39 | .. - `Nov 12: Single Cell Biology (SCB) `_
40 |
41 |
42 |
43 | Reference
44 | ^^^^^^^^^
45 | `S.-W. Wang*, M. Herriges, K. Hurley, D. Kotton, A. M. Klein*, CoSpar identifies early cell fate biases from single cell transcriptomic and lineage information, Nat. Biotech. (2022) `_. [* corresponding authors]
46 |
47 |
48 |
49 | Support
50 | ^^^^^^^
51 | Feel free to submit an `issue `_
52 | or send us an `email `_.
53 | Your help to improve CoSpar is highly appreciated.
54 |
55 | Acknowledgment
56 | ^^^^^^^^^^^^^^
57 | Shou-Wen Wang wants to acknowledge `Xiaojie Qiu `_ for inspiring him to make this website. He also wants to acknowledge the community that maintains `scanpy `_ and `scvelo `_, where he learned about proper code documentation. He thanks Tal Debrobrah Scully, Qiu Wu and other lab members for testing the package. Shou-Wen wants to thank especially Allon Klein for his mentorship. Finally, he wants to acknowledge the generous support of the Damon Runyon Foundation through the Quantitative Biology Fellowship.
58 |
59 |
60 | .. toctree::
61 | :caption: Main
62 | :maxdepth: 1
63 | :hidden:
64 |
65 | about
66 | api
67 | release_note
68 |
69 |
70 | .. toctree::
71 | :caption: Tutorial
72 | :maxdepth: 1
73 | :hidden:
74 |
75 | installation
76 | getting_started
77 | 20210602_loading_data
78 | 20211010_preprocessing
79 | 20211010_clonal_analysis
80 | 20211010_map_inference
81 | 20211010_map_analysis
82 |
83 | .. toctree::
84 | :caption: Examples
85 | :maxdepth: 1
86 | :hidden:
87 |
88 | 20210121_all_hematopoietic_data_v3
89 | 20210121_reprogramming_static_barcoding_v2
90 | 20210121_lung_data_v2
91 | 20210120_bifurcation_model_static_barcoding
92 | 20220402_simulate_differentiation
93 |
94 |
95 | .. |PyPI| image:: https://img.shields.io/pypi/v/cospar.svg
96 | :target: https://pypi.org/project/cospar
97 |
98 | .. |PyPIDownloads| image:: https://pepy.tech/badge/cospar
99 | :target: https://pepy.tech/project/cospar
100 |
101 | .. |Docs| image:: https://readthedocs.org/projects/cospar/badge/?version=latest
102 | :target: https://cospar.readthedocs.io
103 |
104 |
105 | ..
106 | .. |travis| image:: https://travis-ci.org/theislab/cospar.svg?branch=master
107 | :target: https://travis-ci.org/theislab/cospar
108 |
109 |
110 | .. |br| raw:: html
111 |
112 |
113 |
114 | ..
115 | .. |meet| raw:: html
116 |
117 |
118 |
119 | .. |dim| raw:: html
120 |
121 |
122 |
123 |
--------------------------------------------------------------------------------
/docs/source/installation.rst:
--------------------------------------------------------------------------------
1 | Installation
2 | ------------
3 |
4 | CoSpar requires Python 3.6 or later. We recommend using Miniconda_ for package management. For a computer that does not have a package management tool yet, please install Miniconda_ first, and activate it by running the following command in the terminal::
5 |
6 | source ~/.bash_profile
7 |
8 | PyPI
9 | ^^^^
10 |
11 | Install CoSpar from PyPI_ using::
12 |
13 | pip install --upgrade cospar
14 |
15 | If you get a ``Permission denied`` error, use ``pip install --upgrade cospar --user`` instead.
16 |
17 | If you get errors related to 'gcc', try to specify the following gcc path for installation::
18 |
19 | env CXX=/usr/local/Cellar/gcc/8.2.0/bin/g++-8 CC=/usr/local/Cellar/gcc/8.2.0/bin/gcc-8 pip install cospar
20 |
21 | If you get errors for version conflicts with existing packages, try::
22 |
23 | pip install --ignore-installed --upgrade cospar
24 |
25 | Development Version
26 | ^^^^^^^^^^^^^^^^^^^
27 |
28 | To work with the latest development version, install from GitHub_ using::
29 |
30 | pip install git+https://github.com/ShouWenWang-Lab/cospar
31 |
32 | or::
33 |
34 | git clone https://github.com/ShouWenWang-Lab/cospar
35 | pip install -e cospar
36 |
37 | ``-e`` is short for ``--editable`` and links the package to the original cloned location such that pulled changes are also reflected in the environment.
38 |
39 |
40 | Dependencies
41 | ^^^^^^^^^^^^
42 |
43 | - `anndata `_ - annotated data object.
44 | - `scanpy `_ - toolkit for single-cell analysis.
45 | - `numpy `_, `scipy `_, `pandas `_, `scikit-learn `_, `matplotlib `_, `plotnine `_,
46 |
47 |
48 |
49 | Jupyter Notebook
50 | ^^^^^^^^^^^^^^^^
51 |
52 | To run the tutorials in a notebook locally, please install::
53 |
54 | conda install notebook
55 |
56 | and run ``jupyter notebook`` in the terminal. If you get the error ``Not a directory: 'xdg-settings'``,
57 | use ``jupyter notebook --no-browser`` instead and open the url manually (or use this
58 | `bugfix `_).
59 |
60 |
61 | If you run into issues, do not hesitate to approach us or raise a `GitHub issue`_.
62 |
63 | .. _Miniconda: http://conda.pydata.org/miniconda.html
64 | .. _PyPI: https://pypi.org/project/cospar
65 | .. _Github: https://github.com/ShouWenWang-Lab/cospar/
66 | .. _`Github issue`: https://github.com/ShouWenWang-Lab/cospar/issues/new/choose
67 |
68 |
69 | Testing CoSpar in a new environment
70 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
71 |
72 | In case you want to test cospar without affecting existing python packages, you can create a new conda environment and install CoSpar there::
73 |
74 | conda create -n test_cospar python=3.6
75 | conda activate test_cospar
76 | pip install cospar
77 |
78 | Now, install jupyter notebook in this environment::
79 |
80 | pip install --user ipykernel
81 |
82 | If you encounter an error related to ``nbconvert``, run (this is optional)::
83 |
84 | pip3 install --upgrade --user nbconvert
85 |
86 | Finally, install the jupyter notebook kernel related to this environment::
87 |
88 | python -m ipykernel install --user --name=test_cospar
89 |
90 | Now, you can open jupyter notebook by running ``jupyter notebook`` in the terminal, and select the kernel ``test_cospar`` to run CoSpar.
91 |
--------------------------------------------------------------------------------
/docs/source/release_note.rst:
--------------------------------------------------------------------------------
1 | Release notes
2 | -------------
3 |
4 | v0.2.1
5 | ''''''
6 |
7 | Major changes from v0.1.8 to v0.2.1:
8 | - Split each plotting function into two parts: computing the results (stored at cospar.tl.**) and actually plotting the result (stored at cospar.pl.**).
9 | - Update the notebooks to accommodate these changes.
10 | - Update the datasets in the cloud to add more annotations.
11 | - Re-organize the content of the plot, tool, and tmap modules.
12 | - Fix stochasticity when running HighVar method to generate the initialized map.
13 | - Fix generating X_clone from the cell_id-by-barcode_id list.
14 | - Add a few more functions: :func:`cospar.pl.clonal_fates_across_time`, :func:`cospar.pl.clonal_reports`, :func:`cospar.pl.embedding_genes`, :func:`cospar.tl.fate_biased_clones`
15 | - Update :func:`cospar.pl.barcode_heatmap` to order clones in a better way
16 | - Fix the docs.
17 | - Adopt "Raise ValueError" method for error handling.
18 | - Unify error checking at the beginning of several functions.
19 |
20 | v0.1.8
21 | ''''''
22 |
23 | This is used in running the notebooks that generate figures for the published paper. To run the original notebooks, you should switch to this version.
24 |
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
1 | name: cospar
2 | channels:
3 | - conda-forge
4 | - bioconda
5 | dependencies:
6 | - pip
7 | - pytest
8 | - pytest-cov
9 | - pytest-datadir
10 | - ipywidgets
11 | - numpy<=1.21
12 | - scipy>=1.5.4
13 | - scikit-learn>=0.23.2
14 | - scanpy>=1.6.0
15 | - pandas>=1.1.4
16 | - statsmodels==0.13.2
17 | - plotnine>=0.7.1
18 | - matplotlib>=3.3.3
19 | - fastcluster>=1.1.26 # used to generate the clustered heat map of barcodes
20 | - anndata>=0.7.5
21 | - numba>=0.52.0 # related to issues of GPUipatch error
22 | - scikit-misc>=0.1.3 # used for loess smoothing
23 | - leidenalg>=0.7.0
24 | - ete3>=3.1.2
25 | - click==8.0.4 # related to black import
26 | - black==22.1.0 # related to black import
27 |
--------------------------------------------------------------------------------
/pypi.rst:
--------------------------------------------------------------------------------
1 | |PyPI| |PyPIDownloads| |Docs|
2 |
3 | CoSpar - dynamic inference by integrating state and lineage information
4 | =======================================================================
5 |
6 | .. image:: https://user-images.githubusercontent.com/4595786/104988296-b987ce00-59e5-11eb-8dbe-a463b355a9fd.png
7 | :width: 300px
8 | :align: left
9 |
10 | **CoSpar** is a toolkit for dynamic inference from lineage-traced single cells.
11 | The methods are based on
12 | `Wang et al. Nat. Biotech. (2022) `_.
13 |
14 | Dynamic inference based on single-cell state measurement alone requires serious simplifications. On the other hand, direct dynamic measurement via lineage tracing only captures partial information and its interpretation is challenging. CoSpar integrates both state and lineage information to infer a finite-time transition map of a development/differentiation system. It gains superior robustness and accuracy by exploiting both the local coherence and sparsity of differentiation transitions, i.e., neighboring initial states share similar yet sparse fate outcomes. Building around the anndata_ object, CoSpar provides an integrated analysis framework for datasets with both state and lineage information. When only state information is available, CoSpar also improves upon existing dynamic inference methods by imposing sparsity and coherence. It offers essential toolkits for analyzing lineage data, state information, or their integration.
15 |
16 | See ``_ for documentation and tutorials.
17 |
18 |
19 | Reference
20 | ---------
21 | `S.-W. Wang*, M. Herriges, K. Hurley, D. Kotton, A. M. Klein*, CoSpar identifies early cell fate biases from single cell transcriptomic and lineage information, Nat. Biotech. (2022) `_. [* corresponding authors]
22 |
23 | Support
24 | -------
25 | Feel free to submit an `issue `_
26 | or send us an `email `_.
27 | Your help to improve CoSpar is highly appreciated.
28 |
29 | .. |PyPI| image:: https://img.shields.io/pypi/v/cospar.svg
30 | :target: https://pypi.org/project/cospar
31 |
32 | .. |PyPIDownloads| image:: https://pepy.tech/badge/cospar
33 | :target: https://pepy.tech/project/cospar
34 |
35 | .. |Docs| image:: https://readthedocs.org/projects/cospar/badge/?version=latest
36 | :target: https://cospar.readthedocs.io
37 |
38 |
39 | .. _anndata: https://anndata.readthedocs.io
40 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools >= 40.6.0", "wheel"]
3 | build-backend = "setuptools.build_meta"
4 |
5 | [tool.black]
6 | #line-length = 10 # override black's default line-length
7 | exclude = '''
8 | /(
9 | \.git
10 | | \.mypy_cache
11 | | \.tox
12 | | venv
13 | | \.venv
14 | | _build
15 | | buck-out
16 | | build
17 | | dist
18 | )/
19 | '''
20 |
21 | [tool.isort]
22 | # make it compatible with black
23 | profile = "black"
24 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy>=1.19.4
2 | scipy>=1.5.4
3 | scikit-learn>=0.23.2
4 | scanpy>=1.6.0
5 | pandas>=1.1.4
6 | statsmodels==0.13.2
7 | plotnine>=0.7.1
8 | matplotlib>=3.3.3
9 | fastcluster>=1.1.26 # used to generate the clustered heat map of barcodes
10 | anndata>=0.7.5
11 | numba>=0.52.0 # related to issues of GPUipatch error
12 | scikit-misc>=0.1.3 # used for loess smoothing
13 | leidenalg>=0.7.0
14 | ete3>=3.1.2
15 | ipywidgets
16 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
"""Packaging script for CoSpar."""
import os
import re
import sys
from pathlib import Path

from setuptools import find_packages, setup

# Directory that contains this setup.py. Every file read below is resolved
# against it so the script does not depend on the current working directory.
HERE = Path(os.path.abspath(os.path.dirname(__file__)))

# Make the in-tree package importable so its version can be read.
sys.path.insert(0, str(HERE))
from cospar import __version__

# A comment starts at the beginning of a line or after whitespace, so '#'
# characters embedded in a URL fragment are left alone.
_COMMENT = re.compile(r"(^|\s+)#.*$")


def _read_requirements(path):
    """Return the requirement specifiers listed in the file at *path*.

    Blank lines and ``#`` comments (whole-line or trailing) are dropped so
    that only the specifiers themselves are handed to setuptools.
    """
    requirements = []
    for raw_line in path.read_text("utf-8").splitlines():
        spec = _COMMENT.sub("", raw_line).strip()
        if spec:
            requirements.append(spec)
    return requirements


setup(
    name="cospar",
    version=__version__,
    python_requires=">=3.6",
    install_requires=_read_requirements(HERE / "requirements.txt"),
    extras_require=dict(
        dev=["black==19.10b0", "pre-commit==2.5.1"],
        docs=_read_requirements(HERE / "docs" / "requirements.txt"),
    ),
    packages=find_packages(),  # this is better than packages=["cospar"], which only include the top level files
    long_description_content_type="text/x-rst",
    author="Shou-Wen Wang",
    author_email="shouwen_wang@hms.harvard.edu",
    description="CoSpar: integrating state and lineage information for dynamic inference",
    long_description=(HERE / "pypi.rst").read_text("utf-8"),
    license="BSD",
    url="https://github.com/ShouWenWang-Lab/cospar",
    download_url="https://github.com/ShouWenWang-Lab/cospar",
    keywords=[
        "dynamic inference",
        "lineage tracing",
        "single cell",
        "transcriptomics",
        "differentiation",
    ],
    classifiers=[
        "License :: OSI Approved :: BSD License",
        "Development Status :: 5 - Production/Stable",
        "Intended Audience :: Science/Research",
        "Natural Language :: English",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.6",
        "Programming Language :: Python :: 3.7",
        "Programming Language :: Python :: 3.8",
        "Topic :: Scientific/Engineering :: Bio-Informatics",
        "Topic :: Scientific/Engineering :: Visualization",
    ],
)
50 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllonKleinLab/cospar/ca54cad8a9db9a72152ba8a8b6d67d57eace4acb/tests/__init__.py
--------------------------------------------------------------------------------
/tests/context.py:
--------------------------------------------------------------------------------
import os
import sys

# Put the parent directory of this tests folder at the front of sys.path so
# that the ``import cospar`` below searches that directory first.
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))

import cospar
7 |
--------------------------------------------------------------------------------
/tests/data/cell_id.txt:
--------------------------------------------------------------------------------
1 | Cell_ID
2 | cell_10
3 | cell_13
4 | cell_18
5 | cell_32
6 | cell_70
7 | cell_80
8 | cell_90
9 | cell_97
10 | cell_108
11 | cell_117
12 | cell_136
13 | cell_138
14 | cell_143
15 | cell_144
16 | cell_147
17 | cell_160
18 | cell_162
19 | cell_166
20 | cell_191
21 | cell_199
22 | cell_205
23 | cell_206
24 | cell_214
25 | cell_218
26 | cell_232
27 | cell_235
28 | cell_239
29 | cell_253
30 | cell_260
31 | cell_269
32 | cell_282
33 | cell_286
34 | cell_289
35 | cell_293
36 | cell_295
37 | cell_300
38 | cell_306
39 | cell_319
40 | cell_320
41 | cell_345
42 | cell_363
43 | cell_369
44 | cell_376
45 | cell_384
46 | cell_394
47 | cell_413
48 | cell_415
49 | cell_418
50 | cell_421
51 | cell_426
52 | cell_427
53 | cell_430
54 | cell_446
55 | cell_447
56 | cell_454
57 | cell_464
58 | cell_478
59 | cell_495
60 | cell_497
61 | cell_504
62 | cell_505
63 | cell_525
64 | cell_532
65 | cell_537
66 | cell_547
67 | cell_551
68 | cell_558
69 | cell_562
70 | cell_563
71 | cell_569
72 | cell_593
73 | cell_610
74 | cell_634
75 | cell_664
76 | cell_687
77 | cell_693
78 | cell_703
79 | cell_704
80 | cell_717
81 | cell_720
82 | cell_723
83 | cell_732
84 | cell_755
85 | cell_773
86 | cell_774
87 | cell_775
88 | cell_778
89 | cell_790
90 | cell_791
91 | cell_803
92 | cell_840
93 | cell_846
94 | cell_852
95 | cell_855
96 | cell_874
97 | cell_889
98 | cell_898
99 | cell_926
100 | cell_942
101 | cell_944
102 | cell_953
103 | cell_955
104 | cell_957
105 | cell_962
106 | cell_963
107 | cell_972
108 | cell_986
109 | cell_991
110 | cell_1000
111 | cell_1002
112 | cell_1010
113 | cell_1032
114 | cell_1038
115 | cell_1042
116 | cell_1045
117 | cell_1063
118 | cell_1071
119 | cell_1083
120 | cell_1092
121 | cell_1107
122 | cell_1110
123 | cell_1117
124 | cell_1120
125 | cell_1122
126 | cell_1126
127 | cell_1166
128 | cell_1170
129 | cell_1190
130 | cell_1191
131 | cell_1192
132 | cell_1207
133 | cell_1245
134 | cell_1254
135 | cell_1274
136 | cell_1304
137 | cell_1345
138 | cell_1358
139 | cell_1383
140 | cell_1384
141 | cell_1389
142 | cell_1390
143 | cell_1397
144 | cell_1404
145 | cell_1415
146 | cell_1418
147 | cell_1422
148 | cell_1428
149 | cell_1430
150 | cell_1442
151 | cell_1462
152 | cell_1467
153 | cell_1470
154 | cell_1490
155 | cell_1494
156 | cell_1495
157 | cell_1497
158 | cell_1506
159 | cell_1507
160 | cell_1531
161 | cell_1551
162 | cell_1556
163 | cell_1567
164 | cell_1576
165 | cell_1578
166 | cell_1588
167 | cell_1590
168 | cell_1593
169 | cell_1597
170 | cell_1600
171 | cell_1622
172 | cell_1625
173 | cell_1637
174 | cell_1654
175 | cell_1658
176 | cell_1660
177 | cell_1684
178 | cell_1686
179 | cell_1693
180 | cell_1697
181 | cell_1710
182 | cell_1712
183 | cell_1715
184 | cell_1728
185 | cell_1743
186 | cell_1747
187 | cell_1749
188 | cell_1787
189 | cell_1801
190 | cell_1809
191 | cell_1830
192 | cell_1832
193 | cell_1835
194 | cell_1856
195 | cell_1869
196 | cell_1870
197 | cell_1890
198 | cell_1892
199 | cell_1896
200 | cell_1901
201 | cell_1906
202 | cell_1925
203 | cell_1927
204 | cell_1928
205 | cell_1957
206 | cell_1969
207 | cell_1979
208 | cell_1984
209 | cell_1987
210 | cell_2022
211 | cell_2025
212 | cell_2076
213 | cell_2098
214 | cell_2107
215 | cell_2140
216 | cell_2172
217 | cell_2200
218 | cell_2206
219 | cell_2210
220 | cell_2213
221 | cell_2220
222 | cell_2227
223 | cell_2237
224 | cell_2246
225 | cell_2251
226 | cell_2264
227 | cell_2295
228 | cell_2296
229 | cell_2309
230 | cell_2331
231 | cell_2339
232 | cell_2345
233 | cell_2391
234 | cell_2393
235 | cell_2395
236 | cell_2402
237 | cell_2403
238 | cell_2408
239 | cell_2410
240 | cell_2421
241 | cell_2430
242 | cell_2431
243 | cell_2437
244 | cell_2445
245 | cell_2447
246 | cell_2453
247 | cell_2484
248 | cell_2486
249 | cell_2502
250 | cell_2504
251 | cell_2512
252 | cell_2536
253 | cell_2557
254 | cell_2560
255 | cell_2561
256 | cell_2569
257 | cell_2573
258 | cell_2592
259 | cell_2597
260 | cell_2599
261 | cell_2635
262 | cell_2637
263 | cell_2642
264 | cell_2646
265 | cell_2653
266 | cell_2655
267 | cell_2661
268 | cell_2663
269 | cell_2669
270 | cell_2685
271 | cell_2686
272 | cell_2706
273 | cell_2709
274 | cell_2732
275 | cell_2744
276 | cell_2747
277 | cell_2766
278 | cell_2770
279 | cell_2771
280 | cell_2794
281 | cell_2796
282 | cell_2814
283 | cell_2821
284 | cell_2826
285 | cell_2839
286 | cell_2855
287 | cell_2859
288 | cell_2865
289 | cell_2866
290 | cell_2874
291 | cell_2877
292 | cell_2878
293 | cell_2879
294 | cell_2927
295 | cell_2929
296 | cell_2938
297 | cell_2942
298 | cell_2946
299 | cell_2950
300 | cell_2966
301 | cell_2968
302 | cell_2992
303 | cell_3008
304 | cell_3009
305 | cell_3014
306 | cell_3017
307 | cell_3024
308 | cell_3032
309 | cell_3045
310 | cell_3060
311 | cell_3079
312 | cell_3083
313 | cell_3093
314 | cell_3107
315 | cell_3117
316 | cell_3125
317 | cell_3132
318 | cell_3178
319 | cell_3182
320 | cell_3195
321 | cell_3221
322 | cell_3224
323 | cell_3226
324 | cell_3241
325 | cell_3246
326 | cell_3270
327 | cell_3275
328 | cell_3277
329 | cell_3290
330 | cell_3295
331 | cell_3315
332 | cell_3317
333 | cell_3318
334 | cell_3319
335 | cell_3322
336 | cell_3325
337 | cell_3329
338 | cell_3333
339 | cell_3341
340 | cell_3344
341 | cell_3355
342 | cell_3372
343 | cell_3374
344 | cell_3379
345 | cell_3395
346 | cell_3419
347 | cell_3428
348 | cell_3439
349 | cell_3450
350 | cell_3453
351 | cell_3457
352 | cell_3458
353 | cell_3466
354 | cell_3507
355 | cell_3511
356 | cell_3514
357 | cell_3532
358 | cell_3542
359 | cell_3548
360 | cell_3558
361 | cell_3559
362 | cell_3574
363 | cell_3579
364 | cell_3580
365 | cell_3583
366 | cell_3592
367 | cell_3601
368 | cell_3623
369 | cell_3628
370 | cell_3644
371 | cell_3646
372 | cell_3655
373 | cell_3664
374 | cell_3666
375 | cell_3670
376 | cell_3676
377 | cell_3689
378 | cell_3703
379 | cell_3712
380 | cell_3721
381 | cell_3726
382 | cell_3747
383 | cell_3782
384 | cell_3786
385 | cell_3788
386 | cell_3797
387 | cell_3806
388 | cell_3812
389 | cell_3822
390 | cell_3829
391 | cell_3833
392 | cell_3871
393 | cell_3878
394 | cell_3897
395 | cell_3906
396 | cell_3912
397 | cell_3916
398 | cell_3920
399 | cell_3926
400 | cell_3928
401 | cell_3930
402 | cell_3940
403 | cell_3946
404 | cell_3964
405 | cell_3965
406 | cell_3974
407 | cell_3983
408 | cell_3987
409 | cell_3992
410 | cell_4017
411 | cell_4018
412 | cell_4021
413 | cell_4026
414 | cell_4038
415 | cell_4048
416 | cell_4049
417 | cell_4062
418 | cell_4068
419 | cell_4072
420 | cell_4074
421 | cell_4077
422 | cell_4087
423 | cell_4101
424 | cell_4117
425 | cell_4126
426 | cell_4128
427 | cell_4129
428 | cell_4136
429 | cell_4140
430 | cell_4144
431 | cell_4149
432 | cell_4184
433 | cell_4190
434 | cell_4193
435 | cell_4196
436 | cell_4212
437 | cell_4230
438 | cell_4235
439 | cell_4243
440 | cell_4250
441 | cell_4263
442 | cell_4279
443 | cell_4289
444 | cell_4297
445 | cell_4301
446 | cell_4312
447 | cell_4325
448 | cell_4330
449 | cell_4336
450 | cell_4342
451 | cell_4351
452 | cell_4386
453 | cell_4388
454 | cell_4391
455 | cell_4395
456 | cell_4399
457 | cell_4424
458 | cell_4426
459 | cell_4448
460 | cell_4450
461 | cell_4460
462 | cell_4477
463 | cell_4482
464 | cell_4504
465 | cell_4512
466 | cell_4530
467 | cell_4531
468 | cell_4533
469 | cell_4534
470 | cell_4557
471 | cell_4567
472 | cell_4570
473 | cell_4571
474 | cell_4576
475 | cell_4590
476 | cell_4601
477 | cell_4607
478 | cell_4609
479 | cell_4612
480 | cell_4622
481 | cell_4626
482 | cell_4630
483 | cell_4645
484 | cell_4665
485 | cell_4674
486 | cell_4678
487 | cell_4681
488 | cell_4682
489 | cell_4701
490 | cell_4719
491 | cell_4720
492 | cell_4721
493 | cell_4726
494 | cell_4737
495 | cell_4744
496 | cell_4748
497 | cell_4749
498 | cell_4769
499 | cell_4779
500 | cell_4781
501 | cell_4791
502 | cell_4797
503 | cell_4815
504 | cell_4819
505 | cell_4823
506 | cell_4830
507 | cell_4852
508 | cell_4857
509 | cell_4864
510 | cell_4865
511 | cell_4890
512 | cell_4918
513 | cell_4928
514 | cell_4949
515 | cell_4950
516 | cell_4958
517 | cell_4959
518 | cell_4974
519 | cell_4984
520 | cell_5010
521 | cell_5013
522 | cell_5016
523 | cell_5021
524 | cell_5030
525 | cell_5037
526 | cell_5042
527 | cell_5055
528 | cell_5058
529 | cell_5077
530 | cell_5083
531 | cell_5085
532 | cell_5092
533 | cell_5101
534 | cell_5102
535 | cell_5107
536 | cell_5116
537 | cell_5119
538 | cell_5142
539 | cell_5154
540 | cell_5157
541 | cell_5163
542 | cell_5204
543 | cell_5213
544 | cell_5216
545 | cell_5218
546 | cell_5225
547 | cell_5234
548 | cell_5241
549 | cell_5242
550 | cell_5246
551 | cell_5266
552 | cell_5269
553 | cell_5309
554 | cell_5331
555 | cell_5347
556 | cell_5348
557 | cell_5375
558 | cell_5380
559 | cell_5381
560 | cell_5390
561 | cell_5400
562 | cell_5404
563 | cell_5406
564 | cell_5420
565 | cell_5449
566 | cell_5461
567 | cell_5468
568 | cell_5474
569 | cell_5498
570 | cell_5512
571 | cell_5514
572 | cell_5522
573 | cell_5535
574 | cell_5537
575 | cell_5538
576 | cell_5542
577 | cell_5552
578 | cell_5568
579 | cell_5616
580 | cell_5637
581 | cell_5638
582 | cell_5640
583 | cell_5651
584 | cell_5671
585 | cell_5672
586 | cell_5681
587 | cell_5682
588 | cell_5684
589 | cell_5686
590 | cell_5688
591 | cell_5691
592 | cell_5695
593 | cell_5697
594 | cell_5739
595 | cell_5744
596 | cell_5771
597 | cell_5773
598 | cell_5776
599 | cell_5815
600 | cell_5837
601 | cell_5847
602 | cell_5860
603 | cell_5861
604 | cell_5865
605 | cell_5875
606 | cell_5883
607 | cell_5887
608 | cell_5888
609 | cell_5901
610 | cell_5903
611 | cell_5907
612 | cell_5908
613 | cell_5911
614 | cell_5922
615 | cell_5931
616 | cell_5934
617 | cell_5935
618 | cell_5947
619 | cell_5951
620 | cell_5968
621 | cell_5969
622 | cell_5976
623 | cell_5995
624 | cell_5997
625 | cell_6003
626 | cell_6006
627 | cell_6017
628 | cell_6059
629 | cell_6080
630 | cell_6081
631 | cell_6083
632 | cell_6090
633 | cell_6097
634 | cell_6113
635 | cell_6114
636 | cell_6119
637 | cell_6124
638 | cell_6132
639 | cell_6141
640 | cell_6167
641 | cell_6177
642 | cell_6202
643 | cell_6203
644 | cell_6204
645 | cell_6205
646 | cell_6210
647 | cell_6230
648 | cell_6232
649 | cell_6246
650 | cell_6257
651 | cell_6266
652 | cell_6267
653 | cell_6268
654 | cell_6283
655 | cell_6297
656 | cell_6340
657 | cell_6346
658 | cell_6355
659 | cell_6357
660 | cell_6366
661 | cell_6369
662 | cell_6371
663 | cell_6372
664 | cell_6373
665 | cell_6398
666 | cell_6402
667 | cell_6412
668 | cell_6420
669 | cell_6439
670 | cell_6453
671 | cell_6455
672 | cell_6463
673 | cell_6481
674 | cell_6484
675 | cell_6486
676 | cell_6489
677 | cell_6500
678 | cell_6505
679 | cell_6508
680 | cell_6556
681 | cell_6569
682 | cell_6581
683 | cell_6582
684 | cell_6584
685 | cell_6590
686 | cell_6596
687 | cell_6599
688 | cell_6602
689 | cell_6603
690 | cell_6608
691 | cell_6614
692 | cell_6616
693 | cell_6617
694 | cell_6635
695 | cell_6637
696 | cell_6642
697 | cell_6656
698 | cell_6659
699 | cell_6672
700 | cell_6727
701 | cell_6734
702 | cell_6754
703 | cell_6764
704 | cell_6779
705 | cell_6785
706 | cell_6788
707 | cell_6803
708 | cell_6804
709 | cell_6805
710 | cell_6809
711 | cell_6847
712 | cell_6862
713 | cell_6865
714 | cell_6870
715 | cell_6889
716 | cell_6894
717 | cell_6919
718 | cell_6930
719 | cell_6936
720 | cell_6950
721 | cell_6965
722 | cell_6971
723 | cell_6972
724 | cell_6976
725 | cell_6977
726 | cell_6980
727 | cell_6989
728 | cell_7006
729 | cell_7026
730 | cell_7029
731 | cell_7035
732 | cell_7057
733 | cell_7079
734 | cell_7081
735 | cell_7087
736 | cell_7090
737 | cell_7091
738 | cell_7093
739 | cell_7111
740 | cell_7117
741 | cell_7125
742 | cell_7126
743 | cell_7133
744 | cell_7136
745 | cell_7141
746 | cell_7146
747 | cell_7154
748 | cell_7159
749 | cell_7168
750 | cell_7182
751 | cell_7193
752 | cell_7212
753 | cell_7229
754 | cell_7240
755 | cell_7256
756 | cell_7258
757 | cell_7260
758 | cell_7263
759 | cell_7274
760 | cell_7279
761 | cell_7288
762 | cell_7295
763 | cell_7301
764 | cell_7317
765 | cell_7333
766 | cell_7335
767 | cell_7340
768 | cell_7359
769 | cell_7376
770 | cell_7379
771 | cell_7381
772 | cell_7388
773 | cell_7389
774 | cell_7392
775 | cell_7402
776 | cell_7407
777 | cell_7409
778 | cell_7417
779 | cell_7423
780 | cell_7435
781 | cell_7436
782 | cell_7437
783 |
--------------------------------------------------------------------------------
/tests/data/clonal_data_in_table_format.txt:
--------------------------------------------------------------------------------
1 | Cell_ID,Clone_ID
2 | cell_0,clone_275
3 | cell_1,clone_329
4 | cell_2,clone_56
5 | cell_3,clone_236
6 | cell_4,clone_213
7 | cell_5,clone_190
8 | cell_6,clone_236
9 | cell_7,clone_315
10 | cell_8,clone_109
11 | cell_9,clone_152
12 | cell_10,clone_284
13 | cell_11,clone_335
14 | cell_12,clone_275
15 | cell_13,clone_213
16 | cell_14,clone_285
17 | cell_15,clone_155
18 | cell_16,clone_104
19 | cell_17,clone_317
20 | cell_18,clone_229
21 | cell_19,clone_244
22 | cell_20,clone_183
23 | cell_21,clone_275
24 | cell_22,clone_284
25 | cell_23,clone_236
26 | cell_24,clone_236
27 | cell_25,clone_197
28 | cell_26,clone_36
29 | cell_27,clone_275
30 | cell_28,clone_298
31 | cell_29,clone_10
32 | cell_30,clone_236
33 | cell_31,clone_114
34 | cell_32,clone_158
35 | cell_33,clone_236
36 | cell_34,clone_158
37 | cell_35,clone_104
38 | cell_36,clone_77
39 | cell_37,clone_325
40 | cell_38,clone_161
41 | cell_39,clone_190
42 | cell_40,clone_62
43 | cell_41,clone_158
44 | cell_42,clone_251
45 | cell_43,clone_18
46 | cell_44,clone_325
47 | cell_45,clone_306
48 | cell_46,clone_243
49 | cell_47,clone_73
50 | cell_48,clone_59
51 | cell_49,clone_314
52 | cell_50,clone_317
53 | cell_51,clone_236
54 | cell_52,clone_74
55 | cell_53,clone_314
56 | cell_54,clone_221
57 | cell_55,clone_314
58 | cell_56,clone_109
59 | cell_57,clone_178
60 | cell_58,clone_136
61 | cell_59,clone_219
62 | cell_60,clone_236
63 | cell_61,clone_56
64 | cell_62,clone_186
65 | cell_63,clone_284
66 | cell_64,clone_73
67 | cell_65,clone_251
68 | cell_66,clone_59
69 | cell_67,clone_186
70 | cell_68,clone_284
71 | cell_69,clone_18
72 | cell_70,clone_183
73 | cell_71,clone_284
74 | cell_72,clone_187
75 | cell_73,clone_315
76 | cell_74,clone_226
77 | cell_75,clone_88
78 | cell_76,clone_229
79 | cell_77,clone_77
80 | cell_78,clone_189
81 | cell_79,clone_229
82 | cell_80,clone_314
83 | cell_81,clone_275
84 | cell_82,clone_152
85 | cell_83,clone_284
86 | cell_84,clone_317
87 | cell_85,clone_179
88 | cell_86,clone_285
89 | cell_87,clone_59
90 | cell_88,clone_335
91 | cell_89,clone_202
92 | cell_90,clone_284
93 | cell_91,clone_275
94 | cell_92,clone_285
95 | cell_93,clone_98
96 | cell_94,clone_229
97 | cell_95,clone_335
98 | cell_96,clone_158
99 | cell_97,clone_74
100 | cell_98,clone_275
101 | cell_99,clone_10
102 | cell_100,clone_32
103 | cell_101,clone_56
104 | cell_102,clone_104
105 | cell_103,clone_335
106 | cell_104,clone_284
107 | cell_105,clone_152
108 | cell_106,clone_88
109 | cell_107,clone_136
110 | cell_108,clone_284
111 | cell_109,clone_284
112 | cell_110,clone_158
113 | cell_111,clone_284
114 | cell_112,clone_20
115 | cell_113,clone_24
116 | cell_114,clone_79
117 | cell_115,clone_252
118 | cell_116,clone_237
119 | cell_117,clone_24
120 | cell_118,clone_176
121 | cell_119,clone_213
122 | cell_120,clone_79
123 | cell_121,clone_130
124 | cell_122,clone_168
125 | cell_123,clone_285
126 | cell_124,clone_79
127 | cell_125,clone_325
128 | cell_126,clone_252
129 | cell_127,clone_37
130 | cell_128,clone_251
131 | cell_129,clone_179
132 | cell_130,clone_79
133 | cell_131,clone_163
134 | cell_132,clone_258
135 | cell_133,clone_79
136 | cell_134,clone_275
137 | cell_135,clone_57
138 | cell_136,clone_315
139 | cell_137,clone_221
140 | cell_138,clone_279
141 | cell_139,clone_274
142 | cell_140,clone_78
143 | cell_141,clone_213
144 | cell_142,clone_275
145 | cell_143,clone_23
146 | cell_144,clone_148
147 | cell_145,clone_136
148 | cell_146,clone_179
149 | cell_147,clone_252
150 | cell_148,clone_325
151 | cell_149,clone_232
152 | cell_150,clone_298
153 | cell_151,clone_117
154 | cell_152,clone_279
155 | cell_153,clone_186
156 | cell_154,clone_207
157 | cell_155,clone_176
158 | cell_156,clone_306
159 | cell_157,clone_213
160 | cell_158,clone_221
161 | cell_159,clone_130
162 | cell_160,clone_29
163 | cell_161,clone_251
164 | cell_162,clone_302
165 | cell_163,clone_224
166 | cell_164,clone_1
167 | cell_165,clone_77
168 | cell_166,clone_302
169 | cell_167,clone_285
170 | cell_168,clone_243
171 | cell_169,clone_314
172 | cell_170,clone_118
173 | cell_171,clone_252
174 | cell_172,clone_8
175 | cell_173,clone_281
176 | cell_174,clone_118
177 | cell_175,clone_276
178 | cell_176,clone_134
179 | cell_177,clone_317
180 | cell_178,clone_118
181 | cell_179,clone_203
182 | cell_180,clone_134
183 | cell_181,clone_88
184 | cell_182,clone_325
185 | cell_183,clone_59
186 | cell_184,clone_199
187 | cell_185,clone_59
188 | cell_186,clone_317
189 | cell_187,clone_335
190 | cell_188,clone_182
191 | cell_189,clone_157
192 | cell_190,clone_252
193 | cell_191,clone_284
194 | cell_192,clone_70
195 | cell_193,clone_34
196 | cell_194,clone_122
197 | cell_195,clone_294
198 | cell_196,clone_306
199 | cell_197,clone_69
200 | cell_198,clone_284
201 | cell_199,clone_20
202 | cell_200,clone_314
203 | cell_201,clone_275
204 | cell_202,clone_110
205 | cell_203,clone_210
206 | cell_204,clone_325
207 | cell_205,clone_285
208 | cell_206,clone_56
209 | cell_207,clone_130
210 | cell_208,clone_176
211 | cell_209,clone_237
212 | cell_210,clone_229
213 | cell_211,clone_202
214 | cell_212,clone_284
215 | cell_213,clone_79
216 | cell_214,clone_244
217 | cell_215,clone_110
218 | cell_216,clone_238
219 | cell_217,clone_284
220 | cell_218,clone_47
221 | cell_219,clone_126
222 | cell_220,clone_176
223 | cell_221,clone_18
224 | cell_222,clone_229
225 | cell_223,clone_315
226 | cell_224,clone_243
227 | cell_225,clone_88
228 | cell_226,clone_219
229 | cell_227,clone_237
230 | cell_228,clone_237
231 | cell_229,clone_281
232 | cell_230,clone_134
233 | cell_231,clone_251
234 | cell_232,clone_126
235 | cell_233,clone_88
236 | cell_234,clone_186
237 | cell_235,clone_143
238 | cell_236,clone_126
239 | cell_237,clone_237
240 | cell_238,clone_43
241 | cell_239,clone_281
242 | cell_240,clone_243
243 | cell_241,clone_219
244 | cell_242,clone_182
245 | cell_243,clone_281
246 | cell_244,clone_20
247 | cell_245,clone_24
248 | cell_246,clone_122
249 | cell_247,clone_79
250 | cell_248,clone_130
251 | cell_249,clone_172
252 | cell_250,clone_284
253 | cell_251,clone_222
254 | cell_252,clone_284
255 | cell_253,clone_172
256 | cell_254,clone_122
257 | cell_255,clone_224
258 | cell_256,clone_161
259 | cell_257,clone_291
260 | cell_258,clone_172
261 | cell_259,clone_294
262 | cell_260,clone_252
263 | cell_261,clone_228
264 | cell_262,clone_139
265 | cell_263,clone_62
266 | cell_264,clone_24
267 | cell_265,clone_79
268 | cell_266,clone_143
269 | cell_267,clone_148
270 | cell_268,clone_176
271 | cell_269,clone_176
272 | cell_270,clone_31
273 | cell_271,clone_79
274 | cell_272,clone_179
275 | cell_273,clone_317
276 | cell_274,clone_213
277 | cell_275,clone_300
278 | cell_276,clone_116
279 | cell_277,clone_204
280 | cell_278,clone_97
281 | cell_279,clone_216
282 | cell_280,clone_89
283 | cell_281,clone_332
284 | cell_282,clone_112
285 | cell_283,clone_273
286 | cell_284,clone_94
287 | cell_285,clone_142
288 | cell_286,clone_94
289 | cell_287,clone_84
290 | cell_288,clone_169
291 | cell_289,clone_40
292 | cell_290,clone_95
293 | cell_291,clone_282
294 | cell_292,clone_223
295 | cell_293,clone_44
296 | cell_294,clone_106
297 | cell_295,clone_248
298 | cell_296,clone_63
299 | cell_297,clone_204
300 | cell_298,clone_71
301 | cell_299,clone_149
302 | cell_300,clone_93
303 | cell_301,clone_334
304 | cell_302,clone_216
305 | cell_303,clone_204
306 | cell_304,clone_204
307 | cell_305,clone_181
308 | cell_306,clone_307
309 | cell_307,clone_241
310 | cell_308,clone_225
311 | cell_309,clone_246
312 | cell_310,clone_282
313 | cell_311,clone_332
314 | cell_312,clone_44
315 | cell_313,clone_318
316 | cell_314,clone_93
317 | cell_315,clone_303
318 | cell_316,clone_95
319 | cell_317,clone_334
320 | cell_318,clone_223
321 | cell_319,clone_95
322 | cell_320,clone_169
323 | cell_321,clone_95
324 | cell_322,clone_204
325 | cell_323,clone_310
326 | cell_324,clone_99
327 | cell_325,clone_95
328 | cell_326,clone_204
329 | cell_327,clone_204
330 | cell_328,clone_216
331 | cell_329,clone_93
332 | cell_330,clone_181
333 | cell_331,clone_282
334 | cell_332,clone_330
335 | cell_333,clone_40
336 | cell_334,clone_169
337 | cell_335,clone_95
338 | cell_336,clone_150
339 | cell_337,clone_95
340 | cell_338,clone_204
341 | cell_339,clone_112
342 | cell_340,clone_169
343 | cell_341,clone_216
344 | cell_342,clone_181
345 | cell_343,clone_91
346 | cell_344,clone_223
347 | cell_345,clone_312
348 | cell_346,clone_75
349 | cell_347,clone_181
350 | cell_348,clone_204
351 | cell_349,clone_95
352 | cell_350,clone_164
353 | cell_351,clone_184
354 | cell_352,clone_216
355 | cell_353,clone_282
356 | cell_354,clone_193
357 | cell_355,clone_217
358 | cell_356,clone_257
359 | cell_357,clone_287
360 | cell_358,clone_230
361 | cell_359,clone_174
362 | cell_360,clone_58
363 | cell_361,clone_156
364 | cell_362,clone_61
365 | cell_363,clone_147
366 | cell_364,clone_185
367 | cell_365,clone_11
368 | cell_366,clone_218
369 | cell_367,clone_90
370 | cell_368,clone_132
371 | cell_369,clone_137
372 | cell_370,clone_196
373 | cell_371,clone_194
374 | cell_372,clone_41
375 | cell_373,clone_271
376 | cell_374,clone_303
377 | cell_375,clone_167
378 | cell_376,clone_7
379 | cell_377,clone_106
380 | cell_378,clone_264
381 | cell_379,clone_205
382 | cell_380,clone_180
383 | cell_381,clone_264
384 | cell_382,clone_249
385 | cell_383,clone_282
386 | cell_384,clone_250
387 | cell_385,clone_256
388 | cell_386,clone_171
389 | cell_387,clone_334
390 | cell_388,clone_95
391 | cell_389,clone_173
392 | cell_390,clone_75
393 | cell_391,clone_293
394 | cell_392,clone_173
395 | cell_393,clone_256
396 | cell_394,clone_250
397 | cell_395,clone_91
398 | cell_396,clone_15
399 | cell_397,clone_91
400 | cell_398,clone_44
401 | cell_399,clone_316
402 | cell_400,clone_283
403 | cell_401,clone_278
404 | cell_402,clone_204
405 | cell_403,clone_301
406 | cell_404,clone_262
407 | cell_405,clone_165
408 | cell_406,clone_273
409 | cell_407,clone_4
410 | cell_408,clone_131
411 | cell_409,clone_338
412 | cell_410,clone_261
413 | cell_411,clone_167
414 | cell_412,clone_41
415 | cell_413,clone_115
416 | cell_414,clone_125
417 | cell_415,clone_256
418 | cell_416,clone_16
419 | cell_417,clone_318
420 | cell_418,clone_216
421 | cell_419,clone_307
422 | cell_420,clone_307
423 | cell_421,clone_216
424 | cell_422,clone_71
425 | cell_423,clone_66
426 | cell_424,clone_144
427 | cell_425,clone_264
428 | cell_426,clone_150
429 | cell_427,clone_312
430 | cell_428,clone_177
431 | cell_429,clone_169
432 | cell_430,clone_268
433 | cell_431,clone_105
434 | cell_432,clone_330
435 | cell_433,clone_307
436 | cell_434,clone_150
437 | cell_435,clone_271
438 | cell_436,clone_144
439 | cell_437,clone_80
440 | cell_438,clone_80
441 | cell_439,clone_106
442 | cell_440,clone_63
443 | cell_441,clone_332
444 | cell_442,clone_105
445 | cell_443,clone_225
446 | cell_444,clone_264
447 | cell_445,clone_241
448 | cell_446,clone_330
449 | cell_447,clone_144
450 | cell_448,clone_184
451 | cell_449,clone_307
452 | cell_450,clone_106
453 | cell_451,clone_307
454 | cell_452,clone_105
455 | cell_453,clone_193
456 | cell_454,clone_81
457 | cell_455,clone_142
458 | cell_456,clone_76
459 | cell_457,clone_149
460 | cell_458,clone_95
461 | cell_459,clone_177
462 | cell_460,clone_169
463 | cell_461,clone_169
464 | cell_462,clone_14
465 | cell_463,clone_112
466 | cell_464,clone_80
467 | cell_465,clone_93
468 | cell_466,clone_128
469 | cell_467,clone_93
470 | cell_468,clone_248
471 | cell_469,clone_246
472 | cell_470,clone_80
473 | cell_471,clone_194
474 | cell_472,clone_312
475 | cell_473,clone_270
476 | cell_474,clone_105
477 | cell_475,clone_81
478 | cell_476,clone_303
479 | cell_477,clone_44
480 | cell_478,clone_256
481 | cell_479,clone_84
482 | cell_480,clone_106
483 | cell_481,clone_71
484 | cell_482,clone_27
485 | cell_483,clone_194
486 | cell_484,clone_63
487 | cell_485,clone_269
488 | cell_486,clone_14
489 | cell_487,clone_46
490 | cell_488,clone_266
491 | cell_489,clone_170
492 | cell_490,clone_52
493 | cell_491,clone_35
494 | cell_492,clone_286
495 | cell_493,clone_211
496 | cell_494,clone_19
497 | cell_495,clone_64
498 | cell_496,clone_19
499 | cell_497,clone_320
500 | cell_498,clone_60
501 | cell_499,clone_33
502 | cell_500,clone_297
503 | cell_501,clone_48
504 | cell_502,clone_170
505 | cell_503,clone_19
506 | cell_504,clone_304
507 | cell_505,clone_277
508 | cell_506,clone_65
509 | cell_507,clone_92
510 | cell_508,clone_196
511 | cell_509,clone_101
512 | cell_510,clone_215
513 | cell_511,clone_289
514 | cell_512,clone_82
515 | cell_513,clone_111
516 | cell_514,clone_265
517 | cell_515,clone_85
518 | cell_516,clone_111
519 | cell_517,clone_196
520 | cell_518,clone_19
521 | cell_519,clone_290
522 | cell_520,clone_211
523 | cell_521,clone_220
524 | cell_522,clone_67
525 | cell_523,clone_292
526 | cell_524,clone_296
527 | cell_525,clone_55
528 | cell_526,clone_296
529 | cell_527,clone_319
530 | cell_528,clone_46
531 | cell_529,clone_299
532 | cell_530,clone_209
533 | cell_531,clone_280
534 | cell_532,clone_60
535 | cell_533,clone_5
536 | cell_534,clone_324
537 | cell_535,clone_266
538 | cell_536,clone_2
539 | cell_537,clone_305
540 | cell_538,clone_218
541 | cell_539,clone_295
542 | cell_540,clone_9
543 | cell_541,clone_52
544 | cell_542,clone_240
545 | cell_543,clone_299
546 | cell_544,clone_198
547 | cell_545,clone_101
548 | cell_546,clone_227
549 | cell_547,clone_313
550 | cell_548,clone_132
551 | cell_549,clone_38
552 | cell_550,clone_102
553 | cell_551,clone_132
554 | cell_552,clone_52
555 | cell_553,clone_299
556 | cell_554,clone_240
557 | cell_555,clone_83
558 | cell_556,clone_19
559 | cell_557,clone_121
560 | cell_558,clone_326
561 | cell_559,clone_166
562 | cell_560,clone_100
563 | cell_561,clone_33
564 | cell_562,clone_123
565 | cell_563,clone_124
566 | cell_564,clone_263
567 | cell_565,clone_321
568 | cell_566,clone_119
569 | cell_567,clone_211
570 | cell_568,clone_192
571 | cell_569,clone_196
572 | cell_570,clone_272
573 | cell_571,clone_39
574 | cell_572,clone_288
575 | cell_573,clone_218
576 | cell_574,clone_195
577 | cell_575,clone_192
578 | cell_576,clone_170
579 | cell_577,clone_119
580 | cell_578,clone_185
581 | cell_579,clone_9
582 | cell_580,clone_214
583 | cell_581,clone_321
584 | cell_582,clone_230
585 | cell_583,clone_72
586 | cell_584,clone_280
587 | cell_585,clone_239
588 | cell_586,clone_108
589 | cell_587,clone_113
590 | cell_588,clone_50
591 | cell_589,clone_231
592 | cell_590,clone_255
593 | cell_591,clone_53
594 | cell_592,clone_211
595 | cell_593,clone_154
596 | cell_594,clone_234
597 | cell_595,clone_51
598 | cell_596,clone_255
599 | cell_597,clone_13
600 | cell_598,clone_333
601 | cell_599,clone_53
602 | cell_600,clone_260
603 | cell_601,clone_200
604 | cell_602,clone_217
605 | cell_603,clone_42
606 | cell_604,clone_311
607 | cell_605,clone_214
608 | cell_606,clone_323
609 | cell_607,clone_21
610 | cell_608,clone_129
611 | cell_609,clone_255
612 | cell_610,clone_54
613 | cell_611,clone_30
614 | cell_612,clone_92
615 | cell_613,clone_308
616 | cell_614,clone_135
617 | cell_615,clone_26
618 | cell_616,clone_135
619 | cell_617,clone_327
620 | cell_618,clone_166
621 | cell_619,clone_321
622 | cell_620,clone_206
623 | cell_621,clone_160
624 | cell_622,clone_28
625 | cell_623,clone_11
626 | cell_624,clone_147
627 | cell_625,clone_120
628 | cell_626,clone_254
629 | cell_627,clone_208
630 | cell_628,clone_2
631 | cell_629,clone_240
632 | cell_630,clone_124
633 | cell_631,clone_55
634 | cell_632,clone_35
635 | cell_633,clone_0
636 | cell_634,clone_195
637 | cell_635,clone_201
638 | cell_636,clone_6
639 | cell_637,clone_185
640 | cell_638,clone_196
641 | cell_639,clone_242
642 | cell_640,clone_0
643 | cell_641,clone_321
644 | cell_642,clone_45
645 | cell_643,clone_304
646 | cell_644,clone_141
647 | cell_645,clone_146
648 | cell_646,clone_309
649 | cell_647,clone_159
650 | cell_648,clone_218
651 | cell_649,clone_320
652 | cell_650,clone_175
653 | cell_651,clone_154
654 | cell_652,clone_133
655 | cell_653,clone_305
656 | cell_654,clone_2
657 | cell_655,clone_195
658 | cell_656,clone_121
659 | cell_657,clone_48
660 | cell_658,clone_290
661 | cell_659,clone_138
662 | cell_660,clone_336
663 | cell_661,clone_92
664 | cell_662,clone_192
665 | cell_663,clone_52
666 | cell_664,clone_2
667 | cell_665,clone_278
668 | cell_666,clone_68
669 | cell_667,clone_193
670 | cell_668,clone_106
671 | cell_669,clone_301
672 | cell_670,clone_262
673 | cell_671,clone_293
674 | cell_672,clone_330
675 | cell_673,clone_338
676 | cell_674,clone_27
677 | cell_675,clone_204
678 | cell_676,clone_145
679 | cell_677,clone_125
680 | cell_678,clone_3
681 | cell_679,clone_332
682 | cell_680,clone_81
683 | cell_681,clone_223
684 | cell_682,clone_330
685 | cell_683,clone_116
686 | cell_684,clone_278
687 | cell_685,clone_121
688 | cell_686,clone_330
689 | cell_687,clone_89
690 | cell_688,clone_264
691 | cell_689,clone_171
692 | cell_690,clone_264
693 | cell_691,clone_95
694 | cell_692,clone_293
695 | cell_693,clone_184
696 | cell_694,clone_121
697 | cell_695,clone_89
698 | cell_696,clone_278
699 | cell_697,clone_278
700 | cell_698,clone_312
701 | cell_699,clone_49
702 | cell_700,clone_282
703 | cell_701,clone_332
704 | cell_702,clone_121
705 | cell_703,clone_338
706 | cell_704,clone_169
707 | cell_705,clone_89
708 | cell_706,clone_282
709 | cell_707,clone_91
710 | cell_708,clone_293
711 | cell_709,clone_278
712 | cell_710,clone_338
713 | cell_711,clone_264
714 | cell_712,clone_283
715 | cell_713,clone_293
716 | cell_714,clone_282
717 | cell_715,clone_87
718 | cell_716,clone_160
719 | cell_717,clone_212
720 | cell_718,clone_127
721 | cell_719,clone_322
722 | cell_720,clone_267
723 | cell_721,clone_311
724 | cell_722,clone_86
725 | cell_723,clone_328
726 | cell_724,clone_253
727 | cell_725,clone_25
728 | cell_726,clone_331
729 | cell_727,clone_235
730 | cell_728,clone_211
731 | cell_729,clone_127
732 | cell_730,clone_107
733 | cell_731,clone_337
734 | cell_732,clone_127
735 | cell_733,clone_17
736 | cell_734,clone_103
737 | cell_735,clone_257
738 | cell_736,clone_257
739 | cell_737,clone_51
740 | cell_738,clone_253
741 | cell_739,clone_26
742 | cell_740,clone_247
743 | cell_741,clone_233
744 | cell_742,clone_214
745 | cell_743,clone_267
746 | cell_744,clone_327
747 | cell_745,clone_72
748 | cell_746,clone_188
749 | cell_747,clone_328
750 | cell_748,clone_259
751 | cell_749,clone_245
752 | cell_750,clone_151
753 | cell_751,clone_103
754 | cell_752,clone_191
755 | cell_753,clone_196
756 | cell_754,clone_257
757 | cell_755,clone_22
758 | cell_756,clone_272
759 | cell_757,clone_153
760 | cell_758,clone_135
761 | cell_759,clone_120
762 | cell_760,clone_257
763 | cell_761,clone_21
764 | cell_762,clone_162
765 | cell_763,clone_140
766 | cell_764,clone_103
767 | cell_765,clone_127
768 | cell_766,clone_12
769 | cell_767,clone_280
770 | cell_768,clone_191
771 | cell_769,clone_96
772 | cell_770,clone_327
773 | cell_771,clone_30
774 | cell_772,clone_127
775 | cell_773,clone_86
776 | cell_774,clone_129
777 | cell_775,clone_255
778 | cell_776,clone_196
779 | cell_777,clone_239
780 | cell_778,clone_259
781 | cell_779,clone_217
782 | cell_780,clone_259
783 |
--------------------------------------------------------------------------------
/tests/data/test_adata_preprocessed.h5ad:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllonKleinLab/cospar/ca54cad8a9db9a72152ba8a8b6d67d57eace4acb/tests/data/test_adata_preprocessed.h5ad
--------------------------------------------------------------------------------
/tests/test_all.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 |
4 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
5 |
6 | from pathlib import Path
7 |
8 | from matplotlib import pyplot as plt
9 |
10 | from tests.context import cospar as cs
11 |
12 | # Module-level list of fate names. Be careful not to change this global parameter.
13 | selected_fates = [
14 | "Ccr7_DC",
15 | "Mast",
16 | "Meg",
17 | "pDC",
18 | "Eos",
19 | "Lymphoid",
20 | "Erythroid",
21 | "Baso",
22 | "Neutrophil",
23 | "Monocyte",
24 | ]
25 |
26 |
27 | def config(shared_datadir):  # Shared setup helper: points the cospar settings at an output directory derived from shared_datadir (presumably the pytest-datadir fixture — confirm).
28 | cs.settings.data_path = os.path.join(shared_datadir, "..", "output")  # data path is <shared_datadir>/../output
29 | cs.settings.figure_path = os.path.join(shared_datadir, "..", "output")  # figure path is the same directory as data_path
30 | cs.settings.verbosity = 0 # range: 0 (error),1 (warning),2 (info),3 (hint).
31 | cs.settings.set_figure_params(
32 | format="png", figsize=[4, 3.5], dpi=25, fontsize=14, pointsize=3, dpi_save=25
33 | )
34 | cs.hf.set_up_folders() # setup the data_path and figure_path
35 |
36 |
def test_load_dataset(shared_datadir):
    """Smoke-test dataset loading with the synthetic bifurcation dataset."""
    config(shared_datadir)
    print("-------------------------load dataset")
    # Only the synthetic dataset is loaded here. The other loaders are left
    # disabled: hematopoiesis_subsampled, hematopoiesis, hematopoiesis_130K,
    # hematopoiesis_Gata1_states, reprogramming, lung, reprogramming_Day0_3_28.
    cs.datasets.synthetic_bifurcation()
48 |
49 |
def test_load_data_from_scratch(shared_datadir):
    """Initialize an object from the test files and attach clonal data.

    Reads ``cell_id.txt`` and ``clonal_data_in_table_format.txt`` from
    ``shared_datadir``, initializes the object read from
    ``test_adata_preprocessed.h5ad`` with the ``Cell_ID`` column as cell
    names, calls ``cs.pp.get_X_clone`` with the ``Cell_ID`` / ``Clone_ID``
    columns, and prints the shape of ``obsm["X_clone"]``.
    """
    # pandas is the only extra module this test needs.
    import pandas as pd

    config(shared_datadir)

    df_cell_id = pd.read_csv(os.path.join(shared_datadir, "cell_id.txt"))
    file_name = os.path.join(shared_datadir, "test_adata_preprocessed.h5ad")
    adata_orig = cs.hf.read(file_name)
    adata_orig = cs.pp.initialize_adata_object(
        adata_orig,
        cell_names=df_cell_id["Cell_ID"],
    )

    df_X_clone = pd.read_csv(
        os.path.join(shared_datadir, "clonal_data_in_table_format.txt")
    )
    cs.pp.get_X_clone(adata_orig, df_X_clone["Cell_ID"], df_X_clone["Clone_ID"])
    print(adata_orig.obsm["X_clone"].shape)
69 |
70 |
def test_preprocessing(shared_datadir):
    """Run the preprocessing functions in sequence on the test object.

    Exercises ``initialize_adata_object`` (from raw components and from an
    existing object), ``get_highly_variable_genes``,
    ``remove_cell_cycle_correlated_genes``, ``get_X_pca``, ``get_X_emb`` and
    ``get_state_info``, then closes all figures.
    """
    config(shared_datadir)
    source_path = os.path.join(shared_datadir, "test_adata_preprocessed.h5ad")
    adata_orig_0 = cs.hf.read(source_path)
    print("------------------------Test preprocessing")

    # A name to indicate this data for saving results. Can be arbitrary but
    # should be unique to this data.
    data_des = "test"

    # State matrix: np.array or sparse matrix, shape (n_cell, n_gene)
    X_state = adata_orig_0.X
    # Gene names, shape (n_genes,)
    gene_names = adata_orig_0.var_names
    # Clonal data matrix: np.array or sparse matrix, shape (n_cell, n_clone)
    X_clone = adata_orig_0.obsm["X_clone"]
    # The next three fields are read but not used further below:
    # 2-d embedding, cluster id per cell, and principal component matrix.
    X_emb = adata_orig_0.obsm["X_emb"]
    state_info = adata_orig_0.obs["state_info"]
    X_pca = adata_orig_0.obsm["X_pca"]
    # Time info per cell: np.array of string, shape (n_cell,)
    time_info = adata_orig_0.obs["time_info"]

    print("------------initialize_adata_object")
    # First build an object from the individual components ...
    component_kwargs = {
        "X_state": X_state,
        "gene_names": gene_names,
        "time_info": time_info,
        "X_clone": X_clone,
        "data_des": data_des,
    }
    adata_orig = cs.pp.initialize_adata_object(**component_kwargs)

    # ... then from the existing object; this result is the one used below.
    adata_orig = cs.pp.initialize_adata_object(adata=adata_orig_0, X_clone=X_clone)

    print("------------get_highly_variable_genes")
    hvg_kwargs = {
        "normalized_counts_per_cell": 10000,
        "min_counts": 3,
        "min_cells": 3,
        "min_gene_vscore_pctl": 90,
    }
    cs.pp.get_highly_variable_genes(adata_orig, **hvg_kwargs)

    print("------------remove_cell_cycle_correlated_genes")
    cs.pp.remove_cell_cycle_correlated_genes(adata_orig, cycling_gene_list=["Ube2c"])

    print("------------get_X_pca")
    cs.pp.get_X_pca(adata_orig, n_pca_comp=40)

    print("------------get_X_emb")
    cs.pp.get_X_emb(adata_orig, n_neighbors=20, umap_min_dist=0.3)

    print("------------get_state_info (this modifies the state info. Need to reload")
    cs.pp.get_state_info(adata_orig, n_neighbors=20, resolution=0.5)

    plt.close("all")
127 |
128 |
def test_clonal_analysis(shared_datadir):
    """Exercise the clone-based analysis and plotting functions.

    Runs the barcode heatmap, fate coupling, fate hierarchy, clonal fate
    bias and clones-on-manifold routines on the preprocessed test object,
    closing figures between groups of plots. Uses the module-level
    ``selected_fates`` list for the barcode heatmap.
    """
    config(shared_datadir)

    file_name = os.path.join(shared_datadir, "test_adata_preprocessed.h5ad")
    adata = cs.hf.read(file_name)
    print("------------------------------Basic clonal analysis")

    print("----------barcode_heatmap")
    cs.pl.barcode_heatmap(adata, log_transform=True, selected_fates=selected_fates)
    plt.close("all")

    print("----------fate_coupling_from_clones")
    cs.tl.fate_coupling(adata, source="X_clone")
    cs.pl.fate_coupling(adata, source="X_clone")

    print("----------fate_hierarchy_from_clones")
    cs.tl.fate_hierarchy(adata, source="X_clone")
    cs.pl.fate_hierarchy(adata, source="X_clone")
    plt.close("all")

    print("----------clonal_fate_bias")
    cs.tl.clonal_fate_bias(adata, selected_fate="Neutrophil")
    cs.pl.clonal_fate_bias(adata)
    plt.close("all")

    print("----------clones_on_manifold")
    cs.pl.clones_on_manifold(adata, selected_clone_list=[1, 2, 3])
    plt.close("all")
159 |
160 |
def test_Tmap_inference(shared_datadir):
    """Call three transition-map inference entry points on the test object.

    Covers inference from one-time clones, from state information alone and
    from clonal information alone. The returned objects are not inspected.
    """
    config(shared_datadir)
    source_path = os.path.join(shared_datadir, "test_adata_preprocessed.h5ad")
    adata_orig = cs.hf.read(source_path)
    print("------------------------------T map inference")

    print("---------infer_Tmap_from_one_time_clones")
    one_time_kwargs = {
        "initial_time_points": ["2"],
        "later_time_point": "4",
        "initialize_method": "OT",
        "OT_cost": "GED",
        "smooth_array": [5, 5, 5],
        "sparsity_threshold": 0.1,
    }
    adata_1 = cs.tmap.infer_Tmap_from_one_time_clones(adata_orig, **one_time_kwargs)

    print("---------infer_Tmap_from_state_info_alone")
    state_only_kwargs = {
        "initial_time_points": ["4"],
        "later_time_point": "6",
        "initialize_method": "HighVar",
        "HighVar_gene_pctl": 85,
        "max_iter_N": [10, 10],
        "epsilon_converge": [0.01, 0.01],
        "smooth_array": [5, 5, 5],
        "sparsity_threshold": 0.1,
    }
    adata_2 = cs.tmap.infer_Tmap_from_state_info_alone(adata_orig, **state_only_kwargs)

    print("---------infer_Tmap_from_clonal_info_alone")
    adata_3 = cs.tmap.infer_Tmap_from_clonal_info_alone(
        adata_orig,
        method="weinreb",
        later_time_point="6",
        selected_fates=selected_fates,
    )

    # Saving is currently disabled (cs.hf.save_map is not called).
    print("-------------------------save maps")
201 |
202 |
def test_Tmap_analysis(shared_datadir):
    """Exercise the transition-map analysis and plotting functions.

    A transition map is inferred from multi-time clones (or loaded from a
    previously saved file when ``load_pre_compute_map`` is switched on
    locally). The fate coupling, hierarchy, map, potency, bias, progenitor,
    iterative differentiation, gene expression, differential gene and
    single-cell transition routines are then called with
    ``source="transition_map"``. All figures are closed at the end so they
    do not accumulate across tests.
    """
    import numpy as np

    config(shared_datadir)

    # Flip to True for fast local testing against a map saved by an earlier run.
    load_pre_compute_map = False
    if load_pre_compute_map:
        file_name = os.path.join(
            cs.settings.data_path,
            "test_MultiTimeClone_Later_FullSpace0_t*2*4*6_adata_with_transition_map.h5ad",
        )
        adata = cs.hf.read(file_name)
    else:
        file_name = os.path.join(shared_datadir, "test_adata_preprocessed.h5ad")
        adata_orig = cs.hf.read(file_name)
        print("---------infer_Tmap_from_multitime_clones")
        adata = cs.tmap.infer_Tmap_from_multitime_clones(
            adata_orig,
            clonal_time_points=["2", "4"],
            later_time_point="6",
            smooth_array=[5, 5, 5],
            sparsity_threshold=0.1,
            intraclone_threshold=0.2,
            max_iter_N=5,
            epsilon_converge=0.01,
        )

    X_clone = adata.obsm["X_clone"]
    print(type(X_clone))

    cs.tl.fate_coupling(adata, source="transition_map")
    cs.pl.fate_coupling(adata, source="transition_map")

    cs.tl.fate_hierarchy(adata, source="transition_map")
    cs.pl.fate_hierarchy(adata, source="transition_map")

    # Fate pair shared by the fate map / potency / bias / progenitor calls.
    # Named differently from the module-level ``selected_fates`` so the global
    # is not shadowed inside this function.
    fate_pair = ["Neutrophil", "Monocyte"]

    cs.tl.fate_map(adata, source="transition_map", selected_fates=fate_pair)
    cs.pl.fate_map(
        adata,
        source="transition_map",
        selected_fates=fate_pair,
        show_histogram=True,
        selected_times="4",
    )

    cs.tl.fate_potency(
        adata, source="transition_map", selected_fates=fate_pair, fate_count=True
    )
    cs.pl.fate_potency(
        adata,
        source="transition_map",
        show_histogram=True,
        selected_times="4",
    )

    cs.tl.fate_bias(
        adata,
        source="transition_map",
        selected_fates=fate_pair,
        sum_fate_prob_thresh=0.01,
    )
    # Plot once without and once with an explicit fate selection.
    cs.pl.fate_bias(
        adata,
        source="transition_map",
        show_histogram=True,
        selected_times="4",
    )
    cs.pl.fate_bias(
        adata,
        source="transition_map",
        show_histogram=True,
        selected_fates=fate_pair,
        selected_times="4",
    )

    cs.tl.progenitor(
        adata,
        source="transition_map",
        selected_fates=fate_pair,
        sum_fate_prob_thresh=0.01,
        avoid_target_states=True,
    )
    cs.pl.progenitor(adata, source="transition_map", selected_times="4")

    cs.tl.iterative_differentiation(
        adata,
        source="transition_map",
        selected_fates="Neutrophil",
        apply_time_constaint=False,
    )
    cs.pl.iterative_differentiation(
        adata,
        source="transition_map",
    )

    cs.pl.gene_expression_dynamics(
        adata, selected_fate="Neutrophil", gene_name_list=["Gata1"]
    )

    gene_list = [
        "Mpo",
        "Elane",
        "Gstm1",
        "Mt1",
        "S100a8",
        "Prtn3",
        "Gfi1",
        "Dstn",
        "Cd63",
        "Ap3s1",
        "H2-Aa",
        "H2-Eb1",
        "Ighm",
    ]

    # Nested lists are passed as single entries; ``renames`` supplies one
    # label per entry, in the same order.
    grouped_fates = [
        "Neutrophil",
        "Monocyte",
        ["Baso", "Eos", "Erythroid", "Mast", "Meg"],
        ["pDC", "Ccr7_DC", "Lymphoid"],
    ]
    renames = ["Neu", "Mon", "Meg-Ery-MBaE", "Lym-Dc"]

    cs.pl.gene_expression_heatmap(
        adata,
        selected_genes=gene_list,
        selected_fates=grouped_fates,
        rename_fates=renames,
        fig_width=12,
    )

    cs.pl.gene_expression_on_manifold(
        adata, selected_genes=["Gata1", "Elane"], savefig=True
    )

    # Cell groups given as state names ...
    cs.tl.differential_genes(
        adata, cell_group_A="Neutrophil", cell_group_B="Monocyte"
    )

    # ... and as boolean masks over the cells.
    state_info = np.array(adata.obs["state_info"])
    df1, _ = cs.tl.differential_genes(
        adata,
        cell_group_A=(state_info == "Neutrophil"),
        cell_group_B=(state_info == "Monocyte"),
    )
    print(df1)

    cs.pl.single_cell_transition(
        adata, selected_state_id_list=[1, 2], savefig=True, map_backward=False
    )

    # Release the figures opened above, as the other tests in this module do.
    plt.close("all")
382 |
383 |
def test_simulated_data():
    """Infer transition maps on two simulated datasets and print a score for each.

    The bifurcation model is scored with the fate-bias correlation helper and
    the linear differentiation model with the transition-peak TPR helper,
    both from ``cs.simulate``.
    """
    print("---------- bifurcation model ------------")
    bifurcation = cs.simulate.bifurcation_model(t1=2, M=20, L=10)
    bifurcation = cs.tmap.infer_Tmap_from_multitime_clones(
        bifurcation, smooth_array=[10, 10, 10], compute_new=True
    )
    score = cs.simulate.quantify_correlation_with_ground_truth_fate_bias_BifurcationModel(
        bifurcation.uns["transition_map"],
        bifurcation.obs["state_info"],
        bifurcation.uns["Tmap_cell_id_t1"],
        bifurcation.uns["Tmap_cell_id_t2"],
    )
    correlation_cospar = score
    print(
        f"Fate bias correlation from the predicted transition map: {correlation_cospar:.3f}"
    )

    print("---------------Linear differentiation---------------")
    linear = cs.simulate.linear_differentiation_model(
        Nt1=50, progeny_N=1, used_clone_N=10, always_simulate_data=True
    )
    linear = cs.tmap.infer_Tmap_from_multitime_clones(
        linear, smooth_array=[10, 10, 10], compute_new=True
    )
    transition_map = linear.uns["transition_map"]
    # Read here but not passed to the TPR metric below.
    state_info = linear.obs["state_info"]
    idx_t1 = linear.uns["Tmap_cell_id_t1"]
    idx_t2 = linear.uns["Tmap_cell_id_t2"]

    # Rows of X_orig for the cells at the two ends of the map.
    coords_t1 = linear.obsm["X_orig"][idx_t1]
    coords_t2 = linear.obsm["X_orig"][idx_t2]
    TPR_cospar = cs.simulate.quantify_transition_peak_TPR_LinearDifferentiation(
        transition_map, coords_t1, coords_t2
    )
    print(f"True positive rate for the predicted transition map: {TPR_cospar:.3f}")
422 |
423 |
def test_clean_up():
    """Remove the scratch output directory used by the tests.

    Deletes the directories currently configured as ``cs.settings.data_path``
    and ``cs.settings.figure_path``, but only when the directory is named
    ``output`` (the name ``config()`` assigns). This keeps the clean-up from
    touching any other configured location, and it works regardless of the
    current working directory or platform.
    """
    import shutil

    print("---------Clean up")
    for configured_path in (cs.settings.data_path, cs.settings.figure_path):
        target = Path(configured_path)
        # Guard on the name so a non-test data/figure directory is never removed.
        if target.name == "output" and target.is_dir():
            # Best-effort removal: a failed clean-up should not fail the suite.
            shutil.rmtree(target, ignore_errors=True)
428 |
429 |
430 | # os.chdir(os.path.dirname(__file__))
431 | # cs.settings.verbosity = 3 # range: 0 (error),1 (warning),2 (info),3 (hint).
432 | # # test_load_dataset("data")
433 | # # test_preprocessing("data")
434 | # # test_load_data_from_scratch("data")
435 | # # test_clonal_analysis("data")
436 | # # test_Tmap_inference("data")
437 | # test_Tmap_analysis("data")
438 |
--------------------------------------------------------------------------------