├── .github └── workflows │ └── on-push.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yml ├── LICENSE ├── README.rst ├── cospar ├── __init__.py ├── datasets.py ├── help_functions │ ├── __init__.py │ ├── _docs.py │ └── _help_functions_CoSpar.py ├── hf.py ├── logging.py ├── pl.py ├── plotting │ ├── __init__.py │ ├── _clone.py │ ├── _gene.py │ ├── _map.py │ └── _utils.py ├── pp.py ├── preprocessing │ ├── __init__.py │ └── _preprocessing.py ├── settings.py ├── simulate.py ├── tl.py ├── tmap │ ├── __init__.py │ ├── _tmap_core.py │ ├── _utils.py │ ├── map_reconstruction.py │ └── optimal_transport.py └── tool │ ├── __init__.py │ ├── _clone.py │ ├── _gene.py │ ├── _map.py │ └── _utils.py ├── docs ├── Makefile ├── make.bat ├── requirements.txt └── source │ ├── .ipynb_checkpoints │ ├── 20201121-Bifurcation_model_staticBC-CoSapr_basic-checkpoint.ipynb │ ├── 20210120-Bifurcation_model_dynamic_barcoding-checkpoint.ipynb │ ├── 20210120-Bifurcation_model_static_barcoding-checkpoint.ipynb │ ├── 20210121_all_hematopoietic_data-checkpoint.ipynb │ ├── 20210121_cospar_tutorial-checkpoint.ipynb │ ├── 20210121_lung_data-checkpoint.ipynb │ ├── 20210121_reprogramming_data_merge_tags-checkpoint.ipynb │ ├── 20210121_reprogramming_data_no_merge_tags-checkpoint.ipynb │ ├── 20210121_reprogramming_dynamic_barcoding-checkpoint.ipynb │ └── 20210121_subsampled_hematopoietic_data-checkpoint.ipynb │ ├── 20210120_bifurcation_model_static_barcoding.ipynb │ ├── 20210121_all_hematopoietic_data_v3.ipynb │ ├── 20210121_lung_data_v2.ipynb │ ├── 20210121_reprogramming_static_barcoding_v2.ipynb │ ├── 20210602_loading_data.ipynb │ ├── 20211010_clonal_analysis.ipynb │ ├── 20211010_map_analysis.ipynb │ ├── 20211010_map_inference.ipynb │ ├── 20211010_preprocessing.ipynb │ ├── 20220402_simulate_differentiation.ipynb │ ├── _ext │ └── edit_on_github.py │ ├── _static │ ├── colab-badge.svg │ ├── custom.css │ └── nbviewer-badge.svg │ ├── _templates │ └── autosummary │ │ ├── base.rst │ │ └── class.rst 
│ ├── about.rst │ ├── api.rst │ ├── conf.py │ ├── getting_started.rst │ ├── index.rst │ ├── installation.rst │ └── release_note.rst ├── environment.yml ├── pypi.rst ├── pyproject.toml ├── requirements.txt ├── setup.py └── tests ├── __init__.py ├── context.py ├── data ├── cell_id.txt ├── clonal_data_in_table_format.txt └── test_adata_preprocessed.h5ad └── test_all.py /.github/workflows/on-push.yml: -------------------------------------------------------------------------------- 1 | name: on-push 2 | on: 3 | push: 4 | branches: 5 | - master 6 | tags: 7 | - '*' 8 | pull_request: 9 | branches: 10 | - master 11 | workflow_dispatch: 12 | 13 | jobs: 14 | test-cospar: 15 | runs-on: ${{ matrix.os }} 16 | strategy: 17 | fail-fast: false 18 | matrix: 19 | os: ["ubuntu-latest"] 20 | steps: 21 | - uses: actions/checkout@v2 22 | - name: cache conda 23 | uses: actions/cache@v2 24 | env: 25 | CACHE_NUMBER: 1 # bump to reset cache 26 | with: 27 | path: ~/conda_pkgs_dir 28 | key: ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{ hashFiles('environment.yml') }} 29 | - uses: conda-incubator/setup-miniconda@v2 30 | with: 31 | miniconda-version: "latest" 32 | channels: conda-forge,bioconda,defaults 33 | auto-update-conda: false 34 | activate-environment: cospar 35 | environment-file: environment.yml 36 | use-only-tar-bz2: true 37 | mamba-version: "*" 38 | - name: Install cospar 39 | shell: bash -l {0} 40 | run: | 41 | python setup.py install 42 | - name: Run pre-commit 43 | uses: pre-commit/action@v2.0.0 44 | - name: Test cospar 45 | shell: bash -l {0} 46 | run: | 47 | pytest -v --cov=cospar --cov-report=term-missing 48 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | dist/ 3 | cospar.egg-info/ 4 | docs/build/ 5 | docs/source/cospar.* 6 | /**/.DS_Store 7 | /**/*.py[cod] 8 | /**/__pycache__ 9 | .eggs/ 10 | tests/output/ 11 | .coverage* 12 | 
.pytest_cache 13 | /**/.ipynb_checkpoints 14 | /**/__MACOSX/ 15 | docs/source/*cospar*/ 16 | docs/source/test_data* 17 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v3.2.0 4 | hooks: 5 | - id: trailing-whitespace 6 | - id: end-of-file-fixer 7 | - id: check-yaml 8 | - id: check-added-large-files 9 | args: ['--maxkb=10000'] 10 | - repo: https://github.com/psf/black 11 | rev: 22.3.0 12 | hooks: 13 | - id: black 14 | language_version: python3 15 | - repo: https://github.com/timothycrosley/isort 16 | rev: 5.8.0 17 | hooks: 18 | - id: isort 19 | args: ["--profile", "black"] 20 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | build: 2 | image: latest 3 | 4 | # Build documentation in the docs/ directory with Sphinx 5 | sphinx: 6 | configuration: docs/source/conf.py 7 | 8 | python: 9 | version: 3.8 10 | install: 11 | - requirements: docs/requirements.txt 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | Copyright (c) 2020 Shou-Wen Wang 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 5 | 6 | The above copyright notice and this permission notice shall be included in all copies or 
substantial portions of the Software. 7 | 8 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 9 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | |PyPI| |PyPIDownloads| |Docs| 2 | 3 | 4 | CoSpar - dynamic inference by integrating state and lineage information 5 | ======================================================================= 6 | 7 | 🔴 🔴 Notice: Package relocation 🔴 🔴 8 | ------------------------------------- 9 | Effective on April 1st 2023, Shou-Wen Wang is leaving the Klein lab to start `his own group at Westlake University `_, and he will no longer maintain this repository. Further development of CoSpar will continue in his own lab under this repository `https://github.com/ShouWenWang-Lab/cospar `_. Please **reach out there for any issues related to CoSpar**. 10 | 11 | ======================================================================= 12 | 13 | .. image:: https://user-images.githubusercontent.com/4595786/104988296-b987ce00-59e5-11eb-8dbe-a463b355a9fd.png 14 | :width: 300px 15 | :align: left 16 | 17 | **CoSpar** is a toolkit for dynamic inference from lineage-traced single cells. 18 | The methods are based on 19 | `Wang et al. Nat. Biotech. (2022) `_. 20 | 21 | Dynamic inference based on single-cell state measurement alone requires serious simplifications. On the other hand, direct dynamic measurement via lineage tracing only captures partial information and its interpretation is challenging. 
CoSpar integrates both state and lineage information to infer a finite-time transition map of a development/differentiation system. It gains superior robustness and accuracy by exploiting both the local coherence and sparsity of differentiation transitions, i.e., neighboring initial states share similar yet sparse fate outcomes. Building around the anndata_ object, CoSpar provides an integrated analysis framework for datasets with both state and lineage information. When only state information is available, CoSpar also improves upon existing dynamic inference methods by imposing sparsity and coherence. It offers essential toolkits for analyzing lineage data, state information, or their integration. 22 | 23 | See ``_ for documentation and tutorials. 24 | 25 | Recorded talks 26 | -------------- 27 | `Jun 1: Single-Cell Data Science 2022 `_. This is a 20-min short talk focusing more on the utility of CoSpar: `talk video `_ 28 | 29 | `Oct 19, 2022: Invited MIA talk at Broad Institute `_. This is a one-hour talk focusing on the Machine Learning part of CoSpar: `talk video `_. The talk slides can be found `here `_. 30 | 31 | Reference 32 | --------- 33 | `S.-W. Wang*, M. Herriges, K. Hurley, D. Kotton, A. M. Klein*, CoSpar identifies early cell fate biases from single cell transcriptomic and lineage information, Nat. Biotech. (2022) `_. [* corresponding authors] 34 | 35 | Support 36 | ------- 37 | Feel free to submit an `issue `_ 38 | or send us an `email `_. 39 | Your help to improve CoSpar is highly appreciated. 40 | 41 | 42 | 43 | .. _anndata: https://anndata.readthedocs.io 44 | 45 | .. |PyPI| image:: https://img.shields.io/pypi/v/cospar.svg 46 | :target: https://pypi.org/project/cospar 47 | 48 | .. |PyPIDownloads| image:: https://pepy.tech/badge/cospar 49 | :target: https://pepy.tech/project/cospar 50 | 51 | .. 
|Docs| image:: https://readthedocs.org/projects/cospar/badge/?version=latest 52 | :target: https://cospar.readthedocs.io 53 | -------------------------------------------------------------------------------- /cospar/__init__.py: -------------------------------------------------------------------------------- 1 | """CoSpar - dynamic inference by integrating transcriptome and lineage information""" 2 | 3 | __version__ = "0.3.0" 4 | from . import datasets, hf, logging, pl, pp, settings, simulate, tl, tmap 5 | -------------------------------------------------------------------------------- /cospar/datasets.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path, PurePath 3 | 4 | from scanpy import read 5 | 6 | from . import logging as logg 7 | from . import settings 8 | 9 | url_prefix_0 = "https://kleintools.hms.harvard.edu/tools/downloads/cospar" 10 | 11 | 12 | def synthetic_bifurcation(data_des="bifur"): 13 | """ 14 | Synthetic clonal dataset with static barcoding. 15 | 16 | We simulated a differentiation process over a bifurcation fork. 17 | Cells are barcoded in the beginning, and the barcodes remain unchanged. 18 | In the simulation, we resample clones over time, 19 | like the experimental design to obtain the hematopoietic dataset 20 | or the reprogramming dataset. The dataset has two time points. 21 | 22 | Parameters 23 | ---------- 24 | data_des: `str` 25 | A key to label this dataset. 
def raw_data_for_import_exercise():
    """
    Fetch and unpack the test dataset for demonstrating how to import your own data.

    ``test_data.zip`` is looked up in the current working directory and
    downloaded from the CoSpar GitHub repository when it is not there yet.
    The archive is then extracted into the current working directory.
    """
    import zipfile

    url = "https://github.com/ShouWenWang-Lab/cospar/files/12036732/test_data.zip"
    path = Path("test_data.zip")
    _check_datafile_present_and_download(path, backup_url=url)

    # Open the very path that was checked/downloaded above instead of
    # spelling the file name a second time.
    with zipfile.ZipFile(path, "r") as zip_ref:
        zip_ref.extractall()
81 | The cells are barcoded over 3 rounds during the entire differentiation process. 82 | There are multiple ways to assemble the barcodes on day 0, day 3, and day 13 83 | into a clonal ID. Below, we provide three variants: 84 | 85 | * Concatenate barcodes on day 0 and day 13, as in the original 86 | analysis (adata.obsm['X_clone_Concat_D0D3'], the default); 87 | 88 | * Concatenate barcodes on day 0, day 3, and day 13 (adata.obsm['X_clone_Concat_D0D3D13']); 89 | 90 | * No concatenation; each cell has up to 3 barcodes (adata.obsm['X_clone_NonConcat_D0D3D13']). 91 | 92 | The last choice keeps the nested clonal structure in the data. 93 | You can choose any one of the clonal arrangement for downstream analysis, 94 | by setting `adata_orig.obsm['X_clone']=adata_orig.obsm['X_clone_Concat_D0D3']`. 95 | The three clonal arrangements give very similar fate prediction. 96 | 97 | Parameters 98 | ---------- 99 | data_des: `str` 100 | A key to label this dataset. 101 | """ 102 | 103 | data_path = settings.data_path 104 | figure_path = settings.figure_path 105 | data_name = "CellTagging_adata_preprocessed.h5ad" 106 | return load_data_core(data_path, figure_path, data_name, data_des) 107 | 108 | 109 | def reprogramming_Day0_3_28(data_des="Reprog_128"): 110 | """ 111 | The reprogramming dataset from 112 | 113 | * Biddy, B. A. et al. `Single-cell mapping of lineage and identity in direct reprogramming`. Nature 564, 219–224 (2018). 114 | 115 | This dataset has time points on day 0, day 3, and day 28. Only day 28 has clonal information. 116 | The cells are barcoded over 3 rounds during the entire differentiation process. 117 | There are multiple ways to assemble the barcodes on day 0, day 3, and day 13 118 | into a clonal ID. Below, we concatenate barcodes on day 0 and day 13, 119 | as in the original analysis. 120 | 121 | 122 | Parameters 123 | ---------- 124 | data_des: `str` 125 | A key to label this dataset. 
126 | """ 127 | 128 | data_path = settings.data_path 129 | figure_path = settings.figure_path 130 | data_name = "Reprog_128_D0D3_adata_preprocessed.h5ad" 131 | return load_data_core(data_path, figure_path, data_name, data_des) 132 | 133 | 134 | # def reprogramming_static_BC(data_des='CellTagging'): 135 | # """ 136 | # The reprogramming dataset from 137 | 138 | # * Biddy, B. A. et al. `Single-cell mapping of lineage and identity in direct reprogramming`. Nature 564, 219–224 (2018). 139 | 140 | # This dataset has multiple time points for both the clones and the state measurements. 141 | 142 | # The cells are barcoded over 3 rounds during the entire differentiation process. 143 | # We combine up to 3 tags from the same cell into a single clonal label in representing 144 | # the X_clone matrix. In this representation, each cell has at most one clonal label. 145 | # Effectively, we convert the barcodes into static labels that do not carry temporal information. 146 | 147 | # Parameters 148 | # ---------- 149 | # data_des: `str` 150 | # A key to label this dataset. 151 | # """ 152 | 153 | # data_path=settings.data_path 154 | # figure_path=settings.figure_path 155 | # data_name='CellTagging_ConcatenateClone_adata_preprocessed.h5ad' 156 | # return load_data_core(data_path,figure_path,data_name,data_des) 157 | 158 | # def reprogramming_dynamic_BC(data_des='CellTagging_NoConcat'): 159 | # """ 160 | # The reprogramming dataset from 161 | 162 | # * Biddy, B. A. et al. `Single-cell mapping of lineage and identity in direct reprogramming`. Nature 564, 219–224 (2018). 163 | 164 | # This dataset has multiple time points for both the clones and the state measurements. 165 | 166 | # The cells are barcoded over 3 rounds during the entire differentiation process. 167 | # We treat barcode tags from each round as independent clonal label here. In this 168 | # representation, each cell can have multiple clonal labels at different time points. 
169 | 170 | # Parameters 171 | # ---------- 172 | # data_des: `str` 173 | # A key to label this dataset. 174 | # """ 175 | 176 | # data_path=settings.data_path 177 | # figure_path=settings.figure_path 178 | # data_name='CellTagging_NoConcat_adata_preprocessed.h5ad' 179 | # return load_data_core(data_path,figure_path,data_name,data_des) 180 | 181 | 182 | def lung(data_des="Lung"): 183 | """ 184 | The direct lung differentiation dataset from 185 | 186 | * Hurley, K. et al. Cell Stem Cell (2020) doi:10.1016/j.stem.2019.12.009. 187 | 188 | This dataset has multiple time points for the state manifold, but only one time point 189 | for the clonal observation on day 27. 190 | 191 | Parameters 192 | ---------- 193 | data_des: `str` 194 | A key to label this dataset. 195 | """ 196 | 197 | data_path = settings.data_path 198 | figure_path = settings.figure_path 199 | data_name = "Lung_pos17_21_D27_adata_preprocessed.h5ad" 200 | return load_data_core(data_path, figure_path, data_name, data_des) 201 | 202 | 203 | def hematopoiesis(data_des="LARRY"): 204 | """ 205 | The hematopoiesis data set from 206 | 207 | * Weinreb, C., Rodriguez-Fraticelli, A., Camargo, F. D. & Klein, A. M. Science 367, (2020) 208 | 209 | This dataset has 3 time points for both the clonal and state measurements. It only contains cells with clonal 210 | labels, which has ~50000 cells. Running the whole pipeline for the first time could take several hours in 211 | a standard personal computer. 212 | 213 | Parameters 214 | ---------- 215 | data_des: `str` 216 | A key to label this dataset. 217 | """ 218 | 219 | data_path = settings.data_path 220 | figure_path = settings.figure_path 221 | data_name = "LARRY_adata_preprocessed.h5ad" 222 | return load_data_core(data_path, figure_path, data_name, data_des) 223 | 224 | 225 | def hematopoiesis_130K(data_des="LARRY"): 226 | """ 227 | The hematopoiesis data set from 228 | 229 | * Weinreb, C., Rodriguez-Fraticelli, A., Camargo, F. D. & Klein, A. M. 
Science 367, (2020) 230 | 231 | This dataset has 3 time points for both the clonal and state measurements. It includes 232 | cells with or without clonal labels, which has a total of ~130K cells. 233 | 234 | Parameters 235 | ---------- 236 | data_des: `str` 237 | A key to label this dataset. 238 | """ 239 | 240 | data_path = settings.data_path 241 | figure_path = settings.figure_path 242 | data_name = "Complete_LARRY_dataset_adata_preprocessed.h5ad" 243 | return load_data_core(data_path, figure_path, data_name, data_des) 244 | 245 | 246 | def hematopoiesis_Gata1_states(data_des="LARRY_Gata1_lineage"): 247 | """ 248 | All of the hematopoiesis data set from 249 | 250 | * Weinreb, C., Rodriguez-Fraticelli, A., Camargo, F. D. & Klein, A. M. Science 367, (2020) 251 | 252 | This dataset includes non-clonally-labeled states that express Gata1. In total, it has ~38K cells. 253 | 254 | Parameters 255 | ---------- 256 | data_des: `str` 257 | A key to label this dataset. 258 | """ 259 | 260 | data_path = settings.data_path 261 | figure_path = settings.figure_path 262 | data_name = "LARRY_Gata1_lineage_adata_preprocessed.h5ad" 263 | return load_data_core(data_path, figure_path, data_name, data_des) 264 | 265 | 266 | def hematopoiesis_subsampled(data_des="LARRY_sp500_ranking1"): 267 | """ 268 | Top 15% most heterogeneous clones of the hematopoiesis data set from 269 | 270 | * Weinreb, C., Rodriguez-Fraticelli, A., Camargo, F. D. & Klein, A. M. Science 367, (2020) 271 | 272 | This dataset has 3 time points for both the clones and the state measurements. 273 | This sub-sampled data better illustrates the power of CoSpar in robstly 274 | inferring differentiation dynamics from a noisy clonal dataset. Also, it 275 | is smaller and thus much faster to analyze. 276 | 277 | Parameters 278 | ---------- 279 | data_des: `str` 280 | A key to label this dataset. 
def load_data_core(
    data_path, figure_path, data_name, data_des, url_prefix=url_prefix_0
):
    """
    Load one dataset file, downloading it first when it is not on disk.

    Parameters
    ----------
    data_path: `str` or `Path`
        Directory in which ``data_name`` is stored (created when missing).
    figure_path: `str` or `Path`
        Directory for figures (created when missing).
    data_name: `str`
        File name of the dataset; also appended to ``url_prefix`` to build
        the download URL.
    data_des: `str`
        A key to label this dataset; stored in ``adata.uns['data_des']``.
    url_prefix: `str`
        Base URL the file is downloaded from.

    Returns
    -------
    The object returned by ``read(path)`` with ``uns['data_des']`` set, or
    None when the data file could not be made available.
    """
    url = f"{url_prefix}/{data_name}"
    path = Path(os.path.join(data_path, data_name))
    figure_path = Path(figure_path)

    for directory, purpose in ((path.parent, "data"), (figure_path, "figures")):
        if not directory.is_dir():
            logg.info(f"creating directory {directory} for saving {purpose}")
            # exist_ok: another process may create the directory between the
            # check above and this call.
            directory.mkdir(parents=True, exist_ok=True)

    if not _check_datafile_present_and_download(path, backup_url=url):
        logg.error("Error, files do not exist")
        return None

    adata = read(path)
    adata.uns["data_des"] = [str(data_des)]
    return adata
def _doc_params(**kwds):
    r"""Build a decorator that fills ``{name}`` placeholders in a docstring.

    The decorated object's docstring is dedented and then formatted with
    ``str.format_map(kwds)``. Docstrings should start with a backslash in
    the first line for proper formatting. The unformatted docstring is kept
    on the object as ``__orig_doc__``.

    Parameters
    ----------
    **kwds
        Mapping from placeholder name to the text to substitute.

    Returns
    -------
    A decorator that returns the object it was given.
    """

    def dec(obj):
        doc = obj.__doc__
        obj.__orig_doc__ = doc
        # No docstring (undocumented object, or docstrings stripped by
        # ``python -OO``): nothing to format, and dedent(None) would raise.
        if doc is not None:
            obj.__doc__ = dedent(doc).format_map(kwds)
        return obj

    return dec
55 | The default choice is not to constrain the cell states to show.\ 56 | """ 57 | 58 | all_source = """\ 59 | source: `str` 60 | Choices: {'X_clone', 'transition_map', 61 | 'intraclone_transition_map',...}. If set to be 'clone', use only the clonal 62 | information. If set to be any of the precomputed transition map, use the 63 | transition map to compute the fate coupling. The actual available 64 | map depends on adata itself, which can be accessed at adata.uns['available_map']\ 65 | """ 66 | 67 | 68 | rename_fates = """\ 69 | rename_fates: `list`, optional (default: None) 70 | Provide new names in substitution of names in selected_fates. 71 | For this to be effective, the new name list needs to have names 72 | in exact correspondence to those in the old list.\ 73 | """ 74 | 75 | 76 | background = """\ 77 | background: `bool`, optional (default: True) 78 | If true, plot all cell states (t1+t2) in grey as the background.\ 79 | """ 80 | 81 | show_histogram = """\ 82 | show_histogram: `bool`, optional (default: False) 83 | If true, show the distribution of inferred fate probability.\ 84 | """ 85 | 86 | plot_target_state = """\ 87 | plot_target_state: `bool`, optional (default: True) 88 | If true, highlight the target clusters as defined in selected_fates.\ 89 | """ 90 | 91 | color_bar = """\ 92 | color_bar: `bool`, optional (default: True) 93 | plot the color bar if True.\ 94 | """ 95 | 96 | auto_color_scale = """\ 97 | auto_color_scale: 98 | True: automatically rescale the color range to match the value range.\ 99 | """ 100 | 101 | target_transparency = """\ 102 | target_transparency: `float`, optional (default: 0.2) 103 | It controls the transparency of the plotted target cell states, 104 | for visual effect. Range: [0,1].\ 105 | """ 106 | 107 | figure_index = """\ 108 | figure_index: `str`, optional (default: '') 109 | String index for annotate filename for saved figures. 
Used to distinuigh plots from different conditions.\ 110 | """ 111 | 112 | mask = """\ 113 | mask: `np.array`, optional (default: None) 114 | A boolean array for available cell states. It should has the length as adata.shape[0]. 115 | Especially useful to constrain the states to show fate bias.\ 116 | """ 117 | 118 | color_map = """\ 119 | color_map: 120 | The color map (a matplotlib.pyplot.cm object) to visualize the result.\ 121 | """ 122 | -------------------------------------------------------------------------------- /cospar/hf.py: -------------------------------------------------------------------------------- 1 | from .help_functions import * 2 | -------------------------------------------------------------------------------- /cospar/logging.py: -------------------------------------------------------------------------------- 1 | """Logging and Profiling 2 | """ 3 | 4 | from datetime import datetime 5 | from platform import python_version 6 | from sys import stdout 7 | from time import time as get_time 8 | 9 | from anndata.logging import get_memory_usage, print_memory_usage 10 | 11 | from . 
def msg(
    *msg,
    v=None,
    time=False,
    memory=False,
    reset=False,
    end="\n",
    no_indent=False,
    t=None,
    m=None,
    r=None,
):
    """Write message to logging output.

    The message is written only when the configured ``settings.verbosity``
    is at least ``v``. Output goes through ``_write_log``.

    Parameters
    ----------
    *msg
        One or more values to write; same behavior as ``print``.
    v : {'error', 'warn', 'info', 'hint'} or int, (default: 4)
        0/'error', 1/'warn', 2/'info', 3/'hint', 4, 5, 6...
        Hints (3) are prefixed with "-->".
    time, t : bool, optional (default: False)
        Print timing information; restart the clock.
    memory, m : bool, optional (default: False)
        Print memory information.
    reset, r : bool, optional (default: False)
        Reset timing and memory measurement.
    end : str (default: newline)
        Same meaning as in builtin ``print()`` function.
    no_indent : bool (default: False)
        Do not indent for ``v >= 4``.
    """
    # variable shortcuts: the one-letter forms override the long names
    if t is not None:
        time = t
    if m is not None:
        memory = m
    if r is not None:
        reset = r
    if v is None:
        v = 4
    if isinstance(v, str):
        v = _VERBOSITY_LEVELS_FROM_STRINGS[v]
    if v == 3:  # insert "--> " before hints
        msg = ("-->",) + msg
    if v >= 4 and not no_indent:
        msg = (" ",) + msg
    if _settings_verbosity_greater_or_equal_than(v):
        if not time and not memory and len(msg) > 0:
            _write_log(*msg, end=end)
        if reset:
            try:
                settings._previous_memory_usage, _ = get_memory_usage()
            except Exception:
                # Memory bookkeeping is best effort; unlike a bare ``except``
                # this still lets KeyboardInterrupt/SystemExit through.
                pass
            settings._previous_time = get_time()
        if time:
            elapsed = get_passed_time()
            msg = msg + (f"({_sec_to_str(elapsed)})",)
            _write_log(*msg, end=end)
        if memory:
            _write_log(get_memory_usage(), end=end)


# short alias for ``msg``
m = msg
def timeout(func, args=(), timeout_duration=2, default=None, **kwargs):
    """Run ``func(*args, **kwargs)`` in a thread and wait at most ``timeout_duration`` seconds.

    Parameters
    ----------
    func : callable
        Function to run.
    args : tuple
        Positional arguments for ``func``.
    timeout_duration : float
        Seconds to wait for the thread to finish.
    default :
        Value returned when ``func`` has not finished in time or raised.
    **kwargs
        Keyword arguments for ``func``.

    Returns
    -------
    The return value of ``func``, or ``default``.

    The thread is not stopped on timeout; it keeps running in the background.
    """
    import threading

    class InterruptableThread(threading.Thread):
        def __init__(self):
            # daemon=True: a call that outlives the timeout must not keep
            # the interpreter from exiting.
            threading.Thread.__init__(self, daemon=True)
            self.result = default

        def run(self):
            try:
                self.result = func(*args, **kwargs)
            except Exception:
                # Deliberately best effort: a failing call yields ``default``.
                pass

    it = InterruptableThread()
    it.start()
    it.join(timeout_duration)
    return it.result


def get_latest_pypi_version():
    """Return the last token printed by ``pip search cospar``, or ``"0.0.0"`` on failure."""
    from subprocess import CalledProcessError, check_output

    # NOTE(review): PyPI has switched off the search API behind ``pip search``,
    # so this presumably always ends in the fallback -- confirm, and consider
    # querying the PyPI JSON API instead.
    try:  # needs to work offline as well
        result = check_output(["pip", "search", "cospar"])
    except (CalledProcessError, OSError):
        # non-zero exit status, or the ``pip`` executable could not be started
        return "0.0.0"
    tokens = result.split()
    if not tokens:
        return "0.0.0"
    return tokens[-1].decode(errors="replace")


def _release_tuple(version):
    """Turn e.g. ``"0.10.1.dev3"`` into ``(0, 10, 1)`` so versions compare numerically."""
    import re

    numbers = []
    for part in version.split(".dev")[0].split("."):
        match = re.match(r"\d+", part)
        if match is None:
            break
        numbers.append(int(match.group()))
    # "0.3" and "0.3.0" denote the same release
    while numbers and numbers[-1] == 0:
        numbers.pop()
    return tuple(numbers)


def check_if_latest_version():
    """Warn when the version found by ``get_latest_pypi_version`` is newer than the running one.

    Versions are compared component-wise as integers; a plain string
    comparison would rank "0.10.0" below "0.9.0".
    """
    from . import __version__

    latest_version = timeout(
        get_latest_pypi_version, timeout_duration=2, default="0.0.0"
    )
    if _release_tuple(__version__) < _release_tuple(latest_version):
        warn(
            "There is a newer cospar version available on PyPI:\n",
            "Your version: \t\t",
            __version__,
            "\nLatest version: \t",
            latest_version,
        )
def switch_verbosity(mode="on", module=None):
    """Temporarily change, silence, or restore the verbosity of a package.

    Parameters
    ----------
    mode : {"on", "off"} or a verbosity level
        "off" stores the current verbosity in ``settings.tmp_verbosity`` and
        sets the verbosity to 0. "on" restores the stored verbosity, if any.
        Any non-string value stores the current verbosity and uses the value
        as the new verbosity level.
    module : str or None
        Name of the package whose ``settings`` object is modified
        (e.g. "scanpy"). ``None`` targets this package's own settings.
    """
    if module is None:
        from . import settings
    else:
        # ``exec("from ... import settings")`` cannot bind a function local in
        # Python 3, so resolve the settings object explicitly instead.
        from importlib import import_module

        package = import_module(module)
        try:
            settings = package.settings
        except AttributeError:
            # ``settings`` may be a submodule that has not been imported yet.
            settings = import_module(f"{module}.settings")

    if mode == "on" and hasattr(settings, "tmp_verbosity"):
        settings.verbosity = settings.tmp_verbosity
        del settings.tmp_verbosity

    elif mode == "off":
        settings.tmp_verbosity = settings.verbosity
        settings.verbosity = 0

    elif not isinstance(mode, str):
        settings.tmp_verbosity = settings.verbosity
        settings.verbosity = mode
def profiler(command, filename="profile.stats", n_stats=10):
    """Profiler for a python program

    Runs cProfile and outputs ordered statistics that describe
    how often and for how long various parts of the program are executed.

    Stats can be visualized with `!snakeviz profile.stats`.

    Parameters
    ----------
    command: str
        Command string to be executed.
    filename: str
        Name under which to store the stats.
    n_stats: int or None
        Number of top stats to show. A falsy value prints all entries.

    Returns
    -------
    The `pstats.Stats` object, as returned by `Stats.print_stats`.
    """
    import cProfile
    import pstats

    cProfile.run(command, filename)
    stats = pstats.Stats(filename).strip_dirs().sort_stats("time")
    if n_stats:
        return stats.print_stats(n_stats)
    # No restriction requested: call without arguments instead of passing a
    # dict, which is not a documented restriction type for print_stats.
    return stats.print_stats()
def barcode_heatmap(
    adata,
    selected_times=None,
    selected_fates=None,
    color_bar=True,
    rename_fates=None,
    normalize=False,
    binarize=False,
    log_transform=False,
    fig_width=4,
    fig_height=6,
    figure_index="",
    plot=True,
    pseudocount=10 ** (-10),
    order_map_x=False,
    order_map_y=False,
    fate_normalize_source="X_clone",
    select_clones_with_fates: list = None,
    select_clones_without_fates: list = None,
    select_clones_mode: str = "or",
    **kwargs,
):
    """
    Plot barcode heatmap among different fate clusters.

    We select the clonal measurement at the selected time points and show the
    corresponding heatmap among the selected fate clusters.

    Parameters
    ----------
    adata: :class:`~anndata.AnnData` object
    selected_times: `list`, optional (default: None)
        Time points to select the cell states.
    selected_fates: `list`, optional (default: all)
        List of fate clusters to use. If set to be [], use all.
    color_bar: `bool`, optional (default: True)
        Plot color bar.
    rename_fates: `list`, optional (default: None)
        Provide new names in substitution of names in selected_fates.
        For this to be effective, the new name list needs to have names
        in exact correspondence to those in the old list.
    normalize:
        To perform cluster-wise then clone-wise normalization
    binarize: `bool`
        Binarize the coarse-grained barcode count matrix, just for the purpose of plotting.
    log_transform: `bool`, optional (default: False)
        If true, perform a log transform. This is needed when the data
        matrix has entries varying by several orders of magnitude.
    fig_width: `float`, optional (default: 4)
        Figure width.
    fig_height: `float`, optional (default: 6)
        Figure height.
    figure_index: `str`, optional (default: '')
        Suffix appended (after an underscore) to the saved figure name.
    plot: `bool`
        True: plot the result. False, suppress the plot.
    pseudocount: `float`
        Pseudocount for the heatmap (needed for ordering the map)
    order_map_x: `bool`
        Whether to re-order the x coordinate of the matrix or not
    order_map_y: `bool`
        Whether to re-order the y coordinate of the matrix or not
    fate_normalize_source:
        Source for cluster-wise normalization: {'X_clone','state_info'}. 'X_clone': directly row-normalize coarse_X_clone; 'state_info': compute each cluster size directly, and then normalize coarse_X_clone. The latter method is useful if we have single-cell resolution for each fate.
    select_clones_with_fates: list = None,
        Select clones that label fates from this list.
    select_clones_without_fates: list = None,
        Exclude clones that label fates from this list.
    select_clones_mode: str = {'or','and'}
        Logic rule for selection.

    Returns
    -------
    The heatmap axis when `plot` is True. The coarse-grained X_clone matrix
    and the selected clusters are stored at adata.uns['barcode_heatmap'].
    The coarse-grained X_clone keeps all clones and maintains their ordering.
    """

    data_des = adata.uns["data_des"][-1]
    data_des = f"{data_des}_clonal"
    figure_path = settings.figure_path

    coarse_X_clone, mega_cluster_list = tl.coarse_grain_clone_over_cell_clusters(
        adata,
        selected_times=selected_times,
        selected_fates=selected_fates,
        normalize=normalize,
        fate_normalize_source=fate_normalize_source,
        select_clones_with_fates=select_clones_with_fates,
        select_clones_without_fates=select_clones_without_fates,
        select_clones_mode=select_clones_mode,
        **kwargs,
    )

    if rename_fates is None:
        rename_fates = mega_cluster_list

    if len(rename_fates) != len(mega_cluster_list):
        logg.warn(
            "rename_fates does not have the same length as selected_fates, thus not used."
        )
        rename_fates = mega_cluster_list

    # x_ticks is only injected for the heatmap call below; the coarse-graining
    # call above received the caller's kwargs unchanged.
    if "x_ticks" not in kwargs:
        kwargs["x_ticks"] = rename_fates

    coarse_X_clone_new = pl_util.custom_hierachical_ordering(
        np.arange(coarse_X_clone.shape[0]), coarse_X_clone
    )
    # The stored matrix is the un-reordered one, so clone ordering is preserved.
    adata.uns["barcode_heatmap"] = {
        "coarse_X_clone": coarse_X_clone,
        "fate_names": rename_fates,
    }
    logg.info("Data saved at adata.uns['barcode_heatmap']")
    if plot:
        if binarize:
            final_matrix = coarse_X_clone_new > 0
            color_bar_label = "Binarized barcode count"
        else:
            final_matrix = coarse_X_clone_new
            color_bar_label = "Barcode count"

        if normalize:
            color_bar_label += " (normalized)"

        # Only show clones that are observed in at least one selected fate.
        clone_idx = final_matrix.sum(0) > 0
        ax = pl_util.heatmap(
            final_matrix[:, clone_idx].T + pseudocount,
            order_map_x=order_map_x,
            order_map_y=order_map_y,
            color_bar_label=color_bar_label,
            log_transform=log_transform,
            fig_width=fig_width,
            fig_height=fig_height,
            color_bar=color_bar,
            **kwargs,
        )
        plt.title(f"{np.sum(clone_idx)} clones")

        plt.tight_layout()
        if figure_index != "":
            # Was a no-op comparison (`==`); assign so the suffix is separated
            # from the base file name by an underscore.
            figure_index = f"_{figure_index}"
        plt.savefig(
            os.path.join(
                figure_path,
                f"{data_des}_barcode_heatmap{figure_index}.{settings.file_format_figs}",
            )
        )
        return ax
color_list=["red", "blue", "purple", "green", "cyan", "black"], 227 | selected_times=None, 228 | title=True, 229 | clone_markersize=12, 230 | clone_markeredgewidth=1, 231 | markeredgecolor="black", 232 | **kwargs, 233 | ): 234 | """ 235 | Plot clones on top of state embedding. 236 | 237 | Parameters 238 | ---------- 239 | adata: :class:`~anndata.AnnData` object 240 | selected_clone_list: `list` 241 | List of selected clone ID's. 242 | color_list: `list`, optional (default: ['red','blue','purple','green','cyan','black']) 243 | The list of color that defines color at respective time points. 244 | selected_times: `list`, optional (default: all) 245 | Select time points to show corresponding states. If set to be [], use all states. 246 | title: `bool`, optional (default: True) 247 | If ture, show the clone id as panel title. 248 | clone_markersize: `int`, optional (default: 12) 249 | Clone marker size 250 | clone_markeredgewidth: `int`, optional (default: 1) 251 | Edige size for clone marker 252 | """ 253 | 254 | fig_width = settings.fig_width 255 | fig_height = settings.fig_height 256 | point_size = settings.fig_point_size 257 | x_emb = adata.obsm["X_emb"][:, 0] 258 | y_emb = adata.obsm["X_emb"][:, 1] 259 | data_des = adata.uns["data_des"][-1] 260 | # data_path=settings.data_path 261 | figure_path = settings.figure_path 262 | X_clone = adata.obsm["X_clone"] 263 | time_info = np.array(adata.obs["time_info"]) 264 | 265 | # use only valid time points 266 | sp_idx = hf.selecting_cells_by_time_points(time_info, selected_times) 267 | selected_times = np.sort(list(set(time_info[sp_idx]))) 268 | 269 | selected_clone_list = np.array(selected_clone_list) 270 | full_id_list = np.arange(X_clone.shape[1]) 271 | valid_idx = np.in1d(full_id_list, selected_clone_list) 272 | if np.sum(valid_idx) < len(selected_clone_list): 273 | logg.error( 274 | f"Valid id range is (0,{X_clone.shape[1]-1}). Please use a smaller ID!" 
def clonal_fate_bias(adata, show_histogram=True, FDR=0.05):
    """
    Plot clonal fate bias towards a cluster.

    The results should be pre-computed from :func:`cospar.tl.clonal_fate_bias`

    Parameters
    ----------
    adata: :class:`~anndata.AnnData` object
    show_histogram: `bool`, optional (default: True)
        If true, also plot the distribution of the clonal fraction found in
        the target fate.
    FDR: `float`, optional (default: 0.05)
        False-discovery rate; drawn as a horizontal line at -log10(FDR).
    """

    # Guard clause: the plot only makes sense on pre-computed results.
    if "clonal_fate_bias" not in adata.uns.keys():
        raise ValueError(
            "clonal_fate_bias has not been computed. Run cs.tl.clonal_fate_bias first"
        )

    result = adata.uns["clonal_fate_bias"]
    bias_values = result["Fate_bias"]
    target_fractions = result["clonal_fraction_in_target_fate"]

    figsize = (settings.fig_width, settings.fig_height)
    prefix = adata.uns["data_des"][-1]
    out_dir = settings.figure_path
    threshold = -np.log10(FDR)

    def _hide_top_right(axis):
        # Drop the top and right spines of the given axis.
        for side in ("top", "right"):
            axis.spines[side].set_visible(False)

    # Panel 1: fate bias per clone, with the FDR threshold as reference line.
    clone_index = np.arange(len(bias_values))
    fig = plt.figure(figsize=figsize)
    ax = plt.subplot(1, 1, 1)
    ax.plot(clone_index, bias_values, ".", color="blue", markersize=5)
    ax.plot(
        clone_index,
        np.zeros(len(bias_values)) + threshold,
        "-.",
        color="grey",
        markersize=5,
        label=f"FDR={FDR}",
    )
    _hide_top_right(ax)
    ax.set_ylabel("Clonal fate bias")
    ax.set_xlabel("Clonal index")
    ax.legend()
    fig.tight_layout()
    bias_file = f"{prefix}_clonal_fate_bias.{settings.file_format_figs}"
    fig.savefig(os.path.join(out_dir, bias_file))

    if not show_histogram:
        return

    # Panel 2: histogram of the clonal fraction observed in the target fate.
    fig = plt.figure(figsize=figsize)
    ax = plt.subplot(1, 1, 1)
    ax.hist(target_fractions, color="#2ca02c", density=True)
    ax.set_xlim([0, 1])
    ax.set_xlabel("Clonal fraction in selected fates")
    ax.set_ylabel("Density")
    _hide_top_right(ax)
    ax.set_title(f"Average: {np.mean(target_fractions):.2f}")
    fig.tight_layout()
    fraction_file = f"{prefix}_observed_clonal_fraction.{settings.file_format_figs}"
    fig.savefig(os.path.join(out_dir, fraction_file))
400 | 401 | It includes the statistics for clone size , and the barcode number per cell. 402 | """ 403 | 404 | time_info = np.array(adata.obs["time_info"]) 405 | sp_idx = hf.selecting_cells_by_time_points(time_info, selected_times) 406 | adata_1 = adata[sp_idx] 407 | persistent_clone_ids = tl.identify_persistent_clones(adata_1) 408 | X_clone = adata_1.obsm["X_clone"] 409 | total_clone_N = X_clone.shape[1] 410 | print( 411 | f" Clones observed across selected times: {len(persistent_clone_ids)} (out of {total_clone_N} clones)" 412 | ) 413 | 414 | for x in set(adata_1.obs["time_info"]): 415 | print(f"---------t={x}---------") 416 | adata_sp = adata_1[adata_1.obs["time_info"] == x] 417 | X_clone = adata_sp.obsm["X_clone"] 418 | clone_size = X_clone.sum(0).A.flatten() 419 | clonal_bc_number = X_clone.sum(1).A.flatten() 420 | clonal_cells_N = np.sum(clonal_bc_number > 0) 421 | total_N = X_clone.shape[0] 422 | total_clone_N = X_clone.shape[1] 423 | useful_clone_N = np.sum(clone_size > 0) 424 | print(f" Cells with barcode: {clonal_cells_N} (out of {total_N} cells)") 425 | print( 426 | f" Barcodes with cells: {useful_clone_N} (out of {total_clone_N} clones)" 427 | ) 428 | 429 | fig, axs = plt.subplots(1, 2, figsize=(8, 3.5)) 430 | ax = sns.histplot(clone_size[clone_size > 0], ax=axs[0], **kwargs) 431 | ax.set_xlabel("Clone size") 432 | ax.set_ylabel("Count") 433 | 434 | ax = sns.histplot(clonal_bc_number[clonal_bc_number > 0], ax=axs[1], **kwargs) 435 | ax.set_xlabel("Clonal barcode number per cell") 436 | ax.set_ylabel("Count") 437 | fig.suptitle(f"Time={x}") 438 | -------------------------------------------------------------------------------- /cospar/pp.py: -------------------------------------------------------------------------------- 1 | from .preprocessing import * 2 | -------------------------------------------------------------------------------- /cospar/preprocessing/__init__.py: -------------------------------------------------------------------------------- 1 | 
from ._preprocessing import * 2 | -------------------------------------------------------------------------------- /cospar/settings.py: -------------------------------------------------------------------------------- 1 | """Settings 2 | """ 3 | 4 | verbosity = 3 5 | """Verbosity level (0=errors, 1=warnings, 2=info, 3=hints) 6 | """ 7 | 8 | data_path = "data" 9 | """Directory where adata is stored (default 'data_cospar'). 10 | """ 11 | 12 | figure_path = "figure" 13 | """Directory where plots are saved (default 'figure_cospar'). 14 | """ 15 | 16 | file_format_figs = "pdf" 17 | """File format for saving figures. 18 | For example 'png', 'pdf' or 'svg'. Many other formats work as well (see 19 | `matplotlib.pyplot.savefig`). 20 | """ 21 | 22 | fig_width = 4 23 | fig_height = 3.5 24 | fig_point_size = 2 25 | 26 | 27 | logfile = "" 28 | """Name of logfile. By default is set to '' and writes to standard output.""" 29 | 30 | # -------------------------------------------------------------------------------- 31 | # Functions 32 | # -------------------------------------------------------------------------------- 33 | 34 | import warnings 35 | 36 | from cycler import cycler 37 | from matplotlib import cbook, cm, colors, rcParams 38 | 39 | # from cospar import help_functions as hf 40 | # from . 
def set_rcParams_cospar(fontsize=12, color_map=None, frameon=None):
    """Apply the cospar default values to ``matplotlib.rcParams``.

    ``fontsize`` drives the base font and title size; legend, axis and tick
    labels use 0.92 times that size. ``color_map`` overrides the default
    "Reds" image colormap. ``frameon`` is stored in the module-level
    ``_frameon`` flag (``False`` when not given).
    """
    # For customization options see:
    # https://matplotlib.org/stable/tutorials/introductory/customizing.html
    global _frameon

    label_size = 0.92 * fontsize

    cospar_defaults = {
        # dpi options (mpl default: 100, 100)
        "figure.dpi": 100,
        "savefig.dpi": 150,
        # figure
        "figure.figsize": (6, 4),
        # lines (defaults: 1.5, 6, 1)
        "lines.linewidth": 1.5,
        "lines.markersize": 6,
        "lines.markeredgewidth": 1,
        # font
        "font.sans-serif": [
            "Arial",
            "Helvetica",
            "DejaVu Sans",
            "Bitstream Vera Sans",
            "sans-serif",
        ],
        # font sizes (mpl default: 10, medium, large, medium)
        "font.size": fontsize,
        "legend.fontsize": label_size,
        "axes.titlesize": fontsize,
        "axes.labelsize": label_size,
        # legend (mpl default: 1, 1, 2, 0.8)
        "legend.numpoints": 1,
        "legend.scatterpoints": 1,
        "legend.handlelength": 0.5,
        "legend.handletextpad": 0.4,
        "pdf.fonttype": 42,
        # axes
        "axes.linewidth": 0.8,
        "axes.edgecolor": "black",
        "axes.facecolor": "white",
        # ticks (mpl default: k, k, medium, medium)
        "xtick.color": "k",
        "ytick.color": "k",
        "xtick.labelsize": label_size,
        "ytick.labelsize": label_size,
        # axes grid (mpl default: False, #b0b0b0)
        "axes.grid": False,
        "grid.color": ".8",
        # color map
        "image.cmap": "Reds" if color_map is None else color_map,
        # spines
        "axes.spines.right": False,
        "axes.spines.top": False,
    }
    # Assign one key at a time, in the order listed above.
    for key, value in cospar_defaults.items():
        rcParams[key] = value

    # frame (mpl default: True)
    _frameon = False if frameon is None else frameon
163 | dpi_save : `int` (default: `None`) 164 | Resolution of saved figures. This should typically be higher to achieve 165 | publication quality. 166 | frameon : `bool` (default: `None`) 167 | Add frames and axes labels to scatter plots. 168 | vector_friendly : `bool` (default: `True`) 169 | Plot scatter plots using `png` backend even when exporting as `pdf` or `svg`. 170 | transparent : `bool` (default: `True`) 171 | Save figures with transparent back ground. Sets 172 | `rcParams['savefig.transparent']`. 173 | fontsize : `int` (default: 14) 174 | Set the fontsize for several `rcParams` entries. 175 | figsize: `[float, float]` (default: `None`) 176 | Width and height for default figure size. 177 | color_map : `str` (default: `None`) 178 | Convenience method for setting the default color map. 179 | facecolor : `str` (default: `None`) 180 | Sets backgrounds `rcParams['figure.facecolor']` 181 | and `rcParams['axes.facecolor']` to `facecolor`. 182 | format : {'png', 'pdf', 'svg', etc.} (default: 'pdf') 183 | This sets the default format for saving figures: `file_format_figs`. 184 | ipython_format : list of `str` (default: 'png2x') 185 | Only concerns the notebook/IPython environment; see 186 | `IPython.core.display.set_matplotlib_formats` for more details. 
187 | """ 188 | try: 189 | import IPython 190 | 191 | if isinstance(ipython_format, str): 192 | ipython_format = [ipython_format] 193 | IPython.display.set_matplotlib_formats(*ipython_format) 194 | except: 195 | pass 196 | 197 | global _rcParams_style 198 | _rcParams_style = style 199 | global _vector_friendly 200 | _vector_friendly = vector_friendly 201 | global file_format_figs 202 | file_format_figs = format 203 | if transparent is not None: 204 | rcParams["savefig.transparent"] = transparent 205 | if facecolor is not None: 206 | rcParams["figure.facecolor"] = facecolor 207 | rcParams["axes.facecolor"] = facecolor 208 | if style == "cospar": 209 | set_rcParams_cospar(fontsize=fontsize, color_map=color_map, frameon=frameon) 210 | # Overwrite style options if given 211 | if figsize is not None: 212 | rcParams["figure.figsize"] = figsize 213 | global fig_width 214 | global fig_height 215 | fig_width = figsize[0] 216 | fig_height = figsize[1] 217 | if dpi is not None: 218 | rcParams["figure.dpi"] = dpi 219 | if dpi_save is not None: 220 | rcParams["savefig.dpi"] = dpi_save 221 | 222 | global fig_point_size 223 | fig_point_size = pointsize 224 | 225 | # hf.set_up_folders() 226 | 227 | 228 | def set_rcParams_defaults(): 229 | """Reset `matplotlib.rcParams` to defaults.""" 230 | from matplotlib import rcParamsDefault 231 | 232 | rcParams.update(rcParamsDefault) 233 | 234 | 235 | def _set_start_time(): 236 | from time import time 237 | 238 | return time() 239 | 240 | 241 | _start = _set_start_time() 242 | """Time when the settings module is first imported.""" 243 | 244 | _previous_time = _start 245 | """Variable for timing program parts.""" 246 | -------------------------------------------------------------------------------- /cospar/tl.py: -------------------------------------------------------------------------------- 1 | from .tool import * 2 | -------------------------------------------------------------------------------- /cospar/tmap/__init__.py: 
def generate_similarity_matrix(
    adata,
    file_name,
    round_of_smooth=10,
    neighbor_N=20,
    beta=0.1,
    truncation_threshold=0.001,
    save_subset=True,
    use_existing_KNN_graph=False,
    compute_new_Smatrix=False,
):
    """
    Generate similarity matrix (Smatrix) through graph diffusion

    It generates the similarity matrix via iterative graph diffusion.
    Similarity matrices from the rounds of diffusion are saved, after truncation
    to promote sparsity and save space. If save_subset is activated, only save
    Smatrix for smooth rounds at the multiples of 5 (like 5,10,15,...), plus the
    final requested round. If a Smatrix is pre-computed, it will be loaded
    directly if compute_new_Smatrix=False.

    Parameters
    ----------
    adata: :class:`~anndata.AnnData` object
    file_name: str
        Filename prefix to load a pre-computed similarity matrix or save the
        newly computed similarity matrix (stored as `{file_name}_SM{round}.npz`).
    round_of_smooth: `int`, optional (default: 10)
        The rounds of graph diffusion.
    neighbor_N: `int`, optional (default: 20)
        Neighbor number for constructing the KNN graph, using the UMAP method.
    beta: `float`, option (default: 0.1)
        Probability to stay at the origin in a unit diffusion step, in the range [0,1]
    truncation_threshold: `float`, optional (default: 0.001)
        At each iteration, truncate the similarity matrix using
        truncation_threshold. This promotes the sparsity of the matrix,
        thus the speed of computation. We set the truncation threshold to be small,
        to guarantee accuracy.
    save_subset: `bool`, optional (default: True)
        If true, save only Smatrix at smooth round [5,10,15,...] and at the
        final round; otherwise, save Smatrix at each round.
    use_existing_KNN_graph: `bool`, optional (default: False)
        If true and adata.obsp['connectivities'] exists, use the existing knn
        graph to build the similarity matrix, regardless of neighbor_N.
    compute_new_Smatrix: `bool`, optional (default: False)
        If true, compute a new Smatrix, even if there is pre-computed Smatrix with the
        same parameterization.

    Returns
    -------
    similarity_matrix: `sp.spmatrix`
    """

    cache_file = file_name + f"_SM{round_of_smooth}.npz"
    if os.path.exists(cache_file) and (not compute_new_Smatrix):
        logg.hint("Compute similarity matrix: load existing data")
        return ssp.load_npz(cache_file)

    logg.hint(f"Compute similarity matrix: computing new; beta={beta}")

    if (not use_existing_KNN_graph) or ("connectivities" not in adata.obsp.keys()):
        # here, we assume that adata already has pre-computed PCA
        sc.pp.neighbors(adata, n_neighbors=neighbor_N)
    else:
        logg.hint(
            "Use existing KNN graph at adata.obsp['connectivities'] for generating the smooth matrix"
        )
    adjacency_matrix = adata.obsp["connectivities"]

    # Symmetrize the graph, then rescale each row by the inverse of its sum.
    adjacency_matrix = (adjacency_matrix + adjacency_matrix.T) / 2
    adjacency_matrix = hf.sparse_rowwise_multiply(
        adjacency_matrix, 1 / adjacency_matrix.sum(1).A.squeeze()
    )

    # Diffusion starts from the identity matrix.
    nrow = adata.shape[0]
    similarity_matrix = ssp.lil_matrix((nrow, nrow))
    similarity_matrix.setdiag(np.ones(nrow))
    transpose_A = adjacency_matrix.T

    if round_of_smooth == 0:
        SM = 0
        similarity_matrix = ssp.csr_matrix(similarity_matrix)
        ssp.save_npz(file_name + f"_SM{SM}.npz", similarity_matrix)

    for iRound in range(round_of_smooth):
        SM = iRound + 1

        logg.info("Smooth round:", SM)
        t = time.time()
        # One diffusion step: stay with weight beta, move along the graph
        # with weight (1 - beta).
        similarity_matrix = (
            beta * similarity_matrix + (1 - beta) * transpose_A * similarity_matrix
        )

        logg.hint("Time elapsed:", time.time() - t)

        t = time.time()
        sparsity_frac = (similarity_matrix > 0).sum() / (
            similarity_matrix.shape[0] * similarity_matrix.shape[1]
        )
        # Truncate small entries only once the matrix has become dense enough.
        if sparsity_frac >= 0.1:
            logg.hint(f"Orignal sparsity={sparsity_frac}, Thresholding")
            similarity_matrix = hf.matrix_row_or_column_thresholding(
                similarity_matrix, truncation_threshold
            )
            sparsity_frac_2 = (similarity_matrix > 0).sum() / (
                similarity_matrix.shape[0] * similarity_matrix.shape[1]
            )

            logg.hint(f"Final sparsity={sparsity_frac_2}")

            logg.info(
                f"similarity matrix truncated (Smooth round={SM}): ",
                time.time() - t,
            )

        similarity_matrix = ssp.csr_matrix(similarity_matrix)

        round_file = file_name + f"_SM{SM}.npz"
        if not save_subset:  # save all
            logg.hint("Save the matrix at every round")
            ssp.save_npz(round_file, similarity_matrix)
        elif SM % 5 == 0:  # save when SM=5,10,15,20,...
            logg.hint("Save the matrix at every 5 rounds")
            ssp.save_npz(round_file, similarity_matrix)
        elif SM == round_of_smooth:
            # Always persist the requested round; otherwise the cache lookup
            # at the top can never succeed when round_of_smooth % 5 != 0.
            logg.hint("Save the matrix at the final round")
            ssp.save_npz(round_file, similarity_matrix)

    return similarity_matrix
def generate_final_similarity(similarity_matrix, final_index_0, final_index_1):
    """
    Extract Smatrix at t2 from the full Smatrix

    The full matrix is transposed before the rows `final_index_0` and the
    columns `final_index_1` are selected.

    Parameters
    ----------
    similarity_matrix: `np.array` or `sp.spmatrix`
        full Smatrix
    final_index_0: `list`
        list of selected t2-cell id among all cells (t1+t2)
    final_index_1: `list`
        list of selected t2-cell id among all cells (t1+t2)
        It can be the same as final_index_0. In the case that they are different,
        final_index_0 is a subset of cells that correspond to multi-time clones,
        while final_index_1 may be all cells at t2.

    Returns
    -------
    final Smatrix: `np.array`
    """

    t = time.time()
    final_similarity = similarity_matrix.T[final_index_0][:, final_index_1]
    if ssp.issparse(final_similarity):
        # `.A` was removed from scipy sparse matrices; toarray() is the
        # equivalent, supported spelling.
        final_similarity = final_similarity.toarray()

    logg.hint("Time elapsed: ", time.time() - t)
    return final_similarity


def select_time_points(
    adata_orig, time_point=("day_1", "day_2"), extend_Tmap_space=False
):
    """
    Select barcoded cells at given time points for Tmap inference.

    Select cells at given time points, and prepare the right data structure
    for running core cospar function to infer the Tmap.

    Parameters
    ----------
    adata_orig: original :class:`~anndata.AnnData` object
    time_point: `list` optional (default: ['day_1','day_2'])
        Require at least two time points, arranged in ascending order.
        An empty sequence selects every value found in
        adata_orig.obs['time_info'], ordered with `np.sort` (this automatic
        ordering may not match the chronological order).
    extend_Tmap_space: `bool` optional (default: `False`)
        If true, the initial states for Tmap will include all states at initial time points,
        and the later states for Tmap will include all states at later time points.
        Otherwise, the initial and later state
        space of the Tmap will be restricted to cells with multi-time clonal information
        alone. The latter case speeds up the computation, which is recommended.

    Returns
    -------
    Subsampled :class:`~anndata.AnnData` object, or `None` (after logging an
    error) when fewer than two time points are available.
    """

    time_info_orig = np.array(adata_orig.obs["time_info"])
    clone_annot_orig = adata_orig.obsm["X_clone"]
    if len(time_point) == 0:  # use all clonally labelled cell states
        # this automatic ordering may not work
        time_point = np.sort(list(set(time_info_orig)))

    if len(time_point) < 2:
        logg.error("Must select more than 1 time point!")
        return None

    def clones_present(cell_ids):
        # Boolean mask over clones: True where any selected cell carries the
        # clone. Summing first avoids densifying the whole clone matrix.
        return np.asarray(clone_annot_orig[cell_ids].sum(0)).ravel() > 0

    n_interval = len(time_point) - 1
    time_masks = [time_info_orig == t for t in time_point]
    # Indices are in the original cell space, without sampling etc.
    cell_id_by_time = [np.nonzero(mask)[0] for mask in time_masks]
    At = [ssp.csr_matrix(clone_annot_orig[mask]) for mask in time_masks]

    ### Day t - t+1: cells at t sharing a clone with some cell at t+1
    Clonal_cell_ID_FOR_t = []
    for j in range(n_interval):
        idx_t = np.asarray((At[j] @ At[j + 1].T).sum(1) > 0).ravel()
        temp = cell_id_by_time[j][idx_t]
        Clonal_cell_ID_FOR_t.append(temp)

        logg.hint(
            f"Clonal cell fraction (day {time_point[j]}-{time_point[j+1]}):",
            len(temp) / np.sum(time_masks[j]),
        )

    ### Day t+1 - t: cells at t+1 sharing a clone with some cell at t
    Clonal_cell_ID_BACK_t = []
    for j in range(n_interval):
        idx_t = np.asarray((At[j + 1] @ At[j].T).sum(1) > 0).ravel()
        temp = cell_id_by_time[j + 1][idx_t]
        Clonal_cell_ID_BACK_t.append(temp)

        logg.hint(
            f"Clonal cell fraction (day {time_point[j+1]}-{time_point[j]}):",
            len(temp) / np.sum(time_masks[j + 1]),
        )

    for j in range(n_interval):
        logg.hint(
            f"Numer of cells that are clonally related -- day {time_point[j]}: {len(Clonal_cell_ID_FOR_t[j])} and day {time_point[j+1]}: {len(Clonal_cell_ID_BACK_t[j])}"
        )

    # Flatten the per-interval lists. np.concatenate keeps the integer dtype
    # even when nothing was selected, so the arrays stay usable as indices.
    flatten_clonal_cell_ID_FOR = np.concatenate(Clonal_cell_ID_FOR_t)
    flatten_clonal_cell_ID_BACK = np.concatenate(Clonal_cell_ID_BACK_t)
    valid_clone_N_FOR = np.sum(clones_present(flatten_clonal_cell_ID_FOR))

    logg.info(f"Number of multi-time clones post selection: {valid_clone_N_FOR}")

    ###################### select initial and later cell states
    if extend_Tmap_space:
        # All cells at the initial (resp. later) time points, grouped by time
        # point in the order given by `time_point`.
        old_Tmap_cell_id_t1 = np.concatenate(cell_id_by_time[:-1])
        old_Tmap_cell_id_t2 = np.concatenate(cell_id_by_time[1:])
    else:
        old_Tmap_cell_id_t1 = flatten_clonal_cell_ID_FOR
        old_Tmap_cell_id_t2 = flatten_clonal_cell_ID_BACK

    old_clonal_cell_id_t1 = flatten_clonal_cell_ID_FOR
    old_clonal_cell_id_t2 = flatten_clonal_cell_ID_BACK
    ########################

    # Sorted, de-duplicated ids of every cell kept in the sub-sampled object.
    sp_id = np.union1d(old_Tmap_cell_id_t1, old_Tmap_cell_id_t2)
    sp_idx = np.zeros(clone_annot_orig.shape[0], dtype=bool)
    sp_idx[sp_id] = True

    def to_sub_space(full_space_ids):
        return hf.converting_id_from_fullSpace_to_subSpace(full_space_ids, sp_id)[0]

    Tmap_cell_id_t1 = to_sub_space(old_Tmap_cell_id_t1)
    clonal_cell_id_t1 = to_sub_space(old_clonal_cell_id_t1)
    clonal_cell_id_t2 = to_sub_space(old_clonal_cell_id_t2)
    Tmap_cell_id_t2 = to_sub_space(old_Tmap_cell_id_t2)

    Clonal_cell_ID_FOR_t_new = [to_sub_space(ids) for ids in Clonal_cell_ID_FOR_t]
    Clonal_cell_ID_BACK_t_new = [to_sub_space(ids) for ids in Clonal_cell_ID_BACK_t]

    # Keep only the clones observed among the multi-time clonal cells.
    sp_idx_0 = np.zeros(clone_annot_orig.shape[0], dtype=bool)
    sp_idx_0[old_clonal_cell_id_t1] = True
    sp_idx_0[old_clonal_cell_id_t2] = True

    barcode_id = np.nonzero(clones_present(sp_idx_0))[0]
    clone_annot = clone_annot_orig[sp_idx][:, barcode_id]

    adata = adata_orig[sp_idx]
    adata.obsm["X_clone"] = clone_annot
    adata.uns["clonal_cell_id_t1"] = clonal_cell_id_t1
    adata.uns["clonal_cell_id_t2"] = clonal_cell_id_t2
    adata.uns["Tmap_cell_id_t1"] = Tmap_cell_id_t1
    adata.uns["Tmap_cell_id_t2"] = Tmap_cell_id_t2
    adata.uns["multiTime_cell_id_t1"] = Clonal_cell_ID_FOR_t_new
    adata.uns["multiTime_cell_id_t2"] = Clonal_cell_ID_BACK_t_new
    adata.uns["proportion"] = np.ones(len(time_point) - 1)
    adata.uns["sp_idx"] = sp_idx

    data_des_orig = adata_orig.uns["data_des"][0]
    data_des_0 = adata_orig.uns["data_des"][-1]
    time_label = "t" + "".join(f"*{x}" for x in time_point)

    data_des = (
        data_des_0
        + f"_MultiTimeClone_FullSpace{int(extend_Tmap_space)}_{time_label}"
    )
    adata.uns["data_des"] = [data_des_orig, data_des]

    if logg._settings_verbosity_greater_or_equal_than(3):
        N_cell, N_clone = clone_annot.shape
        logg.info(f"Cell number={N_cell}, Clone number={N_clone}")
        x_emb = adata.obsm["X_emb"][:, 0]
        y_emb = adata.obsm["X_emb"][:, 1]
        pl.customized_embedding(x_emb, y_emb, -x_emb)

    logg.hint(f"clonal_cell_id_t1: {len(clonal_cell_id_t1)}")
    logg.hint(f"Tmap_cell_id_t1: {len(Tmap_cell_id_t1)}")
    return adata
# @ Lénaïc Chizat 2015 - optimal transport
def fdiv(l, x, p, dx):
    """Return ``l * sum(dx * (x * log(x / p) - x + p))``.

    This is a `dx`-weighted generalized Kullback-Leibler divergence of `x`
    from `p`, scaled by `l`.
    """
    integrand = x * (np.log(x / p)) - x + p
    return l * np.sum(dx * integrand)


def fdivstar(l, u, p, dx):
    """Return ``l * sum(p * dx * (exp(u / l) - 1))``.

    Used by :func:`dual` as the counterpart of :func:`fdiv`.
    """
    weights = p * dx
    return l * np.sum(weights * (np.exp(u / l) - 1))


def primal(C, K, R, dx, dy, p, q, a, b, epsilon, lambda1, lambda2):
    """Evaluate the primal objective for the transport plan `R`.

    The value is the sum of the two marginal penalties (`fdiv` of the row
    marginal ``R @ dy`` against `p` and of the column marginal ``R.T @ dx``
    against `q`) plus the entropy and transport-cost terms averaged over
    ``len(p) * len(q)`` entries. `a` and `b` are accepted for signature
    parity with :func:`dual` but are not used.
    """
    n_entries = len(p) * len(q)
    # log(0) for empty entries of R is expected here; silence the warning.
    with np.errstate(divide="ignore"):
        row_penalty = fdiv(lambda1, np.dot(R, dy), p, dx)
        col_penalty = fdiv(lambda2, np.dot(R.T, dx), q, dy)
        entropy = epsilon * np.sum(R * np.nan_to_num(np.log(R)) - R + K)
        transport_cost = np.sum(R * C)
        return row_penalty + col_penalty + (entropy + transport_cost) / n_entries


def dual(C, K, R, dx, dy, p, q, a, b, epsilon, lambda1, lambda2):
    """Evaluate the dual objective for the scalings `a` and `b`.

    Combines `fdivstar` evaluated at ``-epsilon * log(a)`` and
    ``-epsilon * log(b)`` with the averaged mass difference ``R - K``.
    `C` is accepted for signature parity with :func:`primal` but is not used.
    """
    n_entries = len(p) * len(q)
    row_term = fdivstar(lambda1, -epsilon * np.log(a), p, dx)
    col_term = fdivstar(lambda2, -epsilon * np.log(b), q, dy)
    return -row_term - col_term - epsilon * np.sum(R - K) / n_entries


# end @ Lénaïc Chizat
97 | The method is twice faster than the :func:`.transport_stablev2` 98 | 99 | 100 | Parameters 101 | ---------- 102 | C : 2-D ndarray 103 | The cost matrix. C[i][j] is the cost to transport cell i to cell j 104 | G : 1-D array_like 105 | Growth value for input cells. 106 | lambda1 : float, optional 107 | Regularization parameter for the marginal constraint on p 108 | lambda2 : float, optional 109 | Regularization parameter for the marginal constraint on q 110 | epsilon : float, optional 111 | Entropy regularization parameter. 112 | batch_size : int, optional 113 | Number of iterations to perform between each duality gap check 114 | tolerance : float, optional 115 | Upper bound on the duality gap that the resulting transport map must guarantee. 116 | tau : float, optional 117 | Threshold at which to perform numerical stabilization 118 | epsilon0 : float, optional 119 | Starting value for exponentially-decreasing epsilon 120 | max_iter : int, optional 121 | Maximum number of iterations. Print a warning and return if it is reached, even without convergence. 
122 | Returns 123 | ------- 124 | transport_map : 2-D ndarray 125 | The entropy-regularized unbalanced transport map 126 | """ 127 | C = np.asarray(C, dtype=np.float64) 128 | epsilon_scalings = 5 129 | scale_factor = np.exp(-np.log(epsilon) / epsilon_scalings) 130 | 131 | I, J = C.shape 132 | dx, dy = np.ones(I) / I, np.ones(J) / J 133 | 134 | p = G 135 | q = np.ones(C.shape[1]) * np.average(G) 136 | 137 | u, v = np.zeros(I), np.zeros(J) 138 | a, b = np.ones(I), np.ones(J) 139 | 140 | epsilon_i = epsilon0 * scale_factor 141 | current_iter = 0 142 | 143 | for e in range(epsilon_scalings + 1): 144 | duality_gap = np.inf 145 | u = u + epsilon_i * np.log(a) 146 | v = v + epsilon_i * np.log(b) # absorb 147 | epsilon_i = epsilon_i / scale_factor 148 | _K = np.exp(-C / epsilon_i) 149 | alpha1 = lambda1 / (lambda1 + epsilon_i) 150 | alpha2 = lambda2 / (lambda2 + epsilon_i) 151 | K = np.exp((np.array([u]).T - C + np.array([v])) / epsilon_i) 152 | a, b = np.ones(I), np.ones(J) 153 | old_a, old_b = a, b 154 | threshold = tolerance if e == epsilon_scalings else 1e-6 155 | 156 | while duality_gap > threshold: 157 | for i in range(batch_size if e == epsilon_scalings else 5): 158 | current_iter += 1 159 | old_a, old_b = a, b 160 | a = (p / (K.dot(np.multiply(b, dy)))) ** alpha1 * np.exp( 161 | -u / (lambda1 + epsilon_i) 162 | ) 163 | b = (q / (K.T.dot(np.multiply(a, dx)))) ** alpha2 * np.exp( 164 | -v / (lambda2 + epsilon_i) 165 | ) 166 | 167 | # stabilization 168 | if max(max(abs(a)), max(abs(b))) > tau: 169 | u = u + epsilon_i * np.log(a) 170 | v = v + epsilon_i * np.log(b) # absorb 171 | K = np.exp((np.array([u]).T - C + np.array([v])) / epsilon_i) 172 | a, b = np.ones(I), np.ones(J) 173 | 174 | if current_iter >= max_iter: 175 | logg.warn( 176 | "Reached max_iter with duality gap still above threshold. Returning" 177 | ) 178 | return (K.T * a).T * b 179 | 180 | # The real dual variables. 
def transport_stablev2(
    C,
    lambda1,
    lambda2,
    epsilon,
    scaling_iter,
    G,
    tau,
    epsilon0,
    extra_iter,
    inner_iter_max,
    **ignored
):
    """
    Compute the optimal transport with stabilized numerics.

    Args:
        C: cost matrix to transport cell i to cell j
        lambda1: regularization parameter for marginal constraint for p.
        lambda2: regularization parameter for marginal constraint for q.
        epsilon: entropy parameter
        scaling_iter: number of scaling iterations
        G: growth value for input cells
        tau: threshold on the magnitude of the scaling vectors; when it is
            exceeded the scalings are absorbed into the potentials u, v and
            reset to one. ``None`` disables both this stabilization and the
            epsilon warm start.
        epsilon0: starting entropy parameter of the warm start (only used
            when ``tau`` is not ``None``)
        extra_iter: number of additional scaling iterations run after the
            main loop, without stabilization
        inner_iter_max: number of iterations between two epsilon adjustments
            during the warm start
        **ignored: extra keyword arguments are accepted and ignored

    Returns:
        2-D ndarray: the kernel scaled by the final a and b, divided by the
        number of columns of C.
    """
    # Coerce like optimal_transport_duality_gap does, so nested lists work.
    C = np.asarray(C, dtype=np.float64)
    n_rows, n_cols = C.shape

    warm_start = tau is not None
    epsilon_final = epsilon

    def get_reg(n):  # exponential decreasing
        return (epsilon0 - epsilon_final) * np.exp(-n) + epsilon_final

    epsilon_i = epsilon0 if warm_start else epsilon
    dx = np.ones(n_rows) / n_rows
    dy = np.ones(n_cols) / n_cols

    p = np.asarray(G, dtype=np.float64)
    q = np.ones(n_cols) * np.average(p)

    u = np.zeros(len(p))
    v = np.zeros(len(q))
    # `a` is initialized too so that zero iterations still yield a result.
    a = np.ones(len(p))
    b = np.ones(len(q))
    K = np.exp(-C / epsilon_i)

    alpha1 = lambda1 / (lambda1 + epsilon_i)
    alpha2 = lambda2 / (lambda2 + epsilon_i)
    epsilon_index = 0
    iterations_since_epsilon_adjusted = 0

    def scaling_step(b):
        # One alternating update of the row scaling a and column scaling b,
        # using the current kernel, potentials and exponents.
        a = (p / (K.dot(np.multiply(b, dy)))) ** alpha1 * np.exp(
            -u / (lambda1 + epsilon_i)
        )
        b = (q / (K.T.dot(np.multiply(a, dx)))) ** alpha2 * np.exp(
            -v / (lambda2 + epsilon_i)
        )
        return a, b

    for _ in range(scaling_iter):
        a, b = scaling_step(b)

        # stabilization (skipped when no threshold is given: comparing a
        # float with None raises TypeError on Python 3)
        iterations_since_epsilon_adjusted += 1
        if tau is not None and max(np.abs(a).max(), np.abs(b).max()) > tau:
            u = u + epsilon_i * np.log(a)
            v = v + epsilon_i * np.log(b)  # absorb
            K = np.exp((np.array([u]).T - C + np.array([v])) / epsilon_i)
            a = np.ones(len(p))
            b = np.ones(len(q))

        if warm_start and iterations_since_epsilon_adjusted == inner_iter_max:
            epsilon_index += 1
            iterations_since_epsilon_adjusted = 0
            # Absorb with the old epsilon, then rebuild the kernel with the
            # new one.
            u = u + epsilon_i * np.log(a)
            v = v + epsilon_i * np.log(b)  # absorb
            epsilon_i = get_reg(epsilon_index)
            alpha1 = lambda1 / (lambda1 + epsilon_i)
            alpha2 = lambda2 / (lambda2 + epsilon_i)
            K = np.exp((np.array([u]).T - C + np.array([v])) / epsilon_i)
            a = np.ones(len(p))
            b = np.ones(len(q))

    for _ in range(extra_iter):
        a, b = scaling_step(b)

    R = (K.T * a).T * b

    return R / n_cols
45 | If not specified, we set it to be adata.obs['cell_group_A']. 46 | cell_group_B: `np.array`, optional (default: None) 47 | A boolean array of the size adata.shape[0] for defining population B. 48 | If not specified, we set it to be adata.obs['cell_group_A']. 49 | FDR_cutoff: `float`, optional (default: 0.05) 50 | Cut off for the corrected Pvalue of each gene. Only genes below this 51 | cutoff will be shown. 52 | sort_by: `float`, optional (default: 'ratio') 53 | The key to sort the differentially expressed genes. The key can be: 'ratio' or 'Qvalue'. 54 | min_frac_expr: `float`, optional (default: 0.05) 55 | Minimum expression fraction among selected states for a 56 | gene to be considered for DGE analysis. 57 | pseudocount: `int`, optional (default: 1) 58 | pseudo count for taking the gene expression ratio between the two groups 59 | 60 | Returns 61 | ------- 62 | diff_gene_A: `pd.DataFrame` 63 | Genes differentially expressed in cell state group A, ranked 64 | by the ratio of mean expressions between 65 | the two groups, with the top being more differentially expressed. 66 | diff_gene_B: `pd.DataFrame` 67 | Genes differentially expressed in cell state group B, ranked 68 | by the ratio of mean expressions between 69 | the two groups, with the top being more differentially expressed. 70 | """ 71 | 72 | diff_gene_A = [] 73 | diff_gene_B = [] 74 | 75 | if sort_by not in ["ratio", "Qvalue"]: 76 | raise ValueError(f"sort_by must be among {['ratio','Qvalue']}") 77 | 78 | state_info = np.array(adata.obs["state_info"]) 79 | inputs = [cell_group_A, cell_group_B] 80 | selections = [] 81 | for cell_group_X in inputs: 82 | if type(cell_group_X) is str: 83 | if cell_group_X in list(set(state_info)): 84 | group_idx = state_info == cell_group_X 85 | else: 86 | raise ValueError( 87 | "cell_group_A (or B) should be either a cluster name among adata.obs['state_info'] or a boolean array of size adata.shape[0]." 
def identify_TF_and_surface_marker(
    gene_list,
    species="mouse",
    go_term_keywards=(
        "cell surface",
        "cell cycle",
        "regulation of transcription",
        "DNA-binding transcription factor activity",
        "regulation of transcription by RNA polymerase II",
    ),
):
    """
    From an input gene list, return the go term and annotation for each gene,
    and further select the genes identified as TF or cell surface protein

    Parameters
    ----------
    gene_list: `list`
        Gene names, used as the `external_gene_name` filter of the BioMart query.
    species: `str`, optional (default: 'mouse')
        Either 'mouse' or 'human'; selects the Ensembl dataset to query.
    go_term_keywards: sequence of `str`, optional
        GO term names. A gene is reported for a term when one of its GO term
        names (`name_1006`) is exactly equal to that term.

    Returns
    ------
    results:
        Full annotation for each gene
    df_anno
        Only include genes identified as TF or cell surface protein
        (columns `gene` and `annotation`; empty when no keyword is given).

    Raises
    ------
    ValueError
        If `species` is neither 'mouse' nor 'human'.
    """

    dataset_by_species = {
        "mouse": "mmusculus_gene_ensembl",  # Mouse genes (GRCm39)
        "human": "hsapiens_gene_ensembl",  # Human genes (GRCh38.p13)
    }
    if species not in dataset_by_species:
        raise ValueError("species must be either mouse or human")
    dataset = dataset_by_species[species]

    from gseapy.parser import Biomart

    bm = Biomart()
    # NOTE(review): the results of these four discovery calls are not used.
    # They are kept in case the client relies on them before `query`;
    # confirm against gseapy and drop them if they are purely informational.
    bm.get_marts()
    bm.get_datasets(mart="ENSEMBL_MART_ENSEMBL")
    bm.get_attributes(dataset=dataset)
    bm.get_filters(dataset=dataset)  # Gene Name(s) [e.g. MT-TF]

    ## query results
    results = bm.query(
        dataset=dataset,
        attributes=[
            "ensembl_gene_id",
            "external_gene_name",
            "namespace_1003",
            "name_1006",
        ],
        filters={"external_gene_name": gene_list},
    )
    results = results.dropna()

    df_list = []
    for term in go_term_keywards:
        matched = results.loc[results["name_1006"] == term, "external_gene_name"]
        # unique() keeps first-appearance order, so the output is stable
        # across runs (a set would not be).
        df_tmp = pd.DataFrame({"gene": list(matched.unique())})
        df_tmp["annotation"] = term
        df_list.append(df_tmp)

    if df_list:
        df_anno = pd.concat(df_list, ignore_index=True)
    else:
        # pd.concat raises on an empty list; return an empty table instead.
        df_anno = pd.DataFrame(columns=["gene", "annotation"])
    return results, df_anno
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | set SPHINXPROJ=scvelo 13 | 14 | if "%1" == "" goto help 15 | 16 | %SPHINXBUILD% >NUL 2>NUL 17 | if errorlevel 9009 ( 18 | echo. 19 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 20 | echo.installed, then set the SPHINXBUILD environment variable to point 21 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 22 | echo.may add the Sphinx directory to PATH. 23 | echo. 24 | echo.If you don't have Sphinx installed, grab it from 25 | echo.http://sphinx-doc.org/ 26 | exit /b 1 27 | ) 28 | 29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 30 | goto end 31 | 32 | :help 33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 34 | 35 | :end 36 | popd 37 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.19.4 2 | scipy>=1.5.4 3 | scikit-learn>=0.23.2 4 | scanpy>=1.6.0 5 | pandas>=1.1.4 6 | statsmodels==0.13.2 7 | plotnine>=0.7.1 8 | matplotlib>=3.3.3 9 | fastcluster>=1.1.26 # used to generate the clustered heat map of barcodes 10 | anndata>=0.7.5 11 | numba>=0.52.0 # related to issues of GPUipatch error 12 | scikit-misc>=0.1.3 # used for loess smoothing 13 | leidenalg>=0.7.0 14 | ete3>=3.1.2 15 | ipywidgets 16 | 17 | # Just until rtd.org understands pyproject.toml 18 | setuptools 19 | setuptools_scm 20 | typing_extensions 21 | importlib_metadata 22 | sphinx_rtd_theme>=0.3 23 | 
sphinx_autodoc_typehints<=1.6 24 | Jinja2<3.1 25 | 26 | # converting notebooks to html 27 | ipykernel 28 | sphinx==3.5.4 29 | nbsphinx==0.8.0 30 | -------------------------------------------------------------------------------- /docs/source/_ext/edit_on_github.py: -------------------------------------------------------------------------------- 1 | """ 2 | Loosely based on gist.github.com/MantasVaitkunas/7c16de233812adcb7028 3 | """ 4 | 5 | import os 6 | import warnings 7 | 8 | __licence__ = "BSD (3 clause)" 9 | 10 | 11 | def get_github_repo(app, path): 12 | if path.endswith(".ipynb"): 13 | return app.config.github_nb_repo, "/" 14 | return app.config.github_repo, "/docs/source/" 15 | 16 | 17 | def html_page_context(app, pagename, templatename, context, doctree): 18 | if templatename != "page.html": 19 | return 20 | 21 | if not app.config.github_repo: 22 | warnings.warn("`github_repo `not specified") 23 | return 24 | 25 | if not app.config.github_nb_repo: 26 | nb_repo = f"{app.config.github_repo}/docs/source" 27 | warnings.warn("`github_nb_repo `not specified. Setting to `{nb_repo}`") 28 | app.config.github_nb_repo = nb_repo 29 | 30 | path = os.path.relpath(doctree.get("source"), app.builder.srcdir) 31 | repo, conf_py_path = get_github_repo(app, path) 32 | 33 | # For sphinx_rtd_theme. 
34 | context["display_github"] = True 35 | context["github_user"] = "ShouWenWang-Lab" 36 | context["github_version"] = "master" 37 | context["github_repo"] = repo 38 | context["conf_py_path"] = conf_py_path 39 | 40 | 41 | def setup(app): 42 | app.add_config_value("github_nb_repo", "", False) 43 | app.add_config_value("github_repo", "", True) 44 | app.connect("html-page-context", html_page_context) 45 | -------------------------------------------------------------------------------- /docs/source/_static/colab-badge.svg: -------------------------------------------------------------------------------- 1 | Open in ColabOpen in Colab 2 | -------------------------------------------------------------------------------- /docs/source/_static/custom.css: -------------------------------------------------------------------------------- 1 | /* ReadTheDocs theme colors */ 2 | 3 | .wy-nav-top { background-color: #404040 } 4 | .wy-nav-content { max-width: 950px } 5 | .wy-side-nav-search { background-color: transparent } 6 | .wy-side-nav-search input[type="text"] { border-width: 0 } 7 | 8 | 9 | /* Custom classes */ 10 | .small { font-size:40% } 11 | .smaller, .pr { font-size:70% } 12 | 13 | 14 | /* Custom classes with bootstrap buttons */ 15 | 16 | .tutorial, 17 | .tutorial:visited, 18 | .tutorial:hover 19 | { 20 | /* text-decoration: underline; */ 21 | font-weight: bold; 22 | padding: 2px 5px; 23 | white-space: nowrap; 24 | max-width: 100%; 25 | background: #EF3270; 26 | border: solid 1px #EF3270; 27 | border-radius: .25rem; 28 | font-size: 75%; 29 | /* font-family: SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier New",Courier,monospace; */ 30 | color: #404040; 31 | overflow-x: auto; 32 | box-sizing: border-box; 33 | } 34 | 35 | 36 | /* Formatting of RTD markup: rubrics and sidebars and admonitions */ 37 | 38 | /* rubric */ 39 | .rst-content p.rubric { 40 | margin-bottom: 6px; 41 | font-weight: normal; 42 | } 43 | .rst-content p.rubric::after { content: ":" } 44 | 
45 | /* sidebar */ 46 | .rst-content .sidebar { 47 | /* margin: 0px 0px 0px 12px; */ 48 | padding-bottom: 0px; 49 | } 50 | .rst-content .sidebar p { 51 | margin-bottom: 12px; 52 | } 53 | .rst-content .sidebar p, 54 | .rst-content .sidebar ul, 55 | .rst-content .sidebar dl { 56 | font-size: 13px; 57 | } 58 | 59 | /* less space after bullet lists in admonitions like warnings and notes */ 60 | .rst-content .section .admonition ul { 61 | margin-bottom: 6px; 62 | } 63 | 64 | 65 | /* Code: literals and links */ 66 | 67 | .rst-content tt.literal, 68 | .rst-content code.literal { 69 | color: #404040; 70 | } 71 | /* slim font weight for non-link code */ 72 | .rst-content tt:not(.xref), 73 | .rst-content code:not(.xref), 74 | .rst-content *:not(a) > tt.xref, 75 | .rst-content *:not(a) > code.xref, 76 | .rst-content a > tt.xref, 77 | .rst-content a > code.xref, 78 | .rst-content dl:not(.docutils) a > tt.xref, 79 | 80 | 81 | /* Just one box for annotation code for a less noisy look */ 82 | 83 | .rst-content .annotation { 84 | padding: 2px 5px; 85 | background-color: white; 86 | border: 1px solid #e1e4e5; 87 | } 88 | .rst-content .annotation tt, 89 | .rst-content .annotation code { 90 | padding: 0 0; 91 | background-color: transparent; 92 | border: 0 solid transparent; 93 | } 94 | 95 | 96 | /* Parameter lists */ 97 | 98 | .rst-content dl:not(.docutils) dl dt { 99 | /* mimick numpydoc’s blockquote style */ 100 | font-weight: normal; 101 | background: none transparent; 102 | border-left: none; 103 | margin: 0 0 12px; 104 | padding: 3px 0 0; 105 | font-size: 100%; 106 | } 107 | 108 | .rst-content dl:not(.docutils) dl dt code { 109 | font-size: 100%; 110 | font-weight: normal; 111 | background: none transparent; 112 | border: none; 113 | padding: 0 2px; 114 | } 115 | 116 | .rst-content dl:not(.docutils) dl dt a.reference>code { 117 | text-decoration: underline; 118 | } 119 | 120 | /* Mimick rubric style used for other headings */ 121 | .rst-content dl:not(.docutils) dl > dt { 122 | 
font-weight: bold; 123 | background: none transparent; 124 | border-left: none; 125 | margin: 0 0 12px; 126 | padding: 3px 0 0; 127 | font-size: 100%; 128 | } 129 | /* Parameters contain parts and don’t need bold font */ 130 | .rst-content dl.field-list dl > dt { font-weight: unset } 131 | /* Add colon between return tuple element name and type */ 132 | .rst-content dl:not(.docutils) dl > dt .classifier::before { content: ' : ' } 133 | 134 | /* Function headers */ 135 | 136 | .rst-content dl:not(.docutils) dt { 137 | background: #edf0f2; 138 | color: #404040; 139 | border-top: solid 3px #343131; 140 | } 141 | 142 | .rst-content .section ul li p:last-child { 143 | margin-bottom: 0; 144 | margin-top: 0; 145 | } 146 | 147 | /* Adjust width of navigation bar on mobile */ 148 | @media screen and (max-width: 768px) { 149 | .header-bar { 150 | display: none; 151 | } 152 | 153 | .wy-nav-content-wrap { 154 | margin-left: 0px; 155 | } 156 | 157 | .wy-nav-side { 158 | width: 300px; 159 | } 160 | 161 | .wy-nav-side.shift { 162 | max-width: 320px; 163 | } 164 | 165 | /* Fix sidebar adjust */ 166 | .rst-versions { 167 | width: 40%; 168 | max-width: 320px; 169 | } 170 | } 171 | 172 | /* Handle landscape */ 173 | @media screen and (min-width: 377px) { 174 | .wy-nav-content-wrap.shift { 175 | left: 320px; 176 | } 177 | } 178 | 179 | /* make height responsive for notebook figures */ 180 | .rst-content .image-reference img { 181 | max-width: 100% !important; 182 | height: auto !important; 183 | } 184 | -------------------------------------------------------------------------------- /docs/source/_static/nbviewer-badge.svg: -------------------------------------------------------------------------------- 1 | 2 | 18 | 20 | 21 | 23 | image/svg+xml 24 | 26 | 27 | 28 | 29 | 30 | 32 | 35 | 38 | 41 | 44 | 47 | 50 | 53 | 56 | 59 | 62 | 65 | 68 | 69 | 89 | 93 | 98 | 102 | 103 | 105 | 111 | 112 | 115 | 119 | 123 | 127 | 128 | 133 | 141 | 148 | 156 | Open in nbviewer 159 | 160 | 167 | Open in 
nbviewer 172 | 173 | 183 | 184 | 188 | 193 | 197 | 201 | 205 | 210 | 214 | 223 | 224 | 225 | 230 | 234 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | -------------------------------------------------------------------------------- /docs/source/_templates/autosummary/base.rst: -------------------------------------------------------------------------------- 1 | :github_url: {{ fullname | modurl }} 2 | 3 | {{ fullname | api_image }} 4 | 5 | {% extends "!autosummary/base.rst" %} 6 | 7 | .. http://www.sphinx-doc.org/en/stable/ext/autosummary.html#customizing-templates 8 | -------------------------------------------------------------------------------- /docs/source/_templates/autosummary/class.rst: -------------------------------------------------------------------------------- 1 | :github_url: {{ fullname | modurl }} 2 | 3 | {{ fullname | escape | underline}} 4 | 5 | .. currentmodule:: {{ module }} 6 | 7 | .. add toctree option to make autodoc generate the pages 8 | 9 | .. autoclass:: {{ objname }} 10 | 11 | {% block attributes %} 12 | {% if attributes %} 13 | .. rubric:: Attributes 14 | 15 | .. autosummary:: 16 | :toctree: . 17 | {% for item in attributes %} 18 | ~{{ fullname }}.{{ item }} 19 | {%- endfor %} 20 | {% endif %} 21 | {% endblock %} 22 | 23 | {% block methods %} 24 | {% if methods %} 25 | .. rubric:: Methods 26 | 27 | .. autosummary:: 28 | :toctree: . 29 | {% for item in methods %} 30 | {%- if item != '__init__' %} 31 | ~{{ fullname }}.{{ item }} 32 | {%- endif -%} 33 | {%- endfor %} 34 | {% endif %} 35 | {% endblock %} 36 | -------------------------------------------------------------------------------- /docs/source/about.rst: -------------------------------------------------------------------------------- 1 | About CoSpar 2 | ------------ 3 | 4 | The following information is adapted from `Wang et al. Nat. Biotech. (2022) `_. 
5 | High-throughput single-cell measurements have enabled unbiased studies of development and differentiation, leading to numerous methods for dynamic inference. However, single-cell RNA sequencing (scRNA-seq) data alone does not fully constrain the differentiation dynamics, and existing methods inevitably operate under simplified assumptions. In parallel, the lineage information of individual cells can be profiled simultaneously along with their transcriptome by using a heritable and expressible DNA barcode as a lineage tracer (which we call lineage-tracing scRNA-seq, or LT-scSeq). The barcode may remain static or evolve over time. 6 | 7 | 8 | However, the lineage data could be challenging to analyze. These challenges include stochastic differentiation and variable expansion of clones; cell loss during analysis; barcode homoplasy wherein cells acquire the same barcode despite not having a lineage relationship; access to clones only at a single time point; and clonal dispersion due to a lag time between labeling cells and the first sampling (the lag time is necessary to allow the clone to grow large for resampling). 9 | 10 | 11 | CoSpar, developed by `Wang et al. Nat. Biotech. (2022) `_, is among the first tools to perform dynamic inference by integrating state and lineage information. It solves for the transition probability map from cell states at an earlier time point to states at a later time point. It achieves accuracy and robustness by learning a sparse and coherent transition map, where neighboring initial states share similar yet sparse fate outcomes. Built upon the finite-time transition map, CoSpar can 1) infer fate potential of early states; 2) detect early fate bias (thus, fate boundary) among a heterogeneous progenitor population; 3) identify putative driver genes for fate bifurcation; 4) infer fate coupling or hierarchy; 5) visualize gene expression dynamics along an inferred differentiation trajectory. 
CoSpar also provides several methods to analyze clonal data by itself, including the clonal coupling between fate clusters and the bias of a clone towards a given fate, etc. We envision CoSpar to be a platform to integrate key methods needed to analyze data with both state and lineage information. 12 | 13 | .. image:: https://user-images.githubusercontent.com/4595786/113746452-56e4cb00-96d4-11eb-8278-0aac0469ba9d.png 14 | :width: 1000px 15 | :align: center 16 | (copyright: Nature Biotechnology) 17 | 18 | Coherent sparse optimization 19 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 20 | 21 | One formalization of dynamic inference is to identify a transition map, a matrix :math:`T_{ij} (t_1,t_2)`, which describes the probability of a cell, initially in some state :math:`i` at time :math:`t_1`, giving rise to progeny in a state :math:`j` at time :math:`t_2`. We define :math:`T_{ij} (t_1,t_2)` specifically as the fraction of progeny from state :math:`i` that occupy state :math:`j`. This transition matrix averages the effects of cell division, loss, and differentiation, but it nonetheless proves useful for several applications. 22 | 23 | 24 | We make two reasonable assumptions about the nature of biological dynamics to constrain inference of the transition map. We assume the map to be a sparse matrix, since most cells can access just a few states during an experiment. And we assume the map to be locally coherent, meaning that cells in similar states should share similar fate outcomes. These constraints together force transition maps to be parsimonious and smooth, which we reasoned would make them robust to practical sources of noise in LT-scSeq experiments. As inputs, CoSpar requires a barcode-by-cell matrix :math:`I(t)` that encodes the clonal information at time :math:`t`, and a data matrix for observed cell states (e.g. from scRNA-seq). Clonal data may have nested structure reflecting subclonal labeling. 
25 | 26 | CoSpar is formulated assuming that we have information on the same clones at more than one time point. More often, one might observe clones at only a later time point :math:`t_2`. For these cases, inference is not fully constrained: one must learn both the transition map :math:`T` and the initial clonal data :math:`I(t_1)`. We approximate a solution additionally constrained by a minimum global transport cost. We show that this approach is robust to initialization in tested datasets. Finally, coherence and sparsity provide reasonable constraints to the simpler problem of predicting dynamics from state heterogeneity alone without lineage data. We extended CoSpar to this case. Thus, CoSpar is flexible to different experimental designs, as summarized by the above figure. Our core algorithms are illustrated below. 27 | 28 | 29 | .. image:: https://user-images.githubusercontent.com/4595786/113746670-93b0c200-96d4-11eb-89c0-d1e7d72383e7.png 30 | :width: 1000px 31 | :align: center 32 | (copyright: Nature Biotechnology) 33 | 34 | Below, we formalize the coherent sparse optimization by which CoSpar infers the transition map. 35 | 36 | In a model of stochastic differentiation, cells in a clone are distributed across states with a time-dependent density vector :math:`\vec{P}(t)`. A transition map :math:`T` directly links clonal density profiles :math:`\vec{P}(t_{1,2})` between time points: 37 | 38 | .. math:: 39 | \begin{equation} 40 | P_i(t_2 )= \sum_j P_j(t_1 )T_{ji}(t_1,t_2), \quad \quad \quad \text{Eq. (1)} 41 | \end{equation} 42 | 43 | From multiple clonal observations, our goal is to learn :math:`T`. To do so, we consider each observed cell transcriptome as a distinct state (:math:`\vec{P}(t)\in R^{N_t}` for :math:`N_t` cells profiled at time :math:`t`), and introduce :math:`S(t)\in R^{N_t\times N_t}` as a matrix of cell-cell similarity over all observed cell states, including those lacking clonal information. 
Denoting :math:`I(t)\in \{0,1\}^{M\times N_t}` as a clone-by-cell matrix of :math:`M` clonal barcodes, the density profiles of observed clones :math:`P(t)\in R^{M\times N_t}` are estimated as :math:`P(t)\approx I(t)S(t)`. In matrix form, the constraint in Eq. (1) from all observed clones then becomes :math:`P(t_2)\approx P(t_1)T(t_1,t_2)`. 44 | 45 | 46 | Since the matrices :math:`P(t_{1,2})` are determined directly from data, with enough information :math:`T(t_1,t_2)` could be learnt by matrix inversion. However, in most cases, the number of clones is far less than the number of states. To constrain the map, we require that: 1) :math:`T` is a sparse matrix; 2) :math:`T` is locally coherent; and 3) :math:`T` is a non-negative matrix. With these requirements, the inference becomes an optimization problem: 47 | 48 | .. math:: 49 | \begin{equation} 50 | \min_{T} ||T||_1+\alpha ||LT||_2, \; \text{s.t.} \; ||P(t_2)- P(t_1) T(t_1,t_2)||_{2}\le\epsilon;\; T\ge 0; \text{Normalization}. 51 | \end{equation} 52 | 53 | Here, :math:`‖T‖_1` quantifies the sparsity of the matrix T through its l-1 norm, while :math:`‖LT‖_2` quantifies the local coherence of :math:`T` (:math:`L` is the Laplacian of the cell state similarity graph, and :math:`LT` is the local divergence). The remaining constraints enforce the observed clonal dynamics, non-negativity of :math:`T`, and map normalization, respectively. At :math:`\alpha=0`, the minimization takes the form of Lasso, an algorithm for compressed sensing. Our formulation extends compressed sensing from vectors to matrices, and to enforce local coherence. The local coherence extension is reminiscent of the fused Lasso problem. 54 | An iterative, heuristic approach solves the CoSpar optimization efficiently, replacing :math:`(\alpha,\epsilon)` with parameters that explicitly control coherence and sparsity. See `Wang et al. Nat. Biotech. (2022) `_ for a detailed exposition of the method and its implementation. 
55 | -------------------------------------------------------------------------------- /docs/source/api.rst: -------------------------------------------------------------------------------- 1 | .. automodule:: cospar 2 | 3 | API 4 | === 5 | 6 | .. include:: 7 | 8 | Import CoSpar as:: 9 | 10 | import cospar as cs 11 | 12 | 13 | CoSpar is built around the :class:`~anndata.AnnData` object (usually called `adata`). For each cell, we store its RNA count matrix at ``adata.X``, the gene names at ``adata.var_names``,time information at ``adata.obs['time_info']``, state annotation at ``adata.obs['state_info']``, clonal information at ``adata.obsm['X_clone']``, and 2-d embedding at ``adata.obsm['X_emb']``. 14 | 15 | 16 | Once the :class:`~anndata.AnnData` object is initialized via :func:`cs.pp.initialize_adata_object`, the typical flow of analysis is to 1) perform preprocessing and dimension reduction (``cs.pp.*``); 2) visualize and analyze clonal data alone (``cs.pl.*``); 3) infer transition map (``cs.tmap.*``); and 4) analyze inferred map (``cs.tl.*``) and then visualize the results with the plotting functions (``cs.pl.*``). Typically, each ``cs.tl.*`` function has a corresponding ``cs.pl.*`` function. We also provide several built-in datasets (``cs.datasets.*``) and miscellaneous functions to assist with the analysis (``cs.hf.*``). See :doc:`tutorial ` for details. 17 | 18 | 19 | 20 | Preprocessing 21 | ------------- 22 | 23 | .. autosummary:: 24 | :toctree: . 25 | 26 | pp.initialize_adata_object 27 | pp.get_highly_variable_genes 28 | pp.remove_cell_cycle_correlated_genes 29 | pp.get_X_pca 30 | pp.get_X_emb 31 | pp.get_X_clone 32 | pp.get_state_info 33 | pp.refine_state_info_by_marker_genes 34 | pp.refine_state_info_by_leiden_clustering 35 | 36 | 37 | 38 | 39 | Transition map inference 40 | ------------------------ 41 | 42 | 43 | .. autosummary:: 44 | :toctree: . 
45 | 46 | tmap.infer_Tmap_from_multitime_clones 47 | tmap.infer_Tmap_from_one_time_clones 48 | tmap.infer_Tmap_from_state_info_alone 49 | tmap.infer_Tmap_from_clonal_info_alone 50 | 51 | 52 | Analysis 53 | ---------- 54 | 55 | .. autosummary:: 56 | :toctree: . 57 | 58 | tl.clonal_fate_bias 59 | tl.fate_biased_clones 60 | tl.fate_coupling 61 | tl.fate_hierarchy 62 | tl.fate_map 63 | tl.fate_potency 64 | tl.fate_bias 65 | tl.progenitor 66 | tl.iterative_differentiation 67 | tl.differential_genes 68 | 69 | 70 | Plotting 71 | --------- 72 | 73 | 74 | **Clone analysis** (clone visualization, clustering etc.) 75 | 76 | .. autosummary:: 77 | :toctree: . 78 | 79 | pl.clones_on_manifold 80 | pl.barcode_heatmap 81 | pl.clonal_fate_bias 82 | pl.fate_coupling 83 | pl.fate_hierarchy 84 | pl.clonal_fates_across_time 85 | pl.clonal_reports 86 | pl.visualize_tree 87 | 88 | 89 | 90 | **Transition map analysis** (fate bias etc.) 91 | 92 | .. autosummary:: 93 | :toctree: . 94 | 95 | pl.single_cell_transition 96 | pl.fate_map 97 | pl.fate_potency 98 | pl.fate_bias 99 | pl.progenitor 100 | pl.iterative_differentiation 101 | pl.gene_expression_dynamics 102 | pl.fate_coupling 103 | pl.fate_hierarchy 104 | 105 | **General** 106 | 107 | .. autosummary:: 108 | :toctree: . 109 | 110 | pl.embedding 111 | pl.embedding_genes 112 | pl.gene_expression_on_manifold 113 | pl.gene_expression_heatmap 114 | settings.set_figure_params 115 | 116 | 117 | Datasets 118 | -------- 119 | 120 | .. autosummary:: 121 | :toctree: . 122 | 123 | datasets.hematopoiesis 124 | datasets.hematopoiesis_130K 125 | datasets.hematopoiesis_subsampled 126 | datasets.hematopoiesis_Gata1_states 127 | datasets.lung 128 | datasets.reprogramming 129 | datasets.reprogramming_Day0_3_28 130 | datasets.synthetic_bifurcation 131 | 132 | Help functions 133 | -------------- 134 | 135 | .. autosummary:: 136 | :toctree: . 
137 | 138 | hf.read 139 | hf.save_map 140 | hf.save_preprocessed_adata 141 | hf.check_adata_structure 142 | hf.check_available_choices 143 | hf.update_time_ordering 144 | hf.update_data_description 145 | tl.get_normalized_covariance 146 | hf.get_X_clone_with_reference_ordering 147 | 148 | 149 | Simulations 150 | ----------- 151 | 152 | .. autosummary:: 153 | :toctree: . 154 | 155 | simulate.linear_differentiation_model 156 | simulate.bifurcation_model 157 | simulate.quantify_correlation_with_ground_truth_fate_bias_BifurcationModel 158 | simulate.quantify_transition_peak_TPR_LinearDifferentiation 159 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | import logging 3 | import os 4 | import sys 5 | from datetime import datetime 6 | from pathlib import Path 7 | from typing import Mapping, Optional, Union 8 | 9 | from sphinx.application import Sphinx 10 | from sphinx.ext import autosummary 11 | 12 | # remove PyCharm’s old six module 13 | if "six" in sys.modules: 14 | print(*sys.path, sep="\n") 15 | for pypath in list(sys.path): 16 | if any(p in pypath for p in ["PyCharm", "pycharm"]) and "helpers" in pypath: 17 | sys.path.remove(pypath) 18 | del sys.modules["six"] 19 | 20 | import matplotlib # noqa 21 | 22 | matplotlib.use("agg") 23 | 24 | HERE = Path(__file__).parent 25 | sys.path.insert(0, f"{HERE.parent.parent}") 26 | sys.path.insert(0, os.path.abspath("_ext")) 27 | import cospar 28 | 29 | logger = logging.getLogger(__name__) 30 | 31 | 32 | # -- General configuration ------------------------------------------------ 33 | 34 | needs_sphinx = "1.7" 35 | 36 | extensions = [ 37 | "sphinx.ext.autodoc", 38 | "sphinx.ext.doctest", 39 | "sphinx.ext.coverage", 40 | "sphinx.ext.mathjax", 41 | "sphinx.ext.autosummary", 42 | "sphinx.ext.napoleon", 43 | "sphinx.ext.intersphinx", 44 | "sphinx.ext.githubpages", 45 | 
"sphinx_autodoc_typehints", 46 | "nbsphinx", 47 | "edit_on_github", 48 | ] 49 | 50 | 51 | # Generate the API documentation when building 52 | autosummary_generate = True 53 | napoleon_google_docstring = False 54 | napoleon_numpy_docstring = True 55 | napoleon_include_init_with_doc = False 56 | napoleon_use_rtype = False 57 | napoleon_custom_sections = [("Params", "Parameters")] 58 | 59 | intersphinx_mapping = dict( 60 | python=("https://docs.python.org/3", None), 61 | anndata=("https://anndata.readthedocs.io/en/latest/", None), 62 | scanpy=("https://scanpy.readthedocs.io/en/latest/", None), 63 | cospar=("https://cospar.readthedocs.io/en/latest/", None), 64 | cellrank=("https://cellrank.readthedocs.io/en/latest/", None), 65 | ) 66 | 67 | templates_path = ["_templates"] 68 | source_suffix = [".rst", ".ipynb"] 69 | master_doc = "index" 70 | 71 | # General information about the project. 72 | project = "CoSpar" 73 | author = "Shou-Wen Wang" 74 | title = "CoSpar - dynamic inference by integrating state and lineage information" 75 | copyright = f"{datetime.now():%Y}, {author}" 76 | 77 | version = cospar.__version__.replace(".dirty", "") 78 | release = version 79 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] 80 | pygments_style = "sphinx" 81 | todo_include_todos = False 82 | 83 | # # Add notebooks prolog to Google Colab and nbviewer 84 | # nbsphinx_prolog = r""" 85 | # {% set docname = 'github/theislab/cospar_notebooks/blob/master/' + env.doc2path(env.docname, base=None) %} 86 | # .. raw:: html 87 | 88 | #
89 | # 90 | # Open In Colab 91 | # 92 | # Open In nbviewer 93 | #
94 | # """ 95 | 96 | # -- Options for HTML output ---------------------------------------------- 97 | 98 | html_theme = "sphinx_rtd_theme" 99 | html_theme_options = dict(navigation_depth=1, titles_only=True) 100 | github_repo = "cospar" 101 | github_nb_repo = "cospar_notebooks" 102 | html_static_path = ["_static"] 103 | 104 | 105 | def setup(app): 106 | app.add_stylesheet("custom.css") 107 | 108 | 109 | # -- Options for other output ------------------------------------------ 110 | 111 | htmlhelp_basename = "cospardoc" 112 | title_doc = f"{project} documentation" 113 | 114 | latex_documents = [(master_doc, f"{project}.tex", title_doc, author, "manual")] 115 | man_pages = [(master_doc, project, title_doc, [author], 1)] 116 | texinfo_documents = [ 117 | (master_doc, project, title_doc, author, project, title, "Miscellaneous") 118 | ] 119 | 120 | 121 | # -- generate_options override ------------------------------------------ 122 | 123 | 124 | def process_generate_options(app: Sphinx): 125 | genfiles = app.config.autosummary_generate 126 | 127 | if genfiles and not hasattr(genfiles, "__len__"): 128 | env = app.builder.env 129 | genfiles = [ 130 | env.doc2path(x, base=None) 131 | for x in env.found_docs 132 | if Path(env.doc2path(x)).is_file() 133 | ] 134 | if not genfiles: 135 | return 136 | 137 | from sphinx.ext.autosummary.generate import generate_autosummary_docs 138 | 139 | ext = app.config.source_suffix 140 | genfiles = [ 141 | genfile + (not genfile.endswith(tuple(ext)) and ext[0] or "") 142 | for genfile in genfiles 143 | ] 144 | 145 | suffix = autosummary.get_rst_suffix(app) 146 | if suffix is None: 147 | return 148 | 149 | generate_autosummary_docs( 150 | genfiles, 151 | builder=app.builder, 152 | warn=logger.warning, 153 | info=logger.info, 154 | suffix=suffix, 155 | base_path=app.srcdir, 156 | imported_members=True, 157 | app=app, 158 | ) 159 | 160 | 161 | autosummary.process_generate_options = process_generate_options 162 | 163 | 164 | # -- GitHub URLs for 
def get_obj_module(qualname):
    """Get a module/class/attribute and its original module by qualname.

    Trailing dotted components are stripped from ``qualname`` until the
    remainder is a key of ``sys.modules``. The last stripped component is
    looked up on that module, and the one after it (if any) on the result.

    Parameters
    ----------
    qualname
        Dotted name such as ``"pkg.mod"``, ``"pkg.mod.Class"`` or
        ``"pkg.mod.Class.attr"``.

    Returns
    -------
    tuple
        ``(obj, module)``. ``obj`` is ``None`` when ``qualname`` names a
        module. ``module`` is the module recorded in the object's
        ``__module__``, which may differ from the one it was imported through.
    """
    modname = qualname
    classname = None
    attrname = None
    while modname not in sys.modules:
        attrname = classname
        modname, classname = modname.rsplit(".", 1)

    # retrieve object and find original module name
    if classname:
        cls = getattr(sys.modules[modname], classname)
        modname = cls.__module__
        obj = getattr(cls, attrname) if attrname else cls
    else:
        obj = None

    return obj, sys.modules[modname]


def get_linenos(obj):
    """Get an object's first and last source line numbers.

    Returns ``(None, None)`` when ``inspect.getsourcelines`` raises
    ``TypeError`` for ``obj``.
    """
    try:
        lines, start = inspect.getsourcelines(obj)
    except TypeError:
        return None, None
    else:
        return start, start + len(lines) - 1


# set project_dir: project/docs/source/conf.py/../../.. → project/
project_dir = Path(__file__).parent.parent.parent
github_url_cospar = "https://github.com/ShouWenWang-Lab/cospar/tree/master"
# This constant is read by modurl(); it was commented out, which made every
# foreign qualname containing "read_loom" fail with NameError.
github_url_read_loom = "https://github.com/theislab/anndata/tree/master/anndata"
github_url_read = "https://github.com/theislab/scanpy/tree/master"
github_url_scanpy = "https://github.com/theislab/scanpy/tree/master/scanpy"
from pathlib import PurePosixPath


def modurl(qualname):
    """Get the full GitHub URL for some object's qualname.

    Objects whose module file lives under ``project_dir`` link into the
    CoSpar repository. Anything else is treated as re-exported from another
    package, and the base URL is chosen from the qualname: ``read_loom``
    first, then ``read``, otherwise scanpy. A ``#Lstart-Lend`` fragment is
    appended when the object's source lines can be determined.
    """
    obj, module = get_obj_module(qualname)
    github_url = github_url_cospar
    try:
        path = PurePosixPath(Path(module.__file__).resolve().relative_to(project_dir))
    except ValueError:
        # trying to document something from another package
        if "read_loom" in qualname:
            github_url = github_url_read_loom
        elif "read" in qualname:
            github_url = github_url_read
        else:
            github_url = github_url_scanpy
        # keep only "<package dir>/<file>" of the foreign module's path
        path = "/".join(module.__file__.split("/")[-2:])
    start, end = get_linenos(obj)
    fragment = f"#L{start}-L{end}" if start and end else ""
    return f"{github_url}/{path}{fragment}"


def api_image(qualname: str) -> Optional[str]:
    """Return an RST ``image`` directive for ``<qualname>.png`` if it exists.

    The file is looked up next to this configuration file. An empty string is
    returned when no such image is present.
    """
    path = Path(__file__).parent / f"{qualname}.png"
    print(path, path.is_file())
    return (
        f".. image:: {path.name}\n   :width: 200\n   :align: right"
        if path.is_file()
        else ""
    )
class PrettyTypedField(PyTypedField):
    """Typed field that renders its entries as a definition list.

    Each entry becomes a ``definition_list_item`` whose term is the argument
    name, followed by ``" : "`` and the type when a type is known, and whose
    definition is the entry's description.
    """

    # container node used when more than one entry is rendered
    list_type = nodes.definition_list

    def make_field(
        self,
        types: Dict[str, List[nodes.Node]],
        domain: str,
        items: Tuple[str, List[nodes.inline]],
        env: BuildEnvironment = None,
    ) -> nodes.field:
        """Build the ``field`` node for this group of documented arguments.

        Parameters
        ----------
        types
            Mapping from argument name to the nodes describing its type.
            Entries are popped as they are consumed.
        domain
            Domain name forwarded to ``make_xrefs``.
        items
            ``(fieldarg, content)`` pairs, one per documented argument.
        env
            Build environment forwarded to ``make_xrefs``.

        Returns
        -------
        nodes.field
            A field whose name is ``self.label`` and whose body is either a
            single item (exactly one entry and ``self.can_collapse`` is true)
            or a ``list_type`` container holding all items.
        """

        def makerefs(rolename, name, node):
            # cross-reference helper bound to this call's domain and env
            return self.make_xrefs(rolename, domain, name, node, env=env)

        def handle_item(
            fieldarg: str, content: List[nodes.inline]
        ) -> nodes.definition_list_item:
            # term: the argument name, rendered through the field's role
            head = nodes.term()
            head += makerefs(self.rolename, fieldarg, addnodes.literal_strong)
            fieldtype = types.pop(fieldarg, None)
            if fieldtype is not None:
                head += nodes.Text(" : ")
                if len(fieldtype) == 1 and isinstance(fieldtype[0], nodes.Text):
                    # a plain-text type is turned into cross-references
                    (text_node,) = fieldtype  # type: nodes.Text
                    head += makerefs(
                        self.typerolename, text_node.astext(), addnodes.literal_emphasis
                    )
                else:
                    # anything else is appended unchanged
                    head += fieldtype

            # definition: the description wrapped in a single paragraph
            body_content = nodes.paragraph("", "", *content)
            body = nodes.definition("", body_content)

            return nodes.definition_list_item("", head, body)

        fieldname = nodes.field_name("", self.label)
        if len(items) == 1 and self.can_collapse:
            # single entry: use the item directly, without a list container
            fieldarg, content = items[0]
            bodynode = handle_item(fieldarg, content)
        else:
            bodynode = self.list_type()
            for fieldarg, content in items:
                bodynode += handle_item(fieldarg, content)
        fieldbody = nodes.field_body("", bodynode)
        return nodes.field("", fieldname, fieldbody)
can_collapse=ft.can_collapse, 341 | ) 342 | if isinstance(ft, PyTypedField) 343 | else ft 344 | for ft in PyObject.doc_field_types 345 | ] 346 | -------------------------------------------------------------------------------- /docs/source/getting_started.rst: -------------------------------------------------------------------------------- 1 | Getting Started 2 | --------------- 3 | 4 | Here, we explain the basics of using CoSpar. CoSpar requires the count matrix ``not log-transformed``. This is specifically assumed in selecting highly variable genes, in computing PCA, and in the HighVar method for initializing the joint optimization using a single clonal time point. CoSpar also assumes that the dataset has more than one time point. However, if you have only a snapshot, you can still manually cluster the cells into more than one time point to use CoSpar. 5 | 6 | First, import CoSpar with:: 7 | 8 | import cospar as cs 9 | 10 | For better visualization you can change the matplotlib settings to our defaults with:: 11 | 12 | cs.settings.set_figure_params() 13 | 14 | If you want to adjust parameters for a particular plot, just pass the parameters into this function. 15 | 16 | 17 | The workflow of CoSpar is summarized by the following illustration: 18 | 19 | 20 | .. image:: https://user-images.githubusercontent.com/4595786/145308761-a6532c6b-ac5b-4457-a00e-4a0f3972a360.png 21 | :width: 1000px 22 | :align: center 23 | 24 | Also, below is a summary of the main analyses after we infer the transition map, and its connection with the mathematical formulation in `Wang et al. Nat. Biotech. (2022) `_. 25 | 26 | .. 
image:: https://user-images.githubusercontent.com/4595786/161853386-04126382-6a9a-4817-b6a8-e5e950977357.jpg 27 | :width: 1000px 28 | :align: center 29 | 30 | Initialization 31 | '''''''''''''' 32 | Given the gene expression matrix, clonal matrix, and other information, initialize the anndata object using:: 33 | 34 | adata_orig = cs.pp.initialize_adata_object(adata=None,**params) 35 | 36 | The :class:`~anndata.AnnData` object ``adata_orig`` stores the count matrix (``adata_orig.X``), gene names (``adata_orig.var_names``), and temporal annotation of cells (``adata_orig.obs['time_info']``). Optionally, you can also provide the clonal matrix ``X_clone``, selected PCA matrix ``X_pca``, the embedding matrix ``X_emb``, and the state annotation ``state_info``, which will be stored at ``adata_orig.obsm['X_clone']``, ``adata_orig.obsm['X_pca']``, ``adata_orig.obsm['X_emb']``, and ``adata_orig.obs['state_info']``, respectively. 37 | 38 | If an adata object is provided as an input, the initialization function will try to automatically generate the correct data structure, and all annotations associated with the provided adata will remain intact. You can add new annotations to supplement or override existing annotations in the adata object. 39 | 40 | 41 | .. raw:: html 42 | 43 | 44 | 45 | If you do not have a dataset yet, you can still play around using one of the built-in datasets, e.g.:: 46 | 47 | adata_orig = cs.datasets.hematopoiesis_subsampled() 48 | 49 | 50 | 51 | Preprocessing & dimension reduction 52 | ''''''''''''''''''''''''''''''''''' 53 | Assuming basic quality control (excluding cells with low read count etc.) have been done, we provide basic preprocessing (gene selection and normalization) and dimension reduction related analysis (PCA, UMAP embedding etc.) 
at ``cs.pp.*``:: 54 | 55 | cs.pp.get_highly_variable_genes(adata_orig,**params) 56 | cs.pp.remove_cell_cycle_correlated_genes(adata_orig,**params) 57 | cs.pp.get_X_pca(adata_orig,**params) 58 | cs.pp.get_X_emb(adata_orig,**params) 59 | cs.pp.get_state_info(adata_orig,**params) 60 | cs.pp.get_X_clone(adata_orig,**params) 61 | 62 | The first step ``get_highly_variable_genes`` also includes count matrix normalization. The second step, which is optional but recommended, removes cell cycle correlated genes among the selected highly variable genes. In ``get_X_pca``, we apply z-score transformation for each gene expression before computing the PCA. In ``get_X_emb``, we simply use the umap function from :mod:`~scanpy`. With ``get_state_info``, we extract state information using leiden clustering implemented in :mod:`~scanpy`. 63 | In ``get_X_clone``, we facilitate the conversion of the raw clonal data into a cell-by-clone matrix. As mentioned before, this preprocessing assumes that the count matrix is not log-transformed. 64 | 65 | 66 | 67 | 68 | Basic clonal analysis 69 | '''''''''''''''''''''' 70 | We provide a few plotting functions to help visually explore the clonal data before any downstream analysis. You can visualize clones on state manifold directly:: 71 | 72 | cs.pl.clones_on_manifold(adata_orig,**params) 73 | 74 | You can generate the barcode heatmap across given clusters to inspect clonal behavior:: 75 | 76 | cs.pl.barcode_heatmap(adata_orig,**params) 77 | 78 | You can quantify the clonal coupling across different fate clusters:: 79 | 80 | cs.tl.fate_coupling(adata_orig,source='X_clone',**params) 81 | cs.pl.fate_coupling(adata_orig,source='X_clone',**params) 82 | 83 | Strong coupling implies the existence of bi-potent or multi-potent cell states at the time of barcoding. 
You can visualize the fate hierarchy by a simple neighbor-joining method:: 84 | 85 | cs.tl.fate_hierarchy(adata_orig,source='X_clone',**params) 86 | cs.pl.fate_hierarchy(adata_orig,source='X_clone',**params) 87 | 88 | Finally, you can infer the fate bias :math:`-log_{10}(P_{value})` of each clone towards a designated fate cluster:: 89 | 90 | cs.pl.clonal_fate_bias(adata_orig,**params) 91 | 92 | A biased clone towards this cluster has a statistically significant cell fraction within or outside this cluster. 93 | 94 | 95 | 96 | 97 | Transition map inference 98 | '''''''''''''''''''''''' 99 | The core of the software is efficient and robust inference of a transition map by integrating state and clonal information. If the dataset has multiple clonal time points, you can run:: 100 | 101 | adata=cs.tmap.infer_Tmap_from_multitime_clones(adata_orig,clonal_time_points=None,later_time_point=None,**params) 102 | 103 | It subsamples the input data at selected time points and computes the transition map, stored at ``adata.uns['transition_map']`` and ``adata.uns['intraclone_transition_map']``, with the latter restricted to intra-clone transitions. Depending on ``later_time_point``, it has two modes of inference: 104 | 105 | 1) When ``later_time_point=None``, it infers a transition map between neighboring time points. For example, for clonal_time_points=['day1', 'day2', 'day3'], it computes transitions for pairs ('day1', 'day2') and ('day2', 'day3'), but not for ('day1', 'day3'). 106 | 107 | 2) If ``later_time_point`` is specified, it generates a transition map between this time point and each of the earlier time points. In the previous example, if ``later_time_point=='day3'``, we infer transitions for pairs ('day1', 'day3') and ('day2', 'day3'). This applies to the following map inference functions. 
108 | 109 | 110 | ------------------------------------- 111 | 112 | If the dataset has only one clonal time point, you can run:: 113 | 114 | adata=cs.tmap.infer_Tmap_from_one_time_clones(adata_orig,initial_time_points=None, later_time_point=None,initialize_method='OT',**params) 115 | 116 | which jointly optimizes the transition map and the initial clonal structure. It requires initializing the transition map using state information alone. We provide two methods for such initialization: 1) ``OT`` for using the standard optimal transport approach; 2) ``HighVar`` for a customized approach, assuming that cells similar in gene expression across time points share clonal origin. For the ``OT`` method, if you wish to utilize the growth rate information as in Waddington-OT, you can directly pass the growth rate estimate for each cell to the input AnnData object at ``adata_orig.obs["cell_growth_rate"]``. Depending on the choice, the initialized map is stored at ``adata.uns['OT_transition_map']`` or ``adata.uns['HighVar_transition_map']``. The final product is stored at ``adata.uns['transition_map']``. 117 | 118 | ``HighVar`` converts highly variable genes into pseudo multi-time clones and infers a putative map with coherent sparse optimization. We find the ``HighVar`` method performs better than the ``OT`` method, especially when there are large differentiation effects over the observed time window, or batch effects. 119 | 120 | If ``initial_time_points`` and ``later_time_point`` are not specified, a map with transitions from all time points to the last time point is generated. 
121 | 122 | ------------------------------------- 123 | 124 | If you do not have any clonal information, you can still run:: 125 | 126 | adata=cs.tmap.infer_Tmap_from_state_info_alone(adata_orig,initial_time_points=None,later_time_point=None,initialize_method='OT',**params) 127 | 128 | It is the same as ``cs.tmap.infer_Tmap_from_one_time_clones`` except that we assume a pseudo clonal data where each cell at the later time point occupies a unique clone. 129 | 130 | ------------------------------------- 131 | 132 | We also provide simple methods that infer transition map from clonal information alone:: 133 | 134 | adata=cs.tmap.infer_Tmap_from_clonal_info_alone(adata_orig,clonal_time_points=None,later_time_point=None,**params) 135 | 136 | The result is stored at ``adata.uns['clonal_transition_map']``. 137 | 138 | Analysis and visualization 139 | '''''''''''''''''''''''''' 140 | 141 | Finally, each of the computed transition maps can be explored on state embedding at the single-cell level using a variety of analysis and plotting functions. There are some common parameters: 1) ``source``, for choosing one of the pre-computed transition maps (or the raw clonal data) for analysis; 2) ``selected_fates``, for visualizing the fate bias towards/against given fate clusters; 3) ``map_backward``, for analyzing forward or backward transitions; 4) ``method``, for different methods in fate probability analysis. See :doc:`CoSpar basics <20210121_cospar_tutorial_v2>` for more details. 142 | 143 | 144 | Below, we frame the task in the language of analyzing backward transitions for convenience. 
To see where a cell came from, run:: 145 | 146 | cs.pl.single_cell_transition(adata,**params) 147 | 148 | To visualize the fate probability of initial cell states, run:: 149 | 150 | cs.tl.fate_map(adata,**params) 151 | cs.pl.fate_map(adata,**params) 152 | 153 | To infer the fate bias of initial cell states between two fate clusters, run:: 154 | 155 | cs.tl.fate_bias(adata,**params) 156 | cs.pl.fate_bias(adata,**params) 157 | 158 | To infer the dynamic trajectory towards given fate clusters, run:: 159 | 160 | cs.tl.progenitor(adata,**params) 161 | cs.pl.progenitor(adata,**params) 162 | 163 | or, alternatively if you have data with multiple clonal time points, run:: 164 | 165 | cs.tl.iterative_differentiation(adata,**params) 166 | cs.pl.iterative_differentiation(adata,**params) 167 | 168 | The first method (``cs.tl.progenitor``) assumes two input fate clusters and infers each trajectory by thresholding the corresponding fate bias. The second method (``cs.tl.iterative_differentiation``) infers the trajectory by iteratively tracing a selected fate cluster all the way back to its putative origin at the initial time point. 
For both methods, the inferred trajectory for each fate will be saved at ``adata.obs[f'diff_trajectory_{source}_{fate_name}']``, and we can explore the gene expression dynamics along this trajectory using:: 169 | 170 | cs.pl.gene_expression_dynamics(adata,**params) 171 | 172 | Additionally, the first method (``cs.pl.progenitor``) exports the selected ancestor states of the selected fate clusters at ``adata.obs[f'progenitor_{source}_{fate_name}']``, which can be used to infer the driver genes for fate bifurcation by running:: 173 | 174 | cs.pl.differential_genes(adata,**params) 175 | 176 | 177 | If there are multiple mature fate clusters, you can infer their differentiation coupling from the fate probabilities of initial cells or the raw clonal matrix by:: 178 | 179 | cs.tl.fate_coupling(adata,source='transition_map',**params) 180 | cs.pl.fate_coupling(adata,source='transition_map',**params) 181 | 182 | You can also infer the fate hierarchy from:: 183 | 184 | cs.tl.fate_hierarchy(adata,source='transition_map',**params) 185 | cs.pl.fate_hierarchy(adata,source='transition_map',**params) 186 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | |PyPI| |PyPIDownloads| |Docs| 2 | 3 | CoSpar - dynamic inference by integrating state and lineage information 4 | ======================================================================= 5 | 6 | .. image:: https://user-images.githubusercontent.com/4595786/104988296-b987ce00-59e5-11eb-8dbe-a463b355a9fd.png 7 | :width: 300px 8 | :align: left 9 | 10 | **CoSpar** is a toolkit for dynamic inference from lineage-traced single cells. |br| 11 | The methods are based on 12 | `Wang et al. Nat. Biotech. (2022) `_. 13 | 14 | Dynamic inference based on single-cell state measurement alone requires serious simplifications. 
On the other hand, direct dynamic measurement via lineage tracing only captures partial information and its interpretation is challenging. CoSpar integrates both state and lineage information to infer a finite-time transition map of a development/differentiation system. It gains superior robustness and accuracy by exploiting both the local coherence and sparsity of differentiation transitions, i.e., neighboring initial states share similar yet sparse fate outcomes. Building around the :class:`~anndata.AnnData` object, CoSpar provides an integrated analysis framework for datasets with both state and lineage information. When only state information is available, CoSpar also improves upon existing dynamic inference methods by imposing sparsity and coherence. It offers essential toolkits for analyzing lineage data, state information, or their integration. 15 | 16 | CoSpar's key applications 17 | ^^^^^^^^^^^^^^^^^^^^^^^^^ 18 | - infer transition maps from lineage data, state measurements, or their integration. 19 | - predict the fate bias of progenitor cells. 20 | - order cells along a differentiation trajectory leading to a given cell fate. 21 | - predict gene expression dynamics along a trajectory. 22 | - predict genes whose expression correlates with future fate outcome. 23 | - generate a putative fate hierarchy, ordering fates by their lineage distances. 24 | 25 | Package development relocation 26 | ------------------------------ 27 | Effective on April 1st 2023, Shou-Wen Wang is leaving the Klein lab to start `his own group at Westlake University `_, and he will no longer maintain this repository. Further development of CoSpar will continue in his own lab under this repository `https://github.com/ShouWenWang-Lab/cospar `_. Please reach out there for any issues related to CoSpar. 28 | 29 | Recorded Talks 30 | ^^^^^^^^^^^^^^ 31 | - `Jun 1: Single-Cell Data Science 2022 `_. 
This is a 20-min short talk focusing more on the utility of CoSpar: `talk video `_ 32 | 33 | - `Oct 19, 2022: Invited MIA talk at Broad Institute `_. This is a one-hour talk focusing on the Machine Learning part of CoSpar: `talk video `_. The talk slides can be found `here `_. 34 | 35 | 36 | .. Upcoming talks 37 | .. ^^^^^^^^^^^^^^ 38 | .. - `Sep 15: Temporal Single-Cell Analysis (SCOG) `_ 39 | .. - `Nov 12: Single Cell Biology (SCB) `_ 40 | 41 | 42 | 43 | Reference 44 | ^^^^^^^^^ 45 | `S.-W. Wang*, M. Herriges, K. Hurley, D. Kotton, A. M. Klein*, CoSpar identifies early cell fate biases from single cell transcriptomic and lineage information, Nat. Biotech. (2022) `_. [* corresponding authors] 46 | 47 | 48 | 49 | Support 50 | ^^^^^^^ 51 | Feel free to submit an `issue `_ 52 | or send us an `email `_. 53 | Your help to improve CoSpar is highly appreciated. 54 | 55 | Acknowledgment 56 | ^^^^^^^^^^^^^^ 57 | Shou-Wen Wang wants to acknowledge `Xiaojie Qiu `_ for inspiring him to make this website. He also wants to acknowledge the community that maintains `scanpy `_ and `scvelo `_, where he learned about proper code documentation. He thanks Tal Debrobrah Scully, Qiu Wu and other lab members for testing the package. Shou-Wen wants to thank especially Allon Klein for his mentorship. Finally, he wants to acknowledge the generous support of the Damon Runyon Foundation through the Quantitative Biology Fellowship. 58 | 59 | 60 | .. toctree:: 61 | :caption: Main 62 | :maxdepth: 1 63 | :hidden: 64 | 65 | about 66 | api 67 | release_note 68 | 69 | 70 | .. toctree:: 71 | :caption: Tutorial 72 | :maxdepth: 1 73 | :hidden: 74 | 75 | installation 76 | getting_started 77 | 20210602_loading_data 78 | 20211010_preprocessing 79 | 20211010_clonal_analysis 80 | 20211010_map_inference 81 | 20211010_map_analysis 82 | 83 | .. 
toctree:: 84 | :caption: Examples 85 | :maxdepth: 1 86 | :hidden: 87 | 88 | 20210121_all_hematopoietic_data_v3 89 | 20210121_reprogramming_static_barcoding_v2 90 | 20210121_lung_data_v2 91 | 20210120_bifurcation_model_static_barcoding 92 | 20220402_simulate_differentiation 93 | 94 | 95 | .. |PyPI| image:: https://img.shields.io/pypi/v/cospar.svg 96 | :target: https://pypi.org/project/cospar 97 | 98 | .. |PyPIDownloads| image:: https://pepy.tech/badge/cospar 99 | :target: https://pepy.tech/project/cospar 100 | 101 | .. |Docs| image:: https://readthedocs.org/projects/cospar/badge/?version=latest 102 | :target: https://cospar.readthedocs.io 103 | 104 | 105 | .. 106 | .. |travis| image:: https://travis-ci.org/theislab/cospar.svg?branch=master 107 | :target: https://travis-ci.org/theislab/cospar 108 | 109 | 110 | .. |br| raw:: html 111 | 112 |
113 | 114 | .. 115 | .. |meet| raw:: html 116 | 117 | 118 | 119 | .. |dim| raw:: html 120 | 121 | 122 | 123 | -------------------------------------------------------------------------------- /docs/source/installation.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ------------ 3 | 4 | CoSpar requires Python 3.6 or later. We recommend using Miniconda_ for package management. For a computer that does not have a package management tool yet, please install Miniconda_ first, and activate it by running the following command in the terminal:: 5 | 6 | source ~/.bash_profile 7 | 8 | PyPI 9 | ^^^^ 10 | 11 | Install CoSpar from PyPI_ using:: 12 | 13 | pip install --upgrade cospar 14 | 15 | If you get a ``Permission denied`` error, use ``pip install --upgrade cospar --user`` instead. 16 | 17 | If you get errors related to 'gcc', try to specify the following gcc path for installation:: 18 | 19 | env CXX=/usr/local/Cellar/gcc/8.2.0/bin/g++-8 CC=/usr/local/Cellar/gcc/8.2.0/bin/gcc-8 pip install cospar 20 | 21 | If you get errors for version conflicts with existing packages, try:: 22 | 23 | pip install --ignore-installed --upgrade cospar 24 | 25 | Development Version 26 | ^^^^^^^^^^^^^^^^^^^ 27 | 28 | To work with the latest development version, install from GitHub_ using:: 29 | 30 | pip install git+https://github.com/ShouWenWang-Lab/cospar 31 | 32 | or:: 33 | 34 | git clone https://github.com/ShouWenWang-Lab/cospar 35 | pip install -e cospar 36 | 37 | ``-e`` is short for ``--editable`` and links the package to the original cloned location such that pulled changes are also reflected in the environment. 38 | 39 | 40 | Dependencies 41 | ^^^^^^^^^^^^ 42 | 43 | - `anndata `_ - annotated data object. 44 | - `scanpy `_ - toolkit for single-cell analysis. 
45 | - `numpy `_, `scipy `_, `pandas `_, `scikit-learn `_, `matplotlib `_, `plotnine `_, 46 | 47 | 48 | 49 | Jupyter Notebook 50 | ^^^^^^^^^^^^^^^^ 51 | 52 | To run the tutorials in a notebook locally, please install:: 53 | 54 | conda install notebook 55 | 56 | and run ``jupyter notebook`` in the terminal. If you get the error ``Not a directory: 'xdg-settings'``, 57 | use ``jupyter notebook --no-browser`` instead and open the url manually (or use this 58 | `bugfix `_). 59 | 60 | 61 | If you run into issues, do not hesitate to approach us or raise a `GitHub issue`_. 62 | 63 | .. _Miniconda: http://conda.pydata.org/miniconda.html 64 | .. _PyPI: https://pypi.org/project/cospar 65 | .. _Github: https://github.com/ShouWenWang-Lab/cospar/ 66 | .. _`Github issue`: https://github.com/ShouWenWang-Lab/cospar/issues/new/choose 67 | 68 | 69 | Testing CoSpar in a new environment 70 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 71 | 72 | In case you want to test cospar without affecting existing python packages, you can create a new conda environment and install CoSpar there:: 73 | 74 | conda create -n test_cospar python=3.6 75 | conda activate test_cospar 76 | pip install cospar 77 | 78 | Now, install jupyter notebook in this environment:: 79 | 80 | pip install --user ipykernel 81 | 82 | If you encounter an error related to ``nbconvert``, run (this is optional):: 83 | 84 | pip3 install --upgrade --user nbconvert 85 | 86 | Finally, install the jupyter notebook kernel related to this environment:: 87 | 88 | python -m ipykernel install --user --name=test_cospar 89 | 90 | Now, you can open jupyter notebook by running ``jupyter notebook`` in the terminal, and select the kernel ``test_cospar`` to run CoSpar. 
91 | -------------------------------------------------------------------------------- /docs/source/release_note.rst: -------------------------------------------------------------------------------- 1 | Release notes 2 | ------------- 3 | 4 | v0.2.1 5 | '''''' 6 | 7 | Major changes from v0.1.8 to v0.2.1: 8 | - Split each plotting function into two parts: computing the results (stored at cospar.tl.**) and actually plotting the result (stored at cospar.pl.**). 9 | - Update the notebooks to accommodate these changes. 10 | - Update the datasets in the cloud to add more annotations. 11 | - Re-organize the content of the plot, tool, and tmap modules. 12 | - Fix stochasticity when running HighVar method to generate the initialized map. 13 | - Fix generating X_clone from the cell_id-by-barcode_id list. 14 | - Add a few more functions: :func:`cospar.pl.clonal_fates_across_time`, :func:`cospar.pl.clonal_reports`, :func:`cospar.pl.embedding_genes`, :func:`cospar.tl.fate_biased_clones` 15 | - Update :func:`cospar.pl.barcode_heatmap` to order clones in a better way 16 | - Fix the docs. 17 | - Adopt "Raise ValueError" method for error handling. 18 | - Unify error checking at the beginning of several functions. 19 | 20 | v0.1.8 21 | '''''' 22 | 23 | This is used in running the notebooks that generate figures for the published paper. To run the original notebooks, you should switch to this version. 
24 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: cospar 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | dependencies: 6 | - pip 7 | - pytest 8 | - pytest-cov 9 | - pytest-datadir 10 | - ipywidgets 11 | - numpy<=1.21 12 | - scipy>=1.5.4 13 | - scikit-learn>=0.23.2 14 | - scanpy>=1.6.0 15 | - pandas>=1.1.4 16 | - statsmodels==0.13.2 17 | - plotnine>=0.7.1 18 | - matplotlib>=3.3.3 19 | - fastcluster>=1.1.26 # used to generate the clustered heat map of barcodes 20 | - anndata>=0.7.5 21 | - numba>=0.52.0 # related to issues of GPUipatch error 22 | - scikit-misc>=0.1.3 # used for loess smoothing 23 | - leidenalg>=0.7.0 24 | - ete3>=3.1.2 25 | - click==8.0.4 # related to black import 26 | - black==22.1.0 # related to black import 27 | -------------------------------------------------------------------------------- /pypi.rst: -------------------------------------------------------------------------------- 1 | |PyPI| |PyPIDownloads| |Docs| 2 | 3 | CoSpar - dynamic inference by integrating state and lineage information 4 | ======================================================================= 5 | 6 | .. image:: https://user-images.githubusercontent.com/4595786/104988296-b987ce00-59e5-11eb-8dbe-a463b355a9fd.png 7 | :width: 300px 8 | :align: left 9 | 10 | **CoSpar** is a toolkit for dynamic inference from lineage-traced single cells. 11 | The methods are based on 12 | `Wang et al. Nat. Biotech. (2022) `_. 13 | 14 | Dynamic inference based on single-cell state measurement alone requires serious simplifications. On the other hand, direct dynamic measurement via lineage tracing only captures partial information and its interpretation is challenging. CoSpar integrates both state and lineage information to infer a finite-time transition map of a development/differentiation system. 
It gains superior robustness and accuracy by exploiting both the local coherence and sparsity of differentiation transitions, i.e., neighboring initial states share similar yet sparse fate outcomes. Building around the anndata_ object, CoSpar provides an integrated analysis framework for datasets with both state and lineage information. When only state information is available, CoSpar also improves upon existing dynamic inference methods by imposing sparsity and coherence. It offers essential toolkits for analyzing lineage data, state information, or their integration. 15 | 16 | See ``_ for documentation and tutorials. 17 | 18 | 19 | Reference 20 | --------- 21 | `S.-W. Wang*, M. Herriges, K. Hurley, D. Kotton, A. M. Klein*, CoSpar identifies early cell fate biases from single cell transcriptomic and lineage information, Nat. Biotech. (2022) `_. [* corresponding authors] 22 | 23 | Support 24 | ------- 25 | Feel free to submit an `issue `_ 26 | or send us an `email `_. 27 | Your help to improve CoSpar is highly appreciated. 28 | 29 | .. |PyPI| image:: https://img.shields.io/pypi/v/cospar.svg 30 | :target: https://pypi.org/project/cospar 31 | 32 | .. |PyPIDownloads| image:: https://pepy.tech/badge/cospar 33 | :target: https://pepy.tech/project/cospar 34 | 35 | .. |Docs| image:: https://readthedocs.org/projects/cospar/badge/?version=latest 36 | :target: https://cospar.readthedocs.io 37 | 38 | 39 | .. 
_anndata: https://anndata.readthedocs.io 40 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools >= 40.6.0", "wheel"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [tool.black] 6 | #line-length = 10 # override black's default line-length 7 | exclude = ''' 8 | /( 9 | \.git 10 | | \.mypy_cache 11 | | \.tox 12 | | venv 13 | | \.venv 14 | | _build 15 | | buck-out 16 | | build 17 | | dist 18 | )/ 19 | ''' 20 | 21 | [tool.isort] 22 | # make it compatible with black 23 | profile = "black" 24 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.19.4 2 | scipy>=1.5.4 3 | scikit-learn>=0.23.2 4 | scanpy>=1.6.0 5 | pandas>=1.1.4 6 | statsmodels==0.13.2 7 | plotnine>=0.7.1 8 | matplotlib>=3.3.3 9 | fastcluster>=1.1.26 # used to generate the clustered heat map of barcodes 10 | anndata>=0.7.5 11 | numba>=0.52.0 # related to issues of GPUipatch error 12 | scikit-misc>=0.1.3 # used for loess smoothing 13 | leidenalg>=0.7.0 14 | ete3>=3.1.2 15 | ipywidgets 16 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from pathlib import Path 4 | 5 | from setuptools import find_packages, setup 6 | 7 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__)))) 8 | from cospar import __version__ 9 | 10 | setup( 11 | name="cospar", 12 | version=__version__, 13 | python_requires=">=3.6", 14 | install_requires=[ 15 | l.strip() for l in Path("requirements.txt").read_text("utf-8").splitlines() 16 | ], 17 | extras_require=dict( 18 | dev=["black==19.10b0", "pre-commit==2.5.1"], 19 | docs=[r for r in 
Path("docs/requirements.txt").read_text("utf-8").splitlines()], 20 | ), 21 | packages=find_packages(), # this is better than packages=["cospar"], which only include the top level files 22 | long_description_content_type="text/x-rst", 23 | author="Shou-Wen Wang", 24 | author_email="shouwen_wang@hms.harvard.edu", 25 | description="CoSpar: integrating state and lineage information for dynamic inference", 26 | long_description=Path("pypi.rst").read_text("utf-8"), 27 | license="BSD", 28 | url="https://github.com/ShouWenWang-Lab/cospar", 29 | download_url="https://github.com/ShouWenWang-Lab/cospar", 30 | keywords=[ 31 | "dynamic inference", 32 | "lineage tracing", 33 | "single cell", 34 | "transcriptomics", 35 | "differentiation", 36 | ], 37 | classifiers=[ 38 | "License :: OSI Approved :: BSD License", 39 | "Development Status :: 5 - Production/Stable", 40 | "Intended Audience :: Science/Research", 41 | "Natural Language :: English", 42 | "Programming Language :: Python :: 3", 43 | "Programming Language :: Python :: 3.6", 44 | "Programming Language :: Python :: 3.7", 45 | "Programming Language :: Python :: 3.8", 46 | "Topic :: Scientific/Engineering :: Bio-Informatics", 47 | "Topic :: Scientific/Engineering :: Visualization", 48 | ], 49 | ) 50 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllonKleinLab/cospar/ca54cad8a9db9a72152ba8a8b6d67d57eace4acb/tests/__init__.py -------------------------------------------------------------------------------- /tests/context.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) 5 | 6 | import cospar 7 | -------------------------------------------------------------------------------- /tests/data/cell_id.txt: 
-------------------------------------------------------------------------------- 1 | Cell_ID 2 | cell_10 3 | cell_13 4 | cell_18 5 | cell_32 6 | cell_70 7 | cell_80 8 | cell_90 9 | cell_97 10 | cell_108 11 | cell_117 12 | cell_136 13 | cell_138 14 | cell_143 15 | cell_144 16 | cell_147 17 | cell_160 18 | cell_162 19 | cell_166 20 | cell_191 21 | cell_199 22 | cell_205 23 | cell_206 24 | cell_214 25 | cell_218 26 | cell_232 27 | cell_235 28 | cell_239 29 | cell_253 30 | cell_260 31 | cell_269 32 | cell_282 33 | cell_286 34 | cell_289 35 | cell_293 36 | cell_295 37 | cell_300 38 | cell_306 39 | cell_319 40 | cell_320 41 | cell_345 42 | cell_363 43 | cell_369 44 | cell_376 45 | cell_384 46 | cell_394 47 | cell_413 48 | cell_415 49 | cell_418 50 | cell_421 51 | cell_426 52 | cell_427 53 | cell_430 54 | cell_446 55 | cell_447 56 | cell_454 57 | cell_464 58 | cell_478 59 | cell_495 60 | cell_497 61 | cell_504 62 | cell_505 63 | cell_525 64 | cell_532 65 | cell_537 66 | cell_547 67 | cell_551 68 | cell_558 69 | cell_562 70 | cell_563 71 | cell_569 72 | cell_593 73 | cell_610 74 | cell_634 75 | cell_664 76 | cell_687 77 | cell_693 78 | cell_703 79 | cell_704 80 | cell_717 81 | cell_720 82 | cell_723 83 | cell_732 84 | cell_755 85 | cell_773 86 | cell_774 87 | cell_775 88 | cell_778 89 | cell_790 90 | cell_791 91 | cell_803 92 | cell_840 93 | cell_846 94 | cell_852 95 | cell_855 96 | cell_874 97 | cell_889 98 | cell_898 99 | cell_926 100 | cell_942 101 | cell_944 102 | cell_953 103 | cell_955 104 | cell_957 105 | cell_962 106 | cell_963 107 | cell_972 108 | cell_986 109 | cell_991 110 | cell_1000 111 | cell_1002 112 | cell_1010 113 | cell_1032 114 | cell_1038 115 | cell_1042 116 | cell_1045 117 | cell_1063 118 | cell_1071 119 | cell_1083 120 | cell_1092 121 | cell_1107 122 | cell_1110 123 | cell_1117 124 | cell_1120 125 | cell_1122 126 | cell_1126 127 | cell_1166 128 | cell_1170 129 | cell_1190 130 | cell_1191 131 | cell_1192 132 | cell_1207 133 | cell_1245 134 | cell_1254 
135 | cell_1274 136 | cell_1304 137 | cell_1345 138 | cell_1358 139 | cell_1383 140 | cell_1384 141 | cell_1389 142 | cell_1390 143 | cell_1397 144 | cell_1404 145 | cell_1415 146 | cell_1418 147 | cell_1422 148 | cell_1428 149 | cell_1430 150 | cell_1442 151 | cell_1462 152 | cell_1467 153 | cell_1470 154 | cell_1490 155 | cell_1494 156 | cell_1495 157 | cell_1497 158 | cell_1506 159 | cell_1507 160 | cell_1531 161 | cell_1551 162 | cell_1556 163 | cell_1567 164 | cell_1576 165 | cell_1578 166 | cell_1588 167 | cell_1590 168 | cell_1593 169 | cell_1597 170 | cell_1600 171 | cell_1622 172 | cell_1625 173 | cell_1637 174 | cell_1654 175 | cell_1658 176 | cell_1660 177 | cell_1684 178 | cell_1686 179 | cell_1693 180 | cell_1697 181 | cell_1710 182 | cell_1712 183 | cell_1715 184 | cell_1728 185 | cell_1743 186 | cell_1747 187 | cell_1749 188 | cell_1787 189 | cell_1801 190 | cell_1809 191 | cell_1830 192 | cell_1832 193 | cell_1835 194 | cell_1856 195 | cell_1869 196 | cell_1870 197 | cell_1890 198 | cell_1892 199 | cell_1896 200 | cell_1901 201 | cell_1906 202 | cell_1925 203 | cell_1927 204 | cell_1928 205 | cell_1957 206 | cell_1969 207 | cell_1979 208 | cell_1984 209 | cell_1987 210 | cell_2022 211 | cell_2025 212 | cell_2076 213 | cell_2098 214 | cell_2107 215 | cell_2140 216 | cell_2172 217 | cell_2200 218 | cell_2206 219 | cell_2210 220 | cell_2213 221 | cell_2220 222 | cell_2227 223 | cell_2237 224 | cell_2246 225 | cell_2251 226 | cell_2264 227 | cell_2295 228 | cell_2296 229 | cell_2309 230 | cell_2331 231 | cell_2339 232 | cell_2345 233 | cell_2391 234 | cell_2393 235 | cell_2395 236 | cell_2402 237 | cell_2403 238 | cell_2408 239 | cell_2410 240 | cell_2421 241 | cell_2430 242 | cell_2431 243 | cell_2437 244 | cell_2445 245 | cell_2447 246 | cell_2453 247 | cell_2484 248 | cell_2486 249 | cell_2502 250 | cell_2504 251 | cell_2512 252 | cell_2536 253 | cell_2557 254 | cell_2560 255 | cell_2561 256 | cell_2569 257 | cell_2573 258 | cell_2592 259 | cell_2597 
260 | cell_2599 261 | cell_2635 262 | cell_2637 263 | cell_2642 264 | cell_2646 265 | cell_2653 266 | cell_2655 267 | cell_2661 268 | cell_2663 269 | cell_2669 270 | cell_2685 271 | cell_2686 272 | cell_2706 273 | cell_2709 274 | cell_2732 275 | cell_2744 276 | cell_2747 277 | cell_2766 278 | cell_2770 279 | cell_2771 280 | cell_2794 281 | cell_2796 282 | cell_2814 283 | cell_2821 284 | cell_2826 285 | cell_2839 286 | cell_2855 287 | cell_2859 288 | cell_2865 289 | cell_2866 290 | cell_2874 291 | cell_2877 292 | cell_2878 293 | cell_2879 294 | cell_2927 295 | cell_2929 296 | cell_2938 297 | cell_2942 298 | cell_2946 299 | cell_2950 300 | cell_2966 301 | cell_2968 302 | cell_2992 303 | cell_3008 304 | cell_3009 305 | cell_3014 306 | cell_3017 307 | cell_3024 308 | cell_3032 309 | cell_3045 310 | cell_3060 311 | cell_3079 312 | cell_3083 313 | cell_3093 314 | cell_3107 315 | cell_3117 316 | cell_3125 317 | cell_3132 318 | cell_3178 319 | cell_3182 320 | cell_3195 321 | cell_3221 322 | cell_3224 323 | cell_3226 324 | cell_3241 325 | cell_3246 326 | cell_3270 327 | cell_3275 328 | cell_3277 329 | cell_3290 330 | cell_3295 331 | cell_3315 332 | cell_3317 333 | cell_3318 334 | cell_3319 335 | cell_3322 336 | cell_3325 337 | cell_3329 338 | cell_3333 339 | cell_3341 340 | cell_3344 341 | cell_3355 342 | cell_3372 343 | cell_3374 344 | cell_3379 345 | cell_3395 346 | cell_3419 347 | cell_3428 348 | cell_3439 349 | cell_3450 350 | cell_3453 351 | cell_3457 352 | cell_3458 353 | cell_3466 354 | cell_3507 355 | cell_3511 356 | cell_3514 357 | cell_3532 358 | cell_3542 359 | cell_3548 360 | cell_3558 361 | cell_3559 362 | cell_3574 363 | cell_3579 364 | cell_3580 365 | cell_3583 366 | cell_3592 367 | cell_3601 368 | cell_3623 369 | cell_3628 370 | cell_3644 371 | cell_3646 372 | cell_3655 373 | cell_3664 374 | cell_3666 375 | cell_3670 376 | cell_3676 377 | cell_3689 378 | cell_3703 379 | cell_3712 380 | cell_3721 381 | cell_3726 382 | cell_3747 383 | cell_3782 384 | cell_3786 
385 | cell_3788 386 | cell_3797 387 | cell_3806 388 | cell_3812 389 | cell_3822 390 | cell_3829 391 | cell_3833 392 | cell_3871 393 | cell_3878 394 | cell_3897 395 | cell_3906 396 | cell_3912 397 | cell_3916 398 | cell_3920 399 | cell_3926 400 | cell_3928 401 | cell_3930 402 | cell_3940 403 | cell_3946 404 | cell_3964 405 | cell_3965 406 | cell_3974 407 | cell_3983 408 | cell_3987 409 | cell_3992 410 | cell_4017 411 | cell_4018 412 | cell_4021 413 | cell_4026 414 | cell_4038 415 | cell_4048 416 | cell_4049 417 | cell_4062 418 | cell_4068 419 | cell_4072 420 | cell_4074 421 | cell_4077 422 | cell_4087 423 | cell_4101 424 | cell_4117 425 | cell_4126 426 | cell_4128 427 | cell_4129 428 | cell_4136 429 | cell_4140 430 | cell_4144 431 | cell_4149 432 | cell_4184 433 | cell_4190 434 | cell_4193 435 | cell_4196 436 | cell_4212 437 | cell_4230 438 | cell_4235 439 | cell_4243 440 | cell_4250 441 | cell_4263 442 | cell_4279 443 | cell_4289 444 | cell_4297 445 | cell_4301 446 | cell_4312 447 | cell_4325 448 | cell_4330 449 | cell_4336 450 | cell_4342 451 | cell_4351 452 | cell_4386 453 | cell_4388 454 | cell_4391 455 | cell_4395 456 | cell_4399 457 | cell_4424 458 | cell_4426 459 | cell_4448 460 | cell_4450 461 | cell_4460 462 | cell_4477 463 | cell_4482 464 | cell_4504 465 | cell_4512 466 | cell_4530 467 | cell_4531 468 | cell_4533 469 | cell_4534 470 | cell_4557 471 | cell_4567 472 | cell_4570 473 | cell_4571 474 | cell_4576 475 | cell_4590 476 | cell_4601 477 | cell_4607 478 | cell_4609 479 | cell_4612 480 | cell_4622 481 | cell_4626 482 | cell_4630 483 | cell_4645 484 | cell_4665 485 | cell_4674 486 | cell_4678 487 | cell_4681 488 | cell_4682 489 | cell_4701 490 | cell_4719 491 | cell_4720 492 | cell_4721 493 | cell_4726 494 | cell_4737 495 | cell_4744 496 | cell_4748 497 | cell_4749 498 | cell_4769 499 | cell_4779 500 | cell_4781 501 | cell_4791 502 | cell_4797 503 | cell_4815 504 | cell_4819 505 | cell_4823 506 | cell_4830 507 | cell_4852 508 | cell_4857 509 | cell_4864 
510 | cell_4865 511 | cell_4890 512 | cell_4918 513 | cell_4928 514 | cell_4949 515 | cell_4950 516 | cell_4958 517 | cell_4959 518 | cell_4974 519 | cell_4984 520 | cell_5010 521 | cell_5013 522 | cell_5016 523 | cell_5021 524 | cell_5030 525 | cell_5037 526 | cell_5042 527 | cell_5055 528 | cell_5058 529 | cell_5077 530 | cell_5083 531 | cell_5085 532 | cell_5092 533 | cell_5101 534 | cell_5102 535 | cell_5107 536 | cell_5116 537 | cell_5119 538 | cell_5142 539 | cell_5154 540 | cell_5157 541 | cell_5163 542 | cell_5204 543 | cell_5213 544 | cell_5216 545 | cell_5218 546 | cell_5225 547 | cell_5234 548 | cell_5241 549 | cell_5242 550 | cell_5246 551 | cell_5266 552 | cell_5269 553 | cell_5309 554 | cell_5331 555 | cell_5347 556 | cell_5348 557 | cell_5375 558 | cell_5380 559 | cell_5381 560 | cell_5390 561 | cell_5400 562 | cell_5404 563 | cell_5406 564 | cell_5420 565 | cell_5449 566 | cell_5461 567 | cell_5468 568 | cell_5474 569 | cell_5498 570 | cell_5512 571 | cell_5514 572 | cell_5522 573 | cell_5535 574 | cell_5537 575 | cell_5538 576 | cell_5542 577 | cell_5552 578 | cell_5568 579 | cell_5616 580 | cell_5637 581 | cell_5638 582 | cell_5640 583 | cell_5651 584 | cell_5671 585 | cell_5672 586 | cell_5681 587 | cell_5682 588 | cell_5684 589 | cell_5686 590 | cell_5688 591 | cell_5691 592 | cell_5695 593 | cell_5697 594 | cell_5739 595 | cell_5744 596 | cell_5771 597 | cell_5773 598 | cell_5776 599 | cell_5815 600 | cell_5837 601 | cell_5847 602 | cell_5860 603 | cell_5861 604 | cell_5865 605 | cell_5875 606 | cell_5883 607 | cell_5887 608 | cell_5888 609 | cell_5901 610 | cell_5903 611 | cell_5907 612 | cell_5908 613 | cell_5911 614 | cell_5922 615 | cell_5931 616 | cell_5934 617 | cell_5935 618 | cell_5947 619 | cell_5951 620 | cell_5968 621 | cell_5969 622 | cell_5976 623 | cell_5995 624 | cell_5997 625 | cell_6003 626 | cell_6006 627 | cell_6017 628 | cell_6059 629 | cell_6080 630 | cell_6081 631 | cell_6083 632 | cell_6090 633 | cell_6097 634 | cell_6113 
635 | cell_6114 636 | cell_6119 637 | cell_6124 638 | cell_6132 639 | cell_6141 640 | cell_6167 641 | cell_6177 642 | cell_6202 643 | cell_6203 644 | cell_6204 645 | cell_6205 646 | cell_6210 647 | cell_6230 648 | cell_6232 649 | cell_6246 650 | cell_6257 651 | cell_6266 652 | cell_6267 653 | cell_6268 654 | cell_6283 655 | cell_6297 656 | cell_6340 657 | cell_6346 658 | cell_6355 659 | cell_6357 660 | cell_6366 661 | cell_6369 662 | cell_6371 663 | cell_6372 664 | cell_6373 665 | cell_6398 666 | cell_6402 667 | cell_6412 668 | cell_6420 669 | cell_6439 670 | cell_6453 671 | cell_6455 672 | cell_6463 673 | cell_6481 674 | cell_6484 675 | cell_6486 676 | cell_6489 677 | cell_6500 678 | cell_6505 679 | cell_6508 680 | cell_6556 681 | cell_6569 682 | cell_6581 683 | cell_6582 684 | cell_6584 685 | cell_6590 686 | cell_6596 687 | cell_6599 688 | cell_6602 689 | cell_6603 690 | cell_6608 691 | cell_6614 692 | cell_6616 693 | cell_6617 694 | cell_6635 695 | cell_6637 696 | cell_6642 697 | cell_6656 698 | cell_6659 699 | cell_6672 700 | cell_6727 701 | cell_6734 702 | cell_6754 703 | cell_6764 704 | cell_6779 705 | cell_6785 706 | cell_6788 707 | cell_6803 708 | cell_6804 709 | cell_6805 710 | cell_6809 711 | cell_6847 712 | cell_6862 713 | cell_6865 714 | cell_6870 715 | cell_6889 716 | cell_6894 717 | cell_6919 718 | cell_6930 719 | cell_6936 720 | cell_6950 721 | cell_6965 722 | cell_6971 723 | cell_6972 724 | cell_6976 725 | cell_6977 726 | cell_6980 727 | cell_6989 728 | cell_7006 729 | cell_7026 730 | cell_7029 731 | cell_7035 732 | cell_7057 733 | cell_7079 734 | cell_7081 735 | cell_7087 736 | cell_7090 737 | cell_7091 738 | cell_7093 739 | cell_7111 740 | cell_7117 741 | cell_7125 742 | cell_7126 743 | cell_7133 744 | cell_7136 745 | cell_7141 746 | cell_7146 747 | cell_7154 748 | cell_7159 749 | cell_7168 750 | cell_7182 751 | cell_7193 752 | cell_7212 753 | cell_7229 754 | cell_7240 755 | cell_7256 756 | cell_7258 757 | cell_7260 758 | cell_7263 759 | cell_7274 
760 | cell_7279 761 | cell_7288 762 | cell_7295 763 | cell_7301 764 | cell_7317 765 | cell_7333 766 | cell_7335 767 | cell_7340 768 | cell_7359 769 | cell_7376 770 | cell_7379 771 | cell_7381 772 | cell_7388 773 | cell_7389 774 | cell_7392 775 | cell_7402 776 | cell_7407 777 | cell_7409 778 | cell_7417 779 | cell_7423 780 | cell_7435 781 | cell_7436 782 | cell_7437 783 | -------------------------------------------------------------------------------- /tests/data/clonal_data_in_table_format.txt: -------------------------------------------------------------------------------- 1 | Cell_ID,Clone_ID 2 | cell_0,clone_275 3 | cell_1,clone_329 4 | cell_2,clone_56 5 | cell_3,clone_236 6 | cell_4,clone_213 7 | cell_5,clone_190 8 | cell_6,clone_236 9 | cell_7,clone_315 10 | cell_8,clone_109 11 | cell_9,clone_152 12 | cell_10,clone_284 13 | cell_11,clone_335 14 | cell_12,clone_275 15 | cell_13,clone_213 16 | cell_14,clone_285 17 | cell_15,clone_155 18 | cell_16,clone_104 19 | cell_17,clone_317 20 | cell_18,clone_229 21 | cell_19,clone_244 22 | cell_20,clone_183 23 | cell_21,clone_275 24 | cell_22,clone_284 25 | cell_23,clone_236 26 | cell_24,clone_236 27 | cell_25,clone_197 28 | cell_26,clone_36 29 | cell_27,clone_275 30 | cell_28,clone_298 31 | cell_29,clone_10 32 | cell_30,clone_236 33 | cell_31,clone_114 34 | cell_32,clone_158 35 | cell_33,clone_236 36 | cell_34,clone_158 37 | cell_35,clone_104 38 | cell_36,clone_77 39 | cell_37,clone_325 40 | cell_38,clone_161 41 | cell_39,clone_190 42 | cell_40,clone_62 43 | cell_41,clone_158 44 | cell_42,clone_251 45 | cell_43,clone_18 46 | cell_44,clone_325 47 | cell_45,clone_306 48 | cell_46,clone_243 49 | cell_47,clone_73 50 | cell_48,clone_59 51 | cell_49,clone_314 52 | cell_50,clone_317 53 | cell_51,clone_236 54 | cell_52,clone_74 55 | cell_53,clone_314 56 | cell_54,clone_221 57 | cell_55,clone_314 58 | cell_56,clone_109 59 | cell_57,clone_178 60 | cell_58,clone_136 61 | cell_59,clone_219 62 | cell_60,clone_236 63 | cell_61,clone_56 
64 | cell_62,clone_186 65 | cell_63,clone_284 66 | cell_64,clone_73 67 | cell_65,clone_251 68 | cell_66,clone_59 69 | cell_67,clone_186 70 | cell_68,clone_284 71 | cell_69,clone_18 72 | cell_70,clone_183 73 | cell_71,clone_284 74 | cell_72,clone_187 75 | cell_73,clone_315 76 | cell_74,clone_226 77 | cell_75,clone_88 78 | cell_76,clone_229 79 | cell_77,clone_77 80 | cell_78,clone_189 81 | cell_79,clone_229 82 | cell_80,clone_314 83 | cell_81,clone_275 84 | cell_82,clone_152 85 | cell_83,clone_284 86 | cell_84,clone_317 87 | cell_85,clone_179 88 | cell_86,clone_285 89 | cell_87,clone_59 90 | cell_88,clone_335 91 | cell_89,clone_202 92 | cell_90,clone_284 93 | cell_91,clone_275 94 | cell_92,clone_285 95 | cell_93,clone_98 96 | cell_94,clone_229 97 | cell_95,clone_335 98 | cell_96,clone_158 99 | cell_97,clone_74 100 | cell_98,clone_275 101 | cell_99,clone_10 102 | cell_100,clone_32 103 | cell_101,clone_56 104 | cell_102,clone_104 105 | cell_103,clone_335 106 | cell_104,clone_284 107 | cell_105,clone_152 108 | cell_106,clone_88 109 | cell_107,clone_136 110 | cell_108,clone_284 111 | cell_109,clone_284 112 | cell_110,clone_158 113 | cell_111,clone_284 114 | cell_112,clone_20 115 | cell_113,clone_24 116 | cell_114,clone_79 117 | cell_115,clone_252 118 | cell_116,clone_237 119 | cell_117,clone_24 120 | cell_118,clone_176 121 | cell_119,clone_213 122 | cell_120,clone_79 123 | cell_121,clone_130 124 | cell_122,clone_168 125 | cell_123,clone_285 126 | cell_124,clone_79 127 | cell_125,clone_325 128 | cell_126,clone_252 129 | cell_127,clone_37 130 | cell_128,clone_251 131 | cell_129,clone_179 132 | cell_130,clone_79 133 | cell_131,clone_163 134 | cell_132,clone_258 135 | cell_133,clone_79 136 | cell_134,clone_275 137 | cell_135,clone_57 138 | cell_136,clone_315 139 | cell_137,clone_221 140 | cell_138,clone_279 141 | cell_139,clone_274 142 | cell_140,clone_78 143 | cell_141,clone_213 144 | cell_142,clone_275 145 | cell_143,clone_23 146 | cell_144,clone_148 147 | 
cell_145,clone_136 148 | cell_146,clone_179 149 | cell_147,clone_252 150 | cell_148,clone_325 151 | cell_149,clone_232 152 | cell_150,clone_298 153 | cell_151,clone_117 154 | cell_152,clone_279 155 | cell_153,clone_186 156 | cell_154,clone_207 157 | cell_155,clone_176 158 | cell_156,clone_306 159 | cell_157,clone_213 160 | cell_158,clone_221 161 | cell_159,clone_130 162 | cell_160,clone_29 163 | cell_161,clone_251 164 | cell_162,clone_302 165 | cell_163,clone_224 166 | cell_164,clone_1 167 | cell_165,clone_77 168 | cell_166,clone_302 169 | cell_167,clone_285 170 | cell_168,clone_243 171 | cell_169,clone_314 172 | cell_170,clone_118 173 | cell_171,clone_252 174 | cell_172,clone_8 175 | cell_173,clone_281 176 | cell_174,clone_118 177 | cell_175,clone_276 178 | cell_176,clone_134 179 | cell_177,clone_317 180 | cell_178,clone_118 181 | cell_179,clone_203 182 | cell_180,clone_134 183 | cell_181,clone_88 184 | cell_182,clone_325 185 | cell_183,clone_59 186 | cell_184,clone_199 187 | cell_185,clone_59 188 | cell_186,clone_317 189 | cell_187,clone_335 190 | cell_188,clone_182 191 | cell_189,clone_157 192 | cell_190,clone_252 193 | cell_191,clone_284 194 | cell_192,clone_70 195 | cell_193,clone_34 196 | cell_194,clone_122 197 | cell_195,clone_294 198 | cell_196,clone_306 199 | cell_197,clone_69 200 | cell_198,clone_284 201 | cell_199,clone_20 202 | cell_200,clone_314 203 | cell_201,clone_275 204 | cell_202,clone_110 205 | cell_203,clone_210 206 | cell_204,clone_325 207 | cell_205,clone_285 208 | cell_206,clone_56 209 | cell_207,clone_130 210 | cell_208,clone_176 211 | cell_209,clone_237 212 | cell_210,clone_229 213 | cell_211,clone_202 214 | cell_212,clone_284 215 | cell_213,clone_79 216 | cell_214,clone_244 217 | cell_215,clone_110 218 | cell_216,clone_238 219 | cell_217,clone_284 220 | cell_218,clone_47 221 | cell_219,clone_126 222 | cell_220,clone_176 223 | cell_221,clone_18 224 | cell_222,clone_229 225 | cell_223,clone_315 226 | cell_224,clone_243 227 | 
cell_225,clone_88 228 | cell_226,clone_219 229 | cell_227,clone_237 230 | cell_228,clone_237 231 | cell_229,clone_281 232 | cell_230,clone_134 233 | cell_231,clone_251 234 | cell_232,clone_126 235 | cell_233,clone_88 236 | cell_234,clone_186 237 | cell_235,clone_143 238 | cell_236,clone_126 239 | cell_237,clone_237 240 | cell_238,clone_43 241 | cell_239,clone_281 242 | cell_240,clone_243 243 | cell_241,clone_219 244 | cell_242,clone_182 245 | cell_243,clone_281 246 | cell_244,clone_20 247 | cell_245,clone_24 248 | cell_246,clone_122 249 | cell_247,clone_79 250 | cell_248,clone_130 251 | cell_249,clone_172 252 | cell_250,clone_284 253 | cell_251,clone_222 254 | cell_252,clone_284 255 | cell_253,clone_172 256 | cell_254,clone_122 257 | cell_255,clone_224 258 | cell_256,clone_161 259 | cell_257,clone_291 260 | cell_258,clone_172 261 | cell_259,clone_294 262 | cell_260,clone_252 263 | cell_261,clone_228 264 | cell_262,clone_139 265 | cell_263,clone_62 266 | cell_264,clone_24 267 | cell_265,clone_79 268 | cell_266,clone_143 269 | cell_267,clone_148 270 | cell_268,clone_176 271 | cell_269,clone_176 272 | cell_270,clone_31 273 | cell_271,clone_79 274 | cell_272,clone_179 275 | cell_273,clone_317 276 | cell_274,clone_213 277 | cell_275,clone_300 278 | cell_276,clone_116 279 | cell_277,clone_204 280 | cell_278,clone_97 281 | cell_279,clone_216 282 | cell_280,clone_89 283 | cell_281,clone_332 284 | cell_282,clone_112 285 | cell_283,clone_273 286 | cell_284,clone_94 287 | cell_285,clone_142 288 | cell_286,clone_94 289 | cell_287,clone_84 290 | cell_288,clone_169 291 | cell_289,clone_40 292 | cell_290,clone_95 293 | cell_291,clone_282 294 | cell_292,clone_223 295 | cell_293,clone_44 296 | cell_294,clone_106 297 | cell_295,clone_248 298 | cell_296,clone_63 299 | cell_297,clone_204 300 | cell_298,clone_71 301 | cell_299,clone_149 302 | cell_300,clone_93 303 | cell_301,clone_334 304 | cell_302,clone_216 305 | cell_303,clone_204 306 | cell_304,clone_204 307 | cell_305,clone_181 
308 | cell_306,clone_307 309 | cell_307,clone_241 310 | cell_308,clone_225 311 | cell_309,clone_246 312 | cell_310,clone_282 313 | cell_311,clone_332 314 | cell_312,clone_44 315 | cell_313,clone_318 316 | cell_314,clone_93 317 | cell_315,clone_303 318 | cell_316,clone_95 319 | cell_317,clone_334 320 | cell_318,clone_223 321 | cell_319,clone_95 322 | cell_320,clone_169 323 | cell_321,clone_95 324 | cell_322,clone_204 325 | cell_323,clone_310 326 | cell_324,clone_99 327 | cell_325,clone_95 328 | cell_326,clone_204 329 | cell_327,clone_204 330 | cell_328,clone_216 331 | cell_329,clone_93 332 | cell_330,clone_181 333 | cell_331,clone_282 334 | cell_332,clone_330 335 | cell_333,clone_40 336 | cell_334,clone_169 337 | cell_335,clone_95 338 | cell_336,clone_150 339 | cell_337,clone_95 340 | cell_338,clone_204 341 | cell_339,clone_112 342 | cell_340,clone_169 343 | cell_341,clone_216 344 | cell_342,clone_181 345 | cell_343,clone_91 346 | cell_344,clone_223 347 | cell_345,clone_312 348 | cell_346,clone_75 349 | cell_347,clone_181 350 | cell_348,clone_204 351 | cell_349,clone_95 352 | cell_350,clone_164 353 | cell_351,clone_184 354 | cell_352,clone_216 355 | cell_353,clone_282 356 | cell_354,clone_193 357 | cell_355,clone_217 358 | cell_356,clone_257 359 | cell_357,clone_287 360 | cell_358,clone_230 361 | cell_359,clone_174 362 | cell_360,clone_58 363 | cell_361,clone_156 364 | cell_362,clone_61 365 | cell_363,clone_147 366 | cell_364,clone_185 367 | cell_365,clone_11 368 | cell_366,clone_218 369 | cell_367,clone_90 370 | cell_368,clone_132 371 | cell_369,clone_137 372 | cell_370,clone_196 373 | cell_371,clone_194 374 | cell_372,clone_41 375 | cell_373,clone_271 376 | cell_374,clone_303 377 | cell_375,clone_167 378 | cell_376,clone_7 379 | cell_377,clone_106 380 | cell_378,clone_264 381 | cell_379,clone_205 382 | cell_380,clone_180 383 | cell_381,clone_264 384 | cell_382,clone_249 385 | cell_383,clone_282 386 | cell_384,clone_250 387 | cell_385,clone_256 388 | 
cell_386,clone_171 389 | cell_387,clone_334 390 | cell_388,clone_95 391 | cell_389,clone_173 392 | cell_390,clone_75 393 | cell_391,clone_293 394 | cell_392,clone_173 395 | cell_393,clone_256 396 | cell_394,clone_250 397 | cell_395,clone_91 398 | cell_396,clone_15 399 | cell_397,clone_91 400 | cell_398,clone_44 401 | cell_399,clone_316 402 | cell_400,clone_283 403 | cell_401,clone_278 404 | cell_402,clone_204 405 | cell_403,clone_301 406 | cell_404,clone_262 407 | cell_405,clone_165 408 | cell_406,clone_273 409 | cell_407,clone_4 410 | cell_408,clone_131 411 | cell_409,clone_338 412 | cell_410,clone_261 413 | cell_411,clone_167 414 | cell_412,clone_41 415 | cell_413,clone_115 416 | cell_414,clone_125 417 | cell_415,clone_256 418 | cell_416,clone_16 419 | cell_417,clone_318 420 | cell_418,clone_216 421 | cell_419,clone_307 422 | cell_420,clone_307 423 | cell_421,clone_216 424 | cell_422,clone_71 425 | cell_423,clone_66 426 | cell_424,clone_144 427 | cell_425,clone_264 428 | cell_426,clone_150 429 | cell_427,clone_312 430 | cell_428,clone_177 431 | cell_429,clone_169 432 | cell_430,clone_268 433 | cell_431,clone_105 434 | cell_432,clone_330 435 | cell_433,clone_307 436 | cell_434,clone_150 437 | cell_435,clone_271 438 | cell_436,clone_144 439 | cell_437,clone_80 440 | cell_438,clone_80 441 | cell_439,clone_106 442 | cell_440,clone_63 443 | cell_441,clone_332 444 | cell_442,clone_105 445 | cell_443,clone_225 446 | cell_444,clone_264 447 | cell_445,clone_241 448 | cell_446,clone_330 449 | cell_447,clone_144 450 | cell_448,clone_184 451 | cell_449,clone_307 452 | cell_450,clone_106 453 | cell_451,clone_307 454 | cell_452,clone_105 455 | cell_453,clone_193 456 | cell_454,clone_81 457 | cell_455,clone_142 458 | cell_456,clone_76 459 | cell_457,clone_149 460 | cell_458,clone_95 461 | cell_459,clone_177 462 | cell_460,clone_169 463 | cell_461,clone_169 464 | cell_462,clone_14 465 | cell_463,clone_112 466 | cell_464,clone_80 467 | cell_465,clone_93 468 | cell_466,clone_128 
469 | cell_467,clone_93 470 | cell_468,clone_248 471 | cell_469,clone_246 472 | cell_470,clone_80 473 | cell_471,clone_194 474 | cell_472,clone_312 475 | cell_473,clone_270 476 | cell_474,clone_105 477 | cell_475,clone_81 478 | cell_476,clone_303 479 | cell_477,clone_44 480 | cell_478,clone_256 481 | cell_479,clone_84 482 | cell_480,clone_106 483 | cell_481,clone_71 484 | cell_482,clone_27 485 | cell_483,clone_194 486 | cell_484,clone_63 487 | cell_485,clone_269 488 | cell_486,clone_14 489 | cell_487,clone_46 490 | cell_488,clone_266 491 | cell_489,clone_170 492 | cell_490,clone_52 493 | cell_491,clone_35 494 | cell_492,clone_286 495 | cell_493,clone_211 496 | cell_494,clone_19 497 | cell_495,clone_64 498 | cell_496,clone_19 499 | cell_497,clone_320 500 | cell_498,clone_60 501 | cell_499,clone_33 502 | cell_500,clone_297 503 | cell_501,clone_48 504 | cell_502,clone_170 505 | cell_503,clone_19 506 | cell_504,clone_304 507 | cell_505,clone_277 508 | cell_506,clone_65 509 | cell_507,clone_92 510 | cell_508,clone_196 511 | cell_509,clone_101 512 | cell_510,clone_215 513 | cell_511,clone_289 514 | cell_512,clone_82 515 | cell_513,clone_111 516 | cell_514,clone_265 517 | cell_515,clone_85 518 | cell_516,clone_111 519 | cell_517,clone_196 520 | cell_518,clone_19 521 | cell_519,clone_290 522 | cell_520,clone_211 523 | cell_521,clone_220 524 | cell_522,clone_67 525 | cell_523,clone_292 526 | cell_524,clone_296 527 | cell_525,clone_55 528 | cell_526,clone_296 529 | cell_527,clone_319 530 | cell_528,clone_46 531 | cell_529,clone_299 532 | cell_530,clone_209 533 | cell_531,clone_280 534 | cell_532,clone_60 535 | cell_533,clone_5 536 | cell_534,clone_324 537 | cell_535,clone_266 538 | cell_536,clone_2 539 | cell_537,clone_305 540 | cell_538,clone_218 541 | cell_539,clone_295 542 | cell_540,clone_9 543 | cell_541,clone_52 544 | cell_542,clone_240 545 | cell_543,clone_299 546 | cell_544,clone_198 547 | cell_545,clone_101 548 | cell_546,clone_227 549 | cell_547,clone_313 550 | 
cell_548,clone_132 551 | cell_549,clone_38 552 | cell_550,clone_102 553 | cell_551,clone_132 554 | cell_552,clone_52 555 | cell_553,clone_299 556 | cell_554,clone_240 557 | cell_555,clone_83 558 | cell_556,clone_19 559 | cell_557,clone_121 560 | cell_558,clone_326 561 | cell_559,clone_166 562 | cell_560,clone_100 563 | cell_561,clone_33 564 | cell_562,clone_123 565 | cell_563,clone_124 566 | cell_564,clone_263 567 | cell_565,clone_321 568 | cell_566,clone_119 569 | cell_567,clone_211 570 | cell_568,clone_192 571 | cell_569,clone_196 572 | cell_570,clone_272 573 | cell_571,clone_39 574 | cell_572,clone_288 575 | cell_573,clone_218 576 | cell_574,clone_195 577 | cell_575,clone_192 578 | cell_576,clone_170 579 | cell_577,clone_119 580 | cell_578,clone_185 581 | cell_579,clone_9 582 | cell_580,clone_214 583 | cell_581,clone_321 584 | cell_582,clone_230 585 | cell_583,clone_72 586 | cell_584,clone_280 587 | cell_585,clone_239 588 | cell_586,clone_108 589 | cell_587,clone_113 590 | cell_588,clone_50 591 | cell_589,clone_231 592 | cell_590,clone_255 593 | cell_591,clone_53 594 | cell_592,clone_211 595 | cell_593,clone_154 596 | cell_594,clone_234 597 | cell_595,clone_51 598 | cell_596,clone_255 599 | cell_597,clone_13 600 | cell_598,clone_333 601 | cell_599,clone_53 602 | cell_600,clone_260 603 | cell_601,clone_200 604 | cell_602,clone_217 605 | cell_603,clone_42 606 | cell_604,clone_311 607 | cell_605,clone_214 608 | cell_606,clone_323 609 | cell_607,clone_21 610 | cell_608,clone_129 611 | cell_609,clone_255 612 | cell_610,clone_54 613 | cell_611,clone_30 614 | cell_612,clone_92 615 | cell_613,clone_308 616 | cell_614,clone_135 617 | cell_615,clone_26 618 | cell_616,clone_135 619 | cell_617,clone_327 620 | cell_618,clone_166 621 | cell_619,clone_321 622 | cell_620,clone_206 623 | cell_621,clone_160 624 | cell_622,clone_28 625 | cell_623,clone_11 626 | cell_624,clone_147 627 | cell_625,clone_120 628 | cell_626,clone_254 629 | cell_627,clone_208 630 | cell_628,clone_2 631 
| cell_629,clone_240 632 | cell_630,clone_124 633 | cell_631,clone_55 634 | cell_632,clone_35 635 | cell_633,clone_0 636 | cell_634,clone_195 637 | cell_635,clone_201 638 | cell_636,clone_6 639 | cell_637,clone_185 640 | cell_638,clone_196 641 | cell_639,clone_242 642 | cell_640,clone_0 643 | cell_641,clone_321 644 | cell_642,clone_45 645 | cell_643,clone_304 646 | cell_644,clone_141 647 | cell_645,clone_146 648 | cell_646,clone_309 649 | cell_647,clone_159 650 | cell_648,clone_218 651 | cell_649,clone_320 652 | cell_650,clone_175 653 | cell_651,clone_154 654 | cell_652,clone_133 655 | cell_653,clone_305 656 | cell_654,clone_2 657 | cell_655,clone_195 658 | cell_656,clone_121 659 | cell_657,clone_48 660 | cell_658,clone_290 661 | cell_659,clone_138 662 | cell_660,clone_336 663 | cell_661,clone_92 664 | cell_662,clone_192 665 | cell_663,clone_52 666 | cell_664,clone_2 667 | cell_665,clone_278 668 | cell_666,clone_68 669 | cell_667,clone_193 670 | cell_668,clone_106 671 | cell_669,clone_301 672 | cell_670,clone_262 673 | cell_671,clone_293 674 | cell_672,clone_330 675 | cell_673,clone_338 676 | cell_674,clone_27 677 | cell_675,clone_204 678 | cell_676,clone_145 679 | cell_677,clone_125 680 | cell_678,clone_3 681 | cell_679,clone_332 682 | cell_680,clone_81 683 | cell_681,clone_223 684 | cell_682,clone_330 685 | cell_683,clone_116 686 | cell_684,clone_278 687 | cell_685,clone_121 688 | cell_686,clone_330 689 | cell_687,clone_89 690 | cell_688,clone_264 691 | cell_689,clone_171 692 | cell_690,clone_264 693 | cell_691,clone_95 694 | cell_692,clone_293 695 | cell_693,clone_184 696 | cell_694,clone_121 697 | cell_695,clone_89 698 | cell_696,clone_278 699 | cell_697,clone_278 700 | cell_698,clone_312 701 | cell_699,clone_49 702 | cell_700,clone_282 703 | cell_701,clone_332 704 | cell_702,clone_121 705 | cell_703,clone_338 706 | cell_704,clone_169 707 | cell_705,clone_89 708 | cell_706,clone_282 709 | cell_707,clone_91 710 | cell_708,clone_293 711 | cell_709,clone_278 712 | 
cell_710,clone_338 713 | cell_711,clone_264 714 | cell_712,clone_283 715 | cell_713,clone_293 716 | cell_714,clone_282 717 | cell_715,clone_87 718 | cell_716,clone_160 719 | cell_717,clone_212 720 | cell_718,clone_127 721 | cell_719,clone_322 722 | cell_720,clone_267 723 | cell_721,clone_311 724 | cell_722,clone_86 725 | cell_723,clone_328 726 | cell_724,clone_253 727 | cell_725,clone_25 728 | cell_726,clone_331 729 | cell_727,clone_235 730 | cell_728,clone_211 731 | cell_729,clone_127 732 | cell_730,clone_107 733 | cell_731,clone_337 734 | cell_732,clone_127 735 | cell_733,clone_17 736 | cell_734,clone_103 737 | cell_735,clone_257 738 | cell_736,clone_257 739 | cell_737,clone_51 740 | cell_738,clone_253 741 | cell_739,clone_26 742 | cell_740,clone_247 743 | cell_741,clone_233 744 | cell_742,clone_214 745 | cell_743,clone_267 746 | cell_744,clone_327 747 | cell_745,clone_72 748 | cell_746,clone_188 749 | cell_747,clone_328 750 | cell_748,clone_259 751 | cell_749,clone_245 752 | cell_750,clone_151 753 | cell_751,clone_103 754 | cell_752,clone_191 755 | cell_753,clone_196 756 | cell_754,clone_257 757 | cell_755,clone_22 758 | cell_756,clone_272 759 | cell_757,clone_153 760 | cell_758,clone_135 761 | cell_759,clone_120 762 | cell_760,clone_257 763 | cell_761,clone_21 764 | cell_762,clone_162 765 | cell_763,clone_140 766 | cell_764,clone_103 767 | cell_765,clone_127 768 | cell_766,clone_12 769 | cell_767,clone_280 770 | cell_768,clone_191 771 | cell_769,clone_96 772 | cell_770,clone_327 773 | cell_771,clone_30 774 | cell_772,clone_127 775 | cell_773,clone_86 776 | cell_774,clone_129 777 | cell_775,clone_255 778 | cell_776,clone_196 779 | cell_777,clone_239 780 | cell_778,clone_259 781 | cell_779,clone_217 782 | cell_780,clone_259 783 | -------------------------------------------------------------------------------- /tests/data/test_adata_preprocessed.h5ad: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AllonKleinLab/cospar/ca54cad8a9db9a72152ba8a8b6d67d57eace4acb/tests/data/test_adata_preprocessed.h5ad -------------------------------------------------------------------------------- /tests/test_all.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) 5 | 6 | from pathlib import Path 7 | 8 | from matplotlib import pyplot as plt 9 | 10 | from tests.context import cospar as cs 11 | 12 | # be careful not to change this global parameter 13 | selected_fates = [ 14 | "Ccr7_DC", 15 | "Mast", 16 | "Meg", 17 | "pDC", 18 | "Eos", 19 | "Lymphoid", 20 | "Erythroid", 21 | "Baso", 22 | "Neutrophil", 23 | "Monocyte", 24 | ] 25 | 26 | 27 | def config(shared_datadir): 28 | cs.settings.data_path = os.path.join(shared_datadir, "..", "output") 29 | cs.settings.figure_path = os.path.join(shared_datadir, "..", "output") 30 | cs.settings.verbosity = 0 # range: 0 (error),1 (warning),2 (info),3 (hint). 
31 | cs.settings.set_figure_params( 32 | format="png", figsize=[4, 3.5], dpi=25, fontsize=14, pointsize=3, dpi_save=25 33 | ) 34 | cs.hf.set_up_folders() # setup the data_path and figure_path 35 | 36 | 37 | def test_load_dataset(shared_datadir): 38 | config(shared_datadir) 39 | print("-------------------------load dataset") 40 | # cs.datasets.hematopoiesis_subsampled() 41 | # cs.datasets.hematopoiesis() 42 | # cs.datasets.hematopoiesis_130K() 43 | # cs.datasets.hematopoiesis_Gata1_states() 44 | # cs.datasets.reprogramming() 45 | # cs.datasets.lung() 46 | cs.datasets.synthetic_bifurcation() 47 | # cs.datasets.reprogramming_Day0_3_28() 48 | 49 | 50 | def test_load_data_from_scratch(shared_datadir): 51 | import numpy as np 52 | import pandas as pd 53 | import scipy.io as sio 54 | 55 | config(shared_datadir) 56 | df_cell_id = pd.read_csv(os.path.join(shared_datadir, "cell_id.txt")) 57 | file_name = os.path.join(shared_datadir, "test_adata_preprocessed.h5ad") 58 | adata_orig = cs.hf.read(file_name) 59 | adata_orig = cs.pp.initialize_adata_object( 60 | adata_orig, 61 | cell_names=df_cell_id["Cell_ID"], 62 | ) 63 | df_X_clone = pd.read_csv( 64 | os.path.join(shared_datadir, "clonal_data_in_table_format.txt") 65 | ) 66 | cs.pp.get_X_clone(adata_orig, df_X_clone["Cell_ID"], df_X_clone["Clone_ID"]) 67 | print(adata_orig.obsm["X_clone"].shape) 68 | # cs.pl.embedding(adata_orig, color="state_info") 69 | 70 | 71 | def test_preprocessing(shared_datadir): 72 | config(shared_datadir) 73 | file_name = os.path.join(shared_datadir, "test_adata_preprocessed.h5ad") 74 | adata_orig_0 = cs.hf.read(file_name) 75 | print("------------------------Test preprocessing") 76 | data_des = "test" 77 | # This is just a name to indicate this data for saving results. Can be arbitrary but should be unique to this data. 
78 | X_state = adata_orig_0.X # np.array or sparse matrix, shape (n_cell, n_gene) 79 | gene_names = adata_orig_0.var_names # List of gene names, shape (n_genes,) 80 | # Clonal data matrix, np.array or sparse matrix, shape: (n_cell, n_clone) 81 | X_clone = adata_orig_0.obsm["X_clone"] 82 | # 2-d embedding, np.array, shape: (n_cell, 2) 83 | X_emb = adata_orig_0.obsm["X_emb"] 84 | # A vector of cluster id for each cell, np.array, shape: (n_cell,) 85 | state_info = adata_orig_0.obs["state_info"] 86 | # principal component matrix, np.array, shape: (n_cell, n_pcs) 87 | X_pca = adata_orig_0.obsm["X_pca"] 88 | # A vector of time info, np.array of string, shape: (n_cell,) 89 | time_info = adata_orig_0.obs["time_info"] 90 | 91 | print("------------initialize_adata_object") 92 | adata_orig = cs.pp.initialize_adata_object( 93 | X_state=X_state, 94 | gene_names=gene_names, 95 | time_info=time_info, 96 | X_clone=X_clone, 97 | data_des=data_des, 98 | ) 99 | 100 | adata_orig = cs.pp.initialize_adata_object(adata=adata_orig_0, X_clone=X_clone) 101 | 102 | print("------------get_highly_variable_genes") 103 | cs.pp.get_highly_variable_genes( 104 | adata_orig, 105 | normalized_counts_per_cell=10000, 106 | min_counts=3, 107 | min_cells=3, 108 | min_gene_vscore_pctl=90, 109 | ) 110 | 111 | print("------------remove_cell_cycle_correlated_genes") 112 | cs.pp.remove_cell_cycle_correlated_genes( 113 | adata_orig, 114 | cycling_gene_list=["Ube2c"], 115 | ) 116 | 117 | print("------------get_X_pca") 118 | cs.pp.get_X_pca(adata_orig, n_pca_comp=40) 119 | 120 | print("------------get_X_emb") 121 | cs.pp.get_X_emb(adata_orig, n_neighbors=20, umap_min_dist=0.3) 122 | 123 | print("------------get_state_info (this modifies the state info. 
def test_clonal_analysis(shared_datadir):
    """Smoke-test the clone-based analysis and plotting entry points.

    Reads ``test_adata_preprocessed.h5ad`` from ``shared_datadir`` and calls
    the barcode heatmap, fate coupling, fate hierarchy, clonal fate bias and
    clones-on-manifold functions in turn. Nothing is asserted; the test
    passes when no call raises.

    NOTE(review): ``selected_fates`` is not assigned in this function, so it
    must resolve to a module-level name -- confirm it is defined there.
    """
    config(shared_datadir)

    file_name = os.path.join(shared_datadir, "test_adata_preprocessed.h5ad")
    adata = cs.hf.read(file_name)
    print("------------------------------Basic clonal analysis")

    print("----------barcode_heatmap")
    cs.pl.barcode_heatmap(adata, log_transform=True, selected_fates=selected_fates)
    plt.close("all")

    print("----------fate_coupling_from_clones")
    cs.tl.fate_coupling(adata, source="X_clone")
    cs.pl.fate_coupling(adata, source="X_clone")

    print("----------fate_hierarchy_from_clones")
    cs.tl.fate_hierarchy(adata, source="X_clone")
    cs.pl.fate_hierarchy(adata, source="X_clone")
    plt.close("all")

    print("----------clonal_fate_bias")
    cs.tl.clonal_fate_bias(adata, selected_fate="Neutrophil")
    cs.pl.clonal_fate_bias(adata)
    plt.close("all")

    print("----------clones_on_manifold")
    cs.pl.clones_on_manifold(adata, selected_clone_list=[1, 2, 3])
    plt.close("all")


def test_Tmap_inference(shared_datadir):
    """Smoke-test three transition-map inference entry points.

    Each inference call starts from the same preprocessed dataset read from
    ``shared_datadir``. The returned objects are bound to names but not
    inspected; the test passes when no call raises.

    NOTE(review): ``selected_fates`` is not assigned in this function, so it
    must resolve to a module-level name -- confirm it is defined there.
    """
    config(shared_datadir)
    file_name = os.path.join(shared_datadir, "test_adata_preprocessed.h5ad")
    adata_orig = cs.hf.read(file_name)
    print("------------------------------T map inference")

    print("---------infer_Tmap_from_one_time_clones")
    adata_1 = cs.tmap.infer_Tmap_from_one_time_clones(
        adata_orig,
        initial_time_points=["2"],
        later_time_point="4",
        initialize_method="OT",
        OT_cost="GED",
        smooth_array=[5, 5, 5],
        sparsity_threshold=0.1,
    )

    print("---------infer_Tmap_from_state_info_alone")
    adata_2 = cs.tmap.infer_Tmap_from_state_info_alone(
        adata_orig,
        initial_time_points=["4"],
        later_time_point="6",
        initialize_method="HighVar",
        HighVar_gene_pctl=85,
        max_iter_N=[10, 10],
        epsilon_converge=[0.01, 0.01],
        smooth_array=[5, 5, 5],
        sparsity_threshold=0.1,
    )

    print("---------infer_Tmap_from_clonal_info_alone")
    adata_3 = cs.tmap.infer_Tmap_from_clonal_info_alone(
        adata_orig,
        method="weinreb",
        later_time_point="6",
        selected_fates=selected_fates,
    )

    print("-------------------------save maps")
    # Saving is intentionally disabled here:
    # cs.hf.save_map(adata_3)


def test_Tmap_analysis(shared_datadir):
    """Smoke-test the transition-map based analysis and plotting functions.

    A transition map is inferred from multi-time clones (or, when the local
    ``load_pre_compute_map`` switch is flipped by hand, read from a
    previously saved file under ``cs.settings.data_path``). The resulting
    object is then passed through the fate coupling, hierarchy, map,
    potency, bias, progenitor, iterative differentiation, gene expression
    and differential gene functions. Nothing is asserted beyond the calls
    completing without raising.
    """
    # Kept function-local, as in the original, so the block does not depend
    # on the file's import header.
    import numpy as np

    config(shared_datadir)

    # Manual switch for fast local runs; the shipped value always infers.
    load_pre_compute_map = False
    if load_pre_compute_map:
        file_name = os.path.join(
            cs.settings.data_path,
            "test_MultiTimeClone_Later_FullSpace0_t*2*4*6_adata_with_transition_map.h5ad",
        )
        adata = cs.hf.read(file_name)
    else:
        file_name = os.path.join(shared_datadir, "test_adata_preprocessed.h5ad")
        adata_orig = cs.hf.read(file_name)
        print("---------infer_Tmap_from_multitime_clones")
        adata = cs.tmap.infer_Tmap_from_multitime_clones(
            adata_orig,
            clonal_time_points=["2", "4"],
            later_time_point="6",
            smooth_array=[5, 5, 5],
            sparsity_threshold=0.1,
            intraclone_threshold=0.2,
            max_iter_N=5,
            epsilon_converge=0.01,
        )

    X_clone = adata.obsm["X_clone"]
    print(type(X_clone))

    source = "transition_map"
    # The same two fates are used by every two-fate analysis below.
    neu_mon_fates = ["Neutrophil", "Monocyte"]

    cs.tl.fate_coupling(adata, source=source)
    cs.pl.fate_coupling(adata, source=source)

    cs.tl.fate_hierarchy(adata, source=source)
    cs.pl.fate_hierarchy(adata, source=source)

    cs.tl.fate_map(adata, source=source, selected_fates=neu_mon_fates)
    cs.pl.fate_map(
        adata,
        source=source,
        selected_fates=neu_mon_fates,
        show_histogram=True,
        selected_times="4",
    )

    cs.tl.fate_potency(
        adata, source=source, selected_fates=neu_mon_fates, fate_count=True
    )
    cs.pl.fate_potency(
        adata,
        source=source,
        show_histogram=True,
        selected_times="4",
    )

    cs.tl.fate_bias(
        adata,
        source=source,
        selected_fates=neu_mon_fates,
        sum_fate_prob_thresh=0.01,
    )
    # Plot once without and once with an explicit fate selection.
    cs.pl.fate_bias(
        adata,
        source=source,
        show_histogram=True,
        selected_times="4",
    )
    cs.pl.fate_bias(
        adata,
        source=source,
        show_histogram=True,
        selected_fates=neu_mon_fates,
        selected_times="4",
    )

    cs.tl.progenitor(
        adata,
        source=source,
        selected_fates=neu_mon_fates,
        sum_fate_prob_thresh=0.01,
        avoid_target_states=True,
    )
    cs.pl.progenitor(adata, source=source, selected_times="4")

    cs.tl.iterative_differentiation(
        adata,
        source=source,
        selected_fates="Neutrophil",
        # Keyword spelling is copied unchanged from the original call.
        apply_time_constaint=False,
    )
    cs.pl.iterative_differentiation(
        adata,
        source=source,
    )

    cs.pl.gene_expression_dynamics(
        adata, selected_fate="Neutrophil", gene_name_list=["Gata1"]
    )

    gene_list = [
        "Mpo",
        "Elane",
        "Gstm1",
        "Mt1",
        "S100a8",
        "Prtn3",
        "Gfi1",
        "Dstn",
        "Cd63",
        "Ap3s1",
        "H2-Aa",
        "H2-Eb1",
        "Ighm",
    ]

    # Nested lists are passed as single entries; `renames` supplies one
    # label per entry, in the same order.
    grouped_fates = [
        "Neutrophil",
        "Monocyte",
        ["Baso", "Eos", "Erythroid", "Mast", "Meg"],
        ["pDC", "Ccr7_DC", "Lymphoid"],
    ]
    renames = ["Neu", "Mon", "Meg-Ery-MBaE", "Lym-Dc"]

    cs.pl.gene_expression_heatmap(
        adata,
        selected_genes=gene_list,
        selected_fates=grouped_fates,
        rename_fates=renames,
        fig_width=12,
    )

    cs.pl.gene_expression_on_manifold(
        adata, selected_genes=["Gata1", "Elane"], savefig=True
    )

    # Cell groups given first by name, then as boolean masks.
    df1, df2 = cs.tl.differential_genes(
        adata, cell_group_A="Neutrophil", cell_group_B="Monocyte"
    )

    state_info = np.array(adata.obs["state_info"])
    df1, df2 = cs.tl.differential_genes(
        adata,
        cell_group_A=(state_info == "Neutrophil"),
        cell_group_B=(state_info == "Monocyte"),
    )
    print(df1)

    cs.pl.single_cell_transition(
        adata, selected_state_id_list=[1, 2], savefig=True, map_backward=False
    )


def test_simulated_data():
    """Run map inference on two simulated datasets and print a score for each.

    A bifurcation model and a linear differentiation model are generated
    with ``cs.simulate``. A transition map is inferred for each and passed
    to the matching ``cs.simulate.quantify_*`` function, whose result is
    printed with three decimals. No threshold is asserted.
    """
    print("---------- bifurcation model ------------")
    L = 10
    adata = cs.simulate.bifurcation_model(t1=2, M=20, L=L)
    adata = cs.tmap.infer_Tmap_from_multitime_clones(
        adata, smooth_array=[10, 10, 10], compute_new=True
    )
    Tmap = adata.uns["transition_map"]
    state_info = adata.obs["state_info"]
    cell_id_t1 = adata.uns["Tmap_cell_id_t1"]
    cell_id_t2 = adata.uns["Tmap_cell_id_t2"]
    correlation_cospar = (
        cs.simulate.quantify_correlation_with_ground_truth_fate_bias_BifurcationModel(
            Tmap, state_info, cell_id_t1, cell_id_t2
        )
    )
    print(
        f"Fate bias correlation from the predicted transition map: {correlation_cospar:.3f}"
    )

    print("---------------Linear differentiation---------------")
    adata = cs.simulate.linear_differentiation_model(
        Nt1=50, progeny_N=1, used_clone_N=10, always_simulate_data=True
    )
    adata = cs.tmap.infer_Tmap_from_multitime_clones(
        adata, smooth_array=[10, 10, 10], compute_new=True
    )
    Tmap = adata.uns["transition_map"]
    state_info = adata.obs["state_info"]
    cell_id_t1 = adata.uns["Tmap_cell_id_t1"]
    cell_id_t2 = adata.uns["Tmap_cell_id_t2"]

    # Rows of X_orig for the cells indexed by the inferred map.
    X_t1 = adata.obsm["X_orig"][cell_id_t1]
    X_t2 = adata.obsm["X_orig"][cell_id_t2]
    TPR_cospar = cs.simulate.quantify_transition_peak_TPR_LinearDifferentiation(
        Tmap, X_t1, X_t2
    )
    print(f"True positive rate for the predicted transition map: {TPR_cospar:.3f}")


def test_clean_up():
    """Delete the ``output`` directory once the other tests have run.

    The removal is attempted only when ``cs.settings.data_path`` is an
    existing directory. It is best-effort: a missing or undeletable
    ``output`` directory does not fail the test.
    """
    # Function-scope import keeps this fix self-contained.
    import shutil

    print("---------Clean up")
    if Path(cs.settings.data_path).is_dir():
        # Portable replacement for the shell command `rm -r output`;
        # ignore_errors mirrors the ignored exit status of the old call.
        shutil.rmtree("output", ignore_errors=True)


# os.chdir(os.path.dirname(__file__))
# cs.settings.verbosity = 3  # range: 0 (error),1 (warning),2 (info),3 (hint).
# # test_load_dataset("data")
# # test_preprocessing("data")
# # test_load_data_from_scratch("data")
# # test_clonal_analysis("data")
# # test_Tmap_inference("data")
# test_Tmap_analysis("data")