├── .github
│   └── workflows
│       └── python-publish.yml
├── .readthedocs.yml
├── LICENSE.txt
├── Logo.png
├── LogoTensor.png
├── README.md
├── benchmarks
│   ├── __init__.py
│   └── benchmarks.py
├── cell2cell
│   ├── __init__.py
│   ├── analysis
│   │   ├── __init__.py
│   │   ├── cell2cell_pipelines.py
│   │   ├── tensor_downstream.py
│   │   └── tensor_pipelines.py
│   ├── clustering
│   │   ├── __init__.py
│   │   └── cluster_interactions.py
│   ├── core
│   │   ├── __init__.py
│   │   ├── cci_scores.py
│   │   ├── cell.py
│   │   ├── communication_scores.py
│   │   └── interaction_space.py
│   ├── datasets
│   │   ├── __init__.py
│   │   ├── anndata.py
│   │   ├── gsea_data.py
│   │   ├── heuristic_data.py
│   │   ├── random_data.py
│   │   └── toy_data.py
│   ├── external
│   │   ├── __init__.py
│   │   ├── goenrich.py
│   │   ├── gseapy.py
│   │   ├── pcoa.py
│   │   ├── pcoa_utils.py
│   │   └── umap.py
│   ├── io
│   │   ├── __init__.py
│   │   ├── directories.py
│   │   ├── read_data.py
│   │   └── save_data.py
│   ├── plotting
│   │   ├── __init__.py
│   │   ├── aesthetics.py
│   │   ├── ccc_plot.py
│   │   ├── cci_plot.py
│   │   ├── circular_plot.py
│   │   ├── factor_plot.py
│   │   ├── pcoa_plot.py
│   │   ├── pval_plot.py
│   │   ├── tensor_plot.py
│   │   └── umap_plot.py
│   ├── preprocessing
│   │   ├── __init__.py
│   │   ├── cutoffs.py
│   │   ├── find_elements.py
│   │   ├── gene_ontology.py
│   │   ├── integrate_data.py
│   │   ├── manipulate_dataframes.py
│   │   ├── ppi.py
│   │   ├── rnaseq.py
│   │   └── signal.py
│   ├── spatial
│   │   ├── __init__.py
│   │   ├── distances.py
│   │   ├── filtering.py
│   │   └── neighborhoods.py
│   ├── stats
│   │   ├── __init__.py
│   │   ├── enrichment.py
│   │   ├── gini.py
│   │   ├── multitest.py
│   │   └── permutation.py
│   ├── tensor
│   │   ├── __init__.py
│   │   ├── external_scores.py
│   │   ├── factor_manipulation.py
│   │   ├── factorization.py
│   │   ├── metrics.py
│   │   ├── subset.py
│   │   ├── tensor.py
│   │   └── tensor_manipulation.py
│   └── utils
│       ├── __init__.py
│       ├── networks.py
│       └── parallel_computing.py
├── docs
│   ├── documentation.md
│   ├── index.md
│   ├── requirements.in
│   ├── requirements.txt
│   └── tutorials
│       ├── ASD
│       │   ├── 01-Tensor-Factorization-ASD.ipynb
│       │   ├── 02-Factor-Specific-ASD.ipynb
│       │   ├── 03-GSEA-ASD.ipynb
│       │   ├── KEGG.gmt
│       │   ├── figures
│       │   │   ├── 4d-tensor.png
│       │   │   ├── tensor-approx.png
│       │   │   ├── tensor-factorization.png
│       │   │   └── tf.png
│       │   └── results
│       │       └── Loadings.xlsx
│       ├── GPU-Example.ipynb
│       ├── Tensor-cell2cell-Spatial.ipynb
│       ├── Toy-Example-BulkPipeline.ipynb
│       └── Toy-Example-SingleCellPipeline.ipynb
├── examples
│   ├── cell2cell
│   │   ├── Human-2020-Jin-LR-pairs.csv
│   │   ├── Toy-Example-BulkPipeline.ipynb
│   │   ├── Toy-Example-SingleCellPipeline.ipynb
│   │   └── Toy-Example.ipynb
│   └── tensor_cell2cell
│       ├── GPU-Example.ipynb
│       ├── Loading-PreBuiltTensor.ipynb
│       ├── PreBuiltMetadata-PBMC.pkl
│       ├── PreBuiltTensor-PBMC.pkl
│       ├── Tensor-cell2cell-PBMC.ipynb
│       └── Tensor-cell2cell-Spatial.ipynb
├── mkdocs.yml
├── release
│   ├── 0.5.10-notes.md
│   ├── 0.5.11-notes.md
│   ├── 0.5.4-notes.md
│   ├── 0.5.5-notes.md
│   ├── 0.5.6-notes.md
│   ├── 0.5.7-notes.md
│   ├── 0.5.8-notes.md
│   ├── 0.5.9-notes.md
│   ├── 0.6.0-notes.md
│   ├── 0.6.1-notes.md
│   ├── 0.6.2-notes.md
│   ├── 0.6.3-notes.md
│   ├── 0.6.4-notes.md
│   ├── 0.6.5-notes.md
│   ├── 0.6.6-notes.md
│   ├── 0.6.7-notes.md
│   ├── 0.6.8-notes.md
│   ├── 0.7.0-notes.md
│   ├── 0.7.1-notes.md
│   ├── 0.7.2-notes.md
│   ├── 0.7.3-notes.md
│   └── 0.7.4-notes.md
└── setup.py
/.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflow will upload a Python Package using Twine when a release is created 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries 3 | 4 | # This workflow uses actions that are not certified by GitHub. 
5 | # They are provided by a third-party and are governed by 6 | # separate terms of service, privacy policy, and support 7 | # documentation. 8 | 9 | name: Upload Python Package 10 | 11 | on: 12 | release: 13 | types: [published] 14 | 15 | permissions: 16 | contents: read 17 | 18 | jobs: 19 | deploy: 20 | 21 | runs-on: ubuntu-latest 22 | 23 | steps: 24 | - uses: actions/checkout@v3 25 | - name: Set up Python 26 | uses: actions/setup-python@v3 27 | with: 28 | python-version: '3.x' 29 | - name: Install dependencies 30 | run: | 31 | python -m pip install --upgrade pip 32 | pip install build 33 | - name: Build package 34 | run: python -m build 35 | - name: Publish package 36 | uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 37 | with: 38 | user: __token__ 39 | password: ${{ secrets.PYPI_CELL2CELL_TOKEN }} 40 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yaml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | # Set the version of Python and other tools you might need 9 | build: 10 | os: ubuntu-22.04 11 | tools: 12 | python: "3.7" 13 | 14 | mkdocs: 15 | configuration: mkdocs.yml 16 | 17 | # Optionally declare the Python requirements required to build your docs 18 | python: 19 | install: 20 | - requirements: docs/requirements.txt 21 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2019, Erick Armingol 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | -------------------------------------------------------------------------------- /Logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/earmingol/cell2cell/6a609fa3d3e1d65fc3b92be459f47e636c89f061/Logo.png -------------------------------------------------------------------------------- /LogoTensor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/earmingol/cell2cell/6a609fa3d3e1d65fc3b92be459f47e636c89f061/LogoTensor.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Inferring cell-cell interactions from transcriptomes with *cell2cell* 2 | [![PyPI Version][pb]][pypi] 3 | [![Documentation Status](https://readthedocs.org/projects/cell2cell/badge/?version=latest)](https://cell2cell.readthedocs.io/en/latest/?badge=latest) 4 | [![Downloads](https://pepy.tech/badge/cell2cell/month)](https://pepy.tech/project/cell2cell) 5 | 6 | 7 | [pb]: https://badge.fury.io/py/cell2cell.svg 8 | [pypi]: https://pypi.org/project/cell2cell/ 9 | 10 | ## :book: Getting started 11 | For tutorials and documentation, visit [**cell2cell ReadTheDocs**](https://cell2cell.readthedocs.org/) or our [**cell2cell website**](https://earmingol.github.io/cell2cell). 12 | 13 | 14 | 15 | ## :wrench: Installation 16 | 17 |
18 | <summary>Step 1: Install Anaconda :snake:</summary> 19 | 20 | First, [install Anaconda following this tutorial](https://docs.anaconda.com/anaconda/install/) 21 | </details>
22 | 23 |
24 | <summary>Step 2: Create and Activate a New Conda Environment :computer:</summary> 25 | 26 | ``` 27 | # Create a new conda environment 28 | conda create -n cell2cell -y python=3.7 jupyter 29 | 30 | # Activate the environment 31 | conda activate cell2cell 32 | ``` 33 | </details>
34 | 35 |
<summary>Step 3: Install cell2cell :arrow_down:</summary> 36 | 37 | ``` 38 | pip install cell2cell 39 | ``` 40 | </details>
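
To verify the installation, you can import the package and print its version (a quick, optional sanity check; the version shown depends on the release you installed):

```
python -c "import cell2cell; print(cell2cell.__version__)"
```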
41 | 42 | ## :bulb: Examples 43 | 44 | | cell2cell Examples | Tensor-cell2cell Examples | 45 | | --- | --- | 46 | | ![cell2cell Logo](https://github.com/earmingol/cell2cell/blob/master/Logo.png?raw=true) | ![Tensor-cell2cell Logo](https://github.com/earmingol/cell2cell/blob/master/LogoTensor.png?raw=true) | 47 | | - [Step-by-step Pipeline](https://github.com/earmingol/cell2cell/blob/master/examples/cell2cell/Toy-Example.ipynb)
- [Interaction Pipeline for Bulk Data](https://earmingol.github.io/cell2cell/tutorials/Toy-Example-BulkPipeline)
- [Interaction Pipeline for Single-Cell Data](https://earmingol.github.io/cell2cell/tutorials/Toy-Example-SingleCellPipeline)
- [Whole Body of *C. elegans*](https://github.com/LewisLabUCSD/Celegans-cell2cell) | - [Obtaining patterns of cell-cell communication](https://earmingol.github.io/cell2cell/tutorials/ASD/01-Tensor-Factorization-ASD/)
- [Downstream 1: Factor-specific analyses](https://earmingol.github.io/cell2cell/tutorials/ASD/02-Factor-Specific-ASD/)
- [Downstream 2: Patterns to functions (GSEA)](https://earmingol.github.io/cell2cell/tutorials/ASD/03-GSEA-ASD/)
- [Tensor-cell2cell in Google Colab (**GPU**)](https://colab.research.google.com/drive/1T6MUoxafTHYhjvenDbEtQoveIlHT2U6_?usp=sharing)
- [Communication patterns in **Spatial Transcriptomics**](https://earmingol.github.io/cell2cell/tutorials/Tensor-cell2cell-Spatial/) | 48 | 49 | Reproducible runs of the analyses in the [Tensor-cell2cell paper](https://doi.org/10.1038/s41467-022-31369-2) are available at [CodeOcean.com](https://doi.org/10.24433/CO.0051950.v2) 50 | 51 | ## :link: LIANA & Tensor-cell2cell 52 | 53 | Explore our tutorials for using Tensor-cell2cell with [LIANA](https://github.com/saezlab/liana-py) at [ccc-protocols.readthedocs.io](https://ccc-protocols.readthedocs.io/). 54 | 55 | ## :question: Common Issues 56 | 57 | - **Memory Errors with Tensor-cell2cell:** If you encounter memory errors when performing tensor factorizations, try replacing `init='svd'` with `init='random'`. 58 | 59 | ## :dna: Ligand-Receptor Pairs 60 | Find a curated list of ligand-receptor pairs for your analyses at our [GitHub Repository](https://github.com/LewisLabUCSD/Ligand-Receptor-Pairs). 61 | 62 | ## :bookmark_tabs: Citation 63 | 64 | Please cite our work using the following references: 65 | 66 | - **cell2cell**: [Inferring a spatial code of cell-cell interactions across a whole animal body](https://doi.org/10.1371/journal.pcbi.1010715). 67 | *PLOS Computational Biology, 2022* 68 | 69 | - **Tensor-cell2cell**: [Context-aware deconvolution of cell-cell communication with Tensor-cell2cell](https://doi.org/10.1038/s41467-022-31369-2). 70 | *Nature Communications, 2022.* 71 | 72 | - **LIANA & Tensor-cell2cell tutorials**: [Combining LIANA and Tensor-cell2cell to decipher cell-cell communication across multiple samples](https://doi.org/10.1016/j.crmeth.2024.100758). 73 | *Cell Reports Methods, 2024* 74 | -------------------------------------------------------------------------------- /benchmarks/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | 5 | from benchmarks.benchmarks import (timeit) 6 | -------------------------------------------------------------------------------- /benchmarks/benchmarks.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | 5 | import time 6 | 7 | 8 | def timeit(func, *args, **kwargs): 9 | ''' 10 | This function measures the running time of a given function. 11 | Borrowed from George Armstrong's Github repo (https://github.com/gwarmstrong). 
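    Examples
    --------
    A minimal, illustrative call (any callable and arguments work; the
    measured time will vary between runs):

    >>> res = timeit(sum, [1, 2, 3])
    >>> res['results']
    6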
12 | ''' 13 | t0 = time.time() 14 | output = func(*args, **kwargs) 15 | t1 = time.time() 16 | tot_time = t1-t0 17 | data = {'time': tot_time, 'results': output} 18 | return data -------------------------------------------------------------------------------- /cell2cell/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from cell2cell import analysis 4 | from cell2cell import clustering 5 | from cell2cell import core 6 | from cell2cell import datasets 7 | from cell2cell import external 8 | from cell2cell import io 9 | from cell2cell import plotting 10 | from cell2cell import preprocessing 11 | from cell2cell import spatial 12 | from cell2cell import stats 13 | from cell2cell import tensor 14 | from cell2cell import utils 15 | 16 | __version__ = "0.7.4" -------------------------------------------------------------------------------- /cell2cell/analysis/__init__.py: -------------------------------------------------------------------------------- 1 | from cell2cell.analysis.cell2cell_pipelines import (initialize_interaction_space, BulkInteractions, SingleCellInteractions) 2 | from cell2cell.analysis.tensor_pipelines import (run_tensor_cell2cell_pipeline) 3 | import cell2cell.analysis.tensor_downstream as tensor_downstream 4 | 5 | -------------------------------------------------------------------------------- /cell2cell/analysis/tensor_pipelines.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | 5 | import tensorly as tl 6 | 7 | from cell2cell.plotting.tensor_plot import tensor_factors_plot 8 | 9 | 10 | def run_tensor_cell2cell_pipeline(interaction_tensor, tensor_metadata, copy_tensor=False, rank=None, 11 | tf_optimization='regular', random_state=None, backend=None, device=None, 12 | elbow_metric='error', smooth_elbow=False, upper_rank=25, tf_init='random', 13 | tf_svd='numpy_svd', cmaps=None, sample_col='Element', group_col='Category', 14 | fig_fontsize=14, output_folder=None, output_fig=True, fig_format='pdf', **kwargs): 15 | ''' 16 | Runs the basic pipeline of Tensor-cell2cell (excluding downstream analyses). 17 | 18 | Parameters 19 | ---------- 20 | interaction_tensor : cell2cell.tensor.BaseTensor 21 | A communication tensor generated with any of the tensor classes in 22 | cell2cell.tensor. 23 | 24 | tensor_metadata : list 25 | List of pandas dataframes with metadata information for elements of each 26 | dimension in the tensor. A column named as the variable `sample_col` contains 27 | the name of each element in the tensor, while another column named as the 28 | variable `group_col` contains the metadata or grouping information of each 29 | element. 30 | 31 | copy_tensor : boolean, default=False 32 | Whether generating a copy of the original tensor to avoid modifying it. 33 | 34 | rank : int, default=None 35 | Rank of the Tensor Factorization (number of factors to deconvolve the original 36 | tensor). If None, it will be automatically inferred from an elbow analysis. 37 | 38 | tf_optimization : str, default='regular' 39 | Whether to perform the optimization with a higher number of iterations, 40 | more independent factorization runs, and higher resolution (lower tolerance), 41 | or with a lower number of iterations, runs, and resolution. 42 | Options are: 43 | 44 | - 'regular' : It uses 100 max iterations, 1 factorization run, and a tolerance of 1e-7. 45 | Faster to run. 
46 | - 'robust' : It uses 500 max iterations, 100 factorization runs, and a tolerance of 1e-8. 47 | Slower to run. 48 | 49 | random_state : int, default=None 50 | Seed for randomization. 51 | 52 | backend : str, default=None 53 | Backend that TensorLy will use to perform calculations 54 | on this tensor. When None, the default backend used is 55 | the currently active backend, usually 'numpy'. Options are: 56 | {'cupy', 'jax', 'mxnet', 'numpy', 'pytorch', 'tensorflow'} 57 | 58 | device : str, default=None 59 | Device to use when the backend allows multiple devices. Options are: 60 | {'cpu', 'cuda:0', None} 61 | 62 | elbow_metric : str, default='error' 63 | Metric to perform the elbow analysis (y-axis). 64 | 65 | - 'error' : Normalized error to compute the elbow. 66 | - 'similarity' : Similarity based on CorrIndex (1-CorrIndex). 67 | 68 | smooth_elbow : boolean, default=False 69 | Whether smoothing the elbow-analysis curve with a Savitzky-Golay filter. 70 | 71 | upper_rank : int, default=25 72 | Upper bound of ranks to explore with the elbow analysis. 73 | 74 | tf_init : str, default='random' 75 | Initialization method for computing the Tensor Factorization. 76 | {'svd', 'random'} 77 | 78 | tf_svd : str, default='numpy_svd' 79 | Function to compute the SVD for initializing the Tensor Factorization, 80 | acceptable values in tensorly.SVD_FUNS 81 | 82 | cmaps : list, default=None 83 | A list of colormaps used for coloring elements in each dimension. The length 84 | of this list is equal to the number of dimensions of the tensor. If None, all 85 | dimensions will be colored with the colormap 'gist_rainbow'. 86 | 87 | sample_col : str, default='Element' 88 | Name of the column containing the element names in the metadata. 89 | 90 | group_col : str, default='Category' 91 | Name of the column containing the metadata or grouping information for each 92 | element in the metadata. 93 | 94 | fig_fontsize : int, default=14 95 | Font size of the tick labels. Axis labels will be 1.2 times the fontsize. 96 | 97 | output_folder : str, default=None 98 | Path to the folder where the generated figures will be saved. 99 | If None, figures will not be saved. 100 | 101 | output_fig : boolean, default=True 102 | Whether generating the figures with matplotlib. 103 | 104 | fig_format : str, default='pdf' 105 | Format to store figures when an `output_folder` is specified 106 | and `output_fig` is True. Otherwise, this is not necessary. 107 | 108 | **kwargs : dict 109 | Extra arguments for the tensor factorization according to inputs in 110 | tensorly. 111 | 112 | Returns 113 | ------- 114 | interaction_tensor : cell2cell.tensor.tensor.BaseTensor 115 | Either the original input `interaction_tensor` or a copy of it. 116 | This also stores the results from running the Tensor-cell2cell 117 | pipeline in the corresponding attributes. 
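    Examples
    --------
    A minimal sketch of a call, assuming `tensor` was built beforehand with
    one of the classes in cell2cell.tensor and `metadata` is the matching
    list of metadata dataframes (both names are placeholders):

    >>> out_tensor = run_tensor_cell2cell_pipeline(tensor, metadata,
    ...                                            rank=10,
    ...                                            tf_optimization='regular')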
118 | ''' 119 | if copy_tensor: 120 | interaction_tensor = interaction_tensor.copy() 121 | 122 | dim = len(interaction_tensor.tensor.shape) 123 | 124 | ### OUTPUT FILENAMES ### 125 | if output_folder is None: 126 | elbow_filename = None 127 | tf_filename = None 128 | loading_filename = None 129 | else: 130 | elbow_filename = output_folder + '/Elbow.{}'.format(fig_format) 131 | tf_filename = output_folder + '/Tensor-Factorization.{}'.format(fig_format) 132 | loading_filename = output_folder + '/Loadings.xlsx' 133 | 134 | ### PALETTE COLORS FOR ELEMENTS IN TENSOR DIMS ### 135 | if cmaps is None: 136 | cmap_5d = ['tab10', 'viridis', 'Dark2_r', 'tab20', 'tab20'] 137 | cmap_4d = ['plasma', 'Dark2_r', 'tab20', 'tab20'] 138 | 139 | if dim == 5: 140 | cmaps = cmap_5d 141 | elif dim <= 4: 142 | cmaps = cmap_4d[-dim:] 143 | else: 144 | raise ValueError('Tensors with more than 5 dimensions are not supported') 145 | 146 | assert len(cmaps) == dim, "`cmaps` must have the same length as the number of dimensions in the tensor." 147 | 148 | ### FACTORIZATION PARAMETERS ### 149 | if tf_optimization == 'robust': 150 | elbow_runs = 20 151 | tf_runs = 100 152 | tol = 1e-8 153 | n_iter_max = 500 154 | elif tf_optimization == 'regular': 155 | elbow_runs = 10 156 | tf_runs = 1 157 | tol = 1e-7 158 | n_iter_max = 100 159 | else: 160 | raise ValueError("`tf_optimization` must be either 'robust' or 'regular'.") 161 | 162 | if backend is not None: 163 | tl.set_backend(backend) 164 | 165 | if device is not None: 166 | interaction_tensor.to_device(device=device) 167 | 168 | ### ANALYSIS ### 169 | # Elbow 170 | if rank is None: 171 | print('Running Elbow Analysis') 172 | fig1, error = interaction_tensor.elbow_rank_selection(upper_rank=upper_rank, 173 | runs=elbow_runs, 174 | init=tf_init, 175 | svd=tf_svd, 176 | automatic_elbow=True, 177 | metric=elbow_metric, 178 | output_fig=output_fig, 179 | smooth=smooth_elbow, 180 | random_state=random_state, 181 | fontsize=fig_fontsize, 182 | filename=elbow_filename, 183 | tol=tol, n_iter_max=n_iter_max, 184 | **kwargs 185 | ) 186 | 187 | rank = interaction_tensor.rank 188 | 189 | # Factorization 190 | print('Running Tensor Factorization') 191 | interaction_tensor.compute_tensor_factorization(rank=rank, 192 | init=tf_init, 193 | svd=tf_svd, 194 | random_state=random_state, 195 | runs=tf_runs, 196 | normalize_loadings=True, 197 | tol=tol, n_iter_max=n_iter_max, 198 | **kwargs 199 | ) 200 | 201 | ### EXPORT RESULTS ### 202 | if output_folder is not None: 203 | print('Generating Outputs') 204 | interaction_tensor.export_factor_loadings(loading_filename) 205 | 206 | if output_fig: 207 | fig2, axes = tensor_factors_plot(interaction_tensor=interaction_tensor, 208 | metadata=tensor_metadata, 209 | sample_col=sample_col, 210 | group_col=group_col, 211 | meta_cmaps=cmaps, 212 | fontsize=fig_fontsize, 213 | filename=tf_filename 214 | ) 215 | 216 | return interaction_tensor -------------------------------------------------------------------------------- /cell2cell/clustering/__init__.py: -------------------------------------------------------------------------------- 1 | from cell2cell.clustering.cluster_interactions import (compute_distance, compute_linkage, get_clusters_from_linkage) 2 | -------------------------------------------------------------------------------- /cell2cell/clustering/cluster_interactions.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | 5 | import numpy as np 6 | import 
pandas as pd 7 | import scipy.cluster.hierarchy as hc 8 | import scipy.spatial as sp 9 | 10 | 11 | # Distance-based algorithms 12 | def compute_distance(data_matrix, axis=0, metric='euclidean'): 13 | '''Computes the pairwise distance between elements in a 14 | matrix of shape m x n. Uses the function 15 | scipy.spatial.distance.pdist 16 | 17 | Parameters 18 | ---------- 19 | data_matrix : pandas.DataFrame or ndarray 20 | An m x n matrix used to compute the distances 21 | 22 | axis : int, default=0 23 | To decide on which elements to compute the distance. 24 | If axis=0, the distances will be between elements in 25 | the rows, while axis=1 will lead to distances between 26 | elements in the columns. 27 | 28 | metric : str, default='euclidean' 29 | The distance metric to use. The distance function can be 'braycurtis', 30 | 'canberra', 'chebyshev', 'cityblock', 'correlation', 'cosine', 'dice', 31 | 'euclidean', 'hamming', 'jaccard', 'jensenshannon', 'kulsinski', 32 | 'mahalanobis', 'matching', 'minkowski', 'rogerstanimoto', 'russellrao', 33 | 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule'. 34 | 35 | Returns 36 | ------- 37 | D : ndarray 38 | A square m x m distance matrix, where m is the number of elements 39 | compared (rows or columns, depending on `axis`). It is obtained by 40 | converting the condensed distances from scipy.spatial.distance.pdist 41 | into square form with scipy.spatial.distance.squareform. 42 | ''' 43 | if (type(data_matrix) is pd.core.frame.DataFrame): 44 | data = data_matrix.values 45 | else: 46 | data = data_matrix 47 | if axis == 0: 48 | D = sp.distance.squareform(sp.distance.pdist(data, metric=metric)) 49 | elif axis == 1: 50 | D = sp.distance.squareform(sp.distance.pdist(data.T, metric=metric)) 51 | else: 52 | raise ValueError('Not a valid axis. Use 0 or 1.') 53 | return D 54 | 55 | 56 | def compute_linkage(distance_matrix, method='ward', optimal_ordering=True): 57 | ''' 58 | Returns a linkage for a given distance matrix using a specific method. 59 | 60 | Parameters 61 | ---------- 62 | distance_matrix : numpy.ndarray 63 | A square array containing the distance between a given row and a 64 | given column. Diagonal elements must be zero. 65 | 66 | method : str, 'ward' by default 67 | Method to compute the linkage. It could be: 68 | 69 | - 'single' 70 | - 'complete' 71 | - 'average' 72 | - 'weighted' 73 | - 'centroid' 74 | - 'median' 75 | - 'ward' 76 | For more details, go to: 77 | https://docs.scipy.org/doc/scipy-0.19.0/reference/generated/scipy.cluster.hierarchy.linkage.html 78 | 79 | optimal_ordering : boolean, default=True 80 | Whether sorting the leaves of the dendrogram to have a minimal distance 81 | between successive leaves. For more information, see 82 | scipy.cluster.hierarchy.optimal_leaf_ordering 83 | 84 | Returns 85 | ------- 86 | Z : numpy.ndarray 87 | The hierarchical clustering encoded as a linkage matrix. 88 | ''' 89 | if (type(distance_matrix) is pd.core.frame.DataFrame): 90 | data = distance_matrix.values 91 | else: 92 | data = distance_matrix.copy() 93 | if ~(data.transpose() == data).all(): 94 | raise ValueError('The matrix is not symmetric') 95 | 96 | np.fill_diagonal(data, 0.0) 97 | 98 | # Compute linkage 99 | D = sp.distance.squareform(data) 100 | Z = hc.linkage(D, method=method, optimal_ordering=optimal_ordering) 101 | return Z 102 | 103 | 104 | def get_clusters_from_linkage(linkage, threshold, criterion='maxclust', labels=None): 105 | ''' 106 | Gets clusters from a linkage given a threshold and a criterion. 
107 | 108 | Parameters 109 | ---------- 110 | linkage : numpy.ndarray 111 | The hierarchical clustering encoded with the matrix returned by 112 | the linkage function (Z). 113 | 114 | threshold : float 115 | The threshold to apply when forming flat clusters. 116 | 117 | criterion : str, 'maxclust' by default 118 | The criterion to use in forming flat clusters. Depending on the 119 | criterion, the threshold has different meanings. More information on: 120 | https://docs.scipy.org/doc/scipy-0.19.0/reference/generated/scipy.cluster.hierarchy.fcluster.html 121 | 122 | labels : array-like, None by default 123 | List of labels of the elements contained in the linkage. The order 124 | must match the order they were provided when generating the linkage. 125 | 126 | Returns 127 | ------- 128 | clusters : dict 129 | A dictionary containing the clusters obtained. The keys correspond to 130 | the cluster numbers and the values to a list with element names (when 131 | labels are provided) or element indices (following the linkage order). 132 | ''' 133 | 134 | cluster_ids = hc.fcluster(linkage, threshold, criterion=criterion) 135 | clusters = dict() 136 | for c in np.unique(cluster_ids): 137 | clusters[c] = [] 138 | 139 | for i, c in enumerate(cluster_ids): 140 | if labels is not None: 141 | clusters[c].append(labels[i]) 142 | else: 143 | clusters[c].append(i) 144 | return clusters -------------------------------------------------------------------------------- /cell2cell/core/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from cell2cell.core.cci_scores import (compute_braycurtis_like_cci_score, compute_count_score, compute_icellnet_score, 4 | compute_jaccard_like_cci_score, matmul_bray_curtis_like, matmul_count_active, 5 | matmul_jaccard_like) 6 | from cell2cell.core.cell import (Cell, get_cells_from_rnaseq) 7 | from cell2cell.core.communication_scores import (get_binary_scores, get_continuous_scores, compute_ccc_matrix, aggregate_ccc_matrices) 8 | from cell2cell.core.interaction_space import (generate_interaction_elements, InteractionSpace) -------------------------------------------------------------------------------- /cell2cell/core/cell.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | 5 | import pandas as pd 6 | 7 | class Cell: 8 | '''Specific cell-type/tissue/organ element in an RNAseq dataset. 9 | 10 | Parameters 11 | ---------- 12 | sc_rnaseq_data : pandas.DataFrame 13 | A gene expression matrix. Contains only one column, which 14 | corresponds to a cell-type/tissue/sample, while the rows 15 | are genes. The column name will be the label 16 | of the instance. 17 | 18 | verbose : boolean, default=True 19 | Whether printing or not steps of the analysis. 20 | 21 | Attributes 22 | ---------- 23 | id : int 24 | ID number of the instance generated. 25 | 26 | type : str 27 | Name of the respective cell-type/tissue/sample. 28 | 29 | rnaseq_data : pandas.DataFrame 30 | Copy of sc_rnaseq_data. 31 | 32 | weighted_ppi : pandas.DataFrame 33 | Dataframe created from a list of protein-protein interactions, 34 | where the columns of the interacting proteins are replaced by 35 | a score or a preprocessed gene expression of the respective 36 | proteins. 
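    Examples
    --------
    A small, illustrative instantiation (made-up gene names and values):

    >>> import pandas as pd
    >>> df = pd.DataFrame({'Cell-A': [5, 10, 3]},
    ...                   index=['GeneA', 'GeneB', 'GeneC'])
    >>> cell = Cell(df, verbose=False)
    >>> cell.type
    'Cell-A'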
37 | ''' 38 | _id_counter = 0 # Number of active instances 39 | _id = 0 # Unique ID 40 | 41 | def __init__(self, sc_rnaseq_data, verbose=True): 42 | self.id = Cell._id 43 | Cell._id_counter += 1 44 | Cell._id += 1 45 | 46 | self.type = str(sc_rnaseq_data.columns[-1]) 47 | 48 | # RNAseq datasets 49 | self.rnaseq_data = sc_rnaseq_data.copy() 50 | self.rnaseq_data.columns = ['value'] 51 | 52 | # Binary ppi datasets 53 | self.weighted_ppi = pd.DataFrame(columns=['A', 'B', 'score']) 54 | 55 | # Object created 56 | if verbose: 57 | print("New cell instance created for " + self.type) 58 | 59 | def __del__(self): 60 | Cell._id_counter -= 1 61 | 62 | def __str__(self): 63 | return str(self.type) 64 | 65 | __repr__ = __str__ 66 | 67 | 68 | def get_cells_from_rnaseq(rnaseq_data, cell_columns=None, verbose=True): 69 | ''' 70 | Creates new instances of Cell based on the RNAseq data of each 71 | cell-type/tissue/sample in a gene expression matrix. 72 | 73 | Parameters 74 | ---------- 75 | rnaseq_data : pandas.DataFrame 76 | Gene expression data for a RNA-seq experiment. Columns are 77 | cell-types/tissues/samples and rows are genes. 78 | 79 | cell_columns : array-like, default=None 80 | List of names of cell-types/tissues/samples in the dataset 81 | to be used. If None, all columns will be used. 82 | 83 | verbose : boolean, default=True 84 | Whether printing or not steps of the analysis. 85 | 86 | Returns 87 | ------- 88 | cells : dict 89 | Dictionary containing all Cell instances generated from a RNAseq dataset. 90 | The keys of this dictionary are the names of the corresponding Cell instances. 91 | ''' 92 | if verbose: 93 | print("Generating objects according to RNAseq datasets provided") 94 | cells = dict() 95 | if cell_columns is None: 96 | cell_columns = rnaseq_data.columns 97 | 98 | for cell in cell_columns: 99 | cells[cell] = Cell(rnaseq_data[[cell]], verbose=verbose) 100 | return cells 101 | -------------------------------------------------------------------------------- /cell2cell/core/communication_scores.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | 5 | import numpy as np 6 | from scipy.stats.mstats import gmean 7 | 8 | 9 | def get_binary_scores(cell1, cell2, ppi_score=None): 10 | '''Computes binary communication scores for all 11 | protein-protein interactions between a pair of 12 | cell-types/tissues/samples. This corresponds to 13 | an AND function between binary values for each 14 | interacting protein coming from each cell. 15 | 16 | Parameters 17 | ---------- 18 | cell1 : cell2cell.core.cell.Cell 19 | First cell-type/tissue/sample to compute the communication 20 | score. In a directed interaction, this is the sender. 21 | 22 | cell2 : cell2cell.core.cell.Cell 23 | Second cell-type/tissue/sample to compute the communication 24 | score. In a directed interaction, this is the receiver. 25 | 26 | ppi_score : array-like, default=None 27 | An array with a weight for each PPI. The weight 28 | multiplies the communication scores. 29 | 30 | Returns 31 | ------- 32 | communication_scores : numpy.array 33 | An array with the communication scores for each intercellular 34 | PPI. 
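    Examples
    --------
    A self-contained sketch with made-up values; in a real analysis,
    `weighted_ppi` is filled upstream during the preprocessing steps of
    the pipeline rather than assigned by hand:

    >>> import pandas as pd
    >>> from cell2cell.core.cell import Cell
    >>> c1 = Cell(pd.DataFrame({'C1': [1, 2]}, index=['g1', 'g2']), verbose=False)
    >>> c2 = Cell(pd.DataFrame({'C2': [3, 4]}, index=['g1', 'g2']), verbose=False)
    >>> c1.weighted_ppi = pd.DataFrame({'A': [1.0, 0.0], 'B': [1.0, 1.0]})
    >>> c2.weighted_ppi = pd.DataFrame({'A': [1.0, 1.0], 'B': [1.0, 0.0]})
    >>> get_binary_scores(c1, c2)
    array([1., 0.])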
35 | ''' 36 | c1 = cell1.weighted_ppi['A'].values 37 | c2 = cell2.weighted_ppi['B'].values 38 | 39 | if (len(c1) == 0) or (len(c2) == 0): 40 | return 0.0 41 | 42 | if ppi_score is None: 43 | ppi_score = np.array([1.0] * len(c1)) 44 | 45 | communication_scores = c1 * c2 * ppi_score 46 | return communication_scores 47 | 48 | 49 | def get_continuous_scores(cell1, cell2, ppi_score=None, method='expression_product'): 50 | '''Computes continuous communication scores for all 51 | protein-protein interactions between a pair of 52 | cell-types/tissues/samples. This corresponds to 53 | a specific scoring function between preprocessed continuous 54 | expression values for each interacting protein coming from 55 | each cell. 56 | 57 | Parameters 58 | ---------- 59 | cell1 : cell2cell.core.cell.Cell 60 | First cell-type/tissue/sample to compute the communication 61 | score. In a directed interaction, this is the sender. 62 | 63 | cell2 : cell2cell.core.cell.Cell 64 | Second cell-type/tissue/sample to compute the communication 65 | score. In a directed interaction, this is the receiver. 66 | 67 | ppi_score : array-like, default=None 68 | An array with a weight for each PPI. The weight 69 | multiplies the communication scores. 70 | 71 | method : str, default='expression_product' 72 | Scoring function for computing the communication score. 73 | Options are: 74 | - 'expression_product' : Multiplication between the expression 75 | of the interacting proteins. One coming from cell1 and the 76 | other from cell2. 77 | - 'expression_mean' : Average between the expression 78 | of the interacting proteins. One coming from cell1 and the 79 | other from cell2. 80 | - 'expression_gmean' : Geometric mean between the expression 81 | of the interacting proteins. One coming from cell1 and the 82 | other from cell2. 83 | 84 | Returns 85 | ------- 86 | communication_scores : numpy.array 87 | An array with the communication scores for each intercellular 88 | PPI. 89 | ''' 90 | c1 = cell1.weighted_ppi['A'].values 91 | c2 = cell2.weighted_ppi['B'].values 92 | 93 | if method == 'expression_product': 94 | communication_scores = score_expression_product(c1, c2) 95 | elif method == 'expression_mean': 96 | communication_scores = score_expression_mean(c1, c2) 97 | elif method == 'expression_gmean': 98 | communication_scores = np.sqrt(score_expression_product(c1, c2)) 99 | else: 100 | raise ValueError('{} is not implemented yet'.format(method)) 101 | 102 | if ppi_score is None: 103 | ppi_score = np.array([1.0] * len(c1)) 104 | 105 | communication_scores = communication_scores * ppi_score 106 | return communication_scores 107 | 108 | 109 | def score_expression_product(c1, c2): 110 | '''Computes the expression product score 111 | 112 | Parameters 113 | ---------- 114 | c1 : array-like 115 | A 1D-array containing the preprocessed expression values 116 | for the interactors in the first column of a list of 117 | protein-protein interactions. 118 | 119 | c2 : array-like 120 | A 1D-array containing the preprocessed expression values 121 | for the interactors in the second column of a list of 122 | protein-protein interactions. 123 | 124 | Returns 125 | ------- 126 | c1 * c2 : array-like 127 | Multiplication of vectors. 
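    Examples
    --------
    Illustrative values:

    >>> import numpy as np
    >>> score_expression_product(np.array([1., 2.]), np.array([3., 4.]))
    array([3., 8.])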
128 | ''' 129 | if (len(c1) == 0) or (len(c2) == 0): 130 | return 0.0 131 | return c1 * c2 132 | 133 | 134 | def score_expression_mean(c1, c2): 135 | '''Computes the expression mean score 136 | 137 | Parameters 138 | ---------- 139 | c1 : array-like 140 | A 1D-array containing the preprocessed expression values 141 | for the interactors in the first column of a list of 142 | protein-protein interactions. 143 | 144 | c2 : array-like 145 | A 1D-array containing the preprocessed expression values 146 | for the interactors in the second column of a list of 147 | protein-protein interactions. 148 | 149 | Returns 150 | ------- 151 | (c1 + c2)/2. : array-like 152 | Average of vectors. 153 | ''' 154 | if (len(c1) == 0) or (len(c2) == 0): 155 | return 0.0 156 | return (c1 + c2)/2. 157 | 158 | 159 | def compute_ccc_matrix(prot_a_exp, prot_b_exp, communication_score='expression_product'): 160 | '''Computes communication scores for a specific 161 | protein-protein interaction using vectors of gene expression 162 | levels for a given interacting protein produced by 163 | different cell-types/tissues/samples. 164 | 165 | Parameters 166 | ---------- 167 | prot_a_exp : array-like 168 | Vector with gene expression levels for an interacting protein A 169 | in a given PPI. Coordinates are different cell-types/tissues/samples. 170 | 171 | prot_b_exp : array-like 172 | Vector with gene expression levels for an interacting protein B 173 | in a given PPI. Coordinates are different cell-types/tissues/samples. 174 | 175 | communication_score : str, default='expression_product' 176 | Scoring function for computing the communication score. 177 | Options are: 178 | 179 | - 'expression_product' : Multiplication between the expression 180 | of the interacting proteins. 181 | - 'expression_mean' : Average between the expression 182 | of the interacting proteins. 183 | - 'expression_gmean' : Geometric mean between the expression 184 | of the interacting proteins. 185 | 186 | Returns 187 | ------- 188 | communication_scores : numpy.array 189 | Matrix MxM, representing the CCC scores of a specific PPI 190 | across all pairs of cell-types/tissues/samples. M are all 191 | cell-types/tissues/samples. In directed interactions, the 192 | vertical axis (axis 0) represents the senders, while the 193 | horizontal axis (axis 1) represents the receivers. 194 | ''' 195 | if communication_score == 'expression_product': 196 | communication_scores = np.outer(prot_a_exp, prot_b_exp) 197 | elif communication_score == 'expression_mean': 198 | communication_scores = (np.outer(prot_a_exp, np.ones(prot_b_exp.shape)) + np.outer(np.ones(prot_a_exp.shape), prot_b_exp)) / 2. 199 | elif communication_score == 'expression_gmean': 200 | communication_scores = np.sqrt(np.outer(prot_a_exp, prot_b_exp)) 201 | else: 202 | raise ValueError("Not a valid communication_score") 203 | return communication_scores 204 | 205 | 206 | def aggregate_ccc_matrices(ccc_matrices, method='gmean'): 207 | '''Aggregates matrices of communication scores. Each 208 | matrix has the communication scores across all pairs 209 | of cell-types/tissues/samples for a different 210 | pair of interacting proteins. 211 | 212 | Parameters 213 | ---------- 214 | ccc_matrices : list 215 | List of matrices of communication scores. Each matrix 216 | is for a specific pair of interacting proteins. 217 | 218 | method : str, default='gmean' 219 | Method to aggregate the matrices element-wise. 220 | Options are: 221 | 222 | - 'gmean' : Geometric mean in an element-wise way. 
223 | - 'sum' : Sum in an element-wise way. 224 | - 'mean' : Mean in an element-wise way. 225 | 226 | Returns 227 | ------- 228 | aggregated_ccc_matrix : numpy.array 229 | A matrix containing aggregated communication scores 230 | from multiple PPIs. Its shape is MxM, where M are all 231 | cell-types/tissues/samples. In directed interactions, the 232 | vertical axis (axis 0) represents the senders, while the 233 | horizontal axis (axis 1) represents the receivers. 234 | ''' 235 | if method == 'gmean': 236 | aggregated_ccc_matrix = gmean(ccc_matrices) 237 | elif method == 'sum': 238 | aggregated_ccc_matrix = np.nansum(ccc_matrices, axis=0) 239 | elif method == 'mean': 240 | aggregated_ccc_matrix = np.nanmean(ccc_matrices, axis=0) 241 | else: 242 | raise ValueError("Not a valid method") 243 | 244 | return aggregated_ccc_matrix -------------------------------------------------------------------------------- /cell2cell/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from cell2cell.datasets.anndata import (balf_covid) 2 | from cell2cell.datasets.gsea_data import (gsea_msig) 3 | from cell2cell.datasets.heuristic_data import (HeuristicGOTerms) 4 | from cell2cell.datasets.random_data import (generate_random_rnaseq, generate_random_ppi, generate_random_cci_scores, 5 | generate_random_metadata) 6 | from cell2cell.datasets.toy_data import (generate_toy_distance, generate_toy_rnaseq, generate_toy_ppi, generate_toy_metadata) -------------------------------------------------------------------------------- /cell2cell/datasets/anndata.py: -------------------------------------------------------------------------------- 1 | from scanpy.readwrite import read 2 | 3 | 4 | def balf_covid(filename='BALF-COVID19-Liao_et_al-NatMed-2020.h5ad'): 5 | """BALF samples from COVID-19 patients. 6 | The data consists of 63k immune and epithelial cells in lungs 7 | from 3 control, 3 moderate COVID-19, and 6 severe COVID-19 patients. 8 | 9 | This dataset was previously published in [1], and this object contains 10 | the raw counts for the annotated cell types available in: 11 | https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE145926 12 | 13 | References: 14 | [1] Liao, M., Liu, Y., Yuan, J. et al. 15 | Single-cell landscape of bronchoalveolar immune cells in patients 16 | with COVID-19. Nat Med 26, 842–844 (2020). 17 | https://doi.org/10.1038/s41591-020-0901-9 18 | 19 | Parameters 20 | ---------- 21 | filename : str, default='BALF-COVID19-Liao_et_al-NatMed-2020.h5ad' 22 | Path to the h5ad file in case it was manually downloaded. 23 | 24 | Returns 25 | ------- 26 | Annotated data matrix. 27 | """ 28 | url = 'https://zenodo.org/record/7535867/files/BALF-COVID19-Liao_et_al-NatMed-2020.h5ad' 29 | adata = read(filename, backup_url=url) 30 | return adata -------------------------------------------------------------------------------- /cell2cell/datasets/gsea_data.py: -------------------------------------------------------------------------------- 1 | from cell2cell.external.gseapy import _check_pathwaydb, load_gmt, PATHWAY_DATA 2 | 3 | 4 | def gsea_msig(organism='human', pathwaydb='GOBP', readable_name=False): 5 | '''Load an MSigDB from a gmt file 6 | 7 | Parameters 8 | ---------- 9 | organism : str, default='human' 10 | Organism for which the DB will be loaded. 11 | Available options are {'human', 'mouse'}. 12 | 13 | pathwaydb : str, default='GOBP' 14 | Molecular Signature Database to load. 
15 | Available options are {'GOBP', 'KEGG', 'Reactome'} 16 | 17 | readable_name : boolean, default=False 18 | If True, the pathway names are transformed to a more readable format. 19 | That is, removing underscores and the pathway DB name at the beginning. 20 | 21 | Returns 22 | ------- 23 | pathway_per_gene : defaultdict 24 | Dictionary containing all genes in the DB as keys, and 25 | their values are lists with their pathway annotations. 26 | ''' 27 | _check_pathwaydb(organism, pathwaydb) 28 | 29 | pathway_per_gene = load_gmt(readable_name=readable_name, **PATHWAY_DATA[organism][pathwaydb]) 30 | return pathway_per_gene -------------------------------------------------------------------------------- /cell2cell/datasets/heuristic_data.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | 5 | 6 | class HeuristicGOTerms: 7 | '''GO terms for contact and secreted proteins. 8 | 9 | Attributes 10 | ---------- 11 | contact_go_terms : list 12 | List of GO terms associated with proteins that 13 | participate in contact interactions (usually 14 | on the surface of cells). 15 | 16 | mediator_go_terms : list 17 | List of GO terms associated with secreted 18 | proteins that mediate intercellular interactions 19 | or communication. 20 | ''' 21 | def __init__(self): 22 | self.contact_go_terms = ['GO:0007155', # Cell adhesion 23 | 'GO:0022608', # Multicellular organism adhesion 24 | 'GO:0098740', # Multiorganism cell adhesion 25 | 'GO:0098743', # Cell aggregation 26 | 'GO:0030054', # Cell-junction # 27 | 'GO:0009986', # Cell surface # 28 | 'GO:0097610', # Cell surface furrow 29 | 'GO:0007160', # Cell-matrix adhesion 30 | 'GO:0043235', # Receptor complex, 31 | 'GO:0008305', # Integrin complex, 32 | 'GO:0043113', # Receptor clustering 33 | 'GO:0009897', # External side of plasma membrane # 34 | 'GO:0038023', # Signaling receptor activity # 35 | ] 36 | 37 | self.mediator_go_terms = ['GO:0005615', # Extracellular space 38 | 'GO:0005576', # Extracellular region 39 | 'GO:0031012', # Extracellular matrix 40 | 'GO:0005201', # Extracellular matrix structural constituent 41 | 'GO:1990430', # Extracellular matrix protein binding 42 | 'GO:0048018', # Receptor ligand activity # 43 | ] -------------------------------------------------------------------------------- /cell2cell/datasets/random_data.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | 5 | import numpy as np 6 | import pandas as pd 7 | 8 | from sklearn.utils import resample 9 | 10 | from cell2cell.preprocessing import rnaseq, ppi 11 | 12 | 13 | def generate_random_rnaseq(size, row_names, random_state=None, verbose=True): 14 | ''' 15 | Generates an RNA-seq dataset that is normally distributed gene-wise and size 16 | normalized (each column sums up to a million). 17 | 18 | Parameters 19 | ---------- 20 | size : int 21 | Number of cell-types/tissues/samples (columns). 22 | 23 | row_names : array-like 24 | List containing the name of genes (rows). 25 | 26 | random_state : int, default=None 27 | Seed for randomization. 28 | 29 | verbose : boolean, default=True 30 | Whether printing or not steps of the analysis. 31 | 32 | Returns 33 | ------- 34 | df : pandas.DataFrame 35 | Dataframe containing gene expression given the list 36 | of genes for each cell-type/tissue/sample. 
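    Examples
    --------
    Illustrative call (values are random; made-up gene names):

    >>> df = generate_random_rnaseq(size=3,
    ...                             row_names=['g1', 'g2', 'g3', 'g4'],
    ...                             verbose=False)
    >>> df.shape
    (4, 3)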
37 | ''' 38 | if verbose: 39 | print('Generating random RNA-seq dataset.') 40 | columns = ['Cell-{}'.format(c) for c in range(1, size+1)] 41 | 42 | if random_state is not None: 43 | np.random.seed(random_state) 44 | data = np.random.randn(len(row_names), len(columns)) # Normal distribution 45 | min = np.abs(np.amin(data, axis=1)) 46 | min = min.reshape((len(min), 1)) 47 | 48 | data = data + min 49 | df = pd.DataFrame(data, index=row_names, columns=columns) 50 | if verbose: 51 | print('Normalizing random RNA-seq dataset (into TPM)') 52 | df = rnaseq.scale_expression_by_sum(df, axis=0, sum_value=1e6) 53 | return df 54 | 55 | 56 | def generate_random_ppi(max_size, interactors_A, interactors_B=None, random_state=None, verbose=True): 57 | '''Generates a random list of protein-protein interactions. 58 | 59 | Parameters 60 | ---------- 61 | max_size : int 62 | Maximum number of interactions to obtain. Since the PPIs 63 | are obtained by independently resampling interactors A and B 64 | rather than creating all possible combinations (it may demand too much 65 | memory), some PPIs can be duplicated, and dropping them 66 | can result in fewer PPIs than the max_size. 67 | 68 | interactors_A : list 69 | A list of protein names to include in the first column of 70 | the PPIs. 71 | 72 | interactors_B : list, default=None 73 | A list of protein names to include in the second column 74 | of the PPIs. If None, interactors_A will be used as 75 | interactors_B too. 76 | 77 | random_state : int, default=None 78 | Seed for randomization. 79 | 80 | verbose : boolean, default=True 81 | Whether printing or not steps of the analysis. 82 | 83 | Returns 84 | ------- 85 | ppi_data : pandas.DataFrame 86 | DataFrame containing a list of protein-protein interactions. 87 | It has three columns: 'A', 'B', and 'score' for interactors 88 | A, B and weights of interactions, respectively. 
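    Examples
    --------
    Illustrative call (protein names are made up; only the columns are
    checked here since the sampled pairs depend on the random state):

    >>> ppi_data = generate_random_ppi(max_size=4,
    ...                                interactors_A=['P1', 'P2', 'P3'],
    ...                                verbose=False)
    >>> list(ppi_data.columns)
    ['A', 'B', 'score']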
89 | ''' 90 | if interactors_B is not None: 91 | assert max_size <= len(interactors_A)*len(interactors_B), "The maximum size can't be greater than all combinations between partners A and B" 92 | else: 93 | assert max_size <= len(interactors_A)**2, "The maximum size can't be greater than all combinations of partners A" 94 | 95 | 96 | if verbose: 97 | print('Generating random PPI network.') 98 | 99 | def small_block_ppi(size, interactors_A, interactors_B, random_state): 100 | if random_state is not None: 101 | random_state += 1 102 | if interactors_B is None: 103 | interactors_B = interactors_A 104 | 105 | col_A = resample(interactors_A, n_samples=size, random_state=random_state) 106 | col_B = resample(interactors_B, n_samples=size, random_state=random_state) 107 | 108 | ppi_data = pd.DataFrame() 109 | ppi_data['A'] = col_A 110 | ppi_data['B'] = col_B 111 | ppi_data = ppi_data.assign(score=1.0) 112 | 113 | ppi_data = ppi.remove_ppi_bidirectionality(ppi_data, ('A', 'B'), verbose=verbose) 114 | ppi_data = ppi_data.drop_duplicates() 115 | ppi_data.reset_index(inplace=True, drop=True) 116 | return ppi_data 117 | 118 | ppi_data = small_block_ppi(max_size*2, interactors_A, interactors_B, random_state) 119 | 120 | # TODO: This part needs to be fixed, it does not converge to the max_size -> len((set(A)) * len(set(B) - set(A))) 121 | # while ppi_data.shape[0] < size: 122 | # if random_state is not None: 123 | # random_state += 2 124 | # b = small_block_ppi(size, interactors_A, interactors_B, random_state) 125 | # print(b) 126 | # ppi_data = pd.concat([ppi_data, b]) 127 | # ppi_data = ppi.remove_ppi_bidirectionality(ppi_data, ('A', 'B'), verbose=verbose) 128 | # ppi_data = ppi_data.drop_duplicates() 129 | # ppi_data.dropna() 130 | # ppi_data.reset_index(inplace=True, drop=True) 131 | # print(ppi_data.shape[0]) 132 | 133 | if ppi_data.shape[0] > max_size: 134 | ppi_data = ppi_data.loc[list(range(max_size)), :] 135 | ppi_data.reset_index(inplace=True, drop=True) 136 | return ppi_data 137 | 138 | 139 | def generate_random_cci_scores(cell_number, labels=None, symmetric=True, random_state=None): 140 | '''Generates a square cell-cell interaction 141 | matrix with random scores. 142 | 143 | Parameters 144 | ---------- 145 | cell_number : int 146 | Number of cells. 147 | 148 | labels : list, default=None 149 | List containing labels for each cell. Length of 150 | this list must match the cell_number. 151 | 152 | symmetric : boolean, default=True 153 | Whether generating a symmetric CCI matrix. 154 | 155 | random_state : int, default=None 156 | Seed for randomization. 157 | 158 | Returns 159 | ------- 160 | cci_matrix : pandas.DataFrame 161 | Matrix with rows and columns as cells. Values 162 | represent a random CCI score between 0 and 1. 163 | ''' 164 | if labels is not None: 165 | assert len(labels) == cell_number, "Length of labels must match cell_number" 166 | else: 167 | labels = ['Cell-{}'.format(n) for n in range(1, cell_number+1)] 168 | 169 | if random_state is not None: 170 | np.random.seed(random_state) 171 | cci_scores = np.random.random((cell_number, cell_number)) 172 | if symmetric: 173 | cci_scores = (cci_scores + cci_scores.T) / 2. 174 | cci_matrix = pd.DataFrame(cci_scores, index=labels, columns=labels) 175 | 176 | return cci_matrix 177 | 178 | 179 | def generate_random_metadata(cell_labels, group_number): 180 | '''Randomly assigns groups to cell labels. 181 | 182 | Parameters 183 | ---------- 184 | cell_labels : list 185 | A list of cell labels. 
186 | 187 | group_number : int 188 | Number of major groups of cells. 189 | 190 | Returns 191 | ------- 192 | metadata : pandas.DataFrame 193 | DataFrame containing the major groups that each cell 194 | received randomly (under column 'Group'). Cells are 195 | under the column 'Cell'. 196 | ''' 197 | metadata = pd.DataFrame() 198 | metadata['Cell'] = cell_labels 199 | 200 | groups = list(range(1, group_number+1)) 201 | metadata['Group'] = metadata['Cell'].apply(lambda x: np.random.choice(groups, 1)[0]) 202 | return metadata 203 | -------------------------------------------------------------------------------- /cell2cell/datasets/toy_data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | 5 | def generate_toy_rnaseq(): 6 | '''Generates a toy RNA-seq dataset 7 | 8 | Returns 9 | ------- 10 | rnaseq : pandas.DataFrame 11 | DataFrame containing the toy RNA-seq dataset. Columns 12 | are cells and rows are genes. 13 | ''' 14 | data = np.asarray([[5, 10, 8, 15, 2], 15 | [15, 5, 20, 1, 30], 16 | [18, 12, 5, 40, 20], 17 | [9, 30, 22, 5, 2], 18 | [2, 1, 1, 27, 15], 19 | [30, 11, 16, 5, 12], 20 | ]) 21 | 22 | rnaseq = pd.DataFrame(data, 23 | index=['Protein-A', 'Protein-B', 'Protein-C', 'Protein-D', 'Protein-E', 'Protein-F'], 24 | columns=['C1', 'C2', 'C3', 'C4', 'C5'] 25 | ) 26 | rnaseq.index.name = 'gene_id' 27 | return rnaseq 28 | 29 | 30 | def generate_toy_ppi(prot_complex=False): 31 | '''Generates a toy list of protein-protein interactions. 32 | 33 | Parameters 34 | ---------- 35 | prot_complex : boolean, default=False 36 | Whether including PPIs where interactors could contain 37 | multimeric complexes. 38 | 39 | Returns 40 | ------- 41 | ppi : pandas.DataFrame 42 | Dataframe containing PPIs. Columns are 'A' (first interacting 43 | partners), 'B' (second interacting partners) and 'score' 44 | for weighting each PPI. 45 | ''' 46 | if prot_complex: 47 | data = np.asarray([['Protein-A', 'Protein-B'], 48 | ['Protein-B', 'Protein-C'], 49 | ['Protein-C', 'Protein-A'], 50 | ['Protein-B', 'Protein-B'], 51 | ['Protein-B', 'Protein-A'], 52 | ['Protein-E', 'Protein-F'], 53 | ['Protein-F', 'Protein-F'], 54 | ['Protein-C&Protein-E', 'Protein-F'], 55 | ['Protein-B', 'Protein-E'], 56 | ['Protein-A&Protein-B', 'Protein-F'], 57 | ]) 58 | else: 59 | data = np.asarray([['Protein-A', 'Protein-B'], 60 | ['Protein-B', 'Protein-C'], 61 | ['Protein-C', 'Protein-A'], 62 | ['Protein-B', 'Protein-B'], 63 | ['Protein-B', 'Protein-A'], 64 | ['Protein-E', 'Protein-F'], 65 | ['Protein-F', 'Protein-F'], 66 | ['Protein-C', 'Protein-F'], 67 | ['Protein-B', 'Protein-E'], 68 | ['Protein-A', 'Protein-F'], 69 | ]) 70 | ppi = pd.DataFrame(data, columns=['A', 'B']) 71 | ppi = ppi.assign(score=1.0) 72 | return ppi 73 | 74 | 75 | def generate_toy_metadata(): 76 | '''Generates metadata for cells in the toy RNA-seq dataset. 77 | 78 | Returns 79 | ------- 80 | metadata : pandas.DataFrame 81 | DataFrame with metadata for each cell. Metadata contains the 82 | major groups of those cells. 83 | ''' 84 | data = np.asarray([['C1', 'G1'], 85 | ['C2', 'G2'], 86 | ['C3', 'G3'], 87 | ['C4', 'G3'], 88 | ['C5', 'G1'] 89 | ]) 90 | 91 | metadata = pd.DataFrame(data, columns=['#SampleID', 'Groups']) 92 | return metadata 93 | 94 | 95 | def generate_toy_distance(): 96 | '''Generates a square matrix with cell-cell distance. 
97 | 98 | Returns 99 | ------- 100 | distance : pandas.DataFrame 101 | DataFrame with Euclidean-like distance between each 102 | pair of cells in the toy RNA-seq dataset. 103 | ''' 104 | data = np.asarray([[0.0, 10.0, 12.0, 5.0, 3.0], 105 | [10.0, 0.0, 15.0, 8.0, 9.0], 106 | [12.0, 15.0, 0.0, 4.5, 7.5], 107 | [5.0, 8.0, 4.5, 0.0, 6.5], 108 | [3.0, 9.0, 7.5, 6.5, 0.0], 109 | ]) 110 | distance = pd.DataFrame(data, 111 | index=['C1', 'C2', 'C3', 'C4', 'C5'], 112 | columns=['C1', 'C2', 'C3', 'C4', 'C5'] 113 | ) 114 | return distance -------------------------------------------------------------------------------- /cell2cell/external/__init__.py: -------------------------------------------------------------------------------- 1 | from cell2cell.external.pcoa import (pcoa, pcoa_biplot, _check_ordination) 2 | from cell2cell.external.goenrich import (goa, ontology) 3 | from cell2cell.external.gseapy import (load_gmt, generate_lr_geneset, run_gsea) 4 | from cell2cell.external.umap import (run_umap) -------------------------------------------------------------------------------- /cell2cell/external/goenrich.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------------- 2 | # Copyright (c) 2017--, goenrich development team. 3 | # 4 | # Distributed under the terms of the MIT licence. 5 | # ---------------------------------------------------------------------------- 6 | 7 | # CODE OBTAINED FROM: https://github.com/jdrudolph/goenrich/ 8 | # COPIED HERE BECAUSE GOENRICH IS NOT AVAILABLE THROUGH CONDA 9 | 10 | import itertools 11 | import networkx as nx 12 | import pandas as pd 13 | 14 | def _tokenize(f): 15 | token = [] 16 | for line in f: 17 | if line == '\n': 18 | yield token 19 | token = [] 20 | else: 21 | token.append(line) 22 | 23 | def _filter_terms(tokens): 24 | for token in tokens: 25 | if token[0] == '[Term]\n': 26 | yield token[1:] 27 | 28 | def _parse_terms(terms): 29 | for term in terms: 30 | obsolete = False 31 | node = {} 32 | parents = [] 33 | for line in term: 34 | if line.startswith('id:'): 35 | id = line[4:-1] 36 | elif line.startswith('name:'): 37 | node['name'] = line[6:-1] 38 | elif line.startswith('namespace:'): 39 | node['namespace'] = line[11:-1] 40 | elif line.startswith('is_a:'): 41 | parents.append(line[6:16]) 42 | elif line.startswith('relationship: part_of'): 43 | parents.append(line[22:32]) 44 | elif line.startswith('is_obsolete'): 45 | obsolete = True 46 | break 47 | if not obsolete: 48 | edges = [(p, id) for p in parents] # will reverse edges later 49 | yield (id, node), edges 50 | else: 51 | continue 52 | 53 | _filename = 'db/go-basic.obo' 54 | 55 | def ontology(file): 56 | """ read ontology from file 57 | :param file: file path of file handle 58 | """ 59 | O = nx.DiGraph() 60 | 61 | if isinstance(file, str): 62 | f = open(file) 63 | we_opened_file = True 64 | else: 65 | f = file 66 | we_opened_file = False 67 | 68 | try: 69 | tokens = _tokenize(f) 70 | terms = _filter_terms(tokens) 71 | entries = _parse_terms(terms) 72 | nodes, edges = zip(*entries) 73 | O.add_nodes_from(nodes) 74 | O.add_edges_from(itertools.chain.from_iterable(edges)) 75 | O.graph['roots'] = {data['name'] : n for n, data in O.nodes.items() 76 | if data['name'] == data['namespace']} 77 | finally: 78 | if we_opened_file: 79 | f.close() 80 | 81 | for root in O.graph['roots'].values(): 82 | for n, depth in nx.shortest_path_length(O, root).items(): 83 | node = O.nodes[n] 84 | node['depth'] = min(depth, 
node.get('depth', float('inf'))) 85 | return O.reverse() 86 | 87 | 88 | """ 89 | parsers for different go-annotation formats 90 | """ 91 | GENE_ASSOCIATION_COLUMNS = ('db', 'db_object_id', 'db_object_symbol', 92 | 'qualifier', 'go_id', 'db_reference', 93 | 'evidence_code', 'with_from', 'aspect', 94 | 'db_object_name', 'db_object_synonym', 95 | 'db_object_type', 'taxon', 'date', 'assigned_by', 96 | 'annotation_extension', 'gene_product_form_id') 97 | EXPERIMENTAL_EVIDENCE = ('EXP', 'IDA', 'IPI', 'IMP', 'IGI', 'IEP') 98 | 99 | 100 | def goa(filename, experimental=True, **kwds): 101 | """ read go-annotation file 102 | 103 | :param filename: path to the go-annotation file 104 | :param experimental: use only experimentally validated annotations 105 | """ 106 | defaults = {'comment': '!', 107 | 'names': GENE_ASSOCIATION_COLUMNS} 108 | 109 | if experimental and 'usecols' in kwds: 110 | kwds['usecols'] += ('evidence_code',) 111 | 112 | defaults.update(kwds) 113 | result = pd.read_csv(filename, sep='\t', **defaults) 114 | 115 | if experimental: 116 | retain_mask = result.evidence_code.isin(EXPERIMENTAL_EVIDENCE) 117 | result.drop(result.index[~retain_mask], inplace=True) 118 | 119 | return result 120 | 121 | 122 | def sgd(filename, experimental=False, **kwds): 123 | """ read yeast genome database go-annotation file 124 | :param filename: path to the go-annotation file 125 | :param experimental: use only experimentally validated annotations 126 | """ 127 | return goa(filename, experimental, **kwds) 128 | 129 | 130 | GENE2GO_COLUMNS = ('tax_id', 'GeneID', 'GO_ID', 'Evidence', 'Qualifier', 'GO_term', 'PubMed', 'Category') 131 | 132 | 133 | def gene2go(filename, experimental=False, tax_id=9606, **kwds): 134 | """ read go-annotation file 135 | 136 | :param filename: path to the gene2go annotation file 137 | :param experimental: use only experimentally validated annotations 138 | :param tax_id: filter according to taxon 139 | """ 140 | defaults = {'comment': '#', 141 | 'names': GENE2GO_COLUMNS} 142 | defaults.update(kwds) 143 | result = pd.read_csv(filename, sep='\t', **defaults) 144 | 145 | retain_mask = result.tax_id == tax_id 146 | result.drop(result.index[~retain_mask], inplace=True) 147 | 148 | if experimental: 149 | retain_mask = result.Evidence.isin(EXPERIMENTAL_EVIDENCE) 150 | result.drop(result.index[~retain_mask], inplace=True) 151 | 152 | return result -------------------------------------------------------------------------------- /cell2cell/external/umap.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import umap 3 | 4 | import pandas as pd 5 | import scipy.spatial as sp 6 | 7 | 8 | def run_umap(rnaseq_data, axis=1, metric='euclidean', min_dist=0.4, n_neighbors=8, random_state=None, **kwargs): 9 | '''Runs UMAP on an expression matrix. 10 | Parameters 11 | ---------- 12 | rnaseq_data : pandas.DataFrame 13 | A dataframe of gene expression values wherein the rows are the genes or 14 | embeddings of a dimensionality reduction method and columns the cells, 15 | tissues or samples. 16 | 17 | axis : int, default=1 18 | An axis of the dataframe (0 across rows, 1 across columns). 19 | Across rows means that the UMAP is to compare genes, while 20 | across columns is to compare cells, tissues or samples. 21 | 22 | metric : str, default='euclidean' 23 | The distance metric to use.
The distance function can be 'braycurtis', 24 | 'canberra', 'chebyshev', 'cityblock', 'correlation', 'cosine', 'dice', 25 | 'euclidean', 'hamming', 'jaccard', 'jensenshannon', 'kulsinski', 26 | 'mahalanobis', 'matching', 'minkowski', 'rogerstanimoto', 'russellrao', 27 | 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule'. 28 | 29 | min_dist : float, default=0.4 30 | The effective minimum distance between embedded points. Smaller values 31 | will result in a more clustered/clumped embedding where nearby points 32 | on the manifold are drawn closer together, while larger values will 33 | result in a more even dispersal of points. The value should be set 34 | relative to the ``spread`` value, which determines the scale at which 35 | embedded points will be spread out. 36 | 37 | n_neighbors : int, default=8 38 | The size of local neighborhood (in terms of number of neighboring 39 | sample points) used for manifold approximation. Larger values 40 | result in more global views of the manifold, while smaller 41 | values result in more local data being preserved. In general 42 | values should be in the range 2 to 100. 43 | 44 | random_state : int, default=None 45 | Seed for randomization. 46 | 47 | **kwargs : dict 48 | Extra arguments for UMAP as defined in umap.UMAP. 49 | 50 | Returns 51 | ------- 52 | umap_df : pandas.DataFrame 53 | Dataframe containing the UMAP embeddings for the axis analyzed. 54 | Contains columns 'umap1' and 'umap2'. 55 | ''' 56 | # Organize data 57 | if axis == 0: 58 | df = rnaseq_data 59 | elif axis == 1: 60 | df = rnaseq_data.T 61 | else: 62 | raise ValueError("The parameter axis must be either 0 or 1.") 63 | 64 | # Compute distances 65 | D = sp.distance.pdist(df, metric=metric) 66 | D_sq = sp.distance.squareform(D) 67 | 68 | # Run UMAP 69 | model = umap.UMAP(metric="precomputed", 70 | min_dist=min_dist, 71 | n_neighbors=n_neighbors, 72 | random_state=random_state, 73 | **kwargs 74 | ) 75 | 76 | trans_D = model.fit_transform(D_sq) 77 | 78 | # Organize results 79 | umap_df = pd.DataFrame(trans_D, columns=['umap1', 'umap2'], index=df.index) 80 | return umap_df -------------------------------------------------------------------------------- /cell2cell/io/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from cell2cell.io.directories import (create_directory, get_files_from_directory) 4 | from cell2cell.io.read_data import (load_cutoffs, load_go_annotations, load_go_terms, load_metadata, load_ppi, 5 | load_rnaseq, load_table, load_tables_from_directory, load_variable_with_pickle, 6 | load_tensor, load_tensor_factors) 7 | from cell2cell.io.save_data import (export_variable_with_pickle) 8 | -------------------------------------------------------------------------------- /cell2cell/io/directories.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import os 4 | 5 | 6 | def create_directory(pathname): 7 | '''Creates a directory. 8 | 9 | Uses a path to create a directory. It creates 10 | all intermediate folders before creating the 11 | leaf folder. 12 | 13 | Parameters 14 | ---------- 15 | pathname : str 16 | Full path of the folder to create.
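Examples
--------
A minimal sketch; the path below is only illustrative:

>>> create_directory('/tmp/cell2cell_outputs')
/tmp/cell2cell_outputs was created successfully.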
17 | ''' 18 | if not os.path.isdir(pathname): 19 | os.makedirs(pathname) 20 | print("{} was created successfully.".format(pathname)) 21 | else: 22 | print("{} already exists.".format(pathname)) 23 | 24 | 25 | def get_files_from_directory(pathname, dir_in_filepath=False): 26 | '''Obtains a list of filenames in a folder. 27 | 28 | Parameters 29 | ---------- 30 | pathname : str 31 | Full path of the folder to explore. 32 | 33 | dir_in_filepath : boolean, default=False 34 | Whether adding `pathname` to the filenames. 35 | 36 | Returns 37 | ------- 38 | filenames : list 39 | A list containing the names (strings) of the files 40 | in the folder. 41 | ''' 42 | directory = os.fsencode(pathname) 43 | filenames = [pathname + '/' + os.fsdecode(file) if dir_in_filepath else os.fsdecode(file) for file in os.listdir(directory)] 44 | return filenames 45 | -------------------------------------------------------------------------------- /cell2cell/io/save_data.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | 5 | import pickle 6 | 7 | 8 | def export_variable_with_pickle(variable, filename): 9 | '''Exports a large variable in a Python-readable way 10 | using pickle. 11 | 12 | Parameters 13 | ---------- 14 | variable : a python variable 15 | Variable to export 16 | 17 | filename : str 18 | Complete path to the file wherein the variable will be 19 | stored. For example: 20 | /home/user/variable.pkl 21 | ''' 22 | 23 | max_bytes = 2 ** 31 - 1 24 | 25 | bytes_out = pickle.dumps(variable) 26 | with open(filename, 'wb') as f_out: 27 | for idx in range(0, len(bytes_out), max_bytes): 28 | f_out.write(bytes_out[idx:idx + max_bytes]) 29 | print(filename, 'was correctly saved.') -------------------------------------------------------------------------------- /cell2cell/plotting/__init__.py: -------------------------------------------------------------------------------- 1 | from cell2cell.plotting.aesthetics import (get_colors_from_labels, map_colors_to_metadata, generate_legend) 2 | from cell2cell.plotting.ccc_plot import (clustermap_ccc) 3 | from cell2cell.plotting.cci_plot import (clustermap_cci) 4 | from cell2cell.plotting.circular_plot import (circos_plot) 5 | from cell2cell.plotting.pval_plot import (dot_plot, generate_dot_plot) 6 | from cell2cell.plotting.factor_plot import (context_boxplot, loading_clustermap, ccc_networks_plot) 7 | from cell2cell.plotting.pcoa_plot import (pcoa_3dplot) 8 | from cell2cell.plotting.tensor_plot import (tensor_factors_plot, tensor_factors_plot_from_loadings) 9 | from cell2cell.plotting.umap_plot import (umap_biplot) -------------------------------------------------------------------------------- /cell2cell/plotting/aesthetics.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from matplotlib import pyplot as plt 4 | from matplotlib.colors import Normalize 5 | import matplotlib.cm as cm 6 | import matplotlib.patches as patches 7 | import numpy as np 8 | 9 | 10 | def get_colors_from_labels(labels, cmap='gist_rainbow', factor=1): 11 | '''Generates colors for each label in a list given a colormap. 12 | 13 | Parameters 14 | ---------- 15 | labels : list 16 | A list of labels to assign a color. 17 | 18 | cmap : str, default='gist_rainbow' 19 | A matplotlib color palette name. 20 | 21 | factor : int, default=1 22 | Factor to amplify the separation of colors.
23 | 24 | Returns 25 | ------- 26 | colors : dict 27 | A dictionary where the keys are the labels and the values 28 | correspond to the assigned colors. 29 | ''' 30 | assert factor >= 1 31 | 32 | colors = dict.fromkeys(labels, ()) 33 | 34 | factor = int(factor) 35 | cm_ = plt.get_cmap(cmap) 36 | 37 | is_number = all((isinstance(e, float) or isinstance(e, int)) for e in labels) 38 | 39 | if not is_number: 40 | NUM_COLORS = factor * len(colors) 41 | for i, label in enumerate(colors.keys()): 42 | colors[label] = cm_((1 + ((factor-1)/factor)) * i / NUM_COLORS) 43 | else: 44 | max_ = np.nanmax(labels) 45 | min_ = np.nanmin(labels) 46 | norm = Normalize(vmin=-min_, vmax=max_) 47 | 48 | m = cm.ScalarMappable(norm=norm, cmap=cmap) 49 | for label in colors.keys(): 50 | colors[label] = m.to_rgba(label) 51 | return colors 52 | 53 | 54 | def map_colors_to_metadata(metadata, ref_df=None, colors=None, sample_col='#SampleID', group_col='Groups', 55 | cmap='gist_rainbow'): 56 | '''Assigns a color to elements in a dataframe containing metadata. 57 | 58 | Parameters 59 | ---------- 60 | metadata : pandas.DataFrame 61 | A dataframe with metadata for specific elements. 62 | 63 | ref_df : pandas.DataFrame 64 | A dataframe whose columns contain a subset of 65 | elements in the metadata. 66 | 67 | colors : dict, default=None 68 | Dictionary containing tuples in the RGBA format for indicating colors 69 | of major groups of cells. If colors is specified, cmap will be 70 | ignored. 71 | 72 | sample_col : str, default='#SampleID' 73 | Column in the metadata for elements to color. 74 | 75 | group_col : str, default='Groups' 76 | Column in the metadata containing the major groups of the elements 77 | to color. 78 | 79 | cmap : str, default='gist_rainbow' 80 | Name of the color palette for coloring the major groups of elements. 81 | 82 | Returns 83 | ------- 84 | new_colors : pandas.DataFrame 85 | A pandas dataframe where the index is the list of elements in the 86 | sample_col and the column group_col contains the colors assigned 87 | to each element given their groups. 88 | ''' 89 | if ref_df is not None: 90 | meta_ = metadata.set_index(sample_col).reindex(ref_df.columns) 91 | else: 92 | meta_ = metadata.set_index(sample_col) 93 | labels = meta_[group_col].unique().tolist() 94 | if colors is None: 95 | colors = get_colors_from_labels(labels, cmap=cmap) 96 | else: 97 | upd_dict = dict([(v, (1., 1., 1., 1.)) for v in labels if v not in colors.keys()]) 98 | colors.update(upd_dict) 99 | 100 | new_colors = meta_[group_col].map(colors) 101 | new_colors.index = meta_.index 102 | new_colors.name = group_col.capitalize() 103 | 104 | return new_colors 105 | 106 | 107 | def generate_legend(color_dict, loc='center left', bbox_to_anchor=(1.01, 0.5), ncol=1, fancybox=True, shadow=True, 108 | title='Legend', fontsize=14, sorted_labels=True, ax=None): 109 | '''Adds a legend to a previous plot or displays an independent legend 110 | given specific colors for labels. 111 | 112 | Parameters 113 | ---------- 114 | color_dict : dict 115 | Dictionary containing tuples in the RGBA format for indicating colors 116 | of major groups of cells. Keys are the labels and values are the RGBA 117 | tuples. 118 | 119 | loc : str, default='center left' 120 | Alignment of the legend given the location specified in bbox_to_anchor. 121 | 122 | bbox_to_anchor : tuple, default=(1.01, 0.5) 123 | Location of the legend in a (X, Y) format.
For example, if you want 124 | your axes legend located at the figure's top right-hand corner instead 125 | of the axes' corner, simply specify the corner's location and the 126 | coordinate system of that location, which in this case would be (1, 1). 127 | 128 | ncol : int, default=1 129 | Number of columns to display the legend. 130 | 131 | fancybox : boolean, default=True 132 | Whether round edges should be enabled around the FancyBboxPatch which 133 | makes up the legend's background. 134 | 135 | shadow : boolean, default=True 136 | Whether to draw a shadow behind the legend. 137 | 138 | title : str, default='Legend' 139 | Title of the legend box. 140 | 141 | fontsize : int, default=14 142 | Size of the text in the legends. 143 | 144 | sorted_labels : boolean, default=True 145 | Whether alphabetically sorting the labels. 146 | 147 | ax : matplotlib.axes.Axes, default=None 148 | Axes instance to attach the legend to. If ax=None, the 149 | legend is created through matplotlib.pyplot.legend and 150 | attached to the currently active axes of the current 151 | figure (a new figure is created if none exists). 152 | 153 | 154 | Returns 155 | ------- 156 | legend1 : matplotlib.legend.Legend 157 | A legend object in a figure. 158 | ''' 159 | color_patches = [] 160 | if sorted_labels: 161 | iteritems = sorted(color_dict.items()) 162 | else: 163 | iteritems = color_dict.items() 164 | for k, v in iteritems: 165 | color_patches.append(patches.Patch(color=v, label=str(k).replace('_', ' '))) 166 | 167 | if ax is None: 168 | legend1 = plt.legend(handles=color_patches, 169 | loc=loc, 170 | bbox_to_anchor=bbox_to_anchor, 171 | ncol=ncol, 172 | fancybox=fancybox, 173 | shadow=shadow, 174 | title=title, 175 | title_fontsize=fontsize, 176 | fontsize=fontsize) 177 | else: 178 | legend1 = ax.legend(handles=color_patches, 179 | loc=loc, 180 | bbox_to_anchor=bbox_to_anchor, 181 | ncol=ncol, 182 | fancybox=fancybox, 183 | shadow=shadow, 184 | title=title, 185 | title_fontsize=fontsize, 186 | fontsize=fontsize) 187 | return legend1 -------------------------------------------------------------------------------- /cell2cell/plotting/pcoa_plot.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | import pandas as pd 5 | from matplotlib import pyplot as plt 6 | from mpl_toolkits.mplot3d import Axes3D 7 | 8 | from cell2cell.external import pcoa, _check_ordination 9 | from cell2cell.plotting.aesthetics import get_colors_from_labels 10 | 11 | 12 | def pcoa_3dplot(interaction_space, metadata=None, sample_col='#SampleID', group_col='Groups', pcoa_method='eigh', 13 | meta_cmap='gist_rainbow', colors=None, excluded_cells=None, title='', axis_fontsize=14, legend_fontsize=12, 14 | figsize=(6, 5), view_angles=(30, 135), filename=None): 15 | '''Projects the cells into a Euclidean space (PCoA) given their distances 16 | based on their CCI scores. Then, plots each cell by their first three 17 | coordinates in a 3D scatter plot. 18 | 19 | Parameters 20 | ---------- 21 | interaction_space : cell2cell.core.interaction_space.InteractionSpace 22 | Interaction space that contains a distance matrix after running 23 | the method compute_pairwise_cci_scores. Alternatively, this object 24 | can be a numpy-array or a pandas DataFrame. Also, a 25 | SingleCellInteractions or a BulkInteractions object after running 26 | the method compute_pairwise_cci_scores.
27 | 28 | metadata : pandas.DataFrame, default=None 29 | Metadata associated with the cells, cell types or samples in the 30 | matrix containing CCI scores. If None, cells will not be colored 31 | by major groups. 32 | 33 | sample_col : str, default='#SampleID' 34 | Column in the metadata for the cells, cell types or samples 35 | in the matrix containing CCI scores. 36 | 37 | group_col : str, default='Groups' 38 | Column in the metadata containing the major groups of cells, cell types 39 | or samples in the matrix with CCI scores. 40 | 41 | pcoa_method : str, default='eigh' 42 | Eigendecomposition method to use in performing PCoA. 43 | By default, uses SciPy's `eigh`, which computes exact 44 | eigenvectors and eigenvalues for all dimensions. The alternate 45 | method, `fsvd`, uses faster heuristic eigendecomposition but loses 46 | accuracy. The magnitude of accuracy lost is dependent on dataset. 47 | 48 | meta_cmap : str, default='gist_rainbow' 49 | Name of the color palette for coloring the major groups of cells. 50 | 51 | colors : dict, default=None 52 | Dictionary containing tuples in the RGBA format for indicating colors 53 | of major groups of cells. If colors is specified, meta_cmap will be 54 | ignored. 55 | 56 | excluded_cells : list, default=None 57 | List containing cell names that are present in the interaction_space 58 | object but that will be excluded from this plot. 59 | 60 | title : str, default='' 61 | Title of the PCoA 3D plot. 62 | 63 | axis_fontsize : int, default=14 64 | Size of the font for the labels of each axis (X, Y and Z). 65 | 66 | legend_fontsize : int, default=12 67 | Size of the font for labels in the legend. 68 | 69 | figsize : tuple, default=(6, 5) 70 | Size of the figure (width*height), each in inches. 71 | 72 | view_angles : tuple, default=(30, 135) 73 | Rotation angles of the plot. Set the elevation and 74 | azimuth of the axes. 75 | 76 | filename : str, default=None 77 | Path to save the figure. If None, the figure is not 78 | saved.
79 | 80 | Returns 81 | ------- 82 | results : dict 83 | Dictionary that contains: 84 | 85 | - 'fig' : matplotlib.figure.Figure, containing the whole figure 86 | - 'axes' : matplotlib.axes.Axes, containing the axes of the 3D plot 87 | - 'ordination' : Ordination or projection obtained from the PCoA 88 | - 'distance_matrix' : Distance matrix used to perform the PCoA (usually in 89 | interaction_space.distance_matrix) 90 | ''' 91 | if hasattr(interaction_space, 'distance_matrix'): 92 | print('Interaction space detected as an InteractionSpace class') 93 | distance_matrix = interaction_space.distance_matrix 94 | elif (type(interaction_space) is np.ndarray) or (type(interaction_space) is pd.core.frame.DataFrame): 95 | print('Interaction space detected as a distance matrix') 96 | distance_matrix = interaction_space 97 | elif hasattr(interaction_space, 'interaction_space'): 98 | print('Interaction space detected as an Interactions class') 99 | if not hasattr(interaction_space.interaction_space, 'distance_matrix'): 100 | raise ValueError('First run the method compute_pairwise_cci_scores() in your interaction' + \ 101 | ' object to generate a distance matrix.') 102 | else: 103 | distance_matrix = interaction_space.interaction_space.distance_matrix 104 | else: 105 | raise ValueError('First run the method compute_pairwise_cci_scores() in your interaction' + \ 106 | ' object to generate a distance matrix.') 107 | 108 | # Drop excluded cells 109 | if excluded_cells is not None: 110 | df = distance_matrix.loc[~distance_matrix.index.isin(excluded_cells), 111 | ~distance_matrix.columns.isin(excluded_cells)] 112 | else: 113 | df = distance_matrix 114 | 115 | # PCoA 116 | ordination = pcoa(df, method=pcoa_method) 117 | ordination = _check_ordination(ordination) 118 | ordination['samples'].index = df.index 119 | 120 | # Biplot 121 | fig = plt.figure(figsize=figsize) 122 | ax = fig.add_subplot(111, projection='3d') 123 | #ax = Axes3D(fig) # Not displayed in newer versions 124 | 125 | if metadata is None: 126 | metadata = pd.DataFrame() 127 | metadata[sample_col] = list(distance_matrix.columns) 128 | metadata[group_col] = list(distance_matrix.columns) 129 | 130 | meta_ = metadata.set_index(sample_col) 131 | if excluded_cells is not None: 132 | meta_ = meta_.loc[~meta_.index.isin(excluded_cells)] 133 | labels = meta_[group_col].values.tolist() 134 | 135 | if colors is None: 136 | colors = get_colors_from_labels(labels, cmap=meta_cmap) 137 | else: 138 | assert all(elem in colors.keys() for elem in set(labels)) 139 | 140 | # Plot each data point with respective color 141 | for i, cell_type in enumerate(sorted(meta_[group_col].unique())): 142 | cells = list(meta_.loc[meta_[group_col] == cell_type].index) 143 | if colors is not None: 144 | ax.scatter(ordination['samples'].loc[cells, 'PC1'], 145 | ordination['samples'].loc[cells, 'PC2'], 146 | ordination['samples'].loc[cells, 'PC3'], 147 | color=colors[cell_type], 148 | s=50, 149 | edgecolors='k', 150 | label=cell_type) 151 | else: 152 | ax.scatter(ordination['samples'].loc[cells, 'PC1'], 153 | ordination['samples'].loc[cells, 'PC2'], 154 | ordination['samples'].loc[cells, 'PC3'], 155 | s=50, 156 | edgecolors='k', 157 | label=cell_type) 158 | 159 | # Plot texts 160 | ax.set_xlabel('PC1 ({}%)'.format(np.round(ordination['proportion_explained']['PC1'] * 100, 2)), fontsize=axis_fontsize) 161 | ax.set_ylabel('PC2 ({}%)'.format(np.round(ordination['proportion_explained']['PC2'] * 100, 2)), fontsize=axis_fontsize) 162 | ax.set_zlabel('PC3 ({}%)'.format(np.round(ordination['proportion_explained']['PC3'] * 100, 2)), fontsize=axis_fontsize) 163 | 164 | ax.set_xticklabels([]) 165 | ax.set_yticklabels([]) 166 | ax.set_zticklabels([]) 167 | 168 | ax.view_init(view_angles[0], view_angles[1]) 169 | plt.legend(loc='center left', bbox_to_anchor=(1.35, 0.5), 170 | ncol=2, fancybox=True, shadow=True, fontsize=legend_fontsize) 171 | plt.title(title, fontsize=16) 172 | 173 | #distskbio = skbio.DistanceMatrix(df, ids=df.index) # Not using skbio for now 174 | 175 | # Save plot 176 | if filename is not None: 177 | plt.savefig(filename, dpi=300, 178 | bbox_inches='tight') 179 | 180 | results = {'fig' : fig, 'axes' : ax, 'ordination' : ordination, 'distance_matrix' : df} # df used to be distskbio 181 | return results -------------------------------------------------------------------------------- /cell2cell/plotting/umap_plot.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import seaborn as sns 3 | import matplotlib.pyplot as plt 4 | 5 | 6 | def umap_biplot(umap_df, figsize=(8, 8), ax=None, show_axes=True, show_legend=True, hue=None, 7 | cmap='tab10', fontsize=20, filename=None): 8 | '''Plots a UMAP biplot for the UMAP embeddings. 9 | 10 | Parameters 11 | ---------- 12 | umap_df : pandas.DataFrame 13 | Dataframe containing the UMAP embeddings for the axis analyzed. 14 | It must contain columns 'umap1' and 'umap2'. If a hue column is 15 | provided in the parameter 'hue', that column must be provided 16 | in this dataframe. 17 | 18 | figsize : tuple, default=(8, 8) 19 | Size of the figure (width*height), each in inches. 20 | 21 | ax : matplotlib.axes.Axes, default=None 22 | The matplotlib axes containing a plot. 23 | 24 | show_axes : boolean, default=True 25 | Whether showing lines, ticks and ticklabels of both axes. 26 | 27 | show_legend : boolean, default=True 28 | Whether including the legend when a hue is provided. 29 | 30 | hue : vector or key in 'umap_df' 31 | Grouping variable that will produce points with different colors. 32 | Can be either categorical or numeric, although color mapping will 33 | behave differently in the latter case. 34 | 35 | cmap : str, default='tab10' 36 | Name of the color palette for coloring elements with UMAP embeddings. 37 | 38 | fontsize : int, default=20 39 | Fontsize of the axis labels (UMAP1 and UMAP2). 40 | 41 | filename : str, default=None 42 | Path to save the figure. If None, the figure is not 43 | saved. 44 | 45 | Returns 46 | ------- 47 | fig : matplotlib.figure.Figure 48 | A matplotlib Figure instance. Only returned when ax is not provided. 49 | 50 | ax : matplotlib.axes.Axes 51 | The matplotlib axes containing the plot.
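Examples
--------
A minimal sketch, assuming `umap_df` comes from
cell2cell.external.umap.run_umap and that a hypothetical
'celltype' column was added to it for coloring:

>>> fig, ax = umap_biplot(umap_df, hue='celltype')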
52 | ''' 53 | 54 | new_fig = ax is None  # remember whether the figure is created here 55 | if new_fig: 56 | fig = plt.figure(figsize=figsize) 57 | ax = sns.scatterplot(x='umap1', 58 | y='umap2', 59 | data=umap_df, 60 | hue=hue, 61 | palette=cmap, 62 | ax=ax 63 | ) 64 | 65 | if show_axes: 66 | sns.despine(ax=ax, 67 | offset=15 68 | ) 69 | 70 | ax.tick_params(axis='both', 71 | which='both', 72 | colors='black', 73 | width=2, 74 | length=5 75 | ) 76 | else: 77 | ax.set_xticks([]) 78 | ax.set_yticks([]) 79 | for key, spine in ax.spines.items(): 80 | spine.set_visible(False) 81 | 82 | 83 | for tick in ax.get_xticklabels(): 84 | tick.set_fontproperties('arial') 85 | tick.set_weight("bold") 86 | tick.set_color("black") 87 | tick.set_fontsize(int(0.7*fontsize)) 88 | for tick in ax.get_yticklabels(): 89 | tick.set_fontproperties('arial') 90 | tick.set_weight("bold") 91 | tick.set_color("black") 92 | tick.set_fontsize(int(0.7*fontsize)) 93 | 94 | ax.set_xlabel('UMAP 1', fontsize=fontsize) 95 | ax.set_ylabel('UMAP 2', fontsize=fontsize) 96 | 97 | if (show_legend) & (hue is not None): 98 | # Put the legend out of the figure 99 | legend = ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.) 100 | legend.set_title(hue) 101 | legend.get_title().set_fontsize(int(0.7*fontsize)) 102 | 103 | for text in legend.get_texts(): 104 | text.set_fontsize(int(0.7*fontsize)) 105 | 106 | if filename is not None: 107 | plt.savefig(filename, dpi=300, bbox_inches='tight') 108 | 109 | if new_fig: 110 | return fig, ax 111 | else: 112 | return ax -------------------------------------------------------------------------------- /cell2cell/preprocessing/__init__.py: -------------------------------------------------------------------------------- 1 | from cell2cell.preprocessing.cutoffs import (get_constant_cutoff, get_cutoffs, get_global_percentile_cutoffs, 2 | get_local_percentile_cutoffs) 3 | from cell2cell.preprocessing.find_elements import (find_duplicates, get_element_abundances, get_elements_over_fraction) 4 | from cell2cell.preprocessing.gene_ontology import (find_all_children_of_go_term, find_go_terms_from_keyword, 5 | get_genes_from_go_hierarchy, get_genes_from_go_terms) 6 | from cell2cell.preprocessing.integrate_data import (get_thresholded_rnaseq, get_modified_rnaseq, get_ppi_dict_from_go_terms, 7 | get_ppi_dict_from_proteins, get_weighted_ppi) 8 | from cell2cell.preprocessing.manipulate_dataframes import (check_presence_in_dataframe, shuffle_cols_in_df, shuffle_rows_in_df, 9 | shuffle_dataframe, subsample_dataframe) 10 | from cell2cell.preprocessing.ppi import (bidirectional_ppi_for_cci, filter_ppi_by_proteins, filter_ppi_network, 11 | get_all_to_all_ppi, get_filtered_ppi_network, get_one_group_to_other_ppi, 12 | remove_ppi_bidirectionality, simplify_ppi, filter_complex_ppi_by_proteins, 13 | get_genes_from_complexes, preprocess_ppi_data) 14 | from cell2cell.preprocessing.rnaseq import (divide_expression_by_max, divide_expression_by_mean, drop_empty_genes, 15 | log10_transformation, scale_expression_by_sum, add_complexes_to_expression, 16 | aggregate_single_cells) 17 | 18 | from cell2cell.preprocessing.signal import (smooth_curve) -------------------------------------------------------------------------------- /cell2cell/preprocessing/cutoffs.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | 5 | from cell2cell.io import read_data 6 | 7 | import numpy as np 8 | import pandas as pd 9 | 10 | 11 | def get_local_percentile_cutoffs(rnaseq_data, percentile=0.75):
12 | ''' 13 | Obtains a local value associated with a given percentile across 14 | cells/tissues/samples for each gene in a rnaseq_data. 15 | 16 | Parameters 17 | ---------- 18 | rnaseq_data : pandas.DataFrame 19 | Gene expression data for a bulk RNA-seq experiment or a single-cell 20 | experiment after aggregation into cell types. Columns are 21 | cell-types/tissues/samples and rows are genes. 22 | 23 | percentile : float, default=0.75 24 | This is the percentile to be computed. 25 | 26 | Returns 27 | ------- 28 | cutoffs : pandas.DataFrame 29 | A dataframe containing the value corresponding to the percentile 30 | across the genes. Rows are genes and the column corresponds to 31 | 'value'. 32 | ''' 33 | cutoffs = rnaseq_data.quantile(percentile, axis=1).to_frame() 34 | cutoffs.columns = ['value'] 35 | return cutoffs 36 | 37 | 38 | def get_global_percentile_cutoffs(rnaseq_data, percentile=0.75): 39 | ''' 40 | Obtains a global value associated with a given percentile across 41 | cells/tissues/samples and genes in a rnaseq_data. 42 | 43 | Parameters 44 | ---------- 45 | rnaseq_data : pandas.DataFrame 46 | Gene expression data for a bulk RNA-seq experiment or a single-cell 47 | experiment after aggregation into cell types. Columns are 48 | cell-types/tissues/samples and rows are genes. 49 | 50 | percentile : float, default=0.75 51 | This is the percentile to be computed. 52 | 53 | Returns 54 | ------- 55 | cutoffs : pandas.DataFrame 56 | A dataframe containing the value corresponding to the percentile 57 | across the dataset. Rows are genes and the column corresponds to 58 | 'value'. All values here are the same global percentile. 59 | ''' 60 | cutoffs = pd.DataFrame(index=rnaseq_data.index, columns=['value']) 61 | cutoffs['value'] = np.quantile(rnaseq_data.values, percentile) 62 | return cutoffs 63 | 64 | 65 | def get_constant_cutoff(rnaseq_data, constant_cutoff=10): 66 | ''' 67 | Generates a cutoff/threshold dataframe for all genes 68 | in rnaseq_data assigning a constant value as the cutoff. 69 | 70 | Parameters 71 | ---------- 72 | rnaseq_data : pandas.DataFrame 73 | Gene expression data for a bulk RNA-seq experiment or a single-cell 74 | experiment after aggregation into cell types. Columns are 75 | cell-types/tissues/samples and rows are genes. 76 | 77 | constant_cutoff : float, default=10 78 | Cutoff or threshold assigned to each gene. 79 | 80 | Returns 81 | ------- 82 | cutoffs : pandas.DataFrame 83 | A dataframe containing the value corresponding to cutoff or threshold 84 | assigned to each gene. Rows are genes and the column corresponds to 85 | 'value'. All values are the same and corresponds to the 86 | constant_cutoff. 87 | ''' 88 | cutoffs = pd.DataFrame(index=rnaseq_data.index) 89 | cutoffs['value'] = constant_cutoff 90 | return cutoffs 91 | 92 | 93 | def get_cutoffs(rnaseq_data, parameters, verbose=True): 94 | ''' 95 | This function creates cutoff/threshold values for genes 96 | in rnaseq_data and the respective cells/tissues/samples 97 | by a given method or parameter. 98 | 99 | Parameters 100 | ---------- 101 | rnaseq_data : pandas.DataFrame 102 | Gene expression data for a bulk RNA-seq experiment or a single-cell 103 | experiment after aggregation into cell types. Columns are 104 | cell-types/tissues/samples and rows are genes. 105 | 106 | parameters : dict 107 | This dictionary must contain a 'parameter' key and a 'type' key. 108 | The first one is the respective parameter to compute the threshold 109 | or cutoff values. 
The type corresponds to the approach to 110 | compute the values according to the parameter employed. 111 | Options of 'type' that can be used: 112 | 113 | - 'local_percentile' : computes the value of a given percentile, 114 | for each gene independently. In this case, 115 | the parameter corresponds to the percentile 116 | to compute, as a float value between 0 and 1. 117 | - 'global_percentile' : computes the value of a given percentile 118 | from all genes and samples simultaneously. 119 | In this case, the parameter corresponds to 120 | the percentile to compute, as a float value 121 | between 0 and 1. All genes have the same cutoff. 122 | - 'file' : load a cutoff table from a file. Parameter in this case is 123 | the path of that file. It must contain the same genes as 124 | index and same samples as columns. 125 | - 'multi_col_matrix' : a dataframe must be provided, containing a 126 | cutoff for each gene in each sample. This allows 127 | to use specific cutoffs for each sample. The 128 | columns here must be the same as the ones in the 129 | rnaseq_data. 130 | - 'single_col_matrix' : a dataframe must be provided, containing a 131 | cutoff for each gene in only one column. These 132 | cutoffs will be applied to all samples. 133 | - 'constant_value' : binarizes the expression. Evaluates whether 134 | expression is greater than the value input in 135 | the 'parameter'. 136 | 137 | verbose : boolean, default=True 138 | Whether printing or not steps of the analysis. 139 | 140 | Returns 141 | ------- 142 | cutoffs : pandas.DataFrame 143 | Dataframe wherein rows are genes in rnaseq_data. Depending on the type in 144 | the parameters dictionary, it may have only one column ('value') or the 145 | same columns that rnaseq_data has, generating specfic cutoffs for each 146 | cell/tissue/sample. 147 | ''' 148 | parameter = parameters['parameter'] 149 | type = parameters['type'] 150 | if verbose: 151 | print("Calculating cutoffs for gene abundances") 152 | if type == 'local_percentile': 153 | cutoffs = get_local_percentile_cutoffs(rnaseq_data, parameter) 154 | cutoffs.columns = ['value'] 155 | elif type == 'global_percentile': 156 | cutoffs = get_global_percentile_cutoffs(rnaseq_data, parameter) 157 | cutoffs.columns = ['value'] 158 | elif type == 'constant_value': 159 | cutoffs = get_constant_cutoff(rnaseq_data, parameter) 160 | cutoffs.columns = ['value'] 161 | elif type == 'file': 162 | cutoffs = read_data.load_cutoffs(parameter, 163 | format='auto') 164 | cutoffs = cutoffs.loc[rnaseq_data.index] 165 | elif type == 'multi_col_matrix': 166 | cutoffs = parameter 167 | cutoffs = cutoffs.loc[rnaseq_data.index] 168 | cutoffs = cutoffs[rnaseq_data.columns] 169 | elif type == 'single_col_matrix': 170 | cutoffs = parameter 171 | cutoffs.columns = ['value'] 172 | cutoffs = cutoffs.loc[rnaseq_data.index] 173 | else: 174 | raise ValueError(type + ' is not a valid cutoff') 175 | return cutoffs -------------------------------------------------------------------------------- /cell2cell/preprocessing/find_elements.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | 5 | import itertools 6 | from collections import defaultdict, Counter 7 | 8 | def find_duplicates(element_list): 9 | '''Function based on: https://stackoverflow.com/a/5419576/12032899 10 | Finds duplicate items and list their index location. 
11 | 12 | Parameters 13 | ---------- 14 | element_list : list 15 | List of elements. 16 | 17 | Returns 18 | ------- 19 | duplicate_dict : dict 20 | Dictionary with duplicate items. Keys are the items, and values 21 | are lists with the respective indexes where they are. 22 | ''' 23 | tally = defaultdict(list) 24 | for i,item in enumerate(element_list): 25 | tally[item].append(i) 26 | 27 | duplicate_dict = {key : locs for key,locs in tally.items() 28 | if len(locs)>1} 29 | return duplicate_dict 30 | 31 | 32 | def get_element_abundances(element_lists): 33 | '''Computes the fraction of occurrence of each element 34 | in a list of lists. 35 | 36 | Parameters 37 | ---------- 38 | element_lists : list 39 | List of lists of elements. Elements will be 40 | counted only once in each of the lists. 41 | 42 | Returns 43 | ------- 44 | abundance_dict : dict 45 | Dictionary containing the number of times that an 46 | element was present, divided by the total number of 47 | lists in `element_lists`. 48 | ''' 49 | abundance_dict = Counter(itertools.chain(*map(set, element_lists))) 50 | total = len(element_lists) 51 | abundance_dict = {k : v/total for k, v in abundance_dict.items()} 52 | return abundance_dict 53 | 54 | 55 | def get_elements_over_fraction(abundance_dict, fraction): 56 | '''Obtains a list of elements whose fraction of 57 | occurrence is at least the threshold. 58 | 59 | Parameters 60 | ---------- 61 | abundance_dict : dict 62 | Dictionary containing the number of times that an 63 | element was present, divided by the total number of 64 | possible occurrences. 65 | 66 | fraction : float 67 | Threshold to filter the elements. Elements with at least 68 | this threshold will be included. 69 | 70 | Returns 71 | ------- 72 | elements : list 73 | List of elements that met the fraction criteria. 74 | ''' 75 | elements = [k for k, v in abundance_dict.items() if v >= fraction] 76 | return elements -------------------------------------------------------------------------------- /cell2cell/preprocessing/gene_ontology.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | 5 | import numpy as np 6 | import networkx 7 | 8 | 9 | def get_genes_from_go_terms(go_annotations, go_filter, go_header='GO', gene_header='Gene', verbose=True): 10 | ''' 11 | Finds genes associated with specific GO-terms. 12 | 13 | Parameters 14 | ---------- 15 | go_annotations : pandas.DataFrame 16 | Dataframe containing information about GO term annotations of each 17 | gene for a given organism according to the GAF file. Can be loaded 18 | with the function cell2cell.io.read_data.load_go_annotations(). 19 | 20 | go_filter : list 21 | List containing one or more GO-terms to find associated genes. 22 | 23 | go_header : str, default='GO' 24 | Column name wherein GO terms are located in the dataframe. 25 | 26 | gene_header : str, default='Gene' 27 | Column name wherein genes are located in the dataframe. 28 | 29 | verbose : boolean, default=True 30 | Whether printing or not steps of the analysis. 31 | 32 | Returns 33 | ------- 34 | genes : list 35 | List of genes that are associated with GO-terms contained in 36 | go_filter.
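Examples
--------
A hypothetical call; `go_annotations` must be loaded beforehand, and
'GO:0007155' (cell adhesion) is only an illustrative term:

>>> genes = get_genes_from_go_terms(go_annotations, go_filter=['GO:0007155'])
Filtering genes by using GO terms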
37 | ''' 38 | if verbose: 39 | print('Filtering genes by using GO terms') 40 | genes = list(go_annotations.loc[go_annotations[go_header].isin(go_filter)][gene_header].unique()) 41 | return genes 42 | 43 | 44 | def get_genes_from_go_hierarchy(go_annotations, go_terms, go_filter, go_header='GO', gene_header='Gene', verbose=False): 45 | ''' 46 | Obtains genes associated with specific GO terms and their 47 | children GO terms (below in the hierarchy). 48 | 49 | Parameters 50 | ---------- 51 | go_annotations : pandas.DataFrame 52 | Dataframe containing information about GO term annotations of each 53 | gene for a given organism according to the GAF file. Can be loaded 54 | with the function cell2cell.io.read_data.load_go_annotations(). 55 | 56 | go_terms : networkx.Graph 57 | NetworkX Graph containing GO terms datasets from .obo file. 58 | It could be loaded using 59 | cell2cell.io.read_data.load_go_terms(filename). 60 | 61 | go_filter : list 62 | List containing one or more GO-terms to find associated genes. 63 | 64 | go_header : str, default='GO' 65 | Column name wherein GO terms are located in the dataframe. 66 | 67 | gene_header : str, default='Gene' 68 | Column name wherein genes are located in the dataframe. 69 | 70 | verbose : boolean, default=False 71 | Whether printing or not steps of the analysis. 72 | 73 | Returns 74 | ------- 75 | genes : list 76 | List of genes that are associated with GO-terms contained in 77 | go_filter, and related to the children GO terms of those terms. 78 | ''' 79 | go_hierarchy = go_filter.copy() 80 | n_terms = len(go_hierarchy) 81 | for i in range(n_terms): 82 | find_all_children_of_go_term(go_terms, go_hierarchy[i], go_hierarchy, verbose=verbose) 83 | go_hierarchy = list(set(go_hierarchy)) 84 | genes = get_genes_from_go_terms(go_annotations=go_annotations, 85 | go_filter=go_hierarchy, 86 | go_header=go_header, 87 | gene_header=gene_header, 88 | verbose=verbose) 89 | return genes 90 | 91 | 92 | def find_all_children_of_go_term(go_terms, go_term_name, output_list, verbose=True): 93 | ''' 94 | Finds all children GO terms (below in hierarchy) of 95 | a given GO term. 96 | 97 | Parameters 98 | ---------- 99 | go_terms : networkx.Graph 100 | NetworkX Graph containing GO terms datasets from .obo file. 101 | It could be loaded using 102 | cell2cell.io.read_data.load_go_terms(filename). 103 | 104 | go_term_name : str 105 | Specific GO term to find its children. For example: 106 | 'GO:0007155'. 107 | 108 | output_list : list 109 | List used to perform a Depth First Search and find the 110 | children in a recursive way. Here the children will be 111 | automatically written. 112 | 113 | verbose : boolean, default=True 114 | Whether printing or not steps of the analysis. 115 | ''' 116 | for child in networkx.ancestors(go_terms, go_term_name): 117 | if child not in output_list: 118 | if verbose: 119 | print('Retrieving children for ' + go_term_name) 120 | output_list.append(child) 121 | find_all_children_of_go_term(go_terms, child, output_list, verbose) 122 | 123 | 124 | def find_go_terms_from_keyword(go_terms, keyword, verbose=False): 125 | ''' 126 | Uses a keyword to find related GO terms. 127 | 128 | Parameters 129 | ---------- 130 | go_terms : networkx.Graph 131 | NetworkX Graph containing GO terms datasets from .obo file. 132 | It could be loaded using 133 | cell2cell.io.read_data.load_go_terms(filename). 134 | 135 | keyword : str 136 | Keyword to be included in the names of retrieved GO terms.
137 | 138 | verbose : boolean, default=False 139 | Whether printing or not steps of the analysis. 140 | 141 | Returns 142 | ------- 143 | go_filter : list 144 | List containing all GO terms related to a keyword. 145 | ''' 146 | go_filter = [] 147 | for go, node in go_terms.nodes.items(): 148 | if keyword in node['name']: 149 | go_filter.append(go) 150 | if verbose: 151 | print(go, node['name']) 152 | return go_filter -------------------------------------------------------------------------------- /cell2cell/preprocessing/manipulate_dataframes.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | 5 | import random 6 | import numpy as np 7 | import pandas as pd 8 | 9 | 10 | def check_presence_in_dataframe(df, elements, columns=None): 11 | ''' 12 | Searches for elements in a dataframe and returns those 13 | that are present in the dataframe. 14 | 15 | Parameters 16 | ---------- 17 | df : pandas.DataFrame 18 | A dataframe 19 | 20 | elements : list 21 | List of elements to find in the dataframe. They 22 | must be a data type contained in the dataframe. 23 | 24 | columns : list, default=None 25 | Names of columns to consider in the search. If 26 | None, all columns are used. 27 | 28 | Returns 29 | ------- 30 | found_elements : list 31 | List of elements in the input list that were found 32 | in the dataframe. 33 | ''' 34 | if columns is None: 35 | columns = list(df.columns) 36 | df_elements = pd.Series(np.unique(df[columns].values.flatten())) 37 | df_elements = df_elements.loc[df_elements.isin(elements)].values 38 | found_elements = list(df_elements) 39 | return found_elements 40 | 41 | 42 | def shuffle_cols_in_df(df, columns, shuffling_number=1, random_state=None): 43 | ''' 44 | Randomly shuffles specific columns in a dataframe. 45 | 46 | Parameters 47 | ---------- 48 | df : pandas.DataFrame 49 | A dataframe. 50 | 51 | columns : list 52 | Names of columns to shuffle. 53 | 54 | shuffling_number : int, default=1 55 | Number of shuffles per column. 56 | 57 | random_state : int, default=None 58 | Seed for randomization. 59 | 60 | Returns 61 | ------- 62 | df_ : pandas.DataFrame 63 | A shuffled dataframe. 64 | ''' 65 | df_ = df.copy() 66 | if isinstance(columns, str): 67 | columns = [columns] 68 | 69 | for col in columns: 70 | for i in range(shuffling_number): 71 | if random_state is not None: 72 | np.random.seed(random_state + i) 73 | df_[col] = np.random.permutation(df_[col].values) 74 | return df_ 75 | 76 | 77 | def shuffle_rows_in_df(df, rows, shuffling_number=1, random_state=None): 78 | ''' 79 | Randomly shuffles specific rows in a dataframe. 80 | 81 | Parameters 82 | ---------- 83 | df : pandas.DataFrame 84 | A dataframe. 85 | 86 | rows : list 87 | Names of rows (or indexes) to shuffle. 88 | 89 | shuffling_number : int, default=1 90 | Number of shuffles per row. 91 | 92 | random_state : int, default=None 93 | Seed for randomization. 94 | 95 | Returns 96 | ------- 97 | df_.T : pandas.DataFrame 98 | A shuffled dataframe. 99 | ''' 100 | df_ = df.copy().T 101 | if isinstance(rows, str): 102 | rows = [rows] 103 | 104 | for row in rows: 105 | for i in range(shuffling_number): 106 | if random_state is not None: 107 | np.random.seed(random_state + i) 108 | df_[row] = np.random.permutation(df_[row].values) 109 | return df_.T 110 | 111 | 112 | def shuffle_dataframe(df, shuffling_number=1, axis=0, random_state=None): 113 | ''' 114 | Randomly shuffles a whole dataframe across a given axis. 
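For example, with illustrative data, shuffling across rows (axis=0)
permutes the values within each column independently while keeping
the index and columns unchanged:

>>> df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})
>>> shuffled = shuffle_dataframe(df, shuffling_number=1, axis=0, random_state=0)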
115 | 116 | Parameters 117 | ---------- 118 | df : pandas.DataFrame 119 | A dataframe. 120 | 121 | shuffling_number : int, default=1 122 | Number of times the values are shuffled. 123 | 124 | axis : int, default=0 125 | An axis of the dataframe (0 across rows, 1 across columns). 126 | Across rows means shuffling each column independently, 127 | and across columns shuffles each row independently. 128 | 129 | random_state : int, default=None 130 | Seed for randomization. 131 | 132 | Returns 133 | ------- 134 | df_ : pandas.DataFrame 135 | A shuffled dataframe. 136 | ''' 137 | df_ = df.copy() 138 | axis = int(not axis) # pandas.DataFrame is always 2D 139 | to_shuffle = np.rollaxis(df_.values, axis) 140 | for _ in range(shuffling_number): 141 | for i, view in enumerate(to_shuffle): 142 | if random_state is not None: 143 | np.random.seed(random_state + i) 144 | np.random.shuffle(view) 145 | df_ = pd.DataFrame(np.rollaxis(to_shuffle, axis=axis), index=df_.index, columns=df_.columns) 146 | return df_ 147 | 148 | 149 | def subsample_dataframe(df, n_samples, random_state=None): 150 | ''' 151 | Randomly subsamples rows of a dataframe. 152 | 153 | Parameters 154 | ---------- 155 | df : pandas.DataFrame 156 | A dataframe. 157 | 158 | n_samples : int 159 | Number of samples, rows in this case. If 160 | n_samples is larger than the number of rows, 161 | the entire dataframe will be returned, but 162 | shuffled. 163 | 164 | random_state : int, default=None 165 | Seed for randomization. 166 | 167 | Returns 168 | ------- 169 | subsampled_df : pandas.DataFrame 170 | A subsampled and shuffled dataframe. 171 | ''' 172 | items = list(df.index) 173 | if n_samples > len(items): 174 | n_samples = len(items) 175 | if isinstance(random_state, int): 176 | random.seed(random_state) 177 | random.shuffle(items) 178 | 179 | subsampled_df = df.loc[items[:n_samples],:] 180 | return subsampled_df 181 | 182 | 183 | def check_symmetry(df): 184 | ''' 185 | Checks whether a dataframe is symmetric. 186 | 187 | Parameters 188 | ---------- 189 | df : pandas.DataFrame 190 | A dataframe. 191 | 192 | Returns 193 | ------- 194 | symmetric : boolean 195 | Whether a dataframe is symmetric. 196 | ''' 197 | shape = df.shape 198 | if shape[0] == shape[1]: 199 | symmetric = (df.values.transpose() == df.values).all() 200 | else: 201 | symmetric = False 202 | return symmetric 203 | 204 | 205 | def convert_to_distance_matrix(df): 206 | ''' 207 | Converts a symmetric dataframe into a distance dataframe. 208 | That is, diagonal elements are all zero. 209 | 210 | Parameters 211 | ---------- 212 | df : pandas.DataFrame 213 | A dataframe. 214 | 215 | Returns 216 | ------- 217 | df_ : pandas.DataFrame 218 | A copy of df, but with all diagonal elements with a 219 | value of zero. 220 | ''' 221 | if check_symmetry(df): 222 | df_ = df.copy() 223 | if np.trace(df_.values) != 0.0: 224 | print("Warning: diagonal elements are not zero. They were automatically replaced by zeros.") 225 | np.fill_diagonal(df_.values, 0.0) 226 | else: 227 | raise ValueError('The DataFrame is not symmetric') 228 | return df_ 229 | -------------------------------------------------------------------------------- /cell2cell/preprocessing/signal.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from scipy.signal import savgol_filter 4 | 5 | 6 | def smooth_curve(values, window_length=None, polyorder=3, **kwargs): 7 | '''Apply a Savitzky-Golay filter to an array to smooth the curve.
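For example, with illustrative values (window_length must be odd and
greater than polyorder):

>>> values = [1.0, 3.0, 2.0, 5.0, 4.0, 6.0, 5.5, 7.0, 6.5]
>>> smooth = smooth_curve(values, window_length=5, polyorder=2)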
8 | 9 | Parameters 10 | ---------- 11 | values : array-like 12 | An array or list of values. 13 | 14 | window_length : int, default=None 15 | Size of the window of values to use to smooth the curve. 16 | 17 | polyorder : int, default=3 18 | The order of the polynomial used to fit the samples. 19 | 20 | **kwargs : dict 21 | Extra arguments for the scipy.signal.savgol_filter function. 22 | 23 | Returns 24 | ------- 25 | smooth_values : array-like 26 | An array or list of values representing the smooth curve. 27 | ''' 28 | size = len(values) 29 | if window_length is None: 30 | window_length = int(size / min([2, size])) 31 | if window_length % 2 == 0: 32 | window_length += 1 33 | assert(polyorder < window_length), "polyorder must be less than window_length." 34 | smooth_values = savgol_filter(values, window_length, polyorder, **kwargs) 35 | return smooth_values -------------------------------------------------------------------------------- /cell2cell/spatial/__init__.py: -------------------------------------------------------------------------------- 1 | from cell2cell.spatial.distances import (celltype_pair_distance, pairwise_celltype_distances) 2 | from cell2cell.spatial.filtering import (dist_filter_liana, dist_filter_tensor) 3 | from cell2cell.spatial.neighborhoods import (create_spatial_grid, create_sliding_windows, calculate_window_size, add_sliding_window_info_to_adata) -------------------------------------------------------------------------------- /cell2cell/spatial/distances.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import itertools 3 | import numpy as np 4 | import pandas as pd 5 | from sklearn.metrics.pairwise import euclidean_distances, manhattan_distances 6 | 7 | 8 | def celltype_pair_distance(df1, df2, method='min', distance='euclidean'): 9 | ''' 10 | Calculates the distance between two sets of data points (single cell coordinates) 11 | represented by df1 and df2. It supports two distance metrics: Euclidean and Manhattan 12 | distances. The method parameter allows you to specify how the distances between the 13 | two sets are aggregated. 14 | 15 | Parameters 16 | ---------- 17 | df1 : pandas.DataFrame 18 | The first set of single cell coordinates. 19 | 20 | df2 : pandas.DataFrame 21 | The second set of single cell coordinates. 22 | 23 | method : str, default='min' 24 | The aggregation method for the calculated distances. It can be one of 'min', 25 | 'max', or 'mean'. 26 | 27 | distance : str, default='euclidean' 28 | The distance metric to use. It can be 'euclidean' or 'manhattan'. 29 | 30 | Returns 31 | ------- 32 | agg_dist : float 33 | The aggregated distance between the two sets of data points based on the specified 34 | method and distance metric.
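Examples
--------
A minimal sketch with hypothetical coordinates for two cell groups:

>>> import pandas as pd
>>> df1 = pd.DataFrame([[0.0, 0.0], [1.0, 1.0]], columns=['X', 'Y'])
>>> df2 = pd.DataFrame([[3.0, 4.0]], columns=['X', 'Y'])
>>> d = celltype_pair_distance(df1, df2, method='min')  # ~3.61 here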
35 | ''' 36 | if distance == 'euclidean': 37 | distances = euclidean_distances(df1, df2) 38 | elif distance == 'manhattan': 39 | distances = manhattan_distances(df1, df2) 40 | else: 41 | raise NotImplementedError("{} distance is not implemented.".format(distance.capitalize())) 42 | 43 | if method == 'min': 44 | agg_dist = np.nanmin(distances) 45 | elif method == 'max': 46 | agg_dist = np.nanmax(distances) 47 | elif method == 'mean': 48 | agg_dist = np.nanmean(distances) 49 | else: 50 | raise NotImplementedError('Method {} is not implemented.'.format(method)) 51 | return agg_dist 52 | 53 | 54 | def pairwise_celltype_distances(df, group_col, coord_cols=['X', 'Y'], 55 | method='min', distance='euclidean', pairs=None): 56 | ''' 57 | Calculates pairwise distances between groups of single cells. It computes an 58 | aggregate distance between all possible combinations of groups. 59 | 60 | Parameters 61 | ---------- 62 | df : pandas.DataFrame 63 | A dataframe where each row is a single cell, and there are columns containing 64 | spatial coordinates and cell group. 65 | 66 | group_col : str 67 | The name of the column that defines the groups for which distances are calculated. 68 | 69 | coord_cols : list, default=['X', 'Y'] 70 | The list of column names that represent the coordinates of the single cells. 71 | 72 | pairs : list, default=None 73 | A list of specific group pairs for which distances should be calculated. 74 | If not provided, all possible combinations of group pairs will be considered. 75 | 76 | Returns 77 | ------- 78 | distances : pandas.DataFrame 79 | The pairwise distances between groups based on the specified group column. 80 | In this dataframe rows and columns are the cell groups used to compute distances. 81 | ''' 82 | # TODO: Adapt code below to receive AnnData or MuData objects 83 | # df_ = pd.DataFrame(adata.obsm['spatial'], index=adata.obs_names, columns=['X', 'Y']) 84 | # df = adata.obs[[group_col]] 85 | df_ = df[coord_cols] 86 | groups = df[group_col].unique() 87 | distances = pd.DataFrame(np.zeros((len(groups), len(groups))), 88 | index=groups, 89 | columns=groups) 90 | 91 | if pairs is None: 92 | pairs = list(itertools.combinations(groups, 2)) 93 | 94 | for pair in pairs: 95 | dist = celltype_pair_distance(df_.loc[df[group_col] == pair[0]], df_.loc[df[group_col] == pair[1]], 96 | method=method, 97 | distance=distance 98 | ) 99 | distances.loc[pair[0], pair[1]] = dist 100 | distances.loc[pair[1], pair[0]] = dist 101 | return distances -------------------------------------------------------------------------------- /cell2cell/spatial/filtering.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import numpy as np 3 | import tensorly as tl 4 | 5 | 6 | def dist_filter_tensor(interaction_tensor, distances, max_dist, min_dist=0, source_axis=2, target_axis=3): 7 | ''' 8 | Filters an Interaction Tensor based on intercellular distances between cell types. 9 | 10 | Parameters 11 | ---------- 12 | interaction_tensor : cell2cell.tensor.BaseTensor 13 | A communication tensor generated with any of the tensor classes in 14 | cell2cell.tensor 15 | 16 | distances : pandas.DataFrame 17 | Square dataframe containing distances between pairs of cell groups. It must contain 18 | all cell groups that act as sender and receiver cells in the tensor. 19 | 20 | max_dist : float 21 | The maximum distance between cell pairs to consider them in the interaction tensor.
22 | 23 | min_dist : float, default=0 24 | The minimum distance between cell pairs to consider them in the interaction tensor. 25 | 26 | source_axis : int, default=2 27 | The index indicating the axis in the tensor corresponding to sender cells. 28 | 29 | target_axis : int, default=3 30 | The index indicating the axis in the tensor corresponding to receiver cells. 31 | 32 | Returns 33 | ------- 34 | new_interaction_tensor : cell2cell.tensor.BaseTensor 35 | A tensor with communication scores made zero for cell type pairs with intercellular 36 | distance outside the [min_dist, max_dist] range. 37 | ''' 38 | # Evaluate whether we provide distances for all cell types in the tensor 39 | assert all([cell in distances.index for cell in 40 | interaction_tensor.order_names[source_axis]]), "Distances not provided for all sender cells" 41 | assert all([cell in distances.columns for cell in 42 | interaction_tensor.order_names[target_axis]]), "Distances not provided for all receiver cells" 43 | 44 | source_cell_groups = interaction_tensor.order_names[source_axis] 45 | target_cell_groups = interaction_tensor.order_names[target_axis] 46 | 47 | # Use only cell types in the tensor 48 | dist_df = distances.loc[source_cell_groups, target_cell_groups] 49 | 50 | # Filter cell types by intercellular distances 51 | dist = ((min_dist <= dist_df) & (dist_df <= max_dist)).astype(int).values 52 | 53 | # Map the re-arrangement needed to keep the original tensor shape 54 | tensor_shape = list(interaction_tensor.tensor.shape) 55 | original_order = list(range(len(tensor_shape))) 56 | new_order = [] 57 | 58 | # Generate template tensor with cells to keep 59 | template_tensor = dist 60 | for i, size in enumerate(tensor_shape): 61 | if (i != source_axis) and (i != target_axis): 62 | template_tensor = [template_tensor] * size 63 | new_order.insert(0, i) 64 | template_tensor = np.array(template_tensor) 65 | 66 | new_order += [source_axis, target_axis] 67 | changes_needed = [new_order.index(i) for i in original_order] 68 | 69 | # Re-arrange axes by the order 70 | template_tensor = template_tensor.transpose(changes_needed) 71 | 72 | # Create tensorly object 73 | template_tensor = tl.tensor(template_tensor, **tl.context(interaction_tensor.tensor)) 74 | 75 | assert template_tensor.shape == interaction_tensor.tensor.shape, "Filtering of cells was not properly done. Revise code of this function (template tensor)" 76 | 77 | # tensor = tl.zeros_like(interaction_tensor.tensor, **tl.context(tensor)) 78 | new_interaction_tensor = interaction_tensor.copy() 79 | new_interaction_tensor.tensor = new_interaction_tensor.tensor * template_tensor 80 | # Make cells masked by distance real zeros 81 | new_interaction_tensor.loc_zeros = (new_interaction_tensor.tensor == 0).astype(int) - new_interaction_tensor.loc_nans 82 | return new_interaction_tensor 83 | 84 | 85 | def dist_filter_liana(liana_outputs, distances, max_dist, min_dist=0, source_col='source', target_col='target', 86 | keep_dist=False): 87 | ''' 88 | Filters a dataframe with outputs from LIANA based on a distance threshold 89 | applied to another dataframe containing distances between cell groups. 90 | 91 | Parameters 92 | ---------- 93 | liana_outputs : pandas.DataFrame 94 | Dataframe containing the results from LIANA, where rows are pairs of 95 | ligand-receptor interactions by pair of source-target cell groups. 96 | 97 | distances : pandas.DataFrame 98 | Square dataframe containing distances between pairs of cell groups.
99 | 
100 |     max_dist : float
101 |         The distance threshold used to filter the pairs from the liana_outputs dataframe.
102 | 
103 |     min_dist : float, default=0
104 |         The minimum distance between cell pairs to keep them in the filtered outputs.
105 | 
106 |     source_col : str, default='source'
107 |         Column name in both dataframes that represents the source cell groups.
108 | 
109 |     target_col : str, default='target'
110 |         Column name in both dataframes that represents the target cell groups.
111 | 
112 |     keep_dist : bool, default=False
113 |         Whether to keep the 'distance' column in the filtered output.
114 |         If set to True, the 'distance' column will be retained; otherwise, it will be dropped
115 |         and the LIANA dataframe will contain the original columns.
116 | 
117 |     Returns
118 |     -------
119 |     filtered_liana_outputs : pandas.DataFrame
120 |         A dataframe containing the pairs from the liana_outputs dataframe that meet the distance
121 |         threshold criteria.
122 |     '''
123 |     # Convert distances to a long-form dataframe
124 |     distances = distances.stack().reset_index()
125 |     distances.columns = [source_col, target_col, 'distance']
126 | 
127 |     # Merge the long-form distances DataFrame with the LIANA outputs
128 |     merged_df = liana_outputs.merge(distances, on=[source_col, target_col], how='left')
129 | 
130 |     # Filter based on the distance threshold
131 |     filtered_liana_outputs = merged_df[(min_dist <= merged_df['distance']) & (merged_df['distance'] <= max_dist)]
132 | 
133 |     if not keep_dist:
134 |         filtered_liana_outputs = filtered_liana_outputs.drop(['distance'], axis=1)
135 | 
136 |     return filtered_liana_outputs
-------------------------------------------------------------------------------- /cell2cell/spatial/neighborhoods.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import numpy as np
3 | import pandas as pd
4 | 
5 | 
6 | def create_spatial_grid(adata, num_bins, copy=False):
7 |     """
8 |     Segments spatial transcriptomics data into a square grid based on spatial coordinates
9 |     and annotates each cell or spot with its corresponding grid position.
10 | 
11 |     Parameters
12 |     ----------
13 |     adata : AnnData
14 |         The AnnData object containing spatial transcriptomics data. The spatial coordinates
15 |         must be stored in `adata.obsm['spatial']`. This object is either modified in place
16 |         or a copy is returned based on the `copy` parameter.
17 | 
18 |     num_bins : int
19 |         The number of bins (squares) along each dimension of the grid. The grid is square,
20 |         so this number applies to both the horizontal and vertical divisions.
21 | 
22 |     copy : bool, default=False
23 |         If True, the function operates on and returns a copy of the input AnnData object.
24 |         If False, the function modifies the input AnnData object in place.
25 | 
26 |     Returns
27 |     -------
28 |     adata_ : AnnData or None
29 |         If `copy=True`, a new AnnData object with added grid annotations is returned.
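If `copy=False`, None is returned and the grid annotations
('grid_x', 'grid_y', and 'grid_cell') are added to `adata.obs` in place.

Example
-------
A minimal sketch, assuming `adata.obsm['spatial']` holds the X/Y coordinates:

>>> create_spatial_grid(adata, num_bins=10)
>>> adata.obs[['grid_x', 'grid_y', 'grid_cell']].head()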
30 | """ 31 | 32 | if copy: 33 | adata_ = adata.copy() 34 | else: 35 | adata_ = adata 36 | 37 | # Get the spatial coordinates 38 | coords = pd.DataFrame(adata.obsm['spatial'], index=adata.obs_names, columns=['X', 'Y']) 39 | 40 | # Define the bins for each dimension 41 | x_min, y_min = coords.min() 42 | x_max, y_max = coords.max() 43 | x_bins = np.linspace(x_min, x_max, num_bins + 1) 44 | y_bins = np.linspace(y_min, y_max, num_bins + 1) 45 | 46 | # Digitize the coordinates into bins 47 | adata_.obs['grid_x'] = np.digitize(coords['X'], x_bins, right=False) - 1 48 | adata_.obs['grid_y'] = np.digitize(coords['Y'], y_bins, right=False) - 1 49 | 50 | # Adjust indices to start from 0 and end at num_bins - 1 51 | adata_.obs['grid_x'] = np.clip(adata_.obs['grid_x'], 0, num_bins - 1) 52 | adata_.obs['grid_y'] = np.clip(adata_.obs['grid_y'], 0, num_bins - 1) 53 | 54 | # Combine grid indices to form a grid cell identifier 55 | adata_.obs['grid_cell'] = adata_.obs['grid_x'].astype(str) + "_" + adata_.obs['grid_y'].astype(str) 56 | 57 | if copy: 58 | return adata_ 59 | 60 | 61 | def calculate_window_size(adata, num_windows): 62 | """ 63 | Calculates the window size required to fit a specified number of windows 64 | across the width of the coordinate space in spatial transcriptomics data. 65 | 66 | Parameters 67 | ---------- 68 | adata : AnnData 69 | The AnnData object containing spatial transcriptomics data. The spatial coordinates 70 | must be stored in `adata.obsm['spatial']`. 71 | 72 | num_windows : int 73 | The desired number of windows to fit across the width of the coordinate space. 74 | 75 | Returns 76 | ------- 77 | window_size : float 78 | The calculated size of each window to fit the specified number of windows 79 | across the width of the coordinate space. 80 | """ 81 | 82 | # Extract X coordinates 83 | x_coords = adata.obsm['spatial'][:, 0] 84 | 85 | # Determine the range of X coordinates 86 | x_min, x_max = np.min(x_coords), np.max(x_coords) 87 | 88 | # Calculate the window size 89 | window_size = (x_max - x_min) / num_windows 90 | 91 | return window_size 92 | 93 | 94 | def create_sliding_windows(adata, window_size, stride): 95 | """ 96 | Maps windows to the cells they contain based on spatial transcriptomics data. 97 | Returns a dictionary where keys are window identifiers and values are sets of cell indices. 98 | 99 | Parameters 100 | ---------- 101 | adata : AnnData 102 | The AnnData object containing spatial transcriptomics data. The spatial coordinates 103 | must be stored in `adata.obsm['spatial']`. 104 | 105 | window_size : float 106 | The size of each square window along each dimension. 107 | 108 | stride : float 109 | The stride with which the window moves along each dimension. 110 | 111 | Returns 112 | ------- 113 | window_mapping : dict 114 | A dictionary mapping each window to a set of cell indices that fall within that window. 
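Example
-------
A minimal sketch, assuming `adata.obsm['spatial']` holds the X/Y coordinates.
Here the stride is half the window size, so consecutive windows overlap by 50%:

>>> window_size = calculate_window_size(adata, num_windows=10)
>>> window_mapping = create_sliding_windows(adata, window_size=window_size, stride=window_size / 2.)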
115 |     """
116 | 
117 |     # Get the spatial coordinates
118 |     coords = pd.DataFrame(adata.obsm['spatial'], index=adata.obs_names, columns=['X', 'Y'])
119 | 
120 |     # Define the range of the sliding windows
121 |     x_min, y_min = coords.min()
122 |     x_max, y_max = coords.max()
123 |     x_windows = np.arange(x_min, x_max - window_size + stride, stride)
124 |     y_windows = np.arange(y_min, y_max - window_size + stride, stride)
125 | 
126 |     # Function to find all windows a point belongs to
127 |     def find_windows(coord, window_edges):
128 |         return [i for i, edge in enumerate(window_edges) if edge <= coord < edge + window_size]
129 | 
130 |     # Initialize the window mapping
131 |     window_mapping = {}
132 | 
133 |     # Assign cells to all overlapping windows
134 |     for cell_idx, (x, y) in enumerate(zip(coords['X'], coords['Y'])):
135 |         cell_windows = ["window_{}_{}".format(wx, wy)
136 |                         for wx in find_windows(x, x_windows)
137 |                         for wy in find_windows(y, y_windows)]
138 | 
139 |         for win in cell_windows:
140 |             if win not in window_mapping:
141 |                 window_mapping[win] = set()
142 |             window_mapping[win].add(coords.index[cell_idx])  # This stores the cell/spot barcodes
143 |             # For memory efficiency, it could be `window_mapping[win].add(cell_idx)` instead
144 | 
145 |     return window_mapping
146 | 
147 | 
148 | def add_sliding_window_info_to_adata(adata, window_mapping):
149 |     """
150 |     Adds window information to the AnnData object's .obs DataFrame. Each window is represented
151 |     as a column, and cells/spots belonging to a window are marked with a 1.0, while others are marked
152 |     with a 0.0. It modifies the `adata` object in place.
153 | 
154 |     Parameters
155 |     ----------
156 |     adata : AnnData
157 |         The AnnData object to which the window information will be added.
158 | 
159 |     window_mapping : dict
160 |         A dictionary mapping each window to a set of cell/spot indices or barcodes.
161 |         This is the output from the `create_sliding_windows` function.
162 |     """
163 | 
164 |     # Initialize all window columns to 0.0
165 |     for window in sorted(window_mapping.keys()):
166 |         adata.obs[window] = 0.0
167 | 
168 |     # Mark cells that belong to each window
169 |     for window, barcode_indices in window_mapping.items():
170 |         adata.obs.loc[barcode_indices, window] = 1.0
-------------------------------------------------------------------------------- /cell2cell/stats/__init__.py: --------------------------------------------------------------------------------
1 | from cell2cell.stats.enrichment import (fisher_representation, hypergeom_representation)
2 | from cell2cell.stats.gini import (gini_coefficient)
3 | from cell2cell.stats.multitest import (compute_fdrcorrection_asymmetric_matrix, compute_fdrcorrection_symmetric_matrix)
4 | from cell2cell.stats.permutation import (compute_pvalue_from_dist, pvalue_from_dist, random_switching_ppi_labels,
5 |                                          run_label_permutation)
6 | -------------------------------------------------------------------------------- /cell2cell/stats/enrichment.py: --------------------------------------------------------------------------------
1 | import scipy.stats as st
2 | 
3 | 
4 | def hypergeom_representation(sample_size, class_in_sample, population_size, class_in_population):
5 |     '''
6 |     Performs an analysis of enrichment/depletion based on observations
7 |     in a sample. It computes a p-value given a hypergeometric
8 |     distribution.
9 | 
10 |     Parameters
11 |     ----------
12 |     sample_size : int
13 |         Size of the sample obtained or number of elements
14 |         obtained from the analysis.
15 | 
16 |     class_in_sample : int
17 |         Number of elements of a given class that are
18 |         contained in the sample. This is the class to be tested.
19 | 
20 |     population_size : int
21 |         Size of the sampling space. That is, the total number
22 |         of possible elements to be chosen when sampling.
23 | 
24 |     class_in_population : int
25 |         Number of elements of a given class that are contained
26 |         in the population. This is the class to be tested.
27 | 
28 |     Returns
29 |     -------
30 |     p_vals : tuple
31 |         A tuple containing the p-values for depletion and
32 |         enrichment analysis, respectively.
33 |     '''
34 |     # Computing the number of elements that are not in the same class
35 |     nonclass_in_sample = sample_size - class_in_sample
36 |     nonclass_in_population = population_size - class_in_population
37 | 
38 |     # Remaining elements in population after sampling
39 |     rem_class = class_in_population - class_in_sample
40 |     rem_nonclass = nonclass_in_population - nonclass_in_sample
41 | 
42 |     # Depletion Analysis
43 |     depletion_hyp_p_val = st.hypergeom.cdf(class_in_sample, population_size, class_in_population, sample_size)
44 | 
45 |     # Enrichment Analysis
46 |     enrichment_hyp_p_val = 1.0 - st.hypergeom.cdf(class_in_sample - 1.0, population_size, class_in_population,
47 |                                                   sample_size)
48 | 
49 |     p_vals = (depletion_hyp_p_val, enrichment_hyp_p_val)
50 |     return p_vals
51 | 
52 | 
53 | def fisher_representation(sample_size, class_in_sample, population_size, class_in_population):
54 |     '''
55 |     Performs an analysis of enrichment/depletion based on observations
56 |     in a sample. It computes a p-value given a Fisher exact test.
57 | 
58 |     Parameters
59 |     ----------
60 |     sample_size : int
61 |         Size of the sample obtained or number of elements
62 |         obtained from the analysis.
63 | 
64 |     class_in_sample : int
65 |         Number of elements of a given class that are
66 |         contained in the sample. This is the class to be tested.
67 | 
68 |     population_size : int
69 |         Size of the sampling space. That is, the total number
70 |         of possible elements to be chosen when sampling.
71 | 
72 |     class_in_population : int
73 |         Number of elements of a given class that are contained
74 |         in the population. This is the class to be tested.
75 | 
76 |     Returns
77 |     -------
78 |     results : dict
79 |         A dictionary containing the odds ratios and p-values for
80 |         depletion and enrichment analysis.
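Example
-------
A minimal sketch (numbers are illustrative): testing whether 8 elements of a
class, found in a sample of 10 drawn from a population of 100 that contains
20 class members, are depleted or enriched:

>>> res = fisher_representation(sample_size=10, class_in_sample=8,
...                             population_size=100, class_in_population=20)
>>> depletion_pval, enrichment_pval = res['pval']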
81 |     '''
82 |     # Computing the number of elements that are not in the same class
83 |     nonclass_in_sample = sample_size - class_in_sample
84 |     nonclass_in_population = population_size - class_in_population
85 | 
86 |     # Remaining elements in population after sampling
87 |     rem_class = class_in_population - class_in_sample
88 |     rem_nonclass = nonclass_in_population - nonclass_in_sample
89 | 
90 |     # Depletion Analysis
91 |     depletion_odds, depletion_fisher_p_val = st.fisher_exact([[class_in_sample, rem_class],
92 |                                                               [nonclass_in_sample, rem_nonclass]],
93 |                                                              alternative='less')
94 | 
95 |     # Enrichment Analysis
96 |     enrichment_odds, enrichment_fisher_p_val = st.fisher_exact([[class_in_sample, rem_class],
97 |                                                                 [nonclass_in_sample, rem_nonclass]],
98 |                                                                alternative='greater')
99 | 
100 |     p_vals = (depletion_fisher_p_val, enrichment_fisher_p_val)
101 |     odds = (depletion_odds, enrichment_odds)
102 |     results = {'pval' : p_vals,
103 |                'odds' : odds,
104 |                }
105 |     return results
-------------------------------------------------------------------------------- /cell2cell/stats/gini.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | import numpy as np
4 | 
5 | 
6 | def gini_coefficient(distribution):
7 |     """Computes the Gini coefficient of an array of values.
8 |     Code borrowed from:
9 |     https://stackoverflow.com/questions/39512260/calculating-gini-coefficient-in-python-numpy
10 | 
11 |     Parameters
12 |     ----------
13 |     distribution : array-like
14 |         An array of values representing the distribution
15 |         to be evaluated.
16 | 
17 |     Returns
18 |     -------
19 |     gini : float
20 |         Gini coefficient for the evaluated distribution.
21 |     """
22 |     diffsum = 0
23 |     for i, xi in enumerate(distribution[:-1], 1):
24 |         diffsum += np.sum(np.abs(xi - distribution[i:]))
25 |     gini = diffsum / (len(distribution)**2 * np.mean(distribution))
26 |     return gini
-------------------------------------------------------------------------------- /cell2cell/stats/multitest.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | 
4 | from statsmodels.stats.multitest import fdrcorrection
5 | 
6 | 
7 | def compute_fdrcorrection_symmetric_matrix(X, alpha=0.1):
8 |     '''
9 |     Computes an FDR correction (Benjamini-Hochberg procedure)
10 |     on a symmetric matrix of p-values. Here, only the diagonal
11 |     and values on the upper triangle are considered to avoid
12 |     repetition with the lower triangle.
13 | 
14 |     Parameters
15 |     ----------
16 |     X : pandas.DataFrame
17 |         A symmetric dataframe of P-values.
18 | 
19 |     alpha : float, default=0.1
20 |         Error rate of the FDR correction. Must be 0 < alpha < 1.
21 | 
22 |     Returns
23 |     -------
24 |     adj_X : pandas.DataFrame
25 |         A symmetric dataframe with adjusted P-values of X.
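Example
-------
A minimal sketch, assuming a symmetric matrix of raw P-values:

>>> import pandas as pd
>>> pvals = pd.DataFrame([[0.01, 0.20], [0.20, 0.03]],
...                      index=['A', 'B'], columns=['A', 'B'])
>>> adj_pvals = compute_fdrcorrection_symmetric_matrix(pvals, alpha=0.1)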
26 |     '''
27 |     pandas = False
28 |     a = X.copy()
29 | 
30 |     if isinstance(X, pd.DataFrame):
31 |         pandas = True
32 |         a = X.values
33 |         index = X.index
34 |         columns = X.columns
35 | 
36 |     # Original data
37 |     upper_idx = np.triu_indices_from(a)
38 |     pvals = a[upper_idx]
39 | 
40 |     # New data
41 |     adj_X = np.zeros(a.shape)
42 |     rej, adj_pvals = fdrcorrection(pvals.flatten(), alpha=alpha)
43 | 
44 |     # Reorder data
45 |     adj_X[upper_idx] = adj_pvals
46 |     adj_X = adj_X + np.triu(adj_X, 1).T
47 | 
48 |     if pandas:
49 |         adj_X = pd.DataFrame(adj_X, index=index, columns=columns)
50 |     return adj_X
51 | 
52 | 
53 | def compute_fdrcorrection_asymmetric_matrix(X, alpha=0.1):
54 |     '''
55 |     Computes an FDR correction (Benjamini-Hochberg procedure)
56 |     on an asymmetric matrix of p-values. Here, the correction
57 |     is performed for every value in X.
58 | 
59 |     Parameters
60 |     ----------
61 |     X : pandas.DataFrame
62 |         An asymmetric dataframe of P-values.
63 | 
64 |     alpha : float, default=0.1
65 |         Error rate of the FDR correction. Must be 0 < alpha < 1.
66 | 
67 |     Returns
68 |     -------
69 |     adj_X : pandas.DataFrame
70 |         An asymmetric dataframe with adjusted P-values of X.
71 |     '''
72 |     pandas = False
73 |     a = X.copy()
74 | 
75 |     if isinstance(X, pd.DataFrame):
76 |         pandas = True
77 |         a = X.values
78 |         index = X.index
79 |         columns = X.columns
80 | 
81 |     # Original data
82 |     pvals = a.flatten()
83 | 
84 |     # New data
85 |     rej, adj_pvals = fdrcorrection(pvals, alpha=alpha)
86 | 
87 |     # Reorder data
88 |     #adj_X = adj_pvals.reshape(-1, a.shape[1])
89 |     adj_X = adj_pvals.reshape(a.shape)  # Allows using tensors
90 | 
91 |     if pandas:
92 |         adj_X = pd.DataFrame(adj_X, index=index, columns=columns)
93 |     return adj_X
-------------------------------------------------------------------------------- /cell2cell/tensor/__init__.py: --------------------------------------------------------------------------------
1 | from cell2cell.tensor.external_scores import (dataframes_to_tensor)
2 | from cell2cell.tensor.factor_manipulation import (normalize_factors)
3 | from cell2cell.tensor.metrics import (correlation_index, pairwise_correlation_index)
4 | from cell2cell.tensor.tensor import (InteractionTensor, PreBuiltTensor, build_context_ccc_tensor, generate_tensor_metadata,
5 |                                      interactions_to_tensor)
6 | from cell2cell.tensor.tensor_manipulation import (concatenate_interaction_tensors)
7 | from cell2cell.tensor.subset import (subset_tensor, subset_metadata)
8 | -------------------------------------------------------------------------------- /cell2cell/tensor/factor_manipulation.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | import numpy as np
4 | 
5 | 
6 | def normalize_factors(factors):
7 |     '''
8 |     L2-normalizes the factors considering all tensor dimensions
9 |     from a tensor decomposition result.
10 | 
11 |     Parameters
12 |     ----------
13 |     factors : dict
14 |         Ordered dictionary containing a dataframe with the factor loadings for each
15 |         dimension/order of the tensor. This is the result from a tensor decomposition,
16 |         it can be found as the attribute `factors` in any tensor class derived from the
17 |         class BaseTensor (e.g. BaseTensor.factors).
18 | 
19 |     Returns
20 |     -------
21 |     norm_factors : dict
22 |         The normalized factors.
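Example
-------
A minimal sketch, assuming `interaction_tensor.factors` was populated by a
previous tensor decomposition:

>>> norm_factors = normalize_factors(interaction_tensor.factors)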
23 | ''' 24 | norm_factors = dict() 25 | for k, v in factors.items(): 26 | norm_factors[k] = v / np.linalg.norm(v, axis=0) 27 | return norm_factors 28 | 29 | 30 | def shuffle_factors(factors, axis=0): 31 | ''' 32 | Randomly shuffles the values of the factors in the tensor decomposition. 33 | ''' 34 | raise NotImplementedError 35 | -------------------------------------------------------------------------------- /cell2cell/tensor/metrics.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | import pandas as pd 5 | 6 | from itertools import combinations 7 | 8 | # Authors: Hratch Baghdassarian , Erick Armingol 9 | # similarity metrics for tensor decompositions 10 | 11 | 12 | def correlation_index(factors_1, factors_2, tol=5e-16, method='stacked'): 13 | """ 14 | CorrIndex implementation to assess tensor decomposition outputs. 15 | From [1] Sobhani et al 2022 (https://doi.org/10.1016/j.sigpro.2022.108457). 16 | Metric is scaling and column-permutation invariant, wherein each column is a factor. 17 | 18 | Parameters 19 | ---------- 20 | factors_1 : dict 21 | Ordered dictionary containing a dataframe with the factor loadings for each 22 | dimension/order of the tensor. This is the result from a tensor decomposition, 23 | it can be found as the attribute `factors` in any tensor class derived from the 24 | class BaseTensor (e.g. BaseTensor.factors). 25 | 26 | factors_2 : dict 27 | Similar to factors_1 but coming from another tensor decomposition of a tensor 28 | with equal shape. 29 | 30 | tol : float, default=5e-16 31 | Precision threshold below which to call the CorrIndex score 0. 32 | 33 | method : str, default='stacked' 34 | Method to obtain the CorrIndex by comparing the A matrices from two decompositions. 35 | Possible options are: 36 | 37 | - 'stacked' : The original method implemented in [1]. Here all A matrices from the same decomposition are 38 | vertically concatenated, building a big A matrix for each decomposition. 39 | - 'max_score' : This computes the CorrIndex for each pair of A matrices (i.e. between A_1 in factors_1 and 40 | factors_2, between A_2 in factors_1 and factors_2, and so on). Then the max score is 41 | selected (the most conservative approach). In other words, it selects the max score among the 42 | CorrIndexes computed dimension-wise. 43 | - 'min_score' : Similar to 'max_score', but the min score is selected (the least conservative approach). 44 | - 'avg_score' : Similar to 'max_score', but the avg score is selected. 
45 | 
46 |     Returns
47 |     -------
48 |     score : float
49 |         CorrIndex metric [0,1]; lower score indicates higher similarity between matrices
50 |     """
51 |     factors_1 = list(factors_1.values())
52 |     factors_2 = list(factors_2.values())
53 | 
54 |     # check input factors shape
55 |     for factors in [factors_1, factors_2]:
56 |         if len({np.shape(A)[1] for A in factors}) != 1:
57 |             raise ValueError('Factors should be a list of loading matrices of the same rank')
58 | 
59 |     # check method
60 |     options = ['stacked', 'max_score', 'min_score', 'avg_score']
61 |     if method not in options:
62 |         raise ValueError("The `method` must be one of the following options: {}".format(options))
63 | 
64 |     if method == 'stacked':
65 |         # vertically stack loading matrices -- shape (sum(tensor.shape) x R)
66 |         X_1 = [np.concatenate(factors_1, 0)]
67 |         X_2 = [np.concatenate(factors_2, 0)]
68 |     else:
69 |         X_1 = factors_1
70 |         X_2 = factors_2
71 | 
72 |     for x1, x2 in zip(X_1, X_2):
73 |         if np.shape(x1) != np.shape(x2):
74 |             raise ValueError('Factor matrices should be of the same shapes')
75 | 
76 |     # normalize columns to L2 norm - even if ran decomposition with normalize_factors=True
77 |     col_norm_1 = [np.linalg.norm(x1, axis=0) for x1 in X_1]
78 |     col_norm_2 = [np.linalg.norm(x2, axis=0) for x2 in X_2]
79 |     for cn1, cn2 in zip(col_norm_1, col_norm_2):
80 |         if np.any(cn1 == 0) or np.any(cn2 == 0):
81 |             raise ValueError('Column norms must be non-zero')
82 |     X_1 = [x1 / cn1 for x1, cn1 in zip(X_1, col_norm_1)]
83 |     X_2 = [x2 / cn2 for x2, cn2 in zip(X_2, col_norm_2)]
84 | 
85 |     corr_idxs = [_compute_correlation_index(x1, x2, tol=tol) for x1, x2 in zip(X_1, X_2)]
86 | 
87 |     if method == 'stacked':
88 |         score = corr_idxs[0]
89 |     elif method == 'max_score':
90 |         score = np.max(corr_idxs)
91 |     elif method == 'min_score':
92 |         score = np.min(corr_idxs)
93 |     elif method == 'avg_score':
94 |         score = np.mean(corr_idxs)
95 |     else:
96 |         score = 1.0
97 |     return score
98 | 
99 | 
100 | def _compute_correlation_index(x1, x2, tol=5e-16):
101 |     '''
102 |     Computes the CorrIndex from the L2-normalized A matrices.
103 | 
104 |     Parameters
105 |     ----------
106 |     x1 : ndarray
107 |         A normalized A matrix from the first tensor decomposition.
108 | 
109 |     x2 : ndarray
110 |         A normalized A matrix from the second tensor decomposition.
111 | 
112 |     tol : float, default=5e-16
113 |         Precision threshold below which to call the CorrIndex score 0.
114 | 
115 |     Returns
116 |     -------
117 |     score : float
118 |         CorrIndex metric [0,1]; lower score indicates higher similarity between matrices
119 |     '''
120 |     # generate the correlation index input
121 |     c_prod_mtx = np.abs(np.matmul(np.conj(np.transpose(np.asarray(x1))), np.asarray(x2)))
122 | 
123 |     # correlation index scoring
124 |     n_elements = np.shape(c_prod_mtx)[1] + np.shape(c_prod_mtx)[0]
125 |     score = (1 / (n_elements)) * (np.sum(np.abs(np.max(c_prod_mtx, 1) - 1)) + np.sum(np.abs(np.max(c_prod_mtx, 0) - 1)))
126 |     if score < tol:
127 |         score = 0
128 |     return score
129 | 
130 | 
131 | def pairwise_correlation_index(factors, tol=5e-16, method='stacked'):
132 |     '''
133 |     Computes the CorrIndex between all pairs of factors.
134 | 
135 |     Parameters
136 |     ----------
137 |     factors : list
138 |         List with multiple Ordered dictionaries, each containing a dataframe with
139 |         the factor loadings for each dimension/order of the tensor. This is the
140 |         result from a tensor decomposition, it can be found as the attribute
141 |         `factors` in any tensor class derived from the class BaseTensor
142 |         (e.g. 
BaseTensor.factors).
143 | 
144 |     tol : float, default=5e-16
145 |         Precision threshold below which to call the CorrIndex score 0.
146 | 
147 |     method : str, default='stacked'
148 |         Method to obtain the CorrIndex by comparing the A matrices from two decompositions.
149 |         Possible options are:
150 | 
151 |         - 'stacked' : The original method implemented in [1]. Here all A matrices from the same decomposition are
152 |                       vertically concatenated, building a big A matrix for each decomposition.
153 |         - 'max_score' : This computes the CorrIndex for each pair of A matrices (i.e. between A_1 in factors_1 and
154 |                         factors_2, between A_2 in factors_1 and factors_2, and so on). Then the max score is
155 |                         selected (the most conservative approach). In other words, it selects the max score among the
156 |                         CorrIndexes computed dimension-wise.
157 |         - 'min_score' : Similar to 'max_score', but the min score is selected (the least conservative approach).
158 |         - 'avg_score' : Similar to 'max_score', but the avg score is selected.
159 | 
160 |     Returns
161 |     -------
162 |     scores : pd.DataFrame
163 |         Dataframe with CorrIndex metric for each pair of decompositions.
164 |         This metric is bounded by [0,1]; lower score indicates higher similarity between matrices
165 |     '''
166 |     N = len(factors)
167 |     idxs = list(range(N))
168 |     pairs = list(combinations(idxs, 2))
169 |     scores = pd.DataFrame(np.zeros((N, N)), index=idxs, columns=idxs)
170 |     for p1, p2 in pairs:
171 |         corrindex = correlation_index(factors_1=factors[p1],
172 |                                       factors_2=factors[p2],
173 |                                       tol=tol,
174 |                                       method=method
175 |                                       )
176 | 
177 |         scores.at[p1, p2] = corrindex
178 |         scores.at[p2, p1] = corrindex
179 |     return scores
180 | -------------------------------------------------------------------------------- /cell2cell/tensor/subset.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import copy
3 | 
4 | import numpy as np
5 | import tensorly as tl
6 | 
7 | from cell2cell.preprocessing.find_elements import find_duplicates
8 | 
9 | def find_element_indexes(interaction_tensor, elements, axis=0, remove_duplicates=True, keep='first', original_order=False):
10 |     '''Finds the location/indexes of a list of elements in one of the
11 |     axes of an InteractionTensor.
12 | 
13 |     Parameters
14 |     ----------
15 |     interaction_tensor : cell2cell.tensor.BaseTensor
16 |         A communication tensor generated with any of the tensor classes in
17 |         cell2cell.tensor
18 | 
19 |     elements : list
20 |         A list of names for the elements to find in one of the axis.
21 | 
22 |     axis : int, default=0
23 |         An axis of the interaction_tensor, representing one of
24 |         its dimensions.
25 | 
26 |     remove_duplicates : boolean, default=True
27 |         Whether removing duplicated names in `elements`.
28 | 
29 |     keep : str, default='first'
30 |         Determines which duplicates (if any) to keep.
31 |         Options are:
32 | 
33 |         - first : Drop duplicates except for the first occurrence.
34 |         - last : Drop duplicates except for the last occurrence.
35 |         - False : Drop all duplicates.
36 | 
37 |     original_order : boolean, default=False
38 |         Whether keeping the original order of the elements in
39 |         interaction_tensor.order_names[axis] or keeping the
40 |         new order as indicated in `elements`.
41 | 
42 |     Returns
43 |     -------
44 |     indexes : list
45 |         List of indexes for the elements that were found in the
46 |         indicated axis of the interaction_tensor.
47 |     '''
48 |     assert axis < len \
49 |         (interaction_tensor.tensor.shape), "List index out of range. 
'axis' must be one of the axes in the tensor."
50 |     assert axis < len \
51 |         (interaction_tensor.order_names), "List index out of range. interaction_tensor.order_names must have element names for each axis of the tensor."
52 | 
53 |     elements = sorted(set(elements), key=list(elements).index)
54 | 
55 |     if original_order:
56 |         # Avoids error for considering elements not in the tensor
57 |         elements = set(elements).intersection(set(interaction_tensor.order_names[axis]))
58 |         elements = sorted(elements, key=interaction_tensor.order_names[axis].index)
59 | 
60 | 
61 |     # Find duplicates if we are removing them
62 |     to_exclude = []
63 |     if remove_duplicates:
64 |         dup_dict = find_duplicates(interaction_tensor.order_names[axis])
65 | 
66 |         if len(dup_dict) > 0:  # Only if we have duplicate items
67 |             if keep == 'first':
68 |                 for k, v in dup_dict.items():
69 |                     to_exclude.extend(v[1:])
70 |             elif keep == 'last':
71 |                 for k, v in dup_dict.items():
72 |                     to_exclude.extend(v[:-1])
73 |             elif not keep:
74 |                 for k, v in dup_dict.items():
75 |                     to_exclude.extend(v)
76 |             else:
77 |                 raise ValueError("An invalid option was selected for the parameter `keep`")
78 | 
79 |     # Find indexes in the tensor
80 |     indexes = sum \
81 |         ([np.where(np.asarray(interaction_tensor.order_names[axis]) == element)[0].tolist() for element in elements], [])
82 | 
83 |     # Exclude duplicates if any to exclude
84 |     indexes = [idx for idx in indexes if idx not in to_exclude]
85 |     return indexes
86 | 
87 | 
88 | def subset_tensor(interaction_tensor, subset_dict, remove_duplicates=True, keep='first', original_order=False):
89 |     '''Subsets an InteractionTensor to contain only specific elements in
90 |     respective dimensions.
91 | 
92 |     Parameters
93 |     ----------
94 |     interaction_tensor : cell2cell.tensor.BaseTensor
95 |         A communication tensor generated with any of the tensor classes in
96 |         cell2cell.tensor
97 | 
98 |     subset_dict : dict
99 |         Dictionary to subset the tensor. It must contain the axes or
100 |         dimensions that will be subset as the keys of the dictionary
101 |         and the values correspond to lists of element names for the
102 |         respective axes or dimensions. Those axes that are not present
103 |         in this dictionary will not be subset.
104 |         E.g. {0 : ['Context 1', 'Context2'], 1: ['LR 10', 'LR 100']}
105 | 
106 |     remove_duplicates : boolean, default=True
107 |         Whether removing duplicated names in the lists of `subset_dict`.
108 | 
109 |     keep : str, default='first'
110 |         Determines which duplicates (if any) to keep.
111 |         Options are:
112 | 
113 |         - first : Drop duplicates except for the first occurrence.
114 |         - last : Drop duplicates except for the last occurrence.
115 |         - False : Drop all duplicates.
116 | 
117 |     original_order : boolean, default=False
118 |         Whether keeping the original order of the elements in
119 |         interaction_tensor.order_names or keeping the
120 |         new order as indicated in the lists in the `subset_dict`.
121 | 
122 |     Returns
123 |     -------
124 |     subset_tensor : cell2cell.tensor.BaseTensor
125 |         A copy of interaction_tensor that was subset to contain
126 |         only the elements specified for the respective axis in the
127 |         `subset_dict`. 
Corresponds to a communication tensor
128 |         generated with any of the tensor classes in cell2cell.tensor
129 |     '''
130 |     # Perform a deep copy of the original tensor and reset previous factorization
131 |     subset_tensor = copy.deepcopy(interaction_tensor)
132 |     subset_tensor.rank = None
133 |     subset_tensor.tl_object = None
134 |     subset_tensor.factors = None
135 | 
136 |     # Initialize tensor into a numpy object for performing subset
137 |     context = tl.context(subset_tensor.tensor)
138 |     tensor = tl.to_numpy(subset_tensor.tensor)
139 |     mask = None
140 |     if subset_tensor.mask is not None:
141 |         mask = tl.to_numpy(subset_tensor.mask)
142 | 
143 |     # Search for indexes
144 |     axis_idxs = dict()
145 |     for k, v in subset_dict.items():
146 |         if k < len(tensor.shape):
147 |             if len(v) != 0:
148 |                 idx = find_element_indexes(interaction_tensor=subset_tensor,
149 |                                            elements=v,
150 |                                            axis=k,
151 |                                            remove_duplicates=remove_duplicates,
152 |                                            keep=keep,
153 |                                            original_order=original_order
154 |                                            )
155 |                 if len(idx) == 0:
156 |                     print("No elements found for axis {}. It will return an empty tensor.".format(k))
157 |                 axis_idxs[k] = idx
158 |         else:
159 |             print("Axis {} is out of range, so elements in this axis are not considered.".format(k))
160 | 
161 |     # Subset tensor
162 |     for k, v in axis_idxs.items():
163 |         if tensor.shape != (0,):  # Avoids error when an empty tensor is returned
164 |             tensor = tensor.take(indices=v,
165 |                                  axis=k
166 |                                  )
167 | 
168 |             subset_tensor.order_names[k] = [subset_tensor.order_names[k][i] for i in v]
169 |             if mask is not None:
170 |                 mask = mask.take(indices=v,
171 |                                  axis=k
172 |                                  )
173 | 
174 |     # Restore tensor and mask properties
175 |     tensor = tl.tensor(tensor, **context)
176 |     if mask is not None:
177 |         mask = tl.tensor(mask, **context)
178 | 
179 |     subset_tensor.tensor = tensor
180 |     subset_tensor.mask = mask
181 |     return subset_tensor
182 | 
183 | 
184 | def subset_metadata(tensor_metadata, interaction_tensor, sample_col='Element'):
185 |     '''Subsets the metadata of an InteractionTensor to contain only
186 |     elements in a reference InteractionTensor (interaction_tensor).
187 | 
188 |     Parameters
189 |     ----------
190 |     tensor_metadata : list
191 |         List of pandas dataframes with metadata information for elements of each
192 |         dimension in the tensor. A column named as the variable `sample_col` contains
193 |         the name of each element in the tensor, while another column contains
194 |         the metadata or grouping information of each
195 |         element.
196 | 
197 |     interaction_tensor : cell2cell.tensor.BaseTensor
198 |         A communication tensor generated with any of the tensor classes in
199 |         cell2cell.tensor. This tensor is used as reference to subset the metadata.
200 |         The subset metadata will contain only elements that are present in this
201 |         tensor, so if metadata was originally built for another tensor, the elements
202 |         that are exclusive for that original tensor will be excluded.
203 | 
204 |     sample_col : str, default='Element'
205 |         Name of the column containing the element names in the metadata.
206 | 
207 |     Returns
208 |     -------
209 |     subset_metadata : list
210 |         List of pandas dataframes with metadata information for elements contained
211 |         in `interaction_tensor.order_names`. It is a subset of `tensor_metadata`. 
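Example
-------
A minimal sketch, assuming `tensor` is an InteractionTensor and `meta` is the
list of metadata dataframes originally built for it (names are hypothetical):

>>> sub_tensor = subset_tensor(tensor, subset_dict={0: ['Context 1', 'Context 2']})
>>> sub_meta = subset_metadata(meta, sub_tensor, sample_col='Element')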
212 |     '''
213 |     subset_metadata = []
214 |     for i, meta in enumerate(tensor_metadata):
215 |         if meta is not None:
216 |             tmp_meta = meta.set_index(sample_col)
217 |             tmp_meta = tmp_meta.loc[interaction_tensor.order_names[i], :]
218 |             tmp_meta = tmp_meta.reset_index()
219 |             subset_metadata.append(tmp_meta)
220 |         else:
221 |             subset_metadata.append(None)
222 |     return subset_metadata
-------------------------------------------------------------------------------- /cell2cell/tensor/tensor_manipulation.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | import tensorly as tl
4 | 
5 | from cell2cell.tensor.tensor import PreBuiltTensor
6 | from cell2cell.tensor.subset import subset_tensor
7 | 
8 | 
9 | def concatenate_interaction_tensors(interaction_tensors, axis, order_labels, remove_duplicates=False, keep='first',
10 |                                     mask=None, device=None):
11 |     '''Concatenates interaction tensors in a given tensor dimension or axis.
12 | 
13 |     Parameters
14 |     ----------
15 |     interaction_tensors : list
16 |         List of tensors of any of the tensor classes in cell2cell.tensor.
17 | 
18 |     axis : int
19 |         The axis along which the arrays will be joined. If axis is None, arrays are flattened before use.
20 | 
21 |     order_labels : list
22 |         List of labels for dimensions or orders in the tensor.
23 | 
24 |     remove_duplicates : boolean, default=False
25 |         Whether removing duplicated names in the concatenated axis.
26 | 
27 |     keep : str, default='first'
28 |         Determines which duplicates (if any) to keep.
29 |         Options are:
30 | 
31 |         - first : Drop duplicates except for the first occurrence.
32 |         - last : Drop duplicates except for the last occurrence.
33 |         - False : Drop all duplicates.
34 | 
35 |     mask : ndarray list
36 |         Helps avoiding missing values during a tensor factorization. A mask should be
37 |         a boolean array of the same shape as the original tensor and should be 0
38 |         where the values are missing and 1 everywhere else. This must be of equal shape
39 |         as the concatenated tensor.
40 | 
41 |     device : str, default=None
42 |         Device to use when backend is pytorch. Options are:
43 |         {'cpu', 'cuda', None}
44 | 
45 |     Returns
46 |     -------
47 |     concatenated_tensor : cell2cell.tensor.PreBuiltTensor
48 |         Final tensor after concatenation. It is a PreBuiltTensor that works as
49 |         any interaction tensor based on the class BaseTensor.
50 |     '''
51 |     # Assert that all other dimensions contain the same elements:
52 |     shape = len(interaction_tensors[0].tensor.shape)
53 |     assert all(shape == len(tensor.tensor.shape) for tensor in interaction_tensors[1:]), "Tensors must have the same number of dimensions"
54 | 
55 |     for i in range(shape):
56 |         if i != axis:
57 |             elements = interaction_tensors[0].order_names[i]
58 |             for tensor in interaction_tensors[1:]:
59 |                 assert elements == tensor.order_names[i], "Tensors must have the same elements in the other axes."
60 | 
61 |     # Initialize tensors into a numpy object for performing subset
62 |     # Use the same context as first tensor for everything
63 |     try:
64 |         context = tl.context(interaction_tensors[0].tensor)
65 |     except:
66 |         context = {'dtype': interaction_tensors[0].tensor.dtype, 'device' : None}
67 | 
68 |     # Concatenate tensors
69 |     concat_tensor = tl.concatenate([tensor.tensor.to('cpu') for tensor in interaction_tensors], axis=axis)
70 |     if mask is not None:
71 |         assert mask.shape == concat_tensor.shape, "Mask must have the same shape as the concatenated tensor. 
Here: {}".format(concat_tensor.shape)
72 |     else:  # Generate a new mask from all previous masks if all are not None
73 |         if all([tensor.mask is not None for tensor in interaction_tensors]):
74 |             mask = tl.concatenate([tensor.mask.to('cpu') for tensor in interaction_tensors], axis=axis)
75 |         else:
76 |             mask = None
77 | 
78 |     concat_tensor = tl.tensor(concat_tensor, device=context['device'])
79 |     if mask is not None:
80 |         mask = tl.tensor(mask, device=context['device'])
81 | 
82 |     # Concatenate names of elements for the given axis but keep the others as in one tensor
83 |     order_names = []
84 |     for i in range(shape):
85 |         tmp_names = []
86 |         if i == axis:
87 |             for tensor in interaction_tensors:
88 |                 tmp_names += tensor.order_names[i]
89 |         else:
90 |             tmp_names = interaction_tensors[0].order_names[i]
91 |         order_names.append(tmp_names)
92 | 
93 |     # Generate final object
94 |     concatenated_tensor = PreBuiltTensor(tensor=concat_tensor,
95 |                                          order_names=order_names,
96 |                                          order_labels=order_labels,
97 |                                          mask=mask,  # Change if you want to omit values in the decomposition
98 |                                          device=device
99 |                                          )
100 | 
101 |     # Remove duplicates
102 |     if remove_duplicates:
103 |         concatenated_tensor = subset_tensor(interaction_tensor=concatenated_tensor,
104 |                                             subset_dict={axis: order_names[axis]},
105 |                                             remove_duplicates=remove_duplicates,
106 |                                             keep=keep,
107 |                                             original_order=False)
108 |     return concatenated_tensor
-------------------------------------------------------------------------------- /cell2cell/utils/__init__.py: --------------------------------------------------------------------------------
1 | from cell2cell.utils.networks import (generate_network_from_adjacency, export_network_to_gephi)
2 | from cell2cell.utils.parallel_computing import (agents_number)
3 | -------------------------------------------------------------------------------- /cell2cell/utils/networks.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | from __future__ import absolute_import
4 | 
5 | import networkx as nx
6 | 
7 | 
8 | def generate_network_from_adjacency(adjacency_matrix, package='networkx'):
9 |     '''
10 |     Generates a network or graph object from an adjacency matrix.
11 | 
12 |     Parameters
13 |     ----------
14 |     adjacency_matrix : pandas.DataFrame
15 |         An adjacency matrix, where rows and columns are nodes
16 |         and values represent a weight for the respective edge.
17 | 
18 |     package : str, default='networkx'
19 |         Package or python library to build the network.
20 |         Implemented options are {'networkx'}. Support for
21 |         'igraph' will be available soon.
22 | 
23 |     Returns
24 |     -------
25 |     network : graph-like
26 |         A graph object built with a python-library for networks.
27 |     '''
28 |     if package == 'networkx':
29 |         network = nx.from_pandas_adjacency(adjacency_matrix)
30 |     elif package == 'igraph':
31 |         # A = adjacency_matrix.values
32 |         # network = igraph.Graph.Weighted_Adjacency((A > 0).tolist(), mode=igraph.ADJ_UNDIRECTED)
33 |         #
34 |         # # Add edge weights and node labels. 
35 |         # network.es['weight'] = A[A.nonzero()]
36 |         # network.vs['label'] = list(adjacency_matrix.columns)
37 |         #
38 |         # Warning("iGraph functionalities are not completely implemented yet.")
39 |         raise NotImplementedError("Network using package {} not implemented".format(package))
40 |     else:
41 |         raise NotImplementedError("Network using package {} not implemented".format(package))
42 |     return network
43 | 
44 | 
45 | def export_network_to_gephi(network, filename, format='excel', network_type='Undirected'):
46 |     '''
47 |     Exports a network into a spreadsheet that is readable
48 |     by the software Gephi.
49 | 
50 |     Parameters
51 |     ----------
52 |     network : networkx.Graph, networkx.DiGraph or a pandas.DataFrame
53 |         A networkx Graph or Directed Graph, or an adjacency matrix,
54 |         where rows and columns are nodes and values represent a
55 |         weight for the respective edge.
56 | 
57 |     filename : str
58 |         Path to save the network into a Gephi-readable format.
59 | 
60 |     format : str, default='excel'
61 |         Format to export the spreadsheet. Options are:
62 | 
63 |         - 'excel' : An excel file, either .xls or .xlsx
64 |         - 'csv' : Comma separated value format
65 |         - 'tsv' : Tab separated value format
66 | 
67 |     network_type : str, default='Undirected'
68 |         Type of edges in the network. They could be either
69 |         'Undirected' or 'Directed'.
70 |     '''
71 |     # This allows passing a network directly or an adjacency matrix
72 |     if not isinstance(network, nx.Graph):
73 |         network = generate_network_from_adjacency(network,
74 |                                                   package='networkx')
75 | 
76 |     gephi_df = nx.to_pandas_edgelist(network)
77 |     gephi_df = gephi_df.assign(Type=network_type)
78 |     # When weight is not in the network
79 |     if 'weight' not in gephi_df.columns:
80 |         gephi_df = gephi_df.assign(weight=1)
81 | 
82 |     # Transform column names
83 |     gephi_df = gephi_df[['source', 'target', 'Type', 'weight']]
84 |     gephi_df.columns = [c.capitalize() for c in gephi_df.columns]
85 | 
86 |     # Save with different formats
87 |     if format == 'excel':
88 |         gephi_df.to_excel(filename, sheet_name='Edges', index=False)
89 |     elif format == 'csv':
90 |         gephi_df.to_csv(filename, sep=',', index=False)
91 |     elif format == 'tsv':
92 |         gephi_df.to_csv(filename, sep='\t', index=False)
93 |     else:
94 |         raise ValueError("Format not supported.")
95 | 
96 | 
97 | def export_network_to_cytoscape(network, filename):
98 |     '''
99 |     Exports a network into a JSON file that is readable
100 |     by the software Cytoscape.
101 | 
102 |     Parameters
103 |     ----------
104 |     network : networkx.Graph, networkx.DiGraph or a pandas.DataFrame
105 |         A networkx Graph or Directed Graph, or an adjacency matrix,
106 |         where rows and columns are nodes and values represent a
107 |         weight for the respective edge.
108 | 
109 |     filename : str
110 |         Path to save the network into a Cytoscape-readable format
111 |         (JSON file in this case). E.g. 
'/home/user/network.json'
112 |     '''
113 |     # This allows passing a network directly or an adjacency matrix
114 |     if not isinstance(network, nx.Graph):
115 |         network = generate_network_from_adjacency(network,
116 |                                                   package='networkx')
117 | 
118 |     data = nx.readwrite.json_graph.cytoscape.cytoscape_data(network)
119 | 
120 |     # Export
121 |     import json
122 |     json_str = json.dumps(data)
123 |     with open(filename, 'w') as outfile:
124 |         outfile.write(json_str)
-------------------------------------------------------------------------------- /cell2cell/utils/parallel_computing.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | from __future__ import absolute_import
4 | 
5 | from multiprocessing import cpu_count
6 | 
7 | 
8 | # GENERAL
9 | def agents_number(n_jobs):
10 |     '''
11 |     Computes the number of agents/cores/threads that the
12 |     computer can really provide given a number of
13 |     jobs/threads requested.
14 | 
15 |     Parameters
16 |     ----------
17 |     n_jobs : int
18 |         Number of threads for parallelization.
19 | 
20 |     Returns
21 |     -------
22 |     agents : int
23 |         Number of threads that the computer can really provide.
24 |     '''
25 |     if n_jobs < 0:
26 |         agents = cpu_count() + 1 + n_jobs
27 |         if agents < 0:
28 |             agents = 1
29 |     elif n_jobs > cpu_count():
30 |         agents = cpu_count()
31 | 
32 |     elif n_jobs == 0:
33 |         agents = 1
34 |     else:
35 |         agents = n_jobs
36 |     return agents
37 | 
38 | 
39 | # CORE FUNCTIONS
40 | def parallel_spatial_ccis(inputs):
41 |     '''
42 |     Parallel computing in cell2cell.analysis.pipelines.SpatialSingleCellInteractions
43 |     '''
44 |     # TODO: Implement this for enabling spatial analysis and computing interactions in parallel
45 | 
46 |     # from cell2cell.core import spatial_operation
47 |     #results = spatial_operation()
48 | 
49 |     # return results
50 |     pass
-------------------------------------------------------------------------------- /docs/documentation.md: --------------------------------------------------------------------------------
1 | # Documentation for *cell2cell*
2 | 
3 | This documentation is for our *cell2cell* suite, which includes the [regular cell2cell](https://doi.org/10.1371/journal.pcbi.1010715)
4 | and [Tensor-cell2cell](https://doi.org/10.1038/s41467-022-31369-2) tools. The former is for inferring cell-cell interactions
5 | and communication in one sample or context, while the latter is for deconvolving complex patterns
6 | of cell-cell communication across multiple samples or contexts simultaneously into interpretable factors
7 | representing patterns of communication.
8 | 
9 | Here, multiple classes and functions are implemented to facilitate the analyses, including a variety of
10 | visualizations to simplify the interpretation of results:
11 | 
12 | - **cell2cell.analysis** : Includes simplified pipelines for running the analyses, and functions for downstream analyses of Tensor-cell2cell.
13 | - **cell2cell.clustering** : Includes multiple scipy-based functions for performing clustering methods.
14 | - **cell2cell.core** : Includes the core functions for inferring cell-cell interactions and communication. It includes scoring methods, cell classes, and interaction spaces.
15 | - **cell2cell.datasets** : Includes toy datasets and annotations for testing functions in basic scenarios.
16 | - **cell2cell.external** : Includes built-in approaches borrowed from other tools to avoid incompatibilities (e.g. UMAP, tensorly, and PCoA).
17 | - **cell2cell.io** : Includes functions for opening and saving diverse types of files.
18 | - **cell2cell.plotting** : Includes all the visualization options that *cell2cell* offers.
19 | - **cell2cell.preprocessing** : Includes functions for manipulating data and variables (e.g. data preprocessing, integration, permutation, among others).
20 | - **cell2cell.spatial** : Includes filtering of cell-cell interaction results given intercellular distance, as well as defining neighborhoods by grids or moving windows.
21 | - **cell2cell.stats** : Includes statistical analyses such as enrichment analysis, multiple test correction methods, permutation approaches, and the Gini coefficient.
22 | - **cell2cell.tensor** : Includes all functions pertinent to the analysis of *Tensor-cell2cell*.
23 | - **cell2cell.utils** : Includes general utilities for analyzing networks and performing parallel computing.
24 | 
25 | 
26 | Below, all the inputs, parameters (including their different options), and outputs are detailed. Source code of the functions is also included.
27 | 
28 | 
29 | ::: cell2cell
30 | -------------------------------------------------------------------------------- /docs/index.md: --------------------------------------------------------------------------------
1 | # Inferring cell-cell interactions from transcriptomes with *cell2cell*
2 | [![PyPI Version][pb]][pypi]
3 | [![Documentation Status](https://readthedocs.org/projects/cell2cell/badge/?version=latest)](https://cell2cell.readthedocs.io/en/latest/?badge=latest)
4 | [![Downloads](https://pepy.tech/badge/cell2cell/month)](https://pepy.tech/project/cell2cell)
5 | 
6 | 
7 | [pb]: https://badge.fury.io/py/cell2cell.svg
8 | [pypi]: https://pypi.org/project/cell2cell/
9 | 
10 | ## Getting started
11 | For tutorials and documentation, visit [**cell2cell ReadTheDocs**](https://cell2cell.readthedocs.org/) or our [**cell2cell website**](https://earmingol.github.io/cell2cell).
12 | 
13 | 
14 | 
15 | ## Installation
16 | 
17 | Step 1: Install Anaconda
18 | 
19 | First, [install Anaconda following this tutorial](https://docs.anaconda.com/anaconda/install/).
20 | 
21 | 
22 | Step 2: Create and Activate a New Conda Environment
23 | 
24 | ```
25 | # Create a new conda environment
26 | conda create -n cell2cell -y python=3.7 jupyter
27 | 
28 | # Activate the environment
29 | conda activate cell2cell
30 | ```
31 | 
32 | 
33 | Step 3: Install cell2cell
34 | 
35 | ```
36 | pip install cell2cell
37 | ```
38 | 
39 | 
40 | ## Examples
41 | 
42 | | cell2cell Examples | Tensor-cell2cell Examples |
43 | | --- | --- |
44 | | ![cell2cell Logo](https://github.com/earmingol/cell2cell/blob/master/Logo.png?raw=true) | ![Tensor-cell2cell Logo](https://github.com/earmingol/cell2cell/blob/master/LogoTensor.png?raw=true) |
45 | | - [Step-by-step Pipeline](https://github.com/earmingol/cell2cell/blob/master/examples/cell2cell/Toy-Example.ipynb)
- [Interaction Pipeline for Bulk Data](https://earmingol.github.io/cell2cell/tutorials/Toy-Example-BulkPipeline)
- [Interaction Pipeline for Single-Cell Data](https://earmingol.github.io/cell2cell/tutorials/Toy-Example-SingleCellPipeline)
- [Whole Body of *C. elegans*](https://github.com/LewisLabUCSD/Celegans-cell2cell) | - [Obtaining patterns of cell-cell communication](https://earmingol.github.io/cell2cell/tutorials/ASD/01-Tensor-Factorization-ASD/)
- [Downstream 1: Factor-specific analyses](https://earmingol.github.io/cell2cell/tutorials/ASD/02-Factor-Specific-ASD/)
- [Downstream 2: Patterns to functions (GSEA)](https://earmingol.github.io/cell2cell/tutorials/ASD/03-GSEA-ASD/)
- [Tensor-cell2cell in Google Colab (**GPU**)](https://colab.research.google.com/drive/1T6MUoxafTHYhjvenDbEtQoveIlHT2U6_?usp=sharing)
- [Communication patterns in **Spatial Transcriptomics**](https://earmingol.github.io/cell2cell/tutorials/Tensor-cell2cell-Spatial/) | 46 | 47 | Reproducible runs of the analyses in the [Tensor-cell2cell paper](https://doi.org/10.1038/s41467-022-31369-2) are available at [CodeOcean.com](https://doi.org/10.24433/CO.0051950.v2) 48 | 49 | ## LIANA & Tensor-cell2cell 50 | 51 | Explore our tutorials for using Tensor-cell2cell with [LIANA](https://github.com/saezlab/liana-py) at [ccc-protocols.readthedocs.io](https://ccc-protocols.readthedocs.io/). 52 | 53 | ## Common Issues 54 | 55 | - **Memory Errors with Tensor-cell2cell:** If you encounter memory errors when performing tensor factorizations, try replacing `init='svd'` with `init='random'`. 56 | 57 | ## Ligand-Receptor Pairs 58 | Find a curated list of ligand-receptor pairs for your analyses at our [GitHub Repository](https://github.com/LewisLabUCSD/Ligand-Receptor-Pairs). 59 | 60 | ## Citation 61 | 62 | Please cite our work using the following references: 63 | 64 | - **cell2cell**: [Inferring a spatial code of cell-cell interactions across a whole animal body](https://doi.org/10.1371/journal.pcbi.1010715). 65 | *PLOS Computational Biology, 2022* 66 | 67 | - **Tensor-cell2cell**: [Context-aware deconvolution of cell-cell communication with Tensor-cell2cell](https://doi.org/10.1038/s41467-022-31369-2). 68 | *Nature Communications, 2022.* 69 | 70 | - **LIANA & Tensor-cell2cell tutorials**: [Combining LIANA and Tensor-cell2cell to decipher cell-cell communication across multiple samples](https://doi.org/10.1101/2023.04.28.538731). 71 | *bioRxiv, 2023* -------------------------------------------------------------------------------- /docs/requirements.in: -------------------------------------------------------------------------------- 1 | mkdocs 2 | mkdocstrings[python] 3 | markdown-include 4 | mkdocs-autorefs 5 | mkdocs-gen-files 6 | mkdocs-material 7 | mkdocs-material-extensions 8 | mkdocs-jupyter 9 | mkdocstrings-python-legacy -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with Python 3.7 3 | # by the following command: 4 | # 5 | # pip-compile requirements.in 6 | # 7 | appnope==0.1.3 8 | # via 9 | # ipykernel 10 | # ipython 11 | astunparse==1.6.3 12 | # via pytkdocs 13 | attrs==23.1.0 14 | # via jsonschema 15 | babel==2.13.1 16 | # via mkdocs-material 17 | backcall==0.2.0 18 | # via ipython 19 | beautifulsoup4==4.12.2 20 | # via nbconvert 21 | bleach==6.0.0 22 | # via nbconvert 23 | cached-property==1.5.2 24 | # via 25 | # griffe 26 | # pytkdocs 27 | certifi==2023.11.17 28 | # via requests 29 | charset-normalizer==3.3.2 30 | # via requests 31 | click==8.1.7 32 | # via mkdocs 33 | colorama==0.4.6 34 | # via 35 | # griffe 36 | # mkdocs-material 37 | debugpy==1.7.0 38 | # via ipykernel 39 | decorator==5.1.1 40 | # via ipython 41 | defusedxml==0.7.1 42 | # via nbconvert 43 | entrypoints==0.4 44 | # via jupyter-client 45 | fastjsonschema==2.19.0 46 | # via nbformat 47 | ghp-import==2.1.0 48 | # via mkdocs 49 | griffe==0.30.1 50 | # via mkdocstrings-python 51 | idna==3.4 52 | # via requests 53 | importlib-metadata==6.7.0 54 | # via 55 | # attrs 56 | # click 57 | # jsonschema 58 | # markdown 59 | # mkdocs 60 | # mkdocstrings 61 | # nbconvert 62 | # nbformat 63 | importlib-resources==5.12.0 64 | # via jsonschema 65 | ipykernel==6.16.2 66 | # via mkdocs-jupyter 67 
| ipython==7.34.0 68 | # via ipykernel 69 | jedi==0.19.1 70 | # via ipython 71 | jinja2==3.1.2 72 | # via 73 | # mkdocs 74 | # mkdocs-material 75 | # mkdocstrings 76 | # nbconvert 77 | jsonschema==4.17.3 78 | # via nbformat 79 | jupyter-client==7.4.9 80 | # via 81 | # ipykernel 82 | # nbclient 83 | jupyter-core==4.12.0 84 | # via 85 | # jupyter-client 86 | # nbclient 87 | # nbconvert 88 | # nbformat 89 | jupyterlab-pygments==0.2.2 90 | # via nbconvert 91 | jupytext==1.15.2 92 | # via mkdocs-jupyter 93 | markdown==3.4.4 94 | # via 95 | # markdown-include 96 | # mkdocs 97 | # mkdocs-autorefs 98 | # mkdocs-material 99 | # mkdocstrings 100 | # pymdown-extensions 101 | markdown-include==0.8.1 102 | # via -r requirements.in 103 | markdown-it-py==2.2.0 104 | # via 105 | # jupytext 106 | # mdit-py-plugins 107 | markupsafe==2.1.3 108 | # via 109 | # jinja2 110 | # mkdocs 111 | # mkdocstrings 112 | # nbconvert 113 | matplotlib-inline==0.1.6 114 | # via 115 | # ipykernel 116 | # ipython 117 | mdit-py-plugins==0.3.5 118 | # via jupytext 119 | mdurl==0.1.2 120 | # via markdown-it-py 121 | mergedeep==1.3.4 122 | # via mkdocs 123 | mistune==3.0.2 124 | # via nbconvert 125 | mkdocs==1.5.3 126 | # via 127 | # -r requirements.in 128 | # mkdocs-autorefs 129 | # mkdocs-gen-files 130 | # mkdocs-jupyter 131 | # mkdocs-material 132 | # mkdocstrings 133 | mkdocs-autorefs==0.4.1 134 | # via 135 | # -r requirements.in 136 | # mkdocstrings 137 | mkdocs-gen-files==0.5.0 138 | # via -r requirements.in 139 | mkdocs-jupyter==0.24.3 140 | # via -r requirements.in 141 | mkdocs-material==9.2.7 142 | # via 143 | # -r requirements.in 144 | # mkdocs-jupyter 145 | mkdocs-material-extensions==1.2 146 | # via 147 | # -r requirements.in 148 | # mkdocs-material 149 | mkdocstrings[python]==0.22.0 150 | # via 151 | # -r requirements.in 152 | # mkdocstrings-python 153 | # mkdocstrings-python-legacy 154 | mkdocstrings-python==1.1.2 155 | # via mkdocstrings 156 | mkdocstrings-python-legacy==0.2.3 157 | # via -r requirements.in 158 | nbclient==0.7.4 159 | # via nbconvert 160 | nbconvert==7.6.0 161 | # via mkdocs-jupyter 162 | nbformat==5.8.0 163 | # via 164 | # jupytext 165 | # nbclient 166 | # nbconvert 167 | nest-asyncio==1.5.8 168 | # via 169 | # ipykernel 170 | # jupyter-client 171 | packaging==23.2 172 | # via 173 | # ipykernel 174 | # mkdocs 175 | # nbconvert 176 | paginate==0.5.6 177 | # via mkdocs-material 178 | pandocfilters==1.5.0 179 | # via nbconvert 180 | parso==0.8.3 181 | # via jedi 182 | pathspec==0.11.2 183 | # via mkdocs 184 | pexpect==4.8.0 185 | # via ipython 186 | pickleshare==0.7.5 187 | # via ipython 188 | pkgutil-resolve-name==1.3.10 189 | # via jsonschema 190 | platformdirs==4.0.0 191 | # via mkdocs 192 | prompt-toolkit==3.0.41 193 | # via ipython 194 | psutil==5.9.6 195 | # via ipykernel 196 | ptyprocess==0.7.0 197 | # via pexpect 198 | pygments==2.17.1 199 | # via 200 | # ipython 201 | # mkdocs-jupyter 202 | # mkdocs-material 203 | # nbconvert 204 | pymdown-extensions==10.2.1 205 | # via 206 | # mkdocs-material 207 | # mkdocstrings 208 | pyrsistent==0.19.3 209 | # via jsonschema 210 | python-dateutil==2.8.2 211 | # via 212 | # ghp-import 213 | # jupyter-client 214 | pytkdocs==0.16.1 215 | # via mkdocstrings-python-legacy 216 | pytz==2023.3.post1 217 | # via babel 218 | pyyaml==6.0.1 219 | # via 220 | # jupytext 221 | # mkdocs 222 | # pymdown-extensions 223 | # pyyaml-env-tag 224 | pyyaml-env-tag==0.1 225 | # via mkdocs 226 | pyzmq==25.1.1 227 | # via 228 | # ipykernel 229 | # jupyter-client 230 | 
regex==2022.10.31 231 | # via mkdocs-material 232 | requests==2.31.0 233 | # via mkdocs-material 234 | six==1.16.0 235 | # via 236 | # astunparse 237 | # bleach 238 | # python-dateutil 239 | soupsieve==2.4.1 240 | # via beautifulsoup4 241 | tinycss2==1.2.1 242 | # via nbconvert 243 | toml==0.10.2 244 | # via jupytext 245 | tornado==6.2 246 | # via 247 | # ipykernel 248 | # jupyter-client 249 | traitlets==5.9.0 250 | # via 251 | # ipykernel 252 | # ipython 253 | # jupyter-client 254 | # jupyter-core 255 | # matplotlib-inline 256 | # nbclient 257 | # nbconvert 258 | # nbformat 259 | typing-extensions==4.7.1 260 | # via 261 | # importlib-metadata 262 | # jsonschema 263 | # markdown-it-py 264 | # mkdocs 265 | # mkdocstrings 266 | # platformdirs 267 | # pytkdocs 268 | urllib3==2.0.7 269 | # via requests 270 | watchdog==3.0.0 271 | # via mkdocs 272 | wcwidth==0.2.10 273 | # via prompt-toolkit 274 | webencodings==0.5.1 275 | # via 276 | # bleach 277 | # tinycss2 278 | wheel==0.42.0 279 | # via astunparse 280 | zipp==3.15.0 281 | # via 282 | # importlib-metadata 283 | # importlib-resources 284 | 285 | # The following packages are considered to be unsafe in a requirements file: 286 | # setuptools -------------------------------------------------------------------------------- /docs/tutorials/ASD/figures/4d-tensor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/earmingol/cell2cell/6a609fa3d3e1d65fc3b92be459f47e636c89f061/docs/tutorials/ASD/figures/4d-tensor.png -------------------------------------------------------------------------------- /docs/tutorials/ASD/figures/tensor-approx.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/earmingol/cell2cell/6a609fa3d3e1d65fc3b92be459f47e636c89f061/docs/tutorials/ASD/figures/tensor-approx.png -------------------------------------------------------------------------------- /docs/tutorials/ASD/figures/tensor-factorization.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/earmingol/cell2cell/6a609fa3d3e1d65fc3b92be459f47e636c89f061/docs/tutorials/ASD/figures/tensor-factorization.png -------------------------------------------------------------------------------- /docs/tutorials/ASD/figures/tf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/earmingol/cell2cell/6a609fa3d3e1d65fc3b92be459f47e636c89f061/docs/tutorials/ASD/figures/tf.png -------------------------------------------------------------------------------- /docs/tutorials/ASD/results/Loadings.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/earmingol/cell2cell/6a609fa3d3e1d65fc3b92be459f47e636c89f061/docs/tutorials/ASD/results/Loadings.xlsx -------------------------------------------------------------------------------- /examples/tensor_cell2cell/PreBuiltMetadata-PBMC.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/earmingol/cell2cell/6a609fa3d3e1d65fc3b92be459f47e636c89f061/examples/tensor_cell2cell/PreBuiltMetadata-PBMC.pkl -------------------------------------------------------------------------------- /examples/tensor_cell2cell/PreBuiltTensor-PBMC.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/earmingol/cell2cell/6a609fa3d3e1d65fc3b92be459f47e636c89f061/examples/tensor_cell2cell/PreBuiltTensor-PBMC.pkl -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: cell2cell 2 | site_description: "Python package to infer cell-cell interactions and communication from gene expression of interacting proteins." 3 | 4 | theme: 5 | name: readthedocs 6 | highlightjs: true 7 | repo_url: https://github.com/earmingol/cell2cell 8 | plugins: 9 | - search 10 | - mkdocs-jupyter: 11 | execute: false 12 | include: ["*.ipynb"] 13 | include_source: True 14 | #ignore_h1_titles: True 15 | - mkdocstrings: 16 | watch: 17 | - cell2cell 18 | default_handler: python 19 | handlers: 20 | python: 21 | options: 22 | docstring_style: sphinx 23 | 24 | markdown_extensions: 25 | - def_list 26 | - attr_list 27 | - admonition 28 | - codehilite 29 | - pymdownx.tasklist: 30 | custom_checkbox: true 31 | - md_in_html 32 | - pymdownx.superfences 33 | - pymdownx.betterem 34 | - pymdownx.caret 35 | - pymdownx.mark 36 | - pymdownx.tilde 37 | - pymdownx.highlight: 38 | anchor_linenums: true 39 | - pymdownx.inlinehilite 40 | - pymdownx.snippets 41 | - pymdownx.superfences 42 | - pymdownx.tabbed: 43 | alternate_style: true 44 | 45 | nav: 46 | - "Home": index.md 47 | - "API Documentation": documentation.md 48 | - "cell2cell Tutorials": 49 | - tutorials/Toy-Example-BulkPipeline.ipynb 50 | - tutorials/Toy-Example-SingleCellPipeline.ipynb 51 | - "Tensor-cell2cell Tutorials": 52 | - tutorials/ASD/01-Tensor-Factorization-ASD.ipynb 53 | - tutorials/ASD/02-Factor-Specific-ASD.ipynb 54 | - tutorials/ASD/03-GSEA-ASD.ipynb 55 | - tutorials/Tensor-cell2cell-Spatial.ipynb 56 | - tutorials/GPU-Example.ipynb 57 | -------------------------------------------------------------------------------- /release/0.5.10-notes.md: -------------------------------------------------------------------------------- 1 | # Release Notes - cell2cell v0.5.10 2 | 3 | ## New features 4 | - Added ```cell2cell.plotting.factor_plot.ccc_networks_plot()``` to visualize factor-specific 5 | CCC networks obtained from a tensor decomposition with Tensor-cell2cell 6 | - Added Gini coefficient in ```cell2cell.stats.gini.gini_coefficient()``` and 7 | ```cell2cell.analysis.tensor_downstream.compute_gini_coefficients()``` 8 | 9 | ## Feature updates 10 | - In the analysis ```cell2cell.analysis.SingleCellInteractions.permute_cell_labels()```, 11 | the score computed without permutation is now considered part of the permutation 12 | distribution for computing P-values. So if 100 permutations are intended, the analysis 13 | should be done with 99 permutations, since the original score becomes the 100th element. 14 | - In the same analysis above, the ```randomized_score``` list is now converted to a numpy.array once 15 | instead of at each iteration of the last for loop (Line 704). This accelerates the analysis. 16 | 17 | ## Fixed Bugs 18 | - Fixed bug in ```cell2cell.plotting.tensor_plot.tensor_factors_plot_from_loadings()``` 19 | associated with the metadata when it was None. 20 | - Fixed bug in ```cell2cell.plotting.tensor_plot.tensor_factors_plot_from_loadings()``` 21 | that prevented using a tensor with one dimension. 22 | - Fixed bug in ```cell2cell.plotting.factor_plot.context_boxplot()``` 23 | that prevented using a decomposition with just one factor.
24 | - Fixed bug when using ```communication_score='expression_gmean'``` in cell2cell pipelines. -------------------------------------------------------------------------------- /release/0.5.11-notes.md: -------------------------------------------------------------------------------- 1 | # Release Notes - cell2cell v0.5.11 2 | 3 | ## New features 4 | - Created a new function to use external communication scores generated with other tools. This function can be found in 5 | ```cell2cell.tensor.external_scores.dataframes_to_tensor()```. 6 | - Added ```cell2cell.tensor.tensor.BaseTensor.loc_nans```, ```cell2cell.tensor.tensor.BaseTensor.loc_zeros```, and the same attributes in 7 | heir tensor classes to keep track of values assigned as NaNs and as real zeros, respectively. 8 | - ```cell2cell.tensor.external_scores.dataframes_to_tensor()``` also incorporates the previous point to keep track 9 | of NaNs and real zeros when using external communication scores. 10 | - Added ```lr_fill``` and ```cell_fill``` parameters to ```cell2cell.tensor.external_scores.dataframes_to_tensor()```. 11 | 12 | ## Feature updates 13 | - Added two new options to the parameter ```how``` in ```cell2cell.tensor.build_context_ccc_tensor()```. 14 | They are ```how='outer_genes'``` and ```how='outer_cells'```. These new options were also extended to all InteractionTensors 15 | derived from ```cell2cell.tensor.tensor.BaseTensor```. 16 | - These ```how``` options were also extended to the new function ```cell2cell.tensor.external_scores.dataframes_to_tensor()```, 17 | but here implemented as ```how='outer_lrs'``` and ```how='outer_cells'```. 18 | - Implemented multiple options to aggregate gene expression of protein complexes. The available options use the 19 | minimum expression or the average expression among the subunits. This can be controlled with the parameter 20 | ```complex_agg_method='min'``` or ```complex_agg_method='mean'``` when creating a ```cell2cell.tensor.InteractionTensor```, 21 | ```cell2cell.core.InteractionSpace```, ```cell2cell.analysis.BulkInteractions``` pipeline, or ```cell2cell.analysis.SingleCellInteractions``` pipeline (see the conceptual sketch at the end of this section). 22 | - The previous point relies on the function ```cell2cell.preprocessing.rnaseq.add_complexes_to_expression()``` through 23 | the parameter ```agg_method='min'``` or ```agg_method='mean'```. 24 | - Added parameter ```cbar_label``` to the function ```cell2cell.plotting.factor_plot.loading_clustermap()``` 25 | to customize the title of the color bar. 26 | - Added parameter ```manual_elbow``` to ```cell2cell.tensor.tensor.BaseTensor.elbow_rank_selection()``` to manually specify 27 | the elbow to highlight.
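To make the complex-aggregation behavior described above concrete, here is a conceptual sketch in plain pandas (this is not the cell2cell API; gene names and values are made up for illustration):

```python
import pandas as pd

# Hypothetical expression of two subunits of a receptor complex across cell types
expr = pd.DataFrame({'CellA': [5.0, 1.0], 'CellB': [2.0, 4.0]},
                    index=['ITGA1', 'ITGB1'])

# complex_agg_method='min': complex expression is limited by the scarcest subunit
complex_min = expr.min(axis=0)    # CellA: 1.0, CellB: 2.0

# complex_agg_method='mean': average expression across subunits
complex_mean = expr.mean(axis=0)  # CellA: 3.0, CellB: 3.0
```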
28 | 29 | ## Fixed Bugs 30 | - Renamed ```cell2cell.plotting.circos_plot``` to ```cell2cell.plotting.circular_plot``` to avoid a name clash with the 31 | function ```cell2cell.plotting.circos_plot.circos_plot()```, which is imported directly under ```cell2cell.plotting```. -------------------------------------------------------------------------------- /release/0.5.4-notes.md: -------------------------------------------------------------------------------- 1 | # Release Notes - cell2cell v0.5.4 2 | 3 | ## New features 4 | 5 | - Implemented a gradient of colors for continuous numbers in the function ```cell2cell.plotting.aesthetics.get_colors_from_labels()``` 6 | - Added function ```excluded_value_fraction()``` in the class ```InteractionTensor``` 7 | - Implemented reordering of elements in a dimension of the tensor when plotting their loadings from the decomposition 8 | in the function ```cell2cell.plotting.tensor_plot.tensor_factors_plot()``` under the parameter ```reorder_elements```. 9 | - Modified tensor objects and implemented a function to normalize loadings to unit Euclidean length under 10 | the parameter ```normalize_loadings``` in the method ```compute_tensor_factorization``` of the class ```BaseTensor``` and heirs such as ```InteractionTensor```. 11 | - Implemented the attribute ```explained_variance_ratio_``` in tensor objects. It only outputs values when using ```normalize_loadings=True```. 12 | - Added ```explained_variance_``` attribute to tensor objects. 13 | - Implemented ```explained_variance``` in tensor objects to compute the ```explained_variance_``` attribute, inspired 14 | by ```sklearn.metrics.explained_variance_score```. 15 | 16 | ## Dependency Update 17 | 18 | - matplotlib >= 3.2.0 19 | - seaborn >= 0.11.0 -------------------------------------------------------------------------------- /release/0.5.5-notes.md: -------------------------------------------------------------------------------- 1 | # Release Notes - cell2cell v0.5.5 2 | 3 | ## Fixed Bugs 4 | 5 | - Fixed bug in computing the factorization error when using a GPU and a tensor without masked values. 6 | See line 180 in ```cell2cell.tensor.tensor.py```, and lines 151 and 222 of ```cell2cell.tensor.factorization.py``` -------------------------------------------------------------------------------- /release/0.5.6-notes.md: -------------------------------------------------------------------------------- 1 | # Release Notes - cell2cell v0.5.6 2 | 3 | ## New features 4 | 5 | - Implemented an analysis for obtaining UMAP embeddings from a gene expression matrix. It is 6 | found in ```cell2cell.external.umap.run_umap()```. 7 | - Implemented a UMAP biplot to visualize UMAP embeddings. It is found in 8 | ```cell2cell.plotting.umap_plot.umap_biplot()```. 9 | - Implemented functions to subset an InteractionTensor by lists of names of the elements in any 10 | of the tensor dimensions (see the conceptual sketch below). 11 | - Function ```cell2cell.tensor.subset.subset_tensor()``` to subset an InteractionTensor, 12 | powered by the function ```cell2cell.tensor.subset.find_element_indexes()``` to find 13 | where each element name is located in the tensor. 14 | - Function ```cell2cell.tensor.subset.subset_metadata()``` to subset the metadata generated with 15 | ```cell2cell.tensor.tensor.generate_tensor_metadata()```. It restricts the metadata to contain only elements 16 | present in a reference InteractionTensor.
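As an illustration of what the subsetting functions above do, here is a minimal sketch in plain numpy (this is not the cell2cell API; the dimension order and element labels are hypothetical):

```python
import numpy as np

# A 4D tensor: contexts x LR pairs x sender cells x receiver cells
tensor = np.random.rand(4, 3, 5, 5)
sender_names = ['B', 'T', 'NK', 'Mono', 'DC']  # labels of dimension 2

# Find where each element name is located, then subset that dimension
keep = ['T', 'NK']
idx = [sender_names.index(name) for name in keep]
subset = np.take(tensor, idx, axis=2)  # new shape: (4, 3, 2, 5)
```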
17 | 18 | ## Dependency Update 19 | 20 | - umap-learn -------------------------------------------------------------------------------- /release/0.5.7-notes.md: -------------------------------------------------------------------------------- 1 | # Release Notes - cell2cell v0.5.7 2 | 3 | ## New features 4 | 5 | - Added geometric mean as a communication score in ```cell2cell.core.communication_scores``` 6 | - Added the parameter ```var_ordered_factors``` in 7 | ```cell2cell.tensor.BaseTensor.compute_factorization()``` to decide whether to reorder 8 | the factors by the variance they explain (in descending order). 9 | - Made ```normalize_loadings=True``` the default in 10 | ```cell2cell.tensor.BaseTensor.compute_factorization()``` 11 | - Added an option to plot the loadings of a tensor factorization directly from a 12 | factors object (an OrderedDict usually found in ```cell2cell.tensor.BaseTensor.factors```). 13 | It can be done with the function ```cell2cell.plotting.tensor_factors_plot_from_loadings()``` 14 | - To complement the previous point, added a function to import factors from an Excel file 15 | previously exported with ```cell2cell.tensor.BaseTensor.export_factors(filename)```. To import the 16 | factors, use ```cell2cell.io.load_tensor_factors(filename)```. 17 | 18 | ## Fixed Bugs 19 | 20 | - Fixed minor bugs in the functions ```cell2cell.external.umap.run_umap()``` and 21 | ```cell2cell.plotting.umap_plot.umap_biplot()``` -------------------------------------------------------------------------------- /release/0.5.8-notes.md: -------------------------------------------------------------------------------- 1 | # Release Notes - cell2cell v0.5.8 2 | 3 | ## New features 4 | - Implemented the communication score 'expression_gmean' in all pipelines of cell2cell (cell2cell.analysis.pipelines) 5 | - Updated documentation of regular cell2cell 6 | - Implemented **downstream analyses for Tensor-cell2cell**, available in ```cell2cell.analysis.tensor_downstream``` and 7 | associated plots in ```cell2cell.plotting.factor_plot``` 8 | - Implemented the **CorrIndex metric** to compare two tensor decompositions of similar tensors, available in 9 | ```cell2cell.tensor.metrics``` 10 | - Implemented a function to export networks to be read in Cytoscape. It can be called as 11 | ```cell2cell.utils.networks.export_network_to_cytoscape()``` 12 | - Renamed ```cell2cell.plotting.dot_plot.py``` to ```cell2cell.plotting.pval_plot.py``` and included a new function 13 | to generate dot plots from any input. The original function is ```cell2cell.plotting.pval_plot.dot_plot()```, which 14 | takes a ```cell2cell.analysis.pipelines.SingleCellInteractions``` object, while the new function is 15 | ```cell2cell.plotting.pval_plot.generate_dot_plot()```, which takes any pair of dataframes of P-values and scores. 16 | 17 | ## Fixed Bugs 18 | - Fixed bugs in the triangular clustermap in ```cell2cell.plotting.cci_plot``` 19 | - Fixed bug associated with duplicated gene names when building a ```cell2cell.tensor.InteractionTensor``` 20 | 21 | ## Dependency Update 22 | - Added statannotations -------------------------------------------------------------------------------- /release/0.5.9-notes.md: -------------------------------------------------------------------------------- 1 | # Release Notes - cell2cell v0.5.9 2 | 3 | ## Fixed Bugs 4 | - This version was created to fix issues when importing version 0.5.8.
5 | - For changes in this version, see the [notes of version 0.5.8](0.5.8-notes.md) -------------------------------------------------------------------------------- /release/0.6.0-notes.md: -------------------------------------------------------------------------------- 1 | # Release Notes - cell2cell v0.6.0 2 | 3 | ## New features 4 | - Added 'gmean' as a method to compute expression of protein complexes. 5 | It involves the function ```cell2cell.preprocessing.rnaseq.add_complexes_to_expression()``` 6 | and all objects calling it. 7 | - Added new parameters for improving the robustness of tensor factorization. These are 8 | ```n_iter_max``` and ```tol```. A higher ```n_iter_max``` and a lower ```tol``` retrieve better optimal 9 | solutions, but at the expense of longer running times. Available in 10 | ```cell2cell.tensor.factorization._compute_tensor_factorization()``` 11 | and in ```cell2cell.tensor.tensor.BaseTensor.compute_tensor_factorization()``` and all heir classes. 12 | - Similar to the previous point, the parameter ```svd``` was added to these functions. This allows controlling 13 | the type of SVD method to use when ```init='svd'```. See the documentation for more information. 14 | - Added new methods/options for running a tensor decomposition in ```cell2cell.tensor.factorization._compute_tensor_factorization()``` 15 | and in ```cell2cell.tensor.tensor.BaseTensor.compute_tensor_factorization()``` and all heir classes. 16 | This can be controlled with the parameter ```tf_type```. See the documentation for 17 | more options. 18 | - Added an option to do a deep copy of any tensor of the class ```cell2cell.tensor.tensor.BaseTensor``` and its 19 | heir classes. Available through ```BaseTensor.copy()```. 20 | - Added a new CCI score based on ICELLNET (```cell2cell.core.cci_scores```). Available in the functions 21 | of the regular cell2cell tool (```cell2cell.core.interaction_space```, ```cell2cell.analysis.pipelines.BulkInteractions```, 22 | and ```cell2cell.analysis.pipelines.SingleCellInteractions```) 23 | - Added a new function to handle duplicate elements: ```cell2cell.preprocessing.find_elements.find_duplicates()``` 24 | - Modified functions in ```cell2cell.tensor.subset``` to handle duplicate elements 25 | - Added a new function to concatenate InteractionTensors: ```cell2cell.tensor.tensor_manipulation.concatenate_interaction_tensors()``` 26 | 27 | ## Feature updates 28 | - Updated dependency version of tensorly to 0.7.0 29 | 30 | ## Fixed Bugs 31 | - Fixed bug with ```return_errors``` in tensor decompositions using the regular non_negative_parafac. 32 | Newer versions of tensorly return the decomposition and errors as a tuple in other decomposition methods. 33 | - Fixed bug that changed diagonal values of the input matrix to zeros when using ```cell2cell.plotting.cci_plot.clustermap_cci``` -------------------------------------------------------------------------------- /release/0.6.1-notes.md: -------------------------------------------------------------------------------- 1 | # Release Notes - cell2cell v0.6.1 2 | 3 | ## New features 4 | - Implemented the option to filter for cells/genes/LR pairs that are present in a given 5 | fraction of samples/contexts, in addition to using the union or intersection, to build a 6 | tensor derived from `BaseTensor`. This can be controlled with the parameter `outer_fraction` 7 | in the classes/functions available in `cell2cell.tensor.tensor` and `cell2cell.tensor.external_scores`.
8 | - Added method `sparsity_fraction()` to `cell2cell.tensor.tensor.BaseTensor`, which computes the fraction of 9 | values in the tensor that are real zeros. 10 | - Added method `missing_fraction()` to `cell2cell.tensor.tensor.BaseTensor`, which computes the fraction of 11 | values in the tensor that are missing or NaNs. 12 | 13 | ## Feature updates 14 | - `cell2cell.stats.permutation.compute_pvalue_from_dist()` now ignores NaN values. 15 | 16 | ## Fixed Bugs 17 | - Fixed bug in `cell2cell.tensor.concatenate_interaction_tensors()` that did not allow 18 | concatenating tensors when using a tensorly backend different from numpy. 19 | - Fixed bug when dealing with GPU tensors in `cell2cell.tensor.tensor.PreBuiltTensor` 20 | - Fixed bug in dimension labeling in `cell2cell.tensor.tensor.PreBuiltTensor` -------------------------------------------------------------------------------- /release/0.6.2-notes.md: -------------------------------------------------------------------------------- 1 | # Release Notes - cell2cell v0.6.2 2 | 3 | ## New features 4 | - Added a parameter `output_fig` to ```cell2cell.tensor.tensor.BaseTensor.elbow_rank_selection()``` that 5 | allows deciding whether to generate the figure. If `output_fig=False`, the outputs of this function 6 | will be `(None, loss)`. 7 | - Created ```cell2cell.preprocessing.signal``` to include functions such as ```smooth_curve()``` 8 | to smooth a set of values representing a curve. 9 | - Implemented curve smoothing for the elbow analysis of Tensor-cell2cell. It can be accessed with the parameter 10 | ```smooth=True``` in ```cell2cell.tensor.tensor.BaseTensor.elbow_rank_selection()``` or in any other InteractionTensor. 11 | - Implemented curve smoothing for the elbow plot designed for multiple runs. 12 | It can be accessed with the parameter 13 | ```smooth=True``` in ```cell2cell.plotting.tensor_plot.plot_multiple_run_elbow()```. 14 | - Implemented ```cell2cell.tensor.metrics.pairwise_correlation_index()``` to compute the CorrIndex 15 | between all pairs of tensor decompositions in a list. 16 | - Implemented an elbow analysis based on the similarity of multiple runs. This can be controlled with the option 17 | ```metric='similarity'``` in ```cell2cell.tensor.tensor.BaseTensor.elbow_rank_selection()``` or in any other InteractionTensor. 18 | Use ```metric='error'``` for the normalized error used in previous versions. 19 | 20 | ## Feature updates 21 | - Modified the way the normalized error of a tensor decomposition is computed in 22 | ```cell2cell.tensor.factorization._compute_norm_error()``` 23 | - Added the option to directly pass a `ylabel` to the elbow plots, including 24 | ```cell2cell.plotting.tensor_plot.plot_elbow()``` and ```cell2cell.plotting.tensor_plot.plot_multiple_run_elbow()``` 25 | - Extended the input parameters of ```cell2cell.tensor.factorization._compute_elbow()``` 26 | 27 | 28 | ## Fixed Bugs 29 | - -------------------------------------------------------------------------------- /release/0.6.3-notes.md: -------------------------------------------------------------------------------- 1 | # Release Notes - cell2cell v0.6.3 2 | 3 | ## New features 4 | - Created ```cell2cell.analysis.tensor_pipelines``` with functions to easily run 5 | Tensor-cell2cell. 6 | 7 | ## Feature updates 8 | - Deleted ```cell2cell.external.tensorly_nn_cp``` since it is not used anymore. 9 | Tensorly is directly used instead.
10 | - Renamed ```cell2cell.analysis.pipelines``` to ```cell2cell.analysis.cell2cell_pipelines``` 11 | - Added ```elbow_metric```, ```elbow_metric_mean``` and ```elbow_metric_raw``` attributes to ```cell2cell.tensor.tensor.BaseTensor``` 12 | for storing the curve generated from the elbow analysis. 13 | - Removed the parameter ```loc_zeros``` from ```cell2cell.tensor.tensor.PreBuiltTensor``` 14 | 15 | ## Fixed Bugs 16 | - Converted factors to numpy in ```cell2cell.tensor.factorization._multiple_runs_elbow_analysis()``` 17 | when ```metric='similarity'``` to avoid errors when using a GPU. 18 | - Fixed error obtained with the ```sparsity_fraction()``` and ```missing_fraction()``` methods of a ```cell2cell.tensor.tensor.BaseTensor``` when 19 | the tensorly backend is different from numpy and the device is not a CPU. This error was fixed by 20 | making the loc_nans and loc_zeros attributes of ```cell2cell.tensor.tensor.InteractionTensor``` and ```cell2cell.tensor.tensor.PreBuiltTensor``` 21 | tensorly.tensor objects. -------------------------------------------------------------------------------- /release/0.6.4-notes.md: -------------------------------------------------------------------------------- 1 | # Release Notes - cell2cell v0.6.4 2 | 3 | ## New features 4 | - Added the ```cell2cell.io.read_data.load_tensor()``` function to directly load a previously 5 | exported ```interaction_tensor``` variable generated with Tensor-cell2cell. 6 | - Added a new dataset from a COVID-19 study. Available in ```cell2cell.datasets.anndata.balf_covid()```. 7 | - Added functions to create and explore directories in ```cell2cell.io.directories```. 8 | - Added ```cell2cell.io.read_data.load_tables_from_directory()``` to load all tables or dataframes with the same 9 | extension that are located in a given directory. 10 | 11 | ## Feature updates 12 | - Modified the ```sparsity_fraction()``` and ```missing_fraction()``` methods of ```cell2cell.tensor.tensor.BaseTensor``` to return 13 | the item in the tensorly tensor object. 14 | - Added a progress bar to ```cell2cell.tensor.external_scores.dataframes_to_tensor()```. 15 | - Added the option to specify the ```backend``` when running ```cell2cell.analysis.tensor_pipelines.run_tensor_cell2cell_pipeline()```. 16 | 17 | ## Fixed Bugs 18 | - Implemented a way to manage duplicated instances of an LR communication score in ```cell2cell.tensor.external_scores.dataframes_to_tensor()```. 19 | It can be controlled through the parameter ```dup_aggregation```. -------------------------------------------------------------------------------- /release/0.6.5-notes.md: -------------------------------------------------------------------------------- 1 | # Release Notes - cell2cell v0.6.5 2 | 3 | ## New features 4 | - Implemented in-house GSEA using gseapy. Associated code is located in 5 | `cell2cell.datasets.gsea_data` and `cell2cell.external.gseapy`. 6 | - Implemented a function to obtain a dataframe of LR pairs by cell pairs from a tensor decomposition 7 | result, so it can be used to make a plot. It can compute a communication score either per factor or 8 | across all factors. See the function `cell2cell.analysis.tensor_downstream.get_lr_by_cell_pairs()`. 9 | 10 | ## Feature updates 11 | - Added the axis names to the dataframe generated with ```cell2cell.analysis.tensor_downstream.get_joint_loadings()```, 12 | which correspond to the `dim1` and `dim2` parameters.
13 | - Added the axis labels (`cm.ax_heatmap.set_xlabel()` & `cm.ax_heatmap.set_ylabel()`) using the dataframe axis names 14 | passed to ```cell2cell.plotting.factor_plot.loading_clustermap()``` 15 | 16 | ## Fixed Bugs 17 | - -------------------------------------------------------------------------------- /release/0.6.6-notes.md: -------------------------------------------------------------------------------- 1 | # Release Notes - cell2cell v0.6.6 2 | 3 | ## New features 4 | - Added new attributes and methods to `cell2cell.tensor.tensor.BaseTensor` and any other 5 | derived class, including `BaseTensor.shape`, `BaseTensor.write_file()`, `BaseTensor.to_device()`. 6 | These new features are respectively for: 7 | - Passing the shape of the tensor directly (instead of `BaseTensor.tensor.shape`) 8 | - Exporting or saving a tensor object to a file. 9 | - Changing the device for running Tensor-cell2cell (e.g., 'cpu', 'cuda', etc.) 10 | 11 | ## Feature updates 12 | - Added ```**kwargs``` as a parameter of `cell2cell.analysis.tensor_pipelines.run_tensor_cell2cell_pipeline()` 13 | to directly pass parameters to the functions running the elbow analysis and the tensor decomposition. 14 | - Factors are now sorted numerically in `cell2cell.external.gseapy.run_gsea()`. 15 | 16 | ## Fixed Bugs 17 | - -------------------------------------------------------------------------------- /release/0.6.7-notes.md: -------------------------------------------------------------------------------- 1 | # Release Notes - cell2cell v0.6.7 2 | 3 | ## New features 4 | - Direct access to the `interaction_elements` attribute from `cell2cell.analysis.cell2cell_pipelines.SingleCellInteractions` 5 | and `cell2cell.analysis.cell2cell_pipelines.BulkInteractions` 6 | - Added an option to store the GMT file in the output_folder in `cell2cell.external.gseapy` 7 | 8 | ## Feature updates 9 | - Removed the jupyter notebook-specific tqdm progress bar. 10 | - Updated tensorly version from 0.7.0 to 0.8.1 11 | 12 | ## Fixed Bugs 13 | - Modified permutations in `cell2cell.analysis.cell2cell_pipelines.SingleCellInteractions.permute_cell_labels()` -------------------------------------------------------------------------------- /release/0.6.8-notes.md: -------------------------------------------------------------------------------- 1 | # Release Notes - cell2cell v0.6.8 2 | 3 | ## New features 4 | 5 | 6 | ## Feature updates 7 | 8 | 9 | ## Fixed Bugs 10 | - Fixed bug that was skipping the first factor when generating outputs from `cell2cell.external.gseapy.run_gsea()` -------------------------------------------------------------------------------- /release/0.7.0-notes.md: -------------------------------------------------------------------------------- 1 | # Release Notes - cell2cell v0.7.0 2 | 3 | ## New features 4 | - Added `cell2cell.spatial` functions enabling analyses that consider the spatial organization of cells in spatial data. 5 | These functions include: 6 | - Filtering by intercellular distances via thresholding (`cell2cell.spatial.distances` and `cell2cell.spatial.filtering`).
7 | - Dividing the tissue into square grids (`cell2cell.spatial.neighborhoods.create_spatial_grid()`) 8 | - Dividing the tissue into moving windows (`cell2cell.spatial.neighborhoods.create_moving_windows()`, `cell2cell.spatial.neighborhoods.calculate_window_size()`, 9 | and `cell2cell.spatial.neighborhoods.add_moving_window_info_to_adata()`) 10 | 11 | ## Feature updates 12 | 13 | 14 | ## Fixed Bugs 15 | - Fixed bug that caused multiple legends to be visualized incorrectly in plots, for example in `cell2cell.plotting.tensor_plot`, 16 | when using newer matplotlib versions. -------------------------------------------------------------------------------- /release/0.7.1-notes.md: -------------------------------------------------------------------------------- 1 | # Release Notes - cell2cell v0.7.1 2 | 3 | ## New features 4 | - Refer to [v0.7.0 notes](./0.7.0-notes.md) to see the new features. This is a quick fix of that version. 5 | 6 | ## Feature updates 7 | - Renamed `cell2cell.spatial.neighborhoods.create_moving_windows()` 8 | and `cell2cell.spatial.neighborhoods.add_moving_window_info_to_adata()` to 9 | `cell2cell.spatial.neighborhoods.create_sliding_windows()` 10 | and `cell2cell.spatial.neighborhoods.add_sliding_window_info_to_adata()`, respectively. 11 | 12 | ## Fixed Bugs 13 | -------------------------------------------------------------------------------- /release/0.7.2-notes.md: -------------------------------------------------------------------------------- 1 | # Release Notes - cell2cell v0.7.2 2 | 3 | ## New features 4 | - Refer to [v0.7.0 notes](./0.7.0-notes.md) & [v0.7.1 notes](./0.7.1-notes.md) to see the new features. This is a quick fix of those versions. 5 | 6 | ## Feature updates 7 | 8 | ## Fixed Bugs 9 | - Updated the export of factor loadings in Tensor-cell2cell for compatibility with newer versions of `pandas`. -------------------------------------------------------------------------------- /release/0.7.3-notes.md: -------------------------------------------------------------------------------- 1 | # Release Notes - cell2cell v0.7.3 2 | 3 | ## New features 4 | - Refer to [v0.7.2 notes](./0.7.2-notes.md) to see the previous new features. 5 | - Added an example of using spatial data with Tensor-cell2cell. 6 | 7 | ## Feature updates 8 | - Updated the single-cell data example with cell2cell to use COVID-19 data. 9 | - Updated the bulk data example with cell2cell. 10 | - Updated the `docs` folder for readthedocs.org. 11 | - Updated README.md 12 | 13 | ## Fixed Bugs 14 | - Fixed legend visualization in `cell2cell.plotting.pcoa_plot.pcoa_3dplot()` 15 | - Fixed negative P-values in `cell2cell.stats.permutation.compute_pvalue_from_dist()` 16 | - Fixed the permutation analysis in `cell2cell.analysis.cell2cell_pipelines.SingleCellInteractions` 17 | - Fixed legend visualization in `cell2cell.plotting.circular_plot.circos_plot()` -------------------------------------------------------------------------------- /release/0.7.4-notes.md: -------------------------------------------------------------------------------- 1 | # Release Notes - cell2cell v0.7.4 2 | 3 | ## New features 4 | - Refer to [v0.7.3 notes](./0.7.3-notes.md) to see the previous new features (e.g., the spatial tooling sketched below).
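As a concrete illustration of the square-grid idea behind the spatial functions introduced in v0.7.0 and v0.7.1, here is a conceptual sketch in plain numpy (this is not the cell2cell API; coordinates, grid size, and variable names are made up):

```python
import numpy as np

rng = np.random.default_rng(0)
coords = rng.uniform(0, 1000, size=(100, 2))  # hypothetical x, y positions of 100 cells
num_bins = 5                                   # a 5 x 5 grid of square windows

# Interior bin edges; np.digitize maps each coordinate to a bin in 0..num_bins-1
edges = np.linspace(0, 1000, num_bins + 1)[1:-1]
x_bin = np.digitize(coords[:, 0], edges)
y_bin = np.digitize(coords[:, 1], edges)

grid_id = y_bin * num_bins + x_bin             # one window id per cell, row-major
```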
5 | 6 | ## Feature updates 7 | - Updated the assert warnings for Tensor-cell2cell 8 | 9 | ## Fixed Bugs 10 | - Fixed set-based indexing that was deprecated in newer pandas versions (in `cell2cell.preprocessing.rnaseq.add_complexes_to_expression()`) -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # ---------------------------------------------------------------------------- 4 | # Copyright (c) 2019--, Cell2cell development team. 5 | # 6 | # Distributed under the terms of the BSD 3-Clause License. 7 | # 8 | # The full license is in the file LICENSE, distributed with this software. 9 | # ---------------------------------------------------------------------------- 10 | 11 | from setuptools.command.egg_info import egg_info 12 | from setuptools.command.develop import develop 13 | from setuptools.command.install import install 14 | import re 15 | import ast 16 | import os 17 | from setuptools import find_packages, setup 18 | 19 | # Dealing with Cython 20 | USE_CYTHON = os.environ.get('USE_CYTHON', False) 21 | ext = '.pyx' if USE_CYTHON else '.c'  # use .pyx sources when USE_CYTHON is set; pre-generated C otherwise 22 | 23 | 24 | def custom_command(): 25 | import sys 26 | if sys.platform in ['darwin', 'linux']: 27 | os.system('pip install numpy')  # ensure numpy is available on macOS/Linux 28 | 29 | class CustomInstallCommand(install): 30 | def run(self): 31 | install.run(self) 32 | custom_command() 33 | 34 | class CustomDevelopCommand(develop): 35 | def run(self): 36 | develop.run(self) 37 | custom_command() 38 | 39 | class CustomEggInfoCommand(egg_info): 40 | def run(self): 41 | egg_info.run(self) 42 | custom_command() 43 | 44 | 45 | extensions = [ 46 | ] 47 | 48 | if USE_CYTHON: 49 | from Cython.Build import cythonize 50 | extensions = cythonize(extensions) 51 | 52 | classes = """ 53 | Development Status :: 2 - Pre-Alpha 54 | License :: OSI Approved :: BSD License 55 | Topic :: Software Development :: Libraries 56 | Topic :: Scientific/Engineering 57 | Topic :: Scientific/Engineering :: Bio-Informatics 58 | Programming Language :: Python :: 3 59 | Programming Language :: Python :: 3 :: Only 60 | Operating System :: Unix 61 | Operating System :: POSIX 62 | Operating System :: MacOS :: MacOS X 63 | """ 64 | classifiers = [s.strip() for s in classes.split('\n') if s] 65 | 66 | description = ('TBD') 67 | 68 | with open('README.md') as f: 69 | long_description = f.read() 70 | 71 | _version_re = re.compile(r'__version__\s+=\s+(.*)') 72 | 73 | with open('cell2cell/__init__.py', 'rb') as f: 74 | hit = _version_re.search(f.read().decode('utf-8')).group(1) 75 | version = str(ast.literal_eval(hit)) 76 | 77 | setup(name='cell2cell', 78 | version=version, 79 | license='BSD-3-Clause', 80 | description=description, 81 | long_description_content_type="text/markdown", 82 | long_description=long_description, 83 | author="cell2cell development team", 84 | author_email="earmingo@ucsd.edu", 85 | maintainer="cell2cell development team", 86 | maintainer_email="earmingol@eng.ucsd.edu", 87 | packages=find_packages(), 88 | ext_modules=extensions, 89 | install_requires=['numpy >= 1.16', 90 | 'pandas >= 1.0.0', 91 | 'xlrd >= 1.1', 92 | 'openpyxl >= 2.6.2', 93 | 'networkx >= 2.3', 94 | 'matplotlib >= 3.2.0', 95 | 'seaborn >= 0.11.0', 96 | 'scikit-learn', 97 | 'umap-learn', 98 | 'tqdm', 99 | 'statsmodels', 100 | 'statannotations', 101 | 'tensorly == 0.8.1', 102 | 'kneed', 103 | 'scanpy', 104 | 'gseapy == 1.0.3' 105 | ], 106 | classifiers=classifiers, 107 | entry_points={}, 108 |
package_data={}, 109 | cmdclass={'install': CustomInstallCommand, 110 | 'develop': CustomDevelopCommand, 111 | 'egg_info': CustomEggInfoCommand, }, 112 | zip_safe=False) --------------------------------------------------------------------------------
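For illustration, here is a minimal, self-contained sketch of how the `_version_re` pattern in `setup.py` above extracts the package version (the `__version__` line shown is only a hypothetical example of what `cell2cell/__init__.py` may contain):

```python
import ast
import re

_version_re = re.compile(r'__version__\s+=\s+(.*)')

# Hypothetical content of cell2cell/__init__.py
init_source = "__version__ = '0.7.4'\n"

hit = _version_re.search(init_source).group(1)  # "'0.7.4'" (still quoted)
version = str(ast.literal_eval(hit))            # literal_eval strips the quotes
print(version)                                  # 0.7.4
```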