├── .github
│   └── workflows
│       └── python-publish.yml
├── .readthedocs.yml
├── LICENSE.txt
├── Logo.png
├── LogoTensor.png
├── README.md
├── benchmarks
│   ├── __init__.py
│   └── benchmarks.py
├── cell2cell
│   ├── __init__.py
│   ├── analysis
│   │   ├── __init__.py
│   │   ├── cell2cell_pipelines.py
│   │   ├── tensor_downstream.py
│   │   └── tensor_pipelines.py
│   ├── clustering
│   │   ├── __init__.py
│   │   └── cluster_interactions.py
│   ├── core
│   │   ├── __init__.py
│   │   ├── cci_scores.py
│   │   ├── cell.py
│   │   ├── communication_scores.py
│   │   └── interaction_space.py
│   ├── datasets
│   │   ├── __init__.py
│   │   ├── anndata.py
│   │   ├── gsea_data.py
│   │   ├── heuristic_data.py
│   │   ├── random_data.py
│   │   └── toy_data.py
│   ├── external
│   │   ├── __init__.py
│   │   ├── goenrich.py
│   │   ├── gseapy.py
│   │   ├── pcoa.py
│   │   ├── pcoa_utils.py
│   │   └── umap.py
│   ├── io
│   │   ├── __init__.py
│   │   ├── directories.py
│   │   ├── read_data.py
│   │   └── save_data.py
│   ├── plotting
│   │   ├── __init__.py
│   │   ├── aesthetics.py
│   │   ├── ccc_plot.py
│   │   ├── cci_plot.py
│   │   ├── circular_plot.py
│   │   ├── factor_plot.py
│   │   ├── pcoa_plot.py
│   │   ├── pval_plot.py
│   │   ├── tensor_plot.py
│   │   └── umap_plot.py
│   ├── preprocessing
│   │   ├── __init__.py
│   │   ├── cutoffs.py
│   │   ├── find_elements.py
│   │   ├── gene_ontology.py
│   │   ├── integrate_data.py
│   │   ├── manipulate_dataframes.py
│   │   ├── ppi.py
│   │   ├── rnaseq.py
│   │   └── signal.py
│   ├── spatial
│   │   ├── __init__.py
│   │   ├── distances.py
│   │   ├── filtering.py
│   │   └── neighborhoods.py
│   ├── stats
│   │   ├── __init__.py
│   │   ├── enrichment.py
│   │   ├── gini.py
│   │   ├── multitest.py
│   │   └── permutation.py
│   ├── tensor
│   │   ├── __init__.py
│   │   ├── external_scores.py
│   │   ├── factor_manipulation.py
│   │   ├── factorization.py
│   │   ├── metrics.py
│   │   ├── subset.py
│   │   ├── tensor.py
│   │   └── tensor_manipulation.py
│   └── utils
│       ├── __init__.py
│       ├── networks.py
│       └── parallel_computing.py
├── docs
│   ├── documentation.md
│   ├── index.md
│   ├── requirements.in
│   ├── requirements.txt
│   └── tutorials
│       ├── ASD
│       │   ├── 01-Tensor-Factorization-ASD.ipynb
│       │   ├── 02-Factor-Specific-ASD.ipynb
│       │   ├── 03-GSEA-ASD.ipynb
│       │   ├── KEGG.gmt
│       │   ├── figures
│       │   │   ├── 4d-tensor.png
│       │   │   ├── tensor-approx.png
│       │   │   ├── tensor-factorization.png
│       │   │   └── tf.png
│       │   └── results
│       │       └── Loadings.xlsx
│       ├── GPU-Example.ipynb
│       ├── Tensor-cell2cell-Spatial.ipynb
│       ├── Toy-Example-BulkPipeline.ipynb
│       └── Toy-Example-SingleCellPipeline.ipynb
├── examples
│   ├── cell2cell
│   │   ├── Human-2020-Jin-LR-pairs.csv
│   │   ├── Toy-Example-BulkPipeline.ipynb
│   │   ├── Toy-Example-SingleCellPipeline.ipynb
│   │   └── Toy-Example.ipynb
│   └── tensor_cell2cell
│       ├── GPU-Example.ipynb
│       ├── Loading-PreBuiltTensor.ipynb
│       ├── PreBuiltMetadata-PBMC.pkl
│       ├── PreBuiltTensor-PBMC.pkl
│       ├── Tensor-cell2cell-PBMC.ipynb
│       └── Tensor-cell2cell-Spatial.ipynb
├── mkdocs.yml
├── release
│   ├── 0.5.10-notes.md
│   ├── 0.5.11-notes.md
│   ├── 0.5.4-notes.md
│   ├── 0.5.5-notes.md
│   ├── 0.5.6-notes.md
│   ├── 0.5.7-notes.md
│   ├── 0.5.8-notes.md
│   ├── 0.5.9-notes.md
│   ├── 0.6.0-notes.md
│   ├── 0.6.1-notes.md
│   ├── 0.6.2-notes.md
│   ├── 0.6.3-notes.md
│   ├── 0.6.4-notes.md
│   ├── 0.6.5-notes.md
│   ├── 0.6.6-notes.md
│   ├── 0.6.7-notes.md
│   ├── 0.6.8-notes.md
│   ├── 0.7.0-notes.md
│   ├── 0.7.1-notes.md
│   ├── 0.7.2-notes.md
│   ├── 0.7.3-notes.md
│   └── 0.7.4-notes.md
└── setup.py
/.github/workflows/python-publish.yml:
--------------------------------------------------------------------------------
 1 | # This workflow will upload a Python Package using Twine when a release is created
 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries
 3 | 
 4 | # This workflow uses actions that are not certified by GitHub.
 5 | # They are provided by a third-party and are governed by
 6 | # separate terms of service, privacy policy, and support
 7 | # documentation.
 8 | 
 9 | name: Upload Python Package
10 | 
11 | on:
12 |   release:
13 |     types: [published]
14 | 
15 | permissions:
16 |   contents: read
17 | 
18 | jobs:
19 |   deploy:
20 | 
21 |     runs-on: ubuntu-latest
22 | 
23 |     steps:
24 |     - uses: actions/checkout@v3
25 |     - name: Set up Python
26 |       uses: actions/setup-python@v3
27 |       with:
28 |         python-version: '3.x'
29 |     - name: Install dependencies
30 |       run: |
31 |         python -m pip install --upgrade pip
32 |         pip install build
33 |     - name: Build package
34 |       run: python -m build
35 |     - name: Publish package
36 |       uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
37 |       with:
38 |         user: __token__
39 |         password: ${{ secrets.PYPI_CELL2CELL_TOKEN }}
40 | 
/.github/workflows/python-publish.yml:
--------------------------------------------------------------------------------
1 | # This workflow will upload a Python Package using Twine when a release is created
2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries
3 |
4 | # This workflow uses actions that are not certified by GitHub.
5 | # They are provided by a third-party and are governed by
6 | # separate terms of service, privacy policy, and support
7 | # documentation.
8 |
9 | name: Upload Python Package
10 |
11 | on:
12 | release:
13 | types: [published]
14 |
15 | permissions:
16 | contents: read
17 |
18 | jobs:
19 | deploy:
20 |
21 | runs-on: ubuntu-latest
22 |
23 | steps:
24 | - uses: actions/checkout@v3
25 | - name: Set up Python
26 | uses: actions/setup-python@v3
27 | with:
28 | python-version: '3.x'
29 | - name: Install dependencies
30 | run: |
31 | python -m pip install --upgrade pip
32 | pip install build
33 | - name: Build package
34 | run: python -m build
35 | - name: Publish package
36 | uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
37 | with:
38 | user: __token__
39 | password: ${{ secrets.PYPI_CELL2CELL_TOKEN }}
40 |
--------------------------------------------------------------------------------
/.readthedocs.yml:
--------------------------------------------------------------------------------
1 | # .readthedocs.yaml
2 | # Read the Docs configuration file
3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
4 |
5 | # Required
6 | version: 2
7 |
8 | # Set the version of Python and other tools you might need
9 | build:
10 | os: ubuntu-22.04
11 | tools:
12 | python: "3.7"
13 |
14 | mkdocs:
15 | configuration: mkdocs.yml
16 |
17 | # Optionally declare the Python requirements required to build your docs
18 | python:
19 | install:
20 | - requirements: docs/requirements.txt
21 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 |
3 | Copyright (c) 2019, Erick Armingol
4 | All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 |
9 | * Redistributions of source code must retain the above copyright notice, this
10 | list of conditions and the following disclaimer.
11 |
12 | * Redistributions in binary form must reproduce the above copyright notice,
13 | this list of conditions and the following disclaimer in the documentation
14 | and/or other materials provided with the distribution.
15 |
16 | * Neither the name of the copyright holder nor the names of its
17 | contributors may be used to endorse or promote products derived from
18 | this software without specific prior written permission.
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
--------------------------------------------------------------------------------
/Logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/earmingol/cell2cell/6a609fa3d3e1d65fc3b92be459f47e636c89f061/Logo.png
--------------------------------------------------------------------------------
/LogoTensor.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/earmingol/cell2cell/6a609fa3d3e1d65fc3b92be459f47e636c89f061/LogoTensor.png
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Inferring cell-cell interactions from transcriptomes with *cell2cell*
2 | [![PyPI Version][pb]][pypi]
3 | [![Documentation Status](https://readthedocs.org/projects/cell2cell/badge/?version=latest)](https://cell2cell.readthedocs.io/en/latest/?badge=latest)
4 | [![Downloads](https://pepy.tech/badge/cell2cell)](https://pepy.tech/project/cell2cell)
5 |
6 |
7 | [pb]: https://badge.fury.io/py/cell2cell.svg
8 | [pypi]: https://pypi.org/project/cell2cell/
9 |
10 | ## :book: Getting started
11 | For tutorials and documentation, visit [**cell2cell ReadTheDocs**](https://cell2cell.readthedocs.org/) or our [**cell2cell website**](https://earmingol.github.io/cell2cell).
12 |
13 |
14 |
15 | ## :wrench: Installation
16 |
17 |
18 | Step 1: Install Anaconda :snake:
19 |
20 | First, [install Anaconda following this tutorial](https://docs.anaconda.com/anaconda/install/)
21 |
22 |
23 |
24 | Step 2: Create and Activate a New Conda Environment :computer:
25 |
26 | ```
27 | # Create a new conda environment
28 | conda create -n cell2cell -y python=3.7 jupyter
29 |
30 | # Activate the environment
31 | conda activate cell2cell
32 | ```
33 |
34 |
35 | Step 3: Install cell2cell :arrow_down:
36 |
37 | ```
38 | pip install cell2cell
39 | ```
40 |
41 |
42 | ## :bulb: Examples
43 |
44 | | cell2cell Examples | Tensor-cell2cell Examples |
45 | | --- | --- |
46 | |  |  |
47 | | - [Step-by-step Pipeline](https://github.com/earmingol/cell2cell/blob/master/examples/cell2cell/Toy-Example.ipynb) <br> - [Interaction Pipeline for Bulk Data](https://earmingol.github.io/cell2cell/tutorials/Toy-Example-BulkPipeline) <br> - [Interaction Pipeline for Single-Cell Data](https://earmingol.github.io/cell2cell/tutorials/Toy-Example-SingleCellPipeline) <br> - [Whole Body of *C. elegans*](https://github.com/LewisLabUCSD/Celegans-cell2cell) | - [Obtaining patterns of cell-cell communication](https://earmingol.github.io/cell2cell/tutorials/ASD/01-Tensor-Factorization-ASD/) <br> - [Downstream 1: Factor-specific analyses](https://earmingol.github.io/cell2cell/tutorials/ASD/02-Factor-Specific-ASD/) <br> - [Downstream 2: Patterns to functions (GSEA)](https://earmingol.github.io/cell2cell/tutorials/ASD/03-GSEA-ASD/) <br> - [Tensor-cell2cell in Google Colab (**GPU**)](https://colab.research.google.com/drive/1T6MUoxafTHYhjvenDbEtQoveIlHT2U6_?usp=sharing) <br> - [Communication patterns in **Spatial Transcriptomics**](https://earmingol.github.io/cell2cell/tutorials/Tensor-cell2cell-Spatial/) |
48 |
49 | Reproducible runs of the analyses in the [Tensor-cell2cell paper](https://doi.org/10.1038/s41467-022-31369-2) are available at [CodeOcean.com](https://doi.org/10.24433/CO.0051950.v2).
50 |
51 | ## :link: LIANA & Tensor-cell2cell
52 |
53 | Explore our tutorials for using Tensor-cell2cell with [LIANA](https://github.com/saezlab/liana-py) at [ccc-protocols.readthedocs.io](https://ccc-protocols.readthedocs.io/).
54 |
55 | ## :question: Common Issues
56 |
57 | - **Memory Errors with Tensor-cell2cell:** If you encounter memory errors when performing tensor factorizations, try replacing `init='svd'` with `init='random'`.
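
  A minimal sketch of that change (the tensor variable here is illustrative; `compute_tensor_factorization` is the method used throughout the tutorials):

  ```
  # Hypothetical tensor built beforehand with cell2cell.tensor
  interaction_tensor.compute_tensor_factorization(rank=10, init='random')  # instead of init='svd'
  ```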
58 |
59 | ## :dna: Ligand-Receptor Pairs
60 | Find a curated list of ligand-receptor pairs for your analyses at our [GitHub Repository](https://github.com/LewisLabUCSD/Ligand-Receptor-Pairs).
61 |
62 | ## :bookmark_tabs: Citation
63 |
64 | Please cite our work using the following references:
65 |
66 | - **cell2cell**: [Inferring a spatial code of cell-cell interactions across a whole animal body](https://doi.org/10.1371/journal.pcbi.1010715).
67 | *PLOS Computational Biology, 2022*
68 |
69 | - **Tensor-cell2cell**: [Context-aware deconvolution of cell-cell communication with Tensor-cell2cell](https://doi.org/10.1038/s41467-022-31369-2).
70 | *Nature Communications, 2022.*
71 |
72 | - **LIANA & Tensor-cell2cell tutorials**: [Combining LIANA and Tensor-cell2cell to decipher cell-cell communication across multiple samples](https://doi.org/10.1016/j.crmeth.2024.100758).
73 | *Cell Reports Methods, 2024*
74 |
--------------------------------------------------------------------------------
/benchmarks/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import absolute_import
4 |
5 | from benchmarks.benchmarks import (timeit)
6 |
--------------------------------------------------------------------------------
/benchmarks/benchmarks.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import absolute_import
4 |
5 | import time
6 |
7 |
8 | def timeit(func, *args, **kwargs):
9 | '''
10 | This function measures the running time of a given function.
11 | Borrowed from George Armstrong's GitHub repo (https://github.com/gwarmstrong).
12 | '''
13 | t0 = time.time()
14 | output = func(*args, **kwargs)
15 | t1 = time.time()
16 | tot_time = t1-t0
17 | data = {'time': tot_time, 'results': output}
18 | return data
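
# Usage sketch (not part of the original module): measure an arbitrary callable.
if __name__ == '__main__':
    report = timeit(sum, range(1_000_000))
    print('Elapsed seconds:', report['time'])
    print('Result:', report['results'])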
--------------------------------------------------------------------------------
/cell2cell/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from cell2cell import analysis
4 | from cell2cell import clustering
5 | from cell2cell import core
6 | from cell2cell import datasets
7 | from cell2cell import external
8 | from cell2cell import io
9 | from cell2cell import plotting
10 | from cell2cell import preprocessing
11 | from cell2cell import spatial
12 | from cell2cell import stats
13 | from cell2cell import tensor
14 | from cell2cell import utils
15 |
16 | __version__ = "0.7.4"
--------------------------------------------------------------------------------
/cell2cell/analysis/__init__.py:
--------------------------------------------------------------------------------
1 | from cell2cell.analysis.cell2cell_pipelines import (initialize_interaction_space, BulkInteractions, SingleCellInteractions)
2 | from cell2cell.analysis.tensor_pipelines import (run_tensor_cell2cell_pipeline)
3 | import cell2cell.analysis.tensor_downstream as tensor_downstream
4 |
5 |
--------------------------------------------------------------------------------
/cell2cell/analysis/tensor_pipelines.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import absolute_import
4 |
5 | import tensorly as tl
6 |
7 | from cell2cell.plotting.tensor_plot import tensor_factors_plot
8 |
9 |
10 | def run_tensor_cell2cell_pipeline(interaction_tensor, tensor_metadata, copy_tensor=False, rank=None,
11 | tf_optimization='regular', random_state=None, backend=None, device=None,
12 | elbow_metric='error', smooth_elbow=False, upper_rank=25, tf_init='random',
13 | tf_svd='numpy_svd', cmaps=None, sample_col='Element', group_col='Category',
14 | fig_fontsize=14, output_folder=None, output_fig=True, fig_format='pdf', **kwargs):
15 | '''
16 | Runs basic pipeline of Tensor-cell2cell (excluding downstream analyses).
17 |
18 | Parameters
19 | ----------
20 | interaction_tensor : cell2cell.tensor.BaseTensor
21 | A communication tensor generated with any of the tensor class in
22 | cell2cell.tensor.
23 |
24 | tensor_metadata : list
25 | List of pandas dataframes with metadata information for elements of each
26 | dimension in the tensor. A column called as the variable `sample_col` contains
27 | the name of each element in the tensor while another column called as the
28 | variable `group_col` contains the metadata or grouping information of each
29 | element.
30 |
31 | copy_tensor : boolean, default=False
32 | Whether generating a copy of the original tensor to avoid modifying it.
33 |
34 | rank : int, default=None
35 | Rank of the Tensor Factorization (number of factors to deconvolve the original
36 | tensor). If None, it will be automatically inferred from an elbow analysis.
37 |
38 | tf_optimization : str, default='regular'
39 | Whether to perform the optimization with a higher number of iterations,
40 | more independent factorization runs, and a stricter (lower) tolerance,
41 | or with fewer iterations, fewer runs, and a looser tolerance.
42 | Options are:
43 | 
44 | - 'regular' : Uses 100 max iterations, 1 factorization run, and a 1e-7 tolerance.
45 |    Faster to run.
46 | - 'robust' : Uses 500 max iterations, 100 factorization runs, and a 1e-8 tolerance.
47 |    Slower to run.
47 | Slower to run.
48 |
49 | random_state : int, default=None
50 | Seed for randomization.
51 |
52 | backend : str, default=None
53 | Backend that TensorLy will use to perform calculations
54 | on this tensor. When None, the default backend used is
55 | the currently active backend, usually 'numpy'. Options are:
56 | {'cupy', 'jax', 'mxnet', 'numpy', 'pytorch', 'tensorflow'}
57 |
58 | device : str, default=None
59 | Device to use when backend allows multiple devices. Options are:
60 | {'cpu', 'cuda:0', None}
61 |
62 | elbow_metric : str, default='error'
63 | Metric to perform the elbow analysis (y-axis).
64 |
65 | - 'error' : Normalized error to compute the elbow.
66 | - 'similarity' : Similarity based on CorrIndex (1-CorrIndex).
67 |
68 | smooth_elbow : boolean, default=False
69 | Whether smoothing the elbow-analysis curve with a Savitzky-Golay filter.
70 |
71 | upper_rank : int, default=25
72 | Upper bound of ranks to explore with the elbow analysis.
73 |
74 | tf_init : str, default='random'
75 | Initialization method for computing the Tensor Factorization.
76 | {'svd', 'random'}
77 |
78 | tf_svd : str, default='numpy_svd'
79 | Function to compute the SVD for initializing the Tensor Factorization,
80 | acceptable values in tensorly.SVD_FUNS
81 |
82 | cmaps : list, default=None
83 | A list of colormaps used for coloring elements in each dimension. The length
84 | of this list is equal to the number of dimensions of the tensor. If None, all
85 | dimensions will be colored with the colormap 'gist_rainbow'.
86 |
87 | sample_col : str, default='Element'
88 | Name of the column containing the element names in the metadata.
89 |
90 | group_col : str, default='Category'
91 | Name of the column containing the metadata or grouping information for each
92 | element in the metadata.
93 |
94 | fig_fontsize : int, default=14
95 | Font size of the tick labels. Axis labels will be 1.2 times the fontsize.
96 |
97 | output_folder : str, default=None
98 | Path to the folder where the figures generated will be saved.
99 | If None, figures will not be saved.
100 |
101 | output_fig : boolean, default=True
102 | Whether generating the figures with matplotlib.
103 |
104 | fig_format : str, default='pdf'
105 | Format to store figures when an `output_folder` is specified
106 | and `output_fig` is True. Otherwise, this is not necessary.
107 |
108 | **kwargs : dict
109 | Extra arguments for the tensor factorization according to inputs in
110 | tensorly.
111 |
112 | Returns
113 | -------
114 | interaction_tensor : cell2cell.tensor.tensor.BaseTensor
115 | Either the original input `interaction_tensor` or a copy of it.
116 | This also stores the results from running the Tensor-cell2cell
117 | pipeline in the corresponding attributes.
118 | '''
119 | if copy_tensor:
120 | interaction_tensor = interaction_tensor.copy()
121 |
122 | dim = len(interaction_tensor.tensor.shape)
123 |
124 | ### OUTPUT FILENAMES ###
125 | if output_folder is None:
126 | elbow_filename = None
127 | tf_filename = None
128 | loading_filename = None
129 | else:
130 | elbow_filename = output_folder + '/Elbow.{}'.format(fig_format)
131 | tf_filename = output_folder + '/Tensor-Factorization.{}'.format(fig_format)
132 | loading_filename = output_folder + '/Loadings.xlsx'
133 |
134 | ### PALETTE COLORS FOR ELEMENTS IN TENSOR DIMS ###
135 | if cmaps is None:
136 | cmap_5d = ['tab10', 'viridis', 'Dark2_r', 'tab20', 'tab20']
137 | cmap_4d = ['plasma', 'Dark2_r', 'tab20', 'tab20']
138 |
139 | if dim == 5:
140 | cmaps = cmap_5d
141 | elif dim <= 4:
142 | cmaps = cmap_4d[-dim:]
143 | else:
144 | raise ValueError('Tensors with more than 5 dimensions are not supported')
145 |
146 | assert len(cmaps) == dim, "`cmaps` must have the same length as the number of dimensions in the tensor."
147 |
148 | ### FACTORIZATION PARAMETERS ###
149 | if tf_optimization == 'robust':
150 | elbow_runs = 20
151 | tf_runs = 100
152 | tol = 1e-8
153 | n_iter_max = 500
154 | elif tf_optimization == 'regular':
155 | elbow_runs = 10
156 | tf_runs = 1
157 | tol = 1e-7
158 | n_iter_max = 100
159 | else:
160 | raise ValueError("`tf_optimization` must be either 'robust' or 'regular'.")
161 |
162 | if backend is not None:
163 | tl.set_backend(backend)
164 |
165 | if device is not None:
166 | interaction_tensor.to_device(device=device)
167 |
168 | ### ANALYSIS ###
169 | # Elbow
170 | if rank is None:
171 | print('Running Elbow Analysis')
172 | fig1, error = interaction_tensor.elbow_rank_selection(upper_rank=upper_rank,
173 | runs=elbow_runs,
174 | init=tf_init,
175 | svd=tf_svd,
176 | automatic_elbow=True,
177 | metric=elbow_metric,
178 | output_fig=output_fig,
179 | smooth=smooth_elbow,
180 | random_state=random_state,
181 | fontsize=fig_fontsize,
182 | filename=elbow_filename,
183 | tol=tol, n_iter_max=n_iter_max,
184 | **kwargs
185 | )
186 |
187 | rank = interaction_tensor.rank
188 |
189 | # Factorization
190 | print('Running Tensor Factorization')
191 | interaction_tensor.compute_tensor_factorization(rank=rank,
192 | init=tf_init,
193 | svd=tf_svd,
194 | random_state=random_state,
195 | runs=tf_runs,
196 | normalize_loadings=True,
197 | tol=tol, n_iter_max=n_iter_max,
198 | **kwargs
199 | )
200 |
201 | ### EXPORT RESULTS ###
202 | if output_folder is not None:
203 | print('Generating Outputs')
204 | interaction_tensor.export_factor_loadings(loading_filename)
205 |
206 | if output_fig:
207 | fig2, axes = tensor_factors_plot(interaction_tensor=interaction_tensor,
208 | metadata=tensor_metadata,
209 | sample_col=sample_col,
210 | group_col=group_col,
211 | meta_cmaps=cmaps,
212 | fontsize=fig_fontsize,
213 | filename=tf_filename
214 | )
215 |
216 | return interaction_tensor
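
# Usage sketch (not part of the original module; variable names are illustrative).
# `tensor` would be any cell2cell.tensor.BaseTensor subclass and `meta` the list
# of metadata DataFrames described in the docstring above:
#
#   tensor = run_tensor_cell2cell_pipeline(tensor, meta,
#                                          rank=None,                 # None triggers the elbow analysis
#                                          tf_optimization='regular',
#                                          random_state=0,
#                                          output_folder=None)        # figures/loadings are not saved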
--------------------------------------------------------------------------------
/cell2cell/clustering/__init__.py:
--------------------------------------------------------------------------------
1 | from cell2cell.clustering.cluster_interactions import (compute_distance, compute_linkage, get_clusters_from_linkage)
2 |
--------------------------------------------------------------------------------
/cell2cell/clustering/cluster_interactions.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import absolute_import
4 |
5 | import numpy as np
6 | import pandas as pd
7 | import scipy.cluster.hierarchy as hc
8 | import scipy.spatial as sp
9 |
10 |
11 | # Distance-based algorithms
12 | def compute_distance(data_matrix, axis=0, metric='euclidean'):
13 | '''Computes the pairwise distance between elements in a
14 | matrix of shape m x n. Uses the function
15 | scipy.spatial.distance.pdist
16 |
17 | Parameters
18 | ----------
19 | data_matrix : pandas.DataFrame or ndarray
20 | A m x n matrix used to compute the distances
21 |
22 | axis : int, default=0
23 | To decide on which elements to compute the distance.
24 | If axis=0, the distances will be between elements in
25 | the rows, while axis=1 will lead to distances between
26 | elements in the columns.
27 |
28 | metric : str, default='euclidean'
29 | The distance metric to use. The distance function can be 'braycurtis',
30 | 'canberra', 'chebyshev', 'cityblock', 'correlation', 'cosine', 'dice',
31 | 'euclidean', 'hamming', 'jaccard', 'jensenshannon', 'kulsinski',
32 | 'mahalanobis', 'matching', 'minkowski', 'rogerstanimoto', 'russellrao',
33 | 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule'.
34 |
35 | Returns
36 | -------
37 | D : ndarray
38 | A square distance matrix of shape m x m (or n x n if axis=1),
39 | obtained by applying scipy.spatial.distance.squareform to the
40 | condensed distances from scipy.spatial.distance.pdist, so that
41 | entry D[i, j] holds the distance dist(u=X[i], v=X[j]).
42 | '''
43 | if (type(data_matrix) is pd.core.frame.DataFrame):
44 | data = data_matrix.values
45 | else:
46 | data = data_matrix
47 | if axis == 0:
48 | D = sp.distance.squareform(sp.distance.pdist(data, metric=metric))
49 | elif axis == 1:
50 | D = sp.distance.squareform(sp.distance.pdist(data.T, metric=metric))
51 | else:
52 | raise ValueError('Not valid axis. Use 0 or 1.')
53 | return D
54 |
55 |
56 | def compute_linkage(distance_matrix, method='ward', optimal_ordering=True):
57 | '''
58 | Returns a linkage for a given distance matrix using a specific method.
59 |
60 | Parameters
61 | ----------
62 | distance_matrix : numpy.ndarray
63 | A square array containing the distance between a given row and a
64 | given column. Diagonal elements must be zero.
65 |
66 | method : str, 'ward' by default
67 | Method to compute the linkage. It could be:
68 |
69 | - 'single'
70 | - 'complete'
71 | - 'average'
72 | - 'weighted'
73 | - 'centroid'
74 | - 'median'
75 | - 'ward'
76 | For more details, go to:
77 | https://docs.scipy.org/doc/scipy-0.19.0/reference/generated/scipy.cluster.hierarchy.linkage.html
78 |
79 | optimal_ordering : boolean, default=True
80 | Whether sorting the leaf of the dendrograms to have a minimal distance
81 | between successive leaves. For more information, see
82 | scipy.cluster.hierarchy.optimal_leaf_ordering
83 |
84 | Returns
85 | -------
86 | Z : numpy.ndarray
87 | The hierarchical clustering encoded as a linkage matrix.
88 | '''
89 | if (type(distance_matrix) is pd.core.frame.DataFrame):
90 | data = distance_matrix.values
91 | else:
92 | data = distance_matrix.copy()
93 | if not (data.transpose() == data).all():
94 | raise ValueError('The matrix is not symmetric')
95 |
96 | np.fill_diagonal(data, 0.0)
97 |
98 | # Compute linkage
99 | D = sp.distance.squareform(data)
100 | Z = hc.linkage(D, method=method, optimal_ordering=optimal_ordering)
101 | return Z
102 |
103 |
104 | def get_clusters_from_linkage(linkage, threshold, criterion='maxclust', labels=None):
105 | '''
106 | Gets clusters from a linkage given a threshold and a criterion.
107 |
108 | Parameters
109 | ----------
110 | linkage : numpy.ndarray
111 | The hierarchical clustering encoded with the matrix returned by
112 | the linkage function (Z).
113 |
114 | threshold : float
115 | The threshold to apply when forming flat clusters.
116 |
117 | criterion : str, 'maxclust' by default
118 | The criterion to use in forming flat clusters. Depending on the
119 | criterion, the threshold has different meanings. More information on:
120 | https://docs.scipy.org/doc/scipy-0.19.0/reference/generated/scipy.cluster.hierarchy.fcluster.html
121 |
122 | labels : array-like, None by default
123 | List of labels of the elements contained in the linkage. The order
124 | must match the order they were provided when generating the linkage.
125 |
126 | Returns
127 | -------
128 | clusters : dict
129 | A dictionary containing the clusters obtained. The keys correspond to
130 | the cluster numbers and the values to a list with element names (when
131 | labels are provided) or with element indexes (following the linkage order).
132 | '''
133 |
134 | cluster_ids = hc.fcluster(linkage, threshold, criterion=criterion)
135 | clusters = dict()
136 | for c in np.unique(cluster_ids):
137 | clusters[c] = []
138 |
139 | for i, c in enumerate(cluster_ids):
140 | if labels is not None:
141 | clusters[c].append(labels[i])
142 | else:
143 | clusters[c].append(i)
144 | return clusters
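
# Usage sketch (illustrative data, not part of the original module):
# cluster five elements from a random feature matrix.
if __name__ == '__main__':
    example = pd.DataFrame(np.random.random((5, 4)),
                           index=['C1', 'C2', 'C3', 'C4', 'C5'])
    D = compute_distance(example, axis=0, metric='euclidean')  # 5 x 5 square matrix
    Z = compute_linkage(D, method='ward')
    print(get_clusters_from_linkage(Z, threshold=2, labels=example.index.tolist()))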
--------------------------------------------------------------------------------
/cell2cell/core/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from cell2cell.core.cci_scores import (compute_braycurtis_like_cci_score, compute_count_score, compute_icellnet_score,
4 | compute_jaccard_like_cci_score, matmul_bray_curtis_like, matmul_count_active,
5 | matmul_jaccard_like)
6 | from cell2cell.core.cell import (Cell, get_cells_from_rnaseq)
7 | from cell2cell.core.communication_scores import (get_binary_scores, get_continuous_scores, compute_ccc_matrix, aggregate_ccc_matrices)
8 | from cell2cell.core.interaction_space import (generate_interaction_elements, InteractionSpace)
--------------------------------------------------------------------------------
/cell2cell/core/cell.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import absolute_import
4 |
5 | import pandas as pd
6 |
7 | class Cell:
8 | '''Specific cell-type/tissue/organ element in a RNAseq dataset.
9 |
10 | Parameters
11 | ----------
12 | sc_rnaseq_data : pandas.DataFrame
13 | A gene expression matrix. It contains only one column, which
14 | corresponds to a cell-type/tissue/sample, while the genes
15 | are the rows. The column name will be the label
16 | of the instance.
17 |
18 | verbose : boolean, default=True
19 | Whether printing or not steps of the analysis.
20 |
21 | Attributes
22 | ----------
23 | id : int
24 | ID number of the instance generated.
25 |
26 | type : str
27 | Name of the respective cell-type/tissue/sample.
28 |
29 | rnaseq_data : pandas.DataFrame
30 | Copy of sc_rnaseq_data.
31 |
32 | weighted_ppi : pandas.DataFrame
33 | Dataframe created from a list of protein-protein interactions,
34 | here the columns of the interacting proteins are replaced by
35 | a score or a preprocessed gene expression of the respective
36 | proteins.
37 | '''
38 | _id_counter = 0 # Number of active instances
39 | _id = 0 # Unique ID
40 |
41 | def __init__(self, sc_rnaseq_data, verbose=True):
42 | self.id = Cell._id
43 | Cell._id_counter += 1
44 | Cell._id += 1
45 |
46 | self.type = str(sc_rnaseq_data.columns[-1])
47 |
48 | # RNAseq datasets
49 | self.rnaseq_data = sc_rnaseq_data.copy()
50 | self.rnaseq_data.columns = ['value']
51 |
52 | # Binary ppi datasets
53 | self.weighted_ppi = pd.DataFrame(columns=['A', 'B', 'score'])
54 |
55 | # Object created
56 | if verbose:
57 | print("New cell instance created for " + self.type)
58 |
59 | def __del__(self):
60 | Cell._id_counter -= 1
61 |
62 | def __str__(self):
63 | return str(self.type)
64 |
65 | __repr__ = __str__
66 |
67 |
68 | def get_cells_from_rnaseq(rnaseq_data, cell_columns=None, verbose=True):
69 | '''
70 | Creates new instances of Cell based on the RNAseq data of each
71 | cell-type/tissue/sample in a gene expression matrix.
72 |
73 | Parameters
74 | ----------
75 | rnaseq_data : pandas.DataFrame
76 | Gene expression data for a RNA-seq experiment. Columns are
77 | cell-types/tissues/samples and rows are genes.
78 |
79 | cell_columns : array-like, default=None
80 | List of names of cell-types/tissues/samples in the dataset
81 | to be used. If None, all columns will be used.
82 |
83 | verbose : boolean, default=True
84 | Whether printing or not steps of the analysis.
85 |
86 | Returns
87 | -------
88 | cells : dict
89 | Dictionary containing all Cell instances generated from a RNAseq dataset.
90 | The keys of this dictionary are the names of the corresponding Cell instances.
91 | '''
92 | if verbose:
93 | print("Generating objects according to RNAseq datasets provided")
94 | cells = dict()
95 | if cell_columns is None:
96 | cell_columns = rnaseq_data.columns
97 |
98 | for cell in cell_columns:
99 | cells[cell] = Cell(rnaseq_data[[cell]], verbose=verbose)
100 | return cells
101 |
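# Usage sketch (illustrative, not part of the original module):
if __name__ == '__main__':
    toy = pd.DataFrame({'C1': [1, 2], 'C2': [3, 4]}, index=['GeneA', 'GeneB'])
    cells = get_cells_from_rnaseq(toy, verbose=False)
    print(cells['C1'].rnaseq_data)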
--------------------------------------------------------------------------------
/cell2cell/core/communication_scores.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import absolute_import
4 |
5 | import numpy as np
6 | from scipy.stats.mstats import gmean
7 |
8 |
9 | def get_binary_scores(cell1, cell2, ppi_score=None):
10 | '''Computes binary communication scores for all
11 | protein-protein interactions between a pair of
12 | cell-types/tissues/samples. This corresponds to
13 | an AND function between binary values for each
14 | interacting protein coming from each cell.
15 |
16 | Parameters
17 | ----------
18 | cell1 : cell2cell.core.cell.Cell
19 | First cell-type/tissue/sample to compute the communication
20 | score. In a directed interaction, this is the sender.
21 |
22 | cell2 : cell2cell.core.cell.Cell
23 | Second cell-type/tissue/sample to compute the communication
24 | score. In a directed interaction, this is the receiver.
25 |
26 | ppi_score : array-like, default=None
27 | An array with a weight for each PPI. The weight
28 | multiplies the communication scores.
29 |
30 | Returns
31 | -------
32 | communication_scores : numpy.array
33 | An array with the communication scores for each intercellular
34 | PPI.
35 | '''
36 | c1 = cell1.weighted_ppi['A'].values
37 | c2 = cell2.weighted_ppi['B'].values
38 |
39 | if (len(c1) == 0) or (len(c2) == 0):
40 | return 0.0
41 |
42 | if ppi_score is None:
43 | ppi_score = np.array([1.0] * len(c1))
44 |
45 | communication_scores = c1 * c2 * ppi_score
46 | return communication_scores
47 |
48 |
49 | def get_continuous_scores(cell1, cell2, ppi_score=None, method='expression_product'):
50 | '''Computes continuous communication scores for all
51 | protein-protein interactions between a pair of
52 | cell-types/tissues/samples. This corresponds to
53 | a specific scoring function between preprocessed continuous
54 | expression values for each interacting protein coming from
55 | each cell.
56 |
57 | Parameters
58 | ----------
59 | cell1 : cell2cell.core.cell.Cell
60 | First cell-type/tissue/sample to compute the communication
61 | score. In a directed interaction, this is the sender.
62 |
63 | cell2 : cell2cell.core.cell.Cell
64 | Second cell-type/tissue/sample to compute the communication
65 | score. In a directed interaction, this is the receiver.
66 |
67 | ppi_score : array-like, default=None
68 | An array with a weight for each PPI. The weight
69 | multiplies the communication scores.
70 |
71 | method : str, default='expression_product'
72 | Scoring function for computing the communication score.
73 | Options are:
74 | - 'expression_product' : Multiplication between the expression
75 | of the interacting proteins. One coming from cell1 and the
76 | other from cell2.
77 | - 'expression_mean' : Average between the expression
78 | of the interacting proteins. One coming from cell1 and the
79 | other from cell2.
80 | - 'expression_gmean' : Geometric mean between the expression
81 | of the interacting proteins. One coming from cell1 and the
82 | other from cell2.
83 |
84 | Returns
85 | -------
86 | communication_scores : numpy.array
87 | An array with the communication scores for each intercellular
88 | PPI.
89 | '''
90 | c1 = cell1.weighted_ppi['A'].values
91 | c2 = cell2.weighted_ppi['B'].values
92 |
93 | if method == 'expression_product':
94 | communication_scores = score_expression_product(c1, c2)
95 | elif method == 'expression_mean':
96 | communication_scores = score_expression_mean(c1, c2)
97 | elif method == 'expression_gmean':
98 | communication_scores = np.sqrt(score_expression_product(c1, c2))
99 | else:
100 | raise ValueError('{} is not implemented yet'.format(method))
101 |
102 | if ppi_score is None:
103 | ppi_score = np.array([1.0] * len(c1))
104 |
105 | communication_scores = communication_scores * ppi_score
106 | return communication_scores
107 |
108 |
109 | def score_expression_product(c1, c2):
110 | '''Computes the expression product score
111 |
112 | Parameters
113 | ----------
114 | c1 : array-like
115 | A 1D-array containing the preprocessed expression values
116 | for the interactors in the first column of a list of
117 | protein-protein interactions.
118 |
119 | c2 : array-like
120 | A 1D-array containing the preprocessed expression values
121 | for the interactors in the second column of a list of
122 | protein-protein interactions.
123 |
124 | Returns
125 | -------
126 | c1 * c2 : array-like
127 | Multiplication of vectors.
128 | '''
129 | if (len(c1) == 0) or (len(c2) == 0):
130 | return 0.0
131 | return c1 * c2
132 |
133 |
134 | def score_expression_mean(c1, c2):
135 | '''Computes the expression mean (average) score
136 |
137 | Parameters
138 | ----------
139 | c1 : array-like
140 | A 1D-array containing the preprocessed expression values
141 | for the interactors in the first column of a list of
142 | protein-protein interactions.
143 |
144 | c2 : array-like
145 | A 1D-array containing the preprocessed expression values
146 | for the interactors in the second column of a list of
147 | protein-protein interactions.
148 |
149 | Returns
150 | -------
151 | (c1 + c2)/2. : array-like
152 | Average of vectors.
153 | '''
154 | if (len(c1) == 0) or (len(c2) == 0):
155 | return 0.0
156 | return (c1 + c2)/2.
157 |
158 |
159 | def compute_ccc_matrix(prot_a_exp, prot_b_exp, communication_score='expression_product'):
160 | '''Computes communication scores for a specific
161 | protein-protein interaction using vectors of gene expression
162 | levels for a given interacting protein produced by
163 | different cell-types/tissues/samples.
164 |
165 | Parameters
166 | ----------
167 | prot_a_exp : array-like
168 | Vector with gene expression levels for an interacting protein A
169 | in a given PPI. Coordinates are different cell-types/tissues/samples.
170 |
171 | prot_b_exp : array-like
172 | Vector with gene expression levels for an interacting protein B
173 | in a given PPI. Coordinates are different cell-types/tissues/samples.
174 |
175 | communication_score : str, default='expression_product'
176 | Scoring function for computing the communication score.
177 | Options are:
178 |
179 | - 'expression_product' : Multiplication between the expression
180 | of the interacting proteins.
181 | - 'expression_mean' : Average between the expression
182 | of the interacting proteins.
183 | - 'expression_gmean' : Geometric mean between the expression
184 | of the interacting proteins.
185 |
186 | Returns
187 | -------
188 | communication_scores : numpy.array
189 | Matrix MxM, representing the CCC scores of a specific PPI
190 | across all pairs of cell-types/tissues/samples. M are all
191 | cell-types/tissues/samples. In directed interactions, the
192 | vertical axis (axis 0) represents the senders, while the
193 | horizontal axis (axis 1) represents the receivers.
194 | '''
195 | if communication_score == 'expression_product':
196 | communication_scores = np.outer(prot_a_exp, prot_b_exp)
197 | elif communication_score == 'expression_mean':
198 | communication_scores = (np.outer(prot_a_exp, np.ones(prot_b_exp.shape)) + np.outer(np.ones(prot_a_exp.shape), prot_b_exp)) / 2.
199 | elif communication_score == 'expression_gmean':
200 | communication_scores = np.sqrt(np.outer(prot_a_exp, prot_b_exp))
201 | else:
202 | raise ValueError("Not a valid communication_score")
203 | return communication_scores
204 |
205 |
206 | def aggregate_ccc_matrices(ccc_matrices, method='gmean'):
207 | '''Aggregates matrices of communication scores. Each
208 | matrix has the communication scores across all pairs
209 | of cell-types/tissues/samples for a different
210 | pair of interacting proteins.
211 |
212 | Parameters
213 | ----------
214 | ccc_matrices : list
215 | List of matrices of communication scores. Each matrix
216 | is for a specific pair of interacting proteins.
217 |
218 | method : str, default='gmean'.
219 | Method to aggregate the matrices element-wise.
220 | Options are:
221 |
222 | - 'gmean' : Geometric mean in an element-wise way.
223 | - 'sum' : Sum in an element-wise way.
224 | - 'mean' : Mean in an element-wise way.
225 |
226 | Returns
227 | -------
228 | aggregated_ccc_matrix : numpy.array
229 | A matrix containing aggregated communication scores
230 | from multiple PPIs. Its shape is MxM, where M are all
231 | cell-types/tissues/samples. In directed interactions, the
232 | vertical axis (axis 0) represents the senders, while the
233 | horizontal axis (axis 1) represents the receivers.
234 | '''
235 | if method == 'gmean':
236 | aggregated_ccc_matrix = gmean(ccc_matrices)
237 | elif method == 'sum':
238 | aggregated_ccc_matrix = np.nansum(ccc_matrices, axis=0)
239 | elif method == 'mean':
240 | aggregated_ccc_matrix = np.nanmean(ccc_matrices, axis=0)
241 | else:
242 | raise ValueError("Not a valid method")
243 |
244 | return aggregated_ccc_matrix
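
# Usage sketch (illustrative, not part of the original module): CCC matrices for
# two hypothetical ligand-receptor pairs across three cell types.
if __name__ == '__main__':
    ligand = np.array([1.0, 2.0, 0.0])    # ligand expression per cell type (senders)
    receptor = np.array([0.5, 1.0, 2.0])  # receptor expression per cell type (receivers)
    m1 = compute_ccc_matrix(ligand, receptor, communication_score='expression_product')
    m2 = compute_ccc_matrix(ligand, receptor, communication_score='expression_mean')
    print(aggregate_ccc_matrices([m1, m2], method='mean'))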
--------------------------------------------------------------------------------
/cell2cell/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | from cell2cell.datasets.anndata import (balf_covid)
2 | from cell2cell.datasets.gsea_data import (gsea_msig)
3 | from cell2cell.datasets.heuristic_data import (HeuristicGOTerms)
4 | from cell2cell.datasets.random_data import (generate_random_rnaseq, generate_random_ppi, generate_random_cci_scores,
5 | generate_random_metadata)
6 | from cell2cell.datasets.toy_data import (generate_toy_distance, generate_toy_rnaseq, generate_toy_ppi, generate_toy_metadata)
--------------------------------------------------------------------------------
/cell2cell/datasets/anndata.py:
--------------------------------------------------------------------------------
1 | from scanpy.readwrite import read
2 |
3 |
4 | def balf_covid(filename='BALF-COVID19-Liao_et_al-NatMed-2020.h5ad'):
5 | """BALF samples from COVID-19 patients
6 | The data consists of 63k immune and epithelial cells in lungs
7 | from 3 control, 3 moderate COVID-19, and 6 severe COVID-19 patients.
8 |
9 | This dataset was previously published in [1], and this object contains
10 | the raw counts for the annotated cell types available in:
11 | https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE145926
12 |
13 | References:
14 | [1] Liao, M., Liu, Y., Yuan, J. et al.
15 | Single-cell landscape of bronchoalveolar immune cells in patients
16 | with COVID-19. Nat Med 26, 842–844 (2020).
17 | https://doi.org/10.1038/s41591-020-0901-9
18 |
19 | Parameters
20 | ----------
21 | filename : str, default='BALF-COVID19-Liao_et_al-NatMed-2020.h5ad'
22 | Path to the h5ad file in case it was manually downloaded.
23 |
24 | Returns
25 | -------
26 | Annotated data matrix.
27 | """
28 | url = 'https://zenodo.org/record/7535867/files/BALF-COVID19-Liao_et_al-NatMed-2020.h5ad'
29 | adata = read(filename, backup_url=url)
30 | return adata
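
# Usage sketch (fetches the .h5ad file from Zenodo on first use, then reads the
# local copy on subsequent calls):
#
#   adata = balf_covid()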
--------------------------------------------------------------------------------
/cell2cell/datasets/gsea_data.py:
--------------------------------------------------------------------------------
1 | from cell2cell.external.gseapy import _check_pathwaydb, load_gmt, PATHWAY_DATA
2 |
3 |
4 | def gsea_msig(organism='human', pathwaydb='GOBP', readable_name=False):
5 | '''Load a MSigDB from a gmt file
6 |
7 | Parameters
8 | ----------
9 | organism : str, default='human'
10 | Organism for whom the DB will be loaded.
11 | Available options are {'human', 'mouse'}.
12 |
13 | pathwaydb: str, default='GOBP'
14 | Molecular Signature Database to load.
15 | Available options are {'GOBP', 'KEGG', 'Reactome'}
16 |
17 | readable_name : boolean, default=False
18 | If True, the pathway names are transformed to a more readable format.
19 | That is, removing underscores and pathway DB name at the beginning.
20 |
21 | Returns
22 | -------
23 | pathway_per_gene : defaultdict
24 | Dictionary containing all genes in the DB as keys, and
25 | their values are lists with their pathway annotations.
26 | '''
27 | _check_pathwaydb(organism, pathwaydb)
28 |
29 | pathway_per_gene = load_gmt(readable_name=readable_name, **PATHWAY_DATA[organism][pathwaydb])
30 | return pathway_per_gene
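
# Usage sketch (this may download the corresponding GMT file if it is not
# available locally):
#
#   kegg_sets = gsea_msig(organism='human', pathwaydb='KEGG', readable_name=True)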
--------------------------------------------------------------------------------
/cell2cell/datasets/heuristic_data.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import absolute_import
4 |
5 |
6 | class HeuristicGOTerms:
7 | '''GO terms for contact and secreted proteins.
8 |
9 | Attributes
10 | ----------
11 | contact_go_terms : list
12 | List of GO terms associated with proteins that
13 | participate in contact interactions (usually
14 | on the surface of cells).
15 |
16 | mediator_go_terms : list
17 | List of GO terms associated with secreted
18 | proteins that mediate intercellular interactions
19 | or communication.
20 | '''
21 | def __init__(self):
22 | self.contact_go_terms = ['GO:0007155', # Cell adhesion
23 | 'GO:0022608', # Multicellular organism adhesion
24 | 'GO:0098740', # Multiorganism cell adhesion
25 | 'GO:0098743', # Cell aggregation
26 | 'GO:0030054', # Cell-junction #
27 | 'GO:0009986', # Cell surface #
28 | 'GO:0097610', # Cell surface furrow
29 | 'GO:0007160', # Cell-matrix adhesion
30 | 'GO:0043235', # Receptor complex,
31 | 'GO:0008305', # Integrin complex,
32 | 'GO:0043113', # Receptor clustering
33 | 'GO:0009897', # External side of plasma membrane #
34 | 'GO:0038023', # Signaling receptor activity #
35 | ]
36 |
37 | self.mediator_go_terms = ['GO:0005615', # Extracellular space
38 | 'GO:0005576', # Extracellular region
39 | 'GO:0031012', # Extracellular matrix
40 | 'GO:0005201', # Extracellular matrix structural constituent
41 | 'GO:1990430', # Extracellular matrix protein binding
42 | 'GO:0048018', # Receptor ligand activity #
43 | ]
--------------------------------------------------------------------------------
/cell2cell/datasets/random_data.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import absolute_import
4 |
5 | import numpy as np
6 | import pandas as pd
7 |
8 | from sklearn.utils import resample
9 |
10 | from cell2cell.preprocessing import rnaseq, ppi
11 |
12 |
13 | def generate_random_rnaseq(size, row_names, random_state=None, verbose=True):
14 | '''
15 | Generates a RNA-seq dataset that is normally distributed gene-wise and size
16 | normalized (each column sums up to a million).
17 |
18 | Parameters
19 | ----------
20 | size : int
21 | Number of cell-types/tissues/samples (columns).
22 |
23 | row_names : array-like
24 | List containing the name of genes (rows).
25 |
26 | random_state : int, default=None
27 | Seed for randomization.
28 |
29 | verbose : boolean, default=True
30 | Whether printing or not steps of the analysis.
31 |
32 | Returns
33 | -------
34 | df : pandas.DataFrame
35 | Dataframe containing gene expression given the list
36 | of genes for each cell-type/tissue/sample.
37 | '''
38 | if verbose:
39 | print('Generating random RNA-seq dataset.')
40 | columns = ['Cell-{}'.format(c) for c in range(1, size+1)]
41 |
42 | if random_state is not None:
43 | np.random.seed(random_state)
44 | data = np.random.randn(len(row_names), len(columns)) # Normal distribution
45 | shift = np.abs(np.amin(data, axis=1))  # per-gene offset to make all values non-negative
46 | shift = shift.reshape((len(shift), 1))
47 | 
48 | data = data + shift
49 | df = pd.DataFrame(data, index=row_names, columns=columns)
50 | if verbose:
51 | print('Normalizing random RNA-seq dataset (into TPM)')
52 | df = rnaseq.scale_expression_by_sum(df, axis=0, sum_value=1e6)
53 | return df
54 |
55 |
56 | def generate_random_ppi(max_size, interactors_A, interactors_B=None, random_state=None, verbose=True):
57 | '''Generates a random list of protein-protein interactions.
58 |
59 | Parameters
60 | ----------
61 | max_size : int
62 | Maximum size of interactions to obtain. Since the PPIs
63 | are obtained by independently resampling interactors A and B
64 | rather than creating all possible combinations (it may demand too much
65 | memory), some PPIs can be duplicated; dropping those duplicates
66 | may result in fewer PPIs than max_size.
67 |
68 | interactors_A : list
69 | A list of protein names to include in the first column of
70 | the PPIs.
71 |
72 | interactors_B : list, default=None
73 | A list of protein names to include in the second columns
74 | of the PPIs. If None, interactors_A will be used as
75 | interactors_B too.
76 |
77 | random_state : int, default=None
78 | Seed for randomization.
79 |
80 | verbose : boolean, default=True
81 | Whether printing or not steps of the analysis.
82 |
83 | Returns
84 | -------
85 | ppi_data : pandas.DataFrame
86 | DataFrame containing a list of protein-protein interactions.
87 | It has three columns: 'A', 'B', and 'score' for interactors
88 | A, B and weights of interactions, respectively.
89 | '''
90 | if interactors_B is not None:
91 | assert max_size <= len(interactors_A)*len(interactors_B), "The maximum size can't be greater than all combinations between partners A and B"
92 | else:
93 | assert max_size <= len(interactors_A)**2, "The maximum size can't be greater than all combinations of partners A"
94 |
95 |
96 | if verbose:
97 | print('Generating random PPI network.')
98 |
99 | def small_block_ppi(size, interactors_A, interactors_B, random_state):
100 | if random_state is not None:
101 | random_state += 1
102 | if interactors_B is None:
103 | interactors_B = interactors_A
104 |
105 | col_A = resample(interactors_A, n_samples=size, random_state=random_state)
106 | col_B = resample(interactors_B, n_samples=size, random_state=random_state)
107 |
108 | ppi_data = pd.DataFrame()
109 | ppi_data['A'] = col_A
110 | ppi_data['B'] = col_B
111 | ppi_data = ppi_data.assign(score=1.0)  # assign returns a new DataFrame rather than modifying in place
112 |
113 | ppi_data = ppi.remove_ppi_bidirectionality(ppi_data, ('A', 'B'), verbose=verbose)
114 | ppi_data = ppi_data.drop_duplicates()
115 | ppi_data.reset_index(inplace=True, drop=True)
116 | return ppi_data
117 |
118 | ppi_data = small_block_ppi(max_size*2, interactors_A, interactors_B, random_state)
119 |
120 | # TODO: This part need to be fixed, it does not converge to the max_size -> len((set(A)) * len(set(B) - set(A)))
121 | # while ppi_data.shape[0] < size:
122 | # if random_state is not None:
123 | # random_state += 2
124 | # b = small_block_ppi(size, interactors_A, interactors_B, random_state)
125 | # print(b)
126 | # ppi_data = pd.concat([ppi_data, b])
127 | # ppi_data = ppi.remove_ppi_bidirectionality(ppi_data, ('A', 'B'), verbose=verbose)
128 | # ppi_data = ppi_data.drop_duplicates()
129 | # ppi_data.dropna()
130 | # ppi_data.reset_index(inplace=True, drop=True)
131 | # print(ppi_data.shape[0])
132 |
133 | if ppi_data.shape[0] > max_size:
134 | ppi_data = ppi_data.loc[list(range(max_size)), :]
135 | ppi_data.reset_index(inplace=True, drop=True)
136 | return ppi_data
137 |
138 |
139 | def generate_random_cci_scores(cell_number, labels=None, symmetric=True, random_state=None):
140 | '''Generates a square cell-cell interaction
141 | matrix with random scores.
142 |
143 | Parameters
144 | ----------
145 | cell_number : int
146 | Number of cells.
147 |
148 | labels : list, default=None
149 | List containing labels for each cells. Length of
150 | this list must match the cell_number.
151 |
152 | symmetric : boolean, default=True
153 | Whether generating a symmetric CCI matrix.
154 |
155 | random_state : int, default=None
156 | Seed for randomization.
157 |
158 | Returns
159 | -------
160 | cci_matrix : pandas.DataFrame
161 | Matrix with rows and columns as cells. Values
162 | represent a random CCI score between 0 and 1.
163 | '''
164 | if labels is not None:
165 | assert len(labels) == cell_number, "Length of labels must match cell_number"
166 | else:
167 | labels = ['Cell-{}'.format(n) for n in range(1, cell_number+1)]
168 |
169 | if random_state is not None:
170 | np.random.seed(random_state)
171 | cci_scores = np.random.random((cell_number, cell_number))
172 | if symmetric:
173 | cci_scores = (cci_scores + cci_scores.T) / 2.
174 | cci_matrix = pd.DataFrame(cci_scores, index=labels, columns=labels)
175 |
176 | return cci_matrix
177 |
178 |
179 | def generate_random_metadata(cell_labels, group_number):
180 | '''Randomly assigns groups to cell labels.
181 |
182 | Parameters
183 | ----------
184 | cell_labels : list
185 | A list of cell labels.
186 |
187 | group_number : int
188 | Number of major groups of cells.
189 |
190 | Returns
191 | -------
192 | metadata : pandas.DataFrame
193 | DataFrame containing the major groups that each cell
194 | received randomly (under column 'Group'). Cells are
195 | under the column 'Cell'.
196 | '''
197 | metadata = pd.DataFrame()
198 | metadata['Cell'] = cell_labels
199 |
200 | groups = list(range(1, group_number+1))
201 | metadata['Group'] = metadata['Cell'].apply(lambda x: np.random.choice(groups, 1)[0])
202 | return metadata
203 |
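# Usage sketch (illustrative, not part of the original module): a small random
# expression matrix plus a random PPI network built from the same gene names.
if __name__ == '__main__':
    genes = ['Gene-{}'.format(i) for i in range(1, 51)]
    rnaseq_df = generate_random_rnaseq(size=4, row_names=genes, random_state=0, verbose=False)
    ppi_df = generate_random_ppi(max_size=20, interactors_A=genes, random_state=0, verbose=False)
    print(rnaseq_df.shape, ppi_df.shape)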
--------------------------------------------------------------------------------
/cell2cell/datasets/toy_data.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 |
4 |
5 | def generate_toy_rnaseq():
6 | '''Generates a toy RNA-seq dataset
7 |
8 | Returns
9 | -------
10 | rnaseq : pandas.DataFrame
11 | DataFrame containing the toy RNA-seq dataset. Columns
12 | are cells and rows are genes.
13 | '''
14 | data = np.asarray([[5, 10, 8, 15, 2],
15 | [15, 5, 20, 1, 30],
16 | [18, 12, 5, 40, 20],
17 | [9, 30, 22, 5, 2],
18 | [2, 1, 1, 27, 15],
19 | [30, 11, 16, 5, 12],
20 | ])
21 |
22 | rnaseq = pd.DataFrame(data,
23 | index=['Protein-A', 'Protein-B', 'Protein-C', 'Protein-D', 'Protein-E', 'Protein-F'],
24 | columns=['C1', 'C2', 'C3', 'C4', 'C5']
25 | )
26 | rnaseq.index.name = 'gene_id'
27 | return rnaseq
28 |
29 |
30 | def generate_toy_ppi(prot_complex=False):
31 | '''Generates a toy list of protein-protein interactions.
32 |
33 | Parameters
34 | ----------
35 | prot_complex : boolean, default=False
36 | Whether including PPIs where interactors could contain
37 | multimeric complexes.
38 |
39 | Returns
40 | -------
41 | ppi : pandas.DataFrame
42 | Dataframe containing PPIs. Columns are 'A' (first interacting
43 | partners), 'B' (second interacting partners) and 'score'
44 | for weighting each PPI.
45 | '''
46 | if prot_complex:
47 | data = np.asarray([['Protein-A', 'Protein-B'],
48 | ['Protein-B', 'Protein-C'],
49 | ['Protein-C', 'Protein-A'],
50 | ['Protein-B', 'Protein-B'],
51 | ['Protein-B', 'Protein-A'],
52 | ['Protein-E', 'Protein-F'],
53 | ['Protein-F', 'Protein-F'],
54 | ['Protein-C&Protein-E', 'Protein-F'],
55 | ['Protein-B', 'Protein-E'],
56 | ['Protein-A&Protein-B', 'Protein-F'],
57 | ])
58 | else:
59 | data = np.asarray([['Protein-A', 'Protein-B'],
60 | ['Protein-B', 'Protein-C'],
61 | ['Protein-C', 'Protein-A'],
62 | ['Protein-B', 'Protein-B'],
63 | ['Protein-B', 'Protein-A'],
64 | ['Protein-E', 'Protein-F'],
65 | ['Protein-F', 'Protein-F'],
66 | ['Protein-C', 'Protein-F'],
67 | ['Protein-B', 'Protein-E'],
68 | ['Protein-A', 'Protein-F'],
69 | ])
70 | ppi = pd.DataFrame(data, columns=['A', 'B'])
71 | ppi = ppi.assign(score=1.0)
72 | return ppi
73 |
74 |
75 | def generate_toy_metadata():
76 | '''Generates metadata for cells in the toy RNA-seq dataset.
77 |
78 | Returns
79 | -------
80 | metadata : pandas.DataFrame
81 | DataFrame with metadata for each cell. Metadata contains the
82 | major groups of those cells.
83 | '''
84 | data = np.asarray([['C1', 'G1'],
85 | ['C2', 'G2'],
86 | ['C3', 'G3'],
87 | ['C4', 'G3'],
88 | ['C5', 'G1']
89 | ])
90 |
91 | metadata = pd.DataFrame(data, columns=['#SampleID', 'Groups'])
92 | return metadata
93 |
94 |
95 | def generate_toy_distance():
96 | '''Generates a square matrix with cell-cell distance.
97 |
98 | Returns
99 | -------
100 | distance : pandas.DataFrame
101 | DataFrame with Euclidean-like distance between each
102 | pair of cells in the toy RNA-seq dataset.
103 | '''
104 | data = np.asarray([[0.0, 10.0, 12.0, 5.0, 3.0],
105 | [10.0, 0.0, 15.0, 8.0, 9.0],
106 | [12.0, 15.0, 0.0, 4.5, 7.5],
107 | [5.0, 8.0, 4.5, 0.0, 6.5],
108 | [3.0, 9.0, 7.5, 6.5, 0.0],
109 | ])
110 | distance = pd.DataFrame(data,
111 | index=['C1', 'C2', 'C3', 'C4', 'C5'],
112 | columns=['C1', 'C2', 'C3', 'C4', 'C5']
113 | )
114 | return distance
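
# Usage sketch: assemble the full toy dataset used across the tutorials.
if __name__ == '__main__':
    rnaseq_df = generate_toy_rnaseq()
    ppi_df = generate_toy_ppi(prot_complex=False)
    meta_df = generate_toy_metadata()
    dist_df = generate_toy_distance()
    print(rnaseq_df.shape, ppi_df.shape, meta_df.shape, dist_df.shape)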
--------------------------------------------------------------------------------
/cell2cell/external/__init__.py:
--------------------------------------------------------------------------------
1 | from cell2cell.external.pcoa import (pcoa, pcoa_biplot, _check_ordination)
2 | from cell2cell.external.goenrich import (goa, ontology)
3 | from cell2cell.external.gseapy import (load_gmt, generate_lr_geneset, run_gsea)
4 | from cell2cell.external.umap import (run_umap)
--------------------------------------------------------------------------------
/cell2cell/external/goenrich.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------------
2 | # Copyright (c) 2017--, goenrich development team.
3 | #
4 | # Distributed under the terms of the MIT licence.
5 | # ----------------------------------------------------------------------------
6 |
7 | # CODE OBTAINED FROM: https://github.com/jdrudolph/goenrich/
8 | # COPIED HERE BECAUSE GOENRICH IS NOT AVAILABLE THROUGH CONDA
9 |
10 | import itertools
11 | import networkx as nx
12 | import pandas as pd
13 |
14 | def _tokenize(f):
15 | token = []
16 | for line in f:
17 | if line == '\n':
18 | yield token
19 | token = []
20 | else:
21 | token.append(line)
22 |
23 | def _filter_terms(tokens):
24 | for token in tokens:
25 | if token[0] == '[Term]\n':
26 | yield token[1:]
27 |
28 | def _parse_terms(terms):
29 | for term in terms:
30 | obsolete = False
31 | node = {}
32 | parents = []
33 | for line in term:
34 | if line.startswith('id:'):
35 | id = line[4:-1]
36 | elif line.startswith('name:'):
37 | node['name'] = line[6:-1]
38 | elif line.startswith('namespace:'):
39 | node['namespace'] = line[11:-1]
40 | elif line.startswith('is_a:'):
41 | parents.append(line[6:16])
42 | elif line.startswith('relationship: part_of'):
43 | parents.append(line[22:32])
44 | elif line.startswith('is_obsolete'):
45 | obsolete = True
46 | break
47 | if not obsolete:
48 | edges = [(p, id) for p in parents] # will reverse edges later
49 | yield (id, node), edges
50 | else:
51 | continue
52 |
53 | _filename = 'db/go-basic.obo'
54 |
55 | def ontology(file):
56 | """ read ontology from file
57 | :param file: file path or file handle
58 | """
59 | O = nx.DiGraph()
60 |
61 | if isinstance(file, str):
62 | f = open(file)
63 | we_opened_file = True
64 | else:
65 | f = file
66 | we_opened_file = False
67 |
68 | try:
69 | tokens = _tokenize(f)
70 | terms = _filter_terms(tokens)
71 | entries = _parse_terms(terms)
72 | nodes, edges = zip(*entries)
73 | O.add_nodes_from(nodes)
74 | O.add_edges_from(itertools.chain.from_iterable(edges))
75 | O.graph['roots'] = {data['name'] : n for n, data in O.nodes.items()
76 | if data['name'] == data['namespace']}
77 | finally:
78 | if we_opened_file:
79 | f.close()
80 |
81 | for root in O.graph['roots'].values():
82 | for n, depth in nx.shortest_path_length(O, root).items():
83 | node = O.nodes[n]
84 | node['depth'] = min(depth, node.get('depth', float('inf')))
85 | return O.reverse()
86 |
87 |
88 | """
89 | parsers for different go-annotation formats
90 | """
91 | GENE_ASSOCIATION_COLUMNS = ('db', 'db_object_id', 'db_object_symbol',
92 | 'qualifier', 'go_id', 'db_reference',
93 | 'evidence_code', 'with_from', 'aspect',
94 | 'db_object_name', 'db_object_synonym',
95 | 'db_object_type', 'taxon', 'date', 'assigned_by',
96 | 'annotation_extension', 'gene_product_form_id')
97 | EXPERIMENTAL_EVIDENCE = ('EXP', 'IDA', 'IPI', 'IMP', 'IGI', 'IEP')
98 |
99 |
100 | def goa(filename, experimental=True, **kwds):
101 | """ read go-annotation file
102 |
103 | :param filename: path to the go-annotation file
104 | :param experimental: use only experimentally validated annotations
105 | """
106 | defaults = {'comment': '!',
107 | 'names': GENE_ASSOCIATION_COLUMNS}
108 |
109 | if experimental and 'usecols' in kwds:
110 | kwds['usecols'] += ('evidence_code',)
111 |
112 | defaults.update(kwds)
113 | result = pd.read_csv(filename, sep='\t', **defaults)
114 |
115 | if experimental:
116 | retain_mask = result.evidence_code.isin(EXPERIMENTAL_EVIDENCE)
117 | result.drop(result.index[~retain_mask], inplace=True)
118 |
119 | return result
120 |
121 |
122 | def sgd(filename, experimental=False, **kwds):
123 | """ read yeast genome database go-annotation file
124 | :param filename: path to the go-annotation file
125 | :param experimental: use only experimentally validated annotations
126 | """
127 | return goa(filename, experimental, **kwds)
128 |
129 |
130 | GENE2GO_COLUMNS = ('tax_id', 'GeneID', 'GO_ID', 'Evidence', 'Qualifier', 'GO_term', 'PubMed', 'Category')
131 |
132 |
133 | def gene2go(filename, experimental=False, tax_id=9606, **kwds):
134 | """ read go-annotation file
135 |
136 | :param filename: path to the go-annotation file
137 | :param experimental: use only experimentally validated annotations
138 | :param tax_id: filter according to taxon
139 | """
140 | defaults = {'comment': '#',
141 | 'names': GENE2GO_COLUMNS}
142 | defaults.update(kwds)
143 | result = pd.read_csv(filename, sep='\t', **defaults)
144 |
145 | retain_mask = result.tax_id == tax_id
146 | result.drop(result.index[~retain_mask], inplace=True)
147 |
148 | if experimental:
149 | retain_mask = result.Evidence.isin(EXPERIMENTAL_EVIDENCE)
150 | result.drop(result.index[~retain_mask], inplace=True)
151 |
152 | return result
--------------------------------------------------------------------------------
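A usage sketch for the two helpers re-exported in cell2cell.external: ontology() builds a networkx DiGraph (reversed, so edges point from a term to its parents) from an .obo file, and goa() loads a gene-association table. Both file names below are placeholders for locally downloaded GO releases:

from cell2cell.external.goenrich import ontology, goa

O = ontology('go-basic.obo')           # placeholder path to a GO .obo release
annotations = goa('goa_human.gaf.gz')  # placeholder path; keeps experimental evidence codes by default

print(O.number_of_nodes(), annotations.shape)
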
/cell2cell/external/umap.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import umap
3 |
4 | import pandas as pd
5 | import scipy.spatial as sp
6 |
7 |
8 | def run_umap(rnaseq_data, axis=1, metric='euclidean', min_dist=0.4, n_neighbors=8, random_state=None, **kwargs):
9 | '''Runs UMAP on an expression matrix.
10 | Parameters
11 | ----------
12 | rnaseq_data : pandas.DataFrame
13 | A dataframe of gene expression values wherein the rows are the genes or
14 | embeddings of a dimensionality reduction method and columns the cells,
15 | tissues or samples.
16 |
17 | axis : int, default=1
18 | An axis of the dataframe (0 across rows, 1 across columns).
19 | Across rows means that the UMAP is to compare genes, while
20 | across columns is to compare cells, tissues or samples.
21 |
22 | metric : str, default='euclidean'
23 | The distance metric to use. The distance function can be 'braycurtis',
24 | 'canberra', 'chebyshev', 'cityblock', 'correlation', 'cosine', 'dice',
25 | 'euclidean', 'hamming', 'jaccard', 'jensenshannon', 'kulsinski',
26 | 'mahalanobis', 'matching', 'minkowski', 'rogerstanimoto', 'russellrao',
27 | 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule'.
28 |
29 | min_dist: float, default=0.4
30 | The effective minimum distance between embedded points. Smaller values
31 | will result in a more clustered/clumped embedding where nearby points
32 | on the manifold are drawn closer together, while larger values will
33 | result in a more even dispersal of points. The value should be set
34 | relative to the ``spread`` value, which determines the scale at which
35 | embedded points will be spread out.
36 |
37 | n_neighbors: int, default=8
38 | The size of local neighborhood (in terms of number of neighboring
39 | sample points) used for manifold approximation. Larger values
40 | result in more global views of the manifold, while smaller
41 | values result in more local data being preserved. In general
42 | values should be in the range 2 to 100.
43 |
44 | random_state : int, default=None
45 | Seed for randomization.
46 |
47 | **kwargs : dict
48 | Extra arguments for UMAP as defined in umap.UMAP.
49 |
50 | Returns
51 | -------
52 | umap_df : pandas.DataFrame
53 | Dataframe containing the UMAP embeddings for the axis analyzed.
54 | Contains columns 'umap1' and 'umap2'.
55 | '''
56 | # Organize data
57 | if axis == 0:
58 | df = rnaseq_data
59 | elif axis == 1:
60 | df = rnaseq_data.T
61 | else:
62 | raise ValueError("The parameter axis must be either 0 or 1.")
63 |
64 | # Compute distances
65 | D = sp.distance.pdist(df, metric=metric)
66 | D_sq = sp.distance.squareform(D)
67 |
68 | # Run UMAP
69 | model = umap.UMAP(metric="precomputed",
70 | min_dist=min_dist,
71 | n_neighbors=n_neighbors,
72 | random_state=random_state,
73 | **kwargs
74 | )
75 |
76 | trans_D = model.fit_transform(D_sq)
77 |
78 | # Organize results
79 | umap_df = pd.DataFrame(trans_D, columns=['umap1', 'umap2'], index=df.index)
80 | return umap_df
--------------------------------------------------------------------------------
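Because run_umap() precomputes the full pairwise distance matrix before fitting UMAP, any metric accepted by scipy's pdist can be used. A sketch on random data (values are for illustration only):

import numpy as np
import pandas as pd

from cell2cell.external.umap import run_umap

rng = np.random.default_rng(0)
rnaseq = pd.DataFrame(rng.random((50, 20)),
                      index=['Gene{}'.format(i) for i in range(50)],
                      columns=['Sample{}'.format(j) for j in range(20)])

# axis=1 embeds the 20 samples; the output has one row per sample
embedding = run_umap(rnaseq, axis=1, n_neighbors=5, random_state=42)
print(embedding[['umap1', 'umap2']].head())
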
/cell2cell/io/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from cell2cell.io.directories import (create_directory, get_files_from_directory)
4 | from cell2cell.io.read_data import (load_cutoffs, load_go_annotations, load_go_terms, load_metadata, load_ppi,
5 | load_rnaseq, load_table, load_tables_from_directory, load_variable_with_pickle,
6 | load_tensor, load_tensor_factors)
7 | from cell2cell.io.save_data import (export_variable_with_pickle)
8 |
--------------------------------------------------------------------------------
/cell2cell/io/directories.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import os
4 |
5 |
6 | def create_directory(pathname):
7 | '''Creates a directory.
8 |
9 | Uses a path to create a directory. It creates
10 | all intermediate folders before creating the
11 | leaf folder.
12 |
13 | Parameters
14 | ----------
15 | pathname : str
16 | Full path of the folder to create.
17 | '''
18 | if not os.path.isdir(pathname):
19 | os.makedirs(pathname)
20 | print("{} was created successfully.".format(pathname))
21 | else:
22 | print("{} already exists.".format(pathname))
23 |
24 |
25 | def get_files_from_directory(pathname, dir_in_filepath=False):
26 | '''Obtains a list of filenames in a folder.
27 |
28 | Parameters
29 | ----------
30 | pathname : str
31 | Full path of the folder to explore.
32 |
33 | dir_in_filepath : boolean, default=False
34 | Whether to add `pathname` to the filenames.
35 |
36 | Returns
37 | -------
38 | filenames : list
39 | A list containing the names (strings) of the files
40 | in the folder.
41 | '''
42 | directory = os.fsencode(pathname)
43 | filenames = [pathname + '/' + os.fsdecode(file) if dir_in_filepath else os.fsdecode(file) for file in os.listdir(directory)]
44 | return filenames
45 |
--------------------------------------------------------------------------------
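A short sketch of both helpers; './c2c_outputs' is an arbitrary example path:

from cell2cell.io.directories import create_directory, get_files_from_directory

create_directory('./c2c_outputs')  # prints whether it was created or already existed
files = get_files_from_directory('./c2c_outputs', dir_in_filepath=True)
print(files)  # full paths, because dir_in_filepath=True
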
/cell2cell/io/save_data.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import absolute_import
4 |
5 | import pickle
6 |
7 |
8 | def export_variable_with_pickle(variable, filename):
9 | '''Exports a large-size variable in a Python-readable way
10 | using pickle.
11 |
12 | Parameters
13 | ----------
14 | variable : a python variable
15 | Variable to export
16 |
17 | filename : str
18 | Complete path to the file wherein the variable will be
19 | stored. For example:
20 | /home/user/variable.pkl
21 | '''
22 |
23 | max_bytes = 2 ** 31 - 1
24 |
25 | bytes_out = pickle.dumps(variable)
26 | with open(filename, 'wb') as f_out:
27 | for idx in range(0, len(bytes_out), max_bytes):
28 | f_out.write(bytes_out[idx:idx + max_bytes])
29 | print(filename, 'was correctly saved.')
--------------------------------------------------------------------------------
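Writing the pickled bytes in chunks of at most 2**31 - 1 bytes works around an overflow that some platforms reportedly hit when writing more than 2 GB in a single call. Usage is a one-liner; the path is an example:

import pandas as pd

from cell2cell.io.save_data import export_variable_with_pickle

df = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})
export_variable_with_pickle(df, './variable.pkl')  # any picklable object works
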
/cell2cell/plotting/__init__.py:
--------------------------------------------------------------------------------
1 | from cell2cell.plotting.aesthetics import (get_colors_from_labels, map_colors_to_metadata, generate_legend)
2 | from cell2cell.plotting.ccc_plot import (clustermap_ccc)
3 | from cell2cell.plotting.cci_plot import (clustermap_cci)
4 | from cell2cell.plotting.circular_plot import (circos_plot)
5 | from cell2cell.plotting.pval_plot import (dot_plot, generate_dot_plot)
6 | from cell2cell.plotting.factor_plot import (context_boxplot, loading_clustermap, ccc_networks_plot)
7 | from cell2cell.plotting.pcoa_plot import (pcoa_3dplot)
8 | from cell2cell.plotting.tensor_plot import (tensor_factors_plot, tensor_factors_plot_from_loadings)
9 | from cell2cell.plotting.umap_plot import (umap_biplot)
--------------------------------------------------------------------------------
/cell2cell/plotting/aesthetics.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from matplotlib import pyplot as plt
4 | from matplotlib.colors import Normalize
5 | import matplotlib.cm as cm
6 | import matplotlib.patches as patches
7 | import numpy as np
8 |
9 |
10 | def get_colors_from_labels(labels, cmap='gist_rainbow', factor=1):
11 | '''Generates colors for each label in a list given a colormap
12 |
13 | Parameters
14 | ----------
15 | labels : list
16 | A list of labels to assign a color.
17 |
18 | cmap : str, default='gist_rainbow'
19 | A matplotlib color palette name.
20 |
21 | factor : int, default=1
22 | Factor to amplify the separation of colors.
23 |
24 | Returns
25 | -------
26 | colors : dict
27 | A dictionary where the keys are the labels and the values
28 | correspond to the assigned colors.
29 | '''
30 | assert factor >= 1
31 |
32 | colors = dict.fromkeys(labels, ())
33 |
34 | factor = int(factor)
35 | cm_ = plt.get_cmap(cmap)
36 |
37 | is_number = all((isinstance(e, float) or isinstance(e, int)) for e in labels)
38 |
39 | if not is_number:
40 | NUM_COLORS = factor * len(colors)
41 | for i, label in enumerate(colors.keys()):
42 | colors[label] = cm_((1 + ((factor-1)/factor)) * i / NUM_COLORS)
43 | else:
44 | max_ = np.nanmax(labels)
45 | min_ = np.nanmin(labels)
46 | norm = Normalize(vmin=min_, vmax=max_)
47 |
48 | m = cm.ScalarMappable(norm=norm, cmap=cmap)
49 | for label in colors.keys():
50 | colors[label] = m.to_rgba(label)
51 | return colors
52 |
53 |
54 | def map_colors_to_metadata(metadata, ref_df=None, colors=None, sample_col='#SampleID', group_col='Groups',
55 | cmap='gist_rainbow'):
56 | '''Assigns a color to elements in a dataframe containing metadata.
57 |
58 | Parameters
59 | ----------
60 | metadata : pandas.DataFrame
61 | A dataframe with metadata for specific elements.
62 |
63 | ref_df : pandas.DataFrame
64 | A dataframe whose columns contains a subset of
65 | elements in the metadata.
66 |
67 | colors : dict, default=None
68 | Dictionary containing tuples in the RGBA format for indicating colors
69 | of major groups of cells. If colors is specified, cmap will be
70 | ignored.
71 |
72 | sample_col : str, default='#SampleID'
73 | Column in the metadata for elements to color.
74 |
75 | group_col : str, default='Groups'
76 | Column in the metadata containing the major groups of the elements
77 | to color.
78 |
79 | cmap : str, default='gist_rainbow'
80 | Name of the color palette for coloring the major groups of elements.
81 |
82 | Returns
83 | -------
84 | new_colors : pandas.DataFrame
85 | A pandas dataframe where the index is the list of elements in the
86 | sample_col and the column group_col contains the colors assigned
87 | to each element given their groups.
88 | '''
89 | if ref_df is not None:
90 | meta_ = metadata.set_index(sample_col).reindex(ref_df.columns)
91 | else:
92 | meta_ = metadata.set_index(sample_col)
93 | labels = meta_[group_col].unique().tolist()
94 | if colors is None:
95 | colors = get_colors_from_labels(labels, cmap=cmap)
96 | else:
97 | upd_dict = dict([(v, (1., 1., 1., 1.)) for v in labels if v not in colors.keys()])
98 | colors.update(upd_dict)
99 |
100 | new_colors = meta_[group_col].map(colors)
101 | new_colors.index = meta_.index
102 | new_colors.name = group_col.capitalize()
103 |
104 | return new_colors
105 |
106 |
107 | def generate_legend(color_dict, loc='center left', bbox_to_anchor=(1.01, 0.5), ncol=1, fancybox=True, shadow=True,
108 | title='Legend', fontsize=14, sorted_labels=True, ax=None):
109 | '''Adds a legend to a previous plot or displays an independent legend
110 | given specific colors for labels.
111 |
112 | Parameters
113 | ----------
114 | color_dict : dict
115 | Dictionary containing tuples in the RGBA format for indicating colors
116 | of major groups of cells. Keys are the labels and values are the RGBA
117 | tuples.
118 |
119 | loc : str, default='center left'
120 | Alignment of the legend given the location specified in bbox_to_anchor.
121 |
122 | bbox_to_anchor : tuple, default=(1.01, 0.5)
123 | Location of the legend in a (X, Y) format. For example, if you want
124 | your axes legend located at the figure's top right-hand corner instead
125 | of the axes' corner, simply specify the corner's location and the
126 | coordinate system of that location, which in this case would be (1, 1).
127 |
128 | ncol : int, default=1
129 | Number of columns to display the legend.
130 |
131 | fancybox : boolean, default=True
132 | Whether round edges should be enabled around the FancyBboxPatch which
133 | makes up the legend's background.
134 |
135 | shadow : boolean, default=True
136 | Whether to draw a shadow behind the legend.
137 |
138 | title : str, default='Legend'
139 | Title of the legend box.
140 |
141 | fontsize : int, default=14
142 | Size of the text in the legends.
143 |
144 | sorted_labels : boolean, default=True
145 | Whether to sort the labels alphabetically.
146 |
147 | ax : matplotlib.axes.Axes, default=None
148 | Axes instance for a plot. If None, the legend is added to
149 | the current matplotlib figure through `plt.legend`, and
150 | matplotlib creates a new empty figure when none exists.
151 | Otherwise, the legend is attached to these axes through
152 | `ax.legend`.
153 |
154 | Returns
155 | -------
156 | legend1 : matplotlib.legend.Legend
157 | A legend object in a figure.
158 | '''
159 | color_patches = []
160 | if sorted_labels:
161 | iteritems = sorted(color_dict.items())
162 | else:
163 | iteritems = color_dict.items()
164 | for k, v in iteritems:
165 | color_patches.append(patches.Patch(color=v, label=str(k).replace('_', ' ')))
166 |
167 | if ax is None:
168 | legend1 = plt.legend(handles=color_patches,
169 | loc=loc,
170 | bbox_to_anchor=bbox_to_anchor,
171 | ncol=ncol,
172 | fancybox=fancybox,
173 | shadow=shadow,
174 | title=title,
175 | title_fontsize=fontsize,
176 | fontsize=fontsize)
177 | else:
178 | legend1 = ax.legend(handles=color_patches,
179 | loc=loc,
180 | bbox_to_anchor=bbox_to_anchor,
181 | ncol=ncol,
182 | fancybox=fancybox,
183 | shadow=shadow,
184 | title=title,
185 | title_fontsize=fontsize,
186 | fontsize=fontsize)
187 | return legend1
--------------------------------------------------------------------------------
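The helpers in this module compose: colors generated for a list of labels can be passed to generate_legend() to draw a standalone legend, or to map_colors_to_metadata() to color a plot. A sketch with made-up group labels:

import matplotlib.pyplot as plt

from cell2cell.plotting.aesthetics import get_colors_from_labels, generate_legend

colors = get_colors_from_labels(['G1', 'G2', 'G3'], cmap='tab10')  # {label: RGBA tuple}

fig, ax = plt.subplots()
generate_legend(colors, title='Groups', ax=ax)  # attaches the legend to these axes
plt.show()
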
/cell2cell/plotting/pcoa_plot.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import numpy as np
4 | import pandas as pd
5 | from matplotlib import pyplot as plt
6 | from mpl_toolkits.mplot3d import Axes3D
7 |
8 | from cell2cell.external import pcoa, _check_ordination
9 | from cell2cell.plotting.aesthetics import get_colors_from_labels
10 |
11 |
12 | def pcoa_3dplot(interaction_space, metadata=None, sample_col='#SampleID', group_col='Groups', pcoa_method='eigh',
13 | meta_cmap='gist_rainbow', colors=None, excluded_cells=None, title='', axis_fontsize=14, legend_fontsize=12,
14 | figsize=(6, 5), view_angles=(30, 135), filename=None):
15 | '''Projects the cells into an Euclidean space (PCoA) given their distances
16 | based on their CCI scores. Then, plots each cell by their first three
17 | coordinates in a 3D scatter plot.
18 |
19 | Parameters
20 | ----------
21 | interaction_space : cell2cell.core.interaction_space.InteractionSpace
22 | Interaction space that contains a distance matrix after running
23 | the method compute_pairwise_cci_scores. Alternatively, this object
24 | can be a numpy-array or a pandas DataFrame. Also, a
25 | SingleCellInteractions or a BulkInteractions object after running
26 | the method compute_pairwise_cci_scores.
27 |
28 | metadata : pandas.Dataframe, default=None
29 | Metadata associated with the cells, cell types or samples in the
30 | matrix containing CCC scores. If None, cells will not be colored
31 | by major groups.
32 |
33 | sample_col : str, default='#SampleID'
34 | Column in the metadata for the cells, cell types or samples
35 | in the matrix containing CCI scores.
36 |
37 | group_col : str, default='Groups'
38 | Column in the metadata containing the major groups of cells, cell types
39 | or samples in the matrix with CCI scores.
40 |
41 | pcoa_method : str, default='eigh'
42 | Eigendecomposition method to use in performing PCoA.
43 | By default, uses SciPy's `eigh`, which computes exact
44 | eigenvectors and eigenvalues for all dimensions. The alternate
45 | method, `fsvd`, uses faster heuristic eigendecomposition but loses
46 | accuracy. The magnitude of accuracy lost is dependent on dataset.
47 |
48 | meta_cmap : str, default='gist_rainbow'
49 | Name of the color palette for coloring the major groups of cells.
50 |
51 | colors : dict, default=None
52 | Dictionary containing tuples in the RGBA format for indicating colors
53 | of major groups of cells. If colors is specified, meta_cmap will be
54 | ignored.
55 |
56 | excluded_cells : list, default=None
57 | List containing cell names that are present in the interaction_space
58 | object but that will be excluded from this plot.
59 |
60 | title : str, default=''
61 | Title of the PCoA 3D plot.
62 |
63 | axis_fontsize : int, default=14
64 | Size of the font for the labels of each axis (X, Y and Z).
65 |
66 | legend_fontsize : int, default=12
67 | Size of the font for labels in the legend.
68 |
69 | figsize : tuple, default=(6, 5)
70 | Size of the figure (width*height), each in inches.
71 |
72 | view_angles : tuple, default=(30, 135)
73 | Rotation angles of the plot. Set the elevation and
74 | azimuth of the axes.
75 |
76 | filename : str, default=None
77 | Path to save the figure. If None, the figure is not
78 | saved.
79 |
80 | Returns
81 | -------
82 | results : dict
83 | Dictionary that contains:
84 |
85 | - 'fig' : matplotlib.figure.Figure, containing the whole figure
86 | - 'axes' : matplotlib.axes.Axes, containing the axes of the 3D plot
87 | - 'ordination' : Ordination or projection obtained from the PCoA
88 | - 'distance_matrix' : Distance matrix used to perform the PCoA (usually in
89 | interaction_space.distance_matrix)
90 | '''
91 | if hasattr(interaction_space, 'distance_matrix'):
92 | print('Interaction space detected as an InteractionSpace class')
93 | distance_matrix = interaction_space.distance_matrix
94 | elif (type(interaction_space) is np.ndarray) or (type(interaction_space) is pd.core.frame.DataFrame):
95 | print('Interaction space detected as a distance matrix')
96 | distance_matrix = interaction_space
97 | elif hasattr(interaction_space, 'interaction_space'):
98 | print('Interaction space detected as an Interactions class')
99 | if not hasattr(interaction_space.interaction_space, 'distance_matrix'):
100 | raise ValueError('First run the method compute_pairwise_cci_scores() in your interaction' + \
101 | ' object to generate a distance matrix.')
102 | else:
103 | distance_matrix = interaction_space.interaction_space.distance_matrix
104 | else:
105 | raise ValueError('First run the method compute_pairwise_cci_scores() in your interaction' + \
106 | ' object to generate a distance matrix.')
107 |
108 | # Drop excluded cells
109 | if excluded_cells is not None:
110 | df = distance_matrix.loc[~distance_matrix.index.isin(excluded_cells),
111 | ~distance_matrix.columns.isin(excluded_cells)]
112 | else:
113 | df = distance_matrix
114 |
115 | # PCoA
116 | ordination = pcoa(df, method=pcoa_method)
117 | ordination = _check_ordination(ordination)
118 | ordination['samples'].index = df.index
119 |
120 | # Biplot
121 | fig = plt.figure(figsize=figsize)
122 | ax = fig.add_subplot(111, projection='3d')
123 | #ax = Axes3D(fig) # Not displayed in newer versions
124 |
125 | if metadata is None:
126 | metadata = pd.DataFrame()
127 | metadata[sample_col] = list(distance_matrix.columns)
128 | metadata[group_col] = list(distance_matrix.columns)
129 |
130 | meta_ = metadata.set_index(sample_col)
131 | if excluded_cells is not None:
132 | meta_ = meta_.loc[~meta_.index.isin(excluded_cells)]
133 | labels = meta_[group_col].values.tolist()
134 |
135 | if colors is None:
136 | colors = get_colors_from_labels(labels, cmap=meta_cmap)
137 | else:
138 | assert all(elem in colors.keys() for elem in set(labels))
139 |
140 | # Plot each data point with respective color
141 | for i, cell_type in enumerate(sorted(meta_[group_col].unique())):
142 | cells = list(meta_.loc[meta_[group_col] == cell_type].index)
143 | if colors is not None:
144 | ax.scatter(ordination['samples'].loc[cells, 'PC1'],
145 | ordination['samples'].loc[cells, 'PC2'],
146 | ordination['samples'].loc[cells, 'PC3'],
147 | color=colors[cell_type],
148 | s=50,
149 | edgecolors='k',
150 | label=cell_type)
151 | else:
152 | ax.scatter(ordination['samples'].loc[cells, 'PC1'],
153 | ordination['samples'].loc[cells, 'PC2'],
154 | ordination['samples'].loc[cells, 'PC3'],
155 | s=50,
156 | edgecolors='k',
157 | label=cell_type)
158 |
159 | # Plot texts
160 | ax.set_xlabel('PC1 ({}%)'.format(np.round(ordination['proportion_explained']['PC1'] * 100, 2)), fontsize=axis_fontsize)
161 | ax.set_ylabel('PC2 ({}%)'.format(np.round(ordination['proportion_explained']['PC2'] * 100, 2)), fontsize=axis_fontsize)
162 | ax.set_zlabel('PC3 ({}%)'.format(np.round(ordination['proportion_explained']['PC3'] * 100, 2)), fontsize=axis_fontsize)
163 |
164 | ax.set_xticklabels([])
165 | ax.set_yticklabels([])
166 | ax.set_zticklabels([])
167 |
168 | ax.view_init(view_angles[0], view_angles[1])
169 | plt.legend(loc='center left', bbox_to_anchor=(1.35, 0.5),
170 | ncol=2, fancybox=True, shadow=True, fontsize=legend_fontsize)
171 | plt.title(title, fontsize=16)
172 |
173 | #distskbio = skbio.DistanceMatrix(df, ids=df.index) # Not using skbio for now
174 |
175 | # Save plot
176 | if filename is not None:
177 | plt.savefig(filename, dpi=300,
178 | bbox_inches='tight')
179 |
180 | results = {'fig' : fig, 'axes' : ax, 'ordination' : ordination, 'distance_matrix' : df} # df used to be distskbio
181 | return results
--------------------------------------------------------------------------------
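Since pcoa_3dplot() also accepts a plain distance DataFrame, it can be driven directly by the toy dataset shown earlier. A sketch (the toy matrix has only five cells, so three principal coordinates capture nearly all the variance):

from cell2cell.datasets.toy_data import generate_toy_distance, generate_toy_metadata
from cell2cell.plotting.pcoa_plot import pcoa_3dplot

distance = generate_toy_distance()
metadata = generate_toy_metadata()

results = pcoa_3dplot(distance, metadata=metadata, title='Toy PCoA')
results['fig'].savefig('toy_pcoa.png', dpi=300, bbox_inches='tight')
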
/cell2cell/plotting/umap_plot.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import seaborn as sns
3 | import matplotlib.pyplot as plt
4 |
5 |
6 | def umap_biplot(umap_df, figsize=(8, 8), ax=None, show_axes=True, show_legend=True, hue=None,
7 | cmap='tab10', fontsize=20, filename=None):
8 | '''Plots a UMAP biplot for the UMAP embeddings.
9 |
10 | Parameters
11 | ----------
12 | umap_df : pandas.DataFrame
13 | Dataframe containing the UMAP embeddings for the axis analyzed.
14 | It must contain columns 'umap1 and 'umap2'. If a hue column is
15 | provided in the parameter 'hue', that column must be provided
16 | in this dataframe.
17 |
18 | figsize : tuple, default=(8, 8)
19 | Size of the figure (width*height), each in inches.
20 |
21 | ax : matplotlib.axes.Axes, default=None
22 | The matplotlib axes containing a plot.
23 |
24 | show_axes : boolean, default=True
25 | Whether to show lines, ticks and tick labels of both axes.
26 |
27 | show_legend : boolean, default=True
28 | Whether to include the legend when a hue is provided.
29 |
30 | hue : vector or key in 'umap_df'
31 | Grouping variable that will produce points with different colors.
32 | Can be either categorical or numeric, although color mapping will
33 | behave differently in the latter case.
34 |
35 | cmap : str, default='tab10'
36 | Name of the color palette for coloring elements with UMAP embeddings.
37 |
38 | fontsize : int, default=20
39 | Fontsize of the axis labels (UMAP1 and UMAP2).
40 |
41 | filename : str, default=None
42 | Path to save the figure. If None, the figure is not
43 | saved.
44 |
45 | Returns
46 | -------
47 | fig : matplotlib.figure.Figure
48 | A matplotlib Figure instance. Only returned when `ax` is None.
49 |
50 | ax : matplotlib.axes.Axes
51 | The matplotlib axes containing the plot.
52 | '''
53 | created_fig = ax is None  # sns.scatterplot below reassigns ax
54 | if created_fig:
55 | fig = plt.figure(figsize=figsize)
56 |
57 | ax = sns.scatterplot(x='umap1',
58 | y='umap2',
59 | data=umap_df,
60 | hue=hue,
61 | palette=cmap,
62 | ax=ax
63 | )
64 |
65 | if show_axes:
66 | sns.despine(ax=ax,
67 | offset=15
68 | )
69 |
70 | ax.tick_params(axis='both',
71 | which='both',
72 | colors='black',
73 | width=2,
74 | length=5
75 | )
76 | else:
77 | ax.set_xticks([])
78 | ax.set_yticks([])
79 | for key, spine in ax.spines.items():
80 | spine.set_visible(False)
81 |
82 |
83 | for tick in ax.get_xticklabels():
84 | tick.set_fontproperties('arial')
85 | tick.set_weight("bold")
86 | tick.set_color("black")
87 | tick.set_fontsize(int(0.7*fontsize))
88 | for tick in ax.get_yticklabels():
89 | tick.set_fontproperties('arial')
90 | tick.set_weight("bold")
91 | tick.set_color("black")
92 | tick.set_fontsize(int(0.7*fontsize))
93 |
94 | ax.set_xlabel('UMAP 1', fontsize=fontsize)
95 | ax.set_ylabel('UMAP 2', fontsize=fontsize)
96 |
97 | if (show_legend) & (hue is not None):
98 | # Put the legend out of the figure
99 | legend = ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
100 | legend.set_title(hue)
101 | legend.get_title().set_fontsize(int(0.7*fontsize))
102 |
103 | for text in legend.get_texts():
104 | text.set_fontsize(int(0.7*fontsize))
105 |
106 | if filename is not None:
107 | plt.savefig(filename, dpi=300, bbox_inches='tight')
108 |
109 | if created_fig:
110 | return fig, ax
111 | else:
112 | return ax
--------------------------------------------------------------------------------
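umap_biplot() pairs naturally with run_umap() from cell2cell.external. The hue column below is a hypothetical grouping appended to the embedding table:

import numpy as np
import pandas as pd

from cell2cell.external.umap import run_umap
from cell2cell.plotting.umap_plot import umap_biplot

rng = np.random.default_rng(0)
rnaseq = pd.DataFrame(rng.random((30, 12)),
                      columns=['Sample{}'.format(j) for j in range(12)])

umap_df = run_umap(rnaseq, axis=1, n_neighbors=5, random_state=1)
umap_df['Condition'] = ['Healthy'] * 6 + ['Disease'] * 6  # hypothetical labels

fig, ax = umap_biplot(umap_df, hue='Condition')
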
/cell2cell/preprocessing/__init__.py:
--------------------------------------------------------------------------------
1 | from cell2cell.preprocessing.cutoffs import (get_constant_cutoff, get_cutoffs, get_global_percentile_cutoffs,
2 | get_local_percentile_cutoffs)
3 | from cell2cell.preprocessing.find_elements import (find_duplicates, get_element_abundances, get_elements_over_fraction)
4 | from cell2cell.preprocessing.gene_ontology import (find_all_children_of_go_term, find_go_terms_from_keyword,
5 | get_genes_from_go_hierarchy, get_genes_from_go_terms)
6 | from cell2cell.preprocessing.integrate_data import (get_thresholded_rnaseq, get_modified_rnaseq, get_ppi_dict_from_go_terms,
7 | get_ppi_dict_from_proteins, get_weighted_ppi)
8 | from cell2cell.preprocessing.manipulate_dataframes import (check_presence_in_dataframe, shuffle_cols_in_df, shuffle_rows_in_df,
9 | shuffle_dataframe, subsample_dataframe)
10 | from cell2cell.preprocessing.ppi import (bidirectional_ppi_for_cci, filter_ppi_by_proteins, filter_ppi_network,
11 | get_all_to_all_ppi, get_filtered_ppi_network, get_one_group_to_other_ppi,
12 | remove_ppi_bidirectionality, simplify_ppi, filter_complex_ppi_by_proteins,
13 | get_genes_from_complexes, preprocess_ppi_data)
14 | from cell2cell.preprocessing.rnaseq import (divide_expression_by_max, divide_expression_by_mean, drop_empty_genes,
15 | log10_transformation, scale_expression_by_sum, add_complexes_to_expression,
16 | aggregate_single_cells)
17 |
18 | from cell2cell.preprocessing.signal import (smooth_curve)
--------------------------------------------------------------------------------
/cell2cell/preprocessing/cutoffs.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import absolute_import
4 |
5 | from cell2cell.io import read_data
6 |
7 | import numpy as np
8 | import pandas as pd
9 |
10 |
11 | def get_local_percentile_cutoffs(rnaseq_data, percentile=0.75):
12 | '''
13 | Obtains a local value associated with a given percentile across
14 | cells/tissues/samples for each gene in rnaseq_data.
15 |
16 | Parameters
17 | ----------
18 | rnaseq_data : pandas.DataFrame
19 | Gene expression data for a bulk RNA-seq experiment or a single-cell
20 | experiment after aggregation into cell types. Columns are
21 | cell-types/tissues/samples and rows are genes.
22 |
23 | percentile : float, default=0.75
24 | This is the percentile to be computed.
25 |
26 | Returns
27 | -------
28 | cutoffs : pandas.DataFrame
29 | A dataframe containing the value corresponding to the percentile
30 | across the genes. Rows are genes and the column corresponds to
31 | 'value'.
32 | '''
33 | cutoffs = rnaseq_data.quantile(percentile, axis=1).to_frame()
34 | cutoffs.columns = ['value']
35 | return cutoffs
36 |
37 |
38 | def get_global_percentile_cutoffs(rnaseq_data, percentile=0.75):
39 | '''
40 | Obtains a global value associated with a given percentile across
41 | cells/tissues/samples and genes in rnaseq_data.
42 |
43 | Parameters
44 | ----------
45 | rnaseq_data : pandas.DataFrame
46 | Gene expression data for a bulk RNA-seq experiment or a single-cell
47 | experiment after aggregation into cell types. Columns are
48 | cell-types/tissues/samples and rows are genes.
49 |
50 | percentile : float, default=0.75
51 | This is the percentile to be computed.
52 |
53 | Returns
54 | -------
55 | cutoffs : pandas.DataFrame
56 | A dataframe containing the value corresponding to the percentile
57 | across the dataset. Rows are genes and the column corresponds to
58 | 'value'. All values here are the same global percentile.
59 | '''
60 | cutoffs = pd.DataFrame(index=rnaseq_data.index, columns=['value'])
61 | cutoffs['value'] = np.quantile(rnaseq_data.values, percentile)
62 | return cutoffs
63 |
64 |
65 | def get_constant_cutoff(rnaseq_data, constant_cutoff=10):
66 | '''
67 | Generates a cutoff/threshold dataframe for all genes
68 | in rnaseq_data assigning a constant value as the cutoff.
69 |
70 | Parameters
71 | ----------
72 | rnaseq_data : pandas.DataFrame
73 | Gene expression data for a bulk RNA-seq experiment or a single-cell
74 | experiment after aggregation into cell types. Columns are
75 | cell-types/tissues/samples and rows are genes.
76 |
77 | constant_cutoff : float, default=10
78 | Cutoff or threshold assigned to each gene.
79 |
80 | Returns
81 | -------
82 | cutoffs : pandas.DataFrame
83 | A dataframe containing the value corresponding to cutoff or threshold
84 | assigned to each gene. Rows are genes and the column corresponds to
85 | 'value'. All values are the same and correspond to the
86 | constant_cutoff.
87 | '''
88 | cutoffs = pd.DataFrame(index=rnaseq_data.index)
89 | cutoffs['value'] = constant_cutoff
90 | return cutoffs
91 |
92 |
93 | def get_cutoffs(rnaseq_data, parameters, verbose=True):
94 | '''
95 | This function creates cutoff/threshold values for genes
96 | in rnaseq_data and the respective cells/tissues/samples
97 | by a given method or parameter.
98 |
99 | Parameters
100 | ----------
101 | rnaseq_data : pandas.DataFrame
102 | Gene expression data for a bulk RNA-seq experiment or a single-cell
103 | experiment after aggregation into cell types. Columns are
104 | cell-types/tissues/samples and rows are genes.
105 |
106 | parameters : dict
107 | This dictionary must contain a 'parameter' key and a 'type' key.
108 | The first one is the respective parameter to compute the threshold
109 | or cutoff values. The type corresponds to the approach to
110 | compute the values according to the parameter employed.
111 | Options of 'type' that can be used:
112 |
113 | - 'local_percentile' : computes the value of a given percentile,
114 | for each gene independently. In this case,
115 | the parameter corresponds to the percentile
116 | to compute, as a float value between 0 and 1.
117 | - 'global_percentile' : computes the value of a given percentile
118 | from all genes and samples simultaneously.
119 | In this case, the parameter corresponds to
120 | the percentile to compute, as a float value
121 | between 0 and 1. All genes have the same cutoff.
122 | - 'file' : load a cutoff table from a file. Parameter in this case is
123 | the path of that file. It must contain the same genes as
124 | index and same samples as columns.
125 | - 'multi_col_matrix' : a dataframe must be provided, containing a
126 | cutoff for each gene in each sample. This allows
127 | to use specific cutoffs for each sample. The
128 | columns here must be the same as the ones in the
129 | rnaseq_data.
130 | - 'single_col_matrix' : a dataframe must be provided, containing a
131 | cutoff for each gene in only one column. These
132 | cutoffs will be applied to all samples.
133 | - 'constant_value' : binarizes the expression. Evaluates whether
134 | expression is greater than the value input in
135 | the 'parameter'.
136 |
137 | verbose : boolean, default=True
138 | Whether to print the steps of the analysis.
139 |
140 | Returns
141 | -------
142 | cutoffs : pandas.DataFrame
143 | Dataframe wherein rows are genes in rnaseq_data. Depending on the type in
144 | the parameters dictionary, it may have only one column ('value') or the
145 | same columns that rnaseq_data has, generating specific cutoffs for each
146 | cell/tissue/sample.
147 | '''
148 | parameter = parameters['parameter']
149 | type = parameters['type']
150 | if verbose:
151 | print("Calculating cutoffs for gene abundances")
152 | if type == 'local_percentile':
153 | cutoffs = get_local_percentile_cutoffs(rnaseq_data, parameter)
154 | cutoffs.columns = ['value']
155 | elif type == 'global_percentile':
156 | cutoffs = get_global_percentile_cutoffs(rnaseq_data, parameter)
157 | cutoffs.columns = ['value']
158 | elif type == 'constant_value':
159 | cutoffs = get_constant_cutoff(rnaseq_data, parameter)
160 | cutoffs.columns = ['value']
161 | elif type == 'file':
162 | cutoffs = read_data.load_cutoffs(parameter,
163 | format='auto')
164 | cutoffs = cutoffs.loc[rnaseq_data.index]
165 | elif type == 'multi_col_matrix':
166 | cutoffs = parameter
167 | cutoffs = cutoffs.loc[rnaseq_data.index]
168 | cutoffs = cutoffs[rnaseq_data.columns]
169 | elif type == 'single_col_matrix':
170 | cutoffs = parameter
171 | cutoffs.columns = ['value']
172 | cutoffs = cutoffs.loc[rnaseq_data.index]
173 | else:
174 | raise ValueError(type + ' is not a valid cutoff')
175 | return cutoffs
--------------------------------------------------------------------------------
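A sketch of the dictionary-driven interface of get_cutoffs(), using the local-percentile strategy on random values:

import numpy as np
import pandas as pd

from cell2cell.preprocessing.cutoffs import get_cutoffs

rng = np.random.default_rng(0)
rnaseq = pd.DataFrame(rng.random((100, 4)) * 20.,
                      index=['Gene{}'.format(i) for i in range(100)],
                      columns=['C1', 'C2', 'C3', 'C4'])

# 75th percentile of each gene across the four cell types
cutoffs = get_cutoffs(rnaseq, {'parameter': 0.75, 'type': 'local_percentile'})
print(cutoffs.head())  # a single 'value' column, indexed by gene
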
/cell2cell/preprocessing/find_elements.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import absolute_import
4 |
5 | import itertools
6 | from collections import defaultdict, Counter
7 |
8 | def find_duplicates(element_list):
9 | '''Function based on: https://stackoverflow.com/a/5419576/12032899
10 | Finds duplicate items and list their index location.
11 |
12 | Parameters
13 | ----------
14 | element_list : list
15 | List of elements
16 |
17 | Returns
18 | -------
19 | duplicate_dict : dict
20 | Dictionary with duplicate items. Keys are the items, and values
21 | are lists with the respective indexes where they are.
22 | '''
23 | tally = defaultdict(list)
24 | for i,item in enumerate(element_list):
25 | tally[item].append(i)
26 |
27 | duplicate_dict = {key : locs for key,locs in tally.items()
28 | if len(locs)>1}
29 | return duplicate_dict
30 |
31 |
32 | def get_element_abundances(element_lists):
33 | '''Computes the fraction of occurrence of each element
34 | in a list of lists.
35 |
36 | Parameters
37 | ----------
38 | element_lists : list
39 | List of lists of elements. Elements will be
40 | counted only once in each of the lists.
41 |
42 | Returns
43 | -------
44 | abundance_dict : dict
45 | Dictionary containing the number of times that an
46 | element was present, divided by the total number of
47 | lists in `element_lists`.
48 | '''
49 | abundance_dict = Counter(itertools.chain(*map(set, element_lists)))
50 | total = len(element_lists)
51 | abundance_dict = {k : v/total for k, v in abundance_dict.items()}
52 | return abundance_dict
53 |
54 |
55 | def get_elements_over_fraction(abundance_dict, fraction):
56 | '''Obtains a list of elements whose fraction of
57 | occurrence is at least the given threshold.
58 |
59 | Parameters
60 | ----------
61 | abundance_dict : dict
62 | Dictionary containing the number of times that an
63 | element was present, divided by the total number of
64 | possible occurrences.
65 |
66 | fraction : float
67 | Threshold to filter the elements. Elements with at least
68 | this threshold will be included.
69 |
70 | Returns
71 | -------
72 | elements : list
73 | List of elements that met the fraction criteria.
74 | '''
75 | elements = [k for k, v in abundance_dict.items() if v >= fraction]
76 | return elements
--------------------------------------------------------------------------------
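The two abundance helpers are typically chained to find elements shared across contexts, e.g. ligand-receptor pairs detected in most samples (the pair names here are made up):

from cell2cell.preprocessing.find_elements import (get_element_abundances,
                                                   get_elements_over_fraction)

lr_lists = [['LigandA^ReceptorB', 'LigandC^ReceptorD'],
            ['LigandA^ReceptorB'],
            ['LigandA^ReceptorB', 'LigandE^ReceptorF']]

abundances = get_element_abundances(lr_lists)  # {'LigandA^ReceptorB': 1.0, ...}
core = get_elements_over_fraction(abundances, fraction=0.5)
print(core)  # elements present in at least half of the lists
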
/cell2cell/preprocessing/gene_ontology.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import absolute_import
4 |
5 | import numpy as np
6 | import networkx
7 |
8 |
9 | def get_genes_from_go_terms(go_annotations, go_filter, go_header='GO', gene_header='Gene', verbose=True):
10 | '''
11 | Finds genes associated with specific GO-terms.
12 |
13 | Parameters
14 | ----------
15 | go_annotations : pandas.DataFrame
16 | Dataframe containing information about GO term annotations of each
17 | gene for a given organism according to the gaf file. Can be loaded
18 | with the function cell2cell.io.read_data.load_go_annotations().
19 |
20 | go_filter : list
21 | List containing one or more GO-terms to find associated genes.
22 |
23 | go_header : str, default='GO'
24 | Column name wherein GO terms are located in the dataframe.
25 |
26 | gene_header : str, default='Gene'
27 | Column name wherein genes are located in the dataframe.
28 |
29 | verbose : boolean, default=True
30 | Whether to print the steps of the analysis.
31 |
32 | Returns
33 | -------
34 | genes : list
35 | List of genes that are associated with GO-terms contained in
36 | go_filter.
37 | '''
38 | if verbose:
39 | print('Filtering genes by using GO terms')
40 | genes = list(go_annotations.loc[go_annotations[go_header].isin(go_filter)][gene_header].unique())
41 | return genes
42 |
43 |
44 | def get_genes_from_go_hierarchy(go_annotations, go_terms, go_filter, go_header='GO', gene_header='Gene', verbose=False):
45 | '''
46 | Obtains genes associated with specific GO terms and their
47 | children GO terms (below in the hierarchy).
48 |
49 | Parameters
50 | ----------
51 | go_annotations : pandas.DataFrame
52 | Dataframe containing information about GO term annotations of each
53 | gene for a given organism according to the gaf file. Can be loaded
54 | with the function cell2cell.io.read_data.load_go_annotations().
55 |
56 | go_terms : networkx.Graph
57 | NetworkX Graph containing GO terms datasets from .obo file.
58 | It could be loaded using
59 | cell2cell.io.read_data.load_go_terms(filename).
60 |
61 | go_filter : list
62 | List containing one or more GO-terms to find associated genes.
63 |
64 | go_header : str, default='GO'
65 | Column name wherein GO terms are located in the dataframe.
66 |
67 | gene_header : str, default='Gene'
68 | Column name wherein genes are located in the dataframe.
69 |
70 | verbose : boolean, default=False
71 | Whether to print the steps of the analysis.
72 |
73 | Returns
74 | -------
75 | genes : list
76 | List of genes that are associated with GO-terms contained in
77 | go_filter, and related to the children GO terms of those terms.
78 | '''
79 | go_hierarchy = go_filter.copy()
80 | iter = len(go_hierarchy)
81 | for i in range(iter):
82 | find_all_children_of_go_term(go_terms, go_hierarchy[i], go_hierarchy, verbose=verbose)
83 | go_hierarchy = list(set(go_hierarchy))
84 | genes = get_genes_from_go_terms(go_annotations=go_annotations,
85 | go_filter=go_hierarchy,
86 | go_header=go_header,
87 | gene_header=gene_header,
88 | verbose=verbose)
89 | return genes
90 |
91 |
92 | def find_all_children_of_go_term(go_terms, go_term_name, output_list, verbose=True):
93 | '''
94 | Finds all children GO terms (below in hierarchy) of
95 | a given GO term.
96 |
97 | Parameters
98 | ----------
99 | go_terms : networkx.Graph
100 | NetworkX Graph containing GO terms datasets from .obo file.
101 | It could be loaded using
102 | cell2cell.io.read_data.load_go_terms(filename).
103 |
104 | go_term_name : str
105 | Specific GO term to find their children. For example:
106 | 'GO:0007155'.
107 |
108 | output_list : list
109 | List used to perform a Depth First Search and find the
110 | children in a recursive way. Here the children will be
111 | automatically written.
112 |
113 | verbose : boolean, default=True
114 | Whether to print the steps of the analysis.
115 | '''
116 | for child in networkx.ancestors(go_terms, go_term_name):
117 | if child not in output_list:
118 | if verbose:
119 | print('Retrieving children for ' + go_term_name)
120 | output_list.append(child)
121 | find_all_children_of_go_term(go_terms, child, output_list, verbose)
122 |
123 |
124 | def find_go_terms_from_keyword(go_terms, keyword, verbose=False):
125 | '''
126 | Uses a keyword to find related GO terms.
127 |
128 | Parameters
129 | ----------
130 | go_terms : networkx.Graph
131 | NetworkX Graph containing GO terms datasets from .obo file.
132 | It could be loaded using
133 | cell2cell.io.read_data.load_go_terms(filename).
134 |
135 | keyword : str
136 | Keyword to be included in the names of retrieved GO terms.
137 |
138 | verbose : boolean, default=False
139 | Whether to print the steps of the analysis.
140 |
141 | Returns
142 | -------
143 | go_filter : list
144 | List containing all GO terms related to a keyword.
145 | '''
146 | go_filter = []
147 | for go, node in go_terms.nodes.items():
148 | if keyword in node['name']:
149 | go_filter.append(go)
150 | if verbose:
151 | print(go, node['name'])
152 | return go_filter
--------------------------------------------------------------------------------
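A sketch of the intended workflow, using the loaders named in the docstrings above; the file paths are placeholders and the loader signatures are assumed from those docstrings:

from cell2cell.io.read_data import load_go_terms, load_go_annotations
from cell2cell.preprocessing.gene_ontology import (find_go_terms_from_keyword,
                                                   get_genes_from_go_hierarchy)

go_terms = load_go_terms('go-basic.obo')                  # placeholder path
go_annotations = load_go_annotations('goa_human.gaf.gz')  # placeholder path

adhesion_terms = find_go_terms_from_keyword(go_terms, keyword='cell adhesion')
# GO:0007155 is 'cell adhesion'; its children terms are included automatically
genes = get_genes_from_go_hierarchy(go_annotations, go_terms, go_filter=['GO:0007155'])
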
/cell2cell/preprocessing/manipulate_dataframes.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import absolute_import
4 |
5 | import random
6 | import numpy as np
7 | import pandas as pd
8 |
9 |
10 | def check_presence_in_dataframe(df, elements, columns=None):
11 | '''
12 | Searches for elements in a dataframe and returns those
13 | that are present in the dataframe.
14 |
15 | Parameters
16 | ----------
17 | df : pandas.DataFrame
18 | A dataframe
19 |
20 | elements : list
21 | List of elements to find in the dataframe. They
22 | must be a data type contained in the dataframe.
23 |
24 | columns : list, default=None
25 | Names of columns to consider in the search. If
26 | None, all columns are used.
27 |
28 | Returns
29 | -------
30 | found_elements : list
31 | List of elements in the input list that were found
32 | in the dataframe.
33 | '''
34 | if columns is None:
35 | columns = list(df.columns)
36 | df_elements = pd.Series(np.unique(df[columns].values.flatten()))
37 | df_elements = df_elements.loc[df_elements.isin(elements)].values
38 | found_elements = list(df_elements)
39 | return found_elements
40 |
41 |
42 | def shuffle_cols_in_df(df, columns, shuffling_number=1, random_state=None):
43 | '''
44 | Randomly shuffles specific columns in a dataframe.
45 |
46 | Parameters
47 | ----------
48 | df : pandas.DataFrame
49 | A dataframe.
50 |
51 | columns : list
52 | Names of columns to shuffle.
53 |
54 | shuffling_number : int, default=1
55 | Number of shuffles per column.
56 |
57 | random_state : int, default=None
58 | Seed for randomization.
59 |
60 | Returns
61 | -------
62 | df_ : pandas.DataFrame
63 | A shuffled dataframe.
64 | '''
65 | df_ = df.copy()
66 | if isinstance(columns, str):
67 | columns = [columns]
68 |
69 | for col in columns:
70 | for i in range(shuffling_number):
71 | if random_state is not None:
72 | np.random.seed(random_state + i)
73 | df_[col] = np.random.permutation(df_[col].values)
74 | return df_
75 |
76 |
77 | def shuffle_rows_in_df(df, rows, shuffling_number=1, random_state=None):
78 | '''
79 | Randomly shuffles specific rows in a dataframe.
80 |
81 | Parameters
82 | ----------
83 | df : pandas.DataFrame
84 | A dataframe.
85 |
86 | rows : list
87 | Names of rows (or indexes) to shuffle.
88 |
89 | shuffling_number : int, default=1
90 | Number of shuffles per row.
91 |
92 | random_state : int, default=None
93 | Seed for randomization.
94 |
95 | Returns
96 | -------
97 | df_.T : pandas.DataFrame
98 | A shuffled dataframe.
99 | '''
100 | df_ = df.copy().T
101 | if isinstance(rows, str):
102 | rows = [rows]
103 |
104 | for row in rows:
105 | for i in range(shuffling_number):
106 | if random_state is not None:
107 | np.random.seed(random_state + i)
108 | df_[row] = np.random.permutation(df_[row].values)
109 | return df_.T
110 |
111 |
112 | def shuffle_dataframe(df, shuffling_number=1, axis=0, random_state=None):
113 | '''
114 | Randomly shuffles a whole dataframe across a given axis.
115 |
116 | Parameters
117 | ----------
118 | df : pandas.DataFrame
119 | A dataframe.
120 |
121 | shuffling_number : int, default=1
122 | Number of shuffles per column.
123 |
124 | axis : int, default=0
125 | An axis of the dataframe (0 across rows, 1 across columns).
126 | Across rows means that shuffles each column independently,
127 | and across columns shuffles each row independently.
128 |
129 | random_state : int, default=None
130 | Seed for randomization.
131 |
132 | Returns
133 | -------
134 | df_ : pandas.DataFrame
135 | A shuffled dataframe.
136 | '''
137 | df_ = df.copy()
138 | axis = int(not axis) # pandas.DataFrame is always 2D
139 | to_shuffle = np.rollaxis(df_.values, axis)
140 | for _ in range(shuffling_number):
141 | for i, view in enumerate(to_shuffle):
142 | if random_state is not None:
143 | np.random.seed(random_state + i)
144 | np.random.shuffle(view)
145 | df_ = pd.DataFrame(np.rollaxis(to_shuffle, axis=axis), index=df_.index, columns=df_.columns)
146 | return df_
147 |
148 |
149 | def subsample_dataframe(df, n_samples, random_state=None):
150 | '''
151 | Randomly subsamples rows of a dataframe.
152 |
153 | Parameters
154 | ----------
155 | df : pandas.DataFrame
156 | A dataframe.
157 |
158 | n_samples : int
159 | Number of samples, rows in this case. If
160 | n_samples is larger than the number of rows,
161 | the entire dataframe will be returned, but
162 | shuffled.
163 |
164 | random_state : int, default=None
165 | Seed for randomization.
166 |
167 | Returns
168 | -------
169 | subsampled_df : pandas.DataFrame
170 | A subsampled and shuffled dataframe.
171 | '''
172 | items = list(df.index)
173 | if n_samples > len(items):
174 | n_samples = len(items)
175 | if isinstance(random_state, int):
176 | random.seed(random_state)
177 | random.shuffle(items)
178 |
179 | subsampled_df = df.loc[items[:n_samples],:]
180 | return subsampled_df
181 |
182 |
183 | def check_symmetry(df):
184 | '''
185 | Checks whether a dataframe is symmetric.
186 |
187 | Parameters
188 | ----------
189 | df : pandas.DataFrame
190 | A dataframe.
191 |
192 | Returns
193 | -------
194 | symmetric : boolean
195 | Whether a dataframe is symmetric.
196 | '''
197 | shape = df.shape
198 | if shape[0] == shape[1]:
199 | symmetric = (df.values.transpose() == df.values).all()
200 | else:
201 | symmetric = False
202 | return symmetric
203 |
204 |
205 | def convert_to_distance_matrix(df):
206 | '''
207 | Converts a symmetric dataframe into a distance dataframe.
208 | That is, diagonal elements are all zero.
209 |
210 | Parameters
211 | ----------
212 | df : pandas.DataFrame
213 | A dataframe.
214 |
215 | Returns
216 | -------
217 | df_ : pandas.DataFrame
218 | A copy of df, but with all diagonal elements with a
219 | value of zero.
220 | '''
221 | if check_symmetry(df):
222 | df_ = df.copy()
223 | if np.trace(df_.values) != 0.0:
224 | print("Warning: diagonal elements are not zero. Automatically replaced by zeros.")
225 | np.fill_diagonal(df_.values, 0.0)
226 | else:
227 | raise ValueError('The DataFrame is not symmetric')
228 | return df_
229 |
--------------------------------------------------------------------------------
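A sketch contrasting whole-dataframe shuffling with row subsampling (toy values):

import pandas as pd

from cell2cell.preprocessing.manipulate_dataframes import (shuffle_dataframe,
                                                           subsample_dataframe)

df = pd.DataFrame({'C1': [1, 2, 3, 4], 'C2': [5, 6, 7, 8]},
                  index=['G1', 'G2', 'G3', 'G4'])

shuffled = shuffle_dataframe(df, axis=0, random_state=0)       # permutes within each column
subset = subsample_dataframe(df, n_samples=2, random_state=0)  # two random rows
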
/cell2cell/preprocessing/signal.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from scipy.signal import savgol_filter
4 |
5 |
6 | def smooth_curve(values, window_length=None, polyorder=3, **kwargs):
7 | '''Apply a Savitzky-Golay filter to an array to smooth the curve.
8 |
9 | Parameters
10 | ----------
11 | values : array-like
12 | An array or list of values.
13 |
14 | window_length : int, default=None
15 | Size of the window of values to use to smooth the curve.
16 |
17 | polyorder : int, default=3
18 | The order of the polynomial used to fit the samples.
19 |
20 | **kwargs : dict
21 | Extra arguments for the scipy.signal.savgol_filter function.
22 |
23 | Returns
24 | -------
25 | smooth_values : array-like
26 | An array or list of values representing the smoothed curve.
27 | '''
28 | size = len(values)
29 | if window_length is None:
30 | window_length = int(size / min([2, size]))
31 | if window_length % 2 == 0:
32 | window_length += 1
33 | assert(polyorder < window_length), "polyorder must be less than window_length."
34 | smooth_values = savgol_filter(values, window_length, polyorder, **kwargs)
35 | return smooth_values
--------------------------------------------------------------------------------
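A sketch of the default window selection: for 25 points, window_length starts at 25 // 2 = 12 and is bumped to 13 to satisfy savgol_filter's odd-length requirement:

import numpy as np

from cell2cell.preprocessing.signal import smooth_curve

rng = np.random.default_rng(0)
noisy = np.sin(np.linspace(0, 3, 25)) + rng.normal(0, 0.1, 25)

smooth = smooth_curve(noisy)  # window_length=13 here, polyorder=3
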
/cell2cell/spatial/__init__.py:
--------------------------------------------------------------------------------
1 | from cell2cell.spatial.distances import (celltype_pair_distance, pairwise_celltype_distances)
2 | from cell2cell.spatial.filtering import (dist_filter_liana, dist_filter_tensor)
3 | from cell2cell.spatial.neighborhoods import (create_spatial_grid, create_sliding_windows, calculate_window_size, add_sliding_window_info_to_adata)
--------------------------------------------------------------------------------
/cell2cell/spatial/distances.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import itertools
3 | import numpy as np
4 | import pandas as pd
5 | from sklearn.metrics.pairwise import euclidean_distances, manhattan_distances
6 |
7 |
8 | def celltype_pair_distance(df1, df2, method='min', distance='euclidean'):
9 | '''
10 | Calculates the distance between two sets of data points (single cell coordinates)
11 | represented by df1 and df2. It supports two distance metrics: Euclidean and Manhattan
12 | distances. The method parameter allows you to specify how the distances between the
13 | two sets are aggregated.
14 |
15 | Parameters
16 | ----------
17 | df1 : pandas.DataFrame
18 | The first set of single cell coordinates.
19 |
20 | df2 : pandas.DataFrame
21 | The second set of single cell coordinates.
22 |
23 | method : str, default='min'
24 | The aggregation method for the calculated distances. It can be one of 'min',
25 | 'max', or 'mean'.
26 |
27 | distance : str, default='euclidean'
28 | The distance metric to use. It can be 'euclidean' or 'manhattan'.
29 |
30 | Returns
31 | -------
32 | agg_dist : numpy.float
33 | The aggregated distance between the two sets of data points based on the specified
34 | method and distance metric.
35 | '''
36 | if distance == 'euclidean':
37 | distances = euclidean_distances(df1, df2)
38 | elif distance == 'manhattan':
39 | distances = manhattan_distances(df1, df2)
40 | else:
41 | raise NotImplementedError("{} distance is not implemented.".format(distance.capitalize()))
42 |
43 | if method == 'min':
44 | agg_dist = np.nanmin(distances)
45 | elif method == 'max':
46 | agg_dist = np.nanmax(distances)
47 | elif method == 'mean':
48 | agg_dist = np.nanmean(distances)
49 | else:
50 | raise NotImplementedError('Method {} is not implemented.'.format(method))
51 | return agg_dist
52 |
53 |
54 | def pairwise_celltype_distances(df, group_col, coord_cols=['X', 'Y'],
55 | method='min', distance='euclidean', pairs=None):
56 | '''
57 | Calculates pairwise distances between groups of single cells. It computes an
58 | aggregate distance between all possible combinations of groups.
59 |
60 | Parameters
61 | ----------
62 | df : pandas.DataFrame
63 | A dataframe where each row is a single cell, and there are columns containing
64 | spatial coordinates and cell group.
65 |
66 | group_col : str
67 | The name of the column that defines the groups for which distances are calculated.
68 |
69 | coord_cols : list, default=['X', 'Y']
70 | The list of column names that represent the coordinates of the single cells.
71 |
72 | pairs : list, default=None
73 | A list of specific group pairs for which distances should be calculated.
74 | If not provided, all possible combinations of group pairs will be considered.
75 |
76 | Returns
77 | -------
78 | distances : pandas.DataFrame
79 | The pairwise distances between groups based on the specified group column.
80 | In this dataframe rows and columns are the cell groups used to compute distances.
81 | '''
82 | # TODO: Adapt code below to receive AnnData or MuData objects
83 | # df_ = pd.DataFrame(adata.obsm['spatial'], index=adata.obs_names, columns=['X', 'Y'])
84 | # df = adata.obs[[group_col]]
85 | df_ = df[coord_cols]
86 | groups = df[group_col].unique()
87 | distances = pd.DataFrame(np.zeros((len(groups), len(groups))),
88 | index=groups,
89 | columns=groups)
90 |
91 | if pairs is None:
92 | pairs = list(itertools.combinations(groups, 2))
93 |
94 | for pair in pairs:
95 | dist = celltype_pair_distance(df_.loc[df[group_col] == pair[0]], df_.loc[df[group_col] == pair[1]],
96 | method=method,
97 | distance=distance
98 | )
99 | distances.loc[pair[0], pair[1]] = dist
100 | distances.loc[pair[1], pair[0]] = dist
101 | return distances
--------------------------------------------------------------------------------
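A sketch with four cells in two groups; with method='min', the reported value is the distance between the closest pair of cells across groups:

import pandas as pd

from cell2cell.spatial.distances import pairwise_celltype_distances

cells = pd.DataFrame({'X': [0., 1., 5., 6.],
                      'Y': [0., 1., 5., 6.],
                      'cell_type': ['A', 'A', 'B', 'B']})

dist = pairwise_celltype_distances(cells, group_col='cell_type', method='min')
print(dist.loc['A', 'B'])  # distance between (1, 1) and (5, 5): ~5.66
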
/cell2cell/spatial/filtering.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import numpy as np
3 | import tensorly as tl
4 |
5 |
6 | def dist_filter_tensor(interaction_tensor, distances, max_dist, min_dist=0, source_axis=2, target_axis=3):
7 | '''
8 | Filters an Interaction Tensor based on intercellular distances between cell types.
9 |
10 | Parameters
11 | ----------
12 | interaction_tensor : cell2cell.tensor.BaseTensor
13 |         A communication tensor generated with any of the tensor classes in
14 | cell2cell.tensor
15 |
16 | distances : pandas.DataFrame
17 | Square dataframe containing distances between pairs of cell groups. It must contain
18 | all cell groups that act as sender and receiver cells in the tensor.
19 |
20 | max_dist : float
21 | The maximum distance between cell pairs to consider them in the interaction tensor.
22 |
23 | min_dist : float, default=0
24 | The minimum distance between cell pairs to consider them in the interaction tensor.
25 |
26 | source_axis : int, default=2
27 | The index indicating the axis in the tensor corresponding to sender cells.
28 |
29 | target_axis : int, default=3
30 | The index indicating the axis in the tensor corresponding to receiver cells.
31 |
32 | Returns
33 | -------
34 | new_interaction_tensor : cell2cell.tensor.BaseTensor
35 |         A tensor with communication scores set to zero for cell-type pairs whose
36 |         intercellular distance falls outside the [min_dist, max_dist] range.
37 | '''
38 | # Evaluate whether we provide distances for all cell types in the tensor
39 | assert all([cell in distances.index for cell in
40 | interaction_tensor.order_names[source_axis]]), "Distances not provided for all sender cells"
41 | assert all([cell in distances.columns for cell in
42 | interaction_tensor.order_names[target_axis]]), "Distances not provided for all receiver cells"
43 |
44 | source_cell_groups = interaction_tensor.order_names[source_axis]
45 | target_cell_groups = interaction_tensor.order_names[target_axis]
46 |
47 | # Use only cell types in the tensor
48 | dist_df = distances.loc[source_cell_groups, target_cell_groups]
49 |
50 | # Filter cell types by intercellular distances
51 | dist = ((min_dist <= dist_df) & (dist_df <= max_dist)).astype(int).values
52 |
53 |     # Map the re-arrangement needed to keep the original tensor shape
54 | tensor_shape = list(interaction_tensor.tensor.shape)
55 | original_order = list(range(len(tensor_shape)))
56 | new_order = []
57 |
58 | # Generate template tensor with cells to keep
59 | template_tensor = dist
60 | for i, size in enumerate(tensor_shape):
61 | if (i != source_axis) and (i != target_axis):
62 | template_tensor = [template_tensor] * size
63 | new_order.insert(0, i)
64 | template_tensor = np.array(template_tensor)
65 |
66 | new_order += [source_axis, target_axis]
67 | changes_needed = [new_order.index(i) for i in original_order]
68 |
69 | # Re-arrange axes by the order
70 | template_tensor = template_tensor.transpose(changes_needed)
71 |
72 | # Create tensorly object
73 | template_tensor = tl.tensor(template_tensor, **tl.context(interaction_tensor.tensor))
74 |
75 | assert template_tensor.shape == interaction_tensor.tensor.shape, "Filtering of cells was not properly done. Revise code of this function (template tensor)"
76 |
77 | # tensor = tl.zeros_like(interaction_tensor.tensor, **tl.context(tensor))
78 | new_interaction_tensor = interaction_tensor.copy()
79 | new_interaction_tensor.tensor = new_interaction_tensor.tensor * template_tensor
80 |     # Make cells masked out by distance become real zeros
81 | new_interaction_tensor.loc_zeros = (new_interaction_tensor.tensor == 0).astype(int) - new_interaction_tensor.loc_nans
82 | return new_interaction_tensor
83 |
84 |
85 | def dist_filter_liana(liana_outputs, distances, max_dist, min_dist=0, source_col='source', target_col='target',
86 | keep_dist=False):
87 | '''
88 |     Filters a dataframe with outputs from LIANA based on a distance threshold
89 |     applied to another dataframe containing distances between cell groups.
90 |
91 | Parameters
92 | ----------
93 | liana_outputs : pandas.DataFrame
94 | Dataframe containing the results from LIANA, where rows are pairs of
95 | ligand-receptor interactions by pair of source-target cell groups.
96 |
97 | distances : pandas.DataFrame
98 | Square dataframe containing distances between pairs of cell groups.
99 |
100 | max_dist : float
101 |         The maximum distance between cell pairs to keep them in the liana_outputs dataframe.
102 |
103 | min_dist : float, default=0
104 |         The minimum distance between cell pairs to keep them in the liana_outputs dataframe.
105 |
106 | source_col : str, default='source'
107 | Column name in both dataframes that represents the source cell groups.
108 |
109 | target_col : str, default='target'
110 | Column name in both dataframes that represents the target cell groups.
111 |
112 | keep_dist : bool, default=False
113 | To determine whether to keep the 'distance' column in the filtered output.
114 | If set to True, the 'distance' column will be retained; otherwise, it will be dropped
115 | and the LIANA dataframe will contain the original columns.
116 |
117 | Returns
118 | -------
119 | filtered_liana_outputs : pandas.DataFrame
120 |         A dataframe containing the pairs from liana_outputs that meet the distance
121 |         threshold criteria.
122 | '''
123 | # Convert distances to a long-form dataframe
124 | distances = distances.stack().reset_index()
125 | distances.columns = [source_col, target_col, 'distance']
126 |
127 | # Merge the long-form distances DataFrame with pairs_df
128 | merged_df = liana_outputs.merge(distances, on=[source_col, target_col], how='left')
129 |
130 | # Filter based on the distance threshold
131 | filtered_liana_outputs = merged_df[(min_dist <= merged_df['distance']) & (merged_df['distance'] <= max_dist)]
132 |
133 |     if not keep_dist:
134 | filtered_liana_outputs = filtered_liana_outputs.drop(['distance'], axis=1)
135 |
136 | return filtered_liana_outputs
--------------------------------------------------------------------------------
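A hedged sketch of `dist_filter_liana`, using invented toy dataframes in place of real LIANA outputs and real intercellular distances:

```python
import pandas as pd

from cell2cell.spatial.filtering import dist_filter_liana

# Toy LIANA-style output: one row per LR interaction per source-target pair
liana_res = pd.DataFrame({'source': ['A', 'A', 'B'],
                          'target': ['B', 'C', 'C'],
                          'ligand': ['L1', 'L2', 'L3'],
                          'receptor': ['R1', 'R2', 'R3']})

# Toy square matrix of distances between cell groups
celltype_distances = pd.DataFrame([[0.0, 30.0, 80.0],
                                   [30.0, 0.0, 40.0],
                                   [80.0, 40.0, 0.0]],
                                  index=['A', 'B', 'C'],
                                  columns=['A', 'B', 'C'])

# Keep only pairs of cell groups at most 50 distance units apart
filtered = dist_filter_liana(liana_outputs=liana_res,
                             distances=celltype_distances,
                             max_dist=50.0,
                             keep_dist=True)  # retain the 'distance' column
print(filtered)  # The A->C row (distance 80 > 50) is dropped
```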
/cell2cell/spatial/neighborhoods.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import numpy as np
3 | import pandas as pd
4 |
5 |
6 | def create_spatial_grid(adata, num_bins, copy=False):
7 | """
8 | Segments spatial transcriptomics data into a square grid based on spatial coordinates
9 | and annotates each cell or spot with its corresponding grid position.
10 |
11 | Parameters
12 | ----------
13 | adata : AnnData
14 | The AnnData object containing spatial transcriptomics data. The spatial coordinates
15 | must be stored in `adata.obsm['spatial']`. This object is either modified in place
16 | or a copy is returned based on the `copy` parameter.
17 |
18 | num_bins : int
19 | The number of bins (squares) along each dimension of the grid. The grid is square,
20 | so this number applies to both the horizontal and vertical divisions.
21 |
22 | copy : bool, default=False
23 | If True, the function operates on and returns a copy of the input AnnData object.
24 | If False, the function modifies the input AnnData object in place.
25 |
26 | Returns
27 | -------
28 | adata_ : AnnData or None
29 |         If `copy=True`, a new AnnData object with added grid annotations is returned. Otherwise, the input `adata` is annotated in place and None is returned.
30 | """
31 |
32 | if copy:
33 | adata_ = adata.copy()
34 | else:
35 | adata_ = adata
36 |
37 | # Get the spatial coordinates
38 | coords = pd.DataFrame(adata.obsm['spatial'], index=adata.obs_names, columns=['X', 'Y'])
39 |
40 | # Define the bins for each dimension
41 | x_min, y_min = coords.min()
42 | x_max, y_max = coords.max()
43 | x_bins = np.linspace(x_min, x_max, num_bins + 1)
44 | y_bins = np.linspace(y_min, y_max, num_bins + 1)
45 |
46 | # Digitize the coordinates into bins
47 | adata_.obs['grid_x'] = np.digitize(coords['X'], x_bins, right=False) - 1
48 | adata_.obs['grid_y'] = np.digitize(coords['Y'], y_bins, right=False) - 1
49 |
50 | # Adjust indices to start from 0 and end at num_bins - 1
51 | adata_.obs['grid_x'] = np.clip(adata_.obs['grid_x'], 0, num_bins - 1)
52 | adata_.obs['grid_y'] = np.clip(adata_.obs['grid_y'], 0, num_bins - 1)
53 |
54 | # Combine grid indices to form a grid cell identifier
55 | adata_.obs['grid_cell'] = adata_.obs['grid_x'].astype(str) + "_" + adata_.obs['grid_y'].astype(str)
56 |
57 | if copy:
58 | return adata_
59 |
60 |
61 | def calculate_window_size(adata, num_windows):
62 | """
63 | Calculates the window size required to fit a specified number of windows
64 | across the width of the coordinate space in spatial transcriptomics data.
65 |
66 | Parameters
67 | ----------
68 | adata : AnnData
69 | The AnnData object containing spatial transcriptomics data. The spatial coordinates
70 | must be stored in `adata.obsm['spatial']`.
71 |
72 | num_windows : int
73 | The desired number of windows to fit across the width of the coordinate space.
74 |
75 | Returns
76 | -------
77 | window_size : float
78 | The calculated size of each window to fit the specified number of windows
79 | across the width of the coordinate space.
80 | """
81 |
82 | # Extract X coordinates
83 | x_coords = adata.obsm['spatial'][:, 0]
84 |
85 | # Determine the range of X coordinates
86 | x_min, x_max = np.min(x_coords), np.max(x_coords)
87 |
88 | # Calculate the window size
89 | window_size = (x_max - x_min) / num_windows
90 |
91 | return window_size
92 |
93 |
94 | def create_sliding_windows(adata, window_size, stride):
95 | """
96 | Maps windows to the cells they contain based on spatial transcriptomics data.
97 | Returns a dictionary where keys are window identifiers and values are sets of cell indices.
98 |
99 | Parameters
100 | ----------
101 | adata : AnnData
102 | The AnnData object containing spatial transcriptomics data. The spatial coordinates
103 | must be stored in `adata.obsm['spatial']`.
104 |
105 | window_size : float
106 | The size of each square window along each dimension.
107 |
108 | stride : float
109 | The stride with which the window moves along each dimension.
110 |
111 | Returns
112 | -------
113 | window_mapping : dict
114 | A dictionary mapping each window to a set of cell indices that fall within that window.
115 | """
116 |
117 | # Get the spatial coordinates
118 | coords = pd.DataFrame(adata.obsm['spatial'], index=adata.obs_names, columns=['X', 'Y'])
119 |
120 | # Define the range of the sliding windows
121 | x_min, y_min = coords.min()
122 | x_max, y_max = coords.max()
123 | x_windows = np.arange(x_min, x_max - window_size + stride, stride)
124 | y_windows = np.arange(y_min, y_max - window_size + stride, stride)
125 |
126 | # Function to find all windows a point belongs to
127 | def find_windows(coord, window_edges):
128 | return [i for i, edge in enumerate(window_edges) if edge <= coord < edge + window_size]
129 |
130 | # Initialize the window mapping
131 | window_mapping = {}
132 |
133 | # Assign cells to all overlapping windows
134 | for cell_idx, (x, y) in enumerate(zip(coords['X'], coords['Y'])):
135 | cell_windows = ["window_{}_{}".format(wx, wy)
136 | for wx in find_windows(x, x_windows)
137 | for wy in find_windows(y, y_windows)]
138 |
139 | for win in cell_windows:
140 | if win not in window_mapping:
141 | window_mapping[win] = set()
142 | window_mapping[win].add(coords.index[cell_idx]) # This stores the cell/spot barcodes
143 | # For memory efficiency, it could be `window_mapping[win].add(cell_idx)` instead
144 |
145 | return window_mapping
146 |
147 |
148 | def add_sliding_window_info_to_adata(adata, window_mapping):
149 | """
150 | Adds window information to the AnnData object's .obs DataFrame. Each window is represented
151 | as a column, and cells/spots belonging to a window are marked with a 1.0, while others are marked
152 | with a 0.0. It modifies the `adata` object in place.
153 |
154 | Parameters
155 | ----------
156 | adata : AnnData
157 | The AnnData object to which the window information will be added.
158 |
159 | window_mapping : dict
160 |         A dictionary mapping each window to a set of cell/spot indices or barcodes.
161 |         This is the output from the `create_sliding_windows` function.
162 | """
163 |
164 | # Initialize all window columns to 0.0
165 | for window in sorted(window_mapping.keys()):
166 | adata.obs[window] = 0.0
167 |
168 | # Mark cells that belong to each window
169 |     for window, barcode_indices in window_mapping.items():
170 |         adata.obs.loc[list(barcode_indices), window] = 1.0
--------------------------------------------------------------------------------
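A minimal sketch of the two neighborhood strategies implemented above, assuming `anndata` is installed and using random coordinates as stand-ins for a real spatial dataset:

```python
import numpy as np
import anndata

from cell2cell.spatial.neighborhoods import (create_spatial_grid, calculate_window_size,
                                             create_sliding_windows,
                                             add_sliding_window_info_to_adata)

# Toy AnnData object with random 2D coordinates for 100 cells
adata = anndata.AnnData(X=np.random.rand(100, 10))
adata.obsm['spatial'] = np.random.rand(100, 2) * 1000

# Strategy 1: non-overlapping 4x4 grid; adds 'grid_x', 'grid_y', 'grid_cell' to adata.obs
create_spatial_grid(adata, num_bins=4)

# Strategy 2: overlapping sliding windows; a stride of half a window gives 50% overlap
window_size = calculate_window_size(adata, num_windows=4)
window_mapping = create_sliding_windows(adata, window_size=window_size, stride=window_size / 2)
add_sliding_window_info_to_adata(adata, window_mapping)  # one 0/1 column per window in adata.obs
```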
/cell2cell/stats/__init__.py:
--------------------------------------------------------------------------------
1 | from cell2cell.stats.enrichment import (fisher_representation, hypergeom_representation)
2 | from cell2cell.stats.gini import (gini_coefficient)
3 | from cell2cell.stats.multitest import (compute_fdrcorrection_asymmetric_matrix, compute_fdrcorrection_symmetric_matrix)
4 | from cell2cell.stats.permutation import (compute_pvalue_from_dist, pvalue_from_dist, random_switching_ppi_labels,
5 | run_label_permutation)
6 |
--------------------------------------------------------------------------------
/cell2cell/stats/enrichment.py:
--------------------------------------------------------------------------------
1 | import scipy.stats as st
2 |
3 |
4 | def hypergeom_representation(sample_size, class_in_sample, population_size, class_in_population):
5 | '''
6 |     Performs an analysis of enrichment/depletion based on observations
7 | in a sample. It computes a p-value given a hypergeometric
8 | distribution.
9 |
10 | Parameters
11 | ----------
12 | sample_size : int
13 | Size of the sample obtained or number of elements
14 | obtained from the analysis.
15 |
16 | class_in_sample : int
17 | Number of elements of a given class that are
18 | contained in the sample. This is the class to be tested.
19 |
20 | population_size : int
21 | Size of the sampling space. That is, the total number
22 | of possible elements to be chosen when sampling.
23 |
24 | class_in_population : int
25 | Number of elements of a given class that are contained
26 | in the population. This is the class to be tested.
27 |
28 | Returns
29 | -------
30 | p_vals : tuple
31 | A tuple containing the p-values for depletion and
32 | enrichment analysis, respectively.
33 | '''
34 | # Computing the number of elements that are not in the same class
35 | nonclass_in_sample = sample_size - class_in_sample
36 | nonclass_in_population = population_size - class_in_population
37 |
38 | # Remaining elements in population after sampling
39 | rem_class = class_in_population - class_in_sample
40 | rem_nonclass = nonclass_in_population - nonclass_in_sample
41 |
42 | # Depletion Analysis
43 | depletion_hyp_p_val = st.hypergeom.cdf(class_in_sample, population_size, class_in_population, sample_size)
44 |
45 | # Enrichment Analysis
46 | enrichment_hyp_p_val = 1.0 - st.hypergeom.cdf(class_in_sample - 1.0, population_size, class_in_population,
47 | sample_size)
48 |
49 | p_vals = (depletion_hyp_p_val, enrichment_hyp_p_val)
50 | return p_vals
51 |
52 |
53 | def fisher_representation(sample_size, class_in_sample, population_size, class_in_population):
54 | '''
55 |     Performs an analysis of enrichment/depletion based on observations
56 |     in a sample. It computes a p-value given a Fisher exact test.
57 |
58 | Parameters
59 | ----------
60 | sample_size : int
61 | Size of the sample obtained or number of elements
62 | obtained from the analysis.
63 |
64 | class_in_sample : int
65 | Number of elements of a given class that are
66 | contained in the sample. This is the class to be tested.
67 |
68 | population_size : int
69 | Size of the sampling space. That is, the total number
70 | of possible elements to be chosen when sampling.
71 |
72 | class_in_population : int
73 | Number of elements of a given class that are contained
74 | in the population. This is the class to be tested.
75 |
76 | Returns
77 | -------
78 | results : dict
79 |         A dictionary containing the odds ratios and p-values for
80 | depletion and enrichment analysis.
81 | '''
82 | # Computing the number of elements that are not in the same class
83 | nonclass_in_sample = sample_size - class_in_sample
84 | nonclass_in_population = population_size - class_in_population
85 |
86 | # Remaining elements in population after sampling
87 | rem_class = class_in_population - class_in_sample
88 | rem_nonclass = nonclass_in_population - nonclass_in_sample
89 |
90 | # Depletion Analysis
91 | depletion_odds, depletion_fisher_p_val = st.fisher_exact([[class_in_sample, rem_class],
92 | [nonclass_in_sample, rem_nonclass]],
93 | alternative='less')
94 |
95 | # Enrichment Analysis
96 | enrichment_odds, enrichment_fisher_p_val = st.fisher_exact([[class_in_sample, rem_class],
97 | [nonclass_in_sample, rem_nonclass]],
98 | alternative='greater')
99 |
100 | p_vals = (depletion_fisher_p_val, enrichment_fisher_p_val)
101 | odds = (depletion_odds, enrichment_odds)
102 | results = {'pval' : p_vals,
103 | 'odds' : odds,
104 | }
105 | return results
--------------------------------------------------------------------------------
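A worked toy example of the two tests above. Suppose 50 elements are sampled from a population of 1,000, in which 100 belong to the class of interest, and 20 of the sampled elements turn out to be in that class:

```python
from cell2cell.stats.enrichment import fisher_representation, hypergeom_representation

depletion_p, enrichment_p = hypergeom_representation(sample_size=50,
                                                     class_in_sample=20,
                                                     population_size=1000,
                                                     class_in_population=100)
print(enrichment_p)  # Small p-value: 20/50 >> 100/1000, so the class is enriched

results = fisher_representation(sample_size=50,
                                class_in_sample=20,
                                population_size=1000,
                                class_in_population=100)
print(results['pval'])  # (depletion p-value, enrichment p-value)
print(results['odds'])  # (depletion odds ratio, enrichment odds ratio)
```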
/cell2cell/stats/gini.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import numpy as np
4 |
5 |
6 | def gini_coefficient(distribution):
7 | """Computes the Gini coefficient of an array of values.
8 | Code borrowed from:
9 | https://stackoverflow.com/questions/39512260/calculating-gini-coefficient-in-python-numpy
10 |
11 | Parameters
12 | ----------
13 | distribution : array-like
14 | An array of values representing the distribution
15 | to be evaluated.
16 |
17 | Returns
18 | -------
19 | gini : float
20 | Gini coefficient for the evaluated distribution.
21 | """
22 | diffsum = 0
23 | for i, xi in enumerate(distribution[:-1], 1):
24 | diffsum += np.sum(np.abs(xi - distribution[i:]))
25 | gini = diffsum / (len(distribution)**2 * np.mean(distribution))
26 | return gini
--------------------------------------------------------------------------------
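A quick sanity check of `gini_coefficient`: a perfectly even distribution scores 0, while concentrating everything in one element pushes the score toward 1:

```python
import numpy as np

from cell2cell.stats.gini import gini_coefficient

print(gini_coefficient(np.array([1.0, 1.0, 1.0, 1.0])))   # 0.0 (perfect equality)
print(gini_coefficient(np.array([0.0, 0.0, 0.0, 10.0])))  # 0.75 (high inequality for n=4)
```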
/cell2cell/stats/multitest.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 |
4 | from statsmodels.stats.multitest import fdrcorrection
5 |
6 |
7 | def compute_fdrcorrection_symmetric_matrix(X, alpha=0.1):
8 | '''
9 | Computes and FDR correction or Benjamini-Hochberg procedure
10 | on a symmetric matrix of p-values. Here, only the diagonal
11 | and values on the upper triangle are considered to avoid
12 | repetition with the lower triangle.
13 |
14 | Parameters
15 | ----------
16 | X : pandas.DataFrame
17 | A symmetric dataframe of P-values.
18 |
19 | alpha : float, default=0.1
20 | Error rate of the FDR correction. Must be 0 < alpha < 1.
21 |
22 | Returns
23 | -------
24 | adj_X : pandas.DataFrame
25 | A symmetric dataframe with adjusted P-values of X.
26 | '''
27 | pandas = False
28 | a = X.copy()
29 |
30 | if isinstance(X, pd.DataFrame):
31 | pandas = True
32 | a = X.values
33 | index = X.index
34 | columns = X.columns
35 |
36 | # Original data
37 | upper_idx = np.triu_indices_from(a)
38 | pvals = a[upper_idx]
39 |
40 | # New data
41 | adj_X = np.zeros(a.shape)
42 | rej, adj_pvals = fdrcorrection(pvals.flatten(), alpha=alpha)
43 |
44 |     # Reorder data
45 | adj_X[upper_idx] = adj_pvals
46 | adj_X = adj_X + np.triu(adj_X, 1).T
47 |
48 | if pandas:
49 | adj_X = pd.DataFrame(adj_X, index=index, columns=columns)
50 | return adj_X
51 |
52 |
53 | def compute_fdrcorrection_asymmetric_matrix(X, alpha=0.1):
54 | '''
55 |     Computes an FDR correction (Benjamini-Hochberg procedure)
56 |     on an asymmetric matrix of p-values. Here, the correction
57 | is performed for every value in X.
58 |
59 | Parameters
60 | ----------
61 | X : pandas.DataFrame
62 | An asymmetric dataframe of P-values.
63 |
64 | alpha : float, default=0.1
65 | Error rate of the FDR correction. Must be 0 < alpha < 1.
66 |
67 | Returns
68 | -------
69 | adj_X : pandas.DataFrame
70 | An asymmetric dataframe with adjusted P-values of X.
71 | '''
72 | pandas = False
73 | a = X.copy()
74 |
75 | if isinstance(X, pd.DataFrame):
76 | pandas = True
77 | a = X.values
78 | index = X.index
79 | columns = X.columns
80 |
81 | # Original data
82 | pvals = a.flatten()
83 |
84 | # New data
85 | rej, adj_pvals = fdrcorrection(pvals, alpha=alpha)
86 |
87 |     # Reorder data
88 | #adj_X = adj_pvals.reshape(-1, a.shape[1])
89 | adj_X = adj_pvals.reshape(a.shape) # Allows using tensors
90 |
91 | if pandas:
92 | adj_X = pd.DataFrame(adj_X, index=index, columns=columns)
93 | return adj_X
--------------------------------------------------------------------------------
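A small sketch of the symmetric variant, using an invented 3x3 matrix of p-values such as those produced by pairwise permutation tests:

```python
import numpy as np
import pandas as pd

from cell2cell.stats.multitest import compute_fdrcorrection_symmetric_matrix

pvals = pd.DataFrame([[0.001, 0.04, 0.20],
                      [0.04, 0.003, 0.35],
                      [0.20, 0.35, 0.01]],
                     index=['A', 'B', 'C'],
                     columns=['A', 'B', 'C'])

# Only the diagonal and upper triangle are corrected; the result is mirrored back
adj = compute_fdrcorrection_symmetric_matrix(pvals, alpha=0.1)
print(adj)  # Symmetric dataframe of Benjamini-Hochberg-adjusted p-values
```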
/cell2cell/tensor/__init__.py:
--------------------------------------------------------------------------------
1 | from cell2cell.tensor.external_scores import (dataframes_to_tensor)
2 | from cell2cell.tensor.factor_manipulation import (normalize_factors)
3 | from cell2cell.tensor.metrics import (correlation_index, pairwise_correlation_index)
4 | from cell2cell.tensor.tensor import (InteractionTensor, PreBuiltTensor, build_context_ccc_tensor, generate_tensor_metadata,
5 | interactions_to_tensor)
6 | from cell2cell.tensor.tensor_manipulation import (concatenate_interaction_tensors)
7 | from cell2cell.tensor.subset import (subset_tensor, subset_metadata)
8 |
--------------------------------------------------------------------------------
/cell2cell/tensor/factor_manipulation.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import numpy as np
4 |
5 |
6 | def normalize_factors(factors):
7 | '''
8 | L2-normalizes the factors considering all tensor dimensions
9 | from a tensor decomposition result
10 |
11 | Parameters
12 | ----------
13 | factors : dict
14 | Ordered dictionary containing a dataframe with the factor loadings for each
15 | dimension/order of the tensor. This is the result from a tensor decomposition,
16 | it can be found as the attribute `factors` in any tensor class derived from the
17 | class BaseTensor (e.g. BaseTensor.factors).
18 |
19 | Returns
20 | -------
21 | norm_factors : dict
22 | The normalized factors.
23 | '''
24 | norm_factors = dict()
25 | for k, v in factors.items():
26 | norm_factors[k] = v / np.linalg.norm(v, axis=0)
27 | return norm_factors
28 |
29 |
30 | def shuffle_factors(factors, axis=0):
31 | '''
32 | Randomly shuffles the values of the factors in the tensor decomposition.
33 | '''
34 | raise NotImplementedError
35 |
--------------------------------------------------------------------------------
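A minimal sketch of `normalize_factors` on a mock decomposition result, where each dimension holds a loadings dataframe with one column per factor (the dimension names here are invented):

```python
import numpy as np
import pandas as pd

from cell2cell.tensor.factor_manipulation import normalize_factors

factors = {'Contexts': pd.DataFrame(np.random.rand(4, 2), columns=['Factor 1', 'Factor 2']),
           'LR pairs': pd.DataFrame(np.random.rand(10, 2), columns=['Factor 1', 'Factor 2'])}

norm_factors = normalize_factors(factors)
print(np.linalg.norm(norm_factors['Contexts'], axis=0))  # ~[1., 1.]: each factor column has unit L2 norm
```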
/cell2cell/tensor/metrics.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import numpy as np
4 | import pandas as pd
5 |
6 | from itertools import combinations
7 |
8 | # Authors: Hratch Baghdassarian, Erick Armingol
9 | # similarity metrics for tensor decompositions
10 |
11 |
12 | def correlation_index(factors_1, factors_2, tol=5e-16, method='stacked'):
13 | """
14 | CorrIndex implementation to assess tensor decomposition outputs.
15 | From [1] Sobhani et al 2022 (https://doi.org/10.1016/j.sigpro.2022.108457).
16 | Metric is scaling and column-permutation invariant, wherein each column is a factor.
17 |
18 | Parameters
19 | ----------
20 | factors_1 : dict
21 | Ordered dictionary containing a dataframe with the factor loadings for each
22 | dimension/order of the tensor. This is the result from a tensor decomposition,
23 | it can be found as the attribute `factors` in any tensor class derived from the
24 | class BaseTensor (e.g. BaseTensor.factors).
25 |
26 | factors_2 : dict
27 | Similar to factors_1 but coming from another tensor decomposition of a tensor
28 | with equal shape.
29 |
30 | tol : float, default=5e-16
31 | Precision threshold below which to call the CorrIndex score 0.
32 |
33 | method : str, default='stacked'
34 | Method to obtain the CorrIndex by comparing the A matrices from two decompositions.
35 | Possible options are:
36 |
37 | - 'stacked' : The original method implemented in [1]. Here all A matrices from the same decomposition are
38 | vertically concatenated, building a big A matrix for each decomposition.
39 | - 'max_score' : This computes the CorrIndex for each pair of A matrices (i.e. between A_1 in factors_1 and
40 | factors_2, between A_2 in factors_1 and factors_2, and so on). Then the max score is
41 | selected (the most conservative approach). In other words, it selects the max score among the
42 | CorrIndexes computed dimension-wise.
43 | - 'min_score' : Similar to 'max_score', but the min score is selected (the least conservative approach).
44 | - 'avg_score' : Similar to 'max_score', but the avg score is selected.
45 |
46 | Returns
47 | -------
48 | score : float
49 | CorrIndex metric [0,1]; lower score indicates higher similarity between matrices
50 | """
51 | factors_1 = list(factors_1.values())
52 | factors_2 = list(factors_2.values())
53 |
54 | # check input factors shape
55 | for factors in [factors_1, factors_2]:
56 |         if len({np.shape(A)[1] for A in factors}) != 1:
57 | raise ValueError('Factors should be a list of loading matrices of the same rank')
58 |
59 | # check method
60 | options = ['stacked', 'max_score', 'min_score', 'avg_score']
61 | if method not in options:
62 |         raise ValueError("The `method` must be one of {}".format(options))
63 |
64 | if method == 'stacked':
65 |         # vertically stack loading matrices -- shape (sum(tensor.shape) x R)
66 | X_1 = [np.concatenate(factors_1, 0)]
67 | X_2 = [np.concatenate(factors_2, 0)]
68 | else:
69 | X_1 = factors_1
70 | X_2 = factors_2
71 |
72 | for x1, x2 in zip(X_1, X_2):
73 | if np.shape(x1) != np.shape(x2):
74 | raise ValueError('Factor matrices should be of the same shapes')
75 |
76 | # normalize columns to L2 norm - even if ran decomposition with normalize_factors=True
77 | col_norm_1 = [np.linalg.norm(x1, axis=0) for x1 in X_1]
78 | col_norm_2 = [np.linalg.norm(x2, axis=0) for x2 in X_2]
79 | for cn1, cn2 in zip(col_norm_1, col_norm_2):
80 | if np.any(cn1 == 0) or np.any(cn2 == 0):
81 | raise ValueError('Column norms must be non-zero')
82 | X_1 = [x1 / cn1 for x1, cn1 in zip(X_1, col_norm_1)]
83 | X_2 = [x2 / cn2 for x2, cn2 in zip(X_2, col_norm_2)]
84 |
85 | corr_idxs = [_compute_correlation_index(x1, x2, tol=tol) for x1, x2 in zip(X_1, X_2)]
86 |
87 | if method == 'stacked':
88 | score = corr_idxs[0]
89 | elif method == 'max_score':
90 | score = np.max(corr_idxs)
91 | elif method == 'min_score':
92 | score = np.min(corr_idxs)
93 | elif method == 'avg_score':
94 | score = np.mean(corr_idxs)
95 | else:
96 | score = 1.0
97 | return score
98 |
99 |
100 | def _compute_correlation_index(x1, x2, tol=5e-16):
101 | '''
102 | Computes the CorrIndex from the L2-normalized A matrices.
103 |
104 | Parameters
105 | ----------
106 |     x1 : ndarray
107 |         An L2-normalized A matrix from the first tensor decomposition.
108 | 
109 |     x2 : ndarray
110 |         An L2-normalized A matrix from the second tensor decomposition.
111 |
112 | tol : float, default=5e-16
113 | Precision threshold below which to call the CorrIndex score 0, by default 5e-16
114 |
115 | Returns
116 | -------
117 | score : float
118 | CorrIndex metric [0,1]; lower score indicates higher similarity between matrices
119 | '''
120 | # generate the correlation index input
121 | c_prod_mtx = np.abs(np.matmul(np.conj(np.transpose(np.asarray(x1))), np.asarray(x2)))
122 |
123 | # correlation index scoring
124 | n_elements = np.shape(c_prod_mtx)[1] + np.shape(c_prod_mtx)[0]
125 | score = (1 / (n_elements)) * (np.sum(np.abs(np.max(c_prod_mtx, 1) - 1)) + np.sum(np.abs(np.max(c_prod_mtx, 0) - 1)))
126 | if score < tol:
127 | score = 0
128 | return score
129 |
130 |
131 | def pairwise_correlation_index(factors, tol=5e-16, method='stacked'):
132 | '''
133 | Computes the CorrIndex between all pairs of factors
134 |
135 | Parameters
136 | ----------
137 | factors : list
138 | List with multiple Ordered dictionaries, each containing a dataframe with
139 | the factor loadings for each dimension/order of the tensor. This is the
140 | result from a tensor decomposition, it can be found as the attribute
141 | `factors` in any tensor class derived from the class BaseTensor
142 | (e.g. BaseTensor.factors).
143 |
144 | tol : float, default=5e-16
145 | Precision threshold below which to call the CorrIndex score 0.
146 |
147 | method : str, default='stacked'
148 | Method to obtain the CorrIndex by comparing the A matrices from two decompositions.
149 | Possible options are:
150 |
151 | - 'stacked' : The original method implemented in [1]. Here all A matrices from the same decomposition are
152 | vertically concatenated, building a big A matrix for each decomposition.
153 | - 'max_score' : This computes the CorrIndex for each pair of A matrices (i.e. between A_1 in factors_1 and
154 | factors_2, between A_2 in factors_1 and factors_2, and so on). Then the max score is
155 | selected (the most conservative approach). In other words, it selects the max score among the
156 | CorrIndexes computed dimension-wise.
157 | - 'min_score' : Similar to 'max_score', but the min score is selected (the least conservative approach).
158 | - 'avg_score' : Similar to 'max_score', but the avg score is selected.
159 |
160 | Returns
161 | -------
162 | scores : pd.DataFrame
163 | Dataframe with CorrIndex metric for each pair of decompositions.
164 | This metric bounds are [0,1]; lower score indicates higher similarity between matrices
165 | '''
166 | N = len(factors)
167 | idxs = list(range(N))
168 | pairs = list(combinations(idxs, 2))
169 |     scores = pd.DataFrame(np.zeros((N, N)), index=idxs, columns=idxs)
170 | for p1, p2 in pairs:
171 | corrindex = correlation_index(factors_1=factors[p1],
172 | factors_2=factors[p2],
173 | tol=tol,
174 | method=method
175 | )
176 |
177 | scores.at[p1, p2] = corrindex
178 | scores.at[p2, p1] = corrindex
179 | return scores
180 |
--------------------------------------------------------------------------------
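A sketch of `correlation_index` on mock factors (random loadings with invented dimension names). Identical decompositions should score ~0, and, since the metric is column-permutation invariant, shuffling the factor columns should not change the score:

```python
import numpy as np
import pandas as pd

from cell2cell.tensor.metrics import correlation_index

rng = np.random.default_rng(0)
factors_1 = {'Contexts': pd.DataFrame(rng.random((4, 3))),
             'LR pairs': pd.DataFrame(rng.random((10, 3))),
             'Senders': pd.DataFrame(rng.random((5, 3))),
             'Receivers': pd.DataFrame(rng.random((5, 3)))}

# Reversing the column (factor) order is a permutation of the factors
factors_2 = {k: v[v.columns[::-1]] for k, v in factors_1.items()}

print(correlation_index(factors_1, factors_1))  # ~0.0 (identical decompositions)
print(correlation_index(factors_1, factors_2))  # ~0.0 (permutation invariance)
```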
/cell2cell/tensor/subset.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import copy
3 |
4 | import numpy as np
5 | import tensorly as tl
6 |
7 | from cell2cell.preprocessing.find_elements import find_duplicates
8 |
9 | def find_element_indexes(interaction_tensor, elements, axis=0, remove_duplicates=True, keep='first', original_order=False):
10 | '''Finds the location/indexes of a list of elements in one of the
11 | axis of an InteractionTensor.
12 |
13 | Parameters
14 | ----------
15 | interaction_tensor : cell2cell.tensor.BaseTensor
16 |         A communication tensor generated with any of the tensor classes in
17 | cell2cell.tensor
18 |
19 | elements : list
20 | A list of names for the elements to find in one of the axis.
21 |
22 | axis : int, default=0
23 | An axis of the interaction_tensor, representing one of
24 | its dimensions.
25 |
26 | remove_duplicates : boolean, default=True
27 |         Whether to remove duplicated names in `elements`.
28 |
29 | keep : str, default='first'
30 | Determines which duplicates (if any) to keep.
31 | Options are:
32 |
33 | - first : Drop duplicates except for the first occurrence.
34 | - last : Drop duplicates except for the last occurrence.
35 | - False : Drop all duplicates.
36 |
37 | original_order : boolean, default=False
38 |         Whether to keep the original order of the elements in
39 |         interaction_tensor.order_names[axis] instead of the
40 |         new order indicated in `elements`.
41 |
42 | Returns
43 | -------
44 | indexes : list
45 |         List of indexes for the elements that were found in the
46 |         indicated axis of the interaction_tensor.
47 | '''
48 |     assert axis < len(interaction_tensor.tensor.shape), \
49 |         "List index out of range. 'axis' must be one of the axes in the tensor."
50 |     assert axis < len(interaction_tensor.order_names), \
51 |         "List index out of range. interaction_tensor.order_names must have element names for each axis of the tensor."
52 |
53 | elements = sorted(set(elements), key=list(elements).index)
54 |
55 | if original_order:
56 | # Avoids error for considering elements not in the tensor
57 | elements = set(elements).intersection(set(interaction_tensor.order_names[axis]))
58 | elements = sorted(elements, key=interaction_tensor.order_names[axis].index)
59 |
60 |
61 | # Find duplicates if we are removing them
62 | to_exclude = []
63 | if remove_duplicates:
64 | dup_dict = find_duplicates(interaction_tensor.order_names[axis])
65 |
66 | if len(dup_dict) > 0: # Only if we have duplicate items
67 | if keep == 'first':
68 | for k, v in dup_dict.items():
69 | to_exclude.extend(v[1:])
70 | elif keep == 'last':
71 | for k, v in dup_dict.items():
72 | to_exclude.extend(v[:-1])
73 | elif not keep:
74 | for k, v in dup_dict.items():
75 | to_exclude.extend(v)
76 | else:
77 |                 raise ValueError("An invalid option was selected for the parameter `keep`.")
78 |
79 | # Find indexes in the tensor
80 |     indexes = sum([np.where(np.asarray(interaction_tensor.order_names[axis]) == element)[0].tolist()
81 |                    for element in elements], [])
82 |
83 | # Exclude duplicates if any to exclude
84 | indexes = [idx for idx in indexes if idx not in to_exclude]
85 | return indexes
86 |
87 |
88 | def subset_tensor(interaction_tensor, subset_dict, remove_duplicates=True, keep='first', original_order=False):
89 | '''Subsets an InteractionTensor to contain only specific elements in
90 | respective dimensions.
91 |
92 | Parameters
93 | ----------
94 | interaction_tensor : cell2cell.tensor.BaseTensor
95 |         A communication tensor generated with any of the tensor classes in
96 | cell2cell.tensor
97 |
98 | subset_dict : dict
99 | Dictionary to subset the tensor. It must contain the axes or
100 | dimensions that will be subset as the keys of the dictionary
101 |         and the values correspond to lists of element names for the
102 | respective axes or dimensions. Those axes that are not present
103 | in this dictionary will not be subset.
104 | E.g. {0 : ['Context 1', 'Context2'], 1: ['LR 10', 'LR 100']}
105 |
106 | remove_duplicates : boolean, default=True
107 |         Whether to remove duplicated names in `elements`.
108 |
109 | keep : str, default='first'
110 | Determines which duplicates (if any) to keep.
111 | Options are:
112 |
113 | - first : Drop duplicates except for the first occurrence.
114 | - last : Drop duplicates except for the last occurrence.
115 | - False : Drop all duplicates.
116 |
117 | original_order : boolean, default=False
118 |         Whether to keep the original order of the elements in
119 |         interaction_tensor.order_names instead of the
120 |         new order indicated in the lists of the `subset_dict`.
121 |
122 | Returns
123 | -------
124 | subset_tensor : cell2cell.tensor.BaseTensor
125 | A copy of interaction_tensor that was subset to contain
126 | only the elements specified for the respective axis in the
127 | `subset_dict`. Corresponds to a communication tensor
128 |         generated with any of the tensor classes in cell2cell.tensor
129 | '''
130 | # Perform a deep copy of the original tensor and reset previous factorization
131 | subset_tensor = copy.deepcopy(interaction_tensor)
132 | subset_tensor.rank = None
133 | subset_tensor.tl_object = None
134 | subset_tensor.factors = None
135 |
136 | # Initialize tensor into a numpy object for performing subset
137 | context = tl.context(subset_tensor.tensor)
138 | tensor = tl.to_numpy(subset_tensor.tensor)
139 | mask = None
140 | if subset_tensor.mask is not None:
141 | mask = tl.to_numpy(subset_tensor.mask)
142 |
143 | # Search for indexes
144 | axis_idxs = dict()
145 | for k, v in subset_dict.items():
146 | if k < len(tensor.shape):
147 | if len(v) != 0:
148 | idx = find_element_indexes(interaction_tensor=subset_tensor,
149 | elements=v,
150 | axis=k,
151 | remove_duplicates=remove_duplicates,
152 | keep=keep,
153 | original_order=original_order
154 | )
155 | if len(idx) == 0:
156 | print("No elements found for axis {}. It will return an empty tensor.".format(k))
157 | axis_idxs[k] = idx
158 | else:
159 |             print("Axis {} is out of range; ignoring elements in this axis.".format(k))
160 |
161 | # Subset tensor
162 | for k, v in axis_idxs.items():
163 | if tensor.shape != (0,): # Avoids error when returned empty tensor
164 | tensor = tensor.take(indices=v,
165 | axis=k
166 | )
167 |
168 | subset_tensor.order_names[k] = [subset_tensor.order_names[k][i] for i in v]
169 | if mask is not None:
170 | mask = mask.take(indices=v,
171 | axis=k
172 | )
173 |
174 | # Restore tensor and mask properties
175 | tensor = tl.tensor(tensor, **context)
176 | if mask is not None:
177 | mask = tl.tensor(mask, **context)
178 |
179 | subset_tensor.tensor = tensor
180 | subset_tensor.mask = mask
181 | return subset_tensor
182 |
183 |
184 | def subset_metadata(tensor_metadata, interaction_tensor, sample_col='Element'):
185 | '''Subsets the metadata of an InteractionTensor to contain only
186 | elements in a reference InteractionTensor (interaction_tensor).
187 |
188 | Parameters
189 | ----------
190 | tensor_metadata : list
191 | List of pandas dataframes with metadata information for elements of each
192 |         dimension in the tensor. A column named as the variable `sample_col` contains
193 |         the name of each element in the tensor, while the remaining columns contain
194 |         the metadata or grouping information of each
195 |         element.
196 |
197 | interaction_tensor : cell2cell.tensor.BaseTensor
198 |         A communication tensor generated with any of the tensor classes in
199 | cell2cell.tensor. This tensor is used as reference to subset the metadata.
200 | The subset metadata will contain only elements that are present in this
201 | tensor, so if metadata was originally built for another tensor, the elements
202 | that are exclusive for that original tensor will be excluded.
203 |
204 | sample_col : str, default='Element'
205 | Name of the column containing the element names in the metadata.
206 |
207 | Returns
208 | -------
209 | subset_metadata : list
210 | List of pandas dataframes with metadata information for elements contained
211 | in `interaction_tensor.order_names`. It is a subset of `tensor_metadata`.
212 | '''
213 | subset_metadata = []
214 | for i, meta in enumerate(tensor_metadata):
215 | if meta is not None:
216 | tmp_meta = meta.set_index(sample_col)
217 | tmp_meta = tmp_meta.loc[interaction_tensor.order_names[i], :]
218 | tmp_meta = tmp_meta.reset_index()
219 | subset_metadata.append(tmp_meta)
220 | else:
221 | subset_metadata.append(None)
222 | return subset_metadata
--------------------------------------------------------------------------------
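A hedged sketch of `subset_tensor` on a small `PreBuiltTensor` built from random values; all element names are placeholders:

```python
import numpy as np

from cell2cell.tensor import PreBuiltTensor
from cell2cell.tensor.subset import subset_tensor

tensor = PreBuiltTensor(tensor=np.random.rand(3, 4, 2, 2),
                        order_names=[['Ctx1', 'Ctx2', 'Ctx3'],
                                     ['LR1', 'LR2', 'LR3', 'LR4'],
                                     ['CellA', 'CellB'],
                                     ['CellA', 'CellB']],
                        order_labels=['Contexts', 'LR pairs', 'Senders', 'Receivers'])

# Keep only two contexts (axis 0) and two LR pairs (axis 1); other axes are untouched
sub = subset_tensor(tensor, subset_dict={0: ['Ctx1', 'Ctx3'], 1: ['LR2', 'LR4']})
print(sub.tensor.shape)  # (2, 2, 2, 2)
```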
/cell2cell/tensor/tensor_manipulation.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import tensorly as tl
4 |
5 | from cell2cell.tensor.tensor import PreBuiltTensor
6 | from cell2cell.tensor.subset import subset_tensor
7 |
8 |
9 | def concatenate_interaction_tensors(interaction_tensors, axis, order_labels, remove_duplicates=False, keep='first',
10 | mask=None, device=None):
11 | '''Concatenates interaction tensors in a given tensor dimension or axis.
12 |
13 | Parameters
14 | ----------
15 | interaction_tensors : list
16 | List of any tensor class in cell2cell.tensor.
17 |
18 | axis : int
19 |         The axis along which the tensors will be joined.
20 |
21 | order_labels : list
22 | List of labels for dimensions or orders in the tensor.
23 |
24 | remove_duplicates : boolean, default=False
25 |         Whether to remove duplicated names along the concatenated axis.
26 |
27 | keep : str, default='first'
28 | Determines which duplicates (if any) to keep.
29 | Options are:
30 |
31 | - first : Drop duplicates except for the first occurrence.
32 | - last : Drop duplicates except for the last occurrence.
33 | - False : Drop all duplicates.
34 |
35 |     mask : ndarray, default=None
36 |         Helps avoid missing values during a tensor factorization. A mask should be
37 |         a boolean array of the same shape as the original tensor, set to 0
38 |         where values are missing and 1 everywhere else. It must have the same shape
39 |         as the concatenated tensor.
40 |
41 | device : str, default=None
42 | Device to use when backend is pytorch. Options are:
43 | {'cpu', 'cuda', None}
44 |
45 | Returns
46 | -------
47 | concatenated_tensor : cell2cell.tensor.PreBuiltTensor
48 |         Final tensor after concatenation. It is a PreBuiltTensor that works as
49 | any interaction tensor based on the class BaseTensor.
50 | '''
51 |     # Assert that all other dimensions contain the same elements:
52 | shape = len(interaction_tensors[0].tensor.shape)
53 | assert all(shape == len(tensor.tensor.shape) for tensor in interaction_tensors[1:]), "Tensors must have same number of dimensions"
54 |
55 | for i in range(shape):
56 | if i != axis:
57 | elements = interaction_tensors[0].order_names[i]
58 | for tensor in interaction_tensors[1:]:
59 | assert elements == tensor.order_names[i], "Tensors must have the same elements in the other axes."
60 |
61 | # Initialize tensors into a numpy object for performing subset
62 | # Use the same context as first tensor for everything
63 |     try:
64 |         context = tl.context(interaction_tensors[0].tensor)
65 |     except Exception:
66 |         context = {'dtype': interaction_tensors[0].tensor.dtype}
67 |
68 |     # Concatenate tensors (convert through numpy so this works with numpy and pytorch backends)
69 |     concat_tensor = tl.concatenate([tl.tensor(tl.to_numpy(tensor.tensor)) for tensor in interaction_tensors], axis=axis)
70 | if mask is not None:
71 | assert mask.shape == concat_tensor.shape, "Mask must have the same shape of the concatenated tensor. Here: {}".format(concat_tensor.shape)
72 | else: # Generate a new mask from all previous masks if all are not None
73 | if all([tensor.mask is not None for tensor in interaction_tensors]):
74 |             mask = tl.concatenate([tl.tensor(tl.to_numpy(tensor.mask)) for tensor in interaction_tensors], axis=axis)
75 | else:
76 | mask = None
77 |
78 |     concat_tensor = tl.tensor(concat_tensor, **context)
79 |     if mask is not None:
80 |         mask = tl.tensor(mask, **context)
81 |
82 | # Concatenate names of elements for the given axis but keep the others as in one tensor
83 | order_names = []
84 | for i in range(shape):
85 | tmp_names = []
86 | if i == axis:
87 | for tensor in interaction_tensors:
88 | tmp_names += tensor.order_names[i]
89 | else:
90 | tmp_names = interaction_tensors[0].order_names[i]
91 | order_names.append(tmp_names)
92 |
93 | # Generate final object
94 | concatenated_tensor = PreBuiltTensor(tensor=concat_tensor,
95 | order_names=order_names,
96 | order_labels=order_labels,
97 | mask=mask, # Change if you want to omit values in the decomposition
98 | device=device
99 | )
100 |
101 | # Remove duplicates
102 | if remove_duplicates:
103 | concatenated_tensor = subset_tensor(interaction_tensor=concatenated_tensor,
104 | subset_dict={axis: order_names[axis]},
105 | remove_duplicates=remove_duplicates,
106 | keep=keep,
107 | original_order=False)
108 | return concatenated_tensor
--------------------------------------------------------------------------------
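A sketch of concatenating two tensors along the context axis, assuming tensorly's default numpy backend and using placeholder element names. Both tensors must share all elements in the non-concatenated axes:

```python
import numpy as np

from cell2cell.tensor import PreBuiltTensor
from cell2cell.tensor.tensor_manipulation import concatenate_interaction_tensors

names = [['LR1', 'LR2'], ['CellA', 'CellB'], ['CellA', 'CellB']]
labels = ['Contexts', 'LR pairs', 'Senders', 'Receivers']

t1 = PreBuiltTensor(tensor=np.random.rand(2, 2, 2, 2),
                    order_names=[['Ctx1', 'Ctx2']] + names,
                    order_labels=labels)
t2 = PreBuiltTensor(tensor=np.random.rand(3, 2, 2, 2),
                    order_names=[['Ctx3', 'Ctx4', 'Ctx5']] + names,
                    order_labels=labels)

merged = concatenate_interaction_tensors([t1, t2], axis=0, order_labels=labels)
print(merged.tensor.shape)  # (5, 2, 2, 2)
```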
/cell2cell/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from cell2cell.utils.networks import (generate_network_from_adjacency, export_network_to_gephi)
2 | from cell2cell.utils.parallel_computing import (agents_number)
3 |
--------------------------------------------------------------------------------
/cell2cell/utils/networks.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import absolute_import
4 |
5 | import networkx as nx
6 |
7 |
8 | def generate_network_from_adjacency(adjacency_matrix, package='networkx'):
9 | '''
10 | Generates a network or graph object from an adjacency matrix.
11 |
12 | Parameters
13 | ----------
14 | adjacency_matrix : pandas.DataFrame
15 | An adjacency matrix, where in rows and columns are nodes
16 | and values represents a weight for the respective edge.
17 |
18 | package : str, default='networkx'
19 |         Package or python library used to build the network.
20 |         Implemented options are {'networkx'}. Support for
21 |         'igraph' will be available soon.
22 |
23 | Returns
24 | -------
25 | network : graph-like
26 | A graph object built with a python-library for networks.
27 | '''
28 | if package == 'networkx':
29 | network = nx.from_pandas_adjacency(adjacency_matrix)
30 | elif package == 'igraph':
31 | # A = adjacency_matrix.values
32 | # network = igraph.Graph.Weighted_Adjacency((A > 0).tolist(), mode=igraph.ADJ_UNDIRECTED)
33 | #
34 | # # Add edge weights and node labels.
35 | # network.es['weight'] = A[A.nonzero()]
36 | # network.vs['label'] = list(adjacency_matrix.columns)
37 | #
38 | # Warning("iGraph functionalities are not completely implemented yet.")
39 | raise NotImplementedError("Network using package {} not implemented".format(package))
40 | else:
41 | raise NotImplementedError("Network using package {} not implemented".format(package))
42 | return network
43 |
44 |
45 | def export_network_to_gephi(network, filename, format='excel', network_type='Undirected'):
46 | '''
47 | Exports a network into a spreadsheet that is readable
48 | by the software Gephi.
49 |
50 | Parameters
51 | ----------
52 | network : networkx.Graph, networkx.DiGraph or a pandas.DataFrame
53 | A networkx Graph or Directed Graph, or an adjacency matrix,
54 | where in rows and columns are nodes and values represents a
55 | weight for the respective edge.
56 |
57 | filename : str, default=None
58 | Path to save the network into a Gephi-readable format.
59 |
60 | format : str, default='excel'
61 | Format to export the spreadsheet. Options are:
62 |
63 | - 'excel' : An excel file, either .xls or .xlsx
64 | - 'csv' : Comma separated value format
65 | - 'tsv' : Tab separated value format
66 |
67 | network_type : str, default='Undirected'
68 | Type of edges in the network. They could be either
69 | 'Undirected' or 'Directed'.
70 | '''
71 |     # This allows passing a network directly or an adjacency matrix
72 |     if not isinstance(network, nx.Graph):
73 | network = generate_network_from_adjacency(network,
74 | package='networkx')
75 |
76 | gephi_df = nx.to_pandas_edgelist(network)
77 | gephi_df = gephi_df.assign(Type=network_type)
78 | # When weight is not in the network
79 |     if 'weight' not in gephi_df.columns:
80 | gephi_df = gephi_df.assign(weight=1)
81 |
82 | # Transform column names
83 | gephi_df = gephi_df[['source', 'target', 'Type', 'weight']]
84 | gephi_df.columns = [c.capitalize() for c in gephi_df.columns]
85 |
86 | # Save with different formats
87 | if format == 'excel':
88 | gephi_df.to_excel(filename, sheet_name='Edges', index=False)
89 | elif format == 'csv':
90 | gephi_df.to_csv(filename, sep=',', index=False)
91 | elif format == 'tsv':
92 | gephi_df.to_csv(filename, sep='\t', index=False)
93 | else:
94 | raise ValueError("Format not supported.")
95 |
96 |
97 | def export_network_to_cytoscape(network, filename):
98 | '''
99 |     Exports a network into a JSON file that is readable
100 |     by the software Cytoscape.
101 |
102 | Parameters
103 | ----------
104 | network : networkx.Graph, networkx.DiGraph or a pandas.DataFrame
105 | A networkx Graph or Directed Graph, or an adjacency matrix,
106 | where in rows and columns are nodes and values represents a
107 | weight for the respective edge.
108 |
109 | filename : str, default=None
110 | Path to save the network into a Cytoscape-readable format
111 | (JSON file in this case). E.g. '/home/user/network.json'
112 | '''
113 |     # This allows passing a network directly or an adjacency matrix
114 |     if not isinstance(network, nx.Graph):
115 | network = generate_network_from_adjacency(network,
116 | package='networkx')
117 |
118 | data = nx.readwrite.json_graph.cytoscape.cytoscape_data(network)
119 |
120 | # Export
121 | import json
122 | json_str = json.dumps(data)
123 | with open(filename, 'w') as outfile:
124 | outfile.write(json_str)
--------------------------------------------------------------------------------
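A small end-to-end sketch of the helpers above: build a graph from a toy adjacency matrix and export it in both formats. File names are placeholders:

```python
import pandas as pd

from cell2cell.utils.networks import (generate_network_from_adjacency,
                                      export_network_to_gephi,
                                      export_network_to_cytoscape)

# Toy symmetric adjacency matrix (e.g., CCI scores between three cell types)
adj = pd.DataFrame([[0.0, 2.0, 0.0],
                    [2.0, 0.0, 1.0],
                    [0.0, 1.0, 0.0]],
                   index=['A', 'B', 'C'],
                   columns=['A', 'B', 'C'])

network = generate_network_from_adjacency(adj, package='networkx')
export_network_to_gephi(network, filename='network-edges.csv', format='csv')
export_network_to_cytoscape(network, filename='network.json')
```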
/cell2cell/utils/parallel_computing.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import absolute_import
4 |
5 | from multiprocessing import cpu_count
6 |
7 |
8 | # GENERAL
9 | def agents_number(n_jobs):
10 | '''
11 | Computes the number of agents/cores/threads that the
12 | computer can really provide given a number of
13 | jobs/threads requested.
14 |
15 | Parameters
16 | ----------
17 | n_jobs : int
18 | Number of threads for parallelization.
19 |
20 | Returns
21 | -------
22 | agents : int
23 | Number of threads that the computer can really provide.
24 | '''
25 | if n_jobs < 0:
26 | agents = cpu_count() + 1 + n_jobs
27 |         if agents < 1:
28 | agents = 1
29 | elif n_jobs > cpu_count():
30 | agents = cpu_count()
31 |
32 | elif n_jobs == 0:
33 | agents = 1
34 | else:
35 | agents = n_jobs
36 | return agents
37 |
38 |
39 | # CORE FUNCTIONS
40 | def parallel_spatial_ccis(inputs):
41 | '''
42 |     Parallel computing in cell2cell.analysis.pipelines.SpatialSingleCellInteractions
43 | '''
44 | # TODO: Implement this for enabling spatial analysis and compute interactions in parallel
45 |
46 | # from cell2cell.core import spatial_operation
47 | #results = spatial_operation()
48 |
49 | # return results
50 | pass
--------------------------------------------------------------------------------
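The `n_jobs` convention follows the scikit-learn style, as this quick illustration shows:

```python
from cell2cell.utils.parallel_computing import agents_number

print(agents_number(-1))  # all available cores
print(agents_number(0))   # 1
print(agents_number(4))   # 4, capped at the machine's core count
```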
/docs/documentation.md:
--------------------------------------------------------------------------------
1 | # Documentation for *cell2cell*
2 |
3 | This documentation is for our *cell2cell* suite, which includes the [regular cell2cell](https://doi.org/10.1371/journal.pcbi.1010715)
4 | and [Tensor-cell2cell](https://doi.org/10.1038/s41467-022-31369-2) tools. The former is for inferring cell-cell interactions
5 | and communication in one sample or context, while the latter is for deconvolving complex patterns
6 | of cell-cell communication across multiple samples or contexts simultaneously into interpretable factors
7 | representing patterns of communication.
8 |
9 | Here, multiple classes and functions are implemented to facilitate the analyses, including a variety of
10 | visualizations to simplify the interpretation of results:
11 |
12 | - **cell2cell.analysis** : Includes simplified pipelines for running the analyses, and functions for downstream analyses of Tensor-cell2cell
13 | - **cell2cell.clustering** : Includes multiple scipy-based functions for performing clustering.
14 | - **cell2cell.core** : Includes the core functions for inferring cell-cell interactions and communication. It includes scoring methods, cell classes, and interaction spaces.
15 | - **cell2cell.datasets** : Includes toy datasets and annotations for testing functions in basic scenarios.
16 | - **cell2cell.external** : Includes built-in approaches borrowed from other tools to avoid incompatibilities (e.g. UMAP, tensorly, and PCoA).
17 | - **cell2cell.io** : Includes functions for opening and saving diverse types of files.
18 | - **cell2cell.plotting** : Includes all the visualization options that *cell2cell* offers.
19 | - **cell2cell.preprocessing** : Includes functions for manipulating data and variables (e.g. data preprocessing, integration, permutation, among others).
20 | - **cell2cell.spatial** : Includes filtering of cell-cell interactions results given intercellular distance, as well as defining neighborhoods by grids or moving windows.
21 | - **cell2cell.stats** : Includes statistical analyses such as enrichment analysis, multiple test correction methods, permutation approaches, and Gini coefficient.
22 | - **cell2cell.tensor** : Includes all functions pertinent to the analysis of *Tensor-cell2cell*
23 | - **cell2cell.utils** : Includes general utilities for analyzing networks and performing parallel computing.
24 |
25 |
26 | Below, all the inputs, parameters (including their different options), and outputs are detailed. Source code of the functions is also included.
27 |
28 |
29 | ::: cell2cell
30 |
--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
1 | # Inferring cell-cell interactions from transcriptomes with *cell2cell*
2 | [![PyPI Version][pb]][pypi]
3 | [](https://cell2cell.readthedocs.io/en/latest/?badge=latest)
4 | [](https://pepy.tech/project/cell2cell)
5 |
6 |
7 | [pb]: https://badge.fury.io/py/cell2cell.svg
8 | [pypi]: https://pypi.org/project/cell2cell/
9 |
10 | ## Getting started
11 | For tutorials and documentation, visit [**cell2cell ReadTheDocs**](https://cell2cell.readthedocs.org/) or our [**cell2cell website**](https://earmingol.github.io/cell2cell).
12 |
13 |
14 |
15 | ## Installation
16 |
17 | Step 1: Install Anaconda
18 |
19 | First, [install Anaconda following this tutorial](https://docs.anaconda.com/anaconda/install/).
20 |
21 |
22 | Step 2: Create and Activate a New Conda Environment
23 |
24 | ```
25 | # Create a new conda environment
26 | conda create -n cell2cell -y python=3.7 jupyter
27 |
28 | # Activate the environment
29 | conda activate cell2cell
30 | ```
31 |
32 |
33 | Step 3: Install cell2cell
34 |
35 | ```
36 | pip install cell2cell
37 | ```
38 |
39 |
40 | ## Examples
41 |
42 | | cell2cell Examples | Tensor-cell2cell Examples |
43 | | --- | --- |
44 | |  |  |
45 | | - [Step-by-step Pipeline](https://github.com/earmingol/cell2cell/blob/master/examples/cell2cell/Toy-Example.ipynb) <br> - [Interaction Pipeline for Bulk Data](https://earmingol.github.io/cell2cell/tutorials/Toy-Example-BulkPipeline) <br> - [Interaction Pipeline for Single-Cell Data](https://earmingol.github.io/cell2cell/tutorials/Toy-Example-SingleCellPipeline) <br> - [Whole Body of *C. elegans*](https://github.com/LewisLabUCSD/Celegans-cell2cell) | - [Obtaining patterns of cell-cell communication](https://earmingol.github.io/cell2cell/tutorials/ASD/01-Tensor-Factorization-ASD/) <br> - [Downstream 1: Factor-specific analyses](https://earmingol.github.io/cell2cell/tutorials/ASD/02-Factor-Specific-ASD/) <br> - [Downstream 2: Patterns to functions (GSEA)](https://earmingol.github.io/cell2cell/tutorials/ASD/03-GSEA-ASD/) <br> - [Tensor-cell2cell in Google Colab (**GPU**)](https://colab.research.google.com/drive/1T6MUoxafTHYhjvenDbEtQoveIlHT2U6_?usp=sharing) <br> - [Communication patterns in **Spatial Transcriptomics**](https://earmingol.github.io/cell2cell/tutorials/Tensor-cell2cell-Spatial/) |
46 |
47 | Reproducible runs of the analyses in the [Tensor-cell2cell paper](https://doi.org/10.1038/s41467-022-31369-2) are available at [CodeOcean.com](https://doi.org/10.24433/CO.0051950.v2).
48 |
49 | ## LIANA & Tensor-cell2cell
50 |
51 | Explore our tutorials for using Tensor-cell2cell with [LIANA](https://github.com/saezlab/liana-py) at [ccc-protocols.readthedocs.io](https://ccc-protocols.readthedocs.io/).
52 |
53 | ## Common Issues
54 |
55 | - **Memory Errors with Tensor-cell2cell:** If you encounter memory errors when performing tensor factorizations, try replacing `init='svd'` with `init='random'`.
56 |
57 | ## Ligand-Receptor Pairs
58 | Find a curated list of ligand-receptor pairs for your analyses at our [GitHub Repository](https://github.com/LewisLabUCSD/Ligand-Receptor-Pairs).
59 |
60 | ## Citation
61 |
62 | Please cite our work using the following references:
63 |
64 | - **cell2cell**: [Inferring a spatial code of cell-cell interactions across a whole animal body](https://doi.org/10.1371/journal.pcbi.1010715).
65 | *PLOS Computational Biology, 2022*
66 |
67 | - **Tensor-cell2cell**: [Context-aware deconvolution of cell-cell communication with Tensor-cell2cell](https://doi.org/10.1038/s41467-022-31369-2).
68 | *Nature Communications, 2022.*
69 |
70 | - **LIANA & Tensor-cell2cell tutorials**: [Combining LIANA and Tensor-cell2cell to decipher cell-cell communication across multiple samples](https://doi.org/10.1101/2023.04.28.538731).
71 | *bioRxiv, 2023*
--------------------------------------------------------------------------------
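The memory-error workaround from the Common Issues section above looks like this in practice. A hedged fragment, assuming an already-built tensor object named `tensor` (e.g., an `InteractionTensor` or `PreBuiltTensor`) and an arbitrary rank:

```python
# Random initialization avoids the memory-hungry SVD step
tensor.compute_tensor_factorization(rank=10,
                                    init='random',  # instead of init='svd'
                                    random_state=0)
```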
/docs/requirements.in:
--------------------------------------------------------------------------------
1 | mkdocs
2 | mkdocstrings[python]
3 | markdown-include
4 | mkdocs-autorefs
5 | mkdocs-gen-files
6 | mkdocs-material
7 | mkdocs-material-extensions
8 | mkdocs-jupyter
9 | mkdocstrings-python-legacy
--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | #
2 | # This file is autogenerated by pip-compile with Python 3.7
3 | # by the following command:
4 | #
5 | # pip-compile requirements.in
6 | #
7 | appnope==0.1.3
8 | # via
9 | # ipykernel
10 | # ipython
11 | astunparse==1.6.3
12 | # via pytkdocs
13 | attrs==23.1.0
14 | # via jsonschema
15 | babel==2.13.1
16 | # via mkdocs-material
17 | backcall==0.2.0
18 | # via ipython
19 | beautifulsoup4==4.12.2
20 | # via nbconvert
21 | bleach==6.0.0
22 | # via nbconvert
23 | cached-property==1.5.2
24 | # via
25 | # griffe
26 | # pytkdocs
27 | certifi==2023.11.17
28 | # via requests
29 | charset-normalizer==3.3.2
30 | # via requests
31 | click==8.1.7
32 | # via mkdocs
33 | colorama==0.4.6
34 | # via
35 | # griffe
36 | # mkdocs-material
37 | debugpy==1.7.0
38 | # via ipykernel
39 | decorator==5.1.1
40 | # via ipython
41 | defusedxml==0.7.1
42 | # via nbconvert
43 | entrypoints==0.4
44 | # via jupyter-client
45 | fastjsonschema==2.19.0
46 | # via nbformat
47 | ghp-import==2.1.0
48 | # via mkdocs
49 | griffe==0.30.1
50 | # via mkdocstrings-python
51 | idna==3.4
52 | # via requests
53 | importlib-metadata==6.7.0
54 | # via
55 | # attrs
56 | # click
57 | # jsonschema
58 | # markdown
59 | # mkdocs
60 | # mkdocstrings
61 | # nbconvert
62 | # nbformat
63 | importlib-resources==5.12.0
64 | # via jsonschema
65 | ipykernel==6.16.2
66 | # via mkdocs-jupyter
67 | ipython==7.34.0
68 | # via ipykernel
69 | jedi==0.19.1
70 | # via ipython
71 | jinja2==3.1.2
72 | # via
73 | # mkdocs
74 | # mkdocs-material
75 | # mkdocstrings
76 | # nbconvert
77 | jsonschema==4.17.3
78 | # via nbformat
79 | jupyter-client==7.4.9
80 | # via
81 | # ipykernel
82 | # nbclient
83 | jupyter-core==4.12.0
84 | # via
85 | # jupyter-client
86 | # nbclient
87 | # nbconvert
88 | # nbformat
89 | jupyterlab-pygments==0.2.2
90 | # via nbconvert
91 | jupytext==1.15.2
92 | # via mkdocs-jupyter
93 | markdown==3.4.4
94 | # via
95 | # markdown-include
96 | # mkdocs
97 | # mkdocs-autorefs
98 | # mkdocs-material
99 | # mkdocstrings
100 | # pymdown-extensions
101 | markdown-include==0.8.1
102 | # via -r requirements.in
103 | markdown-it-py==2.2.0
104 | # via
105 | # jupytext
106 | # mdit-py-plugins
107 | markupsafe==2.1.3
108 | # via
109 | # jinja2
110 | # mkdocs
111 | # mkdocstrings
112 | # nbconvert
113 | matplotlib-inline==0.1.6
114 | # via
115 | # ipykernel
116 | # ipython
117 | mdit-py-plugins==0.3.5
118 | # via jupytext
119 | mdurl==0.1.2
120 | # via markdown-it-py
121 | mergedeep==1.3.4
122 | # via mkdocs
123 | mistune==3.0.2
124 | # via nbconvert
125 | mkdocs==1.5.3
126 | # via
127 | # -r requirements.in
128 | # mkdocs-autorefs
129 | # mkdocs-gen-files
130 | # mkdocs-jupyter
131 | # mkdocs-material
132 | # mkdocstrings
133 | mkdocs-autorefs==0.4.1
134 | # via
135 | # -r requirements.in
136 | # mkdocstrings
137 | mkdocs-gen-files==0.5.0
138 | # via -r requirements.in
139 | mkdocs-jupyter==0.24.3
140 | # via -r requirements.in
141 | mkdocs-material==9.2.7
142 | # via
143 | # -r requirements.in
144 | # mkdocs-jupyter
145 | mkdocs-material-extensions==1.2
146 | # via
147 | # -r requirements.in
148 | # mkdocs-material
149 | mkdocstrings[python]==0.22.0
150 | # via
151 | # -r requirements.in
152 | # mkdocstrings-python
153 | # mkdocstrings-python-legacy
154 | mkdocstrings-python==1.1.2
155 | # via mkdocstrings
156 | mkdocstrings-python-legacy==0.2.3
157 | # via -r requirements.in
158 | nbclient==0.7.4
159 | # via nbconvert
160 | nbconvert==7.6.0
161 | # via mkdocs-jupyter
162 | nbformat==5.8.0
163 | # via
164 | # jupytext
165 | # nbclient
166 | # nbconvert
167 | nest-asyncio==1.5.8
168 | # via
169 | # ipykernel
170 | # jupyter-client
171 | packaging==23.2
172 | # via
173 | # ipykernel
174 | # mkdocs
175 | # nbconvert
176 | paginate==0.5.6
177 | # via mkdocs-material
178 | pandocfilters==1.5.0
179 | # via nbconvert
180 | parso==0.8.3
181 | # via jedi
182 | pathspec==0.11.2
183 | # via mkdocs
184 | pexpect==4.8.0
185 | # via ipython
186 | pickleshare==0.7.5
187 | # via ipython
188 | pkgutil-resolve-name==1.3.10
189 | # via jsonschema
190 | platformdirs==4.0.0
191 | # via mkdocs
192 | prompt-toolkit==3.0.41
193 | # via ipython
194 | psutil==5.9.6
195 | # via ipykernel
196 | ptyprocess==0.7.0
197 | # via pexpect
198 | pygments==2.17.1
199 | # via
200 | # ipython
201 | # mkdocs-jupyter
202 | # mkdocs-material
203 | # nbconvert
204 | pymdown-extensions==10.2.1
205 | # via
206 | # mkdocs-material
207 | # mkdocstrings
208 | pyrsistent==0.19.3
209 | # via jsonschema
210 | python-dateutil==2.8.2
211 | # via
212 | # ghp-import
213 | # jupyter-client
214 | pytkdocs==0.16.1
215 | # via mkdocstrings-python-legacy
216 | pytz==2023.3.post1
217 | # via babel
218 | pyyaml==6.0.1
219 | # via
220 | # jupytext
221 | # mkdocs
222 | # pymdown-extensions
223 | # pyyaml-env-tag
224 | pyyaml-env-tag==0.1
225 | # via mkdocs
226 | pyzmq==25.1.1
227 | # via
228 | # ipykernel
229 | # jupyter-client
230 | regex==2022.10.31
231 | # via mkdocs-material
232 | requests==2.31.0
233 | # via mkdocs-material
234 | six==1.16.0
235 | # via
236 | # astunparse
237 | # bleach
238 | # python-dateutil
239 | soupsieve==2.4.1
240 | # via beautifulsoup4
241 | tinycss2==1.2.1
242 | # via nbconvert
243 | toml==0.10.2
244 | # via jupytext
245 | tornado==6.2
246 | # via
247 | # ipykernel
248 | # jupyter-client
249 | traitlets==5.9.0
250 | # via
251 | # ipykernel
252 | # ipython
253 | # jupyter-client
254 | # jupyter-core
255 | # matplotlib-inline
256 | # nbclient
257 | # nbconvert
258 | # nbformat
259 | typing-extensions==4.7.1
260 | # via
261 | # importlib-metadata
262 | # jsonschema
263 | # markdown-it-py
264 | # mkdocs
265 | # mkdocstrings
266 | # platformdirs
267 | # pytkdocs
268 | urllib3==2.0.7
269 | # via requests
270 | watchdog==3.0.0
271 | # via mkdocs
272 | wcwidth==0.2.10
273 | # via prompt-toolkit
274 | webencodings==0.5.1
275 | # via
276 | # bleach
277 | # tinycss2
278 | wheel==0.42.0
279 | # via astunparse
280 | zipp==3.15.0
281 | # via
282 | # importlib-metadata
283 | # importlib-resources
284 |
285 | # The following packages are considered to be unsafe in a requirements file:
286 | # setuptools
--------------------------------------------------------------------------------
/docs/tutorials/ASD/figures/4d-tensor.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/earmingol/cell2cell/6a609fa3d3e1d65fc3b92be459f47e636c89f061/docs/tutorials/ASD/figures/4d-tensor.png
--------------------------------------------------------------------------------
/docs/tutorials/ASD/figures/tensor-approx.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/earmingol/cell2cell/6a609fa3d3e1d65fc3b92be459f47e636c89f061/docs/tutorials/ASD/figures/tensor-approx.png
--------------------------------------------------------------------------------
/docs/tutorials/ASD/figures/tensor-factorization.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/earmingol/cell2cell/6a609fa3d3e1d65fc3b92be459f47e636c89f061/docs/tutorials/ASD/figures/tensor-factorization.png
--------------------------------------------------------------------------------
/docs/tutorials/ASD/figures/tf.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/earmingol/cell2cell/6a609fa3d3e1d65fc3b92be459f47e636c89f061/docs/tutorials/ASD/figures/tf.png
--------------------------------------------------------------------------------
/docs/tutorials/ASD/results/Loadings.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/earmingol/cell2cell/6a609fa3d3e1d65fc3b92be459f47e636c89f061/docs/tutorials/ASD/results/Loadings.xlsx
--------------------------------------------------------------------------------
/examples/tensor_cell2cell/PreBuiltMetadata-PBMC.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/earmingol/cell2cell/6a609fa3d3e1d65fc3b92be459f47e636c89f061/examples/tensor_cell2cell/PreBuiltMetadata-PBMC.pkl
--------------------------------------------------------------------------------
/examples/tensor_cell2cell/PreBuiltTensor-PBMC.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/earmingol/cell2cell/6a609fa3d3e1d65fc3b92be459f47e636c89f061/examples/tensor_cell2cell/PreBuiltTensor-PBMC.pkl
--------------------------------------------------------------------------------
/mkdocs.yml:
--------------------------------------------------------------------------------
1 | site_name: cell2cell
2 | site_description: "Python package to infer cell-cell interactions and communication from gene expression of interacting proteins."
3 |
4 | theme:
5 | name: readthedocs
6 | highlightjs: true
7 | repo_url: https://github.com/earmingol/cell2cell
8 | plugins:
9 | - search
10 | - mkdocs-jupyter:
11 | execute: false
12 | include: ["*.ipynb"]
13 | include_source: True
14 | #ignore_h1_titles: True
15 | - mkdocstrings:
16 | watch:
17 | - cell2cell
18 | default_handler: python
19 | handlers:
20 | python:
21 | options:
22 | docstring_style: sphinx
23 |
24 | markdown_extensions:
25 | - def_list
26 | - attr_list
27 | - admonition
28 | - codehilite
29 | - pymdownx.tasklist:
30 | custom_checkbox: true
31 | - md_in_html
32 | - pymdownx.superfences
33 | - pymdownx.betterem
34 | - pymdownx.caret
35 | - pymdownx.mark
36 | - pymdownx.tilde
37 | - pymdownx.highlight:
38 | anchor_linenums: true
39 | - pymdownx.inlinehilite
40 | - pymdownx.snippets
41 | - pymdownx.superfences
42 | - pymdownx.tabbed:
43 | alternate_style: true
44 |
45 | nav:
46 | - "Home": index.md
47 | - "API Documentation": documentation.md
48 | - "cell2cell Tutorials":
49 | - tutorials/Toy-Example-BulkPipeline.ipynb
50 | - tutorials/Toy-Example-SingleCellPipeline.ipynb
51 | - "Tensor-cell2cell Tutorials":
52 | - tutorials/ASD/01-Tensor-Factorization-ASD.ipynb
53 | - tutorials/ASD/02-Factor-Specific-ASD.ipynb
54 | - tutorials/ASD/03-GSEA-ASD.ipynb
55 | - tutorials/Tensor-cell2cell-Spatial.ipynb
56 | - tutorials/GPU-Example.ipynb
57 |
--------------------------------------------------------------------------------
/release/0.5.10-notes.md:
--------------------------------------------------------------------------------
1 | # Release Notes - cell2cell v0.5.10
2 |
3 | ## New features
4 | - Added ```cell2cell.plotting.factor_plot.ccc_networks_plot()``` to visualize factor-specific
5 | CCC networks obtained from a tensor decomposition with Tensor-cell2cell
6 | - Added Gini coefficient in ```cell2cell.stats.gini.gini_coefficient()``` and
7 | ```cell2cell.analysis.tensor_downstream.compute_gini_coefficients()```
8 |
9 | ## Feature updates
10 | - In the analysis ```cell2cell.analysis.SingleCellInteractions.permute_cell_labels()```,
11 |   the score computed without permutation is now included in the permutation
12 |   distribution used to compute P-values. So if 100 permutations are intended, the analysis
13 |   should be done with 99 permutations, since the original score becomes the 100th element (see the sketch after this list).
14 | - In the same analysis, the ```randomized_score``` list is now converted to a numpy.array once,
15 |   instead of at each iteration of the last for loop (Line 704). This accelerates the analysis.
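
A minimal sketch of the P-value convention described above (values are made up; ```cell2cell.stats.permutation.compute_pvalue_from_dist()``` implements the real logic):

```python
import numpy as np

obs_score = 0.42               # score computed without permutation
permuted = np.random.rand(99)  # scores from 99 permutations

# The observed score joins the null distribution, giving 100 elements.
dist = np.append(permuted, obs_score)
pval = np.sum(dist >= obs_score) / dist.size  # one-sided, upper tail
```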
16 |
17 | ## Fixed Bugs
18 | - Fixed bug in ```cell2cell.plotting.tensor_plot.tensor_factors_plot_from_loadings()```
19 |   that occurred when the metadata was None.
20 | - Fixed bug in ```cell2cell.plotting.tensor_plot.tensor_factors_plot_from_loadings()```
21 |   that prevented using a tensor with one dimension.
22 | - Fixed bug in ```cell2cell.plotting.factor_plot.context_boxplot()```
23 |   that prevented using a decomposition with just one factor.
24 | - Fixed bug when using ```communication_score='expression_gmean'``` in cell2cell pipelines.
--------------------------------------------------------------------------------
/release/0.5.11-notes.md:
--------------------------------------------------------------------------------
1 | # Release Notes - cell2cell v0.5.11
2 |
3 | ## New features
4 | - Created a new function to use external communication scores generated with other tools. This function can be found in
5 |   ```cell2cell.tensor.external_scores.dataframes_to_tensor()``` (see the sketch after this list).
6 | - Added ```cell2cell.tensor.tensor.BaseTensor.loc_nans```, ```cell2cell.tensor.tensor.BaseTensor.loc_zeros```, and the same attributes in
7 |   heir tensor classes, to keep track of values assigned NaNs and real zeros, respectively.
8 | - ```cell2cell.tensor.external_scores.dataframes_to_tensor()``` also incorporates the previous point to keep track
9 | of NaNs and real zeros when using external communication scores.
10 | - Added ```lr_fill``` and ```cell_fill``` parameters to ```cell2cell.tensor.external_scores.dataframes_to_tensor()```
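
A minimal sketch of building a tensor from external scores (the dataframe dictionary and the column names are assumptions; check the function's documentation):

```python
import cell2cell as c2c

# scores_by_context: {context_name: dataframe}, where each dataframe holds
# sender, receiver, ligand, receptor, and communication-score columns.
tensor = c2c.tensor.external_scores.dataframes_to_tensor(
    context_df_dict=scores_by_context,
    sender_col='sender',
    receiver_col='receiver',
    ligand_col='ligand',
    receptor_col='receptor',
    score_col='score',
)
```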
11 |
12 | ## Feature updates
13 | - Added two new options to the parameter ```how``` in ```cell2cell.tensor.build_context_ccc_tensor()```.
14 |   They are: ```how='outer_genes'``` and ```how='outer_cells'```. These new options were also extended to all InteractionTensors
15 | derived from ```cell2cell.tensor.tensor.BaseTensor```.
16 | - These ```how``` options were also extended to the new function ```cell2cell.tensor.external_scores.dataframes_to_tensor()```,
17 |   but implemented there as ```how='outer_lrs'``` and ```how='outer_cells'```.
18 | - Implemented multiple options to aggregate gene expression of protein complexes: using the
19 |   minimum or the average expression among the subunits. This is controlled with the parameter
20 |   ```complex_agg_method='min'``` or ```complex_agg_method='mean'``` when creating a ```cell2cell.tensor.InteractionTensor```,
21 |   ```cell2cell.core.InteractionSpace```, ```cell2cell.analysis.BulkInteractions``` pipeline, or ```cell2cell.analysis.SingleCellInteractions``` pipeline (see the sketch after this list).
22 | - The previous point relies on the function ```cell2cell.preprocessing.rnaseq.add_complexes_to_expression()``` through
23 |   the parameter ```agg_method='min'``` or ```agg_method='mean'```.
24 | - Added parameter ```cbar_label``` to the function ```cell2cell.plotting.factor_plot.loading_clustermap()```
25 | to personalize the title of the color bar.
26 | - Added parameter ```manual_elbow``` to ```cell2cell.tensor.tensor.BaseTensor.elbow_rank_selection()``` to manually specify
27 | the elbow to highlight.
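
A minimal sketch of the complex-aggregation option (the inputs and the complex separator are assumptions; check the class documentation):

```python
import cell2cell as c2c

# Expression of a protein complex is aggregated by the minimum
# expression across its subunits.
tensor = c2c.tensor.InteractionTensor(
    rnaseq_matrices=matrices,   # list of gene-by-cell-type matrices
    ppi_data=lr_pairs,          # ligand-receptor table
    complex_sep='&',            # separator of complex subunits
    complex_agg_method='min',   # or 'mean'
)
```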
28 |
29 | ## Fixed Bugs
30 | - Renamed the module ```cell2cell.plotting.circos_plot``` to ```cell2cell.plotting.circular_plot``` to avoid a name clash with the
31 |   function ```cell2cell.plotting.circos_plot.circos_plot()```, which is imported directly under ```cell2cell.plotting```.
--------------------------------------------------------------------------------
/release/0.5.4-notes.md:
--------------------------------------------------------------------------------
1 | # Release Notes - cell2cell v0.5.4
2 |
3 | ## New features
4 |
5 | - Implemented a gradient of colors for continuous numbers in the function ```cell2cell.plotting.aesthetics.get_colors_from_labels()```
6 | - Added method ```excluded_value_fraction()``` to the class ```InteractionTensor```
7 | - Implemented reordering of elements in a dimension of the tensor when plotting their loadings from the decomposition
8 | in the function ```cell2cell.plotting.tensor_plot.tensor_factors_plot()``` under the parameter ```reorder_elements```.
9 | - Changed tensor objects and implemented a function to normalize loadings to unit Euclidean length under
10 | the parameter ```normalize_loadings``` in method ```compute_tensor_factorization``` of the class ```BaseTensor``` and others such as ```InteractionTensor```.
11 | - Implemented attribute ```explained_variance_ratio_``` in a tensor object. Only outputs values when using ```normalize_loadings=True```.
12 | - Added ```explained_variance_``` attribute to tensor objects.
13 | - Implemented ```explained_variance()``` in tensor objects to compute the ```explained_variance_``` attribute. Inspired
14 |   by ```sklearn.metrics.explained_variance_score```.
15 |
16 | ## Dependency Update
17 |
18 | - matplotlib >= 3.2.0
19 | - seaborn >= 0.11.0
--------------------------------------------------------------------------------
/release/0.5.5-notes.md:
--------------------------------------------------------------------------------
1 | # Release Notes - cell2cell v0.5.5
2 |
3 | ## Fixed Bugs
4 |
5 | - Fixed bug in computing the factorization error when using a GPU with a tensor without masked values.
6 |   See line 180 in ```cell2cell.tensor.tensor.py```, and lines 151 and 222 of ```cell2cell.tensor.factorization.py```.
--------------------------------------------------------------------------------
/release/0.5.6-notes.md:
--------------------------------------------------------------------------------
1 | # Release Notes - cell2cell v0.5.6
2 |
3 | ## New features
4 |
5 | - Implemented an analysis for obtaining UMAP embeddings from a gene expression matrix. It is
6 | found in ```cell2cell.external.umap.run_umap()```.
7 | - Implemented UMAP biplot to visualize UMAP embeddings. It is found in
8 | ```cell2cell.plotting.umap_plot.umap_biplot()```
9 | - Implemented functions to subset an InteractionTensor by lists of names of the elements in any
10 | of the tensor dimensions.
11 | - Function ```cell2cell.tensor.subset.subset_tensor()``` to subset an InteractionTensor,
12 | powered by the function ```cell2cell.tensor.subset.find_element_indexes()``` to find
13 | where each element name is located in the tensor.
14 | - Function ```cell2cell.tensor.subset.subset_metadata()``` to subset the metadata generated with
15 |   ```cell2cell.tensor.tensor.generate_tensor_metadata()```. It restricts the metadata to only the elements
16 |   contained in a reference InteractionTensor (see the sketch after this list).
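
A minimal sketch of subsetting a tensor (the dimension indices and element names are assumptions; dimensions 2 and 3 are typically sender and receiver cells):

```python
import cell2cell as c2c

# subset_dict maps tensor dimensions to the element names to keep.
small_tensor = c2c.tensor.subset.subset_tensor(
    interaction_tensor=tensor,
    subset_dict={2: ['Macrophages', 'T cells'],
                 3: ['Macrophages', 'T cells']},
)
```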
17 |
18 | ## Dependency Update
19 |
20 | - umap-learn
--------------------------------------------------------------------------------
/release/0.5.7-notes.md:
--------------------------------------------------------------------------------
1 | # Release Notes - cell2cell v0.5.7
2 |
3 | ## New features
4 |
5 | - Added geometric mean as a communication score in ```cell2cell.core.communication_scores```
6 | - Added the parameter ```var_ordered_factors``` in
7 |   ```cell2cell.tensor.BaseTensor.compute_tensor_factorization()``` to decide whether to reorder
8 |   the factors by the variance they explain (in descending order).
9 | - Made ```normalize_loadings=True``` the default in
10 |   ```cell2cell.tensor.BaseTensor.compute_tensor_factorization()```
11 | - Added an option to plot the loadings of a tensor factorization directly from a
12 | factors object (an OrderedDict usually found in ```cell2cell.tensor.BaseTensor.factors```).
13 | It can be done with the function ```cell2cell.plotting.tensor_factors_plot_from_loadings()```
14 | - To complement the previous point, added a function to import factors from an Excel file
15 | previously exported with ```cell2cell.tensor.BaseTensor.export_factors(filename)```. To import the
16 |   factors, use ```cell2cell.io.load_tensor_factors(filename)``` (see the sketch below).
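
A minimal sketch of the export/import round trip (the filename is hypothetical):

```python
import cell2cell as c2c

tensor.export_factors('Loadings.xlsx')                   # save loadings to Excel
factors = c2c.io.load_tensor_factors('Loadings.xlsx')    # reload them later
c2c.plotting.tensor_factors_plot_from_loadings(factors)  # re-plot the loadings
```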
17 |
18 | ## Fixed Bugs
19 |
20 | - Fixed minor bugs in functions ```cell2cell.external.umap.run_umap()``` and
21 | ```cell2cell.plotting.umap_plot.umap_biplot()```
--------------------------------------------------------------------------------
/release/0.5.8-notes.md:
--------------------------------------------------------------------------------
1 | # Release Notes - cell2cell v0.5.8
2 |
3 | ## New features
4 | - Implemented communication score 'expression_gmean' in all pipelines of cell2cell (cell2cell.analysis.pipelines)
5 | - Updated documentation of regular cell2cell
6 | - Implemented **downstream analyses for Tensor-cell2cell**, available in ```cell2cell.analysis.tensor_downstream``` and
7 | associated plots in ```cell2cell.plotting.factor_plot```
8 | - Implemented the **CorrIndex metric** to compare two tensor decompositions of similar tensors, available in
9 | ```cell2cell.tensor.metrics```
10 | - Implemented a function to export networks to be read in Cytoscape. It can be called as
11 | ```cell2cell.utils.networks.export_network_to_cytoscape()```
12 | - Renamed ```cell2cell.plotting.dot_plot.py``` into ```cell2cell.plotting.pval_plot.py``` and included a new function
13 | to perform the dot plots with any input. The original function is ```cell2cell.plotting.pval_plot.dot_plot()```, which
14 | takes a ```cell2cell.analysis.pipelines.SingleCellInteractions``` object, while the new function is
15 | ```cell2cell.plotting.pval_plot.generate_dot_plot()```, which takes any pair of dataframes of P-values and scores.
16 |
17 | ## Fixed Bugs
18 | - Fixed bugs in triangular clustermap in ```cell2cell.plotting.cci_plot```
19 | - Fixed bug associated with duplicated gene names when building a ```cell2cell.tensor.InteractionTensor```
20 |
21 | ## Dependency Update
22 | - Added statannotations
--------------------------------------------------------------------------------
/release/0.5.9-notes.md:
--------------------------------------------------------------------------------
1 | # Release Notes - cell2cell v0.5.9
2 |
3 | ## Fixed Bugs
4 | - This version was created to fix issues when importing version 0.5.8.
5 | - For changes in this version, see [notes of version 0.5.8](0.5.8-notes.md)
--------------------------------------------------------------------------------
/release/0.6.0-notes.md:
--------------------------------------------------------------------------------
1 | # Release Notes - cell2cell v0.6.0
2 |
3 | ## New features
4 | - Added 'gmean' as a method to compute the expression of protein complexes.
5 | It involves function ```cell2cell.preprocessing.rnaseq.add_complexes_to_expression()```
6 | and all objects calling it.
7 | - Added new parameters for improving the robustness of tensor factorizations:
8 |   ```n_iter_max``` and ```tol```. A higher ```n_iter_max``` and a lower ```tol``` retrieve solutions closer to the
9 |   optimum, at the expense of longer running times (see the sketch after this list). Available in:
10 |   ```cell2cell.tensor.factorization._compute_tensor_factorization()```
11 |   and in ```cell2cell.tensor.tensor.BaseTensor.compute_tensor_factorization()``` and all heir classes.
12 | - Similar to the previous point, the parameter ```svd``` was added to these functions. It controls
13 |   the type of SVD method used when ```init='svd'```. See documentation for more information.
14 | - Added new methods/options for running a tensor decomposition in ```cell2cell.tensor.factorization._compute_tensor_factorization()```
15 | and in ```cell2cell.tensor.tensor.BaseTensor.compute_tensor_factorization()``` and all heir classes.
16 | This can be controlled with the parameter ```tf_type```. See documentation for
17 | more options.
18 | - Added option to do a deep copy of any tensor of the class ```cell2cell.tensor.tensor.BaseTensor``` and its
19 | heir classes. Available through ```BaseTensor.copy()```.
20 | - Added new CCI score based on ICELLNET (```cell2cell.core.cci_scores```). Available in the functions
21 | of the regular cell2cell tool (```cell2cell.core.interaction_space```, ```cell2cell.analysis.pipelines.BulkInteractions```,
22 | and ```cell2cell.analysis.pipelines.SingleCellInteractions```)
23 | - Added new function to handle duplicate elements ```cell2cell.preprocessing.find_elements.find_duplicates()```
24 | - Modified functions in ```cell2cell.tensor.subset``` to handle duplicate elements
25 | - Added new function to concatenate InteractionTensors: ```cell2cell.tensor.tensor_manipulation.concatenate_interaction_tensors()```
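
A minimal sketch of a more exhaustive factorization using the new parameters (the tensor object and the rank are assumptions):

```python
tensor.compute_tensor_factorization(
    rank=8,
    tf_type='non_negative_cp',  # decomposition method
    init='svd',
    svd='numpy_svd',            # SVD variant used when init='svd'
    n_iter_max=500,             # more iterations -> closer to the optimum
    tol=1e-8,                   # stricter convergence tolerance
)
```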
26 |
27 | ## Feature updates
28 | - Updated dependency version of tensorly to 0.7.0
29 |
30 | ## Fixed Bugs
31 | - Fixed bug with ```return_errors``` in tensor decompositions using the regular non_negative_parafac.
32 |   The new version of tensorly returns the decomposition and error as a tuple, as in other decomposition methods.
33 | - Fixed bug that changed the diagonal values of the input matrix to zeros when using ```cell2cell.plotting.cci_plot.clustermap_cci```
--------------------------------------------------------------------------------
/release/0.6.1-notes.md:
--------------------------------------------------------------------------------
1 | # Release Notes - cell2cell v0.6.1
2 |
3 | ## New features
4 | - Implemented the option to filter for cells/genes/LR pairs that are present in a given
5 |   fraction of samples/contexts, in addition to using the union or intersection, when building a
6 |   tensor derived from `BaseTensor`. This can be controlled with the parameter `outer_fraction`
7 |   in the classes/functions available in `cell2cell.tensor.tensor` and `cell2cell.tensor.external_scores` (see the sketch after this list).
8 | - Added method `sparsity_fraction()` to `cell2cell.tensor.tensor.BaseTensor`, which computes the fraction of
9 | values in the tensor that are real zeros.
10 | - Added method `missing_fraction()` to `cell2cell.tensor.tensor.BaseTensor`, which computes the fraction of
11 | values in the tensor that are missing or NaNs.
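
A minimal sketch of `outer_fraction` together with the new fraction methods (the input matrices and LR table are assumptions):

```python
import cell2cell as c2c

# Keep only elements present in at least half of the contexts.
tensor = c2c.tensor.InteractionTensor(
    rnaseq_matrices=matrices,
    ppi_data=lr_pairs,
    how='outer',
    outer_fraction=0.5,
)
print(tensor.sparsity_fraction())  # fraction of real zeros
print(tensor.missing_fraction())   # fraction of missing values (NaNs)
```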
12 |
13 | ## Feature updates
14 | - `cell2cell.stats.permutation.compute_pvalue_from_dist()` ignores NaN values.
15 |
16 | ## Fixed Bugs
17 | - Fixed bug in `cell2cell.tensor.concatenate_interaction_tensors()` that did not allow
18 |   concatenating tensors when using a tensorly backend different from numpy.
19 | - Fixed bug to deal with GPU tensors in `cell2cell.tensor.tensor.PreBuiltTensor`
20 | - Fixed bug about dimension labelling in `cell2cell.tensor.tensor.PreBuiltTensor`
--------------------------------------------------------------------------------
/release/0.6.2-notes.md:
--------------------------------------------------------------------------------
1 | # Release Notes - cell2cell v0.6.2
2 |
3 | ## New features
4 | - Added a parameter `output_fig` to ```cell2cell.tensor.tensor.BaseTensor.elbow_rank_selection()``` that
5 | allows deciding whether to generate the figure. If `output_fig=False`, the outputs of this function
6 | will be `(None, loss)`.
7 | - Created ```cell2cell.preprocessing.signal``` to include functions such as ```smooth_curve()```
8 | to smooth a set of values representing a curve.
9 | - Implemented curve smoothing for the elbow analysis of Tensor-cell2cell. It can be accessed with the parameter
10 |   ```smooth=True``` in ```cell2cell.tensor.tensor.BaseTensor.elbow_rank_selection()``` or in any other InteractionTensor.
11 | - Implemented curve smoothing for the elbow plot designed for multiple runs.
12 |   It can be accessed with the parameter
13 |   ```smooth=True``` in ```cell2cell.plotting.tensor_plot.plot_multiple_run_elbow()```.
14 | - Implemented ```cell2cell.tensor.metrics.pairwise_correlation_index()``` to compute the CorrIndex
15 | between all pairs of tensor decompositions in a list.
16 | - Implemented elbow analysis based on the similarity of multiple runs. This can be controlled with the option
17 |   ```metric='similarity'``` in ```cell2cell.tensor.tensor.BaseTensor.elbow_rank_selection()``` or in any other InteractionTensor.
18 |   Use ```metric='error'``` for the normalized error used in previous versions (see the sketch after this list).
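
A minimal sketch of the similarity-based elbow analysis (the tensor object and the `upper_rank`/`runs` values are assumptions):

```python
# Runs several decompositions per rank and scores how similar they are
# (via the CorrIndex mentioned above); smoothing is applied to the
# resulting curve before locating the elbow.
fig, loss = tensor.elbow_rank_selection(
    upper_rank=25,
    runs=10,
    metric='similarity',
    smooth=True,
    output_fig=True,
)
```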
19 |
20 | ## Feature updates
21 | - Modified the way to compute normalized error of tensor decomposition in
22 | ```cell2cell.tensor.factorization._compute_norm_error()```
23 | - Added the option to directly pass a `ylabel` to the elbow plots, including:
24 | ```cell2cell.plotting.tensor_plot.plot_elbow()``` and ```cell2cell.plotting.tensor_plot.plot_multiple_run_elbow()```
25 | - Extended input parameters of ```cell2cell.tensor.factorization._compute_elbow()```
26 |
27 |
28 | ## Fixed Bugs
29 | -
--------------------------------------------------------------------------------
/release/0.6.3-notes.md:
--------------------------------------------------------------------------------
1 | # Release Notes - cell2cell v0.6.3
2 |
3 | ## New features
4 | - Created ```cell2cell.analysis.tensor_pipelines``` with functions to easily run
5 | Tensor-cell2cell.
6 |
7 | ## Feature updates
8 | - Deleted ```cell2cell.external.tensorly_nn_cp``` since it is not used anymore.
9 | Tensorly is directly used instead.
10 | - Renamed ```cell2cell.analysis.pipelines``` to ```cell2cell.analysis.cell2cell_pipelines```
11 | - Added ```elbow_metric```, ```elbow_metric_mean``` and ```elbow_metric_raw``` attributes to ```cell2cell.tensor.tensor.BaseTensor```
12 | for storing the curve generated from the elbow analysis.
13 | - Removed parameter ```loc_zeros``` from ```cell2cell.tensor.tensor.PreBuiltTensor```
14 |
15 | ## Fixed Bugs
16 | - Converted factors to numpy in ```cell2cell.tensor.factorization._multiple_runs_elbow_analysis()```
17 | when ```metric='similarity'``` to avoid errors when using GPU.
18 | - Fixed error obtained with the ```sparsity_fraction()``` and ```missing_fraction()``` methods of a ```cell2cell.tensor.tensor.BaseTensor``` when
19 |   the tensorly backend is different from numpy and the device is not a CPU. This was fixed by
20 |   making the loc_nans and loc_zeros attributes of ```cell2cell.tensor.tensor.InteractionTensor``` and ```cell2cell.tensor.tensor.PreBuiltTensor```
21 |   tensorly.tensor objects.
--------------------------------------------------------------------------------
/release/0.6.4-notes.md:
--------------------------------------------------------------------------------
1 | # Release Notes - cell2cell v0.6.4
2 |
3 | ## New features
4 | - Added the ```cell2cell.io.read_data.load_tensor()``` function to directly load a previously
5 |   exported ```interaction_tensor``` variable generated with Tensor-cell2cell (see the sketch after this list).
6 | - Added a new dataset from a COVID-19 study. Available in ```cell2cell.datasets.anndata.balf_covid()```.
7 | - Added functions to create and explore directories in ```cell2cell.io.directories```.
8 | - Added ```cell2cell.io.read_data.load_tables_from_directory()``` to load all tables or dataframes with the same
9 |   extension located in a given directory.
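
A minimal sketch of reloading a saved tensor (the filename is hypothetical):

```python
import cell2cell as c2c

# Load a previously exported interaction tensor object.
tensor = c2c.io.read_data.load_tensor('PreBuiltTensor-PBMC.pkl')
```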
10 |
11 | ## Feature updates
12 | - Modified ```sparsity_fraction()``` and ```missing_fraction()``` methods of ```cell2cell.tensor.tensor.BaseTensor``` to return
13 | the item in the tensorly tensor object.
14 | - Added progress bar to ```cell2cell.tensor.external_scores.dataframes_to_tensor()```.
15 | - Added the option to specify the ```backend``` when running ```cell2cell.analysis.tensor_pipelines.run_tensor_cell2cell_pipeline()```.
16 |
17 | ## Fixed Bugs
18 | - Implemented a way to manage duplicated instances of an LR communication score in ```cell2cell.tensor.external_scores.dataframes_to_tensor()```.
19 |   It can be controlled through the parameter ```dup_aggregation```.
--------------------------------------------------------------------------------
/release/0.6.5-notes.md:
--------------------------------------------------------------------------------
1 | # Release Notes - cell2cell v0.6.5
2 |
3 | ## New features
4 | - Implemented in-house GSEA using gseapy. Associated code is located in
5 | `cell2cell.datasets.gsea_data` and `cell2cell.external.gseapy`.
6 | - Implemented a function to obtain a dataframe of LR pairs by cell pairs from a tensor decomposition
7 |   result, so it can be used for plotting. It can compute a communication score per factor or
8 |   across all factors. See the function `cell2cell.analysis.tensor_downstream.get_lr_by_cell_pairs()`.
9 |
10 | ## Feature updates
11 | - Added the axis names to the dataframe generated with ```cell2cell.analysis.tensor_downstream.get_joint_loadings()```,
12 |   which correspond to the `dim1` and `dim2` parameters (see the sketch after this list).
13 | - Added the axis labels (`cm.ax_heatmap.set_xlabel()` & `cm.ax_heatmap.set_ylabel()`) using the axis names of the dataframe
14 |   passed to ```cell2cell.plotting.factor_plot.loading_clustermap()```.
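
A minimal sketch of retrieving joint loadings between two tensor dimensions (`dim1`/`dim2` follow these notes; the `result` and `factor` argument names and the dimension labels are assumptions):

```python
import cell2cell as c2c

joint = c2c.analysis.tensor_downstream.get_joint_loadings(
    result=tensor.factors,         # loadings per tensor dimension
    dim1='Ligand-Receptor Pairs',
    dim2='Sender Cells',
    factor='Factor 1',
)
```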
15 |
16 | ## Fixed Bugs
17 | -
--------------------------------------------------------------------------------
/release/0.6.6-notes.md:
--------------------------------------------------------------------------------
1 | # Release Notes - cell2cell v0.6.6
2 |
3 | ## New features
4 | - Added new attributes and methods to `cell2cell.tensor.tensor.BaseTensor` and any other
5 |   derived class, including `BaseTensor.shape`, `BaseTensor.write_file()`, and `BaseTensor.to_device()`.
6 |   These new features respectively allow (see the sketch below):
7 |   - Getting the shape of the tensor directly (instead of `BaseTensor.tensor.shape`)
8 |   - Exporting or saving a tensor object to a file.
9 |   - Changing the device for running Tensor-cell2cell (e.g. 'cpu', 'cuda', etc.)
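
A minimal sketch of the new conveniences (the tensor object and the filename are assumptions):

```python
print(tensor.shape)              # shape without accessing tensor.tensor.shape
tensor.write_file('tensor.pkl')  # save the tensor object to a file
tensor.to_device('cuda')         # run Tensor-cell2cell on a GPU device
```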
10 |
11 | ## Feature updates
12 | - Added `**kwargs` as a parameter of `cell2cell.analysis.tensor_pipelines.run_tensor_cell2cell_pipeline()`
13 |   to directly pass parameters to the functions running the elbow analysis and the tensor decomposition.
14 | - Sort factors numerically in `cell2cell.external.gseapy.run_gsea()`.
15 |
16 | ## Fixed Bugs
17 | -
--------------------------------------------------------------------------------
/release/0.6.7-notes.md:
--------------------------------------------------------------------------------
1 | # Release Notes - cell2cell v0.6.7
2 |
3 | ## New features
4 | - Direct access to `interaction_elements` attribute from `cell2cell.analysis.cell2cell_pipelines.SingleCellInteractions`
5 | and `cell2cell.analysis.cell2cell_pipelines.BulkInteractions`
6 | - Added option to store GMT file in output_folder in `cell2cell.external.gseapy`
7 |
8 | ## Feature updates
9 | - Removed tqdm for jupyter notebooks.
10 | - Updated tensorly version from 0.7.0 to 0.8.1
11 |
12 | ## Fixed Bugs
13 | - Modified permutations in `cell2cell.analysis.cell2cell_pipelines.SingleCellInteractions.permute_cell_labels()`
--------------------------------------------------------------------------------
/release/0.6.8-notes.md:
--------------------------------------------------------------------------------
1 | # Release Notes - cell2cell v0.6.8
2 |
3 | ## New features
4 |
5 |
6 | ## Feature updates
7 |
8 |
9 | ## Fixed Bugs
10 | - Fixed bug that was skipping first factor to generate outputs from `cell2cell.external.gseapy.run_gsea()`
--------------------------------------------------------------------------------
/release/0.7.0-notes.md:
--------------------------------------------------------------------------------
1 | # Release Notes - cell2cell v0.7.0
2 |
3 | ## New features
4 | - Added `cell2cell.spatial` functions enabling analyses that consider the spatial organization of spatial data.
5 |   These functions include (see the sketch after this list):
6 |   - Filtering by thresholding intercellular distances (`cell2cell.spatial.distances` and `cell2cell.spatial.filtering`).
7 |   - Dividing the tissue into square grids (`cell2cell.spatial.neighborhoods.create_spatial_grid()`)
8 |   - Dividing the tissue into moving windows (`cell2cell.spatial.neighborhoods.create_moving_windows()`, `cell2cell.spatial.neighborhoods.calculate_window_size()`,
9 |     and `cell2cell.spatial.neighborhoods.add_moving_window_info_to_adata()`)
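
A minimal sketch of gridding a tissue (the function name follows these notes; the `adata` object and the `num_bins` parameter name are assumptions, so check `cell2cell.spatial.neighborhoods`):

```python
import cell2cell as c2c

# Assign each cell in an AnnData object to a bin of a 4x4 spatial grid.
c2c.spatial.neighborhoods.create_spatial_grid(adata, num_bins=4)
```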
10 |
11 | ## Feature updates
12 |
13 |
14 | ## Fixed Bugs
15 | - Fixed bug that caused multiple legends to be incorrectly visualized in plots, for example in `cell2cell.plotting.tensor_plot`,
16 |   when using newer matplotlib versions.
--------------------------------------------------------------------------------
/release/0.7.1-notes.md:
--------------------------------------------------------------------------------
1 | # Release Notes - cell2cell v0.7.1
2 |
3 | ## New features
4 | - Refer to [v0.7.0 notes](./0.7.0-notes.md) to see the new features. This is a quick fix of that version.
5 |
6 | ## Feature updates
7 | - Renamed `cell2cell.spatial.neighborhoods.create_moving_windows()` and
8 |   `cell2cell.spatial.neighborhoods.add_moving_window_info_to_adata()` to
9 |   `cell2cell.spatial.neighborhoods.create_sliding_windows()` and
10 |   `cell2cell.spatial.neighborhoods.add_sliding_window_info_to_adata()`, respectively.
11 |
12 | ## Fixed Bugs
13 |
--------------------------------------------------------------------------------
/release/0.7.2-notes.md:
--------------------------------------------------------------------------------
1 | # Release Notes - cell2cell v0.7.2
2 |
3 | ## New features
4 | - Refer to [v0.7.0 notes](./0.7.0-notes.md) & [v0.7.1 notes](./0.7.1-notes.md) to see the new features. This is a quick fix of those versions.
5 |
6 | ## Feature updates
7 |
8 | ## Fixed Bugs
9 | - Updated export of factor loadings in Tensor-cell2cell for compatibility with newer versions of `pandas`.
--------------------------------------------------------------------------------
/release/0.7.3-notes.md:
--------------------------------------------------------------------------------
1 | # Release Notes - cell2cell v0.7.3
2 |
3 | ## New features
4 | - Refer to [v0.7.2 notes](./0.7.2-notes.md) to see the previous new features.
5 | - Added example of using Spatial Data with Tensor-cell2cell.
6 |
7 | ## Feature updates
8 | - Updated single-cell data example with cell2cell to use COVID-19 data.
9 | - Updated bulk data example with cell2cell.
10 | - Updated `docs` folder for readthedocs.org.
11 | - Updated README.md
12 |
13 | ## Fixed Bugs
14 | - Fixed legend visualization in `cell2cell.plotting.pcoa_plot.pcoa_3dplot()`
15 | - Fixed negative P-values in `cell2cell.stats.permutation.compute_pvalue_from_dist()`
16 | - Fixed permutation analysis in `cell2cell.analysis.cell2cell_pipelines.SingleCellInteractions`
17 | - Fixed legend visualization in `cell2cell.plotting.circular_plot.circos_plot()`
--------------------------------------------------------------------------------
/release/0.7.4-notes.md:
--------------------------------------------------------------------------------
1 | # Release Notes - cell2cell v0.7.4
2 |
3 | ## New features
4 | - Refer to [v0.7.3 notes](./0.7.3-notes.md) to see the previous new features.
5 |
6 | ## Feature updates
7 | - Updated assert warnings for Tensor-cell2cell
8 |
9 | ## Fixed Bugs
10 | - Fixed set indexing that was deprecated in new pandas versions (in `cell2cell.preprocessing.rnaseq.add_complexes_to_expression()`)
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # ----------------------------------------------------------------------------
4 | # Copyright (c) 2019--, Cell2cell development team.
5 | #
6 | # Distributed under the terms of the BSD 3-Clause License.
7 | #
8 | # The full license is in the file LICENSE, distributed with this software.
9 | # ----------------------------------------------------------------------------
10 |
11 | from setuptools.command.egg_info import egg_info
12 | from setuptools.command.develop import develop
13 | from setuptools.command.install import install
14 | import re
15 | import ast
16 | import os
17 | from setuptools import find_packages, setup
18 |
19 | # Dealing with Cython
20 | USE_CYTHON = os.environ.get('USE_CYTHON', False)
21 | ext = '.pyx' if USE_CYTHON else '.c'
22 |
23 |
24 | def custom_command():  # pre-installs numpy via pip on macOS/Linux
25 | import sys
26 | if sys.platform in ['darwin', 'linux']:
27 | os.system('pip install numpy')
28 |
29 | class CustomInstallCommand(install):
30 | def run(self):
31 | install.run(self)
32 | custom_command()
33 |
34 | class CustomDevelopCommand(develop):
35 | def run(self):
36 | develop.run(self)
37 | custom_command()
38 |
39 | class CustomEggInfoCommand(egg_info):
40 | def run(self):
41 | egg_info.run(self)
42 | custom_command()
43 |
44 |
45 | extensions = [
46 | ]
47 |
48 | if USE_CYTHON:
49 | from Cython.Build import cythonize
50 | extensions = cythonize(extensions)
51 |
52 | classes = """
53 | Development Status :: 2 - Pre-Alpha
54 | License :: OSI Approved :: BSD License
55 | Topic :: Software Development :: Libraries
56 | Topic :: Scientific/Engineering
57 | Topic :: Scientific/Engineering :: Bio-Informatics
58 | Programming Language :: Python :: 3
59 | Programming Language :: Python :: 3 :: Only
60 | Operating System :: Unix
61 | Operating System :: POSIX
62 | Operating System :: MacOS :: MacOS X
63 | """
64 | classifiers = [s.strip() for s in classes.split('\n') if s]
65 |
66 | description = ('TBD')
67 |
68 | with open('README.md') as f:
69 | long_description = f.read()
70 |
71 | _version_re = re.compile(r'__version__\s+=\s+(.*)')
72 |
73 | with open('cell2cell/__init__.py', 'rb') as f:
74 | hit = _version_re.search(f.read().decode('utf-8')).group(1)
75 | version = str(ast.literal_eval(hit))
76 |
77 | setup(name='cell2cell',
78 | version=version,
79 | license='BSD-3-Clause',
80 | description=description,
81 | long_description_content_type="text/markdown",
82 | long_description=long_description,
83 | author="cell2cell development team",
84 | author_email="earmingo@ucsd.edu",
85 | maintainer="cell2cell development team",
86 | maintainer_email="earmingol@eng.ucsd.edu",
87 | packages=find_packages(),
88 | ext_modules=extensions,
89 | install_requires=['numpy >= 1.16',
90 | 'pandas >= 1.0.0',
91 | 'xlrd >= 1.1',
92 | 'openpyxl >= 2.6.2',
93 | 'networkx >= 2.3',
94 | 'matplotlib >= 3.2.0',
95 | 'seaborn >= 0.11.0',
96 | 'scikit-learn',
97 | 'umap-learn',
98 | 'tqdm',
99 | 'statsmodels',
100 | 'statannotations',
101 | 'tensorly == 0.8.1',
102 | 'kneed',
103 | 'scanpy',
104 | 'gseapy == 1.0.3'
105 | ],
106 | classifiers=classifiers,
107 | entry_points={},
108 | package_data={},
109 | cmdclass={'install': CustomInstallCommand,
110 | 'develop': CustomDevelopCommand,
111 | 'egg_info': CustomEggInfoCommand, },
112 | zip_safe=False)
--------------------------------------------------------------------------------