├── .github
│   └── workflows
│       └── python-publish.yml
├── .readthedocs.yml
├── LICENSE.txt
├── Logo.png
├── LogoTensor.png
├── README.md
├── benchmarks
│   ├── __init__.py
│   └── benchmarks.py
├── cell2cell
│   ├── __init__.py
│   ├── analysis
│   │   ├── __init__.py
│   │   ├── cell2cell_pipelines.py
│   │   ├── tensor_downstream.py
│   │   └── tensor_pipelines.py
│   ├── clustering
│   │   ├── __init__.py
│   │   └── cluster_interactions.py
│   ├── core
│   │   ├── __init__.py
│   │   ├── cci_scores.py
│   │   ├── cell.py
│   │   ├── communication_scores.py
│   │   └── interaction_space.py
│   ├── datasets
│   │   ├── __init__.py
│   │   ├── anndata.py
│   │   ├── gsea_data.py
│   │   ├── heuristic_data.py
│   │   ├── random_data.py
│   │   └── toy_data.py
│   ├── external
│   │   ├── __init__.py
│   │   ├── goenrich.py
│   │   ├── gseapy.py
│   │   ├── pcoa.py
│   │   ├── pcoa_utils.py
│   │   └── umap.py
│   ├── io
│   │   ├── __init__.py
│   │   ├── directories.py
│   │   ├── read_data.py
│   │   └── save_data.py
│   ├── plotting
│   │   ├── __init__.py
│   │   ├── aesthetics.py
│   │   ├── ccc_plot.py
│   │   ├── cci_plot.py
│   │   ├── circular_plot.py
│   │   ├── factor_plot.py
│   │   ├── pcoa_plot.py
│   │   ├── pval_plot.py
│   │   ├── tensor_plot.py
│   │   └── umap_plot.py
│   ├── preprocessing
│   │   ├── __init__.py
│   │   ├── cutoffs.py
│   │   ├── find_elements.py
│   │   ├── gene_ontology.py
│   │   ├── integrate_data.py
│   │   ├── manipulate_dataframes.py
│   │   ├── ppi.py
│   │   ├── rnaseq.py
│   │   └── signal.py
│   ├── spatial
│   │   ├── __init__.py
│   │   ├── distances.py
│   │   ├── filtering.py
│   │   └── neighborhoods.py
│   ├── stats
│   │   ├── __init__.py
│   │   ├── enrichment.py
│   │   ├── gini.py
│   │   ├── multitest.py
│   │   └── permutation.py
│   ├── tensor
│   │   ├── __init__.py
│   │   ├── external_scores.py
│   │   ├── factor_manipulation.py
│   │   ├── factorization.py
│   │   ├── metrics.py
│   │   ├── subset.py
│   │   ├── tensor.py
│   │   └── tensor_manipulation.py
│   └── utils
│       ├── __init__.py
│       ├── networks.py
│       └── parallel_computing.py
├── docs
│   ├── documentation.md
│   ├── index.md
│   ├── requirements.in
│   ├── requirements.txt
│   └── tutorials
│       ├── ASD
│       │   ├── 01-Tensor-Factorization-ASD.ipynb
│       │   ├── 02-Factor-Specific-ASD.ipynb
│       │   ├── 03-GSEA-ASD.ipynb
│       │   ├── KEGG.gmt
│       │   ├── figures
│       │   │   ├── 4d-tensor.png
│       │   │   ├── tensor-approx.png
│       │   │   ├── tensor-factorization.png
│       │   │   └── tf.png
│       │   └── results
│       │       └── Loadings.xlsx
│       ├── GPU-Example.ipynb
│       ├── Tensor-cell2cell-Spatial.ipynb
│       ├── Toy-Example-BulkPipeline.ipynb
│       └── Toy-Example-SingleCellPipeline.ipynb
├── examples
│   ├── cell2cell
│   │   ├── Human-2020-Jin-LR-pairs.csv
│   │   ├── Toy-Example-BulkPipeline.ipynb
│   │   ├── Toy-Example-SingleCellPipeline.ipynb
│   │   └── Toy-Example.ipynb
│   └── tensor_cell2cell
│       ├── GPU-Example.ipynb
│       ├── Loading-PreBuiltTensor.ipynb
│       ├── PreBuiltMetadata-PBMC.pkl
│       ├── PreBuiltTensor-PBMC.pkl
│       ├── Tensor-cell2cell-PBMC.ipynb
│       └── Tensor-cell2cell-Spatial.ipynb
├── mkdocs.yml
├── release
│   ├── 0.5.10-notes.md
│   ├── 0.5.11-notes.md
│   ├── 0.5.4-notes.md
│   ├── 0.5.5-notes.md
│   ├── 0.5.6-notes.md
│   ├── 0.5.7-notes.md
│   ├── 0.5.8-notes.md
│   ├── 0.5.9-notes.md
│   ├── 0.6.0-notes.md
│   ├── 0.6.1-notes.md
│   ├── 0.6.2-notes.md
│   ├── 0.6.3-notes.md
│   ├── 0.6.4-notes.md
│   ├── 0.6.5-notes.md
│   ├── 0.6.6-notes.md
│   ├── 0.6.7-notes.md
│   ├── 0.6.8-notes.md
│   ├── 0.7.0-notes.md
│   ├── 0.7.1-notes.md
│   ├── 0.7.2-notes.md
│   ├── 0.7.3-notes.md
│   └── 0.7.4-notes.md
└── setup.py
/.github/workflows/python-publish.yml:
--------------------------------------------------------------------------------
 1 | # This workflow will upload a Python Package using Twine when a release is created
 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries
 3 | 
 4 | # This workflow uses actions that are not certified by GitHub.
 5 | # They are provided by a third-party and are governed by
 6 | # separate terms of service, privacy policy, and support
 7 | # documentation.
 8 | 
 9 | name: Upload Python Package
10 | 
11 | on:
12 |   release:
13 |     types: [published]
14 | 
15 | permissions:
16 |   contents: read
17 | 
18 | jobs:
19 |   deploy:
20 | 
21 |     runs-on: ubuntu-latest
22 | 
23 |     steps:
24 |     - uses: actions/checkout@v3
25 |     - name: Set up Python
26 |       uses: actions/setup-python@v3
27 |       with:
28 |         python-version: '3.x'
29 |     - name: Install dependencies
30 |       run: |
31 |         python -m pip install --upgrade pip
32 |         pip install build
33 |     - name: Build package
34 |       run: python -m build
35 |     - name: Publish package
36 |       uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
37 |       with:
38 |         user: __token__
39 |         password: ${{ secrets.PYPI_CELL2CELL_TOKEN }}
40 | 
/.github/workflows/python-publish.yml:
--------------------------------------------------------------------------------
1 | # This workflow will upload a Python Package using Twine when a release is created
2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries
3 |
4 | # This workflow uses actions that are not certified by GitHub.
5 | # They are provided by a third-party and are governed by
6 | # separate terms of service, privacy policy, and support
7 | # documentation.
8 |
9 | name: Upload Python Package
10 |
11 | on:
12 | release:
13 | types: [published]
14 |
15 | permissions:
16 | contents: read
17 |
18 | jobs:
19 | deploy:
20 |
21 | runs-on: ubuntu-latest
22 |
23 | steps:
24 | - uses: actions/checkout@v3
25 | - name: Set up Python
26 | uses: actions/setup-python@v3
27 | with:
28 | python-version: '3.x'
29 | - name: Install dependencies
30 | run: |
31 | python -m pip install --upgrade pip
32 | pip install build
33 | - name: Build package
34 | run: python -m build
35 | - name: Publish package
36 | uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
37 | with:
38 | user: __token__
39 | password: ${{ secrets.PYPI_CELL2CELL_TOKEN }}
40 |
--------------------------------------------------------------------------------
/.readthedocs.yml:
--------------------------------------------------------------------------------
1 | # .readthedocs.yaml
2 | # Read the Docs configuration file
3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
4 |
5 | # Required
6 | version: 2
7 |
8 | # Set the version of Python and other tools you might need
9 | build:
10 | os: ubuntu-22.04
11 | tools:
12 | python: "3.7"
13 |
14 | mkdocs:
15 | configuration: mkdocs.yml
16 |
17 | # Optionally declare the Python requirements required to build your docs
18 | python:
19 | install:
20 | - requirements: docs/requirements.txt
21 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 |
3 | Copyright (c) 2019, Erick Armingol
4 | All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 |
9 | * Redistributions of source code must retain the above copyright notice, this
10 | list of conditions and the following disclaimer.
11 |
12 | * Redistributions in binary form must reproduce the above copyright notice,
13 | this list of conditions and the following disclaimer in the documentation
14 | and/or other materials provided with the distribution.
15 |
16 | * Neither the name of the copyright holder nor the names of its
17 | contributors may be used to endorse or promote products derived from
18 | this software without specific prior written permission.
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
--------------------------------------------------------------------------------
/Logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/earmingol/cell2cell/6a609fa3d3e1d65fc3b92be459f47e636c89f061/Logo.png
--------------------------------------------------------------------------------
/LogoTensor.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/earmingol/cell2cell/6a609fa3d3e1d65fc3b92be459f47e636c89f061/LogoTensor.png
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Inferring cell-cell interactions from transcriptomes with *cell2cell*
2 | [![PyPI Version][pb]][pypi]
3 | [![Documentation Status](https://readthedocs.org/projects/cell2cell/badge/?version=latest)](https://cell2cell.readthedocs.io/en/latest/?badge=latest)
4 | [![Downloads](https://pepy.tech/badge/cell2cell)](https://pepy.tech/project/cell2cell)
5 |
6 |
7 | [pb]: https://badge.fury.io/py/cell2cell.svg
8 | [pypi]: https://pypi.org/project/cell2cell/
9 |
10 | ## :book: Getting started
11 | For tutorials and documentation, visit [**cell2cell ReadTheDocs**](https://cell2cell.readthedocs.org/) or our [**cell2cell website**](https://earmingol.github.io/cell2cell).
12 |
13 |
14 |
15 | ## :wrench: Installation
16 |
17 |
18 | Step 1: Install Anaconda :snake:
19 |
20 | First, [install Anaconda following this tutorial](https://docs.anaconda.com/anaconda/install/)
21 |
22 |
23 |
24 | Step 2: Create and Activate a New Conda Environment :computer:
25 |
26 | ```
27 | # Create a new conda environment
28 | conda create -n cell2cell -y python=3.7 jupyter
29 |
30 | # Activate the environment
31 | conda activate cell2cell
32 | ```
33 |
34 |
35 | Step 3: Install cell2cell :arrow_down:
36 |
37 | ```
38 | pip install cell2cell
39 | ```
40 |
41 |
42 | ## :bulb: Examples
43 |
44 | | cell2cell Examples | Tensor-cell2cell Examples |
45 | | --- | --- |
46 | |  |  |
47 | | - [Step-by-step Pipeline](https://github.com/earmingol/cell2cell/blob/master/examples/cell2cell/Toy-Example.ipynb) <br> - [Interaction Pipeline for Bulk Data](https://earmingol.github.io/cell2cell/tutorials/Toy-Example-BulkPipeline) <br> - [Interaction Pipeline for Single-Cell Data](https://earmingol.github.io/cell2cell/tutorials/Toy-Example-SingleCellPipeline) <br> - [Whole Body of *C. elegans*](https://github.com/LewisLabUCSD/Celegans-cell2cell) | - [Obtaining patterns of cell-cell communication](https://earmingol.github.io/cell2cell/tutorials/ASD/01-Tensor-Factorization-ASD/) <br> - [Downstream 1: Factor-specific analyses](https://earmingol.github.io/cell2cell/tutorials/ASD/02-Factor-Specific-ASD/) <br> - [Downstream 2: Patterns to functions (GSEA)](https://earmingol.github.io/cell2cell/tutorials/ASD/03-GSEA-ASD/) <br> - [Tensor-cell2cell in Google Colab (**GPU**)](https://colab.research.google.com/drive/1T6MUoxafTHYhjvenDbEtQoveIlHT2U6_?usp=sharing) <br> - [Communication patterns in **Spatial Transcriptomics**](https://earmingol.github.io/cell2cell/tutorials/Tensor-cell2cell-Spatial/) |
48 |
49 | Reproducible runs of the analyses in the [Tensor-cell2cell paper](https://doi.org/10.1038/s41467-022-31369-2) are available at [CodeOcean.com](https://doi.org/10.24433/CO.0051950.v2).
50 |
51 | ## :link: LIANA & Tensor-cell2cell
52 |
53 | Explore our tutorials for using Tensor-cell2cell with [LIANA](https://github.com/saezlab/liana-py) at [ccc-protocols.readthedocs.io](https://ccc-protocols.readthedocs.io/).
54 |
55 | ## :question: Common Issues
56 |
57 | - **Memory Errors with Tensor-cell2cell:** If you encounter memory errors when performing tensor factorizations, try replacing `init='svd'` with `init='random'`.
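
  A minimal sketch of that change (the tensor variable here is illustrative; `compute_tensor_factorization` is the method used throughout the tutorials):

  ```
  # Hypothetical tensor built beforehand with cell2cell.tensor
  interaction_tensor.compute_tensor_factorization(rank=10, init='random')  # instead of init='svd'
  ```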
58 |
59 | ## :dna: Ligand-Receptor Pairs
60 | Find a curated list of ligand-receptor pairs for your analyses at our [GitHub Repository](https://github.com/LewisLabUCSD/Ligand-Receptor-Pairs).
61 |
62 | ## :bookmark_tabs: Citation
63 |
64 | Please cite our work using the following references:
65 |
66 | - **cell2cell**: [Inferring a spatial code of cell-cell interactions across a whole animal body](https://doi.org/10.1371/journal.pcbi.1010715).
67 | *PLOS Computational Biology, 2022*
68 |
69 | - **Tensor-cell2cell**: [Context-aware deconvolution of cell-cell communication with Tensor-cell2cell](https://doi.org/10.1038/s41467-022-31369-2).
70 | *Nature Communications, 2022.*
71 |
72 | - **LIANA & Tensor-cell2cell tutorials**: [Combining LIANA and Tensor-cell2cell to decipher cell-cell communication across multiple samples](https://doi.org/10.1016/j.crmeth.2024.100758).
73 | *Cell Reports Methods, 2024*
74 |
--------------------------------------------------------------------------------
/benchmarks/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import absolute_import
4 |
5 | from benchmarks.benchmarks import (timeit)
6 |
--------------------------------------------------------------------------------
/benchmarks/benchmarks.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import absolute_import
4 |
5 | import time
6 |
7 |
8 | def timeit(func, *args, **kwargs):
9 | '''
10 | This function measures the running time of a given function.
11 | Borrowed from George Armstrong's GitHub repo (https://github.com/gwarmstrong).
12 | '''
13 | t0 = time.time()
14 | output = func(*args, **kwargs)
15 | t1 = time.time()
16 | tot_time = t1-t0
17 | data = {'time': tot_time, 'results': output}
18 | return data
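
# Usage sketch (not part of the original module): measure an arbitrary callable.
if __name__ == '__main__':
    report = timeit(sum, range(1_000_000))
    print('Elapsed seconds:', report['time'])
    print('Result:', report['results'])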
--------------------------------------------------------------------------------
/cell2cell/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from cell2cell import analysis
4 | from cell2cell import clustering
5 | from cell2cell import core
6 | from cell2cell import datasets
7 | from cell2cell import external
8 | from cell2cell import io
9 | from cell2cell import plotting
10 | from cell2cell import preprocessing
11 | from cell2cell import spatial
12 | from cell2cell import stats
13 | from cell2cell import tensor
14 | from cell2cell import utils
15 |
16 | __version__ = "0.7.4"
--------------------------------------------------------------------------------
/cell2cell/analysis/__init__.py:
--------------------------------------------------------------------------------
1 | from cell2cell.analysis.cell2cell_pipelines import (initialize_interaction_space, BulkInteractions, SingleCellInteractions)
2 | from cell2cell.analysis.tensor_pipelines import (run_tensor_cell2cell_pipeline)
3 | import cell2cell.analysis.tensor_downstream as tensor_downstream
4 |
5 |
--------------------------------------------------------------------------------
/cell2cell/analysis/tensor_pipelines.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import absolute_import
4 |
5 | import tensorly as tl
6 |
7 | from cell2cell.plotting.tensor_plot import tensor_factors_plot
8 |
9 |
10 | def run_tensor_cell2cell_pipeline(interaction_tensor, tensor_metadata, copy_tensor=False, rank=None,
11 | tf_optimization='regular', random_state=None, backend=None, device=None,
12 | elbow_metric='error', smooth_elbow=False, upper_rank=25, tf_init='random',
13 | tf_svd='numpy_svd', cmaps=None, sample_col='Element', group_col='Category',
14 | fig_fontsize=14, output_folder=None, output_fig=True, fig_format='pdf', **kwargs):
15 | '''
16 | Runs basic pipeline of Tensor-cell2cell (excluding downstream analyses).
17 |
18 | Parameters
19 | ----------
20 | interaction_tensor : cell2cell.tensor.BaseTensor
21 | A communication tensor generated with any of the tensor class in
22 | cell2cell.tensor.
23 |
24 | tensor_metadata : list
25 | List of pandas dataframes with metadata information for elements of each
26 | dimension in the tensor. A column called as the variable `sample_col` contains
27 | the name of each element in the tensor while another column called as the
28 | variable `group_col` contains the metadata or grouping information of each
29 | element.
30 |
31 | copy_tensor : boolean, default=False
32 | Whether generating a copy of the original tensor to avoid modifying it.
33 |
34 | rank : int, default=None
35 | Rank of the Tensor Factorization (number of factors to deconvolve the original
36 | tensor). If None, it will be automatically inferred from an elbow analysis.
37 |
38 | tf_optimization : str, default='regular'
39 | Whether to perform the optimization with a higher number of iterations,
40 | more independent factorization runs, and a stricter (lower) tolerance,
41 | or with fewer iterations, fewer runs, and a looser tolerance.
42 | Options are:
43 | 
44 | - 'regular' : Uses 100 max iterations, 1 factorization run, and a 1e-7 tolerance.
45 |    Faster to run.
46 | - 'robust' : Uses 500 max iterations, 100 factorization runs, and a 1e-8 tolerance.
47 |    Slower to run.
47 | Slower to run.
48 |
49 | random_state : int, default=None
50 | Seed for randomization.
51 |
52 | backend : str, default=None
53 | Backend that TensorLy will use to perform calculations
54 | on this tensor. When None, the default backend used is
55 | the currently active backend, usually 'numpy'. Options are:
56 | {'cupy', 'jax', 'mxnet', 'numpy', 'pytorch', 'tensorflow'}
57 |
58 | device : str, default=None
59 | Device to use when backend allows multiple devices. Options are:
60 | {'cpu', 'cuda:0', None}
61 |
62 | elbow_metric : str, default='error'
63 | Metric to perform the elbow analysis (y-axis).
64 |
65 | - 'error' : Normalized error to compute the elbow.
66 | - 'similarity' : Similarity based on CorrIndex (1-CorrIndex).
67 |
68 | smooth_elbow : boolean, default=False
69 | Whether smoothing the elbow-analysis curve with a Savitzky-Golay filter.
70 |
71 | upper_rank : int, default=25
72 | Upper bound of ranks to explore with the elbow analysis.
73 |
74 | tf_init : str, default='random'
75 | Initialization method for computing the Tensor Factorization.
76 | {'svd', 'random'}
77 |
78 | tf_svd : str, default='numpy_svd'
79 | Function to compute the SVD for initializing the Tensor Factorization,
80 | acceptable values in tensorly.SVD_FUNS
81 |
82 | cmaps : list, default=None
83 | A list of colormaps used for coloring elements in each dimension. The length
84 | of this list is equal to the number of dimensions of the tensor. If None, all
85 | dimensions will be colored with the colormap 'gist_rainbow'.
86 |
87 | sample_col : str, default='Element'
88 | Name of the column containing the element names in the metadata.
89 |
90 | group_col : str, default='Category'
91 | Name of the column containing the metadata or grouping information for each
92 | element in the metadata.
93 |
94 | fig_fontsize : int, default=14
95 | Font size of the tick labels. Axis labels will be 1.2 times the fontsize.
96 |
97 | output_folder : str, default=None
98 | Path to the folder where the figures generated will be saved.
99 | If None, figures will not be saved.
100 |
101 | output_fig : boolean, default=True
102 | Whether generating the figures with matplotlib.
103 |
104 | fig_format : str, default='pdf'
105 | Format to store figures when an `output_folder` is specified
106 | and `output_fig` is True. Otherwise, this is not necessary.
107 |
108 | **kwargs : dict
109 | Extra arguments for the tensor factorization according to inputs in
110 | tensorly.
111 |
112 | Returns
113 | -------
114 | interaction_tensor : cell2cell.tensor.tensor.BaseTensor
115 | Either the original input `interaction_tensor` or a copy of it.
116 | This also stores the results from running the Tensor-cell2cell
117 | pipeline in the corresponding attributes.
118 | '''
119 | if copy_tensor:
120 | interaction_tensor = interaction_tensor.copy()
121 |
122 | dim = len(interaction_tensor.tensor.shape)
123 |
124 | ### OUTPUT FILENAMES ###
125 | if output_folder is None:
126 | elbow_filename = None
127 | tf_filename = None
128 | loading_filename = None
129 | else:
130 | elbow_filename = output_folder + '/Elbow.{}'.format(fig_format)
131 | tf_filename = output_folder + '/Tensor-Factorization.{}'.format(fig_format)
132 | loading_filename = output_folder + '/Loadings.xlsx'
133 |
134 | ### PALETTE COLORS FOR ELEMENTS IN TENSOR DIMS ###
135 | if cmaps is None:
136 | cmap_5d = ['tab10', 'viridis', 'Dark2_r', 'tab20', 'tab20']
137 | cmap_4d = ['plasma', 'Dark2_r', 'tab20', 'tab20']
138 |
139 | if dim == 5:
140 | cmaps = cmap_5d
141 | elif dim <= 4:
142 | cmaps = cmap_4d[-dim:]
143 | else:
144 | raise ValueError('Tensors with more than 5 dimensions are not supported')
145 |
146 | assert len(cmaps) == dim, "`cmaps` must have the same length as the number of dimensions in the tensor."
147 |
148 | ### FACTORIZATION PARAMETERS ###
149 | if tf_optimization == 'robust':
150 | elbow_runs = 20
151 | tf_runs = 100
152 | tol = 1e-8
153 | n_iter_max = 500
154 | elif tf_optimization == 'regular':
155 | elbow_runs = 10
156 | tf_runs = 1
157 | tol = 1e-7
158 | n_iter_max = 100
159 | else:
160 | raise ValueError("`tf_optimization` must be either 'robust' or 'regular'.")
161 |
162 | if backend is not None:
163 | tl.set_backend(backend)
164 |
165 | if device is not None:
166 | interaction_tensor.to_device(device=device)
167 |
168 | ### ANALYSIS ###
169 | # Elbow
170 | if rank is None:
171 | print('Running Elbow Analysis')
172 | fig1, error = interaction_tensor.elbow_rank_selection(upper_rank=upper_rank,
173 | runs=elbow_runs,
174 | init=tf_init,
175 | svd=tf_svd,
176 | automatic_elbow=True,
177 | metric=elbow_metric,
178 | output_fig=output_fig,
179 | smooth=smooth_elbow,
180 | random_state=random_state,
181 | fontsize=fig_fontsize,
182 | filename=elbow_filename,
183 | tol=tol, n_iter_max=n_iter_max,
184 | **kwargs
185 | )
186 |
187 | rank = interaction_tensor.rank
188 |
189 | # Factorization
190 | print('Running Tensor Factorization')
191 | interaction_tensor.compute_tensor_factorization(rank=rank,
192 | init=tf_init,
193 | svd=tf_svd,
194 | random_state=random_state,
195 | runs=tf_runs,
196 | normalize_loadings=True,
197 | tol=tol, n_iter_max=n_iter_max,
198 | **kwargs
199 | )
200 |
201 | ### EXPORT RESULTS ###
202 | if output_folder is not None:
203 | print('Generating Outputs')
204 | interaction_tensor.export_factor_loadings(loading_filename)
205 |
206 | if output_fig:
207 | fig2, axes = tensor_factors_plot(interaction_tensor=interaction_tensor,
208 | metadata=tensor_metadata,
209 | sample_col=sample_col,
210 | group_col=group_col,
211 | meta_cmaps=cmaps,
212 | fontsize=fig_fontsize,
213 | filename=tf_filename
214 | )
215 |
216 | return interaction_tensor
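
# Usage sketch (not part of the original module; variable names are illustrative).
# `tensor` would be any cell2cell.tensor.BaseTensor subclass and `meta` the list
# of metadata DataFrames described in the docstring above:
#
#   tensor = run_tensor_cell2cell_pipeline(tensor, meta,
#                                          rank=None,                 # None triggers the elbow analysis
#                                          tf_optimization='regular',
#                                          random_state=0,
#                                          output_folder=None)        # figures/loadings are not saved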
--------------------------------------------------------------------------------
/cell2cell/clustering/__init__.py:
--------------------------------------------------------------------------------
1 | from cell2cell.clustering.cluster_interactions import (compute_distance, compute_linkage, get_clusters_from_linkage)
2 |
--------------------------------------------------------------------------------
/cell2cell/clustering/cluster_interactions.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import absolute_import
4 |
5 | import numpy as np
6 | import pandas as pd
7 | import scipy.cluster.hierarchy as hc
8 | import scipy.spatial as sp
9 |
10 |
11 | # Distance-based algorithms
12 | def compute_distance(data_matrix, axis=0, metric='euclidean'):
13 | '''Computes the pairwise distance between elements in a
14 | matrix of shape m x n. Uses the function
15 | scipy.spatial.distance.pdist
16 |
17 | Parameters
18 | ----------
19 | data_matrix : pandas.DataFrame or ndarray
20 | A m x n matrix used to compute the distances
21 |
22 | axis : int, default=0
23 | To decide on which elements to compute the distance.
24 | If axis=0, the distances will be between elements in
25 | the rows, while axis=1 will lead to distances between
26 | elements in the columns.
27 |
28 | metric : str, default='euclidean'
29 | The distance metric to use. The distance function can be 'braycurtis',
30 | 'canberra', 'chebyshev', 'cityblock', 'correlation', 'cosine', 'dice',
31 | 'euclidean', 'hamming', 'jaccard', 'jensenshannon', 'kulsinski',
32 | 'mahalanobis', 'matching', 'minkowski', 'rogerstanimoto', 'russellrao',
33 | 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule'.
34 |
35 | Returns
36 | -------
37 | D : ndarray
38 | A square distance matrix of shape m x m (or n x n if axis=1),
39 | obtained by applying scipy.spatial.distance.squareform to the
40 | condensed distances from scipy.spatial.distance.pdist, so that
41 | entry D[i, j] holds the distance dist(u=X[i], v=X[j]).
42 | '''
43 | if (type(data_matrix) is pd.core.frame.DataFrame):
44 | data = data_matrix.values
45 | else:
46 | data = data_matrix
47 | if axis == 0:
48 | D = sp.distance.squareform(sp.distance.pdist(data, metric=metric))
49 | elif axis == 1:
50 | D = sp.distance.squareform(sp.distance.pdist(data.T, metric=metric))
51 | else:
52 | raise ValueError('Not valid axis. Use 0 or 1.')
53 | return D
54 |
55 |
56 | def compute_linkage(distance_matrix, method='ward', optimal_ordering=True):
57 | '''
58 | Returns a linkage for a given distance matrix using a specific method.
59 |
60 | Parameters
61 | ----------
62 | distance_matrix : numpy.ndarray
63 | A square array containing the distance between a given row and a
64 | given column. Diagonal elements must be zero.
65 |
66 | method : str, 'ward' by default
67 | Method to compute the linkage. It could be:
68 |
69 | - 'single'
70 | - 'complete'
71 | - 'average'
72 | - 'weighted'
73 | - 'centroid'
74 | - 'median'
75 | - 'ward'
76 | For more details, go to:
77 | https://docs.scipy.org/doc/scipy-0.19.0/reference/generated/scipy.cluster.hierarchy.linkage.html
78 |
79 | optimal_ordering : boolean, default=True
80 | Whether sorting the leaf of the dendrograms to have a minimal distance
81 | between successive leaves. For more information, see
82 | scipy.cluster.hierarchy.optimal_leaf_ordering
83 |
84 | Returns
85 | -------
86 | Z : numpy.ndarray
87 | The hierarchical clustering encoded as a linkage matrix.
88 | '''
89 | if (type(distance_matrix) is pd.core.frame.DataFrame):
90 | data = distance_matrix.values
91 | else:
92 | data = distance_matrix.copy()
93 | if not (data.transpose() == data).all():
94 | raise ValueError('The matrix is not symmetric')
95 |
96 | np.fill_diagonal(data, 0.0)
97 |
98 | # Compute linkage
99 | D = sp.distance.squareform(data)
100 | Z = hc.linkage(D, method=method, optimal_ordering=optimal_ordering)
101 | return Z
102 |
103 |
104 | def get_clusters_from_linkage(linkage, threshold, criterion='maxclust', labels=None):
105 | '''
106 | Gets clusters from a linkage given a threshold and a criterion.
107 |
108 | Parameters
109 | ----------
110 | linkage : numpy.ndarray
111 | The hierarchical clustering encoded with the matrix returned by
112 | the linkage function (Z).
113 |
114 | threshold : float
115 | The threshold to apply when forming flat clusters.
116 |
117 | criterion : str, 'maxclust' by default
118 | The criterion to use in forming flat clusters. Depending on the
119 | criterion, the threshold has different meanings. More information on:
120 | https://docs.scipy.org/doc/scipy-0.19.0/reference/generated/scipy.cluster.hierarchy.fcluster.html
121 |
122 | labels : array-like, None by default
123 | List of labels of the elements contained in the linkage. The order
124 | must match the order they were provided when generating the linkage.
125 |
126 | Returns
127 | -------
128 | clusters : dict
129 | A dictionary containing the clusters obtained. The keys correspond to
130 | the cluster numbers and the values to a list with element names (when
131 | labels are provided) or with element indexes (following the linkage order).
132 | '''
133 |
134 | cluster_ids = hc.fcluster(linkage, threshold, criterion=criterion)
135 | clusters = dict()
136 | for c in np.unique(cluster_ids):
137 | clusters[c] = []
138 |
139 | for i, c in enumerate(cluster_ids):
140 | if labels is not None:
141 | clusters[c].append(labels[i])
142 | else:
143 | clusters[c].append(i)
144 | return clusters
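
# Usage sketch (illustrative data, not part of the original module):
# cluster five elements from a random feature matrix.
if __name__ == '__main__':
    example = pd.DataFrame(np.random.random((5, 4)),
                           index=['C1', 'C2', 'C3', 'C4', 'C5'])
    D = compute_distance(example, axis=0, metric='euclidean')  # 5 x 5 square matrix
    Z = compute_linkage(D, method='ward')
    print(get_clusters_from_linkage(Z, threshold=2, labels=example.index.tolist()))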
--------------------------------------------------------------------------------
/cell2cell/core/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from cell2cell.core.cci_scores import (compute_braycurtis_like_cci_score, compute_count_score, compute_icellnet_score,
4 | compute_jaccard_like_cci_score, matmul_bray_curtis_like, matmul_count_active,
5 | matmul_jaccard_like)
6 | from cell2cell.core.cell import (Cell, get_cells_from_rnaseq)
7 | from cell2cell.core.communication_scores import (get_binary_scores, get_continuous_scores, compute_ccc_matrix, aggregate_ccc_matrices)
8 | from cell2cell.core.interaction_space import (generate_interaction_elements, InteractionSpace)
--------------------------------------------------------------------------------
/cell2cell/core/cell.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import absolute_import
4 |
5 | import pandas as pd
6 |
7 | class Cell:
8 | '''Specific cell-type/tissue/organ element in a RNAseq dataset.
9 |
10 | Parameters
11 | ----------
12 | sc_rnaseq_data : pandas.DataFrame
13 | A gene expression matrix. It contains only one column, which
14 | corresponds to a cell-type/tissue/sample, while the genes
15 | are the rows. The column name will be the label
16 | of the instance.
17 |
18 | verbose : boolean, default=True
19 | Whether printing or not steps of the analysis.
20 |
21 | Attributes
22 | ----------
23 | id : int
24 | ID number of the instance generated.
25 |
26 | type : str
27 | Name of the respective cell-type/tissue/sample.
28 |
29 | rnaseq_data : pandas.DataFrame
30 | Copy of sc_rnaseq_data.
31 |
32 | weighted_ppi : pandas.DataFrame
33 | Dataframe created from a list of protein-protein interactions,
34 | here the columns of the interacting proteins are replaced by
35 | a score or a preprocessed gene expression of the respective
36 | proteins.
37 | '''
38 | _id_counter = 0 # Number of active instances
39 | _id = 0 # Unique ID
40 |
41 | def __init__(self, sc_rnaseq_data, verbose=True):
42 | self.id = Cell._id
43 | Cell._id_counter += 1
44 | Cell._id += 1
45 |
46 | self.type = str(sc_rnaseq_data.columns[-1])
47 |
48 | # RNAseq datasets
49 | self.rnaseq_data = sc_rnaseq_data.copy()
50 | self.rnaseq_data.columns = ['value']
51 |
52 | # Binary ppi datasets
53 | self.weighted_ppi = pd.DataFrame(columns=['A', 'B', 'score'])
54 |
55 | # Object created
56 | if verbose:
57 | print("New cell instance created for " + self.type)
58 |
59 | def __del__(self):
60 | Cell._id_counter -= 1
61 |
62 | def __str__(self):
63 | return str(self.type)
64 |
65 | __repr__ = __str__
66 |
67 |
68 | def get_cells_from_rnaseq(rnaseq_data, cell_columns=None, verbose=True):
69 | '''
70 | Creates new instances of Cell based on the RNAseq data of each
71 | cell-type/tissue/sample in a gene expression matrix.
72 |
73 | Parameters
74 | ----------
75 | rnaseq_data : pandas.DataFrame
76 | Gene expression data for a RNA-seq experiment. Columns are
77 | cell-types/tissues/samples and rows are genes.
78 |
79 | cell_columns : array-like, default=None
80 | List of names of cell-types/tissues/samples in the dataset
81 | to be used. If None, all columns will be used.
82 |
83 | verbose : boolean, default=True
84 | Whether printing or not steps of the analysis.
85 |
86 | Returns
87 | -------
88 | cells : dict
89 | Dictionary containing all Cell instances generated from a RNAseq dataset.
90 | The keys of this dictionary are the names of the corresponding Cell instances.
91 | '''
92 | if verbose:
93 | print("Generating objects according to RNAseq datasets provided")
94 | cells = dict()
95 | if cell_columns is None:
96 | cell_columns = rnaseq_data.columns
97 |
98 | for cell in cell_columns:
99 | cells[cell] = Cell(rnaseq_data[[cell]], verbose=verbose)
100 | return cells
101 |
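# Usage sketch (illustrative, not part of the original module):
if __name__ == '__main__':
    toy = pd.DataFrame({'C1': [1, 2], 'C2': [3, 4]}, index=['GeneA', 'GeneB'])
    cells = get_cells_from_rnaseq(toy, verbose=False)
    print(cells['C1'].rnaseq_data)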
--------------------------------------------------------------------------------
/cell2cell/core/communication_scores.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import absolute_import
4 |
5 | import numpy as np
6 | from scipy.stats.mstats import gmean
7 |
8 |
9 | def get_binary_scores(cell1, cell2, ppi_score=None):
10 | '''Computes binary communication scores for all
11 | protein-protein interactions between a pair of
12 | cell-types/tissues/samples. This corresponds to
13 | an AND function between binary values for each
14 | interacting protein coming from each cell.
15 |
16 | Parameters
17 | ----------
18 | cell1 : cell2cell.core.cell.Cell
19 | First cell-type/tissue/sample to compute the communication
20 | score. In a directed interaction, this is the sender.
21 |
22 | cell2 : cell2cell.core.cell.Cell
23 | Second cell-type/tissue/sample to compute the communication
24 | score. In a directed interaction, this is the receiver.
25 |
26 | ppi_score : array-like, default=None
27 | An array with a weight for each PPI. The weight
28 | multiplies the communication scores.
29 |
30 | Returns
31 | -------
32 | communication_scores : numpy.array
33 | An array with the communication scores for each intercellular
34 | PPI.
35 | '''
36 | c1 = cell1.weighted_ppi['A'].values
37 | c2 = cell2.weighted_ppi['B'].values
38 |
39 | if (len(c1) == 0) or (len(c2) == 0):
40 | return 0.0
41 |
42 | if ppi_score is None:
43 | ppi_score = np.array([1.0] * len(c1))
44 |
45 | communication_scores = c1 * c2 * ppi_score
46 | return communication_scores
47 |
48 |
49 | def get_continuous_scores(cell1, cell2, ppi_score=None, method='expression_product'):
50 | '''Computes continuous communication scores for all
51 | protein-protein interactions between a pair of
52 | cell-types/tissues/samples. This corresponds to
53 | a specific scoring function between preprocessed continuous
54 | expression values for each interacting protein coming from
55 | each cell.
56 |
57 | Parameters
58 | ----------
59 | cell1 : cell2cell.core.cell.Cell
60 | First cell-type/tissue/sample to compute the communication
61 | score. In a directed interaction, this is the sender.
62 |
63 | cell2 : cell2cell.core.cell.Cell
64 | Second cell-type/tissue/sample to compute the communication
65 | score. In a directed interaction, this is the receiver.
66 |
67 | ppi_score : array-like, default=None
68 | An array with a weight for each PPI. The weight
69 | multiplies the communication scores.
70 |
71 | method : str, default='expression_product'
72 | Scoring function for computing the communication score.
73 | Options are:
74 | - 'expression_product' : Multiplication between the expression
75 | of the interacting proteins. One coming from cell1 and the
76 | other from cell2.
77 | - 'expression_mean' : Average between the expression
78 | of the interacting proteins. One coming from cell1 and the
79 | other from cell2.
80 | - 'expression_gmean' : Geometric mean between the expression
81 | of the interacting proteins. One coming from cell1 and the
82 | other from cell2.
83 |
84 | Returns
85 | -------
86 | communication_scores : numpy.array
87 | An array with the communication scores for each intercellular
88 | PPI.
89 | '''
90 | c1 = cell1.weighted_ppi['A'].values
91 | c2 = cell2.weighted_ppi['B'].values
92 |
93 | if method == 'expression_product':
94 | communication_scores = score_expression_product(c1, c2)
95 | elif method == 'expression_mean':
96 | communication_scores = score_expression_mean(c1, c2)
97 | elif method == 'expression_gmean':
98 | communication_scores = np.sqrt(score_expression_product(c1, c2))
99 | else:
100 | raise ValueError('{} is not implemented yet'.format(method))
101 |
102 | if ppi_score is None:
103 | ppi_score = np.array([1.0] * len(c1))
104 |
105 | communication_scores = communication_scores * ppi_score
106 | return communication_scores
107 |
108 |
109 | def score_expression_product(c1, c2):
110 | '''Computes the expression product score
111 |
112 | Parameters
113 | ----------
114 | c1 : array-like
115 | A 1D-array containing the preprocessed expression values
116 | for the interactors in the first column of a list of
117 | protein-protein interactions.
118 |
119 | c2 : array-like
120 | A 1D-array containing the preprocessed expression values
121 | for the interactors in the second column of a list of
122 | protein-protein interactions.
123 |
124 | Returns
125 | -------
126 | c1 * c2 : array-like
127 | Multiplication of vectors.
128 | '''
129 | if (len(c1) == 0) or (len(c2) == 0):
130 | return 0.0
131 | return c1 * c2
132 |
133 |
134 | def score_expression_mean(c1, c2):
135 | '''Computes the expression mean (average) score
136 |
137 | Parameters
138 | ----------
139 | c1 : array-like
140 | A 1D-array containing the preprocessed expression values
141 | for the interactors in the first column of a list of
142 | protein-protein interactions.
143 |
144 | c2 : array-like
145 | A 1D-array containing the preprocessed expression values
146 | for the interactors in the second column of a list of
147 | protein-protein interactions.
148 |
149 | Returns
150 | -------
151 | (c1 + c2)/2. : array-like
152 | Average of vectors.
153 | '''
154 | if (len(c1) == 0) or (len(c2) == 0):
155 | return 0.0
156 | return (c1 + c2)/2.
157 |
158 |
159 | def compute_ccc_matrix(prot_a_exp, prot_b_exp, communication_score='expression_product'):
160 | '''Computes communication scores for a specific
161 | protein-protein interaction using vectors of gene expression
162 | levels for a given interacting protein produced by
163 | different cell-types/tissues/samples.
164 |
165 | Parameters
166 | ----------
167 | prot_a_exp : array-like
168 | Vector with gene expression levels for an interacting protein A
169 | in a given PPI. Coordinates are different cell-types/tissues/samples.
170 |
171 | prot_b_exp : array-like
172 | Vector with gene expression levels for an interacting protein B
173 | in a given PPI. Coordinates are different cell-types/tissues/samples.
174 |
175 | communication_score : str, default='expression_product'
176 | Scoring function for computing the communication score.
177 | Options are:
178 |
179 | - 'expression_product' : Multiplication between the expression
180 | of the interacting proteins.
181 | - 'expression_mean' : Average between the expression
182 | of the interacting proteins.
183 | - 'expression_gmean' : Geometric mean between the expression
184 | of the interacting proteins.
185 |
186 | Returns
187 | -------
188 | communication_scores : numpy.array
189 | Matrix MxM, representing the CCC scores of a specific PPI
190 | across all pairs of cell-types/tissues/samples. M are all
191 | cell-types/tissues/samples. In directed interactions, the
192 | vertical axis (axis 0) represents the senders, while the
193 | horizontal axis (axis 1) represents the receivers.
194 | '''
195 | if communication_score == 'expression_product':
196 | communication_scores = np.outer(prot_a_exp, prot_b_exp)
197 | elif communication_score == 'expression_mean':
198 | communication_scores = (np.outer(prot_a_exp, np.ones(prot_b_exp.shape)) + np.outer(np.ones(prot_a_exp.shape), prot_b_exp)) / 2.
199 | elif communication_score == 'expression_gmean':
200 | communication_scores = np.sqrt(np.outer(prot_a_exp, prot_b_exp))
201 | else:
202 | raise ValueError("Not a valid communication_score")
203 | return communication_scores
204 |
205 |
206 | def aggregate_ccc_matrices(ccc_matrices, method='gmean'):
207 | '''Aggregates matrices of communication scores. Each
208 | matrix has the communication scores across all pairs
209 | of cell-types/tissues/samples for a different
210 | pair of interacting proteins.
211 |
212 | Parameters
213 | ----------
214 | ccc_matrices : list
215 | List of matrices of communication scores. Each matrix
216 | is for a specific pair of interacting proteins.
217 |
218 | method : str, default='gmean'.
219 | Method to aggregate the matrices element-wise.
220 | Options are:
221 |
222 | - 'gmean' : Geometric mean in an element-wise way.
223 | - 'sum' : Sum in an element-wise way.
224 | - 'mean' : Mean in an element-wise way.
225 |
226 | Returns
227 | -------
228 | aggregated_ccc_matrix : numpy.array
229 | A matrix containing aggregated communication scores
230 | from multiple PPIs. Its shape is MxM, where M are all
231 | cell-types/tissues/samples. In directed interactions, the
232 | vertical axis (axis 0) represents the senders, while the
233 | horizontal axis (axis 1) represents the receivers.
234 | '''
235 | if method == 'gmean':
236 | aggregated_ccc_matrix = gmean(ccc_matrices)
237 | elif method == 'sum':
238 | aggregated_ccc_matrix = np.nansum(ccc_matrices, axis=0)
239 | elif method == 'mean':
240 | aggregated_ccc_matrix = np.nanmean(ccc_matrices, axis=0)
241 | else:
242 | raise ValueError("Not a valid method")
243 |
244 | return aggregated_ccc_matrix
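
# Usage sketch (illustrative, not part of the original module): CCC matrices for
# two hypothetical ligand-receptor pairs across three cell types.
if __name__ == '__main__':
    ligand = np.array([1.0, 2.0, 0.0])    # ligand expression per cell type (senders)
    receptor = np.array([0.5, 1.0, 2.0])  # receptor expression per cell type (receivers)
    m1 = compute_ccc_matrix(ligand, receptor, communication_score='expression_product')
    m2 = compute_ccc_matrix(ligand, receptor, communication_score='expression_mean')
    print(aggregate_ccc_matrices([m1, m2], method='mean'))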
--------------------------------------------------------------------------------
/cell2cell/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | from cell2cell.datasets.anndata import (balf_covid)
2 | from cell2cell.datasets.gsea_data import (gsea_msig)
3 | from cell2cell.datasets.heuristic_data import (HeuristicGOTerms)
4 | from cell2cell.datasets.random_data import (generate_random_rnaseq, generate_random_ppi, generate_random_cci_scores,
5 | generate_random_metadata)
6 | from cell2cell.datasets.toy_data import (generate_toy_distance, generate_toy_rnaseq, generate_toy_ppi, generate_toy_metadata)
--------------------------------------------------------------------------------
/cell2cell/datasets/anndata.py:
--------------------------------------------------------------------------------
1 | from scanpy.readwrite import read
2 |
3 |
4 | def balf_covid(filename='BALF-COVID19-Liao_et_al-NatMed-2020.h5ad'):
5 | """BALF samples from COVID-19 patients
6 | The data consists of 63k immune and epithelial cells in lungs
7 | from 3 control, 3 moderate COVID-19, and 6 severe COVID-19 patients.
8 |
9 | This dataset was previously published in [1], and this object contains
10 | the raw counts for the annotated cell types available in:
11 | https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE145926
12 |
13 | References:
14 | [1] Liao, M., Liu, Y., Yuan, J. et al.
15 | Single-cell landscape of bronchoalveolar immune cells in patients
16 | with COVID-19. Nat Med 26, 842–844 (2020).
17 | https://doi.org/10.1038/s41591-020-0901-9
18 |
19 | Parameters
20 | ----------
21 | filename : str, default='BALF-COVID19-Liao_et_al-NatMed-2020.h5ad'
22 | Path to the h5ad file in case it was manually downloaded.
23 |
24 | Returns
25 | -------
26 | Annotated data matrix.
27 | """
28 | url = 'https://zenodo.org/record/7535867/files/BALF-COVID19-Liao_et_al-NatMed-2020.h5ad'
29 | adata = read(filename, backup_url=url)
30 | return adata
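
# Usage sketch (fetches the .h5ad file from Zenodo on first use, then reads the
# local copy on subsequent calls):
#
#   adata = balf_covid()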
--------------------------------------------------------------------------------
/cell2cell/datasets/gsea_data.py:
--------------------------------------------------------------------------------
1 | from cell2cell.external.gseapy import _check_pathwaydb, load_gmt, PATHWAY_DATA
2 |
3 |
4 | def gsea_msig(organism='human', pathwaydb='GOBP', readable_name=False):
5 | '''Load a MSigDB from a gmt file
6 |
7 | Parameters
8 | ----------
9 | organism : str, default='human'
10 | Organism for whom the DB will be loaded.
11 | Available options are {'human', 'mouse'}.
12 |
13 | pathwaydb: str, default='GOBP'
14 | Molecular Signature Database to load.
15 | Available options are {'GOBP', 'KEGG', 'Reactome'}
16 |
17 | readable_name : boolean, default=False
18 | If True, the pathway names are transformed to a more readable format.
19 | That is, removing underscores and pathway DB name at the beginning.
20 |
21 | Returns
22 | -------
23 | pathway_per_gene : defaultdict
24 | Dictionary containing all genes in the DB as keys, and
25 | their values are lists with their pathway annotations.
26 | '''
27 | _check_pathwaydb(organism, pathwaydb)
28 |
29 | pathway_per_gene = load_gmt(readable_name=readable_name, **PATHWAY_DATA[organism][pathwaydb])
30 | return pathway_per_gene
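
# Usage sketch (this may download the corresponding GMT file if it is not
# available locally):
#
#   kegg_sets = gsea_msig(organism='human', pathwaydb='KEGG', readable_name=True)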
--------------------------------------------------------------------------------
/cell2cell/datasets/heuristic_data.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import absolute_import
4 |
5 |
6 | class HeuristicGOTerms:
7 | '''GO terms for contact and secreted proteins.
8 |
9 | Attributes
10 | ----------
11 | contact_go_terms : list
12 | List of GO terms associated with proteins that
13 | participate in contact interactions (usually
14 | on the surface of cells).
15 |
16 | mediator_go_terms : list
17 | List of GO terms associated with secreted
18 | proteins that mediate intercellular interactions
19 | or communication.
20 | '''
21 | def __init__(self):
22 | self.contact_go_terms = ['GO:0007155', # Cell adhesion
23 | 'GO:0022608', # Multicellular organism adhesion
24 | 'GO:0098740', # Multiorganism cell adhesion
25 | 'GO:0098743', # Cell aggregation
26 | 'GO:0030054', # Cell-junction #
27 | 'GO:0009986', # Cell surface #
28 | 'GO:0097610', # Cell surface furrow
29 | 'GO:0007160', # Cell-matrix adhesion
30 | 'GO:0043235', # Receptor complex,
31 | 'GO:0008305', # Integrin complex,
32 | 'GO:0043113', # Receptor clustering
33 | 'GO:0009897', # External side of plasma membrane #
34 | 'GO:0038023', # Signaling receptor activity #
35 | ]
36 |
37 | self.mediator_go_terms = ['GO:0005615', # Extracellular space
38 | 'GO:0005576', # Extracellular region
39 | 'GO:0031012', # Extracellular matrix
40 | 'GO:0005201', # Extracellular matrix structural constituent
41 | 'GO:1990430', # Extracellular matrix protein binding
42 | 'GO:0048018', # Receptor ligand activity #
43 | ]
--------------------------------------------------------------------------------
/cell2cell/datasets/random_data.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import absolute_import
4 |
5 | import numpy as np
6 | import pandas as pd
7 |
8 | from sklearn.utils import resample
9 |
10 | from cell2cell.preprocessing import rnaseq, ppi
11 |
12 |
13 | def generate_random_rnaseq(size, row_names, random_state=None, verbose=True):
14 | '''
15 | Generates a RNA-seq dataset that is normally distributed gene-wise and size
16 | normalized (each column sums up to a million).
17 |
18 | Parameters
19 | ----------
20 | size : int
21 | Number of cell-types/tissues/samples (columns).
22 |
23 | row_names : array-like
24 | List containing the name of genes (rows).
25 |
26 | random_state : int, default=None
27 | Seed for randomization.
28 |
29 | verbose : boolean, default=True
30 | Whether printing or not steps of the analysis.
31 |
32 | Returns
33 | -------
34 | df : pandas.DataFrame
35 | Dataframe containing gene expression given the list
36 | of genes for each cell-type/tissue/sample.
37 | '''
38 | if verbose:
39 | print('Generating random RNA-seq dataset.')
40 | columns = ['Cell-{}'.format(c) for c in range(1, size+1)]
41 |
42 | if random_state is not None:
43 | np.random.seed(random_state)
44 | data = np.random.randn(len(row_names), len(columns)) # Normal distribution
45 | shift = np.abs(np.amin(data, axis=1))  # per-gene offset to make all values non-negative
46 | shift = shift.reshape((len(shift), 1))
47 | 
48 | data = data + shift
49 | df = pd.DataFrame(data, index=row_names, columns=columns)
50 | if verbose:
51 | print('Normalizing random RNA-seq dataset (into TPM)')
52 | df = rnaseq.scale_expression_by_sum(df, axis=0, sum_value=1e6)
53 | return df
54 |
55 |
56 | def generate_random_ppi(max_size, interactors_A, interactors_B=None, random_state=None, verbose=True):
57 | '''Generates a random list of protein-protein interactions.
58 |
59 | Parameters
60 | ----------
61 | max_size : int
62 | Maximum size of interactions to obtain. Since the PPIs
63 | are obtained by independently resampling interactors A and B
64 | rather than creating all possible combinations (it may demand too much
65 | memory), some PPIs can be duplicated; dropping those duplicates
66 | may result in fewer PPIs than max_size.
67 |
68 | interactors_A : list
69 | A list of protein names to include in the first column of
70 | the PPIs.
71 |
72 | interactors_B : list, default=None
73 | A list of protein names to include in the second columns
74 | of the PPIs. If None, interactors_A will be used as
75 | interactors_B too.
76 |
77 | random_state : int, default=None
78 | Seed for randomization.
79 |
80 | verbose : boolean, default=True
81 | Whether printing or not steps of the analysis.
82 |
83 | Returns
84 | -------
85 | ppi_data : pandas.DataFrame
86 | DataFrame containing a list of protein-protein interactions.
87 | It has three columns: 'A', 'B', and 'score' for interactors
88 | A, B and weights of interactions, respectively.
89 | '''
90 | if interactors_B is not None:
91 | assert max_size <= len(interactors_A)*len(interactors_B), "The maximum size can't be greater than all combinations between partners A and B"
92 | else:
93 | assert max_size <= len(interactors_A)**2, "The maximum size can't be greater than all combinations of partners A"
94 |
95 |
96 | if verbose:
97 | print('Generating random PPI network.')
98 |
99 | def small_block_ppi(size, interactors_A, interactors_B, random_state):
100 | if random_state is not None:
101 | random_state += 1
102 | if interactors_B is None:
103 | interactors_B = interactors_A
104 |
105 | col_A = resample(interactors_A, n_samples=size, random_state=random_state)
106 | col_B = resample(interactors_B, n_samples=size, random_state=random_state)
107 |
108 | ppi_data = pd.DataFrame()
109 | ppi_data['A'] = col_A
110 | ppi_data['B'] = col_B
111 | ppi_data = ppi_data.assign(score=1.0)  # assign returns a new DataFrame rather than modifying in place
112 |
113 | ppi_data = ppi.remove_ppi_bidirectionality(ppi_data, ('A', 'B'), verbose=verbose)
114 | ppi_data = ppi_data.drop_duplicates()
115 | ppi_data.reset_index(inplace=True, drop=True)
116 | return ppi_data
117 |
118 | ppi_data = small_block_ppi(max_size*2, interactors_A, interactors_B, random_state)
119 |
120 | # TODO: This part need to be fixed, it does not converge to the max_size -> len((set(A)) * len(set(B) - set(A)))
121 | # while ppi_data.shape[0] < size:
122 | # if random_state is not None:
123 | # random_state += 2
124 | # b = small_block_ppi(size, interactors_A, interactors_B, random_state)
125 | # print(b)
126 | # ppi_data = pd.concat([ppi_data, b])
127 | # ppi_data = ppi.remove_ppi_bidirectionality(ppi_data, ('A', 'B'), verbose=verbose)
128 | # ppi_data = ppi_data.drop_duplicates()
129 | # ppi_data.dropna()
130 | # ppi_data.reset_index(inplace=True, drop=True)
131 | # print(ppi_data.shape[0])
132 |
133 | if ppi_data.shape[0] > max_size:
134 | ppi_data = ppi_data.loc[list(range(max_size)), :]
135 | ppi_data.reset_index(inplace=True, drop=True)
136 | return ppi_data
137 |
138 |
139 | def generate_random_cci_scores(cell_number, labels=None, symmetric=True, random_state=None):
140 | '''Generates a square cell-cell interaction
141 | matrix with random scores.
142 |
143 | Parameters
144 | ----------
145 | cell_number : int
146 | Number of cells.
147 |
148 | labels : list, default=None
149 | List containing labels for each cells. Length of
150 | this list must match the cell_number.
151 |
152 | symmetric : boolean, default=True
153 | Whether generating a symmetric CCI matrix.
154 |
155 | random_state : int, default=None
156 | Seed for randomization.
157 |
158 | Returns
159 | -------
160 | cci_matrix : pandas.DataFrame
161 | Matrix with rows and columns as cells. Values
162 | represent a random CCI score between 0 and 1.
163 | '''
164 | if labels is not None:
165 | assert len(labels) == cell_number, "Length of labels must match cell_number"
166 | else:
167 | labels = ['Cell-{}'.format(n) for n in range(1, cell_number+1)]
168 |
169 | if random_state is not None:
170 | np.random.seed(random_state)
171 | cci_scores = np.random.random((cell_number, cell_number))
172 | if symmetric:
173 | cci_scores = (cci_scores + cci_scores.T) / 2.
174 | cci_matrix = pd.DataFrame(cci_scores, index=labels, columns=labels)
175 |
176 | return cci_matrix
177 |
178 |
179 | def generate_random_metadata(cell_labels, group_number):
180 | '''Randomly assigns groups to cell labels.
181 |
182 | Parameters
183 | ----------
184 | cell_labels : list
185 | A list of cell labels.
186 |
187 | group_number : int
188 | Number of major groups of cells.
189 |
190 | Returns
191 | -------
192 | metadata : pandas.DataFrame
193 | DataFrame containing the major groups that each cell
194 | received randomly (under column 'Group'). Cells are
195 | under the column 'Cell'.
196 | '''
197 | metadata = pd.DataFrame()
198 | metadata['Cell'] = cell_labels
199 |
200 | groups = list(range(1, group_number+1))
201 | metadata['Group'] = metadata['Cell'].apply(lambda x: np.random.choice(groups, 1)[0])
202 | return metadata
203 |
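# Usage sketch (illustrative, not part of the original module): a small random
# expression matrix plus a random PPI network built from the same gene names.
if __name__ == '__main__':
    genes = ['Gene-{}'.format(i) for i in range(1, 51)]
    rnaseq_df = generate_random_rnaseq(size=4, row_names=genes, random_state=0, verbose=False)
    ppi_df = generate_random_ppi(max_size=20, interactors_A=genes, random_state=0, verbose=False)
    print(rnaseq_df.shape, ppi_df.shape)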
--------------------------------------------------------------------------------
/cell2cell/datasets/toy_data.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 |
4 |
5 | def generate_toy_rnaseq():
6 | '''Generates a toy RNA-seq dataset
7 |
8 | Returns
9 | -------
10 | rnaseq : pandas.DataFrame
11 | DataFrame containing the toy RNA-seq dataset. Columns
12 | are cells and rows are genes.
13 | '''
14 | data = np.asarray([[5, 10, 8, 15, 2],
15 | [15, 5, 20, 1, 30],
16 | [18, 12, 5, 40, 20],
17 | [9, 30, 22, 5, 2],
18 | [2, 1, 1, 27, 15],
19 | [30, 11, 16, 5, 12],
20 | ])
21 |
22 | rnaseq = pd.DataFrame(data,
23 | index=['Protein-A', 'Protein-B', 'Protein-C', 'Protein-D', 'Protein-E', 'Protein-F'],
24 | columns=['C1', 'C2', 'C3', 'C4', 'C5']
25 | )
26 | rnaseq.index.name = 'gene_id'
27 | return rnaseq
28 |
29 |
30 | def generate_toy_ppi(prot_complex=False):
31 | '''Generates a toy list of protein-protein interactions.
32 |
33 | Parameters
34 | ----------
35 | prot_complex : boolean, default=False
36 | Whether including PPIs where interactors could contain
37 | multimeric complexes.
38 |
39 | Returns
40 | -------
41 | ppi : pandas.DataFrame
42 | Dataframe containing PPIs. Columns are 'A' (first interacting
43 | partners), 'B' (second interacting partners) and 'score'
44 | for weighting each PPI.
45 | '''
46 | if prot_complex:
47 | data = np.asarray([['Protein-A', 'Protein-B'],
48 | ['Protein-B', 'Protein-C'],
49 | ['Protein-C', 'Protein-A'],
50 | ['Protein-B', 'Protein-B'],
51 | ['Protein-B', 'Protein-A'],
52 | ['Protein-E', 'Protein-F'],
53 | ['Protein-F', 'Protein-F'],
54 | ['Protein-C&Protein-E', 'Protein-F'],
55 | ['Protein-B', 'Protein-E'],
56 | ['Protein-A&Protein-B', 'Protein-F'],
57 | ])
58 | else:
59 | data = np.asarray([['Protein-A', 'Protein-B'],
60 | ['Protein-B', 'Protein-C'],
61 | ['Protein-C', 'Protein-A'],
62 | ['Protein-B', 'Protein-B'],
63 | ['Protein-B', 'Protein-A'],
64 | ['Protein-E', 'Protein-F'],
65 | ['Protein-F', 'Protein-F'],
66 | ['Protein-C', 'Protein-F'],
67 | ['Protein-B', 'Protein-E'],
68 | ['Protein-A', 'Protein-F'],
69 | ])
70 | ppi = pd.DataFrame(data, columns=['A', 'B'])
71 | ppi = ppi.assign(score=1.0)
72 | return ppi
73 |
74 |
75 | def generate_toy_metadata():
76 | '''Generates metadata for cells in the toy RNA-seq dataset.
77 |
78 | Returns
79 | -------
80 | metadata : pandas.DataFrame
81 | DataFrame with metadata for each cell. Metadata contains the
82 | major groups of those cells.
83 | '''
84 | data = np.asarray([['C1', 'G1'],
85 | ['C2', 'G2'],
86 | ['C3', 'G3'],
87 | ['C4', 'G3'],
88 | ['C5', 'G1']
89 | ])
90 |
91 | metadata = pd.DataFrame(data, columns=['#SampleID', 'Groups'])
92 | return metadata
93 |
94 |
95 | def generate_toy_distance():
96 | '''Generates a square matrix with cell-cell distance.
97 |
98 | Returns
99 | -------
100 | distance : pandas.DataFrame
101 | DataFrame with Euclidean-like distance between each
102 | pair of cells in the toy RNA-seq dataset.
103 | '''
104 | data = np.asarray([[0.0, 10.0, 12.0, 5.0, 3.0],
105 | [10.0, 0.0, 15.0, 8.0, 9.0],
106 | [12.0, 15.0, 0.0, 4.5, 7.5],
107 | [5.0, 8.0, 4.5, 0.0, 6.5],
108 | [3.0, 9.0, 7.5, 6.5, 0.0],
109 | ])
110 | distance = pd.DataFrame(data,
111 | index=['C1', 'C2', 'C3', 'C4', 'C5'],
112 | columns=['C1', 'C2', 'C3', 'C4', 'C5']
113 | )
114 | return distance
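
# Usage sketch: assemble the full toy dataset used across the tutorials.
if __name__ == '__main__':
    rnaseq_df = generate_toy_rnaseq()
    ppi_df = generate_toy_ppi(prot_complex=False)
    meta_df = generate_toy_metadata()
    dist_df = generate_toy_distance()
    print(rnaseq_df.shape, ppi_df.shape, meta_df.shape, dist_df.shape)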
--------------------------------------------------------------------------------
/cell2cell/external/__init__.py:
--------------------------------------------------------------------------------
1 | from cell2cell.external.pcoa import (pcoa, pcoa_biplot, _check_ordination)
2 | from cell2cell.external.goenrich import (goa, ontology)
3 | from cell2cell.external.gseapy import (load_gmt, generate_lr_geneset, run_gsea)
4 | from cell2cell.external.umap import (run_umap)
--------------------------------------------------------------------------------
/cell2cell/external/goenrich.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------------
2 | # Copyright (c) 2017--, goenrich development team.
3 | #
4 | # Distributed under the terms of the MIT licence.
5 | # ----------------------------------------------------------------------------
6 |
7 | # CODE OBTAINED FROM: https://github.com/jdrudolph/goenrich/
8 | # COPIED HERE BECAUSE GOENRICH IS NOT AVAILABLE THROUGH CONDA
9 |
10 | import itertools
11 | import networkx as nx
12 | import pandas as pd
13 |
14 | def _tokenize(f):
15 | token = []
16 | for line in f:
17 | if line == '\n':
18 | yield token
19 | token = []
20 | else:
21 | token.append(line)
22 |
23 | def _filter_terms(tokens):
24 | for token in tokens:
25 | if token[0] == '[Term]\n':
26 | yield token[1:]
27 |
28 | def _parse_terms(terms):
29 | for term in terms:
30 | obsolete = False
31 | node = {}
32 | parents = []
33 | for line in term:
34 | if line.startswith('id:'):
35 | id = line[4:-1]
36 | elif line.startswith('name:'):
37 | node['name'] = line[6:-1]
38 | elif line.startswith('namespace:'):
39 | node['namespace'] = line[11:-1]
40 | elif line.startswith('is_a:'):
41 | parents.append(line[6:16])
42 | elif line.startswith('relationship: part_of'):
43 | parents.append(line[22:32])
44 | elif line.startswith('is_obsolete'):
45 | obsolete = True
46 | break
47 | if not obsolete:
48 | edges = [(p, id) for p in parents] # will reverse edges later
49 | yield (id, node), edges
50 | else:
51 | continue
52 |
53 | _filename = 'db/go-basic.obo'
54 |
55 | def ontology(file):
56 | """ read ontology from file
57 | :param file: file path or file handle
58 | """
59 | O = nx.DiGraph()
60 |
61 | if isinstance(file, str):
62 | f = open(file)
63 | we_opened_file = True
64 | else:
65 | f = file
66 | we_opened_file = False
67 |
68 | try:
69 | tokens = _tokenize(f)
70 | terms = _filter_terms(tokens)
71 | entries = _parse_terms(terms)
72 | nodes, edges = zip(*entries)
73 | O.add_nodes_from(nodes)
74 | O.add_edges_from(itertools.chain.from_iterable(edges))
75 | O.graph['roots'] = {data['name'] : n for n, data in O.nodes.items()
76 | if data['name'] == data['namespace']}
77 | finally:
78 | if we_opened_file:
79 | f.close()
80 |
81 | for root in O.graph['roots'].values():
82 | for n, depth in nx.shortest_path_length(O, root).items():
83 | node = O.nodes[n]
84 | node['depth'] = min(depth, node.get('depth', float('inf')))
85 | return O.reverse()
86 |
87 |
88 | """
89 | parsers for different go-annotation formats
90 | """
91 | GENE_ASSOCIATION_COLUMNS = ('db', 'db_object_id', 'db_object_symbol',
92 | 'qualifier', 'go_id', 'db_reference',
93 | 'evidence_code', 'with_from', 'aspect',
94 | 'db_object_name', 'db_object_synonym',
95 | 'db_object_type', 'taxon', 'date', 'assigned_by',
96 | 'annotation_extension', 'gene_product_form_id')
97 | EXPERIMENTAL_EVIDENCE = ('EXP', 'IDA', 'IPI', 'IMP', 'IGI', 'IEP')
98 |
99 |
100 | def goa(filename, experimental=True, **kwds):
101 | """ read go-annotation file
102 |
103 | :param filename: path to the go-annotation file
104 | :param experimental: use only experimentally validated annotations
105 | """
106 | defaults = {'comment': '!',
107 | 'names': GENE_ASSOCIATION_COLUMNS}
108 |
109 | if experimental and 'usecols' in kwds:
110 | kwds['usecols'] += ('evidence_code',)
111 |
112 | defaults.update(kwds)
113 | result = pd.read_csv(filename, sep='\t', **defaults)
114 |
115 | if experimental:
116 | retain_mask = result.evidence_code.isin(EXPERIMENTAL_EVIDENCE)
117 | result.drop(result.index[~retain_mask], inplace=True)
118 |
119 | return result
120 |
121 |
122 | def sgd(filename, experimental=False, **kwds):
123 | """ read yeast genome database go-annotation file
124 | :param filename: path to the go-annotation file
125 | :param experimental: use only experimentally validated annotations
126 | """
127 | return goa(filename, experimental, **kwds)
128 |
129 |
130 | GENE2GO_COLUMNS = ('tax_id', 'GeneID', 'GO_ID', 'Evidence', 'Qualifier', 'GO_term', 'PubMed', 'Category')
131 |
132 |
133 | def gene2go(filename, experimental=False, tax_id=9606, **kwds):
134 | """ read go-annotation file
135 |
136 | :param filename: path to the go-annotation file
137 | :param experimental: use only experimentally validated annotations
138 | :param tax_id: filter according to taxon
139 | """
140 | defaults = {'comment': '#',
141 | 'names': GENE2GO_COLUMNS}
142 | defaults.update(kwds)
143 | result = pd.read_csv(filename, sep='\t', **defaults)
144 |
145 | retain_mask = result.tax_id == tax_id
146 | result.drop(result.index[~retain_mask], inplace=True)
147 |
148 | if experimental:
149 | retain_mask = result.Evidence.isin(EXPERIMENTAL_EVIDENCE)
150 | result.drop(result.index[~retain_mask], inplace=True)
151 |
152 | return result
--------------------------------------------------------------------------------
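A usage sketch for the two helpers re-exported in cell2cell.external: ontology() builds a networkx DiGraph (reversed, so edges point from a term to its parents) from an .obo file, and goa() loads a gene-association table. Both file names below are placeholders for locally downloaded GO releases:

from cell2cell.external.goenrich import ontology, goa

O = ontology('go-basic.obo')           # placeholder path to a GO .obo release
annotations = goa('goa_human.gaf.gz')  # placeholder path; keeps experimental evidence codes by default

print(O.number_of_nodes(), annotations.shape)
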
/cell2cell/external/umap.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import umap
3 |
4 | import pandas as pd
5 | import scipy.spatial as sp
6 |
7 |
8 | def run_umap(rnaseq_data, axis=1, metric='euclidean', min_dist=0.4, n_neighbors=8, random_state=None, **kwargs):
9 | '''Runs UMAP on an expression matrix.
10 | Parameters
11 | ----------
12 | rnaseq_data : pandas.DataFrame
13 | A dataframe of gene expression values wherein the rows are the genes or
14 | embeddings of a dimensionality reduction method and columns the cells,
15 | tissues or samples.
16 |
17 | axis : int, default=1
18 | An axis of the dataframe (0 across rows, 1 across columns).
19 | Across rows means that the UMAP is to compare genes, while
20 | across columns is to compare cells, tissues or samples.
21 |
22 | metric : str, default='euclidean'
23 | The distance metric to use. The distance function can be 'braycurtis',
24 | 'canberra', 'chebyshev', 'cityblock', 'correlation', 'cosine', 'dice',
25 | 'euclidean', 'hamming', 'jaccard', 'jensenshannon', 'kulsinski',
26 | 'mahalanobis', 'matching', 'minkowski', 'rogerstanimoto', 'russellrao',
27 | 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule'.
28 |
29 | min_dist: float, default=0.4
30 | The effective minimum distance between embedded points. Smaller values
31 | will result in a more clustered/clumped embedding where nearby points
32 | on the manifold are drawn closer together, while larger values will
33 | result in a more even dispersal of points. The value should be set
34 | relative to the ``spread`` value, which determines the scale at which
35 | embedded points will be spread out.
36 |
37 | n_neighbors: int, default=8
38 | The size of local neighborhood (in terms of number of neighboring
39 | sample points) used for manifold approximation. Larger values
40 | result in more global views of the manifold, while smaller
41 | values result in more local data being preserved. In general
42 | values should be in the range 2 to 100.
43 |
44 | random_state : int, default=None
45 | Seed for randomization.
46 |
47 | **kwargs : dict
48 | Extra arguments for UMAP as defined in umap.UMAP.
49 |
50 | Returns
51 | -------
52 | umap_df : pandas.DataFrame
53 | Dataframe containing the UMAP embeddings for the axis analyzed.
54 | Contains columns 'umap1' and 'umap2'.
55 | '''
56 | # Organize data
57 | if axis == 0:
58 | df = rnaseq_data
59 | elif axis == 1:
60 | df = rnaseq_data.T
61 | else:
62 | raise ValueError("The parameter axis must be either 0 or 1.")
63 |
64 | # Compute distances
65 | D = sp.distance.pdist(df, metric=metric)
66 | D_sq = sp.distance.squareform(D)
67 |
68 | # Run UMAP
69 | model = umap.UMAP(metric="precomputed",
70 | min_dist=min_dist,
71 | n_neighbors=n_neighbors,
72 | random_state=random_state,
73 | **kwargs
74 | )
75 |
76 | trans_D = model.fit_transform(D_sq)
77 |
78 | # Organize results
79 | umap_df = pd.DataFrame(trans_D, columns=['umap1', 'umap2'], index=df.index)
80 | return umap_df
--------------------------------------------------------------------------------
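Because run_umap() precomputes the full pairwise distance matrix before fitting UMAP, any metric accepted by scipy's pdist can be used. A sketch on random data (values are for illustration only):

import numpy as np
import pandas as pd

from cell2cell.external.umap import run_umap

rng = np.random.default_rng(0)
rnaseq = pd.DataFrame(rng.random((50, 20)),
                      index=['Gene{}'.format(i) for i in range(50)],
                      columns=['Sample{}'.format(j) for j in range(20)])

# axis=1 embeds the 20 samples; the output has one row per sample
embedding = run_umap(rnaseq, axis=1, n_neighbors=5, random_state=42)
print(embedding[['umap1', 'umap2']].head())
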
/cell2cell/io/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from cell2cell.io.directories import (create_directory, get_files_from_directory)
4 | from cell2cell.io.read_data import (load_cutoffs, load_go_annotations, load_go_terms, load_metadata, load_ppi,
5 | load_rnaseq, load_table, load_tables_from_directory, load_variable_with_pickle,
6 | load_tensor, load_tensor_factors)
7 | from cell2cell.io.save_data import (export_variable_with_pickle)
8 |
--------------------------------------------------------------------------------
/cell2cell/io/directories.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import os
4 |
5 |
6 | def create_directory(pathname):
7 | '''Creates a directory.
8 |
9 | Uses a path to create a directory. It creates
10 | all intermediate folders before creating the
11 | leaf folder.
12 |
13 | Parameters
14 | ----------
15 | pathname : str
16 | Full path of the folder to create.
17 | '''
18 | if not os.path.isdir(pathname):
19 | os.makedirs(pathname)
20 | print("{} was created successfully.".format(pathname))
21 | else:
22 | print("{} already exists.".format(pathname))
23 |
24 |
25 | def get_files_from_directory(pathname, dir_in_filepath=False):
26 | '''Obtains a list of filenames in a folder.
27 |
28 | Parameters
29 | ----------
30 | pathname : str
31 | Full path of the folder to explore.
32 |
33 | dir_in_filepath : boolean, default=False
34 | Whether to add `pathname` to the filenames.
35 |
36 | Returns
37 | -------
38 | filenames : list
39 | A list containing the names (strings) of the files
40 | in the folder.
41 | '''
42 | directory = os.fsencode(pathname)
43 | filenames = [pathname + '/' + os.fsdecode(file) if dir_in_filepath else os.fsdecode(file) for file in os.listdir(directory)]
44 | return filenames
45 |
--------------------------------------------------------------------------------
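A short sketch of both helpers; './c2c_outputs' is an arbitrary example path:

from cell2cell.io.directories import create_directory, get_files_from_directory

create_directory('./c2c_outputs')  # prints whether it was created or already existed
files = get_files_from_directory('./c2c_outputs', dir_in_filepath=True)
print(files)  # full paths, because dir_in_filepath=True
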
/cell2cell/io/save_data.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import absolute_import
4 |
5 | import pickle
6 |
7 |
8 | def export_variable_with_pickle(variable, filename):
9 | '''Exports a large-size variable in a Python-readable way
10 | using pickle.
11 |
12 | Parameters
13 | ----------
14 | variable : a python variable
15 | Variable to export
16 |
17 | filename : str
18 | Complete path to the file wherein the variable will be
19 | stored. For example:
20 | /home/user/variable.pkl
21 | '''
22 |
23 | max_bytes = 2 ** 31 - 1
24 |
25 | bytes_out = pickle.dumps(variable)
26 | with open(filename, 'wb') as f_out:
27 | for idx in range(0, len(bytes_out), max_bytes):
28 | f_out.write(bytes_out[idx:idx + max_bytes])
29 | print(filename, 'was correctly saved.')
--------------------------------------------------------------------------------
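Writing the pickled bytes in chunks of at most 2**31 - 1 bytes works around an overflow that some platforms reportedly hit when writing more than 2 GB in a single call. Usage is a one-liner; the path is an example:

import pandas as pd

from cell2cell.io.save_data import export_variable_with_pickle

df = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})
export_variable_with_pickle(df, './variable.pkl')  # any picklable object works
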
/cell2cell/plotting/__init__.py:
--------------------------------------------------------------------------------
1 | from cell2cell.plotting.aesthetics import (get_colors_from_labels, map_colors_to_metadata, generate_legend)
2 | from cell2cell.plotting.ccc_plot import (clustermap_ccc)
3 | from cell2cell.plotting.cci_plot import (clustermap_cci)
4 | from cell2cell.plotting.circular_plot import (circos_plot)
5 | from cell2cell.plotting.pval_plot import (dot_plot, generate_dot_plot)
6 | from cell2cell.plotting.factor_plot import (context_boxplot, loading_clustermap, ccc_networks_plot)
7 | from cell2cell.plotting.pcoa_plot import (pcoa_3dplot)
8 | from cell2cell.plotting.tensor_plot import (tensor_factors_plot, tensor_factors_plot_from_loadings)
9 | from cell2cell.plotting.umap_plot import (umap_biplot)
--------------------------------------------------------------------------------
/cell2cell/plotting/aesthetics.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from matplotlib import pyplot as plt
4 | from matplotlib.colors import Normalize
5 | import matplotlib.cm as cm
6 | import matplotlib.patches as patches
7 | import numpy as np
8 |
9 |
10 | def get_colors_from_labels(labels, cmap='gist_rainbow', factor=1):
11 | '''Generates colors for each label in a list given a colormap
12 |
13 | Parameters
14 | ----------
15 | labels : list
16 | A list of labels to assign a color.
17 |
18 | cmap : str, default='gist_rainbow'
19 | A matplotlib color palette name.
20 |
21 | factor : int, default=1
22 | Factor to amplify the separation of colors.
23 |
24 | Returns
25 | -------
26 | colors : dict
27 | A dictionary where the keys are the labels and the values
28 | correspond to the assigned colors.
29 | '''
30 | assert factor >= 1
31 |
32 | colors = dict.fromkeys(labels, ())
33 |
34 | factor = int(factor)
35 | cm_ = plt.get_cmap(cmap)
36 |
37 | is_number = all((isinstance(e, float) or isinstance(e, int)) for e in labels)
38 |
39 | if not is_number:
40 | NUM_COLORS = factor * len(colors)
41 | for i, label in enumerate(colors.keys()):
42 | colors[label] = cm_((1 + ((factor-1)/factor)) * i / NUM_COLORS)
43 | else:
44 | max_ = np.nanmax(labels)
45 | min_ = np.nanmin(labels)
46 | norm = Normalize(vmin=min_, vmax=max_)
47 |
48 | m = cm.ScalarMappable(norm=norm, cmap=cmap)
49 | for label in colors.keys():
50 | colors[label] = m.to_rgba(label)
51 | return colors
52 |
53 |
54 | def map_colors_to_metadata(metadata, ref_df=None, colors=None, sample_col='#SampleID', group_col='Groups',
55 | cmap='gist_rainbow'):
56 | '''Assigns a color to elements in a dataframe containing metadata.
57 |
58 | Parameters
59 | ----------
60 | metadata : pandas.DataFrame
61 | A dataframe with metadata for specific elements.
62 |
63 | ref_df : pandas.DataFrame
64 | A dataframe whose columns contains a subset of
65 | elements in the metadata.
66 |
67 | colors : dict, default=None
68 | Dictionary containing tuples in the RGBA format for indicating colors
69 | of major groups of cells. If colors is specified, cmap will be
70 | ignored.
71 |
72 | sample_col : str, default='#SampleID'
73 | Column in the metadata for elements to color.
74 |
75 | group_col : str, default='Groups'
76 | Column in the metadata containing the major groups of the elements
77 | to color.
78 |
79 | cmap : str, default='gist_rainbow'
80 | Name of the color palette for coloring the major groups of elements.
81 |
82 | Returns
83 | -------
84 | new_colors : pandas.DataFrame
85 | A pandas dataframe where the index is the list of elements in the
86 | sample_col and the column group_col contains the colors assigned
87 | to each element given their groups.
88 | '''
89 | if ref_df is not None:
90 | meta_ = metadata.set_index(sample_col).reindex(ref_df.columns)
91 | else:
92 | meta_ = metadata.set_index(sample_col)
93 | labels = meta_[group_col].unique().tolist()
94 | if colors is None:
95 | colors = get_colors_from_labels(labels, cmap=cmap)
96 | else:
97 | upd_dict = dict([(v, (1., 1., 1., 1.)) for v in labels if v not in colors.keys()])
98 | colors.update(upd_dict)
99 |
100 | new_colors = meta_[group_col].map(colors)
101 | new_colors.index = meta_.index
102 | new_colors.name = group_col.capitalize()
103 |
104 | return new_colors
105 |
106 |
107 | def generate_legend(color_dict, loc='center left', bbox_to_anchor=(1.01, 0.5), ncol=1, fancybox=True, shadow=True,
108 | title='Legend', fontsize=14, sorted_labels=True, ax=None):
109 | '''Adds a legend to a previous plot or displays an independent legend
110 | given specific colors for labels.
111 |
112 | Parameters
113 | ----------
114 | color_dict : dict
115 | Dictionary containing tuples in the RGBA format for indicating colors
116 | of major groups of cells. Keys are the labels and values are the RGBA
117 | tuples.
118 |
119 | loc : str, default='center left'
120 | Alignment of the legend given the location specified in bbox_to_anchor.
121 |
122 | bbox_to_anchor : tuple, default=(1.01, 0.5)
123 | Location of the legend in a (X, Y) format. For example, if you want
124 | your axes legend located at the figure's top right-hand corner instead
125 | of the axes' corner, simply specify the corner's location and the
126 | coordinate system of that location, which in this case would be (1, 1).
127 |
128 | ncol : int, default=1
129 | Number of columns to display the legend.
130 |
131 | fancybox : boolean, default=True
132 | Whether round edges should be enabled around the FancyBboxPatch which
133 | makes up the legend's background.
134 |
135 | shadow : boolean, default=True
136 | Whether to draw a shadow behind the legend.
137 |
138 | title : str, default='Legend'
139 | Title of the legend box.
140 |
141 | fontsize : int, default=14
142 | Size of the text in the legends.
143 |
144 | sorted_labels : boolean, default=True
145 | Whether to sort the labels alphabetically.
146 |
147 | ax : matplotlib.axes.Axes, default=None
148 | Axes instance for a plot. If None, the legend is added to
149 | the current matplotlib figure through `plt.legend`, and
150 | matplotlib creates a new empty figure when none exists.
151 | Otherwise, the legend is attached to these axes through
152 | `ax.legend`.
153 |
154 | Returns
155 | -------
156 | legend1 : matplotlib.legend.Legend
157 | A legend object in a figure.
158 | '''
159 | color_patches = []
160 | if sorted_labels:
161 | iteritems = sorted(color_dict.items())
162 | else:
163 | iteritems = color_dict.items()
164 | for k, v in iteritems:
165 | color_patches.append(patches.Patch(color=v, label=str(k).replace('_', ' ')))
166 |
167 | if ax is None:
168 | legend1 = plt.legend(handles=color_patches,
169 | loc=loc,
170 | bbox_to_anchor=bbox_to_anchor,
171 | ncol=ncol,
172 | fancybox=fancybox,
173 | shadow=shadow,
174 | title=title,
175 | title_fontsize=fontsize,
176 | fontsize=fontsize)
177 | else:
178 | legend1 = ax.legend(handles=color_patches,
179 | loc=loc,
180 | bbox_to_anchor=bbox_to_anchor,
181 | ncol=ncol,
182 | fancybox=fancybox,
183 | shadow=shadow,
184 | title=title,
185 | title_fontsize=fontsize,
186 | fontsize=fontsize)
187 | return legend1
--------------------------------------------------------------------------------
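The helpers in this module compose: colors generated for a list of labels can be passed to generate_legend() to draw a standalone legend, or to map_colors_to_metadata() to color a plot. A sketch with made-up group labels:

import matplotlib.pyplot as plt

from cell2cell.plotting.aesthetics import get_colors_from_labels, generate_legend

colors = get_colors_from_labels(['G1', 'G2', 'G3'], cmap='tab10')  # {label: RGBA tuple}

fig, ax = plt.subplots()
generate_legend(colors, title='Groups', ax=ax)  # attaches the legend to these axes
plt.show()
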
/cell2cell/plotting/pcoa_plot.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import numpy as np
4 | import pandas as pd
5 | from matplotlib import pyplot as plt
6 | from mpl_toolkits.mplot3d import Axes3D
7 |
8 | from cell2cell.external import pcoa, _check_ordination
9 | from cell2cell.plotting.aesthetics import get_colors_from_labels
10 |
11 |
12 | def pcoa_3dplot(interaction_space, metadata=None, sample_col='#SampleID', group_col='Groups', pcoa_method='eigh',
13 | meta_cmap='gist_rainbow', colors=None, excluded_cells=None, title='', axis_fontsize=14, legend_fontsize=12,
14 | figsize=(6, 5), view_angles=(30, 135), filename=None):
15 | '''Projects the cells into an Euclidean space (PCoA) given their distances
16 | based on their CCI scores. Then, plots each cell by their first three
17 | coordinates in a 3D scatter plot.
18 |
19 | Parameters
20 | ----------
21 | interaction_space : cell2cell.core.interaction_space.InteractionSpace
22 | Interaction space that contains a distance matrix after running
23 | the method compute_pairwise_cci_scores. Alternatively, this object
24 | can be a numpy-array or a pandas DataFrame. Also, a
25 | SingleCellInteractions or a BulkInteractions object after running
26 | the method compute_pairwise_cci_scores.
27 |
28 | metadata : pandas.Dataframe, default=None
29 | Metadata associated with the cells, cell types or samples in the
30 | matrix containing CCC scores. If None, cells will not be colored
31 | by major groups.
32 |
33 | sample_col : str, default='#SampleID'
34 | Column in the metadata for the cells, cell types or samples
35 | in the matrix containing CCI scores.
36 |
37 | group_col : str, default='Groups'
38 | Column in the metadata containing the major groups of cells, cell types
39 | or samples in the matrix with CCI scores.
40 |
41 | pcoa_method : str, default='eigh'
42 | Eigendecomposition method to use in performing PCoA.
43 | By default, uses SciPy's `eigh`, which computes exact
44 | eigenvectors and eigenvalues for all dimensions. The alternate
45 | method, `fsvd`, uses faster heuristic eigendecomposition but loses
46 | accuracy. The magnitude of accuracy lost is dependent on dataset.
47 |
48 | meta_cmap : str, default='gist_rainbow'
49 | Name of the color palette for coloring the major groups of cells.
50 |
51 | colors : dict, default=None
52 | Dictionary containing tuples in the RGBA format for indicating colors
53 | of major groups of cells. If colors is specified, meta_cmap will be
54 | ignored.
55 |
56 | excluded_cells : list, default=None
57 | List containing cell names that are present in the interaction_space
58 | object but that will be excluded from this plot.
59 |
60 | title : str, default=''
61 | Title of the PCoA 3D plot.
62 |
63 | axis_fontsize : int, default=14
64 | Size of the font for the labels of each axis (X, Y and Z).
65 |
66 | legend_fontsize : int, default=12
67 | Size of the font for labels in the legend.
68 |
69 | figsize : tuple, default=(6, 5)
70 | Size of the figure (width*height), each in inches.
71 |
72 | view_angles : tuple, default=(30, 135)
73 | Rotation angles of the plot. Set the elevation and
74 | azimuth of the axes.
75 |
76 | filename : str, default=None
77 | Path to save the figure. If None, the figure is not
78 | saved.
79 |
80 | Returns
81 | -------
82 | results : dict
83 | Dictionary that contains:
84 |
85 | - 'fig' : matplotlib.figure.Figure, containing the whole figure
86 | - 'axes' : matplotlib.axes.Axes, containing the axes of the 3D plot
87 | - 'ordination' : Ordination or projection obtained from the PCoA
88 | - 'distance_matrix' : Distance matrix used to perform the PCoA (usually in
89 | interaction_space.distance_matrix)
90 | '''
91 | if hasattr(interaction_space, 'distance_matrix'):
92 | print('Interaction space detected as an InteractionSpace class')
93 | distance_matrix = interaction_space.distance_matrix
94 | elif (type(interaction_space) is np.ndarray) or (type(interaction_space) is pd.core.frame.DataFrame):
95 | print('Interaction space detected as a distance matrix')
96 | distance_matrix = interaction_space
97 | elif hasattr(interaction_space, 'interaction_space'):
98 | print('Interaction space detected as an Interactions class')
99 | if not hasattr(interaction_space.interaction_space, 'distance_matrix'):
100 | raise ValueError('First run the method compute_pairwise_cci_scores() in your interaction' + \
101 | ' object to generate a distance matrix.')
102 | else:
103 | distance_matrix = interaction_space.interaction_space.distance_matrix
104 | else:
105 | raise ValueError('First run the method compute_pairwise_cci_scores() in your interaction' + \
106 | ' object to generate a distance matrix.')
107 |
108 | # Drop excluded cells
109 | if excluded_cells is not None:
110 | df = distance_matrix.loc[~distance_matrix.index.isin(excluded_cells),
111 | ~distance_matrix.columns.isin(excluded_cells)]
112 | else:
113 | df = distance_matrix
114 |
115 | # PCoA
116 | ordination = pcoa(df, method=pcoa_method)
117 | ordination = _check_ordination(ordination)
118 | ordination['samples'].index = df.index
119 |
120 | # Biplot
121 | fig = plt.figure(figsize=figsize)
122 | ax = fig.add_subplot(111, projection='3d')
123 | #ax = Axes3D(fig) # Not displayed in newer versions
124 |
125 | if metadata is None:
126 | metadata = pd.DataFrame()
127 | metadata[sample_col] = list(distance_matrix.columns)
128 | metadata[group_col] = list(distance_matrix.columns)
129 |
130 | meta_ = metadata.set_index(sample_col)
131 | if excluded_cells is not None:
132 | meta_ = meta_.loc[~meta_.index.isin(excluded_cells)]
133 | labels = meta_[group_col].values.tolist()
134 |
135 | if colors is None:
136 | colors = get_colors_from_labels(labels, cmap=meta_cmap)
137 | else:
138 | assert all(elem in colors.keys() for elem in set(labels))
139 |
140 | # Plot each data point with respective color
141 | for i, cell_type in enumerate(sorted(meta_[group_col].unique())):
142 | cells = list(meta_.loc[meta_[group_col] == cell_type].index)
143 | if colors is not None:
144 | ax.scatter(ordination['samples'].loc[cells, 'PC1'],
145 | ordination['samples'].loc[cells, 'PC2'],
146 | ordination['samples'].loc[cells, 'PC3'],
147 | color=colors[cell_type],
148 | s=50,
149 | edgecolors='k',
150 | label=cell_type)
151 | else:
152 | ax.scatter(ordination['samples'].loc[cells, 'PC1'],
153 | ordination['samples'].loc[cells, 'PC2'],
154 | ordination['samples'].loc[cells, 'PC3'],
155 | s=50,
156 | edgecolors='k',
157 | label=cell_type)
158 |
159 | # Plot texts
160 | ax.set_xlabel('PC1 ({}%)'.format(np.round(ordination['proportion_explained']['PC1'] * 100, 2)), fontsize=axis_fontsize)
161 | ax.set_ylabel('PC2 ({}%)'.format(np.round(ordination['proportion_explained']['PC2'] * 100, 2)), fontsize=axis_fontsize)
162 | ax.set_zlabel('PC3 ({}%)'.format(np.round(ordination['proportion_explained']['PC3'] * 100, 2)), fontsize=axis_fontsize)
163 |
164 | ax.set_xticklabels([])
165 | ax.set_yticklabels([])
166 | ax.set_zticklabels([])
167 |
168 | ax.view_init(view_angles[0], view_angles[1])
169 | plt.legend(loc='center left', bbox_to_anchor=(1.35, 0.5),
170 | ncol=2, fancybox=True, shadow=True, fontsize=legend_fontsize)
171 | plt.title(title, fontsize=16)
172 |
173 | #distskbio = skbio.DistanceMatrix(df, ids=df.index) # Not using skbio for now
174 |
175 | # Save plot
176 | if filename is not None:
177 | plt.savefig(filename, dpi=300,
178 | bbox_inches='tight')
179 |
180 | results = {'fig' : fig, 'axes' : ax, 'ordination' : ordination, 'distance_matrix' : df} # df used to be distskbio
181 | return results
--------------------------------------------------------------------------------
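Since pcoa_3dplot() also accepts a plain distance DataFrame, it can be driven directly by the toy dataset shown earlier. A sketch (the toy matrix has only five cells, so three principal coordinates capture nearly all the variance):

from cell2cell.datasets.toy_data import generate_toy_distance, generate_toy_metadata
from cell2cell.plotting.pcoa_plot import pcoa_3dplot

distance = generate_toy_distance()
metadata = generate_toy_metadata()

results = pcoa_3dplot(distance, metadata=metadata, title='Toy PCoA')
results['fig'].savefig('toy_pcoa.png', dpi=300, bbox_inches='tight')
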
/cell2cell/plotting/umap_plot.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import seaborn as sns
3 | import matplotlib.pyplot as plt
4 |
5 |
6 | def umap_biplot(umap_df, figsize=(8, 8), ax=None, show_axes=True, show_legend=True, hue=None,
7 | cmap='tab10', fontsize=20, filename=None):
8 | '''Plots a UMAP biplot for the UMAP embeddings.
9 |
10 | Parameters
11 | ----------
12 | umap_df : pandas.DataFrame
13 | Dataframe containing the UMAP embeddings for the axis analyzed.
14 | It must contain columns 'umap1 and 'umap2'. If a hue column is
15 | provided in the parameter 'hue', that column must be provided
16 | in this dataframe.
17 |
18 | figsize : tuple, default=(8, 8)
19 | Size of the figure (width*height), each in inches.
20 |
21 | ax : matplotlib.axes.Axes, default=None
22 | The matplotlib axes containing a plot.
23 |
24 | show_axes : boolean, default=True
25 | Whether to show lines, ticks and tick labels of both axes.
26 |
27 | show_legend : boolean, default=True
28 | Whether to include the legend when a hue is provided.
29 |
30 | hue : vector or key in 'umap_df'
31 | Grouping variable that will produce points with different colors.
32 | Can be either categorical or numeric, although color mapping will
33 | behave differently in the latter case.
34 |
35 | cmap : str, default='tab10'
36 | Name of the color palette for coloring elements with UMAP embeddings.
37 |
38 | fontsize : int, default=20
39 | Fontsize of the axis labels (UMAP1 and UMAP2).
40 |
41 | filename : str, default=None
42 | Path to save the figure. If None, the figure is not
43 | saved.
44 |
45 | Returns
46 | -------
47 | fig : matplotlib.figure.Figure
48 | A matplotlib Figure instance. Only returned when `ax` is None.
49 |
50 | ax : matplotlib.axes.Axes
51 | The matplotlib axes containing the plot.
52 | '''
53 | created_fig = ax is None  # sns.scatterplot below reassigns ax
54 | if created_fig:
55 | fig = plt.figure(figsize=figsize)
56 |
57 | ax = sns.scatterplot(x='umap1',
58 | y='umap2',
59 | data=umap_df,
60 | hue=hue,
61 | palette=cmap,
62 | ax=ax
63 | )
64 |
65 | if show_axes:
66 | sns.despine(ax=ax,
67 | offset=15
68 | )
69 |
70 | ax.tick_params(axis='both',
71 | which='both',
72 | colors='black',
73 | width=2,
74 | length=5
75 | )
76 | else:
77 | ax.set_xticks([])
78 | ax.set_yticks([])
79 | for key, spine in ax.spines.items():
80 | spine.set_visible(False)
81 |
82 |
83 | for tick in ax.get_xticklabels():
84 | tick.set_fontproperties('arial')
85 | tick.set_weight("bold")
86 | tick.set_color("black")
87 | tick.set_fontsize(int(0.7*fontsize))
88 | for tick in ax.get_yticklabels():
89 | tick.set_fontproperties('arial')
90 | tick.set_weight("bold")
91 | tick.set_color("black")
92 | tick.set_fontsize(int(0.7*fontsize))
93 |
94 | ax.set_xlabel('UMAP 1', fontsize=fontsize)
95 | ax.set_ylabel('UMAP 2', fontsize=fontsize)
96 |
97 | if (show_legend) & (hue is not None):
98 | # Put the legend out of the figure
99 | legend = ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
100 | legend.set_title(hue)
101 | legend.get_title().set_fontsize(int(0.7*fontsize))
102 |
103 | for text in legend.get_texts():
104 | text.set_fontsize(int(0.7*fontsize))
105 |
106 | if filename is not None:
107 | plt.savefig(filename, dpi=300, bbox_inches='tight')
108 |
109 | if created_fig:
110 | return fig, ax
111 | else:
112 | return ax
--------------------------------------------------------------------------------
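umap_biplot() pairs naturally with run_umap() from cell2cell.external. The hue column below is a hypothetical grouping appended to the embedding table:

import numpy as np
import pandas as pd

from cell2cell.external.umap import run_umap
from cell2cell.plotting.umap_plot import umap_biplot

rng = np.random.default_rng(0)
rnaseq = pd.DataFrame(rng.random((30, 12)),
                      columns=['Sample{}'.format(j) for j in range(12)])

umap_df = run_umap(rnaseq, axis=1, n_neighbors=5, random_state=1)
umap_df['Condition'] = ['Healthy'] * 6 + ['Disease'] * 6  # hypothetical labels

fig, ax = umap_biplot(umap_df, hue='Condition')
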
/cell2cell/preprocessing/__init__.py:
--------------------------------------------------------------------------------
1 | from cell2cell.preprocessing.cutoffs import (get_constant_cutoff, get_cutoffs, get_global_percentile_cutoffs,
2 | get_local_percentile_cutoffs)
3 | from cell2cell.preprocessing.find_elements import (find_duplicates, get_element_abundances, get_elements_over_fraction)
4 | from cell2cell.preprocessing.gene_ontology import (find_all_children_of_go_term, find_go_terms_from_keyword,
5 | get_genes_from_go_hierarchy, get_genes_from_go_terms)
6 | from cell2cell.preprocessing.integrate_data import (get_thresholded_rnaseq, get_modified_rnaseq, get_ppi_dict_from_go_terms,
7 | get_ppi_dict_from_proteins, get_weighted_ppi)
8 | from cell2cell.preprocessing.manipulate_dataframes import (check_presence_in_dataframe, shuffle_cols_in_df, shuffle_rows_in_df,
9 | shuffle_dataframe, subsample_dataframe)
10 | from cell2cell.preprocessing.ppi import (bidirectional_ppi_for_cci, filter_ppi_by_proteins, filter_ppi_network,
11 | get_all_to_all_ppi, get_filtered_ppi_network, get_one_group_to_other_ppi,
12 | remove_ppi_bidirectionality, simplify_ppi, filter_complex_ppi_by_proteins,
13 | get_genes_from_complexes, preprocess_ppi_data)
14 | from cell2cell.preprocessing.rnaseq import (divide_expression_by_max, divide_expression_by_mean, drop_empty_genes,
15 | log10_transformation, scale_expression_by_sum, add_complexes_to_expression,
16 | aggregate_single_cells)
17 |
18 | from cell2cell.preprocessing.signal import (smooth_curve)
--------------------------------------------------------------------------------
/cell2cell/preprocessing/cutoffs.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import absolute_import
4 |
5 | from cell2cell.io import read_data
6 |
7 | import numpy as np
8 | import pandas as pd
9 |
10 |
11 | def get_local_percentile_cutoffs(rnaseq_data, percentile=0.75):
12 | '''
13 | Obtains a local value associated with a given percentile across
14 | cells/tissues/samples for each gene in rnaseq_data.
15 |
16 | Parameters
17 | ----------
18 | rnaseq_data : pandas.DataFrame
19 | Gene expression data for a bulk RNA-seq experiment or a single-cell
20 | experiment after aggregation into cell types. Columns are
21 | cell-types/tissues/samples and rows are genes.
22 |
23 | percentile : float, default=0.75
24 | This is the percentile to be computed.
25 |
26 | Returns
27 | -------
28 | cutoffs : pandas.DataFrame
29 | A dataframe containing the value corresponding to the percentile
30 | across the genes. Rows are genes and the column corresponds to
31 | 'value'.
32 | '''
33 | cutoffs = rnaseq_data.quantile(percentile, axis=1).to_frame()
34 | cutoffs.columns = ['value']
35 | return cutoffs
36 |
37 |
38 | def get_global_percentile_cutoffs(rnaseq_data, percentile=0.75):
39 | '''
40 | Obtains a global value associated with a given percentile across
41 | cells/tissues/samples and genes in rnaseq_data.
42 |
43 | Parameters
44 | ----------
45 | rnaseq_data : pandas.DataFrame
46 | Gene expression data for a bulk RNA-seq experiment or a single-cell
47 | experiment after aggregation into cell types. Columns are
48 | cell-types/tissues/samples and rows are genes.
49 |
50 | percentile : float, default=0.75
51 | This is the percentile to be computed.
52 |
53 | Returns
54 | -------
55 | cutoffs : pandas.DataFrame
56 | A dataframe containing the value corresponding to the percentile
57 | across the dataset. Rows are genes and the column corresponds to
58 | 'value'. All values here are the same global percentile.
59 | '''
60 | cutoffs = pd.DataFrame(index=rnaseq_data.index, columns=['value'])
61 | cutoffs['value'] = np.quantile(rnaseq_data.values, percentile)
62 | return cutoffs
63 |
64 |
65 | def get_constant_cutoff(rnaseq_data, constant_cutoff=10):
66 | '''
67 | Generates a cutoff/threshold dataframe for all genes
68 | in rnaseq_data assigning a constant value as the cutoff.
69 |
70 | Parameters
71 | ----------
72 | rnaseq_data : pandas.DataFrame
73 | Gene expression data for a bulk RNA-seq experiment or a single-cell
74 | experiment after aggregation into cell types. Columns are
75 | cell-types/tissues/samples and rows are genes.
76 |
77 | constant_cutoff : float, default=10
78 | Cutoff or threshold assigned to each gene.
79 |
80 | Returns
81 | -------
82 | cutoffs : pandas.DataFrame
83 | A dataframe containing the value corresponding to cutoff or threshold
84 | assigned to each gene. Rows are genes and the column corresponds to
85 | 'value'. All values are the same and correspond to the
86 | constant_cutoff.
87 | '''
88 | cutoffs = pd.DataFrame(index=rnaseq_data.index)
89 | cutoffs['value'] = constant_cutoff
90 | return cutoffs
91 |
92 |
93 | def get_cutoffs(rnaseq_data, parameters, verbose=True):
94 | '''
95 | This function creates cutoff/threshold values for genes
96 | in rnaseq_data and the respective cells/tissues/samples
97 | by a given method or parameter.
98 |
99 | Parameters
100 | ----------
101 | rnaseq_data : pandas.DataFrame
102 | Gene expression data for a bulk RNA-seq experiment or a single-cell
103 | experiment after aggregation into cell types. Columns are
104 | cell-types/tissues/samples and rows are genes.
105 |
106 | parameters : dict
107 | This dictionary must contain a 'parameter' key and a 'type' key.
108 | The first one is the respective parameter to compute the threshold
109 | or cutoff values. The type corresponds to the approach to
110 | compute the values according to the parameter employed.
111 | Options of 'type' that can be used:
112 |
113 | - 'local_percentile' : computes the value of a given percentile,
114 | for each gene independently. In this case,
115 | the parameter corresponds to the percentile
116 | to compute, as a float value between 0 and 1.
117 | - 'global_percentile' : computes the value of a given percentile
118 | from all genes and samples simultaneously.
119 | In this case, the parameter corresponds to
120 | the percentile to compute, as a float value
121 | between 0 and 1. All genes have the same cutoff.
122 | - 'file' : load a cutoff table from a file. Parameter in this case is
123 | the path of that file. It must contain the same genes as
124 | index and same samples as columns.
125 | - 'multi_col_matrix' : a dataframe must be provided, containing a
126 | cutoff for each gene in each sample. This allows
127 | to use specific cutoffs for each sample. The
128 | columns here must be the same as the ones in the
129 | rnaseq_data.
130 | - 'single_col_matrix' : a dataframe must be provided, containing a
131 | cutoff for each gene in only one column. These
132 | cutoffs will be applied to all samples.
133 | - 'constant_value' : binarizes the expression. Evaluates whether
134 | expression is greater than the value input in
135 | the 'parameter'.
136 |
137 | verbose : boolean, default=True
138 | Whether to print the steps of the analysis.
139 |
140 | Returns
141 | -------
142 | cutoffs : pandas.DataFrame
143 | Dataframe wherein rows are genes in rnaseq_data. Depending on the type in
144 | the parameters dictionary, it may have only one column ('value') or the
145 | same columns that rnaseq_data has, generating specific cutoffs for each
146 | cell/tissue/sample.
147 | '''
148 | parameter = parameters['parameter']
149 | type = parameters['type']
150 | if verbose:
151 | print("Calculating cutoffs for gene abundances")
152 | if type == 'local_percentile':
153 | cutoffs = get_local_percentile_cutoffs(rnaseq_data, parameter)
154 | cutoffs.columns = ['value']
155 | elif type == 'global_percentile':
156 | cutoffs = get_global_percentile_cutoffs(rnaseq_data, parameter)
157 | cutoffs.columns = ['value']
158 | elif type == 'constant_value':
159 | cutoffs = get_constant_cutoff(rnaseq_data, parameter)
160 | cutoffs.columns = ['value']
161 | elif type == 'file':
162 | cutoffs = read_data.load_cutoffs(parameter,
163 | format='auto')
164 | cutoffs = cutoffs.loc[rnaseq_data.index]
165 | elif type == 'multi_col_matrix':
166 | cutoffs = parameter
167 | cutoffs = cutoffs.loc[rnaseq_data.index]
168 | cutoffs = cutoffs[rnaseq_data.columns]
169 | elif type == 'single_col_matrix':
170 | cutoffs = parameter
171 | cutoffs.columns = ['value']
172 | cutoffs = cutoffs.loc[rnaseq_data.index]
173 | else:
174 | raise ValueError(type + ' is not a valid cutoff')
175 | return cutoffs
--------------------------------------------------------------------------------
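A sketch of the dictionary-driven interface of get_cutoffs(), using the local-percentile strategy on random values:

import numpy as np
import pandas as pd

from cell2cell.preprocessing.cutoffs import get_cutoffs

rng = np.random.default_rng(0)
rnaseq = pd.DataFrame(rng.random((100, 4)) * 20.,
                      index=['Gene{}'.format(i) for i in range(100)],
                      columns=['C1', 'C2', 'C3', 'C4'])

# 75th percentile of each gene across the four cell types
cutoffs = get_cutoffs(rnaseq, {'parameter': 0.75, 'type': 'local_percentile'})
print(cutoffs.head())  # a single 'value' column, indexed by gene
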
/cell2cell/preprocessing/find_elements.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import absolute_import
4 |
5 | import itertools
6 | from collections import defaultdict, Counter
7 |
8 | def find_duplicates(element_list):
9 | '''Function based on: https://stackoverflow.com/a/5419576/12032899
10 | Finds duplicate items and list their index location.
11 |
12 | Parameters
13 | ----------
14 | element_list : list
15 | List of elements
16 |
17 | Returns
18 | -------
19 | duplicate_dict : dict
20 | Dictionary with duplicate items. Keys are the items, and values
21 | are lists with the respective indexes where they are.
22 | '''
23 | tally = defaultdict(list)
24 | for i,item in enumerate(element_list):
25 | tally[item].append(i)
26 |
27 | duplicate_dict = {key : locs for key,locs in tally.items()
28 | if len(locs)>1}
29 | return duplicate_dict
30 |
31 |
32 | def get_element_abundances(element_lists):
33 | '''Computes the fraction of occurrence of each element
34 | in a list of lists.
35 |
36 | Parameters
37 | ----------
38 | element_lists : list
39 | List of lists of elements. Elements will be
40 | counted only once in each of the lists.
41 |
42 | Returns
43 | -------
44 | abundance_dict : dict
45 | Dictionary containing the number of times that an
46 | element was present, divided by the total number of
47 | lists in `element_lists`.
48 | '''
49 | abundance_dict = Counter(itertools.chain(*map(set, element_lists)))
50 | total = len(element_lists)
51 | abundance_dict = {k : v/total for k, v in abundance_dict.items()}
52 | return abundance_dict
53 |
54 |
55 | def get_elements_over_fraction(abundance_dict, fraction):
56 | '''Obtains a list of elements whose fraction of
57 | occurrence is at least the given threshold.
58 |
59 | Parameters
60 | ----------
61 | abundance_dict : dict
62 | Dictionary containing the number of times that an
63 | element was present, divided by the total number of
64 | possible occurrences.
65 |
66 | fraction : float
67 | Threshold to filter the elements. Elements with at least
68 | this threshold will be included.
69 |
70 | Returns
71 | -------
72 | elements : list
73 | List of elements that met the fraction criteria.
74 | '''
75 | elements = [k for k, v in abundance_dict.items() if v >= fraction]
76 | return elements
--------------------------------------------------------------------------------
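The two abundance helpers are typically chained to find elements shared across contexts, e.g. ligand-receptor pairs detected in most samples (the pair names here are made up):

from cell2cell.preprocessing.find_elements import (get_element_abundances,
                                                   get_elements_over_fraction)

lr_lists = [['LigandA^ReceptorB', 'LigandC^ReceptorD'],
            ['LigandA^ReceptorB'],
            ['LigandA^ReceptorB', 'LigandE^ReceptorF']]

abundances = get_element_abundances(lr_lists)  # {'LigandA^ReceptorB': 1.0, ...}
core = get_elements_over_fraction(abundances, fraction=0.5)
print(core)  # elements present in at least half of the lists
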
/cell2cell/preprocessing/gene_ontology.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import absolute_import
4 |
5 | import numpy as np
6 | import networkx
7 |
8 |
9 | def get_genes_from_go_terms(go_annotations, go_filter, go_header='GO', gene_header='Gene', verbose=True):
10 | '''
11 | Finds genes associated with specific GO-terms.
12 |
13 | Parameters
14 | ----------
15 | go_annotations : pandas.DataFrame
16 | Dataframe containing information about GO term annotations of each
17 | gene for a given organism according to the gaf file. Can be loaded
18 | with the function cell2cell.io.read_data.load_go_annotations().
19 |
20 | go_filter : list
21 | List containing one or more GO-terms to find associated genes.
22 |
23 | go_header : str, default='GO'
24 | Column name wherein GO terms are located in the dataframe.
25 |
26 | gene_header : str, default='Gene'
27 | Column name wherein genes are located in the dataframe.
28 |
29 | verbose : boolean, default=True
30 | Whether to print the steps of the analysis.
31 |
32 | Returns
33 | -------
34 | genes : list
35 | List of genes that are associated with GO-terms contained in
36 | go_filter.
37 | '''
38 | if verbose:
39 | print('Filtering genes by using GO terms')
40 | genes = list(go_annotations.loc[go_annotations[go_header].isin(go_filter)][gene_header].unique())
41 | return genes
42 |
43 |
44 | def get_genes_from_go_hierarchy(go_annotations, go_terms, go_filter, go_header='GO', gene_header='Gene', verbose=False):
45 | '''
46 | Obtains genes associated with specific GO terms and their
47 | children GO terms (below in the hierarchy).
48 |
49 | Parameters
50 | ----------
51 | go_annotations : pandas.DataFrame
52 | Dataframe containing information about GO term annotations of each
53 | gene for a given organism according to the gaf file. Can be loaded
54 | with the function cell2cell.io.read_data.load_go_annotations().
55 |
56 | go_terms : networkx.Graph
57 | NetworkX Graph containing GO terms datasets from .obo file.
58 | It could be loaded using
59 | cell2cell.io.read_data.load_go_terms(filename).
60 |
61 | go_filter : list
62 | List containing one or more GO-terms to find associated genes.
63 |
64 | go_header : str, default='GO'
65 | Column name wherein GO terms are located in the dataframe.
66 |
67 | gene_header : str, default='Gene'
68 | Column name wherein genes are located in the dataframe.
69 |
70 | verbose : boolean, default=False
71 | Whether to print the steps of the analysis.
72 |
73 | Returns
74 | -------
75 | genes : list
76 | List of genes that are associated with GO-terms contained in
77 | go_filter, and related to the children GO terms of those terms.
78 | '''
79 | go_hierarchy = go_filter.copy()
80 | iter = len(go_hierarchy)
81 | for i in range(iter):
82 | find_all_children_of_go_term(go_terms, go_hierarchy[i], go_hierarchy, verbose=verbose)
83 | go_hierarchy = list(set(go_hierarchy))
84 | genes = get_genes_from_go_terms(go_annotations=go_annotations,
85 | go_filter=go_hierarchy,
86 | go_header=go_header,
87 | gene_header=gene_header,
88 | verbose=verbose)
89 | return genes
90 |
91 |
92 | def find_all_children_of_go_term(go_terms, go_term_name, output_list, verbose=True):
93 | '''
94 | Finds all children GO terms (below in hierarchy) of
95 | a given GO term.
96 |
97 | Parameters
98 | ----------
99 | go_terms : networkx.Graph
100 | NetworkX Graph containing GO terms datasets from .obo file.
101 | It could be loaded using
102 | cell2cell.io.read_data.load_go_terms(filename).
103 |
104 | go_term_name : str
105 | Specific GO term to find their children. For example:
106 | 'GO:0007155'.
107 |
108 | output_list : list
109 | List used to perform a Depth First Search and find the
110 | children in a recursive way. Here the children will be
111 | automatically written.
112 |
113 | verbose : boolean, default=True
114 | Whether to print the steps of the analysis.
115 | '''
116 | for child in networkx.ancestors(go_terms, go_term_name):
117 | if child not in output_list:
118 | if verbose:
119 | print('Retrieving children for ' + go_term_name)
120 | output_list.append(child)
121 | find_all_children_of_go_term(go_terms, child, output_list, verbose)
122 |
123 |
124 | def find_go_terms_from_keyword(go_terms, keyword, verbose=False):
125 | '''
126 | Uses a keyword to find related GO terms.
127 |
128 | Parameters
129 | ----------
130 | go_terms : networkx.Graph
131 | NetworkX Graph containing GO terms datasets from .obo file.
132 | It could be loaded using
133 | cell2cell.io.read_data.load_go_terms(filename).
134 |
135 | keyword : str
136 | Keyword to be included in the names of retrieved GO terms.
137 |
138 | verbose : boolean, default=False
139 | Whether to print the steps of the analysis.
140 |
141 | Returns
142 | -------
143 | go_filter : list
144 | List containing all GO terms related to a keyword.
145 | '''
146 | go_filter = []
147 | for go, node in go_terms.nodes.items():
148 | if keyword in node['name']:
149 | go_filter.append(go)
150 | if verbose:
151 | print(go, node['name'])
152 | return go_filter
--------------------------------------------------------------------------------
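A sketch of the intended workflow, using the loaders named in the docstrings above; the file paths are placeholders and the loader signatures are assumed from those docstrings:

from cell2cell.io.read_data import load_go_terms, load_go_annotations
from cell2cell.preprocessing.gene_ontology import (find_go_terms_from_keyword,
                                                   get_genes_from_go_hierarchy)

go_terms = load_go_terms('go-basic.obo')                  # placeholder path
go_annotations = load_go_annotations('goa_human.gaf.gz')  # placeholder path

adhesion_terms = find_go_terms_from_keyword(go_terms, keyword='cell adhesion')
# GO:0007155 is 'cell adhesion'; its children terms are included automatically
genes = get_genes_from_go_hierarchy(go_annotations, go_terms, go_filter=['GO:0007155'])
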
/cell2cell/preprocessing/manipulate_dataframes.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import absolute_import
4 |
5 | import random
6 | import numpy as np
7 | import pandas as pd
8 |
9 |
10 | def check_presence_in_dataframe(df, elements, columns=None):
11 | '''
12 | Searches for elements in a dataframe and returns those
13 | that are present in the dataframe.
14 |
15 | Parameters
16 | ----------
17 | df : pandas.DataFrame
18 | A dataframe
19 |
20 | elements : list
21 | List of elements to find in the dataframe. They
22 | must be a data type contained in the dataframe.
23 |
24 | columns : list, default=None
25 | Names of columns to consider in the search. If
26 | None, all columns are used.
27 |
28 | Returns
29 | -------
30 | found_elements : list
31 | List of elements in the input list that were found
32 | in the dataframe.
33 | '''
34 | if columns is None:
35 | columns = list(df.columns)
36 | df_elements = pd.Series(np.unique(df[columns].values.flatten()))
37 | df_elements = df_elements.loc[df_elements.isin(elements)].values
38 | found_elements = list(df_elements)
39 | return found_elements
40 |
41 |
42 | def shuffle_cols_in_df(df, columns, shuffling_number=1, random_state=None):
43 | '''
44 | Randomly shuffles specific columns in a dataframe.
45 |
46 | Parameters
47 | ----------
48 | df : pandas.DataFrame
49 | A dataframe.
50 |
51 | columns : list
52 | Names of columns to shuffle.
53 |
54 | shuffling_number : int, default=1
55 | Number of shuffles per column.
56 |
57 | random_state : int, default=None
58 | Seed for randomization.
59 |
60 | Returns
61 | -------
62 | df_ : pandas.DataFrame
63 | A shuffled dataframe.
64 | '''
65 | df_ = df.copy()
66 | if isinstance(columns, str):
67 | columns = [columns]
68 |
69 | for col in columns:
70 | for i in range(shuffling_number):
71 | if random_state is not None:
72 | np.random.seed(random_state + i)
73 | df_[col] = np.random.permutation(df_[col].values)
74 | return df_
75 |
76 |
77 | def shuffle_rows_in_df(df, rows, shuffling_number=1, random_state=None):
78 | '''
79 | Randomly shuffles specific rows in a dataframe.
80 |
81 | Parameters
82 | ----------
83 | df : pandas.DataFrame
84 | A dataframe.
85 |
86 | rows : list
87 | Names of rows (or indexes) to shuffle.
88 |
89 | shuffling_number : int, default=1
90 | Number of shuffles per row.
91 |
92 | random_state : int, default=None
93 | Seed for randomization.
94 |
95 | Returns
96 | -------
97 | df_.T : pandas.DataFrame
98 | A shuffled dataframe.
99 | '''
100 | df_ = df.copy().T
101 | if isinstance(rows, str):
102 | rows = [rows]
103 |
104 | for row in rows:
105 | for i in range(shuffling_number):
106 | if random_state is not None:
107 | np.random.seed(random_state + i)
108 | df_[row] = np.random.permutation(df_[row].values)
109 | return df_.T
110 |
111 |
112 | def shuffle_dataframe(df, shuffling_number=1, axis=0, random_state=None):
113 | '''
114 | Randomly shuffles a whole dataframe across a given axis.
115 |
116 | Parameters
117 | ----------
118 | df : pandas.DataFrame
119 | A dataframe.
120 |
121 | shuffling_number : int, default=1
122 | Number of shuffles per column.
123 |
124 | axis : int, default=0
125 | An axis of the dataframe (0 across rows, 1 across columns).
126 | Across rows means that shuffles each column independently,
127 | and across columns shuffles each row independently.
128 |
129 | random_state : int, default=None
130 | Seed for randomization.
131 |
132 | Returns
133 | -------
134 | df_ : pandas.DataFrame
135 | A shuffled dataframe.
136 | '''
137 | df_ = df.copy()
138 | axis = int(not axis) # pandas.DataFrame is always 2D
139 | to_shuffle = np.rollaxis(df_.values, axis)
140 | for _ in range(shuffling_number):
141 | for i, view in enumerate(to_shuffle):
142 | if random_state is not None:
143 | np.random.seed(random_state + i)
144 | np.random.shuffle(view)
145 | df_ = pd.DataFrame(np.rollaxis(to_shuffle, axis=axis), index=df_.index, columns=df_.columns)
146 | return df_
147 |
148 |
149 | def subsample_dataframe(df, n_samples, random_state=None):
150 | '''
151 | Randomly subsamples rows of a dataframe.
152 |
153 | Parameters
154 | ----------
155 | df : pandas.DataFrame
156 | A dataframe.
157 |
158 | n_samples : int
159 | Number of samples, rows in this case. If
160 | n_samples is larger than the number of rows,
161 | the entire dataframe will be returned, but
162 | shuffled.
163 |
164 | random_state : int, default=None
165 | Seed for randomization.
166 |
167 | Returns
168 | -------
169 | subsampled_df : pandas.DataFrame
170 | A subsampled and shuffled dataframe.
171 | '''
172 | items = list(df.index)
173 | if n_samples > len(items):
174 | n_samples = len(items)
175 | if isinstance(random_state, int):
176 | random.seed(random_state)
177 | random.shuffle(items)
178 |
179 | subsampled_df = df.loc[items[:n_samples],:]
180 | return subsampled_df
181 |
182 |
183 | def check_symmetry(df):
184 | '''
185 | Checks whether a dataframe is symmetric.
186 |
187 | Parameters
188 | ----------
189 | df : pandas.DataFrame
190 | A dataframe.
191 |
192 | Returns
193 | -------
194 | symmetric : boolean
195 | Whether a dataframe is symmetric.
196 | '''
197 | shape = df.shape
198 | if shape[0] == shape[1]:
199 | symmetric = (df.values.transpose() == df.values).all()
200 | else:
201 | symmetric = False
202 | return symmetric
203 |
204 |
205 | def convert_to_distance_matrix(df):
206 | '''
207 | Converts a symmetric dataframe into a distance dataframe.
208 | That is, diagonal elements are all zero.
209 |
210 | Parameters
211 | ----------
212 | df : pandas.DataFrame
213 | A dataframe.
214 |
215 | Returns
216 | -------
217 | df_ : pandas.DataFrame
218 | A copy of df, but with all diagonal elements with a
219 | value of zero.
220 | '''
221 | if check_symmetry(df):
222 | df_ = df.copy()
223 | if np.trace(df_.values) != 0.0:
224 | print("Warning: diagonal elements are not zero. Automatically replaced by zeros.")
225 | np.fill_diagonal(df_.values, 0.0)
226 | else:
227 | raise ValueError('The DataFrame is not symmetric')
228 | return df_
229 |
--------------------------------------------------------------------------------
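A sketch contrasting whole-dataframe shuffling with row subsampling (toy values):

import pandas as pd

from cell2cell.preprocessing.manipulate_dataframes import (shuffle_dataframe,
                                                           subsample_dataframe)

df = pd.DataFrame({'C1': [1, 2, 3, 4], 'C2': [5, 6, 7, 8]},
                  index=['G1', 'G2', 'G3', 'G4'])

shuffled = shuffle_dataframe(df, axis=0, random_state=0)       # permutes within each column
subset = subsample_dataframe(df, n_samples=2, random_state=0)  # two random rows
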
/cell2cell/preprocessing/signal.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from scipy.signal import savgol_filter
4 |
5 |
6 | def smooth_curve(values, window_length=None, polyorder=3, **kwargs):
7 | '''Apply a Savitzky-Golay filter to an array to smooth the curve.
8 |
9 | Parameters
10 | ----------
11 | values : array-like
12 | An array or list of values.
13 |
14 | window_length : int, default=None
15 | Size of the window of values to use to smooth the curve.
16 |
17 | polyorder : int, default=3
18 | The order of the polynomial used to fit the samples.
19 |
20 | **kwargs : dict
21 | Extra arguments for the scipy.signal.savgol_filter function.
22 |
23 | Returns
24 | -------
25 | smooth_values : array-like
26 | An array or list of values representing the smoothed curve.
27 | '''
28 | size = len(values)
29 | if window_length is None:
30 | window_length = int(size / min([2, size]))
31 | if window_length % 2 == 0:
32 | window_length += 1
33 | assert(polyorder < window_length), "polyorder must be less than window_length."
34 | smooth_values = savgol_filter(values, window_length, polyorder, **kwargs)
35 | return smooth_values
--------------------------------------------------------------------------------
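A sketch of the default window selection: for 25 points, window_length starts at 25 // 2 = 12 and is bumped to 13 to satisfy savgol_filter's odd-length requirement:

import numpy as np

from cell2cell.preprocessing.signal import smooth_curve

rng = np.random.default_rng(0)
noisy = np.sin(np.linspace(0, 3, 25)) + rng.normal(0, 0.1, 25)

smooth = smooth_curve(noisy)  # window_length=13 here, polyorder=3
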
/cell2cell/spatial/__init__.py:
--------------------------------------------------------------------------------
1 | from cell2cell.spatial.distances import (celltype_pair_distance, pairwise_celltype_distances)
2 | from cell2cell.spatial.filtering import (dist_filter_liana, dist_filter_tensor)
3 | from cell2cell.spatial.neighborhoods import (create_spatial_grid, create_sliding_windows, calculate_window_size, add_sliding_window_info_to_adata)
--------------------------------------------------------------------------------
/cell2cell/spatial/distances.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import itertools
3 | import numpy as np
4 | import pandas as pd
5 | from sklearn.metrics.pairwise import euclidean_distances, manhattan_distances
6 |
7 |
8 | def celltype_pair_distance(df1, df2, method='min', distance='euclidean'):
9 | '''
10 | Calculates the distance between two sets of data points (single cell coordinates)
11 | represented by df1 and df2. It supports two distance metrics: Euclidean and Manhattan
12 | distances. The method parameter allows you to specify how the distances between the
13 | two sets are aggregated.
14 |
15 | Parameters
16 | ----------
17 | df1 : pandas.DataFrame
18 | The first set of single cell coordinates.
19 |
20 | df2 : pandas.DataFrame
21 | The second set of single cell coordinates.
22 |
23 | method : str, default='min'
24 | The aggregation method for the calculated distances. It can be one of 'min',
25 | 'max', or 'mean'.
26 |
27 | distance : str, default='euclidean'
28 | The distance metric to use. It can be 'euclidean' or 'manhattan'.
29 |
30 | Returns
31 | -------
32 | agg_dist : numpy.float
33 | The aggregated distance between the two sets of data points based on the specified
34 | method and distance metric.
35 | '''
36 | if distance == 'euclidean':
37 | distances = euclidean_distances(df1, df2)
38 | elif distance == 'manhattan':
39 | distances = manhattan_distances(df1, df2)
40 | else:
41 | raise NotImplementedError("{} distance is not implemented.".format(distance.capitalize()))
42 |
43 | if method == 'min':
44 | agg_dist = np.nanmin(distances)
45 | elif method == 'max':
46 | agg_dist = np.nanmax(distances)
47 | elif method == 'mean':
48 | agg_dist = np.nanmean(distances)
49 | else:
50 | raise NotImplementedError('Method {} is not implemented.'.format(method))
51 | return agg_dist
52 |
53 |
54 | def pairwise_celltype_distances(df, group_col, coord_cols=['X', 'Y'],
55 | method='min', distance='euclidean', pairs=None):
56 | '''
57 | Calculates pairwise distances between groups of single cells. It computes an
58 | aggregate distance between all possible combinations of groups.
59 |
60 | Parameters
61 | ----------
62 | df : pandas.DataFrame
63 | A dataframe where each row is a single cell, and there are columns containing
64 | spatial coordinates and cell group.
65 |
66 | group_col : str
67 | The name of the column that defines the groups for which distances are calculated.
68 |
69 | coord_cols : list, default=['X', 'Y']
70 | The list of column names that represent the coordinates of the single cells.
71 |
72 | pairs : list, default=None
73 | A list of specific group pairs for which distances should be calculated.
74 | If not provided, all possible combinations of group pairs will be considered.
75 |
76 | Returns
77 | -------
78 | distances : pandas.DataFrame
79 | The pairwise distances between groups based on the specified group column.
80 | In this dataframe rows and columns are the cell groups used to compute distances.
81 | '''
82 | # TODO: Adapt code below to receive AnnData or MuData objects
83 | # df_ = pd.DataFrame(adata.obsm['spatial'], index=adata.obs_names, columns=['X', 'Y'])
84 | # df = adata.obs[[group_col]]
85 | df_ = df[coord_cols]
86 | groups = df[group_col].unique()
87 | distances = pd.DataFrame(np.zeros((len(groups), len(groups))),
88 | index=groups,
89 | columns=groups)
90 |
91 | if pairs is None:
92 | pairs = list(itertools.combinations(groups, 2))
93 |
94 | for pair in pairs:
95 | dist = celltype_pair_distance(df_.loc[df[group_col] == pair[0]], df_.loc[df[group_col] == pair[1]],
96 | method=method,
97 | distance=distance
98 | )
99 | distances.loc[pair[0], pair[1]] = dist
100 | distances.loc[pair[1], pair[0]] = dist
101 | return distances
--------------------------------------------------------------------------------
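A sketch with four cells in two groups; with method='min', the reported value is the distance between the closest pair of cells across groups:

import pandas as pd

from cell2cell.spatial.distances import pairwise_celltype_distances

cells = pd.DataFrame({'X': [0., 1., 5., 6.],
                      'Y': [0., 1., 5., 6.],
                      'cell_type': ['A', 'A', 'B', 'B']})

dist = pairwise_celltype_distances(cells, group_col='cell_type', method='min')
print(dist.loc['A', 'B'])  # distance between (1, 1) and (5, 5): ~5.66
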
/cell2cell/spatial/filtering.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import numpy as np
3 | import tensorly as tl
4 |
5 |
6 | def dist_filter_tensor(interaction_tensor, distances, max_dist, min_dist=0, source_axis=2, target_axis=3):
7 | '''
8 | Filters an Interaction Tensor based on intercellular distances between cell types.
9 |
10 | Parameters
11 | ----------
12 | interaction_tensor : cell2cell.tensor.BaseTensor
13 |         A communication tensor generated with any of the tensor classes in
14 | cell2cell.tensor
15 |
16 | distances : pandas.DataFrame
17 | Square dataframe containing distances between pairs of cell groups. It must contain
18 | all cell groups that act as sender and receiver cells in the tensor.
19 |
20 | max_dist : float
21 | The maximum distance between cell pairs to consider them in the interaction tensor.
22 |
23 | min_dist : float, default=0
24 | The minimum distance between cell pairs to consider them in the interaction tensor.
25 |
26 | source_axis : int, default=2
27 | The index indicating the axis in the tensor corresponding to sender cells.
28 |
29 | target_axis : int, default=3
30 | The index indicating the axis in the tensor corresponding to receiver cells.
31 |
32 | Returns
33 | -------
34 | new_interaction_tensor : cell2cell.tensor.BaseTensor
35 |         A tensor with communication scores set to zero for cell-type pairs whose
36 |         intercellular distance falls outside the [min_dist, max_dist] range.
37 | '''
38 | # Evaluate whether we provide distances for all cell types in the tensor
39 | assert all([cell in distances.index for cell in
40 | interaction_tensor.order_names[source_axis]]), "Distances not provided for all sender cells"
41 | assert all([cell in distances.columns for cell in
42 | interaction_tensor.order_names[target_axis]]), "Distances not provided for all receiver cells"
43 |
44 | source_cell_groups = interaction_tensor.order_names[source_axis]
45 | target_cell_groups = interaction_tensor.order_names[target_axis]
46 |
47 | # Use only cell types in the tensor
48 | dist_df = distances.loc[source_cell_groups, target_cell_groups]
49 |
50 | # Filter cell types by intercellular distances
51 | dist = ((min_dist <= dist_df) & (dist_df <= max_dist)).astype(int).values
52 |
53 |     # Map the re-arrangement needed to keep the original tensor shape
54 | tensor_shape = list(interaction_tensor.tensor.shape)
55 | original_order = list(range(len(tensor_shape)))
56 | new_order = []
57 |
58 | # Generate template tensor with cells to keep
59 | template_tensor = dist
60 | for i, size in enumerate(tensor_shape):
61 | if (i != source_axis) and (i != target_axis):
62 | template_tensor = [template_tensor] * size
63 | new_order.insert(0, i)
64 | template_tensor = np.array(template_tensor)
65 |
66 | new_order += [source_axis, target_axis]
67 | changes_needed = [new_order.index(i) for i in original_order]
68 |
69 | # Re-arrange axes by the order
70 | template_tensor = template_tensor.transpose(changes_needed)
71 |
72 | # Create tensorly object
73 | template_tensor = tl.tensor(template_tensor, **tl.context(interaction_tensor.tensor))
74 |
75 | assert template_tensor.shape == interaction_tensor.tensor.shape, "Filtering of cells was not properly done. Revise code of this function (template tensor)"
76 |
77 | # tensor = tl.zeros_like(interaction_tensor.tensor, **tl.context(tensor))
78 | new_interaction_tensor = interaction_tensor.copy()
79 | new_interaction_tensor.tensor = new_interaction_tensor.tensor * template_tensor
80 |     # Make cells masked out by distance become real zeros
81 | new_interaction_tensor.loc_zeros = (new_interaction_tensor.tensor == 0).astype(int) - new_interaction_tensor.loc_nans
82 | return new_interaction_tensor
83 |
84 |
85 | def dist_filter_liana(liana_outputs, distances, max_dist, min_dist=0, source_col='source', target_col='target',
86 | keep_dist=False):
87 | '''
88 |     Filters a dataframe with outputs from LIANA based on a distance threshold
89 |     applied to another dataframe containing distances between cell groups.
90 |
91 | Parameters
92 | ----------
93 | liana_outputs : pandas.DataFrame
94 | Dataframe containing the results from LIANA, where rows are pairs of
95 | ligand-receptor interactions by pair of source-target cell groups.
96 |
97 | distances : pandas.DataFrame
98 | Square dataframe containing distances between pairs of cell groups.
99 |
100 | max_dist : float
101 |         The maximum distance between cell pairs to keep them in the liana_outputs dataframe.
102 |
103 | min_dist : float, default=0
104 |         The minimum distance between cell pairs to keep them in the liana_outputs dataframe.
105 |
106 | source_col : str, default='source'
107 | Column name in both dataframes that represents the source cell groups.
108 |
109 | target_col : str, default='target'
110 | Column name in both dataframes that represents the target cell groups.
111 |
112 | keep_dist : bool, default=False
113 | To determine whether to keep the 'distance' column in the filtered output.
114 | If set to True, the 'distance' column will be retained; otherwise, it will be dropped
115 | and the LIANA dataframe will contain the original columns.
116 |
117 | Returns
118 | -------
119 | filtered_liana_outputs : pandas.DataFrame
120 |         A dataframe containing the pairs from liana_outputs that meet the distance
121 |         threshold criteria.
122 | '''
123 | # Convert distances to a long-form dataframe
124 | distances = distances.stack().reset_index()
125 | distances.columns = [source_col, target_col, 'distance']
126 |
127 | # Merge the long-form distances DataFrame with pairs_df
128 | merged_df = liana_outputs.merge(distances, on=[source_col, target_col], how='left')
129 |
130 | # Filter based on the distance threshold
131 | filtered_liana_outputs = merged_df[(min_dist <= merged_df['distance']) & (merged_df['distance'] <= max_dist)]
132 |
133 |     if not keep_dist:
134 | filtered_liana_outputs = filtered_liana_outputs.drop(['distance'], axis=1)
135 |
136 | return filtered_liana_outputs
--------------------------------------------------------------------------------
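A hedged sketch of `dist_filter_liana`, using invented toy dataframes in place of real LIANA outputs and real intercellular distances:

```python
import pandas as pd

from cell2cell.spatial.filtering import dist_filter_liana

# Toy LIANA-style output: one row per LR interaction per source-target pair
liana_res = pd.DataFrame({'source': ['A', 'A', 'B'],
                          'target': ['B', 'C', 'C'],
                          'ligand': ['L1', 'L2', 'L3'],
                          'receptor': ['R1', 'R2', 'R3']})

# Toy square matrix of distances between cell groups
celltype_distances = pd.DataFrame([[0.0, 30.0, 80.0],
                                   [30.0, 0.0, 40.0],
                                   [80.0, 40.0, 0.0]],
                                  index=['A', 'B', 'C'],
                                  columns=['A', 'B', 'C'])

# Keep only pairs of cell groups at most 50 distance units apart
filtered = dist_filter_liana(liana_outputs=liana_res,
                             distances=celltype_distances,
                             max_dist=50.0,
                             keep_dist=True)  # retain the 'distance' column
print(filtered)  # The A->C row (distance 80 > 50) is dropped
```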
/cell2cell/spatial/neighborhoods.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import numpy as np
3 | import pandas as pd
4 |
5 |
6 | def create_spatial_grid(adata, num_bins, copy=False):
7 | """
8 | Segments spatial transcriptomics data into a square grid based on spatial coordinates
9 | and annotates each cell or spot with its corresponding grid position.
10 |
11 | Parameters
12 | ----------
13 | adata : AnnData
14 | The AnnData object containing spatial transcriptomics data. The spatial coordinates
15 | must be stored in `adata.obsm['spatial']`. This object is either modified in place
16 | or a copy is returned based on the `copy` parameter.
17 |
18 | num_bins : int
19 | The number of bins (squares) along each dimension of the grid. The grid is square,
20 | so this number applies to both the horizontal and vertical divisions.
21 |
22 | copy : bool, default=False
23 | If True, the function operates on and returns a copy of the input AnnData object.
24 | If False, the function modifies the input AnnData object in place.
25 |
26 | Returns
27 | -------
28 | adata_ : AnnData or None
29 |         If `copy=True`, a new AnnData object with added grid annotations is returned. Otherwise, the input `adata` is annotated in place and None is returned.
30 | """
31 |
32 | if copy:
33 | adata_ = adata.copy()
34 | else:
35 | adata_ = adata
36 |
37 | # Get the spatial coordinates
38 | coords = pd.DataFrame(adata.obsm['spatial'], index=adata.obs_names, columns=['X', 'Y'])
39 |
40 | # Define the bins for each dimension
41 | x_min, y_min = coords.min()
42 | x_max, y_max = coords.max()
43 | x_bins = np.linspace(x_min, x_max, num_bins + 1)
44 | y_bins = np.linspace(y_min, y_max, num_bins + 1)
45 |
46 | # Digitize the coordinates into bins
47 | adata_.obs['grid_x'] = np.digitize(coords['X'], x_bins, right=False) - 1
48 | adata_.obs['grid_y'] = np.digitize(coords['Y'], y_bins, right=False) - 1
49 |
50 | # Adjust indices to start from 0 and end at num_bins - 1
51 | adata_.obs['grid_x'] = np.clip(adata_.obs['grid_x'], 0, num_bins - 1)
52 | adata_.obs['grid_y'] = np.clip(adata_.obs['grid_y'], 0, num_bins - 1)
53 |
54 | # Combine grid indices to form a grid cell identifier
55 | adata_.obs['grid_cell'] = adata_.obs['grid_x'].astype(str) + "_" + adata_.obs['grid_y'].astype(str)
56 |
57 | if copy:
58 | return adata_
59 |
60 |
61 | def calculate_window_size(adata, num_windows):
62 | """
63 | Calculates the window size required to fit a specified number of windows
64 | across the width of the coordinate space in spatial transcriptomics data.
65 |
66 | Parameters
67 | ----------
68 | adata : AnnData
69 | The AnnData object containing spatial transcriptomics data. The spatial coordinates
70 | must be stored in `adata.obsm['spatial']`.
71 |
72 | num_windows : int
73 | The desired number of windows to fit across the width of the coordinate space.
74 |
75 | Returns
76 | -------
77 | window_size : float
78 | The calculated size of each window to fit the specified number of windows
79 | across the width of the coordinate space.
80 | """
81 |
82 | # Extract X coordinates
83 | x_coords = adata.obsm['spatial'][:, 0]
84 |
85 | # Determine the range of X coordinates
86 | x_min, x_max = np.min(x_coords), np.max(x_coords)
87 |
88 | # Calculate the window size
89 | window_size = (x_max - x_min) / num_windows
90 |
91 | return window_size
92 |
93 |
94 | def create_sliding_windows(adata, window_size, stride):
95 | """
96 | Maps windows to the cells they contain based on spatial transcriptomics data.
97 | Returns a dictionary where keys are window identifiers and values are sets of cell indices.
98 |
99 | Parameters
100 | ----------
101 | adata : AnnData
102 | The AnnData object containing spatial transcriptomics data. The spatial coordinates
103 | must be stored in `adata.obsm['spatial']`.
104 |
105 | window_size : float
106 | The size of each square window along each dimension.
107 |
108 | stride : float
109 | The stride with which the window moves along each dimension.
110 |
111 | Returns
112 | -------
113 | window_mapping : dict
114 | A dictionary mapping each window to a set of cell indices that fall within that window.
115 | """
116 |
117 | # Get the spatial coordinates
118 | coords = pd.DataFrame(adata.obsm['spatial'], index=adata.obs_names, columns=['X', 'Y'])
119 |
120 | # Define the range of the sliding windows
121 | x_min, y_min = coords.min()
122 | x_max, y_max = coords.max()
123 | x_windows = np.arange(x_min, x_max - window_size + stride, stride)
124 | y_windows = np.arange(y_min, y_max - window_size + stride, stride)
125 |
126 | # Function to find all windows a point belongs to
127 | def find_windows(coord, window_edges):
128 | return [i for i, edge in enumerate(window_edges) if edge <= coord < edge + window_size]
129 |
130 | # Initialize the window mapping
131 | window_mapping = {}
132 |
133 | # Assign cells to all overlapping windows
134 | for cell_idx, (x, y) in enumerate(zip(coords['X'], coords['Y'])):
135 | cell_windows = ["window_{}_{}".format(wx, wy)
136 | for wx in find_windows(x, x_windows)
137 | for wy in find_windows(y, y_windows)]
138 |
139 | for win in cell_windows:
140 | if win not in window_mapping:
141 | window_mapping[win] = set()
142 | window_mapping[win].add(coords.index[cell_idx]) # This stores the cell/spot barcodes
143 | # For memory efficiency, it could be `window_mapping[win].add(cell_idx)` instead
144 |
145 | return window_mapping
146 |
147 |
148 | def add_sliding_window_info_to_adata(adata, window_mapping):
149 | """
150 | Adds window information to the AnnData object's .obs DataFrame. Each window is represented
151 | as a column, and cells/spots belonging to a window are marked with a 1.0, while others are marked
152 | with a 0.0. It modifies the `adata` object in place.
153 |
154 | Parameters
155 | ----------
156 | adata : AnnData
157 | The AnnData object to which the window information will be added.
158 |
159 | window_mapping : dict
160 |         A dictionary mapping each window to a set of cell/spot indices or barcodes.
161 |         This is the output from the `create_sliding_windows` function.
162 | """
163 |
164 | # Initialize all window columns to 0.0
165 | for window in sorted(window_mapping.keys()):
166 | adata.obs[window] = 0.0
167 |
168 | # Mark cells that belong to each window
169 |     for window, barcode_indices in window_mapping.items():
170 |         adata.obs.loc[list(barcode_indices), window] = 1.0
--------------------------------------------------------------------------------
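A minimal sketch of the two neighborhood strategies implemented above, assuming `anndata` is installed and using random coordinates as stand-ins for a real spatial dataset:

```python
import numpy as np
import anndata

from cell2cell.spatial.neighborhoods import (create_spatial_grid, calculate_window_size,
                                             create_sliding_windows,
                                             add_sliding_window_info_to_adata)

# Toy AnnData object with random 2D coordinates for 100 cells
adata = anndata.AnnData(X=np.random.rand(100, 10))
adata.obsm['spatial'] = np.random.rand(100, 2) * 1000

# Strategy 1: non-overlapping 4x4 grid; adds 'grid_x', 'grid_y', 'grid_cell' to adata.obs
create_spatial_grid(adata, num_bins=4)

# Strategy 2: overlapping sliding windows; a stride of half a window gives 50% overlap
window_size = calculate_window_size(adata, num_windows=4)
window_mapping = create_sliding_windows(adata, window_size=window_size, stride=window_size / 2)
add_sliding_window_info_to_adata(adata, window_mapping)  # one 0/1 column per window in adata.obs
```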
/cell2cell/stats/__init__.py:
--------------------------------------------------------------------------------
1 | from cell2cell.stats.enrichment import (fisher_representation, hypergeom_representation)
2 | from cell2cell.stats.gini import (gini_coefficient)
3 | from cell2cell.stats.multitest import (compute_fdrcorrection_asymmetric_matrix, compute_fdrcorrection_symmetric_matrix)
4 | from cell2cell.stats.permutation import (compute_pvalue_from_dist, pvalue_from_dist, random_switching_ppi_labels,
5 | run_label_permutation)
6 |
--------------------------------------------------------------------------------
/cell2cell/stats/enrichment.py:
--------------------------------------------------------------------------------
1 | import scipy.stats as st
2 |
3 |
4 | def hypergeom_representation(sample_size, class_in_sample, population_size, class_in_population):
5 | '''
6 |     Performs an analysis of enrichment/depletion based on observations
7 | in a sample. It computes a p-value given a hypergeometric
8 | distribution.
9 |
10 | Parameters
11 | ----------
12 | sample_size : int
13 | Size of the sample obtained or number of elements
14 | obtained from the analysis.
15 |
16 | class_in_sample : int
17 | Number of elements of a given class that are
18 | contained in the sample. This is the class to be tested.
19 |
20 | population_size : int
21 | Size of the sampling space. That is, the total number
22 | of possible elements to be chosen when sampling.
23 |
24 | class_in_population : int
25 | Number of elements of a given class that are contained
26 | in the population. This is the class to be tested.
27 |
28 | Returns
29 | -------
30 | p_vals : tuple
31 | A tuple containing the p-values for depletion and
32 | enrichment analysis, respectively.
33 | '''
34 | # Computing the number of elements that are not in the same class
35 | nonclass_in_sample = sample_size - class_in_sample
36 | nonclass_in_population = population_size - class_in_population
37 |
38 | # Remaining elements in population after sampling
39 | rem_class = class_in_population - class_in_sample
40 | rem_nonclass = nonclass_in_population - nonclass_in_sample
41 |
42 | # Depletion Analysis
43 | depletion_hyp_p_val = st.hypergeom.cdf(class_in_sample, population_size, class_in_population, sample_size)
44 |
45 | # Enrichment Analysis
46 | enrichment_hyp_p_val = 1.0 - st.hypergeom.cdf(class_in_sample - 1.0, population_size, class_in_population,
47 | sample_size)
48 |
49 | p_vals = (depletion_hyp_p_val, enrichment_hyp_p_val)
50 | return p_vals
51 |
52 |
53 | def fisher_representation(sample_size, class_in_sample, population_size, class_in_population):
54 | '''
55 |     Performs an analysis of enrichment/depletion based on observations
56 |     in a sample. It computes a p-value given a Fisher exact test.
57 |
58 | Parameters
59 | ----------
60 | sample_size : int
61 | Size of the sample obtained or number of elements
62 | obtained from the analysis.
63 |
64 | class_in_sample : int
65 | Number of elements of a given class that are
66 | contained in the sample. This is the class to be tested.
67 |
68 | population_size : int
69 | Size of the sampling space. That is, the total number
70 | of possible elements to be chosen when sampling.
71 |
72 | class_in_population : int
73 | Number of elements of a given class that are contained
74 | in the population. This is the class to be tested.
75 |
76 | Returns
77 | -------
78 | results : dict
79 |         A dictionary containing the odds ratios and p-values for
80 | depletion and enrichment analysis.
81 | '''
82 | # Computing the number of elements that are not in the same class
83 | nonclass_in_sample = sample_size - class_in_sample
84 | nonclass_in_population = population_size - class_in_population
85 |
86 | # Remaining elements in population after sampling
87 | rem_class = class_in_population - class_in_sample
88 | rem_nonclass = nonclass_in_population - nonclass_in_sample
89 |
90 | # Depletion Analysis
91 | depletion_odds, depletion_fisher_p_val = st.fisher_exact([[class_in_sample, rem_class],
92 | [nonclass_in_sample, rem_nonclass]],
93 | alternative='less')
94 |
95 | # Enrichment Analysis
96 | enrichment_odds, enrichment_fisher_p_val = st.fisher_exact([[class_in_sample, rem_class],
97 | [nonclass_in_sample, rem_nonclass]],
98 | alternative='greater')
99 |
100 | p_vals = (depletion_fisher_p_val, enrichment_fisher_p_val)
101 | odds = (depletion_odds, enrichment_odds)
102 | results = {'pval' : p_vals,
103 | 'odds' : odds,
104 | }
105 | return results
--------------------------------------------------------------------------------
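A worked toy example of the two tests above. Suppose 50 elements are sampled from a population of 1,000, in which 100 belong to the class of interest, and 20 of the sampled elements turn out to be in that class:

```python
from cell2cell.stats.enrichment import fisher_representation, hypergeom_representation

depletion_p, enrichment_p = hypergeom_representation(sample_size=50,
                                                     class_in_sample=20,
                                                     population_size=1000,
                                                     class_in_population=100)
print(enrichment_p)  # Small p-value: 20/50 >> 100/1000, so the class is enriched

results = fisher_representation(sample_size=50,
                                class_in_sample=20,
                                population_size=1000,
                                class_in_population=100)
print(results['pval'])  # (depletion p-value, enrichment p-value)
print(results['odds'])  # (depletion odds ratio, enrichment odds ratio)
```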
/cell2cell/stats/gini.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import numpy as np
4 |
5 |
6 | def gini_coefficient(distribution):
7 | """Computes the Gini coefficient of an array of values.
8 | Code borrowed from:
9 | https://stackoverflow.com/questions/39512260/calculating-gini-coefficient-in-python-numpy
10 |
11 | Parameters
12 | ----------
13 | distribution : array-like
14 | An array of values representing the distribution
15 | to be evaluated.
16 |
17 | Returns
18 | -------
19 | gini : float
20 | Gini coefficient for the evaluated distribution.
21 | """
22 | diffsum = 0
23 | for i, xi in enumerate(distribution[:-1], 1):
24 | diffsum += np.sum(np.abs(xi - distribution[i:]))
25 | gini = diffsum / (len(distribution)**2 * np.mean(distribution))
26 | return gini
--------------------------------------------------------------------------------
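A quick sanity check of `gini_coefficient`: a perfectly even distribution scores 0, while concentrating everything in one element pushes the score toward 1:

```python
import numpy as np

from cell2cell.stats.gini import gini_coefficient

print(gini_coefficient(np.array([1.0, 1.0, 1.0, 1.0])))   # 0.0 (perfect equality)
print(gini_coefficient(np.array([0.0, 0.0, 0.0, 10.0])))  # 0.75 (high inequality for n=4)
```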
/cell2cell/stats/multitest.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 |
4 | from statsmodels.stats.multitest import fdrcorrection
5 |
6 |
7 | def compute_fdrcorrection_symmetric_matrix(X, alpha=0.1):
8 | '''
9 | Computes and FDR correction or Benjamini-Hochberg procedure
10 | on a symmetric matrix of p-values. Here, only the diagonal
11 | and values on the upper triangle are considered to avoid
12 | repetition with the lower triangle.
13 |
14 | Parameters
15 | ----------
16 | X : pandas.DataFrame
17 | A symmetric dataframe of P-values.
18 |
19 | alpha : float, default=0.1
20 | Error rate of the FDR correction. Must be 0 < alpha < 1.
21 |
22 | Returns
23 | -------
24 | adj_X : pandas.DataFrame
25 | A symmetric dataframe with adjusted P-values of X.
26 | '''
27 | pandas = False
28 | a = X.copy()
29 |
30 | if isinstance(X, pd.DataFrame):
31 | pandas = True
32 | a = X.values
33 | index = X.index
34 | columns = X.columns
35 |
36 | # Original data
37 | upper_idx = np.triu_indices_from(a)
38 | pvals = a[upper_idx]
39 |
40 | # New data
41 | adj_X = np.zeros(a.shape)
42 | rej, adj_pvals = fdrcorrection(pvals.flatten(), alpha=alpha)
43 |
44 |     # Reorder data
45 | adj_X[upper_idx] = adj_pvals
46 | adj_X = adj_X + np.triu(adj_X, 1).T
47 |
48 | if pandas:
49 | adj_X = pd.DataFrame(adj_X, index=index, columns=columns)
50 | return adj_X
51 |
52 |
53 | def compute_fdrcorrection_asymmetric_matrix(X, alpha=0.1):
54 | '''
55 |     Computes an FDR correction (Benjamini-Hochberg procedure)
56 |     on an asymmetric matrix of p-values. Here, the correction
57 | is performed for every value in X.
58 |
59 | Parameters
60 | ----------
61 | X : pandas.DataFrame
62 | An asymmetric dataframe of P-values.
63 |
64 | alpha : float, default=0.1
65 | Error rate of the FDR correction. Must be 0 < alpha < 1.
66 |
67 | Returns
68 | -------
69 | adj_X : pandas.DataFrame
70 | An asymmetric dataframe with adjusted P-values of X.
71 | '''
72 | pandas = False
73 | a = X.copy()
74 |
75 | if isinstance(X, pd.DataFrame):
76 | pandas = True
77 | a = X.values
78 | index = X.index
79 | columns = X.columns
80 |
81 | # Original data
82 | pvals = a.flatten()
83 |
84 | # New data
85 | rej, adj_pvals = fdrcorrection(pvals, alpha=alpha)
86 |
87 |     # Reorder data
88 | #adj_X = adj_pvals.reshape(-1, a.shape[1])
89 | adj_X = adj_pvals.reshape(a.shape) # Allows using tensors
90 |
91 | if pandas:
92 | adj_X = pd.DataFrame(adj_X, index=index, columns=columns)
93 | return adj_X
--------------------------------------------------------------------------------
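A small sketch of the symmetric variant, using an invented 3x3 matrix of p-values such as those produced by pairwise permutation tests:

```python
import numpy as np
import pandas as pd

from cell2cell.stats.multitest import compute_fdrcorrection_symmetric_matrix

pvals = pd.DataFrame([[0.001, 0.04, 0.20],
                      [0.04, 0.003, 0.35],
                      [0.20, 0.35, 0.01]],
                     index=['A', 'B', 'C'],
                     columns=['A', 'B', 'C'])

# Only the diagonal and upper triangle are corrected; the result is mirrored back
adj = compute_fdrcorrection_symmetric_matrix(pvals, alpha=0.1)
print(adj)  # Symmetric dataframe of Benjamini-Hochberg-adjusted p-values
```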
/cell2cell/tensor/__init__.py:
--------------------------------------------------------------------------------
1 | from cell2cell.tensor.external_scores import (dataframes_to_tensor)
2 | from cell2cell.tensor.factor_manipulation import (normalize_factors)
3 | from cell2cell.tensor.metrics import (correlation_index, pairwise_correlation_index)
4 | from cell2cell.tensor.tensor import (InteractionTensor, PreBuiltTensor, build_context_ccc_tensor, generate_tensor_metadata,
5 | interactions_to_tensor)
6 | from cell2cell.tensor.tensor_manipulation import (concatenate_interaction_tensors)
7 | from cell2cell.tensor.subset import (subset_tensor, subset_metadata)
8 |
--------------------------------------------------------------------------------
/cell2cell/tensor/factor_manipulation.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import numpy as np
4 |
5 |
6 | def normalize_factors(factors):
7 | '''
8 | L2-normalizes the factors considering all tensor dimensions
9 | from a tensor decomposition result
10 |
11 | Parameters
12 | ----------
13 | factors : dict
14 | Ordered dictionary containing a dataframe with the factor loadings for each
15 | dimension/order of the tensor. This is the result from a tensor decomposition,
16 | it can be found as the attribute `factors` in any tensor class derived from the
17 | class BaseTensor (e.g. BaseTensor.factors).
18 |
19 | Returns
20 | -------
21 | norm_factors : dict
22 | The normalized factors.
23 | '''
24 | norm_factors = dict()
25 | for k, v in factors.items():
26 | norm_factors[k] = v / np.linalg.norm(v, axis=0)
27 | return norm_factors
28 |
29 |
30 | def shuffle_factors(factors, axis=0):
31 | '''
32 | Randomly shuffles the values of the factors in the tensor decomposition.
33 | '''
34 | raise NotImplementedError
35 |
--------------------------------------------------------------------------------
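A minimal sketch of `normalize_factors` on a mock decomposition result, where each dimension holds a loadings dataframe with one column per factor (the dimension names here are invented):

```python
import numpy as np
import pandas as pd

from cell2cell.tensor.factor_manipulation import normalize_factors

factors = {'Contexts': pd.DataFrame(np.random.rand(4, 2), columns=['Factor 1', 'Factor 2']),
           'LR pairs': pd.DataFrame(np.random.rand(10, 2), columns=['Factor 1', 'Factor 2'])}

norm_factors = normalize_factors(factors)
print(np.linalg.norm(norm_factors['Contexts'], axis=0))  # ~[1., 1.]: each factor column has unit L2 norm
```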
/cell2cell/tensor/metrics.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import numpy as np
4 | import pandas as pd
5 |
6 | from itertools import combinations
7 |
8 | # Authors: Hratch Baghdassarian, Erick Armingol
9 | # similarity metrics for tensor decompositions
10 |
11 |
12 | def correlation_index(factors_1, factors_2, tol=5e-16, method='stacked'):
13 | """
14 | CorrIndex implementation to assess tensor decomposition outputs.
15 | From [1] Sobhani et al 2022 (https://doi.org/10.1016/j.sigpro.2022.108457).
16 | Metric is scaling and column-permutation invariant, wherein each column is a factor.
17 |
18 | Parameters
19 | ----------
20 | factors_1 : dict
21 | Ordered dictionary containing a dataframe with the factor loadings for each
22 | dimension/order of the tensor. This is the result from a tensor decomposition,
23 | it can be found as the attribute `factors` in any tensor class derived from the
24 | class BaseTensor (e.g. BaseTensor.factors).
25 |
26 | factors_2 : dict
27 | Similar to factors_1 but coming from another tensor decomposition of a tensor
28 | with equal shape.
29 |
30 | tol : float, default=5e-16
31 | Precision threshold below which to call the CorrIndex score 0.
32 |
33 | method : str, default='stacked'
34 | Method to obtain the CorrIndex by comparing the A matrices from two decompositions.
35 | Possible options are:
36 |
37 | - 'stacked' : The original method implemented in [1]. Here all A matrices from the same decomposition are
38 | vertically concatenated, building a big A matrix for each decomposition.
39 | - 'max_score' : This computes the CorrIndex for each pair of A matrices (i.e. between A_1 in factors_1 and
40 | factors_2, between A_2 in factors_1 and factors_2, and so on). Then the max score is
41 | selected (the most conservative approach). In other words, it selects the max score among the
42 | CorrIndexes computed dimension-wise.
43 | - 'min_score' : Similar to 'max_score', but the min score is selected (the least conservative approach).
44 | - 'avg_score' : Similar to 'max_score', but the avg score is selected.
45 |
46 | Returns
47 | -------
48 | score : float
49 | CorrIndex metric [0,1]; lower score indicates higher similarity between matrices
50 | """
51 | factors_1 = list(factors_1.values())
52 | factors_2 = list(factors_2.values())
53 |
54 | # check input factors shape
55 | for factors in [factors_1, factors_2]:
56 |         if len({np.shape(A)[1] for A in factors}) != 1:
57 | raise ValueError('Factors should be a list of loading matrices of the same rank')
58 |
59 | # check method
60 | options = ['stacked', 'max_score', 'min_score', 'avg_score']
61 | if method not in options:
62 |         raise ValueError("The `method` must be one of {}".format(options))
63 |
64 | if method == 'stacked':
65 |         # vertically stack loading matrices -- shape (sum(tensor.shape) x R)
66 | X_1 = [np.concatenate(factors_1, 0)]
67 | X_2 = [np.concatenate(factors_2, 0)]
68 | else:
69 | X_1 = factors_1
70 | X_2 = factors_2
71 |
72 | for x1, x2 in zip(X_1, X_2):
73 | if np.shape(x1) != np.shape(x2):
74 | raise ValueError('Factor matrices should be of the same shapes')
75 |
76 | # normalize columns to L2 norm - even if ran decomposition with normalize_factors=True
77 | col_norm_1 = [np.linalg.norm(x1, axis=0) for x1 in X_1]
78 | col_norm_2 = [np.linalg.norm(x2, axis=0) for x2 in X_2]
79 | for cn1, cn2 in zip(col_norm_1, col_norm_2):
80 | if np.any(cn1 == 0) or np.any(cn2 == 0):
81 | raise ValueError('Column norms must be non-zero')
82 | X_1 = [x1 / cn1 for x1, cn1 in zip(X_1, col_norm_1)]
83 | X_2 = [x2 / cn2 for x2, cn2 in zip(X_2, col_norm_2)]
84 |
85 | corr_idxs = [_compute_correlation_index(x1, x2, tol=tol) for x1, x2 in zip(X_1, X_2)]
86 |
87 | if method == 'stacked':
88 | score = corr_idxs[0]
89 | elif method == 'max_score':
90 | score = np.max(corr_idxs)
91 | elif method == 'min_score':
92 | score = np.min(corr_idxs)
93 | elif method == 'avg_score':
94 | score = np.mean(corr_idxs)
95 | else:
96 | score = 1.0
97 | return score
98 |
99 |
100 | def _compute_correlation_index(x1, x2, tol=5e-16):
101 | '''
102 | Computes the CorrIndex from the L2-normalized A matrices.
103 |
104 | Parameters
105 | ----------
106 |     x1 : ndarray
107 |         An L2-normalized A matrix from the first tensor decomposition.
108 | 
109 |     x2 : ndarray
110 |         An L2-normalized A matrix from the second tensor decomposition.
111 |
112 | tol : float, default=5e-16
113 | Precision threshold below which to call the CorrIndex score 0, by default 5e-16
114 |
115 | Returns
116 | -------
117 | score : float
118 | CorrIndex metric [0,1]; lower score indicates higher similarity between matrices
119 | '''
120 | # generate the correlation index input
121 | c_prod_mtx = np.abs(np.matmul(np.conj(np.transpose(np.asarray(x1))), np.asarray(x2)))
122 |
123 | # correlation index scoring
124 | n_elements = np.shape(c_prod_mtx)[1] + np.shape(c_prod_mtx)[0]
125 | score = (1 / (n_elements)) * (np.sum(np.abs(np.max(c_prod_mtx, 1) - 1)) + np.sum(np.abs(np.max(c_prod_mtx, 0) - 1)))
126 | if score < tol:
127 | score = 0
128 | return score
129 |
130 |
131 | def pairwise_correlation_index(factors, tol=5e-16, method='stacked'):
132 | '''
133 | Computes the CorrIndex between all pairs of factors
134 |
135 | Parameters
136 | ----------
137 | factors : list
138 | List with multiple Ordered dictionaries, each containing a dataframe with
139 | the factor loadings for each dimension/order of the tensor. This is the
140 | result from a tensor decomposition, it can be found as the attribute
141 | `factors` in any tensor class derived from the class BaseTensor
142 | (e.g. BaseTensor.factors).
143 |
144 | tol : float, default=5e-16
145 | Precision threshold below which to call the CorrIndex score 0.
146 |
147 | method : str, default='stacked'
148 | Method to obtain the CorrIndex by comparing the A matrices from two decompositions.
149 | Possible options are:
150 |
151 | - 'stacked' : The original method implemented in [1]. Here all A matrices from the same decomposition are
152 | vertically concatenated, building a big A matrix for each decomposition.
153 | - 'max_score' : This computes the CorrIndex for each pair of A matrices (i.e. between A_1 in factors_1 and
154 | factors_2, between A_2 in factors_1 and factors_2, and so on). Then the max score is
155 | selected (the most conservative approach). In other words, it selects the max score among the
156 | CorrIndexes computed dimension-wise.
157 | - 'min_score' : Similar to 'max_score', but the min score is selected (the least conservative approach).
158 | - 'avg_score' : Similar to 'max_score', but the avg score is selected.
159 |
160 | Returns
161 | -------
162 | scores : pd.DataFrame
163 | Dataframe with CorrIndex metric for each pair of decompositions.
164 | This metric bounds are [0,1]; lower score indicates higher similarity between matrices
165 | '''
166 | N = len(factors)
167 | idxs = list(range(N))
168 | pairs = list(combinations(idxs, 2))
169 |     scores = pd.DataFrame(np.zeros((N, N)), index=idxs, columns=idxs)
170 | for p1, p2 in pairs:
171 | corrindex = correlation_index(factors_1=factors[p1],
172 | factors_2=factors[p2],
173 | tol=tol,
174 | method=method
175 | )
176 |
177 | scores.at[p1, p2] = corrindex
178 | scores.at[p2, p1] = corrindex
179 | return scores
180 |
--------------------------------------------------------------------------------
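A sketch of `correlation_index` on mock factors (random loadings with invented dimension names). Identical decompositions should score ~0, and, since the metric is column-permutation invariant, shuffling the factor columns should not change the score:

```python
import numpy as np
import pandas as pd

from cell2cell.tensor.metrics import correlation_index

rng = np.random.default_rng(0)
factors_1 = {'Contexts': pd.DataFrame(rng.random((4, 3))),
             'LR pairs': pd.DataFrame(rng.random((10, 3))),
             'Senders': pd.DataFrame(rng.random((5, 3))),
             'Receivers': pd.DataFrame(rng.random((5, 3)))}

# Reversing the column (factor) order is a permutation of the factors
factors_2 = {k: v[v.columns[::-1]] for k, v in factors_1.items()}

print(correlation_index(factors_1, factors_1))  # ~0.0 (identical decompositions)
print(correlation_index(factors_1, factors_2))  # ~0.0 (permutation invariance)
```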
/cell2cell/tensor/subset.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import copy
3 |
4 | import numpy as np
5 | import tensorly as tl
6 |
7 | from cell2cell.preprocessing.find_elements import find_duplicates
8 |
9 | def find_element_indexes(interaction_tensor, elements, axis=0, remove_duplicates=True, keep='first', original_order=False):
10 | '''Finds the location/indexes of a list of elements in one of the
11 | axis of an InteractionTensor.
12 |
13 | Parameters
14 | ----------
15 | interaction_tensor : cell2cell.tensor.BaseTensor
16 |         A communication tensor generated with any of the tensor classes in
17 | cell2cell.tensor
18 |
19 | elements : list
20 | A list of names for the elements to find in one of the axis.
21 |
22 | axis : int, default=0
23 | An axis of the interaction_tensor, representing one of
24 | its dimensions.
25 |
26 | remove_duplicates : boolean, default=True
27 |         Whether to remove duplicated names in `elements`.
28 |
29 | keep : str, default='first'
30 | Determines which duplicates (if any) to keep.
31 | Options are:
32 |
33 | - first : Drop duplicates except for the first occurrence.
34 | - last : Drop duplicates except for the last occurrence.
35 | - False : Drop all duplicates.
36 |
37 | original_order : boolean, default=False
38 |         Whether to keep the original order of the elements in
39 |         interaction_tensor.order_names[axis] instead of the
40 |         new order indicated in `elements`.
41 |
42 | Returns
43 | -------
44 | indexes : list
45 |         List of indexes for the elements that were found in the
46 |         indicated axis of the interaction_tensor.
47 | '''
48 |     assert axis < len(interaction_tensor.tensor.shape), \
49 |         "List index out of range. 'axis' must be one of the axes in the tensor."
50 |     assert axis < len(interaction_tensor.order_names), \
51 |         "List index out of range. interaction_tensor.order_names must have element names for each axis of the tensor."
52 |
53 | elements = sorted(set(elements), key=list(elements).index)
54 |
55 | if original_order:
56 | # Avoids error for considering elements not in the tensor
57 | elements = set(elements).intersection(set(interaction_tensor.order_names[axis]))
58 | elements = sorted(elements, key=interaction_tensor.order_names[axis].index)
59 |
60 |
61 | # Find duplicates if we are removing them
62 | to_exclude = []
63 | if remove_duplicates:
64 | dup_dict = find_duplicates(interaction_tensor.order_names[axis])
65 |
66 | if len(dup_dict) > 0: # Only if we have duplicate items
67 | if keep == 'first':
68 | for k, v in dup_dict.items():
69 | to_exclude.extend(v[1:])
70 | elif keep == 'last':
71 | for k, v in dup_dict.items():
72 | to_exclude.extend(v[:-1])
73 | elif not keep:
74 | for k, v in dup_dict.items():
75 | to_exclude.extend(v)
76 | else:
77 |                 raise ValueError("An invalid option was selected for the parameter `keep`.")
78 |
79 | # Find indexes in the tensor
80 |     indexes = sum([np.where(np.asarray(interaction_tensor.order_names[axis]) == element)[0].tolist()
81 |                    for element in elements], [])
82 |
83 | # Exclude duplicates if any to exclude
84 | indexes = [idx for idx in indexes if idx not in to_exclude]
85 | return indexes
86 |
87 |
88 | def subset_tensor(interaction_tensor, subset_dict, remove_duplicates=True, keep='first', original_order=False):
89 | '''Subsets an InteractionTensor to contain only specific elements in
90 | respective dimensions.
91 |
92 | Parameters
93 | ----------
94 | interaction_tensor : cell2cell.tensor.BaseTensor
95 |         A communication tensor generated with any of the tensor classes in
96 | cell2cell.tensor
97 |
98 | subset_dict : dict
99 | Dictionary to subset the tensor. It must contain the axes or
100 | dimensions that will be subset as the keys of the dictionary
101 |         and the values correspond to lists of element names for the
102 | respective axes or dimensions. Those axes that are not present
103 | in this dictionary will not be subset.
104 | E.g. {0 : ['Context 1', 'Context2'], 1: ['LR 10', 'LR 100']}
105 |
106 | remove_duplicates : boolean, default=True
107 |         Whether to remove duplicated names in `elements`.
108 |
109 | keep : str, default='first'
110 | Determines which duplicates (if any) to keep.
111 | Options are:
112 |
113 | - first : Drop duplicates except for the first occurrence.
114 | - last : Drop duplicates except for the last occurrence.
115 | - False : Drop all duplicates.
116 |
117 | original_order : boolean, default=False
118 |         Whether to keep the original order of the elements in
119 |         interaction_tensor.order_names instead of the
120 |         new order indicated in the lists of the `subset_dict`.
121 |
122 | Returns
123 | -------
124 | subset_tensor : cell2cell.tensor.BaseTensor
125 | A copy of interaction_tensor that was subset to contain
126 | only the elements specified for the respective axis in the
127 | `subset_dict`. Corresponds to a communication tensor
128 |         generated with any of the tensor classes in cell2cell.tensor
129 | '''
130 | # Perform a deep copy of the original tensor and reset previous factorization
131 | subset_tensor = copy.deepcopy(interaction_tensor)
132 | subset_tensor.rank = None
133 | subset_tensor.tl_object = None
134 | subset_tensor.factors = None
135 |
136 | # Initialize tensor into a numpy object for performing subset
137 | context = tl.context(subset_tensor.tensor)
138 | tensor = tl.to_numpy(subset_tensor.tensor)
139 | mask = None
140 | if subset_tensor.mask is not None:
141 | mask = tl.to_numpy(subset_tensor.mask)
142 |
143 | # Search for indexes
144 | axis_idxs = dict()
145 | for k, v in subset_dict.items():
146 | if k < len(tensor.shape):
147 | if len(v) != 0:
148 | idx = find_element_indexes(interaction_tensor=subset_tensor,
149 | elements=v,
150 | axis=k,
151 | remove_duplicates=remove_duplicates,
152 | keep=keep,
153 | original_order=original_order
154 | )
155 | if len(idx) == 0:
156 | print("No elements found for axis {}. It will return an empty tensor.".format(k))
157 | axis_idxs[k] = idx
158 | else:
159 |             print("Axis {} is out of range; ignoring elements in this axis.".format(k))
160 |
161 | # Subset tensor
162 | for k, v in axis_idxs.items():
163 | if tensor.shape != (0,): # Avoids error when returned empty tensor
164 | tensor = tensor.take(indices=v,
165 | axis=k
166 | )
167 |
168 | subset_tensor.order_names[k] = [subset_tensor.order_names[k][i] for i in v]
169 | if mask is not None:
170 | mask = mask.take(indices=v,
171 | axis=k
172 | )
173 |
174 | # Restore tensor and mask properties
175 | tensor = tl.tensor(tensor, **context)
176 | if mask is not None:
177 | mask = tl.tensor(mask, **context)
178 |
179 | subset_tensor.tensor = tensor
180 | subset_tensor.mask = mask
181 | return subset_tensor
182 |
183 |
184 | def subset_metadata(tensor_metadata, interaction_tensor, sample_col='Element'):
185 | '''Subsets the metadata of an InteractionTensor to contain only
186 | elements in a reference InteractionTensor (interaction_tensor).
187 |
188 | Parameters
189 | ----------
190 | tensor_metadata : list
191 | List of pandas dataframes with metadata information for elements of each
192 |         dimension in the tensor. A column named as the variable `sample_col` contains
193 |         the name of each element in the tensor, while the remaining columns contain
194 |         the metadata or grouping information of each
195 |         element.
196 |
197 | interaction_tensor : cell2cell.tensor.BaseTensor
198 |         A communication tensor generated with any of the tensor classes in
199 | cell2cell.tensor. This tensor is used as reference to subset the metadata.
200 | The subset metadata will contain only elements that are present in this
201 | tensor, so if metadata was originally built for another tensor, the elements
202 | that are exclusive for that original tensor will be excluded.
203 |
204 | sample_col : str, default='Element'
205 | Name of the column containing the element names in the metadata.
206 |
207 | Returns
208 | -------
209 | subset_metadata : list
210 | List of pandas dataframes with metadata information for elements contained
211 | in `interaction_tensor.order_names`. It is a subset of `tensor_metadata`.
212 | '''
213 | subset_metadata = []
214 | for i, meta in enumerate(tensor_metadata):
215 | if meta is not None:
216 | tmp_meta = meta.set_index(sample_col)
217 | tmp_meta = tmp_meta.loc[interaction_tensor.order_names[i], :]
218 | tmp_meta = tmp_meta.reset_index()
219 | subset_metadata.append(tmp_meta)
220 | else:
221 | subset_metadata.append(None)
222 | return subset_metadata
--------------------------------------------------------------------------------
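A hedged sketch of `subset_tensor` on a small `PreBuiltTensor` built from random values; all element names are placeholders:

```python
import numpy as np

from cell2cell.tensor import PreBuiltTensor
from cell2cell.tensor.subset import subset_tensor

tensor = PreBuiltTensor(tensor=np.random.rand(3, 4, 2, 2),
                        order_names=[['Ctx1', 'Ctx2', 'Ctx3'],
                                     ['LR1', 'LR2', 'LR3', 'LR4'],
                                     ['CellA', 'CellB'],
                                     ['CellA', 'CellB']],
                        order_labels=['Contexts', 'LR pairs', 'Senders', 'Receivers'])

# Keep only two contexts (axis 0) and two LR pairs (axis 1); other axes are untouched
sub = subset_tensor(tensor, subset_dict={0: ['Ctx1', 'Ctx3'], 1: ['LR2', 'LR4']})
print(sub.tensor.shape)  # (2, 2, 2, 2)
```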
/cell2cell/tensor/tensor_manipulation.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import tensorly as tl
4 |
5 | from cell2cell.tensor.tensor import PreBuiltTensor
6 | from cell2cell.tensor.subset import subset_tensor
7 |
8 |
9 | def concatenate_interaction_tensors(interaction_tensors, axis, order_labels, remove_duplicates=False, keep='first',
10 | mask=None, device=None):
11 | '''Concatenates interaction tensors in a given tensor dimension or axis.
12 |
13 | Parameters
14 | ----------
15 | interaction_tensors : list
16 | List of any tensor class in cell2cell.tensor.
17 |
18 | axis : int
19 |         The axis along which the tensors will be joined.
20 |
21 | order_labels : list
22 | List of labels for dimensions or orders in the tensor.
23 |
24 | remove_duplicates : boolean, default=False
25 |         Whether to remove duplicated names along the concatenated axis.
26 |
27 | keep : str, default='first'
28 | Determines which duplicates (if any) to keep.
29 | Options are:
30 |
31 | - first : Drop duplicates except for the first occurrence.
32 | - last : Drop duplicates except for the last occurrence.
33 | - False : Drop all duplicates.
34 |
35 |     mask : ndarray, default=None
36 |         Helps avoid missing values during a tensor factorization. A mask should be
37 |         a boolean array of the same shape as the original tensor, set to 0
38 |         where values are missing and 1 everywhere else. It must have the same shape
39 |         as the concatenated tensor.
40 |
41 | device : str, default=None
42 | Device to use when backend is pytorch. Options are:
43 | {'cpu', 'cuda', None}
44 |
45 | Returns
46 | -------
47 | concatenated_tensor : cell2cell.tensor.PreBuiltTensor
48 |         Final tensor after concatenation. It is a PreBuiltTensor that works as
49 | any interaction tensor based on the class BaseTensor.
50 | '''
51 |     # Assert that all other dimensions contain the same elements:
52 | shape = len(interaction_tensors[0].tensor.shape)
53 | assert all(shape == len(tensor.tensor.shape) for tensor in interaction_tensors[1:]), "Tensors must have same number of dimensions"
54 |
55 | for i in range(shape):
56 | if i != axis:
57 | elements = interaction_tensors[0].order_names[i]
58 | for tensor in interaction_tensors[1:]:
59 | assert elements == tensor.order_names[i], "Tensors must have the same elements in the other axes."
60 |
61 | # Initialize tensors into a numpy object for performing subset
62 | # Use the same context as first tensor for everything
63 |     try:
64 |         context = tl.context(interaction_tensors[0].tensor)
65 |     except Exception:
66 |         context = {'dtype': interaction_tensors[0].tensor.dtype}
67 |
68 |     # Concatenate tensors (convert through numpy so this works with numpy and pytorch backends)
69 |     concat_tensor = tl.concatenate([tl.tensor(tl.to_numpy(tensor.tensor)) for tensor in interaction_tensors], axis=axis)
70 | if mask is not None:
71 | assert mask.shape == concat_tensor.shape, "Mask must have the same shape of the concatenated tensor. Here: {}".format(concat_tensor.shape)
72 | else: # Generate a new mask from all previous masks if all are not None
73 | if all([tensor.mask is not None for tensor in interaction_tensors]):
74 |             mask = tl.concatenate([tl.tensor(tl.to_numpy(tensor.mask)) for tensor in interaction_tensors], axis=axis)
75 | else:
76 | mask = None
77 |
78 |     concat_tensor = tl.tensor(concat_tensor, **context)
79 |     if mask is not None:
80 |         mask = tl.tensor(mask, **context)
81 |
82 | # Concatenate names of elements for the given axis but keep the others as in one tensor
83 | order_names = []
84 | for i in range(shape):
85 | tmp_names = []
86 | if i == axis:
87 | for tensor in interaction_tensors:
88 | tmp_names += tensor.order_names[i]
89 | else:
90 | tmp_names = interaction_tensors[0].order_names[i]
91 | order_names.append(tmp_names)
92 |
93 | # Generate final object
94 | concatenated_tensor = PreBuiltTensor(tensor=concat_tensor,
95 | order_names=order_names,
96 | order_labels=order_labels,
97 | mask=mask, # Change if you want to omit values in the decomposition
98 | device=device
99 | )
100 |
101 | # Remove duplicates
102 | if remove_duplicates:
103 | concatenated_tensor = subset_tensor(interaction_tensor=concatenated_tensor,
104 | subset_dict={axis: order_names[axis]},
105 | remove_duplicates=remove_duplicates,
106 | keep=keep,
107 | original_order=False)
108 | return concatenated_tensor
--------------------------------------------------------------------------------
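A sketch of concatenating two tensors along the context axis, assuming tensorly's default numpy backend and using placeholder element names. Both tensors must share all elements in the non-concatenated axes:

```python
import numpy as np

from cell2cell.tensor import PreBuiltTensor
from cell2cell.tensor.tensor_manipulation import concatenate_interaction_tensors

names = [['LR1', 'LR2'], ['CellA', 'CellB'], ['CellA', 'CellB']]
labels = ['Contexts', 'LR pairs', 'Senders', 'Receivers']

t1 = PreBuiltTensor(tensor=np.random.rand(2, 2, 2, 2),
                    order_names=[['Ctx1', 'Ctx2']] + names,
                    order_labels=labels)
t2 = PreBuiltTensor(tensor=np.random.rand(3, 2, 2, 2),
                    order_names=[['Ctx3', 'Ctx4', 'Ctx5']] + names,
                    order_labels=labels)

merged = concatenate_interaction_tensors([t1, t2], axis=0, order_labels=labels)
print(merged.tensor.shape)  # (5, 2, 2, 2)
```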
/cell2cell/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from cell2cell.utils.networks import (generate_network_from_adjacency, export_network_to_gephi)
2 | from cell2cell.utils.parallel_computing import (agents_number)
3 |
--------------------------------------------------------------------------------
/cell2cell/utils/networks.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import absolute_import
4 |
5 | import networkx as nx
6 |
7 |
8 | def generate_network_from_adjacency(adjacency_matrix, package='networkx'):
9 | '''
10 | Generates a network or graph object from an adjacency matrix.
11 |
12 | Parameters
13 | ----------
14 | adjacency_matrix : pandas.DataFrame
15 | An adjacency matrix, where in rows and columns are nodes
16 | and values represents a weight for the respective edge.
17 |
18 | package : str, default='networkx'
19 |         Package or python library used to build the network.
20 |         Implemented options are {'networkx'}. Support for
21 |         'igraph' will be available soon.
22 |
23 | Returns
24 | -------
25 | network : graph-like
26 | A graph object built with a python-library for networks.
27 | '''
28 | if package == 'networkx':
29 | network = nx.from_pandas_adjacency(adjacency_matrix)
30 | elif package == 'igraph':
31 | # A = adjacency_matrix.values
32 | # network = igraph.Graph.Weighted_Adjacency((A > 0).tolist(), mode=igraph.ADJ_UNDIRECTED)
33 | #
34 | # # Add edge weights and node labels.
35 | # network.es['weight'] = A[A.nonzero()]
36 | # network.vs['label'] = list(adjacency_matrix.columns)
37 | #
38 | # Warning("iGraph functionalities are not completely implemented yet.")
39 | raise NotImplementedError("Network using package {} not implemented".format(package))
40 | else:
41 | raise NotImplementedError("Network using package {} not implemented".format(package))
42 | return network
43 |
44 |
45 | def export_network_to_gephi(network, filename, format='excel', network_type='Undirected'):
46 | '''
47 | Exports a network into a spreadsheet that is readable
48 | by the software Gephi.
49 |
50 | Parameters
51 | ----------
52 | network : networkx.Graph, networkx.DiGraph or a pandas.DataFrame
53 | A networkx Graph or Directed Graph, or an adjacency matrix,
54 | where in rows and columns are nodes and values represents a
55 | weight for the respective edge.
56 |
57 | filename : str, default=None
58 | Path to save the network into a Gephi-readable format.
59 |
60 | format : str, default='excel'
61 | Format to export the spreadsheet. Options are:
62 |
63 | - 'excel' : An excel file, either .xls or .xlsx
64 | - 'csv' : Comma separated value format
65 | - 'tsv' : Tab separated value format
66 |
67 | network_type : str, default='Undirected'
68 | Type of edges in the network. They could be either
69 | 'Undirected' or 'Directed'.
70 | '''
71 |     # This allows passing a network directly or an adjacency matrix
72 |     if not isinstance(network, nx.Graph):
73 | network = generate_network_from_adjacency(network,
74 | package='networkx')
75 |
76 | gephi_df = nx.to_pandas_edgelist(network)
77 | gephi_df = gephi_df.assign(Type=network_type)
78 | # When weight is not in the network
79 |     if 'weight' not in gephi_df.columns:
80 | gephi_df = gephi_df.assign(weight=1)
81 |
82 | # Transform column names
83 | gephi_df = gephi_df[['source', 'target', 'Type', 'weight']]
84 | gephi_df.columns = [c.capitalize() for c in gephi_df.columns]
85 |
86 | # Save with different formats
87 | if format == 'excel':
88 | gephi_df.to_excel(filename, sheet_name='Edges', index=False)
89 | elif format == 'csv':
90 | gephi_df.to_csv(filename, sep=',', index=False)
91 | elif format == 'tsv':
92 | gephi_df.to_csv(filename, sep='\t', index=False)
93 | else:
94 | raise ValueError("Format not supported.")
95 |
96 |
97 | def export_network_to_cytoscape(network, filename):
98 | '''
99 |     Exports a network into a JSON file that is readable
100 |     by the software Cytoscape.
101 |
102 | Parameters
103 | ----------
104 | network : networkx.Graph, networkx.DiGraph or a pandas.DataFrame
105 | A networkx Graph or Directed Graph, or an adjacency matrix,
106 | where in rows and columns are nodes and values represents a
107 | weight for the respective edge.
108 |
109 | filename : str, default=None
110 | Path to save the network into a Cytoscape-readable format
111 | (JSON file in this case). E.g. '/home/user/network.json'
112 | '''
113 |     # This allows passing a network directly or an adjacency matrix
114 |     if not isinstance(network, nx.Graph):
115 | network = generate_network_from_adjacency(network,
116 | package='networkx')
117 |
118 | data = nx.readwrite.json_graph.cytoscape.cytoscape_data(network)
119 |
120 | # Export
121 | import json
122 | json_str = json.dumps(data)
123 | with open(filename, 'w') as outfile:
124 | outfile.write(json_str)
--------------------------------------------------------------------------------
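A small end-to-end sketch of the helpers above: build a graph from a toy adjacency matrix and export it in both formats. File names are placeholders:

```python
import pandas as pd

from cell2cell.utils.networks import (generate_network_from_adjacency,
                                      export_network_to_gephi,
                                      export_network_to_cytoscape)

# Toy symmetric adjacency matrix (e.g., CCI scores between three cell types)
adj = pd.DataFrame([[0.0, 2.0, 0.0],
                    [2.0, 0.0, 1.0],
                    [0.0, 1.0, 0.0]],
                   index=['A', 'B', 'C'],
                   columns=['A', 'B', 'C'])

network = generate_network_from_adjacency(adj, package='networkx')
export_network_to_gephi(network, filename='network-edges.csv', format='csv')
export_network_to_cytoscape(network, filename='network.json')
```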
/cell2cell/utils/parallel_computing.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import absolute_import
4 |
5 | from multiprocessing import cpu_count
6 |
7 |
8 | # GENERAL
9 | def agents_number(n_jobs):
10 | '''
11 | Computes the number of agents/cores/threads that the
12 | computer can really provide given a number of
13 | jobs/threads requested.
14 |
15 | Parameters
16 | ----------
17 | n_jobs : int
18 | Number of threads for parallelization.
19 |
20 | Returns
21 | -------
22 | agents : int
23 | Number of threads that the computer can really provide.
24 | '''
25 | if n_jobs < 0:
26 | agents = cpu_count() + 1 + n_jobs
27 |         if agents < 1:
28 | agents = 1
29 | elif n_jobs > cpu_count():
30 | agents = cpu_count()
31 |
32 | elif n_jobs == 0:
33 | agents = 1
34 | else:
35 | agents = n_jobs
36 | return agents
37 |
38 |
39 | # CORE FUNCTIONS
40 | def parallel_spatial_ccis(inputs):
41 | '''
42 |     Parallel computing in cell2cell.analysis.pipelines.SpatialSingleCellInteractions
43 | '''
44 | # TODO: Implement this for enabling spatial analysis and compute interactions in parallel
45 |
46 | # from cell2cell.core import spatial_operation
47 | #results = spatial_operation()
48 |
49 | # return results
50 | pass
--------------------------------------------------------------------------------
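The `n_jobs` convention follows the scikit-learn style, as this quick illustration shows:

```python
from cell2cell.utils.parallel_computing import agents_number

print(agents_number(-1))  # all available cores
print(agents_number(0))   # 1
print(agents_number(4))   # 4, capped at the machine's core count
```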
/docs/documentation.md:
--------------------------------------------------------------------------------
1 | # Documentation for *cell2cell*
2 |
3 | This documentation is for our *cell2cell* suite, which includes the [regular cell2cell](https://doi.org/10.1371/journal.pcbi.1010715)
4 | and [Tensor-cell2cell](https://doi.org/10.1038/s41467-022-31369-2) tools. The former is for inferring cell-cell interactions
5 | and communication in one sample or context, while the latter is for deconvolving complex patterns
6 | of cell-cell communication across multiple samples or contexts simultaneously into interpretable factors
7 | representing patterns of communication.
8 |
9 | Here, multiple classes and functions are implemented to facilitate the analyses, including a variety of
10 | visualizations to simplify the interpretation of results:
11 |
12 | - **cell2cell.analysis** : Includes simplified pipelines for running the analyses, and functions for downstream analyses of Tensor-cell2cell
13 | - **cell2cell.clustering** : Includes multiple scipy-based functions for performing clustering.
14 | - **cell2cell.core** : Includes the core functions for inferring cell-cell interactions and communication. It includes scoring methods, cell classes, and interaction spaces.
15 | - **cell2cell.datasets** : Includes toy datasets and annotations for testing functions in basic scenarios.
16 | - **cell2cell.external** : Includes built-in approaches borrowed from other tools to avoid incompatibilities (e.g. UMAP, tensorly, and PCoA).
17 | - **cell2cell.io** : Includes functions for opening and saving diverse types of files.
18 | - **cell2cell.plotting** : Includes all the visualization options that *cell2cell* offers.
19 | - **cell2cell.preprocessing** : Includes functions for manipulating data and variables (e.g. data preprocessing, integration, permutation, among others).
20 | - **cell2cell.spatial** : Includes filtering of cell-cell interactions results given intercellular distance, as well as defining neighborhoods by grids or moving windows.
21 | - **cell2cell.stats** : Includes statistical analyses such as enrichment analysis, multiple test correction methods, permutation approaches, and Gini coefficient.
22 | - **cell2cell.tensor** : Includes all functions pertinent to the analysis of *Tensor-cell2cell*
23 | - **cell2cell.utils** : Includes general utilities for analyzing networks and performing parallel computing.
24 |
25 |
26 | Below, all the inputs, parameters (including their different options), and outputs are detailed. Source code of the functions is also included.
27 |
28 |
29 | ::: cell2cell
30 |
--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
1 | # Inferring cell-cell interactions from transcriptomes with *cell2cell*
2 | [![PyPI Version][pb]][pypi]
3 | [](https://cell2cell.readthedocs.io/en/latest/?badge=latest)
4 | [](https://pepy.tech/project/cell2cell)
5 |
6 |
7 | [pb]: https://badge.fury.io/py/cell2cell.svg
8 | [pypi]: https://pypi.org/project/cell2cell/
9 |
10 | ## Getting started
11 | For tutorials and documentation, visit [**cell2cell ReadTheDocs**](https://cell2cell.readthedocs.org/) or our [**cell2cell website**](https://earmingol.github.io/cell2cell).
12 |
13 |
14 |
15 | ## Installation
16 |
17 | Step 1: Install Anaconda
18 |
19 | First, [install Anaconda following this tutorial](https://docs.anaconda.com/anaconda/install/).
20 |
21 |
22 | Step 2: Create and Activate a New Conda Environment
23 |
24 | ```
25 | # Create a new conda environment
26 | conda create -n cell2cell -y python=3.7 jupyter
27 |
28 | # Activate the environment
29 | conda activate cell2cell
30 | ```
31 |
32 |
33 | Step 3: Install cell2cell
34 |
35 | ```
36 | pip install cell2cell
37 | ```
38 |
39 |
40 | ## Examples
41 |
42 | | cell2cell Examples | Tensor-cell2cell Examples |
43 | | --- | --- |
44 | |  |  |
45 | | - [Step-by-step Pipeline](https://github.com/earmingol/cell2cell/blob/master/examples/cell2cell/Toy-Example.ipynb) <br> - [Interaction Pipeline for Bulk Data](https://earmingol.github.io/cell2cell/tutorials/Toy-Example-BulkPipeline) <br> - [Interaction Pipeline for Single-Cell Data](https://earmingol.github.io/cell2cell/tutorials/Toy-Example-SingleCellPipeline) <br> - [Whole Body of *C. elegans*](https://github.com/LewisLabUCSD/Celegans-cell2cell) | - [Obtaining patterns of cell-cell communication](https://earmingol.github.io/cell2cell/tutorials/ASD/01-Tensor-Factorization-ASD/) <br> - [Downstream 1: Factor-specific analyses](https://earmingol.github.io/cell2cell/tutorials/ASD/02-Factor-Specific-ASD/) <br> - [Downstream 2: Patterns to functions (GSEA)](https://earmingol.github.io/cell2cell/tutorials/ASD/03-GSEA-ASD/) <br> - [Tensor-cell2cell in Google Colab (**GPU**)](https://colab.research.google.com/drive/1T6MUoxafTHYhjvenDbEtQoveIlHT2U6_?usp=sharing) <br> - [Communication patterns in **Spatial Transcriptomics**](https://earmingol.github.io/cell2cell/tutorials/Tensor-cell2cell-Spatial/) |
46 |
47 | Reproducible runs of the analyses in the [Tensor-cell2cell paper](https://doi.org/10.1038/s41467-022-31369-2) are available at [CodeOcean.com](https://doi.org/10.24433/CO.0051950.v2).
48 |
49 | ## LIANA & Tensor-cell2cell
50 |
51 | Explore our tutorials for using Tensor-cell2cell with [LIANA](https://github.com/saezlab/liana-py) at [ccc-protocols.readthedocs.io](https://ccc-protocols.readthedocs.io/).
52 |
53 | ## Common Issues
54 |
55 | - **Memory Errors with Tensor-cell2cell:** If you encounter memory errors when performing tensor factorizations, try replacing `init='svd'` with `init='random'`.
56 |
57 | ## Ligand-Receptor Pairs
58 | Find a curated list of ligand-receptor pairs for your analyses at our [GitHub Repository](https://github.com/LewisLabUCSD/Ligand-Receptor-Pairs).
59 |
60 | ## Citation
61 |
62 | Please cite our work using the following references:
63 |
64 | - **cell2cell**: [Inferring a spatial code of cell-cell interactions across a whole animal body](https://doi.org/10.1371/journal.pcbi.1010715).
65 | *PLOS Computational Biology, 2022*
66 |
67 | - **Tensor-cell2cell**: [Context-aware deconvolution of cell-cell communication with Tensor-cell2cell](https://doi.org/10.1038/s41467-022-31369-2).
68 | *Nature Communications, 2022.*
69 |
70 | - **LIANA & Tensor-cell2cell tutorials**: [Combining LIANA and Tensor-cell2cell to decipher cell-cell communication across multiple samples](https://doi.org/10.1101/2023.04.28.538731).
71 | *bioRxiv, 2023*
--------------------------------------------------------------------------------
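The memory-error workaround from the Common Issues section above looks like this in practice. A hedged fragment, assuming an already-built tensor object named `tensor` (e.g., an `InteractionTensor` or `PreBuiltTensor`) and an arbitrary rank:

```python
# Random initialization avoids the memory-hungry SVD step
tensor.compute_tensor_factorization(rank=10,
                                    init='random',  # instead of init='svd'
                                    random_state=0)
```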
/docs/requirements.in:
--------------------------------------------------------------------------------
1 | mkdocs
2 | mkdocstrings[python]
3 | markdown-include
4 | mkdocs-autorefs
5 | mkdocs-gen-files
6 | mkdocs-material
7 | mkdocs-material-extensions
8 | mkdocs-jupyter
9 | mkdocstrings-python-legacy
--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | #
2 | # This file is autogenerated by pip-compile with Python 3.7
3 | # by the following command:
4 | #
5 | # pip-compile requirements.in
6 | #
7 | appnope==0.1.3
8 | # via
9 | # ipykernel
10 | # ipython
11 | astunparse==1.6.3
12 | # via pytkdocs
13 | attrs==23.1.0
14 | # via jsonschema
15 | babel==2.13.1
16 | # via mkdocs-material
17 | backcall==0.2.0
18 | # via ipython
19 | beautifulsoup4==4.12.2
20 | # via nbconvert
21 | bleach==6.0.0
22 | # via nbconvert
23 | cached-property==1.5.2
24 | # via
25 | # griffe
26 | # pytkdocs
27 | certifi==2023.11.17
28 | # via requests
29 | charset-normalizer==3.3.2
30 | # via requests
31 | click==8.1.7
32 | # via mkdocs
33 | colorama==0.4.6
34 | # via
35 | # griffe
36 | # mkdocs-material
37 | debugpy==1.7.0
38 | # via ipykernel
39 | decorator==5.1.1
40 | # via ipython
41 | defusedxml==0.7.1
42 | # via nbconvert
43 | entrypoints==0.4
44 | # via jupyter-client
45 | fastjsonschema==2.19.0
46 | # via nbformat
47 | ghp-import==2.1.0
48 | # via mkdocs
49 | griffe==0.30.1
50 | # via mkdocstrings-python
51 | idna==3.4
52 | # via requests
53 | importlib-metadata==6.7.0
54 | # via
55 | # attrs
56 | # click
57 | # jsonschema
58 | # markdown
59 | # mkdocs
60 | # mkdocstrings
61 | # nbconvert
62 | # nbformat
63 | importlib-resources==5.12.0
64 | # via jsonschema
65 | ipykernel==6.16.2
66 | # via mkdocs-jupyter
67 | ipython==7.34.0
68 | # via ipykernel
69 | jedi==0.19.1
70 | # via ipython
71 | jinja2==3.1.2
72 | # via
73 | # mkdocs
74 | # mkdocs-material
75 | # mkdocstrings
76 | # nbconvert
77 | jsonschema==4.17.3
78 | # via nbformat
79 | jupyter-client==7.4.9
80 | # via
81 | # ipykernel
82 | # nbclient
83 | jupyter-core==4.12.0
84 | # via
85 | # jupyter-client
86 | # nbclient
87 | # nbconvert
88 | # nbformat
89 | jupyterlab-pygments==0.2.2
90 | # via nbconvert
91 | jupytext==1.15.2
92 | # via mkdocs-jupyter
93 | markdown==3.4.4
94 | # via
95 | # markdown-include
96 | # mkdocs
97 | # mkdocs-autorefs
98 | # mkdocs-material
99 | # mkdocstrings
100 | # pymdown-extensions
101 | markdown-include==0.8.1
102 | # via -r requirements.in
103 | markdown-it-py==2.2.0
104 | # via
105 | # jupytext
106 | # mdit-py-plugins
107 | markupsafe==2.1.3
108 | # via
109 | # jinja2
110 | # mkdocs
111 | # mkdocstrings
112 | # nbconvert
113 | matplotlib-inline==0.1.6
114 | # via
115 | # ipykernel
116 | # ipython
117 | mdit-py-plugins==0.3.5
118 | # via jupytext
119 | mdurl==0.1.2
120 | # via markdown-it-py
121 | mergedeep==1.3.4
122 | # via mkdocs
123 | mistune==3.0.2
124 | # via nbconvert
125 | mkdocs==1.5.3
126 | # via
127 | # -r requirements.in
128 | # mkdocs-autorefs
129 | # mkdocs-gen-files
130 | # mkdocs-jupyter
131 | # mkdocs-material
132 | # mkdocstrings
133 | mkdocs-autorefs==0.4.1
134 | # via
135 | # -r requirements.in
136 | # mkdocstrings
137 | mkdocs-gen-files==0.5.0
138 | # via -r requirements.in
139 | mkdocs-jupyter==0.24.3
140 | # via -r requirements.in
141 | mkdocs-material==9.2.7
142 | # via
143 | # -r requirements.in
144 | # mkdocs-jupyter
145 | mkdocs-material-extensions==1.2
146 | # via
147 | # -r requirements.in
148 | # mkdocs-material
149 | mkdocstrings[python]==0.22.0
150 | # via
151 | # -r requirements.in
152 | # mkdocstrings-python
153 | # mkdocstrings-python-legacy
154 | mkdocstrings-python==1.1.2
155 | # via mkdocstrings
156 | mkdocstrings-python-legacy==0.2.3
157 | # via -r requirements.in
158 | nbclient==0.7.4
159 | # via nbconvert
160 | nbconvert==7.6.0
161 | # via mkdocs-jupyter
162 | nbformat==5.8.0
163 | # via
164 | # jupytext
165 | # nbclient
166 | # nbconvert
167 | nest-asyncio==1.5.8
168 | # via
169 | # ipykernel
170 | # jupyter-client
171 | packaging==23.2
172 | # via
173 | # ipykernel
174 | # mkdocs
175 | # nbconvert
176 | paginate==0.5.6
177 | # via mkdocs-material
178 | pandocfilters==1.5.0
179 | # via nbconvert
180 | parso==0.8.3
181 | # via jedi
182 | pathspec==0.11.2
183 | # via mkdocs
184 | pexpect==4.8.0
185 | # via ipython
186 | pickleshare==0.7.5
187 | # via ipython
188 | pkgutil-resolve-name==1.3.10
189 | # via jsonschema
190 | platformdirs==4.0.0
191 | # via mkdocs
192 | prompt-toolkit==3.0.41
193 | # via ipython
194 | psutil==5.9.6
195 | # via ipykernel
196 | ptyprocess==0.7.0
197 | # via pexpect
198 | pygments==2.17.1
199 | # via
200 | # ipython
201 | # mkdocs-jupyter
202 | # mkdocs-material
203 | # nbconvert
204 | pymdown-extensions==10.2.1
205 | # via
206 | # mkdocs-material
207 | # mkdocstrings
208 | pyrsistent==0.19.3
209 | # via jsonschema
210 | python-dateutil==2.8.2
211 | # via
212 | # ghp-import
213 | # jupyter-client
214 | pytkdocs==0.16.1
215 | # via mkdocstrings-python-legacy
216 | pytz==2023.3.post1
217 | # via babel
218 | pyyaml==6.0.1
219 | # via
220 | # jupytext
221 | # mkdocs
222 | # pymdown-extensions
223 | # pyyaml-env-tag
224 | pyyaml-env-tag==0.1
225 | # via mkdocs
226 | pyzmq==25.1.1
227 | # via
228 | # ipykernel
229 | # jupyter-client
230 | regex==2022.10.31
231 | # via mkdocs-material
232 | requests==2.31.0
233 | # via mkdocs-material
234 | six==1.16.0
235 | # via
236 | # astunparse
237 | # bleach
238 | # python-dateutil
239 | soupsieve==2.4.1
240 | # via beautifulsoup4
241 | tinycss2==1.2.1
242 | # via nbconvert
243 | toml==0.10.2
244 | # via jupytext
245 | tornado==6.2
246 | # via
247 | # ipykernel
248 | # jupyter-client
249 | traitlets==5.9.0
250 | # via
251 | # ipykernel
252 | # ipython
253 | # jupyter-client
254 | # jupyter-core
255 | # matplotlib-inline
256 | # nbclient
257 | # nbconvert
258 | # nbformat
259 | typing-extensions==4.7.1
260 | # via
261 | # importlib-metadata
262 | # jsonschema
263 | # markdown-it-py
264 | # mkdocs
265 | # mkdocstrings
266 | # platformdirs
267 | # pytkdocs
268 | urllib3==2.0.7
269 | # via requests
270 | watchdog==3.0.0
271 | # via mkdocs
272 | wcwidth==0.2.10
273 | # via prompt-toolkit
274 | webencodings==0.5.1
275 | # via
276 | # bleach
277 | # tinycss2
278 | wheel==0.42.0
279 | # via astunparse
280 | zipp==3.15.0
281 | # via
282 | # importlib-metadata
283 | # importlib-resources
284 |
285 | # The following packages are considered to be unsafe in a requirements file:
286 | # setuptools
--------------------------------------------------------------------------------
/docs/tutorials/ASD/figures/4d-tensor.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/earmingol/cell2cell/6a609fa3d3e1d65fc3b92be459f47e636c89f061/docs/tutorials/ASD/figures/4d-tensor.png
--------------------------------------------------------------------------------
/docs/tutorials/ASD/figures/tensor-approx.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/earmingol/cell2cell/6a609fa3d3e1d65fc3b92be459f47e636c89f061/docs/tutorials/ASD/figures/tensor-approx.png
--------------------------------------------------------------------------------
/docs/tutorials/ASD/figures/tensor-factorization.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/earmingol/cell2cell/6a609fa3d3e1d65fc3b92be459f47e636c89f061/docs/tutorials/ASD/figures/tensor-factorization.png
--------------------------------------------------------------------------------
/docs/tutorials/ASD/figures/tf.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/earmingol/cell2cell/6a609fa3d3e1d65fc3b92be459f47e636c89f061/docs/tutorials/ASD/figures/tf.png
--------------------------------------------------------------------------------
/docs/tutorials/ASD/results/Loadings.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/earmingol/cell2cell/6a609fa3d3e1d65fc3b92be459f47e636c89f061/docs/tutorials/ASD/results/Loadings.xlsx
--------------------------------------------------------------------------------
/examples/tensor_cell2cell/PreBuiltMetadata-PBMC.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/earmingol/cell2cell/6a609fa3d3e1d65fc3b92be459f47e636c89f061/examples/tensor_cell2cell/PreBuiltMetadata-PBMC.pkl
--------------------------------------------------------------------------------
/examples/tensor_cell2cell/PreBuiltTensor-PBMC.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/earmingol/cell2cell/6a609fa3d3e1d65fc3b92be459f47e636c89f061/examples/tensor_cell2cell/PreBuiltTensor-PBMC.pkl
--------------------------------------------------------------------------------
/mkdocs.yml:
--------------------------------------------------------------------------------
1 | site_name: cell2cell
2 | site_description: "Python package to infer cell-cell interactions and communication from gene expression of interacting proteins."
3 |
4 | theme:
5 | name: readthedocs
6 | highlightjs: true
7 | repo_url: https://github.com/earmingol/cell2cell
8 | plugins:
9 | - search
10 | - mkdocs-jupyter:
11 | execute: false
12 | include: ["*.ipynb"]
13 | include_source: True
14 | #ignore_h1_titles: True
15 | - mkdocstrings:
16 | watch:
17 | - cell2cell
18 | default_handler: python
19 | handlers:
20 | python:
21 | options:
22 | docstring_style: sphinx
23 |
24 | markdown_extensions:
25 | - def_list
26 | - attr_list
27 | - admonition
28 | - codehilite
29 | - pymdownx.tasklist:
30 | custom_checkbox: true
31 | - md_in_html
32 | - pymdownx.superfences
33 | - pymdownx.betterem
34 | - pymdownx.caret
35 | - pymdownx.mark
36 | - pymdownx.tilde
37 | - pymdownx.highlight:
38 | anchor_linenums: true
39 | - pymdownx.inlinehilite
40 | - pymdownx.snippets
41 | - pymdownx.superfences
42 | - pymdownx.tabbed:
43 | alternate_style: true
44 |
45 | nav:
46 | - "Home": index.md
47 | - "API Documentation": documentation.md
48 | - "cell2cell Tutorials":
49 | - tutorials/Toy-Example-BulkPipeline.ipynb
50 | - tutorials/Toy-Example-SingleCellPipeline.ipynb
51 | - "Tensor-cell2cell Tutorials":
52 | - tutorials/ASD/01-Tensor-Factorization-ASD.ipynb
53 | - tutorials/ASD/02-Factor-Specific-ASD.ipynb
54 | - tutorials/ASD/03-GSEA-ASD.ipynb
55 | - tutorials/Tensor-cell2cell-Spatial.ipynb
56 | - tutorials/GPU-Example.ipynb
57 |
--------------------------------------------------------------------------------
/release/0.5.10-notes.md:
--------------------------------------------------------------------------------
1 | # Release Notes - cell2cell v0.5.10
2 |
3 | ## New features
4 | - Added ```cell2cell.plotting.factor_plot.ccc_networks_plot()``` to visualize factor-specific
5 | CCC networks obtained from a tensor decomposition with Tensor-cell2cell
6 | - Added Gini coefficient in ```cell2cell.stats.gini.gini_coefficient()``` and
7 | ```cell2cell.analysis.tensor_downstream.compute_gini_coefficients()```
8 |
9 | ## Feature updates
10 | - In the analysis ```cell2cell.analysis.SingleCellInteractions.permute_cell_labels()```,
11 |   the score computed without permutation is now included in the permutation
12 |   distribution used to compute P-values. So if 100 permutations are intended, the analysis
13 |   should be done with 99 permutations, since the original score becomes the 100th element (see the sketch after this list).
14 | - In the same analysis, the ```randomized_score``` list is now converted to a numpy.array once,
15 |   instead of at each iteration of the last for loop (Line 704). This accelerates the analysis.
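
A minimal sketch of the P-value convention described above (values are made up; ```cell2cell.stats.permutation.compute_pvalue_from_dist()``` implements the real logic):

```python
import numpy as np

obs_score = 0.42               # score computed without permutation
permuted = np.random.rand(99)  # scores from 99 permutations

# The observed score joins the null distribution, giving 100 elements.
dist = np.append(permuted, obs_score)
pval = np.sum(dist >= obs_score) / dist.size  # one-sided, upper tail
```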
16 |
17 | ## Fixed Bugs
18 | - Fixed bug in ```cell2cell.plotting.tensor_plot.tensor_factors_plot_from_loadings()```
19 |   that occurred when the metadata was None.
20 | - Fixed bug in ```cell2cell.plotting.tensor_plot.tensor_factors_plot_from_loadings()```
21 |   that prevented using a tensor with one dimension.
22 | - Fixed bug in ```cell2cell.plotting.factor_plot.context_boxplot()```
23 |   that prevented using a decomposition with just one factor.
24 | - Fixed bug when using ```communication_score='expression_gmean'``` in cell2cell pipelines.
--------------------------------------------------------------------------------
/release/0.5.11-notes.md:
--------------------------------------------------------------------------------
1 | # Release Notes - cell2cell v0.5.11
2 |
3 | ## New features
4 | - Created a new function to use external communication scores generated with other tools. This function can be found in
5 |   ```cell2cell.tensor.external_scores.dataframes_to_tensor()``` (see the sketch after this list).
6 | - Added ```cell2cell.tensor.tensor.BaseTensor.loc_nans```, ```cell2cell.tensor.tensor.BaseTensor.loc_zeros```, and the same attributes in
7 |   heir tensor classes, to keep track of values assigned NaNs and real zeros, respectively.
8 | - ```cell2cell.tensor.external_scores.dataframes_to_tensor()``` also incorporates the previous point to keep track
9 | of NaNs and real zeros when using external communication scores.
10 | - Added ```lr_fill``` and ```cell_fill``` parameters to ```cell2cell.tensor.external_scores.dataframes_to_tensor()```
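
A minimal sketch of building a tensor from external scores (the dataframe dictionary and the column names are assumptions; check the function's documentation):

```python
import cell2cell as c2c

# scores_by_context: {context_name: dataframe}, where each dataframe holds
# sender, receiver, ligand, receptor, and communication-score columns.
tensor = c2c.tensor.external_scores.dataframes_to_tensor(
    context_df_dict=scores_by_context,
    sender_col='sender',
    receiver_col='receiver',
    ligand_col='ligand',
    receptor_col='receptor',
    score_col='score',
)
```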
11 |
12 | ## Feature updates
13 | - Added two new options to the parameter ```how``` in ```cell2cell.tensor.build_context_ccc_tensor()```.
14 |   They are: ```how='outer_genes'``` and ```how='outer_cells'```. These new options were also extended to all InteractionTensors
15 | derived from ```cell2cell.tensor.tensor.BaseTensor```.
16 | - These ```how``` options were also extended to the new function ```cell2cell.tensor.external_scores.dataframes_to_tensor()```,
17 |   but implemented there as ```how='outer_lrs'``` and ```how='outer_cells'```.
18 | - Implemented multiple options to aggregate gene expression of protein complexes: using the
19 |   minimum or the average expression among the subunits. This is controlled with the parameter
20 |   ```complex_agg_method='min'``` or ```complex_agg_method='mean'``` when creating a ```cell2cell.tensor.InteractionTensor```,
21 |   ```cell2cell.core.InteractionSpace```, ```cell2cell.analysis.BulkInteractions``` pipeline, or ```cell2cell.analysis.SingleCellInteractions``` pipeline (see the sketch after this list).
22 | - The previous point relies on the function ```cell2cell.preprocessing.rnaseq.add_complexes_to_expression()``` through
23 |   the parameter ```agg_method='min'``` or ```agg_method='mean'```.
24 | - Added parameter ```cbar_label``` to the function ```cell2cell.plotting.factor_plot.loading_clustermap()```
25 | to personalize the title of the color bar.
26 | - Added parameter ```manual_elbow``` to ```cell2cell.tensor.tensor.BaseTensor.elbow_rank_selection()``` to manually specify
27 | the elbow to highlight.
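
A minimal sketch of the complex-aggregation option (the inputs and the complex separator are assumptions; check the class documentation):

```python
import cell2cell as c2c

# Expression of a protein complex is aggregated by the minimum
# expression across its subunits.
tensor = c2c.tensor.InteractionTensor(
    rnaseq_matrices=matrices,   # list of gene-by-cell-type matrices
    ppi_data=lr_pairs,          # ligand-receptor table
    complex_sep='&',            # separator of complex subunits
    complex_agg_method='min',   # or 'mean'
)
```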
28 |
29 | ## Fixed Bugs
30 | - Renamed the module ```cell2cell.plotting.circos_plot``` to ```cell2cell.plotting.circular_plot``` to avoid a name clash with the
31 |   function ```cell2cell.plotting.circos_plot.circos_plot()```, which is imported directly under ```cell2cell.plotting```.
--------------------------------------------------------------------------------
/release/0.5.4-notes.md:
--------------------------------------------------------------------------------
1 | # Release Notes - cell2cell v0.5.4
2 |
3 | ## New features
4 |
5 | - Implemented a gradient of colors for continuous numbers in the function ```cell2cell.plotting.aesthetics.get_colors_from_labels()```
6 | - Added method ```excluded_value_fraction()``` to the class ```InteractionTensor```
7 | - Implemented reordering of elements in a dimension of the tensor when plotting their loadings from the decomposition
8 | in the function ```cell2cell.plotting.tensor_plot.tensor_factors_plot()``` under the parameter ```reorder_elements```.
9 | - Changed tensor objects and implemented a function to normalize loadings to unit Euclidean length under
10 | the parameter ```normalize_loadings``` in method ```compute_tensor_factorization``` of the class ```BaseTensor``` and others such as ```InteractionTensor```.
11 | - Implemented attribute ```explained_variance_ratio_``` in a tensor object. Only outputs values when using ```normalize_loadings=True```.
12 | - Added ```explained_variance_``` attribute to tensor objects.
13 | - Implemented ```explained_variance()``` in tensor objects to compute the ```explained_variance_``` attribute. Inspired
14 |   by ```sklearn.metrics.explained_variance_score```.
15 |
16 | ## Dependency Update
17 |
18 | - matplotlib >= 3.2.0
19 | - seaborn >= 0.11.0
--------------------------------------------------------------------------------
/release/0.5.5-notes.md:
--------------------------------------------------------------------------------
1 | # Release Notes - cell2cell v0.5.5
2 |
3 | ## Fixed Bugs
4 |
5 | - Fixed bug in computing the factorization error when using a GPU with a tensor without masked values.
6 |   See line 180 in ```cell2cell.tensor.tensor.py```, and lines 151 and 222 of ```cell2cell.tensor.factorization.py```.
--------------------------------------------------------------------------------
/release/0.5.6-notes.md:
--------------------------------------------------------------------------------
1 | # Release Notes - cell2cell v0.5.6
2 |
3 | ## New features
4 |
5 | - Implemented an analysis for obtaining UMAP embeddings from a gene expression matrix. It is
6 | found in ```cell2cell.external.umap.run_umap()```.
7 | - Implemented UMAP biplot to visualize UMAP embeddings. It is found in
8 | ```cell2cell.plotting.umap_plot.umap_biplot()```
9 | - Implemented functions to subset an InteractionTensor by lists of names of the elements in any
10 | of the tensor dimensions.
11 | - Function ```cell2cell.tensor.subset.subset_tensor()``` to subset an InteractionTensor,
12 | powered by the function ```cell2cell.tensor.subset.find_element_indexes()``` to find
13 | where each element name is located in the tensor.
14 | - Function ```cell2cell.tensor.subset.subset_metadata()``` to subset the metadata generated with
15 |   ```cell2cell.tensor.tensor.generate_tensor_metadata()```. It restricts the metadata to only the elements
16 |   contained in a reference InteractionTensor (see the sketch after this list).
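
A minimal sketch of subsetting a tensor (the dimension indices and element names are assumptions; dimensions 2 and 3 are typically sender and receiver cells):

```python
import cell2cell as c2c

# subset_dict maps tensor dimensions to the element names to keep.
small_tensor = c2c.tensor.subset.subset_tensor(
    interaction_tensor=tensor,
    subset_dict={2: ['Macrophages', 'T cells'],
                 3: ['Macrophages', 'T cells']},
)
```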
17 |
18 | ## Dependency Update
19 |
20 | - umap-learn
--------------------------------------------------------------------------------
/release/0.5.7-notes.md:
--------------------------------------------------------------------------------
1 | # Release Notes - cell2cell v0.5.7
2 |
3 | ## New features
4 |
5 | - Added geometric mean as a communication score in ```cell2cell.core.communication_scores```
6 | - Added the parameter ```var_ordered_factors``` in
7 |   ```cell2cell.tensor.BaseTensor.compute_tensor_factorization()``` to decide whether to reorder
8 |   the factors by the variance they explain (in descending order).
9 | - Made ```normalize_loadings=True``` the default in
10 |   ```cell2cell.tensor.BaseTensor.compute_tensor_factorization()```
11 | - Added an option to plot the loadings of a tensor factorization directly from a
12 | factors object (an OrderedDict usually found in ```cell2cell.tensor.BaseTensor.factors```).
13 | It can be done with the function ```cell2cell.plotting.tensor_factors_plot_from_loadings()```
14 | - To complement the previous point, added a function to import factors from an Excel file
15 | previously exported with ```cell2cell.tensor.BaseTensor.export_factors(filename)```. To import the
16 |   factors, use ```cell2cell.io.load_tensor_factors(filename)``` (see the sketch below).
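
A minimal sketch of the export/import round trip (the filename is hypothetical):

```python
import cell2cell as c2c

tensor.export_factors('Loadings.xlsx')                   # save loadings to Excel
factors = c2c.io.load_tensor_factors('Loadings.xlsx')    # reload them later
c2c.plotting.tensor_factors_plot_from_loadings(factors)  # re-plot the loadings
```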
17 |
18 | ## Fixed Bugs
19 |
20 | - Fixed minor bugs in functions ```cell2cell.external.umap.run_umap()``` and
21 | ```cell2cell.plotting.umap_plot.umap_biplot()```
--------------------------------------------------------------------------------
/release/0.5.8-notes.md:
--------------------------------------------------------------------------------
1 | # Release Notes - cell2cell v0.5.8
2 |
3 | ## New features
4 | - Implemented communication score 'expression_gmean' in all pipelines of cell2cell (cell2cell.analysis.pipelines)
5 | - Updated documentation of regular cell2cell
6 | - Implemented **downstream analyses for Tensor-cell2cell**, available in ```cell2cell.analysis.tensor_downstream``` and
7 | associated plots in ```cell2cell.plotting.factor_plot```
8 | - Implemented the **CorrIndex metric** to compare two tensor decompositions of similar tensors, available in
9 | ```cell2cell.tensor.metrics```
10 | - Implemented a function to export networks to be read in Cytoscape. It can be called as
11 | ```cell2cell.utils.networks.export_network_to_cytoscape()```
12 | - Renamed ```cell2cell.plotting.dot_plot.py``` into ```cell2cell.plotting.pval_plot.py``` and included a new function
13 | to perform the dot plots with any input. The original function is ```cell2cell.plotting.pval_plot.dot_plot()```, which
14 | takes a ```cell2cell.analysis.pipelines.SingleCellInteractions``` object, while the new function is
15 | ```cell2cell.plotting.pval_plot.generate_dot_plot()```, which takes any pair of dataframes of P-values and scores.
16 |
17 | ## Fixed Bugs
18 | - Fixed bugs in triangular clustermap in ```cell2cell.plotting.cci_plot```
19 | - Fixed bug associated with duplicated gene names when building a ```cell2cell.tensor.InteractionTensor```
20 |
21 | ## Dependency Update
22 | - Added statannotations
--------------------------------------------------------------------------------
/release/0.5.9-notes.md:
--------------------------------------------------------------------------------
1 | # Release Notes - cell2cell v0.5.9
2 |
3 | ## Fixed Bugs
4 | - This version was created to fix issues when importing version 0.5.8.
5 | - For changes in this version, see [notes of version 0.5.8](0.5.8-notes.md)
--------------------------------------------------------------------------------
/release/0.6.0-notes.md:
--------------------------------------------------------------------------------
1 | # Release Notes - cell2cell v0.6.0
2 |
3 | ## New features
4 | - Added 'gmean' as a method to compute the expression of protein complexes.
5 | It involves function ```cell2cell.preprocessing.rnaseq.add_complexes_to_expression()```
6 | and all objects calling it.
7 | - Added new parameters for improving the robustness of tensor factorizations:
8 |   ```n_iter_max``` and ```tol```. A higher ```n_iter_max``` and a lower ```tol``` retrieve solutions closer to the
9 |   optimum, at the expense of longer running times (see the sketch after this list). Available in:
10 |   ```cell2cell.tensor.factorization._compute_tensor_factorization()```
11 |   and in ```cell2cell.tensor.tensor.BaseTensor.compute_tensor_factorization()``` and all heir classes.
12 | - Similar to the previous point, the parameter ```svd``` was added to these functions. It controls
13 |   the type of SVD method used when ```init='svd'```. See documentation for more information.
14 | - Added new methods/options for running a tensor decomposition in ```cell2cell.tensor.factorization._compute_tensor_factorization()```
15 | and in ```cell2cell.tensor.tensor.BaseTensor.compute_tensor_factorization()``` and all heir classes.
16 | This can be controlled with the parameter ```tf_type```. See documentation for
17 | more options.
18 | - Added option to do a deep copy of any tensor of the class ```cell2cell.tensor.tensor.BaseTensor``` and its
19 | heir classes. Available through ```BaseTensor.copy()```.
20 | - Added new CCI score based on ICELLNET (```cell2cell.core.cci_scores```). Available in the functions
21 | of the regular cell2cell tool (```cell2cell.core.interaction_space```, ```cell2cell.analysis.pipelines.BulkInteractions```,
22 | and ```cell2cell.analysis.pipelines.SingleCellInteractions```)
23 | - Added new function to handle duplicate elements ```cell2cell.preprocessing.find_elements.find_duplicates()```
24 | - Modified functions in ```cell2cell.tensor.subset``` to handle duplicate elements
25 | - Added new function to concatenate InteractionTensors: ```cell2cell.tensor.tensor_manipulation.concatenate_interaction_tensors()```
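
A minimal sketch of a more exhaustive factorization using the new parameters (the tensor object and the rank are assumptions):

```python
tensor.compute_tensor_factorization(
    rank=8,
    tf_type='non_negative_cp',  # decomposition method
    init='svd',
    svd='numpy_svd',            # SVD variant used when init='svd'
    n_iter_max=500,             # more iterations -> closer to the optimum
    tol=1e-8,                   # stricter convergence tolerance
)
```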
26 |
27 | ## Feature updates
28 | - Updated dependency version of tensorly to 0.7.0
29 |
30 | ## Fixed Bugs
31 | - Fixed bug with ```return_errors``` in tensor decompositions using the regular non_negative_parafac.
32 |   The new version of tensorly returns the decomposition and error as a tuple, as in other decomposition methods.
33 | - Fixed bug that changed the diagonal values of the input matrix to zeros when using ```cell2cell.plotting.cci_plot.clustermap_cci```
--------------------------------------------------------------------------------
/release/0.6.1-notes.md:
--------------------------------------------------------------------------------
1 | # Release Notes - cell2cell v0.6.1
2 |
3 | ## New features
4 | - Implemented the option to filter for cells/genes/LR pairs that are present in a given
5 |   fraction of samples/contexts, in addition to using the union or intersection, when building a
6 |   tensor derived from `BaseTensor`. This can be controlled with the parameter `outer_fraction`
7 |   in the classes/functions available in `cell2cell.tensor.tensor` and `cell2cell.tensor.external_scores` (see the sketch after this list).
8 | - Added method `sparsity_fraction()` to `cell2cell.tensor.tensor.BaseTensor`, which computes the fraction of
9 | values in the tensor that are real zeros.
10 | - Added method `missing_fraction()` to `cell2cell.tensor.tensor.BaseTensor`, which computes the fraction of
11 | values in the tensor that are missing or NaNs.
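
A minimal sketch of `outer_fraction` together with the new fraction methods (the input matrices and LR table are assumptions):

```python
import cell2cell as c2c

# Keep only elements present in at least half of the contexts.
tensor = c2c.tensor.InteractionTensor(
    rnaseq_matrices=matrices,
    ppi_data=lr_pairs,
    how='outer',
    outer_fraction=0.5,
)
print(tensor.sparsity_fraction())  # fraction of real zeros
print(tensor.missing_fraction())   # fraction of missing values (NaNs)
```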
12 |
13 | ## Feature updates
14 | - `cell2cell.stats.permutation.compute_pvalue_from_dist()` ignores NaN values.
15 |
16 | ## Fixed Bugs
17 | - Fixed bug in `cell2cell.tensor.concatenate_interaction_tensors()` that did not allow
18 |   concatenating tensors when using a tensorly backend different from numpy.
19 | - Fixed bug to deal with GPU tensors in `cell2cell.tensor.tensor.PreBuiltTensor`
20 | - Fixed bug about dimension labelling in `cell2cell.tensor.tensor.PreBuiltTensor`
--------------------------------------------------------------------------------
/release/0.6.2-notes.md:
--------------------------------------------------------------------------------
1 | # Release Notes - cell2cell v0.6.2
2 |
3 | ## New features
4 | - Added a parameter `output_fig` to ```cell2cell.tensor.tensor.BaseTensor.elbow_rank_selection()``` that
5 | allows deciding whether to generate the figure. If `output_fig=False`, the outputs of this function
6 | will be `(None, loss)`.
7 | - Created ```cell2cell.preprocessing.signal``` to include functions such as ```smooth_curve()```
8 | to smooth a set of values representing a curve.
9 | - Implemented curve smoothing for the elbow analysis of Tensor-cell2cell. It can be accessed with the parameter
10 |   ```smooth=True``` in ```cell2cell.tensor.tensor.BaseTensor.elbow_rank_selection()``` or in any other InteractionTensor.
11 | - Implemented curve smoothing for the elbow plot designed for multiple runs.
12 |   It can be accessed with the parameter
13 |   ```smooth=True``` in ```cell2cell.plotting.tensor_plot.plot_multiple_run_elbow()```.
14 | - Implemented ```cell2cell.tensor.metrics.pairwise_correlation_index()``` to compute the CorrIndex
15 | between all pairs of tensor decompositions in a list.
16 | - Implemented elbow analysis based on the similarity of multiple runs. This can be controlled with the option
17 |   ```metric='similarity'``` in ```cell2cell.tensor.tensor.BaseTensor.elbow_rank_selection()``` or in any other InteractionTensor.
18 |   Use ```metric='error'``` for the normalized error used in previous versions (see the sketch after this list).
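
A minimal sketch of the similarity-based elbow analysis (the tensor object and the `upper_rank`/`runs` values are assumptions):

```python
# Runs several decompositions per rank and scores how similar they are
# (via the CorrIndex mentioned above); smoothing is applied to the
# resulting curve before locating the elbow.
fig, loss = tensor.elbow_rank_selection(
    upper_rank=25,
    runs=10,
    metric='similarity',
    smooth=True,
    output_fig=True,
)
```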
19 |
20 | ## Feature updates
21 | - Modified the way to compute normalized error of tensor decomposition in
22 | ```cell2cell.tensor.factorization._compute_norm_error()```
23 | - Added the option to directly pass a `ylabel` to the elbow plots, including:
24 | ```cell2cell.plotting.tensor_plot.plot_elbow()``` and ```cell2cell.plotting.tensor_plot.plot_multiple_run_elbow()```
25 | - Extended input parameters of ```cell2cell.tensor.factorization._compute_elbow()```
26 |
27 |
28 | ## Fixed Bugs
29 | -
--------------------------------------------------------------------------------
/release/0.6.3-notes.md:
--------------------------------------------------------------------------------
1 | # Release Notes - cell2cell v0.6.3
2 |
3 | ## New features
4 | - Created ```cell2cell.analysis.tensor_pipelines``` with functions to easily run
5 | Tensor-cell2cell.
6 |
7 | ## Feature updates
8 | - Deleted ```cell2cell.external.tensorly_nn_cp``` since it is not used anymore.
9 | Tensorly is directly used instead.
10 | - Renamed ```cell2cell.analysis.pipelines``` to ```cell2cell.analysis.cell2cell_pipelines```
11 | - Added ```elbow_metric```, ```elbow_metric_mean``` and ```elbow_metric_raw``` attributes to ```cell2cell.tensor.tensor.BaseTensor```
12 | for storing the curve generated from the elbow analysis.
13 | - Removed parameter ```loc_zeros``` from ```cell2cell.tensor.tensor.PreBuiltTensor```
14 |
15 | ## Fixed Bugs
16 | - Converted factors to numpy in ```cell2cell.tensor.factorization._multiple_runs_elbow_analysis()```
17 | when ```metric='similarity'``` to avoid errors when using GPU.
18 | - Fixed error obtained with the ```sparsity_fraction()``` and ```missing_fraction()``` methods of a ```cell2cell.tensor.tensor.BaseTensor``` when
19 |   the tensorly backend is different from numpy and the device is not a CPU. This was fixed by
20 |   making the loc_nans and loc_zeros attributes of ```cell2cell.tensor.tensor.InteractionTensor``` and ```cell2cell.tensor.tensor.PreBuiltTensor```
21 |   tensorly.tensor objects.
--------------------------------------------------------------------------------
/release/0.6.4-notes.md:
--------------------------------------------------------------------------------
1 | # Release Notes - cell2cell v0.6.4
2 |
3 | ## New features
4 | - Added the ```cell2cell.io.read_data.load_tensor()``` function to directly load a previously
5 |   exported ```interaction_tensor``` variable generated with Tensor-cell2cell (see the sketch after this list).
6 | - Added a new dataset from a COVID-19 study. Available in ```cell2cell.datasets.anndata.balf_covid()```.
7 | - Added functions to create and explore directories in ```cell2cell.io.directories```.
8 | - Added ```cell2cell.io.read_data.load_tables_from_directory()``` to load all tables or dataframes with the same
9 |   extension located in a given directory.
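
A minimal sketch of reloading a saved tensor (the filename is hypothetical):

```python
import cell2cell as c2c

# Load a previously exported interaction tensor object.
tensor = c2c.io.read_data.load_tensor('PreBuiltTensor-PBMC.pkl')
```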
10 |
11 | ## Feature updates
12 | - Modified ```sparsity_fraction()``` and ```missing_fraction()``` methods of ```cell2cell.tensor.tensor.BaseTensor``` to return
13 | the item in the tensorly tensor object.
14 | - Added progress bar to ```cell2cell.tensor.external_scores.dataframes_to_tensor()```.
15 | - Added the option to specify the ```backend``` when running ```cell2cell.analysis.tensor_pipelines.run_tensor_cell2cell_pipeline()```.
16 |
17 | ## Fixed Bugs
18 | - Implemented a way to manage duplicated instances of an LR communication score in ```cell2cell.tensor.external_scores.dataframes_to_tensor()```.
19 |   It can be controlled through the parameter ```dup_aggregation```.
--------------------------------------------------------------------------------
/release/0.6.5-notes.md:
--------------------------------------------------------------------------------
1 | # Release Notes - cell2cell v0.6.5
2 |
3 | ## New features
4 | - Implemented in-house GSEA using gseapy. Associated code is located in
5 | `cell2cell.datasets.gsea_data` and `cell2cell.external.gseapy`.
6 | - Implemented a function to obtain a dataframe of LR pairs by cell pairs from a tensor decomposition
7 |   result, so it can be used for plotting. It can compute a communication score per factor or
8 |   across all factors. See the function `cell2cell.analysis.tensor_downstream.get_lr_by_cell_pairs()`.
9 |
10 | ## Feature updates
11 | - Added the axis names to the dataframe generated with ```cell2cell.analysis.tensor_downstream.get_joint_loadings()```,
12 |   which correspond to the `dim1` and `dim2` parameters (see the sketch after this list).
13 | - Added the axis labels (`cm.ax_heatmap.set_xlabel()` & `cm.ax_heatmap.set_ylabel()`) using the axis names of the dataframe
14 |   passed to ```cell2cell.plotting.factor_plot.loading_clustermap()```.
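
A minimal sketch of retrieving joint loadings between two tensor dimensions (`dim1`/`dim2` follow these notes; the `result` and `factor` argument names and the dimension labels are assumptions):

```python
import cell2cell as c2c

joint = c2c.analysis.tensor_downstream.get_joint_loadings(
    result=tensor.factors,         # loadings per tensor dimension
    dim1='Ligand-Receptor Pairs',
    dim2='Sender Cells',
    factor='Factor 1',
)
```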
15 |
16 | ## Fixed Bugs
17 | -
--------------------------------------------------------------------------------
/release/0.6.6-notes.md:
--------------------------------------------------------------------------------
1 | # Release Notes - cell2cell v0.6.6
2 |
3 | ## New features
4 | - Added new attributes and methods to `cell2cell.tensor.tensor.BaseTensor` and any other
5 |   derived class, including `BaseTensor.shape`, `BaseTensor.write_file()`, and `BaseTensor.to_device()`.
6 |   These new features respectively allow (see the sketch below):
7 |   - Getting the shape of the tensor directly (instead of `BaseTensor.tensor.shape`)
8 |   - Exporting or saving a tensor object to a file.
9 |   - Changing the device for running Tensor-cell2cell (e.g. 'cpu', 'cuda', etc.)
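
A minimal sketch of the new conveniences (the tensor object and the filename are assumptions):

```python
print(tensor.shape)              # shape without accessing tensor.tensor.shape
tensor.write_file('tensor.pkl')  # save the tensor object to a file
tensor.to_device('cuda')         # run Tensor-cell2cell on a GPU device
```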
10 |
11 | ## Feature updates
12 | - Added `**kwargs` as a parameter of `cell2cell.analysis.tensor_pipelines.run_tensor_cell2cell_pipeline()`
13 |   to directly pass parameters to the functions running the elbow analysis and the tensor decomposition.
14 | - Sort factors numerically in `cell2cell.external.gseapy.run_gsea()`.
15 |
16 | ## Fixed Bugs
17 | -
--------------------------------------------------------------------------------
/release/0.6.7-notes.md:
--------------------------------------------------------------------------------
1 | # Release Notes - cell2cell v0.6.7
2 |
3 | ## New features
4 | - Direct access to `interaction_elements` attribute from `cell2cell.analysis.cell2cell_pipelines.SingleCellInteractions`
5 | and `cell2cell.analysis.cell2cell_pipelines.BulkInteractions`
6 | - Added option to store GMT file in output_folder in `cell2cell.external.gseapy`
7 |
8 | ## Feature updates
9 | - Removed tqdm for jupyter notebooks.
10 | - Updated tensorly version from 0.7.0 to 0.8.1
11 |
12 | ## Fixed Bugs
13 | - Modified permutations in `cell2cell.analysis.cell2cell_pipelines.SingleCellInteractions.permute_cell_labels()`
--------------------------------------------------------------------------------
/release/0.6.8-notes.md:
--------------------------------------------------------------------------------
1 | # Release Notes - cell2cell v0.6.8
2 |
3 | ## New features
4 |
5 |
6 | ## Feature updates
7 |
8 |
9 | ## Fixed Bugs
10 | - Fixed bug that was skipping first factor to generate outputs from `cell2cell.external.gseapy.run_gsea()`
--------------------------------------------------------------------------------
/release/0.7.0-notes.md:
--------------------------------------------------------------------------------
1 | # Release Notes - cell2cell v0.7.0
2 |
3 | ## New features
4 | - Added `cell2cell.spatial` functions enabling analyses that consider the spatial organization of spatial data.
5 |   These functions include (see the sketch after this list):
6 |   - Filtering by thresholding intercellular distances (`cell2cell.spatial.distances` and `cell2cell.spatial.filtering`).
7 |   - Dividing the tissue into square grids (`cell2cell.spatial.neighborhoods.create_spatial_grid()`)
8 |   - Dividing the tissue into moving windows (`cell2cell.spatial.neighborhoods.create_moving_windows()`, `cell2cell.spatial.neighborhoods.calculate_window_size()`,
9 |     and `cell2cell.spatial.neighborhoods.add_moving_window_info_to_adata()`)
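
A minimal sketch of gridding a tissue (the function name follows these notes; the `adata` object and the `num_bins` parameter name are assumptions, so check `cell2cell.spatial.neighborhoods`):

```python
import cell2cell as c2c

# Assign each cell in an AnnData object to a bin of a 4x4 spatial grid.
c2c.spatial.neighborhoods.create_spatial_grid(adata, num_bins=4)
```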
10 |
11 | ## Feature updates
12 |
13 |
14 | ## Fixed Bugs
15 | - Fixed bug that caused multiple legends to be incorrectly visualized in plots, for example in `cell2cell.plotting.tensor_plot`,
16 |   when using newer matplotlib versions.
--------------------------------------------------------------------------------
/release/0.7.1-notes.md:
--------------------------------------------------------------------------------
1 | # Release Notes - cell2cell v0.7.1
2 |
3 | ## New features
4 | - Refer to [v0.7.0 notes](./0.7.0-notes.md) to see the new features. This is a quick fix of that version.
5 |
6 | ## Feature updates
7 | - Renamed `cell2cell.spatial.neighborhoods.create_moving_windows()` and
8 |   `cell2cell.spatial.neighborhoods.add_moving_window_info_to_adata()` to
9 |   `cell2cell.spatial.neighborhoods.create_sliding_windows()` and
10 |   `cell2cell.spatial.neighborhoods.add_sliding_window_info_to_adata()`, respectively.
11 |
12 | ## Fixed Bugs
13 |
--------------------------------------------------------------------------------
/release/0.7.2-notes.md:
--------------------------------------------------------------------------------
1 | # Release Notes - cell2cell v0.7.2
2 |
3 | ## New features
4 | - Refer to [v0.7.0 notes](./0.7.0-notes.md) & [v0.7.1 notes](./0.7.1-notes.md) to see the new features. This is a quick fix of those versions.
5 |
6 | ## Feature updates
7 |
8 | ## Fixed Bugs
9 | - Updated export of factor loadings in Tensor-cell2cell for compatibility with newer versions of `pandas`.
--------------------------------------------------------------------------------
/release/0.7.3-notes.md:
--------------------------------------------------------------------------------
1 | # Release Notes - cell2cell v0.7.3
2 |
3 | ## New features
4 | - Refer to [v0.7.2 notes](./0.7.2-notes.md) to see the previous new features.
5 | - Added example of using Spatial Data with Tensor-cell2cell.
6 |
7 | ## Feature updates
8 | - Updated single-cell data example with cell2cell to use COVID-19 data.
9 | - Updated bulk data example with cell2cell.
10 | - Updated `docs` folder for readthedocs.org.
11 | - Updated README.md
12 |
13 | ## Fixed Bugs
14 | - Fixed legend visualization in `cell2cell.plotting.pcoa_plot.pcoa_3dplot()`
15 | - Fixed negative P-values in `cell2cell.stats.permutation.compute_pvalue_from_dist()`
16 | - Fixed permutation analysis in `cell2cell.analysis.cell2cell_pipelines.SingleCellInteractions`
17 | - Fixed legend visualization in `cell2cell.plotting.circular_plot.circos_plot()`
--------------------------------------------------------------------------------
/release/0.7.4-notes.md:
--------------------------------------------------------------------------------
1 | # Release Notes - cell2cell v0.7.4
2 |
3 | ## New features
4 | - Refer to [v0.7.3 notes](./0.7.3-notes.md) to see the previous new features.
5 |
6 | ## Feature updates
7 | - Updated assert warnings for Tensor-cell2cell
8 |
9 | ## Fixed Bugs
10 | - Fixed set indexing that was deprecated in new pandas versions (in `cell2cell.preprocessing.rnaseq.add_complexes_to_expression()`)
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # ----------------------------------------------------------------------------
4 | # Copyright (c) 2019--, Cell2cell development team.
5 | #
6 | # Distributed under the terms of the BSD 3-Clause License.
7 | #
8 | # The full license is in the file LICENSE, distributed with this software.
9 | # ----------------------------------------------------------------------------
10 |
11 | from setuptools.command.egg_info import egg_info
12 | from setuptools.command.develop import develop
13 | from setuptools.command.install import install
14 | import re
15 | import ast
16 | import os
17 | from setuptools import find_packages, setup
18 |
19 | # Dealing with Cython
20 | USE_CYTHON = os.environ.get('USE_CYTHON', False)
21 | ext = '.pyx' if USE_CYTHON else '.c'
22 |
23 |
24 | def custom_command():  # pre-installs numpy via pip on macOS/Linux
25 | import sys
26 | if sys.platform in ['darwin', 'linux']:
27 | os.system('pip install numpy')
28 |
29 | class CustomInstallCommand(install):
30 | def run(self):
31 | install.run(self)
32 | custom_command()
33 |
34 | class CustomDevelopCommand(develop):
35 | def run(self):
36 | develop.run(self)
37 | custom_command()
38 |
39 | class CustomEggInfoCommand(egg_info):
40 | def run(self):
41 | egg_info.run(self)
42 | custom_command()
43 |
44 |
45 | extensions = [
46 | ]
47 |
48 | if USE_CYTHON:
49 | from Cython.Build import cythonize
50 | extensions = cythonize(extensions)
51 |
52 | classes = """
53 | Development Status :: 2 - Pre-Alpha
54 | License :: OSI Approved :: BSD License
55 | Topic :: Software Development :: Libraries
56 | Topic :: Scientific/Engineering
57 | Topic :: Scientific/Engineering :: Bio-Informatics
58 | Programming Language :: Python :: 3
59 | Programming Language :: Python :: 3 :: Only
60 | Operating System :: Unix
61 | Operating System :: POSIX
62 | Operating System :: MacOS :: MacOS X
63 | """
64 | classifiers = [s.strip() for s in classes.split('\n') if s]
65 |
66 | description = ('TBD')
67 |
68 | with open('README.md') as f:
69 | long_description = f.read()
70 |
71 | _version_re = re.compile(r'__version__\s+=\s+(.*)')
72 |
73 | with open('cell2cell/__init__.py', 'rb') as f:
74 | hit = _version_re.search(f.read().decode('utf-8')).group(1)
75 | version = str(ast.literal_eval(hit))
76 |
77 | setup(name='cell2cell',
78 | version=version,
79 | license='BSD-3-Clause',
80 | description=description,
81 | long_description_content_type="text/markdown",
82 | long_description=long_description,
83 | author="cell2cell development team",
84 | author_email="earmingo@ucsd.edu",
85 | maintainer="cell2cell development team",
86 | maintainer_email="earmingol@eng.ucsd.edu",
87 | packages=find_packages(),
88 | ext_modules=extensions,
89 | install_requires=['numpy >= 1.16',
90 | 'pandas >= 1.0.0',
91 | 'xlrd >= 1.1',
92 | 'openpyxl >= 2.6.2',
93 | 'networkx >= 2.3',
94 | 'matplotlib >= 3.2.0',
95 | 'seaborn >= 0.11.0',
96 | 'scikit-learn',
97 | 'umap-learn',
98 | 'tqdm',
99 | 'statsmodels',
100 | 'statannotations',
101 | 'tensorly == 0.8.1',
102 | 'kneed',
103 | 'scanpy',
104 | 'gseapy == 1.0.3'
105 | ],
106 | classifiers=classifiers,
107 | entry_points={},
108 | package_data={},
109 | cmdclass={'install': CustomInstallCommand,
110 | 'develop': CustomDevelopCommand,
111 | 'egg_info': CustomEggInfoCommand, },
112 | zip_safe=False)
--------------------------------------------------------------------------------