├── .codecov.yml
├── .github
└── workflows
│ └── python-package.yml
├── .gitignore
├── LICENSE
├── README.md
├── data
├── marrow_sample_scseq_counts.csv.gz
├── marrow_sample_scseq_counts.h5ad
└── sample_tsne.p
├── docs
├── Makefile
├── make.bat
├── requirements.txt
└── source
│ ├── conf.py
│ ├── core.rst
│ ├── index.rst
│ ├── notebooks
│ ├── plot.rst
│ ├── preprocess.rst
│ ├── presults.rst
│ └── utils.rst
├── notebooks
├── Palantir_sample_notebook.ipynb
├── comparisons
│ ├── dpt.ipynb
│ ├── fateid.ipynb
│ ├── monocle2.ipynb
│ ├── paga.ipynb
│ ├── results
│ │ ├── fateid
│ │ │ ├── CLP_order.csv
│ │ │ ├── DC_order.csv
│ │ │ ├── Ery_order.csv
│ │ │ ├── Mega_order.csv
│ │ │ ├── Mono_order.csv
│ │ │ ├── clusters.csv
│ │ │ ├── probs.csv
│ │ │ └── tsne.csv
│ │ ├── monocle2
│ │ │ ├── phenodata.csv
│ │ │ └── red_dims.csv
│ │ └── slingshot
│ │ │ ├── Lineage1_CD34.csv
│ │ │ ├── Lineage1_CD79A.csv
│ │ │ ├── Lineage1_CD79B.csv
│ │ │ ├── Lineage1_CEBPD.csv
│ │ │ ├── Lineage1_CEBPG.csv
│ │ │ ├── Lineage1_CSF1R.csv
│ │ │ ├── Lineage1_GATA1.csv
│ │ │ ├── Lineage1_IRF8.csv
│ │ │ ├── Lineage1_ITGA2B.csv
│ │ │ ├── Lineage1_MPO.csv
│ │ │ ├── Lineage1_RAG1.csv
│ │ │ ├── Lineage1_SPI1.csv
│ │ │ ├── Lineage2_CD34.csv
│ │ │ ├── Lineage2_CD79A.csv
│ │ │ ├── Lineage2_CD79B.csv
│ │ │ ├── Lineage2_CEBPD.csv
│ │ │ ├── Lineage2_CEBPG.csv
│ │ │ ├── Lineage2_CSF1R.csv
│ │ │ ├── Lineage2_GATA1.csv
│ │ │ ├── Lineage2_IRF8.csv
│ │ │ ├── Lineage2_ITGA2B.csv
│ │ │ ├── Lineage2_MPO.csv
│ │ │ ├── Lineage2_RAG1.csv
│ │ │ ├── Lineage2_SPI1.csv
│ │ │ ├── Lineage3_CD34.csv
│ │ │ ├── Lineage3_CD79A.csv
│ │ │ ├── Lineage3_CD79B.csv
│ │ │ ├── Lineage3_CEBPD.csv
│ │ │ ├── Lineage3_CEBPG.csv
│ │ │ ├── Lineage3_CSF1R.csv
│ │ │ ├── Lineage3_GATA1.csv
│ │ │ ├── Lineage3_IRF8.csv
│ │ │ ├── Lineage3_ITGA2B.csv
│ │ │ ├── Lineage3_MPO.csv
│ │ │ ├── Lineage3_RAG1.csv
│ │ │ ├── Lineage3_SPI1.csv
│ │ │ ├── Lineage4_CD34.csv
│ │ │ ├── Lineage4_CD79A.csv
│ │ │ ├── Lineage4_CD79B.csv
│ │ │ ├── Lineage4_CEBPD.csv
│ │ │ ├── Lineage4_CEBPG.csv
│ │ │ ├── Lineage4_CSF1R.csv
│ │ │ ├── Lineage4_GATA1.csv
│ │ │ ├── Lineage4_IRF8.csv
│ │ │ ├── Lineage4_ITGA2B.csv
│ │ │ ├── Lineage4_MPO.csv
│ │ │ ├── Lineage4_RAG1.csv
│ │ │ ├── Lineage4_SPI1.csv
│ │ │ ├── clusters.csv
│ │ │ ├── data.csv
│ │ │ ├── exprs.csv
│ │ │ └── weights.csv
│ └── slignshot.ipynb
└── manuscript_data.ipynb
├── pyproject.toml
├── readthedocs.yaml
├── requirements.txt
├── setup.py
├── src
└── palantir
│ ├── __init__.py
│ ├── cli.py
│ ├── config.py
│ ├── core.py
│ ├── io.py
│ ├── plot.py
│ ├── plot_utils.py
│ ├── preprocess.py
│ ├── presults.py
│ ├── utils.py
│ ├── validation.py
│ └── version.py
└── tests
├── conftest.py
├── core_run_palantir.py
├── plot.py
├── presults.py
├── presults_compute_gene_trends.py
├── test_core_run_palantir.py
├── test_integration.py
├── test_io.py
├── test_plot.py
├── test_preprocess.py
├── test_presults.py
├── test_presults_cluster_gene_trends.py
├── test_presults_compute_gene_trends.py
├── test_presults_gam_fit_predict.py
├── test_presults_select_branch_cells.py
├── test_util_density.py
├── test_util_run_pca.py
├── test_utils_compute_kernel.py
├── test_utils_density_functions.py
├── test_utils_determine_multiscale_space.py
├── test_utils_diffusion_maps_from_kernel.py
├── test_utils_early_cell_functions.py
├── test_utils_run_diffusion_maps.py
├── test_utils_run_local_variability.py
├── test_utils_run_magic_imputation.py
├── test_validation.py
├── utils_compute_kernel.py
├── utils_diffusion_maps_from_kernel.py
├── utils_run_diffusion_maps.py
├── utils_run_local_variability.py
├── utils_run_magic_imputation.py
└── utils_run_pca.py
/.codecov.yml:
--------------------------------------------------------------------------------
1 | coverage:
2 | status:
3 | project:
4 | default:
5 | target: 80%
6 | threshold: 1%
7 | patch:
8 | default:
9 | target: 80%
10 | threshold: 1%
11 |
--------------------------------------------------------------------------------
/.github/workflows/python-package.yml:
--------------------------------------------------------------------------------
1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions
2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
3 |
4 | name: Python package
5 |
6 | on:
7 | push:
8 | branches: [ "master", "main", "dev" ]
9 | pull_request:
10 | branches: [ "master", "main", "dev" ]
11 |
12 | jobs:
13 | build:
14 |
15 | runs-on: ubuntu-latest
16 | strategy:
17 | fail-fast: false
18 | matrix:
19 | python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
20 |
21 | steps:
22 | - uses: actions/checkout@v3
23 | - name: Set up Python ${{ matrix.python-version }}
24 | uses: actions/setup-python@v3
25 | with:
26 | python-version: ${{ matrix.python-version }}
27 | - name: Install dependencies
28 | run: |
29 | python -m pip install --upgrade pip
30 | python -m pip install flake8 pytest pytest-cov coverage typing-extensions
31 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
32 |
33 | # Install the package with appropriate test dependencies
34 | # For Python 3.13+, use test-base (without fcsparser)
35 | # For Python <3.13, use test (includes fcsparser)
36 | if python -c "import sys; exit(0 if sys.version_info >= (3, 13) else 1)"; then
37 | python -m pip install -e ".[test-base]"
38 | echo "Installed with test-base dependencies (no fcsparser) for Python 3.13+"
39 | else
40 | python -m pip install -e ".[test]"
41 | echo "Installed with test dependencies (includes fcsparser) for Python < 3.13"
42 | fi
43 | - name: Lint with flake8
44 | run: |
45 | # stop the build if there are Python syntax errors or undefined names
46 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
47 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
48 | flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
49 | - name: Test with pytest
50 | env:
51 | PYTHONPATH: ./src/
52 | run: |
53 | python -m pytest --cov=src/palantir
54 | - name: Upload coverage reports to Codecov
55 | uses: codecov/codecov-action@v3
56 | with:
57 | token: ${{ secrets.CODECOV_TOKEN }}
58 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 | .DS_Store
3 | .ipynb_checkpoints/
4 | *.h5ad
5 | build/
6 | palantir.egg-info/
7 | .coverage*
8 | notebooks/testing.ipynb
9 | .pytest_cache/
10 | dist/
11 | .vscode/
12 | data/
13 | *.ipynb
14 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020–present Dana Pe'er Lab
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [](https://badge.fury.io/py/palantir)
2 | [](https://codecov.io/github/settylab/Palantir)
3 |
4 | Palantir
5 | ------
6 |
7 | Palantir is an algorithm to align cells along differentiation trajectories. Palantir models differentiation as a stochastic process where stem cells differentiate to terminally differentiated cells by a series of steps through a low dimensional phenotypic manifold. Palantir effectively captures the continuity in cell states and the stochasticity in cell fate determination. Palantir has been designed to work with multidimensional single cell data from diverse technologies such as Mass cytometry and single cell RNA-seq.
8 |
9 | ## Installation
10 | Palantir has been implemented in Python3 and can be installed using:
11 |
12 | ### Using pip
13 | ```sh
14 | pip install palantir
15 | ```
16 |
17 | ### Using conda, mamba, or micromamba from the bioconda channel
18 | You can also install Palantir via conda, mamba, or micromamba from the bioconda channel:
19 |
20 | #### Using conda
21 | ```sh
22 | conda install -c conda-forge -c bioconda palantir
23 | ```
24 |
25 | #### Using mamba
26 | ```sh
27 | mamba install -c conda-forge -c bioconda palantir
28 | ```
29 |
30 | #### Using micromamba
31 | ```sh
32 | micromamba install -c conda-forge -c bioconda palantir
33 | ```
34 |
35 | These methods ensure that all dependencies are resolved and installed efficiently.
36 |
37 |
38 | ## Usage
39 |
40 | A tutorial on Palantir usage and results visualization for single cell RNA-seq
41 | data can be found in this notebook:
42 | https://github.com/dpeerlab/Palantir/blob/master/notebooks/Palantir_sample_notebook.ipynb
43 |
44 | More tutorials and a documentation of all the Palantir components can be found
45 | here: https://palantir.readthedocs.io
46 |
47 | ## Processed data and metadata
48 |
49 | `scanpy anndata` objects are available for download for the three replicates generated in the manuscript:
50 | - [Replicate 1 (Rep1)](https://s3.amazonaws.com/dp-lab-data-public/palantir/human_cd34_bm_rep1.h5ad)
51 | - [Replicate 2 (Rep2)](https://s3.amazonaws.com/dp-lab-data-public/palantir/human_cd34_bm_rep2.h5ad)
52 | - [Replicate 3 (Rep3)](https://s3.amazonaws.com/dp-lab-data-public/palantir/human_cd34_bm_rep3.h5ad)
53 |
54 | This notebook details how to use the data in `Python` and `R`: http://nbviewer.jupyter.org/github/dpeerlab/Palantir/blob/master/notebooks/manuscript_data.ipynb
55 |
56 | ## Comparison to trajectory detection algorithms
57 | Notebooks detailing the generation of results comparing Palantir to trajectory detection algorithms are available [here](https://github.com/dpeerlab/Palantir/blob/master/notebooks/comparisons)
58 |
59 | ## Citations
60 | Palantir manuscript is available from [Nature Biotechnology](https://www.nature.com/articles/s41587-019-0068-4). If you use Palantir for your work, please cite our paper.
61 |
62 | @article{Palantir_2019,
63 | title = {Characterization of cell fate probabilities in single-cell data with Palantir},
64 | author = {Manu Setty and Vaidotas Kiseliovas and Jacob Levine and Adam Gayoso and Linas Mazutis and Dana Pe'er},
65 | journal = {Nature Biotechnology},
66 | year = {2019},
67 | month = {march},
68 | url = {https://doi.org/10.1038/s41587-019-0068-4},
69 | doi = {10.1038/s41587-019-0068-4}
70 | }
71 | ____
72 |
73 | Release Notes
74 | -------------
75 |
76 | ### Next Release
77 | * fix "lightgray" error in `plot_trend`
78 |
79 | ### Version 1.4.1
80 | * update `LICENSE` file to be consistent with MIT - license
81 | * implement `plot_trajectories` to show multiple poaths on the UMAP
82 | * Drop leiden dependency to allow Python >= 3.13. igrph is used instead.
83 |
84 | ### Version 1.4.0
85 | * Made pygam an optional dependency that can be installed with `pip install palantir[gam]` or `pip install palantir[full]`
86 | * Added proper conditional imports and improved error handling for pygam
87 | * Enhanced `run_magic_imputation` to return appropriate data types for different inputs
88 | * Updated code to use direct AnnData imports for newer compatibility
89 | * Improved version detection using `importlib.metadata` with graceful fallbacks
90 | * Fixed Series indexing deprecation warnings in early cell detection functions
91 | * Expanded and standardized documentation with NumPy-style docstrings throughout the codebase
92 | * Added comprehensive type hints to improve code quality and IDE support
93 | * Remove dependency from `_` methods in scanpy for plotting.
94 | * add `pseudotime_interval` argument to control path length in `palantir.plot.plot_trajectory`
95 |
96 | #### Testing and Quality Improvements
97 | * Added comprehensive tests for optional pygam dependency
98 | * Improved test coverage for run_magic_imputation with various input/output types
99 | * Added integration tests against expected results
100 | * Enhanced test infrastructure to work with newer library versions
101 | * Expanded test coverage to catch edge cases in data processing
102 |
103 | ### Version 1.3.6
104 | * `run_magic_imputation` now has a boolean parameter `sparse` to control output sparsity
105 | * **bugfix**: `run_local_variability` for dense expression arrays now runs much faster and more accurate
106 |
107 | ### Version 1.3.4
108 | * avoid devision by zero in `select_branch_cells` for very small datasets
109 | * make branch selection robust against NaNs
110 | * do not plot unclustered trends (NaN cluster) in `plot_gene_trend_clusters`
111 |
112 | ### Version 1.3.3
113 | * optional progress bar with `progress=True` in `palantir.utils.run_local_variability`
114 | * avoid NaN in local variablility output
115 | * compatibility with `scanpy>=1.10.0`
116 |
117 | ### Version 1.3.2
118 | * require `python>=3.9`
119 | * implement CI for testing
120 | * fixes for edge cases discovered through extended testing
121 | * implement `plot_trajectory` function to show trajectory on the umap
122 | * scale pseudotime to unit interval in anndata
123 |
124 | ### Version 1.3.1
125 | * implemented `palantir.plot.plot_stats` to plot arbitrary cell-wise statistics as x-/y-positions.
126 | * reduce memory usage of `palantir.presults.compute_gene_trends`
127 | * removed seaborn dependency
128 | * refactor `run_diffusion_maps` to split out `compute_kernel` and `diffusion_maps_from_kernel`
129 | * remove unused dependencies `tables`, `Cython`, `cmake`, and `tzlocal`.
130 | * fixes in `run_pca` (return correct projections and do not use too many components)
131 |
132 | ### Version 1.3.0
133 |
134 | #### New Features
135 | * Enable an AnnData-centric workflow for improved usability and interoperability with other single-cell analysis tools.
136 | * Introduced new utility functions
137 | * `palantir.utils.early_cell` To automate finding an early cell based on cell type and diffusion components.
138 | * `palantir.utils.find_terminal_states` To automate finding terminal cell states based on cell type and diffusion components.
139 | * `palantir.presults.select_branch_cells` To find cells associated to each branch based on fate probability.
140 | * `palantir.plot.plot_branch_selection` To inspect the cell to branch association.
141 | * `palantir.utils.run_local_variability` To compute local gene expression variability.
142 | * `palantir.utils.run_density` A wrapper for [mellon.DensityEstimator](https://mellon.readthedocs.io/en/latest/model.html#mellon.model.DensityEstimator).
143 | * `palantir.utils.run_density_evaluation` Evaluate computed density on a different dataset.
144 | * `palantir.utils.run_low_density_variability`. To aggregate local gene expression variability in low density.
145 | * `palantir.plot.plot_branch`. To plot branch-selected cells over pseudotime in arbitrary y-position and coloring.
146 | * `palantir.plot.plot_trend`. To plot the gene trend on top of `palantir.plot.plot_branch`.
147 | * Added input validation for better error handling and improved user experience.
148 | * Expanded documentation within docstrings, providing additional clarity for users and developers.
149 |
150 | #### Enhancements
151 | * Updated tutorial notebook to reflect the new workflow, guiding users through the updated processes.
152 | * Implemented gene trend computation using [Mellon](https://github.com/settylab/Mellon), providing more robust and efficient gene trend analysis.
153 | * Enable annotation in `palantir.plot.highlight_cells_on_umap`.
154 |
155 | #### Changes
156 | * Replaced PhenoGraph dependency with `scanpy.tl.leiden` for gene trend clustering.
157 | * Deprecated the `run_tsne`, `determine_cell_clusters`, and `plot_cell_clusters` functions. Use corresponding implementations from [Scanpy](https://scanpy.readthedocs.io/en/stable/), widely used single-cell analysis library and direct dependency of Palantir.
158 | * Rename `palantir.plot.highlight_cells_on_tsne` to `palantir.plot.highlight_cells_on_umap`
159 | * Depend on `anndata>=0.8.0` to avoid issues writing dataframes in `ad.obsm`.
160 |
161 | #### Fixes
162 | * Addressed the issue of variability when reproducing results ([issue#64](https://github.com/dpeerlab/Palantir/issues/64)), enhancing the reproducibility and reliability of Palantir.
163 |
164 |
165 | ### Version 1.1.0
166 | * Replaced rpy2 with pyGAM for computing gene expression trends.
167 | * Updated tutorial and plotting functions
168 |
169 |
170 | ### Version 1.0.0
171 |
172 | * A fix to [issue#41](https://github.com/dpeerlab/Palantir/issues/41)
173 | * A fix to [issue#42](https://github.com/dpeerlab/Palantir/issues/42)
174 | * Revamped tutorial with support for Anndata and force directed layouts
175 |
176 | ### Version 0.2.6
177 |
178 | * A fix to [issue#33](https://github.com/dpeerlab/Palantir/issues/33) and [issue#31](https://github.com/dpeerlab/Palantir/issues/31)
179 |
180 | ### Version 0.2.5
181 |
182 | * A fix related to [issue#28](https://github.com/dpeerlab/Palantir/issues/28). When identifying terminal states, duplicate values were generated instead of unique ones.
183 |
--------------------------------------------------------------------------------
/data/marrow_sample_scseq_counts.csv.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dpeerlab/Palantir/3be143443a601eaf163e9ff63e76f32330ac9cd2/data/marrow_sample_scseq_counts.csv.gz
--------------------------------------------------------------------------------
/data/marrow_sample_scseq_counts.h5ad:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dpeerlab/Palantir/3be143443a601eaf163e9ff63e76f32330ac9cd2/data/marrow_sample_scseq_counts.h5ad
--------------------------------------------------------------------------------
/data/sample_tsne.p:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dpeerlab/Palantir/3be143443a601eaf163e9ff63e76f32330ac9cd2/data/sample_tsne.p
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = source
9 | BUILDDIR = build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=source
11 | set BUILDDIR=build
12 |
13 | %SPHINXBUILD% >NUL 2>NUL
14 | if errorlevel 9009 (
15 | echo.
16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
17 | echo.installed, then set the SPHINXBUILD environment variable to point
18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
19 | echo.may add the Sphinx directory to PATH.
20 | echo.
21 | echo.If you don't have Sphinx installed, grab it from
22 | echo.https://www.sphinx-doc.org/
23 | exit /b 1
24 | )
25 |
26 | if "%1" == "" goto help
27 |
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 |
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 |
34 | :end
35 | popd
36 |
--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | sphinxcontrib-autoprogram
2 | sphinxcontrib-napoleon
3 | sphinx-autodocgen
4 | sphinx-github-style>=1.2.2
5 | sphinx-mdinclude
6 | m2r2
7 | nbsphinx
8 | furo
9 | typing-extensions
10 | IPython
11 |
--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
1 | # Configuration file for the Sphinx documentation builder.
2 | #
3 | # This file only contains a selection of the most common options. For a full
4 | # list see the documentation:
5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
6 |
7 | # -- Path setup --------------------------------------------------------------
8 |
9 | # If extensions (or modules to document with autodoc) are in another directory,
10 | # add these directories to sys.path here. If the directory is relative to the
11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
12 | #
13 | import os
14 | import sys
15 | from pathlib import Path
16 |
17 | sys.path.insert(0, os.path.abspath("../../src"))
18 |
19 | this_directory = Path(__file__).parent
20 |
21 |
22 | # get version and other attributes
23 | version_info = {}
24 | with open("../../src/palantir/version.py") as f:
25 | exec(f.read(), version_info)
26 |
27 |
28 | # -- Project information -----------------------------------------------------
29 |
30 | project = "Palantir"
31 | copyright = "2024, " + version_info['__author__']
32 | author = version_info['__author__']
33 |
34 | # The full version, including alpha/beta/rc tags
35 | release = version_info['__version__']
36 |
37 |
38 | # -- General configuration ---------------------------------------------------
39 |
40 | # Add any Sphinx extension module names here, as strings. They can be
41 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
42 | # ones.
43 | extensions = [
44 | "sphinx.ext.autodoc",
45 | "nbsphinx",
46 | "sphinx.ext.napoleon",
47 | "sphinx_mdinclude",
48 | ]
49 | if os.environ.get('READTHEDOCS') == 'True':
50 | extensions.append("sphinx_github_style")
51 | extensions.append("sphinx.ext.linkcode")
52 |
53 | # GitHub repo information
54 | html_context = {
55 | "github_user": "dpeerlab",
56 | "github_repo": "Palantir",
57 | "github_version": "master",
58 | }
59 |
60 | # Set linkcode_url for sphinx-github-style
61 | linkcode_url = "https://github.com/dpeerlab/Palantir/blob/master/{filepath}#L{linestart}-L{linestop}"
62 |
63 | source_suffix = [".rst", ".md"]
64 |
65 | # Add any paths that contain templates here, relative to this directory.
66 | templates_path = ["_templates"]
67 |
68 | # List of patterns, relative to source directory, that match files and
69 | # directories to ignore when looking for source files.
70 | # This pattern also affects html_static_path and html_extra_path.
71 | exclude_patterns = ["_build", "**.ipynb_checkpoints"]
72 |
73 |
74 | # -- Options for HTML output -------------------------------------------------
75 |
76 | # The theme to use for HTML and HTML Help pages. See the documentation for
77 | # a list of builtin themes.
78 | #
79 | html_theme = "furo"
80 | pygments_style = "tango"
81 |
82 | html_theme_options = {
83 | "footer_icons": [
84 | {
85 | "name": "GitHub",
86 | "url": "https://github.com/dpeerlab/Palantir",
87 | "html": """
88 |
91 | """,
92 | "class": "",
93 | },
94 | ],
95 | }
96 |
97 | highlight_language = "none"
98 |
99 | # Add any paths that contain custom static files (such as style sheets) here,
100 | # relative to this directory. They are copied after the builtin static files,
101 | # so a file named "default.css" will overwrite the builtin "default.css".
102 | html_static_path = []
103 |
--------------------------------------------------------------------------------
/docs/source/core.rst:
--------------------------------------------------------------------------------
1 | Core
2 | ====
3 |
4 | .. automodule:: palantir.core
5 | :members:
6 | :undoc-members:
7 | :show-inheritance:
8 |
--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
1 | .. mellon documentation master file, created by
2 | sphinx-quickstart on Fri Sep 16 22:07:21 2022.
3 |
4 | .. toctree::
5 | :hidden:
6 | :caption: Tutorials:
7 |
8 | notebooks/Palantir_sample_notebook.ipynb
9 | notebooks/manuscript_data.ipynb
10 |
11 | .. toctree::
12 | :hidden:
13 | :maxdepth: 2
14 | :caption: Modules:
15 |
16 | Preprocessing
17 | Utilities
18 | Core
19 | Postprocessing
20 | Plotting
21 |
22 |
23 | .. toctree::
24 | :hidden:
25 | :caption: Comparisons:
26 |
27 | notebooks/comparisons/dpt.ipynb
28 | notebooks/comparisons/fateid.ipynb
29 | notebooks/comparisons/monocle2.ipynb
30 | notebooks/comparisons/paga.ipynb
31 | notebooks/comparisons/slignshot.ipynb
32 |
33 | |
34 |
35 | .. mdinclude:: ../../README.md
36 |
37 | .. toctree::
38 | :hidden:
39 | :caption: Links:
40 |
41 | Github Repo
42 |
43 |
44 | Index
45 | =====
46 |
47 | * :ref:`genindex`
48 |
--------------------------------------------------------------------------------
/docs/source/notebooks:
--------------------------------------------------------------------------------
1 | ../../notebooks
--------------------------------------------------------------------------------
/docs/source/plot.rst:
--------------------------------------------------------------------------------
1 | Plotting
2 | ========
3 |
4 | .. automodule:: palantir.plot
5 | :members:
6 | :undoc-members:
7 | :show-inheritance:
8 |
--------------------------------------------------------------------------------
/docs/source/preprocess.rst:
--------------------------------------------------------------------------------
1 | Preprocessing
2 | =============
3 |
4 | .. automodule:: palantir.preprocess
5 | :members:
6 | :undoc-members:
7 | :show-inheritance:
8 |
--------------------------------------------------------------------------------
/docs/source/presults.rst:
--------------------------------------------------------------------------------
1 | Postprocessing
2 | ==============
3 |
4 | .. automodule:: palantir.presults
5 | :members:
6 | :undoc-members:
7 | :show-inheritance:
8 |
--------------------------------------------------------------------------------
/docs/source/utils.rst:
--------------------------------------------------------------------------------
1 | Utilities
2 | =========
3 |
4 | .. automodule:: palantir.utils
5 | :members:
6 | :undoc-members:
7 | :show-inheritance:
8 |
--------------------------------------------------------------------------------
/notebooks/comparisons/results/fateid/DC_order.csv:
--------------------------------------------------------------------------------
1 | "","t4"
2 | "1","Run4_126886713386862"
3 | "2","Run4_130057606915939"
4 | "3","Run4_133829902874925"
5 | "4","Run4_157613970647844"
6 | "5","Run4_196163864475485"
7 | "6","Run4_204403356948907"
8 | "7","Run4_231280295401270"
9 | "8","Run4_231280310074741"
10 | "9","Run4_235678490908891"
11 | "10","Run4_235697683417844"
12 | "11","Run4_240001494408486"
13 | "12","Run5_126886713416990"
14 | "13","Run5_134531774106534"
15 | "14","Run5_157613955956011"
16 | "15","Run5_160440071702940"
17 | "16","Run5_169022462749603"
18 | "17","Run5_197213599750044"
19 | "18","Run5_205510773992812"
20 | "19","Run5_227516844984628"
21 | "20","Run5_231692907800427"
22 | "21","Run5_232327507728678"
23 | "22","Run5_235122054879964"
24 | "23","Run4_230732701756837"
25 | "24","Run5_160996525231478"
26 | "25","Run5_165860320406308"
27 | "26","Run5_170137140189547"
28 | "27","Run5_166290863569707"
29 | "28","Run4_135150498626486"
30 | "29","Run5_200967535908581"
31 | "30","Run4_199962360724702"
32 | "31","Run5_239477254471070"
33 | "32","Run5_232318630480750"
34 | "33","Run4_129565698677470"
35 | "34","Run4_129663530646894"
36 | "35","Run4_239596318186406"
37 | "36","Run5_227844214340460"
38 | "37","Run4_226958752541091"
39 | "38","Run5_131097901611291"
40 | "39","Run5_160910657575723"
41 | "40","Run4_161889641978219"
42 | "41","Run5_157536929077979"
43 | "42","Run4_226205924964702"
44 | "43","Run4_166264200448795"
45 | "44","Run5_191080758196467"
46 | "45","Run5_239458061268844"
47 | "46","Run5_234992235597163"
48 | "47","Run5_201595540363172"
49 | "48","Run5_169174308408606"
50 | "49","Run4_134377538219363"
51 | "50","Run5_227516876250396"
52 | "51","Run5_239448263378870"
53 | "52","Run5_227981806484195"
54 | "53","Run4_235214351259421"
55 | "54","Run4_200570519276955"
56 | "55","Run4_235122036431276"
57 | "56","Run4_130754452543710"
58 | "57","Run4_235548433927395"
59 | "58","Run4_126364087900021"
60 | "59","Run4_134530815449317"
61 | "60","Run4_226901469313437"
62 | "61","Run5_226901468854118"
63 | "62","Run5_126836668226926"
64 | "63","Run5_130675129636790"
65 | "64","Run4_161960508214004"
66 | "65","Run5_122282776848630"
67 | "66","Run5_160440071605166"
68 | "67","Run4_126765621271260"
69 | "68","Run4_236768101816741"
70 | "69","Run4_126680976217949"
71 | "70","Run5_122282776874270"
72 | "71","Run4_227903404136812"
73 | "72","Run5_169794765737372"
74 | "73","Run5_230816707177379"
75 | "74","Run5_227284928916381"
76 | "75","Run4_157613970578164"
77 | "76","Run5_170181044099822"
78 | "77","Run5_231711851042724"
79 | "78","Run4_236082114522339"
80 | "79","Run5_197060157730102"
81 | "80","Run4_131234312154540"
82 | "81","Run4_231291315932590"
83 | "82","Run4_155971413489590"
84 | "83","Run4_133974617123676"
85 | "84","Run5_134539660647150"
86 | "85","Run4_205381641819869"
87 | "86","Run4_235533386607836"
88 | "87","Run4_130134915902195"
89 | "88","Run5_226883187948253"
90 | "89","Run5_226874778041270"
91 | "90","Run4_122428824599908"
92 | "91","Run5_226953398700339"
93 | "92","Run5_129497382209436"
94 | "93","Run5_197206037674717"
95 | "94","Run4_235199318669100"
96 | "95","Run4_232450553724148"
97 | "96","Run5_205527671486381"
98 | "97","Run4_121202296712412"
99 | "98","Run5_230800378678062"
100 | "99","Run4_126836669073780"
101 | "100","Run5_161883198962589"
102 | "101","Run4_227357125986725"
103 | "102","Run4_160990095927006"
104 | "103","Run4_200967489997038"
105 | "104","Run5_131097901558494"
106 | "105","Run5_227921645386163"
107 | "106","Run5_201114485475756"
108 | "107","Run5_195967073605980"
109 | "108","Run4_204825356323118"
110 | "109","Run5_133854492289318"
111 | "110","Run4_134592152354101"
112 | "111","Run5_231822829995765"
113 | "112","Run4_200974094001899"
114 | "113","Run5_204765475519403"
115 | "114","Run5_236846721915189"
116 | "115","Run4_170265211464501"
117 | "116","Run4_192255443986782"
118 | "117","Run4_129582985497013"
119 | "118","Run5_155971444992926"
120 | "119","Run4_160447829068124"
121 | "120","Run5_157004353418995"
122 | "121","Run5_232321033853659"
123 | "122","Run4_205510774745395"
124 | "123","Run5_227284901345125"
125 | "124","Run4_121896114436398"
126 | "125","Run4_236634018002741"
127 | "126","Run4_130126447786731"
128 | "127","Run5_240094641601390"
129 | "128","Run4_204825356024622"
130 | "129","Run4_134522332629341"
131 | "130","Run4_200570520164645"
132 | "131","Run4_192747070778206"
133 | "132","Run4_236176853810421"
134 | "133","Run4_195546194598117"
135 | "134","Run5_129582985832693"
136 | "135","Run4_126835192150451"
137 | "136","Run4_231890207586150"
138 | "137","Run4_162078857030885"
139 | "138","Run4_126835160956259"
140 | "139","Run5_195606445677876"
141 | "140","Run5_130142286011806"
142 | "141","Run5_121826167613876"
143 | "142","Run4_133895267445470"
144 | "143","Run4_170137155393252"
145 | "144","Run5_226283262430126"
146 | "145","Run5_126885746359029"
147 | "146","Run5_192653655202716"
148 | "147","Run5_121276833385781"
149 | "148","Run5_195562319787948"
150 | "149","Run4_239382662691758"
151 | "150","Run4_205527685446061"
152 | "151","Run5_227516844763356"
153 | "152","Run4_134592171981109"
154 | "153","Run4_236639608782067"
155 | "154","Run4_130736332883894"
156 | "155","Run5_196180748688101"
157 | "156","Run4_160928731187629"
158 | "157","Run4_226394650633955"
159 | "158","Run4_232330326660853"
160 | "159","Run5_227305168426859"
161 | "160","Run4_161889623076126"
162 | "161","Run4_235763078969779"
163 | "162","Run5_157536960305052"
164 | "163","Run4_160928758131548"
165 | "164","Run5_170181030168308"
166 | "165","Run4_131217401166134"
167 | "166","Run5_169793557481390"
168 | "167","Run4_192180135835932"
169 | "168","Run4_165741375405493"
170 | "169","Run5_130761014307164"
171 | "170","Run4_160903140923115"
172 | "171","Run5_205381641881900"
173 | "172","Run4_227844214442804"
174 | "173","Run5_232327489150307"
175 | "174","Run4_227363580664092"
176 | "175","Run5_161890937269108"
177 | "176","Run4_200563808394158"
178 | "177","Run5_226829528151854"
179 | "178","Run4_135571512376109"
180 | "179","Run5_120703436077806"
181 | "180","Run4_162090668374451"
182 | "181","Run4_200570500410678"
183 | "182","Run5_170798612175131"
184 | "183","Run4_134447976003501"
185 | "184","Run4_134448002525940"
186 | "185","Run4_134376349616557"
187 | "186","Run4_130144733412140"
188 | "187","Run5_227853877790557"
189 | "188","Run4_126227588791004"
190 | "189","Run4_164818762807660"
191 | "190","Run5_227305214368564"
192 | "191","Run5_160903141124900"
193 | "192","Run5_196527701059949"
194 | "193","Run4_241106402073885"
195 | "194","Run4_199962347330923"
196 | "195","Run5_204762120841651"
197 | "196","Run5_200563808655086"
198 | "197","Run5_204954185882334"
199 | "198","Run5_201523599693555"
200 | "199","Run5_232318649121630"
201 | "200","Run4_236785415612150"
202 | "201","Run4_239458073925534"
203 | "202","Run5_164631977839027"
204 | "203","Run5_131097927993718"
205 | "204","Run5_226265813962150"
206 | "205","Run5_131217382140251"
207 | "206","Run5_204959708010213"
208 | "207","Run5_160440058596140"
209 | "208","Run4_129565671210277"
210 | "209","Run4_200441249359709"
211 | "210","Run4_195562319506654"
212 | "211","Run4_191210578165158"
213 | "212","Run4_191774111938413"
214 | "213","Run5_169794778294622"
215 | "214","Run4_236768055152038"
216 | "215","Run5_161960476956974"
217 | "216","Run4_170199731882726"
218 | "217","Run4_239468516337884"
219 | "218","Run5_201114484915573"
220 | "219","Run5_200922572711788"
221 | "220","Run4_240634645371700"
222 | "221","Run4_228042023823214"
223 | "222","Run5_169014052698013"
224 | "223","Run5_205510805765038"
225 | "224","Run4_170180373273324"
226 | "225","Run5_236082068118877"
227 | "226","Run4_231900127545652"
228 | "227","Run5_196715518278579"
229 | "228","Run5_240490718546668"
230 | "229","Run4_200983461354781"
231 | "230","Run4_131310516590814"
232 | "231","Run4_162001560395102"
233 | "232","Run5_192678095637236"
234 | "233","Run4_230741457394027"
235 | "234","Run5_232302646085862"
236 | "235","Run5_227357125462838"
237 | "236","Run5_129497369332638"
238 | "237","Run4_195625772766949"
239 | "238","Run4_121202324139420"
240 | "239","Run4_169768874531107"
241 | "240","Run4_125042414278451"
242 | "241","Run5_125197018937182"
243 | "242","Run5_126706342876973"
244 | "243","Run5_120786805020899"
245 | "244","Run4_200899053082478"
246 | "245","Run4_232301316466550"
247 | "246","Run4_131242078170357"
248 | "247","Run5_192745876088540"
249 | "248","Run5_126706311458101"
250 | "249","Run4_121812496309539"
251 | "250","Run5_196180763102627"
252 | "251","Run4_169174309156595"
253 | "252","Run5_228042069723892"
254 | "253","Run5_226283261651870"
255 | "254","Run4_239596291410229"
256 | "255","Run4_161548562741995"
257 | "256","Run4_129565671779052"
258 | "257","Run5_227853850536860"
259 | "258","Run4_236650124458796"
260 | "259","Run4_195615853792491"
261 | "260","Run5_134522331948828"
262 | "261","Run4_165325596974387"
263 | "262","Run5_227991469641059"
264 | "263","Run4_134448002779892"
265 | "264","Run5_239407492945651"
266 | "265","Run5_126681615386990"
267 | "266","Run4_230809071209270"
268 | "267","Run5_121896094784430"
269 | "268","Run4_239468515839403"
270 | "269","Run4_197060142685020"
271 | "270","Run5_125719958739371"
272 | "271","Run4_241098858289510"
273 | "272","Run5_134377557416179"
274 | "273","Run4_125719977541037"
275 | "274","Run4_226949759945627"
276 | "275","Run4_227844233247005"
277 | "276","Run4_125719977548197"
278 | "277","Run5_230741456907635"
279 | "278","Run5_164753426012523"
280 | "279","Run4_126776271099742"
281 | "280","Run5_133974589627685"
282 | "281","Run4_196043578137333"
283 | "282","Run4_232330307783476"
284 | "283","Run5_126836668483956"
285 | "284","Run5_235199332793123"
286 | "285","Run5_240617733974324"
287 | "286","Run4_161893246425373"
288 | "287","Run5_200983476648171"
289 | "288","Run4_133895294208868"
290 | "289","Run5_160990068434789"
291 | "290","Run4_161451911564523"
292 | "291","Run4_240490745285349"
293 | "292","Run5_125042414704491"
294 | "293","Run5_236639654950813"
295 | "294","Run4_160447874844573"
296 | "295","Run4_231291328088997"
297 | "296","Run4_197256786963700"
298 | "297","Run5_130203891448094"
299 | "298","Run4_195622417386716"
300 | "299","Run4_130185503820724"
301 | "300","Run5_120726897153838"
302 | "301","Run4_170344372688676"
303 | "302","Run4_231365403663132"
304 | "303","Run5_126765621164972"
305 | "304","Run4_231891403167645"
306 | "305","Run5_226970322292126"
307 | "306","Run5_131293471160755"
308 | "307","Run4_231897711163678"
309 | "308","Run5_231291328542958"
310 | "309","Run5_160440058755507"
311 | "310","Run4_196527729007907"
312 | "311","Run4_165385699191715"
313 | "312","Run4_235199345678774"
314 | "313","Run5_165741390641899"
315 | "314","Run4_134376302950822"
316 | "315","Run5_227982982933812"
317 | "316","Run4_197213568841971"
318 | "317","Run5_195625785604396"
319 | "318","Run4_166332095651107"
320 | "319","Run4_170198344091502"
321 | "320","Run4_235763078646708"
322 | "321","Run4_157562564695862"
323 | "322","Run4_201111666641630"
324 | "323","Run4_201111685483379"
325 | "324","Run4_231711851559731"
326 | "325","Run5_199934564264886"
327 | "326","Run5_240498368433515"
328 | "327","Run4_241057654323483"
329 | "328","Run4_200348223530852"
330 | "329","Run5_120797898435828"
331 | "330","Run4_162090699901166"
332 | "331","Run4_166400431147246"
333 | "332","Run5_227903423569126"
334 | "333","Run5_232261741046116"
335 | "334","Run5_231760541113126"
336 | "335","Run4_161883180383004"
337 | "336","Run5_226958767155629"
338 | "337","Run5_192745863559517"
339 | "338","Run5_191760467873139"
340 | "339","Run4_240482289609438"
341 | "340","Run4_170877933992798"
342 | "341","Run5_164753445435613"
343 | "342","Run5_231291328292659"
344 | "343","Run5_191774126987494"
345 | "344","Run4_131242051128684"
346 | "345","Run5_197135453862187"
347 | "346","Run5_170265184692014"
348 | "347","Run4_196174352079662"
349 | "348","Run5_191691448281523"
350 | "349","Run4_122310412323676"
351 | "350","Run4_121965753884006"
352 | "351","Run5_122293352064750"
353 | "352","Run5_134531742550430"
354 | "353","Run4_230732701551332"
355 | "354","Run4_191080758131627"
356 | "355","Run4_192101737523115"
357 | "356","Run5_161883167537899"
358 | "357","Run4_134033659874678"
359 | "358","Run5_157486628656875"
360 | "359","Run5_239408816215900"
361 | "360","Run5_227844201564581"
362 | "361","Run4_191647322069734"
363 | "362","Run5_204756752198491"
364 | "363","Run4_236167209045339"
365 | "364","Run4_130194240822060"
366 | "365","Run4_191647291132852"
367 | "366","Run5_130144732663078"
368 | "367","Run5_201670683478364"
369 | "368","Run4_120864497952619"
370 | "369","Run5_169174309227379"
371 | "370","Run5_170276727507299"
372 | "371","Run5_232327508278067"
373 | "372","Run5_126776251632476"
374 | "373","Run4_241106420979955"
375 | "374","Run4_122308384450908"
376 | "375","Run4_226970310434140"
377 | "376","Run4_126886700825827"
378 | "377","Run4_240490732735862"
379 | "378","Run4_235198559214838"
380 | "379","Run4_166400399718763"
381 | "380","Run5_166264181209964"
382 | "381","Run4_165302242536245"
383 | "382","Run5_227364251261236"
384 | "383","Run5_195546212919582"
385 | "384","Run4_241098904779638"
386 | "385","Run4_126364088155371"
387 | "386","Run4_130693100129694"
388 | "387","Run5_162078842833638"
389 | "388","Run4_200363958975844"
390 | "389","Run5_236177497470814"
391 | "390","Run4_126886700599133"
392 | "391","Run4_164631946934515"
393 | "392","Run4_134539674041203"
394 | "393","Run4_235695789762414"
395 | "394","Run4_170137155200884"
396 | "395","Run5_235533413242717"
397 | "396","Run4_231770055399670"
398 | "397","Run4_170745979922142"
399 | "398","Run5_230749746972902"
400 | "399","Run4_231754903800667"
401 | "400","Run5_236177512348958"
402 | "401","Run5_235737666242989"
403 | "402","Run5_129663543397093"
404 | "403","Run4_126706343636326"
405 | "404","Run5_195958617790259"
406 | "405","Run5_226205940444581"
407 | "406","Run4_200562850811166"
408 | "407","Run5_161462676024222"
409 | "408","Run5_166400412274019"
410 | "409","Run4_231279536236910"
411 | "410","Run5_169795603024612"
412 | "411","Run4_195967101618597"
413 | "412","Run5_239596318419685"
414 | "413","Run4_204224148454835"
415 | "414","Run5_205527685680502"
416 | "415","Run4_201474310264686"
417 | "416","Run4_196163864287654"
418 | "417","Run5_197188214515957"
419 | "418","Run5_230807894976284"
420 | "419","Run4_226265786076572"
421 | "420","Run4_156463307344229"
422 | "421","Run5_197213568489189"
423 | "422","Run5_129978437586219"
424 | "423","Run5_162078842349987"
425 | "424","Run5_196019714186036"
426 | "425","Run4_232449749174174"
427 | "426","Run4_157477617129885"
428 | "427","Run5_231280341306278"
429 | "428","Run5_240634599983342"
430 | "429","Run4_134539692072302"
431 | "430","Run5_196019687483237"
432 | "431","Run5_226283234735902"
433 | "432","Run5_170276754217310"
434 | "433","Run5_133829949614302"
435 | "434","Run5_169768874358630"
436 | "435","Run5_164761077148979"
437 | "436","Run4_195562273655733"
438 | "437","Run5_192255417076019"
439 | "438","Run4_169794751290212"
440 | "439","Run5_235007433391339"
441 | "440","Run5_227991450671990"
442 | "441","Run4_125197033818014"
443 | "442","Run5_166290909227805"
444 | "443","Run4_231270765686061"
445 | "444","Run4_191026787674525"
446 | "445","Run5_196715471816621"
447 | "446","Run4_227510415157549"
448 | "447","Run5_227973438224811"
449 | "448","Run4_235737666173734"
450 | "449","Run5_121879183911859"
451 | "450","Run5_131309590173998"
452 | "451","Run4_169022462769963"
453 | "452","Run5_166416939870565"
454 | "453","Run5_165164925765363"
455 | "454","Run5_235763060394844"
456 | "455","Run5_155971426576092"
457 | "456","Run5_227516829882603"
458 | "457","Run5_156933517855093"
459 | "458","Run4_191026787670262"
460 | "459","Run5_121319764642219"
461 | "460","Run5_164698906278253"
462 | "461","Run5_160785117669293"
463 | "462","Run5_121276786920878"
464 | "463","Run5_155911403748787"
465 | "464","Run5_125239700146011"
466 | "465","Run5_236221700958053"
467 | "466","Run5_204825337420060"
468 | "467","Run4_129497382115619"
469 | "468","Run4_125728252684085"
470 | "469","Run4_195426088052590"
471 | "470","Run4_130142298336179"
472 | "471","Run5_164753399208804"
473 | "472","Run5_191708896914742"
474 | "473","Run5_192121092658916"
475 | "474","Run5_157690352593243"
476 | "475","Run4_236768070092718"
477 | "476","Run5_239408835385565"
478 | "477","Run5_204358278368691"
479 | "478","Run4_240136813497564"
480 | "479","Run5_130529086105910"
481 | "480","Run5_160785117669683"
482 | "481","Run4_170276741868852"
483 | "482","Run5_227921645431155"
484 | "483","Run5_235070752148316"
485 | "484","Run4_200922526825900"
486 | "485","Run5_161893265356581"
487 | "486","Run5_235601986762677"
488 | "487","Run4_235198559475508"
489 | "488","Run5_130529086859099"
490 | "489","Run4_199962347296556"
491 | "490","Run4_235129955600805"
492 | "491","Run5_239932687441126"
493 | "492","Run5_130194213035254"
494 | "493","Run4_197188213762932"
495 | "494","Run4_130142297803558"
496 | "495","Run5_239587881241958"
497 | "496","Run4_236221669165997"
498 | "497","Run5_134447989975788"
499 | "498","Run5_231340604583221"
500 | "499","Run4_130529086818542"
501 | "500","Run4_129457538058668"
502 | "501","Run5_227354856024420"
503 | "502","Run4_164631978096541"
504 | "503","Run4_166416894024044"
505 | "504","Run5_227930535311277"
506 | "505","Run5_199944394402717"
507 | "506","Run5_162001548134131"
508 | "507","Run4_130529132723550"
509 | "508","Run4_131309562153836"
510 | "509","Run4_121202311609131"
511 | "510","Run5_121319736597797"
512 | "511","Run5_235188984072027"
513 | "512","Run5_129457491692468"
514 | "513","Run5_191553907124134"
515 | "514","Run4_192325895502708"
516 | "515","Run4_170198390135595"
517 | "516","Run5_240482289375084"
518 | "517","Run5_161890952759717"
519 | "518","Run5_204825324018989"
520 | "519","Run5_226343142447326"
521 | "520","Run4_235198527781284"
522 | "521","Run5_126765636139947"
523 | "522","Run5_227357156624308"
524 | "523","Run4_122282776423132"
525 | "524","Run4_129457519708470"
526 | "525","Run4_122308430620398"
527 | "526","Run4_235070752934772"
528 | "527","Run4_199934565079284"
529 | "528","Run5_230809083768053"
530 | "529","Run5_129457519184181"
531 | "530","Run4_166402846776101"
532 | "531","Run5_200416674364835"
533 | "532","Run5_239382643984237"
534 | "533","Run5_134447990234011"
535 | "534","Run5_235533432441715"
536 | "535","Run5_226829547517806"
537 | "536","Run4_164698921291108"
538 | "537","Run5_135064452549364"
539 | "538","Run5_235070752209780"
540 | "539","Run4_134936689588014"
541 | "540","Run4_126227569621427"
542 | "541","Run4_195958632734107"
543 | "542","Run4_231692907690797"
544 | "543","Run5_157004353465123"
545 | "544","Run5_236846734231900"
546 | "545","Run4_126834220845997"
547 | "546","Run4_232439042431772"
548 | "547","Run5_226901441268660"
549 | "548","Run4_122436428412653"
550 | "549","Run5_227982982691117"
551 | "550","Run4_134936676650349"
552 | "551","Run5_191226652777307"
553 | "552","Run5_227990499191587"
554 | "553","Run5_122282776614758"
555 | "554","Run5_205510806027174"
556 | "555","Run5_170798580160300"
557 | "556","Run5_164698933651885"
558 | "557","Run4_126218845501230"
559 | "558","Run5_121812477929373"
560 | "559","Run4_241114562055595"
561 | "560","Run5_227354875717342"
562 | "561","Run5_166416920763701"
563 | "562","Run4_169174277412580"
564 | "563","Run4_230652439090988"
565 | "564","Run4_230799438727924"
566 | "565","Run5_129457506937244"
567 | "566","Run5_227903404206819"
568 | "567","Run5_121319736923940"
569 | "568","Run4_196180748720476"
570 | "569","Run4_231291315473310"
571 | "570","Run4_197264047290742"
572 | "571","Run5_169014006758196"
573 | "572","Run5_200974093211372"
574 | "573","Run5_120703409670492"
575 | "574","Run4_161451911605148"
576 | "575","Run5_169014033836460"
577 | "576","Run4_161883153348404"
578 | "577","Run4_192180120643819"
579 | "578","Run4_241057668974364"
580 | "579","Run5_235763032582965"
581 | "580","Run5_166400384687350"
582 | "581","Run5_126886699980715"
583 | "582","Run4_164761063865758"
584 | "583","Run5_205527670966709"
585 | "584","Run4_130184146991460"
586 | "585","Run5_133829929907949"
587 | "586","Run5_196043592617316"
588 | "587","Run5_240001541139755"
589 | "588","Run4_239468561943923"
590 | "589","Run5_204224163363755"
591 | "590","Run5_130142297810790"
592 | "591","Run5_135081885584621"
593 | "592","Run4_200983461189340"
594 |
--------------------------------------------------------------------------------
/notebooks/comparisons/results/fateid/Ery_order.csv:
--------------------------------------------------------------------------------
1 | "","t2"
2 | "1","Run4_130142298659099"
3 | "2","Run4_155911430977836"
4 | "3","Run4_236650105264556"
5 | "4","Run5_121276786923948"
6 | "5","Run5_121319737121187"
7 | "6","Run5_121742671894326"
8 | "7","Run5_121896083017067"
9 | "8","Run5_156515920426790"
10 | "9","Run5_165741421874550"
11 | "10","Run5_166351410154397"
12 | "11","Run5_169723777145627"
13 | "12","Run5_197256771755294"
14 | "13","Run5_205870209919347"
15 | "14","Run5_226806174202228"
16 | "15","Run5_226953384323893"
17 | "16","Run5_239596291184941"
18 | "17","Run4_204359620552548"
19 | "18","Run4_170276741343972"
20 | "19","Run4_235007414484187"
21 | "20","Run5_226205971995877"
22 | "21","Run5_131175256832285"
23 | "22","Run5_191553860978972"
24 | "23","Run4_227973438560036"
25 | "24","Run5_204756751867293"
26 | "25","Run4_126218818710238"
27 | "26","Run4_121742671661982"
28 | "27","Run5_160990068329317"
29 | "28","Run4_201670702021045"
30 | "29","Run4_130736333471003"
31 | "30","Run4_170268049172398"
32 | "31","Run5_201114458372918"
33 | "32","Run4_165164911094003"
34 | "33","Run5_236169981277996"
35 | "34","Run5_230809103154419"
36 | "35","Run4_170258488281828"
37 | "36","Run5_169768846837492"
38 | "37","Run5_130531382381404"
39 | "38","Run5_200562869747052"
40 | "39","Run5_121896095594741"
41 | "40","Run4_161462648487205"
42 | "41","Run4_130057607240948"
43 | "42","Run4_192121064778654"
44 | "43","Run5_135081932307366"
45 | "44","Run4_166351422549219"
46 | "45","Run5_164753445702515"
47 | "46","Run4_228042051639012"
48 | "47","Run5_204403356874997"
49 | "48","Run4_235129954749341"
50 | "49","Run4_239407461947620"
51 | "50","Run5_192678083644829"
52 | "51","Run5_192747102420395"
53 | "52","Run4_191645443803486"
54 | "53","Run4_120797945018284"
55 | "54","Run5_240498414937837"
56 | "55","Run5_161462663689971"
57 | "56","Run4_157140481390493"
58 | "57","Run5_226953398269812"
59 | "58","Run4_232327508286387"
60 | "59","Run4_239448275970990"
61 | "60","Run5_130142316719326"
62 | "61","Run4_204763059935523"
63 | "62","Run4_235559336233702"
64 | "63","Run4_192859711334757"
65 | "64","Run4_239448248936886"
66 | "65","Run5_130185489499868"
67 | "66","Run4_200562869397916"
68 | "67","Run4_240154798308595"
69 | "68","Run4_170265230849779"
70 | "69","Run5_231692926573411"
71 | "70","Run4_232311355557748"
72 | "71","Run5_129978418448620"
73 | "72","Run5_197135439182046"
74 | "73","Run4_130142298127654"
75 | "74","Run4_196180748389739"
76 | "75","Run4_197185798555446"
77 | "76","Run5_195562319858547"
78 | "77","Run4_235069826000611"
79 | "78","Run5_236650124490590"
80 | "79","Run4_235199346129845"
81 | "80","Run5_235678522329899"
82 | "81","Run5_236167208548133"
83 | "82","Run5_230732686616430"
84 | "83","Run4_121812465170790"
85 | "84","Run4_170327475927477"
86 | "85","Run4_155972634078110"
87 | "86","Run4_241098904988574"
88 | "87","Run5_227853877819827"
89 | "88","Run5_121955407219108"
90 | "89","Run5_236177511832499"
91 | "90","Run4_195958633326307"
92 | "91","Run4_226265800857309"
93 | "92","Run5_227973438495990"
94 | "93","Run4_201670656740573"
95 | "94","Run4_204765463174555"
96 | "95","Run4_235626713532342"
97 | "96","Run5_204959708006827"
98 | "97","Run4_165385745418163"
99 | "98","Run5_121319783321011"
100 | "99","Run4_192325908097756"
101 | "100","Run5_227846649498395"
102 | "101","Run5_231711851628380"
103 | "102","Run5_239407446480605"
104 | "103","Run5_165302261693363"
105 | "104","Run5_239448248933108"
106 | "105","Run5_130144701369189"
107 | "106","Run5_226949787674870"
108 | "107","Run4_236633999137206"
109 | "108","Run4_160928730367731"
110 | "109","Run5_239587881762227"
111 | "110","Run5_241114589357942"
112 | "111","Run5_169156963424675"
113 | "112","Run5_240136800360374"
114 | "113","Run5_235601986776285"
115 | "114","Run4_165945548589915"
116 | "115","Run4_226283261941494"
117 | "116","Run4_240490764462491"
118 | "117","Run5_156453536717555"
119 | "118","Run5_195547402488691"
120 | "119","Run5_120703455284661"
121 | "120","Run4_227844187442397"
122 | "121","Run5_226394662987636"
123 | "122","Run5_170180358387485"
124 | "123","Run4_227975317540203"
125 | "124","Run5_227516857597294"
126 | "125","Run4_230807875762028"
127 | "126","Run4_164698921000299"
128 | "127","Run4_235533401209062"
129 | "128","Run5_239587850311396"
130 | "129","Run4_130754465351916"
131 | "130","Run5_192653655693100"
132 | "131","Run5_135081913084340"
133 | "132","Run5_122293380103390"
134 | "133","Run5_125719977089261"
135 | "134","Run4_126776224631582"
136 | "135","Run5_165188413545701"
137 | "136","Run5_230749746977195"
138 | "137","Run4_227363580922669"
139 | "138","Run5_226949759723830"
140 | "139","Run5_126707788369181"
141 | "140","Run5_231340558632877"
142 | "141","Run5_230741456637214"
143 | "142","Run5_197264047237411"
144 | "143","Run4_236650123947235"
145 | "144","Run4_160928758380782"
146 | "145","Run5_231917070280429"
147 | "146","Run4_235199318879141"
148 | "147","Run5_205922701096750"
149 | "148","Run4_166416908205293"
150 | "149","Run5_226394635692277"
151 | "150","Run5_230654437378996"
152 | "151","Run5_205381688060779"
153 | "152","Run5_196113282821429"
154 | "153","Run4_157072011967413"
155 | "154","Run5_157684166474486"
156 | "155","Run4_166400431123379"
157 | "156","Run5_120726924977910"
158 | "157","Run4_155972621262251"
159 | "158","Run4_161478635801460"
160 | "159","Run5_192745875824029"
161 | "160","Run5_239408816277428"
162 | "161","Run4_204959689431340"
163 | "162","Run5_169793557511413"
164 | "163","Run4_235069813181678"
165 | "164","Run4_130144686946166"
166 | "165","Run5_192745848129885"
167 | "166","Run5_201465586202923"
168 | "167","Run5_131310502198700"
169 | "168","Run5_162078870100196"
170 | "169","Run4_201474323212139"
171 | "170","Run4_235000971513075"
172 | "171","Run4_196019733085555"
173 | "172","Run5_227991451220918"
174 | "173","Run5_239458093546284"
175 | "174","Run5_235697715238645"
176 | "175","Run5_195623388559670"
177 | "176","Run5_170327461710572"
178 | "177","Run4_165325568949149"
179 | "178","Run5_195967089073564"
180 | "179","Run5_160996511074155"
181 | "180","Run5_131089728490724"
182 | "181","Run5_235214378003372"
183 | "182","Run4_126707819506413"
184 | "183","Run5_126886700497331"
185 | "184","Run4_134531742587125"
186 | "185","Run4_170198358251748"
187 | "186","Run4_231890207533877"
188 | "187","Run5_191647304015075"
189 | "188","Run5_232328281942245"
190 | "189","Run4_226901487896435"
191 | "190","Run4_125239714305827"
192 | "191","Run4_169723777735083"
193 | "192","Run4_156515947727276"
194 | "193","Run5_205870194965276"
195 | "194","Run4_197213581699892"
196 | "195","Run5_241114589051630"
197 | "196","Run4_131234312894774"
198 | "197","Run5_131097900824877"
199 | "198","Run5_130736364939547"
200 | "199","Run5_231891388447148"
201 | "200","Run4_239475387980214"
202 | "201","Run5_204765462948636"
203 | "202","Run5_165860319841012"
204 | "203","Run5_240617715883244"
205 | "204","Run4_162090668419428"
206 | "205","Run5_195426056628588"
207 | "206","Run5_226283280783795"
208 | "207","Run5_134531773294827"
209 | "208","Run4_204359601874859"
210 | "209","Run5_240634613909861"
211 | "210","Run5_196113282746270"
212 | "211","Run4_232302646515444"
213 | "212","Run5_191760421734701"
214 | "213","Run4_129457538329326"
215 | "214","Run5_131309590141149"
216 | "215","Run4_192180134984044"
217 | "216","Run4_135149155899102"
218 | "217","Run5_191708882168230"
219 | "218","Run5_230800378227622"
220 | "219","Run4_232330326096222"
221 | "220","Run5_135150485751668"
222 | "221","Run5_196527728802213"
223 | "222","Run4_121965741528803"
224 | "223","Run4_204360513969572"
225 | "224","Run4_230732732423086"
226 | "225","Run4_130624798276324"
227 | "226","Run5_161890983963060"
228 | "227","Run5_240482277252390"
229 | "228","Run4_239391521495341"
230 | "229","Run4_204763060394286"
231 | "230","Run5_197256787216805"
232 | "231","Run5_156515920468213"
233 | "232","Run4_165385744804644"
234 | "233","Run5_195967101593454"
235 | "234","Run5_232449748355932"
236 | "235","Run4_160928744992052"
237 | "236","Run4_230592321801117"
238 | "237","Run5_191553860713379"
239 | "238","Run4_227930535483109"
240 | "239","Run5_166288090655461"
241 | "240","Run4_200561495795932"
242 | "241","Run4_126707800401843"
243 | "242","Run4_205381656537828"
244 | "243","Run5_192101783386349"
245 | "244","Run4_232318649916766"
246 | "245","Run5_122308412197221"
247 | "246","Run4_134936708167980"
248 | "247","Run4_125042414733164"
249 | "248","Run4_235763079474548"
250 | "249","Run5_201595508942708"
251 | "250","Run5_130529100716782"
252 | "251","Run5_191708882659611"
253 | "252","Run4_170265199196588"
254 | "253","Run4_120797898946870"
255 | "254","Run4_131097928022955"
256 | "255","Run4_191210578200421"
257 | "256","Run4_126132159462771"
258 | "257","Run5_236768100990253"
259 | "258","Run4_226874732506019"
260 | "259","Run5_230800346954166"
261 | "260","Run4_131242050939755"
262 | "261","Run4_156463306828011"
263 | "262","Run5_227305195793196"
264 | "263","Run5_235695836354861"
265 | "264","Run5_161548575394725"
266 | "265","Run4_130142316947294"
267 | "266","Run5_192315277915955"
268 | "267","Run4_196163852160430"
269 | "268","Run4_164753426303725"
270 | "269","Run5_197264019737437"
271 | "270","Run4_236639623530278"
272 | "271","Run5_164769505164701"
273 | "272","Run5_231760540657515"
274 | "273","Run5_197213599947573"
275 | "274","Run4_235199332796637"
276 | "275","Run5_126836637060515"
277 | "276","Run5_239468543363316"
278 | "277","Run5_131309608286054"
279 | "278","Run4_192859665184630"
280 | "279","Run4_227991451036963"
281 | "280","Run4_165945548167606"
282 | "281","Run5_160447874328294"
283 | "282","Run4_164818809441715"
284 | "283","Run5_231770082461547"
285 | "284","Run5_169579734755611"
286 | "285","Run5_226970322222389"
287 | "286","Run4_240498414410590"
288 | "287","Run5_165945520642284"
289 | "288","Run4_135014255343900"
290 | "289","Run4_204213438037211"
291 | "290","Run4_232327462206683"
292 | "291","Run4_121965773241694"
293 | "292","Run5_121955406957492"
294 | "293","Run5_170327476157284"
295 | "294","Run5_239458093263795"
296 | "295","Run4_131234280914166"
297 | "296","Run5_204825309337963"
298 | "297","Run4_191760436250853"
299 | "298","Run5_170798580418413"
300 | "299","Run4_204213426011356"
301 | "300","Run5_130126448114988"
302 | "301","Run5_169158202808677"
303 | "302","Run4_204360540904307"
304 | "303","Run4_135081913600813"
305 | "304","Run4_205510759631086"
306 | "305","Run5_201595494283171"
307 | "306","Run5_192678095603557"
308 | "307","Run5_195958645050611"
309 | "308","Run5_239391474670813"
310 | "309","Run5_157684197702566"
311 | "310","Run4_195606458297180"
312 | "311","Run5_191647290907571"
313 | "312","Run4_133895281851236"
314 | "313","Run4_204427131571635"
315 | "314","Run4_191647276193582"
316 | "315","Run4_195426042109174"
317 | "316","Run5_126227569404333"
318 | "317","Run5_197060170008990"
319 | "318","Run4_192121092557547"
320 | "319","Run4_196527701277405"
321 | "320","Run5_195625773387165"
322 | "321","Run5_235737680669475"
323 | "322","Run4_126707773105894"
324 | "323","Run4_240001494992669"
325 | "324","Run5_235678490913572"
326 | "325","Run4_191708897331555"
327 | "326","Run4_166400431373164"
328 | "327","Run5_165385713138932"
329 | "328","Run5_160440070850909"
330 | "329","Run5_134600596020581"
331 | "330","Run4_170327476169508"
332 | "331","Run4_200426592069939"
333 | "332","Run5_165188413082030"
334 | "333","Run4_122293367564598"
335 | "334","Run5_134041208282342"
336 | "335","Run5_131293483227549"
337 | "336","Run4_195615840460084"
338 | "337","Run4_231890207794995"
339 | "338","Run5_155971426310875"
340 | "339","Run5_131234280957724"
341 | "340","Run5_196043592910699"
342 | "341","Run5_195623370049883"
343 | "342","Run5_227846617222428"
344 | "343","Run4_197060157631339"
345 | "344","Run5_156024161557286"
346 | "345","Run4_170137155157238"
347 | "346","Run4_197060189186996"
348 | "347","Run5_195958617892636"
349 | "348","Run4_120703436057910"
350 | "349","Run5_200441237006260"
351 | "350","Run5_231760540621548"
352 | "351","Run4_135149156194140"
353 | "352","Run5_200994225872100"
354 | "353","Run5_170327460919661"
355 | "354","Run4_240634645822899"
356 | "355","Run5_235559336171301"
357 | "356","Run4_126834252630899"
358 | "357","Run4_226953429932837"
359 | "358","Run5_134592125360374"
360 | "359","Run4_169794766224757"
361 | "360","Run5_196638190225763"
362 | "361","Run5_165164957252445"
363 | "362","Run5_169174290291051"
364 | "363","Run4_170198358768860"
365 | "364","Run5_156453554772764"
366 | "365","Run5_122436443553141"
367 | "366","Run5_195562288142755"
368 | "367","Run5_196019701636523"
369 | "368","Run5_169768893692214"
370 | "369","Run5_133854479994206"
371 | "370","Run5_157536960572838"
372 | "371","Run5_227991469870389"
373 | "372","Run4_239932655712989"
374 | "373","Run5_241098905205038"
375 | "374","Run4_131175210441444"
376 | "375","Run5_169727132587886"
377 | "376","Run4_230592310074590"
378 | "377","Run5_236175633364835"
379 | "378","Run4_166469265381213"
380 | "379","Run5_155971444979997"
381 | "380","Run5_120786804751086"
382 | "381","Run5_120864484738990"
383 | "382","Run5_235697669064539"
384 | "383","Run5_120726924413669"
385 | "384","Run5_232450554023270"
386 | "385","Run4_191215678445341"
387 | "386","Run4_191576409230699"
388 | "387","Run5_161462649252574"
389 | "388","Run5_133974590118189"
390 | "389","Run5_199934564223404"
391 | "390","Run4_204765463759213"
392 |
--------------------------------------------------------------------------------
/notebooks/manuscript_data.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "68a5c2f5-9391-4170-b5ea-9df9ad5eafb4",
6 | "metadata": {},
7 | "source": [
8 | "# Access and Analyze `scanpy anndata` Objects from a Manuscript\n",
9 | "\n",
10 | "This guide provides steps to access and analyze the `scanpy anndata` objects associated with a recent manuscript. These objects are essential for computational biologists and data scientists working in genomics and related fields. There are three replicates available for download:\n",
11 | "\n",
12 | "- [Replicate 1 (Rep1)](https://s3.amazonaws.com/dp-lab-data-public/palantir/human_cd34_bm_rep1.h5ad)\n",
13 | "- [Replicate 2 (Rep2)](https://s3.amazonaws.com/dp-lab-data-public/palantir/human_cd34_bm_rep2.h5ad)\n",
14 | "- [Replicate 3 (Rep3)](https://s3.amazonaws.com/dp-lab-data-public/palantir/human_cd34_bm_rep3.h5ad)\n",
15 | "\n",
16 | "Each `anndata` object contains several elements crucial for comprehensive data analysis:\n",
17 | "\n",
18 | "1. `.X`: Filtered, normalized, and log-transformed count matrix.\n",
19 | "2. `.raw`: Original, filtered raw count matrix.\n",
20 | "3. `.obsm['MAGIC_imputed_data']`: Imputed count matrix using MAGIC algorithm.\n",
21 | "4. `.obsm['tsne']`: t-SNE maps (as presented in the manuscript), generated using scaled diffusion components.\n",
22 | "5. `.obs['clusters']`: Cell clustering information.\n",
23 | "6. `.obs['palantir_pseudotime']`: Cell pseudo-time ordering, as determined by Palantir.\n",
24 | "7. `.obs['palantir_diff_potential']`: Palantir-determined differentiation potential of cells.\n",
25 | "8. `.obsm['palantir_branch_probs']`: Probabilities of cells branching into different lineages, according to Palantir.\n",
26 | "9. `.uns['palantir_branch_probs_cell_types']`: Labels for Palantir branch probabilities.\n",
27 | "10. `.uns['ct_colors']`: Color codes for cell types, as used in the manuscript.\n",
28 | "11. `.uns['cluster_colors']`: Color codes for cell clusters, as used in the manuscript.\n",
29 | "\n",
30 | "## Python Code for Data Access:"
31 | ]
32 | },
33 | {
34 | "cell_type": "code",
35 | "execution_count": 1,
36 | "id": "63f356a7-3856-4596-a7b3-9fc05cc3029a",
37 | "metadata": {
38 | "execution": {
39 | "iopub.execute_input": "2023-11-28T21:20:46.755293Z",
40 | "iopub.status.busy": "2023-11-28T21:20:46.755059Z",
41 | "iopub.status.idle": "2023-11-28T21:20:59.646740Z",
42 | "shell.execute_reply": "2023-11-28T21:20:59.645355Z",
43 | "shell.execute_reply.started": "2023-11-28T21:20:46.755266Z"
44 | }
45 | },
46 | "outputs": [],
47 | "source": [
48 | "import scanpy as sc\n",
49 | "\n",
50 | "# Read in the data, with backup URLs provided\n",
51 | "adata_Rep1 = sc.read(\n",
52 | " \"../data/human_cd34_bm_rep1.h5ad\",\n",
53 | " backup_url=\"https://s3.amazonaws.com/dp-lab-data-public/palantir/human_cd34_bm_rep1.h5ad\",\n",
54 | ")\n",
55 | "adata_Rep2 = sc.read(\n",
56 | " \"../data/human_cd34_bm_rep2.h5ad\",\n",
57 | " backup_url=\"https://s3.amazonaws.com/dp-lab-data-public/palantir/human_cd34_bm_rep2.h5ad\",\n",
58 | ")\n",
59 | "adata_Rep3 = sc.read(\n",
60 | " \"../data/human_cd34_bm_rep3.h5ad\",\n",
61 | " backup_url=\"https://s3.amazonaws.com/dp-lab-data-public/palantir/human_cd34_bm_rep3.h5ad\",\n",
62 | ")"
63 | ]
64 | },
65 | {
66 | "cell_type": "code",
67 | "execution_count": 2,
68 | "id": "bee4a735-7c47-415a-b1e3-ee776998dbd5",
69 | "metadata": {
70 | "execution": {
71 | "iopub.execute_input": "2023-11-28T21:20:59.650053Z",
72 | "iopub.status.busy": "2023-11-28T21:20:59.649313Z",
73 | "iopub.status.idle": "2023-11-28T21:20:59.659463Z",
74 | "shell.execute_reply": "2023-11-28T21:20:59.658910Z",
75 | "shell.execute_reply.started": "2023-11-28T21:20:59.650021Z"
76 | }
77 | },
78 | "outputs": [
79 | {
80 | "data": {
81 | "text/plain": [
82 | "AnnData object with n_obs × n_vars = 5780 × 14651\n",
83 | " obs: 'clusters', 'palantir_pseudotime', 'palantir_diff_potential'\n",
84 | " uns: 'cluster_colors', 'ct_colors', 'palantir_branch_probs_cell_types'\n",
85 | " obsm: 'tsne', 'MAGIC_imputed_data', 'palantir_branch_probs'"
86 | ]
87 | },
88 | "execution_count": 2,
89 | "metadata": {},
90 | "output_type": "execute_result"
91 | }
92 | ],
93 | "source": [
94 | "adata_Rep1"
95 | ]
96 | },
97 | {
98 | "cell_type": "code",
99 | "execution_count": 3,
100 | "id": "515e6760-8f95-42d6-87ba-1a2375797ccf",
101 | "metadata": {
102 | "execution": {
103 | "iopub.execute_input": "2023-11-28T21:20:59.660313Z",
104 | "iopub.status.busy": "2023-11-28T21:20:59.660133Z",
105 | "iopub.status.idle": "2023-11-28T21:20:59.676952Z",
106 | "shell.execute_reply": "2023-11-28T21:20:59.676283Z",
107 | "shell.execute_reply.started": "2023-11-28T21:20:59.660295Z"
108 | }
109 | },
110 | "outputs": [
111 | {
112 | "data": {
113 | "text/plain": [
114 | "AnnData object with n_obs × n_vars = 6501 × 14913\n",
115 | " obs: 'clusters', 'palantir_pseudotime', 'palantir_diff_potential'\n",
116 | " uns: 'cluster_colors', 'ct_colors', 'palantir_branch_probs_cell_types'\n",
117 | " obsm: 'tsne', 'MAGIC_imputed_data', 'palantir_branch_probs'"
118 | ]
119 | },
120 | "execution_count": 3,
121 | "metadata": {},
122 | "output_type": "execute_result"
123 | }
124 | ],
125 | "source": [
126 | "adata_Rep2"
127 | ]
128 | },
129 | {
130 | "cell_type": "code",
131 | "execution_count": 4,
132 | "id": "61d7a8e0-0916-4099-8982-5599d7166104",
133 | "metadata": {
134 | "execution": {
135 | "iopub.execute_input": "2023-11-28T21:20:59.678250Z",
136 | "iopub.status.busy": "2023-11-28T21:20:59.677863Z",
137 | "iopub.status.idle": "2023-11-28T21:20:59.691822Z",
138 | "shell.execute_reply": "2023-11-28T21:20:59.691131Z",
139 | "shell.execute_reply.started": "2023-11-28T21:20:59.678220Z"
140 | }
141 | },
142 | "outputs": [
143 | {
144 | "data": {
145 | "text/plain": [
146 | "AnnData object with n_obs × n_vars = 12046 × 14044\n",
147 | " obs: 'clusters', 'palantir_pseudotime', 'palantir_diff_potential'\n",
148 | " uns: 'cluster_colors', 'ct_colors', 'palantir_branch_probs_cell_types'\n",
149 | " obsm: 'tsne', 'MAGIC_imputed_data', 'palantir_branch_probs'"
150 | ]
151 | },
152 | "execution_count": 4,
153 | "metadata": {},
154 | "output_type": "execute_result"
155 | }
156 | ],
157 | "source": [
158 | "adata_Rep3"
159 | ]
160 | },
161 | {
162 | "cell_type": "markdown",
163 | "id": "b057a720-f0f4-40b0-8bcf-02efc9b2124d",
164 | "metadata": {
165 | "execution": {
166 | "iopub.execute_input": "2023-11-28T19:21:40.634650Z",
167 | "iopub.status.busy": "2023-11-28T19:21:40.634039Z",
168 | "iopub.status.idle": "2023-11-28T19:21:40.647637Z",
169 | "shell.execute_reply": "2023-11-28T19:21:40.646498Z",
170 | "shell.execute_reply.started": "2023-11-28T19:21:40.634595Z"
171 | }
172 | },
173 | "source": [
174 | "# Converting `anndata` Objects to `Seurat` Objects Using R\n",
175 | "\n",
176 | "For researchers working with R and Seurat, the process to convert `anndata` objects to Seurat objects involves the following steps:\n",
177 | "\n",
178 | "1. **Set Up R Environment and Libraries**:\n",
179 | " - Load the necessary libraries: `Seurat` and `anndata`.\n",
180 | "\n",
181 | "2. **Download and Read the Data**:\n",
182 | " - Use `curl::curl_download` to download the `anndata` from the provided URLs.\n",
183 | " - Read the data using the `read_h5ad` method from the `anndata` library.\n",
184 | "\n",
185 | "3. **Create Seurat Objects**:\n",
186 | " - Use the `CreateSeuratObject` function to convert the data into Seurat objects, incorporating counts and metadata from the `anndata` object.\n",
187 | " - Transfer additional data like tSNE embeddings, imputed gene expressions, and cell fate probabilities into the appropriate slots in the Seurat object.\n",
188 | "\n",
189 | "### R Code Snippet:"
190 | ]
191 | },
192 | {
193 | "cell_type": "code",
194 | "execution_count": null,
195 | "id": "562d56fb-80dc-4f44-8266-3ca559e79106",
196 | "metadata": {
197 | "jupyter": {
198 | "source_hidden": true
199 | }
200 | },
201 | "outputs": [],
202 | "source": [
203 | "# this cell only exists to allow running R code inside this python notebook using a conda kernel\n",
204 | "import sys\n",
205 | "import os\n",
206 | "\n",
207 | "# Get the path to the python executable\n",
208 | "python_executable_path = sys.executable\n",
209 | "\n",
210 | "# Extract the path to the environment from the path to the python executable\n",
211 | "env_path = os.path.dirname(os.path.dirname(python_executable_path))\n",
212 | "\n",
213 | "print(\n",
214 | " f\"Conda env path: {env_path}\\n\"\n",
215 | " \"Please make sure you have R installed in the conda environment.\"\n",
216 | ")\n",
217 | "\n",
218 | "os.environ['R_HOME'] = os.path.join(env_path, 'lib', 'R')\n",
219 | "\n",
220 | "%load_ext rpy2.ipython"
221 | ]
222 | },
223 | {
224 | "cell_type": "code",
225 | "execution_count": 6,
226 | "id": "ed46f119-e8be-45ba-b447-b46e8b947cf8",
227 | "metadata": {
228 | "execution": {
229 | "iopub.execute_input": "2023-11-28T21:21:01.081154Z",
230 | "iopub.status.busy": "2023-11-28T21:21:01.080675Z",
231 | "iopub.status.idle": "2023-11-28T21:23:08.313753Z",
232 | "shell.execute_reply": "2023-11-28T21:23:08.313058Z",
233 | "shell.execute_reply.started": "2023-11-28T21:21:01.081128Z"
234 | }
235 | },
236 | "outputs": [
237 | {
238 | "name": "stderr",
239 | "output_type": "stream",
240 | "text": [
241 | "R[write to console]: Loading required package: SeuratObject\n",
242 | "\n",
243 | "R[write to console]: Loading required package: sp\n",
244 | "\n",
245 | "R[write to console]: \n",
246 | "Attaching package: ‘SeuratObject’\n",
247 | "\n",
248 | "\n",
249 | "R[write to console]: The following object is masked from ‘package:base’:\n",
250 | "\n",
251 | " intersect\n",
252 | "\n",
253 | "\n"
254 | ]
255 | },
256 | {
257 | "name": "stdout",
258 | "output_type": "stream",
259 | "text": [
260 | "\n",
261 | " WARNING: The R package \"reticulate\" only fixed recently\n",
262 | " an issue that caused a segfault when used with rpy2:\n",
263 | " https://github.com/rstudio/reticulate/pull/1188\n",
264 | " Make sure that you use a version of that package that includes\n",
265 | " the fix.\n",
266 | " "
267 | ]
268 | },
269 | {
270 | "name": "stderr",
271 | "output_type": "stream",
272 | "text": [
273 | "R[write to console]: \n",
274 | "Attaching package: ‘anndata’\n",
275 | "\n",
276 | "\n",
277 | "R[write to console]: The following object is masked from ‘package:SeuratObject’:\n",
278 | "\n",
279 | " Layers\n",
280 | "\n",
281 | "\n",
282 | "R[write to console]: Warning:\n",
283 | "R[write to console]: Feature names cannot have underscores ('_'), replacing with dashes ('-')\n",
284 | "\n",
285 | "R[write to console]: Warning:\n",
286 | "R[write to console]: Data is of class matrix. Coercing to dgCMatrix.\n",
287 | "\n",
288 | "R[write to console]: Warning:\n",
289 | "R[write to console]: Feature names cannot have underscores ('_'), replacing with dashes ('-')\n",
290 | "\n",
291 | "R[write to console]: Warning:\n",
292 | "R[write to console]: Feature names cannot have underscores ('_'), replacing with dashes ('-')\n",
293 | "\n",
294 | "R[write to console]: Warning:\n",
295 | "R[write to console]: Feature names cannot have underscores ('_'), replacing with dashes ('-')\n",
296 | "\n",
297 | "R[write to console]: Warning:\n",
298 | "R[write to console]: Data is of class matrix. Coercing to dgCMatrix.\n",
299 | "\n",
300 | "R[write to console]: Warning:\n",
301 | "R[write to console]: Feature names cannot have underscores ('_'), replacing with dashes ('-')\n",
302 | "\n",
303 | "R[write to console]: Warning:\n",
304 | "R[write to console]: Feature names cannot have underscores ('_'), replacing with dashes ('-')\n",
305 | "\n",
306 | "R[write to console]: Warning:\n",
307 | "R[write to console]: Feature names cannot have underscores ('_'), replacing with dashes ('-')\n",
308 | "\n",
309 | "R[write to console]: Warning:\n",
310 | "R[write to console]: Data is of class matrix. Coercing to dgCMatrix.\n",
311 | "\n",
312 | "R[write to console]: Warning:\n",
313 | "R[write to console]: Feature names cannot have underscores ('_'), replacing with dashes ('-')\n",
314 | "\n",
315 | "R[write to console]: Warning:\n",
316 | "R[write to console]: Feature names cannot have underscores ('_'), replacing with dashes ('-')\n",
317 | "\n"
318 | ]
319 | }
320 | ],
321 | "source": [
322 | "%%R\n",
323 | "library(Seurat)\n",
324 | "library(anndata)\n",
325 | "\n",
326 | "create_seurat <- function(url) {\n",
327 | " file_path <- sub(\"https://s3.amazonaws.com/dp-lab-data-public/palantir/\", \"../data/\", url)\n",
328 | " if (!file.exists(file_path)) {\n",
329 | " curl::curl_download(url, file_path)\n",
330 | " }\n",
331 | " data <- read_h5ad(file_path)\n",
332 | " \n",
333 | " seurat_obj <- CreateSeuratObject(\n",
334 | " counts = t(data$X), \n",
335 | " meta.data = data$obs,\n",
336 | " project = \"CD34+ Bone Marrow Cells\"\n",
337 | " )\n",
338 | " tsne_data <- data$obsm[[\"tsne\"]]\n",
339 | " rownames(tsne_data) <- rownames(data$obs)\n",
340 | " colnames(tsne_data) <- c(\"tSNE_1\", \"tSNE_2\")\n",
341 | " seurat_obj[[\"tsne\"]] <- CreateDimReducObject(\n",
342 | " embeddings = tsne_data,\n",
343 | " key = \"tSNE_\"\n",
344 | " )\n",
345 | " imputed_data <- t(data$obsm[[\"MAGIC_imputed_data\"]])\n",
346 | " colnames(imputed_data) <- rownames(data$obs)\n",
347 | " rownames(imputed_data) <- rownames(data$var)\n",
348 | " seurat_obj[[\"MAGIC_imputed\"]] <- CreateAssayObject(counts = imputed_data)\n",
349 | " fate_probs <- as.data.frame(data$obsm[[\"palantir_branch_probs\"]])\n",
350 | " colnames(fate_probs) <- data$uns[[\"palantir_branch_probs_cell_types\"]]\n",
351 | " rownames(fate_probs) <- rownames(data$obs)\n",
352 | " seurat_obj <- AddMetaData(seurat_obj, metadata = fate_probs)\n",
353 | "\n",
354 | " return(seurat_obj)\n",
355 | "}\n",
356 | "\n",
357 | "human_cd34_bm_Rep1 <- create_seurat(\"https://s3.amazonaws.com/dp-lab-data-public/palantir/human_cd34_bm_rep1.h5ad\")\n",
358 | "human_cd34_bm_Rep2 <- create_seurat(\"https://s3.amazonaws.com/dp-lab-data-public/palantir/human_cd34_bm_rep2.h5ad\")\n",
359 | "human_cd34_bm_Rep3 <- create_seurat(\"https://s3.amazonaws.com/dp-lab-data-public/palantir/human_cd34_bm_rep3.h5ad\")"
360 | ]
361 | },
362 | {
363 | "cell_type": "code",
364 | "execution_count": 7,
365 | "id": "a7c8b823-4d18-4252-acc1-4a9f51f929b9",
366 | "metadata": {
367 | "execution": {
368 | "iopub.execute_input": "2023-11-28T21:23:08.315660Z",
369 | "iopub.status.busy": "2023-11-28T21:23:08.315364Z",
370 | "iopub.status.idle": "2023-11-28T21:23:08.361153Z",
371 | "shell.execute_reply": "2023-11-28T21:23:08.360630Z",
372 | "shell.execute_reply.started": "2023-11-28T21:23:08.315642Z"
373 | }
374 | },
375 | "outputs": [
376 | {
377 | "name": "stdout",
378 | "output_type": "stream",
379 | "text": [
380 | "An object of class Seurat \n",
381 | "29302 features across 5780 samples within 2 assays \n",
382 | "Active assay: RNA (14651 features, 0 variable features)\n",
383 | " 1 layer present: counts\n",
384 | " 1 other assay present: MAGIC_imputed\n",
385 | " 1 dimensional reduction calculated: tsne\n"
386 | ]
387 | }
388 | ],
389 | "source": [
390 | "%%R\n",
391 | "\n",
392 | "human_cd34_bm_Rep1"
393 | ]
394 | },
395 | {
396 | "cell_type": "code",
397 | "execution_count": 8,
398 | "id": "094067ac-b251-4e37-8d67-eedc2641b8fa",
399 | "metadata": {
400 | "execution": {
401 | "iopub.execute_input": "2023-11-28T21:23:08.362383Z",
402 | "iopub.status.busy": "2023-11-28T21:23:08.361964Z",
403 | "iopub.status.idle": "2023-11-28T21:23:08.400063Z",
404 | "shell.execute_reply": "2023-11-28T21:23:08.399518Z",
405 | "shell.execute_reply.started": "2023-11-28T21:23:08.362356Z"
406 | }
407 | },
408 | "outputs": [
409 | {
410 | "name": "stdout",
411 | "output_type": "stream",
412 | "text": [
413 | "An object of class Seurat \n",
414 | "29826 features across 6501 samples within 2 assays \n",
415 | "Active assay: RNA (14913 features, 0 variable features)\n",
416 | " 1 layer present: counts\n",
417 | " 1 other assay present: MAGIC_imputed\n",
418 | " 1 dimensional reduction calculated: tsne\n"
419 | ]
420 | }
421 | ],
422 | "source": [
423 | "%%R\n",
424 | "\n",
425 | "human_cd34_bm_Rep2"
426 | ]
427 | },
428 | {
429 | "cell_type": "code",
430 | "execution_count": 9,
431 | "id": "6fb000c4-41ee-4147-aba8-08c0e6f7deb5",
432 | "metadata": {
433 | "execution": {
434 | "iopub.execute_input": "2023-11-28T21:23:08.401196Z",
435 | "iopub.status.busy": "2023-11-28T21:23:08.400878Z",
436 | "iopub.status.idle": "2023-11-28T21:23:08.441148Z",
437 | "shell.execute_reply": "2023-11-28T21:23:08.440627Z",
438 | "shell.execute_reply.started": "2023-11-28T21:23:08.401171Z"
439 | }
440 | },
441 | "outputs": [
442 | {
443 | "name": "stdout",
444 | "output_type": "stream",
445 | "text": [
446 | "An object of class Seurat \n",
447 | "28088 features across 12046 samples within 2 assays \n",
448 | "Active assay: RNA (14044 features, 0 variable features)\n",
449 | " 1 layer present: counts\n",
450 | " 1 other assay present: MAGIC_imputed\n",
451 | " 1 dimensional reduction calculated: tsne\n"
452 | ]
453 | }
454 | ],
455 | "source": [
456 | "%%R\n",
457 | "\n",
458 | "human_cd34_bm_Rep3"
459 | ]
460 | },
461 | {
462 | "cell_type": "code",
463 | "execution_count": null,
464 | "id": "e208ff84-85d0-40f7-b08d-9153537b088a",
465 | "metadata": {},
466 | "outputs": [],
467 | "source": []
468 | }
469 | ],
470 | "metadata": {
471 | "kernelspec": {
472 | "display_name": "da1",
473 | "language": "python",
474 | "name": "da1"
475 | },
476 | "language_info": {
477 | "codemirror_mode": {
478 | "name": "ipython",
479 | "version": 3
480 | },
481 | "file_extension": ".py",
482 | "mimetype": "text/x-python",
483 | "name": "python",
484 | "nbconvert_exporter": "python",
485 | "pygments_lexer": "ipython3",
486 | "version": "3.11.5"
487 | },
488 | "widgets": {
489 | "application/vnd.jupyter.widget-state+json": {
490 | "state": {},
491 | "version_major": 2,
492 | "version_minor": 0
493 | }
494 | }
495 | },
496 | "nbformat": 4,
497 | "nbformat_minor": 5
498 | }
499 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools>=68.0.0", "wheel>=0.40.0"]
3 | build-backend = "setuptools.build_meta"
4 |
5 | [project]
6 | name = "palantir"
7 | version = "1.4.2rc"
8 | description = "Palantir for modeling continuous cell state and cell fate choices in single cell data"
9 | authors = [
10 | {name = "Palantir development team", email = "manu.talanki@gmail.com"}
11 | ]
12 | readme = "README.md"
13 | requires-python = ">=3.9"
14 | license = {text = "MIT"}
15 | classifiers = [
16 | "Programming Language :: Python :: 3",
17 | "Programming Language :: Python :: 3.9",
18 | "Programming Language :: Python :: 3.10",
19 | "Programming Language :: Python :: 3.11",
20 | "Programming Language :: Python :: 3.12",
21 | "Programming Language :: Python :: 3.13",
22 | "License :: OSI Approved :: MIT License",
23 | "Operating System :: OS Independent",
24 | "Operating System :: POSIX :: Linux",
25 | "Development Status :: 5 - Production/Stable",
26 | "Topic :: Scientific/Engineering :: Bio-Informatics",
27 | "Topic :: Scientific/Engineering :: Visualization",
28 | ]
29 | dependencies = [
30 | "numpy>=1.14.2",
31 | "pandas>=0.22.0",
32 | "scipy>=1.3",
33 | "networkx>=2.1",
34 | "scikit-learn",
35 | "joblib",
36 | "matplotlib>=3.8.0",
37 | "anndata>=0.8.0",
38 | "scanpy>=1.6.0",
39 | "mellon>=1.6.1",
40 | "ml_dtypes>=0.5.0",
41 | "igraph>=0.11.8",
42 | ]
43 |
44 | [project.optional-dependencies]
45 | gam = [
46 | "pygam",
47 | ]
48 |
49 | fcs = [
50 | "fcsparser>=0.1.2",
51 | ]
52 |
53 | full = [
54 | "pygam",
55 | "fcsparser>=0.1.2",
56 | ]
57 |
58 | test-base = [
59 | "pytest>=7.0.0",
60 | "pytest-cov>=4.0.0",
61 | "h5py",
62 | ]
63 |
64 | test = [
65 | "pytest>=7.0.0",
66 | "pytest-cov>=4.0.0",
67 | "h5py",
68 | "fcsparser>=0.1.2",
69 | ]
70 |
71 | pre313 = [
72 | "fcsparser>=0.1.2",
73 | ]
74 |
75 | [project.urls]
76 | Homepage = "https://github.com/dpeerlab/palantir"
77 | "Bug Tracker" = "https://github.com/dpeerlab/palantir/issues"
78 |
79 | [tool.setuptools]
80 | package-dir = {"" = "src"}
81 |
82 | [tool.pytest.ini_options]
83 | testpaths = ["tests"]
84 | python_files = "test_*.py"
85 |
86 | [tool.black]
87 | line-length = 100
88 | target-version = ['py39']
89 | include = '\.pyi?$'
90 | exclude = '''
91 | /(
92 | \.eggs
93 | | \.git
94 | | \.hg
95 | | \.mypy_cache
96 | | \.tox
97 | | \.venv
98 | | _build
99 | | buck-out
100 | | build
101 | | dist
102 | )/
103 | '''
104 |
105 | [tool.coverage.run]
106 | source = ["palantir"]
107 | omit = [
108 | "tests/*",
109 | "*/config.py"
110 | ]
111 |
112 | [tool.coverage.report]
113 | exclude_lines = [
114 | "pragma: no cover",
115 | "raise NotImplementedError",
116 | "if __name__ == .__main__.:",
117 | "pass",
118 | "raise ImportError",
119 | "def _return_cell",
120 | "print"
121 | ]
122 |
123 | [project.scripts]
124 | palantir-cli = "palantir.cli:main"
125 |
--------------------------------------------------------------------------------
/readthedocs.yaml:
--------------------------------------------------------------------------------
1 | version: 2
2 |
3 | build:
4 | os: ubuntu-20.04
5 | tools:
6 | python: "3.9"
7 |
8 | sphinx:
9 | configuration: docs/source/conf.py
10 |
11 | python:
12 | install:
13 | - requirements: docs/requirements.txt
14 | - method: pip
15 | path: .
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | -e .
2 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import sys
2 | from setuptools import setup
3 |
4 | # For backward compatibility, use setup.py as a proxy to pyproject.toml
5 | # All configuration is in pyproject.toml
6 |
7 | setup()
8 |
--------------------------------------------------------------------------------
/src/palantir/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Palantir - Modeling continuous cell state and cell fate choices in single cell data.
3 |
4 | Palantir is an algorithm to align cells along differentiation trajectories, identify
5 | differentiation endpoints, and estimate cell-fate probabilities in single-cell data.
6 | The package provides functions for preprocessing, visualization, trajectory analysis,
7 | and gene expression modeling along the trajectories.
8 |
9 | Modules
10 | -------
11 | config : Configuration settings for Palantir
12 | core : Core functions for running the Palantir algorithm
13 | presults : Class for storing and accessing Palantir results
14 | io : Input/output functions for loading and saving data
15 | preprocess : Preprocessing functions for single-cell data
16 | utils : Utility functions for analysis
17 | plot : Visualization functions
18 | """
19 |
20 | import importlib.metadata
21 |
22 | from . import config
23 |
24 | # Import modules in a specific order to avoid circular imports
25 | from . import presults
26 | from . import core
27 | from . import io
28 | from . import preprocess
29 | from . import utils
30 | from . import plot
31 |
32 | __version__ = importlib.metadata.version("palantir")
33 |
34 | __all__ = [
35 | "config",
36 | "core",
37 | "presults",
38 | "io",
39 | "preprocess",
40 | "utils",
41 | "plot",
42 | "__version__",
43 | ]
44 |
--------------------------------------------------------------------------------
/src/palantir/cli.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import subprocess
3 | import argparse
4 |
5 |
6 | def check_python_version():
7 | """Check Python version and install appropriate dependencies."""
8 | version = sys.version_info
9 | if version.major == 3 and version.minor < 13:
10 | try:
11 | import pip
12 | subprocess.check_call([
13 | sys.executable, "-m", "pip", "install", "fcsparser>=0.1.2"
14 | ])
15 | print("Installed fcsparser for Python < 3.13")
16 | except Exception as e:
17 | print(f"Failed to install fcsparser: {e}")
18 | print("You may need to install it manually with: pip install fcsparser>=0.1.2")
19 | else:
20 | print("Python 3.13+ detected. fcsparser is optional.")
21 | print("If you need FCS file support, install with: pip install palantir[fcs]")
22 |
23 |
24 | def main():
25 | """Main CLI function."""
26 | parser = argparse.ArgumentParser(
27 | description="Palantir - Modeling continuous cell state and cell fate choices in single cell data"
28 | )
29 | parser.add_argument(
30 | "--version", action="store_true", help="Print version information"
31 | )
32 | parser.add_argument(
33 | "--check-deps", action="store_true", help="Check dependencies and install as needed"
34 | )
35 |
36 | args = parser.parse_args()
37 |
38 | if args.version:
39 | from palantir.version import __version__
40 | print(f"Palantir version: {__version__}")
41 | return
42 |
43 | if args.check_deps:
44 | check_python_version()
45 | return
46 |
47 | if len(sys.argv) == 1:
48 | parser.print_help()
49 |
50 |
51 | if __name__ == "__main__":
52 | main()
--------------------------------------------------------------------------------
/src/palantir/config.py:
--------------------------------------------------------------------------------
1 | import matplotlib
2 |
3 | matplotlib.rcParams["figure.dpi"] = 100
4 | matplotlib.rcParams["image.cmap"] = "Spectral_r"
5 | matplotlib.rcParams["axes.spines.bottom"] = "on"
6 | matplotlib.rcParams["axes.spines.top"] = "off"
7 | matplotlib.rcParams["axes.spines.left"] = "on"
8 | matplotlib.rcParams["axes.spines.right"] = "off"
9 | matplotlib.rcParams["figure.figsize"] = [4, 4]
10 |
11 | SELECTED_COLOR = "#377EB8"
12 | DESELECTED_COLOR = "#CFD5E2"
13 |
14 | # This global variable sets the default behaviour for saving pandas.DataFrames
15 | # in AnnData.obsm and AnnData.varm. When set to True, the data is saved as pandas.DataFrame.
16 | SAVE_AS_DF = True
17 |
--------------------------------------------------------------------------------
/src/palantir/io.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | import os.path
4 | import sys
5 | import scanpy as sc
6 | from scipy.io import mmread
7 | import anndata
8 | from typing import Optional, List, Union
9 |
10 |
11 | def _clean_up(df: pd.DataFrame) -> pd.DataFrame:
12 | """
13 | Remove rows and columns with all zeros from a DataFrame.
14 |
15 | Parameters
16 | ----------
17 | df : pd.DataFrame
18 | Input DataFrame to clean.
19 |
20 | Returns
21 | -------
22 | pd.DataFrame
23 | Cleaned DataFrame with rows and columns containing all zeros removed.
24 | """
25 | df = df.loc[df.sum(axis=1) > 0, :]
26 | df = df.loc[:, df.sum(axis=0) > 0]
27 | return df
28 |
29 |
30 | def from_csv(counts_csv_file: str, delimiter: str = ",") -> pd.DataFrame:
31 | """
32 | Read gene expression data from a CSV file.
33 |
34 | Parameters
35 | ----------
36 | counts_csv_file : str
37 | Path to the CSV file containing gene expression data.
38 | delimiter : str, optional
39 | Delimiter used in the CSV file. Default is ','.
40 |
41 | Returns
42 | -------
43 | pd.DataFrame
44 | Gene expression data with rows as cells and columns as genes.
45 | Cells and genes with zero counts are removed.
46 | """
47 | # Read in csv file
48 | df = pd.read_csv(counts_csv_file, sep=delimiter, index_col=0)
49 | clean_df = _clean_up(df)
50 | return clean_df
51 |
52 |
53 | def from_mtx(mtx_file: str, gene_name_file: str) -> pd.DataFrame:
54 | """
55 | Read gene expression data from a Matrix Market format file.
56 |
57 | Parameters
58 | ----------
59 | mtx_file : str
60 | Path to the Matrix Market file containing gene expression data.
61 | gene_name_file : str
62 | Path to the file containing gene names, one per line.
63 |
64 | Returns
65 | -------
66 | pd.DataFrame
67 | Gene expression data with rows as cells and columns as genes.
68 | Cells and genes with zero counts are removed.
69 | """
70 | # Read in mtx file
71 | count_matrix = mmread(mtx_file)
72 |
73 | gene_names = np.loadtxt(gene_name_file, dtype=np.dtype("S"))
74 | gene_names = np.array([gene.decode("utf-8") for gene in gene_names])
75 |
76 | # Convert to dense format
77 | df = pd.DataFrame(count_matrix.todense(), columns=gene_names)
78 |
79 | return _clean_up(df)
80 |
81 |
82 | def from_10x(data_dir: Optional[str], use_ensemble_id: bool = True) -> pd.DataFrame:
83 | """
84 | Load data from 10X Genomics format.
85 |
86 | Parameters
87 | ----------
88 | data_dir : Optional[str]
89 | Directory containing the 10X Genomics output files:
90 | matrix.mtx, genes.tsv, and barcodes.tsv.
91 | If None, the current directory is used.
92 | use_ensemble_id : bool, optional
93 | If True, use Ensembl IDs as gene identifiers.
94 | If False, use gene symbols. Default is True.
95 |
96 | Returns
97 | -------
98 | pd.DataFrame
99 | Gene expression data with rows as cells and columns as genes.
100 | Cells and genes with zero counts are removed.
101 | """
102 | # loads 10x sparse format data
103 | if data_dir is None:
104 | data_dir = "./"
105 | elif data_dir[len(data_dir) - 1] != "/":
106 | data_dir = data_dir + "/"
107 |
108 | filename_dataMatrix = os.path.expanduser(data_dir + "matrix.mtx")
109 | filename_genes = os.path.expanduser(data_dir + "genes.tsv")
110 | filename_cells = os.path.expanduser(data_dir + "barcodes.tsv")
111 |
112 | # Read in gene expression matrix (sparse matrix)
113 | # Rows = genes, columns = cells
114 | dataMatrix = mmread(filename_dataMatrix)
115 |
116 | # Read in row names (gene names / IDs)
117 | gene_names = np.loadtxt(filename_genes, delimiter="\t", dtype=bytes).astype(str)
118 | if use_ensemble_id:
119 | gene_names = [gene[0] for gene in gene_names]
120 | else:
121 | gene_names = [gene[1] for gene in gene_names]
122 | cell_names = np.loadtxt(filename_cells, delimiter="\t", dtype=bytes).astype(str)
123 |
124 | dataMatrix = pd.DataFrame(dataMatrix.todense(), columns=cell_names, index=gene_names)
125 |
126 | # combine duplicate genes
127 | if not use_ensemble_id:
128 | dataMatrix = dataMatrix.groupby(dataMatrix.index).sum()
129 | dataMatrix = dataMatrix.transpose()
130 |
131 | return _clean_up(dataMatrix)
132 |
133 |
134 | def from_10x_HDF5(filename: str, genome: Optional[str] = None) -> pd.DataFrame:
135 | """
136 | Load data from 10X Genomics HDF5 format.
137 |
138 | Parameters
139 | ----------
140 | filename : str
141 | Path to the HDF5 file containing 10X Genomics data.
142 | genome : Optional[str], optional
143 | Name of the genome to load. If None, the first genome is used.
144 |
145 | Returns
146 | -------
147 | pd.DataFrame
148 | Gene expression data with rows as cells and columns as genes.
149 | Cells and genes with zero counts are removed.
150 | """
151 | ad = sc.read_10x_h5(filename, genome=genome, gex_only=True)
152 |
153 | dataMatrix = pd.DataFrame(ad.X.todense(), columns=ad.var_names, index=ad.obs_names)
154 |
155 | return _clean_up(dataMatrix)
156 |
157 |
158 | def from_fcs(
159 | cls,
160 | fcs_file: str,
161 | cofactor: float = 5,
162 | metadata_channels: List[str] = [
163 | "Time",
164 | "Event_length",
165 | "DNA1",
166 | "DNA2",
167 | "Cisplatin",
168 | "beadDist",
169 | "bead1",
170 | ],
171 | ) -> pd.DataFrame:
172 | """
173 | Load data from Flow Cytometry Standard (FCS) format.
174 |
175 | Parameters
176 | ----------
177 | cls : object
178 | Class instance (unused, kept for compatibility).
179 | fcs_file : str
180 | Path to the FCS file to load.
181 | cofactor : float, optional
182 | Cofactor for arcsinh transformation. Default is 5.
183 | metadata_channels : List[str], optional
184 | List of metadata channel names to exclude from the returned data.
185 |
186 | Returns
187 | -------
188 | pd.DataFrame
189 | Processed cytometry data with metadata channels removed and
190 | optionally transformed using arcsinh.
191 |
192 | Notes
193 | -----
194 | This function requires the fcsparser package to be installed.
195 | If not installed, it will raise an ImportError with instructions.
196 | """
197 | try:
198 | import fcsparser
199 | except ImportError:
200 | raise ImportError(
201 | "The fcsparser package is required for reading FCS files. "
202 | "Please install it with: pip install fcsparser"
203 | )
204 | # Parse the fcs file
205 | text, data = fcsparser.parse(fcs_file)
206 | # Use view instead of newbyteorder for NumPy 2.0 compatibility
207 | data = data.astype(np.float64, copy=False)
208 |
209 | # Metadata and data
210 | metadata_channels = data.columns.intersection(metadata_channels)
211 | data_channels = data.columns.difference(metadata_channels)
212 | # metadata = data[metadata_channels]
213 | data = data[data_channels]
214 |
215 | # Transform if necessary
216 | if cofactor is not None or cofactor > 0:
217 | data = np.arcsinh(np.divide(data, cofactor))
218 |
219 | return data
220 |
--------------------------------------------------------------------------------
/src/palantir/plot_utils.py:
--------------------------------------------------------------------------------
1 | """
2 | Utility functions for plotting in Palantir
3 | """
4 |
5 | from typing import Optional, Union, Dict, List, Tuple, Any, Callable
6 | import numpy as np
7 | import pandas as pd
8 | import matplotlib
9 | import matplotlib.pyplot as plt
10 | from matplotlib.colors import Normalize
11 | from matplotlib.axes import Axes
12 | from mpl_toolkits.axes_grid1 import make_axes_locatable
13 | import contextlib
14 | import logging
15 |
16 |
17 | @contextlib.contextmanager
18 | def no_mellon_log_messages():
19 | # Import mellon locally to avoid JAX fork warnings in other parts of the code
20 | import mellon
21 | current_level = mellon.logger.level
22 | mellon.logger.setLevel(logging.ERROR)
23 | try:
24 | yield
25 | finally:
26 | mellon.logger.setLevel(current_level)
27 |
28 | def _scatter_with_colorbar(
29 | ax: Axes,
30 | x: np.ndarray,
31 | y: np.ndarray,
32 | c: np.ndarray,
33 | colorbar_label: Optional[str] = None,
34 | s: float = 5,
35 | cmap: Union[str, matplotlib.colors.Colormap] = "viridis",
36 | norm: Optional[Normalize] = None,
37 | alpha: float = 1.0,
38 | **kwargs,
39 | ) -> Tuple[Axes, matplotlib.colorbar.Colorbar]:
40 | """Helper function to create scatter plot with colorbar.
41 |
42 | Parameters
43 | ----------
44 | ax : Axes
45 | Matplotlib axes object to plot on.
46 | x : np.ndarray
47 | X-coordinates for scatter plot.
48 | y : np.ndarray
49 | Y-coordinates for scatter plot.
50 | c : np.ndarray
51 | Values for color mapping.
52 | colorbar_label : str, optional
53 | Label for the colorbar. Default is None.
54 | s : float, optional
55 | Size of scatter points. Default is 5.
56 | cmap : str or matplotlib.colors.Colormap, optional
57 | Colormap for the scatter plot. Default is 'viridis'.
58 | norm : Normalize, optional
59 | Normalization for colormap. Default is None.
60 | alpha : float, optional
61 | Transparency of scatter points. Default is 1.0.
62 | **kwargs : dict
63 | Additional keyword arguments to pass to plt.scatter.
64 |
65 | Returns
66 | -------
67 | Tuple[Axes, matplotlib.colorbar.Colorbar]
68 | The axes object and the colorbar object.
69 | """
70 | sc = ax.scatter(x, y, c=c, s=s, cmap=cmap, norm=norm, alpha=alpha, **kwargs)
71 | divider = make_axes_locatable(ax)
72 | cax = divider.append_axes("right", size="5%", pad=0.05)
73 | cbar = plt.colorbar(sc, cax=cax, orientation="vertical")
74 | if colorbar_label:
75 | cbar.set_label(colorbar_label)
76 | return ax, cbar
77 |
78 |
79 | def _highlight_cells(
80 | ax: Axes,
81 | x: np.ndarray,
82 | y: np.ndarray,
83 | mask: np.ndarray,
84 | deselected_color: str = "lightgray",
85 | selected_color: str = "crimson",
86 | s_selected: float = 10,
87 | s_deselected: float = 3,
88 | alpha_deselected: float = 0.5,
89 | alpha_selected: float = 1.0,
90 | **kwargs,
91 | ) -> Axes:
92 | """Helper function to highlight cells in scatter plot based on mask.
93 |
94 | Parameters
95 | ----------
96 | ax : Axes
97 | Matplotlib axes object to plot on.
98 | x : np.ndarray
99 | X-coordinates for scatter plot.
100 | y : np.ndarray
101 | Y-coordinates for scatter plot.
102 | mask : np.ndarray
103 | Boolean mask for selecting cells to highlight.
104 | deselected_color : str, optional
105 | Color for non-highlighted cells. Default is "lightgray".
106 | selected_color : str, optional
107 | Color for highlighted cells. Default is "crimson".
108 | s_selected : float, optional
109 | Size of highlighted scatter points. Default is 10.
110 | s_deselected : float, optional
111 | Size of non-highlighted scatter points. Default is 3.
112 | alpha_deselected : float, optional
113 | Transparency of non-highlighted cells. Default is 0.5.
114 | alpha_selected : float, optional
115 | Transparency of highlighted cells. Default is 1.0.
116 | **kwargs : dict
117 | Additional keyword arguments to pass to plt.scatter.
118 |
119 | Returns
120 | -------
121 | Axes
122 | The modified axes object.
123 | """
124 | ax.scatter(
125 | x[~mask],
126 | y[~mask],
127 | c=deselected_color,
128 | s=s_deselected,
129 | alpha=alpha_deselected,
130 | label="Other Cells",
131 | **kwargs,
132 | )
133 | ax.scatter(
134 | x[mask],
135 | y[mask],
136 | c=selected_color,
137 | s=s_selected,
138 | alpha=alpha_selected,
139 | label="Selected Cells",
140 | **kwargs,
141 | )
142 | return ax
143 |
144 |
145 | def _add_legend(
146 | ax: Axes,
147 | handles: Optional[List] = None,
148 | labels: Optional[List[str]] = None,
149 | loc: str = "best",
150 | title: Optional[str] = None,
151 | **kwargs,
152 | ) -> matplotlib.legend.Legend:
153 | """Helper function to add legend to plot.
154 |
155 | Parameters
156 | ----------
157 | ax : Axes
158 | Matplotlib axes object to add legend to.
159 | handles : List, optional
160 | List of artists (lines, patches) to be added to the legend. Default is None.
161 | labels : List[str], optional
162 | List of labels for the legend. Default is None.
163 | loc : str, optional
164 | Location of the legend. Default is "best".
165 | title : str, optional
166 | Title for the legend. Default is None.
167 | **kwargs : dict
168 | Additional keyword arguments to pass to ax.legend().
169 |
170 | Returns
171 | -------
172 | matplotlib.legend.Legend
173 | The legend object.
174 | """
175 | if handles is not None and labels is not None:
176 | legend = ax.legend(handles, labels, loc=loc, title=title, **kwargs)
177 | else:
178 | legend = ax.legend(loc=loc, title=title, **kwargs)
179 | return legend
180 |
181 |
182 | def _setup_axes(
183 | figsize: Tuple[float, float] = (6, 6),
184 | ax: Optional[Axes] = None,
185 | fig: Optional[plt.Figure] = None,
186 | **kwargs,
187 | ) -> Tuple[plt.Figure, Axes]:
188 | """Helper function to set up figure and axes for plotting.
189 |
190 | Parameters
191 | ----------
192 | figsize : Tuple[float, float], optional
193 | Size of the figure (width, height) in inches. Default is (6, 6).
194 | ax : Axes, optional
195 | Existing axes to plot on. Default is None.
196 | fig : Figure, optional
197 | Existing figure to plot on. Default is None.
198 | **kwargs : dict
199 | Additional keyword arguments to pass to plt.subplots().
200 |
201 | Returns
202 | -------
203 | Tuple[plt.Figure, Axes]
204 | The figure and axes objects.
205 | """
206 | if ax is None:
207 | fig, ax = plt.subplots(figsize=figsize, **kwargs)
208 | elif fig is None:
209 | fig = ax.figure
210 | return fig, ax
211 |
212 |
213 | def _get_palantir_fates_colors(
214 | ad,
215 | fate_names: List[str],
216 | palantir_fates_colors: Optional[Union[List[str], Dict[str, str]]] = None
217 | ) -> Dict[str, str]:
218 | """
219 | Generate or update the mapping from branch names to colors.
220 |
221 | This utility checks if ad.uns already contains predefined colors.
222 | Then, if the `palantir_fates_colors` parameter is provided, its values are merged
223 | (with user-specified colors taking precedence). For any missing branch the function
224 | generates a new color ensuring that no color is used twice.
225 |
226 | Parameters
227 | ----------
228 | ad : AnnData
229 | The annotated data object from which .uns will be checked.
230 | fate_names : list of str
231 | List of branch (fate) names.
232 | palantir_fates_colors : dict or list or None, optional
233 | If a dict, keys should be branch names with a color for each.
234 | If a list, its order is assumed to correspond to fate_names.
235 | If None, only the predefined colors (if any) and generated defaults are used.
236 |
237 | Returns
238 | -------
239 | dict
240 | Mapping from branch names to colors.
241 | """
242 | # Get any predefined colors stored in ad.uns.
243 | predefined = {}
244 | if "palantir_fates_colors" in ad.uns:
245 | predefined = ad.uns["palantir_fates_colors"]
246 |
247 | # Process user-provided colors from argument.
248 | provided = {}
249 | if palantir_fates_colors is not None:
250 | if isinstance(palantir_fates_colors, dict):
251 | provided = palantir_fates_colors
252 | elif isinstance(palantir_fates_colors, list):
253 | if len(palantir_fates_colors) < len(fate_names):
254 | raise ValueError("Provided color list length is less than the number of branch names.")
255 | provided = {name: clr for name, clr in zip(fate_names, palantir_fates_colors)}
256 | else:
257 | raise TypeError("palantir_fates_colors must be a dict, list, or None.")
258 |
259 | # Merge: user-provided takes precedence, then predefined.
260 | mapping = {}
261 | for branch in fate_names:
262 | if branch in provided:
263 | mapping[branch] = provided[branch]
264 | elif branch in predefined:
265 | mapping[branch] = predefined[branch]
266 |
267 | # Collect already used colors to exclude duplicates.
268 | used_colors = set(mapping.values())
269 |
270 | # Generate colors for missing branches.
271 | missing = [branch for branch in fate_names if branch not in mapping]
272 | if missing:
273 | # Get the default color cycle.
274 | default_cycle = plt.rcParams['axes.prop_cycle'].by_key().get(
275 | 'color', ['C0', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9']
276 | )
277 | # Create a generator that skips colors already used.
278 | def color_generator(exclude):
279 | for clr in default_cycle:
280 | if clr not in exclude:
281 | yield clr
282 | hex_digits = np.array(list("0123456789ABCDEF"))
283 | # If default cycle is exhausted, generate random colors.
284 | while True:
285 | new_color = "#" + "".join(np.random.choice(hex_digits, size=6))
286 | if new_color not in exclude:
287 | yield new_color
288 |
289 | gen = color_generator(used_colors)
290 | for branch in missing:
291 | new_color = next(gen)
292 | mapping[branch] = new_color
293 | used_colors.add(new_color)
294 |
295 | return mapping
296 |
297 |
298 | def _plot_arrows(x, y, n=5, ax=None, arrowprops=dict(), arrow_zorder=2, head_offset=0.0, **kwargs):
299 | """
300 | Helper function to plot arrows on a trajectory line.
301 |
302 | The new 'head_offset' parameter (as a fraction of the segment length)
303 | moves the arrow head slightly forward.
304 |
305 | Parameters
306 | ----------
307 | x, y : array-like
308 | Coordinates of the trajectory points.
309 | n : int, optional
310 | Number of arrows to plot. Defaults to 5.
311 | ax : matplotlib.axes.Axes, optional
312 | Axes to plot on.
313 | arrowprops : dict, optional
314 | Properties for the arrow style.
315 | arrow_zorder : int, optional
316 | zorder level for both the line and arrow annotations.
317 | head_offset : float, optional
318 | Fraction of the segment length to move the arrow head forward.
319 | **kwargs :
320 | Extra keyword arguments passed to the plot function.
321 |
322 | Returns
323 | -------
324 | matplotlib.axes.Axes
325 | The axis with the arrows plotted.
326 | """
327 | if ax is None:
328 | fig, ax = plt.subplots()
329 |
330 | default_kwargs = {"color": "black", "zorder": arrow_zorder}
331 | default_kwargs.update(kwargs)
332 |
333 | # Plot the trajectory line.
334 | ax.plot(x, y, **default_kwargs)
335 |
336 | if n <= 0:
337 | return ax
338 |
339 | default_arrowprops = dict(arrowstyle="->", lw=1, mutation_scale=20)
340 | default_arrowprops["color"] = default_kwargs.get("color", "black")
341 | default_arrowprops.update(arrowprops)
342 |
343 | total_points = len(x)
344 | section_length = total_points // n
345 |
346 | for i in range(n):
347 | idx = total_points - i * section_length
348 | if idx < 2:
349 | break
350 | # Compute the vector from the previous point to the arrow head.
351 | dx = x[idx - 1] - x[idx - 2]
352 | dy = y[idx - 1] - y[idx - 2]
353 | norm = (dx**2 + dy**2) ** 0.5
354 | # Compute the forward offset.
355 | if norm != 0:
356 | offset_dx = head_offset * dx / norm
357 | offset_dy = head_offset * dy / norm
358 | else:
359 | offset_dx = offset_dy = 0
360 | # Adjust the arrow head coordinates.
361 | target = (x[idx - 1] + offset_dx, y[idx - 1] + offset_dy)
362 |
363 | ax.annotate(
364 | "",
365 | xy=target,
366 | xytext=(x[idx - 2], y[idx - 2]),
367 | arrowprops=default_arrowprops,
368 | zorder=arrow_zorder,
369 | )
370 | return ax
--------------------------------------------------------------------------------
/src/palantir/preprocess.py:
--------------------------------------------------------------------------------
1 | """
2 | Functions for preprocessing of single cell RNA-seq counts
3 | """
4 |
5 | import numpy as np
6 | import anndata
7 | from scipy.sparse import issparse
8 |
9 |
10 | def filter_counts_data(data, cell_min_molecules=1000, genes_min_cells=10):
11 | """Remove low molecule count cells and low detection genes
12 |
13 | :param data: Counts matrix: Cells x Genes
14 | :param cell_min_molecules: Minimum number of molecules per cell
15 | :param genes_min_cells: Minimum number of cells in which a gene is detected
16 | :return: Filtered counts matrix
17 | """
18 |
19 | # Molecule and cell counts
20 | ms = data.sum(axis=1)
21 | cs = data.sum()
22 |
23 | # Filter
24 | return data.loc[ms.index[ms > cell_min_molecules], cs.index[cs > genes_min_cells]]
25 |
26 |
27 | def normalize_counts(data):
28 | """Correct the counts for molecule count variability
29 |
30 | :param data: Counts matrix: Cells x Genes
31 | :return: Normalized matrix
32 | """
33 | ms = data.sum(axis=1)
34 | norm_df = data.div(ms, axis=0).mul(np.median(ms), axis=0)
35 | return norm_df
36 |
37 |
38 | def log_transform(data, pseudo_count=0.1):
39 | """Log transform the matrix
40 |
41 | :param data: Counts matrix: Cells x Genes or Anndata object
42 | :return: Log transformed matrix
43 | """
44 | if isinstance(data, anndata.AnnData):
45 | if issparse(data.X):
46 | data.X.data = np.log2(data.X.data + pseudo_count) - np.log2(pseudo_count)
47 | else:
48 | data.X = np.log2(data.X + pseudo_count) - np.log2(pseudo_count)
49 | else:
50 | return np.log2(data + pseudo_count)
51 |
--------------------------------------------------------------------------------
/src/palantir/validation.py:
--------------------------------------------------------------------------------
1 | from typing import Union, List, Dict
2 | import numpy as np
3 | import pandas as pd
4 | import scanpy as sc
5 | from anndata import AnnData
6 |
7 |
8 | def _validate_obsm_key(ad, key, as_df=True):
9 | """
10 | Validates and retrieves the data associated with a specified key from the provided AnnData object.
11 |
12 | Parameters
13 | ----------
14 | ad : AnnData
15 | The annotated data matrix from which the data is to be retrieved.
16 | key : str
17 | The key for accessing the data from the AnnData object's obsm.
18 | as_df : bool, optional
19 | If True, the data will be returned as pandas DataFrame with pseudotime as column names.
20 | If False, the data will be returned as numpy array.
21 | Default is True.
22 |
23 | Returns
24 | -------
25 | data : pd.DataFrame
26 | A DataFrame containing the data associated with the specified key.
27 | data_names : List[str]
28 | A list of column names for the DataFrame.
29 |
30 | Raises
31 | ------
32 | KeyError
33 | If the key or its corresponding columns are not found in the AnnData object.
34 | """
35 | if key not in ad.obsm:
36 | raise KeyError(f"{key} not found in ad.obsm")
37 | data = ad.obsm[key]
38 | if not isinstance(data, pd.DataFrame):
39 | if key + "_columns" not in ad.uns:
40 | raise KeyError(
41 | f"{key}_columns not found in ad.uns and ad.obsm[key] is not a DataFrame."
42 | )
43 | data_names = list(ad.uns[key + "_columns"])
44 | if as_df:
45 | data = pd.DataFrame(data, columns=data_names, index=ad.obs_names)
46 | else:
47 | data_names = list(data.columns)
48 | if not as_df:
49 | data = data.values
50 | return data, data_names
51 |
52 |
53 | def _validate_varm_key(ad, key, as_df=True):
54 | """
55 | Validates and retrieves the data associated with a specified key from the provided AnnData object's varm attribute.
56 |
57 | Parameters
58 | ----------
59 | ad : AnnData
60 | The annotated data matrix from which the data is to be retrieved.
61 | key : str
62 | The key for accessing the data from the AnnData object's varm.
63 | as_df : bool, optional
64 | If True, the trends will be returned as pandas DataFrame with pseudotime as column names.
65 | If False, the trends will be returned as numpy array.
66 | Default is True.
67 |
68 | Returns
69 | -------
70 | data : Union[pd.DataFrame, np.ndarray]
71 | A DataFrame or numpy array containing the data associated with the specified key.
72 | data_names : np.ndarray
73 | A an array of pseudotimes.
74 |
75 | Raises
76 | ------
77 | KeyError
78 | If the key or its corresponding columns are not found in the AnnData object.
79 | """
80 | if key not in ad.varm:
81 | raise KeyError(f"{key} not found in ad.varm")
82 | data = ad.varm[key]
83 | if not isinstance(data, pd.DataFrame):
84 | if key + "_pseudotime" not in ad.uns:
85 | raise KeyError(
86 | f"{key}_pseudotime not found in ad.uns and ad.varm[key] is not a DataFrame."
87 | )
88 | data_names = np.array(ad.uns[key + "_pseudotime"])
89 | if as_df:
90 | data = pd.DataFrame(data, columns=data_names, index=ad.var_names)
91 | else:
92 | data_names = np.array(data.columns.astype(float))
93 | if not as_df:
94 | data = data.values
95 | return data, data_names
96 |
97 |
98 | def _validate_gene_trend_input(
99 | data: Union[AnnData, Dict],
100 | gene_trend_key: str = "gene_trends",
101 | branch_names: Union[str, List[str]] = "branch_masks",
102 | ) -> Dict:
103 | """
104 | Validates the input for gene trend plots, and converts it into a dictionary of gene trends.
105 |
106 | Parameters
107 | ----------
108 | data : Union[AnnData, Dict]
109 | An AnnData object or a dictionary containing gene trends.
110 | gene_trend_key : str, optional
111 | Key to access gene trends in the varm of the AnnData object. Default is 'gene_trends'.
112 | branch_names : Union[str, List[str]], optional
113 | Key to retrieve branch names from the AnnData object or a list of branch names. If a string is provided,
114 | it is assumed to be a key in AnnData.uns. Default is 'branch_masks'.
115 |
116 | Returns
117 | -------
118 | gene_trends : Dict
119 | A dictionary containing gene trends.
120 |
121 | Raises
122 | ------
123 | KeyError
124 | If 'branch_names' is a string that is not found in .uns, or if 'gene_trend_key + "_" + branch_name'
125 | is not found in .varm.
126 | ValueError
127 | If 'data' is neither an AnnData object nor a dictionary.
128 | """
129 | if isinstance(data, AnnData):
130 | if isinstance(branch_names, str):
131 | if branch_names in data.uns.keys():
132 | branch_names = data.uns[branch_names]
133 | elif branch_names in data.obsm.keys() and isinstance(
134 | data.obsm[branch_names], pd.DataFrame
135 | ):
136 | branch_names = list(data.obsm[branch_names].columns)
137 | elif branch_names + "_columns" in data.uns.keys():
138 | branch_names = data.uns[branch_names + "_columns"]
139 | else:
140 | raise KeyError(
141 | f"The provided key '{branch_names}' is not found in AnnData.uns or as a DataFrame in AnnData.obsm. "
142 | "Please ensure the 'branch_names' either exists in AnnData.uns or is a list of branch names."
143 | )
144 |
145 | gene_trends = dict()
146 | for branch in branch_names:
147 | trends, pt_grid = _validate_varm_key(data, gene_trend_key + "_" + branch)
148 | gene_trends[branch] = {"trends": trends}
149 | elif isinstance(data, Dict):
150 | gene_trends = data
151 | else:
152 | raise ValueError(
153 | "The input 'data' must be an instance of either AnnData object or dictionary."
154 | )
155 |
156 | return gene_trends
157 |
--------------------------------------------------------------------------------
/src/palantir/version.py:
--------------------------------------------------------------------------------
1 | """Version information."""
2 | import importlib.metadata
3 |
4 | try:
5 | # Get version from pyproject.toml via package metadata
6 | __version__ = importlib.metadata.version("palantir")
7 | except importlib.metadata.PackageNotFoundError:
8 | # Package is not installed, fall back to hardcoded version
9 | __version__ = "1.4.2rc" # Should match pyproject.toml
10 |
11 | __author__ = "Palantir development team"
12 | __author_email__ = "manu.talanki@gmail.com"
--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import pandas as pd
3 | import scanpy as sc
4 | from anndata import AnnData
5 | import numpy as np
6 | from scipy.sparse import csr_matrix
7 | from anndata import AnnData
8 |
9 |
10 | @pytest.fixture
11 | def example_dataframe():
12 | # Create an example dataframe for testing
13 | return pd.DataFrame(
14 | [[1, 2, 0, 4], [0, 0, 0, 0], [3, 0, 0, 0]],
15 | columns=["A", "B", "C", "D"],
16 | index=["X", "Y", "Z"],
17 | )
18 |
19 |
20 | @pytest.fixture
21 | def mock_data():
22 | n_cells = 50
23 | n_genes = 10
24 | return pd.DataFrame(
25 | np.random.rand(n_cells, n_genes),
26 | columns=[f"gene_{i}" for i in range(n_genes)],
27 | index=[f"cell_{i}" for i in range(n_cells)],
28 | )
29 |
30 |
31 | @pytest.fixture
32 | def mock_anndata(mock_data):
33 | ad = AnnData(X=mock_data)
34 | ad.obsm["X_pca"] = mock_data
35 | ad.obsm["DM_EigenVectors_multiscaled"] = mock_data
36 | return ad
37 |
38 |
39 | @pytest.fixture
40 | def mock_tsne():
41 | n_cells = 50
42 | return pd.DataFrame(
43 | np.random.rand(n_cells, 2),
44 | columns=["tSNE1", "tSNE2"],
45 | index=[f"cell_{i}" for i in range(n_cells)],
46 | )
47 |
48 |
49 | @pytest.fixture
50 | def mock_umap_df():
51 | n_cells = 50
52 | return pd.DataFrame(
53 | np.random.rand(n_cells, 2),
54 | columns=["UMAP1", "UMAP2"],
55 | index=[f"cell_{i}" for i in range(n_cells)],
56 | )
57 |
58 |
59 | @pytest.fixture
60 | def mock_gene_data():
61 | n_cells = 50
62 | n_genes = 5
63 | return pd.DataFrame(
64 | np.random.rand(n_cells, n_genes),
65 | columns=[f"gene_{i}" for i in range(n_genes)],
66 | index=[f"cell_{i}" for i in range(n_cells)],
67 | )
68 |
69 |
70 | @pytest.fixture
71 | def mock_dm_res():
72 | n_cells = 50
73 | n_components = 10
74 | return {
75 | "EigenVectors": pd.DataFrame(
76 | np.random.rand(n_cells, n_components),
77 | columns=[f"DC_{i}" for i in range(n_components)],
78 | index=[f"cell_{i}" for i in range(n_cells)],
79 | ),
80 | "EigenValues": np.random.rand(n_components),
81 | }
82 |
83 |
84 | @pytest.fixture
85 | def mock_clusters():
86 | n_cells = 50
87 | return pd.Series(
88 | np.random.randint(0, 5, n_cells),
89 | index=[f"cell_{i}" for i in range(n_cells)],
90 | )
91 |
92 |
93 | @pytest.fixture
94 | def mock_gene_trends():
95 | n_bins = 25
96 | n_genes = 5
97 | return pd.DataFrame(
98 | np.random.rand(n_bins, n_genes),
99 | columns=[f"gene_{i}" for i in range(n_genes)],
100 | index=np.linspace(0, 1, n_bins),
101 | )
102 |
--------------------------------------------------------------------------------
/tests/core_run_palantir.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import pandas as pd
3 | import scanpy as sc
4 | import numpy as np
5 |
6 | from palantir.presults import PResults
7 | from palantir.core import run_palantir
8 |
9 |
10 | @pytest.fixture
11 | def mock_data():
12 | n_cells = 50
13 | n_genes = 10
14 | return pd.DataFrame(
15 | np.random.rand(n_cells, n_genes),
16 | columns=[f"gene_{i}" for i in range(n_genes)],
17 | index=[f"cell_{i}" for i in range(n_cells)],
18 | )
19 |
20 |
21 | @pytest.fixture
22 | def mock_anndata(mock_data):
23 | ad = sc.AnnData(X=mock_data)
24 | ad.obsm["DM_EigenVectors_multiscaled"] = mock_data
25 | return ad
26 |
27 |
28 | # Test with basic DataFrame input
29 | @pytest.mark.filterwarnings("ignore:np.find_common_type is deprecated.")
30 | @pytest.mark.filterwarnings(
31 | "ignore:Changing the sparsity structure of a csr_matrix is expensive."
32 | )
33 | def test_palantir_dataframe(mock_data):
34 | result = run_palantir(mock_data, "cell_0")
35 | assert isinstance(result, PResults), "Should return a PResults object"
36 |
37 |
38 | # Test with basic AnnData input
39 | @pytest.mark.filterwarnings("ignore:np.find_common_type is deprecated.")
40 | @pytest.mark.filterwarnings(
41 | "ignore:Changing the sparsity structure of a csr_matrix is expensive."
42 | )
43 | def test_palantir_anndata(mock_anndata):
44 | run_palantir(mock_anndata, "cell_0")
45 | assert (
46 | "palantir_pseudotime" in mock_anndata.obs.keys()
47 | ), "Pseudotime key missing in AnnData object"
48 | assert (
49 | "palantir_entropy" in mock_anndata.obs.keys()
50 | ), "Entropy key missing in AnnData object"
51 | assert (
52 | "palantir_fate_probabilities" in mock_anndata.obsm.keys()
53 | ), "Fate probability key missing in AnnData object"
54 | assert (
55 | "palantir_waypoints" in mock_anndata.uns.keys()
56 | ), "Waypoint key missing in AnnData object"
57 |
58 |
59 | # Test terminal states
60 | @pytest.mark.filterwarnings("ignore:np.find_common_type is deprecated.")
61 | @pytest.mark.filterwarnings(
62 | "ignore:Changing the sparsity structure of a csr_matrix is expensive."
63 | )
64 | def test_palantir_terminal_states(mock_data):
65 | result = run_palantir(mock_data, "cell_0", terminal_states=["cell_1", "cell_2"])
66 | assert "cell_1" in result.branch_probs.columns, "Terminal state cell_1 missing"
67 | assert "cell_2" in result.branch_probs.columns, "Terminal state cell_2 missing"
68 |
69 |
70 | # Test scaling components
71 | @pytest.mark.filterwarnings("ignore:np.find_common_type is deprecated.")
72 | @pytest.mark.filterwarnings(
73 | "ignore:Changing the sparsity structure of a csr_matrix is expensive."
74 | )
75 | def test_scaling_components(mock_data):
76 | result1 = run_palantir(mock_data, "cell_0", scale_components=True)
77 | result2 = run_palantir(mock_data, "cell_0", scale_components=False)
78 | assert not np.array_equal(
79 | result1.pseudotime, result2.pseudotime
80 | ), "Scaling components should affect pseudotime"
81 |
82 |
83 | # Test for invalid knn
84 | def test_invalid_knn(mock_data):
85 | with pytest.raises(ValueError):
86 | run_palantir(mock_data, "cell_0", knn=0)
87 |
--------------------------------------------------------------------------------
/tests/presults.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | import palantir
4 |
5 |
6 | def test_PResults():
7 | # Create some dummy data
8 | pseudotime = np.array([0.1, 0.2, 0.3, 0.4, 0.5])
9 | entropy = None
10 | branch_probs = pd.DataFrame(
11 | {"branch1": [0.1, 0.2, 0.3, 0.4, 0.5], "branch2": [0.5, 0.4, 0.3, 0.2, 0.1]}
12 | )
13 | waypoints = None
14 |
15 | # Initialize PResults object
16 | presults = palantir.presults.PResults(pseudotime, entropy, branch_probs, waypoints)
17 |
18 | # Asserts to check attributes
19 | assert np.array_equal(presults.pseudotime, pseudotime)
20 | assert presults.entropy is None
21 | assert presults.waypoints is None
22 | assert np.array_equal(presults.branch_probs, branch_probs.values)
23 |
24 |
25 | def test_gam_fit_predict():
26 | # Create some dummy data
27 | x = np.array([0.1, 0.2, 0.3, 0.4, 0.5])
28 | y = np.array([0.1, 0.2, 0.3, 0.4, 0.5])
29 | weights = None
30 | pred_x = None
31 | n_splines = 4
32 | spline_order = 2
33 |
34 | # Call the function
35 | y_pred, stds = palantir.presults.gam_fit_predict(
36 | x, y, weights, pred_x, n_splines, spline_order
37 | )
38 |
39 | # Asserts to check the output
40 | assert isinstance(y_pred, np.ndarray)
41 | assert isinstance(stds, np.ndarray)
42 |
--------------------------------------------------------------------------------
/tests/presults_compute_gene_trends.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import pandas as pd
3 | import numpy as np
4 | from anndata import AnnData
5 | import palantir
6 |
7 |
8 | @pytest.fixture
9 | def mock_adata():
10 | n_cells = 10
11 |
12 | # Create mock data
13 | adata = AnnData(
14 | X=np.random.rand(n_cells, 3),
15 | obs=pd.DataFrame(
16 | {"palantir_pseudotime": np.random.rand(n_cells)},
17 | index=[f"cell_{i}" for i in range(n_cells)],
18 | ),
19 | var=pd.DataFrame(index=[f"gene_{i}" for i in range(3)]),
20 | )
21 |
22 | adata.obsm["branch_masks"] = pd.DataFrame(
23 | np.random.randint(2, size=(n_cells, 2)),
24 | columns=["branch_1", "branch_2"],
25 | index=adata.obs_names,
26 | ).astype(bool)
27 |
28 | return adata
29 |
30 |
31 | @pytest.fixture
32 | def custom_mock_adata():
33 | n_cells = 10
34 |
35 | # Create mock data
36 | adata = AnnData(
37 | X=np.random.rand(n_cells, 3),
38 | obs=pd.DataFrame(
39 | {"custom_time": np.random.rand(n_cells)},
40 | index=[f"cell_{i}" for i in range(n_cells)],
41 | ),
42 | var=pd.DataFrame(index=[f"gene_{i}" for i in range(3)]),
43 | )
44 |
45 | adata.obsm["custom_masks"] = pd.DataFrame(
46 | np.random.randint(2, size=(n_cells, 2)),
47 | columns=["branch_1", "branch_2"],
48 | index=adata.obs_names,
49 | ).astype(bool)
50 |
51 | return adata
52 |
53 |
54 | @pytest.fixture
55 | def mock_adata_old():
56 | n_cells = 10
57 |
58 | # Create mock data
59 | adata = AnnData(
60 | X=np.random.rand(n_cells, 3),
61 | obs=pd.DataFrame(
62 | {"palantir_pseudotime": np.random.rand(n_cells)},
63 | index=[f"cell_{i}" for i in range(n_cells)],
64 | ),
65 | var=pd.DataFrame(index=[f"gene_{i}" for i in range(3)]),
66 | )
67 |
68 | # Create mock branch_masks in obsm
69 | adata.obsm["branch_masks"] = np.random.randint(2, size=(n_cells, 2)).astype(bool)
70 | adata.uns["branch_masks_columns"] = ["branch_1", "branch_2"]
71 |
72 | return adata
73 |
74 |
75 | @pytest.mark.parametrize("adata_fixture", ["mock_adata", "mock_adata_old"])
76 | def test_compute_gene_trends(request, adata_fixture):
77 | adata = request.getfixturevalue(adata_fixture)
78 |
79 | # Call the function with default keys
80 | res = palantir.presults.compute_gene_trends(adata)
81 |
82 | # Asserts to check the output
83 | assert isinstance(res, dict)
84 | assert "branch_1" in res
85 | assert "branch_2" in res
86 | assert isinstance(res["branch_1"], dict)
87 | assert isinstance(res["branch_1"]["trends"], pd.DataFrame)
88 | assert "gene_0" in res["branch_1"]["trends"].index
89 | assert adata.varm["gene_trends_branch_1"].shape == (3, 500)
90 |
91 |
92 | def test_compute_gene_trends_custom_anndata(custom_mock_adata):
93 | # Call the function with custom keys
94 | res = palantir.presults.compute_gene_trends(
95 | custom_mock_adata,
96 | masks_key="custom_masks",
97 | pseudo_time_key="custom_time",
98 | gene_trend_key="custom_trends",
99 | )
100 |
101 | # Asserts to check the output with custom keys
102 | assert isinstance(res, dict)
103 | assert "branch_1" in res
104 | assert "branch_2" in res
105 | assert isinstance(res["branch_1"], dict)
106 | assert isinstance(res["branch_1"]["trends"], pd.DataFrame)
107 | assert "gene_0" in res["branch_1"]["trends"].index
108 | assert custom_mock_adata.varm["custom_trends_branch_1"].shape == (3, 500)
109 |
--------------------------------------------------------------------------------
/tests/test_core_run_palantir.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import pandas as pd
3 | import numpy as np
4 |
5 | from palantir.presults import PResults
6 | from palantir.core import run_palantir
7 |
8 |
9 | # Test with basic DataFrame input
10 | @pytest.mark.filterwarnings("ignore:np.find_common_type is deprecated.")
11 | @pytest.mark.filterwarnings("ignore:Changing the sparsity structure of a csr_matrix is expensive.")
12 | def test_palantir_dataframe(mock_data):
13 | result = run_palantir(mock_data, "cell_0")
14 | assert isinstance(result, PResults), "Should return a PResults object"
15 |
16 |
17 | # Test with basic AnnData input
18 | @pytest.mark.filterwarnings("ignore:np.find_common_type is deprecated.")
19 | @pytest.mark.filterwarnings("ignore:Changing the sparsity structure of a csr_matrix is expensive.")
20 | def test_palantir_anndata(mock_anndata):
21 | run_palantir(mock_anndata, "cell_0")
22 | assert (
23 | "palantir_pseudotime" in mock_anndata.obs.keys()
24 | ), "Pseudotime key missing in AnnData object"
25 | assert "palantir_entropy" in mock_anndata.obs.keys(), "Entropy key missing in AnnData object"
26 | assert (
27 | "palantir_fate_probabilities" in mock_anndata.obsm.keys()
28 | ), "Fate probability key missing in AnnData object"
29 | assert "palantir_waypoints" in mock_anndata.uns.keys(), "Waypoint key missing in AnnData object"
30 |
31 |
32 | # Test terminal states
33 | @pytest.mark.filterwarnings("ignore:np.find_common_type is deprecated.")
34 | @pytest.mark.filterwarnings("ignore:Changing the sparsity structure of a csr_matrix is expensive.")
35 | def test_palantir_terminal_states(mock_data):
36 | result = run_palantir(mock_data, "cell_0", terminal_states=["cell_1", "cell_2"])
37 | assert "cell_1" in result.branch_probs.columns, "Terminal state cell_1 missing"
38 | assert "cell_2" in result.branch_probs.columns, "Terminal state cell_2 missing"
39 |
40 |
41 | # Test scaling components
42 | @pytest.mark.filterwarnings("ignore:np.find_common_type is deprecated.")
43 | @pytest.mark.filterwarnings("ignore:Changing the sparsity structure of a csr_matrix is expensive.")
44 | def test_scaling_components(mock_data):
45 | result1 = run_palantir(mock_data, "cell_0", scale_components=True)
46 | result2 = run_palantir(mock_data, "cell_0", scale_components=False)
47 | assert not np.array_equal(
48 | result1.pseudotime, result2.pseudotime
49 | ), "Scaling components should affect pseudotime"
50 |
51 |
52 | # Test for invalid knn
53 | def test_invalid_knn(mock_data):
54 | with pytest.raises(ValueError):
55 | run_palantir(mock_data, "cell_0", knn=0)
56 |
--------------------------------------------------------------------------------
/tests/test_integration.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import pandas as pd
3 | import numpy as np
4 | import scanpy as sc
5 | from anndata import AnnData
6 | import os
7 | import tempfile
8 | import anndata
9 | import warnings
10 |
11 | import palantir
12 |
13 |
14 | @pytest.fixture
15 | def sample_data():
16 | """Load the sample data from the data directory"""
17 | # Get the data directory relative to the test file
18 | data_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "data")
19 | file_path = os.path.join(data_dir, "marrow_sample_scseq_counts.h5ad")
20 |
21 | # Skip test if the data file doesn't exist
22 | if not os.path.exists(file_path):
23 | pytest.skip(f"Sample data file {file_path} not found")
24 |
25 | # Load the data
26 | ad = anndata.read_h5ad(file_path)
27 | return ad
28 |
29 |
30 | @pytest.fixture
31 | def processed_data(sample_data):
32 | """Process the sample data for Palantir"""
33 | ad = sample_data.copy()
34 |
35 | # Normalize and log transform
36 | sc.pp.normalize_per_cell(ad)
37 | palantir.preprocess.log_transform(ad)
38 |
39 | # Select highly variable genes
40 | sc.pp.highly_variable_genes(ad, n_top_genes=1500, flavor="cell_ranger")
41 |
42 | # Run PCA
43 | sc.pp.pca(ad)
44 |
45 | # Run diffusion maps
46 | palantir.utils.run_diffusion_maps(ad, n_components=5)
47 |
48 | # Determine multiscale space
49 | palantir.utils.determine_multiscale_space(ad)
50 |
51 | # Set up neighbors for visualization
52 | sc.pp.neighbors(ad)
53 | sc.tl.umap(ad)
54 |
55 | # Run MAGIC imputation
56 | palantir.utils.run_magic_imputation(ad)
57 |
58 | return ad
59 |
60 |
61 | def test_palantir_reproducibility(processed_data):
62 | """Test that Palantir results are reproducible"""
63 | ad = processed_data.copy()
64 |
65 | # Set up terminal states (same as sample notebook)
66 | terminal_states = pd.Series(
67 | ["DC", "Mono", "Ery"],
68 | index=["Run5_131097901611291", "Run5_134936662236454", "Run4_200562869397916"],
69 | )
70 |
71 | # Set start cell (same as sample notebook)
72 | start_cell = "Run5_164698952452459"
73 |
74 | # Run Palantir
75 | pr_res = palantir.core.run_palantir(
76 | ad, start_cell, num_waypoints=500, terminal_states=terminal_states
77 | )
78 |
79 | # Expected values for the start cell
80 | # These are expected probabilities for the start cell from the sample notebook
81 | expected_probs = {"Ery": 0.33, "DC": 0.33, "Mono": 0.33}
82 |
83 | # Get actual values
84 | actual_probs = pr_res.branch_probs.loc[start_cell]
85 |
86 | # Check that probabilities are close to expected (start cell should be roughly equal probabilities)
87 | for branch, expected in expected_probs.items():
88 | assert (
89 | np.abs(actual_probs[branch] - expected) < 0.15
90 | ), f"Branch {branch} probability differs more than expected"
91 |
92 | # Expected values for terminal state cells
93 | for term_cell, term_name in terminal_states.items():
94 | # Terminal state cell should have high probability for its own fate
95 | assert (
96 | pr_res.branch_probs.loc[term_cell, term_name] > 0.7
97 | ), f"Terminal state {term_name} doesn't have high probability"
98 |
99 | # Pseudotime should be 0 for start cell (or very close)
100 | assert pr_res.pseudotime[start_cell] < 0.05, "Start cell pseudotime should be close to 0"
101 |
102 | # Entropy should be high for start cell (multipotent state)
103 | assert pr_res.entropy[start_cell] > 0.8, "Start cell entropy should be high"
104 |
105 | # Terminal states should have low entropy
106 | for term_cell in terminal_states.index:
107 | assert (
108 | pr_res.entropy[term_cell] < 0.5
109 | ), f"Terminal state {term_cell} should have low entropy"
110 |
111 |
112 | def test_branch_selection(processed_data):
113 | """Test the branch selection functionality"""
114 | ad = processed_data.copy()
115 |
116 | # Set up terminal states
117 | terminal_states = pd.Series(
118 | ["DC", "Mono", "Ery"],
119 | index=["Run5_131097901611291", "Run5_134936662236454", "Run4_200562869397916"],
120 | )
121 |
122 | # Run Palantir
123 | start_cell = "Run5_164698952452459"
124 | palantir.core.run_palantir(ad, start_cell, num_waypoints=500, terminal_states=terminal_states)
125 |
126 | # Run branch selection
127 | masks = palantir.presults.select_branch_cells(ad, eps=0)
128 |
129 | # Check that the masks were computed correctly
130 | assert masks.shape[1] == 3, "Should have 3 branches selected"
131 | assert masks.shape[0] == ad.n_obs, "Should have a mask for each cell"
132 |
133 | # Check that the masks were stored in the AnnData object
134 | assert "branch_masks" in ad.obsm, "Branch masks should be stored in obsm"
135 |
136 | # Check that terminal cells are selected in their respective branches
137 | for term_cell, term_name in terminal_states.items():
138 | branch_idx = list(ad.obsm["palantir_fate_probabilities"].columns).index(term_name)
139 | assert masks[ad.obs_names == term_cell, branch_idx][
140 | 0
141 | ], f"Terminal cell {term_name} should be selected in its branch"
142 |
143 |
144 | def test_gene_trends(processed_data):
145 | """Test gene trend computation"""
146 | ad = processed_data.copy()
147 |
148 | # Set up terminal states
149 | terminal_states = pd.Series(
150 | ["DC", "Mono", "Ery"],
151 | index=["Run5_131097901611291", "Run5_134936662236454", "Run4_200562869397916"],
152 | )
153 |
154 | # Run Palantir
155 | start_cell = "Run5_164698952452459"
156 | palantir.core.run_palantir(ad, start_cell, num_waypoints=500, terminal_states=terminal_states)
157 |
158 | # Select branch cells
159 | palantir.presults.select_branch_cells(ad, eps=0)
160 |
161 | # Compute gene trends
162 | gene_trends = palantir.presults.compute_gene_trends(
163 | ad,
164 | expression_key="MAGIC_imputed_data",
165 | )
166 |
167 | # Expected gene expression patterns
168 | # CD34 should decrease along all lineages (stem cell marker)
169 | # GATA1 should increase in erythroid lineage
170 | # MPO should increase in monocyte lineage
171 | # IRF8 should increase in DC lineage
172 |
173 | # Check that gene trends were computed for all branches
174 | assert "Ery" in gene_trends, "Erythroid gene trends missing"
175 | assert "DC" in gene_trends, "DC gene trends missing"
176 | assert "Mono" in gene_trends, "Monocyte gene trends missing"
177 |
178 | # Check that gene trends were stored in the AnnData object
179 | assert "gene_trends_Ery" in ad.varm, "Erythroid gene trends not stored in varm"
180 |
181 | # Get the trend data for specific genes
182 | cd34_ery = ad.varm["gene_trends_Ery"].loc["CD34"].values
183 | gata1_ery = ad.varm["gene_trends_Ery"].loc["GATA1"].values
184 |
185 | # CD34 should decrease in erythroid lineage (end lower than start)
186 | assert cd34_ery[0] > cd34_ery[-1], "CD34 should decrease along erythroid lineage"
187 |
188 | # GATA1 should increase in erythroid lineage (end higher than start)
189 | assert gata1_ery[0] < gata1_ery[-1], "GATA1 should increase along erythroid lineage"
190 |
191 |
192 | def test_clustering_gene_trends(processed_data):
193 | """Test clustering of gene trends"""
194 | ad = processed_data.copy()
195 |
196 | # Set up terminal states
197 | terminal_states = pd.Series(
198 | ["DC", "Mono", "Ery"],
199 | index=["Run5_131097901611291", "Run5_134936662236454", "Run4_200562869397916"],
200 | )
201 |
202 | # Run Palantir
203 | start_cell = "Run5_164698952452459"
204 | palantir.core.run_palantir(ad, start_cell, num_waypoints=500, terminal_states=terminal_states)
205 |
206 | # Select branch cells
207 | palantir.presults.select_branch_cells(ad, eps=0)
208 |
209 | # Compute gene trends
210 | palantir.presults.compute_gene_trends(
211 | ad,
212 | expression_key="MAGIC_imputed_data",
213 | )
214 |
215 | # Select a subset of genes for clustering
216 | genes = ["CD34", "MPO", "GATA1", "IRF8", "CSF1R", "ITGA2B", "CD79A", "CD79B"]
217 |
218 | # Cluster gene trends
219 | clusters = palantir.presults.cluster_gene_trends(ad, "Ery", genes)
220 |
221 | # Check that all genes were clustered
222 | assert len(clusters) == len(genes), "Not all genes were clustered"
223 |
224 | # Check that clusters were stored in the AnnData object
225 | assert "gene_trends_clusters" in ad.var, "Clusters should be stored in var"
226 |
227 | # Related genes should be clustered together
228 | # For example, CD79A and CD79B should be in the same cluster
229 | cd79a_cluster = clusters.loc["CD79A"]
230 | cd79b_cluster = clusters.loc["CD79B"]
231 | assert cd79a_cluster == cd79b_cluster, "CD79A and CD79B should be in the same cluster"
232 |
--------------------------------------------------------------------------------
/tests/test_io.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import h5py
3 | import pandas as pd
4 | import numpy as np
5 | import os.path
6 | import scanpy as sc
7 | from scipy.io import mmread, mmwrite
8 | from scipy.sparse import csr_matrix, csc_matrix
9 | from palantir.io import (
10 | _clean_up,
11 | from_csv,
12 | from_mtx,
13 | from_10x,
14 | from_10x_HDF5,
15 | from_fcs,
16 | )
17 |
18 | # Check if fcsparser is available
19 | try:
20 | import fcsparser
21 | FCSPARSER_AVAILABLE = True
22 | except ImportError:
23 | FCSPARSER_AVAILABLE = False
24 |
25 |
26 | @pytest.fixture
27 | def example_dataframe():
28 | # Create an example dataframe for testing
29 | return pd.DataFrame(
30 | [[1, 2, 0, 4], [0, 0, 0, 0], [3, 0, 0, 0]],
31 | columns=["A", "B", "C", "D"],
32 | index=["X", "Y", "Z"],
33 | )
34 |
35 |
36 | @pytest.fixture
37 | def mock_10x_h5(tmp_path):
38 | # Number of genes and cells
39 | n_genes = 400
40 | n_cells = 300
41 |
42 | # Simulate a sparse gene expression matrix
43 | data = np.random.poisson(lam=0.3, size=(n_genes, n_cells))
44 | sparse_matrix = csc_matrix(data)
45 |
46 | # Create barcodes, gene names, etc.
47 | barcodes = np.array([f"Cell_{i:05d}-1" for i in range(n_cells)])
48 | gene_names = np.array([f"Gene_{i}" for i in range(n_genes)])
49 | feature_type = np.array(["Gene Expression" for i in range(n_genes)])
50 | features = np.array(
51 | [
52 | "gene",
53 | ]
54 | )
55 | genome = np.array([f"genome_{i%4}" for i in range(n_genes)])
56 |
57 | # Creating an HDF5 file
58 | hdf5_file = tmp_path / "mock_10x_v3_data.h5"
59 | with h5py.File(hdf5_file, "w") as f:
60 | f.create_group("matrix")
61 | f["matrix"].create_dataset("shape", data=np.array(sparse_matrix.shape))
62 | f["matrix"].create_dataset("data", data=sparse_matrix.data)
63 | f["matrix"].create_dataset("indices", data=sparse_matrix.indices)
64 | f["matrix"].create_dataset("indptr", data=sparse_matrix.indptr)
65 | f["matrix"].create_dataset("barcodes", data=barcodes.astype("S"))
66 | f["matrix"].create_dataset("name", data=gene_names.astype("S"))
67 | f["matrix"].create_dataset("id", data=gene_names.astype("S"))
68 | f["matrix"].create_dataset("feature_type", data=feature_type.astype("S"))
69 | f["matrix"].create_dataset("genome", data=genome.astype("S"))
70 |
71 | f["matrix"].create_group("features")
72 | f["matrix/features"].create_dataset("name", data=gene_names.astype("S"))
73 | f["matrix/features"].create_dataset("id", data=gene_names.astype("S"))
74 | f["matrix/features"].create_dataset("feature_type", data=feature_type.astype("S"))
75 | f["matrix/features"].create_dataset("genome", data=genome.astype("S"))
76 |
77 | return str(hdf5_file)
78 |
79 |
80 | def test_clean_up(example_dataframe):
81 | # Test for the _clean_up function
82 | cleaned_df = _clean_up(example_dataframe)
83 | assert len(cleaned_df) == 2
84 | assert len(cleaned_df.columns) == 3
85 |
86 |
87 | def test_from_csv(tmp_path, example_dataframe):
88 | # Test for the from_csv function
89 | csv_file = tmp_path / "test.csv"
90 | example_dataframe.to_csv(csv_file)
91 |
92 | clean_df = from_csv(csv_file)
93 | assert len(clean_df) == 2
94 | assert len(clean_df.columns) == 3
95 |
96 |
97 | def test_from_mtx(tmp_path):
98 | # Test for the from_mtx function
99 | mtx_file = tmp_path / "test.mtx"
100 | gene_name_file = tmp_path / "gene_names.txt"
101 |
102 | # Create a mock mtx file
103 | mtx_data = [
104 | "%%MatrixMarket matrix coordinate integer general",
105 | "3 4 6",
106 | "1 1 1",
107 | "1 2 2",
108 | "2 4 3",
109 | "3 1 3",
110 | "3 2 4",
111 | "3 3 5",
112 | ]
113 | with open(mtx_file, "w") as f:
114 | f.write("\n".join(mtx_data))
115 |
116 | # Create gene names file
117 | gene_names = ["Gene1", "Gene2", "Gene3", "Gene4"]
118 | np.savetxt(gene_name_file, gene_names, fmt="%s")
119 |
120 | clean_df = from_mtx(mtx_file, gene_name_file)
121 | assert len(clean_df) == 3
122 | assert len(clean_df.columns) == 4
123 |
124 |
125 | def test_from_10x(tmp_path):
126 | # Test for the from_10x function
127 | data_dir = tmp_path / "data"
128 | os.makedirs(data_dir, exist_ok=True)
129 |
130 | matrix_file = data_dir / "matrix.mtx"
131 | gene_file = data_dir / "genes.tsv"
132 | barcode_file = data_dir / "barcodes.tsv"
133 |
134 | mmwrite(str(matrix_file), csr_matrix([[1, 2], [3, 4]]))
135 | np.savetxt(str(gene_file), ["Gene1", "Gene2"], fmt="%s")
136 | np.savetxt(str(barcode_file), ["Cell1", "Cell2"], fmt="%s")
137 |
138 | clean_df = from_10x(str(data_dir))
139 | print(clean_df)
140 | assert len(clean_df) == 2
141 | assert len(clean_df.columns) == 2
142 |
143 |
144 | def test_from_10x_HDF5(mock_10x_h5):
145 | clean_df = from_10x_HDF5(mock_10x_h5)
146 | assert len(clean_df) == 300
147 | assert len(clean_df.columns) == 400
148 |
149 |
150 | @pytest.mark.skipif(not FCSPARSER_AVAILABLE, reason="fcsparser not installed")
151 | def test_from_fcs():
152 | df = from_fcs(None, fcsparser.test_sample_path)
153 | assert len(df) == 14945
154 | assert len(df.columns) == 10
155 |
--------------------------------------------------------------------------------
/tests/test_preprocess.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import pandas as pd
3 | import numpy as np
4 | import anndata
5 | from scipy.sparse import csr_matrix
6 |
7 | from palantir.preprocess import filter_counts_data, normalize_counts, log_transform
8 |
9 |
10 | def test_filter_counts_data():
11 | """Test filtering of low count cells and genes"""
12 | # Create test data
13 | data = pd.DataFrame(
14 | [[10, 0, 5, 8], [0, 0, 0, 0], [15, 20, 0, 0]],
15 | columns=["gene1", "gene2", "gene3", "gene4"],
16 | index=["cell1", "cell2", "cell3"],
17 | )
18 |
19 | # Test with minimal thresholds to match our test data
20 | filtered = filter_counts_data(data, cell_min_molecules=1, genes_min_cells=1)
21 | assert filtered.shape == (2, 4) # Only cell2 should be filtered out
22 | assert "cell2" not in filtered.index
23 | assert "gene1" in filtered.columns
24 |
25 | # Test with higher thresholds
26 | filtered = filter_counts_data(data, cell_min_molecules=20, genes_min_cells=1)
27 | # Based on actual implementation behavior
28 | assert len(filtered) > 0 # At least some cells remain
29 | assert "cell2" not in filtered.index # cell2 should be filtered out
30 |
31 |
32 | def test_normalize_counts():
33 | """Test count normalization"""
34 | # Create test data
35 | data = pd.DataFrame(
36 | [[10, 5, 5], [5, 10, 5], [5, 5, 10]],
37 | columns=["gene1", "gene2", "gene3"],
38 | index=["cell1", "cell2", "cell3"],
39 | )
40 |
41 | # Test normalization
42 | normalized = normalize_counts(data)
43 |
44 | # Check that row sums are equal (or very close due to floating point)
45 | row_sums = normalized.sum(axis=1)
46 | assert np.allclose(row_sums, row_sums.iloc[0])
47 |
48 | # Check relative abundances are maintained
49 | assert normalized.loc["cell1", "gene1"] > normalized.loc["cell1", "gene2"]
50 | assert normalized.loc["cell2", "gene2"] > normalized.loc["cell2", "gene1"]
51 | assert normalized.loc["cell3", "gene3"] > normalized.loc["cell3", "gene1"]
52 |
53 |
54 | def test_log_transform_dataframe():
55 | """Test log transformation on DataFrame"""
56 | # Create test data
57 | data = pd.DataFrame(
58 | [[1, 2], [3, 4]],
59 | columns=["gene1", "gene2"],
60 | index=["cell1", "cell2"],
61 | )
62 |
63 | # Test with default pseudo_count
64 | transformed = log_transform(data)
65 | # The function returns np.log2(data + pseudo_count)
66 | expected = np.log2(data + 0.1)
67 | assert np.allclose(transformed, expected)
68 |
69 | # Test with custom pseudo_count
70 | transformed = log_transform(data, pseudo_count=1)
71 | expected = np.log2(data + 1)
72 | assert np.allclose(transformed, expected)
73 |
74 |
75 | def test_log_transform_anndata():
76 | """Test log transformation on AnnData"""
77 | # Create dense AnnData
78 | X = np.array([[1, 2], [3, 4]])
79 | adata = anndata.AnnData(X)
80 |
81 | # Test dense case
82 | original_X = adata.X.copy()
83 | log_transform(adata)
84 | # The implementation adds an offset to log2(x + pseudo_count)
85 | expected = np.log2(original_X + 0.1) - np.log2(0.1)
86 | assert np.allclose(adata.X, expected)
87 |
88 | # Create sparse AnnData
89 | X_sparse = csr_matrix(np.array([[1, 2], [3, 4]]))
90 | adata_sparse = anndata.AnnData(X_sparse)
91 |
92 | # Test sparse case
93 | original_data = X_sparse.data.copy()
94 | log_transform(adata_sparse)
95 | # The implementation adds an offset to log2(x + pseudo_count)
96 | expected_data = np.log2(original_data + 0.1) - np.log2(0.1)
97 | assert np.allclose(adata_sparse.X.data, expected_data)
98 |
--------------------------------------------------------------------------------
/tests/test_presults.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | import palantir
4 |
5 |
6 | def test_PResults():
7 | # Create some dummy data
8 | pseudotime = np.array([0.1, 0.2, 0.3, 0.4, 0.5])
9 | entropy = None
10 | branch_probs = pd.DataFrame(
11 | {"branch1": [0.1, 0.2, 0.3, 0.4, 0.5], "branch2": [0.5, 0.4, 0.3, 0.2, 0.1]}
12 | )
13 | waypoints = None
14 |
15 | # Initialize PResults object
16 | presults = palantir.presults.PResults(pseudotime, entropy, branch_probs, waypoints)
17 |
18 | # Asserts to check attributes
19 | assert np.array_equal(presults.pseudotime, pseudotime)
20 | assert presults.entropy is None
21 | assert presults.waypoints is None
22 | assert np.array_equal(presults.branch_probs, branch_probs.values)
23 |
24 |
25 | def test_gam_fit_predict():
26 | # Skip test if pygam is not installed
27 | try:
28 | import pygam
29 | except ImportError:
30 | import pytest
31 | pytest.skip("pygam not installed, skipping test_gam_fit_predict")
32 |
33 | # Create some dummy data
34 | x = np.array([0.1, 0.2, 0.3, 0.4, 0.5])
35 | y = np.array([0.1, 0.2, 0.3, 0.4, 0.5])
36 | weights = None
37 | pred_x = None
38 | n_splines = 4
39 | spline_order = 2
40 |
41 | try:
42 | # Call the function
43 | y_pred, stds = palantir.presults.gam_fit_predict(x, y, weights, pred_x, n_splines, spline_order)
44 |
45 | # Asserts to check the output
46 | assert isinstance(y_pred, np.ndarray)
47 | assert isinstance(stds, np.ndarray)
48 | except Exception as e:
49 | import pytest
50 | if "csr_matrix" in str(e) and "attribute 'A'" in str(e):
51 | pytest.skip("scipy/pygam compatibility issue, skipping test")
52 | else:
53 | raise
54 |
--------------------------------------------------------------------------------
/tests/test_presults_cluster_gene_trends.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import pandas as pd
3 | import numpy as np
4 | import scanpy as sc
5 | from anndata import AnnData
6 |
7 | from palantir.presults import cluster_gene_trends
8 |
9 |
10 | def test_cluster_gene_trends_basic():
11 | """Test basic functionality of cluster_gene_trends"""
12 | # Create a simple DataFrame of gene trends
13 | n_genes = 30
14 | n_timepoints = 50
15 |
16 | # Create some patterns that should cluster together
17 | timepoints = np.linspace(0, 1, n_timepoints)
18 |
19 | # Create random trends with some patterns
20 | np.random.seed(42)
21 | trends = pd.DataFrame(index=[f"gene_{i}" for i in range(n_genes)], columns=timepoints)
22 |
23 | # First 10 genes follow similar pattern (increasing)
24 | for i in range(10):
25 | trends.iloc[i] = np.linspace(0, 1, n_timepoints) + np.random.normal(0, 0.1, n_timepoints)
26 |
27 | # Next 10 genes follow another pattern (decreasing)
28 | for i in range(10, 20):
29 | trends.iloc[i] = np.linspace(1, 0, n_timepoints) + np.random.normal(0, 0.1, n_timepoints)
30 |
31 | # Last 10 genes follow a third pattern (bell curve)
32 | for i in range(20, 30):
33 | trends.iloc[i] = np.sin(np.linspace(0, np.pi, n_timepoints)) + np.random.normal(
34 | 0, 0.1, n_timepoints
35 | )
36 |
37 | # Test with DataFrame
38 | clusters = cluster_gene_trends(trends, "branch1")
39 |
40 | # Check output
41 | assert isinstance(clusters, pd.Series)
42 | assert len(clusters) == n_genes
43 | assert clusters.index.equals(trends.index)
44 |
45 | # There should be at least 2 clusters found
46 | assert len(clusters.unique()) >= 2
47 |
48 | # Check that similar genes are clustered together
49 | # First 10 genes should mostly be in the same cluster
50 | first_cluster = clusters.iloc[:10].mode().iloc[0]
51 | assert (
52 | clusters.iloc[:10] == first_cluster
53 | ).mean() > 0.5 # More than half should be in the same cluster
54 |
55 |
56 | def test_cluster_gene_trends_anndata():
57 | """Test cluster_gene_trends with AnnData input"""
58 | # Create AnnData object
59 | n_cells = 100
60 | n_genes = 30
61 | adata = AnnData(np.random.normal(0, 1, (n_cells, n_genes)))
62 | adata.var_names = [f"gene_{i}" for i in range(n_genes)]
63 |
64 | # Create gene trends for the branch
65 | n_timepoints = 50
66 | timepoints = np.linspace(0, 1, n_timepoints)
67 | branch_key = "test_branch"
68 |
69 | # Same trends as before
70 | trends = np.zeros((n_genes, n_timepoints))
71 | # First 10 genes
72 | for i in range(10):
73 | trends[i] = np.linspace(0, 1, n_timepoints) + np.random.normal(0, 0.1, n_timepoints)
74 | # Next 10 genes
75 | for i in range(10, 20):
76 | trends[i] = np.linspace(1, 0, n_timepoints) + np.random.normal(0, 0.1, n_timepoints)
77 | # Last 10 genes
78 | for i in range(20, 30):
79 | trends[i] = np.sin(np.linspace(0, np.pi, n_timepoints)) + np.random.normal(
80 | 0, 0.1, n_timepoints
81 | )
82 |
83 | # Store the trends in AnnData
84 | adata.varm[f"gene_trends_{branch_key}"] = pd.DataFrame(
85 | trends, index=adata.var_names, columns=[str(t) for t in timepoints]
86 | )
87 |
88 | # Run clustering
89 | clusters = cluster_gene_trends(adata, branch_key, gene_trend_key="gene_trends")
90 |
91 | # Check output
92 | assert isinstance(clusters, pd.Series)
93 | assert len(clusters) == n_genes
94 | assert clusters.index.equals(adata.var_names)
95 |
96 | # The clusters should be stored in the var annotation
97 | assert "gene_trends_clusters" in adata.var
98 | assert np.all(adata.var["gene_trends_clusters"] == clusters)
99 |
100 |
101 | def test_cluster_gene_trends_custom_genes():
102 | """Test cluster_gene_trends with subset of genes"""
103 | # Create a simple DataFrame of gene trends
104 | n_genes = 30
105 | n_timepoints = 50
106 | timepoints = np.linspace(0, 1, n_timepoints)
107 |
108 | # Create trends
109 | np.random.seed(42)
110 | trends = pd.DataFrame(
111 | np.random.normal(0, 1, (n_genes, n_timepoints)),
112 | index=[f"gene_{i}" for i in range(n_genes)],
113 | columns=timepoints,
114 | )
115 |
116 | # Select a subset of genes
117 | selected_genes = [f"gene_{i}" for i in range(0, n_genes, 2)] # Every other gene
118 |
119 | # Test with subset of genes
120 | clusters = cluster_gene_trends(trends, "branch1", genes=selected_genes)
121 |
122 | # Check output
123 | assert isinstance(clusters, pd.Series)
124 | assert len(clusters) == len(selected_genes)
125 | assert set(clusters.index) == set(selected_genes)
126 |
127 |
128 | def test_cluster_gene_trends_parameters():
129 | """Test cluster_gene_trends with custom parameters"""
130 | # Create a simple DataFrame of gene trends
131 | n_genes = 30
132 | n_timepoints = 50
133 | timepoints = np.linspace(0, 1, n_timepoints)
134 |
135 | # Create trends
136 | np.random.seed(42)
137 | trends = pd.DataFrame(
138 | np.random.normal(0, 1, (n_genes, n_timepoints)),
139 | index=[f"gene_{i}" for i in range(n_genes)],
140 | columns=timepoints,
141 | )
142 |
143 | # Test with custom parameters
144 | clusters1 = cluster_gene_trends(trends, "branch1", n_neighbors=10)
145 | clusters2 = cluster_gene_trends(trends, "branch1", n_neighbors=20)
146 |
147 | # The clusters should be different with different parameters
148 | assert (clusters1 != clusters2).any()
149 |
150 |
151 | def test_cluster_gene_trends_error_handling():
152 | """Test error handling of cluster_gene_trends"""
153 | # Create AnnData without varm data
154 | adata = AnnData(np.random.normal(0, 1, (10, 10)))
155 |
156 | # Should raise KeyError for missing gene_trend_key
157 | with pytest.raises(KeyError):
158 | cluster_gene_trends(adata, "branch1", gene_trend_key=None)
159 |
160 | # Should raise KeyError for missing branch data
161 | with pytest.raises(KeyError):
162 | cluster_gene_trends(adata, "nonexistent_branch", gene_trend_key="some_key")
163 |
--------------------------------------------------------------------------------
/tests/test_presults_compute_gene_trends.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import pandas as pd
3 | import numpy as np
4 | from anndata import AnnData
5 | import palantir
6 |
7 |
8 | @pytest.fixture
9 | def mock_adata():
10 | n_cells = 10
11 |
12 | # Create mock data
13 | adata = AnnData(
14 | X=np.random.rand(n_cells, 3),
15 | obs=pd.DataFrame(
16 | {"palantir_pseudotime": np.random.rand(n_cells)},
17 | index=[f"cell_{i}" for i in range(n_cells)],
18 | ),
19 | var=pd.DataFrame(index=[f"gene_{i}" for i in range(3)]),
20 | )
21 |
22 | adata.obsm["branch_masks"] = pd.DataFrame(
23 | np.random.randint(2, size=(n_cells, 2)),
24 | columns=["branch_1", "branch_2"],
25 | index=adata.obs_names,
26 | ).astype(bool)
27 |
28 | return adata
29 |
30 |
31 | @pytest.fixture
32 | def custom_mock_adata():
33 | n_cells = 10
34 |
35 | # Create mock data
36 | adata = AnnData(
37 | X=np.random.rand(n_cells, 3),
38 | obs=pd.DataFrame(
39 | {"custom_time": np.random.rand(n_cells)},
40 | index=[f"cell_{i}" for i in range(n_cells)],
41 | ),
42 | var=pd.DataFrame(index=[f"gene_{i}" for i in range(3)]),
43 | )
44 |
45 | adata.obsm["custom_masks"] = pd.DataFrame(
46 | np.random.randint(2, size=(n_cells, 2)),
47 | columns=["branch_1", "branch_2"],
48 | index=adata.obs_names,
49 | ).astype(bool)
50 |
51 | return adata
52 |
53 |
54 | @pytest.fixture
55 | def mock_adata_old():
56 | n_cells = 10
57 |
58 | # Create mock data
59 | adata = AnnData(
60 | X=np.random.rand(n_cells, 3),
61 | obs=pd.DataFrame(
62 | {"palantir_pseudotime": np.random.rand(n_cells)},
63 | index=[f"cell_{i}" for i in range(n_cells)],
64 | ),
65 | var=pd.DataFrame(index=[f"gene_{i}" for i in range(3)]),
66 | )
67 |
68 | # Create mock branch_masks in obsm
69 | adata.obsm["branch_masks"] = np.random.randint(2, size=(n_cells, 2)).astype(bool)
70 | adata.uns["branch_masks_columns"] = ["branch_1", "branch_2"]
71 |
72 | return adata
73 |
74 |
75 | @pytest.mark.parametrize("adata_fixture", ["mock_adata", "mock_adata_old"])
76 | def test_compute_gene_trends(request, adata_fixture):
77 | adata = request.getfixturevalue(adata_fixture)
78 |
79 | # Call the function with default keys
80 | res = palantir.presults.compute_gene_trends(adata)
81 |
82 | # Asserts to check the output
83 | assert isinstance(res, dict)
84 | assert "branch_1" in res
85 | assert "branch_2" in res
86 | assert isinstance(res["branch_1"], dict)
87 | assert isinstance(res["branch_1"]["trends"], pd.DataFrame)
88 | assert "gene_0" in res["branch_1"]["trends"].index
89 | assert adata.varm["gene_trends_branch_1"].shape == (3, 500)
90 |
91 |
92 | def test_compute_gene_trends_custom_anndata(custom_mock_adata):
93 | # Call the function with custom keys
94 | res = palantir.presults.compute_gene_trends(
95 | custom_mock_adata,
96 | masks_key="custom_masks",
97 | pseudo_time_key="custom_time",
98 | gene_trend_key="custom_trends",
99 | )
100 |
101 | # Asserts to check the output with custom keys
102 | assert isinstance(res, dict)
103 | assert "branch_1" in res
104 | assert "branch_2" in res
105 | assert isinstance(res["branch_1"], dict)
106 | assert isinstance(res["branch_1"]["trends"], pd.DataFrame)
107 | assert "gene_0" in res["branch_1"]["trends"].index
108 | assert custom_mock_adata.varm["custom_trends_branch_1"].shape == (3, 500)
109 |
--------------------------------------------------------------------------------
/tests/test_presults_gam_fit_predict.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import numpy as np
3 | import pandas as pd
4 |
5 | # Skip all tests in this file if pygam is not installed
6 | try:
7 | import pygam
8 | except ImportError:
9 | pytestmark = pytest.mark.skip(reason="pygam not installed")
10 |
11 | # Handle scipy compatibility issues
12 | try:
13 | import scipy.sparse as sp
14 | test_matrix = sp.csr_matrix((1, 1))
15 | if not hasattr(test_matrix, 'A'):
16 | pytestmark = pytest.mark.skip(reason="scipy/pygam compatibility issue")
17 | except:
18 | pass
19 |
20 | from palantir.presults import gam_fit_predict
21 |
22 |
23 | def test_gam_fit_predict_basic():
24 | """Test basic functionality of gam_fit_predict"""
25 | # Create test data
26 | x = np.linspace(0, 1, 50)
27 | y = np.sin(2 * np.pi * x) + 0.1 * np.random.randn(50)
28 |
29 | # Run gam_fit_predict
30 | y_pred, stds = gam_fit_predict(x, y)
31 |
32 | # Check output shapes
33 | assert len(y_pred) == len(x)
34 | assert len(stds) == len(x)
35 |
36 | # Check that predictions follow the general trend
37 | assert np.corrcoef(y, y_pred)[0, 1] > 0.8 # Strong correlation
38 |
39 |
40 | def test_gam_fit_predict_with_weights():
41 | """Test gam_fit_predict with weights"""
42 | # Create test data
43 | x = np.linspace(0, 1, 50)
44 | y = np.sin(2 * np.pi * x) + 0.1 * np.random.randn(50)
45 |
46 | # Create weights biased toward the beginning
47 | weights = np.exp(-3 * x)
48 |
49 | # Run gam_fit_predict with weights
50 | y_pred_weighted, _ = gam_fit_predict(x, y, weights=weights)
51 | # Run without weights for comparison
52 | y_pred_unweighted, _ = gam_fit_predict(x, y)
53 |
54 | # Check that predictions differ when using weights
55 | assert not np.allclose(y_pred_weighted, y_pred_unweighted)
56 |
57 | # Early points should be fitted better with weights
58 | early_idx = x < 0.3
59 | early_mse_weighted = np.mean((y[early_idx] - y_pred_weighted[early_idx]) ** 2)
60 | early_mse_unweighted = np.mean((y[early_idx] - y_pred_unweighted[early_idx]) ** 2)
61 | assert early_mse_weighted <= early_mse_unweighted
62 |
63 |
64 | def test_gam_fit_predict_with_pred_x():
65 | """Test gam_fit_predict with custom prediction points"""
66 | # Create test data
67 | x = np.linspace(0, 1, 50)
68 | y = np.sin(2 * np.pi * x) + 0.1 * np.random.randn(50)
69 |
70 | # Create custom prediction points
71 | pred_x = np.linspace(0, 1, 100) # Higher resolution
72 |
73 | # Run gam_fit_predict with custom prediction points
74 | y_pred, stds = gam_fit_predict(x, y, pred_x=pred_x)
75 |
76 | # Check that output shapes match the custom prediction points
77 | assert len(y_pred) == len(pred_x)
78 | assert len(stds) == len(pred_x)
79 |
80 |
81 | def test_gam_fit_predict_spline_params():
82 | """Test gam_fit_predict with different spline parameters"""
83 | # Create test data
84 | x = np.linspace(0, 1, 50)
85 | y = np.sin(2 * np.pi * x) + 0.1 * np.random.randn(50)
86 |
87 | # Run with default spline parameters
88 | y_pred_default, _ = gam_fit_predict(x, y)
89 |
90 | # Run with custom spline parameters
91 | y_pred_custom, _ = gam_fit_predict(x, y, n_splines=8, spline_order=3)
92 |
93 | # Check that predictions differ with different spline parameters
94 | assert not np.allclose(y_pred_default, y_pred_custom)
95 |
--------------------------------------------------------------------------------
/tests/test_presults_select_branch_cells.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import pandas as pd
3 | import numpy as np
4 | import scanpy as sc
5 | from anndata import AnnData
6 |
7 | from palantir.presults import select_branch_cells
8 | import palantir.presults
9 |
10 |
11 | def test_select_branch_cells_basic():
12 | """Test basic functionality of select_branch_cells"""
13 | # Create test AnnData
14 | n_cells = 100
15 | n_genes = 20
16 | adata = AnnData(np.random.normal(0, 1, (n_cells, n_genes)))
17 | adata.obs_names = [f"cell_{i}" for i in range(n_cells)]
18 |
19 | # Create pseudotime
20 | pseudotime = np.linspace(0, 1, n_cells)
21 | adata.obs["palantir_pseudotime"] = pseudotime
22 |
23 | # Create fate probabilities
24 | fate_probs = np.zeros((n_cells, 3))
25 | # First branch: higher probability at beginning
26 | fate_probs[:, 0] = np.linspace(0.8, 0.1, n_cells)
27 | # Second branch: higher probability in middle
28 | x = np.linspace(-3, 3, n_cells)
29 | fate_probs[:, 1] = np.exp(-(x**2)) / 2
30 | # Third branch: higher probability at end
31 | fate_probs[:, 2] = np.linspace(0.1, 0.8, n_cells)
32 |
33 | # Normalize rows to sum to 1
34 | fate_probs = fate_probs / fate_probs.sum(axis=1, keepdims=True)
35 |
36 | # Store in AnnData
37 | adata.obsm["palantir_fate_probabilities"] = pd.DataFrame(
38 | fate_probs, index=adata.obs_names, columns=["branch1", "branch2", "branch3"]
39 | )
40 |
41 | # Run select_branch_cells
42 | masks = select_branch_cells(adata)
43 |
44 | # Check that the masks are boolean arrays
45 | assert masks.dtype == bool
46 | assert masks.shape == (n_cells, 3)
47 |
48 | # Check that masks are stored in AnnData
49 | assert "branch_masks" in adata.obsm
50 |
51 | # Check masks make sense with probabilities
52 | # Higher probability cells should be selected - but we don't check specific values
53 | # as branch selection behavior depends on the quantile-based algorithm
54 | high_prob_branch1 = fate_probs[:, 0] > 0.5
55 | assert np.any(masks[high_prob_branch1, 0]) # At least some high prob cells should be selected
56 |
57 |
58 | def test_select_branch_cells_custom_keys():
59 | """Test select_branch_cells with custom keys"""
60 | # Create test AnnData
61 | n_cells = 100
62 | n_genes = 20
63 | adata = AnnData(np.random.normal(0, 1, (n_cells, n_genes)))
64 |
65 | # Create pseudotime with custom key
66 | pseudotime_key = "custom_pseudotime"
67 | adata.obs[pseudotime_key] = np.linspace(0, 1, n_cells)
68 |
69 | # Create fate probabilities with custom key
70 | fate_prob_key = "custom_fate_probs"
71 | fate_probs = np.random.random((n_cells, 3))
72 | fate_probs = fate_probs / fate_probs.sum(axis=1, keepdims=True)
73 | adata.obsm[fate_prob_key] = pd.DataFrame(
74 | fate_probs, index=adata.obs_names, columns=["branch1", "branch2", "branch3"]
75 | )
76 |
77 | # Custom masks key
78 | masks_key = "custom_masks"
79 |
80 | # Run select_branch_cells with custom keys
81 | masks = select_branch_cells(
82 | adata, pseudo_time_key=pseudotime_key, fate_prob_key=fate_prob_key, masks_key=masks_key
83 | )
84 |
85 | # Check that masks are stored in AnnData with custom key
86 | assert masks_key in adata.obsm
87 |
88 | # Check shapes
89 | assert masks.shape == (n_cells, 3)
90 |
91 |
92 | def test_select_branch_cells_parameters():
93 | """Test select_branch_cells with different parameters"""
94 | # Create test AnnData
95 | n_cells = 100
96 | n_genes = 20
97 | adata = AnnData(np.random.normal(0, 1, (n_cells, n_genes)))
98 |
99 | # Create pseudotime
100 | adata.obs["palantir_pseudotime"] = np.linspace(0, 1, n_cells)
101 |
102 | # Create fate probabilities
103 | fate_probs = np.random.random((n_cells, 3))
104 | fate_probs = fate_probs / fate_probs.sum(axis=1, keepdims=True)
105 | adata.obsm["palantir_fate_probabilities"] = pd.DataFrame(
106 | fate_probs, index=adata.obs_names, columns=["branch1", "branch2", "branch3"]
107 | )
108 |
109 | # Run with different q parameters - for randomly generated data, the relationship between
110 | # q and the number of selected cells can be unpredictable
111 | masks1 = select_branch_cells(adata, q=0.01)
112 | masks2 = select_branch_cells(adata, q=0.5)
113 |
114 | # Just verify we get different results with different parameters
115 | assert masks1.shape == masks2.shape
116 |
117 | # Run with different eps parameters
118 | masks3 = select_branch_cells(adata, eps=0.01)
119 | masks4 = select_branch_cells(adata, eps=0.1)
120 |
121 | # Higher eps should select more cells or at least the same number
122 | assert masks3.sum() <= masks4.sum()
123 |
124 | # Test save_as_df parameter
125 | # True is default, test False
126 | select_branch_cells(adata, save_as_df=False)
127 | assert isinstance(adata.obsm["branch_masks"], np.ndarray)
128 | assert "branch_masks_columns" in adata.uns
129 |
130 |
131 | def test_select_branch_cells_with_different_resolutions():
132 | """Test select_branch_cells with different resolution settings"""
133 |
134 | # Store original resolution
135 | original_res = palantir.presults.PSEUDOTIME_RES
136 |
137 | try:
138 | # Test with high resolution (potential division by zero case for small datasets)
139 | n_cells = 10
140 | n_genes = 5
141 |
142 | # Create small test AnnData
143 | adata_small = AnnData(np.random.normal(0, 1, (n_cells, n_genes)))
144 | adata_small.obs["palantir_pseudotime"] = np.linspace(0, 1, n_cells)
145 | adata_small.obsm["palantir_fate_probabilities"] = pd.DataFrame(
146 | np.random.random((n_cells, 2)),
147 | columns=["branch1", "branch2"],
148 | index=adata_small.obs_names,
149 | )
150 |
151 | # Test with a very high resolution (will trigger nsteps == 0 case)
152 | palantir.presults.PSEUDOTIME_RES = 1000
153 | masks_high_res = select_branch_cells(adata_small)
154 | assert masks_high_res.shape == (n_cells, 2)
155 |
156 | # Test with a very low resolution (regular case)
157 | palantir.presults.PSEUDOTIME_RES = 2
158 | masks_low_res = select_branch_cells(adata_small)
159 | assert masks_low_res.shape == (n_cells, 2)
160 |
161 | # Create larger test AnnData
162 | n_cells = 100
163 | adata_large = AnnData(np.random.normal(0, 1, (n_cells, n_genes)))
164 | adata_large.obs["palantir_pseudotime"] = np.linspace(0, 1, n_cells)
165 | adata_large.obsm["palantir_fate_probabilities"] = pd.DataFrame(
166 | np.random.random((n_cells, 2)),
167 | columns=["branch1", "branch2"],
168 | index=adata_large.obs_names,
169 | )
170 |
171 | # Test with medium resolution (regular case)
172 | palantir.presults.PSEUDOTIME_RES = 10
173 | masks_medium_res = select_branch_cells(adata_large)
174 | assert masks_medium_res.shape == (n_cells, 2)
175 |
176 | finally:
177 | # Restore original resolution
178 | palantir.presults.PSEUDOTIME_RES = original_res
179 |
180 |
181 | def test_select_branch_cells_error_handling():
182 | """Test error handling in select_branch_cells"""
183 | # Create AnnData without required data
184 | adata = AnnData(np.random.normal(0, 1, (10, 10)))
185 |
186 | # Should raise KeyError for missing pseudotime
187 | with pytest.raises(KeyError):
188 | select_branch_cells(adata)
189 |
190 | # Add pseudotime but no fate probabilities
191 | adata.obs["palantir_pseudotime"] = np.linspace(0, 1, 10)
192 |
193 | # Should raise KeyError for missing fate probabilities
194 | with pytest.raises(KeyError):
195 | select_branch_cells(adata)
196 |
--------------------------------------------------------------------------------
/tests/test_util_density.py:
--------------------------------------------------------------------------------
1 | from anndata._core.anndata import AnnData
2 | from pandas.core.frame import DataFrame
3 | import pytest
4 | import pandas as pd
5 | import scanpy as sc
6 | from anndata import AnnData
7 | import numpy as np
8 |
9 | from palantir.utils import (
10 | run_density,
11 | run_low_density_variability,
12 | run_density_evaluation,
13 | )
14 |
15 |
16 | @pytest.fixture
17 | def mock_data():
18 | n_cells = 50
19 | n_genes = 500
20 | return pd.DataFrame(
21 | np.random.rand(n_cells, n_genes),
22 | columns=[f"gene_{i}" for i in range(n_genes)],
23 | index=[f"cell_{i}" for i in range(n_cells)],
24 | )
25 |
26 |
27 | @pytest.fixture
28 | def mock_anndata(mock_data: DataFrame):
29 | ad = AnnData(X=mock_data)
30 | ad.obsm["DM_EigenVectors"] = mock_data.iloc[:, :10].copy()
31 | ad.obsm["branch_masks"] = pd.DataFrame(
32 | columns=["branch_0", "branch_1"],
33 | index=mock_data.index,
34 | data=np.random.choice([True, False], size=(mock_data.shape[0], 2)),
35 | )
36 | ad.obs["other_density"] = np.random.rand(mock_data.shape[0])
37 | ad.layers["local_variability"] = np.random.rand(*mock_data.shape)
38 | return ad
39 |
40 |
41 | @pytest.fixture
42 | def mock_anndata_custom(mock_data: DataFrame):
43 | ad = AnnData(X=mock_data)
44 | ad.obsm["DM_EigenVectors_custom"] = mock_data.iloc[:, :10].copy()
45 | return ad
46 |
47 |
48 | def test_run_density(mock_anndata: AnnData):
49 | run_density(mock_anndata)
50 | assert "mellon_log_density" in mock_anndata.obs.keys()
51 | assert "mellon_log_density_clipped" in mock_anndata.obs.keys()
52 |
53 |
54 | def test_run_density_custom_keys(mock_anndata_custom: AnnData):
55 | run_density(mock_anndata_custom, repr_key="DM_EigenVectors_custom", density_key="custom_key")
56 | assert "custom_key" in mock_anndata_custom.obs.keys()
57 | assert "custom_key_clipped" in mock_anndata_custom.obs.keys()
58 |
59 |
60 | def test_run_low_density_variability(mock_anndata: AnnData):
61 | run_low_density_variability(mock_anndata, density_key="other_density")
62 | for branch in mock_anndata.obsm["branch_masks"].columns:
63 | assert f"low_density_gene_variability_{branch}" in mock_anndata.var.keys()
64 |
65 |
66 | def test_run_density_evaluation(mock_anndata: AnnData, mock_anndata_custom: AnnData):
67 | run_density(mock_anndata)
68 | run_density_evaluation(mock_anndata, mock_anndata_custom, repr_key="DM_EigenVectors_custom")
69 | assert "cross_log_density" in mock_anndata_custom.obs.keys()
70 | assert "cross_log_density_clipped" in mock_anndata_custom.obs.keys()
71 |
--------------------------------------------------------------------------------
/tests/test_util_run_pca.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import pandas as pd
3 | import scanpy as sc
4 | from anndata import AnnData
5 | import numpy as np
6 |
7 | from palantir.utils import run_pca
8 |
9 |
10 | @pytest.fixture
11 | def mock_data():
12 | n_cells = 50
13 | n_genes = 500
14 | return pd.DataFrame(
15 | np.random.rand(n_cells, n_genes),
16 | columns=[f"gene_{i}" for i in range(n_genes)],
17 | index=[f"cell_{i}" for i in range(n_cells)],
18 | )
19 |
20 |
21 | @pytest.fixture
22 | def mock_anndata(mock_data):
23 | ad = AnnData(X=mock_data)
24 | ad.obsm["DM_EigenVectors_multiscaled"] = mock_data
25 | ad.var["highly_variable"] = np.random.choice([True, False], size=mock_data.shape[1])
26 | return ad
27 |
28 |
29 | # Test with DataFrame
30 | def test_run_pca_dataframe(mock_data):
31 | pca_results, var_ratio = run_pca(mock_data, use_hvg=False)
32 | assert isinstance(pca_results, pd.DataFrame)
33 | assert isinstance(var_ratio, np.ndarray)
34 | assert pca_results.shape[1] <= 300 # Check n_components
35 |
36 |
37 | # Test with AnnData
38 | def test_run_pca_anndata(mock_anndata):
39 | pca_results, var_ratio = run_pca(mock_anndata)
40 | assert "X_pca" in mock_anndata.obsm.keys()
41 | assert mock_anndata.obsm["X_pca"].shape[1] <= 300
42 |
43 |
44 | # Test n_components parameter
45 | def test_run_pca_components(mock_data):
46 | pca_results, _ = run_pca(mock_data, n_components=5, use_hvg=False)
47 | assert pca_results.shape[1] == 5
48 |
49 |
50 | # Test use_hvg parameter
51 | def test_run_pca_hvg(mock_anndata):
52 | pca_results, _ = run_pca(mock_anndata, use_hvg=True)
53 | assert pca_results.shape[1] <= 300
54 |
55 |
56 | # Test pca_key parameter
57 | def test_run_pca_pca_key(mock_anndata):
58 | run_pca(mock_anndata, pca_key="custom_key")
59 | assert "custom_key" in mock_anndata.obsm.keys()
60 | assert mock_anndata.obsm["custom_key"].shape[1] <= 300
61 |
--------------------------------------------------------------------------------
/tests/test_utils_compute_kernel.py:
--------------------------------------------------------------------------------
1 | from scipy.sparse import find, csr_matrix
2 | import pytest
3 | import pandas as pd
4 | import numpy as np
5 |
6 | from palantir.utils import compute_kernel
7 |
8 |
9 | # Test with DataFrame
10 | def test_compute_kernel_dataframe(mock_data):
11 | kernel = compute_kernel(mock_data)
12 | assert isinstance(kernel, csr_matrix)
13 |
14 |
15 | # Test with AnnData
16 | def test_compute_kernel_anndata(mock_anndata):
17 | kernel = compute_kernel(mock_anndata)
18 | assert "DM_Kernel" in mock_anndata.obsp.keys()
19 |
20 |
21 | # Test knn parameter
22 | def test_compute_kernel_knn(mock_data):
23 | kernel = compute_kernel(mock_data, knn=10)
24 | assert isinstance(kernel, csr_matrix)
25 |
26 |
27 | # Test alpha parameter
28 | def test_compute_kernel_alpha(mock_data):
29 | kernel = compute_kernel(mock_data, alpha=0.5)
30 | assert isinstance(kernel, csr_matrix)
31 |
32 |
33 | # Test pca_key parameter
34 | def test_compute_kernel_pca_key(mock_anndata):
35 | mock_anndata.obsm["custom_pca"] = np.random.rand(mock_anndata.shape[0], 10)
36 | kernel = compute_kernel(mock_anndata, pca_key="custom_pca")
37 | assert "DM_Kernel" in mock_anndata.obsp.keys()
38 |
39 |
40 | # Test kernel_key parameter
41 | def test_compute_kernel_kernel_key(mock_anndata):
42 | kernel = compute_kernel(mock_anndata, kernel_key="custom_kernel")
43 | assert "custom_kernel" in mock_anndata.obsp.keys()
44 |
--------------------------------------------------------------------------------
/tests/test_utils_density_functions.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import numpy as np
3 | import pandas as pd
4 | import scanpy as sc
5 | from anndata import AnnData
6 | from unittest.mock import patch, MagicMock
7 |
8 | from palantir.utils import run_low_density_variability, run_density_evaluation
9 |
10 |
11 | @pytest.fixture
12 | def mock_anndata_with_density(mock_anndata):
13 | """Create anndata with density for testing low_density_variability"""
14 | # Add density values
15 | mock_anndata.obs["mellon_log_density"] = np.random.rand(mock_anndata.n_obs)
16 |
17 | # Add local variability
18 | mock_anndata.layers["local_variability"] = np.random.rand(
19 | mock_anndata.n_obs, mock_anndata.n_vars
20 | )
21 |
22 | # Add branch masks
23 | mock_anndata.obsm["branch_masks"] = pd.DataFrame(
24 | np.random.randint(0, 2, size=(mock_anndata.n_obs, 2)),
25 | columns=["branch1", "branch2"],
26 | index=mock_anndata.obs_names,
27 | )
28 |
29 | # Also add branch mask in obs
30 | mock_anndata.obs["obs_branch"] = np.random.randint(0, 2, size=mock_anndata.n_obs)
31 |
32 | return mock_anndata
33 |
34 |
35 | def test_run_low_density_variability_with_obsm(mock_anndata_with_density):
36 | """Test run_low_density_variability function with obsm branch masks"""
37 | ad = mock_anndata_with_density
38 |
39 | # Test with default parameters (branch_masks in obsm)
40 | result = run_low_density_variability(ad)
41 |
42 | # Check results
43 | assert result.shape == (ad.n_vars, 2) # 2 branches
44 | assert "low_density_gene_variability_branch1" in ad.var.columns
45 | assert "low_density_gene_variability_branch2" in ad.var.columns
46 |
47 | # Test with custom parameters
48 | result = run_low_density_variability(
49 | ad,
50 | cell_mask="branch_masks",
51 | density_key="mellon_log_density",
52 | localvar_key="local_variability",
53 | score_key="test_prefix",
54 | )
55 |
56 | assert "test_prefix_branch1" in ad.var.columns
57 | assert "test_prefix_branch2" in ad.var.columns
58 |
59 |
60 | def test_run_low_density_variability_with_obs(mock_anndata_with_density):
61 | """Test run_low_density_variability function with obs column"""
62 | ad = mock_anndata_with_density
63 |
64 | # Test with obs column
65 | result = run_low_density_variability(ad, cell_mask="obs_branch")
66 |
67 | # Check results
68 | assert result.shape == (ad.n_vars, 1)
69 | assert "low_density_gene_variability__obs_branch" in ad.var.columns
70 |
71 |
72 | def test_run_low_density_variability_with_array(mock_anndata_with_density):
73 | """Test run_low_density_variability function with array input"""
74 | ad = mock_anndata_with_density
75 |
76 | # Test with np.array mask
77 | mask = np.zeros(ad.n_obs, dtype=bool)
78 | mask[:10] = True
79 | result = run_low_density_variability(ad, cell_mask=mask)
80 | assert "low_density_gene_variability_" in ad.var.columns
81 |
82 | # Test with list of cell names
83 | cell_list = ad.obs_names[:10].tolist()
84 | result = run_low_density_variability(ad, cell_mask=cell_list)
85 | assert "low_density_gene_variability_" in ad.var.columns
86 |
87 |
88 | def test_run_low_density_variability_errors(mock_anndata_with_density):
89 | """Test error handling in run_low_density_variability"""
90 | ad = mock_anndata_with_density
91 |
92 | # Test missing density key
93 | with pytest.raises(ValueError, match="not_a_key' not found in ad.obs"):
94 | run_low_density_variability(ad, density_key="not_a_key")
95 |
96 | # Test missing layer key
97 | with pytest.raises(ValueError, match="not_a_key' not found in ad.layers"):
98 | run_low_density_variability(ad, localvar_key="not_a_key")
99 |
100 | # Test missing cell_mask key
101 | with pytest.raises(ValueError, match="not_a_key' not found in ad.obsm or ad.obs"):
102 | run_low_density_variability(ad, cell_mask="not_a_key")
103 |
104 | # Test invalid cell_mask type
105 | with pytest.raises(ValueError, match="cell_mask must be either a string key"):
106 | run_low_density_variability(ad, cell_mask=42) # Integer is invalid
107 |
108 |
109 | @patch("mellon.Predictor.from_dict")
110 | def test_run_density_evaluation(mock_predictor_from_dict):
111 | """Test run_density_evaluation function"""
112 | # Create input and output anndata objects
113 | in_ad = AnnData(X=np.random.rand(20, 10))
114 | out_ad = AnnData(X=np.random.rand(15, 10))
115 |
116 | # Setup predictor mock
117 | mock_predictor = MagicMock()
118 | mock_predictor.return_value = np.random.rand(15)
119 | mock_predictor_from_dict.return_value = mock_predictor
120 |
121 | # Add required fields
122 | in_ad.uns["mellon_log_density_predictor"] = {"mock": "predictor"}
123 | out_ad.obsm["DM_EigenVectors"] = np.random.rand(15, 5)
124 |
125 | # Run the function
126 | result = run_density_evaluation(in_ad, out_ad)
127 |
128 | # Check results
129 | assert len(result) == 15
130 | assert "cross_log_density" in out_ad.obs.columns
131 | assert "cross_log_density_clipped" in out_ad.obs.columns
132 |
133 | # Verify predictor was called
134 | mock_predictor_from_dict.assert_called_once_with(in_ad.uns["mellon_log_density_predictor"])
135 | mock_predictor.assert_called_once_with(out_ad.obsm["DM_EigenVectors"])
136 |
137 | # Test with custom parameters
138 | result = run_density_evaluation(
139 | in_ad,
140 | out_ad,
141 | predictor_key="mellon_log_density_predictor",
142 | repr_key="DM_EigenVectors",
143 | density_key="custom_density",
144 | )
145 |
146 | assert "custom_density" in out_ad.obs.columns
147 | assert "custom_density_clipped" in out_ad.obs.columns
148 |
149 |
150 | def test_run_density_evaluation_errors():
151 | """Test error handling in run_density_evaluation"""
152 | # Create input and output anndata objects
153 | in_ad = AnnData(X=np.random.rand(20, 10))
154 | out_ad = AnnData(X=np.random.rand(15, 10))
155 |
156 | # Test missing repr_key
157 | with pytest.raises(ValueError, match="'DM_EigenVectors' not found in out_ad.obsm"):
158 | run_density_evaluation(in_ad, out_ad)
159 |
160 | # Add eigenvectors but no predictor
161 | out_ad.obsm["DM_EigenVectors"] = np.random.rand(15, 5)
162 |
163 | # Test missing predictor_key
164 | with pytest.raises(ValueError, match="'mellon_log_density_predictor' not found in in_ad.uns"):
165 | run_density_evaluation(in_ad, out_ad)
166 |
--------------------------------------------------------------------------------
/tests/test_utils_determine_multiscale_space.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import numpy as np
3 | import pandas as pd
4 | import scanpy as sc
5 | from anndata import AnnData
6 | from pandas.testing import assert_frame_equal
7 |
8 | from palantir.utils import determine_multiscale_space
9 |
10 |
11 | def test_determine_multiscale_space_with_dict(mock_dm_res):
12 | """Test determine_multiscale_space with dictionary input"""
13 | # Test with default n_eigs (determined by eigen gap)
14 | result = determine_multiscale_space(mock_dm_res)
15 | assert isinstance(result, pd.DataFrame)
16 | assert result.shape[0] == 50 # Should have 50 cells
17 | # The number of components can vary depending on the generated eigenvalues
18 |
19 | # Test with specific n_eigs
20 | result = determine_multiscale_space(mock_dm_res, n_eigs=3)
21 | assert isinstance(result, pd.DataFrame)
22 | assert result.shape == (50, 2) # Only use 2 eigenvectors (skip first)
23 |
24 |
25 | def test_determine_multiscale_space_with_anndata(mock_anndata):
26 | """Test determine_multiscale_space with AnnData input"""
27 | # Setup eigenvalues with a clear gap for testing auto-selection
28 | n_components = 10
29 | eigvals = np.zeros(n_components)
30 | eigvals[0] = 0.95 # First eigenvalue
31 | eigvals[1] = 0.85
32 | eigvals[2] = 0.75
33 | eigvals[3] = 0.30 # Big gap after this one
34 | eigvals[4:] = np.linspace(0.25, 0.1, n_components - 4)
35 |
36 | # Create eigenvectors
37 | eigvecs = np.random.rand(mock_anndata.n_obs, n_components)
38 |
39 | # Add to mock anndata
40 | mock_anndata.uns["DM_EigenValues"] = eigvals
41 | mock_anndata.obsm["DM_EigenVectors"] = eigvecs
42 |
43 | # Test with AnnData input - both stores in obsm and returns DataFrame
44 | result = determine_multiscale_space(mock_anndata)
45 | assert isinstance(result, pd.DataFrame) # Returns DataFrame for both AnnData and dict input
46 | assert "DM_EigenVectors_multiscaled" in mock_anndata.obsm # Also stores in AnnData
47 |
48 | # Should detect gap and use components after skipping first
49 | scaled_shape = mock_anndata.obsm["DM_EigenVectors_multiscaled"].shape
50 | assert scaled_shape[0] == mock_anndata.n_obs # Number of cells matches
51 | # Number of components can vary based on how the algorithm detects eigen gaps
52 |
53 |
54 | def test_determine_multiscale_space_with_small_gap(mock_anndata):
55 | """Test determine_multiscale_space with small eigen gap"""
56 | # Setup eigenvalues with no clear gap
57 | n_components = 5
58 | eigvals = np.linspace(0.9, 0.5, n_components)
59 |
60 | # Create eigenvectors
61 | eigvecs = np.random.rand(mock_anndata.n_obs, n_components)
62 |
63 | # Add to mock anndata
64 | mock_anndata.uns["DM_EigenValues"] = eigvals
65 | mock_anndata.obsm["DM_EigenVectors"] = eigvecs
66 |
67 | # Test with AnnData input - both stores in obsm and returns DataFrame
68 | result = determine_multiscale_space(mock_anndata)
69 | assert isinstance(result, pd.DataFrame) # Returns DataFrame
70 | assert "DM_EigenVectors_multiscaled" in mock_anndata.obsm # Also stores in AnnData
71 |
72 | # Should fall back to second largest gap
73 | scaled_shape = mock_anndata.obsm["DM_EigenVectors_multiscaled"].shape
74 | assert scaled_shape[0] == mock_anndata.n_obs
75 |
--------------------------------------------------------------------------------
/tests/test_utils_diffusion_maps_from_kernel.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import numpy as np
3 | import pandas as pd
4 | from scipy.sparse import csr_matrix
5 | from scipy.sparse.linalg import eigs
6 | from pytest import approx
7 |
8 | from palantir.utils import diffusion_maps_from_kernel, determine_multiscale_space
9 |
10 |
11 | @pytest.fixture
12 | def mock_kernel():
13 | size = 50
14 | A = np.random.rand(size, size)
15 | return csr_matrix((A + A.T) / 2)
16 |
17 |
18 | def test_diffusion_maps_basic(mock_kernel):
19 | result = diffusion_maps_from_kernel(mock_kernel)
20 |
21 | assert isinstance(result, dict)
22 | assert "T" in result and "EigenVectors" in result and "EigenValues" in result
23 |
24 | assert result["T"].shape == (50, 50)
25 | assert result["EigenVectors"].shape == (50, 10)
26 | assert result["EigenValues"].shape == (10,)
27 |
28 |
29 | def test_diffusion_maps_n_components(mock_kernel):
30 | result = diffusion_maps_from_kernel(mock_kernel, n_components=5)
31 |
32 | assert result["EigenVectors"].shape == (50, 5)
33 | assert result["EigenValues"].shape == (5,)
34 |
35 |
36 | def test_diffusion_maps_seed(mock_kernel):
37 | result1 = diffusion_maps_from_kernel(mock_kernel, seed=0)
38 | result2 = diffusion_maps_from_kernel(mock_kernel, seed=0)
39 |
40 | # Seed usage should yield the same result
41 | assert np.allclose(result1["EigenValues"], result2["EigenValues"])
42 |
43 |
44 | def test_diffusion_maps_eigen(mock_kernel):
45 | result = diffusion_maps_from_kernel(mock_kernel)
46 |
47 | T = result["T"].toarray()
48 | e_values, e_vectors = eigs(T, 10, tol=1e-4, maxiter=1000)
49 |
50 | assert np.allclose(
51 | result["EigenValues"], np.real(sorted(e_values, reverse=True)[:10]), atol=1e-4
52 | )
53 |
54 |
55 | def test_determine_multiscale_space(mock_kernel):
56 | result = diffusion_maps_from_kernel(mock_kernel)
57 | msresult = determine_multiscale_space(result)
58 | assert msresult.shape[0] == result["EigenVectors"].shape[0]
59 |
--------------------------------------------------------------------------------
/tests/test_utils_early_cell_functions.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import numpy as np
3 | import pandas as pd
4 | import scanpy as sc
5 | from anndata import AnnData
6 | from unittest.mock import patch, MagicMock
7 |
8 | from palantir.utils import (
9 | early_cell,
10 | fallback_terminal_cell,
11 | find_terminal_states,
12 | CellNotFoundException,
13 | )
14 |
15 |
16 | @pytest.fixture
17 | def mock_anndata_with_celltypes(mock_anndata):
18 | """Create anndata with cell types for early_cell and terminal states tests"""
19 | # Add cell types
20 | celltypes = np.array(["A", "B", "C", "A", "B"] * 10)
21 | mock_anndata.obs["celltype"] = pd.Categorical(celltypes)
22 |
23 | # Add multiscale space with one cell type at extremes
24 | eigvecs = mock_anndata.obsm["DM_EigenVectors_multiscaled"].copy()
25 | # Make cell 0 (type A) maximum in component 0
26 | eigvecs[0, 0] = 100.0
27 | # Make cell 4 (type B) minimum in component 1
28 | eigvecs[4, 1] = -100.0
29 |
30 | mock_anndata.obsm["DM_EigenVectors_multiscaled"] = eigvecs
31 |
32 | return mock_anndata
33 |
34 |
35 | def test_early_cell_extreme_max(mock_anndata_with_celltypes):
36 | """Test early_cell finding cell at maximum of component"""
37 | ad = mock_anndata_with_celltypes
38 |
39 | # Test finding a cell of type 'A' - we don't need to know which cell it will be
40 | with patch("palantir.utils._return_cell", return_value="cell_0") as mock_return:
41 | result = early_cell(ad, "A")
42 | assert result == "cell_0" # Just check the mocked return value
43 | mock_return.assert_called_once()
44 |
45 | # Only check the cell type and that it's finding some kind of extreme
46 | args = mock_return.call_args[0]
47 | assert args[2] == "A" # Cell type
48 | assert args[3] in ["max", "min"] # Extreme type (don't care which one)
49 |
50 |
51 | def test_early_cell_extreme_min(mock_anndata_with_celltypes):
52 | """Test early_cell finding cell at minimum of component"""
53 | ad = mock_anndata_with_celltypes
54 |
55 | # Test finding a cell of type 'B' - we don't need to know which cell it will be
56 | with patch("palantir.utils._return_cell", return_value="cell_4") as mock_return:
57 | result = early_cell(ad, "B")
58 | assert result == "cell_4" # Just check the mocked return value
59 | mock_return.assert_called_once()
60 |
61 | # Only check the cell type and that it's finding some kind of extreme
62 | args = mock_return.call_args[0]
63 | assert args[2] == "B" # Cell type
64 | assert args[3] in ["max", "min"] # Extreme type (don't care which one)
65 |
66 |
67 | def test_early_cell_fallback():
68 | """Test early_cell with fallback to fallback_terminal_cell"""
69 | # Create a very simple AnnData with a cell type that won't be at extremes
70 | ad = AnnData(X=np.random.rand(10, 5))
71 | ad.obs["celltype"] = pd.Categorical(
72 | ["A", "A", "A", "A", "A", "B", "B", "B", "C", "C"], categories=["A", "B", "C"]
73 | )
74 |
75 | # Add a fake eigenvectors matrix where no 'B' cells are at extremes
76 | eigvecs = np.zeros((10, 3))
77 | # Make 'A' cells dominate the extremes
78 | eigvecs[0, 0] = 100 # max in component 0 is cell 0 (type A)
79 | eigvecs[1, 0] = -100 # min in component 0 is cell 1 (type A)
80 | eigvecs[2, 1] = 100 # max in component 1 is cell 2 (type A)
81 | eigvecs[3, 1] = -100 # min in component 1 is cell 3 (type A)
82 | eigvecs[4, 2] = 100 # max in component 2 is cell 4 (type A)
83 | eigvecs[5, 2] = -100 # min in component 2 is cell 5 (type B)
84 | ad.obsm["DM_EigenVectors_multiscaled"] = eigvecs
85 |
86 | # Give the AnnData proper observation names
87 | ad.obs_names = [f"cell_{i}" for i in range(10)]
88 |
89 | # Mock fallback_terminal_cell to avoid actual computation
90 | with patch("palantir.utils.fallback_terminal_cell", return_value="cell_5") as mock_fallback:
91 | # Test early_cell with fallback - it should find no cell in extremes and fall back
92 | result = early_cell(ad, "C", fallback_seed=42) # Cell type C doesn't exist
93 | assert result == "cell_5"
94 | mock_fallback.assert_called_once_with(ad, "C", celltype_column="celltype", seed=42)
95 |
96 |
97 | def test_early_cell_exception():
98 | """Test early_cell raising exception when no cell found"""
99 | # Create a very simple AnnData with a cell type that won't be at extremes
100 | ad = AnnData(X=np.random.rand(10, 5))
101 | ad.obs["celltype"] = pd.Categorical(
102 | ["A", "A", "A", "A", "A", "B", "B", "B", "B", "B"], categories=["A", "B"]
103 | )
104 |
105 | # Add a fake eigenvectors matrix where no 'B' cells are at extremes
106 | eigvecs = np.zeros((10, 3))
107 | # Make 'A' cells dominate the extremes
108 | eigvecs[0, 0] = 100 # max in component 0 is cell 0 (type A)
109 | eigvecs[1, 0] = -100 # min in component 0 is cell 1 (type A)
110 | eigvecs[2, 1] = 100 # max in component 1 is cell 2 (type A)
111 | eigvecs[3, 1] = -100 # min in component 1 is cell 3 (type A)
112 | eigvecs[4, 2] = 100 # max in component 2 is cell 4 (type A)
113 | eigvecs[0, 2] = -100 # min in component 2 is cell 0 (type A)
114 | ad.obsm["DM_EigenVectors_multiscaled"] = eigvecs
115 |
116 | # Test without fallback_seed - should raise CellNotFoundException
117 | with pytest.raises(CellNotFoundException):
118 | early_cell(ad, "B")
119 |
120 |
121 | @patch("palantir.utils.run_palantir")
122 | def test_fallback_terminal_cell(mock_run_palantir, mock_anndata_with_celltypes):
123 | """Test fallback_terminal_cell with mocked palantir run"""
124 | ad = mock_anndata_with_celltypes
125 |
126 | # Setup mock pseudotime result
127 | mock_result = MagicMock()
128 | pseudotime = pd.Series([0.1, 0.2, 0.3, 0.9, 0.5], index=ad.obs_names[:5])
129 | mock_result.pseudotime = pseudotime
130 | mock_run_palantir.return_value = mock_result
131 |
132 | # Test fallback_terminal_cell
133 | with patch("palantir.utils.print"): # Suppress print output
134 | result = fallback_terminal_cell(ad, "A", celltype_column="celltype", seed=42)
135 | assert result == ad.obs_names[3] # Should pick cell with max pseudotime
136 |
137 | # Verify run_palantir was called with correct arguments
138 | mock_run_palantir.assert_called_once()
139 | call_args = mock_run_palantir.call_args[0]
140 | assert call_args[0] is ad
141 | # Second arg should be a non-A cell
142 |
143 |
144 | @patch("palantir.utils.early_cell")
145 | def test_find_terminal_states(mock_early_cell, mock_anndata_with_celltypes):
146 | """Test find_terminal_states"""
147 | ad = mock_anndata_with_celltypes
148 |
149 | # Setup mock early_cell behavior
150 | def side_effect(ad, celltype, *args, **kwargs):
151 | if celltype == "A":
152 | return "cell_0"
153 | elif celltype == "B":
154 | return "cell_4"
155 | elif celltype == "C":
156 | raise CellNotFoundException("Test exception")
157 | return None
158 |
159 | mock_early_cell.side_effect = side_effect
160 |
161 | # Test find_terminal_states with a warning for type C
162 | with pytest.warns(UserWarning):
163 | result = find_terminal_states(ad, ["A", "B", "C"], celltype_column="celltype")
164 |
165 | # Check result - should have entries for A and B, but not C
166 | assert isinstance(result, pd.Series)
167 | assert len(result) == 2
168 | assert result["cell_0"] == "A"
169 | assert result["cell_4"] == "B"
170 | assert "cell_C" not in result.index
171 |
--------------------------------------------------------------------------------
/tests/test_utils_run_diffusion_maps.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import pandas as pd
3 | import scanpy as sc
4 | from anndata import AnnData
5 | from scipy.sparse import csr_matrix, issparse
6 | import numpy as np
7 |
8 | from palantir.utils import run_diffusion_maps
9 |
10 |
11 | # Generate mock DataFrame data
12 | def mock_dataframe(rows, cols):
13 | return pd.DataFrame(np.random.rand(rows, cols))
14 |
15 |
16 | # Generate mock AnnData object
17 | def mock_anndata(rows, cols, keys):
18 | ad = AnnData(np.random.rand(rows, cols))
19 | for key in keys:
20 | ad.obsm[key] = np.random.rand(rows, cols)
21 | return ad
22 |
23 |
24 | def test_run_diffusion_maps_dataframe():
25 | df = mock_dataframe(50, 30)
26 | result = run_diffusion_maps(df)
27 |
28 | assert isinstance(result, dict)
29 | assert set(result.keys()) == {"T", "EigenVectors", "EigenValues", "kernel"}
30 |
31 | assert isinstance(result["kernel"], csr_matrix)
32 | assert isinstance(result["T"], csr_matrix)
33 | assert isinstance(result["EigenVectors"], pd.DataFrame)
34 | assert isinstance(result["EigenValues"], pd.Series)
35 |
36 |
37 | def test_run_diffusion_maps_anndata():
38 | keys = ["X_pca"]
39 | ad = mock_anndata(50, 30, keys)
40 | result = run_diffusion_maps(ad)
41 |
42 | assert "DM_Kernel" in ad.obsp
43 | assert "DM_Similarity" in ad.obsp
44 | assert "DM_EigenVectors" in ad.obsm
45 | assert "DM_EigenValues" in ad.uns
46 |
47 | assert np.array_equal(ad.obsp["DM_Kernel"].toarray(), result["kernel"].toarray())
48 | assert np.array_equal(ad.obsp["DM_Similarity"].toarray(), result["T"].toarray())
49 | assert np.array_equal(ad.obsm["DM_EigenVectors"], result["EigenVectors"].values)
50 | assert np.array_equal(ad.uns["DM_EigenValues"], result["EigenValues"])
51 |
52 |
53 | def test_run_diffusion_maps_exceptions():
54 | # Test with neither pd.DataFrame nor AnnData
55 | with pytest.raises(ValueError):
56 | run_diffusion_maps("invalid_type")
57 |
--------------------------------------------------------------------------------
/tests/test_utils_run_local_variability.py:
--------------------------------------------------------------------------------
1 | import scanpy as sc
2 | from anndata import AnnData
3 | import numpy as np
4 | import pytest
5 | from scipy.sparse import csr_matrix
6 |
7 | from palantir.utils import run_local_variability
8 |
9 |
10 | # Mock data for dense matrix
11 | def mock_anndata_dense(n_cells, n_genes, layer_keys, obsp_keys):
12 | ad = AnnData(np.random.rand(n_cells, n_genes))
13 | for key in layer_keys:
14 | ad.layers[key] = np.random.rand(n_cells, n_genes)
15 | for key in obsp_keys:
16 | ad.obsp[key] = np.random.rand(n_cells, n_cells)
17 | return ad
18 |
19 |
20 | # Mock data for sparse matrix
21 | def mock_anndata_sparse(n_cells, n_genes, layer_keys, obsp_keys):
22 | ad = AnnData(csr_matrix(np.random.rand(n_cells, n_genes)))
23 | for key in layer_keys:
24 | ad.layers[key] = csr_matrix(np.random.rand(n_cells, n_genes))
25 | for key in obsp_keys:
26 | ad.obsp[key] = csr_matrix(np.random.rand(n_cells, n_cells))
27 | return ad
28 |
29 |
30 | # Test with default keys, dense
31 | @pytest.mark.filterwarnings("ignore:invalid value encountered in divide")
32 | def test_run_local_variability_default_dense():
33 | ad = mock_anndata_dense(50, 20, ["MAGIC_imputed_data"], ["distances"])
34 | _test_run_local_variability(ad)
35 |
36 |
37 | # Test with default keys, sparse
38 | @pytest.mark.filterwarnings("ignore:invalid value encountered in divide")
39 | def test_run_local_variability_default_sparse():
40 | ad = mock_anndata_sparse(50, 20, ["MAGIC_imputed_data"], ["distances"])
41 | _test_run_local_variability(ad)
42 |
43 |
44 | # Test with custom keys, dense
45 | @pytest.mark.filterwarnings("ignore:invalid value encountered in divide")
46 | def test_run_local_variability_custom_keys_dense():
47 | ad = mock_anndata_dense(50, 20, ["custom_expression"], ["custom_distances"])
48 | _test_run_local_variability(ad, "custom_expression", "custom_distances", "custom_local_var")
49 |
50 |
51 | # Test with custom keys, sparse
52 | @pytest.mark.filterwarnings("ignore:invalid value encountered in divide")
53 | def test_run_local_variability_custom_keys_sparse():
54 | ad = mock_anndata_sparse(50, 20, ["custom_expression"], ["custom_distances"])
55 | _test_run_local_variability(ad, "custom_expression", "custom_distances", "custom_local_var")
56 |
57 |
58 | # Helper function for assertions
59 | def _test_run_local_variability(
60 | ad,
61 | expression_key="MAGIC_imputed_data",
62 | distances_key="distances",
63 | localvar_key="local_variability",
64 | ):
65 | result = run_local_variability(ad, expression_key, distances_key, localvar_key)
66 |
67 | assert localvar_key in ad.layers
68 | assert isinstance(result, np.ndarray) or isinstance(result, csr_matrix)
69 | assert result.shape == (50, 20)
70 |
71 |
72 | # Test missing keys
73 | def test_run_local_variability_missing_keys():
74 | ad = mock_anndata_dense(50, 20, ["MAGIC_imputed_data"], ["distances"])
75 |
76 | with pytest.raises(KeyError):
77 | run_local_variability(ad, "missing_expression", "distances")
78 |
79 | with pytest.raises(KeyError):
80 | run_local_variability(ad, "MAGIC_imputed_data", "missing_distances")
81 |
--------------------------------------------------------------------------------
/tests/test_utils_run_magic_imputation.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import numpy as np
3 | import pandas as pd
4 | import scanpy as sc
5 | from anndata import AnnData
6 | from scipy.sparse import csr_matrix
7 | from anndata import AnnData
8 |
9 | from palantir.utils import run_magic_imputation
10 |
11 |
12 | @pytest.fixture
13 | def mock_dm_res():
14 | return {"T": csr_matrix(np.random.rand(50, 50))}
15 |
16 |
17 | # Test with numpy ndarray
18 | def test_run_magic_imputation_ndarray(mock_dm_res):
19 | data = np.random.rand(50, 20)
20 | # With default sparse=True
21 | result = run_magic_imputation(data, dm_res=mock_dm_res)
22 | assert isinstance(result, csr_matrix)
23 | # With sparse=False
24 | result = run_magic_imputation(data, dm_res=mock_dm_res, sparse=False)
25 | assert isinstance(result, np.ndarray)
26 |
27 |
28 | # Test with pandas DataFrame
29 | def test_run_magic_imputation_dataframe(mock_dm_res):
30 | data = pd.DataFrame(np.random.rand(50, 20))
31 | # With default sparse=True, converts to DataFrame
32 | result = run_magic_imputation(data, dm_res=mock_dm_res)
33 | assert isinstance(result, pd.DataFrame)
34 | # With sparse=False
35 | result = run_magic_imputation(data, dm_res=mock_dm_res, sparse=False)
36 | assert isinstance(result, pd.DataFrame)
37 |
38 |
39 | # Test with csr_matrix
40 | def test_run_magic_imputation_csr(mock_dm_res):
41 | data = csr_matrix(np.random.rand(50, 20))
42 | # With default sparse=True
43 | result = run_magic_imputation(data, dm_res=mock_dm_res)
44 | assert isinstance(result, csr_matrix)
45 | # With sparse=False
46 | result = run_magic_imputation(data, dm_res=mock_dm_res, sparse=False)
47 | assert isinstance(result, np.ndarray)
48 |
49 |
50 | # Test with AnnData
51 | def test_run_magic_imputation_anndata():
52 | data = AnnData(np.random.rand(50, 20))
53 | data.obsp["DM_Similarity"] = np.random.rand(50, 50)
54 | # With default sparse=True
55 | result = run_magic_imputation(data)
56 | assert "MAGIC_imputed_data" in data.layers
57 | assert isinstance(result, csr_matrix)
58 |
59 | # With sparse=False
60 | data2 = AnnData(np.random.rand(50, 20))
61 | data2.obsp["DM_Similarity"] = np.random.rand(50, 50)
62 | result2 = run_magic_imputation(data2, sparse=False)
63 | assert "MAGIC_imputed_data" in data2.layers
64 | assert isinstance(result2, np.ndarray)
65 |
66 |
67 | # Test with AnnData and custom keys
68 | def test_run_magic_imputation_anndata_custom_keys():
69 | data = AnnData(np.random.rand(50, 20))
70 | data.layers["custom_expr"] = np.random.rand(50, 20)
71 | data.obsp["custom_sim"] = np.random.rand(50, 50)
72 | result = run_magic_imputation(
73 | data,
74 | expression_key="custom_expr",
75 | sim_key="custom_sim",
76 | imputation_key="custom_imp",
77 | )
78 | assert "custom_imp" in data.layers
79 | assert isinstance(result, csr_matrix)
80 |
81 | # With sparse=False
82 | data2 = AnnData(np.random.rand(50, 20))
83 | data2.layers["custom_expr"] = np.random.rand(50, 20)
84 | data2.obsp["custom_sim"] = np.random.rand(50, 50)
85 | result2 = run_magic_imputation(
86 | data2,
87 | expression_key="custom_expr",
88 | sim_key="custom_sim",
89 | imputation_key="custom_imp2",
90 | sparse=False,
91 | )
92 | assert "custom_imp2" in data2.layers
93 | assert isinstance(result2, np.ndarray)
94 |
95 |
96 | # Test with missing dm_res and not AnnData
97 | def test_run_magic_imputation_missing_dm_res():
98 | data = np.random.rand(50, 20)
99 | with pytest.raises(ValueError):
100 | run_magic_imputation(data)
101 |
102 |
103 | # Test with missing expression_key in AnnData
104 | def test_run_magic_imputation_missing_expression_key():
105 | data = AnnData(np.random.rand(50, 20))
106 | data.obsp["DM_Similarity"] = np.random.rand(50, 50)
107 | with pytest.raises(ValueError):
108 | run_magic_imputation(data, expression_key="missing_key")
109 |
--------------------------------------------------------------------------------
/tests/test_validation.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import pandas as pd
3 | import numpy as np
4 | import scanpy as sc
5 | from anndata import AnnData
6 | from pandas.testing import assert_frame_equal, assert_series_equal
7 | from anndata import AnnData
8 |
9 | from palantir.validation import (
10 | _validate_obsm_key,
11 | _validate_varm_key,
12 | _validate_gene_trend_input,
13 | )
14 |
15 |
16 | @pytest.fixture
17 | def mock_anndata_with_obsm():
18 | """Create anndata with obsm for testing validation functions"""
19 | n_cells = 20
20 | n_genes = 10
21 | ad = AnnData(X=np.random.rand(n_cells, n_genes))
22 |
23 | # Add DataFrame in obsm
24 | ad.obsm["df_key"] = pd.DataFrame(
25 | np.random.rand(n_cells, 3), columns=["c1", "c2", "c3"], index=ad.obs_names
26 | )
27 |
28 | # Add numpy array in obsm with column names in uns
29 | ad.obsm["np_key"] = np.random.rand(n_cells, 3)
30 | ad.uns["np_key_columns"] = ["c1", "c2", "c3"]
31 |
32 | return ad
33 |
34 |
35 | @pytest.fixture
36 | def mock_anndata_with_varm():
37 | """Create anndata with varm for testing validation functions"""
38 | n_cells = 20
39 | n_genes = 10
40 | ad = AnnData(X=np.random.rand(n_cells, n_genes))
41 |
42 | # Add DataFrame in varm
43 | ad.varm["df_key"] = pd.DataFrame(
44 | np.random.rand(n_genes, 5), columns=[0.1, 0.2, 0.3, 0.4, 0.5], index=ad.var_names
45 | )
46 |
47 | # Add numpy array in varm with pseudotime in uns
48 | ad.varm["np_key"] = np.random.rand(n_genes, 5)
49 | ad.uns["np_key_pseudotime"] = [0.1, 0.2, 0.3, 0.4, 0.5]
50 |
51 | return ad
52 |
53 |
54 | @pytest.fixture
55 | def mock_anndata_with_gene_trends():
56 | """Create anndata with gene trends for testing validation functions"""
57 | n_cells = 20
58 | n_genes = 10
59 | ad = AnnData(X=np.random.rand(n_cells, n_genes))
60 |
61 | # Add branch masks in various locations
62 | # 1. as DataFrame in obsm
63 | ad.obsm["branch_masks"] = pd.DataFrame(
64 | np.random.randint(0, 2, size=(n_cells, 3)),
65 | columns=["branch1", "branch2", "branch3"],
66 | index=ad.obs_names,
67 | )
68 |
69 | # 2. as list in uns
70 | ad.uns["branch_list"] = ["branch1", "branch2", "branch3"]
71 |
72 | # 3. as numpy array with columns in uns
73 | ad.obsm["branch_array"] = np.random.randint(0, 2, size=(n_cells, 3))
74 | ad.uns["branch_array_columns"] = ["branch1", "branch2", "branch3"]
75 |
76 | # Add gene trends for each branch
77 | for branch in ["branch1", "branch2", "branch3"]:
78 | trend_key = f"gene_trends_{branch}"
79 | ad.varm[trend_key] = pd.DataFrame(
80 | np.random.rand(n_genes, 5), columns=[0.1, 0.2, 0.3, 0.4, 0.5], index=ad.var_names
81 | )
82 |
83 | return ad
84 |
85 |
86 | def test_validate_obsm_key_with_df(mock_anndata_with_obsm):
87 | """Test _validate_obsm_key with DataFrame input"""
88 | ad = mock_anndata_with_obsm
89 |
90 | # Test DataFrame as_df=True (default)
91 | data, data_names = _validate_obsm_key(ad, "df_key")
92 | assert isinstance(data, pd.DataFrame)
93 | assert data.shape == (ad.n_obs, 3)
94 | assert list(data_names) == ["c1", "c2", "c3"]
95 |
96 | # Test DataFrame as_df=False
97 | data, data_names = _validate_obsm_key(ad, "df_key", as_df=False)
98 | assert isinstance(data, np.ndarray)
99 | assert data.shape == (ad.n_obs, 3)
100 | assert list(data_names) == ["c1", "c2", "c3"]
101 |
102 |
103 | def test_validate_obsm_key_with_array(mock_anndata_with_obsm):
104 | """Test _validate_obsm_key with numpy array input"""
105 | ad = mock_anndata_with_obsm
106 |
107 | # Test numpy array as_df=True
108 | data, data_names = _validate_obsm_key(ad, "np_key")
109 | assert isinstance(data, pd.DataFrame)
110 | assert data.shape == (ad.n_obs, 3)
111 | assert list(data_names) == ["c1", "c2", "c3"]
112 |
113 | # Test numpy array as_df=False
114 | data, data_names = _validate_obsm_key(ad, "np_key", as_df=False)
115 | assert isinstance(data, np.ndarray)
116 | assert data.shape == (ad.n_obs, 3)
117 | assert list(data_names) == ["c1", "c2", "c3"]
118 |
119 |
120 | def test_validate_obsm_key_errors(mock_anndata_with_obsm):
121 | """Test _validate_obsm_key error handling"""
122 | ad = mock_anndata_with_obsm
123 |
124 | # Test key not in obsm
125 | with pytest.raises(KeyError, match="not_a_key not found in ad.obsm"):
126 | _validate_obsm_key(ad, "not_a_key")
127 |
128 | # Test numpy array without columns in uns
129 | ad.obsm["bad_key"] = np.random.rand(ad.n_obs, 3)
130 | with pytest.raises(KeyError, match="bad_key_columns not found"):
131 | _validate_obsm_key(ad, "bad_key")
132 |
133 |
134 | def test_validate_varm_key_with_df(mock_anndata_with_varm):
135 | """Test _validate_varm_key with DataFrame input"""
136 | ad = mock_anndata_with_varm
137 |
138 | # Test DataFrame as_df=True (default)
139 | data, data_names = _validate_varm_key(ad, "df_key")
140 | assert isinstance(data, pd.DataFrame)
141 | assert data.shape == (ad.n_vars, 5)
142 | assert list(data_names) == [0.1, 0.2, 0.3, 0.4, 0.5]
143 |
144 | # Test DataFrame as_df=False
145 | data, data_names = _validate_varm_key(ad, "df_key", as_df=False)
146 | assert isinstance(data, np.ndarray)
147 | assert data.shape == (ad.n_vars, 5)
148 | assert list(data_names) == [0.1, 0.2, 0.3, 0.4, 0.5]
149 |
150 |
151 | def test_validate_varm_key_with_array(mock_anndata_with_varm):
152 | """Test _validate_varm_key with numpy array input"""
153 | ad = mock_anndata_with_varm
154 |
155 | # Test numpy array as_df=True
156 | data, data_names = _validate_varm_key(ad, "np_key")
157 | assert isinstance(data, pd.DataFrame)
158 | assert data.shape == (ad.n_vars, 5)
159 | assert np.allclose(data_names, [0.1, 0.2, 0.3, 0.4, 0.5])
160 |
161 | # Test numpy array as_df=False
162 | data, data_names = _validate_varm_key(ad, "np_key", as_df=False)
163 | assert isinstance(data, np.ndarray)
164 | assert data.shape == (ad.n_vars, 5)
165 | assert np.allclose(data_names, [0.1, 0.2, 0.3, 0.4, 0.5])
166 |
167 |
168 | def test_validate_varm_key_errors(mock_anndata_with_varm):
169 | """Test _validate_varm_key error handling"""
170 | ad = mock_anndata_with_varm
171 |
172 | # Test key not in varm
173 | with pytest.raises(KeyError, match="not_a_key not found in ad.varm"):
174 | _validate_varm_key(ad, "not_a_key")
175 |
176 | # Test numpy array without pseudotime in uns
177 | ad.varm["bad_key"] = np.random.rand(ad.n_vars, 3)
178 | with pytest.raises(KeyError, match="bad_key_pseudotime not found"):
179 | _validate_varm_key(ad, "bad_key")
180 |
181 |
182 | def test_validate_gene_trend_input_anndata(mock_anndata_with_gene_trends):
183 | """Test _validate_gene_trend_input with AnnData input"""
184 | ad = mock_anndata_with_gene_trends
185 |
186 | # Test with default parameters (branch_masks in obsm)
187 | gene_trends = _validate_gene_trend_input(ad)
188 | assert isinstance(gene_trends, dict)
189 | assert len(gene_trends) == 3
190 | assert "branch1" in gene_trends
191 | assert "branch2" in gene_trends
192 | assert "branch3" in gene_trends
193 |
194 | # Test with branch_names as a string key in uns
195 | gene_trends = _validate_gene_trend_input(ad, branch_names="branch_list")
196 | assert isinstance(gene_trends, dict)
197 | assert len(gene_trends) == 3
198 |
199 | # Test with branch_names as a key in obsm with DataFrame
200 | gene_trends = _validate_gene_trend_input(ad, branch_names="branch_masks")
201 | assert isinstance(gene_trends, dict)
202 | assert len(gene_trends) == 3
203 |
204 | # Test with branch_names as a key with columns in uns
205 | gene_trends = _validate_gene_trend_input(ad, branch_names="branch_array")
206 | assert isinstance(gene_trends, dict)
207 | assert len(gene_trends) == 3
208 |
209 |
210 | def test_validate_gene_trend_input_dict():
211 | """Test _validate_gene_trend_input with dict input"""
212 | # Create test dictionary
213 | trends1 = pd.DataFrame(np.random.rand(10, 5), columns=[0.1, 0.2, 0.3, 0.4, 0.5])
214 | trends2 = pd.DataFrame(np.random.rand(10, 5), columns=[0.1, 0.2, 0.3, 0.4, 0.5])
215 |
216 | input_dict = {"branch1": {"trends": trends1}, "branch2": {"trends": trends2}}
217 |
218 | gene_trends = _validate_gene_trend_input(input_dict)
219 | assert gene_trends is input_dict # Should return the same dict
220 |
221 |
222 | def test_validate_gene_trend_input_errors(mock_anndata_with_gene_trends):
223 | """Test _validate_gene_trend_input error handling"""
224 | ad = mock_anndata_with_gene_trends
225 |
226 | # Test invalid branch_names key
227 | with pytest.raises(KeyError, match="not_a_key.*not found"):
228 | _validate_gene_trend_input(ad, branch_names="not_a_key")
229 |
230 | # Test invalid data type
231 | with pytest.raises(ValueError, match="must be an instance of either AnnData"):
232 | _validate_gene_trend_input([1, 2, 3]) # List is not valid input
233 |
--------------------------------------------------------------------------------
/tests/utils_compute_kernel.py:
--------------------------------------------------------------------------------
1 | from scipy.sparse import find, csr_matrix
2 | import pytest
3 | import pandas as pd
4 | import scanpy as sc
5 | import numpy as np
6 |
7 | from palantir.utils import compute_kernel
8 |
9 |
10 | @pytest.fixture
11 | def mock_data():
12 | n_cells = 50
13 | n_genes = 10
14 | return pd.DataFrame(
15 | np.random.rand(n_cells, n_genes),
16 | columns=[f"gene_{i}" for i in range(n_genes)],
17 | index=[f"cell_{i}" for i in range(n_cells)],
18 | )
19 |
20 |
21 | @pytest.fixture
22 | def mock_anndata(mock_data):
23 | ad = sc.AnnData(X=mock_data)
24 | ad.obsm["X_pca"] = mock_data
25 | return ad
26 |
27 |
28 | # Test with DataFrame
29 | def test_compute_kernel_dataframe(mock_data):
30 | kernel = compute_kernel(mock_data)
31 | assert isinstance(kernel, csr_matrix)
32 |
33 |
34 | # Test with AnnData
35 | def test_compute_kernel_anndata(mock_anndata):
36 | kernel = compute_kernel(mock_anndata)
37 | assert "DM_Kernel" in mock_anndata.obsp.keys()
38 |
39 |
40 | # Test knn parameter
41 | def test_compute_kernel_knn(mock_data):
42 | kernel = compute_kernel(mock_data, knn=10)
43 | assert isinstance(kernel, csr_matrix)
44 |
45 |
46 | # Test alpha parameter
47 | def test_compute_kernel_alpha(mock_data):
48 | kernel = compute_kernel(mock_data, alpha=0.5)
49 | assert isinstance(kernel, csr_matrix)
50 |
51 |
52 | # Test pca_key parameter
53 | def test_compute_kernel_pca_key(mock_anndata):
54 | mock_anndata.obsm["custom_pca"] = np.random.rand(mock_anndata.shape[0], 10)
55 | kernel = compute_kernel(mock_anndata, pca_key="custom_pca")
56 | assert "DM_Kernel" in mock_anndata.obsp.keys()
57 |
58 |
59 | # Test kernel_key parameter
60 | def test_compute_kernel_kernel_key(mock_anndata):
61 | kernel = compute_kernel(mock_anndata, kernel_key="custom_kernel")
62 | assert "custom_kernel" in mock_anndata.obsp.keys()
63 |
--------------------------------------------------------------------------------
/tests/utils_diffusion_maps_from_kernel.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import numpy as np
3 | from scipy.sparse import csr_matrix
4 | from scipy.sparse.linalg import eigs
5 | from pytest import approx
6 |
7 | from palantir.utils import diffusion_maps_from_kernel, determine_multiscale_space
8 |
9 |
10 | @pytest.fixture
11 | def mock_kernel():
12 | size = 50
13 | A = np.random.rand(size, size)
14 | return csr_matrix((A + A.T) / 2)
15 |
16 |
17 | def test_diffusion_maps_basic(mock_kernel):
18 | result = diffusion_maps_from_kernel(mock_kernel)
19 |
20 | assert isinstance(result, dict)
21 | assert "T" in result and "EigenVectors" in result and "EigenValues" in result
22 |
23 | assert result["T"].shape == (50, 50)
24 | assert result["EigenVectors"].shape == (50, 10)
25 | assert result["EigenValues"].shape == (10,)
26 |
27 |
28 | def test_diffusion_maps_n_components(mock_kernel):
29 | result = diffusion_maps_from_kernel(mock_kernel, n_components=5)
30 |
31 | assert result["EigenVectors"].shape == (50, 5)
32 | assert result["EigenValues"].shape == (5,)
33 |
34 |
35 | def test_diffusion_maps_seed(mock_kernel):
36 | result1 = diffusion_maps_from_kernel(mock_kernel, seed=0)
37 | result2 = diffusion_maps_from_kernel(mock_kernel, seed=0)
38 |
39 | # Seed usage should yield the same result
40 | assert np.allclose(result1["EigenValues"], result2["EigenValues"])
41 |
42 |
43 | def test_diffusion_maps_eigen(mock_kernel):
44 | result = diffusion_maps_from_kernel(mock_kernel)
45 |
46 | T = result["T"].toarray()
47 | e_values, e_vectors = eigs(T, 10, tol=1e-4, maxiter=1000)
48 |
49 | assert np.allclose(
50 | result["EigenValues"], np.real(sorted(e_values, reverse=True)[:10]), atol=1e-4
51 | )
52 |
53 |
54 | def test_determine_multiscale_space(mock_kernel):
55 | result = diffusion_maps_from_kernel(mock_kernel)
56 | msresult = determine_multiscale_space(result)
57 | assert msresult.shape[0] == result["EigenVectors"].shape[0]
58 |
--------------------------------------------------------------------------------
/tests/utils_run_diffusion_maps.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import pandas as pd
3 | import scanpy as sc
4 | from scipy.sparse import csr_matrix, issparse
5 | import numpy as np
6 |
7 | from palantir.utils import run_diffusion_maps
8 |
9 |
10 | # Generate mock DataFrame data
11 | def mock_dataframe(rows, cols):
12 | return pd.DataFrame(np.random.rand(rows, cols))
13 |
14 |
15 | # Generate mock sc.AnnData object
16 | def mock_anndata(rows, cols, keys):
17 | ad = sc.AnnData(np.random.rand(rows, cols))
18 | for key in keys:
19 | ad.obsm[key] = np.random.rand(rows, cols)
20 | return ad
21 |
22 |
23 | def test_run_diffusion_maps_dataframe():
24 | df = mock_dataframe(50, 30)
25 | result = run_diffusion_maps(df)
26 |
27 | assert isinstance(result, dict)
28 | assert set(result.keys()) == {"T", "EigenVectors", "EigenValues", "kernel"}
29 |
30 | assert isinstance(result["kernel"], csr_matrix)
31 | assert isinstance(result["T"], csr_matrix)
32 | assert isinstance(result["EigenVectors"], pd.DataFrame)
33 | assert isinstance(result["EigenValues"], pd.Series)
34 |
35 |
36 | def test_run_diffusion_maps_anndata():
37 | keys = ["X_pca"]
38 | ad = mock_anndata(50, 30, keys)
39 | result = run_diffusion_maps(ad)
40 |
41 | assert "DM_Kernel" in ad.obsp
42 | assert "DM_Similarity" in ad.obsp
43 | assert "DM_EigenVectors" in ad.obsm
44 | assert "DM_EigenValues" in ad.uns
45 |
46 | assert np.array_equal(ad.obsp["DM_Kernel"].toarray(), result["kernel"].toarray())
47 | assert np.array_equal(ad.obsp["DM_Similarity"].toarray(), result["T"].toarray())
48 | assert np.array_equal(ad.obsm["DM_EigenVectors"], result["EigenVectors"].values)
49 | assert np.array_equal(ad.uns["DM_EigenValues"], result["EigenValues"])
50 |
51 |
52 | def test_run_diffusion_maps_exceptions():
53 | # Test with neither pd.DataFrame nor sc.AnnData
54 | with pytest.raises(ValueError):
55 | run_diffusion_maps("invalid_type")
56 |
--------------------------------------------------------------------------------
/tests/utils_run_local_variability.py:
--------------------------------------------------------------------------------
1 | import scanpy as sc
2 | import numpy as np
3 | import pytest
4 | from scipy.sparse import csr_matrix
5 |
6 | from palantir.utils import run_local_variability
7 |
8 |
9 | # Mock data for dense matrix
10 | def mock_anndata_dense(n_cells, n_genes, layer_keys, obsp_keys):
11 | ad = sc.AnnData(np.random.rand(n_cells, n_genes))
12 | for key in layer_keys:
13 | ad.layers[key] = np.random.rand(n_cells, n_genes)
14 | for key in obsp_keys:
15 | ad.obsp[key] = np.random.rand(n_cells, n_cells)
16 | return ad
17 |
18 |
19 | # Mock data for sparse matrix
20 | def mock_anndata_sparse(n_cells, n_genes, layer_keys, obsp_keys):
21 | ad = sc.AnnData(csr_matrix(np.random.rand(n_cells, n_genes)))
22 | for key in layer_keys:
23 | ad.layers[key] = csr_matrix(np.random.rand(n_cells, n_genes))
24 | for key in obsp_keys:
25 | ad.obsp[key] = csr_matrix(np.random.rand(n_cells, n_cells))
26 | return ad
27 |
28 |
29 | # Test with default keys, dense
30 | @pytest.mark.filterwarnings("ignore:invalid value encountered in divide")
31 | def test_run_local_variability_default_dense():
32 | ad = mock_anndata_dense(50, 20, ["MAGIC_imputed_data"], ["distances"])
33 | _test_run_local_variability(ad)
34 |
35 |
36 | # Test with default keys, sparse
37 | @pytest.mark.filterwarnings("ignore:invalid value encountered in divide")
38 | def test_run_local_variability_default_sparse():
39 | ad = mock_anndata_sparse(50, 20, ["MAGIC_imputed_data"], ["distances"])
40 | _test_run_local_variability(ad)
41 |
42 |
43 | # Test with custom keys, dense
44 | @pytest.mark.filterwarnings("ignore:invalid value encountered in divide")
45 | def test_run_local_variability_custom_keys_dense():
46 | ad = mock_anndata_dense(50, 20, ["custom_expression"], ["custom_distances"])
47 | _test_run_local_variability(
48 | ad, "custom_expression", "custom_distances", "custom_local_var"
49 | )
50 |
51 |
52 | # Test with custom keys, sparse
53 | @pytest.mark.filterwarnings("ignore:invalid value encountered in divide")
54 | def test_run_local_variability_custom_keys_sparse():
55 | ad = mock_anndata_sparse(50, 20, ["custom_expression"], ["custom_distances"])
56 | _test_run_local_variability(
57 | ad, "custom_expression", "custom_distances", "custom_local_var"
58 | )
59 |
60 |
61 | # Helper function for assertions
62 | def _test_run_local_variability(
63 | ad,
64 | expression_key="MAGIC_imputed_data",
65 | distances_key="distances",
66 | localvar_key="local_variability",
67 | ):
68 | result = run_local_variability(ad, expression_key, distances_key, localvar_key)
69 |
70 | assert localvar_key in ad.layers
71 | assert isinstance(result, np.ndarray) or isinstance(result, csr_matrix)
72 | assert result.shape == (50, 20)
73 |
74 |
75 | # Test missing keys
76 | def test_run_local_variability_missing_keys():
77 | ad = mock_anndata_dense(50, 20, ["MAGIC_imputed_data"], ["distances"])
78 |
79 | with pytest.raises(KeyError):
80 | run_local_variability(ad, "missing_expression", "distances")
81 |
82 | with pytest.raises(KeyError):
83 | run_local_variability(ad, "MAGIC_imputed_data", "missing_distances")
84 |
--------------------------------------------------------------------------------
/tests/utils_run_magic_imputation.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import numpy as np
3 | import pandas as pd
4 | import scanpy as sc
5 | from scipy.sparse import csr_matrix
6 |
7 | from palantir.utils import run_magic_imputation
8 |
9 |
10 | @pytest.fixture
11 | def mock_dm_res():
12 | return {"T": csr_matrix(np.random.rand(50, 50))}
13 |
14 |
15 | # Test with numpy ndarray
16 | def test_run_magic_imputation_ndarray(mock_dm_res):
17 | data = np.random.rand(50, 20)
18 | result = run_magic_imputation(data, dm_res=mock_dm_res)
19 | assert isinstance(result, csr_matrix)
20 | result = run_magic_imputation(data, dm_res=mock_dm_res, sparse=False)
21 | assert isinstance(result, np.ndarray)
22 |
23 |
24 | # Test with pandas DataFrame
25 | def test_run_magic_imputation_dataframe(mock_dm_res):
26 | data = pd.DataFrame(np.random.rand(50, 20))
27 | result = run_magic_imputation(data, dm_res=mock_dm_res)
28 | assert isinstance(result, pd.DataFrame)
29 |
30 |
31 | # Test with csr_matrix
32 | def test_run_magic_imputation_csr(mock_dm_res):
33 | data = csr_matrix(np.random.rand(50, 20))
34 | result = run_magic_imputation(data, dm_res=mock_dm_res)
35 | assert isinstance(result, csr_matrix)
36 | result = run_magic_imputation(data, dm_res=mock_dm_res, sparse=False)
37 | assert isinstance(result, np.ndarray)
38 |
39 |
40 | # Test with AnnData
41 | def test_run_magic_imputation_anndata():
42 | data = sc.AnnData(np.random.rand(50, 20))
43 | data.obsp["DM_Similarity"] = np.random.rand(50, 50)
44 | result = run_magic_imputation(data)
45 | assert "MAGIC_imputed_data" in data.layers
46 | assert isinstance(result, csr_matrix)
47 |
48 |
49 | # Test with AnnData and custom keys
50 | def test_run_magic_imputation_anndata_custom_keys():
51 | data = sc.AnnData(np.random.rand(50, 20))
52 | data.layers["custom_expr"] = np.random.rand(50, 20)
53 | data.obsp["custom_sim"] = np.random.rand(50, 50)
54 | result = run_magic_imputation(
55 | data,
56 | expression_key="custom_expr",
57 | sim_key="custom_sim",
58 | imputation_key="custom_imp",
59 | )
60 | assert "custom_imp" in data.layers
61 |
62 |
63 | # Test with missing dm_res and not AnnData
64 | def test_run_magic_imputation_missing_dm_res():
65 | data = np.random.rand(50, 20)
66 | with pytest.raises(ValueError):
67 | run_magic_imputation(data)
68 |
69 |
70 | # Test with missing expression_key in AnnData
71 | def test_run_magic_imputation_missing_expression_key():
72 | data = sc.AnnData(np.random.rand(50, 20))
73 | data.obsp["DM_Similarity"] = np.random.rand(50, 50)
74 | with pytest.raises(ValueError):
75 | run_magic_imputation(data, expression_key="missing_key")
76 |
--------------------------------------------------------------------------------
/tests/utils_run_pca.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import pandas as pd
3 | import scanpy as sc
4 | import numpy as np
5 |
6 | from palantir.utils import run_pca
7 |
8 |
9 | @pytest.fixture
10 | def mock_data():
11 | n_cells = 50
12 | n_genes = 500
13 | return pd.DataFrame(
14 | np.random.rand(n_cells, n_genes),
15 | columns=[f"gene_{i}" for i in range(n_genes)],
16 | index=[f"cell_{i}" for i in range(n_cells)],
17 | )
18 |
19 |
20 | @pytest.fixture
21 | def mock_anndata(mock_data):
22 | ad = sc.AnnData(X=mock_data)
23 | ad.obsm["DM_EigenVectors_multiscaled"] = mock_data
24 | ad.var["highly_variable"] = np.random.choice([True, False], size=mock_data.shape[1])
25 | return ad
26 |
27 |
28 | # Test with DataFrame
29 | def test_run_pca_dataframe(mock_data):
30 | pca_results, var_ratio = run_pca(mock_data, use_hvg=False)
31 | assert isinstance(pca_results, pd.DataFrame)
32 | assert isinstance(var_ratio, np.ndarray)
33 | assert pca_results.shape[1] <= 300 # Check n_components
34 |
35 |
36 | # Test with AnnData
37 | def test_run_pca_anndata(mock_anndata):
38 | pca_results, var_ratio = run_pca(mock_anndata)
39 | assert "X_pca" in mock_anndata.obsm.keys()
40 | assert mock_anndata.obsm["X_pca"].shape[1] <= 300
41 |
42 |
43 | # Test n_components parameter
44 | def test_run_pca_components(mock_data):
45 | pca_results, _ = run_pca(mock_data, n_components=5, use_hvg=False)
46 | assert pca_results.shape[1] == 5
47 |
48 |
49 | # Test use_hvg parameter
50 | def test_run_pca_hvg(mock_anndata):
51 | pca_results, _ = run_pca(mock_anndata, use_hvg=True)
52 | assert pca_results.shape[1] <= 300
53 |
54 |
55 | # Test pca_key parameter
56 | def test_run_pca_pca_key(mock_anndata):
57 | run_pca(mock_anndata, pca_key="custom_key")
58 | assert "custom_key" in mock_anndata.obsm.keys()
59 | assert mock_anndata.obsm["custom_key"].shape[1] <= 300
60 |
--------------------------------------------------------------------------------