├── .codecov.yml ├── .github └── workflows │ └── python-package.yml ├── .gitignore ├── LICENSE ├── README.md ├── data ├── marrow_sample_scseq_counts.csv.gz ├── marrow_sample_scseq_counts.h5ad └── sample_tsne.p ├── docs ├── Makefile ├── make.bat ├── requirements.txt └── source │ ├── conf.py │ ├── core.rst │ ├── index.rst │ ├── notebooks │ ├── plot.rst │ ├── preprocess.rst │ ├── presults.rst │ └── utils.rst ├── notebooks ├── Palantir_sample_notebook.ipynb ├── comparisons │ ├── dpt.ipynb │ ├── fateid.ipynb │ ├── monocle2.ipynb │ ├── paga.ipynb │ ├── results │ │ ├── fateid │ │ │ ├── CLP_order.csv │ │ │ ├── DC_order.csv │ │ │ ├── Ery_order.csv │ │ │ ├── Mega_order.csv │ │ │ ├── Mono_order.csv │ │ │ ├── clusters.csv │ │ │ ├── probs.csv │ │ │ └── tsne.csv │ │ ├── monocle2 │ │ │ ├── phenodata.csv │ │ │ └── red_dims.csv │ │ └── slingshot │ │ │ ├── Lineage1_CD34.csv │ │ │ ├── Lineage1_CD79A.csv │ │ │ ├── Lineage1_CD79B.csv │ │ │ ├── Lineage1_CEBPD.csv │ │ │ ├── Lineage1_CEBPG.csv │ │ │ ├── Lineage1_CSF1R.csv │ │ │ ├── Lineage1_GATA1.csv │ │ │ ├── Lineage1_IRF8.csv │ │ │ ├── Lineage1_ITGA2B.csv │ │ │ ├── Lineage1_MPO.csv │ │ │ ├── Lineage1_RAG1.csv │ │ │ ├── Lineage1_SPI1.csv │ │ │ ├── Lineage2_CD34.csv │ │ │ ├── Lineage2_CD79A.csv │ │ │ ├── Lineage2_CD79B.csv │ │ │ ├── Lineage2_CEBPD.csv │ │ │ ├── Lineage2_CEBPG.csv │ │ │ ├── Lineage2_CSF1R.csv │ │ │ ├── Lineage2_GATA1.csv │ │ │ ├── Lineage2_IRF8.csv │ │ │ ├── Lineage2_ITGA2B.csv │ │ │ ├── Lineage2_MPO.csv │ │ │ ├── Lineage2_RAG1.csv │ │ │ ├── Lineage2_SPI1.csv │ │ │ ├── Lineage3_CD34.csv │ │ │ ├── Lineage3_CD79A.csv │ │ │ ├── Lineage3_CD79B.csv │ │ │ ├── Lineage3_CEBPD.csv │ │ │ ├── Lineage3_CEBPG.csv │ │ │ ├── Lineage3_CSF1R.csv │ │ │ ├── Lineage3_GATA1.csv │ │ │ ├── Lineage3_IRF8.csv │ │ │ ├── Lineage3_ITGA2B.csv │ │ │ ├── Lineage3_MPO.csv │ │ │ ├── Lineage3_RAG1.csv │ │ │ ├── Lineage3_SPI1.csv │ │ │ ├── Lineage4_CD34.csv │ │ │ ├── Lineage4_CD79A.csv │ │ │ ├── Lineage4_CD79B.csv │ │ │ ├── Lineage4_CEBPD.csv │ │ │ ├── 
Lineage4_CEBPG.csv │ │ │ ├── Lineage4_CSF1R.csv │ │ │ ├── Lineage4_GATA1.csv │ │ │ ├── Lineage4_IRF8.csv │ │ │ ├── Lineage4_ITGA2B.csv │ │ │ ├── Lineage4_MPO.csv │ │ │ ├── Lineage4_RAG1.csv │ │ │ ├── Lineage4_SPI1.csv │ │ │ ├── clusters.csv │ │ │ ├── data.csv │ │ │ ├── exprs.csv │ │ │ └── weights.csv │ └── slignshot.ipynb └── manuscript_data.ipynb ├── pyproject.toml ├── readthedocs.yaml ├── requirements.txt ├── setup.py ├── src └── palantir │ ├── __init__.py │ ├── cli.py │ ├── config.py │ ├── core.py │ ├── io.py │ ├── plot.py │ ├── plot_utils.py │ ├── preprocess.py │ ├── presults.py │ ├── utils.py │ ├── validation.py │ └── version.py └── tests ├── conftest.py ├── core_run_palantir.py ├── plot.py ├── presults.py ├── presults_compute_gene_trends.py ├── test_core_run_palantir.py ├── test_integration.py ├── test_io.py ├── test_plot.py ├── test_preprocess.py ├── test_presults.py ├── test_presults_cluster_gene_trends.py ├── test_presults_compute_gene_trends.py ├── test_presults_gam_fit_predict.py ├── test_presults_select_branch_cells.py ├── test_util_density.py ├── test_util_run_pca.py ├── test_utils_compute_kernel.py ├── test_utils_density_functions.py ├── test_utils_determine_multiscale_space.py ├── test_utils_diffusion_maps_from_kernel.py ├── test_utils_early_cell_functions.py ├── test_utils_run_diffusion_maps.py ├── test_utils_run_local_variability.py ├── test_utils_run_magic_imputation.py ├── test_validation.py ├── utils_compute_kernel.py ├── utils_diffusion_maps_from_kernel.py ├── utils_run_diffusion_maps.py ├── utils_run_local_variability.py ├── utils_run_magic_imputation.py └── utils_run_pca.py /.codecov.yml: -------------------------------------------------------------------------------- 1 | coverage: 2 | status: 3 | project: 4 | default: 5 | target: 80% 6 | threshold: 1% 7 | patch: 8 | default: 9 | target: 80% 10 | threshold: 1% 11 | -------------------------------------------------------------------------------- /.github/workflows/python-package.yml: 
-------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python 3 | 4 | name: Python package 5 | 6 | on: 7 | push: 8 | branches: [ "master", "main", "dev" ] 9 | pull_request: 10 | branches: [ "master", "main", "dev" ] 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | strategy: 17 | fail-fast: false 18 | matrix: 19 | python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] 20 | 21 | steps: 22 | - uses: actions/checkout@v3 23 | - name: Set up Python ${{ matrix.python-version }} 24 | uses: actions/setup-python@v3 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | - name: Install dependencies 28 | run: | 29 | python -m pip install --upgrade pip 30 | python -m pip install flake8 pytest pytest-cov coverage typing-extensions 31 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 32 | 33 | # Install the package with appropriate test dependencies 34 | # For Python 3.13+, use test-base (without fcsparser) 35 | # For Python <3.13, use test (includes fcsparser) 36 | if python -c "import sys; exit(0 if sys.version_info >= (3, 13) else 1)"; then 37 | python -m pip install -e ".[test-base]" 38 | echo "Installed with test-base dependencies (no fcsparser) for Python 3.13+" 39 | else 40 | python -m pip install -e ".[test]" 41 | echo "Installed with test dependencies (includes fcsparser) for Python < 3.13" 42 | fi 43 | - name: Lint with flake8 44 | run: | 45 | # stop the build if there are Python syntax errors or undefined names 46 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 47 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 48 | flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 49 | - name: Test with pytest 50 | env: 51 | PYTHONPATH: ./src/ 52 | run: | 53 | python -m pytest --cov=src/palantir 54 | - name: Upload coverage reports to Codecov 55 | uses: codecov/codecov-action@v3 56 | with: 57 | token: ${{ secrets.CODECOV_TOKEN }} 58 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | .DS_Store 3 | .ipynb_checkpoints/ 4 | *.h5ad 5 | build/ 6 | palantir.egg-info/ 7 | .coverage* 8 | notebooks/testing.ipynb 9 | .pytest_cache/ 10 | dist/ 11 | .vscode/ 12 | data/ 13 | *.ipynb 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020–present Dana Pe'er Lab 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![PyPI version](https://badge.fury.io/py/palantir.svg)](https://badge.fury.io/py/palantir) 2 | [![codecov](https://codecov.io/github/settylab/Palantir/graph/badge.svg?token=KJTEY76FTK)](https://codecov.io/github/settylab/Palantir) 3 | 4 | Palantir 5 | ------ 6 | 7 | Palantir is an algorithm to align cells along differentiation trajectories. Palantir models differentiation as a stochastic process where stem cells differentiate to terminally differentiated cells by a series of steps through a low dimensional phenotypic manifold. Palantir effectively captures the continuity in cell states and the stochasticity in cell fate determination. Palantir has been designed to work with multidimensional single cell data from diverse technologies such as Mass cytometry and single cell RNA-seq. 8 | 9 | ## Installation 10 | Palantir has been implemented in Python3 and can be installed using: 11 | 12 | ### Using pip 13 | ```sh 14 | pip install palantir 15 | ``` 16 | 17 | ### Using conda, mamba, or micromamba from the bioconda channel 18 | You can also install Palantir via conda, mamba, or micromamba from the bioconda channel: 19 | 20 | #### Using conda 21 | ```sh 22 | conda install -c conda-forge -c bioconda palantir 23 | ``` 24 | 25 | #### Using mamba 26 | ```sh 27 | mamba install -c conda-forge -c bioconda palantir 28 | ``` 29 | 30 | #### Using micromamba 31 | ```sh 32 | micromamba install -c conda-forge -c bioconda palantir 33 | ``` 34 | 35 | These methods ensure that all dependencies are resolved and installed efficiently. 
36 | 37 | 38 | ## Usage 39 | 40 | A tutorial on Palantir usage and results visualization for single cell RNA-seq 41 | data can be found in this notebook: 42 | https://github.com/dpeerlab/Palantir/blob/master/notebooks/Palantir_sample_notebook.ipynb 43 | 44 | More tutorials and a documentation of all the Palantir components can be found 45 | here: https://palantir.readthedocs.io 46 | 47 | ## Processed data and metadata 48 | 49 | `scanpy anndata` objects are available for download for the three replicates generated in the manuscript: 50 | - [Replicate 1 (Rep1)](https://s3.amazonaws.com/dp-lab-data-public/palantir/human_cd34_bm_rep1.h5ad) 51 | - [Replicate 2 (Rep2)](https://s3.amazonaws.com/dp-lab-data-public/palantir/human_cd34_bm_rep2.h5ad) 52 | - [Replicate 3 (Rep3)](https://s3.amazonaws.com/dp-lab-data-public/palantir/human_cd34_bm_rep3.h5ad) 53 | 54 | This notebook details how to use the data in `Python` and `R`: http://nbviewer.jupyter.org/github/dpeerlab/Palantir/blob/master/notebooks/manuscript_data.ipynb 55 | 56 | ## Comparison to trajectory detection algorithms 57 | Notebooks detailing the generation of results comparing Palantir to trajectory detection algorithms are available [here](https://github.com/dpeerlab/Palantir/blob/master/notebooks/comparisons) 58 | 59 | ## Citations 60 | Palantir manuscript is available from [Nature Biotechnology](https://www.nature.com/articles/s41587-019-0068-4). If you use Palantir for your work, please cite our paper. 
61 | 62 | @article{Palantir_2019, 63 | title = {Characterization of cell fate probabilities in single-cell data with Palantir}, 64 | author = {Manu Setty and Vaidotas Kiseliovas and Jacob Levine and Adam Gayoso and Linas Mazutis and Dana Pe'er}, 65 | journal = {Nature Biotechnology}, 66 | year = {2019}, 67 | month = {march}, 68 | url = {https://doi.org/10.1038/s41587-019-0068-4}, 69 | doi = {10.1038/s41587-019-0068-4} 70 | } 71 | ____ 72 | 73 | Release Notes 74 | ------------- 75 | 76 | ### Next Release 77 | * fix "lightgray" error in `plot_trend` 78 | 79 | ### Version 1.4.1 80 | * update `LICENSE` file to be consistent with the MIT license 81 | * implement `plot_trajectories` to show multiple paths on the UMAP 82 | * Drop leiden dependency to allow Python >= 3.13. igraph is used instead. 83 | 84 | ### Version 1.4.0 85 | * Made pygam an optional dependency that can be installed with `pip install palantir[gam]` or `pip install palantir[full]` 86 | * Added proper conditional imports and improved error handling for pygam 87 | * Enhanced `run_magic_imputation` to return appropriate data types for different inputs 88 | * Updated code to use direct AnnData imports for newer compatibility 89 | * Improved version detection using `importlib.metadata` with graceful fallbacks 90 | * Fixed Series indexing deprecation warnings in early cell detection functions 91 | * Expanded and standardized documentation with NumPy-style docstrings throughout the codebase 92 | * Added comprehensive type hints to improve code quality and IDE support 93 | * Remove dependency from `_` methods in scanpy for plotting. 
94 | * add `pseudotime_interval` argument to control path length in `palantir.plot.plot_trajectory` 95 | 96 | #### Testing and Quality Improvements 97 | * Added comprehensive tests for optional pygam dependency 98 | * Improved test coverage for run_magic_imputation with various input/output types 99 | * Added integration tests against expected results 100 | * Enhanced test infrastructure to work with newer library versions 101 | * Expanded test coverage to catch edge cases in data processing 102 | 103 | ### Version 1.3.6 104 | * `run_magic_imputation` now has a boolean parameter `sparse` to control output sparsity 105 | * **bugfix**: `run_local_variability` for dense expression arrays now runs much faster and more accurately 106 | 107 | ### Version 1.3.4 108 | * avoid division by zero in `select_branch_cells` for very small datasets 109 | * make branch selection robust against NaNs 110 | * do not plot unclustered trends (NaN cluster) in `plot_gene_trend_clusters` 111 | 112 | ### Version 1.3.3 113 | * optional progress bar with `progress=True` in `palantir.utils.run_local_variability` 114 | * avoid NaN in local variability output 115 | * compatibility with `scanpy>=1.10.0` 116 | 117 | ### Version 1.3.2 118 | * require `python>=3.9` 119 | * implement CI for testing 120 | * fixes for edge cases discovered through extended testing 121 | * implement `plot_trajectory` function to show trajectory on the umap 122 | * scale pseudotime to unit interval in anndata 123 | 124 | ### Version 1.3.1 125 | * implemented `palantir.plot.plot_stats` to plot arbitrary cell-wise statistics as x-/y-positions. 126 | * reduce memory usage of `palantir.presults.compute_gene_trends` 127 | * removed seaborn dependency 128 | * refactor `run_diffusion_maps` to split out `compute_kernel` and `diffusion_maps_from_kernel` 129 | * remove unused dependencies `tables`, `Cython`, `cmake`, and `tzlocal`. 
130 | * fixes in `run_pca` (return correct projections and do not use too many components) 131 | 132 | ### Version 1.3.0 133 | 134 | #### New Features 135 | * Enable an AnnData-centric workflow for improved usability and interoperability with other single-cell analysis tools. 136 | * Introduced new utility functions 137 | * `palantir.utils.early_cell` To automate finding an early cell based on cell type and diffusion components. 138 | * `palantir.utils.find_terminal_states` To automate finding terminal cell states based on cell type and diffusion components. 139 | * `palantir.presults.select_branch_cells` To find cells associated to each branch based on fate probability. 140 | * `palantir.plot.plot_branch_selection` To inspect the cell to branch association. 141 | * `palantir.utils.run_local_variability` To compute local gene expression variability. 142 | * `palantir.utils.run_density` A wrapper for [mellon.DensityEstimator](https://mellon.readthedocs.io/en/latest/model.html#mellon.model.DensityEstimator). 143 | * `palantir.utils.run_density_evaluation` Evaluate computed density on a different dataset. 144 | * `palantir.utils.run_low_density_variability`. To aggregate local gene expression variability in low density. 145 | * `palantir.plot.plot_branch`. To plot branch-selected cells over pseudotime in arbitrary y-position and coloring. 146 | * `palantir.plot.plot_trend`. To plot the gene trend on top of `palantir.plot.plot_branch`. 147 | * Added input validation for better error handling and improved user experience. 148 | * Expanded documentation within docstrings, providing additional clarity for users and developers. 149 | 150 | #### Enhancements 151 | * Updated tutorial notebook to reflect the new workflow, guiding users through the updated processes. 152 | * Implemented gene trend computation using [Mellon](https://github.com/settylab/Mellon), providing more robust and efficient gene trend analysis. 
153 | * Enable annotation in `palantir.plot.highlight_cells_on_umap`. 154 | 155 | #### Changes 156 | * Replaced PhenoGraph dependency with `scanpy.tl.leiden` for gene trend clustering. 157 | * Deprecated the `run_tsne`, `determine_cell_clusters`, and `plot_cell_clusters` functions. Use corresponding implementations from [Scanpy](https://scanpy.readthedocs.io/en/stable/), widely used single-cell analysis library and direct dependency of Palantir. 158 | * Rename `palantir.plot.highlight_cells_on_tsne` to `palantir.plot.highlight_cells_on_umap` 159 | * Depend on `anndata>=0.8.0` to avoid issues writing dataframes in `ad.obsm`. 160 | 161 | #### Fixes 162 | * Addressed the issue of variability when reproducing results ([issue#64](https://github.com/dpeerlab/Palantir/issues/64)), enhancing the reproducibility and reliability of Palantir. 163 | 164 | 165 | ### Version 1.1.0 166 | * Replaced rpy2 with pyGAM for computing gene expression trends. 167 | * Updated tutorial and plotting functions 168 | 169 | 170 | ### Version 1.0.0 171 | 172 | * A fix to [issue#41](https://github.com/dpeerlab/Palantir/issues/41) 173 | * A fix to [issue#42](https://github.com/dpeerlab/Palantir/issues/42) 174 | * Revamped tutorial with support for Anndata and force directed layouts 175 | 176 | ### Version 0.2.6 177 | 178 | * A fix to [issue#33](https://github.com/dpeerlab/Palantir/issues/33) and [issue#31](https://github.com/dpeerlab/Palantir/issues/31) 179 | 180 | ### Version 0.2.5 181 | 182 | * A fix related to [issue#28](https://github.com/dpeerlab/Palantir/issues/28). When identifying terminal states, duplicate values were generated instead of unique ones. 
183 | -------------------------------------------------------------------------------- /data/marrow_sample_scseq_counts.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dpeerlab/Palantir/3be143443a601eaf163e9ff63e76f32330ac9cd2/data/marrow_sample_scseq_counts.csv.gz -------------------------------------------------------------------------------- /data/marrow_sample_scseq_counts.h5ad: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dpeerlab/Palantir/3be143443a601eaf163e9ff63e76f32330ac9cd2/data/marrow_sample_scseq_counts.h5ad -------------------------------------------------------------------------------- /data/sample_tsne.p: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dpeerlab/Palantir/3be143443a601eaf163e9ff63e76f32330ac9cd2/data/sample_tsne.p -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinxcontrib-autoprogram 2 | sphinxcontrib-napoleon 3 | sphinx-autodocgen 4 | sphinx-github-style>=1.2.2 5 | sphinx-mdinclude 6 | m2r2 7 | nbsphinx 8 | furo 9 | typing-extensions 10 | IPython 11 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. 
For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | import os 14 | import sys 15 | from pathlib import Path 16 | 17 | sys.path.insert(0, os.path.abspath("../../src")) 18 | 19 | this_directory = Path(__file__).parent 20 | 21 | 22 | # get version and other attributes 23 | version_info = {} 24 | with open("../../src/palantir/version.py") as f: 25 | exec(f.read(), version_info) 26 | 27 | 28 | # -- Project information ----------------------------------------------------- 29 | 30 | project = "Palantir" 31 | copyright = "2024, " + version_info['__author__'] 32 | author = version_info['__author__'] 33 | 34 | # The full version, including alpha/beta/rc tags 35 | release = version_info['__version__'] 36 | 37 | 38 | # -- General configuration --------------------------------------------------- 39 | 40 | # Add any Sphinx extension module names here, as strings. They can be 41 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 42 | # ones. 
43 | extensions = [ 44 | "sphinx.ext.autodoc", 45 | "nbsphinx", 46 | "sphinx.ext.napoleon", 47 | "sphinx_mdinclude", 48 | ] 49 | if os.environ.get('READTHEDOCS') == 'True': 50 | extensions.append("sphinx_github_style") 51 | extensions.append("sphinx.ext.linkcode") 52 | 53 | # GitHub repo information 54 | html_context = { 55 | "github_user": "dpeerlab", 56 | "github_repo": "Palantir", 57 | "github_version": "master", 58 | } 59 | 60 | # Set linkcode_url for sphinx-github-style 61 | linkcode_url = "https://github.com/dpeerlab/Palantir/blob/master/{filepath}#L{linestart}-L{linestop}" 62 | 63 | source_suffix = [".rst", ".md"] 64 | 65 | # Add any paths that contain templates here, relative to this directory. 66 | templates_path = ["_templates"] 67 | 68 | # List of patterns, relative to source directory, that match files and 69 | # directories to ignore when looking for source files. 70 | # This pattern also affects html_static_path and html_extra_path. 71 | exclude_patterns = ["_build", "**.ipynb_checkpoints"] 72 | 73 | 74 | # -- Options for HTML output ------------------------------------------------- 75 | 76 | # The theme to use for HTML and HTML Help pages. See the documentation for 77 | # a list of builtin themes. 78 | # 79 | html_theme = "furo" 80 | pygments_style = "tango" 81 | 82 | html_theme_options = { 83 | "footer_icons": [ 84 | { 85 | "name": "GitHub", 86 | "url": "https://github.com/dpeerlab/Palantir", 87 | "html": """ 88 | 89 | 90 | 91 | """, 92 | "class": "", 93 | }, 94 | ], 95 | } 96 | 97 | highlight_language = "none" 98 | 99 | # Add any paths that contain custom static files (such as style sheets) here, 100 | # relative to this directory. They are copied after the builtin static files, 101 | # so a file named "default.css" will overwrite the builtin "default.css". 
102 | html_static_path = [] 103 | -------------------------------------------------------------------------------- /docs/source/core.rst: -------------------------------------------------------------------------------- 1 | Core 2 | ==== 3 | 4 | .. automodule:: palantir.core 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. mellon documentation master file, created by 2 | sphinx-quickstart on Fri Sep 16 22:07:21 2022. 3 | 4 | .. toctree:: 5 | :hidden: 6 | :caption: Tutorials: 7 | 8 | notebooks/Palantir_sample_notebook.ipynb 9 | notebooks/manuscript_data.ipynb 10 | 11 | .. toctree:: 12 | :hidden: 13 | :maxdepth: 2 14 | :caption: Modules: 15 | 16 | Preprocessing 17 | Utilities 18 | Core 19 | Postprocessing 20 | Plotting 21 | 22 | 23 | .. toctree:: 24 | :hidden: 25 | :caption: Comparisons: 26 | 27 | notebooks/comparisons/dpt.ipynb 28 | notebooks/comparisons/fateid.ipynb 29 | notebooks/comparisons/monocle2.ipynb 30 | notebooks/comparisons/paga.ipynb 31 | notebooks/comparisons/slignshot.ipynb 32 | 33 | | 34 | 35 | .. mdinclude:: ../../README.md 36 | 37 | .. toctree:: 38 | :hidden: 39 | :caption: Links: 40 | 41 | Github Repo 42 | 43 | 44 | Index 45 | ===== 46 | 47 | * :ref:`genindex` 48 | -------------------------------------------------------------------------------- /docs/source/notebooks: -------------------------------------------------------------------------------- 1 | ../../notebooks -------------------------------------------------------------------------------- /docs/source/plot.rst: -------------------------------------------------------------------------------- 1 | Plotting 2 | ======== 3 | 4 | .. 
automodule:: palantir.plot 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/preprocess.rst: -------------------------------------------------------------------------------- 1 | Preprocessing 2 | ============= 3 | 4 | .. automodule:: palantir.preprocess 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/presults.rst: -------------------------------------------------------------------------------- 1 | Postprocessing 2 | ============== 3 | 4 | .. automodule:: palantir.presults 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/utils.rst: -------------------------------------------------------------------------------- 1 | Utilities 2 | ========= 3 | 4 | .. automodule:: palantir.utils 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /notebooks/comparisons/results/fateid/DC_order.csv: -------------------------------------------------------------------------------- 1 | "","t4" 2 | "1","Run4_126886713386862" 3 | "2","Run4_130057606915939" 4 | "3","Run4_133829902874925" 5 | "4","Run4_157613970647844" 6 | "5","Run4_196163864475485" 7 | "6","Run4_204403356948907" 8 | "7","Run4_231280295401270" 9 | "8","Run4_231280310074741" 10 | "9","Run4_235678490908891" 11 | "10","Run4_235697683417844" 12 | "11","Run4_240001494408486" 13 | "12","Run5_126886713416990" 14 | "13","Run5_134531774106534" 15 | "14","Run5_157613955956011" 16 | "15","Run5_160440071702940" 17 | "16","Run5_169022462749603" 18 | "17","Run5_197213599750044" 19 | "18","Run5_205510773992812" 20 | "19","Run5_227516844984628" 21 | "20","Run5_231692907800427" 22 | "21","Run5_232327507728678" 23 | "22","Run5_235122054879964" 
24 | "23","Run4_230732701756837" 25 | "24","Run5_160996525231478" 26 | "25","Run5_165860320406308" 27 | "26","Run5_170137140189547" 28 | "27","Run5_166290863569707" 29 | "28","Run4_135150498626486" 30 | "29","Run5_200967535908581" 31 | "30","Run4_199962360724702" 32 | "31","Run5_239477254471070" 33 | "32","Run5_232318630480750" 34 | "33","Run4_129565698677470" 35 | "34","Run4_129663530646894" 36 | "35","Run4_239596318186406" 37 | "36","Run5_227844214340460" 38 | "37","Run4_226958752541091" 39 | "38","Run5_131097901611291" 40 | "39","Run5_160910657575723" 41 | "40","Run4_161889641978219" 42 | "41","Run5_157536929077979" 43 | "42","Run4_226205924964702" 44 | "43","Run4_166264200448795" 45 | "44","Run5_191080758196467" 46 | "45","Run5_239458061268844" 47 | "46","Run5_234992235597163" 48 | "47","Run5_201595540363172" 49 | "48","Run5_169174308408606" 50 | "49","Run4_134377538219363" 51 | "50","Run5_227516876250396" 52 | "51","Run5_239448263378870" 53 | "52","Run5_227981806484195" 54 | "53","Run4_235214351259421" 55 | "54","Run4_200570519276955" 56 | "55","Run4_235122036431276" 57 | "56","Run4_130754452543710" 58 | "57","Run4_235548433927395" 59 | "58","Run4_126364087900021" 60 | "59","Run4_134530815449317" 61 | "60","Run4_226901469313437" 62 | "61","Run5_226901468854118" 63 | "62","Run5_126836668226926" 64 | "63","Run5_130675129636790" 65 | "64","Run4_161960508214004" 66 | "65","Run5_122282776848630" 67 | "66","Run5_160440071605166" 68 | "67","Run4_126765621271260" 69 | "68","Run4_236768101816741" 70 | "69","Run4_126680976217949" 71 | "70","Run5_122282776874270" 72 | "71","Run4_227903404136812" 73 | "72","Run5_169794765737372" 74 | "73","Run5_230816707177379" 75 | "74","Run5_227284928916381" 76 | "75","Run4_157613970578164" 77 | "76","Run5_170181044099822" 78 | "77","Run5_231711851042724" 79 | "78","Run4_236082114522339" 80 | "79","Run5_197060157730102" 81 | "80","Run4_131234312154540" 82 | "81","Run4_231291315932590" 83 | "82","Run4_155971413489590" 84 | 
"83","Run4_133974617123676" 85 | "84","Run5_134539660647150" 86 | "85","Run4_205381641819869" 87 | "86","Run4_235533386607836" 88 | "87","Run4_130134915902195" 89 | "88","Run5_226883187948253" 90 | "89","Run5_226874778041270" 91 | "90","Run4_122428824599908" 92 | "91","Run5_226953398700339" 93 | "92","Run5_129497382209436" 94 | "93","Run5_197206037674717" 95 | "94","Run4_235199318669100" 96 | "95","Run4_232450553724148" 97 | "96","Run5_205527671486381" 98 | "97","Run4_121202296712412" 99 | "98","Run5_230800378678062" 100 | "99","Run4_126836669073780" 101 | "100","Run5_161883198962589" 102 | "101","Run4_227357125986725" 103 | "102","Run4_160990095927006" 104 | "103","Run4_200967489997038" 105 | "104","Run5_131097901558494" 106 | "105","Run5_227921645386163" 107 | "106","Run5_201114485475756" 108 | "107","Run5_195967073605980" 109 | "108","Run4_204825356323118" 110 | "109","Run5_133854492289318" 111 | "110","Run4_134592152354101" 112 | "111","Run5_231822829995765" 113 | "112","Run4_200974094001899" 114 | "113","Run5_204765475519403" 115 | "114","Run5_236846721915189" 116 | "115","Run4_170265211464501" 117 | "116","Run4_192255443986782" 118 | "117","Run4_129582985497013" 119 | "118","Run5_155971444992926" 120 | "119","Run4_160447829068124" 121 | "120","Run5_157004353418995" 122 | "121","Run5_232321033853659" 123 | "122","Run4_205510774745395" 124 | "123","Run5_227284901345125" 125 | "124","Run4_121896114436398" 126 | "125","Run4_236634018002741" 127 | "126","Run4_130126447786731" 128 | "127","Run5_240094641601390" 129 | "128","Run4_204825356024622" 130 | "129","Run4_134522332629341" 131 | "130","Run4_200570520164645" 132 | "131","Run4_192747070778206" 133 | "132","Run4_236176853810421" 134 | "133","Run4_195546194598117" 135 | "134","Run5_129582985832693" 136 | "135","Run4_126835192150451" 137 | "136","Run4_231890207586150" 138 | "137","Run4_162078857030885" 139 | "138","Run4_126835160956259" 140 | "139","Run5_195606445677876" 141 | "140","Run5_130142286011806" 142 | 
"141","Run5_121826167613876" 143 | "142","Run4_133895267445470" 144 | "143","Run4_170137155393252" 145 | "144","Run5_226283262430126" 146 | "145","Run5_126885746359029" 147 | "146","Run5_192653655202716" 148 | "147","Run5_121276833385781" 149 | "148","Run5_195562319787948" 150 | "149","Run4_239382662691758" 151 | "150","Run4_205527685446061" 152 | "151","Run5_227516844763356" 153 | "152","Run4_134592171981109" 154 | "153","Run4_236639608782067" 155 | "154","Run4_130736332883894" 156 | "155","Run5_196180748688101" 157 | "156","Run4_160928731187629" 158 | "157","Run4_226394650633955" 159 | "158","Run4_232330326660853" 160 | "159","Run5_227305168426859" 161 | "160","Run4_161889623076126" 162 | "161","Run4_235763078969779" 163 | "162","Run5_157536960305052" 164 | "163","Run4_160928758131548" 165 | "164","Run5_170181030168308" 166 | "165","Run4_131217401166134" 167 | "166","Run5_169793557481390" 168 | "167","Run4_192180135835932" 169 | "168","Run4_165741375405493" 170 | "169","Run5_130761014307164" 171 | "170","Run4_160903140923115" 172 | "171","Run5_205381641881900" 173 | "172","Run4_227844214442804" 174 | "173","Run5_232327489150307" 175 | "174","Run4_227363580664092" 176 | "175","Run5_161890937269108" 177 | "176","Run4_200563808394158" 178 | "177","Run5_226829528151854" 179 | "178","Run4_135571512376109" 180 | "179","Run5_120703436077806" 181 | "180","Run4_162090668374451" 182 | "181","Run4_200570500410678" 183 | "182","Run5_170798612175131" 184 | "183","Run4_134447976003501" 185 | "184","Run4_134448002525940" 186 | "185","Run4_134376349616557" 187 | "186","Run4_130144733412140" 188 | "187","Run5_227853877790557" 189 | "188","Run4_126227588791004" 190 | "189","Run4_164818762807660" 191 | "190","Run5_227305214368564" 192 | "191","Run5_160903141124900" 193 | "192","Run5_196527701059949" 194 | "193","Run4_241106402073885" 195 | "194","Run4_199962347330923" 196 | "195","Run5_204762120841651" 197 | "196","Run5_200563808655086" 198 | "197","Run5_204954185882334" 199 | 
"198","Run5_201523599693555" 200 | "199","Run5_232318649121630" 201 | "200","Run4_236785415612150" 202 | "201","Run4_239458073925534" 203 | "202","Run5_164631977839027" 204 | "203","Run5_131097927993718" 205 | "204","Run5_226265813962150" 206 | "205","Run5_131217382140251" 207 | "206","Run5_204959708010213" 208 | "207","Run5_160440058596140" 209 | "208","Run4_129565671210277" 210 | "209","Run4_200441249359709" 211 | "210","Run4_195562319506654" 212 | "211","Run4_191210578165158" 213 | "212","Run4_191774111938413" 214 | "213","Run5_169794778294622" 215 | "214","Run4_236768055152038" 216 | "215","Run5_161960476956974" 217 | "216","Run4_170199731882726" 218 | "217","Run4_239468516337884" 219 | "218","Run5_201114484915573" 220 | "219","Run5_200922572711788" 221 | "220","Run4_240634645371700" 222 | "221","Run4_228042023823214" 223 | "222","Run5_169014052698013" 224 | "223","Run5_205510805765038" 225 | "224","Run4_170180373273324" 226 | "225","Run5_236082068118877" 227 | "226","Run4_231900127545652" 228 | "227","Run5_196715518278579" 229 | "228","Run5_240490718546668" 230 | "229","Run4_200983461354781" 231 | "230","Run4_131310516590814" 232 | "231","Run4_162001560395102" 233 | "232","Run5_192678095637236" 234 | "233","Run4_230741457394027" 235 | "234","Run5_232302646085862" 236 | "235","Run5_227357125462838" 237 | "236","Run5_129497369332638" 238 | "237","Run4_195625772766949" 239 | "238","Run4_121202324139420" 240 | "239","Run4_169768874531107" 241 | "240","Run4_125042414278451" 242 | "241","Run5_125197018937182" 243 | "242","Run5_126706342876973" 244 | "243","Run5_120786805020899" 245 | "244","Run4_200899053082478" 246 | "245","Run4_232301316466550" 247 | "246","Run4_131242078170357" 248 | "247","Run5_192745876088540" 249 | "248","Run5_126706311458101" 250 | "249","Run4_121812496309539" 251 | "250","Run5_196180763102627" 252 | "251","Run4_169174309156595" 253 | "252","Run5_228042069723892" 254 | "253","Run5_226283261651870" 255 | "254","Run4_239596291410229" 256 | 
"255","Run4_161548562741995" 257 | "256","Run4_129565671779052" 258 | "257","Run5_227853850536860" 259 | "258","Run4_236650124458796" 260 | "259","Run4_195615853792491" 261 | "260","Run5_134522331948828" 262 | "261","Run4_165325596974387" 263 | "262","Run5_227991469641059" 264 | "263","Run4_134448002779892" 265 | "264","Run5_239407492945651" 266 | "265","Run5_126681615386990" 267 | "266","Run4_230809071209270" 268 | "267","Run5_121896094784430" 269 | "268","Run4_239468515839403" 270 | "269","Run4_197060142685020" 271 | "270","Run5_125719958739371" 272 | "271","Run4_241098858289510" 273 | "272","Run5_134377557416179" 274 | "273","Run4_125719977541037" 275 | "274","Run4_226949759945627" 276 | "275","Run4_227844233247005" 277 | "276","Run4_125719977548197" 278 | "277","Run5_230741456907635" 279 | "278","Run5_164753426012523" 280 | "279","Run4_126776271099742" 281 | "280","Run5_133974589627685" 282 | "281","Run4_196043578137333" 283 | "282","Run4_232330307783476" 284 | "283","Run5_126836668483956" 285 | "284","Run5_235199332793123" 286 | "285","Run5_240617733974324" 287 | "286","Run4_161893246425373" 288 | "287","Run5_200983476648171" 289 | "288","Run4_133895294208868" 290 | "289","Run5_160990068434789" 291 | "290","Run4_161451911564523" 292 | "291","Run4_240490745285349" 293 | "292","Run5_125042414704491" 294 | "293","Run5_236639654950813" 295 | "294","Run4_160447874844573" 296 | "295","Run4_231291328088997" 297 | "296","Run4_197256786963700" 298 | "297","Run5_130203891448094" 299 | "298","Run4_195622417386716" 300 | "299","Run4_130185503820724" 301 | "300","Run5_120726897153838" 302 | "301","Run4_170344372688676" 303 | "302","Run4_231365403663132" 304 | "303","Run5_126765621164972" 305 | "304","Run4_231891403167645" 306 | "305","Run5_226970322292126" 307 | "306","Run5_131293471160755" 308 | "307","Run4_231897711163678" 309 | "308","Run5_231291328542958" 310 | "309","Run5_160440058755507" 311 | "310","Run4_196527729007907" 312 | "311","Run4_165385699191715" 313 | 
"312","Run4_235199345678774" 314 | "313","Run5_165741390641899" 315 | "314","Run4_134376302950822" 316 | "315","Run5_227982982933812" 317 | "316","Run4_197213568841971" 318 | "317","Run5_195625785604396" 319 | "318","Run4_166332095651107" 320 | "319","Run4_170198344091502" 321 | "320","Run4_235763078646708" 322 | "321","Run4_157562564695862" 323 | "322","Run4_201111666641630" 324 | "323","Run4_201111685483379" 325 | "324","Run4_231711851559731" 326 | "325","Run5_199934564264886" 327 | "326","Run5_240498368433515" 328 | "327","Run4_241057654323483" 329 | "328","Run4_200348223530852" 330 | "329","Run5_120797898435828" 331 | "330","Run4_162090699901166" 332 | "331","Run4_166400431147246" 333 | "332","Run5_227903423569126" 334 | "333","Run5_232261741046116" 335 | "334","Run5_231760541113126" 336 | "335","Run4_161883180383004" 337 | "336","Run5_226958767155629" 338 | "337","Run5_192745863559517" 339 | "338","Run5_191760467873139" 340 | "339","Run4_240482289609438" 341 | "340","Run4_170877933992798" 342 | "341","Run5_164753445435613" 343 | "342","Run5_231291328292659" 344 | "343","Run5_191774126987494" 345 | "344","Run4_131242051128684" 346 | "345","Run5_197135453862187" 347 | "346","Run5_170265184692014" 348 | "347","Run4_196174352079662" 349 | "348","Run5_191691448281523" 350 | "349","Run4_122310412323676" 351 | "350","Run4_121965753884006" 352 | "351","Run5_122293352064750" 353 | "352","Run5_134531742550430" 354 | "353","Run4_230732701551332" 355 | "354","Run4_191080758131627" 356 | "355","Run4_192101737523115" 357 | "356","Run5_161883167537899" 358 | "357","Run4_134033659874678" 359 | "358","Run5_157486628656875" 360 | "359","Run5_239408816215900" 361 | "360","Run5_227844201564581" 362 | "361","Run4_191647322069734" 363 | "362","Run5_204756752198491" 364 | "363","Run4_236167209045339" 365 | "364","Run4_130194240822060" 366 | "365","Run4_191647291132852" 367 | "366","Run5_130144732663078" 368 | "367","Run5_201670683478364" 369 | "368","Run4_120864497952619" 370 | 
"369","Run5_169174309227379" 371 | "370","Run5_170276727507299" 372 | "371","Run5_232327508278067" 373 | "372","Run5_126776251632476" 374 | "373","Run4_241106420979955" 375 | "374","Run4_122308384450908" 376 | "375","Run4_226970310434140" 377 | "376","Run4_126886700825827" 378 | "377","Run4_240490732735862" 379 | "378","Run4_235198559214838" 380 | "379","Run4_166400399718763" 381 | "380","Run5_166264181209964" 382 | "381","Run4_165302242536245" 383 | "382","Run5_227364251261236" 384 | "383","Run5_195546212919582" 385 | "384","Run4_241098904779638" 386 | "385","Run4_126364088155371" 387 | "386","Run4_130693100129694" 388 | "387","Run5_162078842833638" 389 | "388","Run4_200363958975844" 390 | "389","Run5_236177497470814" 391 | "390","Run4_126886700599133" 392 | "391","Run4_164631946934515" 393 | "392","Run4_134539674041203" 394 | "393","Run4_235695789762414" 395 | "394","Run4_170137155200884" 396 | "395","Run5_235533413242717" 397 | "396","Run4_231770055399670" 398 | "397","Run4_170745979922142" 399 | "398","Run5_230749746972902" 400 | "399","Run4_231754903800667" 401 | "400","Run5_236177512348958" 402 | "401","Run5_235737666242989" 403 | "402","Run5_129663543397093" 404 | "403","Run4_126706343636326" 405 | "404","Run5_195958617790259" 406 | "405","Run5_226205940444581" 407 | "406","Run4_200562850811166" 408 | "407","Run5_161462676024222" 409 | "408","Run5_166400412274019" 410 | "409","Run4_231279536236910" 411 | "410","Run5_169795603024612" 412 | "411","Run4_195967101618597" 413 | "412","Run5_239596318419685" 414 | "413","Run4_204224148454835" 415 | "414","Run5_205527685680502" 416 | "415","Run4_201474310264686" 417 | "416","Run4_196163864287654" 418 | "417","Run5_197188214515957" 419 | "418","Run5_230807894976284" 420 | "419","Run4_226265786076572" 421 | "420","Run4_156463307344229" 422 | "421","Run5_197213568489189" 423 | "422","Run5_129978437586219" 424 | "423","Run5_162078842349987" 425 | "424","Run5_196019714186036" 426 | "425","Run4_232449749174174" 427 | 
"426","Run4_157477617129885" 428 | "427","Run5_231280341306278" 429 | "428","Run5_240634599983342" 430 | "429","Run4_134539692072302" 431 | "430","Run5_196019687483237" 432 | "431","Run5_226283234735902" 433 | "432","Run5_170276754217310" 434 | "433","Run5_133829949614302" 435 | "434","Run5_169768874358630" 436 | "435","Run5_164761077148979" 437 | "436","Run4_195562273655733" 438 | "437","Run5_192255417076019" 439 | "438","Run4_169794751290212" 440 | "439","Run5_235007433391339" 441 | "440","Run5_227991450671990" 442 | "441","Run4_125197033818014" 443 | "442","Run5_166290909227805" 444 | "443","Run4_231270765686061" 445 | "444","Run4_191026787674525" 446 | "445","Run5_196715471816621" 447 | "446","Run4_227510415157549" 448 | "447","Run5_227973438224811" 449 | "448","Run4_235737666173734" 450 | "449","Run5_121879183911859" 451 | "450","Run5_131309590173998" 452 | "451","Run4_169022462769963" 453 | "452","Run5_166416939870565" 454 | "453","Run5_165164925765363" 455 | "454","Run5_235763060394844" 456 | "455","Run5_155971426576092" 457 | "456","Run5_227516829882603" 458 | "457","Run5_156933517855093" 459 | "458","Run4_191026787670262" 460 | "459","Run5_121319764642219" 461 | "460","Run5_164698906278253" 462 | "461","Run5_160785117669293" 463 | "462","Run5_121276786920878" 464 | "463","Run5_155911403748787" 465 | "464","Run5_125239700146011" 466 | "465","Run5_236221700958053" 467 | "466","Run5_204825337420060" 468 | "467","Run4_129497382115619" 469 | "468","Run4_125728252684085" 470 | "469","Run4_195426088052590" 471 | "470","Run4_130142298336179" 472 | "471","Run5_164753399208804" 473 | "472","Run5_191708896914742" 474 | "473","Run5_192121092658916" 475 | "474","Run5_157690352593243" 476 | "475","Run4_236768070092718" 477 | "476","Run5_239408835385565" 478 | "477","Run5_204358278368691" 479 | "478","Run4_240136813497564" 480 | "479","Run5_130529086105910" 481 | "480","Run5_160785117669683" 482 | "481","Run4_170276741868852" 483 | "482","Run5_227921645431155" 484 | 
"483","Run5_235070752148316" 485 | "484","Run4_200922526825900" 486 | "485","Run5_161893265356581" 487 | "486","Run5_235601986762677" 488 | "487","Run4_235198559475508" 489 | "488","Run5_130529086859099" 490 | "489","Run4_199962347296556" 491 | "490","Run4_235129955600805" 492 | "491","Run5_239932687441126" 493 | "492","Run5_130194213035254" 494 | "493","Run4_197188213762932" 495 | "494","Run4_130142297803558" 496 | "495","Run5_239587881241958" 497 | "496","Run4_236221669165997" 498 | "497","Run5_134447989975788" 499 | "498","Run5_231340604583221" 500 | "499","Run4_130529086818542" 501 | "500","Run4_129457538058668" 502 | "501","Run5_227354856024420" 503 | "502","Run4_164631978096541" 504 | "503","Run4_166416894024044" 505 | "504","Run5_227930535311277" 506 | "505","Run5_199944394402717" 507 | "506","Run5_162001548134131" 508 | "507","Run4_130529132723550" 509 | "508","Run4_131309562153836" 510 | "509","Run4_121202311609131" 511 | "510","Run5_121319736597797" 512 | "511","Run5_235188984072027" 513 | "512","Run5_129457491692468" 514 | "513","Run5_191553907124134" 515 | "514","Run4_192325895502708" 516 | "515","Run4_170198390135595" 517 | "516","Run5_240482289375084" 518 | "517","Run5_161890952759717" 519 | "518","Run5_204825324018989" 520 | "519","Run5_226343142447326" 521 | "520","Run4_235198527781284" 522 | "521","Run5_126765636139947" 523 | "522","Run5_227357156624308" 524 | "523","Run4_122282776423132" 525 | "524","Run4_129457519708470" 526 | "525","Run4_122308430620398" 527 | "526","Run4_235070752934772" 528 | "527","Run4_199934565079284" 529 | "528","Run5_230809083768053" 530 | "529","Run5_129457519184181" 531 | "530","Run4_166402846776101" 532 | "531","Run5_200416674364835" 533 | "532","Run5_239382643984237" 534 | "533","Run5_134447990234011" 535 | "534","Run5_235533432441715" 536 | "535","Run5_226829547517806" 537 | "536","Run4_164698921291108" 538 | "537","Run5_135064452549364" 539 | "538","Run5_235070752209780" 540 | "539","Run4_134936689588014" 541 | 
"540","Run4_126227569621427" 542 | "541","Run4_195958632734107" 543 | "542","Run4_231692907690797" 544 | "543","Run5_157004353465123" 545 | "544","Run5_236846734231900" 546 | "545","Run4_126834220845997" 547 | "546","Run4_232439042431772" 548 | "547","Run5_226901441268660" 549 | "548","Run4_122436428412653" 550 | "549","Run5_227982982691117" 551 | "550","Run4_134936676650349" 552 | "551","Run5_191226652777307" 553 | "552","Run5_227990499191587" 554 | "553","Run5_122282776614758" 555 | "554","Run5_205510806027174" 556 | "555","Run5_170798580160300" 557 | "556","Run5_164698933651885" 558 | "557","Run4_126218845501230" 559 | "558","Run5_121812477929373" 560 | "559","Run4_241114562055595" 561 | "560","Run5_227354875717342" 562 | "561","Run5_166416920763701" 563 | "562","Run4_169174277412580" 564 | "563","Run4_230652439090988" 565 | "564","Run4_230799438727924" 566 | "565","Run5_129457506937244" 567 | "566","Run5_227903404206819" 568 | "567","Run5_121319736923940" 569 | "568","Run4_196180748720476" 570 | "569","Run4_231291315473310" 571 | "570","Run4_197264047290742" 572 | "571","Run5_169014006758196" 573 | "572","Run5_200974093211372" 574 | "573","Run5_120703409670492" 575 | "574","Run4_161451911605148" 576 | "575","Run5_169014033836460" 577 | "576","Run4_161883153348404" 578 | "577","Run4_192180120643819" 579 | "578","Run4_241057668974364" 580 | "579","Run5_235763032582965" 581 | "580","Run5_166400384687350" 582 | "581","Run5_126886699980715" 583 | "582","Run4_164761063865758" 584 | "583","Run5_205527670966709" 585 | "584","Run4_130184146991460" 586 | "585","Run5_133829929907949" 587 | "586","Run5_196043592617316" 588 | "587","Run5_240001541139755" 589 | "588","Run4_239468561943923" 590 | "589","Run5_204224163363755" 591 | "590","Run5_130142297810790" 592 | "591","Run5_135081885584621" 593 | "592","Run4_200983461189340" 594 | -------------------------------------------------------------------------------- /notebooks/comparisons/results/fateid/Ery_order.csv: 
-------------------------------------------------------------------------------- 1 | "","t2" 2 | "1","Run4_130142298659099" 3 | "2","Run4_155911430977836" 4 | "3","Run4_236650105264556" 5 | "4","Run5_121276786923948" 6 | "5","Run5_121319737121187" 7 | "6","Run5_121742671894326" 8 | "7","Run5_121896083017067" 9 | "8","Run5_156515920426790" 10 | "9","Run5_165741421874550" 11 | "10","Run5_166351410154397" 12 | "11","Run5_169723777145627" 13 | "12","Run5_197256771755294" 14 | "13","Run5_205870209919347" 15 | "14","Run5_226806174202228" 16 | "15","Run5_226953384323893" 17 | "16","Run5_239596291184941" 18 | "17","Run4_204359620552548" 19 | "18","Run4_170276741343972" 20 | "19","Run4_235007414484187" 21 | "20","Run5_226205971995877" 22 | "21","Run5_131175256832285" 23 | "22","Run5_191553860978972" 24 | "23","Run4_227973438560036" 25 | "24","Run5_204756751867293" 26 | "25","Run4_126218818710238" 27 | "26","Run4_121742671661982" 28 | "27","Run5_160990068329317" 29 | "28","Run4_201670702021045" 30 | "29","Run4_130736333471003" 31 | "30","Run4_170268049172398" 32 | "31","Run5_201114458372918" 33 | "32","Run4_165164911094003" 34 | "33","Run5_236169981277996" 35 | "34","Run5_230809103154419" 36 | "35","Run4_170258488281828" 37 | "36","Run5_169768846837492" 38 | "37","Run5_130531382381404" 39 | "38","Run5_200562869747052" 40 | "39","Run5_121896095594741" 41 | "40","Run4_161462648487205" 42 | "41","Run4_130057607240948" 43 | "42","Run4_192121064778654" 44 | "43","Run5_135081932307366" 45 | "44","Run4_166351422549219" 46 | "45","Run5_164753445702515" 47 | "46","Run4_228042051639012" 48 | "47","Run5_204403356874997" 49 | "48","Run4_235129954749341" 50 | "49","Run4_239407461947620" 51 | "50","Run5_192678083644829" 52 | "51","Run5_192747102420395" 53 | "52","Run4_191645443803486" 54 | "53","Run4_120797945018284" 55 | "54","Run5_240498414937837" 56 | "55","Run5_161462663689971" 57 | "56","Run4_157140481390493" 58 | "57","Run5_226953398269812" 59 | "58","Run4_232327508286387" 60 | 
"59","Run4_239448275970990" 61 | "60","Run5_130142316719326" 62 | "61","Run4_204763059935523" 63 | "62","Run4_235559336233702" 64 | "63","Run4_192859711334757" 65 | "64","Run4_239448248936886" 66 | "65","Run5_130185489499868" 67 | "66","Run4_200562869397916" 68 | "67","Run4_240154798308595" 69 | "68","Run4_170265230849779" 70 | "69","Run5_231692926573411" 71 | "70","Run4_232311355557748" 72 | "71","Run5_129978418448620" 73 | "72","Run5_197135439182046" 74 | "73","Run4_130142298127654" 75 | "74","Run4_196180748389739" 76 | "75","Run4_197185798555446" 77 | "76","Run5_195562319858547" 78 | "77","Run4_235069826000611" 79 | "78","Run5_236650124490590" 80 | "79","Run4_235199346129845" 81 | "80","Run5_235678522329899" 82 | "81","Run5_236167208548133" 83 | "82","Run5_230732686616430" 84 | "83","Run4_121812465170790" 85 | "84","Run4_170327475927477" 86 | "85","Run4_155972634078110" 87 | "86","Run4_241098904988574" 88 | "87","Run5_227853877819827" 89 | "88","Run5_121955407219108" 90 | "89","Run5_236177511832499" 91 | "90","Run4_195958633326307" 92 | "91","Run4_226265800857309" 93 | "92","Run5_227973438495990" 94 | "93","Run4_201670656740573" 95 | "94","Run4_204765463174555" 96 | "95","Run4_235626713532342" 97 | "96","Run5_204959708006827" 98 | "97","Run4_165385745418163" 99 | "98","Run5_121319783321011" 100 | "99","Run4_192325908097756" 101 | "100","Run5_227846649498395" 102 | "101","Run5_231711851628380" 103 | "102","Run5_239407446480605" 104 | "103","Run5_165302261693363" 105 | "104","Run5_239448248933108" 106 | "105","Run5_130144701369189" 107 | "106","Run5_226949787674870" 108 | "107","Run4_236633999137206" 109 | "108","Run4_160928730367731" 110 | "109","Run5_239587881762227" 111 | "110","Run5_241114589357942" 112 | "111","Run5_169156963424675" 113 | "112","Run5_240136800360374" 114 | "113","Run5_235601986776285" 115 | "114","Run4_165945548589915" 116 | "115","Run4_226283261941494" 117 | "116","Run4_240490764462491" 118 | "117","Run5_156453536717555" 119 | 
"118","Run5_195547402488691" 120 | "119","Run5_120703455284661" 121 | "120","Run4_227844187442397" 122 | "121","Run5_226394662987636" 123 | "122","Run5_170180358387485" 124 | "123","Run4_227975317540203" 125 | "124","Run5_227516857597294" 126 | "125","Run4_230807875762028" 127 | "126","Run4_164698921000299" 128 | "127","Run4_235533401209062" 129 | "128","Run5_239587850311396" 130 | "129","Run4_130754465351916" 131 | "130","Run5_192653655693100" 132 | "131","Run5_135081913084340" 133 | "132","Run5_122293380103390" 134 | "133","Run5_125719977089261" 135 | "134","Run4_126776224631582" 136 | "135","Run5_165188413545701" 137 | "136","Run5_230749746977195" 138 | "137","Run4_227363580922669" 139 | "138","Run5_226949759723830" 140 | "139","Run5_126707788369181" 141 | "140","Run5_231340558632877" 142 | "141","Run5_230741456637214" 143 | "142","Run5_197264047237411" 144 | "143","Run4_236650123947235" 145 | "144","Run4_160928758380782" 146 | "145","Run5_231917070280429" 147 | "146","Run4_235199318879141" 148 | "147","Run5_205922701096750" 149 | "148","Run4_166416908205293" 150 | "149","Run5_226394635692277" 151 | "150","Run5_230654437378996" 152 | "151","Run5_205381688060779" 153 | "152","Run5_196113282821429" 154 | "153","Run4_157072011967413" 155 | "154","Run5_157684166474486" 156 | "155","Run4_166400431123379" 157 | "156","Run5_120726924977910" 158 | "157","Run4_155972621262251" 159 | "158","Run4_161478635801460" 160 | "159","Run5_192745875824029" 161 | "160","Run5_239408816277428" 162 | "161","Run4_204959689431340" 163 | "162","Run5_169793557511413" 164 | "163","Run4_235069813181678" 165 | "164","Run4_130144686946166" 166 | "165","Run5_192745848129885" 167 | "166","Run5_201465586202923" 168 | "167","Run5_131310502198700" 169 | "168","Run5_162078870100196" 170 | "169","Run4_201474323212139" 171 | "170","Run4_235000971513075" 172 | "171","Run4_196019733085555" 173 | "172","Run5_227991451220918" 174 | "173","Run5_239458093546284" 175 | "174","Run5_235697715238645" 176 | 
"175","Run5_195623388559670" 177 | "176","Run5_170327461710572" 178 | "177","Run4_165325568949149" 179 | "178","Run5_195967089073564" 180 | "179","Run5_160996511074155" 181 | "180","Run5_131089728490724" 182 | "181","Run5_235214378003372" 183 | "182","Run4_126707819506413" 184 | "183","Run5_126886700497331" 185 | "184","Run4_134531742587125" 186 | "185","Run4_170198358251748" 187 | "186","Run4_231890207533877" 188 | "187","Run5_191647304015075" 189 | "188","Run5_232328281942245" 190 | "189","Run4_226901487896435" 191 | "190","Run4_125239714305827" 192 | "191","Run4_169723777735083" 193 | "192","Run4_156515947727276" 194 | "193","Run5_205870194965276" 195 | "194","Run4_197213581699892" 196 | "195","Run5_241114589051630" 197 | "196","Run4_131234312894774" 198 | "197","Run5_131097900824877" 199 | "198","Run5_130736364939547" 200 | "199","Run5_231891388447148" 201 | "200","Run4_239475387980214" 202 | "201","Run5_204765462948636" 203 | "202","Run5_165860319841012" 204 | "203","Run5_240617715883244" 205 | "204","Run4_162090668419428" 206 | "205","Run5_195426056628588" 207 | "206","Run5_226283280783795" 208 | "207","Run5_134531773294827" 209 | "208","Run4_204359601874859" 210 | "209","Run5_240634613909861" 211 | "210","Run5_196113282746270" 212 | "211","Run4_232302646515444" 213 | "212","Run5_191760421734701" 214 | "213","Run4_129457538329326" 215 | "214","Run5_131309590141149" 216 | "215","Run4_192180134984044" 217 | "216","Run4_135149155899102" 218 | "217","Run5_191708882168230" 219 | "218","Run5_230800378227622" 220 | "219","Run4_232330326096222" 221 | "220","Run5_135150485751668" 222 | "221","Run5_196527728802213" 223 | "222","Run4_121965741528803" 224 | "223","Run4_204360513969572" 225 | "224","Run4_230732732423086" 226 | "225","Run4_130624798276324" 227 | "226","Run5_161890983963060" 228 | "227","Run5_240482277252390" 229 | "228","Run4_239391521495341" 230 | "229","Run4_204763060394286" 231 | "230","Run5_197256787216805" 232 | "231","Run5_156515920468213" 233 | 
"232","Run4_165385744804644" 234 | "233","Run5_195967101593454" 235 | "234","Run5_232449748355932" 236 | "235","Run4_160928744992052" 237 | "236","Run4_230592321801117" 238 | "237","Run5_191553860713379" 239 | "238","Run4_227930535483109" 240 | "239","Run5_166288090655461" 241 | "240","Run4_200561495795932" 242 | "241","Run4_126707800401843" 243 | "242","Run4_205381656537828" 244 | "243","Run5_192101783386349" 245 | "244","Run4_232318649916766" 246 | "245","Run5_122308412197221" 247 | "246","Run4_134936708167980" 248 | "247","Run4_125042414733164" 249 | "248","Run4_235763079474548" 250 | "249","Run5_201595508942708" 251 | "250","Run5_130529100716782" 252 | "251","Run5_191708882659611" 253 | "252","Run4_170265199196588" 254 | "253","Run4_120797898946870" 255 | "254","Run4_131097928022955" 256 | "255","Run4_191210578200421" 257 | "256","Run4_126132159462771" 258 | "257","Run5_236768100990253" 259 | "258","Run4_226874732506019" 260 | "259","Run5_230800346954166" 261 | "260","Run4_131242050939755" 262 | "261","Run4_156463306828011" 263 | "262","Run5_227305195793196" 264 | "263","Run5_235695836354861" 265 | "264","Run5_161548575394725" 266 | "265","Run4_130142316947294" 267 | "266","Run5_192315277915955" 268 | "267","Run4_196163852160430" 269 | "268","Run4_164753426303725" 270 | "269","Run5_197264019737437" 271 | "270","Run4_236639623530278" 272 | "271","Run5_164769505164701" 273 | "272","Run5_231760540657515" 274 | "273","Run5_197213599947573" 275 | "274","Run4_235199332796637" 276 | "275","Run5_126836637060515" 277 | "276","Run5_239468543363316" 278 | "277","Run5_131309608286054" 279 | "278","Run4_192859665184630" 280 | "279","Run4_227991451036963" 281 | "280","Run4_165945548167606" 282 | "281","Run5_160447874328294" 283 | "282","Run4_164818809441715" 284 | "283","Run5_231770082461547" 285 | "284","Run5_169579734755611" 286 | "285","Run5_226970322222389" 287 | "286","Run4_240498414410590" 288 | "287","Run5_165945520642284" 289 | "288","Run4_135014255343900" 290 | 
"289","Run4_204213438037211" 291 | "290","Run4_232327462206683" 292 | "291","Run4_121965773241694" 293 | "292","Run5_121955406957492" 294 | "293","Run5_170327476157284" 295 | "294","Run5_239458093263795" 296 | "295","Run4_131234280914166" 297 | "296","Run5_204825309337963" 298 | "297","Run4_191760436250853" 299 | "298","Run5_170798580418413" 300 | "299","Run4_204213426011356" 301 | "300","Run5_130126448114988" 302 | "301","Run5_169158202808677" 303 | "302","Run4_204360540904307" 304 | "303","Run4_135081913600813" 305 | "304","Run4_205510759631086" 306 | "305","Run5_201595494283171" 307 | "306","Run5_192678095603557" 308 | "307","Run5_195958645050611" 309 | "308","Run5_239391474670813" 310 | "309","Run5_157684197702566" 311 | "310","Run4_195606458297180" 312 | "311","Run5_191647290907571" 313 | "312","Run4_133895281851236" 314 | "313","Run4_204427131571635" 315 | "314","Run4_191647276193582" 316 | "315","Run4_195426042109174" 317 | "316","Run5_126227569404333" 318 | "317","Run5_197060170008990" 319 | "318","Run4_192121092557547" 320 | "319","Run4_196527701277405" 321 | "320","Run5_195625773387165" 322 | "321","Run5_235737680669475" 323 | "322","Run4_126707773105894" 324 | "323","Run4_240001494992669" 325 | "324","Run5_235678490913572" 326 | "325","Run4_191708897331555" 327 | "326","Run4_166400431373164" 328 | "327","Run5_165385713138932" 329 | "328","Run5_160440070850909" 330 | "329","Run5_134600596020581" 331 | "330","Run4_170327476169508" 332 | "331","Run4_200426592069939" 333 | "332","Run5_165188413082030" 334 | "333","Run4_122293367564598" 335 | "334","Run5_134041208282342" 336 | "335","Run5_131293483227549" 337 | "336","Run4_195615840460084" 338 | "337","Run4_231890207794995" 339 | "338","Run5_155971426310875" 340 | "339","Run5_131234280957724" 341 | "340","Run5_196043592910699" 342 | "341","Run5_195623370049883" 343 | "342","Run5_227846617222428" 344 | "343","Run4_197060157631339" 345 | "344","Run5_156024161557286" 346 | "345","Run4_170137155157238" 347 | 
"346","Run4_197060189186996" 348 | "347","Run5_195958617892636" 349 | "348","Run4_120703436057910" 350 | "349","Run5_200441237006260" 351 | "350","Run5_231760540621548" 352 | "351","Run4_135149156194140" 353 | "352","Run5_200994225872100" 354 | "353","Run5_170327460919661" 355 | "354","Run4_240634645822899" 356 | "355","Run5_235559336171301" 357 | "356","Run4_126834252630899" 358 | "357","Run4_226953429932837" 359 | "358","Run5_134592125360374" 360 | "359","Run4_169794766224757" 361 | "360","Run5_196638190225763" 362 | "361","Run5_165164957252445" 363 | "362","Run5_169174290291051" 364 | "363","Run4_170198358768860" 365 | "364","Run5_156453554772764" 366 | "365","Run5_122436443553141" 367 | "366","Run5_195562288142755" 368 | "367","Run5_196019701636523" 369 | "368","Run5_169768893692214" 370 | "369","Run5_133854479994206" 371 | "370","Run5_157536960572838" 372 | "371","Run5_227991469870389" 373 | "372","Run4_239932655712989" 374 | "373","Run5_241098905205038" 375 | "374","Run4_131175210441444" 376 | "375","Run5_169727132587886" 377 | "376","Run4_230592310074590" 378 | "377","Run5_236175633364835" 379 | "378","Run4_166469265381213" 380 | "379","Run5_155971444979997" 381 | "380","Run5_120786804751086" 382 | "381","Run5_120864484738990" 383 | "382","Run5_235697669064539" 384 | "383","Run5_120726924413669" 385 | "384","Run5_232450554023270" 386 | "385","Run4_191215678445341" 387 | "386","Run4_191576409230699" 388 | "387","Run5_161462649252574" 389 | "388","Run5_133974590118189" 390 | "389","Run5_199934564223404" 391 | "390","Run4_204765463759213" 392 | -------------------------------------------------------------------------------- /notebooks/manuscript_data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "68a5c2f5-9391-4170-b5ea-9df9ad5eafb4", 6 | "metadata": {}, 7 | "source": [ 8 | "# Access and Analyze `scanpy anndata` Objects from a Manuscript\n", 9 | "\n", 10 
| "This guide provides steps to access and analyze the `scanpy anndata` objects associated with a recent manuscript. These objects are essential for computational biologists and data scientists working in genomics and related fields. There are three replicates available for download:\n", 11 | "\n", 12 | "- [Replicate 1 (Rep1)](https://s3.amazonaws.com/dp-lab-data-public/palantir/human_cd34_bm_rep1.h5ad)\n", 13 | "- [Replicate 2 (Rep2)](https://s3.amazonaws.com/dp-lab-data-public/palantir/human_cd34_bm_rep2.h5ad)\n", 14 | "- [Replicate 3 (Rep3)](https://s3.amazonaws.com/dp-lab-data-public/palantir/human_cd34_bm_rep3.h5ad)\n", 15 | "\n", 16 | "Each `anndata` object contains several elements crucial for comprehensive data analysis:\n", 17 | "\n", 18 | "1. `.X`: Filtered, normalized, and log-transformed count matrix.\n", 19 | "2. `.raw`: Original, filtered raw count matrix.\n", 20 | "3. `.obsm['MAGIC_imputed_data']`: Imputed count matrix using MAGIC algorithm.\n", 21 | "4. `.obsm['tsne']`: t-SNE maps (as presented in the manuscript), generated using scaled diffusion components.\n", 22 | "5. `.obs['clusters']`: Cell clustering information.\n", 23 | "6. `.obs['palantir_pseudotime']`: Cell pseudo-time ordering, as determined by Palantir.\n", 24 | "7. `.obs['palantir_diff_potential']`: Palantir-determined differentiation potential of cells.\n", 25 | "8. `.obsm['palantir_branch_probs']`: Probabilities of cells branching into different lineages, according to Palantir.\n", 26 | "9. `.uns['palantir_branch_probs_cell_types']`: Labels for Palantir branch probabilities.\n", 27 | "10. `.uns['ct_colors']`: Color codes for cell types, as used in the manuscript.\n", 28 | "11. 
`.uns['cluster_colors']`: Color codes for cell clusters, as used in the manuscript.\n", 29 | "\n", 30 | "## Python Code for Data Access:" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 1, 36 | "id": "63f356a7-3856-4596-a7b3-9fc05cc3029a", 37 | "metadata": { 38 | "execution": { 39 | "iopub.execute_input": "2023-11-28T21:20:46.755293Z", 40 | "iopub.status.busy": "2023-11-28T21:20:46.755059Z", 41 | "iopub.status.idle": "2023-11-28T21:20:59.646740Z", 42 | "shell.execute_reply": "2023-11-28T21:20:59.645355Z", 43 | "shell.execute_reply.started": "2023-11-28T21:20:46.755266Z" 44 | } 45 | }, 46 | "outputs": [], 47 | "source": [ 48 | "import scanpy as sc\n", 49 | "\n", 50 | "# Read in the data, with backup URLs provided\n", 51 | "adata_Rep1 = sc.read(\n", 52 | " \"../data/human_cd34_bm_rep1.h5ad\",\n", 53 | " backup_url=\"https://s3.amazonaws.com/dp-lab-data-public/palantir/human_cd34_bm_rep1.h5ad\",\n", 54 | ")\n", 55 | "adata_Rep2 = sc.read(\n", 56 | " \"../data/human_cd34_bm_rep2.h5ad\",\n", 57 | " backup_url=\"https://s3.amazonaws.com/dp-lab-data-public/palantir/human_cd34_bm_rep2.h5ad\",\n", 58 | ")\n", 59 | "adata_Rep3 = sc.read(\n", 60 | " \"../data/human_cd34_bm_rep3.h5ad\",\n", 61 | " backup_url=\"https://s3.amazonaws.com/dp-lab-data-public/palantir/human_cd34_bm_rep3.h5ad\",\n", 62 | ")" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 2, 68 | "id": "bee4a735-7c47-415a-b1e3-ee776998dbd5", 69 | "metadata": { 70 | "execution": { 71 | "iopub.execute_input": "2023-11-28T21:20:59.650053Z", 72 | "iopub.status.busy": "2023-11-28T21:20:59.649313Z", 73 | "iopub.status.idle": "2023-11-28T21:20:59.659463Z", 74 | "shell.execute_reply": "2023-11-28T21:20:59.658910Z", 75 | "shell.execute_reply.started": "2023-11-28T21:20:59.650021Z" 76 | } 77 | }, 78 | "outputs": [ 79 | { 80 | "data": { 81 | "text/plain": [ 82 | "AnnData object with n_obs × n_vars = 5780 × 14651\n", 83 | " obs: 'clusters', 'palantir_pseudotime', 
'palantir_diff_potential'\n", 84 | " uns: 'cluster_colors', 'ct_colors', 'palantir_branch_probs_cell_types'\n", 85 | " obsm: 'tsne', 'MAGIC_imputed_data', 'palantir_branch_probs'" 86 | ] 87 | }, 88 | "execution_count": 2, 89 | "metadata": {}, 90 | "output_type": "execute_result" 91 | } 92 | ], 93 | "source": [ 94 | "adata_Rep1" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 3, 100 | "id": "515e6760-8f95-42d6-87ba-1a2375797ccf", 101 | "metadata": { 102 | "execution": { 103 | "iopub.execute_input": "2023-11-28T21:20:59.660313Z", 104 | "iopub.status.busy": "2023-11-28T21:20:59.660133Z", 105 | "iopub.status.idle": "2023-11-28T21:20:59.676952Z", 106 | "shell.execute_reply": "2023-11-28T21:20:59.676283Z", 107 | "shell.execute_reply.started": "2023-11-28T21:20:59.660295Z" 108 | } 109 | }, 110 | "outputs": [ 111 | { 112 | "data": { 113 | "text/plain": [ 114 | "AnnData object with n_obs × n_vars = 6501 × 14913\n", 115 | " obs: 'clusters', 'palantir_pseudotime', 'palantir_diff_potential'\n", 116 | " uns: 'cluster_colors', 'ct_colors', 'palantir_branch_probs_cell_types'\n", 117 | " obsm: 'tsne', 'MAGIC_imputed_data', 'palantir_branch_probs'" 118 | ] 119 | }, 120 | "execution_count": 3, 121 | "metadata": {}, 122 | "output_type": "execute_result" 123 | } 124 | ], 125 | "source": [ 126 | "adata_Rep2" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 4, 132 | "id": "61d7a8e0-0916-4099-8982-5599d7166104", 133 | "metadata": { 134 | "execution": { 135 | "iopub.execute_input": "2023-11-28T21:20:59.678250Z", 136 | "iopub.status.busy": "2023-11-28T21:20:59.677863Z", 137 | "iopub.status.idle": "2023-11-28T21:20:59.691822Z", 138 | "shell.execute_reply": "2023-11-28T21:20:59.691131Z", 139 | "shell.execute_reply.started": "2023-11-28T21:20:59.678220Z" 140 | } 141 | }, 142 | "outputs": [ 143 | { 144 | "data": { 145 | "text/plain": [ 146 | "AnnData object with n_obs × n_vars = 12046 × 14044\n", 147 | " obs: 'clusters', 
'palantir_pseudotime', 'palantir_diff_potential'\n", 148 | " uns: 'cluster_colors', 'ct_colors', 'palantir_branch_probs_cell_types'\n", 149 | " obsm: 'tsne', 'MAGIC_imputed_data', 'palantir_branch_probs'" 150 | ] 151 | }, 152 | "execution_count": 4, 153 | "metadata": {}, 154 | "output_type": "execute_result" 155 | } 156 | ], 157 | "source": [ 158 | "adata_Rep3" 159 | ] 160 | }, 161 | { 162 | "cell_type": "markdown", 163 | "id": "b057a720-f0f4-40b0-8bcf-02efc9b2124d", 164 | "metadata": { 165 | "execution": { 166 | "iopub.execute_input": "2023-11-28T19:21:40.634650Z", 167 | "iopub.status.busy": "2023-11-28T19:21:40.634039Z", 168 | "iopub.status.idle": "2023-11-28T19:21:40.647637Z", 169 | "shell.execute_reply": "2023-11-28T19:21:40.646498Z", 170 | "shell.execute_reply.started": "2023-11-28T19:21:40.634595Z" 171 | } 172 | }, 173 | "source": [ 174 | "# Converting `anndata` Objects to `Seurat` Objects Using R\n", 175 | "\n", 176 | "For researchers working with R and Seurat, the process to convert `anndata` objects to Seurat objects involves the following steps:\n", 177 | "\n", 178 | "1. **Set Up R Environment and Libraries**:\n", 179 | " - Load the necessary libraries: `Seurat` and `anndata`.\n", 180 | "\n", 181 | "2. **Download and Read the Data**:\n", 182 | " - Use `curl::curl_download` to download the `anndata` from the provided URLs.\n", 183 | " - Read the data using the `read_h5ad` method from the `anndata` library.\n", 184 | "\n", 185 | "3. 
**Create Seurat Objects**:\n", 186 | " - Use the `CreateSeuratObject` function to convert the data into Seurat objects, incorporating counts and metadata from the `anndata` object.\n", 187 | " - Transfer additional data like tSNE embeddings, imputed gene expressions, and cell fate probabilities into the appropriate slots in the Seurat object.\n", 188 | "\n", 189 | "### R Code Snippet:" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": null, 195 | "id": "562d56fb-80dc-4f44-8266-3ca559e79106", 196 | "metadata": { 197 | "jupyter": { 198 | "source_hidden": true 199 | } 200 | }, 201 | "outputs": [], 202 | "source": [ 203 | "# this cell only exists to allow running R code inside this python notebook using a conda kernel\n", 204 | "import sys\n", 205 | "import os\n", 206 | "\n", 207 | "# Get the path to the python executable\n", 208 | "python_executable_path = sys.executable\n", 209 | "\n", 210 | "# Extract the path to the environment from the path to the python executable\n", 211 | "env_path = os.path.dirname(os.path.dirname(python_executable_path))\n", 212 | "\n", 213 | "print(\n", 214 | " f\"Conda env path: {env_path}\\n\"\n", 215 | " \"Please make sure you have R installed in the conda environment.\"\n", 216 | ")\n", 217 | "\n", 218 | "os.environ['R_HOME'] = os.path.join(env_path, 'lib', 'R')\n", 219 | "\n", 220 | "%load_ext rpy2.ipython" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": 6, 226 | "id": "ed46f119-e8be-45ba-b447-b46e8b947cf8", 227 | "metadata": { 228 | "execution": { 229 | "iopub.execute_input": "2023-11-28T21:21:01.081154Z", 230 | "iopub.status.busy": "2023-11-28T21:21:01.080675Z", 231 | "iopub.status.idle": "2023-11-28T21:23:08.313753Z", 232 | "shell.execute_reply": "2023-11-28T21:23:08.313058Z", 233 | "shell.execute_reply.started": "2023-11-28T21:21:01.081128Z" 234 | } 235 | }, 236 | "outputs": [ 237 | { 238 | "name": "stderr", 239 | "output_type": "stream", 240 | "text": [ 241 | "R[write to 
console]: Loading required package: SeuratObject\n", 242 | "\n", 243 | "R[write to console]: Loading required package: sp\n", 244 | "\n", 245 | "R[write to console]: \n", 246 | "Attaching package: ‘SeuratObject’\n", 247 | "\n", 248 | "\n", 249 | "R[write to console]: The following object is masked from ‘package:base’:\n", 250 | "\n", 251 | " intersect\n", 252 | "\n", 253 | "\n" 254 | ] 255 | }, 256 | { 257 | "name": "stdout", 258 | "output_type": "stream", 259 | "text": [ 260 | "\n", 261 | " WARNING: The R package \"reticulate\" only fixed recently\n", 262 | " an issue that caused a segfault when used with rpy2:\n", 263 | " https://github.com/rstudio/reticulate/pull/1188\n", 264 | " Make sure that you use a version of that package that includes\n", 265 | " the fix.\n", 266 | " " 267 | ] 268 | }, 269 | { 270 | "name": "stderr", 271 | "output_type": "stream", 272 | "text": [ 273 | "R[write to console]: \n", 274 | "Attaching package: ‘anndata’\n", 275 | "\n", 276 | "\n", 277 | "R[write to console]: The following object is masked from ‘package:SeuratObject’:\n", 278 | "\n", 279 | " Layers\n", 280 | "\n", 281 | "\n", 282 | "R[write to console]: Warning:\n", 283 | "R[write to console]: Feature names cannot have underscores ('_'), replacing with dashes ('-')\n", 284 | "\n", 285 | "R[write to console]: Warning:\n", 286 | "R[write to console]: Data is of class matrix. Coercing to dgCMatrix.\n", 287 | "\n", 288 | "R[write to console]: Warning:\n", 289 | "R[write to console]: Feature names cannot have underscores ('_'), replacing with dashes ('-')\n", 290 | "\n", 291 | "R[write to console]: Warning:\n", 292 | "R[write to console]: Feature names cannot have underscores ('_'), replacing with dashes ('-')\n", 293 | "\n", 294 | "R[write to console]: Warning:\n", 295 | "R[write to console]: Feature names cannot have underscores ('_'), replacing with dashes ('-')\n", 296 | "\n", 297 | "R[write to console]: Warning:\n", 298 | "R[write to console]: Data is of class matrix. 
Coercing to dgCMatrix.\n", 299 | "\n", 300 | "R[write to console]: Warning:\n", 301 | "R[write to console]: Feature names cannot have underscores ('_'), replacing with dashes ('-')\n", 302 | "\n", 303 | "R[write to console]: Warning:\n", 304 | "R[write to console]: Feature names cannot have underscores ('_'), replacing with dashes ('-')\n", 305 | "\n", 306 | "R[write to console]: Warning:\n", 307 | "R[write to console]: Feature names cannot have underscores ('_'), replacing with dashes ('-')\n", 308 | "\n", 309 | "R[write to console]: Warning:\n", 310 | "R[write to console]: Data is of class matrix. Coercing to dgCMatrix.\n", 311 | "\n", 312 | "R[write to console]: Warning:\n", 313 | "R[write to console]: Feature names cannot have underscores ('_'), replacing with dashes ('-')\n", 314 | "\n", 315 | "R[write to console]: Warning:\n", 316 | "R[write to console]: Feature names cannot have underscores ('_'), replacing with dashes ('-')\n", 317 | "\n" 318 | ] 319 | } 320 | ], 321 | "source": [ 322 | "%%R\n", 323 | "library(Seurat)\n", 324 | "library(anndata)\n", 325 | "\n", 326 | "create_seurat <- function(url) {\n", 327 | " file_path <- sub(\"https://s3.amazonaws.com/dp-lab-data-public/palantir/\", \"../data/\", url)\n", 328 | " if (!file.exists(file_path)) {\n", 329 | " curl::curl_download(url, file_path)\n", 330 | " }\n", 331 | " data <- read_h5ad(file_path)\n", 332 | " \n", 333 | " seurat_obj <- CreateSeuratObject(\n", 334 | " counts = t(data$X), \n", 335 | " meta.data = data$obs,\n", 336 | " project = \"CD34+ Bone Marrow Cells\"\n", 337 | " )\n", 338 | " tsne_data <- data$obsm[[\"tsne\"]]\n", 339 | " rownames(tsne_data) <- rownames(data$obs)\n", 340 | " colnames(tsne_data) <- c(\"tSNE_1\", \"tSNE_2\")\n", 341 | " seurat_obj[[\"tsne\"]] <- CreateDimReducObject(\n", 342 | " embeddings = tsne_data,\n", 343 | " key = \"tSNE_\"\n", 344 | " )\n", 345 | " imputed_data <- t(data$obsm[[\"MAGIC_imputed_data\"]])\n", 346 | " colnames(imputed_data) <- rownames(data$obs)\n", 
347 | " rownames(imputed_data) <- rownames(data$var)\n", 348 | " seurat_obj[[\"MAGIC_imputed\"]] <- CreateAssayObject(counts = imputed_data)\n", 349 | " fate_probs <- as.data.frame(data$obsm[[\"palantir_branch_probs\"]])\n", 350 | " colnames(fate_probs) <- data$uns[[\"palantir_branch_probs_cell_types\"]]\n", 351 | " rownames(fate_probs) <- rownames(data$obs)\n", 352 | " seurat_obj <- AddMetaData(seurat_obj, metadata = fate_probs)\n", 353 | "\n", 354 | " return(seurat_obj)\n", 355 | "}\n", 356 | "\n", 357 | "human_cd34_bm_Rep1 <- create_seurat(\"https://s3.amazonaws.com/dp-lab-data-public/palantir/human_cd34_bm_rep1.h5ad\")\n", 358 | "human_cd34_bm_Rep2 <- create_seurat(\"https://s3.amazonaws.com/dp-lab-data-public/palantir/human_cd34_bm_rep2.h5ad\")\n", 359 | "human_cd34_bm_Rep3 <- create_seurat(\"https://s3.amazonaws.com/dp-lab-data-public/palantir/human_cd34_bm_rep3.h5ad\")" 360 | ] 361 | }, 362 | { 363 | "cell_type": "code", 364 | "execution_count": 7, 365 | "id": "a7c8b823-4d18-4252-acc1-4a9f51f929b9", 366 | "metadata": { 367 | "execution": { 368 | "iopub.execute_input": "2023-11-28T21:23:08.315660Z", 369 | "iopub.status.busy": "2023-11-28T21:23:08.315364Z", 370 | "iopub.status.idle": "2023-11-28T21:23:08.361153Z", 371 | "shell.execute_reply": "2023-11-28T21:23:08.360630Z", 372 | "shell.execute_reply.started": "2023-11-28T21:23:08.315642Z" 373 | } 374 | }, 375 | "outputs": [ 376 | { 377 | "name": "stdout", 378 | "output_type": "stream", 379 | "text": [ 380 | "An object of class Seurat \n", 381 | "29302 features across 5780 samples within 2 assays \n", 382 | "Active assay: RNA (14651 features, 0 variable features)\n", 383 | " 1 layer present: counts\n", 384 | " 1 other assay present: MAGIC_imputed\n", 385 | " 1 dimensional reduction calculated: tsne\n" 386 | ] 387 | } 388 | ], 389 | "source": [ 390 | "%%R\n", 391 | "\n", 392 | "human_cd34_bm_Rep1" 393 | ] 394 | }, 395 | { 396 | "cell_type": "code", 397 | "execution_count": 8, 398 | "id": 
"094067ac-b251-4e37-8d67-eedc2641b8fa", 399 | "metadata": { 400 | "execution": { 401 | "iopub.execute_input": "2023-11-28T21:23:08.362383Z", 402 | "iopub.status.busy": "2023-11-28T21:23:08.361964Z", 403 | "iopub.status.idle": "2023-11-28T21:23:08.400063Z", 404 | "shell.execute_reply": "2023-11-28T21:23:08.399518Z", 405 | "shell.execute_reply.started": "2023-11-28T21:23:08.362356Z" 406 | } 407 | }, 408 | "outputs": [ 409 | { 410 | "name": "stdout", 411 | "output_type": "stream", 412 | "text": [ 413 | "An object of class Seurat \n", 414 | "29826 features across 6501 samples within 2 assays \n", 415 | "Active assay: RNA (14913 features, 0 variable features)\n", 416 | " 1 layer present: counts\n", 417 | " 1 other assay present: MAGIC_imputed\n", 418 | " 1 dimensional reduction calculated: tsne\n" 419 | ] 420 | } 421 | ], 422 | "source": [ 423 | "%%R\n", 424 | "\n", 425 | "human_cd34_bm_Rep2" 426 | ] 427 | }, 428 | { 429 | "cell_type": "code", 430 | "execution_count": 9, 431 | "id": "6fb000c4-41ee-4147-aba8-08c0e6f7deb5", 432 | "metadata": { 433 | "execution": { 434 | "iopub.execute_input": "2023-11-28T21:23:08.401196Z", 435 | "iopub.status.busy": "2023-11-28T21:23:08.400878Z", 436 | "iopub.status.idle": "2023-11-28T21:23:08.441148Z", 437 | "shell.execute_reply": "2023-11-28T21:23:08.440627Z", 438 | "shell.execute_reply.started": "2023-11-28T21:23:08.401171Z" 439 | } 440 | }, 441 | "outputs": [ 442 | { 443 | "name": "stdout", 444 | "output_type": "stream", 445 | "text": [ 446 | "An object of class Seurat \n", 447 | "28088 features across 12046 samples within 2 assays \n", 448 | "Active assay: RNA (14044 features, 0 variable features)\n", 449 | " 1 layer present: counts\n", 450 | " 1 other assay present: MAGIC_imputed\n", 451 | " 1 dimensional reduction calculated: tsne\n" 452 | ] 453 | } 454 | ], 455 | "source": [ 456 | "%%R\n", 457 | "\n", 458 | "human_cd34_bm_Rep3" 459 | ] 460 | }, 461 | { 462 | "cell_type": "code", 463 | "execution_count": null, 464 | "id": 
"e208ff84-85d0-40f7-b08d-9153537b088a", 465 | "metadata": {}, 466 | "outputs": [], 467 | "source": [] 468 | } 469 | ], 470 | "metadata": { 471 | "kernelspec": { 472 | "display_name": "da1", 473 | "language": "python", 474 | "name": "da1" 475 | }, 476 | "language_info": { 477 | "codemirror_mode": { 478 | "name": "ipython", 479 | "version": 3 480 | }, 481 | "file_extension": ".py", 482 | "mimetype": "text/x-python", 483 | "name": "python", 484 | "nbconvert_exporter": "python", 485 | "pygments_lexer": "ipython3", 486 | "version": "3.11.5" 487 | }, 488 | "widgets": { 489 | "application/vnd.jupyter.widget-state+json": { 490 | "state": {}, 491 | "version_major": 2, 492 | "version_minor": 0 493 | } 494 | } 495 | }, 496 | "nbformat": 4, 497 | "nbformat_minor": 5 498 | } 499 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=68.0.0", "wheel>=0.40.0"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "palantir" 7 | version = "1.4.2rc" 8 | description = "Palantir for modeling continuous cell state and cell fate choices in single cell data" 9 | authors = [ 10 | {name = "Palantir development team", email = "manu.talanki@gmail.com"} 11 | ] 12 | readme = "README.md" 13 | requires-python = ">=3.9" 14 | license = {text = "MIT"} 15 | classifiers = [ 16 | "Programming Language :: Python :: 3", 17 | "Programming Language :: Python :: 3.9", 18 | "Programming Language :: Python :: 3.10", 19 | "Programming Language :: Python :: 3.11", 20 | "Programming Language :: Python :: 3.12", 21 | "Programming Language :: Python :: 3.13", 22 | "License :: OSI Approved :: MIT License", 23 | "Operating System :: OS Independent", 24 | "Operating System :: POSIX :: Linux", 25 | "Development Status :: 5 - Production/Stable", 26 | "Topic :: Scientific/Engineering :: Bio-Informatics", 27 | "Topic :: 
Scientific/Engineering :: Visualization", 28 | ] 29 | dependencies = [ 30 | "numpy>=1.14.2", 31 | "pandas>=0.22.0", 32 | "scipy>=1.3", 33 | "networkx>=2.1", 34 | "scikit-learn", 35 | "joblib", 36 | "matplotlib>=3.8.0", 37 | "anndata>=0.8.0", 38 | "scanpy>=1.6.0", 39 | "mellon>=1.6.1", 40 | "ml_dtypes>=0.5.0", 41 | "igraph>=0.11.8", 42 | ] 43 | 44 | [project.optional-dependencies] 45 | gam = [ 46 | "pygam", 47 | ] 48 | 49 | fcs = [ 50 | "fcsparser>=0.1.2", 51 | ] 52 | 53 | full = [ 54 | "pygam", 55 | "fcsparser>=0.1.2", 56 | ] 57 | 58 | test-base = [ 59 | "pytest>=7.0.0", 60 | "pytest-cov>=4.0.0", 61 | "h5py", 62 | ] 63 | 64 | test = [ 65 | "pytest>=7.0.0", 66 | "pytest-cov>=4.0.0", 67 | "h5py", 68 | "fcsparser>=0.1.2", 69 | ] 70 | 71 | pre313 = [ 72 | "fcsparser>=0.1.2", 73 | ] 74 | 75 | [project.urls] 76 | Homepage = "https://github.com/dpeerlab/palantir" 77 | "Bug Tracker" = "https://github.com/dpeerlab/palantir/issues" 78 | 79 | [tool.setuptools] 80 | package-dir = {"" = "src"} 81 | 82 | [tool.pytest.ini_options] 83 | testpaths = ["tests"] 84 | python_files = "test_*.py" 85 | 86 | [tool.black] 87 | line-length = 100 88 | target-version = ['py39'] 89 | include = '\.pyi?$' 90 | exclude = ''' 91 | /( 92 | \.eggs 93 | | \.git 94 | | \.hg 95 | | \.mypy_cache 96 | | \.tox 97 | | \.venv 98 | | _build 99 | | buck-out 100 | | build 101 | | dist 102 | )/ 103 | ''' 104 | 105 | [tool.coverage.run] 106 | source = ["palantir"] 107 | omit = [ 108 | "tests/*", 109 | "*/config.py" 110 | ] 111 | 112 | [tool.coverage.report] 113 | exclude_lines = [ 114 | "pragma: no cover", 115 | "raise NotImplementedError", 116 | "if __name__ == .__main__.:", 117 | "pass", 118 | "raise ImportError", 119 | "def _return_cell", 120 | "print" 121 | ] 122 | 123 | [project.scripts] 124 | palantir-cli = "palantir.cli:main" 125 | -------------------------------------------------------------------------------- /readthedocs.yaml: 
-------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | build: 4 | os: ubuntu-20.04 5 | tools: 6 | python: "3.9" 7 | 8 | sphinx: 9 | configuration: docs/source/conf.py 10 | 11 | python: 12 | install: 13 | - requirements: docs/requirements.txt 14 | - method: pip 15 | path: . -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | -e . 2 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from setuptools import setup 3 | 4 | # For backward compatibility, use setup.py as a proxy to pyproject.toml 5 | # All configuration is in pyproject.toml 6 | 7 | setup() 8 | -------------------------------------------------------------------------------- /src/palantir/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Palantir - Modeling continuous cell state and cell fate choices in single cell data. 3 | 4 | Palantir is an algorithm to align cells along differentiation trajectories, identify 5 | differentiation endpoints, and estimate cell-fate probabilities in single-cell data. 6 | The package provides functions for preprocessing, visualization, trajectory analysis, 7 | and gene expression modeling along the trajectories. 8 | 9 | Modules 10 | ------- 11 | config : Configuration settings for Palantir 12 | core : Core functions for running the Palantir algorithm 13 | presults : Class for storing and accessing Palantir results 14 | io : Input/output functions for loading and saving data 15 | preprocess : Preprocessing functions for single-cell data 16 | utils : Utility functions for analysis 17 | plot : Visualization functions 18 | """ 19 | 20 | import importlib.metadata 21 | 22 | from . 
def check_python_version():
    """Check the running Python version and install version-appropriate dependencies.

    For Python < 3.13 this attempts to install ``fcsparser`` (needed for FCS
    file support) into the current interpreter's environment via ``pip``.
    On 3.13+ it only prints guidance, since ``fcsparser`` is optional there.

    Notes
    -----
    The previous implementation did ``import pip`` before shelling out; the
    import was never used and only caused the install to be skipped when the
    ``pip`` module could not be imported even though ``python -m pip`` would
    have worked. The subprocess call below fails cleanly on its own and is
    caught by the same handler.
    """
    version = sys.version_info
    if version.major == 3 and version.minor < 13:
        try:
            # Use the current interpreter so the package lands in the same
            # environment that palantir runs in.
            subprocess.check_call([
                sys.executable, "-m", "pip", "install", "fcsparser>=0.1.2"
            ])
            print("Installed fcsparser for Python < 3.13")
        except Exception as e:
            print(f"Failed to install fcsparser: {e}")
            print("You may need to install it manually with: pip install fcsparser>=0.1.2")
    else:
        print("Python 3.13+ detected. fcsparser is optional.")
        print("If you need FCS file support, install with: pip install palantir[fcs]")
def main():
    """Entry point for the ``palantir-cli`` command.

    Supports ``--version`` (print the installed palantir version) and
    ``--check-deps`` (check/install optional dependencies). Invoked with no
    arguments at all, it prints the argparse help text.
    """
    parser = argparse.ArgumentParser(
        description="Palantir - Modeling continuous cell state and cell fate choices in single cell data"
    )
    parser.add_argument(
        "--version", action="store_true", help="Print version information"
    )
    parser.add_argument(
        "--check-deps", action="store_true", help="Check dependencies and install as needed"
    )

    args = parser.parse_args()

    if args.version:
        # Imported lazily so ordinary invocations don't pay the import cost.
        from palantir.version import __version__

        print(f"Palantir version: {__version__}")
    elif args.check_deps:
        check_python_version()
    elif len(sys.argv) == 1:
        parser.print_help()
16 | SAVE_AS_DF = True 17 | -------------------------------------------------------------------------------- /src/palantir/io.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import os.path 4 | import sys 5 | import scanpy as sc 6 | from scipy.io import mmread 7 | import anndata 8 | from typing import Optional, List, Union 9 | 10 | 11 | def _clean_up(df: pd.DataFrame) -> pd.DataFrame: 12 | """ 13 | Remove rows and columns with all zeros from a DataFrame. 14 | 15 | Parameters 16 | ---------- 17 | df : pd.DataFrame 18 | Input DataFrame to clean. 19 | 20 | Returns 21 | ------- 22 | pd.DataFrame 23 | Cleaned DataFrame with rows and columns containing all zeros removed. 24 | """ 25 | df = df.loc[df.sum(axis=1) > 0, :] 26 | df = df.loc[:, df.sum(axis=0) > 0] 27 | return df 28 | 29 | 30 | def from_csv(counts_csv_file: str, delimiter: str = ",") -> pd.DataFrame: 31 | """ 32 | Read gene expression data from a CSV file. 33 | 34 | Parameters 35 | ---------- 36 | counts_csv_file : str 37 | Path to the CSV file containing gene expression data. 38 | delimiter : str, optional 39 | Delimiter used in the CSV file. Default is ','. 40 | 41 | Returns 42 | ------- 43 | pd.DataFrame 44 | Gene expression data with rows as cells and columns as genes. 45 | Cells and genes with zero counts are removed. 46 | """ 47 | # Read in csv file 48 | df = pd.read_csv(counts_csv_file, sep=delimiter, index_col=0) 49 | clean_df = _clean_up(df) 50 | return clean_df 51 | 52 | 53 | def from_mtx(mtx_file: str, gene_name_file: str) -> pd.DataFrame: 54 | """ 55 | Read gene expression data from a Matrix Market format file. 56 | 57 | Parameters 58 | ---------- 59 | mtx_file : str 60 | Path to the Matrix Market file containing gene expression data. 61 | gene_name_file : str 62 | Path to the file containing gene names, one per line. 
63 | 64 | Returns 65 | ------- 66 | pd.DataFrame 67 | Gene expression data with rows as cells and columns as genes. 68 | Cells and genes with zero counts are removed. 69 | """ 70 | # Read in mtx file 71 | count_matrix = mmread(mtx_file) 72 | 73 | gene_names = np.loadtxt(gene_name_file, dtype=np.dtype("S")) 74 | gene_names = np.array([gene.decode("utf-8") for gene in gene_names]) 75 | 76 | # Convert to dense format 77 | df = pd.DataFrame(count_matrix.todense(), columns=gene_names) 78 | 79 | return _clean_up(df) 80 | 81 | 82 | def from_10x(data_dir: Optional[str], use_ensemble_id: bool = True) -> pd.DataFrame: 83 | """ 84 | Load data from 10X Genomics format. 85 | 86 | Parameters 87 | ---------- 88 | data_dir : Optional[str] 89 | Directory containing the 10X Genomics output files: 90 | matrix.mtx, genes.tsv, and barcodes.tsv. 91 | If None, the current directory is used. 92 | use_ensemble_id : bool, optional 93 | If True, use Ensembl IDs as gene identifiers. 94 | If False, use gene symbols. Default is True. 95 | 96 | Returns 97 | ------- 98 | pd.DataFrame 99 | Gene expression data with rows as cells and columns as genes. 100 | Cells and genes with zero counts are removed. 
def from_10x(data_dir: Optional[str], use_ensemble_id: bool = True) -> pd.DataFrame:
    """
    Load data from 10X Genomics format.

    Parameters
    ----------
    data_dir : Optional[str]
        Directory containing the 10X Genomics output files:
        matrix.mtx, genes.tsv, and barcodes.tsv.
        If None, the current directory is used.
    use_ensemble_id : bool, optional
        If True, use Ensembl IDs (first column of genes.tsv) as gene
        identifiers. If False, use gene symbols (second column). Default is True.

    Returns
    -------
    pd.DataFrame
        Gene expression data with rows as cells and columns as genes.
        Cells and genes with zero counts are removed. When gene symbols are
        used, duplicate symbols are merged by summing their counts.
    """
    # loads 10x sparse format data
    if data_dir is None:
        data_dir = "./"
    elif not data_dir.endswith("/"):
        # FIX: the previous `data_dir[len(data_dir) - 1]` raised IndexError
        # for an empty string; endswith handles that case gracefully.
        data_dir = data_dir + "/"

    filename_dataMatrix = os.path.expanduser(data_dir + "matrix.mtx")
    filename_genes = os.path.expanduser(data_dir + "genes.tsv")
    filename_cells = os.path.expanduser(data_dir + "barcodes.tsv")

    # Read in gene expression matrix (sparse matrix)
    # Rows = genes, columns = cells
    dataMatrix = mmread(filename_dataMatrix)

    # genes.tsv has two tab-separated columns: Ensembl ID and gene symbol.
    gene_table = np.loadtxt(filename_genes, delimiter="\t", dtype=bytes).astype(str)
    name_column = 0 if use_ensemble_id else 1
    gene_names = [gene[name_column] for gene in gene_table]
    cell_names = np.loadtxt(filename_cells, delimiter="\t", dtype=bytes).astype(str)

    dataMatrix = pd.DataFrame(dataMatrix.todense(), columns=cell_names, index=gene_names)

    # combine duplicate genes: several Ensembl IDs can share one symbol
    if not use_ensemble_id:
        dataMatrix = dataMatrix.groupby(dataMatrix.index).sum()
    dataMatrix = dataMatrix.transpose()

    return _clean_up(dataMatrix)
def from_10x_HDF5(filename: str, genome: Optional[str] = None) -> pd.DataFrame:
    """Load 10X Genomics data stored in HDF5 format.

    Parameters
    ----------
    filename : str
        Path to the HDF5 file containing 10X Genomics data.
    genome : Optional[str], optional
        Name of the genome to load. If None, the first genome is used.

    Returns
    -------
    pd.DataFrame
        Cells-by-genes expression matrix; all-zero cells and genes removed.
    """
    adata = sc.read_10x_h5(filename, genome=genome, gex_only=True)

    # Densify the sparse count matrix, keyed by cell barcode and gene name.
    expression = pd.DataFrame(
        adata.X.todense(), columns=adata.var_names, index=adata.obs_names
    )

    return _clean_up(expression)
def from_fcs(
    cls,
    fcs_file: str,
    cofactor: float = 5,
    # NOTE: mutable default is safe here — the list is only read, never mutated.
    metadata_channels: List[str] = [
        "Time",
        "Event_length",
        "DNA1",
        "DNA2",
        "Cisplatin",
        "beadDist",
        "bead1",
    ],
) -> pd.DataFrame:
    """
    Load data from Flow Cytometry Standard (FCS) format.

    Parameters
    ----------
    cls : object
        Class instance (unused, kept for compatibility).
    fcs_file : str
        Path to the FCS file to load.
    cofactor : float, optional
        Cofactor for arcsinh transformation. Default is 5.
        Pass None or a non-positive value to skip the transformation.
    metadata_channels : List[str], optional
        List of metadata channel names to exclude from the returned data.

    Returns
    -------
    pd.DataFrame
        Processed cytometry data with metadata channels removed and
        optionally transformed using arcsinh.

    Notes
    -----
    This function requires the fcsparser package to be installed.
    If not installed, it will raise an ImportError with instructions.
    """
    try:
        import fcsparser
    except ImportError:
        raise ImportError(
            "The fcsparser package is required for reading FCS files. "
            "Please install it with: pip install fcsparser"
        )
    # Parse the fcs file; fcsparser.parse returns (metadata, data) — the
    # first element is the FCS text/metadata segment, the second the events.
    meta, data = fcsparser.parse(fcs_file)
    # Ensure float64 without forcing a copy (NumPy 2.0 compatible).
    data = data.astype(np.float64, copy=False)

    # Split channels into metadata vs. measurement channels and keep the latter.
    metadata_channels = data.columns.intersection(metadata_channels)
    data_channels = data.columns.difference(metadata_channels)
    data = data[data_channels]

    # Transform if necessary.
    # BUGFIX: the condition was `cofactor is not None or cofactor > 0`, which
    # raised TypeError (`None > 0`) for cofactor=None and applied the
    # transform for any non-None cofactor, including 0 (division by zero).
    # Only transform for a positive, non-None cofactor.
    if cofactor is not None and cofactor > 0:
        data = np.arcsinh(np.divide(data, cofactor))

    return data
def _scatter_with_colorbar(
    ax: Axes,
    x: np.ndarray,
    y: np.ndarray,
    c: np.ndarray,
    colorbar_label: Optional[str] = None,
    s: float = 5,
    cmap: Union[str, matplotlib.colors.Colormap] = "viridis",
    norm: Optional[Normalize] = None,
    alpha: float = 1.0,
    **kwargs,
) -> Tuple[Axes, matplotlib.colorbar.Colorbar]:
    """Draw a color-mapped scatter on *ax* and attach a vertical colorbar.

    Parameters
    ----------
    ax : Axes
        Matplotlib axes object to plot on.
    x : np.ndarray
        X-coordinates of the points.
    y : np.ndarray
        Y-coordinates of the points.
    c : np.ndarray
        Values mapped to colors via *cmap*.
    colorbar_label : str, optional
        Label written on the colorbar; omitted when None. Default is None.
    s : float, optional
        Marker size. Default is 5.
    cmap : str or matplotlib.colors.Colormap, optional
        Colormap for the scatter. Default is 'viridis'.
    norm : Normalize, optional
        Normalization applied before color mapping. Default is None.
    alpha : float, optional
        Marker transparency. Default is 1.0.
    **kwargs : dict
        Forwarded to ``ax.scatter``.

    Returns
    -------
    Tuple[Axes, matplotlib.colorbar.Colorbar]
        The axes that were drawn on and the created colorbar.
    """
    points = ax.scatter(x, y, c=c, s=s, cmap=cmap, norm=norm, alpha=alpha, **kwargs)
    # Carve a slim axis off the right-hand side of `ax` to host the colorbar.
    cax = make_axes_locatable(ax).append_axes("right", size="5%", pad=0.05)
    cbar = plt.colorbar(points, cax=cax, orientation="vertical")
    if colorbar_label:
        cbar.set_label(colorbar_label)
    return ax, cbar
104 | deselected_color : str, optional 105 | Color for non-highlighted cells. Default is "lightgray". 106 | selected_color : str, optional 107 | Color for highlighted cells. Default is "crimson". 108 | s_selected : float, optional 109 | Size of highlighted scatter points. Default is 10. 110 | s_deselected : float, optional 111 | Size of non-highlighted scatter points. Default is 3. 112 | alpha_deselected : float, optional 113 | Transparency of non-highlighted cells. Default is 0.5. 114 | alpha_selected : float, optional 115 | Transparency of highlighted cells. Default is 1.0. 116 | **kwargs : dict 117 | Additional keyword arguments to pass to plt.scatter. 118 | 119 | Returns 120 | ------- 121 | Axes 122 | The modified axes object. 123 | """ 124 | ax.scatter( 125 | x[~mask], 126 | y[~mask], 127 | c=deselected_color, 128 | s=s_deselected, 129 | alpha=alpha_deselected, 130 | label="Other Cells", 131 | **kwargs, 132 | ) 133 | ax.scatter( 134 | x[mask], 135 | y[mask], 136 | c=selected_color, 137 | s=s_selected, 138 | alpha=alpha_selected, 139 | label="Selected Cells", 140 | **kwargs, 141 | ) 142 | return ax 143 | 144 | 145 | def _add_legend( 146 | ax: Axes, 147 | handles: Optional[List] = None, 148 | labels: Optional[List[str]] = None, 149 | loc: str = "best", 150 | title: Optional[str] = None, 151 | **kwargs, 152 | ) -> matplotlib.legend.Legend: 153 | """Helper function to add legend to plot. 154 | 155 | Parameters 156 | ---------- 157 | ax : Axes 158 | Matplotlib axes object to add legend to. 159 | handles : List, optional 160 | List of artists (lines, patches) to be added to the legend. Default is None. 161 | labels : List[str], optional 162 | List of labels for the legend. Default is None. 163 | loc : str, optional 164 | Location of the legend. Default is "best". 165 | title : str, optional 166 | Title for the legend. Default is None. 167 | **kwargs : dict 168 | Additional keyword arguments to pass to ax.legend(). 
169 | 170 | Returns 171 | ------- 172 | matplotlib.legend.Legend 173 | The legend object. 174 | """ 175 | if handles is not None and labels is not None: 176 | legend = ax.legend(handles, labels, loc=loc, title=title, **kwargs) 177 | else: 178 | legend = ax.legend(loc=loc, title=title, **kwargs) 179 | return legend 180 | 181 | 182 | def _setup_axes( 183 | figsize: Tuple[float, float] = (6, 6), 184 | ax: Optional[Axes] = None, 185 | fig: Optional[plt.Figure] = None, 186 | **kwargs, 187 | ) -> Tuple[plt.Figure, Axes]: 188 | """Helper function to set up figure and axes for plotting. 189 | 190 | Parameters 191 | ---------- 192 | figsize : Tuple[float, float], optional 193 | Size of the figure (width, height) in inches. Default is (6, 6). 194 | ax : Axes, optional 195 | Existing axes to plot on. Default is None. 196 | fig : Figure, optional 197 | Existing figure to plot on. Default is None. 198 | **kwargs : dict 199 | Additional keyword arguments to pass to plt.subplots(). 200 | 201 | Returns 202 | ------- 203 | Tuple[plt.Figure, Axes] 204 | The figure and axes objects. 205 | """ 206 | if ax is None: 207 | fig, ax = plt.subplots(figsize=figsize, **kwargs) 208 | elif fig is None: 209 | fig = ax.figure 210 | return fig, ax 211 | 212 | 213 | def _get_palantir_fates_colors( 214 | ad, 215 | fate_names: List[str], 216 | palantir_fates_colors: Optional[Union[List[str], Dict[str, str]]] = None 217 | ) -> Dict[str, str]: 218 | """ 219 | Generate or update the mapping from branch names to colors. 220 | 221 | This utility checks if ad.uns already contains predefined colors. 222 | Then, if the `palantir_fates_colors` parameter is provided, its values are merged 223 | (with user-specified colors taking precedence). For any missing branch the function 224 | generates a new color ensuring that no color is used twice. 225 | 226 | Parameters 227 | ---------- 228 | ad : AnnData 229 | The annotated data object from which .uns will be checked. 
230 | fate_names : list of str 231 | List of branch (fate) names. 232 | palantir_fates_colors : dict or list or None, optional 233 | If a dict, keys should be branch names with a color for each. 234 | If a list, its order is assumed to correspond to fate_names. 235 | If None, only the predefined colors (if any) and generated defaults are used. 236 | 237 | Returns 238 | ------- 239 | dict 240 | Mapping from branch names to colors. 241 | """ 242 | # Get any predefined colors stored in ad.uns. 243 | predefined = {} 244 | if "palantir_fates_colors" in ad.uns: 245 | predefined = ad.uns["palantir_fates_colors"] 246 | 247 | # Process user-provided colors from argument. 248 | provided = {} 249 | if palantir_fates_colors is not None: 250 | if isinstance(palantir_fates_colors, dict): 251 | provided = palantir_fates_colors 252 | elif isinstance(palantir_fates_colors, list): 253 | if len(palantir_fates_colors) < len(fate_names): 254 | raise ValueError("Provided color list length is less than the number of branch names.") 255 | provided = {name: clr for name, clr in zip(fate_names, palantir_fates_colors)} 256 | else: 257 | raise TypeError("palantir_fates_colors must be a dict, list, or None.") 258 | 259 | # Merge: user-provided takes precedence, then predefined. 260 | mapping = {} 261 | for branch in fate_names: 262 | if branch in provided: 263 | mapping[branch] = provided[branch] 264 | elif branch in predefined: 265 | mapping[branch] = predefined[branch] 266 | 267 | # Collect already used colors to exclude duplicates. 268 | used_colors = set(mapping.values()) 269 | 270 | # Generate colors for missing branches. 271 | missing = [branch for branch in fate_names if branch not in mapping] 272 | if missing: 273 | # Get the default color cycle. 274 | default_cycle = plt.rcParams['axes.prop_cycle'].by_key().get( 275 | 'color', ['C0', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9'] 276 | ) 277 | # Create a generator that skips colors already used. 
278 | def color_generator(exclude): 279 | for clr in default_cycle: 280 | if clr not in exclude: 281 | yield clr 282 | hex_digits = np.array(list("0123456789ABCDEF")) 283 | # If default cycle is exhausted, generate random colors. 284 | while True: 285 | new_color = "#" + "".join(np.random.choice(hex_digits, size=6)) 286 | if new_color not in exclude: 287 | yield new_color 288 | 289 | gen = color_generator(used_colors) 290 | for branch in missing: 291 | new_color = next(gen) 292 | mapping[branch] = new_color 293 | used_colors.add(new_color) 294 | 295 | return mapping 296 | 297 | 298 | def _plot_arrows(x, y, n=5, ax=None, arrowprops=dict(), arrow_zorder=2, head_offset=0.0, **kwargs): 299 | """ 300 | Helper function to plot arrows on a trajectory line. 301 | 302 | The new 'head_offset' parameter (as a fraction of the segment length) 303 | moves the arrow head slightly forward. 304 | 305 | Parameters 306 | ---------- 307 | x, y : array-like 308 | Coordinates of the trajectory points. 309 | n : int, optional 310 | Number of arrows to plot. Defaults to 5. 311 | ax : matplotlib.axes.Axes, optional 312 | Axes to plot on. 313 | arrowprops : dict, optional 314 | Properties for the arrow style. 315 | arrow_zorder : int, optional 316 | zorder level for both the line and arrow annotations. 317 | head_offset : float, optional 318 | Fraction of the segment length to move the arrow head forward. 319 | **kwargs : 320 | Extra keyword arguments passed to the plot function. 321 | 322 | Returns 323 | ------- 324 | matplotlib.axes.Axes 325 | The axis with the arrows plotted. 326 | """ 327 | if ax is None: 328 | fig, ax = plt.subplots() 329 | 330 | default_kwargs = {"color": "black", "zorder": arrow_zorder} 331 | default_kwargs.update(kwargs) 332 | 333 | # Plot the trajectory line. 
def filter_counts_data(data, cell_min_molecules=1000, genes_min_cells=10):
    """Remove low molecule count cells and low detection genes

    :param data: Counts matrix: Cells x Genes
    :param cell_min_molecules: Minimum number of molecules per cell
    :param genes_min_cells: Minimum number of cells in which a gene is detected
    :return: Filtered counts matrix
    """

    # Molecules per cell (row sums).
    ms = data.sum(axis=1)
    # Number of cells expressing each gene. A plain column sum would count
    # molecules instead of expressing cells, contradicting the documented
    # meaning of genes_min_cells.
    cs = (data > 0).sum(axis=0)

    # Filter (strict thresholds, matching the original semantics)
    return data.loc[ms.index[ms > cell_min_molecules], cs.index[cs > genes_min_cells]]


def normalize_counts(data):
    """Correct the counts for molecule count variability

    Each cell is scaled so its total molecule count equals the median
    per-cell count of the input matrix.

    :param data: Counts matrix: Cells x Genes
    :return: Normalized matrix
    """
    ms = data.sum(axis=1)
    norm_df = data.div(ms, axis=0).mul(np.median(ms), axis=0)
    return norm_df


def log_transform(data, pseudo_count=0.1):
    """Log transform the matrix

    Values become log2(x + pseudo_count) - log2(pseudo_count), so zero
    counts map to exactly 0.

    :param data: Counts matrix: Cells x Genes or Anndata object
    :param pseudo_count: Pseudo count added before the log to avoid log(0)
    :return: Log transformed matrix for array/DataFrame input;
        None for AnnData input, which is modified in place
    """
    if isinstance(data, anndata.AnnData):
        if issparse(data.X):
            data.X.data = np.log2(data.X.data + pseudo_count) - np.log2(pseudo_count)
        else:
            data.X = np.log2(data.X + pseudo_count) - np.log2(pseudo_count)
    else:
        # Apply the same zero-anchored shift as the AnnData branch above;
        # previously this branch omitted "- log2(pseudo_count)", making the
        # two input types inconsistent by a constant offset.
        return np.log2(data + pseudo_count) - np.log2(pseudo_count)
29 | 30 | Raises 31 | ------ 32 | KeyError 33 | If the key or its corresponding columns are not found in the AnnData object. 34 | """ 35 | if key not in ad.obsm: 36 | raise KeyError(f"{key} not found in ad.obsm") 37 | data = ad.obsm[key] 38 | if not isinstance(data, pd.DataFrame): 39 | if key + "_columns" not in ad.uns: 40 | raise KeyError( 41 | f"{key}_columns not found in ad.uns and ad.obsm[key] is not a DataFrame." 42 | ) 43 | data_names = list(ad.uns[key + "_columns"]) 44 | if as_df: 45 | data = pd.DataFrame(data, columns=data_names, index=ad.obs_names) 46 | else: 47 | data_names = list(data.columns) 48 | if not as_df: 49 | data = data.values 50 | return data, data_names 51 | 52 | 53 | def _validate_varm_key(ad, key, as_df=True): 54 | """ 55 | Validates and retrieves the data associated with a specified key from the provided AnnData object's varm attribute. 56 | 57 | Parameters 58 | ---------- 59 | ad : AnnData 60 | The annotated data matrix from which the data is to be retrieved. 61 | key : str 62 | The key for accessing the data from the AnnData object's varm. 63 | as_df : bool, optional 64 | If True, the trends will be returned as pandas DataFrame with pseudotime as column names. 65 | If False, the trends will be returned as numpy array. 66 | Default is True. 67 | 68 | Returns 69 | ------- 70 | data : Union[pd.DataFrame, np.ndarray] 71 | A DataFrame or numpy array containing the data associated with the specified key. 72 | data_names : np.ndarray 73 | A an array of pseudotimes. 74 | 75 | Raises 76 | ------ 77 | KeyError 78 | If the key or its corresponding columns are not found in the AnnData object. 79 | """ 80 | if key not in ad.varm: 81 | raise KeyError(f"{key} not found in ad.varm") 82 | data = ad.varm[key] 83 | if not isinstance(data, pd.DataFrame): 84 | if key + "_pseudotime" not in ad.uns: 85 | raise KeyError( 86 | f"{key}_pseudotime not found in ad.uns and ad.varm[key] is not a DataFrame." 
87 | ) 88 | data_names = np.array(ad.uns[key + "_pseudotime"]) 89 | if as_df: 90 | data = pd.DataFrame(data, columns=data_names, index=ad.var_names) 91 | else: 92 | data_names = np.array(data.columns.astype(float)) 93 | if not as_df: 94 | data = data.values 95 | return data, data_names 96 | 97 | 98 | def _validate_gene_trend_input( 99 | data: Union[AnnData, Dict], 100 | gene_trend_key: str = "gene_trends", 101 | branch_names: Union[str, List[str]] = "branch_masks", 102 | ) -> Dict: 103 | """ 104 | Validates the input for gene trend plots, and converts it into a dictionary of gene trends. 105 | 106 | Parameters 107 | ---------- 108 | data : Union[AnnData, Dict] 109 | An AnnData object or a dictionary containing gene trends. 110 | gene_trend_key : str, optional 111 | Key to access gene trends in the varm of the AnnData object. Default is 'gene_trends'. 112 | branch_names : Union[str, List[str]], optional 113 | Key to retrieve branch names from the AnnData object or a list of branch names. If a string is provided, 114 | it is assumed to be a key in AnnData.uns. Default is 'branch_masks'. 115 | 116 | Returns 117 | ------- 118 | gene_trends : Dict 119 | A dictionary containing gene trends. 120 | 121 | Raises 122 | ------ 123 | KeyError 124 | If 'branch_names' is a string that is not found in .uns, or if 'gene_trend_key + "_" + branch_name' 125 | is not found in .varm. 126 | ValueError 127 | If 'data' is neither an AnnData object nor a dictionary. 
128 | """ 129 | if isinstance(data, AnnData): 130 | if isinstance(branch_names, str): 131 | if branch_names in data.uns.keys(): 132 | branch_names = data.uns[branch_names] 133 | elif branch_names in data.obsm.keys() and isinstance( 134 | data.obsm[branch_names], pd.DataFrame 135 | ): 136 | branch_names = list(data.obsm[branch_names].columns) 137 | elif branch_names + "_columns" in data.uns.keys(): 138 | branch_names = data.uns[branch_names + "_columns"] 139 | else: 140 | raise KeyError( 141 | f"The provided key '{branch_names}' is not found in AnnData.uns or as a DataFrame in AnnData.obsm. " 142 | "Please ensure the 'branch_names' either exists in AnnData.uns or is a list of branch names." 143 | ) 144 | 145 | gene_trends = dict() 146 | for branch in branch_names: 147 | trends, pt_grid = _validate_varm_key(data, gene_trend_key + "_" + branch) 148 | gene_trends[branch] = {"trends": trends} 149 | elif isinstance(data, Dict): 150 | gene_trends = data 151 | else: 152 | raise ValueError( 153 | "The input 'data' must be an instance of either AnnData object or dictionary." 
"""Version information."""
import importlib.metadata

# Fallback used when the package metadata is unavailable (e.g. running from
# a source checkout without installation); keep in sync with pyproject.toml.
_FALLBACK_VERSION = "1.4.2rc"

try:
    # Prefer the version recorded in the installed package metadata.
    __version__ = importlib.metadata.version("palantir")
except importlib.metadata.PackageNotFoundError:
    __version__ = _FALLBACK_VERSION

__author__ = "Palantir development team"
__author_email__ = "manu.talanki@gmail.com"
@pytest.fixture
def mock_umap_df():
    """Random 2-D UMAP-like embedding for 50 cells (columns UMAP1/UMAP2)."""
    n_cells = 50
    return pd.DataFrame(
        np.random.rand(n_cells, 2),
        columns=["UMAP1", "UMAP2"],
        index=[f"cell_{i}" for i in range(n_cells)],
    )


@pytest.fixture
def mock_gene_data():
    """Random expression matrix: 50 cells x 5 genes."""
    n_cells = 50
    n_genes = 5
    return pd.DataFrame(
        np.random.rand(n_cells, n_genes),
        columns=[f"gene_{i}" for i in range(n_genes)],
        index=[f"cell_{i}" for i in range(n_cells)],
    )


@pytest.fixture
def mock_dm_res():
    """Mock diffusion-map result: EigenVectors DataFrame plus EigenValues array."""
    n_cells = 50
    n_components = 10
    return {
        "EigenVectors": pd.DataFrame(
            np.random.rand(n_cells, n_components),
            columns=[f"DC_{i}" for i in range(n_components)],
            index=[f"cell_{i}" for i in range(n_cells)],
        ),
        "EigenValues": np.random.rand(n_components),
    }


@pytest.fixture
def mock_clusters():
    """Random integer cluster labels in [0, 5) for 50 cells."""
    n_cells = 50
    return pd.Series(
        np.random.randint(0, 5, n_cells),
        index=[f"cell_{i}" for i in range(n_cells)],
    )


@pytest.fixture
def mock_gene_trends():
    """Mock gene-trend matrix: 25 pseudotime bins (index 0..1) x 5 genes."""
    n_bins = 25
    n_genes = 5
    return pd.DataFrame(
        np.random.rand(n_bins, n_genes),
        columns=[f"gene_{i}" for i in range(n_genes)],
        index=np.linspace(0, 1, n_bins),
    )
# Test with basic DataFrame input
@pytest.mark.filterwarnings("ignore:np.find_common_type is deprecated.")
@pytest.mark.filterwarnings(
    "ignore:Changing the sparsity structure of a csr_matrix is expensive."
)
def test_palantir_dataframe(mock_data):
    """run_palantir on a plain DataFrame should return a PResults bundle."""
    result = run_palantir(mock_data, "cell_0")
    assert isinstance(result, PResults), "Should return a PResults object"


# Test with basic AnnData input
@pytest.mark.filterwarnings("ignore:np.find_common_type is deprecated.")
@pytest.mark.filterwarnings(
    "ignore:Changing the sparsity structure of a csr_matrix is expensive."
)
def test_palantir_anndata(mock_anndata):
    """run_palantir on AnnData should write its outputs into obs/obsm/uns."""
    run_palantir(mock_anndata, "cell_0")
    assert (
        "palantir_pseudotime" in mock_anndata.obs.keys()
    ), "Pseudotime key missing in AnnData object"
    assert (
        "palantir_entropy" in mock_anndata.obs.keys()
    ), "Entropy key missing in AnnData object"
    assert (
        "palantir_fate_probabilities" in mock_anndata.obsm.keys()
    ), "Fate probability key missing in AnnData object"
    assert (
        "palantir_waypoints" in mock_anndata.uns.keys()
    ), "Waypoint key missing in AnnData object"
def test_PResults():
    """PResults should expose its constructor arguments unchanged."""
    # Minimal dummy inputs; entropy and waypoints are deliberately absent.
    pt = np.array([0.1, 0.2, 0.3, 0.4, 0.5])
    probs = pd.DataFrame(
        {"branch1": [0.1, 0.2, 0.3, 0.4, 0.5], "branch2": [0.5, 0.4, 0.3, 0.2, 0.1]}
    )

    # Initialize PResults object
    res = palantir.presults.PResults(pt, None, probs, None)

    # Attributes must round-trip; branch_probs is exposed as a values array.
    assert np.array_equal(res.pseudotime, pt)
    assert res.entropy is None
    assert res.waypoints is None
    assert np.array_equal(res.branch_probs, probs.values)
@pytest.fixture
def mock_adata():
    """AnnData with palantir_pseudotime in obs and a boolean branch_masks DataFrame in obsm."""
    n_cells = 10

    # Create mock data
    adata = AnnData(
        X=np.random.rand(n_cells, 3),
        obs=pd.DataFrame(
            {"palantir_pseudotime": np.random.rand(n_cells)},
            index=[f"cell_{i}" for i in range(n_cells)],
        ),
        var=pd.DataFrame(index=[f"gene_{i}" for i in range(3)]),
    )

    # Two random boolean branch masks stored as a DataFrame (current storage format).
    adata.obsm["branch_masks"] = pd.DataFrame(
        np.random.randint(2, size=(n_cells, 2)),
        columns=["branch_1", "branch_2"],
        index=adata.obs_names,
    ).astype(bool)

    return adata


@pytest.fixture
def custom_mock_adata():
    """Like mock_adata but with non-default key names (custom_time, custom_masks)."""
    n_cells = 10

    # Create mock data
    adata = AnnData(
        X=np.random.rand(n_cells, 3),
        obs=pd.DataFrame(
            {"custom_time": np.random.rand(n_cells)},
            index=[f"cell_{i}" for i in range(n_cells)],
        ),
        var=pd.DataFrame(index=[f"gene_{i}" for i in range(3)]),
    )

    adata.obsm["custom_masks"] = pd.DataFrame(
        np.random.randint(2, size=(n_cells, 2)),
        columns=["branch_1", "branch_2"],
        index=adata.obs_names,
    ).astype(bool)

    return adata
@pytest.mark.parametrize("adata_fixture", ["mock_adata", "mock_adata_old"])
def test_compute_gene_trends(request, adata_fixture):
    """compute_gene_trends should accept both current and legacy mask storage."""
    adata = request.getfixturevalue(adata_fixture)

    # Call the function with default keys
    res = palantir.presults.compute_gene_trends(adata)

    # Asserts to check the output
    assert isinstance(res, dict)
    assert "branch_1" in res
    assert "branch_2" in res
    assert isinstance(res["branch_1"], dict)
    assert isinstance(res["branch_1"]["trends"], pd.DataFrame)
    assert "gene_0" in res["branch_1"]["trends"].index
    # Trends are also written to varm: 3 genes x 500 pseudotime grid points.
    assert adata.varm["gene_trends_branch_1"].shape == (3, 500)


def test_compute_gene_trends_custom_anndata(custom_mock_adata):
    """compute_gene_trends should honor custom masks/pseudotime/trend key names."""
    # Call the function with custom keys
    res = palantir.presults.compute_gene_trends(
        custom_mock_adata,
        masks_key="custom_masks",
        pseudo_time_key="custom_time",
        gene_trend_key="custom_trends",
    )

    # Asserts to check the output with custom keys
    assert isinstance(res, dict)
    assert "branch_1" in res
    assert "branch_2" in res
    assert isinstance(res["branch_1"], dict)
    assert isinstance(res["branch_1"]["trends"], pd.DataFrame)
    assert "gene_0" in res["branch_1"]["trends"].index
    assert custom_mock_adata.varm["custom_trends_branch_1"].shape == (3, 500)
# Test with basic DataFrame input
@pytest.mark.filterwarnings("ignore:np.find_common_type is deprecated.")
@pytest.mark.filterwarnings("ignore:Changing the sparsity structure of a csr_matrix is expensive.")
def test_palantir_dataframe(mock_data):
    """run_palantir on a plain DataFrame should return a PResults bundle."""
    result = run_palantir(mock_data, "cell_0")
    assert isinstance(result, PResults), "Should return a PResults object"


# Test with basic AnnData input
@pytest.mark.filterwarnings("ignore:np.find_common_type is deprecated.")
@pytest.mark.filterwarnings("ignore:Changing the sparsity structure of a csr_matrix is expensive.")
def test_palantir_anndata(mock_anndata):
    """run_palantir on AnnData should write its outputs into obs/obsm/uns."""
    run_palantir(mock_anndata, "cell_0")
    assert (
        "palantir_pseudotime" in mock_anndata.obs.keys()
    ), "Pseudotime key missing in AnnData object"
    assert "palantir_entropy" in mock_anndata.obs.keys(), "Entropy key missing in AnnData object"
    assert (
        "palantir_fate_probabilities" in mock_anndata.obsm.keys()
    ), "Fate probability key missing in AnnData object"
    assert "palantir_waypoints" in mock_anndata.uns.keys(), "Waypoint key missing in AnnData object"


# Test terminal states
@pytest.mark.filterwarnings("ignore:np.find_common_type is deprecated.")
@pytest.mark.filterwarnings("ignore:Changing the sparsity structure of a csr_matrix is expensive.")
def test_palantir_terminal_states(mock_data):
    """User-supplied terminal states should appear as fate probability columns."""
    result = run_palantir(mock_data, "cell_0", terminal_states=["cell_1", "cell_2"])
    assert "cell_1" in result.branch_probs.columns, "Terminal state cell_1 missing"
    assert "cell_2" in result.branch_probs.columns, "Terminal state cell_2 missing"
@pytest.mark.filterwarnings("ignore:np.find_common_type is deprecated.")
@pytest.mark.filterwarnings("ignore:Changing the sparsity structure of a csr_matrix is expensive.")
def test_scaling_components(mock_data):
    """Toggling scale_components must change the resulting pseudotime."""
    result1 = run_palantir(mock_data, "cell_0", scale_components=True)
    result2 = run_palantir(mock_data, "cell_0", scale_components=False)
    assert not np.array_equal(
        result1.pseudotime, result2.pseudotime
    ), "Scaling components should affect pseudotime"


# Test for invalid knn
def test_invalid_knn(mock_data):
    """knn=0 is not a valid neighborhood size and should raise ValueError."""
    with pytest.raises(ValueError):
        run_palantir(mock_data, "cell_0", knn=0)
def test_palantir_reproducibility(processed_data):
    """Test that Palantir results are reproducible.

    Runs the full Palantir pipeline with the start cell and terminal states
    from the sample notebook and checks fate probabilities, pseudotime and
    entropy against the notebook's expected values (with tolerances).
    """
    ad = processed_data.copy()

    # Set up terminal states (same as sample notebook)
    terminal_states = pd.Series(
        ["DC", "Mono", "Ery"],
        index=["Run5_131097901611291", "Run5_134936662236454", "Run4_200562869397916"],
    )

    # Set start cell (same as sample notebook)
    start_cell = "Run5_164698952452459"

    # Run Palantir
    pr_res = palantir.core.run_palantir(
        ad, start_cell, num_waypoints=500, terminal_states=terminal_states
    )

    # Expected values for the start cell
    # These are expected probabilities for the start cell from the sample notebook
    expected_probs = {"Ery": 0.33, "DC": 0.33, "Mono": 0.33}

    # Get actual values
    actual_probs = pr_res.branch_probs.loc[start_cell]

    # Check that probabilities are close to expected (start cell should be roughly equal probabilities)
    for branch, expected in expected_probs.items():
        assert (
            np.abs(actual_probs[branch] - expected) < 0.15
        ), f"Branch {branch} probability differs more than expected"

    # Expected values for terminal state cells
    for term_cell, term_name in terminal_states.items():
        # Terminal state cell should have high probability for its own fate
        assert (
            pr_res.branch_probs.loc[term_cell, term_name] > 0.7
        ), f"Terminal state {term_name} doesn't have high probability"

    # Pseudotime should be 0 for start cell (or very close)
    assert pr_res.pseudotime[start_cell] < 0.05, "Start cell pseudotime should be close to 0"

    # Entropy should be high for start cell (multipotent state)
    assert pr_res.entropy[start_cell] > 0.8, "Start cell entropy should be high"

    # Terminal states should have low entropy
    for term_cell in terminal_states.index:
        assert (
            pr_res.entropy[term_cell] < 0.5
        ), f"Terminal state {term_cell} should have low entropy"


def test_branch_selection(processed_data):
    """Test the branch selection functionality.

    After running Palantir, ``select_branch_cells`` should produce one boolean
    mask per terminal state, store it in ``obsm``, and select each terminal
    cell within its own branch.
    """
    ad = processed_data.copy()

    # Set up terminal states
    terminal_states = pd.Series(
        ["DC", "Mono", "Ery"],
        index=["Run5_131097901611291", "Run5_134936662236454", "Run4_200562869397916"],
    )

    # Run Palantir
    start_cell = "Run5_164698952452459"
    palantir.core.run_palantir(ad, start_cell, num_waypoints=500, terminal_states=terminal_states)

    # Run branch selection (eps=0 keeps the selection tight)
    masks = palantir.presults.select_branch_cells(ad, eps=0)

    # Check that the masks were computed correctly
    assert masks.shape[1] == 3, "Should have 3 branches selected"
    assert masks.shape[0] == ad.n_obs, "Should have a mask for each cell"

    # Check that the masks were stored in the AnnData object
    assert "branch_masks" in ad.obsm, "Branch masks should be stored in obsm"

    # Check that terminal cells are selected in their respective branches
    for term_cell, term_name in terminal_states.items():
        branch_idx = list(ad.obsm["palantir_fate_probabilities"].columns).index(term_name)
        assert masks[ad.obs_names == term_cell, branch_idx][
            0
        ], f"Terminal cell {term_name} should be selected in its branch"


def test_gene_trends(processed_data):
    """Test gene trend computation.

    Verifies that trends are returned per branch, stored in ``varm``, and that
    known marker genes follow their expected direction along the erythroid
    lineage (CD34 down, GATA1 up).
    """
    ad = processed_data.copy()

    # Set up terminal states
    terminal_states = pd.Series(
        ["DC", "Mono", "Ery"],
        index=["Run5_131097901611291", "Run5_134936662236454", "Run4_200562869397916"],
    )

    # Run Palantir
    start_cell = "Run5_164698952452459"
    palantir.core.run_palantir(ad, start_cell, num_waypoints=500, terminal_states=terminal_states)

    # Select branch cells
    palantir.presults.select_branch_cells(ad, eps=0)

    # Compute gene trends
    gene_trends = palantir.presults.compute_gene_trends(
        ad,
        expression_key="MAGIC_imputed_data",
    )

    # Expected gene expression patterns
    # CD34 should decrease along all lineages (stem cell marker)
    # GATA1 should increase in erythroid lineage
    # MPO should increase in monocyte lineage
    # IRF8 should increase in DC lineage

    # Check that gene trends were computed for all branches
    assert "Ery" in gene_trends, "Erythroid gene trends missing"
    assert "DC" in gene_trends, "DC gene trends missing"
    assert "Mono" in gene_trends, "Monocyte gene trends missing"

    # Check that gene trends were stored in the AnnData object
    assert "gene_trends_Ery" in ad.varm, "Erythroid gene trends not stored in varm"

    # Get the trend data for specific genes
    cd34_ery = ad.varm["gene_trends_Ery"].loc["CD34"].values
    gata1_ery = ad.varm["gene_trends_Ery"].loc["GATA1"].values

    # CD34 should decrease in erythroid lineage (end lower than start)
    assert cd34_ery[0] > cd34_ery[-1], "CD34 should decrease along erythroid lineage"

    # GATA1 should increase in erythroid lineage (end higher than start)
    assert gata1_ery[0] < gata1_ery[-1], "GATA1 should increase along erythroid lineage"


def test_clustering_gene_trends(processed_data):
    """Test clustering of gene trends.

    Clusters the erythroid trends of a small marker-gene panel and checks the
    result is stored in ``var`` and that the co-regulated pair CD79A/CD79B
    lands in the same cluster.
    """
    ad = processed_data.copy()

    # Set up terminal states
    terminal_states = pd.Series(
        ["DC", "Mono", "Ery"],
        index=["Run5_131097901611291", "Run5_134936662236454", "Run4_200562869397916"],
    )

    # Run Palantir
    start_cell = "Run5_164698952452459"
    palantir.core.run_palantir(ad, start_cell, num_waypoints=500, terminal_states=terminal_states)

    # Select branch cells
    palantir.presults.select_branch_cells(ad, eps=0)

    # Compute gene trends
    palantir.presults.compute_gene_trends(
        ad,
        expression_key="MAGIC_imputed_data",
    )

    # Select a subset of genes for clustering
    genes = ["CD34", "MPO", "GATA1", "IRF8", "CSF1R", "ITGA2B", "CD79A", "CD79B"]

    # Cluster gene trends
    clusters = palantir.presults.cluster_gene_trends(ad, "Ery", genes)

    # Check that all genes were clustered
    assert len(clusters) == len(genes), "Not all genes were clustered"

    # Check that clusters were stored in the AnnData object
    assert "gene_trends_clusters" in ad.var, "Clusters should be stored in var"

    # Related genes should be clustered together
    # For example, CD79A and CD79B should be in the same cluster
    cd79a_cluster = clusters.loc["CD79A"]
    cd79b_cluster = clusters.loc["CD79B"]
    assert cd79a_cluster == cd79b_cluster, "CD79A and CD79B should be in the same cluster"
@pytest.fixture
def example_dataframe():
    """Counts matrix with one all-zero row (``Y``) and one all-zero column
    (``C``) so that ``_clean_up`` has something to remove."""
    return pd.DataFrame(
        [[1, 2, 0, 4], [0, 0, 0, 0], [3, 0, 0, 0]],
        columns=["A", "B", "C", "D"],
        index=["X", "Y", "Z"],
    )


@pytest.fixture
def mock_10x_h5(tmp_path):
    """Write a minimal 10x-style HDF5 file and return its path as ``str``.

    Datasets are written both directly under ``matrix`` and under the
    ``matrix/features`` group so the reader can resolve gene metadata from
    either layout.
    """
    # Number of genes and cells
    n_genes = 400
    n_cells = 300

    # Simulate a sparse gene expression matrix (genes x cells, CSC as 10x uses)
    data = np.random.poisson(lam=0.3, size=(n_genes, n_cells))
    sparse_matrix = csc_matrix(data)

    # Create barcodes, gene names, etc.
    barcodes = np.array([f"Cell_{i:05d}-1" for i in range(n_cells)])
    gene_names = np.array([f"Gene_{i}" for i in range(n_genes)])
    feature_type = np.array(["Gene Expression" for i in range(n_genes)])
    # NOTE: a stray, unused ``features = np.array(["gene",])`` local was
    # removed here; it was never referenced by the fixture.
    genome = np.array([f"genome_{i%4}" for i in range(n_genes)])

    # Creating an HDF5 file
    hdf5_file = tmp_path / "mock_10x_v3_data.h5"
    with h5py.File(hdf5_file, "w") as f:
        f.create_group("matrix")
        f["matrix"].create_dataset("shape", data=np.array(sparse_matrix.shape))
        f["matrix"].create_dataset("data", data=sparse_matrix.data)
        f["matrix"].create_dataset("indices", data=sparse_matrix.indices)
        f["matrix"].create_dataset("indptr", data=sparse_matrix.indptr)
        f["matrix"].create_dataset("barcodes", data=barcodes.astype("S"))
        f["matrix"].create_dataset("name", data=gene_names.astype("S"))
        f["matrix"].create_dataset("id", data=gene_names.astype("S"))
        f["matrix"].create_dataset("feature_type", data=feature_type.astype("S"))
        f["matrix"].create_dataset("genome", data=genome.astype("S"))

        f["matrix"].create_group("features")
        f["matrix/features"].create_dataset("name", data=gene_names.astype("S"))
        f["matrix/features"].create_dataset("id", data=gene_names.astype("S"))
        f["matrix/features"].create_dataset("feature_type", data=feature_type.astype("S"))
        f["matrix/features"].create_dataset("genome", data=genome.astype("S"))

    return str(hdf5_file)


def test_clean_up(example_dataframe):
    """_clean_up should drop the all-zero cell row and all-zero gene column."""
    cleaned_df = _clean_up(example_dataframe)
    assert len(cleaned_df) == 2
    assert len(cleaned_df.columns) == 3


def test_from_csv(tmp_path, example_dataframe):
    """from_csv should load a CSV and apply the same cleanup as _clean_up."""
    csv_file = tmp_path / "test.csv"
    example_dataframe.to_csv(csv_file)

    clean_df = from_csv(csv_file)
    assert len(clean_df) == 2
    assert len(clean_df.columns) == 3


def test_from_mtx(tmp_path):
    """from_mtx should load a MatrixMarket file with a gene-name sidecar."""
    mtx_file = tmp_path / "test.mtx"
    gene_name_file = tmp_path / "gene_names.txt"

    # Create a mock mtx file (3 cells x 4 genes, 6 non-zero entries)
    mtx_data = [
        "%%MatrixMarket matrix coordinate integer general",
        "3 4 6",
        "1 1 1",
        "1 2 2",
        "2 4 3",
        "3 1 3",
        "3 2 4",
        "3 3 5",
    ]
    with open(mtx_file, "w") as f:
        f.write("\n".join(mtx_data))

    # Create gene names file
    gene_names = ["Gene1", "Gene2", "Gene3", "Gene4"]
    np.savetxt(gene_name_file, gene_names, fmt="%s")

    clean_df = from_mtx(mtx_file, gene_name_file)
    assert len(clean_df) == 3
    assert len(clean_df.columns) == 4


def test_from_10x(tmp_path):
    """from_10x should load a legacy 10x directory (matrix/genes/barcodes)."""
    data_dir = tmp_path / "data"
    os.makedirs(data_dir, exist_ok=True)

    matrix_file = data_dir / "matrix.mtx"
    gene_file = data_dir / "genes.tsv"
    barcode_file = data_dir / "barcodes.tsv"

    mmwrite(str(matrix_file), csr_matrix([[1, 2], [3, 4]]))
    np.savetxt(str(gene_file), ["Gene1", "Gene2"], fmt="%s")
    np.savetxt(str(barcode_file), ["Cell1", "Cell2"], fmt="%s")

    clean_df = from_10x(str(data_dir))
    # Removed a leftover debug ``print(clean_df)`` that cluttered test output.
    assert len(clean_df) == 2
    assert len(clean_df.columns) == 2


def test_from_10x_HDF5(mock_10x_h5):
    """from_10x_HDF5 should return a cells x genes frame from the mock file."""
    clean_df = from_10x_HDF5(mock_10x_h5)
    assert len(clean_df) == 300
    assert len(clean_df.columns) == 400


@pytest.mark.skipif(not FCSPARSER_AVAILABLE, reason="fcsparser not installed")
def test_from_fcs():
    """from_fcs should parse the fcsparser bundled sample file."""
    df = from_fcs(None, fcsparser.test_sample_path)
    assert len(df) == 14945
    assert len(df.columns) == 10


def test_filter_counts_data():
    """Test filtering of low count cells and genes"""
    # Create test data: cell2 has zero molecules and should always be dropped
    data = pd.DataFrame(
        [[10, 0, 5, 8], [0, 0, 0, 0], [15, 20, 0, 0]],
        columns=["gene1", "gene2", "gene3", "gene4"],
        index=["cell1", "cell2", "cell3"],
    )

    # Test with minimal thresholds to match our test data
    filtered = filter_counts_data(data, cell_min_molecules=1, genes_min_cells=1)
    assert filtered.shape == (2, 4)  # Only cell2 should be filtered out
    assert "cell2" not in filtered.index
    assert "gene1" in filtered.columns

    # Test with higher thresholds
    filtered = filter_counts_data(data, cell_min_molecules=20, genes_min_cells=1)
    # Based on actual implementation behavior
    assert len(filtered) > 0  # At least some cells remain
    assert "cell2" not in filtered.index  # cell2 should be filtered out
columns=["gene1", "gene2", "gene3"], 38 | index=["cell1", "cell2", "cell3"], 39 | ) 40 | 41 | # Test normalization 42 | normalized = normalize_counts(data) 43 | 44 | # Check that row sums are equal (or very close due to floating point) 45 | row_sums = normalized.sum(axis=1) 46 | assert np.allclose(row_sums, row_sums.iloc[0]) 47 | 48 | # Check relative abundances are maintained 49 | assert normalized.loc["cell1", "gene1"] > normalized.loc["cell1", "gene2"] 50 | assert normalized.loc["cell2", "gene2"] > normalized.loc["cell2", "gene1"] 51 | assert normalized.loc["cell3", "gene3"] > normalized.loc["cell3", "gene1"] 52 | 53 | 54 | def test_log_transform_dataframe(): 55 | """Test log transformation on DataFrame""" 56 | # Create test data 57 | data = pd.DataFrame( 58 | [[1, 2], [3, 4]], 59 | columns=["gene1", "gene2"], 60 | index=["cell1", "cell2"], 61 | ) 62 | 63 | # Test with default pseudo_count 64 | transformed = log_transform(data) 65 | # The function returns np.log2(data + pseudo_count) 66 | expected = np.log2(data + 0.1) 67 | assert np.allclose(transformed, expected) 68 | 69 | # Test with custom pseudo_count 70 | transformed = log_transform(data, pseudo_count=1) 71 | expected = np.log2(data + 1) 72 | assert np.allclose(transformed, expected) 73 | 74 | 75 | def test_log_transform_anndata(): 76 | """Test log transformation on AnnData""" 77 | # Create dense AnnData 78 | X = np.array([[1, 2], [3, 4]]) 79 | adata = anndata.AnnData(X) 80 | 81 | # Test dense case 82 | original_X = adata.X.copy() 83 | log_transform(adata) 84 | # The implementation adds an offset to log2(x + pseudo_count) 85 | expected = np.log2(original_X + 0.1) - np.log2(0.1) 86 | assert np.allclose(adata.X, expected) 87 | 88 | # Create sparse AnnData 89 | X_sparse = csr_matrix(np.array([[1, 2], [3, 4]])) 90 | adata_sparse = anndata.AnnData(X_sparse) 91 | 92 | # Test sparse case 93 | original_data = X_sparse.data.copy() 94 | log_transform(adata_sparse) 95 | # The implementation adds an offset to log2(x + 
pseudo_count) 96 | expected_data = np.log2(original_data + 0.1) - np.log2(0.1) 97 | assert np.allclose(adata_sparse.X.data, expected_data) 98 | -------------------------------------------------------------------------------- /tests/test_presults.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import palantir 4 | 5 | 6 | def test_PResults(): 7 | # Create some dummy data 8 | pseudotime = np.array([0.1, 0.2, 0.3, 0.4, 0.5]) 9 | entropy = None 10 | branch_probs = pd.DataFrame( 11 | {"branch1": [0.1, 0.2, 0.3, 0.4, 0.5], "branch2": [0.5, 0.4, 0.3, 0.2, 0.1]} 12 | ) 13 | waypoints = None 14 | 15 | # Initialize PResults object 16 | presults = palantir.presults.PResults(pseudotime, entropy, branch_probs, waypoints) 17 | 18 | # Asserts to check attributes 19 | assert np.array_equal(presults.pseudotime, pseudotime) 20 | assert presults.entropy is None 21 | assert presults.waypoints is None 22 | assert np.array_equal(presults.branch_probs, branch_probs.values) 23 | 24 | 25 | def test_gam_fit_predict(): 26 | # Skip test if pygam is not installed 27 | try: 28 | import pygam 29 | except ImportError: 30 | import pytest 31 | pytest.skip("pygam not installed, skipping test_gam_fit_predict") 32 | 33 | # Create some dummy data 34 | x = np.array([0.1, 0.2, 0.3, 0.4, 0.5]) 35 | y = np.array([0.1, 0.2, 0.3, 0.4, 0.5]) 36 | weights = None 37 | pred_x = None 38 | n_splines = 4 39 | spline_order = 2 40 | 41 | try: 42 | # Call the function 43 | y_pred, stds = palantir.presults.gam_fit_predict(x, y, weights, pred_x, n_splines, spline_order) 44 | 45 | # Asserts to check the output 46 | assert isinstance(y_pred, np.ndarray) 47 | assert isinstance(stds, np.ndarray) 48 | except Exception as e: 49 | import pytest 50 | if "csr_matrix" in str(e) and "attribute 'A'" in str(e): 51 | pytest.skip("scipy/pygam compatibility issue, skipping test") 52 | else: 53 | raise 54 | 
def test_cluster_gene_trends_basic():
    """Test basic functionality of cluster_gene_trends.

    Builds three groups of 10 genes each with distinct trend shapes
    (increasing, decreasing, bell) and checks the clustering groups them.
    """
    # Create a simple DataFrame of gene trends
    n_genes = 30
    n_timepoints = 50

    # Create some patterns that should cluster together
    timepoints = np.linspace(0, 1, n_timepoints)

    # Create random trends with some patterns (seeded for reproducibility)
    np.random.seed(42)
    trends = pd.DataFrame(index=[f"gene_{i}" for i in range(n_genes)], columns=timepoints)

    # First 10 genes follow similar pattern (increasing)
    for i in range(10):
        trends.iloc[i] = np.linspace(0, 1, n_timepoints) + np.random.normal(0, 0.1, n_timepoints)

    # Next 10 genes follow another pattern (decreasing)
    for i in range(10, 20):
        trends.iloc[i] = np.linspace(1, 0, n_timepoints) + np.random.normal(0, 0.1, n_timepoints)

    # Last 10 genes follow a third pattern (bell curve)
    for i in range(20, 30):
        trends.iloc[i] = np.sin(np.linspace(0, np.pi, n_timepoints)) + np.random.normal(
            0, 0.1, n_timepoints
        )

    # Test with DataFrame
    clusters = cluster_gene_trends(trends, "branch1")

    # Check output
    assert isinstance(clusters, pd.Series)
    assert len(clusters) == n_genes
    assert clusters.index.equals(trends.index)

    # There should be at least 2 clusters found
    assert len(clusters.unique()) >= 2

    # Check that similar genes are clustered together
    # First 10 genes should mostly be in the same cluster
    first_cluster = clusters.iloc[:10].mode().iloc[0]
    assert (
        clusters.iloc[:10] == first_cluster
    ).mean() > 0.5  # More than half should be in the same cluster


def test_cluster_gene_trends_anndata():
    """Test cluster_gene_trends with AnnData input.

    Trends are read from ``varm['gene_trends_<branch>']`` and the resulting
    labels must be written back to ``var['gene_trends_clusters']``.
    """
    # Create AnnData object
    n_cells = 100
    n_genes = 30
    adata = AnnData(np.random.normal(0, 1, (n_cells, n_genes)))
    adata.var_names = [f"gene_{i}" for i in range(n_genes)]

    # Create gene trends for the branch
    n_timepoints = 50
    timepoints = np.linspace(0, 1, n_timepoints)
    branch_key = "test_branch"

    # Same trends as before
    trends = np.zeros((n_genes, n_timepoints))
    # First 10 genes
    for i in range(10):
        trends[i] = np.linspace(0, 1, n_timepoints) + np.random.normal(0, 0.1, n_timepoints)
    # Next 10 genes
    for i in range(10, 20):
        trends[i] = np.linspace(1, 0, n_timepoints) + np.random.normal(0, 0.1, n_timepoints)
    # Last 10 genes
    for i in range(20, 30):
        trends[i] = np.sin(np.linspace(0, np.pi, n_timepoints)) + np.random.normal(
            0, 0.1, n_timepoints
        )

    # Store the trends in AnnData
    adata.varm[f"gene_trends_{branch_key}"] = pd.DataFrame(
        trends, index=adata.var_names, columns=[str(t) for t in timepoints]
    )

    # Run clustering
    clusters = cluster_gene_trends(adata, branch_key, gene_trend_key="gene_trends")

    # Check output
    assert isinstance(clusters, pd.Series)
    assert len(clusters) == n_genes
    assert clusters.index.equals(adata.var_names)

    # The clusters should be stored in the var annotation
    assert "gene_trends_clusters" in adata.var
    assert np.all(adata.var["gene_trends_clusters"] == clusters)


def test_cluster_gene_trends_custom_genes():
    """Test cluster_gene_trends with subset of genes.

    Only the requested genes should appear in the returned Series.
    """
    # Create a simple DataFrame of gene trends
    n_genes = 30
    n_timepoints = 50
    timepoints = np.linspace(0, 1, n_timepoints)

    # Create trends
    np.random.seed(42)
    trends = pd.DataFrame(
        np.random.normal(0, 1, (n_genes, n_timepoints)),
        index=[f"gene_{i}" for i in range(n_genes)],
        columns=timepoints,
    )

    # Select a subset of genes
    selected_genes = [f"gene_{i}" for i in range(0, n_genes, 2)]  # Every other gene

    # Test with subset of genes
    clusters = cluster_gene_trends(trends, "branch1", genes=selected_genes)

    # Check output
    assert isinstance(clusters, pd.Series)
    assert len(clusters) == len(selected_genes)
    assert set(clusters.index) == set(selected_genes)


def test_cluster_gene_trends_parameters():
    """Test cluster_gene_trends with custom parameters.

    Different ``n_neighbors`` values should change the Leiden graph and thus
    the resulting cluster assignments.
    """
    # Create a simple DataFrame of gene trends
    n_genes = 30
    n_timepoints = 50
    timepoints = np.linspace(0, 1, n_timepoints)

    # Create trends
    np.random.seed(42)
    trends = pd.DataFrame(
        np.random.normal(0, 1, (n_genes, n_timepoints)),
        index=[f"gene_{i}" for i in range(n_genes)],
        columns=timepoints,
    )

    # Test with custom parameters
    clusters1 = cluster_gene_trends(trends, "branch1", n_neighbors=10)
    clusters2 = cluster_gene_trends(trends, "branch1", n_neighbors=20)

    # The clusters should be different with different parameters
    assert (clusters1 != clusters2).any()


def test_cluster_gene_trends_error_handling():
    """Test error handling of cluster_gene_trends.

    AnnData inputs without the expected ``varm`` entries must raise KeyError.
    """
    # Create AnnData without varm data
    adata = AnnData(np.random.normal(0, 1, (10, 10)))

    # Should raise KeyError for missing gene_trend_key
    with pytest.raises(KeyError):
        cluster_gene_trends(adata, "branch1", gene_trend_key=None)

    # Should raise KeyError for missing branch data
    with pytest.raises(KeyError):
        cluster_gene_trends(adata, "nonexistent_branch", gene_trend_key="some_key")
@pytest.fixture
def mock_adata():
    """AnnData with pseudotime in ``obs`` and boolean branch masks stored as
    a DataFrame in ``obsm`` (the current storage format)."""
    n_cells = 10

    # Create mock data
    adata = AnnData(
        X=np.random.rand(n_cells, 3),
        obs=pd.DataFrame(
            {"palantir_pseudotime": np.random.rand(n_cells)},
            index=[f"cell_{i}" for i in range(n_cells)],
        ),
        var=pd.DataFrame(index=[f"gene_{i}" for i in range(3)]),
    )

    adata.obsm["branch_masks"] = pd.DataFrame(
        np.random.randint(2, size=(n_cells, 2)),
        columns=["branch_1", "branch_2"],
        index=adata.obs_names,
    ).astype(bool)

    return adata


@pytest.fixture
def custom_mock_adata():
    """Like ``mock_adata`` but with non-default key names, to exercise the
    ``masks_key``/``pseudo_time_key`` arguments."""
    n_cells = 10

    # Create mock data
    adata = AnnData(
        X=np.random.rand(n_cells, 3),
        obs=pd.DataFrame(
            {"custom_time": np.random.rand(n_cells)},
            index=[f"cell_{i}" for i in range(n_cells)],
        ),
        var=pd.DataFrame(index=[f"gene_{i}" for i in range(3)]),
    )

    adata.obsm["custom_masks"] = pd.DataFrame(
        np.random.randint(2, size=(n_cells, 2)),
        columns=["branch_1", "branch_2"],
        index=adata.obs_names,
    ).astype(bool)

    return adata


@pytest.fixture
def mock_adata_old():
    """AnnData using the legacy storage format: a bare boolean array in
    ``obsm`` with column names kept separately in ``uns``."""
    n_cells = 10

    # Create mock data
    adata = AnnData(
        X=np.random.rand(n_cells, 3),
        obs=pd.DataFrame(
            {"palantir_pseudotime": np.random.rand(n_cells)},
            index=[f"cell_{i}" for i in range(n_cells)],
        ),
        var=pd.DataFrame(index=[f"gene_{i}" for i in range(3)]),
    )

    # Create mock branch_masks in obsm (legacy layout)
    adata.obsm["branch_masks"] = np.random.randint(2, size=(n_cells, 2)).astype(bool)
    adata.uns["branch_masks_columns"] = ["branch_1", "branch_2"]

    return adata


@pytest.mark.parametrize("adata_fixture", ["mock_adata", "mock_adata_old"])
def test_compute_gene_trends(request, adata_fixture):
    """compute_gene_trends should handle both current and legacy mask storage
    and write per-branch trends into ``varm``."""
    adata = request.getfixturevalue(adata_fixture)

    # Call the function with default keys
    res = palantir.presults.compute_gene_trends(adata)

    # Asserts to check the output
    assert isinstance(res, dict)
    assert "branch_1" in res
    assert "branch_2" in res
    assert isinstance(res["branch_1"], dict)
    assert isinstance(res["branch_1"]["trends"], pd.DataFrame)
    assert "gene_0" in res["branch_1"]["trends"].index
    # 500 is the default pseudotime grid resolution
    assert adata.varm["gene_trends_branch_1"].shape == (3, 500)


def test_compute_gene_trends_custom_anndata(custom_mock_adata):
    """compute_gene_trends should honor custom obs/obsm/varm key names."""
    # Call the function with custom keys
    res = palantir.presults.compute_gene_trends(
        custom_mock_adata,
        masks_key="custom_masks",
        pseudo_time_key="custom_time",
        gene_trend_key="custom_trends",
    )

    # Asserts to check the output with custom keys
    assert isinstance(res, dict)
    assert "branch_1" in res
    assert "branch_2" in res
    assert isinstance(res["branch_1"], dict)
    assert isinstance(res["branch_1"]["trends"], pd.DataFrame)
    assert "gene_0" in res["branch_1"]["trends"].index
    assert custom_mock_adata.varm["custom_trends_branch_1"].shape == (3, 500)
# Handle scipy compatibility issues: recent scipy releases removed the sparse
# matrix ``.A`` attribute that older pygam versions rely on.
try:
    import scipy.sparse as sp

    test_matrix = sp.csr_matrix((1, 1))
    if not hasattr(test_matrix, "A"):
        pytestmark = pytest.mark.skip(reason="scipy/pygam compatibility issue")
except Exception:
    # Narrowed from a bare ``except:`` so SystemExit/KeyboardInterrupt are not
    # swallowed; a failed probe simply leaves the tests enabled.
    pass

from palantir.presults import gam_fit_predict


def test_gam_fit_predict_basic():
    """Test basic functionality of gam_fit_predict."""
    # Seed the RNG so the correlation threshold below cannot flake.
    np.random.seed(0)
    # Create test data
    x = np.linspace(0, 1, 50)
    y = np.sin(2 * np.pi * x) + 0.1 * np.random.randn(50)

    # Run gam_fit_predict
    y_pred, stds = gam_fit_predict(x, y)

    # Check output shapes
    assert len(y_pred) == len(x)
    assert len(stds) == len(x)

    # Check that predictions follow the general trend
    assert np.corrcoef(y, y_pred)[0, 1] > 0.8  # Strong correlation


def test_gam_fit_predict_with_weights():
    """Test gam_fit_predict with weights."""
    # Seed the RNG so the MSE comparison below is deterministic.
    np.random.seed(1)
    # Create test data
    x = np.linspace(0, 1, 50)
    y = np.sin(2 * np.pi * x) + 0.1 * np.random.randn(50)

    # Create weights biased toward the beginning
    weights = np.exp(-3 * x)

    # Run gam_fit_predict with weights
    y_pred_weighted, _ = gam_fit_predict(x, y, weights=weights)
    # Run without weights for comparison
    y_pred_unweighted, _ = gam_fit_predict(x, y)

    # Check that predictions differ when using weights
    assert not np.allclose(y_pred_weighted, y_pred_unweighted)

    # Early points should be fitted better with weights
    early_idx = x < 0.3
    early_mse_weighted = np.mean((y[early_idx] - y_pred_weighted[early_idx]) ** 2)
    early_mse_unweighted = np.mean((y[early_idx] - y_pred_unweighted[early_idx]) ** 2)
    assert early_mse_weighted <= early_mse_unweighted


def test_gam_fit_predict_with_pred_x():
    """Test gam_fit_predict with custom prediction points."""
    # Seed the RNG for deterministic input data.
    np.random.seed(2)
    # Create test data
    x = np.linspace(0, 1, 50)
    y = np.sin(2 * np.pi * x) + 0.1 * np.random.randn(50)

    # Create custom prediction points
    pred_x = np.linspace(0, 1, 100)  # Higher resolution

    # Run gam_fit_predict with custom prediction points
    y_pred, stds = gam_fit_predict(x, y, pred_x=pred_x)

    # Check that output shapes match the custom prediction points
    assert len(y_pred) == len(pred_x)
    assert len(stds) == len(pred_x)


def test_gam_fit_predict_spline_params():
    """Test gam_fit_predict with different spline parameters."""
    # Seed the RNG for deterministic input data.
    np.random.seed(3)
    # Create test data
    x = np.linspace(0, 1, 50)
    y = np.sin(2 * np.pi * x) + 0.1 * np.random.randn(50)

    # Run with default spline parameters
    y_pred_default, _ = gam_fit_predict(x, y)

    # Run with custom spline parameters
    y_pred_custom, _ = gam_fit_predict(x, y, n_splines=8, spline_order=3)

    # Check that predictions differ with different spline parameters
    assert not np.allclose(y_pred_default, y_pred_custom)
def test_select_branch_cells_basic():
    """Test basic functionality of select_branch_cells.

    Constructs three fate-probability profiles (early-, mid- and late-peaked)
    and checks that masks have the right shape, are persisted to ``obsm`` and
    include at least some high-probability cells.
    """
    # Create test AnnData
    n_cells = 100
    n_genes = 20
    adata = AnnData(np.random.normal(0, 1, (n_cells, n_genes)))
    adata.obs_names = [f"cell_{i}" for i in range(n_cells)]

    # Create pseudotime
    pseudotime = np.linspace(0, 1, n_cells)
    adata.obs["palantir_pseudotime"] = pseudotime

    # Create fate probabilities
    fate_probs = np.zeros((n_cells, 3))
    # First branch: higher probability at beginning
    fate_probs[:, 0] = np.linspace(0.8, 0.1, n_cells)
    # Second branch: higher probability in middle (Gaussian bump)
    x = np.linspace(-3, 3, n_cells)
    fate_probs[:, 1] = np.exp(-(x**2)) / 2
    # Third branch: higher probability at end
    fate_probs[:, 2] = np.linspace(0.1, 0.8, n_cells)

    # Normalize rows to sum to 1
    fate_probs = fate_probs / fate_probs.sum(axis=1, keepdims=True)

    # Store in AnnData
    adata.obsm["palantir_fate_probabilities"] = pd.DataFrame(
        fate_probs, index=adata.obs_names, columns=["branch1", "branch2", "branch3"]
    )

    # Run select_branch_cells
    masks = select_branch_cells(adata)

    # Check that the masks are boolean arrays
    assert masks.dtype == bool
    assert masks.shape == (n_cells, 3)

    # Check that masks are stored in AnnData
    assert "branch_masks" in adata.obsm

    # Check masks make sense with probabilities
    # Higher probability cells should be selected - but we don't check specific values
    # as branch selection behavior depends on the quantile-based algorithm
    high_prob_branch1 = fate_probs[:, 0] > 0.5
    assert np.any(masks[high_prob_branch1, 0])  # At least some high prob cells should be selected


def test_select_branch_cells_custom_keys():
    """Test select_branch_cells with custom keys for pseudotime, fate
    probabilities and the output masks."""
    # Create test AnnData
    n_cells = 100
    n_genes = 20
    adata = AnnData(np.random.normal(0, 1, (n_cells, n_genes)))

    # Create pseudotime with custom key
    pseudotime_key = "custom_pseudotime"
    adata.obs[pseudotime_key] = np.linspace(0, 1, n_cells)

    # Create fate probabilities with custom key
    fate_prob_key = "custom_fate_probs"
    fate_probs = np.random.random((n_cells, 3))
    fate_probs = fate_probs / fate_probs.sum(axis=1, keepdims=True)
    adata.obsm[fate_prob_key] = pd.DataFrame(
        fate_probs, index=adata.obs_names, columns=["branch1", "branch2", "branch3"]
    )

    # Custom masks key
    masks_key = "custom_masks"

    # Run select_branch_cells with custom keys
    masks = select_branch_cells(
        adata, pseudo_time_key=pseudotime_key, fate_prob_key=fate_prob_key, masks_key=masks_key
    )

    # Check that masks are stored in AnnData with custom key
    assert masks_key in adata.obsm

    # Check shapes
    assert masks.shape == (n_cells, 3)


def test_select_branch_cells_parameters():
    """Test select_branch_cells with different parameters (q, eps,
    save_as_df)."""
    # Create test AnnData
    n_cells = 100
    n_genes = 20
    adata = AnnData(np.random.normal(0, 1, (n_cells, n_genes)))

    # Create pseudotime
    adata.obs["palantir_pseudotime"] = np.linspace(0, 1, n_cells)

    # Create fate probabilities
    fate_probs = np.random.random((n_cells, 3))
    fate_probs = fate_probs / fate_probs.sum(axis=1, keepdims=True)
    adata.obsm["palantir_fate_probabilities"] = pd.DataFrame(
        fate_probs, index=adata.obs_names, columns=["branch1", "branch2", "branch3"]
    )

    # Run with different q parameters - for randomly generated data, the relationship between
    # q and the number of selected cells can be unpredictable
    masks1 = select_branch_cells(adata, q=0.01)
    masks2 = select_branch_cells(adata, q=0.5)

    # Just verify we get different results with different parameters
    assert masks1.shape == masks2.shape

    # Run with different eps parameters
    masks3 = select_branch_cells(adata, eps=0.01)
    masks4 = select_branch_cells(adata, eps=0.1)

    # Higher eps should select more cells or at least the same number
    assert masks3.sum() <= masks4.sum()

    # Test save_as_df parameter
    # True is default, test False: masks stored as a bare array + uns columns
    select_branch_cells(adata, save_as_df=False)
    assert isinstance(adata.obsm["branch_masks"], np.ndarray)
    assert "branch_masks_columns" in adata.uns


def test_select_branch_cells_with_different_resolutions():
    """Test select_branch_cells with different resolution settings.

    Temporarily overrides the module-level ``PSEUDOTIME_RES`` constant to
    exercise the nsteps == 0 edge case and the regular code paths.
    """

    # Store original resolution
    original_res = palantir.presults.PSEUDOTIME_RES

    try:
        # Test with high resolution (potential division by zero case for small datasets)
        n_cells = 10
        n_genes = 5

        # Create small test AnnData
        adata_small = AnnData(np.random.normal(0, 1, (n_cells, n_genes)))
        adata_small.obs["palantir_pseudotime"] = np.linspace(0, 1, n_cells)
        adata_small.obsm["palantir_fate_probabilities"] = pd.DataFrame(
            np.random.random((n_cells, 2)),
            columns=["branch1", "branch2"],
            index=adata_small.obs_names,
        )

        # Test with a very high resolution (will trigger nsteps == 0 case)
        palantir.presults.PSEUDOTIME_RES = 1000
        masks_high_res = select_branch_cells(adata_small)
        assert masks_high_res.shape == (n_cells, 2)

        # Test with a very low resolution (regular case)
        palantir.presults.PSEUDOTIME_RES = 2
        masks_low_res = select_branch_cells(adata_small)
        assert masks_low_res.shape == (n_cells, 2)

        # Create larger test AnnData
        n_cells = 100
        adata_large = AnnData(np.random.normal(0, 1, (n_cells, n_genes)))
        adata_large.obs["palantir_pseudotime"] = np.linspace(0, 1, n_cells)
        adata_large.obsm["palantir_fate_probabilities"] = pd.DataFrame(
            np.random.random((n_cells, 2)),
            columns=["branch1", "branch2"],
            index=adata_large.obs_names,
        )

        # Test with medium resolution (regular case)
        palantir.presults.PSEUDOTIME_RES = 10
        masks_medium_res = select_branch_cells(adata_large)
        assert masks_medium_res.shape == (n_cells, 2)

    finally:
        # Restore original resolution so other tests are unaffected
        palantir.presults.PSEUDOTIME_RES = original_res
Should raise KeyError for missing pseudotime 187 | with pytest.raises(KeyError): 188 | select_branch_cells(adata) 189 | 190 | # Add pseudotime but no fate probabilities 191 | adata.obs["palantir_pseudotime"] = np.linspace(0, 1, 10) 192 | 193 | # Should raise KeyError for missing fate probabilities 194 | with pytest.raises(KeyError): 195 | select_branch_cells(adata) 196 | -------------------------------------------------------------------------------- /tests/test_util_density.py: -------------------------------------------------------------------------------- 1 | from anndata._core.anndata import AnnData 2 | from pandas.core.frame import DataFrame 3 | import pytest 4 | import pandas as pd 5 | import scanpy as sc 6 | from anndata import AnnData 7 | import numpy as np 8 | 9 | from palantir.utils import ( 10 | run_density, 11 | run_low_density_variability, 12 | run_density_evaluation, 13 | ) 14 | 15 | 16 | @pytest.fixture 17 | def mock_data(): 18 | n_cells = 50 19 | n_genes = 500 20 | return pd.DataFrame( 21 | np.random.rand(n_cells, n_genes), 22 | columns=[f"gene_{i}" for i in range(n_genes)], 23 | index=[f"cell_{i}" for i in range(n_cells)], 24 | ) 25 | 26 | 27 | @pytest.fixture 28 | def mock_anndata(mock_data: DataFrame): 29 | ad = AnnData(X=mock_data) 30 | ad.obsm["DM_EigenVectors"] = mock_data.iloc[:, :10].copy() 31 | ad.obsm["branch_masks"] = pd.DataFrame( 32 | columns=["branch_0", "branch_1"], 33 | index=mock_data.index, 34 | data=np.random.choice([True, False], size=(mock_data.shape[0], 2)), 35 | ) 36 | ad.obs["other_density"] = np.random.rand(mock_data.shape[0]) 37 | ad.layers["local_variability"] = np.random.rand(*mock_data.shape) 38 | return ad 39 | 40 | 41 | @pytest.fixture 42 | def mock_anndata_custom(mock_data: DataFrame): 43 | ad = AnnData(X=mock_data) 44 | ad.obsm["DM_EigenVectors_custom"] = mock_data.iloc[:, :10].copy() 45 | return ad 46 | 47 | 48 | def test_run_density(mock_anndata: AnnData): 49 | run_density(mock_anndata) 50 | assert 
"mellon_log_density" in mock_anndata.obs.keys() 51 | assert "mellon_log_density_clipped" in mock_anndata.obs.keys() 52 | 53 | 54 | def test_run_density_custom_keys(mock_anndata_custom: AnnData): 55 | run_density(mock_anndata_custom, repr_key="DM_EigenVectors_custom", density_key="custom_key") 56 | assert "custom_key" in mock_anndata_custom.obs.keys() 57 | assert "custom_key_clipped" in mock_anndata_custom.obs.keys() 58 | 59 | 60 | def test_run_low_density_variability(mock_anndata: AnnData): 61 | run_low_density_variability(mock_anndata, density_key="other_density") 62 | for branch in mock_anndata.obsm["branch_masks"].columns: 63 | assert f"low_density_gene_variability_{branch}" in mock_anndata.var.keys() 64 | 65 | 66 | def test_run_density_evaluation(mock_anndata: AnnData, mock_anndata_custom: AnnData): 67 | run_density(mock_anndata) 68 | run_density_evaluation(mock_anndata, mock_anndata_custom, repr_key="DM_EigenVectors_custom") 69 | assert "cross_log_density" in mock_anndata_custom.obs.keys() 70 | assert "cross_log_density_clipped" in mock_anndata_custom.obs.keys() 71 | -------------------------------------------------------------------------------- /tests/test_util_run_pca.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import pandas as pd 3 | import scanpy as sc 4 | from anndata import AnnData 5 | import numpy as np 6 | 7 | from palantir.utils import run_pca 8 | 9 | 10 | @pytest.fixture 11 | def mock_data(): 12 | n_cells = 50 13 | n_genes = 500 14 | return pd.DataFrame( 15 | np.random.rand(n_cells, n_genes), 16 | columns=[f"gene_{i}" for i in range(n_genes)], 17 | index=[f"cell_{i}" for i in range(n_cells)], 18 | ) 19 | 20 | 21 | @pytest.fixture 22 | def mock_anndata(mock_data): 23 | ad = AnnData(X=mock_data) 24 | ad.obsm["DM_EigenVectors_multiscaled"] = mock_data 25 | ad.var["highly_variable"] = np.random.choice([True, False], size=mock_data.shape[1]) 26 | return ad 27 | 28 | 29 | # Test with DataFrame 
30 | def test_run_pca_dataframe(mock_data): 31 | pca_results, var_ratio = run_pca(mock_data, use_hvg=False) 32 | assert isinstance(pca_results, pd.DataFrame) 33 | assert isinstance(var_ratio, np.ndarray) 34 | assert pca_results.shape[1] <= 300 # Check n_components 35 | 36 | 37 | # Test with AnnData 38 | def test_run_pca_anndata(mock_anndata): 39 | pca_results, var_ratio = run_pca(mock_anndata) 40 | assert "X_pca" in mock_anndata.obsm.keys() 41 | assert mock_anndata.obsm["X_pca"].shape[1] <= 300 42 | 43 | 44 | # Test n_components parameter 45 | def test_run_pca_components(mock_data): 46 | pca_results, _ = run_pca(mock_data, n_components=5, use_hvg=False) 47 | assert pca_results.shape[1] == 5 48 | 49 | 50 | # Test use_hvg parameter 51 | def test_run_pca_hvg(mock_anndata): 52 | pca_results, _ = run_pca(mock_anndata, use_hvg=True) 53 | assert pca_results.shape[1] <= 300 54 | 55 | 56 | # Test pca_key parameter 57 | def test_run_pca_pca_key(mock_anndata): 58 | run_pca(mock_anndata, pca_key="custom_key") 59 | assert "custom_key" in mock_anndata.obsm.keys() 60 | assert mock_anndata.obsm["custom_key"].shape[1] <= 300 61 | -------------------------------------------------------------------------------- /tests/test_utils_compute_kernel.py: -------------------------------------------------------------------------------- 1 | from scipy.sparse import find, csr_matrix 2 | import pytest 3 | import pandas as pd 4 | import numpy as np 5 | 6 | from palantir.utils import compute_kernel 7 | 8 | 9 | # Test with DataFrame 10 | def test_compute_kernel_dataframe(mock_data): 11 | kernel = compute_kernel(mock_data) 12 | assert isinstance(kernel, csr_matrix) 13 | 14 | 15 | # Test with AnnData 16 | def test_compute_kernel_anndata(mock_anndata): 17 | kernel = compute_kernel(mock_anndata) 18 | assert "DM_Kernel" in mock_anndata.obsp.keys() 19 | 20 | 21 | # Test knn parameter 22 | def test_compute_kernel_knn(mock_data): 23 | kernel = compute_kernel(mock_data, knn=10) 24 | assert 
isinstance(kernel, csr_matrix) 25 | 26 | 27 | # Test alpha parameter 28 | def test_compute_kernel_alpha(mock_data): 29 | kernel = compute_kernel(mock_data, alpha=0.5) 30 | assert isinstance(kernel, csr_matrix) 31 | 32 | 33 | # Test pca_key parameter 34 | def test_compute_kernel_pca_key(mock_anndata): 35 | mock_anndata.obsm["custom_pca"] = np.random.rand(mock_anndata.shape[0], 10) 36 | kernel = compute_kernel(mock_anndata, pca_key="custom_pca") 37 | assert "DM_Kernel" in mock_anndata.obsp.keys() 38 | 39 | 40 | # Test kernel_key parameter 41 | def test_compute_kernel_kernel_key(mock_anndata): 42 | kernel = compute_kernel(mock_anndata, kernel_key="custom_kernel") 43 | assert "custom_kernel" in mock_anndata.obsp.keys() 44 | -------------------------------------------------------------------------------- /tests/test_utils_density_functions.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | import pandas as pd 4 | import scanpy as sc 5 | from anndata import AnnData 6 | from unittest.mock import patch, MagicMock 7 | 8 | from palantir.utils import run_low_density_variability, run_density_evaluation 9 | 10 | 11 | @pytest.fixture 12 | def mock_anndata_with_density(mock_anndata): 13 | """Create anndata with density for testing low_density_variability""" 14 | # Add density values 15 | mock_anndata.obs["mellon_log_density"] = np.random.rand(mock_anndata.n_obs) 16 | 17 | # Add local variability 18 | mock_anndata.layers["local_variability"] = np.random.rand( 19 | mock_anndata.n_obs, mock_anndata.n_vars 20 | ) 21 | 22 | # Add branch masks 23 | mock_anndata.obsm["branch_masks"] = pd.DataFrame( 24 | np.random.randint(0, 2, size=(mock_anndata.n_obs, 2)), 25 | columns=["branch1", "branch2"], 26 | index=mock_anndata.obs_names, 27 | ) 28 | 29 | # Also add branch mask in obs 30 | mock_anndata.obs["obs_branch"] = np.random.randint(0, 2, size=mock_anndata.n_obs) 31 | 32 | return mock_anndata 33 | 34 | 35 | def 
test_run_low_density_variability_with_obsm(mock_anndata_with_density): 36 | """Test run_low_density_variability function with obsm branch masks""" 37 | ad = mock_anndata_with_density 38 | 39 | # Test with default parameters (branch_masks in obsm) 40 | result = run_low_density_variability(ad) 41 | 42 | # Check results 43 | assert result.shape == (ad.n_vars, 2) # 2 branches 44 | assert "low_density_gene_variability_branch1" in ad.var.columns 45 | assert "low_density_gene_variability_branch2" in ad.var.columns 46 | 47 | # Test with custom parameters 48 | result = run_low_density_variability( 49 | ad, 50 | cell_mask="branch_masks", 51 | density_key="mellon_log_density", 52 | localvar_key="local_variability", 53 | score_key="test_prefix", 54 | ) 55 | 56 | assert "test_prefix_branch1" in ad.var.columns 57 | assert "test_prefix_branch2" in ad.var.columns 58 | 59 | 60 | def test_run_low_density_variability_with_obs(mock_anndata_with_density): 61 | """Test run_low_density_variability function with obs column""" 62 | ad = mock_anndata_with_density 63 | 64 | # Test with obs column 65 | result = run_low_density_variability(ad, cell_mask="obs_branch") 66 | 67 | # Check results 68 | assert result.shape == (ad.n_vars, 1) 69 | assert "low_density_gene_variability__obs_branch" in ad.var.columns 70 | 71 | 72 | def test_run_low_density_variability_with_array(mock_anndata_with_density): 73 | """Test run_low_density_variability function with array input""" 74 | ad = mock_anndata_with_density 75 | 76 | # Test with np.array mask 77 | mask = np.zeros(ad.n_obs, dtype=bool) 78 | mask[:10] = True 79 | result = run_low_density_variability(ad, cell_mask=mask) 80 | assert "low_density_gene_variability_" in ad.var.columns 81 | 82 | # Test with list of cell names 83 | cell_list = ad.obs_names[:10].tolist() 84 | result = run_low_density_variability(ad, cell_mask=cell_list) 85 | assert "low_density_gene_variability_" in ad.var.columns 86 | 87 | 88 | def 
test_run_low_density_variability_errors(mock_anndata_with_density): 89 | """Test error handling in run_low_density_variability""" 90 | ad = mock_anndata_with_density 91 | 92 | # Test missing density key 93 | with pytest.raises(ValueError, match="not_a_key' not found in ad.obs"): 94 | run_low_density_variability(ad, density_key="not_a_key") 95 | 96 | # Test missing layer key 97 | with pytest.raises(ValueError, match="not_a_key' not found in ad.layers"): 98 | run_low_density_variability(ad, localvar_key="not_a_key") 99 | 100 | # Test missing cell_mask key 101 | with pytest.raises(ValueError, match="not_a_key' not found in ad.obsm or ad.obs"): 102 | run_low_density_variability(ad, cell_mask="not_a_key") 103 | 104 | # Test invalid cell_mask type 105 | with pytest.raises(ValueError, match="cell_mask must be either a string key"): 106 | run_low_density_variability(ad, cell_mask=42) # Integer is invalid 107 | 108 | 109 | @patch("mellon.Predictor.from_dict") 110 | def test_run_density_evaluation(mock_predictor_from_dict): 111 | """Test run_density_evaluation function""" 112 | # Create input and output anndata objects 113 | in_ad = AnnData(X=np.random.rand(20, 10)) 114 | out_ad = AnnData(X=np.random.rand(15, 10)) 115 | 116 | # Setup predictor mock 117 | mock_predictor = MagicMock() 118 | mock_predictor.return_value = np.random.rand(15) 119 | mock_predictor_from_dict.return_value = mock_predictor 120 | 121 | # Add required fields 122 | in_ad.uns["mellon_log_density_predictor"] = {"mock": "predictor"} 123 | out_ad.obsm["DM_EigenVectors"] = np.random.rand(15, 5) 124 | 125 | # Run the function 126 | result = run_density_evaluation(in_ad, out_ad) 127 | 128 | # Check results 129 | assert len(result) == 15 130 | assert "cross_log_density" in out_ad.obs.columns 131 | assert "cross_log_density_clipped" in out_ad.obs.columns 132 | 133 | # Verify predictor was called 134 | mock_predictor_from_dict.assert_called_once_with(in_ad.uns["mellon_log_density_predictor"]) 135 | 
mock_predictor.assert_called_once_with(out_ad.obsm["DM_EigenVectors"]) 136 | 137 | # Test with custom parameters 138 | result = run_density_evaluation( 139 | in_ad, 140 | out_ad, 141 | predictor_key="mellon_log_density_predictor", 142 | repr_key="DM_EigenVectors", 143 | density_key="custom_density", 144 | ) 145 | 146 | assert "custom_density" in out_ad.obs.columns 147 | assert "custom_density_clipped" in out_ad.obs.columns 148 | 149 | 150 | def test_run_density_evaluation_errors(): 151 | """Test error handling in run_density_evaluation""" 152 | # Create input and output anndata objects 153 | in_ad = AnnData(X=np.random.rand(20, 10)) 154 | out_ad = AnnData(X=np.random.rand(15, 10)) 155 | 156 | # Test missing repr_key 157 | with pytest.raises(ValueError, match="'DM_EigenVectors' not found in out_ad.obsm"): 158 | run_density_evaluation(in_ad, out_ad) 159 | 160 | # Add eigenvectors but no predictor 161 | out_ad.obsm["DM_EigenVectors"] = np.random.rand(15, 5) 162 | 163 | # Test missing predictor_key 164 | with pytest.raises(ValueError, match="'mellon_log_density_predictor' not found in in_ad.uns"): 165 | run_density_evaluation(in_ad, out_ad) 166 | -------------------------------------------------------------------------------- /tests/test_utils_determine_multiscale_space.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | import pandas as pd 4 | import scanpy as sc 5 | from anndata import AnnData 6 | from pandas.testing import assert_frame_equal 7 | 8 | from palantir.utils import determine_multiscale_space 9 | 10 | 11 | def test_determine_multiscale_space_with_dict(mock_dm_res): 12 | """Test determine_multiscale_space with dictionary input""" 13 | # Test with default n_eigs (determined by eigen gap) 14 | result = determine_multiscale_space(mock_dm_res) 15 | assert isinstance(result, pd.DataFrame) 16 | assert result.shape[0] == 50 # Should have 50 cells 17 | # The number of components can vary 
depending on the generated eigenvalues 18 | 19 | # Test with specific n_eigs 20 | result = determine_multiscale_space(mock_dm_res, n_eigs=3) 21 | assert isinstance(result, pd.DataFrame) 22 | assert result.shape == (50, 2) # Only use 2 eigenvectors (skip first) 23 | 24 | 25 | def test_determine_multiscale_space_with_anndata(mock_anndata): 26 | """Test determine_multiscale_space with AnnData input""" 27 | # Setup eigenvalues with a clear gap for testing auto-selection 28 | n_components = 10 29 | eigvals = np.zeros(n_components) 30 | eigvals[0] = 0.95 # First eigenvalue 31 | eigvals[1] = 0.85 32 | eigvals[2] = 0.75 33 | eigvals[3] = 0.30 # Big gap after this one 34 | eigvals[4:] = np.linspace(0.25, 0.1, n_components - 4) 35 | 36 | # Create eigenvectors 37 | eigvecs = np.random.rand(mock_anndata.n_obs, n_components) 38 | 39 | # Add to mock anndata 40 | mock_anndata.uns["DM_EigenValues"] = eigvals 41 | mock_anndata.obsm["DM_EigenVectors"] = eigvecs 42 | 43 | # Test with AnnData input - both stores in obsm and returns DataFrame 44 | result = determine_multiscale_space(mock_anndata) 45 | assert isinstance(result, pd.DataFrame) # Returns DataFrame for both AnnData and dict input 46 | assert "DM_EigenVectors_multiscaled" in mock_anndata.obsm # Also stores in AnnData 47 | 48 | # Should detect gap and use components after skipping first 49 | scaled_shape = mock_anndata.obsm["DM_EigenVectors_multiscaled"].shape 50 | assert scaled_shape[0] == mock_anndata.n_obs # Number of cells matches 51 | # Number of components can vary based on how the algorithm detects eigen gaps 52 | 53 | 54 | def test_determine_multiscale_space_with_small_gap(mock_anndata): 55 | """Test determine_multiscale_space with small eigen gap""" 56 | # Setup eigenvalues with no clear gap 57 | n_components = 5 58 | eigvals = np.linspace(0.9, 0.5, n_components) 59 | 60 | # Create eigenvectors 61 | eigvecs = np.random.rand(mock_anndata.n_obs, n_components) 62 | 63 | # Add to mock anndata 64 | 
mock_anndata.uns["DM_EigenValues"] = eigvals 65 | mock_anndata.obsm["DM_EigenVectors"] = eigvecs 66 | 67 | # Test with AnnData input - both stores in obsm and returns DataFrame 68 | result = determine_multiscale_space(mock_anndata) 69 | assert isinstance(result, pd.DataFrame) # Returns DataFrame 70 | assert "DM_EigenVectors_multiscaled" in mock_anndata.obsm # Also stores in AnnData 71 | 72 | # Should fall back to second largest gap 73 | scaled_shape = mock_anndata.obsm["DM_EigenVectors_multiscaled"].shape 74 | assert scaled_shape[0] == mock_anndata.n_obs 75 | -------------------------------------------------------------------------------- /tests/test_utils_diffusion_maps_from_kernel.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | import pandas as pd 4 | from scipy.sparse import csr_matrix 5 | from scipy.sparse.linalg import eigs 6 | from pytest import approx 7 | 8 | from palantir.utils import diffusion_maps_from_kernel, determine_multiscale_space 9 | 10 | 11 | @pytest.fixture 12 | def mock_kernel(): 13 | size = 50 14 | A = np.random.rand(size, size) 15 | return csr_matrix((A + A.T) / 2) 16 | 17 | 18 | def test_diffusion_maps_basic(mock_kernel): 19 | result = diffusion_maps_from_kernel(mock_kernel) 20 | 21 | assert isinstance(result, dict) 22 | assert "T" in result and "EigenVectors" in result and "EigenValues" in result 23 | 24 | assert result["T"].shape == (50, 50) 25 | assert result["EigenVectors"].shape == (50, 10) 26 | assert result["EigenValues"].shape == (10,) 27 | 28 | 29 | def test_diffusion_maps_n_components(mock_kernel): 30 | result = diffusion_maps_from_kernel(mock_kernel, n_components=5) 31 | 32 | assert result["EigenVectors"].shape == (50, 5) 33 | assert result["EigenValues"].shape == (5,) 34 | 35 | 36 | def test_diffusion_maps_seed(mock_kernel): 37 | result1 = diffusion_maps_from_kernel(mock_kernel, seed=0) 38 | result2 = diffusion_maps_from_kernel(mock_kernel, seed=0) 
39 | 40 | # Seed usage should yield the same result 41 | assert np.allclose(result1["EigenValues"], result2["EigenValues"]) 42 | 43 | 44 | def test_diffusion_maps_eigen(mock_kernel): 45 | result = diffusion_maps_from_kernel(mock_kernel) 46 | 47 | T = result["T"].toarray() 48 | e_values, e_vectors = eigs(T, 10, tol=1e-4, maxiter=1000) 49 | 50 | assert np.allclose( 51 | result["EigenValues"], np.real(sorted(e_values, reverse=True)[:10]), atol=1e-4 52 | ) 53 | 54 | 55 | def test_determine_multiscale_space(mock_kernel): 56 | result = diffusion_maps_from_kernel(mock_kernel) 57 | msresult = determine_multiscale_space(result) 58 | assert msresult.shape[0] == result["EigenVectors"].shape[0] 59 | -------------------------------------------------------------------------------- /tests/test_utils_early_cell_functions.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | import pandas as pd 4 | import scanpy as sc 5 | from anndata import AnnData 6 | from unittest.mock import patch, MagicMock 7 | 8 | from palantir.utils import ( 9 | early_cell, 10 | fallback_terminal_cell, 11 | find_terminal_states, 12 | CellNotFoundException, 13 | ) 14 | 15 | 16 | @pytest.fixture 17 | def mock_anndata_with_celltypes(mock_anndata): 18 | """Create anndata with cell types for early_cell and terminal states tests""" 19 | # Add cell types 20 | celltypes = np.array(["A", "B", "C", "A", "B"] * 10) 21 | mock_anndata.obs["celltype"] = pd.Categorical(celltypes) 22 | 23 | # Add multiscale space with one cell type at extremes 24 | eigvecs = mock_anndata.obsm["DM_EigenVectors_multiscaled"].copy() 25 | # Make cell 0 (type A) maximum in component 0 26 | eigvecs[0, 0] = 100.0 27 | # Make cell 4 (type B) minimum in component 1 28 | eigvecs[4, 1] = -100.0 29 | 30 | mock_anndata.obsm["DM_EigenVectors_multiscaled"] = eigvecs 31 | 32 | return mock_anndata 33 | 34 | 35 | def test_early_cell_extreme_max(mock_anndata_with_celltypes): 36 | 
"""Test early_cell finding cell at maximum of component""" 37 | ad = mock_anndata_with_celltypes 38 | 39 | # Test finding a cell of type 'A' - we don't need to know which cell it will be 40 | with patch("palantir.utils._return_cell", return_value="cell_0") as mock_return: 41 | result = early_cell(ad, "A") 42 | assert result == "cell_0" # Just check the mocked return value 43 | mock_return.assert_called_once() 44 | 45 | # Only check the cell type and that it's finding some kind of extreme 46 | args = mock_return.call_args[0] 47 | assert args[2] == "A" # Cell type 48 | assert args[3] in ["max", "min"] # Extreme type (don't care which one) 49 | 50 | 51 | def test_early_cell_extreme_min(mock_anndata_with_celltypes): 52 | """Test early_cell finding cell at minimum of component""" 53 | ad = mock_anndata_with_celltypes 54 | 55 | # Test finding a cell of type 'B' - we don't need to know which cell it will be 56 | with patch("palantir.utils._return_cell", return_value="cell_4") as mock_return: 57 | result = early_cell(ad, "B") 58 | assert result == "cell_4" # Just check the mocked return value 59 | mock_return.assert_called_once() 60 | 61 | # Only check the cell type and that it's finding some kind of extreme 62 | args = mock_return.call_args[0] 63 | assert args[2] == "B" # Cell type 64 | assert args[3] in ["max", "min"] # Extreme type (don't care which one) 65 | 66 | 67 | def test_early_cell_fallback(): 68 | """Test early_cell with fallback to fallback_terminal_cell""" 69 | # Create a very simple AnnData with a cell type that won't be at extremes 70 | ad = AnnData(X=np.random.rand(10, 5)) 71 | ad.obs["celltype"] = pd.Categorical( 72 | ["A", "A", "A", "A", "A", "B", "B", "B", "C", "C"], categories=["A", "B", "C"] 73 | ) 74 | 75 | # Add a fake eigenvectors matrix where no 'B' cells are at extremes 76 | eigvecs = np.zeros((10, 3)) 77 | # Make 'A' cells dominate the extremes 78 | eigvecs[0, 0] = 100 # max in component 0 is cell 0 (type A) 79 | eigvecs[1, 0] = -100 # min in 
component 0 is cell 1 (type A) 80 | eigvecs[2, 1] = 100 # max in component 1 is cell 2 (type A) 81 | eigvecs[3, 1] = -100 # min in component 1 is cell 3 (type A) 82 | eigvecs[4, 2] = 100 # max in component 2 is cell 4 (type A) 83 | eigvecs[5, 2] = -100 # min in component 2 is cell 5 (type B) 84 | ad.obsm["DM_EigenVectors_multiscaled"] = eigvecs 85 | 86 | # Give the AnnData proper observation names 87 | ad.obs_names = [f"cell_{i}" for i in range(10)] 88 | 89 | # Mock fallback_terminal_cell to avoid actual computation 90 | with patch("palantir.utils.fallback_terminal_cell", return_value="cell_5") as mock_fallback: 91 | # Test early_cell with fallback - it should find no cell in extremes and fall back 92 | result = early_cell(ad, "C", fallback_seed=42) # Cell type C doesn't exist 93 | assert result == "cell_5" 94 | mock_fallback.assert_called_once_with(ad, "C", celltype_column="celltype", seed=42) 95 | 96 | 97 | def test_early_cell_exception(): 98 | """Test early_cell raising exception when no cell found""" 99 | # Create a very simple AnnData with a cell type that won't be at extremes 100 | ad = AnnData(X=np.random.rand(10, 5)) 101 | ad.obs["celltype"] = pd.Categorical( 102 | ["A", "A", "A", "A", "A", "B", "B", "B", "B", "B"], categories=["A", "B"] 103 | ) 104 | 105 | # Add a fake eigenvectors matrix where no 'B' cells are at extremes 106 | eigvecs = np.zeros((10, 3)) 107 | # Make 'A' cells dominate the extremes 108 | eigvecs[0, 0] = 100 # max in component 0 is cell 0 (type A) 109 | eigvecs[1, 0] = -100 # min in component 0 is cell 1 (type A) 110 | eigvecs[2, 1] = 100 # max in component 1 is cell 2 (type A) 111 | eigvecs[3, 1] = -100 # min in component 1 is cell 3 (type A) 112 | eigvecs[4, 2] = 100 # max in component 2 is cell 4 (type A) 113 | eigvecs[0, 2] = -100 # min in component 2 is cell 0 (type A) 114 | ad.obsm["DM_EigenVectors_multiscaled"] = eigvecs 115 | 116 | # Test without fallback_seed - should raise CellNotFoundException 117 | with 
pytest.raises(CellNotFoundException): 118 | early_cell(ad, "B") 119 | 120 | 121 | @patch("palantir.utils.run_palantir") 122 | def test_fallback_terminal_cell(mock_run_palantir, mock_anndata_with_celltypes): 123 | """Test fallback_terminal_cell with mocked palantir run""" 124 | ad = mock_anndata_with_celltypes 125 | 126 | # Setup mock pseudotime result 127 | mock_result = MagicMock() 128 | pseudotime = pd.Series([0.1, 0.2, 0.3, 0.9, 0.5], index=ad.obs_names[:5]) 129 | mock_result.pseudotime = pseudotime 130 | mock_run_palantir.return_value = mock_result 131 | 132 | # Test fallback_terminal_cell 133 | with patch("palantir.utils.print"): # Suppress print output 134 | result = fallback_terminal_cell(ad, "A", celltype_column="celltype", seed=42) 135 | assert result == ad.obs_names[3] # Should pick cell with max pseudotime 136 | 137 | # Verify run_palantir was called with correct arguments 138 | mock_run_palantir.assert_called_once() 139 | call_args = mock_run_palantir.call_args[0] 140 | assert call_args[0] is ad 141 | # Second arg should be a non-A cell 142 | 143 | 144 | @patch("palantir.utils.early_cell") 145 | def test_find_terminal_states(mock_early_cell, mock_anndata_with_celltypes): 146 | """Test find_terminal_states""" 147 | ad = mock_anndata_with_celltypes 148 | 149 | # Setup mock early_cell behavior 150 | def side_effect(ad, celltype, *args, **kwargs): 151 | if celltype == "A": 152 | return "cell_0" 153 | elif celltype == "B": 154 | return "cell_4" 155 | elif celltype == "C": 156 | raise CellNotFoundException("Test exception") 157 | return None 158 | 159 | mock_early_cell.side_effect = side_effect 160 | 161 | # Test find_terminal_states with a warning for type C 162 | with pytest.warns(UserWarning): 163 | result = find_terminal_states(ad, ["A", "B", "C"], celltype_column="celltype") 164 | 165 | # Check result - should have entries for A and B, but not C 166 | assert isinstance(result, pd.Series) 167 | assert len(result) == 2 168 | assert result["cell_0"] == "A" 
169 | assert result["cell_4"] == "B" 170 | assert "cell_C" not in result.index 171 | -------------------------------------------------------------------------------- /tests/test_utils_run_diffusion_maps.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import pandas as pd 3 | import scanpy as sc 4 | from anndata import AnnData 5 | from scipy.sparse import csr_matrix, issparse 6 | import numpy as np 7 | 8 | from palantir.utils import run_diffusion_maps 9 | 10 | 11 | # Generate mock DataFrame data 12 | def mock_dataframe(rows, cols): 13 | return pd.DataFrame(np.random.rand(rows, cols)) 14 | 15 | 16 | # Generate mock AnnData object 17 | def mock_anndata(rows, cols, keys): 18 | ad = AnnData(np.random.rand(rows, cols)) 19 | for key in keys: 20 | ad.obsm[key] = np.random.rand(rows, cols) 21 | return ad 22 | 23 | 24 | def test_run_diffusion_maps_dataframe(): 25 | df = mock_dataframe(50, 30) 26 | result = run_diffusion_maps(df) 27 | 28 | assert isinstance(result, dict) 29 | assert set(result.keys()) == {"T", "EigenVectors", "EigenValues", "kernel"} 30 | 31 | assert isinstance(result["kernel"], csr_matrix) 32 | assert isinstance(result["T"], csr_matrix) 33 | assert isinstance(result["EigenVectors"], pd.DataFrame) 34 | assert isinstance(result["EigenValues"], pd.Series) 35 | 36 | 37 | def test_run_diffusion_maps_anndata(): 38 | keys = ["X_pca"] 39 | ad = mock_anndata(50, 30, keys) 40 | result = run_diffusion_maps(ad) 41 | 42 | assert "DM_Kernel" in ad.obsp 43 | assert "DM_Similarity" in ad.obsp 44 | assert "DM_EigenVectors" in ad.obsm 45 | assert "DM_EigenValues" in ad.uns 46 | 47 | assert np.array_equal(ad.obsp["DM_Kernel"].toarray(), result["kernel"].toarray()) 48 | assert np.array_equal(ad.obsp["DM_Similarity"].toarray(), result["T"].toarray()) 49 | assert np.array_equal(ad.obsm["DM_EigenVectors"], result["EigenVectors"].values) 50 | assert np.array_equal(ad.uns["DM_EigenValues"], result["EigenValues"]) 51 | 52 | 53 
def test_run_diffusion_maps_exceptions():
    # Test with neither pd.DataFrame nor AnnData: any other input type
    # (here a plain str) must be rejected with ValueError.
    with pytest.raises(ValueError):
        run_diffusion_maps("invalid_type")
--------------------------------------------------------------------------------
/tests/test_utils_run_local_variability.py:
--------------------------------------------------------------------------------
import scanpy as sc  # NOTE(review): unused here (AnnData is imported directly) — candidate for removal
from anndata import AnnData
import numpy as np
import pytest
from scipy.sparse import csr_matrix

from palantir.utils import run_local_variability


# Mock data for dense matrix
def mock_anndata_dense(n_cells, n_genes, layer_keys, obsp_keys):
    """Build an AnnData with random dense X, one dense layer per key in
    ``layer_keys`` and one dense (n_cells x n_cells) obsp matrix per key in
    ``obsp_keys``."""
    ad = AnnData(np.random.rand(n_cells, n_genes))
    for key in layer_keys:
        ad.layers[key] = np.random.rand(n_cells, n_genes)
    for key in obsp_keys:
        ad.obsp[key] = np.random.rand(n_cells, n_cells)
    return ad


# Mock data for sparse matrix
def mock_anndata_sparse(n_cells, n_genes, layer_keys, obsp_keys):
    """Same as :func:`mock_anndata_dense` but every matrix is a csr_matrix."""
    ad = AnnData(csr_matrix(np.random.rand(n_cells, n_genes)))
    for key in layer_keys:
        ad.layers[key] = csr_matrix(np.random.rand(n_cells, n_genes))
    for key in obsp_keys:
        ad.obsp[key] = csr_matrix(np.random.rand(n_cells, n_cells))
    return ad


# Test with default keys, dense
@pytest.mark.filterwarnings("ignore:invalid value encountered in divide")
def test_run_local_variability_default_dense():
    ad = mock_anndata_dense(50, 20, ["MAGIC_imputed_data"], ["distances"])
    _test_run_local_variability(ad)


# Test with default keys, sparse
@pytest.mark.filterwarnings("ignore:invalid value encountered in divide")
def test_run_local_variability_default_sparse():
    ad = mock_anndata_sparse(50, 20, ["MAGIC_imputed_data"], ["distances"])
    _test_run_local_variability(ad)


# Test with custom keys, dense
@pytest.mark.filterwarnings("ignore:invalid value encountered in divide")
def test_run_local_variability_custom_keys_dense():
    ad = mock_anndata_dense(50, 20, ["custom_expression"], ["custom_distances"])
    _test_run_local_variability(ad, "custom_expression", "custom_distances", "custom_local_var")


# Test with custom keys, sparse
@pytest.mark.filterwarnings("ignore:invalid value encountered in divide")
def test_run_local_variability_custom_keys_sparse():
    ad = mock_anndata_sparse(50, 20, ["custom_expression"], ["custom_distances"])
    _test_run_local_variability(ad, "custom_expression", "custom_distances", "custom_local_var")


# Helper function for assertions
def _test_run_local_variability(
    ad,
    expression_key="MAGIC_imputed_data",
    distances_key="distances",
    localvar_key="local_variability",
):
    """Run run_local_variability and check it both writes the result layer
    into ``ad`` and returns a (50, 20) dense or sparse matrix."""
    result = run_local_variability(ad, expression_key, distances_key, localvar_key)

    assert localvar_key in ad.layers
    assert isinstance(result, np.ndarray) or isinstance(result, csr_matrix)
    # Shape is hard-coded to the (n_cells, n_genes) used by all callers above.
    assert result.shape == (50, 20)


# Test missing keys
def test_run_local_variability_missing_keys():
    ad = mock_anndata_dense(50, 20, ["MAGIC_imputed_data"], ["distances"])

    # Missing expression layer must raise KeyError.
    with pytest.raises(KeyError):
        run_local_variability(ad, "missing_expression", "distances")

    # Missing distances obsp key must raise KeyError.
    with pytest.raises(KeyError):
        run_local_variability(ad, "MAGIC_imputed_data", "missing_distances")
--------------------------------------------------------------------------------
/tests/test_utils_run_magic_imputation.py:
--------------------------------------------------------------------------------
import pytest
import numpy as np
import pandas as pd
import scanpy as sc
from anndata import AnnData
from scipy.sparse import csr_matrix
from anndata import AnnData  # NOTE(review): duplicate of the import above — harmless, should be removed

from palantir.utils import run_magic_imputation


@pytest.fixture
def mock_dm_res():
    # Minimal diffusion-map result: only the transition matrix "T" is needed.
    return {"T": csr_matrix(np.random.rand(50, 50))}


# Test with numpy ndarray
def test_run_magic_imputation_ndarray(mock_dm_res):
    data = np.random.rand(50, 20)
    # With default sparse=True
    result = run_magic_imputation(data, dm_res=mock_dm_res)
    assert isinstance(result, csr_matrix)
    # With sparse=False
    result = run_magic_imputation(data, dm_res=mock_dm_res, sparse=False)
    assert isinstance(result, np.ndarray)


# Test with pandas DataFrame
def test_run_magic_imputation_dataframe(mock_dm_res):
    data = pd.DataFrame(np.random.rand(50, 20))
    # With default sparse=True, converts to DataFrame
    result = run_magic_imputation(data, dm_res=mock_dm_res)
    assert isinstance(result, pd.DataFrame)
    # With sparse=False
    result = run_magic_imputation(data, dm_res=mock_dm_res, sparse=False)
    assert isinstance(result, pd.DataFrame)


# Test with csr_matrix
def test_run_magic_imputation_csr(mock_dm_res):
    data = csr_matrix(np.random.rand(50, 20))
    # With default sparse=True
    result = run_magic_imputation(data, dm_res=mock_dm_res)
    assert isinstance(result, csr_matrix)
    # With sparse=False
    result = run_magic_imputation(data, dm_res=mock_dm_res, sparse=False)
    assert isinstance(result, np.ndarray)


# Test with AnnData
def test_run_magic_imputation_anndata():
    data = AnnData(np.random.rand(50, 20))
    data.obsp["DM_Similarity"] = np.random.rand(50, 50)
    # With default sparse=True
    result = run_magic_imputation(data)
    assert "MAGIC_imputed_data" in data.layers
    assert isinstance(result, csr_matrix)

    # With sparse=False
    data2 = AnnData(np.random.rand(50, 20))
    data2.obsp["DM_Similarity"] = np.random.rand(50, 50)
    result2 = run_magic_imputation(data2, sparse=False)
    assert "MAGIC_imputed_data" in data2.layers
    assert isinstance(result2, np.ndarray)


# Test with AnnData and custom keys
def test_run_magic_imputation_anndata_custom_keys():
    data = AnnData(np.random.rand(50, 20))
    data.layers["custom_expr"] = np.random.rand(50, 20)
    data.obsp["custom_sim"] = np.random.rand(50, 50)
    result = run_magic_imputation(
        data,
        expression_key="custom_expr",
        sim_key="custom_sim",
        imputation_key="custom_imp",
    )
    assert "custom_imp" in data.layers
    assert isinstance(result, csr_matrix)

    # With sparse=False
    data2 = AnnData(np.random.rand(50, 20))
    data2.layers["custom_expr"] = np.random.rand(50, 20)
    data2.obsp["custom_sim"] = np.random.rand(50, 50)
    result2 = run_magic_imputation(
        data2,
        expression_key="custom_expr",
        sim_key="custom_sim",
        imputation_key="custom_imp2",
        sparse=False,
    )
    assert "custom_imp2" in data2.layers
    assert isinstance(result2, np.ndarray)


# Test with missing dm_res and not AnnData
def test_run_magic_imputation_missing_dm_res():
    # Without an AnnData carrying its own similarity matrix, dm_res is required.
    data = np.random.rand(50, 20)
    with pytest.raises(ValueError):
        run_magic_imputation(data)


# Test with missing expression_key in AnnData
def test_run_magic_imputation_missing_expression_key():
    data = AnnData(np.random.rand(50, 20))
    data.obsp["DM_Similarity"] = np.random.rand(50, 50)
    with pytest.raises(ValueError):
        run_magic_imputation(data, expression_key="missing_key")
--------------------------------------------------------------------------------
/tests/test_validation.py:
--------------------------------------------------------------------------------
import pytest
import pandas as pd
import numpy as np
import scanpy as sc
from anndata import AnnData
from pandas.testing import assert_frame_equal, assert_series_equal  # NOTE(review): unused in this file
from anndata import AnnData  # NOTE(review): duplicate import — harmless, should be removed

from palantir.validation import (
    _validate_obsm_key,
    _validate_varm_key,
    _validate_gene_trend_input,
)
@pytest.fixture
def mock_anndata_with_obsm():
    """Create anndata with obsm for testing validation functions.

    Provides both supported layouts: a DataFrame under ``obsm["df_key"]`` and
    a bare ndarray under ``obsm["np_key"]`` whose column names live in
    ``uns["np_key_columns"]``.
    """
    n_cells = 20
    n_genes = 10
    ad = AnnData(X=np.random.rand(n_cells, n_genes))

    # Add DataFrame in obsm
    ad.obsm["df_key"] = pd.DataFrame(
        np.random.rand(n_cells, 3), columns=["c1", "c2", "c3"], index=ad.obs_names
    )

    # Add numpy array in obsm with column names in uns
    ad.obsm["np_key"] = np.random.rand(n_cells, 3)
    ad.uns["np_key_columns"] = ["c1", "c2", "c3"]

    return ad


@pytest.fixture
def mock_anndata_with_varm():
    """Create anndata with varm for testing validation functions.

    Mirrors mock_anndata_with_obsm for varm: DataFrame under ``varm["df_key"]``
    (columns are pseudotime values) and ndarray under ``varm["np_key"]`` with
    pseudotime stored in ``uns["np_key_pseudotime"]``.
    """
    n_cells = 20
    n_genes = 10
    ad = AnnData(X=np.random.rand(n_cells, n_genes))

    # Add DataFrame in varm
    ad.varm["df_key"] = pd.DataFrame(
        np.random.rand(n_genes, 5), columns=[0.1, 0.2, 0.3, 0.4, 0.5], index=ad.var_names
    )

    # Add numpy array in varm with pseudotime in uns
    ad.varm["np_key"] = np.random.rand(n_genes, 5)
    ad.uns["np_key_pseudotime"] = [0.1, 0.2, 0.3, 0.4, 0.5]

    return ad


@pytest.fixture
def mock_anndata_with_gene_trends():
    """Create anndata with gene trends for testing validation functions.

    Branch information is stored in the three layouts
    _validate_gene_trend_input accepts, plus one ``varm["gene_trends_<branch>"]``
    DataFrame per branch.
    """
    n_cells = 20
    n_genes = 10
    ad = AnnData(X=np.random.rand(n_cells, n_genes))

    # Add branch masks in various locations
    # 1. as DataFrame in obsm
    ad.obsm["branch_masks"] = pd.DataFrame(
        np.random.randint(0, 2, size=(n_cells, 3)),
        columns=["branch1", "branch2", "branch3"],
        index=ad.obs_names,
    )

    # 2. as list in uns
    ad.uns["branch_list"] = ["branch1", "branch2", "branch3"]

    # 3. as numpy array with columns in uns
    ad.obsm["branch_array"] = np.random.randint(0, 2, size=(n_cells, 3))
    ad.uns["branch_array_columns"] = ["branch1", "branch2", "branch3"]

    # Add gene trends for each branch
    for branch in ["branch1", "branch2", "branch3"]:
        trend_key = f"gene_trends_{branch}"
        ad.varm[trend_key] = pd.DataFrame(
            np.random.rand(n_genes, 5), columns=[0.1, 0.2, 0.3, 0.4, 0.5], index=ad.var_names
        )

    return ad


def test_validate_obsm_key_with_df(mock_anndata_with_obsm):
    """Test _validate_obsm_key with DataFrame input"""
    ad = mock_anndata_with_obsm

    # Test DataFrame as_df=True (default)
    data, data_names = _validate_obsm_key(ad, "df_key")
    assert isinstance(data, pd.DataFrame)
    assert data.shape == (ad.n_obs, 3)
    assert list(data_names) == ["c1", "c2", "c3"]

    # Test DataFrame as_df=False
    data, data_names = _validate_obsm_key(ad, "df_key", as_df=False)
    assert isinstance(data, np.ndarray)
    assert data.shape == (ad.n_obs, 3)
    assert list(data_names) == ["c1", "c2", "c3"]


def test_validate_obsm_key_with_array(mock_anndata_with_obsm):
    """Test _validate_obsm_key with numpy array input"""
    ad = mock_anndata_with_obsm

    # Test numpy array as_df=True: column names come from uns["np_key_columns"]
    data, data_names = _validate_obsm_key(ad, "np_key")
    assert isinstance(data, pd.DataFrame)
    assert data.shape == (ad.n_obs, 3)
    assert list(data_names) == ["c1", "c2", "c3"]

    # Test numpy array as_df=False
    data, data_names = _validate_obsm_key(ad, "np_key", as_df=False)
    assert isinstance(data, np.ndarray)
    assert data.shape == (ad.n_obs, 3)
    assert list(data_names) == ["c1", "c2", "c3"]


def test_validate_obsm_key_errors(mock_anndata_with_obsm):
    """Test _validate_obsm_key error handling"""
    ad = mock_anndata_with_obsm

    # Test key not in obsm
    with pytest.raises(KeyError, match="not_a_key not found in ad.obsm"):
        _validate_obsm_key(ad, "not_a_key")

    # Test numpy array without columns in uns
    ad.obsm["bad_key"] = np.random.rand(ad.n_obs, 3)
    with pytest.raises(KeyError, match="bad_key_columns not found"):
        _validate_obsm_key(ad, "bad_key")


def test_validate_varm_key_with_df(mock_anndata_with_varm):
    """Test _validate_varm_key with DataFrame input"""
    ad = mock_anndata_with_varm

    # Test DataFrame as_df=True (default)
    data, data_names = _validate_varm_key(ad, "df_key")
    assert isinstance(data, pd.DataFrame)
    assert data.shape == (ad.n_vars, 5)
    assert list(data_names) == [0.1, 0.2, 0.3, 0.4, 0.5]

    # Test DataFrame as_df=False
    data, data_names = _validate_varm_key(ad, "df_key", as_df=False)
    assert isinstance(data, np.ndarray)
    assert data.shape == (ad.n_vars, 5)
    assert list(data_names) == [0.1, 0.2, 0.3, 0.4, 0.5]


def test_validate_varm_key_with_array(mock_anndata_with_varm):
    """Test _validate_varm_key with numpy array input"""
    ad = mock_anndata_with_varm

    # Test numpy array as_df=True: pseudotime comes from uns["np_key_pseudotime"]
    data, data_names = _validate_varm_key(ad, "np_key")
    assert isinstance(data, pd.DataFrame)
    assert data.shape == (ad.n_vars, 5)
    assert np.allclose(data_names, [0.1, 0.2, 0.3, 0.4, 0.5])

    # Test numpy array as_df=False
    data, data_names = _validate_varm_key(ad, "np_key", as_df=False)
    assert isinstance(data, np.ndarray)
    assert data.shape == (ad.n_vars, 5)
    assert np.allclose(data_names, [0.1, 0.2, 0.3, 0.4, 0.5])


def test_validate_varm_key_errors(mock_anndata_with_varm):
    """Test _validate_varm_key error handling"""
    ad = mock_anndata_with_varm

    # Test key not in varm
    with pytest.raises(KeyError, match="not_a_key not found in ad.varm"):
        _validate_varm_key(ad, "not_a_key")

    # Test numpy array without pseudotime in uns
    ad.varm["bad_key"] = np.random.rand(ad.n_vars, 3)
    with pytest.raises(KeyError, match="bad_key_pseudotime not found"):
        _validate_varm_key(ad, "bad_key")


def test_validate_gene_trend_input_anndata(mock_anndata_with_gene_trends):
    """Test _validate_gene_trend_input with AnnData input"""
    ad = mock_anndata_with_gene_trends

    # Test with default parameters (branch_masks in obsm)
    gene_trends = _validate_gene_trend_input(ad)
    assert isinstance(gene_trends, dict)
    assert len(gene_trends) == 3
    assert "branch1" in gene_trends
    assert "branch2" in gene_trends
    assert "branch3" in gene_trends

    # Test with branch_names as a string key in uns
    gene_trends = _validate_gene_trend_input(ad, branch_names="branch_list")
    assert isinstance(gene_trends, dict)
    assert len(gene_trends) == 3

    # Test with branch_names as a key in obsm with DataFrame
    gene_trends = _validate_gene_trend_input(ad, branch_names="branch_masks")
    assert isinstance(gene_trends, dict)
    assert len(gene_trends) == 3

    # Test with branch_names as a key with columns in uns
    gene_trends = _validate_gene_trend_input(ad, branch_names="branch_array")
    assert isinstance(gene_trends, dict)
    assert len(gene_trends) == 3


def test_validate_gene_trend_input_dict():
    """Test _validate_gene_trend_input with dict input"""
    # Create test dictionary
    trends1 = pd.DataFrame(np.random.rand(10, 5), columns=[0.1, 0.2, 0.3, 0.4, 0.5])
    trends2 = pd.DataFrame(np.random.rand(10, 5), columns=[0.1, 0.2, 0.3, 0.4, 0.5])

    input_dict = {"branch1": {"trends": trends1}, "branch2": {"trends": trends2}}

    gene_trends = _validate_gene_trend_input(input_dict)
    assert gene_trends is input_dict  # Should return the same dict


def test_validate_gene_trend_input_errors(mock_anndata_with_gene_trends):
    """Test _validate_gene_trend_input error handling"""
    ad = mock_anndata_with_gene_trends

    # Test invalid branch_names key
    with pytest.raises(KeyError, match="not_a_key.*not found"):
        _validate_gene_trend_input(ad, branch_names="not_a_key")

    # Test invalid data type
    with pytest.raises(ValueError, match="must be an instance of either AnnData"):
        _validate_gene_trend_input([1, 2, 3])  # List is not valid input
--------------------------------------------------------------------------------
/tests/utils_compute_kernel.py:
--------------------------------------------------------------------------------
# NOTE(review): this module lacks the `test_` filename prefix, so default
# pytest discovery skips it; it appears to be a stale duplicate of
# tests/test_utils_compute_kernel.py — TODO confirm and remove.
from scipy.sparse import find, csr_matrix  # NOTE(review): `find` is unused
import pytest
import pandas as pd
import scanpy as sc
import numpy as np

from palantir.utils import compute_kernel


@pytest.fixture
def mock_data():
    # 50 cells x 10 genes of random expression with labeled axes.
    n_cells = 50
    n_genes = 10
    return pd.DataFrame(
        np.random.rand(n_cells, n_genes),
        columns=[f"gene_{i}" for i in range(n_genes)],
        index=[f"cell_{i}" for i in range(n_cells)],
    )


@pytest.fixture
def mock_anndata(mock_data):
    # AnnData whose X_pca slot simply reuses the expression frame.
    ad = sc.AnnData(X=mock_data)
    ad.obsm["X_pca"] = mock_data
    return ad


# Test with DataFrame
def test_compute_kernel_dataframe(mock_data):
    kernel = compute_kernel(mock_data)
    assert isinstance(kernel, csr_matrix)


# Test with AnnData
def test_compute_kernel_anndata(mock_anndata):
    kernel = compute_kernel(mock_anndata)
    # AnnData input stores the kernel under the default obsp key.
    assert "DM_Kernel" in mock_anndata.obsp.keys()


# Test knn parameter
def test_compute_kernel_knn(mock_data):
    kernel = compute_kernel(mock_data, knn=10)
    assert isinstance(kernel, csr_matrix)


# Test alpha parameter
def test_compute_kernel_alpha(mock_data):
    kernel = compute_kernel(mock_data, alpha=0.5)
    assert isinstance(kernel, csr_matrix)


# Test pca_key parameter
def test_compute_kernel_pca_key(mock_anndata):
    mock_anndata.obsm["custom_pca"] = np.random.rand(mock_anndata.shape[0], 10)
    kernel = compute_kernel(mock_anndata, pca_key="custom_pca")
    assert "DM_Kernel" in mock_anndata.obsp.keys()


# Test kernel_key parameter
def test_compute_kernel_kernel_key(mock_anndata):
    kernel = compute_kernel(mock_anndata, kernel_key="custom_kernel")
    assert "custom_kernel" in mock_anndata.obsp.keys()
--------------------------------------------------------------------------------
/tests/utils_diffusion_maps_from_kernel.py:
--------------------------------------------------------------------------------
# NOTE(review): not collected by default pytest discovery (no `test_` prefix);
# apparent stale duplicate of tests/test_utils_diffusion_maps_from_kernel.py.
import pytest
import numpy as np
from scipy.sparse import csr_matrix
from scipy.sparse.linalg import eigs
from pytest import approx  # NOTE(review): unused import

from palantir.utils import diffusion_maps_from_kernel, determine_multiscale_space


@pytest.fixture
def mock_kernel():
    # Symmetric random 50x50 kernel, as expected by diffusion_maps_from_kernel.
    size = 50
    A = np.random.rand(size, size)
    return csr_matrix((A + A.T) / 2)


def test_diffusion_maps_basic(mock_kernel):
    """Result dict contains T, EigenVectors, EigenValues with default sizes."""
    result = diffusion_maps_from_kernel(mock_kernel)

    assert isinstance(result, dict)
    assert "T" in result and "EigenVectors" in result and "EigenValues" in result

    assert result["T"].shape == (50, 50)
    # Default number of components is 10.
    assert result["EigenVectors"].shape == (50, 10)
    assert result["EigenValues"].shape == (10,)


def test_diffusion_maps_n_components(mock_kernel):
    """n_components controls the eigen-decomposition size."""
    result = diffusion_maps_from_kernel(mock_kernel, n_components=5)

    assert result["EigenVectors"].shape == (50, 5)
    assert result["EigenValues"].shape == (5,)


def test_diffusion_maps_seed(mock_kernel):
    result1 = diffusion_maps_from_kernel(mock_kernel, seed=0)
    result2 = diffusion_maps_from_kernel(mock_kernel, seed=0)

    # Seed usage should yield the same result
    assert np.allclose(result1["EigenValues"], result2["EigenValues"])


def test_diffusion_maps_eigen(mock_kernel):
    """Eigenvalues should match a direct eigs() decomposition of T."""
    result = diffusion_maps_from_kernel(mock_kernel)

    T = result["T"].toarray()
    e_values, e_vectors = eigs(T, 10, tol=1e-4, maxiter=1000)

    # NOTE(review): eigs returns a complex-dtype array; `sorted(...)` on
    # complex values raises TypeError on Python 3 — this file is never
    # collected, so the defect is latent. TODO confirm against the
    # maintained test_utils_* counterpart.
    assert np.allclose(
        result["EigenValues"], np.real(sorted(e_values, reverse=True)[:10]), atol=1e-4
    )


def test_determine_multiscale_space(mock_kernel):
    result = diffusion_maps_from_kernel(mock_kernel)
    msresult = determine_multiscale_space(result)
    # One multiscale row per cell.
    assert msresult.shape[0] == result["EigenVectors"].shape[0]
--------------------------------------------------------------------------------
/tests/utils_run_diffusion_maps.py:
--------------------------------------------------------------------------------
# NOTE(review): not collected by default pytest discovery (no `test_` prefix);
# apparent stale duplicate of tests/test_utils_run_diffusion_maps.py.
import pytest
import pandas as pd
import scanpy as sc
from scipy.sparse import csr_matrix, issparse  # NOTE(review): `issparse` is unused
import numpy as np

from palantir.utils import run_diffusion_maps


# Generate mock DataFrame data
def mock_dataframe(rows, cols):
    """Random (rows x cols) expression DataFrame."""
    return pd.DataFrame(np.random.rand(rows, cols))


# Generate mock sc.AnnData object
def mock_anndata(rows, cols, keys):
    """Random AnnData with one random obsm entry per key in ``keys``."""
    ad = sc.AnnData(np.random.rand(rows, cols))
    for key in keys:
        ad.obsm[key] = np.random.rand(rows, cols)
    return ad


def test_run_diffusion_maps_dataframe():
    """DataFrame input returns the full result dict with expected types."""
    df = mock_dataframe(50, 30)
    result = run_diffusion_maps(df)

    assert isinstance(result, dict)
    assert set(result.keys()) == {"T", "EigenVectors", "EigenValues", "kernel"}

    assert isinstance(result["kernel"], csr_matrix)
    assert isinstance(result["T"], csr_matrix)
    assert isinstance(result["EigenVectors"], pd.DataFrame)
    assert isinstance(result["EigenValues"], pd.Series)


def test_run_diffusion_maps_anndata():
    """AnnData input stores results in obsp/obsm/uns, mirroring the dict."""
    keys = ["X_pca"]
    ad = mock_anndata(50, 30, keys)
    result = run_diffusion_maps(ad)

    assert "DM_Kernel" in ad.obsp
    assert "DM_Similarity" in ad.obsp
    assert "DM_EigenVectors" in ad.obsm
    assert "DM_EigenValues" in ad.uns

    # Stored AnnData fields must be identical to the returned dict entries.
    assert np.array_equal(ad.obsp["DM_Kernel"].toarray(), result["kernel"].toarray())
    assert np.array_equal(ad.obsp["DM_Similarity"].toarray(), result["T"].toarray())
    assert np.array_equal(ad.obsm["DM_EigenVectors"], result["EigenVectors"].values)
    assert np.array_equal(ad.uns["DM_EigenValues"], result["EigenValues"])


def test_run_diffusion_maps_exceptions():
    # Test with neither pd.DataFrame nor sc.AnnData
    with pytest.raises(ValueError):
        run_diffusion_maps("invalid_type")
--------------------------------------------------------------------------------
/tests/utils_run_local_variability.py:
--------------------------------------------------------------------------------
# NOTE(review): not collected by default pytest discovery (no `test_` prefix);
# apparent stale duplicate of tests/test_utils_run_local_variability.py.
import scanpy as sc
import numpy as np
import pytest
from scipy.sparse import csr_matrix

from palantir.utils import run_local_variability


# Mock data for dense matrix
def mock_anndata_dense(n_cells, n_genes, layer_keys, obsp_keys):
    """AnnData with dense random X, layers and obsp matrices."""
    ad = sc.AnnData(np.random.rand(n_cells, n_genes))
    for key in layer_keys:
        ad.layers[key] = np.random.rand(n_cells, n_genes)
    for key in obsp_keys:
        ad.obsp[key] = np.random.rand(n_cells, n_cells)
    return ad


# Mock data for sparse matrix
def mock_anndata_sparse(n_cells, n_genes, layer_keys, obsp_keys):
    """AnnData with sparse (csr) random X, layers and obsp matrices."""
    ad = sc.AnnData(csr_matrix(np.random.rand(n_cells, n_genes)))
    for key in layer_keys:
        ad.layers[key] = csr_matrix(np.random.rand(n_cells, n_genes))
    for key in obsp_keys:
        ad.obsp[key] = csr_matrix(np.random.rand(n_cells, n_cells))
    return ad


# Test with default keys, dense
@pytest.mark.filterwarnings("ignore:invalid value encountered in divide")
def test_run_local_variability_default_dense():
    ad = mock_anndata_dense(50, 20, ["MAGIC_imputed_data"], ["distances"])
    _test_run_local_variability(ad)


# Test with default keys, sparse
@pytest.mark.filterwarnings("ignore:invalid value encountered in divide")
def test_run_local_variability_default_sparse():
    ad = mock_anndata_sparse(50, 20, ["MAGIC_imputed_data"], ["distances"])
    _test_run_local_variability(ad)


# Test with custom keys, dense
@pytest.mark.filterwarnings("ignore:invalid value encountered in divide")
def test_run_local_variability_custom_keys_dense():
    ad = mock_anndata_dense(50, 20, ["custom_expression"], ["custom_distances"])
    _test_run_local_variability(
        ad, "custom_expression", "custom_distances", "custom_local_var"
    )


# Test with custom keys, sparse
@pytest.mark.filterwarnings("ignore:invalid value encountered in divide")
def test_run_local_variability_custom_keys_sparse():
    ad = mock_anndata_sparse(50, 20, ["custom_expression"], ["custom_distances"])
    _test_run_local_variability(
        ad, "custom_expression", "custom_distances", "custom_local_var"
    )


# Helper function for assertions
def _test_run_local_variability(
    ad,
    expression_key="MAGIC_imputed_data",
    distances_key="distances",
    localvar_key="local_variability",
):
    """Run run_local_variability and check the layer + return shape/type."""
    result = run_local_variability(ad, expression_key, distances_key, localvar_key)

    assert localvar_key in ad.layers
    assert isinstance(result, np.ndarray) or isinstance(result, csr_matrix)
    # Shape hard-coded to the (n_cells, n_genes) used by all callers above.
    assert result.shape == (50, 20)


# Test missing keys
def test_run_local_variability_missing_keys():
    ad = mock_anndata_dense(50, 20, ["MAGIC_imputed_data"], ["distances"])

    with pytest.raises(KeyError):
        run_local_variability(ad, "missing_expression", "distances")

    with pytest.raises(KeyError):
        run_local_variability(ad, "MAGIC_imputed_data", "missing_distances")
--------------------------------------------------------------------------------
/tests/utils_run_magic_imputation.py:
--------------------------------------------------------------------------------
# NOTE(review): not collected by default pytest discovery (no `test_` prefix);
# apparent stale duplicate of tests/test_utils_run_magic_imputation.py.
import pytest
import numpy as np
import pandas as pd
import scanpy as sc
from scipy.sparse import csr_matrix

from palantir.utils import run_magic_imputation


@pytest.fixture
def mock_dm_res():
    # Minimal diffusion-map result: only the transition matrix "T" is needed.
    return {"T": csr_matrix(np.random.rand(50, 50))}


# Test with numpy ndarray
def test_run_magic_imputation_ndarray(mock_dm_res):
    data = np.random.rand(50, 20)
    # Default sparse=True returns csr; sparse=False returns ndarray.
    result = run_magic_imputation(data, dm_res=mock_dm_res)
    assert isinstance(result, csr_matrix)
    result = run_magic_imputation(data, dm_res=mock_dm_res, sparse=False)
    assert isinstance(result, np.ndarray)


# Test with pandas DataFrame
def test_run_magic_imputation_dataframe(mock_dm_res):
    data = pd.DataFrame(np.random.rand(50, 20))
    # DataFrame input round-trips to a DataFrame result.
    result = run_magic_imputation(data, dm_res=mock_dm_res)
    assert isinstance(result, pd.DataFrame)


# Test with csr_matrix
def test_run_magic_imputation_csr(mock_dm_res):
    data = csr_matrix(np.random.rand(50, 20))
    result = run_magic_imputation(data, dm_res=mock_dm_res)
    assert isinstance(result, csr_matrix)
    result = run_magic_imputation(data, dm_res=mock_dm_res, sparse=False)
    assert isinstance(result, np.ndarray)


# Test with AnnData
def test_run_magic_imputation_anndata():
    data = sc.AnnData(np.random.rand(50, 20))
    data.obsp["DM_Similarity"] = np.random.rand(50, 50)
    result = run_magic_imputation(data)
    # Imputed values are written back to the default layer key.
    assert "MAGIC_imputed_data" in data.layers
    assert isinstance(result, csr_matrix)


# Test with AnnData and custom keys
def test_run_magic_imputation_anndata_custom_keys():
    data = sc.AnnData(np.random.rand(50, 20))
    data.layers["custom_expr"] = np.random.rand(50, 20)
    data.obsp["custom_sim"] = np.random.rand(50, 50)
    result = run_magic_imputation(
        data,
        expression_key="custom_expr",
        sim_key="custom_sim",
        imputation_key="custom_imp",
    )
    assert "custom_imp" in data.layers


# Test with missing dm_res and not AnnData
def test_run_magic_imputation_missing_dm_res():
    # Without an AnnData carrying its own similarity matrix, dm_res is required.
    data = np.random.rand(50, 20)
    with pytest.raises(ValueError):
        run_magic_imputation(data)


# Test with missing expression_key in AnnData
def test_run_magic_imputation_missing_expression_key():
    data = sc.AnnData(np.random.rand(50, 20))
    data.obsp["DM_Similarity"] = np.random.rand(50, 50)
    with pytest.raises(ValueError):
        run_magic_imputation(data, expression_key="missing_key")
--------------------------------------------------------------------------------
/tests/utils_run_pca.py:
--------------------------------------------------------------------------------
# NOTE(review): not collected by default pytest discovery (no `test_` prefix);
# apparent stale duplicate of tests/test_util_run_pca.py.
import pytest
import pandas as pd
import scanpy as sc
import numpy as np

from palantir.utils import run_pca


@pytest.fixture
def mock_data():
    # 50 cells x 500 genes of random expression with labeled axes.
    n_cells = 50
    n_genes = 500
    return pd.DataFrame(
        np.random.rand(n_cells, n_genes),
        columns=[f"gene_{i}" for i in range(n_genes)],
        index=[f"cell_{i}" for i in range(n_cells)],
    )


@pytest.fixture
def mock_anndata(mock_data):
    # AnnData with a multiscale-space obsm entry and a random HVG mask.
    ad = sc.AnnData(X=mock_data)
    ad.obsm["DM_EigenVectors_multiscaled"] = mock_data
    ad.var["highly_variable"] = np.random.choice([True, False], size=mock_data.shape[1])
    return ad


# Test with DataFrame
def test_run_pca_dataframe(mock_data):
    pca_results, var_ratio = run_pca(mock_data, use_hvg=False)
    assert isinstance(pca_results, pd.DataFrame)
    assert isinstance(var_ratio, np.ndarray)
    assert pca_results.shape[1] <= 300  # Check n_components


# Test with AnnData
def test_run_pca_anndata(mock_anndata):
    pca_results, var_ratio = run_pca(mock_anndata)
    # AnnData input stores the projection under the default obsm key.
    assert "X_pca" in mock_anndata.obsm.keys()
    assert mock_anndata.obsm["X_pca"].shape[1] <= 300


# Test n_components parameter
def test_run_pca_components(mock_data):
    pca_results, _ = run_pca(mock_data, n_components=5, use_hvg=False)
    assert pca_results.shape[1] == 5


# Test use_hvg parameter
def test_run_pca_hvg(mock_anndata):
    pca_results, _ = run_pca(mock_anndata, use_hvg=True)
    assert pca_results.shape[1] <= 300


# Test pca_key parameter
def test_run_pca_pca_key(mock_anndata):
    run_pca(mock_anndata, pca_key="custom_key")
    assert "custom_key" in mock_anndata.obsm.keys()
    assert mock_anndata.obsm["custom_key"].shape[1] <= 300
--------------------------------------------------------------------------------