├── .github └── Figures │ ├── Fig1_v10.png │ ├── Fig2_v7.png │ ├── Figure1.png │ ├── Figure2.png │ ├── archive │ ├── Fig1_v9.png │ ├── model_comparison_example_data_grouped.png │ ├── model_comparison_plots │ │ ├── model_comparison_logfold_confint_extended.png │ │ └── model_comparison_replicates_confint_extended.png │ ├── overall_benchmark_plots │ │ ├── 9_heatmaps_concept_fig.png │ │ ├── absolute_increase_lines_concept_fig.png │ │ ├── log_fold_increase_lines_concept_fig.png │ │ └── negative_heatmaps.png │ └── threshold_determination_plots │ │ ├── threshold_determination_fdr.png │ │ ├── threshold_determination_mcc.png │ │ ├── threshold_determination_tnr.png │ │ ├── threshold_determination_tpr.png │ │ └── threshold_function_optimal_mcc.png │ ├── covariate_structure.png │ └── data_structure.png ├── .gitignore ├── .readthedocs.yml ├── .travis.yml ├── LICENSE ├── README.md ├── docs ├── Makefile ├── make.bat ├── requires.txt └── source │ ├── Data_import_and_visualization.ipynb │ ├── Modeling_options_and_result_analysis.ipynb │ ├── _ext │ └── edit_on_github.py │ ├── _static │ ├── colab_badge.svg │ └── nbviewer_badge.svg │ ├── _templates │ ├── Data_import_and_visualization.ipynb │ ├── Modeling_options_and_result_analysis.ipynb │ ├── autosummary │ │ ├── base.rst │ │ └── class.rst │ ├── getting_started.ipynb │ └── using_other_compositional_methods.ipynb │ ├── api.rst │ ├── compositional_data.rst │ ├── conf.py │ ├── data.rst │ ├── getting_started.ipynb │ ├── index.rst │ ├── installation.rst │ ├── models.rst │ ├── sccoda.model.other_models.ALDEx2Model.eval_model.rst │ ├── sccoda.model.other_models.ALDEx2Model.fit_model.rst │ ├── sccoda.model.other_models.ALDEx2Model.rst │ ├── sccoda.model.other_models.ALRModel_ttest.eval_model.rst │ ├── sccoda.model.other_models.ALRModel_ttest.fit_model.rst │ ├── sccoda.model.other_models.ALRModel_ttest.rst │ ├── sccoda.model.other_models.ALRModel_wilcoxon.eval_model.rst │ ├── sccoda.model.other_models.ALRModel_wilcoxon.fit_model.rst │ ├── 
sccoda.model.other_models.ALRModel_wilcoxon.rst │ ├── sccoda.model.other_models.ANCOMBCModel.eval_model.rst │ ├── sccoda.model.other_models.ANCOMBCModel.fit_model.rst │ ├── sccoda.model.other_models.ANCOMBCModel.rst │ ├── sccoda.model.other_models.AncomModel.eval_model.rst │ ├── sccoda.model.other_models.AncomModel.fit_model.rst │ ├── sccoda.model.other_models.AncomModel.rst │ ├── sccoda.model.other_models.BetaBinomialModel.eval_model.rst │ ├── sccoda.model.other_models.BetaBinomialModel.fit_model.rst │ ├── sccoda.model.other_models.BetaBinomialModel.rst │ ├── sccoda.model.other_models.CLRModel.eval_model.rst │ ├── sccoda.model.other_models.CLRModel.fit_model.rst │ ├── sccoda.model.other_models.CLRModel.rst │ ├── sccoda.model.other_models.CLRModel_ttest.eval_model.rst │ ├── sccoda.model.other_models.CLRModel_ttest.fit_model.rst │ ├── sccoda.model.other_models.CLRModel_ttest.rst │ ├── sccoda.model.other_models.DirichRegModel.eval_model.rst │ ├── sccoda.model.other_models.DirichRegModel.fit_model.rst │ ├── sccoda.model.other_models.DirichRegModel.rst │ ├── sccoda.model.other_models.HaberModel.eval_model.rst │ ├── sccoda.model.other_models.HaberModel.fit_model.rst │ ├── sccoda.model.other_models.HaberModel.rst │ ├── sccoda.model.other_models.SimpleModel.get_chains_after_burnin.rst │ ├── sccoda.model.other_models.SimpleModel.get_y_hat.rst │ ├── sccoda.model.other_models.SimpleModel.make_result.rst │ ├── sccoda.model.other_models.SimpleModel.rst │ ├── sccoda.model.other_models.SimpleModel.sample_hmc.rst │ ├── sccoda.model.other_models.SimpleModel.sample_hmc_da.rst │ ├── sccoda.model.other_models.SimpleModel.sample_nuts.rst │ ├── sccoda.model.other_models.SimpleModel.sampling.rst │ ├── sccoda.model.other_models.TTest.eval_model.rst │ ├── sccoda.model.other_models.TTest.fit_model.rst │ ├── sccoda.model.other_models.TTest.rst │ ├── sccoda.model.other_models.scdney_model.analyze.rst │ ├── sccoda.model.other_models.scdney_model.rst │ ├── 
sccoda.model.scCODA_model.CompositionalModel.get_chains_after_burnin.rst │ ├── sccoda.model.scCODA_model.CompositionalModel.make_result.rst │ ├── sccoda.model.scCODA_model.CompositionalModel.rst │ ├── sccoda.model.scCODA_model.CompositionalModel.sample_hmc.rst │ ├── sccoda.model.scCODA_model.CompositionalModel.sample_hmc_da.rst │ ├── sccoda.model.scCODA_model.CompositionalModel.sample_nuts.rst │ ├── sccoda.model.scCODA_model.CompositionalModel.sampling.rst │ ├── sccoda.model.scCODA_model.scCODAModel.get_chains_after_burnin.rst │ ├── sccoda.model.scCODA_model.scCODAModel.get_y_hat.rst │ ├── sccoda.model.scCODA_model.scCODAModel.make_result.rst │ ├── sccoda.model.scCODA_model.scCODAModel.rst │ ├── sccoda.model.scCODA_model.scCODAModel.sample_hmc.rst │ ├── sccoda.model.scCODA_model.scCODAModel.sample_hmc_da.rst │ ├── sccoda.model.scCODA_model.scCODAModel.sample_nuts.rst │ ├── sccoda.model.scCODA_model.scCODAModel.sampling.rst │ ├── sccoda.util.cell_composition_data.from_pandas.rst │ ├── sccoda.util.cell_composition_data.from_scanpy.rst │ ├── sccoda.util.cell_composition_data.from_scanpy_dir.rst │ ├── sccoda.util.cell_composition_data.from_scanpy_list.rst │ ├── sccoda.util.cell_composition_data.read_anndata_one_sample.rst │ ├── sccoda.util.comp_ana.CompositionalAnalysis.rst │ ├── sccoda.util.data_generation.b_w_from_abs_change.rst │ ├── sccoda.util.data_generation.counts_from_first.rst │ ├── sccoda.util.data_generation.generate_case_control.rst │ ├── sccoda.util.data_generation.sparse_effect_matrix.rst │ ├── sccoda.util.data_visualization.boxplots.rst │ ├── sccoda.util.data_visualization.stackbar.rst │ ├── sccoda.util.data_visualization.stacked_barplot.rst │ ├── sccoda.util.helper_functions.sample_size_estimate.rst │ ├── sccoda.util.result_classes.CAResult.compare_parameters_to_truth.rst │ ├── sccoda.util.result_classes.CAResult.complete_alpha_df.rst │ ├── sccoda.util.result_classes.CAResult.complete_beta_df.rst │ ├── 
sccoda.util.result_classes.CAResult.credible_effects.rst │ ├── sccoda.util.result_classes.CAResult.distance_to_truth.rst │ ├── sccoda.util.result_classes.CAResult.rst │ ├── sccoda.util.result_classes.CAResult.save.rst │ ├── sccoda.util.result_classes.CAResult.set_fdr.rst │ ├── sccoda.util.result_classes.CAResult.summary.rst │ ├── sccoda.util.result_classes.CAResult.summary_extended.rst │ ├── sccoda.util.result_classes.CAResult.summary_prepare.rst │ └── using_other_compositional_methods.ipynb ├── requirements.txt ├── sccoda ├── __init__.py ├── datasets │ ├── __init__.py │ ├── _datasets.py │ └── haber_counts.csv ├── model │ ├── __init__.py │ ├── other_models.py │ └── scCODA_model.py └── util │ ├── __init__.py │ ├── cell_composition_data.py │ ├── comp_ana.py │ ├── data_generation.py │ ├── data_visualization.py │ ├── helper_functions.py │ └── result_classes.py ├── setup.py ├── tests ├── __init__.py └── unit_tests.py └── tutorials ├── Data_import_and_visualization.ipynb ├── Modeling_options_and_result_analysis.ipynb ├── __init__.py ├── getting_started.ipynb ├── test └── using_other_compositional_methods.ipynb /.github/Figures/Fig1_v10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/theislab/scCODA/887955e5f968960e2112fdab4258a205596540ee/.github/Figures/Fig1_v10.png -------------------------------------------------------------------------------- /.github/Figures/Fig2_v7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/theislab/scCODA/887955e5f968960e2112fdab4258a205596540ee/.github/Figures/Fig2_v7.png -------------------------------------------------------------------------------- /.github/Figures/Figure1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/theislab/scCODA/887955e5f968960e2112fdab4258a205596540ee/.github/Figures/Figure1.png 
-------------------------------------------------------------------------------- /.github/Figures/Figure2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/theislab/scCODA/887955e5f968960e2112fdab4258a205596540ee/.github/Figures/Figure2.png -------------------------------------------------------------------------------- /.github/Figures/archive/Fig1_v9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/theislab/scCODA/887955e5f968960e2112fdab4258a205596540ee/.github/Figures/archive/Fig1_v9.png -------------------------------------------------------------------------------- /.github/Figures/archive/model_comparison_example_data_grouped.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/theislab/scCODA/887955e5f968960e2112fdab4258a205596540ee/.github/Figures/archive/model_comparison_example_data_grouped.png -------------------------------------------------------------------------------- /.github/Figures/archive/model_comparison_plots/model_comparison_logfold_confint_extended.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/theislab/scCODA/887955e5f968960e2112fdab4258a205596540ee/.github/Figures/archive/model_comparison_plots/model_comparison_logfold_confint_extended.png -------------------------------------------------------------------------------- /.github/Figures/archive/model_comparison_plots/model_comparison_replicates_confint_extended.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/theislab/scCODA/887955e5f968960e2112fdab4258a205596540ee/.github/Figures/archive/model_comparison_plots/model_comparison_replicates_confint_extended.png -------------------------------------------------------------------------------- 
/.github/Figures/archive/overall_benchmark_plots/9_heatmaps_concept_fig.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/theislab/scCODA/887955e5f968960e2112fdab4258a205596540ee/.github/Figures/archive/overall_benchmark_plots/9_heatmaps_concept_fig.png -------------------------------------------------------------------------------- /.github/Figures/archive/overall_benchmark_plots/absolute_increase_lines_concept_fig.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/theislab/scCODA/887955e5f968960e2112fdab4258a205596540ee/.github/Figures/archive/overall_benchmark_plots/absolute_increase_lines_concept_fig.png -------------------------------------------------------------------------------- /.github/Figures/archive/overall_benchmark_plots/log_fold_increase_lines_concept_fig.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/theislab/scCODA/887955e5f968960e2112fdab4258a205596540ee/.github/Figures/archive/overall_benchmark_plots/log_fold_increase_lines_concept_fig.png -------------------------------------------------------------------------------- /.github/Figures/archive/overall_benchmark_plots/negative_heatmaps.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/theislab/scCODA/887955e5f968960e2112fdab4258a205596540ee/.github/Figures/archive/overall_benchmark_plots/negative_heatmaps.png -------------------------------------------------------------------------------- /.github/Figures/archive/threshold_determination_plots/threshold_determination_fdr.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/theislab/scCODA/887955e5f968960e2112fdab4258a205596540ee/.github/Figures/archive/threshold_determination_plots/threshold_determination_fdr.png -------------------------------------------------------------------------------- /.github/Figures/archive/threshold_determination_plots/threshold_determination_mcc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/theislab/scCODA/887955e5f968960e2112fdab4258a205596540ee/.github/Figures/archive/threshold_determination_plots/threshold_determination_mcc.png -------------------------------------------------------------------------------- /.github/Figures/archive/threshold_determination_plots/threshold_determination_tnr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/theislab/scCODA/887955e5f968960e2112fdab4258a205596540ee/.github/Figures/archive/threshold_determination_plots/threshold_determination_tnr.png -------------------------------------------------------------------------------- /.github/Figures/archive/threshold_determination_plots/threshold_determination_tpr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/theislab/scCODA/887955e5f968960e2112fdab4258a205596540ee/.github/Figures/archive/threshold_determination_plots/threshold_determination_tpr.png -------------------------------------------------------------------------------- /.github/Figures/archive/threshold_determination_plots/threshold_function_optimal_mcc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/theislab/scCODA/887955e5f968960e2112fdab4258a205596540ee/.github/Figures/archive/threshold_determination_plots/threshold_function_optimal_mcc.png -------------------------------------------------------------------------------- 
/.github/Figures/covariate_structure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/theislab/scCODA/887955e5f968960e2112fdab4258a205596540ee/.github/Figures/covariate_structure.png -------------------------------------------------------------------------------- /.github/Figures/data_structure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/theislab/scCODA/887955e5f968960e2112fdab4258a205596540ee/.github/Figures/data_structure.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | .idea/ 27 | data/ 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | .hypothesis/ 50 | .pytest_cache/ 51 | 52 | # Translations 53 | *.mo 54 | *.pot 55 | 56 | # Django stuff: 57 | *.log 58 | local_settings.py 59 | db.sqlite3 60 | 61 | # Flask stuff: 62 | instance/ 63 | .webassets-cache 64 | 65 | # Scrapy stuff: 66 | .scrapy 67 | 68 | # Sphinx documentation 69 | docs/_build/ 70 | 71 | # PyBuilder 72 | target/ 73 | 74 | # Jupyter Notebook 75 | .ipynb_checkpoints 76 | 77 | # pyenv 78 | .python-version 79 | 80 | # celery beat schedule file 81 | celerybeat-schedule 82 | 83 | # SageMath parsed files 84 | *.sage.py 85 | 86 | # Environments 87 | .env 88 | .venv 89 | env/ 90 | venv/ 91 | ENV/ 92 | env.bak/ 93 | venv.bak/ 94 | 95 | # Spyder project settings 96 | .spyderproject 97 | .spyproject 98 | 99 | # Rope project settings 100 | .ropeproject 101 | 102 | # mkdocs documentation 103 | /site 104 | 105 | # mypy 106 | .mypy_cache/ 107 | 108 | # R stuff 109 | .RData 110 | .Rhistory 111 | 112 | # Other 113 | prototyping/ 114 | .pkl 115 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | build: 9 | image: latest 10 | 11 | # Build documentation in the docs/ directory with Sphinx 12 | sphinx: 13 | configuration: docs/source/conf.py 14 | 15 | # Optionally build your docs in additional formats such as PDF 16 | # formats: 17 | # - pdf 18 | 19 | # Optionally set the version of Python and requirements required to build your docs 20 | python: 21 | version: 3.8 22 | install: 23 | - 
requirements: docs/requires.txt 24 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | python: 4 | - "3.7" 5 | 6 | install: 7 | - pip install -r requirements.txt -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2020, Theis, Schubert and Müller Lab 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # scCODA - Single-cell differential composition analysis 2 | 3 | > **Note** 4 | > This implementation is no longer maintained. A new version in Jax is available in [pertpy](https://pertpy.readthedocs.io/en/latest). 5 | > 6 | > For more information and contribution guidelines please visit the associated Github repository: https://github.com/theislab/pertpy 7 | 8 | scCODA allows for identification of compositional changes in high-throughput sequencing count data, especially cell compositions from scRNA-seq. 9 | It also provides a framework for integration of cell-type annotated data directly from [scanpy](https://scanpy.readthedocs.io/en/stable/) and other sources. 10 | Aside from the scCODA model (Büttner, Ostner *et al* (2021)), the package also allows the easy application of other differential testing methods. 11 | 12 | ![scCODA](.github/Figures/Figure1.png) 13 | 14 | The statistical methodology and benchmarking performance are described in: 15 | 16 | Büttner, Ostner *et al* (2021). 
**scCODA is a Bayesian model for compositional single-cell data analysis** 17 | ([*Nature Communications*](https://www.nature.com/articles/s41467-021-27150-6)) 18 | 19 | Code for reproducing the analysis from the paper is available [here](https://github.com/theislab/scCODA_reproducibility). 20 | 21 | For further information on the scCODA package and model, please refer to the 22 | [documentation](https://sccoda.readthedocs.io/en/latest/) and the 23 | [tutorials](https://github.com/theislab/scCODA/blob/master/tutorials). 24 | 25 | ## Installation 26 | 27 | Running the package requires a working Python environment (>=3.8). 28 | 29 | This package uses the `tensorflow` (`>=2.8`) and `tensorflow-probability` (`>=0.16`) packages. 30 | The GPU computation features of these packages have not been tested with scCODA and are thus not recommended. 31 | 32 | **To install scCODA via pip, call**: 33 | 34 | pip install sccoda 35 | 36 | 37 | **To install scCODA from source**: 38 | 39 | - Navigate to the directory that you want to install scCODA in 40 | - Clone the repository from Github (https://github.com/theislab/scCODA): 41 | 42 | `git clone https://github.com/theislab/scCODA` 43 | 44 | - Navigate to the root directory of scCODA: 45 | 46 | `cd scCODA` 47 | 48 | - Install dependencies: 49 | 50 | `pip install -r requirements.txt` 51 | 52 | - Install the package: 53 | 54 | `python setup.py install` 55 | 56 | **Docker container**: 57 | 58 | We provide a Docker container image for scCODA (https://hub.docker.com/repository/docker/wollmilchsau/scanpy_sccoda). 59 | 60 | ## Usage 61 | 62 | Import scCODA in a Python session via: 63 | 64 | import sccoda 65 | 66 | **Tutorials** 67 | 68 | scCODA provides a number of tutorials for various purposes. Please also visit the [documentation](https://sccoda.readthedocs.io/en/latest/) for further information on the statistical model, data structure and API. 
69 | 70 | - The ["getting started" tutorial](https://sccoda.readthedocs.io/en/latest/getting_started.html) provides a quick-start guide for using scCODA. 71 | 72 | - In the [advanced tutorial](https://sccoda.readthedocs.io/en/latest/Modeling_options_and_result_analysis.html), options for model specification, diagnostics, and result interpretation are discussed. 73 | 74 | - The [data import and visualization tutorial](https://sccoda.readthedocs.io/en/latest/Data_import_and_visualization.html) focuses on loading data from different sources and visualizing their characteristics. 75 | 76 | - The [tutorial on other methods](https://sccoda.readthedocs.io/en/latest/using_other_compositional_methods.html) explains how to apply other methods for differential abundance testing from within scCODA. 77 | 78 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = python3 -msphinx 7 | SPHINXPROJ = scCODA 8 | SOURCEDIR = source 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=python3 -msphinx 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | set SPHINXPROJ=scCODA 13 | 14 | if "%1" == "" goto help 15 | 16 | %SPHINXBUILD% >NUL 2>NUL 17 | if errorlevel 9009 ( 18 | echo. 19 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 20 | echo.installed, then set the SPHINXBUILD environment variable to point 21 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 22 | echo.may add the Sphinx directory to PATH. 23 | echo. 24 | echo.If you don't have Sphinx installed, grab it from 25 | echo.http://sphinx-doc.org/ 26 | exit /b 1 27 | ) 28 | 29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 30 | goto end 31 | 32 | :help 33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 34 | 35 | :end 36 | popd 37 | -------------------------------------------------------------------------------- /docs/requires.txt: -------------------------------------------------------------------------------- 1 | -r ../requirements.txt 2 | sphinx==3.4.3 3 | sphinx_autodoc_typehints==1.11.1 4 | sphinx_rtd_theme==0.5.1 5 | docutils==0.16 6 | typing_extensions==3.7.4.3 7 | importlib_metadata==3.3.0 8 | setuptools==51.0.0 9 | setuptools_scm==5.0.1 10 | ipykernel==5.4.2 11 | nbsphinx==0.8.0 12 | jinja2==2.11.2 13 | markupsafe==1.1.1 14 | scanpydoc==0.5.5 15 | git+https://github.com/theislab/scCODA.git@master 16 | -------------------------------------------------------------------------------- /docs/source/_ext/edit_on_github.py: -------------------------------------------------------------------------------- 1 | 
""" 2 | Loosely based on gist.github.com/MantasVaitkunas/7c16de233812adcb7028 3 | """ 4 | 5 | import os 6 | import warnings 7 | 8 | 9 | __licence__ = "BSD (3 clause)" 10 | 11 | 12 | def get_github_repo(app): 13 | return app.config.github_repo, "/docs/" 14 | 15 | 16 | def html_page_context(app, pagename, templatename, context, doctree): 17 | if templatename != "page.html": 18 | return 19 | 20 | if not app.config.github_repo: 21 | warnings.warn("`github_repo `not specified") 22 | return 23 | 24 | 25 | path = os.path.relpath(doctree.get("source"), app.builder.srcdir) 26 | repo, conf_py_path = get_github_repo(app) 27 | 28 | # For sphinx_rtd_theme. 29 | context["display_github"] = True 30 | context["github_user"] = "theislab" 31 | context["github_version"] = "master" 32 | context["github_repo"] = repo 33 | context["conf_py_path"] = conf_py_path 34 | 35 | 36 | def setup(app): 37 | app.add_config_value("github_repo", "", True) 38 | app.connect("html-page-context", html_page_context) -------------------------------------------------------------------------------- /docs/source/_static/colab_badge.svg: -------------------------------------------------------------------------------- 1 | Open in ColabOpen in Colab 2 | -------------------------------------------------------------------------------- /docs/source/_static/nbviewer_badge.svg: -------------------------------------------------------------------------------- 1 | 2 | 18 | 20 | 21 | 23 | image/svg+xml 24 | 26 | 27 | 28 | 29 | 30 | 32 | 35 | 38 | 41 | 44 | 47 | 50 | 53 | 56 | 59 | 62 | 65 | 68 | 69 | 89 | 93 | 98 | 102 | 103 | 105 | 111 | 112 | 115 | 119 | 123 | 127 | 128 | 133 | 141 | 148 | 156 | Open in nbviewer 159 | 160 | 167 | Open in nbviewer 172 | 173 | 183 | 184 | 188 | 193 | 197 | 201 | 205 | 210 | 214 | 223 | 224 | 225 | 230 | 234 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | -------------------------------------------------------------------------------- /docs/source/_templates/autosummary/base.rst: 
-------------------------------------------------------------------------------- 1 | :github_url: {{ fullname | github_url }} 2 | 3 | {% extends "!autosummary/base.rst" %} 4 | 5 | .. http://www.sphinx-doc.org/en/stable/ext/autosummary.html#customizing-templates -------------------------------------------------------------------------------- /docs/source/_templates/autosummary/class.rst: -------------------------------------------------------------------------------- 1 | :github_url: {{ fullname | github_url }} 2 | 3 | {{ fullname | escape | underline}} 4 | 5 | .. currentmodule:: {{ module }} 6 | 7 | .. add toctree option to make autodoc generate the pages 8 | 9 | .. autoclass:: {{ objname }} 10 | 11 | {% block attributes %} 12 | {% if attributes %} 13 | .. rubric:: Attributes 14 | 15 | .. autosummary:: 16 | :toctree: . 17 | {% for item in attributes %} 18 | {%- if item[0] != "_" %} 19 | ~{{ fullname }}.{{ item }} 20 | {%- endif -%} 21 | {%- endfor %} 22 | {% endif %} 23 | {% endblock %} 24 | 25 | {% block methods %} 26 | {% if methods %} 27 | .. rubric:: Methods 28 | 29 | .. autosummary:: 30 | :toctree: . 31 | {% if objname != "CAResult" %} 32 | {% for item in methods %} 33 | {% if item[0] != "_" %} 34 | ~{{ fullname }}.{{ item }} 35 | {% endif %} 36 | {%- endfor %} 37 | {% else %} 38 | {% for item in methods %} 39 | {% if ((item[0] != "_") and (item not in inherited_members)) %} 40 | ~{{ fullname }}.{{ item }} 41 | {% endif %} 42 | {%- endfor %} 43 | {% endif %} 44 | {% endif %} 45 | {% endblock %} 46 | -------------------------------------------------------------------------------- /docs/source/api.rst: -------------------------------------------------------------------------------- 1 | .. 
automodule:: sccoda 2 | 3 | API 4 | === 5 | 6 | We advise to import scCODA in a python session via:: 7 | 8 | import sccoda 9 | dat = sccoda.util.cell_composition_data 10 | ana = sccoda.util.comp_ana 11 | viz = sccoda.util.data_visualization 12 | 13 | The workflow in scCODA starts with reading in cell count data (``dat``) and visualizing them (``viz``) 14 | or synthetically generating cell count data (``util.data_generation``). 15 | 16 | Data acquisition 17 | ---------------- 18 | 19 | **Integrating data sources (dat)** (scanpy or pandas) 20 | 21 | .. autosummary:: 22 | :toctree: . 23 | 24 | sccoda.util.cell_composition_data.from_pandas 25 | sccoda.util.cell_composition_data.from_scanpy 26 | sccoda.util.cell_composition_data.from_scanpy_dir 27 | sccoda.util.cell_composition_data.from_scanpy_list 28 | sccoda.util.cell_composition_data.read_anndata_one_sample 29 | 30 | 31 | **Synthetic data generation** 32 | 33 | .. autosummary:: 34 | :toctree: . 35 | 36 | sccoda.util.data_generation.generate_case_control 37 | sccoda.util.data_generation.b_w_from_abs_change 38 | sccoda.util.data_generation.counts_from_first 39 | sccoda.util.data_generation.sparse_effect_matrix 40 | 41 | **Compositional data visualization** 42 | 43 | Compositional datasets can be plotted via the methods in ``util.data_visualization``. 44 | 45 | .. autosummary:: 46 | :toctree: . 47 | 48 | sccoda.util.data_visualization.stacked_barplot 49 | sccoda.util.data_visualization.boxplots 50 | sccoda.util.data_visualization.stackbar 51 | 52 | Model setup and inference 53 | ------------------------- 54 | 55 | Using the scCODA model is easiest by generating an instance of ``ana.CompositionalAnalysis``. 56 | By specifying the formula via the `patsy `_ syntax, many combinations and 57 | transformations of the covariates can be performed without redefining the covariate matrix. Also, the reference cell 58 | type needs to be specified in this step. 59 | 60 | **The scCODA model** 61 | 62 | .. 
autosummary:: 63 | :toctree: . 64 | 65 | sccoda.util.comp_ana.CompositionalAnalysis 66 | sccoda.model.scCODA_model.CompositionalModel 67 | sccoda.model.scCODA_model.scCODAModel 68 | 69 | **Utility functions** 70 | 71 | .. autosummary:: 72 | :toctree: . 73 | 74 | sccoda.util.helper_functions.sample_size_estimate 75 | 76 | Result evaluation 77 | ----------------- 78 | 79 | Executing an inference method on a compositional model produces a ``sccoda.util.result_classes.CAResult`` object. This 80 | class extends the ``InferenceData`` class of `arviz `_ and supports all its 81 | diagnostic and plotting functionality. 82 | 83 | .. autosummary:: 84 | :toctree: . 85 | 86 | sccoda.util.result_classes.CAResult 87 | 88 | 89 | Model comparison 90 | ---------------- 91 | 92 | ``sccoda.model.other_models`` contains implementations of several compositional methods from microbiome analysis and 93 | non-compositional tests that can be used for comparison. 94 | 95 | .. autosummary:: 96 | :toctree: . 97 | 98 | sccoda.model.other_models.SimpleModel 99 | sccoda.model.other_models.scdney_model 100 | sccoda.model.other_models.HaberModel 101 | sccoda.model.other_models.CLRModel 102 | sccoda.model.other_models.TTest 103 | sccoda.model.other_models.CLRModel_ttest 104 | sccoda.model.other_models.ALDEx2Model 105 | sccoda.model.other_models.ALRModel_ttest 106 | sccoda.model.other_models.ALRModel_wilcoxon 107 | sccoda.model.other_models.AncomModel 108 | sccoda.model.other_models.DirichRegModel 109 | sccoda.model.other_models.BetaBinomialModel 110 | sccoda.model.other_models.ANCOMBCModel 111 | -------------------------------------------------------------------------------- /docs/source/compositional_data.rst: -------------------------------------------------------------------------------- 1 | About scCODA 2 | ============ 3 | 4 | Various biological factors, such as diseases, aging, and immunity, are known to have significant effects on the 5 | cellular structure on a wide range of tissues. 
Thus, studying these changes more carefully is of particular interest 6 | for many research questions. Recent advances in single-cell RNA sequencing technologies open up the possibility of 7 | accurately annotating large numbers of individual cells from a tissue sample, paving the way for differential analysis 8 | of cell populations. 9 | 10 | Compositional data analysis in scRNA-seq 11 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 12 | 13 | When doing differential population analysis, one property of cell population data is often overlooked. Since all 14 | single-cell analysis platforms are limited in their throughput, the number of individual cells in a sample is 15 | predetermined. Thus, cell populations are compositional. They can only be determined up to a multiplicative factor, inducing a negative 16 | correlative bias between the cell types. Following 17 | `Aitchison (Journal of the Royal Statistical Society, 1982) `_, 18 | compositional data also has to be interpreted in terms of ratios, e.g. with respect to a reference factor. 19 | 20 | Features of scCODA 21 | ^^^^^^^^^^^^^^^^^^ 22 | 23 | The scCODA model (`Büttner, Ostner et al. (2021) `_) 24 | is a model that was specifically designed to perform compositional data analysis in scRNA-seq. 25 | Apart from the compositionality of cell population data, there are some other challenges in comparing scRNA-seq 26 | populations, which scCODA addresses, including very small sample sizes and multiple comparisons. 27 | It allows the user to select any reference cell type in order to see the effects 28 | of biological factors from different perspectives. 29 | 30 | Because each sample in scRNA-seq usually contains thousands of cells, performing scRNA-seq on a large number of samples is expensive 31 | and time-consuming. Thus, there are often very few biological replicates available, and frequentist tests will 32 | result in highly uncertain estimates with large confidence intervals. 
scCODA uses Bayesian 33 | modeling and its possibility to include prior beliefs to obtain accurate results even in a low-sample setting. 34 | 35 | Also, most biological factors only affect a fraction of the total cell population. It is therefore important to 36 | determine the most important changes during the analysis. Since Bayesian analysis does not support the concept 37 | of p-values, scCODA instead uses spike-and-slab priors to automatically determine statistically credible effects. 38 | 39 | For more detailed information on the scCODA model, see 40 | `Büttner, Ostner et al. (2021) `_. 41 | 42 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # http://www.sphinx-doc.org/en/master/config 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
#

import os
import sys
import datetime
import matplotlib

from pathlib import Path

# Select the non-interactive Agg backend before anything imports pyplot, so
# the documentation build does not need a display.
matplotlib.use('agg')

# Directory containing this conf.py.
HERE = Path(__file__).parent
# Prepend the parent directory and the local ``_ext`` directory to the import
# path so that modules located there can be imported below and by Sphinx.
sys.path[:0] = [str(HERE.parent), str(HERE / '_ext')]

import sccoda
# True only when the READTHEDOCS environment variable is exactly 'True'.
on_rtd = os.environ.get('READTHEDOCS') == 'True'

needs_sphinx = "2.0"

# -- Retrieve notebooks ------------------------------------------------

from urllib.request import urlretrieve

notebooks_url = "https://github.com/theislab/scCODA/raw/master/tutorials/"
# One entry per tutorial notebook.  Every entry needs its own trailing comma:
# adjacent string literals are concatenated implicitly, which would silently
# merge two file names into a single, non-existent one.
notebooks = [
    "getting_started.ipynb",
    "Data_import_and_visualization.ipynb",
    "Modeling_options_and_result_analysis.ipynb",
    "using_other_compositional_methods.ipynb",
]
for nb in notebooks:
    # Best effort: a failed download must not abort the documentation build,
    # but it is reported instead of being swallowed silently.  URLError,
    # HTTPError and ContentTooShortError are all subclasses of OSError, as are
    # errors raised while writing the local file.
    try:
        urlretrieve(notebooks_url + nb, nb)
    except OSError as err:
        print(f"Could not retrieve notebook {nb}: {err}", file=sys.stderr)

# -- Project information -----------------------------------------------------

project = 'scCODA'
title = 'scCODA: A Bayesian model for compositional single-cell data analysis'
author = 'Johannes Ostner, Maren Büttner, Benjamin Schubert'
copyright = f"{datetime.datetime.now():%Y}, {author}"

# Drop the ".dirty" marker from the package version string, if present.
version = sccoda.__version__.replace(".dirty", "")
release = version

# -- General configuration ---------------------------------------------------

# Add any paths that contain templates here, relative to this directory.
# Core Sphinx settings: template location, accepted source formats, root
# document, default interpreted-text role, ignored paths and code highlighting.
templates_path = ['_templates']
source_suffix = [".rst", ".ipynb"]
master_doc = 'index'
default_role = 'literal'
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
pygments_style = 'sphinx'

# Sphinx extensions to load.  The final entry appends the stem of every
# ``*.py`` file found in the ``extensions`` directory next to this file.
# NOTE(review): the path setup earlier in this file adds ``_ext`` (not
# ``extensions``) to sys.path; if no ``extensions`` directory exists this glob
# yields nothing and no local extension is loaded -- confirm which is intended.
extensions = ['sphinx.ext.autodoc',
              'sphinx.ext.intersphinx',
              'sphinx.ext.doctest',
              'sphinx.ext.coverage',
              'sphinx.ext.mathjax',
              'sphinx.ext.napoleon',
              'sphinx.ext.autosummary',
              "sphinx_autodoc_typehints",
              "nbsphinx",
              "scanpydoc",
              *[p.stem for p in (HERE / 'extensions').glob('*.py')],
              ]

# Generate the API documentation when building
autosummary_generate = True
autodoc_member_order = 'bysource'
napoleon_google_docstring = False
napoleon_numpy_docstring = True
napoleon_include_init_with_doc = False
napoleon_use_rtype = True  # having a separate entry generally helps readability
napoleon_use_param = True
napoleon_custom_sections = [('Params', 'Parameters')]
todo_include_todos = False

# Base URLs of external projects for intersphinx cross-references.
intersphinx_mapping = dict(
    python=("https://docs.python.org/3", None),
    anndata=("https://anndata.readthedocs.io/en/latest/", None),
    scanpy=("https://scanpy.readthedocs.io/en/latest/", None),
    numpy=("https://numpy.org/doc/stable/", None),
    matplotlib=('https://matplotlib.org/', None),
    pandas=('https://pandas.pydata.org/pandas-docs/stable/', None),
    seaborn=('https://seaborn.pydata.org/', None),

)

# Add notebooks prolog to Google Colab and nbviewer
# NOTE(review): in this copy the raw-HTML body below contains only the badge
# texts and no tags -- presumably the markup was lost; confirm against the
# repository before relying on it.
nbsphinx_prolog = r"""
{% set docname = 'github/theislab/scCODA/blob/master/' + env.doc2path(env.docname, base=None) %}
.. raw:: html



    Open In Colab

    Open In nbviewer

"""


# -- Options for HTML output -------------------------------------------------

html_theme = 'scanpydoc'
html_theme_options = dict(navigation_depth=1, titles_only=True)
github_repo = "sccoda"
html_context = dict(
    display_github=True,  # Integrate GitHub
    github_user='theislab',  # Username
    github_repo='scCODA',  # Repo name
    github_version='master',  # Version
    conf_py_path='/docs/',  # Path in the checkout to the docs root
)
html_static_path = ['_static']
html_show_sphinx = False

# def setup(app):
#     app.warningiserror = on_rtd

# -- Options for other output ------------------------------------------

htmlhelp_basename = "scCODAdoc"
title_doc = f"{project} documentation"

# latex_engine = "xelatex"
# The LaTeX preamble defines a \block command and maps the Unicode code points
# U+2581..U+2588 onto it with arguments 1..8.
latex_elements = {
    'preamble': r'''
\usepackage[utf8]{inputenc}

\newcommand{\block}[1]{\raisebox{\dimexpr(\fontcharht\font`X-1em)/2}{\rule{1em}{#1\dimexpr1em/8}}}

\DeclareUnicodeCharacter{2581}{\block{1}}
\DeclareUnicodeCharacter{2582}{\block{2}}
\DeclareUnicodeCharacter{2583}{\block{3}}
\DeclareUnicodeCharacter{2584}{\block{4}}
\DeclareUnicodeCharacter{2585}{\block{5}}
\DeclareUnicodeCharacter{2586}{\block{6}}
\DeclareUnicodeCharacter{2587}{\block{7}}
\DeclareUnicodeCharacter{2588}{\block{8}}
'''
}

# Output document descriptions for the LaTeX, man-page and Texinfo builders.
latex_documents = [(master_doc, f"{project}.tex", title_doc, author, "manual")]
man_pages = [(master_doc, project, title_doc, [author], 1)]
texinfo_documents = [
    (master_doc, project, title_doc, author, project, title, "Miscellaneous")
]

# -- Override some classnames in autodoc --------------------------------------------

# Currently empty: no qualified-name overrides are defined.
qualname_overrides = {
}
-------------------------------------------------------------------------------- /docs/source/data.rst:
-------------------------------------------------------------------------------- 1 | Data structure 2 | ============== 3 | 4 | .. image:: ../../.github/Figures/data_structure.png 5 | :width: 45% 6 | :height: 200px 7 | :align: left 8 | 9 | .. image:: ../../.github/Figures/covariate_structure.png 10 | :width: 45% 11 | :height: 200px 12 | :align: right 13 | 14 | Annotating the cells from a biological sample with individual types, e.g. via clustering methods, and grouping them by cell type, 15 | results in a vector of counts (of dimension *K*), with each entry representing a cell type. A scCODA dataset aggregates *N* cell count 16 | vectors as the rows of a matrix of dimension *NxK*, the so-called **cell count matrix** *Y*. The cell count data does not 17 | need to be normalized, as scCODA works on the integer count data. 18 | In addition to the cell counts, scCODA also requires covariates that contain information about each sample. 19 | These can be indicators for e.g. diseases, or continuous variables, such as age or BMI. The *M* covariates for an 20 | scCODA dataset are described by the (*NxM* dimensional) **covariate matrix** *X*. 21 | 22 | scCODA uses the `anndata `_ format to store compositional datasets. 23 | Hereby, ``data.X`` represents the cell count matrix, and ``data.obs`` the covariates (The actual covariate or design matrix is generated when calling a model). 24 | The ``data.var`` and ``data.uns`` elements are currently not used. 25 | 26 | .. image:: https://falexwolf.de/img/scanpy/anndata.svg 27 | :width: 500px 28 | :align: center 29 | 30 | 31 | Data generation methods 32 | ^^^^^^^^^^^^^^^^^^^^^^^ 33 | 34 | ``sccoda.util.data_generation`` contains methods to generate compositional data with different properties that mimics 35 | the properties of scRNA-seq datasets. 
36 | 37 | 38 | Data import methods 39 | ^^^^^^^^^^^^^^^^^^^ 40 | 41 | ``sccoda.util.cell_composition_data`` contains methods to import count data from various sources into the data structure used by scCODA. 42 | You can either import data directly from a pandas DataFrame via ``from_pandas``, or get the count data from single-cell expression data used in `scanpy `_. 43 | If all cells from all samples are stored in one anndata object, ``from_scanpy`` generates a compositional analysis dataset from this. 44 | If there is one anndata object with the single-cell expression data for each sample, 45 | ``from_scanpy_list`` (for in-memory data) and ``from_scanpy_dir`` (for data stored on disk) can transform the information from these files directly into a compositional analysis dataset. 46 | For more information, see the `data import and visualization tutorial `_. 47 | 48 | 49 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | 2 | Welcome to scCODA's documentation! 3 | ============================================= 4 | 5 | .. image:: ../../.github/Figures/Figure1.png 6 | :width: 500px 7 | :align: right 8 | 9 | scCODA is a toolbox for statistical models to analyze changes in compositional data, 10 | especially from single-cell RNA-seq experiments. 11 | Its main purpose is to provide a platform and implementation for the *scCODA* model, which is described by 12 | `Büttner, Ostner et al. `_. 13 | 14 | The package is available on `github `_. 15 | 16 | Please also check out the `tutorials `_ 17 | that explain the most important functionalities. 18 | 19 | Motivation 20 | ^^^^^^^^^^^^ 21 | 22 | When analyzing biological processes via single-cell RNA-sequencing experiments, it is often of interest to assess how 23 | cell populations change under one or more conditions. 
This task, however, is non-trivial, as there are several 24 | limitations that have to be addressed: 25 | 26 | - scRNA-seq population data is compositional. This must be considered to avoid an inflation of false-positive results. 27 | - Most datasets consist only of very few samples, making frequentist tests inaccurate. 28 | - A condition usually only affects a fraction of cell types. Therefore, sparse effects are preferable. 29 | 30 | The scCODA model overcomes all these limitations in a fully Bayesian model that outperforms other 31 | compositional and non-compositional methods. 32 | 33 | scCODA is fully integrable with scanpy_, but provides its own data structure for aggregating, plotting and analyzing 34 | compositional data from scRNA-seq. In addition to the scCODA model, the package also features a variety of 35 | implementations of other statistical models that can be used as comparisons. 36 | 37 | .. toctree:: 38 | :caption: Contents 39 | :maxdepth: 2 40 | 41 | compositional_data 42 | data 43 | models 44 | installation 45 | api 46 | 47 | .. toctree:: 48 | :caption: Tutorials 49 | :maxdepth: 1 50 | 51 | getting_started 52 | Data_import_and_visualization 53 | Modeling_options_and_result_analysis 54 | using_other_compositional_methods 55 | 56 | Reference 57 | ^^^^^^^^^^ 58 | 59 | Büttner, Ostner *et al.* (2021), scCODA is a Bayesian model for compositional single-cell data analysis 60 | `NatComms `_. 61 | |dim| 62 | 63 | Indices and tables 64 | ^^^^^^^^^^^^^^^^^^^^^^ 65 | 66 | * :ref:`genindex` 67 | * :ref:`modindex` 68 | * :ref:`search` 69 | 70 | .. _scanpy: https://scanpy.readthedocs.io 71 | 72 | .. |dim| raw:: html 73 | 74 | 75 | 76 | 77 | -------------------------------------------------------------------------------- /docs/source/installation.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | 4 | A functioning python environment (>=3.8) is necessary to run this package.
5 | 6 | This package uses the tensorflow (>= 2.4) and tensorflow-probability (>=0.12) packages. 7 | The GPU versions of these packages have not been tested with scCODA and are thus not recommended. 8 | 9 | **To install scCODA via pip, call**:: 10 | 11 | pip install sccoda 12 | 13 | **To install scCODA from source**: 14 | 15 | - Navigate to the directory you want scCODA in 16 | - Clone the repository from `github `_:: 17 | 18 | git clone https://github.com/theislab/scCODA 19 | 20 | - Navigate to the root directory of scCODA:: 21 | 22 | cd scCODA 23 | 24 | - Install dependencies:: 25 | 26 | pip install -r requirements.txt 27 | 28 | **Import scCODA in a Python session via**:: 29 | 30 | import sccoda 31 | 32 | -------------------------------------------------------------------------------- /docs/source/models.rst: -------------------------------------------------------------------------------- 1 | The scCODA model 2 | ================ 3 | 4 | scCODA uses Bayesian modeling to detect statistically credible changes in compositional data. 5 | The model is implemented in ``sccoda.model.scCODA_model``. 6 | The easiest way to call a compositional model is via calling an instance of ``sccoda.util.comp_ana.CompositionalAnalysis``. 7 | It requires an ``anndata`` object that contains the compositional data and covariates, a formula string that defines the covariate matrix 8 | (see the `patsy `_ syntax for details), and a reference cell type. 9 | 10 | 11 | Model structure 12 | ^^^^^^^^^^^^^^^ 13 | 14 | The model is based on a Dirichlet-multinomial model, in which each cell type is described by the covariates through a log-linear linkage. 15 | The intercepts :math:`\alpha` are modeled via a normal prior. 16 | For the effect (:math:`\beta`) of a covariate on a cell type, scCODA performs model selection via a spike-and-slab prior (Continuous approximation via a Logit-normal prior). 17 | The underlying prior for significant effects is normal with a covariate-specific scaling factor. 
18 | The only exception are the effects of the reference cell type :math:`\hat{k}`, which are always set to 0. 19 | 20 | .. math:: 21 | y|x &\sim DirMult(\phi, \bar{y}) \\ 22 | \log(\phi) &= \alpha + x \beta \\ 23 | \alpha_k &\sim N(0, 5) \quad &\forall k \in [K] \\ 24 | \beta_{m, \hat{k}} &= 0 &\forall m \in [M]\\ 25 | \beta_{m, k} &= \tau_{m, k} \tilde{\beta}_{m, k} \quad &\forall m \in [M], k \in \{[K] \smallsetminus \hat{k}\} \\ 26 | \tau_{m, k} &= \frac{\exp(t_{m, k})}{1+ \exp(t_{m, k})} \quad &\forall m \in [M], k \in \{[K] \smallsetminus \hat{k}\} \\ 27 | \frac{t_{m, k}}{50} &\sim N(0, 1) \quad &\forall m \in [M], k \in \{[K] \smallsetminus \hat{k}\} \\ 28 | \tilde{\beta}_{m, k} &= \sigma_m^2 \cdot \gamma_{m, k} \quad &\forall m \in [M], k \in \{[K] \smallsetminus \hat{k}\} \\ 29 | \sigma_m^2 &\sim HC(0, 1) \quad &\forall m \in [M] \\ 30 | \gamma_{m, k} &\sim N(0,1) \quad &\forall m \in [M], k \in \{[K] \smallsetminus \hat{k}\} \\ 31 | 32 | 33 | For further information regarding the model structure, please refer to: 34 | 35 | Büttner, Ostner *et al.* (2021), scCODA is a Bayesian model for compositional single-cell data analysis 36 | `NatComms `_. 37 | 38 | Inference 39 | ^^^^^^^^^ 40 | 41 | Once the model is set up, inference via HMC sampling can be performed via ``sample_hmc()``. 42 | Alternatively, No-U-Turn sampling is available via ``sample_nuts()``. 43 | Depending on the size of the dataset and the system hardware, inference usually takes up to 5 minutes. 44 | The resulting ``sccoda.util.result_classes.CAResult`` object extends the ``InferenceData`` class of 45 | `arviz `_ and supports all its diagnostic and plotting functionality. 46 | 47 | 48 | Result analysis 49 | ^^^^^^^^^^^^^^^ 50 | 51 | To see which effects were found to be significant, call ``summary()`` on the result object. 52 | The ``Final Parameter`` column of the effects data frame shows the significances. 
53 | If the value is 0, the effect is not found to be statistically credible, otherwise it is. 54 | The sign of the effect indicates a decrease or increase in abundance (relative to the reference cell type). 55 | However, the numerical value of these effects should not be used for analysis, as it depends on multiple parameters. 56 | Please refer to the `tutorials `_ for more information on how to evaluate scCODA's results. 57 | -------------------------------------------------------------------------------- /docs/source/sccoda.model.other_models.ALDEx2Model.eval_model.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L454-L496 2 | 3 | sccoda.model.other\_models.ALDEx2Model.eval\_model 4 | ================================================== 5 | 6 | .. currentmodule:: sccoda.model.other_models 7 | 8 | .. automethod:: ALDEx2Model.eval_model -------------------------------------------------------------------------------- /docs/source/sccoda.model.other_models.ALDEx2Model.fit_model.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L640-L705 2 | 3 | sccoda.model.other\_models.ALDEx2Model.fit\_model 4 | ================================================= 5 | 6 | .. currentmodule:: sccoda.model.other_models 7 | 8 | .. automethod:: ALDEx2Model.fit_model -------------------------------------------------------------------------------- /docs/source/sccoda.model.other_models.ALDEx2Model.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L635-L705 2 | 3 | sccoda.model.other\_models.ALDEx2Model 4 | ====================================== 5 | 6 | .. currentmodule:: sccoda.model.other_models 7 | 8 | .. 
add toctree option to make autodoc generate the pages 9 | 10 | .. autoclass:: ALDEx2Model 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | .. rubric:: Methods 19 | 20 | .. autosummary:: 21 | :toctree: . 22 | 23 | 24 | 25 | 26 | ~sccoda.model.other_models.ALDEx2Model.eval_model 27 | 28 | 29 | ~sccoda.model.other_models.ALDEx2Model.fit_model 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /docs/source/sccoda.model.other_models.ALRModel_ttest.eval_model.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L454-L496 2 | 3 | sccoda.model.other\_models.ALRModel\_ttest.eval\_model 4 | ====================================================== 5 | 6 | .. currentmodule:: sccoda.model.other_models 7 | 8 | .. automethod:: ALRModel_ttest.eval_model -------------------------------------------------------------------------------- /docs/source/sccoda.model.other_models.ALRModel_ttest.fit_model.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L713-L751 2 | 3 | sccoda.model.other\_models.ALRModel\_ttest.fit\_model 4 | ===================================================== 5 | 6 | .. currentmodule:: sccoda.model.other_models 7 | 8 | .. automethod:: ALRModel_ttest.fit_model -------------------------------------------------------------------------------- /docs/source/sccoda.model.other_models.ALRModel_ttest.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L708-L751 2 | 3 | sccoda.model.other\_models.ALRModel\_ttest 4 | ========================================== 5 | 6 | .. currentmodule:: sccoda.model.other_models 7 | 8 | .. add toctree option to make autodoc generate the pages 9 | 10 | .. 
autoclass:: ALRModel_ttest 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | .. rubric:: Methods 19 | 20 | .. autosummary:: 21 | :toctree: . 22 | 23 | 24 | 25 | 26 | ~sccoda.model.other_models.ALRModel_ttest.eval_model 27 | 28 | 29 | ~sccoda.model.other_models.ALRModel_ttest.fit_model 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /docs/source/sccoda.model.other_models.ALRModel_wilcoxon.eval_model.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L454-L496 2 | 3 | sccoda.model.other\_models.ALRModel\_wilcoxon.eval\_model 4 | ========================================================= 5 | 6 | .. currentmodule:: sccoda.model.other_models 7 | 8 | .. automethod:: ALRModel_wilcoxon.eval_model -------------------------------------------------------------------------------- /docs/source/sccoda.model.other_models.ALRModel_wilcoxon.fit_model.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L759-L797 2 | 3 | sccoda.model.other\_models.ALRModel\_wilcoxon.fit\_model 4 | ======================================================== 5 | 6 | .. currentmodule:: sccoda.model.other_models 7 | 8 | .. automethod:: ALRModel_wilcoxon.fit_model -------------------------------------------------------------------------------- /docs/source/sccoda.model.other_models.ALRModel_wilcoxon.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L754-L797 2 | 3 | sccoda.model.other\_models.ALRModel\_wilcoxon 4 | ============================================= 5 | 6 | .. currentmodule:: sccoda.model.other_models 7 | 8 | .. add toctree option to make autodoc generate the pages 9 | 10 | .. 
autoclass:: ALRModel_wilcoxon 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | .. rubric:: Methods 19 | 20 | .. autosummary:: 21 | :toctree: . 22 | 23 | 24 | 25 | 26 | ~sccoda.model.other_models.ALRModel_wilcoxon.eval_model 27 | 28 | 29 | ~sccoda.model.other_models.ALRModel_wilcoxon.fit_model 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /docs/source/sccoda.model.other_models.ANCOMBCModel.eval_model.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L454-L496 2 | 3 | sccoda.model.other\_models.ANCOMBCModel.eval\_model 4 | =================================================== 5 | 6 | .. currentmodule:: sccoda.model.other_models 7 | 8 | .. automethod:: ANCOMBCModel.eval_model -------------------------------------------------------------------------------- /docs/source/sccoda.model.other_models.ANCOMBCModel.fit_model.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L1077-L1162 2 | 3 | sccoda.model.other\_models.ANCOMBCModel.fit\_model 4 | ================================================== 5 | 6 | .. currentmodule:: sccoda.model.other_models 7 | 8 | .. automethod:: ANCOMBCModel.fit_model -------------------------------------------------------------------------------- /docs/source/sccoda.model.other_models.ANCOMBCModel.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L1072-L1162 2 | 3 | sccoda.model.other\_models.ANCOMBCModel 4 | ======================================= 5 | 6 | .. currentmodule:: sccoda.model.other_models 7 | 8 | .. add toctree option to make autodoc generate the pages 9 | 10 | .. autoclass:: ANCOMBCModel 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | .. 
rubric:: Methods 19 | 20 | .. autosummary:: 21 | :toctree: . 22 | 23 | 24 | 25 | 26 | ~sccoda.model.other_models.ANCOMBCModel.eval_model 27 | 28 | 29 | ~sccoda.model.other_models.ANCOMBCModel.fit_model 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /docs/source/sccoda.model.other_models.AncomModel.eval_model.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L873-L905 2 | 3 | sccoda.model.other\_models.AncomModel.eval\_model 4 | ================================================= 5 | 6 | .. currentmodule:: sccoda.model.other_models 7 | 8 | .. automethod:: AncomModel.eval_model -------------------------------------------------------------------------------- /docs/source/sccoda.model.other_models.AncomModel.fit_model.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L839-L871 2 | 3 | sccoda.model.other\_models.AncomModel.fit\_model 4 | ================================================ 5 | 6 | .. currentmodule:: sccoda.model.other_models 7 | 8 | .. automethod:: AncomModel.fit_model -------------------------------------------------------------------------------- /docs/source/sccoda.model.other_models.AncomModel.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L800-L905 2 | 3 | sccoda.model.other\_models.AncomModel 4 | ===================================== 5 | 6 | .. currentmodule:: sccoda.model.other_models 7 | 8 | .. add toctree option to make autodoc generate the pages 9 | 10 | .. autoclass:: AncomModel 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | .. rubric:: Methods 19 | 20 | .. autosummary:: 21 | :toctree: . 
22 | 23 | 24 | 25 | 26 | ~sccoda.model.other_models.AncomModel.eval_model 27 | 28 | 29 | ~sccoda.model.other_models.AncomModel.fit_model 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /docs/source/sccoda.model.other_models.BetaBinomialModel.eval_model.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L454-L496 2 | 3 | sccoda.model.other\_models.BetaBinomialModel.eval\_model 4 | ======================================================== 5 | 6 | .. currentmodule:: sccoda.model.other_models 7 | 8 | .. automethod:: BetaBinomialModel.eval_model -------------------------------------------------------------------------------- /docs/source/sccoda.model.other_models.BetaBinomialModel.fit_model.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L980-L1069 2 | 3 | sccoda.model.other\_models.BetaBinomialModel.fit\_model 4 | ======================================================= 5 | 6 | .. currentmodule:: sccoda.model.other_models 7 | 8 | .. automethod:: BetaBinomialModel.fit_model -------------------------------------------------------------------------------- /docs/source/sccoda.model.other_models.BetaBinomialModel.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L975-L1069 2 | 3 | sccoda.model.other\_models.BetaBinomialModel 4 | ============================================ 5 | 6 | .. currentmodule:: sccoda.model.other_models 7 | 8 | .. add toctree option to make autodoc generate the pages 9 | 10 | .. autoclass:: BetaBinomialModel 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | .. rubric:: Methods 19 | 20 | .. autosummary:: 21 | :toctree: . 
22 | 23 | 24 | 25 | 26 | ~sccoda.model.other_models.BetaBinomialModel.eval_model 27 | 28 | 29 | ~sccoda.model.other_models.BetaBinomialModel.fit_model 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /docs/source/sccoda.model.other_models.CLRModel.eval_model.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L454-L496 2 | 3 | sccoda.model.other\_models.CLRModel.eval\_model 4 | =============================================== 5 | 6 | .. currentmodule:: sccoda.model.other_models 7 | 8 | .. automethod:: CLRModel.eval_model -------------------------------------------------------------------------------- /docs/source/sccoda.model.other_models.CLRModel.fit_model.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L540-L567 2 | 3 | sccoda.model.other\_models.CLRModel.fit\_model 4 | ============================================== 5 | 6 | .. currentmodule:: sccoda.model.other_models 7 | 8 | .. automethod:: CLRModel.fit_model -------------------------------------------------------------------------------- /docs/source/sccoda.model.other_models.CLRModel.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L535-L567 2 | 3 | sccoda.model.other\_models.CLRModel 4 | =================================== 5 | 6 | .. currentmodule:: sccoda.model.other_models 7 | 8 | .. add toctree option to make autodoc generate the pages 9 | 10 | .. autoclass:: CLRModel 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | .. rubric:: Methods 19 | 20 | .. autosummary:: 21 | :toctree: . 
22 | 23 | 24 | 25 | 26 | ~sccoda.model.other_models.CLRModel.eval_model 27 | 28 | 29 | ~sccoda.model.other_models.CLRModel.fit_model 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /docs/source/sccoda.model.other_models.CLRModel_ttest.eval_model.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L454-L496 2 | 3 | sccoda.model.other\_models.CLRModel\_ttest.eval\_model 4 | ====================================================== 5 | 6 | .. currentmodule:: sccoda.model.other_models 7 | 8 | .. automethod:: CLRModel_ttest.eval_model -------------------------------------------------------------------------------- /docs/source/sccoda.model.other_models.CLRModel_ttest.fit_model.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L606-L632 2 | 3 | sccoda.model.other\_models.CLRModel\_ttest.fit\_model 4 | ===================================================== 5 | 6 | .. currentmodule:: sccoda.model.other_models 7 | 8 | .. automethod:: CLRModel_ttest.fit_model -------------------------------------------------------------------------------- /docs/source/sccoda.model.other_models.CLRModel_ttest.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L601-L632 2 | 3 | sccoda.model.other\_models.CLRModel\_ttest 4 | ========================================== 5 | 6 | .. currentmodule:: sccoda.model.other_models 7 | 8 | .. add toctree option to make autodoc generate the pages 9 | 10 | .. autoclass:: CLRModel_ttest 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | .. rubric:: Methods 19 | 20 | .. autosummary:: 21 | :toctree: . 
22 | 23 | 24 | 25 | 26 | ~sccoda.model.other_models.CLRModel_ttest.eval_model 27 | 28 | 29 | ~sccoda.model.other_models.CLRModel_ttest.fit_model 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /docs/source/sccoda.model.other_models.DirichRegModel.eval_model.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L454-L496 2 | 3 | sccoda.model.other\_models.DirichRegModel.eval\_model 4 | ===================================================== 5 | 6 | .. currentmodule:: sccoda.model.other_models 7 | 8 | .. automethod:: DirichRegModel.eval_model -------------------------------------------------------------------------------- /docs/source/sccoda.model.other_models.DirichRegModel.fit_model.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L914-L972 2 | 3 | sccoda.model.other\_models.DirichRegModel.fit\_model 4 | ==================================================== 5 | 6 | .. currentmodule:: sccoda.model.other_models 7 | 8 | .. automethod:: DirichRegModel.fit_model -------------------------------------------------------------------------------- /docs/source/sccoda.model.other_models.DirichRegModel.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L908-L972 2 | 3 | sccoda.model.other\_models.DirichRegModel 4 | ========================================= 5 | 6 | .. currentmodule:: sccoda.model.other_models 7 | 8 | .. add toctree option to make autodoc generate the pages 9 | 10 | .. autoclass:: DirichRegModel 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | .. rubric:: Methods 19 | 20 | .. autosummary:: 21 | :toctree: . 
22 | 23 | 24 | 25 | 26 | ~sccoda.model.other_models.DirichRegModel.eval_model 27 | 28 | 29 | ~sccoda.model.other_models.DirichRegModel.fit_model 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /docs/source/sccoda.model.other_models.HaberModel.eval_model.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L454-L496 2 | 3 | sccoda.model.other\_models.HaberModel.eval\_model 4 | ================================================= 5 | 6 | .. currentmodule:: sccoda.model.other_models 7 | 8 | .. automethod:: HaberModel.eval_model -------------------------------------------------------------------------------- /docs/source/sccoda.model.other_models.HaberModel.fit_model.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L504-L532 2 | 3 | sccoda.model.other\_models.HaberModel.fit\_model 4 | ================================================ 5 | 6 | .. currentmodule:: sccoda.model.other_models 7 | 8 | .. automethod:: HaberModel.fit_model -------------------------------------------------------------------------------- /docs/source/sccoda.model.other_models.HaberModel.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L499-L532 2 | 3 | sccoda.model.other\_models.HaberModel 4 | ===================================== 5 | 6 | .. currentmodule:: sccoda.model.other_models 7 | 8 | .. add toctree option to make autodoc generate the pages 9 | 10 | .. autoclass:: HaberModel 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | .. rubric:: Methods 19 | 20 | .. autosummary:: 21 | :toctree: . 
22 | 23 | 24 | 25 | 26 | ~sccoda.model.other_models.HaberModel.eval_model 27 | 28 | 29 | ~sccoda.model.other_models.HaberModel.fit_model 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /docs/source/sccoda.model.other_models.SimpleModel.get_chains_after_burnin.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/scCODA_model.py#L169-L225 2 | 3 | sccoda.model.other\_models.SimpleModel.get\_chains\_after\_burnin 4 | ================================================================= 5 | 6 | .. currentmodule:: sccoda.model.other_models 7 | 8 | .. automethod:: SimpleModel.get_chains_after_burnin -------------------------------------------------------------------------------- /docs/source/sccoda.model.other_models.SimpleModel.get_y_hat.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L227-L282 2 | 3 | sccoda.model.other\_models.SimpleModel.get\_y\_hat 4 | ================================================== 5 | 6 | .. currentmodule:: sccoda.model.other_models 7 | 8 | .. automethod:: SimpleModel.get_y_hat -------------------------------------------------------------------------------- /docs/source/sccoda.model.other_models.SimpleModel.make_result.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/scCODA_model.py#L515-L559 2 | 3 | sccoda.model.other\_models.SimpleModel.make\_result 4 | =================================================== 5 | 6 | .. currentmodule:: sccoda.model.other_models 7 | 8 | .. 
automethod:: SimpleModel.make_result -------------------------------------------------------------------------------- /docs/source/sccoda.model.other_models.SimpleModel.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L31-L282 2 | 3 | sccoda.model.other\_models.SimpleModel 4 | ====================================== 5 | 6 | .. currentmodule:: sccoda.model.other_models 7 | 8 | .. add toctree option to make autodoc generate the pages 9 | 10 | .. autoclass:: SimpleModel 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | .. rubric:: Methods 19 | 20 | .. autosummary:: 21 | :toctree: . 22 | 23 | 24 | 25 | 26 | ~sccoda.model.other_models.SimpleModel.get_chains_after_burnin 27 | 28 | 29 | ~sccoda.model.other_models.SimpleModel.get_y_hat 30 | 31 | 32 | ~sccoda.model.other_models.SimpleModel.make_result 33 | 34 | 35 | ~sccoda.model.other_models.SimpleModel.sample_hmc 36 | 37 | 38 | ~sccoda.model.other_models.SimpleModel.sample_hmc_da 39 | 40 | 41 | ~sccoda.model.other_models.SimpleModel.sample_nuts 42 | 43 | 44 | ~sccoda.model.other_models.SimpleModel.sampling 45 | 46 | 47 | 48 | -------------------------------------------------------------------------------- /docs/source/sccoda.model.other_models.SimpleModel.sample_hmc.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L111-L224 2 | 3 | sccoda.model.other\_models.SimpleModel.sample\_hmc 4 | ================================================== 5 | 6 | .. currentmodule:: sccoda.model.other_models 7 | 8 | .. 
automethod:: SimpleModel.sample_hmc -------------------------------------------------------------------------------- /docs/source/sccoda.model.other_models.SimpleModel.sample_hmc_da.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/scCODA_model.py#L316-L403 2 | 3 | sccoda.model.other\_models.SimpleModel.sample\_hmc\_da 4 | ====================================================== 5 | 6 | .. currentmodule:: sccoda.model.other_models 7 | 8 | .. automethod:: SimpleModel.sample_hmc_da -------------------------------------------------------------------------------- /docs/source/sccoda.model.other_models.SimpleModel.sample_nuts.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/scCODA_model.py#L405-L513 2 | 3 | sccoda.model.other\_models.SimpleModel.sample\_nuts 4 | =================================================== 5 | 6 | .. currentmodule:: sccoda.model.other_models 7 | 8 | .. automethod:: SimpleModel.sample_nuts -------------------------------------------------------------------------------- /docs/source/sccoda.model.other_models.SimpleModel.sampling.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/scCODA_model.py#L113-L167 2 | 3 | sccoda.model.other\_models.SimpleModel.sampling 4 | =============================================== 5 | 6 | .. currentmodule:: sccoda.model.other_models 7 | 8 | .. 
automethod:: SimpleModel.sampling -------------------------------------------------------------------------------- /docs/source/sccoda.model.other_models.TTest.eval_model.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L454-L496 2 | 3 | sccoda.model.other\_models.TTest.eval\_model 4 | ============================================ 5 | 6 | .. currentmodule:: sccoda.model.other_models 7 | 8 | .. automethod:: TTest.eval_model -------------------------------------------------------------------------------- /docs/source/sccoda.model.other_models.TTest.fit_model.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L575-L598 2 | 3 | sccoda.model.other\_models.TTest.fit\_model 4 | =========================================== 5 | 6 | .. currentmodule:: sccoda.model.other_models 7 | 8 | .. automethod:: TTest.fit_model -------------------------------------------------------------------------------- /docs/source/sccoda.model.other_models.TTest.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L570-L598 2 | 3 | sccoda.model.other\_models.TTest 4 | ================================ 5 | 6 | .. currentmodule:: sccoda.model.other_models 7 | 8 | .. add toctree option to make autodoc generate the pages 9 | 10 | .. autoclass:: TTest 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | .. rubric:: Methods 19 | 20 | .. autosummary:: 21 | :toctree: . 
22 | 23 | 24 | 25 | 26 | ~sccoda.model.other_models.TTest.eval_model 27 | 28 | 29 | ~sccoda.model.other_models.TTest.fit_model 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /docs/source/sccoda.model.other_models.scdney_model.analyze.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L341-L411 2 | 3 | sccoda.model.other\_models.scdney\_model.analyze 4 | ================================================ 5 | 6 | .. currentmodule:: sccoda.model.other_models 7 | 8 | .. automethod:: scdney_model.analyze -------------------------------------------------------------------------------- /docs/source/sccoda.model.other_models.scdney_model.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L285-L411 2 | 3 | sccoda.model.other\_models.scdney\_model 4 | ======================================== 5 | 6 | .. currentmodule:: sccoda.model.other_models 7 | 8 | .. add toctree option to make autodoc generate the pages 9 | 10 | .. autoclass:: scdney_model 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | .. rubric:: Methods 19 | 20 | .. autosummary:: 21 | :toctree: . 22 | 23 | 24 | 25 | 26 | ~sccoda.model.other_models.scdney_model.analyze 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /docs/source/sccoda.model.scCODA_model.CompositionalModel.get_chains_after_burnin.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/scCODA_model.py#L169-L225 2 | 3 | sccoda.model.scCODA\_model.CompositionalModel.get\_chains\_after\_burnin 4 | ======================================================================== 5 | 6 | .. 
currentmodule:: sccoda.model.scCODA_model 7 | 8 | .. automethod:: CompositionalModel.get_chains_after_burnin -------------------------------------------------------------------------------- /docs/source/sccoda.model.scCODA_model.CompositionalModel.make_result.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/scCODA_model.py#L515-L559 2 | 3 | sccoda.model.scCODA\_model.CompositionalModel.make\_result 4 | ========================================================== 5 | 6 | .. currentmodule:: sccoda.model.scCODA_model 7 | 8 | .. automethod:: CompositionalModel.make_result -------------------------------------------------------------------------------- /docs/source/sccoda.model.scCODA_model.CompositionalModel.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/scCODA_model.py#L23-L559 2 | 3 | sccoda.model.scCODA\_model.CompositionalModel 4 | ============================================= 5 | 6 | .. currentmodule:: sccoda.model.scCODA_model 7 | 8 | .. add toctree option to make autodoc generate the pages 9 | 10 | .. autoclass:: CompositionalModel 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | .. rubric:: Methods 19 | 20 | .. autosummary:: 21 | :toctree: . 
22 | 23 | 24 | 25 | 26 | ~sccoda.model.scCODA_model.CompositionalModel.get_chains_after_burnin 27 | 28 | 29 | ~sccoda.model.scCODA_model.CompositionalModel.make_result 30 | 31 | 32 | ~sccoda.model.scCODA_model.CompositionalModel.sample_hmc 33 | 34 | 35 | ~sccoda.model.scCODA_model.CompositionalModel.sample_hmc_da 36 | 37 | 38 | ~sccoda.model.scCODA_model.CompositionalModel.sample_nuts 39 | 40 | 41 | ~sccoda.model.scCODA_model.CompositionalModel.sampling 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /docs/source/sccoda.model.scCODA_model.CompositionalModel.sample_hmc.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/scCODA_model.py#L227-L314 2 | 3 | sccoda.model.scCODA\_model.CompositionalModel.sample\_hmc 4 | ========================================================= 5 | 6 | .. currentmodule:: sccoda.model.scCODA_model 7 | 8 | .. automethod:: CompositionalModel.sample_hmc -------------------------------------------------------------------------------- /docs/source/sccoda.model.scCODA_model.CompositionalModel.sample_hmc_da.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/scCODA_model.py#L316-L403 2 | 3 | sccoda.model.scCODA\_model.CompositionalModel.sample\_hmc\_da 4 | ============================================================= 5 | 6 | .. currentmodule:: sccoda.model.scCODA_model 7 | 8 | .. 
automethod:: CompositionalModel.sample_hmc_da -------------------------------------------------------------------------------- /docs/source/sccoda.model.scCODA_model.CompositionalModel.sample_nuts.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/scCODA_model.py#L405-L513 2 | 3 | sccoda.model.scCODA\_model.CompositionalModel.sample\_nuts 4 | ========================================================== 5 | 6 | .. currentmodule:: sccoda.model.scCODA_model 7 | 8 | .. automethod:: CompositionalModel.sample_nuts -------------------------------------------------------------------------------- /docs/source/sccoda.model.scCODA_model.CompositionalModel.sampling.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/scCODA_model.py#L113-L167 2 | 3 | sccoda.model.scCODA\_model.CompositionalModel.sampling 4 | ====================================================== 5 | 6 | .. currentmodule:: sccoda.model.scCODA_model 7 | 8 | .. automethod:: CompositionalModel.sampling -------------------------------------------------------------------------------- /docs/source/sccoda.model.scCODA_model.scCODAModel.get_chains_after_burnin.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/scCODA_model.py#L169-L225 2 | 3 | sccoda.model.scCODA\_model.scCODAModel.get\_chains\_after\_burnin 4 | ================================================================= 5 | 6 | .. currentmodule:: sccoda.model.scCODA_model 7 | 8 | .. 
automethod:: scCODAModel.get_chains_after_burnin -------------------------------------------------------------------------------- /docs/source/sccoda.model.scCODA_model.scCODAModel.get_y_hat.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/scCODA_model.py#L695-L762 2 | 3 | sccoda.model.scCODA\_model.scCODAModel.get\_y\_hat 4 | ================================================== 5 | 6 | .. currentmodule:: sccoda.model.scCODA_model 7 | 8 | .. automethod:: scCODAModel.get_y_hat -------------------------------------------------------------------------------- /docs/source/sccoda.model.scCODA_model.scCODAModel.make_result.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/scCODA_model.py#L515-L559 2 | 3 | sccoda.model.scCODA\_model.scCODAModel.make\_result 4 | =================================================== 5 | 6 | .. currentmodule:: sccoda.model.scCODA_model 7 | 8 | .. automethod:: scCODAModel.make_result -------------------------------------------------------------------------------- /docs/source/sccoda.model.scCODA_model.scCODAModel.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/scCODA_model.py#L562-L762 2 | 3 | sccoda.model.scCODA\_model.scCODAModel 4 | ====================================== 5 | 6 | .. currentmodule:: sccoda.model.scCODA_model 7 | 8 | .. add toctree option to make autodoc generate the pages 9 | 10 | .. autoclass:: scCODAModel 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | .. rubric:: Methods 19 | 20 | .. autosummary:: 21 | :toctree: . 
22 | 23 | 24 | 25 | 26 | ~sccoda.model.scCODA_model.scCODAModel.get_chains_after_burnin 27 | 28 | 29 | ~sccoda.model.scCODA_model.scCODAModel.get_y_hat 30 | 31 | 32 | ~sccoda.model.scCODA_model.scCODAModel.make_result 33 | 34 | 35 | ~sccoda.model.scCODA_model.scCODAModel.sample_hmc 36 | 37 | 38 | ~sccoda.model.scCODA_model.scCODAModel.sample_hmc_da 39 | 40 | 41 | ~sccoda.model.scCODA_model.scCODAModel.sample_nuts 42 | 43 | 44 | ~sccoda.model.scCODA_model.scCODAModel.sampling 45 | 46 | 47 | 48 | -------------------------------------------------------------------------------- /docs/source/sccoda.model.scCODA_model.scCODAModel.sample_hmc.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/scCODA_model.py#L227-L314 2 | 3 | sccoda.model.scCODA\_model.scCODAModel.sample\_hmc 4 | ================================================== 5 | 6 | .. currentmodule:: sccoda.model.scCODA_model 7 | 8 | .. automethod:: scCODAModel.sample_hmc -------------------------------------------------------------------------------- /docs/source/sccoda.model.scCODA_model.scCODAModel.sample_hmc_da.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/scCODA_model.py#L316-L403 2 | 3 | sccoda.model.scCODA\_model.scCODAModel.sample\_hmc\_da 4 | ====================================================== 5 | 6 | .. currentmodule:: sccoda.model.scCODA_model 7 | 8 | .. 
automethod:: scCODAModel.sample_hmc_da -------------------------------------------------------------------------------- /docs/source/sccoda.model.scCODA_model.scCODAModel.sample_nuts.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/scCODA_model.py#L405-L513 2 | 3 | sccoda.model.scCODA\_model.scCODAModel.sample\_nuts 4 | =================================================== 5 | 6 | .. currentmodule:: sccoda.model.scCODA_model 7 | 8 | .. automethod:: scCODAModel.sample_nuts -------------------------------------------------------------------------------- /docs/source/sccoda.model.scCODA_model.scCODAModel.sampling.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/scCODA_model.py#L113-L167 2 | 3 | sccoda.model.scCODA\_model.scCODAModel.sampling 4 | =============================================== 5 | 6 | .. currentmodule:: sccoda.model.scCODA_model 7 | 8 | .. automethod:: scCODAModel.sampling -------------------------------------------------------------------------------- /docs/source/sccoda.util.cell_composition_data.from_pandas.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/util/cell_composition_data.py#L244-L278 2 | 3 | sccoda.util.cell\_composition\_data.from\_pandas 4 | ================================================ 5 | 6 | .. currentmodule:: sccoda.util.cell_composition_data 7 | 8 | .. 
autofunction:: from_pandas -------------------------------------------------------------------------------- /docs/source/sccoda.util.cell_composition_data.from_scanpy.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/util/cell_composition_data.py#L187-L241 2 | 3 | sccoda.util.cell\_composition\_data.from\_scanpy 4 | ================================================ 5 | 6 | .. currentmodule:: sccoda.util.cell_composition_data 7 | 8 | .. autofunction:: from_scanpy -------------------------------------------------------------------------------- /docs/source/sccoda.util.cell_composition_data.from_scanpy_dir.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/util/cell_composition_data.py#L123-L184 2 | 3 | sccoda.util.cell\_composition\_data.from\_scanpy\_dir 4 | ===================================================== 5 | 6 | .. currentmodule:: sccoda.util.cell_composition_data 7 | 8 | .. autofunction:: from_scanpy_dir -------------------------------------------------------------------------------- /docs/source/sccoda.util.cell_composition_data.from_scanpy_list.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/util/cell_composition_data.py#L61-L120 2 | 3 | sccoda.util.cell\_composition\_data.from\_scanpy\_list 4 | ====================================================== 5 | 6 | .. currentmodule:: sccoda.util.cell_composition_data 7 | 8 | .. 
autofunction:: from_scanpy_list -------------------------------------------------------------------------------- /docs/source/sccoda.util.cell_composition_data.read_anndata_one_sample.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/util/cell_composition_data.py#L15-L58 2 | 3 | sccoda.util.cell\_composition\_data.read\_anndata\_one\_sample 4 | ============================================================== 5 | 6 | .. currentmodule:: sccoda.util.cell_composition_data 7 | 8 | .. autofunction:: read_anndata_one_sample -------------------------------------------------------------------------------- /docs/source/sccoda.util.comp_ana.CompositionalAnalysis.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/util/comp_ana.py#L14-L130 2 | 3 | sccoda.util.comp\_ana.CompositionalAnalysis 4 | =========================================== 5 | 6 | .. currentmodule:: sccoda.util.comp_ana 7 | 8 | .. add toctree option to make autodoc generate the pages 9 | 10 | .. autoclass:: CompositionalAnalysis 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | .. rubric:: Methods 19 | 20 | .. autosummary:: 21 | :toctree: . 22 | 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /docs/source/sccoda.util.data_generation.b_w_from_abs_change.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/util/data_generation.py#L123-L172 2 | 3 | sccoda.util.data\_generation.b\_w\_from\_abs\_change 4 | ==================================================== 5 | 6 | .. currentmodule:: sccoda.util.data_generation 7 | 8 | .. 
autofunction:: b_w_from_abs_change -------------------------------------------------------------------------------- /docs/source/sccoda.util.data_generation.counts_from_first.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/util/data_generation.py#L175-L202 2 | 3 | sccoda.util.data\_generation.counts\_from\_first 4 | ================================================ 5 | 6 | .. currentmodule:: sccoda.util.data_generation 7 | 8 | .. autofunction:: counts_from_first -------------------------------------------------------------------------------- /docs/source/sccoda.util.data_generation.generate_case_control.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/util/data_generation.py#L32-L120 2 | 3 | sccoda.util.data\_generation.generate\_case\_control 4 | ==================================================== 5 | 6 | .. currentmodule:: sccoda.util.data_generation 7 | 8 | .. autofunction:: generate_case_control -------------------------------------------------------------------------------- /docs/source/sccoda.util.data_generation.sparse_effect_matrix.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/util/data_generation.py#L205-L247 2 | 3 | sccoda.util.data\_generation.sparse\_effect\_matrix 4 | =================================================== 5 | 6 | .. currentmodule:: sccoda.util.data_generation 7 | 8 | .. 
autofunction:: sparse_effect_matrix -------------------------------------------------------------------------------- /docs/source/sccoda.util.data_visualization.boxplots.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/util/data_visualization.py#L178-L347 2 | 3 | sccoda.util.data\_visualization.boxplots 4 | ======================================== 5 | 6 | .. currentmodule:: sccoda.util.data_visualization 7 | 8 | .. autofunction:: boxplots -------------------------------------------------------------------------------- /docs/source/sccoda.util.data_visualization.stackbar.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/util/data_visualization.py#L22-L87 2 | 3 | sccoda.util.data\_visualization.stackbar 4 | ======================================== 5 | 6 | .. currentmodule:: sccoda.util.data_visualization 7 | 8 | .. autofunction:: stackbar -------------------------------------------------------------------------------- /docs/source/sccoda.util.data_visualization.stacked_barplot.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/util/data_visualization.py#L90-L175 2 | 3 | sccoda.util.data\_visualization.stacked\_barplot 4 | ================================================ 5 | 6 | .. currentmodule:: sccoda.util.data_visualization 7 | 8 | .. 
autofunction:: stacked_barplot -------------------------------------------------------------------------------- /docs/source/sccoda.util.helper_functions.sample_size_estimate.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/util/helper_functions.py#L4-L55 2 | 3 | sccoda.util.helper\_functions.sample\_size\_estimate 4 | ==================================================== 5 | 6 | .. currentmodule:: sccoda.util.helper_functions 7 | 8 | .. autofunction:: sample_size_estimate -------------------------------------------------------------------------------- /docs/source/sccoda.util.result_classes.CAResult.compare_parameters_to_truth.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/util/result_classes.py#L451-L500 2 | 3 | sccoda.util.result\_classes.CAResult.compare\_parameters\_to\_truth 4 | =================================================================== 5 | 6 | .. currentmodule:: sccoda.util.result_classes 7 | 8 | .. automethod:: CAResult.compare_parameters_to_truth -------------------------------------------------------------------------------- /docs/source/sccoda.util.result_classes.CAResult.complete_alpha_df.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/util/result_classes.py#L311-L339 2 | 3 | sccoda.util.result\_classes.CAResult.complete\_alpha\_df 4 | ======================================================== 5 | 6 | .. currentmodule:: sccoda.util.result_classes 7 | 8 | .. 
automethod:: CAResult.complete_alpha_df -------------------------------------------------------------------------------- /docs/source/sccoda.util.result_classes.CAResult.complete_beta_df.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/util/result_classes.py#L214-L309 2 | 3 | sccoda.util.result\_classes.CAResult.complete\_beta\_df 4 | ======================================================= 5 | 6 | .. currentmodule:: sccoda.util.result_classes 7 | 8 | .. automethod:: CAResult.complete_beta_df -------------------------------------------------------------------------------- /docs/source/sccoda.util.result_classes.CAResult.credible_effects.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/util/result_classes.py#L538-L570 2 | 3 | sccoda.util.result\_classes.CAResult.credible\_effects 4 | ====================================================== 5 | 6 | .. currentmodule:: sccoda.util.result_classes 7 | 8 | .. automethod:: CAResult.credible_effects -------------------------------------------------------------------------------- /docs/source/sccoda.util.result_classes.CAResult.distance_to_truth.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/util/result_classes.py#L502-L536 2 | 3 | sccoda.util.result\_classes.CAResult.distance\_to\_truth 4 | ======================================================== 5 | 6 | .. currentmodule:: sccoda.util.result_classes 7 | 8 | .. 
automethod:: CAResult.distance_to_truth -------------------------------------------------------------------------------- /docs/source/sccoda.util.result_classes.CAResult.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/util/result_classes.py#L47-L616 2 | 3 | sccoda.util.result\_classes.CAResult 4 | ==================================== 5 | 6 | .. currentmodule:: sccoda.util.result_classes 7 | 8 | .. add toctree option to make autodoc generate the pages 9 | 10 | .. autoclass:: CAResult 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | .. rubric:: Methods 19 | 20 | .. autosummary:: 21 | :toctree: . 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | ~sccoda.util.result_classes.CAResult.compare_parameters_to_truth 31 | 32 | 33 | ~sccoda.util.result_classes.CAResult.complete_alpha_df 34 | 35 | 36 | ~sccoda.util.result_classes.CAResult.complete_beta_df 37 | 38 | 39 | 40 | 41 | ~sccoda.util.result_classes.CAResult.credible_effects 42 | 43 | 44 | 45 | ~sccoda.util.result_classes.CAResult.distance_to_truth 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | ~sccoda.util.result_classes.CAResult.save 71 | 72 | 73 | 74 | 75 | ~sccoda.util.result_classes.CAResult.set_fdr 76 | 77 | 78 | 79 | 80 | 81 | 82 | ~sccoda.util.result_classes.CAResult.summary 83 | 84 | 85 | ~sccoda.util.result_classes.CAResult.summary_extended 86 | 87 | 88 | ~sccoda.util.result_classes.CAResult.summary_prepare 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | -------------------------------------------------------------------------------- /docs/source/sccoda.util.result_classes.CAResult.save.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/util/result_classes.py#L572-L589 2 | 3 | sccoda.util.result\_classes.CAResult.save 4 | 
========================================= 5 | 6 | .. currentmodule:: sccoda.util.result_classes 7 | 8 | .. automethod:: CAResult.save -------------------------------------------------------------------------------- /docs/source/sccoda.util.result_classes.CAResult.set_fdr.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/util/result_classes.py#L591-L616 2 | 3 | sccoda.util.result\_classes.CAResult.set\_fdr 4 | ============================================= 5 | 6 | .. currentmodule:: sccoda.util.result_classes 7 | 8 | .. automethod:: CAResult.set_fdr -------------------------------------------------------------------------------- /docs/source/sccoda.util.result_classes.CAResult.summary.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/util/result_classes.py#L341-L394 2 | 3 | sccoda.util.result\_classes.CAResult.summary 4 | ============================================ 5 | 6 | .. currentmodule:: sccoda.util.result_classes 7 | 8 | .. automethod:: CAResult.summary -------------------------------------------------------------------------------- /docs/source/sccoda.util.result_classes.CAResult.summary_extended.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/util/result_classes.py#L396-L449 2 | 3 | sccoda.util.result\_classes.CAResult.summary\_extended 4 | ====================================================== 5 | 6 | .. currentmodule:: sccoda.util.result_classes 7 | 8 | .. 
automethod:: CAResult.summary_extended -------------------------------------------------------------------------------- /docs/source/sccoda.util.result_classes.CAResult.summary_prepare.rst: -------------------------------------------------------------------------------- 1 | :github_url: https://github.com/theislab/scCODA/tree/master/util/result_classes.py#L108-L212 2 | 3 | sccoda.util.result\_classes.CAResult.summary\_prepare 4 | ===================================================== 5 | 6 | .. currentmodule:: sccoda.util.result_classes 7 | 8 | .. automethod:: CAResult.summary_prepare -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.21 2 | pandas>=1.0 3 | seaborn 4 | matplotlib>=3.0 5 | tensorflow>=2.8 6 | tensorflow-probability>=0.16 7 | arviz>=0.11 8 | scipy 9 | anndata 10 | patsy 11 | scanpy 12 | statsmodels 13 | rpy2 14 | scikit-bio 15 | -------------------------------------------------------------------------------- /sccoda/__init__.py: -------------------------------------------------------------------------------- 1 | try: 2 | from setuptools_scm import get_version 3 | 4 | __version__ = get_version(root="..", relative_to=__file__) 5 | del get_version 6 | except (LookupError, ImportError): 7 | try: 8 | from importlib_metadata import version # Python < 3.8 9 | except: 10 | from importlib.metadata import version # Python = 3.8 11 | __version__ = version(__name__) 12 | del version 13 | -------------------------------------------------------------------------------- /sccoda/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | """Builtin Datasets. 
def haber() -> pd.DataFrame:
    """
    Load the bundled cell count table of Haber et al. 2017.

    The table contains tabularized counts of cell types in the
    small intestinal epithelium of mice with different conditions.

    Returns
    -------
    data matrix as pandas data frame.

    counts
        contents of ``haber_counts.csv``, read from the directory of this module
    """
    # HERE is the module-level directory of this file; the csv is shipped next to it
    return pd.read_csv(HERE.joinpath('haber_counts.csv'))
def read_anndata_one_sample(
        adata: AnnData,
        cell_type_identifier: str,
        covariate_key: Optional[str] = None
) -> Tuple[np.ndarray, dict]:
    """
    Counts the cells of every cell type in one single-sample anndata object.

    The cell type assignment is read from the column ``cell_type_identifier`` of ``adata.obs``
    (e.g. Louvain clustering results). Covariates (control/disease group, ...) are optional and,
    if requested, are taken unchanged from ``adata.uns[covariate_key]``.

    Usage:

    ``cell_counts, covs = read_anndata_one_sample(adata, cell_type_identifier="Louvain", covariate_key="covariates")``

    Parameters
    ----------
    adata
        single-cell data object from scanpy
    cell_type_identifier
        column name in adata.obs that specifies the cell types
    covariate_key
        key for adata.uns, where the covariate values are stored

    Returns
    -------
    The cell counts, plus the covariates if ``covariate_key`` is given

    cell_counts
        result of ``value_counts()`` on the cell type column (one entry per cell type)
    covs
        the object stored under ``adata.uns[covariate_key]``; only returned if ``covariate_key`` is not None
    """

    # One entry per cell type that occurs in this sample
    type_counts = adata.obs[cell_type_identifier].value_counts()

    # Without a covariate key, only the counts are handed back (no tuple!)
    if covariate_key is None:
        return type_counts

    return type_counts, adata.uns[covariate_key]
def from_scanpy_dir(
        path: str,
        cell_type_identifier: str,
        covariate_key: Optional[str] = None,
        covariate_df: Optional[pd.DataFrame] = None
) -> AnnData:
    """
    Creates a compositional analysis data set from all scanpy data sets in a directory.

    To use this function, all data sets need to have one identically named column in adata.obs that contains the cell type assignment.
    Covariates can either be specified via a key in adata.uns, or as a separate DataFrame.
    If both are given, ``covariate_key`` takes precedence.

    NOTE: Every entry of the directory is read with ``anndata.read_h5ad``, in the order returned by ``os.listdir``.
    When passing ``covariate_df``, its rows must follow that same order.

    Usage:
    ``data = from_scanpy_dir("./path/to/directory", cell_type_identifier="Louvain", covariate_key="covariates")``

    Parameters
    ----------
    path
        path to directory
    cell_type_identifier
        column name in adata.obs that specifies the cell types
    covariate_key
        key for adata.uns, where covariate values are stored
    covariate_df
        DataFrame with covariates

    Returns
    -------
    A compositional analysis data set, or None if no covariate information was specified

    data
        A compositional analysis data set
    """

    filenames = os.listdir(path)

    if covariate_key is None and covariate_df is None:
        print("No covariate information specified!")
        return

    count_data = pd.DataFrame()
    covariate_data = pd.DataFrame()

    for f in filenames:
        # os.listdir only yields bare file names; they have to be joined with the directory,
        # otherwise the files would be searched in the current working directory
        adata = ad.read_h5ad(os.path.join(path, f))

        if covariate_key is not None:
            cell_counts, covs = read_anndata_one_sample(adata, cell_type_identifier, covariate_key)
            covariate_data = pd.concat([covariate_data, pd.Series(covs).to_frame().T], ignore_index=True)
        else:
            cell_counts = read_anndata_one_sample(adata, cell_type_identifier)

        # One row of counts per sample
        count_data = pd.concat([count_data, pd.DataFrame(cell_counts).T])

    if covariate_key is None:
        # Work on a copy, so that the index cast below does not alter the caller's DataFrame
        covariate_data = covariate_df.copy()

    # Replace NaNs (cell types that are absent from a sample)
    count_data = count_data.fillna(0)
    covariate_data.index = covariate_data.index.astype(str)

    var_dat = count_data.sum(axis=0).rename("n_cells").to_frame()
    var_dat.index = var_dat.index.astype(str)

    return ad.AnnData(X=count_data.values,
                      var=var_dat,
                      obs=covariate_data)
def from_pandas(
        df: pd.DataFrame,
        covariate_columns: List[str]
) -> AnnData:
    """
    Builds a compositional analysis data set from a Pandas DataFrame.

    Each row of the DataFrame is one sample. The columns listed in ``covariate_columns`` become the
    covariates (``obs``); every other column is treated as a cell type and goes into the count matrix.

    Usage:
    ``data = from_pandas(df, covariate_columns=["cov1", "cov2"])``

    Parameters
    ----------
    df
        A pandas DataFrame with each row representing a sample; the columns can be cell counts or covariates
    covariate_columns
        List of column names that are interpreted as covariates; all other columns will be seen as cell types

    Returns
    -------
    A compositional analysis data set

    data
        A compositional analysis data set
    """

    # Covariates, with a string-typed sample index
    obs_frame = df.loc[:, covariate_columns]
    obs_frame.index = obs_frame.index.astype(str)

    # Everything that is not a covariate column is a cell type
    is_cell_type = ~df.columns.isin(obs_frame.columns)
    count_frame = df.loc[:, is_cell_type]

    return ad.AnnData(X=count_frame.values,
                      var=pd.DataFrame(index=count_frame.columns),
                      obs=obs_frame)
class CompositionalAnalysis:
    """
    Initializer class for scCODA models. This class is called when performing compositional analysis with scCODA.

    Usage: model = CompositionalAnalysis(data, formula="covariate1 + covariate2", reference_cell_type="CellTypeA")

    Calling an scCODA model requires these parameters:

    data
        anndata object with cell counts as data.X and covariates saved in data.obs
    formula
        patsy-style formula for building the covariate matrix.
        Categorical covariates are handled automatically, with the covariate value of the first sample being used as the reference category.
        To set a different level as the base category for a categorical covariate, use "C(<CovariateName>, Treatment('<ReferenceLevelName>'))"
    reference_cell_type
        Column index that sets the reference cell type. Can either reference the name of a column or a column number (starting at 0).
        If "automatic", the cell type with the lowest dispersion in relative abundance among all cell types whose
        fraction of zero entries is below ``automatic_reference_absence_threshold`` will be chosen.
    """

    def __new__(
            cls,
            data: AnnData,
            formula: str,
            reference_cell_type: Union[str, int] = "automatic",
            automatic_reference_absence_threshold: float = 0.05,
    ) -> dm.scCODAModel:
        """
        Builds count and covariate matrix, returns a CompositionalModel object

        Usage: model = CompositionalAnalysis(data, formula="covariate1 + covariate2", reference_cell_type="CellTypeA")

        Parameters
        ----------
        data
            anndata object with cell counts as data.X and covariates saved in data.obs
        formula
            R-style formula for building the covariate matrix.
            Categorical covariates are handled automatically, with the covariate value of the first sample being used as the reference category.
            To set a different level as the base category for a categorical covariate, use "C(<CovariateName>, Treatment('<ReferenceLevelName>'))"
        reference_cell_type
            Column index that sets the reference cell type. Can either reference the name of a column or the n-th column (indexed at 0).
            If "automatic", the cell type with the lowest dispersion in relative abundance among all cell types whose
            fraction of zero entries is below ``automatic_reference_absence_threshold`` will be chosen.
        automatic_reference_absence_threshold
            If using reference_cell_type = "automatic", determine what the maximum fraction of zero entries for a cell type is to be considered as a possible reference cell type

        Returns
        -------
        A compositional model

        model
            A scCODA.models.scCODA_model.CompositionalModel object

        Raises
        ------
        ValueError
            If automatic reference selection finds no cell type below the absence threshold
        NameError
            If ``reference_cell_type`` is neither "automatic", a cell type name, nor a valid column number
        """

        cell_types = data.var.index.to_list()

        # Get count data
        data_matrix = data.X.astype("float64")

        # Build covariate matrix from R-like formula; the first column (intercept) is dropped
        covariate_matrix = pt.dmatrix(formula, data.obs)
        covariate_names = covariate_matrix.design_info.column_names[1:]
        covariate_matrix = covariate_matrix[:, 1:]

        # Resolve the reference cell type to a column index
        # Automatic reference selection (dispersion-based)
        if reference_cell_type == "automatic":
            percent_zero = np.sum(data_matrix == 0, axis=0)/data_matrix.shape[0]
            nonrare_ct = np.where(percent_zero < automatic_reference_absence_threshold)[0]

            if len(nonrare_ct) == 0:
                raise ValueError("No cell types that have large enough presence! Please increase automatic_reference_absence_threshold")

            rel_abun = data_matrix / np.sum(data_matrix, axis=1, keepdims=True)

            # select reference: lowest dispersion, searched only among the non-rare cell types,
            # so that a rare cell type with a tied dispersion value can never be picked
            cell_type_disp = np.var(rel_abun, axis=0)/np.mean(rel_abun, axis=0)
            ref_index = nonrare_ct[np.argmin(cell_type_disp[nonrare_ct])]

            ref_cell_type = cell_types[ref_index]
            print(f"Automatic reference selection! Reference cell type set to {ref_cell_type}")

        # Column name as reference cell type
        elif reference_cell_type in cell_types:
            ref_index = cell_types.index(reference_cell_type)

        # Numeric reference cell type.
        # Short-circuiting `and` is required here: for a string that is not a cell type name,
        # the range comparison must not be evaluated (it would raise a TypeError)
        elif isinstance(reference_cell_type, (int, np.integer)) and 0 <= reference_cell_type < len(cell_types):
            ref_index = reference_cell_type

        # None of the above: Throw error
        else:
            raise NameError("Reference index is not a valid cell type name or numerical index!")

        return dm.scCODAModel(
            covariate_matrix=np.array(covariate_matrix),
            data_matrix=data_matrix,
            cell_types=cell_types,
            covariate_names=covariate_names,
            reference_cell_type=ref_index,
            formula=formula,
        )
generation and modelling solutions for compositional data with different specifications. 5 | This data might e.g. come from scRNA-seq experiments. 6 | The covariates are represented by ``X``, the cell count matrix is denoted ``Y``. 7 | 8 | To start, we set the dimensions of the data: Number of cell types (``K``), number of covariates (``D``), 9 | number of samples (``N``), and number of cells per sample (``n_total``). 10 | 11 | We now generate composition parameters (``b_true``, ``w_true``) and a covariance matrix (``Sigma``) 12 | from some input specifications. 13 | ``b_true`` represents the base composition with all covariates set to 0. Adding ``X * w_true`` to this 14 | gives the corresponding parameter for each sample. 15 | 16 | After adding a gaussian noise (``Sigma``), we can build a concentration vector for each sample that sums up to 1 17 | via the softmax function. 18 | From there, we can calculate each row of the cell count matrix (``Y``) via a multinomial distribution. 19 | 20 | :authors: Johannes Ostner 21 | """ 22 | 23 | import numpy as np 24 | import anndata as ad 25 | import pandas as pd 26 | from scipy.special import softmax 27 | 28 | from anndata import AnnData 29 | from typing import Optional, Tuple, Collection, Union, List 30 | 31 | 32 | def generate_case_control( 33 | cases: int = 1, 34 | K: int = 5, 35 | n_total: int = 1000, 36 | n_samples: List[any] = [5, 5], 37 | sigma: Optional[np.ndarray] = None, 38 | b_true: Optional[np.ndarray] = None, 39 | w_true: Optional[np.ndarray] = None 40 | ) -> AnnData: 41 | """ 42 | Generates compositional data with binary covariates. 43 | 44 | Parameters 45 | ---------- 46 | cases 47 | number of covariates. 48 | This will lead to D=2**cases columns in X, one for each combination of active/inactive covariates. 49 | K 50 | Number of cell types 51 | n_total 52 | number of cells per sample 53 | n_samples 54 | Number of samples per case combination. 
len(n_samples)=[2**cases] 55 | sigma 56 | correlation matrix for cell types,size KxK 57 | b_true 58 | bias coefficients, size K 59 | w_true 60 | Effect matrix, size DxK 61 | 62 | Returns 63 | ------- 64 | compositional data 65 | 66 | data 67 | Anndata object 68 | """ 69 | D = cases**2 70 | 71 | # Uniform intercepts if none are specifed 72 | if b_true is None: 73 | b_true = np.random.uniform(-3, 3, size=K).astype(np.float64) # bias (alpha) 74 | 75 | # Randomly select covariates that should correlate if none are specified 76 | if w_true is None: 77 | n_d = np.random.choice(range(D), size=1) 78 | n_k = np.random.choice(range(K), size=1) 79 | w_true = sparse_effect_matrix(D, K, n_d, n_k) 80 | 81 | # Sigma is identity if not specified else 82 | if sigma is None: 83 | sigma = np.identity(K) * 0.05 84 | 85 | # noise = noise_std_true * np.random.randn(N, 1).astype(np.float64) 86 | 87 | # Initialize x, y 88 | x = np.zeros((sum(n_samples), cases)) 89 | y = np.zeros((sum(n_samples), K)) 90 | c = 0 91 | 92 | # Binary representation of a number x as list of fixed length 93 | def binary(num, length): 94 | return [int(x_n) for x_n in bin(num)[2:].zfill(length)] 95 | 96 | # For all combinations of cases 97 | for i in range(2**cases): 98 | # For each sample with this combination 99 | for j in range(n_samples[i]): 100 | # row of x is binary representation 101 | x[c+j] = binary(i, cases) 102 | 103 | # Generate y 104 | alpha = np.random.multivariate_normal(mean=x[c+j, :].T @ w_true + b_true, cov=sigma).astype( 105 | np.float64) 106 | 107 | concentration = softmax(alpha).astype(np.float64) 108 | z = np.random.multinomial(n_total, concentration) 109 | y[c+j] = z 110 | c = c+n_samples[i] 111 | 112 | x = x.astype(np.float64) 113 | y = y.astype(np.float64) 114 | 115 | x_names = ["x_" + str(n) for n in range(x.shape[1])] 116 | x_df = pd.DataFrame(x, columns=x_names) 117 | x_df.index = x_df.index.astype(str) 118 | 119 | data = ad.AnnData(X=y, obs=x_df, uns={"b_true": b_true, "w_true": 
def b_w_from_abs_change(
        counts_before: np.ndarray = np.array([200, 200, 200, 200, 200]),
        abs_change: np.ndarray = np.array([50, 0, 0, 0, 0]),
        n_total: int = 1000
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Calculates intercepts and slopes from a starting count and an absolute change in cell counts

    All cell types with a nonzero entry in ``abs_change`` are shifted by that amount.
    The remaining cells (``n_total`` minus the shifted counts) are distributed equally
    over all other cell types. The slopes are shifted such that the last cell type has slope 0.

    Parameters
    ----------
    counts_before
        cell counts for control samples
    abs_change
        change of each cell type in terms of cell counts; 0 marks an unaffected cell type
    n_total
        number of cells per sample. This stays constant over all samples!!!

    Returns
    -------
    Returns an intercept and an effect array

    intercepts
        intercept parameters
    slopes
        slope parameters
    """

    K = counts_before.shape[0]

    # calculate intercepts for control samples
    b = np.log(counts_before / n_total)

    # count vector after applying the effect.
    # Must be a float array: the equal share assigned below is generally not an integer
    # and would be truncated silently in an integer array
    counts_after = (counts_before + abs_change).astype(np.float64)
    is_da = abs_change != 0
    n_non_da = int(np.sum(~is_da))
    if n_non_da > 0:
        counts_after[~is_da] = (n_total - np.sum(counts_after[is_da])) / n_non_da

    # Get parameter vector with effect
    b_after = np.log(counts_after / n_total)

    # w is the difference of b before and after
    w = b_after - b
    # Shift w such that the last entry is zero
    # (all unaffected cell types share the same value, so they become zero as well)
    w = w - w[K - 1]

    return b, w
def sparse_effect_matrix(
        D: int,
        K: int,
        n_d: int,
        n_k: int
) -> np.ndarray:
    """
    Generates a sparse effect matrix

    ``n_d`` covariates and ``n_k`` cell types are chosen at random (without replacement).
    Every combination of a chosen covariate and a chosen cell type gets an effect
    that is drawn randomly from (0.3, 0.5, 1); all other entries are 0.

    Parameters
    ----------
    D
        Number of covariates
    K
        Number of cell types
    n_d
        Number of covariates that have an effect
    n_k
        Number of cell types that are affected by each of these covariates

    Returns
    -------
    An effect matrix

    w_true
        Effect matrix, size DxK
    """

    # Choose indices of affected cell types and covariates randomly
    d_eff = np.random.choice(range(D), size=n_d, replace=False)
    k_eff = np.random.choice(range(K), size=n_k, replace=False)

    # Possible entries of w_true
    w_choice = [0.3, 0.5, 1]

    w_true = np.zeros((D, K))
    # Fill in w_true
    for i in d_eff:
        for j in k_eff:
            # Draw a scalar index: a size-1 array cannot be used to index a Python list
            c = np.random.choice(len(w_choice))
            w_true[i, j] = w_choice[c]

    return w_true
def stackbar(
        y: np.ndarray,
        type_names: List[str],
        title: str,
        level_names: List[str],
        figsize: Optional[Tuple[int, int]] = None,
        dpi: Optional[int] = 100,
        cmap: Optional[ListedColormap] = cm.tab20,
        plot_legend: Optional[bool] = True,
) -> plt.Subplot:
    """
    Draws one stacked bar per row of ``y``, showing the percentage of each cell type in that row.
    Typical use (only inside stacked_barplot): stackbar(data.X, data.var.index, "xyz", data.obs.index)

    Parameters
    ----------
    y
        The count data, collapsed onto the level of interest. i.e. a binary covariate has two rows, one for each group, containing the count
        mean of each cell type
    type_names
        The names of all cell types
    title
        Plot title, usually the covariate's name
    level_names
        names of the covariate's levels
    figsize
        figure size; defaults to matplotlib's ``rcParams["figure.figsize"]``
    dpi
        dpi setting
    cmap
        The color map for the barplot
    plot_legend
        If True, adds a legend

    Returns
    -------
    Returns a plot

    ax
        a plot

    """
    n_bars, n_types = y.shape

    if figsize is None:
        figsize = rcParams["figure.figsize"]

    fig, ax = plt.subplots(figsize=figsize, dpi=dpi)
    positions = np.array(range(n_bars))
    row_totals = np.sum(y, axis=1)

    bar_width = 0.85
    # Running height of each bar; the next cell type is stacked on top of it
    stacked_height = np.zeros(n_bars)

    for type_idx in range(n_types):
        # Share of this cell type in every row, in percent
        percentages = [
            y[row][type_idx] / total * 100
            for row, total in zip(range(n_bars), row_totals)
        ]
        plt.bar(
            positions,
            percentages,
            bottom=stacked_height,
            color=cmap(type_idx % cmap.N),  # colors repeat once the color map is exhausted
            width=bar_width,
            label=type_names[type_idx],
            linewidth=0,
        )
        stacked_height += percentages

    ax.set_title(title)
    if plot_legend:
        ax.legend(loc='upper left', bbox_to_anchor=(1, 1), ncol=1)
    ax.set_xticks(positions)
    ax.set_xticklabels(level_names, rotation=45)
    ax.set_ylabel("Proportion")

    return ax
If feature_name=="samples", one bar for every sample will be plotted 110 | figsize 111 | figure size 112 | dpi 113 | dpi setting 114 | cmap 115 | The color map for the barplot 116 | plot_legend 117 | If True, adds a legend 118 | level_order 119 | Custom ordering of bars on the x-axis 120 | 121 | Returns 122 | ------- 123 | Returns a plot 124 | 125 | g: 126 | a plot 127 | 128 | """ 129 | 130 | # cell type names 131 | type_names = data.var.index 132 | 133 | # option to plot one stacked barplot per sample 134 | if feature_name == "samples": 135 | if level_order: 136 | assert set(level_order) == set(data.obs.index), "level order is inconsistent with levels" 137 | data = data[level_order] 138 | g = stackbar( 139 | data.X, 140 | type_names=data.var.index, 141 | title="samples", 142 | level_names=data.obs.index, 143 | figsize=figsize, 144 | dpi=dpi, 145 | cmap=cmap, 146 | plot_legend=plot_legend, 147 | ) 148 | else: 149 | # Order levels 150 | if level_order: 151 | assert set(level_order) == set(data.obs[feature_name]), "level order is inconsistent with levels" 152 | levels = level_order 153 | elif hasattr(data.obs[feature_name], 'cat'): 154 | levels = data.obs[feature_name].cat.categories.to_list() 155 | else: 156 | levels = pd.unique(data.obs[feature_name]) 157 | n_levels = len(levels) 158 | feature_totals = np.zeros([n_levels, data.X.shape[1]]) 159 | 160 | for level in range(n_levels): 161 | l_indices = np.where(data.obs[feature_name] == levels[level]) 162 | feature_totals[level] = np.sum(data.X[l_indices], axis=0) 163 | 164 | g = stackbar( 165 | feature_totals, 166 | type_names=type_names, 167 | title=feature_name, 168 | level_names=levels, 169 | figsize=figsize, 170 | dpi=dpi, 171 | cmap=cmap, 172 | plot_legend=plot_legend, 173 | ) 174 | 175 | return g 176 | 177 | 178 | def boxplots( 179 | data: AnnData, 180 | feature_name: str, 181 | y_scale: str = "relative", 182 | plot_facets: bool = False, 183 | add_dots: bool = False, 184 | cell_types: Optional[list] = None, 185 
| args_boxplot: Optional[dict] = {}, 186 | args_swarmplot: Optional[dict] = {}, 187 | figsize: Optional[Tuple[int, int]] = None, 188 | dpi: Optional[int] = 100, 189 | cmap: Optional[str] = "Blues", 190 | plot_legend: Optional[bool] = True, 191 | level_order: List[str] = None 192 | ) -> Optional[Tuple[plt.Subplot, sns.axisgrid.FacetGrid]]: 193 | """\ 194 | Grouped boxplot visualization. The cell counts for each cell type are shown as a group of boxplots, 195 | with intra--group separation by a covariate from data.obs. 196 | 197 | The cell type groups can either be ordered along the x-axis of a single plot (plot_facets=False) or as plot facets (plot_facets=True). 198 | 199 | Parameters 200 | ---------- 201 | data 202 | A scCODA-compatible data object 203 | feature_name 204 | The name of the feature in data.obs to plot 205 | y_scale 206 | Transformation to of cell counts. Options: "relative" - Relative abundance, "log" - log(count), "count" - absolute abundance (cell counts) 207 | plot_facets 208 | If False, plot cell types on the x-axis. 
If True, plot as facets 209 | add_dots 210 | If True, overlay a scatterplot with one dot for each data point 211 | cell_types 212 | Subset of cell types that should be plotted 213 | args_boxplot 214 | Arguments passed to sns.boxplot 215 | args_swarmplot 216 | Arguments passed to sns.swarmplot 217 | figsize 218 | figure size 219 | dpi 220 | dpi setting 221 | cmap 222 | The seaborn color map for the barplot 223 | plot_legend 224 | If True, adds a legend 225 | level_order 226 | Custom ordering of bars on the x-axis 227 | 228 | Returns 229 | ------- 230 | Depending on `plot_facets`, returns a :class:`~plt.AxesSubplot` (`plot_facets = False`) or :class:`~sns.axisgrid.FacetGrid` (`plot_facets = True`) object 231 | 232 | ax 233 | if `plot_facets = False` 234 | g 235 | if `plot_facets = True` 236 | """ 237 | 238 | # y scale transformations 239 | if y_scale == "relative": 240 | sample_sums = np.sum(data.X, axis=1, keepdims=True) 241 | X = data.X/sample_sums 242 | value_name = "Proportion" 243 | # add pseudocount 1 if using log scale (needs to be improved) 244 | elif y_scale == "log": 245 | X = np.log(data.X + 1) 246 | value_name = "log(count)" 247 | elif y_scale == "count": 248 | X = data.X 249 | value_name = "count" 250 | else: 251 | raise ValueError("Invalid y_scale transformation") 252 | 253 | count_df = pd.DataFrame(X, columns=data.var.index, index=data.obs.index).\ 254 | merge(data.obs[feature_name], left_index=True, right_index=True) 255 | plot_df = pd.melt(count_df, id_vars=feature_name, var_name="Cell type", value_name=value_name) 256 | if cell_types is not None: 257 | plot_df = plot_df[plot_df["Cell type"].isin(cell_types)] 258 | 259 | if plot_facets: 260 | 261 | if level_order is None: 262 | level_order = pd.unique(plot_df[feature_name]) 263 | 264 | K = X.shape[1] 265 | 266 | g = sns.FacetGrid( 267 | plot_df, 268 | col="Cell type", 269 | sharey=False, 270 | col_wrap=int(np.floor(np.sqrt(K))), 271 | height=5, 272 | aspect=2, 273 | ) 274 | g.map( 275 | sns.boxplot, 
276 | feature_name, 277 | value_name, 278 | palette=cmap, 279 | order=level_order, 280 | **args_boxplot 281 | ) 282 | 283 | if add_dots: 284 | 285 | if "hue" in args_swarmplot: 286 | hue = args_swarmplot.pop("hue") 287 | else: 288 | hue = None 289 | 290 | if hue is None: 291 | g.map( 292 | sns.swarmplot, 293 | feature_name, 294 | value_name, 295 | color="black", 296 | order=level_order, 297 | **args_swarmplot 298 | ).set_titles("{col_name}") 299 | else: 300 | g.map( 301 | sns.swarmplot, 302 | feature_name, 303 | value_name, 304 | hue, 305 | order=level_order, 306 | **args_swarmplot 307 | ).set_titles("{col_name}") 308 | 309 | return g 310 | 311 | else: 312 | 313 | if level_order: 314 | args_boxplot["hue_order"] = level_order 315 | args_swarmplot["hue_order"] = level_order 316 | 317 | fig, ax = plt.subplots(figsize=figsize, dpi=dpi) 318 | 319 | sns.boxplot(x="Cell type", y=value_name, hue=feature_name, data=plot_df, fliersize=1, 320 | palette=cmap, ax=ax, **args_boxplot) 321 | 322 | if add_dots: 323 | sns.swarmplot( 324 | x="Cell type", 325 | y=value_name, 326 | data=plot_df, 327 | hue=feature_name, 328 | ax=ax, 329 | dodge=True, 330 | color="black", 331 | **args_swarmplot 332 | ) 333 | 334 | cell_types = pd.unique(plot_df["Cell type"]) 335 | ax.set_xticklabels(cell_types, rotation=90) 336 | 337 | if plot_legend: 338 | handles, labels = ax.get_legend_handles_labels() 339 | handout = [] 340 | labelout = [] 341 | for h, l in zip(handles, labels): 342 | if l not in labelout: 343 | labelout.append(l) 344 | handout.append(h) 345 | ax.legend(handout, labelout, loc='upper left', bbox_to_anchor=(1, 1), ncol=1, title=feature_name) 346 | 347 | plt.tight_layout() 348 | 349 | return ax 350 | 351 | 352 | def rel_abundance_dispersion_plot( 353 | data: AnnData, 354 | abundant_threshold: Optional[float] = 0.9, 355 | default_color: Optional[str] = "Grey", 356 | abundant_color: Optional[str] = "Red", 357 | label_cell_types: bool = "True", 358 | figsize: Optional[Tuple[int, int]] = 
None, 359 | dpi: Optional[int] = 100, 360 | 361 | ) -> plt.Subplot: 362 | """ 363 | Plots total variance of relative abundance versus minimum relative abundance of all cell types for determination of a reference cell type. 364 | If the count of the cell type is larger than 0 in more than abundant_threshold percent of all samples, 365 | the cell type will be marked in a different color. 366 | 367 | Parameters 368 | ---------- 369 | data 370 | A scCODA compositional data object 371 | abundant_threshold 372 | Presence threshold for abundant cell types. 373 | default_color 374 | bar color for all non-minimal cell types, default: "Grey" 375 | abundant_color 376 | bar color for cell types with abundant percentage larger than abundant_threshold, default: "Red" 377 | label_cell_types 378 | boolean - label dots with cell type names 379 | figsize 380 | figure size 381 | dpi 382 | dpi setting 383 | 384 | Returns 385 | ------- 386 | Returns a plot 387 | 388 | ax 389 | a plot 390 | """ 391 | 392 | fig, ax = plt.subplots(figsize=figsize, dpi=dpi) 393 | 394 | rel_abun = data.X / np.sum(data.X, axis=1, keepdims=True) 395 | 396 | percent_zero = np.sum(data.X == 0, axis=0) / data.X.shape[0] 397 | nonrare_ct = np.where(percent_zero < 1-abundant_threshold)[0] 398 | 399 | # select reference 400 | cell_type_disp = np.var(rel_abun, axis=0) / np.mean(rel_abun, axis=0) 401 | 402 | is_abundant = [x in nonrare_ct for x in range(data.X.shape[1])] 403 | 404 | # Scatterplot 405 | plot_df = pd.DataFrame({ 406 | "Total dispersion": cell_type_disp, 407 | "Cell type": data.var.index, 408 | "Presence": 1-percent_zero, 409 | "Is abundant": is_abundant 410 | }) 411 | 412 | if len(np.unique(plot_df["Is abundant"])) > 1: 413 | palette = [default_color, abundant_color] 414 | elif np.unique(plot_df["Is abundant"]) == [False]: 415 | palette = [default_color] 416 | else: 417 | palette = [abundant_color] 418 | 419 | sns.scatterplot( 420 | data=plot_df, 421 | x="Presence", 422 | y="Total dispersion", 423 | 
def sample_size_estimate(
        mcc_desired: float,
        increase: float,
        lf_increase: float
) -> int:
    """
    calculates the estimated number of required samples for
    fixed MCC, absolute increase and log2-fold change using the fitted linear model from
    Büttner, Ostner et al., 2020

    Linear model parameters:

    - `(Intercept)`: -1.3675613850217

    - `total_samples`: 0.0193158965178381

    - `log_fold_increase`: 0.704729538709909

    - `log_increase`: 0.315857162659738

    - `log_fold_increase:log_increase` (interaction): -0.0927955725385892

    Scalars and array-likes are both accepted; array-like inputs are broadcast against each other.

    Parameters
    ----------
    mcc_desired
        desired MCC value
    increase
        mean absolute increase of cells between the groups
    lf_increase
        mean log2-fold increase of cells from one group to the other

    Returns
    -------
    sample size estimate

    n_samples -- int
        estimated number of required samples. A Python int if all inputs are scalars
        (a float if the estimate is not finite, e.g. for a non-positive `increase`),
        otherwise a float array of rounded estimates.

    """

    mcc = np.asarray(mcc_desired, dtype=float)
    lf = np.asarray(lf_increase, dtype=float)

    # scale and transform input features
    mscale_min = 0.3440976088844191
    scaled_mcc = (mcc + 1) / 2
    scaled_mcc = np.log((scaled_mcc + mscale_min) / (1 - scaled_mcc + mscale_min))
    log_inc = np.log(np.asarray(increase, dtype=float))

    # inverse regress sample size
    interaction = 0.0927955725385892 * lf * log_inc
    increase_effects = 0.704729538709909 * lf + 0.315857162659738 * log_inc - interaction
    n_samples = (scaled_mcc + 1.3675613850217 - increase_effects) / 0.0193158965178381

    # No samples are reported where the model predicts a negative number or where the
    # interaction term alone exceeds the shifted target. np.where (instead of boolean item
    # assignment) also works for scalar inputs, which used to raise a TypeError.
    no_samples = (n_samples < 0) | (interaction > scaled_mcc + 1.3675613850217)
    n_samples = np.round(np.where(no_samples, 0.0, n_samples))

    if np.ndim(n_samples) == 0:
        # int(nan) / int(inf) would raise, so non-finite estimates are handed back as floats
        return int(n_samples) if np.isfinite(n_samples) else float(n_samples)
    return n_samples
class CAResultConverter(az.data.io_dict.DictConverter):
    """
    Helper class for result conversion into arviz's format
    """

    def to_result_data(self, sampling_stats, model_specs):
        """
        Converts all groups known to the dict converter to xarray and bundles them in a ``CAResult``.

        Parameters
        ----------
        sampling_stats
            Information and statistics about the MCMC sampling procedure, see ``CAResult``
        model_specs
            Information about the model specifications, see ``CAResult``

        Returns
        -------
        A ``CAResult`` object
        """

        post = self.posterior_to_xarray()
        ss = self.sample_stats_to_xarray()
        postp = self.posterior_predictive_to_xarray()
        prior = self.prior_to_xarray()
        ssp = self.sample_stats_prior_to_xarray()
        prip = self.prior_predictive_to_xarray()
        obs = self.observed_data_to_xarray()

        return CAResult(
            sampling_stats, model_specs,
            posterior=post,
            sample_stats=ss,
            posterior_predictive=postp,
            prior=prior,
            sample_stats_prior=ssp,
            prior_predictive=prip,
            observed_data=obs,
        )


class CAResult(az.InferenceData):
    """
    Result class for scCODA, extends the arviz framework for inference data.

    The CAResult class is an extension of az.InferenceData, that adds some information about the compositional model
    and is able to print humanly readable results.
    It supports all functionality from az.InferenceData.
    """

    def __init__(
            self,
            sampling_stats: dict,
            model_specs: dict,
            **kwargs
    ):
        """
        Gathers sampling information from a compositional model and converts it to a ``az.InferenceData`` object.
        The following attributes are added during class initialization:

        ``self.sampling_stats``: dict - see below
        ``self.model_specs``: dict - see below

        ``self.intercept_df``: Intercept dataframe from ``CAResult.summary_prepare``
        ``self.effect_df``: Effect dataframe from ``CAResult.summary_prepare``

        Parameters
        ----------
        sampling_stats
            Information and statistics about the MCMC sampling procedure.
            Default keys:
            - "chain_length": Length of MCMC chain (with burnin samples)
            - "num_burnin": Number of burnin samples
            - "acc_rate": MCMC Acceptance rate
            - "duration": Duration of MCMC sampling

        model_specs
            All information and statistics about the model specifications.
            Default keys:
            - "formula": Formula string
            - "reference": int - identifier of reference cell type

            Added during class initialization:
            - "threshold_prob": Threshold for inclusion probability that separates significant from non-significant effects
        kwargs
            passed to az.InferenceData. This includes the MCMC chain states and statistics for each MCMC sample.
        """
        # Zero-argument super(): ``super(self.__class__, self)`` recurses endlessly as soon as
        # this class is subclassed, because ``self.__class__`` is then the subclass.
        super().__init__(**kwargs)

        self.sampling_stats = sampling_stats
        self.model_specs = model_specs

        # The spike-and-slab indicator "ind" is what marks a result as coming from the scCODA model
        self.is_sccoda = "ind" in list(self.posterior.data_vars)

        intercept_df, effect_df = self.summary_prepare()

        self.intercept_df = intercept_df
        self.effect_df = effect_df

    def summary_prepare(
            self,
            est_fdr: float = 0.05,
            *args,
            **kwargs
    ) -> Tuple[pd.DataFrame, pd.DataFrame]:
        """
        Generates summary dataframes for intercepts and slopes.
        This function builds on and supports all functionalities from ``az.summary``.

        Parameters
        ----------
        est_fdr
            Desired FDR value
        args
            Passed to ``az.summary``
        kwargs
            Passed to ``az.summary``

        Returns
        -------
        Intercept and effect DataFrames

        intercept_df -- pandas df
            Summary of intercept parameters. Contains one row per cell type.

            Columns:
            - Final Parameter: Final intercept model parameter
            - HDI X%: Upper and lower boundaries of confidence interval (width specified via hdi_prob=)
            - SD: Standard deviation of MCMC samples
            - Expected sample: Expected cell counts for a sample with no present covariates. See the tutorial for more explanation

        effect_df -- pandas df
            Summary of effect (slope) parameters. Contains one row per covariate/cell type combination.

            Columns:
            - Final Parameter: Final effect model parameter. If this parameter is 0, the effect is not significant, else it is.
            - HDI X%: Upper and lower boundaries of confidence interval (width specified via hdi_prob=)
            - SD: Standard deviation of MCMC samples
            - Expected sample: Expected cell counts for a sample with only the current covariate set to 1. See the tutorial for more explanation
            - log2-fold change: Log2-fold change between expected cell counts with no covariates and with only the current covariate
            - Inclusion probability: Share of MCMC samples, for which this effect was not set to 0 by the spike-and-slab prior.
        """

        # initialize summary df from arviz and separate into intercepts and effects.
        summ = az.summary(self, *args, **kwargs, kind="stats", var_names=["alpha", "beta"])
        effect_df = summ.loc[summ.index.str.match("beta")].copy()
        intercept_df = summ.loc[summ.index.str.match("alpha")].copy()

        # Build neat index
        cell_types = self.posterior.coords["cell_type"].values
        covariates = self.posterior.coords["covariate"].values

        intercept_df.index = pd.Index(cell_types, name="Cell Type")
        effect_df.index = pd.MultiIndex.from_product([covariates, cell_types],
                                                     names=["Covariate", "Cell Type"])

        # Calculation of columns that are not from az.summary
        intercept_df = self.complete_alpha_df(intercept_df)
        effect_df = self.complete_beta_df(intercept_df, effect_df, est_fdr)

        # Give nice column names, remove unnecessary columns
        hdis = intercept_df.columns[intercept_df.columns.str.contains("hdi")]
        hdis_new = hdis.str.replace("hdi_", "HDI ")

        # Credible interval
        if self.is_sccoda:
            ind_post = self.posterior["ind"]

            # Samples in which the indicator (almost) switched the effect off become NaN and
            # are skipped (skipna=True), so the interval is computed on the included samples only
            b_raw_sel = self.posterior["b_raw"] * ind_post.where(ind_post >= 1e-3)

            res = az.convert_to_inference_data(b_raw_sel)

            summary_sel = az.summary(res, kind="stats", var_names=["x"], skipna=True, *args, **kwargs)

            ref_index = self.model_specs["reference"]
            n_conditions = len(self.posterior.coords["covariate"])
            n_cell_types = len(self.posterior.coords["cell_type"])

            def insert_row(idx, df, df_insert):
                return pd.concat([df.iloc[:idx, ], df_insert, df.iloc[idx:, ]]).reset_index(drop=True)

            # summary_sel has no rows for the reference cell type; insert an all-zero row at the
            # reference position of every covariate so that it lines up with effect_df
            for i in range(n_conditions):
                summary_sel = insert_row((i*n_cell_types) + ref_index, summary_sel,
                                         pd.DataFrame.from_dict(data={"mean": [0], "sd": [0], hdis[0]: [0], hdis[1]: [0]}))

            effect_df.loc[:, hdis[0]] = list(summary_sel[hdis[0]])
            effect_df.loc[:, hdis[1]] = list(summary_sel.loc[:, hdis[1]])

        intercept_df = intercept_df.loc[:, ["final_parameter", hdis[0], hdis[1], "sd", "expected_sample"]].copy()
        intercept_df = intercept_df.rename(columns=dict(zip(
            intercept_df.columns,
            ["Final Parameter", hdis_new[0], hdis_new[1], "SD", "Expected Sample"]
        )))

        effect_df = effect_df.loc[:, ["final_parameter", hdis[0], hdis[1], "sd", "inclusion_prob",
                                      "expected_sample", "log_fold"]].copy()
        effect_df = effect_df.rename(columns=dict(zip(
            effect_df.columns,
            ["Final Parameter", hdis_new[0], hdis_new[1], "SD", "Inclusion probability",
             "Expected Sample", "log2-fold change"]
        )))

        return intercept_df, effect_df

    def complete_beta_df(
            self,
            intercept_df: pd.DataFrame,
            effect_df: pd.DataFrame,
            target_fdr: float = 0.05,
    ) -> pd.DataFrame:
        """
        Evaluation of MCMC results for effect parameters. This function is only used within self.summary_prepare.
        This function also calculates the posterior inclusion probability for each effect and decides whether effects are significant.

        Side effect: for scCODA models, the inclusion probability threshold is stored in
        ``self.model_specs["threshold_prob"]``.

        Parameters
        ----------
        intercept_df
            Intercept summary, see ``self.summary_prepare``
        effect_df
            Effect summary, see ``self.summary_prepare``
        target_fdr
            Desired FDR value

        Returns
        -------
        effect DataFrame

        effect_df
            DataFrame with inclusion probability, final parameters, expected sample
        """
        beta_inc_prob = []
        beta_nonzero_mean = []

        # NOTE(review): only the first entry along the leading axis (presumably the chain
        # dimension) is evaluated - confirm that results always hold a single chain
        beta_raw = np.array(self.posterior["beta"])[0]

        # Calculate inclusion prob, nonzero mean for every effect
        for j in range(beta_raw.shape[1]):
            for i in range(beta_raw.shape[2]):
                beta_i_raw = beta_raw[:, j, i]
                beta_i_raw_nonzero = np.where(np.abs(beta_i_raw) > 1e-3)[0]
                prob = beta_i_raw_nonzero.shape[0] / beta_i_raw.shape[0]
                beta_inc_prob.append(prob)
                if len(beta_i_raw[beta_i_raw_nonzero]) > 0:
                    beta_nonzero_mean.append(beta_i_raw[beta_i_raw_nonzero].mean())
                else:
                    beta_nonzero_mean.append(0)

        effect_df.loc[:, "inclusion_prob"] = beta_inc_prob
        effect_df.loc[:, "mean_nonzero"] = beta_nonzero_mean

        # Inclusion prob threshold value. Direct posterior probability approach cf. Newton et al. (2004)
        if self.is_sccoda:
            def opt_thresh(result, alpha):

                incs = np.array(result.loc[result["inclusion_prob"] > 0, "inclusion_prob"])
                # in-place sort of the reversed view, i.e. incs ends up in descending order
                incs[::-1].sort()

                # Candidate thresholds in ascending order: return the smallest one for which the
                # expected FDR among the effects at or above it drops below alpha
                for c in np.unique(incs):
                    fdr = np.mean(1 - incs[incs >= c])

                    if fdr < alpha:
                        # floor with 3 decimals precision
                        c = np.floor(c * 10 ** 3) / 10 ** 3
                        return c, fdr
                return 1., 0

            threshold, fdr_ = opt_thresh(effect_df, target_fdr)

            self.model_specs["threshold_prob"] = threshold

            # Decide whether betas are significant or not, set non-significant ones to 0
            effect_df.loc[:, "final_parameter"] = np.where(effect_df.loc[:, "inclusion_prob"] >= threshold,
                                                           effect_df.loc[:, "mean_nonzero"],
                                                           0)
        else:
            effect_df.loc[:, "final_parameter"] = effect_df.loc[:, "mean_nonzero"]

        # Get expected sample, log-fold change
        D = len(effect_df.index.levels[0])
        K = len(effect_df.index.levels[1])

        # Average total count per sample, used to scale proportions back to counts
        y_bar = np.mean(np.sum(np.array(self.observed_data.y), axis=1))
        alpha_par = intercept_df.loc[:, "final_parameter"]
        alphas_exp = np.exp(alpha_par)
        alpha_sample = (alphas_exp / np.sum(alphas_exp) * y_bar).values

        beta_mean = alpha_par
        beta_sample = []
        log_sample = []

        for d in range(D):
            # Effects of covariate d on all K cell types, added to the intercepts (softmax scale)
            beta_d = effect_df.loc[:, "final_parameter"].values[(d*K):((d+1)*K)]
            beta_d = (beta_mean + beta_d)
            beta_d = np.exp(beta_d)
            beta_d = beta_d / np.sum(beta_d) * y_bar

            beta_sample = np.append(beta_sample, beta_d)
            log_sample = np.append(log_sample, np.log2(beta_d/alpha_sample))

        effect_df.loc[:, "expected_sample"] = beta_sample
        effect_df.loc[:, "log_fold"] = log_sample

        return effect_df

    def complete_alpha_df(
            self,
            intercept_df: pd.DataFrame
    ) -> pd.DataFrame:
        """
        Evaluation of MCMC results for intercepts. This function is only used within self.summary_prepare.

        Parameters
        ----------
        intercept_df
            Intercept summary, see self.summary_prepare

        Returns
        -------
        intercept DataFrame

        intercept_df
            Summary DataFrame with expected sample, final parameters
        """

        intercept_df = intercept_df.rename(columns={"mean": "final_parameter"})

        # Get expected sample
        y_bar = np.mean(np.sum(np.array(self.observed_data.y), axis=1))
        alphas_exp = np.exp(intercept_df.loc[:, "final_parameter"])
        alpha_sample = (alphas_exp / np.sum(alphas_exp) * y_bar).values
        intercept_df.loc[:, "expected_sample"] = alpha_sample

        return intercept_df

    def _data_dims(self) -> tuple:
        """
        Shape of the data matrix (samples, cell types) for the printing methods.

        Uses the shape of ``sampling_stats["y_hat"]`` if available and falls back to the shape of
        the observed count matrix otherwise.
        """
        y_hat = self.sampling_stats.get("y_hat")
        if y_hat is not None:
            return y_hat.shape
        return np.array(self.observed_data.y).shape

    def summary(
            self,
            *args,
            **kwargs
    ):
        """
        Printing method for scCODA's summary.

        Usage: ``result.summary()``

        Parameters
        ----------
        args
            Passed to az.summary
        kwargs
            Passed to az.summary

        Returns
        -------
        prints to console

        """

        # If other than default values for e.g. confidence interval are specified,
        # recalculate them for intercept and effect DataFrames
        if args or kwargs:
            intercept_df, effect_df = self.summary_prepare(*args, **kwargs)
        else:
            intercept_df = self.intercept_df
            effect_df = self.effect_df

        # Get number of samples, cell types
        data_dims = self._data_dims()

        # Cut down DataFrames to relevant info
        alphas_print = intercept_df.loc[:, ["Final Parameter", "Expected Sample"]]
        betas_print = effect_df.loc[:, ["Final Parameter", "Expected Sample", "log2-fold change"]]

        # Print everything neatly
        print("Compositional Analysis summary:")
        print("")
        print("Data: %d samples, %d cell types" % data_dims)
        print("Reference index: %s" % str(self.model_specs["reference"]))
        print("Formula: %s" % self.model_specs["formula"])
        print("")
        print("Intercepts:")
        print(alphas_print)
        print("")
        print("")
        print("Effects:")
        print(betas_print)

    def summary_extended(
            self,
            *args,
            **kwargs
    ):

        """
        Extended (diagnostic) printing function that shows more info about the sampling result

        Parameters
        ----------
        args
            Passed to az.summary
        kwargs
            Passed to az.summary

        Returns
        -------
        Prints to console

        """

        # If other than default values for e.g. confidence interval are specified,
        # recalculate them for intercept and effect DataFrames
        if args or kwargs:
            intercept_df, effect_df = self.summary_prepare(*args, **kwargs)
        else:
            intercept_df = self.intercept_df
            effect_df = self.effect_df

        # Get number of samples, cell types (same fallback as in ``summary``)
        data_dims = self._data_dims()

        # Print everything
        print("Compositional Analysis summary (extended):")
        print("")
        print("Data: %d samples, %d cell types" % data_dims)
        print("Reference index: %s" % str(self.model_specs["reference"]))
        print("Formula: %s" % self.model_specs["formula"])
        if self.is_sccoda:
            print("Spike-and-slab threshold: {threshold:.3f}".format(threshold=self.model_specs["threshold_prob"]))
        print("")
        print("MCMC Sampling: Sampled {num_results} chain states ({num_burnin} burnin samples) in {duration:.3f} sec. "
              "Acceptance rate: {ar:.1f}%".format(num_results=self.sampling_stats["chain_length"],
                                                  num_burnin=self.sampling_stats["num_burnin"],
                                                  duration=self.sampling_stats["duration"],
                                                  ar=(100*self.sampling_stats["acc_rate"])))
        print("")
        print("Intercepts:")
        print(intercept_df)
        print("")
        print("")
        print("Effects:")
        print(effect_df)

    def compare_parameters_to_truth(
            self,
            b_true: pd.Series,
            w_true: pd.Series,
            *args,
            **kwargs
    ) -> Tuple[pd.DataFrame, pd.DataFrame]:
        """
        Extends data frames from summary_prepare by a comparison to some ground truth slope and intercept values that are
        assumed to be from the same generative model (e.g. in data_generation)

        Parameters
        ----------
        b_true
            Ground truth intercept values, joined to the intercept DataFrame on its index (cell types)
        w_true
            Ground truth effect (slope) values, joined to the effect DataFrame on its index
            (covariate/cell type combinations)
        args
            Passed to az.summary
        kwargs
            Passed to az.summary

        Returns
        -------
        Extends intercept and effect DataFrames

        intercept_df
            Summary DataFrame for intercepts
        effect_df
            Summary DataFrame for effects
        """

        intercept_df, effect_df = self.summary_prepare(*args, **kwargs)

        # summary_prepare hands back the prettified column name "Final Parameter"; renaming the
        # internal name "final_parameter" was a no-op and made the lookups of "predicted" fail
        intercept_df = intercept_df.rename(columns={"Final Parameter": "predicted"})
        effect_df = effect_df.rename(columns={"Final Parameter": "predicted"})

        # Get true params, join to calculated parameters
        b_true = b_true.rename("truth")
        intercept_df = intercept_df.join(b_true)
        w_true = w_true.rename("truth")
        effect_df = effect_df.join(w_true)

        # decide whether effects are found correctly
        intercept_df['dist_to_truth'] = intercept_df['truth'] - intercept_df['predicted']
        intercept_df['effect_correct'] = ((intercept_df['truth'] == 0) == (intercept_df['predicted'] == 0))
        effect_df['dist_to_truth'] = effect_df['truth'] - effect_df['predicted']
        effect_df['effect_correct'] = ((effect_df['truth'] == 0) == (effect_df['predicted'] == 0))

        return intercept_df, effect_df

    def distance_to_truth(self) -> pd.DataFrame:
        """
        Compares real cell count matrix to the posterior mode cell count matrix that arises from the calculated parameters

        Returns
        -------
        DataFrame with distances

        ret
            DataFrame with one row for the total (first row, labelled 'Total') and one row per cell type
        """

        # Get absolute (counts) and relative error matrices
        y = np.array(self.observed_data.y)
        y_hat = self.sampling_stats["y_hat"]
        err = np.abs(y_hat - y)

        # Zero counts produce inf (x/0) or nan (0/0); both cases are mapped to fixed values
        # right below, so the floating point warnings are silenced here
        with np.errstate(divide="ignore", invalid="ignore"):
            err_rel = err / y
        err_rel[np.isinf(err_rel)] = 1.
        err_rel[np.isnan(err_rel)] = 0.

        # Calculate mean errors for each cell type and overall
        avg_abs_cell_type_error = np.mean(err, axis=0, dtype=np.float64)
        avg_rel_cell_type_error = np.mean(err_rel, axis=0, dtype=np.float64)
        avg_abs_total_error = np.mean(err, dtype=np.float64)
        avg_rel_total_error = np.mean(err_rel, dtype=np.float64)

        # Labels are built up front ('Total', 1, ..., K) instead of overwriting the first entry
        # through chained assignment, which is unreliable (and a no-op under pandas copy-on-write)
        labels = ['Total'] + list(range(1, y.shape[1] + 1))

        ret = pd.DataFrame({'Cell Type': labels,
                            'Absolute Error': np.append(avg_abs_total_error, avg_abs_cell_type_error),
                            'Relative Error': np.append(avg_rel_total_error, avg_rel_cell_type_error),
                            'Actual Means': np.append(np.mean(y, axis=(0, 1)), np.mean(y, axis=0)),
                            'Predicted Means': np.append(np.mean(y_hat, axis=(0, 1)), np.mean(y_hat, axis=0))})

        return ret

    def credible_effects(
            self,
            est_fdr=None
    ) -> pd.Series:

        """
        Decides which effects of the scCODA model are credible based on an adjustable inclusion probability threshold.

        Parameters
        ----------
        est_fdr
            Estimated false discovery rate. Must be between 0 and 1.
            If None, the effects stored in ``self.effect_df`` are used.

        Returns
        -------
        Credible effect decision series

        out
            Boolean values whether effects are credible under inc_prob_threshold

        Raises
        ------
        ValueError
            If est_fdr is not between 0 and 1
        """

        if est_fdr is not None:
            # Any number triggers the recalculation; the former exact type check silently
            # ignored ints and numpy floats and fell back to the stored effects
            if est_fdr < 0 or est_fdr > 1:
                raise ValueError("est_fdr must be between 0 and 1!")
            _, eff_df = self.summary_prepare(est_fdr=est_fdr)
        else:
            eff_df = self.effect_df

        out = eff_df["Final Parameter"] != 0
        # Series.rename returns a new object, so the result has to be kept
        out = out.rename("credible change")

        return out

    def save(
            self,
            path_to_file: str
    ):
        """
        Function to save scCODA results to disk via pickle. Caution: Files can quickly become very large!

        Parameters
        ----------
        path_to_file
            saving location on disk

        Returns
        -------

        """
        with open(path_to_file, "wb") as f:
            pkl.dump(self, file=f, protocol=4)

    def set_fdr(
            self,
            est_fdr: float,
            *args,
            **kwargs):
        """
        Direct posterior probability approach to calculate credible effects while keeping the expected FDR at a certain level

        Parameters
        ----------
        est_fdr
            Desired FDR value
        args
            passed to self.summary_prepare
        kwargs
            passed to self.summary_prepare

        Returns
        -------
        Adjusts self.intercept_df and self.effect_df
        """

        intercept_df, effect_df = self.summary_prepare(est_fdr=est_fdr, *args, **kwargs)

        self.intercept_df = intercept_df
        self.effect_df = effect_df
# Runtime dependencies that are installed together with the package.
REQUIRED = [
    "numpy>=1.21",
    "scipy",
    "tensorflow>=2.8",
    "tensorflow-probability>=0.16.0",
    "arviz>=0.11",
    "seaborn",
    "pandas>=1.0",
    "matplotlib>=3.0",
    "scanpy",
    "anndata",
    "patsy",
    "statsmodels",
    "rpy2",
]

# What packages are optional?
EXTRAS = {}

# The rest you shouldn't have to touch too much :)
# ------------------------------------------------
# Except, perhaps the License and Trove Classifiers!
# If you do change the License, remember to change the Trove Classifier for that!

here = os.path.abspath(os.path.dirname(__file__))

# Import the README and use it as the long-description.
# Note: this will only work if 'README.md' is present in your MANIFEST.in file!
try:
    with io.open(os.path.join(here, 'README.md'), encoding='utf-8') as f:
        long_description = '\n' + f.read()
except FileNotFoundError:
    long_description = DESCRIPTION

# Load the package's __version__.py module as a dictionary.
about = {}
if not VERSION:
    with open(os.path.join(here, NAME, '__version__.py')) as f:
        exec(f.read(), about)
else:
    about['__version__'] = VERSION


class UploadCommand(Command):
    """Support setup.py upload."""

    description = 'Build and publish the package.'
    user_options = []

    @staticmethod
    def status(s):
        """Prints things in bold."""
        print('\033[1m{0}\033[0m'.format(s))

    @classmethod
    def _run_step(cls, command):
        """Run a shell command and abort the release if it reports a failure."""
        # os.system returns a non-zero value when the command fails. Without this check a
        # broken build would still be uploaded, tagged and pushed.
        if os.system(command) != 0:
            cls.status('Command failed, aborting: {0}'.format(command))
            sys.exit(1)

    def initialize_options(self):
        pass

    def finalize_options(self):
        pass

    def run(self):
        """Build the distributions, upload them with twine, then tag and push the release."""
        try:
            self.status('Removing previous builds…')
            rmtree(os.path.join(here, 'dist'))
        except OSError:
            # Best effort only: there is nothing to remove when no previous build exists.
            pass

        self.status('Building Source and Wheel (universal) distribution…')
        self._run_step('{0} setup.py sdist bdist_wheel --universal'.format(sys.executable))

        self.status('Uploading the package to PyPI via Twine…')
        self._run_step('twine upload dist/*')

        self.status('Pushing git tags…')
        self._run_step('git tag v{0}'.format(about['__version__']))
        self._run_step('git push --tags')

        sys.exit()


# Where the magic happens:
setup(
    name=NAME,
    version=about['__version__'],
    description=DESCRIPTION,
    long_description=long_description,
    long_description_content_type='text/markdown',
    author=AUTHOR,
    author_email=EMAIL,
    python_requires=REQUIRES_PYTHON,
    url=URL,
    packages=find_packages(exclude=("data", "tests", "tutorials")),
    install_requires=REQUIRED,
    extras_require=EXTRAS,
    include_package_data=True,
    license='BSD',
    keywords=[
        "RNA",
        "single cell",
        "composition",
        "CODA",
        "compositional analysis"
    ],
    classifiers=[
        # Trove classifiers
        # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers
        "License :: OSI Approved :: BSD License",
        "Intended Audience :: Science/Research",
        'Programming Language :: Python',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.7',
        "Topic :: Scientific/Engineering :: Bio-Informatics",
    ],
    # $ setup.py publish support.
    cmdclass={
        'upload': UploadCommand,
    },
)
"""
Unit tests for scCODA
"""

import unittest
import numpy as np
import scanpy as sc
import tensorflow as tf
import pandas as pd
import os
import sys
sys.path.insert(0, os.path.abspath('.'))
sys.path.insert(0, os.path.abspath('..'))

from sccoda.util import cell_composition_data as dat
from sccoda.util import comp_ana as mod
from sccoda.util import data_generation as gen

pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)


def _load_salmonella_data():
    """
    Read the Haber count table and build the composition data set used by several tests.

    Rows 0-3, 8 and 9 of the csv are selected and "Mouse" is used as covariate column. A
    "Condition" column is derived from "Mouse" by stripping the trailing "_<digit>" suffix.
    The csv path is relative to the current working directory.
    """
    data_raw = pd.read_csv(os.path.abspath("sccoda/datasets/haber_counts.csv"))

    salm_indices = [0, 1, 2, 3, 8, 9]
    salm_df = data_raw.iloc[salm_indices, :]

    data_salm = dat.from_pandas(salm_df, covariate_columns=["Mouse"])
    # regex=True is spelled out: pandas 2.0 changed the default to literal replacement,
    # which would leave the suffix in place.
    data_salm.obs["Condition"] = data_salm.obs["Mouse"].str.replace(r"_[0-9]", "", regex=True)
    return data_salm


class TestDataGeneration(unittest.TestCase):
    """
    Testing whether the data generation functions from data_generation work as intended
    """

    def setUp(self):

        self.N = 3
        self.D = 1
        self.K = 2
        self.n_total = [1000] * self.N
        self.noise_std_true = 1
        self.covariate_mean = None
        self.covariate_var = None
        self.sigma = None
        self.b_true = None
        self.w_true = None

    def test_case_control_gen(self):
        """
        Tests data generation for case/control scenarios
        """
        np.random.seed(1234)

        cases = 1
        K = 2
        n_total = 1000
        n_samples = [2, 2]
        sigma = None
        b_true = None
        w_true = None

        data = gen.generate_case_control(cases, K, n_total, n_samples, sigma, b_true, w_true)

        np.testing.assert_allclose(
            data.obs["x_0"], [0, 0, 1, 1], rtol=0, atol=1e-5,
            err_msg="obs is not correct!")
        np.testing.assert_array_equal(
            data.X, np.array([[74., 926.], [58., 942.], [32., 968.], [53., 947.]]),
            err_msg="X is not correct!")
        # Two-sided tolerance: the former check omitted the absolute value, so values
        # that were too small passed unnoticed.
        np.testing.assert_allclose(
            data.uns["b_true"], np.array([-1.8508832, 0.7326526], dtype=np.float64),
            rtol=0, atol=1e-5, err_msg="uns is not correct!")
        np.testing.assert_array_equal(
            data.uns["w_true"], np.array([[0., 0.]]),
            err_msg="uns is not correct!")

    def test_change_functions(self):
        """
        Tests gen.b_w_from_abs_change and gen.counts_from_first
        """
        np.random.seed(1234)

        counts_before = np.array([600, 400])
        abs_change = 100
        n_total = 1000
        K = 2
        b_0 = 600

        b, w = gen.b_w_from_abs_change(counts_before, abs_change, n_total)

        np.testing.assert_allclose(
            b, [-0.51082562, -0.91629073], rtol=0, atol=1e-5,
            err_msg="gen.b_w_from_abs_change: b not correct!")
        np.testing.assert_allclose(
            w, [0.44183275, 0.], rtol=0, atol=1e-5,
            err_msg="gen.b_w_from_abs_change: w not correct!")

        b_2 = gen.counts_from_first(b_0, n_total, K)
        np.testing.assert_array_equal(
            b_2, [600., 400.], err_msg="gen.counts_from_first not correct!")


class TestDataImport(unittest.TestCase):
    """
    Tests for the import functions of cell_composition_data
    """

    def test_from_pandas(self):
        # Get Haber Salmonella data
        data_salm = _load_salmonella_data()

        # Only check size of x, obs
        self.assertEqual(data_salm.X.shape, (6, 8))
        self.assertEqual(data_salm.obs.shape, (6, 2))

    def test_from_scanpy(self):
        # Get scanpy example data, add covariates, read in three times
        adata_ref = sc.datasets.pbmc3k_processed()
        adata_ref.uns["cov"] = {"x_0": 0, "x_1": 1}
        adata_ref_1 = adata_ref.copy()
        adata_ref_1.uns["cov"] = {"x_0": 1, "x_1": 1}

        data = dat.from_scanpy_list([adata_ref, adata_ref, adata_ref_1],
                                    cell_type_identifier="louvain",
                                    covariate_key="cov")

        # Only check size of x, obs and the cell type names
        self.assertEqual(data.X.shape, (3, 8))
        self.assertEqual(data.obs.shape, (3, 2))
        self.assertListEqual(
            data.var.index.tolist(),
            ['CD4 T cells', 'CD14+ Monocytes', 'B cells', 'CD8 T cells',
             'NK cells', 'FCGR3A+ Monocytes', 'Dendritic cells', 'Megakaryocytes'])


class TestModels(unittest.TestCase):
    """
    Fits the compositional model with each sampler and compares the expected samples
    with the observed group means
    """

    def setUp(self):
        # Get Haber count data
        self.data = _load_salmonella_data()

    @staticmethod
    def _seed_rngs():
        """Fix the numpy and tensorflow seeds used by all model tests."""
        np.random.seed(1234)
        tf.random.set_seed(5678)

    def _assert_close_to_group_means(self, alpha_df, beta_expected, tolerance=30):
        """
        Check the rounded "Expected Sample" values against the rounded mean counts per group.

        The first four rows of self.data.X are compared with the intercept table (alpha_df),
        the remaining rows with beta_expected. No cell type may differ by more than tolerance.
        """
        # Mean cell counts for both groups
        alphas_true = np.round(np.mean(self.data.X[:4], 0), 0)
        betas_true = np.round(np.mean(self.data.X[4:], 0), 0)

        # Mean cell counts for simulated data
        final_alphas = np.round(alpha_df.loc[:, "Expected Sample"].tolist(), 0)
        final_betas = np.round(beta_expected.tolist(), 0)

        # Check if model approximately predicts ground truth
        np.testing.assert_allclose(final_alphas, alphas_true, rtol=0, atol=tolerance)
        np.testing.assert_allclose(final_betas, betas_true, rtol=0, atol=tolerance)

    def test_hmc(self):
        self._seed_rngs()

        model_salm = mod.CompositionalAnalysis(self.data, formula="Condition", reference_cell_type=5)

        # Run MCMC
        sim_results = model_salm.sample_hmc(num_results=20000, num_burnin=5000)
        alpha_df, beta_df = sim_results.summary_prepare()

        self._assert_close_to_group_means(alpha_df, beta_df.loc[:, "Expected Sample"])

    def test_hmc_da(self):
        self._seed_rngs()

        model_salm = mod.CompositionalAnalysis(self.data, formula="Condition", reference_cell_type=5)

        # Run MCMC
        sim_results = model_salm.sample_hmc_da(num_results=20000, num_burnin=5000)
        alpha_df, beta_df = sim_results.summary_prepare()

        self._assert_close_to_group_means(alpha_df, beta_df.loc[:, "Expected Sample"])

    def test_nuts(self):
        self._seed_rngs()

        model_salm = mod.CompositionalAnalysis(self.data, formula="Condition", reference_cell_type=5)

        # Run MCMC
        sim_results = model_salm.sample_nuts(num_results=2000, num_burnin=500)
        alpha_df, beta_df = sim_results.summary_prepare()

        self._assert_close_to_group_means(alpha_df, beta_df.loc[:, "Expected Sample"])

    def test_multi_cond(self):
        self._seed_rngs()

        # Random second covariate; it is expected to receive no credible effect
        self.data.obs["Condition2"] = np.random.randint(0, 2, len(self.data.obs))

        model_salm = mod.CompositionalAnalysis(self.data, formula="Condition+Condition2", reference_cell_type=5)

        # Run MCMC
        sim_results = model_salm.sample_hmc(num_results=20000, num_burnin=5000)
        alpha_df, beta_df = sim_results.summary_prepare()

        self._assert_close_to_group_means(
            alpha_df, beta_df.loc[("Condition[T.Salm]",), "Expected Sample"])

        differing_rand = any(beta_df.loc[("Condition2",), "Final Parameter"] != 0)
        self.assertFalse(differing_rand)


if __name__ == '__main__':
    unittest.main()
-------------------------------------------------------------------------------- /tutorials/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/theislab/scCODA/887955e5f968960e2112fdab4258a205596540ee/tutorials/__init__.py -------------------------------------------------------------------------------- /tutorials/test: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/theislab/scCODA/887955e5f968960e2112fdab4258a205596540ee/tutorials/test --------------------------------------------------------------------------------