├── .github
└── Figures
│ ├── Fig1_v10.png
│ ├── Fig2_v7.png
│ ├── Figure1.png
│ ├── Figure2.png
│ ├── archive
│ ├── Fig1_v9.png
│ ├── model_comparison_example_data_grouped.png
│ ├── model_comparison_plots
│ │ ├── model_comparison_logfold_confint_extended.png
│ │ └── model_comparison_replicates_confint_extended.png
│ ├── overall_benchmark_plots
│ │ ├── 9_heatmaps_concept_fig.png
│ │ ├── absolute_increase_lines_concept_fig.png
│ │ ├── log_fold_increase_lines_concept_fig.png
│ │ └── negative_heatmaps.png
│ └── threshold_determination_plots
│ │ ├── threshold_determination_fdr.png
│ │ ├── threshold_determination_mcc.png
│ │ ├── threshold_determination_tnr.png
│ │ ├── threshold_determination_tpr.png
│ │ └── threshold_function_optimal_mcc.png
│ ├── covariate_structure.png
│ └── data_structure.png
├── .gitignore
├── .readthedocs.yml
├── .travis.yml
├── LICENSE
├── README.md
├── docs
├── Makefile
├── make.bat
├── requires.txt
└── source
│ ├── Data_import_and_visualization.ipynb
│ ├── Modeling_options_and_result_analysis.ipynb
│ ├── _ext
│ └── edit_on_github.py
│ ├── _static
│ ├── colab_badge.svg
│ └── nbviewer_badge.svg
│ ├── _templates
│ ├── Data_import_and_visualization.ipynb
│ ├── Modeling_options_and_result_analysis.ipynb
│ ├── autosummary
│ │ ├── base.rst
│ │ └── class.rst
│ ├── getting_started.ipynb
│ └── using_other_compositional_methods.ipynb
│ ├── api.rst
│ ├── compositional_data.rst
│ ├── conf.py
│ ├── data.rst
│ ├── getting_started.ipynb
│ ├── index.rst
│ ├── installation.rst
│ ├── models.rst
│ ├── sccoda.model.other_models.ALDEx2Model.eval_model.rst
│ ├── sccoda.model.other_models.ALDEx2Model.fit_model.rst
│ ├── sccoda.model.other_models.ALDEx2Model.rst
│ ├── sccoda.model.other_models.ALRModel_ttest.eval_model.rst
│ ├── sccoda.model.other_models.ALRModel_ttest.fit_model.rst
│ ├── sccoda.model.other_models.ALRModel_ttest.rst
│ ├── sccoda.model.other_models.ALRModel_wilcoxon.eval_model.rst
│ ├── sccoda.model.other_models.ALRModel_wilcoxon.fit_model.rst
│ ├── sccoda.model.other_models.ALRModel_wilcoxon.rst
│ ├── sccoda.model.other_models.ANCOMBCModel.eval_model.rst
│ ├── sccoda.model.other_models.ANCOMBCModel.fit_model.rst
│ ├── sccoda.model.other_models.ANCOMBCModel.rst
│ ├── sccoda.model.other_models.AncomModel.eval_model.rst
│ ├── sccoda.model.other_models.AncomModel.fit_model.rst
│ ├── sccoda.model.other_models.AncomModel.rst
│ ├── sccoda.model.other_models.BetaBinomialModel.eval_model.rst
│ ├── sccoda.model.other_models.BetaBinomialModel.fit_model.rst
│ ├── sccoda.model.other_models.BetaBinomialModel.rst
│ ├── sccoda.model.other_models.CLRModel.eval_model.rst
│ ├── sccoda.model.other_models.CLRModel.fit_model.rst
│ ├── sccoda.model.other_models.CLRModel.rst
│ ├── sccoda.model.other_models.CLRModel_ttest.eval_model.rst
│ ├── sccoda.model.other_models.CLRModel_ttest.fit_model.rst
│ ├── sccoda.model.other_models.CLRModel_ttest.rst
│ ├── sccoda.model.other_models.DirichRegModel.eval_model.rst
│ ├── sccoda.model.other_models.DirichRegModel.fit_model.rst
│ ├── sccoda.model.other_models.DirichRegModel.rst
│ ├── sccoda.model.other_models.HaberModel.eval_model.rst
│ ├── sccoda.model.other_models.HaberModel.fit_model.rst
│ ├── sccoda.model.other_models.HaberModel.rst
│ ├── sccoda.model.other_models.SimpleModel.get_chains_after_burnin.rst
│ ├── sccoda.model.other_models.SimpleModel.get_y_hat.rst
│ ├── sccoda.model.other_models.SimpleModel.make_result.rst
│ ├── sccoda.model.other_models.SimpleModel.rst
│ ├── sccoda.model.other_models.SimpleModel.sample_hmc.rst
│ ├── sccoda.model.other_models.SimpleModel.sample_hmc_da.rst
│ ├── sccoda.model.other_models.SimpleModel.sample_nuts.rst
│ ├── sccoda.model.other_models.SimpleModel.sampling.rst
│ ├── sccoda.model.other_models.TTest.eval_model.rst
│ ├── sccoda.model.other_models.TTest.fit_model.rst
│ ├── sccoda.model.other_models.TTest.rst
│ ├── sccoda.model.other_models.scdney_model.analyze.rst
│ ├── sccoda.model.other_models.scdney_model.rst
│ ├── sccoda.model.scCODA_model.CompositionalModel.get_chains_after_burnin.rst
│ ├── sccoda.model.scCODA_model.CompositionalModel.make_result.rst
│ ├── sccoda.model.scCODA_model.CompositionalModel.rst
│ ├── sccoda.model.scCODA_model.CompositionalModel.sample_hmc.rst
│ ├── sccoda.model.scCODA_model.CompositionalModel.sample_hmc_da.rst
│ ├── sccoda.model.scCODA_model.CompositionalModel.sample_nuts.rst
│ ├── sccoda.model.scCODA_model.CompositionalModel.sampling.rst
│ ├── sccoda.model.scCODA_model.scCODAModel.get_chains_after_burnin.rst
│ ├── sccoda.model.scCODA_model.scCODAModel.get_y_hat.rst
│ ├── sccoda.model.scCODA_model.scCODAModel.make_result.rst
│ ├── sccoda.model.scCODA_model.scCODAModel.rst
│ ├── sccoda.model.scCODA_model.scCODAModel.sample_hmc.rst
│ ├── sccoda.model.scCODA_model.scCODAModel.sample_hmc_da.rst
│ ├── sccoda.model.scCODA_model.scCODAModel.sample_nuts.rst
│ ├── sccoda.model.scCODA_model.scCODAModel.sampling.rst
│ ├── sccoda.util.cell_composition_data.from_pandas.rst
│ ├── sccoda.util.cell_composition_data.from_scanpy.rst
│ ├── sccoda.util.cell_composition_data.from_scanpy_dir.rst
│ ├── sccoda.util.cell_composition_data.from_scanpy_list.rst
│ ├── sccoda.util.cell_composition_data.read_anndata_one_sample.rst
│ ├── sccoda.util.comp_ana.CompositionalAnalysis.rst
│ ├── sccoda.util.data_generation.b_w_from_abs_change.rst
│ ├── sccoda.util.data_generation.counts_from_first.rst
│ ├── sccoda.util.data_generation.generate_case_control.rst
│ ├── sccoda.util.data_generation.sparse_effect_matrix.rst
│ ├── sccoda.util.data_visualization.boxplots.rst
│ ├── sccoda.util.data_visualization.stackbar.rst
│ ├── sccoda.util.data_visualization.stacked_barplot.rst
│ ├── sccoda.util.helper_functions.sample_size_estimate.rst
│ ├── sccoda.util.result_classes.CAResult.compare_parameters_to_truth.rst
│ ├── sccoda.util.result_classes.CAResult.complete_alpha_df.rst
│ ├── sccoda.util.result_classes.CAResult.complete_beta_df.rst
│ ├── sccoda.util.result_classes.CAResult.credible_effects.rst
│ ├── sccoda.util.result_classes.CAResult.distance_to_truth.rst
│ ├── sccoda.util.result_classes.CAResult.rst
│ ├── sccoda.util.result_classes.CAResult.save.rst
│ ├── sccoda.util.result_classes.CAResult.set_fdr.rst
│ ├── sccoda.util.result_classes.CAResult.summary.rst
│ ├── sccoda.util.result_classes.CAResult.summary_extended.rst
│ ├── sccoda.util.result_classes.CAResult.summary_prepare.rst
│ └── using_other_compositional_methods.ipynb
├── requirements.txt
├── sccoda
├── __init__.py
├── datasets
│ ├── __init__.py
│ ├── _datasets.py
│ └── haber_counts.csv
├── model
│ ├── __init__.py
│ ├── other_models.py
│ └── scCODA_model.py
└── util
│ ├── __init__.py
│ ├── cell_composition_data.py
│ ├── comp_ana.py
│ ├── data_generation.py
│ ├── data_visualization.py
│ ├── helper_functions.py
│ └── result_classes.py
├── setup.py
├── tests
├── __init__.py
└── unit_tests.py
└── tutorials
├── Data_import_and_visualization.ipynb
├── Modeling_options_and_result_analysis.ipynb
├── __init__.py
├── getting_started.ipynb
├── test
└── using_other_compositional_methods.ipynb
/.github/Figures/Fig1_v10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theislab/scCODA/887955e5f968960e2112fdab4258a205596540ee/.github/Figures/Fig1_v10.png
--------------------------------------------------------------------------------
/.github/Figures/Fig2_v7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theislab/scCODA/887955e5f968960e2112fdab4258a205596540ee/.github/Figures/Fig2_v7.png
--------------------------------------------------------------------------------
/.github/Figures/Figure1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theislab/scCODA/887955e5f968960e2112fdab4258a205596540ee/.github/Figures/Figure1.png
--------------------------------------------------------------------------------
/.github/Figures/Figure2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theislab/scCODA/887955e5f968960e2112fdab4258a205596540ee/.github/Figures/Figure2.png
--------------------------------------------------------------------------------
/.github/Figures/archive/Fig1_v9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theislab/scCODA/887955e5f968960e2112fdab4258a205596540ee/.github/Figures/archive/Fig1_v9.png
--------------------------------------------------------------------------------
/.github/Figures/archive/model_comparison_example_data_grouped.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theislab/scCODA/887955e5f968960e2112fdab4258a205596540ee/.github/Figures/archive/model_comparison_example_data_grouped.png
--------------------------------------------------------------------------------
/.github/Figures/archive/model_comparison_plots/model_comparison_logfold_confint_extended.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theislab/scCODA/887955e5f968960e2112fdab4258a205596540ee/.github/Figures/archive/model_comparison_plots/model_comparison_logfold_confint_extended.png
--------------------------------------------------------------------------------
/.github/Figures/archive/model_comparison_plots/model_comparison_replicates_confint_extended.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theislab/scCODA/887955e5f968960e2112fdab4258a205596540ee/.github/Figures/archive/model_comparison_plots/model_comparison_replicates_confint_extended.png
--------------------------------------------------------------------------------
/.github/Figures/archive/overall_benchmark_plots/9_heatmaps_concept_fig.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theislab/scCODA/887955e5f968960e2112fdab4258a205596540ee/.github/Figures/archive/overall_benchmark_plots/9_heatmaps_concept_fig.png
--------------------------------------------------------------------------------
/.github/Figures/archive/overall_benchmark_plots/absolute_increase_lines_concept_fig.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theislab/scCODA/887955e5f968960e2112fdab4258a205596540ee/.github/Figures/archive/overall_benchmark_plots/absolute_increase_lines_concept_fig.png
--------------------------------------------------------------------------------
/.github/Figures/archive/overall_benchmark_plots/log_fold_increase_lines_concept_fig.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theislab/scCODA/887955e5f968960e2112fdab4258a205596540ee/.github/Figures/archive/overall_benchmark_plots/log_fold_increase_lines_concept_fig.png
--------------------------------------------------------------------------------
/.github/Figures/archive/overall_benchmark_plots/negative_heatmaps.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theislab/scCODA/887955e5f968960e2112fdab4258a205596540ee/.github/Figures/archive/overall_benchmark_plots/negative_heatmaps.png
--------------------------------------------------------------------------------
/.github/Figures/archive/threshold_determination_plots/threshold_determination_fdr.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theislab/scCODA/887955e5f968960e2112fdab4258a205596540ee/.github/Figures/archive/threshold_determination_plots/threshold_determination_fdr.png
--------------------------------------------------------------------------------
/.github/Figures/archive/threshold_determination_plots/threshold_determination_mcc.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theislab/scCODA/887955e5f968960e2112fdab4258a205596540ee/.github/Figures/archive/threshold_determination_plots/threshold_determination_mcc.png
--------------------------------------------------------------------------------
/.github/Figures/archive/threshold_determination_plots/threshold_determination_tnr.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theislab/scCODA/887955e5f968960e2112fdab4258a205596540ee/.github/Figures/archive/threshold_determination_plots/threshold_determination_tnr.png
--------------------------------------------------------------------------------
/.github/Figures/archive/threshold_determination_plots/threshold_determination_tpr.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theislab/scCODA/887955e5f968960e2112fdab4258a205596540ee/.github/Figures/archive/threshold_determination_plots/threshold_determination_tpr.png
--------------------------------------------------------------------------------
/.github/Figures/archive/threshold_determination_plots/threshold_function_optimal_mcc.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theislab/scCODA/887955e5f968960e2112fdab4258a205596540ee/.github/Figures/archive/threshold_determination_plots/threshold_function_optimal_mcc.png
--------------------------------------------------------------------------------
/.github/Figures/covariate_structure.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theislab/scCODA/887955e5f968960e2112fdab4258a205596540ee/.github/Figures/covariate_structure.png
--------------------------------------------------------------------------------
/.github/Figures/data_structure.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theislab/scCODA/887955e5f968960e2112fdab4258a205596540ee/.github/Figures/data_structure.png
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | .idea/
27 | data/
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | .hypothesis/
50 | .pytest_cache/
51 |
52 | # Translations
53 | *.mo
54 | *.pot
55 |
56 | # Django stuff:
57 | *.log
58 | local_settings.py
59 | db.sqlite3
60 |
61 | # Flask stuff:
62 | instance/
63 | .webassets-cache
64 |
65 | # Scrapy stuff:
66 | .scrapy
67 |
68 | # Sphinx documentation
69 | docs/_build/
70 |
71 | # PyBuilder
72 | target/
73 |
74 | # Jupyter Notebook
75 | .ipynb_checkpoints
76 |
77 | # pyenv
78 | .python-version
79 |
80 | # celery beat schedule file
81 | celerybeat-schedule
82 |
83 | # SageMath parsed files
84 | *.sage.py
85 |
86 | # Environments
87 | .env
88 | .venv
89 | env/
90 | venv/
91 | ENV/
92 | env.bak/
93 | venv.bak/
94 |
95 | # Spyder project settings
96 | .spyderproject
97 | .spyproject
98 |
99 | # Rope project settings
100 | .ropeproject
101 |
102 | # mkdocs documentation
103 | /site
104 |
105 | # mypy
106 | .mypy_cache/
107 |
108 | # R stuff
109 | .RData
110 | .Rhistory
111 |
112 | # Other
113 | prototyping/
114 | *.pkl
115 |
--------------------------------------------------------------------------------
/.readthedocs.yml:
--------------------------------------------------------------------------------
1 | # .readthedocs.yml
2 | # Read the Docs configuration file
3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
4 |
5 | # Required
6 | version: 2
7 |
8 | build:
9 | image: latest
10 |
11 | # Build documentation in the docs/ directory with Sphinx
12 | sphinx:
13 | configuration: docs/source/conf.py
14 |
15 | # Optionally build your docs in additional formats such as PDF
16 | # formats:
17 | # - pdf
18 |
19 | # Optionally set the version of Python and requirements required to build your docs
20 | python:
21 | version: 3.8
22 | install:
23 | - requirements: docs/requires.txt
24 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 |
3 | python:
4 | - "3.7"
5 |
6 | install:
7 | - pip install -r requirements.txt
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 |
3 | Copyright (c) 2020, Theis, Schubert and Müller Lab
4 | All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 |
9 | * Redistributions of source code must retain the above copyright notice, this
10 | list of conditions and the following disclaimer.
11 |
12 | * Redistributions in binary form must reproduce the above copyright notice,
13 | this list of conditions and the following disclaimer in the documentation
14 | and/or other materials provided with the distribution.
15 |
16 | * Neither the name of the copyright holder nor the names of its
17 | contributors may be used to endorse or promote products derived from
18 | this software without specific prior written permission.
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # scCODA - Single-cell differential composition analysis
2 |
3 | > **Note**
4 | > This implementation is no longer maintained. A new version in Jax is available in [pertpy](https://pertpy.readthedocs.io/en/latest).
5 | >
6 | > For more information and contribution guidelines please visit the associated Github repository: https://github.com/theislab/pertpy
7 |
8 | scCODA allows for identification of compositional changes in high-throughput sequencing count data, especially cell compositions from scRNA-seq.
9 | It also provides a framework for integration of cell-type annotated data directly from [scanpy](https://scanpy.readthedocs.io/en/stable/) and other sources.
10 | Aside from the scCODA model (Büttner, Ostner *et al* (2021)), the package also allows the easy application of other differential testing methods.
11 |
12 | 
13 |
14 | The statistical methodology and benchmarking performance are described in:
15 |
16 | Büttner, Ostner *et al* (2021). **scCODA is a Bayesian model for compositional single-cell data analysis**
17 | ([*Nature Communications*](https://www.nature.com/articles/s41467-021-27150-6))
18 |
19 | Code for reproducing the analysis from the paper is available [here](https://github.com/theislab/scCODA_reproducibility).
20 |
21 | For further information on the scCODA package and model, please refer to the
22 | [documentation](https://sccoda.readthedocs.io/en/latest/) and the
23 | [tutorials](https://github.com/theislab/scCODA/blob/master/tutorials).
24 |
25 | ## Installation
26 |
27 | Running the package requires a working Python environment (>=3.8).
28 |
29 | This package uses the `tensorflow` (`>=2.8`) and `tensorflow-probability` (`>=0.16`) packages.
30 | The GPU computation features of these packages have not been tested with scCODA and are thus not recommended.
31 |
32 | **To install scCODA via pip, call**:
33 |
34 | pip install sccoda
35 |
36 |
37 | **To install scCODA from source**:
38 |
39 | - Navigate to the directory that you want to install scCODA in
40 | - Clone the repository from Github (https://github.com/theislab/scCODA):
41 |
42 | `git clone https://github.com/theislab/scCODA`
43 |
44 | - Navigate to the root directory of scCODA:
45 |
46 | `cd scCODA`
47 |
48 | - Install dependencies:
49 |
50 | `pip install -r requirements.txt`
51 |
52 | - Install the package:
53 |
54 | `python setup.py install`
55 |
56 | **Docker container**:
57 |
58 | We provide a Docker container image for scCODA (https://hub.docker.com/repository/docker/wollmilchsau/scanpy_sccoda).
59 |
60 | ## Usage
61 |
62 | Import scCODA in a Python session via:
63 |
64 | import sccoda
65 |
66 | **Tutorials**
67 |
68 | scCODA provides a number of tutorials for various purposes. Please also visit the [documentation](https://sccoda.readthedocs.io/en/latest/) for further information on the statistical model, data structure and API.
69 |
70 | - The ["getting started" tutorial](https://sccoda.readthedocs.io/en/latest/getting_started.html) provides a quick-start guide for using scCODA.
71 |
72 | - In the [advanced tutorial](https://sccoda.readthedocs.io/en/latest/Modeling_options_and_result_analysis.html), options for model specification, diagnostics, and result interpretation are discussed.
73 |
74 | - The [data import and visualization tutorial](https://sccoda.readthedocs.io/en/latest/Data_import_and_visualization.html) focuses on loading data from different sources and visualizing their characteristics.
75 |
76 | - The [tutorial on other methods](https://sccoda.readthedocs.io/en/latest/using_other_compositional_methods.html) explains how to apply other methods for differential abundance testing from within scCODA.
77 |
78 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = python3 -msphinx
7 | SPHINXPROJ = scCODA
8 | SOURCEDIR = source
9 | BUILDDIR = _build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=python3 -msphinx
9 | )
10 | set SOURCEDIR=source
11 | set BUILDDIR=_build
12 | set SPHINXPROJ=scCODA
13 |
14 | if "%1" == "" goto help
15 |
16 | %SPHINXBUILD% >NUL 2>NUL
17 | if errorlevel 9009 (
18 | echo.
19 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
20 | echo.installed, then set the SPHINXBUILD environment variable to point
21 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
22 | echo.may add the Sphinx directory to PATH.
23 | echo.
24 | echo.If you don't have Sphinx installed, grab it from
25 | echo.http://sphinx-doc.org/
26 | exit /b 1
27 | )
28 |
29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
30 | goto end
31 |
32 | :help
33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
34 |
35 | :end
36 | popd
37 |
--------------------------------------------------------------------------------
/docs/requires.txt:
--------------------------------------------------------------------------------
1 | -r ../requirements.txt
2 | sphinx==3.4.3
3 | sphinx_autodoc_typehints==1.11.1
4 | sphinx_rtd_theme==0.5.1
5 | docutils==0.16
6 | typing_extensions==3.7.4.3
7 | importlib_metadata==3.3.0
8 | setuptools==51.0.0
9 | setuptools_scm==5.0.1
10 | ipykernel==5.4.2
11 | nbsphinx==0.8.0
12 | jinja2==2.11.2
13 | markupsafe==1.1.1
14 | scanpydoc==0.5.5
15 | git+https://github.com/theislab/scCODA.git@master
16 |
--------------------------------------------------------------------------------
/docs/source/_ext/edit_on_github.py:
--------------------------------------------------------------------------------
1 | """
2 | Loosely based on gist.github.com/MantasVaitkunas/7c16de233812adcb7028
3 | """
4 |
5 | import os
6 | import warnings
7 |
8 |
9 | __licence__ = "BSD (3 clause)"
10 |
11 |
12 | def get_github_repo(app):  # Return a 2-tuple: the app's configured `github_repo` value and the fixed docs path "/docs/".
13 | return app.config.github_repo, "/docs/"
14 |
15 |
16 | def html_page_context(app, pagename, templatename, context, doctree):
17 | if templatename != "page.html":
18 | return
19 |
20 | if not app.config.github_repo:
21 | warnings.warn("`github_repo `not specified")
22 | return
23 |
24 |
25 | path = os.path.relpath(doctree.get("source"), app.builder.srcdir)
26 | repo, conf_py_path = get_github_repo(app)
27 |
28 | # For sphinx_rtd_theme.
29 | context["display_github"] = True
30 | context["github_user"] = "theislab"
31 | context["github_version"] = "master"
32 | context["github_repo"] = repo
33 | context["conf_py_path"] = conf_py_path
34 |
35 |
36 | def setup(app):  # Extension entry point: registers the config value and the page-context handler on `app`.
37 | app.add_config_value("github_repo", "", True)  # `github_repo` defaults to the empty string, which html_page_context treats as "not specified"
38 | app.connect("html-page-context", html_page_context)  # run html_page_context for each "html-page-context" event
--------------------------------------------------------------------------------
/docs/source/_static/colab_badge.svg:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/docs/source/_static/nbviewer_badge.svg:
--------------------------------------------------------------------------------
1 |
2 |
18 |
20 |
21 |
23 | image/svg+xml
24 |
26 |
27 |
28 |
29 |
30 |
32 |
35 |
38 |
41 |
44 |
47 |
50 |
53 |
56 |
59 |
62 |
65 |
68 |
69 |
89 |
93 |
98 |
102 |
103 |
105 |
111 |
112 |
115 |
119 |
123 |
127 |
128 |
133 |
141 |
148 |
156 | Open in nbviewer
159 |
160 |
167 | Open in nbviewer
172 |
173 |
183 |
184 |
188 |
193 |
197 |
201 |
205 |
210 |
214 |
223 |
224 |
225 |
230 |
234 |
243 |
244 |
245 |
246 |
247 |
248 |
249 |
250 |
--------------------------------------------------------------------------------
/docs/source/_templates/autosummary/base.rst:
--------------------------------------------------------------------------------
1 | :github_url: {{ fullname | github_url }}
2 |
3 | {% extends "!autosummary/base.rst" %}
4 |
5 | .. http://www.sphinx-doc.org/en/stable/ext/autosummary.html#customizing-templates
--------------------------------------------------------------------------------
/docs/source/_templates/autosummary/class.rst:
--------------------------------------------------------------------------------
1 | :github_url: {{ fullname | github_url }}
2 |
3 | {{ fullname | escape | underline}}
4 |
5 | .. currentmodule:: {{ module }}
6 |
7 | .. add toctree option to make autodoc generate the pages
8 |
9 | .. autoclass:: {{ objname }}
10 |
11 | {% block attributes %}
12 | {% if attributes %}
13 | .. rubric:: Attributes
14 |
15 | .. autosummary::
16 | :toctree: .
17 | {% for item in attributes %}
18 | {%- if item[0] != "_" %}
19 | ~{{ fullname }}.{{ item }}
20 | {%- endif -%}
21 | {%- endfor %}
22 | {% endif %}
23 | {% endblock %}
24 |
25 | {% block methods %}
26 | {% if methods %}
27 | .. rubric:: Methods
28 |
29 | .. autosummary::
30 | :toctree: .
31 | {% if objname != "CAResult" %}
32 | {% for item in methods %}
33 | {% if item[0] != "_" %}
34 | ~{{ fullname }}.{{ item }}
35 | {% endif %}
36 | {%- endfor %}
37 | {% else %}
38 | {% for item in methods %}
39 | {% if ((item[0] != "_") and (item not in inherited_members)) %}
40 | ~{{ fullname }}.{{ item }}
41 | {% endif %}
42 | {%- endfor %}
43 | {% endif %}
44 | {% endif %}
45 | {% endblock %}
46 |
--------------------------------------------------------------------------------
/docs/source/api.rst:
--------------------------------------------------------------------------------
1 | .. automodule:: sccoda
2 |
3 | API
4 | ===
5 |
6 | We recommend importing scCODA in a Python session via::
7 |
8 | import sccoda
9 | dat = sccoda.util.cell_composition_data
10 | ana = sccoda.util.comp_ana
11 | viz = sccoda.util.data_visualization
12 |
13 | The workflow in scCODA starts with reading in cell count data (``dat``) and visualizing them (``viz``)
14 | or synthetically generating cell count data (``util.data_generation``).
15 |
16 | Data acquisition
17 | ----------------
18 |
19 | **Integrating data sources (dat)** (scanpy or pandas)
20 |
21 | .. autosummary::
22 | :toctree: .
23 |
24 | sccoda.util.cell_composition_data.from_pandas
25 | sccoda.util.cell_composition_data.from_scanpy
26 | sccoda.util.cell_composition_data.from_scanpy_dir
27 | sccoda.util.cell_composition_data.from_scanpy_list
28 | sccoda.util.cell_composition_data.read_anndata_one_sample
29 |
30 |
31 | **Synthetic data generation**
32 |
33 | .. autosummary::
34 | :toctree: .
35 |
36 | sccoda.util.data_generation.generate_case_control
37 | sccoda.util.data_generation.b_w_from_abs_change
38 | sccoda.util.data_generation.counts_from_first
39 | sccoda.util.data_generation.sparse_effect_matrix
40 |
41 | **Compositional data visualization**
42 |
43 | Compositional datasets can be plotted via the methods in ``util.data_visualization``.
44 |
45 | .. autosummary::
46 | :toctree: .
47 |
48 | sccoda.util.data_visualization.stacked_barplot
49 | sccoda.util.data_visualization.boxplots
50 | sccoda.util.data_visualization.stackbar
51 |
52 | Model setup and inference
53 | -------------------------
54 |
55 | Using the scCODA model is easiest by generating an instance of ``ana.CompositionalAnalysis``.
56 | By specifying the formula via the `patsy <https://patsy.readthedocs.io/en/latest/>`_ syntax, many combinations and
57 | transformations of the covariates can be performed without redefining the covariate matrix. Also, the reference cell
58 | type needs to be specified in this step.
59 |
60 | **The scCODA model**
61 |
62 | .. autosummary::
63 | :toctree: .
64 |
65 | sccoda.util.comp_ana.CompositionalAnalysis
66 | sccoda.model.scCODA_model.CompositionalModel
67 | sccoda.model.scCODA_model.scCODAModel
68 |
69 | **Utility functions**
70 |
71 | .. autosummary::
72 | :toctree: .
73 |
74 | sccoda.util.helper_functions.sample_size_estimate
75 |
76 | Result evaluation
77 | -----------------
78 |
79 | Executing an inference method on a compositional model produces a ``sccoda.util.result_classes.CAResult`` object. This
80 | class extends the ``InferenceData`` class of `arviz <https://python.arviz.org/>`_ and supports all its
81 | diagnostic and plotting functionality.
82 |
83 | .. autosummary::
84 | :toctree: .
85 |
86 | sccoda.util.result_classes.CAResult
87 |
88 |
89 | Model comparison
90 | ----------------
91 |
92 | ``sccoda.model.other_models`` contains implementations of several compositional methods from microbiome analysis and
93 | non-compositional tests that can be used for comparison.
94 |
95 | .. autosummary::
96 | :toctree: .
97 |
98 | sccoda.model.other_models.SimpleModel
99 | sccoda.model.other_models.scdney_model
100 | sccoda.model.other_models.HaberModel
101 | sccoda.model.other_models.CLRModel
102 | sccoda.model.other_models.TTest
103 | sccoda.model.other_models.CLRModel_ttest
104 | sccoda.model.other_models.ALDEx2Model
105 | sccoda.model.other_models.ALRModel_ttest
106 | sccoda.model.other_models.ALRModel_wilcoxon
107 | sccoda.model.other_models.AncomModel
108 | sccoda.model.other_models.DirichRegModel
109 | sccoda.model.other_models.BetaBinomialModel
110 | sccoda.model.other_models.ANCOMBCModel
111 |
--------------------------------------------------------------------------------
/docs/source/compositional_data.rst:
--------------------------------------------------------------------------------
1 | About scCODA
2 | ============
3 |
4 | Various biological factors, such as diseases, aging, and immunity, are known to have significant effects on the
5 | cellular structure on a wide range of tissues. Thus, studying these changes more carefully is of particular interest
6 | for many research questions. Recent advances in single-cell RNA sequencing technologies open up the possibility of
7 | accurately annotating large numbers of individual cells from a tissue sample, paving the way for differential analysis
8 | of cell populations.
9 |
10 | Compositional data analysis in scRNA-seq
11 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
12 |
13 | When doing differential population analysis, one property of cell population data is often overlooked. Since all
14 | single-cell analysis platforms are limited in their throughput, the number of individual cells in a sample is
15 | predetermined. Thus, cell populations are compositional. They can only be determined up to a multiplicative factor, inducing a negative
16 | correlative bias between the cell types. Following
17 | `Aitchison (Journal of the Royal Statistical Society, 1982) <https://doi.org/10.1111/j.2517-6161.1982.tb01195.x>`_,
18 | compositional data also has to be interpreted in terms of ratios, e.g. with respect to a reference factor.
19 |
20 | Features of scCODA
21 | ^^^^^^^^^^^^^^^^^^
22 |
23 | The scCODA model (`Büttner, Ostner et al. (2021) <https://doi.org/10.1038/s41467-021-27150-6>`_)
24 | is a model that was specifically designed to perform compositional data analysis in scRNA-seq.
25 | Apart from the compositionality of cell population data, there are some other challenges in comparing scRNA-seq
26 | populations, which scCODA addresses, including very small sample sizes and multiple comparisons.
27 | It allows the user to select any reference cell type in order to see the effects
28 | of biological factors from different perspectives.
29 |
30 | Because each sample in scRNA-seq usually contains thousands of cells, performing scRNA-seq on a large number of samples is expensive
31 | and time-consuming. Thus, there are often very few biological replicates available, and frequentist tests will
32 | result in highly uncertain estimates with large confidence intervals. scCODA uses Bayesian
33 | modeling and its possibility to include prior beliefs to obtain accurate results even in a low-sample setting.
34 |
35 | Also, most biological factors only affect a fraction of the total cell population. It is therefore important to
36 | determine the most important changes during the analysis. Since Bayesian analysis does not support the concept
37 | of p-values, scCODA instead uses spike-and-slab priors to automatically determine statistically credible effects.
38 |
39 | For more detailed information on the scCODA model, see
40 | `Büttner, Ostner et al. (2021) <https://doi.org/10.1038/s41467-021-27150-6>`_.
41 |
42 |
--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
1 | # Configuration file for the Sphinx documentation builder.
2 | #
3 | # This file only contains a selection of the most common options. For a full
4 | # list see the documentation:
5 | # http://www.sphinx-doc.org/en/master/config
6 |
7 | # -- Path setup --------------------------------------------------------------
8 |
9 | # If extensions (or modules to document with autodoc) are in another directory,
10 | # add these directories to sys.path here. If the directory is relative to the
11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
12 | #
13 |
14 | import os
15 | import sys
16 | import datetime
17 | import matplotlib
18 |
19 | from pathlib import Path
20 |
# Select matplotlib's non-interactive Agg backend.
matplotlib.use('agg')

# Directory that contains this conf.py.
HERE = Path(__file__).parent
# Prepend the parent of this directory and the local `_ext` directory to the
# import path. This must happen before `import sccoda` below.
sys.path[:0] = [str(HERE.parent), str(HERE / '_ext')]

import sccoda
# True only when the READTHEDOCS environment variable is exactly 'True'.
on_rtd = os.environ.get('READTHEDOCS') == 'True'

# Minimum Sphinx version required to build these docs.
needs_sphinx = "2.0"
30 |
31 | # -- Retrieve notebooks ------------------------------------------------
32 |
from urllib.request import urlretrieve

# Tutorial notebooks are fetched from the repository at build time and written
# next to this file under their own names (relative to the working directory).
notebooks_url = "https://github.com/theislab/scCODA/raw/master/tutorials/"
# Every entry needs its own trailing comma: adjacent string literals are
# silently concatenated into a single (non-existent) file name otherwise.
notebooks = [
    "getting_started.ipynb",
    "Data_import_and_visualization.ipynb",
    "Modeling_options_and_result_analysis.ipynb",
    "using_other_compositional_methods.ipynb",
]
for nb in notebooks:
    try:
        urlretrieve(notebooks_url + nb, nb)
    except Exception as err:
        # Downloading is best-effort (e.g. offline builds): keep going, but
        # report the failure instead of hiding it. Unlike a bare `except`,
        # this no longer swallows KeyboardInterrupt / SystemExit.
        print(
            f"WARNING: could not retrieve notebook {nb!r}: {err}",
            file=sys.stderr,
        )
47 |
48 | # -- Project information -----------------------------------------------------
49 |
# Project metadata used throughout the rendered documentation.
project = "scCODA"
title = "scCODA: A Bayesian model for compositional single-cell data analysis"
author = "Johannes Ostner, Maren Büttner, Benjamin Schubert"
# Copyright line: the current four-digit year followed by the author list.
copyright = "{:%Y}, {}".format(datetime.datetime.now(), author)

# The package version with any ".dirty" marker removed; `release` is the same string.
release = version = sccoda.__version__.replace(".dirty", "")
57 |
58 | # -- General configuration ---------------------------------------------------
59 |
60 | # Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# Both reStructuredText files and Jupyter notebooks are accepted as sources.
source_suffix = [".rst", ".ipynb"]
# Name of the root document (without suffix).
master_doc = 'index'
# Role applied to text in single backticks that has no explicit role.
default_role = 'literal'
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
pygments_style = 'sphinx'

extensions = ['sphinx.ext.autodoc',
              'sphinx.ext.intersphinx',
              'sphinx.ext.doctest',
              'sphinx.ext.coverage',
              'sphinx.ext.mathjax',
              'sphinx.ext.napoleon',
              'sphinx.ext.autosummary',
              "sphinx_autodoc_typehints",
              "nbsphinx",
              "scanpydoc",
              # Adds every module found in `HERE / 'extensions'` by its stem.
              # NOTE(review): sys.path is extended with `HERE / '_ext'` earlier
              # in this file, not `extensions`; if the local extensions live in
              # `_ext`, this glob yields nothing - confirm the intended directory.
              *[p.stem for p in (HERE / 'extensions').glob('*.py')],
              ]

# Generate the API documentation when building
autosummary_generate = True
# Document members in the order they appear in the source.
autodoc_member_order = 'bysource'
# Only NumPy-style docstrings are parsed by napoleon; Google style is disabled.
napoleon_google_docstring = False
napoleon_numpy_docstring = True
napoleon_include_init_with_doc = False
napoleon_use_rtype = True  # having a separate entry generally helps readability
napoleon_use_param = True
# Treat a "Params" docstring section like "Parameters".
napoleon_custom_sections = [('Params', 'Parameters')]
todo_include_todos = False
91 |
# External projects whose object inventories cross-references may resolve
# against: name -> (base URL, inventory location; None means the default).
intersphinx_mapping = {
    "python": ("https://docs.python.org/3", None),
    "anndata": ("https://anndata.readthedocs.io/en/latest/", None),
    "scanpy": ("https://scanpy.readthedocs.io/en/latest/", None),
    "numpy": ("https://numpy.org/doc/stable/", None),
    "matplotlib": ("https://matplotlib.org/", None),
    "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None),
    "seaborn": ("https://seaborn.pydata.org/", None),
}
102 |
# Add notebooks prolog to Google Colab and nbviewer
# The template computes `docname` as the notebook's path below
# 'github/theislab/scCODA/blob/master/' and then opens a raw HTML block.
# NOTE(review): the `.. raw:: html` directive has no body in this listing;
# presumably the badge markup was lost - restore it from the repository,
# since the directive requires content.
nbsphinx_prolog = r"""
{% set docname = 'github/theislab/scCODA/blob/master/' + env.doc2path(env.docname, base=None) %}
.. raw:: html


"""
115 |
116 |
117 | # -- Options for HTML output -------------------------------------------------
118 |
# Theme and its options.
html_theme = 'scanpydoc'
html_theme_options = dict(navigation_depth=1, titles_only=True)
# NOTE(review): this module-level name is not referenced anywhere else in this
# file and differs in case from html_context['github_repo'] - confirm it is needed.
github_repo = "sccoda"
# Values handed to the HTML templates for building links to the GitHub sources.
html_context = dict(
    display_github=True,      # Integrate GitHub
    github_user='theislab',   # Username
    github_repo='scCODA',     # Repo name
    github_version='master',  # Version
    # Path in the checkout to the docs root, i.e. the directory holding this
    # conf.py and the .rst sources. That directory is docs/source/, not docs/.
    conf_py_path='/docs/source/',
)
html_static_path = ['_static']
html_show_sphinx = False
131 |
132 | # def setup(app):
133 | # app.warningiserror = on_rtd
134 |
135 | # -- Options for other output ------------------------------------------
136 |
htmlhelp_basename = "scCODAdoc"
# Document title shared by the LaTeX, man-page and Texinfo entries below.
title_doc = f"{project} documentation"

# latex_engine = "xelatex"
latex_elements = {
    # Extra LaTeX preamble: defines a \block command and maps the Unicode
    # characters U+2581..U+2588 to \block{1}..\block{8}.
    'preamble': r'''
\usepackage[utf8]{inputenc}

\newcommand{\block}[1]{\raisebox{\dimexpr(\fontcharht\font`X-1em)/2}{\rule{1em}{#1\dimexpr1em/8}}}

\DeclareUnicodeCharacter{2581}{\block{1}}
\DeclareUnicodeCharacter{2582}{\block{2}}
\DeclareUnicodeCharacter{2583}{\block{3}}
\DeclareUnicodeCharacter{2584}{\block{4}}
\DeclareUnicodeCharacter{2585}{\block{5}}
\DeclareUnicodeCharacter{2586}{\block{6}}
\DeclareUnicodeCharacter{2587}{\block{7}}
\DeclareUnicodeCharacter{2588}{\block{8}}
'''
}

# One output document per builder, each rooted at `master_doc`.
latex_documents = [(master_doc, f"{project}.tex", title_doc, author, "manual")]
man_pages = [(master_doc, project, title_doc, [author], 1)]
texinfo_documents = [
    (master_doc, project, title_doc, author, project, title, "Miscellaneous")
]
163 |
164 | # -- Override some classnames in autodoc --------------------------------------------
165 |
# No qualified-name overrides are defined; the mapping is intentionally empty.
qualname_overrides = {}
168 |
--------------------------------------------------------------------------------
/docs/source/data.rst:
--------------------------------------------------------------------------------
1 | Data structure
2 | ==============
3 |
4 | .. image:: ../../.github/Figures/data_structure.png
5 | :width: 45%
6 | :height: 200px
7 | :align: left
8 |
9 | .. image:: ../../.github/Figures/covariate_structure.png
10 | :width: 45%
11 | :height: 200px
12 | :align: right
13 |
14 | Annotating the cells from a biological sample with individual types, e.g. via clustering methods, and grouping them by cell type,
15 | results in a vector of counts (of dimension *K*), with each entry representing a cell type. A scCODA dataset aggregates *N* cell count
16 | vectors as the rows of a matrix of dimension *NxK*, the so-called **cell count matrix** *Y*. The cell count data does not
17 | need to be normalized, as scCODA works on the integer count data.
18 | In addition to the cell counts, scCODA also requires covariates that contain information about each sample.
19 | These can be indicators for e.g. diseases, or continuous variables, such as age or BMI. The *M* covariates for an
20 | scCODA dataset are described by the (*NxM* dimensional) **covariate matrix** *X*.
21 |
22 | scCODA uses the `anndata <https://anndata.readthedocs.io/en/latest/>`_ format to store compositional datasets.
23 | Hereby, ``data.X`` represents the cell count matrix, and ``data.obs`` the covariates (The actual covariate or design matrix is generated when calling a model).
24 | The ``data.var`` and ``data.uns`` elements are currently not used.
25 |
26 | .. image:: https://falexwolf.de/img/scanpy/anndata.svg
27 | :width: 500px
28 | :align: center
29 |
30 |
31 | Data generation methods
32 | ^^^^^^^^^^^^^^^^^^^^^^^
33 |
34 | ``sccoda.util.data_generation`` contains methods to generate compositional data with different properties that mimic
35 | the properties of scRNA-seq datasets.
36 |
37 |
38 | Data import methods
39 | ^^^^^^^^^^^^^^^^^^^
40 |
41 | ``sccoda.util.cell_composition_data`` contains methods to import count data from various sources into the data structure used by scCODA.
42 | You can either import data directly from a pandas DataFrame via ``from_pandas``, or get the count data from single-cell expression data used in `scanpy <https://scanpy.readthedocs.io>`_.
43 | If all cells from all samples are stored in one anndata object, ``from_scanpy`` generates a compositional analysis dataset from this.
44 | If there is one anndata object with the single-cell expression data for each sample,
45 | ``from_scanpy_list`` (for in-memory data) and ``from_scanpy_dir`` (for data stored on disk) can transform the information from these files directly into a compositional analysis dataset.
46 | For more information, see the `data import and visualization tutorial <https://github.com/theislab/scCODA/blob/master/tutorials/Data_import_and_visualization.ipynb>`_.
47 |
48 |
49 |
--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
1 |
2 | Welcome to scCODA's documentation!
3 | =============================================
4 |
5 | .. image:: ../../.github/Figures/Figure1.png
6 | :width: 500px
7 | :align: right
8 |
9 | scCODA is a toolbox for statistical models to analyze changes in compositional data,
10 | especially from single-cell RNA-seq experiments.
11 | Its main purpose is to provide a platform and implementation for the *scCODA* model, which is described by
12 | `Büttner, Ostner et al. <https://doi.org/10.1038/s41467-021-27150-6>`_.
13 |
14 | The package is available on `github <https://github.com/theislab/scCODA>`_.
15 |
16 | Please also check out the `tutorials <https://github.com/theislab/scCODA/tree/master/tutorials>`_
17 | that explain the most important functionalities.
18 |
19 | Motivation
20 | ^^^^^^^^^^^^
21 |
22 | When analyzing biological processes via single-cell RNA-sequencing experiments, it is often of interest to assess how
23 | cell populations change under one or more conditions. This task, however, is non-trivial, as there are several
24 | limitations that have to be addressed:
25 |
26 | - scRNA-seq population data is compositional. This must be considered to avoid an inflation of false-positive results.
27 | - Most datasets consist only of very few samples, making frequentist tests inaccurate.
28 | - A condition usually only affects a fraction of cell types. Therefore, sparse effects are preferable.
29 |
30 | The scCODA model overcomes all these limitations in a fully Bayesian model that outperforms other
31 | compositional and non-compositional methods.
32 |
33 | scCODA is fully integrable with scanpy_, but provides its own data structure for aggregating, plotting and analyzing
34 | compositional data from scRNA-seq. In addition to the scCODA model, the package also features a variety of
35 | implementations of other statistical models that can be used as comparisons.
36 |
37 | .. toctree::
38 | :caption: Contents
39 | :maxdepth: 2
40 |
41 | compositional_data
42 | data
43 | models
44 | installation
45 | api
46 |
47 | .. toctree::
48 | :caption: Tutorials
49 | :maxdepth: 1
50 |
51 | getting_started
52 | Data_import_and_visualization
53 | Modeling_options_and_result_analysis
54 | using_other_compositional_methods
55 |
56 | Reference
57 | ^^^^^^^^^^
58 |
59 | Büttner, Ostner *et al.* (2021), scCODA is a Bayesian model for compositional single-cell data analysis
60 | `NatComms <https://doi.org/10.1038/s41467-021-27150-6>`_.
61 | |dim|
62 |
63 | Indices and tables
64 | ^^^^^^^^^^^^^^^^^^^^^^
65 |
66 | * :ref:`genindex`
67 | * :ref:`modindex`
68 | * :ref:`search`
69 |
70 | .. _scanpy: https://scanpy.readthedocs.io
71 |
72 | .. |dim| raw:: html
73 |
74 |
75 |
76 |
77 |
--------------------------------------------------------------------------------
/docs/source/installation.rst:
--------------------------------------------------------------------------------
1 | Installation
2 | ============
3 |
4 | A functioning python environment (>=3.8) is necessary to run this package.
5 |
6 | This package uses the tensorflow (>= 2.4) and tensorflow-probability (>=0.12) packages.
7 | The GPU versions of these packages have not been tested with scCODA and are thus not recommended.
8 |
9 | **To install scCODA via pip, call**::
10 |
11 | pip install sccoda
12 |
13 | **To install scCODA from source**:
14 |
15 | - Navigate to the directory you want scCODA in
16 | - Clone the repository from `github <https://github.com/theislab/scCODA>`_::
17 |
18 | git clone https://github.com/theislab/scCODA
19 |
20 | - Navigate to the root directory of scCODA::
21 |
22 | cd scCODA
23 |
24 | - Install dependencies::
25 |
26 | pip install -r requirements.txt
27 |
28 | **Import scCODA in a Python session via**::
29 |
30 | import sccoda
31 |
32 |
--------------------------------------------------------------------------------
/docs/source/models.rst:
--------------------------------------------------------------------------------
1 | The scCODA model
2 | ================
3 |
4 | scCODA uses Bayesian modeling to detect statistically credible changes in compositional data.
5 | The model is implemented in ``sccoda.model.scCODA_model``.
6 | The easiest way to call a compositional model is via calling an instance of ``sccoda.util.comp_ana.CompositionalAnalysis``.
7 | It requires an ``anndata`` object that contains the compositional data and covariates, a formula string that defines the covariate matrix
8 | (see the `patsy <https://patsy.readthedocs.io/en/latest/>`_ syntax for details), and a reference cell type.
9 |
10 |
11 | Model structure
12 | ^^^^^^^^^^^^^^^
13 |
14 | The model is based on a Dirichlet-multinomial model, in which each cell type is described by the covariates through a log-linear linkage.
15 | The intercepts :math:`\alpha` are modeled via a normal prior.
16 | For the effect (:math:`\beta`) of a covariate on a cell type, scCODA performs model selection via a spike-and-slab prior (Continuous approximation via a Logit-normal prior).
17 | The underlying prior for significant effects is normal with a covariate-specific scaling factor.
18 | The only exceptions are the effects of the reference cell type :math:`\hat{k}`, which are always set to 0.
19 |
20 | .. math::
21 | y|x &\sim DirMult(\phi, \bar{y}) \\
22 | \log(\phi) &= \alpha + x \beta \\
23 | \alpha_k &\sim N(0, 5) \quad &\forall k \in [K] \\
24 | \beta_{m, \hat{k}} &= 0 &\forall m \in [M]\\
25 | \beta_{m, k} &= \tau_{m, k} \tilde{\beta}_{m, k} \quad &\forall m \in [M], k \in \{[K] \smallsetminus \hat{k}\} \\
26 | \tau_{m, k} &= \frac{\exp(t_{m, k})}{1+ \exp(t_{m, k})} \quad &\forall m \in [M], k \in \{[K] \smallsetminus \hat{k}\} \\
27 | \frac{t_{m, k}}{50} &\sim N(0, 1) \quad &\forall m \in [M], k \in \{[K] \smallsetminus \hat{k}\} \\
28 | \tilde{\beta}_{m, k} &= \sigma_m^2 \cdot \gamma_{m, k} \quad &\forall m \in [M], k \in \{[K] \smallsetminus \hat{k}\} \\
29 | \sigma_m^2 &\sim HC(0, 1) \quad &\forall m \in [M] \\
30 | \gamma_{m, k} &\sim N(0,1) \quad &\forall m \in [M], k \in \{[K] \smallsetminus \hat{k}\} \\
31 |
32 |
33 | For further information regarding the model structure, please refer to:
34 |
35 | Büttner, Ostner *et al.* (2021), scCODA is a Bayesian model for compositional single-cell data analysis
36 | `NatComms <https://doi.org/10.1038/s41467-021-27150-6>`_.
37 |
38 | Inference
39 | ^^^^^^^^^
40 |
41 | Once the model is set up, inference via HMC sampling can be performed via ``sample_hmc()``.
42 | Alternatively, No-U-Turn sampling is available via ``sample_nuts()``.
43 | Depending on the size of the dataset and the system hardware, inference usually takes up to 5 minutes.
44 | The resulting ``sccoda.util.result_classes.CAResult`` object extends the ``InferenceData`` class of
45 | `arviz <https://github.com/arviz-devs/arviz>`_ and supports all its diagnostic and plotting functionality.
46 |
47 |
48 | Result analysis
49 | ^^^^^^^^^^^^^^^
50 |
51 | To see which effects were found to be significant, call ``summary()`` on the result object.
52 | The ``Final Parameter`` column of the effects data frame shows the significances.
53 | If the value is 0, the effect is not found to be statistically credible, otherwise it is.
54 | The sign of the effect indicates a decrease or increase in abundance (relative to the reference cell type).
55 | However, the numerical value of these effects should not be used for analysis, as it depends on multiple parameters.
56 | Please refer to the `tutorials <https://github.com/theislab/scCODA/tree/master/tutorials>`_ for more information on how to evaluate scCODA's results.
57 |
--------------------------------------------------------------------------------
/docs/source/sccoda.model.other_models.ALDEx2Model.eval_model.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L454-L496
2 |
3 | sccoda.model.other\_models.ALDEx2Model.eval\_model
4 | ==================================================
5 |
6 | .. currentmodule:: sccoda.model.other_models
7 |
8 | .. automethod:: ALDEx2Model.eval_model
--------------------------------------------------------------------------------
/docs/source/sccoda.model.other_models.ALDEx2Model.fit_model.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L640-L705
2 |
3 | sccoda.model.other\_models.ALDEx2Model.fit\_model
4 | =================================================
5 |
6 | .. currentmodule:: sccoda.model.other_models
7 |
8 | .. automethod:: ALDEx2Model.fit_model
--------------------------------------------------------------------------------
/docs/source/sccoda.model.other_models.ALDEx2Model.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L635-L705
2 |
3 | sccoda.model.other\_models.ALDEx2Model
4 | ======================================
5 |
6 | .. currentmodule:: sccoda.model.other_models
7 |
8 | .. add toctree option to make autodoc generate the pages
9 |
10 | .. autoclass:: ALDEx2Model
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 | .. rubric:: Methods
19 |
20 | .. autosummary::
21 | :toctree: .
22 |
23 |
24 |
25 |
26 | ~sccoda.model.other_models.ALDEx2Model.eval_model
27 |
28 |
29 | ~sccoda.model.other_models.ALDEx2Model.fit_model
30 |
31 |
32 |
33 |
--------------------------------------------------------------------------------
/docs/source/sccoda.model.other_models.ALRModel_ttest.eval_model.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L454-L496
2 |
3 | sccoda.model.other\_models.ALRModel\_ttest.eval\_model
4 | ======================================================
5 |
6 | .. currentmodule:: sccoda.model.other_models
7 |
8 | .. automethod:: ALRModel_ttest.eval_model
--------------------------------------------------------------------------------
/docs/source/sccoda.model.other_models.ALRModel_ttest.fit_model.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L713-L751
2 |
3 | sccoda.model.other\_models.ALRModel\_ttest.fit\_model
4 | =====================================================
5 |
6 | .. currentmodule:: sccoda.model.other_models
7 |
8 | .. automethod:: ALRModel_ttest.fit_model
--------------------------------------------------------------------------------
/docs/source/sccoda.model.other_models.ALRModel_ttest.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L708-L751
2 |
3 | sccoda.model.other\_models.ALRModel\_ttest
4 | ==========================================
5 |
6 | .. currentmodule:: sccoda.model.other_models
7 |
8 | .. add toctree option to make autodoc generate the pages
9 |
10 | .. autoclass:: ALRModel_ttest
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 | .. rubric:: Methods
19 |
20 | .. autosummary::
21 | :toctree: .
22 |
23 |
24 |
25 |
26 | ~sccoda.model.other_models.ALRModel_ttest.eval_model
27 |
28 |
29 | ~sccoda.model.other_models.ALRModel_ttest.fit_model
30 |
31 |
32 |
33 |
--------------------------------------------------------------------------------
/docs/source/sccoda.model.other_models.ALRModel_wilcoxon.eval_model.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L454-L496
2 |
3 | sccoda.model.other\_models.ALRModel\_wilcoxon.eval\_model
4 | =========================================================
5 |
6 | .. currentmodule:: sccoda.model.other_models
7 |
8 | .. automethod:: ALRModel_wilcoxon.eval_model
--------------------------------------------------------------------------------
/docs/source/sccoda.model.other_models.ALRModel_wilcoxon.fit_model.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L759-L797
2 |
3 | sccoda.model.other\_models.ALRModel\_wilcoxon.fit\_model
4 | ========================================================
5 |
6 | .. currentmodule:: sccoda.model.other_models
7 |
8 | .. automethod:: ALRModel_wilcoxon.fit_model
--------------------------------------------------------------------------------
/docs/source/sccoda.model.other_models.ALRModel_wilcoxon.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L754-L797
2 |
3 | sccoda.model.other\_models.ALRModel\_wilcoxon
4 | =============================================
5 |
6 | .. currentmodule:: sccoda.model.other_models
7 |
8 | .. add toctree option to make autodoc generate the pages
9 |
10 | .. autoclass:: ALRModel_wilcoxon
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 | .. rubric:: Methods
19 |
20 | .. autosummary::
21 | :toctree: .
22 |
23 |
24 |
25 |
26 | ~sccoda.model.other_models.ALRModel_wilcoxon.eval_model
27 |
28 |
29 | ~sccoda.model.other_models.ALRModel_wilcoxon.fit_model
30 |
31 |
32 |
33 |
--------------------------------------------------------------------------------
/docs/source/sccoda.model.other_models.ANCOMBCModel.eval_model.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L454-L496
2 |
3 | sccoda.model.other\_models.ANCOMBCModel.eval\_model
4 | ===================================================
5 |
6 | .. currentmodule:: sccoda.model.other_models
7 |
8 | .. automethod:: ANCOMBCModel.eval_model
--------------------------------------------------------------------------------
/docs/source/sccoda.model.other_models.ANCOMBCModel.fit_model.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L1077-L1162
2 |
3 | sccoda.model.other\_models.ANCOMBCModel.fit\_model
4 | ==================================================
5 |
6 | .. currentmodule:: sccoda.model.other_models
7 |
8 | .. automethod:: ANCOMBCModel.fit_model
--------------------------------------------------------------------------------
/docs/source/sccoda.model.other_models.ANCOMBCModel.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L1072-L1162
2 |
3 | sccoda.model.other\_models.ANCOMBCModel
4 | =======================================
5 |
6 | .. currentmodule:: sccoda.model.other_models
7 |
8 | .. add toctree option to make autodoc generate the pages
9 |
10 | .. autoclass:: ANCOMBCModel
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 | .. rubric:: Methods
19 |
20 | .. autosummary::
21 | :toctree: .
22 |
23 |
24 |
25 |
26 | ~sccoda.model.other_models.ANCOMBCModel.eval_model
27 |
28 |
29 | ~sccoda.model.other_models.ANCOMBCModel.fit_model
30 |
31 |
32 |
33 |
--------------------------------------------------------------------------------
/docs/source/sccoda.model.other_models.AncomModel.eval_model.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L873-L905
2 |
3 | sccoda.model.other\_models.AncomModel.eval\_model
4 | =================================================
5 |
6 | .. currentmodule:: sccoda.model.other_models
7 |
8 | .. automethod:: AncomModel.eval_model
--------------------------------------------------------------------------------
/docs/source/sccoda.model.other_models.AncomModel.fit_model.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L839-L871
2 |
3 | sccoda.model.other\_models.AncomModel.fit\_model
4 | ================================================
5 |
6 | .. currentmodule:: sccoda.model.other_models
7 |
8 | .. automethod:: AncomModel.fit_model
--------------------------------------------------------------------------------
/docs/source/sccoda.model.other_models.AncomModel.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L800-L905
2 |
3 | sccoda.model.other\_models.AncomModel
4 | =====================================
5 |
6 | .. currentmodule:: sccoda.model.other_models
7 |
8 | .. add toctree option to make autodoc generate the pages
9 |
10 | .. autoclass:: AncomModel
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 | .. rubric:: Methods
19 |
20 | .. autosummary::
21 | :toctree: .
22 |
23 |
24 |
25 |
26 | ~sccoda.model.other_models.AncomModel.eval_model
27 |
28 |
29 | ~sccoda.model.other_models.AncomModel.fit_model
30 |
31 |
32 |
33 |
--------------------------------------------------------------------------------
/docs/source/sccoda.model.other_models.BetaBinomialModel.eval_model.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L454-L496
2 |
3 | sccoda.model.other\_models.BetaBinomialModel.eval\_model
4 | ========================================================
5 |
6 | .. currentmodule:: sccoda.model.other_models
7 |
8 | .. automethod:: BetaBinomialModel.eval_model
--------------------------------------------------------------------------------
/docs/source/sccoda.model.other_models.BetaBinomialModel.fit_model.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L980-L1069
2 |
3 | sccoda.model.other\_models.BetaBinomialModel.fit\_model
4 | =======================================================
5 |
6 | .. currentmodule:: sccoda.model.other_models
7 |
8 | .. automethod:: BetaBinomialModel.fit_model
--------------------------------------------------------------------------------
/docs/source/sccoda.model.other_models.BetaBinomialModel.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L975-L1069
2 |
3 | sccoda.model.other\_models.BetaBinomialModel
4 | ============================================
5 |
6 | .. currentmodule:: sccoda.model.other_models
7 |
8 | .. add toctree option to make autodoc generate the pages
9 |
10 | .. autoclass:: BetaBinomialModel
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 | .. rubric:: Methods
19 |
20 | .. autosummary::
21 | :toctree: .
22 |
23 |
24 |
25 |
26 | ~sccoda.model.other_models.BetaBinomialModel.eval_model
27 |
28 |
29 | ~sccoda.model.other_models.BetaBinomialModel.fit_model
30 |
31 |
32 |
33 |
--------------------------------------------------------------------------------
/docs/source/sccoda.model.other_models.CLRModel.eval_model.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L454-L496
2 |
3 | sccoda.model.other\_models.CLRModel.eval\_model
4 | ===============================================
5 |
6 | .. currentmodule:: sccoda.model.other_models
7 |
8 | .. automethod:: CLRModel.eval_model
--------------------------------------------------------------------------------
/docs/source/sccoda.model.other_models.CLRModel.fit_model.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L540-L567
2 |
3 | sccoda.model.other\_models.CLRModel.fit\_model
4 | ==============================================
5 |
6 | .. currentmodule:: sccoda.model.other_models
7 |
8 | .. automethod:: CLRModel.fit_model
--------------------------------------------------------------------------------
/docs/source/sccoda.model.other_models.CLRModel.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L535-L567
2 |
3 | sccoda.model.other\_models.CLRModel
4 | ===================================
5 |
6 | .. currentmodule:: sccoda.model.other_models
7 |
8 | .. add toctree option to make autodoc generate the pages
9 |
10 | .. autoclass:: CLRModel
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 | .. rubric:: Methods
19 |
20 | .. autosummary::
21 | :toctree: .
22 |
23 |
24 |
25 |
26 | ~sccoda.model.other_models.CLRModel.eval_model
27 |
28 |
29 | ~sccoda.model.other_models.CLRModel.fit_model
30 |
31 |
32 |
33 |
--------------------------------------------------------------------------------
/docs/source/sccoda.model.other_models.CLRModel_ttest.eval_model.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L454-L496
2 |
3 | sccoda.model.other\_models.CLRModel\_ttest.eval\_model
4 | ======================================================
5 |
6 | .. currentmodule:: sccoda.model.other_models
7 |
8 | .. automethod:: CLRModel_ttest.eval_model
--------------------------------------------------------------------------------
/docs/source/sccoda.model.other_models.CLRModel_ttest.fit_model.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L606-L632
2 |
3 | sccoda.model.other\_models.CLRModel\_ttest.fit\_model
4 | =====================================================
5 |
6 | .. currentmodule:: sccoda.model.other_models
7 |
8 | .. automethod:: CLRModel_ttest.fit_model
--------------------------------------------------------------------------------
/docs/source/sccoda.model.other_models.CLRModel_ttest.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L601-L632
2 |
3 | sccoda.model.other\_models.CLRModel\_ttest
4 | ==========================================
5 |
6 | .. currentmodule:: sccoda.model.other_models
7 |
8 | .. add toctree option to make autodoc generate the pages
9 |
10 | .. autoclass:: CLRModel_ttest
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 | .. rubric:: Methods
19 |
20 | .. autosummary::
21 | :toctree: .
22 |
23 |
24 |
25 |
26 | ~sccoda.model.other_models.CLRModel_ttest.eval_model
27 |
28 |
29 | ~sccoda.model.other_models.CLRModel_ttest.fit_model
30 |
31 |
32 |
33 |
--------------------------------------------------------------------------------
/docs/source/sccoda.model.other_models.DirichRegModel.eval_model.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L454-L496
2 |
3 | sccoda.model.other\_models.DirichRegModel.eval\_model
4 | =====================================================
5 |
6 | .. currentmodule:: sccoda.model.other_models
7 |
8 | .. automethod:: DirichRegModel.eval_model
--------------------------------------------------------------------------------
/docs/source/sccoda.model.other_models.DirichRegModel.fit_model.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L914-L972
2 |
3 | sccoda.model.other\_models.DirichRegModel.fit\_model
4 | ====================================================
5 |
6 | .. currentmodule:: sccoda.model.other_models
7 |
8 | .. automethod:: DirichRegModel.fit_model
--------------------------------------------------------------------------------
/docs/source/sccoda.model.other_models.DirichRegModel.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L908-L972
2 |
3 | sccoda.model.other\_models.DirichRegModel
4 | =========================================
5 |
6 | .. currentmodule:: sccoda.model.other_models
7 |
8 | .. add toctree option to make autodoc generate the pages
9 |
10 | .. autoclass:: DirichRegModel
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 | .. rubric:: Methods
19 |
20 | .. autosummary::
21 | :toctree: .
22 |
23 |
24 |
25 |
26 | ~sccoda.model.other_models.DirichRegModel.eval_model
27 |
28 |
29 | ~sccoda.model.other_models.DirichRegModel.fit_model
30 |
31 |
32 |
33 |
--------------------------------------------------------------------------------
/docs/source/sccoda.model.other_models.HaberModel.eval_model.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L454-L496
2 |
3 | sccoda.model.other\_models.HaberModel.eval\_model
4 | =================================================
5 |
6 | .. currentmodule:: sccoda.model.other_models
7 |
8 | .. automethod:: HaberModel.eval_model
--------------------------------------------------------------------------------
/docs/source/sccoda.model.other_models.HaberModel.fit_model.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L504-L532
2 |
3 | sccoda.model.other\_models.HaberModel.fit\_model
4 | ================================================
5 |
6 | .. currentmodule:: sccoda.model.other_models
7 |
8 | .. automethod:: HaberModel.fit_model
--------------------------------------------------------------------------------
/docs/source/sccoda.model.other_models.HaberModel.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L499-L532
2 |
3 | sccoda.model.other\_models.HaberModel
4 | =====================================
5 |
6 | .. currentmodule:: sccoda.model.other_models
7 |
8 | .. add toctree option to make autodoc generate the pages
9 |
10 | .. autoclass:: HaberModel
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 | .. rubric:: Methods
19 |
20 | .. autosummary::
21 | :toctree: .
22 |
23 |
24 |
25 |
26 | ~sccoda.model.other_models.HaberModel.eval_model
27 |
28 |
29 | ~sccoda.model.other_models.HaberModel.fit_model
30 |
31 |
32 |
33 |
--------------------------------------------------------------------------------
/docs/source/sccoda.model.other_models.SimpleModel.get_chains_after_burnin.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/scCODA_model.py#L169-L225
2 |
3 | sccoda.model.other\_models.SimpleModel.get\_chains\_after\_burnin
4 | =================================================================
5 |
6 | .. currentmodule:: sccoda.model.other_models
7 |
8 | .. automethod:: SimpleModel.get_chains_after_burnin
--------------------------------------------------------------------------------
/docs/source/sccoda.model.other_models.SimpleModel.get_y_hat.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L227-L282
2 |
3 | sccoda.model.other\_models.SimpleModel.get\_y\_hat
4 | ==================================================
5 |
6 | .. currentmodule:: sccoda.model.other_models
7 |
8 | .. automethod:: SimpleModel.get_y_hat
--------------------------------------------------------------------------------
/docs/source/sccoda.model.other_models.SimpleModel.make_result.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/scCODA_model.py#L515-L559
2 |
3 | sccoda.model.other\_models.SimpleModel.make\_result
4 | ===================================================
5 |
6 | .. currentmodule:: sccoda.model.other_models
7 |
8 | .. automethod:: SimpleModel.make_result
--------------------------------------------------------------------------------
/docs/source/sccoda.model.other_models.SimpleModel.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L31-L282
2 |
3 | sccoda.model.other\_models.SimpleModel
4 | ======================================
5 |
6 | .. currentmodule:: sccoda.model.other_models
7 |
8 | .. add toctree option to make autodoc generate the pages
9 |
10 | .. autoclass:: SimpleModel
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 | .. rubric:: Methods
19 |
20 | .. autosummary::
21 | :toctree: .
22 |
23 |
24 |
25 |
26 | ~sccoda.model.other_models.SimpleModel.get_chains_after_burnin
27 |
28 |
29 | ~sccoda.model.other_models.SimpleModel.get_y_hat
30 |
31 |
32 | ~sccoda.model.other_models.SimpleModel.make_result
33 |
34 |
35 | ~sccoda.model.other_models.SimpleModel.sample_hmc
36 |
37 |
38 | ~sccoda.model.other_models.SimpleModel.sample_hmc_da
39 |
40 |
41 | ~sccoda.model.other_models.SimpleModel.sample_nuts
42 |
43 |
44 | ~sccoda.model.other_models.SimpleModel.sampling
45 |
46 |
47 |
48 |
--------------------------------------------------------------------------------
/docs/source/sccoda.model.other_models.SimpleModel.sample_hmc.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L111-L224
2 |
3 | sccoda.model.other\_models.SimpleModel.sample\_hmc
4 | ==================================================
5 |
6 | .. currentmodule:: sccoda.model.other_models
7 |
8 | .. automethod:: SimpleModel.sample_hmc
--------------------------------------------------------------------------------
/docs/source/sccoda.model.other_models.SimpleModel.sample_hmc_da.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/scCODA_model.py#L316-L403
2 |
3 | sccoda.model.other\_models.SimpleModel.sample\_hmc\_da
4 | ======================================================
5 |
6 | .. currentmodule:: sccoda.model.other_models
7 |
8 | .. automethod:: SimpleModel.sample_hmc_da
--------------------------------------------------------------------------------
/docs/source/sccoda.model.other_models.SimpleModel.sample_nuts.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/scCODA_model.py#L405-L513
2 |
3 | sccoda.model.other\_models.SimpleModel.sample\_nuts
4 | ===================================================
5 |
6 | .. currentmodule:: sccoda.model.other_models
7 |
8 | .. automethod:: SimpleModel.sample_nuts
--------------------------------------------------------------------------------
/docs/source/sccoda.model.other_models.SimpleModel.sampling.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/scCODA_model.py#L113-L167
2 |
3 | sccoda.model.other\_models.SimpleModel.sampling
4 | ===============================================
5 |
6 | .. currentmodule:: sccoda.model.other_models
7 |
8 | .. automethod:: SimpleModel.sampling
--------------------------------------------------------------------------------
/docs/source/sccoda.model.other_models.TTest.eval_model.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L454-L496
2 |
3 | sccoda.model.other\_models.TTest.eval\_model
4 | ============================================
5 |
6 | .. currentmodule:: sccoda.model.other_models
7 |
8 | .. automethod:: TTest.eval_model
--------------------------------------------------------------------------------
/docs/source/sccoda.model.other_models.TTest.fit_model.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L575-L598
2 |
3 | sccoda.model.other\_models.TTest.fit\_model
4 | ===========================================
5 |
6 | .. currentmodule:: sccoda.model.other_models
7 |
8 | .. automethod:: TTest.fit_model
--------------------------------------------------------------------------------
/docs/source/sccoda.model.other_models.TTest.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L570-L598
2 |
3 | sccoda.model.other\_models.TTest
4 | ================================
5 |
6 | .. currentmodule:: sccoda.model.other_models
7 |
8 | .. add toctree option to make autodoc generate the pages
9 |
10 | .. autoclass:: TTest
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 | .. rubric:: Methods
19 |
20 | .. autosummary::
21 | :toctree: .
22 |
23 |
24 |
25 |
26 | ~sccoda.model.other_models.TTest.eval_model
27 |
28 |
29 | ~sccoda.model.other_models.TTest.fit_model
30 |
31 |
32 |
33 |
--------------------------------------------------------------------------------
/docs/source/sccoda.model.other_models.scdney_model.analyze.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L341-L411
2 |
3 | sccoda.model.other\_models.scdney\_model.analyze
4 | ================================================
5 |
6 | .. currentmodule:: sccoda.model.other_models
7 |
8 | .. automethod:: scdney_model.analyze
--------------------------------------------------------------------------------
/docs/source/sccoda.model.other_models.scdney_model.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/other_models.py#L285-L411
2 |
3 | sccoda.model.other\_models.scdney\_model
4 | ========================================
5 |
6 | .. currentmodule:: sccoda.model.other_models
7 |
8 | .. add toctree option to make autodoc generate the pages
9 |
10 | .. autoclass:: scdney_model
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 | .. rubric:: Methods
19 |
20 | .. autosummary::
21 | :toctree: .
22 |
23 |
24 |
25 |
26 | ~sccoda.model.other_models.scdney_model.analyze
27 |
28 |
29 |
30 |
--------------------------------------------------------------------------------
/docs/source/sccoda.model.scCODA_model.CompositionalModel.get_chains_after_burnin.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/scCODA_model.py#L169-L225
2 |
3 | sccoda.model.scCODA\_model.CompositionalModel.get\_chains\_after\_burnin
4 | ========================================================================
5 |
6 | .. currentmodule:: sccoda.model.scCODA_model
7 |
8 | .. automethod:: CompositionalModel.get_chains_after_burnin
--------------------------------------------------------------------------------
/docs/source/sccoda.model.scCODA_model.CompositionalModel.make_result.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/scCODA_model.py#L515-L559
2 |
3 | sccoda.model.scCODA\_model.CompositionalModel.make\_result
4 | ==========================================================
5 |
6 | .. currentmodule:: sccoda.model.scCODA_model
7 |
8 | .. automethod:: CompositionalModel.make_result
--------------------------------------------------------------------------------
/docs/source/sccoda.model.scCODA_model.CompositionalModel.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/scCODA_model.py#L23-L559
2 |
3 | sccoda.model.scCODA\_model.CompositionalModel
4 | =============================================
5 |
6 | .. currentmodule:: sccoda.model.scCODA_model
7 |
8 | .. add toctree option to make autodoc generate the pages
9 |
10 | .. autoclass:: CompositionalModel
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 | .. rubric:: Methods
19 |
20 | .. autosummary::
21 | :toctree: .
22 |
23 |
24 |
25 |
26 | ~sccoda.model.scCODA_model.CompositionalModel.get_chains_after_burnin
27 |
28 |
29 | ~sccoda.model.scCODA_model.CompositionalModel.make_result
30 |
31 |
32 | ~sccoda.model.scCODA_model.CompositionalModel.sample_hmc
33 |
34 |
35 | ~sccoda.model.scCODA_model.CompositionalModel.sample_hmc_da
36 |
37 |
38 | ~sccoda.model.scCODA_model.CompositionalModel.sample_nuts
39 |
40 |
41 | ~sccoda.model.scCODA_model.CompositionalModel.sampling
42 |
43 |
44 |
45 |
--------------------------------------------------------------------------------
/docs/source/sccoda.model.scCODA_model.CompositionalModel.sample_hmc.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/scCODA_model.py#L227-L314
2 |
3 | sccoda.model.scCODA\_model.CompositionalModel.sample\_hmc
4 | =========================================================
5 |
6 | .. currentmodule:: sccoda.model.scCODA_model
7 |
8 | .. automethod:: CompositionalModel.sample_hmc
--------------------------------------------------------------------------------
/docs/source/sccoda.model.scCODA_model.CompositionalModel.sample_hmc_da.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/scCODA_model.py#L316-L403
2 |
3 | sccoda.model.scCODA\_model.CompositionalModel.sample\_hmc\_da
4 | =============================================================
5 |
6 | .. currentmodule:: sccoda.model.scCODA_model
7 |
8 | .. automethod:: CompositionalModel.sample_hmc_da
--------------------------------------------------------------------------------
/docs/source/sccoda.model.scCODA_model.CompositionalModel.sample_nuts.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/scCODA_model.py#L405-L513
2 |
3 | sccoda.model.scCODA\_model.CompositionalModel.sample\_nuts
4 | ==========================================================
5 |
6 | .. currentmodule:: sccoda.model.scCODA_model
7 |
8 | .. automethod:: CompositionalModel.sample_nuts
--------------------------------------------------------------------------------
/docs/source/sccoda.model.scCODA_model.CompositionalModel.sampling.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/scCODA_model.py#L113-L167
2 |
3 | sccoda.model.scCODA\_model.CompositionalModel.sampling
4 | ======================================================
5 |
6 | .. currentmodule:: sccoda.model.scCODA_model
7 |
8 | .. automethod:: CompositionalModel.sampling
--------------------------------------------------------------------------------
/docs/source/sccoda.model.scCODA_model.scCODAModel.get_chains_after_burnin.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/scCODA_model.py#L169-L225
2 |
3 | sccoda.model.scCODA\_model.scCODAModel.get\_chains\_after\_burnin
4 | =================================================================
5 |
6 | .. currentmodule:: sccoda.model.scCODA_model
7 |
8 | .. automethod:: scCODAModel.get_chains_after_burnin
--------------------------------------------------------------------------------
/docs/source/sccoda.model.scCODA_model.scCODAModel.get_y_hat.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/scCODA_model.py#L695-L762
2 |
3 | sccoda.model.scCODA\_model.scCODAModel.get\_y\_hat
4 | ==================================================
5 |
6 | .. currentmodule:: sccoda.model.scCODA_model
7 |
8 | .. automethod:: scCODAModel.get_y_hat
--------------------------------------------------------------------------------
/docs/source/sccoda.model.scCODA_model.scCODAModel.make_result.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/scCODA_model.py#L515-L559
2 |
3 | sccoda.model.scCODA\_model.scCODAModel.make\_result
4 | ===================================================
5 |
6 | .. currentmodule:: sccoda.model.scCODA_model
7 |
8 | .. automethod:: scCODAModel.make_result
--------------------------------------------------------------------------------
/docs/source/sccoda.model.scCODA_model.scCODAModel.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/scCODA_model.py#L562-L762
2 |
3 | sccoda.model.scCODA\_model.scCODAModel
4 | ======================================
5 |
6 | .. currentmodule:: sccoda.model.scCODA_model
7 |
8 | .. add toctree option to make autodoc generate the pages
9 |
10 | .. autoclass:: scCODAModel
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 | .. rubric:: Methods
19 |
20 | .. autosummary::
21 | :toctree: .
22 |
23 |
24 |
25 |
26 | ~sccoda.model.scCODA_model.scCODAModel.get_chains_after_burnin
27 |
28 |
29 | ~sccoda.model.scCODA_model.scCODAModel.get_y_hat
30 |
31 |
32 | ~sccoda.model.scCODA_model.scCODAModel.make_result
33 |
34 |
35 | ~sccoda.model.scCODA_model.scCODAModel.sample_hmc
36 |
37 |
38 | ~sccoda.model.scCODA_model.scCODAModel.sample_hmc_da
39 |
40 |
41 | ~sccoda.model.scCODA_model.scCODAModel.sample_nuts
42 |
43 |
44 | ~sccoda.model.scCODA_model.scCODAModel.sampling
45 |
46 |
47 |
48 |
--------------------------------------------------------------------------------
/docs/source/sccoda.model.scCODA_model.scCODAModel.sample_hmc.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/scCODA_model.py#L227-L314
2 |
3 | sccoda.model.scCODA\_model.scCODAModel.sample\_hmc
4 | ==================================================
5 |
6 | .. currentmodule:: sccoda.model.scCODA_model
7 |
8 | .. automethod:: scCODAModel.sample_hmc
--------------------------------------------------------------------------------
/docs/source/sccoda.model.scCODA_model.scCODAModel.sample_hmc_da.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/scCODA_model.py#L316-L403
2 |
3 | sccoda.model.scCODA\_model.scCODAModel.sample\_hmc\_da
4 | ======================================================
5 |
6 | .. currentmodule:: sccoda.model.scCODA_model
7 |
8 | .. automethod:: scCODAModel.sample_hmc_da
--------------------------------------------------------------------------------
/docs/source/sccoda.model.scCODA_model.scCODAModel.sample_nuts.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/scCODA_model.py#L405-L513
2 |
3 | sccoda.model.scCODA\_model.scCODAModel.sample\_nuts
4 | ===================================================
5 |
6 | .. currentmodule:: sccoda.model.scCODA_model
7 |
8 | .. automethod:: scCODAModel.sample_nuts
--------------------------------------------------------------------------------
/docs/source/sccoda.model.scCODA_model.scCODAModel.sampling.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/model/scCODA_model.py#L113-L167
2 |
3 | sccoda.model.scCODA\_model.scCODAModel.sampling
4 | ===============================================
5 |
6 | .. currentmodule:: sccoda.model.scCODA_model
7 |
8 | .. automethod:: scCODAModel.sampling
--------------------------------------------------------------------------------
/docs/source/sccoda.util.cell_composition_data.from_pandas.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/util/cell_composition_data.py#L244-L278
2 |
3 | sccoda.util.cell\_composition\_data.from\_pandas
4 | ================================================
5 |
6 | .. currentmodule:: sccoda.util.cell_composition_data
7 |
8 | .. autofunction:: from_pandas
--------------------------------------------------------------------------------
/docs/source/sccoda.util.cell_composition_data.from_scanpy.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/util/cell_composition_data.py#L187-L241
2 |
3 | sccoda.util.cell\_composition\_data.from\_scanpy
4 | ================================================
5 |
6 | .. currentmodule:: sccoda.util.cell_composition_data
7 |
8 | .. autofunction:: from_scanpy
--------------------------------------------------------------------------------
/docs/source/sccoda.util.cell_composition_data.from_scanpy_dir.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/util/cell_composition_data.py#L123-L184
2 |
3 | sccoda.util.cell\_composition\_data.from\_scanpy\_dir
4 | =====================================================
5 |
6 | .. currentmodule:: sccoda.util.cell_composition_data
7 |
8 | .. autofunction:: from_scanpy_dir
--------------------------------------------------------------------------------
/docs/source/sccoda.util.cell_composition_data.from_scanpy_list.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/util/cell_composition_data.py#L61-L120
2 |
3 | sccoda.util.cell\_composition\_data.from\_scanpy\_list
4 | ======================================================
5 |
6 | .. currentmodule:: sccoda.util.cell_composition_data
7 |
8 | .. autofunction:: from_scanpy_list
--------------------------------------------------------------------------------
/docs/source/sccoda.util.cell_composition_data.read_anndata_one_sample.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/util/cell_composition_data.py#L15-L58
2 |
3 | sccoda.util.cell\_composition\_data.read\_anndata\_one\_sample
4 | ==============================================================
5 |
6 | .. currentmodule:: sccoda.util.cell_composition_data
7 |
8 | .. autofunction:: read_anndata_one_sample
--------------------------------------------------------------------------------
/docs/source/sccoda.util.comp_ana.CompositionalAnalysis.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/util/comp_ana.py#L14-L130
2 |
3 | sccoda.util.comp\_ana.CompositionalAnalysis
4 | ===========================================
5 |
6 | .. currentmodule:: sccoda.util.comp_ana
7 |
8 | .. add toctree option to make autodoc generate the pages
9 |
10 | .. autoclass:: CompositionalAnalysis
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 | .. rubric:: Methods
19 |
20 | .. autosummary::
21 | :toctree: .
22 |
23 |
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/docs/source/sccoda.util.data_generation.b_w_from_abs_change.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/util/data_generation.py#L123-L172
2 |
3 | sccoda.util.data\_generation.b\_w\_from\_abs\_change
4 | ====================================================
5 |
6 | .. currentmodule:: sccoda.util.data_generation
7 |
8 | .. autofunction:: b_w_from_abs_change
--------------------------------------------------------------------------------
/docs/source/sccoda.util.data_generation.counts_from_first.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/util/data_generation.py#L175-L202
2 |
3 | sccoda.util.data\_generation.counts\_from\_first
4 | ================================================
5 |
6 | .. currentmodule:: sccoda.util.data_generation
7 |
8 | .. autofunction:: counts_from_first
--------------------------------------------------------------------------------
/docs/source/sccoda.util.data_generation.generate_case_control.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/util/data_generation.py#L32-L120
2 |
3 | sccoda.util.data\_generation.generate\_case\_control
4 | ====================================================
5 |
6 | .. currentmodule:: sccoda.util.data_generation
7 |
8 | .. autofunction:: generate_case_control
--------------------------------------------------------------------------------
/docs/source/sccoda.util.data_generation.sparse_effect_matrix.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/util/data_generation.py#L205-L247
2 |
3 | sccoda.util.data\_generation.sparse\_effect\_matrix
4 | ===================================================
5 |
6 | .. currentmodule:: sccoda.util.data_generation
7 |
8 | .. autofunction:: sparse_effect_matrix
--------------------------------------------------------------------------------
/docs/source/sccoda.util.data_visualization.boxplots.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/util/data_visualization.py#L178-L347
2 |
3 | sccoda.util.data\_visualization.boxplots
4 | ========================================
5 |
6 | .. currentmodule:: sccoda.util.data_visualization
7 |
8 | .. autofunction:: boxplots
--------------------------------------------------------------------------------
/docs/source/sccoda.util.data_visualization.stackbar.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/util/data_visualization.py#L22-L87
2 |
3 | sccoda.util.data\_visualization.stackbar
4 | ========================================
5 |
6 | .. currentmodule:: sccoda.util.data_visualization
7 |
8 | .. autofunction:: stackbar
--------------------------------------------------------------------------------
/docs/source/sccoda.util.data_visualization.stacked_barplot.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/util/data_visualization.py#L90-L175
2 |
3 | sccoda.util.data\_visualization.stacked\_barplot
4 | ================================================
5 |
6 | .. currentmodule:: sccoda.util.data_visualization
7 |
8 | .. autofunction:: stacked_barplot
--------------------------------------------------------------------------------
/docs/source/sccoda.util.helper_functions.sample_size_estimate.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/util/helper_functions.py#L4-L55
2 |
3 | sccoda.util.helper\_functions.sample\_size\_estimate
4 | ====================================================
5 |
6 | .. currentmodule:: sccoda.util.helper_functions
7 |
8 | .. autofunction:: sample_size_estimate
--------------------------------------------------------------------------------
/docs/source/sccoda.util.result_classes.CAResult.compare_parameters_to_truth.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/util/result_classes.py#L451-L500
2 |
3 | sccoda.util.result\_classes.CAResult.compare\_parameters\_to\_truth
4 | ===================================================================
5 |
6 | .. currentmodule:: sccoda.util.result_classes
7 |
8 | .. automethod:: CAResult.compare_parameters_to_truth
--------------------------------------------------------------------------------
/docs/source/sccoda.util.result_classes.CAResult.complete_alpha_df.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/util/result_classes.py#L311-L339
2 |
3 | sccoda.util.result\_classes.CAResult.complete\_alpha\_df
4 | ========================================================
5 |
6 | .. currentmodule:: sccoda.util.result_classes
7 |
8 | .. automethod:: CAResult.complete_alpha_df
--------------------------------------------------------------------------------
/docs/source/sccoda.util.result_classes.CAResult.complete_beta_df.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/util/result_classes.py#L214-L309
2 |
3 | sccoda.util.result\_classes.CAResult.complete\_beta\_df
4 | =======================================================
5 |
6 | .. currentmodule:: sccoda.util.result_classes
7 |
8 | .. automethod:: CAResult.complete_beta_df
--------------------------------------------------------------------------------
/docs/source/sccoda.util.result_classes.CAResult.credible_effects.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/util/result_classes.py#L538-L570
2 |
3 | sccoda.util.result\_classes.CAResult.credible\_effects
4 | ======================================================
5 |
6 | .. currentmodule:: sccoda.util.result_classes
7 |
8 | .. automethod:: CAResult.credible_effects
--------------------------------------------------------------------------------
/docs/source/sccoda.util.result_classes.CAResult.distance_to_truth.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/util/result_classes.py#L502-L536
2 |
3 | sccoda.util.result\_classes.CAResult.distance\_to\_truth
4 | ========================================================
5 |
6 | .. currentmodule:: sccoda.util.result_classes
7 |
8 | .. automethod:: CAResult.distance_to_truth
--------------------------------------------------------------------------------
/docs/source/sccoda.util.result_classes.CAResult.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/util/result_classes.py#L47-L616
2 |
3 | sccoda.util.result\_classes.CAResult
4 | ====================================
5 |
6 | .. currentmodule:: sccoda.util.result_classes
7 |
8 | .. add toctree option to make autodoc generate the pages
9 |
10 | .. autoclass:: CAResult
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 | .. rubric:: Methods
19 |
20 | .. autosummary::
21 | :toctree: .
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 | ~sccoda.util.result_classes.CAResult.compare_parameters_to_truth
31 |
32 |
33 | ~sccoda.util.result_classes.CAResult.complete_alpha_df
34 |
35 |
36 | ~sccoda.util.result_classes.CAResult.complete_beta_df
37 |
38 |
39 |
40 |
41 | ~sccoda.util.result_classes.CAResult.credible_effects
42 |
43 |
44 |
45 | ~sccoda.util.result_classes.CAResult.distance_to_truth
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 | ~sccoda.util.result_classes.CAResult.save
71 |
72 |
73 |
74 |
75 | ~sccoda.util.result_classes.CAResult.set_fdr
76 |
77 |
78 |
79 |
80 |
81 |
82 | ~sccoda.util.result_classes.CAResult.summary
83 |
84 |
85 | ~sccoda.util.result_classes.CAResult.summary_extended
86 |
87 |
88 | ~sccoda.util.result_classes.CAResult.summary_prepare
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
--------------------------------------------------------------------------------
/docs/source/sccoda.util.result_classes.CAResult.save.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/util/result_classes.py#L572-L589
2 |
3 | sccoda.util.result\_classes.CAResult.save
4 | =========================================
5 |
6 | .. currentmodule:: sccoda.util.result_classes
7 |
8 | .. automethod:: CAResult.save
--------------------------------------------------------------------------------
/docs/source/sccoda.util.result_classes.CAResult.set_fdr.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/util/result_classes.py#L591-L616
2 |
3 | sccoda.util.result\_classes.CAResult.set\_fdr
4 | =============================================
5 |
6 | .. currentmodule:: sccoda.util.result_classes
7 |
8 | .. automethod:: CAResult.set_fdr
--------------------------------------------------------------------------------
/docs/source/sccoda.util.result_classes.CAResult.summary.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/util/result_classes.py#L341-L394
2 |
3 | sccoda.util.result\_classes.CAResult.summary
4 | ============================================
5 |
6 | .. currentmodule:: sccoda.util.result_classes
7 |
8 | .. automethod:: CAResult.summary
--------------------------------------------------------------------------------
/docs/source/sccoda.util.result_classes.CAResult.summary_extended.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/util/result_classes.py#L396-L449
2 |
3 | sccoda.util.result\_classes.CAResult.summary\_extended
4 | ======================================================
5 |
6 | .. currentmodule:: sccoda.util.result_classes
7 |
8 | .. automethod:: CAResult.summary_extended
--------------------------------------------------------------------------------
/docs/source/sccoda.util.result_classes.CAResult.summary_prepare.rst:
--------------------------------------------------------------------------------
1 | :github_url: https://github.com/theislab/scCODA/tree/master/util/result_classes.py#L108-L212
2 |
3 | sccoda.util.result\_classes.CAResult.summary\_prepare
4 | =====================================================
5 |
6 | .. currentmodule:: sccoda.util.result_classes
7 |
8 | .. automethod:: CAResult.summary_prepare
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy>=1.21
2 | pandas>=1.0
3 | seaborn
4 | matplotlib>=3.0
5 | tensorflow>=2.8
6 | tensorflow-probability>=0.16
7 | arviz>=0.11
8 | scipy
9 | anndata
10 | patsy
11 | scanpy
12 | statsmodels
13 | rpy2
14 | scikit-bio
15 |
--------------------------------------------------------------------------------
/sccoda/__init__.py:
--------------------------------------------------------------------------------
# Resolve ``__version__``.
# First choice: ask setuptools_scm to compute the version relative to the
# directory above this package. If setuptools_scm is not importable or raises
# LookupError, fall back to the metadata of the installed distribution.
try:
    from setuptools_scm import get_version

    __version__ = get_version(root="..", relative_to=__file__)
    del get_version
except (LookupError, ImportError):
    try:
        from importlib_metadata import version  # backport package, Python < 3.8
    except ImportError:
        # Only a missing backport should trigger the standard-library fallback;
        # a bare ``except`` would also hide unrelated failures.
        from importlib.metadata import version  # standard library, Python >= 3.8
    __version__ = version(__name__)
    del version
13 |
--------------------------------------------------------------------------------
/sccoda/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | """Builtin Datasets.
2 | """
3 |
4 | from ._datasets import (
5 | haber
6 | )
7 |
--------------------------------------------------------------------------------
/sccoda/datasets/_datasets.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | import pandas as pd
4 |
5 | HERE = Path(__file__).parent
6 |
def haber() -> pd.DataFrame:
    """
    Load the bundled example data of Haber et al. 2017.

    The table contains tabularized counts of cell types in the small intestinal
    epithelium of mice with different conditions. It is read from the file
    ``haber_counts.csv`` that is located in the same directory as this module.

    Returns
    -------
    data matrix as pandas data frame.

    """
    return pd.read_csv(HERE / 'haber_counts.csv')
22 |
--------------------------------------------------------------------------------
/sccoda/datasets/haber_counts.csv:
--------------------------------------------------------------------------------
1 | Mouse,Endocrine,Enterocyte,Enterocyte.Progenitor,Goblet,Stem,TA,TA.Early,Tuft
2 | Control_1,36,59,136,36,239,125,191,18
3 | Control_2,5,46,23,20,50,11,40,5
4 | Control_3,45,98,188,124,250,155,365,33
5 | Control_4,26,221,198,36,131,130,196,4
6 | H.poly.Day10_1,42,71,203,147,271,109,180,146
7 | H.poly.Day10_2,40,57,383,170,321,244,256,71
8 | H.poly.Day3_1,52,75,347,66,323,263,313,51
9 | H.poly.Day3_2,65,126,115,33,65,39,129,59
10 | Salm_1,37,332,113,59,90,47,132,10
11 | Salm_2,32,373,116,67,117,65,168,12
12 |
--------------------------------------------------------------------------------
/sccoda/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theislab/scCODA/887955e5f968960e2112fdab4258a205596540ee/sccoda/model/__init__.py
--------------------------------------------------------------------------------
/sccoda/util/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theislab/scCODA/887955e5f968960e2112fdab4258a205596540ee/sccoda/util/__init__.py
--------------------------------------------------------------------------------
/sccoda/util/cell_composition_data.py:
--------------------------------------------------------------------------------
1 | """
2 | Helper functions to convert single-cell data to scCODA compositional analysis data
3 |
4 | :authors: Johannes Ostner
5 | """
6 | import pandas as pd
7 | import anndata as ad
8 | import os
9 | import numpy as np
10 |
11 | from anndata import AnnData
12 | from typing import Optional, Tuple, Collection, Union, List
13 |
14 |
def read_anndata_one_sample(
        adata: AnnData,
        cell_type_identifier: str,
        covariate_key: Optional[str] = None
) -> Tuple[np.ndarray, dict]:
    """
    Counts the cells of each cell type in a single sample (anndata format).

    The cell type assignment is read from the column ``cell_type_identifier`` of ``adata.obs``
    and tallied with ``value_counts``.
    If ``covariate_key`` is given, the object stored under ``adata.uns[covariate_key]`` is returned as well.

    Usage:

    ``cell_counts, covs = read_anndata_one_sample(adata, cell_type_identifier="Louvain", covariate_key="covariates")``

    NOTE(review): the return annotation does not cover the case ``covariate_key=None``,
    where only the cell counts are returned (no tuple).

    Parameters
    ----------
    adata
        single-cell data object of one sample
    cell_type_identifier
        column name in adata.obs that specifies the cell types
    covariate_key
        key for adata.uns, where the covariate values are stored

    Returns
    -------
    The cell counts, and the covariates if ``covariate_key`` was specified

    cell_counts
        result of ``value_counts`` on the cell type column (one entry per cell type)
    covs
        content of ``adata.uns[covariate_key]``; only returned if ``covariate_key`` is not None
    """

    # Tally the cells of every cell type in this sample
    type_counts = adata.obs[cell_type_identifier].value_counts()

    # Without a covariate key, the counts are the only result
    if covariate_key is None:
        return type_counts

    return type_counts, adata.uns[covariate_key]
59 |
60 |
def from_scanpy_list(
        samples: List[AnnData],
        cell_type_identifier: str,
        covariate_key: Optional[str] = None,
        covariate_df: Optional[pd.DataFrame] = None
) -> AnnData:
    """
    Creates a compositional analysis data set from a list of scanpy data sets.

    To use this function, all data sets need to have one identically named column in adata.obs that contains the cell type assignment.
    Covariates can either be specified via a key in adata.uns, or as a separate DataFrame.
    If both are given, ``covariate_key`` takes precedence. If neither is given, a message is printed and None is returned.

    Usage:

    ``data = from_scanpy_list([adata1, adata2, adata3], cell_type_identifier="Louvain", covariate_key="covariates")``

    Parameters
    ----------
    samples
        list of scanpy data sets
    cell_type_identifier
        column name in adata.obs that specifies the cell types
    covariate_key
        key for adata.uns, where covariate values are stored
    covariate_df
        DataFrame with covariates, one row per sample in the order of ``samples``. The passed object is not modified.

    Returns
    -------
    A compositional analysis data set

    data
        A compositional analysis data set with one row per sample, cell counts in ``X``,
        total counts per cell type in ``var["n_cells"]`` and covariates in ``obs``
    """

    if covariate_key is None and covariate_df is None:
        print("No covariate information specified!")
        return

    # Collect one single-row frame per sample and concatenate once at the end.
    # This avoids re-copying the growing table for every sample and avoids
    # concatenating onto an empty DataFrame.
    count_rows = []
    covariate_rows = []
    for s in samples:
        if covariate_key is not None:
            cell_counts, covs = read_anndata_one_sample(s, cell_type_identifier, covariate_key)
            covariate_rows.append(pd.Series(covs).to_frame().T)
        else:
            cell_counts = read_anndata_one_sample(s, cell_type_identifier)
        count_rows.append(pd.DataFrame(cell_counts).T)

    count_data = pd.concat(count_rows) if count_rows else pd.DataFrame()

    if covariate_key is not None:
        if covariate_rows:
            covariate_data = pd.concat(covariate_rows, ignore_index=True)
        else:
            covariate_data = pd.DataFrame()
    else:
        # Work on a copy: the index is rewritten below and the caller's
        # DataFrame must stay untouched.
        covariate_data = covariate_df.copy()

    # Cell types that are missing in a sample have a count of 0
    count_data = count_data.fillna(0)
    covariate_data.index = covariate_data.index.astype(str)

    var_dat = count_data.sum(axis=0).rename("n_cells").to_frame()
    var_dat.index = var_dat.index.astype(str)

    return ad.AnnData(X=count_data.values,
                      var=var_dat,
                      obs=covariate_data)
127 |
128 |
def from_scanpy_dir(
        path: str,
        cell_type_identifier: str,
        covariate_key: Optional[str] = None,
        covariate_df: Optional[pd.DataFrame] = None
) -> AnnData:
    """
    Creates a compositional analysis data set from all scanpy data sets in a directory.

    Every entry of the directory is read with ``anndata.read_h5ad``.
    To use this function, all data sets need to have one identically named column in adata.obs that contains the cell type assignment.
    Covariates can either be specified via a key in adata.uns, or as a separate DataFrame.
    If both are given, ``covariate_key`` takes precedence. If neither is given, a message is printed and None is returned.

    NOTE: Samples are processed in the order returned by ``os.listdir``, which is not guaranteed to be sorted.
    When passing ``covariate_df``, make sure its rows match that order.

    Usage:
    ``data = from_scanpy_dir("./path/to/directory", cell_type_identifier="Louvain", covariate_key="covariates")``

    Parameters
    ----------
    path
        path to directory
    cell_type_identifier
        column name in adata.obs that specifies the cell types
    covariate_key
        key for adata.uns, where covariate values are stored
    covariate_df
        DataFrame with covariates, one row per sample. The passed object is not modified.

    Returns
    -------
    A compositional analysis data set

    data
        A compositional analysis data set with one row per file, cell counts in ``X``,
        total counts per cell type in ``var["n_cells"]`` and covariates in ``obs``
    """

    filenames = os.listdir(path)

    if covariate_key is None and covariate_df is None:
        print("No covariate information specified!")
        return

    # Collect one single-row frame per file and concatenate once at the end
    count_rows = []
    covariate_rows = []
    for f in filenames:
        # os.listdir yields bare entry names; they must be joined with the
        # directory, otherwise the file is looked up in the working directory.
        adata = ad.read_h5ad(os.path.join(path, f))

        if covariate_key is not None:
            cell_counts, covs = read_anndata_one_sample(adata, cell_type_identifier, covariate_key)
            covariate_rows.append(pd.Series(covs).to_frame().T)
        else:
            cell_counts = read_anndata_one_sample(adata, cell_type_identifier)
        count_rows.append(pd.DataFrame(cell_counts).T)

    count_data = pd.concat(count_rows) if count_rows else pd.DataFrame()

    if covariate_key is not None:
        if covariate_rows:
            covariate_data = pd.concat(covariate_rows, ignore_index=True)
        else:
            covariate_data = pd.DataFrame()
    else:
        # Work on a copy: the index is rewritten below and the caller's
        # DataFrame must stay untouched.
        covariate_data = covariate_df.copy()

    # Cell types that are missing in a sample have a count of 0
    count_data = count_data.fillna(0)
    covariate_data.index = covariate_data.index.astype(str)

    var_dat = count_data.sum(axis=0).rename("n_cells").to_frame()
    var_dat.index = var_dat.index.astype(str)

    return ad.AnnData(X=count_data.values,
                      var=var_dat,
                      obs=covariate_data)
197 |
198 |
def from_scanpy(
        adata: AnnData,
        cell_type_identifier: str,
        sample_identifier: str,
        covariate_key: Optional[str] = None,
        covariate_df: Optional[pd.DataFrame] = None
) -> AnnData:
    """
    Builds a compositional analysis data set from one anndata object that contains the cells of all samples.

    ``adata.obs`` must hold one column with the cell type of every cell and one column with the sample it belongs to.
    The cells are counted per (sample, cell type) combination; combinations that do not occur get a count of 0.

    Covariates are taken from ``adata.uns[covariate_key]`` if ``covariate_key`` is given, otherwise from ``covariate_df``.
    If neither is specified, a message is printed and the result has no covariate columns.
    The covariate index must contain exactly the sample names; the covariates are reordered to the row order of the count table.

    Parameters
    ----------
    adata
        single anndata object with the cells of all samples
    cell_type_identifier
        column name in adata.obs that specifies the cell types
    sample_identifier
        column name in adata.obs that specifies the sample
    covariate_key
        key for adata.uns, where covariate values are stored
    covariate_df
        DataFrame with covariates, indexed by sample name

    Returns
    -------
    A compositional analysis data set

    data
        A compositional analysis data set with cell counts in ``X``,
        total counts per cell type in ``var["n_cells"]`` and covariates in ``obs``

    Raises
    ------
    ValueError
        If the covariate index and the sample names do not contain the same elements

    """

    # Count cells per (sample, cell type) and spread the cell types over the columns
    pair_counts = adata.obs.value_counts([sample_identifier, cell_type_identifier])
    sample_counts = pair_counts.unstack(level=cell_type_identifier).fillna(0)

    if covariate_key is None:
        if covariate_df is None:
            print("No covariate information specified!")
            covariate_df = pd.DataFrame(index=sample_counts.index)
    else:
        covariate_df = pd.DataFrame(adata.uns[covariate_key])

    if set(covariate_df.index) != set(sample_counts.index):
        raise ValueError("anndata sample names and covariate_df index do not have the same elements!")

    # Bring the covariates into the row order of the count table
    ordered_covs = covariate_df.reindex(sample_counts.index)
    ordered_covs.index = ordered_covs.index.astype(str)

    cell_type_totals = sample_counts.sum(axis=0).rename("n_cells").to_frame()
    cell_type_totals.index = cell_type_totals.index.astype(str)

    return ad.AnnData(X=sample_counts.values,
                      var=cell_type_totals,
                      obs=ordered_covs)
259 |
260 |
def from_pandas(
        df: pd.DataFrame,
        covariate_columns: List[str]
) -> AnnData:
    """
    Converts a Pandas DataFrame into a compositional analysis data set.
    The DataFrame must contain one row per sample, columns can be cell types or covariates

    Note that all columns that are not specified as covariates are assumed to be cell types.

    Usage:
    ``data = from_pandas(df, covariate_columns=["cov1", "cov2"])``

    Parameters
    ----------
    df
        A pandas DataFrame with each row representing a sample; the columns can be cell counts or covariates
    covariate_columns
        List of column names that are interpreted as covariates; all other columns will be seen as cell types

    Returns
    -------
    A compositional analysis data set

    data
        A compositional analysis data set with the cell type columns in ``X`` and the covariate columns in ``obs``
    """

    covariate_data = df.loc[:, covariate_columns]
    covariate_data.index = covariate_data.index.astype(str)

    # Select the cell type columns by testing against the covariate *names*.
    # Testing against the covariate DataFrame itself only works because
    # iterating a DataFrame happens to yield its column labels.
    count_data = df.loc[:, ~df.columns.isin(covariate_columns)]
    celltypes = pd.DataFrame(index=count_data.columns)

    return ad.AnnData(X=count_data.values,
                      var=celltypes,
                      obs=covariate_data)
297 |
--------------------------------------------------------------------------------
/sccoda/util/comp_ana.py:
--------------------------------------------------------------------------------
1 | """
2 | Initialization of scCODA models.
3 |
4 | :authors: Johannes Ostner
5 | """
6 | import numpy as np
7 | import patsy as pt
8 |
9 | from anndata import AnnData
10 | from sccoda.model import scCODA_model as dm
11 | from typing import Union, Optional
12 |
13 |
class CompositionalAnalysis:
    """
    Initializer class for scCODA models. This class is called when performing compositional analysis with scCODA.

    Usage: model = CompositionalAnalysis(data, formula="covariate1 + covariate2", reference_cell_type="CellTypeA")

    Calling an scCODA model requires these parameters:

    data
        anndata object with cell counts as data.X and covariates saved in data.obs
    formula
        patsy-style formula for building the covariate matrix.
        Categorical covariates are encoded by patsy.
        To set a specific level as the base category for a categorical covariate, use "C(covariate, Treatment('reference_level'))"
    reference_cell_type
        Column index that sets the reference cell type. Can either reference the name of a column or a column number (starting at 0).
        If "automatic", the cell type with the lowest dispersion in relative abundance is chosen among all cell types
        whose fraction of samples with zero counts is below ``automatic_reference_absence_threshold``.
    """

    def __new__(
            cls,
            data: AnnData,
            formula: str,
            reference_cell_type: Union[str, int] = "automatic",
            automatic_reference_absence_threshold: float = 0.05,
    ) -> dm.scCODAModel:
        """
        Builds count and covariate matrix, returns a scCODAModel object

        Usage: model = CompositionalAnalysis(data, formula="covariate1 + covariate2", reference_cell_type="CellTypeA")

        Parameters
        ----------
        data
            anndata object with cell counts as data.X and covariates saved in data.obs
        formula
            R-style (patsy) formula for building the covariate matrix.
            Categorical covariates are encoded by patsy.
            To set a specific level as the base category for a categorical covariate, use "C(covariate, Treatment('reference_level'))"
        reference_cell_type
            Column index that sets the reference cell type. Can either reference the name of a column or the n-th column (indexed at 0).
            If "automatic", the cell type with the lowest dispersion (variance divided by mean) in relative abundance is chosen
            among all cell types whose fraction of zero entries is below ``automatic_reference_absence_threshold``.
        automatic_reference_absence_threshold
            If using reference_cell_type = "automatic", determine what the maximum fraction of zero entries for a cell type is to be considered as a possible reference cell type

        Returns
        -------
        A compositional model

        model
            A scCODAModel object

        Raises
        ------
        ValueError
            If automatic reference selection finds no cell type below the absence threshold
        NameError
            If ``reference_cell_type`` is neither "automatic", a cell type name, nor a valid column number
        """

        cell_types = data.var.index.to_list()

        # Get count data
        data_matrix = data.X.astype("float64")

        # Build covariate matrix from R-like formula.
        # The first design column (the intercept that patsy adds by default) is dropped.
        covariate_matrix = pt.dmatrix(formula, data.obs)
        covariate_names = covariate_matrix.design_info.column_names[1:]
        covariate_matrix = covariate_matrix[:, 1:]

        # Resolve the reference cell type to a column index
        # Automatic reference selection (dispersion-based)
        if reference_cell_type == "automatic":
            percent_zero = np.sum(data_matrix == 0, axis=0)/data_matrix.shape[0]
            nonrare_ct = np.where(percent_zero < automatic_reference_absence_threshold)[0]

            if len(nonrare_ct) == 0:
                raise ValueError("No cell types that have large enough presence! Please increase automatic_reference_absence_threshold")

            rel_abun = data_matrix / np.sum(data_matrix, axis=1, keepdims=True)

            # Select the candidate with the smallest dispersion. The search is
            # restricted to the non-rare candidates, so a rare cell type that
            # happens to have the same dispersion value can never be picked.
            cell_type_disp = np.var(rel_abun, axis=0)/np.mean(rel_abun, axis=0)
            ref_index = nonrare_ct[np.argmin(cell_type_disp[nonrare_ct])]

            ref_cell_type = cell_types[ref_index]
            print(f"Automatic reference selection! Reference cell type set to {ref_cell_type}")

        # Column name as reference cell type
        elif reference_cell_type in cell_types:
            ref_index = cell_types.index(reference_cell_type)

        # Numeric reference cell type.
        # Short-circuit ``and`` is required: the range comparison must not be
        # evaluated for non-integer input such as an unknown cell type name.
        elif isinstance(reference_cell_type, (int, np.integer)) and 0 <= reference_cell_type < len(cell_types):
            ref_index = reference_cell_type

        # None of the above: Throw error
        else:
            raise NameError("Reference index is not a valid cell type name or numerical index!")

        return dm.scCODAModel(
            covariate_matrix=np.array(covariate_matrix),
            data_matrix=data_matrix,
            cell_types=cell_types,
            covariate_names=covariate_names,
            reference_cell_type=ref_index,
            formula=formula,
        )
131 |
--------------------------------------------------------------------------------
/sccoda/util/data_generation.py:
--------------------------------------------------------------------------------
1 | """
2 | Toolbox for simulating compositional data from ScRNA-seq
3 |
4 | This toolbox provides data generation and modelling solutions for compositional data with different specifications.
5 | This data might e.g. come from scRNA-seq experiments.
6 | The covariates are represented by ``X``, the cell count matrix is denoted ``Y``.
7 |
8 | To start, we set the dimensions of the data: Number of cell types (``K``), number of covariates (``D``),
9 | number of samples (``N``), and number of cells per sample (``n_total``).
10 |
11 | We now generate composition parameters (``b_true``, ``w_true``) and a covariance matrix (``Sigma``)
12 | from some input specifications.
13 | ``b_true`` represents the base composition with all covariates set to 0. Adding ``X * w_true`` to this
14 | gives the corresponding parameter for each sample.
15 |
16 | After adding a gaussian noise (``Sigma``), we can build a concentration vector for each sample that sums up to 1
17 | via the softmax function.
18 | From there, we can calculate each row of the cell count matrix (``Y``) via a multinomial distribution.
19 |
20 | :authors: Johannes Ostner
21 | """
22 |
23 | import numpy as np
24 | import anndata as ad
25 | import pandas as pd
26 | from scipy.special import softmax
27 |
28 | from anndata import AnnData
29 | from typing import Optional, Tuple, Collection, Union, List
30 |
31 |
def generate_case_control(
        cases: int = 1,
        K: int = 5,
        n_total: int = 1000,
        n_samples: List[int] = [5, 5],
        sigma: Optional[np.ndarray] = None,
        b_true: Optional[np.ndarray] = None,
        w_true: Optional[np.ndarray] = None
) -> AnnData:
    """
    Generates compositional data with binary covariates.

    Every combination of active/inactive covariates forms one group of samples.
    The covariate matrix has one binary column per covariate; the rows of group ``i`` hold
    the binary representation of ``i``.
    For each sample, a parameter vector is drawn from a multivariate normal distribution with mean
    ``x @ w_true + b_true`` and covariance ``sigma``, transformed with the softmax function,
    and used as the probabilities of a multinomial draw of ``n_total`` cells.

    Parameters
    ----------
    cases
        number of (binary) covariates.
        This leads to ``cases`` columns in X and 2**cases groups, one for each combination of active/inactive covariates.
    K
        Number of cell types
    n_total
        number of cells per sample
    n_samples
        Number of samples per case combination. Must have length 2**cases
    sigma
        covariance matrix for cell types, size KxK. Defaults to 0.05 times the identity matrix
    b_true
        bias coefficients, size K. Drawn uniformly from [-3, 3) if not specified
    w_true
        Effect matrix, size (cases)xK. Generated with ``sparse_effect_matrix`` if not specified

    Returns
    -------
    compositional data

    data
        Anndata object with the cell counts in ``X``, the covariates in ``obs`` (columns ``x_0``, ``x_1``, ...),
        and ``b_true``, ``w_true`` in ``uns``

    Raises
    ------
    ValueError
        If ``n_samples`` does not contain exactly one entry per case combination
    """
    n_groups = 2**cases
    if len(n_samples) != n_groups:
        raise ValueError(
            f"n_samples must contain one entry per case combination "
            f"(expected {n_groups}, got {len(n_samples)})"
        )

    # Number of rows of the effect matrix. It must equal the number of columns
    # of x (one per covariate), otherwise x @ w_true is not defined.
    D = cases

    # Uniform intercepts if none are specifed
    if b_true is None:
        b_true = np.random.uniform(-3, 3, size=K).astype(np.float64)  # bias (alpha)

    # Randomly select covariates that should correlate if none are specified
    # NOTE(review): both draws return values in 0..D-1 resp. 0..K-1, so the
    # generated effect matrix can be all zeros - confirm whether this is intended.
    if w_true is None:
        n_d = np.random.choice(range(D), size=1)
        n_k = np.random.choice(range(K), size=1)
        w_true = sparse_effect_matrix(D, K, n_d, n_k)

    # Sigma is a scaled identity matrix if not specified
    if sigma is None:
        sigma = np.identity(K) * 0.05

    # Initialize x, y
    n_rows = sum(n_samples)
    x = np.zeros((n_rows, cases))
    y = np.zeros((n_rows, K))

    # Binary representation of a number as list of fixed length
    def binary(num, length):
        return [int(x_n) for x_n in bin(num)[2:].zfill(length)]

    row = 0
    # For all combinations of cases
    for i, group_size in enumerate(n_samples):
        pattern = binary(i, cases)
        # For each sample with this combination
        for _ in range(group_size):
            # row of x is binary representation
            x[row] = pattern

            # Generate y
            alpha = np.random.multivariate_normal(mean=x[row] @ w_true + b_true, cov=sigma).astype(
                np.float64)

            concentration = softmax(alpha).astype(np.float64)
            y[row] = np.random.multinomial(n_total, concentration)
            row += 1

    x = x.astype(np.float64)
    y = y.astype(np.float64)

    x_names = ["x_" + str(n) for n in range(x.shape[1])]
    x_df = pd.DataFrame(x, columns=x_names)
    x_df.index = x_df.index.astype(str)

    data = ad.AnnData(X=y, obs=x_df, uns={"b_true": b_true, "w_true": w_true})

    return data
122 |
123 |
def b_w_from_abs_change(
        counts_before: np.ndarray = np.array([200, 200, 200, 200, 200]),
        abs_change: np.ndarray = np.array([50, 0, 0, 0, 0]),
        n_total: int = 1000
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Calculates intercepts and slopes from starting counts and an absolute change in cell counts

    Cell types with a nonzero entry in ``abs_change`` get the count ``counts_before + abs_change``.
    All other cell types share the remaining cells (``n_total`` minus the changed counts) equally.
    The slopes are the difference of the log-proportions after and before the change,
    shifted such that the slope of the last cell type is 0.

    Parameters
    ----------
    counts_before
        cell counts for control samples
    abs_change
        change of each cell type in terms of cell counts (0 for unaffected cell types)
    n_total
        number of cells per sample. This stays constant over all samples!!!

    Returns
    -------
    Returns an intercept and an effect array

    intercepts
        intercept parameters
    slopes
        slope parameters
    """

    # Work in floating point: the counts of unaffected cell types are in
    # general not integer and would be truncated in an integer array.
    counts_before = np.asarray(counts_before, dtype=np.float64)
    abs_change = np.asarray(abs_change, dtype=np.float64)

    K = counts_before.shape[0]

    # calculate intercepts for control samples
    b = np.log(counts_before / n_total)

    # count vector after applying the effect.
    counts_after = counts_before + abs_change
    is_da = abs_change != 0
    n_non_da = K - np.count_nonzero(is_da)
    # Distribute the remaining cells evenly over the unaffected cell types
    # (nothing to distribute if every cell type changes)
    if n_non_da > 0:
        counts_after[~is_da] = (n_total - np.sum(counts_after[is_da])) / n_non_da

    # Get parameter vector with effect
    b_after = np.log(counts_after / n_total)

    # w is the difference of b before and after
    w = b_after - b
    # Shift w such that the last cell type has no effect
    w = w - w[K - 1]

    return b, w
174 |
175 |
def counts_from_first(
        b_0: int = 200,
        n_total: int = 1000,
        K: int = 5
) -> np.ndarray:
    """
    Builds a count vector of length K that sums to n_total, given its first entry.

    The first entry is set to b_0; the remaining K-1 entries share the rest of
    n_total equally.

    Parameters
    ----------
    b_0
        size of first entry
    n_total
        total sum of all entries
    K
        length of output vector (number of cell types)

    Returns
    -------
    A count array

    b
        count vector (not necessarily integer), size K

    """
    # Value shared by entries 2...K
    remainder_share = (n_total - b_0) / (K - 1)

    counts = np.full(K, remainder_share)
    counts[0] = b_0
    return counts
204 |
205 |
def sparse_effect_matrix(
        D: int,
        K: int,
        n_d: int,
        n_k: int
) -> np.ndarray:
    """
    Generates a sparse effect matrix

    ``n_d`` covariates and ``n_k`` cell types are drawn at random (without replacement).
    Every combination of a drawn covariate and a drawn cell type gets an effect that is
    drawn uniformly from (0.3, 0.5, 1); all other entries are 0.

    Parameters
    ----------
    D
        Number of covariates
    K
        Number of cell types
    n_d
        Number of covariates that have an effect
    n_k
        Number of cell types that are affected by each of these covariates

    Returns
    -------
    An effect matrix

    w_true
        Effect matrix of shape (D, K)
    """

    # Choose indices of affected cell types and covariates randomly
    d_eff = np.random.choice(range(D), size=n_d, replace=False)
    k_eff = np.random.choice(range(K), size=n_k, replace=False)

    # Possible entries of w_true
    w_choice = [0.3, 0.5, 1]

    w_true = np.zeros((D, K))
    # Fill in w_true
    for i in d_eff:
        for j in k_eff:
            # np.random.choice(3, 1) returns a length-1 array, which cannot be used to index a list.
            # Extract the scalar explicitly (same random draws as before).
            c = int(np.random.choice(3, 1)[0])
            w_true[i, j] = w_choice[c]

    return w_true
249 |
--------------------------------------------------------------------------------
/sccoda/util/data_visualization.py:
--------------------------------------------------------------------------------
1 | """
2 | This document contains methods to visualize compositional data that was imported into scCODA's data format.
3 |
4 | :authors: Johannes Ostner
5 | """
6 |
7 | # Setup
8 |
9 | import numpy as np
10 | import pandas as pd
11 | import matplotlib.pyplot as plt
12 | import seaborn as sns
13 | from matplotlib import cm, rcParams
14 | from matplotlib.colors import ListedColormap
15 |
16 | from anndata import AnnData
17 | from typing import Optional, Tuple, Collection, Union, List
18 |
19 | sns.set_style("ticks")
20 |
21 |
def stackbar(
        y: np.ndarray,
        type_names: List[str],
        title: str,
        level_names: List[str],
        figsize: Optional[Tuple[int, int]] = None,
        dpi: Optional[int] = 100,
        cmap: Optional[ListedColormap] = cm.tab20,
        plot_legend: Optional[bool] = True,
) -> plt.Subplot:
    """
    Draws one stacked bar per row of ``y``, showing the percentage of each cell type within that row.
    Typical use (only inside stacked_barplot): stackbar(data.X, data.var.index, "xyz", data.obs.index)

    Parameters
    ----------
    y
        The count data, collapsed onto the level of interest, i.e. one row per bar and one column per cell type
    type_names
        The names of all cell types (used as legend labels)
    title
        Plot title, usually the covariate's name
    level_names
        names of the covariate's levels (used as x tick labels)
    figsize
        figure size; falls back to matplotlib's rcParams["figure.figsize"] if None
    dpi
        dpi setting
    cmap
        The color map for the barplot. Colors are reused cyclically if there are more cell types than colors
    plot_legend
        If True, adds a legend

    Returns
    -------
    Returns a plot

    ax
        a plot

    """
    n_bars, n_types = y.shape

    if figsize is None:
        figsize = rcParams["figure.figsize"]

    fig, ax = plt.subplots(figsize=figsize, dpi=dpi)

    x_positions = np.arange(n_bars)
    row_totals = np.sum(y, axis=1)
    bar_bottoms = np.zeros(n_bars)
    barwidth = 0.85

    # Stack the cell types on top of each other, one layer per cell type
    for type_idx in range(n_types):
        percentages = y[:, type_idx] / row_totals * 100
        ax.bar(
            x_positions,
            percentages,
            bottom=bar_bottoms,
            color=cmap(type_idx % cmap.N),
            width=barwidth,
            label=type_names[type_idx],
            linewidth=0,
        )
        bar_bottoms += percentages

    ax.set_title(title)
    if plot_legend:
        ax.legend(loc='upper left', bbox_to_anchor=(1, 1), ncol=1)
    ax.set_xticks(x_positions)
    ax.set_xticklabels(level_names, rotation=45)
    ax.set_ylabel("Proportion")

    return ax
88 |
89 |
def stacked_barplot(
        data: AnnData,
        feature_name: str,
        figsize: Optional[Tuple[int, int]] = None,
        dpi: Optional[int] = 100,
        cmap: Optional[ListedColormap] = cm.tab20,
        plot_legend: Optional[bool] = True,
        level_order: List[str] = None
) -> plt.Subplot:

    """
    Plots a stacked barplot for all levels of a covariate or all samples (if feature_name=="samples").
    Usage: stacked_barplot(data, "cov1")

    Parameters
    ----------
    data
        A scCODA compositional data object
    feature_name
        The name of the covariate (column of data.obs) to plot.
        If feature_name=="samples", one bar for every sample will be plotted
    figsize
        figure size
    dpi
        dpi setting
    cmap
        The color map for the barplot
    plot_legend
        If True, adds a legend
    level_order
        Custom ordering of bars on the x-axis. Must contain exactly the levels of the covariate
        (or the sample names if feature_name=="samples")

    Returns
    -------
    Returns a plot

    g:
        a plot

    """

    # Plot settings that are handed through to stackbar unchanged
    plot_settings = dict(figsize=figsize, dpi=dpi, cmap=cmap, plot_legend=plot_legend)

    # option to plot one stacked barplot per sample
    if feature_name == "samples":
        if level_order:
            assert set(level_order) == set(data.obs.index), "level order is inconsistent with levels"
            data = data[level_order]
        return stackbar(
            data.X,
            type_names=data.var.index,
            title="samples",
            level_names=data.obs.index,
            **plot_settings
        )

    # cell type names
    type_names = data.var.index

    # Order levels: custom order > category order > order of first appearance
    if level_order:
        assert set(level_order) == set(data.obs[feature_name]), "level order is inconsistent with levels"
        levels = level_order
    elif hasattr(data.obs[feature_name], 'cat'):
        levels = data.obs[feature_name].cat.categories.to_list()
    else:
        levels = pd.unique(data.obs[feature_name])

    # Sum up the counts of all samples that belong to the same level
    feature_totals = np.zeros([len(levels), data.X.shape[1]])
    for row, level_value in enumerate(levels):
        members = np.where(data.obs[feature_name] == level_value)
        feature_totals[row] = np.sum(data.X[members], axis=0)

    return stackbar(
        feature_totals,
        type_names=type_names,
        title=feature_name,
        level_names=levels,
        **plot_settings
    )
176 |
177 |
def boxplots(
        data: AnnData,
        feature_name: str,
        y_scale: str = "relative",
        plot_facets: bool = False,
        add_dots: bool = False,
        cell_types: Optional[list] = None,
        args_boxplot: Optional[dict] = {},
        args_swarmplot: Optional[dict] = {},
        figsize: Optional[Tuple[int, int]] = None,
        dpi: Optional[int] = 100,
        cmap: Optional[str] = "Blues",
        plot_legend: Optional[bool] = True,
        level_order: List[str] = None
) -> Optional[Tuple[plt.Subplot, sns.axisgrid.FacetGrid]]:
    """\
    Grouped boxplot visualization. The cell counts for each cell type are shown as a group of boxplots,
    with intra--group separation by a covariate from data.obs.

    The cell type groups can either be ordered along the x-axis of a single plot (plot_facets=False) or as plot facets (plot_facets=True).

    Parameters
    ----------
    data
        A scCODA-compatible data object
    feature_name
        The name of the feature in data.obs to plot
    y_scale
        Transformation of cell counts. Options: "relative" - Relative abundance, "log" - log(count + 1), "count" - absolute abundance (cell counts)
    plot_facets
        If False, plot cell types on the x-axis. If True, plot as facets
    add_dots
        If True, overlay a scatterplot with one dot for each data point
    cell_types
        Subset of cell types that should be plotted
    args_boxplot
        Arguments passed to sns.boxplot. The passed dictionary is not modified
    args_swarmplot
        Arguments passed to sns.swarmplot. The passed dictionary is not modified
    figsize
        figure size (only used if `plot_facets = False`)
    dpi
        dpi setting (only used if `plot_facets = False`)
    cmap
        The seaborn color map for the barplot
    plot_legend
        If True, adds a legend (only used if `plot_facets = False`)
    level_order
        Custom ordering of bars on the x-axis

    Returns
    -------
    Depending on `plot_facets`, returns a :class:`~plt.AxesSubplot` (`plot_facets = False`) or :class:`~sns.axisgrid.FacetGrid` (`plot_facets = True`) object

    ax
        if `plot_facets = False`
    g
        if `plot_facets = True`

    Raises
    ------
    ValueError
        If `y_scale` is not one of "relative", "log", "count"
    """

    # Work on private copies of the keyword dictionaries. They are modified below ("hue" is popped,
    # "hue_order" is added); without the copy these changes would leak into the caller's dictionaries
    # and - for the shared default {} - into every later call of this function.
    args_boxplot = dict(args_boxplot) if args_boxplot else {}
    args_swarmplot = dict(args_swarmplot) if args_swarmplot else {}

    # y scale transformations
    if y_scale == "relative":
        sample_sums = np.sum(data.X, axis=1, keepdims=True)
        X = data.X/sample_sums
        value_name = "Proportion"
    # add pseudocount 1 if using log scale (needs to be improved)
    elif y_scale == "log":
        X = np.log(data.X + 1)
        value_name = "log(count)"
    elif y_scale == "count":
        X = data.X
        value_name = "count"
    else:
        raise ValueError("Invalid y_scale transformation")

    # Long format: one row per (sample, cell type) combination
    count_df = pd.DataFrame(X, columns=data.var.index, index=data.obs.index).\
        merge(data.obs[feature_name], left_index=True, right_index=True)
    plot_df = pd.melt(count_df, id_vars=feature_name, var_name="Cell type", value_name=value_name)
    if cell_types is not None:
        plot_df = plot_df[plot_df["Cell type"].isin(cell_types)]

    if plot_facets:

        if level_order is None:
            level_order = pd.unique(plot_df[feature_name])

        K = X.shape[1]

        g = sns.FacetGrid(
            plot_df,
            col="Cell type",
            sharey=False,
            col_wrap=int(np.floor(np.sqrt(K))),
            height=5,
            aspect=2,
        )
        g.map(
            sns.boxplot,
            feature_name,
            value_name,
            palette=cmap,
            order=level_order,
            **args_boxplot
        )

        if add_dots:

            # "hue" must be handed to FacetGrid.map as a positional column name, not as a keyword
            hue = args_swarmplot.pop("hue", None)

            if hue is None:
                g.map(
                    sns.swarmplot,
                    feature_name,
                    value_name,
                    color="black",
                    order=level_order,
                    **args_swarmplot
                ).set_titles("{col_name}")
            else:
                g.map(
                    sns.swarmplot,
                    feature_name,
                    value_name,
                    hue,
                    order=level_order,
                    **args_swarmplot
                ).set_titles("{col_name}")

        return g

    else:

        if level_order:
            args_boxplot["hue_order"] = level_order
            args_swarmplot["hue_order"] = level_order

        fig, ax = plt.subplots(figsize=figsize, dpi=dpi)

        sns.boxplot(x="Cell type", y=value_name, hue=feature_name, data=plot_df, fliersize=1,
                    palette=cmap, ax=ax, **args_boxplot)

        if add_dots:
            sns.swarmplot(
                x="Cell type",
                y=value_name,
                data=plot_df,
                hue=feature_name,
                ax=ax,
                dodge=True,
                color="black",
                **args_swarmplot
            )

        cell_types = pd.unique(plot_df["Cell type"])
        ax.set_xticklabels(cell_types, rotation=90)

        if plot_legend:
            # Boxplot and swarmplot both register legend entries; keep only the first entry per label
            handles, labels = ax.get_legend_handles_labels()
            handout = []
            labelout = []
            for h, l in zip(handles, labels):
                if l not in labelout:
                    labelout.append(l)
                    handout.append(h)
            ax.legend(handout, labelout, loc='upper left', bbox_to_anchor=(1, 1), ncol=1, title=feature_name)

        plt.tight_layout()

        return ax
350 |
351 |
def rel_abundance_dispersion_plot(
        data: AnnData,
        abundant_threshold: Optional[float] = 0.9,
        default_color: Optional[str] = "Grey",
        abundant_color: Optional[str] = "Red",
        label_cell_types: bool = True,
        figsize: Optional[Tuple[int, int]] = None,
        dpi: Optional[int] = 100,

) -> plt.Subplot:
    """
    Plots the dispersion of relative abundance (variance divided by mean) versus the presence
    (share of samples with a nonzero count) of all cell types, for determination of a reference cell type.
    If the count of a cell type is larger than 0 in more than abundant_threshold (as a share between 0 and 1)
    of all samples, the cell type will be marked in a different color.

    Parameters
    ----------
    data
        A scCODA compositional data object
    abundant_threshold
        Presence threshold for abundant cell types.
    default_color
        dot color for all non-abundant cell types, default: "Grey"
    abundant_color
        dot color for cell types with presence larger than abundant_threshold, default: "Red"
    label_cell_types
        boolean - label dots of abundant cell types with cell type names
    figsize
        figure size
    dpi
        dpi setting

    Returns
    -------
    Returns a plot

    ax
        a plot
    """

    fig, ax = plt.subplots(figsize=figsize, dpi=dpi)

    rel_abun = data.X / np.sum(data.X, axis=1, keepdims=True)

    # Share of samples in which each cell type has a count of exactly 0
    percent_zero = np.sum(data.X == 0, axis=0) / data.X.shape[0]
    nonrare_ct = np.where(percent_zero < 1-abundant_threshold)[0]

    # Dispersion (variance-to-mean ratio) of the relative abundance
    cell_type_disp = np.var(rel_abun, axis=0) / np.mean(rel_abun, axis=0)

    is_abundant = [x in nonrare_ct for x in range(data.X.shape[1])]

    # Scatterplot
    plot_df = pd.DataFrame({
        "Total dispersion": cell_type_disp,
        "Cell type": data.var.index,
        "Presence": 1-percent_zero,
        "Is abundant": is_abundant
    })

    # One color per hue level that actually occurs (False is sorted before True)
    if any(is_abundant) and not all(is_abundant):
        palette = [default_color, abundant_color]
    elif not any(is_abundant):
        palette = [default_color]
    else:
        palette = [abundant_color]

    sns.scatterplot(
        data=plot_df,
        x="Presence",
        y="Total dispersion",
        hue="Is abundant",
        palette=palette,
        ax=ax
    )

    # Text labels for abundant cell types
    if label_cell_types:
        abundant_df = plot_df.loc[plot_df["Is abundant"], :]
        for _, point in abundant_df.iterrows():
            # small horizontal offset so that the label does not overlap with the dot
            ax.text(
                point["Presence"] + .02*ax.get_xlim()[1],
                point["Total dispersion"],
                str(point["Cell type"])
            )

    ax.legend(loc='upper left', bbox_to_anchor=(1, 1), ncol=1, title="Is abundant")

    plt.tight_layout()
    return ax
448 |
--------------------------------------------------------------------------------
/sccoda/util/helper_functions.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
def sample_size_estimate(
        mcc_desired: float,
        increase: float,
        lf_increase: float
) -> int:
    """
    calculates the estimated number of required samples for
    fixed MCC, absolute increase and log2-fold change using the fitted linear model from
    Büttner, Ostner et al., 2020

    Linear model parameters:

    - `(Intercept)`:    -1.3675613850217

    - `total_samples`:    0.0193158965178381

    - `log_fold_increase`:   0.704729538709909

    - `log_increase`:    0.315857162659738

    - `log_fold_increase * log_increase` (interaction):   -0.0927955725385892

    Parameters
    ----------
    mcc_desired
        desired MCC value
    increase
        mean absolute increase of cells between the groups
    lf_increase
        mean log2-fold increase of cells from one group to the other

    Returns
    -------
    sample size estimate

    n_samples -- int
        estimated number of required samples (never negative).
        For scalar inputs, an int is returned; for array inputs, an array of rounded values.

    """

    # scale and transform input features
    mscale_min = 0.3440976088844191
    scaled_mcc = (mcc_desired+1)/2
    scaled_mcc = np.log((scaled_mcc+mscale_min)/(1-scaled_mcc+mscale_min))
    log_inc = np.log(increase)

    # inverse regress sample size
    interaction = 0.0927955725385892 * lf_increase * log_inc
    increase_effects = 0.704729538709909 * lf_increase + 0.315857162659738 * log_inc - interaction
    offset = scaled_mcc + 1.3675613850217
    n_samples = (offset - increase_effects) / 0.0193158965178381

    # Scalar inputs: numpy scalars do not support item assignment, so clip explicitly
    if np.ndim(n_samples) == 0:
        n_scalar = float(n_samples)
        if n_scalar < 0 or interaction > offset:
            n_scalar = 0.0
        # nan/inf (e.g. from a non-positive increase) cannot be converted to int
        return int(np.round(n_scalar)) if np.isfinite(n_scalar) else n_scalar

    # Array inputs: a negative estimate means that no samples are needed
    n_samples[n_samples < 0] = 0
    n_samples[interaction > offset] = 0
    return np.round(n_samples)
56 |
--------------------------------------------------------------------------------
/sccoda/util/result_classes.py:
--------------------------------------------------------------------------------
1 | """
2 | Results class that summarizes the results of scCODA and calculates test statistics.
3 | This class extends the ``InferenceData`` class in the ``arviz`` package and can use all plotting and diagnostic
4 | functionalities of it.
5 |
6 | Additionally, this class can produce nicely readable outputs for scCODA.
7 |
8 | :authors: Johannes Ostner
9 | """
10 | import numpy as np
11 | import arviz as az
12 | import pandas as pd
13 | import pickle as pkl
14 |
15 | from typing import Optional, Tuple, Collection, Union, List
16 |
17 |
class CAResultConverter(az.data.io_dict.DictConverter):
    """
    Helper class that converts the dictionaries held by arviz's DictConverter into a ``CAResult`` object
    """

    def to_result_data(self, sampling_stats, model_specs):
        """
        Converts all data groups to xarray and bundles them into a ``CAResult``.

        Parameters
        ----------
        sampling_stats
            Passed on to ``CAResult`` as first argument
        model_specs
            Passed on to ``CAResult`` as second argument

        Returns
        -------
        A ``CAResult`` object that contains all converted data groups
        """

        # Group name -> converted data, passed as keyword arguments to CAResult
        groups = {
            "posterior": self.posterior_to_xarray(),
            "sample_stats": self.sample_stats_to_xarray(),
            "posterior_predictive": self.posterior_predictive_to_xarray(),
            "prior": self.prior_to_xarray(),
            "sample_stats_prior": self.sample_stats_prior_to_xarray(),
            "prior_predictive": self.prior_predictive_to_xarray(),
            "observed_data": self.observed_data_to_xarray(),
        }

        return CAResult(sampling_stats, model_specs, **groups)
45 |
46 |
47 | class CAResult(az.InferenceData):
48 | """
49 | Result class for scCODA, extends the arviz framework for inference data.
50 |
51 | The CAResult class is an extension of az.InferenceData, that adds some information about the compositional model
52 | and is able to print humanly readable results.
53 | It supports all functionality from az.InferenceData.
54 | """
55 |
def __init__(
        self,
        sampling_stats: dict,
        model_specs: dict,
        **kwargs
):
    """
    Gathers sampling information from a compositional model and converts it to a ``az.InferenceData`` object.
    The following attributes are added during class initialization:

    ``self.sampling_stats``: dict - see below
    ``self.model_specs``: dict - see below
    ``self.is_sccoda``: bool - True if the posterior contains a variable named "ind"

    ``self.intercept_df``: Intercept dataframe from ``CAResult.summary_prepare``
    ``self.effect_df``: Effect dataframe from ``CAResult.summary_prepare``

    Parameters
    ----------
    sampling_stats
        Information and statistics about the MCMC sampling procedure.
        Default keys:
        - "chain_length": Length of MCMC chain (with burnin samples)
        - "num_burnin": Number of burnin samples
        - "acc_rate": MCMC Acceptance rate
        - "duration": Duration of MCMC sampling

    model_specs
        All information and statistics about the model specifications.
        Default keys:
        - "formula": Formula string
        - "reference": int - identifier of reference cell type

        Added during class initialization:
        - "threshold_prob": Threshold for inclusion probability that separates significant from non-significant effects
    kwargs
        passed to az.InferenceData. This includes the MCMC chain states and statistics for each MCMC sample.
    """
    # Name the class explicitly: super(self.__class__, self) resolves to the subclass when this
    # constructor is inherited, which makes the call recurse into itself forever.
    super(CAResult, self).__init__(**kwargs)

    self.sampling_stats = sampling_stats
    self.model_specs = model_specs

    # The "ind" variable is used as marker for a scCODA model
    self.is_sccoda = "ind" in list(self.posterior.data_vars)

    intercept_df, effect_df = self.summary_prepare()

    self.intercept_df = intercept_df
    self.effect_df = effect_df
107 |
def summary_prepare(
        self,
        est_fdr: float = 0.05,
        *args,
        **kwargs
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Builds the summary dataframes for intercepts ("alpha") and effects ("beta").
    This function builds on and supports all functionalities from ``az.summary``.

    Parameters
    ----------
    est_fdr
        Desired FDR value
    args
        Passed to ``az.summary``
    kwargs
        Passed to ``az.summary``

    Returns
    -------
    Intercept and effect DataFrames

    intercept_df -- pandas df
        Summary of intercept parameters. Contains one row per cell type.

        Columns:
        - Final Parameter: Final intercept model parameter
        - HDI X%: Upper and lower boundaries of confidence interval (width specified via hdi_prob=)
        - SD: Standard deviation of MCMC samples
        - Expected sample: Expected cell counts for a sample with no present covariates. See the tutorial for more explanation

    effect_df -- pandas df
        Summary of effect (slope) parameters. Contains one row per covariate/cell type combination.

        Columns:
        - Final Parameter: Final effect model parameter. If this parameter is 0, the effect is not significant, else it is.
        - HDI X%: Upper and lower boundaries of confidence interval (width specified via hdi_prob=)
        - SD: Standard deviation of MCMC samples
        - Expected sample: Expected cell counts for a sample with only the current covariate set to 1. See the tutorial for more explanation
        - log2-fold change: Log2-fold change between expected cell counts with no covariates and with only the current covariate
        - Inclusion probability: Share of MCMC samples, for which this effect was not set to 0 by the spike-and-slab prior.
    """

    # Summary statistics from arviz, split by parameter name
    stats = az.summary(self, *args, **kwargs, kind="stats", var_names=["alpha", "beta"])
    is_effect_row = stats.index.str.match("beta")
    is_intercept_row = stats.index.str.match("alpha")
    effect_df = stats.loc[is_effect_row].copy()
    intercept_df = stats.loc[is_intercept_row].copy()

    # Readable indices: cell types for intercepts, (covariate, cell type) pairs for effects
    coords = self.posterior.coords
    cell_types = coords["cell_type"].values
    covariates = coords["covariate"].values

    intercept_df.index = pd.Index(cell_types, name="Cell Type")
    effect_df.index = pd.MultiIndex.from_product(
        [covariates, cell_types],
        names=["Covariate", "Cell Type"]
    )

    # Columns that az.summary does not provide
    intercept_df = self.complete_alpha_df(intercept_df)
    effect_df = self.complete_beta_df(intercept_df, effect_df, est_fdr)

    # Names of the two HDI columns, and their display names
    hdis = intercept_df.columns[intercept_df.columns.str.contains("hdi")]
    hdis_new = hdis.str.replace("hdi_", "HDI ")
    hdi_low, hdi_high = hdis[0], hdis[1]

    # Credible interval of the effects, calculated only on samples where the indicator is not (close to) 0
    if self.is_sccoda is True:
        indicator = self.posterior["ind"]
        selected_raw = self.posterior["b_raw"] * indicator.where(indicator >= 1e-3)

        selected_summary = az.summary(
            az.convert_to_inference_data(selected_raw),
            *args,
            kind="stats",
            var_names=["x"],
            skipna=True,
            **kwargs
        )

        ref_index = self.model_specs["reference"]
        n_conditions = len(self.posterior.coords["covariate"])
        n_cell_types = len(self.posterior.coords["cell_type"])

        # Insert an all-zero row at the position of the reference cell type for every covariate
        zero_row = pd.DataFrame.from_dict(data={"mean": [0], "sd": [0], hdi_low: [0], hdi_high: [0]})
        for cond in range(n_conditions):
            position = (cond * n_cell_types) + ref_index
            selected_summary = pd.concat([
                selected_summary.iloc[:position],
                zero_row,
                selected_summary.iloc[position:],
            ]).reset_index(drop=True)

        effect_df.loc[:, hdi_low] = list(selected_summary[hdi_low])
        effect_df.loc[:, hdi_high] = list(selected_summary.loc[:, hdi_high])

    # Keep only the relevant columns and give them nice names
    intercept_columns = ["final_parameter", hdi_low, hdi_high, "sd", "expected_sample"]
    intercept_names = ["Final Parameter", hdis_new[0], hdis_new[1], "SD", "Expected Sample"]
    intercept_df = intercept_df.loc[:, intercept_columns].copy()
    intercept_df = intercept_df.rename(columns=dict(zip(intercept_df.columns, intercept_names)))

    effect_columns = ["final_parameter", hdi_low, hdi_high, "sd", "inclusion_prob",
                      "expected_sample", "log_fold"]
    effect_names = ["Final Parameter", hdis_new[0], hdis_new[1], "SD", "Inclusion probability",
                    "Expected Sample", "log2-fold change"]
    effect_df = effect_df.loc[:, effect_columns].copy()
    effect_df = effect_df.rename(columns=dict(zip(effect_df.columns, effect_names)))

    return intercept_df, effect_df
213 |
214 | def complete_beta_df(
215 | self,
216 | intercept_df: pd.DataFrame,
217 | effect_df: pd.DataFrame,
218 | target_fdr: float=0.05,
219 | ) -> pd.DataFrame:
220 | """
221 | Evaluation of MCMC results for effect parameters. This function is only used within self.summary_prepare.
222 | This function also calculates the posterior inclusion probability for each effect and decides whether effects are significant.
223 |
224 | Parameters
225 | ----------
226 | intercept_df
227 | Intercept summary, see ``self.summary_prepare``
228 | effect_df
229 | Effect summary, see ``self.summary_prepare``
230 | target_fdr
231 | Desired FDR value
232 |
233 | Returns
234 | -------
235 | effect DataFrame
236 |
237 | effect_df
238 | DataFrame with inclusion probability, final parameters, expected sample
239 | """
240 | beta_inc_prob = []
241 | beta_nonzero_mean = []
242 |
243 | beta_raw = np.array(self.posterior["beta"])[0]
244 |
245 | # Calculate inclusion prob, nonzero mean for every effect
246 | for j in range(beta_raw.shape[1]):
247 | for i in range(beta_raw.shape[2]):
248 | beta_i_raw = beta_raw[:, j, i]
249 | beta_i_raw_nonzero = np.where(np.abs(beta_i_raw) > 1e-3)[0]
250 | prob = beta_i_raw_nonzero.shape[0] / beta_i_raw.shape[0]
251 | beta_inc_prob.append(prob)
252 | if len(beta_i_raw[beta_i_raw_nonzero]) > 0:
253 | beta_nonzero_mean.append(beta_i_raw[beta_i_raw_nonzero].mean())
254 | else:
255 | beta_nonzero_mean.append(0)
256 |
257 | effect_df.loc[:, "inclusion_prob"] = beta_inc_prob
258 | effect_df.loc[:, "mean_nonzero"] = beta_nonzero_mean
259 |
260 | # Inclusion prob threshold value. Direct posterior probability approach cf. Newton et al. (2004)
261 | if self.is_sccoda is True:
262 | def opt_thresh(result, alpha):
263 |
264 | incs = np.array(result.loc[result["inclusion_prob"] > 0, "inclusion_prob"])
265 | incs[::-1].sort()
266 |
267 | for c in np.unique(incs):
268 | fdr = np.mean(1 - incs[incs >= c])
269 |
270 | if fdr < alpha:
271 | # ceiling with 3 decimals precision
272 | c = np.floor(c * 10 ** 3) / 10 ** 3
273 | return c, fdr
274 | return 1., 0
275 |
276 | threshold, fdr_ = opt_thresh(effect_df, target_fdr)
277 |
278 | self.model_specs["threshold_prob"] = threshold
279 |
280 | # Decide whether betas are significant or not, set non-significant ones to 0
281 | effect_df.loc[:, "final_parameter"] = np.where(effect_df.loc[:, "inclusion_prob"] >= threshold,
282 | effect_df.loc[:, "mean_nonzero"],
283 | 0)
284 | else:
285 | effect_df.loc[:, "final_parameter"] = effect_df.loc[:, "mean_nonzero"]
286 |
287 | # Get expected sample, log-fold change
288 | D = len(effect_df.index.levels[0])
289 | K = len(effect_df.index.levels[1])
290 |
291 | y_bar = np.mean(np.sum(np.array(self.observed_data.y), axis=1))
292 | alpha_par = intercept_df.loc[:, "final_parameter"]
293 | alphas_exp = np.exp(alpha_par)
294 | alpha_sample = (alphas_exp / np.sum(alphas_exp) * y_bar).values
295 |
296 | beta_mean = alpha_par
297 | beta_sample = []
298 | log_sample = []
299 |
300 | for d in range(D):
301 | beta_d = effect_df.loc[:, "final_parameter"].values[(d*K):((d+1)*K)]
302 | beta_d = (beta_mean + beta_d)
303 | beta_d = np.exp(beta_d)
304 | beta_d = beta_d / np.sum(beta_d) * y_bar
305 |
306 | beta_sample = np.append(beta_sample, beta_d)
307 | log_sample = np.append(log_sample, np.log2(beta_d/alpha_sample))
308 |
309 | effect_df.loc[:, "expected_sample"] = beta_sample
310 | effect_df.loc[:, "log_fold"] = log_sample
311 |
312 | return effect_df
313 |
314 | def complete_alpha_df(
315 | self,
316 | intercept_df: pd.DataFrame
317 | ) -> pd.DataFrame:
318 | """
319 | Evaluation of MCMC results for intercepts. This function is only used within self.summary_prepare.
320 |
321 | Parameters
322 | ----------
323 | intercept_df
324 | Intercept summary, see self.summary_prepare
325 |
326 | Returns
327 | -------
328 | intercept DataFrame
329 |
330 | intercept_df
331 | Summary DataFrame with expected sample, final parameters
332 | """
333 |
334 | intercept_df = intercept_df.rename(columns={"mean": "final_parameter"})
335 |
336 | # Get expected sample
337 | y_bar = np.mean(np.sum(np.array(self.observed_data.y), axis=1))
338 | alphas_exp = np.exp(intercept_df.loc[:, "final_parameter"])
339 | alpha_sample = (alphas_exp / np.sum(alphas_exp) * y_bar).values
340 | intercept_df.loc[:, "expected_sample"] = alpha_sample
341 |
342 | return intercept_df
343 |
def summary(
        self,
        *args,
        **kwargs
):
    """
    Printing method for scCODA's summary.

    Usage: ``result.summary()``

    Parameters
    ----------
    args
        Passed to ``self.summary_prepare`` (and from there to az.summary)
    kwargs
        Passed to ``self.summary_prepare`` (and from there to az.summary)

    Returns
    -------
    prints to console

    """

    # If other than default values for e.g. confidence interval are specified,
    # recalculate them for intercept and effect DataFrames
    if args or kwargs:
        intercept_df, effect_df = self.summary_prepare(*args, **kwargs)
    else:
        intercept_df = self.intercept_df
        effect_df = self.effect_df

    # Get number of samples, cell types.
    # If no y_hat is available, use the dimensions of the observed data instead of made-up numbers
    y_hat = self.sampling_stats.get("y_hat")
    if y_hat is not None:
        data_dims = y_hat.shape
    else:
        data_dims = tuple(np.array(self.observed_data.y).shape[:2])

    # Cut down DataFrames to relevant info
    alphas_print = intercept_df.loc[:, ["Final Parameter", "Expected Sample"]]
    betas_print = effect_df.loc[:, ["Final Parameter", "Expected Sample", "log2-fold change"]]

    # Print everything neatly
    print("Compositional Analysis summary:")
    print("")
    print("Data: %d samples, %d cell types" % data_dims)
    print("Reference index: %s" % str(self.model_specs["reference"]))
    print("Formula: %s" % self.model_specs["formula"])
    print("")
    print("Intercepts:")
    print(alphas_print)
    print("")
    print("")
    print("Effects:")
    print(betas_print)
398 |
def summary_extended(
        self,
        *args,
        **kwargs
):

    """
    Extended (diagnostic) printing function that shows more info about the sampling result

    Parameters
    ----------
    args
        Passed to ``self.summary_prepare`` (and from there to az.summary)
    kwargs
        Passed to ``self.summary_prepare`` (and from there to az.summary)

    Returns
    -------
    Prints to console

    """

    # If other than default values for e.g. confidence interval are specified,
    # recalculate them for intercept and effect DataFrames
    if args or kwargs:
        intercept_df, effect_df = self.summary_prepare(*args, **kwargs)
    else:
        intercept_df = self.intercept_df
        effect_df = self.effect_df

    # Get number of samples, cell types.
    # y_hat may be None or missing; use the dimensions of the observed data in that case
    y_hat = self.sampling_stats.get("y_hat")
    if y_hat is not None:
        data_dims = y_hat.shape
    else:
        data_dims = tuple(np.array(self.observed_data.y).shape[:2])

    # Print everything
    print("Compositional Analysis summary (extended):")
    print("")
    print("Data: %d samples, %d cell types" % data_dims)
    print("Reference index: %s" % str(self.model_specs["reference"]))
    print("Formula: %s" % self.model_specs["formula"])
    if self.is_sccoda:
        print("Spike-and-slab threshold: {threshold:.3f}".format(threshold=self.model_specs["threshold_prob"]))
    print("")
    print("MCMC Sampling: Sampled {num_results} chain states ({num_burnin} burnin samples) in {duration:.3f} sec. "
          "Acceptance rate: {ar:.1f}%".format(num_results=self.sampling_stats["chain_length"],
                                              num_burnin=self.sampling_stats["num_burnin"],
                                              duration=self.sampling_stats["duration"],
                                              ar=(100*self.sampling_stats["acc_rate"])))
    print("")
    print("Intercepts:")
    print(intercept_df)
    print("")
    print("")
    print("Effects:")
    print(effect_df)
453 |
def compare_parameters_to_truth(
        self,
        b_true: pd.Series,
        w_true: pd.Series,
        *args,
        **kwargs
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Extends data frames from summary_prepare by a comparison to some ground truth intercept and effect values that
    are assumed to be from the same generative model (e.g. in data_generation)

    Parameters
    ----------
    b_true
        Ground truth intercept values. Joined onto the intercept DataFrame by index
    w_true
        Ground truth effect values. Joined onto the effect DataFrame by index
    args
        Forwarded to self.summary_prepare
    kwargs
        Forwarded to self.summary_prepare

    Returns
    -------
    Extends intercept and effect DataFrames

    intercept_df
        Summary DataFrame for intercepts with columns "predicted", "truth", "dist_to_truth", "effect_correct"
    effect_df
        Summary DataFrame for effects with the same additional columns
    """

    intercept_df, effect_df = self.summary_prepare(*args, **kwargs)

    # The summary tables name the point estimate "Final Parameter" (see the
    # other methods of this class); the lowercase spelling is kept as a
    # fallback for tables written with the older column name.
    to_predicted = {"Final Parameter": "predicted", "final_parameter": "predicted"}
    intercept_df = intercept_df.rename(columns=to_predicted)
    effect_df = effect_df.rename(columns=to_predicted)

    # Get true params, join to calculated parameters
    intercept_df = intercept_df.join(b_true.rename("truth"))
    effect_df = effect_df.join(w_true.rename("truth"))

    # decide whether effects are found correctly: a parameter counts as
    # correct when truth and prediction agree on being zero / non-zero
    for df in (intercept_df, effect_df):
        df['dist_to_truth'] = df['truth'] - df['predicted']
        df['effect_correct'] = ((df['truth'] == 0) == (df['predicted'] == 0))

    return intercept_df, effect_df
504 |
def distance_to_truth(self) -> pd.DataFrame:
    """
    Compares real cell count matrix to the cell count matrix stored in ``self.sampling_stats["y_hat"]``

    Returns
    -------
    DataFrame with distances

    ret
        DataFrame with one row labelled 'Total' (averages over all entries) followed by one row per
        cell type (labelled 1..K). Columns: 'Cell Type', 'Absolute Error', 'Relative Error',
        'Actual Means', 'Predicted Means'
    """

    # Get absolute (counts) and relative error matrices
    y = np.array(self.observed_data.y)
    y_hat = self.sampling_stats["y_hat"]
    err = np.abs(y_hat - y)

    # Zero observed counts produce inf (error > 0) or nan (error == 0); both
    # are replaced right below, so the division warnings carry no information.
    with np.errstate(divide='ignore', invalid='ignore'):
        err_rel = err / y
    err_rel[np.isinf(err_rel)] = 1.
    err_rel[np.isnan(err_rel)] = 0.

    # Calculate mean errors for each cell type and overall
    avg_abs_cell_type_error = np.mean(err, axis=0, dtype=np.float64)
    avg_rel_cell_type_error = np.mean(err_rel, axis=0, dtype=np.float64)
    avg_abs_total_error = np.mean(err, dtype=np.float64)
    avg_rel_total_error = np.mean(err_rel, dtype=np.float64)

    # Build the label column up front instead of overwriting the first entry
    # of an integer column afterwards (chained assignment does not reliably
    # write through to the DataFrame).
    n_cell_types = y.shape[1]
    labels = ['Total'] + list(range(1, n_cell_types + 1))

    ret = pd.DataFrame({'Cell Type': labels,
                        'Absolute Error': np.append(avg_abs_total_error, avg_abs_cell_type_error),
                        'Relative Error': np.append(avg_rel_total_error, avg_rel_cell_type_error),
                        'Actual Means': np.append(np.mean(y, axis=(0, 1)), np.mean(y, axis=0)),
                        'Predicted Means': np.append(np.mean(y_hat, axis=(0, 1)), np.mean(y_hat, axis=0))})

    return ret
540 |
def credible_effects(
        self,
        est_fdr=None
) -> pd.Series:

    """
    Decides which effects of the scCODA model are credible, i.e. have a non-zero final parameter.

    Parameters
    ----------
    est_fdr
        Estimated false discovery rate. Must be between 0 and 1. If None (default), the stored
        ``self.effect_df`` is used; otherwise the effect table is recomputed via
        ``self.summary_prepare(est_fdr=est_fdr)``

    Returns
    -------
    Credible effect decision series

    out
        Boolean Series named "credible change"; True where the "Final Parameter" of an effect is non-zero

    Raises
    ------
    ValueError
        If est_fdr is given but lies outside [0, 1]
    """

    if est_fdr is None:
        eff_df = self.effect_df
    else:
        # Validate every supplied value; an exact ``type(...) == float`` check
        # would silently ignore ints and numpy floats.
        if est_fdr < 0 or est_fdr > 1:
            raise ValueError("est_fdr must be between 0 and 1!")
        _, eff_df = self.summary_prepare(est_fdr=est_fdr)

    # Series.rename returns a new object, so its result has to be kept
    out = (eff_df["Final Parameter"] != 0).rename("credible change")

    return out
574 |
def save(
        self,
        path_to_file: str
):
    """
    Pickle this result object to disk (pickle protocol 4). Caution: Files can quickly become very large!

    Parameters
    ----------
    path_to_file
        Location of the file to write; it is opened in binary write mode

    Returns
    -------

    """
    with open(path_to_file, "wb") as handle:
        pkl.dump(self, handle, protocol=4)
593 |
def set_fdr(
        self,
        est_fdr: float,
        *args,
        **kwargs):
    """
    Recompute the summary tables for a desired expected FDR level and store them on the result object
    (direct posterior probability approach, per the original description)

    Parameters
    ----------
    est_fdr
        Desired FDR value
    args
        passed to self.summary_prepare
    kwargs
        passed to self.summary_prepare

    Returns
    -------
    Adjusts self.intercept_df and self.effect_df
    """

    # summary_prepare yields the (intercepts, effects) pair; overwrite the
    # stored tables with the recomputed ones
    self.intercept_df, self.effect_df = self.summary_prepare(*args, est_fdr=est_fdr, **kwargs)
620 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | # Note: To use the 'upload' functionality of this file, you must:
5 | # $ pip install twine
6 |
7 | import io
8 | import os
9 | import sys
10 | from shutil import rmtree
11 |
12 | from setuptools import find_packages, setup, Command
13 |
# Package meta-data.
# These constants are passed to setup() at the bottom of this script.
NAME = 'scCODA'
DESCRIPTION = 'A Dirichlet-Multinomial approach to identify compositional changes in count data.'
URL = 'https://github.com/theislab/scCODA'
EMAIL = 'johannes.ostner@helmholtz-muenchen.de'
AUTHOR = 'Johannes Ostner, Benjamin Schubert'
REQUIRES_PYTHON = '>=3.7.0'
# If VERSION is falsy, the version is read from <NAME>/__version__.py instead.
VERSION = "0.1.9"

# What packages are required for this module to be executed?
# Passed to setup() as install_requires.
REQUIRED = [
    "numpy>=1.21",
    "scipy",
    "tensorflow>=2.8",
    "tensorflow-probability>=0.16.0",
    "arviz>=0.11",
    "seaborn",
    "pandas>=1.0",
    "matplotlib>=3.0",
    "scanpy",
    "anndata",
    "patsy",
    "statsmodels",
    "rpy2",
]

# What packages are optional?
# Passed to setup() as extras_require; currently no optional extras are defined.
EXTRAS = {}
42 |
43 | # The rest you shouldn't have to touch too much :)
44 | # ------------------------------------------------
45 | # Except, perhaps the License and Trove Classifiers!
46 | # If you do change the License, remember to change the Trove Classifier for that!
47 |
# Directory containing this setup script; all paths below are resolved against it.
here = os.path.abspath(os.path.dirname(__file__))

# Use the README as the long description when it is available next to this
# script; otherwise fall back to the one-line DESCRIPTION.
# Note: this will only work if 'README.md' is present in your MANIFEST.in file!
readme_path = os.path.join(here, 'README.md')
try:
    with io.open(readme_path, encoding='utf-8') as readme:
        long_description = '\n' + readme.read()
except FileNotFoundError:
    long_description = DESCRIPTION

# Resolve the package version: an explicit VERSION wins, otherwise execute the
# package's __version__.py and collect the names it defines in `about`.
about = {}
if VERSION:
    about['__version__'] = VERSION
else:
    with open(os.path.join(here, NAME, '__version__.py')) as version_file:
        exec(version_file.read(), about)
65 |
66 |
class UploadCommand(Command):
    """Support setup.py upload.

    Builds the source and wheel distributions, uploads them with Twine and
    pushes a git tag for the released version. The release is aborted as soon
    as one of the shell steps fails, so a broken build is never uploaded or
    tagged.
    """

    description = 'Build and publish the package.'
    user_options = []

    @staticmethod
    def status(s):
        """Prints things in bold."""
        print('\033[1m{0}\033[0m'.format(s))

    @staticmethod
    def _run_or_abort(command):
        """Run a shell command and exit with an error message if it reports failure."""
        if os.system(command) != 0:
            sys.exit('Command failed, aborting upload: {0}'.format(command))

    def initialize_options(self):
        pass

    def finalize_options(self):
        pass

    def run(self):
        try:
            self.status('Removing previous builds…')
            rmtree(os.path.join(here, 'dist'))
        except OSError:
            # Nothing to clean up (e.g. no previous 'dist' directory)
            pass

        # No '--universal': that flag tags the wheel as Python 2 and 3
        # compatible, while this package declares a Python 3 only requirement.
        self.status('Building Source and Wheel distribution…')
        self._run_or_abort('{0} setup.py sdist bdist_wheel'.format(sys.executable))

        self.status('Uploading the package to PyPI via Twine…')
        self._run_or_abort('twine upload dist/*')

        self.status('Pushing git tags…')
        self._run_or_abort('git tag v{0}'.format(about['__version__']))
        self._run_or_abort('git push --tags')

        sys.exit()
102 |
103 |
104 | # Where the magic happens:
setup(
    # Name, version and descriptions come from the meta-data constants and the
    # README / version handling earlier in this script.
    name=NAME,
    version=about['__version__'],
    description=DESCRIPTION,
    long_description=long_description,
    long_description_content_type='text/markdown',
    author=AUTHOR,
    author_email=EMAIL,
    python_requires=REQUIRES_PYTHON,
    url=URL,
    # Ship every discovered package except the data, tests and tutorials directories.
    packages=find_packages(exclude=("data", "tests", "tutorials")),
    install_requires=REQUIRED,
    extras_require=EXTRAS,
    include_package_data=True,
    license='BSD',
    keywords=[
        "RNA",
        "single cell",
        "composition",
        "CODA",
        "compositional analysis"
    ],
    classifiers=[
        # Trove classifiers
        # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers
        "License :: OSI Approved :: BSD License",
        "Intended Audience :: Science/Research",
        'Programming Language :: Python',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.7',
        "Topic :: Scientific/Engineering :: Bio-Informatics",
    ],
    # $ setup.py upload support: registers UploadCommand under the name 'upload'.
    cmdclass={
        'upload': UploadCommand,
    },
)
142 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theislab/scCODA/887955e5f968960e2112fdab4258a205596540ee/tests/__init__.py
--------------------------------------------------------------------------------
/tests/unit_tests.py:
--------------------------------------------------------------------------------
1 | """
2 | Unit tests for scCODA
3 | """
4 |
5 | import unittest
6 | import numpy as np
7 | import scanpy as sc
8 | import tensorflow as tf
9 | import pandas as pd
10 | import os
11 | import sys
12 | sys.path.insert(0, os.path.abspath('.'))
13 | sys.path.insert(0, os.path.abspath('..'))
14 |
15 | from sccoda.util import cell_composition_data as dat
16 | from sccoda.util import comp_ana as mod
17 | from sccoda.util import data_generation as gen
18 |
19 | pd.set_option('display.max_columns', 500)
20 | pd.set_option('display.max_rows', 500)
21 |
22 |
class TestDataGeneration(unittest.TestCase):
    """
    Testing whether the data generation functions from data_generation work as intended

    All expected values are pinned for the numpy random seed 1234.
    """

    def setUp(self):

        self.N = 3
        self.D = 1
        self.K = 2
        self.n_total = [1000] * self.N
        self.noise_std_true = 1
        self.covariate_mean = None
        self.covariate_var = None
        self.sigma = None
        self.b_true = None
        self.w_true = None

    def test_case_control_gen(self):
        """
        Tests data generation for case/control scenarios: covariate column,
        count matrix and stored ground-truth parameters
        """
        np.random.seed(1234)

        cases = 1
        K = 2
        n_total = 1000
        n_samples = [2, 2]
        sigma = None
        b_true = None
        w_true = None

        data = gen.generate_case_control(cases, K, n_total, n_samples, sigma, b_true, w_true)

        self.assertTrue(
            np.allclose(data.obs["x_0"], [0, 0, 1, 1], rtol=0, atol=1e-5),
            "obs is not correct!"
        )
        self.assertTrue(
            np.array_equal(data.X, np.array([[74., 926.], [58., 942.], [32., 968.], [53., 947.]])),
            "X is not correct!"
        )
        # Absolute tolerance in both directions: a signed difference would let
        # arbitrarily large negative deviations of b_true pass.
        b_expected = np.array([-1.8508832, 0.7326526], dtype=np.float64)
        self.assertTrue(
            np.allclose(data.uns["b_true"], b_expected, rtol=0, atol=1e-5)
            and np.array_equal(data.uns["w_true"], np.array([[0., 0.]])),
            "uns is not correct!"
        )

    def test_change_functions(self):
        """
        Tests gen.b_w_from_abs_change and gen.counts_from_first
        """
        np.random.seed(1234)

        counts_before = np.array([600, 400])
        abs_change = 100
        n_total = 1000
        K = 2
        b_0 = 600

        b, w = gen.b_w_from_abs_change(counts_before, abs_change, n_total)

        self.assertTrue(
            np.allclose(b, [-0.51082562, -0.91629073], rtol=0, atol=1e-5),
            "gen.b_w_from_abs_change: b not correct!"
        )
        self.assertTrue(
            np.allclose(w, [0.44183275, 0.], rtol=0, atol=1e-5),
            "gen.b_w_from_abs_change: w not correct!"
        )

        b_2 = gen.counts_from_first(b_0, n_total, K)
        self.assertTrue(
            np.array_equal(b_2, [600., 400.]),
            "gen.counts_from_first not correct!"
        )
110 |
111 |
class TestDataImport(unittest.TestCase):
    """
    Tests for building compositional data objects from a pandas DataFrame and from scanpy objects.
    Only the resulting shapes (and, for scanpy, the cell type names) are checked.
    """

    def test_from_pandas(self):
        # Get Haber Salmonella data (path is relative to the working directory)
        data_raw = pd.read_csv(os.path.abspath("sccoda/datasets/haber_counts.csv"))

        salm_indices = [0, 1, 2, 3, 8, 9]
        salm_df = data_raw.iloc[salm_indices, :]

        data_salm = dat.from_pandas(salm_df, covariate_columns=["Mouse"])
        # Strip the "_<digit>" suffix; regex=True is explicit because the
        # default of Series.str.replace differs between pandas versions.
        data_salm.obs["Condition"] = data_salm.obs["Mouse"].str.replace(r"_[0-9]", "", regex=True)

        # Only check size of x, obs
        self.assertEqual(data_salm.X.shape, (6, 8))
        self.assertEqual(data_salm.obs.shape, (6, 2))

    def test_from_scanpy(self):
        # Get scanpy example data, add covariates, read in three times
        adata_ref = sc.datasets.pbmc3k_processed()
        adata_ref.uns["cov"] = {"x_0": 0, "x_1": 1}
        adata_ref_1 = adata_ref.copy()
        adata_ref_1.uns["cov"] = {"x_0": 1, "x_1": 1}

        data = dat.from_scanpy_list([adata_ref, adata_ref, adata_ref_1],
                                    cell_type_identifier="louvain",
                                    covariate_key="cov")

        # Only check size of x, obs and the cell type names
        self.assertEqual(data.X.shape, (3, 8))
        self.assertEqual(data.obs.shape, (3, 2))
        self.assertEqual(
            data.var.index.tolist(),
            ['CD4 T cells', 'CD14+ Monocytes', 'B cells', 'CD8 T cells',
             'NK cells', 'FCGR3A+ Monocytes', 'Dendritic cells', 'Megakaryocytes']
        )
148 |
149 |
class TestModels(unittest.TestCase):
    """
    Fits the compositional model on six samples of the Haber data with several samplers and checks
    that the "Expected Sample" columns of the summary are within 30 counts of the observed group means
    (rows 0-3 vs. rows 4-5 of the count matrix).
    """

    # Largest tolerated absolute difference between observed and expected mean counts
    MAX_COUNT_DEVIATION = 30

    def setUp(self):

        # Get Haber count data (path is relative to the working directory)
        data_raw = pd.read_csv(os.path.abspath("sccoda/datasets/haber_counts.csv"))

        salm_indices = [0, 1, 2, 3, 8, 9]
        salm_df = data_raw.iloc[salm_indices, :]

        data_salm = dat.from_pandas(salm_df, covariate_columns=["Mouse"])
        # Strip the "_<digit>" suffix so that samples share a condition label;
        # regex=True is explicit because the default of Series.str.replace
        # differs between pandas versions.
        data_salm.obs["Condition"] = data_salm.obs["Mouse"].str.replace(r"_[0-9]", "", regex=True)
        self.data = data_salm

    def _assert_group_means_recovered(self, expected_alphas, expected_betas):
        """Assert that the expected samples lie within the tolerance of the observed mean counts."""
        # Mean cell counts for both groups
        alphas_true = np.round(np.mean(self.data.X[:4], 0), 0)
        betas_true = np.round(np.mean(self.data.X[4:], 0), 0)

        # Mean cell counts for simulated data
        final_alphas = np.round(expected_alphas.tolist(), 0)
        final_betas = np.round(expected_betas.tolist(), 0)

        # Check if model approximately predicts ground truth
        self.assertFalse(
            any(np.abs(alphas_true - final_alphas) > self.MAX_COUNT_DEVIATION),
            "expected samples of the intercepts deviate from the observed means"
        )
        self.assertFalse(
            any(np.abs(betas_true - final_betas) > self.MAX_COUNT_DEVIATION),
            "expected samples of the effects deviate from the observed means"
        )

    def test_hmc(self):
        np.random.seed(1234)
        tf.random.set_seed(5678)

        model_salm = mod.CompositionalAnalysis(self.data, formula="Condition", reference_cell_type=5)

        # Run MCMC
        sim_results = model_salm.sample_hmc(num_results=20000, num_burnin=5000)
        self.sim_results = sim_results
        alpha_df, beta_df = sim_results.summary_prepare()

        self._assert_group_means_recovered(alpha_df.loc[:, "Expected Sample"],
                                           beta_df.loc[:, "Expected Sample"])

    def test_hmc_da(self):
        np.random.seed(1234)
        tf.random.set_seed(5678)

        model_salm = mod.CompositionalAnalysis(self.data, formula="Condition", reference_cell_type=5)

        # Run MCMC
        sim_results = model_salm.sample_hmc_da(num_results=20000, num_burnin=5000)
        self.sim_results = sim_results
        alpha_df, beta_df = sim_results.summary_prepare()

        self._assert_group_means_recovered(alpha_df.loc[:, "Expected Sample"],
                                           beta_df.loc[:, "Expected Sample"])

    def test_nuts(self):
        np.random.seed(1234)
        tf.random.set_seed(5678)

        model_salm = mod.CompositionalAnalysis(self.data, formula="Condition", reference_cell_type=5)

        # Run MCMC
        sim_results = model_salm.sample_nuts(num_results=2000, num_burnin=500)
        self.sim_results = sim_results
        alpha_df, beta_df = sim_results.summary_prepare()

        self._assert_group_means_recovered(alpha_df.loc[:, "Expected Sample"],
                                           beta_df.loc[:, "Expected Sample"])

    def test_multi_cond(self):
        np.random.seed(1234)
        tf.random.set_seed(5678)

        # Second, randomly drawn binary covariate
        self.data.obs["Condition2"] = np.random.randint(0, 2, len(self.data.obs))

        model_salm = mod.CompositionalAnalysis(self.data, formula="Condition+Condition2", reference_cell_type=5)

        # Run MCMC
        sim_results = model_salm.sample_hmc(num_results=20000, num_burnin=5000)
        self.sim_results = sim_results
        alpha_df, beta_df = sim_results.summary_prepare()

        # Only the effects of the real condition are compared to the group means
        self._assert_group_means_recovered(alpha_df.loc[:, "Expected Sample"],
                                           beta_df.loc[("Condition[T.Salm]",), "Expected Sample"])

        # The random covariate must not produce any non-zero final parameter
        self.assertFalse(
            any(beta_df.loc[("Condition2",), "Final Parameter"] != 0),
            "random covariate Condition2 has non-zero final parameters"
        )
266 |
267 |
# Run the whole test suite when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
270 |
--------------------------------------------------------------------------------
/tutorials/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theislab/scCODA/887955e5f968960e2112fdab4258a205596540ee/tutorials/__init__.py
--------------------------------------------------------------------------------
/tutorials/test:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theislab/scCODA/887955e5f968960e2112fdab4258a205596540ee/tutorials/test
--------------------------------------------------------------------------------