├── .codecov.yaml
├── .cruft.json
├── .editorconfig
├── .github
├── ISSUE_TEMPLATE
│ ├── bug_report.yml
│ ├── config.yml
│ └── feature_request.yml
└── workflows
│ ├── build.yaml
│ ├── release.yaml
│ └── test.yaml
├── .gitignore
├── .pre-commit-config.yaml
├── .readthedocs.yaml
├── .vscode
├── extensions.json
├── launch.json
└── settings.json
├── CHANGELOG.md
├── LICENSE
├── README.md
├── biome.jsonc
├── docs
├── _static
│ ├── .gitkeep
│ ├── css
│ │ └── custom.css
│ └── images
│ │ ├── logo.png
│ │ ├── mlm.png
│ │ ├── ora.png
│ │ └── ulm.png
├── _templates
│ ├── .gitkeep
│ └── autosummary
│ │ └── class.rst
├── api
│ ├── bm.md
│ ├── ds.md
│ ├── index.md
│ ├── mt.md
│ ├── op.md
│ ├── pl.md
│ ├── pp.md
│ └── tl.md
├── changelog.md
├── conf.py
├── extensions
│ └── typed_returns.py
├── index.md
├── notebooks
│ ├── bench
│ │ ├── index.md
│ │ └── rna.ipynb
│ ├── bulk
│ │ ├── index.md
│ │ └── rna.ipynb
│ ├── example.ipynb
│ ├── index.md
│ ├── omnipath
│ │ ├── index.md
│ │ ├── licenses.ipynb
│ │ └── orthologs.ipynb
│ ├── scell
│ │ ├── index.md
│ │ ├── rna_psbk.ipynb
│ │ ├── rna_pstime.ipynb
│ │ └── rna_sc.ipynb
│ └── spatial
│ │ ├── index.md
│ │ └── rna_visium.ipynb
├── references.bib
└── references.md
├── pyproject.toml
├── src
└── decoupler
│ ├── _Method.py
│ ├── _Plotter.py
│ ├── __init__.py
│ ├── _datatype.py
│ ├── _docs.py
│ ├── _download.py
│ ├── _log.py
│ ├── _odeps.py
│ ├── bm
│ ├── __init__.py
│ ├── _pp.py
│ ├── _run.py
│ ├── metric
│ │ ├── __init__.py
│ │ ├── _auc.py
│ │ ├── _fscore.py
│ │ ├── _hmean.py
│ │ └── _qrank.py
│ └── pl
│ │ ├── __init__.py
│ │ ├── _auc.py
│ │ ├── _bar.py
│ │ ├── _format.py
│ │ ├── _fscore.py
│ │ ├── _qrank.py
│ │ └── _summary.py
│ ├── ds
│ ├── __init__.py
│ ├── _bulk.py
│ ├── _scell.py
│ ├── _spatial.py
│ ├── _toy.py
│ └── _utils.py
│ ├── mt
│ ├── __init__.py
│ ├── _aucell.py
│ ├── _consensus.py
│ ├── _decouple.py
│ ├── _gsea.py
│ ├── _gsva.py
│ ├── _mdt.py
│ ├── _methods.py
│ ├── _mlm.py
│ ├── _ora.py
│ ├── _run.py
│ ├── _udt.py
│ ├── _ulm.py
│ ├── _viper.py
│ ├── _waggr.py
│ └── _zscore.py
│ ├── op
│ ├── __init__.py
│ ├── _collectri.py
│ ├── _dorothea.py
│ ├── _dtype.py
│ ├── _hallmark.py
│ ├── _progeny.py
│ ├── _resource.py
│ └── _translate.py
│ ├── pl
│ ├── __init__.py
│ ├── _barplot.py
│ ├── _dotplot.py
│ ├── _filter_by_expr.py
│ ├── _filter_by_prop.py
│ ├── _filter_samples.py
│ ├── _leading_edge.py
│ ├── _network.py
│ ├── _obsbar.py
│ ├── _obsm.py
│ ├── _order.py
│ ├── _order_targets.py
│ ├── _source_targets.py
│ └── _volcano.py
│ ├── pp
│ ├── __init__.py
│ ├── anndata.py
│ ├── data.py
│ └── net.py
│ └── tl
│ ├── __init__.py
│ ├── _rankby_group.py
│ ├── _rankby_obsm.py
│ └── _rankby_order.py
└── tests
├── bm
├── test_benchmark.py
└── test_pl.py
├── conftest.py
├── ds
├── test_bulk.py
├── test_scell.py
├── test_spatial.py
├── test_toy.py
└── test_utils.py
├── mt
├── test_aucell.py
├── test_consensus.py
├── test_decouple.py
├── test_gsea.py
├── test_gsva.py
├── test_mdt.py
├── test_methods.py
├── test_mlm.py
├── test_ora.py
├── test_run.py
├── test_udt.py
├── test_ulm.py
├── test_viper.py
├── test_waggr.py
└── test_zscore.py
├── op
├── test_collectri.py
├── test_dorothea.py
├── test_dtype.py
├── test_hallmark.py
├── test_progeny.py
├── test_resource.py
└── test_translate.py
├── pl
├── test_Plotter.py
├── test_barplot.py
├── test_dotplot.py
├── test_filter_by_expr.py
├── test_filter_by_prop.py
├── test_filter_samples.py
├── test_leading_edge.py
├── test_network.py
├── test_obsbar.py
├── test_obsm.py
├── test_order.py
├── test_order_targets.py
├── test_source_targets.py
└── test_volcano.py
├── pp
├── test_anndata.py
├── test_data.py
└── test_net.py
├── test_download.py
├── test_version.py
└── tl
├── test_rankby_group.py
├── test_rankby_obsm.py
└── test_rankby_order.py
/.codecov.yaml:
--------------------------------------------------------------------------------
1 | # Based on pydata/xarray
2 | codecov:
3 | require_ci_to_pass: no
4 |
5 | coverage:
6 | status:
7 | project:
8 | default:
9 | # Require 1% coverage, i.e., always succeed
10 | target: 1
11 | patch: false
12 | changes: false
13 |
14 | comment:
15 | layout: diff, flags, files
16 | behavior: once
17 | require_base: no
18 |
--------------------------------------------------------------------------------
/.cruft.json:
--------------------------------------------------------------------------------
1 | {
2 | "template": "https://github.com/scverse/cookiecutter-scverse",
3 | "commit": "5842d5cb8510e1d4a037a8f772630d51ec86de96",
4 | "checkout": null,
5 | "context": {
6 | "cookiecutter": {
7 | "project_name": "decoupler",
8 | "package_name": "decoupler",
9 | "project_description": "Python package to perform enrichment analysis from omics data.",
10 | "author_full_name": "Pau Badia i Mompel",
11 | "author_email": "pau.badia@uni-heidelberg.de",
12 | "github_user": "PauBadiaM",
13 | "github_repo": "decoupler",
14 | "license": "BSD 3-Clause License",
15 | "ide_integration": true,
16 | "_copy_without_render": [
17 | ".github/workflows/build.yaml",
18 | ".github/workflows/test.yaml",
19 | "docs/_templates/autosummary/**.rst"
20 | ],
21 | "_exclude_on_template_update": [
22 | "CHANGELOG.md",
23 | "LICENSE",
24 | "README.md",
25 | "docs/api.md",
26 | "docs/index.md",
27 | "docs/notebooks/example.ipynb",
28 | "docs/references.bib",
29 | "docs/references.md",
30 | "src/**",
31 | "tests/**"
32 | ],
33 | "_render_devdocs": false,
34 | "_jinja2_env_vars": {
35 | "lstrip_blocks": true,
36 | "trim_blocks": true
37 | },
38 | "_template": "https://github.com/scverse/cookiecutter-scverse",
39 | "_commit": "5842d5cb8510e1d4a037a8f772630d51ec86de96"
40 | }
41 | },
42 | "directory": null
43 | }
44 |
--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
1 | root = true
2 |
3 | [*]
4 | indent_style = space
5 | indent_size = 4
6 | end_of_line = lf
7 | charset = utf-8
8 | trim_trailing_whitespace = true
9 | insert_final_newline = true
10 |
11 | [{*.{yml,yaml,toml},.cruft.json}]
12 | indent_size = 2
13 |
14 | [Makefile]
15 | indent_style = tab
16 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.yml:
--------------------------------------------------------------------------------
1 | name: Bug report
2 | description: Report something that is broken or incorrect
3 | labels: bug
4 | body:
5 | - type: markdown
6 | attributes:
7 | value: |
8 | **Note**: Please read [this guide](https://matthewrocklin.com/blog/work/2018/02/28/minimal-bug-reports)
9 | detailing how to provide the necessary information for us to reproduce your bug. In brief:
10 | * Please provide exact steps how to reproduce the bug in a clean Python environment.
11 | * In case it's not clear what's causing this bug, please provide the data or the data generation procedure.
12 | * Sometimes it is not possible to share the data, but usually it is possible to replicate problems on publicly
13 | available datasets or to share a subset of your data.
14 |
15 | - type: textarea
16 | id: report
17 | attributes:
18 | label: Report
19 | description: A clear and concise description of what the bug is.
20 | validations:
21 | required: true
22 |
23 | - type: textarea
24 | id: versions
25 | attributes:
26 | label: Versions
27 | description: |
28 | Which versions of which packages are installed.
29 |
30 | Please install `session-info2`, run the following command in a notebook,
31 | click the “Copy as Markdown” button, then paste the results into the text box below.
32 |
33 | ```python
34 | In[1]: import session_info2; session_info2.session_info(dependencies=True)
35 | ```
36 |
37 | Alternatively, run this in a console:
38 |
39 | ```python
40 | >>> import session_info2; print(session_info2.session_info(dependencies=True)._repr_mimebundle_()["text/markdown"])
41 | ```
42 | render: python
43 | placeholder: |
44 | anndata 0.11.3
45 | ---- ----
46 | charset-normalizer 3.4.1
47 | coverage 7.7.0
48 | psutil 7.0.0
49 | dask 2024.7.1
50 | jaraco.context 5.3.0
51 | numcodecs 0.15.1
52 | jaraco.functools 4.0.1
53 | Jinja2 3.1.6
54 | sphinxcontrib-jsmath 1.0.1
55 | sphinxcontrib-htmlhelp 2.1.0
56 | toolz 1.0.0
57 | session-info2 0.1.2
58 | PyYAML 6.0.2
59 | llvmlite 0.44.0
60 | scipy 1.15.2
61 | pandas 2.2.3
62 | sphinxcontrib-devhelp 2.0.0
63 | h5py 3.13.0
64 | tblib 3.0.0
65 | setuptools-scm 8.2.0
66 | more-itertools 10.3.0
67 | msgpack 1.1.0
68 | sparse 0.15.5
69 | wrapt 1.17.2
70 | jaraco.collections 5.1.0
71 | numba 0.61.0
72 | pyarrow 19.0.1
73 | pytz 2025.1
74 | MarkupSafe 3.0.2
75 | crc32c 2.7.1
76 | sphinxcontrib-qthelp 2.0.0
77 | sphinxcontrib-serializinghtml 2.0.0
78 | zarr 2.18.4
79 | asciitree 0.3.3
80 | six 1.17.0
81 | sphinxcontrib-applehelp 2.0.0
82 | numpy 2.1.3
83 | cloudpickle 3.1.1
84 | sphinxcontrib-bibtex 2.6.3
85 | natsort 8.4.0
86 | jaraco.text 3.12.1
87 | setuptools 76.1.0
88 | Deprecated 1.2.18
89 | packaging 24.2
90 | python-dateutil 2.9.0.post0
91 | ---- ----
92 | Python 3.13.2 | packaged by conda-forge | (main, Feb 17 2025, 14:10:22) [GCC 13.3.0]
93 | OS Linux-6.11.0-109019-tuxedo-x86_64-with-glibc2.39
94 | Updated 2025-03-18 15:47
95 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
1 | blank_issues_enabled: false
2 | contact_links:
3 | - name: Scverse Community Forum
4 | url: https://discourse.scverse.org/
5 | about: If you have questions about “How to do X”, please ask them here.
6 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.yml:
--------------------------------------------------------------------------------
1 | name: Feature request
2 | description: Propose a new feature for decoupler
3 | labels: enhancement
4 | body:
5 | - type: textarea
6 | id: description
7 | attributes:
8 | label: Description of feature
9 | description: Please describe your suggestion for a new feature. It might help to describe a problem or use case, plus any alternatives that you have considered.
10 | validations:
11 | required: true
12 |
--------------------------------------------------------------------------------
/.github/workflows/build.yaml:
--------------------------------------------------------------------------------
1 | name: Check Build
2 |
3 | on:
4 | push:
5 | branches: [main]
6 | pull_request:
7 | branches: [main]
8 |
9 | concurrency:
10 | group: ${{ github.workflow }}-${{ github.ref }}
11 | cancel-in-progress: true
12 |
13 | defaults:
14 | run:
15 | # to fail on error in multiline statements (-e), in pipes (-o pipefail), and on unset variables (-u).
16 | shell: bash -euo pipefail {0}
17 |
18 | jobs:
19 | package:
20 | runs-on: ubuntu-latest
21 | steps:
22 | - uses: actions/checkout@v4
23 | with:
24 | filter: blob:none
25 | fetch-depth: 0
26 | - name: Install uv
27 | uses: astral-sh/setup-uv@v5
28 | with:
29 | cache-dependency-glob: pyproject.toml
30 | - name: Build package
31 | run: uv build
32 | - name: Check package
33 | run: uvx twine check --strict dist/*.whl
34 |
--------------------------------------------------------------------------------
/.github/workflows/release.yaml:
--------------------------------------------------------------------------------
1 | name: Release
2 |
3 | on:
4 | release:
5 | types: [published]
6 |
7 | defaults:
8 | run:
9 | # to fail on error in multiline statements (-e), in pipes (-o pipefail), and on unset variables (-u).
10 | shell: bash -euo pipefail {0}
11 |
12 | # Use "trusted publishing", see https://docs.pypi.org/trusted-publishers/
13 | jobs:
14 | release:
15 | name: Upload release to PyPI
16 | runs-on: ubuntu-latest
17 | environment:
18 | name: pypi
19 | url: https://pypi.org/p/decoupler
20 | permissions:
21 | id-token: write # IMPORTANT: this permission is mandatory for trusted publishing
22 | steps:
23 | - uses: actions/checkout@v4
24 | with:
25 | filter: blob:none
26 | fetch-depth: 0
27 | - name: Install uv
28 | uses: astral-sh/setup-uv@v5
29 | with:
30 | cache-dependency-glob: pyproject.toml
31 | - name: Build package
32 | run: uv build
33 | - name: Publish package distributions to PyPI
34 | uses: pypa/gh-action-pypi-publish@release/v1
35 |
--------------------------------------------------------------------------------
/.github/workflows/test.yaml:
--------------------------------------------------------------------------------
1 | name: Test
2 |
3 | on:
4 | push:
5 | branches: [main]
6 | pull_request:
7 | branches: [main]
8 | schedule:
9 | - cron: "0 5 1,15 * *"
10 |
11 | concurrency:
12 | group: ${{ github.workflow }}-${{ github.ref }}
13 | cancel-in-progress: true
14 |
15 | defaults:
16 | run:
17 | # to fail on error in multiline statements (-e), in pipes (-o pipefail), and on unset variables (-u).
18 | shell: bash -euo pipefail {0}
19 |
20 | jobs:
21 | # Get the test environment from hatch as defined in pyproject.toml.
22 | # This ensures that the pyproject.toml is the single point of truth for test definitions and the same tests are
23 | # run locally and on continuous integration.
24 | # Check [[tool.hatch.envs.hatch-test.matrix]] in pyproject.toml and https://hatch.pypa.io/latest/environment/ for
25 | # more details.
26 | get-environments:
27 | runs-on: ubuntu-latest
28 | outputs:
29 | envs: ${{ steps.get-envs.outputs.envs }}
30 | steps:
31 | - uses: actions/checkout@v4
32 | with:
33 | filter: blob:none
34 | fetch-depth: 0
35 | - name: Install uv
36 | uses: astral-sh/setup-uv@v5
37 | - name: Get test environments
38 | id: get-envs
39 | run: |
40 | ENVS_JSON=$(uvx hatch env show --json | jq -c 'to_entries
41 | | map(
42 | select(.key | startswith("hatch-test"))
43 | | {
44 | name: .key,
45 | label: (if (.key | contains("pre")) then .key + " (PRE-RELEASE DEPENDENCIES)" else .key end),
46 | python: .value.python
47 | }
48 | )')
49 | echo "envs=${ENVS_JSON}" | tee $GITHUB_OUTPUT
50 |
51 | # Run tests through hatch. Spawns a separate runner for each environment defined in the hatch matrix obtained above.
52 | test:
53 | needs: get-environments
54 |
55 | strategy:
56 | fail-fast: false
57 | matrix:
58 | os: [ubuntu-latest]
59 | env: ${{ fromJSON(needs.get-environments.outputs.envs) }}
60 |
61 | name: ${{ matrix.env.label }}
62 | runs-on: ${{ matrix.os }}
63 |
64 | steps:
65 | - uses: actions/checkout@v4
66 | with:
67 | filter: blob:none
68 | fetch-depth: 0
69 | - name: Install uv
70 | uses: astral-sh/setup-uv@v5
71 | with:
72 | python-version: ${{ matrix.env.python }}
73 | cache-dependency-glob: pyproject.toml
74 | - name: create hatch environment
75 | run: uvx hatch env create ${{ matrix.env.name }}
76 | - name: run tests using hatch
77 | env:
78 | MPLBACKEND: agg
79 | PLATFORM: ${{ matrix.os }}
80 | DISPLAY: :42
81 | run: uvx hatch run ${{ matrix.env.name }}:run-cov
82 | - name: generate coverage report
83 | run: uvx hatch run ${{ matrix.env.name }}:coverage xml
84 | - name: Upload coverage
85 | uses: codecov/codecov-action@v4
86 | with:
87 | token: ${{ secrets.CODECOV_TOKEN }}
88 |
89 | # Check that all tests defined above pass. This makes it easy to set a single "required" test in branch
90 | # protection instead of having to update it frequently. See https://github.com/re-actors/alls-green#why.
91 | check:
92 | name: Tests pass in all hatch environments
93 | if: always()
94 | needs:
95 | - get-environments
96 | - test
97 | runs-on: ubuntu-latest
98 | steps:
99 | - uses: re-actors/alls-green@release/v1
100 | with:
101 | jobs: ${{ toJSON(needs) }}
102 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Temp files
2 | .DS_Store
3 | *~
4 | buck-out/
5 |
6 | # Compiled files
7 | .venv/
8 | __pycache__/
9 | .*cache/
10 |
11 | # Distribution / packaging
12 | /dist/
13 |
14 | # Tests and coverage
15 | /data/
16 | /node_modules/
17 |
18 | # docs
19 | /docs/generated/
20 | /docs/_build/
21 |
22 | # jupyter
23 | .ipynb_checkpoints/
24 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | fail_fast: false
2 | default_language_version:
3 | python: python3
4 | default_stages:
5 | - pre-commit
6 | - pre-push
7 | minimum_pre_commit_version: 2.16.0
8 | repos:
9 | - repo: https://github.com/biomejs/pre-commit
10 | rev: v1.9.4
11 | hooks:
12 | - id: biome-format
13 | exclude: ^\.cruft\.json$ # inconsistent indentation with cruft - file never to be modified manually.
14 | - repo: https://github.com/tox-dev/pyproject-fmt
15 | rev: v2.5.1
16 | hooks:
17 | - id: pyproject-fmt
18 | - repo: https://github.com/astral-sh/ruff-pre-commit
19 | rev: v0.11.5
20 | hooks:
21 | - id: ruff
22 | types_or: [python, pyi, jupyter]
23 | args: [--fix, --exit-non-zero-on-fix]
24 | - id: ruff-format
25 | types_or: [python, pyi, jupyter]
26 | - repo: https://github.com/pre-commit/pre-commit-hooks
27 | rev: v5.0.0
28 | hooks:
29 | - id: detect-private-key
30 | - id: check-ast
31 | - id: end-of-file-fixer
32 | - id: mixed-line-ending
33 | args: [--fix=lf]
34 | - id: trailing-whitespace
35 | - id: check-case-conflict
36 | # Check that there are no merge conflicts (could be generated by template sync)
37 | - id: check-merge-conflict
38 | args: [--assume-in-merge]
39 | - repo: local
40 | hooks:
41 | - id: forbid-to-commit
42 | name: Don't commit rej files
43 | entry: |
44 | Cannot commit .rej files. These indicate merge conflicts that arise during automated template updates.
45 | Fix the merge conflicts manually and remove the .rej files.
46 | language: fail
47 | files: '.*\.rej$'
48 |
--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
1 | # https://docs.readthedocs.io/en/stable/config-file/v2.html
2 | version: 2
3 | build:
4 | os: ubuntu-20.04
5 | tools:
6 | python: "3.10"
7 | sphinx:
8 | configuration: docs/conf.py
9 | # disable this for more lenient docs builds
10 | fail_on_warning: true
11 | python:
12 | install:
13 | - method: pip
14 | path: .
15 | extra_requirements:
16 | - doc
17 | - full
18 |
--------------------------------------------------------------------------------
/.vscode/extensions.json:
--------------------------------------------------------------------------------
1 | {
2 | "recommendations": [
3 | // GitHub integration
4 | "github.vscode-github-actions",
5 | "github.vscode-pull-request-github",
6 | // Language support
7 | "ms-python.python",
8 | "ms-python.vscode-pylance",
9 | "ms-toolsai.jupyter",
10 | "tamasfe.even-better-toml",
11 | // Dependency management
12 | "ninoseki.vscode-mogami",
13 | // Linting and formatting
14 | "editorconfig.editorconfig",
15 | "charliermarsh.ruff",
16 | "biomejs.biome",
17 | ],
18 | }
19 |
--------------------------------------------------------------------------------
/.vscode/launch.json:
--------------------------------------------------------------------------------
1 | {
2 | // Use IntelliSense to learn about possible attributes.
3 | // Hover to view descriptions of existing attributes.
4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
5 | "version": "0.2.0",
6 | "configurations": [
7 | {
8 | "name": "Python: Build Documentation",
9 | "type": "debugpy",
10 | "request": "launch",
11 | "module": "sphinx",
12 | "args": ["-M", "html", ".", "_build"],
13 | "cwd": "${workspaceFolder}/docs",
14 | "console": "internalConsole",
15 | "justMyCode": false,
16 | },
17 | {
18 | "name": "Python: Debug Test",
19 | "type": "debugpy",
20 | "request": "launch",
21 | "program": "${file}",
22 | "purpose": ["debug-test"],
23 | "console": "internalConsole",
24 | "justMyCode": false,
25 | "env": {
26 | "PYTEST_ADDOPTS": "--color=yes",
27 | },
28 | "presentation": {
29 | "hidden": true,
30 | },
31 | },
32 | ],
33 | }
34 |
--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "[python][json][jsonc]": {
3 | "editor.formatOnSave": true,
4 | },
5 | "[python]": {
6 | "editor.defaultFormatter": "charliermarsh.ruff",
7 | "editor.codeActionsOnSave": {
8 | "source.fixAll": "always",
9 | "source.organizeImports": "always",
10 | },
11 | },
12 | "[json][jsonc]": {
13 | "editor.defaultFormatter": "biomejs.biome",
14 | },
15 | "python.analysis.typeCheckingMode": "basic",
16 | "python.testing.pytestEnabled": true,
17 | "python.testing.pytestArgs": ["-vv", "--color=yes"],
18 | }
19 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 |
3 | Copyright (c) 2025, Pau Badia i Mompel, Saez lab
4 | All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 |
9 | 1. Redistributions of source code must retain the above copyright notice, this
10 | list of conditions and the following disclaimer.
11 |
12 | 2. Redistributions in binary form must reproduce the above copyright notice,
13 | this list of conditions and the following disclaimer in the documentation
14 | and/or other materials provided with the distribution.
15 |
16 | 3. Neither the name of the copyright holder nor the names of its
17 | contributors may be used to endorse or promote products derived from
18 | this software without specific prior written permission.
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # decoupler - Ensemble of methods to infer enrichment scores
2 |
3 |
4 |
5 | [![Tests][badge-tests]][tests]
6 | [![Documentation][badge-docs]][documentation]
7 |
8 | [![Issues][badge-issues]][issue tracker]
9 | [![Coverage][badge-coverage]][codecoverage]
10 | [![Stars][badge-stars]](https://github.com/saezlab/decoupler-py/stargazers)
11 |
12 | [![PyPI][badge-pypi]][pypi]
13 | [![Downloads month][badge-mdown]][down]
14 | [![Downloads all][badge-adown]][down]
15 |
16 | [![Conda version][badge-condav]][conda]
17 | [![Conda downloads][badge-condad]][conda]
18 |
19 | [badge-tests]: https://img.shields.io/github/actions/workflow/status/saezlab/decoupler-py/test.yaml?branch=main
20 | [badge-docs]: https://img.shields.io/readthedocs/decoupler-py
21 | [badge-condav]: https://img.shields.io/conda/vn/conda-forge/decoupler-py.svg
22 | [badge-condad]: https://img.shields.io/conda/dn/conda-forge/decoupler-py.svg
23 | [badge-issues]: https://img.shields.io/github/issues/saezlab/decoupler-py
24 | [badge-coverage]: https://codecov.io/gh/saezlab/decoupler-py/branch/main/graph/badge.svg
25 | [badge-pypi]: https://img.shields.io/pypi/v/decoupler.svg
26 | [badge-mdown]: https://static.pepy.tech/badge/decoupler/month
27 | [badge-adown]: https://static.pepy.tech/badge/decoupler
28 | [badge-stars]: https://img.shields.io/github/stars/saezlab/decoupler-py?style=flat&logo=github&color=yellow
29 |
30 | `decoupler` is a Python package containing different statistical methods
31 | for enrichment analysis, used to extract biologically driven scores
32 | from omics data within a unified framework. This is the faster and more memory-efficient Python implementation;
33 | a deprecated version in R can be found [here](https://github.com/saezlab/decoupler).
34 |
35 | It is a package from the [scverse][] ecosystem {cite:p}`scverse`,
36 | designed for easy interoperability with `anndata`, `scanpy` {cite:p}`scanpy` and other related packages.
37 |
38 | ## Getting started
39 |
40 | Please refer to the [documentation][],
41 | in particular, the [API documentation][].
42 |
43 | ## Installation
44 |
45 | You need to have Python 3.10 or newer installed on your system.
46 | If you don't have Python installed, we recommend installing [uv][].
47 |
48 | There are several alternative options to install decoupler:
49 |
50 | 1. Install the latest stable release from [PyPI][pypi] with minimal dependencies:
51 |
52 | ```bash
53 | pip install decoupler
54 | ```
55 |
56 | 2. Install the latest stable full release from [PyPI][pypi] with extra dependencies:
57 |
58 | ```bash
59 | pip install "decoupler[full]"
60 | ```
61 |
62 | 3. Install the latest stable version from [conda-forge][conda] using mamba or conda (pay attention to the `-py` suffix at the end):
63 |
64 | ```bash
65 | mamba create -n dcp conda-forge::decoupler-py
66 | ```
67 |
68 | 4. Install the latest development version:
69 |
70 | ```bash
71 | pip install git+https://github.com/saezlab/decoupler-py.git@main
72 | ```
73 |
74 | ## Release notes
75 |
76 | See the [changelog][].
77 |
78 | ## Contact
79 |
80 | For questions and help requests, you can reach out in the [scverse discourse][].
81 | If you found a bug, please use the [issue tracker][].
82 |
83 | ## Citation
84 |
85 | > Badia-i-Mompel P., Vélez Santiago J., Braunger J., Geiss C., Dimitrov D.,
86 | Müller-Dott S., Taus P., Dugourd A., Holland C.H., Ramirez Flores R.O.
87 | and Saez-Rodriguez J. 2022. decoupleR: Ensemble of computational methods
88 | to infer biological activities from omics data. Bioinformatics Advances.
89 |
90 |
91 | [uv]: https://github.com/astral-sh/uv
92 | [scverse discourse]: https://discourse.scverse.org/
93 | [scverse]: https://scverse.org/
94 | [issue tracker]: https://github.com/saezlab/decoupler-py/issues
95 | [tests]: https://github.com/saezlab/decoupler-py/actions/workflows/test.yaml
96 | [documentation]: https://decoupler-py.readthedocs.io
97 | [changelog]: https://decoupler-py.readthedocs.io/en/latest/changelog.html
98 | [api documentation]: https://decoupler-py.readthedocs.io/en/latest/api.html
99 | [pypi]: https://pypi.org/project/decoupler
100 | [down]: https://pepy.tech/project/decoupler
101 | [conda]: https://anaconda.org/conda-forge/decoupler-py
102 | [codecoverage]: https://codecov.io/gh/saezlab/decoupler-py
103 |
--------------------------------------------------------------------------------
/biome.jsonc:
--------------------------------------------------------------------------------
1 | {
2 | "$schema": "https://biomejs.dev/schemas/1.9.4/schema.json",
3 | "formatter": { "useEditorconfig": true },
4 | "overrides": [
5 | {
6 | "include": ["./.vscode/*.json", "**/*.jsonc"],
7 | "json": {
8 | "formatter": { "trailingCommas": "all" },
9 | "parser": {
10 | "allowComments": true,
11 | "allowTrailingCommas": true,
12 | },
13 | },
14 | },
15 | ],
16 | }
17 |
--------------------------------------------------------------------------------
/docs/_static/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saezlab/decoupler-py/b3471b11d76f9dd31499c64f7994db4f20714734/docs/_static/.gitkeep
--------------------------------------------------------------------------------
/docs/_static/css/custom.css:
--------------------------------------------------------------------------------
1 | /* Reduce the font size in data frames - See https://github.com/scverse/cookiecutter-scverse/issues/193 */
2 | div.cell_output table.dataframe {
3 | font-size: 0.8em;
4 | }
5 |
6 | /* Adjust the logo size */
7 | .logo img {
8 | width: 50%; /* or any percentage you want */
9 | height: auto; /* maintain aspect ratio */
10 | }
11 |
12 | img.no-scaled-link {
13 | background: transparent !important;
14 | }
--------------------------------------------------------------------------------
/docs/_static/images/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saezlab/decoupler-py/b3471b11d76f9dd31499c64f7994db4f20714734/docs/_static/images/logo.png
--------------------------------------------------------------------------------
/docs/_static/images/mlm.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saezlab/decoupler-py/b3471b11d76f9dd31499c64f7994db4f20714734/docs/_static/images/mlm.png
--------------------------------------------------------------------------------
/docs/_static/images/ora.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saezlab/decoupler-py/b3471b11d76f9dd31499c64f7994db4f20714734/docs/_static/images/ora.png
--------------------------------------------------------------------------------
/docs/_static/images/ulm.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saezlab/decoupler-py/b3471b11d76f9dd31499c64f7994db4f20714734/docs/_static/images/ulm.png
--------------------------------------------------------------------------------
/docs/_templates/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saezlab/decoupler-py/b3471b11d76f9dd31499c64f7994db4f20714734/docs/_templates/.gitkeep
--------------------------------------------------------------------------------
/docs/_templates/autosummary/class.rst:
--------------------------------------------------------------------------------
1 | {{ fullname | escape | underline}}
2 |
3 | .. currentmodule:: {{ module }}
4 |
5 | .. add toctree option to make autodoc generate the pages
6 |
7 | .. autoclass:: {{ objname }}
8 |
9 | {% block attributes %}
10 | {% if attributes %}
11 | Attributes table
12 | ~~~~~~~~~~~~~~~~
13 |
14 | .. autosummary::
15 | {% for item in attributes %}
16 | ~{{ name }}.{{ item }}
17 | {%- endfor %}
18 | {% endif %}
19 | {% endblock %}
20 |
21 | {% block methods %}
22 | {% if methods %}
23 | Methods table
24 | ~~~~~~~~~~~~~
25 |
26 | .. autosummary::
27 | {% for item in methods %}
28 | {%- if item != '__init__' %}
29 | ~{{ name }}.{{ item }}
30 | {%- endif -%}
31 | {%- endfor %}
32 | {% endif %}
33 | {% endblock %}
34 |
35 | {% block attributes_documentation %}
36 | {% if attributes %}
37 | Attributes
38 | ~~~~~~~~~~
39 |
40 | {% for item in attributes %}
41 |
42 | .. autoattribute:: {{ [objname, item] | join(".") }}
43 | {%- endfor %}
44 |
45 | {% endif %}
46 | {% endblock %}
47 |
48 | {% block methods_documentation %}
49 | {% if methods %}
50 | Methods
51 | ~~~~~~~
52 |
53 | {% for item in methods %}
54 | {%- if item != '__init__' %}
55 |
56 | .. automethod:: {{ [objname, item] | join(".") }}
57 | {%- endif -%}
58 | {%- endfor %}
59 |
60 | {% endif %}
61 | {% endblock %}
62 |
--------------------------------------------------------------------------------
/docs/api/bm.md:
--------------------------------------------------------------------------------
1 | # Benchmark
2 |
3 |
4 | ## Pipeline
5 | ```{eval-rst}
6 | .. module:: decoupler.bm
7 | .. currentmodule:: decoupler
8 |
9 | .. autosummary::
10 | :toctree: generated
11 |
12 | bm.benchmark
13 | ```
14 |
15 | ## Metrics
16 |
17 | ```{eval-rst}
18 | .. module:: decoupler.bm.metric
19 | .. currentmodule:: decoupler
20 |
21 | .. autosummary::
22 | :toctree: generated
23 |
24 | bm.metric.auc
25 | bm.metric.fscore
26 | bm.metric.qrank
27 | bm.metric.hmean
28 | ```
29 |
30 | ## Plotting
31 |
32 | ```{eval-rst}
33 | .. module:: decoupler.bm.pl
34 | .. currentmodule:: decoupler
35 |
36 | .. autosummary::
37 | :toctree: generated
38 |
39 | bm.pl.auc
40 | bm.pl.fscore
41 | bm.pl.qrank
42 | bm.pl.bar
43 | bm.pl.summary
44 | ```
45 |
--------------------------------------------------------------------------------
/docs/api/ds.md:
--------------------------------------------------------------------------------
1 | # Datasets
2 |
3 | ## Bulk
4 | ```{eval-rst}
5 | .. module:: decoupler.ds
6 | .. currentmodule:: decoupler
7 |
8 | .. autosummary::
9 | :toctree: generated
10 |
11 | ds.hsctgfb
12 | ds.knocktf
13 | ```
14 |
15 | ## Single-cell
16 | ```{eval-rst}
17 | .. autosummary::
18 | :toctree: generated
19 |
20 | ds.covid5k
21 | ds.erygast1k
22 | ds.pbmc3k
23 | ```
24 |
25 | ## Spatial
26 | ```{eval-rst}
27 | .. autosummary::
28 | :toctree: generated
29 |
30 | ds.msvisium
31 | ```
32 |
33 | ## Toy
34 | ```{eval-rst}
35 | .. autosummary::
36 | :toctree: generated
37 |
38 | ds.toy
39 | ds.toy_bench
40 | ```
41 |
42 | ## Utils
43 | ```{eval-rst}
44 | .. autosummary::
45 | :toctree: generated
46 |
47 | ds.ensmbl_to_symbol
48 | ```
49 |
--------------------------------------------------------------------------------
/docs/api/index.md:
--------------------------------------------------------------------------------
1 | # API
2 |
3 | Import decoupler as:
4 |
5 | ```
6 | import decoupler as dc
7 | ```
8 |
9 | ```{toctree}
10 | :maxdepth: 2
11 |
12 | bm
13 | ds
14 | mt
15 | op
16 | pp
17 | ```
18 |
19 | ```{toctree}
20 | :maxdepth: 1
21 |
22 | pl
23 | tl
24 | ```
--------------------------------------------------------------------------------
/docs/api/mt.md:
--------------------------------------------------------------------------------
1 | # Methods
2 |
3 | ## Single methods
4 |
5 | ```{eval-rst}
6 | .. module:: decoupler.mt
7 | .. currentmodule:: decoupler
8 |
9 | .. autosummary::
10 | :toctree: generated
11 |
12 | mt.aucell
13 | mt.gsea
14 | mt.gsva
15 | mt.mdt
16 | mt.mlm
17 | mt.ora
18 | mt.udt
19 | mt.ulm
20 | mt.viper
21 | mt.waggr
22 | mt.zscore
23 | ```
24 |
25 | ## Multiple methods
26 |
27 | ```{eval-rst}
28 | .. autosummary::
29 | :toctree: generated
30 |
31 | mt.decouple
32 | mt.consensus
33 | ```
--------------------------------------------------------------------------------
/docs/api/op.md:
--------------------------------------------------------------------------------
1 | # OmniPath
2 |
3 | ## Resources
4 |
5 | ```{eval-rst}
6 | .. module:: decoupler.op
7 | .. currentmodule:: decoupler
8 |
9 | .. autosummary::
10 | :toctree: generated
11 |
12 | op.collectri
13 | op.dorothea
14 | op.hallmark
15 | op.progeny
16 | op.resource
17 | ```
18 |
19 | ## Utils
20 |
21 | ```{eval-rst}
22 | .. autosummary::
23 | :toctree: generated
24 |
25 | op.show_resources
26 | op.show_organisms
27 | op.translate
28 | ```
--------------------------------------------------------------------------------
/docs/api/pl.md:
--------------------------------------------------------------------------------
1 | # Plotting
2 |
3 | ```{eval-rst}
4 | .. module:: decoupler.pl
5 | .. currentmodule:: decoupler
6 |
7 | .. autosummary::
8 | :toctree: generated
9 |
10 | pl.barplot
11 | pl.dotplot
12 | pl.filter_by_expr
13 | pl.filter_by_prop
14 | pl.filter_samples
15 | pl.leading_edge
16 | pl.network
17 | pl.obsbar
18 | pl.obsm
19 | pl.order_targets
20 | pl.order
21 | pl.source_targets
22 | pl.volcano
23 | ```
--------------------------------------------------------------------------------
/docs/api/pp.md:
--------------------------------------------------------------------------------
1 | # Preprocessing
2 |
3 | ## Data
4 | ```{eval-rst}
5 | .. module:: decoupler.pp
6 | .. currentmodule:: decoupler
7 |
8 | .. autosummary::
9 | :toctree: generated
10 |
11 | pp.extract
12 | ```
13 |
14 | ## Network
15 | ```{eval-rst}
16 | .. autosummary::
17 | :toctree: generated
18 |
19 | pp.read_gmt
20 | pp.prune
21 | pp.adjmat
22 | pp.idxmat
23 | pp.shuffle_net
24 | pp.net_corr
25 | ```
26 |
27 | ## AnnData
28 | ```{eval-rst}
29 | .. autosummary::
30 | :toctree: generated
31 |
32 | pp.get_obsm
33 | pp.swap_layer
34 | pp.pseudobulk
35 | pp.filter_samples
36 | pp.filter_by_expr
37 | pp.filter_by_prop
38 | pp.knn
39 | pp.bin_order
40 | ```
41 |
--------------------------------------------------------------------------------
/docs/api/tl.md:
--------------------------------------------------------------------------------
1 | # Tools
2 |
3 | ```{eval-rst}
4 | .. module:: decoupler.tl
5 | .. currentmodule:: decoupler
6 |
7 | .. autosummary::
8 | :toctree: generated
9 |
10 | tl.rankby_group
11 | tl.rankby_obsm
12 | tl.rankby_order
13 | ```
14 |
--------------------------------------------------------------------------------
/docs/changelog.md:
--------------------------------------------------------------------------------
1 | ```{include} ../CHANGELOG.md
2 |
3 | ```
4 |
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
# Configuration file for the Sphinx documentation builder.

# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html

# -- Path setup --------------------------------------------------------------
import sys
from datetime import datetime
from importlib.metadata import metadata
from pathlib import Path

HERE = Path(__file__).parent
# Make local extensions (e.g. typed_returns) importable by Sphinx.
sys.path.insert(0, str(HERE / "extensions"))


# -- Project information -----------------------------------------------------

# NOTE: If you installed your project in editable mode, this might be stale.
# If this is the case, reinstall it to refresh the metadata
info = metadata("decoupler")
project_name = info["Name"]
author = info["Author"]
copyright = f"{datetime.now():%Y}, {author}."
version = info["Version"]
urls = dict(pu.split(", ") for pu in info.get_all("Project-URL"))
repository_url = urls["Source"]

# The full version, including alpha/beta/rc tags
release = info["Version"]

bibtex_bibfiles = ["references.bib"]
templates_path = ["_templates"]
nitpicky = True  # Warn about broken links
needs_sphinx = "4.0"

html_context = {
    "display_github": True,  # Integrate GitHub
    "github_user": "PauBadiaM",
    "github_repo": project_name,
    "github_version": "main",
    "conf_py_path": "/docs/",
}

# -- General configuration ---------------------------------------------------

# Add any Sphinx extension module names here, as strings.
# They can be extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
extensions = [
    "myst_nb",
    "sphinx_copybutton",
    "sphinx.ext.autodoc",
    "sphinx.ext.intersphinx",
    "sphinx.ext.autosummary",
    "sphinx.ext.napoleon",
    "sphinxcontrib.bibtex",
    "sphinx_autodoc_typehints",
    "sphinx_tabs.tabs",
    "sphinx.ext.mathjax",
    "IPython.sphinxext.ipython_console_highlighting",
    "sphinxext.opengraph",
    *[p.stem for p in (HERE / "extensions").glob("*.py")],
]

autosummary_generate = True
autodoc_member_order = "groupwise"
default_role = "literal"
napoleon_google_docstring = False
napoleon_numpy_docstring = True
napoleon_include_init_with_doc = False
napoleon_use_rtype = True  # having a separate entry generally helps readability
napoleon_use_param = True
myst_heading_anchors = 6  # create anchors for h1-h6
myst_enable_extensions = [
    "amsmath",
    "colon_fence",
    "deflist",
    "dollarmath",
    "html_image",
    "html_admonition",
]
myst_url_schemes = ("http", "https", "mailto")
nb_output_stderr = "remove"
nb_execution_mode = "off"
nb_merge_streams = True
typehints_defaults = "braces"

source_suffix = {
    ".rst": "restructuredtext",
    ".ipynb": "myst-nb",
    ".myst": "myst-nb",
}

intersphinx_mapping = {
    "python": ("https://docs.python.org/3", None),
    "anndata": ("https://anndata.readthedocs.io/en/stable/", None),
    "scanpy": ("https://scanpy.readthedocs.io/en/stable/", None),
    "numpy": ("https://numpy.org/doc/stable/", None),
    "matplotlib": ("https://matplotlib.org/stable/", None),
    # quote style normalized to double quotes for consistency with the rest of the file
    "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None),
}

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "**.ipynb_checkpoints"]


# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = "sphinx_book_theme"
html_static_path = ["_static"]
html_css_files = ["css/custom.css"]
html_title = project_name
html_logo = "_static/images/logo.png"
html_favicon = "_static/images/logo.png"

html_theme_options = {
    "repository_url": repository_url,
    "use_repository_button": True,
    "path_to_docs": "docs/",
    "navigation_with_keys": False,
}

pygments_style = "default"

nitpick_ignore = [
    # If building the documentation fails because of a missing link that is outside your control,
    # you can add an exception to this list.
    # ("py:class", "igraph.Graph"),
]
135 |
--------------------------------------------------------------------------------
/docs/extensions/typed_returns.py:
--------------------------------------------------------------------------------
1 | # code from https://github.com/theislab/scanpy/blob/master/docs/extensions/typed_returns.py
2 | # with some minor adjustment
3 | from __future__ import annotations
4 |
5 | import re
6 | from collections.abc import Generator, Iterable
7 |
8 | from sphinx.application import Sphinx
9 | from sphinx.ext.napoleon import NumpyDocstring
10 |
11 |
12 | def _process_return(lines: Iterable[str]) -> Generator[str, None, None]:
13 | for line in lines:
14 | if m := re.fullmatch(r"(?P\w+)\s+:\s+(?P[\w.]+)", line):
15 | yield f"-{m['param']} (:class:`~{m['type']}`)"
16 | else:
17 | yield line
18 |
19 |
def _parse_returns_section(self: NumpyDocstring, section: str) -> list[str]:
    """
    Replacement for NumpyDocstring's Returns-section parser.

    Consumes the rest of the section, rewrites "name : type" lines via
    ``_process_return``, and emits them as a single ``:returns:`` field block.
    """
    # Consume the remaining section lines and strip their common indentation.
    lines_raw = self._dedent(self._consume_to_next_section())
    # Drop a leading bare ":" left over from the section header, if present.
    if lines_raw[0] == ":":
        del lines_raw[0]
    lines = self._format_block(":returns: ", list(_process_return(lines_raw)))
    # Ensure the field block is terminated by a blank line.
    if lines and lines[-1]:
        lines.append("")
    return lines
28 |
29 |
def setup(app: Sphinx):
    """Sphinx extension entry point: monkey-patch NumpyDocstring's Returns parser."""
    NumpyDocstring._parse_returns_section = _parse_returns_section
33 |
--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
1 | ```{include} ../README.md
2 |
3 | ```
4 |
5 | ```{toctree}
6 | :maxdepth: 1
7 | :hidden: true
8 |
9 | api/index
10 | notebooks/index
11 | changelog
12 | references
13 | ```
14 |
--------------------------------------------------------------------------------
/docs/notebooks/bench/index.md:
--------------------------------------------------------------------------------
1 | # Benchmark
2 | ```{toctree}
3 | :maxdepth: 1
4 |
5 | rna
6 | ```
7 |
--------------------------------------------------------------------------------
/docs/notebooks/bulk/index.md:
--------------------------------------------------------------------------------
1 | # Bulk
2 | ```{toctree}
3 | :maxdepth: 1
4 |
5 | rna
6 | ```
7 |
--------------------------------------------------------------------------------
/docs/notebooks/index.md:
--------------------------------------------------------------------------------
1 | # Tutorials
2 | ```{toctree}
3 | :maxdepth: 1
4 |
5 | example
6 | scell/index
7 | spatial/index
8 | bulk/index
9 | omnipath/index
10 | bench/index
11 | ```
12 |
--------------------------------------------------------------------------------
/docs/notebooks/omnipath/index.md:
--------------------------------------------------------------------------------
1 | # OmniPath
2 | ```{toctree}
3 | :maxdepth: 1
4 |
5 | licenses
6 | orthologs
7 | ```
8 |
--------------------------------------------------------------------------------
/docs/notebooks/scell/index.md:
--------------------------------------------------------------------------------
1 | # Single-cell
2 | ```{toctree}
3 | :maxdepth: 1
4 |
5 | rna_sc
6 | rna_psbk
7 | rna_pstime
8 | ```
9 |
--------------------------------------------------------------------------------
/docs/notebooks/spatial/index.md:
--------------------------------------------------------------------------------
1 | # Spatial
2 | ```{toctree}
3 | :maxdepth: 1
4 |
5 | rna_visium
6 | ```
7 |
--------------------------------------------------------------------------------
/docs/references.md:
--------------------------------------------------------------------------------
1 | # References
2 |
3 | ```{bibliography}
4 | :cited:
5 | ```
6 |
--------------------------------------------------------------------------------
/src/decoupler/_Method.py:
--------------------------------------------------------------------------------
1 | from typing import Callable
2 | import textwrap
3 |
4 | import pandas as pd
5 |
6 | from decoupler._docs import docs
7 | from decoupler._datatype import DataType
8 | from decoupler.mt._run import _run
9 |
10 |
class MethodMeta:
    """
    Container for an enrichment method's descriptive metadata.

    Stores the method's name, description, implementation function,
    statistic type, and related attributes as same-named instance fields.
    """

    def __init__(
        self,
        name: str,
        desc: str,
        func: Callable,
        stype: str,
        adj: bool,
        weight: bool,
        test: bool,
        limits: tuple,
        reference: str,
    ):
        # Bind every constructor argument to an attribute of the same name.
        fields = dict(
            name=name,
            desc=desc,
            func=func,
            stype=stype,
            adj=adj,
            weight=weight,
            test=test,
            limits=limits,
            reference=reference,
        )
        for attr, value in fields.items():
            setattr(self, attr, value)

    def meta(self) -> pd.DataFrame:
        """Return a one-row DataFrame summarizing this method (func and adj are omitted)."""
        record = {
            'name': self.name,
            'desc': self.desc,
            'stype': self.stype,
            'weight': self.weight,
            'test': self.test,
            'limits': self.limits,
            'reference': self.reference,
        }
        return pd.DataFrame([record])
45 |
46 |
47 | #@docs.dedent
class Method(MethodMeta):
    """
    Callable wrapper around a :class:`MethodMeta`.

    Copies all metadata fields from the wrapped instance and, when called,
    forwards the call to the shared ``_run`` pipeline together with this
    method's configuration (name, function, adjacency and test flags).
    """

    def __init__(
        self,
        _method: MethodMeta,
    ):
        # Mirror every metadata field onto this wrapper.
        super().__init__(
            name=_method.name,
            desc=_method.desc,
            func=_method.func,
            stype=_method.stype,
            adj=_method.adj,
            weight=_method.weight,
            test=_method.test,
            limits=_method.limits,
            reference=_method.reference,
        )
        self._method = _method
        # Expose the underlying implementation's docstring on the wrapper
        # so help()/Sphinx show the method's own documentation.
        self.__doc__ = self.func.__doc__

    def __call__(
        self,
        data: DataType,
        net: pd.DataFrame,
        tmin: int | float = 5,
        raw: bool = False,
        empty: bool = True,
        bsize: int | float = 250_000,
        verbose: bool = False,
        **kwargs,
    ):
        # Delegate to the shared runner; extra kwargs go to the method func.
        return _run(
            name=self.name,
            func=self.func,
            adj=self.adj,
            test=self.test,
            data=data,
            net=net,
            tmin=tmin,
            raw=raw,
            empty=empty,
            bsize=bsize,
            verbose=verbose,
            **kwargs,
        )
92 |
93 |
94 | def _show_methods(methods):
95 | return pd.concat([method.meta() for method in methods]).reset_index(drop=True)
96 |
--------------------------------------------------------------------------------
/src/decoupler/_Plotter.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | from matplotlib.axes._axes import Axes
3 | from matplotlib.figure import Figure
4 |
5 | from decoupler._docs import docs
6 |
7 |
class Plotter:
    @docs.dedent
    def __init__(
        self,
        ax: Axes | None = None,
        figsize: tuple | None = (4, 3),
        dpi: int | float = 100,
        return_fig: bool = False,
        save: str | None = None,
    ) -> Figure | None:
        """
        Base class for plotters.

        Parameters
        ----------
        %(plot)s
        """
        # Validate
        assert isinstance(ax, Axes) or ax is None, \
            'ax must be matplotlib.axes._axes.Axes or None'
        # FIX: figsize is annotated as `tuple | None`, but the previous
        # assert rejected None. None now falls through to matplotlib,
        # which uses the rcParams default figure size.
        assert isinstance(figsize, tuple) or figsize is None, \
            'figsize must be tuple or None'
        # dpi annotation widened to int | float to match this validation.
        assert isinstance(dpi, (int, float)) and dpi > 0, \
            'dpi must be numerical and > 0'
        assert isinstance(return_fig, bool), \
            'return_fig must be bool'
        assert isinstance(save, str) or save is None, \
            'save must be str or None'
        self.ax = ax
        self.figsize = figsize
        self.dpi = dpi
        self.return_fig = return_fig
        self.save = save
        # Create a fresh figure unless the caller supplied an axes to draw on.
        if self.ax is None:
            self.fig, self.ax = plt.subplots(1, 1, figsize=self.figsize, dpi=self.dpi, tight_layout=True)
        else:
            self.fig = self.ax.figure

    def _return(self):
        """Save the figure if requested, and return it when return_fig is True."""
        if self.save is not None:
            self.fig.savefig(self.save, bbox_inches='tight')
        if self.return_fig:
            return self.fig
51 |
--------------------------------------------------------------------------------
/src/decoupler/__init__.py:
--------------------------------------------------------------------------------
from importlib.metadata import version

from . import bm, ds, mt, op, pl, pp, tl

# Public submodules re-exported at the package root.
__all__ = ['bm', 'ds', 'mt', 'op', 'pl', 'pp', 'tl']

# Resolve the installed package version from distribution metadata.
__version__ = version('decoupler')
8 |
--------------------------------------------------------------------------------
/src/decoupler/_datatype.py:
--------------------------------------------------------------------------------
from typing import Union, Tuple

from anndata import AnnData
import pandas as pd
import numpy as np


# Accepted input containers for decoupler methods: an AnnData object, a
# samples-by-features DataFrame, or a pre-extracted triple of arrays
# (presumably matrix, row names, column names -- TODO confirm element
# order against the pp.extract implementation).
DataType = Union[
    AnnData,
    pd.DataFrame,
    Tuple[np.ndarray, np.ndarray, np.ndarray],
]
13 |
--------------------------------------------------------------------------------
/src/decoupler/_download.py:
--------------------------------------------------------------------------------
1 | import requests
2 | from tqdm import tqdm
3 | import pandas as pd
4 | import io
5 |
6 | from decoupler._log import _log
7 |
8 | URL_DBS = 'https://omnipathdb.org/annotations?databases='
9 | URL_INT = 'https://omnipathdb.org/interactions/?genesymbols=1&'
10 |
def _download(
    url: str,
    verbose: bool = False,
    timeout: int | float | None = None,
    **kwargs,
) -> pd.DataFrame:
    """
    Download a CSV resource from ``url`` and parse it into a DataFrame.

    Parameters
    ----------
    url
        Address to download from.
    verbose
        Whether to log progress messages and show a progress bar.
    timeout
        Optional timeout in seconds forwarded to ``requests.get``.
        ``None`` (default) preserves the previous wait-indefinitely behavior.
    **kwargs
        Extra keyword arguments forwarded to ``pandas.read_csv``.

    Returns
    -------
    Parsed table as a DataFrame.
    """
    assert isinstance(url, str), 'url must be str'
    # Download with progress bar
    m = f'Downloading {url}'
    _log(m, level='info', verbose=verbose)
    chunks = []
    with requests.get(url, stream=True, timeout=timeout) as r:
        r.raise_for_status()
        with tqdm(unit='B', unit_scale=True, desc="Progress", disable=not verbose) as pbar:
            for chunk in r.iter_content(chunk_size=8192):
                if chunk:
                    chunks.append(chunk)
                    pbar.update(len(chunk))
    # Read the buffered bytes into pandas
    data = io.BytesIO(b"".join(chunks))
    df = pd.read_csv(data, **kwargs)
    m = 'Download finished'  # was an f-string with no placeholders
    _log(m, level='info', verbose=verbose)
    return df
34 |
--------------------------------------------------------------------------------
/src/decoupler/_log.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 | logging.basicConfig(
4 | level=logging.INFO,
5 | format="%(asctime)s | [%(levelname)s] %(message)s",
6 | datefmt="%Y-%m-%d %H:%M:%S"
7 | )
8 |
9 | def _log(
10 | message: str,
11 | level: str = 'info',
12 | verbose: bool = False
13 | ) -> None:
14 | """
15 | Log a message with a specified logging level.
16 |
17 | Parameters
18 | ----------
19 | message
20 | The message to log.
21 | level
22 | The logging level.
23 | verbose
24 | Whether to emit the log.
25 | """
26 | level = level.lower()
27 | if verbose:
28 | if level == "warn":
29 | logging.warning(message)
30 | elif level == "info":
31 | logging.info(message)
32 |
--------------------------------------------------------------------------------
/src/decoupler/_odeps.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | import types
3 | from typing import TYPE_CHECKING
4 |
5 |
6 | def _try_import(
7 | name: str
8 | ) -> types.ModuleType | None:
9 | try:
10 | with warnings.catch_warnings():
11 | warnings.filterwarnings("ignore", category=FutureWarning, module=name)
12 | module = __import__(name, fromlist=[""])
13 | return module
14 | except ImportError:
15 | return None
16 |
17 |
18 | def _check_import(
19 | module: types.ModuleType
20 | ) -> None:
21 | if module is None:
22 | name = module.__name__
23 | raise ImportError(
24 | f"{name} is not installed. Please install it using:\n"
25 | f" pip install {name}"
26 | "or install decoupler with full dependencies:\n"
27 | " pip install 'decoupler[full]'"
28 | )
29 |
30 |
# Handle optional dependencies
# igraph: expose `Graph` directly when available so annotations and
# isinstance checks work; otherwise fall back to a typing-only alias
# (Any under TYPE_CHECKING, None at runtime).
ig = _try_import("igraph")
if ig is not None:
    if TYPE_CHECKING:
        from igraph import Graph
    else:
        Graph = ig.Graph
else:
    if TYPE_CHECKING:
        from typing import Any as Graph
    else:
        Graph = None

# Modules used by optional method backends; None when not installed.
xgboost = _try_import("xgboost")
dcor = _try_import("dcor")
46 |
--------------------------------------------------------------------------------
/src/decoupler/bm/__init__.py:
--------------------------------------------------------------------------------
1 | from decoupler.bm._run import benchmark
2 | from decoupler.bm import metric
3 | from decoupler.bm import pl
4 |
--------------------------------------------------------------------------------
/src/decoupler/bm/_pp.py:
--------------------------------------------------------------------------------
1 | from typing import Tuple
2 |
3 | import pandas as pd
4 | import numpy as np
5 | import scipy.sparse as sps
6 | from anndata import AnnData
7 |
8 | from decoupler._log import _log
9 | from decoupler.pp.net import prune
10 |
11 |
12 | def _validate_groupby(
13 | obs: pd.DataFrame,
14 | groupby: str | list | None,
15 | runby: str,
16 | ) -> None | list:
17 | assert isinstance(groupby, (str, list)) or groupby is None, \
18 | 'groupby must be str, list or None'
19 | assert isinstance(runby, str) and runby in ['expr', 'source'], \
20 | 'runby must be str and either expr or source'
21 | if groupby is not None:
22 | if type(groupby) is str:
23 | groupby = [groupby]
24 | for grp_i in groupby:
25 | if type(grp_i) is str:
26 | grp_i = [grp_i]
27 | # For each group inside each groupby
28 | for grp_j in grp_i:
29 | assert not ('source' == grp_j and runby == 'source'), \
30 | f'source cannot be in groupby if runby="source"'
31 | # Assert that columns exist in obs
32 | assert grp_j in obs.columns, \
33 | f'Column name "{grp_j}" must be in adata.obs.columns'
34 | # Assert that column doesn't contain "|"
35 | assert '|' not in grp_j, \
36 | 'Column names must not contain the \"|\" character'
37 | return groupby
38 |
39 |
40 | def _validate_obs(
41 | obs: pd.DataFrame,
42 | ) -> None:
43 | assert 'source' in obs.columns, \
44 | 'source must be in adata.obs.columns'
45 | assert 'type_p' in obs.columns, \
46 | 'type_p must be in adata.obs.columns'
47 | assert pd.api.types.is_numeric_dtype(obs['type_p']), \
48 | 'type_p must contain numeric values'
49 | assert np.isin(obs['type_p'].sort_values().unique(), np.array([-1, 1])).all(), \
50 | 'type_p must be -1 or +1'
51 |
52 |
def _filter(
    adata: AnnData,
    net: pd.DataFrame,
    sfilt: bool,
    verbose: bool,
) -> Tuple[AnnData, pd.DataFrame]:
    """
    Cross-filter benchmark experiments and the network.

    Drops experiments whose perturbed source(s) do not appear in ``net``
    and, when ``sfilt`` is True, drops network sources that were never
    perturbed in any experiment. The full set of perturbed sources is
    stored in ``adata.uns['p_sources']``.

    Parameters
    ----------
    adata
        Annotated data whose ``.obs['source']`` holds, per experiment,
        either a single source name (str) or a list of names.
    net
        Network table with a 'source' column.
    sfilt
        Whether to also remove net sources without matching experiments.
    verbose
        Whether to log progress messages.

    Returns
    -------
    The filtered (adata, net) pair; adata is a copy of the kept subset.
    """
    # Remove experiments without sources in net
    srcs = net['source'].unique()
    prts = set()
    msk_exp = np.zeros(adata.obs_names.size, dtype=np.bool_)
    for i, src in enumerate(adata.obs['source']):
        # Multi-source perturbations are kept if ANY of their sources is in net.
        if isinstance(src, list):
            prts.update(src)
            if np.isin(src, srcs).any():
                msk_exp[i] = True
        elif isinstance(src, str):
            prts.add(src)
            if src in srcs:
                msk_exp[i] = True
    n_exp = adata.shape[0]
    m = f'benchmark - found {len(prts)} unique perturbed sources across {n_exp} experiments'
    _log(m, level='info', verbose=verbose)
    r_exp = int((~msk_exp).sum())
    m = f'benchmark - removing {r_exp} experiments out of {n_exp} without sources in net'
    _log(m, level='info', verbose=verbose)
    adata = adata[msk_exp, :].copy()
    # Remove sources without experiments in obs
    if sfilt:
        msk_src = np.array([s in prts for s in net['source']])
        # Count distinct sources being dropped (for the log message only).
        rsrc = net.loc[~msk_src].groupby('source').size().index.size
        m = f'benchmark - removing {rsrc} sources out of {srcs.size} without experiments in obs'
        _log(m, level='info', verbose=verbose)
        net = net.loc[msk_src, :]
    adata.uns['p_sources'] = prts
    return adata, net
88 |
89 |
def _sign(
    adata: AnnData,
) -> None:
    """Write a sign-corrected copy of ``adata.X`` into ``adata.layers['tmp']``, flipping each row by its 'type_p' value (+1/-1)."""
    signs = adata.obs['type_p'].values.reshape(-1, 1)
    if not sps.issparse(adata.X):
        adata.layers['tmp'] = adata.X * signs
    else:
        # Sparse multiply returns COO; keep the CSR format used elsewhere.
        adata.layers['tmp'] = adata.X.multiply(signs).tocsr()
98 |
99 |
100 | def _validate_bool(
101 | y_true: np.ndarray,
102 | y_score: np.ndarray,
103 | ) -> None:
104 | assert isinstance(y_true, np.ndarray), 'y_true must be numpy.ndarray'
105 | assert isinstance(y_score, np.ndarray), 'y_score must be numpy.ndarray'
106 | unq = np.sort(np.unique(y_true))
107 | m = 'y_true must contain two binary classes, 0 and 1'
108 | assert unq.size <= 2, m
109 | lbl = np.array([0, 1])
110 | assert np.all(unq == lbl), m
111 | assert y_true.size == y_score.size, \
112 | 'y_true and y_score must have the same size'
113 |
--------------------------------------------------------------------------------
/src/decoupler/bm/metric/__init__.py:
--------------------------------------------------------------------------------
1 | from decoupler.bm.metric._auc import auc
2 | from decoupler.bm.metric._fscore import fscore
3 | from decoupler.bm.metric._qrank import qrank
4 | from decoupler.bm.metric._hmean import hmean
5 |
# Registry of per-experiment metrics operating on (y_true, y_score) arrays.
# hmean is deliberately absent: it takes an already-computed results
# DataFrame rather than raw score arrays (see its signature in _hmean.py).
dict_metric = {
    'auc': auc,
    'fscore': fscore,
    'qrank': qrank,
}
11 |
--------------------------------------------------------------------------------
/src/decoupler/bm/metric/_auc.py:
--------------------------------------------------------------------------------
1 | from typing import Tuple
2 |
3 | import numpy as np
4 |
5 | from decoupler.bm._pp import _validate_bool
6 |
7 |
8 | def _binary_clf_curve(
9 | y_true: np.ndarray,
10 | y_score: np.ndarray,
11 | ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
12 | # Sort scores
13 | idx = np.flip(np.argsort(y_score))
14 | y_score = y_score[idx]
15 | y_true = y_true[idx]
16 | # Find unique value idxs
17 | idx = np.where(np.diff(y_score))[0]
18 | # Append a value for the end of the curve
19 | idx = np.append(idx, y_true.size - 1)
20 | # Acucmulate TP with decreasing threshold
21 | tps = np.cumsum(y_true)[idx]
22 | fps = 1 + idx - tps
23 | return fps, tps, y_score[idx]
24 |
25 |
def auroc(
    y_true: np.ndarray,
    y_score: np.ndarray,
) -> float:
    """
    Area under the ROC curve via trapezoidal integration.

    Parameters
    ----------
    y_true
        Binary ground-truth labels (0/1).
    y_score
        Predicted scores.

    Returns
    -------
    AUROC value.
    """
    _validate_bool(y_true=y_true, y_score=y_score)
    # Compute binary curve; thresholds are not needed for the area, so the
    # previously dead threshold-extension code has been removed.
    fps, tps, _ = _binary_clf_curve(y_true, y_score)
    # Anchor the curve at the origin
    fps = np.append(0., fps)
    tps = np.append(0., tps)
    # Compute ratios
    fpr = fps / fps[-1]
    tpr = tps / tps[-1]
    # Trapezoidal integration over fpr
    dx = np.diff(np.ascontiguousarray(fpr))
    # Flip the sign when fpr is monotonically decreasing
    if np.all(dx <= 0):
        d = -1.
    else:
        d = 1.
    # Compute area
    ret = np.sum((dx * (tpr[1:] + tpr[:-1]) / 2.0))
    auc = d * ret
    return auc
51 |
52 |
def auprc(
    y_true: np.ndarray,
    y_score: np.ndarray,
    pi0: float = 0.5
) -> float:
    """
    Calibrated area under the precision-recall curve.

    Parameters
    ----------
    y_true
        Binary ground-truth labels (0/1).
    y_score
        Predicted scores.
    pi0
        Reference prevalence used for calibration, between 0 and 1.

    Returns
    -------
    Calibrated AUPRC value.
    """
    _validate_bool(y_true=y_true, y_score=y_score)
    assert isinstance(pi0, (int, float)) and 0. <= pi0 <= 1., \
        'pi0 must be numeric and between 0 and 1'
    # Compute binary curve; thresholds are unused (the previously computed
    # `thr = np.flip(thr)` was dead code and has been removed).
    fps, tps, _ = _binary_clf_curve(y_true, y_score)
    # Compute prc
    ps = tps + fps
    msk = ps != 0
    # Siblini W., Fréry J., He-Guelton L., Oblé F., Wang YQ. (2020) Master
    # Your Metrics with Calibration. In: Berthold M., Feelders A., Krempl G.
    # (eds) Advances in Intelligent Data Analysis XVIII. IDA 2020. Lecture
    # Notes in Computer Science, vol 12080. Springer, Cham
    pi = np.sum(y_true) / y_true.size
    ratio = pi * (1 - pi0) / (pi0 * (1 - pi))
    prc = tps[msk] / (tps[msk] + ratio * fps[msk])
    # Compute rcl
    rcl = tps / tps[-1]
    # Flip and add limits
    prc = np.append(np.flip(prc), 1)
    rcl = np.append(np.flip(rcl), 0)
    dx = np.diff(np.ascontiguousarray(rcl))
    auc = -np.sum(dx * prc[:-1])
    return auc
82 |
83 |
def auc(
    y_true: np.ndarray,
    y_score: np.ndarray,
    pi0: float = 0.5,
) -> Tuple[float, float]:
    """
    Area Under the Curve.

    Normalizes scores row-wise to [-1, 1], drops NaN scores, then computes
    both the ROC and calibrated PR areas.
    """
    # Row-wise max-abs scaling so different methods are comparable;
    # all-zero rows keep a divisor of 1 to avoid division by zero.
    scale = np.nanmax(np.abs(y_score), axis=1)
    scale[scale == 0.] = 1.
    y_score = y_score / scale.reshape(-1, 1)
    assert ((-1. <= y_score) & (y_score <= 1.)).all()
    # Work on flat vectors without NaNs
    y_true = y_true.ravel()
    y_score = y_score.ravel()
    keep = ~np.isnan(y_score)
    y_true = y_true[keep]
    y_score = y_score[keep]
    return (
        auroc(y_true=y_true, y_score=y_score),
        auprc(y_true=y_true, y_score=y_score, pi0=pi0),
    )

auc.scores = ['auroc', 'auprc']
107 |
--------------------------------------------------------------------------------
/src/decoupler/bm/metric/_fscore.py:
--------------------------------------------------------------------------------
1 | from typing import Tuple
2 |
3 | import numpy as np
4 |
5 | from decoupler.bm._pp import _validate_bool
6 |
7 |
def fscore(
    y_true: np.ndarray,
    y_score: np.ndarray,
) -> Tuple[float, float]:
    """
    Precision and recall components of the F-beta score.

    Despite the name, this returns the two components (precision, recall);
    the harmonic mean itself is computed downstream. FIX: the return
    annotation previously claimed three floats while exactly two are
    returned.

    Parameters
    ----------
    y_true
        Binary ground-truth labels (0/1).
    y_score
        Boolean predictions.

    Returns
    -------
    (precision, recall); both 0 when there are no true positives.
    """
    # Validate
    _validate_bool(y_true=y_true, y_score=y_score)
    assert y_score.dtype == np.bool_, \
        'y_score must be bool numpy.ndarray'
    y_true = y_true.astype(np.bool_)
    # Confusion counts
    tp = np.sum(y_true * y_score)
    fp = np.sum((~y_true) * y_score)
    fn = np.sum(y_true * (~y_score))
    if tp > 0:
        prc = tp / (tp + fp)
        rcl = tp / (tp + fn)
    else:
        prc = 0.
        rcl = 0.
    return prc, rcl

fscore.scores = ['precision', 'recall']
33 |
--------------------------------------------------------------------------------
/src/decoupler/bm/metric/_hmean.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 |
4 | from decoupler._docs import docs
5 | from decoupler.bm.pl._format import _format
6 |
7 |
8 | def _hmean(
9 | x: float | int,
10 | y: float | int,
11 | beta: float | int = 1,
12 | ) -> float:
13 | assert isinstance(beta, (int, float)) and 0 < beta, \
14 | 'beta must be numeric and > 0'
15 | h = np.zeros(len(x))
16 | msk = (x != 0.) & (y != 0.)
17 | h[msk] = (1 + beta**2) * (x[msk] * y[msk]) / ((x[msk] * beta**2) + y[msk])
18 | return h
19 |
20 |
@docs.dedent
def hmean(
    df: pd.DataFrame,
    metrics: str | list | tuple = ('auc', 'fscore', 'qrank'),
    beta: int | float = 0.5,
) -> pd.DataFrame:
    """
    Computes the harmonic mean between two metric statistics.

    Parameters
    ----------
    %(df)s
    metrics
        Metrics which to compute the harmonic mean between their own statistics.
    beta
        Controls the balance between statistics, where beta > 1 favors the first one (for example recall),
        beta < 1 the other one (for example precision), and beta = 1 gives equal weight to both.

    Returns
    -------
    Dataframe containing the harmonic mean per metric.
    """
    # Validate. FIX: the default was a mutable list (shared across calls);
    # a tuple default is now used and accepted by the check below.
    assert isinstance(df, pd.DataFrame), 'df must be pandas.DataFrame'
    assert isinstance(metrics, (str, list, tuple)), 'metrics must be str, list or tuple'
    if isinstance(metrics, str):
        metrics = [metrics]
    # Per-metric definition of the two statistics to combine and the name
    # of the resulting harmonic-mean column.
    d_metrics = {
        'auc': {
            'name': 'H(auroc, auprc)',
            'cols': ['auprc', 'auroc'],
        },
        'fscore': {
            'name': 'F-score',
            'cols': ['precision', 'recall'],
        },
        'qrank': {
            'name': 'H(1-qrank, -log10(pval))',
            'cols': ['-log10(pval)', '1-qrank'],
        },
    }
    hdf = []
    h_cols = []
    for i, metric in enumerate(metrics):
        # Format
        cols = d_metrics[metric]['cols']
        tmp = _format(df=df, cols=cols)
        # Compute harmonic mean
        name = d_metrics[metric]['name']
        tmp[name] = _hmean(tmp[cols[0]], tmp[cols[1]], beta=beta)
        # Keep the full formatted table once; later metrics add only their columns.
        if i == 0:
            hdf.append(tmp)
        else:
            hdf.append(tmp[cols + [name]])
        h_cols.append(name)
    hdf = pd.concat(hdf, axis=1)
    # Mean of the harmonic means, min-max scaled to [0, 1] (final score)
    hdf['score'] = hdf[h_cols].mean(axis=1, numeric_only=True)
    hdf['score'] = (hdf['score'] - hdf['score'].min()) / (hdf['score'].max() - hdf['score'].min())
    return hdf
82 |
--------------------------------------------------------------------------------
/src/decoupler/bm/metric/_qrank.py:
--------------------------------------------------------------------------------
1 | from typing import Tuple
2 |
3 | import numpy as np
4 | import scipy.stats as sts
5 |
6 | from decoupler.bm._pp import _validate_bool
7 |
8 |
def qrank(
    y_true: np.ndarray,
    y_score: np.ndarray,
) -> Tuple[float, float]:
    """
    1 - quantile normalized rank
    """
    _validate_bool(y_true=y_true, y_score=y_score)
    # Quantile-normalize ranks within each row, ignoring NaNs
    ranks = sts.rankdata(y_score, axis=1, nan_policy='omit', method='average')
    n_valid = np.sum(~np.isnan(ranks), axis=1).reshape(-1, 1)
    ranks = ranks / n_valid
    # Split ranks into ground-truth sources and the rest
    is_true = y_true.astype(np.bool_)
    grt = ranks[is_true]
    bkg = ranks[~is_true]
    # One-sided test: ground-truth ranks greater than background
    _, pval = sts.ranksums(grt, bkg, alternative='greater')
    return np.nanmean(grt), -np.log10(pval)

qrank.scores = ['1-qrank', '-log10(pval)']
27 |
--------------------------------------------------------------------------------
/src/decoupler/bm/pl/__init__.py:
--------------------------------------------------------------------------------
1 | from decoupler.bm.pl._auc import auc
2 | from decoupler.bm.pl._fscore import fscore
3 | from decoupler.bm.pl._qrank import qrank
4 | from decoupler.bm.pl._bar import bar
5 | from decoupler.bm.pl._summary import summary
6 |
--------------------------------------------------------------------------------
/src/decoupler/bm/pl/_auc.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import seaborn as sns
3 | from matplotlib.figure import Figure
4 |
5 | from decoupler._docs import docs
6 | from decoupler._Plotter import Plotter
7 | from decoupler.bm.pl._format import _format
8 |
9 |
@docs.dedent
def auc(
    df: pd.DataFrame,
    hue: str | None = None,
    palette: str = 'tab20',
    thr_auroc: float = 0.5,
    thr_auprc: float = 0.5,
    **kwargs
) -> None | Figure:
    """
    Plot auroc and auprc.

    x-axis represent the auroc calculated by ranking all obtained enrichment scores, calculating different class thresholds
    and finally obtaining the area under the curve.
    The higher value the better performance is.

    y-axis represent the auprc calculated by ranking all obtained enrichment scores, calculating different class thresholds
    and finally obtaining the area under the curve.
    The higher value the better performance is.

    Parameters
    ----------
    %(df)s
    %(hue)s
    %(palette)s
    thr_auroc
        Dashed line to indicate baseline of auroc.
    thr_auprc
        Dashed line to indicate baseline of auprc.
    %(plot)s
    """
    # Validate
    assert isinstance(hue, str) or hue is None, 'hue must be str or None'
    assert isinstance(thr_auroc, float) and 0. <= thr_auroc <= 1., \
        'thr_auroc must be float and between 0 and 1'
    assert isinstance(thr_auprc, float) and 0. <= thr_auprc <= 1., \
        'thr_auprc must be float and between 0 and 1'
    # Format
    tmp = _format(df=df, cols=['auroc', 'auprc'])
    # Instance
    bp = Plotter(**kwargs)
    # Plot (hue passed through kwargs to avoid a duplicated call, as in pl.fscore)
    args = dict()
    if hue is not None:
        args['hue'] = hue
        args['palette'] = palette
    sns.scatterplot(
        data=tmp,
        x='auroc',
        y='auprc',
        ax=bp.ax,
        **args
    )
    # Baselines
    bp.ax.axvline(x=thr_auroc, ls='--', c='black', zorder=0)
    bp.ax.axhline(y=thr_auprc, ls='--', c='black', zorder=0)
    if hue is not None:
        bp.ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), frameon=False, title=hue)
    return bp._return()
73 |
--------------------------------------------------------------------------------
/src/decoupler/bm/pl/_bar.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import seaborn as sns
3 | from matplotlib.figure import Figure
4 |
5 | from decoupler._docs import docs
6 | from decoupler._Plotter import Plotter
7 | from decoupler.bm.pl._format import _format
8 |
9 |
@docs.dedent
def bar(
    df: pd.DataFrame,
    x: str,
    y: str,
    hue: str | None = None,
    palette: str = 'tab20',
    **kwargs
) -> None | Figure:
    """
    Plot the harmonic mean between two metric statistics as a barplot.

    x-axis represent the harmonic mean between metric statistics.

    y-axis represent a grouping variable.

    Parameters
    ----------
    %(df)s
    x
        Continous variable to plot on x axis.
    %(y)s
    %(hue)s
    %(palette)s
    %(plot)s
    """
    # Validate
    assert isinstance(x, str), 'x must be str'
    assert isinstance(y, str), 'y must be str'
    assert isinstance(hue, str) or hue is None, 'hue must be str or None'
    # Instance
    bp = Plotter(**kwargs)
    # Sort groups by their mean value, best first
    order = (
        df
        .groupby(y)[x]
        .mean()
        .sort_values(ascending=False)
        .index
    )
    args = dict()
    if hue is not None:
        args['hue'] = hue
        args['palette'] = palette
    sns.barplot(
        data=df,
        y=y,
        x=x,
        order=order,
        # Draw on the Plotter's axes (was missing: drew on the implicit current axes)
        ax=bp.ax,
        **args
    )
    if hue is not None and hue != y:
        bp.ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), frameon=False, title=hue)
    return bp._return()
64 |
--------------------------------------------------------------------------------
/src/decoupler/bm/pl/_format.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 |
4 |
5 | def _format(
6 | df: pd.DataFrame,
7 | cols: list,
8 | ) -> pd.DataFrame:
9 | # Validate
10 | assert isinstance(df, pd.DataFrame), 'df must be pandas.DataFrame'
11 | assert isinstance(cols, list), 'cols must be list'
12 | assert 'metric' in df.columns, 'metric must be in df.columns'
13 | assert 'score' in df.columns, 'score must be in df.columns'
14 | # Extract
15 | tmp = df[df['metric'].isin(cols)].copy()
16 | assert tmp.shape[0] > 0, 'cols must be in df["metric"]'
17 | # Add small variations so not same number
18 | rng = np.random.default_rng(seed=0)
19 | tmp.loc[:, 'score'] = tmp.loc[:, 'score'] + rng.normal(loc=0, scale=2.2e-16, size=tmp.shape[0])
20 | tmp.loc[:, 'score'] = tmp.loc[:, 'score'].clip(lower=0)
21 | # Transform
22 | grp_cols = ['net', 'groupby', 'group', 'source', 'method']
23 | grp_cols = [c for c in grp_cols if c in df.columns]
24 | tmp = (
25 | tmp
26 | .pivot(index=grp_cols, columns='metric', values='score')
27 | .reset_index()
28 | ).dropna(axis=1)
29 | if np.all(np.isin(['groupby', 'group'], tmp.columns)):
30 | tmp = (
31 | tmp
32 | .pivot(index=['source', 'method'] + cols, columns='groupby', values='group')
33 | .reset_index()
34 | )
35 | # Remove names
36 | tmp.index.name = None
37 | tmp.columns.name = None
38 | return tmp
39 |
--------------------------------------------------------------------------------
/src/decoupler/bm/pl/_fscore.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import seaborn as sns
3 | from matplotlib.figure import Figure
4 |
5 | from decoupler._docs import docs
6 | from decoupler._Plotter import Plotter
7 | from decoupler.bm.pl._format import _format
8 |
9 |
@docs.dedent
def fscore(
    df: pd.DataFrame,
    hue: str | None = None,
    palette: str = 'tab20',
    **kwargs
) -> None | Figure:
    """
    Plot precision and recall as scatterplot.

    x-axis represent the recall of correctly predicted sources after filtering by significance.
    The higher value the better performance is.

    y-axis represent the precision of correctly predicted sources after filtering by significance.
    The higher value the better performance is.

    Parameters
    ----------
    %(df)s
    %(hue)s
    %(palette)s
    %(plot)s
    """
    # Validate
    assert isinstance(hue, str) or hue is None, 'hue must be str or None'
    # Format
    tmp = _format(df=df, cols=['recall', 'precision'])
    # Instance
    bp = Plotter(**kwargs)
    # Plot
    args = dict()
    if hue is not None:
        args['hue'] = hue
        args['palette'] = palette
    sns.scatterplot(
        data=tmp,
        x='recall',
        y='precision',
        ax=bp.ax,
        **args
    )
    if hue is not None:
        bp.ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), frameon=False, title=hue)
    return bp._return()
54 |
--------------------------------------------------------------------------------
/src/decoupler/bm/pl/_qrank.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | import seaborn as sns
4 | from matplotlib.figure import Figure
5 |
6 | from decoupler._docs import docs
7 | from decoupler._Plotter import Plotter
8 | from decoupler.bm.pl._format import _format
9 |
10 |
@docs.dedent
def qrank(
    df: pd.DataFrame,
    hue: str | None = None,
    palette: str = 'tab20',
    thr_rank: float = 0.5,
    thr_pval: float = 0.05,
    **kwargs
) -> None | Figure:
    """
    Plot 1-qrank and p-value.

    x-axis represent the one minus the quantile normalized ranks for the sources that belong to the ground truth.
    The closer to 1 the better performance is.

    y-axis represents the p-value (-log10) obtained after performing a Ranksums test between the quantile normalized
    ranks of the sources that belong to the ground truth against the sources that do not.
    The higher value the better performance is.

    Parameters
    ----------
    %(df)s
    %(hue)s
    %(palette)s
    thr_rank
        Dashed line to indicate baseline of ranks.
    thr_pval
        Dashed line to indicate baseline of p-values.
    %(plot)s
    """
    # Validate
    assert isinstance(hue, str) or hue is None, 'hue must be str or None'
    assert isinstance(thr_rank, float) and 0. <= thr_rank <= 1., \
        'thr_rank must be float and between 0 and 1'
    assert isinstance(thr_pval, float) and 0. <= thr_pval <= 1., \
        'thr_pval must be float and between 0 and 1'
    # Format
    tmp = _format(df=df, cols=['1-qrank', '-log10(pval)'])
    # Instance
    bp = Plotter(**kwargs)
    # Plot (hue passed through kwargs to avoid a duplicated call, as in pl.fscore)
    args = dict()
    if hue is not None:
        args['hue'] = hue
        args['palette'] = palette
    sns.scatterplot(
        data=tmp,
        x='1-qrank',
        y='-log10(pval)',
        ax=bp.ax,
        **args
    )
    bp.ax.set_xlim(0, 1)
    bp.ax.axvline(x=thr_rank, ls='--', c='black', zorder=0)
    bp.ax.axhline(y=-np.log10(thr_pval), ls='--', c='black', zorder=0)
    # Label matches the plotted quantity, -log10(pval) (sign was missing before)
    bp.ax.set_ylabel(r'$-\log_{10}$(pval)')
    if hue is not None:
        bp.ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), frameon=False, title=hue)
    return bp._return()
75 |
--------------------------------------------------------------------------------
/src/decoupler/ds/__init__.py:
--------------------------------------------------------------------------------
1 | from decoupler.ds._bulk import hsctgfb, knocktf
2 | from decoupler.ds._scell import pbmc3k, covid5k, erygast1k
3 | from decoupler.ds._spatial import msvisium
4 | from decoupler.ds._toy import toy, toy_bench
5 | from decoupler.ds._utils import ensmbl_to_symbol
6 |
--------------------------------------------------------------------------------
/src/decoupler/ds/_bulk.py:
--------------------------------------------------------------------------------
1 | from anndata import AnnData
2 |
3 | from decoupler._docs import docs
4 | from decoupler._log import _log
5 | from decoupler._download import _download
6 |
7 |
@docs.dedent
def hsctgfb(
    verbose: bool = False,
) -> AnnData:
    """
    Downloads RNA-seq bulk data consisting of 6 samples of hepatic stellate cells
    (HSC) where three of them were activated by the cytokine
    Transforming growth factor (TGF-β) :cite:`hsc_tgfb`.

    Parameters
    ----------
    %(verbose)s

    Returns
    -------
    AnnData object.
    """
    # Fetch the counts table from GEO
    url = (
        'https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE151251&format=file&'
        'file=GSE151251%5FHSCs%5FCtrl%2Evs%2EHSCs%5FTGFb%2Ecounts%2Etsv%2Egz'
    )
    counts = _download(url, compression='gzip', sep='\t', verbose=verbose)
    # Drop duplicated genes, keep count columns only, and flip to samples x genes
    counts = counts.drop_duplicates('GeneName').set_index('GeneName').iloc[:, 5:].T
    counts.columns.name = None
    adata = AnnData(counts)
    adata.X = adata.X.astype(float)
    # Derive sample annotations from the sample identifiers
    sample_ids = adata.obs.index
    adata.obs['condition'] = ['control' if '-Ctrl' in s else 'treatment' for s in sample_ids]
    adata.obs['sample_id'] = [s.split('_')[0] for s in sample_ids]
    for col in ('condition', 'sample_id'):
        adata.obs[col] = adata.obs[col].astype('category')
    _log(f'generated AnnData with shape={adata.shape}', level='info', verbose=verbose)
    return adata
44 |
45 |
@docs.dedent
def knocktf(
    thr_fc: int | float | None = -1,
    verbose: bool = False,
) -> AnnData:
    """
    Downloads gene contrast statistics from KnockTF :cite:`knocktf`,
    a large collection of transcription factor (TF) RNA-seq
    perturbation experiments.

    The values in ``adata.X`` represent the log2FCs of genes between
    perturbed and unperturbed samples.

    It also downloads all metadata associated to each perturbation
    experiment, such as which TF was perturbed, or in which tissue.

    Parameters
    ----------
    thr_fc
        Keep only experiments whose metadata ``logFC`` is below this
        threshold (effective perturbations). ``None`` disables filtering.
    %(verbose)s

    Returns
    -------
    AnnData object.
    """
    assert isinstance(thr_fc, (int, float)) or thr_fc is None, \
        'thr_fc must be numeric or None'
    # Download expression matrix
    url = (
        'https://zenodo.org/record/7035528/'
        'files/knockTF_expr.csv?download=1'
    )
    adata = _download(url, sep=',', index_col=0, verbose=verbose)
    # Download experiment metadata
    url = (
        'https://zenodo.org/record/7035528/'
        'files/knockTF_meta.csv?download=1'
    )
    obs = _download(url, sep=',', index_col=0, verbose=verbose)
    # type_p=-1 flags that perturbations are expected to decrease activity
    obs = obs.rename(columns={'TF': 'source'}).assign(type_p=-1)
    # Make anndata
    adata = AnnData(X=adata, obs=obs)
    # Filter by thr_fc
    if thr_fc is not None:
        msk = adata.obs['logFC'] < thr_fc
        prc_keep = (msk.sum()/msk.size) * 100
        m = f'filtering AnnData for thr_fc={thr_fc}, will keep {prc_keep:.2f}% of observations'
        _log(m, level='info', verbose=verbose)
        adata = adata[msk, :].copy()
    m = f'generated AnnData with shape={adata.shape}'
    _log(m, level='info', verbose=verbose)
    return adata
96 |
--------------------------------------------------------------------------------
/src/decoupler/ds/_spatial.py:
--------------------------------------------------------------------------------
1 | import requests
2 | import io
3 | import gzip
4 | import json
5 |
6 | import pandas as pd
7 | import scipy.io as sio
8 | from matplotlib.image import imread
9 | from anndata import AnnData
10 |
11 | from decoupler._docs import docs
12 | from decoupler._log import _log
13 |
14 |
@docs.dedent
def msvisium(
    verbose: bool = False,
) -> AnnData:
    """
    Downloads a spatial RNA-seq (Visium) human sample with multiple sclerosis
    displaying a chronic active lesion in the white matter of the brain :cite:`msvisium`.

    Parameters
    ----------
    %(verbose)s

    Returns
    -------
    AnnData object.
    """
    # Base GEO URL; per-file suffixes are appended below
    url = (
        'https://www.ncbi.nlm.nih.gov/geo/download/'
        '?acc=GSM8563708&format=file&file=GSM8563708%5FMS377T%5F'
    )
    # Download mat
    response = requests.get(url + 'matrix%2Emtx%2Egz')
    with gzip.GzipFile(fileobj=io.BytesIO(response.content)) as f:
        # Transpose to obs x var, convert to CSR and round to integer counts
        X = sio.mmread(f).T.tocsr().rint()
    X.eliminate_zeros()
    # NOTE(review): usecols=[1] keeps only the gene-symbol column, which then
    # becomes the index via index_col=0, so var carries no data columns
    var = pd.read_csv(
        url + 'features%2Etsv%2Egz',
        compression='gzip',
        sep='\t',
        header=None,
        usecols=[1],
        index_col=0,
    )
    var.index.name = None
    # Remove repeated genes
    msk_var = ~(var.index.duplicated(keep='first'))
    var = var.loc[msk_var]
    X = X[:, msk_var]
    obs = pd.read_csv(
        url + 'barcodes%2Etsv%2Egz',
        compression='gzip',
        sep='\t',
        header=None,
        usecols=[0],
        index_col=0,
    )
    obs.index.name = None
    # Create anndata
    adata = AnnData(X=X, obs=obs, var=var)
    # Add images
    adata.uns['spatial'] = dict()
    adata.uns['spatial']['MS377T'] = dict()
    adata.uns['spatial']['MS377T']['images'] = dict()
    response = requests.get(url + 'scalefactors%5Fjson%2Ejson%2Egz')
    with gzip.GzipFile(fileobj=io.BytesIO(response.content)) as f:
        adata.uns['spatial']['MS377T']['scalefactors'] = json.load(f)
    response = requests.get(url + 'tissue%5Fhires%5Fimage%2Epng%2Egz')
    with gzip.GzipFile(fileobj=io.BytesIO(response.content)) as f:
        adata.uns['spatial']['MS377T']['images']['hires'] = imread(f)
    response = requests.get(url + 'tissue%5Flowres%5Fimage%2Epng%2Egz')
    with gzip.GzipFile(fileobj=io.BytesIO(response.content)) as f:
        adata.uns['spatial']['MS377T']['images']['lowres'] = imread(f)
    # Add coordinates
    coords = pd.read_csv(
        url + 'tissue%5Fpositions%5Flist%2Ecsv%2Egz',
        compression='gzip',
        index_col=0,
    )
    adata.obs = adata.obs.join(coords, how='left')
    adata.obsm['spatial'] = adata.obs[['pxl_col_in_fullres', 'pxl_row_in_fullres']].values
    adata.obs.drop(
        columns=['in_tissue', 'pxl_row_in_fullres', 'pxl_col_in_fullres'],
        inplace=True,
    )
    # Add metadata
    url_meta = (
        'https://cells-test.gi.ucsc.edu/ms-subcortical-lesions/'
        'visium-ms377T/meta.tsv'
    )
    meta = pd.read_csv(url_meta, sep='\t', usecols=[0, 4], index_col=0)
    # Subset to the barcodes present in the external metadata
    adata = adata[meta.index, :].copy()
    adata.obs = adata.obs.join(meta, how='right')
    adata.obs['niches'] = adata.obs['niches'].astype('category')
    adata.obs.index.name = None
    # Filter vars
    # Keep genes detected (non-zero) in at least 10 spots
    msk_var = adata.X.getnnz(axis=0) > 9
    adata = adata[:, msk_var].copy()
    m = f'generated AnnData with shape={adata.shape}'
    _log(m, level='info', verbose=verbose)
    return adata
105 |
--------------------------------------------------------------------------------
/src/decoupler/ds/_utils.py:
--------------------------------------------------------------------------------
1 | import requests
2 | import os
3 | import io
4 |
5 | import pandas as pd
6 |
7 |
8 | def ensmbl_to_symbol(
9 | genes: list,
10 | organism: str,
11 | ) -> list:
12 | """
13 | Transforms ensembl gene ids to gene symbols.
14 |
15 | Parameters
16 | ----------
17 | genes
18 | List of ensembl gene ids to transform.
19 |
20 | Returns
21 | -------
22 | List of gene symbols
23 | """
24 | url = (
25 | 'http://www.ensembl.org/biomart/martservice?query='
26 | ''
30 | )
31 | # Organisms
32 | # hsapiens_gene_ensembl
33 | # mmusculus_gene_ensembl
34 | # dmelanogaster_gene_ensembl
35 | # rnorvegicus_gene_ensembl
36 | # drerio_gene_ensembl
37 | # celegans_gene_ensembl
38 | # scerevisiae_gene_ensembl
39 | # Validate
40 | assert isinstance(genes, list), 'genes must be list'
41 | assert isinstance(organism, str), f'organism must be str'
42 | # Try different mirrors
43 | response = requests.get(url.format(miror='www', organism=organism))
44 | if any(msg in response.text for msg in ['Service unavailable', 'Gateway Time-out']):
45 | response = requests.get(url.format(miror='useast', organism=organism))
46 | if any(msg in response.text for msg in ['Service unavailable', 'Gateway Time-out']):
47 | response = requests.get(url.format(miror='asia', organism=organism))
48 | if not any(msg in response.text for msg in ['Service unavailable', 'Gateway Time-out']):
49 | eids = pd.read_csv(io.StringIO(response.text), sep='\t', header=None, index_col=0)[1].to_dict()
50 | elif organism in ['hsapiens_gene_ensembl', 'mmusculus_gene_ensembl']:
51 | url = f'https://zenodo.org/records/15551885/files/{organism}.csv.gz?download=1'
52 | eids = pd.read_csv(url, index_col=0, compression='gzip')['symbol'].to_dict()
53 | else:
54 | assert False, 'ensembl servers are down, try again later'
55 | return [eids[g] if g in eids else None for g in genes]
56 |
--------------------------------------------------------------------------------
/src/decoupler/mt/__init__.py:
--------------------------------------------------------------------------------
1 | from decoupler._Method import _show_methods
2 | from decoupler.mt._methods import aucell
3 | from decoupler.mt._methods import gsea
4 | from decoupler.mt._methods import gsva
5 | from decoupler.mt._methods import mdt
6 | from decoupler.mt._methods import mlm
7 | from decoupler.mt._methods import ora
8 | from decoupler.mt._methods import udt
9 | from decoupler.mt._methods import ulm
10 | from decoupler.mt._methods import viper
11 | from decoupler.mt._methods import waggr
12 | from decoupler.mt._methods import zscore
13 | from decoupler.mt._methods import _methods
14 | from decoupler.mt._decouple import decouple
15 | from decoupler.mt._consensus import consensus
16 |
def show() -> None:
    """Displays the methods available in decoupler"""
    # Delegates to the shared registry helper; see decoupler._Method._show_methods
    return _show_methods(_methods)
20 |
--------------------------------------------------------------------------------
/src/decoupler/mt/_aucell.py:
--------------------------------------------------------------------------------
1 | from typing import Tuple
2 |
3 | import numpy as np
4 | import scipy.stats as sts
5 | import scipy.sparse as sps
6 | from tqdm.auto import tqdm
7 | import numba as nb
8 |
9 | from decoupler._docs import docs
10 | from decoupler._log import _log
11 | from decoupler._Method import MethodMeta, Method
12 | from decoupler.pp.net import _getset
13 |
14 |
@nb.njit(parallel=True, cache=True)
def _auc(
    row: np.ndarray,
    cnct: np.ndarray,
    starts: np.ndarray,
    offsets: np.ndarray,
    n_up: int,
    nsrc: int,
) -> np.ndarray:
    # Recovery-curve AUC of every feature set for one ranked observation.
    # row holds ordinal ranks (smaller rank = higher original value);
    # cnct/starts/offsets encode the flattened feature sets; n_up is the
    # rank cutoff; nsrc is the number of feature sets.
    # Empty acts
    es = np.zeros(nsrc)
    # For each feature set
    for j in nb.prange(nsrc):
        # Extract feature set
        fset = _getset(cnct, starts, offsets, j)
        # Compute max AUC for fset
        # Best case: all members of fset occupy the top ranks
        x_th = np.arange(1, stop=fset.shape[0] + 1)
        x_th = x_th[x_th < n_up]
        max_auc = np.sum(np.diff(np.append(x_th, n_up)) * x_th)
        # Compute AUC
        # Step curve over the ranks of fset members that fall within the cutoff
        x = row[fset]
        x = np.sort(x[x <= n_up])
        y = np.arange(x.shape[0]) + 1
        x = np.append(x, n_up)
        # Update acts matrix
        # Normalize the observed AUC by the best achievable one
        es[j] = np.sum(np.diff(x) * y) / max_auc
    return es
42 |
43 |
44 | def _validate_n_up(
45 | nvar: int,
46 | n_up: int | float | None = None,
47 | ) -> int:
48 | assert isinstance(n_up, (int, float)) or n_up is None, 'n_up must be numerical or None'
49 | if n_up is None:
50 | n_up = np.ceil(0.05 * nvar)
51 | n_up = int(np.clip(n_up, a_min=2, a_max=nvar))
52 | else:
53 | n_up = int(np.ceil(n_up))
54 | assert nvar >= n_up > 1, f'For nvar={nvar}, n_up={n_up} must be between 1 and {nvar}'
55 | return n_up
56 |
57 |
@docs.dedent
def _func_aucell(
    mat: np.ndarray,
    cnct: np.ndarray,
    starts: np.ndarray,
    offsets: np.ndarray,
    n_up: int | float | None = None,
    verbose: bool = False,
) -> Tuple[np.ndarray, None]:
    r"""
    Area Under the Curve for set enrichment within single cells (AUCell) :cite:`aucell`.

    Given a ranked list of features per observation, AUCell calculates the AUC by measuring how early the features in
    the set appear in this ranking. Specifically, the enrichment score :math:`ES` is:

    .. math::

        {ES}_{i, F} = \int_0^1 {RecoveryCurve}_{i, F}(r_i) \, dr

    Where:

    - :math:`i` is the obervation
    - :math:`F` is the feature set
    - :math:`{RecoveryCurve}_{i, F}(r_i)` is the proportion of features from :math:`F` recovered in the top :math:`r_i`-fraction of the ranked list for observation :math:`i`

    %(notest)s

    %(params)s
    n_up
        Number of features to include in the AUC calculation.
        If ``None``, the top 5% of features based on their magnitude are selected.

    %(returns)s
    """
    nobs, nvar = mat.shape
    nsrc = starts.size
    n_up = _validate_n_up(nvar, n_up)
    m = f'aucell - calculating {nsrc} AUCs for {nvar} targets across {nobs} observations, categorizing features at rank={n_up}'
    _log(m, level='info', verbose=verbose)
    es = np.zeros(shape=(nobs, nsrc))
    for i in tqdm(range(mat.shape[0]), disable=not verbose):
        # Densify one row at a time to keep memory bounded for sparse inputs
        if isinstance(mat, sps.csr_matrix):
            row = mat[i].toarray()[0]
        else:
            row = mat[i]
        # Ordinal ranks with rank 1 assigned to the largest value (input negated)
        row = sts.rankdata(a=-row, method='ordinal')
        es[i] = _auc(row=row, cnct=cnct, starts=starts, offsets=offsets, n_up=n_up, nsrc=nsrc)
    # Second element is None: AUCell produces no p-values
    return es, None
106 |
107 |
# Registration: metadata describing AUCell for the decoupler method registry
_aucell = MethodMeta(
    name='aucell',
    desc='AUCell',
    func=_func_aucell,
    stype='categorical',
    adj=False,
    weight=False,
    test=False,
    limits=(0, 1),  # scores are normalized AUCs bounded in [0, 1]
    reference='https://doi.org/10.1038/nmeth.4463',
)
aucell = Method(_method=_aucell)
120 |
--------------------------------------------------------------------------------
/src/decoupler/mt/_decouple.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 |
3 | from decoupler._docs import docs
4 | from decoupler._datatype import DataType
5 | from decoupler.mt._methods import _methods
6 | from decoupler.mt._consensus import consensus
7 |
8 |
@docs.dedent
def decouple(
    data: DataType,
    net: pd.DataFrame,
    methods: str | list = 'all',
    args: dict = dict(),
    cons: bool = False,
    **kwargs
) -> dict | None:
    """
    Runs multiple enrichment methods sequentially.

    Parameters
    ----------
    %(data)s
    %(net)s
    methods
        List of methods to run, or ``'all'`` to run every available method.
    args
        Dictionary of dictionaries containing method-specific keyword arguments.
        It is not modified by this function.
    cons
        Whether to get a consensus score across the used methods.
    %(tmin)s
    %(raw)s
    %(empty)s
    %(bsize)s
    %(verbose)s

    Returns
    -------
    Dictionary of scores and adjusted p-values per method when the input is
    matrix-like, otherwise results are written into the AnnData object.
    """
    # Validate
    _mdict = {m.name: m for m in _methods}
    if isinstance(methods, str):
        if methods == 'all':
            methods = _mdict.keys()
        else:
            methods = [methods]
    methods = set(methods)
    assert methods.issubset(_mdict), \
        f'methods={methods} must be in decoupler.\nUse decoupler.mt.show_methods to check which ones are available'
    assert all(k in methods for k in args), \
        f'All keys in args={args.keys()} must belong to a method in methods={methods}'
    # Copy mutables so neither the shared default nor the caller's dicts get mutated
    args = dict(args)
    kwargs = kwargs.copy()
    kwargs.setdefault('verbose', False)
    # Run each method
    all_res = {}
    for name in methods:
        mth = _mdict[name]
        arg = args.get(name, {})
        res = mth(data=data, net=net, **arg, **kwargs)
        # res is falsy (None) when results were written into an AnnData object
        if res:
            res = {
                f'score_{mth.name}': res[0],
                f'padj_{mth.name}': res[1],
            }
            all_res = all_res | res
    if all_res:
        if cons:
            all_res['score_consensus'], all_res['padj_consensus'] = consensus(all_res, verbose=kwargs['verbose'])
        return all_res
    elif cons:
        consensus(data, verbose=kwargs['verbose'])
69 |
--------------------------------------------------------------------------------
/src/decoupler/mt/_mdt.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | from typing import Tuple
3 |
4 | import numpy as np
5 | import scipy.sparse as sps
6 | from tqdm.auto import tqdm
7 |
8 | from decoupler._odeps import xgboost, _check_import
9 | from decoupler._docs import docs
10 | from decoupler._log import _log
11 | from decoupler._Method import MethodMeta, Method
12 |
13 |
def _xgbr(
    x: np.ndarray,
    y: np.ndarray,
    **kwargs,
) -> np.ndarray:
    """Fit an XGBoost regressor of y on x and return per-feature importances."""
    model = xgboost.XGBRegressor(**kwargs)
    model.fit(x, y.reshape(-1, 1))
    # Feature importances act as the enrichment scores
    return model.feature_importances_
27 |
28 |
@docs.dedent
def _func_mdt(
    mat: np.ndarray,
    adj: np.ndarray,
    verbose: bool = False,
    **kwargs,
) -> Tuple[np.ndarray, None]:
    r"""
    Multivariate Decision Trees (MDT) :cite:`decoupler`.

    This approach uses the molecular features from one observation as the population of samples
    and it fits a gradient boosted decision trees model with multiple covariates,
    which are the weights of all feature sets :math:`F`. It uses the implementation provided by ``xgboost`` :cite:`xgboost`.

    The enrichment score :math:`ES` for each :math:`F` is then calculated as the importance of each covariate in the model.

    %(notest)s

    %(params)s

    kwargs
        All other keyword arguments are passed to ``xgboost.XGBRegressor``.
    %(returns)s
    """
    # xgboost is an optional dependency; fail with a clear message if absent
    _check_import(xgboost)
    nobs = mat.shape[0]
    nvar, nsrc = adj.shape
    m = f'mdt - fitting {nsrc} multivariate decision tree models (XGBoost) of {nvar} targets across {nobs} observations'
    _log(m, level='info', verbose=verbose)
    es = np.zeros(shape=(nobs, nsrc))
    # One model per observation: regress its feature values on the set weights
    for i in tqdm(range(nobs), disable=not verbose):
        es[i, :] = _xgbr(x=adj, y=mat[i], **kwargs)
    # Second element is None: MDT produces no p-values
    return es, None
63 |
64 |
# Registration: metadata describing MDT for the decoupler method registry
_mdt = MethodMeta(
    name='mdt',
    desc='Multivariate Decision Tree (MDT)',
    func=_func_mdt,
    stype='numerical',
    adj=True,
    weight=True,
    test=False,
    limits=(0, 1),  # feature importances are bounded in [0, 1]
    reference='https://doi.org/10.1093/bioadv/vbac016',
)
mdt = Method(_method=_mdt)
77 |
--------------------------------------------------------------------------------
/src/decoupler/mt/_methods.py:
--------------------------------------------------------------------------------
1 | from decoupler._Method import _show_methods
2 | from decoupler.mt._aucell import aucell
3 | from decoupler.mt._gsea import gsea
4 | from decoupler.mt._gsva import gsva
5 | from decoupler.mt._mdt import mdt
6 | from decoupler.mt._mlm import mlm
7 | from decoupler.mt._ora import ora
8 | from decoupler.mt._udt import udt
9 | from decoupler.mt._ulm import ulm
10 | from decoupler.mt._viper import viper
11 | from decoupler.mt._waggr import waggr
12 | from decoupler.mt._zscore import zscore
13 |
# Registry listing every enrichment Method exposed by decoupler.mt
_methods = [
    aucell,
    gsea,
    gsva,
    mdt,
    mlm,
    ora,
    udt,
    ulm,
    viper,
    waggr,
    zscore,
]
27 |
--------------------------------------------------------------------------------
/src/decoupler/mt/_run.py:
--------------------------------------------------------------------------------
1 | from typing import Tuple, Callable
2 |
3 | import pandas as pd
4 | import numpy as np
5 | from anndata import AnnData
6 | import scipy.sparse as sps
7 | import scipy.stats as sts
8 | from tqdm.auto import tqdm
9 |
10 | from decoupler._log import _log
11 | from decoupler._datatype import DataType
12 | from decoupler.pp.net import prune, adjmat, idxmat
13 | from decoupler.pp.data import extract
14 |
15 |
def _return(
    name: str,
    data: DataType,
    es: pd.DataFrame,
    pv: pd.DataFrame,
    verbose: bool = False,
) -> Tuple[pd.DataFrame, pd.DataFrame] | AnnData | None:
    """
    Attach enrichment results to ``data`` or hand them back directly.

    For non-AnnData inputs the scores and (optional) adjusted p-values
    are returned as a tuple. For AnnData inputs the results are stored
    in ``.obsm`` under ``score_{name}`` / ``padj_{name}``; if observations
    were dropped during extraction, a repaired copy subset to the scored
    observations is returned instead of None.
    """
    # Plain matrix/DataFrame input: nothing to attach, return results as-is
    if not isinstance(data, AnnData):
        return es, pv
    repaired = data.obs_names.size != es.index.size
    if repaired:
        m = 'Provided AnnData contains empty observations, returning repaired object'
        _log(m, level='warn', verbose=verbose)
        # Subset to the observations that were actually scored
        data = data[es.index, :].copy()
    # Store results inside the AnnData object
    data.obsm[f'score_{name}'] = es
    if pv is not None:
        data.obsm[f'padj_{name}'] = pv
    # In-place update returns None, matching the scanpy convention
    return data if repaired else None
39 |
40 |
def _run(
    name: str,
    func: Callable,
    adj: bool,
    test: bool,
    data: DataType,
    net: pd.DataFrame,
    tmin: int | float = 5,
    layer: str | None = None,
    raw: bool = False,
    empty: bool = True,
    bsize: int | float = 250_000,
    verbose: bool = False,
    **kwargs
) -> Tuple[pd.DataFrame, pd.DataFrame] | AnnData | None:
    """
    Shared driver that runs a single enrichment method end to end.

    Extracts the data matrix, prunes the network to measured features,
    calls the method's scoring function ``func`` (batched over rows when
    the matrix is sparse and ``adj`` is True), optionally FDR-corrects
    the resulting p-values, and returns/attaches results via ``_return``.

    Parameters
    ----------
    name
        Method name, used in log messages and ``.obsm`` keys.
    func
        Scoring function returning ``(es, pv)`` arrays.
    adj
        Whether ``func`` expects a dense adjacency matrix (True) or the
        flat indexed representation produced by ``idxmat`` (False).
    test
        Whether ``func`` produces p-values to be FDR-corrected.
    data
        Input data (matrix-like or AnnData).
    net
        Network in long format.
    tmin
        Minimum number of targets per source kept by ``prune``.
    layer
        AnnData layer to read from, if any.
    raw
        Whether to read from ``data.raw``.
    empty
        Whether to drop empty observations/features during extraction.
    bsize
        Number of observations per batch for sparse inputs.
    verbose
        Whether to log progress messages.
    kwargs
        Forwarded to ``func``.

    Returns
    -------
    ``(es, pv)`` DataFrames, a repaired AnnData, or None with results
    stored in-place, depending on the input type (see ``_return``).
    """
    _log(f'{name} - Running {name}', level='info', verbose=verbose)
    # Process data
    mat, obs, var = extract(data, layer=layer, raw=raw, empty=empty, verbose=verbose)
    sparse = sps.issparse(mat)
    # Process net
    net = prune(features=var, net=net, tmin=tmin, verbose=verbose)
    # Handle stat type
    if adj:
        sources, targets, adjm = adjmat(features=var, net=net, verbose=verbose)
        # Handle sparse
        if sparse:
            # Densify and score one batch of rows at a time to bound memory
            nbatch = int(np.ceil(obs.size / bsize))
            es, pv = [], []
            for i in tqdm(range(nbatch), disable=not verbose):
                srt, end = i * bsize, i * bsize + bsize
                bmat = mat[srt:end].toarray()
                bes, bpv = func(bmat, adjm, verbose=verbose, **kwargs)
                es.append(bes)
                pv.append(bpv)
            es = np.vstack(es)
            es = pd.DataFrame(es, index=obs, columns=sources)
        else:
            es, pv = func(mat, adjm, verbose=verbose, **kwargs)
            es = pd.DataFrame(es, index=obs, columns=sources)
    else:
        # Methods that work on ranked/indexed features use the flat
        # sparse set representation instead of a dense adjacency
        sources, cnct, starts, offsets = idxmat(features=var, net=net, verbose=verbose)
        es, pv = func(mat, cnct, starts, offsets, verbose=verbose, **kwargs)
        es = pd.DataFrame(es, index=obs, columns=sources)
    # Handle pvals and FDR correction
    if test:
        pv = np.vstack(pv)
        pv = pd.DataFrame(pv, index=obs, columns=sources)
        if name != 'mlm':
            # NOTE(review): mlm is deliberately excluded from BH correction
            # here — verify its p-values are adjusted elsewhere
            _log(f'{name} - adjusting p-values by FDR', level='info', verbose=verbose)
            pv.loc[:, :] = sts.false_discovery_control(pv.values, axis=1, method='bh')
    else:
        pv = None
    _log(f'{name} - done', level='info', verbose=verbose)
    return _return(name, data, es, pv, verbose=verbose)
95 |
--------------------------------------------------------------------------------
/src/decoupler/mt/_udt.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | from typing import Tuple
3 |
4 | import numpy as np
5 | import scipy.sparse as sps
6 | from tqdm.auto import tqdm
7 |
8 | from decoupler._odeps import xgboost, _check_import
9 | from decoupler._docs import docs
10 | from decoupler._log import _log
11 | from decoupler._Method import MethodMeta, Method
12 |
13 |
def _xgbr(
    x: np.ndarray,
    y: np.ndarray,
    **kwargs,
) -> np.ndarray:
    """
    Fit a single-covariate XGBoost regressor and return its clipped R^2.

    ``x`` and ``y`` are reshaped into column vectors, a model with a small
    default ensemble (10 trees unless overridden via ``kwargs``) is fitted,
    and the coefficient of determination on the training data is returned,
    clipped to the [0, 1] interval.
    """
    if 'n_estimators' not in kwargs:
        kwargs['n_estimators'] = 10
    # Column-vector views expected by the sklearn-style API
    features = x.reshape(-1, 1)
    response = y.reshape(-1, 1)
    model = xgboost.XGBRegressor(**kwargs).fit(features, response)
    # Training R^2; negative values are truncated to zero
    return np.clip(model.score(features, response), 0, 1)
30 |
31 |
@docs.dedent
def _func_udt(
    mat: np.ndarray,
    adj: np.ndarray,
    verbose: bool = False,
    **kwargs,
) -> Tuple[np.ndarray, None]:
    """
    Univariate Decision Tree (UDT) :cite:`decoupler`.

    This approach uses the molecular features from one observation as the population of samples
    and it fits a gradient boosted decision trees model with a single covariate,
    which is the feature weights of a set :math:`F`.
    It uses the implementation provided by ``xgboost`` :cite:`xgboost`.

    The enrichment score :math:`ES` is then calculated as the coefficient of determination :math:`R^2`.

    %(notest)s

    %(params)s

    kwargs
        All other keyword arguments are passed to ``xgboost.XGBRegressor``.
    %(returns)s
    """
    _check_import(xgboost)
    nobs = mat.shape[0]
    nvar, nsrc = adj.shape
    m = f'udt - fitting {nsrc} univariate decision tree models (XGBoost) of {nvar} targets across {nobs} observations'
    _log(m, level='info', verbose=verbose)
    # One regressor per (observation, source) pair; the score is the clipped R^2
    es = np.zeros(shape=(nobs, nsrc))
    for row, profile in enumerate(tqdm(mat, total=nobs, disable=not verbose)):
        for col in range(nsrc):
            es[row, col] = _xgbr(x=adj[:, col], y=profile, **kwargs)
    # UDT produces no p-values
    return es, None
68 |
69 |
# Method registration: metadata describing UDT so it can be exposed
# through the common `Method` interface alongside the other estimators.
_udt = MethodMeta(
    name='udt',
    desc='Univariate Decision Tree (UDT)',
    func=_func_udt,
    stype='numerical',  # operates on numerical molecular readouts
    adj=True,  # consumes a dense (targets x sources) adjacency matrix
    weight=True,  # interaction weights are used as the model covariate
    test=False,  # R^2 scores carry no p-value, so no statistical test
    limits=(0, 1),  # clipped R^2 scores are bounded to [0, 1]
    reference='https://doi.org/10.1093/bioadv/vbac016',
)
# Public, user-facing callable for the UDT method.
udt = Method(_method=_udt)
82 |
--------------------------------------------------------------------------------
/src/decoupler/mt/_ulm.py:
--------------------------------------------------------------------------------
1 | from typing import Tuple
2 |
3 | import numpy as np
4 | import scipy.stats as sts
5 |
6 | from decoupler._docs import docs
7 | from decoupler._log import _log
8 | from decoupler._Method import MethodMeta, Method
9 |
10 |
11 | def _cov(
12 | A: np.ndarray,
13 | b: np.ndarray
14 | ) -> np.ndarray:
15 | return np.dot(b.T - b.mean(), A - A.mean(axis=0)) / (b.shape[0]-1)
16 |
17 |
def _cor(
    A: np.ndarray,
    b: np.ndarray
) -> np.ndarray:
    """
    Pearson correlation between every column of ``b`` and every column of ``A``.

    Computed as the sample covariance divided by the product of the sample
    standard deviations; output shape is ``(b.shape[1], A.shape[1])``.
    """
    sd_a = np.std(A, axis=0, ddof=1)
    # Column vector so the product broadcasts to (b cols, A cols)
    sd_b = np.std(b, axis=0, ddof=1).reshape(-1, 1)
    return _cov(A, b) / (sd_b * sd_a)
25 |
26 |
27 | def _tval(
28 | r: np.ndarray,
29 | df: float
30 | ) -> np.ndarray:
31 | return r * np.sqrt(df / ((1.0 - r + 2.2e-16) * (1.0 + r + 2.2e-16)))
32 |
33 |
@docs.dedent
def _func_ulm(
    mat: np.ndarray,
    adj: np.ndarray,
    tval: bool = True,
    verbose: bool = False,
) -> Tuple[np.ndarray, np.ndarray]:
    r"""
    Univariate Linear Model (ULM) :cite:`decoupler`.

    This approach uses the molecular features from one observation as the population of samples
    and it fits a linear model with a single covariate, which is the feature weights of a set :math:`F`.

    .. math::

        y_i = \beta_0 + \beta_1 x_i + \varepsilon, \quad i = 1, 2, \ldots, n

    Where:

    - :math:`y_i` is the observed feature statistic (e.g. gene expression, :math:`log_{2}FC`, etc.) for feature :math:`i`
    - :math:`x_i` is the weight of feature :math:`i` in feature set :math:`F`. For unweighted sets, membership in the set is indicated by 1, and non-membership by 0.
    - :math:`\beta_0` is the intercept
    - :math:`\beta_1` is the slope coefficient
    - :math:`\varepsilon` is the error term for feature :math:`i`

    .. figure:: /_static/images/ulm.png
        :alt: Univariate Linear Model (ULM) schematic.
        :align: center
        :width: 75%

        Univariate Linear Model (ULM) scheme.
        In this example, the observed gene expression of :math:`Sample_1` is predicted using
        the interaction weights of :math:`TF_1`.
        Since the target genes that have negative weights are lowly expressed,
        and the positive target genes are highly expressed,
        the relationship between the two variables is positive so the obtained :math:`ES` score is positive.
        Scores can be interpreted as active when positive, repressive when negative, and inconclusive when close to 0.

    The enrichment score :math:`ES` is then calculated as the t-value of the slope coefficient.

    .. math::

        ES = t_{\beta_1} = \frac{\hat{\beta}_1}{\mathrm{SE}(\hat{\beta}_1)}

    Where:

    - :math:`t_{\beta_1}` is the t-value of the slope
    - :math:`\mathrm{SE}(\hat{\beta}_1)` is the standard error of the slope

    Next, :math:`p_{value}` are obtained by evaluating the two-sided survival function
    (:math:`sf`) of the Student’s t-distribution.

    .. math::

        p_{value} = 2 \times \mathrm{sf}(|ES|, \text{df})

    %(yestest)s

    %(params)s
    %(tval)s

    %(returns)s
    """
    # Each source is a simple univariate regression: df = n - 2
    n_var, n_src = adj.shape
    df = n_var - 2
    m = f'ulm - fitting {n_src} univariate models of {n_var} observations (targets) with {df} degrees of freedom'
    _log(m, level='info', verbose=verbose)
    # Correlation of every observation profile against every source's weights
    r = _cor(adj, mat.T)
    # t-statistics and their two-sided p-values
    t = _tval(r, df)
    pv = 2 * sts.t.sf(np.abs(t), df)
    if not tval:
        # Slope coefficient: r rescaled by the ratio of standard deviations
        sd_y = np.std(mat.T, ddof=1, axis=0).reshape(-1, 1)
        sd_x = np.std(adj, ddof=1, axis=0)
        es = r * (sd_y / sd_x)
    else:
        es = t
    return es, pv
114 |
115 |
# Method registration: metadata describing ULM so it can be exposed
# through the common `Method` interface alongside the other estimators.
_ulm = MethodMeta(
    name='ulm',
    desc='Univariate Linear Model (ULM)',
    func=_func_ulm,
    stype='numerical',  # operates on numerical molecular readouts
    adj=True,  # consumes a dense (targets x sources) adjacency matrix
    weight=True,  # interaction weights enter the regression as covariate
    test=True,  # t-statistics yield p-values, so FDR correction applies
    limits=(-np.inf, +np.inf),  # t-values / slopes are unbounded
    reference='https://doi.org/10.1093/bioadv/vbac016',
)
# Public, user-facing callable for the ULM method.
ulm = Method(_method=_ulm)
128 |
--------------------------------------------------------------------------------
/src/decoupler/mt/_zscore.py:
--------------------------------------------------------------------------------
1 | from typing import Tuple
2 |
3 | import numpy as np
4 | import scipy.stats as sts
5 |
6 | from decoupler._docs import docs
7 | from decoupler._log import _log
8 | from decoupler._Method import MethodMeta, Method
9 |
10 |
@docs.dedent
def _func_zscore(
    mat: np.ndarray,
    adj: np.ndarray,
    flavor: str = 'RoKAI',
    verbose: bool = False,
) -> Tuple[np.ndarray, np.ndarray]:
    r"""
    Z-score (ZSCORE) :cite:`zscore`.

    This approach computes the mean value of the molecular features for known targets,
    optionally subtracts the overall mean of all measured features,
    and normalizes the result by the standard deviation of all features and the square
    root of the number of targets.

    This formulation was originally introduced in KSEA, which explicitly includes the
    subtraction of the global mean to compute the enrichment score :math:`ES`.

    .. math::

        ES = \frac{(\mu_s-\mu_p) \times \sqrt m }{\sigma}

    Where:

    - :math:`\mu_s` is the mean of targets
    - :math:`\mu_p` is the mean of all features
    - :math:`m` is the number of targets
    - :math:`\sigma` is the standard deviation of all features

    However, in the RoKAI implementation, this global mean subtraction was omitted.

    .. math::

        ES = \frac{\mu_s \times \sqrt m }{\sigma}

    A two-sided :math:`p_{value}` is then calculated from the consensus score using
    the survival function :math:`sf` of the standard normal distribution.

    .. math::

        p = 2 \times \mathrm{sf}\bigl(\lvert \mathrm{ES} \rvert \bigr)

    %(yestest)s

    %(params)s

    flavor
        Which flavor to use when calculating the z-score, either KSEA or RoKAI.

    %(returns)s
    """
    assert isinstance(flavor, str) and flavor in ['KSEA', 'RoKAI'], \
        'flavor must be str and KSEA or RoKAI'
    nvar, nsrc = adj.shape
    m = f'zscore - calculating {nsrc} scores with flavor={flavor}'
    _log(m, level='info', verbose=verbose)
    # Per-observation standard deviation across all features
    stds = np.std(mat, axis=1, ddof=1)
    # Per the formulas documented above: KSEA subtracts the global mean of
    # all features, RoKAI omits that subtraction. (Fixed: the branches were
    # previously swapped relative to the documented behavior.)
    if flavor == 'KSEA':
        mean_all = np.mean(mat, axis=1)
    elif flavor == 'RoKAI':
        mean_all = np.zeros(stds.shape)
    # sqrt of the number of targets per source
    n = np.sqrt(np.count_nonzero(adj, axis=0))
    # Weighted mean of each source's targets, normalized by total |weight|
    mean = mat.dot(adj) / np.sum(np.abs(adj), axis=0)
    es = ((mean - mean_all.reshape(-1, 1)) * n) / stds.reshape(-1, 1)
    # Two-sided p-value from the standard normal survival function
    pv = 2 * sts.norm.sf(np.abs(es))
    return es, pv
78 |
79 |
# Method registration: metadata describing ZSCORE so it can be exposed
# through the common `Method` interface alongside the other estimators.
_zscore = MethodMeta(
    name='zscore',
    desc='Z-score (ZSCORE)',
    func=_func_zscore,
    stype='numerical',  # operates on numerical molecular readouts
    adj=True,  # consumes a dense (targets x sources) adjacency matrix
    weight=True,  # interaction weights enter the weighted target mean
    test=True,  # normal-distribution p-values, so FDR correction applies
    limits=(-np.inf, +np.inf),  # z-scores are unbounded
    reference='https://doi.org/10.1038/s41467-021-21211-6',
)
# Public, user-facing callable for the ZSCORE method.
zscore = Method(_method=_zscore)
92 |
--------------------------------------------------------------------------------
/src/decoupler/op/__init__.py:
--------------------------------------------------------------------------------
1 | from decoupler.op._translate import show_organisms, translate
2 | from decoupler.op._resource import show_resources, resource
3 | from decoupler.op._collectri import collectri
4 | from decoupler.op._dorothea import dorothea
5 | from decoupler.op._hallmark import hallmark
6 | from decoupler.op._progeny import progeny
7 |
--------------------------------------------------------------------------------
/src/decoupler/op/_collectri.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 |
4 | from decoupler._docs import docs
5 | from decoupler._log import _log
6 | from decoupler._download import URL_INT, _download
7 | from decoupler.op._translate import translate
8 | from decoupler.op._dtype import _infer_dtypes
9 |
10 |
@docs.dedent
def collectri(
    organism: str = 'human',
    remove_complexes: bool = False,
    license: str = 'academic',
    verbose: bool = False,
) -> pd.DataFrame:
    """
    CollecTRI gene regulatory network :cite:p:`collectri`.

    Wrapper to access CollecTRI gene regulatory network. CollecTRI is a
    comprehensive resource containing a curated collection of transcription
    factors (TFs) and their target genes. It is an expansion of DoRothEA.
    Each interaction is weighted by its mode of regulation (either positive or negative).

    Parameters
    ----------
    %(organism)s
    remove_complexes
        Whether to remove complexes.
    %(license)s
    %(verbose)s

    Returns
    -------
    Dataframe in long format containing target genes for each TF with their associated weights,
    and if available, the PMIDs supporting each interaction.
    """
    url = 'https://zenodo.org/records/8192729/files/CollecTRI_regulons.csv?download=1'
    ct = _download(url, verbose=verbose)

    # Normalize the resources column: drop the CollecTRI tag, strip
    # underscores from the remaining resource names, and sort them
    def _clean_resources(entry):
        parts = entry.replace('CollecTRI', '').split(';')
        names = sorted(part.replace('_', '') for part in parts if part != '')
        return ';'.join(names)

    ct['resources'] = [_clean_resources(entry) for entry in ct['resources']]
    # Strip the CollecTRI prefix from the reference PMIDs
    ct['references'] = ct['references'].str.replace('CollecTRI:', '')
    ct = ct.dropna()
    if remove_complexes:
        # AP1 and NFKB are protein complexes rather than single TFs
        ct = ct[~ct['source'].isin(['AP1', 'NFKB'])]
    ct = _infer_dtypes(ct)
    if organism != 'human':
        ct = translate(ct, columns=['source', 'target'], target_organism=organism, verbose=verbose)
    return ct.drop_duplicates(['source', 'target']).reset_index(drop=True)
58 |
--------------------------------------------------------------------------------
/src/decoupler/op/_dorothea.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 |
4 | from decoupler._docs import docs
5 | from decoupler._log import _log
6 | from decoupler._download import URL_INT, _download
7 | from decoupler.op._translate import translate
8 | from decoupler.op._dtype import _infer_dtypes
9 |
10 |
@docs.dedent
def dorothea(
    organism: str = 'human',
    levels: str | list = ['A', 'B', 'C'],
    dict_weights: dict | None = None,
    license: str = 'academic',
    verbose: bool = False,
) -> pd.DataFrame:
    """
    DoRothEA gene regulatory network :cite:p:`dorothea`.

    Wrapper to access DoRothEA gene regulatory network. DoRothEA is a
    comprehensive resource containing a curated collection of transcription
    factors (TFs) and their target genes. Each interaction is weighted by its
    mode of regulation (either positive or negative) and by its confidence
    level.

    Parameters
    ----------
    %(organism)s
    levels
        List of confidence levels to return. Goes from A to D, A being the
        most confident and D being the less.
    dict_weights
        Dictionary of values to divide the mode of regulation (-1 or 1),
        one for each confidence level. Bigger values will generate weights
        close to zero.
    %(license)s
    %(verbose)s

    Returns
    -------
    Dataframe in long format containing target genes for each TF with their associated weights and confidence level.
    """
    # Validate levels and weights
    assert isinstance(levels, (str, list)), 'levels must be str or list'
    if isinstance(levels, str):
        levels = [levels]
    assert all(lvl in {'A', 'B', 'C', 'D'} for lvl in levels), 'levels can only contain any of these values: A, B, C, and/or D'
    assert isinstance(dict_weights, dict) or dict_weights is None, 'dict_weights must be dict or None'
    if dict_weights:
        assert all(k in levels for k in dict_weights), f'dict_weights keys must be in levels={levels}'
        weights = dict_weights
    else:
        weights = {'A': 1, 'B': 2, 'C': 3, 'D': 4}
    weights = {k: weights[k] for k in weights if k in levels}
    # Fail early if a requested level has no weight: otherwise the weight
    # computation further below would raise an uninformative KeyError
    assert all(lvl in weights for lvl in levels), \
        f'dict_weights must provide a weight for every level in levels={levels}'
    # Read
    str_levels = ','.join(levels)
    url_ext = f'datasets=dorothea&dorothea_levels={str_levels}&fields=dorothea_level&license={license}'
    url = URL_INT + url_ext
    m = f'dorothea - Accessing DoRothEA (levels {str_levels}) with {license} license and weights={weights}'
    _log(m, level='info', verbose=verbose)
    do = _download(url, sep='\t', verbose=verbose)
    # Filter extra columns
    do = do[[
        'source_genesymbol', 'target_genesymbol',
        'is_stimulation', 'is_inhibition',
        'consensus_direction', 'consensus_stimulation',
        'consensus_inhibition', 'dorothea_level',
    ]]
    # Remove duplicates
    do = do[~do.duplicated(['source_genesymbol', 'dorothea_level', 'target_genesymbol'])]
    # Assign top level if more than 2
    do['dorothea_level'] = [lvl.split(';')[0] for lvl in do['dorothea_level']]
    # Assign mode of regulation: when an interaction is annotated as both
    # stimulation and inhibition the consensus decides; interactions with
    # no annotation default to activation (+1)
    mor = []
    for i in do.itertuples():
        if i.is_stimulation and i.is_inhibition:
            if i.consensus_stimulation:
                mor.append(1)
            else:
                mor.append(-1)
        elif i.is_stimulation:
            mor.append(1)
        elif i.is_inhibition:
            mor.append(-1)
        else:
            mor.append(1)
    do['mor'] = mor
    # Compute weight based on confidence: mor/confidence
    do['weight'] = [i.mor / weights[i.dorothea_level] for i in do.itertuples()]
    # Format
    do = (
        do
        .rename(columns={'source_genesymbol': 'source', 'target_genesymbol': 'target', 'dorothea_level': 'confidence'})
        [['source', 'target', 'weight', 'confidence']]
        .sort_values('confidence')
    )
    do = do[do['confidence'].isin(levels)].reset_index(drop=True)
    do = _infer_dtypes(do)
    if organism != 'human':
        do = translate(do, columns=['source', 'target'], target_organism=organism, verbose=verbose)
    do = do.drop_duplicates(['source', 'target']).reset_index(drop=True)
    return do
104 |
--------------------------------------------------------------------------------
/src/decoupler/op/_dtype.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 |
3 |
4 | def _infer_dtypes(
5 | df: pd.DataFrame
6 | ) -> pd.DataFrame:
7 | for col in df.columns:
8 | try:
9 | df[col] = pd.to_numeric(df[col])
10 | continue
11 | except ValueError:
12 | pass
13 | if df[col].dtype == 'string':
14 | df[col] = df[col].astype(str)
15 | lowered = df[col].str.lower()
16 | if lowered.isin(["true", "false"]).all():
17 | df[col] = lowered == "true"
18 | continue
19 | return df
20 |
--------------------------------------------------------------------------------
/src/decoupler/op/_hallmark.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 |
4 | from decoupler._docs import docs
5 | from decoupler._log import _log
6 | from decoupler._download import URL_INT, _download
7 | from decoupler.op._translate import translate
8 | from decoupler.op._dtype import _infer_dtypes
9 |
10 |
@docs.dedent
def hallmark(
    organism: str = 'human',
    license: str = 'academic',
    verbose: bool = False,
) -> pd.DataFrame:
    """
    Hallmark gene sets :cite:p:`msigdb`.

    Hallmark gene sets summarize and represent specific well-defined
    biological states or processes and display coherent expression.

    Parameters
    ----------
    %(organism)s
    %(license)s
    %(verbose)s

    Returns
    -------
    Dataframe in long format containing the hallmark gene sets.
    """
    url = 'https://static.omnipathdb.org/tables/msigdb-hallmark.tsv.gz'
    hm = _download(url, sep='\t', compression='gzip', verbose=verbose)
    # Keep only the gene set name and its member genes
    hm = hm[['geneset', 'genesymbol']]
    hm['geneset'] = hm['geneset'].str.replace('HALLMARK_', '')
    # Complex members are encoded as COMPLEX:A_B; expand to one row per gene
    hm['genesymbol'] = hm['genesymbol'].str.replace('COMPLEX:', '').str.split('_')
    hm = _infer_dtypes(hm.explode('genesymbol'))
    if organism != 'human':
        hm = translate(hm, columns=['genesymbol'], target_organism=organism, verbose=verbose)
    hm = (
        hm
        .rename(columns={'geneset': 'source', 'genesymbol': 'target'})
        .drop_duplicates(['source', 'target'])
        .reset_index(drop=True)
    )
    return hm
45 |
--------------------------------------------------------------------------------
/src/decoupler/op/_progeny.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 |
4 | from decoupler._docs import docs
5 | from decoupler._log import _log
6 | from decoupler.op._resource import resource
7 |
8 |
@docs.dedent
def progeny(
    organism: str = 'human',
    top: int | float = np.inf,
    thr_padj: float = 0.05,
    license: str = 'academic',
    verbose: bool = False,
) -> pd.DataFrame:
    """
    Pathway RespOnsive GENes for activity inference (PROGENy) :cite:p:`progeny`.

    Wrapper to access PROGENy model gene weights. Each pathway is defined with
    a collection of target genes, each interaction has an associated p-value
    and weight. The top significant interactions per pathway are returned.

    Here is a brief description of each pathway:

    - **Androgen**: involved in the growth and development of the male reproductive organs
    - **EGFR**: regulates growth, survival, migration, apoptosis, proliferation, and differentiation in mammalian cells
    - **Estrogen**: promotes the growth and development of the female reproductive organs
    - **Hypoxia**: promotes angiogenesis and metabolic reprogramming when O2 levels are low
    - **JAK-STAT**: involved in immunity, cell division, cell death, and tumor formation
    - **MAPK**: integrates external signals and promotes cell growth and proliferation
    - **NFkB**: regulates immune response, cytokine production and cell survival
    - **p53**: regulates cell cycle, apoptosis, DNA repair and tumor suppression
    - **PI3K**: promotes growth and proliferation
    - **TGFb**: involved in development, homeostasis, and repair of most tissues
    - **TNFa**: mediates haematopoiesis, immune surveillance, tumour regression and protection from infection
    - **Trail**: induces apoptosis
    - **VEGF**: mediates angiogenesis, vascular permeability, and cell migration
    - **WNT**: regulates organ morphogenesis during development and tissue repair

    Parameters
    ----------
    %(organism)s
    top
        Number of genes per pathway to return. By default all of them.
    thr_padj
        Significance threshold to trim interactions.
    %(license)s
    %(verbose)s

    Returns
    -------
    Dataframe in long format containing target genes for each pathway with their associated weights and p-values.
    """
    # Validate arguments
    assert isinstance(top, (int, float)) and top > 0, \
        'top must be numeric and > 0'
    assert isinstance(thr_padj, (int, float)) and 0. <= thr_padj <= 1., \
        'thr_padj must be numeric and between 0 and 1'
    # Query the PROGENy model through Omnipath
    p = resource(name='PROGENy', organism=organism, license=license, verbose=verbose)
    # Keep the `top` most significant interactions per pathway
    p = p.sort_values('p_value')
    p = p.groupby('pathway').head(top)
    p = p.sort_values(['pathway', 'p_value']).reset_index(drop=True)
    # Harmonize column names and trim by significance
    p = p.rename(columns={'pathway': 'source', 'genesymbol': 'target', 'p_value': 'padj'})
    p = p.loc[p['padj'] < thr_padj, ['source', 'target', 'weight', 'padj']]
    m = f'progeny - filtered interactions for padj < {thr_padj}'
    _log(m, level='info', verbose=verbose)
    return p.drop_duplicates(['source', 'target']).reset_index(drop=True)
77 |
--------------------------------------------------------------------------------
/src/decoupler/op/_resource.py:
--------------------------------------------------------------------------------
1 | import json
2 |
3 | import requests
4 | import pandas as pd
5 |
6 | from decoupler._docs import docs
7 | from decoupler._log import _log
8 | from decoupler._download import URL_DBS, _download
9 | from decoupler.op._translate import translate
10 | from decoupler.op._dtype import _infer_dtypes
11 |
12 |
def show_resources(
) -> pd.DataFrame:
    """
    Shows available resources in Omnipath :cite:p:`omnipath`.
    For more information visit the official
    [website](https://omnipathdb.org/).

    Returns
    -------
    List of available resources to query with `decoupler.op.resource`.
    """
    # Names of the queryable annotation databases
    ann = pd.read_csv('https://omnipathdb.org/queries/annotations', sep='\t')
    ann = ann.set_index('argument').loc['databases'].str.split(';')['values']
    # License metadata per resource, keyed by resource name
    lcs = requests.get('https://omnipathdb.org/resources').json()
    df = pd.DataFrame(ann, columns=['name'])
    licenses = []
    for name in ann:
        licenses.append(lcs[name]['license']['purpose'] if name in lcs else None)
    df['license'] = licenses
    return df
32 |
33 |
@docs.dedent
def resource(
    name: str,
    organism: str = 'human',
    license: str = 'academic',
    verbose: bool = False,
) -> pd.DataFrame:
    """
    Wrapper to access resources inside Omnipath :cite:p:`omnipath`.

    This wrapper allows to easily query different prior knowledge resources. To
    check available resources run ``decoupler.op.show_resources()``. For more
    information visit the official [website](https://omnipathdb.org/).

    Parameters
    ----------
    name:
        Name of the resource to query.
    %(organism)s
    %(license)s
    %(verbose)s

    Returns
    -------
    Network in long format.
    """
    # Validate
    assert isinstance(name, str), 'name must be str'
    names = set(show_resources()['name'])
    assert name in names, f'name must be one of these: {names}'
    assert isinstance(organism, str), 'organism must be str'
    assert isinstance(license, str) and license in ['academic', 'commercial', 'nonprofit'], \
        'license must be academic, commercial or nonprofit'
    assert isinstance(verbose, bool), 'verbose must be bool'
    m = f'Accessing {name} with {license} license'
    _log(m, level='info', verbose=verbose)
    # Download
    url = URL_DBS + f'{name}&license={license}'
    df = _download(url, sep='\t', verbose=verbose)
    # Process
    # Annotation labels that clash with existing column names are prefixed
    # with an underscore so the pivot below cannot collide with them
    labels = df['label'].unique()
    for label in labels:
        if label in df.columns:
            df.loc[df['label'] == label, 'label'] = f'_{label}'
    # Pivot long (genesymbol, label, value) records into one wide row
    # per (genesymbol, record_id) pair
    df = df[['genesymbol', 'label', 'value', 'record_id']]
    df = df.pivot(index=["genesymbol", "record_id"], columns="label", values="value").reset_index()
    df.index.name = ''
    df.columns.name = ''
    # Drop bookkeeping columns that are not part of the resource itself
    cols_to_remove = ['record_id', 'entity_type', '_entity_type']
    df = df.drop(columns=[c for c in cols_to_remove if c in df.columns])
    df = _infer_dtypes(df)
    if organism != 'human':
        df = translate(df, columns='genesymbol', target_organism=organism, verbose=verbose)
    return df
90 |
--------------------------------------------------------------------------------
/src/decoupler/pl/__init__.py:
--------------------------------------------------------------------------------
1 | from decoupler.pl._barplot import barplot
2 | from decoupler.pl._dotplot import dotplot
3 | from decoupler.pl._filter_by_expr import filter_by_expr
4 | from decoupler.pl._filter_by_prop import filter_by_prop
5 | from decoupler.pl._leading_edge import leading_edge
6 | from decoupler.pl._network import network
7 | from decoupler.pl._obsbar import obsbar
8 | from decoupler.pl._order_targets import order_targets
9 | from decoupler.pl._order import order
10 | from decoupler.pl._obsm import obsm
11 | from decoupler.pl._filter_samples import filter_samples
12 | from decoupler.pl._source_targets import source_targets
13 | from decoupler.pl._volcano import volcano
14 |
--------------------------------------------------------------------------------
/src/decoupler/pl/_barplot.py:
--------------------------------------------------------------------------------
1 | from typing import Tuple
2 |
3 | import numpy as np
4 | import pandas as pd
5 | import matplotlib
6 | import matplotlib.pyplot as plt
7 | from matplotlib.figure import Figure
8 | import seaborn as sns
9 |
10 | from decoupler._docs import docs
11 | from decoupler._Plotter import Plotter
12 |
13 |
14 | def _set_limits(
15 | vmin: int | float,
16 | vcenter: int | float,
17 | vmax: int | float,
18 | values: np.ndarray
19 | ) -> Tuple[float, float, float]:
20 | assert np.isfinite(values).all(), 'values in data mut be finite'
21 | assert isinstance(vmin, (int, float)) or vmin is None, 'vmin must be numerical or None'
22 | assert isinstance(vcenter, (int, float)) or vcenter is None, 'vcenter must be numerical or None'
23 | assert isinstance(vmax, (int, float)) or vmax is None, 'vmax must be numerical or None'
24 | if vmin is None:
25 | vmin = values.min()
26 | if vmax is None:
27 | vmax = values.max()
28 | if vcenter is None:
29 | vcenter = values.mean()
30 | if vmin >= vcenter:
31 | vmin = -vmax
32 | if vcenter >= vmax:
33 | vmax = -vmin
34 | return vmin, vcenter, vmax
35 |
36 |
@docs.dedent
def barplot(
    data: pd.DataFrame,
    name: str,
    top: int = 25,
    vertical: bool = False,
    cmap: str = 'RdBu_r',
    vmin: float | None = None,
    vcenter: float | None = 0,
    vmax: float | None = None,
    **kwargs,
) -> None | Figure:
    """
    Plot barplots showing top scores.

    Parameters
    ----------
    data
        DataFrame in wide format containing enrichment scores (contrasts, sources).
    name
        Name of the contrast (row) to plot.
    %(top)s
    vertical
        Whether to plot the bars vertically or horizontally.
    %(cmap)s
    %(vmin)s
    %(vcenter)s
    %(vmax)s
    %(plot)s
    """
    # Validate
    assert isinstance(data, pd.DataFrame), 'data must be pandas.DataFrame'
    assert isinstance(name, str) and name in data.index, \
        'name must be str and in data.index'
    assert isinstance(top, int) and top > 0, 'top must be int and > 0'
    assert isinstance(vertical, bool), 'vertical must be bool'
    # Process df
    # Select the requested contrast, melt to long format, and keep the
    # `top` sources by absolute score, ordered by signed score
    df = data.loc[[name]]
    df.index.name = None
    df.columns.name = None
    df = df.melt(var_name='source', value_name='score')
    df['abs_score'] = df['score'].abs()
    df = df.sort_values('abs_score', ascending=False)
    df = df.head(top).sort_values('score', ascending=False)
    if not vertical:
        x, y = 'score', 'source'
    else:
        x, y = 'source', 'score'
    # Instance
    bp = Plotter(**kwargs)
    # Plot
    sns.barplot(data=df, x=x, y=y, ax=bp.ax)
    if not vertical:
        # Horizontal bars: color by bar width (the score)
        sizes = np.array([bar.get_width() for bar in bp.ax.containers[0]])
        bp.ax.set_xlabel('Score')
        bp.ax.set_ylabel('')
    else:
        # Vertical bars: color by bar height (the score)
        sizes = np.array([bar.get_height() for bar in bp.ax.containers[0]])
        bp.ax.tick_params(axis='x', rotation=90)
        bp.ax.set_ylabel('Score')
        bp.ax.set_xlabel('')
        # NOTE(review): x-axis inversion only happens in the vertical
        # layout — presumably to order bars high-to-low; confirm intended
        bp.ax.invert_xaxis()
    # Compute color limits
    vmin, vcenter, vmax = _set_limits(vmin, vcenter, vmax, df['score'])
    # Rescale cmap
    # Map each bar's score through a diverging norm centered at vcenter
    divnorm = matplotlib.colors.TwoSlopeNorm(vmin=vmin, vcenter=vcenter, vmax=vmax)
    cmap_f = plt.get_cmap(cmap)
    div_colors = cmap_f(divnorm(sizes))
    for bar, color in zip(bp.ax.containers[0], div_colors):
        bar.set_facecolor(color)
    # Add legend
    sm = plt.cm.ScalarMappable(cmap=cmap, norm=divnorm)
    sm.set_array([])
    bp.fig.colorbar(sm, ax=bp.ax, shrink=0.5)
    return bp._return()
--------------------------------------------------------------------------------
/src/decoupler/pl/_dotplot.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | import matplotlib.pyplot as plt
4 | from matplotlib.figure import Figure
5 | from matplotlib.colors import TwoSlopeNorm
6 |
7 | from decoupler._docs import docs
8 | from decoupler._Plotter import Plotter
9 |
10 |
@docs.dedent
def dotplot(
    df: pd.DataFrame,
    x: str,
    y: str,
    c: str,
    s: str,
    top: int | float = 10,
    scale: int | float = 0.15,
    cmap: str = 'RdBu_r',
    vcenter: int | float | None = None,
    **kwargs
) -> None | Figure:
    """
    Plot results of enrichment analysis as dots.

    Parameters
    ----------
    df
        DataFrame containing enrichment results.
    x
        Name of the column containing values to place on the x-axis.
    y
        Name of the column containing values to place on the y-axis.
    c
        Name of the column containing values to use for coloring.
    s
        Name of the column containing values to use for setting the size of the dots.
    %(top)s
    scale
        Scale of the dots.
    %(cmap)s
    %(vcenter)s
    %(plot)s
    """
    # Validate
    assert isinstance(df, pd.DataFrame), 'df must be a pd.DataFrame'
    assert isinstance(x, str) and x in df.columns, 'x must be str and in df.columns'
    assert isinstance(y, str) and y in df.columns, 'y must be str and in df.columns'
    assert isinstance(c, str) and c in df.columns, 'c must be str and in df.columns'
    assert isinstance(s, str) and s in df.columns, 's must be str and in df.columns'
    assert isinstance(top, (int, float)) and top > 0, 'top must be numerical and > 0'
    assert isinstance(scale, (int, float)), 'scale must be numerical'
    assert isinstance(vcenter, (int, float)) or vcenter is None, 'vcenter must be numeric or None'
    # Filter by top absolute x values.
    # int(top) so a float `top` (allowed by the signature) works with head()
    df = df.copy()
    df['abs_x_col'] = df[x].abs()
    df = df.sort_values('abs_x_col', ascending=False).head(int(top))
    # Extract from df
    x_vals = df[x].values
    y_vals = df[y].values
    c_vals = df[c].values
    s_vals = df[s].values
    # Sort by x so dots are drawn in axis order
    idxs = np.argsort(x_vals)
    x_vals = x_vals[idxs]
    y_vals = y_vals[idxs]
    c_vals = c_vals[idxs]
    s_vals = s_vals[idxs]
    # Instance
    bp = Plotter(**kwargs)
    # Plot: dot area scales quadratically with the s column
    ns = (s_vals * scale * plt.rcParams["lines.markersize"]) ** 2
    bp.ax.grid(axis='x')
    # Compare against None (not truthiness) so that an explicit vcenter=0
    # still produces a colormap centered at zero
    if vcenter is not None:
        norm = TwoSlopeNorm(vmin=None, vcenter=vcenter, vmax=None)
    else:
        norm = None
    scatter = bp.ax.scatter(
        x=x_vals,
        y=y_vals,
        c=c_vals,
        s=ns,
        cmap=cmap,
        norm=norm,
    )
    bp.ax.set_axisbelow(True)
    bp.ax.set_xlabel(x)
    # Add size legend (func inverts the area transform applied above)
    handles, labels = scatter.legend_elements(
        prop="sizes",
        num=3,
        fmt="{x:.2f}",
        func=lambda s: np.sqrt(s) / plt.rcParams["lines.markersize"] / scale
    )
    bp.ax.legend(
        handles,
        labels,
        title=s,
        frameon=False,
        loc='lower left',
        bbox_to_anchor=(1.05, 0.5),
        alignment='left',
        labelspacing=1.
    )
    # Add colorbar
    clb = bp.fig.colorbar(
        scatter,
        ax=bp.ax,
        shrink=0.25,
        aspect=5,
        orientation='vertical',
        anchor=(0., 0.),
    )
    clb.ax.set_title(c, loc="left",)
    bp.ax.margins(x=0.25, y=0.1)
    return bp._return()
118 |
--------------------------------------------------------------------------------
/src/decoupler/pl/_filter_by_expr.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from matplotlib.figure import Figure
3 | import seaborn as sns
4 | from anndata import AnnData
5 |
6 | from decoupler._docs import docs
7 | from decoupler._Plotter import Plotter
8 | from decoupler.pp.data import extract
9 | from decoupler.pp.anndata import _min_sample_size, _ssize_tcount
10 |
11 |
@docs.dedent
def filter_by_expr(
    adata: AnnData,
    group: str | None = None,
    lib_size: float | None = None,
    min_count: int = 10,
    min_total_count: int = 15,
    large_n: int = 10,
    min_prop: float = 0.7,
    cmap: str = 'viridis',
    **kwargs,
) -> None | Figure:
    """
    Plot to help determining the thresholds of the ``decoupler.pp.filter_by_expr`` function.

    Parameters
    ----------
    %(adata)s
    %(cmap)s
    %(group)s
    %(lib_size)s
    %(min_count)s
    %(min_total_count)s
    %(large_n)s
    %(min_prop_expr)s
    %(plot)s
    """
    assert isinstance(adata, AnnData), 'adata must be AnnData'
    # Extract inputs
    X, _, _ = extract(adata, empty=False)
    obs = adata.obs
    # Minimum sample size cutoff (same helper used by decoupler.pp.filter_by_expr)
    min_sample_size = _min_sample_size(
        obs=obs,
        group=group,
        large_n=large_n,
        min_prop=min_prop,
    )
    # Compute sample size and total count per gene
    sample_size, total_count = _ssize_tcount(
        X=X,
        lib_size=lib_size,
        min_count=min_count,
    )
    # Total counts
    total_count[total_count < 1.] = np.nan  # Handle 0s: avoids -inf after log10
    # Instance
    bp = Plotter(**kwargs)
    # Plot: 2D histogram of genes over (log total count, sample size)
    sns.histplot(
        x=np.log10(total_count),
        y=sample_size,
        cmap=cmap,
        cbar=True,
        cbar_kws=dict(shrink=.75, label='Number of genes'),
        discrete=(False, True),
        ax=bp.ax,
    )
    # Dashed lines mark the filtering thresholds
    bp.ax.axhline(y=min_sample_size - 0.5, c='gray', ls='--')
    bp.ax.axvline(x=np.log10(min_total_count), c='gray', ls='--')
    bp.ax.set_xlabel(r'$\log_{10}$ total sum of counts')
    bp.ax.set_ylabel('Number of samples')
    return bp._return()
75 |
--------------------------------------------------------------------------------
/src/decoupler/pl/_filter_by_prop.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | from anndata import AnnData
4 | from matplotlib.figure import Figure
5 |
6 | from decoupler._docs import docs
7 | from decoupler._Plotter import Plotter
8 |
9 |
@docs.dedent
def filter_by_prop(
    adata: AnnData,
    min_prop: float = 0.1,
    min_smpls: int = 2,
    log: bool = True,
    color: str = 'gray',
    **kwargs
) -> None | Figure:
    """
    Plot to help determining the thresholds of the ``decoupler.pp.filter_by_prop`` function.

    Parameters
    ----------
    %(adata)s
    %(min_prop_prop)s
    %(min_smpls)s
    log
        Whether to log-scale the y axis.
    color
        Color to use in ``matplotlib.pyplot.hist``.
    %(plot)s
    """
    assert isinstance(adata, AnnData), 'adata must be AnnData'
    assert 'psbulk_props' in adata.layers.keys(), \
        'psbulk_props must be in adata.layers, use this function after running decoupler.pp.pseudobulk'
    props = adata.layers['psbulk_props']
    if isinstance(props, pd.DataFrame):
        props = props.values
    # Number of samples in which each gene passes the proportion cutoff
    nsmpls = np.sum(props >= min_prop, axis=0)
    # Instance
    bp = Plotter(**kwargs)
    # Plot: one integer-wide bin per sample count, centered on the tick
    _ = bp.ax.hist(
        nsmpls,
        bins=range(min(nsmpls), max(nsmpls) + 2),
        log=log,
        color=color,
        align='left',
        rwidth=0.95,
    )
    # Dashed line marks the min_smpls filtering threshold
    bp.ax.axvline(x=min_smpls - 0.5, c='black', ls='--')
    bp.ax.set_xlabel('Samples (≥ min_prop)')
    bp.ax.set_ylabel('Number of genes')
    return bp._return()
55 |
--------------------------------------------------------------------------------
/src/decoupler/pl/_filter_samples.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | import matplotlib.pyplot as plt
4 | from matplotlib.figure import Figure
5 | import seaborn as sns
6 | from anndata import AnnData
7 |
8 | from decoupler._docs import docs
9 | from decoupler._Plotter import Plotter
10 |
11 |
@docs.dedent
def filter_samples(
    adata: AnnData,
    groupby: str | list,
    log: bool = True,
    min_cells: int | float = 10,
    min_counts: int | float = 1000,
    **kwargs
) -> None | Figure:
    """
    Plot to assess the quality of the obtained pseudobulk samples from ``decoupler.pp.pseudobulk``.

    Parameters
    ----------
    %(adata)s
    groupby
        Name or names of the ``adata.obs`` column/s to group by.
    log
        If set, log10 transform the ``psbulk_n_cells`` and ``psbulk_counts`` columns during visualization.
    %(min_cells)s
    %(min_counts)s
    %(plot)s
    """
    # Validate
    assert isinstance(adata, AnnData), 'adata must be AnnData'
    assert isinstance(adata.obs, pd.DataFrame) and adata.obs is not None, \
        f'adata.obs must be a pd.DataFrame not {type(adata.obs)}'
    assert all(col in adata.obs.columns for col in ['psbulk_cells', 'psbulk_counts']), \
        'psbulk_* columns not present in adata.obs, this function should be used after running decoupler.pp.pseudobulk'
    assert isinstance(groupby, (str, list)), 'groupby must be str or list'
    if isinstance(groupby, str):
        groupby = [groupby]
    assert all(col in adata.obs for col in groupby), 'columns in groupby must be in adata.obs'
    # Extract obs
    df = adata.obs.copy()
    # Transform to log10
    label_x, label_y = 'cells', 'counts'
    if log:
        df['psbulk_cells'] = np.log10(df['psbulk_cells'] + 1)
        df['psbulk_counts'] = np.log10(df['psbulk_counts'] + 1)
        label_x, label_y = r'$\log_{10}$ ' + label_x, r'$\log_{10}$ ' + label_y
        # NOTE(review): data is transformed as log10(x + 1) but thresholds as
        # log10(x), so the dashed lines are slightly offset — confirm intended
        min_cells, min_counts = np.log10(min_cells), np.log10(min_counts)
    # Plot
    if len(groupby) > 1:
        # One stacked panel per grouping column
        # Instance
        assert kwargs.get('ax') is None, 'when groupby is list, ax must be None'
        kwargs['ax'] = None
        bp = Plotter(**kwargs)
        # Replace the single default axes with a column of subplots
        bp.fig.delaxes(bp.ax)
        plt.close(bp.fig)
        bp.fig, axes = plt.subplots(len(groupby), 1, figsize=bp.figsize, dpi=bp.dpi, tight_layout=True)
        axes = axes.ravel()
        for ax, grp in zip(axes, groupby):
            ax.grid(zorder=0)
            ax.set_axisbelow(True)
            sns.scatterplot(x='psbulk_cells', y='psbulk_counts', hue=grp, ax=ax, data=df, zorder=1)
            ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), frameon=False, title=grp)
            ax.set_xlabel(label_x)
            ax.set_ylabel(label_y)
            # Dashed lines mark the filtering thresholds
            ax.axvline(x=min_cells, linestyle='--', color="black")
            ax.axhline(y=min_counts, linestyle='--', color="black")
    else:
        # Single panel
        # Instance
        groupby = groupby[0]
        bp = Plotter(**kwargs)
        bp.ax.grid(zorder=0)
        bp.ax.set_axisbelow(True)
        sns.scatterplot(x='psbulk_cells', y='psbulk_counts', hue=groupby, ax=bp.ax, data=df, zorder=1)
        bp.ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), frameon=False, title=groupby)
        bp.ax.set_xlabel(label_x)
        bp.ax.set_ylabel(label_y)
        # Dashed lines mark the filtering thresholds
        bp.ax.axvline(x=min_cells, linestyle='--', color="black")
        bp.ax.axhline(y=min_counts, linestyle='--', color="black")
    return bp._return()
86 |
--------------------------------------------------------------------------------
/src/decoupler/pl/_obsbar.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | from matplotlib.figure import Figure
3 | import seaborn as sns
4 | from anndata import AnnData
5 |
6 | from decoupler._docs import docs
7 | from decoupler._Plotter import Plotter
8 |
9 |
@docs.dedent
def obsbar(
    adata: AnnData,
    y: str,
    hue: str | None = None,
    kw_barplot: dict = dict(),
    **kwargs
) -> None | Figure:
    """
    Plot ``adata.obs`` metadata as a grouped barplot.

    Counts observations per category of ``y`` (optionally split by ``hue``)
    and draws one bar per count.

    Parameters
    ----------
    %(adata)s
    y
        Column name in ``adata.obs`` to plot in y axis.
    hue
        Column name in ``adata.obs`` to color bars.
    kw_barplot
        Keyword arguments passed to ``seaborn.barplot``.
    %(plot)s
    """
    # Validate
    assert isinstance(adata, AnnData), 'adata must be an AnnData instance'
    assert isinstance(y, str), 'y must be str'
    assert isinstance(hue, str) or hue is None, 'hue must be str or None'
    group_cols = {y, hue}
    if hue is None:
        group_cols.remove(None)
    assert group_cols.issubset(adata.obs.columns), \
        f'y={y} and hue={hue} must be in adata.obs.columns={adata.obs.columns}'
    group_cols = list(group_cols)
    # Count observations per category (and hue) combination
    counts = adata.obs.groupby(group_cols, observed=True, as_index=False).size()
    # Instance
    plotter = Plotter(**kwargs)
    # Plot
    sns.barplot(
        data=counts,
        y=y,
        x='size',
        hue=hue,
        ax=plotter.ax,
        **kw_barplot
    )
    # A legend only adds information when hue differs from y
    if hue is not None and y != hue:
        plotter.ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), frameon=False, title=hue)
    return plotter._return()
62 |
--------------------------------------------------------------------------------
/src/decoupler/pl/_order.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | from matplotlib.colors import to_rgb
4 | from matplotlib.figure import Figure
5 | import seaborn as sns
6 |
7 | from decoupler._docs import docs
8 | from decoupler._Plotter import Plotter
9 |
10 |
@docs.dedent
def order(
    df: pd.DataFrame,
    mode: str = 'line',
    kw_order = dict(),
    **kwargs
) -> None | Figure:
    """
    Plot features along a continuous, ordered process such as pseudotime.

    Parameters
    ----------
    df
        Results of ``decoupler.pp.bin_order``.
    mode
        The type of plot to use, either "line" or "mat".
    kw_order
        Other keyword arguments are passed down to ``seaborn.lineplot`` or ``matplotlib.pyplot.imshow``,
        depending on ``mode`` used.
    %(plot)s
    """
    # Validate
    assert isinstance(df, pd.DataFrame), 'df must be pandas.DataFrame'
    assert isinstance(mode, str) and mode in ['line', 'mat'], \
        'mode must be str and either "line" or "mat"'
    assert isinstance(kw_order, dict), \
        'kw_order must be dict'
    # Process: plot extents shared by both modes
    ymax = df['value'].max()
    xmin, xmax = df['order'].min(), df['order'].max()
    n_names = df['name'].unique().size
    # Add cbar if added: an annotation color strip drawn above the data
    has_cbar = False
    if np.isin(['label', 'color'], df.columns).all():
        # NOTE(review): df.loc[0, 'name'] assumes a default RangeIndex with the
        # first row at label 0 — confirm df comes from decoupler.pp.bin_order
        colors = df[df['name'] == df.loc[0, 'name']]['color']
        colors = [[to_rgb(c) for c in colors]]
        has_cbar = True
    # Instance
    bp = Plotter(**kwargs)
    # Plot
    if mode == 'line':
        if has_cbar:
            # Color strip placed just above the highest value
            bp.ax.imshow(
                colors,
                aspect='auto',
                extent=[xmin, xmax, 1.05 * ymax, 1.2 * ymax],
                transform=bp.ax.transData,
                zorder=2
            )
            bp.ax.axhline(y=1.05 * ymax, c='black', lw=1)
        # Copy so the caller's dict is not mutated by setdefault
        kw_order = kw_order.copy()
        kw_order.setdefault('palette', 'tab20')
        sns.lineplot(
            data=df,
            x='order',
            y='value',
            hue='name',
            ax=bp.ax,
            **kw_order
        )
        bp.ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), frameon=False)
    elif mode == 'mat':
        # Feature-by-order matrix of mean values
        mat = (
            df
            .groupby(['name', 'order'], as_index=False)['value'].mean()
            .pivot(index='name', columns='order', values='value')
        )
        img = bp.ax.imshow(mat, extent=[xmin, xmax, 0, n_names], aspect='auto', **kw_order)
        if has_cbar:
            # Color strip above the heatmap
            bp.ax.imshow(colors, aspect='auto', extent=[xmin, xmax, n_names, 1.1 * n_names], zorder=2)
            bp.ax.axhline(y=n_names, c='black', lw=1)
            bp.ax.set_ylim(0, 1.1 * n_names)
        bp.fig.colorbar(img, ax=bp.ax, shrink=0.5, label='Mean value', location='top')
        # Labels flipped because imshow draws the first matrix row at the top
        bp.ax.set_yticks(np.arange(n_names) + 0.5)
        bp.ax.set_yticklabels(np.flip(mat.index))
        bp.ax.grid(axis='y', visible=False)
    bp.ax.set_xlabel('order')
    bp.ax.set_xlim(xmin, xmax)
    return bp._return()
90 |
--------------------------------------------------------------------------------
/src/decoupler/pl/_source_targets.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | from matplotlib.figure import Figure
4 | import adjustText as at
5 |
6 | from decoupler._docs import docs
7 | from decoupler._Plotter import Plotter
8 | from decoupler.pp.net import _validate_net
9 |
10 |
@docs.dedent
def source_targets(
    data: pd.DataFrame,
    net: pd.DataFrame,
    x: str,
    y: str,
    name: str,
    top: int = 5,
    thr_x: float = 0.,
    thr_y: float = 0.,
    max_x: float | None = None,
    max_y: float | None = None,
    color_pos: str = '#D62728',
    color_neg: str = '#1F77B4',
    **kwargs,
) -> None | Figure:
    """
    Plots target features of a given source as a scatter plot.

    Parameters
    ----------
    %(data_plot)s
    %(net)s
    x
        Name of the column containing values to place on the x-axis.
    y
        Name of the column containing values to place on the y-axis.
    name
        Name of the source to plot.
    top
        Number of top features based on the product of x and y to label.
    thr_x
        Value where to place a baseline for the x-axis.
    thr_y
        Value where to place a baseline for the y-axis.
    max_x
        Maximum value to plot on x-axis.
    max_y
        Maximum value to plot on y-axis.
    color_pos
        Color to plot positively associated features.
    color_neg
        Color to plot negatively associated features.
    %(plot)s
    """
    # Validate inputs
    m = f'data must be a pd.DataFrame containing the columns {x} and {y}'
    assert isinstance(data, pd.DataFrame), m
    assert {x, y}.issubset(data.columns.union(net.columns)), m
    assert not pd.api.types.is_numeric_dtype(data.index), 'data index must be features in net'
    assert isinstance(net, pd.DataFrame), \
        f'net must be a pd.DataFrame containing the columns {x} and {y}'
    assert isinstance(name, str), 'name must be a str'
    assert isinstance(top, int) and top > 0, 'top must be int and > 0'
    assert isinstance(thr_x, (int, float)), 'thr_x must be numeric'
    assert isinstance(thr_y, (int, float)), 'thr_y must be numeric'
    # None means no limit on the respective axis
    if max_x is None:
        max_x = np.inf
    if max_y is None:
        max_y = np.inf
    assert isinstance(max_x, (int, float)) and max_x > 0, \
        'max_x must be None, or numeric and > 0'
    assert isinstance(max_y, (int, float)) and max_y > 0, \
        'max_y must be None, or numeric and > 0'
    assert isinstance(color_pos, str), 'color_pos must be str'
    assert isinstance(color_neg, str), 'color_neg must be str'
    # Instance
    bp = Plotter(**kwargs)
    # Extract df
    df = data.copy().reset_index(names='target')
    # Filter by net shared targets
    vnet = _validate_net(net)
    snet = vnet[vnet['source'] == name]
    assert snet.shape[0] > 0, f'name={name} must be in net["source"]'
    df = pd.merge(df, snet, on=['target'], how='inner').set_index('target')
    # Filter by limits
    msk_x = np.abs(df[x]) < np.abs(max_x)
    msk_y = np.abs(df[y]) < np.abs(max_y)
    df = df.loc[msk_x & msk_y]
    # Define +/- color: matching signs on both axes = positive association
    pos = ((df[x] >= 0) & (df[y] >= 0)) | ((df[x] < 0) & (df[y] < 0))
    df['color'] = color_neg
    df.loc[pos, 'color'] = color_pos
    # Plot
    df.plot.scatter(x=x, y=y, c='color', ax=bp.ax)
    # Draw thr lines
    bp.ax.axvline(x=thr_x, linestyle='--', color="black")
    bp.ax.axhline(y=thr_y, linestyle='--', color="black")
    # Add labels
    bp.ax.set_title(name)
    bp.ax.set_xlabel(x)
    bp.ax.set_ylabel(y)
    # Show top features ranked by the product of |x| and |y|
    df['order'] = df[x].abs() * df[y].abs()
    signs = df.sort_values('order', ascending=False)
    signs = signs.iloc[:top]
    # Fresh loop names (tx, ty, lbl) instead of shadowing the x/y parameters
    texts = []
    for tx, ty, lbl in zip(signs[x], signs[y], signs.index):
        texts.append(bp.ax.text(tx, ty, lbl))
    if len(texts) > 0:
        at.adjust_text(texts, arrowprops=dict(arrowstyle='-', color='black'), ax=bp.ax)
    return bp._return()
113 |
--------------------------------------------------------------------------------
/src/decoupler/pp/__init__.py:
--------------------------------------------------------------------------------
1 | from .net import read_gmt, prune, adjmat, idxmat, shuffle_net, net_corr
2 | from .data import extract
3 | from .anndata import get_obsm, swap_layer, pseudobulk, filter_samples, \
4 | filter_by_expr, filter_by_prop, knn, bin_order
5 |
--------------------------------------------------------------------------------
/src/decoupler/tl/__init__.py:
--------------------------------------------------------------------------------
1 | from decoupler.tl._rankby_group import rankby_group
2 | from decoupler.tl._rankby_obsm import rankby_obsm
3 | from decoupler.tl._rankby_order import rankby_order
4 |
--------------------------------------------------------------------------------
/src/decoupler/tl/_rankby_obsm.py:
--------------------------------------------------------------------------------
1 | from typing import Tuple
2 |
3 | import pandas as pd
4 | import scipy.stats as sts
5 | from anndata import AnnData
6 |
7 | from decoupler._docs import docs
8 |
9 |
def _input_rank_obsm(
    adata: AnnData,
    key: str,
) -> Tuple[pd.DataFrame, list, list]:
    """
    Extract the obsm matrix ``key`` merged with obs metadata.

    Returns the merged frame plus the obs (x) and obsm (y) variable names.
    """
    # Validate
    assert isinstance(adata, AnnData), 'adata must be anndata.AnnData'
    assert key in adata.obsm, f'key={key} must be in adata.obsm'
    # Human-friendly prefix for unnamed obsm columns
    name_col = (
        key
        .replace('X_', '')
        .replace('pca', 'PC')
        .replace('mofa', 'Factor')
        .replace('umap', 'UMAP')
    )
    obsm = adata.obsm[key]
    if isinstance(obsm, pd.DataFrame):
        # Named columns: order by decreasing variability
        y_vars = obsm.std(ddof=1, axis=0).sort_values(ascending=False).index
        values = obsm.loc[:, y_vars].values
    else:
        # Raw array: generate zero-padded column names
        ncol = obsm.shape[1]
        digits = len(str(ncol))
        y_vars = [f"{name_col}{str(i).zfill(digits)}" for i in range(1, ncol + 1)]
        values = obsm
    df = pd.DataFrame(
        data=values,
        index=adata.obs_names,
        columns=y_vars
    )
    x_vars = adata.obs.columns
    # Merge: attach obs metadata by sample index
    df = pd.merge(df, adata.obs, left_index=True, right_index=True)
    return df, x_vars, y_vars
42 |
43 |
@docs.dedent
def rankby_obsm(
    adata: AnnData,
    key: str,
    uns_key: str | None = 'rank_obsm',
) -> None | pd.DataFrame:
    """
    Ranks features in ``adata.obsm`` by the significance of their association with metadata in ``adata.obs``.

    For categorical variables it uses ANOVA, for continuous Spearman's correlation.

    The obtained p-values are corrected by Benjamini-Hochberg.

    Parameters
    ----------
    %(adata)s
    %(key)s
    uns_key
        ``adata.uns`` key to store the results.

    Returns
    -------
    If ``uns_key`` is ``None``, a pandas.DataFrame with the resulting statistics.
    """
    assert isinstance(uns_key, str) or uns_key is None, \
        'uns_key must be str or None'
    # Extract merged obsm + obs table and the variable names to test
    df, x_vars, y_vars = _input_rank_obsm(adata=adata, key=key)
    # Test every obs variable against every obsm feature
    res = []
    for x_var in x_vars:
        for y_var in y_vars:
            if pd.api.types.is_numeric_dtype(df[x_var]):
                # Correlation (continuous obs variable)
                x = df[x_var].values.ravel()
                y = df[y_var].values.ravel()
                stat, pval = sts.spearmanr(x, y)
            else:
                # ANOVA (categorical obs variable)
                x = [group[y_var].dropna().values for _, group in df.groupby(x_var, observed=True)]
                # At least n=2 per group else skip
                if all(len(g) >= 2 for g in x):
                    stat, pval = sts.f_oneway(*x)
                else:
                    # Underpowered comparison: record as non-significant
                    stat, pval = None, 1.
            row = [y_var, x_var, stat, pval]
            res.append(row)
    res = pd.DataFrame(res, columns=['obsm', 'obs', 'stat', 'pval'])
    # Benjamini-Hochberg correction across all tests
    res['padj'] = sts.false_discovery_control(res['pval'])
    # Rank
    res = res.sort_values('padj').reset_index(drop=True)
    # Add obsm key
    # NOTE(review): sets a plain attribute on the DataFrame, which pandas does
    # not preserve across copies or serialization — confirm downstream use
    res.key = key
    # Save or return
    if uns_key:
        adata.uns[uns_key] = res
    else:
        return res
102 |
--------------------------------------------------------------------------------
/src/decoupler/tl/_rankby_order.py:
--------------------------------------------------------------------------------
1 | import warnings
2 |
3 | import pandas as pd
4 | import numpy as np
5 | from tqdm.auto import tqdm
6 | import scipy.stats as sts
7 | import scipy.sparse as sps
8 | from anndata import AnnData
9 |
10 | from decoupler._odeps import dcor, _check_import
11 | from decoupler._docs import docs
12 | from decoupler.pp.data import extract
13 |
14 |
15 |
@docs.dedent
def rankby_order(
    adata: AnnData,
    order: str,
    stat: str = 'dcor',
    verbose: bool = False,
    **kwargs
) -> pd.DataFrame:
    """
    Rank features along a continuous, ordered process such as pseudotime.

    Parameters
    ----------
    %(adata)s
    %(order)s
    stat
        Which statistic to compute.
        Must be one of these:

        - ``dcor`` (distance correlation from ``dcor.independence.distance_correlation_t_test``)
        - ``pearsonr`` (Pearson's R from ``scipy.stats.pearsonr``)
        - ``spearmanr`` (Spearman's R from ``scipy.stats.spearmanr``)
        - ``kendalltau`` (Kendall's Tau from ``scipy.stats.kendalltau``)

    %(verbose)s
    kwargs
        Key arguments passed to the selected ``stat`` function.

    Returns
    -------
    DataFrame with features associated with the ordering variable.
    """
    # Validate
    assert isinstance(adata, AnnData), 'adata must be anndata.AnnData'
    assert isinstance(order, str) and order in adata.obs.columns, 'order must be str and in adata.obs.columns'
    stats = {'dcor', 'pearsonr', 'spearmanr', 'kendalltau'}
    assert (isinstance(stat, str) and stat in stats) or callable(stat), \
        f'stat must be str and one of these {stats}, or a function that returns statistic and pvalue'
    # Get vars and ordinal variable (densify so column slices are plain arrays)
    X = adata.X
    if sps.issparse(X):
        X = X.toarray()
    X = X.astype(float)
    y = adata.obs[order].values.astype(float)
    # Init
    df = pd.DataFrame()
    df['name'] = adata.var_names
    # Fit: resolve the statistic callable
    if stat == 'dcor':
        _check_import(dcor)  # optional dependency, fail early if missing
        f = dcor.independence.distance_correlation_t_test
    elif stat == 'pearsonr':
        f = sts.pearsonr
    elif stat == 'spearmanr':
        f = sts.spearmanr
    elif stat == 'kendalltau':
        f = sts.kendalltau
    else:
        # User-provided callable; must return an object exposing
        # .statistic and .pvalue (like scipy result objects)
        f = stat
    ss = []
    ps = []
    for i in tqdm(range(X.shape[1]), disable=not verbose):
        x = X[:, i]
        if not np.all(x == x[0]):
            res = f(x, y)
            s = res.statistic
            p = res.pvalue
        else:
            # Constant features carry no signal: score 0, p-value 1
            s = 0
            p = 1
        ss.append(s)
        ps.append(p)
    df['stat'] = ss
    df['pval'] = ps
    # Benjamini-Hochberg correction
    df['padj'] = sts.false_discovery_control(df['pval'])
    # Rank by significance first, then by effect size
    df['abs_stat'] = df['stat'].abs()
    df = df.sort_values(['padj', 'pval', 'abs_stat'], ascending=[True, True, False]).reset_index(drop=True)
    df = df.drop(columns='abs_stat')
    return df
95 |
--------------------------------------------------------------------------------
/tests/bm/test_benchmark.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import scipy.sparse as sps
3 | import pytest
4 |
5 | import decoupler as dc
6 |
7 |
@pytest.mark.parametrize(
    'metrics,groupby,runby,sfilt,thr,emin,mnet',
    [
        ['auc', None, 'expr', False, 0.05, 5, False],
        ['auc', None, 'expr', True, 0.05, 5, False],
        [['auc'], None, 'expr', False, 0.05, 5, False],
        [['auc', 'fscore'], 'group', 'expr', False, 0.05, 5, False],
        [['auc', 'fscore', 'qrank'], None, 'source', False, 0.05, 2, False],
        [['auc', 'fscore', 'qrank'], 'group', 'source', False, 0.05, 1, False],
        [['auc', 'fscore', 'qrank'], 'bm_group', 'expr', True, 0.05, 5, False],
        [['auc', 'fscore', 'qrank'], 'source', 'expr', True, 0.05, 5, False],
    ]
)
def test_benchmark(
    bdata,
    net,
    metrics,
    groupby,
    runby,
    sfilt,
    thr,
    emin,
    mnet,
    rng,
):
    """End-to-end benchmark run across metric/grouping combinations."""
    # Precompute ulm scores so the benchmark has activities to evaluate
    dc.mt.ulm(data=bdata, net=net, tmin=0)
    # NOTE(review): mnet is False in every parametrize case above, so this
    # dict-of-networks branch is currently never exercised
    if mnet:
        net = {'w_net': net, 'unw_net': net.drop(columns=['weight'])}
    bdata = bdata.copy()
    # Random source labels and a sparse matrix to cover those code paths
    bdata.obs['source'] = rng.choice(['x', 'y', 'z'], size=bdata.n_obs, replace=True)
    bdata.X = sps.csr_matrix(bdata.X)
    df = dc.bm.benchmark(
        adata=bdata,
        net=net,
        metrics=metrics,
        groupby=groupby,
        runby=runby,
        sfilt=sfilt,
        thr=thr,
        emin=emin,
        kws_decouple={
            'cons': True,
            'tmin': 3,
            'methods': ['ulm', 'zscore', 'aucell']
        },
        verbose=True
    )
    assert isinstance(df, pd.DataFrame)
    cols = {'method', 'metric', 'score'}
    assert cols.issubset(df.columns)
    # Harmonic-mean aggregation should also accept the benchmark output
    hdf = dc.bm.metric.hmean(df, metrics=metrics)
    assert isinstance(hdf, pd.DataFrame)
60 |
--------------------------------------------------------------------------------
/tests/bm/test_pl.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import matplotlib
3 | matplotlib.use("Agg")
4 | import matplotlib.pyplot as plt
5 | from matplotlib.figure import Figure
6 | import pytest
7 |
8 | import decoupler as dc
9 |
@pytest.fixture
def df():
    """Synthetic benchmark scores for two methods across several metrics."""
    rows = [
        ('aucell', 'auroc', 0.45),
        ('aucell', 'auprc', 0.55),
        ('ulm', 'auroc', 0.9),
        ('ulm', 'auprc', 0.8),
        ('aucell', 'recall', 0.45),
        ('aucell', 'precision', 0.55),
        ('ulm', 'recall', 0.9),
        ('ulm', 'precision', 0.8),
        ('aucell', '1-qrank', 0.45),
        ('aucell', '-log10(pval)', 0.9),
        ('ulm', '1-qrank', 0.9),
        ('ulm', '-log10(pval)', 5.6),
    ]
    return pd.DataFrame.from_records(rows, columns=['method', 'metric', 'score'])
30 |
31 |
@pytest.fixture
def hdf(
    df,
):
    """Harmonic-mean summary of the score fixture."""
    return dc.bm.metric.hmean(df)
38 |
39 |
def test_auc(
    df,
):
    """auc renders a Figure both without and with a hue grouping."""
    for hue in (None, 'method'):
        fig = dc.bm.pl.auc(df=df, hue=hue, return_fig=True)
        assert isinstance(fig, Figure)
        plt.close(fig)
49 |
50 |
def test_fscore(
    df,
):
    """fscore renders a Figure both without and with a hue grouping."""
    for hue in (None, 'method'):
        fig = dc.bm.pl.fscore(df=df, hue=hue, return_fig=True)
        assert isinstance(fig, Figure)
        plt.close(fig)
60 |
61 |
def test_qrank(
    df,
):
    """qrank renders a Figure both without and with a hue grouping."""
    for hue in (None, 'method'):
        fig = dc.bm.pl.qrank(df=df, hue=hue, return_fig=True)
        assert isinstance(fig, Figure)
        plt.close(fig)
71 |
72 |
def test_bar(
    hdf,
):
    """dc.bm.pl.bar returns a matplotlib Figure with and without hue."""
    for hue in (None, 'method'):
        fig = dc.bm.pl.bar(df=hdf, x='H(auroc, auprc)', y='method', hue=hue, return_fig=True)
        assert isinstance(fig, Figure)
        plt.close(fig)
82 |
83 |
def test_summary(
    hdf,
):
    """dc.bm.pl.summary returns a matplotlib Figure."""
    figure = dc.bm.pl.summary(df=hdf, y='method', figsize=(6, 3), return_fig=True)
    assert isinstance(figure, Figure)
    plt.close(figure)
90 |
--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | import pytest
4 | import scanpy as sc
5 |
6 | import decoupler as dc
7 |
8 |
@pytest.fixture
def rng():
    """Seeded numpy random generator for reproducible tests."""
    return np.random.default_rng(seed=42)
14 |
@pytest.fixture
def adata():
    """Small toy AnnData with an integer-rounded counts layer."""
    data, _ = dc.ds.toy(nobs=40, nvar=20, bval=2, seed=42, verbose=False)
    data.layers['counts'] = data.X.round()
    return data
20 |
21 |
@pytest.fixture
def tdata():
    """Toy AnnData carrying a pseudotime annotation."""
    data, _ = dc.ds.toy(nobs=40, nvar=20, bval=2, seed=42, verbose=False, pstime=True)
    return data
26 |
27 |
@pytest.fixture
def tdata_obsm(
    tdata,
    net,
    rng,
):
    """Pseudotime AnnData with PCA, a jittered UMAP stand-in and ulm scores."""
    sc.tl.pca(tdata)
    pcs = tdata.obsm['X_pca'][:, :2]
    # Fake a UMAP embedding by adding uniform noise to the first two PCs
    tdata.obsm['X_umap'] = pcs + rng.random(pcs.shape)
    dc.mt.ulm(data=tdata, net=net, tmin=0)
    return tdata
38 |
39 |
40 | @pytest.fixture
41 | def pdata(
42 | adata,
43 | rng,
44 | ):
45 | adata.X = adata.X.round() * (rng.random(adata.shape) > 0.75)
46 | return dc.pp.pseudobulk(adata=adata, sample_col='sample', groups_col='group')
47 |
48 |
@pytest.fixture
def bdata():
    """Benchmark toy AnnData with a combined [sample, group] column."""
    bench, _ = dc.ds.toy_bench(nobs=100, nvar=20, bval=2, seed=42, verbose=False)
    bench.obs['bm_group'] = bench.obs.apply(lambda r: [r['sample'], r['group']], axis=1)
    return bench
54 |
55 |
@pytest.fixture
def deg():
    """Differential expression table with test statistics and adjusted p-values."""
    genes = ['G01', 'G02', 'G03', 'G04', 'G05']
    table = pd.DataFrame(
        {
            'stat': [1, -2, 3, -4, 5],
            'padj': [0.5, 0.25, 0.125, 0.05, 0.025],
        },
        index=genes,
    )
    return table
70 |
71 |
@pytest.fixture
def net():
    """Toy network pruned to sources with at least three measured targets."""
    _, network = dc.ds.toy(nobs=2, nvar=12, bval=2, seed=42, verbose=False)
    return dc.pp.prune(features=network['target'].unique(), net=network, tmin=3)
77 |
78 |
@pytest.fixture
def unwnet(net):
    """Unweighted copy of the toy network (weight column dropped)."""
    return net.drop(columns=['weight'], inplace=False)
82 |
83 |
@pytest.fixture
def mat(
    adata,
):
    """(X, obs, var) triple extracted from the toy AnnData."""
    return dc.pp.extract(data=adata)
89 |
90 |
@pytest.fixture
def idxmat(
    mat,
    net,
):
    """Flattened index representation (cnct, starts, offsets) of the net."""
    _, _, var = mat
    _, cnct, starts, offsets = dc.pp.idxmat(features=var, net=net, verbose=False)
    return cnct, starts, offsets
99 |
100 |
@pytest.fixture
def adjmat(
    mat,
    net,
):
    """Dense adjacency matrix (features x sources) built from the net."""
    _, _, var = mat
    _, _, adj = dc.pp.adjmat(features=var, net=net, verbose=False)
    return adj
109 |
--------------------------------------------------------------------------------
/tests/ds/test_bulk.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import pytest
3 | import anndata as ad
4 |
5 | import decoupler as dc
6 |
7 |
def test_hsctgfb():
    """hsctgfb loads an AnnData with condition and sample_id annotations."""
    data = dc.ds.hsctgfb()
    assert isinstance(data, ad.AnnData)
    assert isinstance(data.obs, pd.DataFrame)
    assert {'condition', 'sample_id'}.issubset(data.obs)
13 |
@pytest.mark.parametrize(
    'thr_fc', [None, -1]
)
def test_knocktf(
    thr_fc, # val, None
):
    """knocktf loads an AnnData, optionally filtered by log fold-change."""
    data = dc.ds.knocktf(thr_fc=thr_fc)
    assert isinstance(data, ad.AnnData)
    assert isinstance(data.obs, pd.DataFrame)
    assert {'source', 'type_p'}.issubset(data.obs.columns)
    if thr_fc is None:
        return
    # When a threshold is given, every observation passes it
    assert (data.obs['logFC'] < thr_fc).all()
26 |
--------------------------------------------------------------------------------
/tests/ds/test_scell.py:
--------------------------------------------------------------------------------
1 | import warnings
2 |
3 | import pandas as pd
4 | import pytest
5 | import anndata as ad
6 |
7 | import decoupler as dc
8 |
9 |
@pytest.mark.parametrize(
    'url', [
        'https://datasets.cellxgene.cziscience.com/' +
        'f665effe-d95a-4211-ab03-9d1777ca0806.h5ad',
        'https://datasets.cellxgene.cziscience.com/' +
        '1338d08a-481a-426c-ad60-9f4ac08afe16.h5ad'
    ]
)
def test_download_anndata(
    url
):
    """Downloading a cellxgene .h5ad yields an AnnData object."""
    warnings.filterwarnings("ignore", module="anndata")
    result = dc.ds._scell._download_anndata(url=url)
    assert isinstance(result, ad.AnnData)
24 |
25 |
def test_pbmc3k():
    """pbmc3k loads a cleaned AnnData with categorical annotations."""
    warnings.filterwarnings("ignore", module="anndata")
    data = dc.ds.pbmc3k()
    assert isinstance(data, ad.AnnData)
    assert data.raw is None
    assert isinstance(data.obs, pd.DataFrame)
    cols = {'celltype', 'leiden'}
    assert cols.issubset(data.obs.columns)
    assert 'louvain' not in data.obs.columns
    assert all(isinstance(data.obs[c].dtype, pd.CategoricalDtype) for c in cols)
37 |
38 |
def test_covid5k():
    """covid5k loads a cleaned AnnData with categorical metadata."""
    data = dc.ds.covid5k()
    assert isinstance(data, ad.AnnData)
    assert data.raw is None
    assert isinstance(data.obs, pd.DataFrame)
    cols = {'individual', 'sex', 'disease', 'celltype'}
    assert cols.issubset(data.obs.columns)
    assert all(isinstance(data.obs[c].dtype, pd.CategoricalDtype) for c in cols)
48 |
49 |
def test_erygast1k():
    """erygast1k loads an AnnData with categorical metadata and embeddings."""
    data = dc.ds.erygast1k()
    assert isinstance(data, ad.AnnData)
    assert data.raw is None
    assert isinstance(data.obs, pd.DataFrame)
    cols = {'sample', 'stage', 'sequencing.batch', 'theiler', 'celltype'}
    assert cols.issubset(data.obs.columns)
    assert all(isinstance(data.obs[c].dtype, pd.CategoricalDtype) for c in cols)
    # PCA and UMAP embeddings must ship with the dataset
    assert {'X_pca', 'X_umap'}.issubset(data.obsm.keys())
61 |
--------------------------------------------------------------------------------
/tests/ds/test_spatial.py:
--------------------------------------------------------------------------------
1 | import warnings
2 |
3 | import pandas as pd
4 | import pytest
5 | import anndata as ad
6 |
7 | import decoupler as dc
8 |
9 |
def test_msvisium():
    """msvisium loads a spatial AnnData with a categorical niches column."""
    data = dc.ds.msvisium()
    assert isinstance(data, ad.AnnData)
    assert data.raw is None
    assert isinstance(data.obs, pd.DataFrame)
    cols = {'niches'}
    assert cols.issubset(data.obs.columns)
    assert all(isinstance(data.obs[c].dtype, pd.CategoricalDtype) for c in cols)
19 |
--------------------------------------------------------------------------------
/tests/ds/test_toy.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 | import numpy as np
4 | import scipy.stats as sts
5 | import pytest
6 |
7 | import decoupler as dc
8 |
9 |
@pytest.mark.parametrize(
    'nvar,val,size,hasval',
    [
        [3, 0., 5, False],
        [10, 0., 10, True],
    ]
)
def test_fillval(
    nvar,
    val,
    size,
    hasval,
):
    """_fillval pads an array up to nvar entries with a fill value."""
    base = np.array([1., 2., 3., 4., 5.])
    filled = dc.ds._toy._fillval(arr=base, nvar=nvar, val=val)
    assert filled.size == size
    # The last element equals the fill value only when padding occurred
    assert (filled[-1] == val) == hasval
28 |
29 |
@pytest.mark.parametrize(
    'nobs,nvar,bval,pstime,seed,verbose',
    [
        [10, 15, 2, True, 42, False],
        [2, 12, 2, False, 42, False],
        [100, 50, 0, False, 0, True],
        [10, 500, 0, True, 0, True],
    ]
)
def test_toy(
    nobs,
    nvar,
    bval,
    pstime,
    seed,
    verbose,
    caplog,
):
    """toy generates a two-group dataset with the requested dimensions."""
    with caplog.at_level(logging.INFO):
        adata, net = dc.ds.toy(nobs=nobs, nvar=nvar, bval=bval, pstime=pstime, seed=seed, verbose=verbose)
    # Log output only appears when verbose is on
    if verbose:
        assert len(caplog.text) > 0
    else:
        assert caplog.text == ''
    assert all(adata.obs['group'].cat.categories == ['A', 'B'])
    is_a = adata.obs['group'] == 'A'
    # First four genes are up in group A, next four are up in group B
    assert all(adata[is_a, :4].X.mean(0) > adata[~is_a, :4].X.mean(0))
    assert all(adata[is_a, 4:8].X.mean(0) < adata[~is_a, 4:8].X.mean(0))
    assert adata.n_obs == nobs
    assert adata.n_vars == nvar
    assert ((bval - 1) < np.mean(adata.X[:, -1].ravel()) < (bval + 1)) or nvar == 12
    if pstime:
        assert 'pstime' in adata.obs.columns
        assert ((0. <= adata.obs['pstime']) & (adata.obs['pstime'] <= 1.)).all()
65 |
66 |
@pytest.mark.parametrize(
    'shuffle_r,seed,nobs,nvar,is_diff',
    [
        [0.0, 1, 20, 31, True],
        [0.1, 2, 36, 41, True],
        [0.9, 3, 49, 21, False],
        [1.0, 4, 18, 41, False],
    ]
)
def test_toy_bench(
    net,
    shuffle_r,
    seed,
    nobs,
    nvar,
    is_diff,
):
    """toy_bench keeps the toy net and separates groups unless shuffled."""
    adata, bmnet = dc.ds.toy_bench(shuffle_r=shuffle_r, seed=seed, nobs=nobs, nvar=nvar)
    assert (net == bmnet).values.all()
    assert adata.n_obs == nobs
    assert adata.n_vars == nvar
    is_a = adata.obs['group'] == 'A'
    grp_a = adata[is_a, :].copy()
    grp_b = adata[~is_a, :].copy()
    # With low shuffling the first eight genes distinguish the groups
    for gene in adata.var_names[:8]:
        a_vals = grp_a[:, gene].X.ravel()
        b_vals = grp_b[:, gene].X.ravel()
        _, pval = sts.ranksums(a_vals, b_vals)
        if is_diff:
            assert pval < 0.05
        else:
            assert pval > 0.05
100 |
101 |
--------------------------------------------------------------------------------
/tests/ds/test_utils.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | import decoupler as dc
4 |
5 |
@pytest.mark.parametrize(
    'organism,lst_ens,lst_sym',
    [
        ['hsapiens_gene_ensembl', ['ENSG00000196092', 'ENSG00000115415'], ['PAX5', 'STAT1']],
        ['hsapiens_gene_ensembl', ['ENSG00000204655', 'ENSG00000184221'], ['MOG', 'OLIG1']],
        ['mmusculus_gene_ensembl', ['ENSMUSG00000076439', 'ENSMUSG00000046160'], ['Mog', 'Olig1']],
    ]
)
def test_ensmbl_to_symbol(
    organism,
    lst_ens,
    lst_sym,
):
    """Ensembl IDs translate to the expected gene symbols."""
    translated = dc.ds.ensmbl_to_symbol(genes=lst_ens, organism=organism)
    for got, expected in zip(translated, lst_sym):
        assert got == expected
21 |
--------------------------------------------------------------------------------
/tests/mt/test_aucell.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | import scipy.sparse as sps
4 | import pytest
5 |
6 | import decoupler as dc
7 |
8 |
9 | """
10 | gs <- list(
11 | T1=c('G01', 'G02', 'G03'),
12 | T2=c('G04', 'G06', 'G07', 'G08'),
13 | T3=c('G06', 'G07', 'G08'),
14 | T4=c('G05', 'G10', 'G11', 'G09'),
15 | T5=c('G09', 'G10', 'G11')
16 | )
17 | mat <- matrix(c(
18 | 0.879, 8.941, 1.951, 8.75, 0.128, 2.959, 2.369, 9.04, 0.853, 3.127, 0.017, 2.859, 0.316, 2.066, 2.05, 8.305, 0.778, 2.468, 1.302, 2.878,
19 | 2.142, 8.155, 0.428, 9.223, 0.532, 2.84, 2.114, 8.681, 0.431, 2.814, 0.413, 3.129, 0.365, 2.512, 2.651, 8.185, 0.406, 2.616, 0.352, 2.824,
20 | 1.729, 0.637, 8.341, 0.74, 8.084, 2.397, 3.093, 0.635, 1.682, 3.351, 1.28, 2.203, 8.556, 2.255, 3.303, 1.25, 1.359, 2.012, 9.784, 2.06,
21 | 0.746, 0.894, 8.011, 1.798, 8.044, 3.059, 2.996, 0.08, 0.151, 2.391, 1.082, 2.123, 8.203, 2.511, 2.039, 0.051, 1.25, 3.787, 8.249, 3.026
22 | ), nrow=4, byrow=TRUE)
23 | colnames(mat) <- c('G11', 'G04', 'G05', 'G03', 'G07', 'G18', 'G17', 'G02', 'G10',
24 | 'G14', 'G09', 'G16', 'G08', 'G13', 'G20', 'G01', 'G12', 'G15',
25 | 'G06', 'G19')
26 | rownames(mat) <- c("S01", "S02", "S29", "S30")
27 | rnks <- AUCell::AUCell_buildRankings(t(mat), plotStats=FALSE)
28 | t(AUCell::AUCell_calcAUC(gs, rnks, aucMaxRank=3)@assays@data$AUC)
29 | """
30 |
def test_auc(
    mat,
    idxmat,
):
    """_auc returns one AUC value per source for a single expression row."""
    X, _, _ = mat
    cnct, starts, offsets = idxmat
    scores = dc.mt._aucell._auc.py_func(
        row=X[0],
        cnct=cnct,
        starts=starts,
        offsets=offsets,
        n_up=2,
        nsrc=offsets.size
    )
    assert isinstance(scores, np.ndarray)
    assert scores.size == offsets.size
48 |
49 |
def test_func_aucell(
    mat,
    idxmat,
):
    """Compare _func_aucell scores against the R AUCell reference values.

    The expected matrix comes from running the R AUCell package on the same
    data (see the R snippet at the top of this module).
    """
    X, obs, var = mat
    cnct, starts, offsets = idxmat
    # Keep only the first and last two samples to match the R reference
    obs = np.array(['S01', 'S02', 'S29', 'S30'])
    X = np.vstack((X[:2, :], X[-2:, :]))
    X = sps.csr_matrix(X)
    # Reference AUCell scores (aucMaxRank=3) from R
    ac_es = pd.DataFrame(
        data=np.array([
            [0.6666667, 0.3333333, 0, 0, 0],
            [1.0000000, 0.0000000, 0, 0, 0],
            [0.0000000, 1.0000000, 1, 0, 0],
            [0.0000000, 1.0000000, 1, 0, 0],
        ]),
        columns=['T1', 'T2', 'T3', 'T4', 'T5'],
        index=obs
    )
    dc_es, _ = dc.mt._aucell._func_aucell(
        mat=X,
        cnct=cnct,
        starts=starts,
        offsets=offsets,
        n_up=3,

    )
    assert np.isclose(dc_es, ac_es.values).all()
78 |
--------------------------------------------------------------------------------
/tests/mt/test_consensus.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pytest
3 |
4 | import decoupler as dc
5 |
6 |
@pytest.mark.parametrize('sel', [np.array([0., 0., 0., 0.]), np.array([1., 3., 8., 2.])])
def test_zscore(
    sel,
):
    """_zscore returns an array with the same size as its input."""
    zs = dc.mt._consensus._zscore.py_func(sel=sel)
    assert isinstance(zs, np.ndarray)
    assert zs.size == sel.size
14 |
15 |
def test_mean_zscores(
    rng,
):
    """_mean_zscores collapses the first (method) axis of the score stack."""
    stack = rng.normal(size=(2, 5, 10))
    result = dc.mt._consensus._mean_zscores.py_func(scores=stack)
    assert result.shape == stack.shape[1:]
22 |
23 |
def test_consensus(
    adata,
    net,
):
    """consensus works on AnnData results and on DataFrame results."""
    # AnnData path: the consensus score is stored in obsm
    dc.mt.decouple(data=adata, net=net, methods=['zscore', 'ulm'], cons=False, tmin=0)
    dc.mt.consensus(adata)
    assert 'score_consensus' in adata.obsm
    # DataFrame path: scores and p-values are returned instead
    res = dc.mt.decouple(data=adata.to_df(), net=net, methods=['zscore', 'ulm'], cons=False, tmin=0)
    es, pv = dc.mt.consensus(res)
    assert np.isfinite(es.values).all()
    assert ((pv.values >= 0) & (pv.values <= 1)).all()
35 |
--------------------------------------------------------------------------------
/tests/mt/test_decouple.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | import decoupler as dc
4 |
5 |
@pytest.mark.parametrize(
    'methods,args,cons,anndata',
    [
        ['all', dict(), True, True],
        ['aucell', dict(aucell=dict(n_up=3)), True, False],
        [['ulm'], dict(), False, True],
        [['ulm', 'ora'], dict(ulm=dict(), ora=dict(n_up=3)), False, False]
    ]
)
def test_decouple(
    adata,
    net,
    methods,
    args,
    cons,
    anndata
):
    """decouple stores or returns a consensus score depending on cons."""
    if anndata:
        dc.mt.decouple(data=adata, net=net, methods=methods, args=args, cons=cons, tmin=0)
        store = adata.obsm
    else:
        store = dc.mt.decouple(data=adata.to_df(), net=net, methods=methods, args=args, cons=cons, tmin=0)
    # The consensus entry exists exactly when cons was requested
    assert ('score_consensus' in store) == cons
35 |
--------------------------------------------------------------------------------
/tests/mt/test_gsea.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | import pytest
4 |
5 | import gseapy as gp
6 | import decoupler as dc
7 |
8 |
def test_std():
    """_std matches numpy's sample standard deviation (ddof=1)."""
    values = np.array([0.1, -5.3, 3.8, 9.5, -0.4, 5.5])
    assert np.std(values, ddof=1) == dc.mt._gsea._std.py_func(arr=values, ddof=1)
14 |
15 |
def test_ridx():
    """_ridx produces shuffled, seed-dependent index permutations."""
    perms_a = dc.mt._gsea._ridx(times=5, nvar=10, seed=42)
    assert (~(np.diff(perms_a) == 1).all(axis=1)).all()
    perms_b = dc.mt._gsea._ridx(times=5, nvar=10, seed=2)
    assert (~(np.diff(perms_b) == 1).all(axis=1)).all()
    # Different seeds must yield different permutations
    assert (~(perms_a == perms_b).all(axis=1)).all()
22 |
23 |
@pytest.mark.parametrize(
    'row,rnks,set_msk,dec,expected_value,expected_index',
    [
        (np.array([0.0, 2.0, 0.0]), np.array([0, 1, 2]), np.array([False, True, False]), 0.1, 0.9, 1),
        (np.array([1.0, 2.0, 3.0]), np.array([2, 1, 0]), np.array([True, True, True]), 0.1, 1.0, 0),
        (np.array([1.0, 2.0, 3.0]), np.array([0, 1, 2]), np.array([False, False, False]), 0.1, 0, 0),
        (np.array([0.0, 0.0, 0.0]), np.array([0, 1, 2]), np.array([True, True, True]), 0.1, 0.0, 0),
        (np.array([1.0, -2.0, 3.0]), np.array([0, 1, 2]), np.array([True, False, True]), 0.5, 0.5, 2),
    ]
)
def test_esrank(
    row,
    rnks,
    set_msk,
    dec,
    expected_value,
    expected_index
):
    """_esrank returns the extreme enrichment value, its position and the walk."""
    value, index, walk = dc.mt._gsea._esrank.py_func(row=row, rnks=rnks, set_msk=set_msk, dec=dec)
    assert np.isclose(value, expected_value)
    assert index == expected_index
    assert isinstance(walk, np.ndarray)
    assert walk.shape == rnks.shape
46 |
47 |
def test_nesrank(
    rng,
):
    """_nesrank returns a scalar NES and p-value for a permutation set."""
    ridx = np.array([
        [0, 1, 2],
        [0, 2, 1],
        [1, 2, 0],
        [1, 0, 2],
        [2, 0, 1],
        [2, 1, 0],
    ])
    nes, pval = dc.mt._gsea._nesrank.py_func(
        ridx=ridx,
        row=np.array([0.0, 2.0, 0.0]),
        rnks=np.array([0, 1, 2]),
        set_msk=np.array([False, True, False]),
        dec=0.1,
        es=0.9
    )
    assert isinstance(nes, float)
    assert isinstance(pval, float)
74 |
75 |
def test_stsgsea(
    mat,
    idxmat,
):
    """_stsgsea returns es, nes and p-values, one per source."""
    X, _, _ = mat
    cnct, starts, offsets = idxmat
    first_row = X[0, :]
    ridx = dc.mt._gsea._ridx(times=10, nvar=first_row.size, seed=42)
    es, nes, pv = dc.mt._gsea._stsgsea.py_func(
        row=first_row,
        cnct=cnct,
        starts=starts,
        offsets=offsets,
        ridx=ridx,
    )
    for out in (es, nes, pv):
        assert out.size == offsets.size
95 |
96 |
def test_func_gsea(
    mat,
    net,
    idxmat,
):
    """Compare _func_gsea NES values against gseapy's prerank."""
    times = 1000
    seed = 42
    X, obs, var = mat
    # gseapy expects gene sets as a {source: [targets]} dict
    gene_sets = net.groupby('source')['target'].apply(lambda x: list(x)).to_dict()
    cnct, starts, offsets = idxmat
    res = gp.prerank(
        rnk=pd.DataFrame(X, index=obs, columns=var).T,
        gene_sets=gene_sets,
        permutation_num=times,
        permutation_type='gene_set',
        outdir=None,
        min_size=0,
        threads=4,
        seed=seed,
    ).res2d
    gp_es = res.pivot(index='Name', columns='Term', values='NES').astype(float)
    # NOTE(review): gp_pv is computed but never asserted below
    gp_pv = res.pivot(index='Name', columns='Term', values='FDR q-val').astype(float)
    dc_es, dc_pv = dc.mt._gsea._func_gsea(
        mat=X,
        cnct=cnct,
        starts=starts,
        offsets=offsets,
        times=times,
        seed=seed,
    )
    # Permutation-based NES values should agree within a loose tolerance
    assert (gp_es - dc_es).abs().values.max() < 0.10
128 |
--------------------------------------------------------------------------------
/tests/mt/test_mdt.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pytest
3 |
4 | import decoupler as dc
5 |
6 |
@pytest.mark.parametrize(
    'kwargs',
    [
        dict(),
        dict(n_estimators=10),
        dict(max_depth=1),
        dict(gamma=0.01),
    ]
)
def test_func_mdt(
    mat,
    adjmat,
    kwargs,
):
    """_func_mdt returns finite scores bounded in [0, 1]."""
    X, _, _ = mat
    scores = dc.mt._mdt._func_mdt(mat=X, adj=adjmat, **kwargs)[0]
    assert np.isfinite(scores).all()
    assert ((scores >= 0) & (scores <= 1)).all()
25 |
--------------------------------------------------------------------------------
/tests/mt/test_methods.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | import decoupler as dc
4 |
5 |
def test_methods():
    """The method registry and dc.mt.show() agree in size and element type."""
    registry = dc.mt._methods
    assert len(registry) == dc.mt.show().shape[0]
    for method in registry:
        assert isinstance(method, dc._Method.Method)
12 |
--------------------------------------------------------------------------------
/tests/mt/test_mlm.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 | import numpy as np
4 | import statsmodels.api as sm
5 | import pytest
6 |
7 | import decoupler as dc
8 |
9 |
def test_fit(
    mat,
    adjmat,
):
    """_fit returns per-sample coefficients and t-values for each source.

    A column of ones is prepended to the adjacency matrix to model the
    intercept; the intercept column is excluded from the expected shapes.

    Fix: removed a leftover debug ``print`` of the output shapes.
    """
    X, obs, var = mat
    n_features, n_fsets = adjmat.shape
    n_samples, _ = X.shape
    adjmat = np.column_stack((np.ones((n_features, )), adjmat))
    inv = np.linalg.inv(np.dot(adjmat.T, adjmat))
    # Degrees of freedom: features minus sources minus intercept
    df = n_features - n_fsets - 1
    coef, t = dc.mt._mlm._fit.py_func(
        X=adjmat,
        y=X.T,
        inv=inv,
        df=df,
    )
    # Assert output types and shapes (intercept column is not returned)
    assert isinstance(coef, np.ndarray)
    assert isinstance(t, np.ndarray)
    assert coef.shape == (n_samples, n_fsets)
    assert t.shape == (n_samples, n_fsets)
32 |
33 |
@pytest.mark.parametrize('tval', [True, False])
def test_func_mlm(
    mat,
    adjmat,
    tval,
):
    """_func_mlm matches per-sample statsmodels OLS fits."""
    X, _, _ = mat
    dc_es, dc_pv = dc.mt._mlm._func_mlm(mat=X, adj=adjmat, tval=tval)
    # Hoist the constant design matrix out of the loop
    design = sm.add_constant(adjmat)
    st_es = np.zeros(dc_es.shape)
    st_pv = np.zeros(dc_pv.shape)
    for i in range(st_es.shape[0]):
        fit = sm.OLS(X[i, :], design).fit()
        # Skip index 0: it is the intercept term
        st_es[i, :] = fit.tvalues[1:] if tval else fit.params[1:]
        st_pv[i, :] = fit.pvalues[1:]
    assert np.allclose(dc_es, st_es)
    assert np.allclose(dc_pv, st_pv)
55 |
--------------------------------------------------------------------------------
/tests/mt/test_ora.py:
--------------------------------------------------------------------------------
1 | import math
2 |
3 | import numpy as np
4 | import scipy.stats as sts
5 | import scipy.sparse as sps
6 | import pytest
7 |
8 | import decoupler as dc
9 |
10 |
@pytest.mark.parametrize(
    'a,b,c,d',
    [
        [10, 1, 2, 1000],
        [0, 20, 35, 5],
        [1, 2, 3, 4],
        [0, 1, 2, 500],
    ]
)
def test_table(
    a,
    b,
    c,
    d,
):
    """Odds ratio and two-tailed test agree with scipy's fisher_exact."""
    dc_es = dc.mt._ora._oddsr.py_func(a=a, b=b, c=c, d=d, ha_corr=0., log=False)
    dc_pv = dc.mt._ora._test1t.py_func(a=a, b=b, c=c, d=d)
    st_es, st_pv = sts.fisher_exact([[a, b], [c, d]])
    assert np.isclose(dc_es, st_es)
    assert np.isclose(dc_pv, st_pv)
    # The log-space helper must reproduce the same p-value
    nb_pv = math.exp(-dc.mt._ora._mlnTest2t.py_func(a, a + b, a + c, a + b + c + d))
    assert np.isclose(dc_pv, nb_pv)
33 |
34 |
def test_runora(
    mat,
    idxmat,
):
    """_runora returns enrichment and p-value arrays for one ranked row."""
    X, _, _ = mat
    cnct, starts, offsets = idxmat
    ordinal = sts.rankdata(X[0], method='ordinal')
    ranks = np.arange(ordinal.size, dtype=np.int_)
    # Select the features whose ordinal rank exceeds 2
    selected = ranks[(ordinal > 2) | (ordinal < 0)]
    es, pv = dc.mt._ora._runora.py_func(
        row=selected,
        ranks=ranks,
        cnct=cnct,
        starts=starts,
        offsets=offsets,
        n_bg=0,
        ha_corr=0.5,
    )
    assert isinstance(es, np.ndarray)
    assert isinstance(pv, np.ndarray)
55 |
56 |
def test_func_ora(
    mat,
    idxmat,
):
    """Compare _func_ora against per-table scipy fisher_exact results.

    For every sample/source pair a 2x2 contingency table is rebuilt with
    set algebra; p-values use the raw table while odds ratios use the
    Haldane-Anscombe corrected one.
    """
    X, obs, var = mat
    cnct, starts, offsets = idxmat
    n_up = 3
    ha_corr = 1
    dc_es, dc_pv = dc.mt._ora._func_ora(
        mat=sps.csr_matrix(X),
        cnct=cnct,
        starts=starts,
        offsets=offsets,
        n_up=n_up,
        n_bm=0,
        n_bg=None,
        ha_corr=1,
    )
    st_es, st_pv = np.zeros(dc_es.shape), np.zeros(dc_pv.shape)
    ranks = np.arange(X.shape[1], dtype=np.int_)
    rnk = set(ranks)
    for i in range(st_es.shape[0]):
        # Features whose ordinal rank exceeds n_up form the sample's "up" set
        row = sts.rankdata(X[i], method='ordinal')
        row = set(ranks[row > n_up])
        for j in range(st_es.shape[1]):
            fset = dc.pp.net._getset(cnct=cnct, starts=starts, offsets=offsets, j=j)
            fset = set(fset)
            # Build table
            set_a = row.intersection(fset)
            set_b = fset.difference(row)
            set_c = row.difference(fset)
            a = len(set_a)
            b = len(set_b)
            c = len(set_c)
            set_u = set_a.union(set_b).union(set_c)
            set_d = rnk.difference(set_u)
            d = len(set_d)
            _, st_pv[i, j] = sts.fisher_exact([[a, b],[c, d]])
            # Apply the Haldane-Anscombe correction before the odds ratio
            a += ha_corr
            b += ha_corr
            c += ha_corr
            d += ha_corr
            es = sts.fisher_exact([[a, b],[c, d]])
            st_es[i, j], _ = np.log(es)
    assert np.isclose(dc_es, st_es).all()
    assert np.isclose(dc_pv, st_pv).all()
103 |
--------------------------------------------------------------------------------
/tests/mt/test_run.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import scipy.sparse as sps
3 | import pytest
4 |
5 | import decoupler as dc
6 |
7 |
def test_return(
    adata,
    net,
):
    """_return writes into AnnData (returns None) or returns DataFrames."""
    mth = dc.mt.ulm
    adata = adata[:4].copy()
    adata.X[:, 0] = 0.
    es, pv = mth(data=adata.to_df(), net=net, tmin=0)
    # AnnData input: results are stored in place, nothing is returned
    assert dc.mt._run._return(name=mth.name, data=adata, es=es, pv=pv) is None
    # DataFrame input: an (es, pv) tuple of DataFrames comes back
    out = dc.mt._run._return(name=mth.name, data=adata.to_df(), es=es, pv=pv)
    assert isinstance(out, tuple)
    assert isinstance(out[0], pd.DataFrame)
    assert isinstance(out[1], pd.DataFrame)
22 |
23 |
@pytest.mark.parametrize(
    'mth,bsize',
    [
        [dc.mt.zscore, 2],
        [dc.mt.ora, 2],
        [dc.mt.gsva, 250_000],
    ]
)
def test_run(
    adata,
    net,
    mth,
    bsize,
):
    """_run yields identical scores for dense and sparse-derived input.

    Fix: the parametrized ``bsize`` was declared but never forwarded to
    ``_run``, so the batching code path was never exercised; it is now
    passed explicitly to both calls.
    """
    sdata = adata.copy()
    # NOTE(review): to_df() below densifies this sparse matrix again —
    # confirm that is intended, otherwise pass sdata itself to test the
    # sparse code path
    sdata.X = sps.csr_matrix(sdata.X)
    des, dpv = dc.mt._run._run(
        name=mth.name,
        func=mth.func,
        adj=mth.adj,
        test=mth.test,
        data=adata.to_df(),
        net=net,
        tmin=0,
        bsize=bsize,
    )
    ses, spv = dc.mt._run._run(
        name=mth.name,
        func=mth.func,
        adj=mth.adj,
        test=mth.test,
        data=sdata.to_df(),
        net=net,
        tmin=0,
        bsize=bsize,
    )
    assert (des.values == ses.values).all()
59 |
--------------------------------------------------------------------------------
/tests/mt/test_udt.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pytest
3 |
4 | import decoupler as dc
5 |
6 |
@pytest.mark.parametrize(
    'kwargs',
    [
        dict(),
        dict(n_estimators=10),
        dict(max_depth=1),
        dict(gamma=0.01),
    ]
)
def test_func_udt(
    mat,
    adjmat,
    kwargs,
):
    """_func_udt returns finite scores bounded in [0, 1]."""
    X, _, _ = mat
    scores = dc.mt._udt._func_udt(mat=X, adj=adjmat, **kwargs)[0]
    assert np.isfinite(scores).all()
    assert ((scores >= 0) & (scores <= 1)).all()
25 |
--------------------------------------------------------------------------------
/tests/mt/test_ulm.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import scipy.stats as sts
3 | import pytest
4 |
5 | import decoupler as dc
6 |
7 |
def test_cov(
    mat,
    adjmat,
):
    """_cov matches numpy's cross-covariance of sources vs samples."""
    X, _, _ = mat
    dc_cov = dc.mt._ulm._cov(A=adjmat, b=X.T)
    n_sources = adjmat.shape[1]
    expected = np.cov(m=adjmat, y=X.T, rowvar=False)[:n_sources, n_sources:].T
    assert np.allclose(expected, dc_cov)
17 |
18 |
def test_cor(
    mat,
    adjmat,
):
    """_cor matches numpy's cross-correlation and stays within [-1, 1]."""
    X, _, _ = mat
    dc_cor = dc.mt._ulm._cor(adjmat, X.T)
    n_sources = adjmat.shape[1]
    expected = np.corrcoef(adjmat, X.T, rowvar=False)[:n_sources, n_sources:].T
    assert np.allclose(dc_cor, expected)
    assert np.all((dc_cor >= -1) & (dc_cor <= 1))
29 |
30 |
def test_tval():
    """_tval reproduces known t-statistics from correlation and df."""
    cases = [
        (0.4, 28, 2.30940108),
        (0.99, 3, 12.15540081),
        (-0.05, 99, -0.49811675),
    ]
    for r, df, expected in cases:
        assert np.allclose(expected, dc.mt._ulm._tval(r=r, df=df))
38 |
39 |
@pytest.mark.parametrize('tval', [True, False])
def test_func_ulm(
    mat,
    adjmat,
    tval,
):
    """_func_ulm matches scipy's per-source simple linear regressions."""
    X, _, _ = mat
    dc_es, dc_pv = dc.mt._ulm._func_ulm(mat=X, adj=adjmat, tval=tval)
    st_es = np.zeros(dc_es.shape)
    st_pv = np.zeros(dc_pv.shape)
    n_obs, n_srcs = st_es.shape
    for i in range(n_obs):
        for j in range(n_srcs):
            fit = sts.linregress(adjmat[:, j], X[i, :])
            # tval=True reports the t-statistic, otherwise the raw slope
            st_es[i, j] = fit.slope / fit.stderr if tval else fit.slope
            st_pv[i, j] = fit.pvalue
    assert np.allclose(dc_es, st_es)
    assert np.allclose(dc_pv, st_pv)
63 |
--------------------------------------------------------------------------------
/tests/mt/test_viper.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | import pytest
4 |
5 | import decoupler as dc
6 |
7 |
8 | """
9 | mat <- matrix(c(
10 | 0.879, 8.941, 1.951, 8.75, 0.128, 2.959, 2.369, 9.04, 0.853, 3.127, 0.017, 2.859, 0.316, 2.066, 2.05, 8.305, 0.778, 2.468, 1.302, 2.878,
11 | 2.142, 8.155, 0.428, 9.223, 0.532, 2.84, 2.114, 8.681, 0.431, 2.814, 0.413, 3.129, 0.365, 2.512, 2.651, 8.185, 0.406, 2.616, 0.352, 2.824,
12 | 1.729, 0.637, 8.341, 0.74, 8.084, 2.397, 3.093, 0.635, 1.682, 3.351, 1.28, 2.203, 8.556, 2.255, 3.303, 1.25, 1.359, 2.012, 9.784, 2.06,
13 | 0.746, 0.894, 8.011, 1.798, 8.044, 3.059, 2.996, 0.08, 0.151, 2.391, 1.082, 2.123, 8.203, 2.511, 2.039, 0.051, 1.25, 3.787, 8.249, 3.026
14 | ), nrow=4, byrow=TRUE)
15 | colnames(mat) <- c('G11', 'G04', 'G05', 'G03', 'G07', 'G18', 'G17', 'G02', 'G10',
16 | 'G14', 'G09', 'G16', 'G08', 'G13', 'G20', 'G01', 'G12', 'G15',
17 | 'G06', 'G19')
18 | rownames(mat) <- c("S01", "S02", "S29", "S30")
19 | gs <- list(
20 | T1 = list(
21 | tfmode = c(G01 = 1, G02 = 1, G03 = 0.7, G04 = 1, G06 = -0.5, G07 = -3, G08 = -1),
22 | likelihood = c(1, 1, 1, 1, 1, 1, 1)
23 | ),
24 | T2 = list(
25 | tfmode = c(G06 = 1, G07 = 0.5, G08 = 1, G05 = 1.9, G10 = -1.5, G11 = -2, G09 = 3.1),
26 | likelihood = c(1, 1, 1, 1, 1, 1, 1)
27 | ),
28 | T3 = list(
29 | tfmode = c(G09 = 0.7, G10 = 1.1, G11 = 0.1),
30 | likelihood = c(1, 1, 1)
31 | ),
32 | T4 = list(
33 | tfmode = c(G06 = 1, G07 = 0.5, G08 = 1, G05 = 1.9, G10 = -1.5, G11 = -2, G09 = 3.1, G03 = -1.2),
34 | likelihood = c(1, 1, 1, 1, 1, 1, 1, 1)
35 | )
36 | )
37 | t(viper::viper(eset=t(mat), regulon=gs, minsize=1, eset.filter=F, pleiotropy=F))
38 | pargs=list(regulators = 0.05, shadow = 0.05, targets = 1, penalty = 20, method = "adaptive")
39 | t(viper::viper(eset=t(mat), regulon=gs, minsize=1, eset.filter=F, pleiotropy=T, pleiotropyArgs=pargs))
40 |
41 | """
42 |
43 |
def test_get_tmp_idxs(
    rng,
):
    # Smoke test: only checks that the jitted helper runs on a p-value
    # matrix with NaNs on the diagonal; no return value is asserted.
    pval = rng.random((5, 5))
    np.fill_diagonal(pval, np.nan)
    dc.mt._viper._get_tmp_idxs.py_func(pval)
50 |
51 |
def test_func_viper(
    adata,
    net,
):
    """Compare _func_viper against reference R viper scores.

    Expected matrices come from running the R viper package with and
    without pleiotropy correction (see the R snippet at the top of this
    module). The toy net is remapped/extended to match the R regulons.
    """
    # Collapse the five toy sources into the regulons used in R
    dict_net = {
        'T1': 'T1',
        'T2': 'T1',
        'T3': 'T2',
        'T4': 'T2',
        'T5': 'T3',
    }
    net['source'] = [dict_net[s] for s in net['source']]
    # T4 duplicates T2 plus one extra negative edge, as in the R regulon
    net = pd.concat([
        net,
        net[net['source'] == 'T2'].assign(source='T4'),
        pd.DataFrame([['T4', 'G03', -1.2]], columns=['source', 'target', 'weight'], index=[0])
    ])
    mat = dc.pp.extract(data=adata)
    X, obs, var = mat
    sources, targets, adjmat = dc.pp.adjmat(features=var, net=net, verbose=False)
    # Keep only the first and last two samples to match the R reference
    obs = np.array(['S01', 'S02', 'S29', 'S30'])
    X = np.vstack((X[:2, :], X[-2:, :]))
    pf_dc_es, pf_dc_pv = dc.mt._viper._func_viper(mat=X, adj=adjmat, pleiotropy=False)
    pt_dc_es, pt_dc_pv = dc.mt._viper._func_viper(mat=X, adj=adjmat, n_targets=1, pleiotropy=True)
    # R viper scores without pleiotropy correction
    pf_vp_es = np.array([
        [ 3.708381, -2.154396, -1.4069603, -2.468185],
        [ 3.702911, -2.288070, -0.7239077, -2.848132],
        [-3.613066, 1.696114, -0.5789716, 2.039502],
        [-3.495480, 2.560792, -1.1296442, 2.523946],
    ])
    # R viper scores with pleiotropy correction
    pt_vp_es = np.array([
        [ 2.224856, -2.154396, -1.4069603, -1.131059],
        [ 1.880012, -2.288070, -0.7239077, -2.848132],
        [-3.177418, 1.696114, -0.5789716, 2.039502],
        [-2.073186, 2.560792, -1.1296442, 2.523946],
    ])
    assert np.isclose(pf_vp_es, pf_dc_es).all()
    assert np.isclose(pt_vp_es, pt_dc_es).all()
90 |
--------------------------------------------------------------------------------
/tests/mt/test_waggr.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pytest
3 |
4 | import decoupler as dc
5 |
6 |
def test_funcs(
    rng
):
    # Both weighted-aggregation kernels must return a scalar float.
    vals = np.array([1, 2, 3, 4], dtype=float)
    weights = rng.random(vals.size)
    for kernel in (dc.mt._waggr._wsum, dc.mt._waggr._wmean):
        res = kernel.py_func(x=vals, w=weights)
        assert isinstance(res, float)
16 |
17 |
@pytest.mark.parametrize(
    'fun,times,seed',
    [
        ['wmean', 10, 42],
        ['wsum', 5, 23],
        [lambda x, w: 0, 5, 1],
        ['wmean', 0, 42],
    ]
)
def test_func_waggr(
    mat,
    adjmat,
    fun,
    times,
    seed,
):
    # Named kernels, custom callables and zero permutations must all work:
    # scores stay finite and p-values are valid probabilities.
    X, _, _ = mat
    es, pv = dc.mt._waggr._func_waggr(mat=X, adj=adjmat, fun=fun, times=times, seed=seed)
    assert np.isfinite(es).all()
    assert np.logical_and(0 <= pv, pv <= 1).all()
38 |
--------------------------------------------------------------------------------
/tests/mt/test_zscore.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pytest
3 |
4 | import decoupler as dc
5 |
6 |
@pytest.mark.parametrize(
    'flavor', ['KSEA', 'RoKAI']
)
def test_func_zscore(
    mat,
    adjmat,
    flavor,
):
    # Both z-score flavors must yield finite scores and valid p-values.
    X, _, _ = mat
    es, pv = dc.mt._zscore._func_zscore(mat=X, adj=adjmat, flavor=flavor)
    assert np.isfinite(es).all()
    assert np.logical_and(0 <= pv, pv <= 1).all()
19 |
--------------------------------------------------------------------------------
/tests/op/test_collectri.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | import pytest
4 |
5 | import decoupler as dc
6 |
7 |
@pytest.mark.parametrize('remove_complexes', [True, False])
def test_collectri(
    remove_complexes,
):
    # CollecTRI should come back as a well-formed, duplicate-free network.
    ct = dc.op.collectri(remove_complexes=remove_complexes)
    assert isinstance(ct, pd.DataFrame)
    expected = {'source', 'target', 'weight', 'resources', 'references', 'sign_decision'}
    assert expected.issubset(ct.columns)
    assert pd.api.types.is_numeric_dtype(ct['weight'])
    # Complex regulators (e.g. AP1, NFKB) appear only when not removed.
    has_complexes = np.isin(['AP1', 'NFKB'], ct['source']).all()
    assert has_complexes == (not remove_complexes)
    assert not ct.duplicated(subset=['source', 'target']).any()
23 |
--------------------------------------------------------------------------------
/tests/op/test_dorothea.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | import pytest
4 |
5 | import decoupler as dc
6 |
7 |
@pytest.mark.parametrize(
    'levels,dict_weights',
    [
        ['A', None],
        [['A', 'B'], dict(A=1, B=0.5)],
    ]
)
def test_dorothea(
    levels,
    dict_weights,
):
    # DoRothEA should be a duplicate-free weighted network with
    # confidence annotations.
    do = dc.op.dorothea(levels=levels, dict_weights=dict_weights)
    assert isinstance(do, pd.DataFrame)
    assert {'source', 'target', 'weight', 'confidence'}.issubset(do.columns)
    assert pd.api.types.is_numeric_dtype(do['weight'])
    assert not do.duplicated(subset=['source', 'target']).any()
25 |
--------------------------------------------------------------------------------
/tests/op/test_dtype.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import pytest
3 |
4 | import decoupler as dc
5 |
6 |
def test_infer_dtypes():
    # _infer_dtypes should coerce numeric-like and boolean-like string
    # columns while turning extension 'string' columns into plain objects.
    raw = pd.DataFrame(
        data=[
            ['1', 'A', 'true', 'False', 0.3],
            ['2', 'B', 'false', 'True', 0.1],
            ['3', 'C', 'false', 'True', 3.1],
        ],
        columns=list('abcde'),
        index=[0, 1, 2],
    )
    raw['b'] = raw['b'].astype('string')
    inferred = dc.op._dtype._infer_dtypes(raw.copy())
    assert pd.api.types.is_numeric_dtype(inferred['a'])
    assert inferred['b'].dtype == 'object'
    for col in ('c', 'd'):
        assert pd.api.types.is_bool_dtype(inferred[col])
    assert pd.api.types.is_numeric_dtype(inferred['e'])
24 |
25 |
--------------------------------------------------------------------------------
/tests/op/test_hallmark.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | import pytest
4 |
5 | import decoupler as dc
6 |
7 |
def test_hallmark():
    # MSigDB hallmark gene sets download as a duplicate-free frame.
    hm = dc.op.hallmark()
    assert isinstance(hm, pd.DataFrame)
    assert {'source', 'target'}.issubset(hm.columns)
    assert not hm.duplicated(subset=['source', 'target']).any()
14 |
--------------------------------------------------------------------------------
/tests/op/test_progeny.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | import pytest
4 |
5 | import decoupler as dc
6 |
7 |
@pytest.mark.parametrize(
    'top,thr_padj',
    [
        [100, 0.05],
        [100, 1],
        [np.inf, 0.05],
        [np.inf, 1],
    ]
)
def test_progeny(
    top,
    thr_padj,
):
    # PROGENy output must respect both the significance and size filters.
    pr = dc.op.progeny(top=top, thr_padj=thr_padj)
    assert isinstance(pr, pd.DataFrame)
    assert {'source', 'target', 'weight', 'padj'}.issubset(pr.columns)
    for col in ('weight', 'padj'):
        assert pd.api.types.is_numeric_dtype(pr[col])
    # Every edge passes the adjusted p-value cutoff ...
    assert (pr['padj'] < thr_padj).all()
    # ... no pathway exceeds the requested number of targets ...
    assert (pr.groupby('source').size() <= top).all()
    # ... and edges are unique.
    assert not pr.duplicated(subset=['source', 'target']).any()
30 |
--------------------------------------------------------------------------------
/tests/op/test_resource.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | import pytest
4 |
5 | import decoupler as dc
6 |
7 |
def test_show_resources():
    # The resource catalog lists known databases with license metadata.
    catalog = dc.op.show_resources()
    assert isinstance(catalog, pd.DataFrame)
    assert catalog.shape[0] > 0
    assert {'name', 'license'}.issubset(catalog.columns)
    assert np.isin(['PROGENy', 'MSigDB'], catalog['name']).all()
14 |
15 |
@pytest.mark.parametrize('name', ['Lambert2018', 'PanglaoDB'])
def test_resource(
    name
):
    # Individual resources download as frames keyed by gene symbol.
    res = dc.op.resource(name=name)
    assert isinstance(res, pd.DataFrame)
    assert 'genesymbol' in res.columns
23 |
--------------------------------------------------------------------------------
/tests/op/test_translate.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | import pytest
4 |
5 | import decoupler as dc
6 |
7 |
def test_show_organisms():
    # Supported organisms must include at least mouse and rat.
    organisms = dc.op.show_organisms()
    assert isinstance(organisms, list)
    assert organisms
    assert {'mouse', 'rat'}.issubset(organisms)
13 |
14 |
@pytest.mark.parametrize(
    'lst,my_dict,one_to_many',
    [
        [['a', 'b', 'c', 'd'], dict(a=['B', 'C'], b=['A', 'C'], c=['A', 'B'], d='D'), 1],
        [['a', 'b', 'c', 'd'], dict(c=['A', 'B']), 1],
        [['a', 'b', 'c', 'd'], dict(a=['B', 'C'], b=['A', 'C'], c=['A', 'B'], d='D'), 10],
    ]
)
def test_replace_subunits(
    lst,
    my_dict,
    one_to_many,
):
    """Mapped subunits become lists unless they exceed the one_to_many cap.

    Entries with more translations than ``one_to_many`` are replaced by NaN,
    the rest by a list of orthologs.
    """
    res = dc.op._translate._replace_subunits(
        lst=lst, my_dict=my_dict, one_to_many=one_to_many
    )
    assert isinstance(res, list)
    assert len(res) == len(lst)
    # BUGFIX: the original loop iterated my_dict's own keys and then
    # re-tested `k in my_dict`, so the else-branch (NaN assertion) was
    # unreachable dead code. Iterate the mapped keys directly.
    # NOTE(review): entries of lst absent from my_dict are still not
    # asserted on here; their expected value is not established by this test.
    for k in my_dict:
        idx = lst.index(k)
        if len(my_dict[k]) > one_to_many:
            assert np.isnan(res[idx])
        else:
            assert isinstance(res[idx], list)
42 |
43 |
@pytest.mark.parametrize('target_organism', ['mouse', 'anole_lizard', 'fruitfly'])
def test_translate(
    target_organism,
):
    """Translating CollecTRI targets to another organism changes its size."""
    net = dc.op.collectri()
    # BUGFIX: target_organism was parametrized over three organisms but the
    # call hard-coded 'mouse', so two of the cases tested nothing new.
    # Forward the parametrized value instead.
    t_net = dc.op.translate(net=net, columns='target', target_organism=target_organism)
    cols = {'source', 'target', 'weight'}
    assert isinstance(t_net, pd.DataFrame)
    assert cols.issubset(t_net.columns)
    # Some genes have no ortholog, so the row count must change.
    assert net.shape[0] != t_net.shape[0]
54 |
55 |
56 |
57 |
58 |
59 |
60 |
--------------------------------------------------------------------------------
/tests/pl/test_Plotter.py:
--------------------------------------------------------------------------------
1 | import tempfile
2 |
3 | import matplotlib.pyplot as plt
4 | import matplotlib.image as mpimg
5 | import pytest
6 |
7 | import decoupler as dc
8 |
9 |
def test_plot_ax(
    adata
):
    # Plotting into caller-provided axes should not raise.
    fig, (left, right) = plt.subplots(1, 2, tight_layout=True, figsize=(4, 2))
    dc.pl.obsbar(adata=adata, y='group', hue='sample', ax=left)
    dc.pl.obsbar(adata=adata, y='sample', hue='group', ax=right)
17 |
18 |
def test_plot_save(
    adata
):
    # Saving to disk should produce a file readable as an image.
    with tempfile.NamedTemporaryFile(suffix=".png", delete=True) as tmp:
        dc.pl.obsbar(adata=adata, y='group', hue='sample', save=tmp.name)
        tmp.flush()
        assert mpimg.imread(tmp.name) is not None
27 |
--------------------------------------------------------------------------------
/tests/pl/test_barplot.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import matplotlib.pyplot as plt
3 | from matplotlib.figure import Figure
4 | import pytest
5 |
6 | import decoupler as dc
7 |
8 |
@pytest.fixture
def df():
    # Two observations by four TFs with mixed-sign scores.
    scores = [
        [1, -2, 3, -4],
        [5, -6, 7, -8],
    ]
    return pd.DataFrame(
        data=scores,
        index=['C1', 'C2'],
        columns=[f'TF{i}' for i in range(4)],
    )
20 |
21 |
@pytest.mark.parametrize(
    'name,top,vertical,vcenter',
    [
        ['C1', 2, True, None],
        ['C2', 10, False, -3],
        ['C2', 10, False, 10],
    ]
)
def test_barplot(
    df,
    name,
    top,
    vertical,
    vcenter,
):
    """barplot returns a Figure for each parametrized configuration."""
    # BUGFIX: vcenter was parametrized but never forwarded to dc.pl.barplot,
    # so the -3/10 cases never exercised the colormap-centering path.
    fig = dc.pl.barplot(data=df, name=name, top=top, vertical=vertical, vcenter=vcenter, return_fig=True)
    assert isinstance(fig, Figure)
    plt.close(fig)
40 |
--------------------------------------------------------------------------------
/tests/pl/test_dotplot.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import matplotlib.pyplot as plt
3 | from matplotlib.figure import Figure
4 | import pytest
5 |
6 | import decoupler as dc
7 |
8 |
@pytest.fixture
def df():
    # Long-format frame: one row per dot with x/y position, color and size.
    rows = [
        ['TF1', 1, 1, 5],
        ['TF2', 3, 1, 10],
        ['TF3', 4, 10, 10],
        ['TF4', 5, 15, 11],
    ]
    return pd.DataFrame(data=rows, columns=['y', 'x', 'c', 's'])
21 |
22 |
@pytest.mark.parametrize(
    'vcenter', [None, 3]
)
def test_dotplot(
    df,
    vcenter,
):
    # Both default and user-centered colormaps should produce a Figure.
    result = dc.pl.dotplot(df=df, x='x', y='y', c='c', s='s', vcenter=vcenter, return_fig=True)
    assert isinstance(result, Figure)
    plt.close(result)
33 |
--------------------------------------------------------------------------------
/tests/pl/test_filter_by_expr.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | from matplotlib.figure import Figure
3 |
4 | import decoupler as dc
5 |
6 |
def test_filter_by_expr(
    pdata,
):
    # The expression-filter diagnostic plot should build and return a Figure.
    result = dc.pl.filter_by_expr(adata=pdata, return_fig=True)
    assert isinstance(result, Figure)
    plt.close(result)
13 |
--------------------------------------------------------------------------------
/tests/pl/test_filter_by_prop.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | from matplotlib.figure import Figure
3 |
4 | import decoupler as dc
5 |
6 |
def test_filter_by_prop(
    pdata,
):
    # The proportion-filter diagnostic plot should build and return a Figure.
    result = dc.pl.filter_by_prop(adata=pdata, return_fig=True)
    assert isinstance(result, Figure)
    plt.close(result)
13 |
--------------------------------------------------------------------------------
/tests/pl/test_filter_samples.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | from matplotlib.figure import Figure
3 | import pytest
4 |
5 | import decoupler as dc
6 |
7 |
@pytest.mark.parametrize(
    'groupby,log',
    [
        ['group', True],
        [['group'], True],
        [['sample', 'group'], True],
    ]
)
def test_filter_samples(
    pdata,
    groupby,
    log,
):
    # groupby accepts a single column name or a list of columns.
    kwargs = dict(adata=pdata, groupby=groupby, log=log, return_fig=True)
    fig = dc.pl.filter_samples(**kwargs)
    assert isinstance(fig, Figure)
    plt.close(fig)
24 |
--------------------------------------------------------------------------------
/tests/pl/test_leading_edge.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | import matplotlib.pyplot as plt
4 | from matplotlib.figure import Figure
5 | import pytest
6 |
7 | import decoupler as dc
8 |
9 |
@pytest.mark.parametrize(
    'stat,name,a_err',
    [
        ['stat', 'T1', False],
        ['stat', 'T2', False],
        ['pval', 'T3', False],
        ['pval', 'T4', False],
    ]
)
def test_leading_edge(
    net,
    stat,
    name,
    a_err
):
    # Simple per-gene statistics frame over the toy genes G00..G08.
    df = pd.DataFrame(
        data=[[i, i ** 2] for i in range(9)],
        columns=['stat', 'pval'],
        index=[f'G0{i}' for i in range(9)],
    )
    if a_err:
        # Invalid configurations must raise an AssertionError.
        with pytest.raises(AssertionError):
            dc.pl.leading_edge(df=df, net=net, stat=stat, name=name, return_fig=True)
    else:
        fig, le = dc.pl.leading_edge(df=df, net=net, stat=stat, name=name, return_fig=True)
        # Leading-edge genes come back as an array alongside the figure.
        assert isinstance(le, np.ndarray)
        assert isinstance(fig, Figure)
        plt.close(fig)
38 |
--------------------------------------------------------------------------------
/tests/pl/test_network.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | import matplotlib.pyplot as plt
4 | from matplotlib.figure import Figure
5 | import pytest
6 |
7 | import decoupler as dc
8 |
9 |
@pytest.fixture
def data():
    # One observation with expression values for six toy genes.
    return pd.DataFrame(
        data=[[5, 6, 7, 1, 1, 2.]],
        index=['C1'],
        columns=['G01', 'G02', 'G03', 'G06', 'G07', 'G08'],
    )
20 |
21 |
@pytest.fixture
def score():
    # Enrichment scores for the four toy sources of a single observation.
    return pd.DataFrame(
        data=[[4, 3, -3, -2.]],
        index=['C1'],
        columns=[f'T{i + 1}' for i in range(4)],
    )
32 |
33 |
@pytest.mark.parametrize(
    'd_none,unw,sources,targets,by_abs,vcenter',
    [
        [False, False, 5, 5, False, False],
        [False, True, 'T1', 5, True, True],
        [True, False, ['T1'], 5, True, True],
        [True, False, ['T1', 'T3'], 5, True, True],
        [False, False, 5, 'G01', True, True],
        [False, False, 5, ['G01', 'G02', 'G03'], True, True],
    ]
)
def test_network(
    net,
    data,
    score,
    d_none,
    unw,
    sources,
    targets,
    by_abs,
    vcenter,
):
    # Without data/score the source nodes get a flat white colormap.
    s_cmap = 'coolwarm'
    if d_none:
        data, score, s_cmap = None, None, 'white'
    # Optionally exercise the unweighted-network code path.
    if unw:
        net = net.drop(columns=['weight'])
    fig = dc.pl.network(
        data=data,
        score=score,
        net=net,
        sources=sources,
        targets=targets,
        by_abs=by_abs,
        vcenter=vcenter,
        s_cmap=s_cmap,
        figsize=(5, 5),
        return_fig=True,
    )
    assert isinstance(fig, Figure)
    plt.close(fig)
--------------------------------------------------------------------------------
/tests/pl/test_obsbar.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | from matplotlib.figure import Figure
3 | import pytest
4 |
5 | import decoupler as dc
6 |
7 |
@pytest.mark.parametrize(
    'y,hue,kw',
    [
        ['group', None, dict()],
        ['group', 'group', dict(width=0.5)],
        ['group', 'sample', dict(palette='tab10')],
        ['sample', 'group', dict(palette='tab20')],
    ]
)
def test_obsbar(
    adata,
    y,
    hue,
    kw,
):
    # Extra keyword args are forwarded to the underlying barplot call.
    result = dc.pl.obsbar(adata=adata, y=y, hue=hue, kw_barplot=kw, return_fig=True)
    assert isinstance(result, Figure)
    plt.close(result)
26 |
--------------------------------------------------------------------------------
/tests/pl/test_obsm.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import matplotlib
3 | matplotlib.use("Agg")
4 | import matplotlib.pyplot as plt
5 | from matplotlib.figure import Figure
6 | import pytest
7 |
8 | import decoupler as dc
9 |
10 |
@pytest.fixture
def tdata_obsm_pca(
    tdata_obsm,
):
    # CONSISTENCY FIX: rank on a copy, exactly as the sibling
    # tdata_obsm_ulm fixture does, so rankby_obsm never mutates the
    # shared tdata_obsm fixture in place.
    tdata_obsm = tdata_obsm.copy()
    dc.tl.rankby_obsm(tdata_obsm, key='X_pca')
    return tdata_obsm
17 |
18 |
@pytest.fixture
def tdata_obsm_ulm(
    tdata_obsm,
):
    # Rank the ULM scores on a copy so the shared fixture stays pristine.
    adata = tdata_obsm.copy()
    dc.tl.rankby_obsm(adata, key='score_ulm')
    return adata
26 |
27 |
@pytest.mark.parametrize(
    'pca,names,nvar,dendrogram,titles,cmap_obs',
    [
        [True, None, 10, True, ['Scores', 'Stats'], dict()],
        [True, 'group', 5, False, ['asd', 'fgh'], dict()],
        [True, ['group', 'pstime'], 10, True, ['Scores', 'Stats'], dict()],
        [True, None, 10, True, ['Scores', 'Stats'], dict(group='tab10', pstime='magma', sample='Pastel1')],
        [True, None, 2, True, ['Scores', 'Stats'], dict(pstime='magma')],
        [True, None, ['PC01', 'PC02'], True, ['Scores', 'Stats'], dict(pstime='magma')],
        [False, None, None, True, ['Scores', 'Stats'], dict()],
        [False, None, 10, True, ['Scores', 'Stats'], dict()],
        [False, None, 'T3', True, ['Scores', 'Stats'], dict()],
        [False, None, ['T5', 'T3'], True, ['Scores', 'Stats'], dict()],
    ]
)
def test_obsm(
    tdata_obsm_pca,
    tdata_obsm_ulm,
    pca,
    names,
    nvar,
    dendrogram,
    titles,
    cmap_obs,
):
    # Pick which pre-ranked AnnData to plot: PCA- or ULM-based.
    ranked = tdata_obsm_pca if pca else tdata_obsm_ulm
    fig = dc.pl.obsm(
        ranked,
        names=names,
        nvar=nvar,
        dendrogram=dendrogram,
        titles=titles,
        cmap_obs=cmap_obs,
        return_fig=True,
    )
    assert isinstance(fig, Figure)
    plt.close(fig)
68 |
--------------------------------------------------------------------------------
/tests/pl/test_order.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | from matplotlib.figure import Figure
3 | import pytest
4 |
5 | import decoupler as dc
6 |
7 |
@pytest.mark.parametrize(
    'names,label,mode',
    [
        [['G01', 'G02', 'G07', 'G08', 'G12'], None, 'line'],
        [['G01', 'G02', 'G07', 'G08'], None, 'mat'],
        [None, 'group', 'line'],
        [None, 'group', 'mat'],
    ]
)
def test_order(
    tdata,
    names,
    label,
    mode,
):
    """Binned-order plots render in both line and matrix modes."""
    # BUGFIX: the parametrized gene lists were previously ignored — a
    # hard-coded list was always passed to bin_order. Forward them, and
    # keep the old default list for the None cases.
    if names is None:
        names = ['G12', 'G01', 'G07', 'G04']
    df = dc.pp.bin_order(adata=tdata, names=names, order='pstime', label=label)
    fig = dc.pl.order(df=df, mode=mode, return_fig=True)
    assert isinstance(fig, Figure)
    plt.close(fig)
27 |
--------------------------------------------------------------------------------
/tests/pl/test_order_targets.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | from matplotlib.figure import Figure
3 | import pytest
4 |
5 | import decoupler as dc
6 |
7 |
@pytest.mark.parametrize(
    'source,label,vmin,vmax',
    [
        ['T1', None, 0, 10],
        ['T2', 'group', -3, 10],
        ['T3', 'group', -20, 15],
        ['T4', 'group', -1, 20],
        ['T5', 'group', -2, 14],
        ['T5', 'group', None, None],
    ]
)
def test_order_targets(
    tdata,
    net,
    source,
    label,
    vmin,
    vmax,
):
    # Enrichment scores are required before plotting targets along the order.
    dc.mt.ulm(tdata, net, tmin=0)
    result = dc.pl.order_targets(
        adata=tdata,
        net=net,
        order='pstime',
        source=source,
        label=label,
        vmin=vmin,
        vmax=vmax,
        return_fig=True,
    )
    assert isinstance(result, Figure)
    plt.close(result)
40 |
--------------------------------------------------------------------------------
/tests/pl/test_source_targets.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import matplotlib.pyplot as plt
3 | from matplotlib.figure import Figure
4 | import pytest
5 |
6 | import decoupler as dc
7 |
8 |
@pytest.mark.parametrize(
    'name,a_err', [
        ['T1', False],
        ['T10', True],
    ]
)
def test_source_targets(
    deg,
    net,
    name,
    a_err,
):
    # Known sources plot; sources missing from the net must raise.
    kwargs = dict(data=deg, net=net, name=name, x='weight', y='stat', return_fig=True)
    if a_err:
        with pytest.raises(AssertionError):
            dc.pl.source_targets(**kwargs)
    else:
        fig = dc.pl.source_targets(**kwargs)
        assert isinstance(fig, Figure)
        plt.close(fig)
28 |
--------------------------------------------------------------------------------
/tests/pl/test_volcano.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import matplotlib.pyplot as plt
3 | from matplotlib.figure import Figure
4 | import pytest
5 |
6 | import decoupler as dc
7 |
8 |
@pytest.mark.parametrize(
    'use_net,name,a_err',
    [
        [False, None, False],
        [True, 'T1', False],
        [True, 'T2', False],
        [True, 'T3', False],
        [True, 'T10', True],
    ]
)
def test_volcano(
    deg,
    net,
    use_net,
    name,
    a_err,
):
    # Without a network both net and name are passed as None.
    if not use_net:
        net, name = None, None
    kwargs = dict(data=deg, x='stat', y='padj', net=net, name=name, return_fig=True)
    if a_err:
        # Requesting a source absent from the net must raise.
        with pytest.raises(AssertionError):
            dc.pl.volcano(**kwargs)
    else:
        fig = dc.pl.volcano(**kwargs)
        assert isinstance(fig, Figure)
        plt.close(fig)
36 |
--------------------------------------------------------------------------------
/tests/pp/test_data.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import scipy.sparse as sps
3 | import pytest
4 |
5 | import decoupler as dc
6 |
7 |
def test_extract(
    adata,
):
    # extract accepts [X, obs, var] lists, DataFrames and AnnData alike.
    inputs = (
        [adata.X, adata.obs_names, adata.var_names],
        adata.to_df(),
        adata,
    )
    for data in inputs:
        X, obs, var = dc.pp.extract(data=data)
        assert X.shape[0] == obs.size
        assert X.shape[1] == var.size
    # layer= selects an alternative matrix (rounded counts stay integer).
    adata.layers['counts'] = adata.X.round()
    X, obs, var = dc.pp.extract(data=adata, layer='counts')
    assert float(np.sum(X)).is_integer()
    # Sparse inputs are normalized to CSR format.
    sadata = adata.copy()
    sadata.X = sps.coo_matrix(sadata.X)
    X, obs, var = dc.pp.extract(data=sadata)
    assert isinstance(X, sps.csr_matrix)
    # empty=True drops observations that are entirely zero.
    eadata = adata.copy()
    eadata.X[5, :] = 0.
    X, obs, var = dc.pp.extract(data=eadata, empty=True)
    assert X.shape[0] < eadata.shape[0]
    # raw=True reads from adata.raw (here: the negated matrix).
    nadata = adata.copy()
    nadata.X = nadata.X * -1
    adata.raw = nadata
    X, obs, var = dc.pp.extract(data=adata, raw=True)
    assert (X < 0).all()
37 |
--------------------------------------------------------------------------------
/tests/test_download.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import pytest
3 |
4 | import decoupler as dc
5 |
6 |
@pytest.mark.parametrize(
    'url,kwargs',
    [
        [
            ('https://www.ncbi.nlm.nih.gov/geo/download/?' +
             'acc=GSM8563697&format=file&file=GSM8563697%' +
             '5FCO37%5Ffeatures%2Etsv%2Egz'),
            dict(sep='\t', compression='gzip', header=None)
        ],
        [
            ('https://www.ncbi.nlm.nih.gov/geo/download/?' +
             'acc=GSM8563697&format=file&file=GSM8563697%' +
             '5FCO37%5Ftissue%5Fpositions%5Flist%2Ecsv%2Egz'),
            dict(sep=',', compression='gzip')
        ],
    ]
)
def test_download(
    url,
    kwargs,
):
    # Downloaded GEO supplementary files should parse into multi-column frames.
    table = dc._download._download(url, **kwargs)
    assert isinstance(table, pd.DataFrame)
    assert table.columns.size > 1
31 |
--------------------------------------------------------------------------------
/tests/test_version.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | import decoupler
4 |
5 |
def test_package_has_version():
    # The installed package must expose a version attribute.
    version = decoupler.__version__
    assert version is not None
8 |
--------------------------------------------------------------------------------
/tests/tl/test_rankby_group.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import pytest
3 |
4 | import decoupler as dc
5 |
6 |
@pytest.mark.parametrize(
    'groupby,reference,method',
    [
        ['group', 'rest', 'wilcoxon'],
        ['group', 'A', 't-test'],
        ['group', 'A', 't-test_overestim_var'],
        ['sample', 'rest', 't-test_overestim_var'],
        ['sample', 'S01', 't-test_overestim_var'],
        ['sample', ['S01'], 't-test_overestim_var'],
        ['sample', ['S01', 'S02'], 't-test_overestim_var'],
    ]
)
def test_rankby_group(
    adata,
    groupby,
    reference,
    method,
):
    # Rank features per group and validate the result frame's schema.
    df = dc.tl.rankby_group(adata=adata, groupby=groupby, reference=reference, method=method)
    assert isinstance(df, pd.DataFrame)
    cat_cols = {'group', 'reference', 'name'}
    num_cols = {'stat', 'meanchange', 'pval', 'padj'}
    assert (cat_cols | num_cols).issubset(set(df.columns))
    # Metadata columns are categorical ...
    for col in cat_cols:
        assert isinstance(df[col].dtype, pd.CategoricalDtype)
    # ... statistic columns are numeric.
    for col in num_cols:
        assert pd.api.types.is_numeric_dtype(df[col])
    # Groups stem from the original annotation; padj is a probability.
    assert set(df['group'].cat.categories).issubset(set(adata.obs[groupby].cat.categories))
    assert df['padj'].between(0., 1.).all()
37 |
--------------------------------------------------------------------------------
/tests/tl/test_rankby_obsm.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import pytest
3 |
4 | import decoupler as dc
5 |
6 |
@pytest.mark.parametrize(
    'key,uns_key',
    [
        ['X_pca', 'rank_obsm'],
        ['X_pca', None],
        ['X_umap', 'other'],
        ['score_ulm', 'other'],
        ['score_ulm', None],
    ]
)
def test_rankby_obsm(
    tdata_obsm,
    key,
    uns_key,
):
    # Add a second, unbalanced categorical covariate before ranking.
    tdata_obsm = tdata_obsm.copy()
    tdata_obsm.obs['dose'] = 'Low'
    tdata_obsm.obs.loc[tdata_obsm.obs_names[5], 'dose'] = 'High'
    res = dc.tl.rankby_obsm(tdata_obsm, key=key, uns_key=uns_key)
    if uns_key is not None:
        # With uns_key set, results land in .uns and nothing is returned.
        assert res is None
        assert uns_key in tdata_obsm.uns
        assert isinstance(tdata_obsm.uns[uns_key], pd.DataFrame)
    else:
        assert isinstance(res, pd.DataFrame)
32 |
--------------------------------------------------------------------------------
/tests/tl/test_rankby_order.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import scipy.sparse as sps
3 | import scipy.stats as sts
4 | import pytest
5 |
6 | import decoupler as dc
7 |
8 |
@pytest.mark.parametrize('stat', ['dcor', 'pearsonr', 'spearmanr', 'kendalltau', sts.pearsonr])
def test_rankby_order(
    tdata,
    stat,
):
    """rankby_order ranks the truly order-associated genes on top.

    Runs once on the dense matrix and once on a CSR-sparse copy; both must
    pass the same ranking checks (DRY: the original duplicated this whole
    assertion block, now factored into a local helper).
    """
    gt_genes = {'G01', 'G02', 'G03', 'G04'} | {'G05', 'G06', 'G07', 'G08'}

    def _assert_ranking(df):
        # Ground-truth genes should dominate the top of the ranking ...
        assert isinstance(df, pd.DataFrame)
        top = set(df.head(len(gt_genes))['name'])
        assert len(gt_genes) > 3
        assert (len(gt_genes & top) / len(gt_genes)) >= 0.75
        # ... and carry a higher mean statistic than the remaining genes.
        msk = df['name'].isin(gt_genes)
        assert df[~msk]['stat'].mean() < df[msk]['stat'].mean()

    _assert_ranking(dc.tl.rankby_order(tdata, order='pstime', stat=stat))
    # Repeat the identical checks with a sparse expression matrix.
    tdata.X = sps.csr_matrix(tdata.X)
    _assert_ranking(dc.tl.rankby_order(tdata, order='pstime', stat=stat))
32 |
--------------------------------------------------------------------------------