├── .codecov.yaml ├── .cruft.json ├── .editorconfig ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.yml │ ├── config.yml │ └── feature_request.yml └── workflows │ ├── build.yaml │ ├── release.yaml │ └── test.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yaml ├── .vscode ├── extensions.json ├── launch.json └── settings.json ├── CHANGELOG.md ├── LICENSE ├── README.md ├── biome.jsonc ├── docs ├── _static │ ├── .gitkeep │ ├── css │ │ └── custom.css │ └── images │ │ ├── logo.png │ │ ├── mlm.png │ │ ├── ora.png │ │ └── ulm.png ├── _templates │ ├── .gitkeep │ └── autosummary │ │ └── class.rst ├── api │ ├── bm.md │ ├── ds.md │ ├── index.md │ ├── mt.md │ ├── op.md │ ├── pl.md │ ├── pp.md │ └── tl.md ├── changelog.md ├── conf.py ├── extensions │ └── typed_returns.py ├── index.md ├── notebooks │ ├── bench │ │ ├── index.md │ │ └── rna.ipynb │ ├── bulk │ │ ├── index.md │ │ └── rna.ipynb │ ├── example.ipynb │ ├── index.md │ ├── omnipath │ │ ├── index.md │ │ ├── licenses.ipynb │ │ └── orthologs.ipynb │ ├── scell │ │ ├── index.md │ │ ├── rna_psbk.ipynb │ │ ├── rna_pstime.ipynb │ │ └── rna_sc.ipynb │ └── spatial │ │ ├── index.md │ │ └── rna_visium.ipynb ├── references.bib └── references.md ├── pyproject.toml ├── src └── decoupler │ ├── _Method.py │ ├── _Plotter.py │ ├── __init__.py │ ├── _datatype.py │ ├── _docs.py │ ├── _download.py │ ├── _log.py │ ├── _odeps.py │ ├── bm │ ├── __init__.py │ ├── _pp.py │ ├── _run.py │ ├── metric │ │ ├── __init__.py │ │ ├── _auc.py │ │ ├── _fscore.py │ │ ├── _hmean.py │ │ └── _qrank.py │ └── pl │ │ ├── __init__.py │ │ ├── _auc.py │ │ ├── _bar.py │ │ ├── _format.py │ │ ├── _fscore.py │ │ ├── _qrank.py │ │ └── _summary.py │ ├── ds │ ├── __init__.py │ ├── _bulk.py │ ├── _scell.py │ ├── _spatial.py │ ├── _toy.py │ └── _utils.py │ ├── mt │ ├── __init__.py │ ├── _aucell.py │ ├── _consensus.py │ ├── _decouple.py │ ├── _gsea.py │ ├── _gsva.py │ ├── _mdt.py │ ├── _methods.py │ ├── _mlm.py │ ├── _ora.py │ ├── _run.py │ ├── _udt.py │ ├── 
_ulm.py │ ├── _viper.py │ ├── _waggr.py │ └── _zscore.py │ ├── op │ ├── __init__.py │ ├── _collectri.py │ ├── _dorothea.py │ ├── _dtype.py │ ├── _hallmark.py │ ├── _progeny.py │ ├── _resource.py │ └── _translate.py │ ├── pl │ ├── __init__.py │ ├── _barplot.py │ ├── _dotplot.py │ ├── _filter_by_expr.py │ ├── _filter_by_prop.py │ ├── _filter_samples.py │ ├── _leading_edge.py │ ├── _network.py │ ├── _obsbar.py │ ├── _obsm.py │ ├── _order.py │ ├── _order_targets.py │ ├── _source_targets.py │ └── _volcano.py │ ├── pp │ ├── __init__.py │ ├── anndata.py │ ├── data.py │ └── net.py │ └── tl │ ├── __init__.py │ ├── _rankby_group.py │ ├── _rankby_obsm.py │ └── _rankby_order.py └── tests ├── bm ├── test_benchmark.py └── test_pl.py ├── conftest.py ├── ds ├── test_bulk.py ├── test_scell.py ├── test_spatial.py ├── test_toy.py └── test_utils.py ├── mt ├── test_aucell.py ├── test_consensus.py ├── test_decouple.py ├── test_gsea.py ├── test_gsva.py ├── test_mdt.py ├── test_methods.py ├── test_mlm.py ├── test_ora.py ├── test_run.py ├── test_udt.py ├── test_ulm.py ├── test_viper.py ├── test_waggr.py └── test_zscore.py ├── op ├── test_collectri.py ├── test_dorothea.py ├── test_dtype.py ├── test_hallmark.py ├── test_progeny.py ├── test_resource.py └── test_translate.py ├── pl ├── test_Plotter.py ├── test_barplot.py ├── test_dotplot.py ├── test_filter_by_expr.py ├── test_filter_by_prop.py ├── test_filter_samples.py ├── test_leading_edge.py ├── test_network.py ├── test_obsbar.py ├── test_obsm.py ├── test_order.py ├── test_order_targets.py ├── test_source_targets.py └── test_volcano.py ├── pp ├── test_anndata.py ├── test_data.py └── test_net.py ├── test_download.py ├── test_version.py └── tl ├── test_rankby_group.py ├── test_rankby_obsm.py └── test_rankby_order.py /.codecov.yaml: -------------------------------------------------------------------------------- 1 | # Based on pydata/xarray 2 | codecov: 3 | require_ci_to_pass: no 4 | 5 | coverage: 6 | status: 7 | project: 8 | default: 9 | # 
Require 1% coverage, i.e., always succeed 10 | target: 1 11 | patch: false 12 | changes: false 13 | 14 | comment: 15 | layout: diff, flags, files 16 | behavior: once 17 | require_base: no 18 | -------------------------------------------------------------------------------- /.cruft.json: -------------------------------------------------------------------------------- 1 | { 2 | "template": "https://github.com/scverse/cookiecutter-scverse", 3 | "commit": "5842d5cb8510e1d4a037a8f772630d51ec86de96", 4 | "checkout": null, 5 | "context": { 6 | "cookiecutter": { 7 | "project_name": "decoupler", 8 | "package_name": "decoupler", 9 | "project_description": "Python package to perform enrichment analysis from omics data.", 10 | "author_full_name": "Pau Badia i Mompel", 11 | "author_email": "pau.badia@uni-heidelberg.de", 12 | "github_user": "PauBadiaM", 13 | "github_repo": "decoupler", 14 | "license": "BSD 3-Clause License", 15 | "ide_integration": true, 16 | "_copy_without_render": [ 17 | ".github/workflows/build.yaml", 18 | ".github/workflows/test.yaml", 19 | "docs/_templates/autosummary/**.rst" 20 | ], 21 | "_exclude_on_template_update": [ 22 | "CHANGELOG.md", 23 | "LICENSE", 24 | "README.md", 25 | "docs/api.md", 26 | "docs/index.md", 27 | "docs/notebooks/example.ipynb", 28 | "docs/references.bib", 29 | "docs/references.md", 30 | "src/**", 31 | "tests/**" 32 | ], 33 | "_render_devdocs": false, 34 | "_jinja2_env_vars": { 35 | "lstrip_blocks": true, 36 | "trim_blocks": true 37 | }, 38 | "_template": "https://github.com/scverse/cookiecutter-scverse", 39 | "_commit": "5842d5cb8510e1d4a037a8f772630d51ec86de96" 40 | } 41 | }, 42 | "directory": null 43 | } 44 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | indent_style = space 5 | indent_size = 4 6 | end_of_line = lf 7 | charset = utf-8 8 | trim_trailing_whitespace = true 
9 | insert_final_newline = true 10 | 11 | [{*.{yml,yaml,toml},.cruft.json}] 12 | indent_size = 2 13 | 14 | [Makefile] 15 | indent_style = tab 16 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.yml: -------------------------------------------------------------------------------- 1 | name: Bug report 2 | description: Report something that is broken or incorrect 3 | labels: bug 4 | body: 5 | - type: markdown 6 | attributes: 7 | value: | 8 | **Note**: Please read [this guide](https://matthewrocklin.com/blog/work/2018/02/28/minimal-bug-reports) 9 | detailing how to provide the necessary information for us to reproduce your bug. In brief: 10 | * Please provide exact steps how to reproduce the bug in a clean Python environment. 11 | * In case it's not clear what's causing this bug, please provide the data or the data generation procedure. 12 | * Sometimes it is not possible to share the data, but usually it is possible to replicate problems on publicly 13 | available datasets or to share a subset of your data. 14 | 15 | - type: textarea 16 | id: report 17 | attributes: 18 | label: Report 19 | description: A clear and concise description of what the bug is. 20 | validations: 21 | required: true 22 | 23 | - type: textarea 24 | id: versions 25 | attributes: 26 | label: Versions 27 | description: | 28 | Which version of packages. 29 | 30 | Please install `session-info2`, run the following command in a notebook, 31 | click the “Copy as Markdown” button, then paste the results into the text box below. 
32 | 33 | ```python 34 | In[1]: import session_info2; session_info2.session_info(dependencies=True) 35 | ``` 36 | 37 | Alternatively, run this in a console: 38 | 39 | ```python 40 | >>> import session_info2; print(session_info2.session_info(dependencies=True)._repr_mimebundle_()["text/markdown"]) 41 | ``` 42 | render: python 43 | placeholder: | 44 | anndata 0.11.3 45 | ---- ---- 46 | charset-normalizer 3.4.1 47 | coverage 7.7.0 48 | psutil 7.0.0 49 | dask 2024.7.1 50 | jaraco.context 5.3.0 51 | numcodecs 0.15.1 52 | jaraco.functools 4.0.1 53 | Jinja2 3.1.6 54 | sphinxcontrib-jsmath 1.0.1 55 | sphinxcontrib-htmlhelp 2.1.0 56 | toolz 1.0.0 57 | session-info2 0.1.2 58 | PyYAML 6.0.2 59 | llvmlite 0.44.0 60 | scipy 1.15.2 61 | pandas 2.2.3 62 | sphinxcontrib-devhelp 2.0.0 63 | h5py 3.13.0 64 | tblib 3.0.0 65 | setuptools-scm 8.2.0 66 | more-itertools 10.3.0 67 | msgpack 1.1.0 68 | sparse 0.15.5 69 | wrapt 1.17.2 70 | jaraco.collections 5.1.0 71 | numba 0.61.0 72 | pyarrow 19.0.1 73 | pytz 2025.1 74 | MarkupSafe 3.0.2 75 | crc32c 2.7.1 76 | sphinxcontrib-qthelp 2.0.0 77 | sphinxcontrib-serializinghtml 2.0.0 78 | zarr 2.18.4 79 | asciitree 0.3.3 80 | six 1.17.0 81 | sphinxcontrib-applehelp 2.0.0 82 | numpy 2.1.3 83 | cloudpickle 3.1.1 84 | sphinxcontrib-bibtex 2.6.3 85 | natsort 8.4.0 86 | jaraco.text 3.12.1 87 | setuptools 76.1.0 88 | Deprecated 1.2.18 89 | packaging 24.2 90 | python-dateutil 2.9.0.post0 91 | ---- ---- 92 | Python 3.13.2 | packaged by conda-forge | (main, Feb 17 2025, 14:10:22) [GCC 13.3.0] 93 | OS Linux-6.11.0-109019-tuxedo-x86_64-with-glibc2.39 94 | Updated 2025-03-18 15:47 95 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | contact_links: 3 | - name: Scverse Community Forum 4 | url: https://discourse.scverse.org/ 5 | about: If you have questions about “How to do X”, 
please ask them here. 6 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.yml: -------------------------------------------------------------------------------- 1 | name: Feature request 2 | description: Propose a new feature for decoupler 3 | labels: enhancement 4 | body: 5 | - type: textarea 6 | id: description 7 | attributes: 8 | label: Description of feature 9 | description: Please describe your suggestion for a new feature. It might help to describe a problem or use case, plus any alternatives that you have considered. 10 | validations: 11 | required: true 12 | -------------------------------------------------------------------------------- /.github/workflows/build.yaml: -------------------------------------------------------------------------------- 1 | name: Check Build 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | 9 | concurrency: 10 | group: ${{ github.workflow }}-${{ github.ref }} 11 | cancel-in-progress: true 12 | 13 | defaults: 14 | run: 15 | # to fail on error in multiline statements (-e), in pipes (-o pipefail), and on unset variables (-u). 
16 | shell: bash -euo pipefail {0} 17 | 18 | jobs: 19 | package: 20 | runs-on: ubuntu-latest 21 | steps: 22 | - uses: actions/checkout@v4 23 | with: 24 | filter: blob:none 25 | fetch-depth: 0 26 | - name: Install uv 27 | uses: astral-sh/setup-uv@v5 28 | with: 29 | cache-dependency-glob: pyproject.toml 30 | - name: Build package 31 | run: uv build 32 | - name: Check package 33 | run: uvx twine check --strict dist/*.whl 34 | -------------------------------------------------------------------------------- /.github/workflows/release.yaml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | defaults: 8 | run: 9 | # to fail on error in multiline statements (-e), in pipes (-o pipefail), and on unset variables (-u). 10 | shell: bash -euo pipefail {0} 11 | 12 | # Use "trusted publishing", see https://docs.pypi.org/trusted-publishers/ 13 | jobs: 14 | release: 15 | name: Upload release to PyPI 16 | runs-on: ubuntu-latest 17 | environment: 18 | name: pypi 19 | url: https://pypi.org/p/decoupler 20 | permissions: 21 | id-token: write # IMPORTANT: this permission is mandatory for trusted publishing 22 | steps: 23 | - uses: actions/checkout@v4 24 | with: 25 | filter: blob:none 26 | fetch-depth: 0 27 | - name: Install uv 28 | uses: astral-sh/setup-uv@v5 29 | with: 30 | cache-dependency-glob: pyproject.toml 31 | - name: Build package 32 | run: uv build 33 | - name: Publish package distributions to PyPI 34 | uses: pypa/gh-action-pypi-publish@release/v1 35 | -------------------------------------------------------------------------------- /.github/workflows/test.yaml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | schedule: 9 | - cron: "0 5 1,15 * *" 10 | 11 | concurrency: 12 | group: ${{ github.workflow }}-${{ github.ref }} 13 | cancel-in-progress: 
true 14 | 15 | defaults: 16 | run: 17 | # to fail on error in multiline statements (-e), in pipes (-o pipefail), and on unset variables (-u). 18 | shell: bash -euo pipefail {0} 19 | 20 | jobs: 21 | # Get the test environment from hatch as defined in pyproject.toml. 22 | # This ensures that the pyproject.toml is the single point of truth for test definitions and the same tests are 23 | # run locally and on continuous integration. 24 | # Check [[tool.hatch.envs.hatch-test.matrix]] in pyproject.toml and https://hatch.pypa.io/latest/environment/ for 25 | # more details. 26 | get-environments: 27 | runs-on: ubuntu-latest 28 | outputs: 29 | envs: ${{ steps.get-envs.outputs.envs }} 30 | steps: 31 | - uses: actions/checkout@v4 32 | with: 33 | filter: blob:none 34 | fetch-depth: 0 35 | - name: Install uv 36 | uses: astral-sh/setup-uv@v5 37 | - name: Get test environments 38 | id: get-envs 39 | run: | 40 | ENVS_JSON=$(uvx hatch env show --json | jq -c 'to_entries 41 | | map( 42 | select(.key | startswith("hatch-test")) 43 | | { 44 | name: .key, 45 | label: (if (.key | contains("pre")) then .key + " (PRE-RELEASE DEPENDENCIES)" else .key end), 46 | python: .value.python 47 | } 48 | )') 49 | echo "envs=${ENVS_JSON}" | tee $GITHUB_OUTPUT 50 | 51 | # Run tests through hatch. Spawns a separate runner for each environment defined in the hatch matrix obtained above. 
52 | test: 53 | needs: get-environments 54 | 55 | strategy: 56 | fail-fast: false 57 | matrix: 58 | os: [ubuntu-latest] 59 | env: ${{ fromJSON(needs.get-environments.outputs.envs) }} 60 | 61 | name: ${{ matrix.env.label }} 62 | runs-on: ${{ matrix.os }} 63 | 64 | steps: 65 | - uses: actions/checkout@v4 66 | with: 67 | filter: blob:none 68 | fetch-depth: 0 69 | - name: Install uv 70 | uses: astral-sh/setup-uv@v5 71 | with: 72 | python-version: ${{ matrix.env.python }} 73 | cache-dependency-glob: pyproject.toml 74 | - name: create hatch environment 75 | run: uvx hatch env create ${{ matrix.env.name }} 76 | - name: run tests using hatch 77 | env: 78 | MPLBACKEND: agg 79 | PLATFORM: ${{ matrix.os }} 80 | DISPLAY: :42 81 | run: uvx hatch run ${{ matrix.env.name }}:run-cov 82 | - name: generate coverage report 83 | run: uvx hatch run ${{ matrix.env.name }}:coverage xml 84 | - name: Upload coverage 85 | uses: codecov/codecov-action@v4 86 | with: 87 | token: ${{ secrets.CODECOV_TOKEN }} 88 | 89 | # Check that all tests defined above pass. This makes it easy to set a single "required" test in branch 90 | # protection instead of having to update it frequently. See https://github.com/re-actors/alls-green#why. 
91 | check: 92 | name: Tests pass in all hatch environments 93 | if: always() 94 | needs: 95 | - get-environments 96 | - test 97 | runs-on: ubuntu-latest 98 | steps: 99 | - uses: re-actors/alls-green@release/v1 100 | with: 101 | jobs: ${{ toJSON(needs) }} 102 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Temp files 2 | .DS_Store 3 | *~ 4 | buck-out/ 5 | 6 | # Compiled files 7 | .venv/ 8 | __pycache__/ 9 | .*cache/ 10 | 11 | # Distribution / packaging 12 | /dist/ 13 | 14 | # Tests and coverage 15 | /data/ 16 | /node_modules/ 17 | 18 | # docs 19 | /docs/generated/ 20 | /docs/_build/ 21 | 22 | # jupyter 23 | .ipynb_checkpoints/ 24 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | fail_fast: false 2 | default_language_version: 3 | python: python3 4 | default_stages: 5 | - pre-commit 6 | - pre-push 7 | minimum_pre_commit_version: 2.16.0 8 | repos: 9 | - repo: https://github.com/biomejs/pre-commit 10 | rev: v1.9.4 11 | hooks: 12 | - id: biome-format 13 | exclude: ^\.cruft\.json$ # inconsistent indentation with cruft - file never to be modified manually. 
14 | - repo: https://github.com/tox-dev/pyproject-fmt 15 | rev: v2.5.1 16 | hooks: 17 | - id: pyproject-fmt 18 | - repo: https://github.com/astral-sh/ruff-pre-commit 19 | rev: v0.11.5 20 | hooks: 21 | - id: ruff 22 | types_or: [python, pyi, jupyter] 23 | args: [--fix, --exit-non-zero-on-fix] 24 | - id: ruff-format 25 | types_or: [python, pyi, jupyter] 26 | - repo: https://github.com/pre-commit/pre-commit-hooks 27 | rev: v5.0.0 28 | hooks: 29 | - id: detect-private-key 30 | - id: check-ast 31 | - id: end-of-file-fixer 32 | - id: mixed-line-ending 33 | args: [--fix=lf] 34 | - id: trailing-whitespace 35 | - id: check-case-conflict 36 | # Check that there are no merge conflicts (could be generated by template sync) 37 | - id: check-merge-conflict 38 | args: [--assume-in-merge] 39 | - repo: local 40 | hooks: 41 | - id: forbid-to-commit 42 | name: Don't commit rej files 43 | entry: | 44 | Cannot commit .rej files. These indicate merge conflicts that arise during automated template updates. 45 | Fix the merge conflicts manually and remove the .rej files. 46 | language: fail 47 | files: '.*\.rej$' 48 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # https://docs.readthedocs.io/en/stable/config-file/v2.html 2 | version: 2 3 | build: 4 | os: ubuntu-20.04 5 | tools: 6 | python: "3.10" 7 | sphinx: 8 | configuration: docs/conf.py 9 | # disable this for more lenient docs builds 10 | fail_on_warning: true 11 | python: 12 | install: 13 | - method: pip 14 | path: . 
15 | extra_requirements: 16 | - doc 17 | - full 18 | -------------------------------------------------------------------------------- /.vscode/extensions.json: -------------------------------------------------------------------------------- 1 | { 2 | "recommendations": [ 3 | // GitHub integration 4 | "github.vscode-github-actions", 5 | "github.vscode-pull-request-github", 6 | // Language support 7 | "ms-python.python", 8 | "ms-python.vscode-pylance", 9 | "ms-toolsai.jupyter", 10 | "tamasfe.even-better-toml", 11 | // Dependency management 12 | "ninoseki.vscode-mogami", 13 | // Linting and formatting 14 | "editorconfig.editorconfig", 15 | "charliermarsh.ruff", 16 | "biomejs.biome", 17 | ], 18 | } 19 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible attributes. 3 | // Hover to view descriptions of existing attributes. 
4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | "name": "Python: Build Documentation", 9 | "type": "debugpy", 10 | "request": "launch", 11 | "module": "sphinx", 12 | "args": ["-M", "html", ".", "_build"], 13 | "cwd": "${workspaceFolder}/docs", 14 | "console": "internalConsole", 15 | "justMyCode": false, 16 | }, 17 | { 18 | "name": "Python: Debug Test", 19 | "type": "debugpy", 20 | "request": "launch", 21 | "program": "${file}", 22 | "purpose": ["debug-test"], 23 | "console": "internalConsole", 24 | "justMyCode": false, 25 | "env": { 26 | "PYTEST_ADDOPTS": "--color=yes", 27 | }, 28 | "presentation": { 29 | "hidden": true, 30 | }, 31 | }, 32 | ], 33 | } 34 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "[python][json][jsonc]": { 3 | "editor.formatOnSave": true, 4 | }, 5 | "[python]": { 6 | "editor.defaultFormatter": "charliermarsh.ruff", 7 | "editor.codeActionsOnSave": { 8 | "source.fixAll": "always", 9 | "source.organizeImports": "always", 10 | }, 11 | }, 12 | "[json][jsonc]": { 13 | "editor.defaultFormatter": "biomejs.biome", 14 | }, 15 | "python.analysis.typeCheckingMode": "basic", 16 | "python.testing.pytestEnabled": true, 17 | "python.testing.pytestArgs": ["-vv", "--color=yes"], 18 | } 19 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2025, Pau Badia i Mompel, Saez lab 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. 
Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # decoupler - Ensemble of methods to infer enrichment scores 2 | 3 | 4 | 5 | [![Tests][badge-tests]][tests] 6 | [![Documentation][badge-docs]][documentation] 7 | 8 | [![Issues][badge-issues]][issue tracker] 9 | [![Coverage][badge-coverage]][codecoverage] 10 | [![Stars][badge-stars]](https://github.com/scverse/anndata/stargazers) 11 | 12 | [![PyPI][badge-pypi]][pypi] 13 | [![Downloads month][badge-mdown]][down] 14 | [![Downloads all][badge-adown]][down] 15 | 16 | [![Conda version][badge-condav]][conda] 17 | [![Conda downloads][badge-condad]][conda] 18 | 19 | [badge-tests]: https://img.shields.io/github/actions/workflow/status/saezlab/decoupler-py/test.yaml?branch=main 20 | [badge-docs]: https://img.shields.io/readthedocs/decoupler-py 21 | [badge-condav]: https://img.shields.io/conda/vn/conda-forge/decoupler-py.svg 22 | [badge-condad]: https://img.shields.io/conda/dn/conda-forge/decoupler-py.svg 23 | [badge-issues]: https://img.shields.io/github/issues/saezlab/decoupler-py 24 | [badge-coverage]: https://codecov.io/gh/saezlab/decoupler-py/branch/main/graph/badge.svg 25 | [badge-pypi]: https://img.shields.io/pypi/v/decoupler.svg 26 | [badge-mdown]: https://static.pepy.tech/badge/decoupler/month 27 | [badge-adown]: https://static.pepy.tech/badge/decoupler 28 | [badge-stars]: https://img.shields.io/github/stars/saezlab/decoupler-py?style=flat&logo=github&color=yellow 29 | 30 | `decoupler` is a python package containing different enrichment statistical 31 | methods to extract biologically driven scores 32 | from omics data within a unified framework. This is its faster and memory efficient Python implementation, 33 | a deprecated version in R can be found [here](https://github.com/saezlab/decoupler). 
34 | 35 | It is a package from the [scverse][] ecosystem {cite:p}`scverse`, 36 | designed for easy interoperability with `anndata`, `scanpy` {cite:p}`scanpy` and other related packages. 37 | 38 | ## Getting started 39 | 40 | Please refer to the [documentation][], 41 | in particular, the [API documentation][]. 42 | 43 | ## Installation 44 | 45 | You need to have Python 3.10 or newer installed on your system. 46 | If you don't have Python installed, we recommend installing [uv][]. 47 | 48 | There are several alternative options to install decoupler: 49 | 50 | 1. Install the latest stable release from [PyPI][pypi] with minimal dependancies: 51 | 52 | ```bash 53 | pip install decoupler 54 | ``` 55 | 56 | 2. Install the latest stable full release from [PyPI][pypi] with extra dependancies: 57 | 58 | ```bash 59 | pip install decoupler[full] 60 | ``` 61 | 62 | 3. Install the latest stable version from [conda-forge][conda] using mamba or conda (pay attention to the `-py` suffix at the end): 63 | 64 | ```bash 65 | mamba create -n=dcp conda-forge::decoupler-py 66 | ``` 67 | 68 | 4. Install the latest development version: 69 | 70 | ```bash 71 | pip install git+https://github.com/saezlab/decoupler-py.git@main 72 | ``` 73 | 74 | ## Release notes 75 | 76 | See the [changelog][]. 77 | 78 | ## Contact 79 | 80 | For questions and help requests, you can reach out in the [scverse discourse][]. 81 | If you found a bug, please use the [issue tracker][]. 82 | 83 | ## Citation 84 | 85 | > Badia-i-Mompel P., Vélez Santiago J., Braunger J., Geiss C., Dimitrov D., 86 | Müller-Dott S., Taus P., Dugourd A., Holland C.H., Ramirez Flores R.O. 87 | and Saez-Rodriguez J. 2022. decoupleR: Ensemble of computational methods 88 | to infer biological activities from omics data. Bioinformatics Advances. 
89 | 90 | 91 | [uv]: https://github.com/astral-sh/uv 92 | [scverse discourse]: https://discourse.scverse.org/ 93 | [scverse]: https://scverse.org/ 94 | [issue tracker]: https://github.com/saezlab/decoupler-py/issues 95 | [tests]: https://github.com/saezlab/decoupler-py/actions/workflows/test.yaml 96 | [documentation]: https://decoupler-py.readthedocs.io 97 | [changelog]: https://decoupler-py.readthedocs.io/en/latest/changelog.html 98 | [api documentation]: https://decoupler-py.readthedocs.io/en/latest/api.html 99 | [pypi]: https://pypi.org/project/decoupler 100 | [down]: https://pepy.tech/project/decoupler 101 | [conda]: https://anaconda.org/conda-forge/decoupler-py 102 | [codecoverage]: https://codecov.io/gh/saezlab/decoupler-py 103 | -------------------------------------------------------------------------------- /biome.jsonc: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://biomejs.dev/schemas/1.9.4/schema.json", 3 | "formatter": { "useEditorconfig": true }, 4 | "overrides": [ 5 | { 6 | "include": ["./.vscode/*.json", "**/*.jsonc"], 7 | "json": { 8 | "formatter": { "trailingCommas": "all" }, 9 | "parser": { 10 | "allowComments": true, 11 | "allowTrailingCommas": true, 12 | }, 13 | }, 14 | }, 15 | ], 16 | } 17 | -------------------------------------------------------------------------------- /docs/_static/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saezlab/decoupler-py/b3471b11d76f9dd31499c64f7994db4f20714734/docs/_static/.gitkeep -------------------------------------------------------------------------------- /docs/_static/css/custom.css: -------------------------------------------------------------------------------- 1 | /* Reduce the font size in data frames - See https://github.com/scverse/cookiecutter-scverse/issues/193 */ 2 | div.cell_output table.dataframe { 3 | font-size: 0.8em; 4 | } 5 | 6 | /* Adjust the logo 
size */ 7 | .logo img { 8 | width: 50%; /* or any percentage you want */ 9 | height: auto; /* maintain aspect ratio */ 10 | } 11 | 12 | img.no-scaled-link { 13 | background: transparent !important; 14 | } -------------------------------------------------------------------------------- /docs/_static/images/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saezlab/decoupler-py/b3471b11d76f9dd31499c64f7994db4f20714734/docs/_static/images/logo.png -------------------------------------------------------------------------------- /docs/_static/images/mlm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saezlab/decoupler-py/b3471b11d76f9dd31499c64f7994db4f20714734/docs/_static/images/mlm.png -------------------------------------------------------------------------------- /docs/_static/images/ora.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saezlab/decoupler-py/b3471b11d76f9dd31499c64f7994db4f20714734/docs/_static/images/ora.png -------------------------------------------------------------------------------- /docs/_static/images/ulm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saezlab/decoupler-py/b3471b11d76f9dd31499c64f7994db4f20714734/docs/_static/images/ulm.png -------------------------------------------------------------------------------- /docs/_templates/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saezlab/decoupler-py/b3471b11d76f9dd31499c64f7994db4f20714734/docs/_templates/.gitkeep -------------------------------------------------------------------------------- /docs/_templates/autosummary/class.rst: -------------------------------------------------------------------------------- 1 | {{ fullname 
| escape | underline}} 2 | 3 | .. currentmodule:: {{ module }} 4 | 5 | .. add toctree option to make autodoc generate the pages 6 | 7 | .. autoclass:: {{ objname }} 8 | 9 | {% block attributes %} 10 | {% if attributes %} 11 | Attributes table 12 | ~~~~~~~~~~~~~~~~ 13 | 14 | .. autosummary:: 15 | {% for item in attributes %} 16 | ~{{ name }}.{{ item }} 17 | {%- endfor %} 18 | {% endif %} 19 | {% endblock %} 20 | 21 | {% block methods %} 22 | {% if methods %} 23 | Methods table 24 | ~~~~~~~~~~~~~ 25 | 26 | .. autosummary:: 27 | {% for item in methods %} 28 | {%- if item != '__init__' %} 29 | ~{{ name }}.{{ item }} 30 | {%- endif -%} 31 | {%- endfor %} 32 | {% endif %} 33 | {% endblock %} 34 | 35 | {% block attributes_documentation %} 36 | {% if attributes %} 37 | Attributes 38 | ~~~~~~~~~~ 39 | 40 | {% for item in attributes %} 41 | 42 | .. autoattribute:: {{ [objname, item] | join(".") }} 43 | {%- endfor %} 44 | 45 | {% endif %} 46 | {% endblock %} 47 | 48 | {% block methods_documentation %} 49 | {% if methods %} 50 | Methods 51 | ~~~~~~~ 52 | 53 | {% for item in methods %} 54 | {%- if item != '__init__' %} 55 | 56 | .. automethod:: {{ [objname, item] | join(".") }} 57 | {%- endif -%} 58 | {%- endfor %} 59 | 60 | {% endif %} 61 | {% endblock %} 62 | -------------------------------------------------------------------------------- /docs/api/bm.md: -------------------------------------------------------------------------------- 1 | # Benchmark 2 | 3 | 4 | ## Pipeline 5 | ```{eval-rst} 6 | .. module:: decoupler.bm 7 | .. currentmodule:: decoupler 8 | 9 | .. autosummary:: 10 | :toctree: generated 11 | 12 | bm.benchmark 13 | ``` 14 | 15 | ## Metrics 16 | 17 | ```{eval-rst} 18 | .. module:: decoupler.bm.metric 19 | .. currentmodule:: decoupler 20 | 21 | .. autosummary:: 22 | :toctree: generated 23 | 24 | bm.metric.auc 25 | bm.metric.fscore 26 | bm.metric.qrank 27 | bm.metric.hmean 28 | ``` 29 | 30 | ## Plotting 31 | 32 | ```{eval-rst} 33 | .. 
module:: decoupler.bm.pl 34 | .. currentmodule:: decoupler 35 | 36 | .. autosummary:: 37 | :toctree: generated 38 | 39 | bm.pl.auc 40 | bm.pl.fscore 41 | bm.pl.qrank 42 | bm.pl.bar 43 | bm.pl.summary 44 | ``` 45 | -------------------------------------------------------------------------------- /docs/api/ds.md: -------------------------------------------------------------------------------- 1 | # Datasets 2 | 3 | ## Bulk 4 | ```{eval-rst} 5 | .. module:: decoupler.ds 6 | .. currentmodule:: decoupler 7 | 8 | .. autosummary:: 9 | :toctree: generated 10 | 11 | ds.hsctgfb 12 | ds.knocktf 13 | ``` 14 | 15 | ## Single-cell 16 | ```{eval-rst} 17 | .. autosummary:: 18 | :toctree: generated 19 | 20 | ds.covid5k 21 | ds.erygast1k 22 | ds.pbmc3k 23 | ``` 24 | 25 | ## Spatial 26 | ```{eval-rst} 27 | .. autosummary:: 28 | :toctree: generated 29 | 30 | ds.msvisium 31 | ``` 32 | 33 | ## Toy 34 | ```{eval-rst} 35 | .. autosummary:: 36 | :toctree: generated 37 | 38 | ds.toy 39 | ds.toy_bench 40 | ``` 41 | 42 | ## Utils 43 | ```{eval-rst} 44 | .. autosummary:: 45 | :toctree: generated 46 | 47 | ds.ensmbl_to_symbol 48 | ``` 49 | -------------------------------------------------------------------------------- /docs/api/index.md: -------------------------------------------------------------------------------- 1 | # API 2 | 3 | Import decoupler as: 4 | 5 | ``` 6 | import decoupler as dc 7 | ``` 8 | 9 | ```{toctree} 10 | :maxdepth: 2 11 | 12 | bm 13 | ds 14 | mt 15 | op 16 | pp 17 | ``` 18 | 19 | ```{toctree} 20 | :maxdepth: 1 21 | 22 | pl 23 | tl 24 | ``` -------------------------------------------------------------------------------- /docs/api/mt.md: -------------------------------------------------------------------------------- 1 | # Methods 2 | 3 | ## Single methods 4 | 5 | ```{eval-rst} 6 | .. module:: decoupler.mt 7 | .. currentmodule:: decoupler 8 | 9 | .. 
autosummary:: 10 | :toctree: generated 11 | 12 | mt.aucell 13 | mt.gsea 14 | mt.gsva 15 | mt.mdt 16 | mt.mlm 17 | mt.ora 18 | mt.udt 19 | mt.ulm 20 | mt.viper 21 | mt.waggr 22 | mt.zscore 23 | ``` 24 | 25 | ## Multiple methods 26 | 27 | ```{eval-rst} 28 | .. autosummary:: 29 | :toctree: generated 30 | 31 | mt.decouple 32 | mt.consensus 33 | ``` -------------------------------------------------------------------------------- /docs/api/op.md: -------------------------------------------------------------------------------- 1 | # OmniPath 2 | 3 | ## Resources 4 | 5 | ```{eval-rst} 6 | .. module:: decoupler.op 7 | .. currentmodule:: decoupler 8 | 9 | .. autosummary:: 10 | :toctree: generated 11 | 12 | op.collectri 13 | op.dorothea 14 | op.hallmark 15 | op.progeny 16 | op.resource 17 | ``` 18 | 19 | ## Utils 20 | 21 | ```{eval-rst} 22 | .. autosummary:: 23 | :toctree: generated 24 | 25 | op.show_resources 26 | op.show_organisms 27 | op.translate 28 | ``` -------------------------------------------------------------------------------- /docs/api/pl.md: -------------------------------------------------------------------------------- 1 | # Plotting 2 | 3 | ```{eval-rst} 4 | .. module:: decoupler.pl 5 | .. currentmodule:: decoupler 6 | 7 | .. autosummary:: 8 | :toctree: generated 9 | 10 | pl.barplot 11 | pl.dotplot 12 | pl.filter_by_expr 13 | pl.filter_by_prop 14 | pl.filter_samples 15 | pl.leading_edge 16 | pl.network 17 | pl.obsbar 18 | pl.obsm 19 | pl.order_targets 20 | pl.order 21 | pl.source_targets 22 | pl.volcano 23 | ``` -------------------------------------------------------------------------------- /docs/api/pp.md: -------------------------------------------------------------------------------- 1 | # Preprocessing 2 | 3 | ## Data 4 | ```{eval-rst} 5 | .. module:: decoupler.pp 6 | .. currentmodule:: decoupler 7 | 8 | .. autosummary:: 9 | :toctree: generated 10 | 11 | pp.extract 12 | ``` 13 | 14 | ## Network 15 | ```{eval-rst} 16 | .. 
autosummary:: 17 | :toctree: generated 18 | 19 | pp.read_gmt 20 | pp.prune 21 | pp.adjmat 22 | pp.idxmat 23 | pp.shuffle_net 24 | pp.net_corr 25 | ``` 26 | 27 | ## AnnData 28 | ```{eval-rst} 29 | .. autosummary:: 30 | :toctree: generated 31 | 32 | pp.get_obsm 33 | pp.swap_layer 34 | pp.pseudobulk 35 | pp.filter_samples 36 | pp.filter_by_expr 37 | pp.filter_by_prop 38 | pp.knn 39 | pp.bin_order 40 | ``` 41 | -------------------------------------------------------------------------------- /docs/api/tl.md: -------------------------------------------------------------------------------- 1 | # Tools 2 | 3 | ```{eval-rst} 4 | .. module:: decoupler.tl 5 | .. currentmodule:: decoupler 6 | 7 | .. autosummary:: 8 | :toctree: generated 9 | 10 | tl.rankby_group 11 | tl.rankby_obsm 12 | tl.rankby_order 13 | ``` 14 | -------------------------------------------------------------------------------- /docs/changelog.md: -------------------------------------------------------------------------------- 1 | ```{include} ../CHANGELOG.md 2 | 3 | ``` 4 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | import sys 9 | from datetime import datetime 10 | from importlib.metadata import metadata 11 | from pathlib import Path 12 | 13 | HERE = Path(__file__).parent 14 | sys.path.insert(0, str(HERE / "extensions")) 15 | 16 | 17 | # -- Project information ----------------------------------------------------- 18 | 19 | # NOTE: If you installed your project in editable mode, this might be stale. 
20 | # If this is the case, reinstall it to refresh the metadata 21 | info = metadata("decoupler") 22 | project_name = info["Name"] 23 | author = info["Author"] 24 | copyright = f"{datetime.now():%Y}, {author}." 25 | version = info["Version"] 26 | urls = dict(pu.split(", ") for pu in info.get_all("Project-URL")) 27 | repository_url = urls["Source"] 28 | 29 | # The full version, including alpha/beta/rc tags 30 | release = info["Version"] 31 | 32 | bibtex_bibfiles = ["references.bib"] 33 | templates_path = ["_templates"] 34 | nitpicky = True # Warn about broken links 35 | needs_sphinx = "4.0" 36 | 37 | html_context = { 38 | "display_github": True, # Integrate GitHub 39 | "github_user": "PauBadiaM", 40 | "github_repo": project_name, 41 | "github_version": "main", 42 | "conf_py_path": "/docs/", 43 | } 44 | 45 | # -- General configuration --------------------------------------------------- 46 | 47 | # Add any Sphinx extension module names here, as strings. 48 | # They can be extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 
49 | extensions = [ 50 | "myst_nb", 51 | "sphinx_copybutton", 52 | "sphinx.ext.autodoc", 53 | "sphinx.ext.intersphinx", 54 | "sphinx.ext.autosummary", 55 | "sphinx.ext.napoleon", 56 | "sphinxcontrib.bibtex", 57 | "sphinx_autodoc_typehints", 58 | "sphinx_tabs.tabs", 59 | "sphinx.ext.mathjax", 60 | "IPython.sphinxext.ipython_console_highlighting", 61 | "sphinxext.opengraph", 62 | *[p.stem for p in (HERE / "extensions").glob("*.py")], 63 | ] 64 | 65 | autosummary_generate = True 66 | autodoc_member_order = "groupwise" 67 | default_role = "literal" 68 | napoleon_google_docstring = False 69 | napoleon_numpy_docstring = True 70 | napoleon_include_init_with_doc = False 71 | napoleon_use_rtype = True # having a separate entry generally helps readability 72 | napoleon_use_param = True 73 | myst_heading_anchors = 6 # create anchors for h1-h6 74 | myst_enable_extensions = [ 75 | "amsmath", 76 | "colon_fence", 77 | "deflist", 78 | "dollarmath", 79 | "html_image", 80 | "html_admonition", 81 | ] 82 | myst_url_schemes = ("http", "https", "mailto") 83 | nb_output_stderr = "remove" 84 | nb_execution_mode = "off" 85 | nb_merge_streams = True 86 | typehints_defaults = "braces" 87 | 88 | source_suffix = { 89 | ".rst": "restructuredtext", 90 | ".ipynb": "myst-nb", 91 | ".myst": "myst-nb", 92 | } 93 | 94 | intersphinx_mapping = { 95 | "python": ("https://docs.python.org/3", None), 96 | "anndata": ("https://anndata.readthedocs.io/en/stable/", None), 97 | "scanpy": ("https://scanpy.readthedocs.io/en/stable/", None), 98 | "numpy": ("https://numpy.org/doc/stable/", None), 99 | "matplotlib": ("https://matplotlib.org/stable/", None), 100 | 'pandas': ('https://pandas.pydata.org/pandas-docs/stable/', None), 101 | } 102 | 103 | # List of patterns, relative to source directory, that match files and 104 | # directories to ignore when looking for source files. 105 | # This pattern also affects html_static_path and html_extra_path. 
106 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "**.ipynb_checkpoints"] 107 | 108 | 109 | # -- Options for HTML output ------------------------------------------------- 110 | 111 | # The theme to use for HTML and HTML Help pages. See the documentation for 112 | # a list of builtin themes. 113 | # 114 | html_theme = "sphinx_book_theme" 115 | html_static_path = ["_static"] 116 | html_css_files = ["css/custom.css"] 117 | html_title = project_name 118 | html_logo = '_static/images/logo.png' 119 | html_favicon = '_static/images/logo.png' 120 | 121 | html_theme_options = { 122 | "repository_url": repository_url, 123 | "use_repository_button": True, 124 | "path_to_docs": "docs/", 125 | "navigation_with_keys": False, 126 | } 127 | 128 | pygments_style = "default" 129 | 130 | nitpick_ignore = [ 131 | # If building the documentation fails because of a missing link that is outside your control, 132 | # you can add an exception to this list. 133 | # ("py:class", "igraph.Graph"), 134 | ] 135 | -------------------------------------------------------------------------------- /docs/extensions/typed_returns.py: -------------------------------------------------------------------------------- 1 | # code from https://github.com/theislab/scanpy/blob/master/docs/extensions/typed_returns.py 2 | # with some minor adjustment 3 | from __future__ import annotations 4 | 5 | import re 6 | from collections.abc import Generator, Iterable 7 | 8 | from sphinx.application import Sphinx 9 | from sphinx.ext.napoleon import NumpyDocstring 10 | 11 | 12 | def _process_return(lines: Iterable[str]) -> Generator[str, None, None]: 13 | for line in lines: 14 | if m := re.fullmatch(r"(?P\w+)\s+:\s+(?P[\w.]+)", line): 15 | yield f"-{m['param']} (:class:`~{m['type']}`)" 16 | else: 17 | yield line 18 | 19 | 20 | def _parse_returns_section(self: NumpyDocstring, section: str) -> list[str]: 21 | lines_raw = self._dedent(self._consume_to_next_section()) 22 | if lines_raw[0] == ":": 23 | del 
lines_raw[0] 24 | lines = self._format_block(":returns: ", list(_process_return(lines_raw))) 25 | if lines and lines[-1]: 26 | lines.append("") 27 | return lines 28 | 29 | 30 | def setup(app: Sphinx): 31 | """Set app.""" 32 | NumpyDocstring._parse_returns_section = _parse_returns_section 33 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | ```{include} ../README.md 2 | 3 | ``` 4 | 5 | ```{toctree} 6 | :maxdepth: 1 7 | :hidden: true 8 | 9 | api/index 10 | notebooks/index 11 | changelog 12 | references 13 | ``` 14 | -------------------------------------------------------------------------------- /docs/notebooks/bench/index.md: -------------------------------------------------------------------------------- 1 | # Benchmark 2 | ```{toctree} 3 | :maxdepth: 1 4 | 5 | rna 6 | ``` 7 | -------------------------------------------------------------------------------- /docs/notebooks/bulk/index.md: -------------------------------------------------------------------------------- 1 | # Bulk 2 | ```{toctree} 3 | :maxdepth: 1 4 | 5 | rna 6 | ``` 7 | -------------------------------------------------------------------------------- /docs/notebooks/index.md: -------------------------------------------------------------------------------- 1 | # Tutorials 2 | ```{toctree} 3 | :maxdepth: 1 4 | 5 | example 6 | scell/index 7 | spatial/index 8 | bulk/index 9 | omnipath/index 10 | bench/index 11 | ``` 12 | -------------------------------------------------------------------------------- /docs/notebooks/omnipath/index.md: -------------------------------------------------------------------------------- 1 | # OmniPath 2 | ```{toctree} 3 | :maxdepth: 1 4 | 5 | licenses 6 | orthologs 7 | ``` 8 | -------------------------------------------------------------------------------- /docs/notebooks/scell/index.md: 
-------------------------------------------------------------------------------- 1 | # Single-cell 2 | ```{toctree} 3 | :maxdepth: 1 4 | 5 | rna_sc 6 | rna_psbk 7 | rna_pstime 8 | ``` 9 | -------------------------------------------------------------------------------- /docs/notebooks/spatial/index.md: -------------------------------------------------------------------------------- 1 | # Spatial 2 | ```{toctree} 3 | :maxdepth: 1 4 | 5 | rna_visium 6 | ``` 7 | -------------------------------------------------------------------------------- /docs/references.md: -------------------------------------------------------------------------------- 1 | # References 2 | 3 | ```{bibliography} 4 | :cited: 5 | ``` 6 | -------------------------------------------------------------------------------- /src/decoupler/_Method.py: -------------------------------------------------------------------------------- 1 | from typing import Callable 2 | import textwrap 3 | 4 | import pandas as pd 5 | 6 | from decoupler._docs import docs 7 | from decoupler._datatype import DataType 8 | from decoupler.mt._run import _run 9 | 10 | 11 | class MethodMeta: 12 | def __init__( 13 | self, 14 | name: str, 15 | desc: str, 16 | func: Callable, 17 | stype: str, 18 | adj: bool, 19 | weight: bool, 20 | test: bool, 21 | limits: tuple, 22 | reference: str, 23 | ): 24 | self.name = name 25 | self.desc = desc 26 | self.func = func 27 | self.stype = stype 28 | self.adj = adj 29 | self.weight = weight 30 | self.test = test 31 | self.limits = limits 32 | self.reference = reference 33 | 34 | def meta(self) -> pd.DataFrame: 35 | meta = pd.DataFrame([{ 36 | 'name': self.name, 37 | 'desc': self.desc, 38 | 'stype': self.stype, 39 | 'weight': self.weight, 40 | 'test': self.test, 41 | 'limits': self.limits, 42 | 'reference': self.reference 43 | }]) 44 | return meta 45 | 46 | 47 | #@docs.dedent 48 | class Method(MethodMeta): 49 | def __init__( 50 | self, 51 | _method: MethodMeta, 52 | ): 53 | super().__init__( 54 | 
def _show_methods(methods):
    """Summarize a collection of method objects as a single table.

    Each method's one-row ``meta()`` frame is stacked into one
    ``pandas.DataFrame`` with a fresh 0..n-1 integer index.
    """
    rows = [m.meta() for m in methods]
    table = pd.concat(rows)
    return table.reset_index(drop=True)
20 | 21 | Parameters 22 | ---------- 23 | %(plot)s 24 | """ 25 | # Validate 26 | assert isinstance(ax, Axes) or ax is None, \ 27 | 'ax must be matplotlib.axes._axes.Axes or None' 28 | assert isinstance(figsize, tuple), \ 29 | 'figsize must be tuple' 30 | assert isinstance(dpi, (int, float)) and dpi > 0, \ 31 | 'dpi must be numerical and > 0' 32 | assert isinstance(return_fig, bool), \ 33 | 'return_fig must be bool' 34 | assert isinstance(save, str) or save is None, \ 35 | 'save must be str or None' 36 | self.ax = ax 37 | self.figsize = figsize 38 | self.dpi = dpi 39 | self.return_fig = return_fig 40 | self.save = save 41 | if self.ax is None: 42 | self.fig, self.ax = plt.subplots(1, 1, figsize=self.figsize, dpi=self.dpi, tight_layout=True) 43 | else: 44 | self.fig = self.ax.figure 45 | 46 | def _return(self): 47 | if self.save is not None: 48 | self.fig.savefig(self.save, bbox_inches='tight') 49 | if self.return_fig: 50 | return self.fig 51 | -------------------------------------------------------------------------------- /src/decoupler/__init__.py: -------------------------------------------------------------------------------- 1 | from importlib.metadata import version 2 | 3 | from . 
import io

import requests
from tqdm import tqdm
import pandas as pd

from decoupler._log import _log

URL_DBS = 'https://omnipathdb.org/annotations?databases='
URL_INT = 'https://omnipathdb.org/interactions/?genesymbols=1&'


def _download(
    url: str,
    verbose: bool = False,
    timeout: float | int | None = None,
    **kwargs,
) -> pd.DataFrame:
    """
    Download a tabular file and parse it with :func:`pandas.read_csv`.

    Parameters
    ----------
    url
        Address of the file to download.
    verbose
        Whether to log progress messages and display a progress bar.
    timeout
        Seconds to wait for the server before aborting. ``None`` (default,
        the original behavior) waits indefinitely.
    **kwargs
        Forwarded verbatim to :func:`pandas.read_csv`.

    Returns
    -------
    The downloaded table as a :class:`pandas.DataFrame`.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status code.
    """
    assert isinstance(url, str), 'url must be str'
    m = f'Downloading {url}'
    _log(m, level='info', verbose=verbose)
    chunks = []
    # Stream in 8 KiB chunks so large downloads are never held twice in memory
    with requests.get(url, stream=True, timeout=timeout) as r:
        r.raise_for_status()
        with tqdm(unit='B', unit_scale=True, desc="Progress", disable=not verbose) as pbar:
            for chunk in r.iter_content(chunk_size=8192):
                if chunk:  # skip keep-alive chunks
                    chunks.append(chunk)
                    pbar.update(len(chunk))
    # Parse the in-memory payload into pandas
    data = io.BytesIO(b"".join(chunks))
    df = pd.read_csv(data, **kwargs)
    # Fixed: was a placeholder-less f-string
    _log('Download finished', level='info', verbose=verbose)
    return df
logging.basicConfig( 4 | level=logging.INFO, 5 | format="%(asctime)s | [%(levelname)s] %(message)s", 6 | datefmt="%Y-%m-%d %H:%M:%S" 7 | ) 8 | 9 | def _log( 10 | message: str, 11 | level: str = 'info', 12 | verbose: bool = False 13 | ) -> None: 14 | """ 15 | Log a message with a specified logging level. 16 | 17 | Parameters 18 | ---------- 19 | message 20 | The message to log. 21 | level 22 | The logging level. 23 | verbose 24 | Whether to emit the log. 25 | """ 26 | level = level.lower() 27 | if verbose: 28 | if level == "warn": 29 | logging.warning(message) 30 | elif level == "info": 31 | logging.info(message) 32 | -------------------------------------------------------------------------------- /src/decoupler/_odeps.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | import types 3 | from typing import TYPE_CHECKING 4 | 5 | 6 | def _try_import( 7 | name: str 8 | ) -> types.ModuleType | None: 9 | try: 10 | with warnings.catch_warnings(): 11 | warnings.filterwarnings("ignore", category=FutureWarning, module=name) 12 | module = __import__(name, fromlist=[""]) 13 | return module 14 | except ImportError: 15 | return None 16 | 17 | 18 | def _check_import( 19 | module: types.ModuleType 20 | ) -> None: 21 | if module is None: 22 | name = module.__name__ 23 | raise ImportError( 24 | f"{name} is not installed. 
Please install it using:\n" 25 | f" pip install {name}" 26 | "or install decoupler with full dependencies:\n" 27 | " pip install 'decoupler[full]'" 28 | ) 29 | 30 | 31 | # Handle optional dependencies 32 | ig = _try_import("igraph") 33 | if ig is not None: 34 | if TYPE_CHECKING: 35 | from igraph import Graph 36 | else: 37 | Graph = ig.Graph 38 | else: 39 | if TYPE_CHECKING: 40 | from typing import Any as Graph 41 | else: 42 | Graph = None 43 | 44 | xgboost = _try_import("xgboost") 45 | dcor = _try_import("dcor") 46 | -------------------------------------------------------------------------------- /src/decoupler/bm/__init__.py: -------------------------------------------------------------------------------- 1 | from decoupler.bm._run import benchmark 2 | from decoupler.bm import metric 3 | from decoupler.bm import pl 4 | -------------------------------------------------------------------------------- /src/decoupler/bm/_pp.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | 3 | import pandas as pd 4 | import numpy as np 5 | import scipy.sparse as sps 6 | from anndata import AnnData 7 | 8 | from decoupler._log import _log 9 | from decoupler.pp.net import prune 10 | 11 | 12 | def _validate_groupby( 13 | obs: pd.DataFrame, 14 | groupby: str | list | None, 15 | runby: str, 16 | ) -> None | list: 17 | assert isinstance(groupby, (str, list)) or groupby is None, \ 18 | 'groupby must be str, list or None' 19 | assert isinstance(runby, str) and runby in ['expr', 'source'], \ 20 | 'runby must be str and either expr or source' 21 | if groupby is not None: 22 | if type(groupby) is str: 23 | groupby = [groupby] 24 | for grp_i in groupby: 25 | if type(grp_i) is str: 26 | grp_i = [grp_i] 27 | # For each group inside each groupby 28 | for grp_j in grp_i: 29 | assert not ('source' == grp_j and runby == 'source'), \ 30 | f'source cannot be in groupby if runby="source"' 31 | # Assert that columns exist in obs 32 | assert 
def _validate_obs(
    obs: pd.DataFrame,
) -> None:
    """Check that ``obs`` carries the benchmark annotation columns.

    ``source`` must name the perturbed regulator and ``type_p`` must be a
    numeric column restricted to the values -1 or +1 (perturbation sign).
    An ``AssertionError`` is raised on the first violated rule.
    """
    cols = obs.columns
    assert 'source' in cols, \
        'source must be in adata.obs.columns'
    assert 'type_p' in cols, \
        'type_p must be in adata.obs.columns'
    type_p = obs['type_p']
    assert pd.api.types.is_numeric_dtype(type_p), \
        'type_p must contain numeric values'
    observed = type_p.sort_values().unique()
    allowed = np.array([-1, 1])
    assert np.isin(observed, allowed).all(), \
        'type_p must be -1 or +1'
from typing import Tuple

import numpy as np


def _validate_bool(
    y_true: np.ndarray,
    y_score: np.ndarray,
) -> None:
    """Assert that ``y_true`` is a 0/1 two-class array matching ``y_score`` in size."""
    assert isinstance(y_true, np.ndarray), 'y_true must be numpy.ndarray'
    assert isinstance(y_score, np.ndarray), 'y_score must be numpy.ndarray'
    unq = np.sort(np.unique(y_true))
    m = 'y_true must contain two binary classes, 0 and 1'
    assert unq.size <= 2, m
    lbl = np.array([0, 1])
    assert np.all(unq == lbl), m
    assert y_true.size == y_score.size, \
        'y_true and y_score must have the same size'


def _binary_clf_curve(
    y_true: np.ndarray,
    y_score: np.ndarray,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Return cumulative FPs, TPs and thresholds at each distinct score.

    Scores are scanned in decreasing order, mirroring the construction of
    ROC and precision-recall curves.
    """
    # Sort scores in decreasing order
    idx = np.flip(np.argsort(y_score))
    y_score = y_score[idx]
    y_true = y_true[idx]
    # Find unique value idxs (distinct thresholds)
    idx = np.where(np.diff(y_score))[0]
    # Append a value for the end of the curve
    idx = np.append(idx, y_true.size - 1)
    # Accumulate TP with decreasing threshold (typo fixed: "Acucmulate")
    tps = np.cumsum(y_true)[idx]
    fps = 1 + idx - tps
    return fps, tps, y_score[idx]


def auroc(
    y_true: np.ndarray,
    y_score: np.ndarray,
) -> float:
    """Area under the ROC curve for binary labels ``y_true`` scored by ``y_score``."""
    _validate_bool(y_true=y_true, y_score=y_score)
    # Compute binary curve
    fps, tps, thr = _binary_clf_curve(y_true, y_score)
    # Add limits so the curve starts at the origin
    fps = np.append(0., fps)
    tps = np.append(0., tps)
    thr = np.append(thr[0] + 1., thr)
    # Compute ratios
    fpr = fps / fps[-1]
    tpr = tps / tps[-1]
    # Trapezoidal integration over fpr
    dx = np.diff(np.ascontiguousarray(fpr))
    # Get direction slope (fpr is built non-decreasing, kept for safety)
    if np.all(dx <= 0):
        d = -1.
    else:
        d = 1.
    # Compute area
    ret = np.sum((dx * (tpr[1:] + tpr[:-1]) / 2.0))
    auc = d * ret
    return auc


def auprc(
    y_true: np.ndarray,
    y_score: np.ndarray,
    pi0: float = 0.5
) -> float:
    """Calibrated area under the precision-recall curve.

    ``pi0`` is the reference class prior used to calibrate precision so
    that runs with different class imbalance are comparable.
    """
    _validate_bool(y_true=y_true, y_score=y_score)
    assert isinstance(pi0, (int, float)) and 0. <= pi0 <= 1., \
        'pi0 must be numeric and between 0 and 1'
    # Compute binary curve
    fps, tps, thr = _binary_clf_curve(y_true, y_score)
    # Compute prc
    ps = tps + fps
    msk = ps != 0
    # Siblini W., Fréry J., He-Guelton L., Oblé F., Wang YQ. (2020) Master
    # Your Metrics with Calibration. In: Berthold M., Feelders A., Krempl G.
    # (eds) Advances in Intelligent Data Analysis XVIII. IDA 2020. Lecture
    # Notes in Computer Science, vol 12080. Springer, Cham
    pi = np.sum(y_true) / y_true.size
    ratio = pi * (1 - pi0) / (pi0 * (1 - pi))
    prc = tps[msk] / (tps[msk] + ratio * fps[msk])
    # Compute rcl
    rcl = tps / tps[-1]
    # Flip and add limits
    prc = np.append(np.flip(prc), 1)
    rcl = np.append(np.flip(rcl), 0)
    thr = np.flip(thr)
    dx = np.diff(np.ascontiguousarray(rcl))
    auc = -np.sum(dx * prc[:-1])
    return auc


def auc(
    y_true: np.ndarray,
    y_score: np.ndarray,
    pi0: float = 0.5,
) -> Tuple[float, float]:
    """
    Area Under the Curve.

    Normalizes ``y_score`` per row (experiment), flattens, drops NaNs and
    returns both the ROC and the calibrated PR areas.

    Parameters
    ----------
    y_true
        2D binary ground-truth matrix (experiments x sources), values 0/1.
    y_score
        2D enrichment-score matrix matching ``y_true``; may contain NaNs.
    pi0
        Reference prior forwarded to :func:`auprc`.

    Returns
    -------
    Tuple ``(auroc, auprc)``.
    """
    # Normalize per row to make scores comparable across experiments
    norm = np.nanmax(np.abs(y_score), axis=1)
    msk = norm == 0.
    norm[msk] = 1.
    y_score = y_score / norm.reshape(-1, 1)
    # BUGFIX: the previous assert compared NaNs (always False), so any NaN
    # in y_score tripped the assertion even though NaNs are explicitly
    # removed below. Only finite entries are range-checked now.
    fin = np.isfinite(y_score)
    assert ((-1. <= y_score[fin]) & (y_score[fin] <= 1.)).all()
    # Flatten and remove nans
    y_true, y_score = y_true.ravel(), y_score.ravel()
    msk_nan = ~np.isnan(y_score)
    y_true, y_score = y_true[msk_nan], y_score[msk_nan]
    auc_roc = auroc(y_true=y_true, y_score=y_score)
    auc_prc = auprc(y_true=y_true, y_score=y_score, pi0=pi0)
    return auc_roc, auc_prc

auc.scores = ['auroc', 'auprc']
+ fp) 26 | rcl = tp / (tp + fn) 27 | else: 28 | prc = 0. 29 | rcl = 0. 30 | return prc, rcl 31 | 32 | fscore.scores = ['precision', 'recall'] 33 | -------------------------------------------------------------------------------- /src/decoupler/bm/metric/_hmean.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | from decoupler._docs import docs 5 | from decoupler.bm.pl._format import _format 6 | 7 | 8 | def _hmean( 9 | x: float | int, 10 | y: float | int, 11 | beta: float | int = 1, 12 | ) -> float: 13 | assert isinstance(beta, (int, float)) and 0 < beta, \ 14 | 'beta must be numeric and > 0' 15 | h = np.zeros(len(x)) 16 | msk = (x != 0.) & (y != 0.) 17 | h[msk] = (1 + beta**2) * (x[msk] * y[msk]) / ((x[msk] * beta**2) + y[msk]) 18 | return h 19 | 20 | 21 | @docs.dedent 22 | def hmean( 23 | df: pd.DataFrame, 24 | metrics: str | list = ['auc', 'fscore', 'qrank'], 25 | beta: int | float = 0.5, 26 | ) -> pd.DataFrame: 27 | """ 28 | Computes the harmonic mean between two metric statistics. 29 | 30 | Parameters 31 | ---------- 32 | %(df)s 33 | metrics 34 | Metrics which to compute the harmonic mean between their own statistics. 35 | beta 36 | Controls the balance between statistics, where beta > 1 favors the first one (for example recall), 37 | beta < 1 the other one (for example precision), and beta = 1 gives equal weight to both. 38 | 39 | Returns 40 | ------- 41 | Dataframe containing the harmonic mean per metric. 
def qrank(
    y_true: np.ndarray,
    y_score: np.ndarray,
) -> Tuple[float, float]:
    """
    1 - quantile normalized rank
    """
    _validate_bool(y_true=y_true, y_score=y_score)
    # Rank scores per observation (row), ignoring NaNs
    ranks = sts.rankdata(y_score, axis=1, nan_policy='omit', method='average')
    # Quantile-normalize: divide by the number of non-NaN entries in each row
    n_valid = np.sum(~np.isnan(ranks), axis=1).reshape(-1, 1)
    ranks = ranks / n_valid
    # Split ranks into ground-truth sources and the rest
    is_true = y_true.astype(np.bool_)
    grp_true = ranks[is_true]
    grp_rest = ranks[~is_true]
    # One-sided rank-sum test: are true sources ranked higher than the rest?
    _, pval = sts.ranksums(grp_true, grp_rest, alternative='greater')
    return np.nanmean(grp_true), -np.log10(pval)

qrank.scores = ['1-qrank', '-log10(pval)']
@docs.dedent
def bar(
    df: pd.DataFrame,
    x: str,
    y: str,
    hue: str | None = None,
    palette: str = 'tab20',
    **kwargs
) -> None | Figure:
    """
    Plot the harmonic mean between two metric statistics as a barplot.

    x-axis represent the harmonic mean between metric statistics.

    y-axis represent a grouping variable.

    Parameters
    ----------
    %(df)s
    x
        Continuous variable to plot on x axis.
    %(y)s
    %(hue)s
    %(palette)s
    %(plot)s
    """
    # Validate
    assert isinstance(x, str), 'x must be str'
    assert isinstance(y, str), 'y must be str'
    assert isinstance(hue, str) or hue is None, 'hue must be str or None'
    # Instance
    bp = Plotter(**kwargs)
    # Plot: order groups by their mean value of x, descending
    order = (
        df
        .groupby(y)[x]
        .mean()
        .sort_values(ascending=False)
        .index
    )
    args = dict()
    if hue is not None:
        args['hue'] = hue
        args['palette'] = palette
    sns.barplot(
        data=df,
        y=y,
        x=x,
        order=order,
        # Fix: draw on the Plotter's axes; previously ax was omitted, so the
        # barplot landed on the current global axes instead of bp.ax like
        # every other plotting function in this module
        ax=bp.ax,
        **args
    )
    if hue is not None and hue != y:
        bp.ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), frameon=False, title=hue)
    return bp._return()
@docs.dedent
def fscore(
    df: pd.DataFrame,
    hue: str | None = None,
    palette: str = 'tab20',
    **kwargs
) -> None | Figure:
    """
    Plot precision and recall as scatterplot.

    x-axis represent the recall of correctly predicted sources after filtering by significance.
    The higher value the better performance is.

    y-axis represent the precision of correctly predicted sources after filtering by significance.
    The higher value the better performance is.

    Parameters
    ----------
    %(df)s
    %(hue)s
    %(palette)s
    %(plot)s
    """
    # Validate
    assert isinstance(hue, str) or hue is None, 'hue must be str or None'
    # Format
    tmp = _format(df=df, cols=['recall', 'precision'])
    # Instance
    bp = Plotter(**kwargs)
    # Plot, coloring by hue when provided
    if hue is not None:
        sns.scatterplot(
            data=tmp,
            x='recall',
            y='precision',
            ax=bp.ax,
            hue=hue,
            palette=palette,
        )
        bp.ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), frameon=False, title=hue)
    else:
        sns.scatterplot(
            data=tmp,
            x='recall',
            y='precision',
            ax=bp.ax,
        )
    return bp._return()
37 | thr_pval 38 | Dashed line to indicate baseline of p-values. 39 | %(plot)s 40 | """ 41 | # Validate 42 | assert isinstance(hue, str) or hue is None, 'hue must be str or None' 43 | assert isinstance(thr_rank, float) and 0. <= thr_rank <= 1., \ 44 | 'thr_rank must be float and between 0 and 1' 45 | assert isinstance(thr_pval, float) and 0. <= thr_pval <= 1., \ 46 | 'thr_pval must be float and between 0 and 1' 47 | # Format 48 | tmp = _format(df=df, cols=['1-qrank', '-log10(pval)']) 49 | # Instance 50 | bp = Plotter(**kwargs) 51 | # Plot 52 | if hue is not None: 53 | sns.scatterplot( 54 | data=tmp, 55 | x='1-qrank', 56 | y='-log10(pval)', 57 | hue=hue, 58 | ax=bp.ax, 59 | palette=palette, 60 | ) 61 | else: 62 | sns.scatterplot( 63 | data=tmp, 64 | x='1-qrank', 65 | y='-log10(pval)', 66 | ax=bp.ax, 67 | ) 68 | bp.ax.set_xlim(0, 1) 69 | bp.ax.axvline(x=thr_rank, ls='--', c='black', zorder=0) 70 | bp.ax.axhline(y=-np.log10(thr_pval), ls='--', c='black', zorder=0) 71 | bp.ax.set_ylabel(r'$\log_{10}$(pval)') 72 | if hue is not None: 73 | bp.ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), frameon=False, title=hue) 74 | return bp._return() 75 | -------------------------------------------------------------------------------- /src/decoupler/ds/__init__.py: -------------------------------------------------------------------------------- 1 | from decoupler.ds._bulk import hsctgfb, knocktf 2 | from decoupler.ds._scell import pbmc3k, covid5k, erygast1k 3 | from decoupler.ds._spatial import msvisium 4 | from decoupler.ds._toy import toy, toy_bench 5 | from decoupler.ds._utils import ensmbl_to_symbol 6 | -------------------------------------------------------------------------------- /src/decoupler/ds/_bulk.py: -------------------------------------------------------------------------------- 1 | from anndata import AnnData 2 | 3 | from decoupler._docs import docs 4 | from decoupler._log import _log 5 | from decoupler._download import _download 6 | 7 | 8 | @docs.dedent 9 | 
def hsctgfb(
    verbose: bool = False,
) -> AnnData:
    """
    Downloads RNA-seq bulk data consisting of 6 samples of hepatic stellate cells
    (HSC) where three of them were activated by the cytokine
    Transforming growth factor (TGF-β) :cite:`hsc_tgfb`.

    Parameters
    ----------
    %(verbose)s

    Returns
    -------
    AnnData object.
    """
    # Download the raw counts table from GEO (GSE151251)
    url = (
        'https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE151251&format=file&'
        'file=GSE151251%5FHSCs%5FCtrl%2Evs%2EHSCs%5FTGFb%2Ecounts%2Etsv%2Egz'
    )
    adata = _download(url, compression='gzip', sep='\t', verbose=verbose)
    # Transform to AnnData: drop duplicated gene symbols, keep only the count
    # columns (first 5 columns assumed to be annotation -- TODO confirm against
    # the GEO file layout), then transpose to observations x genes
    adata = adata.drop_duplicates('GeneName').set_index('GeneName').iloc[:, 5:].T
    adata.columns.name = None
    adata = AnnData(adata)
    adata.X = adata.X.astype(float)
    # Format obs: sample names encode condition ('-Ctrl' marks control samples)
    adata.obs['condition'] = ['control' if '-Ctrl' in sample_id else 'treatment' for sample_id in adata.obs.index]
    adata.obs['sample_id'] = [sample_id.split('_')[0] for sample_id in adata.obs.index]
    adata.obs['condition'] = adata.obs['condition'].astype('category')
    adata.obs['sample_id'] = adata.obs['sample_id'].astype('category')
    m = f'generated AnnData with shape={adata.shape}'
    _log(m, level='info', verbose=verbose)
    return adata
@docs.dedent
def msvisium(
    verbose: bool = False,
) -> AnnData:
    """
    Downloads a spatial RNA-seq (Visium) human sample with multiple sclerosis
    displaying a chronic active lesion in the white matter of the brain :cite:`msvisium`.

    Parameters
    ----------
    %(verbose)s

    Returns
    -------
    AnnData object.
    """
    # Base GEO URL; per-file suffixes are appended below (URL-encoded names)
    url = (
        'https://www.ncbi.nlm.nih.gov/geo/download/'
        '?acc=GSM8563708&format=file&file=GSM8563708%5FMS377T%5F'
    )
    # Download mat: MatrixMarket counts, transposed to obs x var, rounded to ints
    response = requests.get(url + 'matrix%2Emtx%2Egz')
    with gzip.GzipFile(fileobj=io.BytesIO(response.content)) as f:
        X = sio.mmread(f).T.tocsr().rint()
    X.eliminate_zeros()
    # Gene annotation (second column holds the gene symbols)
    var = pd.read_csv(
        url + 'features%2Etsv%2Egz',
        compression='gzip',
        sep='\t',
        header=None,
        usecols=[1],
        index_col=0,
    )
    var.index.name = None
    # Remove repeated genes (keep first occurrence, drop matching columns of X)
    msk_var = ~(var.index.duplicated(keep='first'))
    var = var.loc[msk_var]
    X = X[:, msk_var]
    # Spot barcodes
    obs = pd.read_csv(
        url + 'barcodes%2Etsv%2Egz',
        compression='gzip',
        sep='\t',
        header=None,
        usecols=[0],
        index_col=0,
    )
    obs.index.name = None
    # Create anndata
    adata = AnnData(X=X, obs=obs, var=var)
    # Add images following the scanpy/squidpy Visium .uns['spatial'] layout
    adata.uns['spatial'] = dict()
    adata.uns['spatial']['MS377T'] = dict()
    adata.uns['spatial']['MS377T']['images'] = dict()
    response = requests.get(url + 'scalefactors%5Fjson%2Ejson%2Egz')
    with gzip.GzipFile(fileobj=io.BytesIO(response.content)) as f:
        adata.uns['spatial']['MS377T']['scalefactors'] = json.load(f)
    response = requests.get(url + 'tissue%5Fhires%5Fimage%2Epng%2Egz')
    with gzip.GzipFile(fileobj=io.BytesIO(response.content)) as f:
        adata.uns['spatial']['MS377T']['images']['hires'] = imread(f)
    response = requests.get(url + 'tissue%5Flowres%5Fimage%2Epng%2Egz')
    with gzip.GzipFile(fileobj=io.BytesIO(response.content)) as f:
        adata.uns['spatial']['MS377T']['images']['lowres'] = imread(f)
    # Add coordinates: join pixel positions, store them in obsm, then drop the
    # raw columns from obs to avoid duplication
    coords = pd.read_csv(
        url + 'tissue%5Fpositions%5Flist%2Ecsv%2Egz',
        compression='gzip',
        index_col=0,
    )
    adata.obs = adata.obs.join(coords, how='left')
    adata.obsm['spatial'] = adata.obs[['pxl_col_in_fullres', 'pxl_row_in_fullres']].values
    adata.obs.drop(
        columns=['in_tissue', 'pxl_row_in_fullres', 'pxl_col_in_fullres'],
        inplace=True,
    )
    # Add metadata: niche annotation from UCSC Cell Browser; the right-join
    # restricts spots to those present in the annotation
    url_meta = (
        'https://cells-test.gi.ucsc.edu/ms-subcortical-lesions/'
        'visium-ms377T/meta.tsv'
    )
    meta = pd.read_csv(url_meta, sep='\t', usecols=[0, 4], index_col=0)
    adata = adata[meta.index, :].copy()
    adata.obs = adata.obs.join(meta, how='right')
    adata.obs['niches'] = adata.obs['niches'].astype('category')
    adata.obs.index.name = None
    # Filter vars: keep genes detected in at least 10 spots
    msk_var = adata.X.getnnz(axis=0) > 9
    adata = adata[:, msk_var].copy()
    m = f'generated AnnData with shape={adata.shape}'
    _log(m, level='info', verbose=verbose)
    return adata
19 | 20 | Returns 21 | ------- 22 | List of gene symbols 23 | """ 24 | url = ( 25 | 'http://www.ensembl.org/biomart/martservice?query=' 26 | '' 30 | ) 31 | # Organisms 32 | # hsapiens_gene_ensembl 33 | # mmusculus_gene_ensembl 34 | # dmelanogaster_gene_ensembl 35 | # rnorvegicus_gene_ensembl 36 | # drerio_gene_ensembl 37 | # celegans_gene_ensembl 38 | # scerevisiae_gene_ensembl 39 | # Validate 40 | assert isinstance(genes, list), 'genes must be list' 41 | assert isinstance(organism, str), f'organism must be str' 42 | # Try different mirrors 43 | response = requests.get(url.format(miror='www', organism=organism)) 44 | if any(msg in response.text for msg in ['Service unavailable', 'Gateway Time-out']): 45 | response = requests.get(url.format(miror='useast', organism=organism)) 46 | if any(msg in response.text for msg in ['Service unavailable', 'Gateway Time-out']): 47 | response = requests.get(url.format(miror='asia', organism=organism)) 48 | if not any(msg in response.text for msg in ['Service unavailable', 'Gateway Time-out']): 49 | eids = pd.read_csv(io.StringIO(response.text), sep='\t', header=None, index_col=0)[1].to_dict() 50 | elif organism in ['hsapiens_gene_ensembl', 'mmusculus_gene_ensembl']: 51 | url = f'https://zenodo.org/records/15551885/files/{organism}.csv.gz?download=1' 52 | eids = pd.read_csv(url, index_col=0, compression='gzip')['symbol'].to_dict() 53 | else: 54 | assert False, 'ensembl servers are down, try again later' 55 | return [eids[g] if g in eids else None for g in genes] 56 | -------------------------------------------------------------------------------- /src/decoupler/mt/__init__.py: -------------------------------------------------------------------------------- 1 | from decoupler._Method import _show_methods 2 | from decoupler.mt._methods import aucell 3 | from decoupler.mt._methods import gsea 4 | from decoupler.mt._methods import gsva 5 | from decoupler.mt._methods import mdt 6 | from decoupler.mt._methods import mlm 7 | from 
decoupler.mt._methods import ora 8 | from decoupler.mt._methods import udt 9 | from decoupler.mt._methods import ulm 10 | from decoupler.mt._methods import viper 11 | from decoupler.mt._methods import waggr 12 | from decoupler.mt._methods import zscore 13 | from decoupler.mt._methods import _methods 14 | from decoupler.mt._decouple import decouple 15 | from decoupler.mt._consensus import consensus 16 | 17 | def show() -> None: 18 | """Displays the methods available in decoupler""" 19 | return _show_methods(_methods) 20 | -------------------------------------------------------------------------------- /src/decoupler/mt/_aucell.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | 3 | import numpy as np 4 | import scipy.stats as sts 5 | import scipy.sparse as sps 6 | from tqdm.auto import tqdm 7 | import numba as nb 8 | 9 | from decoupler._docs import docs 10 | from decoupler._log import _log 11 | from decoupler._Method import MethodMeta, Method 12 | from decoupler.pp.net import _getset 13 | 14 | 15 | @nb.njit(parallel=True, cache=True) 16 | def _auc( 17 | row: np.ndarray, 18 | cnct: np.ndarray, 19 | starts: np.ndarray, 20 | offsets: np.ndarray, 21 | n_up: int, 22 | nsrc: int, 23 | ) -> np.ndarray: 24 | # Empty acts 25 | es = np.zeros(nsrc) 26 | # For each feature set 27 | for j in nb.prange(nsrc): 28 | # Extract feature set 29 | fset = _getset(cnct, starts, offsets, j) 30 | # Compute max AUC for fset 31 | x_th = np.arange(1, stop=fset.shape[0] + 1) 32 | x_th = x_th[x_th < n_up] 33 | max_auc = np.sum(np.diff(np.append(x_th, n_up)) * x_th) 34 | # Compute AUC 35 | x = row[fset] 36 | x = np.sort(x[x <= n_up]) 37 | y = np.arange(x.shape[0]) + 1 38 | x = np.append(x, n_up) 39 | # Update acts matrix 40 | es[j] = np.sum(np.diff(x) * y) / max_auc 41 | return es 42 | 43 | 44 | def _validate_n_up( 45 | nvar: int, 46 | n_up: int | float | None = None, 47 | ) -> int: 48 | assert isinstance(n_up, (int, float)) or 
n_up is None, 'n_up must be numerical or None' 49 | if n_up is None: 50 | n_up = np.ceil(0.05 * nvar) 51 | n_up = int(np.clip(n_up, a_min=2, a_max=nvar)) 52 | else: 53 | n_up = int(np.ceil(n_up)) 54 | assert nvar >= n_up > 1, f'For nvar={nvar}, n_up={n_up} must be between 1 and {nvar}' 55 | return n_up 56 | 57 | 58 | @docs.dedent 59 | def _func_aucell( 60 | mat: np.ndarray, 61 | cnct: np.ndarray, 62 | starts: np.ndarray, 63 | offsets: np.ndarray, 64 | n_up: int | float | None = None, 65 | verbose: bool = False, 66 | ) -> Tuple[np.ndarray, None]: 67 | r""" 68 | Area Under the Curve for set enrichment within single cells (AUCell) :cite:`aucell`. 69 | 70 | Given a ranked list of features per observation, AUCell calculates the AUC by measuring how early the features in 71 | the set appear in this ranking. Specifically, the enrichment score :math:`ES` is: 72 | 73 | .. math:: 74 | 75 | {ES}_{i, F} = \int_0^1 {RecoveryCurve}_{i, F}(r_i) \, dr 76 | 77 | Where: 78 | 79 | - :math:`i` is the obervation 80 | - :math:`F` is the feature set 81 | - :math:`{RecoveryCurve}_{i, F}(r_i)` is the proportion of features from :math:`F` recovered in the top :math:`r_i`-fraction of the ranked list for observation :math:`i` 82 | 83 | %(notest)s 84 | 85 | %(params)s 86 | n_up 87 | Number of features to include in the AUC calculation. 88 | If ``None``, the top 5% of features based on their magnitude are selected. 
@docs.dedent
def decouple(
    data: DataType,
    net: pd.DataFrame,
    methods: str | list = 'all',
    args: dict | None = None,
    cons: bool = False,
    **kwargs
) -> dict | None:
    """
    Runs multiple enrichment methods sequentially.

    Parameters
    ----------
    %(data)s
    %(net)s
    methods
        List of methods to run.
    args
        Dictionary of dictionaries containing method-specific keyword arguments.
    cons
        Whether to get a consensus score across the used methods.
    %(tmin)s
    %(raw)s
    %(empty)s
    %(bsize)s
    %(verbose)s
    """
    # Validate
    _mdict = {m.name: m for m in _methods}
    if isinstance(methods, str):
        if methods == 'all':
            methods = _mdict.keys()
        else:
            methods = [methods]
    methods = set(methods)
    assert methods.issubset(_mdict), \
        f'methods={methods} must be in decoupler.\nUse decoupler.mt.show_methods to check which ones are available'
    # Fix: the previous signature used a mutable default (`args=dict()`) that
    # was mutated below via setdefault, leaking state across calls and into
    # the caller's dict; work on a shallow copy instead
    args = dict(args) if args is not None else {}
    assert all(k in methods for k in args), \
        f'All keys in args={args.keys()} must belong to a method in methods={methods}'
    kwargs = kwargs.copy()
    kwargs.setdefault('verbose', False)
    # Run each method
    all_res = {}
    for name in methods:
        mth = _mdict[name]
        arg = args.setdefault(name, {})
        res = mth(data=data, net=net, **arg, **kwargs)
        if res:
            # Tuple result: (scores, adjusted p-values)
            res = {
                f'score_{mth.name}': res[0],
                f'padj_{mth.name}': res[1],
            }
            all_res = all_res | res
    if all_res:
        if cons:
            all_res['score_consensus'], all_res['padj_consensus'] = consensus(all_res, verbose=kwargs['verbose'])
        return all_res
    elif cons:
        # AnnData input: results live in data.obsm, consensus is stored in-place
        consensus(data, verbose=kwargs['verbose'])
@docs.dedent
def _func_mdt(
    mat: np.ndarray,
    adj: np.ndarray,
    verbose: bool = False,
    **kwargs,
) -> Tuple[np.ndarray, None]:
    r"""
    Multivariate Decision Trees (MDT) :cite:`decoupler`.

    This approach uses the molecular features from one observation as the population of samples
    and it fits a gradient boosted decision trees model with multiple covariates,
    which are the weights of all feature sets :math:`F`. It uses the implementation provided by ``xgboost`` :cite:`xgboost`.

    The enrichment score :math:`ES` for each :math:`F` is then calculated as the importance of each covariate in the model.

    %(notest)s

    %(params)s

    kwargs
        All other keyword arguments are passed to ``xgboost.XGBRegressor``.
    %(returns)s
    """
    _check_import(xgboost)
    nobs = mat.shape[0]
    nvar, nsrc = adj.shape
    m = f'mdt - fitting {nsrc} multivariate decision tree models (XGBoost) of {nvar} targets across {nobs} observations'
    _log(m, level='info', verbose=verbose)
    # One model per observation: the adjacency columns are the covariates and
    # the observation's feature values are the response
    es = np.zeros(shape=(nobs, nsrc))
    for idx in tqdm(range(nobs), disable=not verbose):
        row = mat[idx]
        es[idx, :] = _xgbr(x=adj, y=row, **kwargs)
    return (es, None)


_mdt = MethodMeta(
    name='mdt',
    desc='Multivariate Decision Tree (MDT)',
    func=_func_mdt,
    stype='numerical',
    adj=True,
    weight=True,
    test=False,
    limits=(0, 1),
    reference='https://doi.org/10.1093/bioadv/vbac016',
)
mdt = Method(_method=_mdt)
def _return(
    name: str,
    data: DataType,
    es: pd.DataFrame,
    pv: pd.DataFrame,
    verbose: bool = False,
) -> Tuple[pd.DataFrame, pd.DataFrame] | AnnData | None:
    """
    Store or return enrichment results depending on the input container.

    For AnnData inputs, scores (and adjusted p-values when present) are written
    into ``.obsm`` under ``score_{name}``/``padj_{name}``; for other inputs the
    two frames are returned directly.
    """
    if isinstance(data, AnnData):
        if data.obs_names.size != es.index.size:
            # Some observations were dropped upstream (e.g. empty rows): subset
            # the AnnData so obsm aligns, and return the repaired object
            m = 'Provided AnnData contains empty observations, returning repaired object'
            _log(m, level='warn', verbose=verbose)
            data = data[es.index, :].copy()
            data.obsm[f'score_{name}'] = es
            if pv is not None:
                data.obsm[f'padj_{name}'] = pv
            return data
        else:
            # Store in-place; caller keeps its existing reference
            data.obsm[f'score_{name}'] = es
            if pv is not None:
                data.obsm[f'padj_{name}'] = pv
            return None
    else:
        return es, pv


def _run(
    name: str,
    func: Callable,
    adj: bool,
    test: bool,
    data: DataType,
    net: pd.DataFrame,
    tmin: int | float = 5,
    layer: str | None = None,
    raw: bool = False,
    empty: bool = True,
    bsize: int | float = 250_000,
    verbose: bool = False,
    **kwargs
) -> Tuple[pd.DataFrame, pd.DataFrame] | AnnData | None:
    """
    Shared driver for enrichment methods: extracts the data matrix, prunes the
    network, dispatches to ``func`` (densifying sparse input in batches when
    the method needs a dense adjacency), applies FDR correction for methods
    that produce p-values, and returns/stores the results via ``_return``.
    """
    _log(f'{name} - Running {name}', level='info', verbose=verbose)
    # Process data
    mat, obs, var = extract(data, layer=layer, raw=raw, empty=empty, verbose=verbose)
    sparse = sps.issparse(mat)
    # Process net
    net = prune(features=var, net=net, tmin=tmin, verbose=verbose)
    # Handle stat type
    if adj:
        # Method consumes a dense (features x sources) adjacency matrix
        sources, targets, adjm = adjmat(features=var, net=net, verbose=verbose)
        # Handle sparse
        if sparse:
            # Densify in row batches of bsize to bound peak memory
            nbatch = int(np.ceil(obs.size / bsize))
            es, pv = [], []
            for i in tqdm(range(nbatch), disable=not verbose):
                srt, end = i * bsize, i * bsize + bsize
                bmat = mat[srt:end].toarray()
                bes, bpv = func(bmat, adjm, verbose=verbose, **kwargs)
                es.append(bes)
                pv.append(bpv)
            es = np.vstack(es)
            es = pd.DataFrame(es, index=obs, columns=sources)
        else:
            es, pv = func(mat, adjm, verbose=verbose, **kwargs)
            es = pd.DataFrame(es, index=obs, columns=sources)
    else:
        # Method consumes index-based feature sets (CSR-like flat layout)
        sources, cnct, starts, offsets = idxmat(features=var, net=net, verbose=verbose)
        es, pv = func(mat, cnct, starts, offsets, verbose=verbose, **kwargs)
        es = pd.DataFrame(es, index=obs, columns=sources)
    # Handle pvals and FDR correction
    if test:
        pv = np.vstack(pv)
        pv = pd.DataFrame(pv, index=obs, columns=sources)
        if name != 'mlm':
            # mlm appears to be exempt from BH correction here -- TODO confirm
            # whether it corrects internally
            _log(f'{name} - adjusting p-values by FDR', level='info', verbose=verbose)
            pv.loc[:, :] = sts.false_discovery_control(pv.values, axis=1, method='bh')
    else:
        pv = None
    _log(f'{name} - done', level='info', verbose=verbose)
    return _return(name, data, es, pv, verbose=verbose)
| 4 | import numpy as np 5 | import scipy.sparse as sps 6 | from tqdm.auto import tqdm 7 | 8 | from decoupler._odeps import xgboost, _check_import 9 | from decoupler._docs import docs 10 | from decoupler._log import _log 11 | from decoupler._Method import MethodMeta, Method 12 | 13 | 14 | def _xgbr( 15 | x: np.ndarray, 16 | y: np.ndarray, 17 | **kwargs, 18 | ) -> np.ndarray: 19 | kwargs.setdefault('n_estimators', 10) 20 | # Init model 21 | reg = xgboost.XGBRegressor(**kwargs) 22 | # Fit 23 | x, y = x.reshape(-1, 1), y.reshape(-1, 1) 24 | reg = reg.fit(x, y) 25 | # Get R score 26 | es = reg.score(x, y) 27 | # Clip to [0, 1] 28 | es = np.clip(es, 0, 1) 29 | return es 30 | 31 | 32 | @docs.dedent 33 | def _func_udt( 34 | mat: np.ndarray, 35 | adj: np.ndarray, 36 | verbose: bool = False, 37 | **kwargs, 38 | ) -> Tuple[np.ndarray, None]: 39 | """ 40 | Univariate Decision Tree (UDT) :cite:`decoupler`. 41 | 42 | This approach uses the molecular features from one observation as the population of samples 43 | and it fits a gradient boosted decision trees model with a single covariate, 44 | which is the feature weights of a set :math:`F`. 45 | It uses the implementation provided by ``xgboost`` :cite:`xgboost`. 46 | 47 | The enrichment score :math:`ES` is then calculated as the coefficient of determination :math:`R^2`. 48 | 49 | %(notest)s 50 | 51 | %(params)s 52 | 53 | kwargs 54 | All other keyword arguments are passed to ``xgboost.XGBRegressor``. 
55 | %(returns)s 56 | """ 57 | _check_import(xgboost) 58 | nobs = mat.shape[0] 59 | nvar, nsrc = adj.shape 60 | m = f'udt - fitting {nsrc} univariate decision tree models (XGBoost) of {nvar} targets across {nobs} observations' 61 | _log(m, level='info', verbose=verbose) 62 | es = np.zeros(shape=(nobs, nsrc)) 63 | for i in tqdm(range(nobs), disable=not verbose): 64 | obs = mat[i] 65 | for j in range(adj.shape[1]): 66 | es[i, j] = _xgbr(x=adj[:, j], y=obs, **kwargs) 67 | return es, None 68 | 69 | 70 | _udt = MethodMeta( 71 | name='udt', 72 | desc='Univariate Decision Tree (UDT)', 73 | func=_func_udt, 74 | stype='numerical', 75 | adj=True, 76 | weight=True, 77 | test=False, 78 | limits=(0, 1), 79 | reference='https://doi.org/10.1093/bioadv/vbac016', 80 | ) 81 | udt = Method(_method=_udt) 82 | -------------------------------------------------------------------------------- /src/decoupler/mt/_ulm.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | 3 | import numpy as np 4 | import scipy.stats as sts 5 | 6 | from decoupler._docs import docs 7 | from decoupler._log import _log 8 | from decoupler._Method import MethodMeta, Method 9 | 10 | 11 | def _cov( 12 | A: np.ndarray, 13 | b: np.ndarray 14 | ) -> np.ndarray: 15 | return np.dot(b.T - b.mean(), A - A.mean(axis=0)) / (b.shape[0]-1) 16 | 17 | 18 | def _cor( 19 | A: np.ndarray, 20 | b: np.ndarray 21 | ) -> np.ndarray: 22 | cov = _cov(A, b) 23 | ssd = np.std(A, axis=0, ddof=1) * np.std(b, axis=0, ddof=1).reshape(-1, 1) 24 | return cov / ssd 25 | 26 | 27 | def _tval( 28 | r: np.ndarray, 29 | df: float 30 | ) -> np.ndarray: 31 | return r * np.sqrt(df / ((1.0 - r + 2.2e-16) * (1.0 + r + 2.2e-16))) 32 | 33 | 34 | @docs.dedent 35 | def _func_ulm( 36 | mat: np.ndarray, 37 | adj: np.ndarray, 38 | tval: bool = True, 39 | verbose: bool = False, 40 | ) -> Tuple[np.ndarray, np.ndarray]: 41 | r""" 42 | Univariate Linear Model (ULM) :cite:`decoupler`. 
43 | 44 | This approach uses the molecular features from one observation as the population of samples 45 | and it fits a linear model with a single covariate, which is the feature weights of a set :math:`F`. 46 | 47 | .. math:: 48 | 49 | y_i = \beta_0 + \beta_1 x_i + \varepsilon, \quad i = 1, 2, \ldots, n 50 | 51 | Where: 52 | 53 | - :math:`y_i` is the observed feature statistic (e.g. gene expression, :math:`log_{2}FC`, etc.) for feature :math:`i` 54 | - :math:`x_i` is the weight of feature :math:`i` in feature set :math:`F`. For unweighted sets, membership in the set is indicated by 1, and non-membership by 0. 55 | - :math:`\beta_0` is the intercept 56 | - :math:`\beta_1` is the slope coefficient 57 | - :math:`\varepsilon` is the error term for feature :math:`i` 58 | 59 | .. figure:: /_static/images/ulm.png 60 | :alt: Univariate Linear Model (ULM) schematic. 61 | :align: center 62 | :width: 75% 63 | 64 | Univariate Linear Model (ULM) scheme. 65 | In this example, the observed gene expression of :math:`Sample_1` is predicted using 66 | the interaction weights of :math:`TF_1`. 67 | Since the target genes that have negative weights are lowly expressed, 68 | and the positive target genes are highly expressed, 69 | the relationship between the two variables is positive so the obtained :math:`ES` score is positive. 70 | Scores can be interpreted as active when positive, repressive when negative, and inconclusive when close to 0. 71 | 72 | The enrichment score :math:`ES` is then calculated as the t-value of the slope coefficient. 73 | 74 | .. math:: 75 | 76 | ES = t_{\beta_1} = \frac{\hat{\beta}_1}{\mathrm{SE}(\hat{\beta}_1)} 77 | 78 | Where: 79 | 80 | - :math:`t_{\beta_1}` is the t-value of the slope 81 | - :math:`\mathrm{SE}(\hat{\beta}_1)` is the standard error of the slope 82 | 83 | Next, :math:`p_{value}` are obtained by evaluating the two-sided survival function 84 | (:math:`sf`) of the Student’s t-distribution. 85 | 86 | .. 
math:: 87 | 88 | p_{value} = 2 \times \mathrm{sf}(|ES|, \text{df}) 89 | 90 | %(yestest)s 91 | 92 | %(params)s 93 | %(tval)s 94 | 95 | %(returns)s 96 | """ 97 | # Get degrees of freedom 98 | n_var, n_src = adj.shape 99 | df = n_var - 2 100 | m = f'ulm - fitting {n_src} univariate models of {n_var} observations (targets) with {df} degrees of freedom' 101 | _log(m, level='info', verbose=verbose) 102 | # Compute R value for all 103 | r = _cor(adj, mat.T) 104 | # Compute t-value 105 | t = _tval(r, df) 106 | # Compute p-value 107 | pv = sts.t.sf(abs(t), df) * 2 108 | if tval: 109 | es = t 110 | else: 111 | # Compute coef 112 | es = r * (np.std(mat.T, ddof=1, axis=0).reshape(-1, 1) / np.std(adj, ddof=1, axis=0)) 113 | return es, pv 114 | 115 | 116 | _ulm = MethodMeta( 117 | name='ulm', 118 | desc='Univariate Linear Model (ULM)', 119 | func=_func_ulm, 120 | stype='numerical', 121 | adj=True, 122 | weight=True, 123 | test=True, 124 | limits=(-np.inf, +np.inf), 125 | reference='https://doi.org/10.1093/bioadv/vbac016', 126 | ) 127 | ulm = Method(_method=_ulm) 128 | -------------------------------------------------------------------------------- /src/decoupler/mt/_zscore.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | 3 | import numpy as np 4 | import scipy.stats as sts 5 | 6 | from decoupler._docs import docs 7 | from decoupler._log import _log 8 | from decoupler._Method import MethodMeta, Method 9 | 10 | 11 | @docs.dedent 12 | def _func_zscore( 13 | mat: np.ndarray, 14 | adj: np.ndarray, 15 | flavor: str = 'RoKAI', 16 | verbose: bool = False, 17 | ) -> Tuple[np.ndarray, np.ndarray]: 18 | r""" 19 | Z-score (ZSCORE) :cite:`zscore`. 
20 | 21 | This approach computes the mean value of the molecular features for known targets, 22 | optionally subtracts the overall mean of all measured features, 23 | and normalizes the result by the standard deviation of all features and the square 24 | root of the number of targets. 25 | 26 | This formulation was originally introduced in KSEA, which explicitly includes the 27 | subtraction of the global mean to compute the enrichment score :math:`ES`. 28 | 29 | .. math:: 30 | 31 | ES = \frac{(\mu_s-\mu_p) \times \sqrt m }{\sigma} 32 | 33 | Where: 34 | 35 | - :math:`\mu_s` is the mean of targets 36 | - :math:`\mu_p` is the mean of all features 37 | - :math:`m` is the number of targets 38 | - :math:`\sigma` is the standard deviation of all features 39 | 40 | However, in the RoKAI implementation, this global mean subtraction was omitted. 41 | 42 | .. math:: 43 | 44 | ES = \frac{\mu_s \times \sqrt m }{\sigma} 45 | 46 | A two-sided :math:`p_{value}` is then calculated from the enrichment score using 47 | the survival function :math:`sf` of the standard normal distribution. 48 | 49 | .. math:: 50 | 51 | p = 2 \times \mathrm{sf}\bigl(\lvert \mathrm{ES} \rvert \bigr) 52 | 53 | %(yestest)s 54 | 55 | %(params)s 56 | 57 | flavor 58 | Which flavor to use when calculating the z-score, either KSEA or RoKAI.
59 | 60 | %(returns)s 61 | """ 62 | assert isinstance(flavor, str) and flavor in ['KSEA', 'RoKAI'], \ 63 | 'flavor must be str and KSEA or RoKAI' 64 | nobs, nvar = mat.shape 65 | nvar, nsrc = adj.shape 66 | m = f'zscore - calculating {nsrc} scores with flavor={flavor}' 67 | _log(m, level='info', verbose=verbose) 68 | stds = np.std(mat, axis=1, ddof=1) 69 | if flavor == 'RoKAI': 70 | mean_all = np.mean(mat, axis=1) 71 | elif flavor == 'KSEA': 72 | mean_all = np.zeros(stds.shape) 73 | n = np.sqrt(np.count_nonzero(adj, axis=0)) 74 | mean = mat.dot(adj) / np.sum(np.abs(adj), axis=0) 75 | es = ((mean - mean_all.reshape(-1, 1)) * n) / stds.reshape(-1, 1) 76 | pv = 2 * sts.norm.sf(np.abs(es)) 77 | return es, pv 78 | 79 | 80 | _zscore = MethodMeta( 81 | name='zscore', 82 | desc='Z-score (ZSCORE)', 83 | func=_func_zscore, 84 | stype='numerical', 85 | adj=True, 86 | weight=True, 87 | test=True, 88 | limits=(-np.inf, +np.inf), 89 | reference='https://doi.org/10.1038/s41467-021-21211-6', 90 | ) 91 | zscore = Method(_method=_zscore) 92 | -------------------------------------------------------------------------------- /src/decoupler/op/__init__.py: -------------------------------------------------------------------------------- 1 | from decoupler.op._translate import show_organisms, translate 2 | from decoupler.op._resource import show_resources, resource 3 | from decoupler.op._collectri import collectri 4 | from decoupler.op._dorothea import dorothea 5 | from decoupler.op._hallmark import hallmark 6 | from decoupler.op._progeny import progeny 7 | -------------------------------------------------------------------------------- /src/decoupler/op/_collectri.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | from decoupler._docs import docs 5 | from decoupler._log import _log 6 | from decoupler._download import URL_INT, _download 7 | from decoupler.op._translate import translate 8 | from 
decoupler.op._dtype import _infer_dtypes 9 | 10 | 11 | @docs.dedent 12 | def collectri( 13 | organism: str = 'human', 14 | remove_complexes: bool = False, 15 | license: str = 'academic', 16 | verbose: bool = False, 17 | ) -> pd.DataFrame: 18 | """ 19 | CollecTRI gene regulatory network :cite:p:`collectri`. 20 | 21 | Wrapper to access CollecTRI gene regulatory network. CollecTRI is a 22 | comprehensive resource containing a curated collection of transcription 23 | factors (TFs) and their target genes. It is an expansion of DoRothEA. 24 | Each interaction is weighted by its mode of regulation (either positive or negative). 25 | 26 | Parameters 27 | ---------- 28 | %(organism)s 29 | remove_complexes 30 | Whether to remove complexes. 31 | %(license)s 32 | %(verbose)s 33 | 34 | Returns 35 | ------- 36 | Dataframe in long format containing target genes for each TF with their associated weights, 37 | and if available, the PMIDs supporting each interaction. 38 | """ 39 | url = 'https://zenodo.org/records/8192729/files/CollecTRI_regulons.csv?download=1' 40 | ct = _download(url, verbose=verbose) 41 | # Update resources 42 | resources = [] 43 | for str_res in ct['resources']: 44 | lst_res = str_res.replace('CollecTRI', '').split(';') 45 | str_res = ';'.join(sorted([res.replace('_', '') for res in lst_res if res != ''])) 46 | resources.append(str_res) 47 | ct['resources'] = resources 48 | # Format references 49 | ct['references'] = ct['references'].str.replace('CollecTRI:', '') 50 | ct = ct.dropna() 51 | if remove_complexes: 52 | ct = ct[~ct['source'].isin(['AP1', 'NFKB'])] 53 | ct = _infer_dtypes(ct) 54 | if organism != 'human': 55 | ct = translate(ct, columns=['source', 'target'], target_organism=organism, verbose=verbose) 56 | ct = ct.drop_duplicates(['source', 'target']).reset_index(drop=True) 57 | return ct 58 | -------------------------------------------------------------------------------- /src/decoupler/op/_dorothea.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | from decoupler._docs import docs 5 | from decoupler._log import _log 6 | from decoupler._download import URL_INT, _download 7 | from decoupler.op._translate import translate 8 | from decoupler.op._dtype import _infer_dtypes 9 | 10 | 11 | @docs.dedent 12 | def dorothea( 13 | organism: str = 'human', 14 | levels: str | list = ['A', 'B', 'C'], 15 | dict_weights: dict | None = None, 16 | license: str = 'academic', 17 | verbose: bool = False, 18 | ) -> pd.DataFrame: 19 | """ 20 | DoRothEA gene regulatory network :cite:p:`dorothea`. 21 | 22 | Wrapper to access DoRothEA gene regulatory network. DoRothEA is a 23 | comprehensive resource containing a curated collection of transcription 24 | factors (TFs) and their target genes. Each interaction is weighted by its 25 | mode of regulation (either positive or negative) and by its confidence 26 | level. 27 | 28 | Parameters 29 | ---------- 30 | %(organism)s 31 | levels 32 | List of confidence levels to return. Goes from A to D, A being the 33 | most confident and D being the least. 34 | dict_weights 35 | Dictionary of values to divide the mode of regulation (-1 or 1), 36 | one for each confidence level. Bigger values will generate weights 37 | close to zero. 38 | %(license)s 39 | %(verbose)s 40 | 41 | Returns 42 | ------- 43 | Dataframe in long format containing target genes for each TF with their associated weights and confidence level.
44 | """ 45 | assert isinstance(levels, (str, list)), 'levels must be str or list' 46 | if isinstance(levels, str): 47 | levels = [levels] 48 | assert all(l in {'A', 'B', 'C', 'D'} for l in levels), 'levels can only contain any of these values: A, B, C, and/or D' 49 | assert isinstance(dict_weights, dict) or dict_weights is None, 'dict_weights must be dict or None' 50 | if dict_weights: 51 | assert all(k in levels for k in dict_weights), f'dict_weights keys must be in levels={levels}' 52 | weights = dict_weights 53 | else: 54 | weights = {'A': 1, 'B': 2, 'C': 3, 'D': 4} 55 | weights = {k: weights[k] for k in weights if k in levels} 56 | # Read 57 | str_levels = ','.join(levels) 58 | url_ext = f'datasets=dorothea&dorothea_levels={str_levels}&fields=dorothea_level&license={license}' 59 | url = URL_INT + url_ext 60 | m = f'dorothea - Accessing DoRothEA (levels {str_levels}) with {license} license and weights={weights}' 61 | _log(m, level='info', verbose=verbose) 62 | do = _download(url, sep='\t', verbose=verbose) 63 | # Filter extra columns 64 | do = do[[ 65 | 'source_genesymbol', 'target_genesymbol', 66 | 'is_stimulation', 'is_inhibition', 67 | 'consensus_direction', 'consensus_stimulation', 68 | 'consensus_inhibition', 'dorothea_level', 69 | ]] 70 | # Remove duplicates 71 | do = do[~do.duplicated(['source_genesymbol', 'dorothea_level', 'target_genesymbol'])] 72 | # Assign top level if more than 2 73 | do['dorothea_level'] = [lvl.split(';')[0] for lvl in do['dorothea_level']] 74 | # Assign mode of regulation 75 | mor = [] 76 | for i in do.itertuples(): 77 | if i.is_stimulation and i.is_inhibition: 78 | if i.consensus_stimulation: 79 | mor.append(1) 80 | else: 81 | mor.append(-1) 82 | elif i.is_stimulation: 83 | mor.append(1) 84 | elif i.is_inhibition: 85 | mor.append(-1) 86 | else: 87 | mor.append(1) 88 | do['mor'] = mor 89 | # Compute weight based on confidence: mor/confidence 90 | do['weight'] = [i.mor / weights[i.dorothea_level] for i in do.itertuples()] 91 | # 
Format 92 | do = ( 93 | do 94 | .rename(columns={'source_genesymbol': 'source', 'target_genesymbol': 'target', 'dorothea_level': 'confidence'}) 95 | [['source', 'target', 'weight', 'confidence']] 96 | .sort_values('confidence') 97 | ) 98 | do = do[do['confidence'].isin(levels)].reset_index(drop=True) 99 | do = _infer_dtypes(do) 100 | if organism != 'human': 101 | do = translate(do, columns=['source', 'target'], target_organism=organism, verbose=verbose) 102 | do = do.drop_duplicates(['source', 'target']).reset_index(drop=True) 103 | return do 104 | -------------------------------------------------------------------------------- /src/decoupler/op/_dtype.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | 4 | def _infer_dtypes( 5 | df: pd.DataFrame 6 | ) -> pd.DataFrame: 7 | for col in df.columns: 8 | try: 9 | df[col] = pd.to_numeric(df[col]) 10 | continue 11 | except ValueError: 12 | pass 13 | if df[col].dtype == 'string': 14 | df[col] = df[col].astype(str) 15 | lowered = df[col].str.lower() 16 | if lowered.isin(["true", "false"]).all(): 17 | df[col] = lowered == "true" 18 | continue 19 | return df 20 | -------------------------------------------------------------------------------- /src/decoupler/op/_hallmark.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | from decoupler._docs import docs 5 | from decoupler._log import _log 6 | from decoupler._download import URL_INT, _download 7 | from decoupler.op._translate import translate 8 | from decoupler.op._dtype import _infer_dtypes 9 | 10 | 11 | @docs.dedent 12 | def hallmark( 13 | organism: str = 'human', 14 | license: str = 'academic', 15 | verbose: bool = False, 16 | ) -> pd.DataFrame: 17 | """ 18 | Hallmark gene sets :cite:p:`msigdb`. 
19 | 20 | Hallmark gene sets summarize and represent specific well-defined 21 | biological states or processes and display coherent expression. 22 | 23 | Parameters 24 | ---------- 25 | %(organism)s 26 | %(license)s 27 | %(verbose)s 28 | 29 | Returns 30 | ------- 31 | Dataframe in long format containing the hallmark gene sets. 32 | """ 33 | url = 'https://static.omnipathdb.org/tables/msigdb-hallmark.tsv.gz' 34 | hm = _download(url, sep='\t', compression='gzip', verbose=verbose) 35 | hm = hm[['geneset', 'genesymbol']] 36 | hm['geneset'] = hm['geneset'].str.replace('HALLMARK_', '') 37 | hm['genesymbol'] = hm['genesymbol'].str.replace('COMPLEX:', '').str.split('_') 38 | hm = hm.explode('genesymbol') 39 | hm = _infer_dtypes(hm) 40 | if organism != 'human': 41 | hm = translate(hm, columns=['genesymbol'], target_organism=organism, verbose=verbose) 42 | hm = hm.rename(columns={'geneset': 'source', 'genesymbol': 'target'}) 43 | hm = hm.drop_duplicates(['source', 'target']).reset_index(drop=True) 44 | return hm 45 | -------------------------------------------------------------------------------- /src/decoupler/op/_progeny.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | from decoupler._docs import docs 5 | from decoupler._log import _log 6 | from decoupler.op._resource import resource 7 | 8 | 9 | @docs.dedent 10 | def progeny( 11 | organism: str = 'human', 12 | top: int | float = np.inf, 13 | thr_padj: float = 0.05, 14 | license: str = 'academic', 15 | verbose: bool = False, 16 | ) -> pd.DataFrame: 17 | """ 18 | Pathway RespOnsive GENes for activity inference (PROGENy) :cite:p:`progeny`. 19 | 20 | Wrapper to access PROGENy model gene weights. Each pathway is defined with 21 | a collection of target genes, each interaction has an associated p-value 22 | and weight. The top significant interactions per pathway are returned. 
23 | 24 | Here is a brief description of each pathway: 25 | 26 | - **Androgen**: involved in the growth and development of the male reproductive organs 27 | - **EGFR**: regulates growth, survival, migration, apoptosis, proliferation, and differentiation in mammalian cells 28 | - **Estrogen**: promotes the growth and development of the female reproductive organs 29 | - **Hypoxia**: promotes angiogenesis and metabolic reprogramming when O2 levels are low 30 | - **JAK-STAT**: involved in immunity, cell division, cell death, and tumor formation 31 | - **MAPK**: integrates external signals and promotes cell growth and proliferation 32 | - **NFkB**: regulates immune response, cytokine production and cell survival 33 | - **p53**: regulates cell cycle, apoptosis, DNA repair and tumor suppression 34 | - **PI3K**: promotes growth and proliferation 35 | - **TGFb**: involved in development, homeostasis, and repair of most tissues 36 | - **TNFa**: mediates haematopoiesis, immune surveillance, tumour regression and protection from infection 37 | - **Trail**: induces apoptosis 38 | - **VEGF**: mediates angiogenesis, vascular permeability, and cell migration 39 | - **WNT**: regulates organ morphogenesis during development and tissue repair 40 | 41 | Parameters 42 | ---------- 43 | %(organism)s 44 | top 45 | Number of genes per pathway to return. By default all of them. 46 | thr_padj 47 | Significance threshold to trim interactions. 48 | %(license)s 49 | %(verbose)s 50 | 51 | Returns 52 | ------- 53 | Dataframe in long format containing target genes for each pathway with their associated weights and p-values. 54 | """ 55 | # Validate 56 | assert isinstance(top, (int, float)) and top > 0, \ 57 | 'top must be numeric and > 0' 58 | assert isinstance(thr_padj, (int, float)) and 0. 
<= thr_padj <= 1., \ 59 | 'thr_padj must be numeric and between 0 and 1' 60 | # Download 61 | p = resource(name='PROGENy', organism=organism, license=license, verbose=verbose) 62 | p = ( 63 | p 64 | .sort_values('p_value') 65 | .groupby('pathway') 66 | .head(top) 67 | .sort_values(['pathway', 'p_value']) 68 | .reset_index(drop=True) 69 | ) 70 | p = p.rename(columns={'pathway': 'source', 'genesymbol': 'target', 'p_value': 'padj'}) 71 | p = p[p['padj'] < thr_padj] 72 | p = p[['source', 'target', 'weight', 'padj']] 73 | m = f'progeny - filtered interactions for padj < {thr_padj}' 74 | _log(m, level='info', verbose=verbose) 75 | p = p.drop_duplicates(['source', 'target']).reset_index(drop=True) 76 | return p 77 | -------------------------------------------------------------------------------- /src/decoupler/op/_resource.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import requests 4 | import pandas as pd 5 | 6 | from decoupler._docs import docs 7 | from decoupler._log import _log 8 | from decoupler._download import URL_DBS, _download 9 | from decoupler.op._translate import translate 10 | from decoupler.op._dtype import _infer_dtypes 11 | 12 | 13 | def show_resources( 14 | ) -> pd.DataFrame: 15 | """ 16 | Shows available resources in Omnipath :cite:p:`omnipath`. 17 | For more information visit the official 18 | [website](https://omnipathdb.org/). 19 | 20 | Returns 21 | ------- 22 | List of available resources to query with `decoupler.op.resource`. 
23 | """ 24 | ann = pd.read_csv('https://omnipathdb.org/queries/annotations', sep='\t') 25 | ann = ann.set_index('argument').loc['databases'].str.split(';')['values'] 26 | url = 'https://omnipathdb.org/resources' 27 | response = requests.get(url) 28 | lcs = response.json() 29 | df = pd.DataFrame(ann, columns=['name']) 30 | df['license'] = [lcs[a]['license']['purpose'] if a in lcs else None for a in ann] 31 | return df 32 | 33 | 34 | @docs.dedent 35 | def resource( 36 | name: str, 37 | organism: str = 'human', 38 | license: str = 'academic', 39 | verbose: bool = False, 40 | ): 41 | """ 42 | Wrapper to access resources inside Omnipath :cite:p:`omnipath`. 43 | 44 | This wrapper allows one to easily query different prior knowledge resources. To 45 | check available resources run ``decoupler.op.show_resources()``. For more 46 | information visit the official [website](https://omnipathdb.org/). 47 | 48 | Parameters 49 | ---------- 50 | name: 51 | Name of the resource to query. 52 | %(organism)s 53 | %(license)s 54 | %(verbose)s 55 | kwargs 56 | Passed to ``decoupler.op.translate``. 57 | 58 | Returns 59 | ------- 60 | Network in long format.
61 | """ 62 | # Validate 63 | assert isinstance(name, str), 'name must be str' 64 | names = set(show_resources()['name']) 65 | assert name in names, f'name must be one of these: {names}' 66 | assert isinstance(organism, str), 'organism must be str' 67 | assert isinstance(license, str) and license in ['academic', 'commercial', 'nonprofit'], \ 68 | 'license must be academic, commercial or nonprofit' 69 | assert isinstance(verbose, bool), 'verbose must be bool' 70 | m = f'Accessing {name} with {license} license' 71 | _log(m, level='info', verbose=verbose) 72 | # Download 73 | url = URL_DBS + f'{name}&license={license}' 74 | df = _download(url, sep='\t', verbose=verbose) 75 | # Process 76 | labels = df['label'].unique() 77 | for label in labels: 78 | if label in df.columns: 79 | df.loc[df['label'] == label, 'label'] = f'_{label}' 80 | df = df[['genesymbol', 'label', 'value', 'record_id']] 81 | df = df.pivot(index=["genesymbol", "record_id"], columns="label", values="value").reset_index() 82 | df.index.name = '' 83 | df.columns.name = '' 84 | cols_to_remove = ['record_id', 'entity_type', '_entity_type'] 85 | df = df.drop(columns=[c for c in cols_to_remove if c in df.columns]) 86 | df = _infer_dtypes(df) 87 | if organism != 'human': 88 | df = translate(df, columns='genesymbol', target_organism=organism, verbose=verbose) 89 | return df 90 | -------------------------------------------------------------------------------- /src/decoupler/pl/__init__.py: -------------------------------------------------------------------------------- 1 | from decoupler.pl._barplot import barplot 2 | from decoupler.pl._dotplot import dotplot 3 | from decoupler.pl._filter_by_expr import filter_by_expr 4 | from decoupler.pl._filter_by_prop import filter_by_prop 5 | from decoupler.pl._leading_edge import leading_edge 6 | from decoupler.pl._network import network 7 | from decoupler.pl._obsbar import obsbar 8 | from decoupler.pl._order_targets import order_targets 9 | from decoupler.pl._order 
import order 10 | from decoupler.pl._obsm import obsm 11 | from decoupler.pl._filter_samples import filter_samples 12 | from decoupler.pl._source_targets import source_targets 13 | from decoupler.pl._volcano import volcano 14 | -------------------------------------------------------------------------------- /src/decoupler/pl/_barplot.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | 3 | import numpy as np 4 | import pandas as pd 5 | import matplotlib 6 | import matplotlib.pyplot as plt 7 | from matplotlib.figure import Figure 8 | import seaborn as sns 9 | 10 | from decoupler._docs import docs 11 | from decoupler._Plotter import Plotter 12 | 13 | 14 | def _set_limits( 15 | vmin: int | float, 16 | vcenter: int | float, 17 | vmax: int | float, 18 | values: np.ndarray 19 | ) -> Tuple[float, float, float]: 20 | assert np.isfinite(values).all(), 'values in data mut be finite' 21 | assert isinstance(vmin, (int, float)) or vmin is None, 'vmin must be numerical or None' 22 | assert isinstance(vcenter, (int, float)) or vcenter is None, 'vcenter must be numerical or None' 23 | assert isinstance(vmax, (int, float)) or vmax is None, 'vmax must be numerical or None' 24 | if vmin is None: 25 | vmin = values.min() 26 | if vmax is None: 27 | vmax = values.max() 28 | if vcenter is None: 29 | vcenter = values.mean() 30 | if vmin >= vcenter: 31 | vmin = -vmax 32 | if vcenter >= vmax: 33 | vmax = -vmin 34 | return vmin, vcenter, vmax 35 | 36 | 37 | @docs.dedent 38 | def barplot( 39 | data: pd.DataFrame, 40 | name: str, 41 | top: int = 25, 42 | vertical: bool = False, 43 | cmap: str = 'RdBu_r', 44 | vmin: float | None = None, 45 | vcenter: float | None = 0, 46 | vmax: float | None = None, 47 | **kwargs, 48 | ) -> None | Figure: 49 | """ 50 | Plot barplots showing top scores. 51 | 52 | Parameters 53 | ---------- 54 | data 55 | DataFrame in wide format containing enrichment scores (contrasts, sources). 
56 | name 57 | Name of the contrast (row) to plot. 58 | %(top)s 59 | vertical 60 | Whether to plot the bars verticaly or horizontaly. 61 | %(cmap)s 62 | %(vmin)s 63 | %(vcenter)s 64 | %(vmax)s 65 | %(plot)s 66 | """ 67 | # Validate 68 | assert isinstance(data, pd.DataFrame), 'data must be pandas.DataFrame' 69 | assert isinstance(name, str) and name in data.index, \ 70 | 'name must be str and in data.index' 71 | assert isinstance(top, int) and top > 0, 'top must be int and > 0' 72 | assert isinstance(vertical, bool), 'vertical must be bool' 73 | # Process df 74 | df = data.loc[[name]] 75 | df.index.name = None 76 | df.columns.name = None 77 | df = df.melt(var_name='source', value_name='score') 78 | df['abs_score'] = df['score'].abs() 79 | df = df.sort_values('abs_score', ascending=False) 80 | df = df.head(top).sort_values('score', ascending=False) 81 | if not vertical: 82 | x, y = 'score', 'source' 83 | else: 84 | x, y = 'source', 'score' 85 | # Instance 86 | bp = Plotter(**kwargs) 87 | # Plot 88 | sns.barplot(data=df, x=x, y=y, ax=bp.ax) 89 | if not vertical: 90 | sizes = np.array([bar.get_width() for bar in bp.ax.containers[0]]) 91 | bp.ax.set_xlabel('Score') 92 | bp.ax.set_ylabel('') 93 | else: 94 | sizes = np.array([bar.get_height() for bar in bp.ax.containers[0]]) 95 | bp.ax.tick_params(axis='x', rotation=90) 96 | bp.ax.set_ylabel('Score') 97 | bp.ax.set_xlabel('') 98 | bp.ax.invert_xaxis() 99 | # Compute color limits 100 | vmin, vcenter, vmax = _set_limits(vmin, vcenter, vmax, df['score']) 101 | # Rescale cmap 102 | divnorm = matplotlib.colors.TwoSlopeNorm(vmin=vmin, vcenter=vcenter, vmax=vmax) 103 | cmap_f = plt.get_cmap(cmap) 104 | div_colors = cmap_f(divnorm(sizes)) 105 | for bar, color in zip(bp.ax.containers[0], div_colors): 106 | bar.set_facecolor(color) 107 | # Add legend 108 | sm = plt.cm.ScalarMappable(cmap=cmap, norm=divnorm) 109 | sm.set_array([]) 110 | bp.fig.colorbar(sm, ax=bp.ax, shrink=0.5) 111 | return bp._return() 112 | 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.figure import Figure
from matplotlib.colors import TwoSlopeNorm

from decoupler._docs import docs
from decoupler._Plotter import Plotter


@docs.dedent
def dotplot(
    df: pd.DataFrame,
    x: str,
    y: str,
    c: str,
    s: str,
    top: int | float = 10,
    scale: int | float = 0.15,
    cmap: str = 'RdBu_r',
    vcenter: int | float | None = None,
    **kwargs
) -> None | Figure:
    """
    Plot results of enrichment analysis as dots.

    Parameters
    ----------
    df
        DataFrame containing enrichment results.
    x
        Name of the column containing values to place on the x-axis.
    y
        Name of the column containing values to place on the y-axis.
    c
        Name of the column containing values to use for coloring.
    s
        Name of the column containing values to use for setting the size of the dots.
    %(top)s
    scale
        Scale of the dots.
    %(cmap)s
    %(vcenter)s
    %(plot)s
    """
    # Validate
    assert isinstance(df, pd.DataFrame), 'df must be a pd.DataFrame'
    assert isinstance(x, str) and x in df.columns, 'x must be str and in df.columns'
    assert isinstance(y, str) and y in df.columns, 'y must be str and in df.columns'
    assert isinstance(c, str) and c in df.columns, 'c must be str and in df.columns'
    assert isinstance(s, str) and s in df.columns, 's must be str and in df.columns'
    assert isinstance(top, (int, float)) and top > 0, 'top must be numerical and > 0'
    assert isinstance(scale, (int, float)), 'scale must be numerical'
    assert isinstance(vcenter, (int, float)) or vcenter is None, 'vcenter must be numeric or None'
    # Keep the `top` features with largest |x| (top is validated as numeric,
    # so cast to int since DataFrame.head requires an integer)
    df = df.copy()
    df['abs_x_col'] = df[x].abs()
    df = df.sort_values('abs_x_col', ascending=False).head(int(top))
    # Extract columns from df as arrays
    x_vals = df[x].values
    y_vals = df[y].values
    c_vals = df[c].values
    s_vals = df[s].values
    # Sort by x so dots are drawn left to right
    idxs = np.argsort(x_vals)
    x_vals = x_vals[idxs]
    y_vals = y_vals[idxs]
    c_vals = c_vals[idxs]
    s_vals = s_vals[idxs]
    # Instance
    bp = Plotter(**kwargs)
    # Dot areas: scatter sizes are in points^2, hence the square
    ns = (s_vals * scale * plt.rcParams["lines.markersize"]) ** 2
    bp.ax.grid(axis='x')
    # BUG FIX: the original tested `if vcenter:` which silently ignored
    # vcenter=0, the most common center for a diverging colormap.
    if vcenter is not None:
        norm = TwoSlopeNorm(vmin=None, vcenter=vcenter, vmax=None)
    else:
        norm = None
    scatter = bp.ax.scatter(
        x=x_vals,
        y=y_vals,
        c=c_vals,
        s=ns,
        cmap=cmap,
        norm=norm,
    )
    bp.ax.set_axisbelow(True)
    bp.ax.set_xlabel(x)
    # Add size legend: invert the area transform to recover original s values
    # (lambda arg renamed from `s` to avoid shadowing the column-name parameter)
    handles, labels = scatter.legend_elements(
        prop="sizes",
        num=3,
        fmt="{x:.2f}",
        func=lambda sz: np.sqrt(sz) / plt.rcParams["lines.markersize"] / scale
    )
    bp.ax.legend(
        handles,
        labels,
        title=s,
        frameon=False,
        loc='lower left',
        bbox_to_anchor=(1.05, 0.5),
        alignment='left',
        labelspacing=1.
    )
    # Add colorbar
    clb = bp.fig.colorbar(
        scatter,
        ax=bp.ax,
        shrink=0.25,
        aspect=5,
        orientation='vertical',
        anchor=(0., 0.),
    )
    clb.ax.set_title(c, loc="left",)
    bp.ax.margins(x=0.25, y=0.1)
    return bp._return()
import numpy as np
from matplotlib.figure import Figure
import seaborn as sns
from anndata import AnnData

from decoupler._docs import docs
from decoupler._Plotter import Plotter
from decoupler.pp.data import extract
from decoupler.pp.anndata import _min_sample_size, _ssize_tcount


@docs.dedent
def filter_by_expr(
    adata: AnnData,
    group: str | None = None,
    lib_size: float | None = None,
    min_count: int = 10,
    min_total_count: int = 15,
    large_n: int = 10,
    min_prop: float = 0.7,
    cmap: str = 'viridis',
    **kwargs,
) -> None | Figure:
    """
    Plot to help determining the thresholds of the ``decoupler.pp.filter_by_expr`` function.

    Parameters
    ----------
    %(adata)s
    %(cmap)s
    %(group)s
    %(lib_size)s
    %(min_count)s
    %(min_total_count)s
    %(large_n)s
    %(min_prop_expr)s
    %(plot)s
    """
    assert isinstance(adata, AnnData), 'adata must be AnnData'
    # Extract the data matrix; sample metadata comes from .obs
    mat, _, _ = extract(adata, empty=False)
    metadata = adata.obs
    # Cutoff on the minimum number of samples (same rule as pp.filter_by_expr)
    n_min = _min_sample_size(
        obs=metadata,
        group=group,
        large_n=large_n,
        min_prop=min_prop,
    )
    # Per-gene: number of samples passing min_count, and total counts
    ssize, tcount = _ssize_tcount(
        X=mat,
        lib_size=lib_size,
        min_count=min_count,
    )
    # Genes with total counts below 1 would break the log scale, mask them out
    tcount[tcount < 1.] = np.nan
    # Instance
    plot = Plotter(**kwargs)
    # 2D histogram of log total counts vs sample size
    sns.histplot(
        x=np.log10(tcount),
        y=ssize,
        cmap=cmap,
        cbar=True,
        cbar_kws=dict(shrink=.75, label='Number of genes'),
        discrete=(False, True),
        ax=plot.ax,
    )
    # Dashed lines mark the two filtering thresholds
    plot.ax.axhline(y=n_min - 0.5, c='gray', ls='--')
    plot.ax.axvline(x=np.log10(min_total_count), c='gray', ls='--')
    plot.ax.set_xlabel(r'$\log_{10}$ total sum of counts')
    plot.ax.set_ylabel('Number of samples')
    return plot._return()
import pandas as pd
import numpy as np
from anndata import AnnData
from matplotlib.figure import Figure

from decoupler._docs import docs
from decoupler._Plotter import Plotter


@docs.dedent
def filter_by_prop(
    adata: AnnData,
    min_prop: float = 0.1,
    min_smpls: int = 2,
    log: bool = True,
    color = 'gray',
    **kwargs
) -> None | Figure:
    """
    Plot to help determining the thresholds of the ``decoupler.pp.filter_by_prop`` function.

    Parameters
    ----------
    %(adata)s
    %(min_prop_prop)s
    %(min_smpls)s
    log
        Whether to log-scale the y axis.
    color
        Color to use in ``matplotlib.pyplot.hist``.
    %(plot)s
    """
    assert isinstance(adata, AnnData), 'adata must be AnnData'
    # FIX: corrected typo in error message ("afer" -> "after")
    assert 'psbulk_props' in adata.layers.keys(), \
        'psbulk_props must be in adata.layers, use this function after running decoupler.pp.pseudobulk'
    props = adata.layers['psbulk_props']
    if isinstance(props, pd.DataFrame):
        props = props.values
    # Per-gene number of samples where the gene is expressed in >= min_prop of cells
    nsmpls = np.sum(props >= min_prop, axis=0)
    # Instance
    bp = Plotter(**kwargs)
    # Histogram of genes per sample count; align='left' centers bars on integers
    _ = bp.ax.hist(
        nsmpls,
        bins=range(min(nsmpls), max(nsmpls) + 2),
        log=log,
        color=color,
        align='left',
        rwidth=0.95,
    )
    # Dashed line marks the min_smpls filtering threshold
    bp.ax.axvline(x=min_smpls - 0.5, c='black', ls='--')
    bp.ax.set_xlabel('Samples (≥ min_prop)')
    bp.ax.set_ylabel('Number of genes')
    return bp._return()
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.figure import Figure
import seaborn as sns
from anndata import AnnData

from decoupler._docs import docs
from decoupler._Plotter import Plotter


def _draw_panel(ax, df, grp, label_x, label_y, min_cells, min_counts):
    # Draw one cells-vs-counts scatter panel colored by `grp`, with threshold lines.
    ax.grid(zorder=0)
    ax.set_axisbelow(True)
    sns.scatterplot(x='psbulk_cells', y='psbulk_counts', hue=grp, ax=ax, data=df, zorder=1)
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), frameon=False, title=grp)
    ax.set_xlabel(label_x)
    ax.set_ylabel(label_y)
    ax.axvline(x=min_cells, linestyle='--', color="black")
    ax.axhline(y=min_counts, linestyle='--', color="black")


@docs.dedent
def filter_samples(
    adata: AnnData,
    groupby: str | list,
    log: bool = True,
    min_cells: int | float = 10,
    min_counts: int | float = 1000,
    **kwargs
) -> None | Figure:
    """
    Plot to assess the quality of the obtained pseudobulk samples from ``decoupler.pp.pseudobulk``.

    Parameters
    ----------
    %(adata)s
    groupby
        Name or names of the ``adata.obs`` column/s to group by.
    log
        If set, log10 transform the ``psbulk_cells`` and ``psbulk_counts`` columns during visualization.
    %(min_cells)s
    %(min_counts)s
    %(plot)s
    """
    # Validate
    assert isinstance(adata, AnnData), 'adata must be AnnData'
    assert isinstance(adata.obs, pd.DataFrame) and adata.obs is not None, \
        f'adata.obs must be a pd.DataFrame not {type(adata.obs)}'
    assert all(col in adata.obs.columns for col in ['psbulk_cells', 'psbulk_counts']), \
        'psbulk_* columns not present in adata.obs, this function should be used after running decoupler.pp.pseudobulk'
    assert isinstance(groupby, (str, list)), 'groupby must be str or list'
    if isinstance(groupby, str):
        groupby = [groupby]
    assert all(col in adata.obs for col in groupby), 'columns in groupby must be in adata.obs'
    # Extract obs
    df = adata.obs.copy()
    # Transform to log10
    label_x, label_y = 'cells', 'counts'
    if log:
        df['psbulk_cells'] = np.log10(df['psbulk_cells'] + 1)
        df['psbulk_counts'] = np.log10(df['psbulk_counts'] + 1)
        label_x, label_y = r'$\log_{10}$ ' + label_x, r'$\log_{10}$ ' + label_y
        # NOTE(review): thresholds use log10(x) while data uses log10(x + 1);
        # the offset is negligible for typical thresholds — confirm intended.
        min_cells, min_counts = np.log10(min_cells), np.log10(min_counts)
    # Plot
    if len(groupby) > 1:
        # One stacked panel per grouping column; cannot reuse a caller-given ax
        assert kwargs.get('ax') is None, 'when groupby is list, ax must be None'
        kwargs['ax'] = None
        bp = Plotter(**kwargs)
        bp.fig.delaxes(bp.ax)
        plt.close(bp.fig)
        bp.fig, axes = plt.subplots(len(groupby), 1, figsize=bp.figsize, dpi=bp.dpi, tight_layout=True)
        axes = axes.ravel()
        for ax, grp in zip(axes, groupby):
            _draw_panel(ax, df, grp, label_x, label_y, min_cells, min_counts)
    else:
        # Single panel on the Plotter-managed axes
        groupby = groupby[0]
        bp = Plotter(**kwargs)
        _draw_panel(bp.ax, df, groupby, label_x, label_y, min_cells, min_counts)
    return bp._return()
import matplotlib.pyplot as plt
from matplotlib.figure import Figure
import seaborn as sns
from anndata import AnnData

from decoupler._docs import docs
from decoupler._Plotter import Plotter


@docs.dedent
def obsbar(
    adata: AnnData,
    y: str,
    hue: str | None = None,
    kw_barplot: dict = dict(),
    **kwargs
) -> None | Figure:
    """
    Plot ``adata.obs`` metadata as a grouped barplot.

    Parameters
    ----------
    %(adata)s
    y
        Column name in ``adata.obs`` to plot in y axis.
    hue
        Column name in ``adata.obs`` to color bars.
    kw_barplot
        Keyword arguments passed to ``seaborn.barplot``.
    %(plot)s
    """
    # Validate inputs
    assert isinstance(adata, AnnData), 'adata must be an AnnData instance'
    assert isinstance(y, str), 'y must be str'
    assert isinstance(hue, str) or hue is None, 'hue must be str or None'
    # Unique grouping columns, dropping hue when it is unset
    group_cols = {y, hue} - {None}
    assert group_cols.issubset(adata.obs.columns), \
        f'y={y} and hue={hue} must be in adata.obs.columns={adata.obs.columns}'
    group_cols = list(group_cols)
    # Count observations per category combination
    counts = (
        adata.obs
        .groupby(group_cols, observed=True, as_index=False)
        .size()
    )
    # Instance
    bp = Plotter(**kwargs)
    # Horizontal bars: counts on x, categories on y
    sns.barplot(
        data=counts,
        y=y,
        x='size',
        hue=hue,
        ax=bp.ax,
        **kw_barplot
    )
    # Move the legend outside unless it would duplicate the y categories
    if hue is not None and y != hue:
        bp.ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), frameon=False, title=hue)
    return bp._return()
import pandas as pd
import numpy as np
from matplotlib.colors import to_rgb
from matplotlib.figure import Figure
import seaborn as sns

from decoupler._docs import docs
from decoupler._Plotter import Plotter


@docs.dedent
def order(
    df: pd.DataFrame,
    mode: str = 'line',
    kw_order = dict(),
    **kwargs
) -> None | Figure:
    """
    Plot features along a continuous, ordered process such as pseudotime.

    Parameters
    ----------
    df
        Results of ``decoupler.pp.bin_order``.
    mode
        The type of plot to use, either "line" or "mat".
    kw_order
        Other keyword arguments are passed down to ``seaborn.lineplot`` or ``matplotlib.pyplot.imshow``,
        depending on ``mode`` used.
    %(plot)s
    """
    # Validate
    assert isinstance(df, pd.DataFrame), 'df must be pandas.DataFrame'
    assert isinstance(mode, str) and mode in ['line', 'mat'], \
        'mode must be str and either "line" or "mat"'
    assert isinstance(kw_order, dict), \
        'kw_order must be dict'
    # Axis ranges: max value for y, order span for x, number of features
    ymax = df['value'].max()
    xmin, xmax = df['order'].min(), df['order'].max()
    n_names = df['name'].unique().size
    # Optional annotation strip: only drawn when df carries label/color columns
    has_cbar = False
    if np.isin(['label', 'color'], df.columns).all():
        # Colors of the bins for one feature (same per feature)
        # NOTE(review): df.loc[0, 'name'] assumes a default RangeIndex — confirm
        colors = df[df['name'] == df.loc[0, 'name']]['color']
        colors = [[to_rgb(c) for c in colors]]
        has_cbar = True
    # Instance
    bp = Plotter(**kwargs)
    # Plot
    if mode == 'line':
        if has_cbar:
            # Annotation strip drawn just above the curves (5%-20% above ymax)
            bp.ax.imshow(
                colors,
                aspect='auto',
                extent=[xmin, xmax, 1.05 * ymax, 1.2 * ymax],
                transform=bp.ax.transData,
                zorder=2
            )
            bp.ax.axhline(y=1.05 * ymax, c='black', lw=1)
        # Copy before setdefault so the caller's dict is not mutated
        kw_order = kw_order.copy()
        kw_order.setdefault('palette', 'tab20')
        # One line per feature along the ordering variable
        sns.lineplot(
            data=df,
            x='order',
            y='value',
            hue='name',
            ax=bp.ax,
            **kw_order
        )
        bp.ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), frameon=False)
    elif mode == 'mat':
        # Feature x bin matrix of mean values
        mat = (
            df
            .groupby(['name', 'order'], as_index=False)['value'].mean()
            .pivot(index='name', columns='order', values='value')
        )
        img = bp.ax.imshow(mat, extent=[xmin, xmax, 0, n_names], aspect='auto', **kw_order)
        if has_cbar:
            # Annotation strip drawn above the heatmap rows
            bp.ax.imshow(colors, aspect='auto', extent=[xmin, xmax, n_names, 1.1 * n_names], zorder=2)
            bp.ax.axhline(y=n_names, c='black', lw=1)
            bp.ax.set_ylim(0, 1.1 * n_names)
        bp.fig.colorbar(img, ax=bp.ax, shrink=0.5, label='Mean value', location='top')
        # Tick labels flipped because imshow draws the first row at the top
        bp.ax.set_yticks(np.arange(n_names) + 0.5)
        bp.ax.set_yticklabels(np.flip(mat.index))
        bp.ax.grid(axis='y', visible=False)
    bp.ax.set_xlabel('order')
    bp.ax.set_xlim(xmin, xmax)
    return bp._return()
import pandas as pd
import numpy as np
from matplotlib.figure import Figure
import adjustText as at

from decoupler._docs import docs
from decoupler._Plotter import Plotter
from decoupler.pp.net import _validate_net


@docs.dedent
def source_targets(
    data: pd.DataFrame,
    net: pd.DataFrame,
    x: str,
    y: str,
    name: str,
    top: int = 5,
    thr_x: float = 0.,
    thr_y: float = 0.,
    max_x: float | None = None,
    max_y: float | None = None,
    color_pos: str = '#D62728',
    color_neg: str = '#1F77B4',
    **kwargs,
) -> None | Figure:
    """
    Plots target features of a given source as a scatter plot.

    Parameters
    ----------
    %(data_plot)s
    %(net)s
    x
        Name of the column containing values to place on the x-axis.
    y
        Name of the column containing values to place on the y-axis.
    name
        Name of the source to plot.
    top
        Number of top features based on the product of x and y to label.
    thr_x
        Value where to place a baseline for the x-axis.
    thr_y
        Value where to place a baseline for the y-axis.
    max_x
        Maximum value to plot on x-axis.
    max_y
        Maximum value to plot on y-axis.
    color_pos
        Color to plot positively associated features.
    color_neg
        Color to plot negatively associated features.
    %(plot)s
    """
    # Validate inputs
    m = f'data must be a pd.DataFrame containing the columns {x} and {y}'
    assert isinstance(data, pd.DataFrame), m
    assert {x, y}.issubset(data.columns.union(net.columns)), m
    assert not pd.api.types.is_numeric_dtype(data.index), 'data index must be features in net'
    assert isinstance(net, pd.DataFrame), \
        f'net must be a pd.DataFrame containing the columns {x} and {y}'
    assert isinstance(name, str), 'name must be a str'
    assert isinstance(top, int) and top > 0, 'top must be int and > 0'
    assert isinstance(thr_x, (int, float)), 'thr_x must be numeric'
    assert isinstance(thr_y, (int, float)), 'thr_y must be numeric'
    if max_x is None:
        max_x = np.inf
    if max_y is None:
        max_y = np.inf
    assert isinstance(max_x, (int, float)) and max_x > 0, \
        'max_x must be None, or numeric and > 0'
    assert isinstance(max_y, (int, float)) and max_y > 0, \
        'max_y must be None, or numeric and > 0'
    assert isinstance(color_pos, str), 'color_pos must be str'
    assert isinstance(color_neg, str), 'color_neg must be str'
    # Instance
    bp = Plotter(**kwargs)
    # Extract df
    df = data.copy().reset_index(names='target')
    # Filter by net shared targets
    vnet = _validate_net(net)
    snet = vnet[vnet['source'] == name]
    assert snet.shape[0] > 0, f'name={name} must be in net["source"]'
    df = pd.merge(df, snet, on=['target'], how='inner').set_index('target')
    # Filter by limits
    msk_x = np.abs(df[x]) < np.abs(max_x)
    msk_y = np.abs(df[y]) < np.abs(max_y)
    df = df.loc[msk_x & msk_y]
    # Same-sign quadrants are "positive" associations, opposite-sign negative
    pos = ((df[x] >= 0) & (df[y] >= 0)) | ((df[x] < 0) & (df[y] < 0))
    df['color'] = color_neg
    df.loc[pos, 'color'] = color_pos
    # Plot
    df.plot.scatter(x=x, y=y, c='color', ax=bp.ax)
    # Draw thr lines
    bp.ax.axvline(x=thr_x, linestyle='--', color="black")
    bp.ax.axhline(y=thr_y, linestyle='--', color="black")
    # Add labels
    bp.ax.set_title(name)
    bp.ax.set_xlabel(x)
    bp.ax.set_ylabel(y)
    # Show top features, ranked by |x| * |y|
    df['order'] = df[x].abs() * df[y].abs()
    signs = df.sort_values('order', ascending=False)
    signs = signs.iloc[:top]
    texts = []
    # BUG FIX: loop variables renamed — the original reused `x`/`y`, shadowing
    # the column-name parameters and leaving them bound to scalars afterwards.
    for tx, ty, label in zip(signs[x], signs[y], signs.index):
        texts.append(bp.ax.text(tx, ty, label))
    if len(texts) > 0:
        at.adjust_text(texts, arrowprops=dict(arrowstyle='-', color='black'), ax=bp.ax)
    return bp._return()
color="black") 99 | # Add labels 100 | bp.ax.set_title(name) 101 | bp.ax.set_xlabel(x) 102 | bp.ax.set_ylabel(y) 103 | # Show top features 104 | df['order'] = df[x].abs() * df[y].abs() 105 | signs = df.sort_values('order', ascending=False) 106 | signs = signs.iloc[:top] 107 | texts = [] 108 | for x, y, s in zip(signs[x], signs[y], signs.index): 109 | texts.append(bp.ax.text(x, y, s)) 110 | if len(texts) > 0: 111 | at.adjust_text(texts, arrowprops=dict(arrowstyle='-', color='black'), ax=bp.ax) 112 | return bp._return() 113 | -------------------------------------------------------------------------------- /src/decoupler/pp/__init__.py: -------------------------------------------------------------------------------- 1 | from .net import read_gmt, prune, adjmat, idxmat, shuffle_net, net_corr 2 | from .data import extract 3 | from .anndata import get_obsm, swap_layer, pseudobulk, filter_samples, \ 4 | filter_by_expr, filter_by_prop, knn, bin_order 5 | -------------------------------------------------------------------------------- /src/decoupler/tl/__init__.py: -------------------------------------------------------------------------------- 1 | from decoupler.tl._rankby_group import rankby_group 2 | from decoupler.tl._rankby_obsm import rankby_obsm 3 | from decoupler.tl._rankby_order import rankby_order 4 | -------------------------------------------------------------------------------- /src/decoupler/tl/_rankby_obsm.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | 3 | import pandas as pd 4 | import scipy.stats as sts 5 | from anndata import AnnData 6 | 7 | from decoupler._docs import docs 8 | 9 | 10 | def _input_rank_obsm( 11 | adata: AnnData, 12 | key: str, 13 | ) -> Tuple[pd.DataFrame, list, list]: 14 | # Validate 15 | assert isinstance(adata, AnnData), 'adata must be anndata.AnnData' 16 | assert key in adata.obsm, f'key={key} must be in adata.obsm' 17 | # Process 18 | name_col = ( 19 | key 20 | 
.replace('X_', '') 21 | .replace('pca', 'PC') 22 | .replace('mofa', 'Factor') 23 | .replace('umap', 'UMAP') 24 | ) 25 | df = adata.obsm[key] 26 | if isinstance(df, pd.DataFrame): 27 | y_vars = df.std(ddof=1, axis=0).sort_values(ascending=False).index 28 | df = df.loc[:, y_vars].values 29 | else: 30 | ncol = df.shape[1] 31 | digits = len(str(ncol)) 32 | y_vars = [f"{name_col}{str(i).zfill(digits)}" for i in range(1, ncol + 1)] 33 | df = pd.DataFrame( 34 | data=df, 35 | index=adata.obs_names, 36 | columns=y_vars 37 | ) 38 | x_vars = adata.obs.columns 39 | # Merge 40 | df = pd.merge(df, adata.obs, left_index=True, right_index=True) 41 | return df, x_vars, y_vars 42 | 43 | 44 | @docs.dedent 45 | def rankby_obsm( 46 | adata: AnnData, 47 | key: str, 48 | uns_key: str | None = 'rank_obsm', 49 | ) -> None | pd.DataFrame: 50 | """ 51 | Ranks features in ``adata.obsm`` by the significance of their association with metadata in ``adata.obs``. 52 | 53 | For categorical variables it uses ANOVA, for continous Spearman's correlation. 54 | 55 | The obtained p-values are corrected by Benjamini-Hochberg. 56 | 57 | Parameters 58 | ---------- 59 | %(adata)s 60 | %(key)s 61 | uns_key 62 | ``adata.uns`` key to store the results. 63 | 64 | Returns 65 | ------- 66 | If ``uns_key=False``, a pandas.DataFrame with the resulting statistics. 
67 | """ 68 | assert isinstance(uns_key, str) or uns_key is None, \ 69 | 'uns_key must be str or None' 70 | # Extract 71 | df, x_vars, y_vars = _input_rank_obsm(adata=adata, key=key) 72 | # Test 73 | res = [] 74 | for x_var in x_vars: 75 | for y_var in y_vars: 76 | if pd.api.types.is_numeric_dtype(df[x_var]): 77 | # Correlation 78 | x = df[x_var].values.ravel() 79 | y = df[y_var].values.ravel() 80 | stat, pval = sts.spearmanr(x, y) 81 | else: 82 | # ANOVA 83 | x = [group[y_var].dropna().values for _, group in df.groupby(x_var, observed=True)] 84 | # At least n=2 per group else skip 85 | if all(len(g) >= 2 for g in x): 86 | stat, pval = sts.f_oneway(*x) 87 | else: 88 | stat, pval = None, 1. 89 | row = [y_var, x_var, stat, pval] 90 | res.append(row) 91 | res = pd.DataFrame(res, columns=['obsm', 'obs', 'stat', 'pval']) 92 | res['padj'] = sts.false_discovery_control(res['pval']) 93 | # Rank 94 | res = res.sort_values('padj').reset_index(drop=True) 95 | # Add obsm key 96 | res.key = key 97 | # Save or return 98 | if uns_key: 99 | adata.uns[uns_key] = res 100 | else: 101 | return res 102 | -------------------------------------------------------------------------------- /src/decoupler/tl/_rankby_order.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | import pandas as pd 4 | import numpy as np 5 | from tqdm.auto import tqdm 6 | import scipy.stats as sts 7 | import scipy.sparse as sps 8 | from anndata import AnnData 9 | 10 | from decoupler._odeps import dcor, _check_import 11 | from decoupler._docs import docs 12 | from decoupler.pp.data import extract 13 | 14 | 15 | 16 | @docs.dedent 17 | def rankby_order( 18 | adata: AnnData, 19 | order: str, 20 | stat: str = 'dcor', 21 | verbose: bool = False, 22 | **kwargs 23 | ) -> pd.DataFrame: 24 | """ 25 | Rank features along a continuous, ordered process such as pseudotime. 
import warnings

import pandas as pd
import numpy as np
from tqdm.auto import tqdm
import scipy.stats as sts
import scipy.sparse as sps
from anndata import AnnData

from decoupler._odeps import dcor, _check_import
from decoupler._docs import docs
from decoupler.pp.data import extract


@docs.dedent
def rankby_order(
    adata: AnnData,
    order: str,
    stat: str = 'dcor',
    verbose: bool = False,
    **kwargs
) -> pd.DataFrame:
    """
    Rank features along a continuous, ordered process such as pseudotime.

    Parameters
    ----------
    %(adata)s
    %(order)s
    stat
        Which statistic to compute.
        Must be one of these:

        - ``dcor`` (distance correlation from ``dcor.independence.distance_correlation_t_test``)
        - ``pearsonr`` (Pearson's R from ``scipy.stats.pearsonr``)
        - ``spearmanr`` (Spearman's R from ``scipy.stats.spearmanr``)
        - ``kendalltau`` (Kendall's Tau from ``scipy.stats.kendalltau``)

    %(verbose)s
    kwargs
        Key arguments passed to the selected ``stat`` function.

    Returns
    -------
    DataFrame with features associated with the ordering variable.
    """
    # Validate
    assert isinstance(adata, AnnData), 'adata must be anndata.AnnData'
    assert isinstance(order, str) and order in adata.obs.columns, 'order must be str and in adata.obs.columns'
    stats = {'dcor', 'pearsonr', 'spearmanr', 'kendalltau'}
    assert (isinstance(stat, str) and stat in stats) or callable(stat), \
        f'stat must be str and one of these {stats}, or a function that returns statistic and pvalue'
    # Get vars and ordinal variable as dense float arrays
    X = adata.X
    if sps.issparse(X):
        X = X.toarray()
    X = X.astype(float)
    y = adata.obs[order].values.astype(float)
    # Init
    df = pd.DataFrame()
    df['name'] = adata.var_names
    # Resolve the statistic function
    if stat == 'dcor':
        _check_import(dcor)
        f = dcor.independence.distance_correlation_t_test
    elif stat == 'pearsonr':
        f = sts.pearsonr
    elif stat == 'spearmanr':
        f = sts.spearmanr
    elif stat == 'kendalltau':
        f = sts.kendalltau
    else:
        f = stat
    ss = []
    ps = []
    for i in tqdm(range(X.shape[1]), disable=not verbose):
        x = X[:, i]
        if not np.all(x == x[0]):
            res = f(x, y)
            # FIX: accept both result objects exposing .statistic/.pvalue
            # (scipy, dcor) and plain (statistic, pvalue) tuples, as the
            # docstring promises for user-supplied callables.
            if hasattr(res, 'statistic'):
                s, p = res.statistic, res.pvalue
            else:
                s, p = res
        else:
            # Constant features carry no signal: neutral statistic, p = 1
            s = 0
            p = 1
        ss.append(s)
        ps.append(p)
    df['stat'] = ss
    df['pval'] = ps
    # Benjamini-Hochberg correction, then rank by significance and effect size
    df['padj'] = sts.false_discovery_control(df['pval'])
    df['abs_stat'] = df['stat'].abs()
    df = df.sort_values(['padj', 'pval', 'abs_stat'], ascending=[True, True, False]).reset_index(drop=True)
    df = df.drop(columns='abs_stat')
    return df
import pandas as pd
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
from matplotlib.figure import Figure
import pytest

import decoupler as dc

@pytest.fixture
def df():
    # Toy benchmark results: two methods scored on three metric families
    # (auroc/auprc, recall/precision, 1-qrank/-log10(pval))
    df = pd.DataFrame(
        data=[
            ['aucell', 'auroc', 0.45],
            ['aucell', 'auprc', 0.55],
            ['ulm', 'auroc', 0.9],
            ['ulm', 'auprc', 0.8],
            ['aucell', 'recall', 0.45],
            ['aucell', 'precision', 0.55],
            ['ulm', 'recall', 0.9],
            ['ulm', 'precision', 0.8],
            ['aucell', '1-qrank', 0.45],
            ['aucell', '-log10(pval)', 0.9],
            ['ulm', '1-qrank', 0.9],
            ['ulm', '-log10(pval)', 5.6],
        ],
        columns = ['method', 'metric', 'score']
    )
    return df


@pytest.fixture
def hdf(
    df,
):
    # Harmonic-mean summary of the per-metric scores
    hdf = dc.bm.metric.hmean(df)
    return hdf


def test_auc(
    df,
):
    # AUC plot works both without and with a hue grouping
    fig = dc.bm.pl.auc(df=df, hue=None, return_fig=True)
    assert isinstance(fig, Figure)
    plt.close(fig)
    fig = dc.bm.pl.auc(df=df, hue='method', return_fig=True)
    assert isinstance(fig, Figure)
    plt.close(fig)


def test_fscore(
    df,
):
    # F-score plot works both without and with a hue grouping
    fig = dc.bm.pl.fscore(df=df, hue=None, return_fig=True)
    assert isinstance(fig, Figure)
    plt.close(fig)
    fig = dc.bm.pl.fscore(df=df, hue='method', return_fig=True)
    assert isinstance(fig, Figure)
    plt.close(fig)


def test_qrank(
    df,
):
    # Quantile-rank plot works both without and with a hue grouping
    fig = dc.bm.pl.qrank(df=df, hue=None, return_fig=True)
    assert isinstance(fig, Figure)
    plt.close(fig)
    fig = dc.bm.pl.qrank(df=df, hue='method', return_fig=True)
    assert isinstance(fig, Figure)
    plt.close(fig)


def test_bar(
    hdf,
):
    # Barplot of the harmonic-mean summary, with and without hue
    fig = dc.bm.pl.bar(df=hdf, x='H(auroc, auprc)', y='method', hue=None, return_fig=True)
    assert isinstance(fig, Figure)
    plt.close(fig)
    fig = dc.bm.pl.bar(df=hdf, x='H(auroc, auprc)', y='method', hue='method', return_fig=True)
    assert isinstance(fig, Figure)
    plt.close(fig)


def test_summary(
    hdf,
):
    # Summary plot of all harmonic means
    fig = dc.bm.pl.summary(df=hdf, y='method', figsize=(6, 3), return_fig=True)
    assert isinstance(fig, Figure)
    plt.close(fig)
import numpy as np
import pandas as pd
import pytest
import scanpy as sc

import decoupler as dc


@pytest.fixture
def rng():
    # Seeded generator so tests are deterministic
    rng = np.random.default_rng(seed=42)
    return rng


@pytest.fixture
def adata():
    # Small toy expression AnnData with a rounded counts layer
    adata, _ = dc.ds.toy(nobs=40, nvar=20, bval=2, seed=42, verbose=False)
    adata.layers['counts'] = adata.X.round()
    return adata


@pytest.fixture
def tdata():
    # Toy AnnData with a pseudotime ordering column
    tdata, _ = dc.ds.toy(nobs=40, nvar=20, bval=2, seed=42, verbose=False, pstime=True)
    return tdata


@pytest.fixture
def tdata_obsm(
    tdata,
    net,
    rng,
):
    # tdata enriched with PCA, a jittered UMAP, and ulm activity scores in obsm
    sc.tl.pca(tdata)
    tdata.obsm['X_umap'] = tdata.obsm['X_pca'][:, :2] + rng.random(tdata.obsm['X_pca'][:, :2].shape)
    dc.mt.ulm(data=tdata, net=net, tmin=0)
    return tdata


@pytest.fixture
def pdata(
    adata,
    rng,
):
    # Pseudobulk profiles with random dropout applied to the counts
    adata.X = adata.X.round() * (rng.random(adata.shape) > 0.75)
    return dc.pp.pseudobulk(adata=adata, sample_col='sample', groups_col='group')


@pytest.fixture
def bdata():
    # Benchmark AnnData with a combined sample+group column for grouped metrics
    adata, _ = dc.ds.toy_bench(nobs=100, nvar=20, bval=2, seed=42, verbose=False)
    adata.obs['bm_group'] = adata.obs.apply(lambda x: [x['sample'], x['group']], axis=1)
    return adata


@pytest.fixture
def deg():
    # Minimal differential-expression table (stat + adjusted p-value per gene)
    deg = pd.DataFrame(
        data = [
            [1, 0.5],
            [-2, 0.25],
            [3, 0.125],
            [-4, 0.05],
            [5, 0.025],
        ],
        columns=['stat', 'padj'],
        index=['G01', 'G02', 'G03', 'G04', 'G05']
    )
    return deg


@pytest.fixture
def net():
    # Toy network pruned to sources with at least 3 targets
    _, net = dc.ds.toy(nobs=2, nvar=12, bval=2, seed=42, verbose=False)
    net = dc.pp.prune(features=net['target'].unique(), net=net, tmin=3)
    return net


@pytest.fixture
def unwnet(net):
    # Same network without weights (unweighted variant)
    return net.drop(columns=['weight'], inplace=False)


@pytest.fixture
def mat(
    adata,
):
    # (X, obs, var) tuple extracted from the toy AnnData
    return dc.pp.extract(data=adata)


@pytest.fixture
def idxmat(
    mat,
    net,
):
    # Flat index representation of the network (cnct, starts, offsets)
    X, obs, var = mat
    sources, cnct, starts, offsets = dc.pp.idxmat(features=var, net=net, verbose=False)
    return cnct, starts, offsets


@pytest.fixture
def adjmat(
    mat,
    net,
):
    # Dense adjacency matrix representation of the network
    X, obs, var = mat
    sources, targets, adjmat = dc.pp.adjmat(features=var, net=net, verbose=False)
    return adjmat
import warnings

import pandas as pd
import pytest
import anndata as ad

import decoupler as dc


@pytest.mark.parametrize(
    'url', [
        'https://datasets.cellxgene.cziscience.com/' +
        'f665effe-d95a-4211-ab03-9d1777ca0806.h5ad',
        'https://datasets.cellxgene.cziscience.com/' +
        '1338d08a-481a-426c-ad60-9f4ac08afe16.h5ad'
    ]
)
def test_download_anndata(
    url
):
    # Private downloader returns an AnnData for cellxgene-hosted .h5ad files
    warnings.filterwarnings("ignore", module="anndata")
    adata = dc.ds._scell._download_anndata(url=url)
    assert isinstance(adata, ad.AnnData)


def test_pbmc3k():
    # PBMC3k loader: raw slot cleared, categorical annotation columns present
    warnings.filterwarnings("ignore", module="anndata")
    adata = dc.ds.pbmc3k()
    assert isinstance(adata, ad.AnnData)
    assert adata.raw is None
    assert isinstance(adata.obs, pd.DataFrame)
    cols = {'celltype', 'leiden'}
    assert cols.issubset(adata.obs.columns)
    assert 'louvain' not in adata.obs.columns
    for col in cols:
        assert isinstance(adata.obs[col].dtype, pd.CategoricalDtype)


def test_covid5k():
    # Covid5k loader: expected categorical metadata columns
    adata = dc.ds.covid5k()
    assert isinstance(adata, ad.AnnData)
    assert adata.raw is None
    assert isinstance(adata.obs, pd.DataFrame)
    cols = {'individual', 'sex', 'disease', 'celltype'}
    assert cols.issubset(adata.obs.columns)
    for col in cols:
        assert isinstance(adata.obs[col].dtype, pd.CategoricalDtype)


def test_erygast1k():
    # Erygast1k loader: categorical metadata plus precomputed embeddings
    adata = dc.ds.erygast1k()
    assert isinstance(adata, ad.AnnData)
    assert adata.raw is None
    assert isinstance(adata.obs, pd.DataFrame)
    cols = {'sample', 'stage', 'sequencing.batch', 'theiler', 'celltype'}
    assert cols.issubset(adata.obs.columns)
    for col in cols:
        assert isinstance(adata.obs[col].dtype, pd.CategoricalDtype)
    keys = {'X_pca', 'X_umap'}
    assert keys.issubset(adata.obsm.keys())
# tests/ds/test_scell.py
def test_erygast1k():
    """erygast1k loads with categorical metadata and PCA/UMAP embeddings."""
    adata = dc.ds.erygast1k()
    assert isinstance(adata, ad.AnnData)
    assert adata.raw is None
    assert isinstance(adata.obs, pd.DataFrame)
    cols = {'sample', 'stage', 'sequencing.batch', 'theiler', 'celltype'}
    assert cols.issubset(adata.obs.columns)
    assert all(isinstance(adata.obs[c].dtype, pd.CategoricalDtype) for c in cols)
    assert {'X_pca', 'X_umap'}.issubset(adata.obsm.keys())


# tests/ds/test_spatial.py
def test_msvisium():
    """msvisium loads with a categorical 'niches' annotation."""
    adata = dc.ds.msvisium()
    assert isinstance(adata, ad.AnnData)
    assert adata.raw is None
    assert isinstance(adata.obs, pd.DataFrame)
    cols = {'niches'}
    assert cols.issubset(adata.obs.columns)
    assert all(isinstance(adata.obs[c].dtype, pd.CategoricalDtype) for c in cols)


# tests/ds/test_toy.py
@pytest.mark.parametrize(
    'nvar,val,size,hasval',
    [
        [3, 0., 5, False],
        [10, 0., 10, True],
    ]
)
def test_fillval(nvar, val, size, hasval):
    """_fillval pads the array up to nvar entries (no-op when already longer)."""
    arr = np.array([1., 2., 3., 4., 5.])
    padded = dc.ds._toy._fillval(arr=arr, nvar=nvar, val=val)
    assert padded.size == size
    # The pad value shows up at the tail only when padding happened
    assert (val == padded[-1]) == hasval


@pytest.mark.parametrize(
    'nobs,nvar,bval,pstime,seed,verbose',
    [
        [10, 15, 2, True, 42, False],
        [2, 12, 2, False, 42, False],
        [100, 50, 0, False, 0, True],
        [10, 500, 0, True, 0, True],
    ]
)
def test_toy(nobs, nvar, bval, pstime, seed, verbose, caplog):
    """Toy generator produces two groups with the requested dimensions."""
    with caplog.at_level(logging.INFO):
        adata, net = dc.ds.toy(nobs=nobs, nvar=nvar, bval=bval, pstime=pstime, seed=seed, verbose=verbose)
    # Logging happens exactly when verbose is requested
    if verbose:
        assert len(caplog.text) > 0
    else:
        assert caplog.text == ''
    assert all(adata.obs['group'].cat.categories == ['A', 'B'])
    msk = adata.obs['group'] == 'A'
    # First four genes up in group A, the next four up in group B
    assert all(adata[msk, :4].X.mean(0) > adata[~msk, :4].X.mean(0))
    assert all(adata[msk, 4:8].X.mean(0) < adata[~msk, 4:8].X.mean(0))
    assert nobs == adata.n_obs
    assert nvar == adata.n_vars
    # Background genes hover around bval (skipped for the minimal nvar=12 case)
    assert ((bval - 1) < np.mean(adata.X[:, -1].ravel()) < (bval + 1)) or nvar == 12
    if pstime:
        assert 'pstime' in adata.obs.columns
        assert ((0. <= adata.obs['pstime']) & (adata.obs['pstime'] <= 1.)).all()


@pytest.mark.parametrize(
    'shuffle_r,seed,nobs,nvar,is_diff',
    [
        [0.0, 1, 20, 31, True],
        [0.1, 2, 36, 41, True],
        [0.9, 3, 49, 21, False],
        [1.0, 4, 18, 41, False],
    ]
)
def test_toy_bench(net, shuffle_r, seed, nobs, nvar, is_diff):
    """Shuffling destroys the group signal; the returned net stays unchanged."""
    adata, bmnet = dc.ds.toy_bench(shuffle_r=shuffle_r, seed=seed, nobs=nobs, nvar=nvar)
    assert (net == bmnet).values.all()
    assert adata.n_obs == nobs
    assert adata.n_vars == nvar
    msk = adata.obs['group'] == 'A'
    grp_a = adata[msk, :].copy()
    grp_b = adata[~msk, :].copy()
    for gene in adata.var_names[:8]:
        a_vals = grp_a[:, gene].X.ravel()
        b_vals = grp_b[:, gene].X.ravel()
        _, pval = sts.ranksums(a_vals, b_vals)
        # Low shuffle ratios keep the signal; high ratios erase it
        if is_diff:
            assert pval < 0.05
        else:
            assert pval > 0.05
# tests/ds/test_utils.py
@pytest.mark.parametrize(
    'organism,lst_ens,lst_sym',
    [
        ['hsapiens_gene_ensembl', ['ENSG00000196092', 'ENSG00000115415'], ['PAX5', 'STAT1']],
        ['hsapiens_gene_ensembl', ['ENSG00000204655', 'ENSG00000184221'], ['MOG', 'OLIG1']],
        ['mmusculus_gene_ensembl', ['ENSMUSG00000076439', 'ENSMUSG00000046160'], ['Mog', 'Olig1']],
    ]
)
def test_ensmbl_to_symbol(organism, lst_ens, lst_sym):
    """Ensembl ids translate to the expected gene symbols."""
    lst_trn = dc.ds.ensmbl_to_symbol(genes=lst_ens, organism=organism)
    for got, want in zip(lst_trn, lst_sym):
        assert got == want


# tests/mt/test_aucell.py
def test_auc(mat, idxmat):
    """The AUC kernel returns one score per source."""
    X, obs, var = mat
    cnct, starts, offsets = idxmat
    scores = dc.mt._aucell._auc.py_func(
        row=X[0],
        cnct=cnct,
        starts=starts,
        offsets=offsets,
        n_up=2,
        nsrc=offsets.size,
    )
    assert isinstance(scores, np.ndarray)
    assert scores.size == offsets.size


def test_func_aucell(mat, idxmat):
    """AUCell scores match reference values computed with R's AUCell."""
    X, obs, var = mat
    cnct, starts, offsets = idxmat
    obs = np.array(['S01', 'S02', 'S29', 'S30'])
    # Keep the two first and two last observations, as in the R reference run
    X = sps.csr_matrix(np.vstack((X[:2, :], X[-2:, :])))
    expected = pd.DataFrame(
        data=np.array([
            [0.6666667, 0.3333333, 0, 0, 0],
            [1.0000000, 0.0000000, 0, 0, 0],
            [0.0000000, 1.0000000, 1, 0, 0],
            [0.0000000, 1.0000000, 1, 0, 0],
        ]),
        columns=['T1', 'T2', 'T3', 'T4', 'T5'],
        index=obs,
    )
    dc_es, _ = dc.mt._aucell._func_aucell(
        mat=X,
        cnct=cnct,
        starts=starts,
        offsets=offsets,
        n_up=3,
    )
    assert np.isclose(dc_es, expected.values).all()


# tests/mt/test_consensus.py
@pytest.mark.parametrize('sel', [np.array([0., 0., 0., 0.]), np.array([1., 3., 8., 2.])])
def test_zscore(sel):
    """The z-score kernel keeps the input size (constant input included)."""
    z = dc.mt._consensus._zscore.py_func(sel=sel)
    assert isinstance(z, np.ndarray)
    assert z.size == sel.size


def test_mean_zscores(rng):
    """Averaging z-scores collapses the methods axis."""
    scores = rng.normal(size=(2, 5, 10))
    es = dc.mt._consensus._mean_zscores.py_func(scores=scores)
    assert scores.shape[1:] == es.shape
# tests/mt/test_consensus.py
def test_consensus(adata, net):
    """Consensus works on AnnData in place and on returned DataFrames."""
    dc.mt.decouple(data=adata, net=net, methods=['zscore', 'ulm'], cons=False, tmin=0)
    dc.mt.consensus(adata)
    assert 'score_consensus' in adata.obsm
    res = dc.mt.decouple(data=adata.to_df(), net=net, methods=['zscore', 'ulm'], cons=False, tmin=0)
    es, pv = dc.mt.consensus(res)
    assert np.isfinite(es.values).all()
    assert ((0 <= pv.values) & (pv.values <= 1)).all()


# tests/mt/test_decouple.py
@pytest.mark.parametrize(
    'methods,args,cons,anndata',
    [
        ['all', dict(), True, True],
        ['aucell', dict(aucell=dict(n_up=3)), True, False],
        [['ulm'], dict(), False, True],
        [['ulm', 'ora'], dict(ulm=dict(), ora=dict(n_up=3)), False, False],
    ]
)
def test_decouple(adata, net, methods, args, cons, anndata):
    """decouple stores consensus scores exactly when cons=True."""
    if anndata:
        dc.mt.decouple(data=adata, net=net, methods=methods, args=args, cons=cons, tmin=0)
        found = 'score_consensus' in adata.obsm
    else:
        res = dc.mt.decouple(data=adata.to_df(), net=net, methods=methods, args=args, cons=cons, tmin=0)
        found = 'score_consensus' in res
    assert found == cons


# tests/mt/test_gsea.py
def test_std():
    """The numba std kernel matches numpy with ddof=1."""
    arr = np.array([0.1, -5.3, 3.8, 9.5, -0.4, 5.5])
    assert np.std(arr, ddof=1) == dc.mt._gsea._std.py_func(arr=arr, ddof=1)


def test_ridx():
    """Permutation indices are shuffled and differ between seeds."""
    idx_a = dc.mt._gsea._ridx(times=5, nvar=10, seed=42)
    idx_b = dc.mt._gsea._ridx(times=5, nvar=10, seed=2)
    # No permutation should be the identity (strictly consecutive) ordering
    for idx in (idx_a, idx_b):
        assert (~(np.diff(idx) == 1).all(axis=1)).all()
    # Different seeds must yield different permutations
    assert (~(idx_a == idx_b).all(axis=1)).all()


@pytest.mark.parametrize(
    'row,rnks,set_msk,dec,expected_value,expected_index',
    [
        (np.array([0.0, 2.0, 0.0]), np.array([0, 1, 2]), np.array([False, True, False]), 0.1, 0.9, 1),
        (np.array([1.0, 2.0, 3.0]), np.array([2, 1, 0]), np.array([True, True, True]), 0.1, 1.0, 0),
        (np.array([1.0, 2.0, 3.0]), np.array([0, 1, 2]), np.array([False, False, False]), 0.1, 0, 0),
        (np.array([0.0, 0.0, 0.0]), np.array([0, 1, 2]), np.array([True, True, True]), 0.1, 0.0, 0),
        (np.array([1.0, -2.0, 3.0]), np.array([0, 1, 2]), np.array([True, False, True]), 0.5, 0.5, 2),
    ]
)
def test_esrank(row, rnks, set_msk, dec, expected_value, expected_index):
    """The enrichment-score walk returns the expected peak value and position."""
    value, index, es = dc.mt._gsea._esrank.py_func(row=row, rnks=rnks, set_msk=set_msk, dec=dec)
    assert np.isclose(value, expected_value)
    assert index == expected_index
    assert isinstance(es, np.ndarray) and es.shape == rnks.shape


def test_nesrank(rng):
    """NES and p-value come back as plain floats."""
    ridx = np.array([
        [0, 1, 2],
        [0, 2, 1],
        [1, 2, 0],
        [1, 0, 2],
        [2, 0, 1],
        [2, 1, 0],
    ])
    nes, pval = dc.mt._gsea._nesrank.py_func(
        ridx=ridx,
        row=np.array([0.0, 2.0, 0.0]),
        rnks=np.array([0, 1, 2]),
        set_msk=np.array([False, True, False]),
        dec=0.1,
        es=0.9,
    )
    assert isinstance(nes, float)
    assert isinstance(pval, float)
# tests/mt/test_gsea.py
def test_stsgsea(mat, idxmat):
    """The single-sample GSEA kernel returns one es/nes/p per source."""
    X, obs, var = mat
    cnct, starts, offsets = idxmat
    row = X[0, :]
    times = 10
    ridx = dc.mt._gsea._ridx(times=times, nvar=row.size, seed=42)
    es, nes, pv = dc.mt._gsea._stsgsea.py_func(
        row=row,
        cnct=cnct,
        starts=starts,
        offsets=offsets,
        ridx=ridx,
    )
    assert es.size == offsets.size
    assert nes.size == offsets.size
    assert pv.size == offsets.size


def test_func_gsea(mat, net, idxmat):
    """NES scores agree with gseapy.prerank within tolerance.

    Fix: removed the unused FDR pivot (previously bound to ``gp_pv``) —
    it was computed from the gseapy result but never asserted against.
    """
    times = 1000
    seed = 42
    X, obs, var = mat
    gene_sets = net.groupby('source')['target'].apply(lambda x: list(x)).to_dict()
    cnct, starts, offsets = idxmat
    res = gp.prerank(
        rnk=pd.DataFrame(X, index=obs, columns=var).T,
        gene_sets=gene_sets,
        permutation_num=times,
        permutation_type='gene_set',
        outdir=None,
        min_size=0,
        threads=4,
        seed=seed,
    ).res2d
    gp_es = res.pivot(index='Name', columns='Term', values='NES').astype(float)
    dc_es, dc_pv = dc.mt._gsea._func_gsea(
        mat=X,
        cnct=cnct,
        starts=starts,
        offsets=offsets,
        times=times,
        seed=seed,
    )
    # Both NES estimates are permutation-based, so allow a small tolerance
    assert (gp_es - dc_es).abs().values.max() < 0.10


# tests/mt/test_mdt.py
@pytest.mark.parametrize(
    'kwargs',
    [
        dict(),
        dict(n_estimators=10),
        dict(max_depth=1),
        dict(gamma=0.01),
    ]
)
def test_func_mdt(mat, adjmat, kwargs):
    """MDT scores are finite and bounded in [0, 1] across configurations."""
    X, obs, var = mat
    es = dc.mt._mdt._func_mdt(mat=X, adj=adjmat, **kwargs)[0]
    assert np.isfinite(es).all()
    assert ((0 <= es) & (es <= 1)).all()


# tests/mt/test_methods.py
def test_methods():
    """The method registry and dc.mt.show() agree in size and typing."""
    lstm = dc.mt._methods
    len_lstm = len(lstm)
    len_dfm = dc.mt.show().shape[0]
    assert len_lstm == len_dfm
    assert all(isinstance(m, dc._Method.Method) for m in lstm)


# tests/mt/test_mlm.py
def test_fit(mat, adjmat):
    """The batched OLS kernel returns per-sample coefficient and t matrices.

    Fix: removed a leftover debug ``print`` of the output shapes.
    """
    X, obs, var = mat
    n_features, n_fsets = adjmat.shape
    n_samples, _ = X.shape
    # Prepend an intercept column before fitting
    adjmat = np.column_stack((np.ones((n_features, )), adjmat))
    inv = np.linalg.inv(np.dot(adjmat.T, adjmat))
    df = n_features - n_fsets - 1
    coef, t = dc.mt._mlm._fit.py_func(
        X=adjmat,
        y=X.T,
        inv=inv,
        df=df,
    )
    # Output shapes exclude the intercept term
    assert isinstance(coef, np.ndarray)
    assert isinstance(t, np.ndarray)
    assert coef.shape == (n_samples, n_fsets)
    assert t.shape == (n_samples, n_fsets)


@pytest.mark.parametrize('tval', [True, False])
def test_func_mlm(mat, adjmat, tval):
    """MLM matches statsmodels OLS coefficients/t-values and p-values."""
    X, obs, var = mat
    dc_es, dc_pv = dc.mt._mlm._func_mlm(mat=X, adj=adjmat, tval=tval)
    st_es, st_pv = np.zeros(dc_es.shape), np.zeros(dc_pv.shape)
    # Design matrix is the same for every sample: hoist it out of the loop
    x = sm.add_constant(adjmat)
    for i in range(st_es.shape[0]):
        y = X[i, :]
        model = sm.OLS(y, x)
        res = model.fit()
        if tval:
            st_es[i, :] = res.tvalues[1:]
        else:
            st_es[i, :] = res.params[1:]
        st_pv[i, :] = res.pvalues[1:]
    assert np.allclose(dc_es, st_es)
    assert np.allclose(dc_pv, st_pv)
# tests/mt/test_ora.py
@pytest.mark.parametrize(
    'a,b,c,d',
    [
        [10, 1, 2, 1000],
        [0, 20, 35, 5],
        [1, 2, 3, 4],
        [0, 1, 2, 500],
    ]
)
def test_table(a, b, c, d):
    """Odds ratio and two-sided p agree with scipy and the -ln(p) helper."""
    dc_es = dc.mt._ora._oddsr.py_func(a=a, b=b, c=c, d=d, ha_corr=0., log=False)
    dc_pv = dc.mt._ora._test1t.py_func(a=a, b=b, c=c, d=d)
    st_es, st_pv = sts.fisher_exact([[a, b], [c, d]])
    assert np.isclose(dc_es, st_es)
    assert np.isclose(dc_pv, st_pv)
    # The -ln(p) helper must agree after exponentiation
    nb_pv = math.exp(-dc.mt._ora._mlnTest2t.py_func(a, a + b, a + c, a + b + c + d))
    assert np.isclose(dc_pv, nb_pv)


def test_runora(mat, idxmat):
    """The row-level ORA kernel returns per-source score and p-value arrays."""
    X, obs, var = mat
    cnct, starts, offsets = idxmat
    row = sts.rankdata(X[0], method='ordinal')
    ranks = np.arange(row.size, dtype=np.int_)
    # NOTE(review): `(row < 0)` is always False for ordinal ranks (>= 1), so
    # this mask reduces to `row > 2` — confirm the second clause is intended.
    row = ranks[(row > 2) | (row < 0)]
    es, pv = dc.mt._ora._runora.py_func(
        row=row,
        ranks=ranks,
        cnct=cnct,
        starts=starts,
        offsets=offsets,
        n_bg=0,
        ha_corr=0.5,
    )
    assert isinstance(es, np.ndarray)
    assert isinstance(pv, np.ndarray)


def test_func_ora(mat, idxmat):
    """ORA matches a pure scipy fisher_exact re-implementation.

    Fix: pass the local ``ha_corr`` variable to ``_func_ora`` instead of a
    duplicated literal ``1``, so the decoupler call and the reference loop
    below cannot silently drift apart.
    """
    X, obs, var = mat
    cnct, starts, offsets = idxmat
    n_up = 3
    ha_corr = 1
    dc_es, dc_pv = dc.mt._ora._func_ora(
        mat=sps.csr_matrix(X),
        cnct=cnct,
        starts=starts,
        offsets=offsets,
        n_up=n_up,
        n_bm=0,
        n_bg=None,
        ha_corr=ha_corr,
    )
    st_es, st_pv = np.zeros(dc_es.shape), np.zeros(dc_pv.shape)
    ranks = np.arange(X.shape[1], dtype=np.int_)
    rnk = set(ranks)
    for i in range(st_es.shape[0]):
        row = sts.rankdata(X[i], method='ordinal')
        row = set(ranks[row > n_up])
        for j in range(st_es.shape[1]):
            fset = dc.pp.net._getset(cnct=cnct, starts=starts, offsets=offsets, j=j)
            fset = set(fset)
            # Build the 2x2 contingency table
            set_a = row.intersection(fset)
            set_b = fset.difference(row)
            set_c = row.difference(fset)
            a = len(set_a)
            b = len(set_b)
            c = len(set_c)
            set_u = set_a.union(set_b).union(set_c)
            set_d = rnk.difference(set_u)
            d = len(set_d)
            _, st_pv[i, j] = sts.fisher_exact([[a, b], [c, d]])
            # Haldane-Anscombe correction before computing the odds ratio
            a += ha_corr
            b += ha_corr
            c += ha_corr
            d += ha_corr
            es = sts.fisher_exact([[a, b], [c, d]])
            st_es[i, j], _ = np.log(es)
    assert np.isclose(dc_es, st_es).all()
    assert np.isclose(dc_pv, st_pv).all()


# tests/mt/test_run.py
def test_return(adata, net):
    """_return writes into AnnData in place, or returns DataFrames otherwise."""
    mth = dc.mt.ulm
    adata = adata[:4].copy()
    adata.X[:, 0] = 0.
    es, pv = mth(data=adata.to_df(), net=net, tmin=0)
    r = dc.mt._run._return(name=mth.name, data=adata, es=es, pv=pv)
    assert r is None
    r = dc.mt._run._return(name=mth.name, data=adata.to_df(), es=es, pv=pv)
    assert isinstance(r, tuple)
    assert isinstance(r[0], pd.DataFrame)
    assert isinstance(r[1], pd.DataFrame)


@pytest.mark.parametrize(
    'mth,bsize',
    [
        [dc.mt.zscore, 2],
        [dc.mt.ora, 2],
        [dc.mt.gsva, 250_000],
    ]
)
def test_run(adata, net, mth, bsize):
    """Dense and sparse-backed inputs produce identical scores.

    NOTE(review): the parametrized ``bsize`` value is never used inside the
    test body — confirm whether it should be forwarded to ``_run``.
    """
    sdata = adata.copy()
    sdata.X = sps.csr_matrix(sdata.X)
    des, dpv = dc.mt._run._run(
        name=mth.name,
        func=mth.func,
        adj=mth.adj,
        test=mth.test,
        data=adata.to_df(),
        net=net,
        tmin=0,
    )
    ses, spv = dc.mt._run._run(
        name=mth.name,
        func=mth.func,
        adj=mth.adj,
        test=mth.test,
        data=sdata.to_df(),
        net=net,
        tmin=0,
    )
    assert (des.values == ses.values).all()
# tests/mt/test_udt.py
@pytest.mark.parametrize(
    'kwargs',
    [
        dict(),
        dict(n_estimators=10),
        dict(max_depth=1),
        dict(gamma=0.01),
    ]
)
def test_func_udt(mat, adjmat, kwargs):
    """UDT scores are finite and within [0, 1] for several configurations."""
    X, obs, var = mat
    scores = dc.mt._udt._func_udt(mat=X, adj=adjmat, **kwargs)[0]
    assert np.isfinite(scores).all()
    assert ((0 <= scores) & (scores <= 1)).all()


# tests/mt/test_ulm.py
def test_cov(mat, adjmat):
    """The batched covariance helper matches numpy.cov."""
    X, obs, var = mat
    dc_cov = dc.mt._ulm._cov(A=adjmat, b=X.T)
    nsrcs = adjmat.shape[1]
    np_cov = np.cov(m=adjmat, y=X.T, rowvar=False)[:nsrcs, nsrcs:].T
    assert np.allclose(np_cov, dc_cov)


def test_cor(mat, adjmat):
    """The batched correlation matches numpy.corrcoef and stays in [-1, 1]."""
    X, obs, var = mat
    dc_cor = dc.mt._ulm._cor(adjmat, X.T)
    nsrcs = adjmat.shape[1]
    np_cor = np.corrcoef(adjmat, X.T, rowvar=False)[:nsrcs, nsrcs:].T
    assert np.allclose(dc_cor, np_cor)
    assert np.all((dc_cor <= 1) * (dc_cor >= -1))


def test_tval():
    """t-statistics derived from correlation and degrees of freedom."""
    assert np.allclose(2.30940108, dc.mt._ulm._tval(r=0.4, df=28))
    assert np.allclose(12.15540081, dc.mt._ulm._tval(r=0.99, df=3))
    assert np.allclose(-0.49811675, dc.mt._ulm._tval(r=-0.05, df=99))


@pytest.mark.parametrize('tval', [True, False])
def test_func_ulm(mat, adjmat, tval):
    """ULM matches per-pair scipy.stats.linregress slopes/t-values and p-values."""
    X, obs, var = mat
    dc_es, dc_pv = dc.mt._ulm._func_ulm(mat=X, adj=adjmat, tval=tval)
    st_es, st_pv = np.zeros(dc_es.shape), np.zeros(dc_pv.shape)
    n_obs, n_src = st_es.shape
    for i in range(n_obs):
        for j in range(n_src):
            res = sts.linregress(adjmat[:, j], X[i, :])
            st_pv[i, j] = res.pvalue
            # tval=True reports slope / stderr, otherwise the raw slope
            st_es[i, j] = res.slope / res.stderr if tval else res.slope
    assert np.allclose(dc_es, st_es)
    assert np.allclose(dc_pv, st_pv)
# tests/mt/test_viper.py
def test_get_tmp_idxs(rng):
    """Smoke test: the helper accepts a p-value matrix with a NaN diagonal."""
    pval = rng.random((5, 5))
    np.fill_diagonal(pval, np.nan)
    dc.mt._viper._get_tmp_idxs.py_func(pval)


def test_func_viper(adata, net):
    """VIPER scores match the reference R run with and without pleiotropy."""
    # Collapse the toy sources into the regulon layout used by the R reference
    rename = {
        'T1': 'T1',
        'T2': 'T1',
        'T3': 'T2',
        'T4': 'T2',
        'T5': 'T3',
    }
    net['source'] = net['source'].map(rename)
    extra = pd.DataFrame([['T4', 'G03', -1.2]], columns=['source', 'target', 'weight'], index=[0])
    net = pd.concat([
        net,
        net[net['source'] == 'T2'].assign(source='T4'),
        extra,
    ])
    X, obs, var = dc.pp.extract(data=adata)
    sources, targets, adjmat = dc.pp.adjmat(features=var, net=net, verbose=False)
    # Keep the two first and two last observations, as in the R reference run
    obs = np.array(['S01', 'S02', 'S29', 'S30'])
    X = np.vstack((X[:2, :], X[-2:, :]))
    pf_dc_es, pf_dc_pv = dc.mt._viper._func_viper(mat=X, adj=adjmat, pleiotropy=False)
    pt_dc_es, pt_dc_pv = dc.mt._viper._func_viper(mat=X, adj=adjmat, n_targets=1, pleiotropy=True)
    pf_vp_es = np.array([
        [ 3.708381, -2.154396, -1.4069603, -2.468185],
        [ 3.702911, -2.288070, -0.7239077, -2.848132],
        [-3.613066,  1.696114, -0.5789716,  2.039502],
        [-3.495480,  2.560792, -1.1296442,  2.523946],
    ])
    pt_vp_es = np.array([
        [ 2.224856, -2.154396, -1.4069603, -1.131059],
        [ 1.880012, -2.288070, -0.7239077, -2.848132],
        [-3.177418,  1.696114, -0.5789716,  2.039502],
        [-2.073186,  2.560792, -1.1296442,  2.523946],
    ])
    assert np.isclose(pf_vp_es, pf_dc_es).all()
    assert np.isclose(pt_vp_es, pt_dc_es).all()


# tests/mt/test_waggr.py
def test_funcs(rng):
    """Weighted-sum and weighted-mean kernels return plain floats."""
    x = np.array([1, 2, 3, 4], dtype=float)
    w = rng.random(x.size)
    assert isinstance(dc.mt._waggr._wsum.py_func(x=x, w=w), float)
    assert isinstance(dc.mt._waggr._wmean.py_func(x=x, w=w), float)


@pytest.mark.parametrize(
    'fun,times,seed',
    [
        ['wmean', 10, 42],
        ['wsum', 5, 23],
        [lambda x, w: 0, 5, 1],
        ['wmean', 0, 42],
    ]
)
def test_func_waggr(mat, adjmat, fun, times, seed):
    """Named, callable, and permutation-free variants all yield valid output."""
    X, obs, var = mat
    es, pv = dc.mt._waggr._func_waggr(mat=X, adj=adjmat, fun=fun, times=times, seed=seed)
    assert np.isfinite(es).all()
    assert ((0 <= pv) & (pv <= 1)).all()


# tests/mt/test_zscore.py
@pytest.mark.parametrize('flavor', ['KSEA', 'RoKAI'])
def test_func_zscore(mat, adjmat, flavor):
    """Both z-score flavors return finite scores and valid p-values."""
    X, obs, var = mat
    es, pv = dc.mt._zscore._func_zscore(mat=X, adj=adjmat, flavor=flavor)
    assert np.isfinite(es).all()
    assert ((0 <= pv) & (pv <= 1)).all()
def test_collectri(
    remove_complexes,
):
    """CollecTRI returns a well-formed, deduplicated regulatory network.

    Parametrized (via the decorator above) over ``remove_complexes``:
    complex regulators such as AP1/NFKB must be present exactly when they
    are not removed.
    """
    df = dc.op.collectri(remove_complexes=remove_complexes)
    assert isinstance(df, pd.DataFrame)
    expected_cols = {'source', 'target', 'weight', 'resources', 'references', 'sign_decision'}
    assert expected_cols.issubset(df.columns)
    assert pd.api.types.is_numeric_dtype(df['weight'])
    # Complexes are present iff they were not filtered out.
    has_complexes = bool(np.isin(['AP1', 'NFKB'], df['source']).all())
    assert has_complexes != bool(remove_complexes)
    assert not df.duplicated(['source', 'target']).any()
@pytest.mark.parametrize(
    'top,thr_padj',
    [
        [100, 0.05],
        [100, 1],
        [np.inf, 0.05],
        [np.inf, 1],
    ]
)
def test_progeny(
    top,
    thr_padj,
):
    """PROGENy network honours the significance and per-source size filters."""
    network = dc.op.progeny(top=top, thr_padj=thr_padj)
    assert isinstance(network, pd.DataFrame)
    # Required schema and numeric dtypes.
    for col in ('source', 'target', 'weight', 'padj'):
        assert col in network.columns
    for col in ('weight', 'padj'):
        assert pd.api.types.is_numeric_dtype(network[col])
    # Every edge passes the adjusted p-value threshold.
    assert network['padj'].lt(thr_padj).all()
    # No source exceeds the requested number of top targets.
    assert network.groupby('source').size().le(top).all()
    assert not network.duplicated(['source', 'target']).any()
@pytest.mark.parametrize(
    'lst,my_dict,one_to_many',
    [
        [['a', 'b', 'c', 'd'], dict(a=['B', 'C'], b=['A', 'C'], c=['A', 'B'], d='D'), 1],
        [['a', 'b', 'c', 'd'], dict(c=['A', 'B']), 1],
        [['a', 'b', 'c', 'd'], dict(a=['B', 'C'], b=['A', 'C'], c=['A', 'B'], d='D'), 10],
    ]
)
def test_replace_subunits(
    lst,
    my_dict,
    one_to_many,
):
    """Each element of ``lst`` maps to its ortholog list, or NaN when the
    element is missing from the mapping or maps to more than ``one_to_many``
    orthologs.
    """
    res = dc.op._translate._replace_subunits(
        lst=lst, my_dict=my_dict, one_to_many=one_to_many
    )
    assert isinstance(res, list)
    assert len(res) == len(lst)
    # BUG FIX: the loop previously iterated over my_dict keys, so the inner
    # `if k in my_dict` was always true and the `else` branch (elements absent
    # from the mapping must be NaN) was unreachable — the second parametrized
    # case never checked 'a', 'b' and 'd'. Iterate over lst positions instead.
    for idx, k in enumerate(lst):
        if k in my_dict:
            if len(my_dict[k]) > one_to_many:
                # Too many orthologs: ambiguous, dropped as NaN.
                assert np.isnan(res[idx])
            else:
                assert isinstance(res[idx], list)
        else:
            # Not in the mapping at all: NaN — TODO confirm against
            # dc.op._translate._replace_subunits for unmapped entries.
            assert np.isnan(res[idx])
@pytest.fixture
def df():
    """Two observations by four sources, with mixed-sign scores."""
    df = pd.DataFrame(
        data=[
            [1, -2, 3, -4],
            [5, -6, 7, -8],
        ],
        index=['C1', 'C2'],
        columns=[f'TF{i}' for i in range(4)]
    )
    return df


@pytest.mark.parametrize(
    'name,top,vertical,vcenter',
    [
        ['C1', 2, True, None],
        ['C2', 10, False, -3],
        ['C2', 10, False, 10],
    ]
)
def test_barplot(
    df,
    name,
    top,
    vertical,
    vcenter,
):
    """Barplot renders for both orientations and custom colour centers."""
    # BUG FIX: vcenter was parametrized (None, -3, 10) but never forwarded,
    # so those cases exercised nothing. Pass it through to dc.pl.barplot.
    fig = dc.pl.barplot(data=df, name=name, top=top, vertical=vertical, vcenter=vcenter, return_fig=True)
    assert isinstance(fig, Figure)
    plt.close(fig)
@pytest.mark.parametrize(
    'groupby,log',
    [
        ['group', True],
        [['group'], True],
        [['sample', 'group'], True],
    ]
)
def test_filter_samples(
    pdata,
    groupby,
    log,
):
    """Sample-filtering plot accepts a column name or a list of columns."""
    figure = dc.pl.filter_samples(adata=pdata, groupby=groupby, log=log, return_fig=True)
    assert isinstance(figure, Figure)
    plt.close(figure)
@pytest.mark.parametrize(
    'd_none,unw,sources,targets,by_abs,vcenter',
    [
        [False, False, 5, 5, False, False],
        [False, True, 'T1', 5, True, True],
        [True, False, ['T1'], 5, True, True],
        [True, False, ['T1', 'T3'], 5, True, True],
        [False, False, 5, 'G01', True, True],
        [False, False, 5, ['G01', 'G02', 'G03'], True, True],
    ]
)
def test_network(
    net,
    data,
    score,
    d_none,
    unw,
    sources,
    targets,
    by_abs,
    vcenter,
):
    """Network plot renders with/without data, weights and node filters."""
    # Without molecular data there is nothing to colour by, so use a flat map.
    s_cmap = 'white' if d_none else 'coolwarm'
    if d_none:
        data = None
        score = None
    if unw:
        # Exercise the unweighted-network code path.
        net = net.drop(columns=['weight'])
    fig = dc.pl.network(
        data=data,
        score=score,
        net=net,
        sources=sources,
        targets=targets,
        by_abs=by_abs,
        vcenter=vcenter,
        s_cmap=s_cmap,
        figsize=(5, 5),
        return_fig=True
    )
    assert isinstance(fig, Figure)
    plt.close(fig)
@pytest.mark.parametrize(
    'y,hue,kw',
    [
        ['group', None, dict()],
        ['group', 'group', dict(width=0.5)],
        ['group', 'sample', dict(palette='tab10')],
        ['sample', 'group', dict(palette='tab20')],
    ]
)
def test_obsbar(
    adata,
    y,
    hue,
    kw,
):
    """Observation barplot accepts hue columns and barplot keyword passthrough."""
    figure = dc.pl.obsbar(adata=adata, y=y, hue=hue, kw_barplot=kw, return_fig=True)
    assert isinstance(figure, Figure)
    plt.close(figure)
@pytest.mark.parametrize(
    'names,label,mode',
    [
        [['G01', 'G02', 'G07', 'G08', 'G12'], None, 'line'],
        [['G01', 'G02', 'G07', 'G08'], None, 'mat'],
        [None, 'group', 'line'],
        [None, 'group', 'mat'],
    ]
)
def test_order(
    tdata,
    names,
    label,
    mode,
):
    """Binned-order plot renders in both line and matrix modes.

    Covers explicit feature lists as well as ``names=None``
    (presumably "use all features" — confirm against dc.pp.bin_order).
    """
    # BUG FIX: `names` was parametrized but a hardcoded list was passed to
    # bin_order, so the parametrized feature selections (including None)
    # were never exercised. Forward the parameter.
    df = dc.pp.bin_order(adata=tdata, names=names, order='pstime', label=label)
    fig = dc.pl.order(df=df, mode=mode, return_fig=True)
    assert isinstance(fig, Figure)
    plt.close(fig)
@pytest.mark.parametrize(
    'name,a_err', [
        ['T1', False],
        ['T10', True],
    ]
)
def test_source_targets(
    deg,
    net,
    name,
    a_err,
):
    """Weight-vs-stat scatter renders for known sources; unknown ones raise."""
    if a_err:
        # 'T10' is not in the network: expect the input check to fire.
        with pytest.raises(AssertionError):
            dc.pl.source_targets(data=deg, net=net, name=name, x='weight', y='stat', return_fig=True)
    else:
        fig = dc.pl.source_targets(data=deg, net=net, name=name, x='weight', y='stat', return_fig=True)
        assert isinstance(fig, Figure)
        plt.close(fig)
def test_extract(
    adata,
):
    """dc.pp.extract handles lists, DataFrames, AnnData, layers, sparse
    matrices, empty observations and ``.raw``."""
    # The three basic containers all round-trip to an (X, obs, var) triple
    # with consistent shapes.
    for source in ([adata.X, adata.obs_names, adata.var_names], adata.to_df(), adata):
        X, obs, var = dc.pp.extract(data=source)
        assert X.shape[0] == obs.size
        assert X.shape[1] == var.size
    # A named layer can be selected instead of .X.
    adata.layers['counts'] = adata.X.round()
    X, obs, var = dc.pp.extract(data=adata, layer='counts')
    assert float(np.sum(X)).is_integer()
    # Sparse inputs are normalised to CSR.
    sadata = adata.copy()
    sadata.X = sps.coo_matrix(sadata.X)
    X, obs, var = dc.pp.extract(data=sadata)
    assert isinstance(X, sps.csr_matrix)
    # All-zero observations can be dropped with empty=True.
    eadata = adata.copy()
    eadata.X[5, :] = 0.
    X, obs, var = dc.pp.extract(data=eadata, empty=True)
    assert X.shape[0] < eadata.shape[0]
    # .raw (here holding negated values) is honoured when requested.
    nadata = adata.copy()
    nadata.X = nadata.X * -1
    adata.raw = nadata
    X, obs, var = dc.pp.extract(data=adata, raw=True)
    assert (X < 0).all()
def test_package_has_version():
    """The installed package exposes a non-null version string."""
    version = decoupler.__version__
    assert version is not None
@pytest.mark.parametrize(
    'key,uns_key',
    [
        ['X_pca', 'rank_obsm'],
        ['X_pca', None],
        ['X_umap', 'other'],
        ['score_ulm', 'other'],
        ['score_ulm', None],
    ]
)
def test_rankby_obsm(
    tdata_obsm,
    key,
    uns_key,
):
    """Ranking an .obsm key either returns a DataFrame or stores it in .uns."""
    tdata_obsm = tdata_obsm.copy()
    # Add a categorical covariate with one rare level to rank against.
    tdata_obsm.obs['dose'] = 'Low'
    tdata_obsm.obs.loc[tdata_obsm.obs_names[5], 'dose'] = 'High'
    result = dc.tl.rankby_obsm(tdata_obsm, key=key, uns_key=uns_key)
    if uns_key is None:
        # No storage key: the ranking is returned directly.
        assert isinstance(result, pd.DataFrame)
    else:
        # With a storage key: nothing is returned, .uns holds the ranking.
        assert result is None
        assert uns_key in tdata_obsm.uns
        assert isinstance(tdata_obsm.uns[uns_key], pd.DataFrame)
set(df.head(len(gt_genes))['name']) 20 | assert len(gt_genes) > 3 21 | assert (len(gt_genes & pd_genes) / len(gt_genes)) >= 0.75 22 | msk = df['name'].isin(gt_genes) 23 | assert df[~msk]['stat'].mean() < df[msk]['stat'].mean() 24 | tdata.X = sps.csr_matrix(tdata.X) 25 | df = dc.tl.rankby_order(tdata, order='pstime', stat=stat) 26 | assert isinstance(df, pd.DataFrame) 27 | pd_genes = set(df.head(len(gt_genes))['name']) 28 | assert len(gt_genes) > 3 29 | assert (len(gt_genes & pd_genes) / len(gt_genes)) >= 0.75 30 | msk = df['name'].isin(gt_genes) 31 | assert df[~msk]['stat'].mean() < df[msk]['stat'].mean() 32 | --------------------------------------------------------------------------------