├── .codecov.yaml ├── .cruft.json ├── .editorconfig ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.yml │ ├── config.yml │ └── feature_request.yml └── workflows │ ├── build.yaml │ ├── release.yaml │ └── test.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yaml ├── .vscode ├── extensions.json ├── launch.json └── settings.json ├── CHANGELOG.md ├── LICENSE ├── README.md ├── biome.jsonc ├── docs ├── _static │ ├── .gitkeep │ ├── css │ │ └── custom.css │ └── images │ │ ├── logo.png │ │ ├── mlm.png │ │ ├── ora.png │ │ └── ulm.png ├── _templates │ ├── .gitkeep │ └── autosummary │ │ └── class.rst ├── api │ ├── bm.md │ ├── ds.md │ ├── index.md │ ├── mt.md │ ├── op.md │ ├── pl.md │ ├── pp.md │ └── tl.md ├── changelog.md ├── conf.py ├── extensions │ └── typed_returns.py ├── index.md ├── notebooks │ ├── bench │ │ ├── index.md │ │ └── rna.ipynb │ ├── bulk │ │ ├── index.md │ │ └── rna.ipynb │ ├── example.ipynb │ ├── index.md │ ├── omnipath │ │ ├── index.md │ │ ├── licenses.ipynb │ │ └── orthologs.ipynb │ ├── scell │ │ ├── index.md │ │ ├── rna_psbk.ipynb │ │ ├── rna_pstime.ipynb │ │ └── rna_sc.ipynb │ └── spatial │ │ ├── index.md │ │ └── rna_visium.ipynb ├── references.bib └── references.md ├── pyproject.toml ├── src └── decoupler │ ├── _Method.py │ ├── _Plotter.py │ ├── __init__.py │ ├── _datatype.py │ ├── _docs.py │ ├── _download.py │ ├── _log.py │ ├── _odeps.py │ ├── bm │ ├── __init__.py │ ├── _pp.py │ ├── _run.py │ ├── metric │ │ ├── __init__.py │ │ ├── _auc.py │ │ ├── _fscore.py │ │ ├── _hmean.py │ │ └── _qrank.py │ └── pl │ │ ├── __init__.py │ │ ├── _auc.py │ │ ├── _bar.py │ │ ├── _format.py │ │ ├── _fscore.py │ │ ├── _qrank.py │ │ └── _summary.py │ ├── ds │ ├── __init__.py │ ├── _bulk.py │ ├── _scell.py │ ├── _spatial.py │ ├── _toy.py │ └── _utils.py │ ├── mt │ ├── __init__.py │ ├── _aucell.py │ ├── _consensus.py │ ├── _decouple.py │ ├── _gsea.py │ ├── _gsva.py │ ├── _mdt.py │ ├── _methods.py │ ├── _mlm.py │ ├── _ora.py │ ├── _run.py │ ├── _udt.py │ ├── 
_ulm.py │ ├── _viper.py │ ├── _waggr.py │ └── _zscore.py │ ├── op │ ├── __init__.py │ ├── _collectri.py │ ├── _dorothea.py │ ├── _dtype.py │ ├── _hallmark.py │ ├── _progeny.py │ ├── _resource.py │ └── _translate.py │ ├── pl │ ├── __init__.py │ ├── _barplot.py │ ├── _dotplot.py │ ├── _filter_by_expr.py │ ├── _filter_by_prop.py │ ├── _filter_samples.py │ ├── _leading_edge.py │ ├── _network.py │ ├── _obsbar.py │ ├── _obsm.py │ ├── _order.py │ ├── _order_targets.py │ ├── _source_targets.py │ └── _volcano.py │ ├── pp │ ├── __init__.py │ ├── anndata.py │ ├── data.py │ └── net.py │ └── tl │ ├── __init__.py │ ├── _rankby_group.py │ ├── _rankby_obsm.py │ └── _rankby_order.py └── tests ├── bm ├── test_benchmark.py └── test_pl.py ├── conftest.py ├── ds ├── test_bulk.py ├── test_scell.py ├── test_spatial.py ├── test_toy.py └── test_utils.py ├── mt ├── test_aucell.py ├── test_consensus.py ├── test_decouple.py ├── test_gsea.py ├── test_gsva.py ├── test_mdt.py ├── test_methods.py ├── test_mlm.py ├── test_ora.py ├── test_run.py ├── test_udt.py ├── test_ulm.py ├── test_viper.py ├── test_waggr.py └── test_zscore.py ├── op ├── test_collectri.py ├── test_dorothea.py ├── test_dtype.py ├── test_hallmark.py ├── test_progeny.py ├── test_resource.py └── test_translate.py ├── pl ├── test_Plotter.py ├── test_barplot.py ├── test_dotplot.py ├── test_filter_by_expr.py ├── test_filter_by_prop.py ├── test_filter_samples.py ├── test_leading_edge.py ├── test_network.py ├── test_obsbar.py ├── test_obsm.py ├── test_order.py ├── test_order_targets.py ├── test_source_targets.py └── test_volcano.py ├── pp ├── test_anndata.py ├── test_data.py └── test_net.py ├── test_download.py ├── test_version.py └── tl ├── test_rankby_group.py ├── test_rankby_obsm.py └── test_rankby_order.py /.codecov.yaml: -------------------------------------------------------------------------------- 1 | # Based on pydata/xarray 2 | codecov: 3 | require_ci_to_pass: no 4 | 5 | coverage: 6 | status: 7 | project: 8 | default: 9 | # 
Require 1% coverage, i.e., always succeed 10 | target: 1 11 | patch: false 12 | changes: false 13 | 14 | comment: 15 | layout: diff, flags, files 16 | behavior: once 17 | require_base: no 18 | -------------------------------------------------------------------------------- /.cruft.json: -------------------------------------------------------------------------------- 1 | { 2 | "template": "https://github.com/scverse/cookiecutter-scverse", 3 | "commit": "5842d5cb8510e1d4a037a8f772630d51ec86de96", 4 | "checkout": null, 5 | "context": { 6 | "cookiecutter": { 7 | "project_name": "decoupler", 8 | "package_name": "decoupler", 9 | "project_description": "Python package to perform enrichment analysis from omics data.", 10 | "author_full_name": "Pau Badia i Mompel", 11 | "author_email": "pau.badia@uni-heidelberg.de", 12 | "github_user": "PauBadiaM", 13 | "github_repo": "decoupler", 14 | "license": "BSD 3-Clause License", 15 | "ide_integration": true, 16 | "_copy_without_render": [ 17 | ".github/workflows/build.yaml", 18 | ".github/workflows/test.yaml", 19 | "docs/_templates/autosummary/**.rst" 20 | ], 21 | "_exclude_on_template_update": [ 22 | "CHANGELOG.md", 23 | "LICENSE", 24 | "README.md", 25 | "docs/api.md", 26 | "docs/index.md", 27 | "docs/notebooks/example.ipynb", 28 | "docs/references.bib", 29 | "docs/references.md", 30 | "src/**", 31 | "tests/**" 32 | ], 33 | "_render_devdocs": false, 34 | "_jinja2_env_vars": { 35 | "lstrip_blocks": true, 36 | "trim_blocks": true 37 | }, 38 | "_template": "https://github.com/scverse/cookiecutter-scverse", 39 | "_commit": "5842d5cb8510e1d4a037a8f772630d51ec86de96" 40 | } 41 | }, 42 | "directory": null 43 | } 44 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | indent_style = space 5 | indent_size = 4 6 | end_of_line = lf 7 | charset = utf-8 8 | trim_trailing_whitespace = true 
9 | insert_final_newline = true 10 | 11 | [{*.{yml,yaml,toml},.cruft.json}] 12 | indent_size = 2 13 | 14 | [Makefile] 15 | indent_style = tab 16 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.yml: -------------------------------------------------------------------------------- 1 | name: Bug report 2 | description: Report something that is broken or incorrect 3 | labels: bug 4 | body: 5 | - type: markdown 6 | attributes: 7 | value: | 8 | **Note**: Please read [this guide](https://matthewrocklin.com/blog/work/2018/02/28/minimal-bug-reports) 9 | detailing how to provide the necessary information for us to reproduce your bug. In brief: 10 | * Please provide exact steps how to reproduce the bug in a clean Python environment. 11 | * In case it's not clear what's causing this bug, please provide the data or the data generation procedure. 12 | * Sometimes it is not possible to share the data, but usually it is possible to replicate problems on publicly 13 | available datasets or to share a subset of your data. 14 | 15 | - type: textarea 16 | id: report 17 | attributes: 18 | label: Report 19 | description: A clear and concise description of what the bug is. 20 | validations: 21 | required: true 22 | 23 | - type: textarea 24 | id: versions 25 | attributes: 26 | label: Versions 27 | description: | 28 | Which version of packages. 29 | 30 | Please install `session-info2`, run the following command in a notebook, 31 | click the “Copy as Markdown” button, then paste the results into the text box below. 
32 | 33 | ```python 34 | In[1]: import session_info2; session_info2.session_info(dependencies=True) 35 | ``` 36 | 37 | Alternatively, run this in a console: 38 | 39 | ```python 40 | >>> import session_info2; print(session_info2.session_info(dependencies=True)._repr_mimebundle_()["text/markdown"]) 41 | ``` 42 | render: python 43 | placeholder: | 44 | anndata 0.11.3 45 | ---- ---- 46 | charset-normalizer 3.4.1 47 | coverage 7.7.0 48 | psutil 7.0.0 49 | dask 2024.7.1 50 | jaraco.context 5.3.0 51 | numcodecs 0.15.1 52 | jaraco.functools 4.0.1 53 | Jinja2 3.1.6 54 | sphinxcontrib-jsmath 1.0.1 55 | sphinxcontrib-htmlhelp 2.1.0 56 | toolz 1.0.0 57 | session-info2 0.1.2 58 | PyYAML 6.0.2 59 | llvmlite 0.44.0 60 | scipy 1.15.2 61 | pandas 2.2.3 62 | sphinxcontrib-devhelp 2.0.0 63 | h5py 3.13.0 64 | tblib 3.0.0 65 | setuptools-scm 8.2.0 66 | more-itertools 10.3.0 67 | msgpack 1.1.0 68 | sparse 0.15.5 69 | wrapt 1.17.2 70 | jaraco.collections 5.1.0 71 | numba 0.61.0 72 | pyarrow 19.0.1 73 | pytz 2025.1 74 | MarkupSafe 3.0.2 75 | crc32c 2.7.1 76 | sphinxcontrib-qthelp 2.0.0 77 | sphinxcontrib-serializinghtml 2.0.0 78 | zarr 2.18.4 79 | asciitree 0.3.3 80 | six 1.17.0 81 | sphinxcontrib-applehelp 2.0.0 82 | numpy 2.1.3 83 | cloudpickle 3.1.1 84 | sphinxcontrib-bibtex 2.6.3 85 | natsort 8.4.0 86 | jaraco.text 3.12.1 87 | setuptools 76.1.0 88 | Deprecated 1.2.18 89 | packaging 24.2 90 | python-dateutil 2.9.0.post0 91 | ---- ---- 92 | Python 3.13.2 | packaged by conda-forge | (main, Feb 17 2025, 14:10:22) [GCC 13.3.0] 93 | OS Linux-6.11.0-109019-tuxedo-x86_64-with-glibc2.39 94 | Updated 2025-03-18 15:47 95 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | contact_links: 3 | - name: Scverse Community Forum 4 | url: https://discourse.scverse.org/ 5 | about: If you have questions about “How to do X”, 
please ask them here. 6 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.yml: -------------------------------------------------------------------------------- 1 | name: Feature request 2 | description: Propose a new feature for decoupler 3 | labels: enhancement 4 | body: 5 | - type: textarea 6 | id: description 7 | attributes: 8 | label: Description of feature 9 | description: Please describe your suggestion for a new feature. It might help to describe a problem or use case, plus any alternatives that you have considered. 10 | validations: 11 | required: true 12 | -------------------------------------------------------------------------------- /.github/workflows/build.yaml: -------------------------------------------------------------------------------- 1 | name: Check Build 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | 9 | concurrency: 10 | group: ${{ github.workflow }}-${{ github.ref }} 11 | cancel-in-progress: true 12 | 13 | defaults: 14 | run: 15 | # to fail on error in multiline statements (-e), in pipes (-o pipefail), and on unset variables (-u). 
16 | shell: bash -euo pipefail {0} 17 | 18 | jobs: 19 | package: 20 | runs-on: ubuntu-latest 21 | steps: 22 | - uses: actions/checkout@v4 23 | with: 24 | filter: blob:none 25 | fetch-depth: 0 26 | - name: Install uv 27 | uses: astral-sh/setup-uv@v5 28 | with: 29 | cache-dependency-glob: pyproject.toml 30 | - name: Build package 31 | run: uv build 32 | - name: Check package 33 | run: uvx twine check --strict dist/*.whl 34 | -------------------------------------------------------------------------------- /.github/workflows/release.yaml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | defaults: 8 | run: 9 | # to fail on error in multiline statements (-e), in pipes (-o pipefail), and on unset variables (-u). 10 | shell: bash -euo pipefail {0} 11 | 12 | # Use "trusted publishing", see https://docs.pypi.org/trusted-publishers/ 13 | jobs: 14 | release: 15 | name: Upload release to PyPI 16 | runs-on: ubuntu-latest 17 | environment: 18 | name: pypi 19 | url: https://pypi.org/p/decoupler 20 | permissions: 21 | id-token: write # IMPORTANT: this permission is mandatory for trusted publishing 22 | steps: 23 | - uses: actions/checkout@v4 24 | with: 25 | filter: blob:none 26 | fetch-depth: 0 27 | - name: Install uv 28 | uses: astral-sh/setup-uv@v5 29 | with: 30 | cache-dependency-glob: pyproject.toml 31 | - name: Build package 32 | run: uv build 33 | - name: Publish package distributions to PyPI 34 | uses: pypa/gh-action-pypi-publish@release/v1 35 | -------------------------------------------------------------------------------- /.github/workflows/test.yaml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | schedule: 9 | - cron: "0 5 1,15 * *" 10 | 11 | concurrency: 12 | group: ${{ github.workflow }}-${{ github.ref }} 13 | cancel-in-progress: 
true 14 | 15 | defaults: 16 | run: 17 | # to fail on error in multiline statements (-e), in pipes (-o pipefail), and on unset variables (-u). 18 | shell: bash -euo pipefail {0} 19 | 20 | jobs: 21 | # Get the test environment from hatch as defined in pyproject.toml. 22 | # This ensures that the pyproject.toml is the single point of truth for test definitions and the same tests are 23 | # run locally and on continuous integration. 24 | # Check [[tool.hatch.envs.hatch-test.matrix]] in pyproject.toml and https://hatch.pypa.io/latest/environment/ for 25 | # more details. 26 | get-environments: 27 | runs-on: ubuntu-latest 28 | outputs: 29 | envs: ${{ steps.get-envs.outputs.envs }} 30 | steps: 31 | - uses: actions/checkout@v4 32 | with: 33 | filter: blob:none 34 | fetch-depth: 0 35 | - name: Install uv 36 | uses: astral-sh/setup-uv@v5 37 | - name: Get test environments 38 | id: get-envs 39 | run: | 40 | ENVS_JSON=$(uvx hatch env show --json | jq -c 'to_entries 41 | | map( 42 | select(.key | startswith("hatch-test")) 43 | | { 44 | name: .key, 45 | label: (if (.key | contains("pre")) then .key + " (PRE-RELEASE DEPENDENCIES)" else .key end), 46 | python: .value.python 47 | } 48 | )') 49 | echo "envs=${ENVS_JSON}" | tee $GITHUB_OUTPUT 50 | 51 | # Run tests through hatch. Spawns a separate runner for each environment defined in the hatch matrix obtained above. 
52 | test: 53 | needs: get-environments 54 | 55 | strategy: 56 | fail-fast: false 57 | matrix: 58 | os: [ubuntu-latest] 59 | env: ${{ fromJSON(needs.get-environments.outputs.envs) }} 60 | 61 | name: ${{ matrix.env.label }} 62 | runs-on: ${{ matrix.os }} 63 | 64 | steps: 65 | - uses: actions/checkout@v4 66 | with: 67 | filter: blob:none 68 | fetch-depth: 0 69 | - name: Install uv 70 | uses: astral-sh/setup-uv@v5 71 | with: 72 | python-version: ${{ matrix.env.python }} 73 | cache-dependency-glob: pyproject.toml 74 | - name: create hatch environment 75 | run: uvx hatch env create ${{ matrix.env.name }} 76 | - name: run tests using hatch 77 | env: 78 | MPLBACKEND: agg 79 | PLATFORM: ${{ matrix.os }} 80 | DISPLAY: :42 81 | run: uvx hatch run ${{ matrix.env.name }}:run-cov 82 | - name: generate coverage report 83 | run: uvx hatch run ${{ matrix.env.name }}:coverage xml 84 | - name: Upload coverage 85 | uses: codecov/codecov-action@v4 86 | with: 87 | token: ${{ secrets.CODECOV_TOKEN }} 88 | 89 | # Check that all tests defined above pass. This makes it easy to set a single "required" test in branch 90 | # protection instead of having to update it frequently. See https://github.com/re-actors/alls-green#why. 
91 | check: 92 | name: Tests pass in all hatch environments 93 | if: always() 94 | needs: 95 | - get-environments 96 | - test 97 | runs-on: ubuntu-latest 98 | steps: 99 | - uses: re-actors/alls-green@release/v1 100 | with: 101 | jobs: ${{ toJSON(needs) }} 102 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Temp files 2 | .DS_Store 3 | *~ 4 | buck-out/ 5 | 6 | # Compiled files 7 | .venv/ 8 | __pycache__/ 9 | .*cache/ 10 | 11 | # Distribution / packaging 12 | /dist/ 13 | 14 | # Tests and coverage 15 | /data/ 16 | /node_modules/ 17 | 18 | # docs 19 | /docs/generated/ 20 | /docs/_build/ 21 | 22 | # jupyter 23 | .ipynb_checkpoints/ 24 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | fail_fast: false 2 | default_language_version: 3 | python: python3 4 | default_stages: 5 | - pre-commit 6 | - pre-push 7 | minimum_pre_commit_version: 2.16.0 8 | repos: 9 | - repo: https://github.com/biomejs/pre-commit 10 | rev: v1.9.4 11 | hooks: 12 | - id: biome-format 13 | exclude: ^\.cruft\.json$ # inconsistent indentation with cruft - file never to be modified manually. 
14 | - repo: https://github.com/tox-dev/pyproject-fmt 15 | rev: v2.5.1 16 | hooks: 17 | - id: pyproject-fmt 18 | - repo: https://github.com/astral-sh/ruff-pre-commit 19 | rev: v0.11.5 20 | hooks: 21 | - id: ruff 22 | types_or: [python, pyi, jupyter] 23 | args: [--fix, --exit-non-zero-on-fix] 24 | - id: ruff-format 25 | types_or: [python, pyi, jupyter] 26 | - repo: https://github.com/pre-commit/pre-commit-hooks 27 | rev: v5.0.0 28 | hooks: 29 | - id: detect-private-key 30 | - id: check-ast 31 | - id: end-of-file-fixer 32 | - id: mixed-line-ending 33 | args: [--fix=lf] 34 | - id: trailing-whitespace 35 | - id: check-case-conflict 36 | # Check that there are no merge conflicts (could be generated by template sync) 37 | - id: check-merge-conflict 38 | args: [--assume-in-merge] 39 | - repo: local 40 | hooks: 41 | - id: forbid-to-commit 42 | name: Don't commit rej files 43 | entry: | 44 | Cannot commit .rej files. These indicate merge conflicts that arise during automated template updates. 45 | Fix the merge conflicts manually and remove the .rej files. 46 | language: fail 47 | files: '.*\.rej$' 48 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # https://docs.readthedocs.io/en/stable/config-file/v2.html 2 | version: 2 3 | build: 4 | os: ubuntu-20.04 5 | tools: 6 | python: "3.10" 7 | sphinx: 8 | configuration: docs/conf.py 9 | # disable this for more lenient docs builds 10 | fail_on_warning: true 11 | python: 12 | install: 13 | - method: pip 14 | path: . 
15 | extra_requirements: 16 | - doc 17 | - full 18 | -------------------------------------------------------------------------------- /.vscode/extensions.json: -------------------------------------------------------------------------------- 1 | { 2 | "recommendations": [ 3 | // GitHub integration 4 | "github.vscode-github-actions", 5 | "github.vscode-pull-request-github", 6 | // Language support 7 | "ms-python.python", 8 | "ms-python.vscode-pylance", 9 | "ms-toolsai.jupyter", 10 | "tamasfe.even-better-toml", 11 | // Dependency management 12 | "ninoseki.vscode-mogami", 13 | // Linting and formatting 14 | "editorconfig.editorconfig", 15 | "charliermarsh.ruff", 16 | "biomejs.biome", 17 | ], 18 | } 19 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible attributes. 3 | // Hover to view descriptions of existing attributes. 
4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | "name": "Python: Build Documentation", 9 | "type": "debugpy", 10 | "request": "launch", 11 | "module": "sphinx", 12 | "args": ["-M", "html", ".", "_build"], 13 | "cwd": "${workspaceFolder}/docs", 14 | "console": "internalConsole", 15 | "justMyCode": false, 16 | }, 17 | { 18 | "name": "Python: Debug Test", 19 | "type": "debugpy", 20 | "request": "launch", 21 | "program": "${file}", 22 | "purpose": ["debug-test"], 23 | "console": "internalConsole", 24 | "justMyCode": false, 25 | "env": { 26 | "PYTEST_ADDOPTS": "--color=yes", 27 | }, 28 | "presentation": { 29 | "hidden": true, 30 | }, 31 | }, 32 | ], 33 | } 34 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "[python][json][jsonc]": { 3 | "editor.formatOnSave": true, 4 | }, 5 | "[python]": { 6 | "editor.defaultFormatter": "charliermarsh.ruff", 7 | "editor.codeActionsOnSave": { 8 | "source.fixAll": "always", 9 | "source.organizeImports": "always", 10 | }, 11 | }, 12 | "[json][jsonc]": { 13 | "editor.defaultFormatter": "biomejs.biome", 14 | }, 15 | "python.analysis.typeCheckingMode": "basic", 16 | "python.testing.pytestEnabled": true, 17 | "python.testing.pytestArgs": ["-vv", "--color=yes"], 18 | } 19 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2025, Pau Badia i Mompel, Saez lab 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. 
Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # decoupler - Ensemble of methods to infer enrichment scores 2 | 3 | 4 | 5 | [![Tests][badge-tests]][tests] 6 | [![Documentation][badge-docs]][documentation] 7 | 8 | [![Issues][badge-issues]][issue tracker] 9 | [![Coverage][badge-coverage]][codecoverage] 10 | [![Stars][badge-stars]](https://github.com/scverse/anndata/stargazers) 11 | 12 | [![PyPI][badge-pypi]][pypi] 13 | [![Downloads month][badge-mdown]][down] 14 | [![Downloads all][badge-adown]][down] 15 | 16 | [![Conda version][badge-condav]][conda] 17 | [![Conda downloads][badge-condad]][conda] 18 | 19 | [badge-tests]: https://img.shields.io/github/actions/workflow/status/saezlab/decoupler-py/test.yaml?branch=main 20 | [badge-docs]: https://img.shields.io/readthedocs/decoupler-py 21 | [badge-condav]: https://img.shields.io/conda/vn/conda-forge/decoupler-py.svg 22 | [badge-condad]: https://img.shields.io/conda/dn/conda-forge/decoupler-py.svg 23 | [badge-issues]: https://img.shields.io/github/issues/saezlab/decoupler-py 24 | [badge-coverage]: https://codecov.io/gh/saezlab/decoupler-py/branch/main/graph/badge.svg 25 | [badge-pypi]: https://img.shields.io/pypi/v/decoupler.svg 26 | [badge-mdown]: https://static.pepy.tech/badge/decoupler/month 27 | [badge-adown]: https://static.pepy.tech/badge/decoupler 28 | [badge-stars]: https://img.shields.io/github/stars/saezlab/decoupler-py?style=flat&logo=github&color=yellow 29 | 30 | `decoupler` is a python package containing different enrichment statistical 31 | methods to extract biologically driven scores 32 | from omics data within a unified framework. This is its faster and memory efficient Python implementation, 33 | a deprecated version in R can be found [here](https://github.com/saezlab/decoupler). 
34 | 35 | It is a package from the [scverse][] ecosystem {cite:p}`scverse`, 36 | designed for easy interoperability with `anndata`, `scanpy` {cite:p}`scanpy` and other related packages. 37 | 38 | ## Getting started 39 | 40 | Please refer to the [documentation][], 41 | in particular, the [API documentation][]. 42 | 43 | ## Installation 44 | 45 | You need to have Python 3.10 or newer installed on your system. 46 | If you don't have Python installed, we recommend installing [uv][]. 47 | 48 | There are several alternative options to install decoupler: 49 | 50 | 1. Install the latest stable release from [PyPI][pypi] with minimal dependancies: 51 | 52 | ```bash 53 | pip install decoupler 54 | ``` 55 | 56 | 2. Install the latest stable full release from [PyPI][pypi] with extra dependancies: 57 | 58 | ```bash 59 | pip install decoupler[full] 60 | ``` 61 | 62 | 3. Install the latest stable version from [conda-forge][conda] using mamba or conda (pay attention to the `-py` suffix at the end): 63 | 64 | ```bash 65 | mamba create -n=dcp conda-forge::decoupler-py 66 | ``` 67 | 68 | 4. Install the latest development version: 69 | 70 | ```bash 71 | pip install git+https://github.com/saezlab/decoupler-py.git@main 72 | ``` 73 | 74 | ## Release notes 75 | 76 | See the [changelog][]. 77 | 78 | ## Contact 79 | 80 | For questions and help requests, you can reach out in the [scverse discourse][]. 81 | If you found a bug, please use the [issue tracker][]. 82 | 83 | ## Citation 84 | 85 | > Badia-i-Mompel P., Vélez Santiago J., Braunger J., Geiss C., Dimitrov D., 86 | Müller-Dott S., Taus P., Dugourd A., Holland C.H., Ramirez Flores R.O. 87 | and Saez-Rodriguez J. 2022. decoupleR: Ensemble of computational methods 88 | to infer biological activities from omics data. Bioinformatics Advances. 
89 | 90 | 91 | [uv]: https://github.com/astral-sh/uv 92 | [scverse discourse]: https://discourse.scverse.org/ 93 | [scverse]: https://scverse.org/ 94 | [issue tracker]: https://github.com/saezlab/decoupler-py/issues 95 | [tests]: https://github.com/saezlab/decoupler-py/actions/workflows/test.yaml 96 | [documentation]: https://decoupler-py.readthedocs.io 97 | [changelog]: https://decoupler-py.readthedocs.io/en/latest/changelog.html 98 | [api documentation]: https://decoupler-py.readthedocs.io/en/latest/api.html 99 | [pypi]: https://pypi.org/project/decoupler 100 | [down]: https://pepy.tech/project/decoupler 101 | [conda]: https://anaconda.org/conda-forge/decoupler-py 102 | [codecoverage]: https://codecov.io/gh/saezlab/decoupler-py 103 | -------------------------------------------------------------------------------- /biome.jsonc: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://biomejs.dev/schemas/1.9.4/schema.json", 3 | "formatter": { "useEditorconfig": true }, 4 | "overrides": [ 5 | { 6 | "include": ["./.vscode/*.json", "**/*.jsonc"], 7 | "json": { 8 | "formatter": { "trailingCommas": "all" }, 9 | "parser": { 10 | "allowComments": true, 11 | "allowTrailingCommas": true, 12 | }, 13 | }, 14 | }, 15 | ], 16 | } 17 | -------------------------------------------------------------------------------- /docs/_static/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saezlab/decoupler-py/b3471b11d76f9dd31499c64f7994db4f20714734/docs/_static/.gitkeep -------------------------------------------------------------------------------- /docs/_static/css/custom.css: -------------------------------------------------------------------------------- 1 | /* Reduce the font size in data frames - See https://github.com/scverse/cookiecutter-scverse/issues/193 */ 2 | div.cell_output table.dataframe { 3 | font-size: 0.8em; 4 | } 5 | 6 | /* Adjust the logo 
size */ 7 | .logo img { 8 | width: 50%; /* or any percentage you want */ 9 | height: auto; /* maintain aspect ratio */ 10 | } 11 | 12 | img.no-scaled-link { 13 | background: transparent !important; 14 | } -------------------------------------------------------------------------------- /docs/_static/images/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saezlab/decoupler-py/b3471b11d76f9dd31499c64f7994db4f20714734/docs/_static/images/logo.png -------------------------------------------------------------------------------- /docs/_static/images/mlm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saezlab/decoupler-py/b3471b11d76f9dd31499c64f7994db4f20714734/docs/_static/images/mlm.png -------------------------------------------------------------------------------- /docs/_static/images/ora.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saezlab/decoupler-py/b3471b11d76f9dd31499c64f7994db4f20714734/docs/_static/images/ora.png -------------------------------------------------------------------------------- /docs/_static/images/ulm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saezlab/decoupler-py/b3471b11d76f9dd31499c64f7994db4f20714734/docs/_static/images/ulm.png -------------------------------------------------------------------------------- /docs/_templates/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saezlab/decoupler-py/b3471b11d76f9dd31499c64f7994db4f20714734/docs/_templates/.gitkeep -------------------------------------------------------------------------------- /docs/_templates/autosummary/class.rst: -------------------------------------------------------------------------------- 1 | {{ fullname 
| escape | underline}} 2 | 3 | .. currentmodule:: {{ module }} 4 | 5 | .. add toctree option to make autodoc generate the pages 6 | 7 | .. autoclass:: {{ objname }} 8 | 9 | {% block attributes %} 10 | {% if attributes %} 11 | Attributes table 12 | ~~~~~~~~~~~~~~~~ 13 | 14 | .. autosummary:: 15 | {% for item in attributes %} 16 | ~{{ name }}.{{ item }} 17 | {%- endfor %} 18 | {% endif %} 19 | {% endblock %} 20 | 21 | {% block methods %} 22 | {% if methods %} 23 | Methods table 24 | ~~~~~~~~~~~~~ 25 | 26 | .. autosummary:: 27 | {% for item in methods %} 28 | {%- if item != '__init__' %} 29 | ~{{ name }}.{{ item }} 30 | {%- endif -%} 31 | {%- endfor %} 32 | {% endif %} 33 | {% endblock %} 34 | 35 | {% block attributes_documentation %} 36 | {% if attributes %} 37 | Attributes 38 | ~~~~~~~~~~ 39 | 40 | {% for item in attributes %} 41 | 42 | .. autoattribute:: {{ [objname, item] | join(".") }} 43 | {%- endfor %} 44 | 45 | {% endif %} 46 | {% endblock %} 47 | 48 | {% block methods_documentation %} 49 | {% if methods %} 50 | Methods 51 | ~~~~~~~ 52 | 53 | {% for item in methods %} 54 | {%- if item != '__init__' %} 55 | 56 | .. automethod:: {{ [objname, item] | join(".") }} 57 | {%- endif -%} 58 | {%- endfor %} 59 | 60 | {% endif %} 61 | {% endblock %} 62 | -------------------------------------------------------------------------------- /docs/api/bm.md: -------------------------------------------------------------------------------- 1 | # Benchmark 2 | 3 | 4 | ## Pipeline 5 | ```{eval-rst} 6 | .. module:: decoupler.bm 7 | .. currentmodule:: decoupler 8 | 9 | .. autosummary:: 10 | :toctree: generated 11 | 12 | bm.benchmark 13 | ``` 14 | 15 | ## Metrics 16 | 17 | ```{eval-rst} 18 | .. module:: decoupler.bm.metric 19 | .. currentmodule:: decoupler 20 | 21 | .. autosummary:: 22 | :toctree: generated 23 | 24 | bm.metric.auc 25 | bm.metric.fscore 26 | bm.metric.qrank 27 | bm.metric.hmean 28 | ``` 29 | 30 | ## Plotting 31 | 32 | ```{eval-rst} 33 | .. 
module:: decoupler.bm.pl 34 | .. currentmodule:: decoupler 35 | 36 | .. autosummary:: 37 | :toctree: generated 38 | 39 | bm.pl.auc 40 | bm.pl.fscore 41 | bm.pl.qrank 42 | bm.pl.bar 43 | bm.pl.summary 44 | ``` 45 | -------------------------------------------------------------------------------- /docs/api/ds.md: -------------------------------------------------------------------------------- 1 | # Datasets 2 | 3 | ## Bulk 4 | ```{eval-rst} 5 | .. module:: decoupler.ds 6 | .. currentmodule:: decoupler 7 | 8 | .. autosummary:: 9 | :toctree: generated 10 | 11 | ds.hsctgfb 12 | ds.knocktf 13 | ``` 14 | 15 | ## Single-cell 16 | ```{eval-rst} 17 | .. autosummary:: 18 | :toctree: generated 19 | 20 | ds.covid5k 21 | ds.erygast1k 22 | ds.pbmc3k 23 | ``` 24 | 25 | ## Spatial 26 | ```{eval-rst} 27 | .. autosummary:: 28 | :toctree: generated 29 | 30 | ds.msvisium 31 | ``` 32 | 33 | ## Toy 34 | ```{eval-rst} 35 | .. autosummary:: 36 | :toctree: generated 37 | 38 | ds.toy 39 | ds.toy_bench 40 | ``` 41 | 42 | ## Utils 43 | ```{eval-rst} 44 | .. autosummary:: 45 | :toctree: generated 46 | 47 | ds.ensmbl_to_symbol 48 | ``` 49 | -------------------------------------------------------------------------------- /docs/api/index.md: -------------------------------------------------------------------------------- 1 | # API 2 | 3 | Import decoupler as: 4 | 5 | ``` 6 | import decoupler as dc 7 | ``` 8 | 9 | ```{toctree} 10 | :maxdepth: 2 11 | 12 | bm 13 | ds 14 | mt 15 | op 16 | pp 17 | ``` 18 | 19 | ```{toctree} 20 | :maxdepth: 1 21 | 22 | pl 23 | tl 24 | ``` -------------------------------------------------------------------------------- /docs/api/mt.md: -------------------------------------------------------------------------------- 1 | # Methods 2 | 3 | ## Single methods 4 | 5 | ```{eval-rst} 6 | .. module:: decoupler.mt 7 | .. currentmodule:: decoupler 8 | 9 | .. 
autosummary:: 10 | :toctree: generated 11 | 12 | mt.aucell 13 | mt.gsea 14 | mt.gsva 15 | mt.mdt 16 | mt.mlm 17 | mt.ora 18 | mt.udt 19 | mt.ulm 20 | mt.viper 21 | mt.waggr 22 | mt.zscore 23 | ``` 24 | 25 | ## Multiple methods 26 | 27 | ```{eval-rst} 28 | .. autosummary:: 29 | :toctree: generated 30 | 31 | mt.decouple 32 | mt.consensus 33 | ``` -------------------------------------------------------------------------------- /docs/api/op.md: -------------------------------------------------------------------------------- 1 | # OmniPath 2 | 3 | ## Resources 4 | 5 | ```{eval-rst} 6 | .. module:: decoupler.op 7 | .. currentmodule:: decoupler 8 | 9 | .. autosummary:: 10 | :toctree: generated 11 | 12 | op.collectri 13 | op.dorothea 14 | op.hallmark 15 | op.progeny 16 | op.resource 17 | ``` 18 | 19 | ## Utils 20 | 21 | ```{eval-rst} 22 | .. autosummary:: 23 | :toctree: generated 24 | 25 | op.show_resources 26 | op.show_organisms 27 | op.translate 28 | ``` -------------------------------------------------------------------------------- /docs/api/pl.md: -------------------------------------------------------------------------------- 1 | # Plotting 2 | 3 | ```{eval-rst} 4 | .. module:: decoupler.pl 5 | .. currentmodule:: decoupler 6 | 7 | .. autosummary:: 8 | :toctree: generated 9 | 10 | pl.barplot 11 | pl.dotplot 12 | pl.filter_by_expr 13 | pl.filter_by_prop 14 | pl.filter_samples 15 | pl.leading_edge 16 | pl.network 17 | pl.obsbar 18 | pl.obsm 19 | pl.order_targets 20 | pl.order 21 | pl.source_targets 22 | pl.volcano 23 | ``` -------------------------------------------------------------------------------- /docs/api/pp.md: -------------------------------------------------------------------------------- 1 | # Preprocessing 2 | 3 | ## Data 4 | ```{eval-rst} 5 | .. module:: decoupler.pp 6 | .. currentmodule:: decoupler 7 | 8 | .. autosummary:: 9 | :toctree: generated 10 | 11 | pp.extract 12 | ``` 13 | 14 | ## Network 15 | ```{eval-rst} 16 | .. 
autosummary:: 17 | :toctree: generated 18 | 19 | pp.read_gmt 20 | pp.prune 21 | pp.adjmat 22 | pp.idxmat 23 | pp.shuffle_net 24 | pp.net_corr 25 | ``` 26 | 27 | ## AnnData 28 | ```{eval-rst} 29 | .. autosummary:: 30 | :toctree: generated 31 | 32 | pp.get_obsm 33 | pp.swap_layer 34 | pp.pseudobulk 35 | pp.filter_samples 36 | pp.filter_by_expr 37 | pp.filter_by_prop 38 | pp.knn 39 | pp.bin_order 40 | ``` 41 | -------------------------------------------------------------------------------- /docs/api/tl.md: -------------------------------------------------------------------------------- 1 | # Tools 2 | 3 | ```{eval-rst} 4 | .. module:: decoupler.tl 5 | .. currentmodule:: decoupler 6 | 7 | .. autosummary:: 8 | :toctree: generated 9 | 10 | tl.rankby_group 11 | tl.rankby_obsm 12 | tl.rankby_order 13 | ``` 14 | -------------------------------------------------------------------------------- /docs/changelog.md: -------------------------------------------------------------------------------- 1 | ```{include} ../CHANGELOG.md 2 | 3 | ``` 4 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | import sys 9 | from datetime import datetime 10 | from importlib.metadata import metadata 11 | from pathlib import Path 12 | 13 | HERE = Path(__file__).parent 14 | sys.path.insert(0, str(HERE / "extensions")) 15 | 16 | 17 | # -- Project information ----------------------------------------------------- 18 | 19 | # NOTE: If you installed your project in editable mode, this might be stale. 
20 | # If this is the case, reinstall it to refresh the metadata 21 | info = metadata("decoupler") 22 | project_name = info["Name"] 23 | author = info["Author"] 24 | copyright = f"{datetime.now():%Y}, {author}." 25 | version = info["Version"] 26 | urls = dict(pu.split(", ") for pu in info.get_all("Project-URL")) 27 | repository_url = urls["Source"] 28 | 29 | # The full version, including alpha/beta/rc tags 30 | release = info["Version"] 31 | 32 | bibtex_bibfiles = ["references.bib"] 33 | templates_path = ["_templates"] 34 | nitpicky = True # Warn about broken links 35 | needs_sphinx = "4.0" 36 | 37 | html_context = { 38 | "display_github": True, # Integrate GitHub 39 | "github_user": "PauBadiaM", 40 | "github_repo": project_name, 41 | "github_version": "main", 42 | "conf_py_path": "/docs/", 43 | } 44 | 45 | # -- General configuration --------------------------------------------------- 46 | 47 | # Add any Sphinx extension module names here, as strings. 48 | # They can be extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 
49 | extensions = [ 50 | "myst_nb", 51 | "sphinx_copybutton", 52 | "sphinx.ext.autodoc", 53 | "sphinx.ext.intersphinx", 54 | "sphinx.ext.autosummary", 55 | "sphinx.ext.napoleon", 56 | "sphinxcontrib.bibtex", 57 | "sphinx_autodoc_typehints", 58 | "sphinx_tabs.tabs", 59 | "sphinx.ext.mathjax", 60 | "IPython.sphinxext.ipython_console_highlighting", 61 | "sphinxext.opengraph", 62 | *[p.stem for p in (HERE / "extensions").glob("*.py")], 63 | ] 64 | 65 | autosummary_generate = True 66 | autodoc_member_order = "groupwise" 67 | default_role = "literal" 68 | napoleon_google_docstring = False 69 | napoleon_numpy_docstring = True 70 | napoleon_include_init_with_doc = False 71 | napoleon_use_rtype = True # having a separate entry generally helps readability 72 | napoleon_use_param = True 73 | myst_heading_anchors = 6 # create anchors for h1-h6 74 | myst_enable_extensions = [ 75 | "amsmath", 76 | "colon_fence", 77 | "deflist", 78 | "dollarmath", 79 | "html_image", 80 | "html_admonition", 81 | ] 82 | myst_url_schemes = ("http", "https", "mailto") 83 | nb_output_stderr = "remove" 84 | nb_execution_mode = "off" 85 | nb_merge_streams = True 86 | typehints_defaults = "braces" 87 | 88 | source_suffix = { 89 | ".rst": "restructuredtext", 90 | ".ipynb": "myst-nb", 91 | ".myst": "myst-nb", 92 | } 93 | 94 | intersphinx_mapping = { 95 | "python": ("https://docs.python.org/3", None), 96 | "anndata": ("https://anndata.readthedocs.io/en/stable/", None), 97 | "scanpy": ("https://scanpy.readthedocs.io/en/stable/", None), 98 | "numpy": ("https://numpy.org/doc/stable/", None), 99 | "matplotlib": ("https://matplotlib.org/stable/", None), 100 | 'pandas': ('https://pandas.pydata.org/pandas-docs/stable/', None), 101 | } 102 | 103 | # List of patterns, relative to source directory, that match files and 104 | # directories to ignore when looking for source files. 105 | # This pattern also affects html_static_path and html_extra_path. 
106 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "**.ipynb_checkpoints"] 107 | 108 | 109 | # -- Options for HTML output ------------------------------------------------- 110 | 111 | # The theme to use for HTML and HTML Help pages. See the documentation for 112 | # a list of builtin themes. 113 | # 114 | html_theme = "sphinx_book_theme" 115 | html_static_path = ["_static"] 116 | html_css_files = ["css/custom.css"] 117 | html_title = project_name 118 | html_logo = '_static/images/logo.png' 119 | html_favicon = '_static/images/logo.png' 120 | 121 | html_theme_options = { 122 | "repository_url": repository_url, 123 | "use_repository_button": True, 124 | "path_to_docs": "docs/", 125 | "navigation_with_keys": False, 126 | } 127 | 128 | pygments_style = "default" 129 | 130 | nitpick_ignore = [ 131 | # If building the documentation fails because of a missing link that is outside your control, 132 | # you can add an exception to this list. 133 | # ("py:class", "igraph.Graph"), 134 | ] 135 | -------------------------------------------------------------------------------- /docs/extensions/typed_returns.py: -------------------------------------------------------------------------------- 1 | # code from https://github.com/theislab/scanpy/blob/master/docs/extensions/typed_returns.py 2 | # with some minor adjustment 3 | from __future__ import annotations 4 | 5 | import re 6 | from collections.abc import Generator, Iterable 7 | 8 | from sphinx.application import Sphinx 9 | from sphinx.ext.napoleon import NumpyDocstring 10 | 11 | 12 | def _process_return(lines: Iterable[str]) -> Generator[str, None, None]: 13 | for line in lines: 14 | if m := re.fullmatch(r"(?P\w+)\s+:\s+(?P[\w.]+)", line): 15 | yield f"-{m['param']} (:class:`~{m['type']}`)" 16 | else: 17 | yield line 18 | 19 | 20 | def _parse_returns_section(self: NumpyDocstring, section: str) -> list[str]: 21 | lines_raw = self._dedent(self._consume_to_next_section()) 22 | if lines_raw[0] == ":": 23 | del 
lines_raw[0] 24 | lines = self._format_block(":returns: ", list(_process_return(lines_raw))) 25 | if lines and lines[-1]: 26 | lines.append("") 27 | return lines 28 | 29 | 30 | def setup(app: Sphinx): 31 | """Set app.""" 32 | NumpyDocstring._parse_returns_section = _parse_returns_section 33 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | ```{include} ../README.md 2 | 3 | ``` 4 | 5 | ```{toctree} 6 | :maxdepth: 1 7 | :hidden: true 8 | 9 | api/index 10 | notebooks/index 11 | changelog 12 | references 13 | ``` 14 | -------------------------------------------------------------------------------- /docs/notebooks/bench/index.md: -------------------------------------------------------------------------------- 1 | # Benchmark 2 | ```{toctree} 3 | :maxdepth: 1 4 | 5 | rna 6 | ``` 7 | -------------------------------------------------------------------------------- /docs/notebooks/bulk/index.md: -------------------------------------------------------------------------------- 1 | # Bulk 2 | ```{toctree} 3 | :maxdepth: 1 4 | 5 | rna 6 | ``` 7 | -------------------------------------------------------------------------------- /docs/notebooks/index.md: -------------------------------------------------------------------------------- 1 | # Tutorials 2 | ```{toctree} 3 | :maxdepth: 1 4 | 5 | example 6 | scell/index 7 | spatial/index 8 | bulk/index 9 | omnipath/index 10 | bench/index 11 | ``` 12 | -------------------------------------------------------------------------------- /docs/notebooks/omnipath/index.md: -------------------------------------------------------------------------------- 1 | # OmniPath 2 | ```{toctree} 3 | :maxdepth: 1 4 | 5 | licenses 6 | orthologs 7 | ``` 8 | -------------------------------------------------------------------------------- /docs/notebooks/scell/index.md: 
-------------------------------------------------------------------------------- 1 | # Single-cell 2 | ```{toctree} 3 | :maxdepth: 1 4 | 5 | rna_sc 6 | rna_psbk 7 | rna_pstime 8 | ``` 9 | -------------------------------------------------------------------------------- /docs/notebooks/spatial/index.md: -------------------------------------------------------------------------------- 1 | # Spatial 2 | ```{toctree} 3 | :maxdepth: 1 4 | 5 | rna_visium 6 | ``` 7 | -------------------------------------------------------------------------------- /docs/references.md: -------------------------------------------------------------------------------- 1 | # References 2 | 3 | ```{bibliography} 4 | :cited: 5 | ``` 6 | -------------------------------------------------------------------------------- /src/decoupler/_Method.py: -------------------------------------------------------------------------------- 1 | from typing import Callable 2 | import textwrap 3 | 4 | import pandas as pd 5 | 6 | from decoupler._docs import docs 7 | from decoupler._datatype import DataType 8 | from decoupler.mt._run import _run 9 | 10 | 11 | class MethodMeta: 12 | def __init__( 13 | self, 14 | name: str, 15 | desc: str, 16 | func: Callable, 17 | stype: str, 18 | adj: bool, 19 | weight: bool, 20 | test: bool, 21 | limits: tuple, 22 | reference: str, 23 | ): 24 | self.name = name 25 | self.desc = desc 26 | self.func = func 27 | self.stype = stype 28 | self.adj = adj 29 | self.weight = weight 30 | self.test = test 31 | self.limits = limits 32 | self.reference = reference 33 | 34 | def meta(self) -> pd.DataFrame: 35 | meta = pd.DataFrame([{ 36 | 'name': self.name, 37 | 'desc': self.desc, 38 | 'stype': self.stype, 39 | 'weight': self.weight, 40 | 'test': self.test, 41 | 'limits': self.limits, 42 | 'reference': self.reference 43 | }]) 44 | return meta 45 | 46 | 47 | #@docs.dedent 48 | class Method(MethodMeta): 49 | def __init__( 50 | self, 51 | _method: MethodMeta, 52 | ): 53 | super().__init__( 54 | 
def _show_methods(methods):
    """Summarize a collection of method objects as a single table.

    Each method's one-row ``meta()`` frame is stacked into one
    ``pandas.DataFrame`` with a fresh 0..n-1 integer index.
    """
    rows = [m.meta() for m in methods]
    table = pd.concat(rows)
    return table.reset_index(drop=True)
20 | 21 | Parameters 22 | ---------- 23 | %(plot)s 24 | """ 25 | # Validate 26 | assert isinstance(ax, Axes) or ax is None, \ 27 | 'ax must be matplotlib.axes._axes.Axes or None' 28 | assert isinstance(figsize, tuple), \ 29 | 'figsize must be tuple' 30 | assert isinstance(dpi, (int, float)) and dpi > 0, \ 31 | 'dpi must be numerical and > 0' 32 | assert isinstance(return_fig, bool), \ 33 | 'return_fig must be bool' 34 | assert isinstance(save, str) or save is None, \ 35 | 'save must be str or None' 36 | self.ax = ax 37 | self.figsize = figsize 38 | self.dpi = dpi 39 | self.return_fig = return_fig 40 | self.save = save 41 | if self.ax is None: 42 | self.fig, self.ax = plt.subplots(1, 1, figsize=self.figsize, dpi=self.dpi, tight_layout=True) 43 | else: 44 | self.fig = self.ax.figure 45 | 46 | def _return(self): 47 | if self.save is not None: 48 | self.fig.savefig(self.save, bbox_inches='tight') 49 | if self.return_fig: 50 | return self.fig 51 | -------------------------------------------------------------------------------- /src/decoupler/__init__.py: -------------------------------------------------------------------------------- 1 | from importlib.metadata import version 2 | 3 | from . 
import io

import requests
from tqdm import tqdm
import pandas as pd

from decoupler._log import _log

URL_DBS = 'https://omnipathdb.org/annotations?databases='
URL_INT = 'https://omnipathdb.org/interactions/?genesymbols=1&'


def _download(
    url: str,
    verbose: bool = False,
    timeout: float | int | None = None,
    **kwargs,
) -> pd.DataFrame:
    """
    Download a tabular file and parse it with :func:`pandas.read_csv`.

    Parameters
    ----------
    url
        Address of the file to download.
    verbose
        Whether to log progress messages and display a progress bar.
    timeout
        Seconds to wait for the server before aborting. ``None`` (default,
        the original behavior) waits indefinitely.
    **kwargs
        Forwarded verbatim to :func:`pandas.read_csv`.

    Returns
    -------
    The downloaded table as a :class:`pandas.DataFrame`.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status code.
    """
    assert isinstance(url, str), 'url must be str'
    m = f'Downloading {url}'
    _log(m, level='info', verbose=verbose)
    chunks = []
    # Stream in 8 KiB chunks so large downloads are never held twice in memory
    with requests.get(url, stream=True, timeout=timeout) as r:
        r.raise_for_status()
        with tqdm(unit='B', unit_scale=True, desc="Progress", disable=not verbose) as pbar:
            for chunk in r.iter_content(chunk_size=8192):
                if chunk:  # skip keep-alive chunks
                    chunks.append(chunk)
                    pbar.update(len(chunk))
    # Parse the in-memory payload into pandas
    data = io.BytesIO(b"".join(chunks))
    df = pd.read_csv(data, **kwargs)
    # Fixed: was a placeholder-less f-string
    _log('Download finished', level='info', verbose=verbose)
    return df
logging.basicConfig( 4 | level=logging.INFO, 5 | format="%(asctime)s | [%(levelname)s] %(message)s", 6 | datefmt="%Y-%m-%d %H:%M:%S" 7 | ) 8 | 9 | def _log( 10 | message: str, 11 | level: str = 'info', 12 | verbose: bool = False 13 | ) -> None: 14 | """ 15 | Log a message with a specified logging level. 16 | 17 | Parameters 18 | ---------- 19 | message 20 | The message to log. 21 | level 22 | The logging level. 23 | verbose 24 | Whether to emit the log. 25 | """ 26 | level = level.lower() 27 | if verbose: 28 | if level == "warn": 29 | logging.warning(message) 30 | elif level == "info": 31 | logging.info(message) 32 | -------------------------------------------------------------------------------- /src/decoupler/_odeps.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | import types 3 | from typing import TYPE_CHECKING 4 | 5 | 6 | def _try_import( 7 | name: str 8 | ) -> types.ModuleType | None: 9 | try: 10 | with warnings.catch_warnings(): 11 | warnings.filterwarnings("ignore", category=FutureWarning, module=name) 12 | module = __import__(name, fromlist=[""]) 13 | return module 14 | except ImportError: 15 | return None 16 | 17 | 18 | def _check_import( 19 | module: types.ModuleType 20 | ) -> None: 21 | if module is None: 22 | name = module.__name__ 23 | raise ImportError( 24 | f"{name} is not installed. 
Please install it using:\n" 25 | f" pip install {name}" 26 | "or install decoupler with full dependencies:\n" 27 | " pip install 'decoupler[full]'" 28 | ) 29 | 30 | 31 | # Handle optional dependencies 32 | ig = _try_import("igraph") 33 | if ig is not None: 34 | if TYPE_CHECKING: 35 | from igraph import Graph 36 | else: 37 | Graph = ig.Graph 38 | else: 39 | if TYPE_CHECKING: 40 | from typing import Any as Graph 41 | else: 42 | Graph = None 43 | 44 | xgboost = _try_import("xgboost") 45 | dcor = _try_import("dcor") 46 | -------------------------------------------------------------------------------- /src/decoupler/bm/__init__.py: -------------------------------------------------------------------------------- 1 | from decoupler.bm._run import benchmark 2 | from decoupler.bm import metric 3 | from decoupler.bm import pl 4 | -------------------------------------------------------------------------------- /src/decoupler/bm/_pp.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | 3 | import pandas as pd 4 | import numpy as np 5 | import scipy.sparse as sps 6 | from anndata import AnnData 7 | 8 | from decoupler._log import _log 9 | from decoupler.pp.net import prune 10 | 11 | 12 | def _validate_groupby( 13 | obs: pd.DataFrame, 14 | groupby: str | list | None, 15 | runby: str, 16 | ) -> None | list: 17 | assert isinstance(groupby, (str, list)) or groupby is None, \ 18 | 'groupby must be str, list or None' 19 | assert isinstance(runby, str) and runby in ['expr', 'source'], \ 20 | 'runby must be str and either expr or source' 21 | if groupby is not None: 22 | if type(groupby) is str: 23 | groupby = [groupby] 24 | for grp_i in groupby: 25 | if type(grp_i) is str: 26 | grp_i = [grp_i] 27 | # For each group inside each groupby 28 | for grp_j in grp_i: 29 | assert not ('source' == grp_j and runby == 'source'), \ 30 | f'source cannot be in groupby if runby="source"' 31 | # Assert that columns exist in obs 32 | assert 
def _validate_obs(
    obs: pd.DataFrame,
) -> None:
    """Check that ``obs`` carries the benchmark annotation columns.

    ``source`` must name the perturbed regulator and ``type_p`` must be a
    numeric column restricted to the values -1 or +1 (perturbation sign).
    An ``AssertionError`` is raised on the first violated rule.
    """
    cols = obs.columns
    assert 'source' in cols, \
        'source must be in adata.obs.columns'
    assert 'type_p' in cols, \
        'type_p must be in adata.obs.columns'
    type_p = obs['type_p']
    assert pd.api.types.is_numeric_dtype(type_p), \
        'type_p must contain numeric values'
    observed = type_p.sort_values().unique()
    allowed = np.array([-1, 1])
    assert np.isin(observed, allowed).all(), \
        'type_p must be -1 or +1'
from typing import Tuple

import numpy as np


def _validate_bool(
    y_true: np.ndarray,
    y_score: np.ndarray,
) -> None:
    """Assert that ``y_true`` is a 0/1 two-class array matching ``y_score`` in size."""
    assert isinstance(y_true, np.ndarray), 'y_true must be numpy.ndarray'
    assert isinstance(y_score, np.ndarray), 'y_score must be numpy.ndarray'
    unq = np.sort(np.unique(y_true))
    m = 'y_true must contain two binary classes, 0 and 1'
    assert unq.size <= 2, m
    lbl = np.array([0, 1])
    assert np.all(unq == lbl), m
    assert y_true.size == y_score.size, \
        'y_true and y_score must have the same size'


def _binary_clf_curve(
    y_true: np.ndarray,
    y_score: np.ndarray,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Return cumulative FPs, TPs and thresholds at each distinct score.

    Scores are scanned in decreasing order, mirroring the construction of
    ROC and precision-recall curves.
    """
    # Sort scores in decreasing order
    idx = np.flip(np.argsort(y_score))
    y_score = y_score[idx]
    y_true = y_true[idx]
    # Find unique value idxs (distinct thresholds)
    idx = np.where(np.diff(y_score))[0]
    # Append a value for the end of the curve
    idx = np.append(idx, y_true.size - 1)
    # Accumulate TP with decreasing threshold (typo fixed: "Acucmulate")
    tps = np.cumsum(y_true)[idx]
    fps = 1 + idx - tps
    return fps, tps, y_score[idx]


def auroc(
    y_true: np.ndarray,
    y_score: np.ndarray,
) -> float:
    """Area under the ROC curve for binary labels ``y_true`` scored by ``y_score``."""
    _validate_bool(y_true=y_true, y_score=y_score)
    # Compute binary curve
    fps, tps, thr = _binary_clf_curve(y_true, y_score)
    # Add limits so the curve starts at the origin
    fps = np.append(0., fps)
    tps = np.append(0., tps)
    thr = np.append(thr[0] + 1., thr)
    # Compute ratios
    fpr = fps / fps[-1]
    tpr = tps / tps[-1]
    # Trapezoidal integration over fpr
    dx = np.diff(np.ascontiguousarray(fpr))
    # Get direction slope (fpr is built non-decreasing, kept for safety)
    if np.all(dx <= 0):
        d = -1.
    else:
        d = 1.
    # Compute area
    ret = np.sum((dx * (tpr[1:] + tpr[:-1]) / 2.0))
    auc = d * ret
    return auc


def auprc(
    y_true: np.ndarray,
    y_score: np.ndarray,
    pi0: float = 0.5
) -> float:
    """Calibrated area under the precision-recall curve.

    ``pi0`` is the reference class prior used to calibrate precision so
    that runs with different class imbalance are comparable.
    """
    _validate_bool(y_true=y_true, y_score=y_score)
    assert isinstance(pi0, (int, float)) and 0. <= pi0 <= 1., \
        'pi0 must be numeric and between 0 and 1'
    # Compute binary curve
    fps, tps, thr = _binary_clf_curve(y_true, y_score)
    # Compute prc
    ps = tps + fps
    msk = ps != 0
    # Siblini W., Fréry J., He-Guelton L., Oblé F., Wang YQ. (2020) Master
    # Your Metrics with Calibration. In: Berthold M., Feelders A., Krempl G.
    # (eds) Advances in Intelligent Data Analysis XVIII. IDA 2020. Lecture
    # Notes in Computer Science, vol 12080. Springer, Cham
    pi = np.sum(y_true) / y_true.size
    ratio = pi * (1 - pi0) / (pi0 * (1 - pi))
    prc = tps[msk] / (tps[msk] + ratio * fps[msk])
    # Compute rcl
    rcl = tps / tps[-1]
    # Flip and add limits
    prc = np.append(np.flip(prc), 1)
    rcl = np.append(np.flip(rcl), 0)
    thr = np.flip(thr)
    dx = np.diff(np.ascontiguousarray(rcl))
    auc = -np.sum(dx * prc[:-1])
    return auc


def auc(
    y_true: np.ndarray,
    y_score: np.ndarray,
    pi0: float = 0.5,
) -> Tuple[float, float]:
    """
    Area Under the Curve.

    Normalizes ``y_score`` per row (experiment), flattens, drops NaNs and
    returns both the ROC and the calibrated PR areas.

    Parameters
    ----------
    y_true
        2D binary ground-truth matrix (experiments x sources), values 0/1.
    y_score
        2D enrichment-score matrix matching ``y_true``; may contain NaNs.
    pi0
        Reference prior forwarded to :func:`auprc`.

    Returns
    -------
    Tuple ``(auroc, auprc)``.
    """
    # Normalize per row to make scores comparable across experiments
    norm = np.nanmax(np.abs(y_score), axis=1)
    msk = norm == 0.
    norm[msk] = 1.
    y_score = y_score / norm.reshape(-1, 1)
    # BUGFIX: the previous assert compared NaNs (always False), so any NaN
    # in y_score tripped the assertion even though NaNs are explicitly
    # removed below. Only finite entries are range-checked now.
    fin = np.isfinite(y_score)
    assert ((-1. <= y_score[fin]) & (y_score[fin] <= 1.)).all()
    # Flatten and remove nans
    y_true, y_score = y_true.ravel(), y_score.ravel()
    msk_nan = ~np.isnan(y_score)
    y_true, y_score = y_true[msk_nan], y_score[msk_nan]
    auc_roc = auroc(y_true=y_true, y_score=y_score)
    auc_prc = auprc(y_true=y_true, y_score=y_score, pi0=pi0)
    return auc_roc, auc_prc

auc.scores = ['auroc', 'auprc']
+ fp) 26 | rcl = tp / (tp + fn) 27 | else: 28 | prc = 0. 29 | rcl = 0. 30 | return prc, rcl 31 | 32 | fscore.scores = ['precision', 'recall'] 33 | -------------------------------------------------------------------------------- /src/decoupler/bm/metric/_hmean.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | from decoupler._docs import docs 5 | from decoupler.bm.pl._format import _format 6 | 7 | 8 | def _hmean( 9 | x: float | int, 10 | y: float | int, 11 | beta: float | int = 1, 12 | ) -> float: 13 | assert isinstance(beta, (int, float)) and 0 < beta, \ 14 | 'beta must be numeric and > 0' 15 | h = np.zeros(len(x)) 16 | msk = (x != 0.) & (y != 0.) 17 | h[msk] = (1 + beta**2) * (x[msk] * y[msk]) / ((x[msk] * beta**2) + y[msk]) 18 | return h 19 | 20 | 21 | @docs.dedent 22 | def hmean( 23 | df: pd.DataFrame, 24 | metrics: str | list = ['auc', 'fscore', 'qrank'], 25 | beta: int | float = 0.5, 26 | ) -> pd.DataFrame: 27 | """ 28 | Computes the harmonic mean between two metric statistics. 29 | 30 | Parameters 31 | ---------- 32 | %(df)s 33 | metrics 34 | Metrics which to compute the harmonic mean between their own statistics. 35 | beta 36 | Controls the balance between statistics, where beta > 1 favors the first one (for example recall), 37 | beta < 1 the other one (for example precision), and beta = 1 gives equal weight to both. 38 | 39 | Returns 40 | ------- 41 | Dataframe containing the harmonic mean per metric. 
def qrank(
    y_true: np.ndarray,
    y_score: np.ndarray,
) -> Tuple[float, float]:
    """
    1 - quantile normalized rank
    """
    _validate_bool(y_true=y_true, y_score=y_score)
    # Rank scores per observation (row), ignoring NaNs
    ranks = sts.rankdata(y_score, axis=1, nan_policy='omit', method='average')
    # Quantile-normalize: divide by the number of non-NaN entries in each row
    n_valid = np.sum(~np.isnan(ranks), axis=1).reshape(-1, 1)
    ranks = ranks / n_valid
    # Split ranks into ground-truth sources and the rest
    is_true = y_true.astype(np.bool_)
    grp_true = ranks[is_true]
    grp_rest = ranks[~is_true]
    # One-sided rank-sum test: are true sources ranked higher than the rest?
    _, pval = sts.ranksums(grp_true, grp_rest, alternative='greater')
    return np.nanmean(grp_true), -np.log10(pval)

qrank.scores = ['1-qrank', '-log10(pval)']
@docs.dedent
def bar(
    df: pd.DataFrame,
    x: str,
    y: str,
    hue: str | None = None,
    palette: str = 'tab20',
    **kwargs
) -> None | Figure:
    """
    Plot the harmonic mean between two metric statistics as a barplot.

    x-axis represent the harmonic mean between metric statistics.

    y-axis represent a grouping variable.

    Parameters
    ----------
    %(df)s
    x
        Continuous variable to plot on x axis.
    %(y)s
    %(hue)s
    %(palette)s
    %(plot)s
    """
    # Validate
    assert isinstance(x, str), 'x must be str'
    assert isinstance(y, str), 'y must be str'
    assert isinstance(hue, str) or hue is None, 'hue must be str or None'
    # Instance
    bp = Plotter(**kwargs)
    # Plot: order groups by their mean value of x, descending
    order = (
        df
        .groupby(y)[x]
        .mean()
        .sort_values(ascending=False)
        .index
    )
    args = dict()
    if hue is not None:
        args['hue'] = hue
        args['palette'] = palette
    sns.barplot(
        data=df,
        y=y,
        x=x,
        order=order,
        # Fix: draw on the Plotter's axes; previously ax was omitted, so the
        # barplot landed on the current global axes instead of bp.ax like
        # every other plotting function in this module
        ax=bp.ax,
        **args
    )
    if hue is not None and hue != y:
        bp.ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), frameon=False, title=hue)
    return bp._return()
@docs.dedent
def fscore(
    df: pd.DataFrame,
    hue: str | None = None,
    palette: str = 'tab20',
    **kwargs
) -> None | Figure:
    """
    Plot precision and recall as scatterplot.

    x-axis represent the recall of correctly predicted sources after filtering by significance.
    The higher value the better performance is.

    y-axis represent the precision of correctly predicted sources after filtering by significance.
    The higher value the better performance is.

    Parameters
    ----------
    %(df)s
    %(hue)s
    %(palette)s
    %(plot)s
    """
    # Validate
    assert isinstance(hue, str) or hue is None, 'hue must be str or None'
    # Format
    tmp = _format(df=df, cols=['recall', 'precision'])
    # Instance
    bp = Plotter(**kwargs)
    # Plot, coloring by hue when provided
    if hue is not None:
        sns.scatterplot(
            data=tmp,
            x='recall',
            y='precision',
            ax=bp.ax,
            hue=hue,
            palette=palette,
        )
        bp.ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), frameon=False, title=hue)
    else:
        sns.scatterplot(
            data=tmp,
            x='recall',
            y='precision',
            ax=bp.ax,
        )
    return bp._return()
37 | thr_pval 38 | Dashed line to indicate baseline of p-values. 39 | %(plot)s 40 | """ 41 | # Validate 42 | assert isinstance(hue, str) or hue is None, 'hue must be str or None' 43 | assert isinstance(thr_rank, float) and 0. <= thr_rank <= 1., \ 44 | 'thr_rank must be float and between 0 and 1' 45 | assert isinstance(thr_pval, float) and 0. <= thr_pval <= 1., \ 46 | 'thr_pval must be float and between 0 and 1' 47 | # Format 48 | tmp = _format(df=df, cols=['1-qrank', '-log10(pval)']) 49 | # Instance 50 | bp = Plotter(**kwargs) 51 | # Plot 52 | if hue is not None: 53 | sns.scatterplot( 54 | data=tmp, 55 | x='1-qrank', 56 | y='-log10(pval)', 57 | hue=hue, 58 | ax=bp.ax, 59 | palette=palette, 60 | ) 61 | else: 62 | sns.scatterplot( 63 | data=tmp, 64 | x='1-qrank', 65 | y='-log10(pval)', 66 | ax=bp.ax, 67 | ) 68 | bp.ax.set_xlim(0, 1) 69 | bp.ax.axvline(x=thr_rank, ls='--', c='black', zorder=0) 70 | bp.ax.axhline(y=-np.log10(thr_pval), ls='--', c='black', zorder=0) 71 | bp.ax.set_ylabel(r'$\log_{10}$(pval)') 72 | if hue is not None: 73 | bp.ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), frameon=False, title=hue) 74 | return bp._return() 75 | -------------------------------------------------------------------------------- /src/decoupler/ds/__init__.py: -------------------------------------------------------------------------------- 1 | from decoupler.ds._bulk import hsctgfb, knocktf 2 | from decoupler.ds._scell import pbmc3k, covid5k, erygast1k 3 | from decoupler.ds._spatial import msvisium 4 | from decoupler.ds._toy import toy, toy_bench 5 | from decoupler.ds._utils import ensmbl_to_symbol 6 | -------------------------------------------------------------------------------- /src/decoupler/ds/_bulk.py: -------------------------------------------------------------------------------- 1 | from anndata import AnnData 2 | 3 | from decoupler._docs import docs 4 | from decoupler._log import _log 5 | from decoupler._download import _download 6 | 7 | 8 | @docs.dedent 9 | 
def hsctgfb(
    verbose: bool = False,
) -> AnnData:
    """
    Downloads RNA-seq bulk data consisting of 6 samples of hepatic stellate cells
    (HSC) where three of them were activated by the cytokine
    Transforming growth factor (TGF-β) :cite:`hsc_tgfb`.

    Parameters
    ----------
    %(verbose)s

    Returns
    -------
    AnnData object.
    """
    # Download the raw counts table from GEO (GSE151251)
    url = (
        'https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE151251&format=file&'
        'file=GSE151251%5FHSCs%5FCtrl%2Evs%2EHSCs%5FTGFb%2Ecounts%2Etsv%2Egz'
    )
    adata = _download(url, compression='gzip', sep='\t', verbose=verbose)
    # Transform to AnnData: drop duplicated gene symbols, keep only the count
    # columns (first 5 columns assumed to be annotation -- TODO confirm against
    # the GEO file layout), then transpose to observations x genes
    adata = adata.drop_duplicates('GeneName').set_index('GeneName').iloc[:, 5:].T
    adata.columns.name = None
    adata = AnnData(adata)
    adata.X = adata.X.astype(float)
    # Format obs: sample names encode condition ('-Ctrl' marks control samples)
    adata.obs['condition'] = ['control' if '-Ctrl' in sample_id else 'treatment' for sample_id in adata.obs.index]
    adata.obs['sample_id'] = [sample_id.split('_')[0] for sample_id in adata.obs.index]
    adata.obs['condition'] = adata.obs['condition'].astype('category')
    adata.obs['sample_id'] = adata.obs['sample_id'].astype('category')
    m = f'generated AnnData with shape={adata.shape}'
    _log(m, level='info', verbose=verbose)
    return adata
@docs.dedent
def msvisium(
    verbose: bool = False,
) -> AnnData:
    """
    Downloads a spatial RNA-seq (Visium) human sample with multiple sclerosis
    displaying a chronic active lesion in the white matter of the brain :cite:`msvisium`.

    Parameters
    ----------
    %(verbose)s

    Returns
    -------
    AnnData object.
    """
    # Base GEO URL; per-file suffixes are appended below (URL-encoded names)
    url = (
        'https://www.ncbi.nlm.nih.gov/geo/download/'
        '?acc=GSM8563708&format=file&file=GSM8563708%5FMS377T%5F'
    )
    # Download mat: MatrixMarket counts, transposed to obs x var, rounded to ints
    response = requests.get(url + 'matrix%2Emtx%2Egz')
    with gzip.GzipFile(fileobj=io.BytesIO(response.content)) as f:
        X = sio.mmread(f).T.tocsr().rint()
    X.eliminate_zeros()
    # Gene annotation (second column holds the gene symbols)
    var = pd.read_csv(
        url + 'features%2Etsv%2Egz',
        compression='gzip',
        sep='\t',
        header=None,
        usecols=[1],
        index_col=0,
    )
    var.index.name = None
    # Remove repeated genes (keep first occurrence, drop matching columns of X)
    msk_var = ~(var.index.duplicated(keep='first'))
    var = var.loc[msk_var]
    X = X[:, msk_var]
    # Spot barcodes
    obs = pd.read_csv(
        url + 'barcodes%2Etsv%2Egz',
        compression='gzip',
        sep='\t',
        header=None,
        usecols=[0],
        index_col=0,
    )
    obs.index.name = None
    # Create anndata
    adata = AnnData(X=X, obs=obs, var=var)
    # Add images following the scanpy/squidpy Visium .uns['spatial'] layout
    adata.uns['spatial'] = dict()
    adata.uns['spatial']['MS377T'] = dict()
    adata.uns['spatial']['MS377T']['images'] = dict()
    response = requests.get(url + 'scalefactors%5Fjson%2Ejson%2Egz')
    with gzip.GzipFile(fileobj=io.BytesIO(response.content)) as f:
        adata.uns['spatial']['MS377T']['scalefactors'] = json.load(f)
    response = requests.get(url + 'tissue%5Fhires%5Fimage%2Epng%2Egz')
    with gzip.GzipFile(fileobj=io.BytesIO(response.content)) as f:
        adata.uns['spatial']['MS377T']['images']['hires'] = imread(f)
    response = requests.get(url + 'tissue%5Flowres%5Fimage%2Epng%2Egz')
    with gzip.GzipFile(fileobj=io.BytesIO(response.content)) as f:
        adata.uns['spatial']['MS377T']['images']['lowres'] = imread(f)
    # Add coordinates: join pixel positions, store them in obsm, then drop the
    # raw columns from obs to avoid duplication
    coords = pd.read_csv(
        url + 'tissue%5Fpositions%5Flist%2Ecsv%2Egz',
        compression='gzip',
        index_col=0,
    )
    adata.obs = adata.obs.join(coords, how='left')
    adata.obsm['spatial'] = adata.obs[['pxl_col_in_fullres', 'pxl_row_in_fullres']].values
    adata.obs.drop(
        columns=['in_tissue', 'pxl_row_in_fullres', 'pxl_col_in_fullres'],
        inplace=True,
    )
    # Add metadata: niche annotation from UCSC Cell Browser; the right-join
    # restricts spots to those present in the annotation
    url_meta = (
        'https://cells-test.gi.ucsc.edu/ms-subcortical-lesions/'
        'visium-ms377T/meta.tsv'
    )
    meta = pd.read_csv(url_meta, sep='\t', usecols=[0, 4], index_col=0)
    adata = adata[meta.index, :].copy()
    adata.obs = adata.obs.join(meta, how='right')
    adata.obs['niches'] = adata.obs['niches'].astype('category')
    adata.obs.index.name = None
    # Filter vars: keep genes detected in at least 10 spots
    msk_var = adata.X.getnnz(axis=0) > 9
    adata = adata[:, msk_var].copy()
    m = f'generated AnnData with shape={adata.shape}'
    _log(m, level='info', verbose=verbose)
    return adata
19 | 20 | Returns 21 | ------- 22 | List of gene symbols 23 | """ 24 | url = ( 25 | 'http://www.ensembl.org/biomart/martservice?query=' 26 | '' 30 | ) 31 | # Organisms 32 | # hsapiens_gene_ensembl 33 | # mmusculus_gene_ensembl 34 | # dmelanogaster_gene_ensembl 35 | # rnorvegicus_gene_ensembl 36 | # drerio_gene_ensembl 37 | # celegans_gene_ensembl 38 | # scerevisiae_gene_ensembl 39 | # Validate 40 | assert isinstance(genes, list), 'genes must be list' 41 | assert isinstance(organism, str), f'organism must be str' 42 | # Try different mirrors 43 | response = requests.get(url.format(miror='www', organism=organism)) 44 | if any(msg in response.text for msg in ['Service unavailable', 'Gateway Time-out']): 45 | response = requests.get(url.format(miror='useast', organism=organism)) 46 | if any(msg in response.text for msg in ['Service unavailable', 'Gateway Time-out']): 47 | response = requests.get(url.format(miror='asia', organism=organism)) 48 | if not any(msg in response.text for msg in ['Service unavailable', 'Gateway Time-out']): 49 | eids = pd.read_csv(io.StringIO(response.text), sep='\t', header=None, index_col=0)[1].to_dict() 50 | elif organism in ['hsapiens_gene_ensembl', 'mmusculus_gene_ensembl']: 51 | url = f'https://zenodo.org/records/15551885/files/{organism}.csv.gz?download=1' 52 | eids = pd.read_csv(url, index_col=0, compression='gzip')['symbol'].to_dict() 53 | else: 54 | assert False, 'ensembl servers are down, try again later' 55 | return [eids[g] if g in eids else None for g in genes] 56 | -------------------------------------------------------------------------------- /src/decoupler/mt/__init__.py: -------------------------------------------------------------------------------- 1 | from decoupler._Method import _show_methods 2 | from decoupler.mt._methods import aucell 3 | from decoupler.mt._methods import gsea 4 | from decoupler.mt._methods import gsva 5 | from decoupler.mt._methods import mdt 6 | from decoupler.mt._methods import mlm 7 | from 
decoupler.mt._methods import ora 8 | from decoupler.mt._methods import udt 9 | from decoupler.mt._methods import ulm 10 | from decoupler.mt._methods import viper 11 | from decoupler.mt._methods import waggr 12 | from decoupler.mt._methods import zscore 13 | from decoupler.mt._methods import _methods 14 | from decoupler.mt._decouple import decouple 15 | from decoupler.mt._consensus import consensus 16 | 17 | def show() -> None: 18 | """Displays the methods available in decoupler""" 19 | return _show_methods(_methods) 20 | -------------------------------------------------------------------------------- /src/decoupler/mt/_aucell.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | 3 | import numpy as np 4 | import scipy.stats as sts 5 | import scipy.sparse as sps 6 | from tqdm.auto import tqdm 7 | import numba as nb 8 | 9 | from decoupler._docs import docs 10 | from decoupler._log import _log 11 | from decoupler._Method import MethodMeta, Method 12 | from decoupler.pp.net import _getset 13 | 14 | 15 | @nb.njit(parallel=True, cache=True) 16 | def _auc( 17 | row: np.ndarray, 18 | cnct: np.ndarray, 19 | starts: np.ndarray, 20 | offsets: np.ndarray, 21 | n_up: int, 22 | nsrc: int, 23 | ) -> np.ndarray: 24 | # Empty acts 25 | es = np.zeros(nsrc) 26 | # For each feature set 27 | for j in nb.prange(nsrc): 28 | # Extract feature set 29 | fset = _getset(cnct, starts, offsets, j) 30 | # Compute max AUC for fset 31 | x_th = np.arange(1, stop=fset.shape[0] + 1) 32 | x_th = x_th[x_th < n_up] 33 | max_auc = np.sum(np.diff(np.append(x_th, n_up)) * x_th) 34 | # Compute AUC 35 | x = row[fset] 36 | x = np.sort(x[x <= n_up]) 37 | y = np.arange(x.shape[0]) + 1 38 | x = np.append(x, n_up) 39 | # Update acts matrix 40 | es[j] = np.sum(np.diff(x) * y) / max_auc 41 | return es 42 | 43 | 44 | def _validate_n_up( 45 | nvar: int, 46 | n_up: int | float | None = None, 47 | ) -> int: 48 | assert isinstance(n_up, (int, float)) or 
n_up is None, 'n_up must be numerical or None' 49 | if n_up is None: 50 | n_up = np.ceil(0.05 * nvar) 51 | n_up = int(np.clip(n_up, a_min=2, a_max=nvar)) 52 | else: 53 | n_up = int(np.ceil(n_up)) 54 | assert nvar >= n_up > 1, f'For nvar={nvar}, n_up={n_up} must be between 1 and {nvar}' 55 | return n_up 56 | 57 | 58 | @docs.dedent 59 | def _func_aucell( 60 | mat: np.ndarray, 61 | cnct: np.ndarray, 62 | starts: np.ndarray, 63 | offsets: np.ndarray, 64 | n_up: int | float | None = None, 65 | verbose: bool = False, 66 | ) -> Tuple[np.ndarray, None]: 67 | r""" 68 | Area Under the Curve for set enrichment within single cells (AUCell) :cite:`aucell`. 69 | 70 | Given a ranked list of features per observation, AUCell calculates the AUC by measuring how early the features in 71 | the set appear in this ranking. Specifically, the enrichment score :math:`ES` is: 72 | 73 | .. math:: 74 | 75 | {ES}_{i, F} = \int_0^1 {RecoveryCurve}_{i, F}(r_i) \, dr 76 | 77 | Where: 78 | 79 | - :math:`i` is the obervation 80 | - :math:`F` is the feature set 81 | - :math:`{RecoveryCurve}_{i, F}(r_i)` is the proportion of features from :math:`F` recovered in the top :math:`r_i`-fraction of the ranked list for observation :math:`i` 82 | 83 | %(notest)s 84 | 85 | %(params)s 86 | n_up 87 | Number of features to include in the AUC calculation. 88 | If ``None``, the top 5% of features based on their magnitude are selected. 
@docs.dedent
def decouple(
    data: DataType,
    net: pd.DataFrame,
    methods: str | list = 'all',
    args: dict | None = None,
    cons: bool = False,
    **kwargs
) -> dict | None:
    """
    Runs multiple enrichment methods sequentially.

    Parameters
    ----------
    %(data)s
    %(net)s
    methods
        List of methods to run.
    args
        Dictionary of dictionaries containing method-specific keyword arguments.
    cons
        Whether to get a consensus score across the used methods.
    %(tmin)s
    %(raw)s
    %(empty)s
    %(bsize)s
    %(verbose)s
    """
    # Validate
    _mdict = {m.name: m for m in _methods}
    if isinstance(methods, str):
        if methods == 'all':
            methods = _mdict.keys()
        else:
            methods = [methods]
    methods = set(methods)
    assert methods.issubset(_mdict), \
        f'methods={methods} must be in decoupler.\nUse decoupler.mt.show_methods to check which ones are available'
    # Fix: the previous signature used a mutable default (`args=dict()`) that
    # was mutated below via setdefault, leaking state across calls and into
    # the caller's dict; work on a shallow copy instead
    args = dict(args) if args is not None else {}
    assert all(k in methods for k in args), \
        f'All keys in args={args.keys()} must belong to a method in methods={methods}'
    kwargs = kwargs.copy()
    kwargs.setdefault('verbose', False)
    # Run each method
    all_res = {}
    for name in methods:
        mth = _mdict[name]
        arg = args.setdefault(name, {})
        res = mth(data=data, net=net, **arg, **kwargs)
        if res:
            # Tuple result: (scores, adjusted p-values)
            res = {
                f'score_{mth.name}': res[0],
                f'padj_{mth.name}': res[1],
            }
            all_res = all_res | res
    if all_res:
        if cons:
            all_res['score_consensus'], all_res['padj_consensus'] = consensus(all_res, verbose=kwargs['verbose'])
        return all_res
    elif cons:
        # AnnData input: results live in data.obsm, consensus is stored in-place
        consensus(data, verbose=kwargs['verbose'])
@docs.dedent
def _func_mdt(
    mat: np.ndarray,
    adj: np.ndarray,
    verbose: bool = False,
    **kwargs,
) -> Tuple[np.ndarray, None]:
    r"""
    Multivariate Decision Trees (MDT) :cite:`decoupler`.

    This approach uses the molecular features from one observation as the population of samples
    and it fits a gradient boosted decision trees model with multiple covariates,
    which are the weights of all feature sets :math:`F`. It uses the implementation provided by ``xgboost`` :cite:`xgboost`.

    The enrichment score :math:`ES` for each :math:`F` is then calculated as the importance of each covariate in the model.

    %(notest)s

    %(params)s

    kwargs
        All other keyword arguments are passed to ``xgboost.XGBRegressor``.
    %(returns)s
    """
    _check_import(xgboost)
    nobs = mat.shape[0]
    nvar, nsrc = adj.shape
    m = f'mdt - fitting {nsrc} multivariate decision tree models (XGBoost) of {nvar} targets across {nobs} observations'
    _log(m, level='info', verbose=verbose)
    # One model per observation: the adjacency columns are the covariates and
    # the observation's feature values are the response
    es = np.zeros(shape=(nobs, nsrc))
    for idx in tqdm(range(nobs), disable=not verbose):
        row = mat[idx]
        es[idx, :] = _xgbr(x=adj, y=row, **kwargs)
    return (es, None)


_mdt = MethodMeta(
    name='mdt',
    desc='Multivariate Decision Tree (MDT)',
    func=_func_mdt,
    stype='numerical',
    adj=True,
    weight=True,
    test=False,
    limits=(0, 1),
    reference='https://doi.org/10.1093/bioadv/vbac016',
)
mdt = Method(_method=_mdt)
def _return(
    name: str,
    data: DataType,
    es: pd.DataFrame,
    pv: pd.DataFrame,
    verbose: bool = False,
) -> Tuple[pd.DataFrame, pd.DataFrame] | AnnData | None:
    """
    Store or return enrichment results depending on the input container.

    For AnnData inputs, scores (and adjusted p-values when present) are written
    into ``.obsm`` under ``score_{name}``/``padj_{name}``; for other inputs the
    two frames are returned directly.
    """
    if isinstance(data, AnnData):
        if data.obs_names.size != es.index.size:
            # Some observations were dropped upstream (e.g. empty rows): subset
            # the AnnData so obsm aligns, and return the repaired object
            m = 'Provided AnnData contains empty observations, returning repaired object'
            _log(m, level='warn', verbose=verbose)
            data = data[es.index, :].copy()
            data.obsm[f'score_{name}'] = es
            if pv is not None:
                data.obsm[f'padj_{name}'] = pv
            return data
        else:
            # Store in-place; caller keeps its existing reference
            data.obsm[f'score_{name}'] = es
            if pv is not None:
                data.obsm[f'padj_{name}'] = pv
            return None
    else:
        return es, pv


def _run(
    name: str,
    func: Callable,
    adj: bool,
    test: bool,
    data: DataType,
    net: pd.DataFrame,
    tmin: int | float = 5,
    layer: str | None = None,
    raw: bool = False,
    empty: bool = True,
    bsize: int | float = 250_000,
    verbose: bool = False,
    **kwargs
) -> Tuple[pd.DataFrame, pd.DataFrame] | AnnData | None:
    """
    Shared driver for enrichment methods: extracts the data matrix, prunes the
    network, dispatches to ``func`` (densifying sparse input in batches when
    the method needs a dense adjacency), applies FDR correction for methods
    that produce p-values, and returns/stores the results via ``_return``.
    """
    _log(f'{name} - Running {name}', level='info', verbose=verbose)
    # Process data
    mat, obs, var = extract(data, layer=layer, raw=raw, empty=empty, verbose=verbose)
    sparse = sps.issparse(mat)
    # Process net
    net = prune(features=var, net=net, tmin=tmin, verbose=verbose)
    # Handle stat type
    if adj:
        # Method consumes a dense (features x sources) adjacency matrix
        sources, targets, adjm = adjmat(features=var, net=net, verbose=verbose)
        # Handle sparse
        if sparse:
            # Densify in row batches of bsize to bound peak memory
            nbatch = int(np.ceil(obs.size / bsize))
            es, pv = [], []
            for i in tqdm(range(nbatch), disable=not verbose):
                srt, end = i * bsize, i * bsize + bsize
                bmat = mat[srt:end].toarray()
                bes, bpv = func(bmat, adjm, verbose=verbose, **kwargs)
                es.append(bes)
                pv.append(bpv)
            es = np.vstack(es)
            es = pd.DataFrame(es, index=obs, columns=sources)
        else:
            es, pv = func(mat, adjm, verbose=verbose, **kwargs)
            es = pd.DataFrame(es, index=obs, columns=sources)
    else:
        # Method consumes index-based feature sets (CSR-like flat layout)
        sources, cnct, starts, offsets = idxmat(features=var, net=net, verbose=verbose)
        es, pv = func(mat, cnct, starts, offsets, verbose=verbose, **kwargs)
        es = pd.DataFrame(es, index=obs, columns=sources)
    # Handle pvals and FDR correction
    if test:
        pv = np.vstack(pv)
        pv = pd.DataFrame(pv, index=obs, columns=sources)
        if name != 'mlm':
            # mlm appears to be exempt from BH correction here -- TODO confirm
            # whether it corrects internally
            _log(f'{name} - adjusting p-values by FDR', level='info', verbose=verbose)
            pv.loc[:, :] = sts.false_discovery_control(pv.values, axis=1, method='bh')
    else:
        pv = None
    _log(f'{name} - done', level='info', verbose=verbose)
    return _return(name, data, es, pv, verbose=verbose)
| 4 | import numpy as np 5 | import scipy.sparse as sps 6 | from tqdm.auto import tqdm 7 | 8 | from decoupler._odeps import xgboost, _check_import 9 | from decoupler._docs import docs 10 | from decoupler._log import _log 11 | from decoupler._Method import MethodMeta, Method 12 | 13 | 14 | def _xgbr( 15 | x: np.ndarray, 16 | y: np.ndarray, 17 | **kwargs, 18 | ) -> np.ndarray: 19 | kwargs.setdefault('n_estimators', 10) 20 | # Init model 21 | reg = xgboost.XGBRegressor(**kwargs) 22 | # Fit 23 | x, y = x.reshape(-1, 1), y.reshape(-1, 1) 24 | reg = reg.fit(x, y) 25 | # Get R score 26 | es = reg.score(x, y) 27 | # Clip to [0, 1] 28 | es = np.clip(es, 0, 1) 29 | return es 30 | 31 | 32 | @docs.dedent 33 | def _func_udt( 34 | mat: np.ndarray, 35 | adj: np.ndarray, 36 | verbose: bool = False, 37 | **kwargs, 38 | ) -> Tuple[np.ndarray, None]: 39 | """ 40 | Univariate Decision Tree (UDT) :cite:`decoupler`. 41 | 42 | This approach uses the molecular features from one observation as the population of samples 43 | and it fits a gradient boosted decision trees model with a single covariate, 44 | which is the feature weights of a set :math:`F`. 45 | It uses the implementation provided by ``xgboost`` :cite:`xgboost`. 46 | 47 | The enrichment score :math:`ES` is then calculated as the coefficient of determination :math:`R^2`. 48 | 49 | %(notest)s 50 | 51 | %(params)s 52 | 53 | kwargs 54 | All other keyword arguments are passed to ``xgboost.XGBRegressor``. 
55 | %(returns)s 56 | """ 57 | _check_import(xgboost) 58 | nobs = mat.shape[0] 59 | nvar, nsrc = adj.shape 60 | m = f'udt - fitting {nsrc} univariate decision tree models (XGBoost) of {nvar} targets across {nobs} observations' 61 | _log(m, level='info', verbose=verbose) 62 | es = np.zeros(shape=(nobs, nsrc)) 63 | for i in tqdm(range(nobs), disable=not verbose): 64 | obs = mat[i] 65 | for j in range(adj.shape[1]): 66 | es[i, j] = _xgbr(x=adj[:, j], y=obs, **kwargs) 67 | return es, None 68 | 69 | 70 | _udt = MethodMeta( 71 | name='udt', 72 | desc='Univariate Decision Tree (UDT)', 73 | func=_func_udt, 74 | stype='numerical', 75 | adj=True, 76 | weight=True, 77 | test=False, 78 | limits=(0, 1), 79 | reference='https://doi.org/10.1093/bioadv/vbac016', 80 | ) 81 | udt = Method(_method=_udt) 82 | -------------------------------------------------------------------------------- /src/decoupler/mt/_ulm.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | 3 | import numpy as np 4 | import scipy.stats as sts 5 | 6 | from decoupler._docs import docs 7 | from decoupler._log import _log 8 | from decoupler._Method import MethodMeta, Method 9 | 10 | 11 | def _cov( 12 | A: np.ndarray, 13 | b: np.ndarray 14 | ) -> np.ndarray: 15 | return np.dot(b.T - b.mean(), A - A.mean(axis=0)) / (b.shape[0]-1) 16 | 17 | 18 | def _cor( 19 | A: np.ndarray, 20 | b: np.ndarray 21 | ) -> np.ndarray: 22 | cov = _cov(A, b) 23 | ssd = np.std(A, axis=0, ddof=1) * np.std(b, axis=0, ddof=1).reshape(-1, 1) 24 | return cov / ssd 25 | 26 | 27 | def _tval( 28 | r: np.ndarray, 29 | df: float 30 | ) -> np.ndarray: 31 | return r * np.sqrt(df / ((1.0 - r + 2.2e-16) * (1.0 + r + 2.2e-16))) 32 | 33 | 34 | @docs.dedent 35 | def _func_ulm( 36 | mat: np.ndarray, 37 | adj: np.ndarray, 38 | tval: bool = True, 39 | verbose: bool = False, 40 | ) -> Tuple[np.ndarray, np.ndarray]: 41 | r""" 42 | Univariate Linear Model (ULM) :cite:`decoupler`. 
43 | 44 | This approach uses the molecular features from one observation as the population of samples 45 | and it fits a linear model with a single covariate, which is the feature weights of a set :math:`F`. 46 | 47 | .. math:: 48 | 49 | y_i = \beta_0 + \beta_1 x_i + \varepsilon, \quad i = 1, 2, \ldots, n 50 | 51 | Where: 52 | 53 | - :math:`y_i` is the observed feature statistic (e.g. gene expression, :math:`log_{2}FC`, etc.) for feature :math:`i` 54 | - :math:`x_i` is the weight of feature :math:`i` in feature set :math:`F`. For unweighted sets, membership in the set is indicated by 1, and non-membership by 0. 55 | - :math:`\beta_0` is the intercept 56 | - :math:`\beta_1` is the slope coefficient 57 | - :math:`\varepsilon` is the error term for feature :math:`i` 58 | 59 | .. figure:: /_static/images/ulm.png 60 | :alt: Univariate Linear Model (ULM) schematic. 61 | :align: center 62 | :width: 75% 63 | 64 | Univariate Linear Model (ULM) scheme. 65 | In this example, the observed gene expression of :math:`Sample_1` is predicted using 66 | the interaction weights of :math:`TF_1`. 67 | Since the target genes that have negative weights are lowly expressed, 68 | and the positive target genes are highly expressed, 69 | the relationship between the two variables is positive so the obtained :math:`ES` score is positive. 70 | Scores can be interpreted as active when positive, repressive when negative, and inconclusive when close to 0. 71 | 72 | The enrichment score :math:`ES` is then calculated as the t-value of the slope coefficient. 73 | 74 | .. math:: 75 | 76 | ES = t_{\beta_1} = \frac{\hat{\beta}_1}{\mathrm{SE}(\hat{\beta}_1)} 77 | 78 | Where: 79 | 80 | - :math:`t_{\beta_1}` is the t-value of the slope 81 | - :math:`\mathrm{SE}(\hat{\beta}_1)` is the standard error of the slope 82 | 83 | Next, :math:`p_{value}` are obtained by evaluating the two-sided survival function 84 | (:math:`sf`) of the Student’s t-distribution. 85 | 86 | .. 
math:: 87 | 88 | p_{value} = 2 \times \mathrm{sf}(|ES|, \text{df}) 89 | 90 | %(yestest)s 91 | 92 | %(params)s 93 | %(tval)s 94 | 95 | %(returns)s 96 | """ 97 | # Get degrees of freedom 98 | n_var, n_src = adj.shape 99 | df = n_var - 2 100 | m = f'ulm - fitting {n_src} univariate models of {n_var} observations (targets) with {df} degrees of freedom' 101 | _log(m, level='info', verbose=verbose) 102 | # Compute R value for all 103 | r = _cor(adj, mat.T) 104 | # Compute t-value 105 | t = _tval(r, df) 106 | # Compute p-value 107 | pv = sts.t.sf(abs(t), df) * 2 108 | if tval: 109 | es = t 110 | else: 111 | # Compute coef 112 | es = r * (np.std(mat.T, ddof=1, axis=0).reshape(-1, 1) / np.std(adj, ddof=1, axis=0)) 113 | return es, pv 114 | 115 | 116 | _ulm = MethodMeta( 117 | name='ulm', 118 | desc='Univariate Linear Model (ULM)', 119 | func=_func_ulm, 120 | stype='numerical', 121 | adj=True, 122 | weight=True, 123 | test=True, 124 | limits=(-np.inf, +np.inf), 125 | reference='https://doi.org/10.1093/bioadv/vbac016', 126 | ) 127 | ulm = Method(_method=_ulm) 128 | -------------------------------------------------------------------------------- /src/decoupler/mt/_zscore.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | 3 | import numpy as np 4 | import scipy.stats as sts 5 | 6 | from decoupler._docs import docs 7 | from decoupler._log import _log 8 | from decoupler._Method import MethodMeta, Method 9 | 10 | 11 | @docs.dedent 12 | def _func_zscore( 13 | mat: np.ndarray, 14 | adj: np.ndarray, 15 | flavor: str = 'RoKAI', 16 | verbose: bool = False, 17 | ) -> Tuple[np.ndarray, np.ndarray]: 18 | r""" 19 | Z-score (ZSCORE) :cite:`zscore`. 
20 | 21 | This approach computes the mean value of the molecular features for known targets, 22 | optionally subtracts the overall mean of all measured features, 23 | and normalizes the result by the standard deviation of all features and the square 24 | root of the number of targets. 25 | 26 | This formulation was originally introduced in KSEA, which explicitly includes the 27 | subtraction of the global mean to compute the enrichment score :math:`ES`. 28 | 29 | .. math:: 30 | 31 | ES = \frac{(\mu_s-\mu_p) \times \sqrt m }{\sigma} 32 | 33 | Where: 34 | 35 | - :math:`\mu_s` is the mean of targets 36 | - :math:`\mu_p` is the mean of all features 37 | - :math:`m` is the number of targets 38 | - :math:`\sigma` is the standard deviation of all features 39 | 40 | However, in the RoKAI implementation, this global mean subtraction was omitted. 41 | 42 | .. math:: 43 | 44 | ES = \frac{\mu_s \times \sqrt m }{\sigma} 45 | 46 | A two-sided :math:`p_{value}` is then calculated from the enrichment score using 47 | the survival function :math:`sf` of the standard normal distribution. 48 | 49 | .. math:: 50 | 51 | p = 2 \times \mathrm{sf}\bigl(\lvert \mathrm{ES} \rvert \bigr) 52 | 53 | %(yestest)s 54 | 55 | %(params)s 56 | 57 | flavor 58 | Which flavor to use when calculating the z-score, either KSEA or RoKAI.
59 | 60 | %(returns)s 61 | """ 62 | assert isinstance(flavor, str) and flavor in ['KSEA', 'RoKAI'], \ 63 | 'flavor must be str and KSEA or RoKAI' 64 | nobs, nvar = mat.shape 65 | nvar, nsrc = adj.shape 66 | m = f'zscore - calculating {nsrc} scores with flavor={flavor}' 67 | _log(m, level='info', verbose=verbose) 68 | stds = np.std(mat, axis=1, ddof=1) 69 | if flavor == 'RoKAI': 70 | mean_all = np.mean(mat, axis=1) 71 | elif flavor == 'KSEA': 72 | mean_all = np.zeros(stds.shape) 73 | n = np.sqrt(np.count_nonzero(adj, axis=0)) 74 | mean = mat.dot(adj) / np.sum(np.abs(adj), axis=0) 75 | es = ((mean - mean_all.reshape(-1, 1)) * n) / stds.reshape(-1, 1) 76 | pv = 2 * sts.norm.sf(np.abs(es)) 77 | return es, pv 78 | 79 | 80 | _zscore = MethodMeta( 81 | name='zscore', 82 | desc='Z-score (ZSCORE)', 83 | func=_func_zscore, 84 | stype='numerical', 85 | adj=True, 86 | weight=True, 87 | test=True, 88 | limits=(-np.inf, +np.inf), 89 | reference='https://doi.org/10.1038/s41467-021-21211-6', 90 | ) 91 | zscore = Method(_method=_zscore) 92 | -------------------------------------------------------------------------------- /src/decoupler/op/__init__.py: -------------------------------------------------------------------------------- 1 | from decoupler.op._translate import show_organisms, translate 2 | from decoupler.op._resource import show_resources, resource 3 | from decoupler.op._collectri import collectri 4 | from decoupler.op._dorothea import dorothea 5 | from decoupler.op._hallmark import hallmark 6 | from decoupler.op._progeny import progeny 7 | -------------------------------------------------------------------------------- /src/decoupler/op/_collectri.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | from decoupler._docs import docs 5 | from decoupler._log import _log 6 | from decoupler._download import URL_INT, _download 7 | from decoupler.op._translate import translate 8 | from 
decoupler.op._dtype import _infer_dtypes 9 | 10 | 11 | @docs.dedent 12 | def collectri( 13 | organism: str = 'human', 14 | remove_complexes: bool = False, 15 | license: str = 'academic', 16 | verbose: bool = False, 17 | ) -> pd.DataFrame: 18 | """ 19 | CollecTRI gene regulatory network :cite:p:`collectri`. 20 | 21 | Wrapper to access CollecTRI gene regulatory network. CollecTRI is a 22 | comprehensive resource containing a curated collection of transcription 23 | factors (TFs) and their target genes. It is an expansion of DoRothEA. 24 | Each interaction is weighted by its mode of regulation (either positive or negative). 25 | 26 | Parameters 27 | ---------- 28 | %(organism)s 29 | remove_complexes 30 | Whether to remove complexes. 31 | %(license)s 32 | %(verbose)s 33 | 34 | Returns 35 | ------- 36 | Dataframe in long format containing target genes for each TF with their associated weights, 37 | and if available, the PMIDs supporting each interaction. 38 | """ 39 | url = 'https://zenodo.org/records/8192729/files/CollecTRI_regulons.csv?download=1' 40 | ct = _download(url, verbose=verbose) 41 | # Update resources 42 | resources = [] 43 | for str_res in ct['resources']: 44 | lst_res = str_res.replace('CollecTRI', '').split(';') 45 | str_res = ';'.join(sorted([res.replace('_', '') for res in lst_res if res != ''])) 46 | resources.append(str_res) 47 | ct['resources'] = resources 48 | # Format references 49 | ct['references'] = ct['references'].str.replace('CollecTRI:', '') 50 | ct = ct.dropna() 51 | if remove_complexes: 52 | ct = ct[~ct['source'].isin(['AP1', 'NFKB'])] 53 | ct = _infer_dtypes(ct) 54 | if organism != 'human': 55 | ct = translate(ct, columns=['source', 'target'], target_organism=organism, verbose=verbose) 56 | ct = ct.drop_duplicates(['source', 'target']).reset_index(drop=True) 57 | return ct 58 | -------------------------------------------------------------------------------- /src/decoupler/op/_dorothea.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | from decoupler._docs import docs 5 | from decoupler._log import _log 6 | from decoupler._download import URL_INT, _download 7 | from decoupler.op._translate import translate 8 | from decoupler.op._dtype import _infer_dtypes 9 | 10 | 11 | @docs.dedent 12 | def dorothea( 13 | organism: str = 'human', 14 | levels: str | list = ['A', 'B', 'C'], 15 | dict_weights: dict | None = None, 16 | license: str = 'academic', 17 | verbose: bool = False, 18 | ) -> pd.DataFrame: 19 | """ 20 | DoRothEA gene regulatory network :cite:p:`dorothea`. 21 | 22 | Wrapper to access DoRothEA gene regulatory network. DoRothEA is a 23 | comprehensive resource containing a curated collection of transcription 24 | factors (TFs) and their target genes. Each interaction is weighted by its 25 | mode of regulation (either positive or negative) and by its confidence 26 | level. 27 | 28 | Parameters 29 | ---------- 30 | %(organism)s 31 | levels 32 | List of confidence levels to return. Goes from A to D, A being the 33 | most confident and D being the least. 34 | dict_weights 35 | Dictionary of values to divide the mode of regulation (-1 or 1), 36 | one for each confidence level. Bigger values will generate weights 37 | close to zero. 38 | %(license)s 39 | %(verbose)s 40 | 41 | Returns 42 | ------- 43 | Dataframe in long format containing target genes for each TF with their associated weights and confidence level.
44 | """ 45 | assert isinstance(levels, (str, list)), 'levels must be str or list' 46 | if isinstance(levels, str): 47 | levels = [levels] 48 | assert all(l in {'A', 'B', 'C', 'D'} for l in levels), 'levels can only contain any of these values: A, B, C, and/or D' 49 | assert isinstance(dict_weights, dict) or dict_weights is None, 'dict_weights must be dict or None' 50 | if dict_weights: 51 | assert all(k in levels for k in dict_weights), f'dict_weights keys must be in levels={levels}' 52 | weights = dict_weights 53 | else: 54 | weights = {'A': 1, 'B': 2, 'C': 3, 'D': 4} 55 | weights = {k: weights[k] for k in weights if k in levels} 56 | # Read 57 | str_levels = ','.join(levels) 58 | url_ext = f'datasets=dorothea&dorothea_levels={str_levels}&fields=dorothea_level&license={license}' 59 | url = URL_INT + url_ext 60 | m = f'dorothea - Accessing DoRothEA (levels {str_levels}) with {license} license and weights={weights}' 61 | _log(m, level='info', verbose=verbose) 62 | do = _download(url, sep='\t', verbose=verbose) 63 | # Filter extra columns 64 | do = do[[ 65 | 'source_genesymbol', 'target_genesymbol', 66 | 'is_stimulation', 'is_inhibition', 67 | 'consensus_direction', 'consensus_stimulation', 68 | 'consensus_inhibition', 'dorothea_level', 69 | ]] 70 | # Remove duplicates 71 | do = do[~do.duplicated(['source_genesymbol', 'dorothea_level', 'target_genesymbol'])] 72 | # Assign top level if more than 2 73 | do['dorothea_level'] = [lvl.split(';')[0] for lvl in do['dorothea_level']] 74 | # Assign mode of regulation 75 | mor = [] 76 | for i in do.itertuples(): 77 | if i.is_stimulation and i.is_inhibition: 78 | if i.consensus_stimulation: 79 | mor.append(1) 80 | else: 81 | mor.append(-1) 82 | elif i.is_stimulation: 83 | mor.append(1) 84 | elif i.is_inhibition: 85 | mor.append(-1) 86 | else: 87 | mor.append(1) 88 | do['mor'] = mor 89 | # Compute weight based on confidence: mor/confidence 90 | do['weight'] = [i.mor / weights[i.dorothea_level] for i in do.itertuples()] 91 | # 
Format 92 | do = ( 93 | do 94 | .rename(columns={'source_genesymbol': 'source', 'target_genesymbol': 'target', 'dorothea_level': 'confidence'}) 95 | [['source', 'target', 'weight', 'confidence']] 96 | .sort_values('confidence') 97 | ) 98 | do = do[do['confidence'].isin(levels)].reset_index(drop=True) 99 | do = _infer_dtypes(do) 100 | if organism != 'human': 101 | do = translate(do, columns=['source', 'target'], target_organism=organism, verbose=verbose) 102 | do = do.drop_duplicates(['source', 'target']).reset_index(drop=True) 103 | return do 104 | -------------------------------------------------------------------------------- /src/decoupler/op/_dtype.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | 4 | def _infer_dtypes( 5 | df: pd.DataFrame 6 | ) -> pd.DataFrame: 7 | for col in df.columns: 8 | try: 9 | df[col] = pd.to_numeric(df[col]) 10 | continue 11 | except ValueError: 12 | pass 13 | if df[col].dtype == 'string': 14 | df[col] = df[col].astype(str) 15 | lowered = df[col].str.lower() 16 | if lowered.isin(["true", "false"]).all(): 17 | df[col] = lowered == "true" 18 | continue 19 | return df 20 | -------------------------------------------------------------------------------- /src/decoupler/op/_hallmark.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | from decoupler._docs import docs 5 | from decoupler._log import _log 6 | from decoupler._download import URL_INT, _download 7 | from decoupler.op._translate import translate 8 | from decoupler.op._dtype import _infer_dtypes 9 | 10 | 11 | @docs.dedent 12 | def hallmark( 13 | organism: str = 'human', 14 | license: str = 'academic', 15 | verbose: bool = False, 16 | ) -> pd.DataFrame: 17 | """ 18 | Hallmark gene sets :cite:p:`msigdb`. 
19 | 20 | Hallmark gene sets summarize and represent specific well-defined 21 | biological states or processes and display coherent expression. 22 | 23 | Parameters 24 | ---------- 25 | %(organism)s 26 | %(license)s 27 | %(verbose)s 28 | 29 | Returns 30 | ------- 31 | Dataframe in long format containing the hallmark gene sets. 32 | """ 33 | url = 'https://static.omnipathdb.org/tables/msigdb-hallmark.tsv.gz' 34 | hm = _download(url, sep='\t', compression='gzip', verbose=verbose) 35 | hm = hm[['geneset', 'genesymbol']] 36 | hm['geneset'] = hm['geneset'].str.replace('HALLMARK_', '') 37 | hm['genesymbol'] = hm['genesymbol'].str.replace('COMPLEX:', '').str.split('_') 38 | hm = hm.explode('genesymbol') 39 | hm = _infer_dtypes(hm) 40 | if organism != 'human': 41 | hm = translate(hm, columns=['genesymbol'], target_organism=organism, verbose=verbose) 42 | hm = hm.rename(columns={'geneset': 'source', 'genesymbol': 'target'}) 43 | hm = hm.drop_duplicates(['source', 'target']).reset_index(drop=True) 44 | return hm 45 | -------------------------------------------------------------------------------- /src/decoupler/op/_progeny.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | from decoupler._docs import docs 5 | from decoupler._log import _log 6 | from decoupler.op._resource import resource 7 | 8 | 9 | @docs.dedent 10 | def progeny( 11 | organism: str = 'human', 12 | top: int | float = np.inf, 13 | thr_padj: float = 0.05, 14 | license: str = 'academic', 15 | verbose: bool = False, 16 | ) -> pd.DataFrame: 17 | """ 18 | Pathway RespOnsive GENes for activity inference (PROGENy) :cite:p:`progeny`. 19 | 20 | Wrapper to access PROGENy model gene weights. Each pathway is defined with 21 | a collection of target genes, each interaction has an associated p-value 22 | and weight. The top significant interactions per pathway are returned. 
23 | 24 | Here is a brief description of each pathway: 25 | 26 | - **Androgen**: involved in the growth and development of the male reproductive organs 27 | - **EGFR**: regulates growth, survival, migration, apoptosis, proliferation, and differentiation in mammalian cells 28 | - **Estrogen**: promotes the growth and development of the female reproductive organs 29 | - **Hypoxia**: promotes angiogenesis and metabolic reprogramming when O2 levels are low 30 | - **JAK-STAT**: involved in immunity, cell division, cell death, and tumor formation 31 | - **MAPK**: integrates external signals and promotes cell growth and proliferation 32 | - **NFkB**: regulates immune response, cytokine production and cell survival 33 | - **p53**: regulates cell cycle, apoptosis, DNA repair and tumor suppression 34 | - **PI3K**: promotes growth and proliferation 35 | - **TGFb**: involved in development, homeostasis, and repair of most tissues 36 | - **TNFa**: mediates haematopoiesis, immune surveillance, tumour regression and protection from infection 37 | - **Trail**: induces apoptosis 38 | - **VEGF**: mediates angiogenesis, vascular permeability, and cell migration 39 | - **WNT**: regulates organ morphogenesis during development and tissue repair 40 | 41 | Parameters 42 | ---------- 43 | %(organism)s 44 | top 45 | Number of genes per pathway to return. By default all of them. 46 | thr_padj 47 | Significance threshold to trim interactions. 48 | %(license)s 49 | %(verbose)s 50 | 51 | Returns 52 | ------- 53 | Dataframe in long format containing target genes for each pathway with their associated weights and p-values. 54 | """ 55 | # Validate 56 | assert isinstance(top, (int, float)) and top > 0, \ 57 | 'top must be numeric and > 0' 58 | assert isinstance(thr_padj, (int, float)) and 0. 
<= thr_padj <= 1., \ 59 | 'thr_padj must be numeric and between 0 and 1' 60 | # Download 61 | p = resource(name='PROGENy', organism=organism, license=license, verbose=verbose) 62 | p = ( 63 | p 64 | .sort_values('p_value') 65 | .groupby('pathway') 66 | .head(top) 67 | .sort_values(['pathway', 'p_value']) 68 | .reset_index(drop=True) 69 | ) 70 | p = p.rename(columns={'pathway': 'source', 'genesymbol': 'target', 'p_value': 'padj'}) 71 | p = p[p['padj'] < thr_padj] 72 | p = p[['source', 'target', 'weight', 'padj']] 73 | m = f'progeny - filtered interactions for padj < {thr_padj}' 74 | _log(m, level='info', verbose=verbose) 75 | p = p.drop_duplicates(['source', 'target']).reset_index(drop=True) 76 | return p 77 | -------------------------------------------------------------------------------- /src/decoupler/op/_resource.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import requests 4 | import pandas as pd 5 | 6 | from decoupler._docs import docs 7 | from decoupler._log import _log 8 | from decoupler._download import URL_DBS, _download 9 | from decoupler.op._translate import translate 10 | from decoupler.op._dtype import _infer_dtypes 11 | 12 | 13 | def show_resources( 14 | ) -> pd.DataFrame: 15 | """ 16 | Shows available resources in Omnipath :cite:p:`omnipath`. 17 | For more information visit the official 18 | [website](https://omnipathdb.org/). 19 | 20 | Returns 21 | ------- 22 | List of available resources to query with `decoupler.op.resource`. 
23 | """ 24 | ann = pd.read_csv('https://omnipathdb.org/queries/annotations', sep='\t') 25 | ann = ann.set_index('argument').loc['databases'].str.split(';')['values'] 26 | url = 'https://omnipathdb.org/resources' 27 | response = requests.get(url) 28 | lcs = response.json() 29 | df = pd.DataFrame(ann, columns=['name']) 30 | df['license'] = [lcs[a]['license']['purpose'] if a in lcs else None for a in ann] 31 | return df 32 | 33 | 34 | @docs.dedent 35 | def resource( 36 | name: str, 37 | organism: str = 'human', 38 | license: str = 'academic', 39 | verbose: bool = False, 40 | ): 41 | """ 42 | Wrapper to access resources inside Omnipath :cite:p:`omnipath`. 43 | 44 | This wrapper allows one to easily query different prior knowledge resources. To 45 | check available resources run ``decoupler.op.show_resources()``. For more 46 | information visit the official [website](https://omnipathdb.org/). 47 | 48 | Parameters 49 | ---------- 50 | name: 51 | Name of the resource to query. 52 | %(organism)s 53 | %(license)s 54 | %(verbose)s 55 | kwargs 56 | Passed to ``decoupler.op.translate``. 57 | 58 | Returns 59 | ------- 60 | Network in long format.
61 | """ 62 | # Validate 63 | assert isinstance(name, str), 'name must be str' 64 | names = set(show_resources()['name']) 65 | assert name in names, f'name must be one of these: {names}' 66 | assert isinstance(organism, str), 'organism must be str' 67 | assert isinstance(license, str) and license in ['academic', 'commercial', 'nonprofit'], \ 68 | 'license must be academic, commercial or nonprofit' 69 | assert isinstance(verbose, bool), 'verbose must be bool' 70 | m = f'Accessing {name} with {license} license' 71 | _log(m, level='info', verbose=verbose) 72 | # Download 73 | url = URL_DBS + f'{name}&license={license}' 74 | df = _download(url, sep='\t', verbose=verbose) 75 | # Process 76 | labels = df['label'].unique() 77 | for label in labels: 78 | if label in df.columns: 79 | df.loc[df['label'] == label, 'label'] = f'_{label}' 80 | df = df[['genesymbol', 'label', 'value', 'record_id']] 81 | df = df.pivot(index=["genesymbol", "record_id"], columns="label", values="value").reset_index() 82 | df.index.name = '' 83 | df.columns.name = '' 84 | cols_to_remove = ['record_id', 'entity_type', '_entity_type'] 85 | df = df.drop(columns=[c for c in cols_to_remove if c in df.columns]) 86 | df = _infer_dtypes(df) 87 | if organism != 'human': 88 | df = translate(df, columns='genesymbol', target_organism=organism, verbose=verbose) 89 | return df 90 | -------------------------------------------------------------------------------- /src/decoupler/pl/__init__.py: -------------------------------------------------------------------------------- 1 | from decoupler.pl._barplot import barplot 2 | from decoupler.pl._dotplot import dotplot 3 | from decoupler.pl._filter_by_expr import filter_by_expr 4 | from decoupler.pl._filter_by_prop import filter_by_prop 5 | from decoupler.pl._leading_edge import leading_edge 6 | from decoupler.pl._network import network 7 | from decoupler.pl._obsbar import obsbar 8 | from decoupler.pl._order_targets import order_targets 9 | from decoupler.pl._order 
import order 10 | from decoupler.pl._obsm import obsm 11 | from decoupler.pl._filter_samples import filter_samples 12 | from decoupler.pl._source_targets import source_targets 13 | from decoupler.pl._volcano import volcano 14 | -------------------------------------------------------------------------------- /src/decoupler/pl/_barplot.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | 3 | import numpy as np 4 | import pandas as pd 5 | import matplotlib 6 | import matplotlib.pyplot as plt 7 | from matplotlib.figure import Figure 8 | import seaborn as sns 9 | 10 | from decoupler._docs import docs 11 | from decoupler._Plotter import Plotter 12 | 13 | 14 | def _set_limits( 15 | vmin: int | float, 16 | vcenter: int | float, 17 | vmax: int | float, 18 | values: np.ndarray 19 | ) -> Tuple[float, float, float]: 20 | assert np.isfinite(values).all(), 'values in data mut be finite' 21 | assert isinstance(vmin, (int, float)) or vmin is None, 'vmin must be numerical or None' 22 | assert isinstance(vcenter, (int, float)) or vcenter is None, 'vcenter must be numerical or None' 23 | assert isinstance(vmax, (int, float)) or vmax is None, 'vmax must be numerical or None' 24 | if vmin is None: 25 | vmin = values.min() 26 | if vmax is None: 27 | vmax = values.max() 28 | if vcenter is None: 29 | vcenter = values.mean() 30 | if vmin >= vcenter: 31 | vmin = -vmax 32 | if vcenter >= vmax: 33 | vmax = -vmin 34 | return vmin, vcenter, vmax 35 | 36 | 37 | @docs.dedent 38 | def barplot( 39 | data: pd.DataFrame, 40 | name: str, 41 | top: int = 25, 42 | vertical: bool = False, 43 | cmap: str = 'RdBu_r', 44 | vmin: float | None = None, 45 | vcenter: float | None = 0, 46 | vmax: float | None = None, 47 | **kwargs, 48 | ) -> None | Figure: 49 | """ 50 | Plot barplots showing top scores. 51 | 52 | Parameters 53 | ---------- 54 | data 55 | DataFrame in wide format containing enrichment scores (contrasts, sources). 
56 | name 57 | Name of the contrast (row) to plot. 58 | %(top)s 59 | vertical 60 | Whether to plot the bars verticaly or horizontaly. 61 | %(cmap)s 62 | %(vmin)s 63 | %(vcenter)s 64 | %(vmax)s 65 | %(plot)s 66 | """ 67 | # Validate 68 | assert isinstance(data, pd.DataFrame), 'data must be pandas.DataFrame' 69 | assert isinstance(name, str) and name in data.index, \ 70 | 'name must be str and in data.index' 71 | assert isinstance(top, int) and top > 0, 'top must be int and > 0' 72 | assert isinstance(vertical, bool), 'vertical must be bool' 73 | # Process df 74 | df = data.loc[[name]] 75 | df.index.name = None 76 | df.columns.name = None 77 | df = df.melt(var_name='source', value_name='score') 78 | df['abs_score'] = df['score'].abs() 79 | df = df.sort_values('abs_score', ascending=False) 80 | df = df.head(top).sort_values('score', ascending=False) 81 | if not vertical: 82 | x, y = 'score', 'source' 83 | else: 84 | x, y = 'source', 'score' 85 | # Instance 86 | bp = Plotter(**kwargs) 87 | # Plot 88 | sns.barplot(data=df, x=x, y=y, ax=bp.ax) 89 | if not vertical: 90 | sizes = np.array([bar.get_width() for bar in bp.ax.containers[0]]) 91 | bp.ax.set_xlabel('Score') 92 | bp.ax.set_ylabel('') 93 | else: 94 | sizes = np.array([bar.get_height() for bar in bp.ax.containers[0]]) 95 | bp.ax.tick_params(axis='x', rotation=90) 96 | bp.ax.set_ylabel('Score') 97 | bp.ax.set_xlabel('') 98 | bp.ax.invert_xaxis() 99 | # Compute color limits 100 | vmin, vcenter, vmax = _set_limits(vmin, vcenter, vmax, df['score']) 101 | # Rescale cmap 102 | divnorm = matplotlib.colors.TwoSlopeNorm(vmin=vmin, vcenter=vcenter, vmax=vmax) 103 | cmap_f = plt.get_cmap(cmap) 104 | div_colors = cmap_f(divnorm(sizes)) 105 | for bar, color in zip(bp.ax.containers[0], div_colors): 106 | bar.set_facecolor(color) 107 | # Add legend 108 | sm = plt.cm.ScalarMappable(cmap=cmap, norm=divnorm) 109 | sm.set_array([]) 110 | bp.fig.colorbar(sm, ax=bp.ax, shrink=0.5) 111 | return bp._return() 112 | 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.figure import Figure
from matplotlib.colors import TwoSlopeNorm

from decoupler._docs import docs
from decoupler._Plotter import Plotter


@docs.dedent
def dotplot(
    df: pd.DataFrame,
    x: str,
    y: str,
    c: str,
    s: str,
    top: int | float = 10,
    scale: int | float = 0.15,
    cmap: str = 'RdBu_r',
    vcenter: int | float | None = None,
    **kwargs
) -> None | Figure:
    """
    Plot results of enrichment analysis as dots.

    Parameters
    ----------
    df
        DataFrame containing enrichment results.
    x
        Name of the column containing values to place on the x-axis.
    y
        Name of the column containing values to place on the y-axis.
    c
        Name of the column containing values to use for coloring.
    s
        Name of the column containing values to use for setting the size of the dots.
    %(top)s
    scale
        Scale of the dots.
    %(cmap)s
    %(vcenter)s
    %(plot)s
    """
    # Validate
    assert isinstance(df, pd.DataFrame), 'df must be a pd.DataFrame'
    assert isinstance(x, str) and x in df.columns, 'x must be str and in df.columns'
    assert isinstance(y, str) and y in df.columns, 'y must be str and in df.columns'
    assert isinstance(c, str) and c in df.columns, 'c must be str and in df.columns'
    assert isinstance(s, str) and s in df.columns, 's must be str and in df.columns'
    assert isinstance(top, (int, float)) and top > 0, 'top must be numerical and > 0'
    assert isinstance(scale, (int, float)), 'scale must be numerical'
    assert isinstance(vcenter, (int, float)) or vcenter is None, 'vcenter must be numeric or None'
    # Keep the `top` features with largest |x| (top is validated as numeric,
    # so cast to int since DataFrame.head requires an integer)
    df = df.copy()
    df['abs_x_col'] = df[x].abs()
    df = df.sort_values('abs_x_col', ascending=False).head(int(top))
    # Extract columns from df as arrays
    x_vals = df[x].values
    y_vals = df[y].values
    c_vals = df[c].values
    s_vals = df[s].values
    # Sort by x so dots are drawn left to right
    idxs = np.argsort(x_vals)
    x_vals = x_vals[idxs]
    y_vals = y_vals[idxs]
    c_vals = c_vals[idxs]
    s_vals = s_vals[idxs]
    # Instance
    bp = Plotter(**kwargs)
    # Dot areas: scatter sizes are in points^2, hence the square
    ns = (s_vals * scale * plt.rcParams["lines.markersize"]) ** 2
    bp.ax.grid(axis='x')
    # BUG FIX: the original tested `if vcenter:` which silently ignored
    # vcenter=0, the most common center for a diverging colormap.
    if vcenter is not None:
        norm = TwoSlopeNorm(vmin=None, vcenter=vcenter, vmax=None)
    else:
        norm = None
    scatter = bp.ax.scatter(
        x=x_vals,
        y=y_vals,
        c=c_vals,
        s=ns,
        cmap=cmap,
        norm=norm,
    )
    bp.ax.set_axisbelow(True)
    bp.ax.set_xlabel(x)
    # Add size legend: invert the area transform to recover original s values
    # (lambda arg renamed from `s` to avoid shadowing the column-name parameter)
    handles, labels = scatter.legend_elements(
        prop="sizes",
        num=3,
        fmt="{x:.2f}",
        func=lambda sz: np.sqrt(sz) / plt.rcParams["lines.markersize"] / scale
    )
    bp.ax.legend(
        handles,
        labels,
        title=s,
        frameon=False,
        loc='lower left',
        bbox_to_anchor=(1.05, 0.5),
        alignment='left',
        labelspacing=1.
    )
    # Add colorbar
    clb = bp.fig.colorbar(
        scatter,
        ax=bp.ax,
        shrink=0.25,
        aspect=5,
        orientation='vertical',
        anchor=(0., 0.),
    )
    clb.ax.set_title(c, loc="left",)
    bp.ax.margins(x=0.25, y=0.1)
    return bp._return()
import numpy as np
from matplotlib.figure import Figure
import seaborn as sns
from anndata import AnnData

from decoupler._docs import docs
from decoupler._Plotter import Plotter
from decoupler.pp.data import extract
from decoupler.pp.anndata import _min_sample_size, _ssize_tcount


@docs.dedent
def filter_by_expr(
    adata: AnnData,
    group: str | None = None,
    lib_size: float | None = None,
    min_count: int = 10,
    min_total_count: int = 15,
    large_n: int = 10,
    min_prop: float = 0.7,
    cmap: str = 'viridis',
    **kwargs,
) -> None | Figure:
    """
    Plot to help determining the thresholds of the ``decoupler.pp.filter_by_expr`` function.

    Parameters
    ----------
    %(adata)s
    %(cmap)s
    %(group)s
    %(lib_size)s
    %(min_count)s
    %(min_total_count)s
    %(large_n)s
    %(min_prop_expr)s
    %(plot)s
    """
    assert isinstance(adata, AnnData), 'adata must be AnnData'
    # Extract the data matrix; sample metadata comes from .obs
    mat, _, _ = extract(adata, empty=False)
    metadata = adata.obs
    # Cutoff on the minimum number of samples (same rule as pp.filter_by_expr)
    n_min = _min_sample_size(
        obs=metadata,
        group=group,
        large_n=large_n,
        min_prop=min_prop,
    )
    # Per-gene: number of samples passing min_count, and total counts
    ssize, tcount = _ssize_tcount(
        X=mat,
        lib_size=lib_size,
        min_count=min_count,
    )
    # Genes with total counts below 1 would break the log scale, mask them out
    tcount[tcount < 1.] = np.nan
    # Instance
    plot = Plotter(**kwargs)
    # 2D histogram of log total counts vs sample size
    sns.histplot(
        x=np.log10(tcount),
        y=ssize,
        cmap=cmap,
        cbar=True,
        cbar_kws=dict(shrink=.75, label='Number of genes'),
        discrete=(False, True),
        ax=plot.ax,
    )
    # Dashed lines mark the two filtering thresholds
    plot.ax.axhline(y=n_min - 0.5, c='gray', ls='--')
    plot.ax.axvline(x=np.log10(min_total_count), c='gray', ls='--')
    plot.ax.set_xlabel(r'$\log_{10}$ total sum of counts')
    plot.ax.set_ylabel('Number of samples')
    return plot._return()
import pandas as pd
import numpy as np
from anndata import AnnData
from matplotlib.figure import Figure

from decoupler._docs import docs
from decoupler._Plotter import Plotter


@docs.dedent
def filter_by_prop(
    adata: AnnData,
    min_prop: float = 0.1,
    min_smpls: int = 2,
    log: bool = True,
    color = 'gray',
    **kwargs
) -> None | Figure:
    """
    Plot to help determining the thresholds of the ``decoupler.pp.filter_by_prop`` function.

    Parameters
    ----------
    %(adata)s
    %(min_prop_prop)s
    %(min_smpls)s
    log
        Whether to log-scale the y axis.
    color
        Color to use in ``matplotlib.pyplot.hist``.
    %(plot)s
    """
    assert isinstance(adata, AnnData), 'adata must be AnnData'
    # FIX: corrected typo in error message ("afer" -> "after")
    assert 'psbulk_props' in adata.layers.keys(), \
        'psbulk_props must be in adata.layers, use this function after running decoupler.pp.pseudobulk'
    props = adata.layers['psbulk_props']
    if isinstance(props, pd.DataFrame):
        props = props.values
    # Per-gene number of samples where the gene is expressed in >= min_prop of cells
    nsmpls = np.sum(props >= min_prop, axis=0)
    # Instance
    bp = Plotter(**kwargs)
    # Histogram of genes per sample count; align='left' centers bars on integers
    _ = bp.ax.hist(
        nsmpls,
        bins=range(min(nsmpls), max(nsmpls) + 2),
        log=log,
        color=color,
        align='left',
        rwidth=0.95,
    )
    # Dashed line marks the min_smpls filtering threshold
    bp.ax.axvline(x=min_smpls - 0.5, c='black', ls='--')
    bp.ax.set_xlabel('Samples (≥ min_prop)')
    bp.ax.set_ylabel('Number of genes')
    return bp._return()
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.figure import Figure
import seaborn as sns
from anndata import AnnData

from decoupler._docs import docs
from decoupler._Plotter import Plotter


def _draw_panel(ax, df, grp, label_x, label_y, min_cells, min_counts):
    # Draw one cells-vs-counts scatter panel colored by `grp`, with threshold lines.
    ax.grid(zorder=0)
    ax.set_axisbelow(True)
    sns.scatterplot(x='psbulk_cells', y='psbulk_counts', hue=grp, ax=ax, data=df, zorder=1)
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), frameon=False, title=grp)
    ax.set_xlabel(label_x)
    ax.set_ylabel(label_y)
    ax.axvline(x=min_cells, linestyle='--', color="black")
    ax.axhline(y=min_counts, linestyle='--', color="black")


@docs.dedent
def filter_samples(
    adata: AnnData,
    groupby: str | list,
    log: bool = True,
    min_cells: int | float = 10,
    min_counts: int | float = 1000,
    **kwargs
) -> None | Figure:
    """
    Plot to assess the quality of the obtained pseudobulk samples from ``decoupler.pp.pseudobulk``.

    Parameters
    ----------
    %(adata)s
    groupby
        Name or names of the ``adata.obs`` column/s to group by.
    log
        If set, log10 transform the ``psbulk_cells`` and ``psbulk_counts`` columns during visualization.
    %(min_cells)s
    %(min_counts)s
    %(plot)s
    """
    # Validate
    assert isinstance(adata, AnnData), 'adata must be AnnData'
    assert isinstance(adata.obs, pd.DataFrame) and adata.obs is not None, \
        f'adata.obs must be a pd.DataFrame not {type(adata.obs)}'
    assert all(col in adata.obs.columns for col in ['psbulk_cells', 'psbulk_counts']), \
        'psbulk_* columns not present in adata.obs, this function should be used after running decoupler.pp.pseudobulk'
    assert isinstance(groupby, (str, list)), 'groupby must be str or list'
    if isinstance(groupby, str):
        groupby = [groupby]
    assert all(col in adata.obs for col in groupby), 'columns in groupby must be in adata.obs'
    # Extract obs
    df = adata.obs.copy()
    # Transform to log10
    label_x, label_y = 'cells', 'counts'
    if log:
        df['psbulk_cells'] = np.log10(df['psbulk_cells'] + 1)
        df['psbulk_counts'] = np.log10(df['psbulk_counts'] + 1)
        label_x, label_y = r'$\log_{10}$ ' + label_x, r'$\log_{10}$ ' + label_y
        # NOTE(review): thresholds use log10(x) while data uses log10(x + 1);
        # the offset is negligible for typical thresholds — confirm intended.
        min_cells, min_counts = np.log10(min_cells), np.log10(min_counts)
    # Plot
    if len(groupby) > 1:
        # One stacked panel per grouping column; cannot reuse a caller-given ax
        assert kwargs.get('ax') is None, 'when groupby is list, ax must be None'
        kwargs['ax'] = None
        bp = Plotter(**kwargs)
        bp.fig.delaxes(bp.ax)
        plt.close(bp.fig)
        bp.fig, axes = plt.subplots(len(groupby), 1, figsize=bp.figsize, dpi=bp.dpi, tight_layout=True)
        axes = axes.ravel()
        for ax, grp in zip(axes, groupby):
            _draw_panel(ax, df, grp, label_x, label_y, min_cells, min_counts)
    else:
        # Single panel on the Plotter-managed axes
        groupby = groupby[0]
        bp = Plotter(**kwargs)
        _draw_panel(bp.ax, df, groupby, label_x, label_y, min_cells, min_counts)
    return bp._return()
import matplotlib.pyplot as plt
from matplotlib.figure import Figure
import seaborn as sns
from anndata import AnnData

from decoupler._docs import docs
from decoupler._Plotter import Plotter


@docs.dedent
def obsbar(
    adata: AnnData,
    y: str,
    hue: str | None = None,
    kw_barplot: dict = dict(),
    **kwargs
) -> None | Figure:
    """
    Plot ``adata.obs`` metadata as a grouped barplot.

    Parameters
    ----------
    %(adata)s
    y
        Column name in ``adata.obs`` to plot in y axis.
    hue
        Column name in ``adata.obs`` to color bars.
    kw_barplot
        Keyword arguments passed to ``seaborn.barplot``.
    %(plot)s
    """
    # Validate inputs
    assert isinstance(adata, AnnData), 'adata must be an AnnData instance'
    assert isinstance(y, str), 'y must be str'
    assert isinstance(hue, str) or hue is None, 'hue must be str or None'
    # Unique grouping columns, dropping hue when it is unset
    group_cols = {y, hue} - {None}
    assert group_cols.issubset(adata.obs.columns), \
        f'y={y} and hue={hue} must be in adata.obs.columns={adata.obs.columns}'
    group_cols = list(group_cols)
    # Count observations per category combination
    counts = (
        adata.obs
        .groupby(group_cols, observed=True, as_index=False)
        .size()
    )
    # Instance
    bp = Plotter(**kwargs)
    # Horizontal bars: counts on x, categories on y
    sns.barplot(
        data=counts,
        y=y,
        x='size',
        hue=hue,
        ax=bp.ax,
        **kw_barplot
    )
    # Move the legend outside unless it would duplicate the y categories
    if hue is not None and y != hue:
        bp.ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), frameon=False, title=hue)
    return bp._return()
import pandas as pd
import numpy as np
from matplotlib.colors import to_rgb
from matplotlib.figure import Figure
import seaborn as sns

from decoupler._docs import docs
from decoupler._Plotter import Plotter


@docs.dedent
def order(
    df: pd.DataFrame,
    mode: str = 'line',
    kw_order = dict(),
    **kwargs
) -> None | Figure:
    """
    Plot features along a continuous, ordered process such as pseudotime.

    Parameters
    ----------
    df
        Results of ``decoupler.pp.bin_order``.
    mode
        The type of plot to use, either "line" or "mat".
    kw_order
        Other keyword arguments are passed down to ``seaborn.lineplot`` or ``matplotlib.pyplot.imshow``,
        depending on ``mode`` used.
    %(plot)s
    """
    # Validate
    assert isinstance(df, pd.DataFrame), 'df must be pandas.DataFrame'
    assert isinstance(mode, str) and mode in ['line', 'mat'], \
        'mode must be str and either "line" or "mat"'
    assert isinstance(kw_order, dict), \
        'kw_order must be dict'
    # Axis ranges: max value for y, order span for x, number of features
    ymax = df['value'].max()
    xmin, xmax = df['order'].min(), df['order'].max()
    n_names = df['name'].unique().size
    # Optional annotation strip: only drawn when df carries label/color columns
    has_cbar = False
    if np.isin(['label', 'color'], df.columns).all():
        # Colors of the bins for one feature (same per feature)
        # NOTE(review): df.loc[0, 'name'] assumes a default RangeIndex — confirm
        colors = df[df['name'] == df.loc[0, 'name']]['color']
        colors = [[to_rgb(c) for c in colors]]
        has_cbar = True
    # Instance
    bp = Plotter(**kwargs)
    # Plot
    if mode == 'line':
        if has_cbar:
            # Annotation strip drawn just above the curves (5%-20% above ymax)
            bp.ax.imshow(
                colors,
                aspect='auto',
                extent=[xmin, xmax, 1.05 * ymax, 1.2 * ymax],
                transform=bp.ax.transData,
                zorder=2
            )
            bp.ax.axhline(y=1.05 * ymax, c='black', lw=1)
        # Copy before setdefault so the caller's dict is not mutated
        kw_order = kw_order.copy()
        kw_order.setdefault('palette', 'tab20')
        # One line per feature along the ordering variable
        sns.lineplot(
            data=df,
            x='order',
            y='value',
            hue='name',
            ax=bp.ax,
            **kw_order
        )
        bp.ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), frameon=False)
    elif mode == 'mat':
        # Feature x bin matrix of mean values
        mat = (
            df
            .groupby(['name', 'order'], as_index=False)['value'].mean()
            .pivot(index='name', columns='order', values='value')
        )
        img = bp.ax.imshow(mat, extent=[xmin, xmax, 0, n_names], aspect='auto', **kw_order)
        if has_cbar:
            # Annotation strip drawn above the heatmap rows
            bp.ax.imshow(colors, aspect='auto', extent=[xmin, xmax, n_names, 1.1 * n_names], zorder=2)
            bp.ax.axhline(y=n_names, c='black', lw=1)
            bp.ax.set_ylim(0, 1.1 * n_names)
        bp.fig.colorbar(img, ax=bp.ax, shrink=0.5, label='Mean value', location='top')
        # Tick labels flipped because imshow draws the first row at the top
        bp.ax.set_yticks(np.arange(n_names) + 0.5)
        bp.ax.set_yticklabels(np.flip(mat.index))
        bp.ax.grid(axis='y', visible=False)
    bp.ax.set_xlabel('order')
    bp.ax.set_xlim(xmin, xmax)
    return bp._return()
import pandas as pd
import numpy as np
from matplotlib.figure import Figure
import adjustText as at

from decoupler._docs import docs
from decoupler._Plotter import Plotter
from decoupler.pp.net import _validate_net


@docs.dedent
def source_targets(
    data: pd.DataFrame,
    net: pd.DataFrame,
    x: str,
    y: str,
    name: str,
    top: int = 5,
    thr_x: float = 0.,
    thr_y: float = 0.,
    max_x: float | None = None,
    max_y: float | None = None,
    color_pos: str = '#D62728',
    color_neg: str = '#1F77B4',
    **kwargs,
) -> None | Figure:
    """
    Plots target features of a given source as a scatter plot.

    Parameters
    ----------
    %(data_plot)s
    %(net)s
    x
        Name of the column containing values to place on the x-axis.
    y
        Name of the column containing values to place on the y-axis.
    name
        Name of the source to plot.
    top
        Number of top features based on the product of x and y to label.
    thr_x
        Value where to place a baseline for the x-axis.
    thr_y
        Value where to place a baseline for the y-axis.
    max_x
        Maximum value to plot on x-axis.
    max_y
        Maximum value to plot on y-axis.
    color_pos
        Color to plot positively associated features.
    color_neg
        Color to plot negatively associated features.
    %(plot)s
    """
    # Validate inputs
    m = f'data must be a pd.DataFrame containing the columns {x} and {y}'
    assert isinstance(data, pd.DataFrame), m
    assert {x, y}.issubset(data.columns.union(net.columns)), m
    assert not pd.api.types.is_numeric_dtype(data.index), 'data index must be features in net'
    assert isinstance(net, pd.DataFrame), \
        f'net must be a pd.DataFrame containing the columns {x} and {y}'
    assert isinstance(name, str), 'name must be a str'
    assert isinstance(top, int) and top > 0, 'top must be int and > 0'
    assert isinstance(thr_x, (int, float)), 'thr_x must be numeric'
    assert isinstance(thr_y, (int, float)), 'thr_y must be numeric'
    if max_x is None:
        max_x = np.inf
    if max_y is None:
        max_y = np.inf
    assert isinstance(max_x, (int, float)) and max_x > 0, \
        'max_x must be None, or numeric and > 0'
    assert isinstance(max_y, (int, float)) and max_y > 0, \
        'max_y must be None, or numeric and > 0'
    assert isinstance(color_pos, str), 'color_pos must be str'
    assert isinstance(color_neg, str), 'color_neg must be str'
    # Instance
    bp = Plotter(**kwargs)
    # Extract df
    df = data.copy().reset_index(names='target')
    # Filter by net shared targets
    vnet = _validate_net(net)
    snet = vnet[vnet['source'] == name]
    assert snet.shape[0] > 0, f'name={name} must be in net["source"]'
    df = pd.merge(df, snet, on=['target'], how='inner').set_index('target')
    # Filter by limits
    msk_x = np.abs(df[x]) < np.abs(max_x)
    msk_y = np.abs(df[y]) < np.abs(max_y)
    df = df.loc[msk_x & msk_y]
    # Same-sign quadrants are "positive" associations, opposite-sign negative
    pos = ((df[x] >= 0) & (df[y] >= 0)) | ((df[x] < 0) & (df[y] < 0))
    df['color'] = color_neg
    df.loc[pos, 'color'] = color_pos
    # Plot
    df.plot.scatter(x=x, y=y, c='color', ax=bp.ax)
    # Draw thr lines
    bp.ax.axvline(x=thr_x, linestyle='--', color="black")
    bp.ax.axhline(y=thr_y, linestyle='--', color="black")
    # Add labels
    bp.ax.set_title(name)
    bp.ax.set_xlabel(x)
    bp.ax.set_ylabel(y)
    # Show top features, ranked by |x| * |y|
    df['order'] = df[x].abs() * df[y].abs()
    signs = df.sort_values('order', ascending=False)
    signs = signs.iloc[:top]
    texts = []
    # BUG FIX: loop variables renamed — the original reused `x`/`y`, shadowing
    # the column-name parameters and leaving them bound to scalars afterwards.
    for tx, ty, label in zip(signs[x], signs[y], signs.index):
        texts.append(bp.ax.text(tx, ty, label))
    if len(texts) > 0:
        at.adjust_text(texts, arrowprops=dict(arrowstyle='-', color='black'), ax=bp.ax)
    return bp._return()
color="black") 99 | # Add labels 100 | bp.ax.set_title(name) 101 | bp.ax.set_xlabel(x) 102 | bp.ax.set_ylabel(y) 103 | # Show top features 104 | df['order'] = df[x].abs() * df[y].abs() 105 | signs = df.sort_values('order', ascending=False) 106 | signs = signs.iloc[:top] 107 | texts = [] 108 | for x, y, s in zip(signs[x], signs[y], signs.index): 109 | texts.append(bp.ax.text(x, y, s)) 110 | if len(texts) > 0: 111 | at.adjust_text(texts, arrowprops=dict(arrowstyle='-', color='black'), ax=bp.ax) 112 | return bp._return() 113 | -------------------------------------------------------------------------------- /src/decoupler/pp/__init__.py: -------------------------------------------------------------------------------- 1 | from .net import read_gmt, prune, adjmat, idxmat, shuffle_net, net_corr 2 | from .data import extract 3 | from .anndata import get_obsm, swap_layer, pseudobulk, filter_samples, \ 4 | filter_by_expr, filter_by_prop, knn, bin_order 5 | -------------------------------------------------------------------------------- /src/decoupler/tl/__init__.py: -------------------------------------------------------------------------------- 1 | from decoupler.tl._rankby_group import rankby_group 2 | from decoupler.tl._rankby_obsm import rankby_obsm 3 | from decoupler.tl._rankby_order import rankby_order 4 | -------------------------------------------------------------------------------- /src/decoupler/tl/_rankby_obsm.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | 3 | import pandas as pd 4 | import scipy.stats as sts 5 | from anndata import AnnData 6 | 7 | from decoupler._docs import docs 8 | 9 | 10 | def _input_rank_obsm( 11 | adata: AnnData, 12 | key: str, 13 | ) -> Tuple[pd.DataFrame, list, list]: 14 | # Validate 15 | assert isinstance(adata, AnnData), 'adata must be anndata.AnnData' 16 | assert key in adata.obsm, f'key={key} must be in adata.obsm' 17 | # Process 18 | name_col = ( 19 | key 20 | 
.replace('X_', '') 21 | .replace('pca', 'PC') 22 | .replace('mofa', 'Factor') 23 | .replace('umap', 'UMAP') 24 | ) 25 | df = adata.obsm[key] 26 | if isinstance(df, pd.DataFrame): 27 | y_vars = df.std(ddof=1, axis=0).sort_values(ascending=False).index 28 | df = df.loc[:, y_vars].values 29 | else: 30 | ncol = df.shape[1] 31 | digits = len(str(ncol)) 32 | y_vars = [f"{name_col}{str(i).zfill(digits)}" for i in range(1, ncol + 1)] 33 | df = pd.DataFrame( 34 | data=df, 35 | index=adata.obs_names, 36 | columns=y_vars 37 | ) 38 | x_vars = adata.obs.columns 39 | # Merge 40 | df = pd.merge(df, adata.obs, left_index=True, right_index=True) 41 | return df, x_vars, y_vars 42 | 43 | 44 | @docs.dedent 45 | def rankby_obsm( 46 | adata: AnnData, 47 | key: str, 48 | uns_key: str | None = 'rank_obsm', 49 | ) -> None | pd.DataFrame: 50 | """ 51 | Ranks features in ``adata.obsm`` by the significance of their association with metadata in ``adata.obs``. 52 | 53 | For categorical variables it uses ANOVA, for continous Spearman's correlation. 54 | 55 | The obtained p-values are corrected by Benjamini-Hochberg. 56 | 57 | Parameters 58 | ---------- 59 | %(adata)s 60 | %(key)s 61 | uns_key 62 | ``adata.uns`` key to store the results. 63 | 64 | Returns 65 | ------- 66 | If ``uns_key=False``, a pandas.DataFrame with the resulting statistics. 
67 | """ 68 | assert isinstance(uns_key, str) or uns_key is None, \ 69 | 'uns_key must be str or None' 70 | # Extract 71 | df, x_vars, y_vars = _input_rank_obsm(adata=adata, key=key) 72 | # Test 73 | res = [] 74 | for x_var in x_vars: 75 | for y_var in y_vars: 76 | if pd.api.types.is_numeric_dtype(df[x_var]): 77 | # Correlation 78 | x = df[x_var].values.ravel() 79 | y = df[y_var].values.ravel() 80 | stat, pval = sts.spearmanr(x, y) 81 | else: 82 | # ANOVA 83 | x = [group[y_var].dropna().values for _, group in df.groupby(x_var, observed=True)] 84 | # At least n=2 per group else skip 85 | if all(len(g) >= 2 for g in x): 86 | stat, pval = sts.f_oneway(*x) 87 | else: 88 | stat, pval = None, 1. 89 | row = [y_var, x_var, stat, pval] 90 | res.append(row) 91 | res = pd.DataFrame(res, columns=['obsm', 'obs', 'stat', 'pval']) 92 | res['padj'] = sts.false_discovery_control(res['pval']) 93 | # Rank 94 | res = res.sort_values('padj').reset_index(drop=True) 95 | # Add obsm key 96 | res.key = key 97 | # Save or return 98 | if uns_key: 99 | adata.uns[uns_key] = res 100 | else: 101 | return res 102 | -------------------------------------------------------------------------------- /src/decoupler/tl/_rankby_order.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | import pandas as pd 4 | import numpy as np 5 | from tqdm.auto import tqdm 6 | import scipy.stats as sts 7 | import scipy.sparse as sps 8 | from anndata import AnnData 9 | 10 | from decoupler._odeps import dcor, _check_import 11 | from decoupler._docs import docs 12 | from decoupler.pp.data import extract 13 | 14 | 15 | 16 | @docs.dedent 17 | def rankby_order( 18 | adata: AnnData, 19 | order: str, 20 | stat: str = 'dcor', 21 | verbose: bool = False, 22 | **kwargs 23 | ) -> pd.DataFrame: 24 | """ 25 | Rank features along a continuous, ordered process such as pseudotime. 
import warnings

import pandas as pd
import numpy as np
from tqdm.auto import tqdm
import scipy.stats as sts
import scipy.sparse as sps
from anndata import AnnData

from decoupler._odeps import dcor, _check_import
from decoupler._docs import docs
from decoupler.pp.data import extract


@docs.dedent
def rankby_order(
    adata: AnnData,
    order: str,
    stat: str = 'dcor',
    verbose: bool = False,
    **kwargs
) -> pd.DataFrame:
    """
    Rank features along a continuous, ordered process such as pseudotime.

    Parameters
    ----------
    %(adata)s
    %(order)s
    stat
        Which statistic to compute.
        Must be one of these:

        - ``dcor`` (distance correlation from ``dcor.independence.distance_correlation_t_test``)
        - ``pearsonr`` (Pearson's R from ``scipy.stats.pearsonr``)
        - ``spearmanr`` (Spearman's R from ``scipy.stats.spearmanr``)
        - ``kendalltau`` (Kendall's Tau from ``scipy.stats.kendalltau``)

    %(verbose)s
    kwargs
        Key arguments passed to the selected ``stat`` function.

    Returns
    -------
    DataFrame with features associated with the ordering variable.
    """
    # Validate
    assert isinstance(adata, AnnData), 'adata must be anndata.AnnData'
    assert isinstance(order, str) and order in adata.obs.columns, 'order must be str and in adata.obs.columns'
    stats = {'dcor', 'pearsonr', 'spearmanr', 'kendalltau'}
    assert (isinstance(stat, str) and stat in stats) or callable(stat), \
        f'stat must be str and one of these {stats}, or a function that returns statistic and pvalue'
    # Get vars and ordinal variable as dense float arrays
    X = adata.X
    if sps.issparse(X):
        X = X.toarray()
    X = X.astype(float)
    y = adata.obs[order].values.astype(float)
    # Init
    df = pd.DataFrame()
    df['name'] = adata.var_names
    # Resolve the statistic function
    if stat == 'dcor':
        _check_import(dcor)
        f = dcor.independence.distance_correlation_t_test
    elif stat == 'pearsonr':
        f = sts.pearsonr
    elif stat == 'spearmanr':
        f = sts.spearmanr
    elif stat == 'kendalltau':
        f = sts.kendalltau
    else:
        f = stat
    ss = []
    ps = []
    for i in tqdm(range(X.shape[1]), disable=not verbose):
        x = X[:, i]
        if not np.all(x == x[0]):
            res = f(x, y)
            # FIX: accept both result objects exposing .statistic/.pvalue
            # (scipy, dcor) and plain (statistic, pvalue) tuples, as the
            # docstring promises for user-supplied callables.
            if hasattr(res, 'statistic'):
                s, p = res.statistic, res.pvalue
            else:
                s, p = res
        else:
            # Constant features carry no signal: neutral statistic, p = 1
            s = 0
            p = 1
        ss.append(s)
        ps.append(p)
    df['stat'] = ss
    df['pval'] = ps
    # Benjamini-Hochberg correction, then rank by significance and effect size
    df['padj'] = sts.false_discovery_control(df['pval'])
    df['abs_stat'] = df['stat'].abs()
    df = df.sort_values(['padj', 'pval', 'abs_stat'], ascending=[True, True, False]).reset_index(drop=True)
    df = df.drop(columns='abs_stat')
    return df
import pandas as pd
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
from matplotlib.figure import Figure
import pytest

import decoupler as dc

@pytest.fixture
def df():
    # Toy benchmark results: two methods scored on three metric families
    # (auroc/auprc, recall/precision, 1-qrank/-log10(pval))
    df = pd.DataFrame(
        data=[
            ['aucell', 'auroc', 0.45],
            ['aucell', 'auprc', 0.55],
            ['ulm', 'auroc', 0.9],
            ['ulm', 'auprc', 0.8],
            ['aucell', 'recall', 0.45],
            ['aucell', 'precision', 0.55],
            ['ulm', 'recall', 0.9],
            ['ulm', 'precision', 0.8],
            ['aucell', '1-qrank', 0.45],
            ['aucell', '-log10(pval)', 0.9],
            ['ulm', '1-qrank', 0.9],
            ['ulm', '-log10(pval)', 5.6],
        ],
        columns = ['method', 'metric', 'score']
    )
    return df


@pytest.fixture
def hdf(
    df,
):
    # Harmonic-mean summary of the per-metric scores
    hdf = dc.bm.metric.hmean(df)
    return hdf


def test_auc(
    df,
):
    # AUC plot works both without and with a hue grouping
    fig = dc.bm.pl.auc(df=df, hue=None, return_fig=True)
    assert isinstance(fig, Figure)
    plt.close(fig)
    fig = dc.bm.pl.auc(df=df, hue='method', return_fig=True)
    assert isinstance(fig, Figure)
    plt.close(fig)


def test_fscore(
    df,
):
    # F-score plot works both without and with a hue grouping
    fig = dc.bm.pl.fscore(df=df, hue=None, return_fig=True)
    assert isinstance(fig, Figure)
    plt.close(fig)
    fig = dc.bm.pl.fscore(df=df, hue='method', return_fig=True)
    assert isinstance(fig, Figure)
    plt.close(fig)


def test_qrank(
    df,
):
    # Quantile-rank plot works both without and with a hue grouping
    fig = dc.bm.pl.qrank(df=df, hue=None, return_fig=True)
    assert isinstance(fig, Figure)
    plt.close(fig)
    fig = dc.bm.pl.qrank(df=df, hue='method', return_fig=True)
    assert isinstance(fig, Figure)
    plt.close(fig)


def test_bar(
    hdf,
):
    # Barplot of the harmonic-mean summary, with and without hue
    fig = dc.bm.pl.bar(df=hdf, x='H(auroc, auprc)', y='method', hue=None, return_fig=True)
    assert isinstance(fig, Figure)
    plt.close(fig)
    fig = dc.bm.pl.bar(df=hdf, x='H(auroc, auprc)', y='method', hue='method', return_fig=True)
    assert isinstance(fig, Figure)
    plt.close(fig)


def test_summary(
    hdf,
):
    # Summary plot of all harmonic means
    fig = dc.bm.pl.summary(df=hdf, y='method', figsize=(6, 3), return_fig=True)
    assert isinstance(fig, Figure)
    plt.close(fig)
import numpy as np
import pandas as pd
import pytest
import scanpy as sc

import decoupler as dc


@pytest.fixture
def rng():
    # Seeded generator so tests are deterministic
    rng = np.random.default_rng(seed=42)
    return rng


@pytest.fixture
def adata():
    # Small toy expression AnnData with a rounded counts layer
    adata, _ = dc.ds.toy(nobs=40, nvar=20, bval=2, seed=42, verbose=False)
    adata.layers['counts'] = adata.X.round()
    return adata


@pytest.fixture
def tdata():
    # Toy AnnData with a pseudotime ordering column
    tdata, _ = dc.ds.toy(nobs=40, nvar=20, bval=2, seed=42, verbose=False, pstime=True)
    return tdata


@pytest.fixture
def tdata_obsm(
    tdata,
    net,
    rng,
):
    # tdata enriched with PCA, a jittered UMAP, and ulm activity scores in obsm
    sc.tl.pca(tdata)
    tdata.obsm['X_umap'] = tdata.obsm['X_pca'][:, :2] + rng.random(tdata.obsm['X_pca'][:, :2].shape)
    dc.mt.ulm(data=tdata, net=net, tmin=0)
    return tdata


@pytest.fixture
def pdata(
    adata,
    rng,
):
    # Pseudobulk profiles with random dropout applied to the counts
    adata.X = adata.X.round() * (rng.random(adata.shape) > 0.75)
    return dc.pp.pseudobulk(adata=adata, sample_col='sample', groups_col='group')


@pytest.fixture
def bdata():
    # Benchmark AnnData with a combined sample+group column for grouped metrics
    adata, _ = dc.ds.toy_bench(nobs=100, nvar=20, bval=2, seed=42, verbose=False)
    adata.obs['bm_group'] = adata.obs.apply(lambda x: [x['sample'], x['group']], axis=1)
    return adata


@pytest.fixture
def deg():
    # Minimal differential-expression table (stat + adjusted p-value per gene)
    deg = pd.DataFrame(
        data = [
            [1, 0.5],
            [-2, 0.25],
            [3, 0.125],
            [-4, 0.05],
            [5, 0.025],
        ],
        columns=['stat', 'padj'],
        index=['G01', 'G02', 'G03', 'G04', 'G05']
    )
    return deg


@pytest.fixture
def net():
    # Toy network pruned to sources with at least 3 targets
    _, net = dc.ds.toy(nobs=2, nvar=12, bval=2, seed=42, verbose=False)
    net = dc.pp.prune(features=net['target'].unique(), net=net, tmin=3)
    return net


@pytest.fixture
def unwnet(net):
    # Same network without weights (unweighted variant)
    return net.drop(columns=['weight'], inplace=False)


@pytest.fixture
def mat(
    adata,
):
    # (X, obs, var) tuple extracted from the toy AnnData
    return dc.pp.extract(data=adata)


@pytest.fixture
def idxmat(
    mat,
    net,
):
    # Flat index representation of the network (cnct, starts, offsets)
    X, obs, var = mat
    sources, cnct, starts, offsets = dc.pp.idxmat(features=var, net=net, verbose=False)
    return cnct, starts, offsets


@pytest.fixture
def adjmat(
    mat,
    net,
):
    # Dense adjacency matrix representation of the network
    X, obs, var = mat
    sources, targets, adjmat = dc.pp.adjmat(features=var, net=net, verbose=False)
    return adjmat
import warnings

import pandas as pd
import pytest
import anndata as ad

import decoupler as dc


@pytest.mark.parametrize(
    'url', [
        'https://datasets.cellxgene.cziscience.com/' +
        'f665effe-d95a-4211-ab03-9d1777ca0806.h5ad',
        'https://datasets.cellxgene.cziscience.com/' +
        '1338d08a-481a-426c-ad60-9f4ac08afe16.h5ad'
    ]
)
def test_download_anndata(
    url
):
    # Private downloader returns an AnnData for cellxgene-hosted .h5ad files
    warnings.filterwarnings("ignore", module="anndata")
    adata = dc.ds._scell._download_anndata(url=url)
    assert isinstance(adata, ad.AnnData)


def test_pbmc3k():
    # PBMC3k loader: raw slot cleared, categorical annotation columns present
    warnings.filterwarnings("ignore", module="anndata")
    adata = dc.ds.pbmc3k()
    assert isinstance(adata, ad.AnnData)
    assert adata.raw is None
    assert isinstance(adata.obs, pd.DataFrame)
    cols = {'celltype', 'leiden'}
    assert cols.issubset(adata.obs.columns)
    assert 'louvain' not in adata.obs.columns
    for col in cols:
        assert isinstance(adata.obs[col].dtype, pd.CategoricalDtype)


def test_covid5k():
    # Covid5k loader: expected categorical metadata columns
    adata = dc.ds.covid5k()
    assert isinstance(adata, ad.AnnData)
    assert adata.raw is None
    assert isinstance(adata.obs, pd.DataFrame)
    cols = {'individual', 'sex', 'disease', 'celltype'}
    assert cols.issubset(adata.obs.columns)
    for col in cols:
        assert isinstance(adata.obs[col].dtype, pd.CategoricalDtype)


def test_erygast1k():
    # Erygast1k loader: categorical metadata plus precomputed embeddings
    adata = dc.ds.erygast1k()
    assert isinstance(adata, ad.AnnData)
    assert adata.raw is None
    assert isinstance(adata.obs, pd.DataFrame)
    cols = {'sample', 'stage', 'sequencing.batch', 'theiler', 'celltype'}
    assert cols.issubset(adata.obs.columns)
    for col in cols:
        assert isinstance(adata.obs[col].dtype, pd.CategoricalDtype)
    keys = {'X_pca', 'X_umap'}
    assert keys.issubset(adata.obsm.keys())
# tests/ds/test_scell.py
def test_erygast1k():
    """erygast1k loads with categorical metadata and PCA/UMAP embeddings."""
    adata = dc.ds.erygast1k()
    assert isinstance(adata, ad.AnnData)
    assert adata.raw is None
    assert isinstance(adata.obs, pd.DataFrame)
    cols = {'sample', 'stage', 'sequencing.batch', 'theiler', 'celltype'}
    assert cols.issubset(adata.obs.columns)
    assert all(isinstance(adata.obs[c].dtype, pd.CategoricalDtype) for c in cols)
    assert {'X_pca', 'X_umap'}.issubset(adata.obsm.keys())


# tests/ds/test_spatial.py
def test_msvisium():
    """msvisium loads with a categorical 'niches' annotation."""
    adata = dc.ds.msvisium()
    assert isinstance(adata, ad.AnnData)
    assert adata.raw is None
    assert isinstance(adata.obs, pd.DataFrame)
    cols = {'niches'}
    assert cols.issubset(adata.obs.columns)
    assert all(isinstance(adata.obs[c].dtype, pd.CategoricalDtype) for c in cols)


# tests/ds/test_toy.py
@pytest.mark.parametrize(
    'nvar,val,size,hasval',
    [
        [3, 0., 5, False],
        [10, 0., 10, True],
    ]
)
def test_fillval(nvar, val, size, hasval):
    """_fillval pads the array up to nvar entries (no-op when already longer)."""
    arr = np.array([1., 2., 3., 4., 5.])
    padded = dc.ds._toy._fillval(arr=arr, nvar=nvar, val=val)
    assert padded.size == size
    # The pad value shows up at the tail only when padding happened
    assert (val == padded[-1]) == hasval


@pytest.mark.parametrize(
    'nobs,nvar,bval,pstime,seed,verbose',
    [
        [10, 15, 2, True, 42, False],
        [2, 12, 2, False, 42, False],
        [100, 50, 0, False, 0, True],
        [10, 500, 0, True, 0, True],
    ]
)
def test_toy(nobs, nvar, bval, pstime, seed, verbose, caplog):
    """Toy generator produces two groups with the requested dimensions."""
    with caplog.at_level(logging.INFO):
        adata, net = dc.ds.toy(nobs=nobs, nvar=nvar, bval=bval, pstime=pstime, seed=seed, verbose=verbose)
    # Logging happens exactly when verbose is requested
    if verbose:
        assert len(caplog.text) > 0
    else:
        assert caplog.text == ''
    assert all(adata.obs['group'].cat.categories == ['A', 'B'])
    msk = adata.obs['group'] == 'A'
    # First four genes up in group A, the next four up in group B
    assert all(adata[msk, :4].X.mean(0) > adata[~msk, :4].X.mean(0))
    assert all(adata[msk, 4:8].X.mean(0) < adata[~msk, 4:8].X.mean(0))
    assert nobs == adata.n_obs
    assert nvar == adata.n_vars
    # Background genes hover around bval (skipped for the minimal nvar=12 case)
    assert ((bval - 1) < np.mean(adata.X[:, -1].ravel()) < (bval + 1)) or nvar == 12
    if pstime:
        assert 'pstime' in adata.obs.columns
        assert ((0. <= adata.obs['pstime']) & (adata.obs['pstime'] <= 1.)).all()


@pytest.mark.parametrize(
    'shuffle_r,seed,nobs,nvar,is_diff',
    [
        [0.0, 1, 20, 31, True],
        [0.1, 2, 36, 41, True],
        [0.9, 3, 49, 21, False],
        [1.0, 4, 18, 41, False],
    ]
)
def test_toy_bench(net, shuffle_r, seed, nobs, nvar, is_diff):
    """Shuffling destroys the group signal; the returned net stays unchanged."""
    adata, bmnet = dc.ds.toy_bench(shuffle_r=shuffle_r, seed=seed, nobs=nobs, nvar=nvar)
    assert (net == bmnet).values.all()
    assert adata.n_obs == nobs
    assert adata.n_vars == nvar
    msk = adata.obs['group'] == 'A'
    grp_a = adata[msk, :].copy()
    grp_b = adata[~msk, :].copy()
    for gene in adata.var_names[:8]:
        a_vals = grp_a[:, gene].X.ravel()
        b_vals = grp_b[:, gene].X.ravel()
        _, pval = sts.ranksums(a_vals, b_vals)
        # Low shuffle ratios keep the signal; high ratios erase it
        if is_diff:
            assert pval < 0.05
        else:
            assert pval > 0.05
# tests/ds/test_utils.py
@pytest.mark.parametrize(
    'organism,lst_ens,lst_sym',
    [
        ['hsapiens_gene_ensembl', ['ENSG00000196092', 'ENSG00000115415'], ['PAX5', 'STAT1']],
        ['hsapiens_gene_ensembl', ['ENSG00000204655', 'ENSG00000184221'], ['MOG', 'OLIG1']],
        ['mmusculus_gene_ensembl', ['ENSMUSG00000076439', 'ENSMUSG00000046160'], ['Mog', 'Olig1']],
    ]
)
def test_ensmbl_to_symbol(organism, lst_ens, lst_sym):
    """Ensembl ids translate to the expected gene symbols."""
    lst_trn = dc.ds.ensmbl_to_symbol(genes=lst_ens, organism=organism)
    for got, want in zip(lst_trn, lst_sym):
        assert got == want


# tests/mt/test_aucell.py
def test_auc(mat, idxmat):
    """The AUC kernel returns one score per source."""
    X, obs, var = mat
    cnct, starts, offsets = idxmat
    scores = dc.mt._aucell._auc.py_func(
        row=X[0],
        cnct=cnct,
        starts=starts,
        offsets=offsets,
        n_up=2,
        nsrc=offsets.size,
    )
    assert isinstance(scores, np.ndarray)
    assert scores.size == offsets.size


def test_func_aucell(mat, idxmat):
    """AUCell scores match reference values computed with R's AUCell."""
    X, obs, var = mat
    cnct, starts, offsets = idxmat
    obs = np.array(['S01', 'S02', 'S29', 'S30'])
    # Keep the two first and two last observations, as in the R reference run
    X = sps.csr_matrix(np.vstack((X[:2, :], X[-2:, :])))
    expected = pd.DataFrame(
        data=np.array([
            [0.6666667, 0.3333333, 0, 0, 0],
            [1.0000000, 0.0000000, 0, 0, 0],
            [0.0000000, 1.0000000, 1, 0, 0],
            [0.0000000, 1.0000000, 1, 0, 0],
        ]),
        columns=['T1', 'T2', 'T3', 'T4', 'T5'],
        index=obs,
    )
    dc_es, _ = dc.mt._aucell._func_aucell(
        mat=X,
        cnct=cnct,
        starts=starts,
        offsets=offsets,
        n_up=3,
    )
    assert np.isclose(dc_es, expected.values).all()


# tests/mt/test_consensus.py
@pytest.mark.parametrize('sel', [np.array([0., 0., 0., 0.]), np.array([1., 3., 8., 2.])])
def test_zscore(sel):
    """The z-score kernel keeps the input size (constant input included)."""
    z = dc.mt._consensus._zscore.py_func(sel=sel)
    assert isinstance(z, np.ndarray)
    assert z.size == sel.size


def test_mean_zscores(rng):
    """Averaging z-scores collapses the methods axis."""
    scores = rng.normal(size=(2, 5, 10))
    es = dc.mt._consensus._mean_zscores.py_func(scores=scores)
    assert scores.shape[1:] == es.shape
# tests/mt/test_consensus.py
def test_consensus(adata, net):
    """Consensus works on AnnData in place and on returned DataFrames."""
    dc.mt.decouple(data=adata, net=net, methods=['zscore', 'ulm'], cons=False, tmin=0)
    dc.mt.consensus(adata)
    assert 'score_consensus' in adata.obsm
    res = dc.mt.decouple(data=adata.to_df(), net=net, methods=['zscore', 'ulm'], cons=False, tmin=0)
    es, pv = dc.mt.consensus(res)
    assert np.isfinite(es.values).all()
    assert ((0 <= pv.values) & (pv.values <= 1)).all()


# tests/mt/test_decouple.py
@pytest.mark.parametrize(
    'methods,args,cons,anndata',
    [
        ['all', dict(), True, True],
        ['aucell', dict(aucell=dict(n_up=3)), True, False],
        [['ulm'], dict(), False, True],
        [['ulm', 'ora'], dict(ulm=dict(), ora=dict(n_up=3)), False, False],
    ]
)
def test_decouple(adata, net, methods, args, cons, anndata):
    """decouple stores consensus scores exactly when cons=True."""
    if anndata:
        dc.mt.decouple(data=adata, net=net, methods=methods, args=args, cons=cons, tmin=0)
        found = 'score_consensus' in adata.obsm
    else:
        res = dc.mt.decouple(data=adata.to_df(), net=net, methods=methods, args=args, cons=cons, tmin=0)
        found = 'score_consensus' in res
    assert found == cons


# tests/mt/test_gsea.py
def test_std():
    """The numba std kernel matches numpy with ddof=1."""
    arr = np.array([0.1, -5.3, 3.8, 9.5, -0.4, 5.5])
    assert np.std(arr, ddof=1) == dc.mt._gsea._std.py_func(arr=arr, ddof=1)


def test_ridx():
    """Permutation indices are shuffled and differ between seeds."""
    idx_a = dc.mt._gsea._ridx(times=5, nvar=10, seed=42)
    idx_b = dc.mt._gsea._ridx(times=5, nvar=10, seed=2)
    # No permutation should be the identity (strictly consecutive) ordering
    for idx in (idx_a, idx_b):
        assert (~(np.diff(idx) == 1).all(axis=1)).all()
    # Different seeds must yield different permutations
    assert (~(idx_a == idx_b).all(axis=1)).all()


@pytest.mark.parametrize(
    'row,rnks,set_msk,dec,expected_value,expected_index',
    [
        (np.array([0.0, 2.0, 0.0]), np.array([0, 1, 2]), np.array([False, True, False]), 0.1, 0.9, 1),
        (np.array([1.0, 2.0, 3.0]), np.array([2, 1, 0]), np.array([True, True, True]), 0.1, 1.0, 0),
        (np.array([1.0, 2.0, 3.0]), np.array([0, 1, 2]), np.array([False, False, False]), 0.1, 0, 0),
        (np.array([0.0, 0.0, 0.0]), np.array([0, 1, 2]), np.array([True, True, True]), 0.1, 0.0, 0),
        (np.array([1.0, -2.0, 3.0]), np.array([0, 1, 2]), np.array([True, False, True]), 0.5, 0.5, 2),
    ]
)
def test_esrank(row, rnks, set_msk, dec, expected_value, expected_index):
    """The enrichment-score walk returns the expected peak value and position."""
    value, index, es = dc.mt._gsea._esrank.py_func(row=row, rnks=rnks, set_msk=set_msk, dec=dec)
    assert np.isclose(value, expected_value)
    assert index == expected_index
    assert isinstance(es, np.ndarray) and es.shape == rnks.shape


def test_nesrank(rng):
    """NES and p-value come back as plain floats."""
    ridx = np.array([
        [0, 1, 2],
        [0, 2, 1],
        [1, 2, 0],
        [1, 0, 2],
        [2, 0, 1],
        [2, 1, 0],
    ])
    nes, pval = dc.mt._gsea._nesrank.py_func(
        ridx=ridx,
        row=np.array([0.0, 2.0, 0.0]),
        rnks=np.array([0, 1, 2]),
        set_msk=np.array([False, True, False]),
        dec=0.1,
        es=0.9,
    )
    assert isinstance(nes, float)
    assert isinstance(pval, float)
# tests/mt/test_gsea.py
def test_stsgsea(mat, idxmat):
    """The single-sample GSEA kernel returns one es/nes/p per source."""
    X, obs, var = mat
    cnct, starts, offsets = idxmat
    row = X[0, :]
    times = 10
    ridx = dc.mt._gsea._ridx(times=times, nvar=row.size, seed=42)
    es, nes, pv = dc.mt._gsea._stsgsea.py_func(
        row=row,
        cnct=cnct,
        starts=starts,
        offsets=offsets,
        ridx=ridx,
    )
    assert es.size == offsets.size
    assert nes.size == offsets.size
    assert pv.size == offsets.size


def test_func_gsea(mat, net, idxmat):
    """NES scores agree with gseapy.prerank within tolerance.

    Fix: removed the unused FDR pivot (previously bound to ``gp_pv``) —
    it was computed from the gseapy result but never asserted against.
    """
    times = 1000
    seed = 42
    X, obs, var = mat
    gene_sets = net.groupby('source')['target'].apply(lambda x: list(x)).to_dict()
    cnct, starts, offsets = idxmat
    res = gp.prerank(
        rnk=pd.DataFrame(X, index=obs, columns=var).T,
        gene_sets=gene_sets,
        permutation_num=times,
        permutation_type='gene_set',
        outdir=None,
        min_size=0,
        threads=4,
        seed=seed,
    ).res2d
    gp_es = res.pivot(index='Name', columns='Term', values='NES').astype(float)
    dc_es, dc_pv = dc.mt._gsea._func_gsea(
        mat=X,
        cnct=cnct,
        starts=starts,
        offsets=offsets,
        times=times,
        seed=seed,
    )
    # Both NES estimates are permutation-based, so allow a small tolerance
    assert (gp_es - dc_es).abs().values.max() < 0.10


# tests/mt/test_mdt.py
@pytest.mark.parametrize(
    'kwargs',
    [
        dict(),
        dict(n_estimators=10),
        dict(max_depth=1),
        dict(gamma=0.01),
    ]
)
def test_func_mdt(mat, adjmat, kwargs):
    """MDT scores are finite and bounded in [0, 1] across configurations."""
    X, obs, var = mat
    es = dc.mt._mdt._func_mdt(mat=X, adj=adjmat, **kwargs)[0]
    assert np.isfinite(es).all()
    assert ((0 <= es) & (es <= 1)).all()


# tests/mt/test_methods.py
def test_methods():
    """The method registry and dc.mt.show() agree in size and typing."""
    lstm = dc.mt._methods
    len_lstm = len(lstm)
    len_dfm = dc.mt.show().shape[0]
    assert len_lstm == len_dfm
    assert all(isinstance(m, dc._Method.Method) for m in lstm)


# tests/mt/test_mlm.py
def test_fit(mat, adjmat):
    """The batched OLS kernel returns per-sample coefficient and t matrices.

    Fix: removed a leftover debug ``print`` of the output shapes.
    """
    X, obs, var = mat
    n_features, n_fsets = adjmat.shape
    n_samples, _ = X.shape
    # Prepend an intercept column before fitting
    adjmat = np.column_stack((np.ones((n_features, )), adjmat))
    inv = np.linalg.inv(np.dot(adjmat.T, adjmat))
    df = n_features - n_fsets - 1
    coef, t = dc.mt._mlm._fit.py_func(
        X=adjmat,
        y=X.T,
        inv=inv,
        df=df,
    )
    # Output shapes exclude the intercept term
    assert isinstance(coef, np.ndarray)
    assert isinstance(t, np.ndarray)
    assert coef.shape == (n_samples, n_fsets)
    assert t.shape == (n_samples, n_fsets)


@pytest.mark.parametrize('tval', [True, False])
def test_func_mlm(mat, adjmat, tval):
    """MLM matches statsmodels OLS coefficients/t-values and p-values."""
    X, obs, var = mat
    dc_es, dc_pv = dc.mt._mlm._func_mlm(mat=X, adj=adjmat, tval=tval)
    st_es, st_pv = np.zeros(dc_es.shape), np.zeros(dc_pv.shape)
    # Design matrix is the same for every sample: hoist it out of the loop
    x = sm.add_constant(adjmat)
    for i in range(st_es.shape[0]):
        y = X[i, :]
        model = sm.OLS(y, x)
        res = model.fit()
        if tval:
            st_es[i, :] = res.tvalues[1:]
        else:
            st_es[i, :] = res.params[1:]
        st_pv[i, :] = res.pvalues[1:]
    assert np.allclose(dc_es, st_es)
    assert np.allclose(dc_pv, st_pv)
# tests/mt/test_ora.py
@pytest.mark.parametrize(
    'a,b,c,d',
    [
        [10, 1, 2, 1000],
        [0, 20, 35, 5],
        [1, 2, 3, 4],
        [0, 1, 2, 500],
    ]
)
def test_table(a, b, c, d):
    """Odds ratio and two-sided p agree with scipy and the -ln(p) helper."""
    dc_es = dc.mt._ora._oddsr.py_func(a=a, b=b, c=c, d=d, ha_corr=0., log=False)
    dc_pv = dc.mt._ora._test1t.py_func(a=a, b=b, c=c, d=d)
    st_es, st_pv = sts.fisher_exact([[a, b], [c, d]])
    assert np.isclose(dc_es, st_es)
    assert np.isclose(dc_pv, st_pv)
    # The -ln(p) helper must agree after exponentiation
    nb_pv = math.exp(-dc.mt._ora._mlnTest2t.py_func(a, a + b, a + c, a + b + c + d))
    assert np.isclose(dc_pv, nb_pv)


def test_runora(mat, idxmat):
    """The row-level ORA kernel returns per-source score and p-value arrays."""
    X, obs, var = mat
    cnct, starts, offsets = idxmat
    row = sts.rankdata(X[0], method='ordinal')
    ranks = np.arange(row.size, dtype=np.int_)
    # NOTE(review): `(row < 0)` is always False for ordinal ranks (>= 1), so
    # this mask reduces to `row > 2` — confirm the second clause is intended.
    row = ranks[(row > 2) | (row < 0)]
    es, pv = dc.mt._ora._runora.py_func(
        row=row,
        ranks=ranks,
        cnct=cnct,
        starts=starts,
        offsets=offsets,
        n_bg=0,
        ha_corr=0.5,
    )
    assert isinstance(es, np.ndarray)
    assert isinstance(pv, np.ndarray)


def test_func_ora(mat, idxmat):
    """ORA matches a pure scipy fisher_exact re-implementation.

    Fix: pass the local ``ha_corr`` variable to ``_func_ora`` instead of a
    duplicated literal ``1``, so the decoupler call and the reference loop
    below cannot silently drift apart.
    """
    X, obs, var = mat
    cnct, starts, offsets = idxmat
    n_up = 3
    ha_corr = 1
    dc_es, dc_pv = dc.mt._ora._func_ora(
        mat=sps.csr_matrix(X),
        cnct=cnct,
        starts=starts,
        offsets=offsets,
        n_up=n_up,
        n_bm=0,
        n_bg=None,
        ha_corr=ha_corr,
    )
    st_es, st_pv = np.zeros(dc_es.shape), np.zeros(dc_pv.shape)
    ranks = np.arange(X.shape[1], dtype=np.int_)
    rnk = set(ranks)
    for i in range(st_es.shape[0]):
        row = sts.rankdata(X[i], method='ordinal')
        row = set(ranks[row > n_up])
        for j in range(st_es.shape[1]):
            fset = dc.pp.net._getset(cnct=cnct, starts=starts, offsets=offsets, j=j)
            fset = set(fset)
            # Build the 2x2 contingency table
            set_a = row.intersection(fset)
            set_b = fset.difference(row)
            set_c = row.difference(fset)
            a = len(set_a)
            b = len(set_b)
            c = len(set_c)
            set_u = set_a.union(set_b).union(set_c)
            set_d = rnk.difference(set_u)
            d = len(set_d)
            _, st_pv[i, j] = sts.fisher_exact([[a, b], [c, d]])
            # Haldane-Anscombe correction before computing the odds ratio
            a += ha_corr
            b += ha_corr
            c += ha_corr
            d += ha_corr
            es = sts.fisher_exact([[a, b], [c, d]])
            st_es[i, j], _ = np.log(es)
    assert np.isclose(dc_es, st_es).all()
    assert np.isclose(dc_pv, st_pv).all()


# tests/mt/test_run.py
def test_return(adata, net):
    """_return writes into AnnData in place, or returns DataFrames otherwise."""
    mth = dc.mt.ulm
    adata = adata[:4].copy()
    adata.X[:, 0] = 0.
    es, pv = mth(data=adata.to_df(), net=net, tmin=0)
    r = dc.mt._run._return(name=mth.name, data=adata, es=es, pv=pv)
    assert r is None
    r = dc.mt._run._return(name=mth.name, data=adata.to_df(), es=es, pv=pv)
    assert isinstance(r, tuple)
    assert isinstance(r[0], pd.DataFrame)
    assert isinstance(r[1], pd.DataFrame)


@pytest.mark.parametrize(
    'mth,bsize',
    [
        [dc.mt.zscore, 2],
        [dc.mt.ora, 2],
        [dc.mt.gsva, 250_000],
    ]
)
def test_run(adata, net, mth, bsize):
    """Dense and sparse-backed inputs produce identical scores.

    NOTE(review): the parametrized ``bsize`` value is never used inside the
    test body — confirm whether it should be forwarded to ``_run``.
    """
    sdata = adata.copy()
    sdata.X = sps.csr_matrix(sdata.X)
    des, dpv = dc.mt._run._run(
        name=mth.name,
        func=mth.func,
        adj=mth.adj,
        test=mth.test,
        data=adata.to_df(),
        net=net,
        tmin=0,
    )
    ses, spv = dc.mt._run._run(
        name=mth.name,
        func=mth.func,
        adj=mth.adj,
        test=mth.test,
        data=sdata.to_df(),
        net=net,
        tmin=0,
    )
    assert (des.values == ses.values).all()
# tests/mt/test_udt.py
@pytest.mark.parametrize(
    'kwargs',
    [
        dict(),
        dict(n_estimators=10),
        dict(max_depth=1),
        dict(gamma=0.01),
    ]
)
def test_func_udt(mat, adjmat, kwargs):
    """UDT scores are finite and within [0, 1] for several configurations."""
    X, obs, var = mat
    scores = dc.mt._udt._func_udt(mat=X, adj=adjmat, **kwargs)[0]
    assert np.isfinite(scores).all()
    assert ((0 <= scores) & (scores <= 1)).all()


# tests/mt/test_ulm.py
def test_cov(mat, adjmat):
    """The batched covariance helper matches numpy.cov."""
    X, obs, var = mat
    dc_cov = dc.mt._ulm._cov(A=adjmat, b=X.T)
    nsrcs = adjmat.shape[1]
    np_cov = np.cov(m=adjmat, y=X.T, rowvar=False)[:nsrcs, nsrcs:].T
    assert np.allclose(np_cov, dc_cov)


def test_cor(mat, adjmat):
    """The batched correlation matches numpy.corrcoef and stays in [-1, 1]."""
    X, obs, var = mat
    dc_cor = dc.mt._ulm._cor(adjmat, X.T)
    nsrcs = adjmat.shape[1]
    np_cor = np.corrcoef(adjmat, X.T, rowvar=False)[:nsrcs, nsrcs:].T
    assert np.allclose(dc_cor, np_cor)
    assert np.all((dc_cor <= 1) * (dc_cor >= -1))


def test_tval():
    """t-statistics derived from correlation and degrees of freedom."""
    assert np.allclose(2.30940108, dc.mt._ulm._tval(r=0.4, df=28))
    assert np.allclose(12.15540081, dc.mt._ulm._tval(r=0.99, df=3))
    assert np.allclose(-0.49811675, dc.mt._ulm._tval(r=-0.05, df=99))


@pytest.mark.parametrize('tval', [True, False])
def test_func_ulm(mat, adjmat, tval):
    """ULM matches per-pair scipy.stats.linregress slopes/t-values and p-values."""
    X, obs, var = mat
    dc_es, dc_pv = dc.mt._ulm._func_ulm(mat=X, adj=adjmat, tval=tval)
    st_es, st_pv = np.zeros(dc_es.shape), np.zeros(dc_pv.shape)
    n_obs, n_src = st_es.shape
    for i in range(n_obs):
        for j in range(n_src):
            res = sts.linregress(adjmat[:, j], X[i, :])
            st_pv[i, j] = res.pvalue
            # tval=True reports slope / stderr, otherwise the raw slope
            st_es[i, j] = res.slope / res.stderr if tval else res.slope
    assert np.allclose(dc_es, st_es)
    assert np.allclose(dc_pv, st_pv)
# tests/mt/test_viper.py
def test_get_tmp_idxs(rng):
    """Smoke test: the helper accepts a p-value matrix with a NaN diagonal."""
    pval = rng.random((5, 5))
    np.fill_diagonal(pval, np.nan)
    dc.mt._viper._get_tmp_idxs.py_func(pval)


def test_func_viper(adata, net):
    """VIPER scores match the reference R run with and without pleiotropy."""
    # Collapse the toy sources into the regulon layout used by the R reference
    rename = {
        'T1': 'T1',
        'T2': 'T1',
        'T3': 'T2',
        'T4': 'T2',
        'T5': 'T3',
    }
    net['source'] = net['source'].map(rename)
    extra = pd.DataFrame([['T4', 'G03', -1.2]], columns=['source', 'target', 'weight'], index=[0])
    net = pd.concat([
        net,
        net[net['source'] == 'T2'].assign(source='T4'),
        extra,
    ])
    X, obs, var = dc.pp.extract(data=adata)
    sources, targets, adjmat = dc.pp.adjmat(features=var, net=net, verbose=False)
    # Keep the two first and two last observations, as in the R reference run
    obs = np.array(['S01', 'S02', 'S29', 'S30'])
    X = np.vstack((X[:2, :], X[-2:, :]))
    pf_dc_es, pf_dc_pv = dc.mt._viper._func_viper(mat=X, adj=adjmat, pleiotropy=False)
    pt_dc_es, pt_dc_pv = dc.mt._viper._func_viper(mat=X, adj=adjmat, n_targets=1, pleiotropy=True)
    pf_vp_es = np.array([
        [ 3.708381, -2.154396, -1.4069603, -2.468185],
        [ 3.702911, -2.288070, -0.7239077, -2.848132],
        [-3.613066,  1.696114, -0.5789716,  2.039502],
        [-3.495480,  2.560792, -1.1296442,  2.523946],
    ])
    pt_vp_es = np.array([
        [ 2.224856, -2.154396, -1.4069603, -1.131059],
        [ 1.880012, -2.288070, -0.7239077, -2.848132],
        [-3.177418,  1.696114, -0.5789716,  2.039502],
        [-2.073186,  2.560792, -1.1296442,  2.523946],
    ])
    assert np.isclose(pf_vp_es, pf_dc_es).all()
    assert np.isclose(pt_vp_es, pt_dc_es).all()


# tests/mt/test_waggr.py
def test_funcs(rng):
    """Weighted-sum and weighted-mean kernels return plain floats."""
    x = np.array([1, 2, 3, 4], dtype=float)
    w = rng.random(x.size)
    assert isinstance(dc.mt._waggr._wsum.py_func(x=x, w=w), float)
    assert isinstance(dc.mt._waggr._wmean.py_func(x=x, w=w), float)


@pytest.mark.parametrize(
    'fun,times,seed',
    [
        ['wmean', 10, 42],
        ['wsum', 5, 23],
        [lambda x, w: 0, 5, 1],
        ['wmean', 0, 42],
    ]
)
def test_func_waggr(mat, adjmat, fun, times, seed):
    """Named, callable, and permutation-free variants all yield valid output."""
    X, obs, var = mat
    es, pv = dc.mt._waggr._func_waggr(mat=X, adj=adjmat, fun=fun, times=times, seed=seed)
    assert np.isfinite(es).all()
    assert ((0 <= pv) & (pv <= 1)).all()


# tests/mt/test_zscore.py
@pytest.mark.parametrize('flavor', ['KSEA', 'RoKAI'])
def test_func_zscore(mat, adjmat, flavor):
    """Both z-score flavors return finite scores and valid p-values."""
    X, obs, var = mat
    es, pv = dc.mt._zscore._func_zscore(mat=X, adj=adjmat, flavor=flavor)
    assert np.isfinite(es).all()
    assert ((0 <= pv) & (pv <= 1)).all()
def test_collectri(
    remove_complexes,
):
    """CollecTRI returns a well-formed, deduplicated regulatory network.

    Parametrized (via the decorator above) over ``remove_complexes``:
    complex regulators such as AP1/NFKB must be present exactly when they
    are not removed.
    """
    df = dc.op.collectri(remove_complexes=remove_complexes)
    assert isinstance(df, pd.DataFrame)
    expected_cols = {'source', 'target', 'weight', 'resources', 'references', 'sign_decision'}
    assert expected_cols.issubset(df.columns)
    assert pd.api.types.is_numeric_dtype(df['weight'])
    # Complexes are present iff they were not filtered out.
    has_complexes = bool(np.isin(['AP1', 'NFKB'], df['source']).all())
    assert has_complexes != bool(remove_complexes)
    assert not df.duplicated(['source', 'target']).any()
@pytest.mark.parametrize(
    'top,thr_padj',
    [
        [100, 0.05],
        [100, 1],
        [np.inf, 0.05],
        [np.inf, 1],
    ]
)
def test_progeny(
    top,
    thr_padj,
):
    """PROGENy network honours the significance and per-source size filters."""
    network = dc.op.progeny(top=top, thr_padj=thr_padj)
    assert isinstance(network, pd.DataFrame)
    # Required schema and numeric dtypes.
    for col in ('source', 'target', 'weight', 'padj'):
        assert col in network.columns
    for col in ('weight', 'padj'):
        assert pd.api.types.is_numeric_dtype(network[col])
    # Every edge passes the adjusted p-value threshold.
    assert network['padj'].lt(thr_padj).all()
    # No source exceeds the requested number of top targets.
    assert network.groupby('source').size().le(top).all()
    assert not network.duplicated(['source', 'target']).any()
@pytest.mark.parametrize(
    'lst,my_dict,one_to_many',
    [
        [['a', 'b', 'c', 'd'], dict(a=['B', 'C'], b=['A', 'C'], c=['A', 'B'], d='D'), 1],
        [['a', 'b', 'c', 'd'], dict(c=['A', 'B']), 1],
        [['a', 'b', 'c', 'd'], dict(a=['B', 'C'], b=['A', 'C'], c=['A', 'B'], d='D'), 10],
    ]
)
def test_replace_subunits(
    lst,
    my_dict,
    one_to_many,
):
    """Each element of ``lst`` maps to its ortholog list, or NaN when the
    element is missing from the mapping or maps to more than ``one_to_many``
    orthologs.
    """
    res = dc.op._translate._replace_subunits(
        lst=lst, my_dict=my_dict, one_to_many=one_to_many
    )
    assert isinstance(res, list)
    assert len(res) == len(lst)
    # BUG FIX: the loop previously iterated over my_dict keys, so the inner
    # `if k in my_dict` was always true and the `else` branch (elements absent
    # from the mapping must be NaN) was unreachable — the second parametrized
    # case never checked 'a', 'b' and 'd'. Iterate over lst positions instead.
    for idx, k in enumerate(lst):
        if k in my_dict:
            if len(my_dict[k]) > one_to_many:
                # Too many orthologs: ambiguous, dropped as NaN.
                assert np.isnan(res[idx])
            else:
                assert isinstance(res[idx], list)
        else:
            # Not in the mapping at all: NaN — TODO confirm against
            # dc.op._translate._replace_subunits for unmapped entries.
            assert np.isnan(res[idx])
@pytest.fixture
def df():
    """Two observations by four sources, with mixed-sign scores."""
    df = pd.DataFrame(
        data=[
            [1, -2, 3, -4],
            [5, -6, 7, -8],
        ],
        index=['C1', 'C2'],
        columns=[f'TF{i}' for i in range(4)]
    )
    return df


@pytest.mark.parametrize(
    'name,top,vertical,vcenter',
    [
        ['C1', 2, True, None],
        ['C2', 10, False, -3],
        ['C2', 10, False, 10],
    ]
)
def test_barplot(
    df,
    name,
    top,
    vertical,
    vcenter,
):
    """Barplot renders for both orientations and custom colour centers."""
    # BUG FIX: vcenter was parametrized (None, -3, 10) but never forwarded,
    # so those cases exercised nothing. Pass it through to dc.pl.barplot.
    fig = dc.pl.barplot(data=df, name=name, top=top, vertical=vertical, vcenter=vcenter, return_fig=True)
    assert isinstance(fig, Figure)
    plt.close(fig)
@pytest.mark.parametrize(
    'groupby,log',
    [
        ['group', True],
        [['group'], True],
        [['sample', 'group'], True],
    ]
)
def test_filter_samples(
    pdata,
    groupby,
    log,
):
    """Sample-filtering plot accepts a column name or a list of columns."""
    figure = dc.pl.filter_samples(adata=pdata, groupby=groupby, log=log, return_fig=True)
    assert isinstance(figure, Figure)
    plt.close(figure)
@pytest.mark.parametrize(
    'd_none,unw,sources,targets,by_abs,vcenter',
    [
        [False, False, 5, 5, False, False],
        [False, True, 'T1', 5, True, True],
        [True, False, ['T1'], 5, True, True],
        [True, False, ['T1', 'T3'], 5, True, True],
        [False, False, 5, 'G01', True, True],
        [False, False, 5, ['G01', 'G02', 'G03'], True, True],
    ]
)
def test_network(
    net,
    data,
    score,
    d_none,
    unw,
    sources,
    targets,
    by_abs,
    vcenter,
):
    """Network plot renders with/without data, weights and node filters."""
    # Without molecular data there is nothing to colour by, so use a flat map.
    s_cmap = 'white' if d_none else 'coolwarm'
    if d_none:
        data = None
        score = None
    if unw:
        # Exercise the unweighted-network code path.
        net = net.drop(columns=['weight'])
    fig = dc.pl.network(
        data=data,
        score=score,
        net=net,
        sources=sources,
        targets=targets,
        by_abs=by_abs,
        vcenter=vcenter,
        s_cmap=s_cmap,
        figsize=(5, 5),
        return_fig=True
    )
    assert isinstance(fig, Figure)
    plt.close(fig)
@pytest.mark.parametrize(
    'y,hue,kw',
    [
        ['group', None, dict()],
        ['group', 'group', dict(width=0.5)],
        ['group', 'sample', dict(palette='tab10')],
        ['sample', 'group', dict(palette='tab20')],
    ]
)
def test_obsbar(
    adata,
    y,
    hue,
    kw,
):
    """Observation barplot accepts hue columns and barplot keyword passthrough."""
    figure = dc.pl.obsbar(adata=adata, y=y, hue=hue, kw_barplot=kw, return_fig=True)
    assert isinstance(figure, Figure)
    plt.close(figure)
@pytest.mark.parametrize(
    'names,label,mode',
    [
        [['G01', 'G02', 'G07', 'G08', 'G12'], None, 'line'],
        [['G01', 'G02', 'G07', 'G08'], None, 'mat'],
        [None, 'group', 'line'],
        [None, 'group', 'mat'],
    ]
)
def test_order(
    tdata,
    names,
    label,
    mode,
):
    """Binned-order plot renders in both line and matrix modes.

    Covers explicit feature lists as well as ``names=None``
    (presumably "use all features" — confirm against dc.pp.bin_order).
    """
    # BUG FIX: `names` was parametrized but a hardcoded list was passed to
    # bin_order, so the parametrized feature selections (including None)
    # were never exercised. Forward the parameter.
    df = dc.pp.bin_order(adata=tdata, names=names, order='pstime', label=label)
    fig = dc.pl.order(df=df, mode=mode, return_fig=True)
    assert isinstance(fig, Figure)
    plt.close(fig)
@pytest.mark.parametrize(
    'name,a_err', [
        ['T1', False],
        ['T10', True],
    ]
)
def test_source_targets(
    deg,
    net,
    name,
    a_err,
):
    """Weight-vs-stat scatter renders for known sources; unknown ones raise."""
    if a_err:
        # 'T10' is not in the network: expect the input check to fire.
        with pytest.raises(AssertionError):
            dc.pl.source_targets(data=deg, net=net, name=name, x='weight', y='stat', return_fig=True)
    else:
        fig = dc.pl.source_targets(data=deg, net=net, name=name, x='weight', y='stat', return_fig=True)
        assert isinstance(fig, Figure)
        plt.close(fig)
def test_extract(
    adata,
):
    """dc.pp.extract handles lists, DataFrames, AnnData, layers, sparse
    matrices, empty observations and ``.raw``."""
    # The three basic containers all round-trip to an (X, obs, var) triple
    # with consistent shapes.
    for source in ([adata.X, adata.obs_names, adata.var_names], adata.to_df(), adata):
        X, obs, var = dc.pp.extract(data=source)
        assert X.shape[0] == obs.size
        assert X.shape[1] == var.size
    # A named layer can be selected instead of .X.
    adata.layers['counts'] = adata.X.round()
    X, obs, var = dc.pp.extract(data=adata, layer='counts')
    assert float(np.sum(X)).is_integer()
    # Sparse inputs are normalised to CSR.
    sadata = adata.copy()
    sadata.X = sps.coo_matrix(sadata.X)
    X, obs, var = dc.pp.extract(data=sadata)
    assert isinstance(X, sps.csr_matrix)
    # All-zero observations can be dropped with empty=True.
    eadata = adata.copy()
    eadata.X[5, :] = 0.
    X, obs, var = dc.pp.extract(data=eadata, empty=True)
    assert X.shape[0] < eadata.shape[0]
    # .raw (here holding negated values) is honoured when requested.
    nadata = adata.copy()
    nadata.X = nadata.X * -1
    adata.raw = nadata
    X, obs, var = dc.pp.extract(data=adata, raw=True)
    assert (X < 0).all()
def test_package_has_version():
    """The installed package exposes a non-null version string."""
    version = decoupler.__version__
    assert version is not None
@pytest.mark.parametrize(
    'key,uns_key',
    [
        ['X_pca', 'rank_obsm'],
        ['X_pca', None],
        ['X_umap', 'other'],
        ['score_ulm', 'other'],
        ['score_ulm', None],
    ]
)
def test_rankby_obsm(
    tdata_obsm,
    key,
    uns_key,
):
    """Ranking an .obsm key either returns a DataFrame or stores it in .uns."""
    tdata_obsm = tdata_obsm.copy()
    # Add a categorical covariate with one rare level to rank against.
    tdata_obsm.obs['dose'] = 'Low'
    tdata_obsm.obs.loc[tdata_obsm.obs_names[5], 'dose'] = 'High'
    result = dc.tl.rankby_obsm(tdata_obsm, key=key, uns_key=uns_key)
    if uns_key is None:
        # No storage key: the ranking is returned directly.
        assert isinstance(result, pd.DataFrame)
    else:
        # With a storage key: nothing is returned, .uns holds the ranking.
        assert result is None
        assert uns_key in tdata_obsm.uns
        assert isinstance(tdata_obsm.uns[uns_key], pd.DataFrame)
set(df.head(len(gt_genes))['name']) 20 | assert len(gt_genes) > 3 21 | assert (len(gt_genes & pd_genes) / len(gt_genes)) >= 0.75 22 | msk = df['name'].isin(gt_genes) 23 | assert df[~msk]['stat'].mean() < df[msk]['stat'].mean() 24 | tdata.X = sps.csr_matrix(tdata.X) 25 | df = dc.tl.rankby_order(tdata, order='pstime', stat=stat) 26 | assert isinstance(df, pd.DataFrame) 27 | pd_genes = set(df.head(len(gt_genes))['name']) 28 | assert len(gt_genes) > 3 29 | assert (len(gt_genes & pd_genes) / len(gt_genes)) >= 0.75 30 | msk = df['name'].isin(gt_genes) 31 | assert df[~msk]['stat'].mean() < df[msk]['stat'].mean() 32 | --------------------------------------------------------------------------------