├── tests
│   ├── __init__.py
│   ├── _data
│   │   ├── receivers.pickle
│   │   ├── interactions.pickle
│   │   ├── transmitters.pickle
│   │   └── import_intercell_result.pickle
│   ├── test_misc.py
│   ├── test_orthology.py
│   ├── test_cache.py
│   ├── test_query.py
│   ├── test_options.py
│   ├── test_compare_R.py
│   ├── test_downloader.py
│   ├── conftest.py
│   └── test_interactions.py
├── .python-version
├── omnipath
│   ├── _core
│   │   ├── __init__.py
│   │   ├── downloader
│   │   │   ├── __init__.py
│   │   │   └── _downloader.py
│   │   ├── cache
│   │   │   ├── __init__.py
│   │   │   └── _cache.py
│   │   ├── query
│   │   │   ├── __init__.py
│   │   │   ├── _types.py
│   │   │   ├── _query.py
│   │   │   └── _query_validator.py
│   │   ├── utils
│   │   │   ├── __init__.py
│   │   │   ├── _docs.py
│   │   │   ├── _homologene.py
│   │   │   ├── _orthology.py
│   │   │   ├── _static.py
│   │   │   └── _options.py
│   │   └── requests
│   │       ├── __init__.py
│   │       ├── interactions
│   │       │   ├── __init__.py
│   │       │   ├── _json.py
│   │       │   ├── _utils.py
│   │       │   └── _evidences.py
│   │       ├── _complexes.py
│   │       ├── _intercell.py
│   │       ├── _utils.py
│   │       └── _annotations.py
│   ├── _misc
│   │   ├── __init__.py
│   │   ├── utils.py
│   │   └── dtypes.py
│   ├── requests.py
│   ├── interactions.py
│   ├── constants
│   │   ├── __init__.py
│   │   ├── _pkg_constants.py
│   │   └── _constants.py
│   └── __init__.py
├── docs
│   ├── source
│   │   ├── _templates
│   │   │   └── autosummary
│   │   │       ├── base.rst
│   │   │       └── class.rst
│   │   ├── installation.rst
│   │   ├── references.rst
│   │   ├── index.rst
│   │   ├── release_notes.rst
│   │   ├── api.rst
│   │   ├── _static
│   │   │   └── css
│   │   │       └── custom.css
│   │   └── conf.py
│   ├── Makefile
│   └── make.bat
├── .bumpversion.cfg
├── .readthedocs.yml
├── .coveragerc
├── LICENSE
├── .github
│   └── workflows
│       ├── lint.yml
│       └── ci.yml
├── .pre-commit-config.yaml
├── .gitignore
├── README.rst
├── tox.ini
└── pyproject.toml

/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.python-version:
--------------------------------------------------------------------------------
1 | >=3.9
2 |
--------------------------------------------------------------------------------
/omnipath/_core/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/omnipath/_misc/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/omnipath/_core/downloader/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/omnipath/requests.py:
--------------------------------------------------------------------------------
1 | from omnipath._core.requests import *  # noqa: F401 F403
2 |
--------------------------------------------------------------------------------
/omnipath/_core/cache/__init__.py:
--------------------------------------------------------------------------------
1 | from omnipath._core.cache._cache import clear_cache
2 |
--------------------------------------------------------------------------------
/omnipath/_core/query/__init__.py:
--------------------------------------------------------------------------------
1 | from omnipath._core.query._query import QueryType
2 |
--------------------------------------------------------------------------------
/omnipath/interactions.py:
--------------------------------------------------------------------------------
1 | from omnipath._core.requests.interactions import *  # noqa: F401 F403
2 |
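The two modules above are thin re-export shims: ``omnipath.requests`` and
``omnipath.interactions`` expose the request classes implemented under
``omnipath._core.requests``. A minimal usage sketch of this public API
(assuming network access to the OmniPath web service; the exact columns
returned depend on the server)::

    import omnipath as op

    # Post-translational interactions from the core OmniPath dataset.
    interactions = op.interactions.OmniPath.get()

    # Complexes in which *all* of the listed genes participate together
    # (``Complexes.complex_genes`` is shown further below).
    cpx = op.requests.Complexes.complex_genes(["CD8A", "CD8B"], total_match=True)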
--------------------------------------------------------------------------------
/tests/_data/receivers.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saezlab/omnipath/HEAD/tests/_data/receivers.pickle
--------------------------------------------------------------------------------
/omnipath/constants/__init__.py:
--------------------------------------------------------------------------------
1 | from omnipath.constants._constants import License, Organism, InteractionDataset
2 |
--------------------------------------------------------------------------------
/tests/_data/interactions.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saezlab/omnipath/HEAD/tests/_data/interactions.pickle
--------------------------------------------------------------------------------
/tests/_data/transmitters.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saezlab/omnipath/HEAD/tests/_data/transmitters.pickle
--------------------------------------------------------------------------------
/omnipath/_core/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from omnipath._core.utils._options import options
2 | import omnipath._core.utils._static as static
3 |
--------------------------------------------------------------------------------
/tests/_data/import_intercell_result.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saezlab/omnipath/HEAD/tests/_data/import_intercell_result.pickle
--------------------------------------------------------------------------------
/docs/source/_templates/autosummary/base.rst:
--------------------------------------------------------------------------------
1 | :github_url: {{ fullname | escape }}
2 |
3 | {% extends "!autosummary/base.rst" %}
4 |
5 | .. http://www.sphinx-doc.org/en/stable/ext/autosummary.html#customizing-templates
--------------------------------------------------------------------------------
/.bumpversion.cfg:
--------------------------------------------------------------------------------
1 | [bumpversion]
2 | current_version = 1.0.11
3 | commit = True
4 | tag = True
5 | files = pyproject.toml omnipath/__init__.py
6 | parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)
7 | serialize = {major}.{minor}.{patch}
--------------------------------------------------------------------------------
/omnipath/_core/requests/__init__.py:
--------------------------------------------------------------------------------
1 | from omnipath._core.requests._request import Enzsub, SignedPTMs
2 | from omnipath._core.requests._complexes import Complexes
3 | from omnipath._core.requests._intercell import Intercell
4 | from omnipath._core.requests._annotations import Annotations
5 |
--------------------------------------------------------------------------------
/.readthedocs.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 |
3 | sphinx:
4 |   builder: html
5 |   configuration: docs/source/conf.py
6 |   fail_on_warning: true
7 |
8 | formats:
9 |   - htmlzip
10 |   - pdf
11 |
12 | build:
13 |   image: latest
14 |
15 | python:
16 |   version: 3.8
17 |   install:
18 |     - method: pip
19 |       path: .
20 |       extra_requirements:
21 |         - docs
22 |
--------------------------------------------------------------------------------
/.coveragerc:
--------------------------------------------------------------------------------
1 | [paths]
2 | source =
3 |     omnipath
4 |     */site-packages/omnipath
5 |
6 | [run]
7 | branch = true
8 | parallel = true
9 | source = omnipath
10 | omit = */__init__.py
11 |
12 | [report]
13 | exclude_lines =
14 |     \#.*pragma:\s*no.?cover
15 |
16 |     if __name__ == .__main__.
17 |
18 |     ^\s*raise AssertionError\b
19 |     ^\s*raise NotImplementedError\b
20 |     ^\s*return NotImplemented\b
21 | show_missing = true
22 | precision = 2
--------------------------------------------------------------------------------
/omnipath/_core/query/_types.py:
--------------------------------------------------------------------------------
1 | from typing import Union, Optional, Sequence
2 |
3 | try:
4 |     from typing import Literal
5 | except ImportError:
6 |     from typing_extensions import Literal
7 |
8 |
9 | Strseq_t = Optional[Union[str, Sequence[str]]]
10 | Organism_t = Literal["human", "mouse", "rat"]
11 | License_t = Literal["academic", "commercial"]
12 | Bool_t = Optional[bool]
13 | Str_t = Optional[str]
14 | Int_t = Optional[int]
15 | None_t = type(None)
--------------------------------------------------------------------------------
/docs/source/installation.rst:
--------------------------------------------------------------------------------
1 | Installation
2 | ============
3 | Omnipath requires Python version >= 3.9 to run.
4 |
5 | PyPI
6 | ~~~~
7 | Omnipath is available on PyPI::
8 |
9 |     pip install omnipath
10 |
11 | Additionally, :mod:`omnipath` may sometimes require :mod:`networkx` to create an interaction graph.
12 | This dependency can be installed as::
13 |
14 |     pip install omnipath[graph]
15 |
16 | Development Version
17 | ~~~~~~~~~~~~~~~~~~~
18 | To stay up-to-date with the newest version, run::
19 |
20 |     pip install git+https://github.com/saezlab/omnipath
--------------------------------------------------------------------------------
/docs/source/references.rst:
--------------------------------------------------------------------------------
1 | .. |br| raw:: html
2 |
3 |     <br/>
4 |
5 | References
6 | ----------
7 | .. [OmniPath] Türei, D., Valdeolivas, A. *et al.* (2020), |br|
8 |     *Integrated intra- and intercellular signaling knowledge for multicellular omics analysis*, |br|
9 |     `bioRxiv 2020.08.03.221242 <https://doi.org/10.1101/2020.08.03.221242>`__.
10 |
11 | .. [OmniPath16] Türei, D., Korcsmáros, T. & Saez-Rodriguez, J. (2016), |br|
12 |     *OmniPath: guidelines and gateway for literature-curated signaling pathway resources.*, |br|
13 |     `Nat Methods 13, 966–967 <https://www.nature.com/articles/nmeth.4077>`__.
--------------------------------------------------------------------------------
/omnipath/_core/requests/interactions/__init__.py:
--------------------------------------------------------------------------------
1 | from omnipath._core.requests.interactions._utils import import_intercell_network
2 | from omnipath._core.requests.interactions._evidences import (
3 |     only_from,
4 |     from_evidences,
5 |     filter_evidences,
6 |     unnest_evidences,
7 | )
8 | from omnipath._core.requests.interactions._interactions import (
9 |     TFmiRNA,
10 |     Dorothea,
11 |     OmniPath,
12 |     TFtarget,
13 |     CollecTRI,
14 |     KinaseExtra,
15 |     LigRecExtra,
16 |     PathwayExtra,
17 |     SmallMolecule,
18 |     AllInteractions,
19 |     Transcriptional,
20 |     PostTranslational,
21 |     miRNA,
22 |     lncRNAmRNA,
23 | )
24 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS    ?=
7 | SPHINXBUILD   ?= sphinx-build
8 | SOURCEDIR     = source
9 | BUILDDIR      = build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile clean
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
22 | clean:
23 | 	@rm -rf $(BUILDDIR)/*
24 | 	@rm -rf $(SOURCEDIR)/api/
--------------------------------------------------------------------------------
/docs/source/_templates/autosummary/class.rst:
--------------------------------------------------------------------------------
1 | :github_url: {{ fullname | escape }}
2 |
3 | {{ fullname | escape | underline}}
4 |
5 | .. currentmodule:: {{ module }}
6 |
7 | .. add toctree option to make autodoc generate the pages
8 |
9 | .. autoclass:: {{ objname }}
10 |
11 |    {% block attributes %}
12 |    {% if attributes %}
13 |    .. rubric:: Attributes
14 |
15 |    .. autosummary::
16 |       :toctree: .
17 |    {% for item in attributes %}
18 |       ~{{ fullname }}.{{ item }}
19 |    {%- endfor %}
20 |    {% endif %}
21 |    {% endblock %}
22 |
23 |    {% block methods %}
24 |    {% if methods %}
25 |    .. rubric:: Methods
26 |
27 |    .. autosummary::
28 |       :toctree: .
29 |    {% for item in methods %}
30 |    {%- if item != '__init__' %}
31 |       ~{{ fullname }}.{{ item }}
32 |    {%- endif -%}
33 |    {%- endfor %}
34 |    {% endif %}
35 |    {% endblock %}
36 |
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | 	set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=source
11 | set BUILDDIR=build
12 |
13 | if "%1" == "" goto help
14 |
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | 	echo.
18 | 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | 	echo.installed, then set the SPHINXBUILD environment variable to point
20 | 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | 	echo.may add the Sphinx directory to PATH.
22 | 	echo.
23 | 	echo.If you don't have Sphinx installed, grab it from
24 | 	echo.http://sphinx-doc.org/
25 | 	exit /b 1
26 | )
27 |
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 |
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 |
34 | :end
35 | popd
--------------------------------------------------------------------------------
/omnipath/_misc/utils.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Set, Iterable
2 |
3 |
4 | def to_set(value: Any) -> Set:
5 |     """Make sure `value` is a set, convert it if necessary.
6 |
7 |     Parameters
8 |     ----------
9 |     value
10 |         Any kind of object.
11 |
12 |     Returns
13 |     -------
14 |     `Set`
15 |         The `value` itself if it is already a set; a set containing the single
16 |         element if `value` is a scalar; a set of the elements in `value`
17 |         if `value` is iterable; an empty set if `value` is `None`.
18 |
19 |     Raises
20 |     ------
21 |     TypeError
22 |         If `value` is neither iterable nor hashable, or if it is an iterable
23 |         containing non-hashable elements.
24 |     """
25 |     if isinstance(value, Set):
26 |         return value
27 |
28 |     elif value is None:
29 |         return set()
30 |
31 |     elif isinstance(value, Iterable) and not isinstance(value, (str, bytes)):
32 |         return set(value)
33 |
34 |     else:
35 |         return {value}
36 |
--------------------------------------------------------------------------------
/omnipath/_core/requests/interactions/_json.py:
--------------------------------------------------------------------------------
1 | import json
2 |
3 | import pandas as pd
4 |
5 |
6 | def convert_json_col(df: pd.DataFrame, col: str) -> pd.DataFrame:
7 |     """
8 |     Convert a column of JSON encoded strings to nested Python objects.
9 |
10 |     Parameters
11 |     ----------
12 |     df
13 |         An OmniPath interaction data frame.
14 |     col
15 |         Name of a column with JSON encoded strings.
16 |
17 |     Returns
18 |     -------
19 |     :class:`pandas.DataFrame`
20 |         The input data frame with the column converted to nested Python
21 |         objects, i.e. lists or dicts. If the column does not exist, the
22 |         data frame is returned unmodified.
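
    Example
    -------
    A minimal sketch (the JSON payload below is made up for illustration)::

        df = pd.DataFrame({"extra_attrs": ['{"curation_effort": 1}']})
        df = convert_json_col(df, "extra_attrs")
        df.loc[0, "extra_attrs"]  # {'curation_effort': 1}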
23 | """ 24 | if col in df.columns: 25 | df[col] = df[col].apply(json.loads) 26 | 27 | return df 28 | 29 | 30 | def _json_cols_hook(df: pd.DataFrame) -> pd.DataFrame: 31 | """Handle the JSON columns in post-processing, if there is any.""" 32 | for col in ("extra_attrs", "evidences"): 33 | df = convert_json_col(df, col) 34 | 35 | return df 36 | -------------------------------------------------------------------------------- /omnipath/__init__.py: -------------------------------------------------------------------------------- 1 | from omnipath._core.cache import clear_cache 2 | from omnipath._core.utils import ( # from_first in isort is important here 3 | static, 4 | options, 5 | ) 6 | from omnipath._core.downloader._downloader import _get_server_version 7 | import omnipath.requests as requests 8 | import omnipath.constants as constants 9 | import omnipath.interactions as interactions 10 | 11 | __author__ = ", ".join(["Michal Klein", "Dénes Türei"]) 12 | __maintainer__ = ", ".join(["Michal Klein", "Dénes Türei"]) 13 | __version__ = "1.0.11" 14 | __email__ = "turei.denes@gmail.com" 15 | 16 | try: 17 | from importlib_metadata import version # Python < 3.8 18 | except ImportError: 19 | from importlib.metadata import version # Python = 3.8 20 | 21 | from packaging.version import parse 22 | 23 | __full_version__ = parse(version(__name__)) 24 | __full_version__ = ( 25 | f"{__version__}+{__full_version__.local}" if __full_version__.local else __version__ 26 | ) 27 | __server_version__ = _get_server_version(options) 28 | 29 | del parse, version, _get_server_version 30 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | Copyright (c) 2020 Saez Lab - https://saezlab.org/ 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 5 | 6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 7 | 8 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
9 |
--------------------------------------------------------------------------------
/.github/workflows/lint.yml:
--------------------------------------------------------------------------------
1 | name: Lint
2 |
3 | on:
4 |   schedule:
5 |     - cron: 00 00 * * 1  # every Monday at 00:00
6 |   push:
7 |     branches: [main]
8 |   pull_request:
9 |     branches: [main]
10 |
11 | jobs:
12 |   lint:
13 |     runs-on: ubuntu-latest
14 |     steps:
15 |       - uses: actions/checkout@v4
16 |       - name: Set up Python 3.13
17 |         uses: actions/setup-python@v5
18 |         with:
19 |           python-version: '3.13'
20 |
21 |       - name: Install uv
22 |         uses: astral-sh/setup-uv@v5
23 |         with:
24 |           enable-cache: true
25 |
26 |       - name: Install Python 3.13 for uv
27 |         run: uv python install 3.13
28 |
29 |       - name: Install dependencies
30 |         run: |
31 |           uv sync --all-extras
32 |
33 |       - uses: actions/cache@v4
34 |         with:
35 |           path: ~/.cache/pre-commit
36 |           key: precommit-${{ env.pythonLocation }}-${{ hashFiles('**/.pre-commit-config.yaml') }}
37 |
38 |       - name: Lint
39 |         run: |
40 |           uv run tox -e lint
--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
1 | |PyPI| |Downloads| |CI| |Docs| |Coverage|
2 |
3 | OmniPath
4 | ========
5 |
6 | This package is the Python equivalent of the R package `OmnipathR`_ for accessing the web service of
7 | the `OmniPath`_ database developed by `Saezlab`_.
8 |
9 | .. toctree::
10 |     :caption: General
11 |     :maxdepth: 2
12 |     :hidden:
13 |
14 |     installation
15 |     api
16 |     release_notes
17 |     references
18 |
19 | .. |PyPI| image:: https://img.shields.io/pypi/v/omnipath.svg
20 |     :target: https://pypi.org/project/omnipath
21 |     :alt: PyPI
22 |
23 | .. |Downloads| image:: https://pepy.tech/badge/omnipath
24 |     :target: https://pepy.tech/project/omnipath
25 |     :alt: Downloads
26 |
27 | .. |CI| image:: https://img.shields.io/github/actions/workflow/status/saezlab/omnipath/ci.yml?branch=master
28 |     :target: https://github.com/saezlab/omnipath/actions?query=workflow:CI
29 |     :alt: CI
30 |
31 | .. |Docs| image:: https://img.shields.io/readthedocs/omnipath
32 |     :target: https://omnipath.readthedocs.io/en/latest
33 |     :alt: Documentation
34 |
35 | .. |Coverage| image:: https://codecov.io/gh/saezlab/omnipath/branch/master/graph/badge.svg
36 |     :target: https://codecov.io/gh/saezlab/omnipath
37 |     :alt: Coverage
38 |
39 | .. _Saezlab : https://saezlab.org/
40 | .. _OmniPath : https://omnipathdb.org/
41 | .. _OmniPathR : https://github.com/saezlab/omnipathR
--------------------------------------------------------------------------------
/docs/source/release_notes.rst:
--------------------------------------------------------------------------------
1 | Release Notes
2 | =============
3 |
4 | .. role:: small
5 |
6 | Version 1.0
7 | -----------
8 |
9 | 1.0.5 :small:`2021-08-16`
10 | ~~~~~~~~~~~~~~~~~~~~~~~~~
11 | - Setting :attr:`omnipath.options.cache` to ``None`` will now disable it (use ``'memory'`` instead)
12 | - Fix writing empty values into cache
13 | - Fix memory cache not copying data before storing it
14 | - Fix various :mod:`pandas` warnings
15 | - Remove redundant step from CI
16 |
17 | 1.0.4 :small:`2020-12-27`
18 | ~~~~~~~~~~~~~~~~~~~~~~~~~
19 | - Fix recursion error
20 | - Remove duplicated ``PostTranslational`` class
21 | - Add interactions tests
22 |
23 | 1.0.3 :small:`2020-12-08`
24 | ~~~~~~~~~~~~~~~~~~~~~~~~~
25 | - Add :class:`omnipath.interactions.PostTranslational`
26 | - Add possibility to download all :class:`omnipath.requests.Annotations`
27 |
28 | 1.0.2 :small:`2020-11-29`
29 | ~~~~~~~~~~~~~~~~~~~~~~~~~
30 | - Fix small bug when converting boolean values
31 | - Fix typos
32 | - Add option to create interaction graphs
33 |
34 | 1.0.1 :small:`2020-11-29`
35 | ~~~~~~~~~~~~~~~~~~~~~~~~~
36 | - Fix bug of not correctly passing datasets in interactions
37 | - Fix the way the progress bar is getting content size
38 | - Add comparison tests with OmnipathR
39 |
40 | 1.0.0 :small:`2020-11-23`
41 | ~~~~~~~~~~~~~~~~~~~~~~~~~
42 | - Fix minor bugs
43 | - Add options improvements
44 | - Add tests
45 |
--------------------------------------------------------------------------------
/docs/source/api.rst:
--------------------------------------------------------------------------------
1 | API
2 | ===
3 |
4 | Import Omnipath as::
5 |
6 |     import omnipath as op
7 |
8 | Requests
9 | ~~~~~~~~
10 |
11 | .. module:: omnipath.requests
12 | .. currentmodule:: omnipath
13 |
14 | .. autosummary::
15 |     :toctree: api
16 |
17 |     requests.Annotations
18 |     requests.Complexes
19 |     requests.Enzsub
20 |     requests.Intercell
21 |     requests.SignedPTMs
22 |
23 | Interactions
24 | ~~~~~~~~~~~~
25 |
26 | .. module:: omnipath.interactions
27 | .. currentmodule:: omnipath
28 |
29 | .. autosummary::
30 |     :toctree: api
31 |
32 |     interactions.AllInteractions
33 |     interactions.Dorothea
34 |     interactions.KinaseExtra
35 |     interactions.LigRecExtra
36 |     interactions.OmniPath
37 |     interactions.PathwayExtra
38 |     interactions.PostTranslational
39 |     interactions.TFmiRNA
40 |     interactions.TFtarget
41 |     interactions.Transcriptional
42 |     interactions.lncRNAmRNA
43 |     interactions.miRNA
44 |     interactions.import_intercell_network
45 |
46 | Other
47 | ~~~~~
48 |
49 | Constants
50 | ---------
51 |
52 | .. module:: omnipath.constants
53 | .. currentmodule:: omnipath
54 |
55 | .. autosummary::
56 |     :toctree: api
57 |
58 |     constants.InteractionDataset
59 |     constants.License
60 |     constants.Organism
61 |
62 | Options
63 | -------
64 |
65 | .. module:: omnipath
66 | .. currentmodule:: omnipath
67 |
68 | .. autosummary::
69 |     :toctree: api
70 |
71 |     omnipath.clear_cache
72 |     omnipath.options
73 |
--------------------------------------------------------------------------------
/tests/test_misc.py:
--------------------------------------------------------------------------------
1 | from pandas.testing import assert_frame_equal
2 | import pandas as pd
3 |
4 | from omnipath._misc import dtypes
5 |
6 |
7 | class TestMisc:
8 |     def test_auto_dtype(self):
9 |         inp = pd.DataFrame(
10 |             {
11 |                 "a": ["1", "2", "3"],
12 |                 "b": ["1", "2", 3],
13 |                 "c": ["1", "2", "3.14"],
14 |                 "d": ["1", "0", "1"],
15 |                 "e": ["Y", "N", "Y"],
16 |                 "f": [2.3, 4.7, 3.1],
17 |                 "g": [False, True, True],
18 |                 "h": [1, 0, 1],
19 |                 "i": [1.0, 2.0, 3.0],
20 |                 "j": ["1.0", "2.0", "3.0"],
21 |                 "k": ["1.0", "0.0", "1.0"],
22 |                 "l": pd.Series([1, 2, 3], dtype="int8"),
23 |                 "m": pd.Series([1.09, 2.51, 3.33], dtype="float32"),
24 |             }
25 |         )
26 |
27 |         exp = pd.DataFrame(
28 |             {
29 |                 "a": [1, 2, 3],
30 |                 "b": [1, 2, 3],
31 |                 "c": [1.0, 2.0, 3.14],
32 |                 "d": [True, False, True],
33 |                 "e": [True, False, True],
34 |                 "f": [2.3, 4.7, 3.1],
35 |                 "g": [False, True, True],
36 |                 "h": [True, False, True],
37 |                 "i": [1, 2, 3],
38 |                 "j": [1, 2, 3],
39 |                 "k": [True, False, True],
40 |                 "l": pd.Series([1, 2, 3], dtype="int8"),
41 |                 "m": pd.Series([1.09, 2.51, 3.33], dtype="float32"),
42 |             }
43 |         )
44 |
45 |         out = dtypes.auto_dtype(inp)
46 |
47 |         assert_frame_equal(exp, out)
--------------------------------------------------------------------------------
/omnipath/_core/utils/_docs.py:
--------------------------------------------------------------------------------
1 | from docrep import DocstringProcessor
2 |
3 | _general_get = """
4 | Perform the query.
5 |
6 | Parameters
7 | ----------
8 | kwargs
9 |     Parameters of the request. For more information about available values, see :meth:`params`.
10 |
11 | Returns
12 | -------
13 | :class:`pandas.DataFrame`
14 |     The result of this query."""
15 | _interactions_datasets = """
16 |     - :attr:`omnipath.constants.InteractionDataset.OMNIPATH`
17 |     - :attr:`omnipath.constants.InteractionDataset.PATHWAY_EXTRA`
18 |     - :attr:`omnipath.constants.InteractionDataset.KINASE_EXTRA`
19 |     - :attr:`omnipath.constants.InteractionDataset.LIGREC_EXTRA`
20 |     - :attr:`omnipath.constants.InteractionDataset.COLLECTRI`
21 |     - :attr:`omnipath.constants.InteractionDataset.DOROTHEA`
22 |     - :attr:`omnipath.constants.InteractionDataset.TF_TARGET`
23 |     - :attr:`omnipath.constants.InteractionDataset.TF_MIRNA`
24 |     - :attr:`omnipath.constants.InteractionDataset.TF_REGULONS`
25 |     - :attr:`omnipath.constants.InteractionDataset.MIRNA_TARGET`
26 |     - :attr:`omnipath.constants.InteractionDataset.LNCRNA_MRNA`"""
27 | _validate = """
28 | Validate the ``value`` for the :attr:`param`.
29 |
30 | Parameters
31 | ----------
32 | value
33 |     Value to validate.
34 | 35 | Returns 36 | ------- 37 | The valid values.""" 38 | _query_resources = """ 39 | Return the available resources for this query.""" 40 | _query_params = """ 41 | Return the available values for each parameter, if available.""" 42 | 43 | d = DocstringProcessor( 44 | general_get=_general_get, 45 | interaction_datasets=_interactions_datasets, 46 | validate=_validate, 47 | query_params=_query_params, 48 | query_resources=_query_resources, 49 | ) 50 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | fail_fast: false 4 | default_language_version: 5 | python: python3 6 | default_stages: 7 | - pre-commit 8 | - pre-push 9 | minimum_pre_commit_version: 3.0.0 10 | repos: 11 | - repo: https://github.com/psf/black 12 | rev: 25.1.0 13 | hooks: 14 | - id: black 15 | additional_dependencies: [toml] 16 | - repo: https://github.com/timothycrosley/isort 17 | rev: 6.0.1 18 | hooks: 19 | - id: isort 20 | additional_dependencies: [toml] 21 | - repo: https://github.com/pre-commit/pre-commit-hooks 22 | rev: v5.0.0 23 | hooks: 24 | - id: check-docstring-first 25 | - id: end-of-file-fixer 26 | - id: check-added-large-files 27 | - id: mixed-line-ending 28 | - id: trailing-whitespace 29 | exclude: ^.bumpversion.cfg$ 30 | - id: check-merge-conflict 31 | - id: check-case-conflict 32 | - id: check-symlinks 33 | - id: check-yaml 34 | - id: check-ast 35 | - id: requirements-txt-fixer 36 | - repo: https://github.com/pycqa/flake8 37 | rev: 7.1.2 38 | hooks: 39 | - id: flake8 40 | additional_dependencies: [flake8-docstrings, flake8-comprehensions, flake8-bugbear] 41 | - repo: https://github.com/asottile/blacken-docs 42 | rev: 1.19.1 43 | hooks: 44 | - id: blacken-docs 45 | additional_dependencies: [black] 46 | - repo: https://github.com/asottile/pyupgrade 47 | rev: v3.19.1 48 | hooks: 49 | - id: pyupgrade 50 | args: [--py3-plus, --py36-plus] 51 | - repo: https://github.com/macisamuele/language-formatters-pre-commit-hooks 52 | rev: v2.14.0 53 | hooks: 54 | - id: pretty-format-yaml 55 | args: [--autofix, --indent, '4'] 56 | - repo: https://github.com/pre-commit/pygrep-hooks 57 | rev: v1.10.0 58 | hooks: 59 | - id: python-no-eval 60 | - id: python-use-type-annotations 61 | - id: python-check-blanket-noqa 62 | - id: rst-backticks 63 | - id: rst-directive-colons 64 | - id: rst-inline-touching-normal 65 | -------------------------------------------------------------------------------- /docs/source/_static/css/custom.css: -------------------------------------------------------------------------------- 1 | /* ReadTheDocs theme colors */ 2 | 3 | 4 | .wy-nav-top { background-color: #f07e44 } 5 | .wy-side-nav-search { background-color: #79859E; } 6 | .wy-nav-content { max-width: 840px } 7 | .wy-side-nav-search input[type="text"] { border-width: 0 } 8 | 9 | .highlight { background: rgba(50, 140, 193, 0.15); } 10 | 11 | 12 | /* Custom classes */ 13 | 14 | 15 | .small { font-size:40% } 16 | .smaller, .pr, .noteversion { font-size:70% } 17 | .noteversion::after { content: "/" } 18 | 19 | 20 | /* Code: literals and links */ 21 | 22 | 23 | .rst-content tt.literal, 24 | .rst-content code.literal { 25 | color: #404040; 26 | } 27 | /* slim font weight for non-link code */ 28 | .rst-content tt:not(.xref), 29 | .rst-content code:not(.xref), 30 | .rst-content *:not(a) > tt.xref, 31 | 
.rst-content *:not(a) > code.xref {
32 |     font-weight: normal;
33 | }
34 |
35 |
36 | /* Just one box for annotation code for a less noisy look */
37 |
38 |
39 | .rst-content .annotation {
40 |     padding: 2px 5px;
41 |     background-color: white;
42 |     border: 1px solid #e1e4e5;
43 | }
44 | .rst-content .annotation tt,
45 | .rst-content .annotation code {
46 |     padding: 0 0;
47 |     background-color: transparent;
48 |     border: 0 solid transparent;
49 | }
50 |
51 |
52 | /* Parameter lists */
53 |
54 |
55 | /* Mimic rubric style used for other headings */
56 | .rst-content dl:not(.docutils) dl > dt {
57 |     font-weight: bold;
58 |     background: none transparent;
59 |     border-left: none;
60 |     margin: 0 0 12px;
61 |     padding: 3px 0 0;
62 |     font-size: 105%;
63 | }
64 |
65 | html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.glossary):not(.simple) > dt {
66 |     color: #005A87;
67 |     border-top: solid 3px #005A87;
68 |     background: #8EBAE5;
69 | }
70 |
71 | .rst-content .viewcode-back, .rst-content .viewcode-link {
72 |     color: #005A87;
73 | }
74 |
75 | /*class="longtable docutils align-default"*/
76 | table.longtable td {
77 |     white-space: normal!important;
78 | }
79 |
80 | /* Parameters contain parts and don’t need bold font */
81 | .rst-content dl.field-list dl > dt { font-weight: unset }
82 | /* Add colon between return tuple element name and type */
83 | .rst-content dl:not(.docutils) dl > dt .classifier::before { content: ' : ' }
84 |
--------------------------------------------------------------------------------
/omnipath/constants/_pkg_constants.py:
--------------------------------------------------------------------------------
1 | from os import environ
2 | from typing import Tuple, Optional
3 | from pathlib import Path
4 |
5 | from omnipath.constants import License, Organism
6 | from omnipath.constants._constants import PrettyEnumMixin
7 |
8 | try:
9 |     from typing import final
10 | except ImportError:
11 |     from typing_extensions import final  # noqa: F401
12 |
13 |
14 | class DEFAULT_FIELD(PrettyEnumMixin):
15 |     """Default values for ``field`` parameter."""
16 |
17 |     ENZSUB = ("sources", "references", "curation_effort")
18 |     INTERACTIONS = ("sources", "references", "curation_effort")
19 |
20 |
21 | class Format(PrettyEnumMixin):
22 |     """Response format types."""
23 |
24 |     JSON = "json"
25 |     TABLE = "tab"
26 |     TEXT = "text"
27 |     TSV = "tsv"
28 |
29 |
30 | class DEFAULT_OPTIONS:
31 |     """Default options for :attr:`omnipath.options`."""
32 |
33 |     url: str = "https://omnipathdb.org"
34 |     fallback_urls: Tuple[str] = ("http://no-tls.omnipathdb.org",)
35 |     static_url: str = "http://no-tls.static.omnipathdb.org/resources"
36 |     license: Optional[License] = None
37 |     num_retries: int = 3
38 |     timeout: int = 600
39 |     chunk_size: int = 8196
40 |     cache_dir: Path = Path.home() / ".cache" / "omnipathdb"
41 |     progress_bar: bool = True
42 |     # for testing purposes
43 |     autoload: bool = environ.get("OMNIPATH_AUTOLOAD", "") == ""
44 |     convert_dtypes: bool = True
45 |
46 |
47 | class Endpoint(PrettyEnumMixin):
48 |     """Endpoints of :attr:`omnipath.options.url` that are sometimes accessed."""
49 |
50 |     RESOURCES = "resources"
51 |     ABOUT = "about"
52 |     INFO = "info"  # not used
53 |
54 |
55 | # TODO: refactor me
56 | class Key(PrettyEnumMixin):  # noqa: D101
57 |     ORGANISM = "organism"
58 |     GENESYMBOLS = "genesymbols"
59 |     FORMAT = "format"
60 |     DATASETS = "datasets"
61 |     LICENSE = "license"
62 |     QUERIES = "queries"
63 |     FIELDS = "fields"
64 |     PASSWORD = "password"
65 |     LOOPS = "loops"
66 |
INTERCELL_SUMMARY = "intercell_summary" 67 | GENERIC_CATEGORIES = "generic_categories" 68 | CATEGORY = "category" 69 | PARENT = "parent" 70 | 71 | 72 | DEFAULT_ORGANISM = Organism.HUMAN # default organism to access 73 | DEFAULT_FORMAT = Format.TSV 74 | UNKNOWN_SERVER_VERSION = ( 75 | "UNKNOWN" # server version to save under __server_version__ if we can't get it 76 | ) 77 | -------------------------------------------------------------------------------- /tests/test_orthology.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | from omnipath._core.utils._orthology import translate_column 4 | from omnipath._core.utils._homologene import download_homologene 5 | 6 | 7 | class TestHomologene: 8 | def test_download_homologene(self): 9 | homologene = download_homologene(9606, 10090) 10 | 11 | expected_shape = (17312, 2) 12 | actual_shape = homologene.shape 13 | 14 | assert expected_shape == actual_shape 15 | 16 | expected_columns = ["source", "target"] 17 | actual_columns = homologene.columns 18 | 19 | assert all(expected_columns == actual_columns) 20 | 21 | 22 | class TestOrthologyConversion: 23 | def test_complex_genes(self): 24 | df = pd.DataFrame( 25 | { 26 | "symbol": [ 27 | "CSF2RA_CSF2RB", # one to many 28 | "IFNL3_IFNLR1_IL10RB", # 3 subunits 29 | "HCST_KLRK1", # one subunit missing 30 | "CD8A_CD8B", # 1 to 1 31 | "IL4", # 1 to 1 simple protein 32 | ] 33 | } 34 | ) 35 | 36 | default = translate_column( 37 | df, 38 | column="symbol", 39 | id_type="genesymbol", 40 | target_organism=10090, 41 | ) 42 | assert all(default["symbol"] == ["Cd8a_Cd8b1", "Il4"]) 43 | 44 | to_many = translate_column( 45 | df, 46 | column="symbol", 47 | id_type="genesymbol", 48 | target_organism=10090, 49 | replace=True, 50 | keep_untranslated=False, 51 | one_to_many=2, 52 | ) 53 | expected = { 54 | "Csf2ra_Csf2rb", 55 | "Csf2ra_Csf2rb2", 56 | "Ifnl2_Ifnlr1_Il10rb", 57 | "Ifnl3_Ifnlr1_Il10rb", 58 | "Cd8a_Cd8b1", 59 | "Il4", 60 | } 61 | 62 | assert to_many.shape == (6, 1) 63 | assert set(to_many["symbol"]) == expected 64 | 65 | keep_missing = translate_column( 66 | df, 67 | column="symbol", 68 | id_type="genesymbol", 69 | target_organism=10090, 70 | replace=False, 71 | keep_untranslated=True, 72 | one_to_many=2, 73 | ) 74 | untranslated = keep_missing["symbol"].isin(["HCST_KLRK1"]) 75 | assert untranslated.any() 76 | assert keep_missing[untranslated]["orthology_target"].isna().all() 77 | -------------------------------------------------------------------------------- /omnipath/_core/utils/_homologene.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | from omnipath._core.downloader._downloader import Downloader 4 | 5 | # NOTE: this downloads homologene data from github 6 | # Either way this is not a great solution, as homologene was last updated in 2014... 
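# A minimal usage sketch of `download_homologene` below (assuming network
# access; 9606 and 10090 are the NCBI taxonomy IDs of human and mouse, as
# used in the tests):
#
#     mapping = download_homologene(9606, 10090)
#     mapping.columns  # Index(['source', 'target'], dtype='object')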
7 | RAW_TAXA_URL = ( 8 | "https://raw.githubusercontent.com/oganm/homologene/master/data-raw/taxData.tsv" 9 | ) 10 | HOMOLOGENE_URL = ( 11 | "https://raw.githubusercontent.com/oganm/homologene/master/data-raw/homologene2.tsv" 12 | ) 13 | 14 | 15 | def _get_homologene_raw(): 16 | dwnld = Downloader() 17 | homologene = ( 18 | dwnld.maybe_download( 19 | HOMOLOGENE_URL, 20 | callback=pd.read_table, 21 | is_final=True, 22 | ) 23 | .astype(str) 24 | .rename( 25 | columns={ 26 | "Gene.Symbol": "genesymbol", 27 | "Gene.ID": "gene_id", 28 | "Taxonomy": "ncbi_taxid", 29 | "HID": "hid", 30 | } 31 | ) 32 | .set_index("hid") 33 | ) 34 | return homologene 35 | 36 | 37 | def show_homologene(): 38 | """Show the homologene taxa data""" 39 | dwnld = Downloader() 40 | return dwnld.maybe_download( 41 | RAW_TAXA_URL, 42 | callback=pd.read_table, 43 | is_final=True, 44 | ) 45 | 46 | 47 | def download_homologene(source_organism, target_organism, id_type="genesymbol"): 48 | """ 49 | Download homologene information for a given source and target organism. 50 | 51 | Parameters 52 | ---------- 53 | source_organism : int, str 54 | Source organism NCBI Taxonomy ID. 55 | target_organism : int, str 56 | Target organism NCBI Taxonomy ID. 57 | id_type : str 58 | Type of ID to use for homology conversion. 59 | Can be one of 'genesymbol', 'gene_id'. 60 | 61 | Returns 62 | ------- 63 | A pandas DataFrame with homologene information. 64 | 65 | """ 66 | homologene = _get_homologene_raw() 67 | s_taxid = str(source_organism) 68 | t_taxid = str(target_organism) 69 | 70 | source_df = homologene[(homologene["ncbi_taxid"] == s_taxid)][[id_type]] 71 | target_df = homologene[(homologene["ncbi_taxid"] == t_taxid)][[id_type]] 72 | 73 | homologene = pd.merge( 74 | source_df, 75 | target_df, 76 | right_index=True, 77 | left_index=True, 78 | suffixes=("_source", "_target"), 79 | how="inner", 80 | ) 81 | homologene = homologene.reset_index().rename( 82 | {f"{id_type}_source": "source", f"{id_type}_target": "target"}, axis=1 83 | ) 84 | homologene = homologene[["source", "target"]] 85 | 86 | return homologene 87 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | docs/source/api 74 | 75 | # PyBuilder 76 | .pybuilder/ 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # IPython 83 | profile_default/ 84 | ipython_config.py 85 | 86 | # pyenv 87 | # For a library or package, you might want to ignore these files since the code is 88 | # intended to run in multiple environments; otherwise, check them in: 89 | # .python-version 90 | 91 | # pipenv 92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 95 | # install all needed dependencies. 96 | #Pipfile.lock 97 | 98 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 99 | __pypackages__/ 100 | 101 | # Celery stuff 102 | celerybeat-schedule 103 | celerybeat.pid 104 | 105 | # SageMath parsed files 106 | *.sage.py 107 | 108 | # Environments 109 | .env 110 | .venv 111 | env/ 112 | venv/ 113 | ENV/ 114 | env.bak/ 115 | venv.bak/ 116 | 117 | # Spyder project settings 118 | .spyderproject 119 | .spyproject 120 | 121 | # Rope project settings 122 | .ropeproject 123 | 124 | # mkdocs documentation 125 | /site 126 | 127 | # mypy 128 | .mypy_cache/ 129 | .dmypy.json 130 | dmypy.json 131 | 132 | # Pyre type checker 133 | .pyre/ 134 | 135 | # pytype static type analyzer 136 | .pytype/ 137 | 138 | # Cython debug symbols 139 | cython_debug/ 140 | 141 | # Pycharm stuff 142 | .idea 143 | -------------------------------------------------------------------------------- /omnipath/_core/requests/_complexes.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Union, Mapping, Iterable, Optional 2 | import logging 3 | 4 | import pandas as pd 5 | 6 | from omnipath._core.query import QueryType 7 | from omnipath._core.requests._request import OrganismGenesymbolsRemover 8 | from omnipath.constants._pkg_constants import final 9 | 10 | 11 | @final 12 | class Complexes(OrganismGenesymbolsRemover): 13 | """Request information about protein complexes from [OmniPath]_.""" 14 | 15 | __string__ = frozenset( 16 | { 17 | "name", 18 | "components", 19 | "components_genesymbols", 20 | "stoichiometry", 21 | "references", 22 | "identifiers", 23 | } 24 | ) 25 | __categorical__ = frozenset({"sources"}) 26 | 27 | _query_type = QueryType.COMPLEXES 28 | 29 | def _resource_filter(self, data: Mapping[str, Any], **_) -> bool: 30 | return True 31 | 32 | @classmethod 33 | def complex_genes( 34 | cls, 35 | genes: Union[str, Iterable[str]], 36 | complexes: Optional[pd.DataFrame] = None, 37 | total_match: bool = False, 38 | ) -> pd.DataFrame: 39 | """ 40 | Get all the molecular complexes for a given ``genes``. 
41 | 42 | This function returns all the molecular complexes where an input set of genes participate. User can choose 43 | to retrieve every complex where any of the input genes participate or just retrieve these complexes where 44 | all the genes in input set participate together. 45 | 46 | Parameters 47 | ---------- 48 | genes 49 | The genes for which complexes will be retrieved (hgnc format). 50 | complexes 51 | Complex data from :meth:`get`. If `None`, new request will be made. 52 | total_match 53 | If `True`, get only complexes where all the genes participate together, otherwise get complexes 54 | where any of the genes participate. 55 | 56 | Returns 57 | ------- 58 | :class:`pandas.DataFrame` 59 | The filtered ``complexes``. 60 | """ 61 | if isinstance(genes, str): 62 | genes = (genes,) 63 | genes = tuple(set(genes)) 64 | if not len(genes): 65 | raise ValueError("No genes have been selected.") 66 | 67 | if complexes is None: 68 | logging.info("Fetching complexes from the server") 69 | complexes = cls.get() 70 | if not isinstance(complexes, pd.DataFrame): 71 | raise TypeError( 72 | f"Expected `complexes` to be of type `pandas.DataFrame`, found `{type(complexes)}`." 73 | ) 74 | 75 | if complexes.empty: 76 | logging.warning("Complexes are empty") 77 | return complexes 78 | 79 | col = "components_genesymbols" 80 | if col not in complexes: 81 | raise KeyError(f"Unable to find `{col}` in `{complexes.columns}`.") 82 | 83 | reduction = all if total_match else any 84 | 85 | return complexes.loc[ 86 | complexes[col] 87 | .str.split("_") 88 | .apply(lambda needles: reduction(n in genes for n in needles)) 89 | ].reset_index(drop=True) 90 | 91 | 92 | __all__ = [Complexes] 93 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | from datetime import datetime 10 | 11 | # If extensions (or modules to document with autodoc) are in another directory, 12 | # add these directories to sys.path here. If the directory is relative to the 13 | # documentation root, use os.path.abspath to make it absolute, like shown here. 14 | from pathlib import Path 15 | import sys 16 | 17 | HERE = Path(__file__).parent 18 | sys.path.insert(0, str(HERE.parent.parent)) 19 | import omnipath 20 | 21 | needs_sphinx = "3.0" 22 | 23 | # -- Project information ----------------------------------------------------- 24 | 25 | project = "omnipath" 26 | author = omnipath.__author__ 27 | copyright = f"{datetime.now():%Y}, {author}" 28 | 29 | # The full version, including alpha/beta/rc tags 30 | release = f"master ({omnipath.__version__})" 31 | 32 | 33 | # -- General configuration --------------------------------------------------- 34 | 35 | # Add any Sphinx extension module names here, as strings. They can be 36 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 37 | # ones. 
38 | extensions = [ 39 | "sphinx.ext.autodoc", 40 | "sphinx.ext.napoleon", 41 | "sphinx.ext.viewcode", 42 | "sphinx_autodoc_typehints", 43 | "sphinx.ext.intersphinx", 44 | "sphinx.ext.autosummary", 45 | "sphinx_last_updated_by_git", 46 | ] 47 | intersphinx_mapping = dict( 48 | python=("https://docs.python.org/3", None), 49 | pandas=("https://pandas.pydata.org/pandas-docs/stable/", None), 50 | networkx=("https://networkx.github.io/documentation/stable/", None), 51 | ) 52 | 53 | # Add any paths that contain templates here, relative to this directory. 54 | templates_path = ["_templates"] 55 | source_suffix = [".rst"] 56 | master_doc = "index" 57 | pygments_style = "sphinx" 58 | 59 | # List of patterns, relative to source directory, that match files and 60 | # directories to ignore when looking for source files. 61 | # This pattern also affects html_static_path and html_extra_path. 62 | exclude_patterns = [] 63 | 64 | 65 | # -- Options for HTML output ------------------------------------------------- 66 | 67 | # The theme to use for HTML and HTML Help pages. See the documentation for 68 | # a list of builtin themes. 69 | # 70 | autosummary_generate = True 71 | autodoc_member_order = "alphabetical" 72 | autodoc_typehints = "signature" 73 | autodoc_docstring_signature = True 74 | autodoc_follow_wrapped = False 75 | napoleon_google_docstring = False 76 | napoleon_numpy_docstring = True 77 | napoleon_include_init_with_doc = False 78 | napoleon_use_rtype = True 79 | napoleon_use_param = True 80 | napoleon_custom_sections = [("Params", "Parameters")] 81 | todo_include_todos = False 82 | 83 | # Add any paths that contain custom static files (such as style sheets) here, 84 | # relative to this directory. They are copied after the builtin static files, 85 | # so a file named "default.css" will overwrite the builtin "default.css". 86 | html_theme = "sphinx_rtd_theme" 87 | html_static_path = ["_static"] 88 | html_theme_options = dict(navigation_depth=4, logo_only=True) 89 | html_show_sphinx = False 90 | 91 | 92 | def setup(app): 93 | app.add_css_file("css/custom.css") 94 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | |PyPI| |Downloads| |CI| |Docs| |Coverage| 2 | 3 | Python client for the OmniPath web service 4 | ========================================== 5 | 6 | Installation 7 | ------------ 8 | You can install ``omnipath`` by running:: 9 | 10 | pip install omnipath 11 | 12 | The OmniPath database 13 | --------------------- 14 | 15 | OmniPath is a database of: 16 | 17 | * Protein-protein, TF target and miRNA-mRNA interactions 18 | * Enzyme-PTM relationships 19 | * Protein complexes 20 | * Annotations of protein function, structure, localization, expression 21 | * Intercellular communication roles of proteins 22 | 23 | To learn more about OmniPath, you can visit its `website`_, or read our recent `preprint`_ 24 | or our first `paper from 2016`_, especially its `supplementary material`_. 25 | 26 | The Python client 27 | ----------------- 28 | The data is available through a web service hosted on this `website`_. 29 | This repository hosts a Python package for querying this web service and 30 | downloading data into data frames or dictionaries. 31 | 32 | 33 | The Python package for OmniPath is pypath, isn't it? 34 | ---------------------------------------------------- 35 | `pypath`_ is a tool for building the OmniPath databases in a fully customizable way. 
36 | We recommend using pypath if you want to:
37 |
38 | * Tailor the database building to your needs
39 | * Include resources not available in the public web service
40 | * Use the rich Python APIs available for the database objects
41 | * Make sure the data from the original sources is the most up-to-date
42 | * Use the methods in ``pypath.inputs`` to download data from resources
43 | * Use the various extra tools in ``pypath.utils``, e.g. for identifier
44 |   translation, homology translation, querying Gene Ontology, working with
45 |   protein sequences, processing BioPAX, etc.
46 |
47 | Is there an R client?
48 | ---------------------
49 | Yes, there is. The R/Bioconductor package ``OmnipathR`` is available on `GitHub <https://github.com/saezlab/OmnipathR>`_
50 | or in `Bioconductor <https://bioconductor.org/packages/OmnipathR>`_.
51 | The R client currently supports all features of the web service.
52 |
53 | Cytoscape
54 | ---------
55 | We even have a `Cytoscape plug-in`_.
56 | With the plug-in you are able to load networks into Cytoscape and access
57 | certain (not all) annotations of the proteins.
58 |
59 | .. |PyPI| image:: https://img.shields.io/pypi/v/omnipath.svg
60 |     :target: https://pypi.org/project/omnipath
61 |     :alt: PyPI
62 |
63 | .. |Downloads| image:: https://pepy.tech/badge/omnipath
64 |     :target: https://pepy.tech/project/omnipath
65 |     :alt: Downloads
66 |
67 | .. |CI| image:: https://img.shields.io/github/actions/workflow/status/saezlab/omnipath/ci.yml?branch=master
68 |     :target: https://github.com/saezlab/omnipath/actions?query=workflow:CI
69 |     :alt: CI
70 |
71 | .. |Coverage| image:: https://codecov.io/gh/saezlab/omnipath/branch/master/graph/badge.svg
72 |     :target: https://codecov.io/gh/saezlab/omnipath
73 |     :alt: Coverage
74 |
75 | .. |Docs| image:: https://img.shields.io/readthedocs/omnipath
76 |     :target: https://omnipath.readthedocs.io/en/latest
77 |     :alt: Documentation
78 |
79 | .. _website : https://omnipathdb.org/
80 | .. _Cytoscape plug-in : https://apps.cytoscape.org/apps/omnipath
81 | .. _pypath : https://github.com/saezlab/pypath
82 | .. _preprint : https://www.biorxiv.org/content/10.1101/2020.08.03.221242v2
83 | .. _paper from 2016 : https://www.nature.com/articles/nmeth.4077
84 | .. _supplementary material : https://static-content.springer.com/esm/art%3A10.1038%2Fnmeth.4077/MediaObjects/41592_2016_BFnmeth4077_MOESM495_ESM.pdf
85 |
--------------------------------------------------------------------------------
/omnipath/_core/requests/_intercell.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Tuple, Mapping, Iterable, Optional, Sequence
2 |
3 | import pandas as pd
4 |
5 | from omnipath._core.query import QueryType
6 | from omnipath._core.query._types import Strseq_t
7 | from omnipath._core.requests._request import OrganismGenesymbolsRemover
8 | from omnipath.constants._pkg_constants import Key, Format, final
9 | from omnipath._core.query._query_validator import _to_string_set
10 |
11 |
12 | @final
13 | class Intercell(OrganismGenesymbolsRemover):
14 |     """
15 |     Request `intercell` annotations from [OmniPath]_.
16 |
17 |     Imports the [OmniPath]_ inter-cellular communication role annotation
18 |     `database <https://omnipathdb.org/intercell>`__.
19 |
20 |     It provides information on the roles in inter-cellular signaling, e.g. if a protein is a ligand, a receptor,
21 |     an extracellular matrix (ECM) component, etc.
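
    Example
    -------
    A minimal sketch (requires network access; ``'ligand'`` is one example of
    a valid generic category, see :meth:`generic_categories` for the full list)::

        ic = Intercell.get()
        ligand_resources = Intercell.resources(generic_categories="ligand")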
22 | """ 23 | 24 | __categorical__ = frozenset( 25 | {"category", "parent", "database", "scope", "aspect", "source", "entity_type"} 26 | ) 27 | 28 | _query_type = QueryType.INTERCELL 29 | 30 | def _resource_filter( 31 | self, 32 | data: Mapping[str, Any], 33 | generic_categories: Optional[Sequence[str]] = None, 34 | **kwargs, 35 | ) -> bool: 36 | return generic_categories is None or _to_string_set( 37 | data.get(Key.GENERIC_CATEGORIES.s, set()) 38 | ) & _to_string_set(generic_categories) 39 | 40 | @classmethod 41 | def resources(cls, generic_categories: Strseq_t = None) -> Tuple[str]: 42 | """ 43 | Return the resources falling into the specified generic categories. 44 | 45 | Parameters 46 | ---------- 47 | generic_categories 48 | For valid options, see :attr:`generic_categories`. 49 | 50 | Returns 51 | ------- 52 | tuple 53 | The filtered resources according to ``generic_categories``. 54 | """ 55 | if generic_categories is None: 56 | return super().resources() 57 | 58 | if isinstance(generic_categories, str): 59 | generic_categories = (generic_categories,) 60 | if not isinstance(generic_categories, (Sequence, Iterable)): 61 | raise TypeError( 62 | f"Expected generic categories to be a `str` or an `Iterable`, " 63 | f"found `{type(generic_categories).__name__}`." 64 | ) 65 | 66 | if not len(generic_categories): 67 | raise ValueError("No generic categories have been selected.") 68 | 69 | return super().resources(**{Key.GENERIC_CATEGORIES.s: generic_categories}) 70 | 71 | @classmethod 72 | def categories(cls) -> Tuple[str]: 73 | """Return categories from the `intercell` database.""" 74 | return cls()._get_metadata(Key.CATEGORY.s) 75 | 76 | @classmethod 77 | def generic_categories(cls) -> Tuple[str]: 78 | """Return generic categories from the `intercell` database.""" 79 | return cls()._get_metadata(Key.PARENT.s) 80 | 81 | def _get_metadata(self, col: Optional[str]) -> Tuple[str]: 82 | """Return unique summary data from column ``col``.""" 83 | metadata = self._downloader.maybe_download( 84 | Key.INTERCELL_SUMMARY.s, 85 | params={Key.FORMAT.s: Format.JSON.s}, 86 | callback=self._json_reader, 87 | ) 88 | 89 | if col not in metadata.columns: 90 | raise KeyError(f"Column `{col}` not found in `{list(metadata.columns)}`.") 91 | 92 | return tuple(sorted(pd.unique(metadata[col].astype(str)))) 93 | 94 | 95 | __all__ = [Intercell] 96 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [flake8] 2 | # D104 Missing docstring in public package 3 | # F401 ... imported but unused 4 | per-file-ignores = 5 | */__init__.py: D104, F401, 6 | tests/* : D101, D102, D103, D104 7 | tests/conftest.py: D101, D102, D103, E402 8 | # D100 Missing docstring in public module 9 | # D107 Missing docstring in __init__ 10 | # W503 line break before binary operator 11 | # D105 Missing docstring in magic method 12 | # E203 whitespace before ':' 13 | # D400 First line should end with a period 14 | # false positive: 15 | # B024 ... 
is an abstract base class, but it has no abstract methods 16 | ignore = D100,D107,W503,D105,E203,D400,B024 17 | exclude = 18 | .git, 19 | __pycache__, 20 | docs/* 21 | max_line_length = 120 22 | filename = *.py 23 | 24 | [gh] 25 | python = 26 | 3.9: py39 27 | 3.10: py310 28 | 3.11: py311 29 | 3.12: py312 30 | 3.13: py313, covclean, lint, coverage, readme 31 | 32 | [pytest] 33 | python_files = test_*.py 34 | testpaths = tests/ 35 | xfail_strict = true 36 | requests_mock_case_sensitive = true 37 | 38 | [tox] 39 | min_version=3.20.0 40 | isolated_build = true 41 | skip_missing_interpreters = true 42 | envlist = 43 | covclean 44 | lint 45 | py{39,310,311,312,313} 46 | coverage 47 | readme 48 | docs 49 | 50 | [testenv] 51 | platform = 52 | linux: linux 53 | macos: (macos|osx|darwin) 54 | base_python = 55 | py39: python3.9 56 | py310: python3.10 57 | py311: python3.11 58 | py312: python3.12 59 | py313: python3.13 60 | deps = 61 | pytest 62 | pytest-mock 63 | pytest-cov 64 | pytest-socket 65 | requests-mock>=1.9.2 66 | numpy 67 | networkx 68 | # {3.10-linux}: rpy2<4 69 | # log level ERROR because we print out info from fixture and -s also prints useless stuff from R 70 | setenv = 71 | OMNIPATH_AUTOLOAD = false 72 | passenv = TOXENV,CI,CODECOV_*,GITHUB_ACTIONS 73 | usedevelop = true 74 | commands = 75 | pytest --cov --cov-append --cov-config={toxinidir}/.coveragerc --ignore docs/ {posargs:-vv {env:_PYTEST_TOX_POSARGS:}} 76 | 77 | [testenv:py313] 78 | setenv = 79 | _PYTEST_TOX_POSARGS=--test-server --log-cli-level=ERROR 80 | 81 | 82 | [testenv:covclean] 83 | description = Clean coverage files. 84 | deps = coverage 85 | skip_install = True 86 | commands = coverage erase 87 | 88 | [testenv:lint] 89 | description = Perform linting. 90 | deps = pre-commit>=2.7.1 91 | skip_install = true 92 | commands = 93 | pre-commit run --all-files --show-diff-on-failure {posargs:} 94 | 95 | [testenv:coverage] 96 | description = Report the coverage difference. 97 | deps = 98 | coverage 99 | diff_cover 100 | skip_install = true 101 | depends = py{39,310,311,312,313} 102 | parallel_show_output = True 103 | commands = 104 | coverage report --omit="tox/*" 105 | coverage xml --omit="tox/*" -o {toxinidir}/coverage.xml 106 | diff-cover --compare-branch origin/master {toxinidir}/coverage.xml 107 | 108 | [testenv:docs] 109 | description = Build the documentation. 110 | skip_install = true 111 | allowlist_externals = uv 112 | commands = 113 | uv sync --extra docs 114 | uv run sphinx-build --color -b html {toxinidir}/docs/source {toxinidir}/docs/build/html 115 | python -c 'import pathlib; print(f"Documentation is available under:", pathlib.Path(f"{toxinidir}") / "docs" / "build" / "html" / "index.html")' 116 | 117 | [testenv:clean-docs] 118 | description = Clean the documentation artifacts. 119 | deps = 120 | skip_install = true 121 | changedir = {toxinidir}/docs 122 | allowlist_externals = make 123 | commands = make clean 124 | 125 | [testenv:readme] 126 | description = Check if README renders on PyPI. 
127 | deps = twine >= 1.12.1 128 | skip_install = true 129 | allowlist_externals = uv 130 | commands = uv build --wheel --out-dir {envtmpdir}/build 131 | twine check {envtmpdir}/build/* 132 | -------------------------------------------------------------------------------- /omnipath/constants/_constants.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, ABCMeta 2 | from enum import Enum, EnumMeta, unique 3 | from typing import Any, Callable 4 | from functools import wraps 5 | 6 | 7 | def _pretty_raise_enum(cls: EnumMeta, fun: Callable) -> Callable: 8 | @wraps(fun) 9 | def wrapper(*args, **kwargs) -> Enum: 10 | try: 11 | return fun(*args, **kwargs) 12 | except ValueError as e: 13 | _cls, value, *_ = args 14 | e.args = (cls._format(value),) 15 | raise e 16 | 17 | if not issubclass(cls, ErrorFormatter): 18 | raise TypeError(f"Class `{cls}` must be subtype of `ErrorFormatter`.") 19 | elif not len(cls.__members__): 20 | # empty enum, for class hierarchy 21 | return fun 22 | 23 | return wrapper 24 | 25 | 26 | class NoValue(Enum): 27 | """Enumeration which hides its :attr:`value`.""" 28 | 29 | def __repr__(self): 30 | return f"<{self.__class__.__name__}.{self.name}>" 31 | 32 | 33 | class ErrorFormatter(ABC): # noqa: D101 34 | __error_format__ = "Invalid value `{}` for `{}`. Valid options are: `{}`." 35 | 36 | @classmethod 37 | def _format(cls, value: Any) -> str: 38 | """Format the error message for invalid ``value``.""" 39 | return cls.__error_format__.format( 40 | value, cls.__name__, [m.value for m in cls.__members__.values()] 41 | ) 42 | 43 | 44 | class FormatterMeta(EnumMeta, ABCMeta): # noqa: D101 45 | def __call__(cls, *args, **kw): # noqa: D102 46 | if getattr(cls, "__error_format__", None) is None: 47 | raise TypeError( 48 | f"Can't instantiate class `{cls.__name__}` " 49 | f"without `__error_format__` class attribute." 50 | ) 51 | return super().__call__(*args, **kw) 52 | 53 | def __new__(cls, clsname, superclasses, attributedict): # noqa: D102 54 | res = super().__new__(cls, clsname, superclasses, attributedict) 55 | res.__new__ = _pretty_raise_enum(res, res.__new__) 56 | return res 57 | 58 | 59 | class PrettyEnumMixin(ErrorFormatter, NoValue, metaclass=FormatterMeta): 60 | """Enum mixin that pretty prints when user uses invalid value.""" 61 | 62 | @property 63 | def s(self) -> str: 64 | """Return the :attr:`value` as :class:`str`.""" 65 | return str(self.value) 66 | 67 | 68 | @unique 69 | class License(PrettyEnumMixin): 70 | """License types.""" 71 | 72 | ACADEMIC = "academic" #: Academic license. 73 | COMMERCIAL = "commercial" #: Commercial license. 74 | NON_PROFIT = "non_profit" #: Non-profit license. 75 | FOR_PROFIT = "for_profit" #: For-profit license. 76 | IGNORE = "ignore" #: Ignore the license type. 77 | 78 | 79 | @unique 80 | class InteractionDataset(PrettyEnumMixin): 81 | """ 82 | Available interaction datasets in [OmniPath]_. 83 | 84 | See :mod:`omnipath.interactions` for more information. 
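Example
-------
A minimal usage sketch (illustrative only; not run as a doctest):

>>> from omnipath.constants import InteractionDataset
>>> InteractionDataset("dorothea")
<InteractionDataset.DOROTHEA>
>>> InteractionDataset("dorothea").s
'dorothea'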
85 | """ 86 | 87 | COLLECTRI = "collectri" 88 | DOROTHEA = "dorothea" 89 | KINASE_EXTRA = "kinaseextra" 90 | LIGREC_EXTRA = "ligrecextra" 91 | LNCRNA_MRNA = "lncrna_mrna" 92 | MIRNA_TARGET = "mirnatarget" 93 | OMNIPATH = "omnipath" 94 | PATHWAY_EXTRA = "pathwayextra" 95 | SMALL_MOLECULE = "small_molecule" 96 | TF_MIRNA = "tf_mirna" 97 | TF_REGULONS = "tfregulons" 98 | TF_TARGET = "tf_target" 99 | 100 | 101 | @unique 102 | class Organism(PrettyEnumMixin): 103 | """Organism types.""" 104 | 105 | HUMAN = "human" #: NCIB taxonomy id ``9606``. 106 | MOUSE = "mouse" #: NCIB taxonomy id ``10090``. 107 | RAT = "rat" #: NCIB taxonomy id ``10116``. 108 | 109 | def __new__(cls, value: str): # noqa: D102 110 | obj = object.__new__(cls) 111 | obj._code = {"human": 9606, "rat": 10116, "mouse": 10090}[value] 112 | return obj 113 | 114 | @property 115 | def code(self) -> int: 116 | """Return the code for this organism.""" 117 | return self._code 118 | 119 | 120 | __all__ = [ 121 | License, 122 | Organism, 123 | InteractionDataset, 124 | ] 125 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | schedule: 5 | - cron: 00 00 * * 1 # run every Monday at 00:00 6 | push: 7 | branches: [main] 8 | tags: [v*] 9 | pull_request: 10 | branches: [main] 11 | 12 | jobs: 13 | build: 14 | runs-on: ${{ matrix.os }} 15 | timeout-minutes: 10 16 | strategy: 17 | fail-fast: false 18 | max-parallel: 4 19 | matrix: 20 | os: [ubuntu-latest, macos-latest] 21 | python: ['3.9', '3.10', '3.11', '3.12', '3.13'] 22 | test_server: [false] 23 | exclude: 24 | - os: macos-latest 25 | include: 26 | - os: macos-latest 27 | python: '3.13' 28 | env: 29 | OS: ${{ matrix.os }} 30 | PYTHON: ${{ matrix.python }} 31 | 32 | steps: 33 | - uses: actions/checkout@v4 34 | with: 35 | fetch-depth: 0 36 | 37 | - name: Install uv 38 | uses: astral-sh/setup-uv@v5 39 | with: 40 | enable-cache: true 41 | python-version: ${{ matrix.python }} 42 | 43 | - name: Install Python 44 | run: uv python install --python-preference only-managed ${{ matrix.python }} 45 | 46 | - name: Install dependencies 47 | run: | 48 | uv sync --all-extras 49 | uv pip install codecov 50 | uv tool install \ 51 | --python-preference only-managed \ 52 | --python ${{ matrix.python }} \ 53 | --with tox-uv \ 54 | --with tox-gh \ 55 | tox 56 | 57 | 58 | - name: Install R 59 | if: matrix.test_server 60 | uses: r-lib/actions/setup-r@v2 61 | with: 62 | r-version: 4.4.1 63 | 64 | - name: Get R cache dir 65 | uses: actions/cache@v4 66 | if: matrix.test_server 67 | with: 68 | path: ~/.local/share/renv 69 | key: ${{ runner.os }}-renv-${{ hashFiles('**/renv.lock') }} 70 | 71 | - name: Install OmnipathR 72 | if: matrix.test_server 73 | run: | 74 | sudo apt-get install libcurl4-openssl-dev 75 | sudo Rscript --vanilla -e "if (!(requireNamespace('BiocManager', quietly=TRUE))) { install.packages(c('BiocManager', 'curl'), repos='https://cloud.r-project.org/') }; BiocManager::install('OmnipathR')" 76 | Rscript --vanilla -e "packageVersion('OmnipathR')" 77 | 78 | - name: Set up test suite 79 | env: 80 | TOX_GH_MAJOR_MINOR: ${{ matrix.python }} 81 | run: | 82 | tox run -vv --notest --skip-missing-interpreters true 83 | 84 | - name: Run tests 85 | env: 86 | TOX_GH_MAJOR_MINOR: ${{ matrix.python }} 87 | run: | 88 | tox run -vv --skip-pkg-install 89 | 90 | - name: Upload coverage to Codecov 91 | if: success() 92 | env: 93 | CODECOV_NAME: ${{ 
matrix.python }}-${{ matrix.os }} 94 | run: | 95 | uv run codecovcli --verbose upload-process -t ${{ secrets.CODECOV_TOKEN }} -n $CODECOV_NAME -F unittests 96 | 97 | deploy: 98 | if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') 99 | needs: build 100 | runs-on: ubuntu-latest 101 | steps: 102 | 103 | - uses: actions/checkout@v4 104 | with: 105 | fetch-depth: 0 106 | 107 | - name: Install uv 108 | uses: astral-sh/setup-uv@v5 109 | with: 110 | enable-cache: true 111 | 112 | - name: Build a binary wheel and a source tarball 113 | run: uv build 114 | 115 | - name: Publish package on PyPI 116 | uses: pypa/gh-action-pypi-publish@release/v1 117 | with: 118 | user: __token__ 119 | password: ${{ secrets.PYPI_PASSWORD }} 120 | skip_existing: true 121 | verbose: true 122 | -------------------------------------------------------------------------------- /tests/test_cache.py: -------------------------------------------------------------------------------- 1 | from copy import copy, deepcopy 2 | from typing import Optional 3 | from pathlib import Path 4 | 5 | import pytest 6 | 7 | from pandas.testing import assert_frame_equal 8 | import pandas as pd 9 | 10 | from omnipath import options, clear_cache 11 | from omnipath._core.cache._cache import FileCache, NoopCache, MemoryCache 12 | 13 | 14 | def test_clear_cache_high_lvl(cache_backup): 15 | options.cache["foo"] = 42 16 | assert len(options.cache) == 1 17 | assert options.cache["foo"] == 42 18 | 19 | clear_cache() 20 | 21 | assert len(options.cache) == 0 22 | 23 | 24 | class TestMemoryCache: 25 | def test_str_repr(self): 26 | mc = MemoryCache() 27 | 28 | assert str(mc) == f"<{mc.__class__.__name__}[size={len(mc)}]>" 29 | assert repr(mc) == f"<{mc.__class__.__name__}[size={len(mc)}]>" 30 | 31 | def test_path_is_None(self): 32 | mc = MemoryCache() 33 | assert mc.path == "memory" 34 | 35 | def test_copy_does_nothing(self): 36 | mc = MemoryCache() 37 | 38 | assert mc is mc.copy() 39 | assert mc is copy(mc) 40 | 41 | def test_deepcopy_work(self): 42 | mc = MemoryCache() 43 | 44 | assert mc is not deepcopy(mc) 45 | 46 | def test_cache_works(self): 47 | mc = MemoryCache() 48 | sentinel = object() 49 | 50 | mc["foo"] = sentinel 51 | 52 | assert len(mc) == 1 53 | assert mc["foo"] is not sentinel # copy was made 54 | 55 | mc.clear() 56 | 57 | assert len(mc) == 0 58 | 59 | def test_dataframe_modification(self): 60 | mc = MemoryCache() 61 | df = pd.DataFrame({"foo": [1, 2], "bar": [3, 4]}) 62 | 63 | mc["baz"] = df 64 | _ = df.pop("foo") 65 | 66 | assert "foo" in mc["baz"] 67 | assert "bar" in mc["baz"] 68 | 69 | @pytest.mark.parametrize("val", [None, pd.DataFrame()]) 70 | def test_add_empty_value(self, val: Optional[pd.DataFrame]): 71 | mc = MemoryCache() 72 | 73 | mc["foo"] = val 74 | 75 | assert "foo" not in mc 76 | assert len(mc) == 0 77 | 78 | def test_returns_copy(self): 79 | mc = MemoryCache() 80 | data = pd.DataFrame({"x": [0, 1]}) 81 | mc["foo"] = data 82 | 83 | assert mc["foo"] is not mc["foo"] 84 | assert_frame_equal(mc["foo"], data) 85 | 86 | 87 | class TestPickleCache: 88 | def test_invalid_path(self): 89 | with pytest.raises(TypeError): 90 | FileCache(42) 91 | 92 | def test_path(self, tmpdir): 93 | fc = FileCache(Path(tmpdir)) 94 | 95 | assert isinstance(fc.path, Path) 96 | assert str(fc.path) == str(tmpdir) 97 | 98 | def test_str_repr(self, tmpdir): 99 | fc = FileCache(Path(tmpdir)) 100 | 101 | assert ( 102 | str(fc) 103 | == f"<{fc.__class__.__name__}[size={len(fc)}, path={str(tmpdir)!r}]>" 104 | ) 105 | assert ( 106 | repr(fc) 107 
| == f"<{fc.__class__.__name__}[size={len(fc)}, path={str(tmpdir)!r}]>" 108 | ) 109 | 110 | def test_cache_works(self, tmpdir): 111 | fc = FileCache(Path(tmpdir)) 112 | sentinel = object() 113 | 114 | assert "foo" not in fc 115 | fc["foo"] = 42 116 | fc["bar.pickle"] = sentinel 117 | 118 | assert "foo" in fc 119 | assert "foo.pickle" in fc 120 | assert fc["bar.pickle"] is not sentinel 121 | 122 | def test_clear_works(self, tmpdir): 123 | fc = FileCache(Path(tmpdir)) 124 | fc["foo"] = 42 125 | assert Path(fc.path).exists() 126 | 127 | fc.clear() 128 | 129 | assert len(fc) == 0 130 | assert not Path(tmpdir).exists() 131 | 132 | @pytest.mark.parametrize("val", [None, pd.DataFrame()]) 133 | def test_add_empty_value(self, tmpdir, val: Optional[pd.DataFrame]): 134 | fc = FileCache(Path(tmpdir)) 135 | 136 | fc["foo"] = val 137 | 138 | assert "foo" not in fc 139 | assert len(fc) == 0 140 | 141 | 142 | class TestNoopCache: 143 | def test_add_value(self): 144 | nc = NoopCache() 145 | nc["foo"] = 42 146 | 147 | assert nc.path is None 148 | assert "foo" not in nc 149 | assert len(nc) == 0 150 | -------------------------------------------------------------------------------- /omnipath/_core/query/_query.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta 2 | from enum import Enum, EnumMeta 3 | from typing import Set, Tuple, Union, Optional, Sequence, FrozenSet 4 | 5 | from inflect import engine 6 | 7 | from omnipath.constants._constants import FormatterMeta, ErrorFormatter 8 | from omnipath._core.query._query_validator import ( 9 | EnzsubValidator, 10 | ComplexesValidator, 11 | IntercellValidator, 12 | AnnotationsValidator, 13 | InteractionsValidator, 14 | ) 15 | 16 | _engine = engine() 17 | 18 | 19 | def _get_synonyms(key: str) -> Tuple[str]: 20 | """ 21 | Create synonyms for ``key``. 22 | 23 | This function creates just 2 synonyms - the singular and plural case of ``key``. 24 | 25 | Parameters 26 | ---------- 27 | key 28 | Key for which to create the synonyms. 29 | 30 | Returns 31 | ------- 32 | :class:`tuple` 33 | Synonyms for ``key``. User will be able to use these submitting requests. 
34 | """ 35 | if not isinstance(key, str): 36 | raise TypeError(f"Expected a `str`, found `{type(key)}`.") 37 | 38 | singular = _engine.singular_noun(key) 39 | singular = singular if isinstance(singular, str) else key 40 | 41 | plural = _engine.plural_noun(singular) 42 | if not isinstance(plural, str): 43 | plural = key + "s" if not key.endswith("s") else key 44 | 45 | return tuple(sorted({singular, plural})) 46 | 47 | 48 | class SynonymizerMeta(EnumMeta, ABCMeta): # noqa: D101 49 | def __new__(cls, clsname, superclasses, attributedict): # noqa: D102 50 | validator = attributedict.get("__validator__", None) 51 | 52 | if validator is None: 53 | return super().__new__(cls, clsname, superclasses, attributedict) 54 | 55 | for k in list(validator): 56 | k = str(k.name) 57 | for i, synonym in enumerate(_get_synonyms(k.lower())): 58 | attributedict[f"{k}_{i}"] = synonym 59 | 60 | return super().__new__(cls, clsname, superclasses, attributedict) 61 | 62 | 63 | class QueryMeta(SynonymizerMeta, FormatterMeta): # noqa: D101 64 | pass 65 | 66 | 67 | class Query(ErrorFormatter, Enum, metaclass=QueryMeta): # noqa: D101 68 | @property 69 | def _query_name(self) -> str: 70 | """Convert the synonym to an actual query parameter name.""" 71 | return "_".join(self.name.split("_")[:-1]) 72 | 73 | @property 74 | def _delegate(self): 75 | """Delegate the validation.""" 76 | return getattr(self.__validator__, self._query_name) 77 | 78 | @property 79 | def param(self) -> str: 80 | """Get the parameter name as required by the server.""" 81 | return self._query_name.lower() 82 | 83 | @property 84 | def valid(self) -> Optional[FrozenSet[str]]: 85 | """Return the set of valid values for :attr:`param`.""" 86 | return self._delegate.valid 87 | 88 | @property 89 | def annotation(self) -> type: 90 | """Return type annotations for :attr:`param`.""" 91 | return self._delegate.annotation 92 | 93 | @property 94 | def doc(self) -> Optional[str]: 95 | """Return the docstring for :attr:`param`.""" 96 | return self._delegate.doc 97 | 98 | def __call__( 99 | self, value: Optional[Union[str, Sequence[str]]] 100 | ) -> Optional[Set[str]]: 101 | """%(validate)s""" # noqa: D401 102 | return self._delegate(value) 103 | 104 | 105 | class EnzsubQuery(Query): # noqa: D101 106 | __validator__ = EnzsubValidator 107 | 108 | 109 | class InteractionsQuery(Query): # noqa: D101 110 | __validator__ = InteractionsValidator 111 | 112 | 113 | class ComplexesQuery(Query): # noqa: D101 114 | __validator__ = ComplexesValidator 115 | 116 | 117 | class AnnotationsQuery(Query): # noqa: D101 118 | __validator__ = AnnotationsValidator 119 | 120 | 121 | class IntercellQuery(Query): # noqa: D101 122 | __validator__ = IntercellValidator 123 | 124 | 125 | class QueryType(Enum): # noqa: D101 126 | ENZSUB = EnzsubQuery 127 | INTERACTIONS = InteractionsQuery 128 | COMPLEXES = ComplexesQuery 129 | ANNOTATIONS = AnnotationsQuery 130 | INTERCELL = IntercellQuery 131 | 132 | def __call__( 133 | self, value: Optional[Union[str, Sequence[str]]] 134 | ) -> Optional[Set[str]]: 135 | """%(validate)s""" # noqa: D401 136 | return self.value(value) 137 | 138 | @property 139 | def endpoint(self) -> str: 140 | """Get the API endpoint for this type of query.""" 141 | return self.name.lower() 142 | 143 | 144 | __all__ = [ 145 | EnzsubQuery, 146 | InteractionsQuery, 147 | ComplexesQuery, 148 | AnnotationsQuery, 149 | IntercellQuery, 150 | ] 151 | -------------------------------------------------------------------------------- /omnipath/_core/utils/_orthology.py: 
-------------------------------------------------------------------------------- 1 | from itertools import product 2 | 3 | import numpy as np 4 | import pandas as pd 5 | 6 | from omnipath._core.utils._homologene import download_homologene 7 | 8 | CPLEX_PREFIX = "COMPLEX:" 9 | 10 | 11 | # Replace list elements with dictionary values 12 | def _replace_subunits(lst, my_dict, one_to_many): 13 | result = [] 14 | for x in lst: 15 | if x in my_dict: 16 | value = my_dict[x] 17 | 18 | if not isinstance(value, list): 19 | value = [value] 20 | 21 | if len(value) > one_to_many: 22 | result.append(np.nan) 23 | else: 24 | result.append(value) 25 | else: 26 | result.append(np.nan) 27 | return result 28 | 29 | 30 | def _generate_orthologs(data, column, map_dict, one_to_many): 31 | df = data[[column]].drop_duplicates().set_index(column) 32 | data[column] = data[column].replace(CPLEX_PREFIX, "", regex=True) 33 | 34 | df["subunits"] = df.index.str.split("_") 35 | df["subunits"] = df["subunits"].apply( 36 | _replace_subunits, 37 | args=( 38 | map_dict, 39 | one_to_many, 40 | ), 41 | ) 42 | df = df["subunits"].explode().reset_index() 43 | 44 | grouped = ( 45 | df.groupby(column).filter(lambda x: x["subunits"].notna().all()).groupby(column) 46 | ) 47 | 48 | # Generate all possible subunit combinations within each group 49 | complexes = [] 50 | for name, group in grouped: 51 | if group["subunits"].isnull().all(): 52 | continue 53 | subunit_lists = [list(x) for x in group["subunits"]] 54 | complex_combinations = list(product(*subunit_lists)) 55 | for comb in complex_combinations: # avoid shadowing the `complex` builtin 56 | complexes.append((name, "_".join(comb))) 57 | 58 | # Create output DataFrame 59 | col_names = ["orthology_source", "orthology_target"] 60 | result = pd.DataFrame(complexes, columns=col_names).set_index("orthology_source") 61 | 62 | return result 63 | 64 | 65 | def translate_column( 66 | data, 67 | column, 68 | id_type, 69 | target_organism, 70 | replace=True, 71 | keep_untranslated=False, 72 | source_organism=9606, 73 | one_to_many=1, 74 | ): 75 | """ 76 | Generate orthologs for a given column in a DataFrame. 77 | 78 | Parameters 79 | ---------- 80 | data : pandas.DataFrame 81 | Input DataFrame. 82 | column : str 83 | Column name to translate. 84 | id_type : str 85 | Type of ID to use for homology conversion. Can be one of 'genesymbol', 'gene_id'. 86 | target_organism : int 87 | NCBI Taxonomy ID of the target organism. 88 | replace : bool, optional 89 | Whether to replace the original column with the translated values. Default is True. 90 | keep_untranslated : bool, optional 91 | Whether to keep the untranslated values in the output DataFrame (rows that could not be translated retain their original values). Default is False. Ignored if `replace` is True. 92 | source_organism : int 93 | NCBI Taxonomy ID of the source organism. Default is 9606 (human). 94 | one_to_many : int, optional 95 | Maximum number of orthologs allowed per gene. Default is 1. 96 | 97 | Returns 98 | ------- 99 | Resulting DataFrame with translated column.
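Examples
--------
A hypothetical sketch (requires a network connection; the input frame is
made up for illustration):

>>> df = pd.DataFrame({"source": ["TP53", "EGFR"]})
>>> translate_column(
...     df, column="source", id_type="genesymbol", target_organism=10090
... )  # doctest: +SKIP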
100 | 101 | """ 102 | if not isinstance(one_to_many, int): 103 | raise ValueError("`one_to_many` should be a positive integer!") 104 | 105 | id_types = ["genesymbol", "gene_id"] 106 | if id_type not in id_types: 107 | raise ValueError(f"`id_type` should be one of: {id_types}") 108 | 109 | # get orthologs 110 | source_organism, target_organism = str(source_organism), str(target_organism) 111 | map_df = download_homologene(source_organism, target_organism, id_type).set_index( 112 | "source" 113 | ) 114 | map_dict = map_df.groupby(level=0)["target"].apply(list).to_dict() 115 | map_data = _generate_orthologs(data, column, map_dict, one_to_many) 116 | 117 | # join orthologs 118 | data = ( 119 | data.set_index(column) 120 | .merge(map_data, left_index=True, right_index=True, how="left") 121 | .reset_index(names=column) 122 | ) 123 | 124 | # replace orthologs 125 | if replace: 126 | data[column] = data["orthology_target"] 127 | data = data.drop(columns=["orthology_target"]) 128 | 129 | elif keep_untranslated: 130 | data[column] = data.apply( 131 | lambda x: ( 132 | x["orthology_target"] 133 | if not pd.isnull(x["orthology_target"]) 134 | else x[column] 135 | ), 136 | axis=1, 137 | ) 138 | 139 | data = data.dropna(subset=[column]) 140 | return data 141 | -------------------------------------------------------------------------------- /omnipath/_misc/dtypes.py: -------------------------------------------------------------------------------- 1 | from typing import Union, Iterable 2 | 3 | import numpy as np 4 | import pandas as pd 5 | 6 | __all__ = ["auto_dtype"] 7 | 8 | TRUE = frozenset(("true", "t", "yes", "y")) 9 | FALSE = frozenset(("false", "f", "no", "n")) 10 | BOOL = frozenset().union(TRUE, FALSE) 11 | NA = frozenset(("na", "NA", "NaN", "none", "None", None, pd.NA, pd.NaT, np.nan)) 12 | INT = frozenset( 13 | ("int64", "uint64", "int32", "uint32", "int16", "uint16", "int8", "uint8") 14 | ) 15 | FLT = frozenset(("float64", "float32", "float128")) 16 | NUM = INT | FLT 17 | ALL = ("int64", "uint64", "float64", "string") 18 | 19 | 20 | def auto_dtype( 21 | data: Union[pd.DataFrame, pd.Series, Iterable], 22 | categories: bool = True, 23 | **kwargs, 24 | ) -> Union[pd.DataFrame, pd.Series]: 25 | """ 26 | Convert to the best dtype 27 | 28 | Guess automatically and convert data types of a dataframe, series or other 29 | iterable. 30 | 31 | Parameters 32 | ---------- 33 | data 34 | A dataframe or an array like object such as :class:`pandas.Series`, 35 | :class:`numpy.ndarray` or list. 36 | categories 37 | Use the `category` data type for string variables with a small 38 | number of values compared to their size. 39 | kwargs 40 | For dataframes, manually set the desired data type of certain 41 | variables. 42 | 43 | Returns 44 | ------- 45 | :class:`pandas.DataFrame` or :class:`pandas.Series` or str or list 46 | A dataframe or series with its data type(s) converted. 
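Examples
--------
A minimal sketch of the intended behaviour (values chosen for illustration):

>>> auto_dtype(pd.Series(["1", "2", "3"])).dtype
dtype('int64')
>>> auto_dtype(pd.Series(["yes", "no"])).dtype
dtype('bool')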
47 | """ 48 | method = _auto_dtype_df if isinstance(data, pd.DataFrame) else _auto_dtype_series 49 | 50 | return method(data, categories=categories, **kwargs) 51 | 52 | 53 | def _auto_dtype_df( 54 | data: pd.DataFrame, 55 | categories: bool = True, 56 | **kwargs, 57 | ) -> pd.DataFrame: 58 | def process_col(col): 59 | if col in kwargs: 60 | return data[col].astype(kwargs[col]) 61 | 62 | else: 63 | return _auto_dtype_series( 64 | data[col], 65 | categories=categories, 66 | ) 67 | 68 | result = {col: process_col(col) for col in data} 69 | 70 | return pd.DataFrame(result, index=data.index) 71 | 72 | 73 | def _auto_dtype_series( 74 | data: pd.Series, 75 | categories: bool = True, 76 | **kwargs, 77 | ) -> pd.Series: 78 | data = pd.Series(data) 79 | 80 | for t in ALL: 81 | if (str(data.dtype) in INT and t in FLT) or ( 82 | t == "string" and str(data.dtype) != "object" 83 | ): 84 | continue 85 | 86 | try: 87 | converted = data.astype(t) 88 | 89 | if t in FLT: 90 | if str(data.dtype) in FLT: 91 | continue 92 | 93 | elif str(data.dtype) not in FLT: 94 | return _auto_dtype_series(converted) 95 | 96 | if t in INT: 97 | if _has_na(converted) or ( 98 | str(data.dtype) in FLT and (data != converted).any() 99 | ): 100 | continue 101 | 102 | elif sorted(converted.unique()) == [0, 1]: 103 | t = "bool" 104 | converted = converted.astype(t) 105 | 106 | elif str(data.dtype) in INT: 107 | continue 108 | 109 | elif t == "string": 110 | if not _has_na(converted) and _string_is_bool(converted): 111 | t = "bool" 112 | converted = _string_to_bool(converted) 113 | 114 | elif converted.nunique() < len(converted) / 4: 115 | t = "category" 116 | converted = converted.astype(t) 117 | 118 | return converted 119 | 120 | except (OverflowError, ValueError): 121 | continue 122 | 123 | return data 124 | 125 | 126 | def _has_na(data: Union[pd.Series, Iterable]) -> bool: 127 | """Chec if any item in the series looks like NA or NaN.""" 128 | return pd.Series(data).isin(NA).any() 129 | 130 | 131 | def _string_is_bool(data: Union[pd.Series, Iterable]) -> bool: 132 | """ 133 | Contains only bool-like values 134 | 135 | Tell if a string or object type series contains only values that we 136 | recognize as boolean values. 137 | """ 138 | return pd.Series(s.lower() for s in data).isin(BOOL).all() 139 | 140 | 141 | def _string_to_bool(data: Union[pd.Series, Iterable]) -> pd.Series: 142 | """ 143 | Convert to bool if possible 144 | 145 | Convert a series or iterable to bool type if all elements can be 146 | recognized as a boolean value. 
147 | """ 148 | if _string_is_bool(data): 149 | return pd.Series(i.lower() in TRUE for i in data) 150 | 151 | return pd.Series(data) 152 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling"] 3 | build-backend = "hatchling.build" 4 | 5 | [tool.hatch.build.targets.wheel] 6 | packages = ["omnipath"] 7 | 8 | [project] 9 | name = "omnipath" 10 | version = "1.0.11" 11 | description = "Python client for the OmniPath web service" 12 | license = "MIT" 13 | authors = [ 14 | { name = "Michal Klein", email = "michalk@apple.com" }, 15 | { name = "Dénes Türei", email = "turei.denes@gmail.com" }, 16 | ] 17 | maintainers = [ 18 | { name = "Dénes Türei", email = "turei.denes@gmail.com" }, 19 | ] 20 | readme = "README.rst" 21 | classifiers = [ 22 | "Development Status :: 5 - Production/Stable", 23 | "Intended Audience :: Developers", 24 | "Intended Audience :: Science/Research", 25 | "License :: OSI Approved :: MIT License", 26 | "Operating System :: OS Independent", 27 | "Natural Language :: English", 28 | "Typing :: Typed", 29 | "Programming Language :: Python :: 3", 30 | "Programming Language :: Python :: 3.9", 31 | "Programming Language :: Python :: 3.10", 32 | "Programming Language :: Python :: 3.11", 33 | "Programming Language :: Python :: 3.12", 34 | "Programming Language :: Python :: 3.13", 35 | "Topic :: Scientific/Engineering :: Bio-Informatics", 36 | ] 37 | keywords = [ 38 | "protein", 39 | "mRNA", 40 | "miRNA", 41 | "DNA", 42 | "signaling", 43 | "SignaLink", 44 | "SIGNOR", 45 | "InnateDB", 46 | "IntAct", 47 | "Reactome", 48 | "MPPI", 49 | "NCI-PID", 50 | "DIP", 51 | "MatrixDB", 52 | "PANTHER", 53 | "PhosphoSite", 54 | "PhosphoPoint", 55 | "DEPOD", 56 | "SPIKE", 57 | "KEGG", 58 | "Autophagy", 59 | "ARN", 60 | "NRF2ome", 61 | "Guide to Pharmacology", 62 | "UniProt", 63 | "BioPAX", 64 | "Ensembl", 65 | "Surfaceome", 66 | "Exocarta", 67 | "Vesiclepedia", 68 | "Matrisome", 69 | "Human Protein Atlas", 70 | "Compleat", 71 | "CORUM", 72 | "ComplexPortal", 73 | "BioGRID", 74 | "STRING", 75 | "ICELLNET", 76 | "Cell Surface Protein Atlas", 77 | "COSMIC", 78 | "Cancer Gene Census", 79 | "IntOGen", 80 | "TopDB", 81 | "iTALK", 82 | "Human Plasma Membrane Receptome", 83 | "EMBRACE", 84 | "ELM", 85 | "phospho.ELM", 86 | "CancerSEA", 87 | "ComPPI", 88 | "CellPhoneDB", 89 | "DGIdb", 90 | "DisGeNet", 91 | "PAZAR", 92 | "ORegAnno", 93 | "TRED", 94 | "DoRothEA", 95 | "TRRD", 96 | "CPAD", 97 | "regulation", 98 | "phosphorylation", 99 | "kinase", 100 | "phosphatase", 101 | "dephosphorylation", 102 | "directed graph", 103 | "annotations", 104 | "cancer", 105 | "complexes", 106 | "intercellular communication", 107 | "HGNC", 108 | "GPCRdb", 109 | "MSigDB", 110 | "GSEA", 111 | "Phobius", 112 | "Phosphatome", 113 | "NetPath", 114 | "gene", 115 | "gene symbol", 116 | "mouse", 117 | "rat", 118 | "HomoloGene", 119 | "integrin", 120 | "adhesion", 121 | "receptor", 122 | "ligand", 123 | "transporter", 124 | "ion channel", 125 | "disease", 126 | "activity flow", 127 | "transcription", 128 | "PPI", 129 | "subcellular localization", 130 | "pathway", 131 | "signaling pathway", 132 | ] 133 | requires-python = ">=3.9" 134 | dependencies = [ 135 | "attrs>=20.2.0", 136 | "docrep>=0.3.1", 137 | "inflect>=4.1.0", 138 | "packaging>=24.2", 139 | "pandas>=1.2.0", 140 | "requests>=2.24.0", 141 | "tqdm>=4.51.0", 142 | "typing-extensions>=3.7.4.3", 143 | 
"urllib3>=1.26.0", 144 | "wrapt>=1.12.0", 145 | ] 146 | 147 | [project.optional-dependencies] 148 | docs = [ 149 | "attrs>=20.2.0", 150 | "docrep>=0.3.1", 151 | "inflect>=4.1.0", 152 | "packaging>=24.2", 153 | "pandas>=1.2.0", 154 | "requests>=2.24.0", 155 | "sphinx>=4", 156 | "sphinx-autodoc-annotation>=1.0.post1", 157 | "sphinx-autodoc-typehints>=1.10.3", 158 | "sphinx-copybutton>=0.5.2", 159 | "sphinx-last-updated-by-git>=0.3.8", 160 | "sphinx-paramlinks>=0.6.0", 161 | "sphinx-rtd-theme>=3.0.2", 162 | "sphinx-toolbox>=3.9.0", 163 | "tqdm>=4.51.0", 164 | "typing-extensions>=3.7.4.3", 165 | "urllib3>=1.26.0", 166 | "wrapt>=1.12.0", 167 | ] 168 | tests = [ 169 | "tox>=3.20.1", 170 | ] 171 | graph = [ 172 | "networkx>=2.3.0", 173 | ] 174 | 175 | [dependency-groups] 176 | dev = [ 177 | "bump2version>=1.0.1", 178 | "codecov-cli>=10.2.0", 179 | "pre-commit>=2.7.1", 180 | "tox-gh>=1.5.0", 181 | ] 182 | 183 | [project.urls] 184 | Homepage = "https://omnipathdb.org/" 185 | Documentation = "https://omnipath.readthedocs.io/" 186 | Repository = "https://github.com/saezlab/omnipath" 187 | Issues = "https://github.com/saezlab/omnipath/issues" 188 | 189 | [tool.isort] 190 | from_first = true 191 | line_length = 88 192 | multi_line_output = 3 193 | include_trailing_comma = true 194 | use_parentheses = true 195 | known_num="numpy,pandas" 196 | sections = "FUTURE,STDLIB,THIRDPARTY,NUM,FIRSTPARTY,LOCALFOLDER" 197 | no_lines_before="LOCALFOLDER" 198 | balanced_wrapping = true 199 | force_grid_wrap = 0 200 | length_sort = "1" 201 | indent = " " 202 | skip_glob = "docs/source/conf.py" 203 | -------------------------------------------------------------------------------- /tests/test_query.py: -------------------------------------------------------------------------------- 1 | from typing import _GenericAlias 2 | from collections import defaultdict 3 | 4 | import pytest 5 | 6 | from omnipath._core.query._query import ( 7 | Query, 8 | QueryType, 9 | EnzsubQuery, 10 | ComplexesQuery, 11 | IntercellQuery, 12 | AnnotationsQuery, 13 | InteractionsQuery, 14 | _get_synonyms, 15 | ) 16 | from omnipath._core.query._query_validator import ( 17 | EnzsubValidator, 18 | ComplexesValidator, 19 | IntercellValidator, 20 | AnnotationsValidator, 21 | InteractionsValidator, 22 | _to_string_set, 23 | ) 24 | 25 | 26 | class TestUtils: 27 | def test_get_synonyms_wrong_type(self): 28 | with pytest.raises(TypeError): 29 | _get_synonyms(42) 30 | 31 | def test_get_synonyms_from_s2p(self): 32 | res = _get_synonyms("cat") 33 | 34 | assert len(res) == 2 35 | assert res == ("cat", "cats") 36 | 37 | def test_get_synonyms_from_p2s(self): 38 | res = _get_synonyms("dogs") 39 | 40 | assert len(res) == 2 41 | assert res == ("dog", "dogs") 42 | 43 | def test_to_string_set_string(self): 44 | assert {"foo"} == _to_string_set("foo") 45 | 46 | def test_to_string_set_int(self): 47 | assert {"42"} == _to_string_set(42) 48 | 49 | def test_to_string_set_sequence(self): 50 | assert {"foo", "42"} == _to_string_set(["foo", 42]) 51 | 52 | 53 | class TestValidator: 54 | @pytest.mark.parametrize( 55 | "validator", 56 | [ 57 | EnzsubValidator, 58 | InteractionsValidator, 59 | ComplexesValidator, 60 | AnnotationsValidator, 61 | IntercellValidator, 62 | ], 63 | ) 64 | def test_validator_no_server_access(self, validator): 65 | for value in list(validator): 66 | v = validator(value) 67 | 68 | assert v.valid is None 69 | assert v.doc is None 70 | 71 | assert v(None) is None 72 | assert v("foo") == {"foo"} 73 | assert v(42) == {"42"} 74 | assert v(True) == {"1"} 75 | 
assert v(False) == {"0"} 76 | assert v(["foo", "foo"]) == {"foo"} 77 | assert v(["foo", 42]) == {"foo", "42"} 78 | assert v({"foo", "bar", "baz"}) == {"foo", "bar", "baz"} 79 | 80 | assert issubclass(type(v.annotation), (_GenericAlias, type)) 81 | 82 | 83 | class TestQuery: 84 | @pytest.mark.parametrize( 85 | "query,validator", 86 | zip( 87 | [ 88 | EnzsubQuery, 89 | InteractionsQuery, 90 | ComplexesQuery, 91 | AnnotationsQuery, 92 | IntercellQuery, 93 | ], 94 | [ 95 | EnzsubValidator, 96 | InteractionsValidator, 97 | ComplexesValidator, 98 | AnnotationsValidator, 99 | IntercellValidator, 100 | ], 101 | ), 102 | ) 103 | def test_query_correct_validator(self, query, validator): 104 | assert query.__validator__ == validator 105 | 106 | def test_query_endpoint(self): 107 | for q in list(QueryType): 108 | q = QueryType(q) 109 | 110 | assert issubclass(q.value, Query) 111 | assert q.endpoint == q.name.lower() 112 | 113 | @pytest.mark.parametrize( 114 | "query,validator", 115 | zip( 116 | [ 117 | EnzsubQuery, 118 | InteractionsQuery, 119 | ComplexesQuery, 120 | AnnotationsQuery, 121 | IntercellQuery, 122 | ], 123 | [ 124 | EnzsubValidator, 125 | InteractionsValidator, 126 | ComplexesValidator, 127 | AnnotationsValidator, 128 | IntercellValidator, 129 | ], 130 | ), 131 | ) 132 | def test_query_delegation(self, query, validator, mocker): 133 | call_spy = mocker.spy(validator, "__call__") 134 | 135 | qdb = query("databases") 136 | _ = qdb("foo") 137 | 138 | call_spy.assert_called_once_with( 139 | getattr(qdb.__validator__, qdb._query_name), "foo" 140 | ) 141 | assert call_spy.spy_return == {"foo"} 142 | assert qdb.doc is None 143 | 144 | for attr in ("valid", "annotation", "doc"): 145 | m = mocker.patch.object( 146 | validator, attr, new_callable=mocker.PropertyMock, return_value="foo" 147 | ) 148 | assert getattr(qdb, attr) == "foo" 149 | 150 | m.assert_called_once() 151 | 152 | @pytest.mark.parametrize( 153 | "query", 154 | [ 155 | EnzsubQuery, 156 | InteractionsQuery, 157 | ComplexesQuery, 158 | AnnotationsQuery, 159 | IntercellQuery, 160 | ], 161 | ) 162 | def test_query_synonym(self, query): 163 | mapper = defaultdict(list) 164 | for v in list(query): 165 | name = "_".join(v.name.split("_")[:-1]) 166 | mapper[name].append(v.value) 167 | 168 | for vs in mapper.values(): 169 | assert len(vs) == 2 170 | assert len({query(v).param for v in vs}) == 1 171 | -------------------------------------------------------------------------------- /omnipath/_core/cache/_cache.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from copy import copy 3 | from shutil import rmtree 4 | from typing import Any, Union, Optional 5 | from pathlib import Path 6 | import os 7 | import pickle 8 | 9 | import pandas as pd 10 | 11 | 12 | def _is_empty(data: Optional[pd.DataFrame]) -> bool: 13 | return data is None or (isinstance(data, pd.DataFrame) and not len(data)) 14 | 15 | 16 | class Cache(ABC): 17 | """ 18 | Abstract class which defines the caching interface. 19 | 20 | Empty values (`None` or an empty :class:`pandas.DataFrame`) will not be saved in the cache.
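Example
-------
A sketch of typical usage through the global options object (illustrative only):

>>> from omnipath import options
>>> options.cache["foo"] = 42  # doctest: +SKIP
>>> options.cache["foo"]  # doctest: +SKIP
42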
21 | """ 22 | 23 | @abstractmethod 24 | def __getitem__(self, key: str) -> Optional[Any]: 25 | pass 26 | 27 | @abstractmethod 28 | def __setitem__(self, key: str, value: Any) -> None: 29 | pass 30 | 31 | @abstractmethod 32 | def __len__(self) -> int: 33 | pass 34 | 35 | @abstractmethod 36 | def clear(self) -> None: # noqa: D102 37 | pass 38 | 39 | @property 40 | @abstractmethod 41 | def path(self) -> Optional[Union[str, Path]]: # noqa: D102 42 | pass 43 | 44 | @abstractmethod 45 | def __str__(self) -> str: 46 | pass 47 | 48 | def __repr__(self) -> str: 49 | return str(self) 50 | 51 | 52 | class FileCache(Cache): 53 | """ 54 | Cache which persists the data into :mod:`pickle` files. 55 | 56 | Parameters 57 | ---------- 58 | path 59 | Path to a directory where the files will be stored. 60 | """ 61 | 62 | _suffix = ".pickle" 63 | 64 | def __init__(self, path: Union[str, Path]): 65 | if not isinstance(path, (str, Path)): 66 | raise TypeError( 67 | f"Expected `path` to be either `str` or `pathlib.Path`, " 68 | f"found `{type(path).__name__}`." 69 | ) 70 | if not str(path): 71 | raise ValueError("Empty cache path.") 72 | 73 | self._cache_dir = Path(path) 74 | 75 | def __contains__(self, key: str) -> bool: 76 | if not key.endswith(self._suffix): 77 | key += self._suffix 78 | 79 | return (self._cache_dir / key).is_file() 80 | 81 | def __setitem__(self, key: str, value: Any) -> None: 82 | if _is_empty(value): 83 | return 84 | self._cache_dir.mkdir(parents=True, exist_ok=True) 85 | 86 | fname = str(key) 87 | if not fname.endswith(self._suffix): 88 | fname += self._suffix 89 | 90 | with open(self._cache_dir / fname, "wb") as fout: 91 | pickle.dump(value, fout) 92 | 93 | def __getitem__(self, key: str) -> Any: 94 | if not key.endswith(self._suffix): 95 | key += self._suffix 96 | 97 | if not (self._cache_dir / key).is_file(): 98 | raise KeyError(self._cache_dir / key) 99 | 100 | with open(self._cache_dir / key, "rb") as fin: 101 | return pickle.load(fin) 102 | 103 | def __len__(self) -> int: 104 | return ( 105 | len([f for f in os.listdir(self.path) if str(f).endswith(self._suffix)]) 106 | if self.path.is_dir() 107 | else 0 108 | ) 109 | 110 | @property 111 | def path(self) -> Path: 112 | """Return the directory where the cache files are stored.""" 113 | return self._cache_dir 114 | 115 | def clear(self) -> None: 116 | """Remove all files and the directory under :attr:`path`.""" 117 | if self._cache_dir.is_dir(): 118 | rmtree(self._cache_dir) 119 | 120 | def __str__(self) -> str: 121 | return f"<{self.__class__.__name__}[size={len(self)}, path={str(self.path)!r}]>" 122 | 123 | 124 | class MemoryCache(dict, Cache): 125 | """ 126 | Cache which persists the data into the memory. 127 | 128 | Objects stored in the cache are copied using :func:`copy.copy``. 
129 | """ 130 | 131 | @property 132 | def path(self) -> Optional[str]: 133 | """Return `'memory'`.""" 134 | return "memory" 135 | 136 | def __setitem__(self, key: str, value: Any) -> None: 137 | if _is_empty(value): 138 | return 139 | # the value is usually a dataframe (copy for safety) 140 | return super().__setitem__(key, copy(value)) 141 | 142 | def __getitem__(self, key: str) -> Any: 143 | return copy(super().__getitem__(key)) 144 | 145 | def __str__(self) -> str: 146 | return f"<{self.__class__.__name__}[size={len(self)}]>" 147 | 148 | def __repr__(self) -> str: 149 | return str(self) 150 | 151 | def __copy__(self) -> "MemoryCache": 152 | return self 153 | 154 | def copy(self) -> "MemoryCache": 155 | """Return self.""" 156 | return self 157 | 158 | 159 | class NoopCache(MemoryCache): 160 | """Cache which doesn't save anything.""" 161 | 162 | @property 163 | def path(self) -> Optional[str]: 164 | """Return `None`.""" 165 | return None 166 | 167 | def __setitem__(self, key: str, value: Any) -> None: 168 | pass 169 | 170 | def __str__(self): 171 | return f"<{self.__class__.__name__}>" 172 | 173 | 174 | def clear_cache() -> None: 175 | """Remove all cached data from :attr:`omnipath.options.cache`.""" 176 | from omnipath import options 177 | 178 | options.cache.clear() 179 | 180 | 181 | __all__ = [clear_cache] 182 | -------------------------------------------------------------------------------- /tests/test_options.py: -------------------------------------------------------------------------------- 1 | from os import remove 2 | from typing import Optional 3 | from pathlib import Path 4 | from configparser import NoSectionError 5 | 6 | import pytest 7 | 8 | from omnipath.constants import License 9 | from omnipath._core.utils._options import Options 10 | from omnipath.constants._pkg_constants import DEFAULT_OPTIONS 11 | 12 | 13 | class TestOptions: 14 | def test_invalid_url_type(self, options: Options): 15 | with pytest.raises(TypeError): 16 | options.url = 42 17 | 18 | def test_invalid_url(self, options: Options): 19 | with pytest.raises(ValueError): 20 | options.url = "foo" 21 | 22 | def test_invalid_license(self, options: Options): 23 | with pytest.raises(ValueError): 24 | options.license = "foo" 25 | 26 | def test_invalid_cache_type(self, options: Options): 27 | with pytest.raises(TypeError): 28 | options.cache = 42 29 | 30 | def test_invalid_password_type(self, options: Options): 31 | with pytest.raises(TypeError): 32 | options.password = 42 33 | 34 | def test_invalid_num_retries(self, options: Options): 35 | with pytest.raises(ValueError): 36 | options.num_retries = -1 37 | 38 | def test_invalid_timeout(self, options: Options): 39 | with pytest.raises(ValueError): 40 | options.timeout = 0 41 | 42 | def test_invalid_chunk_size(self, options: Options): 43 | with pytest.raises(ValueError): 44 | options.chunk_size = 0 45 | 46 | def test_from_options_invalid_type(self): 47 | with pytest.raises(TypeError): 48 | Options.from_options("foo") 49 | 50 | def test_url_localhost(self, options: Options): 51 | options.url = "https://localhost" 52 | 53 | assert options.url == "https://localhost" 54 | 55 | @pytest.mark.parametrize("license", list(License)) 56 | def test_valid_license(self, options: Options, license: License): 57 | options.license = license.value 58 | 59 | assert isinstance(options.license, License) 60 | assert options.license == license 61 | 62 | @pytest.mark.parametrize("pwd", ["foo", None]) 63 | def test_password(self, options: Options, pwd: Optional[str]): 64 | options.password = pwd 
65 | 66 | assert options.password == pwd 67 | 68 | def test_from_options(self, options: Options): 69 | new_opt = Options.from_options(options) 70 | 71 | for k, v in options.__dict__.items(): 72 | assert getattr(new_opt, k) == v 73 | 74 | def test_from_options_new_values(self, options: Options): 75 | new_opt = Options.from_options( 76 | options, autoload=not options.autoload, num_retries=0 77 | ) 78 | 79 | for k, v in options.__dict__.items(): 80 | if k not in ("autoload", "num_retries"): 81 | assert getattr(new_opt, k) == v 82 | 83 | assert new_opt.autoload != options.autoload 84 | assert new_opt.num_retries == 0 85 | 86 | def test_from_config_no_file(self, config_backup): 87 | if Path(Options.config_path).exists(): 88 | remove(Options.config_path) 89 | 90 | new_opt = Options.from_config() 91 | 92 | for k, v in DEFAULT_OPTIONS.__dict__.items(): 93 | if hasattr(new_opt, k) and not k.startswith("_"): 94 | assert getattr(new_opt, k) == v 95 | 96 | def test_from_config_section_is_not_url(self): 97 | with pytest.raises(NoSectionError, match=r"No section: 'http://foo.bar'"): 98 | Options.from_config("http://foo.bar") 99 | 100 | def test_write_config(self, options: Options, config_backup): 101 | options.timeout = 1337 102 | options.license = License.COMMERCIAL 103 | options.password = "foobarbaz" 104 | options.fallback_urls = DEFAULT_OPTIONS.fallback_urls 105 | options.write() 106 | 107 | new_opt = Options.from_config() 108 | for k, v in options.__dict__.items(): 109 | if k == "cache": 110 | assert type(new_opt.cache) == type(options.cache) # noqa: E721 111 | elif k == "password": 112 | # don't store the password in the file 113 | assert getattr(new_opt, k) is None 114 | elif k not in ("timeout", "license"): 115 | assert getattr(new_opt, k) == v 116 | 117 | assert new_opt.timeout == 1337 118 | assert new_opt.license == License.COMMERCIAL 119 | 120 | def test_write_new_section(self, options: Options, config_backup): 121 | options.timeout = 42 122 | options.fallback_urls = DEFAULT_OPTIONS.fallback_urls 123 | options.write("https://foo.bar") 124 | 125 | new_opt = Options.from_config("https://foo.bar") 126 | assert options is not new_opt 127 | for k, v in options.__dict__.items(): 128 | if k == "url": 129 | assert v == options.url 130 | assert new_opt.url == "https://foo.bar" 131 | elif k == "cache": 132 | assert type(new_opt.cache) == type(options.cache) # noqa: E721 133 | else: 134 | assert getattr(new_opt, k) == v 135 | 136 | def test_write_new_section_not_url(self, options: Options, config_backup): 137 | with pytest.raises(ValueError, match=r"Invalid URL: `foobar`."): 138 | options.write("foobar") 139 | 140 | def test_contextmanager(self, options: Options): 141 | with options as new_opt: 142 | assert options is not new_opt 143 | for k, v in options.__dict__.items(): 144 | if k == "cache": 145 | assert type(new_opt.cache) == type(options.cache) # noqa: E721 146 | else: 147 | assert getattr(new_opt, k) == v 148 | -------------------------------------------------------------------------------- /omnipath/_core/utils/_static.py: -------------------------------------------------------------------------------- 1 | from typing import List, Union, Literal, Optional 2 | from functools import partial 3 | import re 4 | import logging 5 | import warnings 6 | 7 | import requests 8 | 9 | import pandas as pd 10 | 11 | from omnipath._core.utils import _options as opt 12 | from omnipath._core.downloader._downloader import Downloader 13 | 14 | 15 | def static_tables() -> pd.DataFrame: 16 | """ 17 | List the static 
tables available from OmniPath. 18 | 19 | Returns 20 | ------- 21 | A data frame with metadata about the static tables. 22 | """ 23 | refile = re.compile(  # parses one file row of an Apache-style directory listing 24 | r'<a href="[^"]+">([^<]+)</a>' 25 | r"\s+(\d{2}-\w+-\d{4}) (\d{2}:\d{2})" 26 | r"\s+(\d+)[\r\n]*" 27 | ) 28 | 29 | req = requests.get(opt.options.static_url, stream=True) 30 | 31 | result = pd.DataFrame( 32 | [ 33 | refile.match(line.decode("utf-8")).groups() 34 | for line in req.raw.readlines()[5:-2] 35 | ], 36 | columns=["name", "date", "time", "size"], 37 | ) 38 | 39 | result["url"] = [f"{opt.options.static_url}/{name}" for name in result.name] 40 | 41 | result = pd.concat( 42 | [ 43 | result, 44 | result.name.str.extract( 45 | r"(?P<query>[\w]+)_" 46 | r"(?P<resource>\w+)_" 47 | r"(?P<organism>\d+)\.tsv\.gz", 48 | expand=True, 49 | ), 50 | ], 51 | axis=1, 52 | ) 53 | 54 | return result 55 | 56 | 57 | def static_table( 58 | query: Literal["annotations", "interactions"], 59 | resource: str, 60 | organism: Union[int, str], 61 | strict_evidences: bool = True, 62 | dorothea_levels: Optional[List[Literal["A", "B", "C", "D"]]] = None, 63 | wide: bool = True, 64 | ) -> pd.DataFrame: 65 | """ 66 | Download a static table from OmniPath. 67 | 68 | A few resources and datasets are available also as plain TSV files and 69 | can be accessed without TLS. The purpose of these tables is to make the 70 | most often used OmniPath data available on computers with configuration 71 | issues. These tables are not the recommended way to access OmniPath 72 | data, and a warning is issued each time they are accessed. 73 | 74 | Parameters 75 | ---------- 76 | query 77 | A query type such as "annotations" or "interactions". 78 | resource 79 | Name of the resource or dataset, such as 80 | "CollecTRI" or "PROGENy". 81 | organism 82 | NCBI Taxonomy of the organism: 9606 for human, 83 | 10090 for mouse and 10116 for rat. 84 | strict_evidences 85 | Restrict the evidences to the queried 86 | datasets and resources. If set to False, the directions and effect signs 87 | and references might be based on other datasets and resources. 88 | wide 89 | Convert the annotation table to wide format, which 90 | corresponds more or less to the original resource. If the data comes 91 | from more than one resource, a list of wide tables will be returned. 92 | See examples at ``pivot_annotations``. 93 | dorothea_levels 94 | A list of confidence levels in case the accessed resource is DoRothEA. 95 | In DoRothEA, every TF-target interaction has a confidence score 96 | ranging from A to E, with A being the most reliable. 97 | By default here we take A, B and C level interactions 98 | (``["A", "B", "C"]``). 99 | Note that E-level interactions are not available in OmniPath. 100 | 101 | Returns 102 | ------- 103 | A data frame with the requested resource. 104 | """ 105 | msg = ( 106 | f"Accessing `{resource}` as a static table. This is not the " 107 | "recommended way to access OmniPath data; it is only a backup " 108 | "plan for situations when our server or your computer is " 109 | "experiencing issues."
110 | ) 111 | logging.warning(msg) 112 | warnings.warn(msg) # noqa: B028 113 | 114 | organism = str(organism) 115 | query_l = query.lower() 116 | resource_l = resource.lower() 117 | resources = () if resource_l in ("collectri", "dorothea") else (resource,) 118 | datasets = () if resources else (resource_l,) 119 | 120 | if query_l == "annotations": 121 | from omnipath._core.requests._annotations import Annotations as req_cls 122 | 123 | elif query_l == "interactions": 124 | from omnipath._core.requests.interactions._interactions import ( 125 | AllInteractions as req_cls, 126 | ) 127 | from omnipath._core.requests.interactions._interactions import ( 128 | InteractionDataset, 129 | ) 130 | 131 | s = static_tables() 132 | 133 | s = s[ 134 | (s["query"] == query_l) 135 | & (s.resource.str.lower() == resource_l) 136 | & (s.organism == organism) 137 | ].reset_index() 138 | 139 | if s.shape[0] == 0: 140 | msg = ( 141 | f"No static table is available for query `{query}`, resource " 142 | f"`{resource}` and organism `{organism}`. For a list of the " 143 | "available tables see `static_tables()`." 144 | ) 145 | logging.error(msg) 146 | raise ValueError(msg) 147 | 148 | url = s.url[0] 149 | logging.debug(f"Downloading static table from `{url}`.") 150 | downloader = Downloader() 151 | callback = partial( 152 | pd.read_csv, 153 | sep="\t", 154 | header=0, 155 | low_memory=False, 156 | compression="gzip", 157 | ) 158 | result = downloader.maybe_download(url, callback=callback, is_final=True) 159 | logging.debug(f"Finished downloading static table from `{url}`.") 160 | omnipath_req = req_cls() 161 | omnipath_req._last_param = { 162 | "original": {"strict_evidences": strict_evidences}, 163 | "final": {"resources": resources, "datasets": datasets}, 164 | } 165 | omnipath_req._wide = wide 166 | omnipath_req._datasets = {InteractionDataset(d) for d in datasets} 167 | logging.debug("Static table: converting dtypes.") 168 | result = omnipath_req._convert_dtypes(result) 169 | logging.debug("Static table: post-processing.") 170 | result = omnipath_req._post_process(result) 171 | 172 | if resource_l == "dorothea": 173 | logging.debug("Static table: filtering for DoRothEA confidence levels.") 174 | dorothea_levels = set(dorothea_levels or ("A", "B", "C"))  # fall back to the documented default levels 175 | result = result[result.dorothea_level.isin(dorothea_levels)] 176 | 177 | return result 178 | -------------------------------------------------------------------------------- /omnipath/_core/requests/_utils.py: -------------------------------------------------------------------------------- 1 | from types import MethodType 2 | from typing import * # noqa: F401 F403 (because of the argspec factory) 3 | from typing import Any, Dict, Union, Callable, Iterable, Optional 4 | from inspect import Parameter, isabstract 5 | import inspect 6 | 7 | import wrapt 8 | import typing_extensions # noqa: F401 9 | 10 | import pandas as pd 11 | 12 | from omnipath._core.utils._docs import d 13 | 14 | 15 | @d.get_full_description(base="get") 16 | @d.get_sections(base="get", sections=["Parameters", "Returns"]) 17 | def _get_helper(cls: type, **kwargs) -> pd.DataFrame: 18 | """ 19 | Perform a request to the [OmniPath]_ web service. 20 | 21 | Parameters 22 | ---------- 23 | kwargs 24 | Additional query parameters. 25 | 26 | Returns 27 | ------- 28 | :class:`pandas.DataFrame` 29 | The result, which depends on the type of the request and the supplied parameters.
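Example
-------
A hypothetical call through one of the request classes (the resource name
is made up for illustration):

>>> import omnipath
>>> df = omnipath.requests.Complexes.get(resources="CORUM")  # doctest: +SKIP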
30 | """ 31 | return cls()._get(**kwargs) 32 | 33 | 34 | def _inject_api_method( 35 | clazz: type, 36 | ) -> None: 37 | """ 38 | Create a decorator which does nothing except for modifying the function signature in the docstring. 39 | 40 | The function to be decorated must be a class method and is allowed only to have positional arguments, 41 | and variable keyword arguments (**kwargs). 42 | 43 | The resulting decorated function will containing only the positional arguments (including original type annotations) 44 | and possibly keyword only arguments. In this example signature might def fn(foo, bar, *, baz, quux), 45 | `baz` and `quux` are the keyword only arguments. 46 | 47 | Parameters 48 | ---------- 49 | clazz 50 | The class for which to create the query. Must not be abstract. 51 | 52 | Returns 53 | ------- 54 | :class:`callable` 55 | The decorator as described above. 56 | """ 57 | 58 | def argspec_factory(orig_fn: Callable) -> Callable: 59 | orig_fn = getattr(orig_fn, "__func__", orig_fn) 60 | orig_params = inspect.signature(orig_fn).parameters 61 | # maintain the original signature if the subclass has overriden the method 62 | # this will lose the docstring of the original function 63 | parameters = { 64 | k: v 65 | for k, v in orig_params.items() 66 | if k != "cls" 67 | and v.kind in (Parameter.POSITIONAL_ONLY, Parameter.POSITIONAL_OR_KEYWORD) 68 | } 69 | annotations = { 70 | k: v for k, v in clazz._annotations().items() if k not in parameters 71 | } 72 | 73 | for c in clazz.__mro__: 74 | if c.__name__ == "InteractionRequest": 75 | parameters["strict_evidences"] = Parameter( 76 | "strict_evidences", 77 | kind=Parameter.KEYWORD_ONLY, 78 | default=None, 79 | annotation=Optional[bool], 80 | ) 81 | 82 | sig = inspect.signature(lambda _: _) 83 | sig = sig.replace( 84 | parameters=[Parameter("cls", kind=Parameter.POSITIONAL_ONLY)] 85 | + list(parameters.values()) 86 | + [ 87 | Parameter(k, kind=Parameter.KEYWORD_ONLY, annotation=a) 88 | for k, a in sorted(annotations.items()) 89 | ] 90 | + [Parameter("kwargs", kind=Parameter.VAR_KEYWORD)] 91 | ) 92 | # modify locals() for argspec factory 93 | import omnipath # noqa: F401 94 | 95 | NoneType, pandas = type(None), pd 96 | adapter_code = f"def adapter{sig}: pass".replace(" /,", "") 97 | exec_locals = locals() 98 | exec(adapter_code, globals(), exec_locals) 99 | return exec_locals["adapter"] 100 | 101 | if not isinstance(clazz, type): 102 | raise TypeError( 103 | f"Expected `clazz` to be a type, found `{type(clazz).__name__}`." 
104 | ) 105 | 106 | if isabstract(clazz): 107 | return 108 | 109 | @wrapt.decorator(adapter=wrapt.adapter_factory(argspec_factory)) 110 | def wrapper(wrapped, _instance, args, kwargs): 111 | return wrapped(*args, **kwargs) 112 | 113 | from_class = hasattr(clazz, "get") and not hasattr(clazz.get, "__wrapped__") 114 | func = clazz.get if from_class else _get_helper 115 | func = getattr(func, "__func__", func) 116 | 117 | clazz.get = MethodType(wrapper(func), clazz) 118 | 119 | 120 | def _inject_params( 121 | params: Dict[str, Any], key: str, value: Optional[Union[str, Iterable[str]]] 122 | ) -> None: 123 | if value is None: 124 | return 125 | value = {value} if isinstance(value, str) else set(value) 126 | 127 | old_value = params.pop(key, None) 128 | if old_value is None: 129 | params[key] = value 130 | return 131 | 132 | old_value = {old_value} if isinstance(old_value, str) else set(old_value) 133 | 134 | params[key] = value | old_value 135 | 136 | 137 | def _split_unique_join(data: pd.Series, func: Optional[Callable] = None) -> pd.Series: 138 | mask = ~pd.isnull(data.astype("string")) 139 | data = data[mask] 140 | data = data.str.split(";") 141 | 142 | if func is None: 143 | data = data.apply( 144 | lambda row: ( 145 | ";".join(sorted(set(map(str, row)))) 146 | if isinstance(row, Iterable) 147 | else row 148 | ) 149 | ) 150 | else: 151 | data = data.apply(func) 152 | 153 | res = pd.Series([None] * len(mask)) 154 | res.loc[mask] = data 155 | 156 | return res 157 | 158 | 159 | def _strip_resource_label( 160 | data: pd.Series, func: Optional[Callable] = None 161 | ) -> pd.Series: 162 | return _split_unique_join( 163 | _split_unique_join(data.str.replace(r"[-\w]*:?(\d+)", r"\1", regex=True)), 164 | func=func, 165 | ) 166 | 167 | 168 | def _strip_resource_label_df( 169 | df: pd.DataFrame, 170 | col: str, 171 | func: Optional[Callable] = None, 172 | ) -> None: 173 | if col in df: 174 | df[f"{col}_stripped"] = _strip_resource_label(df[col], func=func) 175 | 176 | 177 | def _count_references(df: pd.DataFrame) -> None: 178 | if "references" in df: 179 | df["n_references"] = _strip_resource_label( 180 | df["references"], func=lambda row: len(set(row)) 181 | ) 182 | 183 | 184 | def _count_resources(df: pd.DataFrame) -> None: 185 | if "sources" in df: 186 | df["n_sources"] = df["sources"].astype(str).str.split(";").apply(len) 187 | df["n_primary_sources"] = ( 188 | df["sources"] 189 | .astype(str) 190 | .str.split(";") 191 | .apply( 192 | lambda row: ( 193 | len([r for r in row if "_" not in r]) if isinstance(row, Iterable) else 0 194 | ) 195 | ) 196 | ) 197 | 198 | 199 | _ERROR_EMPTY_FMT = ( 200 | "No {obj} were retrieved. Please check that you are supplying valid parameter values."
201 | ) 202 | -------------------------------------------------------------------------------- /omnipath/_core/requests/_annotations.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Union, Mapping, Iterable, Optional 2 | import logging 3 | 4 | import pandas as pd 5 | 6 | from omnipath._misc import dtypes 7 | from omnipath._core.query import QueryType 8 | from omnipath._core.utils._docs import d 9 | from omnipath._core.requests._request import OmnipathRequestABC 10 | from omnipath.constants._pkg_constants import Key, final 11 | 12 | _MAX_N_PROTS = 600 13 | 14 | 15 | @final 16 | class Annotations(OmnipathRequestABC): 17 | """Request annotations from [OmniPath]_.""" 18 | 19 | __string__ = frozenset({"source", "value"}) 20 | __categorical__ = frozenset({"entity_type", "label", "source"}) 21 | 22 | _query_type = QueryType.ANNOTATIONS 23 | 24 | def _modify_params(self, params: Dict[str, Any]) -> Dict[str, Any]: 25 | params.pop(Key.ORGANISM.value, None) 26 | 27 | return params 28 | 29 | @classmethod 30 | @d.dedent 31 | def params(cls) -> Dict[str, Any]: 32 | """%(query_params)s""" 33 | params = super().params() 34 | params.pop(Key.ORGANISM.value, None) 35 | 36 | return params 37 | 38 | @classmethod 39 | def get( 40 | cls, 41 | proteins: Optional[Union[str, Iterable[str]]] = None, 42 | resources: Optional[Union[str, Iterable[str]]] = None, 43 | force_full_download: bool = False, 44 | wide: bool = False, 45 | **kwargs, 46 | ) -> Union[pd.DataFrame, Dict[str, pd.DataFrame]]: 47 | """ 48 | Import annotations from [OmniPath]_. 49 | 50 | Retrieves protein annotations about function, localization, expression, structure and other properties of 51 | proteins from `OmniPath <https://omnipathdb.org>`__. 52 | 53 | Parameters 54 | ---------- 55 | proteins 56 | Genes or proteins for which annotations will be retrieved (UniProt IDs, HGNC Gene Symbols or miRBase IDs). 57 | 58 | In order to download annotations for protein complexes, write **'COMPLEX:'** before the gene symbols of 59 | the genes that make up the complex. 60 | 61 | If `None`, fetch annotations for all available genes or proteins. 62 | resources 63 | Load the annotations only from these databases. See :meth:`resources` for available options. 64 | If `None`, use all available resources. 65 | force_full_download 66 | Force the download of the entire annotations dataset. The full size of the data is ~1GB. 67 | We recommend retrieving the annotations for a set of proteins or only from a few resources, 68 | depending on your interest. 69 | wide 70 | Pivot the annotations from a long to a wide dataframe format, reconstituting the format 71 | of the original resource. 72 | kwargs 73 | Additional query parameters. 74 | 75 | Returns 76 | ------- 77 | :class:`pandas.DataFrame` 78 | A dataframe containing different molecule (protein, complex, gene, miRNA, small molecule) annotations. 79 | If `wide` is `True` and the result contains more than one resource, a `dict` of dataframes 80 | will be returned, one for each resource. 81 | 82 | Notes 83 | ----- 84 | There might also be a few annotated miRNAs and small molecules. The vast majority of protein complex 85 | annotations are inferred from the annotations of the members: if all members carry the same annotation, 86 | the complex inherits it. 87 | """ 88 | if proteins is None and resources is None and not force_full_download: 89 | raise ValueError( 90 | "Please specify `force_full_download=True` in order to download the full dataset."
91 | ) 92 | res_info = ( 93 | "all resources" 94 | if resources is None 95 | else f"the following resources: `{[resources] if isinstance(resources, str) else sorted(set(resources))}`" 96 | ) 97 | inst = cls() 98 | inst._wide = wide 99 | 100 | if proteins is not None: 101 | if isinstance(proteins, str): 102 | proteins = (proteins,) 103 | proteins = sorted(set(proteins)) 104 | 105 | logging.info( 106 | f"Downloading annotations for `{len(proteins)}` in `{_MAX_N_PROTS}` chunks from {res_info}" 107 | ) 108 | 109 | return pd.concat( 110 | [ 111 | inst._get( 112 | proteins=proteins[i * _MAX_N_PROTS : (i + 1) * _MAX_N_PROTS], 113 | resources=resources, 114 | **kwargs, 115 | ) 116 | for i in range((len(proteins) // _MAX_N_PROTS) + 1) 117 | if len(proteins[i * _MAX_N_PROTS : (i + 1) * _MAX_N_PROTS]) 118 | ] 119 | ) 120 | 121 | logging.info(f"Downloading annotations for all proteins from {res_info}") 122 | 123 | return inst._get(proteins=None, resources=resources, **kwargs) 124 | 125 | def _resource_filter(self, data: Mapping[str, Any], **_) -> bool: 126 | return True 127 | 128 | def _post_process(self, df: pd.DataFrame, **kwargs) -> pd.DataFrame: 129 | if self._wide: 130 | df = self.pivot_annotations(df) 131 | 132 | return df 133 | 134 | @classmethod 135 | def pivot_annotations( 136 | cls, 137 | df: pd.DataFrame, 138 | ) -> Union[pd.DataFrame, Dict[str, pd.DataFrame]]: 139 | """ 140 | Annotations from narrow to wide format 141 | 142 | Converts the annotations from a long to a wide dataframe format, 143 | reconstituting the format of the original resource. 144 | 145 | Parameters 146 | ---------- 147 | df 148 | An annotation dataframe. 149 | 150 | Returns 151 | ------- 152 | :class:`pandas.DataFrame` or `dict` 153 | A dataframe of various molecule (protein, complex, gene, miRNA, small molecule) annotations. 154 | If the data contains more than one resource, a `dict` of dataframes will be returned, one for each 155 | resource. 
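
        Examples
        --------
        A minimal sketch; the protein and resource names are illustrative
        assumptions only::

            import omnipath as op

            long_df = op.requests.Annotations.get(
                proteins=["ITGB1", "RET"], resources="Phobius"
            )
            # one resource -> a single wide frame; several resources ->
            # a dict mapping each resource name to its own wide frame
            wide = op.requests.Annotations.pivot_annotations(long_df)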
156 | """ 157 | if df.source.nunique() > 1: 158 | return { 159 | resource: cls.pivot_annotations(df[df.source == resource]) 160 | for resource in df.source.unique() 161 | } 162 | 163 | index_cols = ["record_id", "uniprot", "genesymbol", "label"] 164 | 165 | if "entity_type" in df.label.values: 166 | df = df.drop("entity_type", axis=1) 167 | 168 | else: 169 | index_cols.append("entity_type") 170 | 171 | return dtypes.auto_dtype( 172 | df.drop("source", axis=1) 173 | .set_index(index_cols) 174 | .unstack("label") 175 | .droplevel(axis=1, level=0) 176 | .reset_index() 177 | .drop("record_id", axis=1) 178 | ) 179 | 180 | 181 | __all__ = [Annotations] 182 | -------------------------------------------------------------------------------- /omnipath/_core/requests/interactions/_utils.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List, Mapping, Optional 2 | 3 | import pandas as pd 4 | 5 | from omnipath.constants._constants import InteractionDataset 6 | from omnipath._core.requests._utils import _ERROR_EMPTY_FMT 7 | from omnipath._core.requests._intercell import Intercell 8 | from omnipath._core.requests.interactions._interactions import ( 9 | Datasets_t, 10 | AllInteractions, 11 | ) 12 | 13 | 14 | def _to_dict(mapping: Optional[Mapping[Any, Any]]) -> Dict[Any, Any]: 15 | return {} if mapping is None else dict(mapping) 16 | 17 | 18 | def _swap_undirected(df: pd.DataFrame) -> pd.DataFrame: 19 | if "is_directed" not in df.columns: 20 | raise KeyError(f"Key `'is_directed'` not found in `{list(df.columns)}`.") 21 | 22 | directed = df.pop("is_directed") 23 | 24 | undirected = df.loc[~directed, :] 25 | if undirected.empty: 26 | return df 27 | 28 | undirected_swapped = undirected.copy() 29 | undirected_swapped[["source", "target"]] = undirected[["target", "source"]] 30 | 31 | if "source_genesymbol" in undirected: 32 | undirected_swapped[["source_genesymbol", "target_genesymbol"]] = undirected[ 33 | ["target_genesymbol", "source_genesymbol"] 34 | ] 35 | if "ncbi_tax_id_source" in undirected.columns: 36 | undirected_swapped[["ncbi_tax_id_source", "ncbi_tax_id_target"]] = undirected[ 37 | ["ncbi_tax_id_target", "ncbi_tax_id_source"] 38 | ] 39 | 40 | return pd.concat( 41 | [directed, undirected, undirected_swapped], 42 | axis=0, 43 | ignore_index=True, 44 | ) 45 | 46 | 47 | def import_intercell_network( 48 | include: Datasets_t = ( 49 | InteractionDataset.OMNIPATH, 50 | InteractionDataset.PATHWAY_EXTRA, 51 | InteractionDataset.KINASE_EXTRA, 52 | InteractionDataset.LIGREC_EXTRA, 53 | ), 54 | interactions_params: Optional[Mapping[str, Any]] = None, 55 | transmitter_params: Optional[Mapping[str, Any]] = None, 56 | receiver_params: Optional[Mapping[str, Any]] = None, 57 | ) -> pd.DataFrame: 58 | """ 59 | Import intercellular network combining intercellular annotations and protein interactions. 60 | 61 | First, it imports a network of protein-protein interactions. Then, it retrieves annotations about the proteins 62 | intercellular communication roles, once for the transmitter (delivering information from the expressing cell) and 63 | second, the receiver (receiving signal and relaying it towards the expressing cell) side. 64 | 65 | These 3 queries can be customized by providing parameters which will be passed to 66 | :meth:`omnipath.interactions.OmniPath.get` for the network and :meth:`omnipath.requests.Intercell` 67 | for the annotations. 
68 | 69 | Finally the 3 :class:`pandas.DataFrame` are combined in a way that the source proteins in each interaction annotated 70 | by the transmitter, and the target proteins by the receiver categories. If undirected interactions present 71 | (these are disabled by default) they will be duplicated, i.e. both partners can be both receiver and transmitter. 72 | 73 | Parameters 74 | ---------- 75 | include 76 | Interaction datasets to include for :meth:`omnipath.interactions.AllInteractions.get`. 77 | interactions_params 78 | Parameters for the :meth:`omnipath.interactions.AllInteractions.get`. 79 | transmitter_params 80 | Parameters defining the transmitter side of intercellular connections. 81 | See :meth:`omnipath.interactions.AllInteractions.params` for available values. 82 | receiver_params 83 | Parameters defining the receiver side of intercellular connections. 84 | See :meth:`omnipath.interactions.AllInteractions.params` for available values. 85 | 86 | Returns 87 | ------- 88 | :class:`pandas.DataFrame` 89 | A dataframe containing information about protein-protein interactions and the inter-cellular roles 90 | of the proteins involved in those interactions. 91 | """ 92 | interactions_params = _to_dict(interactions_params) 93 | transmitter_params = _to_dict(transmitter_params) 94 | receiver_params = _to_dict(receiver_params) 95 | 96 | # TODO: this should be refactored as: QueryType.INTERCELL("scope").param, etc. (also in many other places) 97 | transmitter_params.setdefault("causality", "trans") 98 | transmitter_params.setdefault("scope", "generic") 99 | receiver_params.setdefault("causality", "rec") 100 | receiver_params.setdefault("scope", "generic") 101 | 102 | interactions = AllInteractions.get(include=include, **interactions_params) 103 | if interactions.empty: 104 | raise ValueError(_ERROR_EMPTY_FMT.format(obj="interactions")) 105 | interactions = _swap_undirected(interactions) 106 | 107 | transmitters = Intercell.get(**transmitter_params) 108 | if transmitters.empty: 109 | raise ValueError(_ERROR_EMPTY_FMT.format(obj="transmitters")) 110 | receivers = Intercell.get(**receiver_params) 111 | if receivers.empty: 112 | raise ValueError(_ERROR_EMPTY_FMT.format(obj="receivers")) 113 | 114 | # fmt: off 115 | intracell = ['intracellular_intercellular_related', 'intracellular'] 116 | transmitters = transmitters.loc[~transmitters["parent"].isin(intracell), :].copy() 117 | transmitters.rename(columns={"source": "category_source"}, inplace=True) 118 | # this makes it 3x as fast during groupby, since all of these are categories 119 | # it's mostly because groupby needs observed=True + using string object (numpy) vs "string" 120 | transmitters[["category", "parent", "database"]] = transmitters[["category", "parent", "database"]].astype(str) 121 | 122 | receivers = receivers.loc[~receivers["parent"].isin(intracell), :].copy() 123 | receivers.rename(columns={"source": "category_source"}, inplace=True) 124 | receivers[["category", "parent", "database"]] = receivers[["category", "parent", "database"]].astype(str) 125 | 126 | res = pd.merge(interactions, transmitters, left_on="source", right_on="uniprot", how="inner") 127 | if res.empty: 128 | raise ValueError("No values are left after merging interactions and transmitters.") 129 | 130 | # fmt: on 131 | 132 | groupby_cols = ["category", "parent", "source", "target"] 133 | res = _join_str_col(res, "database", groupby_cols) 134 | res = _summarize_first(res, groupby_cols) 135 | 136 | res = pd.merge( 137 | res, 138 | receivers, 139 | how="inner", 140 
| left_on="target", 141 | right_on="uniprot", 142 | suffixes=("_intercell_source", "_intercell_target"), 143 | ) 144 | if res.empty: 145 | raise ValueError("No values are left after merging interactions and receivers.") 146 | 147 | groupby_cols = [ 148 | "category_intercell_source", 149 | "parent_intercell_source", 150 | "source", 151 | "target", 152 | "category_intercell_target", 153 | "parent_intercell_target", 154 | ] 155 | 156 | res = _join_str_col(res, "database_intercell_target", groupby_cols) 157 | res = _summarize_first(res, groupby_cols) 158 | 159 | # retype back as categories 160 | for col in ["category", "parent"]: 161 | for suffix in ["_intercell_source", "_intercell_target"]: 162 | res[f"{col}{suffix}"] = res[f"{col}{suffix}"].astype("category") 163 | 164 | return res.reset_index(drop=True) 165 | 166 | 167 | # pandas is a disaster: 168 | def _join_str_col(df: pd.DataFrame, col: str, groupby_cols: List[str]) -> pd.DataFrame: 169 | return df.assign( 170 | **{col: df.groupby(groupby_cols)[col].transform(lambda x: ";".join(x))} 171 | ) 172 | 173 | 174 | def _summarize_first(df: pd.DataFrame, groupby_cols: List[str]) -> pd.DataFrame: 175 | return ( 176 | df.groupby(groupby_cols, as_index=False).nth(0).copy() 177 | ) # much faster than 1st 178 | -------------------------------------------------------------------------------- /tests/test_compare_R.py: -------------------------------------------------------------------------------- 1 | from typing import Iterable, Optional 2 | 3 | import pytest 4 | 5 | import numpy as np 6 | import pandas as pd 7 | 8 | from omnipath.constants import License, Organism 9 | import omnipath as op 10 | from .conftest import RTester 11 | 12 | # in order to minimize server access, the tests are not parametrized 13 | # and the resources are chosen so that minimal data required is transferred 14 | # these tests will also run only on 1 job CI matrix, to further reduce the load 15 | # 16 | # note that these tests don't test whether `.params()` returns the valid values 17 | # this would require different Python interpreter invocation, since by design, during testing 18 | # `omnipath.options.autoload` is disabled 19 | 20 | 21 | def _assert_dataframes_equal( 22 | expected: pd.DataFrame, 23 | actual: pd.DataFrame, 24 | clazz: type = op.requests.Enzsub, 25 | remove_metadata: Optional[Iterable[str]] = None, 26 | ): 27 | assert isinstance(expected, pd.DataFrame) 28 | assert isinstance(actual, pd.DataFrame) 29 | 30 | # some small naming discrepancy 31 | actual.rename(columns={"n_primary_sources": "n_resources"}, inplace=True) 32 | # these are always present in our case 33 | if remove_metadata is None: 34 | remove_metadata = ["n_sources", "references_stripped"] 35 | for k in remove_metadata: 36 | if k in actual.columns: 37 | del actual[k] 38 | 39 | np.testing.assert_array_equal(expected.shape, actual.shape) 40 | # don't compare index since sometimes, it's not informative + differs across calls from OmnipathR 41 | col_order = sorted(expected.columns) 42 | np.testing.assert_array_equal(col_order, sorted(actual.columns)) 43 | 44 | expected = clazz()._convert_dtypes(expected) 45 | 46 | # this way, we know which column fails 47 | for col in col_order: 48 | e, a = expected[col], actual[col] 49 | emask = ~(pd.isna(e).values | pd.isnull(a).values) 50 | amask = ~(pd.isna(e).values | pd.isnull(a).values) 51 | 52 | np.testing.assert_array_equal(emask, amask) 53 | np.testing.assert_array_equal(e[emask], a[emask]) 54 | 55 | 56 | class TestEnzSub(RTester): 57 | def 
test_organism(self): 58 | organism = Organism.RAT 59 | expected = self.omnipathr.import_omnipath_enzsub( 60 | resources="DEPOD", genesymbols=False, organism=organism.code 61 | ) 62 | actual = op.requests.Enzsub.get( 63 | resources="DEPOD", genesymbols=False, organism=organism 64 | ) 65 | 66 | _assert_dataframes_equal(expected, actual) 67 | 68 | def test_resources(self): 69 | expected = self.omnipathr.import_omnipath_enzsub( 70 | resources="HPRD", genesymbols=True 71 | ) 72 | actual = op.requests.Enzsub.get(resources="HPRD", genesymbols=True) 73 | 74 | _assert_dataframes_equal(expected, actual) 75 | 76 | def test_fields(self): 77 | fields = ["isoforms", "ncbi_tax_id"] 78 | expected = self.omnipathr.import_omnipath_enzsub( 79 | resources="DEPOD", genesymbols=True, fields=fields 80 | ) 81 | actual = op.requests.Enzsub.get( 82 | resources="DEPOD", genesymbols=True, fields=fields 83 | ) 84 | 85 | _assert_dataframes_equal(expected, actual) 86 | 87 | def test_license(self): 88 | license = License.COMMERCIAL 89 | expected = self.omnipathr.import_omnipath_enzsub( 90 | resources="DEPOD", genesymbols=True, license=license.value 91 | ) 92 | actual = op.requests.Enzsub.get( 93 | resources="DEPOD", genesymbols=True, license=license 94 | ) 95 | 96 | _assert_dataframes_equal(expected, actual) 97 | 98 | 99 | class TestIntercell(RTester): 100 | def test_categories(self): 101 | expected = sorted(self.omnipathr.get_intercell_categories()) 102 | actual = sorted(op.requests.Intercell.categories()) 103 | 104 | np.testing.assert_array_equal(expected, actual) 105 | 106 | def test_generic_categories(self): 107 | expected = sorted(self.omnipathr.get_intercell_generic_categories()) 108 | actual = sorted(op.requests.Intercell.generic_categories()) 109 | 110 | np.testing.assert_array_equal(expected, actual) 111 | 112 | def test_normal_run(self): 113 | expected = self.omnipathr.import_omnipath_intercell( 114 | causality="transmitter", scope="specific", entity_types="protein" 115 | ) 116 | actual = op.requests.Intercell.get( 117 | causality="transmitter", scope="specific", entity_types="protein" 118 | ) 119 | 120 | _assert_dataframes_equal(expected, actual) 121 | 122 | 123 | class TestComplexes(RTester): 124 | def test_complex_genes(self): 125 | genes = ["ITGB1", "RET"] 126 | expected = self.omnipathr.import_omnipath_complexes(resources="CellPhoneDB") 127 | actual = op.requests.Complexes.get(database="CellPhoneDB") 128 | 129 | _assert_dataframes_equal( 130 | expected, 131 | actual, 132 | remove_metadata=[ 133 | "n_sources", 134 | "n_resources", 135 | "n_references", 136 | "references_stripped", 137 | ], 138 | ) 139 | 140 | expected = self.omnipathr.get_complex_genes(genes, complexes=expected) 141 | actual = op.requests.Complexes.complex_genes(genes, complexes=actual) 142 | 143 | _assert_dataframes_equal( 144 | expected, 145 | actual, 146 | remove_metadata=[ 147 | "n_sources", 148 | "n_resources", 149 | "n_references", 150 | "references_stripped", 151 | ], 152 | ) 153 | 154 | 155 | class TestAnnotations(RTester): 156 | def test_normal_run(self): 157 | proteins = ["ITGB1", "RET"] 158 | expected = self.omnipathr.import_omnipath_annotations( 159 | proteins=proteins, resources="Phobius", genesymbols=False 160 | ) 161 | actual = op.requests.Annotations.get( 162 | proteins=proteins, databases="Phobius", genesymbols=False 163 | ) 164 | 165 | _assert_dataframes_equal(expected, actual) 166 | 167 | 168 | class TestInteractions(RTester): 169 | def test_tfregulons_levels(self): 170 | fields = ["tfregulons_level", "tfregulons_tfbs"] 
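        # query the R client and this package with identical arguments, then
        # compare the two frames column-by-column via `_assert_dataframes_equal`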
171 | expected = self.omnipathr.import_tf_target_interactions( 172 | resources=["ABS"], fields=fields, genesymbols=False 173 | ) 174 | actual = op.interactions.TFtarget.get( 175 | resources=["ABS"], fields=fields, genesymbols=False 176 | ) 177 | 178 | _assert_dataframes_equal(expected, actual) 179 | 180 | def test_dorothea_levels(self): 181 | fields = ["dorothea_level"] 182 | expected = self.omnipathr.import_dorothea_interactions( 183 | resources=["ABS"], dorothea_levels="D", fields=fields, genesymbols=False 184 | ) 185 | actual = op.interactions.Dorothea.get( 186 | resources=["ABS"], dorothea_levels="D", fields=fields, genesymbols=False 187 | ) 188 | 189 | _assert_dataframes_equal(expected, actual) 190 | 191 | def test_omnipath(self): 192 | expected = self.omnipathr.import_omnipath_interactions( 193 | resources="CA1", genesymbols=False 194 | ) 195 | actual = op.interactions.OmniPath.get(resource="CA1", genesymbols=False) 196 | 197 | _assert_dataframes_equal(expected, actual) 198 | 199 | 200 | class TestUtils(RTester): 201 | @pytest.mark.skip(reason="TODO: different index order, ref. mismatch") 202 | def test_import_intercell_network(self): 203 | from rpy2.robjects import ListVector 204 | 205 | interactions_params = {"resources": "CellPhoneDB"} 206 | transmitter_params = {"categories": "ligand"} 207 | receiver_params = {"categories": "receptor"} 208 | 209 | expected = self.omnipathr.import_intercell_network( 210 | interactions_param=ListVector(list(interactions_params.items())), 211 | transmitter_param=ListVector(list(transmitter_params.items())), 212 | receiver_param=ListVector(list(receiver_params.items())), 213 | ) 214 | actual = op.interactions.import_intercell_network( 215 | interactions_params=interactions_params, 216 | transmitter_params=transmitter_params, 217 | receiver_params=receiver_params, 218 | ) 219 | 220 | _assert_dataframes_equal(expected, actual) 221 | -------------------------------------------------------------------------------- /omnipath/_core/downloader/_downloader.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO 2 | from copy import copy 3 | from typing import Any, Mapping, Callable, Optional 4 | from hashlib import md5 5 | from urllib.parse import urljoin, urlparse 6 | import json 7 | import logging 8 | import traceback 9 | 10 | from requests import Request, Session, PreparedRequest 11 | from tqdm.auto import tqdm 12 | from urllib3.util import Retry 13 | from requests.adapters import HTTPAdapter 14 | from requests.exceptions import RequestException 15 | 16 | from omnipath._core.utils._options import Options 17 | from omnipath.constants._pkg_constants import ( 18 | UNKNOWN_SERVER_VERSION, 19 | Key, 20 | Format, 21 | Endpoint, 22 | ) 23 | 24 | 25 | class Downloader: 26 | """ 27 | Class which performs a GET request to the server in order to retrieve some remote resources. 28 | 29 | Also implements other behavior, such as retrying after some status codes. 30 | 31 | Parameters 32 | ---------- 33 | opts 34 | Options. If `None`, :attr:`omnipath.options` are used. 35 | """ 36 | 37 | def __init__(self, opts: Optional[Options] = None): 38 | if opts is None: 39 | from omnipath import options as opts 40 | 41 | if not isinstance(opts, Options): 42 | raise TypeError( 43 | f"Expected `opts` to be of type `Options`, found {type(opts).__name__}." 
44 | ) 45 | 46 | self._session = Session() 47 | self._options = copy(opts) # this does not copy MemoryCache 48 | 49 | if self._options.num_retries > 0: 50 | adapter = HTTPAdapter( 51 | max_retries=Retry( 52 | total=self._options.num_retries, 53 | redirect=5, 54 | status_forcelist=[413, 429, 500, 502, 503, 504], 55 | backoff_factor=1, 56 | ) 57 | ) 58 | self._session.mount("http://", adapter) 59 | self._session.mount("https://", adapter) 60 | 61 | logging.debug(f"Initialized `{self}`") 62 | 63 | @property 64 | def resources(self) -> Mapping[str, Mapping[str, Any]]: 65 | """Return the resources.""" 66 | logging.debug("Fetching resources") 67 | return self.maybe_download( 68 | Endpoint.RESOURCES.s, 69 | params={Key.FORMAT.s: Format.JSON.s}, 70 | callback=json.load, 71 | ) 72 | 73 | def maybe_download( 74 | self, 75 | url: str, 76 | callback: Callable[[BytesIO], Any], 77 | params: Optional[Mapping[str, str]] = None, 78 | cache: bool = True, 79 | is_final: bool = False, 80 | **_, 81 | ) -> Any: 82 | """ 83 | Fetch the data from the cache, if present, or download them from the ``url``. 84 | 85 | The key, under which is the download result saved, is the MD5 hash of the ``url``, including the ``params``. 86 | 87 | Parameters 88 | ---------- 89 | url 90 | URL that is used to access the remote resources if the data is not found in the cache. 91 | callback 92 | Function applied on the downloaded data. Usually, this will return either a :class:`pandas.DataFrame` 93 | or a :class:`dict`. 94 | params 95 | Parameters of the `GET` request. 96 | cache 97 | Whether to save the files to the cache or not. 98 | is_final 99 | Whether ``url`` is final or should be prefixed with :attr:`_options.url`. 100 | 101 | Returns 102 | ------- 103 | :class:`typing.Any` 104 | The result of applying ``callback`` on the maybe downloaded data. 105 | """ 106 | if not callable(callback): 107 | raise TypeError( 108 | f"Expected `callback` to be `callable`, found `{type(callback).__name__}`." 109 | ) 110 | 111 | if is_final: 112 | urls = (url,) if isinstance(url, str) else url 113 | else: 114 | urls = [ 115 | urljoin(baseurl, url) 116 | for baseurl in ( 117 | (self._options.url,) + tuple(self._options.fallback_urls) 118 | ) 119 | ] 120 | 121 | res = None 122 | 123 | for the_url in urls: 124 | urlp = urlparse(the_url) 125 | domain = f"{urlp.scheme}://{urlp.netloc}/" 126 | logging.debug(f"Attempting server `{domain}`.") 127 | req = self._session.prepare_request( 128 | Request( 129 | "GET", 130 | the_url, 131 | params=params, 132 | headers={"User-agent": "omnipathdb-user"}, 133 | ) 134 | ) 135 | key = md5(bytes(req.url, encoding="utf-8")).hexdigest() 136 | logging.debug(f"Looking up in cache: `{req.url}` ({key!r}).") 137 | 138 | if key in self._options.cache: 139 | logging.debug(f"Found data in cache `{self._options.cache}[{key!r}]`") 140 | res = self._options.cache[key] 141 | else: 142 | try: 143 | res = self._download(req) 144 | except RequestException: 145 | logging.warning(f"Failed to download from `{domain}`.") 146 | logging.warning(traceback.format_exc()) 147 | continue 148 | res = callback(res) 149 | if cache: 150 | logging.debug(f"Caching result to `{self._options.cache}[{key!r}]`") 151 | self._options.cache[key] = res 152 | else: 153 | logging.debug("Not caching the results") 154 | break 155 | 156 | if res is None: 157 | raise 158 | 159 | return res 160 | 161 | def _download(self, req: PreparedRequest) -> BytesIO: 162 | """ 163 | Request the remote resources. 
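
        The response is streamed in chunks of ``chunk_size`` bytes into an
        in-memory buffer, updating a progress bar unless it is disabled in
        the options.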
164 | 165 | Parameters 166 | ---------- 167 | req 168 | `GET` request to perform. 169 | 170 | Returns 171 | ------- 172 | :class:`io.BytesIO` 173 | File-like object containing the data. Usually a json- or csv-like data is present inside. 174 | """ 175 | logging.info(f"Downloading data from `{req.url}`") 176 | settings = self._session.merge_environment_settings( 177 | req.url, {}, None, None, None 178 | ) 179 | settings["stream"] = True 180 | settings["timeout"] = self._options.timeout 181 | handle = BytesIO() 182 | with self._session.send(req, **settings) as resp: 183 | resp.raise_for_status() 184 | total = resp.headers.get("content-length", None) 185 | 186 | with tqdm( 187 | unit="B", 188 | unit_scale=True, 189 | miniters=1, 190 | unit_divisor=1024, 191 | total=total if total is None else int(total), 192 | disable=not self._options.progress_bar, 193 | ) as t: 194 | for chunk in resp.iter_content(chunk_size=self._options.chunk_size): 195 | t.update(len(chunk)) 196 | handle.write(chunk) 197 | 198 | handle.flush() 199 | handle.seek(0) 200 | 201 | return handle 202 | 203 | def __str__(self) -> str: 204 | return f"<{self.__class__.__name__}[options={self._options}]>" 205 | 206 | def __repr__(self) -> str: 207 | return str(self) 208 | 209 | 210 | def _get_server_version(options: Options) -> str: 211 | """Try and get the server version.""" 212 | import re 213 | 214 | def callback(fp: BytesIO) -> str: 215 | """Parse the version.""" 216 | return re.findall( 217 | r"\d+\.\d+.\d+", fp.getvalue().decode("utf-8"), flags=re.IGNORECASE 218 | )[0] 219 | 220 | try: 221 | if not options.autoload: 222 | raise ValueError( 223 | "Autoloading is disabled. You can enable it by setting " 224 | "`omnipath.options.autoload = True`." 225 | ) 226 | 227 | with Options.from_options( 228 | options, 229 | num_retries=0, 230 | timeout=(1.0, 3.0), 231 | cache=None, 232 | progress_bar=False, 233 | chunk_size=1024, 234 | ) as opt: 235 | return Downloader(opt).maybe_download( 236 | Endpoint.ABOUT.s, 237 | callback, 238 | params={Key.FORMAT.s: Format.TEXT.s}, 239 | cache=False, 240 | ) 241 | except Exception as e: 242 | logging.debug(f"Unable to get server version. 
Reason: `{e}`") 243 | 244 | return UNKNOWN_SERVER_VERSION 245 | -------------------------------------------------------------------------------- /tests/test_downloader.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO, StringIO 2 | from urllib.parse import urljoin 3 | import logging 4 | 5 | import pytest 6 | import requests 7 | 8 | import numpy as np 9 | import pandas as pd 10 | 11 | from omnipath import options as opt 12 | from omnipath._core.utils._options import Options 13 | from omnipath.constants._pkg_constants import UNKNOWN_SERVER_VERSION, Endpoint 14 | from omnipath._core.downloader._downloader import Downloader, _get_server_version 15 | 16 | opt.fallback_urls = () 17 | 18 | 19 | class TestDownloader: 20 | def test_options_wrong_type(self): 21 | with pytest.raises(TypeError): 22 | Downloader("foobar") 23 | 24 | def test_str_repr(self, options: Options): 25 | d = Downloader(options) 26 | 27 | assert str(d) == f"<{d.__class__.__name__}[options={options}]>" 28 | assert repr(d) == f"<{d.__class__.__name__}[options={options}]>" 29 | 30 | def test_initialize_local_options(self, options: Options): 31 | options.password = "foo" 32 | options.timeout = 1337 33 | d = Downloader(options) 34 | 35 | assert d._options is not options 36 | assert str(d._options) == str(options) 37 | assert str(d._options) != str(opt) 38 | 39 | options.password = "bar" 40 | assert d._options.password == "foo" 41 | 42 | def test_initialize_global_options(self): 43 | d = Downloader() 44 | 45 | assert d._options is not opt 46 | assert str(d._options) == str(opt) 47 | 48 | def test_resources_cached_values(self, downloader: Downloader, requests_mock): 49 | data = {"foo": "bar", "42": 1337} 50 | requests_mock.register_uri( 51 | "GET", urljoin(downloader._options.url, Endpoint.RESOURCES.s), json=data 52 | ) 53 | 54 | assert downloader.resources == data 55 | assert requests_mock.called_once 56 | 57 | assert downloader.resources == data 58 | assert requests_mock.called_once 59 | 60 | def test_resources_no_cached_values(self, downloader: Downloader, requests_mock): 61 | data = {"foo": "bar", "42": 1337} 62 | requests_mock.register_uri( 63 | "GET", urljoin(downloader._options.url, Endpoint.RESOURCES.s), json=data 64 | ) 65 | 66 | assert downloader.resources == data 67 | assert requests_mock.called_once 68 | 69 | downloader._options.cache.clear() 70 | 71 | assert downloader.resources == data 72 | assert len(requests_mock.request_history) == 2 73 | 74 | def test_maybe_download_not_callable(self, downloader: Downloader): 75 | with pytest.raises(TypeError): 76 | downloader.maybe_download("foo", callback=None) 77 | 78 | def test_maybe_download_wrong_callable( 79 | self, downloader: Downloader, requests_mock, csv_data: bytes 80 | ): 81 | url = urljoin(downloader._options.url, "foobar") 82 | requests_mock.register_uri("GET", url, content=csv_data) 83 | 84 | with pytest.raises(ValueError, match=r"Expected object or value"): 85 | downloader.maybe_download(url, callback=pd.read_json) 86 | 87 | def test_maybe_download_passes_params( 88 | self, downloader: Downloader, requests_mock, csv_data: bytes 89 | ): 90 | csv_url = urljoin(downloader._options.url, "foobar/?format=csv") 91 | csv_df = pd.read_csv(BytesIO(csv_data)) 92 | json_url = urljoin(downloader._options.url, "foobar/?format=json") 93 | json_handle = StringIO() 94 | csv_df.to_json(json_handle) 95 | 96 | requests_mock.register_uri("GET", csv_url, content=csv_data) 97 | requests_mock.register_uri( 98 | "GET", json_url, 
content=bytes(json_handle.getvalue(), encoding="utf-8") 99 | ) 100 | 101 | res1 = downloader.maybe_download(csv_url, callback=pd.read_csv) 102 | res2 = downloader.maybe_download(csv_url, callback=pd.read_csv) 103 | 104 | assert res1 is not res2 105 | assert len(downloader._options.cache) == 1 106 | assert requests_mock.called_once 107 | np.testing.assert_array_equal(res1.index, csv_df.index) 108 | np.testing.assert_array_equal(res1.columns, csv_df.columns) 109 | np.testing.assert_array_equal(res1.values, csv_df.values) 110 | 111 | res1 = downloader.maybe_download(json_url, callback=pd.read_json) 112 | res2 = downloader.maybe_download(json_url, callback=pd.read_json) 113 | 114 | assert res1 is not res2 115 | assert len(requests_mock.request_history) == 2 116 | assert len(downloader._options.cache) == 2 117 | np.testing.assert_array_equal(res1.index, csv_df.index) 118 | np.testing.assert_array_equal(res1.columns, csv_df.columns) 119 | np.testing.assert_array_equal(res1.values, csv_df.values) 120 | 121 | def test_maybe_download_no_cache( 122 | self, downloader: Downloader, requests_mock, csv_data: bytes 123 | ): 124 | url = urljoin(downloader._options.url, "foobar") 125 | requests_mock.register_uri("GET", url, content=csv_data) 126 | 127 | res1 = downloader.maybe_download(url, callback=pd.read_csv) 128 | downloader._options.cache.clear() 129 | res2 = downloader.maybe_download(url, callback=pd.read_csv) 130 | 131 | assert res1 is not res2 132 | assert len(requests_mock.request_history) == 2 133 | np.testing.assert_array_equal(res1.index, res2.index) 134 | np.testing.assert_array_equal(res1.columns, res2.columns) 135 | np.testing.assert_array_equal(res1.values, res2.values) 136 | 137 | def test_maybe_download_is_not_final( 138 | self, downloader: Downloader, requests_mock, csv_data: bytes 139 | ): 140 | endpoint = "barbaz" 141 | url = urljoin(downloader._options.url, endpoint) 142 | requests_mock.register_uri("GET", url, content=csv_data) 143 | csv_df = pd.read_csv(BytesIO(csv_data)) 144 | 145 | res = downloader.maybe_download(endpoint, callback=pd.read_csv) 146 | 147 | assert requests_mock.called_once 148 | np.testing.assert_array_equal(res.index, csv_df.index) 149 | np.testing.assert_array_equal(res.columns, csv_df.columns) 150 | np.testing.assert_array_equal(res.values, csv_df.values) 151 | 152 | def test_fallback_urls(self, requests_mock, csv_data: bytes): 153 | query = "annotations?resources=PROGENy" 154 | opt = Options(url="https://wrong.omnipathdb.org/") 155 | requests_mock.register_uri( 156 | "GET", 157 | urljoin(opt.url, query), 158 | exc=requests.exceptions.ConnectionError, 159 | ) 160 | requests_mock.register_uri( 161 | "GET", 162 | urljoin(opt.fallback_urls[0], query), 163 | content=csv_data, 164 | ) 165 | csv_df = pd.read_csv(BytesIO(csv_data)) 166 | downloader = Downloader(opt) 167 | res = downloader.maybe_download(query, callback=pd.read_csv) 168 | 169 | assert requests_mock.called 170 | np.testing.assert_array_equal(res.index, csv_df.index) 171 | np.testing.assert_array_equal(res.columns, csv_df.columns) 172 | np.testing.assert_array_equal(res.values, csv_df.values) 173 | 174 | def test_get_server_version_not_decodable( 175 | self, options: Options, requests_mock, caplog 176 | ): 177 | url = urljoin(options.url, Endpoint.ABOUT.s) 178 | options.autoload = True 179 | requests_mock.register_uri( 180 | "GET", f"{url}?format=text", content=bytes("foobarbaz", encoding="utf-8") 181 | ) 182 | 183 | with caplog.at_level(logging.DEBUG): 184 | version = _get_server_version(options) 185 | 
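        # `foobarbaz` contains no `\d+\.\d+.\d+` match, so the parsing callback
        # fails with `list index out of range` and the fallback version is used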
186 | assert requests_mock.called_once 187 | assert ( 188 | "Unable to get server version. Reason: `list index out of range`" 189 | in caplog.text 190 | ) 191 | assert version == UNKNOWN_SERVER_VERSION 192 | 193 | def test_get_server_version_no_autoload( 194 | self, options: Options, requests_mock, caplog 195 | ): 196 | url = urljoin(options.url, Endpoint.ABOUT.s) 197 | options.autoload = False 198 | requests_mock.register_uri("GET", f"{url}?format=text", text="foobarbaz") 199 | 200 | with caplog.at_level(logging.DEBUG): 201 | version = _get_server_version(options) 202 | 203 | assert not requests_mock.called_once 204 | assert ( 205 | "Unable to get server version. Reason: `Autoloading is disabled." 206 | in caplog.text 207 | ) 208 | assert version == UNKNOWN_SERVER_VERSION 209 | 210 | def test_get_server_version(self, options: Options, requests_mock): 211 | url = urljoin(options.url, Endpoint.ABOUT.s) 212 | options.autoload = True 213 | requests_mock.register_uri( 214 | "GET", 215 | f"{url}?format=text", 216 | content=bytes("foo bar baz\nversion: 42.1337.00", encoding="utf-8"), 217 | ) 218 | 219 | version = _get_server_version(options) 220 | 221 | assert requests_mock.called_once 222 | assert version == "42.1337.00" 223 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | from io import StringIO 2 | from abc import ABC 3 | from copy import deepcopy 4 | from shutil import copy 5 | from typing import Optional 6 | from inspect import isclass 7 | from pathlib import Path 8 | from collections import defaultdict 9 | from urllib.parse import urljoin 10 | import json 11 | import pickle 12 | import logging 13 | 14 | from pytest_socket import disable_socket 15 | 16 | disable_socket() 17 | import pytest 18 | import requests 19 | 20 | import numpy as np 21 | import pandas as pd 22 | 23 | from omnipath.constants import InteractionDataset 24 | from omnipath._core.cache._cache import MemoryCache 25 | from omnipath._core.query._query import QueryType 26 | from omnipath._core.utils._options import Options 27 | from omnipath.constants._pkg_constants import DEFAULT_OPTIONS, Key, Endpoint 28 | from omnipath._core.downloader._downloader import Downloader 29 | import omnipath as op 30 | 31 | 32 | # removes overly verbose logging errors for rpy2 33 | # see: https://github.com/pytest-dev/pytest/issues/5502#issuecomment-647157873 34 | def pytest_sessionfinish(session, exitstatus): 35 | import logging 36 | 37 | loggers = [logging.getLogger()] + list(logging.Logger.manager.loggerDict.values()) 38 | for logger in loggers: 39 | handlers = getattr(logger, "handlers", []) 40 | for handler in handlers: 41 | logger.removeHandler(handler) 42 | 43 | 44 | def pytest_addoption(parser): 45 | parser.addoption( 46 | "--test-server", 47 | dest="test_server", 48 | action="store_true", 49 | help="Whether to also test the server connection.", 50 | ) 51 | 52 | 53 | @pytest.fixture(scope="function") 54 | def options() -> "Options": 55 | opt = Options.from_config() 56 | opt.cache = "memory" 57 | opt.progress_bar = False 58 | opt.fallback_urls = () 59 | return opt 60 | 61 | 62 | @pytest.fixture(scope="function") 63 | def config_backup(tmpdir): 64 | copy(Options.config_path, tmpdir / "config.ini") 65 | yield 66 | copy(tmpdir / "config.ini", Options.config_path) 67 | 68 | 69 | @pytest.fixture(scope="function") 70 | def cache_backup(): 71 | import omnipath as op 72 | 73 | cache = deepcopy(op.options.cache) 
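    # snapshot the global options touched below; they are restored after the
    # yield so individual tests cannot leak state into one another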
74 | pb = op.options.progress_bar 75 | op.options.cache = MemoryCache() 76 | op.options.progress_bar = False 77 | yield 78 | op.options.cache = cache 79 | op.options.progress_bar = pb 80 | 81 | 82 | @pytest.fixture(scope="class") 83 | def server_url(): 84 | import omnipath as op 85 | 86 | cache = deepcopy(op.options.cache) 87 | pb = op.options.progress_bar 88 | url = op.options.url 89 | cd = op.options.convert_dtypes 90 | 91 | op.options.cache = MemoryCache() 92 | op.options.progress_bar = False 93 | op.options.url = DEFAULT_OPTIONS.url 94 | op.options.convert_dtypes = True 95 | yield 96 | op.options.cache = cache 97 | op.options.progress_bar = pb 98 | op.options.url = url 99 | op.options.convert_dtypes = cd 100 | 101 | 102 | @pytest.fixture(scope="function") 103 | def downloader(options) -> "Downloader": 104 | options.fallback_urls = () 105 | return Downloader(options) 106 | 107 | 108 | @pytest.fixture(scope="session") 109 | def csv_data() -> bytes: 110 | str_handle = StringIO() 111 | pd.DataFrame({"foo": range(5), "bar": "baz", "quux": 42}).to_csv(str_handle) 112 | 113 | return bytes(str_handle.getvalue(), encoding="utf-8") 114 | 115 | 116 | @pytest.fixture(scope="session") 117 | def tsv_data() -> bytes: 118 | str_handle = StringIO() 119 | pd.DataFrame( 120 | { 121 | "foo": range(5), 122 | "components_genesymbols": "foo", 123 | "quux": 42, 124 | "modification": "bar", 125 | } 126 | ).to_csv(str_handle, sep="\t") 127 | 128 | return bytes(str_handle.getvalue(), encoding="utf-8") 129 | 130 | 131 | @pytest.fixture(scope="session") 132 | def intercell_data() -> bytes: 133 | data = {} 134 | data[Key.PARENT.s] = [42, 1337, 24, 42] 135 | data[Key.CATEGORY.s] = ["foo", "bar", "bar", "foo"] 136 | 137 | return bytes(json.dumps(data), encoding="utf-8") 138 | 139 | 140 | @pytest.fixture(scope="session") 141 | def resources() -> bytes: 142 | data = defaultdict(dict) 143 | data["foo"][Key.QUERIES.s] = { 144 | QueryType.INTERCELL.endpoint: {Key.GENERIC_CATEGORIES.s: ["42"]} 145 | } 146 | data["bar"][Key.QUERIES.s] = { 147 | QueryType.INTERCELL.endpoint: {Key.GENERIC_CATEGORIES.s: ["42", "13"]} 148 | } 149 | data["baz"][Key.QUERIES.s] = { 150 | QueryType.INTERCELL.endpoint: {Key.GENERIC_CATEGORIES.s: ["24"]} 151 | } 152 | data["quux"][Key.QUERIES.s] = { 153 | QueryType.ENZSUB.endpoint: {Key.GENERIC_CATEGORIES.s: ["24"]} 154 | } 155 | 156 | return bytes(json.dumps(data), encoding="utf-8") 157 | 158 | 159 | @pytest.fixture(scope="session") 160 | def interaction_resources() -> bytes: 161 | data = defaultdict(dict) 162 | for i, d in enumerate(InteractionDataset): 163 | data[f"d_{i}"][Key.QUERIES.s] = { 164 | QueryType.INTERACTIONS.endpoint: {Key.DATASETS.s: [d.value]} 165 | } 166 | 167 | return bytes(json.dumps(data), encoding="utf-8") 168 | 169 | 170 | @pytest.fixture(scope="session") 171 | def complexes() -> pd.DataFrame: 172 | return pd.DataFrame( 173 | { 174 | "components_genesymbols": [ 175 | "foo", 176 | "bar_baz_quux", 177 | "baz_bar", 178 | "bar_quux_foo", 179 | ], 180 | "dummy": 42, 181 | } 182 | ) 183 | 184 | 185 | @pytest.fixture(scope="session") 186 | def interactions_data() -> bytes: 187 | str_handle = StringIO() 188 | with open(Path("tests") / "_data" / "interactions.pickle", "rb") as fin: 189 | data: pd.DataFrame = pickle.load(fin) 190 | 191 | data.to_csv(str_handle, sep="\t", index=False) 192 | 193 | return bytes(str_handle.getvalue(), encoding="utf-8") 194 | 195 | 196 | @pytest.fixture(scope="session") 197 | def transmitters_data() -> bytes: 198 | str_handle = StringIO() 199 | with 
open(Path("tests") / "_data" / "transmitters.pickle", "rb") as fin: 200 | data: pd.DataFrame = pickle.load(fin) 201 | 202 | data.to_csv(str_handle, sep="\t", index=False) 203 | 204 | return bytes(str_handle.getvalue(), encoding="utf-8") 205 | 206 | 207 | @pytest.fixture(scope="session") 208 | def receivers_data() -> bytes: 209 | str_handle = StringIO() 210 | with open(Path("tests") / "_data" / "receivers.pickle", "rb") as fin: 211 | data: pd.DataFrame = pickle.load(fin) 212 | 213 | data.to_csv(str_handle, sep="\t", index=False) 214 | 215 | return bytes(str_handle.getvalue(), encoding="utf-8") 216 | 217 | 218 | @pytest.fixture(scope="session") 219 | def import_intercell_result() -> pd.DataFrame: 220 | with open(Path("tests") / "_data" / "import_intercell_result.pickle", "rb") as fin: 221 | return pickle.load(fin) 222 | 223 | 224 | @pytest.fixture(scope="session") 225 | def string_series() -> pd.Series: 226 | return pd.Series(["foo:123", "bar:45;baz", None, "baz:67;bar:67", "foo;foo;foo"]) 227 | 228 | 229 | def _can_import_omnipathR() -> Optional["rpy2.robjects.packages.Package"]: # noqa: F821 230 | try: 231 | from packaging import version 232 | from rpy2.robjects.packages import PackageNotInstalledError, importr 233 | import rpy2 234 | 235 | try: 236 | from importlib_metadata import version as get_version 237 | except ImportError: 238 | # >=Python3.8 239 | from importlib.metadata import version as get_version 240 | 241 | try: 242 | assert version.parse(get_version(rpy2.__name__)) >= version.parse("3.3.0") 243 | mod = importr("OmnipathR") 244 | logging.info("Successfully loaded `OmnipathR`") 245 | return mod 246 | except (PackageNotInstalledError, AssertionError) as err: 247 | logging.error(f"Unable to import `OmnipathR`. Reason: `{err}`") 248 | 249 | except ImportError as err: 250 | logging.error(f"Unable to import `rpy2`. Reason: `{err}`") 251 | except Exception as err: 252 | logging.error(f"Unknown exception when trying to import `OmnipathR`: `{err}`") 253 | 254 | return None 255 | 256 | 257 | @pytest.fixture(scope="session") 258 | def omnipathr(request): 259 | url = urljoin(DEFAULT_OPTIONS.url, Endpoint.ABOUT.s) 260 | 261 | if not request.config.getoption("test_server", default=False, skip=True): 262 | logging.error("Testing using the server is disabled.") 263 | return None 264 | try: 265 | resp = requests.get(url) 266 | resp.raise_for_status() 267 | except Exception as e: 268 | logging.error(f"Unable to contact the server at `{url}`. 
Reason: `{e}`") 269 | return None 270 | 271 | return _can_import_omnipathR() 272 | 273 | 274 | @pytest.fixture(autouse=True, scope="class") 275 | def _inject_omnipath(request, omnipathr, server_url): 276 | if isclass(request.cls) and issubclass(request.cls, RTester): 277 | if omnipathr is None: 278 | pytest.skip("Unable to import `OmnipathR`.") 279 | from rpy2.robjects import pandas2ri 280 | 281 | # at this point, we know rpy2 can be imported, thanks to the `omnipathr` fixture 282 | # do not change the activation order 283 | pandas2ri.activate() 284 | request.cls.omnipathr = omnipathr 285 | 286 | 287 | class RTester(ABC): 288 | def test_resources(self): 289 | expected = sorted(self.omnipathr.get_intercell_resources()) 290 | actual = sorted(op.requests.Intercell.resources()) 291 | 292 | np.testing.assert_array_equal(expected, actual) 293 | -------------------------------------------------------------------------------- /omnipath/_core/utils/_options.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Tuple, Union, ClassVar, NoReturn, Optional 2 | from pathlib import Path 3 | from urllib.parse import urlparse 4 | import configparser 5 | 6 | import attr 7 | 8 | from omnipath.constants import License 9 | from omnipath._core.cache._cache import Cache, FileCache, NoopCache, MemoryCache 10 | from omnipath.constants._pkg_constants import DEFAULT_OPTIONS 11 | 12 | 13 | def _is_positive(_instance, attribute: attr.Attribute, value: int) -> NoReturn: 14 | """Check whether the ``value`` is positive.""" 15 | if isinstance(value, tuple): 16 | return all(_is_positive(_instance, attribute, v) for v in value) 17 | if value <= 0: 18 | raise ValueError( 19 | f"Expected `{attribute.name}` to be positive, found `{value}`." 20 | ) 21 | 22 | 23 | def _is_non_negative(_instance, attribute: attr.Attribute, value: int) -> NoReturn: 24 | """Check whether the ``value`` is non-negative.""" 25 | if value < 0: 26 | raise ValueError( 27 | f"Expected `{attribute.name}` to be non-negative, found `{value}`." 28 | ) 29 | 30 | 31 | def _is_valid_url(_instance, _attribute: attr.Attribute, value: str) -> NoReturn: 32 | """Check whether the ``value`` forms a valid URL.""" 33 | pr = urlparse(value) 34 | 35 | if not pr.scheme or not pr.netloc: 36 | raise ValueError(f"Invalid URL: `{value}`.") 37 | 38 | 39 | def _cache_converter(value: Optional[Union[str, Path, Cache]]) -> Cache: 40 | """Convert ``value`` to :class:`omnipath._core.cache.Cache`.""" 41 | if isinstance(value, Cache): 42 | return value 43 | 44 | if value is None: 45 | return NoopCache() 46 | if value == "memory": 47 | return MemoryCache() 48 | 49 | return FileCache(value) 50 | 51 | 52 | @attr.s 53 | class Options: 54 | """ 55 | Class defining various :mod:`omnipath` options. 56 | 57 | Parameters 58 | ---------- 59 | url 60 | URL of the web service. 61 | license 62 | License to use when fetching the data. 63 | password 64 | Password used when performing requests. 65 | cache 66 | Type of a cache. Valid options are: 67 | 68 | - `None`: do not save anything into a cache. 69 | - `'memory'`: cache files into the memory. 70 | - :class:`str`: persist files into a directory. 71 | 72 | autoload 73 | Whether to contact the server at ``url`` during import to get the server version and the most up-to-date 74 | query parameters and their valid options. 75 | convert_dtypes 76 | Whether to convert the data types of the resulting :class:`pandas.DataFrame`. 77 | num_retries 78 | Number of retries before giving up. 
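        Retries are attempted for HTTP 413, 429 and 5xx responses with
        exponential backoff.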
79 | timeout 80 | Timeout in seconds when awaiting response. 81 | chunk_size 82 | Size in bytes in which to read the data. 83 | progress_bar 84 | Whether to show the progress bar when downloading data. 85 | """ 86 | 87 | config_path: ClassVar[Path] = Path.home() / ".config" / "omnipathdb.ini" 88 | 89 | url: str = attr.ib( 90 | default=DEFAULT_OPTIONS.url, 91 | validator=[attr.validators.instance_of(str), _is_valid_url], 92 | on_setattr=attr.setters.validate, 93 | ) 94 | fallback_urls: Tuple[str] = attr.ib( 95 | default=DEFAULT_OPTIONS.fallback_urls, 96 | converter=(lambda val: (val,) if isinstance(val, str) else tuple(val)), 97 | on_setattr=attr.setters.convert, 98 | ) 99 | static_url: str = attr.ib( 100 | default=DEFAULT_OPTIONS.static_url, 101 | validator=[attr.validators.instance_of(str), _is_valid_url], 102 | on_setattr=attr.setters.validate, 103 | ) 104 | license: License = attr.ib( 105 | default=None, 106 | validator=attr.validators.optional(attr.validators.instance_of((str, License))), 107 | converter=(lambda val: None if val is None else License(val)), 108 | on_setattr=attr.setters.convert, 109 | ) 110 | password: Optional[str] = attr.ib( 111 | default=None, 112 | repr=False, 113 | validator=attr.validators.optional(attr.validators.instance_of(str)), 114 | on_setattr=attr.setters.validate, 115 | ) 116 | 117 | cache: Cache = attr.ib( 118 | default=DEFAULT_OPTIONS.cache_dir, 119 | converter=_cache_converter, 120 | kw_only=True, 121 | on_setattr=attr.setters.convert, 122 | ) 123 | autoload: bool = attr.ib( 124 | default=DEFAULT_OPTIONS.autoload, 125 | validator=attr.validators.instance_of(bool), 126 | on_setattr=attr.setters.validate, 127 | ) 128 | convert_dtypes: bool = attr.ib( 129 | default=DEFAULT_OPTIONS.convert_dtypes, 130 | validator=attr.validators.instance_of(bool), 131 | on_setattr=attr.setters.validate, 132 | ) 133 | 134 | num_retries: int = attr.ib( 135 | default=DEFAULT_OPTIONS.num_retries, 136 | validator=[attr.validators.instance_of(int), _is_non_negative], 137 | on_setattr=attr.setters.validate, 138 | ) 139 | timeout: Union[int, float, Tuple[float, float]] = attr.ib( 140 | default=DEFAULT_OPTIONS.timeout, 141 | validator=[attr.validators.instance_of((int, float, tuple)), _is_positive], 142 | on_setattr=attr.setters.validate, 143 | ) 144 | chunk_size: int = attr.ib( 145 | default=DEFAULT_OPTIONS.chunk_size, 146 | validator=[attr.validators.instance_of(int), _is_positive], 147 | on_setattr=attr.setters.validate, 148 | ) 149 | 150 | progress_bar: bool = attr.ib( 151 | default=True, 152 | repr=False, 153 | validator=attr.validators.instance_of(bool), 154 | on_setattr=attr.setters.validate, 155 | ) 156 | 157 | def _create_config(self, section: Optional[str] = None): 158 | section = self.url if section is None else section 159 | _is_valid_url(None, None, section) 160 | config = configparser.ConfigParser() 161 | # do not save the password 162 | config[section] = { 163 | "license": str(None if self.license is None else self.license.value), 164 | "cache_dir": str(self.cache.path), 165 | "autoload": self.autoload, 166 | "convert_dtypes": self.convert_dtypes, 167 | "num_retries": self.num_retries, 168 | "timeout": self.timeout, 169 | "chunk_size": self.chunk_size, 170 | "progress_bar": self.progress_bar, 171 | } 172 | 173 | return config 174 | 175 | @classmethod 176 | def from_config(cls, section: Optional[str] = None) -> "Options": 177 | """ 178 | Return the options from a configuration file. 
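
        If no configuration file exists yet, one with the default options is
        created and written to :attr:`config_path` first. A typical round
        trip (sketch)::

            opts = Options.from_config()  # read (or create) the `.ini` file
            opts.timeout = 600
            opts.write()                  # persist the change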
179 | 180 | Parameters 181 | ---------- 182 | section 183 | Section of the `.ini` file from which to create the options. It corresponds to the URL of the server. 184 | If `None`, use default URL. 185 | 186 | Returns 187 | ------- 188 | :class:`omnipath._cores.utils.Options` 189 | The options. 190 | """ 191 | if not cls.config_path.is_file(): 192 | return cls().write() 193 | 194 | config = configparser.ConfigParser(default_section=DEFAULT_OPTIONS.url) 195 | config.read(cls.config_path) 196 | 197 | section = DEFAULT_OPTIONS.url if section is None else section 198 | _is_valid_url(None, None, section) 199 | _ = config.get(section, "cache_dir") 200 | 201 | cache = config.get(section, "cache_dir", fallback=DEFAULT_OPTIONS.cache_dir) 202 | cache = None if cache == "None" else cache 203 | license = config.get(section, "license", fallback=DEFAULT_OPTIONS.license) 204 | license = None if license == "None" else License(license) 205 | 206 | return cls( 207 | url=section, 208 | license=license, 209 | num_retries=config.getint( 210 | section, "num_retries", fallback=DEFAULT_OPTIONS.num_retries 211 | ), 212 | timeout=config.getfloat( 213 | section, "timeout", fallback=DEFAULT_OPTIONS.timeout 214 | ), 215 | chunk_size=config.getint( 216 | section, "chunk_size", fallback=DEFAULT_OPTIONS.chunk_size 217 | ), 218 | progress_bar=config.getboolean( 219 | section, "progress_bar", fallback=DEFAULT_OPTIONS.progress_bar 220 | ), 221 | autoload=config.getboolean( 222 | section, "autoload", fallback=DEFAULT_OPTIONS.autoload 223 | ), 224 | convert_dtypes=config.getboolean( 225 | section, "convert_dtypes", fallback=DEFAULT_OPTIONS.convert_dtypes 226 | ), 227 | cache=cache, 228 | ) 229 | 230 | @classmethod 231 | def from_options(cls, options: "Options", **kwargs: Any) -> "Options": 232 | """ 233 | Create new options from previous options. 234 | 235 | Parameters 236 | ---------- 237 | options 238 | Options from which to create new ones. 239 | **kwargs 240 | Keyword arguments overriding attributes from ``options``. 241 | 242 | Returns 243 | ------- 244 | The newly created option. 245 | """ 246 | if not isinstance(options, Options): 247 | raise TypeError( 248 | f"Expected `options` to be of type `Options`, found `{type(options)}`." 
249 | ) 250 | 251 | kwargs = {k: v for k, v in kwargs.items() if hasattr(options, k)} 252 | 253 | return cls(**{**options.__dict__, **kwargs}) 254 | 255 | def write(self, section: Optional[str] = None) -> NoReturn: 256 | """Write the current options to a configuration file.""" 257 | self.config_path.parent.mkdir(parents=True, exist_ok=True) 258 | 259 | with open(self.config_path, "w") as fout: 260 | self._create_config(section).write(fout) 261 | 262 | return self 263 | 264 | def __enter__(self) -> "Options": 265 | return self.from_options(self) 266 | 267 | def __exit__(self, exc_type, exc_val, exc_tb) -> None: 268 | pass 269 | 270 | 271 | options = Options.from_config() 272 | 273 | 274 | __all__ = [options, Options] 275 | -------------------------------------------------------------------------------- /omnipath/_core/requests/interactions/_evidences.py: -------------------------------------------------------------------------------- 1 | from typing import List, Tuple, Union, Callable, Iterable, Optional 2 | 3 | import pandas as pd 4 | 5 | from omnipath._misc.utils import to_set 6 | from omnipath._core.requests._utils import ( 7 | _count_resources, 8 | _count_references, 9 | _strip_resource_label_df, 10 | ) 11 | 12 | EVIDENCES_KEYS = ("positive", "negative", "directed", "undirected") 13 | 14 | 15 | def _must_have_evidences(df: pd.DataFrame) -> None: 16 | """Raise an error if the input data frame does not contain evidences.""" 17 | if "evidences" not in df.columns: 18 | raise ValueError("The input data frame must contain `evidences` column.") 19 | 20 | 21 | def unnest_evidences(df: pd.DataFrame, col: str = "evidences") -> pd.DataFrame: 22 | """ 23 | Create new columns of evidences by direction and effect sign. 24 | 25 | Plucks evidence lists of each direction and effect sign into separate, 26 | new columns. This will yield four new columns: "positive", "negative", 27 | "directed" and "undirected", each containing lists of dicts of evidences. 28 | 29 | Parameters 30 | ---------- 31 | df 32 | An OmniPath interaction data frame with "evidences" column. 33 | col 34 | Name of the column containing the nested evidences. 35 | 36 | Returns 37 | ------- 38 | :class:`pandas.DataFrame` 39 | The input data frame with new columns "positive", "negative", 40 | "directed" and "undirected" each containing lists of dicts of 41 | evidences. 42 | 43 | Raises 44 | ------ 45 | ValueError 46 | If the input data frame does not contain "evidences" column. 47 | """ 48 | for key in ("positive", "negative", "directed", "undirected"): 49 | df[key] = df[col].apply(lambda x: x[key]) # noqa: B023 50 | 51 | return df 52 | 53 | 54 | def filter_evidences( 55 | df: pd.DataFrame, 56 | datasets: Optional[Union[str, Iterable[str]]] = None, 57 | resources: Optional[Union[str, Iterable[str]]] = None, 58 | col: str = "evidences", 59 | target_col: Optional[str] = None, 60 | ) -> pd.DataFrame: 61 | """ 62 | Filter evidences by dataset and resource. 63 | 64 | Parameters 65 | ---------- 66 | df 67 | An OmniPath interaction data frame with "evidences" column. 68 | datasets 69 | A list of dataset names. If None, all datasets will be included. 70 | resources 71 | A list of resource names. If None, all resources will be included. 72 | col 73 | Name of the column containing the evidences. 74 | target_col 75 | Column to output the filtered evidences to. By default `col` is 76 | to be overwritten. 
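        Passing a different name keeps the original column untouched.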
77 | 78 | Returns 79 | ------- 80 | :class:`pandas.DataFrame` 81 | The input data frame with the evidences filtered, with a new column 82 | depending on the `target_col` parameter. 83 | """ 84 | target_col = target_col or col 85 | datasets = to_set(datasets) 86 | resources = to_set(resources) 87 | 88 | def the_filter(evs): 89 | if isinstance(evs, dict): 90 | return {k: the_filter(v) for k, v in evs.items()} 91 | 92 | elif isinstance(evs, list): 93 | return [ 94 | ev 95 | for ev in evs 96 | if ( 97 | (not datasets or ev["dataset"] in datasets) 98 | and (not resources or ev["resource"] in resources) 99 | ) 100 | ] 101 | 102 | else: 103 | return evs 104 | 105 | df[target_col] = df[col].apply(the_filter) 106 | 107 | return df 108 | 109 | 110 | def from_evidences( 111 | df: pd.DataFrame, 112 | col: str = "evidences", 113 | ) -> pd.DataFrame: 114 | """ 115 | Recreate interaction records from an evidences column. 116 | 117 | Parameters 118 | ---------- 119 | df 120 | An OmniPath interaction data frame. 121 | col: 122 | Name of the column containing the evidences. 123 | 124 | Returns 125 | ------- 126 | :class:`pandas.DataFrame` 127 | The input data frame with its standard columns reconstructed based 128 | on the evidences in `col`. The records with no evidences from the 129 | specified datasets and resources will be removed. 130 | """ 131 | evs_df = pd.DataFrame({"evidences": df[col]}) 132 | evs_df = unnest_evidences(evs_df) 133 | evs_df["ce_positive"] = _curation_effort_from(evs_df, columns="positive") 134 | evs_df["ce_negative"] = _curation_effort_from(evs_df, columns="negative") 135 | evs_df["ce_directed"] = _curation_effort_from(evs_df, columns="directed") 136 | 137 | df["is_directed"] = evs_df["directed"].apply(bool) 138 | df["is_stimulation"] = evs_df["positive"].apply(bool) 139 | df["is_inhibition"] = evs_df["negative"].apply(bool) 140 | df["curation_effort"] = _curation_effort_from(evs_df) 141 | df["sources"] = _resources_from(evs_df) 142 | df["references"] = _references_from(evs_df) 143 | df["consensus_stimulation"] = evs_df["ce_positive"] >= evs_df["ce_negative"] 144 | df["consensus_inhibition"] = evs_df["ce_positive"] <= evs_df["ce_negative"] 145 | 146 | # recompile the consensus_direction 147 | opposite_direction = pd.DataFrame( 148 | { 149 | "source": df["source"], 150 | "target": df["target"], 151 | "ce_directed_opp": evs_df["ce_directed"], 152 | } 153 | ) 154 | df["ce_directed"] = evs_df["ce_directed"] 155 | df = df.merge( 156 | opposite_direction, 157 | on=["source", "target"], 158 | how="left", 159 | sort=False, 160 | ) 161 | df["consensus_direction"] = ( 162 | pd.isnull(df["ce_directed_opp"]) | df["ce_directed"] >= df["ce_directed_opp"] 163 | ) 164 | df.drop(columns=["ce_directed", "ce_directed_opp"], inplace=True) 165 | 166 | _count_resources(df) 167 | _count_references(df) 168 | _strip_resource_label_df(df, col="references") 169 | 170 | # drop records which remained without evidences 171 | df = df[df.sources.apply(bool)] 172 | 173 | return df 174 | 175 | 176 | def _ensure_unnested( 177 | df: pd.DataFrame, 178 | columns: Union[str, Iterable[str]] = EVIDENCES_KEYS, 179 | ) -> Tuple[pd.DataFrame, Tuple[str]]: 180 | """ 181 | Unnest a nested evidences column in a single column data frame. 182 | 183 | Used only in some specific contexts within this module, all are helper 184 | functions of `from_evidences`. 185 | 186 | Returns 187 | ------- 188 | A tuple of the input data frame and a tuple of column names. 
def _ensure_unnested(
    df: pd.DataFrame,
    columns: Union[str, Iterable[str]] = EVIDENCES_KEYS,
) -> Tuple[pd.DataFrame, List[str]]:
    """
    Unnest a nested evidences column in a single column data frame.

    Used only in some specific contexts within this module; all callers are
    helper functions of `from_evidences`.

    Returns
    -------
    A tuple of the input data frame and a list of column names. If the
    data frame does not consist of a single nested evidences column, it
    will still be subset to the specified columns.
    """
    columns = list(to_set(columns))
    evs_df = df[columns]

    if (
        evs_df.shape[1] == 1
        and isinstance(evs_df.iloc[0, 0], dict)
        and not set(EVIDENCES_KEYS) - set(evs_df.iloc[0, 0].keys())
    ):
        evs_df = unnest_evidences(evs_df, col=evs_df.columns[0])
        columns = list(EVIDENCES_KEYS)

    evs_df = evs_df[columns]

    return evs_df, columns


def _from(
    df: pd.DataFrame,
    func: Callable,
    columns: Union[str, Iterable[str]] = EVIDENCES_KEYS,
) -> List[Union[int, str]]:
    """Compile a new column by applying a function on evidences."""
    evs_df, columns = _ensure_unnested(df, columns)

    return [
        func(ev for evs in rec for ev in evs)
        for rec in evs_df[columns].itertuples(index=False)
    ]


def _curation_effort_from(
    df: pd.DataFrame,
    columns: Union[str, Iterable[str]] = EVIDENCES_KEYS,
) -> List[int]:
    """Curation effort from one or more evidences columns."""
    return _from(
        df=df,
        func=lambda evs: sum(len(ev["references"]) + 1 for ev in evs),
        columns=columns,
    )


def _resources_from(
    df: pd.DataFrame,
    columns: Union[str, Iterable[str]] = EVIDENCES_KEYS,
) -> List[str]:
    """Resources from one or more evidences columns."""

    def extract_resources(evs: tuple) -> str:
        return ";".join(
            sorted(
                {
                    f"{ev['resource']}{'_' if ev['via'] else ''}{ev['via'] or ''}"
                    for ev in evs
                }
            )
        )

    return _from(df=df, func=extract_resources, columns=columns)


def _references_from(
    df: pd.DataFrame,
    columns: Union[str, Iterable[str]] = EVIDENCES_KEYS,
    prefix: bool = True,
) -> List[str]:
    """Get references from one or more evidences columns."""

    def extract_references(evs: tuple) -> str:
        return ";".join(
            sorted(
                {
                    f"{ev['resource'] + ':' if prefix else ''}{ref}"
                    for ev in evs
                    for ref in ev["references"]
                }
            )
        )

    return _from(df=df, func=extract_references, columns=columns)
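# Note on the helpers above: an evidence contributes a curation effort of
# `len(ev["references"]) + 1`, i.e. one point for the resource itself plus one
# per literature reference, summed over all evidences in the selected columns.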
def only_from(
    df: pd.DataFrame,
    datasets: Optional[Union[str, Iterable[str]]] = None,
    resources: Optional[Union[str, Iterable[str]]] = None,
) -> pd.DataFrame:
    """
    Restrict interactions to the specified datasets and resources.

    The OmniPath interactions database fully integrates all attributes from all
    resources for each interaction. This has the advantage that interaction
    data frames are ready for use in most applications; however, it makes it
    impossible to know which resources and references support the direction or
    effect sign of an interaction. This information can be recovered from the
    "evidences" column, which preserves all details about interaction
    provenance. If you want a faithful copy of a certain resource or dataset,
    this function helps you do so. Still, for most applications it is best to
    use the interaction data as returned by the web service.

    Parameters
    ----------
    df
        An OmniPath interaction data frame with an "evidences" column.
    datasets
        A list of dataset names. If None, all datasets will be included.
    resources
        A list of resource names. If None, all resources will be included.

    Returns
    -------
    The input data frame with the standard columns reconstructed from the
    evidences supported by the datasets and resources provided. Records with
    no evidences from the specified datasets or resources will be removed.
    """
    tmp_col = "evidences_filtered_tmp"

    _must_have_evidences(df)

    df = filter_evidences(df, datasets, resources, target_col=tmp_col)
    df = from_evidences(df, tmp_col)
    df = df.drop(columns=tmp_col)

    return df
--------------------------------------------------------------------------------
/omnipath/_core/query/_query_validator.py:
--------------------------------------------------------------------------------
from abc import ABCMeta
from enum import Enum, EnumMeta
from typing import (
    Any,
    Set,
    List,
    Union,
    Mapping,
    Iterable,
    Optional,
    Sequence,
    FrozenSet,
)
import sys
import json
import logging

from omnipath._core.utils._docs import d
from omnipath._core.query._types import (
    Int_t,
    Str_t,
    Bool_t,
    None_t,
    Strseq_t,
    License_t,
    Organism_t,
)
from omnipath._core.utils._options import Options
from omnipath.constants._constants import NoValue
from omnipath.constants._pkg_constants import Key, Format
from omnipath._core.downloader._downloader import Downloader


def _to_string_set(item: Union[Any, Sequence[Any]]) -> Set[str]:
    """
    Convert ``item`` to a `str` set.

    Parameters
    ----------
    item
        Item to convert. If it is not a sequence, it will be made into a
        singleton.

    Returns
    -------
    :class:`set`
        Set of `str`.
    """
    if isinstance(item, (str, Enum)) or not isinstance(item, Iterable):
        item = (item,)
    return {str(i.value if isinstance(i, Enum) else i) for i in item}
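# Behaviour sketch for `_to_string_set` (illustrative values):
#
#     _to_string_set("a")        # {'a'}       -- strings stay singletons
#     _to_string_set(["a", 1])   # {'a', '1'}  -- items are stringified
#     _to_string_set(42)         # {'42'}      -- non-iterables are wrapped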
class ServerValidatorMeta(EnumMeta, ABCMeta):  # noqa: D101
    class Validator:
        """
        Class that validates values for some parameter passed to the server.

        Parameters
        ----------
        param
            Name of the parameter we're checking. Only used for informing the user.
        haystack
            Valid values for the ``param``. If `None`, no validation will be performed.
        doc
            Docstring specific to the ``param``.
        """

        def __init__(
            self,
            param: str,
            haystack: Optional[Set[str]] = None,
            doc: Optional[str] = None,
        ):
            if isinstance(haystack, str):
                haystack = (haystack,)
            elif haystack is not None and not isinstance(haystack, Iterable):
                raise TypeError(
                    f"Expected `haystack` for `{param}` to be either a "
                    f"`str` or a `Sequence`, found `{type(haystack)}`."
                )

            self._param = param.lower()
            self._haystack = haystack if haystack is None else frozenset(haystack)
            self._query_doc_ = None if not doc else doc  # doc can also be `()`

        @property
        def haystack(self) -> Optional[FrozenSet[str]]:
            """Return the valid values for this parameter."""
            return self._haystack

        def __call__(self, needle: Optional[Set[str]]) -> Optional[Set[str]]:
            """
            Check whether ``needle`` is a valid value for :attr:`_param`.

            Parameters
            ----------
            needle
                Needle to check.

            Returns
            -------
            `None` if the ``needle`` was `None`, otherwise the ``needle`` as a `str` set,
            optionally intersected with :attr:`_haystack` if it is not `None`.

            Raises
            ------
            ValueError
                If :attr:`haystack` is not `None` and no valid values were found.
            """
            if needle is None:
                return None
            elif isinstance(needle, bool):
                needle = int(needle)
            elif isinstance(needle, Enum):
                needle = needle.value

            needle = _to_string_set(needle)
            if self.haystack is None:
                logging.debug(
                    f"Unable to perform parameter validation for `{self._param}`, the haystack is unknown"
                )
                return needle

            res = needle & self.haystack
            if not len(res):
                raise ValueError(
                    f"No valid options found for parameter `{self._param}` in: `{sorted(needle)}`.\n"
                    f"Valid options are: `{sorted(self.haystack)}`."
                )
            elif len(res) < len(needle):
                logging.warning(
                    f"Encountered invalid value(s) for `{self._param}`. "
                    f"Remaining values are `{sorted(res)}`"
                )

            return res

    def __new__(cls, clsname, superclasses, attributedict):  # noqa: D102
        from omnipath import options

        endpoint = attributedict.pop(
            "__endpoint__", clsname.lower().replace("validator", "")
        )
        use_default = True
        old_members = list(attributedict._member_names)
        old_values = cls._remove_old_members(attributedict)

        if endpoint is None:
            if len(old_members):
                raise ValueError(
                    "If `__endpoint__` is `None`, no members must be specified."
                )
        elif options.autoload:
            use_default = False
            with Options.from_options(
                options,
                num_retries=0,
                timeout=3.0,
                cache=None,
                progress_bar=False,
                chunk_size=2048,
            ) as opt:
                try:
                    logging.debug("Attempting to construct classes from the server")
                    res = Downloader(opt).maybe_download(
                        f"{Key.QUERIES.s}/{endpoint}",
                        callback=json.load,
                        params={Key.FORMAT.s: Format.JSON.s},
                    )

                    if len({str(k).upper() for k in res.keys()}) != len(res):
                        raise RuntimeError(
                            f"After upper-casing, the keys would not be unique: `{list(res.keys())}`."
                        )

                    for k, value in res.items():
                        if (
                            isinstance(value, str)
                            and "no such query available" in value
                        ):
                            raise RuntimeError(f"Invalid endpoint: `{endpoint}`.")

                        key = str(k).upper()
                        if value is None:
                            attributedict[key] = cls.Validator(param=k)
                        elif isinstance(value, Sequence):
                            attributedict[key] = cls.Validator(
                                param=k, haystack={str(v) for v in value}
                            )
                        else:
                            attributedict[key] = cls.Validator(param=k)
                except Exception as e:
                    logging.debug(
                        f"Unable to construct classes from the server. Reason: `{e}`"
                    )
                    use_default = True

        if use_default:
            if endpoint is not None:
                logging.debug(
                    f"Using predefined class: `{clsname}`."
                    + (
                        ""
                        if options.autoload
                        else " Consider setting `omnipath.options.autoload = True`"
                    )
                )

            _ = cls._remove_old_members(attributedict)
            for k, v in zip(old_members, old_values):
                attributedict[k] = cls.Validator(param=k, doc=v)

        return super().__new__(cls, clsname, superclasses, attributedict)

    @classmethod
    def _remove_old_members(cls, attributedict) -> List[Any]:
        vals = []
        for k in list(attributedict._member_names):
            vals.append(attributedict.pop(k, None))
        attributedict._member_names = [] if sys.version_info < (3, 11) else {}

        return vals
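# Usage sketch (illustrative): members of the validator classes defined below
# are called with the user-supplied value and return the validated string set,
# e.g.
#
#     InteractionsValidator.LICENSE("academic")
#
# returns `{'academic'}` when the server lists it as valid (or when no
# haystack could be fetched); values outside a known haystack raise
# `ValueError`.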
class AutoValidator(NoValue):  # noqa: D101
    @property
    def valid(self) -> Optional[Set[str]]:
        """Return the valid values."""
        return self.value.haystack

    @property
    def annotation(self) -> Mapping[str, type]:
        """Return the type annotations."""
        return getattr(self, "__annotations__", {}).get(self.name, Any)

    @property
    def doc(self) -> Optional[str]:
        """Return the docstring."""
        return getattr(self.value, "_query_doc_", None)

    @d.dedent
    def __call__(self, value: Union[str, Sequence[str]]) -> Optional[Set[str]]:
        """%(validate)s"""  # noqa: D401
        return self.value(value)


class QueryValidatorMixin(AutoValidator, metaclass=ServerValidatorMeta):  # noqa: D101
    __endpoint__ = None


class EnzsubValidator(QueryValidatorMixin):  # noqa: D101
    DATABASES: Strseq_t = ()
    ENZYME_SUBSTRATE: Str_t = ()
    ENZYMES: Strseq_t = ()
    FIELDS: Strseq_t = ()
    FORMAT: Str_t = ()
    GENESYMBOLS: Bool_t = ()
    HEADER: Str_t = ()
    LICENSE: License_t = ()
    LIMIT: Int_t = ()
    MODIFICATION: Str_t = ()
    ORGANISMS: Organism_t = ()
    PARTNERS: Strseq_t = ()
    PASSWORD: Str_t = ()
    RESIDUES: Strseq_t = ()
    RESOURCES: Strseq_t = ()
    SUBSTRATES: Strseq_t = ()
    TYPES: Strseq_t = ()


class InteractionsValidator(QueryValidatorMixin):  # noqa: D101
    DATABASES: Strseq_t = ()
    DATASETS: Strseq_t = ()
    DIRECTED: Bool_t = ()
    DOROTHEA_LEVELS: Strseq_t = ()
    DOROTHEA_METHODS: Strseq_t = ()
    ENTITY_TYPES: Strseq_t = ()
    FIELDS: Strseq_t = ()
    FORMAT: Str_t = ()
    GENESYMBOLS: Bool_t = ()
    HEADER: Str_t = ()
    LICENSE: License_t = ()
    LIMIT: Int_t = ()
    LOOPS: Bool_t = ()
    ORGANISMS: Organism_t = ()
    PARTNERS: Strseq_t = ()
    PASSWORD: Str_t = ()
    RESOURCES: Strseq_t = ()
    SIGNED: Bool_t = ()
    SOURCE_TARGET: Bool_t = ()
    SOURCES: Strseq_t = ()
    TARGETS: Strseq_t = ()
    TFREGULONS_LEVELS: Strseq_t = ()
    TFREGULONS_METHODS: Strseq_t = ()
    TYPES: Strseq_t = ()


class ComplexesValidator(QueryValidatorMixin):  # noqa: D101
    DATABASES: Strseq_t = ()
    FIELDS: Strseq_t = ()
    FORMAT: Str_t = ()
    HEADER: Str_t = ()
    LICENSE: License_t = ()
    LIMIT: Int_t = ()
    PASSWORD: Str_t = ()
    PROTEINS: Strseq_t = ()
    RESOURCES: Strseq_t = ()
class AnnotationsValidator(QueryValidatorMixin):  # noqa: D101
    DATABASES: Strseq_t = ()
    ENTITY_TYPES: Strseq_t = ()
    FIELDS: Strseq_t = ()
    FORMAT: Str_t = ()
    GENESYMBOLS: Bool_t = ()
    HEADER: Str_t = ()
    LICENSE: License_t = ()
    LIMIT: Int_t = ()
    PASSWORD: Str_t = ()
    PROTEINS: Strseq_t = ()
    RESOURCES: Strseq_t = ()


class IntercellValidator(QueryValidatorMixin):  # noqa: D101
    ASPECT: Str_t = ()
    CATEGORIES: Str_t = ()
    CAUSALITY: Str_t = ()
    DATABASES: Strseq_t = ()
    ENTITY_TYPES: Str_t = ()
    FIELDS: Strseq_t = ()
    FORMAT: Str_t = ()
    HEADER: None_t = ()
    LICENSE: License_t = ()
    LIMIT: Int_t = ()
    PARENT: Str_t = ()
    PASSWORD: Str_t = ()
    PLASMA_MEMBRANE_PERIPHERAL: Bool_t = ()
    PLASMA_MEMBRANE_TRANSMEMBRANE: Bool_t = ()
    PMP: Bool_t = ()
    PMTM: Bool_t = ()
    PROTEINS: Strseq_t = ()
    REC: Bool_t = ()
    RECEIVER: Strseq_t = ()
    RESOURCES: Strseq_t = ()
    SCOPE: Str_t = ()
    SEC: Bool_t = ()
    SECRETED: Bool_t = ()
    SOURCE: Str_t = ()
    TOPOLOGY: Str_t = ()
    TRANS: Bool_t = ()
    TRANSMITTER: Bool_t = ()


__all__ = [
    "EnzsubValidator",
    "InteractionsValidator",
    "ComplexesValidator",
    "AnnotationsValidator",
    "IntercellValidator",
]
--------------------------------------------------------------------------------
/tests/test_interactions.py:
--------------------------------------------------------------------------------
from io import StringIO
from urllib.parse import urljoin, quote_plus
import json

import pytest

import numpy as np
import pandas as pd

from omnipath import options
from omnipath.constants import Organism, InteractionDataset
from omnipath._core.requests import Intercell
from omnipath.constants._pkg_constants import Key, Endpoint
from omnipath._core.requests.interactions._utils import import_intercell_network
from omnipath._core.requests.interactions._interactions import (
    TFmiRNA,
    Dorothea,
    OmniPath,
    TFtarget,
    CollecTRI,
    KinaseExtra,
    LigRecExtra,
    PathwayExtra,
    AllInteractions,
    Transcriptional,
    PostTranslational,
    miRNA,
    lncRNAmRNA,
)

options.fallback_urls = ()


class TestInteractions:
    def test_all_excluded_excluded(self):
        with pytest.raises(
            ValueError, match=r"After excluding `\d+` datasets, none were left."
        ):
            AllInteractions.get(exclude=list(InteractionDataset))

    def test_invalid_excluded_datasets(self):
        with pytest.raises(
            ValueError, match=r"Invalid value `foo` for `InteractionDataset`."
        ):
            AllInteractions.get(exclude="foo")
    def test_graph_empty(self):
        with pytest.raises(ValueError, match=r"No data were retrieved. Please"):
            AllInteractions.graph(pd.DataFrame())

    def test_graph_source_target(self):
        interaction = pd.DataFrame(
            {
                "source": ["alpha", "beta", "gamma"],
                "target": [0, 1, 0],
                "source_genesymbol": "bar",
                "target_genesymbol": "baz",
            }
        )
        src, tgt = AllInteractions._get_source_target_cols(interaction)

        assert src == "source_genesymbol"
        assert tgt == "target_genesymbol"

        src, tgt = AllInteractions._get_source_target_cols(
            interaction[
                interaction.columns.difference(
                    ["source_genesymbol", "target_genesymbol"]
                )
            ]
        )

        assert src == "source"
        assert tgt == "target"

    @pytest.mark.parametrize(
        "interaction",
        [
            PathwayExtra,
            KinaseExtra,
            LigRecExtra,
            miRNA,
            TFmiRNA,
            lncRNAmRNA,
            Dorothea,
            TFtarget,
            OmniPath,
            PostTranslational,
        ],
    )
    def test_resources(
        self, cache_backup, interaction, interaction_resources: bytes, requests_mock
    ):
        url = urljoin(options.url, Endpoint.RESOURCES.s)
        data = json.loads(interaction_resources)
        requests_mock.register_uri(
            "GET", f"{url}?format=json", content=interaction_resources
        )

        resources = interaction.resources()
        for resource in resources:
            assert {
                InteractionDataset(d)
                for d in data[resource][Key.QUERIES.s][
                    interaction._query_type.endpoint
                ][Key.DATASETS.s]
            } & interaction()._datasets
        assert requests_mock.called_once

    def test_invalid_organism(self):
        with pytest.raises(
            ValueError, match=r"Invalid value `foo` for `Organism`. Valid options are:"
        ):
            AllInteractions.get(**{Key.ORGANISM.s: "foo"})

    @pytest.mark.parametrize(
        "interaction",
        [
            PathwayExtra,
            KinaseExtra,
            LigRecExtra,
            miRNA,
            TFmiRNA,
            lncRNAmRNA,
            Dorothea,
            TFtarget,
            OmniPath,
            PostTranslational,
            AllInteractions,
            CollecTRI,
        ],
    )
    def test_interaction_get(
        self, cache_backup, interaction, interaction_resources: bytes, requests_mock
    ):
        url = urljoin(options.url, interaction._query_type.endpoint)
        datasets = quote_plus(
            ",".join(sorted(d.value for d in interaction()._datasets))
        )
        if getattr(interaction, "_strict_evidences", False):
            pytest.skip("Test not yet implemented")
        fields = "fields=curation_effort%2Creferences%2Csources"
        if interaction is AllInteractions:
            fields += "%2Ctype"

        requests_mock.register_uri(
            "GET",
            f"{url}?datasets={datasets}&{fields}&format=tsv",
            content=interaction_resources,
        )

        _ = interaction.get()

        assert requests_mock.called_once
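    # Both the singular `organism` and the plural `organisms` keyword should
    # resolve to the same request; the test below checks that the two
    # spellings hit the same mocked endpoint and return identical frames.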
    @pytest.mark.parametrize("organisms", list(Organism))
    def test_valid_organism(
        self, cache_backup, organisms, requests_mock, interaction_resources
    ):
        url = urljoin(options.url, AllInteractions._query_type.endpoint)
        datasets = quote_plus(",".join(sorted(d.value for d in InteractionDataset)))
        requests_mock.register_uri(
            "GET",
            f"{url}?datasets={datasets}&fields=curation_effort%2Creferences%2Csources%2Ctype&"
            f"format=json&organisms={organisms.code}",
            content=interaction_resources,
        )

        x = AllInteractions.get(organism=organisms, format="json")
        y = AllInteractions.get(organisms=organisms.value, format="json")

        assert requests_mock.called_once
        pd.testing.assert_frame_equal(x, y)

    def test_dorothea_params(self):
        params = Dorothea.params()

        assert "dorothea_levels" in params
        assert "dorothea_methods" in params
        assert "tfregulons_levels" not in params
        assert "tfregulons_methods" not in params
        assert Key.DATASETS.s not in params

    def test_tftarget_params(self):
        params = TFtarget.params()

        assert "dorothea_levels" not in params
        assert "dorothea_methods" not in params
        assert "tfregulons_levels" in params
        assert "tfregulons_methods" in params
        assert Key.DATASETS.s not in params

    @pytest.mark.parametrize(
        "interaction", [OmniPath, Transcriptional, AllInteractions]
    )
    def test_transcriptional_params(self, interaction):
        params = interaction.params()

        assert "dorothea_levels" in params
        assert "dorothea_methods" in params
        assert "tfregulons_levels" in params
        assert "tfregulons_methods" in params
        assert Key.DATASETS.s not in params

    @pytest.mark.parametrize(
        "interaction",
        [PathwayExtra, KinaseExtra, LigRecExtra, miRNA, TFmiRNA, lncRNAmRNA],
    )
    def test_rest_params(self, interaction):
        params = interaction.params()

        assert "dorothea_levels" not in params
        assert "dorothea_methods" not in params
        assert "tfregulons_levels" not in params
        assert "tfregulons_methods" not in params
        assert Key.DATASETS.s not in params
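# The tests below exercise `import_intercell_network`, which combines an
# interaction frame with intercell annotations for both endpoints; all three
# underlying requests (interactions, transmitters, receivers) are mocked and
# the result is compared against a pickled reference frame.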
class TestUtils:
    def test_import_intercell_network(
        self,
        cache_backup,
        requests_mock,
        interactions_data: bytes,
        transmitters_data: bytes,
        receivers_data: bytes,
        import_intercell_result: pd.DataFrame,
    ):
        interactions_url = urljoin(options.url, AllInteractions._query_type.endpoint)
        intercell_url = urljoin(options.url, Intercell._query_type.endpoint)

        # interactions
        requests_mock.register_uri(
            "GET",
            f"{interactions_url}?datasets=omnipath&dorothea_levels=A&fields=curation_effort%2C"
            f"references%2Csources%2Ctype&format=tsv",
            content=interactions_data,
        )
        # transmitter
        requests_mock.register_uri(
            "GET",
            f"{intercell_url}?categories=ligand&causality=trans&format=tsv&scope=generic",
            content=transmitters_data,
        )
        # receiver
        requests_mock.register_uri(
            "GET",
            f"{intercell_url}?categories=receptor&causality=rec&format=tsv&scope=generic",
            content=receivers_data,
        )

        res = import_intercell_network(
            include=InteractionDataset.OMNIPATH,
            transmitter_params={"categories": "ligand"},
            interactions_params={"dorothea_levels": "A"},
            receiver_params={"categories": "receptor"},
        )

        sortby = [
            "source",
            "target",
            "category_intercell_source",
            "category_intercell_target",
            "database_intercell_source",
            "database_intercell_target",
        ]

        for df in (res, import_intercell_result):
            df.sort_values(sortby, inplace=True)
            df.reset_index(drop=True, inplace=True)

        assert len(requests_mock.request_history) == 3
        np.testing.assert_array_equal(res.shape, import_intercell_result.shape)
        np.testing.assert_array_equal(res.index, import_intercell_result.index)
        np.testing.assert_array_equal(res.columns, import_intercell_result.columns)
        # TODO(michalk8): broken in `pandas=2.0`
        # np.testing.assert_array_equal(res.dtypes, import_intercell_result.dtypes)
        np.testing.assert_array_equal(
            res.values[~pd.isnull(res)],
            import_intercell_result.values[~pd.isnull(import_intercell_result)],
        )
        np.testing.assert_array_equal(
            pd.isnull(res), pd.isnull(import_intercell_result)
        )

    @pytest.mark.parametrize("which", ["interactions", "receivers", "transmitters"])
    def test_intercell_empty(
        self,
        which: str,
        cache_backup,
        requests_mock,
        interactions_data: bytes,
        transmitters_data: bytes,
        receivers_data: bytes,
    ):
        interactions_url = urljoin(options.url, AllInteractions._query_type.endpoint)
        intercell_url = urljoin(options.url, Intercell._query_type.endpoint)

        handle = StringIO()
        pd.DataFrame({"is_directed": []}).to_csv(handle, sep="\t", index=False)
        empty_data = bytes(handle.getvalue(), encoding="utf-8")

        if which == "interactions":
            interactions_data = empty_data
        elif which == "receivers":
            receivers_data = empty_data
        elif which == "transmitters":
            transmitters_data = empty_data
        else:
            raise AssertionError(which)

        # interactions
        requests_mock.register_uri(
            "GET",
            f"{interactions_url}?datasets=omnipath,pathwayextra&fields=curation_effort%2C"
            f"references%2Csources%2Ctype&format=tsv&resources=CellPhoneDB",
            content=interactions_data,
        )
        # transmitter
        requests_mock.register_uri(
            "GET",
            f"{intercell_url}?categories=ligand&causality=trans&format=tsv&scope=generic",
            content=transmitters_data,
        )
        # receiver
        requests_mock.register_uri(
            "GET",
            f"{intercell_url}?categories=receptor&causality=rec&format=tsv&scope=generic",
            content=receivers_data,
        )

        with pytest.raises(ValueError, match=rf"No {which} were retrieved. Please"):
            import_intercell_network(
                include=(InteractionDataset.OMNIPATH, InteractionDataset.PATHWAY_EXTRA),
                transmitter_params={"categories": "ligand"},
                interactions_params={"resources": "CellPhoneDB"},
                receiver_params={"categories": "receptor"},
            )
--------------------------------------------------------------------------------