├── tests
│   ├── __init__.py
│   ├── _data
│   │   ├── receivers.pickle
│   │   ├── interactions.pickle
│   │   ├── transmitters.pickle
│   │   └── import_intercell_result.pickle
│   ├── test_misc.py
│   ├── test_orthology.py
│   ├── test_cache.py
│   ├── test_query.py
│   ├── test_options.py
│   ├── test_compare_R.py
│   ├── test_downloader.py
│   ├── conftest.py
│   └── test_interactions.py
├── .python-version
├── omnipath
│   ├── _core
│   │   ├── __init__.py
│   │   ├── downloader
│   │   │   ├── __init__.py
│   │   │   └── _downloader.py
│   │   ├── cache
│   │   │   ├── __init__.py
│   │   │   └── _cache.py
│   │   ├── query
│   │   │   ├── __init__.py
│   │   │   ├── _types.py
│   │   │   ├── _query.py
│   │   │   └── _query_validator.py
│   │   ├── utils
│   │   │   ├── __init__.py
│   │   │   ├── _docs.py
│   │   │   ├── _homologene.py
│   │   │   ├── _orthology.py
│   │   │   ├── _static.py
│   │   │   └── _options.py
│   │   └── requests
│   │       ├── __init__.py
│   │       ├── interactions
│   │       │   ├── __init__.py
│   │       │   ├── _json.py
│   │       │   ├── _utils.py
│   │       │   └── _evidences.py
│   │       ├── _complexes.py
│   │       ├── _intercell.py
│   │       ├── _utils.py
│   │       └── _annotations.py
│   ├── _misc
│   │   ├── __init__.py
│   │   ├── utils.py
│   │   └── dtypes.py
│   ├── requests.py
│   ├── interactions.py
│   ├── constants
│   │   ├── __init__.py
│   │   ├── _pkg_constants.py
│   │   └── _constants.py
│   └── __init__.py
├── docs
│   ├── source
│   │   ├── _templates
│   │   │   └── autosummary
│   │   │       ├── base.rst
│   │   │       └── class.rst
│   │   ├── installation.rst
│   │   ├── references.rst
│   │   ├── index.rst
│   │   ├── release_notes.rst
│   │   ├── api.rst
│   │   ├── _static
│   │   │   └── css
│   │   │       └── custom.css
│   │   └── conf.py
│   ├── Makefile
│   └── make.bat
├── .bumpversion.cfg
├── .readthedocs.yml
├── .coveragerc
├── LICENSE
├── .github
│   └── workflows
│       ├── lint.yml
│       └── ci.yml
├── .pre-commit-config.yaml
├── .gitignore
├── README.rst
├── tox.ini
└── pyproject.toml

/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.python-version:
--------------------------------------------------------------------------------
1 | >=3.9
2 |
--------------------------------------------------------------------------------
/omnipath/_core/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/omnipath/_misc/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/omnipath/_core/downloader/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/omnipath/requests.py:
--------------------------------------------------------------------------------
1 | from omnipath._core.requests import *  # noqa: F401 F403
2 |
--------------------------------------------------------------------------------
/omnipath/_core/cache/__init__.py:
--------------------------------------------------------------------------------
1 | from omnipath._core.cache._cache import clear_cache
2 |
--------------------------------------------------------------------------------
/omnipath/_core/query/__init__.py:
--------------------------------------------------------------------------------
1 | from omnipath._core.query._query import QueryType
2 |
--------------------------------------------------------------------------------
/omnipath/interactions.py:
--------------------------------------------------------------------------------
1 | from omnipath._core.requests.interactions import *  # noqa: F401 F403
2 |
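The two modules above are thin re-export shims: ``omnipath.requests`` and
``omnipath.interactions`` expose the request classes implemented under
``omnipath._core.requests``. A minimal usage sketch of this public API
(assuming network access to the OmniPath web service; the exact columns
returned depend on the server)::

    import omnipath as op

    # Post-translational interactions from the core OmniPath dataset.
    interactions = op.interactions.OmniPath.get()

    # Complexes in which *all* of the listed genes participate together
    # (``Complexes.complex_genes`` is shown further below).
    cpx = op.requests.Complexes.complex_genes(["CD8A", "CD8B"], total_match=True)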
--------------------------------------------------------------------------------
/tests/_data/receivers.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saezlab/omnipath/HEAD/tests/_data/receivers.pickle
--------------------------------------------------------------------------------
/omnipath/constants/__init__.py:
--------------------------------------------------------------------------------
1 | from omnipath.constants._constants import License, Organism, InteractionDataset
2 |
--------------------------------------------------------------------------------
/tests/_data/interactions.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saezlab/omnipath/HEAD/tests/_data/interactions.pickle
--------------------------------------------------------------------------------
/tests/_data/transmitters.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saezlab/omnipath/HEAD/tests/_data/transmitters.pickle
--------------------------------------------------------------------------------
/omnipath/_core/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from omnipath._core.utils._options import options
2 | import omnipath._core.utils._static as static
3 |
--------------------------------------------------------------------------------
/tests/_data/import_intercell_result.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saezlab/omnipath/HEAD/tests/_data/import_intercell_result.pickle
--------------------------------------------------------------------------------
/docs/source/_templates/autosummary/base.rst:
--------------------------------------------------------------------------------
1 | :github_url: {{ fullname | escape }}
2 |
3 | {% extends "!autosummary/base.rst" %}
4 |
5 | .. http://www.sphinx-doc.org/en/stable/ext/autosummary.html#customizing-templates
--------------------------------------------------------------------------------
/.bumpversion.cfg:
--------------------------------------------------------------------------------
1 | [bumpversion]
2 | current_version = 1.0.11
3 | commit = True
4 | tag = True
5 | files = pyproject.toml omnipath/__init__.py
6 | parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)
7 | serialize = {major}.{minor}.{patch}
--------------------------------------------------------------------------------
/omnipath/_core/requests/__init__.py:
--------------------------------------------------------------------------------
1 | from omnipath._core.requests._request import Enzsub, SignedPTMs
2 | from omnipath._core.requests._complexes import Complexes
3 | from omnipath._core.requests._intercell import Intercell
4 | from omnipath._core.requests._annotations import Annotations
5 |
--------------------------------------------------------------------------------
/.readthedocs.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 |
3 | sphinx:
4 |   builder: html
5 |   configuration: docs/source/conf.py
6 |   fail_on_warning: true
7 |
8 | formats:
9 |   - htmlzip
10 |   - pdf
11 |
12 | build:
13 |   image: latest
14 |
15 | python:
16 |   version: 3.8
17 |   install:
18 |     - method: pip
19 |       path: .
20 |       extra_requirements:
21 |         - docs
22 |
--------------------------------------------------------------------------------
/.coveragerc:
--------------------------------------------------------------------------------
1 | [paths]
2 | source =
3 |     omnipath
4 |     */site-packages/omnipath
5 |
6 | [run]
7 | branch = true
8 | parallel = true
9 | source = omnipath
10 | omit = */__init__.py
11 |
12 | [report]
13 | exclude_lines =
14 |     \#.*pragma:\s*no.?cover
15 |
16 |     if __name__ == .__main__.
17 |
18 |     ^\s*raise AssertionError\b
19 |     ^\s*raise NotImplementedError\b
20 |     ^\s*return NotImplemented\b
21 | show_missing = true
22 | precision = 2
--------------------------------------------------------------------------------
/omnipath/_core/query/_types.py:
--------------------------------------------------------------------------------
1 | from typing import Union, Optional, Sequence
2 |
3 | try:
4 |     from typing import Literal
5 | except ImportError:
6 |     from typing_extensions import Literal
7 |
8 |
9 | Strseq_t = Optional[Union[str, Sequence[str]]]
10 | Organism_t = Literal["human", "mouse", "rat"]
11 | License_t = Literal["academic", "commercial"]
12 | Bool_t = Optional[bool]
13 | Str_t = Optional[str]
14 | Int_t = Optional[int]
15 | None_t = type(None)
--------------------------------------------------------------------------------
/docs/source/installation.rst:
--------------------------------------------------------------------------------
1 | Installation
2 | ============
3 | Omnipath requires Python version >= 3.9 to run.
4 |
5 | PyPI
6 | ~~~~
7 | Omnipath is available on PyPI::
8 |
9 |     pip install omnipath
10 |
11 | Additionally, :mod:`omnipath` may sometimes require :mod:`networkx` to create an interaction graph.
12 | This dependency can be installed as::
13 |
14 |     pip install omnipath[graph]
15 |
16 | Development Version
17 | ~~~~~~~~~~~~~~~~~~~
18 | To stay up-to-date with the newest version, run::
19 |
20 |     pip install git+https://github.com/saezlab/omnipath
--------------------------------------------------------------------------------
/docs/source/references.rst:
--------------------------------------------------------------------------------
1 | .. |br| raw:: html
2 |
3 |     <br/>
4 |
5 | References
6 | ----------
7 | .. [OmniPath] Türei, D., Valdeolivas, A. *et al.* (2020), |br|
8 |     *Integrated intra- and intercellular signaling knowledge for multicellular omics analysis*, |br|
9 |     `bioRxiv 2020.08.03.221242 <https://doi.org/10.1101/2020.08.03.221242>`__.
10 |
11 | .. [OmniPath16] Türei, D., Korcsmáros, T. & Saez-Rodriguez, J. (2016), |br|
12 |     *OmniPath: guidelines and gateway for literature-curated signaling pathway resources.*, |br|
13 |     `Nat Methods 13, 966–967 <https://www.nature.com/articles/nmeth.4077>`__.
--------------------------------------------------------------------------------
/omnipath/_core/requests/interactions/__init__.py:
--------------------------------------------------------------------------------
1 | from omnipath._core.requests.interactions._utils import import_intercell_network
2 | from omnipath._core.requests.interactions._evidences import (
3 |     only_from,
4 |     from_evidences,
5 |     filter_evidences,
6 |     unnest_evidences,
7 | )
8 | from omnipath._core.requests.interactions._interactions import (
9 |     TFmiRNA,
10 |     Dorothea,
11 |     OmniPath,
12 |     TFtarget,
13 |     CollecTRI,
14 |     KinaseExtra,
15 |     LigRecExtra,
16 |     PathwayExtra,
17 |     SmallMolecule,
18 |     AllInteractions,
19 |     Transcriptional,
20 |     PostTranslational,
21 |     miRNA,
22 |     lncRNAmRNA,
23 | )
24 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS    ?=
7 | SPHINXBUILD   ?= sphinx-build
8 | SOURCEDIR     = source
9 | BUILDDIR      = build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile clean
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
22 | clean:
23 | 	@rm -rf $(BUILDDIR)/*
24 | 	@rm -rf $(SOURCEDIR)/api/
--------------------------------------------------------------------------------
/docs/source/_templates/autosummary/class.rst:
--------------------------------------------------------------------------------
1 | :github_url: {{ fullname | escape }}
2 |
3 | {{ fullname | escape | underline}}
4 |
5 | .. currentmodule:: {{ module }}
6 |
7 | .. add toctree option to make autodoc generate the pages
8 |
9 | .. autoclass:: {{ objname }}
10 |
11 |    {% block attributes %}
12 |    {% if attributes %}
13 |    .. rubric:: Attributes
14 |
15 |    .. autosummary::
16 |       :toctree: .
17 |    {% for item in attributes %}
18 |       ~{{ fullname }}.{{ item }}
19 |    {%- endfor %}
20 |    {% endif %}
21 |    {% endblock %}
22 |
23 |    {% block methods %}
24 |    {% if methods %}
25 |    .. rubric:: Methods
26 |
27 |    .. autosummary::
28 |       :toctree: .
29 |    {% for item in methods %}
30 |    {%- if item != '__init__' %}
31 |       ~{{ fullname }}.{{ item }}
32 |    {%- endif -%}
33 |    {%- endfor %}
34 |    {% endif %}
35 |    {% endblock %}
36 |
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | 	set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=source
11 | set BUILDDIR=build
12 |
13 | if "%1" == "" goto help
14 |
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | 	echo.
18 | 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | 	echo.installed, then set the SPHINXBUILD environment variable to point
20 | 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | 	echo.may add the Sphinx directory to PATH.
22 | 	echo.
23 | 	echo.If you don't have Sphinx installed, grab it from
24 | 	echo.http://sphinx-doc.org/
25 | 	exit /b 1
26 | )
27 |
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 |
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 |
34 | :end
35 | popd
--------------------------------------------------------------------------------
/omnipath/_misc/utils.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Set, Iterable
2 |
3 |
4 | def to_set(value: Any) -> Set:
5 |     """Make sure `value` is a set, convert it if necessary.
6 |
7 |     Parameters
8 |     ----------
9 |     value
10 |         Any kind of object.
11 |
12 |     Returns
13 |     -------
14 |     `Set`
15 |         The `value` itself if it is already a set; a set containing the single
16 |         element if `value` is a scalar; a set of the elements in `value`
17 |         if `value` is iterable; an empty set if `value` is `None`.
18 |
19 |     Raises
20 |     ------
21 |     TypeError
22 |         If `value` is neither iterable nor hashable, or if it is an iterable
23 |         containing non-hashable elements.
24 |     """
25 |     if isinstance(value, Set):
26 |         return value
27 |
28 |     elif value is None:
29 |         return set()
30 |
31 |     elif isinstance(value, Iterable) and not isinstance(value, (str, bytes)):
32 |         return set(value)
33 |
34 |     else:
35 |         return {value}
36 |
--------------------------------------------------------------------------------
/omnipath/_core/requests/interactions/_json.py:
--------------------------------------------------------------------------------
1 | import json
2 |
3 | import pandas as pd
4 |
5 |
6 | def convert_json_col(df: pd.DataFrame, col: str) -> pd.DataFrame:
7 |     """
8 |     Convert a column of JSON encoded strings to nested Python objects.
9 |
10 |     Parameters
11 |     ----------
12 |     df
13 |         An OmniPath interaction data frame.
14 |     col
15 |         Name of a column with JSON encoded strings.
16 |
17 |     Returns
18 |     -------
19 |     :class:`pandas.DataFrame`
20 |         The input data frame with the column converted to nested Python
21 |         objects, i.e. lists or dicts. If the column does not exist, the
22 |         data frame is returned unmodified.
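
    Example
    -------
    A minimal sketch (the JSON payload below is made up for illustration)::

        df = pd.DataFrame({"extra_attrs": ['{"curation_effort": 1}']})
        df = convert_json_col(df, "extra_attrs")
        df.loc[0, "extra_attrs"]  # {'curation_effort': 1}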
23 | """ 24 | if col in df.columns: 25 | df[col] = df[col].apply(json.loads) 26 | 27 | return df 28 | 29 | 30 | def _json_cols_hook(df: pd.DataFrame) -> pd.DataFrame: 31 | """Handle the JSON columns in post-processing, if there is any.""" 32 | for col in ("extra_attrs", "evidences"): 33 | df = convert_json_col(df, col) 34 | 35 | return df 36 | -------------------------------------------------------------------------------- /omnipath/__init__.py: -------------------------------------------------------------------------------- 1 | from omnipath._core.cache import clear_cache 2 | from omnipath._core.utils import ( # from_first in isort is important here 3 | static, 4 | options, 5 | ) 6 | from omnipath._core.downloader._downloader import _get_server_version 7 | import omnipath.requests as requests 8 | import omnipath.constants as constants 9 | import omnipath.interactions as interactions 10 | 11 | __author__ = ", ".join(["Michal Klein", "Dénes Türei"]) 12 | __maintainer__ = ", ".join(["Michal Klein", "Dénes Türei"]) 13 | __version__ = "1.0.11" 14 | __email__ = "turei.denes@gmail.com" 15 | 16 | try: 17 | from importlib_metadata import version # Python < 3.8 18 | except ImportError: 19 | from importlib.metadata import version # Python = 3.8 20 | 21 | from packaging.version import parse 22 | 23 | __full_version__ = parse(version(__name__)) 24 | __full_version__ = ( 25 | f"{__version__}+{__full_version__.local}" if __full_version__.local else __version__ 26 | ) 27 | __server_version__ = _get_server_version(options) 28 | 29 | del parse, version, _get_server_version 30 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | Copyright (c) 2020 Saez Lab - https://saezlab.org/ 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 5 | 6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 7 | 8 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
9 |
--------------------------------------------------------------------------------
/.github/workflows/lint.yml:
--------------------------------------------------------------------------------
1 | name: Lint
2 |
3 | on:
4 |   schedule:
5 |     - cron: 00 00 * * 1  # every Monday at 00:00
6 |   push:
7 |     branches: [main]
8 |   pull_request:
9 |     branches: [main]
10 |
11 | jobs:
12 |   lint:
13 |     runs-on: ubuntu-latest
14 |     steps:
15 |       - uses: actions/checkout@v4
16 |       - name: Set up Python 3.13
17 |         uses: actions/setup-python@v5
18 |         with:
19 |           python-version: '3.13'
20 |
21 |       - name: Install uv
22 |         uses: astral-sh/setup-uv@v5
23 |         with:
24 |           enable-cache: true
25 |
26 |       - name: Install Python 3.13 for uv
27 |         run: uv python install 3.13
28 |
29 |       - name: Install dependencies
30 |         run: |
31 |           uv sync --all-extras
32 |
33 |       - uses: actions/cache@v4
34 |         with:
35 |           path: ~/.cache/pre-commit
36 |           key: precommit-${{ env.pythonLocation }}-${{ hashFiles('**/.pre-commit-config.yaml') }}
37 |
38 |       - name: Lint
39 |         run: |
40 |           uv run tox -e lint
--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
1 | |PyPI| |Downloads| |CI| |Docs| |Coverage|
2 |
3 | OmniPath
4 | ========
5 |
6 | This package is the Python equivalent of the R package `OmnipathR`_ for accessing the web service of
7 | the `OmniPath`_ database developed by `Saezlab`_.
8 |
9 | .. toctree::
10 |     :caption: General
11 |     :maxdepth: 2
12 |     :hidden:
13 |
14 |     installation
15 |     api
16 |     release_notes
17 |     references
18 |
19 | .. |PyPI| image:: https://img.shields.io/pypi/v/omnipath.svg
20 |     :target: https://pypi.org/project/omnipath
21 |     :alt: PyPI
22 |
23 | .. |Downloads| image:: https://pepy.tech/badge/omnipath
24 |     :target: https://pepy.tech/project/omnipath
25 |     :alt: Downloads
26 |
27 | .. |CI| image:: https://img.shields.io/github/actions/workflow/status/saezlab/omnipath/ci.yml?branch=master
28 |     :target: https://github.com/saezlab/omnipath/actions?query=workflow:CI
29 |     :alt: CI
30 |
31 | .. |Docs| image:: https://img.shields.io/readthedocs/omnipath
32 |     :target: https://omnipath.readthedocs.io/en/latest
33 |     :alt: Documentation
34 |
35 | .. |Coverage| image:: https://codecov.io/gh/saezlab/omnipath/branch/master/graph/badge.svg
36 |     :target: https://codecov.io/gh/saezlab/omnipath
37 |     :alt: Coverage
38 |
39 | .. _Saezlab : https://saezlab.org/
40 | .. _OmniPath : https://omnipathdb.org/
41 | .. _OmniPathR : https://github.com/saezlab/omnipathR
--------------------------------------------------------------------------------
/docs/source/release_notes.rst:
--------------------------------------------------------------------------------
1 | Release Notes
2 | =============
3 |
4 | .. role:: small
5 |
6 | Version 1.0
7 | -----------
8 |
9 | 1.0.5 :small:`2021-08-16`
10 | ~~~~~~~~~~~~~~~~~~~~~~~~~
11 | - Setting :attr:`omnipath.options.cache` to ``None`` will now disable it (use ``'memory'`` instead)
12 | - Fix writing empty values into cache
13 | - Fix memory cache not copying data before storing it
14 | - Fix various :mod:`pandas` warnings
15 | - Remove redundant step from CI
16 |
17 | 1.0.4 :small:`2020-12-27`
18 | ~~~~~~~~~~~~~~~~~~~~~~~~~
19 | - Fix recursion error
20 | - Remove duplicated ``PostTranslational`` class
21 | - Add interactions tests
22 |
23 | 1.0.3 :small:`2020-12-08`
24 | ~~~~~~~~~~~~~~~~~~~~~~~~~
25 | - Add :class:`omnipath.interactions.PostTranslational`
26 | - Add possibility to download all :class:`omnipath.requests.Annotations`
27 |
28 | 1.0.2 :small:`2020-11-29`
29 | ~~~~~~~~~~~~~~~~~~~~~~~~~
30 | - Fix small bug when converting boolean values
31 | - Fix typos
32 | - Add option to create interaction graphs
33 |
34 | 1.0.1 :small:`2020-11-29`
35 | ~~~~~~~~~~~~~~~~~~~~~~~~~
36 | - Fix bug of not correctly passing datasets in interactions
37 | - Fix the way the progress bar is getting content size
38 | - Add comparison tests with OmnipathR
39 |
40 | 1.0.0 :small:`2020-11-23`
41 | ~~~~~~~~~~~~~~~~~~~~~~~~~
42 | - Fix minor bugs
43 | - Add options improvements
44 | - Add tests
45 |
--------------------------------------------------------------------------------
/docs/source/api.rst:
--------------------------------------------------------------------------------
1 | API
2 | ===
3 |
4 | Import Omnipath as::
5 |
6 |     import omnipath as op
7 |
8 | Requests
9 | ~~~~~~~~
10 |
11 | .. module:: omnipath.requests
12 | .. currentmodule:: omnipath
13 |
14 | .. autosummary::
15 |     :toctree: api
16 |
17 |     requests.Annotations
18 |     requests.Complexes
19 |     requests.Enzsub
20 |     requests.Intercell
21 |     requests.SignedPTMs
22 |
23 | Interactions
24 | ~~~~~~~~~~~~
25 |
26 | .. module:: omnipath.interactions
27 | .. currentmodule:: omnipath
28 |
29 | .. autosummary::
30 |     :toctree: api
31 |
32 |     interactions.AllInteractions
33 |     interactions.Dorothea
34 |     interactions.KinaseExtra
35 |     interactions.LigRecExtra
36 |     interactions.OmniPath
37 |     interactions.PathwayExtra
38 |     interactions.PostTranslational
39 |     interactions.TFmiRNA
40 |     interactions.TFtarget
41 |     interactions.Transcriptional
42 |     interactions.lncRNAmRNA
43 |     interactions.miRNA
44 |     interactions.import_intercell_network
45 |
46 | Other
47 | ~~~~~
48 |
49 | Constants
50 | ---------
51 |
52 | .. module:: omnipath.constants
53 | .. currentmodule:: omnipath
54 |
55 | .. autosummary::
56 |     :toctree: api
57 |
58 |     constants.InteractionDataset
59 |     constants.License
60 |     constants.Organism
61 |
62 | Options
63 | -------
64 |
65 | .. module:: omnipath
66 | .. currentmodule:: omnipath
67 |
68 | .. autosummary::
69 |     :toctree: api
70 |
71 |     omnipath.clear_cache
72 |     omnipath.options
73 |
--------------------------------------------------------------------------------
/tests/test_misc.py:
--------------------------------------------------------------------------------
1 | from pandas.testing import assert_frame_equal
2 | import pandas as pd
3 |
4 | from omnipath._misc import dtypes
5 |
6 |
7 | class TestMisc:
8 |     def test_auto_dtype(self):
9 |         inp = pd.DataFrame(
10 |             {
11 |                 "a": ["1", "2", "3"],
12 |                 "b": ["1", "2", 3],
13 |                 "c": ["1", "2", "3.14"],
14 |                 "d": ["1", "0", "1"],
15 |                 "e": ["Y", "N", "Y"],
16 |                 "f": [2.3, 4.7, 3.1],
17 |                 "g": [False, True, True],
18 |                 "h": [1, 0, 1],
19 |                 "i": [1.0, 2.0, 3.0],
20 |                 "j": ["1.0", "2.0", "3.0"],
21 |                 "k": ["1.0", "0.0", "1.0"],
22 |                 "l": pd.Series([1, 2, 3], dtype="int8"),
23 |                 "m": pd.Series([1.09, 2.51, 3.33], dtype="float32"),
24 |             }
25 |         )
26 |
27 |         exp = pd.DataFrame(
28 |             {
29 |                 "a": [1, 2, 3],
30 |                 "b": [1, 2, 3],
31 |                 "c": [1.0, 2.0, 3.14],
32 |                 "d": [True, False, True],
33 |                 "e": [True, False, True],
34 |                 "f": [2.3, 4.7, 3.1],
35 |                 "g": [False, True, True],
36 |                 "h": [True, False, True],
37 |                 "i": [1, 2, 3],
38 |                 "j": [1, 2, 3],
39 |                 "k": [True, False, True],
40 |                 "l": pd.Series([1, 2, 3], dtype="int8"),
41 |                 "m": pd.Series([1.09, 2.51, 3.33], dtype="float32"),
42 |             }
43 |         )
44 |
45 |         out = dtypes.auto_dtype(inp)
46 |
47 |         assert_frame_equal(exp, out)
--------------------------------------------------------------------------------
/omnipath/_core/utils/_docs.py:
--------------------------------------------------------------------------------
1 | from docrep import DocstringProcessor
2 |
3 | _general_get = """
4 | Perform the query.
5 |
6 | Parameters
7 | ----------
8 | kwargs
9 |     Parameters of the request. For more information about available values, see :meth:`params`.
10 |
11 | Returns
12 | -------
13 | :class:`pandas.DataFrame`
14 |     The result of this query."""
15 | _interactions_datasets = """
16 |     - :attr:`omnipath.constants.InteractionDataset.OMNIPATH`
17 |     - :attr:`omnipath.constants.InteractionDataset.PATHWAY_EXTRA`
18 |     - :attr:`omnipath.constants.InteractionDataset.KINASE_EXTRA`
19 |     - :attr:`omnipath.constants.InteractionDataset.LIGREC_EXTRA`
20 |     - :attr:`omnipath.constants.InteractionDataset.COLLECTRI`
21 |     - :attr:`omnipath.constants.InteractionDataset.DOROTHEA`
22 |     - :attr:`omnipath.constants.InteractionDataset.TF_TARGET`
23 |     - :attr:`omnipath.constants.InteractionDataset.TF_MIRNA`
24 |     - :attr:`omnipath.constants.InteractionDataset.TF_REGULONS`
25 |     - :attr:`omnipath.constants.InteractionDataset.MIRNA_TARGET`
26 |     - :attr:`omnipath.constants.InteractionDataset.LNCRNA_MRNA`"""
27 | _validate = """
28 | Validate the ``value`` for the :attr:`param`.
29 |
30 | Parameters
31 | ----------
32 | value
33 |     Value to validate.
34 | 35 | Returns 36 | ------- 37 | The valid values.""" 38 | _query_resources = """ 39 | Return the available resources for this query.""" 40 | _query_params = """ 41 | Return the available values for each parameter, if available.""" 42 | 43 | d = DocstringProcessor( 44 | general_get=_general_get, 45 | interaction_datasets=_interactions_datasets, 46 | validate=_validate, 47 | query_params=_query_params, 48 | query_resources=_query_resources, 49 | ) 50 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | fail_fast: false 4 | default_language_version: 5 | python: python3 6 | default_stages: 7 | - pre-commit 8 | - pre-push 9 | minimum_pre_commit_version: 3.0.0 10 | repos: 11 | - repo: https://github.com/psf/black 12 | rev: 25.1.0 13 | hooks: 14 | - id: black 15 | additional_dependencies: [toml] 16 | - repo: https://github.com/timothycrosley/isort 17 | rev: 6.0.1 18 | hooks: 19 | - id: isort 20 | additional_dependencies: [toml] 21 | - repo: https://github.com/pre-commit/pre-commit-hooks 22 | rev: v5.0.0 23 | hooks: 24 | - id: check-docstring-first 25 | - id: end-of-file-fixer 26 | - id: check-added-large-files 27 | - id: mixed-line-ending 28 | - id: trailing-whitespace 29 | exclude: ^.bumpversion.cfg$ 30 | - id: check-merge-conflict 31 | - id: check-case-conflict 32 | - id: check-symlinks 33 | - id: check-yaml 34 | - id: check-ast 35 | - id: requirements-txt-fixer 36 | - repo: https://github.com/pycqa/flake8 37 | rev: 7.1.2 38 | hooks: 39 | - id: flake8 40 | additional_dependencies: [flake8-docstrings, flake8-comprehensions, flake8-bugbear] 41 | - repo: https://github.com/asottile/blacken-docs 42 | rev: 1.19.1 43 | hooks: 44 | - id: blacken-docs 45 | additional_dependencies: [black] 46 | - repo: https://github.com/asottile/pyupgrade 47 | rev: v3.19.1 48 | hooks: 49 | - id: pyupgrade 50 | args: [--py3-plus, --py36-plus] 51 | - repo: https://github.com/macisamuele/language-formatters-pre-commit-hooks 52 | rev: v2.14.0 53 | hooks: 54 | - id: pretty-format-yaml 55 | args: [--autofix, --indent, '4'] 56 | - repo: https://github.com/pre-commit/pygrep-hooks 57 | rev: v1.10.0 58 | hooks: 59 | - id: python-no-eval 60 | - id: python-use-type-annotations 61 | - id: python-check-blanket-noqa 62 | - id: rst-backticks 63 | - id: rst-directive-colons 64 | - id: rst-inline-touching-normal 65 | -------------------------------------------------------------------------------- /docs/source/_static/css/custom.css: -------------------------------------------------------------------------------- 1 | /* ReadTheDocs theme colors */ 2 | 3 | 4 | .wy-nav-top { background-color: #f07e44 } 5 | .wy-side-nav-search { background-color: #79859E; } 6 | .wy-nav-content { max-width: 840px } 7 | .wy-side-nav-search input[type="text"] { border-width: 0 } 8 | 9 | .highlight { background: rgba(50, 140, 193, 0.15); } 10 | 11 | 12 | /* Custom classes */ 13 | 14 | 15 | .small { font-size:40% } 16 | .smaller, .pr, .noteversion { font-size:70% } 17 | .noteversion::after { content: "/" } 18 | 19 | 20 | /* Code: literals and links */ 21 | 22 | 23 | .rst-content tt.literal, 24 | .rst-content code.literal { 25 | color: #404040; 26 | } 27 | /* slim font weight for non-link code */ 28 | .rst-content tt:not(.xref), 29 | .rst-content code:not(.xref), 30 | .rst-content *:not(a) > tt.xref, 31 | 
.rst-content *:not(a) > code.xref {
32 |     font-weight: normal;
33 | }
34 |
35 |
36 | /* Just one box for annotation code for a less noisy look */
37 |
38 |
39 | .rst-content .annotation {
40 |     padding: 2px 5px;
41 |     background-color: white;
42 |     border: 1px solid #e1e4e5;
43 | }
44 | .rst-content .annotation tt,
45 | .rst-content .annotation code {
46 |     padding: 0 0;
47 |     background-color: transparent;
48 |     border: 0 solid transparent;
49 | }
50 |
51 |
52 | /* Parameter lists */
53 |
54 |
55 | /* Mimic rubric style used for other headings */
56 | .rst-content dl:not(.docutils) dl > dt {
57 |     font-weight: bold;
58 |     background: none transparent;
59 |     border-left: none;
60 |     margin: 0 0 12px;
61 |     padding: 3px 0 0;
62 |     font-size: 105%;
63 | }
64 |
65 | html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.glossary):not(.simple) > dt {
66 |     color: #005A87;
67 |     border-top: solid 3px #005A87;
68 |     background: #8EBAE5;
69 | }
70 |
71 | .rst-content .viewcode-back, .rst-content .viewcode-link {
72 |     color: #005A87;
73 | }
74 |
75 | /*class="longtable docutils align-default"*/
76 | table.longtable td {
77 |     white-space: normal!important;
78 | }
79 |
80 | /* Parameters contain parts and don’t need bold font */
81 | .rst-content dl.field-list dl > dt { font-weight: unset }
82 | /* Add colon between return tuple element name and type */
83 | .rst-content dl:not(.docutils) dl > dt .classifier::before { content: ' : ' }
84 |
--------------------------------------------------------------------------------
/omnipath/constants/_pkg_constants.py:
--------------------------------------------------------------------------------
1 | from os import environ
2 | from typing import Tuple, Optional
3 | from pathlib import Path
4 |
5 | from omnipath.constants import License, Organism
6 | from omnipath.constants._constants import PrettyEnumMixin
7 |
8 | try:
9 |     from typing import final
10 | except ImportError:
11 |     from typing_extensions import final  # noqa: F401
12 |
13 |
14 | class DEFAULT_FIELD(PrettyEnumMixin):
15 |     """Default values for ``field`` parameter."""
16 |
17 |     ENZSUB = ("sources", "references", "curation_effort")
18 |     INTERACTIONS = ("sources", "references", "curation_effort")
19 |
20 |
21 | class Format(PrettyEnumMixin):
22 |     """Response format types."""
23 |
24 |     JSON = "json"
25 |     TABLE = "tab"
26 |     TEXT = "text"
27 |     TSV = "tsv"
28 |
29 |
30 | class DEFAULT_OPTIONS:
31 |     """Default options for :attr:`omnipath.options`."""
32 |
33 |     url: str = "https://omnipathdb.org"
34 |     fallback_urls: Tuple[str] = ("http://no-tls.omnipathdb.org",)
35 |     static_url: str = "http://no-tls.static.omnipathdb.org/resources"
36 |     license: Optional[License] = None
37 |     num_retries: int = 3
38 |     timeout: int = 600
39 |     chunk_size: int = 8196
40 |     cache_dir: Path = Path.home() / ".cache" / "omnipathdb"
41 |     progress_bar: bool = True
42 |     # for testing purposes
43 |     autoload: bool = environ.get("OMNIPATH_AUTOLOAD", "") == ""
44 |     convert_dtypes: bool = True
45 |
46 |
47 | class Endpoint(PrettyEnumMixin):
48 |     """Endpoints of :attr:`omnipath.options.url` that are sometimes accessed."""
49 |
50 |     RESOURCES = "resources"
51 |     ABOUT = "about"
52 |     INFO = "info"  # not used
53 |
54 |
55 | # TODO: refactor me
56 | class Key(PrettyEnumMixin):  # noqa: D101
57 |     ORGANISM = "organism"
58 |     GENESYMBOLS = "genesymbols"
59 |     FORMAT = "format"
60 |     DATASETS = "datasets"
61 |     LICENSE = "license"
62 |     QUERIES = "queries"
63 |     FIELDS = "fields"
64 |     PASSWORD = "password"
65 |     LOOPS = "loops"
66 |
INTERCELL_SUMMARY = "intercell_summary" 67 | GENERIC_CATEGORIES = "generic_categories" 68 | CATEGORY = "category" 69 | PARENT = "parent" 70 | 71 | 72 | DEFAULT_ORGANISM = Organism.HUMAN # default organism to access 73 | DEFAULT_FORMAT = Format.TSV 74 | UNKNOWN_SERVER_VERSION = ( 75 | "UNKNOWN" # server version to save under __server_version__ if we can't get it 76 | ) 77 | -------------------------------------------------------------------------------- /tests/test_orthology.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | from omnipath._core.utils._orthology import translate_column 4 | from omnipath._core.utils._homologene import download_homologene 5 | 6 | 7 | class TestHomologene: 8 | def test_download_homologene(self): 9 | homologene = download_homologene(9606, 10090) 10 | 11 | expected_shape = (17312, 2) 12 | actual_shape = homologene.shape 13 | 14 | assert expected_shape == actual_shape 15 | 16 | expected_columns = ["source", "target"] 17 | actual_columns = homologene.columns 18 | 19 | assert all(expected_columns == actual_columns) 20 | 21 | 22 | class TestOrthologyConversion: 23 | def test_complex_genes(self): 24 | df = pd.DataFrame( 25 | { 26 | "symbol": [ 27 | "CSF2RA_CSF2RB", # one to many 28 | "IFNL3_IFNLR1_IL10RB", # 3 subunits 29 | "HCST_KLRK1", # one subunit missing 30 | "CD8A_CD8B", # 1 to 1 31 | "IL4", # 1 to 1 simple protein 32 | ] 33 | } 34 | ) 35 | 36 | default = translate_column( 37 | df, 38 | column="symbol", 39 | id_type="genesymbol", 40 | target_organism=10090, 41 | ) 42 | assert all(default["symbol"] == ["Cd8a_Cd8b1", "Il4"]) 43 | 44 | to_many = translate_column( 45 | df, 46 | column="symbol", 47 | id_type="genesymbol", 48 | target_organism=10090, 49 | replace=True, 50 | keep_untranslated=False, 51 | one_to_many=2, 52 | ) 53 | expected = { 54 | "Csf2ra_Csf2rb", 55 | "Csf2ra_Csf2rb2", 56 | "Ifnl2_Ifnlr1_Il10rb", 57 | "Ifnl3_Ifnlr1_Il10rb", 58 | "Cd8a_Cd8b1", 59 | "Il4", 60 | } 61 | 62 | assert to_many.shape == (6, 1) 63 | assert set(to_many["symbol"]) == expected 64 | 65 | keep_missing = translate_column( 66 | df, 67 | column="symbol", 68 | id_type="genesymbol", 69 | target_organism=10090, 70 | replace=False, 71 | keep_untranslated=True, 72 | one_to_many=2, 73 | ) 74 | untranslated = keep_missing["symbol"].isin(["HCST_KLRK1"]) 75 | assert untranslated.any() 76 | assert keep_missing[untranslated]["orthology_target"].isna().all() 77 | -------------------------------------------------------------------------------- /omnipath/_core/utils/_homologene.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | from omnipath._core.downloader._downloader import Downloader 4 | 5 | # NOTE: this downloads homologene data from github 6 | # Either way this is not a great solution, as homologene was last updated in 2014... 
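# A minimal usage sketch of `download_homologene` below (assuming network
# access; 9606 and 10090 are the NCBI taxonomy IDs of human and mouse, as
# used in the tests):
#
#     mapping = download_homologene(9606, 10090)
#     mapping.columns  # Index(['source', 'target'], dtype='object')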
7 | RAW_TAXA_URL = ( 8 | "https://raw.githubusercontent.com/oganm/homologene/master/data-raw/taxData.tsv" 9 | ) 10 | HOMOLOGENE_URL = ( 11 | "https://raw.githubusercontent.com/oganm/homologene/master/data-raw/homologene2.tsv" 12 | ) 13 | 14 | 15 | def _get_homologene_raw(): 16 | dwnld = Downloader() 17 | homologene = ( 18 | dwnld.maybe_download( 19 | HOMOLOGENE_URL, 20 | callback=pd.read_table, 21 | is_final=True, 22 | ) 23 | .astype(str) 24 | .rename( 25 | columns={ 26 | "Gene.Symbol": "genesymbol", 27 | "Gene.ID": "gene_id", 28 | "Taxonomy": "ncbi_taxid", 29 | "HID": "hid", 30 | } 31 | ) 32 | .set_index("hid") 33 | ) 34 | return homologene 35 | 36 | 37 | def show_homologene(): 38 | """Show the homologene taxa data""" 39 | dwnld = Downloader() 40 | return dwnld.maybe_download( 41 | RAW_TAXA_URL, 42 | callback=pd.read_table, 43 | is_final=True, 44 | ) 45 | 46 | 47 | def download_homologene(source_organism, target_organism, id_type="genesymbol"): 48 | """ 49 | Download homologene information for a given source and target organism. 50 | 51 | Parameters 52 | ---------- 53 | source_organism : int, str 54 | Source organism NCBI Taxonomy ID. 55 | target_organism : int, str 56 | Target organism NCBI Taxonomy ID. 57 | id_type : str 58 | Type of ID to use for homology conversion. 59 | Can be one of 'genesymbol', 'gene_id'. 60 | 61 | Returns 62 | ------- 63 | A pandas DataFrame with homologene information. 64 | 65 | """ 66 | homologene = _get_homologene_raw() 67 | s_taxid = str(source_organism) 68 | t_taxid = str(target_organism) 69 | 70 | source_df = homologene[(homologene["ncbi_taxid"] == s_taxid)][[id_type]] 71 | target_df = homologene[(homologene["ncbi_taxid"] == t_taxid)][[id_type]] 72 | 73 | homologene = pd.merge( 74 | source_df, 75 | target_df, 76 | right_index=True, 77 | left_index=True, 78 | suffixes=("_source", "_target"), 79 | how="inner", 80 | ) 81 | homologene = homologene.reset_index().rename( 82 | {f"{id_type}_source": "source", f"{id_type}_target": "target"}, axis=1 83 | ) 84 | homologene = homologene[["source", "target"]] 85 | 86 | return homologene 87 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | docs/source/api 74 | 75 | # PyBuilder 76 | .pybuilder/ 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # IPython 83 | profile_default/ 84 | ipython_config.py 85 | 86 | # pyenv 87 | # For a library or package, you might want to ignore these files since the code is 88 | # intended to run in multiple environments; otherwise, check them in: 89 | # .python-version 90 | 91 | # pipenv 92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 95 | # install all needed dependencies. 96 | #Pipfile.lock 97 | 98 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 99 | __pypackages__/ 100 | 101 | # Celery stuff 102 | celerybeat-schedule 103 | celerybeat.pid 104 | 105 | # SageMath parsed files 106 | *.sage.py 107 | 108 | # Environments 109 | .env 110 | .venv 111 | env/ 112 | venv/ 113 | ENV/ 114 | env.bak/ 115 | venv.bak/ 116 | 117 | # Spyder project settings 118 | .spyderproject 119 | .spyproject 120 | 121 | # Rope project settings 122 | .ropeproject 123 | 124 | # mkdocs documentation 125 | /site 126 | 127 | # mypy 128 | .mypy_cache/ 129 | .dmypy.json 130 | dmypy.json 131 | 132 | # Pyre type checker 133 | .pyre/ 134 | 135 | # pytype static type analyzer 136 | .pytype/ 137 | 138 | # Cython debug symbols 139 | cython_debug/ 140 | 141 | # Pycharm stuff 142 | .idea 143 | -------------------------------------------------------------------------------- /omnipath/_core/requests/_complexes.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Union, Mapping, Iterable, Optional 2 | import logging 3 | 4 | import pandas as pd 5 | 6 | from omnipath._core.query import QueryType 7 | from omnipath._core.requests._request import OrganismGenesymbolsRemover 8 | from omnipath.constants._pkg_constants import final 9 | 10 | 11 | @final 12 | class Complexes(OrganismGenesymbolsRemover): 13 | """Request information about protein complexes from [OmniPath]_.""" 14 | 15 | __string__ = frozenset( 16 | { 17 | "name", 18 | "components", 19 | "components_genesymbols", 20 | "stoichiometry", 21 | "references", 22 | "identifiers", 23 | } 24 | ) 25 | __categorical__ = frozenset({"sources"}) 26 | 27 | _query_type = QueryType.COMPLEXES 28 | 29 | def _resource_filter(self, data: Mapping[str, Any], **_) -> bool: 30 | return True 31 | 32 | @classmethod 33 | def complex_genes( 34 | cls, 35 | genes: Union[str, Iterable[str]], 36 | complexes: Optional[pd.DataFrame] = None, 37 | total_match: bool = False, 38 | ) -> pd.DataFrame: 39 | """ 40 | Get all the molecular complexes for a given ``genes``. 
41 | 42 | This function returns all the molecular complexes where an input set of genes participate. User can choose 43 | to retrieve every complex where any of the input genes participate or just retrieve these complexes where 44 | all the genes in input set participate together. 45 | 46 | Parameters 47 | ---------- 48 | genes 49 | The genes for which complexes will be retrieved (hgnc format). 50 | complexes 51 | Complex data from :meth:`get`. If `None`, new request will be made. 52 | total_match 53 | If `True`, get only complexes where all the genes participate together, otherwise get complexes 54 | where any of the genes participate. 55 | 56 | Returns 57 | ------- 58 | :class:`pandas.DataFrame` 59 | The filtered ``complexes``. 60 | """ 61 | if isinstance(genes, str): 62 | genes = (genes,) 63 | genes = tuple(set(genes)) 64 | if not len(genes): 65 | raise ValueError("No genes have been selected.") 66 | 67 | if complexes is None: 68 | logging.info("Fetching complexes from the server") 69 | complexes = cls.get() 70 | if not isinstance(complexes, pd.DataFrame): 71 | raise TypeError( 72 | f"Expected `complexes` to be of type `pandas.DataFrame`, found `{type(complexes)}`." 73 | ) 74 | 75 | if complexes.empty: 76 | logging.warning("Complexes are empty") 77 | return complexes 78 | 79 | col = "components_genesymbols" 80 | if col not in complexes: 81 | raise KeyError(f"Unable to find `{col}` in `{complexes.columns}`.") 82 | 83 | reduction = all if total_match else any 84 | 85 | return complexes.loc[ 86 | complexes[col] 87 | .str.split("_") 88 | .apply(lambda needles: reduction(n in genes for n in needles)) 89 | ].reset_index(drop=True) 90 | 91 | 92 | __all__ = [Complexes] 93 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | from datetime import datetime 10 | 11 | # If extensions (or modules to document with autodoc) are in another directory, 12 | # add these directories to sys.path here. If the directory is relative to the 13 | # documentation root, use os.path.abspath to make it absolute, like shown here. 14 | from pathlib import Path 15 | import sys 16 | 17 | HERE = Path(__file__).parent 18 | sys.path.insert(0, str(HERE.parent.parent)) 19 | import omnipath 20 | 21 | needs_sphinx = "3.0" 22 | 23 | # -- Project information ----------------------------------------------------- 24 | 25 | project = "omnipath" 26 | author = omnipath.__author__ 27 | copyright = f"{datetime.now():%Y}, {author}" 28 | 29 | # The full version, including alpha/beta/rc tags 30 | release = f"master ({omnipath.__version__})" 31 | 32 | 33 | # -- General configuration --------------------------------------------------- 34 | 35 | # Add any Sphinx extension module names here, as strings. They can be 36 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 37 | # ones. 
38 | extensions = [ 39 | "sphinx.ext.autodoc", 40 | "sphinx.ext.napoleon", 41 | "sphinx.ext.viewcode", 42 | "sphinx_autodoc_typehints", 43 | "sphinx.ext.intersphinx", 44 | "sphinx.ext.autosummary", 45 | "sphinx_last_updated_by_git", 46 | ] 47 | intersphinx_mapping = dict( 48 | python=("https://docs.python.org/3", None), 49 | pandas=("https://pandas.pydata.org/pandas-docs/stable/", None), 50 | networkx=("https://networkx.github.io/documentation/stable/", None), 51 | ) 52 | 53 | # Add any paths that contain templates here, relative to this directory. 54 | templates_path = ["_templates"] 55 | source_suffix = [".rst"] 56 | master_doc = "index" 57 | pygments_style = "sphinx" 58 | 59 | # List of patterns, relative to source directory, that match files and 60 | # directories to ignore when looking for source files. 61 | # This pattern also affects html_static_path and html_extra_path. 62 | exclude_patterns = [] 63 | 64 | 65 | # -- Options for HTML output ------------------------------------------------- 66 | 67 | # The theme to use for HTML and HTML Help pages. See the documentation for 68 | # a list of builtin themes. 69 | # 70 | autosummary_generate = True 71 | autodoc_member_order = "alphabetical" 72 | autodoc_typehints = "signature" 73 | autodoc_docstring_signature = True 74 | autodoc_follow_wrapped = False 75 | napoleon_google_docstring = False 76 | napoleon_numpy_docstring = True 77 | napoleon_include_init_with_doc = False 78 | napoleon_use_rtype = True 79 | napoleon_use_param = True 80 | napoleon_custom_sections = [("Params", "Parameters")] 81 | todo_include_todos = False 82 | 83 | # Add any paths that contain custom static files (such as style sheets) here, 84 | # relative to this directory. They are copied after the builtin static files, 85 | # so a file named "default.css" will overwrite the builtin "default.css". 86 | html_theme = "sphinx_rtd_theme" 87 | html_static_path = ["_static"] 88 | html_theme_options = dict(navigation_depth=4, logo_only=True) 89 | html_show_sphinx = False 90 | 91 | 92 | def setup(app): 93 | app.add_css_file("css/custom.css") 94 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | |PyPI| |Downloads| |CI| |Docs| |Coverage| 2 | 3 | Python client for the OmniPath web service 4 | ========================================== 5 | 6 | Installation 7 | ------------ 8 | You can install ``omnipath`` by running:: 9 | 10 | pip install omnipath 11 | 12 | The OmniPath database 13 | --------------------- 14 | 15 | OmniPath is a database of: 16 | 17 | * Protein-protein, TF target and miRNA-mRNA interactions 18 | * Enzyme-PTM relationships 19 | * Protein complexes 20 | * Annotations of protein function, structure, localization, expression 21 | * Intercellular communication roles of proteins 22 | 23 | To learn more about OmniPath, you can visit its `website`_, or read our recent `preprint`_ 24 | or our first `paper from 2016`_, especially its `supplementary material`_. 25 | 26 | The Python client 27 | ----------------- 28 | The data is available through a web service hosted on this `website`_. 29 | This repository hosts a Python package for querying this web service and 30 | downloading data into data frames or dictionaries. 31 | 32 | 33 | The Python package for OmniPath is pypath, isn't it? 34 | ---------------------------------------------------- 35 | `pypath`_ is a tool for building the OmniPath databases in a fully customizable way. 
36 | We recommend using pypath if you want to:
37 |
38 | * Tailor the database building to your needs
39 | * Include resources not available in the public web service
40 | * Use the rich Python APIs available for the database objects
41 | * Make sure the data from the original sources is the most up-to-date
42 | * Use the methods in ``pypath.inputs`` to download data from resources
43 | * Use the various extra tools in ``pypath.utils``, e.g. for identifier
44 |   translation, homology translation, querying Gene Ontology, working with
45 |   protein sequences, processing BioPAX, etc.
46 |
47 | Is there an R client?
48 | ---------------------
49 | Yes, there is. The R/Bioconductor package ``OmnipathR`` is available on `GitHub <https://github.com/saezlab/OmnipathR>`_
50 | or in `Bioconductor <https://bioconductor.org/packages/OmnipathR>`_.
51 | The R client currently supports all features of the web service.
52 |
53 | Cytoscape
54 | ---------
55 | We even have a `Cytoscape plug-in`_.
56 | With the plug-in you are able to load networks into Cytoscape and access
57 | certain (not all) annotations of the proteins.
58 |
59 | .. |PyPI| image:: https://img.shields.io/pypi/v/omnipath.svg
60 |     :target: https://pypi.org/project/omnipath
61 |     :alt: PyPI
62 |
63 | .. |Downloads| image:: https://pepy.tech/badge/omnipath
64 |     :target: https://pepy.tech/project/omnipath
65 |     :alt: Downloads
66 |
67 | .. |CI| image:: https://img.shields.io/github/actions/workflow/status/saezlab/omnipath/ci.yml?branch=master
68 |     :target: https://github.com/saezlab/omnipath/actions?query=workflow:CI
69 |     :alt: CI
70 |
71 | .. |Coverage| image:: https://codecov.io/gh/saezlab/omnipath/branch/master/graph/badge.svg
72 |     :target: https://codecov.io/gh/saezlab/omnipath
73 |     :alt: Coverage
74 |
75 | .. |Docs| image:: https://img.shields.io/readthedocs/omnipath
76 |     :target: https://omnipath.readthedocs.io/en/latest
77 |     :alt: Documentation
78 |
79 | .. _website : https://omnipathdb.org/
80 | .. _Cytoscape plug-in : https://apps.cytoscape.org/apps/omnipath
81 | .. _pypath : https://github.com/saezlab/pypath
82 | .. _preprint : https://www.biorxiv.org/content/10.1101/2020.08.03.221242v2
83 | .. _paper from 2016 : https://www.nature.com/articles/nmeth.4077
84 | .. _supplementary material : https://static-content.springer.com/esm/art%3A10.1038%2Fnmeth.4077/MediaObjects/41592_2016_BFnmeth4077_MOESM495_ESM.pdf
85 |
--------------------------------------------------------------------------------
/omnipath/_core/requests/_intercell.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Tuple, Mapping, Iterable, Optional, Sequence
2 |
3 | import pandas as pd
4 |
5 | from omnipath._core.query import QueryType
6 | from omnipath._core.query._types import Strseq_t
7 | from omnipath._core.requests._request import OrganismGenesymbolsRemover
8 | from omnipath.constants._pkg_constants import Key, Format, final
9 | from omnipath._core.query._query_validator import _to_string_set
10 |
11 |
12 | @final
13 | class Intercell(OrganismGenesymbolsRemover):
14 |     """
15 |     Request `intercell` annotations from [OmniPath]_.
16 |
17 |     Imports the [OmniPath]_ inter-cellular communication role annotation
18 |     `database <https://omnipathdb.org/intercell>`__.
19 |
20 |     It provides information on the roles in inter-cellular signaling, e.g. if a protein is a ligand, a receptor,
21 |     an extracellular matrix (ECM) component, etc.
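
    Example
    -------
    A minimal sketch (requires network access; ``'ligand'`` is one example of
    a valid generic category, see :meth:`generic_categories` for the full list)::

        ic = Intercell.get()
        ligand_resources = Intercell.resources(generic_categories="ligand")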
22 | """ 23 | 24 | __categorical__ = frozenset( 25 | {"category", "parent", "database", "scope", "aspect", "source", "entity_type"} 26 | ) 27 | 28 | _query_type = QueryType.INTERCELL 29 | 30 | def _resource_filter( 31 | self, 32 | data: Mapping[str, Any], 33 | generic_categories: Optional[Sequence[str]] = None, 34 | **kwargs, 35 | ) -> bool: 36 | return generic_categories is None or _to_string_set( 37 | data.get(Key.GENERIC_CATEGORIES.s, set()) 38 | ) & _to_string_set(generic_categories) 39 | 40 | @classmethod 41 | def resources(cls, generic_categories: Strseq_t = None) -> Tuple[str]: 42 | """ 43 | Return the resources falling into the specified generic categories. 44 | 45 | Parameters 46 | ---------- 47 | generic_categories 48 | For valid options, see :attr:`generic_categories`. 49 | 50 | Returns 51 | ------- 52 | tuple 53 | The filtered resources according to ``generic_categories``. 54 | """ 55 | if generic_categories is None: 56 | return super().resources() 57 | 58 | if isinstance(generic_categories, str): 59 | generic_categories = (generic_categories,) 60 | if not isinstance(generic_categories, (Sequence, Iterable)): 61 | raise TypeError( 62 | f"Expected generic categories to be a `str` or an `Iterable`, " 63 | f"found `{type(generic_categories).__name__}`." 64 | ) 65 | 66 | if not len(generic_categories): 67 | raise ValueError("No generic categories have been selected.") 68 | 69 | return super().resources(**{Key.GENERIC_CATEGORIES.s: generic_categories}) 70 | 71 | @classmethod 72 | def categories(cls) -> Tuple[str]: 73 | """Return categories from the `intercell` database.""" 74 | return cls()._get_metadata(Key.CATEGORY.s) 75 | 76 | @classmethod 77 | def generic_categories(cls) -> Tuple[str]: 78 | """Return generic categories from the `intercell` database.""" 79 | return cls()._get_metadata(Key.PARENT.s) 80 | 81 | def _get_metadata(self, col: Optional[str]) -> Tuple[str]: 82 | """Return unique summary data from column ``col``.""" 83 | metadata = self._downloader.maybe_download( 84 | Key.INTERCELL_SUMMARY.s, 85 | params={Key.FORMAT.s: Format.JSON.s}, 86 | callback=self._json_reader, 87 | ) 88 | 89 | if col not in metadata.columns: 90 | raise KeyError(f"Column `{col}` not found in `{list(metadata.columns)}`.") 91 | 92 | return tuple(sorted(pd.unique(metadata[col].astype(str)))) 93 | 94 | 95 | __all__ = [Intercell] 96 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [flake8] 2 | # D104 Missing docstring in public package 3 | # F401 ... imported but unused 4 | per-file-ignores = 5 | */__init__.py: D104, F401, 6 | tests/* : D101, D102, D103, D104 7 | tests/conftest.py: D101, D102, D103, E402 8 | # D100 Missing docstring in public module 9 | # D107 Missing docstring in __init__ 10 | # W503 line break before binary operator 11 | # D105 Missing docstring in magic method 12 | # E203 whitespace before ':' 13 | # D400 First line should end with a period 14 | # false positive: 15 | # B024 ... 
is an abstract base class, but it has no abstract methods 16 | ignore = D100,D107,W503,D105,E203,D400,B024 17 | exclude = 18 | .git, 19 | __pycache__, 20 | docs/* 21 | max_line_length = 120 22 | filename = *.py 23 | 24 | [gh] 25 | python = 26 | 3.9: py39 27 | 3.10: py310 28 | 3.11: py311 29 | 3.12: py312 30 | 3.13: py313, covclean, lint, coverage, readme 31 | 32 | [pytest] 33 | python_files = test_*.py 34 | testpaths = tests/ 35 | xfail_strict = true 36 | requests_mock_case_sensitive = true 37 | 38 | [tox] 39 | min_version=3.20.0 40 | isolated_build = true 41 | skip_missing_interpreters = true 42 | envlist = 43 | covclean 44 | lint 45 | py{39,310,311,312,313} 46 | coverage 47 | readme 48 | docs 49 | 50 | [testenv] 51 | platform = 52 | linux: linux 53 | macos: (macos|osx|darwin) 54 | base_python = 55 | py39: python3.9 56 | py310: python3.10 57 | py311: python3.11 58 | py312: python3.12 59 | py313: python3.13 60 | deps = 61 | pytest 62 | pytest-mock 63 | pytest-cov 64 | pytest-socket 65 | requests-mock>=1.9.2 66 | numpy 67 | networkx 68 | # {3.10-linux}: rpy2<4 69 | # log level ERROR because we print out info from fixture and -s also prints useless stuff from R 70 | setenv = 71 | OMNIPATH_AUTOLOAD = false 72 | passenv = TOXENV,CI,CODECOV_*,GITHUB_ACTIONS 73 | usedevelop = true 74 | commands = 75 | pytest --cov --cov-append --cov-config={toxinidir}/.coveragerc --ignore docs/ {posargs:-vv {env:_PYTEST_TOX_POSARGS:}} 76 | 77 | [testenv:py313] 78 | setenv = 79 | _PYTEST_TOX_POSARGS=--test-server --log-cli-level=ERROR 80 | 81 | 82 | [testenv:covclean] 83 | description = Clean coverage files. 84 | deps = coverage 85 | skip_install = True 86 | commands = coverage erase 87 | 88 | [testenv:lint] 89 | description = Perform linting. 90 | deps = pre-commit>=2.7.1 91 | skip_install = true 92 | commands = 93 | pre-commit run --all-files --show-diff-on-failure {posargs:} 94 | 95 | [testenv:coverage] 96 | description = Report the coverage difference. 97 | deps = 98 | coverage 99 | diff_cover 100 | skip_install = true 101 | depends = py{39,310,311,312,313} 102 | parallel_show_output = True 103 | commands = 104 | coverage report --omit="tox/*" 105 | coverage xml --omit="tox/*" -o {toxinidir}/coverage.xml 106 | diff-cover --compare-branch origin/master {toxinidir}/coverage.xml 107 | 108 | [testenv:docs] 109 | description = Build the documentation. 110 | skip_install = true 111 | allowlist_externals = uv 112 | commands = 113 | uv sync --extra docs 114 | uv run sphinx-build --color -b html {toxinidir}/docs/source {toxinidir}/docs/build/html 115 | python -c 'import pathlib; print(f"Documentation is available under:", pathlib.Path(f"{toxinidir}") / "docs" / "build" / "html" / "index.html")' 116 | 117 | [testenv:clean-docs] 118 | description = Clean the documentation artifacts. 119 | deps = 120 | skip_install = true 121 | changedir = {toxinidir}/docs 122 | allowlist_externals = make 123 | commands = make clean 124 | 125 | [testenv:readme] 126 | description = Check if README renders on PyPI. 
127 | deps = twine >= 1.12.1 128 | skip_install = true 129 | allowlist_externals = uv 130 | commands = uv build --wheel --out-dir {envtmpdir}/build 131 | twine check {envtmpdir}/build/* 132 | -------------------------------------------------------------------------------- /omnipath/constants/_constants.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, ABCMeta 2 | from enum import Enum, EnumMeta, unique 3 | from typing import Any, Callable 4 | from functools import wraps 5 | 6 | 7 | def _pretty_raise_enum(cls: EnumMeta, fun: Callable) -> Callable: 8 | @wraps(fun) 9 | def wrapper(*args, **kwargs) -> Enum: 10 | try: 11 | return fun(*args, **kwargs) 12 | except ValueError as e: 13 | _cls, value, *_ = args 14 | e.args = (cls._format(value),) 15 | raise e 16 | 17 | if not issubclass(cls, ErrorFormatter): 18 | raise TypeError(f"Class `{cls}` must be subtype of `ErrorFormatter`.") 19 | elif not len(cls.__members__): 20 | # empty enum, for class hierarchy 21 | return fun 22 | 23 | return wrapper 24 | 25 | 26 | class NoValue(Enum): 27 | """Enumeration which hides its :attr:`value`.""" 28 | 29 | def __repr__(self): 30 | return f"<{self.__class__.__name__}.{self.name}>" 31 | 32 | 33 | class ErrorFormatter(ABC): # noqa: D101 34 | __error_format__ = "Invalid value `{}` for `{}`. Valid options are: `{}`." 35 | 36 | @classmethod 37 | def _format(cls, value: Any) -> str: 38 | """Format the error message for invalid ``value``.""" 39 | return cls.__error_format__.format( 40 | value, cls.__name__, [m.value for m in cls.__members__.values()] 41 | ) 42 | 43 | 44 | class FormatterMeta(EnumMeta, ABCMeta): # noqa: D101 45 | def __call__(cls, *args, **kw): # noqa: D102 46 | if getattr(cls, "__error_format__", None) is None: 47 | raise TypeError( 48 | f"Can't instantiate class `{cls.__name__}` " 49 | f"without `__error_format__` class attribute." 50 | ) 51 | return super().__call__(*args, **kw) 52 | 53 | def __new__(cls, clsname, superclasses, attributedict): # noqa: D102 54 | res = super().__new__(cls, clsname, superclasses, attributedict) 55 | res.__new__ = _pretty_raise_enum(res, res.__new__) 56 | return res 57 | 58 | 59 | class PrettyEnumMixin(ErrorFormatter, NoValue, metaclass=FormatterMeta): 60 | """Enum mixin that pretty prints when user uses invalid value.""" 61 | 62 | @property 63 | def s(self) -> str: 64 | """Return the :attr:`value` as :class:`str`.""" 65 | return str(self.value) 66 | 67 | 68 | @unique 69 | class License(PrettyEnumMixin): 70 | """License types.""" 71 | 72 | ACADEMIC = "academic" #: Academic license. 73 | COMMERCIAL = "commercial" #: Commercial license. 74 | NON_PROFIT = "non_profit" #: Non-profit license. 75 | FOR_PROFIT = "for_profit" #: For-profit license. 76 | IGNORE = "ignore" #: Ignore the license type. 77 | 78 | 79 | @unique 80 | class InteractionDataset(PrettyEnumMixin): 81 | """ 82 | Available interaction datasets in [OmniPath]_. 83 | 84 | See :mod:`omnipath.interactions` for more information. 
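Example
-------
A minimal usage sketch (illustrative only; not run as a doctest):

>>> from omnipath.constants import InteractionDataset
>>> InteractionDataset("dorothea")
<InteractionDataset.DOROTHEA>
>>> InteractionDataset("dorothea").s
'dorothea'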
85 | """ 86 | 87 | COLLECTRI = "collectri" 88 | DOROTHEA = "dorothea" 89 | KINASE_EXTRA = "kinaseextra" 90 | LIGREC_EXTRA = "ligrecextra" 91 | LNCRNA_MRNA = "lncrna_mrna" 92 | MIRNA_TARGET = "mirnatarget" 93 | OMNIPATH = "omnipath" 94 | PATHWAY_EXTRA = "pathwayextra" 95 | SMALL_MOLECULE = "small_molecule" 96 | TF_MIRNA = "tf_mirna" 97 | TF_REGULONS = "tfregulons" 98 | TF_TARGET = "tf_target" 99 | 100 | 101 | @unique 102 | class Organism(PrettyEnumMixin): 103 | """Organism types.""" 104 | 105 | HUMAN = "human" #: NCIB taxonomy id ``9606``. 106 | MOUSE = "mouse" #: NCIB taxonomy id ``10090``. 107 | RAT = "rat" #: NCIB taxonomy id ``10116``. 108 | 109 | def __new__(cls, value: str): # noqa: D102 110 | obj = object.__new__(cls) 111 | obj._code = {"human": 9606, "rat": 10116, "mouse": 10090}[value] 112 | return obj 113 | 114 | @property 115 | def code(self) -> int: 116 | """Return the code for this organism.""" 117 | return self._code 118 | 119 | 120 | __all__ = [ 121 | License, 122 | Organism, 123 | InteractionDataset, 124 | ] 125 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | schedule: 5 | - cron: 00 00 * * 1 # run every Monday at 00:00 6 | push: 7 | branches: [main] 8 | tags: [v*] 9 | pull_request: 10 | branches: [main] 11 | 12 | jobs: 13 | build: 14 | runs-on: ${{ matrix.os }} 15 | timeout-minutes: 10 16 | strategy: 17 | fail-fast: false 18 | max-parallel: 4 19 | matrix: 20 | os: [ubuntu-latest, macos-latest] 21 | python: ['3.9', '3.10', '3.11', '3.12', '3.13'] 22 | test_server: [false] 23 | exclude: 24 | - os: macos-latest 25 | include: 26 | - os: macos-latest 27 | python: '3.13' 28 | env: 29 | OS: ${{ matrix.os }} 30 | PYTHON: ${{ matrix.python }} 31 | 32 | steps: 33 | - uses: actions/checkout@v4 34 | with: 35 | fetch-depth: 0 36 | 37 | - name: Install uv 38 | uses: astral-sh/setup-uv@v5 39 | with: 40 | enable-cache: true 41 | python-version: ${{ matrix.python }} 42 | 43 | - name: Install Python 44 | run: uv python install --python-preference only-managed ${{ matrix.python }} 45 | 46 | - name: Install dependencies 47 | run: | 48 | uv sync --all-extras 49 | uv pip install codecov 50 | uv tool install \ 51 | --python-preference only-managed \ 52 | --python ${{ matrix.python }} \ 53 | --with tox-uv \ 54 | --with tox-gh \ 55 | tox 56 | 57 | 58 | - name: Install R 59 | if: matrix.test_server 60 | uses: r-lib/actions/setup-r@v2 61 | with: 62 | r-version: 4.4.1 63 | 64 | - name: Get R cache dir 65 | uses: actions/cache@v4 66 | if: matrix.test_server 67 | with: 68 | path: ~/.local/share/renv 69 | key: ${{ runner.os }}-renv-${{ hashFiles('**/renv.lock') }} 70 | 71 | - name: Install OmnipathR 72 | if: matrix.test_server 73 | run: | 74 | sudo apt-get install libcurl4-openssl-dev 75 | sudo Rscript --vanilla -e "if (!(requireNamespace('BiocManager', quietly=TRUE))) { install.packages(c('BiocManager', 'curl'), repos='https://cloud.r-project.org/') }; BiocManager::install('OmnipathR')" 76 | Rscript --vanilla -e "packageVersion('OmnipathR')" 77 | 78 | - name: Set up test suite 79 | env: 80 | TOX_GH_MAJOR_MINOR: ${{ matrix.python }} 81 | run: | 82 | tox run -vv --notest --skip-missing-interpreters true 83 | 84 | - name: Run tests 85 | env: 86 | TOX_GH_MAJOR_MINOR: ${{ matrix.python }} 87 | run: | 88 | tox run -vv --skip-pkg-install 89 | 90 | - name: Upload coverage to Codecov 91 | if: success() 92 | env: 93 | CODECOV_NAME: ${{ 
matrix.python }}-${{ matrix.os }} 94 | run: | 95 | uv run codecovcli --verbose upload-process -t ${{ secrets.CODECOV_TOKEN }} -n $CODECOV_NAME -F unittests 96 | 97 | deploy: 98 | if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') 99 | needs: build 100 | runs-on: ubuntu-latest 101 | steps: 102 | 103 | - uses: actions/checkout@v4 104 | with: 105 | fetch-depth: 0 106 | 107 | - name: Install uv 108 | uses: astral-sh/setup-uv@v5 109 | with: 110 | enable-cache: true 111 | 112 | - name: Build a binary wheel and a source tarball 113 | run: uv build 114 | 115 | - name: Publish package on PyPI 116 | uses: pypa/gh-action-pypi-publish@release/v1 117 | with: 118 | user: __token__ 119 | password: ${{ secrets.PYPI_PASSWORD }} 120 | skip_existing: true 121 | verbose: true 122 | -------------------------------------------------------------------------------- /tests/test_cache.py: -------------------------------------------------------------------------------- 1 | from copy import copy, deepcopy 2 | from typing import Optional 3 | from pathlib import Path 4 | 5 | import pytest 6 | 7 | from pandas.testing import assert_frame_equal 8 | import pandas as pd 9 | 10 | from omnipath import options, clear_cache 11 | from omnipath._core.cache._cache import FileCache, NoopCache, MemoryCache 12 | 13 | 14 | def test_clear_cache_high_lvl(cache_backup): 15 | options.cache["foo"] = 42 16 | assert len(options.cache) == 1 17 | assert options.cache["foo"] == 42 18 | 19 | clear_cache() 20 | 21 | assert len(options.cache) == 0 22 | 23 | 24 | class TestMemoryCache: 25 | def test_str_repr(self): 26 | mc = MemoryCache() 27 | 28 | assert str(mc) == f"<{mc.__class__.__name__}[size={len(mc)}]>" 29 | assert repr(mc) == f"<{mc.__class__.__name__}[size={len(mc)}]>" 30 | 31 | def test_path_is_None(self): 32 | mc = MemoryCache() 33 | assert mc.path == "memory" 34 | 35 | def test_copy_does_nothing(self): 36 | mc = MemoryCache() 37 | 38 | assert mc is mc.copy() 39 | assert mc is copy(mc) 40 | 41 | def test_deepcopy_work(self): 42 | mc = MemoryCache() 43 | 44 | assert mc is not deepcopy(mc) 45 | 46 | def test_cache_works(self): 47 | mc = MemoryCache() 48 | sentinel = object() 49 | 50 | mc["foo"] = sentinel 51 | 52 | assert len(mc) == 1 53 | assert mc["foo"] is not sentinel # copy was made 54 | 55 | mc.clear() 56 | 57 | assert len(mc) == 0 58 | 59 | def test_dataframe_modification(self): 60 | mc = MemoryCache() 61 | df = pd.DataFrame({"foo": [1, 2], "bar": [3, 4]}) 62 | 63 | mc["baz"] = df 64 | _ = df.pop("foo") 65 | 66 | assert "foo" in mc["baz"] 67 | assert "bar" in mc["baz"] 68 | 69 | @pytest.mark.parametrize("val", [None, pd.DataFrame()]) 70 | def test_add_empty_value(self, val: Optional[pd.DataFrame]): 71 | mc = MemoryCache() 72 | 73 | mc["foo"] = val 74 | 75 | assert "foo" not in mc 76 | assert len(mc) == 0 77 | 78 | def test_returns_copy(self): 79 | mc = MemoryCache() 80 | data = pd.DataFrame({"x": [0, 1]}) 81 | mc["foo"] = data 82 | 83 | assert mc["foo"] is not mc["foo"] 84 | assert_frame_equal(mc["foo"], data) 85 | 86 | 87 | class TestPickleCache: 88 | def test_invalid_path(self): 89 | with pytest.raises(TypeError): 90 | FileCache(42) 91 | 92 | def test_path(self, tmpdir): 93 | fc = FileCache(Path(tmpdir)) 94 | 95 | assert isinstance(fc.path, Path) 96 | assert str(fc.path) == str(tmpdir) 97 | 98 | def test_str_repr(self, tmpdir): 99 | fc = FileCache(Path(tmpdir)) 100 | 101 | assert ( 102 | str(fc) 103 | == f"<{fc.__class__.__name__}[size={len(fc)}, path={str(tmpdir)!r}]>" 104 | ) 105 | assert ( 106 | repr(fc) 107 
| == f"<{fc.__class__.__name__}[size={len(fc)}, path={str(tmpdir)!r}]>" 108 | ) 109 | 110 | def test_cache_works(self, tmpdir): 111 | fc = FileCache(Path(tmpdir)) 112 | sentinel = object() 113 | 114 | assert "foo" not in fc 115 | fc["foo"] = 42 116 | fc["bar.pickle"] = sentinel 117 | 118 | assert "foo" in fc 119 | assert "foo.pickle" in fc 120 | assert fc["bar.pickle"] is not sentinel 121 | 122 | def test_clear_works(self, tmpdir): 123 | fc = FileCache(Path(tmpdir)) 124 | fc["foo"] = 42 125 | assert Path(fc.path).exists() 126 | 127 | fc.clear() 128 | 129 | assert len(fc) == 0 130 | assert not Path(tmpdir).exists() 131 | 132 | @pytest.mark.parametrize("val", [None, pd.DataFrame()]) 133 | def test_add_empty_value(self, tmpdir, val: Optional[pd.DataFrame]): 134 | fc = FileCache(Path(tmpdir)) 135 | 136 | fc["foo"] = val 137 | 138 | assert "foo" not in fc 139 | assert len(fc) == 0 140 | 141 | 142 | class TestNoopCache: 143 | def test_add_value(self): 144 | nc = NoopCache() 145 | nc["foo"] = 42 146 | 147 | assert nc.path is None 148 | assert "foo" not in nc 149 | assert len(nc) == 0 150 | -------------------------------------------------------------------------------- /omnipath/_core/query/_query.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta 2 | from enum import Enum, EnumMeta 3 | from typing import Set, Tuple, Union, Optional, Sequence, FrozenSet 4 | 5 | from inflect import engine 6 | 7 | from omnipath.constants._constants import FormatterMeta, ErrorFormatter 8 | from omnipath._core.query._query_validator import ( 9 | EnzsubValidator, 10 | ComplexesValidator, 11 | IntercellValidator, 12 | AnnotationsValidator, 13 | InteractionsValidator, 14 | ) 15 | 16 | _engine = engine() 17 | 18 | 19 | def _get_synonyms(key: str) -> Tuple[str]: 20 | """ 21 | Create synonyms for ``key``. 22 | 23 | This function creates just 2 synonyms - the singular and plural case of ``key``. 24 | 25 | Parameters 26 | ---------- 27 | key 28 | Key for which to create the synonyms. 29 | 30 | Returns 31 | ------- 32 | :class:`tuple` 33 | Synonyms for ``key``. User will be able to use these submitting requests. 
34 | """ 35 | if not isinstance(key, str): 36 | raise TypeError(f"Expected a `str`, found `{type(key)}`.") 37 | 38 | singular = _engine.singular_noun(key) 39 | singular = singular if isinstance(singular, str) else key 40 | 41 | plural = _engine.plural_noun(singular) 42 | if not isinstance(plural, str): 43 | plural = key + "s" if not key.endswith("s") else key 44 | 45 | return tuple(sorted({singular, plural})) 46 | 47 | 48 | class SynonymizerMeta(EnumMeta, ABCMeta): # noqa: D101 49 | def __new__(cls, clsname, superclasses, attributedict): # noqa: D102 50 | validator = attributedict.get("__validator__", None) 51 | 52 | if validator is None: 53 | return super().__new__(cls, clsname, superclasses, attributedict) 54 | 55 | for k in list(validator): 56 | k = str(k.name) 57 | for i, synonym in enumerate(_get_synonyms(k.lower())): 58 | attributedict[f"{k}_{i}"] = synonym 59 | 60 | return super().__new__(cls, clsname, superclasses, attributedict) 61 | 62 | 63 | class QueryMeta(SynonymizerMeta, FormatterMeta): # noqa: D101 64 | pass 65 | 66 | 67 | class Query(ErrorFormatter, Enum, metaclass=QueryMeta): # noqa: D101 68 | @property 69 | def _query_name(self) -> str: 70 | """Convert the synonym to an actual query parameter name.""" 71 | return "_".join(self.name.split("_")[:-1]) 72 | 73 | @property 74 | def _delegate(self): 75 | """Delegate the validation.""" 76 | return getattr(self.__validator__, self._query_name) 77 | 78 | @property 79 | def param(self) -> str: 80 | """Get the parameter name as required by the server.""" 81 | return self._query_name.lower() 82 | 83 | @property 84 | def valid(self) -> Optional[FrozenSet[str]]: 85 | """Return the set of valid values for :attr:`param`.""" 86 | return self._delegate.valid 87 | 88 | @property 89 | def annotation(self) -> type: 90 | """Return type annotations for :attr:`param`.""" 91 | return self._delegate.annotation 92 | 93 | @property 94 | def doc(self) -> Optional[str]: 95 | """Return the docstring for :attr:`param`.""" 96 | return self._delegate.doc 97 | 98 | def __call__( 99 | self, value: Optional[Union[str, Sequence[str]]] 100 | ) -> Optional[Set[str]]: 101 | """%(validate)s""" # noqa: D401 102 | return self._delegate(value) 103 | 104 | 105 | class EnzsubQuery(Query): # noqa: D101 106 | __validator__ = EnzsubValidator 107 | 108 | 109 | class InteractionsQuery(Query): # noqa: D101 110 | __validator__ = InteractionsValidator 111 | 112 | 113 | class ComplexesQuery(Query): # noqa: D101 114 | __validator__ = ComplexesValidator 115 | 116 | 117 | class AnnotationsQuery(Query): # noqa: D101 118 | __validator__ = AnnotationsValidator 119 | 120 | 121 | class IntercellQuery(Query): # noqa: D101 122 | __validator__ = IntercellValidator 123 | 124 | 125 | class QueryType(Enum): # noqa: D101 126 | ENZSUB = EnzsubQuery 127 | INTERACTIONS = InteractionsQuery 128 | COMPLEXES = ComplexesQuery 129 | ANNOTATIONS = AnnotationsQuery 130 | INTERCELL = IntercellQuery 131 | 132 | def __call__( 133 | self, value: Optional[Union[str, Sequence[str]]] 134 | ) -> Optional[Set[str]]: 135 | """%(validate)s""" # noqa: D401 136 | return self.value(value) 137 | 138 | @property 139 | def endpoint(self) -> str: 140 | """Get the API endpoint for this type of query.""" 141 | return self.name.lower() 142 | 143 | 144 | __all__ = [ 145 | EnzsubQuery, 146 | InteractionsQuery, 147 | ComplexesQuery, 148 | AnnotationsQuery, 149 | IntercellQuery, 150 | ] 151 | -------------------------------------------------------------------------------- /omnipath/_core/utils/_orthology.py: 
-------------------------------------------------------------------------------- 1 | from itertools import product 2 | 3 | import numpy as np 4 | import pandas as pd 5 | 6 | from omnipath._core.utils._homologene import download_homologene 7 | 8 | CPLEX_PREFIX = "COMPLEX:" 9 | 10 | 11 | # Replace list elements with dictionary values 12 | def _replace_subunits(lst, my_dict, one_to_many): 13 | result = [] 14 | for x in lst: 15 | if x in my_dict: 16 | value = my_dict[x] 17 | 18 | if not isinstance(value, list): 19 | value = [value] 20 | 21 | if len(value) > one_to_many: 22 | result.append(np.nan) 23 | else: 24 | result.append(value) 25 | else: 26 | result.append(np.nan) 27 | return result 28 | 29 | 30 | def _generate_orthologs(data, column, map_dict, one_to_many): 31 | df = data[[column]].drop_duplicates().set_index(column) 32 | data[column] = data[column].replace(CPLEX_PREFIX, "", regex=True) 33 | 34 | df["subunits"] = df.index.str.split("_") 35 | df["subunits"] = df["subunits"].apply( 36 | _replace_subunits, 37 | args=( 38 | map_dict, 39 | one_to_many, 40 | ), 41 | ) 42 | df = df["subunits"].explode().reset_index() 43 | 44 | grouped = ( 45 | df.groupby(column).filter(lambda x: x["subunits"].notna().all()).groupby(column) 46 | ) 47 | 48 | # Generate all possible subunit combinations within each group 49 | complexes = [] 50 | for name, group in grouped: 51 | if group["subunits"].isnull().all(): 52 | continue 53 | subunit_lists = [list(x) for x in group["subunits"]] 54 | complex_combinations = list(product(*subunit_lists)) 55 | for comb in complex_combinations: # avoid shadowing the `complex` builtin 56 | complexes.append((name, "_".join(comb))) 57 | 58 | # Create output DataFrame 59 | col_names = ["orthology_source", "orthology_target"] 60 | result = pd.DataFrame(complexes, columns=col_names).set_index("orthology_source") 61 | 62 | return result 63 | 64 | 65 | def translate_column( 66 | data, 67 | column, 68 | id_type, 69 | target_organism, 70 | replace=True, 71 | keep_untranslated=False, 72 | source_organism=9606, 73 | one_to_many=1, 74 | ): 75 | """ 76 | Generate orthologs for a given column in a DataFrame. 77 | 78 | Parameters 79 | ---------- 80 | data : pandas.DataFrame 81 | Input DataFrame. 82 | column : str 83 | Column name to translate. 84 | id_type : str 85 | Type of ID to use for homology conversion. Can be one of 'genesymbol', 'gene_id'. 86 | target_organism : int 87 | NCBI Taxonomy ID of the target organism. 88 | replace : bool, optional 89 | Whether to replace the original column with the translated values. Default is True. 90 | keep_untranslated : bool, optional 91 | Whether to keep the untranslated values in the output DataFrame (rows that could not be translated retain their original values). Default is False. Ignored if `replace` is True. 92 | source_organism : int 93 | NCBI Taxonomy ID of the source organism. Default is 9606 (human). 94 | one_to_many : int, optional 95 | Maximum number of orthologs allowed per gene. Default is 1. 96 | 97 | Returns 98 | ------- 99 | Resulting DataFrame with translated column.
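Examples
--------
A hypothetical sketch (requires a network connection; the input frame is
made up for illustration):

>>> df = pd.DataFrame({"source": ["TP53", "EGFR"]})
>>> translate_column(
...     df, column="source", id_type="genesymbol", target_organism=10090
... )  # doctest: +SKIP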
100 | 101 | """ 102 | if not isinstance(one_to_many, int): 103 | raise ValueError("`one_to_many` should be a positive integer!") 104 | 105 | id_types = ["genesymbol", "gene_id"] 106 | if id_type not in id_types: 107 | raise ValueError(f"`id_type` should be one of: {id_types}") 108 | 109 | # get orthologs 110 | source_organism, target_organism = str(source_organism), str(target_organism) 111 | map_df = download_homologene(source_organism, target_organism, id_type).set_index( 112 | "source" 113 | ) 114 | map_dict = map_df.groupby(level=0)["target"].apply(list).to_dict() 115 | map_data = _generate_orthologs(data, column, map_dict, one_to_many) 116 | 117 | # join orthologs 118 | data = ( 119 | data.set_index(column) 120 | .merge(map_data, left_index=True, right_index=True, how="left") 121 | .reset_index(names=column) 122 | ) 123 | 124 | # replace orthologs 125 | if replace: 126 | data[column] = data["orthology_target"] 127 | data = data.drop(columns=["orthology_target"]) 128 | 129 | elif keep_untranslated: 130 | data[column] = data.apply( 131 | lambda x: ( 132 | x["orthology_target"] 133 | if not pd.isnull(x["orthology_target"]) 134 | else x[column] 135 | ), 136 | axis=1, 137 | ) 138 | 139 | data = data.dropna(subset=[column]) 140 | return data 141 | -------------------------------------------------------------------------------- /omnipath/_misc/dtypes.py: -------------------------------------------------------------------------------- 1 | from typing import Union, Iterable 2 | 3 | import numpy as np 4 | import pandas as pd 5 | 6 | __all__ = ["auto_dtype"] 7 | 8 | TRUE = frozenset(("true", "t", "yes", "y")) 9 | FALSE = frozenset(("false", "f", "no", "n")) 10 | BOOL = frozenset().union(TRUE, FALSE) 11 | NA = frozenset(("na", "NA", "NaN", "none", "None", None, pd.NA, pd.NaT, np.nan)) 12 | INT = frozenset( 13 | ("int64", "uint64", "int32", "uint32", "int16", "uint16", "int8", "uint8") 14 | ) 15 | FLT = frozenset(("float64", "float32", "float128")) 16 | NUM = INT | FLT 17 | ALL = ("int64", "uint64", "float64", "string") 18 | 19 | 20 | def auto_dtype( 21 | data: Union[pd.DataFrame, pd.Series, Iterable], 22 | categories: bool = True, 23 | **kwargs, 24 | ) -> Union[pd.DataFrame, pd.Series]: 25 | """ 26 | Convert to the best dtype 27 | 28 | Guess automatically and convert data types of a dataframe, series or other 29 | iterable. 30 | 31 | Parameters 32 | ---------- 33 | data 34 | A dataframe or an array like object such as :class:`pandas.Series`, 35 | :class:`numpy.ndarray` or list. 36 | categories 37 | Use the `category` data type for string variables with a small 38 | number of values compared to their size. 39 | kwargs 40 | For dataframes, manually set the desired data type of certain 41 | variables. 42 | 43 | Returns 44 | ------- 45 | :class:`pandas.DataFrame` or :class:`pandas.Series` or str or list 46 | A dataframe or series with its data type(s) converted. 
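Examples
--------
A minimal sketch of the intended behaviour (values chosen for illustration):

>>> auto_dtype(pd.Series(["1", "2", "3"])).dtype
dtype('int64')
>>> auto_dtype(pd.Series(["yes", "no"])).dtype
dtype('bool')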
47 | """ 48 | method = _auto_dtype_df if isinstance(data, pd.DataFrame) else _auto_dtype_series 49 | 50 | return method(data, categories=categories, **kwargs) 51 | 52 | 53 | def _auto_dtype_df( 54 | data: pd.DataFrame, 55 | categories: bool = True, 56 | **kwargs, 57 | ) -> pd.DataFrame: 58 | def process_col(col): 59 | if col in kwargs: 60 | return data[col].astype(kwargs[col]) 61 | 62 | else: 63 | return _auto_dtype_series( 64 | data[col], 65 | categories=categories, 66 | ) 67 | 68 | result = {col: process_col(col) for col in data} 69 | 70 | return pd.DataFrame(result, index=data.index) 71 | 72 | 73 | def _auto_dtype_series( 74 | data: pd.Series, 75 | categories: bool = True, 76 | **kwargs, 77 | ) -> pd.Series: 78 | data = pd.Series(data) 79 | 80 | for t in ALL: 81 | if (str(data.dtype) in INT and t in FLT) or ( 82 | t == "string" and str(data.dtype) != "object" 83 | ): 84 | continue 85 | 86 | try: 87 | converted = data.astype(t) 88 | 89 | if t in FLT: 90 | if str(data.dtype) in FLT: 91 | continue 92 | 93 | elif str(data.dtype) not in FLT: 94 | return _auto_dtype_series(converted) 95 | 96 | if t in INT: 97 | if _has_na(converted) or ( 98 | str(data.dtype) in FLT and (data != converted).any() 99 | ): 100 | continue 101 | 102 | elif sorted(converted.unique()) == [0, 1]: 103 | t = "bool" 104 | converted = converted.astype(t) 105 | 106 | elif str(data.dtype) in INT: 107 | continue 108 | 109 | elif t == "string": 110 | if not _has_na(converted) and _string_is_bool(converted): 111 | t = "bool" 112 | converted = _string_to_bool(converted) 113 | 114 | elif converted.nunique() < len(converted) / 4: 115 | t = "category" 116 | converted = converted.astype(t) 117 | 118 | return converted 119 | 120 | except (OverflowError, ValueError): 121 | continue 122 | 123 | return data 124 | 125 | 126 | def _has_na(data: Union[pd.Series, Iterable]) -> bool: 127 | """Chec if any item in the series looks like NA or NaN.""" 128 | return pd.Series(data).isin(NA).any() 129 | 130 | 131 | def _string_is_bool(data: Union[pd.Series, Iterable]) -> bool: 132 | """ 133 | Contains only bool-like values 134 | 135 | Tell if a string or object type series contains only values that we 136 | recognize as boolean values. 137 | """ 138 | return pd.Series(s.lower() for s in data).isin(BOOL).all() 139 | 140 | 141 | def _string_to_bool(data: Union[pd.Series, Iterable]) -> pd.Series: 142 | """ 143 | Convert to bool if possible 144 | 145 | Convert a series or iterable to bool type if all elements can be 146 | recognized as a boolean value. 
147 | """ 148 | if _string_is_bool(data): 149 | return pd.Series(i.lower() in TRUE for i in data) 150 | 151 | return pd.Series(data) 152 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling"] 3 | build-backend = "hatchling.build" 4 | 5 | [tool.hatch.build.targets.wheel] 6 | packages = ["omnipath"] 7 | 8 | [project] 9 | name = "omnipath" 10 | version = "1.0.11" 11 | description = "Python client for the OmniPath web service" 12 | license = "MIT" 13 | authors = [ 14 | { name = "Michal Klein", email = "michalk@apple.com" }, 15 | { name = "Dénes Türei", email = "turei.denes@gmail.com" }, 16 | ] 17 | maintainers = [ 18 | { name = "Dénes Türei", email = "turei.denes@gmail.com" }, 19 | ] 20 | readme = "README.rst" 21 | classifiers = [ 22 | "Development Status :: 5 - Production/Stable", 23 | "Intended Audience :: Developers", 24 | "Intended Audience :: Science/Research", 25 | "License :: OSI Approved :: MIT License", 26 | "Operating System :: OS Independent", 27 | "Natural Language :: English", 28 | "Typing :: Typed", 29 | "Programming Language :: Python :: 3", 30 | "Programming Language :: Python :: 3.9", 31 | "Programming Language :: Python :: 3.10", 32 | "Programming Language :: Python :: 3.11", 33 | "Programming Language :: Python :: 3.12", 34 | "Programming Language :: Python :: 3.13", 35 | "Topic :: Scientific/Engineering :: Bio-Informatics", 36 | ] 37 | keywords = [ 38 | "protein", 39 | "mRNA", 40 | "miRNA", 41 | "DNA", 42 | "signaling", 43 | "SignaLink", 44 | "SIGNOR", 45 | "InnateDB", 46 | "IntAct", 47 | "Reactome", 48 | "MPPI", 49 | "NCI-PID", 50 | "DIP", 51 | "MatrixDB", 52 | "PANTHER", 53 | "PhosphoSite", 54 | "PhosphoPoint", 55 | "DEPOD", 56 | "SPIKE", 57 | "KEGG", 58 | "Autophagy", 59 | "ARN", 60 | "NRF2ome", 61 | "Guide to Pharmacology", 62 | "UniProt", 63 | "BioPAX", 64 | "Ensembl", 65 | "Surfaceome", 66 | "Exocarta", 67 | "Vesiclepedia", 68 | "Matrisome", 69 | "Human Protein Atlas", 70 | "Compleat", 71 | "CORUM", 72 | "ComplexPortal", 73 | "BioGRID", 74 | "STRING", 75 | "ICELLNET", 76 | "Cell Surface Protein Atlas", 77 | "COSMIC", 78 | "Cancer Gene Census", 79 | "IntOGen", 80 | "TopDB", 81 | "iTALK", 82 | "Human Plasma Membrane Receptome", 83 | "EMBRACE", 84 | "ELM", 85 | "phospho.ELM", 86 | "CancerSEA", 87 | "ComPPI", 88 | "CellPhoneDB", 89 | "DGIdb", 90 | "DisGeNet", 91 | "PAZAR", 92 | "ORegAnno", 93 | "TRED", 94 | "DoRothEA", 95 | "TRRD", 96 | "CPAD", 97 | "regulation", 98 | "phosphorylation", 99 | "kinase", 100 | "phosphatase", 101 | "dephosphorylation", 102 | "directed graph", 103 | "annotations", 104 | "cancer", 105 | "complexes", 106 | "intercellular communication", 107 | "HGNC", 108 | "GPCRdb", 109 | "MSigDB", 110 | "GSEA", 111 | "Phobius", 112 | "Phosphatome", 113 | "NetPath", 114 | "gene", 115 | "gene symbol", 116 | "mouse", 117 | "rat", 118 | "HomoloGene", 119 | "integrin", 120 | "adhesion", 121 | "receptor", 122 | "ligand", 123 | "transporter", 124 | "ion channel", 125 | "disease", 126 | "activity flow", 127 | "transcription", 128 | "PPI", 129 | "subcellular localization", 130 | "pathway", 131 | "signaling pathway", 132 | ] 133 | requires-python = ">=3.9" 134 | dependencies = [ 135 | "attrs>=20.2.0", 136 | "docrep>=0.3.1", 137 | "inflect>=4.1.0", 138 | "packaging>=24.2", 139 | "pandas>=1.2.0", 140 | "requests>=2.24.0", 141 | "tqdm>=4.51.0", 142 | "typing-extensions>=3.7.4.3", 143 | 
"urllib3>=1.26.0", 144 | "wrapt>=1.12.0", 145 | ] 146 | 147 | [project.optional-dependencies] 148 | docs = [ 149 | "attrs>=20.2.0", 150 | "docrep>=0.3.1", 151 | "inflect>=4.1.0", 152 | "packaging>=24.2", 153 | "pandas>=1.2.0", 154 | "requests>=2.24.0", 155 | "sphinx>=4", 156 | "sphinx-autodoc-annotation>=1.0.post1", 157 | "sphinx-autodoc-typehints>=1.10.3", 158 | "sphinx-copybutton>=0.5.2", 159 | "sphinx-last-updated-by-git>=0.3.8", 160 | "sphinx-paramlinks>=0.6.0", 161 | "sphinx-rtd-theme>=3.0.2", 162 | "sphinx-toolbox>=3.9.0", 163 | "tqdm>=4.51.0", 164 | "typing-extensions>=3.7.4.3", 165 | "urllib3>=1.26.0", 166 | "wrapt>=1.12.0", 167 | ] 168 | tests = [ 169 | "tox>=3.20.1", 170 | ] 171 | graph = [ 172 | "networkx>=2.3.0", 173 | ] 174 | 175 | [dependency-groups] 176 | dev = [ 177 | "bump2version>=1.0.1", 178 | "codecov-cli>=10.2.0", 179 | "pre-commit>=2.7.1", 180 | "tox-gh>=1.5.0", 181 | ] 182 | 183 | [project.urls] 184 | Homepage = "https://omnipathdb.org/" 185 | Documentation = "https://omnipath.readthedocs.io/" 186 | Repository = "https://github.com/saezlab/omnipath" 187 | Issues = "https://github.com/saezlab/omnipath/issues" 188 | 189 | [tool.isort] 190 | from_first = true 191 | line_length = 88 192 | multi_line_output = 3 193 | include_trailing_comma = true 194 | use_parentheses = true 195 | known_num="numpy,pandas" 196 | sections = "FUTURE,STDLIB,THIRDPARTY,NUM,FIRSTPARTY,LOCALFOLDER" 197 | no_lines_before="LOCALFOLDER" 198 | balanced_wrapping = true 199 | force_grid_wrap = 0 200 | length_sort = "1" 201 | indent = " " 202 | skip_glob = "docs/source/conf.py" 203 | -------------------------------------------------------------------------------- /tests/test_query.py: -------------------------------------------------------------------------------- 1 | from typing import _GenericAlias 2 | from collections import defaultdict 3 | 4 | import pytest 5 | 6 | from omnipath._core.query._query import ( 7 | Query, 8 | QueryType, 9 | EnzsubQuery, 10 | ComplexesQuery, 11 | IntercellQuery, 12 | AnnotationsQuery, 13 | InteractionsQuery, 14 | _get_synonyms, 15 | ) 16 | from omnipath._core.query._query_validator import ( 17 | EnzsubValidator, 18 | ComplexesValidator, 19 | IntercellValidator, 20 | AnnotationsValidator, 21 | InteractionsValidator, 22 | _to_string_set, 23 | ) 24 | 25 | 26 | class TestUtils: 27 | def test_get_synonyms_wrong_type(self): 28 | with pytest.raises(TypeError): 29 | _get_synonyms(42) 30 | 31 | def test_get_synonyms_from_s2p(self): 32 | res = _get_synonyms("cat") 33 | 34 | assert len(res) == 2 35 | assert res == ("cat", "cats") 36 | 37 | def test_get_synonyms_from_p2s(self): 38 | res = _get_synonyms("dogs") 39 | 40 | assert len(res) == 2 41 | assert res == ("dog", "dogs") 42 | 43 | def test_to_string_set_string(self): 44 | assert {"foo"} == _to_string_set("foo") 45 | 46 | def test_to_string_set_int(self): 47 | assert {"42"} == _to_string_set(42) 48 | 49 | def test_to_string_set_sequence(self): 50 | assert {"foo", "42"} == _to_string_set(["foo", 42]) 51 | 52 | 53 | class TestValidator: 54 | @pytest.mark.parametrize( 55 | "validator", 56 | [ 57 | EnzsubValidator, 58 | InteractionsValidator, 59 | ComplexesValidator, 60 | AnnotationsValidator, 61 | IntercellValidator, 62 | ], 63 | ) 64 | def test_validator_no_server_access(self, validator): 65 | for value in list(validator): 66 | v = validator(value) 67 | 68 | assert v.valid is None 69 | assert v.doc is None 70 | 71 | assert v(None) is None 72 | assert v("foo") == {"foo"} 73 | assert v(42) == {"42"} 74 | assert v(True) == {"1"} 75 | 
assert v(False) == {"0"} 76 | assert v(["foo", "foo"]) == {"foo"} 77 | assert v(["foo", 42]) == {"foo", "42"} 78 | assert v({"foo", "bar", "baz"}) == {"foo", "bar", "baz"} 79 | 80 | assert issubclass(type(v.annotation), (_GenericAlias, type)) 81 | 82 | 83 | class TestQuery: 84 | @pytest.mark.parametrize( 85 | "query,validator", 86 | zip( 87 | [ 88 | EnzsubQuery, 89 | InteractionsQuery, 90 | ComplexesQuery, 91 | AnnotationsQuery, 92 | IntercellQuery, 93 | ], 94 | [ 95 | EnzsubValidator, 96 | InteractionsValidator, 97 | ComplexesValidator, 98 | AnnotationsValidator, 99 | IntercellValidator, 100 | ], 101 | ), 102 | ) 103 | def test_query_correct_validator(self, query, validator): 104 | assert query.__validator__ == validator 105 | 106 | def test_query_endpoint(self): 107 | for q in list(QueryType): 108 | q = QueryType(q) 109 | 110 | assert issubclass(q.value, Query) 111 | assert q.endpoint == q.name.lower() 112 | 113 | @pytest.mark.parametrize( 114 | "query,validator", 115 | zip( 116 | [ 117 | EnzsubQuery, 118 | InteractionsQuery, 119 | ComplexesQuery, 120 | AnnotationsQuery, 121 | IntercellQuery, 122 | ], 123 | [ 124 | EnzsubValidator, 125 | InteractionsValidator, 126 | ComplexesValidator, 127 | AnnotationsValidator, 128 | IntercellValidator, 129 | ], 130 | ), 131 | ) 132 | def test_query_delegation(self, query, validator, mocker): 133 | call_spy = mocker.spy(validator, "__call__") 134 | 135 | qdb = query("databases") 136 | _ = qdb("foo") 137 | 138 | call_spy.assert_called_once_with( 139 | getattr(qdb.__validator__, qdb._query_name), "foo" 140 | ) 141 | assert call_spy.spy_return == {"foo"} 142 | assert qdb.doc is None 143 | 144 | for attr in ("valid", "annotation", "doc"): 145 | m = mocker.patch.object( 146 | validator, attr, new_callable=mocker.PropertyMock, return_value="foo" 147 | ) 148 | assert getattr(qdb, attr) == "foo" 149 | 150 | m.assert_called_once() 151 | 152 | @pytest.mark.parametrize( 153 | "query", 154 | [ 155 | EnzsubQuery, 156 | InteractionsQuery, 157 | ComplexesQuery, 158 | AnnotationsQuery, 159 | IntercellQuery, 160 | ], 161 | ) 162 | def test_query_synonym(self, query): 163 | mapper = defaultdict(list) 164 | for v in list(query): 165 | name = "_".join(v.name.split("_")[:-1]) 166 | mapper[name].append(v.value) 167 | 168 | for vs in mapper.values(): 169 | assert len(vs) == 2 170 | assert len({query(v).param for v in vs}) == 1 171 | -------------------------------------------------------------------------------- /omnipath/_core/cache/_cache.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from copy import copy 3 | from shutil import rmtree 4 | from typing import Any, Union, Optional 5 | from pathlib import Path 6 | import os 7 | import pickle 8 | 9 | import pandas as pd 10 | 11 | 12 | def _is_empty(data: Optional[pd.DataFrame]) -> bool: 13 | return data is None or (isinstance(data, pd.DataFrame) and not len(data)) 14 | 15 | 16 | class Cache(ABC): 17 | """ 18 | Abstract class which defines the caching interface. 19 | 20 | Empty values (`None` or an empty :class:`pandas.DataFrame`) will not be saved in the cache.
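Example
-------
A sketch of typical usage through the global options object (illustrative only):

>>> from omnipath import options
>>> options.cache["foo"] = 42  # doctest: +SKIP
>>> options.cache["foo"]  # doctest: +SKIP
42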
21 | """ 22 | 23 | @abstractmethod 24 | def __getitem__(self, key: str) -> Optional[Any]: 25 | pass 26 | 27 | @abstractmethod 28 | def __setitem__(self, key: str, value: Any) -> None: 29 | pass 30 | 31 | @abstractmethod 32 | def __len__(self) -> int: 33 | pass 34 | 35 | @abstractmethod 36 | def clear(self) -> None: # noqa: D102 37 | pass 38 | 39 | @property 40 | @abstractmethod 41 | def path(self) -> Optional[Union[str, Path]]: # noqa: D102 42 | pass 43 | 44 | @abstractmethod 45 | def __str__(self) -> str: 46 | pass 47 | 48 | def __repr__(self) -> str: 49 | return str(self) 50 | 51 | 52 | class FileCache(Cache): 53 | """ 54 | Cache which persists the data into :mod:`pickle` files. 55 | 56 | Parameters 57 | ---------- 58 | path 59 | Path to a directory where the files will be stored. 60 | """ 61 | 62 | _suffix = ".pickle" 63 | 64 | def __init__(self, path: Union[str, Path]): 65 | if not isinstance(path, (str, Path)): 66 | raise TypeError( 67 | f"Expected `path` to be either `str` or `pathlib.Path`, " 68 | f"found `{type(path).__name__}`." 69 | ) 70 | if not str(path): 71 | raise ValueError("Empty cache path.") 72 | 73 | self._cache_dir = Path(path) 74 | 75 | def __contains__(self, key: str) -> bool: 76 | if not key.endswith(self._suffix): 77 | key += self._suffix 78 | 79 | return (self._cache_dir / key).is_file() 80 | 81 | def __setitem__(self, key: str, value: Any) -> None: 82 | if _is_empty(value): 83 | return 84 | self._cache_dir.mkdir(parents=True, exist_ok=True) 85 | 86 | fname = str(key) 87 | if not fname.endswith(self._suffix): 88 | fname += self._suffix 89 | 90 | with open(self._cache_dir / fname, "wb") as fout: 91 | pickle.dump(value, fout) 92 | 93 | def __getitem__(self, key: str) -> Any: 94 | if not key.endswith(self._suffix): 95 | key += self._suffix 96 | 97 | if not (self._cache_dir / key).is_file(): 98 | raise KeyError(self._cache_dir / key) 99 | 100 | with open(self._cache_dir / key, "rb") as fin: 101 | return pickle.load(fin) 102 | 103 | def __len__(self) -> int: 104 | return ( 105 | len([f for f in os.listdir(self.path) if str(f).endswith(self._suffix)]) 106 | if self.path.is_dir() 107 | else 0 108 | ) 109 | 110 | @property 111 | def path(self) -> Path: 112 | """Return the directory where the cache files are stored.""" 113 | return self._cache_dir 114 | 115 | def clear(self) -> None: 116 | """Remove all files and the directory under :attr:`path`.""" 117 | if self._cache_dir.is_dir(): 118 | rmtree(self._cache_dir) 119 | 120 | def __str__(self) -> str: 121 | return f"<{self.__class__.__name__}[size={len(self)}, path={str(self.path)!r}]>" 122 | 123 | 124 | class MemoryCache(dict, Cache): 125 | """ 126 | Cache which persists the data into the memory. 127 | 128 | Objects stored in the cache are copied using :func:`copy.copy``. 
129 | """ 130 | 131 | @property 132 | def path(self) -> Optional[str]: 133 | """Return `'memory'`.""" 134 | return "memory" 135 | 136 | def __setitem__(self, key: str, value: Any) -> None: 137 | if _is_empty(value): 138 | return 139 | # the value is usually a dataframe (copy for safety) 140 | return super().__setitem__(key, copy(value)) 141 | 142 | def __getitem__(self, key: str) -> Any: 143 | return copy(super().__getitem__(key)) 144 | 145 | def __str__(self) -> str: 146 | return f"<{self.__class__.__name__}[size={len(self)}]>" 147 | 148 | def __repr__(self) -> str: 149 | return str(self) 150 | 151 | def __copy__(self) -> "MemoryCache": 152 | return self 153 | 154 | def copy(self) -> "MemoryCache": 155 | """Return self.""" 156 | return self 157 | 158 | 159 | class NoopCache(MemoryCache): 160 | """Cache which doesn't save anything.""" 161 | 162 | @property 163 | def path(self) -> Optional[str]: 164 | """Return `None`.""" 165 | return None 166 | 167 | def __setitem__(self, key: str, value: Any) -> None: 168 | pass 169 | 170 | def __str__(self): 171 | return f"<{self.__class__.__name__}>" 172 | 173 | 174 | def clear_cache() -> None: 175 | """Remove all cached data from :attr:`omnipath.options.cache`.""" 176 | from omnipath import options 177 | 178 | options.cache.clear() 179 | 180 | 181 | __all__ = [clear_cache] 182 | -------------------------------------------------------------------------------- /tests/test_options.py: -------------------------------------------------------------------------------- 1 | from os import remove 2 | from typing import Optional 3 | from pathlib import Path 4 | from configparser import NoSectionError 5 | 6 | import pytest 7 | 8 | from omnipath.constants import License 9 | from omnipath._core.utils._options import Options 10 | from omnipath.constants._pkg_constants import DEFAULT_OPTIONS 11 | 12 | 13 | class TestOptions: 14 | def test_invalid_url_type(self, options: Options): 15 | with pytest.raises(TypeError): 16 | options.url = 42 17 | 18 | def test_invalid_url(self, options: Options): 19 | with pytest.raises(ValueError): 20 | options.url = "foo" 21 | 22 | def test_invalid_license(self, options: Options): 23 | with pytest.raises(ValueError): 24 | options.license = "foo" 25 | 26 | def test_invalid_cache_type(self, options: Options): 27 | with pytest.raises(TypeError): 28 | options.cache = 42 29 | 30 | def test_invalid_password_type(self, options: Options): 31 | with pytest.raises(TypeError): 32 | options.password = 42 33 | 34 | def test_invalid_num_retries(self, options: Options): 35 | with pytest.raises(ValueError): 36 | options.num_retries = -1 37 | 38 | def test_invalid_timeout(self, options: Options): 39 | with pytest.raises(ValueError): 40 | options.timeout = 0 41 | 42 | def test_invalid_chunk_size(self, options: Options): 43 | with pytest.raises(ValueError): 44 | options.chunk_size = 0 45 | 46 | def test_from_options_invalid_type(self): 47 | with pytest.raises(TypeError): 48 | Options.from_options("foo") 49 | 50 | def test_url_localhost(self, options: Options): 51 | options.url = "https://localhost" 52 | 53 | assert options.url == "https://localhost" 54 | 55 | @pytest.mark.parametrize("license", list(License)) 56 | def test_valid_license(self, options: Options, license: License): 57 | options.license = license.value 58 | 59 | assert isinstance(options.license, License) 60 | assert options.license == license 61 | 62 | @pytest.mark.parametrize("pwd", ["foo", None]) 63 | def test_password(self, options: Options, pwd: Optional[str]): 64 | options.password = pwd 
65 | 66 | assert options.password == pwd 67 | 68 | def test_from_options(self, options: Options): 69 | new_opt = Options.from_options(options) 70 | 71 | for k, v in options.__dict__.items(): 72 | assert getattr(new_opt, k) == v 73 | 74 | def test_from_options_new_values(self, options: Options): 75 | new_opt = Options.from_options( 76 | options, autoload=not options.autoload, num_retries=0 77 | ) 78 | 79 | for k, v in options.__dict__.items(): 80 | if k not in ("autoload", "num_retries"): 81 | assert getattr(new_opt, k) == v 82 | 83 | assert new_opt.autoload != options.autoload 84 | assert new_opt.num_retries == 0 85 | 86 | def test_from_config_no_file(self, config_backup): 87 | if Path(Options.config_path).exists(): 88 | remove(Options.config_path) 89 | 90 | new_opt = Options.from_config() 91 | 92 | for k, v in DEFAULT_OPTIONS.__dict__.items(): 93 | if hasattr(new_opt, k) and not k.startswith("_"): 94 | assert getattr(new_opt, k) == v 95 | 96 | def test_from_config_section_is_not_url(self): 97 | with pytest.raises(NoSectionError, match=r"No section: 'http://foo.bar'"): 98 | Options.from_config("http://foo.bar") 99 | 100 | def test_write_config(self, options: Options, config_backup): 101 | options.timeout = 1337 102 | options.license = License.COMMERCIAL 103 | options.password = "foobarbaz" 104 | options.fallback_urls = DEFAULT_OPTIONS.fallback_urls 105 | options.write() 106 | 107 | new_opt = Options.from_config() 108 | for k, v in options.__dict__.items(): 109 | if k == "cache": 110 | assert type(new_opt.cache) == type(options.cache) # noqa: E721 111 | elif k == "password": 112 | # don't store the password in the file 113 | assert getattr(new_opt, k) is None 114 | elif k not in ("timeout", "license"): 115 | assert getattr(new_opt, k) == v 116 | 117 | assert new_opt.timeout == 1337 118 | assert new_opt.license == License.COMMERCIAL 119 | 120 | def test_write_new_section(self, options: Options, config_backup): 121 | options.timeout = 42 122 | options.fallback_urls = DEFAULT_OPTIONS.fallback_urls 123 | options.write("https://foo.bar") 124 | 125 | new_opt = Options.from_config("https://foo.bar") 126 | assert options is not new_opt 127 | for k, v in options.__dict__.items(): 128 | if k == "url": 129 | assert v == options.url 130 | assert new_opt.url == "https://foo.bar" 131 | elif k == "cache": 132 | assert type(new_opt.cache) == type(options.cache) # noqa: E721 133 | else: 134 | assert getattr(new_opt, k) == v 135 | 136 | def test_write_new_section_not_url(self, options: Options, config_backup): 137 | with pytest.raises(ValueError, match=r"Invalid URL: `foobar`."): 138 | options.write("foobar") 139 | 140 | def test_contextmanager(self, options: Options): 141 | with options as new_opt: 142 | assert options is not new_opt 143 | for k, v in options.__dict__.items(): 144 | if k == "cache": 145 | assert type(new_opt.cache) == type(options.cache) # noqa: E721 146 | else: 147 | assert getattr(new_opt, k) == v 148 | -------------------------------------------------------------------------------- /omnipath/_core/utils/_static.py: -------------------------------------------------------------------------------- 1 | from typing import List, Union, Literal, Optional 2 | from functools import partial 3 | import re 4 | import logging 5 | import warnings 6 | 7 | import requests 8 | 9 | import pandas as pd 10 | 11 | from omnipath._core.utils import _options as opt 12 | from omnipath._core.downloader._downloader import Downloader 13 | 14 | 15 | def static_tables() -> pd.DataFrame: 16 | """ 17 | List the static 
tables available from OmniPath. 18 | 19 | Returns 20 | ------- 21 | A data frame with metadata about the static tables. 22 | """ 23 | refile = re.compile(  # parses one file row of an Apache-style directory listing 24 | r'<a href="[^"]+">([^<]+)</a>' 25 | r"\s+(\d{2}-\w+-\d{4}) (\d{2}:\d{2})" 26 | r"\s+(\d+)[\r\n]*" 27 | ) 28 | 29 | req = requests.get(opt.options.static_url, stream=True) 30 | 31 | result = pd.DataFrame( 32 | [ 33 | refile.match(line.decode("utf-8")).groups() 34 | for line in req.raw.readlines()[5:-2] 35 | ], 36 | columns=["name", "date", "time", "size"], 37 | ) 38 | 39 | result["url"] = [f"{opt.options.static_url}/{name}" for name in result.name] 40 | 41 | result = pd.concat( 42 | [ 43 | result, 44 | result.name.str.extract( 45 | r"(?P<query>[\w]+)_" 46 | r"(?P<resource>\w+)_" 47 | r"(?P<organism>\d+)\.tsv\.gz", 48 | expand=True, 49 | ), 50 | ], 51 | axis=1, 52 | ) 53 | 54 | return result 55 | 56 | 57 | def static_table( 58 | query: Literal["annotations", "interactions"], 59 | resource: str, 60 | organism: Union[int, str], 61 | strict_evidences: bool = True, 62 | dorothea_levels: Optional[List[Literal["A", "B", "C", "D"]]] = None, 63 | wide: bool = True, 64 | ) -> pd.DataFrame: 65 | """ 66 | Download a static table from OmniPath. 67 | 68 | A few resources and datasets are available also as plain TSV files and 69 | can be accessed without TLS. The purpose of these tables is to make the 70 | most often used OmniPath data available on computers with configuration 71 | issues. These tables are not the recommended way to access OmniPath 72 | data, and a warning is issued each time they are accessed. 73 | 74 | Parameters 75 | ---------- 76 | query 77 | A query type such as "annotations" or "interactions". 78 | resource 79 | Name of the resource or dataset, such as 80 | "CollecTRI" or "PROGENy". 81 | organism 82 | NCBI Taxonomy of the organism: 9606 for human, 83 | 10090 for mouse and 10116 for rat. 84 | strict_evidences 85 | Restrict the evidences to the queried 86 | datasets and resources. If set to False, the directions and effect signs 87 | and references might be based on other datasets and resources. 88 | wide 89 | Convert the annotation table to wide format, which 90 | corresponds more or less to the original resource. If the data comes 91 | from more than one resource, a list of wide tables will be returned. 92 | See examples at ``pivot_annotations``. 93 | dorothea_levels 94 | A list of confidence levels in case the accessed resource is DoRothEA. 95 | In DoRothEA, every TF-target interaction has a confidence score 96 | ranging from A to E, with A being the most reliable. 97 | By default here we take A, B and C level interactions 98 | (``["A", "B", "C"]``). 99 | Note that E-level interactions are not available in OmniPath. 100 | 101 | Returns 102 | ------- 103 | A data frame with the requested resource. 104 | """ 105 | msg = ( 106 | f"Accessing `{resource}` as a static table. This is not the " 107 | "recommended way to access OmniPath data; it is only a backup " 108 | "plan for situations when our server or your computer is " 109 | "experiencing issues."
110 | ) 111 | logging.warning(msg) 112 | warnings.warn(msg) # noqa: B028 113 | 114 | organism = str(organism) 115 | query_l = query.lower() 116 | resource_l = resource.lower() 117 | resources = () if resource_l in ("collectri", "dorothea") else (resource,) 118 | datasets = () if resources else (resource_l,) 119 | 120 | if query_l == "annotations": 121 | from omnipath._core.requests._annotations import Annotations as req_cls 122 | 123 | elif query_l == "interactions": 124 | from omnipath._core.requests.interactions._interactions import ( 125 | AllInteractions as req_cls, 126 | ) 127 | from omnipath._core.requests.interactions._interactions import ( 128 | InteractionDataset, 129 | ) 130 | 131 | s = static_tables() 132 | 133 | s = s[ 134 | (s["query"] == query_l) 135 | & (s.resource.str.lower() == resource_l) 136 | & (s.organism == organism) 137 | ].reset_index() 138 | 139 | if s.shape[0] == 0: 140 | msg = ( 141 | f"No static table is available for query `{query}`, resource " 142 | f"`{resource}` and organism `{organism}`. For a list of the " 143 | "available tables see `static_tables()`." 144 | ) 145 | logging.error(msg) 146 | raise ValueError(msg) 147 | 148 | url = s.url[0] 149 | logging.debug(f"Downloading static table from `{url}`.") 150 | downloader = Downloader() 151 | callback = partial( 152 | pd.read_csv, 153 | sep="\t", 154 | header=0, 155 | low_memory=False, 156 | compression="gzip", 157 | ) 158 | result = downloader.maybe_download(url, callback=callback, is_final=True) 159 | logging.debug(f"Finished downloading static table from `{url}`.") 160 | omnipath_req = req_cls() 161 | omnipath_req._last_param = { 162 | "original": {"strict_evidences": strict_evidences}, 163 | "final": {"resources": resources, "datasets": datasets}, 164 | } 165 | omnipath_req._wide = wide 166 | omnipath_req._datasets = {InteractionDataset(d) for d in datasets} 167 | logging.debug("Static table: converting dtypes.") 168 | result = omnipath_req._convert_dtypes(result) 169 | logging.debug("Static table: post-processing.") 170 | result = omnipath_req._post_process(result) 171 | 172 | if resource_l == "dorothea": 173 | logging.debug("Static table: filtering for DoRothEA confidence levels.") 174 | dorothea_levels = set(dorothea_levels or ("A", "B", "C"))  # fall back to the documented default levels 175 | result = result[result.dorothea_level.isin(dorothea_levels)] 176 | 177 | return result 178 | -------------------------------------------------------------------------------- /omnipath/_core/requests/_utils.py: -------------------------------------------------------------------------------- 1 | from types import MethodType 2 | from typing import * # noqa: F401 F403 (because of the argspec factory) 3 | from typing import Any, Dict, Union, Callable, Iterable, Optional 4 | from inspect import Parameter, isabstract 5 | import inspect 6 | 7 | import wrapt 8 | import typing_extensions # noqa: F401 9 | 10 | import pandas as pd 11 | 12 | from omnipath._core.utils._docs import d 13 | 14 | 15 | @d.get_full_description(base="get") 16 | @d.get_sections(base="get", sections=["Parameters", "Returns"]) 17 | def _get_helper(cls: type, **kwargs) -> pd.DataFrame: 18 | """ 19 | Perform a request to the [OmniPath]_ web service. 20 | 21 | Parameters 22 | ---------- 23 | kwargs 24 | Additional query parameters. 25 | 26 | Returns 27 | ------- 28 | :class:`pandas.DataFrame` 29 | The result, which depends on the type of the request and the supplied parameters.
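Example
-------
A hypothetical call through one of the request classes (the resource name
is made up for illustration):

>>> import omnipath
>>> df = omnipath.requests.Complexes.get(resources="CORUM")  # doctest: +SKIP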
30 | """ 31 | return cls()._get(**kwargs) 32 | 33 | 34 | def _inject_api_method( 35 | clazz: type, 36 | ) -> None: 37 | """ 38 | Create a decorator which does nothing except for modifying the function signature in the docstring. 39 | 40 | The function to be decorated must be a class method and is allowed only to have positional arguments, 41 | and variable keyword arguments (**kwargs). 42 | 43 | The resulting decorated function will containing only the positional arguments (including original type annotations) 44 | and possibly keyword only arguments. In this example signature might def fn(foo, bar, *, baz, quux), 45 | `baz` and `quux` are the keyword only arguments. 46 | 47 | Parameters 48 | ---------- 49 | clazz 50 | The class for which to create the query. Must not be abstract. 51 | 52 | Returns 53 | ------- 54 | :class:`callable` 55 | The decorator as described above. 56 | """ 57 | 58 | def argspec_factory(orig_fn: Callable) -> Callable: 59 | orig_fn = getattr(orig_fn, "__func__", orig_fn) 60 | orig_params = inspect.signature(orig_fn).parameters 61 | # maintain the original signature if the subclass has overriden the method 62 | # this will lose the docstring of the original function 63 | parameters = { 64 | k: v 65 | for k, v in orig_params.items() 66 | if k != "cls" 67 | and v.kind in (Parameter.POSITIONAL_ONLY, Parameter.POSITIONAL_OR_KEYWORD) 68 | } 69 | annotations = { 70 | k: v for k, v in clazz._annotations().items() if k not in parameters 71 | } 72 | 73 | for c in clazz.__mro__: 74 | if c.__name__ == "InteractionRequest": 75 | parameters["strict_evidences"] = Parameter( 76 | "strict_evidences", 77 | kind=Parameter.KEYWORD_ONLY, 78 | default=None, 79 | annotation=Optional[bool], 80 | ) 81 | 82 | sig = inspect.signature(lambda _: _) 83 | sig = sig.replace( 84 | parameters=[Parameter("cls", kind=Parameter.POSITIONAL_ONLY)] 85 | + list(parameters.values()) 86 | + [ 87 | Parameter(k, kind=Parameter.KEYWORD_ONLY, annotation=a) 88 | for k, a in sorted(annotations.items()) 89 | ] 90 | + [Parameter("kwargs", kind=Parameter.VAR_KEYWORD)] 91 | ) 92 | # modify locals() for argspec factory 93 | import omnipath # noqa: F401 94 | 95 | NoneType, pandas = type(None), pd 96 | adapter_code = f"def adapter{sig}: pass".replace(" /,", "") 97 | exec_locals = locals() 98 | exec(adapter_code, globals(), exec_locals) 99 | return exec_locals["adapter"] 100 | 101 | if not isinstance(clazz, type): 102 | raise TypeError( 103 | f"Expected `clazz` to be a type, found `{type(clazz).__name__}`." 
104 | ) 105 | 106 | if isabstract(clazz): 107 | return 108 | 109 | @wrapt.decorator(adapter=wrapt.adapter_factory(argspec_factory)) 110 | def wrapper(wrapped, _instance, args, kwargs): 111 | return wrapped(*args, **kwargs) 112 | 113 | from_class = hasattr(clazz, "get") and not hasattr(clazz.get, "__wrapped__") 114 | func = clazz.get if from_class else _get_helper 115 | func = getattr(func, "__func__", func) 116 | 117 | clazz.get = MethodType(wrapper(func), clazz) 118 | 119 | 120 | def _inject_params( 121 | params: Dict[str, Any], key: str, value: Optional[Union[str, Iterable[str]]] 122 | ) -> None: 123 | if value is None: 124 | return 125 | value = {value} if isinstance(value, str) else set(value) 126 | 127 | old_value = params.pop(key, None) 128 | if old_value is None: 129 | params[key] = value 130 | return 131 | 132 | old_value = {old_value} if isinstance(old_value, str) else set(old_value) 133 | 134 | params[key] = value | old_value 135 | 136 | 137 | def _split_unique_join(data: pd.Series, func: Optional[Callable] = None) -> pd.Series: 138 | mask = ~pd.isnull(data.astype("string")) 139 | data = data[mask] 140 | data = data.str.split(";") 141 | 142 | if func is None: 143 | data = data.apply( 144 | lambda row: ( 145 | ";".join(sorted(set(map(str, row)))) 146 | if isinstance(row, Iterable) 147 | else row 148 | ) 149 | ) 150 | else: 151 | data = data.apply(func) 152 | 153 | res = pd.Series([None] * len(mask)) 154 | res.loc[mask] = data 155 | 156 | return res 157 | 158 | 159 | def _strip_resource_label( 160 | data: pd.Series, func: Optional[Callable] = None 161 | ) -> pd.Series: 162 | return _split_unique_join( 163 | _split_unique_join(data.str.replace(r"[-\w]*:?(\d+)", r"\1", regex=True)), 164 | func=func, 165 | ) 166 | 167 | 168 | def _strip_resource_label_df( 169 | df: pd.DataFrame, 170 | col: str, 171 | func: Optional[Callable] = None, 172 | ) -> None: 173 | if col in df: 174 | df[f"{col}_stripped"] = _strip_resource_label(df[col], func=func) 175 | 176 | 177 | def _count_references(df: pd.DataFrame) -> None: 178 | if "references" in df: 179 | df["n_references"] = _strip_resource_label( 180 | df["references"], func=lambda row: len(set(row)) 181 | ) 182 | 183 | 184 | def _count_resources(df: pd.DataFrame) -> None: 185 | if "sources" in df: 186 | df["n_sources"] = df["sources"].astype(str).str.split(";").apply(len) 187 | df["n_primary_sources"] = ( 188 | df["sources"] 189 | .astype(str) 190 | .str.split(";") 191 | .apply( 192 | lambda row: ( 193 | len([r for r in row if "_" not in r]) if isinstance(row, Iterable) else 0 194 | ) 195 | ) 196 | ) 197 | 198 | 199 | _ERROR_EMPTY_FMT = ( 200 | "No {obj} were retrieved. Please check that you are supplying valid parameter values."
201 | ) 202 | -------------------------------------------------------------------------------- /omnipath/_core/requests/_annotations.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Union, Mapping, Iterable, Optional 2 | import logging 3 | 4 | import pandas as pd 5 | 6 | from omnipath._misc import dtypes 7 | from omnipath._core.query import QueryType 8 | from omnipath._core.utils._docs import d 9 | from omnipath._core.requests._request import OmnipathRequestABC 10 | from omnipath.constants._pkg_constants import Key, final 11 | 12 | _MAX_N_PROTS = 600 13 | 14 | 15 | @final 16 | class Annotations(OmnipathRequestABC): 17 | """Request annotations from [OmniPath]_.""" 18 | 19 | __string__ = frozenset({"source", "value"}) 20 | __categorical__ = frozenset({"entity_type", "label", "source"}) 21 | 22 | _query_type = QueryType.ANNOTATIONS 23 | 24 | def _modify_params(self, params: Dict[str, Any]) -> Dict[str, Any]: 25 | params.pop(Key.ORGANISM.value, None) 26 | 27 | return params 28 | 29 | @classmethod 30 | @d.dedent 31 | def params(cls) -> Dict[str, Any]: 32 | """%(query_params)s""" 33 | params = super().params() 34 | params.pop(Key.ORGANISM.value, None) 35 | 36 | return params 37 | 38 | @classmethod 39 | def get( 40 | cls, 41 | proteins: Optional[Union[str, Iterable[str]]] = None, 42 | resources: Optional[Union[str, Iterable[str]]] = None, 43 | force_full_download: bool = False, 44 | wide: bool = False, 45 | **kwargs, 46 | ) -> Union[pd.DataFrame, Dict[str, pd.DataFrame]]: 47 | """ 48 | Import annotations from [OmniPath]_. 49 | 50 | Retrieves protein annotations about function, localization, expression, structure and other properties of 51 | proteins from `OmniPath <https://omnipathdb.org>`__. 52 | 53 | Parameters 54 | ---------- 55 | proteins 56 | Genes or proteins for which annotations will be retrieved (UniProt IDs, HGNC Gene Symbols or miRBase IDs). 57 | 58 | In order to download annotations for protein complexes, write **'COMPLEX:'** before the gene symbols of 59 | the genes that make up the complex. 60 | 61 | If `None`, fetch annotations for all available genes or proteins. 62 | resources 63 | Load the annotations only from these databases. See :meth:`resources` for available options. 64 | If `None`, use all available resources. 65 | force_full_download 66 | Force the download of the entire annotations dataset. The full size of the data is ~1GB. 67 | We recommend retrieving the annotations for a set of proteins or only from a few resources, 68 | depending on your interest. 69 | wide 70 | Pivot the annotations from a long to a wide dataframe format, reconstituting the format 71 | of the original resource. 72 | kwargs 73 | Additional query parameters. 74 | 75 | Returns 76 | ------- 77 | :class:`pandas.DataFrame` 78 | A dataframe containing different molecule (protein, complex, gene, miRNA, small molecule) annotations. 79 | If `wide` is `True` and the result contains more than one resource, a `dict` of dataframes 80 | will be returned, one for each resource. 81 | 82 | Notes 83 | ----- 84 | There might also be a few annotated miRNAs and small molecules. The vast majority of protein complex 85 | annotations are inferred from the annotations of the members: if all members carry the same annotation, 86 | the complex inherits it. 87 | """ 88 | if proteins is None and resources is None and not force_full_download: 89 | raise ValueError( 90 | "Please specify `force_full_download=True` in order to download the full dataset."
91 | ) 92 | res_info = ( 93 | "all resources" 94 | if resources is None 95 | else f"the following resources: `{[resources] if isinstance(resources, str) else sorted(set(resources))}`" 96 | ) 97 | inst = cls() 98 | inst._wide = wide 99 | 100 | if proteins is not None: 101 | if isinstance(proteins, str): 102 | proteins = (proteins,) 103 | proteins = sorted(set(proteins)) 104 | 105 | logging.info( 106 | f"Downloading annotations for `{len(proteins)}` in `{_MAX_N_PROTS}` chunks from {res_info}" 107 | ) 108 | 109 | return pd.concat( 110 | [ 111 | inst._get( 112 | proteins=proteins[i * _MAX_N_PROTS : (i + 1) * _MAX_N_PROTS], 113 | resources=resources, 114 | **kwargs, 115 | ) 116 | for i in range((len(proteins) // _MAX_N_PROTS) + 1) 117 | if len(proteins[i * _MAX_N_PROTS : (i + 1) * _MAX_N_PROTS]) 118 | ] 119 | ) 120 | 121 | logging.info(f"Downloading annotations for all proteins from {res_info}") 122 | 123 | return inst._get(proteins=None, resources=resources, **kwargs) 124 | 125 | def _resource_filter(self, data: Mapping[str, Any], **_) -> bool: 126 | return True 127 | 128 | def _post_process(self, df: pd.DataFrame, **kwargs) -> pd.DataFrame: 129 | if self._wide: 130 | df = self.pivot_annotations(df) 131 | 132 | return df 133 | 134 | @classmethod 135 | def pivot_annotations( 136 | cls, 137 | df: pd.DataFrame, 138 | ) -> Union[pd.DataFrame, Dict[str, pd.DataFrame]]: 139 | """ 140 | Annotations from narrow to wide format 141 | 142 | Converts the annotations from a long to a wide dataframe format, 143 | reconstituting the format of the original resource. 144 | 145 | Parameters 146 | ---------- 147 | df 148 | An annotation dataframe. 149 | 150 | Returns 151 | ------- 152 | :class:`pandas.DataFrame` or `dict` 153 | A dataframe of various molecule (protein, complex, gene, miRNA, small molecule) annotations. 154 | If the data contains more than one resource, a `dict` of dataframes will be returned, one for each 155 | resource. 
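
        Examples
        --------
        A minimal sketch; the protein and resource names are illustrative
        assumptions only::

            import omnipath as op

            long_df = op.requests.Annotations.get(
                proteins=["ITGB1", "RET"], resources="Phobius"
            )
            # one resource -> a single wide frame; several resources ->
            # a dict mapping each resource name to its own wide frame
            wide = op.requests.Annotations.pivot_annotations(long_df)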
156 | """ 157 | if df.source.nunique() > 1: 158 | return { 159 | resource: cls.pivot_annotations(df[df.source == resource]) 160 | for resource in df.source.unique() 161 | } 162 | 163 | index_cols = ["record_id", "uniprot", "genesymbol", "label"] 164 | 165 | if "entity_type" in df.label.values: 166 | df = df.drop("entity_type", axis=1) 167 | 168 | else: 169 | index_cols.append("entity_type") 170 | 171 | return dtypes.auto_dtype( 172 | df.drop("source", axis=1) 173 | .set_index(index_cols) 174 | .unstack("label") 175 | .droplevel(axis=1, level=0) 176 | .reset_index() 177 | .drop("record_id", axis=1) 178 | ) 179 | 180 | 181 | __all__ = [Annotations] 182 | -------------------------------------------------------------------------------- /omnipath/_core/requests/interactions/_utils.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List, Mapping, Optional 2 | 3 | import pandas as pd 4 | 5 | from omnipath.constants._constants import InteractionDataset 6 | from omnipath._core.requests._utils import _ERROR_EMPTY_FMT 7 | from omnipath._core.requests._intercell import Intercell 8 | from omnipath._core.requests.interactions._interactions import ( 9 | Datasets_t, 10 | AllInteractions, 11 | ) 12 | 13 | 14 | def _to_dict(mapping: Optional[Mapping[Any, Any]]) -> Dict[Any, Any]: 15 | return {} if mapping is None else dict(mapping) 16 | 17 | 18 | def _swap_undirected(df: pd.DataFrame) -> pd.DataFrame: 19 | if "is_directed" not in df.columns: 20 | raise KeyError(f"Key `'is_directed'` not found in `{list(df.columns)}`.") 21 | 22 | directed = df.pop("is_directed") 23 | 24 | undirected = df.loc[~directed, :] 25 | if undirected.empty: 26 | return df 27 | 28 | undirected_swapped = undirected.copy() 29 | undirected_swapped[["source", "target"]] = undirected[["target", "source"]] 30 | 31 | if "source_genesymbol" in undirected: 32 | undirected_swapped[["source_genesymbol", "target_genesymbol"]] = undirected[ 33 | ["target_genesymbol", "source_genesymbol"] 34 | ] 35 | if "ncbi_tax_id_source" in undirected.columns: 36 | undirected_swapped[["ncbi_tax_id_source", "ncbi_tax_id_target"]] = undirected[ 37 | ["ncbi_tax_id_target", "ncbi_tax_id_source"] 38 | ] 39 | 40 | return pd.concat( 41 | [directed, undirected, undirected_swapped], 42 | axis=0, 43 | ignore_index=True, 44 | ) 45 | 46 | 47 | def import_intercell_network( 48 | include: Datasets_t = ( 49 | InteractionDataset.OMNIPATH, 50 | InteractionDataset.PATHWAY_EXTRA, 51 | InteractionDataset.KINASE_EXTRA, 52 | InteractionDataset.LIGREC_EXTRA, 53 | ), 54 | interactions_params: Optional[Mapping[str, Any]] = None, 55 | transmitter_params: Optional[Mapping[str, Any]] = None, 56 | receiver_params: Optional[Mapping[str, Any]] = None, 57 | ) -> pd.DataFrame: 58 | """ 59 | Import intercellular network combining intercellular annotations and protein interactions. 60 | 61 | First, it imports a network of protein-protein interactions. Then, it retrieves annotations about the proteins 62 | intercellular communication roles, once for the transmitter (delivering information from the expressing cell) and 63 | second, the receiver (receiving signal and relaying it towards the expressing cell) side. 64 | 65 | These 3 queries can be customized by providing parameters which will be passed to 66 | :meth:`omnipath.interactions.OmniPath.get` for the network and :meth:`omnipath.requests.Intercell` 67 | for the annotations. 
68 | 69 | Finally the 3 :class:`pandas.DataFrame` are combined in a way that the source proteins in each interaction annotated 70 | by the transmitter, and the target proteins by the receiver categories. If undirected interactions present 71 | (these are disabled by default) they will be duplicated, i.e. both partners can be both receiver and transmitter. 72 | 73 | Parameters 74 | ---------- 75 | include 76 | Interaction datasets to include for :meth:`omnipath.interactions.AllInteractions.get`. 77 | interactions_params 78 | Parameters for the :meth:`omnipath.interactions.AllInteractions.get`. 79 | transmitter_params 80 | Parameters defining the transmitter side of intercellular connections. 81 | See :meth:`omnipath.interactions.AllInteractions.params` for available values. 82 | receiver_params 83 | Parameters defining the receiver side of intercellular connections. 84 | See :meth:`omnipath.interactions.AllInteractions.params` for available values. 85 | 86 | Returns 87 | ------- 88 | :class:`pandas.DataFrame` 89 | A dataframe containing information about protein-protein interactions and the inter-cellular roles 90 | of the proteins involved in those interactions. 91 | """ 92 | interactions_params = _to_dict(interactions_params) 93 | transmitter_params = _to_dict(transmitter_params) 94 | receiver_params = _to_dict(receiver_params) 95 | 96 | # TODO: this should be refactored as: QueryType.INTERCELL("scope").param, etc. (also in many other places) 97 | transmitter_params.setdefault("causality", "trans") 98 | transmitter_params.setdefault("scope", "generic") 99 | receiver_params.setdefault("causality", "rec") 100 | receiver_params.setdefault("scope", "generic") 101 | 102 | interactions = AllInteractions.get(include=include, **interactions_params) 103 | if interactions.empty: 104 | raise ValueError(_ERROR_EMPTY_FMT.format(obj="interactions")) 105 | interactions = _swap_undirected(interactions) 106 | 107 | transmitters = Intercell.get(**transmitter_params) 108 | if transmitters.empty: 109 | raise ValueError(_ERROR_EMPTY_FMT.format(obj="transmitters")) 110 | receivers = Intercell.get(**receiver_params) 111 | if receivers.empty: 112 | raise ValueError(_ERROR_EMPTY_FMT.format(obj="receivers")) 113 | 114 | # fmt: off 115 | intracell = ['intracellular_intercellular_related', 'intracellular'] 116 | transmitters = transmitters.loc[~transmitters["parent"].isin(intracell), :].copy() 117 | transmitters.rename(columns={"source": "category_source"}, inplace=True) 118 | # this makes it 3x as fast during groupby, since all of these are categories 119 | # it's mostly because groupby needs observed=True + using string object (numpy) vs "string" 120 | transmitters[["category", "parent", "database"]] = transmitters[["category", "parent", "database"]].astype(str) 121 | 122 | receivers = receivers.loc[~receivers["parent"].isin(intracell), :].copy() 123 | receivers.rename(columns={"source": "category_source"}, inplace=True) 124 | receivers[["category", "parent", "database"]] = receivers[["category", "parent", "database"]].astype(str) 125 | 126 | res = pd.merge(interactions, transmitters, left_on="source", right_on="uniprot", how="inner") 127 | if res.empty: 128 | raise ValueError("No values are left after merging interactions and transmitters.") 129 | 130 | # fmt: on 131 | 132 | groupby_cols = ["category", "parent", "source", "target"] 133 | res = _join_str_col(res, "database", groupby_cols) 134 | res = _summarize_first(res, groupby_cols) 135 | 136 | res = pd.merge( 137 | res, 138 | receivers, 139 | how="inner", 140 
| left_on="target", 141 | right_on="uniprot", 142 | suffixes=("_intercell_source", "_intercell_target"), 143 | ) 144 | if res.empty: 145 | raise ValueError("No values are left after merging interactions and receivers.") 146 | 147 | groupby_cols = [ 148 | "category_intercell_source", 149 | "parent_intercell_source", 150 | "source", 151 | "target", 152 | "category_intercell_target", 153 | "parent_intercell_target", 154 | ] 155 | 156 | res = _join_str_col(res, "database_intercell_target", groupby_cols) 157 | res = _summarize_first(res, groupby_cols) 158 | 159 | # retype back as categories 160 | for col in ["category", "parent"]: 161 | for suffix in ["_intercell_source", "_intercell_target"]: 162 | res[f"{col}{suffix}"] = res[f"{col}{suffix}"].astype("category") 163 | 164 | return res.reset_index(drop=True) 165 | 166 | 167 | # pandas is a disaster: 168 | def _join_str_col(df: pd.DataFrame, col: str, groupby_cols: List[str]) -> pd.DataFrame: 169 | return df.assign( 170 | **{col: df.groupby(groupby_cols)[col].transform(lambda x: ";".join(x))} 171 | ) 172 | 173 | 174 | def _summarize_first(df: pd.DataFrame, groupby_cols: List[str]) -> pd.DataFrame: 175 | return ( 176 | df.groupby(groupby_cols, as_index=False).nth(0).copy() 177 | ) # much faster than 1st 178 | -------------------------------------------------------------------------------- /tests/test_compare_R.py: -------------------------------------------------------------------------------- 1 | from typing import Iterable, Optional 2 | 3 | import pytest 4 | 5 | import numpy as np 6 | import pandas as pd 7 | 8 | from omnipath.constants import License, Organism 9 | import omnipath as op 10 | from .conftest import RTester 11 | 12 | # in order to minimize server access, the tests are not parametrized 13 | # and the resources are chosen so that minimal data required is transferred 14 | # these tests will also run only on 1 job CI matrix, to further reduce the load 15 | # 16 | # note that these tests don't test whether `.params()` returns the valid values 17 | # this would require different Python interpreter invocation, since by design, during testing 18 | # `omnipath.options.autoload` is disabled 19 | 20 | 21 | def _assert_dataframes_equal( 22 | expected: pd.DataFrame, 23 | actual: pd.DataFrame, 24 | clazz: type = op.requests.Enzsub, 25 | remove_metadata: Optional[Iterable[str]] = None, 26 | ): 27 | assert isinstance(expected, pd.DataFrame) 28 | assert isinstance(actual, pd.DataFrame) 29 | 30 | # some small naming discrepancy 31 | actual.rename(columns={"n_primary_sources": "n_resources"}, inplace=True) 32 | # these are always present in our case 33 | if remove_metadata is None: 34 | remove_metadata = ["n_sources", "references_stripped"] 35 | for k in remove_metadata: 36 | if k in actual.columns: 37 | del actual[k] 38 | 39 | np.testing.assert_array_equal(expected.shape, actual.shape) 40 | # don't compare index since sometimes, it's not informative + differs across calls from OmnipathR 41 | col_order = sorted(expected.columns) 42 | np.testing.assert_array_equal(col_order, sorted(actual.columns)) 43 | 44 | expected = clazz()._convert_dtypes(expected) 45 | 46 | # this way, we know which column fails 47 | for col in col_order: 48 | e, a = expected[col], actual[col] 49 | emask = ~(pd.isna(e).values | pd.isnull(a).values) 50 | amask = ~(pd.isna(e).values | pd.isnull(a).values) 51 | 52 | np.testing.assert_array_equal(emask, amask) 53 | np.testing.assert_array_equal(e[emask], a[emask]) 54 | 55 | 56 | class TestEnzSub(RTester): 57 | def 
test_organism(self): 58 | organism = Organism.RAT 59 | expected = self.omnipathr.import_omnipath_enzsub( 60 | resources="DEPOD", genesymbols=False, organism=organism.code 61 | ) 62 | actual = op.requests.Enzsub.get( 63 | resources="DEPOD", genesymbols=False, organism=organism 64 | ) 65 | 66 | _assert_dataframes_equal(expected, actual) 67 | 68 | def test_resources(self): 69 | expected = self.omnipathr.import_omnipath_enzsub( 70 | resources="HPRD", genesymbols=True 71 | ) 72 | actual = op.requests.Enzsub.get(resources="HPRD", genesymbols=True) 73 | 74 | _assert_dataframes_equal(expected, actual) 75 | 76 | def test_fields(self): 77 | fields = ["isoforms", "ncbi_tax_id"] 78 | expected = self.omnipathr.import_omnipath_enzsub( 79 | resources="DEPOD", genesymbols=True, fields=fields 80 | ) 81 | actual = op.requests.Enzsub.get( 82 | resources="DEPOD", genesymbols=True, fields=fields 83 | ) 84 | 85 | _assert_dataframes_equal(expected, actual) 86 | 87 | def test_license(self): 88 | license = License.COMMERCIAL 89 | expected = self.omnipathr.import_omnipath_enzsub( 90 | resources="DEPOD", genesymbols=True, license=license.value 91 | ) 92 | actual = op.requests.Enzsub.get( 93 | resources="DEPOD", genesymbols=True, license=license 94 | ) 95 | 96 | _assert_dataframes_equal(expected, actual) 97 | 98 | 99 | class TestIntercell(RTester): 100 | def test_categories(self): 101 | expected = sorted(self.omnipathr.get_intercell_categories()) 102 | actual = sorted(op.requests.Intercell.categories()) 103 | 104 | np.testing.assert_array_equal(expected, actual) 105 | 106 | def test_generic_categories(self): 107 | expected = sorted(self.omnipathr.get_intercell_generic_categories()) 108 | actual = sorted(op.requests.Intercell.generic_categories()) 109 | 110 | np.testing.assert_array_equal(expected, actual) 111 | 112 | def test_normal_run(self): 113 | expected = self.omnipathr.import_omnipath_intercell( 114 | causality="transmitter", scope="specific", entity_types="protein" 115 | ) 116 | actual = op.requests.Intercell.get( 117 | causality="transmitter", scope="specific", entity_types="protein" 118 | ) 119 | 120 | _assert_dataframes_equal(expected, actual) 121 | 122 | 123 | class TestComplexes(RTester): 124 | def test_complex_genes(self): 125 | genes = ["ITGB1", "RET"] 126 | expected = self.omnipathr.import_omnipath_complexes(resources="CellPhoneDB") 127 | actual = op.requests.Complexes.get(database="CellPhoneDB") 128 | 129 | _assert_dataframes_equal( 130 | expected, 131 | actual, 132 | remove_metadata=[ 133 | "n_sources", 134 | "n_resources", 135 | "n_references", 136 | "references_stripped", 137 | ], 138 | ) 139 | 140 | expected = self.omnipathr.get_complex_genes(genes, complexes=expected) 141 | actual = op.requests.Complexes.complex_genes(genes, complexes=actual) 142 | 143 | _assert_dataframes_equal( 144 | expected, 145 | actual, 146 | remove_metadata=[ 147 | "n_sources", 148 | "n_resources", 149 | "n_references", 150 | "references_stripped", 151 | ], 152 | ) 153 | 154 | 155 | class TestAnnotations(RTester): 156 | def test_normal_run(self): 157 | proteins = ["ITGB1", "RET"] 158 | expected = self.omnipathr.import_omnipath_annotations( 159 | proteins=proteins, resources="Phobius", genesymbols=False 160 | ) 161 | actual = op.requests.Annotations.get( 162 | proteins=proteins, databases="Phobius", genesymbols=False 163 | ) 164 | 165 | _assert_dataframes_equal(expected, actual) 166 | 167 | 168 | class TestInteractions(RTester): 169 | def test_tfregulons_levels(self): 170 | fields = ["tfregulons_level", "tfregulons_tfbs"] 
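        # query the R client and this package with identical arguments, then
        # compare the two frames column-by-column via `_assert_dataframes_equal`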
171 | expected = self.omnipathr.import_tf_target_interactions( 172 | resources=["ABS"], fields=fields, genesymbols=False 173 | ) 174 | actual = op.interactions.TFtarget.get( 175 | resources=["ABS"], fields=fields, genesymbols=False 176 | ) 177 | 178 | _assert_dataframes_equal(expected, actual) 179 | 180 | def test_dorothea_levels(self): 181 | fields = ["dorothea_level"] 182 | expected = self.omnipathr.import_dorothea_interactions( 183 | resources=["ABS"], dorothea_levels="D", fields=fields, genesymbols=False 184 | ) 185 | actual = op.interactions.Dorothea.get( 186 | resources=["ABS"], dorothea_levels="D", fields=fields, genesymbols=False 187 | ) 188 | 189 | _assert_dataframes_equal(expected, actual) 190 | 191 | def test_omnipath(self): 192 | expected = self.omnipathr.import_omnipath_interactions( 193 | resources="CA1", genesymbols=False 194 | ) 195 | actual = op.interactions.OmniPath.get(resource="CA1", genesymbols=False) 196 | 197 | _assert_dataframes_equal(expected, actual) 198 | 199 | 200 | class TestUtils(RTester): 201 | @pytest.mark.skip(reason="TODO: different index order, ref. mismatch") 202 | def test_import_intercell_network(self): 203 | from rpy2.robjects import ListVector 204 | 205 | interactions_params = {"resources": "CellPhoneDB"} 206 | transmitter_params = {"categories": "ligand"} 207 | receiver_params = {"categories": "receptor"} 208 | 209 | expected = self.omnipathr.import_intercell_network( 210 | interactions_param=ListVector(list(interactions_params.items())), 211 | transmitter_param=ListVector(list(transmitter_params.items())), 212 | receiver_param=ListVector(list(receiver_params.items())), 213 | ) 214 | actual = op.interactions.import_intercell_network( 215 | interactions_params=interactions_params, 216 | transmitter_params=transmitter_params, 217 | receiver_params=receiver_params, 218 | ) 219 | 220 | _assert_dataframes_equal(expected, actual) 221 | -------------------------------------------------------------------------------- /omnipath/_core/downloader/_downloader.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO 2 | from copy import copy 3 | from typing import Any, Mapping, Callable, Optional 4 | from hashlib import md5 5 | from urllib.parse import urljoin, urlparse 6 | import json 7 | import logging 8 | import traceback 9 | 10 | from requests import Request, Session, PreparedRequest 11 | from tqdm.auto import tqdm 12 | from urllib3.util import Retry 13 | from requests.adapters import HTTPAdapter 14 | from requests.exceptions import RequestException 15 | 16 | from omnipath._core.utils._options import Options 17 | from omnipath.constants._pkg_constants import ( 18 | UNKNOWN_SERVER_VERSION, 19 | Key, 20 | Format, 21 | Endpoint, 22 | ) 23 | 24 | 25 | class Downloader: 26 | """ 27 | Class which performs a GET request to the server in order to retrieve some remote resources. 28 | 29 | Also implements other behavior, such as retrying after some status codes. 30 | 31 | Parameters 32 | ---------- 33 | opts 34 | Options. If `None`, :attr:`omnipath.options` are used. 35 | """ 36 | 37 | def __init__(self, opts: Optional[Options] = None): 38 | if opts is None: 39 | from omnipath import options as opts 40 | 41 | if not isinstance(opts, Options): 42 | raise TypeError( 43 | f"Expected `opts` to be of type `Options`, found {type(opts).__name__}." 
44 | ) 45 | 46 | self._session = Session() 47 | self._options = copy(opts) # this does not copy MemoryCache 48 | 49 | if self._options.num_retries > 0: 50 | adapter = HTTPAdapter( 51 | max_retries=Retry( 52 | total=self._options.num_retries, 53 | redirect=5, 54 | status_forcelist=[413, 429, 500, 502, 503, 504], 55 | backoff_factor=1, 56 | ) 57 | ) 58 | self._session.mount("http://", adapter) 59 | self._session.mount("https://", adapter) 60 | 61 | logging.debug(f"Initialized `{self}`") 62 | 63 | @property 64 | def resources(self) -> Mapping[str, Mapping[str, Any]]: 65 | """Return the resources.""" 66 | logging.debug("Fetching resources") 67 | return self.maybe_download( 68 | Endpoint.RESOURCES.s, 69 | params={Key.FORMAT.s: Format.JSON.s}, 70 | callback=json.load, 71 | ) 72 | 73 | def maybe_download( 74 | self, 75 | url: str, 76 | callback: Callable[[BytesIO], Any], 77 | params: Optional[Mapping[str, str]] = None, 78 | cache: bool = True, 79 | is_final: bool = False, 80 | **_, 81 | ) -> Any: 82 | """ 83 | Fetch the data from the cache, if present, or download them from the ``url``. 84 | 85 | The key, under which is the download result saved, is the MD5 hash of the ``url``, including the ``params``. 86 | 87 | Parameters 88 | ---------- 89 | url 90 | URL that is used to access the remote resources if the data is not found in the cache. 91 | callback 92 | Function applied on the downloaded data. Usually, this will return either a :class:`pandas.DataFrame` 93 | or a :class:`dict`. 94 | params 95 | Parameters of the `GET` request. 96 | cache 97 | Whether to save the files to the cache or not. 98 | is_final 99 | Whether ``url`` is final or should be prefixed with :attr:`_options.url`. 100 | 101 | Returns 102 | ------- 103 | :class:`typing.Any` 104 | The result of applying ``callback`` on the maybe downloaded data. 105 | """ 106 | if not callable(callback): 107 | raise TypeError( 108 | f"Expected `callback` to be `callable`, found `{type(callback).__name__}`." 109 | ) 110 | 111 | if is_final: 112 | urls = (url,) if isinstance(url, str) else url 113 | else: 114 | urls = [ 115 | urljoin(baseurl, url) 116 | for baseurl in ( 117 | (self._options.url,) + tuple(self._options.fallback_urls) 118 | ) 119 | ] 120 | 121 | res = None 122 | 123 | for the_url in urls: 124 | urlp = urlparse(the_url) 125 | domain = f"{urlp.scheme}://{urlp.netloc}/" 126 | logging.debug(f"Attempting server `{domain}`.") 127 | req = self._session.prepare_request( 128 | Request( 129 | "GET", 130 | the_url, 131 | params=params, 132 | headers={"User-agent": "omnipathdb-user"}, 133 | ) 134 | ) 135 | key = md5(bytes(req.url, encoding="utf-8")).hexdigest() 136 | logging.debug(f"Looking up in cache: `{req.url}` ({key!r}).") 137 | 138 | if key in self._options.cache: 139 | logging.debug(f"Found data in cache `{self._options.cache}[{key!r}]`") 140 | res = self._options.cache[key] 141 | else: 142 | try: 143 | res = self._download(req) 144 | except RequestException: 145 | logging.warning(f"Failed to download from `{domain}`.") 146 | logging.warning(traceback.format_exc()) 147 | continue 148 | res = callback(res) 149 | if cache: 150 | logging.debug(f"Caching result to `{self._options.cache}[{key!r}]`") 151 | self._options.cache[key] = res 152 | else: 153 | logging.debug("Not caching the results") 154 | break 155 | 156 | if res is None: 157 | raise 158 | 159 | return res 160 | 161 | def _download(self, req: PreparedRequest) -> BytesIO: 162 | """ 163 | Request the remote resources. 
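
        The response is streamed in chunks of ``chunk_size`` bytes into an
        in-memory buffer, updating a progress bar unless it is disabled in
        the options.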
164 | 165 | Parameters 166 | ---------- 167 | req 168 | `GET` request to perform. 169 | 170 | Returns 171 | ------- 172 | :class:`io.BytesIO` 173 | File-like object containing the data. Usually a json- or csv-like data is present inside. 174 | """ 175 | logging.info(f"Downloading data from `{req.url}`") 176 | settings = self._session.merge_environment_settings( 177 | req.url, {}, None, None, None 178 | ) 179 | settings["stream"] = True 180 | settings["timeout"] = self._options.timeout 181 | handle = BytesIO() 182 | with self._session.send(req, **settings) as resp: 183 | resp.raise_for_status() 184 | total = resp.headers.get("content-length", None) 185 | 186 | with tqdm( 187 | unit="B", 188 | unit_scale=True, 189 | miniters=1, 190 | unit_divisor=1024, 191 | total=total if total is None else int(total), 192 | disable=not self._options.progress_bar, 193 | ) as t: 194 | for chunk in resp.iter_content(chunk_size=self._options.chunk_size): 195 | t.update(len(chunk)) 196 | handle.write(chunk) 197 | 198 | handle.flush() 199 | handle.seek(0) 200 | 201 | return handle 202 | 203 | def __str__(self) -> str: 204 | return f"<{self.__class__.__name__}[options={self._options}]>" 205 | 206 | def __repr__(self) -> str: 207 | return str(self) 208 | 209 | 210 | def _get_server_version(options: Options) -> str: 211 | """Try and get the server version.""" 212 | import re 213 | 214 | def callback(fp: BytesIO) -> str: 215 | """Parse the version.""" 216 | return re.findall( 217 | r"\d+\.\d+.\d+", fp.getvalue().decode("utf-8"), flags=re.IGNORECASE 218 | )[0] 219 | 220 | try: 221 | if not options.autoload: 222 | raise ValueError( 223 | "Autoloading is disabled. You can enable it by setting " 224 | "`omnipath.options.autoload = True`." 225 | ) 226 | 227 | with Options.from_options( 228 | options, 229 | num_retries=0, 230 | timeout=(1.0, 3.0), 231 | cache=None, 232 | progress_bar=False, 233 | chunk_size=1024, 234 | ) as opt: 235 | return Downloader(opt).maybe_download( 236 | Endpoint.ABOUT.s, 237 | callback, 238 | params={Key.FORMAT.s: Format.TEXT.s}, 239 | cache=False, 240 | ) 241 | except Exception as e: 242 | logging.debug(f"Unable to get server version. 
Reason: `{e}`") 243 | 244 | return UNKNOWN_SERVER_VERSION 245 | -------------------------------------------------------------------------------- /tests/test_downloader.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO, StringIO 2 | from urllib.parse import urljoin 3 | import logging 4 | 5 | import pytest 6 | import requests 7 | 8 | import numpy as np 9 | import pandas as pd 10 | 11 | from omnipath import options as opt 12 | from omnipath._core.utils._options import Options 13 | from omnipath.constants._pkg_constants import UNKNOWN_SERVER_VERSION, Endpoint 14 | from omnipath._core.downloader._downloader import Downloader, _get_server_version 15 | 16 | opt.fallback_urls = () 17 | 18 | 19 | class TestDownloader: 20 | def test_options_wrong_type(self): 21 | with pytest.raises(TypeError): 22 | Downloader("foobar") 23 | 24 | def test_str_repr(self, options: Options): 25 | d = Downloader(options) 26 | 27 | assert str(d) == f"<{d.__class__.__name__}[options={options}]>" 28 | assert repr(d) == f"<{d.__class__.__name__}[options={options}]>" 29 | 30 | def test_initialize_local_options(self, options: Options): 31 | options.password = "foo" 32 | options.timeout = 1337 33 | d = Downloader(options) 34 | 35 | assert d._options is not options 36 | assert str(d._options) == str(options) 37 | assert str(d._options) != str(opt) 38 | 39 | options.password = "bar" 40 | assert d._options.password == "foo" 41 | 42 | def test_initialize_global_options(self): 43 | d = Downloader() 44 | 45 | assert d._options is not opt 46 | assert str(d._options) == str(opt) 47 | 48 | def test_resources_cached_values(self, downloader: Downloader, requests_mock): 49 | data = {"foo": "bar", "42": 1337} 50 | requests_mock.register_uri( 51 | "GET", urljoin(downloader._options.url, Endpoint.RESOURCES.s), json=data 52 | ) 53 | 54 | assert downloader.resources == data 55 | assert requests_mock.called_once 56 | 57 | assert downloader.resources == data 58 | assert requests_mock.called_once 59 | 60 | def test_resources_no_cached_values(self, downloader: Downloader, requests_mock): 61 | data = {"foo": "bar", "42": 1337} 62 | requests_mock.register_uri( 63 | "GET", urljoin(downloader._options.url, Endpoint.RESOURCES.s), json=data 64 | ) 65 | 66 | assert downloader.resources == data 67 | assert requests_mock.called_once 68 | 69 | downloader._options.cache.clear() 70 | 71 | assert downloader.resources == data 72 | assert len(requests_mock.request_history) == 2 73 | 74 | def test_maybe_download_not_callable(self, downloader: Downloader): 75 | with pytest.raises(TypeError): 76 | downloader.maybe_download("foo", callback=None) 77 | 78 | def test_maybe_download_wrong_callable( 79 | self, downloader: Downloader, requests_mock, csv_data: bytes 80 | ): 81 | url = urljoin(downloader._options.url, "foobar") 82 | requests_mock.register_uri("GET", url, content=csv_data) 83 | 84 | with pytest.raises(ValueError, match=r"Expected object or value"): 85 | downloader.maybe_download(url, callback=pd.read_json) 86 | 87 | def test_maybe_download_passes_params( 88 | self, downloader: Downloader, requests_mock, csv_data: bytes 89 | ): 90 | csv_url = urljoin(downloader._options.url, "foobar/?format=csv") 91 | csv_df = pd.read_csv(BytesIO(csv_data)) 92 | json_url = urljoin(downloader._options.url, "foobar/?format=json") 93 | json_handle = StringIO() 94 | csv_df.to_json(json_handle) 95 | 96 | requests_mock.register_uri("GET", csv_url, content=csv_data) 97 | requests_mock.register_uri( 98 | "GET", json_url, 
content=bytes(json_handle.getvalue(), encoding="utf-8") 99 | ) 100 | 101 | res1 = downloader.maybe_download(csv_url, callback=pd.read_csv) 102 | res2 = downloader.maybe_download(csv_url, callback=pd.read_csv) 103 | 104 | assert res1 is not res2 105 | assert len(downloader._options.cache) == 1 106 | assert requests_mock.called_once 107 | np.testing.assert_array_equal(res1.index, csv_df.index) 108 | np.testing.assert_array_equal(res1.columns, csv_df.columns) 109 | np.testing.assert_array_equal(res1.values, csv_df.values) 110 | 111 | res1 = downloader.maybe_download(json_url, callback=pd.read_json) 112 | res2 = downloader.maybe_download(json_url, callback=pd.read_json) 113 | 114 | assert res1 is not res2 115 | assert len(requests_mock.request_history) == 2 116 | assert len(downloader._options.cache) == 2 117 | np.testing.assert_array_equal(res1.index, csv_df.index) 118 | np.testing.assert_array_equal(res1.columns, csv_df.columns) 119 | np.testing.assert_array_equal(res1.values, csv_df.values) 120 | 121 | def test_maybe_download_no_cache( 122 | self, downloader: Downloader, requests_mock, csv_data: bytes 123 | ): 124 | url = urljoin(downloader._options.url, "foobar") 125 | requests_mock.register_uri("GET", url, content=csv_data) 126 | 127 | res1 = downloader.maybe_download(url, callback=pd.read_csv) 128 | downloader._options.cache.clear() 129 | res2 = downloader.maybe_download(url, callback=pd.read_csv) 130 | 131 | assert res1 is not res2 132 | assert len(requests_mock.request_history) == 2 133 | np.testing.assert_array_equal(res1.index, res2.index) 134 | np.testing.assert_array_equal(res1.columns, res2.columns) 135 | np.testing.assert_array_equal(res1.values, res2.values) 136 | 137 | def test_maybe_download_is_not_final( 138 | self, downloader: Downloader, requests_mock, csv_data: bytes 139 | ): 140 | endpoint = "barbaz" 141 | url = urljoin(downloader._options.url, endpoint) 142 | requests_mock.register_uri("GET", url, content=csv_data) 143 | csv_df = pd.read_csv(BytesIO(csv_data)) 144 | 145 | res = downloader.maybe_download(endpoint, callback=pd.read_csv) 146 | 147 | assert requests_mock.called_once 148 | np.testing.assert_array_equal(res.index, csv_df.index) 149 | np.testing.assert_array_equal(res.columns, csv_df.columns) 150 | np.testing.assert_array_equal(res.values, csv_df.values) 151 | 152 | def test_fallback_urls(self, requests_mock, csv_data: bytes): 153 | query = "annotations?resources=PROGENy" 154 | opt = Options(url="https://wrong.omnipathdb.org/") 155 | requests_mock.register_uri( 156 | "GET", 157 | urljoin(opt.url, query), 158 | exc=requests.exceptions.ConnectionError, 159 | ) 160 | requests_mock.register_uri( 161 | "GET", 162 | urljoin(opt.fallback_urls[0], query), 163 | content=csv_data, 164 | ) 165 | csv_df = pd.read_csv(BytesIO(csv_data)) 166 | downloader = Downloader(opt) 167 | res = downloader.maybe_download(query, callback=pd.read_csv) 168 | 169 | assert requests_mock.called 170 | np.testing.assert_array_equal(res.index, csv_df.index) 171 | np.testing.assert_array_equal(res.columns, csv_df.columns) 172 | np.testing.assert_array_equal(res.values, csv_df.values) 173 | 174 | def test_get_server_version_not_decodable( 175 | self, options: Options, requests_mock, caplog 176 | ): 177 | url = urljoin(options.url, Endpoint.ABOUT.s) 178 | options.autoload = True 179 | requests_mock.register_uri( 180 | "GET", f"{url}?format=text", content=bytes("foobarbaz", encoding="utf-8") 181 | ) 182 | 183 | with caplog.at_level(logging.DEBUG): 184 | version = _get_server_version(options) 185 | 
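        # `foobarbaz` contains no `\d+\.\d+.\d+` match, so the parsing callback
        # fails with `list index out of range` and the fallback version is used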
186 | assert requests_mock.called_once 187 | assert ( 188 | "Unable to get server version. Reason: `list index out of range`" 189 | in caplog.text 190 | ) 191 | assert version == UNKNOWN_SERVER_VERSION 192 | 193 | def test_get_server_version_no_autoload( 194 | self, options: Options, requests_mock, caplog 195 | ): 196 | url = urljoin(options.url, Endpoint.ABOUT.s) 197 | options.autoload = False 198 | requests_mock.register_uri("GET", f"{url}?format=text", text="foobarbaz") 199 | 200 | with caplog.at_level(logging.DEBUG): 201 | version = _get_server_version(options) 202 | 203 | assert not requests_mock.called_once 204 | assert ( 205 | "Unable to get server version. Reason: `Autoloading is disabled." 206 | in caplog.text 207 | ) 208 | assert version == UNKNOWN_SERVER_VERSION 209 | 210 | def test_get_server_version(self, options: Options, requests_mock): 211 | url = urljoin(options.url, Endpoint.ABOUT.s) 212 | options.autoload = True 213 | requests_mock.register_uri( 214 | "GET", 215 | f"{url}?format=text", 216 | content=bytes("foo bar baz\nversion: 42.1337.00", encoding="utf-8"), 217 | ) 218 | 219 | version = _get_server_version(options) 220 | 221 | assert requests_mock.called_once 222 | assert version == "42.1337.00" 223 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | from io import StringIO 2 | from abc import ABC 3 | from copy import deepcopy 4 | from shutil import copy 5 | from typing import Optional 6 | from inspect import isclass 7 | from pathlib import Path 8 | from collections import defaultdict 9 | from urllib.parse import urljoin 10 | import json 11 | import pickle 12 | import logging 13 | 14 | from pytest_socket import disable_socket 15 | 16 | disable_socket() 17 | import pytest 18 | import requests 19 | 20 | import numpy as np 21 | import pandas as pd 22 | 23 | from omnipath.constants import InteractionDataset 24 | from omnipath._core.cache._cache import MemoryCache 25 | from omnipath._core.query._query import QueryType 26 | from omnipath._core.utils._options import Options 27 | from omnipath.constants._pkg_constants import DEFAULT_OPTIONS, Key, Endpoint 28 | from omnipath._core.downloader._downloader import Downloader 29 | import omnipath as op 30 | 31 | 32 | # removes overly verbose logging errors for rpy2 33 | # see: https://github.com/pytest-dev/pytest/issues/5502#issuecomment-647157873 34 | def pytest_sessionfinish(session, exitstatus): 35 | import logging 36 | 37 | loggers = [logging.getLogger()] + list(logging.Logger.manager.loggerDict.values()) 38 | for logger in loggers: 39 | handlers = getattr(logger, "handlers", []) 40 | for handler in handlers: 41 | logger.removeHandler(handler) 42 | 43 | 44 | def pytest_addoption(parser): 45 | parser.addoption( 46 | "--test-server", 47 | dest="test_server", 48 | action="store_true", 49 | help="Whether to also test the server connection.", 50 | ) 51 | 52 | 53 | @pytest.fixture(scope="function") 54 | def options() -> "Options": 55 | opt = Options.from_config() 56 | opt.cache = "memory" 57 | opt.progress_bar = False 58 | opt.fallback_urls = () 59 | return opt 60 | 61 | 62 | @pytest.fixture(scope="function") 63 | def config_backup(tmpdir): 64 | copy(Options.config_path, tmpdir / "config.ini") 65 | yield 66 | copy(tmpdir / "config.ini", Options.config_path) 67 | 68 | 69 | @pytest.fixture(scope="function") 70 | def cache_backup(): 71 | import omnipath as op 72 | 73 | cache = deepcopy(op.options.cache) 
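    # snapshot the global options touched below; they are restored after the
    # yield so individual tests cannot leak state into one another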
74 | pb = op.options.progress_bar 75 | op.options.cache = MemoryCache() 76 | op.options.progress_bar = False 77 | yield 78 | op.options.cache = cache 79 | op.options.progress_bar = pb 80 | 81 | 82 | @pytest.fixture(scope="class") 83 | def server_url(): 84 | import omnipath as op 85 | 86 | cache = deepcopy(op.options.cache) 87 | pb = op.options.progress_bar 88 | url = op.options.url 89 | cd = op.options.convert_dtypes 90 | 91 | op.options.cache = MemoryCache() 92 | op.options.progress_bar = False 93 | op.options.url = DEFAULT_OPTIONS.url 94 | op.options.convert_dtypes = True 95 | yield 96 | op.options.cache = cache 97 | op.options.progress_bar = pb 98 | op.options.url = url 99 | op.options.convert_dtypes = cd 100 | 101 | 102 | @pytest.fixture(scope="function") 103 | def downloader(options) -> "Downloader": 104 | options.fallback_urls = () 105 | return Downloader(options) 106 | 107 | 108 | @pytest.fixture(scope="session") 109 | def csv_data() -> bytes: 110 | str_handle = StringIO() 111 | pd.DataFrame({"foo": range(5), "bar": "baz", "quux": 42}).to_csv(str_handle) 112 | 113 | return bytes(str_handle.getvalue(), encoding="utf-8") 114 | 115 | 116 | @pytest.fixture(scope="session") 117 | def tsv_data() -> bytes: 118 | str_handle = StringIO() 119 | pd.DataFrame( 120 | { 121 | "foo": range(5), 122 | "components_genesymbols": "foo", 123 | "quux": 42, 124 | "modification": "bar", 125 | } 126 | ).to_csv(str_handle, sep="\t") 127 | 128 | return bytes(str_handle.getvalue(), encoding="utf-8") 129 | 130 | 131 | @pytest.fixture(scope="session") 132 | def intercell_data() -> bytes: 133 | data = {} 134 | data[Key.PARENT.s] = [42, 1337, 24, 42] 135 | data[Key.CATEGORY.s] = ["foo", "bar", "bar", "foo"] 136 | 137 | return bytes(json.dumps(data), encoding="utf-8") 138 | 139 | 140 | @pytest.fixture(scope="session") 141 | def resources() -> bytes: 142 | data = defaultdict(dict) 143 | data["foo"][Key.QUERIES.s] = { 144 | QueryType.INTERCELL.endpoint: {Key.GENERIC_CATEGORIES.s: ["42"]} 145 | } 146 | data["bar"][Key.QUERIES.s] = { 147 | QueryType.INTERCELL.endpoint: {Key.GENERIC_CATEGORIES.s: ["42", "13"]} 148 | } 149 | data["baz"][Key.QUERIES.s] = { 150 | QueryType.INTERCELL.endpoint: {Key.GENERIC_CATEGORIES.s: ["24"]} 151 | } 152 | data["quux"][Key.QUERIES.s] = { 153 | QueryType.ENZSUB.endpoint: {Key.GENERIC_CATEGORIES.s: ["24"]} 154 | } 155 | 156 | return bytes(json.dumps(data), encoding="utf-8") 157 | 158 | 159 | @pytest.fixture(scope="session") 160 | def interaction_resources() -> bytes: 161 | data = defaultdict(dict) 162 | for i, d in enumerate(InteractionDataset): 163 | data[f"d_{i}"][Key.QUERIES.s] = { 164 | QueryType.INTERACTIONS.endpoint: {Key.DATASETS.s: [d.value]} 165 | } 166 | 167 | return bytes(json.dumps(data), encoding="utf-8") 168 | 169 | 170 | @pytest.fixture(scope="session") 171 | def complexes() -> pd.DataFrame: 172 | return pd.DataFrame( 173 | { 174 | "components_genesymbols": [ 175 | "foo", 176 | "bar_baz_quux", 177 | "baz_bar", 178 | "bar_quux_foo", 179 | ], 180 | "dummy": 42, 181 | } 182 | ) 183 | 184 | 185 | @pytest.fixture(scope="session") 186 | def interactions_data() -> bytes: 187 | str_handle = StringIO() 188 | with open(Path("tests") / "_data" / "interactions.pickle", "rb") as fin: 189 | data: pd.DataFrame = pickle.load(fin) 190 | 191 | data.to_csv(str_handle, sep="\t", index=False) 192 | 193 | return bytes(str_handle.getvalue(), encoding="utf-8") 194 | 195 | 196 | @pytest.fixture(scope="session") 197 | def transmitters_data() -> bytes: 198 | str_handle = StringIO() 199 | with 
open(Path("tests") / "_data" / "transmitters.pickle", "rb") as fin: 200 | data: pd.DataFrame = pickle.load(fin) 201 | 202 | data.to_csv(str_handle, sep="\t", index=False) 203 | 204 | return bytes(str_handle.getvalue(), encoding="utf-8") 205 | 206 | 207 | @pytest.fixture(scope="session") 208 | def receivers_data() -> bytes: 209 | str_handle = StringIO() 210 | with open(Path("tests") / "_data" / "receivers.pickle", "rb") as fin: 211 | data: pd.DataFrame = pickle.load(fin) 212 | 213 | data.to_csv(str_handle, sep="\t", index=False) 214 | 215 | return bytes(str_handle.getvalue(), encoding="utf-8") 216 | 217 | 218 | @pytest.fixture(scope="session") 219 | def import_intercell_result() -> pd.DataFrame: 220 | with open(Path("tests") / "_data" / "import_intercell_result.pickle", "rb") as fin: 221 | return pickle.load(fin) 222 | 223 | 224 | @pytest.fixture(scope="session") 225 | def string_series() -> pd.Series: 226 | return pd.Series(["foo:123", "bar:45;baz", None, "baz:67;bar:67", "foo;foo;foo"]) 227 | 228 | 229 | def _can_import_omnipathR() -> Optional["rpy2.robjects.packages.Package"]: # noqa: F821 230 | try: 231 | from packaging import version 232 | from rpy2.robjects.packages import PackageNotInstalledError, importr 233 | import rpy2 234 | 235 | try: 236 | from importlib_metadata import version as get_version 237 | except ImportError: 238 | # >=Python3.8 239 | from importlib.metadata import version as get_version 240 | 241 | try: 242 | assert version.parse(get_version(rpy2.__name__)) >= version.parse("3.3.0") 243 | mod = importr("OmnipathR") 244 | logging.info("Successfully loaded `OmnipathR`") 245 | return mod 246 | except (PackageNotInstalledError, AssertionError) as err: 247 | logging.error(f"Unable to import `OmnipathR`. Reason: `{err}`") 248 | 249 | except ImportError as err: 250 | logging.error(f"Unable to import `rpy2`. Reason: `{err}`") 251 | except Exception as err: 252 | logging.error(f"Unknown exception when trying to import `OmnipathR`: `{err}`") 253 | 254 | return None 255 | 256 | 257 | @pytest.fixture(scope="session") 258 | def omnipathr(request): 259 | url = urljoin(DEFAULT_OPTIONS.url, Endpoint.ABOUT.s) 260 | 261 | if not request.config.getoption("test_server", default=False, skip=True): 262 | logging.error("Testing using the server is disabled.") 263 | return None 264 | try: 265 | resp = requests.get(url) 266 | resp.raise_for_status() 267 | except Exception as e: 268 | logging.error(f"Unable to contact the server at `{url}`. 
Reason: `{e}`") 269 | return None 270 | 271 | return _can_import_omnipathR() 272 | 273 | 274 | @pytest.fixture(autouse=True, scope="class") 275 | def _inject_omnipath(request, omnipathr, server_url): 276 | if isclass(request.cls) and issubclass(request.cls, RTester): 277 | if omnipathr is None: 278 | pytest.skip("Unable to import `OmnipathR`.") 279 | from rpy2.robjects import pandas2ri 280 | 281 | # at this point, we know rpy2 can be imported, thanks to the `omnipathr` fixture 282 | # do not change the activation order 283 | pandas2ri.activate() 284 | request.cls.omnipathr = omnipathr 285 | 286 | 287 | class RTester(ABC): 288 | def test_resources(self): 289 | expected = sorted(self.omnipathr.get_intercell_resources()) 290 | actual = sorted(op.requests.Intercell.resources()) 291 | 292 | np.testing.assert_array_equal(expected, actual) 293 | -------------------------------------------------------------------------------- /omnipath/_core/utils/_options.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Tuple, Union, ClassVar, NoReturn, Optional 2 | from pathlib import Path 3 | from urllib.parse import urlparse 4 | import configparser 5 | 6 | import attr 7 | 8 | from omnipath.constants import License 9 | from omnipath._core.cache._cache import Cache, FileCache, NoopCache, MemoryCache 10 | from omnipath.constants._pkg_constants import DEFAULT_OPTIONS 11 | 12 | 13 | def _is_positive(_instance, attribute: attr.Attribute, value: int) -> NoReturn: 14 | """Check whether the ``value`` is positive.""" 15 | if isinstance(value, tuple): 16 | return all(_is_positive(_instance, attribute, v) for v in value) 17 | if value <= 0: 18 | raise ValueError( 19 | f"Expected `{attribute.name}` to be positive, found `{value}`." 20 | ) 21 | 22 | 23 | def _is_non_negative(_instance, attribute: attr.Attribute, value: int) -> NoReturn: 24 | """Check whether the ``value`` is non-negative.""" 25 | if value < 0: 26 | raise ValueError( 27 | f"Expected `{attribute.name}` to be non-negative, found `{value}`." 28 | ) 29 | 30 | 31 | def _is_valid_url(_instance, _attribute: attr.Attribute, value: str) -> NoReturn: 32 | """Check whether the ``value`` forms a valid URL.""" 33 | pr = urlparse(value) 34 | 35 | if not pr.scheme or not pr.netloc: 36 | raise ValueError(f"Invalid URL: `{value}`.") 37 | 38 | 39 | def _cache_converter(value: Optional[Union[str, Path, Cache]]) -> Cache: 40 | """Convert ``value`` to :class:`omnipath._core.cache.Cache`.""" 41 | if isinstance(value, Cache): 42 | return value 43 | 44 | if value is None: 45 | return NoopCache() 46 | if value == "memory": 47 | return MemoryCache() 48 | 49 | return FileCache(value) 50 | 51 | 52 | @attr.s 53 | class Options: 54 | """ 55 | Class defining various :mod:`omnipath` options. 56 | 57 | Parameters 58 | ---------- 59 | url 60 | URL of the web service. 61 | license 62 | License to use when fetching the data. 63 | password 64 | Password used when performing requests. 65 | cache 66 | Type of a cache. Valid options are: 67 | 68 | - `None`: do not save anything into a cache. 69 | - `'memory'`: cache files into the memory. 70 | - :class:`str`: persist files into a directory. 71 | 72 | autoload 73 | Whether to contact the server at ``url`` during import to get the server version and the most up-to-date 74 | query parameters and their valid options. 75 | convert_dtypes 76 | Whether to convert the data types of the resulting :class:`pandas.DataFrame`. 77 | num_retries 78 | Number of retries before giving up. 
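        Retries are attempted for HTTP 413, 429 and 5xx responses with
        exponential backoff.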
79 | timeout 80 | Timeout in seconds when awaiting response. 81 | chunk_size 82 | Size in bytes in which to read the data. 83 | progress_bar 84 | Whether to show the progress bar when downloading data. 85 | """ 86 | 87 | config_path: ClassVar[Path] = Path.home() / ".config" / "omnipathdb.ini" 88 | 89 | url: str = attr.ib( 90 | default=DEFAULT_OPTIONS.url, 91 | validator=[attr.validators.instance_of(str), _is_valid_url], 92 | on_setattr=attr.setters.validate, 93 | ) 94 | fallback_urls: Tuple[str] = attr.ib( 95 | default=DEFAULT_OPTIONS.fallback_urls, 96 | converter=(lambda val: (val,) if isinstance(val, str) else tuple(val)), 97 | on_setattr=attr.setters.convert, 98 | ) 99 | static_url: str = attr.ib( 100 | default=DEFAULT_OPTIONS.static_url, 101 | validator=[attr.validators.instance_of(str), _is_valid_url], 102 | on_setattr=attr.setters.validate, 103 | ) 104 | license: License = attr.ib( 105 | default=None, 106 | validator=attr.validators.optional(attr.validators.instance_of((str, License))), 107 | converter=(lambda val: None if val is None else License(val)), 108 | on_setattr=attr.setters.convert, 109 | ) 110 | password: Optional[str] = attr.ib( 111 | default=None, 112 | repr=False, 113 | validator=attr.validators.optional(attr.validators.instance_of(str)), 114 | on_setattr=attr.setters.validate, 115 | ) 116 | 117 | cache: Cache = attr.ib( 118 | default=DEFAULT_OPTIONS.cache_dir, 119 | converter=_cache_converter, 120 | kw_only=True, 121 | on_setattr=attr.setters.convert, 122 | ) 123 | autoload: bool = attr.ib( 124 | default=DEFAULT_OPTIONS.autoload, 125 | validator=attr.validators.instance_of(bool), 126 | on_setattr=attr.setters.validate, 127 | ) 128 | convert_dtypes: bool = attr.ib( 129 | default=DEFAULT_OPTIONS.convert_dtypes, 130 | validator=attr.validators.instance_of(bool), 131 | on_setattr=attr.setters.validate, 132 | ) 133 | 134 | num_retries: int = attr.ib( 135 | default=DEFAULT_OPTIONS.num_retries, 136 | validator=[attr.validators.instance_of(int), _is_non_negative], 137 | on_setattr=attr.setters.validate, 138 | ) 139 | timeout: Union[int, float, Tuple[float, float]] = attr.ib( 140 | default=DEFAULT_OPTIONS.timeout, 141 | validator=[attr.validators.instance_of((int, float, tuple)), _is_positive], 142 | on_setattr=attr.setters.validate, 143 | ) 144 | chunk_size: int = attr.ib( 145 | default=DEFAULT_OPTIONS.chunk_size, 146 | validator=[attr.validators.instance_of(int), _is_positive], 147 | on_setattr=attr.setters.validate, 148 | ) 149 | 150 | progress_bar: bool = attr.ib( 151 | default=True, 152 | repr=False, 153 | validator=attr.validators.instance_of(bool), 154 | on_setattr=attr.setters.validate, 155 | ) 156 | 157 | def _create_config(self, section: Optional[str] = None): 158 | section = self.url if section is None else section 159 | _is_valid_url(None, None, section) 160 | config = configparser.ConfigParser() 161 | # do not save the password 162 | config[section] = { 163 | "license": str(None if self.license is None else self.license.value), 164 | "cache_dir": str(self.cache.path), 165 | "autoload": self.autoload, 166 | "convert_dtypes": self.convert_dtypes, 167 | "num_retries": self.num_retries, 168 | "timeout": self.timeout, 169 | "chunk_size": self.chunk_size, 170 | "progress_bar": self.progress_bar, 171 | } 172 | 173 | return config 174 | 175 | @classmethod 176 | def from_config(cls, section: Optional[str] = None) -> "Options": 177 | """ 178 | Return the options from a configuration file. 
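
        If no configuration file exists yet, one with the default options is
        created and written to :attr:`config_path` first. A typical round
        trip (sketch)::

            opts = Options.from_config()  # read (or create) the `.ini` file
            opts.timeout = 600
            opts.write()                  # persist the change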
179 | 180 | Parameters 181 | ---------- 182 | section 183 | Section of the `.ini` file from which to create the options. It corresponds to the URL of the server. 184 | If `None`, use default URL. 185 | 186 | Returns 187 | ------- 188 | :class:`omnipath._cores.utils.Options` 189 | The options. 190 | """ 191 | if not cls.config_path.is_file(): 192 | return cls().write() 193 | 194 | config = configparser.ConfigParser(default_section=DEFAULT_OPTIONS.url) 195 | config.read(cls.config_path) 196 | 197 | section = DEFAULT_OPTIONS.url if section is None else section 198 | _is_valid_url(None, None, section) 199 | _ = config.get(section, "cache_dir") 200 | 201 | cache = config.get(section, "cache_dir", fallback=DEFAULT_OPTIONS.cache_dir) 202 | cache = None if cache == "None" else cache 203 | license = config.get(section, "license", fallback=DEFAULT_OPTIONS.license) 204 | license = None if license == "None" else License(license) 205 | 206 | return cls( 207 | url=section, 208 | license=license, 209 | num_retries=config.getint( 210 | section, "num_retries", fallback=DEFAULT_OPTIONS.num_retries 211 | ), 212 | timeout=config.getfloat( 213 | section, "timeout", fallback=DEFAULT_OPTIONS.timeout 214 | ), 215 | chunk_size=config.getint( 216 | section, "chunk_size", fallback=DEFAULT_OPTIONS.chunk_size 217 | ), 218 | progress_bar=config.getboolean( 219 | section, "progress_bar", fallback=DEFAULT_OPTIONS.progress_bar 220 | ), 221 | autoload=config.getboolean( 222 | section, "autoload", fallback=DEFAULT_OPTIONS.autoload 223 | ), 224 | convert_dtypes=config.getboolean( 225 | section, "convert_dtypes", fallback=DEFAULT_OPTIONS.convert_dtypes 226 | ), 227 | cache=cache, 228 | ) 229 | 230 | @classmethod 231 | def from_options(cls, options: "Options", **kwargs: Any) -> "Options": 232 | """ 233 | Create new options from previous options. 234 | 235 | Parameters 236 | ---------- 237 | options 238 | Options from which to create new ones. 239 | **kwargs 240 | Keyword arguments overriding attributes from ``options``. 241 | 242 | Returns 243 | ------- 244 | The newly created option. 245 | """ 246 | if not isinstance(options, Options): 247 | raise TypeError( 248 | f"Expected `options` to be of type `Options`, found `{type(options)}`." 
249 | ) 250 | 251 | kwargs = {k: v for k, v in kwargs.items() if hasattr(options, k)} 252 | 253 | return cls(**{**options.__dict__, **kwargs}) 254 | 255 | def write(self, section: Optional[str] = None) -> NoReturn: 256 | """Write the current options to a configuration file.""" 257 | self.config_path.parent.mkdir(parents=True, exist_ok=True) 258 | 259 | with open(self.config_path, "w") as fout: 260 | self._create_config(section).write(fout) 261 | 262 | return self 263 | 264 | def __enter__(self) -> "Options": 265 | return self.from_options(self) 266 | 267 | def __exit__(self, exc_type, exc_val, exc_tb) -> None: 268 | pass 269 | 270 | 271 | options = Options.from_config() 272 | 273 | 274 | __all__ = [options, Options] 275 | -------------------------------------------------------------------------------- /omnipath/_core/requests/interactions/_evidences.py: -------------------------------------------------------------------------------- 1 | from typing import List, Tuple, Union, Callable, Iterable, Optional 2 | 3 | import pandas as pd 4 | 5 | from omnipath._misc.utils import to_set 6 | from omnipath._core.requests._utils import ( 7 | _count_resources, 8 | _count_references, 9 | _strip_resource_label_df, 10 | ) 11 | 12 | EVIDENCES_KEYS = ("positive", "negative", "directed", "undirected") 13 | 14 | 15 | def _must_have_evidences(df: pd.DataFrame) -> None: 16 | """Raise an error if the input data frame does not contain evidences.""" 17 | if "evidences" not in df.columns: 18 | raise ValueError("The input data frame must contain `evidences` column.") 19 | 20 | 21 | def unnest_evidences(df: pd.DataFrame, col: str = "evidences") -> pd.DataFrame: 22 | """ 23 | Create new columns of evidences by direction and effect sign. 24 | 25 | Plucks evidence lists of each direction and effect sign into separate, 26 | new columns. This will yield four new columns: "positive", "negative", 27 | "directed" and "undirected", each containing lists of dicts of evidences. 28 | 29 | Parameters 30 | ---------- 31 | df 32 | An OmniPath interaction data frame with "evidences" column. 33 | col 34 | Name of the column containing the nested evidences. 35 | 36 | Returns 37 | ------- 38 | :class:`pandas.DataFrame` 39 | The input data frame with new columns "positive", "negative", 40 | "directed" and "undirected" each containing lists of dicts of 41 | evidences. 42 | 43 | Raises 44 | ------ 45 | ValueError 46 | If the input data frame does not contain "evidences" column. 47 | """ 48 | for key in ("positive", "negative", "directed", "undirected"): 49 | df[key] = df[col].apply(lambda x: x[key]) # noqa: B023 50 | 51 | return df 52 | 53 | 54 | def filter_evidences( 55 | df: pd.DataFrame, 56 | datasets: Optional[Union[str, Iterable[str]]] = None, 57 | resources: Optional[Union[str, Iterable[str]]] = None, 58 | col: str = "evidences", 59 | target_col: Optional[str] = None, 60 | ) -> pd.DataFrame: 61 | """ 62 | Filter evidences by dataset and resource. 63 | 64 | Parameters 65 | ---------- 66 | df 67 | An OmniPath interaction data frame with "evidences" column. 68 | datasets 69 | A list of dataset names. If None, all datasets will be included. 70 | resources 71 | A list of resource names. If None, all resources will be included. 72 | col 73 | Name of the column containing the evidences. 74 | target_col 75 | Column to output the filtered evidences to. By default `col` is 76 | to be overwritten. 
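        Passing a different name keeps the original column untouched.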
77 | 78 | Returns 79 | ------- 80 | :class:`pandas.DataFrame` 81 | The input data frame with the evidences filtered, with a new column 82 | depending on the `target_col` parameter. 83 | """ 84 | target_col = target_col or col 85 | datasets = to_set(datasets) 86 | resources = to_set(resources) 87 | 88 | def the_filter(evs): 89 | if isinstance(evs, dict): 90 | return {k: the_filter(v) for k, v in evs.items()} 91 | 92 | elif isinstance(evs, list): 93 | return [ 94 | ev 95 | for ev in evs 96 | if ( 97 | (not datasets or ev["dataset"] in datasets) 98 | and (not resources or ev["resource"] in resources) 99 | ) 100 | ] 101 | 102 | else: 103 | return evs 104 | 105 | df[target_col] = df[col].apply(the_filter) 106 | 107 | return df 108 | 109 | 110 | def from_evidences( 111 | df: pd.DataFrame, 112 | col: str = "evidences", 113 | ) -> pd.DataFrame: 114 | """ 115 | Recreate interaction records from an evidences column. 116 | 117 | Parameters 118 | ---------- 119 | df 120 | An OmniPath interaction data frame. 121 | col: 122 | Name of the column containing the evidences. 123 | 124 | Returns 125 | ------- 126 | :class:`pandas.DataFrame` 127 | The input data frame with its standard columns reconstructed based 128 | on the evidences in `col`. The records with no evidences from the 129 | specified datasets and resources will be removed. 130 | """ 131 | evs_df = pd.DataFrame({"evidences": df[col]}) 132 | evs_df = unnest_evidences(evs_df) 133 | evs_df["ce_positive"] = _curation_effort_from(evs_df, columns="positive") 134 | evs_df["ce_negative"] = _curation_effort_from(evs_df, columns="negative") 135 | evs_df["ce_directed"] = _curation_effort_from(evs_df, columns="directed") 136 | 137 | df["is_directed"] = evs_df["directed"].apply(bool) 138 | df["is_stimulation"] = evs_df["positive"].apply(bool) 139 | df["is_inhibition"] = evs_df["negative"].apply(bool) 140 | df["curation_effort"] = _curation_effort_from(evs_df) 141 | df["sources"] = _resources_from(evs_df) 142 | df["references"] = _references_from(evs_df) 143 | df["consensus_stimulation"] = evs_df["ce_positive"] >= evs_df["ce_negative"] 144 | df["consensus_inhibition"] = evs_df["ce_positive"] <= evs_df["ce_negative"] 145 | 146 | # recompile the consensus_direction 147 | opposite_direction = pd.DataFrame( 148 | { 149 | "source": df["source"], 150 | "target": df["target"], 151 | "ce_directed_opp": evs_df["ce_directed"], 152 | } 153 | ) 154 | df["ce_directed"] = evs_df["ce_directed"] 155 | df = df.merge( 156 | opposite_direction, 157 | on=["source", "target"], 158 | how="left", 159 | sort=False, 160 | ) 161 | df["consensus_direction"] = ( 162 | pd.isnull(df["ce_directed_opp"]) | df["ce_directed"] >= df["ce_directed_opp"] 163 | ) 164 | df.drop(columns=["ce_directed", "ce_directed_opp"], inplace=True) 165 | 166 | _count_resources(df) 167 | _count_references(df) 168 | _strip_resource_label_df(df, col="references") 169 | 170 | # drop records which remained without evidences 171 | df = df[df.sources.apply(bool)] 172 | 173 | return df 174 | 175 | 176 | def _ensure_unnested( 177 | df: pd.DataFrame, 178 | columns: Union[str, Iterable[str]] = EVIDENCES_KEYS, 179 | ) -> Tuple[pd.DataFrame, Tuple[str]]: 180 | """ 181 | Unnest a nested evidences column in a single column data frame. 182 | 183 | Used only in some specific contexts within this module, all are helper 184 | functions of `from_evidences`. 185 | 186 | Returns 187 | ------- 188 | A tuple of the input data frame and a tuple of column names. 
def _ensure_unnested(
    df: pd.DataFrame,
    columns: Union[str, Iterable[str]] = EVIDENCES_KEYS,
) -> Tuple[pd.DataFrame, List[str]]:
    """
    Unnest a nested evidences column in a single column data frame.

    Used only in some specific contexts within this module; all callers are
    helper functions of `from_evidences`.

    Returns
    -------
    A tuple of the input data frame and a list of column names. If the
    data frame does not consist of a single nested evidences column, it
    will still be subset to the specified columns.
    """
    columns = list(to_set(columns))
    evs_df = df[columns]

    if (
        evs_df.shape[1] == 1
        and isinstance(evs_df.iloc[0, 0], dict)
        and not set(EVIDENCES_KEYS) - set(evs_df.iloc[0, 0].keys())
    ):
        evs_df = unnest_evidences(evs_df, col=evs_df.columns[0])
        columns = list(EVIDENCES_KEYS)

    evs_df = evs_df[columns]

    return evs_df, columns


def _from(
    df: pd.DataFrame,
    func: Callable,
    columns: Union[str, Iterable[str]] = EVIDENCES_KEYS,
) -> List[Union[int, str]]:
    """Compile a new column by applying a function on evidences."""
    evs_df, columns = _ensure_unnested(df, columns)

    return [
        func(ev for evs in rec for ev in evs)
        for rec in evs_df[columns].itertuples(index=False)
    ]


def _curation_effort_from(
    df: pd.DataFrame,
    columns: Union[str, Iterable[str]] = EVIDENCES_KEYS,
) -> List[int]:
    """Curation effort from one or more evidences columns."""
    return _from(
        df=df,
        func=lambda evs: sum(len(ev["references"]) + 1 for ev in evs),
        columns=columns,
    )


def _resources_from(
    df: pd.DataFrame,
    columns: Union[str, Iterable[str]] = EVIDENCES_KEYS,
) -> List[str]:
    """Resources from one or more evidences columns."""

    def extract_resources(evs: tuple) -> str:
        return ";".join(
            sorted(
                {
                    f"{ev['resource']}{'_' if ev['via'] else ''}{ev['via'] or ''}"
                    for ev in evs
                }
            )
        )

    return _from(df=df, func=extract_resources, columns=columns)


def _references_from(
    df: pd.DataFrame,
    columns: Union[str, Iterable[str]] = EVIDENCES_KEYS,
    prefix: bool = True,
) -> List[str]:
    """Get references from one or more evidences columns."""

    def extract_references(evs: tuple) -> str:
        return ";".join(
            sorted(
                {
                    f"{ev['resource'] + ':' if prefix else ''}{ref}"
                    for ev in evs
                    for ref in ev["references"]
                }
            )
        )

    return _from(df=df, func=extract_references, columns=columns)
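# Note on the helpers above: an evidence contributes a curation effort of
# `len(ev["references"]) + 1`, i.e. one point for the resource itself plus one
# per literature reference, summed over all evidences in the selected columns.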
def only_from(
    df: pd.DataFrame,
    datasets: Optional[Union[str, Iterable[str]]] = None,
    resources: Optional[Union[str, Iterable[str]]] = None,
) -> pd.DataFrame:
    """
    Restrict interactions to the specified datasets and resources.

    The OmniPath interactions database fully integrates all attributes from all
    resources for each interaction. This has the advantage that interaction
    data frames are ready for use in most applications; however, it makes it
    impossible to know which resources and references support the direction or
    effect sign of an interaction. This information can be recovered from the
    "evidences" column, which preserves all details about interaction
    provenance. If you want a faithful copy of a certain resource or dataset,
    this function helps you do so. Still, for most applications it is best to
    use the interaction data as returned by the web service.

    Parameters
    ----------
    df
        An OmniPath interaction data frame with an "evidences" column.
    datasets
        A list of dataset names. If None, all datasets will be included.
    resources
        A list of resource names. If None, all resources will be included.

    Returns
    -------
    The input data frame with the standard columns reconstructed from the
    evidences supported by the datasets and resources provided. Records with
    no evidences from the specified datasets or resources will be removed.
    """
    tmp_col = "evidences_filtered_tmp"

    _must_have_evidences(df)

    df = filter_evidences(df, datasets, resources, target_col=tmp_col)
    df = from_evidences(df, tmp_col)
    df = df.drop(columns=tmp_col)

    return df
--------------------------------------------------------------------------------
/omnipath/_core/query/_query_validator.py:
--------------------------------------------------------------------------------
from abc import ABCMeta
from enum import Enum, EnumMeta
from typing import (
    Any,
    Set,
    List,
    Union,
    Mapping,
    Iterable,
    Optional,
    Sequence,
    FrozenSet,
)
import sys
import json
import logging

from omnipath._core.utils._docs import d
from omnipath._core.query._types import (
    Int_t,
    Str_t,
    Bool_t,
    None_t,
    Strseq_t,
    License_t,
    Organism_t,
)
from omnipath._core.utils._options import Options
from omnipath.constants._constants import NoValue
from omnipath.constants._pkg_constants import Key, Format
from omnipath._core.downloader._downloader import Downloader


def _to_string_set(item: Union[Any, Sequence[Any]]) -> Set[str]:
    """
    Convert ``item`` to a `str` set.

    Parameters
    ----------
    item
        Item to convert. If it is not a sequence, it will be made into a
        singleton.

    Returns
    -------
    :class:`set`
        Set of `str`.
    """
    if isinstance(item, (str, Enum)) or not isinstance(item, Iterable):
        item = (item,)
    return {str(i.value if isinstance(i, Enum) else i) for i in item}
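# Behaviour sketch for `_to_string_set` (illustrative values):
#
#     _to_string_set("a")        # {'a'}       -- strings stay singletons
#     _to_string_set(["a", 1])   # {'a', '1'}  -- items are stringified
#     _to_string_set(42)         # {'42'}      -- non-iterables are wrapped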
class ServerValidatorMeta(EnumMeta, ABCMeta):  # noqa: D101
    class Validator:
        """
        Class that validates values for some parameter passed to the server.

        Parameters
        ----------
        param
            Name of the parameter we're checking. Only used for informing the user.
        haystack
            Valid values for the ``param``. If `None`, no validation will be performed.
        doc
            Docstring specific to the ``param``.
        """

        def __init__(
            self,
            param: str,
            haystack: Optional[Set[str]] = None,
            doc: Optional[str] = None,
        ):
            if isinstance(haystack, str):
                haystack = (haystack,)
            elif haystack is not None and not isinstance(haystack, Iterable):
                raise TypeError(
                    f"Expected `haystack` for `{param}` to be either a "
                    f"`str` or a `Sequence`, found `{type(haystack)}`."
                )

            self._param = param.lower()
            self._haystack = haystack if haystack is None else frozenset(haystack)
            self._query_doc_ = None if not doc else doc  # doc can also be `()`

        @property
        def haystack(self) -> Optional[FrozenSet[str]]:
            """Return the valid values for this parameter."""
            return self._haystack

        def __call__(self, needle: Optional[Set[str]]) -> Optional[Set[str]]:
            """
            Check whether ``needle`` is a valid value for :attr:`_param`.

            Parameters
            ----------
            needle
                Needle to check.

            Returns
            -------
            `None` if the ``needle`` was `None`, otherwise the ``needle`` as a `str` set,
            optionally intersected with :attr:`_haystack` if it is not `None`.

            Raises
            ------
            ValueError
                If :attr:`haystack` is not `None` and no valid values were found.
            """
            if needle is None:
                return None
            elif isinstance(needle, bool):
                needle = int(needle)
            elif isinstance(needle, Enum):
                needle = needle.value

            needle = _to_string_set(needle)
            if self.haystack is None:
                logging.debug(
                    f"Unable to perform parameter validation for `{self._param}`, the haystack is unknown"
                )
                return needle

            res = needle & self.haystack
            if not len(res):
                raise ValueError(
                    f"No valid options found for parameter `{self._param}` in: `{sorted(needle)}`.\n"
                    f"Valid options are: `{sorted(self.haystack)}`."
                )
            elif len(res) < len(needle):
                logging.warning(
                    f"Encountered invalid value(s) for `{self._param}`. "
                    f"Remaining values are `{sorted(res)}`"
                )

            return res

    def __new__(cls, clsname, superclasses, attributedict):  # noqa: D102
        from omnipath import options

        endpoint = attributedict.pop(
            "__endpoint__", clsname.lower().replace("validator", "")
        )
        use_default = True
        old_members = list(attributedict._member_names)
        old_values = cls._remove_old_members(attributedict)

        if endpoint is None:
            if len(old_members):
                raise ValueError(
                    "If `__endpoint__` is `None`, no members must be specified."
                )
        elif options.autoload:
            use_default = False
            with Options.from_options(
                options,
                num_retries=0,
                timeout=3.0,
                cache=None,
                progress_bar=False,
                chunk_size=2048,
            ) as opt:
                try:
                    logging.debug("Attempting to construct classes from the server")
                    res = Downloader(opt).maybe_download(
                        f"{Key.QUERIES.s}/{endpoint}",
                        callback=json.load,
                        params={Key.FORMAT.s: Format.JSON.s},
                    )

                    if len({str(k).upper() for k in res.keys()}) != len(res):
                        raise RuntimeError(
                            f"After upper-casing, the keys would not be unique: `{list(res.keys())}`."
                        )

                    for k, value in res.items():
                        if (
                            isinstance(value, str)
                            and "no such query available" in value
                        ):
                            raise RuntimeError(f"Invalid endpoint: `{endpoint}`.")

                        key = str(k).upper()
                        if value is None:
                            attributedict[key] = cls.Validator(param=k)
                        elif isinstance(value, Sequence):
                            attributedict[key] = cls.Validator(
                                param=k, haystack={str(v) for v in value}
                            )
                        else:
                            attributedict[key] = cls.Validator(param=k)
                except Exception as e:
                    logging.debug(
                        f"Unable to construct classes from the server. Reason: `{e}`"
                    )
                    use_default = True

        if use_default:
            if endpoint is not None:
                logging.debug(
                    f"Using predefined class: `{clsname}`."
                    + (
                        ""
                        if options.autoload
                        else " Consider setting `omnipath.options.autoload = True`"
                    )
                )

            _ = cls._remove_old_members(attributedict)
            for k, v in zip(old_members, old_values):
                attributedict[k] = cls.Validator(param=k, doc=v)

        return super().__new__(cls, clsname, superclasses, attributedict)

    @classmethod
    def _remove_old_members(cls, attributedict) -> List[Any]:
        vals = []
        for k in list(attributedict._member_names):
            vals.append(attributedict.pop(k, None))
        attributedict._member_names = [] if sys.version_info < (3, 11) else {}

        return vals
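# Usage sketch (illustrative): members of the validator classes defined below
# are called with the user-supplied value and return the validated string set,
# e.g.
#
#     InteractionsValidator.LICENSE("academic")
#
# returns `{'academic'}` when the server lists it as valid (or when no
# haystack could be fetched); values outside a known haystack raise
# `ValueError`.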
class AutoValidator(NoValue):  # noqa: D101
    @property
    def valid(self) -> Optional[Set[str]]:
        """Return the valid values."""
        return self.value.haystack

    @property
    def annotation(self) -> Mapping[str, type]:
        """Return the type annotations."""
        return getattr(self, "__annotations__", {}).get(self.name, Any)

    @property
    def doc(self) -> Optional[str]:
        """Return the docstring."""
        return getattr(self.value, "_query_doc_", None)

    @d.dedent
    def __call__(self, value: Union[str, Sequence[str]]) -> Optional[Set[str]]:
        """%(validate)s"""  # noqa: D401
        return self.value(value)


class QueryValidatorMixin(AutoValidator, metaclass=ServerValidatorMeta):  # noqa: D101
    __endpoint__ = None


class EnzsubValidator(QueryValidatorMixin):  # noqa: D101
    DATABASES: Strseq_t = ()
    ENZYME_SUBSTRATE: Str_t = ()
    ENZYMES: Strseq_t = ()
    FIELDS: Strseq_t = ()
    FORMAT: Str_t = ()
    GENESYMBOLS: Bool_t = ()
    HEADER: Str_t = ()
    LICENSE: License_t = ()
    LIMIT: Int_t = ()
    MODIFICATION: Str_t = ()
    ORGANISMS: Organism_t = ()
    PARTNERS: Strseq_t = ()
    PASSWORD: Str_t = ()
    RESIDUES: Strseq_t = ()
    RESOURCES: Strseq_t = ()
    SUBSTRATES: Strseq_t = ()
    TYPES: Strseq_t = ()


class InteractionsValidator(QueryValidatorMixin):  # noqa: D101
    DATABASES: Strseq_t = ()
    DATASETS: Strseq_t = ()
    DIRECTED: Bool_t = ()
    DOROTHEA_LEVELS: Strseq_t = ()
    DOROTHEA_METHODS: Strseq_t = ()
    ENTITY_TYPES: Strseq_t = ()
    FIELDS: Strseq_t = ()
    FORMAT: Str_t = ()
    GENESYMBOLS: Bool_t = ()
    HEADER: Str_t = ()
    LICENSE: License_t = ()
    LIMIT: Int_t = ()
    LOOPS: Bool_t = ()
    ORGANISMS: Organism_t = ()
    PARTNERS: Strseq_t = ()
    PASSWORD: Str_t = ()
    RESOURCES: Strseq_t = ()
    SIGNED: Bool_t = ()
    SOURCE_TARGET: Bool_t = ()
    SOURCES: Strseq_t = ()
    TARGETS: Strseq_t = ()
    TFREGULONS_LEVELS: Strseq_t = ()
    TFREGULONS_METHODS: Strseq_t = ()
    TYPES: Strseq_t = ()


class ComplexesValidator(QueryValidatorMixin):  # noqa: D101
    DATABASES: Strseq_t = ()
    FIELDS: Strseq_t = ()
    FORMAT: Str_t = ()
    HEADER: Str_t = ()
    LICENSE: License_t = ()
    LIMIT: Int_t = ()
    PASSWORD: Str_t = ()
    PROTEINS: Strseq_t = ()
    RESOURCES: Strseq_t = ()
class AnnotationsValidator(QueryValidatorMixin):  # noqa: D101
    DATABASES: Strseq_t = ()
    ENTITY_TYPES: Strseq_t = ()
    FIELDS: Strseq_t = ()
    FORMAT: Str_t = ()
    GENESYMBOLS: Bool_t = ()
    HEADER: Str_t = ()
    LICENSE: License_t = ()
    LIMIT: Int_t = ()
    PASSWORD: Str_t = ()
    PROTEINS: Strseq_t = ()
    RESOURCES: Strseq_t = ()


class IntercellValidator(QueryValidatorMixin):  # noqa: D101
    ASPECT: Str_t = ()
    CATEGORIES: Str_t = ()
    CAUSALITY: Str_t = ()
    DATABASES: Strseq_t = ()
    ENTITY_TYPES: Str_t = ()
    FIELDS: Strseq_t = ()
    FORMAT: Str_t = ()
    HEADER: None_t = ()
    LICENSE: License_t = ()
    LIMIT: Int_t = ()
    PARENT: Str_t = ()
    PASSWORD: Str_t = ()
    PLASMA_MEMBRANE_PERIPHERAL: Bool_t = ()
    PLASMA_MEMBRANE_TRANSMEMBRANE: Bool_t = ()
    PMP: Bool_t = ()
    PMTM: Bool_t = ()
    PROTEINS: Strseq_t = ()
    REC: Bool_t = ()
    RECEIVER: Strseq_t = ()
    RESOURCES: Strseq_t = ()
    SCOPE: Str_t = ()
    SEC: Bool_t = ()
    SECRETED: Bool_t = ()
    SOURCE: Str_t = ()
    TOPOLOGY: Str_t = ()
    TRANS: Bool_t = ()
    TRANSMITTER: Bool_t = ()


__all__ = [
    "EnzsubValidator",
    "InteractionsValidator",
    "ComplexesValidator",
    "AnnotationsValidator",
    "IntercellValidator",
]
--------------------------------------------------------------------------------
/tests/test_interactions.py:
--------------------------------------------------------------------------------
from io import StringIO
from urllib.parse import urljoin, quote_plus
import json

import pytest

import numpy as np
import pandas as pd

from omnipath import options
from omnipath.constants import Organism, InteractionDataset
from omnipath._core.requests import Intercell
from omnipath.constants._pkg_constants import Key, Endpoint
from omnipath._core.requests.interactions._utils import import_intercell_network
from omnipath._core.requests.interactions._interactions import (
    TFmiRNA,
    Dorothea,
    OmniPath,
    TFtarget,
    CollecTRI,
    KinaseExtra,
    LigRecExtra,
    PathwayExtra,
    AllInteractions,
    Transcriptional,
    PostTranslational,
    miRNA,
    lncRNAmRNA,
)

options.fallback_urls = ()


class TestInteractions:
    def test_all_excluded_excluded(self):
        with pytest.raises(
            ValueError, match=r"After excluding `\d+` datasets, none were left."
        ):
            AllInteractions.get(exclude=list(InteractionDataset))

    def test_invalid_excluded_datasets(self):
        with pytest.raises(
            ValueError, match=r"Invalid value `foo` for `InteractionDataset`."
        ):
            AllInteractions.get(exclude="foo")
    def test_graph_empty(self):
        with pytest.raises(ValueError, match=r"No data were retrieved. Please"):
            AllInteractions.graph(pd.DataFrame())

    def test_graph_source_target(self):
        interaction = pd.DataFrame(
            {
                "source": ["alpha", "beta", "gamma"],
                "target": [0, 1, 0],
                "source_genesymbol": "bar",
                "target_genesymbol": "baz",
            }
        )
        src, tgt = AllInteractions._get_source_target_cols(interaction)

        assert src == "source_genesymbol"
        assert tgt == "target_genesymbol"

        src, tgt = AllInteractions._get_source_target_cols(
            interaction[
                interaction.columns.difference(
                    ["source_genesymbol", "target_genesymbol"]
                )
            ]
        )

        assert src == "source"
        assert tgt == "target"

    @pytest.mark.parametrize(
        "interaction",
        [
            PathwayExtra,
            KinaseExtra,
            LigRecExtra,
            miRNA,
            TFmiRNA,
            lncRNAmRNA,
            Dorothea,
            TFtarget,
            OmniPath,
            PostTranslational,
        ],
    )
    def test_resources(
        self, cache_backup, interaction, interaction_resources: bytes, requests_mock
    ):
        url = urljoin(options.url, Endpoint.RESOURCES.s)
        data = json.loads(interaction_resources)
        requests_mock.register_uri(
            "GET", f"{url}?format=json", content=interaction_resources
        )

        resources = interaction.resources()
        for resource in resources:
            assert {
                InteractionDataset(d)
                for d in data[resource][Key.QUERIES.s][
                    interaction._query_type.endpoint
                ][Key.DATASETS.s]
            } & interaction()._datasets
        assert requests_mock.called_once

    def test_invalid_organism(self):
        with pytest.raises(
            ValueError, match=r"Invalid value `foo` for `Organism`. Valid options are:"
        ):
            AllInteractions.get(**{Key.ORGANISM.s: "foo"})

    @pytest.mark.parametrize(
        "interaction",
        [
            PathwayExtra,
            KinaseExtra,
            LigRecExtra,
            miRNA,
            TFmiRNA,
            lncRNAmRNA,
            Dorothea,
            TFtarget,
            OmniPath,
            PostTranslational,
            AllInteractions,
            CollecTRI,
        ],
    )
    def test_interaction_get(
        self, cache_backup, interaction, interaction_resources: bytes, requests_mock
    ):
        url = urljoin(options.url, interaction._query_type.endpoint)
        datasets = quote_plus(
            ",".join(sorted(d.value for d in interaction()._datasets))
        )
        if getattr(interaction, "_strict_evidences", False):
            pytest.skip("Test not yet implemented")
        fields = "fields=curation_effort%2Creferences%2Csources"
        if interaction is AllInteractions:
            fields += "%2Ctype"

        requests_mock.register_uri(
            "GET",
            f"{url}?datasets={datasets}&{fields}&format=tsv",
            content=interaction_resources,
        )

        _ = interaction.get()

        assert requests_mock.called_once
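    # Both the singular `organism` and the plural `organisms` keyword should
    # resolve to the same request; the test below checks that the two
    # spellings hit the same mocked endpoint and return identical frames.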
    @pytest.mark.parametrize("organisms", list(Organism))
    def test_valid_organism(
        self, cache_backup, organisms, requests_mock, interaction_resources
    ):
        url = urljoin(options.url, AllInteractions._query_type.endpoint)
        datasets = quote_plus(",".join(sorted(d.value for d in InteractionDataset)))
        requests_mock.register_uri(
            "GET",
            f"{url}?datasets={datasets}&fields=curation_effort%2Creferences%2Csources%2Ctype&"
            f"format=json&organisms={organisms.code}",
            content=interaction_resources,
        )

        x = AllInteractions.get(organism=organisms, format="json")
        y = AllInteractions.get(organisms=organisms.value, format="json")

        assert requests_mock.called_once
        pd.testing.assert_frame_equal(x, y)

    def test_dorothea_params(self):
        params = Dorothea.params()

        assert "dorothea_levels" in params
        assert "dorothea_methods" in params
        assert "tfregulons_levels" not in params
        assert "tfregulons_methods" not in params
        assert Key.DATASETS.s not in params

    def test_tftarget_params(self):
        params = TFtarget.params()

        assert "dorothea_levels" not in params
        assert "dorothea_methods" not in params
        assert "tfregulons_levels" in params
        assert "tfregulons_methods" in params
        assert Key.DATASETS.s not in params

    @pytest.mark.parametrize(
        "interaction", [OmniPath, Transcriptional, AllInteractions]
    )
    def test_transcriptional_params(self, interaction):
        params = interaction.params()

        assert "dorothea_levels" in params
        assert "dorothea_methods" in params
        assert "tfregulons_levels" in params
        assert "tfregulons_methods" in params
        assert Key.DATASETS.s not in params

    @pytest.mark.parametrize(
        "interaction",
        [PathwayExtra, KinaseExtra, LigRecExtra, miRNA, TFmiRNA, lncRNAmRNA],
    )
    def test_rest_params(self, interaction):
        params = interaction.params()

        assert "dorothea_levels" not in params
        assert "dorothea_methods" not in params
        assert "tfregulons_levels" not in params
        assert "tfregulons_methods" not in params
        assert Key.DATASETS.s not in params
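# The tests below exercise `import_intercell_network`, which combines an
# interaction frame with intercell annotations for both endpoints; all three
# underlying requests (interactions, transmitters, receivers) are mocked and
# the result is compared against a pickled reference frame.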
class TestUtils:
    def test_import_intercell_network(
        self,
        cache_backup,
        requests_mock,
        interactions_data: bytes,
        transmitters_data: bytes,
        receivers_data: bytes,
        import_intercell_result: pd.DataFrame,
    ):
        interactions_url = urljoin(options.url, AllInteractions._query_type.endpoint)
        intercell_url = urljoin(options.url, Intercell._query_type.endpoint)

        # interactions
        requests_mock.register_uri(
            "GET",
            f"{interactions_url}?datasets=omnipath&dorothea_levels=A&fields=curation_effort%2C"
            f"references%2Csources%2Ctype&format=tsv",
            content=interactions_data,
        )
        # transmitter
        requests_mock.register_uri(
            "GET",
            f"{intercell_url}?categories=ligand&causality=trans&format=tsv&scope=generic",
            content=transmitters_data,
        )
        # receiver
        requests_mock.register_uri(
            "GET",
            f"{intercell_url}?categories=receptor&causality=rec&format=tsv&scope=generic",
            content=receivers_data,
        )

        res = import_intercell_network(
            include=InteractionDataset.OMNIPATH,
            transmitter_params={"categories": "ligand"},
            interactions_params={"dorothea_levels": "A"},
            receiver_params={"categories": "receptor"},
        )

        sortby = [
            "source",
            "target",
            "category_intercell_source",
            "category_intercell_target",
            "database_intercell_source",
            "database_intercell_target",
        ]

        for df in (res, import_intercell_result):
            df.sort_values(sortby, inplace=True)
            df.reset_index(drop=True, inplace=True)

        assert len(requests_mock.request_history) == 3
        np.testing.assert_array_equal(res.shape, import_intercell_result.shape)
        np.testing.assert_array_equal(res.index, import_intercell_result.index)
        np.testing.assert_array_equal(res.columns, import_intercell_result.columns)
        # TODO(michalk8): broken in `pandas=2.0`
        # np.testing.assert_array_equal(res.dtypes, import_intercell_result.dtypes)
        np.testing.assert_array_equal(
            res.values[~pd.isnull(res)],
            import_intercell_result.values[~pd.isnull(import_intercell_result)],
        )
        np.testing.assert_array_equal(
            pd.isnull(res), pd.isnull(import_intercell_result)
        )

    @pytest.mark.parametrize("which", ["interactions", "receivers", "transmitters"])
    def test_intercell_empty(
        self,
        which: str,
        cache_backup,
        requests_mock,
        interactions_data: bytes,
        transmitters_data: bytes,
        receivers_data: bytes,
    ):
        interactions_url = urljoin(options.url, AllInteractions._query_type.endpoint)
        intercell_url = urljoin(options.url, Intercell._query_type.endpoint)

        handle = StringIO()
        pd.DataFrame({"is_directed": []}).to_csv(handle, sep="\t", index=False)
        empty_data = bytes(handle.getvalue(), encoding="utf-8")

        if which == "interactions":
            interactions_data = empty_data
        elif which == "receivers":
            receivers_data = empty_data
        elif which == "transmitters":
            transmitters_data = empty_data
        else:
            raise AssertionError(which)

        # interactions
        requests_mock.register_uri(
            "GET",
            f"{interactions_url}?datasets=omnipath,pathwayextra&fields=curation_effort%2C"
            f"references%2Csources%2Ctype&format=tsv&resources=CellPhoneDB",
            content=interactions_data,
        )
        # transmitter
        requests_mock.register_uri(
            "GET",
            f"{intercell_url}?categories=ligand&causality=trans&format=tsv&scope=generic",
            content=transmitters_data,
        )
        # receiver
        requests_mock.register_uri(
            "GET",
            f"{intercell_url}?categories=receptor&causality=rec&format=tsv&scope=generic",
            content=receivers_data,
        )

        with pytest.raises(ValueError, match=rf"No {which} were retrieved. Please"):
            import_intercell_network(
                include=(InteractionDataset.OMNIPATH, InteractionDataset.PATHWAY_EXTRA),
                transmitter_params={"categories": "ligand"},
                interactions_params={"resources": "CellPhoneDB"},
                receiver_params={"categories": "receptor"},
            )
--------------------------------------------------------------------------------