├── docs
    ├── _static
    │   └── .gitignore
    ├── guide.rst
    ├── license.rst
    ├── requirements.txt
    ├── notebook
    │   ├── clean.sh
    │   ├── to-rst.sh
    │   ├── server.sh
    │   └── correct_nbconvert.py
    ├── index.rst
    ├── Makefile
    ├── api.rst
    ├── make.bat
    ├── conf.py
    └── cli.rst
├── dev
    ├── test_data
    │   ├── map
    │   │   ├── empty.json
    │   │   ├── pubchem.json
    │   │   └── module.json
    │   ├── module-entry-ids.txt
    │   ├── glycan-pubchem-entry-ids.txt
    │   ├── pathway-module-entry-ids.txt
    │   ├── ddi-output.txt
    │   ├── brite-entries
    │   │   ├── pull-results.json
    │   │   ├── br_br08902.txt
    │   │   └── br_br08005.txt
    │   ├── all-brite-entry-ids.txt
    │   └── drug-entry-ids.txt
    ├── pytest.ini
    ├── test.sh
    ├── install.sh
    ├── README.md
    ├── conftest.py
    ├── test_utils.py
    ├── test_entry_ids_cli.py
    ├── test_pathway_organizer_cli.py
    ├── test_entry_ids.py
    ├── test_map_cli.py
    ├── test_pathway_organizer.py
    ├── utils.py
    ├── test_pull_cli.py
    ├── test_rest_cli.py
    ├── test_main.py
    ├── test_rest.py
    ├── test_kegg_url.py
    └── test_map.py
├── requirements.txt
├── .gitignore
├── src
    └── kegg_pull
    │   ├── __init__.py
    │   ├── __main__.py
    │   ├── entry_ids_cli.py
    │   ├── pathway_organizer_cli.py
    │   ├── map_cli.py
    │   ├── entry_ids.py
    │   ├── _utils.py
    │   ├── pull_cli.py
    │   ├── rest_cli.py
    │   ├── pathway_organizer.py
    │   └── rest.py
├── CITATION.cff
├── .github
    └── workflows
    │   ├── main.yml
    │   └── build_documentation.yml
├── setup.py
├── LICENSE
└── README.rst


/docs/_static/.gitignore:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/dev/test_data/map/empty.json:
--------------------------------------------------------------------------------
1 | {}


--------------------------------------------------------------------------------
/dev/test_data/module-entry-ids.txt:
--------------------------------------------------------------------------------
1 | md:M00050
2 | md:M00959


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | docopt
2 | requests
3 | tqdm
4 | jsonschema
5 | 


--------------------------------------------------------------------------------
/docs/guide.rst:
--------------------------------------------------------------------------------
1 | Guide
2 | =====
3 | 
4 | .. include:: ../README.rst
5 | 


--------------------------------------------------------------------------------
/docs/license.rst:
--------------------------------------------------------------------------------
1 | License
2 | =======
3 | 
4 | .. include:: ../LICENSE
5 | 


--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | sphinx>=5.3.0
2 | sphinx_rtd_theme
3 | tqdm
4 | jsonschema
5 | 


--------------------------------------------------------------------------------
/dev/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | markers =
3 |     disable_mock_organism_set: Disable mocking the AbstractKEGGurl._get_organism_set method
4 | 


--------------------------------------------------------------------------------
/dev/test_data/glycan-pubchem-entry-ids.txt:
--------------------------------------------------------------------------------
1 | gl:G13143	pubchem:405226698
2 | gl:G13141	pubchem:405226697
3 | gl:G13139	pubchem:405226696
4 | 


--------------------------------------------------------------------------------
/dev/test_data/map/pubchem.json:
--------------------------------------------------------------------------------
1 | {
2 |   "cpd:C00001": [
3 |     "pubchem:3303"
4 |   ],
5 |   "cpd:C00002": [
6 |     "pubchem:3304"
7 |   ]
8 | }


--------------------------------------------------------------------------------
/dev/test_data/map/module.json:
--------------------------------------------------------------------------------
1 | {
2 |   "md:M00965": [
3 |     "ko:K12696",
4 |     "ko:K22365"
5 |   ],
6 |   "md:M00962": [
7 |     "ko:K22435"
8 |   ]
9 | }


--------------------------------------------------------------------------------
/dev/test_data/pathway-module-entry-ids.txt:
--------------------------------------------------------------------------------
1 | md:M00575	path:map05133
2 | md:M00574	path:map05133
3 | md:M00363	path:map05130
4 | md:M00363	path:map05131
5 | 


--------------------------------------------------------------------------------
/dev/test_data/ddi-output.txt:
--------------------------------------------------------------------------------
1 | D00100	D00564	CI,P	unclassified
2 | D00109	D00564	P	unclassified
3 | D00564	D00100	CI,P	unclassified
4 | D00564	D00109	P	unclassified
5 | 


--------------------------------------------------------------------------------
/dev/test.sh:
--------------------------------------------------------------------------------
1 | source .env/bin/activate || source .env/Scripts/activate # Windows has Scripts instead of bin
2 | python3 -m pytest dev --cov --cov-branch --cov-report=term-missing
3 | 


--------------------------------------------------------------------------------
/docs/notebook/clean.sh:
--------------------------------------------------------------------------------
1 | rm -rf brite-entries/ brite-entry-ids.txt compound-entries compound-entries.zip hierarchy-nodes.json mapping.json pull-entries/ pull-results.json standard_input.txt # deletes the listed files/directories relative to the current directory
2 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .coverage
 2 | .env/
 3 | .pypi-env/
 4 | .idea/
 5 | __pycache__/
 6 | src/kegg_pull.egg-info/
 7 | docs/_build
 8 | docs/notebook/.ipynb_checkpoints/
 9 | docs/notebook/tutorial.ipynb
10 | dist/
11 | 


--------------------------------------------------------------------------------
/docs/notebook/to-rst.sh:
--------------------------------------------------------------------------------
1 | # Note this will only work if pandoc is installed separately via "sudo dnf install pandoc"
2 | jupyter nbconvert --to rst tutorial.ipynb
3 | python3 correct_nbconvert.py
4 | mv tutorial.rst ../tutorial.rst
5 | 


--------------------------------------------------------------------------------
/src/kegg_pull/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | This package has the following modules:
 3 | 
 4 | ``pull``
 5 | 
 6 | ``entry_ids``
 7 | 
 8 | ``map``
 9 | 
10 | ``pathway_organizer``
11 | 
12 | ``rest``
13 | 
14 | ``kegg_url``
15 | """
16 | __version__ = '3.1.0'
17 | 


--------------------------------------------------------------------------------
/dev/test_data/brite-entries/pull-results.json:
--------------------------------------------------------------------------------
 1 | {
 2 | "percent-success": 100.0,
 3 | "pull-minutes": 0.03,
 4 | "num-successful": 1,
 5 | "num-failed": 0,
 6 | "num-timed-out": 0,
 7 | "num-total": 1,
 8 | "successful-entry-ids": [
 9 | "br:br08902"
10 | ],
11 | "failed-entry-ids": [],
12 | "timed-out-entry-ids": []
13 | }


--------------------------------------------------------------------------------
/docs/notebook/server.sh:
--------------------------------------------------------------------------------
1 | # This will start the server and print the URL that you need to copy and paste into your web browser.
2 | # Note that if you're port forwarding, you need to replace the port number in the URL with the one that you're forwarding to localhost (your local computer) rather than the one you're running on a lab machine.
3 | jupyter notebook
4 | 
5 | 


--------------------------------------------------------------------------------
/docs/notebook/correct_nbconvert.py:
--------------------------------------------------------------------------------
 1 | with open('tutorial.rst', 'r') as file:
 2 |     contents: str = file.read()
 3 | 
 4 | contents: str = contents.replace('ipython3', 'python3')
 5 | contents: str = contents.replace('python3\n\n    !', 'none\n\n    !')
 6 | contents: str = contents.replace('! ', '% ')
 7 | 
 8 | with open('tutorial.rst', 'w') as file:
 9 |     file.write(contents)
10 | 
11 | 


--------------------------------------------------------------------------------
/dev/install.sh:
--------------------------------------------------------------------------------
1 | echo "Removing previous .env/ directory if it exists..."
2 | rm -rf .env/
3 | echo "Creating new .env/ directory..."
4 | python3 -m venv .env/
5 | source .env/bin/activate || source .env/Scripts/activate # Windows has Scripts instead of bin
6 | python3 -m pip install --upgrade pip
7 | python3 -m pip install pytest pytest-mock pytest-cov sphinx sphinx-rtd-theme notebook
8 | python3 -m pip install -e .
9 | 


--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
 1 | .. kegg_pull documentation master file, created by
 2 |    sphinx-quickstart on Tue Aug 30 16:32:53 2022.
 3 |    You can adapt this file completely to your liking, but it should at least
 4 |    contain the root `toctree` directive.
 5 | 
 6 | Welcome to kegg_pull's documentation!
 7 | =====================================
 8 | 
 9 | .. toctree::
10 |    :maxdepth: 2
11 |    :caption: Contents:
12 | 
13 |    guide
14 |    tutorial
15 |    cli
16 |    api
17 |    license
18 | 
19 | Indices and tables
20 | ==================
21 | 
22 | * :ref:`genindex`
23 | * :ref:`modindex`
24 | 


--------------------------------------------------------------------------------
/dev/README.md:
--------------------------------------------------------------------------------
 1 | # Local Development
 2 | ## Installing testing dependencies and kegg_pull as a package
 3 | With the root of the repository as the working directory, run the following:
 4 | ```
 5 | bash dev/install.sh # Installs testing dependencies and the kegg_pull package
 6 | bash dev/test.sh # Runs tests on the kegg_pull package
 7 | ```
 8 | ## Preventing the "module not found" error in PyCharm
 9 | * After installing `kegg_pull`, a file at `src/kegg_pull.egg-info/PKG-INFO` is generated.
10 | * Go into that file and change `kegg-pull` (with a dash) to `kegg_pull` (with an underscore).
11 | * Restart PyCharm
12 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line, and also
 5 | # from the environment for the first two.
 6 | SPHINXOPTS    ?=
 7 | SPHINXBUILD   ?= sphinx-build
 8 | SOURCEDIR     = .
 9 | BUILDDIR      = _build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 | 


--------------------------------------------------------------------------------
/CITATION.cff:
--------------------------------------------------------------------------------
 1 | cff-version: 1.2.0
 2 | message: "If you use this software, please cite this article in the reference section."
 3 | authors:
 4 | - family-names: "Huckvale"
 5 |   given-names: "Erik"
 6 | - family-names: "Moseley"
 7 |   given-names: "Hunter"
 8 | title: "kegg-pull"
 9 | version: 3.0.0
10 | date-released: 2023-02-15
11 | url: "https://github.com/MoseleyBioinformaticsLab/kegg_pull"
12 | references:
13 |   - authors:
14 |     - family-names: "Huckvale"
15 |       given-names: "Erik"
16 |     - family-names: "Moseley"
17 |       given-names: "Hunter"
18 |     type: article
19 |     doi: "10.1101/2022.11.03.515120"
20 |     journal: "BMC Bioinformatics"
21 |     title: "kegg_pull: a Software Package for the RESTful Access and Pulling from The Kyoto Encyclopedia of Gene and Genomes"
22 |     volume: 24
23 |     year: 2023
24 | 


--------------------------------------------------------------------------------
/.github/workflows/main.yml:
--------------------------------------------------------------------------------
 1 | # This workflow installs the testing environment and runs the tests on a matrix of Python versions and operating systems
 2 | 
 3 | name: build
 4 | 
 5 | on:
 6 |   push:
 7 |     branches:
 8 |       - main
 9 |       - dev
10 |   pull_request:
11 |     branches:
12 |       - main
13 |       - dev
14 |   workflow_dispatch:
15 | 
16 | jobs:
17 |   build:
18 | 
19 |     strategy:
20 |       matrix:
21 |         python-version: ["3.10", "3.11"]
22 |         os: [ ubuntu-latest, windows-latest ]
23 |     runs-on: ${{matrix.os}}
24 | 
25 |     steps:
26 |     - uses: actions/checkout@v3
27 |     - name: Set up Python ${{ matrix.python-version }}
28 |       uses: actions/setup-python@v3
29 |       with:
30 |         python-version: ${{ matrix.python-version }}
31 |     - name: Install testing environment and kegg_pull package
32 |       run: bash dev/install.sh
33 |     - name: Test with pytest
34 |       run: bash dev/test.sh
35 | 


--------------------------------------------------------------------------------
/docs/api.rst:
--------------------------------------------------------------------------------
 1 | .. |Functionality| replace:: Provides API functionality
 2 | .. |Interface for| replace:: Provides wrapper methods for
 3 | 
 4 | API
 5 | ===
 6 | **Note:** Many KEGG entry IDs contain colons and ``kegg_pull`` saves KEGG entry files with their ID in the file name. When running on Windows, all file names with colons will have their colons replaced with underscores.
 7 | 
 8 | .. automodule:: kegg_pull
 9 | 
10 | .. automodule:: kegg_pull.pull
11 |     :members:
12 |     :undoc-members:
13 | 
14 | .. automodule:: kegg_pull.entry_ids
15 |     :members:
16 |     :undoc-members:
17 | 
18 | .. automodule:: kegg_pull.map
19 |     :members:
20 |     :undoc-members:
21 | 
22 | .. automodule:: kegg_pull.pathway_organizer
23 |     :members:
24 |     :undoc-members:
25 | 
26 | .. automodule:: kegg_pull.rest
27 |     :members:
28 |     :undoc-members:
29 | 
30 | .. automodule:: kegg_pull.kegg_url
31 |     :members:
32 |     :undoc-members:
33 | 


--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
 1 | @ECHO OFF
 2 | 
 3 | pushd %~dp0
 4 | 
 5 | REM Command file for Sphinx documentation
 6 | 
 7 | if "%SPHINXBUILD%" == "" (
 8 | 	set SPHINXBUILD=sphinx-build
 9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=_build
12 | 
13 | %SPHINXBUILD% >NUL 2>NUL
14 | if errorlevel 9009 (
15 | 	echo.
16 | 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
17 | 	echo.installed, then set the SPHINXBUILD environment variable to point
18 | 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
19 | 	echo.may add the Sphinx directory to PATH.
20 | 	echo.
21 | 	echo.If you don't have Sphinx installed, grab it from
22 | 	echo.https://www.sphinx-doc.org/
23 | 	exit /b 1
24 | )
25 | 
26 | if "%1" == "" goto help
27 | 
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 | 
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 | 
34 | :end
35 | popd
36 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | import setuptools as st
 2 | import re
 3 | 
 4 | 
 5 | requirements = [
 6 |     'docopt',
 7 |     'requests',
 8 |     'tqdm',
 9 |     'jsonschema'
10 | ]
11 | 
12 | 
13 | def _readme() -> str:
14 |     with open('README.rst') as readme_file:
15 |         return readme_file.read()
16 | 
17 | 
18 | def _get_version() -> str:
19 |     with open('src/kegg_pull/__init__.py', 'r') as fd:
20 |         version: str = re.search(r'^__version__\s*=\s*[\'"]([^\'"]*)[\'"]', fd.read(), re.MULTILINE).group(1)
21 |     if not version:
22 |         raise RuntimeError('Cannot find version information')
23 |     return version
24 | 
25 | 
26 | st.setup(
27 |     name='kegg_pull',
28 |     version=_get_version(),  # read from src/kegg_pull/__init__.py
29 |     package_dir={'': 'src'},  # the root package namespace maps to the src/ directory
30 |     packages=st.find_packages('src', exclude=['dev', 'docs']),
31 |     install_requires=requirements,
32 |     entry_points={'console_scripts': ['kegg_pull = kegg_pull.__main__:main']},  # exposes main() as the `kegg_pull` command
33 |     author='Erik Huckvale',
34 |     author_email='edhu227@g.uky.edu',
35 |     url='https://github.com/MoseleyBioinformaticsLab/KEGGpull',
36 |     description='Pulls any and all entries from any and all KEGG databases, pulls KEGG entry IDs, and wraps all the KEGG REST API operations in both Python API and the command line.',
37 |     long_description_content_type='text/x-rst',
38 |     long_description=_readme())
39 | 


--------------------------------------------------------------------------------
/dev/conftest.py:
--------------------------------------------------------------------------------
 1 | # noinspection PyPackageRequirements
 2 | import pytest as pt
 3 | import os
 4 | import shutil as sh
 5 | import kegg_pull.kegg_url as ku
 6 | 
 7 | 
 8 | @pt.fixture(autouse=True)
 9 | def mock_organism_set(mocker, request):
10 |     if 'disable_mock_organism_set' not in request.keywords:
11 |         organism_set_mock = {'organism-code', 'organism-T-number'}
12 |         mocker.patch.object(ku.AbstractKEGGurl, 'organism_set', organism_set_mock)
13 | 
14 | 
15 | @pt.fixture(name='output_file', params=['dir/subdir/file.txt', 'dir/file.txt', './file.txt', 'file.txt'])
16 | def get_output_file(request):
17 |     output_file: str = request.param
18 |     yield output_file
19 |     os.remove(output_file)
20 |     sh.rmtree('dir', ignore_errors=True)
21 | 
22 | 
23 | @pt.fixture(name='zip_archive_data', params=['file.txt', 'dir/file.txt', '/file.txt', '/dir/file.txt'])
24 | def get_zip_archive_data(request):
25 |     zip_file_name: str = request.param
26 |     zip_archive_path = 'archive.zip'
27 |     yield zip_archive_path, zip_file_name
28 |     os.remove(zip_archive_path)
29 | 
30 | 
31 | @pt.fixture(name='json_file_path', params=[
32 |     'dir/subdir/file.json', 'dir/file.json', './file.json', 'file.json', 'archive.zip:file.json', 'archive.zip:dir/file.json'])
33 | def get_json_file_path(request):
34 |     json_file_path: str = request.param
35 |     yield json_file_path
36 |     if '.zip:' in json_file_path:
37 |         os.remove('archive.zip')
38 |     else:
39 |         os.remove(json_file_path)
40 |     sh.rmtree('dir', ignore_errors=True)
41 | 


--------------------------------------------------------------------------------
/.github/workflows/build_documentation.yml:
--------------------------------------------------------------------------------
 1 | name: Build Documentation
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:    
 6 |       - main
 7 | 
 8 | jobs:
 9 |   build:
10 | 
11 |     runs-on: ubuntu-latest
12 | 
13 |     steps:
14 |     - uses: actions/checkout@v3
15 |     - name: Set up Python 3.11
16 |       uses: actions/setup-python@v4
17 |       with:
18 |         python-version: '3.11'
19 |     - name: Upgrade pip, install package, install requirements, build docs
20 |       run: |
21 |         pip install --upgrade pip
22 |         pip install -r ./docs/requirements.txt
23 |         sphinx-build docs ./docs/_build/html/
24 |     # Create an artifact of the html output.
25 |     - uses: actions/upload-artifact@v3
26 |       with:
27 |         name: DocumentationHTML
28 |         path: docs/_build/html/
29 |     # Publish built docs to gh-pages branch.
30 |     # ===============================
31 |     - name: Commit documentation changes
32 |       env:
33 |         GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
34 |       run: |
35 |         git config --global user.name "${GITHUB_ACTOR}"
36 |         git config --global user.email "${GITHUB_ACTOR}@users.noreply.github.com"
37 |         git clone "https://token:${GITHUB_TOKEN}@github.com/${GITHUB_REPOSITORY}.git" --branch gh-pages --single-branch gh-pages
38 |         cd gh-pages/
39 |         git rm -r .
40 |         cp -r ../docs/_build/html/* .
41 |         touch .nojekyll
42 |         git add .
43 |         git commit -m "Update documentation." -a || true
44 |         # The above command will fail if no changes were present, so we ignore
45 |         # that.
46 |     - name: Push changes
47 |       uses: ad-m/github-push-action@master
48 |       with:
49 |         branch: gh-pages
50 |         directory: gh-pages
51 |         github_token: ${{ secrets.GITHUB_TOKEN }}
52 |     # ===============================
53 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The Clear BSD License with Extra Clause
 2 | 
 3 | Copyright (c) 2022, Erik Huckvale, Hunter N.B. Moseley
 4 | All rights reserved.
 5 | 
 6 | Redistribution and use in source and binary forms, with or without modification,
 7 | are permitted provided that the following conditions are met:
 8 | 
 9 | * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
10 | * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
11 | * All advertising materials mentioning features or use of this software must display the following acknowledgement: This product includes software developed by the copyright holder.
12 | * Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
13 | * If the source code is used in a published work, then proper citation of the source code must be included with the published work.
14 | 
15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS AS IS AND
16 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE
19 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
22 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
23 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 | 


--------------------------------------------------------------------------------
/src/kegg_pull/__main__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Usage:
 3 |     kegg_pull -h | --help           Show this help message.
 4 |     kegg_pull -v | --version        Displays the package version.
 5 |     kegg_pull --full-help           Show the help message of all sub commands.
 6 |     kegg_pull pull ...              Pull, separate, and store an arbitrary number of KEGG entries to the local file system.
 7 |     kegg_pull entry-ids ...         Obtain a list of KEGG entry IDs.
 8 |     kegg_pull map ...               Obtain a mapping of entry IDs (KEGG or outside databases) to the IDs of related entries.
 9 |     kegg_pull pathway-organizer ... Creates a flattened version of a pathways Brite hierarchy.
10 |     kegg_pull rest ...              Executes one of the KEGG REST API operations.
11 | """
12 | import sys
13 | from . import __version__
14 | from . import pull_cli as p_cli
15 | from . import entry_ids_cli as ei_cli
16 | from . import map_cli as map_cli
17 | from . import pathway_organizer_cli as po_cli
18 | from . import rest_cli as r_cli
19 | 
20 | 
21 | def main() -> None:
22 |     first_arg: str = sys.argv[1] if len(sys.argv) > 1 else None
23 |     if first_arg == 'pull':
24 |         p_cli.main()
25 |     elif first_arg == 'entry-ids':
26 |         ei_cli.main()
27 |     elif first_arg == 'map':
28 |         map_cli.main()
29 |     elif first_arg == 'pathway-organizer':
30 |         po_cli.main()
31 |     elif first_arg == 'rest':
32 |         r_cli.main()
33 |     elif first_arg == '--full-help':
34 |         separator = '-'*80
35 |         print(__doc__)
36 |         print(separator)
37 |         print(p_cli.__doc__)
38 |         print(separator)
39 |         print(ei_cli.__doc__)
40 |         print(separator)
41 |         print(map_cli.__doc__)
42 |         print(separator)
43 |         print(po_cli.__doc__)
44 |         print(separator)
45 |         print(r_cli.__doc__)
46 |     elif first_arg == '--version' or first_arg == '-v':
47 |         print(__version__)
48 |     else:
49 |         print(__doc__)
50 | 
51 | 
52 | if __name__ == '__main__':  # pragma: no cover
53 |     main()  # pragma: no cover
54 | 


--------------------------------------------------------------------------------
/dev/test_utils.py:
--------------------------------------------------------------------------------
 1 | # noinspection PyPackageRequirements
 2 | import pytest as pt
 3 | # noinspection PyProtectedMember
 4 | import kegg_pull._utils as utils
 5 | import dev.utils as u
 6 | import kegg_pull.pull as p
 7 | import kegg_pull.rest as r
 8 | import kegg_pull.pathway_organizer as po
 9 | 
10 | 
11 | @pt.mark.parametrize('comma_separated_list', [',,', ',', ''])
12 | def test_parse_input_sequence_comma_exception(comma_separated_list: str):
13 |     with pt.raises(ValueError) as error:
14 |         utils.parse_input_sequence(input_source=comma_separated_list)
15 |     expected_message = f'Empty list provided from comma separated list: "{comma_separated_list}"'
16 |     u.assert_exception(expected_message=expected_message, exception=error)
17 | 
18 | 
19 | @pt.mark.parametrize('stdin_input', ['', '\n', '\t\t', '\n\n', '\t \n \t', ' \n \n\t\t \t\n'])
20 | def test_parse_input_sequence_stdin_exception(mocker, stdin_input: str):
21 |     stdin_mock: mocker.MagicMock = mocker.patch('kegg_pull._utils.sys.stdin.read', return_value=stdin_input)
22 |     with pt.raises(ValueError) as error:
23 |         utils.parse_input_sequence(input_source='-')
24 |     stdin_mock.assert_called_once_with()
25 |     expected_message = 'Empty list provided from standard input'
26 |     u.assert_exception(expected_message=expected_message, exception=error)
27 | 
28 | 
29 | def test_get_range_values_exception():
30 |     with pt.raises(ValueError) as error:
31 |         utils._get_range_values(range_values=['1', '2', '3'], value_type=int)
32 |     expected_message = f'Range can only be specified by two values but 3 values were provided: 1, 2, 3'
33 |     u.assert_exception(expected_message=expected_message, exception=error)
34 | 
35 | 
36 | @pt.mark.parametrize(
37 |     'NonInstantiable,kwargs', [(p.PullResult, {}), (r.KEGGresponse, {'status': None, 'kegg_url': None}), (po.PathwayOrganizer, {})])
38 | def test_non_instantiable(NonInstantiable: type, kwargs: dict):
39 |     expected_error_message = f'The class "{NonInstantiable.__name__}" cannot be instantiated outside of its module.'
40 |     with pt.raises(RuntimeError) as error:
41 |         NonInstantiable(**kwargs)
42 |     u.assert_exception(expected_message=expected_error_message, exception=error)
43 | 


--------------------------------------------------------------------------------
/dev/test_data/all-brite-entry-ids.txt:
--------------------------------------------------------------------------------
  1 | br:br08901
  2 | br:br08902
  3 | br:br08904
  4 | br:br08906
  5 | br:ko00001
  6 | br:ko00002
  7 | br:ko00003
  8 | br:br08907
  9 | br:ko01000
 10 | br:ko01001
 11 | br:ko01009
 12 | br:ko01002
 13 | br:ko01003
 14 | br:ko01005
 15 | br:ko01011
 16 | br:ko01004
 17 | br:ko01008
 18 | br:ko01006
 19 | br:ko01007
 20 | br:ko00199
 21 | br:ko00194
 22 | br:ko03000
 23 | br:ko03021
 24 | br:ko03019
 25 | br:ko03041
 26 | br:ko03011
 27 | br:ko03009
 28 | br:ko03016
 29 | br:ko03012
 30 | br:ko03110
 31 | br:ko04131
 32 | br:ko04121
 33 | br:ko03051
 34 | br:ko03032
 35 | br:ko03036
 36 | br:ko03400
 37 | br:ko03029
 38 | br:ko02000
 39 | br:ko02044
 40 | br:ko02042
 41 | br:ko02022
 42 | br:ko02035
 43 | br:ko03037
 44 | br:ko04812
 45 | br:ko04147
 46 | br:ko02048
 47 | br:ko04030
 48 | br:ko04050
 49 | br:ko04054
 50 | br:ko03310
 51 | br:ko04040
 52 | br:ko04031
 53 | br:ko04052
 54 | br:ko04515
 55 | br:ko04090
 56 | br:ko01504
 57 | br:ko00535
 58 | br:ko00536
 59 | br:ko00537
 60 | br:ko04091
 61 | br:ko04990
 62 | br:ko03200
 63 | br:ko03210
 64 | br:ko03100
 65 | br:br08001
 66 | br:br08002
 67 | br:br08003
 68 | br:br08005
 69 | br:br08006
 70 | br:br08007
 71 | br:br08009
 72 | br:br08021
 73 | br:br08120
 74 | br:br08201
 75 | br:br08202
 76 | br:br08204
 77 | br:br08203
 78 | br:br08303
 79 | br:br08302
 80 | br:br08301
 81 | br:br08313
 82 | br:br08312
 83 | br:br08304
 84 | br:br08305
 85 | br:br08331
 86 | br:br08330
 87 | br:br08332
 88 | br:br08310
 89 | br:br08307
 90 | br:br08327
 91 | br:br08311
 92 | br:br08403
 93 | br:br08402
 94 | br:br08401
 95 | br:br08411
 96 | br:br08410
 97 | br:br08420
 98 | br:br08601
 99 | br:br08610
100 | br:br08611
101 | br:br08612
102 | br:br08613
103 | br:br08614
104 | br:br08615
105 | br:br08620
106 | br:br08621
107 | br:br08605
108 | br:br03220
109 | br:br03222
110 | br:br03223
111 | br:br01610
112 | br:br01611
113 | br:br01612
114 | br:br01613
115 | br:br01601
116 | br:br01602
117 | br:br01600
118 | br:br01620
119 | br:br01553
120 | br:br01554
121 | br:br01556
122 | br:br01555
123 | br:br01557
124 | br:br01800
125 | br:br01810
126 | br:br08011
127 | br:br08020
128 | br:br08012
129 | br:br08110
130 | br:br08319
131 | br:br08329
132 | br:br08318
133 | br:br08328
134 | br:br08309
135 | br:br08341
136 | br:br08324
137 | br:br08317
138 | br:br08315
139 | br:br08314
140 | br:br08442
141 | br:br08441
142 | br:br08431


--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
 1 | # Configuration file for the Sphinx documentation builder.
 2 | #
 3 | # For the full list of built-in configuration values, see the documentation:
 4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
 5 | 
 6 | # Add the package to the python path so autodoc can import modules so doc strings can be included in the documentation
 7 | import os
 8 | import sys
 9 | sys.path.insert(0, os.path.abspath('../src'))
10 | 
11 | # It's recommended that you import the project version from your package's __init__.py file
12 | from kegg_pull import __version__
13 | 
14 | def skip_organism_set(app, what, name, obj, skip, options) -> bool:
15 |     if name in {'organism_set'}:
16 |         return True
17 | 
18 | def setup(app):  # hook Sphinx invokes with the application object when it loads this conf.py
19 |     app.connect('autodoc-skip-member', skip_organism_set)  # let skip_organism_set decide whether autodoc skips a member
20 | 
21 | # -- Project information -----------------------------------------------------
22 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
23 | 
24 | project = 'kegg_pull'
25 | copyright = '2022, Erik Huckvale'
26 | author = 'Erik Huckvale'
27 | 
28 | version = __version__
29 | release = __version__
30 | 
31 | # -- General configuration ---------------------------------------------------
32 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
33 | 
34 | extensions = [
35 |     'sphinx.ext.autodoc',
36 |     'sphinx.ext.doctest',
37 |     'sphinx.ext.intersphinx',
38 |     'sphinx.ext.todo',
39 |     'sphinx.ext.coverage',
40 |     'sphinx.ext.viewcode',
41 |     'sphinx.ext.githubpages',
42 | ]
43 | 
44 | templates_path = ['_templates']
45 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
46 | latex_elements = {'preamble': r'\usepackage{pmboxdraw}'}
47 | 
48 | # -- Options for HTML output -------------------------------------------------
49 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
50 | # NOTE: the autodoc_*/autoclass_* values below are sphinx.ext.autodoc settings; only the html_* values are HTML output options.
51 | autodoc_typehints = 'both'
52 | autoclass_content = 'both'
53 | autodoc_member_order = 'bysource'
54 | html_theme = 'sphinx_rtd_theme'
55 | html_static_path = ['_static']
56 | 
57 | # -- Options for intersphinx extension ---------------------------------------
58 | # https://www.sphinx-doc.org/en/master/usage/extensions/intersphinx.html#configuration
59 | 
60 | intersphinx_mapping = {
61 |     'python': ('https://docs.python.org/3', None),
62 | }
63 | 
64 | # -- Options for todo extension ----------------------------------------------
65 | # https://www.sphinx-doc.org/en/master/usage/extensions/todo.html#configuration
66 | 
67 | todo_include_todos = True
68 | 


--------------------------------------------------------------------------------
/src/kegg_pull/entry_ids_cli.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Usage:
 3 |     kegg_pull entry-ids -h | --help
 4 |     kegg_pull entry-ids database <database> [--output=<output>]
 5 |     kegg_pull entry-ids keywords <database> <keywords> [--output=<output>]
 6 |     kegg_pull entry-ids molec-attr <database> (--formula=<formula>|--em=<exact-mass>...|--mw=<molecular-weight>...) [--output=<output>]
 7 | 
 8 | Options:
 9 |     -h --help               Show this help message.
10 |     database                Pulls all the entry IDs within a given database.
11 |     <database>              The KEGG database from which to pull a list of entry IDs.
12 |     --output=<output>       Path to the file (either in a directory or ZIP archive) to store the output (1 entry ID per line). Prints to the console if not specified. If a ZIP archive, the file path must be in the form of /path/to/zip-archive.zip:/path/to/file (e.g. ./archive.zip:file.txt).
13 |     keywords                Searches for entries within a database based on provided keywords.
14 |     <keywords>              Comma separated list of keywords to search entries with (e.g. kw1,kw2,kw3 etc.). Or if equal to "-", keywords are read from standard input, one keyword per line; Press CTRL+D to finalize input or pipe (e.g. cat file.txt | kegg_pull entry-ids keywords brite - ...).
15 |     molec-attr              Searches a database of molecule-type KEGG entries by molecular attributes.
16 |     --formula=<formula>     Sequence of atoms in a chemical formula format to search for (e.g. "O5C7" searches for molecule entries containing 5 oxygen atoms and/or 7 carbon atoms).
17 |     --em=<exact-mass>       Either a single number (e.g. "--em=155.5") or two numbers (e.g. "--em=155.5 --em=244.4"). If a single number, searches for molecule entries with an exact mass equal to that value rounded by the last decimal point. If two numbers, searches for molecule entries with an exact mass within the two values (a range).
18 |     --mw=<molecular-weight> Same as "--em=<exact-mass>" but searches based on the molecular weight.
19 | """
20 | import docopt as d
21 | from . import entry_ids as ei
22 | from . import _utils as u
23 | 
24 | 
25 | def main() -> None:
26 |     args = d.docopt(__doc__)
27 |     database: str = args['<database>']
28 |     if args['database']:
29 |         entry_ids = ei.from_database(database=database)
30 |     elif args['keywords']:
31 |         keywords: list = u.parse_input_sequence(input_source=args['<keywords>'])
32 |         entry_ids = ei.from_keywords(database=database, keywords=keywords)
33 |     else:
34 |         formula, exact_mass, molecular_weight = u.get_molecular_attribute_args(args=args)
35 |         entry_ids = ei.from_molecular_attribute(
36 |             database=database, formula=formula, exact_mass=exact_mass, molecular_weight=molecular_weight)
37 |     entry_ids_str = '\n'.join(entry_ids)
38 |     u.print_or_save(output_target=args['--output'], output_content=entry_ids_str)
39 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
 1 | #########
 2 | kegg_pull
 3 | #########
 4 | Description
 5 | -----------
 6 | The ``kegg_pull`` package provides a number of useful CLI and API features for interacting with the KEGG REST API. This includes wrapper methods/commands for all the REST API operations, pulling lists of KEGG entry IDs, and pulling an arbitrary number of KEGG entries, in a single call, that are automatically separated and saved in individual files.
 7 | 
 8 | Documentation
 9 | -------------
10 | The complete documentation for our API and CLI including tutorials can be found `here <https://moseleybioinformaticslab.github.io/kegg_pull/>`__.
11 | 
12 | Installation
13 | ------------
14 | Requires Python 3.10 or above.
15 | 
16 | Install on Linux, Mac OS X
17 | ~~~~~~~~~~~~~~~~~~~~~~~~~~
18 | .. parsed-literal::
19 |    python3 -m pip install kegg-pull
20 | 
21 | Install on Windows
22 | ~~~~~~~~~~~~~~~~~~
23 | .. parsed-literal::
24 |    py -3 -m pip install kegg-pull
25 | 
26 | **Note:** Many KEGG entry IDs contain colons and ``kegg_pull`` saves KEGG entry files with their ID in the file name. When running on Windows, all file names with colons will have their colons replaced with underscores.
27 | 
28 | **Note:** If ``py`` is not installed on Windows (e.g. Python was installed via the Windows store rather than from the official Python website), the installation command is the same as Linux and Mac OS X.
29 | 
30 | **Note:** If the ``kegg_pull`` console script is not found on Windows, the CLI can be used via ``python3 -m kegg_pull`` or ``py -3 -m kegg_pull`` or ``path\to\console\script\kegg_pull.exe``. Alternatively, the directory where the console script is located can be added to the Path environment variable. For example, the console script may be installed at:
31 | 
32 | .. parsed-literal::
33 |    c:\\users\\<username>\\appdata\\local\\programs\\python\\python310\\Scripts\\
34 | 
35 | PyPI
36 | ~~~~
37 | See our PyPI page `here <https://pypi.org/project/kegg-pull/>`__.
38 | 
39 | Questions, Feature Requests, and Bug Reports
40 | --------------------------------------------
41 | Please submit any questions or feature requests you may have and report any potential bugs/errors you observe on `our GitHub issues page <https://github.com/MoseleyBioinformaticsLab/kegg_pull/issues>`__.
42 | 
43 | Dependencies
44 | ------------
45 | Note, the ``pip`` command will install dependencies automatically.
46 | 
47 | .. parsed-literal::
48 |    docopt
49 |    requests
50 |    tqdm
51 |    jsonschema
52 | 
53 | Get the source code
54 | -------------------
55 | Code is available on GitHub: https://github.com/MoseleyBioinformaticsLab/kegg_pull.
56 | 
57 | You can clone the repository via:
58 | 
59 | .. parsed-literal::
60 |    git clone https://github.com/MoseleyBioinformaticsLab/kegg_pull.git
61 | 
62 | Once you have a copy of the source, you can embed it in your own Python package, or install it into your system site-packages easily:
63 | 
64 | Linux, Mac OS X
65 | ~~~~~~~~~~~~~~~
66 | .. parsed-literal::
67 |    python3 setup.py install
68 | 
69 | Windows
70 | ~~~~~~~
71 | .. parsed-literal::
72 |    py -3 setup.py install
73 | 


--------------------------------------------------------------------------------
/dev/test_entry_ids_cli.py:
--------------------------------------------------------------------------------
 1 | # noinspection PyPackageRequirements
 2 | import pytest as pt
 3 | import kegg_pull.entry_ids_cli as ei_cli
 4 | import dev.utils as u
 5 | 
 6 | entry_ids_mock = ['a', 'b']  # passed to the shared test helpers as method_return_value
 7 | expected_output: str = '\n'.join(entry_ids_mock)  # one entry ID per line
 8 | 
 9 | 
10 | def test_help(mocker):  # delegates to the shared help assertion for the "entry-ids" subcommand
11 |     u.assert_help(mocker=mocker, module=ei_cli, subcommand='entry-ids')
12 | 
13 | 
14 | test_data = [  # each case: (CLI args, method to patch, expected method kwargs, stdin text or None)
15 |     (['entry-ids', 'database', 'compound'], 'entry_ids_cli.ei.from_database', {'database': 'compound'}, None),
16 |     (['entry-ids', 'keywords', 'pathway', 'k1,,k2'], 'entry_ids_cli.ei.from_keywords', {'database': 'pathway', 'keywords': ['k1', 'k2']},
17 |      None),
18 |     (['entry-ids', 'molec-attr', 'drug', '--formula=CO2'], 'entry_ids_cli.ei.from_molecular_attribute',
19 |      {'database': 'drug', 'formula': 'CO2', 'exact_mass': None, 'molecular_weight': None}, None),
20 |     (['entry-ids', 'molec-attr', 'drug', '--em=20.2'], 'entry_ids_cli.ei.from_molecular_attribute',
21 |      {'database': 'drug', 'formula': None, 'exact_mass': 20.2, 'molecular_weight': None}, None),
22 |     (['entry-ids', 'molec-attr', 'drug', '--mw=202'], 'entry_ids_cli.ei.from_molecular_attribute',
23 |      {'database': 'drug', 'formula': None, 'exact_mass': None, 'molecular_weight': 202}, None),
24 |     (['entry-ids', 'molec-attr', 'drug', '--em=20.2', '--em=30.3'], 'entry_ids_cli.ei.from_molecular_attribute',
25 |      {'database': 'drug', 'formula': None, 'exact_mass': (20.2, 30.3), 'molecular_weight': None}, None),
26 |     (['entry-ids', 'molec-attr', 'drug', '--mw=202', '--mw=303'], 'entry_ids_cli.ei.from_molecular_attribute',
27 |      {'database': 'drug', 'formula': None, 'exact_mass': None, 'molecular_weight': (202, 303)}, None),
28 |     (['entry-ids', 'keywords', 'pathway', '-'], 'entry_ids_cli.ei.from_keywords',
29 |      {'database': 'pathway', 'keywords': ['k1', 'k2']}, 'k1\nk2')]
30 | 
31 | 
32 | # noinspection DuplicatedCode
33 | @pt.mark.parametrize('args,method,kwargs,stdin_mock', test_data)
34 | def test_print(mocker, args: list, method: str, kwargs: dict, stdin_mock: str):
35 |     u.test_print(
36 |         mocker=mocker, argv_mock=args, stdin_mock=stdin_mock, method=method, method_return_value=entry_ids_mock, method_kwargs=kwargs,
37 |         module=ei_cli, expected_output=expected_output)
38 | 
39 | 
40 | @pt.mark.parametrize('args,method,kwargs,stdin_mock', test_data)
41 | def test_file(mocker, args: list, method: str, kwargs: dict, output_file: str, stdin_mock: str):
42 |     u.test_file(
43 |         mocker=mocker, argv_mock=args, output_file=output_file, stdin_mock=stdin_mock, method=method, method_return_value=entry_ids_mock,
44 |         method_kwargs=kwargs, module=ei_cli, expected_output=expected_output)
45 | 
46 | 
47 | @pt.mark.parametrize('args,method,kwargs,stdin_mock', test_data)
48 | def test_zip_archive(mocker, args: list, method: str, kwargs: dict, zip_archive_data: tuple, stdin_mock: str):
49 |     u.test_zip_archive(
50 |         mocker=mocker, argv_mock=args, zip_archive_data=zip_archive_data, stdin_mock=stdin_mock, method=method,
51 |         method_return_value=entry_ids_mock, method_kwargs=kwargs, module=ei_cli, expected_output=expected_output)
52 | 


--------------------------------------------------------------------------------
/src/kegg_pull/pathway_organizer_cli.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Usage:
 3 |     kegg_pull pathway-organizer [--tln=<top-level-nodes>] [--fn=<filter-nodes>] [--output=<output>]
 4 | 
 5 | Options:
 6 |     -h --help               Show this help message.
 7 |     --tln=<top-level-nodes> Node names in the highest level of the hierarchy to select from. If not set, all top level nodes are traversed to create the mapping of node key to node info. Either a comma separated list (e.g. node1,node2,node3 etc.) or if equal to "-", read from standard input one node per line; Press CTRL+D to finalize input or pipe (e.g. cat nodes.txt | kegg_pull pathway-organizer --tln=- ...). If both "--tln" and "--fn" are set as "-", one of the lines must be the delimiter "---" without quotes in order to distinguish the input, with the top level nodes first and filter nodes second.
 8 |     --fn=<filter-nodes>     Names (not keys) of nodes to exclude from the mapping of node key to node info. Neither these nodes nor any of their children will be included. If not set, no nodes will be excluded. Either a comma separated list (e.g. node1,node2,node3 etc.) or if equal to "-", read from standard input one node per line; Press CTRL+D to finalize input or pipe (e.g. cat nodes.txt | kegg_pull pathway-organizer --fn=- ...). If both "--tln" and "--fn" are set as "-", one of the lines must be the delimiter "---" without quotes in order to distinguish the input, with the top level nodes first and filter nodes second.
 9 |     --output=<output>       The file to store the flattened Brite hierarchy as a JSON structure with node keys mapping to node info, either a JSON file or ZIP archive. Prints to the console if not set. If saving to a ZIP archive, the file path must be in the form of /path/to/zip-archive.zip:/path/to/file (e.g. ./archive.zip:mapping.json).
10 | """
11 | import docopt as d
12 | import sys
13 | from . import pathway_organizer as po
14 | from . import _utils as u
15 | 
16 | 
17 | def main():
18 |     args = d.docopt(__doc__)
19 |     if args['--tln'] == '-' and args['--fn'] == '-':
20 |         # If both the top level nodes and filter nodes are coming from standard input, convert them to comma separated lists
21 |         inputs = sys.stdin.read()
22 |         [top_level_nodes, filter_nodes] = inputs.split('---\n')
23 |         top_level_nodes = ','.join(top_level_nodes.strip().split('\n'))
24 |         filter_nodes = ','.join(filter_nodes.strip().split('\n'))
25 |         top_level_nodes = set(u.parse_input_sequence(input_source=top_level_nodes))
26 |         filter_nodes = set(u.parse_input_sequence(input_source=filter_nodes))
27 |     else:
28 |         top_level_nodes: str | set[str] = args['--tln']
29 |         filter_nodes: str | set[str] = args['--fn']
30 |         if top_level_nodes:
31 |             top_level_nodes = set[str](u.parse_input_sequence(input_source=top_level_nodes))
32 |         if filter_nodes:
33 |             filter_nodes = set[str](u.parse_input_sequence(input_source=filter_nodes))
34 |     pathway_organizer = po.PathwayOrganizer.load_from_kegg(top_level_nodes=top_level_nodes, filter_nodes=filter_nodes)
35 |     hierarchy_nodes_json_string = str(pathway_organizer)
36 |     u.print_or_save(output_target=args['--output'], output_content=hierarchy_nodes_json_string)
37 | 


--------------------------------------------------------------------------------
/docs/cli.rst:
--------------------------------------------------------------------------------
 1 | .. |Functionality| replace:: Provides commandline functionality
 2 | .. |Interface for| replace:: Provides commandline functionality for accessing
 3 | 
 4 | CLI
 5 | ===
 6 | **Note:** Many KEGG entry IDs contain colons and ``kegg_pull`` saves KEGG entry files with their ID in the file name. When running on Windows, all file names with colons will have their colons replaced with underscores.
 7 | 
 8 | kegg_pull Commandline Interface
 9 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
10 | Top-level commandline interface.
11 | 
12 | .. literalinclude:: ../src/kegg_pull/__main__.py
13 |     :start-at: Usage:
14 |     :end-before: """
15 |     :language: none
16 | 
17 | .. include:: ../src/kegg_pull/pull.py
18 |     :start-after: """
19 |     :end-before: """
20 | 
21 | A JSON file, called ``pull-results.json``, is saved, describing the results of the pull. Below is the interpretation of each of the fields:
22 | 
23 | **percent-success:** The percentage of the requested entries that were successfully pulled and saved in a file.
24 | 
25 | **pull-minutes:** The number of minutes that the pull took to complete.
26 | 
27 | **num-successful:** The number of entries that were successfully pulled and saved in a file.
28 | 
29 | **num-failed:** The number of entries that failed to be pulled.
30 | 
31 | **num-timed-out:** The number of entries that timed out when requested.
32 | 
33 | **num-total:** The number of total entry IDs requested.
34 | 
35 | **successful-entry-ids:** The list of successful entry IDs.
36 | 
37 | **failed-entry-ids:** The list of failed entry IDs.
38 | 
39 | **timed-out-entry-ids:** The list of timed out entry IDs.
40 | 
41 | If the ``--unsuccessful-threshold`` option is set and surpassed, an ``aborted-pull-results.json`` file is instead output with the following fields:
42 | 
43 | **num-remaining-entry-ids:** The number of requested entries remaining after the process aborted. The process aborted before ``kegg_pull`` could even try to pull these entries.
44 | 
45 | **num-successful:** The number of entries that were successfully pulled before the process aborted.
46 | 
47 | **num-failed:** The number of entries that failed by the time the process aborted.
48 | 
49 | **num-timed-out:** The number of entries that timed out by the time the process aborted.
50 | 
51 | **remaining-entry-ids:** The IDs of the remaining entries.
52 | 
53 | **successful-entry-ids:** The IDs of the successful entries.
54 | 
55 | **failed-entry-ids:** The IDs of the failed entries.
56 | 
57 | **timed-out-entry-ids:** The IDs of the timed out entries.
58 | 
59 | .. literalinclude:: ../src/kegg_pull/pull_cli.py
60 |     :start-at: Usage:
61 |     :end-before: """
62 |     :language: none
63 | 
64 | .. include:: ../src/kegg_pull/entry_ids.py
65 |     :start-after: """
66 |     :end-before: """
67 | 
68 | .. literalinclude:: ../src/kegg_pull/entry_ids_cli.py
69 |     :start-at: Usage:
70 |     :end-before: """
71 |     :language: none
72 | 
73 | .. include:: ../src/kegg_pull/map.py
74 |     :start-after: """
75 |     :end-before: """
76 | 
77 | .. literalinclude:: ../src/kegg_pull/map_cli.py
78 |     :start-at: Usage:
79 |     :end-before: """
80 |     :language: none
81 | 
82 | .. include:: ../src/kegg_pull/pathway_organizer.py
83 |     :start-after: """
84 |     :end-before: """
85 | 
86 | .. literalinclude:: ../src/kegg_pull/pathway_organizer_cli.py
87 |     :start-at: Usage:
88 |     :end-before: """
89 |     :language: none
90 | 
91 | .. include:: ../src/kegg_pull/rest.py
92 |     :start-after: """
93 |     :end-before: """
94 | 
95 | .. literalinclude:: ../src/kegg_pull/rest_cli.py
96 |     :start-at: Usage:
97 |     :end-before: """
98 |     :language: none
99 | 


--------------------------------------------------------------------------------
/dev/test_pathway_organizer_cli.py:
--------------------------------------------------------------------------------
 1 | # noinspection PyPackageRequirements
 2 | import pytest as pt
 3 | import json
 4 | import kegg_pull.pathway_organizer as po
 5 | import kegg_pull.pathway_organizer_cli as po_cli
 6 | import dev.utils as u
 7 | 
 8 | 
 9 | def test_help(mocker):  # delegates to the shared help assertion for the "pathway-organizer" subcommand
10 |     u.assert_help(mocker=mocker, module=po_cli, subcommand='pathway-organizer')
11 | 
12 | 
13 | method = 'pathway_organizer_cli.po.PathwayOrganizer.load_from_kegg'  # method path passed to the shared helpers for every case
14 | test_data = [  # each case: (CLI args, expected load_from_kegg kwargs, stdin text or None)
15 |     (['pathway-organizer', '--tln=-', '--fn=-'], {'top_level_nodes': {'node1'}, 'filter_nodes': {'node2', 'node3'}},
16 |      ' node1\n---\nnode2\t\nnode3 '),
17 |     (['pathway-organizer', '--tln=-', '--fn=node2,node3,node4'],
18 |      {'top_level_nodes': {'node1', 'node5'}, 'filter_nodes': {'node2', 'node3', 'node4'}}, '\nnode1\n node5\n'),
19 |     (['pathway-organizer', '--tln=node1', '--fn=-'], {'top_level_nodes': {'node1'}, 'filter_nodes': {'node2'}}, 'node2'),
20 |     (['pathway-organizer', '--tln=node1,node2', '--fn=node3'], {'top_level_nodes': {'node1', 'node2'}, 'filter_nodes': {'node3'}}, None),
21 |     (['pathway-organizer', '--tln=-'], {'top_level_nodes': {'node1', 'node2', 'node3'}, 'filter_nodes': None}, 'node1\nnode2\nnode3'),
22 |     (['pathway-organizer', '--fn=-'], {'top_level_nodes': None, 'filter_nodes': {'node1', 'node2', 'node3'}}, 'node1\nnode2\nnode3'),
23 |     (['pathway-organizer', '--tln=node1,node2,node3'], {'top_level_nodes': {'node1', 'node2', 'node3'}, 'filter_nodes': None}, None),
24 |     (['pathway-organizer', '--fn=node1,node2,node3'], {'top_level_nodes': None, 'filter_nodes': {'node1', 'node2', 'node3'}}, None),
25 |     (['pathway-organizer'], {'top_level_nodes': None, 'filter_nodes': None}, None)]
26 | 
27 | 
28 | @pt.mark.parametrize('args,kwargs,stdin_mock', test_data)
29 | def test_print(mocker, args: list, kwargs: dict, stdin_mock: str):
30 |     pathway_org_mock, expected_output = _get_mock_pathway_org_and_expected_output(mocker=mocker)
31 |     u.test_print(
32 |         mocker=mocker, argv_mock=args, stdin_mock=stdin_mock, method=method, method_return_value=pathway_org_mock, method_kwargs=kwargs,
33 |         module=po_cli, expected_output=expected_output)
34 | 
35 | 
36 | def _get_mock_pathway_org_and_expected_output(mocker):
37 |     u.mock_non_instantiable(mocker=mocker)
38 |     hierarchy_nodes_mock: po.HierarchyNodes = {'a': {'name': 'b', 'level': 1, 'parent': 'c', 'children': ['a'], 'entry_id': 'd'}}
39 |     pathway_org_mock = po.PathwayOrganizer()
40 |     pathway_org_mock.hierarchy_nodes = hierarchy_nodes_mock
41 |     expected_output: str = json.dumps(hierarchy_nodes_mock, indent=2)
42 |     return pathway_org_mock, expected_output
43 | 
44 | 
45 | @pt.mark.parametrize('args,kwargs,stdin_mock', test_data)
46 | def test_file(mocker, args: list, kwargs: dict, stdin_mock: str, output_file: str):
47 |     pathway_org_mock, expected_output = _get_mock_pathway_org_and_expected_output(mocker=mocker)
48 |     u.test_file(
49 |         mocker=mocker, argv_mock=args, output_file=output_file, stdin_mock=stdin_mock, method=method,
50 |         method_return_value=pathway_org_mock, method_kwargs=kwargs, module=po_cli, expected_output=expected_output)
51 | 
52 | 
53 | @pt.mark.parametrize('args,kwargs,stdin_mock', test_data)
54 | def test_zip_archive(mocker, args: list, kwargs: dict, stdin_mock: str, zip_archive_data: tuple):
55 |     pathway_org_mock, expected_output = _get_mock_pathway_org_and_expected_output(mocker=mocker)
56 |     u.test_zip_archive(
57 |         mocker=mocker, argv_mock=args, zip_archive_data=zip_archive_data, stdin_mock=stdin_mock, method=method,
58 |         method_return_value=pathway_org_mock, method_kwargs=kwargs, module=po_cli, expected_output=expected_output)
59 | 


--------------------------------------------------------------------------------
/dev/test_entry_ids.py:
--------------------------------------------------------------------------------
 1 | # noinspection PyPackageRequirements
 2 | import pytest as pt
 3 | import typing as t
 4 | import os
 5 | import kegg_pull.rest as r
 6 | import kegg_pull.entry_ids as ei
 7 | import kegg_pull.kegg_url as ku
 8 | import dev.utils as u
 9 | 
10 | 
11 | test_from_kegg_rest_data = [  # each case: (function under test, expected KEGGurl class, kwargs, URL path appended to the base URL)
12 |     (ei.from_database, ku.ListKEGGurl, {'database': 'compound'}, 'list/compound'),
13 |     (ei.from_keywords, ku.KeywordsFindKEGGurl, {'database': 'compound', 'keywords': ['kw1', 'kw2']}, 'find/compound/kw1+kw2'),
14 |     (
15 |         ei.from_molecular_attribute, ku.MolecularFindKEGGurl,
16 |         {'database': 'compound', 'formula': 'M4O3C2K1', 'exact_mass': None, 'molecular_weight': None},
17 |         'find/compound/M4O3C2K1/formula')]
18 | 
19 | 
20 | @pt.mark.parametrize('get_entry_ids,KEGGurl,kwargs,url', test_from_kegg_rest_data)
21 | def test_from_kegg_rest(mocker, get_entry_ids: t.Callable, KEGGurl: type, kwargs: dict, url: str):  # runs each getter against a mocked 200 response
22 |     text_body_mock = '''
23 |     cpd:C22501	alpha-D-Xylulofuranose
24 |     cpd:C22502	alpha-D-Fructofuranose; alpha-D-Fructose
25 |     cpd:C22500	2,8-Dihydroxyadenine
26 |     cpd:C22504	cis-Alkene
27 |     cpd:C22506	Archaeal dolichyl alpha-D-glucosyl phosphate; Dolichyl alpha-D-glucosyl phosphate
28 |     cpd:C22507	6-Sulfo-D-rhamnose
29 |     cpd:C22509	3',5'-Cyclic UMP; Uridine 3',5'-cyclic monophosphate; cUMP
30 |     cpd:C22510	4-Deoxy-4-sulfo-D-erythrose
31 |     cpd:C22511	4-Deoxy-4-sulfo-D-erythrulose
32 |     cpd:C22512	Solabiose
33 |     cpd:C22513	sn-3-O-(Farnesylgeranyl)glycerol 1-phosphate
34 |     cpd:C22514	2,3-Bis-O-(geranylfarnesyl)-sn-glycerol 1-phosphate
35 |     '''
36 |     get_mock: mocker.MagicMock = mocker.patch('kegg_pull.rest.rq.get', return_value=mocker.MagicMock(text=text_body_mock, status_code=200))
37 |     request_and_check_error_spy: mocker.MagicMock = mocker.spy(r, 'request_and_check_error')  # records the call for the assertion below
38 |     actual_entry_ids: list = get_entry_ids(**kwargs)
39 |     request_and_check_error_spy.assert_called_once_with(kegg_rest=None, KEGGurl=KEGGurl, **kwargs)
40 |     url = f'{ku.BASE_URL}/{url}'  # expand the relative path from the test data into the full expected URL
41 |     get_mock.assert_called_once_with(url=url, timeout=60)
42 |     expected_entry_ids = [
43 |         'cpd:C22501', 'cpd:C22502', 'cpd:C22500', 'cpd:C22504', 'cpd:C22506', 'cpd:C22507', 'cpd:C22509', 'cpd:C22510',
44 |         'cpd:C22511', 'cpd:C22512', 'cpd:C22513', 'cpd:C22514']
45 |     assert actual_entry_ids == expected_entry_ids  # only the ID column of each line is expected, in response order
46 | 
47 | 
48 | @pt.fixture(name='file_info', params=[True, False])
49 | def file_mock(request):
50 |     is_empty = request.param
51 |     if is_empty:
52 |         file_contents_mock = ''
53 |     else:
54 |         file_contents_mock = '''
55 |         cpd:C22501
56 |         cpd:C22502
57 |         cpd:C22500
58 |         cpd:C22504
59 | 
60 |         cpd:C22506
61 |         cpd:C22507
62 |         cpd:C22509
63 |         cpd:C22510
64 |         cpd:C22511
65 |         cpd:C22512
66 |         cpd:C22513
67 |         cpd:C22514
68 |         '''
69 |     file_name = 'file-mock.txt'
70 |     with open(file_name, 'w') as file:
71 |         file.write(file_contents_mock)
72 |     yield file_name, is_empty
73 |     os.remove(file_name)
74 | 
75 | 
76 | def test_from_file(file_info: str):
77 |     file_name, is_empty = file_info
78 |     if is_empty:
79 |         with pt.raises(ValueError) as error:
80 |             ei.from_file(file_path=file_name)
81 |         u.assert_exception(expected_message=f'Attempted to load entry IDs from {file_name}. But the file is empty', exception=error)
82 |     else:
83 |         actual_entry_ids: list = ei.from_file(file_path=file_name)
84 |         expected_entry_ids = [
85 |             'cpd:C22501', 'cpd:C22502', 'cpd:C22500', 'cpd:C22504', 'cpd:C22506', 'cpd:C22507', 'cpd:C22509', 'cpd:C22510',
86 |             'cpd:C22511', 'cpd:C22512', 'cpd:C22513', 'cpd:C22514']
87 |         assert actual_entry_ids == expected_entry_ids
88 | 


--------------------------------------------------------------------------------
/dev/test_map_cli.py:
--------------------------------------------------------------------------------
 1 | # noinspection PyPackageRequirements
 2 | import pytest as pt
 3 | import kegg_pull.map_cli as map_cli
 4 | import dev.utils as u
 5 | 
 6 | mapping_mock = {'k1': {'v1'}, 'k2': {'v1', 'v2'}, 'k3': {'v3', 'v4'}}  # passed to the shared test helpers as method_return_value
 7 | 
 8 | 
 9 | def test_help(mocker):  # delegates to the shared help assertion for the "map" subcommand
10 |     u.assert_help(mocker=mocker, module=map_cli, subcommand='map')
11 | 
12 | 
13 | test_data = [  # each case: (CLI args following "map", kmap method name to patch, expected method kwargs, stdin text or None)
14 |     (['conv', 'compound', 'chebi'], 'database_conv', {'kegg_database': 'compound', 'outside_database': 'chebi', 'reverse': False}, None),
15 |     (['conv', 'entry-ids', '-', 'pubchem'], 'entries_conv', {'entry_ids': ['e1', 'e2'], 'target_database': 'pubchem', 'reverse': False},
16 |      'e1\ne2'),
17 |     (['conv', 'entry-ids', 'e1', 'chebi', '--reverse'], 'entries_conv', {'entry_ids': ['e1'], 'target_database': 'chebi', 'reverse': True},
18 |      None),
19 |     (['link', 'enzyme', 'compound'], 'database_link',
20 |      {'source_database': 'enzyme', 'target_database': 'compound', 'deduplicate': False, 'add_glycans': False, 'add_drugs': False}, None),
21 |     (['link', 'compound', 'reaction', '--add-glycans', '--add-drugs'], 'database_link',
22 |      {'source_database': 'compound', 'target_database': 'reaction', 'deduplicate': False, 'add_glycans': True, 'add_drugs': True}, None),
23 |     (['link', 'pathway', 'reaction', '--deduplicate'], 'database_link',
24 |      {'source_database': 'pathway', 'target_database': 'reaction', 'deduplicate': True, 'add_glycans': False, 'add_drugs': False}, None),
25 |     (['link', 'entry-ids', 'e1,e2,e3', 'glycan'], 'entries_link',
26 |      {'entry_ids': ['e1', 'e2', 'e3'], 'target_database': 'glycan', 'reverse': False}, None),
27 |     (['link', 'entry-ids', '-', 'ko', '--reverse'], 'entries_link',
28 |      {'entry_ids': ['e1', 'e2', 'e3'], 'target_database': 'ko', 'reverse': True}, ' e1\ne2\t\ne3\n\n'),
29 |     (['link', 'ko', 'reaction', 'compound'], 'indirect_link',
30 |      {'source_database': 'ko', 'intermediate_database': 'reaction', 'target_database': 'compound', 'deduplicate': False,
31 |       'add_glycans': False, 'add_drugs': False}, None),
32 |     (['link', 'pathway', 'reaction', 'ko', '--deduplicate'], 'indirect_link',
33 |      {'source_database': 'pathway', 'intermediate_database': 'reaction', 'target_database': 'ko', 'deduplicate': True,
34 |       'add_glycans': False, 'add_drugs': False}, None),
35 |     (['link', 'compound', 'reaction', 'ko', '--add-glycans', '--add-drugs'], 'indirect_link',
36 |      {'source_database': 'compound', 'intermediate_database': 'reaction', 'target_database': 'ko', 'deduplicate': False,
37 |       'add_glycans': True, 'add_drugs': True}, None)]
38 | 
39 | 
40 | def _prepare_input(args: list, method: str) -> tuple[list, str, str]:
41 |     args = ['map'] + args
42 |     method = f'map_cli.kmap.{method}'
43 |     expected_output = '{\n  "k1": [\n    "v1"\n  ],\n  "k2": [\n    "v1",\n    "v2"\n  ],\n  "k3": [\n    "v3",\n    "v4"\n  ]\n}'
44 |     return args, method, expected_output
45 | 
46 | 
47 | @pt.mark.parametrize('args,method,kwargs,stdin_mock', test_data)
48 | def test_print(mocker, args: list, method: str, kwargs: dict, stdin_mock: str):
49 |     args, method, expected_output = _prepare_input(args=args, method=method)
50 |     u.test_print(
51 |         mocker=mocker, argv_mock=args, stdin_mock=stdin_mock, method=method, method_return_value=mapping_mock, method_kwargs=kwargs,
52 |         module=map_cli, expected_output=expected_output)
53 | 
54 | 
55 | @pt.mark.parametrize('args,method,kwargs,stdin_mock', test_data)
56 | def test_file(mocker, args: list, method: str, kwargs: dict, stdin_mock: str, output_file: str):
57 |     args, method, expected_output = _prepare_input(args=args, method=method)
58 |     u.test_file(
59 |         mocker=mocker, argv_mock=args, output_file=output_file, stdin_mock=stdin_mock, method=method,
60 |         method_return_value=mapping_mock, method_kwargs=kwargs, module=map_cli, expected_output=expected_output)
61 | 
62 | 
63 | @pt.mark.parametrize('args,method,kwargs,stdin_mock', test_data)
64 | def test_zip_archive(mocker, args: list, method: str, kwargs: dict, stdin_mock: str, zip_archive_data: tuple):
65 |     args, method, expected_output = _prepare_input(args=args, method=method)
66 |     u.test_zip_archive(
67 |         mocker=mocker, argv_mock=args, zip_archive_data=zip_archive_data, stdin_mock=stdin_mock, method=method,
68 |         method_return_value=mapping_mock, method_kwargs=kwargs, module=map_cli, expected_output=expected_output)
69 | 


--------------------------------------------------------------------------------
/src/kegg_pull/map_cli.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Usage:
 3 |     kegg_pull map -h | --help
 4 |     kegg_pull map conv <kegg-database> <outside-database> [--reverse] [--output=<output>]
 5 |     kegg_pull map link <source-database> <target-database> [--deduplicate] [--add-glycans] [--add-drugs] [--output=<output>]
 6 |     kegg_pull map (link|conv) entry-ids <entry-ids> <target-database> [--reverse] [--output=<output>]
 7 |     kegg_pull map link <source-database> <intermediate-database> <target-database> [--deduplicate] [--add-glycans] [--add-drugs] [--output=<output>]
 8 | 
 9 | Options:
10 |     -h --help               Show this help message.
11 |     conv                    Converts the output of the KEGG "conv" operation into a JSON mapping.
12 |     <kegg-database>         The name of the KEGG database with entry IDs mapped to the outside database.
13 |     <outside-database>      The name of the outside database with entry IDs mapped from the KEGG database.
14 |     --reverse               Reverses the mapping with the target becoming the source and the source becoming the target.
15 |     --output=<output>       The location (either a directory or ZIP archive) of the JSON file to store the mapping. If not set, prints a JSON representation of the mapping to the console. If a ZIP archive, the file path must be in the form of /path/to/zip-archive.zip:/path/to/file (e.g. ./archive.zip:mapping.json).
16 |     link                    Converts the output of the KEGG "link" operation into a JSON mapping.
17 |     <source-database>       The name of the database with entry IDs mapped to the target database.
18 |     <target-database>       The name of the database with entry IDs mapped from the source database.
19 |     --deduplicate           Some mappings including pathway entry IDs result in half beginning with the normal "path:map" prefix but the other half with a different prefix. If set, removes the IDs corresponding to identical entries but with a different prefix. Raises an exception if neither the source nor the target database are "pathway".
20 |     --add-glycans           Whether to add the corresponding compound IDs of equivalent glycan entries. Logs a warning if neither the source nor the target database are "compound".
21 |     --add-drugs             Whether to add the corresponding compound IDs of equivalent drug entries. Logs a warning if neither the source nor the target database are "compound".
22 |     entry-ids               Create a mapping to a target database from a list of specific entry IDs.
23 |     <entry-ids>             Comma separated list of entry IDs (e.g. Id1,Id2,Id3 etc.). Or if equal to "-", entry IDs are read from standard input, one entry ID per line; Press CTRL+D to finalize input or pipe (e.g. cat file.txt | kegg_pull map entry-ids drug - ...).
24 |     <intermediate-database> The name of an intermediate KEGG database with which to find cross-references to cross-references e.g. "kegg_pull map link ko reaction compound" creates a mapping from ko-to-compound via ko-to-reaction cross-references connected to reaction-to-compound cross-references.
25 | """
26 | import docopt as doc
27 | from . import map as kmap
28 | from . import _utils as u
29 | 
30 | 
def main() -> None:
    """Entry point of the "map" subcommand.

    Parses the command line with docopt, builds the requested mapping with the matching function of the ``map`` module
    and then prints or saves its JSON string depending on whether ``--output`` was given.
    """
    cli_args = doc.docopt(__doc__)
    target_database: str = cli_args['<target-database>']
    reverse: bool = cli_args['--reverse']
    # Options accepted by both database-level "link" variants (direct and via an intermediate database)
    link_options = {
        'deduplicate': cli_args['--deduplicate'],
        'add_glycans': cli_args['--add-glycans'],
        'add_drugs': cli_args['--add-drugs']}
    if cli_args['<intermediate-database>']:
        # Only the indirect link usage pattern sets the intermediate database
        mapping: kmap.KEGGmapping = kmap.indirect_link(
            source_database=cli_args['<source-database>'], intermediate_database=cli_args['<intermediate-database>'],
            target_database=target_database, **link_options)
    elif cli_args['entry-ids']:
        entry_ids = u.parse_input_sequence(input_source=cli_args['<entry-ids>'])
        if cli_args['link']:
            mapping = kmap.entries_link(entry_ids=entry_ids, target_database=target_database, reverse=reverse)
        else:
            mapping = kmap.entries_conv(entry_ids=entry_ids, target_database=target_database, reverse=reverse)
    elif cli_args['link']:
        mapping = kmap.database_link(
            source_database=cli_args['<source-database>'], target_database=target_database, **link_options)
    else:
        # The only remaining usage pattern is the database-level "conv"
        mapping = kmap.database_conv(
            kegg_database=cli_args['<kegg-database>'], outside_database=cli_args['<outside-database>'], reverse=reverse)
    u.print_or_save(output_target=cli_args['--output'], output_content=kmap.to_json_string(mapping=mapping))
61 | 


--------------------------------------------------------------------------------
/src/kegg_pull/entry_ids.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Pulling Lists of KEGG Entry IDs
 3 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 4 | |Functionality| for pulling lists of KEGG entry IDs from the KEGG REST API.
 5 | """
 6 | from . import rest as r
 7 | from . import kegg_url as ku
 8 | 
 9 | 
def from_database(database: str, kegg_rest: r.KEGGrest | None = None) -> list[str]:
    """ Pulls the KEGG entry IDs of a given database.

    :param database: The KEGG database to pull the entry IDs from. If equal to "brite", the "br:" prefix is prepended to each entry ID that does not already have it such that they succeed if used in downstream use of the KEGG "get" operation (e.g. for the "pull" API module or CLI subcommand).
    :param kegg_rest: The KEGGrest object to request the entry IDs. If None, one is created with the default parameters.
    :return: The list of resulting entry IDs.
    :raises RuntimeError: Raised if the request to the KEGG REST API fails or times out.
    """
    entry_ids = _process_response(KEGGurl=ku.ListKEGGurl, kegg_rest=kegg_rest, database=database)
    if database == 'brite':
        # Every ID must survive: add the prefix where it is missing and keep already-prefixed IDs as they are
        # (filtering on the prefix would silently drop them from the result).
        entry_ids = [entry_id if entry_id.startswith('br:') else f'br:{entry_id}' for entry_id in entry_ids]
    return entry_ids
22 | 
23 | 
def _process_response(KEGGurl: type[ku.AbstractKEGGurl], kegg_rest: r.KEGGrest | None, **kwargs) -> list[str]:
    """ Makes an entry IDs related request and parses the entry IDs out of the text body of the response.

    :param KEGGurl: The URL class for the request.
    :param kegg_rest: The KEGGrest object to make the request with. If None, one is created with the default parameters.
    :param kwargs: The arguments to pass into the KEGGrest method.
    :return: The list of KEGG entry IDs.
    :raises RuntimeError: Raised if the KEGG response indicates a failure or time out.
    """
    response_text: str = r.request_and_check_error(kegg_rest=kegg_rest, KEGGurl=KEGGurl, **kwargs).text_body
    return _parse_entry_ids_string(entry_ids_string=response_text)
36 | 
37 | 
38 | def _parse_entry_ids_string(entry_ids_string: str) -> list[str]:
39 |     """ Parses the entry IDs contained in a string.
40 | 
41 |     :param entry_ids_string: The string containing the entry IDs.
42 |     :return: The list of parsed entry IDs.
43 |     """
44 |     entry_ids = entry_ids_string.strip().split('\n')
45 |     return [entry_id.split('\t')[0].strip() for entry_id in entry_ids if entry_id.strip() != '']
46 | 
47 | 
def from_file(file_path: str) -> list[str]:
    """ Reads KEGG entry IDs from a file that lists one entry ID per line.

    :param file_path: The path to the file containing the entry IDs.
    :return: The list of entry IDs.
    :raises ValueError: Raised if the file is empty.
    """
    with open(file_path, 'r') as entry_ids_file:
        file_content = entry_ids_file.read()
    if file_content == '':
        raise ValueError(f'Attempted to load entry IDs from {file_path}. But the file is empty')
    return _parse_entry_ids_string(entry_ids_string=file_content)
60 | 
61 | 
def from_keywords(database: str, keywords: list[str], kegg_rest: r.KEGGrest | None = None) -> list[str]:
    """ Pulls the entry IDs of the entries in a KEGG database that match a keyword search.

    :param database: The name of the database to pull entry IDs from.
    :param keywords: The keywords to search entries in the database with.
    :param kegg_rest: The KEGGrest object to request the entry IDs. If None, one is created with the default parameters.
    :return: The list of entry IDs.
    :raises RuntimeError: Raised if the request to the KEGG REST API fails or times out.
    """
    search_arguments = {'database': database, 'keywords': keywords}
    return _process_response(KEGGurl=ku.KeywordsFindKEGGurl, kegg_rest=kegg_rest, **search_arguments)
72 | 
73 | 
def from_molecular_attribute(
        database: str, formula: str | None = None, exact_mass: float | tuple[float, float] | None = None,
        molecular_weight: int | tuple[int, int] | None = None, kegg_rest: r.KEGGrest | None = None) -> list[str]:
    """ Pulls entry IDs from a KEGG database of chemical entries by searching on one (and only one) of three molecular attributes.

    :param database: The name of the database containing chemical entries.
    :param formula: The chemical formula to search for.
    :param exact_mass: The exact mass of the compound to search for (a single value or a range).
    :param molecular_weight: The molecular weight of the compound to search for (a single value or a range).
    :param kegg_rest: The KEGGrest object to request the entry IDs. If None, one is created with the default parameters.
    :return: The list of entry IDs.
    :raises RuntimeError: Raised if the request to the KEGG REST API fails or times out.
    """
    # All three attributes are forwarded as given; no selection between them happens in this function
    search_criteria = {'formula': formula, 'exact_mass': exact_mass, 'molecular_weight': molecular_weight}
    return _process_response(KEGGurl=ku.MolecularFindKEGGurl, kegg_rest=kegg_rest, database=database, **search_criteria)
90 | 


--------------------------------------------------------------------------------
/dev/test_data/brite-entries/br_br08902.txt:
--------------------------------------------------------------------------------
  1 | +C	Br number
  2 | !
  3 | APathway and Brite
  4 | B  Pathway maps
  5 | C    br08901  KEGG pathway maps
  6 | B  Brite files
  7 | C    br08902  BRITE hierarchy files
  8 | C    br08904  BRITE table files
  9 | C    br08906  BRITE binary relation files
 10 | AGenes and Proteins
 11 | B  Orthologs, modules and networks
 12 | C    ko00001  KEGG Orthology (KO)
 13 | C    ko00002  KEGG modules
 14 | C    ko00003  KEGG reaction modules
 15 | C    br08907  KEGG networks
 16 | B  Protein families: metabolism
 17 | C    ko01000  Enzymes
 18 | C    ko01001  Protein kinases
 19 | C    ko01009  Protein phosphatases and associated proteins
 20 | C    ko01002  Peptidases and inhibitors
 21 | C    ko01003  Glycosyltransferases
 22 | C    ko01005  Lipopolysaccharide biosynthesis proteins
 23 | C    ko01011  Peptidoglycan biosynthesis and degradation proteins
 24 | C    ko01004  Lipid biosynthesis proteins
 25 | C    ko01008  Polyketide biosynthesis proteins
 26 | C    ko01006  Prenyltransferases
 27 | C    ko01007  Amino acid related enzymes
 28 | C    ko00199  Cytochrome P450
 29 | C    ko00194  Photosynthesis proteins
 30 | B  Protein families: genetic information processing
 31 | C    ko03000  Transcription factors
 32 | C    ko03021  Transcription machinery
 33 | C    ko03019  Messenger RNA biogenesis
 34 | C    ko03041  Spliceosome
 35 | C    ko03011  Ribosome
 36 | C    ko03009  Ribosome biogenesis
 37 | C    ko03016  Transfer RNA biogenesis
 38 | C    ko03012  Translation factors
 39 | C    ko03110  Chaperones and folding catalysts
 40 | C    ko04131  Membrane trafficking
 41 | C    ko04121  Ubiquitin system
 42 | C    ko03051  Proteasome
 43 | C    ko03032  DNA replication proteins
 44 | C    ko03036  Chromosome and associated proteins
 45 | C    ko03400  DNA repair and recombination proteins
 46 | C    ko03029  Mitochondrial biogenesis
 47 | B  Protein families: signaling and cellular processes
 48 | C    ko02000  Transporters
 49 | C    ko02044  Secretion system
 50 | C    ko02042  Bacterial toxins
 51 | C    ko02022  Two-component system
 52 | C    ko02035  Bacterial motility proteins
 53 | C    ko03037  Cilium and associated proteins
 54 | C    ko04812  Cytoskeleton proteins
 55 | C    ko04147  Exosome
 56 | C    ko02048  Prokaryotic defense system
 57 | C    ko04030  G protein-coupled receptors
 58 | C    ko04050  Cytokine receptors
 59 | C    ko04054  Pattern recognition receptors
 60 | C    ko03310  Nuclear receptors
 61 | C    ko04040  Ion channels
 62 | C    ko04031  GTP-binding proteins
 63 | C    ko04052  Cytokines and growth factors
 64 | C    ko04515  Cell adhesion molecules
 65 | C    ko04090  CD molecules
 66 | C    ko01504  Antimicrobial resistance genes
 67 | C    ko00535  Proteoglycans
 68 | C    ko00536  Glycosaminoglycan binding proteins
 69 | C    ko00537  Glycosylphosphatidylinositol (GPI)-anchored proteins
 70 | C    ko04091  Lectins
 71 | C    ko04990  Domain-containing proteins not elsewhere classified
 72 | B  Viral protein families
 73 | C    ko03200  Viral proteins
 74 | C    ko03210  Viral fusion proteins
 75 | B  RNA family
 76 | C    ko03100  Non-coding RNAs
 77 | ACompounds and Reactions
 78 | B  Compounds
 79 | C    br08001  Compounds with biological roles
 80 | C    br08002  Lipids
 81 | C    br08003  Phytochemical compounds
 82 | C    br08005  Bioactive peptides
 83 | C    br08006  Endocrine disrupting compounds
 84 | C    br08007  Pesticides
 85 | C    br08009  Natural toxins
 86 | C    br08021  Glycosides
 87 | B  Glycans
 88 | C    br08120  O-antigens
 89 | B  Reactions
 90 | C    br08201  Enzymatic reactions
 91 | C    br08202  IUBMB reaction hierarchy
 92 | C    br08204  Reaction class
 93 | C    br08203  Glycosyltransferase reactions
 94 | ADrugs
 95 | B  Drug classifications
 96 | C    br08303  Anatomical Therapeutic Chemical (ATC) classification
 97 | C    br08302  USP drug classification
 98 | C    br08301  Therapeutic category of drugs in Japan
 99 | C    br08313  Classification of Japanese OTC drugs
100 | C    br08312  Risk category of Japanese OTC drugs
101 | C    br08304  Traditional Chinese Medicine in Japan
102 | C    br08305  Crude drugs
103 | C    br08331  Animal drugs in Japan
104 | B  Drug information
105 | C    br08330  Drug groups
106 | C    br08332  Drug classes
107 | C    br08310  Target-based classification of drugs
108 | C    br08307  Antimicrobials
109 | C    br08327  Antimicrobials abbreviations
110 | C    br08311  Drugs listed in the Japanese Pharmacopoeia
111 | ADiseases
112 | B  Human diseases
113 | C    br08403  Human diseases in ICD-11 classification
114 | C    br08402  Pathway-based classification of diseases
115 | C    br08401  Genome-based classification of infectious diseases
116 | C    br08411  ICD-11 International Classification of Diseases
117 | C    br08410  ICD-10 International Classification of Diseases
118 | C    br08420  ICD-O-3 International Classification of Diseases for Oncology
119 | AOrganisms and Viruses
120 | B  Taxonomy
121 | C    br08601  KEGG organisms
122 | C    br08610  KEGG organisms in the NCBI taxonomy
123 | C    br08611  KEGG organisms in taxonomic ranks
124 | C    br08612  KEGG organisms: animals
125 | C    br08613  KEGG organisms: plants
126 | C    br08614  KEGG organisms: fungi
127 | C    br08615  KEGG organisms: protists
128 | C    br08620  KEGG viruses in the NCBI taxonomy
129 | C    br08621  KEGG viruses in taxonomic ranks
130 | B  Organism information
131 | C    br08605  Plant pathogens
132 | !
133 | #
134 | #Last updated: July 10, 2023
135 | #&raquo; Japanese version
136 | 


--------------------------------------------------------------------------------
/src/kegg_pull/_utils.py:
--------------------------------------------------------------------------------
  1 | import logging as log
  2 | import typing as t
  3 | import zipfile as zf
  4 | import os
  5 | import sys
  6 | import json
  7 | import jsonschema as js
  8 | import inspect as ins
  9 | 
 10 | 
 11 | def get_molecular_attribute_args(args: dict) -> tuple[str | None, float | tuple[float, float] | None, int | tuple[int, int] | None]:
 12 |     formula: str | None = args['--formula']
 13 |     exact_mass: list[str] | None = args['--em']
 14 |     molecular_weight: list[str] | None = args['--mw']
 15 |     # exact_mass and molecular_weight will be [] (empty list) if not specified in the commandline args
 16 |     if exact_mass:
 17 |         exact_mass: float | tuple[float, float] = _get_range_values(range_values=exact_mass, value_type=float)
 18 |     else:
 19 |         exact_mass = None
 20 |     if molecular_weight:
 21 |         molecular_weight: int | tuple[int, int] = _get_range_values(range_values=molecular_weight, value_type=int)
 22 |     else:
 23 |         molecular_weight = None
 24 |     return formula, exact_mass, molecular_weight
 25 | 
 26 | 
 27 | def _get_range_values(
 28 |         range_values: list[str], value_type: type[int | float]) -> int | float | tuple[int, int] | tuple[float, float]:
 29 |     if len(range_values) == 1:
 30 |         [val] = range_values
 31 |         return value_type(val)
 32 |     elif len(range_values) == 2:
 33 |         [min_val, max_val] = range_values
 34 |         return value_type(min_val), value_type(max_val)
 35 |     else:
 36 |         raise ValueError(
 37 |             f'Range can only be specified by two values but {len(range_values)} values were provided: '
 38 |             f'{", ".join(range_value for range_value in range_values)}')
 39 | 
 40 | 
 41 | def load_json_file(file_path: str, json_schema: dict, validation_error_message: str) -> dict:
 42 |     if '.zip:' in file_path:
 43 |         [file_location, file_name] = file_path.split('.zip:')
 44 |         file_location = file_location + '.zip'
 45 |         with zf.ZipFile(file_location, 'r') as zip_file:
 46 |             json_object: bytes = zip_file.read(file_name)
 47 |             json_object: dict = json.loads(s=json_object)
 48 |     else:
 49 |         with open(file_path, 'r') as file:
 50 |             json_object: dict = json.load(file)
 51 |     validate_json_object(json_object=json_object, json_schema=json_schema, validation_error_message=validation_error_message)
 52 |     return json_object
 53 | 
 54 | 
 55 | def validate_json_object(json_object: dict, json_schema: dict, validation_error_message: str) -> None:
 56 |     try:
 57 |         js.validate(json_object, json_schema)
 58 |     except js.exceptions.ValidationError as e:
 59 |         log.error(validation_error_message)
 60 |         raise e
 61 | 
 62 | 
 63 | def parse_input_sequence(input_source: str) -> list[str]:
 64 |     if input_source == '-':
 65 |         # Read from standard input
 66 |         inputs: str = sys.stdin.read()
 67 |         inputs: list = inputs.strip().split('\n')
 68 |     else:
 69 |         # Split a comma separated list
 70 |         inputs: list = input_source.split(',')
 71 |     inputs: list = [input_string.strip() for input_string in inputs if input_string.strip() != '']
 72 |     # If the inputs end up being an empty list
 73 |     if not inputs:
 74 |         input_source = 'standard input' if input_source == '-' else f'comma separated list: "{input_source}"'
 75 |         raise ValueError(f'Empty list provided from {input_source}')
 76 |     return inputs
 77 | 
 78 | 
 79 | def print_or_save(output_target: str, output_content: str | bytes) -> None:
 80 |     if output_target is None:
 81 |         if type(output_content) is bytes:
 82 |             log.warning('Printing binary output...')
 83 |         print(output_content)
 84 |     else:
 85 |         save_output(output_target=output_target, output_content=output_content)
 86 | 
 87 | 
 88 | def save_output(output_target: str, output_content: str | bytes) -> None:
 89 |     if '.zip:' in output_target:
 90 |         [file_location, file_name] = output_target.split('.zip:')
 91 |         file_location: str = file_location + '.zip'
 92 |     else:
 93 |         file_location, file_name = os.path.split(output_target)
 94 |         file_location = '.' if file_location == '' else file_location
 95 |     save_file(file_location=file_location, file_content=output_content, file_name=file_name)
 96 | 
 97 | 
 98 | def save_file(file_location: str, file_content: str | bytes, file_name: str) -> None:
 99 |     if os.name == 'nt':  # pragma: no cover
100 |         # If the OS is Windows, replace colons with underscores (Windows does not support colons in file names).
101 |         file_name = file_name.replace(':', '_')  # pragma: no cover
102 |     if file_location.endswith('.zip'):
103 |         with zf.ZipFile(file_location, 'a') as zip_file:
104 |             zip_file.writestr(file_name, file_content)
105 |     else:
106 |         if not os.path.isdir(file_location):
107 |             os.makedirs(file_location)
108 |         file_path = os.path.join(file_location, file_name)
109 |         save_type = 'wb' if type(file_content) is bytes else 'w'
110 |         encoding: str | None = None if type(file_content) is bytes else 'utf-8'
111 |         with open(file_path, save_type, encoding=encoding) as file:
112 |             file.write(file_content)
113 | 
114 | 
class NonInstantiable:
    """Subclasses of this class are only instantiable in the same module that they are defined in.

    The check runs in ``__init__`` and compares the file of a calling frame with the file in which the class being
    instantiated is defined.

    :raises RuntimeError: Raised on instantiation if the two file paths differ.
    """
    @classmethod
    def __init__(cls) -> None:
        # Declared as a classmethod so that the class itself (rather than the new instance) is available for getfile()
        # Index 0 is this frame and index 1 is its direct caller, so index 2 is the caller's caller
        # NOTE(review): presumably index 1 is a subclass __init__ calling super().__init__() - confirm against the subclasses
        caller_module_path = ins.stack()[2].filename
        class_module_path = ins.getfile(cls)
        # Ensure the python module of the caller matches that of the class
        # This ensures the class is only instantiated in the same module that it's defined in
        if caller_module_path != class_module_path:
            raise RuntimeError(f'The class "{cls.__name__}" cannot be instantiated outside of its module.')
125 | 
126 | 
class staticproperty(staticmethod):
    """A static method that is evaluated on attribute access, i.e. without needing to be called explicitly."""
    def __get__(self, *_ignored) -> t.Any:
        # The instance and owner handed over by the descriptor protocol are not needed
        wrapped_function = self.__func__
        return wrapped_function()
130 | 


--------------------------------------------------------------------------------
/dev/test_data/drug-entry-ids.txt:
--------------------------------------------------------------------------------
  1 | dr:D00227
  2 | dr:D00240
  3 | dr:D00246
  4 | dr:D00262
  5 | dr:D00277
  6 | dr:D00328
  7 | dr:D00356
  8 | dr:D00383
  9 | dr:D00400
 10 | dr:D00523
 11 | dr:D00585
 12 | dr:D00603
 13 | dr:D00631
 14 | dr:D00637
 15 | dr:D00650
 16 | dr:D00651
 17 | dr:D00657
 18 | dr:D00663
 19 | dr:D00702
 20 | dr:D00716
 21 | dr:D00718
 22 | dr:D00725
 23 | dr:D00752
 24 | dr:D00824
 25 | dr:D00874
 26 | dr:D00892
 27 | dr:D00935
 28 | dr:D00946
 29 | dr:D00948
 30 | dr:D00949
 31 | dr:D00955
 32 | dr:D00961
 33 | dr:D00983
 34 | dr:D00986
 35 | dr:D01003
 36 | dr:D01018
 37 | dr:D01042
 38 | dr:D01044
 39 | dr:D01056
 40 | dr:D01069
 41 | dr:D01103
 42 | dr:D01117
 43 | dr:D01124
 44 | dr:D01131
 45 | dr:D01229
 46 | dr:D01289
 47 | dr:D01297
 48 | dr:D01315
 49 | dr:D01332
 50 | dr:D01402
 51 | dr:D01408
 52 | dr:D01451
 53 | dr:D01452
 54 | dr:D01472
 55 | dr:D01477
 56 | dr:D01483
 57 | dr:D01490
 58 | dr:D01551
 59 | dr:D01616
 60 | dr:D01619
 61 | dr:D01640
 62 | dr:D01692
 63 | dr:D01745
 64 | dr:D01773
 65 | dr:D01836
 66 | dr:D01858
 67 | dr:D01872
 68 | dr:D01922
 69 | dr:D01926
 70 | dr:D01976
 71 | dr:D02071
 72 | dr:D02110
 73 | dr:D02114
 74 | dr:D02190
 75 | dr:D02196
 76 | dr:D02203
 77 | dr:D02212
 78 | dr:D02222
 79 | dr:D02238
 80 | dr:D02239
 81 | dr:D02241
 82 | dr:D02293
 83 | dr:D02307
 84 | dr:D02332
 85 | dr:D02345
 86 | dr:D02429
 87 | dr:D02463
 88 | dr:D02556
 89 | dr:D02575
 90 | dr:D02605
 91 | dr:D02635
 92 | dr:D02651
 93 | dr:D02656
 94 | dr:D02666
 95 | dr:D02675
 96 | dr:D02683
 97 | dr:D02715
 98 | dr:D02733
 99 | dr:D02775
100 | dr:D02880
101 | dr:D02889
102 | dr:D02899
103 | dr:D02903
104 | dr:D02942
105 | dr:D03005
106 | dr:D03051
107 | dr:D03064
108 | dr:D03088
109 | dr:D03134
110 | dr:D03147
111 | dr:D03214
112 | dr:D03321
113 | dr:D03499
114 | dr:D03517
115 | dr:D03547
116 | dr:D03558
117 | dr:D03607
118 | dr:D03630
119 | dr:D03652
120 | dr:D03654
121 | dr:D03681
122 | dr:D03704
123 | dr:D03752
124 | dr:D03772
125 | dr:D03775
126 | dr:D03788
127 | dr:D03792
128 | dr:D03797
129 | dr:D03807
130 | dr:D03809
131 | dr:D03810
132 | dr:D03848
133 | dr:D03850
134 | dr:D03892
135 | dr:D03895
136 | dr:D03910
137 | dr:D03971
138 | dr:D04009
139 | dr:D04018
140 | dr:D04023
141 | dr:D04033
142 | dr:D04040
143 | dr:D04066
144 | dr:D04112
145 | dr:D04179
146 | dr:D04190
147 | dr:D04222
148 | dr:D04225
149 | dr:D04226
150 | dr:D04452
151 | dr:D04512
152 | dr:D04517
153 | dr:D04586
154 | dr:D04611
155 | dr:D04627
156 | dr:D04646
157 | dr:D04696
158 | dr:D04735
159 | dr:D04758
160 | dr:D04774
161 | dr:D04789
162 | dr:D04871
163 | dr:D04949
164 | dr:D05077
165 | dr:D05116
166 | dr:D05120
167 | dr:D05131
168 | dr:D05149
169 | dr:D05194
170 | dr:D05290
171 | dr:D05308
172 | dr:D05339
173 | dr:D05375
174 | dr:D05426
175 | dr:D05451
176 | dr:D05477
177 | dr:D05498
178 | dr:D05507
179 | dr:D05613
180 | dr:D05649
181 | dr:D05700
182 | dr:D05709
183 | dr:D05718
184 | dr:D05719
185 | dr:D05802
186 | dr:D05806
187 | dr:D05834
188 | dr:D05895
189 | dr:D05897
190 | dr:D05911
191 | dr:D05915
192 | dr:D05939
193 | dr:D05981
194 | dr:D06029
195 | dr:D06102
196 | dr:D06134
197 | dr:D06138
198 | dr:D06154
199 | dr:D06160
200 | dr:D06187
201 | dr:D06331
202 | dr:D06334
203 | dr:D06342
204 | dr:D06395
205 | dr:D06579
206 | dr:D06618
207 | dr:D06877
208 | dr:D06883
209 | dr:D07072
210 | dr:D07084
211 | dr:D07086
212 | dr:D07093
213 | dr:D07096
214 | dr:D07099
215 | dr:D07143
216 | dr:D07236
217 | dr:D07288
218 | dr:D07319
219 | dr:D07355
220 | dr:D07438
221 | dr:D07453
222 | dr:D07472
223 | dr:D07499
224 | dr:D07503
225 | dr:D07545
226 | dr:D07547
227 | dr:D07609
228 | dr:D07636
229 | dr:D07650
230 | dr:D07675
231 | dr:D07702
232 | dr:D07730
233 | dr:D07733
234 | dr:D07736
235 | dr:D07741
236 | dr:D07761
237 | dr:D07796
238 | dr:D07863
239 | dr:D07865
240 | dr:D07879
241 | dr:D07915
242 | dr:D07927
243 | dr:D07932
244 | dr:D07977
245 | dr:D07980
246 | dr:D08001
247 | dr:D08019
248 | dr:D08039
249 | dr:D08117
250 | dr:D08146
251 | dr:D08160
252 | dr:D08168
253 | dr:D08212
254 | dr:D08227
255 | dr:D08236
256 | dr:D08277
257 | dr:D08296
258 | dr:D08313
259 | dr:D08316
260 | dr:D08371
261 | dr:D08397
262 | dr:D08410
263 | dr:D08457
264 | dr:D08498
265 | dr:D08508
266 | dr:D08551
267 | dr:D08574
268 | dr:D08595
269 | dr:D08616
270 | dr:D08618
271 | dr:D08656
272 | dr:D08660
273 | dr:D08662
274 | dr:D08688
275 | dr:D08757
276 | dr:D08845
277 | dr:D08851
278 | dr:D08872
279 | dr:D08890
280 | dr:D08894
281 | dr:D08940
282 | dr:D08949
283 | dr:D08970
284 | dr:D09003
285 | dr:D09026
286 | dr:D09028
287 | dr:D09341
288 | dr:D09344
289 | dr:D09360
290 | dr:D09369
291 | dr:D09389
292 | dr:D09393
293 | dr:D09402
294 | dr:D09567
295 | dr:D09572
296 | dr:D09645
297 | dr:D09671
298 | dr:D09702
299 | dr:D09730
300 | dr:D09732
301 | dr:D09772
302 | dr:D09787
303 | dr:D09816
304 | dr:D09861
305 | dr:D09919
306 | dr:D09922
307 | dr:D09925
308 | dr:D09931
309 | dr:D09962
310 | dr:D09976
311 | dr:D09992
312 | dr:D09997
313 | dr:D10008
314 | dr:D10014
315 | dr:D10019
316 | dr:D10020
317 | dr:D10073
318 | dr:D10084
319 | dr:D10157
320 | dr:D10180
321 | dr:D10198
322 | dr:D10309
323 | dr:D10313
324 | dr:D10322
325 | dr:D10330
326 | dr:D10345
327 | dr:D10370
328 | dr:D10381
329 | dr:D10389
330 | dr:D10397
331 | dr:D10426
332 | dr:D10549
333 | dr:D10594
334 | dr:D10624
335 | dr:D10631
336 | dr:D10648
337 | dr:D10658
338 | dr:D10661
339 | dr:D10669
340 | dr:D10674
341 | dr:D10678
342 | dr:D10692
343 | dr:D10703
344 | dr:D10725
345 | dr:D10730
346 | dr:D10747
347 | dr:D10750
348 | dr:D10833
349 | dr:D10871
350 | dr:D10883
351 | dr:D10924
352 | dr:D10959
353 | dr:D11038
354 | dr:D11049
355 | dr:D11051
356 | dr:D11137
357 | dr:D11156
358 | dr:D11245
359 | dr:D11259
360 | dr:D11300
361 | dr:D11316
362 | dr:D11326
363 | dr:D11371
364 | dr:D11409
365 | dr:D11437
366 | dr:D11446
367 | dr:D11465
368 | dr:D11499
369 | dr:D11509
370 | dr:D11585
371 | dr:D11602
372 | dr:D11622
373 | dr:D11658
374 | dr:D11667
375 | dr:D11691
376 | dr:D11754
377 | dr:D11781
378 | dr:D11788
379 | dr:D11791
380 | dr:D11817
381 | dr:D11824
382 | dr:D11842
383 | dr:D11864
384 | dr:D11889
385 | dr:D11915
386 | dr:D12053
387 | dr:D12120
388 | dr:D12160
389 | dr:D12238
390 | dr:D12251
391 | dr:D12273
392 | dr:D12369
393 | dr:D12384
394 | dr:D12390
395 | dr:D12391
396 | dr:D12423
397 | dr:D12467
398 | dr:D12559
399 | dr:D12604
400 | dr:D12646


--------------------------------------------------------------------------------
/dev/test_pathway_organizer.py:
--------------------------------------------------------------------------------
  1 | # noinspection PyPackageRequirements
  2 | import pytest as pt
  3 | import json
  4 | import typing as t
  5 | import kegg_pull.pathway_organizer as po
  6 | import dev.utils as u
  7 | 
  8 | 
  9 | def test_load_from_kegg_warning(mocker, caplog):
 10 |     get_mock: mocker.MagicMock = _get_get_mock(mocker=mocker)
 11 |     parse_hierarchy_spy: mocker.MagicMock = mocker.spy(po.PathwayOrganizer, '_parse_hierarchy')
 12 |     pathway_org: po.PathwayOrganizer = po.PathwayOrganizer.load_from_kegg(top_level_nodes={'invalid-top-level-node'})
 13 |     get_mock.assert_called_once_with(entry_ids=['br:br08901'], entry_field='json')
 14 |     u.assert_warning(
 15 |         message='Top level node name "invalid-top-level-node" is not recognized and will be ignored. Valid values are: "Cellular '
 16 |                 'Processes, Drug Development, Environmental Information Processing, Genetic Information Processing, '
 17 |                 'Human Diseases, Metabolism, Organismal Systems"', caplog=caplog)
 18 |     parse_hierarchy_spy.assert_called_once_with(pathway_org, level=1, raw_hierarchy_nodes=[], parent_name=None)
 19 |     assert pathway_org.hierarchy_nodes == dict()
 20 | 
 21 | 
 22 | def _get_get_mock(mocker):
 23 |     def get_mock(**_) -> mocker.MagicMock:
 24 |         with open('dev/test_data/pathway-organizer/pathway-hierarchy.json', 'r') as file_:
 25 |             text_body_mock: str = file_.read()
 26 |         kegg_response_mock = mocker.MagicMock(text_body=text_body_mock)
 27 |         return kegg_response_mock
 28 |     return mocker.patch('kegg_pull.pathway_organizer.r.KEGGrest.get', wraps=get_mock)
 29 | 
 30 | 
# Each case is (top_level_nodes, filter_nodes, hierarchy_nodes_file): the two optional sets passed to load_from_kegg
# and the name of the JSON file holding the hierarchy nodes expected as a result.
test_load_from_kegg_data = [
    (None, None, 'all-nodes.json'),
    ({'Metabolism', 'Genetic Information Processing'}, None, 'top-level-nodes.json'),
    (None, {'Genetic Information Processing', 'Global and overview maps', '00010  Glycolysis / Gluconeogenesis'}, 'filter-nodes.json')]
 35 | 
 36 | 
 37 | @pt.mark.parametrize('top_level_nodes,filter_nodes,hierarchy_nodes_file', test_load_from_kegg_data)
 38 | def test_load_from_kegg(mocker, top_level_nodes: set, filter_nodes: set, hierarchy_nodes_file: str):
 39 |     get_mock: mocker.MagicMock = _get_get_mock(mocker=mocker)
 40 |     pathway_organizer = po.PathwayOrganizer.load_from_kegg(top_level_nodes=top_level_nodes, filter_nodes=filter_nodes)
 41 |     get_mock.assert_called_once_with(entry_ids=['br:br08901'], entry_field='json')
 42 |     if top_level_nodes is not None:
 43 |         actual_top_level_nodes = {node_key for node_key, node_val in pathway_organizer.hierarchy_nodes.items() if node_val['level'] == 1}
 44 |         assert actual_top_level_nodes == top_level_nodes
 45 |     if filter_nodes is not None:
 46 |         for filter_node in filter_nodes:
 47 |             assert filter_node not in pathway_organizer.hierarchy_nodes.keys()
 48 |     expected_hierarchy_nodes: dict = _get_expected_hierarchy_nodes(hierarchy_nodes_file=hierarchy_nodes_file)
 49 |     assert pathway_organizer.hierarchy_nodes == expected_hierarchy_nodes
 50 | 
 51 | 
 52 | def _get_expected_hierarchy_nodes(hierarchy_nodes_file: str) -> dict:
 53 |     with open(f'dev/test_data/pathway-organizer/{hierarchy_nodes_file}') as file:
 54 |         expected_hierarchy_nodes: dict = json.load(file)
 55 |     return expected_hierarchy_nodes
 56 | 
 57 | 
 58 | def test_save_to_json(mocker, json_file_path: str):
 59 |     u.mock_non_instantiable(mocker=mocker)
 60 |     pathway_organizer = po.PathwayOrganizer()
 61 |     pathway_organizer.hierarchy_nodes = _get_expected_hierarchy_nodes(hierarchy_nodes_file='top-level-nodes.json')
 62 |     pathway_organizer.save_to_json(file_path=json_file_path)
 63 |     u.test_save_to_json(json_file_path=json_file_path, expected_saved_json_object=pathway_organizer.hierarchy_nodes)
 64 | 
 65 | 
 66 | def test_load_from_json(json_file_path: str):
 67 |     expected_hierarchy_nodes: dict = _get_expected_hierarchy_nodes(hierarchy_nodes_file='top-level-nodes.json')
 68 |     u.test_load_from_json(
 69 |         json_file_path=json_file_path, saved_object=expected_hierarchy_nodes, method=po.PathwayOrganizer.load_from_json,
 70 |         expected_loaded_object=expected_hierarchy_nodes, loaded_object_attribute='hierarchy_nodes')
 71 | 
 72 | 
 73 | test_invalid_load_from_json_data = [
 74 |     1, 'a', [], [1, 2], ['a', 'b'], [[], []], [[1], [2]], [['a'], ['b']], [{}, {}], [{'a': {}, 'b': []}], {}, {'a': []}, {'a': {}},
 75 |     {'a': {'b': 1}}, {'a': {'name': 'b'}}, {'a': {'level': 1, 'b': 'c'}},
 76 |     {'a': {'name': 'b', 'level': 1, 'parent': 'c', 'children': None, 'entry_id': 'x'},
 77 |      '': {'name': 'b', 'level': 1, 'parent': 'c', 'children': ['d'], 'entry_id': None}},
 78 |     {'a': {'name': 'b', 'level': 1, 'parent': 'c', 'children': None, 'entry_id': None, 'x': 'y'}},
 79 |     {'a': {'name': 2, 'level': 1, 'parent': 'c', 'children': None, 'entry_id': None}},
 80 |     {'a': {'name': '', 'level': 1, 'parent': 'c', 'children': None, 'entry_id': None}},
 81 |     {'a': {'name': None, 'level': 1, 'parent': 'c', 'children': None, 'entry_id': None}},
 82 |     {'a': {'name': 'b', 'level': '1', 'parent': 'c', 'children': None, 'entry_id': None}},
 83 |     {'a': {'name': 'b', 'level': None, 'parent': 'c', 'children': None, 'entry_id': None}},
 84 |     {'a': {'name': 'b', 'level': 0, 'parent': 'c', 'children': None, 'entry_id': None}},
 85 |     {'a': {'name': 'b', 'level': 1, 'parent': '', 'children': None, 'entry_id': None}},
 86 |     {'a': {'name': 'b', 'level': 1, 'parent': 2, 'children': None, 'entry_id': None}},
 87 |     {'a': {'name': 'b', 'level': 1, 'parent': 'c', 'children': [], 'entry_id': None}},
 88 |     {'a': {'name': 'b', 'level': 1, 'parent': 'c', 'children': [1], 'entry_id': None}},
 89 |     {'a': {'name': 'b', 'level': 1, 'parent': 'c', 'children': [''], 'entry_id': None}},
 90 |     {'a': {'name': 'b', 'level': 1, 'parent': 'c', 'children': ['a'], 'entry_id': 1}},
 91 |     {'a': {'name': 'b', 'level': 1, 'parent': 'c', 'children': ['a'], 'entry_id': ''}}]
 92 | 
 93 | 
 94 | @pt.mark.parametrize('invalid_json_object', test_invalid_load_from_json_data)
 95 | def test_invalid_load_from_json(caplog, json_file_path: str, invalid_json_object: list | dict | int | float | str):
 96 |     expected_error_message = f'Failed to load the hierarchy nodes. The pathway organizer JSON file at {json_file_path} is ' \
 97 |                              f'corrupted and will need to be re-created.'
 98 |     u.test_invalid_load_from_json(
 99 |         json_file_path=json_file_path, invalid_json_object=invalid_json_object, method=po.PathwayOrganizer.load_from_json,
100 |         expected_error_message=expected_error_message, caplog=caplog)
101 | 


--------------------------------------------------------------------------------
/dev/utils.py:
--------------------------------------------------------------------------------
  1 | # noinspection PyPackageRequirements
  2 | import pytest as pt
  3 | import zipfile as zf
  4 | import typing as t
  5 | import json
  6 | import jsonschema as js
  7 | import os
  8 | 
  9 | 
 10 | def assert_exception(expected_message: str, exception: pt.ExceptionInfo):
 11 |     actual_message = str(exception.value)
 12 |     assert actual_message == expected_message
 13 | 
 14 | 
 15 | def assert_warning(message: str, caplog):
 16 |     [record] = caplog.records
 17 |     assert record.levelname == 'WARNING'
 18 |     assert record.message == message
 19 | 
 20 | 
 21 | def assert_error(message: str, caplog):
 22 |     [record] = caplog.records
 23 |     assert record.levelname == 'ERROR'
 24 |     assert record.message == message
 25 | 
 26 | 
 27 | def assert_help(mocker, module, subcommand: str):
 28 |     for help_arg in ['-h', '--help']:
 29 |         mocker.patch('sys.argv', ['kegg_pull', subcommand, help_arg])
 30 |         print_mock: mocker.MagicMock = mocker.patch('builtins.print')
 31 |         with pt.raises(SystemExit):
 32 |             module.main()
 33 |         print_mock.assert_any_call(module.__doc__.strip('\n'))
 34 |     
 35 | 
 36 | def assert_call_args(function_mock, expected_call_args_list: list, do_kwargs: bool):
 37 |     actual_call_args_list = function_mock.call_args_list
 38 |     for actual_call_args, expected_call_args in zip(actual_call_args_list, expected_call_args_list):
 39 |         if do_kwargs:
 40 |             assert actual_call_args.kwargs == expected_call_args
 41 |         else:
 42 |             assert actual_call_args.args == expected_call_args
 43 | 
 44 | 
 45 | def _test_main(mocker, argv_mock: list, stdin_mock: str, method: str, method_return_value: object, method_kwargs: dict, module):
 46 |     argv_mock: list = ['kegg_pull'] + argv_mock
 47 |     mocker.patch('sys.argv', argv_mock)
 48 |     stdin_mock: mocker.MagicMock = mocker.patch('kegg_pull._utils.sys.stdin.read', return_value=stdin_mock) if stdin_mock else None
 49 |     method_mock: mocker.MagicMock = mocker.patch(f'kegg_pull.{method}', return_value=method_return_value)
 50 |     module.main()
 51 |     method_mock.assert_called_once_with(**method_kwargs)
 52 |     if stdin_mock:
 53 |         stdin_mock.assert_called_once_with()
 54 | 
 55 | 
 56 | def test_print(
 57 |         mocker, argv_mock: list, stdin_mock: str, method: str, method_return_value: object, method_kwargs: dict, module,
 58 |         expected_output: str | bytes, is_binary: bool = False, caplog=None):
 59 |     print_mock: mocker.MagicMock = mocker.patch('builtins.print')
 60 |     _test_main(
 61 |         mocker=mocker, argv_mock=argv_mock, stdin_mock=stdin_mock, method=method, method_return_value=method_return_value,
 62 |         method_kwargs=method_kwargs, module=module)
 63 |     if is_binary:
 64 |         assert_warning(message='Printing binary output...', caplog=caplog)
 65 |     print_mock.assert_called_once_with(expected_output)
 66 | 
 67 | 
 68 | def test_file(
 69 |         mocker, argv_mock: list, output_file: str, stdin_mock: str, method: str, method_return_value: object, method_kwargs: dict, module,
 70 |         expected_output: str | bytes, is_binary: bool = False):
 71 |     argv_mock: list = argv_mock + [f'--output={output_file}']
 72 |     _test_main(
 73 |         mocker=mocker, argv_mock=argv_mock, stdin_mock=stdin_mock, method=method, method_return_value=method_return_value,
 74 |         method_kwargs=method_kwargs, module=module)
 75 |     read_type: str = 'rb' if is_binary else 'r'
 76 |     with open(output_file, read_type) as file:
 77 |         actual_output: str | bytes = file.read()
 78 |     assert actual_output == expected_output
 79 | 
 80 | 
 81 | def test_zip_archive(
 82 |         mocker, argv_mock: list, zip_archive_data: tuple, stdin_mock: str, method: str, method_return_value: object, method_kwargs: dict,
 83 |         module, expected_output: str | bytes, is_binary: bool = False):
 84 |     zip_archive_path, zip_file_name = zip_archive_data
 85 |     argv_mock: list = argv_mock + [f'--output={zip_archive_path}:{zip_file_name}']
 86 |     _test_main(
 87 |         mocker=mocker, argv_mock=argv_mock, stdin_mock=stdin_mock, method=method, method_return_value=method_return_value,
 88 |         method_kwargs=method_kwargs, module=module)
 89 |     with zf.ZipFile(zip_archive_path, 'r') as zip_file:
 90 |         actual_output: bytes = zip_file.read(zip_file_name)
 91 |         if not is_binary:
 92 |             actual_output: str = actual_output.decode()
 93 |     assert actual_output == expected_output
 94 | 
 95 | 
 96 | def test_save_to_json(json_file_path: str, expected_saved_json_object: dict):
 97 |     if '.zip:' in json_file_path:
 98 |         with zf.ZipFile('archive.zip', 'r') as zip_file:
 99 |             json_file_name: str = 'dir/file.json' if 'dir/' in json_file_path else 'file.json'
100 |             actual_saved_mapping: dict = json.loads(zip_file.read(name=json_file_name))
101 |     else:
102 |         with open(json_file_path, 'r') as file:
103 |             actual_saved_mapping: dict = json.load(file)
104 |     assert actual_saved_mapping == expected_saved_json_object
105 | 
106 | 
def test_load_from_json(
        json_file_path: str, saved_object: dict, method: t.Callable, expected_loaded_object: dict, loaded_object_attribute: str = None):
    """Write an object as JSON, load it back with the given method and assert that the loaded result is as expected.

    :param json_file_path: Where the JSON is written and what is passed to ``method`` as ``file_path``.
    :param saved_object: The object written as JSON before loading.
    :param method: The loading callable under test; it is called with the ``file_path`` keyword argument.
    :param expected_loaded_object: The object the loaded result is compared against.
    :param loaded_object_attribute: If given, the name of the attribute of the loaded result that holds the object to compare; otherwise the result itself is compared.
    """
    _write_test_json_object(json_file_path=json_file_path, test_object=saved_object)
    actual_loaded_object = method(file_path=json_file_path)
    if loaded_object_attribute is not None:
        # getattr() is the supported way to look up an attribute by name; unlike a direct __getattribute__ call
        # it also honours a __getattr__ fallback defined on the loaded object
        actual_loaded_object = getattr(actual_loaded_object, loaded_object_attribute)
    assert actual_loaded_object == expected_loaded_object
114 | 
115 | 
116 | def _write_test_json_object(json_file_path: str, test_object: list | dict | int | float | str) -> None:
117 |     if '.zip:' in json_file_path:
118 |         with zf.ZipFile('archive.zip', 'w') as zip_file:
119 |             json_file_name: str = 'dir/file.json' if 'dir/' in json_file_path else 'file.json'
120 |             zip_file.writestr(json_file_name, json.dumps(test_object, indent=2))
121 |     else:
122 |         if json_file_path.startswith('dir'):
123 |             directory, _ = os.path.split(json_file_path)
124 |             os.makedirs(directory)
125 |         with open(json_file_path, 'w') as file:
126 |             file.write(json.dumps(test_object, indent=2))
127 | 
128 | 
def test_invalid_load_from_json(
        json_file_path: str, invalid_json_object: dict, method: t.Callable, expected_error_message: str, caplog):
    """Write an invalid object as JSON and assert that loading it fails validation and logs the expected error.

    :param json_file_path: Where the invalid JSON is written and what is passed to ``method`` as ``file_path``.
    :param invalid_json_object: The object expected to be rejected by the loading method.
    :param method: The loading callable under test; it must raise a jsonschema ``ValidationError``.
    :param expected_error_message: The message of the single ERROR log record expected afterwards.
    :param caplog: The pytest log capturing fixture.
    """
    _write_test_json_object(json_file_path=json_file_path, test_object=invalid_json_object)
    expected_exception = js.exceptions.ValidationError
    with pt.raises(expected_exception):
        method(file_path=json_file_path)
    assert_error(message=expected_error_message, caplog=caplog)
135 | 
136 | 
def mock_non_instantiable(mocker):
    """Patch ``NonInstantiable.__init__`` in ``kegg_pull._utils`` with a mock.

    :param mocker: The pytest-mock fixture used for patching.
    """
    init_target = 'kegg_pull._utils.NonInstantiable.__init__'
    mocker.patch(init_target)
139 | 


--------------------------------------------------------------------------------
/src/kegg_pull/pull_cli.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Usage:
 3 |     kegg_pull pull -h | --help
 4 |     kegg_pull pull database <database> [--force-single-entry] [--multi-process] [--n-workers=<n-workers>] [--output=<output>] [--print] [--sep=<print-separator>] [--entry-field=<entry-field>] [--n-tries=<n-tries>] [--time-out=<time-out>] [--sleep-time=<sleep-time>] [--ut=<unsuccessful-threshold>]
 5 |     kegg_pull pull entry-ids <entry-ids> [--force-single-entry] [--multi-process] [--n-workers=<n-workers>] [--output=<output>] [--print] [--sep=<print-separator>] [--entry-field=<entry-field>] [--n-tries=<n-tries>] [--time-out=<time-out>] [--sleep-time=<sleep-time>] [--ut=<unsuccessful-threshold>]
 6 | 
 7 | Options:
 8 |     -h --help                       Show this help message.
 9 |     database                        Pulls all the entries in a KEGG database.
10 |     <database>                      The KEGG database from which to pull entries.
11 |     --force-single-entry            Forces pulling only one entry at a time for every request to the KEGG web API. This flag is automatically set if <database> is "brite".
12 |     --multi-process                 If set, the entries are pulled across multiple processes to increase speed. Otherwise, the entries are pulled sequentially in a single process.
13 |     --n-workers=<n-workers>         The number of sub-processes to create when pulling. Defaults to the number of cores available. Ignored if --multi-process is not set.
14 |     --output=<output>               The directory where the pulled KEGG entries will be stored. Defaults to the current working directory. If ends in ".zip", entries are saved to a ZIP archive instead of a directory. Ignored if --print is set.
15 |     --print                         If set, prints the entries to the screen rather than saving them to the file system. Separates entries by the --sep option if set.
16 |     --sep=<print-separator>         The string that separates the entries which are printed to the screen when the --print option is set. Ignored if the --print option is not set. Defaults to printing the entry id, followed by the entry, followed by a newline.
17 |     --entry-field=<entry-field>     Optional field to extract from the entries pulled rather than the standard flat file format (or "htext" in the case of brite entries).
18 |     --n-tries=<n-tries>             The number of times to attempt a KEGG request before marking it as timed out or failed. Defaults to 3.
19 |     --time-out=<time-out>           The number of seconds to wait for a KEGG request before marking it as timed out. Defaults to 60.
20 |     --sleep-time=<sleep-time>       The amount of time to wait after a KEGG request times out (or potentially blacklists with a 403 error code) before attempting it again. Defaults to 5.0.
21 |     --ut=<unsuccessful-threshold>   If set, the ratio of unsuccessful entry IDs (failed or timed out) to total entry IDs at which kegg_pull quits. Valid values are between 0.0 and 1.0 non-inclusive.
22 |     entry-ids                       Pulls entries specified by a comma separated list. Or from standard input: one entry ID per line; Press CTRL+D to finalize input or pipe (e.g. cat file.txt | kegg_pull pull entry-ids - ...).
23 |     <entry-ids>                     Comma separated list of entry IDs to pull (e.g. id1,id2,id3 etc.). Or if equal to "-", entry IDs are read from standard input. Will likely need to set --force-single-entry if any of the entries are from the brite database.
24 | """
25 | import docopt as d
26 | import json
27 | import time
28 | import logging as log
29 | from . import pull as p
30 | from . import rest as r
31 | from . import entry_ids as ei
32 | from . import kegg_url as ku
33 | from . import _utils as u
34 | 
35 | 
def main():
    """Entry point of the ``kegg_pull pull`` subcommand.

    Parses the command line described by the module docstring, pulls the requested entries (all entries of a
    database, or an explicit list of entry IDs) in a single process or across multiple processes, then either
    prints the entries or stores them at the output location. A summary of the pull is always written to
    'pull-results.json' in the current working directory.
    """
    args = d.docopt(__doc__)
    # Options that were not given stay None and are passed on as such
    n_tries = int(args['--n-tries']) if args['--n-tries'] is not None else None
    time_out = int(args['--time-out']) if args['--time-out'] is not None else None
    sleep_time = float(args['--sleep-time']) if args['--sleep-time'] is not None else None
    kegg_rest = r.KEGGrest(n_tries=n_tries, time_out=time_out, sleep_time=sleep_time)
    output = args['--output'] if args['--output'] is not None else '.'
    print_to_screen: bool = args['--print']
    entry_field: str = args['--entry-field']
    force_single_entry: bool = args['--force-single-entry']
    if args['database']:
        database: str = args['<database>']
        if database == 'brite':
            # As documented for --force-single-entry, the flag is set automatically for the brite database
            force_single_entry = True
        entry_ids = ei.from_database(database=database)
    else:
        entry_ids = u.parse_input_sequence(input_source=args['<entry-ids>'])
    unsuccessful_threshold = float(args['--ut']) if args['--ut'] is not None else None
    if args['--multi-process']:
        n_workers = int(args['--n-workers']) if args['--n-workers'] is not None else None
        multiple_pull = p.MultiProcessMultiplePull(kegg_rest=kegg_rest, unsuccessful_threshold=unsuccessful_threshold, n_workers=n_workers)
    else:
        multiple_pull = p.SingleProcessMultiplePull(kegg_rest=kegg_rest, unsuccessful_threshold=unsuccessful_threshold)
    time1 = _testable_time()
    if print_to_screen:
        pull_result, kegg_entry_mapping = multiple_pull.pull_dict(
            entry_ids=entry_ids, entry_field=entry_field, force_single_entry=force_single_entry)
        if ku.GetKEGGurl.is_binary(entry_field=entry_field):
            log.warning('Printing binary output...')
        print_separator: str = args['--sep']
        if print_separator:
            # NOTE(review): binary entries are presumably bytes (see the warning above) - confirm against pull_dict.
            # str() keeps join() from raising TypeError on them and formats them like the f-string in the branch below.
            print(f'\n{print_separator}\n'.join(str(entry) for entry in kegg_entry_mapping.values()))
        else:
            for entry_id, entry in kegg_entry_mapping.items():
                print(entry_id)
                print(f'{entry}\n')
    else:
        pull_result = multiple_pull.pull(entry_ids=entry_ids, output=output, entry_field=entry_field, force_single_entry=force_single_entry)
    time2 = _testable_time()
    n_successful = len(pull_result.successful_entry_ids)
    n_failed = len(pull_result.failed_entry_ids)
    n_timed_out = len(pull_result.timed_out_entry_ids)
    n_total_entry_ids = n_successful + n_failed + n_timed_out
    # When no entry IDs were processed there is nothing to divide by; report 0% instead of raising ZeroDivisionError
    percent_success = n_successful / n_total_entry_ids * 100 if n_total_entry_ids else 0.0
    pull_results = {
        'percent-success': float(f'{percent_success:.2f}'),
        'pull-minutes': float(f'{(time2 - time1) / 60:.2f}'),
        'num-successful': n_successful,
        'num-failed': n_failed,
        'num-timed-out': n_timed_out,
        'num-total': n_total_entry_ids,
        'successful-entry-ids': pull_result.successful_entry_ids,
        'failed-entry-ids': pull_result.failed_entry_ids,
        'timed-out-entry-ids': pull_result.timed_out_entry_ids}
    with open('pull-results.json', 'w') as file:
        json.dump(pull_results, file, indent=0)
90 | 
91 | 
92 | def _testable_time() -> float:
93 |     """ The time.time() function causes issues when mocked in tests, so we create this wrapper that can be safely mocked
94 | 
95 |     :return: The result of time.time()
96 |     """
97 |     return time.time()  # pragma: no cover
98 | 


--------------------------------------------------------------------------------
/dev/test_pull_cli.py:
--------------------------------------------------------------------------------
  1 | # noinspection PyPackageRequirements
  2 | import pytest as pt
  3 | import os
  4 | import json
  5 | import kegg_pull.pull_cli as p_cli
  6 | import dev.utils as u
  7 | 
  8 | 
  9 | def test_help(mocker):
 10 |     u.assert_help(mocker=mocker, module=p_cli, subcommand='pull')
 11 | 
 12 | 
 13 | @pt.fixture(name='_')
 14 | def teardown():
 15 |     yield
 16 |     os.remove('pull-results.json')
 17 | 
 18 | 
 19 | test_data = [
 20 |     (['database', 'db-mock', '--print'], {'n_tries': None, 'time_out': None, 'sleep_time': None}, 'ei.from_database',
 21 |      {'database': 'db-mock'}, 'SingleProcessMultiplePull', {'unsuccessful_threshold': None},
 22 |      {'force_single_entry': False, 'entry_field': None}, True, None),
 23 |     (['database', 'db-mock', '--print', '--sep=#####', '--force-single-entry', '--ut=0.1', '--multi-process', '--entry-field=image'],
 24 |      {'n_tries': None, 'time_out': None, 'sleep_time': None}, 'ei.from_database', {'database': 'db-mock'}, 'MultiProcessMultiplePull',
 25 |      {'unsuccessful_threshold': 0.1, 'n_workers': None}, {'force_single_entry': True, 'entry_field': 'image'}, True, '#####'),
 26 |     (['entry-ids', '-'], {'n_tries': None, 'time_out': None, 'sleep_time': None}, 'u.parse_input_sequence', {'input_source': '-'},
 27 |      'SingleProcessMultiplePull', {'unsuccessful_threshold': None}, {'output': '.', 'force_single_entry': False, 'entry_field': None},
 28 |      False, None),
 29 |     (['entry-ids', '1,2', '--output=out-dir/', '--sleep-time=10.1'], {'n_tries': None, 'time_out': None, 'sleep_time': 10.1},
 30 |      'u.parse_input_sequence', {'input_source': '1,2'}, 'SingleProcessMultiplePull', {'unsuccessful_threshold': None},
 31 |      {'output': 'out-dir/', 'force_single_entry': False, 'entry_field': None}, False, None),
 32 |     (['entry-ids', '1,2', '--n-tries=4', '--time-out=50', '--entry-field=mol'], {'n_tries': 4, 'time_out': 50, 'sleep_time': None},
 33 |      'u.parse_input_sequence', {'input_source': '1,2'}, 'SingleProcessMultiplePull', {'unsuccessful_threshold': None},
 34 |      {'output': '.', 'force_single_entry': False, 'entry_field': 'mol'}, False, None),
 35 |     (['entry-ids', '-', '--entry-field=mol'], {'n_tries': None, 'time_out': None, 'sleep_time': None},
 36 |      'u.parse_input_sequence', {'input_source': '-'}, 'SingleProcessMultiplePull', {'unsuccessful_threshold': None},
 37 |      {'output': '.', 'force_single_entry': False, 'entry_field': 'mol'}, False, None),
 38 |     (['database', 'pathway', '--output=out-dir', '--multi-process', '--sleep-time=20', '--force-single-entry'],
 39 |      {'n_tries': None, 'time_out': None, 'sleep_time': 20}, 'ei.from_database', {'database': 'pathway'}, 'MultiProcessMultiplePull',
 40 |      {'n_workers': None, 'unsuccessful_threshold': None}, {'output': 'out-dir', 'force_single_entry': True, 'entry_field': None}, False,
 41 |      None),
 42 |     (['database', 'brite', '--multi-process', '--n-tries=5', '--time-out=35', '--n-workers=6'],
 43 |      {'n_tries': 5, 'time_out': 35, 'sleep_time': None}, 'ei.from_database', {'database': 'brite'}, 'MultiProcessMultiplePull',
 44 |      {'n_workers': 6, 'unsuccessful_threshold': None}, {'output': '.', 'force_single_entry': True, 'entry_field': None}, False, None),
 45 |     (['entry-ids', '-', '--ut=0.4'], {'n_tries': None, 'time_out': None, 'sleep_time': None},
 46 |      'u.parse_input_sequence', {'input_source': '-'}, 'SingleProcessMultiplePull', {'unsuccessful_threshold': 0.4},
 47 |      {'output': '.', 'force_single_entry': False, 'entry_field': None}, False, None)]
 48 | 
 49 | 
 50 | @pt.mark.parametrize(
 51 |     'args,kegg_rest_kwargs,entry_ids_method,entry_ids_kwargs,multiple_pull_class,multiple_pull_kwargs,pull_kwargs,print_to_screen,separator',
 52 |     test_data)
 53 | def test_main(
 54 |         mocker, _, args: list, kegg_rest_kwargs: dict, entry_ids_method: str, entry_ids_kwargs: dict, multiple_pull_class: str,
 55 |         multiple_pull_kwargs: dict, pull_kwargs: dict, print_to_screen: bool, separator: str | None, caplog):
 56 |     args = ['kegg_pull', 'pull'] + args
 57 |     mocker.patch('sys.argv', args)
 58 |     kegg_rest_mock = mocker.MagicMock()
 59 |     KEGGrestMock = mocker.patch('kegg_pull.pull.r.KEGGrest', return_value=kegg_rest_mock)
 60 |     pull_result_mock = mocker.MagicMock(
 61 |         successful_entry_ids=('a', 'b', 'c', 'x'), failed_entry_ids=('y', 'z'), timed_out_entry_ids=())
 62 |     pull_dict_return_value = pull_result_mock, {'a': 'x', 'b': 'y', 'c': 'z', 'x': 'abc123'}
 63 |     entry_ids_mock = ['1', '2']
 64 |     entry_ids_method_mock: mocker.MagicMock = mocker.patch(
 65 |         f'kegg_pull.pull_cli.{entry_ids_method}', return_value=entry_ids_mock)
 66 |     multiple_pull_mock = mocker.MagicMock(
 67 |         pull=mocker.MagicMock(return_value=pull_result_mock),
 68 |         pull_dict=mocker.MagicMock(return_value=pull_dict_return_value))
 69 |     MultiplePullMock: mocker.MagicMock = mocker.patch(
 70 |         f'kegg_pull.pull_cli.p.{multiple_pull_class}', return_value=multiple_pull_mock)
 71 |     time_mock: mocker.MagicMock = mocker.patch('kegg_pull.pull_cli._testable_time', side_effect=[26, 94])
 72 |     print_mock: mocker.MagicMock = mocker.patch('builtins.print')
 73 |     p_cli.main()
 74 |     KEGGrestMock.assert_called_once_with(**kegg_rest_kwargs)
 75 |     assert time_mock.call_count == 2
 76 |     MultiplePullMock.assert_called_once_with(kegg_rest=kegg_rest_mock, **multiple_pull_kwargs)
 77 |     if print_to_screen:
 78 |         multiple_pull_mock.pull_dict.assert_called_once_with(entry_ids=entry_ids_mock, **pull_kwargs)
 79 |         if pull_kwargs['entry_field'] is not None:
 80 |             u.assert_warning(message='Printing binary output...', caplog=caplog)
 81 |         if separator is not None:
 82 |             print_mock.assert_called_once_with(f'\n{separator}\n'.join(['x', 'y', 'z', 'abc123']))
 83 |         else:
 84 |             u.assert_call_args(
 85 |                 function_mock=print_mock, expected_call_args_list=[(arg,) for arg in ['a', 'x\n', 'b', 'y\n', 'c', 'z\n', 'x', 'abc123\n']],
 86 |                 do_kwargs=False)
 87 |     else:
 88 |         multiple_pull_mock.pull.assert_called_once_with(entry_ids=entry_ids_mock, **pull_kwargs)
 89 |     entry_ids_method_mock.assert_called_with(**entry_ids_kwargs)
 90 |     expected_pull_results = {
 91 |         'percent-success': 66.67, 'pull-minutes': 1.13, 'num-successful': 4, 'num-failed': 2, 'num-timed-out': 0, 'num-total': 6,
 92 |         'successful-entry-ids': ['a', 'b', 'c', 'x'], 'failed-entry-ids': ['y', 'z'], 'timed-out-entry-ids': []}
 93 |     with open('pull-results.json', 'r') as file:
 94 |         actual_pull_results: dict = json.load(file)
 95 |     assert actual_pull_results == expected_pull_results
 96 |     expected_pull_results_text: str = '\n'.join([
 97 |         '{',
 98 |         '"percent-success": 66.67,',
 99 |         '"pull-minutes": 1.13,',
100 |         '"num-successful": 4,',
101 |         '"num-failed": 2,',
102 |         '"num-timed-out": 0,',
103 |         '"num-total": 6,',
104 |         '"successful-entry-ids": [',
105 |         '"a",',
106 |         '"b",',
107 |         '"c",',
108 |         '"x"',
109 |         '],',
110 |         '"failed-entry-ids": [',
111 |         '"y",',
112 |         '"z"',
113 |         '],',
114 |         '"timed-out-entry-ids": []',
115 |         '}'])
116 |     with open('pull-results.json', 'r') as file:
117 |         actual_pull_results_text: str = file.read()
118 |     assert expected_pull_results_text == actual_pull_results_text
119 | 


--------------------------------------------------------------------------------
/dev/test_data/brite-entries/br_br08005.txt:
--------------------------------------------------------------------------------
  1 | +C	Peptide
  2 | !
  3 | ANeuropeptides
  4 | B  Tachykinin
  5 | C    C16094  Substance P
  6 | C    C16095  Neuropeptide K
  7 | C    C16096  Neuropeptide gamma
  8 | C    C16097  Neurokinin A
  9 | C    C16098  Neurokinin B
 10 | C    C16099  Endokinin A/B
 11 | C    C16100  Endokinin C
 12 | C    C16101  Endokinin D
 13 | B  Neurotensin
 14 | C    C01836  Neurotensin
 15 | C    C15868  Neuromedin N
 16 | B  Feeding-related peptide
 17 | C    C16025  Ghrelin
 18 | C    C15901  Galanin
 19 | C    C16046  Galanin-like peptide
 20 | C    C16102  Obestatin
 21 | C    C16027  Cocaine- and amphetamine-regulated transcript (1-39)
 22 | C    C16029  Cocaine- and amphetamine-regulated transcript (42-89)
 23 | C    C16103  Agouti related protein (87-132)
 24 | C    C16030  Melanin-concentrating hormone
 25 | C    C16031  Neuropeptide GE
 26 | C    C16104  Neuropeptide EI
 27 | C    C16105  Orexin A
 28 | C    C16106  Orexin B
 29 | C    C16115  Gastrin-releasing peptide
 30 | C    C15866  Neuromedin C
 31 | C    C15869  Neuromedin U
 32 | C    C16107  Neuromedin S
 33 | C    C15949  Neuropeptide Y
 34 | C    C16032  Neuropeptide W-30
 35 | C    C16033  Neuropeptide W-23
 36 | C    C16034  Neuropeptide B-29
 37 | C    C16035  Neuropeptide B-23
 38 | C    C15865  Neuromedin B
 39 | C    C16036  Neuropeptide S
 40 | B  Endogenous opioid peptide
 41 | C    C16037  Leumorphin
 42 | C    C01574  Dynorphin A
 43 | C    C16135  Dynorphin B
 44 | C    C16039  Neoendorphin alpha
 45 | C    C16040  Neoendorphin beta
 46 | C    C16041  Leu-enkephalin
 47 | C    C11684  Met-enkephalin
 48 | C    C16042  Met-enkephalin-Arg-Gly-Leu
 49 | C    C16043  Met-enkephalin-Arg-Phe
 50 | C    C16108  Adrenorphin
 51 | C    C15890  Endomorphin-1
 52 | C    C15891  Endomorphin-2
 53 | C    C16044  Nociceptin
 54 | C    C15871  Nocistatin
 55 | B  Other neuropeptides
 56 | C    C15863  Cerebellin
 57 | C    C16109  RFamide-related peptide 1
 58 | C    C16045  RFamide-related peptide 2
 59 | C    C16110  RFamide-related peptide 3
 60 | C    C16111  Neuropeptide AF
 61 | C    C16112  Neuropeptide FF
 62 | ACardiovascular peptides
 63 | B  Angiotensin
 64 | C    C00873  Angiotensin I
 65 | C    C02135  Angiotensin II
 66 | C    C15848  Angiotensin III
 67 | C    C15849  Angiotensin IV
 68 | C    C15851  Angiotensin (1-9)
 69 | C    C15850  Angiotensin (1-7)
 70 | C    C15852  Angiotensin (1-5)
 71 | C    C20970  Angiotensin A
 72 | C    C20971  Alamandine
 73 | B  Bradykinin
 74 | C    C16008  T-kinin
 75 | C    C01505  Kallidin
 76 | C    C00306  Bradykinin
 77 | B  Fibrinopeptide
 78 | C    C00952  Fibrinopeptide A
 79 | C    C02404  Fibrinopeptide B
 80 | B  Natriuretic peptide
 81 | C    C16000  Urodilatin
 82 | C    C16003  Atrial natriuretic peptide
 83 | C    C16004  Brain natriuretic peptide
 84 | C    C16005  C-Type natriuretic peptide
 85 | B  Guanylin
 86 | C    C16006  Uroguanylin
 87 | C    C16007  Guanylin
 88 | B  Endothelin
 89 | C    C16009  Big endothelin
 90 | C    C16010  Endothelin-1
 91 | C    C16012  Endothelin-2
 92 | C    C16013  Endothelin-3
 93 | B  Urotensin
 94 | C    C16076  Urotensin I
 95 | C    C16016  Urotensin II
 96 | B  Adrenomedullin
 97 | C    C16127  Adrenomedullin
 98 | C    C16128  Adrenomedullin-2
 99 | C    C18198  Proadrenomedullin N-terminal 20 peptide
100 | ACalcium-regulating peptides
101 | B  Thyroid peptide hormone
102 | C    C06865  Calcitonin
103 | C    C16125  Calcitonin gene-related peptide 1
104 | C    C16126  Calcitonin gene-related peptide 2
105 | B  parathyroid peptide hormone
106 | C    C16051  Parathyroid hormone
107 | B  Others
108 | C    C15876  Katacalcin
109 | C    C16129  Calcitonin receptor-stimulating peptide 1
110 | C    C16052  Parathyroid hormone-related peptide (1-36)
111 | C    C16053  Tuberoinfundibular peptide of 39 residues
112 | APituitary hormones
113 | B  Anterior pituitary hormone
114 | C    C18181  Growth hormone
115 | C    C18182  Thyroid stimulating hormone
116 | C    C18183  Luteinizing hormone
117 | C    C18184  Follicle stimulating hormone
118 | C    C18201  Prolactin
119 | B  Posterior pituitary hormone
120 | C    C00746  Oxytocin
121 | C    C13662  Arg-vasopressin
122 | C    C07105  Lys-vasopressin
123 | C    C16077  Neurophysin I
124 | C    C16078  Neurophysin II
125 | B  Proopiomelanocortin-derived peptide
126 | C    C02017  Corticotropin
127 | C    C16134  Corticotropin-like intermediary peptide
128 | C    C16017  Endorphin alpha
129 | C    C02210  Endorphin beta
130 | C    C16018  Endorphin gamma
131 | C    C16019  Lipotropin beta
132 | C    C16020  Lipotropin gamma
133 | C    C02758  Melanotropin alpha
134 | C    C16136  Melanotropin beta
135 | C    C16137  Melanotropin gamma
136 | AHypothalamic hormones
137 | B  Corticotropin-releasing hormone
138 | C    C16079  Corticotropin releasing hormone
139 | C    C16080  Urocortin I
140 | C    C16081  Urocortin II
141 | C    C16082  Urocortin III
142 | B  Gonadotropin-releasing hormone
143 | C    C07607  Gonadotropin-releasing hormone I
144 | C    C16084  Gonadotropin-releasing hormone II
145 | B  Growth hormone-releasing hormone / Somatostatin
146 | C    C16085  Growth hormone-releasing hormone
147 | C    C16021  Somatostatin-28
148 | C    C16022  Somatostatin-14
149 | C    C16023  Cortistatin-29
150 | C    C16024  Cortistatin-17
151 | B  Thyrotropin-releasing hormone
152 | C    C03958  Thyrotropin-releasing hormone
153 | B  Prolactin-releasing peptide
154 | C    C16086  Prolactin-releasing peptide-31
155 | C    C16087  Prolactin-releasing peptide-20
156 | B  Pituitary adenylate cyclase-activating peptide
157 | C    C16088  Pituitary adenylate cyclase-activating peptide-38
158 | C    C16089  Pituitary adenylate cyclase-activating peptide-27
159 | B  Metastin
160 | C    C16090  Metastin
161 | C    C16091  Kisspeptin-14
162 | C    C16092  Kisspeptin-13
163 | C    C16093  Kisspeptin-10
164 | APancreatic peptides
165 | B  Insulin / C-peptide
166 | C    C00723  Insulin
167 | C    C16120  C-peptide
168 | C    C16131  Insulin-like growth factor I
169 | C    C16132  Insulin-like growth factor II
170 | B  Relaxin
171 | C    C16121  Relaxin-1
172 | C    C16122  Relaxin-2
173 | C    C16123  Relaxin-3
174 | C    C16124  Relaxin-like factor
175 | C    C16178  Insulin-like peptide 5
176 | B  Glucagon
177 | C    C01501  Glucagon
178 | C    C16048  Glucagon-like peptide 1
179 | C    C16049  Glucagon-like peptide 2
180 | C    C16050  Glicentin
181 | C    C18197  Oxyntomodulin
182 | B  Amylin
183 | C    C16130  Amylin
184 | AGonadal peptides
185 | B  Placental peptide
186 | C    C18185  Chorionic gonadotropin
187 | C    C17813  Activin A
188 | C    C18208  Activin AB
189 | C    C18209  Activin B
190 | C    C18210  Placental lactogen
191 | ANon-endocrine glands secretion peptides
192 | B  Gut peptide
193 | C    C18186  Gastrin-14
194 | C    C16113  Gastrin-17
195 | C    C18187  Gastrin-34
196 | C    C16133  Cholecystokinin-33
197 | C    C16114  Cholecystokinin-8
198 | C    C16047  Motilin
199 | C    C15906  Gastric inhibitory polypeptide
200 | C    C16117  Pancreatic polypeptide
201 | C    C16118  Peptide YY
202 | C    C16119  Vasoactive intestinal peptide
203 | C    C13523  Secretin
204 | C    C15856  Apelin-36
205 | C    C15855  Apelin-13
206 | B  Renal peptide
207 | C    C18200  Erythropoietin
208 | B  Adipocytes secretion peptide
209 | C    C18188  Leptin
210 | C    C18189  Adiponectin
211 | C    C18190  Resistin
212 | AOthers
213 | B  Microbicidal and cytotoxic peptide
214 | C    C16054  Defensin alpha-1
215 | C    C16055  Defensin alpha-2
216 | C    C16056  Defensin alpha-3
217 | C    C16057  Defensin alpha-4
218 | C    C16058  Defensin alpha-5
219 | C    C16059  Defensin beta-1
220 | C    C16060  Defensin beta-2
221 | C    C16061  Defensin beta-3
222 | C    C16062  Defensin beta-4
223 | C    C16063  Liver-expressed antimicrobial peptide 1
224 | C    C16064  Liver-expressed antimicrobial peptide 2
225 | C    C18313  Cathelicidin LL-37
226 | C    C15922  Indolicidin
227 | C    C15921  Histatin 5
228 | !
229 | #
230 | #[ BRITE | KEGG2 | KEGG ]
231 | #Last updated: January 21, 2019
232 | 


--------------------------------------------------------------------------------
/dev/test_rest_cli.py:
--------------------------------------------------------------------------------
  1 | # noinspection PyPackageRequirements
  2 | import pytest as pt
  3 | import typing as t
  4 | import kegg_pull.rest as r
  5 | import kegg_pull.rest_cli as r_cli
  6 | import kegg_pull.kegg_url as ku
  7 | import dev.utils as u
  8 | 
  9 | 
 10 | def test_help(mocker):
 11 |     u.assert_help(mocker=mocker, module=r_cli, subcommand='rest')
 12 | 
 13 | 
 14 | test_exception_data = [
 15 |     ('The request to the KEGG web API failed with the following URL: url/mock', r.KEGGresponse.Status.FAILED),
 16 |     ('The request to the KEGG web API timed out with the following URL: url/mock', r.KEGGresponse.Status.TIMEOUT)]
 17 | 
 18 | 
 19 | @pt.mark.parametrize('expected_message,status', test_exception_data)
 20 | def test_exception(mocker, expected_message: str, status):
 21 |     mocker.patch(
 22 |         'kegg_pull.rest.KEGGrest.info', return_value=mocker.MagicMock(status=status, kegg_url=mocker.MagicMock(url='url/mock')))
 23 |     mocker.patch('sys.argv', ['kegg_pull', 'rest', 'info', 'db-name'])
 24 |     with pt.raises(RuntimeError) as error:
 25 |         r_cli.main()
 26 |     u.assert_exception(expected_message=expected_message, exception=error)
 27 | 
 28 | 
 29 | test_args = [
 30 |     ['rest', 'info', 'ligand'], ['rest', 'list', 'module'], ['rest', 'get', 'x,y,z'], ['rest', 'get', ',,,a', '--entry-field=image'],
 31 |     ['rest', 'find', 'pathway', 'a,b,c,,,'], ['rest', 'find', 'drug', '--formula=CO2'], ['rest', 'find', 'drug', '--em=20.2'],
 32 |     ['rest', 'find', 'drug', '--mw=202'], ['rest', 'find', 'drug', '--em=20.2', '--em=30.3'],
 33 |     ['rest', 'find', 'drug', '--mw=202', '--mw=303'], ['rest', 'conv', 'kegg-db', 'out-db'],
 34 |     ['rest', 'conv', 'entry-ids', 'eid1,eid2', 'genes'], ['rest', 'link', 'target-db', 'source-db'],
 35 |     ['rest', 'link', 'entry-ids', ',x,,,y', 'target-db'], ['rest', 'ddi', 'de1,de2,de3'], ['rest', 'get', '-'],
 36 |     ['rest', 'find', 'pathway', '-'], ['rest', 'conv', 'entry-ids', '-', 'genes'], ['rest', 'link', 'entry-ids', '-', 'target-db'],
 37 |     ['rest', 'ddi', '-']]
 38 | test_kwargs = [
 39 |     {'database': 'ligand'}, {'database': 'module'}, {'entry_ids': ['x', 'y', 'z'], 'entry_field': None},
 40 |     {'entry_ids': ['a'], 'entry_field': 'image'}, {'database': 'pathway', 'keywords': ['a', 'b', 'c']},
 41 |     {'database': 'drug', 'formula': 'CO2', 'exact_mass': None, 'molecular_weight': None},
 42 |     {'database': 'drug', 'formula': None, 'exact_mass': 20.2, 'molecular_weight': None},
 43 |     {'database': 'drug', 'formula': None, 'exact_mass': None, 'molecular_weight': 202},
 44 |     {'database': 'drug', 'formula': None, 'exact_mass': (20.2, 30.3), 'molecular_weight': None},
 45 |     {'database': 'drug', 'formula': None, 'exact_mass': None, 'molecular_weight': (202, 303)},
 46 |     {'kegg_database': 'kegg-db', 'outside_database': 'out-db'},
 47 |     {'target_database': 'genes', 'entry_ids': ['eid1', 'eid2']},
 48 |     {'target_database': 'target-db', 'source_database': 'source-db'},
 49 |     {'target_database': 'target-db', 'entry_ids': ['x', 'y']}, {'drug_entry_ids': ['de1', 'de2', 'de3']}]
 50 | test_data = [
 51 |     ('rest_cli.r.KEGGrest.info', test_args[0], test_kwargs[0], False, None),
 52 |     ('rest_cli.r.KEGGrest.list', test_args[1], test_kwargs[1], False, None),
 53 |     ('rest_cli.r.KEGGrest.get', test_args[2], test_kwargs[2], False, None),
 54 |     ('rest_cli.r.KEGGrest.get', test_args[3], test_kwargs[3], True, None),
 55 |     ('rest_cli.r.KEGGrest.keywords_find', test_args[4], test_kwargs[4], False, None),
 56 |     ('rest_cli.r.KEGGrest.molecular_find', test_args[5], test_kwargs[5], False, None),
 57 |     ('rest_cli.r.KEGGrest.molecular_find', test_args[6], test_kwargs[6], False, None),
 58 |     ('rest_cli.r.KEGGrest.molecular_find', test_args[7], test_kwargs[7], False, None),
 59 |     ('rest_cli.r.KEGGrest.molecular_find', test_args[8], test_kwargs[8], False, None),
 60 |     ('rest_cli.r.KEGGrest.molecular_find', test_args[9], test_kwargs[9], False, None),
 61 |     ('rest_cli.r.KEGGrest.database_conv', test_args[10], test_kwargs[10], False, None),
 62 |     ('rest_cli.r.KEGGrest.entries_conv', test_args[11], test_kwargs[11], False, None),
 63 |     ('rest_cli.r.KEGGrest.database_link', test_args[12], test_kwargs[12], False, None),
 64 |     ('rest_cli.r.KEGGrest.entries_link', test_args[13], test_kwargs[13], False, None),
 65 |     ('rest_cli.r.KEGGrest.ddi', test_args[14], test_kwargs[14], False, None),
 66 |     ('rest_cli.r.KEGGrest.get', test_args[15], test_kwargs[2], False, '\tx\ny\t\n z '),
 67 |     ('rest_cli.r.KEGGrest.keywords_find', test_args[16], test_kwargs[4], False, '\t a\n \tb\nc  \n '),
 68 |     ('rest_cli.r.KEGGrest.entries_conv', test_args[17], test_kwargs[11], False, 'eid1\neid2'),
 69 |     ('rest_cli.r.KEGGrest.entries_link', test_args[18], test_kwargs[13], False, '\nx\n y \n'),
 70 |     ('rest_cli.r.KEGGrest.ddi', test_args[19], test_kwargs[14], False, '\t\n\t\tde1\nde2\nde3\n\n  \n  ')]
 71 | 
 72 | 
 73 | @pt.mark.parametrize('rest_method,args,kwargs,is_binary,stdin_mock', test_data)
 74 | def test_print(mocker, rest_method: str, args: list, kwargs: dict, is_binary: bool, stdin_mock: str, caplog):
 75 |     kegg_response_mock, expected_output = _get_kegg_response_mock_and_expected_output(mocker=mocker, is_binary=is_binary)
 76 |     u.test_print(
 77 |         mocker=mocker, argv_mock=args, stdin_mock=stdin_mock, method=rest_method, method_return_value=kegg_response_mock,
 78 |         method_kwargs=kwargs, module=r_cli, expected_output=expected_output, is_binary=is_binary, caplog=caplog)
 79 | 
 80 | 
 81 | def _get_kegg_response_mock_and_expected_output(mocker, is_binary: bool) -> tuple:
 82 |     kegg_response_mock: mocker.MagicMock = mocker.MagicMock(
 83 |         status=r.KEGGresponse.Status.SUCCESS, text_body='text body mock', binary_body=b'binary body mock')
 84 |     if is_binary:
 85 |         expected_output: bytes = kegg_response_mock.binary_body
 86 |     else:
 87 |         expected_output: str = kegg_response_mock.text_body
 88 |     return kegg_response_mock, expected_output
 89 | 
 90 | 
 91 | @pt.mark.parametrize('rest_method,args,kwargs,is_binary,stdin_mock', test_data)
 92 | def test_file(mocker, rest_method: str, args: list, kwargs: dict, is_binary: bool, output_file: str, stdin_mock: str):
 93 |     kegg_response_mock, expected_output = _get_kegg_response_mock_and_expected_output(mocker=mocker, is_binary=is_binary)
 94 |     u.test_file(
 95 |         mocker=mocker, argv_mock=args, output_file=output_file, stdin_mock=stdin_mock, method=rest_method,
 96 |         method_return_value=kegg_response_mock, method_kwargs=kwargs, module=r_cli, expected_output=expected_output,
 97 |         is_binary=is_binary)
 98 | 
 99 | 
@pt.fixture(name='test_result', params=[True, False])
def get_test_result(request):
    """Parametrized fixture that supplies each boolean value in turn under the name ``test_result``."""
    test_result: bool = request.param
    yield test_result
103 | 
104 | 
# URL classes expected for the first fifteen CLI cases, listed in the same order as test_args / test_kwargs.
_kegg_url_classes = [
    ku.InfoKEGGurl,
    ku.ListKEGGurl,
    ku.GetKEGGurl,
    ku.GetKEGGurl,
    ku.KeywordsFindKEGGurl,
    ku.MolecularFindKEGGurl,
    ku.MolecularFindKEGGurl,
    ku.MolecularFindKEGGurl,
    ku.MolecularFindKEGGurl,
    ku.MolecularFindKEGGurl,
    ku.DatabaseConvKEGGurl,
    ku.EntriesConvKEGGurl,
    ku.DatabaseLinkKEGGurl,
    ku.EntriesLinkKEGGurl,
    ku.DdiKEGGurl]
# zip() stops at the shortest input, so only the fifteen direct-argument cases (indices 0-14) are paired up.
test_test_data = list(zip(_kegg_url_classes, test_args, test_kwargs))


@pt.mark.parametrize('KEGGurl,args,kwargs', test_test_data)
def test_test(mocker, KEGGurl: type[ku.AbstractKEGGurl], args: list, kwargs: dict, test_result: bool):
    """
    With ``--test`` appended to the arguments, expects the patched ``KEGGrest.test`` to be called with the
    matching URL class and kwargs, and its boolean return value to be printed exactly once.
    """
    rest_test_mock = mocker.patch('kegg_pull.rest_cli.r.KEGGrest.test', return_value=test_result)
    mocker.patch('sys.argv', ['kegg_pull', *args, '--test'])
    print_mock = mocker.patch('builtins.print')
    r_cli.main()
    rest_test_mock.assert_called_with(KEGGurl=KEGGurl, **kwargs)
    print_mock.assert_called_once_with(test_result)
132 | 
133 | 
@pt.mark.parametrize('rest_method,args,kwargs,is_binary,stdin_mock', test_data)
def test_zip_archive(mocker, rest_method: str, args: list, kwargs: dict, is_binary: bool, zip_archive_data: tuple, stdin_mock: str):
    """Runs every REST CLI case through the shared ``u.test_zip_archive`` helper using a mocked successful response."""
    response_and_output: tuple = _get_kegg_response_mock_and_expected_output(mocker=mocker, is_binary=is_binary)
    kegg_response_mock, expected_output = response_and_output
    u.test_zip_archive(
        mocker=mocker,
        argv_mock=args,
        zip_archive_data=zip_archive_data,
        stdin_mock=stdin_mock,
        method=rest_method,
        method_return_value=kegg_response_mock,
        method_kwargs=kwargs,
        module=r_cli,
        expected_output=expected_output,
        is_binary=is_binary)
141 | 


--------------------------------------------------------------------------------
/dev/test_main.py:
--------------------------------------------------------------------------------
  1 | # noinspection PyPackageRequirements
  2 | import pytest as pt
  3 | import zipfile as zf
  4 | import os
  5 | import shutil as sh
  6 | import json
  7 | import kegg_pull.__main__ as m
  8 | import kegg_pull.entry_ids_cli as ei_cli
  9 | import kegg_pull.rest_cli as r_cli
 10 | import kegg_pull.pull_cli as p_cli
 11 | import kegg_pull.map_cli as map_cli
 12 | import kegg_pull.pathway_organizer_cli as po_cli
 13 | import dev.utils as u
 14 | 
 15 | 
 16 | def test_help(mocker):
 17 |     mocker.patch('sys.argv', ['kegg_pull', '--full-help'])
 18 |     print_mock: mocker.MagicMock = mocker.patch('builtins.print')
 19 |     m.main()
 20 |     delimiter: str = '-'*80
 21 |     expected_print_call_args = [
 22 |         (m.__doc__,), (delimiter,), (p_cli.__doc__,), (delimiter,), (ei_cli.__doc__,), (delimiter,), (map_cli.__doc__,),
 23 |         (delimiter,), (po_cli.__doc__,), (delimiter,), (r_cli.__doc__,)]
 24 |     u.assert_call_args(function_mock=print_mock, expected_call_args_list=expected_print_call_args, do_kwargs=False)
 25 |     for help_arg in (['--help'], ['-h'], []):
 26 |         help_args = ['kegg_pull']
 27 |         help_args.extend(help_arg)
 28 |         mocker.patch('sys.argv', help_args)
 29 |         print_mock.reset_mock()
 30 |         m.main()
 31 |         print_mock.assert_called_once_with(m.__doc__)
 32 | 
 33 | 
 34 | def test_version(mocker):
 35 |     mocker.patch('sys.argv', ['kegg_pull', '--version'])
 36 |     version_mock = 'version mock'
 37 |     mocker.patch('kegg_pull.__main__.__version__', version_mock)
 38 |     print_mock: mocker.MagicMock = mocker.patch('builtins.print')
 39 |     m.main()
 40 |     print_mock.assert_called_once_with(version_mock)
 41 |     print_mock.reset_mock()
 42 |     mocker.patch('sys.argv', ['kegg_pull', '-v'])
 43 |     m.main()
 44 |     print_mock.assert_called_once_with(version_mock)
 45 | 
 46 | 
 47 | @pt.fixture(name='print_output', params=[True, False])
 48 | def print_output_fixture(request):
 49 |     print_output: bool = request.param
 50 |     yield print_output
 51 |     if not print_output:
 52 |         os.remove('output.txt')
 53 | 
 54 | 
 55 | test_entry_ids_data = [
 56 |     (['database', 'brite'], 'dev/test_data/all-brite-entry-ids.txt'),
 57 |     (['keywords', 'module', 'Guanine,ribonucleotide'], 'dev/test_data/module-entry-ids.txt'),
 58 |     (['molec-attr', 'drug', '--em=420', '--em=440'], 'dev/test_data/drug-entry-ids.txt')]
 59 | 
 60 | 
 61 | @pt.mark.parametrize('args,expected_output', test_entry_ids_data)
 62 | def test_entry_ids(mocker, args: list, expected_output: str, print_output: bool):
 63 |     args: list = ['kegg_pull', 'entry-ids'] + args
 64 |     _test_output(mocker=mocker, args=args, expected_output=expected_output, print_output=print_output)
 65 | 
 66 | 
 67 | def _test_output(mocker, args: list, expected_output: str, print_output: bool, json_output: bool = False):
 68 |     print_mock = None
 69 |     if print_output:
 70 |         print_mock: mocker.MagicMock = mocker.patch('builtins.print')
 71 |     else:
 72 |         args += ['--output=output.txt']
 73 |     mocker.patch('sys.argv', args)
 74 |     m.main()
 75 |     with open(expected_output, 'r') as file:
 76 |         expected_output: str = file.read()
 77 |     if print_output:
 78 |         if json_output:
 79 |             expected_json: dict = json.loads(expected_output)
 80 |             [[actual_json], _] = print_mock.call_args
 81 |             actual_json: str = actual_json
 82 |             actual_json: dict = json.loads(actual_json)
 83 |             assert actual_json == expected_json
 84 |         else:
 85 |             print_mock.assert_called_once_with(expected_output)
 86 |     else:
 87 |         with open('output.txt', 'r') as file:
 88 |             actual_output: str = file.read()
 89 |         if json_output:
 90 |             actual_json: dict = json.loads(actual_output)
 91 |             expected_json: dict = json.loads(expected_output)
 92 |             assert actual_json == expected_json
 93 |         else:
 94 |             assert actual_output == expected_output
 95 | 
 96 | 
 97 | test_rest_data = [
 98 |     (['conv', 'glycan', 'pubchem'], 'dev/test_data/glycan-pubchem-conv.txt'),
 99 |     (['conv', 'entry-ids', 'gl:G13143,gl:G13141,gl:G13139', 'pubchem'], 'dev/test_data/glycan-pubchem-entry-ids.txt'),
100 |     (['link', 'module', 'pathway'], 'dev/test_data/module-pathway-link.txt'),
101 |     (['link', 'entry-ids', 'md:M00575,md:M00574,md:M00363', 'pathway'], 'dev/test_data/pathway-module-entry-ids.txt'),
102 |     (['ddi', 'D00564,D00100,D00109'], 'dev/test_data/ddi-output.txt')]
103 | 
104 | 
105 | @pt.mark.parametrize('args,expected_output', test_rest_data)
106 | def test_rest(mocker, args: list, expected_output: str, print_output: bool):
107 |     args = ['kegg_pull', 'rest'] + args
108 |     _test_output(mocker=mocker, args=args, expected_output=expected_output, print_output=print_output)
109 | 
110 | 
@pt.fixture(name='output', params=['brite-entries.zip', 'brite-entries'])
def pull_output(request):
    """
    Parametrized fixture supplying the pull output location: a ZIP archive name or a directory name.

    After the test, whichever output was produced is deleted along with ``pull-results.json``.
    """
    output: str = request.param
    yield output
    if output == 'brite-entries.zip' and os.path.isfile(output):
        os.remove(output)
    else:
        # ignore_errors covers the cases where nothing was written at the output location.
        sh.rmtree(output, ignore_errors=True)
    # Only remove the results file if it exists: when the test fails before it is written, an unconditional
    # os.remove() would raise FileNotFoundError during teardown and add a misleading error to the real failure.
    if os.path.exists('pull-results.json'):
        os.remove('pull-results.json')
120 | 
121 | 
# Option combinations appended to the ``pull entry-ids -`` command line.
test_pull_data = [
    ['--force-single-entry', '--multi-process', '--n-workers=2'],
    ['--print'],
    ['--print', '--multi-process'],
    ['--multi-process', '--n-workers=2'],
    ['--force-single-entry']]


@pt.mark.parametrize('args', test_pull_data)
def test_pull(mocker, args: list, output: str):
    """
    Runs the ``pull`` subcommand with entry IDs supplied on a mocked standard input and checks the pulled
    entries (printed, zipped, or written to a directory) as well as the contents of ``pull-results.json``.
    """
    stdin_text = """
        br:br08005
        br:br08902
        br:br08431

        br:br03220
        br:br03222
    """
    stdin_read_mock = mocker.patch('kegg_pull._utils.sys.stdin.read', return_value=stdin_text)
    successful_entry_ids = ['br:br08005', 'br:br08902', 'br:br08431']
    # The expected files are named with underscores rather than colons so that they can also exist on Windows.
    expected_output_files = [entry_id.replace(':', '_') for entry_id in successful_entry_ids]
    expected_pull_results = {
        'successful-entry-ids': successful_entry_ids,
        'failed-entry-ids': ['br:br03220', 'br:br03222'],
        'timed-out-entry-ids': [],
        'num-successful': 3,
        'num-failed': 2,
        'num-timed-out': 0,
        'num-total': 5,
        'percent-success': 60.0,
        'pull-minutes': 1.0}
    args = ['kegg_pull', 'pull', 'entry-ids', '-'] + args + [f'--output={output}']
    mocker.patch('sys.argv', args)
    # The two mocked readings are 60 apart, which lines up with the expected 'pull-minutes' of 1.0.
    time_mock = mocker.patch('kegg_pull.pull_cli._testable_time', side_effect=[30, 90])
    print_mock = mocker.patch('builtins.print')
    m.main()
    stdin_read_mock.assert_called_once_with()
    assert time_mock.call_count == 2
    # On Windows, the actual file names use underscores instead of colons, so look the entries up by those names.
    if os.name == 'nt':  # pragma: no cover
        expected_output_files = expected_output_files[:-1]  # The last brite gives different output on Windows
        successful_entry_ids = expected_output_files  # pragma: no cover
    printing: bool = '--print' in args
    zipped: bool = output.endswith('.zip')
    for entry_id, expected_file_stem in zip(successful_entry_ids, expected_output_files):
        with open(f'dev/test_data/brite-entries/{expected_file_stem}.txt') as expected_file:
            expected_entry = expected_file.read()
        if printing:
            print_mock.assert_any_call(entry_id.replace('_', ':'))
            print_mock.assert_any_call(f'{expected_entry}\n')
            continue
        if zipped:
            with zf.ZipFile(output, 'r') as actual_zip:
                actual_entry = actual_zip.read(entry_id + '.txt').decode()
        else:
            with open(f'{output}/{entry_id}.txt') as actual_file:
                actual_entry = actual_file.read()
        assert actual_entry == expected_entry
    with open('pull-results.json', 'r') as results_file:
        actual_pull_results = json.load(results_file)
    assert actual_pull_results == expected_pull_results
179 | 
180 | 
# Tuples of (``map`` subcommand arguments, text for a mocked stdin or None, stem of the expected JSON file).
test_map_data = [
    (['link', 'entry-ids', 'mmu:620551', 'reaction'], None, 'empty'),
    (['conv', 'mmu', 'ncbi-geneid', '--reverse'], None, 'mmu-ncbi'),
    (['conv', 'entry-ids', 'cpd:C00001,cpd:C00002', 'pubchem'], None, 'pubchem'),
    (['link', 'entry-ids', '-', 'module', '--reverse'], '\nK12696\nK22365\nK22435\t', 'module'),
    (['link', 'pathway', 'ko', '--deduplicate'], None, 'pathway-gene'),
    (['link', 'compound', 'reaction', 'pathway', '--add-glycans', '--add-drugs', '--deduplicate'], None, 'compound-reaction-pathway')]


@pt.mark.parametrize('args,stdin_mock_str,expected_output', test_map_data)
@pt.mark.disable_mock_organism_set
def test_map(mocker, print_output: bool, args: list, stdin_mock_str: str, expected_output: str):
    """
    Runs the ``map`` subcommand through ``_test_output`` with JSON comparison. When stdin text is given,
    ``sys.stdin.read`` is patched to return it and is expected to have been called exactly once.
    """
    argv: list = ['kegg_pull', 'map', *args]
    stdin_read_mock = None
    if stdin_mock_str:
        stdin_read_mock = mocker.patch('kegg_pull._utils.sys.stdin.read', return_value=stdin_mock_str)
    expected_json_path = f'dev/test_data/map/{expected_output}.json'
    _test_output(mocker=mocker, args=argv, expected_output=expected_json_path, print_output=print_output, json_output=True)
    if stdin_read_mock is not None:
        stdin_read_mock.assert_called_once_with()
202 | 
203 | 
def test_pathway_organizer(mocker, print_output: bool):
    """Runs the ``pathway-organizer`` subcommand through ``_test_output`` and compares the result as JSON."""
    expected_json_path = 'dev/test_data/pathway-organizer/metabolic-pathways.json'
    argv = ['kegg_pull', 'pathway-organizer', '--tln=Metabolism', '--fn=Global and overview maps']
    _test_output(mocker=mocker, args=argv, expected_output=expected_json_path, print_output=print_output, json_output=True)
209 | 


--------------------------------------------------------------------------------
/dev/test_rest.py:
--------------------------------------------------------------------------------
  1 | # noinspection PyPackageRequirements
  2 | import pytest as pt
  3 | import typing as t
  4 | import requests as rq
  5 | import kegg_pull.rest as r
  6 | import kegg_pull.kegg_url as ku
  7 | import dev.utils as u
  8 | 
  9 | 
 10 | test_kegg_response_exception_data = [
 11 |     ({'status': r.KEGGresponse.Status.SUCCESS, 'kegg_url': None},
 12 |      'A KEGG response cannot be marked as successful if its response body is empty')]
 13 | 
 14 | 
 15 | @pt.mark.parametrize('kwargs,expected_message', test_kegg_response_exception_data)
 16 | def test_kegg_response_exception(mocker, kwargs: dict, expected_message: str):
 17 |     u.mock_non_instantiable(mocker=mocker)
 18 |     with pt.raises(ValueError) as error:
 19 |         r.KEGGresponse(**kwargs)
 20 |     u.assert_exception(expected_message=expected_message, exception=error)
 21 | 
 22 | 
 23 | def test_kegg_rest_exception():
 24 |     with pt.raises(ValueError) as error:
 25 |         r.KEGGrest(n_tries=0)
 26 |     expected_message = '0 is not a valid number of tries to make a KEGG request.'
 27 |     u.assert_exception(expected_message=expected_message, exception=error)
 28 | 
 29 | 
 30 | def test_kegg_rest():
 31 |     kegg_rest = r.KEGGrest(n_tries=2, time_out=30, sleep_time=0.5)
 32 |     assert kegg_rest._n_tries == 2
 33 |     assert kegg_rest._time_out == 30
 34 |     assert kegg_rest._sleep_time == 0.5
 35 |     kegg_rest = r.KEGGrest(n_tries=None, time_out=None, sleep_time=None)
 36 |     assert kegg_rest._n_tries == 3
 37 |     assert kegg_rest._time_out == 60
 38 |     assert kegg_rest._sleep_time == 5.0
 39 | 
 40 | 
 41 | def test_request_and_test_success(mocker):
 42 |     kegg_rest = r.KEGGrest()
 43 |     text_mock = 'text mock'
 44 |     content_mock = b'content mock'
 45 |     response_mock = mocker.MagicMock(text=text_mock, content=content_mock, status_code=200)
 46 |     get_mock: mocker.MagicMock = mocker.patch('kegg_pull.rest.rq.get', return_value=response_mock)
 47 |     url_mock = 'url mock'
 48 |     kegg_url_mock = mocker.MagicMock(url=url_mock)
 49 |     create_url_spy = mocker.spy(r.KEGGrest, '_get_kegg_url')
 50 |     kegg_response: r.KEGGresponse = kegg_rest.request(kegg_url=kegg_url_mock)
 51 |     create_url_spy.assert_called_once_with(KEGGurl=None, kegg_url=kegg_url_mock)
 52 |     get_mock.assert_called_once_with(url=url_mock, timeout=60)
 53 |     assert kegg_response.status == r.KEGGresponse.Status.SUCCESS
 54 |     assert kegg_response.text_body == text_mock
 55 |     assert kegg_response.binary_body == content_mock
 56 |     assert kegg_response.kegg_url == kegg_url_mock
 57 |     head_mock: mocker.MagicMock = mocker.patch('kegg_pull.rest.rq.head', return_value=response_mock)
 58 |     success: bool = kegg_rest.test(kegg_url=kegg_url_mock)
 59 |     head_mock.assert_called_once_with(url=url_mock, timeout=60)
 60 |     assert success
 61 | 
 62 | 
 63 | def test_request_and_test_failed(mocker):
 64 |     n_tries = 4
 65 |     kegg_rest = r.KEGGrest(n_tries=4)
 66 |     url_mock = 'url mock'
 67 |     kegg_url_mock = mocker.MagicMock(url=url_mock)
 68 |     failed_status_code = 403
 69 |     response_mock = mocker.MagicMock(text='', content=b'', status_code=failed_status_code)
 70 |     get_mock: mocker.MagicMock = mocker.patch('kegg_pull.rest.rq.get', return_value=response_mock)
 71 |     sleep_mock: mocker.MagicMock = mocker.patch('kegg_pull.rest.time.sleep')
 72 |     kegg_response: r.KEGGresponse = kegg_rest.request(kegg_url=kegg_url_mock)
 73 |     get_mock.assert_has_calls(mocker.call(url=url_mock, timeout=60) for _ in range(n_tries))
 74 |     sleep_mock.assert_has_calls(mocker.call(5.0) for _ in range(n_tries))
 75 |     assert kegg_response.status == r.KEGGresponse.Status.FAILED
 76 |     assert kegg_response.kegg_url == kegg_url_mock
 77 |     assert kegg_response.text_body is None
 78 |     assert kegg_response.binary_body is None
 79 |     head_mock: mocker.MagicMock = mocker.patch('kegg_pull.rest.rq.head', return_value=response_mock)
 80 |     success: bool = kegg_rest.test(kegg_url=kegg_url_mock)
 81 |     head_mock.assert_has_calls(mocker.call(url=url_mock, timeout=60) for _ in range(n_tries))
 82 |     assert not success
 83 | 
 84 | 
 85 | def test_request_and_test_timeout(mocker):
 86 |     n_tries = 2
 87 |     time_out = 30
 88 |     sleep_time = 10.5
 89 |     kegg_rest = r.KEGGrest(n_tries=n_tries, time_out=time_out, sleep_time=sleep_time)
 90 |     url_mock = 'url mock'
 91 |     kegg_url_mock = mocker.MagicMock(url=url_mock)
 92 |     get_mock: mocker.MagicMock = mocker.patch('kegg_pull.rest.rq.get', side_effect=rq.exceptions.Timeout())
 93 |     sleep_mock: mocker.MagicMock = mocker.patch('kegg_pull.rest.time.sleep')
 94 |     kegg_response: r.KEGGresponse = kegg_rest.request(kegg_url=kegg_url_mock)
 95 |     get_mock.assert_has_calls(mocker.call(url=url_mock, timeout=time_out) for _ in range(n_tries))
 96 |     sleep_mock.assert_has_calls(mocker.call(sleep_time) for _ in range(n_tries))
 97 |     assert kegg_response.status == r.KEGGresponse.Status.TIMEOUT
 98 |     assert kegg_response.kegg_url == kegg_url_mock
 99 |     assert kegg_response.text_body is None
100 |     assert kegg_response.binary_body is None
101 |     sleep_mock.reset_mock()
102 |     head_mock: mocker.MagicMock = mocker.patch('kegg_pull.rest.rq.head', side_effect=rq.exceptions.Timeout())
103 |     success: bool = kegg_rest.test(kegg_url=kegg_url_mock)
104 |     head_mock.assert_has_calls(mocker.call(url=url_mock, timeout=time_out) for _ in range(n_tries))
105 |     sleep_mock.assert_has_calls(mocker.call(sleep_time) for _ in range(n_tries))
106 |     assert not success
107 | 
108 | 
# Tuples of (URL class expected to be built, KEGGrest method under test, kwargs passed to that method).
test_rest_method_data = [
    (ku.ListKEGGurl, r.KEGGrest.list, {'database': 'module'}),
    (ku.GetKEGGurl, r.KEGGrest.get, {'entry_ids': ['xyz'], 'entry_field': None}),
    (ku.InfoKEGGurl, r.KEGGrest.info, {'database': 'pathway'}),
    (ku.KeywordsFindKEGGurl, r.KEGGrest.keywords_find, {'database': '', 'keywords': ['a', 'b']}),
    (
        ku.MolecularFindKEGGurl, r.KEGGrest.molecular_find,
        {'database': '', 'formula': 'abc', 'exact_mass': None, 'molecular_weight': None}
    ),
    (ku.DatabaseConvKEGGurl, r.KEGGrest.database_conv, {'kegg_database': 'a', 'outside_database': 'b'}),
    (ku.EntriesConvKEGGurl, r.KEGGrest.entries_conv, {'target_database': 'module', 'entry_ids': ['123', 'abc']}),
    (ku.DatabaseLinkKEGGurl, r.KEGGrest.database_link, {'target_database': 'x', 'source_database': 'y'}),
    (ku.EntriesLinkKEGGurl, r.KEGGrest.entries_link, {'target_database': '123', 'entry_ids': ['x', 'y']}),
    (ku.DdiKEGGurl, r.KEGGrest.ddi, {'drug_entry_ids': ['1', '2']})]


@pt.mark.parametrize('KEGGurl,method,kwargs', test_rest_method_data)
def test_rest_method(mocker, KEGGurl: type, method: t.Callable, kwargs: dict):
    """
    Expects each ``KEGGrest`` convenience method to call ``request`` with its URL class and kwargs, to build
    the URL object from those kwargs, and to return the response produced by ``request``.
    """
    kegg_rest = r.KEGGrest()
    request_spy = mocker.spy(kegg_rest, 'request')
    create_url_spy = mocker.spy(r.KEGGrest, '_get_kegg_url')
    kegg_url_mock = mocker.MagicMock()
    url_class_path = 'kegg_pull.rest.ku.' + KEGGurl.__name__
    KEGGurlMock = mocker.patch(url_class_path, return_value=kegg_url_mock)
    # getmro is patched to report AbstractKEGGurl for the class mock.
    getmro_mock = mocker.patch('kegg_pull.rest.ins.getmro', return_value={ku.AbstractKEGGurl})
    mocker.patch('kegg_pull.rest.rq.get', return_value=mocker.MagicMock(status_code=200))
    kegg_response = method(self=kegg_rest, **kwargs)
    request_spy.assert_called_once_with(KEGGurl=KEGGurlMock, **kwargs)
    create_url_spy.assert_called_once_with(KEGGurl=KEGGurlMock, kegg_url=None, **kwargs)
    KEGGurlMock.assert_called_once_with(**kwargs)
    getmro_mock.assert_called_once_with(KEGGurlMock)
    assert create_url_spy.spy_return == kegg_url_mock
    assert request_spy.spy_return == kegg_response
    assert kegg_response.kegg_url == kegg_url_mock
139 | 
140 | 
# Each case pairs kwargs for KEGGrest._get_kegg_url with the ValueError message they are expected to produce.
test_get_kegg_url_exception_data = [
    (
        {'KEGGurl': None, 'kegg_url': None},
        'Either an instantiated kegg_url object must be provided or an extended class of AbstractKEGGurl along with the'
        ' corresponding kwargs for its constructor.'
    ),
    (
        {'KEGGurl': r.KEGGrest, 'kegg_url': None},
        'The value for KEGGurl must be an inherited class of AbstractKEGGurl. The class "KEGGrest" is not.'
    )]


@pt.mark.parametrize('kwargs,expected_message', test_get_kegg_url_exception_data)
def test_get_kegg_url_exception(kwargs: dict, expected_message: str):
    """Expects ``KEGGrest._get_kegg_url`` to raise a ``ValueError`` with the given message for the given kwargs."""
    with pt.raises(ValueError) as raised:
        r.KEGGrest._get_kegg_url(**kwargs)
    u.assert_exception(exception=raised, expected_message=expected_message)
154 | 
155 | 
156 | def test_get_kegg_url_warning(mocker, caplog):
157 |     kegg_url_mock = mocker.MagicMock()
158 |     kegg_url = r.KEGGrest._get_kegg_url(KEGGurl=ku.InfoKEGGurl, kegg_url=kegg_url_mock, database='database mock')
159 |     u.assert_warning(
160 |         message='Both an instantiated kegg_url object and KEGGurl class are provided. Using the instantiated object...', caplog=caplog)
161 |     assert kegg_url == kegg_url_mock
162 | 
163 | 
164 | test_request_and_check_error_data = [  # each case: (expected RuntimeError message, KEGGresponse status that triggers it)
165 |     ('The KEGG request failed with the following URL: url/mock', r.KEGGresponse.Status.FAILED),
166 |     ('The KEGG request timed out with the following URL: url/mock', r.KEGGresponse.Status.TIMEOUT)]
167 | 
168 | 
169 | @pt.mark.parametrize('expected_message,status', test_request_and_check_error_data)
170 | def test_request_and_check_error(mocker, expected_message: str, status: r.KEGGresponse.Status):
171 |     kegg_url_mock = mocker.MagicMock(url='url/mock')
172 |     kegg_response_mock = mocker.MagicMock(kegg_url=kegg_url_mock, status=status)
173 |     request_mock: mocker.MagicMock = mocker.patch('kegg_pull.rest.KEGGrest.request', return_value=kegg_response_mock)
174 |     with pt.raises(RuntimeError) as error:
175 |         r.request_and_check_error(kegg_url=kegg_url_mock, kwarg1='val1', kwarg2='val2')
176 |     request_mock.assert_called_once_with(KEGGurl=None, kegg_url=kegg_url_mock, kwarg1='val1', kwarg2='val2')
177 |     u.assert_exception(expected_message=expected_message, exception=error)
178 | 


--------------------------------------------------------------------------------
/src/kegg_pull/rest_cli.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Usage:
  3 |     kegg_pull rest -h | --help
  4 |     kegg_pull rest info <database> [--test] [--output=<output>]
  5 |     kegg_pull rest list <database> [--test] [--output=<output>]
  6 |     kegg_pull rest get <entry-ids> [--entry-field=<entry-field>] [--test] [--output=<output>]
  7 |     kegg_pull rest find <database> <keywords> [--test] [--output=<output>]
  8 |     kegg_pull rest find <database> (--formula=<formula>|--em=<exact-mass>...|--mw=<molecular-weight>...) [--test] [--output=<output>]
  9 |     kegg_pull rest conv <kegg-database> <outside-database> [--test] [--output=<output>]
 10 |     kegg_pull rest conv entry-ids <entry-ids> <target-database> [--test] [--output=<output>]
 11 |     kegg_pull rest link <target-database> <source-database> [--test] [--output=<output>]
 12 |     kegg_pull rest link entry-ids <entry-ids> <target-database> [--test] [--output=<output>]
 13 |     kegg_pull rest ddi <drug-entry-ids> [--test] [--output=<output>]
 14 | 
 15 | Options:
 16 |     -h --help                   Show this help message.
 17 |     info                        Executes the "info" KEGG API operation, pulling information about a KEGG database.
 18 |     <database>                  The name of the database to pull information about or entry IDs from.
 19 |     --test                      If set, test the request to ensure it works rather than sending it. Print True if the request would succeed and False if the request would fail. Ignores --output if this option is set along with --test.
 20 |     --output=<output>           Path to the file (either in a directory or ZIP archive) to store the response body from the KEGG web API operation. Prints to the console if not specified. If a ZIP archive, the file path must be in the form of /path/to/zip-archive.zip:/path/to/file (e.g. ./archive.zip:file.txt).
 21 |     list                        Executes the "list" KEGG API operation, pulling the entry IDs of the provided database.
 22 |     get                         Executes the "get" KEGG API operation, pulling the entries of the provided entry IDs.
 23 |     <entry-ids>                 Comma separated list of entry IDs (e.g. id1,id2,id3 etc.). Or if equal to "-", entry IDs are read from standard input, one entry ID per line; Press CTRL+D to finalize input or pipe (e.g. cat file.txt | kegg_pull rest get - ...).
 24 |     --entry-field=<entry-field> Optional field to extract from an entry instead of the default entry info (i.e. flat file or htext in the case of brite entries).
 25 |     find                        Executes the "find" KEGG API operation, finding entry IDs based on provided queries.
 26 |     <keywords>                  Comma separated list of keywords to search entries with (e.g. kw1,kw2,kw3 etc.). Or if equal to "-", keywords are read from standard input, one keyword per line; Press CTRL+D to finalize input or pipe (e.g. cat file.txt | kegg_pull rest find brite - ...).
 27 |     --formula=<formula>         Sequence of atoms in a chemical formula format to search for (e.g. "O5C7" searches for molecule entries containing 5 oxygen atoms and/or 7 carbon atoms).
 28 |     --em=<exact-mass>           Either a single number (e.g. --em=155.5) or two numbers (e.g. --em=155.5 --em=244.4). If a single number, searches for molecule entries with an exact mass equal to that value rounded by the last decimal point. If two numbers, searches for molecule entries with an exact mass within the two values (a range).
 29 |     --mw=<molecular-weight>     Same as --em but searches based on the molecular weight.
 30 |     conv                        Executes the "conv" KEGG API operation, converting entry IDs from an outside database to those of a KEGG database and vice versa.
 31 |     <kegg-database>             The name of the KEGG database from which to view equivalent outside database entry IDs.
 32 |     <outside-database>          The name of the non-KEGG database from which to view equivalent KEGG database entry IDs.
 33 |     entry-ids                   Perform the "conv" or "link" operation of the form that maps specific provided entry IDs to a target database.
 34 |     link                        Executes the "link" KEGG API operation, showing the IDs of entries that are connected/related to entries of other databases.
 35 |     <target-database>           The name of the database that the entry IDs of the source database or provided entry IDs are mapped to.
 36 |     <source-database>           The name of the database from which cross-references are found in the target database.
 37 |     ddi                         Executes the "ddi" KEGG API operation, searching for drug to drug interactions. Providing one entry ID reports all known interactions, while providing multiple checks if any drug pair in a given set of drugs is CI or P. If providing multiple, all entries must belong to the same database.
 38 |     <drug-entry-ids>            Comma separated list of drug entry IDs from the following databases: drug, ndc, or yj (e.g. id1,id2,id3 etc.). Or if equal to "-", entry IDs are read from standard input, one entry ID per line; Press CTRL+D to finalize input or pipe (e.g. cat file.txt | kegg_pull rest ddi - ...).
 39 | """
 40 | import docopt as d
 41 | from . import kegg_url as ku
 42 | from . import rest as r
 43 | from . import _utils as u
 44 | 
 45 | 
 46 | def main():  # Entry point of the "kegg_pull rest" subcommand: parses the arguments with docopt and runs one KEGG REST operation.
 47 |     args = d.docopt(__doc__)
 48 |     database: str = args['<database>']
 49 |     entry_ids: str | list[str] = args['<entry-ids>']
 50 |     target_database: str = args['<target-database>']
 51 |     test: bool = args['--test']
 52 |     is_binary = False  # becomes True only for a non-test "get" whose entry field is binary
 53 |     test_result: bool | None = None  # filled in only when --test is given
 54 |     kegg_response: r.KEGGresponse | None = None  # filled in only when --test is not given
 55 |     kegg_rest = r.KEGGrest()
 56 |     if args['info']:
 57 |         if test:
 58 |             test_result = kegg_rest.test(KEGGurl=ku.InfoKEGGurl, database=database)
 59 |         else:
 60 |             kegg_response = kegg_rest.info(database=database)
 61 |     elif args['list']:
 62 |         if test:
 63 |             test_result = kegg_rest.test(KEGGurl=ku.ListKEGGurl, database=database)
 64 |         else:
 65 |             kegg_response = kegg_rest.list(database=database)
 66 |     elif args['get']:
 67 |         entry_ids = u.parse_input_sequence(input_source=entry_ids)  # per the usage text: comma separated IDs, or "-" to read standard input
 68 |         entry_field: str = args['--entry-field']
 69 |         if test:
 70 |             test_result = kegg_rest.test(KEGGurl=ku.GetKEGGurl, entry_ids=entry_ids, entry_field=entry_field)
 71 |         else:
 72 |             if ku.GetKEGGurl.is_binary(entry_field=entry_field):
 73 |                 is_binary = True
 74 |             kegg_response = kegg_rest.get(entry_ids=entry_ids, entry_field=entry_field)
 75 |     elif args['find']:
 76 |         if args['<keywords>']:
 77 |             keywords = u.parse_input_sequence(input_source=args['<keywords>'])
 78 |             if test:
 79 |                 test_result = kegg_rest.test(KEGGurl=ku.KeywordsFindKEGGurl, database=database, keywords=keywords)
 80 |             else:
 81 |                 kegg_response = kegg_rest.keywords_find(database=database, keywords=keywords)
 82 |         else:  # no <keywords>, so one of --formula, --em or --mw was given (see the usage patterns)
 83 |             formula, exact_mass, molecular_weight = u.get_molecular_attribute_args(args=args)
 84 |             if test:
 85 |                 test_result = kegg_rest.test(
 86 |                     KEGGurl=ku.MolecularFindKEGGurl, database=database, formula=formula,
 87 |                     exact_mass=exact_mass, molecular_weight=molecular_weight)
 88 |             else:
 89 |                 kegg_response = kegg_rest.molecular_find(
 90 |                     database=database, formula=formula, exact_mass=exact_mass, molecular_weight=molecular_weight)
 91 |     elif args['conv']:
 92 |         if args['entry-ids']:
 93 |             entry_ids = u.parse_input_sequence(input_source=entry_ids)
 94 |             if test:
 95 |                 test_result = kegg_rest.test(KEGGurl=ku.EntriesConvKEGGurl, target_database=target_database, entry_ids=entry_ids)
 96 |             else:
 97 |                 kegg_response = kegg_rest.entries_conv(target_database=target_database, entry_ids=entry_ids)
 98 |         else:
 99 |             kegg_database = args['<kegg-database>']
100 |             outside_database = args['<outside-database>']
101 |             if test:
102 |                 test_result = kegg_rest.test(
103 |                     KEGGurl=ku.DatabaseConvKEGGurl, kegg_database=kegg_database, outside_database=outside_database)
104 |             else:
105 |                 kegg_response = kegg_rest.database_conv(kegg_database=kegg_database, outside_database=outside_database)
106 |     elif args['link']:
107 |         if args['entry-ids']:
108 |             entry_ids = u.parse_input_sequence(input_source=entry_ids)
109 |             if test:
110 |                 test_result = kegg_rest.test(KEGGurl=ku.EntriesLinkKEGGurl, target_database=target_database, entry_ids=entry_ids)
111 |             else:
112 |                 kegg_response = kegg_rest.entries_link(target_database=target_database, entry_ids=entry_ids)
113 |         else:
114 |             source_database: str = args['<source-database>']
115 |             if test:
116 |                 test_result = kegg_rest.test(
117 |                     KEGGurl=ku.DatabaseLinkKEGGurl, target_database=target_database, source_database=source_database)
118 |             else:
119 |                 kegg_response = kegg_rest.database_link(
120 |                     target_database=target_database, source_database=source_database)
121 |     else:  # the only command left in the usage patterns is "ddi"
122 |         drug_entry_ids = u.parse_input_sequence(input_source=args['<drug-entry-ids>'])
123 |         if test:
124 |             test_result = kegg_rest.test(KEGGurl=ku.DdiKEGGurl, drug_entry_ids=drug_entry_ids)
125 |         else:
126 |             kegg_response = kegg_rest.ddi(drug_entry_ids=drug_entry_ids)
127 |     if test:  # --test only prints the boolean test result; --output is not used on this path
128 |         print(test_result)
129 |     else:
130 |         if kegg_response.status == r.KEGGresponse.Status.FAILED:
131 |             raise RuntimeError(
132 |                 f'The request to the KEGG web API failed with the following URL: {kegg_response.kegg_url.url}')
133 |         elif kegg_response.status == r.KEGGresponse.Status.TIMEOUT:
134 |             raise RuntimeError(
135 |                 f'The request to the KEGG web API timed out with the following URL: {kegg_response.kegg_url.url}')
136 |         if is_binary:  # binary entry fields are output from binary_body, everything else from text_body
137 |             response_body: bytes = kegg_response.binary_body
138 |         else:
139 |             response_body: str = kegg_response.text_body
140 |         u.print_or_save(output_target=args['--output'], output_content=response_body)
141 | 


--------------------------------------------------------------------------------
/src/kegg_pull/pathway_organizer.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Flattening A Pathways Brite Hierarchy
  3 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  4 | |Functionality| for flattening a pathways Brite hierarchy (ID: 'br:br08901') into a collection of its nodes, mapping a node ID to information about it, enabling combinations with other KEGG data.
  5 | """
  6 | from __future__ import annotations
  7 | import json
  8 | import logging as log
  9 | import typing as t
 10 | from . import rest as r
 11 | from . import _utils as u
 12 | 
 13 | 
 14 | class HierarchyNode(t.TypedDict):
 15 |     """A dictionary with the following keys:"""
 16 |     name: str
 17 |     """The name of the node obtained directly from the Brite hierarchy."""
 18 |     level: int
 19 |     """The level that the node appears in the hierarchy."""
 20 |     parent: str | None
 21 |     """The key (not the name) of the parent node (None if top level node)."""
 22 |     children: list[str] | None
 23 |     """The keys (not the names) of the node's children (None if leaf node)."""
 24 |     entry_id: str | None
 25 |     """The entry ID of the node (None if the node does not correspond to a KEGG entry)."""
 26 | 
 27 | 
 28 | HierarchyNodes = dict[str, HierarchyNode]  # maps a node key to the HierarchyNode describing it
 29 | _RawHierarchyNode = t.TypedDict('_RawHierarchyNode', {'name': str, 'children': list[dict] | None})  # a node as it appears in the parsed Brite hierarchy JSON
 30 | 
 31 | 
 32 | class PathwayOrganizer(u.NonInstantiable):
 33 |     """
 34 |     Contains methods for managing a mapping of node keys to node information, these nodes coming from a pathways Brite hierarchy.
 35 |     An instantiated ``PathwayOrganizer`` object must be returned from either ``PathwayOrganizer.load_from_kegg`` or
 36 |     ``PathwayOrganizer.load_from_json``. The ``__init__`` is not meant to be called directly. The ``__str__`` method returns a JSON
 37 |     string of ``hierarchy_nodes``.
 38 | 
 39 |     :ivar dict[str, HierarchyNode] hierarchy_nodes: The mapping of node keys to node information managed by the PathwayOrganizer.
 40 |     """
 41 |     def __init__(self) -> None:
 42 |         super(PathwayOrganizer, self).__init__()
 43 |         self.hierarchy_nodes: HierarchyNodes | None = None
 44 |         self._filter_nodes: set[str] | None = None
 45 | 
 46 |     @staticmethod
 47 |     def load_from_kegg(
 48 |             top_level_nodes: set[str] | None = None, filter_nodes: set[str] | None = None,
 49 |             kegg_rest: r.KEGGrest | None = None) -> PathwayOrganizer:
 50 |         """ Pulls the Brite hierarchy from the KEGG REST API and converts it to the ``hierarchy_nodes`` mapping.
 51 | 
 52 |         :param top_level_nodes: Node names in the highest level of the hierarchy to select from. If None, all top level nodes are traversed to create the ``hierarchy_nodes``.
 53 |         :param filter_nodes: Names (not keys) of nodes to exclude from the ``hierarchy_nodes`` mapping. Neither these nodes nor any of their children will be included.
 54 |         :param kegg_rest: Optional KEGGrest object for obtaining the Brite hierarchy. A new KEGGrest object is created by default.
 55 |         :returns: The resulting PathwayOrganizer object.
 56 |         """
 57 |         pathway_org = PathwayOrganizer()
 58 |         pathway_org.hierarchy_nodes = HierarchyNodes()
 59 |         pathway_org._filter_nodes = filter_nodes
 60 |         hierarchy = PathwayOrganizer._get_hierarchy(kegg_rest=kegg_rest)
 61 |         valid_top_level_nodes = sorted(top_level_node['name'] for top_level_node in hierarchy)
 62 |         if top_level_nodes is not None:
 63 |             for top_level_node in list(top_level_nodes):
 64 |                 if top_level_node not in valid_top_level_nodes:
 65 |                     log.warning(
 66 |                         f'Top level node name "{top_level_node}" is not recognized and will be ignored. Valid values are: '
 67 |                         f'"{", ".join(valid_top_level_nodes)}"')
 68 |                     top_level_nodes.remove(top_level_node)
 69 |             hierarchy = [top_level_node for top_level_node in hierarchy if top_level_node['name'] in top_level_nodes]
 70 |         pathway_org._parse_hierarchy(level=1, raw_hierarchy_nodes=hierarchy, parent_name=None)
 71 |         return pathway_org
 72 | 
 73 |     @staticmethod
 74 |     def _get_hierarchy(kegg_rest: r.KEGGrest | None) -> list[_RawHierarchyNode]:
 75 |         """ Pulls the Brite hierarchy (to be converted to hierarchy_nodes) from the KEGG REST API.
 76 | 
 77 |         :return: The list of top level nodes that branch out into the rest of the hierarchy until reaching leaf nodes.
 78 |         """
 79 |         kegg_rest = kegg_rest if kegg_rest is not None else r.KEGGrest()
 80 |         kegg_response = kegg_rest.get(entry_ids=['br:br08901'], entry_field='json')
 81 |         text_body = kegg_response.text_body.strip()
 82 |         brite_hierarchy: dict = json.loads(s=text_body)
 83 |         return brite_hierarchy['children']
 84 | 
 85 |     def _parse_hierarchy(self, level: int, raw_hierarchy_nodes: list[_RawHierarchyNode], parent_name: str | None) -> set[str]:
 86 |         """ Recursively traverses the Brite hierarchy to create the hierarchy_nodes mapping.
 87 | 
 88 |         :param level: The current level of recursion representing the level of the node in the hierarchy.
 89 |         :param raw_hierarchy_nodes: The list of nodes in the current branch of the hierarchy being traversed.
 90 |         :param parent_name: The node key of the parent node of the current branch of the hierarchy.
 91 |         :return: The keys of the nodes added to the hierarchy_nodes property representing the children of the parent node.
 92 |         """
 93 |         nodes_added = set[str]()
 94 |         for raw_hierarchy_node in raw_hierarchy_nodes:
 95 |             node_name = raw_hierarchy_node['name']
 96 |             if self._filter_nodes is None or node_name not in self._filter_nodes:
 97 |                 if 'children' in raw_hierarchy_node.keys():
 98 |                     node_children = self._parse_hierarchy(
 99 |                         level=level+1, raw_hierarchy_nodes=raw_hierarchy_node['children'], parent_name=node_name)
100 |                     if self._filter_nodes is not None:
101 |                         expected_n_children_added = len(
102 |                             [child for child in raw_hierarchy_node['children'] if child['name'] not in self._filter_nodes])
103 |                     else:
104 |                         expected_n_children_added = len(raw_hierarchy_node['children'])
105 |                     assert len(node_children) == expected_n_children_added, f'Not all children added for node: {node_name}'
106 |                     node_key = self._add_hierarchy_node(
107 |                         name=node_name, level=level, parent=parent_name, children=node_children, entry_id=None)
108 |                 else:
109 |                     entry_id = node_name.split(' ')[0]
110 |                     entry_id = f'path:map{entry_id}'
111 |                     node_key = self._add_hierarchy_node(
112 |                         name=node_name, level=level, parent=parent_name, children=None, entry_id=entry_id)
113 |                 nodes_added.add(node_key)
114 |         return nodes_added
115 | 
116 |     def _add_hierarchy_node(self, name: str, level: int, parent: str, children: set[str] | None, entry_id: str | None) -> str:
117 |         """ Adds a Brite hierarchy node representation to the hierarchy_nodes property.
118 | 
119 |         :param name: The name of the node obtained directly from the Brite hierarchy.
120 |         :param level: The level that the node appears in the hierarchy.
121 |         :param parent: The key of the parent node (None if top level node).
122 |         :param children: The keys of the node's children (None if leaf node).
123 |         :param entry_id: The entry ID of the node; string if it represents a KEGG pathway mapping, else None.
124 |         :return: The key chosen for the node, equal to its entry ID if not None, else the name of the Node.
125 |         """
126 |         key = entry_id if entry_id is not None else name
127 |         assert key not in self.hierarchy_nodes.keys(), f'Duplicate brite hierarchy node name {key}'
128 |         children = sorted(children) if children is not None else None
129 |         self.hierarchy_nodes[key] = HierarchyNode(name=name, level=level, parent=parent, children=children, entry_id=entry_id)
130 |         return key
131 | 
132 |     def __str__(self) -> str:
133 |         """ Converts the hierarchy nodes to a JSON string.
134 | 
135 |         :return: The JSON string version of the hierarchy nodes.
136 |         """
137 |         return json.dumps(self.hierarchy_nodes, indent=2)
138 | 
139 |     _schema = {
140 |         'type': 'object',
141 |         'minProperties': 1,
142 |         'additionalProperties': False,
143 |         'patternProperties': {
144 |             '^.+$': {
145 |                 'type': 'object',
146 |                 'required': ['name', 'level', 'parent', 'children', 'entry_id'],
147 |                 'additionalProperties': False,
148 |                 'properties': {
149 |                     'name': {
150 |                         'type': 'string',
151 |                         'minLength': 1
152 |                     },
153 |                     'level': {
154 |                         'type': 'integer',
155 |                         'minimum': 1
156 |                     },
157 |                     'parent': {
158 |                         'type': ['string', 'null'],
159 |                         'minLength': 1
160 |                     },
161 |                     'children': {
162 |                         'minItems': 1,
163 |                         'type': ['array', 'null'],
164 |                         'items': {
165 |                             'type': 'string',
166 |                             'minLength': 1
167 |                         }
168 |                     },
169 |                     'entry_id': {
170 |                         'type': ['string', 'null'],
171 |                         'minLength': 1
172 |                     }
173 |                 }
174 |             }
175 |         }
176 |     }
177 | 
178 |     @staticmethod
179 |     def load_from_json(file_path: str) -> PathwayOrganizer:
180 |         """ Loads the ``hierarchy_nodes`` mapping that was cached in a JSON file using ``load_from_kegg`` followed by ``save_to_json``.
181 | 
182 |         :param file_path: Path to the JSON file. If reading from a ZIP archive, the file path must be in the following format: /path/to/zip-archive.zip:/path/to/file (e.g. ./archive.zip:hierarchy-nodes.json).
183 |         :returns: The resulting PathwayOrganizer object.
184 |         :raises ValidationError: Raised if the JSON file does not follow the correct JSON schema. Should follow the correct schema if ``hierarchy_nodes`` was cached using ``load_from_kegg`` followed by ``save_to_json`` and without any additional alteration.
185 |         """
186 |         pathway_org = PathwayOrganizer()
187 |         hierarchy_nodes: HierarchyNodes = u.load_json_file(
188 |             file_path=file_path, json_schema=PathwayOrganizer._schema,
189 |             validation_error_message=f'Failed to load the hierarchy nodes. The pathway organizer JSON file at {file_path} is '
190 |                                      f'corrupted and will need to be re-created.')
191 |         pathway_org.hierarchy_nodes = hierarchy_nodes
192 |         return pathway_org
193 | 
194 |     def save_to_json(self, file_path: str) -> None:
195 |         """ Saves the ``hierarchy_nodes`` mapping to a JSON file to cache it.
196 | 
197 |         :param file_path: The path to the JSON file to save the ``hierarchy_nodes`` mapping. If saving in a ZIP archive, the file path must be in the following format: /path/to/zip-archive.zip:/path/to/file (e.g. ./archive.zip:hierarchy-nodes.json).
198 |         """
199 |         json_string = str(self)
200 |         u.save_output(output_target=file_path, output_content=json_string)
201 | 


--------------------------------------------------------------------------------
/dev/test_kegg_url.py:
--------------------------------------------------------------------------------
  1 | # noinspection PyPackageRequirements
  2 | import pytest as pt
  3 | import requests as rq
  4 | import kegg_pull.kegg_url as ku
  5 | import dev.utils as u
  6 | 
  7 | 
  8 | test_validate_exception_data = [  # each case: (KEGGurl class, constructor kwargs, expected message after the "Cannot create URL - " prefix)
  9 |     (ku.KeywordsFindKEGGurl, {'database': 'ko', 'keywords': ['keyword'] * 500},
 10 |      'The KEGG URL length of 4028 exceeds the limit of 4000'),
 11 |     (ku.ListKEGGurl, {'database': 'ligand'},
 12 |      'Invalid database name: "ligand". Valid values are: <org>, ag, atc, brite, brite_ja, compound, compound_ja, '
 13 |      'dgroup, dgroup_ja, disease, disease_ja, drug, drug_ja, enzyme, genome, glycan, jtc, ko, module, ndc, network, '
 14 |      'organism, pathway, rclass, reaction, variant, vg, vp, yj. Where <org> is an organism code or T number.'),
 15 |     (ku.InfoKEGGurl, {'database': 'organism'},
 16 |      'Invalid database name: "organism". Valid values are: <org>, ag, brite, compound, dgroup, disease, drug, '
 17 |      'enzyme, genes, genome, glycan, kegg, ko, ligand, module, network, pathway, rclass, reaction, variant, vg, vp.'
 18 |      ' Where <org> is an organism code or T number.'),
 19 |     (ku.GetKEGGurl, {'entry_ids': [], 'entry_field': None}, 'Entry IDs must be specified for the KEGG get operation'),
 20 |     (ku.GetKEGGurl, {'entry_ids': ['x'], 'entry_field': 'invalid-entry-field'},
 21 |      'Invalid KEGG entry field: "invalid-entry-field". Valid values are: aaseq, conf, image, json, kcf, kgml, mol, '
 22 |      'ntseq.'),
 23 |     (ku.GetKEGGurl, {'entry_ids': ['x', 'y'], 'entry_field': 'json'},
 24 |      'The KEGG entry field: "json" only supports requests of one KEGG entry at a time but 2 entry IDs are provided'),
 25 |     (ku.GetKEGGurl, {'entry_ids': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11']},
 26 |      f'The maximum number of entry IDs is {ku.GetKEGGurl.MAX_ENTRY_IDS_PER_URL} but 11 were provided'),
 27 |     (ku.KeywordsFindKEGGurl, {'database': 'not-brite', 'keywords': []}, 'No search keywords specified'),
 28 |     (ku.KeywordsFindKEGGurl, {'database': 'brite', 'keywords': ['x']},
 29 |      'Invalid database name: "brite". Valid values are: <org>, ag, atc, brite_ja, compound, compound_ja, dgroup, '
 30 |      'dgroup_ja, disease, disease_ja, drug, drug_ja, enzyme, genes, genome, glycan, jtc, ko, ligand, module, ndc, '
 31 |      'network, pathway, rclass, reaction, variant, vg, vp, yj. Where <org> is an organism code or T number.'),
 32 |     (ku.MolecularFindKEGGurl, {'database': 'glycan'}, 'Invalid molecular database name: "glycan". Valid values are: compound, drug.'),
 33 |     (ku.MolecularFindKEGGurl, {'database': 'drug'}, 'Must provide either a chemical formula, exact mass, or molecular weight option'),
 34 |     (ku.MolecularFindKEGGurl, {'database': 'compound', 'exact_mass': ()},
 35 |      'Exact mass range can only be constructed from 2 values but 0 are provided: '),
 36 |     (ku.MolecularFindKEGGurl, {'database': 'compound', 'exact_mass': (1.1, 2.2, 3.3)},
 37 |      'Exact mass range can only be constructed from 2 values but 3 are provided: 1.1, 2.2, 3.3'),
 38 |     (ku.MolecularFindKEGGurl, {'database': 'compound', 'molecular_weight': ()},
 39 |      'Molecular weight range can only be constructed from 2 values but 0 are provided: '),
 40 |     (ku.MolecularFindKEGGurl, {'database': 'compound', 'molecular_weight': (10, 20, 30)},
 41 |      'Molecular weight range can only be constructed from 2 values but 3 are provided: 10, 20, 30'),
 42 |     (ku.MolecularFindKEGGurl, {'database': 'drug', 'exact_mass': (30.3, 20.2)},
 43 |      'The first value in the range must be less than the second. Values provided: 30.3-20.2'),
 44 |     (ku.MolecularFindKEGGurl, {'database': 'drug', 'exact_mass': (10.1, 10.1)},
 45 |      'The first value in the range must be less than the second. Values provided: 10.1-10.1'),
 46 |     (ku.MolecularFindKEGGurl, {'database': 'drug', 'molecular_weight': (303, 202)},
 47 |      'The first value in the range must be less than the second. Values provided: 303-202'),
 48 |     (ku.MolecularFindKEGGurl, {'database': 'drug', 'molecular_weight': (101, 101)},
 49 |      'The first value in the range must be less than the second. Values provided: 101-101'),
 50 |     (ku.DatabaseConvKEGGurl, {'kegg_database': 'genes', 'outside_database': ''},
 51 |      'Invalid KEGG database: "genes". Valid values are: <org>, compound, drug, glycan. Where <org> is an organism '
 52 |      'code or T number.'),
 53 |     (ku.DatabaseConvKEGGurl, {'kegg_database': 'drug', 'outside_database': 'glycan'},
 54 |      'Invalid outside database: "glycan". Valid values are: chebi, ncbi-geneid, ncbi-proteinid, pubchem, uniprot.'),
 55 |     (ku.DatabaseConvKEGGurl, {'kegg_database': 'organism-T-number', 'outside_database': 'pubchem'},
 56 |      'KEGG database "organism-T-number" is a gene database but outside database "pubchem" is not.'),
 57 |     (ku.DatabaseConvKEGGurl, {'kegg_database': 'compound', 'outside_database': 'ncbi-geneid'},
 58 |      'KEGG database "compound" is a molecule database but outside database "ncbi-geneid" is not.'),
 59 |     (ku.EntriesConvKEGGurl, {'target_database': 'rclass', 'entry_ids': []},
 60 |      'Invalid target database: "rclass". Valid values are: <org>, chebi, compound, drug, genes, glycan, ncbi-geneid,'
 61 |      ' ncbi-proteinid, pubchem, uniprot. Where <org> is an organism code or T number.'),
 62 |     (ku.EntriesConvKEGGurl, {'target_database': 'chebi', 'entry_ids': []},
 63 |      'Entry IDs must be specified for this KEGG "conv" operation'),
 64 |     (ku.DatabaseLinkKEGGurl, {'target_database': 'genes', 'source_database': ''},
 65 |      'Invalid database name: "genes". Valid values are: <org>, ag, atc, brite, compound, dgroup, disease, drug, '
 66 |      'enzyme, genome, glycan, jtc, ko, module, ndc, network, pathway, pubmed, rclass, reaction, variant, vg, vp, yj.'
 67 |      ' Where <org> is an organism code or T number.'),
 68 |     (ku.DatabaseLinkKEGGurl, {'target_database': 'ndc', 'source_database': 'kegg'},
 69 |      'Invalid database name: "kegg". Valid values are: <org>, ag, atc, brite, compound, dgroup, disease, drug, '
 70 |      'enzyme, genome, glycan, jtc, ko, module, ndc, network, pathway, pubmed, rclass, reaction, variant, vg, vp, yj.'
 71 |      ' Where <org> is an organism code or T number.'),
 72 |     (ku.DatabaseLinkKEGGurl, {'target_database': 'drug', 'source_database': 'drug'},
 73 |      'The source and target database cannot be identical. Database selected: drug.'),
 74 |     (ku.EntriesLinkKEGGurl, {'target_database': 'ligand', 'entry_ids': []},
 75 |      'Invalid database name: "ligand". Valid values are: <org>, ag, atc, brite, compound, dgroup, disease, drug, '
 76 |      'enzyme, genes, genome, glycan, jtc, ko, module, ndc, network, pathway, pubmed, rclass, reaction, variant, vg, '
 77 |      'vp, yj. Where <org> is an organism code or T number.'),
 78 |     (ku.EntriesLinkKEGGurl, {'target_database': 'yj', 'entry_ids': []},
 79 |      'At least one entry ID must be specified to perform the link operation'),
 80 |     (ku.DdiKEGGurl, {'drug_entry_ids': []}, 'At least one drug entry ID must be specified for the DDI operation')]
 81 | 
 82 | 
 83 | @pt.mark.parametrize('KEGGurl,kwargs,expected_message', test_validate_exception_data)
 84 | def test_validate_exception(KEGGurl: type, kwargs: dict, expected_message: str):
 85 |     with pt.raises(ValueError) as error:
 86 |         KEGGurl(**kwargs)
 87 |     expected_message = f'Cannot create URL - {expected_message}'
 88 |     u.assert_exception(expected_message=expected_message, exception=error)
 89 | 
 90 | 
 91 | test_validate_warning_data = [  # Each case: (KEGGurl class, constructor kwargs, expected warning message, expected URL path after the base URL)
 92 |     (ku.MolecularFindKEGGurl, {'database': 'compound', 'formula': 'O3', 'exact_mass': 20.2},
 93 |      'Only a chemical formula, exact mass, or molecular weight is used to construct the URL. Using formula...', 'find/compound/O3/formula'),
 94 |     (ku.MolecularFindKEGGurl, {'database': 'drug', 'formula': 'O3', 'molecular_weight': 200},
 95 |      'Only a chemical formula, exact mass, or molecular weight is used to construct the URL. Using formula...', 'find/drug/O3/formula'),
 96 |     (ku.MolecularFindKEGGurl, {'database': 'compound', 'exact_mass': 20.2, 'molecular_weight': 200},
 97 |      'Both an exact mass and molecular weight are provided. Using exact mass...', 'find/compound/20.2/exact_mass')]
 98 | 
 99 | 
100 | @pt.mark.parametrize('KEGGurl,kwargs,expected_message,url', test_validate_warning_data)
101 | def test_validate_warning(KEGGurl: type, kwargs: dict, expected_message: str, url: str, caplog):
102 |     kegg_url: ku.AbstractKEGGurl = KEGGurl(**kwargs)
103 |     u.assert_warning(message=expected_message, caplog=caplog)
104 |     expected_url = f'{ku.BASE_URL}/{url}'
105 |     assert kegg_url.url == expected_url
106 | 
107 | 
108 | test_create_rest_options_data = [  # Each case: (KEGGurl class, constructor kwargs, REST operation name, expected REST options part of the URL)
109 |     (ku.ListKEGGurl, {'database': 'vg'}, 'list', 'vg'),
110 |     (ku.ListKEGGurl, {'database': 'organism-code'}, 'list', 'organism-code'),
111 |     (ku.ListKEGGurl, {'database': 'organism'}, 'list', 'organism'),
112 |     (ku.InfoKEGGurl, {'database': 'ligand'}, 'info', 'ligand'),
113 |     (ku.GetKEGGurl, {'entry_ids': ['x'], 'entry_field': None}, 'get', 'x'),
114 |     (ku.GetKEGGurl, {'entry_ids': ['x'], 'entry_field': 'image'}, 'get', 'x/image'),
115 |     (ku.GetKEGGurl, {'entry_ids': ['x'], 'entry_field': 'aaseq'}, 'get', 'x/aaseq'),
116 |     (ku.GetKEGGurl, {'entry_ids': ['x', 'y'], 'entry_field': None}, 'get', 'x+y'),
117 |     (ku.GetKEGGurl, {'entry_ids': ['x', 'y', 'z'], 'entry_field': 'ntseq'}, 'get', 'x+y+z/ntseq'),
118 |     (ku.KeywordsFindKEGGurl, {'database': 'organism-T-number', 'keywords': ['key', 'word']}, 'find', 'organism-T-number/key+word'),
119 |     (ku.MolecularFindKEGGurl, {'database': 'drug', 'formula': 'CH4'}, 'find', 'drug/CH4/formula'),
120 |     (ku.MolecularFindKEGGurl, {'database': 'compound', 'exact_mass': 30.3}, 'find', 'compound/30.3/exact_mass'),
121 |     (ku.MolecularFindKEGGurl, {'database': 'drug', 'molecular_weight': 300}, 'find', 'drug/300/mol_weight'),
122 |     (ku.MolecularFindKEGGurl, {'database': 'drug', 'exact_mass': (20.2, 30.3)}, 'find', 'drug/20.2-30.3/exact_mass'),
123 |     (ku.MolecularFindKEGGurl, {'database': 'drug', 'molecular_weight': (200, 300)}, 'find', 'drug/200-300/mol_weight'),
124 |     (ku.DatabaseConvKEGGurl, {'kegg_database': 'organism-code', 'outside_database': 'uniprot'}, 'conv', 'organism-code/uniprot'),
125 |     (ku.DatabaseConvKEGGurl, {'kegg_database': 'glycan', 'outside_database': 'chebi'}, 'conv', 'glycan/chebi'),
126 |     (ku.EntriesConvKEGGurl, {'target_database': 'genes', 'entry_ids': ['x', 'y', 'z']}, 'conv', 'genes/x+y+z'),
127 |     (ku.EntriesConvKEGGurl, {'target_database': 'ncbi-proteinid', 'entry_ids': ['a']}, 'conv', 'ncbi-proteinid/a'),
128 |     (ku.DatabaseLinkKEGGurl, {'target_database': 'pubmed', 'source_database': 'atc'}, 'link', 'pubmed/atc'),
129 |     (ku.EntriesLinkKEGGurl, {'target_database': 'genes', 'entry_ids': ['a', 'b', 'c']}, 'link', 'genes/a+b+c'),
130 |     (ku.EntriesLinkKEGGurl, {'target_database': 'jtc', 'entry_ids': ['x']}, 'link', 'jtc/x'),
131 |     (ku.DdiKEGGurl, {'drug_entry_ids': ['x', 'y']}, 'ddi', 'x+y')]
132 | 
133 | 
134 | @pt.mark.parametrize('KEGGurl,kwargs,rest_operation,rest_options', test_create_rest_options_data)
135 | def test_create_rest_options(KEGGurl: type, kwargs: dict, rest_operation: str, rest_options: str):
136 |     kegg_url: ku.AbstractKEGGurl = KEGGurl(**kwargs)
137 |     expected_url = f'{ku.BASE_URL}/{rest_operation}/{rest_options}'
138 |     assert str(kegg_url) == kegg_url.url == expected_url
139 |     if KEGGurl == ku.GetKEGGurl:
140 |         assert kegg_url.__getattribute__('multiple_entry_ids') == (len(kegg_url.__getattribute__('entry_ids')) > 1)
141 | 
142 | 
143 | @pt.fixture(name='_')
144 | def reset_organism_set():
145 |     ku.AbstractKEGGurl._organism_set = None  # Reset the class-level _organism_set (presumably the cache behind organism_set) before the test runs
146 | 
147 | 
148 | @pt.mark.disable_mock_organism_set
149 | def test_organism_set(mocker, _):
150 |     text_mock = """
151 |         T06555	psyt	Candidatus Prometheoarchaeum syntrophicum	Prokaryotes;Archaea;Lokiarchaeota;Prometheoarchaeum
152 |         T03835	agw	Archaeon GW2011_AR10	Prokaryotes;Archaea;unclassified Archaea
153 |         T03843	arg	Archaeon GW2011_AR20	Prokaryotes;Archaea;unclassified Archaea
154 |     """
155 |     response_mock = mocker.MagicMock(status_code=200, text=text_mock)
156 |     get_mock: mocker.MagicMock = mocker.patch('kegg_pull.kegg_url.rq.get', return_value=response_mock)
157 |     actual_organism_set = ku.AbstractKEGGurl.organism_set
158 |     get_mock.assert_called_once_with(url=f'{ku.BASE_URL}/list/organism', timeout=60)
159 |     expected_organism_set = {'agw', 'T03835', 'T06555', 'T03843', 'psyt', 'arg'}
160 |     assert actual_organism_set == expected_organism_set
161 |     get_mock.reset_mock()
162 |     actual_organism_set = ku.AbstractKEGGurl.organism_set
163 |     get_mock.assert_not_called()
164 |     assert actual_organism_set == expected_organism_set
165 | 
166 | 
167 | @pt.mark.parametrize('timeout', [True, False])
168 | @pt.mark.disable_mock_organism_set
169 | def test_organism_set_unsuccessful(mocker, timeout: bool, _):
170 |     get_function_patch_path = 'kegg_pull.kegg_url.rq.get'
171 |     url = f'{ku.BASE_URL}/list/organism'
172 |     error_message = 'The request to the KEGG web API {} while fetching the organism set using the URL: {}'
173 |     if timeout:
174 |         get_mock: mocker.MagicMock = mocker.patch(get_function_patch_path, side_effect=rq.exceptions.Timeout())
175 |         error_message: str = error_message.format('timed out', url)
176 |     else:
177 |         failed_status_code = 404
178 |         get_mock: mocker.MagicMock = mocker.patch(
179 |             get_function_patch_path, return_value=mocker.MagicMock(status_code=failed_status_code))
180 |         error_message: str = error_message.format(f'failed with status code {failed_status_code}', url)
181 |     with pt.raises(RuntimeError) as error:
182 |         ku.AbstractKEGGurl.organism_set()
183 |     get_mock.assert_called_once_with(url=url, timeout=60)
184 |     u.assert_exception(expected_message=error_message, exception=error)
185 | 


--------------------------------------------------------------------------------
/src/kegg_pull/rest.py:
--------------------------------------------------------------------------------
  1 | """
  2 | KEGG REST API Operations
  3 | ~~~~~~~~~~~~~~~~~~~~~~~~
  4 | |Interface for| the KEGG REST API including all its operations.
  5 | """
  6 | import typing as t
  7 | import enum as e
  8 | import requests as rq
  9 | import time
 10 | import inspect as ins
 11 | import logging as log
 12 | from . import kegg_url as ku
 13 | from . import _utils as u
 14 | 
 15 | 
 16 | class KEGGresponse(u.NonInstantiable):
 17 |     """
 18 |     Class containing details of a response from the KEGG REST API.
 19 | 
 20 |     :ivar Status status: The status of the KEGG response.
 21 |     :ivar AbstractKEGGurl kegg_url: The URL used in the request to the KEGG REST API that resulted in the KEGG response.
 22 |     :ivar str text_body: The text version of the response body.
 23 |     :ivar bytes binary_body: The binary version of the response body.
 24 |     """
 25 |     class Status(e.Enum):
 26 |         """The status of a KEGG response."""
 27 |         SUCCESS = 1
 28 |         FAILED = 2
 29 |         TIMEOUT = 3
 30 | 
 31 |     def __init__(self, status: Status, kegg_url: ku.AbstractKEGGurl, text_body: str = None, binary_body: bytes = None) -> None:
 32 |         """
 33 |         :param status: The status of the KEGG response.
 34 |         :param kegg_url: The URL used in the request to the KEGG REST API that resulted in the KEGG response.
 35 |         :param text_body: The text version of the response body.
 36 |         :param binary_body: The binary version of the response body.
 37 |         :raises ValueError: Raised if the status is SUCCESS but a response body is not provided.
 38 |         """
 39 |         super(KEGGresponse, self).__init__()
 40 |         if status == KEGGresponse.Status.SUCCESS and (text_body is None or binary_body is None or text_body == '' or binary_body == b''):
 41 |             raise ValueError('A KEGG response cannot be marked as successful if its response body is empty')
 42 |         self.status = status
 43 |         self.kegg_url = kegg_url
 44 |         self.text_body = text_body
 45 |         self.binary_body = binary_body
 46 | 
 47 | 
 48 | class KEGGrest:
 49 |     """Class containing methods for making requests to the KEGG REST API, including all the KEGG REST API operations."""
 50 |     def __init__(self, n_tries: int | None = 3, time_out: int | None = 60, sleep_time: float | None = 5.0):
 51 |         """
 52 |         :param n_tries: The number of times to try to make a request (can succeed the first time, or any of n_tries, or none of the tries).
 53 |         :param time_out: The number of seconds to wait for a request until marking it as timed out.
 54 |         :param sleep_time: The number of seconds to wait in between timed out requests or blacklisted requests.
 55 |         """
 56 |         self._n_tries = n_tries if n_tries is not None else 3
 57 |         self._time_out = time_out if time_out is not None else 60
 58 |         self._sleep_time = sleep_time if sleep_time is not None else 5.0
 59 |         if self._n_tries < 1:
 60 |             raise ValueError(f'{self._n_tries} is not a valid number of tries to make a KEGG request.')
 61 | 
 62 |     def request(self, KEGGurl: type[ku.AbstractKEGGurl] = None, kegg_url: ku.AbstractKEGGurl = None, **kwargs) -> KEGGresponse:
 63 |         """ General KEGG request function based on a given KEGG URL (either a class that is instantiated or an already instantiated KEGG URL object).
 64 | 
 65 |         :param KEGGurl: Optional KEGG URL class (extended from AbstractKEGGurl) that's instantiated with provided keyword arguments.
 66 |         :param kegg_url: Optional KEGGurl object that's already instantiated (used if KEGGurl class is not provided).
 67 |         :param kwargs: The keyword arguments used to instantiate the KEGGurl class, if provided.
 68 |         :return: The KEGG response.
 69 |         """
 70 |         kegg_url = KEGGrest._get_kegg_url(KEGGurl=KEGGurl, kegg_url=kegg_url, **kwargs)
 71 |         status: KEGGresponse.Status | None = None
 72 |         for _ in range(self._n_tries):
 73 |             try:
 74 |                 response = rq.get(url=kegg_url.url, timeout=self._time_out)
 75 |                 if response.status_code == 200:
 76 |                     return KEGGresponse(
 77 |                         status=KEGGresponse.Status.SUCCESS, kegg_url=kegg_url, text_body=response.text, binary_body=response.content)
 78 |                 else:
 79 |                     status = KEGGresponse.Status.FAILED
 80 |                 if response.status_code == 403:
 81 |                     # 403 forbidden. KEGG may have blocked the request due to too many requests in too little time.
 82 |                     # In case blacklisting, sleep to allow time for KEGG to unblock further requests.
 83 |                     time.sleep(self._sleep_time)
 84 |             except rq.exceptions.Timeout:
 85 |                 status = KEGGresponse.Status.TIMEOUT
 86 |                 time.sleep(self._sleep_time)
 87 |         return KEGGresponse(status=status, kegg_url=kegg_url)
 88 | 
 89 |     @staticmethod
 90 |     def _get_kegg_url(
 91 |             KEGGurl: type[ku.AbstractKEGGurl] | None = None, kegg_url: ku.AbstractKEGGurl | None = None, **kwargs) -> ku.AbstractKEGGurl:
 92 |         """ Gets the KEGGurl object to be used to make the request to KEGG.
 93 | 
 94 |         :param KEGGurl: Optional KEGGurl class to instantiate a KEGGurl object using keyword arguments.
 95 |         :param kegg_url: Instantiated KEGGurl object that's simply returned if provided (used if the KEGGurl class is not provided).
 96 |         :param kwargs: The keyword arguments used to instantiate the KEGGurl object if a KEGGurl class is provided.
 97 |         :return: The KEGGurl object.
 98 |         :raises ValueError: Raised if both a class and object are provided or the class does not inherit from AbstractKEGGurl.
 99 |         """
100 |         if KEGGurl is None and kegg_url is None:
101 |             raise ValueError(
102 |                 f'Either an instantiated kegg_url object must be provided or an extended class of '
103 |                 f'{ku.AbstractKEGGurl.__name__} along with the corresponding kwargs for its constructor.')
104 |         if kegg_url is not None and KEGGurl is not None:
105 |             log.warning(
106 |                 'Both an instantiated kegg_url object and KEGGurl class are provided. Using the instantiated object...')
107 |         if kegg_url is not None:
108 |             return kegg_url
109 |         if ku.AbstractKEGGurl not in ins.getmro(KEGGurl):
110 |             raise ValueError(
111 |                 f'The value for KEGGurl must be an inherited class of {ku.AbstractKEGGurl.__name__}. '
112 |                 f'The class "{KEGGurl.__name__}" is not.')
113 |         kegg_url = KEGGurl(**kwargs)
114 |         return kegg_url
115 | 
116 |     def test(
117 |             self, KEGGurl: type[ku.AbstractKEGGurl] | None = None, kegg_url: ku.AbstractKEGGurl | None = None,
118 |             **kwargs) -> bool:
119 |         """ Tests if a KEGGurl will succeed upon being used in a request to the KEGG REST API.
120 | 
121 |         :param KEGGurl: Optional KEGGurl class used to instantiate a KEGGurl object given keyword arguments.
122 |         :param kegg_url: KEGGurl object that's already instantiated (used if a KEGGurl class is not provided).
123 |         :param kwargs: The keyword arguments used to instantiated the KEGGurl object from the KEGGurl class, if provided.
124 |         :return: True if the URL would succeed, false if it would fail or time out.
125 |         """
126 |         kegg_url = KEGGrest._get_kegg_url(KEGGurl=KEGGurl, kegg_url=kegg_url, **kwargs)
127 |         for _ in range(self._n_tries):
128 |             try:
129 |                 response = rq.head(url=kegg_url.url, timeout=self._time_out)
130 |                 if response.status_code == 200:
131 |                     return True
132 |             except rq.exceptions.Timeout:
133 |                 time.sleep(self._sleep_time)
134 |         return False
135 | 
136 |     def list(self, database: str) -> KEGGresponse:
137 |         """ Executes the "list" KEGG API operation, pulling the entry IDs of the provided database.
138 | 
139 |         :param database: The database from which to pull entry IDs.
140 |         :return: The KEGG response.
141 |         """
142 |         return self.request(KEGGurl=ku.ListKEGGurl, database=database)
143 | 
144 |     def get(self, entry_ids: t.List[str], entry_field: str | None = None) -> KEGGresponse:
145 |         """ Executes the "get" KEGG API operation, pulling the entries of the provided entry IDs.
146 | 
147 |         :param entry_ids: The IDs of entries to pull.
148 |         :param entry_field: Optional field to extract from the entries.
149 |         :return: The KEGG response.
150 |         """
151 |         return self.request(KEGGurl=ku.GetKEGGurl, entry_ids=entry_ids, entry_field=entry_field)
152 | 
153 |     def info(self, database: str) -> KEGGresponse:
154 |         """ Executes the "info" KEGG API operation, pulling information about a KEGG database.
155 | 
156 |         :param database: The database to pull information about.
157 |         :return: The KEGG response
158 |         """
159 |         return self.request(KEGGurl=ku.InfoKEGGurl, database=database)
160 | 
161 |     def keywords_find(self, database: str, keywords: t.List[str]) -> KEGGresponse:
162 |         """ Executes the "find" KEGG API operation, finding entry IDs based on keywords to search in entries.
163 | 
164 |         :param database: The name of the database containing entries to search for.
165 |         :param keywords: The keywords to search in entries.
166 |         :return: The KEGG response
167 |         """
168 |         return self.request(KEGGurl=ku.KeywordsFindKEGGurl, database=database, keywords=keywords)
169 | 
170 |     def molecular_find(
171 |             self, database: str, formula: str | None = None, exact_mass: float | tuple[float, float] | None = None,
172 |             molecular_weight: int | tuple[int, int] | None = None) -> KEGGresponse:
173 |         """ Executes the "find" KEGG API operation, finding entry IDs in chemical databases based on one (and only one) choice of three molecular attributes of the entries.
174 | 
175 |         :param database: The name of the chemical database to search for entries in.
176 |         :param formula: The chemical formula (one of three choices) of chemical entries to search for.
177 |         :param exact_mass: The exact mass (one of three choices) of chemical entries to search for (single value or range).
178 |         :param molecular_weight: The molecular weight (one of three choices) of chemical entries to search for (single value or range).
179 |         :return: The KEGG response
180 |         """
181 |         return self.request(
182 |             KEGGurl=ku.MolecularFindKEGGurl, database=database, formula=formula, exact_mass=exact_mass, molecular_weight=molecular_weight)
183 | 
184 |     def database_conv(self, kegg_database: str, outside_database: str) -> KEGGresponse:
185 |         """ Executes the "conv" KEGG API operation, converting the entry IDs of a KEGG database to those of an outside database.
186 | 
187 |         :param kegg_database: The name of the KEGG database to pull converted entry IDs from.
188 |         :param outside_database: The name of the outside database to pull converted entry IDs from.
189 |         :return: The KEGG response.
190 |         """
191 |         return self.request(KEGGurl=ku.DatabaseConvKEGGurl, kegg_database=kegg_database, outside_database=outside_database)
192 | 
193 |     def entries_conv(self, target_database: str, entry_ids: t.List[str]) -> KEGGresponse:
194 |         """ Executes the "conv" KEGG API operation, converting provided entry IDs from one database to the form of a target database.
195 | 
196 |         :param target_database: The name of the database to get converted entry IDs from.
197 |         :param entry_ids: The entry IDs to convert to the form of the target database.
198 |         :return: The KEGG response.
199 |         """
200 |         return self.request(KEGGurl=ku.EntriesConvKEGGurl, target_database=target_database, entry_ids=entry_ids)
201 | 
202 |     def database_link(self, target_database: str, source_database: str) -> KEGGresponse:
203 |         """ Executes the "link" KEGG API operation, showing the IDs of entries in one KEGG database that are connected/related to entries of another KEGG database.
204 | 
205 |         :param target_database: One of the two KEGG databases to pull linked entries from.
206 |         :param source_database: The other KEGG database to link entries from the target database.
207 |         :return: The KEGG response
208 |         """
209 |         return self.request(KEGGurl=ku.DatabaseLinkKEGGurl, target_database=target_database, source_database=source_database)
210 | 
211 |     def entries_link(self, target_database: str, entry_ids: t.List[str]) -> KEGGresponse:
212 |         """ Executes the "link" KEGG API operation, showing the IDs of entries that are connected/related to entries of a provided databases.
213 | 
214 |         :param target_database: The KEGG database to find links to the provided entries.
215 |         :param entry_ids: The IDs of the entries to link to entries in the target database.
216 |         :return: The KEGG response
217 |         """
218 |         return self.request(KEGGurl=ku.EntriesLinkKEGGurl, target_database=target_database, entry_ids=entry_ids)
219 | 
220 |     def ddi(self, drug_entry_ids: t.List[str]) -> KEGGresponse:
221 |         """ Executes the "ddi" KEGG API operation, searching for drug to drug interactions. Providing one entry ID reports all known interactions, while providing multiple checks if any drug pair in a given set of drugs is CI or P. If providing multiple, all entries must belong to the same database.
222 | 
223 |         :param drug_entry_ids: The IDs of the drug entries within which search for drug interactions.
224 |         :return: The KEGG response
225 |         """
226 |         return self.request(KEGGurl=ku.DdiKEGGurl, drug_entry_ids=drug_entry_ids)
227 | 
228 | 
229 | def request_and_check_error(
230 |         kegg_rest: KEGGrest | None = None, KEGGurl: type[ku.AbstractKEGGurl] | None = None,
231 |         kegg_url: ku.AbstractKEGGurl = None, **kwargs) -> KEGGresponse:
232 |     """ Makes a general request to the KEGG REST API using a KEGGrest object. Creates the KEGGrest object if one is not provided.
233 |     Additionally, raises an exception if the request is not successful, specifying the URL that was unsuccessful.
234 | 
235 |     :param kegg_rest: The KEGGrest object to perform the request. If None, one is created with the default parameters.
236 |     :param KEGGurl: Optional KEGG URL class (extended from AbstractKEGGurl) that's instantiated with provided keyword arguments.
237 |     :param kegg_url: Optional KEGGurl object that's already instantiated (used if KEGGurl class is not provided).
238 |     :param kwargs: The keyword arguments used to instantiate the KEGGurl class, if provided.
239 |     :return: The KEGG response
240 |     :raises RuntimeError: Raised if the request fails or times out.
241 |     """
242 |     kegg_rest = kegg_rest if kegg_rest is not None else KEGGrest()
243 |     kegg_response = kegg_rest.request(KEGGurl=KEGGurl, kegg_url=kegg_url, **kwargs)
244 |     if kegg_response.status == KEGGresponse.Status.FAILED:
245 |         raise RuntimeError(f'The KEGG request failed with the following URL: {kegg_response.kegg_url.url}')
246 |     elif kegg_response.status == KEGGresponse.Status.TIMEOUT:
247 |         raise RuntimeError(f'The KEGG request timed out with the following URL: {kegg_response.kegg_url.url}')
248 |     return kegg_response
249 | 


--------------------------------------------------------------------------------
/dev/test_map.py:
--------------------------------------------------------------------------------
  1 | # noinspection PyPackageRequirements
  2 | import pytest as pt
  3 | import typing as t
  4 | import jsonschema as js
  5 | import kegg_pull.map as kmap
  6 | import kegg_pull.kegg_url as ku
  7 | import dev.utils as u
  8 | 
  9 | 
 10 | @pt.fixture(name='kegg_rest', params=[True, False])
 11 | def get_kegg_rest(request, mocker):
 12 |     use_kegg_rest = request.param
 13 |     if use_kegg_rest:
 14 |         yield mocker.MagicMock()
 15 |     else:
 16 |         yield None
 17 | 
 18 | 
 19 | @pt.fixture(name='reverse', params=[True, False])
 20 | def get_reverse(request):
 21 |     yield request.param  # Tests requesting this fixture run once with reverse=True and once with reverse=False
 22 | 
 23 | 
 24 | def test_to_dict(mocker, kegg_rest):
 25 |     kegg_rest = kegg_rest
 26 |     text_body_mock = """
 27 |         a1\tb1
 28 |         a1\tb2
 29 |         a1\tb3
 30 |         a2\tb1
 31 |         a2\tb4
 32 |         a3\tb3
 33 |         a4\tb5
 34 |         a5\tb6
 35 |         a5\tb7
 36 |     """
 37 |     kwargs_mock = {'kegg_rest': kegg_rest, 'KEGGurl': ku.EntriesLinkKEGGurl, 'k': 'v'}
 38 |     kegg_response_mock = mocker.MagicMock(text_body=text_body_mock)
 39 |     request_and_check_error_mock: mocker.MagicMock = mocker.patch(
 40 |         'kegg_pull.map.r.request_and_check_error', return_value=kegg_response_mock)
 41 |     actual_mapping: kmap.KEGGmapping = kmap._to_dict(**kwargs_mock)
 42 |     request_and_check_error_mock.assert_called_once_with(**kwargs_mock)
 43 |     expected_mapping = {
 44 |         'a1': {'b1', 'b2', 'b3'}, 'a2': {'b1', 'b4'}, 'a3': {'b3'}, 'a4': {'b5'}, 'a5': {'b6', 'b7'}}
 45 |     assert actual_mapping == expected_mapping
 46 | 
 47 | 
 48 | test_map_and_reverse_data = [  # Each case: (kegg_pull.map function name, KEGGurl class expected to be passed to _to_dict, function kwargs)
 49 |     ('database_conv', ku.DatabaseConvKEGGurl, {'kegg_database': 'kegg-db', 'outside_database': 'outside-db'}),
 50 |     ('entries_conv', ku.EntriesConvKEGGurl, {'entry_ids': ['e1', 'e2'], 'target_database': 'x'}),
 51 |     ('entries_link', ku.EntriesLinkKEGGurl, {'entry_ids': ['e1', 'e2'], 'target_database': 'x'})]
 52 | 
 53 | 
 54 | @pt.mark.parametrize('method,KEGGurl,kwargs', test_map_and_reverse_data)
 55 | def test_map_and_reverse(mocker, method: str, KEGGurl: type, kwargs: dict, reverse: bool, kegg_rest):
 56 |     expected_mapping = {'k': {'v1', 'v2'}}
 57 |     to_dict_mock = mocker.patch('kegg_pull.map._to_dict', return_value=expected_mapping)
 58 |     # noinspection PyUnresolvedReferences
 59 |     method: t.Callable = kmap.__getattribute__(method)
 60 |     actual_mapping: kmap.KEGGmapping = method(reverse=reverse, kegg_rest=kegg_rest, **kwargs)
 61 |     to_dict_mock.assert_called_once_with(KEGGurl=KEGGurl, kegg_rest=kegg_rest, **kwargs)
 62 |     if reverse:
 63 |         expected_mapping = kmap.reverse(mapping=expected_mapping)
 64 |     assert actual_mapping == expected_mapping
 65 | 
 66 | 
 67 | test_deduplicate_pathway_ids_data = [  # database_link kwargs with "pathway" as the source database, then as the target database
 68 |     {'source_database': 'pathway', 'target_database': 'x'}, {'source_database': 'x', 'target_database': 'pathway'}]
 69 | 
 70 | 
 71 | @pt.mark.parametrize('kwargs', test_deduplicate_pathway_ids_data)
 72 | def test_deduplicate_pathway_ids(mocker, kwargs: dict, kegg_rest):
 73 |     kwargs['kegg_rest'] = kegg_rest
 74 |     to_dict_return = {'path:map1': {'x1'}, f'path:ko1': {'x1'}, 'path:map2': {'x2', 'x3'}, 'path:ko2': {'x2', 'x3'}}
 75 |     pathway_is_target = kwargs['target_database'] == 'pathway'
 76 |     to_dict_return = kmap.reverse(mapping=to_dict_return) if pathway_is_target else to_dict_return
 77 |     to_dict_mock = mocker.patch('kegg_pull.map._to_dict', return_value=to_dict_return)
 78 |     actual_mapping = kmap.database_link(deduplicate=True, **kwargs)
 79 |     to_dict_mock.assert_called_once_with(KEGGurl=ku.DatabaseLinkKEGGurl, **kwargs)
 80 |     expected_mapping = {'path:map1': {'x1'}, 'path:map2': {'x2', 'x3'}}
 81 |     expected_mapping = kmap.reverse(mapping=expected_mapping) if pathway_is_target else expected_mapping
 82 |     assert actual_mapping == expected_mapping
 83 | 
 84 | 
 85 | def test_deduplicate_pathway_ids_exception(mocker):
 86 |     message = f'Cannot deduplicate path:map entry ids when neither the source database nor the target database is set to "pathway".' \
 87 |               f' Databases specified: module, ko.'
 88 |     mocker.patch('kegg_pull.map._to_dict')
 89 |     with pt.raises(ValueError) as error:
 90 |         kmap.database_link(source_database='module', target_database='ko', deduplicate=True)
 91 |     u.assert_exception(expected_message=message, exception=error)
 92 | 
 93 | 
 94 | @pt.fixture(name='mapping_data', params=[(True, True), (False, True), (True, False), (False, False)])
 95 | def get_mapping_data(request, mocker):  # Parametrized over every (add_glycans, add_drugs) combination; yields a builder of the matching test data
 96 |     add_glycans, add_drugs = request.param
 97 | 
 98 |     def mapping_data(kegg_rest: mocker.MagicMock | None, kwargs: dict) -> tuple:  # Returns (add_glycans, add_drugs, expected _to_dict call kwargs, _to_dict return values, expected final mapping)
 99 |         compound_is_target = kwargs['target_database'] == 'compound'
100 |         expected_call_args_list = [kwargs]  # The first expected call is the direct link described by kwargs
101 |         compound_to_x = {'cpd1': {'x1', 'x2'}, 'cpd2': {'x1'}, 'cpd3': {'x2'}, 'cpd4': {'x3'}, 'cpd5': {'x2'}, 'cpd6': {'x4'}}
102 |         to_dict_side_effect = [kmap.reverse(mapping=compound_to_x) if compound_is_target else compound_to_x]
103 |         if add_glycans:  # Two further expected calls: compound -> glycan, then glycan -> x
104 |             expected_call_args_list.extend([
105 |                 {'source_database': 'compound', 'target_database': 'glycan'}, {'source_database': 'glycan', 'target_database': 'x'}])
106 |             to_dict_side_effect.extend([
107 |                 {'cpd1': {'gl1'}, 'cpd7': {'gl1', 'gl3'}, 'cpd8': {'gl2'}, 'cpd9': {'gl2'}, 'cpd10': {'gl3'}, 'cpd11': {'gl4'}},
108 |                 {'gl1': {'x1', 'x5'}, 'gl2': {'x2', 'x5'}, 'gl4': {'x3'}, 'gl3': {'x3'}, 'gl5': {'x6'}}])
109 |         if add_drugs:  # Two further expected calls: compound -> drug, then drug -> x
110 |             expected_call_args_list.extend([
111 |                 {'source_database': 'compound', 'target_database': 'drug'}, {'source_database': 'drug', 'target_database': 'x'}])
112 |             to_dict_side_effect.extend([
113 |                 {'cpd4': {'d1'}, 'cpd3': {'d1'}, 'cpd6': {'d2'}, 'cpd5': {'d2'}, 'cpd12': {'d4'}, 'cpd13': {'d4'}, 'cpd14': {'d5'}},
114 |                 {'d1': {'x1', 'x5'}, 'd2': {'x2', 'x5'}, 'd3': {'x3'}, 'd4': {'x3'}, 'd5': {'x6'}, 'd6': {'x6'}}])
115 |         expected_call_args_list = [{  # Every expected call additionally carries the kegg_rest object and the DatabaseLinkKEGGurl class
116 |             'source_database': d['source_database'], 'target_database': d['target_database'],
117 |             'kegg_rest': kegg_rest, 'KEGGurl': ku.DatabaseLinkKEGGurl} for d in expected_call_args_list]
118 |         if add_glycans and add_drugs:
119 |             expected_mapping = {
120 |                 'cpd4': {'x1', 'x3', 'x5'}, 'cpd2': {'x1'}, 'cpd3': {'x1', 'x2', 'x5'}, 'cpd1': {'x1', 'x2', 'x5'},
121 |                 'cpd7': {'x1', 'x3', 'x5'}, 'cpd8': {'x2', 'x5'}, 'cpd6': {'x2', 'x4', 'x5'}, 'cpd9': {'x2', 'x5'},
122 |                 'cpd5': {'x2', 'x5'}, 'cpd12': {'x3'}, 'cpd11': {'x3'}, 'cpd10': {'x3'}, 'cpd13': {'x3'}, 'cpd14': {'x6'}}
123 |         elif not add_glycans and add_drugs:
124 |             expected_mapping = {
125 |                 'cpd4': {'x3', 'x5', 'x1'}, 'cpd1': {'x2', 'x1'}, 'cpd3': {'x5', 'x2', 'x1'}, 'cpd2': {'x1'}, 'cpd6': {'x5', 'x4', 'x2'},
126 |                 'cpd5': {'x5', 'x2'}, 'cpd12': {'x3'}, 'cpd13': {'x3'}, 'cpd14': {'x6'}}
127 |         elif add_glycans and not add_drugs:
128 |             expected_mapping = {
129 |                 'cpd7': {'x3', 'x5', 'x1'}, 'cpd1': {'x5', 'x2', 'x1'}, 'cpd2': {'x1'}, 'cpd8': {'x5', 'x2'}, 'cpd3': {'x2'},
130 |                 'cpd9': {'x5', 'x2'}, 'cpd5': {'x2'}, 'cpd11': {'x3'}, 'cpd10': {'x3'}, 'cpd4': {'x3'}, 'cpd6': {'x4'}}
131 |         else:
132 |             expected_mapping = compound_to_x
133 |         expected_mapping = kmap.reverse(mapping=expected_mapping) if compound_is_target else expected_mapping  # The expected mappings above are keyed by compound ID; flip them when "compound" is the target
134 |         return add_glycans, add_drugs, expected_call_args_list, to_dict_side_effect, expected_mapping
135 |     yield mapping_data
136 | 
137 | 
138 | test_add_glycans_or_drugs_data = [  # database_link kwargs with "compound" as the source database, then as the target database
139 |     {'source_database': 'compound', 'target_database': 'x'}, {'source_database': 'x', 'target_database': 'compound'}]
140 | 
141 | 
@pt.mark.parametrize('kwargs', test_add_glycans_or_drugs_data)
def test_add_glycans_or_drugs(mocker, kegg_rest, mapping_data: t.Callable, kwargs: dict):
    """Check ``kmap.database_link`` with the glycan/drug flags supplied by the ``mapping_data`` fixture.

    ``kegg_pull.map._to_dict`` is patched so that it returns the fixture's canned mappings in call order.

    :param mocker: pytest-mock fixture used to patch ``_to_dict``.
    :param kegg_rest: Fixture forwarded to both ``mapping_data`` and ``database_link``.
    :param mapping_data: Callable fixture returning the flags, expected ``_to_dict`` calls, side effects and expected mapping.
    :param kwargs: The source and target database keyword arguments under test.
    """
    scenario = mapping_data(kegg_rest=kegg_rest, kwargs=kwargs)
    add_glycans, add_drugs, expected_call_args_list, to_dict_side_effect, expected_mapping = scenario
    to_dict_mock: mocker.MagicMock = mocker.patch('kegg_pull.map._to_dict', side_effect=to_dict_side_effect)
    # noinspection PyUnresolvedReferences
    actual_mapping: kmap.KEGGmapping = kmap.database_link(
        add_drugs=add_drugs, add_glycans=add_glycans, kegg_rest=kegg_rest, **kwargs)
    u.assert_call_args(
        function_mock=to_dict_mock, expected_call_args_list=expected_call_args_list, do_kwargs=True)
    assert actual_mapping == expected_mapping
151 | 
152 | 
def test_add_glycans_or_drugs_warning(mocker, caplog):
    """Check the warning logged when ``add_glycans`` is requested but neither database is "compound".

    :param mocker: pytest-mock fixture used to replace ``kegg_pull.map._to_dict`` with a mock.
    :param caplog: pytest log-capture fixture handed to ``u.assert_warning``.
    """
    mocker.patch('kegg_pull.map._to_dict')
    kmap.database_link(source_database='reaction', target_database='ko', add_glycans=True)
    # Plain literals: the message has no placeholders, so no f-string prefix is needed.
    expected_message = (
        'Adding compound IDs (corresponding to equivalent glycan and/or drug entries) to a mapping where '
        'neither the source database nor the target database are "compound". Databases specified: reaction, ko.')
    u.assert_warning(message=expected_message, caplog=caplog)
159 | 
160 | 
# Scenario names for test_indirect_link; the empty string selects the plain scenario (its final else branch).
test_indirect_link_data = [
    'drugs_and_glycans',
    'deduplicate',
    'drugs_and_glycans_and_deduplicate',
    '',
]
162 | 
163 | 
@pt.mark.parametrize('test_case', test_indirect_link_data)
def test_indirect_link(mocker, kegg_rest, test_case: str):
    """Check ``kmap.indirect_link`` for one named scenario with ``kegg_pull.map._to_dict`` mocked out.

    Each scenario selects the mappings the mock returns (in call order), the keyword arguments given to
    ``indirect_link``, the database pairs ``_to_dict`` is expected to be called with, and the expected result.
    The mock is patched and ``indirect_link`` is called once, after the scenario has been selected.

    :param mocker: pytest-mock fixture used to patch ``_to_dict``.
    :param kegg_rest: Fixture passed through to ``indirect_link`` and expected in every ``_to_dict`` call.
    :param test_case: Scenario name from ``test_indirect_link_data``; any other value (including ``''``) runs
        the plain compound -> reaction -> ko scenario.
    """
    compound_to_reaction = {'cpd1': {'rn1', 'rn3'}, 'cpd2': {'rn2'}, 'cpd3': {'rn3'}}
    pathway_to_reaction = {
        'path:map1': {'rn1', 'rn3'}, 'path:rn1': {'rn1', 'rn3'}, 'path:map2': {'rn2'}, 'path:rn2': {'rn2'},
        'path:map3': {'rn3'}, 'path:rn3': {'rn3'}}
    reaction_to_gene = {'rn1': {'ko1', 'ko2'}, 'rn4': {'ko4', 'ko3'}, 'rn3': {'ko3'}}
    compound_to_glycan = {'cpd1': {'gl1'}}
    compound_to_drug = {'cpd3': {'d1'}}
    compound_to_gene_calls = [
        {'source_database': 'compound', 'target_database': 'reaction'},
        {'source_database': 'reaction', 'target_database': 'ko'}]
    if test_case == 'drugs_and_glycans':
        glycan_to_gene = {'gl1': {'ko1', 'ko4'}, 'gl2': {'ko5'}}
        drug_to_gene = {'d1': {'ko3', 'ko6'}, 'd2': {'ko7'}}
        # Concatenate into a new list so the shared base list of calls is never mutated.
        expected_calls = compound_to_gene_calls + [
            {'source_database': 'compound', 'target_database': 'glycan'},
            {'source_database': 'glycan', 'target_database': 'ko'},
            {'source_database': 'compound', 'target_database': 'drug'},
            {'source_database': 'drug', 'target_database': 'ko'}]
        side_effect = [
            compound_to_reaction, reaction_to_gene, compound_to_glycan, glycan_to_gene, compound_to_drug, drug_to_gene]
        link_kwargs = {
            'source_database': 'compound', 'intermediate_database': 'reaction', 'target_database': 'ko',
            'add_glycans': True, 'add_drugs': True}
        expected_mapping = {'cpd1': {'ko3', 'ko1', 'ko2', 'ko4'}, 'cpd3': {'ko3', 'ko6'}}
    elif test_case == 'deduplicate':
        expected_calls = [
            {'source_database': 'pathway', 'target_database': 'reaction'},
            {'source_database': 'reaction', 'target_database': 'ko'}]
        side_effect = [pathway_to_reaction, reaction_to_gene]
        link_kwargs = {
            'source_database': 'pathway', 'intermediate_database': 'reaction', 'target_database': 'ko',
            'deduplicate': True}
        expected_mapping = {'path:map1': {'ko3', 'ko1', 'ko2'}, 'path:map3': {'ko3'}}
    elif test_case == 'drugs_and_glycans_and_deduplicate':
        reaction_to_pathway = kmap.reverse(mapping=pathway_to_reaction)
        glycan_to_pathway = {'gl1': {'path:map1', 'path:map4'}, 'gl2': {'path:map5'}}
        drug_to_pathway = {'d1': {'path:map3', 'path:map6'}, 'd2': {'path:map7'}}
        expected_calls = [
            {'source_database': 'compound', 'target_database': 'reaction'},
            {'source_database': 'reaction', 'target_database': 'pathway'},
            {'source_database': 'compound', 'target_database': 'glycan'},
            {'source_database': 'glycan', 'target_database': 'pathway'},
            {'source_database': 'compound', 'target_database': 'drug'},
            {'source_database': 'drug', 'target_database': 'pathway'}]
        side_effect = [
            compound_to_reaction, reaction_to_pathway, compound_to_glycan, glycan_to_pathway, compound_to_drug,
            drug_to_pathway]
        link_kwargs = {
            'source_database': 'compound', 'intermediate_database': 'reaction', 'target_database': 'pathway',
            'deduplicate': True, 'add_glycans': True, 'add_drugs': True}
        expected_mapping = {
            'cpd1': {'path:map1', 'path:map3', 'path:map4'}, 'cpd2': {'path:map2'},
            'cpd3': {'path:map1', 'path:map3', 'path:map6'}}
    else:
        expected_calls = compound_to_gene_calls
        side_effect = [compound_to_reaction, reaction_to_gene]
        link_kwargs = {'source_database': 'compound', 'intermediate_database': 'reaction', 'target_database': 'ko'}
        expected_mapping = {'cpd1': {'ko3', 'ko1', 'ko2'}, 'cpd3': {'ko3'}}
    to_dict_mock = mocker.patch('kegg_pull.map._to_dict', side_effect=side_effect)
    actual_mapping = kmap.indirect_link(kegg_rest=kegg_rest, **link_kwargs)
    # Every expected _to_dict call also carries the kegg_rest object and the database-link URL class.
    expected_call_args_list = [{
        'source_database': pair['source_database'], 'target_database': pair['target_database'],
        'kegg_rest': kegg_rest, 'KEGGurl': ku.DatabaseLinkKEGGurl} for pair in expected_calls]
    u.assert_call_args(function_mock=to_dict_mock, expected_call_args_list=expected_call_args_list, do_kwargs=True)
    assert actual_mapping == expected_mapping
227 | 
228 | 
# (kwargs, expected error message) pairs for test_indirect_link_exception: each repeats at least one database.
test_indirect_link_exception_data = [
    (
        {'source_database': 'pathway', 'intermediate_database': 'reaction', 'target_database': 'reaction'},
        'The source, intermediate, and target database must all be unique. '
        'Databases specified: pathway, reaction, reaction.',
    ),
    (
        {'source_database': 'reaction', 'intermediate_database': 'reaction', 'target_database': 'reaction'},
        'The source, intermediate, and target database must all be unique. '
        'Databases specified: reaction, reaction, reaction.',
    ),
]
234 | 
235 | 
@pt.mark.parametrize('kwargs,error_message', test_indirect_link_exception_data)
def test_indirect_link_exception(kwargs: dict, error_message: str):
    """Check that ``kmap.indirect_link`` raises ``ValueError`` with the expected message.

    :param kwargs: Database keyword arguments passed to ``indirect_link``.
    :param error_message: The message the raised exception is expected to carry.
    """
    with pt.raises(ValueError) as raised:
        kmap.indirect_link(**kwargs)
    u.assert_exception(expected_message=error_message, exception=raised)
241 | 
242 | 
def test_combine_mappings():
    """Check ``kmap.combine_mappings`` on two mappings with disjoint, identical and partially overlapping keys."""
    first = {'k1': {'v1'}, 'k4': {'v3', 'v4'}, 'k5': {'v6', 'v7'}}
    second = {'k2': {'v1'}, 'k3': {'v2', 'v3'}, 'k4': {'v3', 'v4'}, 'k5': {'v5', 'v6'}}
    expected_combined_mapping = {
        'k1': {'v1'}, 'k4': {'v3', 'v4'}, 'k5': {'v6', 'v7', 'v5'}, 'k2': {'v1'}, 'k3': {'v2', 'v3'}}
    actual_combined_mapping = kmap.combine_mappings(mapping1=first, mapping2=second)
    assert actual_combined_mapping == expected_combined_mapping
249 | 
250 | 
def test_reverse():
    """Check that ``kmap.reverse`` turns a key -> values mapping into the matching value -> keys mapping."""
    forward = {
        'k1': {'v1', 'v2'},
        'k2': {'v1', 'v3', 'v4'},
        'k3': {'v1', 'v2', 'v3', 'v5'},
        'k4': {'v4', 'v5', 'v6'}}
    expected_reverse_mapping = {
        'v1': {'k1', 'k2', 'k3'},
        'v2': {'k1', 'k3'},
        'v3': {'k2', 'k3'},
        'v4': {'k2', 'k4'},
        'v5': {'k3', 'k4'},
        'v6': {'k4'}}
    actual_reverse_mapping = kmap.reverse(mapping=forward)
    assert actual_reverse_mapping == expected_reverse_mapping
257 | 
258 | 
def test_to_json_string():
    """Check the exact JSON text ``kmap.to_json_string`` produces for a small mapping."""
    source_mapping = {'k1': {'v1'}, 'k2': {'v1', 'v2'}, 'k3': {'v3', 'v4'}}
    # Same expected string as a single literal, split line by line for readability.
    expected_json_string = (
        '{\n'
        '  "k1": [\n'
        '    "v1"\n'
        '  ],\n'
        '  "k2": [\n'
        '    "v1",\n'
        '    "v2"\n'
        '  ],\n'
        '  "k3": [\n'
        '    "v3",\n'
        '    "v4"\n'
        '  ]\n'
        '}')
    actual_json_string: str = kmap.to_json_string(mapping=source_mapping)
    assert actual_json_string == expected_json_string
264 | 
265 | 
def test_save_to_json(json_file_path: str):
    """Save a mapping with ``kmap.save_to_json`` and verify the stored JSON via ``u.test_save_to_json``.

    :param json_file_path: Fixture providing the path the mapping is written to.
    """
    mapping_to_save = {'k1': {'v1'}, 'k2': {'v3', 'v2'}}
    expected_saved_json_object = {'k1': ['v1'], 'k2': ['v2', 'v3']}
    kmap.save_to_json(mapping=mapping_to_save, file_path=json_file_path)
    u.test_save_to_json(json_file_path=json_file_path, expected_saved_json_object=expected_saved_json_object)
269 | 
270 | 
def test_load_from_json(json_file_path: str):
    """Delegate to ``u.test_load_from_json`` to check ``kmap.load_from_json`` against a saved object.

    :param json_file_path: Fixture providing the path of the JSON file used by the helper.
    """
    saved_object = {'k1': ['v1'], 'k2': ['v2', 'v3']}
    expected_loaded_object = {'k1': {'v1'}, 'k2': {'v3', 'v2'}}
    u.test_load_from_json(
        json_file_path=json_file_path, saved_object=saved_object, method=kmap.load_from_json,
        expected_loaded_object=expected_loaded_object)
275 | 
276 | 
# Objects that test_invalid_save_to_json expects kmap.save_to_json to reject.
test_invalid_save_to_json_data = [
    {'a': [1]},
    {'a': [1.2]},
    {'a': [[], []]},
    {'a': {}},
    {'a': []},
    {'': ['b']},
]
# Error message the invalid save and invalid load tests both expect to be logged.
expected_error_message = 'The mapping must be a dictionary of entry IDs (strings) mapped to a set of entry IDs'
279 | 
280 | 
@pt.mark.parametrize('invalid_json_object', test_invalid_save_to_json_data)
def test_invalid_save_to_json(caplog, invalid_json_object: dict):
    """Check that ``kmap.save_to_json`` raises a validation error and logs the expected error message.

    :param caplog: pytest log-capture fixture handed to ``u.assert_error``.
    :param invalid_json_object: An object from ``test_invalid_save_to_json_data``.
    """
    validation_error = js.exceptions.ValidationError
    with pt.raises(validation_error):
        kmap.save_to_json(mapping=invalid_json_object, file_path='xxx.json')
    u.assert_error(message=expected_error_message, caplog=caplog)
287 | 
288 | 
# Every invalid-save object plus further objects used only by test_invalid_load_from_json.
test_invalid_load_from_json_data = [
    *test_invalid_save_to_json_data,
    ['1', '2'],
    {'a': 'b'},
    {'a': [2]},
    'abc',
    123,
    123.123,
    {1: 2},
    {1.2: 2.3},
    {'a': [{}, {}]},
    {'a': ['b', 1]},
    {'a': [1.2, 'b']},
]
293 | 
294 | 
@pt.mark.parametrize('invalid_json_object', test_invalid_load_from_json_data)
def test_invalid_load_from_json(caplog, json_file_path: str, invalid_json_object: list | dict | int | float | str):
    """Delegate to ``u.test_invalid_load_from_json`` to check ``kmap.load_from_json`` on an invalid object.

    :param caplog: pytest log-capture fixture forwarded to the helper.
    :param json_file_path: Fixture providing the path of the JSON file used by the helper.
    :param invalid_json_object: An object from ``test_invalid_load_from_json_data``.
    """
    helper_kwargs = {
        'json_file_path': json_file_path,
        'invalid_json_object': invalid_json_object,
        'method': kmap.load_from_json,
        'expected_error_message': expected_error_message,
        'caplog': caplog,
    }
    u.test_invalid_load_from_json(**helper_kwargs)
300 | 


--------------------------------------------------------------------------------