├── .bumpversion.cfg ├── .flake8 ├── .gitignore ├── .readthedocs.yml ├── .travis.yml ├── CONTRIBUTING.rst ├── LICENSE ├── MANIFEST.in ├── README.rst ├── docs ├── Makefile └── source │ ├── bio2bel_repositories.rst │ ├── cli │ ├── inference.rst │ ├── summarize.rst │ └── train_and_evaluate.rst │ ├── conf.py │ ├── convert.rst │ ├── hyper_parameter_optimization.rst │ ├── images │ ├── batch_size.png │ ├── data_source.png │ ├── epochs.png │ ├── execution_mode.png │ ├── output_directory.png │ ├── preferred_device.png │ ├── provide_dataset.png │ ├── random_seed.png │ ├── select_database.png │ └── select_model.png │ ├── index.rst │ ├── installation.rst │ └── train_and_evaluate.rst ├── notebooks ├── Case Scenario ADEPTUS and HSDN.ipynb ├── Case Scenario ADEPTUS.ipynb ├── Case Scenario ComPath.ipynb ├── Case Scenario HSDN.ipynb └── README.rst ├── requirements-rtd.txt ├── setup.cfg ├── setup.py ├── src └── biokeen │ ├── __init__.py │ ├── __main__.py │ ├── cli │ ├── __init__.py │ ├── __main__.py │ ├── cli.py │ ├── messages.py │ └── prompts.py │ ├── constants.py │ ├── content.py │ └── convert │ ├── __init__.py │ ├── converters.py │ └── io.py ├── tests ├── __init__.py ├── test_convert.py └── test_simple.py └── tox.ini /.bumpversion.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 0.0.15-dev 3 | commit = True 4 | tag = False 5 | parse = (?P\d+)\.(?P\d+)\.(?P\d+)(?:-(?P[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?(?:\+(?P[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))? 
6 | serialize = 7 | {major}.{minor}.{patch}-{release}+{build} 8 | {major}.{minor}.{patch}+{build} 9 | {major}.{minor}.{patch}-{release} 10 | {major}.{minor}.{patch} 11 | 12 | [bumpversion:part:release] 13 | optional_value = production 14 | first_value = dev 15 | values = 16 | dev 17 | production 18 | 19 | [bumpverion:part:build] 20 | values = [0-9A-Za-z-]+ 21 | 22 | [bumpversion:file:setup.cfg] 23 | search = version = {current_version} 24 | replace = version = {new_version} 25 | 26 | [bumpversion:file:src/biokeen/constants.py] 27 | search = VERSION = '{current_version}' 28 | replace = VERSION = '{new_version}' 29 | 30 | [bumpversion:file:docs/source/conf.py] 31 | search = release = '{current_version}' 32 | replace = release = '{new_version}' 33 | 34 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | ######################### 2 | # Flake8 Configuration # 3 | # (.flake8) # 4 | # (formerly in tox.ini) # 5 | ######################### 6 | [flake8] 7 | # flake8 8 | ignore = 9 | # Line too long 10 | E501 11 | # Line break after binary operator 12 | W504 13 | exclude = 14 | .tox, 15 | .git, 16 | __pycache__, 17 | docs/source/conf.py, 18 | build, 19 | dist, 20 | *.pyc, 21 | *.egg-info, 22 | .cache, 23 | .eggs 24 | format = ${cyan}%(path)s${reset}:${yellow_bold}%(row)d${reset}:${green_bold}%(col)d${reset}: ${red_bold}%(code)s${reset} %(text)s 25 | 26 | # flake8-import-order 27 | application-import-names = 28 | biokeen 29 | pykeen 30 | pybel 31 | bio2bel 32 | tests 33 | import-order-style = pycharm 34 | 35 | # mccabe 36 | max-complexity = 10 37 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # 
Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | 91 | # PyCharm project settings 92 | .idea/* 93 | 94 | *.pickle 95 | *.gpickle 96 | 97 | scratch 98 | scratch/* 99 | 100 | .pytest_cache 101 | 102 | ### OSX ### 103 | # General 104 | .DS_Store 105 | .AppleDouble 106 | .LSOverride 107 | 108 | # Icon must end with two \r 109 | Icon 110 | 111 | # Thumbnails 112 | ._* 113 | 114 | # Files that might appear in the root of a volume 115 | .DocumentRevisions-V100 116 | .fseventsd 117 | .Spotlight-V100 118 | .TemporaryItems 119 | .Trashes 120 | .VolumeIcon.icns 121 | .com.apple.timemachine.donotpresent 122 | 123 | # Directories potentially created on remote AFP share 124 | .AppleDB 125 | .AppleDesktop 126 | Network 
Trash Folder 127 | Temporary Items 128 | .apdisk 129 | 130 | ### Vim ### 131 | # Swap 132 | [._]*.s[a-v][a-z] 133 | [._]*.sw[a-p] 134 | [._]s[a-rt-v][a-z] 135 | [._]ss[a-gi-z] 136 | [._]sw[a-p] 137 | 138 | # Session 139 | Session.vim 140 | 141 | # Temporary 142 | .netrwhist 143 | *~ 144 | # Auto-generated tag files 145 | tags 146 | # Persistent undo 147 | [._]*.un~ 148 | 149 | .idea/ 150 | data/entities_to_embeddings.pkl 151 | data/hyper_parameters.json 152 | data/relations_to_embeddings.pkl 153 | data/losses.json 154 | data/evaluation_summary.json 155 | /data/ 156 | /data_local/ 157 | /data_local/* -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # See: https://docs.readthedocs.io/en/latest/yaml-config.html 2 | build: 3 | image: latest 4 | python: 5 | version: 3.6 6 | requirements_file: requirements-rtd.txt 7 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | cache: pip 3 | language: python 4 | python: 5 | - 3.6 6 | stages: 7 | - lint 8 | - docs 9 | - test 10 | jobs: 11 | include: 12 | # lint stage 13 | - stage: lint 14 | env: TOXENV=manifest 15 | - env: TOXENV=flake8 16 | - env: TOXENV=xenon 17 | - env: TOXENV=pyroma 18 | # docs stage 19 | - stage: docs 20 | env: TOXENV=doc8 21 | - env: TOXENV=readme 22 | - env: TOXENV=docs 23 | - stage: test 24 | env: TOXENV=py 25 | python: "3.6" 26 | - env: TOXENV=py 27 | sudo: true 28 | python: "3.7" 29 | dist: xenial 30 | allow_failures: 31 | - env: TOXENV=xenon 32 | install: 33 | - sh -c 'if [ "$TOXENV" = "py" ]; then pip install tox codecov coverage; else pip install tox; fi' 34 | script: 35 | - tox 36 | after_success: 37 | - sh -c 'if [ "$TOXENV" = "py" ]; then tox -e coverage-report; codecov; fi' 38 | 
-------------------------------------------------------------------------------- /CONTRIBUTING.rst: -------------------------------------------------------------------------------- 1 | Contributing 2 | ============ 3 | BioKEEN is part of the KEEN Universe with which we aim to make knowledge graph embedding 4 | research reproducible and transferable to other domains. Contributions, whether big or small, are appreciated! 5 | You can get involved by submitting an issue, making a suggestion, or adding code to the project. 6 | 7 | 8 | Having a Problem? Submit an Issue. 9 | ---------------------------------- 10 | 11 | 1. Check that you have the latest version of :code:`BioKEEN` 12 | 2. Go here: https://github.com/SmartDataAnalytics/BioKEEN/issues 13 | 3. Check that this issue hasn't been solved 14 | 4. Click "new issue" 15 | 5. Add a short, but descriptive title 16 | 6. Add a full description of the problem, including the code that caused it and any support files related to this code 17 | so others can reproduce your problem 18 | 7. Copy the output and error message you're getting 19 | 20 | Have a Question or Suggestion? 21 | ------------------------------ 22 | 23 | Same drill! Submit an issue and we'll have a nice conversation in the thread. 24 | 25 | Want to Contribute? 26 | ------------------- 27 | 28 | 1. Get the code. Fork the repository from GitHub using the big button in the top-right corner of https://github.com/SmartDataAnalytics/BioKEEN 29 | 30 | 2. Clone your directory with 31 | 32 | $ git clone https://github.com//SmartDataAnalytics/BioKEEN.git 33 | 34 | 3. Install with :code:`pip`. The flag, :code:`-e`, makes your installation editable, so your changes will be reflected 35 | automatically in your installation. 36 | 37 | $ cd biokeen 38 | $ python3 -m pip install -e . 39 | 40 | 4. Make a branch off of develop, then make contributions! This line makes a new branch and checks it out 41 | 42 | $ git checkout -b feature/ 43 | 44 | 5. 
This project should be well tested, so write unit tests in the :code:`tests/` directory 45 | 6. Check that all tests are passing and code coverage is good with :code:`tox` before committing. 46 | 47 | $ tox 48 | 49 | 50 | Pull Requests 51 | ------------- 52 | 53 | Once you've got your feature or bugfix finished (or if it's in a partially complete state but you want to publish it 54 | for comment), push it to your fork of the repository and open a pull request against the develop branch on GitHub. 55 | 56 | Make a descriptive comment about your pull request, perhaps referencing the issue it is meant to fix 57 | (something along the lines of "fixes issue #10" will cause GitHub to automatically link to that issue). 58 | The maintainers will review your pull request and perhaps make comments about it, request changes, or may pull it in 59 | to the develop branch! If you need to make changes to your pull request, simply push more commits to the feature branch 60 | in your fork to GitHub and they will automatically be added to the pull. You do not need to close and reissue your 61 | pull request to make changes! 62 | 63 | 64 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Smart Data Analytics 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software.
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | graft src 2 | graft tests 3 | prune notebooks 4 | prune data 5 | 6 | recursive-include docs/source *.py 7 | recursive-include docs/source *.rst 8 | recursive-include docs/source *.png 9 | include docs/Makefile 10 | include config/config.ini 11 | 12 | global-exclude *.py[cod] __pycache__ *.so *.dylib .DS_Store *.gpickle 13 | 14 | exclude .bumpversion.cfg data .travis.yml .readthedocs.yml .flake8 requirements-rtd.txt 15 | include *.rst LICENSE tox.ini *.tsv 16 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | WARNING: This repository is superseded by `PyKEEN `_ and is therefore no longer maintained. 2 | ============================================================================================================================= 3 | BioKEEN 4 | ------- 5 | |build| |coverage| |docs| |zenodo| 6 | 7 | BioKEEN (Biological KnowlEdge EmbeddiNgs) is a package for training and evaluating biological knowledge graph 8 | embeddings built on PyKEEN. 9 | 10 | Because we use PyKEEN as the underlying software package, implementations of 10 knowledge graph embedding models are 11 | currently available for BioKEEN.
Furthermore, BioKEEN can be run in *training mode* in which users provide their own set 12 | of hyper-parameter values, or in *hyper-parameter optimization mode* to find suitable hyper-parameter values from a set 13 | of user-defined values. 14 | 15 | Through the integration of the `Bio2BEL `_ [2]_ software numerous biomedical databases are 16 | directly accessible within BioKEEN. 17 | 18 | BioKEEN can also be run without having experience in programming by using its interactive command line interface that can 19 | be started with the command “biokeen” from a terminal. 20 | 21 | Share Your Experimental Artifacts 22 | --------------------------------- 23 | You can share your trained KGE models along with the other experimental artifacts through the `KEEN-Model-Zoo `_. 24 | 25 | Tutorials 26 | --------- 27 | A brief tutorial on how to get started with BioKEEN is available `here `_. 28 | 29 | .. image:: https://i.vimeocdn.com/video/755767182.jpg?mw=1100&mh=619&q=70 30 | :width: 300px 31 | :target: https://vimeo.com/314252656 32 | 33 | 34 | Further tutorials can be found in the `notebooks directory `_ and in our `documentation `_. 35 | 36 | Citation 37 | -------- 38 | If you find BioKEEN useful in your work, please consider citing: 39 | 40 | .. [1] Ali, M., *et al.* (2019). `BioKEEN: A library for learning and evaluating biological knowledge graph embeddings 41 | `_. *Bioinformatics*, btz117. 42 | 43 | **Note**: ComPath has been updated, for this reason we have uploaded the dataset version that we have used for 44 | our experiments: `dataset `_ 45 | 46 | Installation |pypi_version| |python_versions| |pypi_license| 47 | ------------------------------------------------------------ 48 | To install biokeen, Python 3.6+ is required, and we recommend to install it on Linux or Mac OS systems. 49 | Please run the following command: 50 | 51 | ..
code-block:: sh 52 | 53 | $ pip install git+https://github.com/SmartDataAnalytics/BioKEEN.git 54 | 55 | Alternatively, it can be installed from the source for development with: 56 | 57 | .. code-block:: sh 58 | 59 | $ git clone https://github.com/SmartDataAnalytics/BioKEEN.git biokeen 60 | $ cd biokeen 61 | $ pip install -e . 62 | 63 | Contributing 64 | ------------ 65 | Contributions, whether filing an issue, making a pull request, or forking, are appreciated. 66 | See `CONTRIBUTING.rst `_ for more 67 | information on getting involved. 68 | 69 | CLI Usage 70 | --------- 71 | To show BioKEEN's available commands, please run the following command: 72 | 73 | .. code-block:: sh 74 | 75 | biokeen 76 | 77 | Starting the Training/HPO Pipeline - Set Up Your Experiment within 60 seconds 78 | ***************************************************************************** 79 | To configure an experiment via the CLI, please run the following command: 80 | 81 | .. code-block:: sh 82 | 83 | biokeen start 84 | 85 | To start BioKEEN with an existing configuration file, please run the following command: 86 | 87 | .. code-block:: sh 88 | 89 | biokeen start -f /path/to/config.json 90 | 91 | Starting the Prediction Pipeline 92 | ******************************** 93 | To make predictions based on a trained model, please run the following command: 94 | 95 | .. code-block:: sh 96 | 97 | biokeen predict -m /path/to/model/directory -d /path/to/data/directory 98 | 99 | where the value for the argument **-m** is the directory containing the model; in more detail, the following files must be 100 | contained in the directory: 101 | 102 | * configuration.json 103 | * entities_to_embeddings.json 104 | * relations_to_embeddings.json 105 | * trained_model.pkl 106 | 107 | These files are created automatically after the model is trained (and evaluated) and exported to your 108 | specified output directory.
109 | 110 | The value for the argument **-d** is the directory containing the data for which inference should be applied, and it 111 | needs to contain the following files: 112 | 113 | * entities.tsv 114 | * relations.tsv 115 | 116 | where *entities.tsv* contains all entities of interest, and *relations.tsv* all relations. Both files should 117 | contain a single column containing all the entities/relations. Based on these files, PyKEEN creates all 118 | triple permutations, computes the predictions for them, and saves them in the data directory 119 | in *predictions.tsv*. 120 | 121 | Summarize the Results of All Experiments 122 | **************************************** 123 | To summarize the results of all experiments, please run the following command: 124 | 125 | .. code-block:: sh 126 | 127 | biokeen summarize -d /path/to/experiments/directory -o /path/to/output/file.csv 128 | 129 | Getting Bio2BEL Data 130 | ******************** 131 | To download and structure the data from a `Bio2BEL `_ repository, run: 132 | 133 | .. code-block:: sh 134 | 135 | biokeen data get <name> 136 | 137 | Where ``<name>`` can be any repository name in Bio2BEL such as ``hippie``, ``mirtarbase``. 138 | 139 | References 140 | ---------- 141 | 142 | .. [2] Hoyt, C., *et al.* (2019). `Integration of Structured Biological Data Sources using Biological Expression Language 143 | `_. *bioRxiv*, 631812. 144 | 145 | .. |build| image:: https://travis-ci.org/SmartDataAnalytics/BioKEEN.svg?branch=master 146 | :target: https://travis-ci.org/SmartDataAnalytics/BioKEEN 147 | 148 | .. |zenodo| image:: https://zenodo.org/badge/150270965.svg 149 | :target: https://zenodo.org/badge/latestdoi/150270965 150 | 151 | .. |docs| image:: http://readthedocs.org/projects/biokeen/badge/?version=latest 152 | :target: https://biokeen.readthedocs.io/en/latest/ 153 | :alt: Documentation Status 154 | 155 | ..
|python_versions| image:: https://img.shields.io/pypi/pyversions/biokeen.svg 156 | :alt: Stable Supported Python Versions 157 | 158 | .. |pypi_version| image:: https://img.shields.io/pypi/v/biokeen.svg 159 | :alt: Current version on PyPI 160 | 161 | .. |pypi_license| image:: https://img.shields.io/pypi/l/biokeen.svg 162 | :alt: MIT License 163 | 164 | .. |coverage| image:: https://codecov.io/gh/SmartDataAnalytics/BioKEEN/branch/master/graphs/badge.svg 165 | :target: https://codecov.io/gh/SmartDataAnalytics/BioKEEN 166 | :alt: Coverage Status on CodeCov 167 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SOURCEDIR = source 8 | BUILDDIR = build 9 | 10 | # Put it first so that "make" without argument is like "make help". 11 | help: 12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 13 | 14 | .PHONY: help Makefile 15 | 16 | # Catch-all target: route all unknown targets to Sphinx using the new 17 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 18 | %: Makefile 19 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 20 | -------------------------------------------------------------------------------- /docs/source/bio2bel_repositories.rst: -------------------------------------------------------------------------------- 1 | Biological Databases 2 | ==================== 3 | The following biological databases can be used for training and evaluating knowledge graph embeddings. This is done 4 | by using the `Bio2BEL `_ universe. 
5 | 6 | +------------------------+-----------------------+ 7 | | Source | DOI | 8 | | | | 9 | +========================+=======================+ 10 | | ADEPTUS | |adeptus_zenodo| | 11 | +------------------------+-----------------------+ 12 | | ComPath | |compath_zenodo| | 13 | +------------------------+-----------------------+ 14 | | DrugBank | |drugbank_zenodo| | 15 | +------------------------+-----------------------+ 16 | | ExPASy | |expasy_zenodo| | 17 | +------------------------+-----------------------+ 18 | | HIPPIE | |hippie_zenodo| | 19 | +------------------------+-----------------------+ 20 | | HSDN | |hsdn_zenodo| | 21 | +------------------------+-----------------------+ 22 | | KEGG | |kegg_zenodo| | 23 | +------------------------+-----------------------+ 24 | | miRTarBase | |mirtarbase_zenodo| | 25 | +------------------------+-----------------------+ 26 | | MSigDB | |msig_zenodo| | 27 | +------------------------+-----------------------+ 28 | | Reactome | |reactome_zenodo| | 29 | +------------------------+-----------------------+ 30 | | SIDER | |sider_zenodo| | 31 | +------------------------+-----------------------+ 32 | | InterPro | |interpro_zenodo| | 33 | +------------------------+-----------------------+ 34 | | WikiPathways | |wikipathways_zenodo| | 35 | +------------------------+-----------------------+ 36 | 37 | 38 | .. |interpro_zenodo| image:: https://zenodo.org/badge/98345182.svg 39 | :target: https://zenodo.org/badge/latestdoi/98345182 40 | :alt: InterPro Zenodo DOI 41 | 42 | .. |hsdn_zenodo| image:: https://zenodo.org/badge/158366852.svg 43 | :target: https://zenodo.org/badge/latestdoi/158366852 44 | :alt: HSDN Zenodo DOI 45 | 46 | .. |sider_zenodo| image:: https://zenodo.org/badge/129140922.svg 47 | :target: https://zenodo.org/badge/latestdoi/129140922 48 | :alt: SIDER Zenodo DOI 49 | 50 | .. 
|expasy_zenodo| image:: https://zenodo.org/badge/100023822.svg 51 | :target: https://zenodo.org/badge/latestdoi/100023822 52 | :alt: ExPASy Zenodo DOI 53 | 54 | .. |adeptus_zenodo| image:: https://zenodo.org/badge/158358036.svg 55 | :target: https://zenodo.org/badge/latestdoi/158358036 56 | :alt: ADEPTUS Zenodo DOI 57 | 58 | .. |kegg_zenodo| image:: https://zenodo.org/badge/105248163.svg 59 | :target: https://zenodo.org/badge/latestdoi/105248163 60 | :alt: KEGG Zenodo DOI 61 | 62 | .. |compath_zenodo| image:: https://zenodo.org/badge/132792765.svg 63 | :target: https://zenodo.org/badge/latestdoi/132792765 64 | 65 | .. |reactome_zenodo| image:: https://zenodo.org/badge/103138323.svg 66 | :target: https://zenodo.org/badge/latestdoi/103138323 67 | :alt: Reactome Zenodo DOI 68 | 69 | .. |wikipathways_zenodo| image:: https://zenodo.org/badge/118924155.svg 70 | :target: https://zenodo.org/badge/latestdoi/118924155 71 | :alt: WikiPathways Zenodo DOI 72 | 73 | .. |msig_zenodo| image:: https://zenodo.org/badge/123948554.svg 74 | :target: https://zenodo.org/badge/latestdoi/123948554 75 | :alt: MSigDB Zenodo DOI 76 | 77 | .. |drugbank_zenodo| image:: https://zenodo.org/badge/DOI/10.5281/zenodo.1243727.svg 78 | :target: https://doi.org/10.5281/zenodo.1243727 79 | :alt: Zenodo DOI 80 | 81 | .. |hippie_zenodo| image:: https://zenodo.org/badge/DOI/10.5281/zenodo.1435930.svg 82 | :target: https://doi.org/10.5281/zenodo.1435930 83 | :alt: HIPPIE Zenodo DOI 84 | 85 | .. |mirtarbase_zenodo| image:: https://zenodo.org/badge/95350968.svg 86 | :target: https://zenodo.org/badge/latestdoi/95350968 87 | :alt: Zenodo DOI 88 | -------------------------------------------------------------------------------- /docs/source/cli/inference.rst: -------------------------------------------------------------------------------- 1 | Perform Inference 2 | ================= 3 | Starting the Prediction Pipeline 4 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 5 | .. 
code-block:: sh 6 | 7 | biokeen predict -m /path/to/model/directory -d /path/to/data/directory 8 | 9 | where the value for the argument **-m** is the directory containing the model; in more detail, the following files must be 10 | contained in the directory: 11 | 12 | * configuration.json 13 | * entities_to_embeddings.json 14 | * relations_to_embeddings.json 15 | * trained_model.pkl 16 | 17 | These files are created automatically when an experiment is configured through the CLI. 18 | 19 | The value for the argument **-d** is the directory containing the data for which inference should be applied, and it 20 | needs to contain the following files: 21 | 22 | * entities.tsv 23 | * relations.tsv 24 | 25 | where *entities.tsv* contains all entities of interest, and *relations.tsv* all relations. PyKEEN creates all possible 26 | combinations of triples, computes the predictions for them, and saves them in the data directory in *predictions.tsv*. 27 | 28 | Optionally, a set of triples can be provided that should be excluded from the prediction, e.g. all the triples 29 | contained in the training set: 30 | 31 | .. code-block:: sh 32 | 33 | pykeen-predict -m /path/to/model/directory -d /path/to/data/directory -t /path/to/triples.tsv 34 | 35 | Hence, it is easily possible to compute plausibility scores for all triples that are not contained in the training set. 36 | 37 | CLI Manual 38 | ~~~~~~~~~~ 39 | .. click:: biokeen.cli.cli:predict 40 | :prog: biokeen predict 41 | :show-nested: 42 | -------------------------------------------------------------------------------- /docs/source/cli/summarize.rst: -------------------------------------------------------------------------------- 1 | Summarize all Experiments 2 | ========================= 3 | Here, we describe how to summarize all experiments into a single csv-file.
4 | To get the summary, please provide the path to parent directory containing all the experiments as sub-directories, 5 | and the path to the output file: 6 | 7 | .. code-block:: python 8 | 9 | biokeen summarize -d /path/to/experiments/directory -o /path/to/output/file.csv 10 | -------------------------------------------------------------------------------- /docs/source/cli/train_and_evaluate.rst: -------------------------------------------------------------------------------- 1 | Train and Evaluate 2 | ================== 3 | Tutorial 4 | -------- 5 | Step 1: Start CLI 6 | ~~~~~~~~~~~~~~~~~ 7 | .. code-block:: sh 8 | 9 | biokeen start 10 | 11 | Step 2: Select data source 12 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 13 | .. image:: ../images/data_source.png 14 | 15 | Step 3: Select database 16 | ~~~~~~~~~~~~~~~~~~~~~~~ 17 | .. image:: ../images/select_database.png 18 | 19 | Step 4: Specify execution mode 20 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 21 | .. image:: ../images/execution_mode.png 22 | 23 | Step 5: Select KGE model 24 | ~~~~~~~~~~~~~~~~~~~~~~~~ 25 | .. image:: ../images/select_model.png 26 | 27 | Step 6: Specify model dependent hyper-parameters 28 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 29 | 30 | Step 7: Specify the batch-size 31 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 32 | .. image:: ../images/batch_size.png 33 | 34 | Step 8: Specify the number of training epochs 35 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 36 | .. image:: ../images/epochs.png 37 | 38 | Step 9: Specify whether to evaluate the model 39 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 40 | .. image:: ../images/epochs.png 41 | 42 | Step 10: Specify whether to evaluate the model 43 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 44 | .. image:: ../images/epochs.png 45 | 46 | Step 11: Provide a random seed 47 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 48 | .. image:: ../images/random_seed.png 49 | 50 | Step 12: Specify preferred device 51 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 52 | .. 
image:: ../images/preferred_device.png 53 | 54 | Step 13: Specify the path to the output directory 55 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 56 | .. image:: ../images/output_directory.png 57 | 58 | Reference 59 | --------- 60 | .. click:: biokeen.cli.cli:start 61 | :prog: biokeen 62 | :show-nested: 63 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Configuration file for the Sphinx documentation builder. 4 | # 5 | # This file does only contain a selection of the most common options. For a 6 | # full list see the documentation: 7 | # http://www.sphinx-doc.org/en/master/config 8 | 9 | # -- Path setup -------------------------------------------------------------- 10 | 11 | # If extensions (or modules to document with autodoc) are in another directory, 12 | # add these directories to sys.path here. If the directory is relative to the 13 | # documentation root, use os.path.abspath to make it absolute, like shown here. 14 | # 15 | import os 16 | import re 17 | import sys 18 | 19 | # -- Mockup PyTorch to exclude it while compiling the docs-------------------------------------------------------------- 20 | autodoc_mock_imports = ['pykeen'] 21 | 22 | sys.path.insert(0, os.path.abspath('../../src')) 23 | 24 | # -- Project information ----------------------------------------------------- 25 | 26 | project = 'BioKEEN' 27 | copyright = '2018, Mehdi Ali, Charles Tapley Hoyt, and Daniel Domingo-Fernández' 28 | author = 'Mehdi Ali, Charles Tapley Hoyt, and Daniel Domingo-Fernández' 29 | 30 | # The full version, including alpha/beta/rc tags. 31 | release = '0.0.15-dev' 32 | 33 | # The short X.Y version. 
34 | parsed_version = re.match( 35 | '(?P\d+)\.(?P\d+)\.(?P\d+)(?:-(?P[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?(?:\+(?P[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?', 36 | release 37 | ) 38 | version = parsed_version.expand('\g.\g.\g') 39 | 40 | if parsed_version.group('release'): 41 | tags.add('prerelease') 42 | 43 | # -- General configuration --------------------------------------------------- 44 | 45 | # If your documentation needs a minimal Sphinx version, state it here. 46 | # 47 | # needs_sphinx = '1.0' 48 | 49 | # Add any Sphinx extension module names here, as strings. They can be 50 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 51 | # ones. 52 | extensions = [ 53 | 'sphinx.ext.autodoc', 54 | 'sphinx.ext.intersphinx', 55 | 'sphinx.ext.coverage', 56 | 'sphinx.ext.viewcode', 57 | 'sphinx_autodoc_typehints', 58 | 'sphinx_click.ext', 59 | ] 60 | 61 | # Add any paths that contain templates here, relative to this directory. 62 | # templates_path = ['_templates'] 63 | 64 | # The suffix(es) of source filenames. 65 | # You can specify multiple suffix as a list of string: 66 | # 67 | # source_suffix = ['.rst', '.md'] 68 | source_suffix = '.rst' 69 | 70 | # The master toctree document. 71 | master_doc = 'index' 72 | 73 | # The language for content autogenerated by Sphinx. Refer to documentation 74 | # for a list of supported languages. 75 | # 76 | # This is also used if you do content translation via gettext catalogs. 77 | # Usually you set "language" from the command line for these cases. 78 | language = None 79 | 80 | # List of patterns, relative to source directory, that match files and 81 | # directories to ignore when looking for source files. 82 | # This pattern also affects html_static_path and html_extra_path. 83 | exclude_patterns = [] 84 | 85 | # The name of the Pygments (syntax highlighting) style to use. 
86 | pygments_style = None 87 | 88 | # -- Options for HTML output ------------------------------------------------- 89 | 90 | # The theme to use for HTML and HTML Help pages. See the documentation for 91 | # a list of builtin themes. 92 | # 93 | html_theme = 'sphinx_rtd_theme' 94 | 95 | # Theme options are theme-specific and customize the look and feel of a theme 96 | # further. For a list of options available for each theme, see the 97 | # documentation. 98 | # 99 | # html_theme_options = {} 100 | 101 | # Add any paths that contain custom static files (such as style sheets) here, 102 | # relative to this directory. They are copied after the builtin static files, 103 | # so a file named "default.css" will overwrite the builtin "default.css". 104 | # html_static_path = ['_static'] 105 | 106 | # Custom sidebar templates, must be a dictionary that maps document names 107 | # to template names. 108 | # 109 | # The default sidebars (for documents that don't match any pattern) are 110 | # defined by theme itself. Builtin themes are using these templates by 111 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html', 112 | # 'searchbox.html']``. 113 | # 114 | # html_sidebars = {} 115 | 116 | 117 | # -- Options for HTMLHelp output --------------------------------------------- 118 | 119 | # Output file base name for HTML help builder. 120 | htmlhelp_basename = 'BioKEENdoc' 121 | 122 | # -- Options for LaTeX output ------------------------------------------------ 123 | 124 | latex_elements = { 125 | # The paper size ('letterpaper' or 'a4paper'). 126 | # 127 | # 'papersize': 'letterpaper', 128 | 129 | # The font size ('10pt', '11pt' or '12pt'). 130 | # 131 | # 'pointsize': '10pt', 132 | 133 | # Additional stuff for the LaTeX preamble. 134 | # 135 | # 'preamble': '', 136 | 137 | # Latex figure (float) alignment 138 | # 139 | # 'figure_align': 'htbp', 140 | } 141 | 142 | # Grouping the document tree into LaTeX files. 
List of tuples 143 | # (source start file, target name, title, 144 | # author, documentclass [howto, manual, or own class]). 145 | latex_documents = [ 146 | (master_doc, 'BioKEEN.tex', 'BioKEEN Documentation', 147 | 'Mehdi Ali, Charles Tapley Hoyt, and Daniel Domingo-Fernández', 'manual'), 148 | ] 149 | 150 | # -- Options for manual page output ------------------------------------------ 151 | 152 | # One entry per manual page. List of tuples 153 | # (source start file, name, description, authors, manual section). 154 | man_pages = [ 155 | (master_doc, 'biokeen', 'BioKEEN Documentation', 156 | [author], 1) 157 | ] 158 | 159 | # -- Options for Texinfo output ---------------------------------------------- 160 | 161 | # Grouping the document tree into Texinfo files. List of tuples 162 | # (source start file, target name, title, author, 163 | # dir menu entry, description, category) 164 | texinfo_documents = [ 165 | (master_doc, 'BioKEEN', 'BioKEEN Documentation', 166 | author, 'BioKEEN', 'One line description of project.', 167 | 'Miscellaneous'), 168 | ] 169 | 170 | # -- Options for Epub output ------------------------------------------------- 171 | 172 | # Bibliographic Dublin Core info. 173 | epub_title = project 174 | 175 | # The unique identifier of the text. This can be a ISBN number 176 | # or the project homepage. 177 | # 178 | # epub_identifier = '' 179 | 180 | # A unique identification for the text. 181 | # 182 | # epub_uid = '' 183 | 184 | # A list of files that should not be packed into the epub file. 185 | epub_exclude_files = ['search.html'] 186 | 187 | # -- Extension configuration ------------------------------------------------- 188 | 189 | # -- Options for intersphinx extension --------------------------------------- 190 | 191 | # Example configuration for intersphinx: refer to the Python standard library. 
192 | intersphinx_mapping = { 193 | 'https://docs.python.org/3': None, 194 | } 195 | -------------------------------------------------------------------------------- /docs/source/convert.rst: -------------------------------------------------------------------------------- 1 | Handling BEL 2 | ============ 3 | .. autofunction:: biokeen.convert.to_pykeen_path 4 | .. autofunction:: biokeen.convert.to_pykeen_df 5 | -------------------------------------------------------------------------------- /docs/source/hyper_parameter_optimization.rst: -------------------------------------------------------------------------------- 1 | Apply a Hyper-Parameter Optimization 2 | ==================================== 3 | 4 | Here, we describe how to define an experiment that runs in hyper-parameter optimization mode. 5 | 6 | 7 | Configure your experiment 8 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 9 | To run experiments programmatically, the core software library PyKEEN should be used. To run PyKEEN in 10 | hyper-parameter optimization (HPO) mode, please set **execution_mode** to **HPO_mode**. In HPO mode, several values 11 | can be provided for the hyper-parameters from which different settings will be tested based 12 | on the hyper-parameter optimization algorithm. The possible values for a single hyper-parameter need to be provided as 13 | a list. The **maximum_number_of_hpo_iters** parameter defines how many HPO iterations should be performed. 14 | 15 | .. 
code-block:: python 16 | 17 | config = dict( 18 | training_set_path = 'data/corpora/compath.tsv', 19 | test_set_ratio = 0.1, 20 | execution_mode = 'HPO_mode', 21 | kg_embedding_model_name = 'TransE', 22 | embedding_dim = [50,100,150], 23 | normalization_of_entities = 2, # corresponds to L2 24 | scoring_function = [1,2], # corresponds to L1 and L2 25 | margin_loss = [1,1.5,2], 26 | learning_rate = [0.1,0.01], 27 | batch_size = [32,128], 28 | num_epochs = 1000, 29 | maximum_number_of_hpo_iters = 5, 30 | filter_negative_triples = True, 31 | random_seed = 2, 32 | preferred_device = 'cpu', 33 | ) 34 | 35 | 36 | Run your experiment 37 | ~~~~~~~~~~~~~~~~~~~ 38 | The experiment is started with the *run* function, and the exported results are saved in the output directory. 39 | 40 | .. code-block:: python 41 | 42 | results = pykeen.run( 43 | config=config, 44 | output_directory=output_directory, 45 | ) 46 | 47 | Access your results 48 | ~~~~~~~~~~~~~~~~~~~ 49 | Show all keys contained in ``results``: 50 | 51 | .. code-block:: python 52 | 53 | print('Keys:', *sorted(results.results.keys()), sep='\n ') 54 | 55 | 56 | Access trained KGE model 57 | ~~~~~~~~~~~~~~~~~~~~~~~~ 58 | .. code-block:: python 59 | 60 | results.results['trained_model'] 61 | 62 | Access the losses 63 | ~~~~~~~~~~~~~~~~~~ 64 | 65 | .. code-block:: python 66 | 67 | results.results['losses'] 68 | 69 | Access evaluation results 70 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 71 | 72 | .. 
code-block:: python 73 | 74 | results.results['eval_summary'] 75 | 76 | -------------------------------------------------------------------------------- /docs/source/images/batch_size.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmartDataAnalytics/BioKEEN/9d401e7a22f5ae5fe3aa57e4bf3f8bc1dfa812bc/docs/source/images/batch_size.png -------------------------------------------------------------------------------- /docs/source/images/data_source.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmartDataAnalytics/BioKEEN/9d401e7a22f5ae5fe3aa57e4bf3f8bc1dfa812bc/docs/source/images/data_source.png -------------------------------------------------------------------------------- /docs/source/images/epochs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmartDataAnalytics/BioKEEN/9d401e7a22f5ae5fe3aa57e4bf3f8bc1dfa812bc/docs/source/images/epochs.png -------------------------------------------------------------------------------- /docs/source/images/execution_mode.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmartDataAnalytics/BioKEEN/9d401e7a22f5ae5fe3aa57e4bf3f8bc1dfa812bc/docs/source/images/execution_mode.png -------------------------------------------------------------------------------- /docs/source/images/output_directory.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmartDataAnalytics/BioKEEN/9d401e7a22f5ae5fe3aa57e4bf3f8bc1dfa812bc/docs/source/images/output_directory.png -------------------------------------------------------------------------------- /docs/source/images/preferred_device.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/SmartDataAnalytics/BioKEEN/9d401e7a22f5ae5fe3aa57e4bf3f8bc1dfa812bc/docs/source/images/preferred_device.png -------------------------------------------------------------------------------- /docs/source/images/provide_dataset.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmartDataAnalytics/BioKEEN/9d401e7a22f5ae5fe3aa57e4bf3f8bc1dfa812bc/docs/source/images/provide_dataset.png -------------------------------------------------------------------------------- /docs/source/images/random_seed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmartDataAnalytics/BioKEEN/9d401e7a22f5ae5fe3aa57e4bf3f8bc1dfa812bc/docs/source/images/random_seed.png -------------------------------------------------------------------------------- /docs/source/images/select_database.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmartDataAnalytics/BioKEEN/9d401e7a22f5ae5fe3aa57e4bf3f8bc1dfa812bc/docs/source/images/select_database.png -------------------------------------------------------------------------------- /docs/source/images/select_model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmartDataAnalytics/BioKEEN/9d401e7a22f5ae5fe3aa57e4bf3f8bc1dfa812bc/docs/source/images/select_model.png -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | BioKEEN 2 | ======= 3 | BioKEEN (Biological KnowlEdge EmbeddiNgs) is a package for training and evaluating **biological** knowledge graph 4 | embeddings built on `PyKEEN `_. 
Within BioKEEN several biomedical 5 | databases are directly accessible for training and evaluating biological knowledge graph embeddings 6 | (see :ref:`bio2bel_repositories`). 7 | 8 | Because we use PyKEEN as the core underlying framework, implementations of 10 9 | knowledge graph embedding models are currently available for BioKEEN. Furthermore, it can be run in training mode in 10 | which users provide their own set of hyper-parameter values, or in hyper-parameter optimization mode to find suitable 11 | hyper-parameter values from a set of user-defined values. BioKEEN can also be run without having experience in programming 12 | by using its interactive command line interface that can be started with the command "biokeen" from a terminal. 13 | 14 | Installation is as easy as getting the code from `PyPI <https://pypi.org/project/biokeen>`_ with 15 | :code:`python3 -m pip install biokeen`. 16 | 17 | Citation 18 | -------- 19 | If you use BioKEEN in your work, please cite [1]_: 20 | 21 | .. [1] Ali, M., *et al.* (2018). `BioKEEN: A library for learning and evaluating biological knowledge graph embeddings 22 | `_. 23 | 24 | .. toctree:: 25 | :maxdepth: 2 26 | :caption: Getting Started 27 | :name: start 28 | 29 | installation 30 | 31 | .. toctree:: 32 | :maxdepth: 2 33 | :caption: CLI Usage 34 | :name: cli 35 | 36 | cli/train_and_evaluate 37 | cli/inference 38 | cli/summarize 39 | 40 | .. toctree:: 41 | :maxdepth: 2 42 | :caption: Running PyKEEN programmatically 43 | :name: prog 44 | 45 | train_and_evaluate 46 | hyper_parameter_optimization 47 | 48 | 49 | .. toctree:: 50 | :maxdepth: 2 51 | :caption: Reference 52 | :name: reference 53 | 54 | convert 55 | 56 | .. 
toctree:: 57 | :maxdepth: 2 58 | :caption: Biological Databases 59 | :name: bio2bel_repositories 60 | 61 | bio2bel_repositories 62 | 63 | Indices and tables 64 | ================== 65 | * :ref:`genindex` 66 | * :ref:`modindex` 67 | * :ref:`search` 68 | -------------------------------------------------------------------------------- /docs/source/installation.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | There are several ways to download and install BioKEEN. 4 | 5 | .. warning:: BioKEEN requires Python 3.6+ 6 | 7 | Easiest 8 | ~~~~~~~ 9 | Download the latest stable code from `PyPI <https://pypi.org/project/biokeen>`_ with: 10 | 11 | .. code-block:: sh 12 | 13 | $ pip install biokeen 14 | 15 | Get the Latest 16 | ~~~~~~~~~~~~~~~ 17 | Download the most recent code from `GitHub <https://github.com/SmartDataAnalytics/BioKEEN>`_ with: 18 | 19 | .. code-block:: sh 20 | 21 | $ pip install git+https://github.com/SmartDataAnalytics/BioKEEN.git 22 | -------------------------------------------------------------------------------- /docs/source/train_and_evaluate.rst: -------------------------------------------------------------------------------- 1 | Train and Evaluate 2 | ================== 3 | Here, we explain how to define and run experiments programmatically. This should be done using PyKEEN. 4 | 5 | Configure your experiment 6 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 7 | To programmatically train (and evaluate) a KGE model, a Python dictionary specifying the experiment must be created: 8 | 9 | .. 
code-block:: python 10 | 11 | config = dict( 12 | training_set_path = 'data/corpora/fb15k/compath.tsv', 13 | test_set_ratio = 0.1, 14 | execution_mode = 'Training_mode', 15 | kg_embedding_model_name = 'TransE', 16 | embedding_dim = 50, 17 | normalization_of_entities = 2, # corresponds to L2 18 | scoring_function = 1, # corresponds to L1 19 | margin_loss = 1, 20 | learning_rate = 0.01, 21 | batch_size = 32, 22 | num_epochs = 1000, 23 | filter_negative_triples = True, 24 | random_seed = 2, 25 | preferred_device = 'cpu', 26 | ) 27 | 28 | Run your experiment 29 | ~~~~~~~~~~~~~~~~~~~ 30 | .. code-block:: python 31 | 32 | results = pykeen.run( 33 | config=config, 34 | output_directory=output_directory, 35 | ) 36 | 37 | Access your results 38 | ~~~~~~~~~~~~~~~~~~~ 39 | Show all keys contained in ``results``: 40 | 41 | .. code-block:: python 42 | 43 | print('Keys:', *sorted(results.results.keys()), sep='\n ') 44 | 45 | 46 | Access trained KGE model 47 | ~~~~~~~~~~~~~~~~~~~~~~~~ 48 | .. code-block:: python 49 | 50 | results.results['trained_model'] 51 | 52 | Access the losses 53 | ~~~~~~~~~~~~~~~~~ 54 | .. code-block:: python 55 | 56 | results.results['losses'] 57 | 58 | Access evaluation results 59 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 60 | .. 
code-block:: python 61 | 62 | results.results['eval_summary'] 63 | 64 | -------------------------------------------------------------------------------- /notebooks/Case Scenario ADEPTUS and HSDN.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Using BioKEEN to Train and Evaluate a KGE Model on ADEPTUS" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import json\n", 17 | "import logging\n", 18 | "import os\n", 19 | "import sys\n", 20 | "import time\n", 21 | "import warnings\n", 22 | "\n", 23 | "import matplotlib\n", 24 | "import matplotlib.pyplot as plt\n", 25 | "import numpy as np\n", 26 | "\n", 27 | "import biokeen\n", 28 | "import pykeen" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 2, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "warnings.filterwarnings('ignore', category=UserWarning)" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 3, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "logging.basicConfig(level=logging.INFO)\n", 47 | "logging.getLogger('biokeen').setLevel(logging.INFO)" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 4, 53 | "metadata": {}, 54 | "outputs": [ 55 | { 56 | "name": "stdout", 57 | "output_type": "stream", 58 | "text": [ 59 | "3.7.0 (default, Jul 23 2018, 20:22:55) \n", 60 | "[Clang 9.1.0 (clang-902.0.39.2)]\n" 61 | ] 62 | } 63 | ], 64 | "source": [ 65 | "print(sys.version)" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 5, 71 | "metadata": {}, 72 | "outputs": [ 73 | { 74 | "name": "stdout", 75 | "output_type": "stream", 76 | "text": [ 77 | "Sun Jan 20 21:23:06 2019\n" 78 | ] 79 | } 80 | ], 81 | "source": [ 82 | "print(time.asctime())" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 
87 | "execution_count": 6, 88 | "metadata": {}, 89 | "outputs": [ 90 | { 91 | "name": "stdout", 92 | "output_type": "stream", 93 | "text": [ 94 | "PyKEEN Version: 0.0.19-dev\n", 95 | "BioKEEN Version: 0.0.12-dev\n" 96 | ] 97 | } 98 | ], 99 | "source": [ 100 | "print(f'PyKEEN Version: {pykeen.constants.VERSION}')\n", 101 | "print(f'BioKEEN Version: {biokeen.constants.VERSION}')" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": 7, 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [ 110 | "output_directory = os.path.join(\n", 111 | " os.path.expanduser('~'), \n", 112 | " 'Desktop', \n", 113 | " 'biokeen_test'\n", 114 | ")" 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "## Step 1: Configure your experiment" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 8, 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "config = dict(\n", 131 | " training_set_path = [\n", 132 | " 'bio2bel:adeptus', \n", 133 | " 'bio2bel:hsdn',\n", 134 | " ],\n", 135 | " execution_mode = 'Training_mode', \n", 136 | " kg_embedding_model_name = 'TransE',\n", 137 | " embedding_dim = 50, \n", 138 | " normalization_of_entities = 2, # corresponds to L2\n", 139 | " scoring_function = 1, # corresponds to L1\n", 140 | " margin_loss = 1,\n", 141 | " learning_rate = 0.01,\n", 142 | " batch_size = 128,\n", 143 | " num_epochs = 1000, \n", 144 | " test_set_ratio = 0.1,\n", 145 | " filter_negative_triples = True,\n", 146 | " random_seed = 2,\n", 147 | " preferred_device = 'cpu',\n", 148 | ")" 149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "metadata": {}, 154 | "source": [ 155 | "## Step 2: Run BioKEEN to Train and Evaluate the Model" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": 9, 161 | "metadata": {}, 162 | "outputs": [ 163 | { 164 | "name": "stderr", 165 | "output_type": "stream", 166 | "text": [ 167 | 
"INFO:pykeen.utilities.pipeline:-------------Train KG Embeddings-------------\n", 168 | "Training epoch: 100%|██████████| 1000/1000 [10:19<00:00, 1.61it/s]\n", 169 | "INFO:pykeen.utilities.pipeline:-------------Start Evaluation-------------\n", 170 | "INFO:pykeen.utilities.evaluation_utils.metrics_computations:Evaluation took 121.18s seconds\n" 171 | ] 172 | } 173 | ], 174 | "source": [ 175 | "results = pykeen.run(\n", 176 | " config=config,\n", 177 | " output_directory=output_directory,\n", 178 | ")" 179 | ] 180 | }, 181 | { 182 | "cell_type": "markdown", 183 | "metadata": {}, 184 | "source": [ 185 | "## Step 3: Show Exported Results" 186 | ] 187 | }, 188 | { 189 | "cell_type": "markdown", 190 | "metadata": {}, 191 | "source": [ 192 | "### 3.1: Show Trained Model" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": 10, 198 | "metadata": {}, 199 | "outputs": [ 200 | { 201 | "data": { 202 | "text/plain": [ 203 | "TransE(\n", 204 | " (criterion): MarginRankingLoss()\n", 205 | " (entity_embeddings): Embedding(4127, 50)\n", 206 | " (relation_embeddings): Embedding(3, 50)\n", 207 | ")" 208 | ] 209 | }, 210 | "execution_count": 10, 211 | "metadata": {}, 212 | "output_type": "execute_result" 213 | } 214 | ], 215 | "source": [ 216 | "results.results['trained_model']" 217 | ] 218 | }, 219 | { 220 | "cell_type": "markdown", 221 | "metadata": {}, 222 | "source": [ 223 | "### 3.2: Plot losses" 224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": 11, 229 | "metadata": {}, 230 | "outputs": [], 231 | "source": [ 232 | "losses = results.results['losses']" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": 12, 238 | "metadata": {}, 239 | "outputs": [ 240 | { 241 | "data": { 242 | "image/png": "\n", 243 | "text/plain": [ 244 | "
" 245 | ] 246 | }, 247 | "metadata": {}, 248 | "output_type": "display_data" 249 | } 250 | ], 251 | "source": [ 252 | "epochs = np.arange(len(losses))\n", 253 | "plt.title(r'Loss Per Epoch')\n", 254 | "plt.xlabel('epoch')\n", 255 | "plt.ylabel('loss')\n", 256 | "plt.plot(epochs, losses)\n", 257 | "plt.show()" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": 13, 263 | "metadata": {}, 264 | "outputs": [ 265 | { 266 | "name": "stdout", 267 | "output_type": "stream", 268 | "text": [ 269 | "{\n", 270 | " \"mean_rank\": 315.866030283081,\n", 271 | " \"hits@k\": {\n", 272 | " \"1\": 0.10368663594470046,\n", 273 | " \"3\": 0.21329822251481237,\n", 274 | " \"5\": 0.293614219881501,\n", 275 | " \"10\": 0.4206714944042133\n", 276 | " }\n", 277 | "}\n" 278 | ] 279 | } 280 | ], 281 | "source": [ 282 | "print(json.dumps(results.results['eval_summary'], indent=2))" 283 | ] 284 | } 285 | ], 286 | "metadata": { 287 | "kernelspec": { 288 | "display_name": "Python 3", 289 | "language": "python", 290 | "name": "python3" 291 | }, 292 | "language_info": { 293 | "codemirror_mode": { 294 | "name": "ipython", 295 | "version": 3 296 | }, 297 | "file_extension": ".py", 298 | "mimetype": "text/x-python", 299 | "name": "python", 300 | "nbconvert_exporter": "python", 301 | "pygments_lexer": "ipython3", 302 | "version": "3.7.0" 303 | } 304 | }, 305 | "nbformat": 4, 306 | "nbformat_minor": 2 307 | } 308 | -------------------------------------------------------------------------------- /notebooks/Case Scenario ADEPTUS.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Using BioKEEN to Train and Evaluate a KGE Model on ADEPTUS" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import json\n", 17 | "import logging\n", 18 | "import os\n", 19 | "import 
sys\n", 20 | "import time\n", 21 | "import warnings\n", 22 | "\n", 23 | "import matplotlib\n", 24 | "import matplotlib.pyplot as plt\n", 25 | "import numpy as np\n", 26 | "\n", 27 | "import biokeen\n", 28 | "import pykeen" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 2, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "warnings.filterwarnings('ignore', category=UserWarning)" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 3, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "logging.basicConfig(level=logging.INFO)\n", 47 | "logging.getLogger('biokeen').setLevel(logging.INFO)" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 4, 53 | "metadata": {}, 54 | "outputs": [ 55 | { 56 | "name": "stdout", 57 | "output_type": "stream", 58 | "text": [ 59 | "3.7.0 (default, Jul 23 2018, 20:22:55) \n", 60 | "[Clang 9.1.0 (clang-902.0.39.2)]\n" 61 | ] 62 | } 63 | ], 64 | "source": [ 65 | "print(sys.version)" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 5, 71 | "metadata": {}, 72 | "outputs": [ 73 | { 74 | "name": "stdout", 75 | "output_type": "stream", 76 | "text": [ 77 | "Sun Jan 20 21:07:16 2019\n" 78 | ] 79 | } 80 | ], 81 | "source": [ 82 | "print(time.asctime())" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 6, 88 | "metadata": {}, 89 | "outputs": [ 90 | { 91 | "name": "stdout", 92 | "output_type": "stream", 93 | "text": [ 94 | "PyKEEN Version: 0.0.19-dev\n", 95 | "BioKEEN Version: 0.0.12-dev\n" 96 | ] 97 | } 98 | ], 99 | "source": [ 100 | "print(f'PyKEEN Version: {pykeen.constants.VERSION}')\n", 101 | "print(f'BioKEEN Version: {biokeen.constants.VERSION}')" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": 7, 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [ 110 | "output_directory = os.path.join(\n", 111 | " os.path.expanduser('~'), \n", 112 | " 'Desktop', \n", 113 | " 
'biokeen_test'\n", 114 | ")" 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "## Step 1: Configure your experiment" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 8, 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "config = dict(\n", 131 | " training_set_path = 'bio2bel:adeptus',\n", 132 | " execution_mode = 'Training_mode', \n", 133 | " kg_embedding_model_name = 'TransE',\n", 134 | " embedding_dim = 50, \n", 135 | " normalization_of_entities = 2, # corresponds to L2\n", 136 | " scoring_function = 1, # corresponds to L1\n", 137 | " margin_loss = 1,\n", 138 | " learning_rate = 0.01,\n", 139 | " batch_size = 128,\n", 140 | " num_epochs = 1000, \n", 141 | " test_set_ratio = 0.1,\n", 142 | " filter_negative_triples = True,\n", 143 | " random_seed = 2,\n", 144 | " preferred_device = 'cpu',\n", 145 | ")" 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "## Step 2: Run BioKEEN to Train and Evaluate the Model" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 10, 158 | "metadata": {}, 159 | "outputs": [ 160 | { 161 | "name": "stderr", 162 | "output_type": "stream", 163 | "text": [ 164 | "INFO:pykeen.utilities.pipeline:-------------Train KG Embeddings-------------\n", 165 | "Training epoch: 100%|██████████| 1000/1000 [02:57<00:00, 5.62it/s]\n", 166 | "INFO:pykeen.utilities.pipeline:-------------Start Evaluation-------------\n", 167 | "INFO:pykeen.utilities.evaluation_utils.metrics_computations:Evaluation took 32.19s seconds\n" 168 | ] 169 | } 170 | ], 171 | "source": [ 172 | "results = pykeen.run(\n", 173 | " config=config,\n", 174 | " output_directory=output_directory,\n", 175 | ")" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": 11, 181 | "metadata": {}, 182 | "outputs": [ 183 | { 184 | "name": "stdout", 185 | "output_type": "stream", 186 | "text": [ 187 | 
"Keys:\n", 188 | " entity_to_embedding\n", 189 | " entity_to_id\n", 190 | " eval_summary\n", 191 | " final_configuration\n", 192 | " losses\n", 193 | " relation_to_embedding\n", 194 | " relation_to_id\n", 195 | " trained_model\n" 196 | ] 197 | } 198 | ], 199 | "source": [ 200 | "print('Keys:', *sorted(results.results.keys()), sep='\\n ')" 201 | ] 202 | }, 203 | { 204 | "cell_type": "markdown", 205 | "metadata": {}, 206 | "source": [ 207 | "## Step 3: Show Exported Results" 208 | ] 209 | }, 210 | { 211 | "cell_type": "markdown", 212 | "metadata": {}, 213 | "source": [ 214 | "### 3.1: Show Trained Model" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": 12, 220 | "metadata": {}, 221 | "outputs": [ 222 | { 223 | "data": { 224 | "text/plain": [ 225 | "TransE(\n", 226 | " (criterion): MarginRankingLoss()\n", 227 | " (entity_embeddings): Embedding(3696, 50)\n", 228 | " (relation_embeddings): Embedding(2, 50)\n", 229 | ")" 230 | ] 231 | }, 232 | "execution_count": 12, 233 | "metadata": {}, 234 | "output_type": "execute_result" 235 | } 236 | ], 237 | "source": [ 238 | "results.results['trained_model']" 239 | ] 240 | }, 241 | { 242 | "cell_type": "markdown", 243 | "metadata": {}, 244 | "source": [ 245 | "### 3.2: Plot losses" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": 13, 251 | "metadata": {}, 252 | "outputs": [], 253 | "source": [ 254 | "losses = results.results['losses']" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": 14, 260 | "metadata": {}, 261 | "outputs": [ 262 | { 263 | "data": { 264 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYUAAAEWCAYAAACJ0YulAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzt3XecVOW9x/HPb2cbZVk6wlKWKgoKCFLsRiWKURL1qiRq7CY3XaNBE0u8JjExakxijCZ6LVG8EjUSMUHBrgFZUFRQelukLLCUBbb/7h9zdpjthZ1t832/XvNy5pwzZ56zR+Y7z/Oc8zzm7oiIiAAkNHcBRESk5VAoiIhIhEJBREQiFAoiIhKhUBARkQiFgoiIRCgURNo4M8s0MzezxOYui7R8CgVpscxsnZmd3gyfe7mZlZhZnpntMbOPzOwrjbh/N7N9wf7LHjc11v5FDoV+OYhU7T/ufoKZJQDfAZ4zswx3z63rDsws0d2Lq1k9yt1XNUpJRRqRagrSKpnZNWa2ysx2mtksM+sTLDczu9/MtgW/8j8xs5HBuilmtszM9prZJjP7cW2f4+6lwGNAO2BwsJ+vBLWHXWb2vpkdHVWudWb2EzP7GNhX3yYbM7vDzP5uZv8XlHOxmY2KWn+Emb0ZfPZSMzs3al07M7vXzNab2W4ze9fM2kXt/htmtsHMtpvZT+tTLokfCgVpdczsS8CvgAuB3sB64Nlg9WTgJGAYkB5ssyNY9yhwnbunASOB1+vwWYnA1UAesNLMxhAOieuAbsDDwCwzS4l62zTgbKBzDTWFmkwFZgJdgWeAf5hZkpklAf8EXgV6At8Dnjazw4P3/RYYCxwXvPcmoDRqvycAhwOnAbeZ2RENKJu0cQoFaY2+ATzm7ovdvQC4GZhkZplAEZAGDAfM3T9z983B+4qAI82sk7vnuvviGj5jopntArYQ/pL/mrvvBq4FHnb3Be5e4u5PAAXAxKj3/t7dN7r7gRr2vzj4tV/2+HLUukXu/nd3LwLuA1KD/U8EOgJ3u3uhu78OvAxMC5q5rgR+4O6bgrK9H/x9yvzc3Q+4+xJgCTAKkQoUCtIa9SFcOwDA3fMI1wYygi/KPwIPAtvM7BEz6xRsej4wBVhvZm+Z2aQaPmO+u3d29+7uPtHd5wbLBwA3RH+hA/2CMpXZWIdjOCbYf9ljTlXvD5qvsoP99wE2BsvKrAcygO6Ew2N1DZ+5Jer5fsIBI1KOQkFaoy8IfzkDYGYdCDflbAJw99+7+1jgSMLNSDcGyxe6+1TCTS//AJ5rwGdvBH5R4Qu9vbvPiNrmUIce7lf2JKgB9CV8zF8A/YJlZfoTPu7tQD5Bv4dIQykUpKVLMrPUqEciMAO4wsxGB235vwQWuPs6MzvWzCYE7e/7CH9RlppZspl9w8zSg2aZPZRvb6+rvwDfCj7DzKyDmZ1tZmmNdLwAY83svOBYf0i4eWo+sIDwL/ybgj6GU4BzgGejOsTvM7M+ZhYys0kV+jpEaqVQkJbuFeBA1OOOoCnnVuB5YDPhX8cXB9t3IvzFnUu4aWUHcE+w7lJgnZntAb5FuG+iXtw9C7iGcBNVLrAKuLwBx7Wkwn0Kv4ta9xJwUbD/S4Hz3L3I3QsJh8BZhGsGfwIuc/fPg/f9GPgEWAjsBH6N/o1LPZkm2RFpOczsDmCIu1/S3GWR+KRfESIiEqFQEBGRCDUfiYhIhGoKIiIS0eoGxOvevbtnZmY2dzFERFqVRYsWbXf3HrVt1+pCITMzk6ysrOYuhohIq2Jm62vfSs1HIiISRaEgIiIRCgUREYlQKIiISIRCQUREIhQKIiISoVAQEZGIuAmFhet2cu+ryykqacgQ+iIi8SFuQmHx+lz+8PoqCosVCiIi1YmbUAglGAAlGgBQRKRacRMKiWWhUKJQEBGpTsxCwcweM7NtZvZpNevNzH5vZqvM7GMzOyZWZQEIhcKHWlyqUBARqU4sawqPA2f
WsP4sYGjwuBZ4KIZlOVhTUCiIiFQrZqHg7m8Tnjy8OlOBJz1sPtDZzHrHqjxlfQrFpepoFhGpTnP2KWQAG6NeZwfLKjGza80sy8yycnJyGvRhqimIiNSuVXQ0u/sj7j7O3cf16FHrHBFVOlhTUCiIiFSnOUNhE9Av6nXfYFlMJCaED1U1BRGR6jVnKMwCLguuQpoI7Hb3zbH6sEhNQZekiohUK2bTcZrZDOAUoLuZZQO3A0kA7v5n4BVgCrAK2A9cEauygPoURETqImah4O7TalnvwHdi9fkVhUK6+khEpDatoqO5MaimICJSu7gJBV19JCJSu7gJBV19JCJSu7gJBdUURERqF3ehUKKOZhGRasVNKCTqPgURkVrFTSiEdPWRiEit4iYUEjXzmohIreImFJKCSXY0R7OISPXiJhRSk0IA5BcpFEREqhNHoRA+1PyikmYuiYhIyxVHoRDUFIoVCiIi1YmbUEhJLKspqPlIRKQ6cRMKZkZKYgIFaj4SEalW3IQChJuQ1KcgIlK9OAuFBDUfiYjUIM5CIaSOZhGRGsRVKCQmmEZJFRGpQVyFQijBKNGAeCIi1YqrUEgw09hHIiI1iKtQSAyZRkkVEalBXIVCyBQKIiI1ia9QSDBK1XwkIlKtuAsFzbwmIlK9uAoFdTSLiNQsrkJBHc0iIjWLq1BIUEeziEiN4ioUEtXRLCJSo7gKBXU0i4jULK5CIcFUUxARqUlchUJiSAPiiYjUJK5CIcGMUoWCiEi14ioUQgm6T0FEpCZxFwrqaBYRqV58hYI6mkVEahRXoaA7mkVEahZXoaA7mkVEahZXoaCOZhGRmsVfKKijWUSkWjENBTM708yWm9kqM5texfr+ZvaGmX1oZh+b2ZRYlicxQTeviYjUJGahYGYh4EHgLOBIYJqZHVlhs58Bz7n7GOBi4E+xKg9AWmoSB4pKKCopjeXHiIi0WrGsKYwHVrn7GncvBJ4FplbYxoFOwfN04IsYlocuHZIByN1fGMuPERFptWIZChnAxqjX2cGyaHcAl5hZNvAK8L2qdmRm15pZlpll5eTkNLhAXduHQ2HnPoWCiEhVmrujeRrwuLv3BaYAT5lZpTK5+yPuPs7dx/Xo0aPBH9alfRKgUBARqU4sQ2ET0C/qdd9gWbSrgOcA3P0/QCrQPVYFap+SCMCBwpJYfYSISKsWy1BYCAw1s4Fmlky4I3lWhW02AKcBmNkRhEOh4e1DtWiXFAIgv0gdzSIiVYlZKLh7MfBdYA7wGeGrjJaa2Z1mdm6w2Q3ANWa2BJgBXO4eu7vLykLhQJFqCiIiVUmM5c7d/RXCHcjRy26Ler4MOD6WZYiWmhzOQIWCiEjVmrujuUlFmo/UpyAiUqW4CoVUNR+JiNQorkIhKZRAYoKRr1AQEalSXIUChJuQVFMQEala3IVCanJINQURkWrEXSi0Swrp5jURkWrEZyiopiAiUqW4C4XUpATd0SwiUo04DAXVFEREqhN3odBOHc0iItWKv1BQR7OISLXiMhT2KxRERKoUd6GQ3j6J3QeKmrsYIiItUtyFQpf2yeQVFFNYrCuQREQqir9Q6BCep3nXfk3JKSJSUfyFQjBPc+5+NSGJiFQUh6EQrins3KeagohIRXEbCmo+EhGpLP5CoYOaj0REqhN/oRDUFHJVUxARqSTuQiE1KUS7pBC56lMQEakk7kIBwlcgqflIRKSyuAyFzu2T1XwkIlKFuAyFLh2SFAoiIlWIy1DokJzI/gINiiciUlF8hkJKIvuLipu7GCIiLU5chkK75JBqCiIiVYjLUAiZsWNfIZ9t3tPcRRERaVHiMhSWBWFw+0tLm7kkIiItS1yGQlFJeC6FlKS4PHwRkWrF5bfiuAFdAejeMaWZSyIi0rLEZShMP2s4AF2DCXdERCQsLkMhOTF82I++uxZ3b+bSiIi0HHUKBTP7gZl1srBHzWyxmU2OdeGaQnGpQkFEpExdawpXuvseYDLQBbgUuDtmpWp
C+UW6X0FEpExdQ8GC/04BnnL3pVHLWqULxvYFIGtdbjOXRESk5ahrKCwys1cJh8IcM0sDSmNXrNgbnxm+AumKxxc2c0lERFqOxDpudxUwGljj7vvNrCtwReyKFXu6R0FEpLK6fjNOApa7+y4zuwT4GbA7dsWKvcQEhYKISEV1/WZ8CNhvZqOAG4DVwJO1vcnMzjSz5Wa2ysymV7PNhWa2zMyWmtkzdS75ISrRpagiIpXUtfmo2N3dzKYCf3T3R83sqpreYGYh4EHgDCAbWGhms9x9WdQ2Q4GbgePdPdfMejbsMOqvpLRVd4mIiMREXWsKe83sZsKXos42swQgqZb3jAdWufsady8EngWmVtjmGuBBd88FcPdtdS/6oSkqUU1BRKSiuobCRUAB4fsVtgB9gXtqeU8GsDHqdXawLNowYJiZvWdm883szDqW55AdP6R7U32UiEirUadQCILgaSDdzL4C5Lt7rX0KdZAIDAVOAaYBfzGzzhU3MrNrzSzLzLJycnIa4WMho3M7fnj6UMzgQKFuYBMRgboPc3Eh8AHwX8CFwAIzu6CWt20C+kW97hssi5YNzHL3IndfC6wgHBLluPsj7j7O3cf16NGjLkWuk+GHpeEOi9brBjYREah789FPgWPd/Zvufhnh/oJba3nPQmComQ00s2TgYmBWhW3+QbiWgJl1J9yctKaOZTpkJw/rSVLIeGtFk3VliIi0aHUNhYQKncA7anuvuxcD3wXmAJ8Bz7n7UjO708zODTabA+wws2XAG8CN7r6jXkdwCNolh+jWIYVd+4ua6iNFRFq0ul6S+m8zmwPMCF5fBLxS25vc/ZWK27n7bVHPHbg+eDQLM5i5KJuJg7pxzIAuDOzeobmKIiLS7OoUCu5+o5mdDxwfLHrE3V+MXbGazubd+QDcMHMJAEtun0x6u9quthURaZvqWlPA3Z8Hno9hWVqEv7y9hh9/+fDmLoaISLOosV/AzPaa2Z4qHnvNbE9TFTKW5t1wMqlRg+PtLyxh6578ZiyRiEjzqa2zOM3dO1XxSHP3Tk1VyFga3KMjEwZ2i7x+7L21TPjlPAWDiMQlDRVajZ37Cpu7CCIiTU6hAJRWMWKqBlEVkXikUAC254VrBRY1wegBzd0sInFIoQCM7pcOwNj+XSLLNB6SiMQjhQJw+zkjeO1HJ/GlIw5O57C/sLgZSyQi0jwUCkBqUoihvdKYdmz/yLLH31/XfAUSEWkmCoUoXTok88zVEwB4f/UO8tWvICJxRqFQwXFDujOmf3hKh+15BZSWOn+Yt1KXqIpIXFAoVOG6kwYBcMKv3+DKJxZy72sruGPW0mYulYhI7CkUqtC5fXLk+ZvLwzO9zVryBTOzNlb3FhGRNkGhUIW+XdpVufzGv3/cxCUREWlaCoUq9O3SnpOHVT/t59srciguKW3CEomINA2FQjWeuHJ8lcszp8/mssc+4JevfN7EJRIRiT2FQh2kJFb+Mz323lo27tzfDKUREYkdhUIN7j7vKABmf//EKtef+Js3cI2cJyJtSJ1nXotHF4/vz8Xj+9e4zcfZuxnVr3MTlUhEJLZUU6ij2d8/gbEDutC/a/tyy6c++B6Z02dz18vLIsuWb9nL7v1FTV1EEZFDpppCHY3ok87z3z6O/KISXvxwEwvX7uSFDzdF1v/13bV8eeRhfPeZxWzdU8BRGen883snNGOJRUTqTzWFekpNCjFtfH9uO+fISuseenM1W/cUAPDJpt1NXTQRkUOmUGigzu2TeeDi0aSlHqxsvf75tmYskYjIoVMoHIKpozN4+8ZTq11fdmXSwnU7uXHmEj7dtJvf/PtzTeAjIi2W+hQOUZcOydWuO+7u17npzMP50f8tAWDmomwAOrdP4tqTBjdJ+URE6kM1hUbw/LePizz/n6kjIs83786PBEK04lLd2yAiLZNCoREM69Ux8vzSSZl8csfkGrdPDh38s//8n0tZsGZHzMomIlIfCoVG0C4pBEC3oCkpLTW
Jmd+aFFnfIy2F57998PVf31nL+6u288yCDfzve+u46JH5TVtgEZFqqE+hESSGEvjN+UczYVDXyLJjMw8+n3fDyXRKTeJ/po7g1peWsmVPPl//64Jy+7j6iSwKiksY2L0Dt58zAndnztKtfLQxl8RQAj85c3iTHY+IxC+FQiO58Nh+lZb95oKjefSdtaSlhP/Ml07K5NaXqp7Bbe5nWwF4Z+V2po7OYP6aHdwzZ3lkfa+0FC4/fmAMSi4icpC1tgHdxo0b51lZWc1djAb7+T+XktG5HZdMHEBqUohnP9jA9Bc+KbdNh+QQ+6q4bHXGNRMpLi3liN6d6N4xBYAXFmfTMy2VE4Z2b5Lyi0jrZGaL3H1crdspFJqXu7OvsISRt8/BDOp6Or4+oT/njcnggj//B4B1d58dw1KKSGunUGhlVm3Lo1NqIuN/Oa9B709JTOCBi0dz5sje5BeVkBRKIJRgjVxKEWmt6hoKuvqohRjSsyM9O6U2eBjuguJSvjfjQwCG3/pvbpxZ+f4IEZHaKBRamF98dSRfHtEr8vrVH50EwDmj+tT63qRQQmQIjegRXEVE6krNRy1U5vTZQLivoKC4hJTEEHfMWsrj76/jlinDKSgq5d7XVtS6n5nfmkRRcSkYjM/sSoIZv311OaEE44bJhwNw8j1vkJaayMvfq3qGORFp/erafKRLUluoK48fyKh+6QCkJIZvjrvj3BHccW54GI33Vm2v035ueG4JG4K5pL80vCfdOybzXFZ4DKbrzxjGjA82sn5H+bmmV23by8DuHdUnIRKHFAotVFXzNUQ7bnC3Ou2nLBCg8tDeG3ce4JYXy18Ou37HPk6/723++5TB3KQb5kTijvoUWikzY8ltk3nk0rEsv+tMJg2qW0hEO+meN8q9dne254UnCXpjeU6jlFNEWpeYhoKZnWlmy81slZlNr2G7883MzazW9i45KL19EpNHHEZKYog/XzoWgN/+1yjOPro3l00aUO/9rdyWx/urwoPz5ewtqLR+7rKtvPzxF4dWaBFp0WLWfGRmIeBB4AwgG1hoZrPcfVmF7dKAHwALKu9F6iq9XVLkBrYLxvYF4Mn/rK+03XPXTeLCh/9T5T4m3/925Pn2vIJIBzdAflEJVz8Z7uA//YhepAaDAIpI2xLLmsJ4YJW7r3H3QuBZYGoV2/0P8GsgP4ZliUv9u7bnjCMPXt76zDUTyOzWHoAu7ZMiy//49TFVvv++V1dw9RNZXPn4Qm576dPI8uG3/ptXl26p9nPfX7WdgTfPZkde5dqGiLRssexozgA2Rr3OBiZEb2BmxwD93H22md1Y3Y7M7FrgWoD+/fvHoKht09s3hacK/dZTi1i7fR/HDe5OflH4PobrTh5MRud2DO7RkYQKPw2+NLwnr3++jYffXlPtvv/96RbGDujC/XNX8J1Th9A7vR2zP97M/XNX0Ds9FXf4aOMuTjuiV7X7EJGWp9muPjKzBOA+4PLatnX3R4BHIHyfQmxL1vaU9TcApCaFqhwn6TfnH81Nz38MwMXH9qOopJR3VlZ/2esLH26K3CC3bU8BJw7tHhkBdkNwiWthcWmjHYOINI1YNh9tAqLHk+4bLCuTBowE3jSzdcBEYJY6m5vHhcf247ThPQFw4FfnHVXn9766bGu5IcELS8JhsGb7Pp6av56lX+zmP6t3RGopItJyxbKmsBAYamYDCYfBxcDXy1a6+24gMt6zmb0J/Njd2/7tyi1URpd2AHRITqRbh5TI8j9MG8Pzi7N5M+oy1aSQUVRSc6Utej4IgBOGdOehS44hLTXcn7Fi616ue2oRM781ic278lmSvYtLJtb/qikRaTwxCwV3Lzaz7wJzgBDwmLsvNbM7gSx3nxWrz5aGufmsIxiZkc7xQ7phZnxtTAbnjOrNl4b3YspRvRl8yytAeOiMwuJSZmZt5MJx/SrNIledd1dt56wH3iE79wAAZx/dm7Xb9zHurrmRbb4xoT8Hikr4Ylc+Q3p2rG5
XIhIjGvtI6qy4pJScvAJ6p7crt3zy/W+xYmteuWVpqYnszS+u92d8dXQfsnMPkLU+lw9uOY2enVIPqcwiEqb5FKTJ7C8sJr+olKSQsSe/mI079/OnN1fz9oocLhjbl78vym7wvn94+lAmDOzGpY8u4JYpR3DlCZqSVKQhFArSrPYXFrMmZx99u7Rj9J2vATDru8fzu7krK43BVB/fPmUwl04cQJ/O5WsrT/1nHbe+tJQlt08mvV1S1W8WiWOaZEeaVfvkREZmpNO5fTKrfzmFN358Ckf37czdwVVNA7t3aNB+H3pzNcfd/ToPzF0JELmi6fH31wGwefeBQy+8SBzTKKkSc6EEi4RAj7QUpp81nCkjezNz0UZG9e3Mzv2F3PT38D0SQ3t25JKJA7h91tKadsn9c1cwqEcHvjfjQ/521QQSLDzM9+bd+XRMSaRvl/axPSiRNkrNR9Ls9uYXcc2TWfzqvKMj4XHpowvK3TyXlpLI3oK6d1zPuGYijtO5XTJb9+RzanAPxuqcPA7rlEqHFP0ekviiSXak1UhLTeLZayeVWzamX2feWbmdrxzdm+lnDSe/qIQ7Zi3jupMHcemjH9S6z2l/mV/u9bq7z6ak1Dnt3rcYN6ALz103iYfeWs1ry7biwD/++zjMNKmQiGoK0iItWp/L+Q+9z6s/OolhvdLKrfvj6yv57avhqUgzOrdj067a+xG+OWkAT0SNGpualEB+0cFhOF7+3gmMzEhvpNKLtDy6+kjarPyiEq5/7iOOH9KdiYO68eX73+ZfPziRM6KG/m6IE4Z050dnDGXsgK6NVFKRlkOhIHHnsXfXcufLy0hMMIpLG/7/9V8vG8fhh6XRr2v5zuqiklLWbd/H0Ao1lx15BXRMTYzMPSHSEumSVIk7Jw3rAcBjlx/Lny8ZS69OKXxpeE+evHI8yYnh/9Wnju7Db/9rVI37ufrJLE78zRv8YvYyMqfPJnP6bLbnFXDrPz7ljPvfZsvufEpLnZy9BRwoLGHsXXP56oPv80VUM9a2vfmapU5aJdUUJO7sPlDEZY99wF1TR3Jkn06RMZ0AThzavcohw3t1SiFnbwGlDg9cPJpZH33BvCpuwlt399n8ft5K7nst3Ofx4a1n0KVDcuwORqSOVFMQqUZ6uyRe+s7xHNU3nVCC8cEtp0XWPXHFeNbdfTbnjupT7j1b94QDAeAHz35UZSAAPPL26kggAFV2gs9ZuoXM6bOZv2ZHIxyNSONSKEjc69kplWeunsANZwwjISF8Weq9Fx5sYhp+WFp1b63kl698Xu71Sx9twt257aVPmXz/W6wMhgsH+Nv88nNoP7dwI3+Yt7KhhyHSKNR8JFKN3H2FFJc6y7fs5ZJHKw8P/sDFo/nBsx8B8MilY7k2+LKviRmU/ZO7+Nh+3Dl1JPfM+ZyvjenLlN+/A4SboFZu3Uuv9FQ6pVY/jtPGnftJCiVwWLpGkpXa6eY1kUNU1hfQIy2F1284mQQzTvntmwBcMLYvU0dnUOpORuf2jB/YlV+ddxQ3v/BJjfuM/g2WkpjA3xdl85d31vKXd9ZGlt/20qc8+Z/1pCQmsPyus6rczxe7DnDib94AqHJ6VZGGUiiI1MGgHuUn/ElNCre8fm1M38iyaeP7M218f9yd++eu5PxjMjj5njer3Wf0zXTRngyWFxSX8p2nFzP9rOF8sesAP3n+Y2ZcO5He6e047u7XD/GIRKqmUBCph7nXn8zp971VLgwqMjOuP2MYEG5iWrUtjwkDuzGwRweOr+HLPMGg4u0Vsz/ZzFsrcsgLxn26+JH5vHXjqeW2cXfumv0Zx2Z2Ia+ghAvGHixbdu5+dh8oYkSf8N3a2/bm89zCjfz3KUMi/Sci0RQKIvUwpGfHejXXTB2dUe71pEHdWLQhl4U/PZ13VuYw/flPIl/43z9tKL+bW7mjOS9qIMD1O/bz3MKN5dYPvDl8Se2j74aboI7KSOdfn27mlMN78tUH3wPgqhMG8tMpR/D
TFz/ltWVbmTS4m+7cliqpo1mkCZWWOk54OHGAwuJS7pq9jC7tk/n2KYOZv2YHn2/ZS0piAr+ds5x9hSWR9/ZMS2Hb3oI6f9Zxg7vx/uryl70e078zizfs4i+XjeOMI3s1yjFJ66BhLkRauZ37Cnni/XU8MG8lF47ry+3njGDE7XMi61MSEygoLq1hD9Ubflga/33qED7asIubpwwnMcE464F3uOL4TIb0TKNrh+RyEyHNX7ODoT070q1jyiEflzQPXX0k0sp17ZAcudy01Ck3B8S3TxnMMf27cM2TDfuB9PmWvXx/xocAbN2bzy+/dhSfb9nLT54/ePXUk1eOJzv3AOeM6s3Fj8ynf9f2vH3TqdXtUtoIhYJIC5YRzEU9qEeHyOuRGZ34yZnDAXjiyvHc/9oK/vSNY9i2t4A3l28r1y9x8rAevLUip9wQ47+fNiYSCACzP97M7I83V/rsyx4Lz1uxPS/cZLVh536WbNzFyIz0SPNXtHXb9/Hxpt2V7gaX1kXNRyIt3LsrtzNpcLcqv4ir8uAbq5g0uBvb9uRz2hG9KCl1du4rjFzGuvZXU/jTm6u5Z87yBpfpwnF9uWTiAI7o3Ym5y7Zy+GFpTPn9O+QXlfLkleNZsXUvuw8Ucf0ZwzR5UQuhPgURKedfn2zmw427uGXKEbg7C9bu5OJHDs5Ql5aayKmH92TWkrqP7nrDGcO4N2qsp4pumTKcy48biBkkhTSqTnNSKIhIrTbs2M95D73H9rxCVv7iLJJCCeQXlfD+6u1c+Xjj/Ts7pn9nenVK5edTR9AzrephOQqKS/g4ezfHZupS2VhQR7OI1Kp/t/a8/uNT2LBjf+SXfGpSiFF9OwPQvWMy2/MKgXDn9kNvrgZgzg9P4su/q/tMd4s37ALgg7U7+fqE/oQSjLz8YsZldqWk1BnUowNnPRAe+2nu9SczpGfHmnYnMaSagohU4u7c++oKzj66Nx9t3MXNL3zCnB+eRJf2SaQkhUhvl0Tm9NkAfPfUIfzxjVVV7mfmtybx+HvrmP1J5Y7s6kwb34+Jg7oxok8n2icn0js9FTNjztItJCYYG3fuZ3T/Lozu15k7/7mMv81fz4pfVD1GlByk5iMRianh025mAAALg0lEQVSd+wp5Lmsj35yUyW0vfcrMRdlk/ex0Nuzcz0NvruYP08aQmhTikr8u4N1VlScuqo9Xvn9iZBTZMuvuPjsSTEtum0x6+yQOFJaQX1RCYshYv2M/IzPSD+lz2xKFgog0mfyiEtbk7OPIPp0qrZv98Wa+88xiZlwzkfdWbWfioG4cN7gbg4IZ7xo6p/alEwfwVDAnxe8uGs1Xx2TwtT+9x4cbdkXu5n7qqvEMP6wTPdJ0051CQURatIXrdrJ9bwHdOqYwZ+kW5izdQnZu+F6K6Hkn6mL8wK6UljpZ63MrrRvasyO5+wvp1C4JHP529QT6BPd/lPkkezfrduzjnDZ8j4VCQURalZJSZ8GaHQw7LI3EBOPpBRvYvPsAf5u/odx2t0wZzsNvrWHHvsIGf9bvLhpN1w7JnDSsB4+/t5Y7/rkMgE/umMyqbXkM65VGSmICiW3oMlqFgoi0CS8szuaZBRvIWp/LJRP7c9dXj8Ld+d3clXRICVWaArU+tYzkUAKFJQfHj4puyjpvTAb3XTQaCA9ceMuLn3D1iQMZflgnikpKyc49UG58qJZOoSAibcp7q7Yzql9nOqaUv5L+/dXb+fpfwtOlZnRux5wfncT3Z3zI659v438vP5ZO7ZI4/6H3G/SZ4zO78ujl4zjqjlcjy/738mOZ9/lW/jZ/A2P6d+bcUX244viBDT+wJqJQEJG4UXYVUnVzXWzdE55caED3DqzJySs3PlRaaiJ784urfB/ANyb05+kFG6pdD+HJlD7csIvbzzmSguJSvvW3Rfx48uHlrn56YXE2I/qkc/hhafU5tEajUBCRuLH0i92EEozhh1W++qkqZSHSq1MKr11/Mr965TNmfLCRXp1
S2Lqn7nNWVCV66I+sn51Oersk3lqew9VPZtE+OcSyO8/E3WsdE2pvfhEAaalJh1SeMgoFEZFqFJWUMu+zbYzM6ETfLu0BWL5lL/26tuPI2+ZU+75fn38UP3n+k3pfHVXdvk4c2oPVOXn06pRKcYnjOG8uz+Hy4zIZ8z+vUVRSytpf1X2mv5ooFEREGmDxhlyufXIR2/MKOH5IN95btYM7p44gNSnEheP6kbuvkPYpIQ7/2b8BePbaibyxfBsvL9nMjn0F5Bc1bOKjaMN6dWTF1jwAnrpqPD3SUupcC6qOQkFEpIFKSp3Pt+xh+GGdKCguoX1y5WHi3l6Rwz8+3MS9F46KNAV9tnlPZAynpJBx1sje9Rp1tib//O4JDOzRoVJHe10pFEREmsF5f3qPfQUlzPnRSQCs3LqXpxds4KWPNpG7vyiy3aAeHViTs69e+/75uSP45nGZDSpXiwgFMzsTeAAIAX9197srrL8euBooBnKAK919fU37VCiISEtW9p1asSN5X0ExL364ibOP6s3qnDzGZXYlv6iEf326mdXb9rFjXwEzPthY7j1njTyMf326JfL6scvH8aXhvRpUrmYPBTMLASuAM4BsYCEwzd2XRW1zKrDA3feb2beBU9z9opr2q1AQkbYqO3c/yaEEXvxwE4vW5/LnS8aSkGCccd9brNyWx5wfntTgS1pbwnwK44FV7r4mKNCzwFQgEgru/kbU9vOBS2JYHhGRFq3sSqjrTh5cbvljlx/LMx9saJJ5JmI5sEcGEF0Xyg6WVecq4F9VrTCza80sy8yycnJyGrGIIiItX7+u7fnJmcPrPE/3oWgRoz2Z2SXAOOCeqta7+yPuPs7dx/Xo0aNpCyciEkdi2Xy0CegX9bpvsKwcMzsd+Clwsrsf2q2EIiJySGJZU1gIDDWzgWaWDFwMzIrewMzGAA8D57r7thiWRURE6iBmoeDuxcB3gTnAZ8Bz7r7UzO40s3ODze4BOgIzzewjM5tVze5ERKQJxLL5CHd/BXilwrLbop6fHsvPFxGR+mkRHc0iItIyKBRERCRCoSAiIhGtbkA8M8sBahwfqQbdge2NWJzWQMccH3TM8eFQjnmAu9d6o1erC4VDYWZZdRn7oy3RMccHHXN8aIpjVvORiIhEKBRERCQi3kLhkeYuQDPQMccHHXN8iPkxx1WfgoiI1CzeagoiIlIDhYKIiETETSiY2ZlmttzMVpnZ9OYuT2Mxs35m9oaZLTOzpWb2g2B5VzN7zcxWBv/tEiw3M/t98Hf42MyOad4jaBgzC5nZh2b2cvB6oJktCI7r/4KReTGzlOD1qmB9ZnOWu6HMrLOZ/d3MPjezz8xsUhyc4x8F/09/amYzzCy1LZ5nM3vMzLaZ2adRy+p9bs3sm8H2K83smw0tT1yEQjBf9IPAWcCRwDQzO7J5S9VoioEb3P1IYCLwneDYpgPz3H0oMC94DeG/wdDgcS3wUNMXuVH8gPDou2V+Ddzv7kOAXMIz+RH8NzdYfn+wXWv0APBvdx8OjCJ87G32HJtZBvB9YJy7jwRChIffb4vn+XHgzArL6nVuzawrcDswgfBUyLeXBUm9uXubfwCTgDlRr28Gbm7ucsXoWF8CzgCWA72DZb2B5cHzh4FpUdtHtmstD8ITNs0DvgS8DBjhuzwTK55vwkO3TwqeJwbbWXMfQz2PNx1YW7Hcbfwcl03n2zU4by8DX26r5xnIBD5t6LkFpgEPRy0vt119HnFRU6D+80W3SkGVeQywAOjl7puDVVuAXsHztvC3+B1wE1AavO4G7PLwHB5Q/pgixxus3x1s35oMBHKA/w2azP5qZh1ow+fY3TcBvwU2AJsJn7dFtO3zHK2+57bRznm8hEKbZ2YdgeeBH7r7nuh1Hv7p0CauPTazrwDb3H1Rc5elCSUCxwAPufsYYB8HmxOAtnWOAYKmj6mEA7EP0IHKTSxxoanPbbyEQp3mi26tzCyJcCA87e4vBIu3mlnvYH1voGy
609b+tzgeONfM1gHPEm5CegDobGZlk0ZFH1PkeIP16cCOpixwI8gGst19QfD674RDoq2eY4DTgbXunuPuRcALhM99Wz7P0ep7bhvtnMdLKNQ6X3RrZWYGPAp85u73Ra2aBZRdgfBNwn0NZcsvC65imAjsjqqmtnjufrO793X3TMLn8XV3/wbwBnBBsFnF4y37O1wQbN+qflG7+xZgo5kdHiw6DVhGGz3HgQ3ARDNrH/w/XnbMbfY8V1DfczsHmGxmXYJa1uRgWf01dwdLE3bkTAFWAKuBnzZ3eRrxuE4gXLX8GPgoeEwh3J46D1gJzAW6Btsb4SuxVgOfEL66o9mPo4HHfgrwcvB8EPABsAqYCaQEy1OD16uC9YOau9wNPNbRQFZwnv8BdGnr5xj4OfA58CnwFJDSFs8zMINwv0kR4VrhVQ05t8CVwfGvAq5oaHk0zIWIiETES/ORiIjUgUJBREQiFAoiIhKhUBARkQiFgoiIRCgURJqQmZ1SNrKrSEukUBARkQiFgkgVzOwSM/vAzD4ys4eD+RvyzOz+YIz/eWbWI9h2tJnND8a3fzFq7PshZjbXzJaY2WIzGxzsvmPU3AhPB3fsirQICgWRCszsCOAi4Hh3Hw2UAN8gPChblruPAN4iPH49wJPAT9z9aMJ3mZYtfxp40N1HAccRvmsVwiPZ/pDw3B6DCI/pI9IiJNa+iUjcOQ0YCywMfsS3IzwgWSnwf8E2fwNeMLN0oLO7vxUsfwKYaWZpQIa7vwjg7vkAwf4+cPfs4PVHhMfSfzf2hyVSO4WCSGUGPOHuN5dbaHZrhe0aOkZMQdTzEvTvUFoQNR+JVDYPuMDMekJkvtwBhP+9lI3Q+XXgXXffDeSa2YnB8kuBt9x9L5BtZl8N9pFiZu2b9ChEGkC/UEQqcPdlZvYz4FUzSyA8euV3CE9uMz5Yt41wvwOEhzb+c/Clvwa4Ilh+KfCwmd0Z7OO/mvAwRBpEo6SK1JGZ5bl7x+Yuh0gsqflIREQiVFMQEZEI1RRERCRCoSAiIhEKBRERiVAoiIhIhEJBREQi/h9YGhbXCyJrBAAAAABJRU5ErkJggg==\n", 265 | "text/plain": [ 266 | "
" 267 | ] 268 | }, 269 | "metadata": {}, 270 | "output_type": "display_data" 271 | } 272 | ], 273 | "source": [ 274 | "epochs = np.arange(len(losses))\n", 275 | "plt.title(r'Loss Per Epoch')\n", 276 | "plt.xlabel('epoch')\n", 277 | "plt.ylabel('loss')\n", 278 | "plt.plot(epochs, losses)\n", 279 | "plt.show()" 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": 15, 285 | "metadata": {}, 286 | "outputs": [ 287 | { 288 | "name": "stdout", 289 | "output_type": "stream", 290 | "text": [ 291 | "{\n", 292 | " \"mean_rank\": 836.7464646464647,\n", 293 | " \"hits@k\": {\n", 294 | " \"1\": 0.048484848484848485,\n", 295 | " \"3\": 0.1414141414141414,\n", 296 | " \"5\": 0.20707070707070707,\n", 297 | " \"10\": 0.32626262626262625\n", 298 | " }\n", 299 | "}\n" 300 | ] 301 | } 302 | ], 303 | "source": [ 304 | "print(json.dumps(results.results['eval_summary'], indent=2))" 305 | ] 306 | } 307 | ], 308 | "metadata": { 309 | "kernelspec": { 310 | "display_name": "Python 3", 311 | "language": "python", 312 | "name": "python3" 313 | }, 314 | "language_info": { 315 | "codemirror_mode": { 316 | "name": "ipython", 317 | "version": 3 318 | }, 319 | "file_extension": ".py", 320 | "mimetype": "text/x-python", 321 | "name": "python", 322 | "nbconvert_exporter": "python", 323 | "pygments_lexer": "ipython3", 324 | "version": "3.7.0" 325 | } 326 | }, 327 | "nbformat": 4, 328 | "nbformat_minor": 2 329 | } 330 | -------------------------------------------------------------------------------- /notebooks/Case Scenario ComPath.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Using BioKEEN to Train and Evaluate a KGE Model on ComPath" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import json\n", 17 | "import logging\n", 18 | "import os\n", 19 | 
"import sys\n", 20 | "import time\n", 21 | "import warnings\n", 22 | "\n", 23 | "import matplotlib\n", 24 | "import matplotlib.pyplot as plt\n", 25 | "import numpy as np\n", 26 | "\n", 27 | "import biokeen\n", 28 | "import pykeen" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 2, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "warnings.filterwarnings('ignore', category=UserWarning)" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 3, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "logging.basicConfig(level=logging.INFO)\n", 47 | "logging.getLogger('biokeen').setLevel(logging.INFO)" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 4, 53 | "metadata": {}, 54 | "outputs": [ 55 | { 56 | "name": "stdout", 57 | "output_type": "stream", 58 | "text": [ 59 | "3.7.0 (default, Jul 23 2018, 20:22:55) \n", 60 | "[Clang 9.1.0 (clang-902.0.39.2)]\n" 61 | ] 62 | } 63 | ], 64 | "source": [ 65 | "print(sys.version)" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 5, 71 | "metadata": {}, 72 | "outputs": [ 73 | { 74 | "name": "stdout", 75 | "output_type": "stream", 76 | "text": [ 77 | "Sun Jan 20 20:57:46 2019\n" 78 | ] 79 | } 80 | ], 81 | "source": [ 82 | "print(time.asctime())" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 6, 88 | "metadata": {}, 89 | "outputs": [ 90 | { 91 | "name": "stdout", 92 | "output_type": "stream", 93 | "text": [ 94 | "PyKEEN Version: 0.0.19-dev\n", 95 | "BioKEEN Version: 0.0.12-dev\n" 96 | ] 97 | } 98 | ], 99 | "source": [ 100 | "print(f'PyKEEN Version: {pykeen.constants.VERSION}')\n", 101 | "print(f'BioKEEN Version: {biokeen.constants.VERSION}')" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": 7, 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [ 110 | "output_directory = os.path.join(\n", 111 | " os.path.expanduser('~'), \n", 112 | " 'Desktop', \n", 113 | " 
'biokeen_test'\n", 114 | ")" 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "## Step 1: Configure Your Experiment" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 8, 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "config = dict(\n", 131 | " training_set_path = 'bio2bel:compath',\n", 132 | " execution_mode = 'Training_mode', \n", 133 | " kg_embedding_model_name = 'TransE',\n", 134 | " embedding_dim = 50, # 150 is better for real\n", 135 | " normalization_of_entities = 2, # corresponds to L2\n", 136 | " scoring_function = 1, # corresponds to L1\n", 137 | " margin_loss = 5,\n", 138 | " learning_rate = 0.01,\n", 139 | " batch_size = 32,\n", 140 | " num_epochs = 1000, # 2500 is better for real\n", 141 | " test_set_ratio = 0.1,\n", 142 | " filter_negative_triples = True,\n", 143 | " random_seed = 2,\n", 144 | " preferred_device = 'cpu',\n", 145 | ")" 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "## Step 2: Run BioKEEN to Train and Evaluate the Model" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 10, 158 | "metadata": {}, 159 | "outputs": [ 160 | { 161 | "name": "stderr", 162 | "output_type": "stream", 163 | "text": [ 164 | "INFO:pykeen.utilities.pipeline:-------------Train KG Embeddings-------------\n", 165 | "Training epoch: 100%|██████████| 1000/1000 [01:22<00:00, 12.12it/s]\n", 166 | "INFO:pykeen.utilities.pipeline:-------------Start Evaluation-------------\n", 167 | "INFO:pykeen.utilities.evaluation_utils.metrics_computations:Evaluation took 2.77s seconds\n" 168 | ] 169 | } 170 | ], 171 | "source": [ 172 | "results = pykeen.run(\n", 173 | " config=config,\n", 174 | " output_directory=output_directory,\n", 175 | ")" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": 11, 181 | "metadata": {}, 182 | "outputs": [ 183 | { 184 | "name": "stdout", 185 | 
"output_type": "stream", 186 | "text": [ 187 | "Keys:\n", 188 | " entity_to_embedding\n", 189 | " entity_to_id\n", 190 | " eval_summary\n", 191 | " final_configuration\n", 192 | " losses\n", 193 | " relation_to_embedding\n", 194 | " relation_to_id\n", 195 | " trained_model\n" 196 | ] 197 | } 198 | ], 199 | "source": [ 200 | "print('Keys:', *sorted(results.results.keys()), sep='\\n ')" 201 | ] 202 | }, 203 | { 204 | "cell_type": "markdown", 205 | "metadata": {}, 206 | "source": [ 207 | "## Step 3: Show Exported Results" 208 | ] 209 | }, 210 | { 211 | "cell_type": "markdown", 212 | "metadata": {}, 213 | "source": [ 214 | "### 3.1: Show the Trained Model" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": 12, 220 | "metadata": {}, 221 | "outputs": [ 222 | { 223 | "data": { 224 | "text/plain": [ 225 | "TransE(\n", 226 | " (criterion): MarginRankingLoss()\n", 227 | " (entity_embeddings): Embedding(997, 50)\n", 228 | " (relation_embeddings): Embedding(1, 50)\n", 229 | ")" 230 | ] 231 | }, 232 | "execution_count": 12, 233 | "metadata": {}, 234 | "output_type": "execute_result" 235 | } 236 | ], 237 | "source": [ 238 | "results.results['trained_model']" 239 | ] 240 | }, 241 | { 242 | "cell_type": "markdown", 243 | "metadata": {}, 244 | "source": [ 245 | "### 3.2: Plot the Losses" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": 13, 251 | "metadata": {}, 252 | "outputs": [], 253 | "source": [ 254 | "losses = results.results['losses']" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": 14, 260 | "metadata": {}, 261 | "outputs": [ 262 | { 263 | "data": { 264 | "image/png": "\n", 265 | "text/plain": [ 266 | "
" 267 | ] 268 | }, 269 | "metadata": {}, 270 | "output_type": "display_data" 271 | } 272 | ], 273 | "source": [ 274 | "epochs = np.arange(len(losses))\n", 275 | "plt.title(r'Loss per Epoch')\n", 276 | "plt.xlabel('Epoch')\n", 277 | "plt.ylabel('Loss')\n", 278 | "plt.plot(epochs, losses)\n", 279 | "plt.show()" 280 | ] 281 | }, 282 | { 283 | "cell_type": "markdown", 284 | "metadata": {}, 285 | "source": [ 286 | "### 3.3: Show Evaluation Results" 287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "execution_count": 15, 292 | "metadata": {}, 293 | "outputs": [ 294 | { 295 | "name": "stdout", 296 | "output_type": "stream", 297 | "text": [ 298 | "{\n", 299 | " \"mean_rank\": 188.53191489361703,\n", 300 | " \"hits@k\": {\n", 301 | " \"1\": 0.18085106382978725,\n", 302 | " \"3\": 0.2765957446808511,\n", 303 | " \"5\": 0.3049645390070922,\n", 304 | " \"10\": 0.39361702127659576\n", 305 | " }\n", 306 | "}\n" 307 | ] 308 | } 309 | ], 310 | "source": [ 311 | "print(json.dumps(results.results['eval_summary'], indent=2))" 312 | ] 313 | } 314 | ], 315 | "metadata": { 316 | "kernelspec": { 317 | "display_name": "Python 3", 318 | "language": "python", 319 | "name": "python3" 320 | }, 321 | "language_info": { 322 | "codemirror_mode": { 323 | "name": "ipython", 324 | "version": 3 325 | }, 326 | "file_extension": ".py", 327 | "mimetype": "text/x-python", 328 | "name": "python", 329 | "nbconvert_exporter": "python", 330 | "pygments_lexer": "ipython3", 331 | "version": "3.7.0" 332 | } 333 | }, 334 | "nbformat": 4, 335 | "nbformat_minor": 2 336 | } 337 | -------------------------------------------------------------------------------- /notebooks/Case Scenario HSDN.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Use BioKEEN Programmatically to Train and Evaluate a KGE Model on HSDN" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 |
"execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import json\n", 17 | "import logging\n", 18 | "import os\n", 19 | "import sys\n", 20 | "import time\n", 21 | "import warnings\n", 22 | "\n", 23 | "import matplotlib\n", 24 | "import matplotlib.pyplot as plt\n", 25 | "import numpy as np\n", 26 | "\n", 27 | "import biokeen\n", 28 | "import pykeen" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 2, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "warnings.filterwarnings('ignore', category=UserWarning)" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 3, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "logging.basicConfig(level=logging.INFO)\n", 47 | "logging.getLogger('biokeen').setLevel(logging.INFO)" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 4, 53 | "metadata": {}, 54 | "outputs": [ 55 | { 56 | "name": "stdout", 57 | "output_type": "stream", 58 | "text": [ 59 | "3.7.0 (default, Jul 23 2018, 20:22:55) \n", 60 | "[Clang 9.1.0 (clang-902.0.39.2)]\n" 61 | ] 62 | } 63 | ], 64 | "source": [ 65 | "print(sys.version)" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 5, 71 | "metadata": {}, 72 | "outputs": [ 73 | { 74 | "name": "stdout", 75 | "output_type": "stream", 76 | "text": [ 77 | "Sun Jan 20 21:11:49 2019\n" 78 | ] 79 | } 80 | ], 81 | "source": [ 82 | "print(time.asctime())" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 6, 88 | "metadata": {}, 89 | "outputs": [ 90 | { 91 | "name": "stdout", 92 | "output_type": "stream", 93 | "text": [ 94 | "PyKEEN Version: 0.0.19-dev\n", 95 | "BioKEEN Version: 0.0.12-dev\n" 96 | ] 97 | } 98 | ], 99 | "source": [ 100 | "print(f'PyKEEN Version: {pykeen.constants.VERSION}')\n", 101 | "print(f'BioKEEN Version: {biokeen.constants.VERSION}')" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": 7, 107 | "metadata": {}, 108 | 
"outputs": [], 109 | "source": [ 110 | "output_directory = os.path.join(\n", 111 | " os.path.expanduser('~'), \n", 112 | " 'Desktop', \n", 113 | " 'biokeen_test'\n", 114 | ")" 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "## Step 1: Configure your experiment" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 8, 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "config = dict(\n", 131 | " training_set_path = 'bio2bel:hsdn',\n", 132 | " execution_mode = 'Training_mode', \n", 133 | " kg_embedding_model_name = 'TransE',\n", 134 | " embedding_dim = 50, \n", 135 | " normalization_of_entities = 2, # corresponds to L2\n", 136 | " scoring_function = 1, # corresponds to L1\n", 137 | " margin_loss = 1,\n", 138 | " learning_rate = 0.01,\n", 139 | " batch_size = 128,\n", 140 | " num_epochs = 1000, \n", 141 | " test_set_ratio = 0.1,\n", 142 | " filter_negative_triples = True,\n", 143 | " random_seed = 2,\n", 144 | " preferred_device = 'cpu',\n", 145 | ")" 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "## Step 2: Run BioKEEN to Train and Evaluate the Model" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 9, 158 | "metadata": {}, 159 | "outputs": [ 160 | { 161 | "name": "stderr", 162 | "output_type": "stream", 163 | "text": [ 164 | "INFO:pykeen.utilities.pipeline:-------------Train KG Embeddings-------------\n", 165 | "Training epoch: 100%|██████████| 1000/1000 [04:36<00:00, 3.61it/s]\n", 166 | "INFO:pykeen.utilities.pipeline:-------------Start Evaluation-------------\n", 167 | "INFO:pykeen.utilities.evaluation_utils.metrics_computations:Evaluation took 10.69s seconds\n" 168 | ] 169 | } 170 | ], 171 | "source": [ 172 | "results = pykeen.run(\n", 173 | " config=config,\n", 174 | " output_directory=output_directory,\n", 175 | ")" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | 
"execution_count": 10, 181 | "metadata": {}, 182 | "outputs": [ 183 | { 184 | "name": "stdout", 185 | "output_type": "stream", 186 | "text": [ 187 | "Keys:\n", 188 | " entity_to_embedding\n", 189 | " entity_to_id\n", 190 | " eval_summary\n", 191 | " final_configuration\n", 192 | " losses\n", 193 | " relation_to_embedding\n", 194 | " relation_to_id\n", 195 | " trained_model\n" 196 | ] 197 | } 198 | ], 199 | "source": [ 200 | "print('Keys:', *sorted(results.results.keys()), sep='\\n ')" 201 | ] 202 | }, 203 | { 204 | "cell_type": "markdown", 205 | "metadata": {}, 206 | "source": [ 207 | "## Step 3: Show Exported Results" 208 | ] 209 | }, 210 | { 211 | "cell_type": "markdown", 212 | "metadata": {}, 213 | "source": [ 214 | "### 3.1: Show Trained Model" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": 11, 220 | "metadata": {}, 221 | "outputs": [ 222 | { 223 | "data": { 224 | "text/plain": [ 225 | "TransE(\n", 226 | " (criterion): MarginRankingLoss()\n", 227 | " (entity_embeddings): Embedding(433, 50)\n", 228 | " (relation_embeddings): Embedding(1, 50)\n", 229 | ")" 230 | ] 231 | }, 232 | "execution_count": 11, 233 | "metadata": {}, 234 | "output_type": "execute_result" 235 | } 236 | ], 237 | "source": [ 238 | "results.results['trained_model']" 239 | ] 240 | }, 241 | { 242 | "cell_type": "markdown", 243 | "metadata": {}, 244 | "source": [ 245 | "### 3.2: Plot losses" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": 12, 251 | "metadata": {}, 252 | "outputs": [], 253 | "source": [ 254 | "losses = results.results['losses']" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": 13, 260 | "metadata": {}, 261 | "outputs": [ 262 | { 263 | "data": { 264 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYUAAAEWCAYAAACJ0YulAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzt3XmYFNW9xvHvb3r2YYcBWR1QQIiCC+KuRFEBjeaqcY3GuCU3GjVRE0mMMSa5em+MiYkk0STGaFxiNBqMKARxF9lcQEBkWIRhHfZlFmb53T+quunZB5ymZ+j38zzz0FV1uvrUNNNvn3OqTpm7IyIiApCW7AqIiEjroVAQEZEYhYKIiMQoFEREJEahICIiMQoFERGJUSiI7OfMrMDM3MzSk10Xaf0UCtJqmdlyMxudhNe90syqzGyHmW0zsw/N7OwW3L+b2c5w/9Gf77XU/kU+D31zEKnfdHc/0czSgOuBZ8yst7tvbu4OzCzd3Ssb2Dzc3QtbpKYiLUgtBWmTzOxaMys0s01mNtHMeoXrzcx+ZWbrw2/588zs0HDbODNbYGbbzWyVmd3a1Ou4ezXwCJADHBTu5+yw9bDFzN41s2Fx9VpuZt83s7nAzj3tsjGzu8zsWTP7e1jP981seNz2IWb2evja883snLhtOWb2SzP7zMy2mtnbZpYTt/vLzGyFmW0wsx/uSb0kdSgUpM0xs1OBe4ALgZ7AZ8DT4eYzgJOBQUDHsMzGcNufgW+4e3vgUGBaM14rHbgG2AEsNrMjCELiG0BX4CFgopllxT3tEuAsoFMjLYXGnAv8A+gCPAm8YGYZZpYBvAhMAboD3waeMLPB4fPuA44Cjg+f+z2gOm6/JwKDgdOAO81syF7UTfZzCgVpiy4DHnH39929HBgPHGdmBUAF0B44BDB3X+jua8LnVQBDzayDu2929/cbeY1jzWwLsJbgQ/6/3H0rcB3wkLvPcPcqd/8rUA4cG/fc37j7SncvbWT/74ff9qM/Z8Ztm+Puz7p7BXA/kB3u/1igHXCvu+9y92nAv4FLwm6uq4Cb3H1VWLd3w99P1E/cvdTdPwI+AoYjUotCQdqiXgStAwDcfQdBa6B3+EH5IDABWG9mD5tZh7Do+cA44DMze8PMjmvkNd5z907u3s3dj3X3qeH6A4Fb4j/Qgb5hnaJWNuMYjgz3H/2ZXN/zw+6ronD/vYCV4bqoz4DeQDeC8FjSyGuujXtcQhAwIjUoFKQtWk3w4QyAmeURdOWsAnD337j7UcBQgm6k28L1s9z9XIKulxeAZ/bitVcCP6/1gZ7r7k/Flfm8Uw/3jT4IWwB9CI55NdA3XBfVj+C4NwBlhOMeIntLoSCtXYaZZcf9pANPAV83s8PDvvz/AWa4+3IzO9rMjgn733cSfFBWm1mmmV1mZh3Dbplt1Oxvb64/At8MX8PMLM/MzjKz9i10vABHmdl54bHeTNA99R4wg+Ab/vfCMYZRwJeAp+MGxO83s15mFjGz42qNdYg0SaEgrd0koDTu566wK+dHwHPAGoJvxxeH5TsQfHBvJuha2Qj8Itx2ObDczLYB3yQYm9gj7j4buJagi2ozUAhcuRfH9VGt6xR+HbftX8BF4f4vB85z9wp330UQAmMJWga/A65w90/C590KzANmAZuA/0V/47KHTDfZEWk9zOwu4GB3/2qy6yKpSd8iREQkJmGhYGaPhBcQfdzA9kPMbLqZlTfnIiIREUm8hHUfmdnJBBf8PObuh9azvTvBGSRfBja7+30JqYiIiDRbwloK7v4mwWBXQ9vXu/ssgguKRESkFWhzE+J169bNCwoKkl0NEZE2Zc6cORvcPb+pcm0iFMzsOoLpBejXrx+zZ89Oco1ERNoWM/us6VJt5Owjd3/Y3Ue4+4j8/CaDTkRE9lKbCAUREdk3EtZ9ZGZPAaOAbmZWBPwYyABw9z+Y2QHAbIIrUKvN7GZgqLtvS1SdRESkcQkLBXe
/pIntawkm+hIRkVZC3UciIhKjUBARkRiFgoiIxKRMKCxau51fTlnExh3lTRcWEUlRKRMKS4p38NtphRQrFEREGpQyoZCdERxqWcXe3GxLRCQ1pE4opEcAKKuoSnJNRERar5QJhawMhYKISFNSJhTUfSQi0rQUCoWgpVBeqZaCiEhDUi4U1H0kItKw1AmFdHUfiYg0JXVCQS0FEZEmpVwolCoUREQalDKhEEkzMiKm7iMRkUakTChAcAGbuo9ERBqWUqGQlRHRKakiIo1IWCiY2SNmtt7MPm5gu5nZb8ys0MzmmtmRiapLVHZGmrqPREQakciWwqPAmEa2jwUGhj/XAb9PYF2AYLBZ3UciIg1LWCi4+5vApkaKnAs85oH3gE5m1jNR9YFoS0GhICLSkGSOKfQGVsYtF4Xr6jCz68xstpnNLi4u3usXDAaa1X0kItKQNjHQ7O4Pu/sIdx+Rn5+/1/vJzohQpoFmEZEGJTMUVgF945b7hOsSRgPNIiKNS2YoTASuCM9COhbY6u5rEvmCWRkRyjWmICLSoPRE7djMngJGAd3MrAj4MZAB4O5/ACYB44BCoAT4eqLqEqWL10REGpewUHD3S5rY7sD1iXr9+mRnpFFWqe4jEZGGtImB5pai6xRERBqXYqEQXKcQNFJERKS21AqF9AjVDhVVCgURkfqkVihEb7SjaxVEROqVYqEQvSWnQkFEpD4pFQpZ6UFLYZfOQBIRqVdKhUJmenC4CgURkfqlVChkhaFQrlAQEalXSoWCWgoiIo1LqVCIjimopSAiUr+UCgW1FEREGpdSobB7TEGnpIqI1CelQkEtBRGRxqVUKOjsIxGRxqVWKGREB5rVfSQiUp+UCoW8zCAUdpQrFERE6pPQUDCzMWa2yMwKzez2erYfaGavmtlcM3vdzPoksj55WcE9hXaWVybyZURE2qyEhYKZRYAJwFhgKHCJmQ2tVew+4DF3HwbcDdyTqPoAZETSyEpPY4dCQUSkXolsKYwECt19qbvvAp4Gzq1VZigwLXz8Wj3bW1z77HSFgohIAxIZCr2BlXHLReG6eB8B54WP/wtob2Zda+/IzK4zs9lmNru4uPhzVapdVjo7yhQKIiL1SfZA863AKWb2AXAKsAqoMwrs7g+7+wh3H5Gfn/+5XjA3M11jCiIiDUhP4L5XAX3jlvuE62LcfTVhS8HM2gHnu/uWBNYpuE+zTkkVEalXIlsKs4CBZtbfzDKBi4GJ8QXMrJuZReswHngkgfUBgltyllXo4jURkfokLBTcvRK4AZgMLASecff5Zna3mZ0TFhsFLDKzT4EewM8TVZ+onIyIbscpItKARHYf4e6TgEm11t0Z9/hZ4NlE1qG2bIWCiEiDkj3QvM9lZaSp+0hEpAEpFwrZGRHNfSQi0oDUC4V0DTSLiDQk9UIhI01jCiIiDUjBUIhQWe1UVKm1ICJSWwqGQnDIai2IiNSVgqEQ3FNB4woiInWlXiikR0NBLQURkdpSLhSyMqL3aVYoiIjUlnKhoO4jEZGGpVwo5GSo+0hEpCEpFwpqKYiINCwFQ0GnpIqINCQFQyFsKWigWUSkjtQLhXR1H4mINCT1QkHdRyIiDUpoKJjZGDNbZGaFZnZ7Pdv7mdlrZvaBmc01s3GJrA9Als4+EhFpUMJCwcwiwARgLDAUuMTMhtYqdgfBbTqPILiH8+8SVZ8otRRERBqWyJbCSKDQ3Ze6+y7gaeDcWmUc6BA+7gisTmB9AMiMpGGmMQURkfokMhR6AyvjlovCdfHuAr5qZkUE93L+dn07MrPrzGy2mc0uLi7+XJUys/BGO2opiIjUluyB5kuAR929DzAOeNzM6tTJ3R929xHuPiI/P/9zv2h2RppOSRURqUciQ2EV0DduuU+4Lt7VwDMA7j4dyAa6JbBOAORmplO6S91HIiK1JTIUZgEDzay/mWUSDCRPrFVmBXAagJkNIQiFz9c/1Aw5mRFKKyoT/TIiIm1OwkLB3SuBG4D
JwEKCs4zmm9ndZnZOWOwW4Foz+wh4CrjS3T1RdYrKy4yws1zdRyIitaUncufuPolgADl+3Z1xjxcAJySyDvXJyYxQukuhICJSW7IHmpMiLzOdnbvUfSQiUltKhkJOZoQStRREROpIyVDIy0ynRC0FEZE6UjIUcrMilGigWUSkjtQMhcwIJRVV7IMTnURE2pQUDYV0qqqd8kpdwCYiEi9FQyGYPlunpYqI1JSSoZCXGVyeodNSRURqSslQyAlbCjotVUSkppQMhbwshYKISH1SMhRyw+6jknJ1H4mIxEvRUFBLQUSkPikaChpoFhGpT4qGgk5JFRGpT0qGwu5TUhUKIiLxUjIUYqekaqBZRKSGhIaCmY0xs0VmVmhmt9ez/Vdm9mH486mZbUlkfaIy09PIiBglFWopiIjES9id18wsAkwATgeKgFlmNjG82xoA7v6duPLfBo5IVH1qy87Q3ddERGpLZEthJFDo7kvdfRfwNHBuI+UvIbhP8z6Rq1tyiojUkchQ6A2sjFsuCtfVYWYHAv2BaQ1sv87MZpvZ7OLi4hapXE5GRN1HIiK1tJaB5ouBZ9293k9pd3/Y3Ue4+4j8/PwWecGczHS1FEREamlWKJjZTWbWwQJ/NrP3zeyMJp62Cugbt9wnXFefi9mHXUcQdh9V6OwjEZF4zW0pXOXu24AzgM7A5cC9TTxnFjDQzPqbWSbBB//E2oXM7JBwn9ObXesWkKOBZhGROpobChb+Ow543N3nx62rl7tXAjcAk4GFwDPuPt/M7jazc+KKXgw87fv43pg5mRHNfSQiUktzT0mdY2ZTCAaDx5tZe6DJe1m6+yRgUq11d9ZavquZdWhRORkRSjXQLCJSQ3ND4WrgcGCpu5eYWRfg64mrVuLplFQRkbqa2310HLDI3beY2VeBO4CtiatW4uniNRGRupobCr8HSsxsOHALsAR4LGG12geCs48UCiIi8ZobCpXhQPC5wIPuPgFon7hqJV5ORoTKamdXZZNDIyIiKaO5YwrbzWw8wamoJ5lZGpCRuGolXnSm1NKKKjLTW8s1fCIiydXcT8OLgHKC6xXWElyI9ouE1WofiN59TeMKIiK7NSsUwiB4AuhoZmcDZe7epscU8rKClsL2sook10REpPVo7jQXFwIzga8AFwIzzOyCRFYs0TrnZgKwpVShICIS1dwxhR8CR7v7egAzywemAs8mqmKJ1ik3GBLZvHNXkmsiItJ6NHdMIS0aCKGNe/DcVkktBRGRuprbUnjFzCazeybTi6g1fUVbE20pbClRS0FEJKpZoeDut5nZ+cAJ4aqH3f35xFUr8dplpZOeZmwuUUtBRCSq2fdodvfngOcSWJd9yszolJuhloKISJxGQ8HMtgP1TWltgLt7h4TUah/plJvJFrUURERiGg0Fd2/TU1k0pVNOhkJBRCROmz6D6PPKyYxQoknxRERiEhoKZjbGzBaZWaGZ3d5AmQvNbIGZzTezJxNZn9qCeyroPs0iIlHNHmjeU2YWASYApwNFwCwzm+juC+LKDATGAye4+2Yz656o+tQnNzNdt+QUEYmTyJbCSKDQ3Ze6+y7gaYKpt+NdC0xw980AtS6QS7iczAhl6j4SEYlJZCj0BlbGLReF6+INAgaZ2Ttm9p6ZjUlgferIzYiopSAiEidh3Ud78PoDgVEE03G/aWaHufuW+EJmdh1wHUC/fv1a7MVzwruvuTtm1mL7FRFpqxLZUlgF9I1b7hOui1cETHT3CndfBnxKEBI1uPvD7j7C3Ufk5+e3WAVzMiO4Q1mF7r4mIgKJDYVZwEAz629mmcDFwMRaZV4gaCVgZt0IupOWJrBONeRmBPdUKNEZSCIiQAJDwd0rgRuAycBC4Bl3n29md5vZOWGxycBGM1sAvAbc5u4bE1Wn2mJ3X9Ngs4gIkOAxBXefRK3ZVN39zrjHDnw3/NnnYvdp1mCziAiQ4lc052ZGu48UCiIikOKhkJOhUBARiZfaoRDtPqrQQLO
ICKR4KORlBUMqkz9el+SaiIi0DikdCgO65QGwcadutCMiAikeCumRNI7p34VtZbqngogIpHgoAHTIyWBbqUJBRAQUCnRUKIiIxKR8KHTIzmBbmc4+EhEBhQIdctLZUV5JZZUmxRMRSflQ6JiTAcB2tRZERBQKHbKDUNAZSCIiCoVYS2FbqVoKIiIKhdwgFDaX6AI2EZGUD4UDOmQDsHZrWZJrIiKSfCkfCt07ZAGwYlNJkmsiIpJ8KR8KWekRBvVox+T5a5NdFRGRpEtoKJjZGDNbZGaFZnZ7PduvNLNiM/sw/LkmkfVpyLEDurJ+e3kyXlpEpFVJ2O04zSwCTABOB4qAWWY20d0X1Cr6d3e/IVH1aI6OORlsL6ugutpJS7NkVkVEJKkS2VIYCRS6+1J33wU8DZybwNfbax2yM6h22LlLp6WKSGpLZCj0BlbGLReF62o738zmmtmzZta3vh2Z2XVmNtvMZhcXF7d4RTvkBA0mXdUsIqku2QPNLwIF7j4M+A/w1/oKufvD7j7C3Ufk5+e3eCWiF7DpWgURSXWJDIVVQPw3/z7huhh33+ju0RHePwFHJbA+DeoeXquwfpsGm0UktSUyFGYBA82sv5llAhcDE+MLmFnPuMVzgIUJrE+DohewrdEFbCKS4hJ29pG7V5rZDcBkIAI84u7zzexuYLa7TwRuNLNzgEpgE3BlourTmO7ts8hMT6Nw/Y5kvLyISKuRsFAAcPdJwKRa6+6MezweGJ/IOjRHeiSN4X068lHRlmRXRUQkqZI90Nxq9O2cy8pNJbrZjoikNIVCqHuHbNZvL+eMX7+Z7KqIiCSNQiHUvX0wMd7S4p1JromISPIoFELRaxVERFKZQiHUQaEgIqJQiGqXldATsURE2gSFQig9snt21AqdgSQiKUqhEOoU132kW3OKSKpSKIQG9mjPVSf0B6Boc2mSayMikhwKhTiXHtMPgPXb1VIQkdSkUIjTJS8TgM07NYW2iKQmhUKc6LUKsz7bnOSaiIgkh0IhTiS8P/NLc9fw5IwVSa6NiMi+p1Co5ebRAwH4cKVaCyKSehQKtdw8ehAHds2lvFLXKohI6lEo1GPD9nL+9eFqyiqqkl0VEZF9KqGhYGZjzGyRmRWa2e2NlDvfzNzMRiSyPs21c1cQBp+s3Z7kmoiI7FsJCwUziwATgLHAUOASMxtaT7n2wE3AjETVZU8d3rcTACXllUmuiYjIvpXIlsJIoNDdl7r7LuBp4Nx6yv0U+F+g1Vwx9r/nDwPgvWWbdBaSiKSURIZCb2Bl3HJRuC7GzI4E+rr7S43tyMyuM7PZZja7uLi45WtaS+fc4HqF37y6mB88Pw93T/hrioi0BkkbaDazNOB+4Jamyrr7w+4+wt1H5OfnJ7xu3dpl1VjeoW4kEUkRiQyFVUDfuOU+4bqo9sChwOtmthw4FpjYGgab09KMOXeMji1vLa1IYm1ERPadRIbCLGCgmfU3s0zgYmBidKO7b3X3bu5e4O4FwHvAOe4+O4F1arau7bIYPaQ7oFAQkdSRsFBw90rgBmAysBB4xt3nm9ndZnZOol63JV194gAA/vLOch6fvjypdRER2RcSeg9Kd58ETKq17s4Gyo5KZF32xtBeHQB4dk4Rz84pYkRBF4b07JDkWomIJI6uaG5Ex7i7sQGMfeAt1m8vY+qCdUmqkYhIYikUmvDAxYfXWD7vd+9yzWOzuWvi/CTVSEQkcRQKTTh7WK8ay9FbdT767nLWbA0el+yqZOGabfu8biIiLU2h0IRImvHe+NPq3faTiQsAuPGpDxn7wFuaQE9E2jyFQjN0a5dZ7/pX5q9l4kermbowGGP4yYsL9mW1RERanEKhGdIjDf+abnzqg9jjp2au0JQYItKmJfSU1P1Nl7xM3v/R6fzu9UImfri63qm1+4+fxA/GHQJA17wsBh/Qnjc+LaaiqpqbThuImcXK3vjUB0TSjF9ddHid/YiIJINCoZnm3nUGkfAD/VujDqZ
H+2xu+cdH9Zb9n0mf1Lt+ZEEXjhnQFXdnyoJ1TPxoNUCdUPjllEWcNqRHbArvqG1lFXTIrnmarIhIS1IoNFPtD+NIWhAQXfIy2bRzV7P2cemfgltGnDSwG28t3hBbX13tRBsQ5ZXV/HZaIRNeK2TpPWfFyny8aitn//ZtfnfZkYw7rGds/cpNJVRUVbO1tILhfTqRlra7JSIisqcUCnvp7GE92VpawcUj+7K0eCdjH3ir2c+NDwSAAT/YfdH398YMBqDaoeD2l/i/84dx4dF9efjNpQC8NHcND04rpLB4B3+7+hgufGh67Lm3jz2Ey47px23/mEv//Dy+Pyboxnpg6mIGH9COL/TqyEvz1nDWYT15u3ADfTvnMrxvR9pnZ/DA1MX8auqnACz9n3GYQWW1EzHDjBrdXhAEGdCsENpaWkFWehrZGZFm/45EJDmsrQ2MjhgxwmfPbhVz5tWxeN12Tv/VmwAc1rsjf7j8KH764gJemb8WgL9eNZL128q47dm5e7Tf0UO6M3Xh+maVzUxPY1dlNQBDenbg95cdyaj7Xgege/ss1m8vr1E+LzPCv244gdH3vxlbN338qfzz/VX8YvIijj+oK2u3lfHENceQlR6hS14mcz7bxPm/n05B11xevulkVm0ppX+3PHaUVZKXFakzMF9w+0sc3rcTL1x/wh4d93Nzijiway4jCrrs0fNEpC4zm+PuTc5CrVBoYQW3B/cLWn5v0PWzekspx987jZtHD+Tm0YNYtmEnXww/pFurMV84IBZktbXPSmd73P0l2mens71s9/JXj+1H/27t6N0pmzGH9sTd6T8+aAktu2ccZoa712l5AGwvqyAjsrtFEf1dRp8nInuvuaGg7qME69Uph3l3nUH7cEyioGtubFt++yyKt5fvUUsAYPSQHtw+djAZkTS+9shMlm8sadE6NxQIQI1AAGoEAsDf3qt5+9Lbzhwcezz4jlcYkJ/HJ2u3c/xBXTm6oAunD+3B1IXrOG5AVy56+L1Y2XvOOyz2OBoqAPdfOJyD8tvx5IwVjB7ag007yzl7WC/WbC3l4O7tY+U27ChnW2kF2RkR3l+xmbMO61kjWLaVVTBz6SZOG9K9WYEzc9kmDu/bicz0PTuL292Z9sl6Rg3uHhuHAthSsou3Fm/gS8N7NfLs3baXVbBuW1mNYxRJBLUUWtgrH6+hqhrOGtazwTIbd5STl5VOeprxy/98yrUnDSAzPQ13Z2nxTiJpxssfr+Frxxfwk4kLuPvcL2BmbC7ZxZaSXRx14O7ulB3llRz648mx5T9dMYLOeRmc//vp9b10g7q1y2TDjt0D5sP7duKjlVv2aB+tRSTNqArHPHp0yGLdtnLGfOEAJlx2JKu3lPLou8v589vLYuVHD+nB7796JEYQciUVVTwzayVHHdiZD1Zs4Z3CDcxcvolbzxjEDacOrPFa05dsZHtZBVkZEY46sDPtsmp+z3pp7hquf/J9fnT2UK4+sT/Tl2zk6ILOnDvhHeav3sa0W05hQH47IAiyFZtKOLJf5zrHdM6DbzO3aCvfOGUA48cOqbN9085ddMnLpHD9DjIjaXTvkEVGJK1GEO2NraUVdSaG3Je2llZQWVVN11p3Q5Q9p+6jFFO4fjudcjNjtxJ9fPpyfvSvYNK+T382lhWbSthcsouv/CEIizOG9mBK3Gyvy+4ZV+Mb+XUnD4gNbqeCows60zk3s8bvpCG9O+WQlgaDe7Sv0cL7+gkF/OisoVS583+vfEJWeoS0NOM3ry7m2pP68/LHaynaXMroIT1iV8F/+9SD+e20Ql64/gS++/cPWbphJzN/eBrd22cDsGzDTv4+ayV/eGNJ7HWm3XIKB3bNY/WWUh6cVkiHnHT++NYyHrlyBFc9uvtv47wje3P/hU1fA1Nd7dz7yiecMbQHIwq6sHJTCc9/sIqDu7fjW0+8z/PfOp4/vLGES0b2Y9Tg7s3+nda2eN12+nfLIz0SjHtlRKzJVtrgO16mvLI
61h0re0+hIKzcVEJ5ZVWNLof4MY/XF63nnkmfcN9XhnNYn47sKK/k6Zkr+NlLC5lw6ZEsWruNaocHXyskMz2NS0f24+bRA/moaCtPzviMO7/0Bdpnp/P49M8Y2qsDj0//jGmfrKddVnpK3te6Y05Gi96l78bTBvKbVxc3u3yn3Ay2lNR8/bGHHsAvLxxOZiSN9EgaH6/aylMzV5DfPouPV23jxtMOZldlNReEXxamfOdkrn/ifRav38HA7u1YvH4HXxreixfDa2qW3TOO+au3MXXhOq46sT8VlTW/xe8sr+QHz8/jm6ccxHtLN/K14wpISzPWbC3luHumceXxBXz3jEEMu2sKd5w1hGtOGtDoMdUeo9vXPly5hZ/+ewFPXHNMmz97rlWEgpmNAR4AIsCf3P3eWtu/CVwPVAE7gOvcvdEJhBQKn8+bnxbTISejzoVxUVXVzrtLNnDiwd0wMyqrqjnz129y6xmDGXtYw11itd341Aexi/Mg+Hb9peG9WL5hJ6/MX8vRBZ2ZtXwzEIwfPDVzBXOLtsbKnzG0Bx1zMvjHnKIa+z1pYDc+WLGlWaHziwuGcduzc/nrVSOZV7SFbWWVKdX6iderYzbtszNYtK7uVfgN6ZqXycZa1+DcdNpAJrxWSGX17s+N+y8czhcHd+eHL8xj1vLNFMed4XbfV4YzfclGzj+qN5f+cQZ9Oudw73nD+Oqfg2t2Zt8xmhE/m8oNXzyYW+PGnyD4QP7yhHeAoFv0msdmM+U7JzOoR/Alp2RXJTOXbWqw9bJ6Sykbd+zisD4dmbF0IwvXbKN351xOH9qjwWOurKqucfbc2b99i49XbePJa45hREGXPR5TitpaWsH0JRsZc+gBe/X8lpD0UDCzCPApcDpQRHDP5kviP/TNrIO7bwsfnwN8y93HNLZfhULbsHZrGfe+vJBDe3fkgI7ZNaYgX7B6G0N6Bn/Y0e6DVVtKufxPM9hVVc2Dlx7J4X07MXn+Wr7x+Bx+MO4QdpZXkZ0R4crjC0iPGKUVVdz5wse88OFqBuTnsbR4Z43X/+qx/fjZlw+rc6ZT9JtnvNFDuvP6omLGHHoAFxzVhyv/MguAT346ho07d/HI28tqjEEAnDwIjEB5AAANO0lEQVQonzc/LY4t52REKN3LWXJHDc7n9UXFTRcEstLTKA9POW6L8jIj7NxV/+9p4d1jeHbOSk4elE9OZoSRP3+1TpmRBV147OqRpKcZB//wZQAeuvwo/j5rJScP7EYkzfhg5RZWbS5lSfEONuzYRUHX3BonY3x45+kUbS7liRmfccVxBWwvqyTN4OE3lzJlwTruPe8wnv9gFVkZEZYW74hNl39Ev07kt8tiyoJ1XDSiL/eefxhmxh/fXMp/FqzjmW8e1+Bxf+Px2Uyev443bhvFgV3z2FZWwbef/IAff2lobEypKWUVVZ+rtdIaQuE44C53PzNcHg/g7vc0UP4S4Ap3H9vYfhUKqcPdWbx+R+ybYX0eeXsZo4f0oF94VteIn02lS14GU75zSr3lR/58KiMKOvO7y45i+pKNPDuniPu+MozyymoyI2mYwZ3/mk9OZoQfjNs9oPuXd5bxz/dX8dx/H0+awS+mLOKhN5Zy25mDufakARTvKOeNRcWs3FzCQfnt+M+CtUxZsI74P6/jD+rKt08dyM8nLeDsYb249+VgOpR3bj+VP765lEffXc4dZw1heN9OvPVpMb+ZVhh77hlDe3DrmYMZ1KN9LNgKuubyywuH1zmp4KdfPpTyiiouPaYfQ++cTGP29Mw32e2xq0ayYUc5330mmO6m8Odja7QyXp63hu8/N5c/X3k01/x1NltLK3j+W8dzRL/O/O29z7jjhY/5ylF96N4hi865mTW60h59Zxl9OudSWe28XVjM32etpKLK+ee3jq/3RITmaA2hcAEwxt2vCZcvB45x9xtqlbse+C6QCZzq7nU6Uc3sOuA6gH79+h312WefJaTO0vZVVlWTZpbw6T6mL9n
IJX98r8k/0q2lFXTITmfeqq0c2DWvxpk80Q/3BXefydqtZUz8aDX/PeogstIjuDtPzlzBF3p1ZGtpBUf06xSbaiX6vA/vPJ1OuZl1Wj/v3H4qvTvl1CgLkBExKqqckQVd+NYXD+KUQfmYGfOKtpIeMXp0yObHE+czalA+D75WyLINOzmgQzZrt5W1zC8tzos3nEjR5hL++4n3W3zfyTLjB6fxxqfFrN1aRkG3vBozKMf77umD2LijnL9Or/k5NqRnB247cxD/WbCOp2aurPe5t505mOu/ePBe1a/NhEJc+UuBM939a43tVy0FaS0qqqrJaGRa9aZMXbCO5z9YxYOXHrFHF+f98c2lPDHjM6bdMoq0NKvxwf/u7afSKwwEgC/e9zq9O+XwyJVHs3pLKa8tWs/XT+jf5GtUVzsvzl3N2EN7smDNNp6euYKDu7fjZy8tBIJB7YuO7sspA/P59lMfxMYezjuiN1kZaVx70gA+WLElNmnkizecyI7ySp57v4j/O39YLLQPv3tKncHx+nxr1EFM+2R9vTMTA5x6SHemfVK3xbMnXXNtwXdPH8SNpw1sumA9WkMo7Gn3URqw2d07NrZfhYJITY9PX84TM1bw/bGH8MV6Bl0buoJ8T+0sr+TcCe9w7vBe3HDqwTX2OebXb3LR0X1rBE706v2f/9ehXHbMgfXu87k5RXVmG37y2mOYumA9j7yzexxn0o0nMfiA9hwUzhMWvf4k6pOfjuGQH70CwGu3jorNGrD83rP489vLeGDqpzx46ZFc8cjM2HO+N2Yw//fKIsYddgCT5tW8YPO4AV05cWA3Xvyo/inyk+Xak/rzw7OG7tVzW0MopBMMNJ8GrCIYaL7U3efHlRkY7S4ysy8BP26q0goFkbZj885ddMrNaDSUoqfxRtKMispqOuftvtNh6a4q3lu2MRZ2o37xGltKK3hv/GmUVVTxz/dXcfKgfA7u3q7G6auT5q1hw45yrjiuoMZrvTxvDWu2lvHQm0uY8p1TYt1590xaSJe8TP4xp4iizSXMu+vMWCtwweptvDRvNRNeW1JjX8vvPYvfvrqYX/7nUy4Z2Y+nZq6oM0XMH68YwbyiLbHxoV9cMIyJH63mrcUbOHtYT1ZsKomdddfYSQQTLj2ST9Zu45qTBuz1xYRJD4WwEuOAXxOckvqIu//czO4GZrv7RDN7ABgNVACbgRviQ6M+CgWR1LWrshrHyUqvexbO6i2lVFU7fbvk1vPMz++JGZ+xanMpI8ILHY/o1zl2p8VfTF7E715fwjdOHsDVJ/Xn+ife59YzBnPMgK4AHPXT/1BWUcX8u8dw2Z/e453CjTz69aMZNbg7pbuqyEpPY3tZJcPvngIE84+dMLAbn67dTkG3PK4+sekuv6a0irmP3H0SMKnWujvjHt+UyNcXkf1LY9cJxI+lJEJ9XWDRFtBZw3ry+qJivnZ8Ad3bZ/OPbx5fo9w7t58ae5yTEXzsRk8vzckM/u2QE6wf2b8L9180nNzM5ExNpyuaRUT2ofXbynj03eXccsbgzz031Z5oFS0FERGpqXuHbL4X3gCrNdr78+lERGS/o1AQEZEYhYKIiMQoFEREJEahICIiMQoFERGJUSiIiEiMQkFERGLa3BXNZlYM7O0NFboBG1qwOm2Bjjk16JhTw+c55gPdPb+pQm0uFD4PM5vdnMu89yc65tSgY04N++KY1X0kIiIxCgUREYlJtVB4ONkVSAIdc2rQMaeGhB9zSo0piIhI41KtpSAiIo1QKIiISEzKhIKZjTGzRWZWaGa3J7s+LcXM+prZa2a2wMzmm9lN4fouZvYfM1sc/ts5XG9m9pvw9zDXzI5M7hHsHTOLmNkHZvbvcLm/mc0Ij+vvZpYZrs8KlwvD7QXJrPfnYWadzOxZM/vEzBaa2XH78/tsZt8J/09/bGZPmVn2/vg+m9kjZrbezD6OW7fH76uZfS0sv9jMvra39UmJUDCzCDABGAsMBS4xs6HJrVWLqQR
ucfehwLHA9eGx3Q686u4DgVfDZQh+BwPDn+uA3+/7KreIm4CFccv/C/zK3Q8GNgNXh+uvBjaH638VlmurHgBecfdDgOEEx79fvs9m1hu4ERjh7ocCEeBi9s/3+VFgTK11e/S+mlkX4MfAMcBI4MfRINlj7r7f/wDHAZPjlscD45NdrwQd67+A04FFQM9wXU9gUfj4IeCSuPKxcm3lB+gT/qGcCvwbMIKrPNNrv9/AZOC48HF6WM6SfQx7ccwdgWW1676/vs9Ab2Al0CV83/4NnLm/vs9AAfDx3r6vwCXAQ3Hra5Tbk5+UaCmw+z9YVFG4br8SNpmPAGYAPdx9TbhpLdAjfLw//C5+DXwPqA6XuwJb3L0yXI4/ptjxhtu3huXbmv5AMfCXsNvsT2aWx376Prv7KuA+YAWwhuB9m8P+/z5H7en72mLvd6qEwn7PzNoBzwE3u/u2+G0efHXYL849NrOzgfXuPifZddnH0oEjgd+7+xHATnZ3KQD73fvcGTiXIAx7AXnU7WJJCfv6fU2VUFgF9I1b7hOu2y+YWQZBIDzh7v8MV68zs57h9p7A+nB9W/9dnACcY2bLgacJupAeADqZWXpYJv6YYscbbu8IbNyXFW4hRUCRu88Il58lCIn99X0eDSxz92J3rwD+SfDe7+/vc9Sevq8t9n6nSijMAgaGZy5kEgxYTUxynVqEmRnwZ2Chu98ft2kiED0D4WsEYw3R9VeEZzEcC2yNa6a2eu4+3t37uHsBwfs4zd0vA14DLgiL1T7e6O/hgrB8m/s27e5rgZVmNjhcdRqwgP30fSboNjrWzHLD/+PR492v3+c4e/q+TgbOMLPOYSvrjHDdnkv2AMs+HMgZB3wKLAF+mOz6tOBxnUjQtJwLfBj+jCPoT30VWAxMBbqE5Y3gTKwlwDyCszuSfhx7eeyjgH+HjwcAM4FC4B9AVrg+O1wuDLcPSHa9P8fxHg7MDt/rF4DO+/P7DPwE+AT4GHgcyNof32fgKYJxkwqCFuHVe/O+AleFx18IfH1v66NpLkREJCZVuo9ERKQZFAoiIhKjUBARkRiFgoiIxCgUREQkRqEgsg+Z2ajozK4irZFCQUREYhQKIvUws6+a2Uwz+9DMHgrv37DDzH4VzvH/qpnlh2UPN7P3wvntn4+b+/5gM5tqZh+Z2ftmdlC4+3Zx90V4IrxiV6RVUCiI1GJmQ4CLgBPc/XCgCriMYFK22e7+BeANgvnrAR4Dvu/uwwiuMo2ufwKY4O7DgeMJrlqFYCbbmwnu7TGAYE4fkVYhvekiIinnNOAoYFb4JT6HYEKyauDvYZm/Af80s45AJ3d/I1z/V+AfZtYe6O3uzwO4exlAuL+Z7l4ULn9IMJf+24k/LJGmKRRE6jLgr+4+vsZKsx/VKre3c8SUxz2uQn+H0oqo+0ikrleBC8ysO8Tul3sgwd9LdIbOS4G33X0rsNnMTgrXXw684e7bgSIz+3K4jywzy92nRyGyF/QNRaQWd19gZncAU8wsjWD2yusJbmwzMty2nmDcAYKpjf8QfugvBb4err8ceMjM7g738ZV9eBgie0WzpIo0k5ntcPd2ya6HSCKp+0hERGLUUhARkRi1FEREJEahICIiMQoFERGJUSiIiEiMQkFERGL+HyYBe0rzfQeZAAAAAElFTkSuQmCC\n", 265 | "text/plain": [ 266 | "
" 267 | ] 268 | }, 269 | "metadata": {}, 270 | "output_type": "display_data" 271 | } 272 | ], 273 | "source": [ 274 | "epochs = np.arange(len(losses))\n", 275 | "plt.title(r'Loss Per Epoch')\n", 276 | "plt.xlabel('epoch')\n", 277 | "plt.ylabel('loss')\n", 278 | "plt.plot(epochs, losses)\n", 279 | "plt.show()" 280 | ] 281 | }, 282 | { 283 | "cell_type": "markdown", 284 | "metadata": {}, 285 | "source": [ 286 | "### 3.3: Show Evaluation Results" 287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "execution_count": 14, 292 | "metadata": {}, 293 | "outputs": [ 294 | { 295 | "name": "stdout", 296 | "output_type": "stream", 297 | "text": [ 298 | "{\n", 299 | " \"mean_rank\": 24.59951219512195,\n", 300 | " \"hits@k\": {\n", 301 | " \"1\": 0.12097560975609756,\n", 302 | " \"3\": 0.2531707317073171,\n", 303 | " \"5\": 0.3297560975609756,\n", 304 | " \"10\": 0.4678048780487805\n", 305 | " }\n", 306 | "}\n" 307 | ] 308 | } 309 | ], 310 | "source": [ 311 | "print(json.dumps(results.results['eval_summary'], indent=2))" 312 | ] 313 | } 314 | ], 315 | "metadata": { 316 | "kernelspec": { 317 | "display_name": "Python 3", 318 | "language": "python", 319 | "name": "python3" 320 | }, 321 | "language_info": { 322 | "codemirror_mode": { 323 | "name": "ipython", 324 | "version": 3 325 | }, 326 | "file_extension": ".py", 327 | "mimetype": "text/x-python", 328 | "name": "python", 329 | "nbconvert_exporter": "python", 330 | "pygments_lexer": "ipython3", 331 | "version": "3.7.0" 332 | } 333 | }, 334 | "nbformat": 4, 335 | "nbformat_minor": 2 336 | } 337 | -------------------------------------------------------------------------------- /notebooks/README.rst: -------------------------------------------------------------------------------- 1 | Notebooks 2 | ========= 3 | This folder contains the following notebooks: 4 | 5 | Tutorials 6 | --------- 7 | 1. 
How to use BioKEEN 8 | 9 | Application Scenarios 10 | --------------------- 11 | There are several tasks that can be completed with biological knowledge graph embeddings. 12 | This section links notebooks that show several of those scenarios. 13 | 14 | Link Prediction 15 | ~~~~~~~~~~~~~~~ 16 | 1. Case Scenario applied to the ComPath dataset. 17 | 2. Case Scenario applied to the ADEPTUS dataset. 18 | 3. Case Scenario applied to the HSDN dataset. 19 | 4. Case Scenario applied to union of the HSDN and ADEPTUS datasets. 20 | 21 | Entity Disambiguation 22 | --------------------- 23 | This is the task of identifying when several nodes from different KGs that have been 24 | merged refer to the same entity. We haven't got a use case for this yet. 25 | 26 | Link-Based Clustering 27 | --------------------- 28 | This is the task of clustering nodes based on their embeddings. We haven't got a use 29 | case for this yet. 30 | -------------------------------------------------------------------------------- /requirements-rtd.txt: -------------------------------------------------------------------------------- 1 | pybel<0.14.0 2 | bio2bel<0.3.0 3 | tqdm 4 | pandas 5 | sphinx 6 | sphinx-rtd-theme 7 | sphinx-click 8 | sphinx-autodoc-typehints 9 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | ########################## 2 | # Setup.py Configuration # 3 | ########################## 4 | # Configuring setup() 5 | [metadata] 6 | name = biokeen 7 | version = 0.0.15-dev 8 | description = A package for training and evaluating biological knowledge graph embeddings 9 | long_description = file: README.rst 10 | 11 | url = https://github.com/SmartDataAnalytics/BioKEEN 12 | download_url = https://github.com/SmartDataAnalytics/BioKEEN/releases 13 | project_urls = 14 | Bug Tracker = https://github.com/SmartDataAnalytics/BioKEEN/issues 15 | Documentation = 
https://biokeen.readthedocs.io 16 | 17 | author = Mehdi Ali 18 | author_email = mehdi.ali@cs.uni-bonn.de 19 | maintainer = Mehdi Ali 20 | maintainer_email = mehdi.ali@cs.uni-bonn.de 21 | 22 | license = MIT 23 | license_file = LICENSE 24 | 25 | classifiers = 26 | Development Status :: 1 - Planning 27 | Intended Audience :: Developers 28 | License :: OSI Approved :: MIT License 29 | Programming Language :: Python 30 | Programming Language :: Python :: 3.7 31 | Programming Language :: Python :: 3.6 32 | Programming Language :: Python :: 3 :: Only 33 | keywords = 34 | KEEN 35 | knowledge embedding 36 | 37 | [options] 38 | install_requires = 39 | mock 40 | pykeen<1.0.0 41 | pybel<0.14.0 42 | bio2bel<0.3.0 43 | tqdm 44 | pandas 45 | click-default-group 46 | click 47 | 48 | zip_safe = false 49 | python_requires = >=3.6 50 | packages = find: 51 | package_dir = 52 | = src 53 | 54 | [options.extras_require] 55 | docs = 56 | sphinx 57 | sphinx-rtd-theme 58 | sphinx-click 59 | sphinx_autodoc_typehints 60 | 61 | [options.packages.find] 62 | where = src 63 | 64 | [options.entry_points] 65 | console_scripts = 66 | biokeen = biokeen.cli:main 67 | pykeen.data.importer = 68 | bio2bel = biokeen.content:handle_bio2bel 69 | bel_commons = biokeen.content:handle_bel_commons 70 | 71 | ###################### 72 | # Doc8 Configuration # 73 | # (doc8.ini) # 74 | ###################### 75 | [doc8] 76 | max-line-length = 120 77 | 78 | ########################## 79 | # Coverage Configuration # 80 | # (.coveragerc) # 81 | ########################## 82 | [coverage:run] 83 | branch = True 84 | source = biokeen 85 | omit = 86 | tests/* 87 | docs/* 88 | src/biokeen/cli/* 89 | src/biokeen/__main__.py 90 | 91 | [coverage:paths] 92 | source = 93 | src/biokeen 94 | .tox/*/lib/python*/site-packages/biokeen 95 | 96 | [coverage:report] 97 | show_missing = True 98 | exclude_lines = 99 | def __str__ 100 | def __repr__ 101 | -------------------------------------------------------------------------------- 
/setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """Setup module for BioKEEN.""" 4 | 5 | import setuptools 6 | 7 | if __name__ == '__main__': 8 | setuptools.setup() 9 | -------------------------------------------------------------------------------- /src/biokeen/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """A package for training and evaluating knowledge graph embedding models on biological knowledge graphs.""" 4 | 5 | from . import convert # noqa: 401 6 | -------------------------------------------------------------------------------- /src/biokeen/__main__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """Entrypoint module, in case you use ``python -m biokeen``. 4 | 5 | Why does this file exist, and why ``__main__``? For more info, read: 6 | - https://www.python.org/dev/peps/pep-0338/ 7 | - https://docs.python.org/3/using/cmdline.html#cmdoption-m 8 | """ 9 | 10 | from .cli import main 11 | 12 | if __name__ == '__main__': 13 | main() 14 | -------------------------------------------------------------------------------- /src/biokeen/cli/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """The command line interface for BioKEEN.""" 4 | 5 | from .cli import main # noqa: 401 6 | -------------------------------------------------------------------------------- /src/biokeen/cli/__main__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """Entrypoint module, in case you use ``python -m biokeen.cli``. 4 | 5 | Why does this file exist, and why ``__main__``? 
For more info, read: 6 | - https://www.python.org/dev/peps/pep-0338/ 7 | - https://docs.python.org/3/using/cmdline.html#cmdoption-m 8 | """ 9 | 10 | from .cli import main 11 | 12 | if __name__ == '__main__': 13 | main() 14 | -------------------------------------------------------------------------------- /src/biokeen/cli/cli.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """A command line interface for BioKEEN. 4 | 5 | Why does this file exist, and why not put this in ``__main__``? You might be tempted to import things from ``__main__`` 6 | later, but that will cause problems--the code will get executed twice: 7 | - When you run ``python3 -m biokeen`` python will execute``__main__.py`` as a script. That means there won't be any 8 | ``biokeen.__main__`` in ``sys.modules``. 9 | - When you import __main__ it will get executed again (as a module) because 10 | there's no ``biokeen.__main__`` in ``sys.modules``. 11 | .. 
seealso:: http://click.pocoo.org/5/setuptools/#setuptools-integration 12 | """ 13 | 14 | import json 15 | import logging 16 | import os 17 | from typing import List, Optional, TextIO 18 | 19 | import click 20 | from click_default_group import DefaultGroup 21 | 22 | from bio2bel.constants import get_global_connection 23 | from biokeen.constants import EMOJI, VERSION, biokeen_config 24 | from pykeen.constants import VERSION as PYKEEN_VERSION 25 | 26 | connection_option = click.option( 27 | '-c', 28 | '--connection', 29 | default=get_global_connection(), 30 | show_default=True, 31 | help='Bio2BEL database connection string', 32 | ) 33 | 34 | 35 | @click.group(cls=DefaultGroup, default_if_no_args=False) 36 | @click.version_option() 37 | def main(): # noqa: D401 38 | """A command line interface for BioKEEN.""" 39 | 40 | 41 | @main.command() 42 | @connection_option 43 | @click.option('-f', '--config', type=click.File()) 44 | @click.option('-r', '--rebuild', is_flag=True) 45 | @click.option('-x', '--no-prompt-bio2bel', is_flag=True) 46 | def start(config: Optional[TextIO], connection: str, rebuild: bool, no_prompt_bio2bel: bool): 47 | """Start the BioKEEN training pipeline.""" 48 | import pykeen 49 | 50 | if config is not None: 51 | config = json.load(config) 52 | else: 53 | from .prompts import prompt_biokeen_config 54 | config = prompt_biokeen_config( 55 | connection=connection, 56 | rebuild=rebuild, 57 | do_prompt_bio2bel=(not no_prompt_bio2bel), 58 | ) 59 | 60 | config['pykeen-version'] = PYKEEN_VERSION 61 | config['biokeen-version'] = VERSION 62 | pykeen.run(config) 63 | 64 | 65 | @main.command() 66 | @click.option('-m', '--model-directory', type=click.Path(file_okay=False, dir_okay=True, exists=True), required=True) 67 | @click.option('-d', '--data-directory', type=click.Path(file_okay=False, dir_okay=True, exists=True), required=True) 68 | def predict(model_directory: str, data_directory: str): 69 | """Use a trained model to make predictions.""" 70 | from 
pykeen.predict import start_predictions_pipeline 71 | start_predictions_pipeline(model_directory, data_directory) 72 | 73 | 74 | @main.group() 75 | def data(): 76 | """Commands for data acquisition.""" 77 | 78 | 79 | @data.command(help=f'Data stored in {biokeen_config.data_directory}') 80 | def ls(): 81 | """List built data.""" 82 | for path in biokeen_config.iterate_source_paths(): 83 | click.echo(path) 84 | 85 | 86 | @data.command() 87 | @click.confirmation_option() 88 | def clear(): 89 | """Remove all built data.""" 90 | for path in biokeen_config.iterate_source_paths(): 91 | os.remove(path) 92 | 93 | 94 | @data.command() 95 | @click.argument('names', nargs=-1) 96 | @connection_option 97 | @click.option('-r', '--rebuild', is_flag=True) 98 | @click.option('-v', '--verbose', count=True) 99 | def get(names: List[str], connection: str, rebuild: bool, verbose: bool): 100 | """Install, populate, and build Bio2BEL repository.""" 101 | if verbose == 1: 102 | logging.basicConfig(level=logging.INFO) 103 | elif verbose == 2: 104 | logging.basicConfig(level=logging.DEBUG) 105 | 106 | from biokeen.content import install_bio2bel_module 107 | 108 | for name in names: 109 | click.secho(f'{EMOJI} Getting {name}', fg='cyan') 110 | install_bio2bel_module(name, connection, rebuild) 111 | 112 | 113 | if __name__ == '__main__': 114 | main() 115 | -------------------------------------------------------------------------------- /src/biokeen/cli/messages.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """BioKEEN's command line interface helper.""" 4 | 5 | import click 6 | 7 | 8 | def print_welcome_message(): 9 | """Print welcome message.""" 10 | click.secho('#################################################') 11 | click.secho("#\t\tWelcome to " + click.style("BioKEEN", bold=True) + "\t\t#") 12 | click.secho('#################################################') 13 | 14 | 15 | def print_intro(): 16 | """Print 
intro.""" 17 | click.secho("This interface will assist you to configure your experiment.") 18 | click.secho("") 19 | click.secho( 20 | "BioKEEN can be run in two modes: \n" 21 | "1.) Training mode: BioKEEN trains a model based on a set of user-defined hyper-parameters.\n" 22 | "2.) Hyper-parameter optimization mode: " 23 | "Apply Random Search to determine the most appropriate set of hyper-parameter values" 24 | ) 25 | -------------------------------------------------------------------------------- /src/biokeen/cli/prompts.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """Prompts for the BioKEEN command line interface.""" 4 | 5 | import os 6 | import re 7 | from collections import OrderedDict 8 | from typing import Dict, Iterable, Optional 9 | 10 | import click 11 | 12 | from pykeen.cli.prompt import prompt_config 13 | from pykeen.cli.utils.cli_print_msg_helper import print_section_divider 14 | from pykeen.constants import TRAINING_SET_PATH 15 | from .messages import print_intro, print_welcome_message 16 | from ..constants import ID_TO_DATABASE_MAPPING 17 | from ..content import install_bio2bel_module 18 | 19 | __all__ = [ 20 | 'prompt_biokeen_config', 21 | ] 22 | 23 | 24 | def prompt_biokeen_config(*, connection: str, rebuild: bool, do_prompt_bio2bel: Optional[bool] = None) -> Dict: 25 | """Configure experiments.""" 26 | config = OrderedDict() 27 | 28 | # Step 1: Welcome + Intro 29 | print_welcome_message() 30 | print_section_divider() 31 | print_intro() 32 | print_section_divider() 33 | 34 | # Step 2: Ask for data source 35 | if do_prompt_bio2bel is None: 36 | do_prompt_bio2bel = click.confirm('Do you want to use one of the databases provided by BioKEEN?', default=True) 37 | print_section_divider() 38 | 39 | do_prompt_training = True 40 | 41 | if do_prompt_bio2bel: 42 | do_prompt_training = False 43 | config[TRAINING_SET_PATH] = [] 44 | for name in select_bio2bel_repository(): 45 | try: 46 
| path = install_bio2bel_module(name=name, connection=connection, rebuild=rebuild) 47 | except Exception: 48 | click.secho(f'failed: {name}', fg='red') 49 | else: 50 | if os.path.exists(path): 51 | config[TRAINING_SET_PATH].append(f'bio2bel:{name}') 52 | else: 53 | click.secho(f'failed: {name}: {path}', fg='red') 54 | 55 | # TODO replace this with less safe code that assumes everything installs no problemo 56 | """ 57 | config[TRAINING_SET_PATH] = [ 58 | f'bio2bel:{name}' 59 | for name in select_bio2bel_repository() 60 | ] 61 | """ 62 | 63 | print_section_divider() 64 | 65 | return prompt_config( 66 | config=config, 67 | show_welcome=False, 68 | do_prompt_training=do_prompt_training, 69 | ) 70 | 71 | 72 | def select_bio2bel_repository() -> Iterable[str]: 73 | """Prompt the user for a Bio2BEL database.""" 74 | click.secho("Current Step: Please select the database(s) you want to train on:", fg='blue') 75 | 76 | number_width = 1 + round(len(ID_TO_DATABASE_MAPPING) / 10) 77 | for model, model_id in sorted(ID_TO_DATABASE_MAPPING.items()): 78 | click.echo(f'{model: >{number_width}}: {model_id}') 79 | 80 | while True: 81 | try: 82 | user_input = click.prompt('> Please select one or more of the options', value_proc=process_selection) 83 | except ValueError as e: 84 | click.secho(str(e), fg='red') 85 | else: 86 | return user_input 87 | 88 | 89 | def process_selection(values: str) -> Iterable[str]: 90 | for value in re.split(r'\s|,', values): 91 | value = value.strip() 92 | if not value: 93 | continue 94 | 95 | try: 96 | value = int(value) 97 | except ValueError: 98 | if value not in ID_TO_DATABASE_MAPPING.values(): 99 | raise ValueError(f'{value} is an invalid database') 100 | yield value 101 | else: 102 | if value not in ID_TO_DATABASE_MAPPING: 103 | raise ValueError(f'{value} is an invalid index') 104 | yield ID_TO_DATABASE_MAPPING[value] 105 | -------------------------------------------------------------------------------- /src/biokeen/constants.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """Constants for BioKEEN.""" 4 | 5 | import os 6 | from typing import Iterable 7 | 8 | import easy_config 9 | 10 | HERE = os.path.abspath(os.path.dirname(__file__)) 11 | HOME = os.path.expanduser('~') 12 | 13 | 14 | class BiokeenConfig(easy_config.EasyConfig): 15 | """Configuration for BioKEEN.""" 16 | 17 | NAME = 'biokeen' 18 | FILES = [ 19 | os.path.join(HOME, '.config', 'biokeen.cfg'), 20 | os.path.join(HOME, '.config', 'config.ini'), 21 | ] 22 | 23 | #: the data directory where TSVs get exported 24 | data_directory: str = os.path.abspath(os.path.join(HOME, '.keen', 'biokeen')) 25 | 26 | #: The file extension of pre-processed Bio2BEL databases 27 | keen_tsv_ext: str = 'keen.tsv' 28 | 29 | def iterate_source_paths(self) -> Iterable[str]: 30 | """Iterate over the source paths.""" 31 | for file_name in os.listdir(self.data_directory): 32 | if file_name.endswith(self.keen_tsv_ext): 33 | yield os.path.join(self.data_directory, file_name) 34 | 35 | 36 | biokeen_config = BiokeenConfig.load() 37 | os.makedirs(biokeen_config.data_directory, exist_ok=True) 38 | 39 | VERSION = '0.0.15-dev' 40 | EMOJI = '🍩' 41 | 42 | # Available databases 43 | COMPATH_NAME = 'compath' 44 | HIPPIE_NAME = 'hippie' 45 | KEGG_NAME = 'kegg' 46 | MIRTARBASE_NAME = 'mirtarbase' 47 | MSIG_NAME = 'msig' 48 | REACTOME_NAME = 'reactome' 49 | SIDER_NAME = 'sider' 50 | WIKIPATHWAYS_NAME = 'wikipathways' 51 | DRUGBANK_NAME = 'drugbank' 52 | ADEPTUS_NAME = 'adeptus' 53 | HSDN_NAME = 'hsdn' 54 | INTERPRO_NAME = 'interpro' 55 | DDR_NAME = 'ddr' 56 | 57 | # ToDo: Add databases 58 | DATABASES = [ 59 | COMPATH_NAME, 60 | HIPPIE_NAME, 61 | KEGG_NAME, 62 | MIRTARBASE_NAME, 63 | MSIG_NAME, 64 | REACTOME_NAME, 65 | SIDER_NAME, 66 | WIKIPATHWAYS_NAME, 67 | DRUGBANK_NAME, 68 | ADEPTUS_NAME, 69 | HSDN_NAME, 70 | INTERPRO_NAME, 71 | DDR_NAME, 72 | ] 73 | 74 | ID_TO_DATABASE_MAPPING = 
_SPECIAL_CASES = {
    'compath': 'compath_resources',
}

logger = logging.getLogger(__name__)


def _write_pykeen_files(df, pykeen_df_path: str, pykeen_df_summary_path: str) -> Optional[str]:
    """Write the summary JSON and the triples TSV, returning the TSV path only if triples were written."""
    to_pykeen_summary_path(df, pykeen_df_summary_path)

    # to_pykeen_path() reports False (and writes nothing) when the dataframe has no rows
    if to_pykeen_path(df, pykeen_df_path):
        logger.debug(f'{EMOJI} wrote PyKEEN TSV to {pykeen_df_path}')
        return pykeen_df_path

    logger.warning(f'{EMOJI} no statements generated')
    return None


def install_bio2bel_module(name: str, connection: Optional[str] = None, rebuild: bool = False) -> Optional[str]:
    """Install a Bio2BEL module and export its content as a PyKEEN TSV file.

    The lookup is done in three steps: an already exported TSV is reused, then an already exported
    BEL JSON file is converted, and only otherwise is the Bio2BEL package imported (and installed
    if necessary), populated, and exported. The first two steps are skipped if ``rebuild`` is set.

    The process exits with status 1 if the module's manager does not produce BEL.

    :param name: The name of the Bio2BEL module
    :param connection: The optional database connection
    :param rebuild: Should the cache not be used? Defaults to False.
    :return: The path to the PyKEEN TSV file, or None if no statements could be generated
    """
    module_name = _SPECIAL_CASES.get(name, f'bio2bel_{name}')

    pykeen_df_path = os.path.join(biokeen_config.data_directory, f'{name}.{biokeen_config.keen_tsv_ext}')
    pykeen_df_summary_path = os.path.join(biokeen_config.data_directory, f'{name}.keen.summary.json')
    json_path = os.path.join(biokeen_config.data_directory, f'{name}.bel.json')

    if os.path.exists(pykeen_df_path) and not rebuild:
        logger.info(f'{EMOJI} {module_name} has already been retrieved. See: {pykeen_df_path}')
        return pykeen_df_path

    if os.path.exists(json_path) and not rebuild:
        logger.info(f'{EMOJI} loaded {module_name} JSON: {json_path}')
        graph = from_json_path(json_path)
        # Same export (and same "nothing written" handling) as the freshly generated graph below
        return _write_pykeen_files(to_pykeen_df(graph), pykeen_df_path, pykeen_df_summary_path)

    bio2bel_module = ensure_bio2bel_installation(module_name)
    logger.debug(f'{EMOJI} imported {module_name}')

    manager_cls = bio2bel_module.Manager

    if not issubclass(manager_cls, BELManagerMixin):
        version = pkg_resources.get_distribution(module_name).version
        logger.warning(f'{EMOJI} {module_name} v{version} does not produce BEL')
        sys.exit(1)

    manager = manager_cls(connection=connection)

    if issubclass(manager_cls, AbstractManager):
        if not manager.is_populated():
            logger.info(f'{EMOJI} populating {module_name}')
            manager.populate()
        else:
            logger.debug(f'{EMOJI} {module_name} has already been populated')

    logger.debug(f'{EMOJI} generating BEL for {module_name}')
    graph = manager.to_bel()

    logger.debug(f'Summary: {graph.number_of_nodes()} nodes / {graph.number_of_edges()} edges')
    to_json_path(graph, json_path, indent=2)

    logger.debug(f'{EMOJI} generating PyKEEN TSV for {module_name}')
    return _write_pykeen_files(to_pykeen_df(graph), pykeen_df_path, pykeen_df_summary_path)


def ensure_bio2bel_installation(package: str):
    """Import a package, installing it with pip first if it can not be imported.

    The process exits with status 1 if pip fails or if the package still can not be imported
    after the installation.

    :param package: The importable name of the package, which is also used as the name given to pip
    :return: The imported module
    """
    try:
        return importlib.import_module(package)
    except ImportError:
        logger.info(f'{EMOJI} pip install {package}')

    # pip has no supported in-process API (``pip._internal`` is private), so it is run
    # as a subprocess of the current interpreter.
    import subprocess  # noqa: S404

    completed = subprocess.run(  # noqa: S603
        [sys.executable, '-m', 'pip', 'install', '-q', package],  # -q means quiet
        stdout=subprocess.PIPE,
        universal_newlines=True,
    )
    if completed.stdout:
        # keep this process's stdout clean by forwarding pip's output to stderr
        sys.stderr.write(completed.stdout)

    if 0 != completed.returncode:  # command failed
        logger.warning(f'{EMOJI} could not find {package} on PyPI. Try installing from GitHub with:')
        name = package.split("_")[-1]
        logger.warning(f'\n   pip install git+https://github.com/bio2bel/{name}.git\n')
        sys.exit(1)

    # The import system caches directory listings; refresh them so the new package is visible
    importlib.invalidate_caches()

    try:
        return importlib.import_module(package)
    except ImportError:
        logger.exception(f'{EMOJI} failed to import {package}')
        sys.exit(1)


BIO2BEL_PREFIX = 'bio2bel'


def handle_bio2bel(module_name: str) -> np.ndarray:
    """Load a Bio2BEL repository.

    :param module_name: The name of the bio2bel repository (with no prefix)
    :return: A two-dimensional array of strings with one row per triple
    :raises ValueError: if no statements could be generated for the repository
    """
    path = install_bio2bel_module(module_name)
    if path is None:
        raise ValueError(f'no statements could be generated for Bio2BEL repository: {module_name}')

    return np.loadtxt(
        fname=path,
        dtype=str,
        # NOTE(review): presumably a dummy marker so the default '#' is not treated as a comment - confirm
        comments='@Comment@ Subject Predicate Object',
        delimiter='\t',
        ndmin=2,  # a file with a single triple must still give a (1, 3) array
    )


def handle_bel_commons(network_id: Union[int, str], host: Optional[str] = None) -> np.ndarray:
    """Load a BEL document from BEL Commons.

    :param network_id: The network identifier in BEL Commons
    :param host: The host for BEL Commons. Defaults to the Fraunhofer SCAI public instance.
    """
    graph = from_web(int(network_id), host=host)
    df = to_pykeen_df(graph)
    return df.to_numpy()
def _curie(node: BaseEntity) -> str:
    """Format a node as ``namespace:identifier``, using the name if the identifier is empty."""
    return f'{node.namespace}:{node.identifier or node.name}'


class Converter(ABC):
    """A condition and converter for a BEL edge."""

    @staticmethod
    @abstractmethod
    def predicate(u: BaseEntity, v: BaseEntity, key: str, edge_data: Dict) -> bool:
        """Test a BEL edge."""

    @staticmethod
    @abstractmethod
    def convert(u: BaseEntity, v: BaseEntity, key: str, edge_data: Dict) -> Tuple[str, str, str]:
        """Convert a BEL edge."""


class SimpleConverter(Converter):
    """A class for converting the source and target that have simple names.

    The relation of the BEL edge is used as the predicate of the triple without modification.
    """

    @classmethod
    def convert(cls, u: BaseEntity, v: BaseEntity, key: str, edge_data: Dict) -> Tuple[str, str, str]:
        """Convert a BEL edge."""
        return (
            _curie(u),
            edge_data[RELATION],
            _curie(v),
        )


class TypedConverter(Converter):
    """A class for converting the source and target but replaces the relation."""

    # The predicate written to the triple; set by the subclasses
    target_relation = None

    @classmethod
    def convert(cls, u: BaseEntity, v: BaseEntity, key: str, edge_data: Dict) -> Tuple[str, str, str]:
        """Convert a BEL edge."""
        return (
            _curie(u),
            cls.target_relation,
            _curie(v),
        )


class SimplePredicate(Converter):
    """Converts BEL statements based on a given relation."""

    # Placeholder; the subclasses set the BEL relation that an edge must have
    relation = ...

    @classmethod
    def predicate(cls, u: BaseEntity, v: BaseEntity, key: str, edge_data: Dict) -> bool:
        """Test a BEL edge has a given relation."""
        return edge_data[RELATION] == cls.relation


class SimpleTypedPredicate(SimplePredicate):
    """Finds BEL statements like ``A(X) B C(Y)`` where relation B and types A and C are defined in the class."""

    # Placeholders; the subclasses set the node classes required for the subject and the object
    subject_type = ...
    object_type = ...

    @classmethod
    def predicate(cls, u: BaseEntity, v: BaseEntity, key: str, edge_data: Dict) -> bool:
        """Test a BEL edge."""
        return super().predicate(u, v, key, edge_data) and (
            isinstance(u, cls.subject_type) and
            isinstance(v, cls.object_type)
        )


class _PartOfConverter(SimpleTypedPredicate, TypedConverter):
    """Shared base for typed ``partOf`` statements; subclasses set the subject and object types."""

    relation = PART_OF
    target_relation = 'partOf'


class PartOfNamedComplexConverter(_PartOfConverter):
    """Converts BEL statements like ``p(X) partOf complex(Y)``."""

    subject_type = Protein
    object_type = NamedComplexAbundance


class SubprocessPartOfBiologicalProcess(_PartOfConverter):
    """Converts BEL statements like ``bp(X) partOf bp(Y)``."""

    subject_type = BiologicalProcess
    object_type = BiologicalProcess


class ProteinPartOfBiologicalProcess(_PartOfConverter):
    """Converts BEL statements like ``p(X) partOf bp(Y)``."""

    subject_type = Protein
    object_type = BiologicalProcess


class NamedComplexHasComponentConverter(SimpleTypedPredicate):
    """Converts BEL statements like ``complex(X) hasComponent p(Y)``."""

    subject_type = NamedComplexAbundance
    relation = HAS_COMPONENT
    object_type = Protein
    target_relation = 'partOf'

    @classmethod
    def convert(cls, u: BaseEntity, v: BaseEntity, key: str, data: Dict) -> Tuple[str, str, str]:
        """Convert a BEL edge.

        The subject and object are swapped, so the triple reads ``p(Y) partOf complex(X)``.
        """
        return (
            _curie(v),
            cls.target_relation,
            _curie(u),
        )


class ListComplexHasComponentConverter(SimpleTypedPredicate):
    """Converts BEL statements like ``complex(p(X), p(Y), ...) hasComponent p(X)``."""

    subject_type = ComplexAbundance
    relation = HAS_COMPONENT
    object_type = Protein
    target_relation = 'partOf'

    @classmethod
    def convert(cls, u: ComplexAbundance, v: BaseAbundance, key: str, data: Dict) -> Tuple[str, str, str]:
        """Convert a BEL edge.

        The subject and object are swapped, and the complex is written as ``str(u)`` in the triple.
        """
        return (
            _curie(v),
            cls.target_relation,
            str(u),
        )


'''
class TranslationConverter(TypedConverter):
    relation = TRANSLATED_TO
    target_relation = 'ribosomallyTranslatesTo'


class TranscriptionConverter(TypedConverter):
    relation = TRANSCRIBED_TO
    target_relation = 'ribosomallyTranslatesTo'
'''


class IsAConverter(SimplePredicate, SimpleConverter):
    """Converts BEL statements like ``X isA Y``."""

    relation = IS_A
    target_relation = 'isA'


class EquivalenceConverter(SimplePredicate, SimpleConverter):
    """Converts BEL statements like ``X eq Y``."""

    relation = EQUIVALENT_TO
    target_relation = 'equivalentTo'


class CorrelationConverter(SimpleConverter):
    """Converts BEL statements like ``A(B) pos|neg|noCorrelation C(D)``."""

    @staticmethod
    def predicate(u: BaseEntity, v: BaseEntity, key: str, edge_data: Dict) -> bool:
        """Test a BEL edge."""
        return edge_data[RELATION] in CORRELATIVE_RELATIONS


class AssociationConverter(Converter):
    """Converts BEL statements like ``a(X) -- path(Y)``."""

    @staticmethod
    def predicate(u: BaseEntity, v: BaseEntity, key: str, edge_data: Dict) -> bool:
        """Test a BEL edge."""
        return edge_data[RELATION] == ASSOCIATION

    @staticmethod
    def convert(u: BaseEntity, v: BaseEntity, key: str, edge_data: Dict) -> Tuple[str, str, str]:
        """Convert a BEL edge."""
        return (
            _curie(u),
            edge_data.get('association_type', ASSOCIATION),  # allow more specific association to be defined
            _curie(v),
        )


class DrugEffectConverter(SimpleConverter, SimpleTypedPredicate):
    """Converts BEL statements like ``a(X) ? path(Y)``."""

    subject_type = Abundance
    relation = ...  # placeholder; set by the subclasses
    object_type = Pathology


class DrugIndicationConverter(DrugEffectConverter):
    """Converts BEL statements like ``a(X) -| path(Y)``."""

    relation = DECREASES


class DrugSideEffectConverter(DrugEffectConverter):
    """Converts BEL statements like ``a(X) -> path(Y)``."""

    relation = INCREASES


class RegulatesAmountConverter(TypedConverter):
    """Converts BEL statements like ``A(B) reg C(D)``."""

    relation = REGULATES
    target_relation = 'regulatesAmountOf'

    @classmethod
    def predicate(cls, u: BaseEntity, v: BaseEntity, key: str, edge_data: Dict) -> bool:
        """Test a BEL edge has the relation and that its object has no modifier."""
        object_modifier = edge_data.get(OBJECT)
        return edge_data[RELATION] == cls.relation and (not object_modifier or not object_modifier.get(MODIFIER))


class IncreasesAmountConverter(RegulatesAmountConverter):
    """Converts BEL statements like ``A(B) -> C(D)``."""

    relation = INCREASES
    target_relation = 'increasesAmountOf'


class DecreasesAmountConverter(RegulatesAmountConverter):
    """Converts BEL statements like ``A(B) -| C(D)``."""

    relation = DECREASES
    target_relation = 'decreasesAmountOf'


class RegulatesActivityConverter(TypedConverter):
    """Converts BEL statements like ``A(B) reg act(C(D) [, ma(E)])``."""

    relation = REGULATES
    target_relation = 'activityDirectlyRegulatesActivityOf'

    @classmethod
    def predicate(cls, u: BaseEntity, v: BaseEntity, key: str, edge_data: Dict) -> bool:
        """Test a BEL edge has the relation and that its object has the activity modifier."""
        object_modifier = edge_data.get(OBJECT)
        # bool() is needed since the chain would otherwise evaluate to None (or an empty dictionary)
        # when the edge has no object modifier
        return bool(
            edge_data[RELATION] == cls.relation and
            object_modifier and
            object_modifier.get(MODIFIER) == ACTIVITY
        )


class IncreasesActivityConverter(RegulatesActivityConverter):
    """Converts BEL statements like ``A(B) -> act(C(D) [, ma(E)])``."""

    relation = INCREASES
    target_relation = 'activityDirectlyPositivelyRegulatesActivityOf'


class DecreasesActivityConverter(RegulatesActivityConverter):
    """Converts BEL statements like ``A(B) -| act(C(D) [, ma(E)])``."""

    relation = DECREASES
    target_relation = 'activityDirectlyNegativelyRegulatesActivityOf'


class MiRNARegulatesExpressionConverter(TypedConverter, SimpleTypedPredicate):
    """Converts BEL statements like ``m(X) reg r(Y)``."""

    subject_type = MicroRna
    relation = REGULATES
    object_type = Rna
    target_relation = 'regulatesExpressionOf'


class MiRNAIncreasesExpressionConverter(MiRNARegulatesExpressionConverter):
    """Converts BEL statements like ``m(X) -> r(Y)``."""

    relation = INCREASES
    target_relation = 'increasesExpressionOf'


class MiRNADirectlyIncreasesExpressionConverter(MiRNARegulatesExpressionConverter):
    """Converts BEL statements like ``m(X) => r(Y)``."""

    relation = DIRECTLY_INCREASES
    target_relation = 'increasesExpressionOf'


class MiRNADecreasesExpressionConverter(MiRNARegulatesExpressionConverter):
    """Converts BEL statements like ``m(X) -| r(Y)``."""

    relation = DECREASES
    target_relation = 'repressesExpressionOf'


class MiRNADirectlyDecreasesExpressionConverter(MiRNARegulatesExpressionConverter):
    """Converts BEL statements like ``m(X) =| r(Y)``."""

    relation = DIRECTLY_DECREASES
    target_relation = 'repressesExpressionOf'
__all__ = [
    'to_pykeen_path',
    'to_pykeen_df',
    'get_pykeen_summary',
    'to_pykeen_summary_path',
    'get_triple',
]

logger = logging.getLogger(__name__)


def to_pykeen_path(df: pd.DataFrame, path: str) -> bool:
    """Write the relationships in the BEL graph to a KEEN TSV file.

    If you have a BEL graph, first do:

    >>> from biokeen.convert import to_pykeen_df, to_pykeen_path
    >>> graph = ...  # Something from PyBEL
    >>> df = to_pykeen_df(graph)
    >>> to_pykeen_path(df, 'graph.keen.tsv')

    :param df: A dataframe of triples
    :param path: The path of the TSV file to write
    :return: False if the dataframe has no rows (in which case nothing is written), otherwise True
    """
    if len(df.index) == 0:
        return False
    # Neither the header nor the index are written, just the tab-separated rows
    df.to_csv(path, sep='\t', index=False, header=False)
    return True


def get_pykeen_summary(df: pd.DataFrame) -> Dict:
    """Summarize a KEEN dataframe.

    :param df: A dataframe whose first and third columns contain the entities
    :return: A dictionary with the number of occurrences of each namespace (the part of an entity
     before its first colon), the number of unique entities, and the number of rows
    """
    entities = list(itt.chain(df[df.columns[0]], df[df.columns[2]]))
    return {
        'namespaces': Counter(
            element.split(':')[0]
            for element in entities
        ),
        'entities': len(set(entities)),
        'relations': len(df.index),
    }


def to_pykeen_summary_path(df: pd.DataFrame, path: str, indent=2, **kwargs):
    """Write the summary of a KEEN dataframe to a file.

    :param df: A dataframe of triples
    :param path: The path of the JSON file to write
    :param indent: The indentation passed to :func:`json.dump`
    :param kwargs: Additional keyword arguments passed to :func:`json.dump`
    """
    # An explicit encoding keeps the output independent of the locale if ensure_ascii=False is given
    with open(path, 'w', encoding='utf-8') as file:
        json.dump(get_pykeen_summary(df), file, indent=indent, **kwargs)


def to_pykeen_df(graph: BELGraph, use_tqdm: bool = True) -> pd.DataFrame:
    """Get a DataFrame representing the triples.

    :param graph: A BEL graph
    :param use_tqdm: Should a progress bar be shown while the edges are converted?
    :return: A dataframe of the sorted, unique triples with columns subject, predicate, and object
    """
    it = graph.edges(keys=True)

    if use_tqdm:
        it = tqdm(it, total=graph.number_of_edges(), desc=f'{EMOJI} preparing TSV')

    triples = (
        get_triple(graph, u, v, key)
        for u, v, key in it
    )

    # clean duplicates and Nones
    triples = sorted({triple for triple in triples if triple is not None})

    return pd.DataFrame(triples, columns=['subject', 'predicate', 'object'])


def get_triple(graph: BELGraph, u: BaseEntity, v: BaseEntity, key: str) -> Optional[Tuple[str, str, str]]:  # noqa: C901
    """Get the triples' strings that should be written to the file.

    :param graph: A BEL graph
    :param u: The source node of the edge
    :param v: The target node of the edge
    :param key: The key of the edge
    :return: The triple from the first converter whose predicate accepts the edge. If there is
     none, a warning is logged and None is returned.
    """
    data = graph[u][v][key]

    # order is important
    converters = (
        NamedComplexHasComponentConverter,
        ListComplexHasComponentConverter,
        PartOfNamedComplexConverter,
        SubprocessPartOfBiologicalProcess,
        ProteinPartOfBiologicalProcess,
        RegulatesActivityConverter,
        MiRNADecreasesExpressionConverter,
        MiRNADirectlyDecreasesExpressionConverter,
        IsAConverter,
        EquivalenceConverter,
        CorrelationConverter,
        AssociationConverter,
        DrugIndicationConverter,
        DrugSideEffectConverter,
        RegulatesAmountConverter,
        IncreasesAmountConverter,
        DecreasesAmountConverter,
    )

    for converter in converters:
        if converter.predicate(u, v, key, data):
            return converter.convert(u, v, key, data)

    logger.warning(f'{EMOJI} unhandled: {graph.edge_to_bel(u, v, data)}')
    return None
NamedComplexAbundance, Pathology, Protein, 22 | Rna, activity, 23 | ) 24 | from pybel.testing.utils import n 25 | from pybel.typing import EdgeData 26 | 27 | 28 | def _rel(x): 29 | return {RELATION: x} 30 | 31 | 32 | def _rela(x, y=None): 33 | return {RELATION: x, OBJECT: activity(y)} 34 | 35 | 36 | def _assoc(y): 37 | return {RELATION: ASSOCIATION, 'association_type': y} 38 | 39 | 40 | a1 = Abundance('CHEBI', '1') 41 | p1 = Protein('HGNC', '1') 42 | pf1 = Protein('INTERPRO', '1') 43 | d1 = Pathology('MESH', '1') 44 | b1 = BiologicalProcess('GO', '1') 45 | b2 = BiologicalProcess('GO', '2') 46 | m1 = MicroRna('MIRBASE', '1') 47 | r1 = Rna('HGNC', '1') 48 | r2 = Rna('HGNC', '2') 49 | nca1 = NamedComplexAbundance('FPLX', '1') 50 | 51 | converters_true_list = [ 52 | (NamedComplexHasComponentConverter, nca1, p1, _rel(HAS_COMPONENT), ('HGNC:1', 'partOf', 'FPLX:1')), 53 | (PartOfNamedComplexConverter, p1, nca1, _rel(PART_OF), ('HGNC:1', 'partOf', 'FPLX:1')), 54 | (SubprocessPartOfBiologicalProcess, b1, b2, _rel(PART_OF), ('GO:1', 'partOf', 'GO:2')), 55 | (AssociationConverter, r1, r2, _rel(ASSOCIATION), ('HGNC:1', 'association', 'HGNC:2')), 56 | (AssociationConverter, r1, r2, _assoc('similarity'), ('HGNC:1', 'similarity', 'HGNC:2')), 57 | (CorrelationConverter, r1, r2, _rel(POSITIVE_CORRELATION), ('HGNC:1', 'positiveCorrelation', 'HGNC:2')), 58 | (IsAConverter, p1, pf1, _rel(IS_A), ('HGNC:1', 'isA', 'INTERPRO:1')), 59 | # Found in ADEPTUS 60 | (CorrelationConverter, d1, r1, _rel(POSITIVE_CORRELATION), ('MESH:1', 'positiveCorrelation', 'HGNC:1')), 61 | (CorrelationConverter, d1, r1, _rel(NEGATIVE_CORRELATION), ('MESH:1', 'negativeCorrelation', 'HGNC:1')), 62 | # Found in LINCS (not integrated yet) 63 | (RegulatesAmountConverter, a1, r1, _rel(REGULATES), ('CHEBI:1', 'regulatesAmountOf', 'HGNC:1')), 64 | (IncreasesAmountConverter, a1, r1, _rel(INCREASES), ('CHEBI:1', 'increasesAmountOf', 'HGNC:1')), 65 | (DecreasesAmountConverter, a1, r1, _rel(DECREASES), ('CHEBI:1', 
'decreasesAmountOf', 'HGNC:1')), 66 | # Found in SIDER 67 | (DrugSideEffectConverter, a1, d1, _rel(INCREASES), ('CHEBI:1', 'increases', 'MESH:1')), 68 | (DrugIndicationConverter, a1, d1, _rel(DECREASES), ('CHEBI:1', 'decreases', 'MESH:1')), 69 | # Found in miRTarBase 70 | (MiRNADecreasesExpressionConverter, m1, r1, _rel(DECREASES), ('MIRBASE:1', 'repressesExpressionOf', 'HGNC:1')), 71 | # Found in DrugBank 72 | (RegulatesActivityConverter, a1, p1, _rela(REGULATES), ('CHEBI:1', 'activityDirectlyRegulatesActivityOf', 73 | 'HGNC:1')), 74 | # Found in ComPath 75 | (EquivalenceConverter, b1, b2, _rel(EQUIVALENT_TO), ('GO:1', 'equivalentTo', 'GO:2')), 76 | (SubprocessPartOfBiologicalProcess, b1, b2, _rel(PART_OF), ('GO:1', 'partOf', 'GO:2')), 77 | # Found in HSDN 78 | ] 79 | 80 | converters_false_list = [ 81 | (NamedComplexHasComponentConverter, nca1, p1, _rel(PART_OF)), 82 | (PartOfNamedComplexConverter, nca1, p1, _rel(HAS_COMPONENT)), 83 | ] 84 | 85 | 86 | class TestConverters(unittest.TestCase): 87 | """Tests for the converter classes.""" 88 | 89 | def help_test_convert(self, 90 | converter: Type[Converter], 91 | u: BaseEntity, 92 | v: BaseEntity, 93 | edge_data: EdgeData, 94 | triple: Tuple[str, str, str], 95 | ) -> None: 96 | """Test a converter class.""" 97 | self.assertTrue(issubclass(converter, Converter), msg=f'Not a Converter: {converter.__name__}') 98 | key = n() 99 | self.assertTrue( 100 | converter.predicate(u, v, key, edge_data), 101 | msg=f'Predicate failed: {converter.__name__}', 102 | ) 103 | self.assertEqual( 104 | triple, 105 | converter.convert(u, v, key, edge_data), 106 | msg=f'Conversion failed: {converter.__name__}', 107 | ) 108 | graph = BELGraph() 109 | graph.add_edge(u, v, key=key, **edge_data) 110 | self.assertEqual( 111 | triple, 112 | get_triple(graph, u, v, key), 113 | msg=f'get_triple failed: {converter.__name__}', 114 | ) 115 | 116 | def test_converters_true(self): 117 | """Test passing converters.""" 118 | for converter, u, v, edge_data, 
triple in converters_true_list: 119 | with self.subTest(msg=f'Converter: {converter.__qualname__}'): 120 | self.help_test_convert(converter, u, v, edge_data, triple) 121 | 122 | def test_converters_false(self): 123 | """Test falsification of converters.""" 124 | for converter, u, v, edge_data in converters_false_list: 125 | with self.subTest(): 126 | self.assertFalse(converter.predicate(u, v, n(), edge_data)) 127 | -------------------------------------------------------------------------------- /tests/test_simple.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """Simple tests for BioKEEN.""" 4 | 5 | import unittest 6 | 7 | from biokeen.constants import VERSION 8 | 9 | 10 | class TestImport(unittest.TestCase): 11 | """Simple tests for importing BioKEEN.""" 12 | 13 | def test_version_type(self): 14 | """Test the type of the version string.""" 15 | self.assertIsInstance(VERSION, str) 16 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = 3 | # always keep coverage-clean first 4 | coverage-clean 5 | # code linters/stylers 6 | manifest 7 | flake8 8 | pyroma 9 | xenon 10 | # documentation linters/checkers 11 | doc8 12 | readme 13 | docs 14 | # the actual tests 15 | py 16 | # always keep coverage-report last 17 | coverage-report 18 | 19 | [testenv] 20 | commands = coverage run -p -m pytest --durations=20 {posargs:tests} 21 | passenv = KEEN_TEST_CONNECTOR KEEN_TEST_CONNECTION TRAVIS CI DB 22 | deps = 23 | coverage 24 | pytest 25 | mock 26 | pathlib 27 | {env:KEEN_TEST_CONNECTOR:} 28 | whitelist_externals = 29 | /bin/cat 30 | /bin/cp 31 | /bin/mkdir 32 | /usr/bin/git 33 | /usr/local/bin/git 34 | 35 | [testenv:coverage-clean] 36 | deps = coverage 37 | skip_install = true 38 | commands = coverage erase 39 | 40 | [testenv:manifest] 41 | deps = check-manifest 42 
| skip_install = true 43 | commands = check-manifest 44 | 45 | [testenv:flake8] 46 | skip_install = true 47 | deps = 48 | flake8 49 | flake8-bandit 50 | flake8-colors 51 | flake8-docstrings 52 | flake8-import-order 53 | pep8-naming 54 | commands = 55 | flake8 src/biokeen/ tests/ setup.py 56 | description = Run the flake8 tool with several plugins (bandit, docstrings, import order, pep8 naming). 57 | 58 | [testenv:xenon] 59 | deps = xenon 60 | skip_install = true 61 | commands = xenon --max-average A --max-modules A --max-absolute B . 62 | description = Run the xenon tool to monitor code complexity. 63 | 64 | [testenv:pyroma] 65 | deps = 66 | pygments 67 | pyroma 68 | skip_install = true 69 | commands = pyroma --min=10 . 70 | description = Run the pyroma tool to check the project's package friendliness. 71 | 72 | [testenv:doc8] 73 | skip_install = true 74 | deps = 75 | sphinx 76 | doc8 77 | commands = 78 | doc8 docs/source/ README.rst 79 | description = Run the doc8 tool to check the style of the RST files in the project docs. 
80 | 81 | [testenv:readme] 82 | commands = rst-lint README.rst 83 | skip_install = true 84 | deps = 85 | restructuredtext_lint 86 | pygments 87 | 88 | [testenv:docs] 89 | changedir = docs 90 | extras = 91 | docs 92 | commands = 93 | mkdir -p {envtmpdir} 94 | cp -r source {envtmpdir}/source 95 | sphinx-build -W -b html -d {envtmpdir}/build/doctrees {envtmpdir}/source {envtmpdir}/build/html 96 | sphinx-build -W -b coverage -d {envtmpdir}/build/doctrees {envtmpdir}/source {envtmpdir}/build/coverage 97 | cat {envtmpdir}/build/coverage/c.txt 98 | cat {envtmpdir}/build/coverage/python.txt 99 | 100 | [testenv:coverage-report] 101 | deps = coverage 102 | skip_install = true 103 | commands = 104 | coverage combine 105 | coverage report 106 | 107 | #################### 108 | # Deployment tools # 109 | #################### 110 | 111 | [testenv:bumpversion] 112 | commands = bumpversion {posargs} 113 | skip_install = true 114 | passenv = HOME 115 | deps = 116 | bumpversion 117 | 118 | [testenv:build] 119 | skip_install = true 120 | deps = 121 | wheel 122 | setuptools 123 | commands = 124 | python setup.py -q sdist bdist_wheel 125 | 126 | [testenv:release] 127 | skip_install = true 128 | deps = 129 | {[testenv:build]deps} 130 | twine >= 1.5.0 131 | commands = 132 | {[testenv:build]commands} 133 | twine upload --skip-existing dist/* 134 | 135 | [testenv:finish] 136 | skip_install = true 137 | passenv = HOME 138 | deps = 139 | {[testenv:release]deps} 140 | bumpversion 141 | commands = 142 | bumpversion release 143 | {[testenv:release]commands} 144 | git push 145 | bumpversion patch 146 | git push 147 | --------------------------------------------------------------------------------