├── interpro ├── __init__.py ├── urls.py ├── wsgi.py └── renderers.py ├── release ├── __init__.py ├── .gitignore └── management │ ├── __init__.py │ └── commands │ ├── __init__.py │ └── warmer.py ├── webfront ├── __init__.py ├── tests │ ├── __init__.py │ ├── test_mail.py │ ├── managed_model_test_runner.py │ ├── tests_structure_endpoint.py │ ├── test_ida_search.py │ ├── tests_protein_endpoint.py │ ├── README.md │ ├── tests_utils.py │ ├── tests_utils_endpoint.py │ └── fixtures_organisms.json ├── views │ ├── __init__.py │ ├── mail.py │ ├── MODIFIER_README.md │ ├── QUERYSET_README.md │ ├── modifier_manager.py │ ├── README.md │ ├── proteome.py │ ├── cache.py │ ├── set.py │ └── taxonomy.py ├── migrations │ ├── __init__.py │ ├── 0025_remove_set_alignment.py │ ├── 0026_remove_structuralmodel.py │ ├── 0017_structural_model_plddt.py │ ├── 0005_droping_columns.py │ ├── 0027_remove_llm_description.py │ ├── 0018_taxa_modifier.py │ ├── 0007_history.py │ ├── 0023_rename_is_alive_entry_is_public.py │ ├── 0020_alter_entryannotation_num_sequences.py │ ├── 0021_set_info.py │ ├── 0028_in_alphafold.py │ ├── 0037_protein_in_bfvd.py │ ├── 0010_wiki_field_type_change.py │ ├── 0015_structural_model_lddt.py │ ├── 0024_entry_llm_description.py │ ├── 0032_entry_is_updated_llm.py │ ├── 0034_set_wikipedia.py │ ├── 0033_alter_proteinextrafeatures_sequence_feature.py │ ├── 0006_interaction_and_pathways.py │ ├── 0016_structural_model_algorithm.py │ ├── 0031_strain_assembly_nullable.py │ ├── 0012_seq_and_seq_raw.py │ ├── 0014_structural_model.py │ ├── 0008_dw_changes.py │ ├── 0011_pfam2interpro.py │ ├── 0009_entry_annotation_changes.py │ ├── 0029_remove_is_featured_plus_llm_entry.py │ ├── 0019_entrytaxa_table.py │ ├── 0036_default_value_lists_dicts.py │ ├── 0022_chain_sequence.py │ ├── 0004_taxonomy_per_entryX.py │ ├── 0003_taxonomy_per_entryDB.py │ ├── 0002_taxonomy_per_entry.py │ ├── 0035_interpronmatches.py │ ├── 0013_protein_extra.py │ └── 0030_num_proteins.py ├── searcher │ ├── __init__.py │ 
├── README.md │ └── search_controller.py ├── models │ └── __init__.py ├── static │ ├── logo_178x178.png │ ├── swagger │ │ ├── favicon-16x16.png │ │ ├── favicon-32x32.png │ │ ├── index.html │ │ └── oauth2-redirect.html │ ├── bootstrap │ │ ├── fonts │ │ │ ├── glyphicons-halflings-regular.eot │ │ │ ├── glyphicons-halflings-regular.ttf │ │ │ ├── glyphicons-halflings-regular.woff │ │ │ └── glyphicons-halflings-regular.woff2 │ │ └── js │ │ │ └── npm.js │ └── interpro-api.css ├── README.md ├── templatetags │ └── interpro_tags.py ├── exceptions.py ├── response.py ├── serializers │ ├── utils.py │ └── README.md ├── constants.py └── pagination.py ├── functional_tests ├── __init__.py ├── base.py └── tests.py ├── .pre-commit-config.yaml ├── config ├── db.template.yml ├── elastic_ida_mapping.json └── elastic_mapping.json ├── .coveragerc ├── .editorconfig ├── dev_requirements.txt ├── .gitignore ├── requirements.txt ├── manage.py ├── example_data ├── entry │ ├── PF17180.json │ ├── PF02171.json │ └── IPR003165.json └── protein │ ├── M5ADK6.json │ ├── A0A0A2L2G2.json │ ├── A1CUJ5.json │ ├── X2JLE1.json │ ├── V5XAD2.json │ └── P16582.json ├── templates └── rest_framework │ └── api.html ├── .github └── workflows │ └── testing.yml ├── docs └── examples │ ├── fetch-protein-matches.py │ ├── fetch-alphafold-for-entry.py │ └── overlapping-entries.py ├── README.md └── deploy_tools └── README.md /interpro/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /release/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /webfront/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /functional_tests/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /release/.gitignore: -------------------------------------------------------------------------------- 1 | logs 2 | -------------------------------------------------------------------------------- /webfront/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /webfront/views/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /release/management/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /webfront/migrations/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /webfront/searcher/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /release/management/commands/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /webfront/models/__init__.py: -------------------------------------------------------------------------------- 1 | from webfront.models.interpro_new import * 2 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/ambv/black 3 | rev: stable 4 | hooks: 5 | - id: 
black 6 | -------------------------------------------------------------------------------- /webfront/static/logo_178x178.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProteinsWebTeam/interpro7-api/HEAD/webfront/static/logo_178x178.png -------------------------------------------------------------------------------- /webfront/static/swagger/favicon-16x16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProteinsWebTeam/interpro7-api/HEAD/webfront/static/swagger/favicon-16x16.png -------------------------------------------------------------------------------- /webfront/static/swagger/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProteinsWebTeam/interpro7-api/HEAD/webfront/static/swagger/favicon-32x32.png -------------------------------------------------------------------------------- /config/db.template.yml: -------------------------------------------------------------------------------- 1 | engine: django.db.backends.mysql 2 | host: URL 3 | sid: DATABASE_NAME 4 | port: 4602 5 | user: USER 6 | password: PASSWORD 7 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | omit = 3 | # omit anything in a release directory anywhere 4 | */release/* 5 | # not using wsgi for testing 6 | interpro/wsgi.py 7 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | [*] 2 | indent_style = space 3 | insert_final_newline = true 4 | 5 | [*.py] 6 | indent_size = 4 7 | 8 | [*.md] 9 | trim_trailing_whitespace = false 10 | -------------------------------------------------------------------------------- 
/webfront/static/bootstrap/fonts/glyphicons-halflings-regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProteinsWebTeam/interpro7-api/HEAD/webfront/static/bootstrap/fonts/glyphicons-halflings-regular.eot -------------------------------------------------------------------------------- /webfront/static/bootstrap/fonts/glyphicons-halflings-regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProteinsWebTeam/interpro7-api/HEAD/webfront/static/bootstrap/fonts/glyphicons-halflings-regular.ttf -------------------------------------------------------------------------------- /webfront/static/bootstrap/fonts/glyphicons-halflings-regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProteinsWebTeam/interpro7-api/HEAD/webfront/static/bootstrap/fonts/glyphicons-halflings-regular.woff -------------------------------------------------------------------------------- /webfront/static/bootstrap/fonts/glyphicons-halflings-regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProteinsWebTeam/interpro7-api/HEAD/webfront/static/bootstrap/fonts/glyphicons-halflings-regular.woff2 -------------------------------------------------------------------------------- /dev_requirements.txt: -------------------------------------------------------------------------------- 1 | # remember to check the version is compatible with python 3.8 2 | selenium==4.21.0 3 | django-debug-toolbar==4.4.2 4 | ipdb==0.13.13 5 | coveralls==3.3.1 6 | tqdm==4.66.4 7 | black==24.4.2 8 | -------------------------------------------------------------------------------- /interpro/urls.py: -------------------------------------------------------------------------------- 1 | from django.urls import re_path 2 | from webfront.views 
import common, mail 3 | 4 | urlpatterns = [ 5 | re_path(r"^api/mail/$", mail.send_email), 6 | re_path(r"^api/(?P.*)$", common.GeneralHandler.as_view()), 7 | ] 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | db.sqlite3 2 | __pycache__ 3 | .idea 4 | unifam/mysql.conf 5 | *.pyc 6 | .DS_Store 7 | .vscode 8 | unifam/settings-private.py 9 | .coverage 10 | config/interpro.local.yml 11 | config/db.yml 12 | *.db 13 | *.iml 14 | gunicorn.local.conf.py 15 | geckodriver.log 16 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Python 3.10 2 | Django==5.2.7 3 | djangorestframework==3.15.2 4 | PyYAML==6.0 5 | jsonfield2==4.0.0.post0 6 | pymysql==1.1.1 7 | django-cors-headers==4.3.1 8 | gunicorn==22.0.0 9 | django-redis==5.4.0 10 | redis==5.0.4 11 | requests==2.32.3 12 | 13 | 14 | -------------------------------------------------------------------------------- /config/elastic_ida_mapping.json: -------------------------------------------------------------------------------- 1 | { 2 | "mappings": { 3 | "properties": { 4 | "ida_id": { 5 | "type": "keyword" 6 | }, 7 | "ida": { 8 | "type": "keyword" 9 | }, 10 | "counts": { 11 | "type": "long" 12 | }, 13 | "representative": { 14 | "type": "object", 15 | "enabled": false 16 | } 17 | } 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /webfront/migrations/0025_remove_set_alignment.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.15 on 2023-11-09 15:09 2 | 3 | from django.db import migrations 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ("webfront", "0024_entry_llm_description"), 10 | ] 11 | 12 | operations = [ 13 | 
migrations.DeleteModel( 14 | name="Alignment", 15 | ), 16 | ] 17 | -------------------------------------------------------------------------------- /webfront/migrations/0026_remove_structuralmodel.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.15 on 2023-11-16 16:23 2 | 3 | from django.db import migrations 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ("webfront", "0025_remove_set_alignment"), 10 | ] 11 | 12 | operations = [ 13 | migrations.DeleteModel( 14 | name="StructuralModel", 15 | ), 16 | ] 17 | -------------------------------------------------------------------------------- /webfront/migrations/0017_structural_model_plddt.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.1.7 on 2021-08-28 10:30 2 | 3 | from django.db import migrations 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [("webfront", "0016_structural_model_algorithm")] 9 | 10 | operations = [ 11 | migrations.RenameField( 12 | model_name="structuralmodel", old_name="lddt", new_name="plddt" 13 | ) 14 | ] 15 | -------------------------------------------------------------------------------- /webfront/migrations/0005_droping_columns.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.0.1 on 2020-01-23 10:54 2 | 3 | from django.db import migrations 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [("webfront", "0004_taxonomy_per_entryX")] 9 | 10 | operations = [ 11 | migrations.RemoveField(model_name="protein", name="other_names"), 12 | migrations.RemoveField(model_name="protein", name="size"), 13 | ] 14 | -------------------------------------------------------------------------------- /webfront/README.md: -------------------------------------------------------------------------------- 1 | Developers Documentation 2 | === 3 | 4 | 
This API was developed using [Django REST Framework](http://www.django-rest-framework.org/), but we have adapted it for our needs. 5 | 6 | You can read about the details of the changes in each of the parts of the framework: 7 | 8 | * [Views](./views/README.md) 9 | * [Queryset](./views/QUERYSET_README.md) 10 | * [Serializers](./serializers/README.md) 11 | * [Modifiers](./views/MODIFIER_README.md) 12 | -------------------------------------------------------------------------------- /webfront/migrations/0027_remove_llm_description.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.15 on 2023-12-15 16:32 2 | 3 | from django.db import migrations 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ("webfront", "0026_remove_structuralmodel"), 10 | ] 11 | 12 | operations = [ 13 | migrations.RemoveField( 14 | model_name="entry", 15 | name="llm_description", 16 | ), 17 | ] 18 | -------------------------------------------------------------------------------- /webfront/migrations/0018_taxa_modifier.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.8 on 2022-02-01 12:14 2 | 3 | from django.db import migrations 4 | import jsonfield.fields 5 | 6 | 7 | class Migration(migrations.Migration): 8 | 9 | dependencies = [("webfront", "0017_structural_model_plddt")] 10 | 11 | operations = [ 12 | migrations.AddField( 13 | model_name="entry", name="taxa", field=jsonfield.fields.JSONField(null=True) 14 | ) 15 | ] 16 | -------------------------------------------------------------------------------- /webfront/static/interpro-api.css: -------------------------------------------------------------------------------- 1 | 2 | div.region { 3 | text-align: right; 4 | } 5 | div.region form div.btn-group a{ 6 | min-width: 10rem; 7 | } 8 | 9 | div.open ul.dropdown-menu { 10 | display: block; 11 | } 12 | ul.dropdown-menu li { 13 | text-align: right; 
14 | padding-right: 1em; 15 | } 16 | ul.dropdown-menu li:hover { 17 | background-color: rgb(20,160,206); 18 | } 19 | ul.dropdown-menu li a.format-option{ 20 | color: rgb(231, 231, 231);; 21 | } -------------------------------------------------------------------------------- /webfront/migrations/0007_history.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.0.1 on 2020-02-06 09:27 2 | 3 | from django.db import migrations 4 | import jsonfield.fields 5 | 6 | 7 | class Migration(migrations.Migration): 8 | 9 | dependencies = [("webfront", "0006_interaction_and_pathways")] 10 | 11 | operations = [ 12 | migrations.AddField( 13 | model_name="entry", 14 | name="history", 15 | field=jsonfield.fields.JSONField(null=True), 16 | ) 17 | ] 18 | -------------------------------------------------------------------------------- /webfront/migrations/0023_rename_is_alive_entry_is_public.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.20 on 2023-09-19 21:37 2 | 3 | from django.db import migrations 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ("webfront", "0022_chain_sequence"), 10 | ] 11 | 12 | operations = [ 13 | migrations.RenameField( 14 | model_name="entry", 15 | old_name="is_alive", 16 | new_name="is_public", 17 | ), 18 | ] 19 | -------------------------------------------------------------------------------- /webfront/migrations/0020_alter_entryannotation_num_sequences.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.12 on 2022-08-31 15:35 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [("webfront", "0019_entrytaxa_table")] 9 | 10 | operations = [ 11 | migrations.AlterField( 12 | model_name="entryannotation", 13 | name="num_sequences", 14 | field=models.IntegerField(null=True), 15 
| ) 16 | ] 17 | -------------------------------------------------------------------------------- /webfront/migrations/0021_set_info.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.15 on 2023-01-03 14:29 2 | 3 | from django.db import migrations 4 | import jsonfield.fields 5 | 6 | 7 | class Migration(migrations.Migration): 8 | 9 | dependencies = [("webfront", "0020_alter_entryannotation_num_sequences")] 10 | 11 | operations = [ 12 | migrations.AddField( 13 | model_name="entry", 14 | name="set_info", 15 | field=jsonfield.fields.JSONField(null=True), 16 | ) 17 | ] 18 | -------------------------------------------------------------------------------- /webfront/migrations/0028_in_alphafold.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.23 on 2024-02-15 09:49 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('webfront', '0027_remove_llm_description'), 10 | ] 11 | 12 | operations = [ 13 | migrations.AddField( 14 | model_name='protein', 15 | name='in_alphafold', 16 | field=models.BooleanField(default=False), 17 | ), 18 | ] 19 | -------------------------------------------------------------------------------- /webfront/migrations/0037_protein_in_bfvd.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 4.2.13 on 2025-03-11 09:46 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('webfront', '0036_default_value_lists_dicts'), 10 | ] 11 | 12 | operations = [ 13 | migrations.AddField( 14 | model_name='protein', 15 | name='in_bfvd', 16 | field=models.BooleanField(default=False), 17 | ), 18 | ] 19 | -------------------------------------------------------------------------------- /webfront/migrations/0010_wiki_field_type_change.py: 
-------------------------------------------------------------------------------- 1 | # Generated by Django 2.1.10 on 2020-08-10 09:32 2 | 3 | from django.db import migrations 4 | import jsonfield.fields 5 | 6 | 7 | class Migration(migrations.Migration): 8 | 9 | dependencies = [("webfront", "0009_entry_annotation_changes")] 10 | 11 | operations = [ 12 | migrations.AlterField( 13 | model_name="entry", 14 | name="wikipedia", 15 | field=jsonfield.fields.JSONField(null=True), 16 | ) 17 | ] 18 | -------------------------------------------------------------------------------- /webfront/migrations/0015_structural_model_lddt.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.0.7 on 2021-02-23 15:20 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [("webfront", "0014_structural_model")] 9 | 10 | operations = [ 11 | migrations.AddField( 12 | model_name="structuralmodel", 13 | name="lddt", 14 | field=models.FloatField(default=0), 15 | preserve_default=False, 16 | ) 17 | ] 18 | -------------------------------------------------------------------------------- /webfront/migrations/0024_entry_llm_description.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.19 on 2023-10-03 11:38 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ("webfront", "0023_rename_is_alive_entry_is_public"), 10 | ] 11 | 12 | operations = [ 13 | migrations.AddField( 14 | model_name="entry", 15 | name="llm_description", 16 | field=models.TextField(null=True), 17 | ), 18 | ] 19 | -------------------------------------------------------------------------------- /webfront/migrations/0032_entry_is_updated_llm.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.23 on 2024-05-14 
20:35 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('webfront', '0031_strain_assembly_nullable'), 10 | ] 11 | 12 | operations = [ 13 | migrations.AddField( 14 | model_name='entry', 15 | name='is_updated_llm', 16 | field=models.BooleanField(default=False), 17 | ), 18 | ] 19 | -------------------------------------------------------------------------------- /manage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | import sys 4 | 5 | try: 6 | import pymysql 7 | except ImportError: 8 | pass 9 | else: 10 | pymysql.version_info = (1, 4, 6, "final", 0) # change mysqlclient version 11 | pymysql.install_as_MySQLdb() 12 | 13 | 14 | def main(): 15 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "interpro.settings") 16 | from django.core.management import execute_from_command_line 17 | execute_from_command_line(sys.argv) 18 | 19 | 20 | if __name__ == "__main__": 21 | main() 22 | -------------------------------------------------------------------------------- /webfront/migrations/0034_set_wikipedia.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 4.2.13 on 2025-01-17 10:57 2 | 3 | from django.db import migrations 4 | import jsonfield.fields 5 | 6 | 7 | class Migration(migrations.Migration): 8 | 9 | dependencies = [ 10 | ('webfront', '0033_alter_proteinextrafeatures_sequence_feature'), 11 | ] 12 | 13 | operations = [ 14 | migrations.AddField( 15 | model_name='set', 16 | name='wikipedia', 17 | field=jsonfield.fields.JSONField(null=True), 18 | ), 19 | ] 20 | -------------------------------------------------------------------------------- /webfront/migrations/0033_alter_proteinextrafeatures_sequence_feature.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.24 on 2024-11-20 18:26 2 | 3 | from 
django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('webfront', '0032_entry_is_updated_llm'), 10 | ] 11 | 12 | operations = [ 13 | migrations.AlterField( 14 | model_name='proteinextrafeatures', 15 | name='sequence_feature', 16 | field=models.CharField(max_length=255), 17 | ), 18 | ] 19 | -------------------------------------------------------------------------------- /webfront/static/bootstrap/js/npm.js: -------------------------------------------------------------------------------- 1 | // This file is autogenerated via the `commonjs` Grunt task. You can require() this file in a CommonJS environment. 2 | require('../../js/transition.js') 3 | require('../../js/alert.js') 4 | require('../../js/button.js') 5 | require('../../js/carousel.js') 6 | require('../../js/collapse.js') 7 | require('../../js/dropdown.js') 8 | require('../../js/modal.js') 9 | require('../../js/tooltip.js') 10 | require('../../js/popover.js') 11 | require('../../js/scrollspy.js') 12 | require('../../js/tab.js') 13 | require('../../js/affix.js') -------------------------------------------------------------------------------- /webfront/templatetags/interpro_tags.py: -------------------------------------------------------------------------------- 1 | from django import template 2 | from django.utils.html import escape 3 | from django.utils.encoding import iri_to_uri 4 | from rest_framework.utils.urls import replace_query_param 5 | 6 | from django.conf import settings 7 | 8 | register = template.Library() 9 | 10 | 11 | @register.simple_tag 12 | def get_url_with_prefix(request, key, val): 13 | iri = request.get_full_path() 14 | uri = iri_to_uri(iri) 15 | return settings.INTERPRO_CONFIG["url_path_prefix"] + escape( 16 | replace_query_param(uri, key, val) 17 | ) 18 | -------------------------------------------------------------------------------- /example_data/entry/PF17180.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "accession": "PF17180", 4 | "id": "", 5 | "type": "domain", 6 | "GO": { 7 | "biologicalProcess": [], 8 | "molecularFunction": [], 9 | "cellularComponent": [] 10 | }, 11 | "sourceDataBase": "Pfam", 12 | "memberDataBases": {}, 13 | "integrated": null, 14 | "name": { 15 | "name": "Zinc-binding domain", 16 | "short": "zf-3CxxC_2", 17 | "other": [] 18 | }, 19 | "description": "", 20 | "wikipedia": "", 21 | "literature": {}, 22 | "cross_references": {} 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /webfront/migrations/0006_interaction_and_pathways.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.0.1 on 2020-02-04 15:33 2 | 3 | from django.db import migrations 4 | import jsonfield.fields 5 | 6 | 7 | class Migration(migrations.Migration): 8 | 9 | dependencies = [("webfront", "0005_droping_columns")] 10 | 11 | operations = [ 12 | migrations.AddField( 13 | model_name="entry", 14 | name="interactions", 15 | field=jsonfield.fields.JSONField(null=True), 16 | ), 17 | migrations.AddField( 18 | model_name="entry", 19 | name="pathways", 20 | field=jsonfield.fields.JSONField(null=True), 21 | ), 22 | ] 23 | -------------------------------------------------------------------------------- /webfront/migrations/0016_structural_model_algorithm.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.1.7 on 2021-06-24 12:34 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [("webfront", "0015_structural_model_lddt")] 9 | 10 | operations = [ 11 | migrations.AddField( 12 | model_name="structuralmodel", 13 | name="algorithm", 14 | field=models.CharField(default="trRosetta", max_length=20), 15 | preserve_default=False, 16 | ), 17 | 
migrations.AlterField( 18 | model_name="structuralmodel", name="lddt", field=models.BinaryField() 19 | ), 20 | ] 21 | -------------------------------------------------------------------------------- /interpro/wsgi.py: -------------------------------------------------------------------------------- 1 | """ 2 | WSGI config for interpro project. 3 | 4 | It exposes the WSGI callable as a module-level variable named ``application``. 5 | 6 | For more information on this file, see 7 | https://docs.djangoproject.com/en/1.8/howto/deployment/wsgi/ 8 | """ 9 | 10 | import os 11 | 12 | from django.core.wsgi import get_wsgi_application 13 | 14 | try: 15 | import pymysql 16 | 17 | pymysql.version_info = (1, 4, 6, "final", 0) # change mysqlclient version 18 | pymysql.install_as_MySQLdb() 19 | print("running pymysql") 20 | except ImportError: 21 | pass 22 | 23 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "interpro.settings") 24 | 25 | application = get_wsgi_application() 26 | -------------------------------------------------------------------------------- /webfront/migrations/0031_strain_assembly_nullable.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.23 on 2024-05-14 20:35 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('webfront', '0030_num_proteins'), 10 | ] 11 | 12 | operations = [ 13 | migrations.AlterField( 14 | model_name='proteome', 15 | name='assembly', 16 | field=models.CharField(max_length=512, null=True), 17 | ), 18 | migrations.AlterField( 19 | model_name='proteome', 20 | name='strain', 21 | field=models.CharField(max_length=512, null=True), 22 | ), 23 | ] 24 | -------------------------------------------------------------------------------- /webfront/migrations/0012_seq_and_seq_raw.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.0.7 on 2021-01-11 13:42 
2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [("webfront", "0011_pfam2interpro")] 9 | 10 | operations = [ 11 | migrations.RemoveField(model_name="protein", name="extra_features"), 12 | migrations.RemoveField(model_name="protein", name="residues"), 13 | migrations.RemoveField(model_name="protein", name="sequence"), 14 | migrations.AddField( 15 | model_name="protein", 16 | name="sequence_bin", 17 | field=models.BinaryField(db_column="sequence", null=True), 18 | ), 19 | ] 20 | -------------------------------------------------------------------------------- /webfront/migrations/0014_structural_model.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.0.7 on 2021-02-05 10:34 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [("webfront", "0013_protein_extra")] 9 | 10 | operations = [ 11 | migrations.CreateModel( 12 | name="StructuralModel", 13 | fields=[ 14 | ("model_id", models.IntegerField(primary_key=True, serialize=False)), 15 | ("accession", models.CharField(max_length=25)), 16 | ("contacts", models.BinaryField()), 17 | ("structure", models.BinaryField()), 18 | ], 19 | options={"db_table": "webfront_structuralmodel"}, 20 | ) 21 | ] 22 | -------------------------------------------------------------------------------- /webfront/migrations/0008_dw_changes.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.0.1 on 2020-04-09 15:04 2 | 3 | from django.db import migrations 4 | import jsonfield.fields 5 | 6 | 7 | class Migration(migrations.Migration): 8 | 9 | dependencies = [("webfront", "0007_history")] 10 | 11 | operations = [ 12 | migrations.RemoveField(model_name="set", name="integrated"), 13 | migrations.RemoveField(model_name="set", name="is_set"), 14 | 
migrations.RemoveField(model_name="structure", name="other_names"), 15 | migrations.RemoveField(model_name="structure", name="short_name"), 16 | migrations.AlterField( 17 | model_name="protein", 18 | name="structure", 19 | field=jsonfield.fields.JSONField(default={}, null=True), 20 | ), 21 | ] 22 | -------------------------------------------------------------------------------- /webfront/migrations/0011_pfam2interpro.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.0.7 on 2020-08-24 14:45 2 | 3 | from django.db import migrations 4 | import jsonfield.fields 5 | 6 | 7 | class Migration(migrations.Migration): 8 | 9 | dependencies = [("webfront", "0010_wiki_field_type_change")] 10 | 11 | operations = [ 12 | migrations.AddField( 13 | model_name="entry", 14 | name="details", 15 | field=jsonfield.fields.JSONField(null=True), 16 | ), 17 | migrations.AddField( 18 | model_name="set", 19 | name="authors", 20 | field=jsonfield.fields.JSONField(null=True), 21 | ), 22 | migrations.AddField( 23 | model_name="set", 24 | name="literature", 25 | field=jsonfield.fields.JSONField(null=True), 26 | ), 27 | ] 28 | -------------------------------------------------------------------------------- /webfront/migrations/0009_entry_annotation_changes.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.0.7 on 2020-06-08 10:45 2 | 3 | from django.db import migrations, models 4 | import django.db.models.deletion 5 | 6 | 7 | class Migration(migrations.Migration): 8 | 9 | dependencies = [("webfront", "0008_dw_changes")] 10 | 11 | operations = [ 12 | migrations.AddField( 13 | model_name="entryannotation", 14 | name="num_sequences", 15 | field=models.FloatField(null=True), 16 | ), 17 | migrations.AlterField( 18 | model_name="entryannotation", 19 | name="accession", 20 | field=models.ForeignKey( 21 | db_column="accession", 22 | null=True, 23 | 
on_delete=django.db.models.deletion.SET_NULL, 24 | to="webfront.Entry", 25 | ), 26 | ), 27 | ] 28 | -------------------------------------------------------------------------------- /webfront/migrations/0029_remove_is_featured_plus_llm_entry.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.23 on 2024-02-21 10:18 2 | 3 | from django.db import migrations, models 4 | import jsonfield.fields 5 | 6 | 7 | class Migration(migrations.Migration): 8 | 9 | dependencies = [ 10 | ('webfront', '0028_in_alphafold'), 11 | ] 12 | 13 | operations = [ 14 | migrations.RemoveField( 15 | model_name='entry', 16 | name='is_featured', 17 | ), 18 | migrations.AddField( 19 | model_name='entry', 20 | name='is_llm', 21 | field=models.BooleanField(default=False), 22 | ), 23 | migrations.AddField( 24 | model_name='entry', 25 | name='is_reviewed_llm', 26 | field=models.BooleanField(default=False), 27 | ), 28 | migrations.AddField( 29 | model_name='entry', 30 | name='representative_structure', 31 | field=jsonfield.fields.JSONField(null=True), 32 | ), 33 | ] 34 | -------------------------------------------------------------------------------- /webfront/migrations/0019_entrytaxa_table.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.8 on 2022-03-18 08:03 2 | 3 | from django.db import migrations, models 4 | import django.db.models.deletion 5 | import jsonfield.fields 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [("webfront", "0018_taxa_modifier")] 11 | 12 | operations = [ 13 | migrations.CreateModel( 14 | name="EntryTaxa", 15 | fields=[ 16 | ( 17 | "accession", 18 | models.OneToOneField( 19 | db_column="accession", 20 | on_delete=django.db.models.deletion.CASCADE, 21 | primary_key=True, 22 | serialize=False, 23 | to="webfront.entry", 24 | ), 25 | ), 26 | ("tree", jsonfield.fields.JSONField(null=True)), 27 | ], 28 | options={"db_table": 
"webfront_entrytaxa"}, 29 | ), 30 | migrations.RemoveField(model_name="entry", name="taxa"), 31 | ] 32 | -------------------------------------------------------------------------------- /webfront/exceptions.py: -------------------------------------------------------------------------------- 1 | class DeletedEntryError(Exception): 2 | def __init__(self, accession, database, _type, name, short_name, history, date): 3 | self.accession = accession 4 | self.database = database 5 | self.type = _type 6 | self.name = name 7 | self.short_name = short_name 8 | self.history = history 9 | self.date = date 10 | 11 | 12 | class EmptyQuerysetError(Exception): 13 | def __init__(self, message): 14 | self.message = message 15 | 16 | 17 | class ExpectedUniqueError(Exception): 18 | def __init__(self, message): 19 | self.message = message 20 | 21 | 22 | class HmmerWebError(Exception): 23 | def __init__(self, message): 24 | self.message = message 25 | 26 | 27 | class BadURLParameterError(Exception): 28 | def __init__(self, message): 29 | self.message = message 30 | 31 | 32 | class InvalidOperationRequest(Exception): 33 | def __init__(self, message): 34 | self.message = message 35 | 36 | 37 | class DeprecatedModifier(Exception): 38 | def __init__(self, message): 39 | self.message = message 40 | -------------------------------------------------------------------------------- /webfront/migrations/0036_default_value_lists_dicts.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 4.2.19 on 2025-02-24 14:14 2 | 3 | from django.db import migrations 4 | import jsonfield.fields 5 | 6 | 7 | class Migration(migrations.Migration): 8 | 9 | dependencies = [ 10 | ("webfront", "0035_interpronmatches"), 11 | ] 12 | 13 | operations = [ 14 | migrations.AlterField( 15 | model_name="entry", 16 | name="overlaps_with", 17 | field=jsonfield.fields.JSONField(default=list), 18 | ), 19 | migrations.AlterField( 20 | model_name="protein", 21 | 
name="structure", 22 | field=jsonfield.fields.JSONField(default=dict, null=True), 23 | ), 24 | migrations.AlterField( 25 | model_name="set", 26 | name="authors", 27 | field=jsonfield.fields.JSONField(default=list), 28 | ), 29 | migrations.AlterField( 30 | model_name="set", 31 | name="literature", 32 | field=jsonfield.fields.JSONField(default=list), 33 | ), 34 | migrations.AlterField( 35 | model_name="set", 36 | name="wikipedia", 37 | field=jsonfield.fields.JSONField(default=list), 38 | ), 39 | ] 40 | -------------------------------------------------------------------------------- /webfront/migrations/0022_chain_sequence.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.15 on 2023-08-03 16:04 2 | 3 | from django.db import migrations, models 4 | import django.db.models.deletion 5 | 6 | 7 | class Migration(migrations.Migration): 8 | 9 | dependencies = [ 10 | ("webfront", "0021_set_info"), 11 | ] 12 | 13 | operations = [ 14 | migrations.CreateModel( 15 | name="ChainSequence", 16 | fields=[ 17 | ("id", models.IntegerField(primary_key=True, serialize=False)), 18 | ("chain", models.CharField(db_column="chain_acc", max_length=10)), 19 | ("sequence_bin", models.BinaryField(db_column="sequence", null=True)), 20 | ("length", models.IntegerField()), 21 | ( 22 | "structure", 23 | models.ForeignKey( 24 | blank=True, 25 | db_column="structure_acc", 26 | null=True, 27 | on_delete=django.db.models.deletion.SET_NULL, 28 | to="webfront.structure", 29 | ), 30 | ), 31 | ], 32 | options={ 33 | "db_table": "webfront_chain_sequence", 34 | }, 35 | ), 36 | ] 37 | -------------------------------------------------------------------------------- /webfront/migrations/0004_taxonomy_per_entryX.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.1.10 on 2019-10-25 11:20 2 | 3 | from django.db import migrations, models 4 | import django.db.models.deletion 5 | 6 | 7 | class 
Migration(migrations.Migration): 8 | 9 | dependencies = [("webfront", "0003_taxonomy_per_entryDB")] 10 | 11 | operations = [ 12 | migrations.RenameField( 13 | model_name="taxonomyperentrydb", 14 | old_name="entry_db", 15 | new_name="source_database", 16 | ), 17 | migrations.AlterField( 18 | model_name="taxonomyperentry", 19 | name="taxonomy", 20 | field=models.ForeignKey( 21 | blank=True, 22 | db_column="tax_id", 23 | null=True, 24 | on_delete=django.db.models.deletion.SET_NULL, 25 | to="webfront.Taxonomy", 26 | ), 27 | ), 28 | migrations.AlterField( 29 | model_name="taxonomyperentrydb", 30 | name="taxonomy", 31 | field=models.ForeignKey( 32 | blank=True, 33 | db_column="tax_id", 34 | null=True, 35 | on_delete=django.db.models.deletion.SET_NULL, 36 | to="webfront.Taxonomy", 37 | ), 38 | ), 39 | ] 40 | -------------------------------------------------------------------------------- /webfront/migrations/0003_taxonomy_per_entryDB.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.1.10 on 2019-10-16 10:58 2 | 3 | from django.db import migrations, models 4 | import django.db.models.deletion 5 | import jsonfield.fields 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [("webfront", "0002_taxonomy_per_entry")] 11 | 12 | operations = [ 13 | migrations.CreateModel( 14 | name="TaxonomyPerEntryDB", 15 | fields=[ 16 | ( 17 | "id", 18 | models.AutoField( 19 | auto_created=True, 20 | primary_key=True, 21 | serialize=False, 22 | verbose_name="ID", 23 | ), 24 | ), 25 | ("entry_db", models.CharField(db_index=True, max_length=100)), 26 | ("counts", jsonfield.fields.JSONField(null=True)), 27 | ( 28 | "taxonomy", 29 | models.ForeignKey( 30 | blank=True, 31 | null=True, 32 | on_delete=django.db.models.deletion.SET_NULL, 33 | to="webfront.Taxonomy", 34 | ), 35 | ), 36 | ], 37 | ) 38 | ] 39 | -------------------------------------------------------------------------------- 
/webfront/migrations/0002_taxonomy_per_entry.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.1.10 on 2019-10-15 10:52 2 | 3 | from django.db import migrations, models 4 | import django.db.models.deletion 5 | import jsonfield.fields 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [("webfront", "0001_merged")] 11 | 12 | operations = [ 13 | migrations.CreateModel( 14 | name="TaxonomyPerEntry", 15 | fields=[ 16 | ( 17 | "id", 18 | models.AutoField( 19 | auto_created=True, 20 | primary_key=True, 21 | serialize=False, 22 | verbose_name="ID", 23 | ), 24 | ), 25 | ("counts", jsonfield.fields.JSONField(null=True)), 26 | ( 27 | "entry_acc", 28 | models.ForeignKey( 29 | db_column="entry_acc", 30 | null=True, 31 | on_delete=django.db.models.deletion.SET_NULL, 32 | to="webfront.Entry", 33 | ), 34 | ), 35 | ( 36 | "taxonomy", 37 | models.ForeignKey( 38 | blank=True, 39 | null=True, 40 | on_delete=django.db.models.deletion.SET_NULL, 41 | to="webfront.Taxonomy", 42 | ), 43 | ), 44 | ], 45 | ) 46 | ] 47 | -------------------------------------------------------------------------------- /webfront/migrations/0035_interpronmatches.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 4.2.19 on 2025-02-24 14:11 2 | 3 | from django.db import migrations, models 4 | import django.db.models.deletion 5 | 6 | 7 | class Migration(migrations.Migration): 8 | 9 | dependencies = [ 10 | ("webfront", "0034_set_wikipedia"), 11 | ] 12 | 13 | operations = [ 14 | migrations.CreateModel( 15 | name="InterProNMatches", 16 | fields=[ 17 | ("match_id", models.AutoField(primary_key=True, serialize=False)), 18 | ("in_interpro", models.BooleanField(db_column="in_interpro")), 19 | ("is_preferred", models.BooleanField(db_column="is_preferred")), 20 | ("locations", models.JSONField()), 21 | ( 22 | "entry", 23 | models.ForeignKey( 24 | db_column="entry_acc", 25 | 
class TSVRenderer(renderers.BaseRenderer):
    """Render API payloads as tab-separated values (``?format=tsv``)."""

    media_type = "text/tab-separated-values"
    format = "tsv"

    def render(self, data, media_type=None, renderer_context=None):
        """Return *data* serialized as TSV text.

        Supports single-object payloads (``{"metadata": ...}``), list
        payloads (``{"results": [...]}``, one row per result's metadata)
        and arbitrary dicts, which are flattened with ``flatDict``.
        Unrecognised payloads render as an empty string.
        """
        objs = None
        if "metadata" in data:
            objs = [data["metadata"]]
        elif "results" in data:
            objs = [item["metadata"] for item in data["results"]]
        elif isinstance(data, dict):
            objs = [flatDict(data)]

        output = io.StringIO()
        # Guard on truthiness, not just `is not None`: an empty "results"
        # list used to reach objs[0] below and crash with IndexError. An
        # empty payload now renders as an empty document.
        if objs:
            writer = csv.DictWriter(
                output,
                # Header comes from the first row; extrasaction="ignore"
                # silently drops keys present only in later rows.
                fieldnames=[k for k in sorted(objs[0]) if k not in fields_to_exclude],
                extrasaction="ignore",
                delimiter="\t",
                quoting=csv.QUOTE_NONNUMERIC,
            )
            writer.writeheader()
            writer.writerows(objs)

        return output.getvalue()
-------------------------------------------------------------------------------- /webfront/static/swagger/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Swagger UI 7 | 8 | 9 | 10 | 31 | 32 | 33 | 34 |
35 | 36 | 37 | 38 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /example_data/protein/M5ADK6.json: -------------------------------------------------------------------------------- 1 | { 2 | "metadata" : { 3 | "accession": "M5ADK6", 4 | "id": "M5ADK6_LACBR", 5 | "sourceOrganism": { 6 | "name": "Lactobacillus brevis KB290", 7 | "taxid": 1001583 8 | }, 9 | "name": { 10 | "name": "Band 7 protein", 11 | "short": null, 12 | "other": [] 13 | }, 14 | "description": null, 15 | "length": 297, 16 | "sequence": "MESGIIEILIRNGVSHMTEKPVFHINGYLGLILVLVILGLGVYLSVVGWGVLGVILVVLAVLAASSLTIIEPNQSKVLTFFGRYIGTIKESGLYLTVPLTTKTTVSLRVRNFNSAILKVNDLQGNPVEIAAVIVFKVVDTSKALFAVEDYEKFVEIQSESAIRHVASEYAYDNFGDHQALTLRSNPTEVSNHLTEELQARLEVAGVQIIETRLTHLAYATEIASAMLQRQQSQAILSARKIIVEGAVSITEGAIEQLAAETDLHLTDNQKLQLINNMMVSIINERGSQPVINTGKVE", 17 | "proteome": "UP000012042", 18 | "gene": "LVISKB_0797", 19 | "GO": { 20 | "biologicalProcess": [], 21 | "molecularFunction": [], 22 | "cellularComponent": [ 23 | { 24 | "id": "GO:0016020", 25 | "name": "membrane" 26 | } 27 | ] 28 | }, 29 | "proteinEvidence": 1 30 | }, 31 | "representation": { 32 | "entries": [ 33 | { 34 | "id": "IPR001972", 35 | "name": "Stomatin family", 36 | "type": "family" 37 | }, 38 | { 39 | "id": "IPR001107", 40 | "name": "Band 7 domain", 41 | "type": "domain" 42 | } 43 | ], 44 | "signalPeptide": [], 45 | "transmembrane": [], 46 | "coiledCoil": [], 47 | "lowComplexity_disorder": [], 48 | "activeSites": [], 49 | "perResidueFeatures": [], 50 | "disulphideBridges": [] 51 | }, 52 | "structure": { 53 | "chains": [] 54 | }, 55 | "genomicContext": { 56 | "DNA": "" 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /webfront/response.py: -------------------------------------------------------------------------------- 1 | from rest_framework.response import Response as R 2 | from django.conf import settings 3 | 4 | from webfront.models import Database 5 | 6 
class Response(R):
    """DRF Response that stamps InterPro version (and debug timing) headers.

    Adds ``InterPro-Version`` / ``InterPro-Version-Minor`` to every
    response; under ``settings.DEBUG`` it also emits a ``Server-Timing``
    header with MySQL and Elasticsearch durations for browser dev tools.
    """

    def __init__(
        self,
        data=None,
        status=None,
        template_name=None,
        headers=None,
        exception=False,
        content_type=None,
    ):
        # BUGFIX: the default used to be `headers={}` — a mutable default
        # shared by every call, which this method then mutated, so headers
        # leaked between unrelated responses. Create a fresh dict per call.
        if headers is None:
            headers = {}

        if settings.DEBUG:
            # Imported lazily: only needed (and only cheap to tolerate)
            # when DEBUG is on.
            from django.db import connection
            from webfront.searcher.elastic_controller import es_results

            timings = [
                'mysql;dur={:0.2f};desc="MySQL"'.format(
                    sum(float(query["time"]) for query in connection.queries) * 1000
                ),
                'es;dur={:0.2f};desc="Elasticsearch"'.format(
                    sum(query["took"] for query in es_results if "took" in query)
                ),
            ]

            headers["Server-Timing"] = ",".join(timings)

        # Cache the release version on settings so the database is only
        # queried once per process.
        if not hasattr(settings, "CACHED_VERSION"):
            settings.CACHED_VERSION = Database.objects.get(pk="interpro").version

        headers["InterPro-Version"] = settings.CACHED_VERSION
        headers["InterPro-Version-Minor"] = settings.MINOR_VERSION

        super().__init__(
            data, status, template_name, headers, exception, content_type
        )
def to_camel_case(snake_case_string):
    """Convert a snake_case string to camelCase.

    Consecutive or trailing underscores produce empty segments when
    splitting; those are now skipped safely (the original indexed ``p[0]``
    and raised IndexError on them). The first segment keeps its case.
    """
    parts = snake_case_string.split("_")
    # p[:1] instead of p[0]: yields "" for empty segments instead of raising.
    return parts[0] + "".join(p[:1].upper() + p[1:] for p in parts[1:])
class SerializerDetail(Enum):
    """Level/shape of serialization requested for a given endpoint.

    Members are grouped in numeric ranges by endpoint: 1xx entry, 2xx
    protein, 3xx structure, 4xx taxonomy/proteome, 5xx set, 600 IDA,
    8xx group-by aggregations and 1000 for raw annotation blobs.
    """

    ALL = 1

    # Entry endpoint (1xx)
    ENTRY_HEADERS = 100
    ENTRY_OVERVIEW = 101
    ENTRY_DETAIL = 102
    ENTRY_MATCH = 103
    ENTRY_PROTEIN_HEADERS = 105
    ENTRY_DB = 106

    # Protein endpoint (2xx)
    PROTEIN_HEADERS = 200
    PROTEIN_OVERVIEW = 201
    PROTEIN_DETAIL = 202
    PROTEIN_ENTRY_DETAIL = 203
    PROTEIN_DB = 204

    # Structure endpoint (3xx)
    STRUCTURE_HEADERS = 300
    STRUCTURE_OVERVIEW = 301
    STRUCTURE_DETAIL = 302
    STRUCTURE_CHAIN = 303
    STRUCTURE_ENTRY_DETAIL = 304
    STRUCTURE_PROTEIN_DETAIL = 305
    STRUCTURE_DB = 306

    # Taxonomy endpoint (4xx)
    TAXONOMY_HEADERS = 400
    TAXONOMY_OVERVIEW = 401
    TAXONOMY_DETAIL = 402
    # TAXONOMY_CHAIN = 403
    # TAXONOMY_ENTRY_DETAIL = 404
    # TAXONOMY_PROTEIN_DETAIL = 405
    TAXONOMY_DB = 406
    TAXONOMY_DETAIL_NAMES = 432
    TAXONOMY_PER_ENTRY = 410
    TAXONOMY_PER_ENTRY_DB = 411

    # Proteome endpoint (45x/46x, plus organism combos at 42x)
    PROTEOME_OVERVIEW = 450
    PROTEOME_HEADERS = 451
    PROTEOME_DETAIL = 453
    ORGANISM_TAXONOMY_PROTEOME = 420
    ORGANISM_TAXONOMY_PROTEOME_HEADERS = 421
    PROTEOME_DB = 460

    # Set endpoint (5xx)
    SET_HEADERS = 500
    SET_OVERVIEW = 501
    SET_DETAIL = 502
    SET_DB = 503

    # Domain-architecture (IDA) search results
    IDA_LIST = 600

    # Group-by aggregations
    GROUP_BY = 800
    GROUP_BY_MEMBER_DATABASES = 801

    # Raw annotation payloads (e.g. alignments, HMMs)
    ANNOTATION_BLOB = 1000
class TestMail(TestCase):
    """Tests for the ``/api/mail/`` endpoint.

    NOTE(review): the ``time.sleep`` calls appear to wait out a
    rate-limit window between tests — ``test_spam`` shows that a rapid
    second POST is answered with 429. Confirm before shortening them.
    """

    def test_mail(self, sleep=60):
        """POST a valid message; expect 200 and the sender echoed back."""
        self.client = Client()
        response = self.client.post(
            "/api/mail/",
            {
                "path": "echo",
                "subject": "Add annotation test from API",
                "message": "Test",
                "from_email": "swaathik@ebi.ac.uk",
            },
        )
        self.assertEqual(response.status_code, status.HTTP_200_OK)
        self.assertEqual(response.json()["from"], "swaathik@ebi.ac.uk")
        # Let the rate limit reset so subsequent tests are not throttled.
        time.sleep(sleep)

    def test_spam(self):
        """Two POSTs in quick succession: the second must be throttled."""
        self.test_mail(sleep=0)
        self.client = Client()
        response = self.client.post(
            "/api/mail/",
            {
                "path": "echo",
                "subject": "Add annotation test from API",
                "message": "Test",
                "from_email": "swaathik@ebi.ac.uk",
            },
        )
        self.assertEqual(response.status_code, status.HTTP_429_TOO_MANY_REQUESTS)
        time.sleep(60)

    def test_mail_invalid_queue(self):
        """An unknown ``queue`` value must be rejected with 400."""
        self.client = Client()
        response = self.client.post(
            "/api/mail/",
            {
                "path": "echo",
                "subject": "Add annotation test from API",
                "message": "Test",
                "queue": "uniprot",
                "from_email": "swaathik@ebi.ac.uk",
            },
        )
        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
        time.sleep(60)
-------------------------------------------------------------------------------- 1 | Searcher Controller 2 | === 3 | 4 | The classes here are responsible of executing queries in our search index. 5 | Although we are using a Search index, we don't just use if for search purposes, it is also effectively the join table of all the entities in MySQL. 6 | 7 | Initially it was split into `SearchController` and `ElasticController` because at that time, we hadn't decided upon the technology we were going to use. 8 | `SearchController` is basically an abstract class with methods that need to be implemented by any particular 9 | technology controller. We eventually chose Elasticsearch, so the other controllers were removed to avoid the redundancy of maintaining multiple systems that we weren't being used. Therefore for the rest of the document we will be focus on the Elasticsearch controller 10 | 11 | Elasticsearch allows a query to be submitted by either the `q` URL parameter or in the body of the request. 12 | The general approach in this class is to use the `q` parameter for the filtering of the index and use the _body_ method for the aggregations and other complex operation. We combined these two methods; for example to get the number of InterPro matches of a protein, we filter the index using something like: `q=protein_acc:p99999 AND entry_type:interpro` and then in the body of the query, we aggregate the results to get the count of *unique* matches. e.g. 13 | ```json 14 | { 15 | "aggs": { 16 | "count": {"cardinality": {"field": "entry_acc"}} 17 | }, 18 | "size": 0 19 | } 20 | ``` 21 | 22 | Most of the methods in this class are about building generalisations to create the JSON to include in the query. For example the counter above is part of the `get_grouped_object()` method wich can be used in the different endpoints. 
class UnManagedModelTestRunner(DiscoverRunner):
    """
    Test runner that automatically makes all unmanaged models in your Django
    project managed for the duration of the test run.
    Many thanks to the Caktus Group: http://bit.ly/1N8TcHW
    """

    def __init__(self, *args, **kwargs):
        # Flag read elsewhere in the project to detect a test run.
        settings.IN_TEST_MODE = True
        self.unmanaged_models = []
        super(UnManagedModelTestRunner, self).__init__(*args, **kwargs)

    def setup_test_environment(self, *args, **kwargs):
        """Flip every unmanaged model of the ``webfront`` app to managed.

        Django only creates test tables for managed models. Table names of
        the form '"schema"."table"' are rewritten to 'schema_table' —
        presumably because the test database (SQLite, see the module-level
        override below) does not support schema-qualified names; confirm.
        """
        from django.apps import apps

        myapp = apps.get_app_config("webfront")
        self.unmanaged_models = [
            m for m in myapp.models.values() if not m._meta.managed
        ]
        for m in self.unmanaged_models:
            m._meta.managed = True
            # '"schema"."table"' -> 'schema_table'
            m._meta.db_table = re.sub(
                r'^"([^"]+)"\."([^"]+)"$', r"\1_\2", m._meta.db_table
            )

        super(UnManagedModelTestRunner, self).setup_test_environment(*args, **kwargs)

    def teardown_test_environment(self, *args, **kwargs):
        super(UnManagedModelTestRunner, self).teardown_test_environment(*args, **kwargs)
        # reset unmanaged models
        for m in self.unmanaged_models:
            m._meta.managed = False
"django.db.backends.sqlite3" 46 | settings.DATABASES["interpro_ro"]["NAME"] = os.path.join( 47 | settings.BASE_DIR, "../database/db3.sqlite3" 48 | ) 49 | settings.DATABASES["interpro_ro"]["TEST"] = {"MIRROR": "default"} 50 | -------------------------------------------------------------------------------- /example_data/protein/A0A0A2L2G2.json: -------------------------------------------------------------------------------- 1 | { 2 | "metadata" : { 3 | "accession": "A0A0A2L2G2", 4 | "id": "A0A0A2L2G2_PENIT", 5 | "sourceOrganism": { 6 | "name": "Penicillium italicum", 7 | "taxid": 40296 8 | }, 9 | "name": { 10 | "name": "Propeptide, carboxypeptidase Y", 11 | "short": null, 12 | "other": [] 13 | }, 14 | "description": null, 15 | "length": 550, 16 | "sequence": "MRVLSTTLLVGAASAAAPSFQQVLGAHSEHAENVAQQGADAFKPLQHLQDQFKSLSSEARQLWEEVSNYFPESMGSAPMLSLPKKHTRRPDSHWDYHVSGAKVQDIWVSGAEGTKEREVDGKLEDYALRAKKVDPSALGIDPGVKQYSGYLDDNENDKHLFYWFFESRNDPKNDPVVLWLNGGPGCSSLTGLFMELGPSSIGANIKPIYNDFSWNNNASVIFLDQPINVGYSYSGSSVSDTVAAGKDVYALLTLFFKQFPEYATQDFHIAGESYAGHYIPVMASEILSHKKRNINLKSVLIGNGLTDGLTQYEYYRPMACGEGGYPAVLDESTCQSMDNALSRCQSMIQSCYNSESPWVCVPASIYCNNAMLGPYQRTGQNVYDVRGKCEDESNLCYKGLGYVSEYLGQESVREAVGAEVDGYDSCNFDINRNFLFNGDWFKPYHRLVPGLLEQIPVLIYAGDADFICNWLGNKAWSEALEWPGQKEFASAELEDLKIVQNEHVGKKIGQIKSHGNFTFMRIFGGGHMVPMDQPESGLEFFNRWIGGEWF", 17 | "proteome": "UP000030104", 18 | "gene": "PITC_084940", 19 | "GO": { 20 | "biologicalProcess": [ 21 | { 22 | "id": "GO:0006508", 23 | "name": "proteolysis" 24 | } 25 | ], 26 | "molecularFunction": [ 27 | { 28 | "id": "GO:0004185", 29 | "name": "serine-type carboxypeptidase activity" 30 | } 31 | ], 32 | "cellularComponent": [ 33 | { 34 | "id": "GO:0005773", 35 | "name": "vacuole" 36 | } 37 | ] 38 | }, 39 | "proteinEvidence": 1 40 | }, 41 | "representation": { 42 | "signalPeptide": [], 43 | "transmembrane": [], 44 | "coiledCoil": [], 45 | "lowComplexity_disorder": [], 46 | "activeSites": [ 47 | { 48 | "id": "IPR018202", 49 | "name": "Peptidase S10, serine 
carboxypeptidase, active site" 50 | } 51 | ], 52 | "perResidueFeatures": [], 53 | "disulphideBridges": [] 54 | }, 55 | "structure": { 56 | "chains": [] 57 | }, 58 | "genomicContext": { 59 | "DNA": "" 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /templates/rest_framework/api.html: -------------------------------------------------------------------------------- 1 | {% extends "rest_framework/base.html" %} 2 | {% load static %} 3 | {% load rest_framework %} 4 | {% load interpro_tags %} 5 | 6 | {% block bootstrap_theme %} 7 | 8 | 9 | {% endblock %} 10 | 11 | {% block breadcrumbs %}{% endblock %} 12 | 13 | {% block title %} 14 | InterPro API - EBI 15 | {% endblock %} 16 | 17 | {% block branding %} 18 | InterPro 7 19 | {% endblock %} 20 | 21 | {% block request_forms %} 22 | {% if 'GET' in allowed_methods %} 23 |
24 |
25 | {% if api_settings.URL_FORMAT_OVERRIDE %} 26 |
27 | GET 28 | 29 | 32 | 39 |
40 | {% else %} 41 | GET 42 | {% endif %} 43 |
44 |
class StructureRESTTest(InterproRESTTestCase):
    """Integration tests for the /api/structure endpoint (fixture-backed)."""

    def test_can_read_structure_overview(self):
        """The bare endpoint returns per-database structure counts."""
        response = self.client.get("/api/structure")
        self.assertEqual(response.status_code, status.HTTP_200_OK)
        self._check_structure_count_overview(response.data)

    def test_can_read_structure_pdb(self):
        """Listing PDB structures returns all 4 fixture structures."""
        response = self.client.get("/api/structure/pdb")
        self.assertEqual(response.status_code, status.HTTP_200_OK)
        self._check_is_list_of_objects_with_key(response.data["results"], "metadata")
        self.assertEqual(len(response.data["results"]), 4)

    def test_can_read_structure_pdb_accession(self):
        """A single accession returns metadata with protein/entry counters."""
        response = self.client.get("/api/structure/pdb/2BKM")
        self.assertEqual(response.status_code, status.HTTP_200_OK)
        self.assertIn("metadata", response.data)
        self._check_structure_details(response.data["metadata"])
        self.assertIn("proteins", response.data["metadata"]["counters"])
        self.assertIn("entries", response.data["metadata"]["counters"])
        self.assertEqual(2, response.data["metadata"]["counters"]["proteins"])
        self.assertEqual(1, response.data["metadata"]["counters"]["entries"])

    def test_can_read_structure_pdb_accession_chain(self):
        """Filtering by chain (case-insensitive URL) returns only that chain."""
        response = self.client.get("/api/structure/pdb/2bkm/B")
        self.assertEqual(response.status_code, status.HTTP_200_OK)
        self.assertIn("metadata", response.data)
        self._check_structure_details(response.data["metadata"])
        for chain in response.data["metadata"]["chains"].values():
            self._check_structure_chain_details(chain)
            self.assertEqual(chain["chain"].upper(), "B")

    # TODO:
    def test_cant_read_structure_bad_db(self):
        """An unknown structure database yields 404."""
        self._check_HTTP_response_code(
            "/api/structure/bad_db", code=status.HTTP_404_NOT_FOUND
        )

    def test_cant_read_structure_pdb_bad_chain(self):
        """A chain absent from the structure is rejected (default check)."""
        self._check_HTTP_response_code("/api/structure/pdb/2bkm/C")
class IDASearchModifierTest(InterproRESTTestCase):
    """Tests for the ida_search / ida_ignore / ordered / exact modifiers on /api/entry."""

    def _assertSearch(self, response, count):
        """Assert a 200 IDA-search payload whose reported count matches *count*."""
        self.assertEqual(response.status_code, status.HTTP_200_OK)
        self.assertIn("results", response.data)
        self.assertIn("count", response.data)
        self.assertEqual(len(response.data["results"]), response.data["count"])
        self.assertEqual(count, response.data["count"])

    def test_search_by_a_single_accession(self):
        res = self.client.get("/api/entry?ida_search=IPR003165")
        self._assertSearch(res, 2)

    def test_search_by_ordered_search(self):
        res = self.client.get("/api/entry?ida_search=IPR003165,IPR003165&ordered")
        self._assertSearch(res, 2)

    def test_search_by_non_existing_ipr(self):
        res = self.client.get("/api/entry?ida_search=IPR00XXXX")
        self._assertSearch(res, 0)

    def test_search_with_ignoring_list(self):
        res = self.client.get("/api/entry?ida_search=IPR001175&ida_ignore=pf17176")
        self._assertSearch(res, 0)

    def test_search_exact_single(self):
        res = self.client.get("/api/entry?ida_search=pf17180&ordered&exact")
        self._assertSearch(res, 1)

    def test_search_exact_vs_ordered(self):
        # The same domain pair matches once exactly but twice when only order counts.
        self._assertSearch(
            self.client.get("/api/entry?ida_search=PF02171,PF02171&exact"), 1
        )
        self._assertSearch(
            self.client.get("/api/entry?ida_search=PF02171,PF02171&ordered"), 2
        )

    def test_search_pfam_vs_interpro_accession(self):
        # A Pfam accession and the InterPro entry it is integrated into are
        # interchangeable in exact searches.
        self._assertSearch(
            self.client.get("/api/entry?ida_search=PF02171,PF02171&exact"), 1
        )
        self._assertSearch(
            self.client.get("/api/entry?ida_search=IPR003165,PF02171&exact"), 1
        )
        self._assertSearch(
            self.client.get("/api/entry?ida_search=PF02171,IPR003165&exact"), 1
        )
        self._assertSearch(
            self.client.get("/api/entry?ida_search=IPR003165,IPR003165&exact"), 1
        )
'https://localhost:9200/test?pretty' -H 'Content-Type: application/json' -d @config/elastic_mapping.json -k 32 | curl --user elastic:elasticsearch_password -XPUT 'https://localhost:9200/ida?pretty' -H 'Content-Type: application/json' -d @config/elastic_ida_mapping.json -k 33 | - name: 🔧 - Install Dependencies 34 | run: | 35 | pip install -r requirements.txt 36 | pip install -r dev_requirements.txt 37 | pip install tblib 38 | pip freeze 39 | - name: 🧪 - Testing 40 | run: | 41 | echo -e "searcher_user: elastic \nsearcher_test_password: elasticsearch_password" > config/interpro.local.yml 42 | cat config/interpro.local.yml 43 | export BROWSER_TEST=chrome 44 | coverage run --source='.' manage.py test 45 | - name: 🧥‍ - Coveralls 46 | continue-on-error: true 47 | run: | 48 | export COVERALLS_REPO_TOKEN=0NCZQkRT7k27xoKabeCH3UzAEUIDk5BAw 49 | coveralls 50 | - name: 📮 - Slack Notification 51 | uses: rtCamp/action-slack-notify@v2 52 | continue-on-error: true 53 | if: github.ref == 'refs/heads/master' || github.ref == 'refs/heads/dev' 54 | env: 55 | SLACK_COLOR: "${{ job.status == 'success' && 'good' || 'danger' }}" 56 | SLACK_USERNAME: "Github Actions API" 57 | SLACK_ICON_EMOJI: ":octocat:" 58 | SLACK_TITLE: "CI API results in GitHub Actions" 59 | SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} 60 | SLACK_CHANNEL: "#interpro7" 61 | MSG_MINIMAL: Actions URL 62 | -------------------------------------------------------------------------------- /example_data/protein/X2JLE1.json: -------------------------------------------------------------------------------- 1 | { 2 | "metadata" : { 3 | "accession": "X2JLE1", 4 | "id": "X2JLE1_DROME", 5 | "sourceOrganism": { 6 | "name": "Drosophila melanogaster (Fruit fly)", 7 | "taxid": 7227 8 | }, 9 | "name": { 10 | "name": "Dopamine 2-like receptor, isoform G", 11 | "short": null, 12 | "other": [] 13 | }, 14 | "description": null, 15 | "length": 897, 16 | "sequence": 
"MLSPFDWRRGISSSGTGGTMAAQPLSSTAATTAAATGATAATAATAATTSATLSTAAASTSTTAAPSAGATWINHHLAVEADSSQPANGSDAQAGVEGPTMPAGYLPLYEDVETAAEDAGYALIDDISEWLLGSVGSEAAVGGPENSTNLAVTGANGTLAWLEALNSTQPAQSNSSAEDGERGRYSLRSFVEQQLAGGGAAGAGDGGDAGIALIDSGEEAALDNVADAETDYGMLGGFGDAELLQRTATVARETLGNRTAPSTTSYDGGGSGDVGVAGGLAGTAGGGVGGAGGSGGSTFMLLLENFNDYFPNYNGSTVSGTSTIAPGVAITGSRGSGLLLEQNLTGLYLDGYRLNCTNETLNLTDSCGELRVVDHNYWALILILFPILTLFGNILVILSVCRERSLQTVTNYFIVSLAIADLLVAVVVMPFAVYFLVNGAWALPDVVCDFYIAMDVICSTSSIFNLVAISIDRYIAVTQPIKYAKHKNSRRVCLTILLVWAISAAIGSPIVLGLNNTPNREPDVCAFYNADFILYSSLSSFYIPCIIMVFLYWNIFKALRSRARKQRAARKPHLSELTGGSVIENIAQTRRLAETALDSSRHASRILPDEAATNTASGSNEEEDENAISPDIDDCHVIVNDKSTEFMLATVVEETGNSVVAQITTQPQLVVADPNGNHDSGYAASNVDDVLAGVAPASASAATSAAPRSSGSPPDSPLPSGATLQRSSVSSQRRPTGDDSPKRGEPALSVAMKPLSFVRYGVQEAMTLARNDSTLSTTSKTSSRKDKKNSQASRFTIYKVHKASKKKREKSSAKKERKATKTLAIVLGVFLFCWLPFFSCNIMDAMCAKFKKDCRPGLTAYMMTTWLGYINSFVNPVIYTIFNPEFRKAFKKIMHMG", 17 | "proteome": "UP000000803", 18 | "gene": "Dop2R", 19 | "GO": { 20 | "biologicalProcess": [ 21 | { 22 | "id": "GO:0007186", 23 | "name": "G-protein coupled receptor signaling pathway" 24 | } 25 | ], 26 | "molecularFunction": [ 27 | { 28 | "id": "GO:0004930", 29 | "name": "G-protein coupled receptor activity" 30 | } 31 | ], 32 | "cellularComponent": [ 33 | { 34 | "id": "GO:0016021", 35 | "name": "integral component of membrane" 36 | } 37 | ] 38 | }, 39 | "proteinEvidence": 1 40 | }, 41 | "representation": { 42 | "entries": [ 43 | { 44 | "id": "IPR000276", 45 | "name": "G protein-coupled receptor, rhodopsin-like", 46 | "type": "family" 47 | }, 48 | { 49 | "id": "IPR017452", 50 | "name": "GPCR, rhodopsin-like, 7TM", 51 | "type": "domain" 52 | } 53 | ], 54 | "signalPeptide": [], 55 | "transmembrane": [], 56 | "coiledCoil": [], 57 | "lowComplexity_disorder": [], 58 | "activeSites": [], 59 | "perResidueFeatures": [], 60 | "disulphideBridges": [] 61 | }, 62 | "structure": { 63 | "chains": [] 64 | }, 65 | "genomicContext": { 66 | "DNA": "" 67 | } 68 | } 69 | 
-------------------------------------------------------------------------------- /example_data/protein/V5XAD2.json: -------------------------------------------------------------------------------- 1 | { 2 | "metadata" : { 3 | "accession": "V5XAD2", 4 | "id": "V5XAD2_MYCNE", 5 | "sourceOrganism": { 6 | "name": "Mycobacterium neoaurum VKM Ac-1815D", 7 | "taxid": 700508 8 | }, 9 | "name": { 10 | "name": "NADH-quinone oxidoreductase subunit I", 11 | "short": null, 12 | "other": ["NADH dehydrogenase I subunit I", "NDH-1 subunit I"] 13 | }, 14 | "description": "NDH-1 shuttles electrons from NADH, via FMN and iron-sulfur (Fe-S) centers, to quinones in the respiratory chain. The immediate electron acceptor for the enzyme in this species is believed to be menaquinone. Couples the redox reaction to proton translocation (for every two electrons transferred, four hydrogen ions are translocated across the cytoplasmic membrane), and thus conserves the redox energy in a proton gradient.", 15 | "length": 179, 16 | "sequence": "MSKVGDALAGFGVTFKAMLHKPITEQYPEKPGPVAPRYHGRHQLNRYADGLEKCIGCELCAWACPADAIFVEGADNTAEQRFSPGERYGRVYQINYLRCIGCGLCIEACPTRALTMTNDYEMADDNRADLIYGKDKLLAPLTADMTAPPHAMAEGSTDEDYYRGNIRADGLARPSEATR", 17 | "proteome": "UP000018763", 18 | "gene": "nuoI", 19 | "GO": { 20 | "biologicalProcess": [ 21 | { 22 | "id": "GO:0055114", 23 | "name": "oxidation-reduction process" 24 | } 25 | ], 26 | "molecularFunction": [ 27 | { 28 | "id": "GO:0016651", 29 | "name": "oxidoreductase activity, acting on NAD(P)H" 30 | }, 31 | { 32 | "id": "GO:0051539", 33 | "name": "4 iron, 4 sulfur cluster binding" 34 | } 35 | ], 36 | "cellularComponent": [ 37 | { 38 | "id": "GO:0016020", 39 | "name": "membrane" 40 | } 41 | ] 42 | }, 43 | "proteinEvidence": 3 44 | }, 45 | "representation": { 46 | "entries": [ 47 | { 48 | "id": "IPR010226", 49 | "name": "NADH-quinone oxidoreductase, chain I", 50 | "type": "family" 51 | }, 52 | { 53 | "id": "IPR017896", 54 | "name": "4Fe-4S ferredoxin-type, iron-sulphur 
@csrf_exempt
def send_email(request):
    """Throttled entry point for the contact-form email endpoint.

    Allows at most one message per minute from the address that made the
    most recent request; requests from any other address are sent straight
    through (and become the new recorded address).

    NOTE(review): ``settings.credentials`` holds a single ``{ip, time}``
    record, so the throttle only applies to the last client seen -- a
    request from a different IP overwrites the record and resets the
    window. Confirm this single-slot behaviour is intentional.
    """
    ip_address = get_client_ip(request)
    now = datetime.now()
    if not hasattr(settings, "credentials"):
        # First request since process start: nothing recorded yet.
        return store_credentials_and_mail(request, ip_address, now)
    else:
        last_accessed = settings.credentials
        if last_accessed["ip"] == ip_address:
            # Same client as last time: only allow if >= 1 minute has passed.
            then = datetime.strptime(last_accessed["time"], "%Y-%m-%d %H:%M:%S.%f")
            time_diff = now - then
            # timedelta / timedelta(minutes=1) yields elapsed minutes as a float.
            elapsed_min = time_diff / timedelta(minutes=1)
            if elapsed_min >= 1:
                return store_credentials_and_mail(request, ip_address, now)
            else:
                data = {"error": "Request Aborted"}
                # 429 Too Many Requests.
                return JsonResponse(data, status=429)
        else:
            # Different client: record it and send immediately.
            return store_credentials_and_mail(request, ip_address, now)


def get_client_ip(request):
    """Best-effort client IP: first X-Forwarded-For hop, else REMOTE_ADDR.

    NOTE(review): X-Forwarded-For is client-controlled unless a trusted
    proxy rewrites it, so the throttle in ``send_email`` can be bypassed
    by spoofing this header -- confirm the deployment sits behind such a
    proxy.
    """
    x_forwarded_for = request.META.get("HTTP_X_FORWARDED_FOR")
    if x_forwarded_for:
        # The first entry is the originating client in a proxy chain.
        ip = x_forwarded_for.split(",")[0]
    else:
        ip = request.META.get("REMOTE_ADDR")
    return ip
| def store_credentials_and_mail(request, ip, time): 41 | settings.credentials = {"ip": ip, "time": time.strftime("%Y-%m-%d %H:%M:%S.%f")} 42 | return mail(request) 43 | 44 | 45 | def mail(request): 46 | path = request.POST.get("path", INTERPRO_CONFIG.get("sendmail_path")) 47 | subject = request.POST.get("subject", "") 48 | message = request.POST.get("message", "") 49 | from_email = request.POST.get("from_email", "") 50 | queue = request.POST.get("queue", "interpro").lower() 51 | to_email = {"interpro": "interhelp@ebi.ac.uk", "pfam": "pfam-help@ebi.ac.uk"}.get( 52 | queue, "" 53 | ) 54 | if path and subject and message and from_email and to_email: 55 | message = MIMEText(message) 56 | message["From"] = from_email 57 | message["To"] = to_email 58 | message["Subject"] = subject 59 | p = Popen([path, "-t", "-oi"], stdin=PIPE) 60 | p.communicate(message.as_bytes()) 61 | data = {"from": from_email, "subject": subject} 62 | return JsonResponse(data) 63 | else: 64 | data = {"error": "Make sure all fields are entered and valid"} 65 | return JsonResponse(data, status=400) 66 | -------------------------------------------------------------------------------- /example_data/entry/PF02171.json: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "accession": "PF02171", 4 | "id": "", 5 | "type": "domain", 6 | "GO": { 7 | "biologicalProcess": [], 8 | "molecularFunction": [ 9 | { 10 | "id": "GO:0003676", 11 | "name": "nucleic acid binding" 12 | } 13 | ], 14 | "cellularComponent": [] 15 | }, 16 | "sourceDataBase": "Pfam", 17 | "memberDataBases": {}, 18 | "integrated": "IPR003165", 19 | "name": { 20 | "name": "Piwi domain", 21 | "short": "Piwi", 22 | "other": [] 23 | }, 24 | "description": "This domain is found in the protein Piwi and its relatives. The function of this domain is the dsRNA guided hydrolysis of ssRNA. 
"DOI_URL": "http://dx.doi.org/10.1126/science.1102514"
},
def _fragment_spans(locations):
    # Render every fragment of every location as a "start..end" string.
    spans = []
    for loc in locations:
        for frag in loc["fragments"]:
            spans.append(f"{frag['start']}..{frag['end']}")
    return spans


def main():
    """Print a TSV of InterPro matches and extra features for the accession in argv[1]."""
    query = sys.argv[1]

    api_url = "https://www.ebi.ac.uk/interpro/api"
    url = f"{api_url}/entry/all/protein/UniProt/{query}/"
    url += "?page_size=200&extra_fields=hierarchy,short_name"

    with urlopen(url) as res:
        data = json.loads(res.read().decode("utf-8"))

    protein_accession = ""
    protein_length = ""
    for index, match in enumerate(data["results"]):
        meta = match["metadata"]
        protein = match["proteins"][0]

        member_dbs = meta["member_databases"]
        if member_dbs:
            signatures = ",".join(
                sig for db in member_dbs.values() for sig in db.keys()
            )
        else:
            signatures = "-"

        if meta["go_terms"]:
            go_terms = ",".join(term["identifier"] for term in meta["go_terms"])
        else:
            go_terms = "-"

        locations = _fragment_spans(protein["entry_protein_locations"])

        # The protein columns are taken once, from the first match.
        if index == 0:
            protein_accession = protein["accession"].upper()
            protein_length = str(protein["protein_length"])

        print("\t".join([
            meta["accession"],
            meta["name"] or "-",
            meta["source_database"],
            meta["type"],
            meta["integrated"] or "-",
            signatures,
            go_terms,
            protein_accession,
            protein_length,
            ",".join(locations)
        ]))

    url = f"{api_url}/protein/UniProt/{query}/?extra_features=true"
    with urlopen(url) as res:
        features = json.loads(res.read().decode("utf-8"))
    for feature in features.values():
        locations = _fragment_spans(feature["locations"])
        print("\t".join([
            feature["accession"],
            "-",
            feature["source_database"],
            "-",
            "-",
            "-",
            "-",
            protein_accession,
            protein_length,
            ",".join(locations)
        ]))
"{}_db".format(endpoint) 24 | acc = "{}_acc".format(endpoint) 25 | if endpoint == "taxonomy": 26 | acc = "tax_lineage" 27 | elif endpoint == "structure": 28 | db = "structure_chain_acc" 29 | elif endpoint == "proteome": 30 | db = "proteome_acc" 31 | acc = "proteome_acc" 32 | ngroups = self.get_group_obj_of_field_by_query( 33 | "{} && {}:* && {}:{}".format( 34 | query, db, acc, escape(str(accession).lower()) 35 | ), 36 | field, 37 | fq, 38 | )["ngroups"] 39 | if isinstance(ngroups, dict): 40 | ngroups = ngroups["value"] 41 | return ngroups 42 | 43 | @abc.abstractmethod 44 | def get_chain(self): 45 | raise NotImplementedError("users must define get_chain to use this base class") 46 | 47 | @abc.abstractmethod 48 | def get_counter_object(self, endpoint, query=None, extra_counters=[]): 49 | raise NotImplementedError( 50 | "users must define get_counter_object to use this base class" 51 | ) 52 | 53 | @abc.abstractmethod 54 | def get_grouped_object( 55 | self, endpoint, field, query=None, extra_counters=[], size=10 56 | ): 57 | raise NotImplementedError( 58 | "users must define get_counter_object to use this base class" 59 | ) 60 | 61 | @abc.abstractmethod 62 | def get_list_of_endpoint(self, endpoint, query=None, rows=1, start=0): 63 | raise NotImplementedError( 64 | "users must define get_list_of_endpoint to use this base class" 65 | ) 66 | 67 | @abc.abstractmethod 68 | def execute_query(self, query, fq=None, rows=0, start=0): 69 | raise NotImplementedError( 70 | "users must define execute_query to use this base class" 71 | ) 72 | 73 | @abc.abstractmethod 74 | def add(self, docs): 75 | raise NotImplementedError("users must define add to use this base class") 76 | 77 | @abc.abstractmethod 78 | def clear_all_docs(self): 79 | raise NotImplementedError( 80 | "users must define clear_all_docs to use this base class" 81 | ) 82 | -------------------------------------------------------------------------------- /webfront/tests/tests_protein_endpoint.py: 
class ProteinRESTTest(InterproRESTTestCase):
    """Tests for the /api/protein endpoint family."""

    def test_can_read_protein_overview(self):
        """The protein overview responds 200 with per-database counters."""
        res = self.client.get("/api/protein")
        self.assertEqual(res.status_code, status.HTTP_200_OK)
        self._check_protein_count_overview(res.data)

    def test_can_read_protein_uniprot(self):
        """The UniProt list contains five result objects, each with metadata."""
        res = self.client.get("/api/protein/uniprot")
        self.assertEqual(res.status_code, status.HTTP_200_OK)
        results = res.data["results"]
        self._check_is_list_of_objects_with_key(results, "metadata")
        self.assertEqual(5, len(results))

    def test_can_read_protein_uniprot_accession(self):
        """A single accession exposes structure and entry counters."""
        res = self.client.get("/api/protein/uniprot/P16582")
        self.assertEqual(res.status_code, status.HTTP_200_OK)
        self.assertIn("metadata", res.data)
        metadata = res.data["metadata"]
        self._check_protein_details(metadata)
        counters = metadata["counters"]
        self.assertIn("structures", counters)
        self.assertIn("entries", counters)
        self.assertEqual(1, counters["structures"])
        self.assertEqual(2, counters["entries"])

    def test_can_read_protein_id(self):
        """Looking up by UniProt ID redirects (302) to the accession URL."""
        res = self.client.get("/api/protein/uniprot/CBPYA_ASPCL")
        self.assertEqual(res.status_code, status.HTTP_302_FOUND)
        self.assertIn("a1cuj5", res.url.lower())

    def test_can_read_protein_reviewed(self):
        """The reviewed subset lists two proteins."""
        res = self.client.get("/api/protein/reviewed")
        self.assertEqual(res.status_code, status.HTTP_200_OK)
        results = res.data["results"]
        self._check_is_list_of_objects_with_key(results, "metadata")
        self.assertEqual(2, len(results))

    def test_can_read_protein_reviewed_accession(self):
        """A reviewed accession exposes structure and entry counters."""
        res = self.client.get("/api/protein/reviewed/A1CUJ5")
        self.assertEqual(res.status_code, status.HTTP_200_OK)
        self.assertIn("metadata", res.data)
        metadata = res.data["metadata"]
        self._check_protein_details(metadata)
        counters = metadata["counters"]
        self.assertIn("structures", counters)
        self.assertIn("entries", counters)
        self.assertEqual(1, counters["structures"])
        self.assertEqual(5, counters["entries"])

    def test_cant_read_protein_bad_db(self):
        """An unknown protein database yields a 404."""
        self._check_HTTP_response_code(
            "/api/protein/bad_db", code=status.HTTP_404_NOT_FOUND
        )

    def test_cant_read_protein_uniprot_bad_id(self):
        """Malformed IDs yield 404; well-formed but absent accessions yield 204."""
        self._check_HTTP_response_code(
            "/api/protein/uniprot/badformmedID", code=status.HTTP_404_NOT_FOUND
        )
        self._check_HTTP_response_code(
            "/api/protein/uniprot/A1CUJ6",
            code=status.HTTP_204_NO_CONTENT,
            msg="It should fail as 204 because the ID is well formed but it is not in the BD",
        )
Manually create the fixture, using `gzip` and `bytes` for the binary fields 14 | ```python 15 | import gzip 16 | from webfront.models import StructuralModel 17 | 18 | contacts = "[[1,1,1,1,1.0], [1,2,1,2,30,0.5], [1,3,1,4,0.8], [2,2,2,2,1.0], [2,3,2,4,0.9], [3,3,4,4,1.0]]" 19 | contacts_gz = gzip.compress(bytes(contacts,'utf-8')) 20 | 21 | plddt = '[0.7807835340499878, 0.8842586278915405, 0.8649855852127075]' 22 | plddt_gz = gzip.compress(bytes(plddt,'utf-8')) 23 | 24 | structure = """ATOM 1 N VAL A 1 -0.701 1.770 1.392 1.00 4.92 N 25 | ATOM 1 N ARG A 1 -0.099 0.648 -0.392 1.00 0.00 N 26 | ATOM 2 CA ARG A 1 1.339 0.488 -0.541 1.00 0.00 C 27 | ATOM 3 C ARG A 1 2.039 1.845 -0.536 1.00 0.00 C 28 | ATOM 4 O ARG A 1 1.712 2.743 -1.333 1.00 0.00 O 29 | ATOM 5 CB ARG A 1 1.666 -0.244 -1.831 1.00 0.00 C 30 | ATOM 6 CG ARG A 1 3.140 -0.516 -2.039 1.00 0.00 C 31 | ATOM 7 CD ARG A 1 3.410 -1.196 -3.331 1.00 0.00 C 32 | ATOM 8 NE ARG A 1 4.824 -1.452 -3.502 1.00 0.00 N 33 | ATOM 9 CZ ARG A 1 5.425 -1.747 -4.668 1.00 0.00 C 34 | """ 35 | structure_gz = gzip.compress(bytes(structure,'utf-8')) 36 | 37 | model = StructuralModel(model_id=1, accession='PF17176', algorithm='RoseTTAFold', contacts=contacts_gz, 38 | plddt=plddt_gz, structure=structure_gz) 39 | model.save() 40 | ``` 41 | 42 | 3. 
Generate the fixture using the `dumpdata` tool in django: 43 | ```shell 44 | python manage.py dumpdata webfront --indent 4 45 | ``` 46 | 47 | ```json 48 | [ 49 | { 50 | "model": "webfront.structuralmodel", 51 | "pk": 1, 52 | "fields": { 53 | "accession": "PF17176", 54 | "algorithm": "RoseTTAFold", 55 | "contacts": "H4sIAKMfQmEC/4uONtSBQj2DWB0FINdIB4SNDXQM9EwhIsZAERMg1wLENdKBQKh6oEogByRrCeIaA7kmQAiSjQUAU15YL10AAAA=", 56 | "plddt": "H4sIAKMfQmEC/x3IwQ3AIAwDwFU6AEIm2NiZBXX/NSr1nncxHThbm2B3nPFgJizllNNLhP477EhRrTKs9wMKEasjPAAAAA==", 57 | "structure": "H4sIAKMfQmEC/43SS27EIAwG4P2cggvEssE8sqR0NJvpRKqqLnr/g9Q2VZUZEgkW4aHw8Zukfm0fzho595Duu95d7VNtC0JGHRPkjNaH1WuHMmPQ8X/T/Zf6KtbP24uI6yoDhMTF5jsR7bkXd6C81eoAaqTQQS4djEwnYHsCgy0MoAc0kKBw7GBIUyA7t7mjhJm8wZnlzEUThxNwewLl+PZ2BKaULJlnNrCEuZJlV7uNYABi7KWSwn9XMAFmWXg/ApnQktGqYJCS5xLKJ3xcR5Ch+F4qR29gxLnfRupoPyMYgX00MLMUsbDcaTlLePkFk6MiNykDAAA=" 58 | } 59 | } 60 | ] 61 | ``` 62 | 63 | 4. Now you can use the generated JSON to included in one of the fixture files in `webfront/tests/`. 64 | 65 | In this example the generated fixture is included at the end of `webfront/tests/fixtures_structure.json`. 
66 | -------------------------------------------------------------------------------- /functional_tests/base.py: -------------------------------------------------------------------------------- 1 | from django.contrib.staticfiles.testing import StaticLiveServerTestCase 2 | from django.test import override_settings 3 | from selenium import webdriver 4 | from selenium.webdriver.common.by import By 5 | import sys 6 | import time 7 | import os 8 | from selenium.common.exceptions import StaleElementReferenceException 9 | from selenium.webdriver.chrome.options import Options 10 | 11 | from webfront.tests.fixtures_reader import FixtureReader 12 | from interpro.settings import SEARCHER_TEST_URL, SEARCHER_TEST_PASSWORD 13 | 14 | 15 | @override_settings(SEARCHER_URL=SEARCHER_TEST_URL) 16 | @override_settings(SEARCHER_PASSWORD=SEARCHER_TEST_PASSWORD) 17 | @override_settings(SEARCHER_INDEX="test") 18 | class FunctionalTest(StaticLiveServerTestCase): 19 | fixtures = [ 20 | "webfront/tests/fixtures_entry.json", 21 | "webfront/tests/fixtures_protein.json", 22 | "webfront/tests/fixtures_structure.json", 23 | "webfront/tests/fixtures_organisms.json", 24 | "webfront/tests/fixtures_set.json", 25 | "webfront/tests/fixtures_database.json", 26 | ] 27 | links_fixtures = "webfront/tests/relationship_features.json" 28 | 29 | @classmethod 30 | def setUpClass(cls): 31 | for arg in sys.argv: 32 | if "liveserver" in arg: 33 | cls.server_url = "http://" + arg.split("=")[1] 34 | return 35 | super().setUpClass() 36 | cls.server_url = cls.live_server_url 37 | cls.fr = FixtureReader(cls.fixtures + [cls.links_fixtures]) 38 | docs = cls.fr.get_fixtures() 39 | cls.fr.add_to_search_engine(docs) 40 | 41 | @classmethod 42 | def tearDownClass(cls): 43 | # cls.fr.clear_search_engine() 44 | if cls.server_url == cls.live_server_url: 45 | super().tearDownClass() 46 | 47 | def setUp(self): 48 | try: 49 | if os.environ["BROWSER_TEST"] == "chrome": 50 | chrome_options = Options() 51 | 
chrome_options.add_argument("--headless") 52 | 53 | if "BROWSER_TEST_PATH" in os.environ: 54 | self.browser = webdriver.Chrome( 55 | executable_path=os.environ["BROWSER_TEST_PATH"], 56 | options=chrome_options, 57 | ) 58 | else: 59 | self.browser = webdriver.Chrome(options=chrome_options) 60 | else: 61 | raise KeyError 62 | except KeyError: 63 | self.browser = webdriver.Firefox() 64 | self.browser.implicitly_wait(3) 65 | 66 | def tearDown(self): 67 | self.browser.quit() 68 | 69 | def click_link_and_wait(self, link): 70 | link.click() 71 | 72 | def link_has_gone_stale(): 73 | try: 74 | # poll the link with an arbitrary call 75 | link.find_elements(By.ID, "doesnt-matter") 76 | return False 77 | except StaleElementReferenceException: 78 | return True 79 | 80 | self.wait_for(link_has_gone_stale) 81 | 82 | def wait_for(self, condition_function): 83 | start_time = time.time() 84 | while time.time() < start_time + 3: 85 | if condition_function(): 86 | return True 87 | else: 88 | time.sleep(0.1) 89 | raise Exception("Timeout waiting for {}".format(condition_function.__name__)) 90 | -------------------------------------------------------------------------------- /webfront/views/MODIFIER_README.md: -------------------------------------------------------------------------------- 1 | Modifier Manager 2 | === 3 | 4 | Modifiers are the technique defined to extend the functionality of the API. Modifiers are exposed to the user as URL parameters, so they can modify the current queryset by filtering, aggregating or change the serializer of the queryset. 5 | 6 | As is described [here](./README.md), the API executes a view for each level in the URL. The main purpose of the view execution is to filter the queryset. Additionally, modifiers are registered in during this process. For example, the modifier `with_names` is registered at the level of the [TaxonomyAccessionHandler](./taxonomy.py#L39). 
These are the parameters of the method to register a modifier:
* `parameter`: the associated URL parameter of the modifier.
* `action`: the modifier function. It should return a queryset or None. And its parameters are:
* `with_names`: Doesn't actually need to execute an action, so it uses the `passing` one defined in [modifiers.py#L684](./modifiers.py#L684) and registered in [taxonomy.py#L35](./taxonomy.py#L35). The actual change of this modifier is to use a different serializer (`SerializerDetail.TAXONOMY_DETAIL_NAMES`)
        "authors": "Song JJ, Smith SK, Hannon GJ, Joshua-Tor L.",
        "DOI_URL": "http://dx.doi.org/10.1126/science.1102514"
      },
        "authors": "Cerutti L, Mian N, Bateman A.",
        "DOI_URL": "http://dx.doi.org/10.1016/S0968-0004(00)01641-8"
      },
In that sense, these aren't pure serializers, as certain data tasks only get executed there; for instance, when using the modifier `with_names` (see [Modifiers docs](../views/MODIFIER_README.md)) the serializer needs to get the names out of the model ([taxonomy.py#L323](./taxonomy.py#L323)).
3. `/protein`: This is a different endpoint, so it is used to define the filter values
   * `serializer_detail_filter = SerializerDetail.PROTEIN_OVERVIEW`

Actually [custom.py](../views/custom.py#L231) registers multiple `serializer_detail_filter` values in case there are several filter endpoints in the URL.
verbose_name='ID')), 27 | ('source_database', models.CharField(db_index=True, max_length=100)), 28 | ('counts', jsonfield.fields.JSONField(null=True)), 29 | ('num_proteins', models.IntegerField(default=0)), 30 | ], 31 | ), 32 | migrations.AddField( 33 | model_name='proteome', 34 | name='num_proteins', 35 | field=models.IntegerField(default=0), 36 | ), 37 | migrations.AddField( 38 | model_name='taxonomy', 39 | name='num_proteins', 40 | field=models.IntegerField(default=0), 41 | ), 42 | migrations.AddField( 43 | model_name='taxonomyperentry', 44 | name='num_proteins', 45 | field=models.IntegerField(default=0), 46 | ), 47 | migrations.AddField( 48 | model_name='taxonomyperentrydb', 49 | name='num_proteins', 50 | field=models.IntegerField(default=0), 51 | ), 52 | migrations.AddIndex( 53 | model_name='taxonomyperentry', 54 | index=models.Index(fields=['entry_acc', 'taxonomy'], name='webfront_ta_entry_a_9b7542_idx'), 55 | ), 56 | migrations.AddIndex( 57 | model_name='taxonomyperentrydb', 58 | index=models.Index(fields=['source_database', 'taxonomy'], name='webfront_ta_source__af48cb_idx'), 59 | ), 60 | migrations.AddField( 61 | model_name='proteomeperentrydb', 62 | name='proteome', 63 | field=models.ForeignKey(blank=True, db_column='accession', null=True, on_delete=django.db.models.deletion.SET_NULL, to='webfront.proteome'), 64 | ), 65 | migrations.AddField( 66 | model_name='proteomeperentry', 67 | name='entry_acc', 68 | field=models.ForeignKey(db_column='entry_acc', null=True, on_delete=django.db.models.deletion.SET_NULL, to='webfront.entry'), 69 | ), 70 | migrations.AddField( 71 | model_name='proteomeperentry', 72 | name='proteome', 73 | field=models.ForeignKey(blank=True, db_column='accession', null=True, on_delete=django.db.models.deletion.SET_NULL, to='webfront.proteome'), 74 | ), 75 | migrations.AddIndex( 76 | model_name='proteomeperentrydb', 77 | index=models.Index(fields=['source_database', 'proteome'], name='webfront_pr_source__efd3c1_idx'), 78 | ), 79 | 
      "biologicalProcess": [
        {
          "id": "GO:0007186",
          "name": "G-protein coupled receptor signaling pathway"
        }
      ],
      {
        "id": "IPR026906",
        "name": "Repeat Leucine rich repeat 5",
        "type": "repeat"
      }
    ],
import json
import os
import sys
from concurrent.futures import ThreadPoolExecutor, as_completed
from urllib.error import HTTPError
from urllib.parse import urlencode
from urllib.request import urlopen


def get_uniprot_accessions(source_db, query):
    """Return accessions of all UniProt proteins matched by *query* in
    *source_db* that have an AlphaFold prediction, following pagination."""
    api_url = "https://www.ebi.ac.uk/interpro/api"
    url = f"{api_url}/protein/UniProt/entry/{source_db}/{query}/?"
    url += urlencode({"with": "alphafold", "page_size": 100})
    accessions = []

    while True:
        with urlopen(url) as res:
            payload = res.read().decode("utf-8")
            obj = json.loads(payload)

        accessions += [r["metadata"]["accession"] for r in obj["results"]]

        # The API paginates: "next" holds the URL of the following page,
        # or null on the last one.
        url = obj.get("next")
        if not url:
            break

    return accessions


def get_mem_db(query):
    """Resolve *query* to its source (member) database.

    Exits the program with an error message when the accession is
    unknown or does not belong to the entry endpoint.
    """
    url = f"https://www.ebi.ac.uk/interpro/api/utils/accession/{query}"

    try:
        with urlopen(url) as res:
            payload = res.read().decode("utf-8")
    except HTTPError:
        # urlopen() raises HTTPError for non-2xx responses instead of
        # returning, so the original `res.status != 200` test could
        # never fire; the failure must be caught here.
        sys.stderr.write(f"error: no results found for {query}\n")
        sys.exit(1)

    obj = json.loads(payload)
    if obj["endpoint"] != "entry":
        sys.stderr.write(f"error: {query} is not an entry\n")
        # Without exiting, the script would continue with a database
        # taken from a non-entry object.
        sys.exit(1)

    return obj["source_database"]


def download_af_pdb(accession, outdir):
    """Download the AlphaFold PDB prediction for *accession* into *outdir*."""
    url = f"https://alphafold.ebi.ac.uk/api/prediction/{accession}"
    with urlopen(url) as res:
        payload = res.read().decode("utf-8")
        obj = json.loads(payload)
        pdb_url = obj[0]["pdbUrl"]

    filename = os.path.basename(pdb_url)
    filepath = os.path.join(outdir, filename)

    # Stream the PDB file to disk in chunks.
    with open(filepath, "wb") as fh, urlopen(pdb_url) as res:
        for chunk in res:
            fh.write(chunk)


def main():
    if len(sys.argv) != 3:
        sys.stderr.write(f"usage: {sys.argv[0]} ACCESSION OUTDIR\n")
        sys.exit(2)

    query = sys.argv[1]
    outdir = sys.argv[2]

    source_db = get_mem_db(query)
    proteins = get_uniprot_accessions(source_db, query)

    with ThreadPoolExecutor(max_workers=8) as executor:
        done = 0
        milestone = step = 10
        total = len(proteins)

        while True:
            # Reset the future map each round: keeping futures from a
            # previous round would make as_completed() yield (and count)
            # already-processed downloads again.
            fs = {}
            for accession in proteins:
                f = executor.submit(download_af_pdb, accession, outdir)
                fs[f] = accession

            failed = []
            for f in as_completed(fs):
                accession = fs[f]

                try:
                    f.result()
                except Exception as exc:
                    failed.append(accession)
                    sys.stderr.write(f"error: {exc}\n")
                else:
                    done += 1
                    progress = done / total * 100
                    if progress >= milestone:
                        sys.stderr.write(f"progress: {progress:.0f}%\n")
                        milestone += step

            # Retry only the accessions that failed this round.
            proteins = failed
            if not proteins:
                break


if __name__ == "__main__":
    main()
62 | }, 63 | "structure_resolution": { 64 | "type": "float" 65 | }, 66 | "structure_date": { 67 | "type": "date" 68 | }, 69 | "structure_chain_acc": { 70 | "type": "text", 71 | "analyzer": "keyword" 72 | }, 73 | "structure_chain": { 74 | "type": "text", 75 | "analyzer": "keyword", 76 | "fielddata": true 77 | }, 78 | "structure_evidence": { 79 | "type": "keyword" 80 | }, 81 | "protein_af_score": { 82 | "type": "float" 83 | }, 84 | "protein_bfvd_score": { 85 | "type": "float" 86 | }, 87 | "proteome_acc": { 88 | "type": "keyword" 89 | }, 90 | "proteome_name": { 91 | "type": "keyword" 92 | }, 93 | "proteome_is_reference": { 94 | "type": "keyword" 95 | }, 96 | "tax_id": { 97 | "type": "long" 98 | }, 99 | "tax_name": { 100 | "type": "keyword" 101 | }, 102 | "tax_lineage": { 103 | "type": "keyword" 104 | }, 105 | "tax_rank": { 106 | "type": "keyword" 107 | }, 108 | "structure_protein_acc": { 109 | "type": "keyword" 110 | }, 111 | "structure_protein_db": { 112 | "type": "keyword" 113 | }, 114 | "structure_protein_length": { 115 | "type": "long" 116 | }, 117 | "structure_protein_locations": { 118 | "type": "object", 119 | "enabled": false 120 | }, 121 | "entry_protein_locations": { 122 | "type": "object", 123 | "enabled": false 124 | }, 125 | "entry_structure_locations": { 126 | "type": "object", 127 | "enabled": false 128 | }, 129 | "text_entry": { 130 | "type": "text", 131 | "analyzer": "autocomplete" 132 | }, 133 | "text_protein": { 134 | "type": "text", 135 | "analyzer": "autocomplete" 136 | }, 137 | "text_structure": { 138 | "type": "text", 139 | "analyzer": "autocomplete" 140 | }, 141 | "text_set": { 142 | "type": "text", 143 | "analyzer": "autocomplete" 144 | }, 145 | "text_taxonomy": { 146 | "type": "text", 147 | "analyzer": "autocomplete" 148 | }, 149 | "text_proteome": { 150 | "type": "text", 151 | "analyzer": "autocomplete" 152 | }, 153 | "set_acc": { 154 | "type": "keyword" 155 | }, 156 | "set_db": { 157 | "type": "keyword" 158 | } 159 | } 160 | } 161 | } 162 | 
The `QuerysetManager` class (see section below) is in charge of collecting the filters over the entities and building the corresponding queries for Elasticsearch and MySQL.
In any case a Django Queryset is created and can be used to serialize a response.
This query is not executed yet; only the string of the query is generated. The logic for Elasticsearch queries is explained [here](../searcher/README.md)
The user can freely combine the endpoint blocks (e.g. `/entry/interpro/ipr000001/protein/reviewed`). The only limitation is that a block describing an endpoint can only appear once in the URL.
class ModifierManager:
    """Registry and dispatcher for URL-parameter driven modifiers.

    Views register modifiers while the URL is being processed (see
    MODIFIER_README.md); once every level of the URL has been handled,
    ``execute()`` runs the FILTER and REPLACE_PAYLOAD modifiers whose
    associated URL parameter is present in the request, and
    ``execute_extenders()`` collects EXTEND_PAYLOAD additions.
    """

    def __init__(self, general_handler=None):
        # Handler in charge of the current request; used to decide whether
        # the URL is a single- or multiple-endpoint one.
        self.general_handler = general_handler
        self.modifiers = {}  # parameter name -> modifier descriptor dict
        self.payload = None  # payload produced by an executed modifier
        self.serializer = None  # serializer selected by an executed modifier
        self.many = None  # whether the modifier payload has many results
        self.search_size = None
        self.after_key = None
        self.before_key = None

    def register(
        self,
        parameter,
        action,
        type=ModifierType.FILTER,
        serializer=None,
        many=None,
        works_in_single_endpoint=True,
        works_in_multiple_endpoint=True,
    ):
        """Register *action* to run when *parameter* appears in the query string.

        :param parameter: the associated URL parameter of the modifier.
        :param action: callable ``action(value, general_handler)`` that
            returns a queryset or ``None``.
        :param type: one of the ``ModifierType`` values (FILTER by default).
        :param serializer: optional serializer override for the payload.
        :param many: set when the modifier payload has many results and
            needs the pagination logic.
        :param works_in_single_endpoint: allow this modifier on
            single-endpoint URLs.
        :param works_in_multiple_endpoint: allow this modifier on
            multiple-endpoint URLs.
        """
        self.modifiers[parameter] = {
            "action": action,
            "type": type,
            "serializer": serializer,
            "many": many,
            "works_in_single_endpoint": works_in_single_endpoint,
            "works_in_multiple_endpoint": works_in_multiple_endpoint,
        }

    def unregister(self, parameter):
        """Remove a previously registered modifier; a no-op if absent."""
        if parameter in self.modifiers:
            del self.modifiers[parameter]

    def _check_modifier(self, modifier):
        """Raise if *modifier* is not allowed on the current URL shape."""
        single = is_single_endpoint(self.general_handler)
        if single and not self.modifiers[modifier]["works_in_single_endpoint"]:
            raise Exception(
                "The modifier '{}' doesn't work on URLs of a single endpoint".format(
                    modifier
                )
            )
        if not single and not self.modifiers[modifier]["works_in_multiple_endpoint"]:
            raise Exception(
                "The modifier '{}' doesn't work on URLs of multiple endpoints".format(
                    modifier
                )
            )

    def execute(self, request):
        """Run the FILTER and REPLACE_PAYLOAD modifiers present in *request*.

        :returns: ``True`` when a REPLACE_PAYLOAD modifier ran, i.e. the
            modifier's payload replaces the model queryset.
        """
        payload_modifiers = {}
        queryset_modifiers = {}

        for p, m in self.modifiers.items():
            if m["type"] == ModifierType.REPLACE_PAYLOAD:
                payload_modifiers[p] = m
            elif m["type"] == ModifierType.FILTER:
                queryset_modifiers[p] = m

        # FILTER modifiers run first so they can refine the queryset before
        # any payload-replacing modifier takes over.
        for modifier in queryset_modifiers:
            param = request.query_params.get(modifier)
            if param is not None:
                self._check_modifier(modifier)
                self.payload = self.modifiers[modifier]["action"](
                    param, self.general_handler
                )
                self.serializer = self.modifiers[modifier]["serializer"]
                if self.modifiers[modifier]["many"] is not None:
                    # Sticky OR: once any executed modifier says "many",
                    # the flag stays set.
                    self.many = (self.many is not None and self.many) or self.modifiers[
                        modifier
                    ]["many"]
        use_model_as_payload = False
        for modifier in payload_modifiers:
            param = request.query_params.get(modifier)
            if param is not None:
                self._check_modifier(modifier)
                self.payload = self.modifiers[modifier]["action"](
                    param, self.general_handler
                )
                if self.modifiers[modifier]["many"] is not None:
                    self.many = (self.many is not None and self.many) or self.modifiers[
                        modifier
                    ]["many"]
                if self.serializer is None:
                    self.serializer = self.modifiers[modifier]["serializer"]
                else:
                    # BUG FIX: the original `raise (Exception, "...")` raised
                    # a tuple, which is itself a TypeError in Python 3
                    # ("exceptions must derive from BaseException"). Raise a
                    # proper exception instance instead.
                    raise Exception(
                        "only one modifier can change the shape of the payload"
                    )
                use_model_as_payload = True
        return use_model_as_payload

    def execute_extenders(self, request, current_payload):
        """Run EXTEND_PAYLOAD modifiers; return ``{parameter: extension}``."""
        extenders = {}
        for p, m in self.modifiers.items():
            if m["type"] == ModifierType.EXTEND_PAYLOAD:
                extenders[p] = m
        extensions = {}
        for extender in extenders:
            param = request.query_params.get(extender)
            if param is not None:
                extensions[extender] = self.modifiers[extender]["action"](
                    param, current_payload
                )
        return extensions
25 | From then onward, we will split the URL on the parts separated by `/`, and each of those 26 | parts will be called *block* in this document. (e.g. /api/[block1]/[block2]/...) 27 | 28 | A set of *blocks* can create an *endpoint-block*. For example`/api/protein/reviewed` defines a 29 | single *endpoint-block* formed by 2 *blocks*: `/protein` indicating that we are using the 30 | *protein* endpoint and `/reviewed` indicating the database to be filtered by. 31 | 32 | We have define a structure that allows to combine information from multiple endpoints. 33 | The first *endpoint-block* will be called *main-endpoint-block*. Any following 34 | *endpoint-block* are considered filters. 35 | In this way, the *main-endpoint-block* defines the set to return, and the rest of the endpoints 36 | filter the set. For example `/api/protein/reviewed/entry/interpro` is a list of reviewed proteins 37 | that have matches with InterPro entries; in contrast of `/api/entry/interpro/protein/reviewed`, 38 | which is a list of InterPro entries that can match reviewed proteins. 39 | 40 | 41 | ## GeneralHandler 42 | 43 | This configuration ensures that all the API requests are first managed in a single place, 44 | including all the common logic: 45 | * Defines the available endpoints 46 | * For the current request 47 | * Initializes the QuerysetManager [(Read More)](./QUERYSET_README.md). 48 | * Initializes the SearchController [(Read More)](../searcher/README.md). 49 | * Initializes the ModifierManager [(Read More)](./MODIFIER_README.md). 50 | * Splits a given URL into blocks. 51 | * If there are not blocks generates the response for the root query i.e. `/api/`. 52 | * Tries to get the response from the redis cache. 53 | * A recursion chain gets started: it invokes the `get()` method of its parent class `CustomView` 54 | and recursively finding a handler for each block. 
55 | 56 | ### Cache Strategy 57 | 58 | Besides using the cache for fast responses, we use it to avoid duplication of expensive queries. 59 | When a query is executed it has 90 seconds (by default) to get a response. 60 | Otherwise the response will be a time put HTTP code `408`, which will be temporarily saved in the 61 | cache. 62 | This however won't interrupt the query, which will keep its execution in parallel. 63 | if a duplicate request arrives before the original request finishes, it will automatically get the 64 | `408` from the cache. 65 | When the original request completes, it saves the response in the cache, replacing the `408` one. 66 | This way, any future duplicate request will get the value from the cache almost instantly. 67 | 68 | 69 | ## CustomView 70 | 71 | All *block* handlers inherit from `CustomView` and have to implement their `get()` method. 72 | 73 | ### main-endpoint-block 74 | 75 | Basically the task of the `get()` method in `CustomView` is to find what is the most appropriate 76 | handler for the current block, and once it founds it invokes the `get()` method of such handler. 77 | The usual tasks of a handler and in particular of the `get()` method are: 78 | 79 | * To add more filters to the current queryset. For example in a URL `/api/entry/interpro` the 80 | handler of the `/interpro` *block* adds a filter like `source_database="interpro"`. 81 | * To define modifiers. Which is our strategy to extend the API, for example the modifier 82 | `go_term` allows to filter a set of entries, selecting those which are annotated with a given GO ID. 83 | * To define a serializer linked to this *block*. The actual serializer that a response will use, 84 | it is the one linked to the last *block* of the *main-endpoint-block*. 85 | * Finally, the `get()` method will return the result of invoking the `get()` method of its parent class. 86 | This, of course is when the recursion occurs. 
class RESTRequestsTest(FunctionalTest):
    """Selenium-driven functional tests for the API's JSON responses.

    The expected counts in the assertions below reflect the fixture
    dataset loaded by the test server.
    """

    def test_request_entry_endpoint(self):
        """Walk /entry -> /entry/interpro -> /entry/interpro/<accession>."""
        self.browser.get(self.server_url + "/api/entry/?format=json")
        content = self.browser.find_element(By.TAG_NAME, "body").text
        jsonp = json.loads(content)

        self.assertEqual(len(jsonp["entries"]), 5, "the output has exactly 5 keys")
        self.assertIn('"member_databases"', content)
        self.assertIn('"interpro"', content)
        self.assertIn('"unintegrated"', content)

        num_interpro = jsonp["entries"]["interpro"]
        self.assertEqual(
            num_interpro, 2, "the fixtures dataset only includes two interpro entries"
        )

        self.browser.get(self.server_url + "/api/entry/interpro?format=json")
        content = self.browser.find_element(By.TAG_NAME, "body").text

        jsonp = json.loads(content)

        self.assertEqual(
            len(jsonp["results"]),
            num_interpro,
            "The response should have as many entries as reported in /entry ",
        )

        acc = jsonp["results"][0]["metadata"]["accession"]
        self.assertEqual(acc, "IPR001165")
        self.browser.get(
            self.server_url + "/api/entry/interpro/" + acc + "?format=json"
        )
        content = self.browser.find_element(By.TAG_NAME, "body").text

        jsonp = json.loads(content)
        self.assertEqual(
            acc,
            jsonp["metadata"]["accession"],
            "The accession in the response object should be the same as requested",
        )
        self.assertIn(
            "metadata",
            jsonp.keys(),
            "'metadata' should be one of the keys in the response",
        )
        self.assertTrue(
            isinstance(jsonp["metadata"]["go_terms"], list), "go_terms should be a list"
        )

        self.assertEqual(jsonp["metadata"]["counters"]["proteins"], 1)

    def test_request_protein_endpoint(self):
        """Walk /protein -> /protein/uniprot -> accession, then check that
        querying by ID returns the same object as querying by accession."""
        self.browser.get(self.server_url + "/api/protein/?format=json")
        content = self.browser.find_element(By.TAG_NAME, "body").text

        jsonp = json.loads(content)

        self.assertEqual(len(jsonp["proteins"]), 3, "the output has exactly 3 keys")

        self.assertIn('"uniprot"', content)

        num_uniprot = jsonp["proteins"]["uniprot"]
        self.assertEqual(
            num_uniprot, 5, "the TEST dataset only includes 5 uniprot entries"
        )

        self.browser.get(self.server_url + "/api/protein/uniprot?format=json")
        content = self.browser.find_element(By.TAG_NAME, "body").text

        jsonp = json.loads(content)

        self.assertEqual(
            len(jsonp["results"]),
            num_uniprot,
            "The response should have as many entries as reported in /entry ",
        )
        acc = jsonp["results"][0]["metadata"]["accession"]

        self.browser.get(
            self.server_url + "/api/protein/uniprot/" + acc + "?format=json"
        )
        content = self.browser.find_element(By.TAG_NAME, "body").text

        jsonp = json.loads(content)
        self.assertEqual(
            acc,
            jsonp["metadata"]["accession"],
            "The accession in the response object should be the same as requested",
        )
        self.assertIn(
            "category",
            jsonp["metadata"]["go_terms"][0],
            "the key is part of the go_terms and has been parsed OK",
        )

        self.browser.get(
            self.server_url
            + "/api/protein/uniprot/"
            + jsonp["metadata"]["id"]
            + "?format=json"
        )
        content2 = self.browser.find_element(By.TAG_NAME, "body").text

        jsonp2 = json.loads(content2)
        self.assertEqual(
            jsonp,
            jsonp2,
            "The recovered JSON object when querying by accession should be the same as the "
            "corresponding search by ID",
        )

    def test_request_to_api_frontend(self):
        """Check the browsable HTML frontend renders the same JSON payload
        as the raw ?format=json response."""
        url = "/api/entry/"
        self.browser.get(self.server_url + url)

        req_info = self.browser.find_element(By.CSS_SELECTOR, ".request-info").text

        self.assertIn("GET", req_info)
        self.assertIn(url, req_info)

        response = self.browser.find_element(By.CSS_SELECTOR, ".response-info").text
        # Raw string avoids the invalid escape sequences ("\{", "\[") the
        # previous literal triggered; matches the first '{' or '[' so the
        # JSON body can be sliced out of the rendered page text.
        match = re.search(r"[{\[]", response)
        json_frontend = json.loads(response[match.start() :])

        self.browser.find_element(By.CSS_SELECTOR, ".format-selection button").click()
        self.click_link_and_wait(
            self.browser.find_element(By.CSS_SELECTOR, ".js-tooltip.format-option")
        )

        content = self.browser.find_element(By.TAG_NAME, "body").text

        jsonp = json.loads(content)

        self.assertEqual(json_frontend, jsonp)
class ProteomeAccessionHandler(CustomView):
    # Handles the accession block of a proteome URL,
    # e.g. /api/proteome/uniprot/UP000000212
    level_description = "Proteome accession level"
    serializer_class = ProteomeSerializer
    queryset = Proteome.objects.all()
    # A single proteome object is returned, not a paginated collection.
    many = False
    serializer_detail_filter = SerializerDetail.PROTEOME_DETAIL

    def get(
        self,
        request,
        endpoint_levels,
        available_endpoint_handlers=None,
        level=0,
        parent_queryset=None,
        handler=None,
        general_handler=None,
        *args,
        **kwargs
    ):
        """Filter to the requested accession, then continue the recursion.

        Adds a case-insensitive accession filter for the current URL block
        (``endpoint_levels[level - 1]``) and delegates the remaining blocks
        to ``CustomView.get``.
        """
        general_handler.queryset_manager.add_filter(
            "proteome", accession__iexact=endpoint_levels[level - 1]
        )
        # self.serializer_detail = SerializerDetail.PROTEIN_DETAIL
        return super(ProteomeAccessionHandler, self).get(
            request._request,
            endpoint_levels,
            available_endpoint_handlers,
            level,
            self.queryset,
            handler,
            general_handler,
            request,
            *args,
            **kwargs
        )

    @staticmethod
    def filter(queryset, level_name="", general_handler=None):
        """Apply the accession filter when this block appears in a
        filter-endpoint-block; the queryset itself is returned unchanged
        (the filter is recorded in the queryset manager)."""
        general_handler.queryset_manager.add_filter(
            "proteome", accession__iexact=level_name
        )
        return queryset
general_handler.queryset_manager.order_by("-num_proteins") 79 | general_handler.modifiers.register( 80 | "extra_fields", add_extra_fields(Proteome, "counters") 81 | ) 82 | general_handler.modifiers.register("show-subset", show_subset) 83 | 84 | return super(UniprotHandler, self).get( 85 | request._request, 86 | endpoint_levels, 87 | available_endpoint_handlers, 88 | level, 89 | self.queryset, 90 | handler, 91 | general_handler, 92 | request, 93 | *args, 94 | **kwargs 95 | ) 96 | 97 | @staticmethod 98 | def filter(queryset, level_name="", general_handler=None): 99 | general_handler.queryset_manager.add_filter("proteome", accession__isnull=False) 100 | return queryset 101 | 102 | 103 | class ProteomeHandler(CustomView): 104 | level_description = "Proteome level" 105 | from_model = False 106 | child_handlers = [("uniprot", UniprotHandler)] 107 | many = False 108 | serializer_class = ProteomeSerializer 109 | serializer_detail = SerializerDetail.PROTEOME_OVERVIEW 110 | serializer_detail_filter = SerializerDetail.PROTEOME_OVERVIEW 111 | 112 | def get_database_contributions(self, queryset): 113 | qs = Proteome.objects.filter(accession__in=queryset) 114 | return {"proteomes": {"uniprot": qs.count()}} 115 | 116 | def get( 117 | self, 118 | request, 119 | endpoint_levels, 120 | available_endpoint_handlers=None, 121 | level=0, 122 | parent_queryset=None, 123 | handler=None, 124 | general_handler=None, 125 | *args, 126 | **kwargs 127 | ): 128 | general_handler.modifiers.register( 129 | "group_by", 130 | group_by(Proteome, {"proteome_is_reference": "proteome_acc"}), 131 | type=ModifierType.REPLACE_PAYLOAD, 132 | many=False, 133 | serializer=SerializerDetail.GROUP_BY, 134 | ) 135 | general_handler.modifiers.register( 136 | "is_reference", filter_by_boolean_field("proteome", "is_reference") 137 | ) 138 | general_handler.queryset_manager.reset_filters("proteome", endpoint_levels) 139 | 140 | return super(ProteomeHandler, self).get( 141 | request._request, 142 | endpoint_levels, 
143 | available_endpoint_handlers, 144 | level, 145 | self.queryset, 146 | handler, 147 | general_handler, 148 | request, 149 | *args, 150 | **kwargs 151 | ) 152 | 153 | @staticmethod 154 | def filter(queryset, level_name="", general_handler=None): 155 | general_handler.queryset_manager.add_filter("proteome", accession__isnull=False) 156 | return queryset 157 | -------------------------------------------------------------------------------- /webfront/views/cache.py: -------------------------------------------------------------------------------- 1 | import re 2 | from urllib.parse import urlparse, urlunparse, parse_qs, urlencode 3 | from collections import OrderedDict 4 | 5 | from django.core.cache import cache 6 | 7 | from rest_framework import status 8 | from webfront.response import Response 9 | from webfront.views.utils import endpoints 10 | 11 | from django.conf import settings 12 | 13 | multiple_slashes = re.compile("/+") 14 | 15 | 16 | FIVE_DAYS = 5 * 24 * 60 * 60 17 | SHOULD_NO_CACHE = -1 18 | 19 | names = [ep["name"].lower() for ep in endpoints] 20 | 21 | short_life_parameters = [ 22 | "cursor", 23 | "size", 24 | "go_terms", 25 | "ida_ignore", 26 | "ida_search", 27 | "format", 28 | "page_size", 29 | "search", 30 | ] 31 | 32 | no_cache_modifiers = [ 33 | "extra_features", 34 | "residues", 35 | "isoforms", 36 | "ida", 37 | "taxa", 38 | "model:", 39 | "annotation:info", 40 | "subfamilies", 41 | "subfamily", 42 | "page_size", 43 | "interpro_n", 44 | ] 45 | 46 | 47 | def get_timeout_from_path(path, endpoint_levels): 48 | parsed = urlparse(path) 49 | # process query 50 | query = parse_qs(parsed.query, keep_blank_values=True) 51 | 52 | if ( # is requesting by accession 53 | len(endpoint_levels) == 3 54 | and len([ep for ep in endpoint_levels if ep.lower() in names]) == 1 55 | ): 56 | # it doesn't have modifiers 57 | if len(query.keys()) == 0: 58 | return SHOULD_NO_CACHE 59 | for parameter in no_cache_modifiers: 60 | if parameter in query: 61 | return 
class InterProCache:
    """Thin wrapper over the configured Django cache storing whole responses.

    Keys are canonicalised URLs (see ``canonical``) so equivalent requests
    share a single cache entry.
    """

    def set(self, key, response, timeout=None):
        """Cache ``response`` under the canonical form of ``key``.

        Writes happen only when both ``enable_caching`` and
        ``enable_cache_write`` are on, and only for statuses worth keeping:
        200, 204, 410, and the 408 placeholder used to deduplicate
        expensive in-flight queries.

        With ``timeout=None`` the value is written unconditionally and made
        persistent; with a timeout, ``cache.add`` is used, which does not
        overwrite an existing entry.
        """
        if settings.INTERPRO_CONFIG.get(
            "enable_caching", False
        ) and settings.INTERPRO_CONFIG.get("enable_cache_write", False):
            if response.data and response.status_code in (
                status.HTTP_200_OK,
                status.HTTP_204_NO_CONTENT,
                status.HTTP_410_GONE,
                status.HTTP_408_REQUEST_TIMEOUT,
            ):
                key = canonical(key)
                value = {
                    "data": {x: response.data[x] for x in response.data},
                    "status": response.status_code,
                    "template_name": response.template_name,
                    "exception": response.exception,
                    "content_type": response.content_type,
                    "headers": {
                        "Content-Type": response.get("Content-Type", ""),
                        "InterPro-Version": response.get("InterPro-Version", ""),
                        "InterPro-Version-Minor": response.get(
                            "InterPro-Version-Minor", ""
                        ),
                        "Server-Timing": response.get("Server-Timing", ""),
                        # Marks the stored copy so clients can tell the
                        # response came from the cache.
                        "Cached": "true",
                    },
                }
                if timeout is None:  # fixed: was `timeout == None`
                    cache.set(key, value)
                    # `persist` strips any expiry from the key (django-redis
                    # TTL operation), keeping it until explicitly evicted.
                    cache.persist(key)
                else:
                    cache.add(key, value, timeout=timeout)

    def get(self, key):
        """Return the cached ``Response`` for ``key``.

        Returns None on a cache miss or when caching is disabled.
        """
        if settings.INTERPRO_CONFIG.get("enable_caching", False):
            key = canonical(key)
            value = cache.get(key)
            if value:
                # Rehydrate the stored dict back into a Response object.
                value = Response(
                    value.get("data"),
                    value.get("status", 200),
                    value.get("template_name"),
                    value.get("headers", {}),
                    value.get("exception", False),
                    value.get("content_type"),
                )
            return value
class CustomPagination(CursorPagination):
    """Cursor pagination with support for elasticsearch-driven result sets.

    When the queryset was built from an elasticsearch query, the elastic
    response supplies the total count (``search_size``), composite
    after/before keys used as cursors, and the accession order of the
    results (``elastic_result``).
    """

    page_size = settings.INTERPRO_CONFIG.get("default_page_size", 20)
    page_size_query_param = "page_size"
    ordering = "accession"
    current_size = None
    after_key = None
    before_key = None
    elastic_result = None

    def get_paginated_response(self, data):
        """Build the {count, next, previous, results} envelope, appending
        any payload extensions produced by modifiers."""
        base = [
            ("count", self.current_size),
            ("next", self.get_next_link()),
            ("previous", self.get_previous_link()),
            ("results", self._sortBasedOnElastic(data["data"])),
        ]
        if "extensions" in data and len(data["extensions"]) > 0:
            for ext in data["extensions"]:
                base.append((ext, data["extensions"][ext]))
        return Response(OrderedDict(base))

    # If there is data in elastic_result, it implies that the queryset was
    # created by querying elastic first. This method uses the list of
    # accessions retrieved via elastic to order the results.
    def _sortBasedOnElastic(self, data):
        if self.elastic_result is None:
            return data
        # Index items by lowercased accession once (first occurrence wins,
        # matching the previous first-match scan) instead of re-scanning
        # `data` for every elastic accession (was O(n*m)).
        by_accession = {}
        for item in data:
            acc = item.get("metadata", {}).get("accession", "").lower()
            by_accession.setdefault(acc, item)
        ordered_data = []
        for acc in self.elastic_result:
            obj = by_accession.get(acc.lower())
            if obj is not None:
                ordered_data.append(obj)
        return ordered_data

    def _get_position_from_instance(self, instance, ordering):
        # Some querysets yield (position, ...) tuples rather than model
        # instances; use the precomputed position in that case.
        if isinstance(instance, tuple):  # fixed: was `type(instance) == tuple`
            return instance[0]
        return super(CustomPagination, self)._get_position_from_instance(
            instance, ordering
        )

    # Extract some values passed as kwargs before invoking the
    # implementation in the super class.
    def paginate_queryset(self, queryset, request, **kwargs):
        # Reset per-request state: paginator instances may be reused.
        self.current_size = None
        self.after_key = None
        self.elastic_result = None
        if (
            hasattr(queryset, "model")
            and queryset.model._meta.ordering != []
            and queryset.model._meta.ordering != ""
            and queryset.model._meta.ordering is not None
        ):
            self.ordering = queryset.model._meta.ordering
        if "search_size" in kwargs and kwargs["search_size"] is not None:
            # Cursor pagination requires a deterministic order.
            if not queryset.ordered:
                queryset = queryset.order_by("accession")
            self.current_size = kwargs["search_size"]
        if "after_key" in kwargs and kwargs["after_key"] is not None:
            self.after_key = kwargs["after_key"]
        if "before_key" in kwargs and kwargs["before_key"] is not None:
            self.before_key = kwargs["before_key"]
        if "elastic_result" in kwargs and kwargs["elastic_result"] is not None:
            self.elastic_result = kwargs["elastic_result"]
        if "ordering" in kwargs and kwargs["ordering"] is not None:
            self.ordering = kwargs["ordering"]
        return super(CustomPagination, self).paginate_queryset(
            queryset, request, kwargs["view"]
        )

    def decode_cursor(self, request):
        # When elastic supplies composite after/before keys, the DRF cursor
        # is ignored: pagination is driven by those keys instead.
        if self.after_key is not None or self.before_key is not None:
            return None
        return super(CustomPagination, self).decode_cursor(request)

    def has_next_page(self):
        # An after_key from elastic implies there is a next page.
        if self.after_key is None:
            return self.has_next
        return True

    def has_prev_page(self):
        # A before_key from elastic implies there is a previous page.
        if self.before_key is None:
            return self.has_previous
        return True

    def get_next_link(self):
        if not self.has_next_page():
            return None
        self.base_url = replace_url_host(self.base_url)
        if self.after_key is None:
            return super(CustomPagination, self).get_next_link()
        return replace_query_param(self.base_url, "cursor", self.after_key)

    def get_previous_link(self):
        if not self.has_prev_page():
            return None
        self.base_url = replace_url_host(self.base_url)
        if self.before_key is None:
            return super(CustomPagination, self).get_previous_link()
        # A leading "-" marks a reversed (previous-page) cursor.
        return replace_query_param(
            self.base_url, "cursor", "-{}".format(self.before_key)
        )
class SetTypeHandler(CustomView):
    # Handles the database block of a set URL, e.g. /api/set/pfam or
    # /api/set/all
    level_description = "set type level"
    child_handlers = [
        (entry_sets_accessions, SetAccessionHandler),
        # ("proteome", ProteomeHandler),
    ]
    queryset = Set.objects.all()
    serializer_class = SetSerializer
    serializer_detail = SerializerDetail.SET_HEADERS
    serializer_detail_filter = SerializerDetail.SET_DB

    def get(
        self,
        request,
        endpoint_levels,
        available_endpoint_handlers=None,
        level=0,
        parent_queryset=None,
        handler=None,
        general_handler=None,
        *args,
        **kwargs
    ):
        """Filter sets by source database and register list modifiers.

        The special value ``all`` skips the database filter so every set is
        returned. Registers the ``extra_fields`` and ``show-subset``
        modifiers before delegating the remaining blocks to
        ``CustomView.get``.
        """
        db = endpoint_levels[level - 1]
        if db.lower() != "all":
            general_handler.queryset_manager.add_filter("set", source_database=db)
        general_handler.modifiers.register(
            "extra_fields", add_extra_fields(Set, "counters")
        )
        general_handler.modifiers.register("show-subset", show_subset)
        return super(SetTypeHandler, self).get(
            request._request,
            endpoint_levels,
            available_endpoint_handlers,
            level,
            self.queryset,
            handler,
            general_handler,
            request,
            *args,
            **kwargs
        )

    @staticmethod
    def filter(queryset, level_name="", general_handler=None):
        """Apply the source-database filter when this block appears in a
        filter-endpoint-block (``all`` means no filter); the queryset is
        returned unchanged, the filter is recorded in the manager."""
        if level_name.lower() != "all":
            general_handler.queryset_manager.add_filter(
                "set", source_database=level_name
            )
        return queryset
level_name="", general_handler=None): 178 | general_handler.queryset_manager.add_filter("set", accession__isnull=False) 179 | return queryset 180 | -------------------------------------------------------------------------------- /deploy_tools/README.md: -------------------------------------------------------------------------------- 1 | Provisioning a new site 2 | ======================= 3 | 4 | ## Required packages: 5 | 6 | * Python 3 7 | * Git 8 | * pip 9 | 10 | eg, on Ubuntu: 11 | 12 | sudo apt-get install git python3 python3-pip 13 | 14 | ## Folder structure: 15 | Assume we have a user account at /home/username 16 | 17 | ``` 18 | /home/username 19 | └── sites 20 | └── PROJECT 21 | ├── database 22 | ├── source 23 | ├── static_files 24 | └── virtualenv 25 | ``` 26 | 27 | ## Local Deployment 28 | 29 | 1. Create directory structure in ~/sites 30 | 31 | ```bash 32 | mkdir -p PROJECT/database 33 | mkdir -p PROJECT/source 34 | mkdir -p PROJECT/static_files 35 | mkdir -p PROJECT/virtualenv 36 | ``` 37 | 38 | 2. Pull down source code into folder named source 39 | 40 | ```bash 41 | git clone https://github.com/ProteinsWebTeam/interpro7-api.git PROJECT/source 42 | cd PROJECT/source 43 | ``` 44 | 45 | From now on all the command assume you are in the ```PROJECT/source``` directory. 46 | 47 | 3. Start the virtual env in the assigned folder: 48 | 49 | ```bash 50 | python -m venv virtualenv 51 | ``` 52 | 53 | 4. Install requirements in the virtual environment 54 | 55 | ```bash 56 | ../virtualenv/bin/pip install -r requirements.txt 57 | ``` 58 | 59 | 5. Install requirements for development 60 | 61 | ```bash 62 | ../virtualenv/bin/pip install -r dev_requirements.txt 63 | ``` 64 | 65 | 5. Create a local configuration file in `config/interpro.local.yml`. 66 | In this file you can overwite any of the settings included in the read-only file `config/interpro.yml`. 
67 | Below is an example of the local config that will run in debug mode using the test DB with SQLite, a local instance of elasticsearch without redis: 68 | ```yaml 69 | use_test_db: false 70 | debug: true 71 | allowed_host: ["localhost", "127.0.0.1"] 72 | searcher_path: "https://localhost:9200" 73 | searcher_index: "test" 74 | searcher_user: "elastic" 75 | searcher_password: "password" 76 | api_url: "http://localhost:8007/api/" 77 | static_url: "api/static_files/" 78 | searcher_test_path: "https://localhost:9200" 79 | searcher_test_password: "password" 80 | 81 | ``` 82 | 83 | * This configuration assumes a running instance of elasticsearch in port 9200. For details on how to install elasticsearch go 84 | [HERE](https://www.elastic.co/guide/en/elasticsearch/reference/current/_installation.html) 85 | 86 | 6. Migrate the database models (For SQLite) 87 | 88 | ```bash 89 | ../virtualenv/bin/python manage.py migrate 90 | ``` 91 | 92 | If anything goes wrong check that the database directory exists 93 | 94 | ```bash 95 | ls ../database 96 | ``` 97 | 98 | 7. Collect the static files. Only necessary for server deployment. 99 | 100 | ```bash 101 | ../virtualenv/bin/python manage.py collectstatic --noinput 102 | ``` 103 | 104 | 8. Load the fixture data into the SQLite DB. 105 | ```bash 106 | ../virtualenv/bin/python manage.py loaddata webfront/tests/fixtures_*.json 107 | 108 | 9. Install Elasticsearch and load index. Currently running version 8.12 with authentication by password 109 | 110 | e.g for OSX: brew install elasticsearch@8.12 111 | ``` 112 | curl -XPUT 'localhost:9200/test?pretty' -H 'Content-Type: application/json' -d @config/elastic_mapping.json 113 | ``` 114 | 115 | 10. Run the tests. When running the tests, the API loads the fixtures in the existing elasticsearch instance, which is necessary to run the server with fixtures. 116 | ``` 117 | ../virtualenv/bin/python manage.py test 118 | ``` 119 | 120 | 11. 
Start the server 121 | ``` 122 | ../virtualenv/bin/python manage.py runserver 0.0.0.0:8000 123 | ``` 124 | 125 | 12. _[Optional]_ Install precommit, black and the pre-commit hook, to enable the formatting of files before each commit. 126 | ``` 127 | ../virtualenv/bin/pip install pre-commit black 128 | ../virtualenv/bin/pre-commit install 129 | ``` 130 | *Note 1*:It is important to run the test in Python 3.8 because the VMs where the API runs use that version. 131 | 132 | 133 | 134 | ## Testing 135 | 136 | The unit tests are located in `[project]/source/webfront/tests/`. 137 | 138 | To run unit tests use 139 | 140 | ```sh 141 | ../virtualenv/bin/python manage.py test webfront 142 | ``` 143 | 144 | The functional test are in `[project]/functional_tests` and are configured to Google Chrome (or Chromium), so you need to have it installed in your machine. 145 | 146 | To run functional tests use 147 | 148 | ```sh 149 | export BROWSER_TEST="chrome" 150 | 151 | # Only required if ChromeDriver is not in your PATH 152 | # or if its binary is not `chromedriver` (e.g. `chromium.chromedriver`) 153 | export BROWSER_TEST_PATH="/path/to/chromedriver" 154 | 155 | ../virtualenv/bin/python manage.py test functional_tests 156 | ``` 157 | 158 | As a reference [HERE](https://docs.google.com/presentation/d/13_a6IbTq8KPGRH5AhsauEDJt4jEXNsT7DFdg1PNn4_I/edit?usp=sharing) is a graphic describing the fixtures. 159 | 160 | All the test can be run at the same time: 161 | 162 | ```sh 163 | ../virtualenv/bin/python manage.py test 164 | ``` 165 | 166 | ## Setting up real data (MySQL - elasticsearch) 167 | 168 | For the next steps you need an installation of MySQL with a database compatible with the defined [model](https://github.com/ProteinsWebTeam/interpro7-api/blob/master/webfront/models/interpro_new.py). 169 | 170 | 1. Remove the line `use_test_db: true` from the `config/interpro.local.yml` file. 
171 | You could also set the value to false, but given that false is the default value, you can just remove it. 172 | 173 | 2. Edit the same `config/interpro.local.yml` file, changing the `searcher_path` setting for one with the elastic search instance that corresponds with the data in MySQL. 174 | 175 | 3. Copy the template mysql configuration file into `config/mysql.yml` and edit the file with your data. 176 | ```bash 177 | cp config/mysql.template.yml config/mysql.yml 178 | ``` 179 | 180 | 3. Start the server 181 | ``` 182 | ../virtualenv/bin/python manage.py runserver 0.0.0.0:8000 183 | ``` 184 | -------------------------------------------------------------------------------- /webfront/tests/tests_utils.py: -------------------------------------------------------------------------------- 1 | from django.test import TestCase 2 | 3 | from webfront.views.common import map_url_to_levels 4 | from webfront.views.cache import ( 5 | canonical, 6 | get_timeout_from_path, 7 | SHOULD_NO_CACHE, 8 | FIVE_DAYS, 9 | ) 10 | from webfront.serializers.content_serializers import reverse_url 11 | 12 | 13 | class CanonicalTestCase(TestCase): 14 | def test_basic_unchanged_urls(self): 15 | url = "/api/entry/InterPro/IPR000001/" 16 | self.assertEqual(url, canonical(url)) 17 | url = "/api/PrOtEiN/ReViEwEd/" 18 | self.assertEqual(url, canonical(url)) 19 | 20 | def test_with_query_unchanged_urls(self): 21 | url = "/api/protein/reviewed/?length=1-100" 22 | self.assertEqual(url, canonical(url)) 23 | url = "/api/protein/reviewed/?length=1-100&page=2" 24 | self.assertEqual(url, canonical(url)) 25 | url = "/api/protein/reviewed/?length=1-100&page=2&page_size=50" 26 | self.assertEqual(url, canonical(url)) 27 | 28 | def test_basic_incorrect_slash_urls(self): 29 | self.assertEqual( 30 | "/api/entry/InterPro/IPR000001/", 31 | canonical("/api/entry//InterPro/IPR000001/"), 32 | ) 33 | self.assertEqual( 34 | "/api/entry/InterPro/IPR000001/", 35 | canonical("/api///entry//////InterPro//IPR000001/"), 
36 | ) 37 | self.assertEqual( 38 | "/api/entry/InterPro/IPR000001/", 39 | canonical("/api///entry//////InterPro//IPR000001"), 40 | ) 41 | self.assertEqual( 42 | "/api/entry/InterPro/IPR000001/", canonical("/api/entry/InterPro/IPR000001") 43 | ) 44 | 45 | def test_with_query_reorder_urls(self): 46 | self.assertEqual( 47 | "/api/entry/InterPro/?integrated=pfam&page=2", 48 | canonical("/api/entry/InterPro/?page=2&integrated=pfam"), 49 | ) 50 | 51 | def with_query_remove_unneeded_urls(self): 52 | self.assertEqual( 53 | "/api/entry/InterPro/?integrated=pfam", 54 | canonical("/api/entry/InterPro/?page=1&integrated=pfam"), 55 | ) 56 | self.assertEqual( 57 | "/api/entry/InterPro/", 58 | canonical("/api/entry/InterPro/?page=1&page_size=20"), 59 | ) 60 | self.assertEqual( 61 | "/api/entry/InterPro/?integrated=pfam", 62 | canonical("/api/entry/InterPro/?integrated=pfam&page_size=20"), 63 | ) 64 | 65 | 66 | class CacheLifespanTestCase(TestCase): 67 | def test_urls_no_cacheable(self): 68 | urls = [ 69 | "/entry/InterPro/IPR000001/", 70 | "/protein/uniprot/p99999/?extra_features", 71 | "/entry/InterPro/?page", 72 | ] 73 | for url in urls: 74 | levels = map_url_to_levels(url.split("?")[0]) 75 | self.assertEqual(SHOULD_NO_CACHE, get_timeout_from_path(url, levels)) 76 | 77 | def test_urls_short_life(self): 78 | urls = [ 79 | "/entry/InterPro/?page=33", 80 | "/entry/InterPro/?page_size=33", 81 | "/entry/InterPro/?format", 82 | ] 83 | for url in urls: 84 | levels = map_url_to_levels(url.split("?")[0]) 85 | self.assertEqual(FIVE_DAYS, get_timeout_from_path(url, levels)) 86 | 87 | def test_urls_long_life(self): 88 | urls = [ 89 | "/entry/", 90 | "/entry/InterPro/", 91 | "/entry/InterPro/protein", 92 | "/entry/InterPro/IPR000001/protein", 93 | "/protein/uniprot/p99999/?conservation", 94 | ] 95 | for url in urls: 96 | levels = map_url_to_levels(url.split("?")[0]) 97 | self.assertIsNone(get_timeout_from_path(url, levels)) 98 | 99 | 100 | class TestReverseURL(TestCase): 101 | def 
test_reverse_to_entry(self): 102 | urls = [ 103 | [ 104 | "/protein/uniprot/p99999/entry/InterPro/", 105 | "/entry/InterPro/protein/uniprot/p99999/", 106 | ], 107 | [ # Modifiers are removed 108 | "/protein/uniprot/p99999/entry/InterPro/?some-modifier", 109 | "/entry/InterPro/protein/uniprot/p99999/", 110 | ], 111 | [ # 3 endpoints -endpoint 112 | "/protein/uniprot/p99999/entry/InterPro/structure", 113 | "/entry/InterPro/protein/uniprot/p99999/structure/", 114 | ], 115 | [ # 3 endpoints - db 116 | "/protein/uniprot/p99999/entry/InterPro/structure/pdb", 117 | "/entry/InterPro/protein/uniprot/p99999/structure/pdb", 118 | ], 119 | [ # 3 endpoints - accession 120 | "/protein/uniprot/p99999/entry/InterPro/structure/pdb/1cuk", 121 | "/entry/InterPro/protein/uniprot/p99999/structure/pdb/1cuk", 122 | ], 123 | ] 124 | for url in urls: 125 | self.assertEqual(reverse_url(url[0], "entry", "p99999"), url[1]) 126 | 127 | def test_reverse_to_protein(self): 128 | urls = [ 129 | [ 130 | "/entry/InterPro/ipr000001/protein/uniprot/", 131 | "/protein/uniprot/entry/InterPro/ipr000001/", 132 | ], 133 | [ # Modifiers are removed 134 | "/entry/InterPro/ipr000001/protein/uniprot/?some-modifier", 135 | "/protein/uniprot/entry/InterPro/ipr000001/", 136 | ], 137 | [ # 3 endpoints -endpoint 138 | "/entry/InterPro/ipr000001/protein/uniprot/structure", 139 | "/protein/uniprot/entry/InterPro/ipr000001/structure/", 140 | ], 141 | [ # 3 endpoints - db 142 | "/entry/InterPro/ipr000001/protein/uniprot/structure/pdb", 143 | "/protein/uniprot/entry/InterPro/ipr000001/structure/pdb", 144 | ], 145 | [ # 3 endpoints - accession 146 | "/entry/InterPro/ipr000001/protein/uniprot/structure/pdb/1cuk", 147 | "/protein/uniprot/entry/InterPro/ipr000001/structure/pdb/1cuk", 148 | ], 149 | ] 150 | for url in urls: 151 | self.assertEqual(reverse_url(url[0], "protein", "ipr000001"), url[1]) 152 | -------------------------------------------------------------------------------- /release/management/commands/warmer.py: 
def get_unique_lines(logfiles):
    """Read request-log files and return the unique canonical URLs.

    Blank lines and lines starting with ``#`` are skipped; every other
    line is canonicalised (page_size stripped) and counted. The result
    is sorted by frequency, most requested first.
    """
    progress = tqdm(logfiles, unit="files", leave=True, desc="reading files")
    counts = OrderedDict()
    for path in progress:
        with open(path, "r") as handle:
            lines = tqdm(
                handle, desc="reading {}".format(path), unit="lines", leave=False
            )
            for raw in lines:
                stripped = raw.strip()
                if not stripped or stripped.startswith("#"):
                    continue
                url = canonical(stripped, remove_all_page_size=True)
                counts[url] = counts.get(url, 0) + 1
    return sorted(counts, key=counts.get, reverse=True)
def send_queries(root, page_sizes, queries):
    """Send each query twice (to warm the cache) and yield stat tuples.

    For successful responses whose JSON body contains a ``results`` list,
    also trigger caching of the same URL at each extra page size.
    KeyboardInterrupt always propagates; other exceptions are logged to
    stderr and re-raised.
    """
    wrapped = tqdm(unit="queries", desc="sending queries", total=len(queries))
    for query in queries:
        url = root + query
        wrapped.update()
        wrapped.set_description(url, True)
        try:
            responses = send_query(url)
            yield stat_message(responses)
            # BUG FIX: the original used `is not`, which compares object
            # identity, not value; it only worked because CPython caches
            # small ints. Use `!=` for the status-code comparison.
            if responses[1].status_code != rest_framework.status.HTTP_200_OK:
                continue
            # check to see if the result content contains an array of results
            # if so, we need to also trigger the caching for other page sizes
            results = responses[1].json().get("results")
            if results and len(results):
                for size in page_sizes:
                    extra_url = canonical(
                        "{}{}page_size={}".format(
                            url, "?" if url.endswith("/") else "&", size
                        )
                    )
                    wrapped.update()
                    wrapped.total += 1
                    wrapped.set_description(extra_url, True)
                    try:
                        responses = send_query(extra_url)
                        yield stat_message(responses)
                    except KeyboardInterrupt:
                        raise
                    except Exception as e:
                        print(e, file=sys.stderr)
                        raise
        except KeyboardInterrupt:
            raise
        except Exception as e:
            print(e, file=sys.stderr)
            raise
    wrapped.set_description("sent all {} queries".format(wrapped.total), True)
    return
def main(logfiles, root, page_sizes, top, output, *args, **kwargs):
    """Run the full warm-up: collect unique URLs, request them, report stats.

    Returns the number of failed URLs (used as the process exit status).
    """
    unique_urls = get_unique_lines(logfiles)
    print("- Found {} unique URLs".format(len(unique_urls)), file=sys.stderr)
    collected_stats = list(send_queries(root, page_sizes, unique_urls))
    return analyze_stats(collected_stats, top, output)
    def get(
        self,
        request,
        endpoint_levels,
        available_endpoint_handlers=None,
        level=0,
        parent_queryset=None,
        handler=None,
        general_handler=None,
        *args,
        **kwargs
    ):
        """Handle GET for a single taxonomy accession.

        Filters the taxonomy queryset down to the accession taken from the
        URL (``endpoint_levels[level - 1]``), registers the URL modifiers
        available at this level, then delegates to the generic CustomView
        handling.
        """
        # The accession is the URL segment just before the current level.
        general_handler.queryset_manager.add_filter(
            "taxonomy", accession=endpoint_levels[level - 1]
        )
        # ?with_names: no queryset change (`passing`), only a richer serializer.
        general_handler.modifiers.register(
            "with_names", passing, serializer=SerializerDetail.TAXONOMY_DETAIL_NAMES
        )
        # ?filter_by_entry=<acc>: replaces the payload with per-entry taxa data.
        general_handler.modifiers.register(
            "filter_by_entry",
            filter_by_entry,
            serializer=SerializerDetail.TAXONOMY_PER_ENTRY,
            type=ModifierType.REPLACE_PAYLOAD,
        )
        # ?filter_by_entry_db=<db>: replaces the payload with per-entry-DB taxa data.
        general_handler.modifiers.register(
            "filter_by_entry_db",
            filter_by_entry_db,
            serializer=SerializerDetail.TAXONOMY_PER_ENTRY_DB,
            type=ModifierType.REPLACE_PAYLOAD,
        )

        return super(TaxonomyAccessionHandler, self).get(
            request._request,
            endpoint_levels,
            available_endpoint_handlers,
            level,
            self.queryset,
            handler,
            general_handler,
            request,
            *args,
            **kwargs
        )
    def get(
        self,
        request,
        endpoint_levels,
        available_endpoint_handlers=None,
        level=0,
        parent_queryset=None,
        handler=None,
        general_handler=None,
        *args,
        **kwargs
    ):
        """Handle GET for the taxonomy/uniprot DB level (list of taxa).

        Restricts to rows with an accession, orders them by descending
        protein count, registers the list-level URL modifiers, then
        delegates to the generic CustomView handling.
        """
        general_handler.queryset_manager.add_filter("taxonomy", accession__isnull=False)
        # Most protein-rich taxa first.
        general_handler.queryset_manager.order_by("-num_proteins")
        # ?extra_fields=...: expose extra model fields (e.g. "counters").
        general_handler.modifiers.register(
            "extra_fields", add_extra_fields(Taxonomy, "counters")
        )
        general_handler.modifiers.register("key_species", filter_by_key_species)
        # ?ida=<id>: replaces the payload with taxa matching a domain architecture.
        general_handler.modifiers.register(
            "ida",
            filter_by_domain_architectures,
            type=ModifierType.REPLACE_PAYLOAD,
            serializer=SerializerDetail.TAXONOMY_HEADERS,
            many=True,
        )
        # ?with_names: extends each payload item with taxonomy names.
        general_handler.modifiers.register(
            "with_names", add_taxonomy_names, type=ModifierType.EXTEND_PAYLOAD
        )
        general_handler.modifiers.register("show-subset", show_subset)

        return super(UniprotHandler, self).get(
            request._request,
            endpoint_levels,
            available_endpoint_handlers,
            level,
            self.queryset,
            handler,
            general_handler,
            request,
            *args,
            **kwargs
        )
UniprotHandler)] 138 | many = False 139 | serializer_class = TaxonomySerializer 140 | serializer_detail = SerializerDetail.TAXONOMY_OVERVIEW 141 | serializer_detail_filter = SerializerDetail.TAXONOMY_OVERVIEW 142 | 143 | def get_database_contributions(self, queryset): 144 | qs = Taxonomy.objects.filter(accession__in=queryset) 145 | return {"taxa": {"uniprot": qs.count()}} 146 | 147 | def get( 148 | self, 149 | request, 150 | endpoint_levels, 151 | available_endpoint_handlers=None, 152 | level=0, 153 | parent_queryset=None, 154 | handler=None, 155 | general_handler=None, 156 | *args, 157 | **kwargs 158 | ): 159 | general_handler.queryset_manager.reset_filters("taxonomy", endpoint_levels) 160 | 161 | general_handler.modifiers.register( 162 | "scientific_name", 163 | get_taxonomy_by_scientific_name, 164 | serializer=SerializerDetail.ALL, 165 | many=False, 166 | ) 167 | 168 | return super(TaxonomyHandler, self).get( 169 | request._request, 170 | endpoint_levels, 171 | available_endpoint_handlers, 172 | level, 173 | self.queryset, 174 | handler, 175 | general_handler, 176 | request, 177 | *args, 178 | **kwargs 179 | ) 180 | 181 | @staticmethod 182 | def filter(queryset, level_name="", general_handler=None): 183 | general_handler.queryset_manager.add_filter("taxonomy", accession__isnull=False) 184 | return queryset 185 | -------------------------------------------------------------------------------- /docs/examples/overlapping-entries.py: -------------------------------------------------------------------------------- 1 | """ 2 | This script generates the list of InterPro entries overlapping with a specified entry. 
def condense_locations(entries):
    """Condense each entry's match locations in place.

    For every ``accession -> [(start, end), ...]`` pair, sorts the
    locations and merges a location into the current span when it is
    contained in it, or when it overlaps it by at least 10% of the
    shorter span's length. ``entries`` is mutated: each value is replaced
    by its condensed list.

    BUG FIX: the original unconditionally appended the trailing span, so
    an empty location list produced a bogus ``[(None, None)]`` entry; an
    empty list now stays empty.
    """
    for accession, locations in entries.items():
        condensed = []
        start = end = None

        for s, e in sorted(locations):
            if start is None:
                # First span opens the current window.
                start = s
                end = e
                continue
            elif e <= end:
                # Fully contained in the current window: nothing to do.
                continue
            elif s <= end:
                overlap = min(end, e) - max(start, s) + 1
                shortest = min(end - start, e - s) + 1

                if overlap >= shortest * 0.1:
                    # Significant overlap: extend the current window.
                    end = e
                    continue

            # Disjoint (or insufficient overlap): flush and start a new window.
            condensed.append((start, end))
            start, end = s, e

        # Flush the last open window, if any (guards the empty-list case).
        if start is not None:
            condensed.append((start, end))
        entries[accession] = condensed
ipr.attrib["name"], 95 | "type": ipr.attrib["type"], 96 | "proteins": 0, 97 | "overlaps": {} 98 | } 99 | 100 | try: 101 | domains = protein_entries[entry_acc] 102 | except KeyError: 103 | domains = protein_entries[entry_acc] = [] 104 | 105 | for lcn in match.findall("lcn"): 106 | domains.append(( 107 | int(lcn.attrib["start"]), 108 | int(lcn.attrib["end"]) 109 | )) 110 | 111 | condense_locations(protein_entries) 112 | 113 | for entry_acc, locations in protein_entries.items(): 114 | entries[entry_acc]["proteins"] += 1 115 | entry_overlaps = entries[entry_acc]["overlaps"] 116 | 117 | for other_acc, other_locations in protein_entries.items(): 118 | if other_acc >= entry_acc: 119 | continue 120 | 121 | try: 122 | counts = entry_overlaps[other_acc] 123 | except KeyError: 124 | counts = entry_overlaps[other_acc] = [0, 0] 125 | 126 | flag = 0 127 | for start1, end1 in locations: 128 | length1 = end1 - start1 + 1 129 | 130 | for start2, end2 in other_locations: 131 | length2 = end2 - start2 + 1 132 | overlap = min(end1, end2) - max(start1, start2) + 1 133 | 134 | if not flag & 1 and overlap >= length1 * 0.5: 135 | flag |= 1 136 | counts[0] += 1 137 | 138 | if not flag & 2 and overlap >= length2 * 0.5: 139 | flag |= 2 140 | counts[1] += 1 141 | 142 | if flag == 3: 143 | break 144 | 145 | if (i + 1) % 1e6 == 0: 146 | sys.stderr.write(f"{i+1:>20,}\n") 147 | 148 | sys.stderr.write(f"{i+1:>20,}\n") 149 | 150 | supfam = "homologous_superfamily" 151 | types = (supfam, "domain", "family", "repeat") 152 | 153 | for entry_acc, entry in entries.items(): 154 | entry_cnt = entry["proteins"] 155 | 156 | for other_acc, (cnt1, cnt2) in entry["overlaps"].items(): 157 | other_cnt = entries[other_acc]["proteins"] 158 | 159 | coef1 = cnt1 / (entry_cnt + other_cnt - cnt1) 160 | coef2 = cnt2 / (entry_cnt + other_cnt - cnt2) 161 | 162 | coef = (coef1 + coef2) * 0.5 163 | 164 | cont1 = cnt1 / entry_cnt 165 | cont2 = cnt2 / other_cnt 166 | 167 | if all(e < 0.75 for e in (coef, cont1, cont2)): 168 
| continue 169 | 170 | type1 = entry["type"].lower() 171 | type2 = entries[other_acc]["type"].lower() 172 | if ((type1 == supfam and type2 in types) 173 | or (type2 == supfam and type1 in types)): 174 | 175 | print("\t".join(( 176 | entry_acc, 177 | entry["name"], 178 | entry["type"], 179 | other_acc, 180 | entries[other_acc]["name"], 181 | entries[other_acc]["type"] 182 | ))) 183 | print("\t".join(( 184 | other_acc, 185 | entries[other_acc]["name"], 186 | entries[other_acc]["type"], 187 | entry_acc, 188 | entry["name"], 189 | entry["type"] 190 | ))) 191 | 192 | 193 | if __name__ == "__main__": 194 | main() 195 | -------------------------------------------------------------------------------- /webfront/tests/tests_utils_endpoint.py: -------------------------------------------------------------------------------- 1 | from rest_framework import status 2 | from webfront.tests.InterproRESTTestCase import InterproRESTTestCase 3 | from webfront.models.interpro_new import Release_Note 4 | 5 | 6 | class UtilsAccessionTest(InterproRESTTestCase): 7 | def test_can_read_structure_overview(self): 8 | response = self.client.get("/api/utils") 9 | self.assertEqual(response.status_code, status.HTTP_200_OK) 10 | self.assertIn("available", response.data) 11 | self.assertIn("accession", response.data["available"]) 12 | self.assertIn("release", response.data["available"]) 13 | 14 | def test_accession_endpoint_doesnt_fail(self): 15 | response = self.client.get("/api/utils/accession") 16 | self.assertEqual(response.status_code, status.HTTP_200_OK) 17 | 18 | def test_accession_endpoint_with_unexisting_acc(self): 19 | response = self.client.get("/api/utils/accession/xxXx") 20 | self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT) 21 | 22 | def test_accession_endpoint_with_ipro(self): 23 | response = self.client.get("/api/utils/accession/IPR003165") 24 | self.assertEqual(response.status_code, status.HTTP_200_OK) 25 | self.assertEqual(response.data["endpoint"], "entry") 26 | 
self.assertEqual(response.data["source_database"], "interpro") 27 | 28 | def test_accession_endpoint_with_protein(self): 29 | response = self.client.get("/api/utils/accession/A1CUJ5") 30 | self.assertEqual(response.status_code, status.HTTP_200_OK) 31 | self.assertEqual(response.data["endpoint"], "protein") 32 | self.assertEqual(response.data["source_database"], "reviewed") 33 | 34 | def test_accession_endpoint_with_structure(self): 35 | response = self.client.get("/api/utils/accession/1JM7") 36 | self.assertEqual(response.status_code, status.HTTP_200_OK) 37 | self.assertEqual(response.data["endpoint"], "structure") 38 | self.assertEqual(response.data["source_database"], "pdb") 39 | 40 | def test_accession_endpoint_with_proteome(self): 41 | response = self.client.get("/api/utils/accession/UP000012042") 42 | self.assertEqual(response.status_code, status.HTTP_200_OK) 43 | self.assertEqual(response.data["endpoint"], "proteome") 44 | self.assertEqual(response.data["source_database"], "uniprot") 45 | 46 | def test_accession_endpoint_with_set(self): 47 | response = self.client.get("/api/utils/accession/CL0001") 48 | self.assertEqual(response.status_code, status.HTTP_200_OK) 49 | self.assertEqual(response.data["endpoint"], "set") 50 | self.assertEqual(response.data["source_database"], "pfam") 51 | 52 | def test_accession_endpoint_with_taxonomy(self): 53 | response = self.client.get("/api/utils/accession/344612") 54 | self.assertEqual(response.status_code, status.HTTP_200_OK) 55 | self.assertEqual(response.data["endpoint"], "taxonomy") 56 | self.assertEqual(response.data["source_database"], "uniprot") 57 | 58 | def test_accession_endpoint_with_protein_id(self): 59 | response = self.client.get("/api/utils/accession/CBPYA_ASPCL") 60 | self.assertEqual(response.status_code, status.HTTP_200_OK) 61 | self.assertEqual(response.data["endpoint"], "protein") 62 | self.assertEqual(response.data["source_database"], "reviewed") 63 | self.assertEqual(response.data["accession"], 
"A1CUJ5") 64 | 65 | def test_accession_endpoint_with_gene_name(self): 66 | response = self.client.get("/api/utils/accession/FOLH1") 67 | self.assertEqual(response.status_code, status.HTTP_200_OK) 68 | self.assertEqual(response.data["endpoint"], "protein") 69 | self.assertEqual(response.data["source_database"], "UniProt") 70 | self.assertIn("proteins", response.data) 71 | self.assertGreater(len(response.data["proteins"]), 0) 72 | self.assertIn("accession", response.data["proteins"][0]) 73 | self.assertEqual(response.data["proteins"][0]["accession"], "Q0VDM6") 74 | self.assertIn("organism", response.data["proteins"][0]) 75 | self.assertIn("tax_id", response.data["proteins"][0]) 76 | self.assertIn("is_fragment", response.data["proteins"][0]) 77 | 78 | 79 | class UtilsReleaseTest(InterproRESTTestCase): 80 | def test_can_read_structure_overview(self): 81 | response = self.client.get("/api/utils") 82 | self.assertEqual(response.status_code, status.HTTP_200_OK) 83 | self.assertIn("available", response.data) 84 | self.assertIn("accession", response.data["available"]) 85 | self.assertIn("release", response.data["available"]) 86 | 87 | def test_release_endpoint_doesnt_fail(self): 88 | response = self.client.get("/api/utils/release") 89 | self.assertEqual(response.status_code, status.HTTP_200_OK) 90 | 91 | def test_release_version_endpoint_doesnt_fail(self): 92 | response = self.client.get("/api/utils/release/current") 93 | self.assertEqual(response.status_code, status.HTTP_200_OK) 94 | response = self.client.get("/api/utils/release/70.0") 95 | self.assertEqual(response.status_code, status.HTTP_200_OK) 96 | 97 | def test_release_version_endpoint_fails(self): 98 | response = self.client.get("/api/utils/release/x") 99 | self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) 100 | 101 | def test_the_fixtures_are_loaded(self): 102 | notes = Release_Note.objects.all() 103 | self.assertEqual(notes.count(), 2) 104 | 105 | def 
test_release_endpoint_returns_the_fixtures(self): 106 | notes = Release_Note.objects.all() 107 | response = self.client.get("/api/utils/release") 108 | self.assertEqual(response.status_code, status.HTTP_200_OK) 109 | self.assertEqual(len(response.data), len(notes)) 110 | for note in notes: 111 | self.assertIn(note.version, response.data) 112 | 113 | def test_release_current_is_same_as_accession(self): 114 | response1 = self.client.get("/api/utils/release/current") 115 | self.assertEqual(response1.status_code, status.HTTP_200_OK) 116 | response2 = self.client.get("/api/utils/release/70.0") 117 | self.assertEqual(response2.status_code, status.HTTP_200_OK) 118 | self.assertEqual(response1.data, response1.data) 119 | 120 | def test_release_70_is_same_as_fixture(self): 121 | note_version = "70.0" 122 | note = Release_Note.objects.all().filter(version=note_version).first() 123 | response = self.client.get("/api/utils/release/" + note_version) 124 | self.assertEqual(response.status_code, status.HTTP_200_OK) 125 | self.assertEqual(response.data["content"], note.content) 126 | -------------------------------------------------------------------------------- /webfront/tests/fixtures_organisms.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "model": "webfront.Taxonomy", 4 | "fields": { 5 | "accession": "1", 6 | "scientific_name": "ROOT", 7 | "full_name": "ROOT", 8 | "lineage": " 1 ", 9 | "rank": "no rank", 10 | "children": [ 11 | "2579", 12 | "2" 13 | ], 14 | "counts": { 15 | "entries": { 16 | "total": 7, 17 | "interpro": 2, 18 | "pfam": 3, 19 | "profile": 1, 20 | "smart": 1 21 | }, 22 | "structures": 4, 23 | "proteins": 4, 24 | "sets": 2, 25 | "proteomes": 3 26 | } 27 | } 28 | }, 29 | { 30 | "model": "webfront.Taxonomy", 31 | "fields": { 32 | "accession": "2579", 33 | "scientific_name": "Eukaryota", 34 | "full_name": "Eucaryotae", 35 | "lineage": " 1 2579 ", 36 | "parent": "1", 37 | "rank": "SUPERKINGDOM", 38 | 
"children": [ 39 | "344612", 40 | "1001583" 41 | ], 42 | "counts": { 43 | "entries": { 44 | "total": 6, 45 | "interpro": 2, 46 | "pfam": 3, 47 | "smart": 1 48 | }, 49 | "structures": 2, 50 | "proteins": 2, 51 | "sets": 2, 52 | "proteomes": 2 53 | } 54 | } 55 | }, 56 | { 57 | "model": "webfront.Taxonomy", 58 | "fields": { 59 | "accession": "2", 60 | "scientific_name": "Bacteria", 61 | "full_name": "Prokaryota", 62 | "lineage": " 1 2 ", 63 | "parent": "1", 64 | "rank": "SUPERKINGDOM", 65 | "children": [ 66 | "40296" 67 | ], 68 | "counts": { 69 | "entries": { 70 | "total": 2, 71 | "interpro": 1, 72 | "profile": 1 73 | }, 74 | "structures": 3, 75 | "proteins": 2, 76 | "sets": 0, 77 | "proteomes": 1 78 | } 79 | } 80 | }, 81 | { 82 | "model": "webfront.Taxonomy", 83 | "fields": { 84 | "accession": "40296", 85 | "scientific_name": "Penicillium italicum", 86 | "full_name": "Blue mold", 87 | "lineage": " 1 2 40296 ", 88 | "parent": "2", 89 | "rank": "SPECIES", 90 | "children": [], 91 | "counts": { 92 | "entries": { 93 | "total": 2, 94 | "interpro": 1, 95 | "profile": 1 96 | }, 97 | "structures": 3, 98 | "proteins": 2, 99 | "sets": 0, 100 | "proteomes": 1 101 | } 102 | } 103 | }, 104 | { 105 | "model": "webfront.Taxonomy", 106 | "fields": { 107 | "accession": "1001583", 108 | "scientific_name": "Lactobacillus brevis KB290", 109 | "full_name": "Lactobacillus brevis str. 
KB290", 110 | "lineage": " 1 2579 1001583 ", 111 | "parent": "2579", 112 | "rank": "SPECIES", 113 | "children": [], 114 | "counts": { 115 | "entries": { 116 | "total": 1, 117 | "pfam": 1 118 | }, 119 | "structures": 2, 120 | "proteins": 1, 121 | "sets": 1, 122 | "proteomes": 1 123 | } 124 | } 125 | }, 126 | { 127 | "model": "webfront.Taxonomy", 128 | "fields": { 129 | "accession": "344612", 130 | "scientific_name": "Aspergillus clavatus", 131 | "full_name": "Aspergillus clavatus (strain ATCC 1007 / CBS 513.65 / DSM 816 / NCTC 3887 / NRRL 1)", 132 | "lineage": " 1 2579 344612 ", 133 | "parent": "2579", 134 | "rank": "SPECIES", 135 | "children": [], 136 | "counts": { 137 | "entries": { 138 | "total": 5, 139 | "pfam": 2, 140 | "interpro": 2, 141 | "smart": 1 142 | }, 143 | "structures": 1, 144 | "proteins": 1, 145 | "sets": 2, 146 | "proteomes": 1 147 | } 148 | } 149 | }, 150 | { 151 | "model": "webfront.Taxonomy", 152 | "fields": { 153 | "accession": "10090", 154 | "scientific_name": "Mus musculus", 155 | "full_name": "Mus musculus", 156 | "lineage": " 1 2579 10090 ", 157 | "parent": "2579", 158 | "rank": "SPECIES", 159 | "children": [], 160 | "counts": { 161 | "entries": { 162 | "total": 0 163 | }, 164 | "structures": 0, 165 | "proteins": 1, 166 | "sets": 0, 167 | "proteomes": 1 168 | } 169 | } 170 | }, 171 | { 172 | "model": "webfront.Proteome", 173 | "fields": { 174 | "accession": "UP000006701", 175 | "name": "Aspergillus clavatus NRRL 1 ", 176 | "is_reference": true, 177 | "strain": "ATCC 1007 / CBS 513.65 / DSM 816 / NCTC 3887 / NRRL 1", 178 | "assembly": "GCA_000002715.1 from ENA/EMBL", 179 | "taxonomy": "344612", 180 | "counts": { 181 | "entries": { 182 | "total": 5, 183 | "pfam": 2, 184 | "interpro": 2, 185 | "smart": 1 186 | }, 187 | "structures": 1, 188 | "proteins": 1, 189 | "taxa": 1, 190 | "sets": 2 191 | } 192 | } 193 | }, 194 | { 195 | "model": "webfront.Proteome", 196 | "fields": { 197 | "accession": "UP000030104", 198 | "name": "Penicillium italicum 
Blue mold", 199 | "is_reference": true, 200 | "strain": "PHI-1", 201 | "assembly": "GCA_000769765.1 from ENA/EMBL", 202 | "taxonomy": "40296", 203 | "counts": { 204 | "entries": { 205 | "total": 2, 206 | "profile": 1, 207 | "interpro": 1 208 | }, 209 | "structures": 3, 210 | "proteins": 2, 211 | "taxa": 1, 212 | "sets": 0 213 | } 214 | } 215 | }, 216 | { 217 | "model": "webfront.Proteome", 218 | "fields": { 219 | "accession": "UP000012042", 220 | "name": "Lactobacillus brevis KB290", 221 | "is_reference": false, 222 | "strain": "KB290", 223 | "assembly": "GCA_000359625.1 from ENA/EMBL", 224 | "taxonomy": "1001583", 225 | "counts": { 226 | "entries": { 227 | "total": 1, 228 | "pfam": 1 229 | }, 230 | "structures": 2, 231 | "proteins": 1, 232 | "taxa": 1, 233 | "sets": 1 234 | } 235 | } 236 | }, 237 | { 238 | "model": "webfront.Proteome", 239 | "fields": { 240 | "accession": "UP000000589", 241 | "name": "Mus musculus", 242 | "is_reference": true, 243 | "strain": "C57BL", 244 | "assembly": "GCA_000001635.8 from Ensembl", 245 | "taxonomy": "10090", 246 | "counts": { 247 | "entries": { 248 | "total": 0 249 | }, 250 | "structures": 0, 251 | "proteins": 1, 252 | "taxa": 1, 253 | "sets": 0 254 | } 255 | } 256 | } 257 | ] 258 | --------------------------------------------------------------------------------