├── interpro
    ├── __init__.py
    ├── urls.py
    ├── wsgi.py
    └── renderers.py
├── release
    ├── __init__.py
    ├── .gitignore
    └── management
    │   ├── __init__.py
    │   └── commands
    │       ├── __init__.py
    │       └── warmer.py
├── webfront
    ├── __init__.py
    ├── tests
    │   ├── __init__.py
    │   ├── test_mail.py
    │   ├── managed_model_test_runner.py
    │   ├── tests_structure_endpoint.py
    │   ├── test_ida_search.py
    │   ├── tests_protein_endpoint.py
    │   ├── README.md
    │   ├── tests_utils.py
    │   ├── tests_utils_endpoint.py
    │   └── fixtures_organisms.json
    ├── views
    │   ├── __init__.py
    │   ├── mail.py
    │   ├── MODIFIER_README.md
    │   ├── QUERYSET_README.md
    │   ├── modifier_manager.py
    │   ├── README.md
    │   ├── proteome.py
    │   ├── cache.py
    │   ├── set.py
    │   └── taxonomy.py
    ├── migrations
    │   ├── __init__.py
    │   ├── 0025_remove_set_alignment.py
    │   ├── 0026_remove_structuralmodel.py
    │   ├── 0017_structural_model_plddt.py
    │   ├── 0005_droping_columns.py
    │   ├── 0027_remove_llm_description.py
    │   ├── 0018_taxa_modifier.py
    │   ├── 0007_history.py
    │   ├── 0023_rename_is_alive_entry_is_public.py
    │   ├── 0020_alter_entryannotation_num_sequences.py
    │   ├── 0021_set_info.py
    │   ├── 0028_in_alphafold.py
    │   ├── 0037_protein_in_bfvd.py
    │   ├── 0010_wiki_field_type_change.py
    │   ├── 0015_structural_model_lddt.py
    │   ├── 0024_entry_llm_description.py
    │   ├── 0032_entry_is_updated_llm.py
    │   ├── 0034_set_wikipedia.py
    │   ├── 0033_alter_proteinextrafeatures_sequence_feature.py
    │   ├── 0006_interaction_and_pathways.py
    │   ├── 0016_structural_model_algorithm.py
    │   ├── 0031_strain_assembly_nullable.py
    │   ├── 0012_seq_and_seq_raw.py
    │   ├── 0014_structural_model.py
    │   ├── 0008_dw_changes.py
    │   ├── 0011_pfam2interpro.py
    │   ├── 0009_entry_annotation_changes.py
    │   ├── 0029_remove_is_featured_plus_llm_entry.py
    │   ├── 0019_entrytaxa_table.py
    │   ├── 0036_default_value_lists_dicts.py
    │   ├── 0022_chain_sequence.py
    │   ├── 0004_taxonomy_per_entryX.py
    │   ├── 0003_taxonomy_per_entryDB.py
    │   ├── 0002_taxonomy_per_entry.py
    │   ├── 0035_interpronmatches.py
    │   ├── 0013_protein_extra.py
    │   └── 0030_num_proteins.py
    ├── searcher
    │   ├── __init__.py
    │   ├── README.md
    │   └── search_controller.py
    ├── models
    │   └── __init__.py
    ├── static
    │   ├── logo_178x178.png
    │   ├── swagger
    │   │   ├── favicon-16x16.png
    │   │   ├── favicon-32x32.png
    │   │   ├── index.html
    │   │   └── oauth2-redirect.html
    │   ├── bootstrap
    │   │   ├── fonts
    │   │   │   ├── glyphicons-halflings-regular.eot
    │   │   │   ├── glyphicons-halflings-regular.ttf
    │   │   │   ├── glyphicons-halflings-regular.woff
    │   │   │   └── glyphicons-halflings-regular.woff2
    │   │   └── js
    │   │   │   └── npm.js
    │   └── interpro-api.css
    ├── README.md
    ├── templatetags
    │   └── interpro_tags.py
    ├── exceptions.py
    ├── response.py
    ├── serializers
    │   ├── utils.py
    │   └── README.md
    ├── constants.py
    └── pagination.py
├── functional_tests
    ├── __init__.py
    ├── base.py
    └── tests.py
├── .pre-commit-config.yaml
├── config
    ├── db.template.yml
    ├── elastic_ida_mapping.json
    └── elastic_mapping.json
├── .coveragerc
├── .editorconfig
├── dev_requirements.txt
├── .gitignore
├── requirements.txt
├── manage.py
├── example_data
    ├── entry
    │   ├── PF17180.json
    │   ├── PF02171.json
    │   └── IPR003165.json
    └── protein
    │   ├── M5ADK6.json
    │   ├── A0A0A2L2G2.json
    │   ├── A1CUJ5.json
    │   ├── X2JLE1.json
    │   ├── V5XAD2.json
    │   └── P16582.json
├── templates
    └── rest_framework
    │   └── api.html
├── .github
    └── workflows
    │   └── testing.yml
├── docs
    └── examples
    │   ├── fetch-protein-matches.py
    │   ├── fetch-alphafold-for-entry.py
    │   └── overlapping-entries.py
├── README.md
└── deploy_tools
    └── README.md


/interpro/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/release/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/webfront/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/functional_tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/release/.gitignore:
--------------------------------------------------------------------------------
1 | logs
2 | 


--------------------------------------------------------------------------------
/webfront/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/webfront/views/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/release/management/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/webfront/migrations/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/webfront/searcher/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/release/management/commands/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/webfront/models/__init__.py:
--------------------------------------------------------------------------------
1 | from webfront.models.interpro_new import *
2 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | -   repo: https://github.com/ambv/black
3 |     rev: stable
4 |     hooks:
5 |     - id: black
6 | 


--------------------------------------------------------------------------------
/webfront/static/logo_178x178.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ProteinsWebTeam/interpro7-api/HEAD/webfront/static/logo_178x178.png


--------------------------------------------------------------------------------
/webfront/static/swagger/favicon-16x16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ProteinsWebTeam/interpro7-api/HEAD/webfront/static/swagger/favicon-16x16.png


--------------------------------------------------------------------------------
/webfront/static/swagger/favicon-32x32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ProteinsWebTeam/interpro7-api/HEAD/webfront/static/swagger/favicon-32x32.png


--------------------------------------------------------------------------------
/config/db.template.yml:
--------------------------------------------------------------------------------
1 | engine: django.db.backends.mysql
2 | host: URL
3 | sid: DATABASE_NAME
4 | port: 4602
5 | user: USER
6 | password: PASSWORD
7 | 


--------------------------------------------------------------------------------
/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | omit =
3 |     # omit anything in a release directory anywhere
4 |     */release/*
5 |     # not using wsgi for testing
6 |     interpro/wsgi.py
7 | 


--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
 1 | [*]
 2 | indent_style = space
 3 | insert_final_newline = true
 4 | 
 5 | [*.py]
 6 | indent_size = 4
 7 | 
 8 | [*.md]
 9 | trim_trailing_whitespace = false
10 | 


--------------------------------------------------------------------------------
/webfront/static/bootstrap/fonts/glyphicons-halflings-regular.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ProteinsWebTeam/interpro7-api/HEAD/webfront/static/bootstrap/fonts/glyphicons-halflings-regular.eot


--------------------------------------------------------------------------------
/webfront/static/bootstrap/fonts/glyphicons-halflings-regular.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ProteinsWebTeam/interpro7-api/HEAD/webfront/static/bootstrap/fonts/glyphicons-halflings-regular.ttf


--------------------------------------------------------------------------------
/webfront/static/bootstrap/fonts/glyphicons-halflings-regular.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ProteinsWebTeam/interpro7-api/HEAD/webfront/static/bootstrap/fonts/glyphicons-halflings-regular.woff


--------------------------------------------------------------------------------
/webfront/static/bootstrap/fonts/glyphicons-halflings-regular.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ProteinsWebTeam/interpro7-api/HEAD/webfront/static/bootstrap/fonts/glyphicons-halflings-regular.woff2


--------------------------------------------------------------------------------
/dev_requirements.txt:
--------------------------------------------------------------------------------
1 | # remember to check the version is compatible with python 3.8
2 | selenium==4.21.0
3 | django-debug-toolbar==4.4.2
4 | ipdb==0.13.13
5 | coveralls==3.3.1
6 | tqdm==4.66.4
7 | black==24.4.2
8 | 


--------------------------------------------------------------------------------
/interpro/urls.py:
--------------------------------------------------------------------------------
1 | from django.urls import re_path
2 | from webfront.views import common, mail
3 | 
4 | urlpatterns = [
5 |     re_path(r"^api/mail/$", mail.send_email),
6 |     re_path(r"^api/(?P<url>.*)$", common.GeneralHandler.as_view()),
7 | ]
8 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | db.sqlite3
 2 | __pycache__
 3 | .idea
 4 | unifam/mysql.conf
 5 | *.pyc
 6 | .DS_Store
 7 | .vscode
 8 | unifam/settings-private.py
 9 | .coverage
10 | config/interpro.local.yml
11 | config/db.yml
12 | *.db
13 | *.iml
14 | gunicorn.local.conf.py
15 | geckodriver.log
16 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | # Python 3.10
 2 | Django==5.2.7
 3 | djangorestframework==3.15.2
 4 | PyYAML==6.0
 5 | jsonfield2==4.0.0.post0
 6 | pymysql==1.1.1
 7 | django-cors-headers==4.3.1
 8 | gunicorn==22.0.0
 9 | django-redis==5.4.0
10 | redis==5.0.4
11 | requests==2.32.3
12 | 
13 | 
14 | 


--------------------------------------------------------------------------------
/config/elastic_ida_mapping.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "mappings": {
 3 |     "properties": {
 4 |       "ida_id": {
 5 |         "type": "keyword"
 6 |       },
 7 |       "ida": {
 8 |         "type": "keyword"
 9 |       },
10 |       "counts": {
11 |         "type": "long"
12 |       },
13 |       "representative": {
14 |         "type": "object",
15 |         "enabled": false
16 |       }
17 |     }
18 |   }
19 | }
20 | 


--------------------------------------------------------------------------------
/webfront/migrations/0025_remove_set_alignment.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.2.15 on 2023-11-09 15:09
 2 | 
 3 | from django.db import migrations
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 | 
 8 |     dependencies = [
 9 |         ("webfront", "0024_entry_llm_description"),
10 |     ]
11 | 
12 |     operations = [
13 |         migrations.DeleteModel(
14 |             name="Alignment",
15 |         ),
16 |     ]
17 | 


--------------------------------------------------------------------------------
/webfront/migrations/0026_remove_structuralmodel.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.2.15 on 2023-11-16 16:23
 2 | 
 3 | from django.db import migrations
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 | 
 8 |     dependencies = [
 9 |         ("webfront", "0025_remove_set_alignment"),
10 |     ]
11 | 
12 |     operations = [
13 |         migrations.DeleteModel(
14 |             name="StructuralModel",
15 |         ),
16 |     ]
17 | 


--------------------------------------------------------------------------------
/webfront/migrations/0017_structural_model_plddt.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.1.7 on 2021-08-28 10:30
 2 | 
 3 | from django.db import migrations
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 | 
 8 |     dependencies = [("webfront", "0016_structural_model_algorithm")]
 9 | 
10 |     operations = [
11 |         migrations.RenameField(
12 |             model_name="structuralmodel", old_name="lddt", new_name="plddt"
13 |         )
14 |     ]
15 | 


--------------------------------------------------------------------------------
/webfront/migrations/0005_droping_columns.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.0.1 on 2020-01-23 10:54
 2 | 
 3 | from django.db import migrations
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 | 
 8 |     dependencies = [("webfront", "0004_taxonomy_per_entryX")]
 9 | 
10 |     operations = [
11 |         migrations.RemoveField(model_name="protein", name="other_names"),
12 |         migrations.RemoveField(model_name="protein", name="size"),
13 |     ]
14 | 


--------------------------------------------------------------------------------
/webfront/README.md:
--------------------------------------------------------------------------------
 1 | Developers Documentation
 2 | ===
 3 | 
 4 | This API was developed using [Django REST Framework](http://www.django-rest-framework.org/), but we have adapted it for our needs.
 5 | 
 6 | You can read about the details of the changes in each of the parts of the framework:
 7 | 
 8 | * [Views](./views/README.md)
 9 | * [Queryset](./views/QUERYSET_README.md)
10 | * [Serializers](./serializers/README.md)
11 | * [Modifiers](./views/MODIFIER_README.md)
12 | 


--------------------------------------------------------------------------------
/webfront/migrations/0027_remove_llm_description.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.2.15 on 2023-12-15 16:32
 2 | 
 3 | from django.db import migrations
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 | 
 8 |     dependencies = [
 9 |         ("webfront", "0026_remove_structuralmodel"),
10 |     ]
11 | 
12 |     operations = [
13 |         migrations.RemoveField(
14 |             model_name="entry",
15 |             name="llm_description",
16 |         ),
17 |     ]
18 | 


--------------------------------------------------------------------------------
/webfront/migrations/0018_taxa_modifier.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.2.8 on 2022-02-01 12:14
 2 | 
 3 | from django.db import migrations
 4 | import jsonfield.fields
 5 | 
 6 | 
 7 | class Migration(migrations.Migration):
 8 | 
 9 |     dependencies = [("webfront", "0017_structural_model_plddt")]
10 | 
11 |     operations = [
12 |         migrations.AddField(
13 |             model_name="entry", name="taxa", field=jsonfield.fields.JSONField(null=True)
14 |         )
15 |     ]
16 | 


--------------------------------------------------------------------------------
/webfront/static/interpro-api.css:
--------------------------------------------------------------------------------
 1 | 
 2 | div.region {
 3 |     text-align: right;
 4 | }
 5 | div.region form div.btn-group a{
 6 |     min-width: 10rem;
 7 | }
 8 | 
 9 | div.open ul.dropdown-menu {
10 |     display: block;
11 | }
12 | ul.dropdown-menu li {
13 |     text-align: right;
14 |     padding-right: 1em;
15 | }
16 | ul.dropdown-menu li:hover {
17 |     background-color: rgb(20,160,206);
18 | }
19 | ul.dropdown-menu li a.format-option{
20 |     color: rgb(231, 231, 231);;
21 | }


--------------------------------------------------------------------------------
/webfront/migrations/0007_history.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.0.1 on 2020-02-06 09:27
 2 | 
 3 | from django.db import migrations
 4 | import jsonfield.fields
 5 | 
 6 | 
 7 | class Migration(migrations.Migration):
 8 | 
 9 |     dependencies = [("webfront", "0006_interaction_and_pathways")]
10 | 
11 |     operations = [
12 |         migrations.AddField(
13 |             model_name="entry",
14 |             name="history",
15 |             field=jsonfield.fields.JSONField(null=True),
16 |         )
17 |     ]
18 | 


--------------------------------------------------------------------------------
/webfront/migrations/0023_rename_is_alive_entry_is_public.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.2.20 on 2023-09-19 21:37
 2 | 
 3 | from django.db import migrations
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 | 
 8 |     dependencies = [
 9 |         ("webfront", "0022_chain_sequence"),
10 |     ]
11 | 
12 |     operations = [
13 |         migrations.RenameField(
14 |             model_name="entry",
15 |             old_name="is_alive",
16 |             new_name="is_public",
17 |         ),
18 |     ]
19 | 


--------------------------------------------------------------------------------
/webfront/migrations/0020_alter_entryannotation_num_sequences.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.2.12 on 2022-08-31 15:35
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 | 
 8 |     dependencies = [("webfront", "0019_entrytaxa_table")]
 9 | 
10 |     operations = [
11 |         migrations.AlterField(
12 |             model_name="entryannotation",
13 |             name="num_sequences",
14 |             field=models.IntegerField(null=True),
15 |         )
16 |     ]
17 | 


--------------------------------------------------------------------------------
/webfront/migrations/0021_set_info.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.2.15 on 2023-01-03 14:29
 2 | 
 3 | from django.db import migrations
 4 | import jsonfield.fields
 5 | 
 6 | 
 7 | class Migration(migrations.Migration):
 8 | 
 9 |     dependencies = [("webfront", "0020_alter_entryannotation_num_sequences")]
10 | 
11 |     operations = [
12 |         migrations.AddField(
13 |             model_name="entry",
14 |             name="set_info",
15 |             field=jsonfield.fields.JSONField(null=True),
16 |         )
17 |     ]
18 | 


--------------------------------------------------------------------------------
/webfront/migrations/0028_in_alphafold.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.2.23 on 2024-02-15 09:49
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 | 
 8 |     dependencies = [
 9 |         ('webfront', '0027_remove_llm_description'),
10 |     ]
11 | 
12 |     operations = [
13 |         migrations.AddField(
14 |             model_name='protein',
15 |             name='in_alphafold',
16 |             field=models.BooleanField(default=False),
17 |         ),
18 |     ]
19 | 


--------------------------------------------------------------------------------
/webfront/migrations/0037_protein_in_bfvd.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 4.2.13 on 2025-03-11 09:46
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 | 
 8 |     dependencies = [
 9 |         ('webfront', '0036_default_value_lists_dicts'),
10 |     ]
11 | 
12 |     operations = [
13 |         migrations.AddField(
14 |             model_name='protein',
15 |             name='in_bfvd',
16 |             field=models.BooleanField(default=False),
17 |         ),
18 |     ]
19 | 


--------------------------------------------------------------------------------
/webfront/migrations/0010_wiki_field_type_change.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 2.1.10 on 2020-08-10 09:32
 2 | 
 3 | from django.db import migrations
 4 | import jsonfield.fields
 5 | 
 6 | 
 7 | class Migration(migrations.Migration):
 8 | 
 9 |     dependencies = [("webfront", "0009_entry_annotation_changes")]
10 | 
11 |     operations = [
12 |         migrations.AlterField(
13 |             model_name="entry",
14 |             name="wikipedia",
15 |             field=jsonfield.fields.JSONField(null=True),
16 |         )
17 |     ]
18 | 


--------------------------------------------------------------------------------
/webfront/migrations/0015_structural_model_lddt.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.0.7 on 2021-02-23 15:20
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 | 
 8 |     dependencies = [("webfront", "0014_structural_model")]
 9 | 
10 |     operations = [
11 |         migrations.AddField(
12 |             model_name="structuralmodel",
13 |             name="lddt",
14 |             field=models.FloatField(default=0),
15 |             preserve_default=False,
16 |         )
17 |     ]
18 | 


--------------------------------------------------------------------------------
/webfront/migrations/0024_entry_llm_description.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.2.19 on 2023-10-03 11:38
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 | 
 8 |     dependencies = [
 9 |         ("webfront", "0023_rename_is_alive_entry_is_public"),
10 |     ]
11 | 
12 |     operations = [
13 |         migrations.AddField(
14 |             model_name="entry",
15 |             name="llm_description",
16 |             field=models.TextField(null=True),
17 |         ),
18 |     ]
19 | 


--------------------------------------------------------------------------------
/webfront/migrations/0032_entry_is_updated_llm.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.2.23 on 2024-05-14 20:35
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 | 
 8 |     dependencies = [
 9 |         ('webfront', '0031_strain_assembly_nullable'),
10 |     ]
11 | 
12 |     operations = [
13 |         migrations.AddField(
14 |             model_name='entry',
15 |             name='is_updated_llm',
16 |             field=models.BooleanField(default=False),
17 |         ),
18 |     ]
19 | 


--------------------------------------------------------------------------------
/manage.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import os
 3 | import sys
 4 | 
 5 | try:
 6 |     import pymysql
 7 | except ImportError:
 8 |     pass
 9 | else:
10 |     pymysql.version_info = (1, 4, 6, "final", 0)  # change mysqlclient version
11 |     pymysql.install_as_MySQLdb()
12 | 
13 | 
14 | def main():
15 |     os.environ.setdefault("DJANGO_SETTINGS_MODULE", "interpro.settings")
16 |     from django.core.management import execute_from_command_line
17 |     execute_from_command_line(sys.argv)
18 | 
19 | 
20 | if __name__ == "__main__":
21 |     main()
22 | 


--------------------------------------------------------------------------------
/webfront/migrations/0034_set_wikipedia.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 4.2.13 on 2025-01-17 10:57
 2 | 
 3 | from django.db import migrations
 4 | import jsonfield.fields
 5 | 
 6 | 
 7 | class Migration(migrations.Migration):
 8 | 
 9 |     dependencies = [
10 |         ('webfront', '0033_alter_proteinextrafeatures_sequence_feature'),
11 |     ]
12 | 
13 |     operations = [
14 |         migrations.AddField(
15 |             model_name='set',
16 |             name='wikipedia',
17 |             field=jsonfield.fields.JSONField(null=True),
18 |         ),
19 |     ]
20 | 


--------------------------------------------------------------------------------
/webfront/migrations/0033_alter_proteinextrafeatures_sequence_feature.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.2.24 on 2024-11-20 18:26
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 | 
 8 |     dependencies = [
 9 |         ('webfront', '0032_entry_is_updated_llm'),
10 |     ]
11 | 
12 |     operations = [
13 |         migrations.AlterField(
14 |             model_name='proteinextrafeatures',
15 |             name='sequence_feature',
16 |             field=models.CharField(max_length=255),
17 |         ),
18 |     ]
19 | 


--------------------------------------------------------------------------------
/webfront/static/bootstrap/js/npm.js:
--------------------------------------------------------------------------------
 1 | // This file is autogenerated via the `commonjs` Grunt task. You can require() this file in a CommonJS environment.
 2 | require('../../js/transition.js')
 3 | require('../../js/alert.js')
 4 | require('../../js/button.js')
 5 | require('../../js/carousel.js')
 6 | require('../../js/collapse.js')
 7 | require('../../js/dropdown.js')
 8 | require('../../js/modal.js')
 9 | require('../../js/tooltip.js')
10 | require('../../js/popover.js')
11 | require('../../js/scrollspy.js')
12 | require('../../js/tab.js')
13 | require('../../js/affix.js')


--------------------------------------------------------------------------------
/webfront/templatetags/interpro_tags.py:
--------------------------------------------------------------------------------
 1 | from django import template
 2 | from django.utils.html import escape
 3 | from django.utils.encoding import iri_to_uri
 4 | from rest_framework.utils.urls import replace_query_param
 5 | 
 6 | from django.conf import settings
 7 | 
 8 | register = template.Library()
 9 | 
10 | 
11 | @register.simple_tag
12 | def get_url_with_prefix(request, key, val):
13 |     iri = request.get_full_path()
14 |     uri = iri_to_uri(iri)
15 |     return settings.INTERPRO_CONFIG["url_path_prefix"] + escape(
16 |         replace_query_param(uri, key, val)
17 |     )
18 | 


--------------------------------------------------------------------------------
/example_data/entry/PF17180.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "metadata": {
 3 |     "accession": "PF17180",
 4 |     "id": "",
 5 |     "type": "domain",
 6 |     "GO": {
 7 |       "biologicalProcess": [],
 8 |       "molecularFunction": [],
 9 |       "cellularComponent": []
10 |     },
11 |     "sourceDataBase": "Pfam",
12 |     "memberDataBases": {},
13 |     "integrated": null,
14 |     "name": {
15 |       "name": "Zinc-binding domain",
16 |       "short": "zf-3CxxC_2",
17 |       "other": []
18 |     },
19 |     "description": "",
20 |     "wikipedia": "",
21 |     "literature": {},
22 |     "cross_references": {}
23 |   }
24 | }
25 | 


--------------------------------------------------------------------------------
/webfront/migrations/0006_interaction_and_pathways.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.0.1 on 2020-02-04 15:33
 2 | 
 3 | from django.db import migrations
 4 | import jsonfield.fields
 5 | 
 6 | 
 7 | class Migration(migrations.Migration):
 8 | 
 9 |     dependencies = [("webfront", "0005_droping_columns")]
10 | 
11 |     operations = [
12 |         migrations.AddField(
13 |             model_name="entry",
14 |             name="interactions",
15 |             field=jsonfield.fields.JSONField(null=True),
16 |         ),
17 |         migrations.AddField(
18 |             model_name="entry",
19 |             name="pathways",
20 |             field=jsonfield.fields.JSONField(null=True),
21 |         ),
22 |     ]
23 | 


--------------------------------------------------------------------------------
/webfront/migrations/0016_structural_model_algorithm.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.1.7 on 2021-06-24 12:34
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 | 
 8 |     dependencies = [("webfront", "0015_structural_model_lddt")]
 9 | 
10 |     operations = [
11 |         migrations.AddField(
12 |             model_name="structuralmodel",
13 |             name="algorithm",
14 |             field=models.CharField(default="trRosetta", max_length=20),
15 |             preserve_default=False,
16 |         ),
17 |         migrations.AlterField(
18 |             model_name="structuralmodel", name="lddt", field=models.BinaryField()
19 |         ),
20 |     ]
21 | 


--------------------------------------------------------------------------------
/interpro/wsgi.py:
--------------------------------------------------------------------------------
 1 | """
 2 | WSGI config for interpro project.
 3 | 
 4 | It exposes the WSGI callable as a module-level variable named ``application``.
 5 | 
 6 | For more information on this file, see
 7 | https://docs.djangoproject.com/en/1.8/howto/deployment/wsgi/
 8 | """
 9 | 
10 | import os
11 | 
12 | from django.core.wsgi import get_wsgi_application
13 | 
14 | try:
15 |     import pymysql
16 | 
17 |     pymysql.version_info = (1, 4, 6, "final", 0)  # change mysqlclient version
18 |     pymysql.install_as_MySQLdb()
19 |     print("running pymysql")
20 | except ImportError:
21 |     pass
22 | 
23 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "interpro.settings")
24 | 
25 | application = get_wsgi_application()
26 | 


--------------------------------------------------------------------------------
/webfront/migrations/0031_strain_assembly_nullable.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.2.23 on 2024-05-14 20:35
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 | 
 8 |     dependencies = [
 9 |         ('webfront', '0030_num_proteins'),
10 |     ]
11 | 
12 |     operations = [
13 |         migrations.AlterField(
14 |             model_name='proteome',
15 |             name='assembly',
16 |             field=models.CharField(max_length=512, null=True),
17 |         ),
18 |         migrations.AlterField(
19 |             model_name='proteome',
20 |             name='strain',
21 |             field=models.CharField(max_length=512, null=True),
22 |         ),
23 |     ]
24 | 


--------------------------------------------------------------------------------
/webfront/migrations/0012_seq_and_seq_raw.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.0.7 on 2021-01-11 13:42
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 | 
 8 |     dependencies = [("webfront", "0011_pfam2interpro")]
 9 | 
10 |     operations = [
11 |         migrations.RemoveField(model_name="protein", name="extra_features"),
12 |         migrations.RemoveField(model_name="protein", name="residues"),
13 |         migrations.RemoveField(model_name="protein", name="sequence"),
14 |         migrations.AddField(
15 |             model_name="protein",
16 |             name="sequence_bin",
17 |             field=models.BinaryField(db_column="sequence", null=True),
18 |         ),
19 |     ]
20 | 


--------------------------------------------------------------------------------
/webfront/migrations/0014_structural_model.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.0.7 on 2021-02-05 10:34
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 | 
 8 |     dependencies = [("webfront", "0013_protein_extra")]
 9 | 
10 |     operations = [
11 |         migrations.CreateModel(
12 |             name="StructuralModel",
13 |             fields=[
14 |                 ("model_id", models.IntegerField(primary_key=True, serialize=False)),
15 |                 ("accession", models.CharField(max_length=25)),
16 |                 ("contacts", models.BinaryField()),
17 |                 ("structure", models.BinaryField()),
18 |             ],
19 |             options={"db_table": "webfront_structuralmodel"},
20 |         )
21 |     ]
22 | 


--------------------------------------------------------------------------------
/webfront/migrations/0008_dw_changes.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.0.1 on 2020-04-09 15:04
 2 | 
 3 | from django.db import migrations
 4 | import jsonfield.fields
 5 | 
 6 | 
 7 | class Migration(migrations.Migration):
 8 | 
 9 |     dependencies = [("webfront", "0007_history")]
10 | 
11 |     operations = [
12 |         migrations.RemoveField(model_name="set", name="integrated"),
13 |         migrations.RemoveField(model_name="set", name="is_set"),
14 |         migrations.RemoveField(model_name="structure", name="other_names"),
15 |         migrations.RemoveField(model_name="structure", name="short_name"),
16 |         migrations.AlterField(
17 |             model_name="protein",
18 |             name="structure",
19 |             field=jsonfield.fields.JSONField(default={}, null=True),
20 |         ),
21 |     ]
22 | 


--------------------------------------------------------------------------------
/webfront/migrations/0011_pfam2interpro.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.0.7 on 2020-08-24 14:45
 2 | 
 3 | from django.db import migrations
 4 | import jsonfield.fields
 5 | 
 6 | 
 7 | class Migration(migrations.Migration):
 8 | 
 9 |     dependencies = [("webfront", "0010_wiki_field_type_change")]
10 | 
11 |     operations = [
12 |         migrations.AddField(
13 |             model_name="entry",
14 |             name="details",
15 |             field=jsonfield.fields.JSONField(null=True),
16 |         ),
17 |         migrations.AddField(
18 |             model_name="set",
19 |             name="authors",
20 |             field=jsonfield.fields.JSONField(null=True),
21 |         ),
22 |         migrations.AddField(
23 |             model_name="set",
24 |             name="literature",
25 |             field=jsonfield.fields.JSONField(null=True),
26 |         ),
27 |     ]
28 | 


--------------------------------------------------------------------------------
/webfront/migrations/0009_entry_annotation_changes.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.0.7 on 2020-06-08 10:45
 2 | 
 3 | from django.db import migrations, models
 4 | import django.db.models.deletion
 5 | 
 6 | 
 7 | class Migration(migrations.Migration):
 8 | 
 9 |     dependencies = [("webfront", "0008_dw_changes")]
10 | 
11 |     operations = [
12 |         migrations.AddField(
13 |             model_name="entryannotation",
14 |             name="num_sequences",
15 |             field=models.FloatField(null=True),
16 |         ),
17 |         migrations.AlterField(
18 |             model_name="entryannotation",
19 |             name="accession",
20 |             field=models.ForeignKey(
21 |                 db_column="accession",
22 |                 null=True,
23 |                 on_delete=django.db.models.deletion.SET_NULL,
24 |                 to="webfront.Entry",
25 |             ),
26 |         ),
27 |     ]
28 | 


--------------------------------------------------------------------------------
/webfront/migrations/0029_remove_is_featured_plus_llm_entry.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.2.23 on 2024-02-21 10:18
 2 | 
 3 | from django.db import migrations, models
 4 | import jsonfield.fields
 5 | 
 6 | 
 7 | class Migration(migrations.Migration):
 8 | 
 9 |     dependencies = [
10 |         ('webfront', '0028_in_alphafold'),
11 |     ]
12 | 
13 |     operations = [
14 |         migrations.RemoveField(
15 |             model_name='entry',
16 |             name='is_featured',
17 |         ),
18 |         migrations.AddField(
19 |             model_name='entry',
20 |             name='is_llm',
21 |             field=models.BooleanField(default=False),
22 |         ),
23 |         migrations.AddField(
24 |             model_name='entry',
25 |             name='is_reviewed_llm',
26 |             field=models.BooleanField(default=False),
27 |         ),
28 |         migrations.AddField(
29 |             model_name='entry',
30 |             name='representative_structure',
31 |             field=jsonfield.fields.JSONField(null=True),
32 |         ),
33 |     ]
34 | 


--------------------------------------------------------------------------------
/webfront/migrations/0019_entrytaxa_table.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.2.8 on 2022-03-18 08:03
 2 | 
 3 | from django.db import migrations, models
 4 | import django.db.models.deletion
 5 | import jsonfield.fields
 6 | 
 7 | 
 8 | class Migration(migrations.Migration):
 9 | 
10 |     dependencies = [("webfront", "0018_taxa_modifier")]
11 | 
12 |     operations = [
13 |         migrations.CreateModel(
14 |             name="EntryTaxa",
15 |             fields=[
16 |                 (
17 |                     "accession",
18 |                     models.OneToOneField(
19 |                         db_column="accession",
20 |                         on_delete=django.db.models.deletion.CASCADE,
21 |                         primary_key=True,
22 |                         serialize=False,
23 |                         to="webfront.entry",
24 |                     ),
25 |                 ),
26 |                 ("tree", jsonfield.fields.JSONField(null=True)),
27 |             ],
28 |             options={"db_table": "webfront_entrytaxa"},
29 |         ),
30 |         migrations.RemoveField(model_name="entry", name="taxa"),
31 |     ]
32 | 


--------------------------------------------------------------------------------
/webfront/exceptions.py:
--------------------------------------------------------------------------------
 1 | class DeletedEntryError(Exception):
 2 |     def __init__(self, accession, database, _type, name, short_name, history, date):
 3 |         self.accession = accession
 4 |         self.database = database
 5 |         self.type = _type
 6 |         self.name = name
 7 |         self.short_name = short_name
 8 |         self.history = history
 9 |         self.date = date
10 | 
11 | 
12 | class EmptyQuerysetError(Exception):
13 |     def __init__(self, message):
14 |         self.message = message
15 | 
16 | 
17 | class ExpectedUniqueError(Exception):
18 |     def __init__(self, message):
19 |         self.message = message
20 | 
21 | 
22 | class HmmerWebError(Exception):
23 |     def __init__(self, message):
24 |         self.message = message
25 | 
26 | 
27 | class BadURLParameterError(Exception):
28 |     def __init__(self, message):
29 |         self.message = message
30 | 
31 | 
32 | class InvalidOperationRequest(Exception):
33 |     def __init__(self, message):
34 |         self.message = message
35 | 
36 | 
37 | class DeprecatedModifier(Exception):
38 |     def __init__(self, message):
39 |         self.message = message
40 | 


--------------------------------------------------------------------------------
/webfront/migrations/0036_default_value_lists_dicts.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 4.2.19 on 2025-02-24 14:14
 2 | 
 3 | from django.db import migrations
 4 | import jsonfield.fields
 5 | 
 6 | 
 7 | class Migration(migrations.Migration):
 8 | 
 9 |     dependencies = [
10 |         ("webfront", "0035_interpronmatches"),
11 |     ]
12 | 
13 |     operations = [
14 |         migrations.AlterField(
15 |             model_name="entry",
16 |             name="overlaps_with",
17 |             field=jsonfield.fields.JSONField(default=list),
18 |         ),
19 |         migrations.AlterField(
20 |             model_name="protein",
21 |             name="structure",
22 |             field=jsonfield.fields.JSONField(default=dict, null=True),
23 |         ),
24 |         migrations.AlterField(
25 |             model_name="set",
26 |             name="authors",
27 |             field=jsonfield.fields.JSONField(default=list),
28 |         ),
29 |         migrations.AlterField(
30 |             model_name="set",
31 |             name="literature",
32 |             field=jsonfield.fields.JSONField(default=list),
33 |         ),
34 |         migrations.AlterField(
35 |             model_name="set",
36 |             name="wikipedia",
37 |             field=jsonfield.fields.JSONField(default=list),
38 |         ),
39 |     ]
40 | 


--------------------------------------------------------------------------------
/webfront/migrations/0022_chain_sequence.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.2.15 on 2023-08-03 16:04
 2 | 
 3 | from django.db import migrations, models
 4 | import django.db.models.deletion
 5 | 
 6 | 
 7 | class Migration(migrations.Migration):
 8 | 
 9 |     dependencies = [
10 |         ("webfront", "0021_set_info"),
11 |     ]
12 | 
13 |     operations = [
14 |         migrations.CreateModel(
15 |             name="ChainSequence",
16 |             fields=[
17 |                 ("id", models.IntegerField(primary_key=True, serialize=False)),
18 |                 ("chain", models.CharField(db_column="chain_acc", max_length=10)),
19 |                 ("sequence_bin", models.BinaryField(db_column="sequence", null=True)),
20 |                 ("length", models.IntegerField()),
21 |                 (
22 |                     "structure",
23 |                     models.ForeignKey(
24 |                         blank=True,
25 |                         db_column="structure_acc",
26 |                         null=True,
27 |                         on_delete=django.db.models.deletion.SET_NULL,
28 |                         to="webfront.structure",
29 |                     ),
30 |                 ),
31 |             ],
32 |             options={
33 |                 "db_table": "webfront_chain_sequence",
34 |             },
35 |         ),
36 |     ]
37 | 


--------------------------------------------------------------------------------
/webfront/migrations/0004_taxonomy_per_entryX.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 2.1.10 on 2019-10-25 11:20
 2 | 
 3 | from django.db import migrations, models
 4 | import django.db.models.deletion
 5 | 
 6 | 
 7 | class Migration(migrations.Migration):
 8 | 
 9 |     dependencies = [("webfront", "0003_taxonomy_per_entryDB")]
10 | 
11 |     operations = [
12 |         migrations.RenameField(
13 |             model_name="taxonomyperentrydb",
14 |             old_name="entry_db",
15 |             new_name="source_database",
16 |         ),
17 |         migrations.AlterField(
18 |             model_name="taxonomyperentry",
19 |             name="taxonomy",
20 |             field=models.ForeignKey(
21 |                 blank=True,
22 |                 db_column="tax_id",
23 |                 null=True,
24 |                 on_delete=django.db.models.deletion.SET_NULL,
25 |                 to="webfront.Taxonomy",
26 |             ),
27 |         ),
28 |         migrations.AlterField(
29 |             model_name="taxonomyperentrydb",
30 |             name="taxonomy",
31 |             field=models.ForeignKey(
32 |                 blank=True,
33 |                 db_column="tax_id",
34 |                 null=True,
35 |                 on_delete=django.db.models.deletion.SET_NULL,
36 |                 to="webfront.Taxonomy",
37 |             ),
38 |         ),
39 |     ]
40 | 


--------------------------------------------------------------------------------
/webfront/migrations/0003_taxonomy_per_entryDB.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 2.1.10 on 2019-10-16 10:58
 2 | 
 3 | from django.db import migrations, models
 4 | import django.db.models.deletion
 5 | import jsonfield.fields
 6 | 
 7 | 
 8 | class Migration(migrations.Migration):
 9 | 
10 |     dependencies = [("webfront", "0002_taxonomy_per_entry")]
11 | 
12 |     operations = [
13 |         migrations.CreateModel(
14 |             name="TaxonomyPerEntryDB",
15 |             fields=[
16 |                 (
17 |                     "id",
18 |                     models.AutoField(
19 |                         auto_created=True,
20 |                         primary_key=True,
21 |                         serialize=False,
22 |                         verbose_name="ID",
23 |                     ),
24 |                 ),
25 |                 ("entry_db", models.CharField(db_index=True, max_length=100)),
26 |                 ("counts", jsonfield.fields.JSONField(null=True)),
27 |                 (
28 |                     "taxonomy",
29 |                     models.ForeignKey(
30 |                         blank=True,
31 |                         null=True,
32 |                         on_delete=django.db.models.deletion.SET_NULL,
33 |                         to="webfront.Taxonomy",
34 |                     ),
35 |                 ),
36 |             ],
37 |         )
38 |     ]
39 | 


--------------------------------------------------------------------------------
/webfront/migrations/0002_taxonomy_per_entry.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 2.1.10 on 2019-10-15 10:52
 2 | 
 3 | from django.db import migrations, models
 4 | import django.db.models.deletion
 5 | import jsonfield.fields
 6 | 
 7 | 
 8 | class Migration(migrations.Migration):
 9 | 
10 |     dependencies = [("webfront", "0001_merged")]
11 | 
12 |     operations = [
13 |         migrations.CreateModel(
14 |             name="TaxonomyPerEntry",
15 |             fields=[
16 |                 (
17 |                     "id",
18 |                     models.AutoField(
19 |                         auto_created=True,
20 |                         primary_key=True,
21 |                         serialize=False,
22 |                         verbose_name="ID",
23 |                     ),
24 |                 ),
25 |                 ("counts", jsonfield.fields.JSONField(null=True)),
26 |                 (
27 |                     "entry_acc",
28 |                     models.ForeignKey(
29 |                         db_column="entry_acc",
30 |                         null=True,
31 |                         on_delete=django.db.models.deletion.SET_NULL,
32 |                         to="webfront.Entry",
33 |                     ),
34 |                 ),
35 |                 (
36 |                     "taxonomy",
37 |                     models.ForeignKey(
38 |                         blank=True,
39 |                         null=True,
40 |                         on_delete=django.db.models.deletion.SET_NULL,
41 |                         to="webfront.Taxonomy",
42 |                     ),
43 |                 ),
44 |             ],
45 |         )
46 |     ]
47 | 


--------------------------------------------------------------------------------
/webfront/migrations/0035_interpronmatches.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 4.2.19 on 2025-02-24 14:11
 2 | 
 3 | from django.db import migrations, models
 4 | import django.db.models.deletion
 5 | 
 6 | 
 7 | class Migration(migrations.Migration):
 8 | 
 9 |     dependencies = [
10 |         ("webfront", "0034_set_wikipedia"),
11 |     ]
12 | 
13 |     operations = [
14 |         migrations.CreateModel(
15 |             name="InterProNMatches",
16 |             fields=[
17 |                 ("match_id", models.AutoField(primary_key=True, serialize=False)),
18 |                 ("in_interpro", models.BooleanField(db_column="in_interpro")),
19 |                 ("is_preferred", models.BooleanField(db_column="is_preferred")),
20 |                 ("locations", models.JSONField()),
21 |                 (
22 |                     "entry",
23 |                     models.ForeignKey(
24 |                         db_column="entry_acc",
25 |                         null=True,
26 |                         on_delete=django.db.models.deletion.SET_NULL,
27 |                         to="webfront.entry",
28 |                     ),
29 |                 ),
30 |                 (
31 |                     "protein_acc",
32 |                     models.ForeignKey(
33 |                         db_column="protein_acc",
34 |                         null=True,
35 |                         on_delete=django.db.models.deletion.SET_NULL,
36 |                         to="webfront.protein",
37 |                     ),
38 |                 ),
39 |             ],
40 |             options={
41 |                 "db_table": "webfront_interpro_n",
42 |             },
43 |         ),
44 |     ]
45 | 


--------------------------------------------------------------------------------
/interpro/renderers.py:
--------------------------------------------------------------------------------
 1 | from rest_framework import renderers
 2 | import io
 3 | import csv
 4 | 
 5 | fields_to_exclude = ["entry_annotations"]
 6 | 
 7 | 
 8 | def flatDict(original, output=None, prefix=None):
 9 |     if output is None:
10 |         output = {}
11 |     for key, value in original.items():
12 |         p_key = key if prefix is None else "{}__{}".format(prefix, key)
13 |         if isinstance(value, dict):
14 |             flatDict(value, output, prefix=p_key)
15 |         else:
16 |             output[p_key] = value
17 | 
18 |     return output
19 | 
20 | 
21 | class TSVRenderer(renderers.BaseRenderer):
22 |     media_type = "text/tab-separated-values"
23 |     format = "tsv"
24 | 
25 |     def render(self, data, media_type=None, renderer_context=None):
26 |         objs = None
27 |         if "metadata" in data:
28 |             objs = [data["metadata"]]
29 |             # writer.writeheader(data["metadata"])
30 |         elif "results" in data:
31 |             # writer.writeheader(data["results"][0]["metadata"])
32 |             objs = [item["metadata"] for item in data["results"]]
33 |         elif isinstance(data, dict):
34 |             objs = [flatDict(data)]
35 | 
36 |         output = io.StringIO()
37 |         if objs is not None:
38 |             writer = csv.DictWriter(
39 |                 output,
40 |                 fieldnames=[k for k in sorted(objs[0]) if k not in fields_to_exclude],
41 |                 extrasaction="ignore",
42 |                 delimiter="\t",
43 |                 quoting=csv.QUOTE_NONNUMERIC,
44 |             )
45 |             writer.writeheader()
46 |             writer.writerows(objs)
47 | 
48 |         return output.getvalue()
49 | 


--------------------------------------------------------------------------------
/webfront/static/swagger/index.html:
--------------------------------------------------------------------------------
 1 | <!-- HTML for static distribution bundle build -->
 2 | <!DOCTYPE html>
 3 | <html lang="en">
 4 |   <head>
 5 |     <meta charset="UTF-8">
 6 |     <title>Swagger UI</title>
 7 |     <link rel="stylesheet" type="text/css" href="./swagger-ui.css" />
 8 |     <link rel="icon" type="image/png" href="./favicon-32x32.png" sizes="32x32" />
 9 |     <link rel="icon" type="image/png" href="./favicon-16x16.png" sizes="16x16" />
10 |     <style>
11 |       html
12 |       {
13 |         box-sizing: border-box;
14 |         overflow: -moz-scrollbars-vertical;
15 |         overflow-y: scroll;
16 |       }
17 | 
18 |       *,
19 |       *:before,
20 |       *:after
21 |       {
22 |         box-sizing: inherit;
23 |       }
24 | 
25 |       body
26 |       {
27 |         margin:0;
28 |         background: #fafafa;
29 |       }
30 |     </style>
31 |   </head>
32 | 
33 |   <body>
34 |     <div id="swagger-ui"></div>
35 | 
36 |     <script src="./swagger-ui-bundle.js" charset="UTF-8"> </script>
37 |     <script src="./swagger-ui-standalone-preset.js" charset="UTF-8"> </script>
38 |     <script>
39 |     window.onload = function() {
40 |       // Begin Swagger UI call region
41 |       const ui = SwaggerUIBundle({
42 |         url: "../interpro7-swagger.yml",
43 |         dom_id: '#swagger-ui',
44 |         deepLinking: true,
45 |         presets: [
46 |           SwaggerUIBundle.presets.apis
47 |         ],
48 |         plugins: [
49 |           SwaggerUIBundle.plugins.DownloadUrl
50 |         ],
51 |         layout: "BaseLayout"
52 |       });
53 |       // End Swagger UI call region
54 | 
55 |       window.ui = ui;
56 |     };
57 |   </script>
58 |   </body>
59 | </html>
60 | 


--------------------------------------------------------------------------------
/example_data/protein/M5ADK6.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "metadata" : {
 3 |     "accession": "M5ADK6",
 4 |     "id": "M5ADK6_LACBR",
 5 |     "sourceOrganism": {
 6 |       "name": "Lactobacillus brevis KB290",
 7 |       "taxid": 1001583
 8 |     },
 9 |     "name": {
10 |       "name": "Band 7 protein",
11 |       "short": null,
12 |       "other": []
13 |     },
14 |     "description": null,
15 |     "length": 297,
16 |     "sequence": "MESGIIEILIRNGVSHMTEKPVFHINGYLGLILVLVILGLGVYLSVVGWGVLGVILVVLAVLAASSLTIIEPNQSKVLTFFGRYIGTIKESGLYLTVPLTTKTTVSLRVRNFNSAILKVNDLQGNPVEIAAVIVFKVVDTSKALFAVEDYEKFVEIQSESAIRHVASEYAYDNFGDHQALTLRSNPTEVSNHLTEELQARLEVAGVQIIETRLTHLAYATEIASAMLQRQQSQAILSARKIIVEGAVSITEGAIEQLAAETDLHLTDNQKLQLINNMMVSIINERGSQPVINTGKVE",
17 |     "proteome": "UP000012042",
18 |     "gene": "LVISKB_0797",
19 |     "GO": {
20 |       "biologicalProcess": [],
21 |       "molecularFunction": [],
22 |       "cellularComponent": [
23 |         {
24 |           "id": "GO:0016020",
25 |           "name": "membrane"
26 |         }
27 |       ]
28 |     },
29 |     "proteinEvidence": 1
30 |   },
31 |   "representation": {
32 |     "entries": [
33 |       {
34 |         "id": "IPR001972",
35 |         "name": "Stomatin family",
36 |         "type": "family"
37 |       },
38 |       {
39 |         "id": "IPR001107",
40 |         "name": "Band 7 domain",
41 |         "type": "domain"
42 |       }
43 |     ],
44 |     "signalPeptide": [],
45 |     "transmembrane": [],
46 |     "coiledCoil": [],
47 |     "lowComplexity_disorder": [],
48 |     "activeSites": [],
49 |     "perResidueFeatures": [],
50 |     "disulphideBridges": []
51 |   },
52 |   "structure": {
53 |     "chains": []
54 |   },
55 |   "genomicContext": {
56 |     "DNA": ""
57 |   }
58 | }
59 | 


--------------------------------------------------------------------------------
/webfront/response.py:
--------------------------------------------------------------------------------
 1 | from rest_framework.response import Response as R
 2 | from django.conf import settings
 3 | 
 4 | from webfront.models import Database
 5 | 
 6 | 
 7 | class Response(R):
 8 |     def __init__(
 9 |         self,
10 |         data=None,
11 |         status=None,
12 |         template_name=None,
13 |         headers={},
14 |         exception=False,
15 |         content_type=None,
16 |     ):
17 | 
18 |         if settings.DEBUG:
19 |             from django.db import connection
20 |             from webfront.searcher.elastic_controller import es_results
21 | 
22 |             timings = [
23 |                 # 'cpu;dur=1;desc="CPU"',
24 |                 'mysql;dur={:0.2f};desc="MySQL"'.format(
25 |                     sum((float(query["time"]) for query in connection.queries)) * 1000
26 |                 ),
27 |                 # 'filesystem;dur=0;desc="Filesystem"',
28 |                 'es;dur={:0.2f};desc="Elasticsearch"'.format(
29 |                     sum(query["took"] for query in es_results if "took" in query)
30 |                 ),
31 |                 # 'django;dur={:0.2f};desc=Django'.format(
32 |                 #     (datetime.now().timestamp() - django_time['time']) * 1000
33 |                 # )
34 |             ]
35 | 
36 |             headers["Server-Timing"] = ",".join(timings)
37 | 
38 |         if not hasattr(settings, "CACHED_VERSION"):
39 |             settings.CACHED_VERSION = Database.objects.get(pk="interpro").version
40 | 
41 |         headers["InterPro-Version"] = settings.CACHED_VERSION
42 |         headers["InterPro-Version-Minor"] = settings.MINOR_VERSION
43 | 
44 |         super(Response, self).__init__(
45 |             data, status, template_name, headers, exception, content_type
46 |         )
47 | 


--------------------------------------------------------------------------------
/webfront/migrations/0013_protein_extra.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.0.7 on 2021-01-11 16:29
 2 | 
 3 | from django.db import migrations, models
 4 | import jsonfield.fields
 5 | 
 6 | 
 7 | class Migration(migrations.Migration):
 8 | 
 9 |     dependencies = [("webfront", "0012_seq_and_seq_raw")]
10 | 
11 |     operations = [
12 |         migrations.CreateModel(
13 |             name="ProteinExtraFeatures",
14 |             fields=[
15 |                 ("feature_id", models.IntegerField(primary_key=True, serialize=False)),
16 |                 ("protein_acc", models.CharField(max_length=15)),
17 |                 ("entry_acc", models.CharField(max_length=25)),
18 |                 ("source_database", models.CharField(max_length=10)),
19 |                 ("location_start", models.IntegerField()),
20 |                 ("location_end", models.IntegerField()),
21 |                 ("sequence_feature", models.CharField(max_length=35)),
22 |             ],
23 |             options={"db_table": "webfront_proteinfeature"},
24 |         ),
25 |         migrations.CreateModel(
26 |             name="ProteinResidues",
27 |             fields=[
28 |                 ("residue_id", models.IntegerField(primary_key=True, serialize=False)),
29 |                 ("protein_acc", models.CharField(max_length=15)),
30 |                 ("entry_acc", models.CharField(max_length=25)),
31 |                 ("entry_name", models.CharField(max_length=100)),
32 |                 ("source_database", models.CharField(max_length=10)),
33 |                 ("description", models.CharField(max_length=255)),
34 |                 ("fragments", jsonfield.fields.JSONField(null=True)),
35 |             ],
36 |             options={"db_table": "webfront_proteinresidue"},
37 |         ),
38 |     ]
39 | 


--------------------------------------------------------------------------------
/webfront/serializers/utils.py:
--------------------------------------------------------------------------------
 1 | def to_camel_case(snake_case_string):
 2 |     parts = snake_case_string.split("_")
 3 |     return parts[0] + "".join(p[0].upper() + p[1:] for p in parts[1:])
 4 | 
 5 | 
 6 | def set_or_create_and_set(obj, path, value):
 7 |     key = path[0]
 8 |     if key in obj:
 9 |         if len(path) == 1:
10 |             raise KeyError(
11 |                 'Overload of key "{}", please use the corresponding field in the other table'.format(
12 |                     key
13 |                 )
14 |             )
15 |         else:
16 |             set_or_create_and_set(obj[key], path[1:], value)
17 |     else:
18 |         if len(path) == 1:
19 |             obj[key] = value
20 |         else:
21 |             obj[key] = {}
22 |             set_or_create_and_set(obj[key], path[1:], value)
23 | 
24 | 
25 | def flat_to_nested(flat, convert_to_camel_case=True):
26 |     nested = {}
27 |     for key, value in flat.items():
28 |         path = key.split("__")
29 |         if convert_to_camel_case:
30 |             path = [to_camel_case(part) for part in path]
31 |         set_or_create_and_set(nested, path, value)
32 |     return nested
33 | 
34 | 
35 | # def recategorise_go_terms(go_terms):
36 | #     for term in go_terms:
37 | #         if "category" in term and "code" in term['category']:
38 | #             if term['category']["code"] == "F":
39 | #                 term['category'] = "Molecular Function"
40 | #             elif term['category']["code"] == "C":
41 | #                 term['category'] = "Cellular Component"
42 | #             elif term['category']["code"] == "P":
43 | #                 term['category'] = "Biological Process"
44 | #             return
45 | #         raise Exception("Unknown Go Term category '{0}'".format(term['category']))
46 | 


--------------------------------------------------------------------------------
/webfront/constants.py:
--------------------------------------------------------------------------------
 1 | from enum import Enum
 2 | 
 3 | 
 4 | class SerializerDetail(Enum):
 5 |     ALL = 1
 6 | 
 7 |     ENTRY_HEADERS = 100
 8 |     ENTRY_OVERVIEW = 101
 9 |     ENTRY_DETAIL = 102
10 |     ENTRY_MATCH = 103
11 |     ENTRY_PROTEIN_HEADERS = 105
12 |     ENTRY_DB = 106
13 | 
14 |     PROTEIN_HEADERS = 200
15 |     PROTEIN_OVERVIEW = 201
16 |     PROTEIN_DETAIL = 202
17 |     PROTEIN_ENTRY_DETAIL = 203
18 |     PROTEIN_DB = 204
19 | 
20 |     STRUCTURE_HEADERS = 300
21 |     STRUCTURE_OVERVIEW = 301
22 |     STRUCTURE_DETAIL = 302
23 |     STRUCTURE_CHAIN = 303
24 |     STRUCTURE_ENTRY_DETAIL = 304
25 |     STRUCTURE_PROTEIN_DETAIL = 305
26 |     STRUCTURE_DB = 306
27 | 
28 |     TAXONOMY_HEADERS = 400
29 |     TAXONOMY_OVERVIEW = 401
30 |     TAXONOMY_DETAIL = 402
31 |     # TAXONOMY_CHAIN = 403
32 |     # TAXONOMY_ENTRY_DETAIL = 404
33 |     # TAXONOMY_PROTEIN_DETAIL = 405
34 |     TAXONOMY_DB = 406
35 |     TAXONOMY_DETAIL_NAMES = 432
36 |     TAXONOMY_PER_ENTRY = 410
37 |     TAXONOMY_PER_ENTRY_DB = 411
38 | 
39 |     PROTEOME_OVERVIEW = 450
40 |     PROTEOME_HEADERS = 451
41 |     PROTEOME_DETAIL = 453
42 |     ORGANISM_TAXONOMY_PROTEOME = 420
43 |     ORGANISM_TAXONOMY_PROTEOME_HEADERS = 421
44 |     PROTEOME_DB = 460
45 | 
46 |     SET_HEADERS = 500
47 |     SET_OVERVIEW = 501
48 |     SET_DETAIL = 502
49 |     SET_DB = 503
50 | 
51 |     IDA_LIST = 600
52 | 
53 |     GROUP_BY = 800
54 |     GROUP_BY_MEMBER_DATABASES = 801
55 | 
56 |     ANNOTATION_BLOB = 1000
57 | 
58 | 
59 | class QuerysetType(Enum):
60 |     ENTRY = 100
61 |     PROTEIN = 200
62 |     STRUCTURE = 300
63 |     ENTRY_PROTEIN = 150
64 |     ENTRY_STRUCTURE = 160
65 |     STRUCTURE_PROTEIN = 250
66 | 
67 | 
68 | class ModifierType(Enum):
69 |     FILTER = 1
70 |     REPLACE_PAYLOAD = 2
71 |     EXTEND_PAYLOAD = 3
72 | 


--------------------------------------------------------------------------------
/webfront/tests/test_mail.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | 
 3 | from django.test import TestCase
 4 | from django.test import Client
 5 | from rest_framework import status
 6 | 
 7 | 
 8 | class TestMail(TestCase):
 9 |     def test_mail(self, sleep=60):
10 |         self.client = Client()
11 |         response = self.client.post(
12 |             "/api/mail/",
13 |             {
14 |                 "path": "echo",
15 |                 "subject": "Add annotation test from API",
16 |                 "message": "Test",
17 |                 "from_email": "swaathik@ebi.ac.uk",
18 |             },
19 |         )
20 |         self.assertEqual(response.status_code, status.HTTP_200_OK)
21 |         self.assertEqual(response.json()["from"], "swaathik@ebi.ac.uk")
22 |         time.sleep(sleep)
23 | 
24 |     def test_spam(self):
25 |         self.test_mail(sleep=0)
26 |         self.client = Client()
27 |         response = self.client.post(
28 |             "/api/mail/",
29 |             {
30 |                 "path": "echo",
31 |                 "subject": "Add annotation test from API",
32 |                 "message": "Test",
33 |                 "from_email": "swaathik@ebi.ac.uk",
34 |             },
35 |         )
36 |         self.assertEqual(response.status_code, status.HTTP_429_TOO_MANY_REQUESTS)
37 |         time.sleep(60)
38 | 
39 |     def test_mail_invalid_queue(self):
40 |         self.client = Client()
41 |         response = self.client.post(
42 |             "/api/mail/",
43 |             {
44 |                 "path": "echo",
45 |                 "subject": "Add annotation test from API",
46 |                 "message": "Test",
47 |                 "queue": "uniprot",
48 |                 "from_email": "swaathik@ebi.ac.uk",
49 |             },
50 |         )
51 |         self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
52 |         time.sleep(60)
53 | 


--------------------------------------------------------------------------------
/webfront/searcher/README.md:
--------------------------------------------------------------------------------
 1 | Searcher Controller
 2 | ===
 3 | 
 4 | The classes here are responsible of executing queries in our search index.
 5 | Although we are using a Search index, we don't just use if for search purposes, it is also effectively the join table of all the entities in MySQL.
 6 | 
 7 | Initially it was split into `SearchController` and `ElasticController` because at that time, we hadn't decided upon the technology we were going to use. 
 8 | `SearchController` is basically an abstract class with methods that need to be implemented by any particular 
 9 | technology controller. We eventually chose Elasticsearch, so the other controllers were removed to avoid the redundancy of maintaining multiple systems that we weren't being used. Therefore for the rest of the document we will be focus on the Elasticsearch controller
10 | 
11 | Elasticsearch allows a query to be submitted by either the `q` URL parameter or in the body of the request. 
12 | The general approach in this class is to use the `q` parameter for the filtering of the index and use the _body_ method for the aggregations and other complex operation. We combined these two methods; for example to get the number of InterPro matches of a protein, we filter the index using something like: `q=protein_acc:p99999 AND entry_type:interpro` and then in the body of the query, we aggregate the results to get the count of *unique* matches. e.g.
13 | ```json
14 | {
15 |   "aggs": {
16 |     "count": {"cardinality": {"field": "entry_acc"}}
17 |   },
18 |   "size": 0
19 | }
20 | ```
21 | 
22 | Most of the methods in this class are about building generalisations to create the JSON to include in the query. For example the counter above is part of the `get_grouped_object()` method wich can be used in the different endpoints.
23 | 
24 | There are only 2 methods that actually deal with the HTTP transaction to Elasticsearch `execute_query()` and `_elastic_json_query()` (*Note*: checking this now makes me think we could merge this 2 methods).
25 | 


--------------------------------------------------------------------------------
/webfront/tests/managed_model_test_runner.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import re
 3 | import sys
 4 | from django.test.runner import DiscoverRunner
 5 | from django.conf import settings
 6 | 
 7 | 
 8 | class UnManagedModelTestRunner(DiscoverRunner):
 9 |     """
10 |     Test runner that automatically makes all unmanaged models in your Django
11 |     project managed for the duration of the test run.
12 |     Many thanks to the Caktus Group: http://bit.ly/1N8TcHW
13 |     """
14 | 
15 |     def __init__(self, *args, **kwargs):
16 |         settings.IN_TEST_MODE = True
17 |         self.unmanaged_models = []
18 |         super(UnManagedModelTestRunner, self).__init__(*args, **kwargs)
19 | 
20 |     def setup_test_environment(self, *args, **kwargs):
21 |         from django.apps import apps
22 | 
23 |         myapp = apps.get_app_config("webfront")
24 |         self.unmanaged_models = [
25 |             m for m in myapp.models.values() if not m._meta.managed
26 |         ]
27 |         for m in self.unmanaged_models:
28 |             m._meta.managed = True
29 |             m._meta.db_table = re.sub(
30 |                 r'^"([^"]+)"\."([^"]+)"$', r"\1_\2", m._meta.db_table
31 |             )
32 | 
33 |         super(UnManagedModelTestRunner, self).setup_test_environment(*args, **kwargs)
34 | 
35 |     def teardown_test_environment(self, *args, **kwargs):
36 |         super(UnManagedModelTestRunner, self).teardown_test_environment(*args, **kwargs)
37 |         # reset unmanaged models
38 |         for m in self.unmanaged_models:
39 |             m._meta.managed = False
40 | 
41 | 
42 | if "interpro_ro" in settings.DATABASES and (
43 |     "test" in sys.argv or "test_coverage" in sys.argv
44 | ):  # Covers regular testing and django-coverage
45 |     settings.DATABASES["interpro_ro"]["ENGINE"] = "django.db.backends.sqlite3"
46 |     settings.DATABASES["interpro_ro"]["NAME"] = os.path.join(
47 |         settings.BASE_DIR, "../database/db3.sqlite3"
48 |     )
49 |     settings.DATABASES["interpro_ro"]["TEST"] = {"MIRROR": "default"}
50 | 


--------------------------------------------------------------------------------
/example_data/protein/A0A0A2L2G2.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "metadata" : {
 3 |     "accession": "A0A0A2L2G2",
 4 |     "id": "A0A0A2L2G2_PENIT",
 5 |     "sourceOrganism": {
 6 |       "name": "Penicillium italicum",
 7 |       "taxid": 40296
 8 |     },
 9 |     "name": {
10 |       "name": "Propeptide, carboxypeptidase Y",
11 |       "short": null,
12 |       "other": []
13 |     },
14 |     "description": null,
15 |     "length": 550,
16 |     "sequence": "MRVLSTTLLVGAASAAAPSFQQVLGAHSEHAENVAQQGADAFKPLQHLQDQFKSLSSEARQLWEEVSNYFPESMGSAPMLSLPKKHTRRPDSHWDYHVSGAKVQDIWVSGAEGTKEREVDGKLEDYALRAKKVDPSALGIDPGVKQYSGYLDDNENDKHLFYWFFESRNDPKNDPVVLWLNGGPGCSSLTGLFMELGPSSIGANIKPIYNDFSWNNNASVIFLDQPINVGYSYSGSSVSDTVAAGKDVYALLTLFFKQFPEYATQDFHIAGESYAGHYIPVMASEILSHKKRNINLKSVLIGNGLTDGLTQYEYYRPMACGEGGYPAVLDESTCQSMDNALSRCQSMIQSCYNSESPWVCVPASIYCNNAMLGPYQRTGQNVYDVRGKCEDESNLCYKGLGYVSEYLGQESVREAVGAEVDGYDSCNFDINRNFLFNGDWFKPYHRLVPGLLEQIPVLIYAGDADFICNWLGNKAWSEALEWPGQKEFASAELEDLKIVQNEHVGKKIGQIKSHGNFTFMRIFGGGHMVPMDQPESGLEFFNRWIGGEWF",
17 |     "proteome": "UP000030104",
18 |     "gene": "PITC_084940",
19 |     "GO": {
20 |       "biologicalProcess": [
21 |         {
22 |           "id": "GO:0006508",
23 |           "name": "proteolysis"
24 |         }
25 |       ],
26 |       "molecularFunction": [
27 |         {
28 |           "id": "GO:0004185",
29 |           "name": "serine-type carboxypeptidase activity"
30 |         }
31 |       ],
32 |       "cellularComponent": [
33 |         {
34 |           "id": "GO:0005773",
35 |           "name": "vacuole"
36 |         }
37 |       ]
38 |     },
39 |     "proteinEvidence": 1
40 |   },
41 |   "representation": {
42 |     "signalPeptide": [],
43 |     "transmembrane": [],
44 |     "coiledCoil": [],
45 |     "lowComplexity_disorder": [],
46 |     "activeSites": [
47 |       {
48 |         "id": "IPR018202",
49 |         "name": "Peptidase S10, serine carboxypeptidase, active site"
50 |       }
51 |     ],
52 |     "perResidueFeatures": [],
53 |     "disulphideBridges": []
54 |   },
55 |   "structure": {
56 |     "chains": []
57 |   },
58 |   "genomicContext": {
59 |     "DNA": ""
60 |   }
61 | }
62 | 


--------------------------------------------------------------------------------
/templates/rest_framework/api.html:
--------------------------------------------------------------------------------
 1 | {% extends "rest_framework/base.html" %}
 2 | {% load static %}
 3 | {% load rest_framework %}
 4 | {% load interpro_tags %}
 5 | 
 6 | {% block bootstrap_theme %}
 7 |     <link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootswatch/4.5.2/yeti/bootstrap.min.css" type="text/css">
 8 |     <link rel="stylesheet" href="{% static "interpro-api.css" %}"" type="text/css">
 9 | {% endblock %}
10 | 
11 | {% block breadcrumbs %}{% endblock %}
12 | 
13 | {% block title %}
14 |     InterPro API - EBI
15 | {% endblock %}
16 | 
17 | {% block branding %}
18 |     <img src="https://github.com/ProteinsWebTeam/interpro7-client/raw/master/src/images/logo/logo_InterPro.png" alt="InterPro 7" width="200px">
19 | {% endblock %}
20 | 
21 | {% block request_forms %}
22 |     {% if 'GET' in allowed_methods %}
23 |     <form id="get-form" class="pull-right">
24 |     <fieldset>
25 |         {% if api_settings.URL_FORMAT_OVERRIDE %}
26 |         <div class="btn-group format-selection">
27 |             <a class="btn btn-primary js-tooltip" href="{{ request.get_full_path }}" rel="nofollow" title="Make a GET request on the {{ name }} resource">GET</a>
28 | 
29 |             <button class="btn btn-primary dropdown-toggle js-tooltip" data-toggle="dropdown" title="Specify a format for the GET request">
30 |             <span class="caret"></span>
31 |             </button>
32 |             <ul class="dropdown-menu">
33 |             {% for format in available_formats %}
34 |                 <li>
35 |                 <a class="js-tooltip format-option" href="{% get_url_with_prefix request api_settings.URL_FORMAT_OVERRIDE format %}" rel="nofollow" title="Make a GET request on the {{ name }} resource with the format set to `{{ format }}`">{{ format }}</a>
36 |                 </li>
37 |             {% endfor %}
38 |             </ul>
39 |         </div>
40 |         {% else %}
41 |         <a class="btn btn-primary js-tooltip" href="{{ request.get_full_path }}" rel="nofollow" title="Make a GET request on the {{ name }} resource">GET</a>
42 |         {% endif %}
43 |     </fieldset>
44 |     </form>
45 |     {% endif %}
46 | {% endblock %}
47 | 


--------------------------------------------------------------------------------
/webfront/tests/tests_structure_endpoint.py:
--------------------------------------------------------------------------------
 1 | from rest_framework import status
 2 | from webfront.tests.InterproRESTTestCase import InterproRESTTestCase
 3 | 
 4 | 
 5 | class StructureRESTTest(InterproRESTTestCase):
 6 |     def test_can_read_structure_overview(self):
 7 |         response = self.client.get("/api/structure")
 8 |         self.assertEqual(response.status_code, status.HTTP_200_OK)
 9 |         self._check_structure_count_overview(response.data)
10 | 
11 |     def test_can_read_structure_pdb(self):
12 |         response = self.client.get("/api/structure/pdb")
13 |         self.assertEqual(response.status_code, status.HTTP_200_OK)
14 |         self._check_is_list_of_objects_with_key(response.data["results"], "metadata")
15 |         self.assertEqual(len(response.data["results"]), 4)
16 | 
17 |     def test_can_read_structure_pdb_accession(self):
18 |         response = self.client.get("/api/structure/pdb/2BKM")
19 |         self.assertEqual(response.status_code, status.HTTP_200_OK)
20 |         self.assertIn("metadata", response.data)
21 |         self._check_structure_details(response.data["metadata"])
22 |         self.assertIn("proteins", response.data["metadata"]["counters"])
23 |         self.assertIn("entries", response.data["metadata"]["counters"])
24 |         self.assertEqual(2, response.data["metadata"]["counters"]["proteins"])
25 |         self.assertEqual(1, response.data["metadata"]["counters"]["entries"])
26 | 
27 |     def test_can_read_structure_pdb_accession_chain(self):
28 |         response = self.client.get("/api/structure/pdb/2bkm/B")
29 |         self.assertEqual(response.status_code, status.HTTP_200_OK)
30 |         self.assertIn("metadata", response.data)
31 |         self._check_structure_details(response.data["metadata"])
32 |         for chain in response.data["metadata"]["chains"].values():
33 |             self._check_structure_chain_details(chain)
34 |             self.assertEqual(chain["chain"].upper(), "B")
35 | 
36 |     # TODO:
37 |     def test_cant_read_structure_bad_db(self):
38 |         self._check_HTTP_response_code(
39 |             "/api/structure/bad_db", code=status.HTTP_404_NOT_FOUND
40 |         )
41 | 
42 |     def test_cant_read_structure_pdb_bad_chain(self):
43 |         self._check_HTTP_response_code("/api/structure/pdb/2bkm/C")
44 | 


--------------------------------------------------------------------------------
/example_data/protein/A1CUJ5.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "metadata" : {
 3 |     "accession": "A1CUJ5",
 4 |     "id": "CBPYA_ASPCL",
 5 |     "sourceOrganism": {
 6 |       "name": "Aspergillus clavatus (strain ATCC 1007 / CBS 513.65 / DSM 816 / NCTC 3887 / NRRL 1)",
 7 |       "taxid": 344612
 8 |     },
 9 |     "name": {
10 |       "name": "Carboxypeptidase Y homolog A",
11 |       "short": null,
12 |       "other": []
13 |     },
14 |     "description": "Vacuolar carboxypeptidase involved in degradation of small peptides. Digests preferentially peptides containing an aliphatic or hydrophobic residue in P1' position, as well as methionine, leucine or phenylalanine in P1 position of ester substrate (By similarity).",
15 |     "length": 543,
16 |     "sequence": "MRVLPATLLVGAATAAVPPFQQILGLPKKGADTLSKPLHDFQEQLKTLSDDARRLWDEVAKHFPDSMDHNPVFSLPKKHTRRPDSHWDHIVRGADVQSVWVTGANGEKEREVDGKLEAYDLRVKTTDPGALGIDPGVKQYTGYLDDNENDKHLFYWFFESRNDPKNDPVVLWLNGGPGCSSLTGLFLELGPSSIDSKIKPVYNDFAWNSNASVIFLDQPVNVGYSYSGSAVSDTVAAGKDVYALLTLFFKQFPEYAKQDFHIAGESYAGHYIPVFASEILSHKKRNINLKSVLIGNGLTDPLTQYDHYRPMACGDGGYPAVLDEASCQSMDNALPRCKSMIESCYNTESSWVCVPASIYCNNALIGPYQRTGQNVYDVRGKCEDESNLCYKGMGYVSEYLNKREVREAVGAEVDGYDSCNFDINRNFLFHGDWMKPYHRLVPGLLEQIPVLIYAGDADFICNWLGNKAWSEALEWPGQKEYASAELEDLVIEQNEHQGKKIGQIKSHGNFTFMRLYGGGHMVPMDQPEASLEFFNRWIGGEWF",
17 |     "proteome": "UP000006701",
18 |     "gene": "cpyA",
19 |     "GO": {
20 |       "biologicalProcess": [
21 |         {
22 |           "id": "GO:0006508",
23 |           "name": "proteolysis"
24 |         }
25 |       ],
26 |       "molecularFunction": [
27 |         {
28 |           "id": "GO:0004185",
29 |           "name": "serine-type carboxypeptidase activity"
30 |         }
31 |       ],
32 |       "cellularComponent": [
33 |         {
34 |           "id": "GO:0005773",
35 |           "name": "vacuole"
36 |         }
37 |       ]
38 |     },
39 |     "proteinEvidence": 3
40 |   },
41 |   "representation": {
42 |     "signalPeptide": [],
43 |     "transmembrane": [],
44 |     "coiledCoil": [],
45 |     "lowComplexity_disorder": [],
46 |     "activeSites": [
47 |       {
48 |         "id": "IPR018202",
49 |         "name": "Peptidase S10, serine carboxypeptidase, active site"
50 |       }
51 |     ],
52 |     "perResidueFeatures": [],
53 |     "disulphideBridges": []
54 |   },
55 |   "structure": {
56 |     "chains": []
57 |   },
58 |   "genomicContext": {
59 |     "DNA": ""
60 |   }
61 | }
62 | 


--------------------------------------------------------------------------------
/webfront/tests/test_ida_search.py:
--------------------------------------------------------------------------------
 1 | from webfront.tests.InterproRESTTestCase import InterproRESTTestCase
 2 | from rest_framework import status
 3 | 
 4 | 
 5 | class IDASearchModifierTest(InterproRESTTestCase):
 6 |     def _assertSearch(self, response, count):
 7 |         self.assertEqual(response.status_code, status.HTTP_200_OK)
 8 |         self.assertIn("results", response.data)
 9 |         self.assertIn("count", response.data)
10 |         self.assertEqual(response.data["count"], len(response.data["results"]))
11 |         self.assertEqual(response.data["count"], count)
12 | 
13 |     def test_search_by_a_single_accession(self):
14 |         response = self.client.get("/api/entry?ida_search=IPR003165")
15 |         self._assertSearch(response, 2)
16 | 
17 |     def test_search_by_ordered_search(self):
18 |         response = self.client.get("/api/entry?ida_search=IPR003165,IPR003165&ordered")
19 |         self._assertSearch(response, 2)
20 | 
21 |     def test_search_by_non_existing_ipr(self):
22 |         response = self.client.get("/api/entry?ida_search=IPR00XXXX")
23 |         self._assertSearch(response, 0)
24 | 
25 |     def test_search_with_ignoring_list(self):
26 |         response = self.client.get("/api/entry?ida_search=IPR001175&ida_ignore=pf17176")
27 |         self._assertSearch(response, 0)
28 | 
29 |     def test_search_exact_single(self):
30 |         response = self.client.get("/api/entry?ida_search=pf17180&ordered&exact")
31 |         self._assertSearch(response, 1)
32 | 
33 |     def test_search_exact_vs_ordered(self):
34 |         response = self.client.get("/api/entry?ida_search=PF02171,PF02171&exact")
35 |         self._assertSearch(response, 1)
36 |         response = self.client.get("/api/entry?ida_search=PF02171,PF02171&ordered")
37 |         self._assertSearch(response, 2)
38 | 
39 |     def test_search_pfam_vs_interpro_accession(self):
40 |         response = self.client.get("/api/entry?ida_search=PF02171,PF02171&exact")
41 |         self._assertSearch(response, 1)
42 |         response = self.client.get("/api/entry?ida_search=IPR003165,PF02171&exact")
43 |         self._assertSearch(response, 1)
44 |         response = self.client.get("/api/entry?ida_search=PF02171,IPR003165&exact")
45 |         self._assertSearch(response, 1)
46 |         response = self.client.get("/api/entry?ida_search=IPR003165,IPR003165&exact")
47 |         self._assertSearch(response, 1)
48 | 


--------------------------------------------------------------------------------
/.github/workflows/testing.yml:
--------------------------------------------------------------------------------
 1 | name: Unit and Funtional Testing
 2 | 
 3 | on: [push, pull_request]
 4 | 
 5 | jobs:
 6 |   build:
 7 | 
 8 |     runs-on: ubuntu-latest
 9 | 
10 |     steps:
11 |     - uses: actions/checkout@v3
12 |     - name: 🐍 - Set up Python 3.8
13 |       uses: actions/setup-python@v4
14 |       with:
15 |         python-version: '3.10'
16 |     - name: ⚙️ - Configure sysctl limits
17 |       run: |
18 |         sudo swapoff -a
19 |         sudo sysctl -w vm.swappiness=1
20 |         sudo sysctl -w fs.file-max=262144
21 |         sudo sysctl -w vm.max_map_count=262144
22 |     - name: 🔎 - Runs Elasticsearch
23 |       uses: elastic/elastic-github-actions/elasticsearch@master
24 |       with:
25 |         stack-version: 8.2.0
26 |         security-enabled: true
27 |         elasticsearch_password: "elasticsearch_password"
28 |     - name: ⚙️🔎 - Setup elastic search
29 |       run: |
30 |         curl --user elastic:elasticsearch_password 'https://localhost:9200' -k
31 |         curl --user elastic:elasticsearch_password -XPUT 'https://localhost:9200/test?pretty' -H 'Content-Type: application/json' -d @config/elastic_mapping.json -k
32 |         curl --user elastic:elasticsearch_password -XPUT 'https://localhost:9200/ida?pretty' -H 'Content-Type: application/json' -d @config/elastic_ida_mapping.json -k
33 |     - name: 🔧 - Install Dependencies
34 |       run: |
35 |         pip install -r requirements.txt
36 |         pip install -r dev_requirements.txt
37 |         pip install tblib
38 |         pip freeze
39 |     - name: 🧪 - Testing
40 |       run: |
41 |         echo -e "searcher_user: elastic \nsearcher_test_password: elasticsearch_password" > config/interpro.local.yml
42 |         cat config/interpro.local.yml
43 |         export BROWSER_TEST=chrome
44 |         coverage run --source='.' manage.py test
45 |     - name: 🧥‍ - Coveralls
46 |       continue-on-error: true
47 |       run: |
48 |         export COVERALLS_REPO_TOKEN=0NCZQkRT7k27xoKabeCH3UzAEUIDk5BAw
49 |         coveralls
50 |     - name: 📮 - Slack Notification
51 |       uses: rtCamp/action-slack-notify@v2
52 |       continue-on-error: true
53 |       if: github.ref == 'refs/heads/master' || github.ref == 'refs/heads/dev'
54 |       env:
55 |         SLACK_COLOR: "${{ job.status == 'success' && 'good' || 'danger' }}"
56 |         SLACK_USERNAME: "Github Actions API"
57 |         SLACK_ICON_EMOJI: ":octocat:"
58 |         SLACK_TITLE: "CI API results in GitHub Actions"
59 |         SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
60 |         SLACK_CHANNEL: "#interpro7"
61 |         MSG_MINIMAL: Actions URL
62 | 


--------------------------------------------------------------------------------
/example_data/protein/X2JLE1.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "metadata" : {
 3 |     "accession": "X2JLE1",
 4 |     "id": "X2JLE1_DROME",
 5 |     "sourceOrganism": {
 6 |       "name": "Drosophila melanogaster (Fruit fly)",
 7 |       "taxid": 7227
 8 |     },
 9 |     "name": {
10 |       "name": "Dopamine 2-like receptor, isoform G",
11 |       "short": null,
12 |       "other": []
13 |     },
14 |     "description": null,
15 |     "length": 897,
16 |     "sequence": "MLSPFDWRRGISSSGTGGTMAAQPLSSTAATTAAATGATAATAATAATTSATLSTAAASTSTTAAPSAGATWINHHLAVEADSSQPANGSDAQAGVEGPTMPAGYLPLYEDVETAAEDAGYALIDDISEWLLGSVGSEAAVGGPENSTNLAVTGANGTLAWLEALNSTQPAQSNSSAEDGERGRYSLRSFVEQQLAGGGAAGAGDGGDAGIALIDSGEEAALDNVADAETDYGMLGGFGDAELLQRTATVARETLGNRTAPSTTSYDGGGSGDVGVAGGLAGTAGGGVGGAGGSGGSTFMLLLENFNDYFPNYNGSTVSGTSTIAPGVAITGSRGSGLLLEQNLTGLYLDGYRLNCTNETLNLTDSCGELRVVDHNYWALILILFPILTLFGNILVILSVCRERSLQTVTNYFIVSLAIADLLVAVVVMPFAVYFLVNGAWALPDVVCDFYIAMDVICSTSSIFNLVAISIDRYIAVTQPIKYAKHKNSRRVCLTILLVWAISAAIGSPIVLGLNNTPNREPDVCAFYNADFILYSSLSSFYIPCIIMVFLYWNIFKALRSRARKQRAARKPHLSELTGGSVIENIAQTRRLAETALDSSRHASRILPDEAATNTASGSNEEEDENAISPDIDDCHVIVNDKSTEFMLATVVEETGNSVVAQITTQPQLVVADPNGNHDSGYAASNVDDVLAGVAPASASAATSAAPRSSGSPPDSPLPSGATLQRSSVSSQRRPTGDDSPKRGEPALSVAMKPLSFVRYGVQEAMTLARNDSTLSTTSKTSSRKDKKNSQASRFTIYKVHKASKKKREKSSAKKERKATKTLAIVLGVFLFCWLPFFSCNIMDAMCAKFKKDCRPGLTAYMMTTWLGYINSFVNPVIYTIFNPEFRKAFKKIMHMG",
17 |     "proteome": "UP000000803",
18 |     "gene": "Dop2R",
19 |     "GO": {
20 |       "biologicalProcess": [
21 |         {
22 |           "id": "GO:0007186",
23 |           "name": "G-protein coupled receptor signaling pathway"
24 |         }
25 |       ],
26 |       "molecularFunction": [
27 |         {
28 |           "id": "GO:0004930",
29 |           "name": "G-protein coupled receptor activity"
30 |         }
31 |       ],
32 |       "cellularComponent": [
33 |         {
34 |           "id": "GO:0016021",
35 |           "name": "integral component of membrane"
36 |         }
37 |       ]
38 |     },
39 |     "proteinEvidence": 1
40 |   },
41 |   "representation": {
42 |     "entries": [
43 |       {
44 |         "id": "IPR000276",
45 |         "name": "G protein-coupled receptor, rhodopsin-like",
46 |         "type": "family"
47 |       },
48 |       {
49 |         "id": "IPR017452",
50 |         "name": "GPCR, rhodopsin-like, 7TM",
51 |         "type": "domain"
52 |       }
53 |     ],
54 |     "signalPeptide": [],
55 |     "transmembrane": [],
56 |     "coiledCoil": [],
57 |     "lowComplexity_disorder": [],
58 |     "activeSites": [],
59 |     "perResidueFeatures": [],
60 |     "disulphideBridges": []
61 |   },
62 |   "structure": {
63 |     "chains": []
64 |   },
65 |   "genomicContext": {
66 |     "DNA": ""
67 |   }
68 | }
69 | 


--------------------------------------------------------------------------------
/example_data/protein/V5XAD2.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "metadata" : {
 3 |     "accession": "V5XAD2",
 4 |     "id": "V5XAD2_MYCNE",
 5 |     "sourceOrganism": {
 6 |       "name": "Mycobacterium neoaurum VKM Ac-1815D",
 7 |       "taxid": 700508
 8 |     },
 9 |     "name": {
10 |       "name": "NADH-quinone oxidoreductase subunit I",
11 |       "short": null,
12 |       "other": ["NADH dehydrogenase I subunit I", "NDH-1 subunit I"]
13 |     },
14 |     "description": "NDH-1 shuttles electrons from NADH, via FMN and iron-sulfur (Fe-S) centers, to quinones in the respiratory chain. The immediate electron acceptor for the enzyme in this species is believed to be menaquinone. Couples the redox reaction to proton translocation (for every two electrons transferred, four hydrogen ions are translocated across the cytoplasmic membrane), and thus conserves the redox energy in a proton gradient.",
15 |     "length": 179,
16 |     "sequence": "MSKVGDALAGFGVTFKAMLHKPITEQYPEKPGPVAPRYHGRHQLNRYADGLEKCIGCELCAWACPADAIFVEGADNTAEQRFSPGERYGRVYQINYLRCIGCGLCIEACPTRALTMTNDYEMADDNRADLIYGKDKLLAPLTADMTAPPHAMAEGSTDEDYYRGNIRADGLARPSEATR",
17 |     "proteome": "UP000018763",
18 |     "gene": "nuoI",
19 |     "GO": {
20 |       "biologicalProcess": [
21 |         {
22 |           "id": "GO:0055114",
23 |           "name": "oxidation-reduction process"
24 |         }
25 |       ],
26 |       "molecularFunction": [
27 |         {
28 |           "id": "GO:0016651",
29 |           "name": "oxidoreductase activity, acting on NAD(P)H"
30 |         },
31 |         {
32 |           "id": "GO:0051539",
33 |           "name": "4 iron, 4 sulfur cluster binding"
34 |         }
35 |       ],
36 |       "cellularComponent": [
37 |         {
38 |           "id": "GO:0016020",
39 |           "name": "membrane"
40 |         }
41 |       ]
42 |     },
43 |     "proteinEvidence": 3
44 |   },
45 |   "representation": {
46 |     "entries": [
47 |       {
48 |         "id": "IPR010226",
49 |         "name": "NADH-quinone oxidoreductase, chain I",
50 |         "type": "family"
51 |       },
52 |       {
53 |         "id": "IPR017896",
54 |         "name": "4Fe-4S ferredoxin-type, iron-sulphur binding domain",
55 |         "type": "domain"
56 |       },
57 |       {
58 |         "id": "IPR017900",
59 |         "name": "4Fe-4S ferredoxin, iron-sulphur binding, conserved site",
60 |         "type": "conservedSite"
61 |       }
62 |     ],
63 |     "signalPeptide": [],
64 |     "transmembrane": [],
65 |     "coiledCoil": [],
66 |     "lowComplexity_disorder": [],
67 |     "activeSites": [],
68 |     "perResidueFeatures": [],
69 |     "disulphideBridges": []
70 |   },
71 |   "structure": {
72 |     "chains": []
73 |   },
74 |   "genomicContext": {
75 |     "DNA": ""
76 |   }
77 | }
78 | 


--------------------------------------------------------------------------------
/webfront/views/mail.py:
--------------------------------------------------------------------------------
 1 | from email.mime.text import MIMEText
 2 | from subprocess import Popen, PIPE
 3 | from django.http import JsonResponse
 4 | from interpro.settings import INTERPRO_CONFIG
 5 | from django.views.decorators.csrf import csrf_exempt
 6 | from datetime import datetime, timedelta
 7 | from django.conf import settings
 8 | 
 9 | 
10 | @csrf_exempt
11 | def send_email(request):
12 |     ip_address = get_client_ip(request)
13 |     now = datetime.now()
14 |     if not hasattr(settings, "credentials"):
15 |         return store_credentials_and_mail(request, ip_address, now)
16 |     else:
17 |         last_accessed = settings.credentials
18 |         if last_accessed["ip"] == ip_address:
19 |             then = datetime.strptime(last_accessed["time"], "%Y-%m-%d %H:%M:%S.%f")
20 |             time_diff = now - then
21 |             elapsed_min = time_diff / timedelta(minutes=1)
22 |             if elapsed_min >= 1:
23 |                 return store_credentials_and_mail(request, ip_address, now)
24 |             else:
25 |                 data = {"error": "Request Aborted"}
26 |                 return JsonResponse(data, status=429)
27 |         else:
28 |             return store_credentials_and_mail(request, ip_address, now)
29 | 
30 | 
31 | def get_client_ip(request):
32 |     x_forwarded_for = request.META.get("HTTP_X_FORWARDED_FOR")
33 |     if x_forwarded_for:
34 |         ip = x_forwarded_for.split(",")[0]
35 |     else:
36 |         ip = request.META.get("REMOTE_ADDR")
37 |     return ip
38 | 
39 | 
40 | def store_credentials_and_mail(request, ip, time):
41 |     settings.credentials = {"ip": ip, "time": time.strftime("%Y-%m-%d %H:%M:%S.%f")}
42 |     return mail(request)
43 | 
44 | 
45 | def mail(request):
46 |     path = request.POST.get("path", INTERPRO_CONFIG.get("sendmail_path"))
47 |     subject = request.POST.get("subject", "")
48 |     message = request.POST.get("message", "")
49 |     from_email = request.POST.get("from_email", "")
50 |     queue = request.POST.get("queue", "interpro").lower()
51 |     to_email = {"interpro": "interhelp@ebi.ac.uk", "pfam": "pfam-help@ebi.ac.uk"}.get(
52 |         queue, ""
53 |     )
54 |     if path and subject and message and from_email and to_email:
55 |         message = MIMEText(message)
56 |         message["From"] = from_email
57 |         message["To"] = to_email
58 |         message["Subject"] = subject
59 |         p = Popen([path, "-t", "-oi"], stdin=PIPE)
60 |         p.communicate(message.as_bytes())
61 |         data = {"from": from_email, "subject": subject}
62 |         return JsonResponse(data)
63 |     else:
64 |         data = {"error": "Make sure all fields are entered and valid"}
65 |         return JsonResponse(data, status=400)
66 | 


--------------------------------------------------------------------------------
/example_data/entry/PF02171.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "metadata": {
 3 |     "accession": "PF02171",
 4 |     "id": "",
 5 |     "type": "domain",
 6 |     "GO": {
 7 |       "biologicalProcess": [],
 8 |       "molecularFunction": [
 9 |         {
10 |           "id": "GO:0003676",
11 |           "name": "nucleic acid binding"
12 |         }
13 |       ],
14 |       "cellularComponent": []
15 |     },
16 |     "sourceDataBase": "Pfam",
17 |     "memberDataBases": {},
18 |     "integrated": "IPR003165",
19 |     "name": {
20 |       "name": "Piwi domain",
21 |       "short": "Piwi",
22 |       "other": []
23 |     },
24 |     "description": "This domain is found in the protein Piwi and its relatives. The function of this domain is the dsRNA guided hydrolysis of ssRNA. Determination of the crystal structure of Argonaute reveals that PIWI is an RNase H domain, and identifies Argonaute as Slicer, the enzyme that cleaves mRNA in the RNAi RISC complex [PUB00020128]. In addition, Mg+2 dependence and production of 3'-OH and 5' phosphate products are shared characteristics of RNaseH and RISC. The PIWI domain core has a tertiary structure belonging to the RNase H family of enzymes. RNase H fold proteins all have a five-stranded mixed beta-sheet surrounded by helices. By analogy to RNase H enzymes which cleave single-stranded RNA guided by the DNA strand in an RNA/DNA hybrid, the PIWI domain can be inferred to cleave single-stranded RNA, for example mRNA, guided by double stranded siRNA.",
25 |     "wikipedia": "Piwi",
26 |     "literature": {
27 |       "PUB00020128": {
28 |         "PMID": 15284453,
29 |         "type": "J",
30 |         "ISBN": null,
31 |         "volume": "305",
32 |         "issue": "5689",
33 |         "year": 2004,
34 |         "title": "Crystal structure of Argonaute and its implications for RISC slicer activity.",
35 |         "URL": null,
36 |         "rawPages": "1434-7",
37 |         "medlineJournal": "Science",
38 |         "ISOJournal": "Science",
39 |         "authors": "Song JJ, Smith SK, Hannon GJ, Joshua-Tor L.",
40 |         "DOI_URL": "http://dx.doi.org/10.1126/science.1102514",
41 |       },
42 |       "PUB00018283": {
43 |         "PMID": 11050429,
44 |         "type": "J",
45 |         "ISBN": null,
46 |         "volume": "25",
47 |         "issue": "10",
48 |         "year": 2000,
49 |         "title": "Domains in gene silencing and cell differentiation proteins: the novel PAZ domain and redefinition of the Piwi domain.",
50 |         "URL": null,
51 |         "rawPages": "481-2",
52 |         "medlineJournal": "Trends Biochem Sci",
53 |         "ISOJournal": "Trends Biochem. Sci.",
54 |         "authors": "Cerutti L, Mian N, Bateman A.",
55 |         "DOI_URL": "http://dx.doi.org/10.1016/S0968-0004(00)01641-8",
56 |       }
57 |     },
58 |     "cross_references": {
59 |       "ENZYME": [
60 |         "6.4.1.1"
61 |       ],
62 |       "PRIAM": [
63 |         "PRI003248"
64 |       ]
65 |     }
66 |   }
67 | }
68 | 


--------------------------------------------------------------------------------
/webfront/static/swagger/oauth2-redirect.html:
--------------------------------------------------------------------------------
 1 | <!doctype html>
 2 | <html lang="en-US">
 3 | <head>
 4 |     <title>Swagger UI: OAuth2 Redirect</title>
 5 | </head>
 6 | <body>
 7 | <script>
 8 |     'use strict';
 9 |     function run () {
10 |         var oauth2 = window.opener.swaggerUIRedirectOauth2;
11 |         var sentState = oauth2.state;
12 |         var redirectUrl = oauth2.redirectUrl;
13 |         var isValid, qp, arr;
14 | 
15 |         if (/code|token|error/.test(window.location.hash)) {
16 |             qp = window.location.hash.substring(1);
17 |         } else {
18 |             qp = location.search.substring(1);
19 |         }
20 | 
21 |         arr = qp.split("&");
22 |         arr.forEach(function (v,i,_arr) { _arr[i] = '"' + v.replace('=', '":"') + '"';});
23 |         qp = qp ? JSON.parse('{' + arr.join() + '}',
24 |                 function (key, value) {
25 |                     return key === "" ? value : decodeURIComponent(value);
26 |                 }
27 |         ) : {};
28 | 
29 |         isValid = qp.state === sentState;
30 | 
31 |         if ((
32 |           oauth2.auth.schema.get("flow") === "accessCode" ||
33 |           oauth2.auth.schema.get("flow") === "authorizationCode" ||
34 |           oauth2.auth.schema.get("flow") === "authorization_code"
35 |         ) && !oauth2.auth.code) {
36 |             if (!isValid) {
37 |                 oauth2.errCb({
38 |                     authId: oauth2.auth.name,
39 |                     source: "auth",
40 |                     level: "warning",
41 |                     message: "Authorization may be unsafe, passed state was changed in server Passed state wasn't returned from auth server"
42 |                 });
43 |             }
44 | 
45 |             if (qp.code) {
46 |                 delete oauth2.state;
47 |                 oauth2.auth.code = qp.code;
48 |                 oauth2.callback({auth: oauth2.auth, redirectUrl: redirectUrl});
49 |             } else {
50 |                 let oauthErrorMsg;
51 |                 if (qp.error) {
52 |                     oauthErrorMsg = "["+qp.error+"]: " +
53 |                         (qp.error_description ? qp.error_description+ ". " : "no accessCode received from the server. ") +
54 |                         (qp.error_uri ? "More info: "+qp.error_uri : "");
55 |                 }
56 | 
57 |                 oauth2.errCb({
58 |                     authId: oauth2.auth.name,
59 |                     source: "auth",
60 |                     level: "error",
61 |                     message: oauthErrorMsg || "[Authorization failed]: no accessCode received from the server"
62 |                 });
63 |             }
64 |         } else {
65 |             oauth2.callback({auth: oauth2.auth, token: qp, isValid: isValid, redirectUrl: redirectUrl});
66 |         }
67 |         window.close();
68 |     }
69 | 
70 |     window.addEventListener('DOMContentLoaded', function () {
71 |       run();
72 |     });
73 | </script>
74 | </body>
75 | </html>
76 | 


--------------------------------------------------------------------------------
/docs/examples/fetch-protein-matches.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Download matches and other features from InterPro for a given UniProt accession
 3 | 
 4 | Requires python >= 3.6
 5 | 
 6 | Example of running command:
 7 | $ python fetch-protein-matches.py UNIPROT-ACCESSION
 8 | """
 9 | 
10 | import json
11 | import sys
12 | from urllib.error import HTTPError
13 | from urllib.request import urlopen
14 | 
15 | 
16 | def main():
17 |     query = sys.argv[1]
18 | 
19 |     api_url = "https://www.ebi.ac.uk/interpro/api"
20 |     url = f"{api_url}/entry/all/protein/UniProt/{query}/"
21 |     url += "?page_size=200&extra_fields=hierarchy,short_name"
22 | 
23 |     with urlopen(url) as res:
24 |         data = json.loads(res.read().decode("utf-8"))
25 | 
26 |     protein_accession = ""
27 |     protein_length = ""
28 |     for i, m in enumerate(data["results"]):
29 |         meta = m["metadata"]
30 |         protein = m["proteins"][0]
31 |         
32 |         if meta["member_databases"]:
33 |             dbs = meta["member_databases"].values()
34 |             signatures = ",".join([sig for db in dbs for sig in db.keys()])
35 |         else:
36 |             signatures = "-"
37 | 
38 |         if meta["go_terms"]:
39 |             go_terms = ",".join([t["identifier"] for t in meta["go_terms"]])
40 |         else:
41 |             go_terms = "-"
42 | 
43 |         locations = []
44 |         for l in protein["entry_protein_locations"]:
45 |             for f in l["fragments"]:
46 |                 locations.append(f"{f['start']}..{f['end']}")
47 | 
48 |         if i == 0:
49 |             protein_accession = protein["accession"].upper()
50 |             protein_length = str(protein["protein_length"])
51 | 
52 |         length = protein["protein_length"]
53 |         print("\t".join([
54 |             meta["accession"],
55 |             meta["name"] or "-",
56 |             meta["source_database"],
57 |             meta["type"],
58 |             meta["integrated"] or "-",
59 |             signatures,
60 |             go_terms,
61 |             protein_accession,
62 |             protein_length,
63 |             ",".join(locations)
64 |         ]))
65 | 
66 |     url = f"{api_url}/protein/UniProt/{query}/?extra_features=true"
67 |     with urlopen(url) as res:
68 |         features = json.loads(res.read().decode("utf-8"))
69 |         for feature in features.values():
70 | 
71 |             locations = []
72 |             for l in feature["locations"]:
73 |                 for f in l["fragments"]:
74 |                     locations.append(f"{f['start']}..{f['end']}")
75 | 
76 |             print("\t".join([
77 |                 feature["accession"],
78 |                 "-",
79 |                 feature["source_database"],
80 |                 "-",
81 |                 "-",
82 |                 "-",
83 |                 "-",
84 |                 protein_accession,
85 |                 protein_length,
86 |                 ",".join(locations)
87 |             ]))
88 |     
89 | 
90 | if __name__ == "__main__":
91 |     main()
92 | 


--------------------------------------------------------------------------------
/webfront/searcher/search_controller.py:
--------------------------------------------------------------------------------
 1 | import abc
 2 | import re
 3 | from webfront.views.queryset_manager import escape
 4 | 
 5 | 
 6 | class SearchController(metaclass=abc.ABCMeta):
 7 |     @abc.abstractmethod
 8 |     def get_group_obj_of_field_by_query(
 9 |         self, query, fields, fq=None, rows=0, inner_field_to_count=None
10 |     ):
11 |         raise NotImplementedError(
12 |             "users must define get_group_obj_of_field_by_query to use this base class"
13 |         )
14 | 
15 |     def get_number_of_field_by_endpoint(self, endpoint, field, accession, query="*:*"):
16 |         db = field
17 |         fq = None
18 |         if field.startswith("entry"):
19 |             db = "entry_db"
20 |         elif field.startswith("protein"):
21 |             db = "protein_db"
22 |         elif field == "tax_id":
23 |             db = "{}_db".format(endpoint)
24 |         acc = "{}_acc".format(endpoint)
25 |         if endpoint == "taxonomy":
26 |             acc = "tax_lineage"
27 |         elif endpoint == "structure":
28 |             db = "structure_chain_acc"
29 |         elif endpoint == "proteome":
30 |             db = "proteome_acc"
31 |             acc = "proteome_acc"
32 |         ngroups = self.get_group_obj_of_field_by_query(
33 |             "{} && {}:* && {}:{}".format(
34 |                 query, db, acc, escape(str(accession).lower())
35 |             ),
36 |             field,
37 |             fq,
38 |         )["ngroups"]
39 |         if isinstance(ngroups, dict):
40 |             ngroups = ngroups["value"]
41 |         return ngroups
42 | 
43 |     @abc.abstractmethod
44 |     def get_chain(self):
45 |         raise NotImplementedError("users must define get_chain to use this base class")
46 | 
47 |     @abc.abstractmethod
48 |     def get_counter_object(self, endpoint, query=None, extra_counters=[]):
49 |         raise NotImplementedError(
50 |             "users must define get_counter_object to use this base class"
51 |         )
52 | 
53 |     @abc.abstractmethod
54 |     def get_grouped_object(
55 |         self, endpoint, field, query=None, extra_counters=[], size=10
56 |     ):
57 |         raise NotImplementedError(
58 |             "users must define get_counter_object to use this base class"
59 |         )
60 | 
61 |     @abc.abstractmethod
62 |     def get_list_of_endpoint(self, endpoint, query=None, rows=1, start=0):
63 |         raise NotImplementedError(
64 |             "users must define get_list_of_endpoint to use this base class"
65 |         )
66 | 
67 |     @abc.abstractmethod
68 |     def execute_query(self, query, fq=None, rows=0, start=0):
69 |         raise NotImplementedError(
70 |             "users must define execute_query to use this base class"
71 |         )
72 | 
73 |     @abc.abstractmethod
74 |     def add(self, docs):
75 |         raise NotImplementedError("users must define add to use this base class")
76 | 
77 |     @abc.abstractmethod
78 |     def clear_all_docs(self):
79 |         raise NotImplementedError(
80 |             "users must define clear_all_docs to use this base class"
81 |         )
82 | 


--------------------------------------------------------------------------------
/webfront/tests/tests_protein_endpoint.py:
--------------------------------------------------------------------------------
 1 | from rest_framework import status
 2 | from webfront.tests.InterproRESTTestCase import InterproRESTTestCase
 3 | 
 4 | 
 5 | class ProteinRESTTest(InterproRESTTestCase):
 6 |     def test_can_read_protein_overview(self):
 7 |         response = self.client.get("/api/protein")
 8 |         self.assertEqual(response.status_code, status.HTTP_200_OK)
 9 |         self._check_protein_count_overview(response.data)
10 | 
11 |     def test_can_read_protein_uniprot(self):
12 |         response = self.client.get("/api/protein/uniprot")
13 |         self.assertEqual(response.status_code, status.HTTP_200_OK)
14 |         self._check_is_list_of_objects_with_key(response.data["results"], "metadata")
15 |         self.assertEqual(len(response.data["results"]), 5)
16 | 
17 |     def test_can_read_protein_uniprot_accession(self):
18 |         response = self.client.get("/api/protein/uniprot/P16582")
19 |         self.assertEqual(response.status_code, status.HTTP_200_OK)
20 |         self.assertIn("metadata", response.data)
21 |         self._check_protein_details(response.data["metadata"])
22 |         self.assertIn("structures", response.data["metadata"]["counters"])
23 |         self.assertIn("entries", response.data["metadata"]["counters"])
24 |         self.assertEqual(1, response.data["metadata"]["counters"]["structures"])
25 |         self.assertEqual(2, response.data["metadata"]["counters"]["entries"])
26 | 
27 |     def test_can_read_protein_id(self):
28 |         url_id = "/api/protein/uniprot/CBPYA_ASPCL"
29 |         response = self.client.get(url_id)
30 |         self.assertEqual(response.status_code, status.HTTP_302_FOUND)
31 |         self.assertIn("a1cuj5", response.url.lower())
32 | 
33 |     def test_can_read_protein_reviewed(self):
34 |         response = self.client.get("/api/protein/reviewed")
35 |         self.assertEqual(response.status_code, status.HTTP_200_OK)
36 |         self._check_is_list_of_objects_with_key(response.data["results"], "metadata")
37 |         self.assertEqual(len(response.data["results"]), 2)
38 | 
39 |     def test_can_read_protein_reviewed_accession(self):
40 |         response = self.client.get("/api/protein/reviewed/A1CUJ5")
41 |         self.assertEqual(response.status_code, status.HTTP_200_OK)
42 |         self.assertIn("metadata", response.data)
43 |         self._check_protein_details(response.data["metadata"])
44 |         self.assertIn("structures", response.data["metadata"]["counters"])
45 |         self.assertIn("entries", response.data["metadata"]["counters"])
46 |         self.assertEqual(1, response.data["metadata"]["counters"]["structures"])
47 |         self.assertEqual(5, response.data["metadata"]["counters"]["entries"])
48 | 
49 |     def test_cant_read_protein_bad_db(self):
50 |         self._check_HTTP_response_code(
51 |             "/api/protein/bad_db", code=status.HTTP_404_NOT_FOUND
52 |         )
53 | 
54 |     def test_cant_read_protein_uniprot_bad_id(self):
55 |         self._check_HTTP_response_code(
56 |             "/api/protein/uniprot/badformmedID", code=status.HTTP_404_NOT_FOUND
57 |         )
58 |         self._check_HTTP_response_code(
59 |             "/api/protein/uniprot/A1CUJ6",
60 |             code=status.HTTP_204_NO_CONTENT,
61 |             msg="It should fail as 204 because the ID is well formed but it is not in the BD",
62 |         )
63 | 


--------------------------------------------------------------------------------
/webfront/tests/README.md:
--------------------------------------------------------------------------------
 1 | Testing
 2 | ===
 3 | 
 4 | Generating fixtures for gzip BinaryFields
 5 | ---
 6 | This in an example of how to generate the fixtures file for `StructuralModel`. The same approach was used for the sequence in the `Protein` table.
 7 | 
 8 | 1. Go the djano shell. Make sure you are using the test DB by setting `use_test_db: true` in `interpro.local.yml`. 
 9 |    ```shell
10 |    python3 manage.py shell
11 |    ```
12 | 
13 | 2. Manually create the fixture, using `gzip` and `bytes` for the binary fields
14 |     ```python
15 |     import gzip 
16 |     from webfront.models import StructuralModel  
17 |     
18 |     contacts = "[[1,1,1,1,1.0], [1,2,1,2,30,0.5], [1,3,1,4,0.8], [2,2,2,2,1.0], [2,3,2,4,0.9], [3,3,4,4,1.0]]"  
19 |     contacts_gz = gzip.compress(bytes(contacts,'utf-8'))
20 |    
21 |     plddt = '[0.7807835340499878, 0.8842586278915405, 0.8649855852127075]'
22 |     plddt_gz = gzip.compress(bytes(plddt,'utf-8'))
23 |    
24 |     structure = """ATOM      1  N   VAL A   1      -0.701   1.770   1.392  1.00  4.92           N   
25 |     ATOM      1  N   ARG A   1      -0.099   0.648  -0.392  1.00  0.00           N  
26 |     ATOM      2  CA  ARG A   1       1.339   0.488  -0.541  1.00  0.00           C  
27 |     ATOM      3  C   ARG A   1       2.039   1.845  -0.536  1.00  0.00           C  
28 |     ATOM      4  O   ARG A   1       1.712   2.743  -1.333  1.00  0.00           O  
29 |     ATOM      5  CB  ARG A   1       1.666  -0.244  -1.831  1.00  0.00           C  
30 |     ATOM      6  CG  ARG A   1       3.140  -0.516  -2.039  1.00  0.00           C  
31 |     ATOM      7  CD  ARG A   1       3.410  -1.196  -3.331  1.00  0.00           C  
32 |     ATOM      8  NE  ARG A   1       4.824  -1.452  -3.502  1.00  0.00           N  
33 |     ATOM      9  CZ  ARG A   1       5.425  -1.747  -4.668  1.00  0.00           C
34 |     """  
35 |     structure_gz = gzip.compress(bytes(structure,'utf-8'))
36 | 
37 |     model = StructuralModel(model_id=1, accession='PF17176', algorithm='RoseTTAFold', contacts=contacts_gz,
38 |                             plddt=plddt_gz, structure=structure_gz)
39 |     model.save()
40 |     ```
41 | 
42 | 3. Generate the fixture using the `dumpdata` tool in django:
43 |     ```shell
44 |     python manage.py dumpdata webfront --indent 4
45 |     ```
46 | 
47 |     ```json
48 |     [
49 |         {
50 |             "model": "webfront.structuralmodel",
51 |             "pk": 1,
52 |             "fields": {
53 |                 "accession": "PF17176",
54 |                 "algorithm": "RoseTTAFold",
55 |                 "contacts": "H4sIAKMfQmEC/4uONtSBQj2DWB0FINdIB4SNDXQM9EwhIsZAERMg1wLENdKBQKh6oEogByRrCeIaA7kmQAiSjQUAU15YL10AAAA=",
56 |                 "plddt": "H4sIAKMfQmEC/x3IwQ3AIAwDwFU6AEIm2NiZBXX/NSr1nncxHThbm2B3nPFgJizllNNLhP477EhRrTKs9wMKEasjPAAAAA==",
57 |                 "structure": "H4sIAKMfQmEC/43SS27EIAwG4P2cggvEssE8sqR0NJvpRKqqLnr/g9Q2VZUZEgkW4aHw8Zukfm0fzho595Duu95d7VNtC0JGHRPkjNaH1WuHMmPQ8X/T/Zf6KtbP24uI6yoDhMTF5jsR7bkXd6C81eoAaqTQQS4djEwnYHsCgy0MoAc0kKBw7GBIUyA7t7mjhJm8wZnlzEUThxNwewLl+PZ2BKaULJlnNrCEuZJlV7uNYABi7KWSwn9XMAFmWXg/ApnQktGqYJCS5xLKJ3xcR5Ch+F4qR29gxLnfRupoPyMYgX00MLMUsbDcaTlLePkFk6MiNykDAAA="
58 |             }
59 |         }
60 |     ]
61 |     ```
62 | 
63 | 4. Now you can use the generated JSON to included in one of the fixture files in `webfront/tests/`. 
64 | 
65 |    In this example the generated fixture is included at the end of `webfront/tests/fixtures_structure.json`.
66 | 


--------------------------------------------------------------------------------
/functional_tests/base.py:
--------------------------------------------------------------------------------
 1 | from django.contrib.staticfiles.testing import StaticLiveServerTestCase
 2 | from django.test import override_settings
 3 | from selenium import webdriver
 4 | from selenium.webdriver.common.by import By
 5 | import sys
 6 | import time
 7 | import os
 8 | from selenium.common.exceptions import StaleElementReferenceException
 9 | from selenium.webdriver.chrome.options import Options
10 | 
11 | from webfront.tests.fixtures_reader import FixtureReader
12 | from interpro.settings import SEARCHER_TEST_URL, SEARCHER_TEST_PASSWORD
13 | 
14 | 
15 | @override_settings(SEARCHER_URL=SEARCHER_TEST_URL)
16 | @override_settings(SEARCHER_PASSWORD=SEARCHER_TEST_PASSWORD)
17 | @override_settings(SEARCHER_INDEX="test")
18 | class FunctionalTest(StaticLiveServerTestCase):
19 |     fixtures = [
20 |         "webfront/tests/fixtures_entry.json",
21 |         "webfront/tests/fixtures_protein.json",
22 |         "webfront/tests/fixtures_structure.json",
23 |         "webfront/tests/fixtures_organisms.json",
24 |         "webfront/tests/fixtures_set.json",
25 |         "webfront/tests/fixtures_database.json",
26 |     ]
27 |     links_fixtures = "webfront/tests/relationship_features.json"
28 | 
29 |     @classmethod
30 |     def setUpClass(cls):
31 |         for arg in sys.argv:
32 |             if "liveserver" in arg:
33 |                 cls.server_url = "http://" + arg.split("=")[1]
34 |                 return
35 |         super().setUpClass()
36 |         cls.server_url = cls.live_server_url
37 |         cls.fr = FixtureReader(cls.fixtures + [cls.links_fixtures])
38 |         docs = cls.fr.get_fixtures()
39 |         cls.fr.add_to_search_engine(docs)
40 | 
41 |     @classmethod
42 |     def tearDownClass(cls):
43 |         # cls.fr.clear_search_engine()
44 |         if cls.server_url == cls.live_server_url:
45 |             super().tearDownClass()
46 | 
47 |     def setUp(self):
48 |         try:
49 |             if os.environ["BROWSER_TEST"] == "chrome":
50 |                 chrome_options = Options()
51 |                 chrome_options.add_argument("--headless")
52 | 
53 |                 if "BROWSER_TEST_PATH" in os.environ:
54 |                     self.browser = webdriver.Chrome(
55 |                         executable_path=os.environ["BROWSER_TEST_PATH"],
56 |                         options=chrome_options,
57 |                     )
58 |                 else:
59 |                     self.browser = webdriver.Chrome(options=chrome_options)
60 |             else:
61 |                 raise KeyError
62 |         except KeyError:
63 |             self.browser = webdriver.Firefox()
64 |         self.browser.implicitly_wait(3)
65 | 
66 |     def tearDown(self):
67 |         self.browser.quit()
68 | 
69 |     def click_link_and_wait(self, link):
70 |         link.click()
71 | 
72 |         def link_has_gone_stale():
73 |             try:
74 |                 # poll the link with an arbitrary call
75 |                 link.find_elements(By.ID, "doesnt-matter")
76 |                 return False
77 |             except StaleElementReferenceException:
78 |                 return True
79 | 
80 |         self.wait_for(link_has_gone_stale)
81 | 
82 |     def wait_for(self, condition_function):
83 |         start_time = time.time()
84 |         while time.time() < start_time + 3:
85 |             if condition_function():
86 |                 return True
87 |             else:
88 |                 time.sleep(0.1)
89 |         raise Exception("Timeout waiting for {}".format(condition_function.__name__))
90 | 


--------------------------------------------------------------------------------
/webfront/views/MODIFIER_README.md:
--------------------------------------------------------------------------------
 1 | Modifier Manager
 2 | ===
 3 | 
 4 | Modifiers are the technique defined to extend the functionality of the API. Modifiers are exposed to the user as URL parameters, so they can modify the current queryset by filtering, aggregating or change the serializer of the queryset.
 5 | 
 6 | As is described [here](./README.md), the API executes a view for each level in the URL. The main purpose of the view execution is to filter the queryset. Additionally, modifiers are registered in during this process. For example, the modifier `with_names` is registered at the level of the [TaxonomyAccessionHandler](./taxonomy.py#L39). This modifier is used by users wanting to include the names of children and parents of a given taxon id in the response.
 7 | 
 8 | After all the levels of the URL have been processed by the views, the custom manager checks if there are any modifiers registered, and if the associated URL parameter is present, executes the modifier.
 9 | 
10 | This are the parameters of the method to register a modifier:
11 | * `parameter`: the associated URL parameter of the modifier.
12 | * `action`: the modifier function. It should returns a queryset or None. And its parameters are:
13 |   * `value`: the value given as a URL parameter.
14 |   * `general_handler`: the handler that is in charge of the current request.
15 | * `type` (default: `ModifierType.FILTER`), there are 3 types of modifiers:
16 |   * `ModifierType.FILTER` adds new filters into the queryset before it gets executed or serialized.
17 |   * `ModifierType.REPLACE_PAYLOAD` creates a new payload that replaces the one that would normally been used by the `custom view`
18 |   * `ModifierType.EXTEND_PAYLOAD` extends an already calculated payload. The data extending the payload is added as a new key at the level of `"metadata"` or `"results"`.
19 | * `serializer` (default: `None`): in case the modification requires to be serialized in an specific way.
20 | * `many` (default: `False`): this is to explicitly indicate when the modifier queryset has *many* results and needs to be iterated. This is useful to indicate the pagination logic needs to be included. Note that this only makes sense if `use_model_as_payload == True`. 
21 | * `works_in_single_endpoint` (default: `True`): it indicates that a given modifier works for single endpoints URLs. If is false it will raise an exception when a single endpoint URL has this modifier
22 | * `works_in_multiple_endpoint` (default: `True`): it indicates that a given modifier works for multiple endpoints URLs. If is false it will raise an exception when a multiple endpoint URL has this modifier.
23 | 
24 | Examples:
25 | * `with_names`: Doesn't actually needs to execute an action so it uses the `passing` one defined in [modifiers.py#L684](./modifiers.py#L684) and registered in [taxonomy.py#L35](./taxonomy.py#L35). The actual change of this modifier is to use a different seializer (`SerializerDetail.TAXONOMY_DETAIL_NAMES`)
26 | * `filter_by_key_species`: Defined in [modifiers.py#L249](./modifiers.py#L249) and registered in [taxonomy.py#L96](./taxonomy.py#L96). It doesn't require to replace the current queryset, but adds a new filter to it. 
27 | * `filter_by_entry`: Defined in [modifiers.py#L255](./modifiers.py#L249) and registered in [taxonomy.py#L38](./taxonomy.py#L96). It replaces the current queryset, because it uses the model TaxonomyPerEntry, which doesn't correspond to any of the main endpoints. It is registered using its own serializer (`SerializerDetail.TAXONOMY_PER_ENTRY`) 
28 | * 
29 | 


--------------------------------------------------------------------------------
/example_data/entry/IPR003165.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "metadata": {
  3 |     "accession": "IPR003165",
  4 |     "id": "",
  5 |     "type": "domain",
  6 |     "GO": {
  7 |       "biologicalProcess": [],
  8 |       "molecularFunction": [
  9 |         {
 10 |           "id": "GO:0003676",
 11 |           "name": "nucleic acid binding"
 12 |         }
 13 |       ],
 14 |       "cellularComponent": []
 15 |     },
 16 |     "sourceDataBase": "InterPro",
 17 |     "memberDataBases": {
 18 |       "SMART": ["SM00950"],
 19 |       "PROSITEProfiles": ["PS50822"],
 20 |       "Pfam": ["PF02171"],
 21 |     },
 22 |     "integrated": null,
 23 |     "name": {
 24 |       "name": "Piwi domain",
 25 |       "short": "Piwi",
 26 |       "other": []
 27 |     },
 28 |     "description": "The piwi domain [PUB00020128] is a protein domain found in piwi proteins and a large number of related nucleic acid-binding proteins, especially those that bind and cleave RNA. The function of the domain is double stranded-RNA-guided hydrolysis of single stranded-RNA, as has been determined in the argonaute family of related proteins [PUB00018283].",
 29 |     "wikipedia": "",
 30 |     "literature": {
 31 |       "PUB00020128": {
 32 |         "PMID": 15284453,
 33 |         "type": "J",
 34 |         "ISBN": null,
 35 |         "volume": "305",
 36 |         "issue": "5689",
 37 |         "year": 2004,
 38 |         "title": "Crystal structure of Argonaute and its implications for RISC slicer activity.",
 39 |         "URL": null,
 40 |         "rawPages": "1434-7",
 41 |         "medlineJournal": "Science",
 42 |         "ISOJournal": "Science",
 43 |         "authors": "Song JJ, Smith SK, Hannon GJ, Joshua-Tor L.",
 44 |         "DOI_URL": "http://dx.doi.org/10.1126/science.1102514",
 45 |       },
 46 |       "PUB00018283": {
 47 |         "PMID": 11050429,
 48 |         "type": "J",
 49 |         "ISBN": null,
 50 |         "volume": "25",
 51 |         "issue": "10",
 52 |         "year": 2000,
 53 |         "title": "Domains in gene silencing and cell differentiation proteins: the novel PAZ domain and redefinition of the Piwi domain.",
 54 |         "URL": null,
 55 |         "rawPages": "481-2",
 56 |         "medlineJournal": "Trends Biochem Sci",
 57 |         "ISOJournal": "Trends Biochem. Sci.",
 58 |         "authors": "Cerutti L, Mian N, Bateman A.",
 59 |         "DOI_URL": "http://dx.doi.org/10.1016/S0968-0004(00)01641-8",
 60 |       },
 61 |       "PUB00050034": {
 62 |         "PMID": 19187762
 63 |       },
 64 |       "PUB00033445": {
 65 |         "PMID": 14749716
 66 |       },
 67 |       "PUB00033483": {
 68 |         "PMID": 12526743
 69 |       },
 70 |       "PUB00053929": {
 71 |         "PMID": 20211138
 72 |       },
 73 |       "PUB00006604": {
 74 |         "PMID": 9851978
 75 |       },
 76 |       "PUB00018289": {
 77 |         "PMID": 9927466
 78 |       },
 79 |       "PUB00036180": {
 80 |         "PMID": 16815998
 81 |       },
 82 |       "PUB00018290": {
 83 |         "PMID": 10631171
 84 |       },
 85 |       "PUB00040580": {
 86 |         "PMID": 17027504
 87 |       },
 88 |       "PUB00042042": {
 89 |         "PMID": 17130125
 90 |       },
 91 |       "PUB00032750": {
 92 |         "PMID": 15800628
 93 |       },
 94 |       "PUB00038628": {
 95 |         "PMID": 15800629
 96 |       },
 97 |       "cross_references": {
 98 |         "ENZYME": [
 99 |           "6.4.1.1"
100 |         ],
101 |         "PRIAM": [
102 |           "PRI003248"
103 |         ]
104 |       }
105 |     }
106 |   }
107 | }
108 | 


--------------------------------------------------------------------------------
/webfront/serializers/README.md:
--------------------------------------------------------------------------------
 1 | Serializers
 2 | ===
 3 | 
 4 | This is the last part of the API logic; after all the endpoints have been processed by their respective views; and the modifiers have been executed, the final Queryset is then serialized. We basically use DRF (_Django REST Framework_) strategy for [serialization](https://www.django-rest-framework.org/api-guide/serializers/). Below we describe how we adapted this.
 5 | 
 6 | The call to be serialized is done in [custom.py](../views/custom.py#L134), making sure to pass extra data for any processing required at serializing. In that sense, this aren't pure serializers, as there are certain data tasks that only get executed there, for instance, when using the modifier `with_names` (See [Modifiers docs](../views/MODIFIER_README.md)) the serializer requires to get the names out of the model [taxonomy.py#L323](./taxonomy.py#L323).
 7 | 
 8 | Besides those cases, the task of the serializer is to create a data structure using simple types plus dictionaries and lists. DRF will take care of actually writing a JSON (or other configured format) out of it.
 9 | 
10 | We have defined a base serializer class [ModelContentSerializer](./content_serializers.py), that deals with the initiation and contains methods shared for the serializers inheriting from it.
11 | 
12 | When defining the view that is going to deal with an endpoint, there are 3 class attributes that are useful to define the serializing behaviour.
13 | * `serializer_class` ( Default: `None`): Inherited from DRF, it is the class that will be used to serialize the instances.
14 | * `serializer_detail` (Default: `SerializerDetail.ALL`): Indicates which serializer function use, if this view is in the main endpoint.
15 | * `serializer_detail_filter` (Default: `SerializerDetail.ALL`): Indicates which serializer function use, if this view is in the filter endpoints.
16 | 
17 | These values get overwritten by the latest View when processing the URL. So for example, if the URL is `/api/entry/interpro/protein` this is how these values get changed:
18 | 
19 | 0. `/api`: This is the root path so we start with the defaults:
20 |    * `serializer_class = None`
21 |    * `serializer_detail = SerializerDetail.ALL`
22 |    * `serializer_detail_filter = SerializerDetail.ALL`
23 | 1. `/entry`: Main endpoint redifines the values for the main endpoint
24 |    * `serializer_class = EntrySerializer`
25 |    * `serializer_detail = SerializerDetail.ENTRY_OVERVIEW`
26 | 2. `/interpro`: Still part of the main endpoint so it changes the detail values:
27 |    * `serializer_class = EntrySerializer`
28 |    * `serializer_detail = SerializerDetail.ENTRY_HEADERS`
29 | 3. `/protein` It is a different endpoint so this should be use to define the filter values
30 |    * `serializer_detail_filter = SerializerDetail.PROTEIN_OVERVIEW`
31 |   
32 | Actually [custom.py](../views/custom.py#L231) register multiple `serializer_detail_filter` in case there are several filter endpoints in the URL.
33 | 
34 | So at the end with the result serialzers of this example Django will go to the `EntrySerializer` class to process each instance of the queryset, and because `serializer_detail = SerializerDetail.ENTRY_OVERVIEW` it would only include the header information of each instance and not its whole metadata ([see interpro.py#L52](./interpro.py#L52)). And for each instance it would include the protein count representation because `serializer_detail_filter = SerializerDetail.PROTEIN_OVERVIEW` ([see interpro.py#L63](./interpro.py#L63)).
35 | 
36 | All the values for `serializer_detail` and `serializer_detail_filter` are defined in ([constants.py](../constants.py#L4)).
37 | 


--------------------------------------------------------------------------------
/webfront/migrations/0030_num_proteins.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.2.24 on 2024-03-13 13:38
 2 | 
 3 | from django.db import migrations, models
 4 | import django.db.models.deletion
 5 | import jsonfield.fields
 6 | 
 7 | 
 8 | class Migration(migrations.Migration):
 9 | 
10 |     dependencies = [
11 |         ('webfront', '0029_remove_is_featured_plus_llm_entry'),
12 |     ]
13 | 
14 |     operations = [
15 |         migrations.CreateModel(
16 |             name='ProteomePerEntry',
17 |             fields=[
18 |                 ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
19 |                 ('counts', jsonfield.fields.JSONField(null=True)),
20 |                 ('num_proteins', models.IntegerField(default=0)),
21 |             ],
22 |         ),
23 |         migrations.CreateModel(
24 |             name='ProteomePerEntryDB',
25 |             fields=[
26 |                 ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
27 |                 ('source_database', models.CharField(db_index=True, max_length=100)),
28 |                 ('counts', jsonfield.fields.JSONField(null=True)),
29 |                 ('num_proteins', models.IntegerField(default=0)),
30 |             ],
31 |         ),
32 |         migrations.AddField(
33 |             model_name='proteome',
34 |             name='num_proteins',
35 |             field=models.IntegerField(default=0),
36 |         ),
37 |         migrations.AddField(
38 |             model_name='taxonomy',
39 |             name='num_proteins',
40 |             field=models.IntegerField(default=0),
41 |         ),
42 |         migrations.AddField(
43 |             model_name='taxonomyperentry',
44 |             name='num_proteins',
45 |             field=models.IntegerField(default=0),
46 |         ),
47 |         migrations.AddField(
48 |             model_name='taxonomyperentrydb',
49 |             name='num_proteins',
50 |             field=models.IntegerField(default=0),
51 |         ),
52 |         migrations.AddIndex(
53 |             model_name='taxonomyperentry',
54 |             index=models.Index(fields=['entry_acc', 'taxonomy'], name='webfront_ta_entry_a_9b7542_idx'),
55 |         ),
56 |         migrations.AddIndex(
57 |             model_name='taxonomyperentrydb',
58 |             index=models.Index(fields=['source_database', 'taxonomy'], name='webfront_ta_source__af48cb_idx'),
59 |         ),
60 |         migrations.AddField(
61 |             model_name='proteomeperentrydb',
62 |             name='proteome',
63 |             field=models.ForeignKey(blank=True, db_column='accession', null=True, on_delete=django.db.models.deletion.SET_NULL, to='webfront.proteome'),
64 |         ),
65 |         migrations.AddField(
66 |             model_name='proteomeperentry',
67 |             name='entry_acc',
68 |             field=models.ForeignKey(db_column='entry_acc', null=True, on_delete=django.db.models.deletion.SET_NULL, to='webfront.entry'),
69 |         ),
70 |         migrations.AddField(
71 |             model_name='proteomeperentry',
72 |             name='proteome',
73 |             field=models.ForeignKey(blank=True, db_column='accession', null=True, on_delete=django.db.models.deletion.SET_NULL, to='webfront.proteome'),
74 |         ),
75 |         migrations.AddIndex(
76 |             model_name='proteomeperentrydb',
77 |             index=models.Index(fields=['source_database', 'proteome'], name='webfront_pr_source__efd3c1_idx'),
78 |         ),
79 |         migrations.AddIndex(
80 |             model_name='proteomeperentry',
81 |             index=models.Index(fields=['entry_acc', 'proteome'], name='webfront_pr_entry_a_5a69ef_idx'),
82 |         ),
83 |     ]
84 | 


--------------------------------------------------------------------------------
/example_data/protein/P16582.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "metadata" : {
  3 |     "accession": "P16582",
  4 |     "id": "LSHR_PIG",
  5 |     "sourceOrganism": {
  6 |       "name": "Sus scrofa (Pig)",
  7 |       "taxid": 9823
  8 |     },
  9 |     "name": {
 10 |       "full": "Lutropin-choriogonadotropic hormone receptor",
 11 |       "short": "LH/CG-R",
 12 |       "other": ["Luteinizing hormone receptor", "LSH-R"]
 13 |     },
 14 |     "description": "Receptor for lutropin-choriogonadotropic hormone. The activity of this receptor is mediated by G proteins which activate adenylate cyclase.",
 15 |     "length": 696,
 16 |     "sequence": "MRRRSLALRLLLALLLLPPPLPQTLLGAPCPEPCSCRPDGALRCPGPRAGLSRLSLTYLPIKVIPSQAFRGLNEVVKIEISQSDSLEKIEANAFDNLLNLSEILIQNTKNLVYIEPGAFTNLPRLKYLSICNTGIRKLPDVTKIFSSEFNFILEICDNLHITTVPANAFQGMNNESITLKLYGNGFEEIQSHAFNGTTLISLELKENAHLKKMHNDAFRGARGPSILDISSTKLQALPSYGLESIQTLIATSSYSLKKLPSREKFTNLLDATLTYPSHCCAFRNLPTKEQNFSFSIFKNFSKQCESTARRPNNETLYSAIFAESELSDWDYDYGFCSPKTLQCAPEPDAFNPCEDIMGYDFLRVLIWLINILAIMGNVTVLFVLLTSHYKLTVPRFLMCNLSFADFCMGLYLLLIASVDAQTKGQYYNHAIDWQTGNGCSVAGFFTVFASELSVYTLTVITLERWHTITYAIQLDQKLRLRHAIPIMLGGWLFSTLIAMLPLVGVSSYMKVSICLPMDVETTLSQVYILTILILNVVAFIIICACYIKIYFAVQNPELMATNKDTKIAKKMAVLIFTDFTCMAPISFFAISAALKVPLITVTNSKVLLVLFYPVNSCANPFLYAIFTKAFRRDFFLLLSKSGCCKHQAELYRRKDFSAYCKNGFTGSNKPSRSTLKLTTLQCQYSTVMDKTCYKDC",
 17 |     "proteome": "UP000008227",
 18 |     "gene": "LHCGR",
 19 |     "GO": {
 20 |       "biologicalProcess": [
 21 |         {
 22 |           "id": "GO:0007186",
 23 |           "termName": "G-protein coupled receptor signaling pathway"
 24 |         }
 25 |       ],
 26 |       "molecularFunction": [
 27 |         {
 28 |           "id": "GO:0004930",
 29 |           "name": "G-protein coupled receptor activity"
 30 |         },
 31 |         {
 32 |           "id": "GO:0004964",
 33 |           "name": "luteinizing hormone receptor activity"
 34 |         },
 35 |         {
 36 |           "id": "GO:0016500",
 37 |           "name": "protein-hormone receptor activity"
 38 |         }
 39 |       ],
 40 |       "cellularComponent": [
 41 |         {
 42 |           "id": "GO:0016021",
 43 |           "name": "integral component of membrane"
 44 |         }
 45 |       ]
 46 |     },
 47 |     "proteinEvidence": 4
 48 |   },
 49 |   "representation": {
 50 |     "entries": [
 51 |       {
 52 |         "id": "IPR000276",
 53 |         "name": "G protein-coupled receptor, rhodopsin-like",
 54 |         "type": "family"
 55 |       },
 56 |       {
 57 |         "id": "IPR002131",
 58 |         "name": "Glycoprotein hormone receptor family",
 59 |         "type": "family"
 60 |       },
 61 |       {
 62 |         "id": "IPR002273",
 63 |         "name": "Lutropin-choriogonadotropic hormone receptor",
 64 |         "type": "family"
 65 |       },
 66 |       {
 67 |         "id": "IPR000372",
 68 |         "name": "Leucine-rich repeat N-terminal domain",
 69 |         "type": "domain"
 70 |       },
 71 |       {
 72 |         "id": "IPR032675",
 73 |         "name": "Leucine-rich repeat domain, L domain-like",
 74 |         "type": "domain"
 75 |       },
 76 |       {
 77 |         "id": "IPR017452",
 78 |         "name": "GPCR, rhodopsin-like, 7TM",
 79 |         "type": "domain"
 80 |       },
 81 |       {
 82 |         "id": "IPR026906",
 83 |         "name": "Repeat Leucine rich repeat 5",
 84 |         "type": "repeat"
 85 |       },
 86 |     ],
 87 |     "signalPeptide": [],
 88 |     "transmembrane": [],
 89 |     "coiledCoil": [],
 90 |     "lowComplexity_disorder": [],
 91 |     "activeSites": [],
 92 |     "perResidueFeatures": [],
 93 |     "disulphideBridges": []
 94 |   },
 95 |   "structure": {
 96 |     "chains": []
 97 |   },
 98 |   "genomicContext": {
 99 |     "DNA": ""
100 |   }
101 | }
102 | 


--------------------------------------------------------------------------------
/docs/examples/fetch-alphafold-for-entry.py:
--------------------------------------------------------------------------------
  1 | """
  2 | This script allows to downloads the AlphaFold predictions 
  3 | (PDB format) of all UniProt proteins matched by a given 
  4 | InterPro entry or member database signature.
  5 | 
  6 | Requires python >= 3.6
  7 | 
  8 | Example of running command:
  9 | $ mkdir outdir
 10 | $ python af-entry-dl.py PF05306 outdir
 11 | """
 12 | 
 13 | import json
 14 | import os
 15 | import sys
 16 | from concurrent.futures import ThreadPoolExecutor, as_completed
 17 | from urllib.parse import urlencode
 18 | from urllib.request import urlopen
 19 | 
 20 | 
 21 | def get_uniprot_accessions(source_db, query):
 22 |     api_url = "https://www.ebi.ac.uk/interpro/api"
 23 |     url = f"{api_url}/protein/UniProt/entry/{source_db}/{query}/?"
 24 |     url += urlencode({"with": "alphafold", "page_size": 100})
 25 |     accessions = []
 26 | 
 27 |     while True:
 28 |         with urlopen(url) as res:
 29 |             payload = res.read().decode("utf-8")
 30 |             obj = json.loads(payload)
 31 | 
 32 |             accessions += [r["metadata"]["accession"] for r in obj["results"]]
 33 |             
 34 |             url = obj.get("next")
 35 |             if not url:
 36 |                 break
 37 | 
 38 |     return accessions
 39 | 
 40 | def get_mem_db(query):
 41 |     url = f"https://www.ebi.ac.uk/interpro/api/utils/accession/{query}"
 42 | 
 43 |     with urlopen(url) as res:
 44 |         if res.status != 200:
 45 |             sys.stderr.write(f"error: no results found for {query}\n")
 46 |             sys.exit(1)
 47 | 
 48 |         payload = res.read().decode("utf-8")
 49 |         obj = json.loads(payload)
 50 |         if obj["endpoint"] != "entry":
 51 |             sys.stderr.write(f"error: {query} is not an entry\n")
 52 | 
 53 |         return obj["source_database"]
 54 |     
 55 | def download_af_pdb(accession, outdir):
 56 |     url = f"https://alphafold.ebi.ac.uk/api/prediction/{accession}"
 57 |     with urlopen(url) as res:
 58 |         payload = res.read().decode("utf-8")
 59 |         obj = json.loads(payload)
 60 |         pdb_url = obj[0]["pdbUrl"]
 61 |         
 62 |     filename = os.path.basename(pdb_url)
 63 |     filepath = os.path.join(outdir, filename)
 64 | 
 65 |     with open(filepath, "wb") as fh, urlopen(pdb_url) as res:
 66 |         for chunk in res:
 67 |             fh.write(chunk)
 68 | 
 69 | def main():
 70 | 
 71 |     query = sys.argv[1]
 72 |     outdir = sys.argv[2]
 73 | 
 74 |     source_db = get_mem_db(query)
 75 |     proteins = get_uniprot_accessions(source_db, query)
 76 | 
 77 |     with ThreadPoolExecutor(max_workers=8) as executor:
 78 |         fs = {}
 79 |         done = 0
 80 |         milestone = step = 10
 81 |         total = len(proteins)
 82 | 
 83 |         while True:
 84 |             for accession in proteins:
 85 |                 f = executor.submit(download_af_pdb, accession, outdir)
 86 |                 fs[f] = accession
 87 | 
 88 |             failed = []
 89 |             for f in as_completed(fs):
 90 |                 accession = fs[f]
 91 | 
 92 |                 try:
 93 |                     f.result()
 94 |                 except Exception as exc:
 95 |                     failed.append(accession)
 96 |                     sys.stderr.write(f"error: {exc}\n")
 97 |                 else:
 98 |                     done += 1
 99 |                     progress = done / total * 100
100 |                     if progress >= milestone:
101 |                         sys.stderr.write(f"progress: {progress:.0f}%\n")
102 |                         milestone += step
103 | 
104 |             proteins = failed
105 |             if not proteins:
106 |                 break
107 | 
108 | 
109 | if __name__ == "__main__":
110 |     main()
111 | 


--------------------------------------------------------------------------------
/config/elastic_mapping.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "settings": {
  3 |     "analysis": {
  4 |       "analyzer": {
  5 |         "autocomplete": {
  6 |           "tokenizer": "autocomplete",
  7 |           "filter": [
  8 |             "lowercase"
  9 |           ]
 10 |         }
 11 |       },
 12 |       "tokenizer": {
 13 |         "autocomplete": {
 14 |           "type": "edge_ngram",
 15 |           "min_gram": 2,
 16 |           "max_gram": 20,
 17 |           "token_chars": [
 18 |             "letter",
 19 |             "digit"
 20 |           ]
 21 |         }
 22 |       }
 23 |     }
 24 |   },
 25 |   "mappings": {
 26 |     "properties": {
 27 |       "entry_acc": {
 28 |         "type": "keyword"
 29 |       },
 30 |       "entry_db": {
 31 |         "type": "keyword"
 32 |       },
 33 |       "entry_type": {
 34 |         "type": "keyword"
 35 |       },
 36 |       "entry_date": {
 37 |         "type": "date"
 38 |       },
 39 |       "entry_integrated": {
 40 |         "type": "keyword"
 41 |       },
 42 |       "entry_go_terms": {
 43 |         "type": "keyword"
 44 |       },
 45 |       "ida_id": {
 46 |         "type": "keyword"
 47 |       },
 48 |       "ida": {
 49 |         "type": "keyword"
 50 |       },
 51 |       "protein_acc": {
 52 |         "type": "keyword"
 53 |       },
 54 |       "protein_length": {
 55 |         "type": "long"
 56 |       },
 57 |       "protein_db": {
 58 |         "type": "keyword"
 59 |       },
 60 |       "structure_acc": {
 61 |         "type": "keyword"
 62 |       },
 63 |       "structure_resolution": {
 64 |         "type": "float"
 65 |       },
 66 |       "structure_date": {
 67 |         "type": "date"
 68 |       },
 69 |       "structure_chain_acc": {
 70 |         "type": "text",
 71 |         "analyzer": "keyword"
 72 |       },
 73 |       "structure_chain": {
 74 |         "type": "text",
 75 |         "analyzer": "keyword",
 76 |         "fielddata": true
 77 |       },
 78 |       "structure_evidence": {
 79 |         "type": "keyword"
 80 |       },
 81 |       "protein_af_score": {
 82 |         "type": "float"
 83 |       },
 84 |       "protein_bfvd_score": {
 85 |         "type": "float"
 86 |       },
 87 |       "proteome_acc": {
 88 |         "type": "keyword"
 89 |       },
 90 |       "proteome_name": {
 91 |         "type": "keyword"
 92 |       },
 93 |       "proteome_is_reference": {
 94 |         "type": "keyword"
 95 |       },
 96 |       "tax_id": {
 97 |         "type": "long"
 98 |       },
 99 |       "tax_name": {
100 |         "type": "keyword"
101 |       },
102 |       "tax_lineage": {
103 |         "type": "keyword"
104 |       },
105 |       "tax_rank": {
106 |         "type": "keyword"
107 |       },
108 |       "structure_protein_acc": {
109 |         "type": "keyword"
110 |       },
111 |       "structure_protein_db": {
112 |         "type": "keyword"
113 |       },
114 |       "structure_protein_length": {
115 |         "type": "long"
116 |       },
117 |       "structure_protein_locations": {
118 |         "type": "object",
119 |         "enabled": false
120 |       },
121 |       "entry_protein_locations": {
122 |         "type": "object",
123 |         "enabled": false
124 |       },
125 |       "entry_structure_locations": {
126 |         "type": "object",
127 |         "enabled": false
128 |       },
129 |       "text_entry": {
130 |         "type": "text",
131 |         "analyzer": "autocomplete"
132 |       },
133 |       "text_protein": {
134 |         "type": "text",
135 |         "analyzer": "autocomplete"
136 |       },
137 |       "text_structure": {
138 |         "type": "text",
139 |         "analyzer": "autocomplete"
140 |       },
141 |       "text_set": {
142 |         "type": "text",
143 |         "analyzer": "autocomplete"
144 |       },
145 |       "text_taxonomy": {
146 |         "type": "text",
147 |         "analyzer": "autocomplete"
148 |       },
149 |       "text_proteome": {
150 |         "type": "text",
151 |         "analyzer": "autocomplete"
152 |       },
153 |       "set_acc": {
154 |         "type": "keyword"
155 |       },
156 |       "set_db": {
157 |         "type": "keyword"
158 |       }
159 |     }
160 |   }
161 | }
162 | 


--------------------------------------------------------------------------------
/webfront/views/QUERYSET_README.md:
--------------------------------------------------------------------------------
 1 | # Queryset Manager
 2 | 
 3 | Django provides the tools to map the model into the selected database. 
 4 | This project has a hybrid of MySQL-Elasticsearch as its data source. 
 5 | 
 6 | Please notice we are not using elastic as a search index for the MySQL DB. 
 7 | Instead, we use an index in Elasticsearch as our denormalized table that works as a 
 8 | pre-calculated `join` of all the tables.
 9 | 
10 | In MySQL we have tables that have all the details of the entities: 
11 | `Entry`, `Protein`, `Structure`, `Taxonomy`, `Proteome` and `Set`.
12 | And when we need anything that links two or more of these entities we should query 
13 | the Elasticsearch index.
14 | 
15 | For example if we need the sequence of the proteins that have a match with an entry 
16 | with accession `IPR000001`, we start by querying Elasticsearch, filtering the index where
17 | `entry_acc:IPR000001` to get the list of protein accessions. 
18 | And then, we query MySQL to get the sequence of all the proteins of the given accessions.
19 | 
20 | The `QuerysetManager` class (see section below) is in charge of collecting the filters over the entities and 
21 | built the corresponding queries for elastic and MySQL.
22 | 
23 | When the API processes the URL ([Read more](./README.md)), it adds a filter to the 
24 | `QuerysetManager` for each level in the URL. When all the levels are processed, the `get()` method 
25 | of `CustomView` checks if all the filters belong to the same entity, in which case, a Django
26 | queryset can be built in the traditional way, by passing the filters included in the manager.
27 | Otherwise an Elasticsearch query is created with the method `get_searcher_query` of the
28 | QuerysetManager. And the Django queryset is using this result.
29 |   
30 | In any case a Django Queryset is created and can be use to serialize a response.
31 | 
32 | ## The `QuerysetManager` class
33 | 
34 | The class is in the [`queryset_manager.py` file](./queryset_manager.py). 
35 | The main endpoint of the queryset needs to be defined when the `QuerysetManager` is initialised. e.g.
36 | 
37 | ```python
38 | queryset_manager.reset_filters("entry", endpoint_levels)
39 | ```
40 | 
41 | The `filters` attribute of this class is where the filters get stored. It is a dictionary where each key is an endpoint,
42 | and their value is also a dictionary where each key:value is a filter:value.
43 | 
44 | For example to filter a queryset for the entry with accession `"IPR000001"`, you can use the following code:
45 | 
46 | ```python
47 | queryset_manager.add_filter(
48 |   "entry", accession__iexact="IPR000001"
49 | )
50 | ```
51 | 
52 | Then the value of  `queryset_manager.filters` will be something like:
53 | 
54 | ```python
55 | {
56 |   "search": {},
57 |   "searcher": {},
58 |   "entry": {
59 |     "accession__iexact": "IPR000001",
60 |   },
61 |   "structure": {},
62 |   "protein": {},
63 |   "taxonomy": {},
64 |   "proteome": {},
65 |   "set": {},
66 |   "set_alignment": {},
67 | }
68 | ```
69 | 
70 | A second structure similar to the one above is used to store the _filter by exclusion_, 
71 | for example something that is different to the type `"family"`.
72 | 
73 | ## From stored filters to data
74 | 
75 | When all the filters are applying to a single endpoint, the data will be taken from MySQL.
76 | Tables in MySQL are mapped into the Django model, and given that we are using the same format 
77 | to store filters, the Django Queryset can be created by starting with the model that corresponds 
78 | to the one defined as `main_endpoint`, and adding all the filters that have been stored for it.
79 | That is basically the logic in the method `get_queryset()`.
80 | 
81 | But when there are more multiple endpoints the query needs to be executed in our Elasticsearch index.
82 | The method `get_searcher_query()` is in charge of translating the filter into a string that follows 
83 | the Lucene Query Language, which is the format that Elasticsearch supports in it `q` parameter.
84 | 
85 | For example with the same filter than above for accession `IPR00001` the outcome of calling `queryset_manager.get_searcher_query()` should be something like `entry_acc:IPR00001`
86 | 
87 | This query is not executed yet, oly the string of the query is generated. the logic for elastic queries 
88 | is explained [here](../searcher/README.md)
89 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | [![Unit and Funtional Testing](https://github.com/ProteinsWebTeam/interpro7-api/workflows/Unit%20and%20Funtional%20Testing/badge.svg)](https://github.com/ProteinsWebTeam/interpro7-api/actions?query=workflow%3A%22Unit+and+Funtional+Testing%22)
 3 | [![Coverage Status](https://coveralls.io/repos/github/ProteinsWebTeam/interpro7-api/badge.svg?branch=master)](https://coveralls.io/github/ProteinsWebTeam/interpro7-api?branch=master)
 4 | [![GitHub license](https://img.shields.io/badge/license-apache-blue.svg)](https://github.com/ProteinsWebTeam/interpro7-api/blob/master/LICENSE)
 5 | [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/ambv/black)
 6 | 
 7 | ![Logo InterPro7](https://raw.githubusercontent.com/ProteinsWebTeam/interpro7-api/master/webfront/static/logo_178x178.png  "Logo InterPro7")
 8 | 
 9 | ## Interpro 7 API
10 | 
11 | InterPro provides functional analysis of proteins by classifying them into families and predicting domains and important sites. 
12 | 
13 | This is the repository for the source code running the InterPro Rest API, which is currently available at [https://www.ebi.ac.uk/interpro/api/].
14 | 
15 | This API provides the data that the new InterPro website uses. You can explore the website at [www.ebi.ac.uk/interpro].
16 | 
17 | The repository for the InterPro Website can be found at [https://github.com/ProteinsWebTeam/interpro7-client].
18 | 
19 | 
20 | #### API URL Design
21 | 
22 | The InterPro API can be accessed by any of its 6 endpoints: 
23 | 
24 | * entry
25 | * protein
26 | * structure
27 | * set
28 | * taxonomy
29 | * proteome
30 | 
31 | 
32 | 
33 | if the URL only contains the name of the endpoint (e.g. `/structure`), the API returns an overview object with counters of the chosen entity grouped by its databases. 
34 | 
35 | For each endpoint the user can specify a database (e.g. `/entry/pfam`), and the API will return a list of the instance in such database.
36 | 
37 | Similarly, the user can include an accession of an entity in that endpoint (e.g. `/protein/uniprot/P99999`), which will return an object with detailed metadata of such entity. 
38 | 
39 | The user can freely combine the endpoint blocks (e.g. `/entry/interpro/ipr000001/protein/reviewed`). The only limitation is that a block describing an endpoint can only appears once in the URL. 
40 | 
41 | The google doc here contains more information about the URL design of this API: [Document](https://docs.google.com/document/d/1JkZAkGI6KjZdqwJFXYlTFPna82p68vom_CojYYaTAR0/edit?usp=sharing)
42 | 
43 | The `interpro7-api/docs/modifiers.md` document contains an exhaustive list of modifiers that can be used with example links. 
44 | 
45 | 
46 | #### Dependencies
47 | 
48 | InterPro7 API runs on [Python3](https://docs.python.org/3/) and uses [Django](https://www.djangoproject.com/) as its web framework, 
49 | together with the [Django REST framework](http://www.django-rest-framework.org/) to implement the REST API logic.
50 | 
51 | Another set of dependencies in the codebase are related to data access. Our data storage has 3 sources, a MySQL database for the metadata of all our entities, an elasticsearch instance for the links between them, and, optionally, redis to cache responses of often used requests.
52 | The python clients used to communicate with the sources are: mysqlclient, redis and django-redis. For elastic search we use regular http transactions, and therefore no client is required.
53 | 
54 | The specific versions of these dependencies can be found in the file [requirements.txt](https://github.com/ProteinsWebTeam/interpro7-api/blob/master/requirements.txt). Other minor dependencies are also included in the file.
55 | 
56 | An optional set of dependencies, not required to run the API, but useful for development purposes can be found in [dev_requirements.txt](https://github.com/ProteinsWebTeam/interpro7-api/blob/master/dev_requirements.txt).
57 | 
58 | 
59 | #### Local Installation
60 | 
61 | The procedure to install this project can be seen [HERE](deploy_tools/README.md).
62 | 
63 | #### Developers Documentation
64 | 
65 | Some details about decisions, compromises and techniques used throughout the project can be found [HERE](./webfront/README.md)
66 | 
67 | ---
68 | This project followed some of the recommendations and guidelines presented in the book:
69 | [Test-Driven Development with Python](http://www.obeythetestinggoat.com/)
70 | 


--------------------------------------------------------------------------------
/webfront/views/modifier_manager.py:
--------------------------------------------------------------------------------
  1 | from webfront.views.custom import is_single_endpoint
  2 | from webfront.constants import ModifierType
  3 | 
  4 | 
  5 | class ModifierManager:
  6 |     def __init__(self, general_handler=None):
  7 |         self.general_handler = general_handler
  8 |         self.modifiers = {}
  9 |         self.payload = None
 10 |         self.serializer = None
 11 |         self.many = None
 12 |         self.search_size = None
 13 |         self.after_key = None
 14 |         self.before_key = None
 15 | 
 16 |     def register(
 17 |         self,
 18 |         parameter,
 19 |         action,
 20 |         type=ModifierType.FILTER,
 21 |         serializer=None,
 22 |         many=None,
 23 |         works_in_single_endpoint=True,
 24 |         works_in_multiple_endpoint=True,
 25 |     ):
 26 |         self.modifiers[parameter] = {
 27 |             "action": action,
 28 |             "type": type,
 29 |             "serializer": serializer,
 30 |             "many": many,
 31 |             "works_in_single_endpoint": works_in_single_endpoint,
 32 |             "works_in_multiple_endpoint": works_in_multiple_endpoint,
 33 |         }
 34 | 
 35 |     def unregister(self, parameter):
 36 |         if parameter in self.modifiers:
 37 |             del self.modifiers[parameter]
 38 | 
 39 |     def _check_modifier(self, modifier):
 40 |         single = is_single_endpoint(self.general_handler)
 41 |         if single and not self.modifiers[modifier]["works_in_single_endpoint"]:
 42 |             raise Exception(
 43 |                 "The modifier '{}' doesn't work on URLs of a single endpoint".format(
 44 |                     modifier
 45 |                 )
 46 |             )
 47 |         if not single and not self.modifiers[modifier]["works_in_multiple_endpoint"]:
 48 |             raise Exception(
 49 |                 "The modifier '{}' doesn't work on URLs of multiple endpoints".format(
 50 |                     modifier
 51 |                 )
 52 |             )
 53 | 
 54 |     def execute(self, request):
 55 |         payload_modifiers = {}
 56 |         queryset_modifiers = {}
 57 | 
 58 |         for p, m in self.modifiers.items():
 59 |             if m["type"] == ModifierType.REPLACE_PAYLOAD:
 60 |                 payload_modifiers[p] = m
 61 |             elif m["type"] == ModifierType.FILTER:
 62 |                 queryset_modifiers[p] = m
 63 | 
 64 |         for modifier in queryset_modifiers:
 65 |             param = request.query_params.get(modifier)
 66 |             if param is not None:
 67 |                 self._check_modifier(modifier)
 68 |                 self.payload = self.modifiers[modifier]["action"](
 69 |                     param, self.general_handler
 70 |                 )
 71 |                 self.serializer = self.modifiers[modifier]["serializer"]
 72 |                 if self.modifiers[modifier]["many"] is not None:
 73 |                     self.many = (self.many is not None and self.many) or self.modifiers[
 74 |                         modifier
 75 |                     ]["many"]
 76 |         use_model_as_payload = False
 77 |         for modifier in payload_modifiers:
 78 |             param = request.query_params.get(modifier)
 79 |             if param is not None:
 80 |                 self._check_modifier(modifier)
 81 |                 self.payload = self.modifiers[modifier]["action"](
 82 |                     param, self.general_handler
 83 |                 )
 84 |                 if self.modifiers[modifier]["many"] is not None:
 85 |                     self.many = (self.many is not None and self.many) or self.modifiers[
 86 |                         modifier
 87 |                     ]["many"]
 88 |                 if self.serializer is None:
 89 |                     self.serializer = self.modifiers[modifier]["serializer"]
 90 |                 else:
 91 |                     raise (
 92 |                         Exception,
 93 |                         "only one modifier can change the shape of the payload",
 94 |                     )
 95 |                 use_model_as_payload = True
 96 |         return use_model_as_payload
 97 | 
 98 |     def execute_extenders(self, request, current_payload):
 99 |         extenders = {}
100 |         for p, m in self.modifiers.items():
101 |             if m["type"] == ModifierType.EXTEND_PAYLOAD:
102 |                 extenders[p] = m
103 |         extensions = {}
104 |         for extender in extenders:
105 |             param = request.query_params.get(extender)
106 |             if param is not None:
107 |                 extensions[extender] = self.modifiers[extender]["action"](
108 |                     param, current_payload
109 |                 )
110 |         return extensions
111 | 


--------------------------------------------------------------------------------
/webfront/views/README.md:
--------------------------------------------------------------------------------
  1 | # InterPro7 Views
  2 | 
  3 | This folder contains the Django views inplemented for the InterPro7 API. 
  4 | 
  5 | Our strategy was to have a hierarchical structure (OOP) for all our views with 
  6 | a single entry point.
  7 | 
  8 | ```
  9 |                       |            | <- | GeneralHandler |
 10 | | GenericAPIView | <- | CustomView |
 11 |                       |            | <- | *BlockHandler |
 12 | ```
 13 | 
 14 | 
 15 | The [`urls.py`](../../interpro/urls.py) file is rather simple and just points 
 16 | every path starting with `/api` to the common view: `GeneralHandler`. 
 17 | 
 18 | ```python
 19 | urlpatterns = [url(r"^api/(?P<url>.*)$", common.GeneralHandler.as_view())]
 20 | ```
 21 | 
 22 | ## URL structure
 23 | 
 24 | As seen above a valid URL on the InterPro7 API should start with `/api`.
 25 | From then onward, we will split the URL on the parts separated by `/`, and each of those
 26 | parts will be called *block* in this document. (e.g. /api/[block1]/[block2]/...)
 27 | 
 28 | A set of *blocks* can create an *endpoint-block*. For example`/api/protein/reviewed` defines a 
 29 | single *endpoint-block* formed by 2 *blocks*: `/protein` indicating that we are using the 
 30 | *protein* endpoint and `/reviewed` indicating the database to be filtered by.
 31 | 
 32 | We have define a structure that allows to combine information from multiple endpoints.
 33 | The first *endpoint-block* will be called *main-endpoint-block*. Any following 
 34 | *endpoint-block* are considered filters. 
 35 | In this way, the *main-endpoint-block* defines the set to return, and the rest of the endpoints
 36 | filter the set. For example `/api/protein/reviewed/entry/interpro` is a list of reviewed proteins
 37 | that have matches with InterPro entries; in contrast of `/api/entry/interpro/protein/reviewed`, 
 38 | which is a list of InterPro entries that can match reviewed proteins.
 39 | 
 40 | 
 41 | ## GeneralHandler
 42 | 
 43 | This configuration ensures that all the API requests are first managed in a single place, 
 44 | including all the common logic:
 45 | * Defines the available endpoints
 46 | * For the current request
 47 |     * Initializes the QuerysetManager [(Read More)](./QUERYSET_README.md).
 48 |     * Initializes the SearchController [(Read More)](../searcher/README.md).
 49 |     * Initializes the ModifierManager [(Read More)](./MODIFIER_README.md).
 50 |     * Splits a given URL into blocks.
 51 |     * If there are not blocks generates the response for the root query i.e. `/api/`.
 52 |     * Tries to get the response from the redis cache.
 53 |     * A recursion chain gets started: it invokes the `get()` method of its parent class `CustomView`
 54 |     and recursively finding a handler for each block.
 55 | 
 56 | ### Cache Strategy
 57 | 
 58 | Besides using the cache for fast responses, we use it to avoid duplication of expensive queries.
 59 | When a query is executed it has 90 seconds (by default) to get a response. 
 60 | Otherwise the response will be a time put HTTP code `408`, which will be temporarily saved in the 
 61 | cache. 
 62 | This however won't interrupt the query, which will keep its execution in parallel.
 63 | if a duplicate request arrives before the original request finishes, it will automatically get the
 64 | `408` from the cache.
 65 | When the original request completes, it saves the response in the cache, replacing the `408` one. 
 66 | This way, any future duplicate request will get the value from the cache almost instantly.  
 67 | 
 68 | 
 69 | ## CustomView
 70 | 
 71 | All *block* handlers inherit from `CustomView` and have to implement their `get()` method.
 72 | 
 73 | ### main-endpoint-block
 74 |  
 75 | Basically the task of the `get()` method in `CustomView` is to find what is the most appropriate 
 76 | handler for the current block, and once it founds it invokes the `get()` method of such handler.
 77 | The usual tasks of a handler and in particular of the `get()` method are:
 78 | 
 79 |  *  To add more filters to the current queryset. For example in a URL `/api/entry/interpro` the 
 80 |     handler of the `/interpro` *block* adds a filter like `source_database="interpro"`. 
 81 |  *  To define modifiers. Which is our strategy to extend the API, for example the modifier 
 82 |     `go_term` allows to filter a set of entries, selecting those which are annotated with a given GO ID.
 83 |  *  To define a serializer linked to this *block*. The actual serializer that a response will use, 
 84 |     it is the one linked to the last *block* of the *main-endpoint-block*.
 85 |  *  Finally, the `get()` method will return the result of invoking the `get()` method of its parent class.
 86 |     This, of course is when the recursion occurs.  
 87 | 
 88 | Once all the *blocks* of the main endpoint have been exhausted, is is time to process the filters:
 89 |  
 90 | ### filter-endpoint-blocks
 91 |  
 92 | The logic is very similar, but now the method to call in all the handlers is `filter()`.
 93 | The `filter()` method should be defined as _static_ and should return the filtered queryset.
 94 | This is then repeated for the rest of the *endpoint-blocks*
 95 |  
 96 | After processing the filters the last call of the recursion occurs, and because there are not more blocks, 
 97 | we should finish the response, which implies the execution of any available modifiers, setting up 
 98 | the pagination in case of responses with many items, and finally serializing the built queryset.
 99 |  
100 | 


--------------------------------------------------------------------------------
/functional_tests/tests.py:
--------------------------------------------------------------------------------
  1 | from functional_tests.base import FunctionalTest
  2 | from selenium.webdriver.common.by import By
  3 | import json
  4 | import re
  5 | 
  6 | 
  7 | class RESTRequestsTest(FunctionalTest):
  8 |     def test_request_entry_endpoint(self):
  9 |         self.browser.get(self.server_url + "/api/entry/?format=json")
 10 |         content = self.browser.find_element(By.TAG_NAME, "body").text
 11 |         jsonp = json.loads(content)
 12 | 
 13 |         self.assertEqual(len(jsonp["entries"]), 5, "the output has exactly 5 keys")
 14 |         self.assertIn('"member_databases"', content)
 15 |         self.assertIn('"interpro"', content)
 16 |         self.assertIn('"unintegrated"', content)
 17 | 
 18 |         num_interpro = jsonp["entries"]["interpro"]
 19 |         self.assertEqual(
 20 |             num_interpro, 2, "the fixtures dataset only includes two interpro entry"
 21 |         )
 22 | 
 23 |         self.browser.get(self.server_url + "/api/entry/interpro?format=json")
 24 |         content = self.browser.find_element(By.TAG_NAME, "body").text
 25 | 
 26 |         jsonp = json.loads(content)
 27 | 
 28 |         self.assertEqual(
 29 |             len(jsonp["results"]),
 30 |             num_interpro,
 31 |             "The response should have as many entries as reported in /entry ",
 32 |         )
 33 | 
 34 |         acc = jsonp["results"][0]["metadata"]["accession"]
 35 |         self.assertEqual(acc, "IPR001165")
 36 |         self.browser.get(
 37 |             self.server_url + "/api/entry/interpro/" + acc + "?format=json"
 38 |         )
 39 |         content = self.browser.find_element(By.TAG_NAME, "body").text
 40 | 
 41 |         jsonp = json.loads(content)
 42 |         self.assertEqual(
 43 |             acc,
 44 |             jsonp["metadata"]["accession"],
 45 |             "The accession in the response object should be the same as reequested",
 46 |         )
 47 |         self.assertIn(
 48 |             "metadata",
 49 |             jsonp.keys(),
 50 |             "'metadata' should be one of the keys in the response",
 51 |         )
 52 |         self.assertTrue(
 53 |             isinstance(jsonp["metadata"]["go_terms"], list), "go_terms should be a list"
 54 |         )
 55 | 
 56 |         self.assertEqual(jsonp["metadata"]["counters"]["proteins"], 1)
 57 | 
 58 |     def test_request_protein_endpoint(self):
 59 |         self.browser.get(self.server_url + "/api/protein/?format=json")
 60 |         content = self.browser.find_element(By.TAG_NAME, "body").text
 61 | 
 62 |         jsonp = json.loads(content)
 63 | 
 64 |         self.assertEqual(len(jsonp["proteins"]), 3, "the output has exactly 3 keys")
 65 | 
 66 |         self.assertIn('"uniprot"', content)
 67 | 
 68 |         num_uniprot = jsonp["proteins"]["uniprot"]
 69 |         self.assertEqual(
 70 |             num_uniprot, 5, "the TEST dataset only includes 5 uniprot entries"
 71 |         )
 72 | 
 73 |         self.browser.get(self.server_url + "/api/protein/uniprot?format=json")
 74 |         content = self.browser.find_element(By.TAG_NAME, "body").text
 75 | 
 76 |         jsonp = json.loads(content)
 77 | 
 78 |         self.assertEqual(
 79 |             len(jsonp["results"]),
 80 |             num_uniprot,
 81 |             "The response should have as many entries as reported in /entry ",
 82 |         )
 83 |         acc = jsonp["results"][0]["metadata"]["accession"]
 84 | 
 85 |         self.browser.get(
 86 |             self.server_url + "/api/protein/uniprot/" + acc + "?format=json"
 87 |         )
 88 |         content = self.browser.find_element(By.TAG_NAME, "body").text
 89 | 
 90 |         jsonp = json.loads(content)
 91 |         self.assertEqual(
 92 |             acc,
 93 |             jsonp["metadata"]["accession"],
 94 |             "The accession in the response object should be the same as reequested",
 95 |         )
 96 |         self.assertIn(
 97 |             "category",
 98 |             jsonp["metadata"]["go_terms"][0],
 99 |             "the key is part of the go_terms and has been parsed OK",
100 |         )
101 | 
102 |         self.browser.get(
103 |             self.server_url
104 |             + "/api/protein/uniprot/"
105 |             + jsonp["metadata"]["id"]
106 |             + "?format=json"
107 |         )
108 |         content2 = self.browser.find_element(By.TAG_NAME, "body").text
109 | 
110 |         jsonp2 = json.loads(content2)
111 |         self.assertEqual(
112 |             jsonp,
113 |             jsonp2,
114 |             "The recovered JSON object when quierying by accession should be the same than the "
115 |             "correspondent search by ID",
116 |         )
117 | 
118 |     def test_request_to_api_frontend(self):
119 |         url = "/api/entry/"
120 |         self.browser.get(self.server_url + url)
121 | 
122 |         req_info = self.browser.find_element(By.CSS_SELECTOR, ".request-info").text
123 | 
124 |         self.assertIn("GET", req_info)
125 |         self.assertIn(url, req_info)
126 | 
127 |         response = self.browser.find_element(By.CSS_SELECTOR, ".response-info").text
128 |         match = re.search("[\{\[]", response)
129 |         json_frontend = json.loads(response[match.start() :])
130 | 
131 |         self.browser.find_element(By.CSS_SELECTOR, ".format-selection button").click()
132 |         self.click_link_and_wait(
133 |             self.browser.find_element(By.CSS_SELECTOR, ".js-tooltip.format-option")
134 |         )
135 | 
136 |         content = self.browser.find_element(By.TAG_NAME, "body").text
137 | 
138 |         jsonp = json.loads(content)
139 | 
140 |         self.assertEqual(json_frontend, jsonp)
141 | 


--------------------------------------------------------------------------------
/webfront/views/proteome.py:
--------------------------------------------------------------------------------
  1 | from webfront.models import Proteome
  2 | from webfront.serializers.proteome import ProteomeSerializer
  3 | from webfront.views.custom import CustomView, SerializerDetail
  4 | from webfront.views.modifiers import (
  5 |     group_by,
  6 |     add_extra_fields,
  7 |     filter_by_boolean_field,
  8 |     show_subset,
  9 | )
 10 | from webfront.constants import ModifierType
 11 | 
 12 | 
 13 | class ProteomeAccessionHandler(CustomView):
 14 |     level_description = "Proteome accession level"
 15 |     serializer_class = ProteomeSerializer
 16 |     queryset = Proteome.objects.all()
 17 |     many = False
 18 |     serializer_detail_filter = SerializerDetail.PROTEOME_DETAIL
 19 | 
 20 |     def get(
 21 |         self,
 22 |         request,
 23 |         endpoint_levels,
 24 |         available_endpoint_handlers=None,
 25 |         level=0,
 26 |         parent_queryset=None,
 27 |         handler=None,
 28 |         general_handler=None,
 29 |         *args,
 30 |         **kwargs
 31 |     ):
 32 |         general_handler.queryset_manager.add_filter(
 33 |             "proteome", accession__iexact=endpoint_levels[level - 1]
 34 |         )
 35 |         # self.serializer_detail = SerializerDetail.PROTEIN_DETAIL
 36 |         return super(ProteomeAccessionHandler, self).get(
 37 |             request._request,
 38 |             endpoint_levels,
 39 |             available_endpoint_handlers,
 40 |             level,
 41 |             self.queryset,
 42 |             handler,
 43 |             general_handler,
 44 |             request,
 45 |             *args,
 46 |             **kwargs
 47 |         )
 48 | 
 49 |     @staticmethod
 50 |     def filter(queryset, level_name="", general_handler=None):
 51 |         general_handler.queryset_manager.add_filter(
 52 |             "proteome", accession__iexact=level_name
 53 |         )
 54 |         return queryset
 55 | 
 56 | 
 57 | class UniprotHandler(CustomView):
 58 |     level_description = "proteome db level"
 59 |     child_handlers = [(r"UP\d{9}", ProteomeAccessionHandler)]
 60 |     queryset = Proteome.objects.all()
 61 |     serializer_class = ProteomeSerializer
 62 |     serializer_detail_filter = SerializerDetail.PROTEOME_DB
 63 | 
 64 |     def get(
 65 |         self,
 66 |         request,
 67 |         endpoint_levels,
 68 |         available_endpoint_handlers=None,
 69 |         level=0,
 70 |         parent_queryset=None,
 71 |         handler=None,
 72 |         general_handler=None,
 73 |         *args,
 74 |         **kwargs
 75 |     ):
 76 |         self.serializer_detail = SerializerDetail.PROTEOME_HEADERS
 77 |         general_handler.queryset_manager.add_filter("proteome", accession__isnull=False)
 78 |         general_handler.queryset_manager.order_by("-num_proteins")
 79 |         general_handler.modifiers.register(
 80 |             "extra_fields", add_extra_fields(Proteome, "counters")
 81 |         )
 82 |         general_handler.modifiers.register("show-subset", show_subset)
 83 | 
 84 |         return super(UniprotHandler, self).get(
 85 |             request._request,
 86 |             endpoint_levels,
 87 |             available_endpoint_handlers,
 88 |             level,
 89 |             self.queryset,
 90 |             handler,
 91 |             general_handler,
 92 |             request,
 93 |             *args,
 94 |             **kwargs
 95 |         )
 96 | 
 97 |     @staticmethod
 98 |     def filter(queryset, level_name="", general_handler=None):
 99 |         general_handler.queryset_manager.add_filter("proteome", accession__isnull=False)
100 |         return queryset
101 | 
102 | 
103 | class ProteomeHandler(CustomView):
104 |     level_description = "Proteome level"
105 |     from_model = False
106 |     child_handlers = [("uniprot", UniprotHandler)]
107 |     many = False
108 |     serializer_class = ProteomeSerializer
109 |     serializer_detail = SerializerDetail.PROTEOME_OVERVIEW
110 |     serializer_detail_filter = SerializerDetail.PROTEOME_OVERVIEW
111 | 
112 |     def get_database_contributions(self, queryset):
113 |         qs = Proteome.objects.filter(accession__in=queryset)
114 |         return {"proteomes": {"uniprot": qs.count()}}
115 | 
116 |     def get(
117 |         self,
118 |         request,
119 |         endpoint_levels,
120 |         available_endpoint_handlers=None,
121 |         level=0,
122 |         parent_queryset=None,
123 |         handler=None,
124 |         general_handler=None,
125 |         *args,
126 |         **kwargs
127 |     ):
128 |         general_handler.modifiers.register(
129 |             "group_by",
130 |             group_by(Proteome, {"proteome_is_reference": "proteome_acc"}),
131 |             type=ModifierType.REPLACE_PAYLOAD,
132 |             many=False,
133 |             serializer=SerializerDetail.GROUP_BY,
134 |         )
135 |         general_handler.modifiers.register(
136 |             "is_reference", filter_by_boolean_field("proteome", "is_reference")
137 |         )
138 |         general_handler.queryset_manager.reset_filters("proteome", endpoint_levels)
139 | 
140 |         return super(ProteomeHandler, self).get(
141 |             request._request,
142 |             endpoint_levels,
143 |             available_endpoint_handlers,
144 |             level,
145 |             self.queryset,
146 |             handler,
147 |             general_handler,
148 |             request,
149 |             *args,
150 |             **kwargs
151 |         )
152 | 
153 |     @staticmethod
154 |     def filter(queryset, level_name="", general_handler=None):
155 |         general_handler.queryset_manager.add_filter("proteome", accession__isnull=False)
156 |         return queryset
157 | 


--------------------------------------------------------------------------------
/webfront/views/cache.py:
--------------------------------------------------------------------------------
  1 | import re
  2 | from urllib.parse import urlparse, urlunparse, parse_qs, urlencode
  3 | from collections import OrderedDict
  4 | 
  5 | from django.core.cache import cache
  6 | 
  7 | from rest_framework import status
  8 | from webfront.response import Response
  9 | from webfront.views.utils import endpoints
 10 | 
 11 | from django.conf import settings
 12 | 
 13 | multiple_slashes = re.compile("/+")
 14 | 
 15 | 
 16 | FIVE_DAYS = 5 * 24 * 60 * 60
 17 | SHOULD_NO_CACHE = -1
 18 | 
 19 | names = [ep["name"].lower() for ep in endpoints]
 20 | 
 21 | short_life_parameters = [
 22 |     "cursor",
 23 |     "size",
 24 |     "go_terms",
 25 |     "ida_ignore",
 26 |     "ida_search",
 27 |     "format",
 28 |     "page_size",
 29 |     "search",
 30 | ]
 31 | 
 32 | no_cache_modifiers = [
 33 |     "extra_features",
 34 |     "residues",
 35 |     "isoforms",
 36 |     "ida",
 37 |     "taxa",
 38 |     "model:",
 39 |     "annotation:info",
 40 |     "subfamilies",
 41 |     "subfamily",
 42 |     "page_size",
 43 |     "interpro_n",
 44 | ]
 45 | 
 46 | 
 47 | def get_timeout_from_path(path, endpoint_levels):
 48 |     parsed = urlparse(path)
 49 |     # process query
 50 |     query = parse_qs(parsed.query, keep_blank_values=True)
 51 | 
 52 |     if (  # is requesting by accession
 53 |         len(endpoint_levels) == 3
 54 |         and len([ep for ep in endpoint_levels if ep.lower() in names]) == 1
 55 |     ):
 56 |         # it doesn't have modifiers
 57 |         if len(query.keys()) == 0:
 58 |             return SHOULD_NO_CACHE
 59 |         for parameter in no_cache_modifiers:
 60 |             if parameter in query:
 61 |                 return SHOULD_NO_CACHE
 62 | 
 63 |     # order querystring, lowercase keys
 64 |     query = OrderedDict(
 65 |         sorted(((key.lower(), sorted(value)) for key, value in query.items()))
 66 |     )
 67 |     page = query.get("page")
 68 |     if page is not None and len(page) > 0:
 69 |         try:
 70 |             page = int(page[0])
 71 |             if page > 1:
 72 |                 return FIVE_DAYS
 73 |         except Exception:
 74 |             return SHOULD_NO_CACHE
 75 |     for parameter in short_life_parameters:
 76 |         if parameter in query:
 77 |             return FIVE_DAYS
 78 |     return None
 79 | 
 80 | 
 81 | def canonical(url, remove_all_page_size=False):
 82 |     parsed = urlparse(url)
 83 |     # process query
 84 |     query = parse_qs(parsed.query, keep_blank_values=True)
 85 |     # order querystring, lowercase keys
 86 |     query = OrderedDict(
 87 |         sorted(((key.lower(), sorted(value)) for key, value in query.items()))
 88 |     )
 89 |     # handle page_size
 90 |     if remove_all_page_size or query.get("page_size") == [
 91 |         str(settings.INTERPRO_CONFIG.get("default_page_size", 20))
 92 |     ]:
 93 |         query.pop("page_size", None)
 94 |     # handle page
 95 |     if query.get("page") == ["1"]:
 96 |         query.pop("page", None)
 97 |     # generate new canonical ParseResult
 98 |     canonical_parsed = parsed._replace(
 99 |         path=multiple_slashes.sub("/", parsed.path + "/"),
100 |         query=urlencode(query, doseq=True),
101 |     )
102 |     # stringify and return
103 |     return urlunparse(canonical_parsed)
104 | 
105 | 
106 | class InterProCache:
107 |     def set(self, key, response, timeout=None):
108 |         if settings.INTERPRO_CONFIG.get(
109 |             "enable_caching", False
110 |         ) and settings.INTERPRO_CONFIG.get("enable_cache_write", False):
111 |             if response.data and (
112 |                 response.status_code == status.HTTP_200_OK
113 |                 or response.status_code == status.HTTP_204_NO_CONTENT
114 |                 or response.status_code == status.HTTP_410_GONE
115 |                 or response.status_code == status.HTTP_408_REQUEST_TIMEOUT
116 |             ):
117 |                 key = canonical(key)
118 |                 value = {
119 |                     "data": {x: response.data[x] for x in response.data},
120 |                     "status": response.status_code,
121 |                     "template_name": response.template_name,
122 |                     "exception": response.exception,
123 |                     "content_type": response.content_type,
124 |                     "headers": {
125 |                         "Content-Type": response.get("Content-Type", ""),
126 |                         "InterPro-Version": response.get("InterPro-Version", ""),
127 |                         "InterPro-Version-Minor": response.get(
128 |                             "InterPro-Version-Minor", ""
129 |                         ),
130 |                         "Server-Timing": response.get("Server-Timing", ""),
131 |                         "Cached": "true",
132 |                     },
133 |                 }
134 |                 if timeout == None:
135 |                     cache.set(key, value)
136 |                     cache.persist(key)
137 |                 else:
138 |                     cache.add(key, value, timeout=timeout)
139 | 
140 |     def get(self, key):
141 |         if settings.INTERPRO_CONFIG.get("enable_caching", False):
142 |             key = canonical(key)
143 |             value = cache.get(key)
144 |             if value:
145 |                 value = Response(
146 |                     value.get("data"),
147 |                     value.get("status", 200),
148 |                     value.get("template_name"),
149 |                     value.get("headers", {}),
150 |                     value.get("exception", False),
151 |                     value.get("content_type"),
152 |                 )
153 |             return value
154 | 


--------------------------------------------------------------------------------
/webfront/pagination.py:
--------------------------------------------------------------------------------
  1 | from collections import OrderedDict
  2 | from django.core.paginator import Paginator as DjangoPaginator
  3 | 
  4 | from webfront.response import Response
  5 | from rest_framework.pagination import PageNumberPagination, CursorPagination
  6 | from rest_framework.utils.urls import replace_query_param, remove_query_param
  7 | from django.conf import settings
  8 | 
  9 | from django.http import QueryDict
 10 | 
 11 | 
 12 | def replace_url_host(url):
 13 |     host = settings.INTERPRO_CONFIG.get("api_url", "http://localhost:8007/api/")
 14 |     divider = "/wwwapi/" if "/wwwapi/" in url else "/api/"
 15 |     return host + url.split(divider)[1]
 16 | 
 17 | 
 18 | class CustomPaginator(DjangoPaginator):
 19 |     def __init__(self, object_list, per_page, orphans=0, allow_empty_first_page=True):
 20 |         if not object_list.ordered:
 21 |             object_list = object_list.order_by("accession")
 22 |         super(CustomPaginator, self).__init__(
 23 |             object_list, per_page, orphans, allow_empty_first_page
 24 |         )
 25 | 
 26 | 
 27 | class CustomPagination(CursorPagination):
 28 |     page_size = settings.INTERPRO_CONFIG.get("default_page_size", 20)
 29 |     page_size_query_param = "page_size"
 30 |     ordering = "accession"
 31 |     current_size = None
 32 |     after_key = None
 33 |     before_key = None
 34 |     elastic_result = None
 35 | 
 36 |     def get_paginated_response(self, data):
 37 |         base = [
 38 |             ("count", self.current_size),
 39 |             ("next", self.get_next_link()),
 40 |             ("previous", self.get_previous_link()),
 41 |             ("results", self._sortBasedOnElastic(data["data"])),
 42 |         ]
 43 |         if "extensions" in data and len(data["extensions"]) > 0:
 44 |             for ext in data["extensions"]:
 45 |                 base.append((ext, data["extensions"][ext]))
 46 |         return Response(OrderedDict(base))
 47 | 
 48 |     # If there is data in elastic_result, implies that the wueryset was created by querying elastic first.
 49 |     # This method uses the list of accession retrieved via elastic to order the results.
 50 |     def _sortBasedOnElastic(self, data):
 51 |         if self.elastic_result is None:
 52 |             return data
 53 |         ordered_data = []
 54 |         for acc in self.elastic_result:
 55 |             obj = next(
 56 |                 filter(
 57 |                     lambda item: item.get("metadata", {}).get("accession", "").lower()
 58 |                     == acc.lower(),
 59 |                     data,
 60 |                 ),
 61 |                 None,
 62 |             )
 63 |             if obj is not None:
 64 |                 ordered_data.append(obj)
 65 |         return ordered_data
 66 | 
 67 |     def _get_position_from_instance(self, instance, ordering):
 68 |         if type(instance) == tuple:
 69 |             return instance[0]
 70 |         return super(CustomPagination, self)._get_position_from_instance(
 71 |             instance, ordering
 72 |         )
 73 | 
 74 |     # Extract some values passed as kwargs before invoking the implementation in the super class
 75 |     def paginate_queryset(self, queryset, request, **kwargs):
 76 |         self.current_size = None
 77 |         self.after_key = None
 78 |         self.elastic_result = None
 79 |         if (
 80 |             hasattr(queryset, "model")
 81 |             and queryset.model._meta.ordering != []
 82 |             and queryset.model._meta.ordering != ""
 83 |             and queryset.model._meta.ordering is not None
 84 |         ):
 85 |             self.ordering = queryset.model._meta.ordering
 86 |         if "search_size" in kwargs and kwargs["search_size"] is not None:
 87 |             if not queryset.ordered:
 88 |                 queryset = queryset.order_by("accession")
 89 |             self.current_size = kwargs["search_size"]
 90 |         if "after_key" in kwargs and kwargs["after_key"] is not None:
 91 |             self.after_key = kwargs["after_key"]
 92 |         if "before_key" in kwargs and kwargs["before_key"] is not None:
 93 |             self.before_key = kwargs["before_key"]
 94 |         if "elastic_result" in kwargs and kwargs["elastic_result"] is not None:
 95 |             self.elastic_result = kwargs["elastic_result"]
 96 |         if "ordering" in kwargs and kwargs["ordering"] is not None:
 97 |             self.ordering = kwargs["ordering"]
 98 |         return super(CustomPagination, self).paginate_queryset(
 99 |             queryset, request, kwargs["view"]
100 |         )
101 | 
102 |     def decode_cursor(self, request):
103 |         if self.after_key is not None or self.before_key is not None:
104 |             return None
105 |         return super(CustomPagination, self).decode_cursor(request)
106 | 
107 |     def has_next_page(self):
108 |         if self.after_key is None:
109 |             return self.has_next
110 |         return True
111 | 
112 |     def has_prev_page(self):
113 |         if self.before_key is None:
114 |             return self.has_previous
115 |         return True
116 | 
117 |     def get_next_link(self):
118 |         if not self.has_next_page():
119 |             return None
120 |         self.base_url = replace_url_host(self.base_url)
121 |         if self.after_key is None:
122 |             return super(CustomPagination, self).get_next_link()
123 |         return replace_query_param(self.base_url, "cursor", self.after_key)
124 | 
125 |     def get_previous_link(self):
126 |         if not self.has_prev_page():
127 |             return None
128 |         self.base_url = replace_url_host(self.base_url)
129 |         if self.before_key is None:
130 |             return super(CustomPagination, self).get_previous_link()
131 |         return replace_query_param(
132 |             self.base_url, "cursor", "-{}".format(self.before_key)
133 |         )
134 | 


--------------------------------------------------------------------------------
/webfront/views/set.py:
--------------------------------------------------------------------------------
  1 | from django.db.models import Count
  2 | 
  3 | from webfront.models import Set
  4 | from webfront.serializers.collection import SetSerializer
  5 | from webfront.views.custom import CustomView, SerializerDetail
  6 | from django.conf import settings
  7 | 
  8 | from webfront.views.modifiers import (
  9 |     add_extra_fields,
 10 |     get_deprecated_response,
 11 |     sort_by,
 12 |     show_subset,
 13 | )
 14 | 
 15 | entry_sets = "|".join(settings.ENTRY_SETS) + "|all"
 16 | entry_sets_accessions = r"^({})$".format(
 17 |     "|".join((set["accession"] for (_, set) in settings.ENTRY_SETS.items()))
 18 | )
 19 | 
 20 | 
 21 | class SetAccessionHandler(CustomView):
 22 |     level_description = "Set accession level"
 23 |     serializer_class = SetSerializer
 24 |     queryset = Set.objects.all()
 25 |     many = False
 26 |     serializer_detail_filter = SerializerDetail.SET_DETAIL
 27 | 
 28 |     def get(
 29 |         self,
 30 |         request,
 31 |         endpoint_levels,
 32 |         available_endpoint_handlers=None,
 33 |         level=0,
 34 |         parent_queryset=None,
 35 |         handler=None,
 36 |         general_handler=None,
 37 |         *args,
 38 |         **kwargs
 39 |     ):
 40 |         general_handler.queryset_manager.add_filter(
 41 |             "set", accession__iexact=endpoint_levels[level - 1].lower()
 42 |         )
 43 |         general_handler.modifiers.register(
 44 |             "alignments",
 45 |             get_deprecated_response(
 46 |                 "Profile-profile alignments have been permanently removed in InterPro 97.0."
 47 |             ),
 48 |         )
 49 | 
 50 |         return super(SetAccessionHandler, self).get(
 51 |             request._request,
 52 |             endpoint_levels,
 53 |             available_endpoint_handlers,
 54 |             level,
 55 |             self.queryset,
 56 |             handler,
 57 |             general_handler,
 58 |             request,
 59 |             *args,
 60 |             **kwargs
 61 |         )
 62 | 
 63 |     @staticmethod
 64 |     def filter(queryset, level_name="", general_handler=None):
 65 |         general_handler.queryset_manager.add_filter(
 66 |             "set", accession__iexact=level_name.lower()
 67 |         )
 68 |         return queryset
 69 | 
 70 | 
 71 | class SetTypeHandler(CustomView):
 72 |     level_description = "set type level"
 73 |     child_handlers = [
 74 |         (entry_sets_accessions, SetAccessionHandler),
 75 |         # ("proteome", ProteomeHandler),
 76 |     ]
 77 |     queryset = Set.objects.all()
 78 |     serializer_class = SetSerializer
 79 |     serializer_detail = SerializerDetail.SET_HEADERS
 80 |     serializer_detail_filter = SerializerDetail.SET_DB
 81 | 
 82 |     def get(
 83 |         self,
 84 |         request,
 85 |         endpoint_levels,
 86 |         available_endpoint_handlers=None,
 87 |         level=0,
 88 |         parent_queryset=None,
 89 |         handler=None,
 90 |         general_handler=None,
 91 |         *args,
 92 |         **kwargs
 93 |     ):
 94 |         db = endpoint_levels[level - 1]
 95 |         if db.lower() != "all":
 96 |             general_handler.queryset_manager.add_filter("set", source_database=db)
 97 |         general_handler.modifiers.register(
 98 |             "extra_fields", add_extra_fields(Set, "counters")
 99 |         )
100 |         general_handler.modifiers.register("show-subset", show_subset)
101 |         return super(SetTypeHandler, self).get(
102 |             request._request,
103 |             endpoint_levels,
104 |             available_endpoint_handlers,
105 |             level,
106 |             self.queryset,
107 |             handler,
108 |             general_handler,
109 |             request,
110 |             *args,
111 |             **kwargs
112 |         )
113 | 
114 |     @staticmethod
115 |     def filter(queryset, level_name="", general_handler=None):
116 |         if level_name.lower() != "all":
117 |             general_handler.queryset_manager.add_filter(
118 |                 "set", source_database=level_name
119 |             )
120 |         return queryset
121 | 
122 | 
123 | class SetHandler(CustomView):
124 |     level_description = "Set level"
125 |     from_model = False
126 |     child_handlers = [
127 |         (entry_sets, SetTypeHandler),
128 |         # ("proteome", ProteomeHandler),
129 |     ]
130 |     many = False
131 |     serializer_class = SetSerializer
132 |     serializer_detail = SerializerDetail.SET_OVERVIEW
133 |     serializer_detail_filter = SerializerDetail.SET_OVERVIEW
134 | 
135 |     def get_database_contributions(self, queryset):
136 |         qs = Set.objects.filter(accession__in=queryset)
137 |         set_counter = qs.values_list("source_database").annotate(
138 |             total=Count("source_database")
139 |         )
140 |         output = {c[0]: c[1] for c in set_counter if c[0] != "node"}
141 |         output["all"] = sum(output.values())
142 |         return {"sets": output}
143 | 
144 |     def get(
145 |         self,
146 |         request,
147 |         endpoint_levels,
148 |         available_endpoint_handlers=None,
149 |         level=0,
150 |         parent_queryset=None,
151 |         handler=None,
152 |         general_handler=None,
153 |         *args,
154 |         **kwargs
155 |     ):
156 |         general_handler.queryset_manager.reset_filters("set", endpoint_levels)
157 |         general_handler.queryset_manager.add_filter("set", accession__isnull=False)
158 |         general_handler.modifiers.register(
159 |             "sort_by",
160 |             sort_by({"accession": "set_acc"}),
161 |         )
162 | 
163 |         return super(SetHandler, self).get(
164 |             request._request,
165 |             endpoint_levels,
166 |             available_endpoint_handlers,
167 |             level,
168 |             self.queryset,
169 |             handler,
170 |             general_handler,
171 |             request,
172 |             *args,
173 |             **kwargs
174 |         )
175 | 
176 |     @staticmethod
177 |     def filter(queryset, level_name="", general_handler=None):
178 |         general_handler.queryset_manager.add_filter("set", accession__isnull=False)
179 |         return queryset
180 | 


--------------------------------------------------------------------------------
/deploy_tools/README.md:
--------------------------------------------------------------------------------
  1 | Provisioning a new site
  2 | =======================
  3 | 
  4 | ## Required packages:
  5 | 
  6 | * Python 3
  7 | * Git
  8 | * pip
  9 | 
 10 | eg, on Ubuntu:
 11 | 
 12 |     sudo apt-get install git python3 python3-pip
 13 | 
 14 | ## Folder structure:
 15 | Assume we have a user account at /home/username
 16 | 
 17 | ```
 18 | /home/username
 19 | └── sites
 20 |     └── PROJECT
 21 |          ├── database
 22 |          ├── source
 23 |          ├── static_files
 24 |          └── virtualenv
 25 | ```
 26 | 
 27 | ## Local Deployment
 28 | 
 29 | 1.  Create directory structure in ~/sites
 30 | 
 31 |     ```bash
 32 |     mkdir -p PROJECT/database
 33 |     mkdir -p PROJECT/source
 34 |     mkdir -p PROJECT/static_files
 35 |     mkdir -p PROJECT/virtualenv
 36 |     ```
 37 | 
 38 | 2.  Pull down source code into folder named source
 39 | 
 40 |     ```bash
 41 |     git clone https://github.com/ProteinsWebTeam/interpro7-api.git PROJECT/source
 42 |     cd PROJECT/source
 43 |     ```
 44 | 
 45 |     From now on all the command assume you are in the ```PROJECT/source``` directory.
 46 | 
 47 | 3.  Start the virtual env in the assigned folder:
 48 | 
 49 |     ```bash
 50 |     python -m venv virtualenv
 51 |     ```
 52 | 
 53 | 4.  Install requirements in the virtual environment
 54 | 
 55 |     ```bash
 56 |     ../virtualenv/bin/pip install -r requirements.txt
 57 |     ```
 58 | 
 59 | 5.  Install requirements for development
 60 | 
 61 |         ```bash
 62 |         ../virtualenv/bin/pip install -r dev_requirements.txt
 63 |         ```
 64 | 
 65 | 5.  Create a local configuration file in `config/interpro.local.yml`.
 66 |     In this file you can overwite any of the settings included in the read-only file `config/interpro.yml`.
 67 |     Below is an example of the local config that will run in debug mode using the test DB with SQLite, a local instance of elasticsearch without redis:
 68 |     ```yaml
 69 |     use_test_db: false
 70 |     debug: true
 71 |     allowed_host: ["localhost", "127.0.0.1"]
 72 |     searcher_path: "https://localhost:9200"
 73 |     searcher_index: "test"
 74 |     searcher_user: "elastic"
 75 |     searcher_password: "password"
 76 |     api_url: "http://localhost:8007/api/"
 77 |     static_url: "api/static_files/"
 78 |     searcher_test_path: "https://localhost:9200"
 79 |     searcher_test_password: "password"
 80 | 
 81 |     ```
 82 | 
 83 |     *   This configuration assumes a running instance of elasticsearch in port 9200. For details on how to install elasticsearch go
 84 |         [HERE](https://www.elastic.co/guide/en/elasticsearch/reference/current/_installation.html)
 85 | 
 86 | 6.  Migrate the database models (For SQLite)
 87 | 
 88 |     ```bash
 89 |      ../virtualenv/bin/python manage.py migrate
 90 |     ```
 91 | 
 92 |     If anything goes wrong check that the database directory exists
 93 | 
 94 |     ```bash
 95 |      ls ../database
 96 |     ```
 97 | 
 98 | 7.  Collect the static files. Only necessary for server deployment.
 99 | 
100 |     ```bash
101 |     ../virtualenv/bin/python manage.py collectstatic --noinput
102 |     ```
103 | 
104 | 8.  Load the fixture data into the SQLite DB.
105 |     ```bash
106 |     ../virtualenv/bin/python manage.py loaddata webfront/tests/fixtures_*.json
107 |     
108 | 9.  Install Elasticsearch and load index. Currently running version 8.12 with authentication by password
109 | 
110 |     e.g for OSX: brew install elasticsearch@8.12
111 |     ```
112 |     curl -XPUT 'localhost:9200/test?pretty' -H 'Content-Type: application/json' -d @config/elastic_mapping.json
113 |     ```
114 | 
115 | 10.  Run the tests. When running the tests, the API loads the fixtures in the existing elasticsearch instance, which is necessary to run the server with fixtures.
116 |     ```
117 |     ../virtualenv/bin/python manage.py test
118 |     ```
119 | 
120 | 11.  Start the server
121 |     ```
122 |     ../virtualenv/bin/python manage.py runserver 0.0.0.0:8000
123 |     ```
124 | 
125 | 12.  _[Optional]_ Install precommit, black and the pre-commit hook, to enable the formatting of files before each commit.
126 |     ```
127 |     ../virtualenv/bin/pip install pre-commit black
128 |     ../virtualenv/bin/pre-commit install
129 |     ```  
130 |     *Note 1*:It is important to run the test in Python 3.8 because the VMs where the API runs use that version.
131 | 
132 | 
133 | 
134 | ## Testing
135 | 
136 | The unit tests are located in `[project]/source/webfront/tests/`.
137 | 
138 | To run unit tests use 
139 | 
140 | ```sh
141 | ../virtualenv/bin/python manage.py test webfront
142 | ```
143 | 
144 | The functional test are in `[project]/functional_tests` and are configured to Google Chrome (or Chromium), so you need to have it installed in your machine.
145 | 
146 | To run functional tests use
147 | 
148 | ```sh
149 | export BROWSER_TEST="chrome"
150 | 
151 | # Only required if ChromeDriver is not in your PATH
152 | # or if its binary is not `chromedriver` (e.g. `chromium.chromedriver`)
153 | export BROWSER_TEST_PATH="/path/to/chromedriver"
154 | 
155 | ../virtualenv/bin/python manage.py test functional_tests
156 | ```
157 | 
158 | As a reference [HERE](https://docs.google.com/presentation/d/13_a6IbTq8KPGRH5AhsauEDJt4jEXNsT7DFdg1PNn4_I/edit?usp=sharing) is a graphic describing the fixtures.
159 | 
160 | All the test can be run at the same time:
161 | 
162 | ```sh
163 | ../virtualenv/bin/python manage.py test
164 | ```
165 | 
166 | ## Setting up real data (MySQL - elasticsearch)
167 | 
168 | For the next steps you need an installation of MySQL with a database compatible with the defined [model](https://github.com/ProteinsWebTeam/interpro7-api/blob/master/webfront/models/interpro_new.py).
169 | 
170 | 1.  Remove the line `use_test_db: true` from the `config/interpro.local.yml` file.
171 |     You could also set the value to false, but given that false is the default value, you can just remove it.
172 | 
173 | 2.  Edit the same `config/interpro.local.yml` file, changing the `searcher_path` setting for one with the elastic search instance that corresponds with the data in MySQL.
174 | 
175 | 3.  Copy the template mysql configuration file into `config/mysql.yml` and edit the file with your data.
176 |     ```bash
177 |     cp config/mysql.template.yml config/mysql.yml
178 |     ```
179 | 
180 | 3.  Start the server
181 |     ```
182 |     ../virtualenv/bin/python manage.py runserver 0.0.0.0:8000
183 |     ```
184 | 


--------------------------------------------------------------------------------
/webfront/tests/tests_utils.py:
--------------------------------------------------------------------------------
  1 | from django.test import TestCase
  2 | 
  3 | from webfront.views.common import map_url_to_levels
  4 | from webfront.views.cache import (
  5 |     canonical,
  6 |     get_timeout_from_path,
  7 |     SHOULD_NO_CACHE,
  8 |     FIVE_DAYS,
  9 | )
 10 | from webfront.serializers.content_serializers import reverse_url
 11 | 
 12 | 
 13 | class CanonicalTestCase(TestCase):
 14 |     def test_basic_unchanged_urls(self):
 15 |         url = "/api/entry/InterPro/IPR000001/"
 16 |         self.assertEqual(url, canonical(url))
 17 |         url = "/api/PrOtEiN/ReViEwEd/"
 18 |         self.assertEqual(url, canonical(url))
 19 | 
 20 |     def test_with_query_unchanged_urls(self):
 21 |         url = "/api/protein/reviewed/?length=1-100"
 22 |         self.assertEqual(url, canonical(url))
 23 |         url = "/api/protein/reviewed/?length=1-100&page=2"
 24 |         self.assertEqual(url, canonical(url))
 25 |         url = "/api/protein/reviewed/?length=1-100&page=2&page_size=50"
 26 |         self.assertEqual(url, canonical(url))
 27 | 
 28 |     def test_basic_incorrect_slash_urls(self):
 29 |         self.assertEqual(
 30 |             "/api/entry/InterPro/IPR000001/",
 31 |             canonical("/api/entry//InterPro/IPR000001/"),
 32 |         )
 33 |         self.assertEqual(
 34 |             "/api/entry/InterPro/IPR000001/",
 35 |             canonical("/api///entry//////InterPro//IPR000001/"),
 36 |         )
 37 |         self.assertEqual(
 38 |             "/api/entry/InterPro/IPR000001/",
 39 |             canonical("/api///entry//////InterPro//IPR000001"),
 40 |         )
 41 |         self.assertEqual(
 42 |             "/api/entry/InterPro/IPR000001/", canonical("/api/entry/InterPro/IPR000001")
 43 |         )
 44 | 
 45 |     def test_with_query_reorder_urls(self):
 46 |         self.assertEqual(
 47 |             "/api/entry/InterPro/?integrated=pfam&page=2",
 48 |             canonical("/api/entry/InterPro/?page=2&integrated=pfam"),
 49 |         )
 50 | 
 51 |     def with_query_remove_unneeded_urls(self):
 52 |         self.assertEqual(
 53 |             "/api/entry/InterPro/?integrated=pfam",
 54 |             canonical("/api/entry/InterPro/?page=1&integrated=pfam"),
 55 |         )
 56 |         self.assertEqual(
 57 |             "/api/entry/InterPro/",
 58 |             canonical("/api/entry/InterPro/?page=1&page_size=20"),
 59 |         )
 60 |         self.assertEqual(
 61 |             "/api/entry/InterPro/?integrated=pfam",
 62 |             canonical("/api/entry/InterPro/?integrated=pfam&page_size=20"),
 63 |         )
 64 | 
 65 | 
 66 | class CacheLifespanTestCase(TestCase):
 67 |     def test_urls_no_cacheable(self):
 68 |         urls = [
 69 |             "/entry/InterPro/IPR000001/",
 70 |             "/protein/uniprot/p99999/?extra_features",
 71 |             "/entry/InterPro/?page",
 72 |         ]
 73 |         for url in urls:
 74 |             levels = map_url_to_levels(url.split("?")[0])
 75 |             self.assertEqual(SHOULD_NO_CACHE, get_timeout_from_path(url, levels))
 76 | 
 77 |     def test_urls_short_life(self):
 78 |         urls = [
 79 |             "/entry/InterPro/?page=33",
 80 |             "/entry/InterPro/?page_size=33",
 81 |             "/entry/InterPro/?format",
 82 |         ]
 83 |         for url in urls:
 84 |             levels = map_url_to_levels(url.split("?")[0])
 85 |             self.assertEqual(FIVE_DAYS, get_timeout_from_path(url, levels))
 86 | 
 87 |     def test_urls_long_life(self):
 88 |         urls = [
 89 |             "/entry/",
 90 |             "/entry/InterPro/",
 91 |             "/entry/InterPro/protein",
 92 |             "/entry/InterPro/IPR000001/protein",
 93 |             "/protein/uniprot/p99999/?conservation",
 94 |         ]
 95 |         for url in urls:
 96 |             levels = map_url_to_levels(url.split("?")[0])
 97 |             self.assertIsNone(get_timeout_from_path(url, levels))
 98 | 
 99 | 
100 | class TestReverseURL(TestCase):
101 |     def test_reverse_to_entry(self):
102 |         urls = [
103 |             [
104 |                 "/protein/uniprot/p99999/entry/InterPro/",
105 |                 "/entry/InterPro/protein/uniprot/p99999/",
106 |             ],
107 |             [  # Modifiers are removed
108 |                 "/protein/uniprot/p99999/entry/InterPro/?some-modifier",
109 |                 "/entry/InterPro/protein/uniprot/p99999/",
110 |             ],
111 |             [  # 3 endpoints -endpoint
112 |                 "/protein/uniprot/p99999/entry/InterPro/structure",
113 |                 "/entry/InterPro/protein/uniprot/p99999/structure/",
114 |             ],
115 |             [  # 3 endpoints - db
116 |                 "/protein/uniprot/p99999/entry/InterPro/structure/pdb",
117 |                 "/entry/InterPro/protein/uniprot/p99999/structure/pdb",
118 |             ],
119 |             [  # 3 endpoints - accession
120 |                 "/protein/uniprot/p99999/entry/InterPro/structure/pdb/1cuk",
121 |                 "/entry/InterPro/protein/uniprot/p99999/structure/pdb/1cuk",
122 |             ],
123 |         ]
124 |         for url in urls:
125 |             self.assertEqual(reverse_url(url[0], "entry", "p99999"), url[1])
126 | 
127 |     def test_reverse_to_protein(self):
128 |         urls = [
129 |             [
130 |                 "/entry/InterPro/ipr000001/protein/uniprot/",
131 |                 "/protein/uniprot/entry/InterPro/ipr000001/",
132 |             ],
133 |             [  # Modifiers are removed
134 |                 "/entry/InterPro/ipr000001/protein/uniprot/?some-modifier",
135 |                 "/protein/uniprot/entry/InterPro/ipr000001/",
136 |             ],
137 |             [  # 3 endpoints -endpoint
138 |                 "/entry/InterPro/ipr000001/protein/uniprot/structure",
139 |                 "/protein/uniprot/entry/InterPro/ipr000001/structure/",
140 |             ],
141 |             [  # 3 endpoints - db
142 |                 "/entry/InterPro/ipr000001/protein/uniprot/structure/pdb",
143 |                 "/protein/uniprot/entry/InterPro/ipr000001/structure/pdb",
144 |             ],
145 |             [  # 3 endpoints - accession
146 |                 "/entry/InterPro/ipr000001/protein/uniprot/structure/pdb/1cuk",
147 |                 "/protein/uniprot/entry/InterPro/ipr000001/structure/pdb/1cuk",
148 |             ],
149 |         ]
150 |         for url in urls:
151 |             self.assertEqual(reverse_url(url[0], "protein", "ipr000001"), url[1])
152 | 


--------------------------------------------------------------------------------
/release/management/commands/warmer.py:
--------------------------------------------------------------------------------
  1 | from collections import OrderedDict
  2 | import requests
  3 | from tqdm import tqdm
  4 | from argparse import FileType
  5 | import sys
  6 | 
  7 | import rest_framework
  8 | 
  9 | from django.core.management import BaseCommand
 10 | from webfront.views.cache import canonical
 11 | 
 12 | requests.adapters.DEFAULT_RETRIES = 5
 13 | 
 14 | 
 15 | def get_unique_lines(logfiles):
 16 |     files = tqdm(logfiles, unit="files", leave=True, desc="reading files")
 17 |     lut = OrderedDict()
 18 |     for file in files:
 19 |         with open(file, "r") as handle:
 20 |             for line in tqdm(
 21 |                 handle, desc="reading {}".format(file), unit="lines", leave=False
 22 |             ):
 23 |                 line = line.strip()
 24 |                 if line and not line.startswith("#"):
 25 |                     line = canonical(line, remove_all_page_size=True)
 26 |                     lut[line] = lut.get(line, 0) + 1
 27 |     return sorted(lut, key=lut.get, reverse=True)
 28 | 
 29 | 
 30 | def send_query(url):
 31 |     return (
 32 |         # first request
 33 |         requests.get(url),
 34 |         # second request (to make sure it's in cache)
 35 |         requests.get(url),
 36 |     )
 37 | 
 38 | 
 39 | def stat_message(responses):
 40 |     return (
 41 |         # total request time
 42 |         responses[0].elapsed.total_seconds(),
 43 |         # return status code from the server
 44 |         responses[0].status_code,
 45 |         # URL for this request
 46 |         responses[0].url,
 47 |         # was the URL already in the cache somehow? (from previous run maybe?)
 48 |         responses[0].headers.get("Cached", "false"),
 49 |         # is the URL now in the cache?
 50 |         responses[1].headers.get("Cached", "false"),
 51 |     )
 52 | 
 53 | 
 54 | def send_queries(root, page_sizes, queries):
 55 |     wrapped = tqdm(unit="queries", desc="sending queries", total=len(queries))
 56 |     for query in queries:
 57 |         url = root + query
 58 |         wrapped.update()
 59 |         wrapped.set_description(url, True)
 60 |         try:
 61 |             responses = send_query(url)
 62 |             yield stat_message(responses)
 63 |             if responses[1].status_code is not rest_framework.status.HTTP_200_OK:
 64 |                 continue
 65 |             # check to see if the result content contains an array of results
 66 |             # if so, we need to also trigger the caching for other page sizes
 67 |             results = responses[1].json().get("results")
 68 |             if results and len(results):
 69 |                 for size in page_sizes:
 70 |                     extra_url = canonical(
 71 |                         "{}{}page_size={}".format(
 72 |                             url, "?" if url.endswith("/") else "&", size
 73 |                         )
 74 |                     )
 75 |                     wrapped.update()
 76 |                     wrapped.total += 1
 77 |                     wrapped.set_description(extra_url, True)
 78 |                     try:
 79 |                         responses = send_query(extra_url)
 80 |                         yield stat_message(responses)
 81 |                     except KeyboardInterrupt:
 82 |                         raise
 83 |                     except Exception as e:
 84 |                         print(e, file=sys.stderr)
 85 |                         raise
 86 |         except KeyboardInterrupt:
 87 |             raise
 88 |         except Exception as e:
 89 |             print(e, file=sys.stderr)
 90 |             raise
 91 |     wrapped.set_description("sent all {} queries".format(wrapped.total), True)
 92 |     return
 93 | 
 94 | 
 95 | def analyze_stats(stats, n, output):
 96 |     stats.sort(key=lambda t: t[0], reverse=True)
 97 |     print(" - Here are the {} longest queries\n".format(n), file=sys.stderr)
 98 |     for (time, status, url, was_cached, is_cached) in stats[:n]:
 99 |         if time < 1:
100 |             log_time = "{}ms".format(int(time * 1000))
101 |         else:
102 |             log_time = "{:.3f}s".format(round(time, 3))
103 |         print(
104 |             "{}: {}{}".format(log_time, url, " (was cached)" if was_cached else ""),
105 |             file=sys.stderr,
106 |         )
107 |     print("time", "status", "url", "was cached", "got cached", sep="\t", file=output)
108 |     failed = 0
109 |     for (time, status, url, was_cached, is_cached) in stats:
110 |         if status is not rest_framework.status.HTTP_200_OK:
111 |             failed += 1
112 |         print(time, status, url, was_cached, is_cached, sep="\t", file=output)
113 |     if failed:
114 |         print(
115 |             "❌ {} URL{} failed to be processed".format(
116 |                 failed, "s" if failed > 1 else ""
117 |             ),
118 |             file=sys.stderr,
119 |         )
120 |     else:
121 |         print("✅ all URLs were processed successfully", file=sys.stderr)
122 |     return failed
123 | 
124 | 
125 | def main(logfiles, root, page_sizes, top, output, *args, **kwargs):
126 |     queries = get_unique_lines(logfiles)
127 |     print("- Found {} unique URLs".format(len(queries)), file=sys.stderr)
128 |     stats = list(send_queries(root, page_sizes, queries))
129 |     return analyze_stats(stats, top, output)
130 | 
131 | 
132 | class Command(BaseCommand):
133 |     help = "warms up the API"
134 | 
135 |     def add_arguments(self, parser):
136 |         parser.add_argument(
137 |             "logfiles", type=str, nargs="+", help="log files with requests to execute"
138 |         )
139 |         parser.add_argument(
140 |             "--root",
141 |             "-r",
142 |             type=str,
143 |             help="URL root",
144 |             default="http://wp-np3-ac.ebi.ac.uk/interpro7",
145 |         )
146 |         parser.add_argument(
147 |             "--page_sizes",
148 |             "-p",
149 |             type=int,
150 |             nargs="+",
151 |             help="extra page size to query",
152 |             default=[100, 50],
153 |         )
154 |         parser.add_argument(
155 |             "--top",
156 |             "-t",
157 |             type=int,
158 |             help="display the top n longest queries",
159 |             default=10,
160 |         )
161 |         parser.add_argument(
162 |             "--output",
163 |             "-o",
164 |             type=FileType("w"),
165 |             help="output file for logs (defaults to stdout)",
166 |             default=sys.stdout,
167 |         )
168 | 
169 |     def handle(self, *args, **options):
170 |         failed = main(**options)
171 |         sys.exit(1 if failed else 0)
172 | 


--------------------------------------------------------------------------------
/webfront/views/taxonomy.py:
--------------------------------------------------------------------------------
  1 | from webfront.models import Taxonomy
  2 | from webfront.serializers.taxonomy import TaxonomySerializer
  3 | from webfront.views.custom import CustomView, SerializerDetail
  4 | from webfront.views.modifiers import (
  5 |     passing,
  6 |     add_extra_fields,
  7 |     filter_by_key_species,
  8 |     filter_by_entry,
  9 |     filter_by_entry_db,
 10 |     filter_by_domain_architectures,
 11 |     add_taxonomy_names,
 12 |     get_taxonomy_by_scientific_name,
 13 |     show_subset,
 14 | )
 15 | from webfront.constants import ModifierType
 16 | 
 17 | 
 18 | class TaxonomyAccessionHandler(CustomView):
 19 |     level_description = "Taxonomy accession level"
 20 |     serializer_class = TaxonomySerializer
 21 |     queryset = Taxonomy.objects.all()
 22 |     many = False
 23 |     serializer_detail_filter = SerializerDetail.TAXONOMY_DETAIL
 24 | 
 25 |     def get(
 26 |         self,
 27 |         request,
 28 |         endpoint_levels,
 29 |         available_endpoint_handlers=None,
 30 |         level=0,
 31 |         parent_queryset=None,
 32 |         handler=None,
 33 |         general_handler=None,
 34 |         *args,
 35 |         **kwargs
 36 |     ):
 37 |         general_handler.queryset_manager.add_filter(
 38 |             "taxonomy", accession=endpoint_levels[level - 1]
 39 |         )
 40 |         general_handler.modifiers.register(
 41 |             "with_names", passing, serializer=SerializerDetail.TAXONOMY_DETAIL_NAMES
 42 |         )
 43 |         general_handler.modifiers.register(
 44 |             "filter_by_entry",
 45 |             filter_by_entry,
 46 |             serializer=SerializerDetail.TAXONOMY_PER_ENTRY,
 47 |             type=ModifierType.REPLACE_PAYLOAD,
 48 |         )
 49 |         general_handler.modifiers.register(
 50 |             "filter_by_entry_db",
 51 |             filter_by_entry_db,
 52 |             serializer=SerializerDetail.TAXONOMY_PER_ENTRY_DB,
 53 |             type=ModifierType.REPLACE_PAYLOAD,
 54 |         )
 55 | 
 56 |         return super(TaxonomyAccessionHandler, self).get(
 57 |             request._request,
 58 |             endpoint_levels,
 59 |             available_endpoint_handlers,
 60 |             level,
 61 |             self.queryset,
 62 |             handler,
 63 |             general_handler,
 64 |             request,
 65 |             *args,
 66 |             **kwargs
 67 |         )
 68 | 
 69 |     @staticmethod
 70 |     def filter(queryset, level_name="", general_handler=None):
 71 |         general_handler.queryset_manager.add_filter(
 72 |             "taxonomy", accession__iexact=level_name
 73 |         )
 74 |         return queryset
 75 | 
 76 | 
 77 | class UniprotHandler(CustomView):
 78 |     level_description = "taxonomy db level"
 79 |     child_handlers = [(r"\d+", TaxonomyAccessionHandler)]
 80 |     queryset = Taxonomy.objects.all()
 81 |     serializer_class = TaxonomySerializer
 82 |     serializer_detail = SerializerDetail.TAXONOMY_HEADERS
 83 |     serializer_detail_filter = SerializerDetail.TAXONOMY_DB
 84 | 
 85 |     def get(
 86 |         self,
 87 |         request,
 88 |         endpoint_levels,
 89 |         available_endpoint_handlers=None,
 90 |         level=0,
 91 |         parent_queryset=None,
 92 |         handler=None,
 93 |         general_handler=None,
 94 |         *args,
 95 |         **kwargs
 96 |     ):
 97 |         general_handler.queryset_manager.add_filter("taxonomy", accession__isnull=False)
 98 |         general_handler.queryset_manager.order_by("-num_proteins")
 99 |         general_handler.modifiers.register(
100 |             "extra_fields", add_extra_fields(Taxonomy, "counters")
101 |         )
102 |         general_handler.modifiers.register("key_species", filter_by_key_species)
103 |         general_handler.modifiers.register(
104 |             "ida",
105 |             filter_by_domain_architectures,
106 |             type=ModifierType.REPLACE_PAYLOAD,
107 |             serializer=SerializerDetail.TAXONOMY_HEADERS,
108 |             many=True,
109 |         )
110 |         general_handler.modifiers.register(
111 |             "with_names", add_taxonomy_names, type=ModifierType.EXTEND_PAYLOAD
112 |         )
113 |         general_handler.modifiers.register("show-subset", show_subset)
114 | 
115 |         return super(UniprotHandler, self).get(
116 |             request._request,
117 |             endpoint_levels,
118 |             available_endpoint_handlers,
119 |             level,
120 |             self.queryset,
121 |             handler,
122 |             general_handler,
123 |             request,
124 |             *args,
125 |             **kwargs
126 |         )
127 | 
128 |     @staticmethod
129 |     def filter(queryset, level_name="", general_handler=None):
130 |         general_handler.queryset_manager.add_filter("taxonomy", accession__isnull=False)
131 |         return queryset
132 | 
133 | 
134 | class TaxonomyHandler(CustomView):
135 |     level_description = "Taxonomy level"
136 |     from_model = False
137 |     child_handlers = [("uniprot", UniprotHandler)]
138 |     many = False
139 |     serializer_class = TaxonomySerializer
140 |     serializer_detail = SerializerDetail.TAXONOMY_OVERVIEW
141 |     serializer_detail_filter = SerializerDetail.TAXONOMY_OVERVIEW
142 | 
143 |     def get_database_contributions(self, queryset):
144 |         qs = Taxonomy.objects.filter(accession__in=queryset)
145 |         return {"taxa": {"uniprot": qs.count()}}
146 | 
147 |     def get(
148 |         self,
149 |         request,
150 |         endpoint_levels,
151 |         available_endpoint_handlers=None,
152 |         level=0,
153 |         parent_queryset=None,
154 |         handler=None,
155 |         general_handler=None,
156 |         *args,
157 |         **kwargs
158 |     ):
159 |         general_handler.queryset_manager.reset_filters("taxonomy", endpoint_levels)
160 | 
161 |         general_handler.modifiers.register(
162 |             "scientific_name",
163 |             get_taxonomy_by_scientific_name,
164 |             serializer=SerializerDetail.ALL,
165 |             many=False,
166 |         )
167 | 
168 |         return super(TaxonomyHandler, self).get(
169 |             request._request,
170 |             endpoint_levels,
171 |             available_endpoint_handlers,
172 |             level,
173 |             self.queryset,
174 |             handler,
175 |             general_handler,
176 |             request,
177 |             *args,
178 |             **kwargs
179 |         )
180 | 
181 |     @staticmethod
182 |     def filter(queryset, level_name="", general_handler=None):
183 |         general_handler.queryset_manager.add_filter("taxonomy", accession__isnull=False)
184 |         return queryset
185 | 


--------------------------------------------------------------------------------
/docs/examples/overlapping-entries.py:
--------------------------------------------------------------------------------
  1 | """
  2 | This script generates the list of InterPro entries overlapping with a specified entry.
  3 | 
  4 | Requires python >= 3.6
  5 | 
  6 | 
  7 | The script requires to download the "match_complete.xml.gz" file for the release of interest from the InterPro ftp
  8 | $ wget https://ftp.ebi.ac.uk/pub/databases/interpro/releases/88.0/match_complete.xml.gz
  9 | 
 10 | Example of running command:
 11 | $ python overlapping-entries.py match_complete.xml.gz > overlapping.tsv
 12 | 
 13 | To extract entries overlapping with IPR035979, run:
 14 | $ awk -v FS='\t' -v OFS='\t' '($1=="IPR035979") {print $4,$5,$6}' overlapping-entries.tsv > overlapping-with-IPR035979.tsv
 15 | """
 16 | 
 17 | import gzip
 18 | import re
 19 | import sys
 20 | from xml.etree import ElementTree
 21 | 
 22 | 
 23 | def parse_xml(file):
 24 |     regx = re.compile(r"<protein")
 25 | 
 26 |     with gzip.open(file, "rt") as fh:
 27 |         buffer = ""
 28 |         i = -1
 29 | 
 30 |         for line in fh:
 31 |             buffer += line
 32 | 
 33 |             for match in regx.finditer(buffer):
 34 |                 j = match.start()
 35 | 
 36 |                 if j > i >= 0:
 37 |                     yield ElementTree.fromstring(buffer[i:j])
 38 | 
 39 |                 i = j
 40 | 
 41 |             if i >= 0:
 42 |                 buffer = buffer[i:]
 43 |                 i = 0
 44 | 
 45 |         j = re.search(r"</interpromatch>", buffer).start()
 46 |         yield ElementTree.fromstring(buffer[i:j])
 47 | 
 48 | 
 49 | def condense_locations(entries):
 50 |     for accession, locations in entries.items():
 51 |         condensed = []
 52 |         start = end = None
 53 | 
 54 |         for s, e in sorted(locations):
 55 |             if start is None:
 56 |                 start = s
 57 |                 end = e
 58 |                 continue
 59 |             elif e <= end:
 60 |                 continue
 61 |             elif s <= end:
 62 |                 overlap = min(end, e) - max(start, s) + 1
 63 |                 shortest = min(end - start, e - s) + 1
 64 | 
 65 |                 if overlap >= shortest * 0.1:
 66 |                     end = e
 67 |                     continue
 68 | 
 69 |             condensed.append((start, end))
 70 |             start, end = s, e
 71 | 
 72 |         condensed.append((start, end))
 73 |         entries[accession] = condensed
 74 | 
 75 | 
 76 | def main():
 77 |     file = sys.argv[1]
 78 | 
 79 |     entries = {}
 80 |     for i, protein in enumerate(parse_xml(file)):
 81 |         protein_acc = protein.attrib["id"]
 82 |         
 83 |         protein_entries = {}
 84 |         for match in protein.findall("match"):
 85 |             match_acc = match.attrib["id"]
 86 |             ipr = match.find("ipr")
 87 | 
 88 |             if ipr is None:
 89 |                 continue
 90 | 
 91 |             entry_acc = ipr.attrib["id"]
 92 |             if entry_acc not in entries:
 93 |                 entries[entry_acc] = {
 94 |                     "name": ipr.attrib["name"],
 95 |                     "type": ipr.attrib["type"],
 96 |                     "proteins": 0,
 97 |                     "overlaps": {}
 98 |                 }
 99 | 
100 |             try:
101 |                 domains = protein_entries[entry_acc]
102 |             except KeyError:
103 |                 domains = protein_entries[entry_acc] = []
104 | 
105 |             for lcn in match.findall("lcn"):
106 |                 domains.append((
107 |                     int(lcn.attrib["start"]), 
108 |                     int(lcn.attrib["end"])
109 |                 ))                    
110 | 
111 |         condense_locations(protein_entries)
112 | 
113 |         for entry_acc, locations in protein_entries.items():
114 |             entries[entry_acc]["proteins"] += 1
115 |             entry_overlaps = entries[entry_acc]["overlaps"]
116 | 
117 |             for other_acc, other_locations in protein_entries.items():
118 |                 if other_acc >= entry_acc:
119 |                     continue
120 | 
121 |                 try:
122 |                     counts = entry_overlaps[other_acc]
123 |                 except KeyError:
124 |                     counts = entry_overlaps[other_acc] = [0, 0]
125 | 
126 |                 flag = 0
127 |                 for start1, end1 in locations:
128 |                     length1 = end1 - start1 + 1
129 | 
130 |                     for start2, end2 in other_locations:
131 |                         length2 = end2 - start2 + 1
132 |                         overlap = min(end1, end2) - max(start1, start2) + 1
133 | 
134 |                         if not flag & 1 and overlap >= length1 * 0.5:
135 |                             flag |= 1
136 |                             counts[0] += 1
137 | 
138 |                         if not flag & 2 and overlap >= length2 * 0.5:
139 |                             flag |= 2
140 |                             counts[1] += 1
141 | 
142 |                     if flag == 3:
143 |                         break
144 | 
145 |         if (i + 1) % 1e6 == 0:
146 |             sys.stderr.write(f"{i+1:>20,}\n")
147 | 
148 |     sys.stderr.write(f"{i+1:>20,}\n")
149 | 
150 |     supfam = "homologous_superfamily"
151 |     types = (supfam, "domain", "family", "repeat")
152 | 
153 |     for entry_acc, entry in entries.items():
154 |         entry_cnt = entry["proteins"]
155 | 
156 |         for other_acc, (cnt1, cnt2) in entry["overlaps"].items():
157 |             other_cnt = entries[other_acc]["proteins"]
158 | 
159 |             coef1 = cnt1 / (entry_cnt + other_cnt - cnt1)
160 |             coef2 = cnt2 / (entry_cnt + other_cnt - cnt2)
161 | 
162 |             coef = (coef1 + coef2) * 0.5
163 | 
164 |             cont1 = cnt1 / entry_cnt
165 |             cont2 = cnt2 / other_cnt
166 | 
167 |             if all(e < 0.75 for e in (coef, cont1, cont2)):
168 |                 continue
169 | 
170 |             type1 = entry["type"].lower()
171 |             type2 = entries[other_acc]["type"].lower()
172 |             if ((type1 == supfam and type2 in types)
173 |                     or (type2 == supfam and type1 in types)):
174 | 
175 |                 print("\t".join((
176 |                     entry_acc,
177 |                     entry["name"],
178 |                     entry["type"],
179 |                     other_acc,
180 |                     entries[other_acc]["name"],
181 |                     entries[other_acc]["type"]
182 |                 )))
183 |                 print("\t".join((
184 |                     other_acc,
185 |                     entries[other_acc]["name"],
186 |                     entries[other_acc]["type"],                    
187 |                     entry_acc,
188 |                     entry["name"],
189 |                     entry["type"]
190 |                 )))
191 | 
192 | 
193 | if __name__ == "__main__":
194 |     main()
195 | 


--------------------------------------------------------------------------------
/webfront/tests/tests_utils_endpoint.py:
--------------------------------------------------------------------------------
  1 | from rest_framework import status
  2 | from webfront.tests.InterproRESTTestCase import InterproRESTTestCase
  3 | from webfront.models.interpro_new import Release_Note
  4 | 
  5 | 
  6 | class UtilsAccessionTest(InterproRESTTestCase):
  7 |     def test_can_read_structure_overview(self):
  8 |         response = self.client.get("/api/utils")
  9 |         self.assertEqual(response.status_code, status.HTTP_200_OK)
 10 |         self.assertIn("available", response.data)
 11 |         self.assertIn("accession", response.data["available"])
 12 |         self.assertIn("release", response.data["available"])
 13 | 
 14 |     def test_accession_endpoint_doesnt_fail(self):
 15 |         response = self.client.get("/api/utils/accession")
 16 |         self.assertEqual(response.status_code, status.HTTP_200_OK)
 17 | 
 18 |     def test_accession_endpoint_with_unexisting_acc(self):
 19 |         response = self.client.get("/api/utils/accession/xxXx")
 20 |         self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT)
 21 | 
 22 |     def test_accession_endpoint_with_ipro(self):
 23 |         response = self.client.get("/api/utils/accession/IPR003165")
 24 |         self.assertEqual(response.status_code, status.HTTP_200_OK)
 25 |         self.assertEqual(response.data["endpoint"], "entry")
 26 |         self.assertEqual(response.data["source_database"], "interpro")
 27 | 
 28 |     def test_accession_endpoint_with_protein(self):
 29 |         response = self.client.get("/api/utils/accession/A1CUJ5")
 30 |         self.assertEqual(response.status_code, status.HTTP_200_OK)
 31 |         self.assertEqual(response.data["endpoint"], "protein")
 32 |         self.assertEqual(response.data["source_database"], "reviewed")
 33 | 
 34 |     def test_accession_endpoint_with_structure(self):
 35 |         response = self.client.get("/api/utils/accession/1JM7")
 36 |         self.assertEqual(response.status_code, status.HTTP_200_OK)
 37 |         self.assertEqual(response.data["endpoint"], "structure")
 38 |         self.assertEqual(response.data["source_database"], "pdb")
 39 | 
 40 |     def test_accession_endpoint_with_proteome(self):
 41 |         response = self.client.get("/api/utils/accession/UP000012042")
 42 |         self.assertEqual(response.status_code, status.HTTP_200_OK)
 43 |         self.assertEqual(response.data["endpoint"], "proteome")
 44 |         self.assertEqual(response.data["source_database"], "uniprot")
 45 | 
 46 |     def test_accession_endpoint_with_set(self):
 47 |         response = self.client.get("/api/utils/accession/CL0001")
 48 |         self.assertEqual(response.status_code, status.HTTP_200_OK)
 49 |         self.assertEqual(response.data["endpoint"], "set")
 50 |         self.assertEqual(response.data["source_database"], "pfam")
 51 | 
 52 |     def test_accession_endpoint_with_taxonomy(self):
 53 |         response = self.client.get("/api/utils/accession/344612")
 54 |         self.assertEqual(response.status_code, status.HTTP_200_OK)
 55 |         self.assertEqual(response.data["endpoint"], "taxonomy")
 56 |         self.assertEqual(response.data["source_database"], "uniprot")
 57 | 
 58 |     def test_accession_endpoint_with_protein_id(self):
 59 |         response = self.client.get("/api/utils/accession/CBPYA_ASPCL")
 60 |         self.assertEqual(response.status_code, status.HTTP_200_OK)
 61 |         self.assertEqual(response.data["endpoint"], "protein")
 62 |         self.assertEqual(response.data["source_database"], "reviewed")
 63 |         self.assertEqual(response.data["accession"], "A1CUJ5")
 64 | 
 65 |     def test_accession_endpoint_with_gene_name(self):
 66 |         response = self.client.get("/api/utils/accession/FOLH1")
 67 |         self.assertEqual(response.status_code, status.HTTP_200_OK)
 68 |         self.assertEqual(response.data["endpoint"], "protein")
 69 |         self.assertEqual(response.data["source_database"], "UniProt")
 70 |         self.assertIn("proteins", response.data)
 71 |         self.assertGreater(len(response.data["proteins"]), 0)
 72 |         self.assertIn("accession", response.data["proteins"][0])
 73 |         self.assertEqual(response.data["proteins"][0]["accession"], "Q0VDM6")
 74 |         self.assertIn("organism", response.data["proteins"][0])
 75 |         self.assertIn("tax_id", response.data["proteins"][0])
 76 |         self.assertIn("is_fragment", response.data["proteins"][0])
 77 | 
 78 | 
 79 | class UtilsReleaseTest(InterproRESTTestCase):
 80 |     def test_can_read_structure_overview(self):
 81 |         response = self.client.get("/api/utils")
 82 |         self.assertEqual(response.status_code, status.HTTP_200_OK)
 83 |         self.assertIn("available", response.data)
 84 |         self.assertIn("accession", response.data["available"])
 85 |         self.assertIn("release", response.data["available"])
 86 | 
 87 |     def test_release_endpoint_doesnt_fail(self):
 88 |         response = self.client.get("/api/utils/release")
 89 |         self.assertEqual(response.status_code, status.HTTP_200_OK)
 90 | 
 91 |     def test_release_version_endpoint_doesnt_fail(self):
 92 |         response = self.client.get("/api/utils/release/current")
 93 |         self.assertEqual(response.status_code, status.HTTP_200_OK)
 94 |         response = self.client.get("/api/utils/release/70.0")
 95 |         self.assertEqual(response.status_code, status.HTTP_200_OK)
 96 | 
 97 |     def test_release_version_endpoint_fails(self):
 98 |         response = self.client.get("/api/utils/release/x")
 99 |         self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND)
100 | 
101 |     def test_the_fixtures_are_loaded(self):
102 |         notes = Release_Note.objects.all()
103 |         self.assertEqual(notes.count(), 2)
104 | 
105 |     def test_release_endpoint_returns_the_fixtures(self):
106 |         notes = Release_Note.objects.all()
107 |         response = self.client.get("/api/utils/release")
108 |         self.assertEqual(response.status_code, status.HTTP_200_OK)
109 |         self.assertEqual(len(response.data), len(notes))
110 |         for note in notes:
111 |             self.assertIn(note.version, response.data)
112 | 
113 |     def test_release_current_is_same_as_accession(self):
114 |         response1 = self.client.get("/api/utils/release/current")
115 |         self.assertEqual(response1.status_code, status.HTTP_200_OK)
116 |         response2 = self.client.get("/api/utils/release/70.0")
117 |         self.assertEqual(response2.status_code, status.HTTP_200_OK)
118 |         self.assertEqual(response1.data, response1.data)
119 | 
120 |     def test_release_70_is_same_as_fixture(self):
121 |         note_version = "70.0"
122 |         note = Release_Note.objects.all().filter(version=note_version).first()
123 |         response = self.client.get("/api/utils/release/" + note_version)
124 |         self.assertEqual(response.status_code, status.HTTP_200_OK)
125 |         self.assertEqual(response.data["content"], note.content)
126 | 


--------------------------------------------------------------------------------
/webfront/tests/fixtures_organisms.json:
--------------------------------------------------------------------------------
  1 | [
  2 |   {
  3 |     "model": "webfront.Taxonomy",
  4 |     "fields": {
  5 |       "accession": "1",
  6 |       "scientific_name": "ROOT",
  7 |       "full_name": "ROOT",
  8 |       "lineage": " 1 ",
  9 |       "rank": "no rank",
 10 |       "children": [
 11 |         "2579",
 12 |         "2"
 13 |       ],
 14 |       "counts": {
 15 |         "entries": {
 16 |           "total": 7,
 17 |           "interpro": 2,
 18 |           "pfam": 3,
 19 |           "profile": 1,
 20 |           "smart": 1
 21 |         },
 22 |         "structures": 4,
 23 |         "proteins": 4,
 24 |         "sets": 2,
 25 |         "proteomes": 3
 26 |       }
 27 |     }
 28 |   },
 29 |   {
 30 |     "model": "webfront.Taxonomy",
 31 |     "fields": {
 32 |       "accession": "2579",
 33 |       "scientific_name": "Eukaryota",
 34 |       "full_name": "Eucaryotae",
 35 |       "lineage": " 1 2579 ",
 36 |       "parent": "1",
 37 |       "rank": "SUPERKINGDOM",
 38 |       "children": [
 39 |         "344612",
 40 |         "1001583"
 41 |       ],
 42 |       "counts": {
 43 |         "entries": {
 44 |           "total": 6,
 45 |           "interpro": 2,
 46 |           "pfam": 3,
 47 |           "smart": 1
 48 |         },
 49 |         "structures": 2,
 50 |         "proteins": 2,
 51 |         "sets": 2,
 52 |         "proteomes": 2
 53 |       }
 54 |     }
 55 |   },
 56 |   {
 57 |     "model": "webfront.Taxonomy",
 58 |     "fields": {
 59 |       "accession": "2",
 60 |       "scientific_name": "Bacteria",
 61 |       "full_name": "Prokaryota",
 62 |       "lineage": " 1 2 ",
 63 |       "parent": "1",
 64 |       "rank": "SUPERKINGDOM",
 65 |       "children": [
 66 |         "40296"
 67 |       ],
 68 |       "counts": {
 69 |         "entries": {
 70 |           "total": 2,
 71 |           "interpro": 1,
 72 |           "profile": 1
 73 |         },
 74 |         "structures": 3,
 75 |         "proteins": 2,
 76 |         "sets": 0,
 77 |         "proteomes": 1
 78 |       }
 79 |     }
 80 |   },
 81 |   {
 82 |     "model": "webfront.Taxonomy",
 83 |     "fields": {
 84 |       "accession": "40296",
 85 |       "scientific_name": "Penicillium italicum",
 86 |       "full_name": "Blue mold",
 87 |       "lineage": " 1 2 40296 ",
 88 |       "parent": "2",
 89 |       "rank": "SPECIES",
 90 |       "children": [],
 91 |       "counts": {
 92 |         "entries": {
 93 |           "total": 2,
 94 |           "interpro": 1,
 95 |           "profile": 1
 96 |         },
 97 |         "structures": 3,
 98 |         "proteins": 2,
 99 |         "sets": 0,
100 |         "proteomes": 1
101 |       }
102 |     }
103 |   },
104 |   {
105 |     "model": "webfront.Taxonomy",
106 |     "fields": {
107 |       "accession": "1001583",
108 |       "scientific_name": "Lactobacillus brevis KB290",
109 |       "full_name": "Lactobacillus brevis str. KB290",
110 |       "lineage": " 1 2579 1001583 ",
111 |       "parent": "2579",
112 |       "rank": "SPECIES",
113 |       "children": [],
114 |       "counts": {
115 |         "entries": {
116 |           "total": 1,
117 |           "pfam": 1
118 |         },
119 |         "structures": 2,
120 |         "proteins": 1,
121 |         "sets": 1,
122 |         "proteomes": 1
123 |       }
124 |     }
125 |   },
126 |   {
127 |     "model": "webfront.Taxonomy",
128 |     "fields": {
129 |       "accession": "344612",
130 |       "scientific_name": "Aspergillus clavatus",
131 |       "full_name": "Aspergillus clavatus (strain ATCC 1007 / CBS 513.65 / DSM 816 / NCTC 3887 / NRRL 1)",
132 |       "lineage": " 1 2579 344612 ",
133 |       "parent": "2579",
134 |       "rank": "SPECIES",
135 |       "children": [],
136 |       "counts": {
137 |         "entries": {
138 |           "total": 5,
139 |           "pfam": 2,
140 |           "interpro": 2,
141 |           "smart": 1
142 |         },
143 |         "structures": 1,
144 |         "proteins": 1,
145 |         "sets": 2,
146 |         "proteomes": 1
147 |       }
148 |     }
149 |   },
150 |   {
151 |     "model": "webfront.Taxonomy",
152 |     "fields": {
153 |       "accession": "10090",
154 |       "scientific_name": "Mus musculus",
155 |       "full_name": "Mus musculus",
156 |       "lineage": " 1 2579 10090 ",
157 |       "parent": "2579",
158 |       "rank": "SPECIES",
159 |       "children": [],
160 |       "counts": {
161 |         "entries": {
162 |           "total": 0
163 |         },
164 |         "structures": 0,
165 |         "proteins": 1,
166 |         "sets": 0,
167 |         "proteomes": 1
168 |       }
169 |     }
170 |   },
171 |   {
172 |     "model": "webfront.Proteome",
173 |     "fields": {
174 |       "accession": "UP000006701",
175 |       "name": "Aspergillus clavatus NRRL 1 ",
176 |       "is_reference": true,
177 |       "strain": "ATCC 1007 / CBS 513.65 / DSM 816 / NCTC 3887 / NRRL 1",
178 |       "assembly": "GCA_000002715.1 from ENA/EMBL",
179 |       "taxonomy": "344612",
180 |       "counts": {
181 |         "entries": {
182 |           "total": 5,
183 |           "pfam": 2,
184 |           "interpro": 2,
185 |           "smart": 1
186 |         },
187 |         "structures": 1,
188 |         "proteins": 1,
189 |         "taxa": 1,
190 |         "sets": 2
191 |       }
192 |     }
193 |   },
194 |   {
195 |     "model": "webfront.Proteome",
196 |     "fields": {
197 |       "accession": "UP000030104",
198 |       "name": "Penicillium italicum Blue mold",
199 |       "is_reference": true,
200 |       "strain": "PHI-1",
201 |       "assembly": "GCA_000769765.1 from ENA/EMBL",
202 |       "taxonomy": "40296",
203 |       "counts": {
204 |         "entries": {
205 |           "total": 2,
206 |           "profile": 1,
207 |           "interpro": 1
208 |         },
209 |         "structures": 3,
210 |         "proteins": 2,
211 |         "taxa": 1,
212 |         "sets": 0
213 |       }
214 |     }
215 |   },
216 |   {
217 |     "model": "webfront.Proteome",
218 |     "fields": {
219 |       "accession": "UP000012042",
220 |       "name": "Lactobacillus brevis KB290",
221 |       "is_reference": false,
222 |       "strain": "KB290",
223 |       "assembly": "GCA_000359625.1 from ENA/EMBL",
224 |       "taxonomy": "1001583",
225 |       "counts": {
226 |         "entries": {
227 |           "total": 1,
228 |           "pfam": 1
229 |         },
230 |         "structures": 2,
231 |         "proteins": 1,
232 |         "taxa": 1,
233 |         "sets": 1
234 |       }
235 |     }
236 |   },
237 |   {
238 |     "model": "webfront.Proteome",
239 |     "fields": {
240 |       "accession": "UP000000589",
241 |       "name": "Mus musculus",
242 |       "is_reference": true,
243 |       "strain": "C57BL",
244 |       "assembly": "GCA_000001635.8 from Ensembl",
245 |       "taxonomy": "10090",
246 |       "counts": {
247 |         "entries": {
248 |           "total": 0
249 |         },
250 |         "structures": 0,
251 |         "proteins": 1,
252 |         "taxa": 1,
253 |         "sets": 0
254 |       }
255 |     }
256 |   }
257 | ]
258 | 


--------------------------------------------------------------------------------