├── .gitignore ├── LICENSE ├── README.md ├── README.virtualbox.txt ├── REST_API ├── notebooks │ ├── basic101.ipynb │ ├── comparative_validation.ipynb │ ├── ligand_entries_domains.ipynb │ ├── pdb_entry_publications.ipynb │ └── tutorial_utils.ipynb ├── presentations │ └── 20141120_PDBe_workshop.pdf └── snippets │ └── basic_get_post.py ├── Vagrantfile ├── bootstrap.sh ├── environment.yml ├── images └── README.txt └── search_interface └── notebooks ├── Timeline.ipynb ├── search_facets.ipynb └── search_introduction.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints/ 2 | *.pyc 3 | *.svg 4 | *password.txt 5 | model_data/ 6 | .vagrant/ 7 | *.p 8 | !model_data/README.txt 9 | *.eps 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 
22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | PDBe Programming Interfaces 2 | ================= 3 | 4 | 5 | -------------------------------------------------------------------------------- /README.virtualbox.txt: -------------------------------------------------------------------------------- 1 | INSTALLING ON VIRTUALBOX 2 | 3 | #Setting up vagrant------------------------------------------------------- 4 | 5 | # To run these please ensure you have the following installed: 6 | # Vagrant 1.9.6 (https://releases.hashicorp.com/vagrant/1.9.6/) 7 | # VirtualBox 5.1.30 (https://www.virtualbox.org/wiki/Download_Old_Builds_5_1) 8 | # Vagrant 2 and VirtualBox 5.2 have some defects. 
9 | 10 | #If you are behind an HTTP proxy: 11 | export http_proxy=http://user:password@host:port 12 | export https_proxy=https://user:password@host:port 13 | vagrant plugin install vagrant-proxyconf 14 | 15 | 16 | # For all users: plugins to install 17 | vagrant plugin install vagrant-triggers 18 | vagrant plugin uninstall vagrant-vbguest 19 | 20 | #Creating the vagrant box ------------------------------------------------ 21 | 22 | #All packages are set to be installed on a vagrant box that needs to be created first. 23 | #To create it please do the following: 24 | 25 | vagrant up 26 | # this takes several minutes 27 | 28 | #If there are any issues, please check the logs with: 29 | vagrant ssh -c 'cat /var/log/vboxadd-install.log' 30 | vagrant ssh -c 'sudo journalctl -r' 31 | 32 | # If this is your first time using this box and you get an SSL error, 33 | # download the box manually and install it: 34 | curl -v -L -o centos.box -k https://vagrantcloud.com/bento/boxes/centos-7.4/versions/201710.25.0/providers/virtualbox.box 35 | vagrant box add bento/centos-7.2 centos.box 36 | 37 | #Suspending and resuming the vagrantbox ----------------------------------- 38 | 39 | #After working, please stop the vagrant box session (don't forget to save any work!) 
40 | vagrant halt 41 | #To start again after the vagrantbox has been created 42 | vagrant up 43 | 44 | -------------------------------------------------------------------------------- /REST_API/notebooks/basic101.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "", 4 | "signature": "sha256:4763d932625b7de630c84058263487407591efdb231a03266685a8c0f26c9a56" 5 | }, 6 | "nbformat": 3, 7 | "nbformat_minor": 0, 8 | "worksheets": [ 9 | { 10 | "cells": [ 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "# PDBe REST API 101" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "collapsed": false, 21 | "input": [ 22 | "import sys\n", 23 | "\n", 24 | "PY3 = sys.version > '3'\n", 25 | "\n", 26 | "if PY3:\n", 27 | " import urllib.request as urllib2\n", 28 | "else:\n", 29 | " import urllib2\n", 30 | "\n", 31 | "SERVER_URL = \"https://www.ebi.ac.uk/pdbe/api\"\n", 32 | "\n", 33 | "def make_request(url, data): \n", 34 | " request = urllib2.Request(url)\n", 35 | "\n", 36 | " try:\n", 37 | " url_file = urllib2.urlopen(request, data)\n", 38 | " except urllib2.HTTPError as e:\n", 39 | " if e.code == 404:\n", 40 | " print(\"[NOTFOUND %d] %s\" % (e.code, url))\n", 41 | " else:\n", 42 | " print(\"[ERROR %d] %s\" % (e.code, url))\n", 43 | "\n", 44 | " return None\n", 45 | "\n", 46 | " return url_file.read().decode()\n", 47 | "\n", 48 | "def get_request(url, arg, pretty=False):\n", 49 | " full_url = \"%s/%s/%s?pretty=%s\" % (SERVER_URL, url, arg, str(pretty).lower())\n", 50 | " \n", 51 | " return make_request(full_url, None)\n", 52 | "\n", 53 | "def post_request(url, data, pretty=False):\n", 54 | " full_url = \"%s/%s/?pretty=%s\" % (SERVER_URL, url, str(pretty).lower())\n", 55 | " \n", 56 | " if isinstance(data, (list, tuple)):\n", 57 | " data = \",\".join(data)\n", 58 | " \n", 59 | " return make_request(full_url, data.encode())\n" 60 | ], 61 | "language": "python", 62 | "metadata": {}, 
63 | "outputs": [], 64 | "prompt_number": 1 65 | }, 66 | { 67 | "cell_type": "code", 68 | "collapsed": false, 69 | "input": [ 70 | "summary = \"/pdb/entry/summary\"\n", 71 | "experiment = \"/pdb/entry/experiment\"\n", 72 | "sifts = \"/mappings\"" 73 | ], 74 | "language": "python", 75 | "metadata": {}, 76 | "outputs": [], 77 | "prompt_number": 2 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": {}, 82 | "source": [ 83 | "We can make GET requests simply by doing:" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "collapsed": false, 89 | "input": [ 90 | "print(get_request(experiment, \"1cbs\", True))" 91 | ], 92 | "language": "python", 93 | "metadata": {}, 94 | "outputs": [ 95 | { 96 | "output_type": "stream", 97 | "stream": "stdout", 98 | "text": [ 99 | "{\n", 100 | " \"1cbs\": [\n", 101 | " {\n", 102 | " \"resolution_low\": 8,\n", 103 | " \"r_factor\": 0.2,\n", 104 | " \"r_free_selection_details\": null,\n", 105 | " \"resolution_high\": 1.8,\n", 106 | " \"r_free_percent_reflections\": null,\n", 107 | " \"starting_model\": null,\n", 108 | " \"refinement_software\": \"X-PLOR \",\n", 109 | " \"completeness\": 90.54,\n", 110 | " \"r_free\": 0.237,\n", 111 | " \"cell\": {\n", 112 | " \"a\": 45.65,\n", 113 | " \"c\": 77.61,\n", 114 | " \"b\": 47.56,\n", 115 | " \"beta\": 90,\n", 116 | " \"alpha\": 90,\n", 117 | " \"gamma\": 90\n", 118 | " },\n", 119 | " \"percent_reflections_observed\": 90.3,\n", 120 | " \"diffraction_experiment\": [\n", 121 | " {\n", 122 | " \"wavelength_list\": null,\n", 123 | " \"detector_details\": null,\n", 124 | " \"beam_source_type\": null,\n", 125 | " \"detector_type\": null,\n", 126 | " \"synchrotron_beamline\": null,\n", 127 | " \"source_details\": null,\n", 128 | " \"synchrotron_site\": null,\n", 129 | " \"diffraction_protocol\": null,\n", 130 | " \"ambient_temp\": null,\n", 131 | " \"wavelength\": null,\n", 132 | " \"detector\": null,\n", 133 | " \"beam_source_name\": null\n", 134 | " }\n", 135 | " ],\n", 136 | " 
\"expression_host_scientific_name\": [\n", 137 | " {\n", 138 | " \"scientific_name\": \"Escherichia coli BL21(DE3)\",\n", 139 | " \"tax_id\": 469008\n", 140 | " }\n", 141 | " ],\n", 142 | " \"crystal_growth\": [\n", 143 | " {\n", 144 | " \"grow_details\": null,\n", 145 | " \"grow_ph\": null,\n", 146 | " \"grow_method\": null,\n", 147 | " \"grow_temperature\": null\n", 148 | " }\n", 149 | " ],\n", 150 | " \"experimental_method\": \"X-ray diffraction\",\n", 151 | " \"num_reflections\": 14678,\n", 152 | " \"phasing_method\": null,\n", 153 | " \"experiment_data_available\": \"Y\",\n", 154 | " \"experimental_method_class\": \"x-ray\",\n", 155 | " \"r_work\": 0.2,\n", 156 | " \"spacegroup\": \"P 21 21 21\",\n", 157 | " \"resolution\": 1.8,\n", 158 | " \"structure_determination_method\": null\n", 159 | " }\n", 160 | " ]\n", 161 | "}\n" 162 | ] 163 | } 164 | ], 165 | "prompt_number": 11 166 | }, 167 | { 168 | "cell_type": "markdown", 169 | "metadata": {}, 170 | "source": [ 171 | "POST requests passing a string:" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "collapsed": false, 177 | "input": [ 178 | "print(post_request(summary, \"1cbs, 1otz, 2ktn\", True))" 179 | ], 180 | "language": "python", 181 | "metadata": {}, 182 | "outputs": [ 183 | { 184 | "output_type": "stream", 185 | "stream": "stdout", 186 | "text": [ 187 | "{\n", 188 | " \"1cbs\": [\n", 189 | " {\n", 190 | " \"experimental_method\": [\n", 191 | " \"X-ray diffraction\"\n", 192 | " ],\n", 193 | " \"assemblies\": [\n", 194 | " {\n", 195 | " \"preferred\": true,\n", 196 | " \"form\": \"homo\",\n", 197 | " \"name\": \"monomeric\",\n", 198 | " \"assembly_id\": \"1\"\n", 199 | " }\n", 200 | " ],\n", 201 | " \"title\": \"CRYSTAL STRUCTURE OF CELLULAR RETINOIC-ACID-BINDING PROTEINS I AND II IN COMPLEX WITH ALL-TRANS-RETINOIC ACID AND A SYNTHETIC RETINOID\",\n", 202 | " \"release_date\": \"19950126\",\n", 203 | " \"split_entry\": null,\n", 204 | " \"experimental_method_class\": [\n", 205 | " \"x-ray\"\n", 
206 | " ],\n", 207 | " \"revision_date\": \"20090224\",\n", 208 | " \"entry_authors\": [\n", 209 | " \"Bergfors, T.\",\n", 210 | " \"Jones, T.A.\",\n", 211 | " \"Kleywegt, G.J.\"\n", 212 | " ],\n", 213 | " \"deposition_site\": null,\n", 214 | " \"number_of_entities\": {\n", 215 | " \"water\": 1,\n", 216 | " \"polypeptide\": 1,\n", 217 | " \"other\": 0,\n", 218 | " \"dna\": 0,\n", 219 | " \"ligand\": 1,\n", 220 | " \"dna/rna\": 0,\n", 221 | " \"rna\": 0,\n", 222 | " \"sugar\": 0\n", 223 | " },\n", 224 | " \"deposition_date\": \"19940928\",\n", 225 | " \"processing_site\": null\n", 226 | " }\n", 227 | " ],\n", 228 | " \"2ktn\": [\n", 229 | " {\n", 230 | " \"experimental_method\": [\n", 231 | " \"Solution NMR\"\n", 232 | " ],\n", 233 | " \"assemblies\": [\n", 234 | " {\n", 235 | " \"preferred\": true,\n", 236 | " \"form\": \"homo\",\n", 237 | " \"name\": \"monomeric\",\n", 238 | " \"assembly_id\": \"1\"\n", 239 | " }\n", 240 | " ],\n", 241 | " \"title\": \"Spatial structure of Lch-alpha peptide from two-component lantibiotic system Lichenicidin VK21\",\n", 242 | " \"release_date\": \"20100721\",\n", 243 | " \"split_entry\": null,\n", 244 | " \"experimental_method_class\": [\n", 245 | " \"nmr\"\n", 246 | " ],\n", 247 | " \"revision_date\": \"20110810\",\n", 248 | " \"entry_authors\": [\n", 249 | " \"Arseniev, A.S.\",\n", 250 | " \"Mineev, K.S.\",\n", 251 | " \"Ovchinnikova, T.V.\",\n", 252 | " \"Shenkarev, Z.O.\"\n", 253 | " ],\n", 254 | " \"deposition_site\": \"BMRB\",\n", 255 | " \"number_of_entities\": {\n", 256 | " \"water\": 0,\n", 257 | " \"polypeptide\": 1,\n", 258 | " \"other\": 0,\n", 259 | " \"dna\": 0,\n", 260 | " \"ligand\": 0,\n", 261 | " \"dna/rna\": 0,\n", 262 | " \"rna\": 0,\n", 263 | " \"sugar\": 0\n", 264 | " },\n", 265 | " \"deposition_date\": \"20100205\",\n", 266 | " \"processing_site\": \"RCSB\"\n", 267 | " }\n", 268 | " ]\n", 269 | "}\n" 270 | ] 271 | } 272 | ], 273 | "prompt_number": 4 274 | }, 275 | { 276 | "cell_type": "markdown", 277 | 
"metadata": {}, 278 | "source": [ 279 | "A list:" 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "collapsed": false, 285 | "input": [ 286 | "print(post_request(summary, [\"1cbs\", \"1otz\", \"2ktn\"], True))" 287 | ], 288 | "language": "python", 289 | "metadata": {}, 290 | "outputs": [] 291 | }, 292 | { 293 | "cell_type": "markdown", 294 | "metadata": {}, 295 | "source": [ 296 | "Or a tuple:" 297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "collapsed": false, 302 | "input": [ 303 | "response = post_request(summary, (\"1cbs\", \"1otz\", \"2ktn\"))\n", 304 | "print(response)" 305 | ], 306 | "language": "python", 307 | "metadata": {}, 308 | "outputs": [ 309 | { 310 | "output_type": "stream", 311 | "stream": "stdout", 312 | "text": [ 313 | "{\"1cbs\":[{\"experimental_method\":[\"X-ray diffraction\"],\"assemblies\":[{\"preferred\":true,\"form\":\"homo\",\"name\":\"monomeric\",\"assembly_id\":\"1\"}],\"title\":\"CRYSTAL STRUCTURE OF CELLULAR RETINOIC-ACID-BINDING PROTEINS I AND II IN COMPLEX WITH ALL-TRANS-RETINOIC ACID AND A SYNTHETIC RETINOID\",\"release_date\":\"19950126\",\"split_entry\":null,\"experimental_method_class\":[\"x-ray\"],\"revision_date\":\"20090224\",\"entry_authors\":[\"Bergfors, T.\",\"Jones, T.A.\",\"Kleywegt, G.J.\"],\"deposition_site\":null,\"number_of_entities\":{\"water\":1,\"polypeptide\":1,\"other\":0,\"dna\":0,\"ligand\":1,\"dna/rna\":0,\"rna\":0,\"sugar\":0},\"deposition_date\":\"19940928\",\"processing_site\":null}],\"2ktn\":[{\"experimental_method\":[\"Solution NMR\"],\"assemblies\":[{\"preferred\":true,\"form\":\"homo\",\"name\":\"monomeric\",\"assembly_id\":\"1\"}],\"title\":\"Spatial structure of Lch-alpha peptide from two-component lantibiotic system Lichenicidin VK21\",\"release_date\":\"20100721\",\"split_entry\":null,\"experimental_method_class\":[\"nmr\"],\"revision_date\":\"20110810\",\"entry_authors\":[\"Arseniev, A.S.\",\"Mineev, K.S.\",\"Ovchinnikova, T.V.\",\"Shenkarev, 
Z.O.\"],\"deposition_site\":\"BMRB\",\"number_of_entities\":{\"water\":0,\"polypeptide\":1,\"other\":0,\"dna\":0,\"ligand\":0,\"dna/rna\":0,\"rna\":0,\"sugar\":0},\"deposition_date\":\"20100205\",\"processing_site\":\"RCSB\"}]}\n" 314 | ] 315 | } 316 | ], 317 | "prompt_number": 6 318 | }, 319 | { 320 | "cell_type": "markdown", 321 | "metadata": {}, 322 | "source": [ 323 | "Please note that we have not used the pretty=True argument in the last request. This is the best way to access the REST API programmatically as, by eliminating all the whitespace, you will save bandwidth and, therefore, time.\n", 324 | "\n", 325 | "The response is a string that can be parsed into a Python dictionary using the json module:" 326 | ] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "collapsed": false, 331 | "input": [ 332 | "import json\n", 333 | "entries = json.loads(response)" 334 | ], 335 | "language": "python", 336 | "metadata": {}, 337 | "outputs": [], 338 | "prompt_number": 7 339 | }, 340 | { 341 | "cell_type": "markdown", 342 | "metadata": {}, 343 | "source": [ 344 | "And now we can access the elements individually:" 345 | ] 346 | }, 347 | { 348 | "cell_type": "code", 349 | "collapsed": false, 350 | "input": [ 351 | "print(entries[\"1cbs\"][0][\"experimental_method\"])\n", 352 | "print(entries[\"1cbs\"][0][\"entry_authors\"])" 353 | ], 354 | "language": "python", 355 | "metadata": {}, 356 | "outputs": [] 357 | }, 358 | { 359 | "cell_type": "code", 360 | "collapsed": false, 361 | "input": [ 362 | "entries[\"1cbs\"]" 363 | ], 364 | "language": "python", 365 | "metadata": {}, 366 | "outputs": [ 367 | { 368 | "metadata": {}, 369 | "output_type": "pyout", 370 | "prompt_number": 12, 371 | "text": [ 372 | "[{u'assemblies': [{u'assembly_id': u'1',\n", 373 | " u'form': u'homo',\n", 374 | " u'name': u'monomeric',\n", 375 | " u'preferred': True}],\n", 376 | " u'deposition_date': u'19940928',\n", 377 | " u'deposition_site': None,\n", 378 | " u'entry_authors': [u'Bergfors, T.', u'Jones, 
T.A.', u'Kleywegt, G.J.'],\n", 379 | " u'experimental_method': [u'X-ray diffraction'],\n", 380 | " u'experimental_method_class': [u'x-ray'],\n", 381 | " u'number_of_entities': {u'dna': 0,\n", 382 | " u'dna/rna': 0,\n", 383 | " u'ligand': 1,\n", 384 | " u'other': 0,\n", 385 | " u'polypeptide': 1,\n", 386 | " u'rna': 0,\n", 387 | " u'sugar': 0,\n", 388 | " u'water': 1},\n", 389 | " u'processing_site': None,\n", 390 | " u'release_date': u'19950126',\n", 391 | " u'revision_date': u'20090224',\n", 392 | " u'split_entry': None,\n", 393 | " u'title': u'CRYSTAL STRUCTURE OF CELLULAR RETINOIC-ACID-BINDING PROTEINS I AND II IN COMPLEX WITH ALL-TRANS-RETINOIC ACID AND A SYNTHETIC RETINOID'}]" 394 | ] 395 | } 396 | ], 397 | "prompt_number": 12 398 | }, 399 | { 400 | "cell_type": "code", 401 | "collapsed": false, 402 | "input": [ 403 | "for k in entries.keys():\n", 404 | " print entries[k]" 405 | ], 406 | "language": "python", 407 | "metadata": {}, 408 | "outputs": [ 409 | { 410 | "output_type": "stream", 411 | "stream": "stdout", 412 | "text": [ 413 | "[{u'split_entry': None, u'title': u'CRYSTAL STRUCTURE OF CELLULAR RETINOIC-ACID-BINDING PROTEINS I AND II IN COMPLEX WITH ALL-TRANS-RETINOIC ACID AND A SYNTHETIC RETINOID', u'release_date': u'19950126', u'experimental_method': [u'X-ray diffraction'], u'experimental_method_class': [u'x-ray'], u'revision_date': u'20090224', u'entry_authors': [u'Bergfors, T.', u'Jones, T.A.', u'Kleywegt, G.J.'], u'deposition_site': None, u'number_of_entities': {u'polypeptide': 1, u'dna': 0, u'ligand': 1, u'dna/rna': 0, u'rna': 0, u'sugar': 0, u'water': 1, u'other': 0}, u'processing_site': None, u'deposition_date': u'19940928', u'assemblies': [{u'assembly_id': u'1', u'form': u'homo', u'preferred': True, u'name': u'monomeric'}]}]\n", 414 | "[{u'split_entry': None, u'title': u'Spatial structure of Lch-alpha peptide from two-component lantibiotic system Lichenicidin VK21', u'release_date': u'20100721', u'experimental_method': [u'Solution NMR'], 
u'experimental_method_class': [u'nmr'], u'revision_date': u'20110810', u'entry_authors': [u'Arseniev, A.S.', u'Mineev, K.S.', u'Ovchinnikova, T.V.', u'Shenkarev, Z.O.'], u'deposition_site': u'BMRB', u'number_of_entities': {u'polypeptide': 1, u'dna': 0, u'ligand': 0, u'dna/rna': 0, u'rna': 0, u'sugar': 0, u'water': 0, u'other': 0}, u'processing_site': u'RCSB', u'deposition_date': u'20100205', u'assemblies': [{u'assembly_id': u'1', u'form': u'homo', u'preferred': True, u'name': u'monomeric'}]}]\n" 415 | ] 416 | } 417 | ], 418 | "prompt_number": 10 419 | }, 420 | { 421 | "cell_type": "code", 422 | "collapsed": false, 423 | "input": [], 424 | "language": "python", 425 | "metadata": {}, 426 | "outputs": [] 427 | } 428 | ], 429 | "metadata": {} 430 | } 431 | ] 432 | } 433 | -------------------------------------------------------------------------------- /REST_API/notebooks/comparative_validation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "", 4 | "signature": "sha256:dc490de48a7d2dd1115840164b711c34e8257aace1b2d6506fe92a4267611015" 5 | }, 6 | "nbformat": 3, 7 | "nbformat_minor": 0, 8 | "worksheets": [ 9 | { 10 | "cells": [ 11 | { 12 | "cell_type": "heading", 13 | "level": 1, 14 | "metadata": {}, 15 | "source": [ 16 | "Strained conformation or outlier?" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "Validation of protein structures is a collection of checks that quantify the expectation of occurrence of features in a model. An unexpected feature is not always a modelling error, and it is sometimes argued that such feature is strained but stabilized by its neighbouring atoms. 
Additional evidence needs to be gathered in such tricky situations.\n", 24 | "\n", 25 | "In addition to checking electron density itself, it would be useful to check whether a similarly strained feature occurs in other models of that protein, e.g.\n", 26 | "\n", 37 | "\n", 38 | "In general, it would be useful to create a quality profile from multiple models of a protein to see whether a residue systematically exhibits an unusual geometry, or whether the geometry is an error not observed in other models.\n", 39 | "\n", 40 | "In this tutorial, let us see how backbone and sidechain features of residues compare across the multiple copies of \n", 41 | "GroEL in PDB entry 1kp8 determined in 2003 at resolution of 2Å." 42 | ] 43 | }, 44 | { 45 | "cell_type": "heading", 46 | "level": 3, 47 | "metadata": {}, 48 | "source": [ 49 | "Getting started" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "Let us run the [tutorial_utils](tutorial_utils.ipynb) notebook to set up the API URL, logger, caller utility, etc. Check out that notebook to set anything up differently." 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "collapsed": false, 62 | "input": [ 63 | "%run 'tutorial_utils.ipynb'" 64 | ], 65 | "language": "python", 66 | "metadata": {}, 67 | "outputs": [], 68 | "prompt_number": 2 69 | }, 70 | { 71 | "cell_type": "heading", 72 | "level": 3, 73 | "metadata": {}, 74 | "source": [ 75 | "Fetching validation information" 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "metadata": {}, 81 | "source": [ 82 | "Now we will identify the molecule number of GroEL in entry 1kp8 using the /pdb/entry/molecules call." 
83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "collapsed": false, 88 | "input": [ 89 | "the_pdbid = \"1kp8\"\n", 90 | "\n", 91 | "mols_data = get_PDBe_API_data(PDBE_API_URL + \"/pdb/entry/molecules/\" + the_pdbid)[the_pdbid]\n", 92 | "logging.info(\"PDB entry %s has %d types of molecules.\" % (the_pdbid, len(mols_data)))\n", 93 | "\n", 94 | "for mol_info in mols_data :\n", 95 | " if mol_info[\"molecule_type\"] == \"polypeptide(L)\" :\n", 96 | " logging.info( \"Molecule no. %d, '%s', is a protein modelled in %d chains %s.\" % \\\n", 97 | " (mol_info[\"entity_id\"], mol_info[\"molecule_name\"], \\\n", 98 | " len(mol_info[\"in_chains\"]), \",\".join(mol_info[\"in_chains\"])) \\\n", 99 | " )" 100 | ], 101 | "language": "python", 102 | "metadata": {}, 103 | "outputs": [ 104 | { 105 | "output_type": "stream", 106 | "stream": "stdout", 107 | "text": [ 108 | "LOG|11-Nov-2014 14:24:38|INFO PDB entry 1kp8 has 6 types of molecules.\n" 109 | ] 110 | }, 111 | { 112 | "output_type": "stream", 113 | "stream": "stdout", 114 | "text": [ 115 | "LOG|11-Nov-2014 14:24:38|INFO Molecule no. 1, '60 kDa chaperonin', is a protein modelled in 14 chains A,B,C,D,E,F,G,H,I,J,K,L,M,N.\n" 116 | ] 117 | } 118 | ], 119 | "prompt_number": 3 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "So the protein of interest is molecule no. 1 and there are 14 copies of it.\n", 126 | "\n", 127 | "Let us now obtain per-residue information on backbone (Ramachandran) and sidechain quality using the call /validation/rama_sidechain_listing/." 
128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "collapsed": false, 133 | "input": [ 134 | "rama_data = get_PDBe_API_data(PDBE_API_URL + \"/validation/rama_sidechain_listing/entry/\" + the_pdbid) [the_pdbid]" 135 | ], 136 | "language": "python", 137 | "metadata": {}, 138 | "outputs": [], 139 | "prompt_number": 4 140 | }, 141 | { 142 | "cell_type": "markdown", 143 | "metadata": {}, 144 | "source": [ 145 | "Let us define a container which will hold information about Ramachandran state (outlier, allowed, favoured) and rotamer state (rotamer name or outlier) for each residue of each chain." 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "collapsed": false, 151 | "input": [ 152 | "import collections\n", 153 | "outliers = {\n", 154 | " \"rama\":collections.defaultdict(lambda: collections.defaultdict(lambda:[])),\n", 155 | " \"rota\":collections.defaultdict(lambda: collections.defaultdict(lambda:[])),\n", 156 | "}" 157 | ], 158 | "language": "python", 159 | "metadata": {}, 160 | "outputs": [], 161 | "prompt_number": 5 162 | }, 163 | { 164 | "cell_type": "markdown", 165 | "metadata": {}, 166 | "source": [ 167 | "Let us populate this container with Rama and sidechain information from chains modelling molecule no. 1 in the first and only MODEL in the entry." 
168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "collapsed": false, 173 | "input": [ 174 | "for mol in rama_data[\"molecules\"] :\n", 175 | " if str(mol[\"entity_id\"]) != \"1\" :\n", 176 | " continue\n", 177 | " for chain in mol[\"chains\"] :\n", 178 | " model = chain[\"models\"][0]\n", 179 | " logging.info(\"Chain id %s has Rama sidechain validation for %d residues.\" % (chain[\"chain_id\"], len(model[\"residues\"])))\n", 180 | " for residue in model[\"residues\"] :\n", 181 | " res_id = (residue[\"residue_name\"],residue[\"author_residue_number\"],residue[\"author_insertion_code\"])\n", 182 | " outliers[\"rama\"] [res_id] [ residue[\"rama\"] ].append( chain[\"chain_id\"] )\n", 183 | " outliers[\"rota\"] [res_id] [ residue[\"rota\"] ].append( chain[\"chain_id\"] )" 184 | ], 185 | "language": "python", 186 | "metadata": {}, 187 | "outputs": [ 188 | { 189 | "output_type": "stream", 190 | "stream": "stdout", 191 | "text": [ 192 | "LOG|11-Nov-2014 14:25:33|INFO Chain id A has Rama sidechain validation for 524 residues.\n" 193 | ] 194 | }, 195 | { 196 | "output_type": "stream", 197 | "stream": "stdout", 198 | "text": [ 199 | "LOG|11-Nov-2014 14:25:33|INFO Chain id C has Rama sidechain validation for 524 residues.\n" 200 | ] 201 | }, 202 | { 203 | "output_type": "stream", 204 | "stream": "stdout", 205 | "text": [ 206 | "LOG|11-Nov-2014 14:25:33|INFO Chain id B has Rama sidechain validation for 524 residues.\n" 207 | ] 208 | }, 209 | { 210 | "output_type": "stream", 211 | "stream": "stdout", 212 | "text": [ 213 | "LOG|11-Nov-2014 14:25:33|INFO Chain id E has Rama sidechain validation for 524 residues.\n" 214 | ] 215 | }, 216 | { 217 | "output_type": "stream", 218 | "stream": "stdout", 219 | "text": [ 220 | "LOG|11-Nov-2014 14:25:33|INFO Chain id D has Rama sidechain validation for 524 residues.\n" 221 | ] 222 | }, 223 | { 224 | "output_type": "stream", 225 | "stream": "stdout", 226 | "text": [ 227 | "LOG|11-Nov-2014 14:25:33|INFO Chain id G has Rama sidechain 
validation for 524 residues.\n" 228 | ] 229 | }, 230 | { 231 | "output_type": "stream", 232 | "stream": "stdout", 233 | "text": [ 234 | "LOG|11-Nov-2014 14:25:33|INFO Chain id F has Rama sidechain validation for 524 residues.\n" 235 | ] 236 | }, 237 | { 238 | "output_type": "stream", 239 | "stream": "stdout", 240 | "text": [ 241 | "LOG|11-Nov-2014 14:25:33|INFO Chain id I has Rama sidechain validation for 524 residues.\n" 242 | ] 243 | }, 244 | { 245 | "output_type": "stream", 246 | "stream": "stdout", 247 | "text": [ 248 | "LOG|11-Nov-2014 14:25:33|INFO Chain id H has Rama sidechain validation for 524 residues.\n" 249 | ] 250 | }, 251 | { 252 | "output_type": "stream", 253 | "stream": "stdout", 254 | "text": [ 255 | "LOG|11-Nov-2014 14:25:33|INFO Chain id K has Rama sidechain validation for 524 residues.\n" 256 | ] 257 | }, 258 | { 259 | "output_type": "stream", 260 | "stream": "stdout", 261 | "text": [ 262 | "LOG|11-Nov-2014 14:25:33|INFO Chain id J has Rama sidechain validation for 524 residues.\n" 263 | ] 264 | }, 265 | { 266 | "output_type": "stream", 267 | "stream": "stdout", 268 | "text": [ 269 | "LOG|11-Nov-2014 14:25:33|INFO Chain id M has Rama sidechain validation for 524 residues.\n" 270 | ] 271 | }, 272 | { 273 | "output_type": "stream", 274 | "stream": "stdout", 275 | "text": [ 276 | "LOG|11-Nov-2014 14:25:33|INFO Chain id L has Rama sidechain validation for 524 residues.\n" 277 | ] 278 | }, 279 | { 280 | "output_type": "stream", 281 | "stream": "stdout", 282 | "text": [ 283 | "LOG|11-Nov-2014 14:25:33|INFO Chain id N has Rama sidechain validation for 524 residues.\n" 284 | ] 285 | } 286 | ], 287 | "prompt_number": 6 288 | }, 289 | { 290 | "cell_type": "heading", 291 | "level": 3, 292 | "metadata": {}, 293 | "source": [ 294 | "Finding the odd ones" 295 | ] 296 | }, 297 | { 298 | "cell_type": "markdown", 299 | "metadata": {}, 300 | "source": [ 301 | "Now let us write a function which will find the residues that have more than one Rama or sidechain 
state, such that at least one of the states occurs a majority of times." 302 | ] 303 | }, 304 | { 305 | "cell_type": "code", 306 | "collapsed": false, 307 | "input": [ 308 | "def print_unusual_residues(val_key, major_state_freq) :\n", 309 | " for res_id, val_info in outliers[val_key].items() :\n", 310 | " # not interested in all residues at this index to be in the same state\n", 311 | " if len(val_info) == 1 : continue\n", 312 | " # not interested in this residue index if no state occurs at least with required frequency\n", 313 | " if not any( [len(val_info[k]) >= major_state_freq for k in val_info] ) :\n", 314 | " continue\n", 315 | " # write out the popular state and also those occurring less frequently\n", 316 | " val_keys = sorted( val_info.keys(), key = lambda vk:len(val_info[vk]), reverse=True )\n", 317 | " state_frequencies = \", \".join([\"%7s\"%k+\":%2d\" % len(val_info[k]) for k in val_keys])\n", 318 | " minor_chains_str = \" \".join([\"%s:%s\"%(k,\",\".join(val_info[k])) for k in val_keys[1:]])\n", 319 | " logging.info(\"Residue [%3s %4s%s] has multiple %s states: %s || Unusual chains %s\" % \\\n", 320 | " (res_id[0], res_id[1], res_id[2], val_key, state_frequencies, minor_chains_str) \\\n", 321 | " )" 322 | ], 323 | "language": "python", 324 | "metadata": {}, 325 | "outputs": [], 326 | "prompt_number": 8 327 | }, 328 | { 329 | "cell_type": "markdown", 330 | "metadata": {}, 331 | "source": [ 332 | "Let us print residues with unusual Ramachandran state that contrasts against counterparts in other chains." 
333 | ] 334 | }, 335 | { 336 | "cell_type": "code", 337 | "collapsed": false, 338 | "input": [ 339 | "print_unusual_residues(\"rama\", 12)" 340 | ], 341 | "language": "python", 342 | "metadata": {}, 343 | "outputs": [ 344 | { 345 | "output_type": "stream", 346 | "stream": "stdout", 347 | "text": [ 348 | "LOG|11-Nov-2014 14:30:37|INFO Residue [PRO 462] has multiple rama states: Favored:12, Allowed: 2 || Unusual chains Allowed:E,L\n" 349 | ] 350 | }, 351 | { 352 | "output_type": "stream", 353 | "stream": "stdout", 354 | "text": [ 355 | "LOG|11-Nov-2014 14:30:37|INFO Residue [ILE 342] has multiple rama states: Favored:13, Allowed: 1 || Unusual chains Allowed:I\n" 356 | ] 357 | }, 358 | { 359 | "output_type": "stream", 360 | "stream": "stdout", 361 | "text": [ 362 | "LOG|11-Nov-2014 14:30:37|INFO Residue [LEU 222] has multiple rama states: Allowed:12, Favored: 2 || Unusual chains Favored:D,G\n" 363 | ] 364 | }, 365 | { 366 | "output_type": "stream", 367 | "stream": "stdout", 368 | "text": [ 369 | "LOG|11-Nov-2014 14:30:37|INFO Residue [ALA 383] has multiple rama states: OUTLIER:13, Allowed: 1 || Unusual chains Allowed:A\n" 370 | ] 371 | }, 372 | { 373 | "output_type": "stream", 374 | "stream": "stdout", 375 | "text": [ 376 | "LOG|11-Nov-2014 14:30:37|INFO Residue [THR 497] has multiple rama states: Favored:13, Allowed: 1 || Unusual chains Allowed:K\n" 377 | ] 378 | }, 379 | { 380 | "output_type": "stream", 381 | "stream": "stdout", 382 | "text": [ 383 | "LOG|11-Nov-2014 14:30:37|INFO Residue [GLU 156] has multiple rama states: Favored:13, Allowed: 1 || Unusual chains Allowed:E\n" 384 | ] 385 | }, 386 | { 387 | "output_type": "stream", 388 | "stream": "stdout", 389 | "text": [ 390 | "LOG|11-Nov-2014 14:30:37|INFO Residue [ASP 253] has multiple rama states: OUTLIER:12, Allowed: 2 || Unusual chains Allowed:D,N\n" 391 | ] 392 | }, 393 | { 394 | "output_type": "stream", 395 | "stream": "stdout", 396 | "text": [ 397 | "LOG|11-Nov-2014 14:30:37|INFO Residue [SER 154] has 
multiple rama states: Favored:12, Allowed: 2 || Unusual chains Allowed:B,E\n" 398 | ] 399 | } 400 | ], 401 | "prompt_number": 9 402 | }, 403 | { 404 | "cell_type": "markdown", 405 | "metadata": {}, 406 | "source": [ 407 | "Here we see that Thr K-497, Glu E-156 are in the allowed region whereas residues in other chains are all in the favoured region.\n", 408 | "These residues need to be reviewed.\n", 409 | "\n", 410 | "Residues Ala 383, Asp 253 are nearly always outliers. This could be a genuine outlier with good reason, or it could be a systematic error!" 411 | ] 412 | }, 413 | { 414 | "cell_type": "markdown", 415 | "metadata": {}, 416 | "source": [ 417 | "Similarly, let us print residues where only a small minority of chains have a different sidechain rotamer at a residue position." 418 | ] 419 | }, 420 | { 421 | "cell_type": "code", 422 | "collapsed": false, 423 | "input": [ 424 | "print_unusual_residues(\"rota\", 13)" 425 | ], 426 | "language": "python", 427 | "metadata": {}, 428 | "outputs": [ 429 | { 430 | "output_type": "stream", 431 | "stream": "stdout", 432 | "text": [ 433 | "LOG|11-Nov-2014 14:30:54|INFO Residue [VAL 510] has multiple rota states: t:13, OUTLIER: 1 || Unusual chains OUTLIER:B\n" 434 | ] 435 | }, 436 | { 437 | "output_type": "stream", 438 | "stream": "stdout", 439 | "text": [ 440 | "LOG|11-Nov-2014 14:30:54|INFO Residue [ARG 58] has multiple rota states: mtm-85:13, mtt180: 1 || Unusual chains mtt180:B\n" 441 | ] 442 | }, 443 | { 444 | "output_type": "stream", 445 | "stream": "stdout", 446 | "text": [ 447 | "LOG|11-Nov-2014 14:30:54|INFO Residue [ASP 52] has multiple rota states: t70:13, t0: 1 || Unusual chains t0:B\n" 448 | ] 449 | }, 450 | { 451 | "output_type": "stream", 452 | "stream": "stdout", 453 | "text": [ 454 | "LOG|11-Nov-2014 14:30:54|INFO Residue [LYS 75] has multiple rota states: OUTLIER:13, tttt: 1 || Unusual chains tttt:E\n" 455 | ] 456 | }, 457 | { 458 | "output_type": "stream", 459 | "stream": "stdout", 460 | "text": [ 461 | 
"LOG|11-Nov-2014 14:30:54|INFO Residue [VAL 417] has multiple rota states: m:13, OUTLIER: 1 || Unusual chains OUTLIER:L\n" 462 | ] 463 | }, 464 | { 465 | "output_type": "stream", 466 | "stream": "stdout", 467 | "text": [ 468 | "LOG|11-Nov-2014 14:30:54|INFO Residue [GLU 130] has multiple rota states: tp10:13, mt-10: 1 || Unusual chains mt-10:B\n" 469 | ] 470 | }, 471 | { 472 | "output_type": "stream", 473 | "stream": "stdout", 474 | "text": [ 475 | "LOG|11-Nov-2014 14:30:54|INFO Residue [VAL 128] has multiple rota states: t:13, m: 1 || Unusual chains m:B\n" 476 | ] 477 | }, 478 | { 479 | "output_type": "stream", 480 | "stream": "stdout", 481 | "text": [ 482 | "LOG|11-Nov-2014 14:30:54|INFO Residue [GLU 129] has multiple rota states: mt-10:13, tp10: 1 || Unusual chains tp10:B\n" 483 | ] 484 | }, 485 | { 486 | "output_type": "stream", 487 | "stream": "stdout", 488 | "text": [ 489 | "LOG|11-Nov-2014 14:30:54|INFO Residue [ARG 368] has multiple rota states: mtm180:13, mtp180: 1 || Unusual chains mtp180:N\n" 490 | ] 491 | }, 492 | { 493 | "output_type": "stream", 494 | "stream": "stdout", 495 | "text": [ 496 | "LOG|11-Nov-2014 14:30:54|INFO Residue [MET 514] has multiple rota states: mtp:13, OUTLIER: 1 || Unusual chains OUTLIER:E\n" 497 | ] 498 | }, 499 | { 500 | "output_type": "stream", 501 | "stream": "stdout", 502 | "text": [ 503 | "LOG|11-Nov-2014 14:30:54|INFO Residue [ARG 231] has multiple rota states: mtt180:13, mtp180: 1 || Unusual chains mtp180:J\n" 504 | ] 505 | }, 506 | { 507 | "output_type": "stream", 508 | "stream": "stdout", 509 | "text": [ 510 | "LOG|11-Nov-2014 14:30:54|INFO Residue [ASP 490] has multiple rota states: m-20:13, t70: 1 || Unusual chains t70:E\n" 511 | ] 512 | }, 513 | { 514 | "output_type": "stream", 515 | "stream": "stdout", 516 | "text": [ 517 | "LOG|11-Nov-2014 14:30:54|INFO Residue [ASP 334] has multiple rota states: t70:13, t0: 1 || Unusual chains t0:D\n" 518 | ] 519 | }, 520 | { 521 | "output_type": "stream", 522 | "stream": 
"stdout", 523 | "text": [ 524 | "LOG|11-Nov-2014 14:30:54|INFO Residue [SER 424] has multiple rota states: p:13, m: 1 || Unusual chains m:E\n" 525 | ] 526 | }, 527 | { 528 | "output_type": "stream", 529 | "stream": "stdout", 530 | "text": [ 531 | "LOG|11-Nov-2014 14:30:54|INFO Residue [ARG 284] has multiple rota states: ptt180:13, OUTLIER: 1 || Unusual chains OUTLIER:M\n" 532 | ] 533 | }, 534 | { 535 | "output_type": "stream", 536 | "stream": "stdout", 537 | "text": [ 538 | "LOG|11-Nov-2014 14:30:54|INFO Residue [VAL 74] has multiple rota states: p:13, t: 1 || Unusual chains t:B\n" 539 | ] 540 | } 541 | ], 542 | "prompt_number": 10 543 | }, 544 | { 545 | "cell_type": "markdown", 546 | "metadata": {}, 547 | "source": [ 548 | "Note residues Val B-510, Met E-514, Val L-417, Arg M-284 which have rotameric counterparts in all other chains - their sidechain most probably needs remodelling.\n", 549 | "Also note residues like Lys 75 could be outliers for a good reason, or could just be systematic outliers." 550 | ] 551 | }, 552 | { 553 | "cell_type": "heading", 554 | "level": 3, 555 | "metadata": {}, 556 | "source": [ 557 | "Your turn!" 
558 | ] 559 | }, 560 | { 561 | "cell_type": "markdown", 562 | "metadata": {}, 563 | "source": [ 564 | "" 575 | ] 576 | } 577 | ], 578 | "metadata": {} 579 | } 580 | ] 581 | } 582 | -------------------------------------------------------------------------------- /REST_API/notebooks/ligand_entries_domains.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "", 4 | "signature": "sha256:d8cdc94684753244221d26ed4c11d5fdec21b1c0dbfbb2b20a710f8bedcf6007" 5 | }, 6 | "nbformat": 3, 7 | "nbformat_minor": 0, 8 | "worksheets": [ 9 | { 10 | "cells": [ 11 | { 12 | "cell_type": "heading", 13 | "level": 1, 14 | "metadata": {}, 15 | "source": [ 16 | "Compile a dataset of ligand-binding domains" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "\n", 24 | " \n", 25 | " \n", 48 | " \n", 51 | " \n", 52 | "
\n", 26 | " Often we want to see how a ligand binds to a domain and how such binding sites compare across multiple PDB entries.\n", 27 | "
\n", 28 | "
\n", 29 | " In this example, we will use the PDBe API to :\n", 30 | "
    \n", 31 | "
  • \n", 32 | " find the PDB entries containing retinoic acid (REA),\n", 33 | "
  • \n", 34 | "
  • \n", 35 | " analyze the domain composition of those entries,\n", 36 | "
  • \n", 37 | "
  • \n", 38 | " find the lipocalin CATH domains which interact with REA,\n", 39 | "
  • \n", 40 | "
  • \n", 41 | " find a set of chains that contain lipocalin domain and bind to REA too, and\n", 42 | "
  • \n", 43 | "
  • \n", 44 | " write your own improvements to the demo code.\n", 45 | "
  • \n", 46 | "
\n", 47 | "
\n", 49 | " \n", 50 | "
\n", 53 | "\n" 54 | ] 55 | }, 56 | { 57 | "cell_type": "heading", 58 | "level": 3, 59 | "metadata": {}, 60 | "source": [ 61 | "Getting started" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "Let us run the [tutorial_utils](tutorial_utils.ipynb) notebook to set up the API URL, logger, caller utility, etc. Check out that notebook to set up anything differently." 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "collapsed": false, 74 | "input": [ 75 | "%run 'tutorial_utils.ipynb'" 76 | ], 77 | "language": "python", 78 | "metadata": {}, 79 | "outputs": [], 80 | "prompt_number": 48 81 | }, 82 | { 83 | "cell_type": "heading", 84 | "level": 3, 85 | "metadata": {}, 86 | "source": [ 87 | "Entries containing retinoic acid" 88 | ] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | "metadata": {}, 93 | "source": [ 94 | "Let us begin by finding PDB entries that contain retinoic acid (PDB chemical component REA). For this, we will use the API call /pdb/compound/in_pdb." 
95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "collapsed": false, 100 | "input": [ 101 | "chem_comp_id = \"REA\"\n", 102 | "cc_entries_URL = PDBE_API_URL + \"/pdb/compound/in_pdb/\" + chem_comp_id\n", 103 | "cc_entries_data = get_PDBe_API_data(cc_entries_URL) # function from tutorial_utils\n", 104 | "cc_entries = cc_entries_data[chem_comp_id]\n", 105 | "logging.info(\"There are %d PDB entries containing chemical component %s.\" % (len(cc_entries), chem_comp_id))" 106 | ], 107 | "language": "python", 108 | "metadata": {}, 109 | "outputs": [ 110 | { 111 | "output_type": "stream", 112 | "stream": "stdout", 113 | "text": [ 114 | "LOG|11-Nov-2014 13:37:31|INFO There are 31 PDB entries containing chemical component REA.\n" 115 | ] 116 | } 117 | ], 118 | "prompt_number": 49 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "metadata": {}, 123 | "source": [ 124 | "Hmmm, so 31 PDB entries contain REA.\n", 125 | "\n", 126 | "Alternatively, we could have used the PDBe search system too to find these entries. See the search_introduction notebook to know more." 127 | ] 128 | }, 129 | { 130 | "cell_type": "heading", 131 | "level": 3, 132 | "metadata": {}, 133 | "source": [ 134 | "Obtaining the domains" 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": {}, 140 | "source": [ 141 | "So far so good. Now let's figure the domain composition of these entries by using the SIFTS mappings call. This call returns many types of mappings, such as UniProt accessions, Pfam families, InterPro, SCOP, CATH, etc. We will make one mapping call for each PDB entry id." 
142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "collapsed": false, 147 | "input": [ 148 | "def get_mappings() :\n", 149 | " all_mappings = {}\n", 150 | " for pdb_id in cc_entries :\n", 151 | " mappings_data = get_PDBe_API_data(PDBE_API_URL+\"/mappings/\"+pdb_id)\n", 152 | " try :\n", 153 | " all_mappings[pdb_id] = mappings_data[pdb_id]\n", 154 | " print \".\",\n", 155 | " except KeyError :\n", 156 | " logging.warn(\"Mappings call failed for entry \" + pdb_id)\n", 157 | " print \"\"\n", 158 | " logging.info(\"Mappings fetched for %d of %d entries.\" % (len(all_mappings), len(cc_entries)))\n", 159 | " return all_mappings\n", 160 | "\n", 161 | "mappings = get_mappings()" 162 | ], 163 | "language": "python", 164 | "metadata": {}, 165 | "outputs": [ 166 | { 167 | "output_type": "stream", 168 | "stream": "stdout", 169 | "text": [ 170 | ". " 171 | ] 172 | }, 173 | { 174 | "output_type": "stream", 175 | "stream": "stdout", 176 | "text": [ 177 | ". " 178 | ] 179 | }, 180 | { 181 | "output_type": "stream", 182 | "stream": "stdout", 183 | "text": [ 184 | ". " 185 | ] 186 | }, 187 | { 188 | "output_type": "stream", 189 | "stream": "stdout", 190 | "text": [ 191 | ". " 192 | ] 193 | }, 194 | { 195 | "output_type": "stream", 196 | "stream": "stdout", 197 | "text": [ 198 | ". " 199 | ] 200 | }, 201 | { 202 | "output_type": "stream", 203 | "stream": "stdout", 204 | "text": [ 205 | ". " 206 | ] 207 | }, 208 | { 209 | "output_type": "stream", 210 | "stream": "stdout", 211 | "text": [ 212 | ". " 213 | ] 214 | }, 215 | { 216 | "output_type": "stream", 217 | "stream": "stdout", 218 | "text": [ 219 | ". " 220 | ] 221 | }, 222 | { 223 | "output_type": "stream", 224 | "stream": "stdout", 225 | "text": [ 226 | ". " 227 | ] 228 | }, 229 | { 230 | "output_type": "stream", 231 | "stream": "stdout", 232 | "text": [ 233 | ". " 234 | ] 235 | }, 236 | { 237 | "output_type": "stream", 238 | "stream": "stdout", 239 | "text": [ 240 | ". 
" 241 | ] 242 | }, 243 | { 244 | "output_type": "stream", 245 | "stream": "stdout", 246 | "text": [ 247 | ". " 248 | ] 249 | }, 250 | { 251 | "output_type": "stream", 252 | "stream": "stdout", 253 | "text": [ 254 | ". " 255 | ] 256 | }, 257 | { 258 | "output_type": "stream", 259 | "stream": "stdout", 260 | "text": [ 261 | ". " 262 | ] 263 | }, 264 | { 265 | "output_type": "stream", 266 | "stream": "stdout", 267 | "text": [ 268 | ". " 269 | ] 270 | }, 271 | { 272 | "output_type": "stream", 273 | "stream": "stdout", 274 | "text": [ 275 | ". " 276 | ] 277 | }, 278 | { 279 | "output_type": "stream", 280 | "stream": "stdout", 281 | "text": [ 282 | ". " 283 | ] 284 | }, 285 | { 286 | "output_type": "stream", 287 | "stream": "stdout", 288 | "text": [ 289 | ". " 290 | ] 291 | }, 292 | { 293 | "output_type": "stream", 294 | "stream": "stdout", 295 | "text": [ 296 | ". " 297 | ] 298 | }, 299 | { 300 | "output_type": "stream", 301 | "stream": "stdout", 302 | "text": [ 303 | ". " 304 | ] 305 | }, 306 | { 307 | "output_type": "stream", 308 | "stream": "stdout", 309 | "text": [ 310 | ". " 311 | ] 312 | }, 313 | { 314 | "output_type": "stream", 315 | "stream": "stdout", 316 | "text": [ 317 | ". " 318 | ] 319 | }, 320 | { 321 | "output_type": "stream", 322 | "stream": "stdout", 323 | "text": [ 324 | ". " 325 | ] 326 | }, 327 | { 328 | "output_type": "stream", 329 | "stream": "stdout", 330 | "text": [ 331 | ". " 332 | ] 333 | }, 334 | { 335 | "output_type": "stream", 336 | "stream": "stdout", 337 | "text": [ 338 | ". " 339 | ] 340 | }, 341 | { 342 | "output_type": "stream", 343 | "stream": "stdout", 344 | "text": [ 345 | ". " 346 | ] 347 | }, 348 | { 349 | "output_type": "stream", 350 | "stream": "stdout", 351 | "text": [ 352 | ". " 353 | ] 354 | }, 355 | { 356 | "output_type": "stream", 357 | "stream": "stdout", 358 | "text": [ 359 | ". " 360 | ] 361 | }, 362 | { 363 | "output_type": "stream", 364 | "stream": "stdout", 365 | "text": [ 366 | ". 
" 367 | ] 368 | }, 369 | { 370 | "output_type": "stream", 371 | "stream": "stdout", 372 | "text": [ 373 | ". " 374 | ] 375 | }, 376 | { 377 | "output_type": "stream", 378 | "stream": "stdout", 379 | "text": [ 380 | ". \n", 381 | "LOG|11-Nov-2014 13:37:39|INFO Mappings fetched for 31 of 31 entries.\n" 382 | ] 383 | } 384 | ], 385 | "prompt_number": 50 386 | }, 387 | { 388 | "cell_type": "markdown", 389 | "metadata": {}, 390 | "source": [ 391 | "Which mappings are available? Let us count." 392 | ] 393 | }, 394 | { 395 | "cell_type": "code", 396 | "collapsed": false, 397 | "input": [ 398 | "import collections\n", 399 | "mapping_counts = collections.defaultdict(lambda:0)\n", 400 | "\n", 401 | "for pdb_id in mappings :\n", 402 | " for mapping_type, mappings_info in mappings[pdb_id].items() :\n", 403 | " if len(mappings_info) > 0 :\n", 404 | " mapping_counts[mapping_type] += 1\n", 405 | "\n", 406 | "for mt, mc in mapping_counts.items() :\n", 407 | " logging.info(\"%3d entries have %s mappings.\" % (mc, mt))" 408 | ], 409 | "language": "python", 410 | "metadata": {}, 411 | "outputs": [ 412 | { 413 | "output_type": "stream", 414 | "stream": "stdout", 415 | "text": [ 416 | "LOG|11-Nov-2014 13:37:43|INFO 31 entries have Pfam mappings.\n" 417 | ] 418 | }, 419 | { 420 | "output_type": "stream", 421 | "stream": "stdout", 422 | "text": [ 423 | "LOG|11-Nov-2014 13:37:43|INFO 31 entries have InterPro mappings.\n" 424 | ] 425 | }, 426 | { 427 | "output_type": "stream", 428 | "stream": "stdout", 429 | "text": [ 430 | "LOG|11-Nov-2014 13:37:43|INFO 26 entries have CATH mappings.\n" 431 | ] 432 | }, 433 | { 434 | "output_type": "stream", 435 | "stream": "stdout", 436 | "text": [ 437 | "LOG|11-Nov-2014 13:37:43|INFO 4 entries have EC mappings.\n" 438 | ] 439 | }, 440 | { 441 | "output_type": "stream", 442 | "stream": "stdout", 443 | "text": [ 444 | "LOG|11-Nov-2014 13:37:43|INFO 31 entries have UniProt mappings.\n" 445 | ] 446 | }, 447 | { 448 | "output_type": "stream", 449 | "stream": 
"stdout", 450 | "text": [ 451 | "LOG|11-Nov-2014 13:37:43|INFO 17 entries have SCOP mappings.\n" 452 | ] 453 | }, 454 | { 455 | "output_type": "stream", 456 | "stream": "stdout", 457 | "text": [ 458 | "LOG|11-Nov-2014 13:37:43|INFO 31 entries have GO mappings.\n" 459 | ] 460 | } 461 | ], 462 | "prompt_number": 51 463 | }, 464 | { 465 | "cell_type": "markdown", 466 | "metadata": {}, 467 | "source": [ 468 | "Let us focus on CATH superfamilies and count how many times each superfamily is mapped onto entries containing the compound of our interest." 469 | ] 470 | }, 471 | { 472 | "cell_type": "code", 473 | "collapsed": false, 474 | "input": [ 475 | "dom_type = \"CATH\"\n", 476 | "dom_counts = collections.defaultdict(lambda:0)\n", 477 | "dom_family_name = {}\n", 478 | "\n", 479 | "for pdb_id in mappings :\n", 480 | " if not mappings[pdb_id].has_key(dom_type) :\n", 481 | " continue\n", 482 | " else :\n", 483 | " for dom_id, mranges in mappings[pdb_id][dom_type].items() :\n", 484 | " dom_family_name[dom_id] = mappings[pdb_id][dom_type][dom_id][\"homology\"]\n", 485 | " dom_counts[dom_id] += 1\n", 486 | "\n", 487 | "for dom_id, frequency in dom_counts.items() :\n", 488 | " logging.info(\"%2d entries contain CATH superfamily %11s (%s).\" % (frequency, dom_id, dom_family_name[dom_id]))" 489 | ], 490 | "language": "python", 491 | "metadata": {}, 492 | "outputs": [ 493 | { 494 | "output_type": "stream", 495 | "stream": "stdout", 496 | "text": [ 497 | "LOG|11-Nov-2014 13:39:14|INFO 9 entries contain CATH superfamily 2.40.128.20 (Lipocalin).\n" 498 | ] 499 | }, 500 | { 501 | "output_type": "stream", 502 | "stream": "stdout", 503 | "text": [ 504 | "LOG|11-Nov-2014 13:39:14|INFO 2 entries contain CATH superfamily 2.60.40.180 (Immunoglobulin-like).\n" 505 | ] 506 | }, 507 | { 508 | "output_type": "stream", 509 | "stream": "stdout", 510 | "text": [ 511 | "LOG|11-Nov-2014 13:39:14|INFO 3 entries contain CATH superfamily 3.30.50.10 (Erythroid Transcription Factor GATA-1, subunit 
A).\n" 512 | ] 513 | }, 514 | { 515 | "output_type": "stream", 516 | "stream": "stdout", 517 | "text": [ 518 | "LOG|11-Nov-2014 13:39:14|INFO 15 entries contain CATH superfamily 1.10.565.10 (Retinoid X Receptor).\n" 519 | ] 520 | }, 521 | { 522 | "output_type": "stream", 523 | "stream": "stdout", 524 | "text": [ 525 | "LOG|11-Nov-2014 13:39:14|INFO 1 entries contain CATH superfamily 1.10.630.10 (Cytochrome p450).\n" 526 | ] 527 | } 528 | ], 529 | "prompt_number": 54 530 | }, 531 | { 532 | "cell_type": "markdown", 533 | "metadata": {}, 534 | "source": [ 535 | "We see there are 9 entries which contain CATH domain called Lipocalin (2.40.128.20).\n", 536 | "\n", 537 | "Let us now make a reverse associative array from PDB chains to superfamilies, and note that a chain can contain multiple CATH domains." 538 | ] 539 | }, 540 | { 541 | "cell_type": "code", 542 | "collapsed": false, 543 | "input": [ 544 | "chain_to_domain = collections.defaultdict(lambda:[])\n", 545 | "\n", 546 | "for pdb_id in mappings :\n", 547 | " if mappings[pdb_id].has_key(dom_type) :\n", 548 | " for dom_id, mapping_info in mappings[pdb_id][dom_type].items() :\n", 549 | " for arange in mapping_info[\"mappings\"] :\n", 550 | " chain_to_domain[(pdb_id,arange[\"chain_id\"])].append(dom_id)\n", 551 | "\n", 552 | "for (pdb_id,chain_id), dom_ids in chain_to_domain.items() [0:5] :\n", 553 | " logging.info(\"PDB id %s chain %s contain domains %s\" % (pdb_id, chain_id, \", \".join(dom_ids)))" 554 | ], 555 | "language": "python", 556 | "metadata": {}, 557 | "outputs": [ 558 | { 559 | "output_type": "stream", 560 | "stream": "stdout", 561 | "text": [ 562 | "LOG|11-Nov-2014 13:39:29|INFO PDB id 3fal chain D contain domains 1.10.565.10\n" 563 | ] 564 | }, 565 | { 566 | "output_type": "stream", 567 | "stream": "stdout", 568 | "text": [ 569 | "LOG|11-Nov-2014 13:39:29|INFO PDB id 3fc6 chain B contain domains 1.10.565.10\n" 570 | ] 571 | }, 572 | { 573 | "output_type": "stream", 574 | "stream": "stdout", 575 | 
"text": [ 576 | "LOG|11-Nov-2014 13:39:29|INFO PDB id 3dzy chain A contain domains 3.30.50.10, 1.10.565.10\n" 577 | ] 578 | }, 579 | { 580 | "output_type": "stream", 581 | "stream": "stdout", 582 | "text": [ 583 | "LOG|11-Nov-2014 13:39:29|INFO PDB id 1xls chain B contain domains 1.10.565.10\n" 584 | ] 585 | }, 586 | { 587 | "output_type": "stream", 588 | "stream": "stdout", 589 | "text": [ 590 | "LOG|11-Nov-2014 13:39:29|INFO PDB id 1xiu chain B contain domains 1.10.565.10\n" 591 | ] 592 | } 593 | ], 594 | "prompt_number": 55 595 | }, 596 | { 597 | "cell_type": "heading", 598 | "level": 3, 599 | "metadata": {}, 600 | "source": [ 601 | "Obtaining the binding sites" 602 | ] 603 | }, 604 | { 605 | "cell_type": "markdown", 606 | "metadata": {}, 607 | "source": [ 608 | "Now let us turn our attention to binding sites information. Let us write a function that will fetch it using binding sites API call." 609 | ] 610 | }, 611 | { 612 | "cell_type": "code", 613 | "collapsed": false, 614 | "input": [ 615 | "def get_binding_sites() :\n", 616 | " all_sites = collections.defaultdict(lambda:[])\n", 617 | " for pdb_id in cc_entries :\n", 618 | " api_data = get_PDBe_API_data(PDBE_API_URL+\"/pdb/entry/binding_sites/\"+pdb_id)\n", 619 | " for asite in api_data[pdb_id] :\n", 620 | " if any((alig[\"chem_comp_id\"] == chem_comp_id for alig in asite[\"ligand_residues\"])) :\n", 621 | " all_sites[pdb_id].append(asite)\n", 622 | " return all_sites" 623 | ], 624 | "language": "python", 625 | "metadata": {}, 626 | "outputs": [], 627 | "prompt_number": 56 628 | }, 629 | { 630 | "cell_type": "markdown", 631 | "metadata": {}, 632 | "source": [ 633 | "Ideally we expect there to be at least one binding site for REA defined in each entry. Let us verify that." 
634 | ] 635 | }, 636 | { 637 | "cell_type": "code", 638 | "collapsed": false, 639 | "input": [ 640 | "binding_sites = get_binding_sites()\n", 641 | "\n", 642 | "binding_frequency = collections.defaultdict(lambda:0)\n", 643 | "\n", 644 | "for pdb_id, sites_info in binding_sites.items() :\n", 645 | " binding_frequency[len(sites_info)] += 1\n", 646 | " \n", 647 | "for num_sites, num_entries in binding_frequency.items() :\n", 648 | " logging.info(\"%2d entries have %2d binding sites of %s.\" % (num_entries, num_sites, chem_comp_id))" 649 | ], 650 | "language": "python", 651 | "metadata": {}, 652 | "outputs": [ 653 | { 654 | "output_type": "stream", 655 | "stream": "stdout", 656 | "text": [ 657 | "LOG|11-Nov-2014 13:39:31|INFO 15 entries have 1 binding sites of REA.\n" 658 | ] 659 | }, 660 | { 661 | "output_type": "stream", 662 | "stream": "stdout", 663 | "text": [ 664 | "LOG|11-Nov-2014 13:39:31|INFO 12 entries have 2 binding sites of REA.\n" 665 | ] 666 | }, 667 | { 668 | "output_type": "stream", 669 | "stream": "stdout", 670 | "text": [ 671 | "LOG|11-Nov-2014 13:39:31|INFO 4 entries have 4 binding sites of REA.\n" 672 | ] 673 | } 674 | ], 675 | "prompt_number": 57 676 | }, 677 | { 678 | "cell_type": "markdown", 679 | "metadata": {}, 680 | "source": [ 681 | "Let us make a list of chains which bind our favourite compound." 682 | ] 683 | }, 684 | { 685 | "cell_type": "code", 686 | "collapsed": false, 687 | "input": [ 688 | "binding_chains = set()\n", 689 | "for pdb_id, sites_info in binding_sites.items() :\n", 690 | " for asite in sites_info :\n", 691 | " for res_info in asite[\"site_residues\"] :\n", 692 | " binding_chains.add( (pdb_id, res_info[\"chain_id\"]) )\n", 693 | "\n", 694 | "logging.info(\"%d PDB chains bind %s, e.g. 
%s ....\" % ( \\\n", 695 | " len(binding_chains), chem_comp_id, \\\n", 696 | " \" \".join([bc[0]+\":\"+bc[1] for bc in list(binding_chains)[0:5]]) \\\n", 697 | ")) " 698 | ], 699 | "language": "python", 700 | "metadata": {}, 701 | "outputs": [ 702 | { 703 | "output_type": "stream", 704 | "stream": "stdout", 705 | "text": [ 706 | "LOG|11-Nov-2014 13:39:34|INFO 55 PDB chains bind REA, e.g. 1n4h:A 4nqa:H 3dzy:A 1xls:B 1xiu:B ....\n" 707 | ] 708 | } 709 | ], 710 | "prompt_number": 58 711 | }, 712 | { 713 | "cell_type": "markdown", 714 | "metadata": {}, 715 | "source": [ 716 | "Now we can find chains common to binding sites and CATH superfamilies to find instances of domains with binding ligands." 717 | ] 718 | }, 719 | { 720 | "cell_type": "code", 721 | "collapsed": false, 722 | "input": [ 723 | "domain_of_choice = \"2.40.128.20\"\n", 724 | "for pdb_chain in binding_chains :\n", 725 | " if chain_to_domain.has_key(pdb_chain) and domain_of_choice in chain_to_domain[pdb_chain] :\n", 726 | " logging.info(\"PDB chain %s:%s binds %s and contains CATH domains %s\" % \\\n", 727 | " (pdb_chain[0], pdb_chain[1], chem_comp_id, \",\".join(chain_to_domain[pdb_chain])))" 728 | ], 729 | "language": "python", 730 | "metadata": {}, 731 | "outputs": [ 732 | { 733 | "output_type": "stream", 734 | "stream": "stdout", 735 | "text": [ 736 | "LOG|11-Nov-2014 13:39:37|INFO PDB chain 1epb:A binds REA and contains CATH domains 2.40.128.20\n" 737 | ] 738 | }, 739 | { 740 | "output_type": "stream", 741 | "stream": "stdout", 742 | "text": [ 743 | "LOG|11-Nov-2014 13:39:37|INFO PDB chain 1rlb:E binds REA and contains CATH domains 2.40.128.20\n" 744 | ] 745 | }, 746 | { 747 | "output_type": "stream", 748 | "stream": "stdout", 749 | "text": [ 750 | "LOG|11-Nov-2014 13:39:37|INFO PDB chain 2g78:A binds REA and contains CATH domains 2.40.128.20\n" 751 | ] 752 | }, 753 | { 754 | "output_type": "stream", 755 | "stream": "stdout", 756 | "text": [ 757 | "LOG|11-Nov-2014 13:39:37|INFO PDB chain 1cbr:A binds 
REA and contains CATH domains 2.40.128.20\n" 758 | ] 759 | }, 760 | { 761 | "output_type": "stream", 762 | "stream": "stdout", 763 | "text": [ 764 | "LOG|11-Nov-2014 13:39:37|INFO PDB chain 1gx9:A binds REA and contains CATH domains 2.40.128.20\n" 765 | ] 766 | }, 767 | { 768 | "output_type": "stream", 769 | "stream": "stdout", 770 | "text": [ 771 | "LOG|11-Nov-2014 13:39:37|INFO PDB chain 1cbs:A binds REA and contains CATH domains 2.40.128.20\n" 772 | ] 773 | }, 774 | { 775 | "output_type": "stream", 776 | "stream": "stdout", 777 | "text": [ 778 | "LOG|11-Nov-2014 13:39:37|INFO PDB chain 1rlb:F binds REA and contains CATH domains 2.40.128.20\n" 779 | ] 780 | }, 781 | { 782 | "output_type": "stream", 783 | "stream": "stdout", 784 | "text": [ 785 | "LOG|11-Nov-2014 13:39:37|INFO PDB chain 1fem:A binds REA and contains CATH domains 2.40.128.20\n" 786 | ] 787 | }, 788 | { 789 | "output_type": "stream", 790 | "stream": "stdout", 791 | "text": [ 792 | "LOG|11-Nov-2014 13:39:37|INFO PDB chain 1epb:B binds REA and contains CATH domains 2.40.128.20\n" 793 | ] 794 | }, 795 | { 796 | "output_type": "stream", 797 | "stream": "stdout", 798 | "text": [ 799 | "LOG|11-Nov-2014 13:39:37|INFO PDB chain 1cbr:B binds REA and contains CATH domains 2.40.128.20\n" 800 | ] 801 | }, 802 | { 803 | "output_type": "stream", 804 | "stream": "stdout", 805 | "text": [ 806 | "LOG|11-Nov-2014 13:39:37|INFO PDB chain 2fr3:A binds REA and contains CATH domains 2.40.128.20\n" 807 | ] 808 | }, 809 | { 810 | "output_type": "stream", 811 | "stream": "stdout", 812 | "text": [ 813 | "LOG|11-Nov-2014 13:39:37|INFO PDB chain 3cwk:A binds REA and contains CATH domains 2.40.128.20\n" 814 | ] 815 | } 816 | ], 817 | "prompt_number": 59 818 | }, 819 | { 820 | "cell_type": "markdown", 821 | "metadata": {}, 822 | "source": [ 823 | "Hurray! we now have a dataset of chains we can study to understand the binding between retinoic acid and the CATH domain!" 
824 | ] 825 | }, 826 | { 827 | "cell_type": "heading", 828 | "level": 3, 829 | "metadata": {}, 830 | "source": [ 831 | "Your turn!" 832 | ] 833 | }, 834 | { 835 | "cell_type": "markdown", 836 | "metadata": {}, 837 | "source": [ 838 | "There are many improvements you can readily make to this recipe to obtain a better dataset for your study, such as:\n", 839 | "" 844 | ] 845 | } 846 | ], 847 | "metadata": {} 848 | } 849 | ] 850 | } -------------------------------------------------------------------------------- /REST_API/notebooks/pdb_entry_publications.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "", 4 | "signature": "sha256:e17265ba5ecfb9380cc0ffa7d1d5c1e7bc0f8d041f3110a143a449e4923ab8e0" 5 | }, 6 | "nbformat": 3, 7 | "nbformat_minor": 0, 8 | "worksheets": [ 9 | { 10 | "cells": [ 11 | { 12 | "cell_type": "heading", 13 | "level": 1, 14 | "metadata": {}, 15 | "source": [ 16 | "Impact of PDB entries" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "PDB entries are experimentally determined models of interesting proteins. Scholarly literature often refers to PDB entries in discussions about interesting aspects of macromolecules, e.g. fold, function, folding, etc. So, a basic way to measure the impact of a PDB entry is simply to count the publications associated with the entry." 24 | ] 25 | }, 26 | { 27 | "cell_type": "heading", 28 | "level": 3, 29 | "metadata": {}, 30 | "source": [ 31 | "Getting started" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "Let us run the [tutorial_utils](tutorial_utils.ipynb) notebook to set up API URL, logger, caller utility, etc. Check out that notebook to set up anything differently." 
39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "collapsed": false, 44 | "input": [ 45 | "%run 'tutorial_utils.ipynb'" 46 | ], 47 | "language": "python", 48 | "metadata": {}, 49 | "outputs": [], 50 | "prompt_number": 1 51 | }, 52 | { 53 | "cell_type": "heading", 54 | "level": 3, 55 | "metadata": {}, 56 | "source": [ 57 | "Publications from the API" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "There are two calls for entry publications - /pdb/entry/publications and /pdb/entry/related_publications.\n", 65 | "Check them out in the interactive documentation explorer of the API.\n", 66 | "\n", 67 | "The first call provides articles associated with the entry directly, i.e. the ones which the depositor provided.\n", 68 | "\n", 69 | "The second call provides articles and reviews mined from EuroPMC. These publications either cite the depositor's citations directly or merely mention the PDB entry id in the text of the article without explicitly citing any article. Note that articles with a mention of the PDB id can be mined by EuroPMC only when the full text of the article is freely available." 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": {}, 75 | "source": [ 76 | "In this example, we will use entries deposited by 'Kleywegt' - I have copied Kleywegt's PDB ids using the PDBe search service. There are 39 as of August 2014. These can be obtained programmatically too; see the search_introduction notebook to know more." 
77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "collapsed": false, 82 | "input": [ 83 | "pdb_ids_list = [\n", 84 | " \"2c10\", \"2c11\", \"1wc2\", \"2c3n\", \"2c3q\", \"2c3t\", \"1xwg\", \"1usb\", \"1pkw\", \"1pkz\",\\\n", 85 | " \"1pl1\", \"1pl2\", \"1o8v\", \"2cds\", \"1hb6\", \"1hb8\", \"1hgw\", \"1hgy\", \"1egn\", \"1hbk\",\\\n", 86 | " \"2cbs\", \"3cbs\", \"2cbr\", \"1qjw\", \"1qk0\", \"1qk2\", \"2a2u\", \"2a2g\", \"1eg1\", \"1cb2\",\\\n", 87 | " \"1fss\", \"1lbs\", \"1lbt\", \"2chr\", \"1fcc\", \"1cbq\", \"1cbr\", \"1cbs\", \"1guh\", \\\n", 88 | "]" 89 | ], 90 | "language": "python", 91 | "metadata": {}, 92 | "outputs": [], 93 | "prompt_number": 7 94 | }, 95 | { 96 | "cell_type": "heading", 97 | "level": 3, 98 | "metadata": {}, 99 | "source": [ 100 | "Fetching publications from API" 101 | ] 102 | }, 103 | { 104 | "cell_type": "markdown", 105 | "metadata": {}, 106 | "source": [ 107 | "Let us now define a dictionary which will hold publications in three categories for each entry." 
108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "collapsed": false, 113 | "input": [ 114 | "import collections\n", 115 | "entry_pub_keys = collections.defaultdict( \\\n", 116 | " lambda:{\"cited_by\":set(), \"appears_without_citation\":set(), \"depositor_citations\":set()} \\\n", 117 | ")" 118 | ], 119 | "language": "python", 120 | "metadata": {}, 121 | "outputs": [], 122 | "prompt_number": 8 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "metadata": {}, 127 | "source": [ 128 | "Since the two API calls return publications in slightly different formats, we need to store them in a more uniform data structure before further analysis.\n", 129 | "We should define a unique identifier for an article, so that we do not consider the same article twice.\n", 130 | "\n", 131 | "Pubmed id would have been a good choice, but not all articles are indexed in pubmed, notably those from Acta Cryst!\n", 132 | "\n", 133 | "So, let us create a unique key which is a composite of title, journal name, volume, pages, publication year and pubmed_id." 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "collapsed": false, 139 | "input": [ 140 | "from collections import namedtuple\n", 141 | "ArticleKey = namedtuple(\"ArticleKey\", [\"title\",\"journal\",\"volume\",\"pages\",\"year\",\"pubmed_id\"])" 142 | ], 143 | "language": "python", 144 | "metadata": {}, 145 | "outputs": [], 146 | "prompt_number": 6 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "Let us write functions to create ArticleKey from the calls mentioned above." 
153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "collapsed": false, 158 | "input": [ 159 | "def make_entry_citation_key(pub_info) :\n", 160 | " return ArticleKey(\n", 161 | " pub_info[\"title\"],\n", 162 | " pub_info[\"journal_info\"][\"pdb_abbreviation\"],\n", 163 | " pub_info[\"journal_info\"][\"volume\"],\n", 164 | " pub_info[\"journal_info\"][\"pages\"],\n", 165 | " pub_info[\"journal_info\"][\"year\"],\n", 166 | " pub_info[\"pubmed_id\"],\n", 167 | " )\n", 168 | "\n", 169 | "def make_entry_related_publication_key(pub_info) :\n", 170 | " return ArticleKey(\n", 171 | " pub_info[\"title\"],\n", 172 | " pub_info[\"journal\"],\n", 173 | " pub_info[\"volume\"],\n", 174 | " pub_info[\"pages\"],\n", 175 | " pub_info[\"year\"],\n", 176 | " pub_info[\"pubmed_id\"],\n", 177 | " )" 178 | ], 179 | "language": "python", 180 | "metadata": {}, 181 | "outputs": [], 182 | "prompt_number": 10 183 | }, 184 | { 185 | "cell_type": "markdown", 186 | "metadata": {}, 187 | "source": [ 188 | "Let us obtain unique articles from /pdb/entry/publications" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "collapsed": false, 194 | "input": [ 195 | "for pdb_id in pdb_ids_list :\n", 196 | " pub_url = PDBE_API_URL + \"/pdb/entry/publications/\" +pdb_id\n", 197 | " try :\n", 198 | " api_pub_data = get_PDBe_API_data(pub_url)[pdb_id]\n", 199 | " except :\n", 200 | " logging.warn(\"Entry publications could not be obtained for PDB id \" + pdb_id)\n", 201 | " else :\n", 202 | " for pub_info in api_pub_data :\n", 203 | " pub_key = make_entry_citation_key(pub_info)\n", 204 | " if pub_key.year is None :\n", 205 | " continue\n", 206 | " entry_pub_keys[pdb_id][\"depositor_citations\"].add(pub_key)\n", 207 | " \n", 208 | "logging.info(\"Entry publications obtained for %d entries.\" % len(entry_pub_keys))" 209 | ], 210 | "language": "python", 211 | "metadata": {}, 212 | "outputs": [ 213 | { 214 | "output_type": "stream", 215 | "stream": "stdout", 216 | "text": [ 217 | "LOG|11-Nov-2014 
14:15:09|INFO Entry publications obtained for 38 entries.\n" 218 | ] 219 | } 220 | ], 221 | "prompt_number": 13 222 | }, 223 | { 224 | "cell_type": "markdown", 225 | "metadata": {}, 226 | "source": [ 227 | "Now let us do the same for /pdb/entry/related_publications" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "collapsed": false, 233 | "input": [ 234 | "for pdb_id in pdb_ids_list :\n", 235 | " pub_url = PDBE_API_URL + \"/pdb/entry/related_publications/\" +pdb_id\n", 236 | " try :\n", 237 | " api_pub_data = get_PDBe_API_data(pub_url)[pdb_id]\n", 238 | " except :\n", 239 | " logging.warn(\"Entry related publications could not be obtained for PDB id \" + pdb_id)\n", 240 | " else :\n", 241 | " for pub_category in api_pub_data :\n", 242 | " for pub_type, publications in api_pub_data[pub_category].items() :\n", 243 | " for pub_info in publications :\n", 244 | " pub_key = make_entry_related_publication_key(pub_info)\n", 245 | " if pub_key.year is None :\n", 246 | " continue\n", 247 | " entry_pub_keys[pdb_id][pub_category].add(pub_key)\n", 248 | "\n", 249 | "logging.info(\"Entry related publications obtained for %d entries.\" % len(entry_pub_keys))" 250 | ], 251 | "language": "python", 252 | "metadata": {}, 253 | "outputs": [ 254 | { 255 | "output_type": "stream", 256 | "stream": "stdout", 257 | "text": [ 258 | "LOG|11-Nov-2014 14:16:40|WARNING Error fetching PDBe-API data! Trial number 0 for call http://www.ebi.ac.uk/pdbe/api/pdb/entry/related_publications/2cds\n" 259 | ] 260 | }, 261 | { 262 | "output_type": "stream", 263 | "stream": "stdout", 264 | "text": [ 265 | "LOG|11-Nov-2014 14:16:40|WARNING Error fetching PDBe-API data! Trial number 1 for call http://www.ebi.ac.uk/pdbe/api/pdb/entry/related_publications/2cds\n" 266 | ] 267 | }, 268 | { 269 | "output_type": "stream", 270 | "stream": "stdout", 271 | "text": [ 272 | "LOG|11-Nov-2014 14:16:40|WARNING Error fetching PDBe-API data! 
Trial number 2 for call http://www.ebi.ac.uk/pdbe/api/pdb/entry/related_publications/2cds\n" 273 | ] 274 | }, 275 | { 276 | "output_type": "stream", 277 | "stream": "stdout", 278 | "text": [ 279 | "LOG|11-Nov-2014 14:16:40|WARNING Entry related publications could not be obtained for PDB id 2cds\n" 280 | ] 281 | }, 282 | { 283 | "output_type": "stream", 284 | "stream": "stdout", 285 | "text": [ 286 | "LOG|11-Nov-2014 14:16:41|INFO Entry related publications obtained for 38 entries.\n" 287 | ] 288 | } 289 | ], 290 | "prompt_number": 15 291 | }, 292 | { 293 | "cell_type": "heading", 294 | "level": 3, 295 | "metadata": {}, 296 | "source": [ 297 | "Ranking entries on impact" 298 | ] 299 | }, 300 | { 301 | "cell_type": "markdown", 302 | "metadata": {}, 303 | "source": [ 304 | "Let us rank entries according to the number of publications in all three categories, and print a few at the top." 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "collapsed": false, 310 | "input": [ 311 | "for pub_category in [\"depositor_citations\", \"cited_by\", \"appears_without_citation\"] :\n", 312 | " def key_func(pdb_id) :\n", 313 | " return len(entry_pub_keys[pdb_id][pub_category])\n", 314 | " for pdb_id in sorted(entry_pub_keys.keys(), reverse=True, key=key_func) [0:3] :\n", 315 | " logging.info(\"PDB id %s has %d citations of type %s.\" % (pdb_id, len(entry_pub_keys[pdb_id][pub_category]), pub_category))" 316 | ], 317 | "language": "python", 318 | "metadata": {}, 319 | "outputs": [ 320 | { 321 | "output_type": "stream", 322 | "stream": "stdout", 323 | "text": [ 324 | "LOG|11-Nov-2014 14:18:48|INFO PDB id 1cbr has 5 citations of type depositor_citations.\n" 325 | ] 326 | }, 327 | { 328 | "output_type": "stream", 329 | "stream": "stdout", 330 | "text": [ 331 | "LOG|11-Nov-2014 14:18:48|INFO PDB id 1cbs has 5 citations of type depositor_citations.\n" 332 | ] 333 | }, 334 | { 335 | "output_type": "stream", 336 | "stream": "stdout", 337 | "text": [ 338 | "LOG|11-Nov-2014 
14:18:48|INFO PDB id 1cbq has 5 citations of type depositor_citations.\n" 339 | ] 340 | }, 341 | { 342 | "output_type": "stream", 343 | "stream": "stdout", 344 | "text": [ 345 | "LOG|11-Nov-2014 14:18:48|INFO PDB id 1qk2 has 156 citations of type cited_by.\n" 346 | ] 347 | }, 348 | { 349 | "output_type": "stream", 350 | "stream": "stdout", 351 | "text": [ 352 | "LOG|11-Nov-2014 14:18:48|INFO PDB id 1qk0 has 156 citations of type cited_by.\n" 353 | ] 354 | }, 355 | { 356 | "output_type": "stream", 357 | "stream": "stdout", 358 | "text": [ 359 | "LOG|11-Nov-2014 14:18:48|INFO PDB id 1qjw has 156 citations of type cited_by.\n" 360 | ] 361 | }, 362 | { 363 | "output_type": "stream", 364 | "stream": "stdout", 365 | "text": [ 366 | "LOG|11-Nov-2014 14:18:48|INFO PDB id 1fss has 11 citations of type appears_without_citation.\n" 367 | ] 368 | }, 369 | { 370 | "output_type": "stream", 371 | "stream": "stdout", 372 | "text": [ 373 | "LOG|11-Nov-2014 14:18:48|INFO PDB id 1fcc has 10 citations of type appears_without_citation.\n" 374 | ] 375 | }, 376 | { 377 | "output_type": "stream", 378 | "stream": "stdout", 379 | "text": [ 380 | "LOG|11-Nov-2014 14:18:48|INFO PDB id 1cbs has 5 citations of type appears_without_citation.\n" 381 | ] 382 | } 383 | ], 384 | "prompt_number": 16 385 | }, 386 | { 387 | "cell_type": "markdown", 388 | "metadata": {}, 389 | "source": [ 390 | "Let us plot the number of publications now." 
391 | ] 392 | }, 393 | { 394 | "cell_type": "code", 395 | "collapsed": false, 396 | "input": [ 397 | "%matplotlib inline\n", 398 | "def make_publications_bar_plot(ordered_pdbids, xtick_maker) :\n", 399 | " import matplotlib.pyplot as plt\n", 400 | " fig = plt.figure()\n", 401 | " ax = fig.add_subplot(111)\n", 402 | " plt.ylabel(\"Number of publications\")\n", 403 | " pub_categories = [\"depositor_citations\", \"cited_by\", \"appears_without_citation\"]\n", 404 | " bar_colours = [\"red\",\"green\",\"blue\"]\n", 405 | " plot_objects = []\n", 406 | " xticks, xtick_labels = [], []\n", 407 | " for pci in range(len(pub_categories)) :\n", 408 | " x, y = [], []\n", 409 | " pub_category = pub_categories[pci]\n", 410 | " for pi in range(len(ordered_pdbids)) :\n", 411 | " pdb_id = ordered_pdbids[pi]\n", 412 | " x.append( 5 + pci + pi*(1+len(pub_categories)) )\n", 413 | " if pci==1 :\n", 414 | " xticks.append( 5 + pci + pi*(1+len(pub_categories)) )\n", 415 | " xtick_labels.append( xtick_maker(pdb_id) )\n", 416 | " y.append( len(entry_pub_keys[pdb_id][pub_category]) )\n", 417 | " plot_objects.append( ax.bar(x, y, color=bar_colours[pci]) )\n", 418 | " ax.legend( [po[0] for po in plot_objects], pub_categories )\n", 419 | " ax.set_xticks(xticks)\n", 420 | " xticks_obj = ax.set_xticklabels(xtick_labels)\n", 421 | " plt.setp(xticks_obj, rotation=90)\n", 422 | " plt.ylim([0,200])\n", 423 | " plt.show()\n", 424 | "\n", 425 | "make_publications_bar_plot(entry_pub_keys.keys(), lambda pid:pid)" 426 | ], 427 | "language": "python", 428 | "metadata": {}, 429 | "outputs": [ 430 | { 431 | "metadata": {}, 432 | "output_type": "display_data", 433 | "png": 
"iVBORw0KGgoAAAANSUhEUgAAAYEAAAEQCAYAAABWY8jCAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJztnXl8FEXa+L8TSOTKHQhnCJco/ERcBVEQgrqe68mCoCII\n76rrite+u+IBBnFFV3j11XddkUUQUVQU1wN1dZEgKoeuyCGgoAIC8eAQAsiRpH5/PN0zPZOZ6Q7J\nTI55vp9Pf6a7q6bq6erqeqqeukBRFEVRFEVRFEVRFEVRFEVRFEVRFEVRFEVRFEVRFEVRPNMOWAh8\nAawBbrbuZwHvAV8B7wIZjv/cCWwA1gPnxE1SRVEUpdppCfS0zpsBXwLHA38F/mzdvwN40DrvBnwO\nJAP5wEYgKU6yKoqiKDHmn8DZSC0/17rX0roGaQXc4fD/DtAnbtIpiqIkIPGqaecDJwHLEAXwg3X/\nBwIKoTWw1fGfrUCbOMmnKIqSkMRDCTQDXgFuAUpC3Ix1RCKam6IoilJFGsY4/GREATyLmINAav8t\nge+BVsCP1v1tSGeyTVvrXhAnnniiWblyZazkVRRFqa+sJNBP6yeWLQEfMB1YCzzquP86MMI6H0FA\nObwODAVSgA5AF2B5aKArV67EGOP5uPfee6vsp6rutSWM+hJHXZFT06L+xVGX5Aw9gBPDFdSxbAn0\nBa4GVgErrHt3IqOBXgJGA5uAIZbbWuv+WqAUuBE1BymKosSUWCqBD4nc0jg7wv0HrENRFEWJAw1q\nWoCjoLCwsLBSf8jPz6+yn6q615Yw6ksc1RFGfYmjOsLQOOIbRrzkdDJhwgSACaH3fZUKpXZgLPuW\noiiK4hGfzwdhyvxYjw5SlLiSlZXF7t27a1oMRakxMjMz2bVrl2f/2hJQ6hU+nw/NH0oiE+kbiNQS\n0LV5FEVREhhVAoqiKAmMKgFFUZQERpWAotQQI0eOZNy4cTUS9+9//3vuv//+Gonb5oILLuDZZ5+t\ntvCee+45zj333GoLL1HQjmGlXhGuUywrLY3dJaFrF1Yfmamp7Nq7t9L/u/baa2nXrh333XdfDKTy\nTlFREcOHD+e7776rMRlmzpzJ9OnTWbx4sSf/mzZtomPHjpSWlpKUpHVZJ5XtGNYhokq9Z3dJSUzX\nH/FVQcHUhwpNaWkpDRvWTFFSH9KvplEVqihxYsWKFfzqV78iLS2NoUOHcvDgQb/bm2++Sc+ePcnM\nzKRv376sXr3a75afn8+DDz5I9+7dycrKYtSoURw6dMjvPm3aNLp06UJ2djaXXHIJxcXFfrfbbruN\n3Nxc0tPT6dGjB2vXrgUCpqgDBw5w/vnns337dlJTU0lLS+P777/n0KFD3HrrrbRp04Y2bdpw2223\ncfjwYUBaDm3btuWvf/0rrVq1YvTo0VGf+7XXXqNnz56kp6fTuXNn3n33XQAKCgqYPn0669ev54Yb\nbmDJkiWkpqaSlZUFwPz58znppJNIT08nLy/PnvEKQP/+/QHIyMggLS2NpUuXMnPmTM444wy/n48/\n/phevXqRkZFB7969WbJkid+toKCA8ePH069fP9LS0jj33HPZuXMnAAcPHuTqq68mJyeHzMxMevfu\nzY8//ohSezCKEolw+QMwJoaHlzx56NAhk5eXZx599FFTWlpqXn75ZZOcnGzGjRtnPvvsM9OiRQuz\nfPlyU15ebp555hmTn59vDh8+bIwxpn379uaEE04wW7duNbt27TJ9+/Y199xzjzHGmAULFpicnByz\nYsUKc+jQITNmzBjTv39/Y4wx77zzjjn55JPNnj17jDHGrF+/3hQXFxtjjBk5cqQZN26cMcaYoqIi\n07Zt2yB5x40bZ0477TTz008/mZ9++smcfvrpfv8LFy40DRs2NGPHjjWHDx82v/zyS8TnXrZsmUlP\nTzf//ve/jTHGbNu2zaxfv94YY0xBQYGZPn26McaYmTNnmn79+
gX9t6ioyKxZs8YYY8yqVatMbm6u\n+ec//2mMMWbTpk3G5/OZsrIyv/8ZM2b4w9i5c6fJyMgws2fPNmVlZWbOnDkmMzPT7Nq1yxhjzIAB\nA0znzp3Nhg0bzC+//GIKCgrM2LFjjTHGPPnkk+aiiy4yv/zyiykvLzefffaZ2bt3r+s7ri1Eyo9E\nWJBTWwKKEgeWLl1KaWkpt9xyCw0aNGDQoEH06tULYwzTpk3j+uuvp1evXvh8Pq655hqOOeYYli5d\nCogt96abbqJNmzZkZmZy9913M2fOHEA6Q0ePHk3Pnj1JSUlh0qRJLFmyhC1btpCSkkJJSQnr1q2j\nvLycrl270rJlS79MxjKlmDAmleeff57x48eTk5NDTk4O9957b1AnblJSEhMmTCA5OZlGjRpFfO7p\n06czevRozjrrLABat25N165dK/gLJ8OAAQPo3r07ACeccAJDhw5l0aJFEf07mT9/Pl27duWqq64i\nKSmJoUOHctxxx/H666/70/Taa6+lc+fONGrUiCFDhvD5558DkJKSws6dO9mwYQM+n4+TTjqJ1NTU\nqPHVZVQJKEoc2L59O23aBO+W2r59ewA2b97MlClTyMzM9B9bt25l+/btfr/t2gX2W8rLy/O7FRcX\n+8MBaNq0KdnZ2Wzbto2BAwdy00038Yc//IHc3Fyuv/56Sjz2X2zfvj0oXGecAM2bNyclJcU1nK1b\nt9KpUydPcYaybNkyBg4cSIsWLcjIyGDq1Kl+k40b27dvJy8vL+he+/btg57BqRAbN27Mvn37ABg+\nfDjnnnsuQ4cOpU2bNtxxxx2UlpYe1TPUBVQJKEocaNWqFdu2BW+Ut3nzZkAK+Lvvvpvdu3f7j337\n9nHFFVf4/W7ZsiXo3FYorVu3ZtOmTX63/fv3s3PnTr/7mDFj+PTTT1m7di1fffUVDz/8sN+vNVrE\n/+skNNwtW7bQunXrCv91o127dmzcuNHVX7jwrrzySi699FK2bt3Kzz//zA033EB5ebmn+Nu0aeNP\nX5vNmzdXUMThaNiwIePHj+eLL77g448/5s0332TWrFmu/6urqBJQlDhw+umn07BhQx577DGOHDnC\nvHnz+OSTT/D5fPzud7/jySefZPny5Rhj2L9/P/Pnz/fXTI0xPPHEE2zbto1du3bxl7/8xa8ghg0b\nxowZM1i5ciWHDh3irrvuok+fPuTl5fHpp5+ybNkyjhw5QpMmTWjUqBENGjTwh2mbVHJzc9m5cyd7\nHcNchw0bxv3338+OHTvYsWMH9913H8OHD6/0c48ePZoZM2bw/vvvU15ezrZt2/jyyy8r+MvNzWXr\n1q0cOXLEf2/fvn1kZmaSkpLC8uXLef755/2Ff/PmzUlKSuLrr78OG+/555/PV199xZw5cygtLeXF\nF19k/fr1/OY3v/H7iWRSWrhwIatXr6asrIzU1FSSk5P96VYfUSWgKHEgOTmZefPmMXPmTLKzs3np\npZcYNGgQACeffDLTpk3jpptuIisriy5dujBr1qygmvqVV17JOeecQ6dOnejSpQv33HMPAGeddRYT\nJ05k0KBBtG7dmm+//ZYXXngBgL1793LdddeRlZVFfn4+OTk5/OlPf/KHaYd/3HHHMWzYMDp27EhW\nVhbff/8999xzD6eccgo9evSgR48enHLKKf447f97oVevXsyYMYPbbruNjIwMCgoKglo1NmeddRbd\nu3enZcuWtGjRAoAnnniC8ePHk5aWxsSJE4NaRk2aNOHuu++mb9++ZGVlsWzZsqBnys7O5s0332TK\nlCnk5OQwefJk3nzzTf/Io9BncP73hx9+YPDgwaSnp9OtWzcKCgqOSgHWFXSymFKvqEuTxbzSoUMH\npk+fzplnnhmzOJT6g04WU5QQYllAK0pdR81BiqJUiQceeIDU1NQKx4UXXljToikeUHOQUq/QTWWU\nREc3lVEURVE8o0pAURQlg
VEloCiKksCoElAURUlgVAkoiqIkMKoEFKUGqe4tEb1sWVlUVBS0IJ2S\n2KgSUOo9aRlp/mUBYnGkZaQdtWxXXXUV//rXv/zXSUlJfPPNN0cdnnP5A0Xxgs4YVuo9JXtKoDCG\n4RdW75IUVZ3noPMklMqgLQFFiRPfffcdl19+OS1atCAnJ4cxY8YEbYlob5l44oknkpqayty5c4Ho\nW09G27LSjUmTJtG8eXM6dOjA888/D8Ann3xCy5YtgxTJvHnz6NmzZ5WfX6mdqBJQlDhQVlbGb37z\nGzp06MDmzZvZvn07Q4cODTLdfPDBBwCsWrWKkpISBg8ezIoVKxg9ejTTpk1j165dXH/99Vx88cUc\nOXKEw4cPc+mllzJixAh2797N4MGDeeWVVzyZg77//nt27tzJ9u3beeaZZ7juuuvYsGEDvXr1Ijs7\nO8hE9eyzzzJixIjqTxSlVqBKQFHiwPLlyykuLubhhx+mcePGpKSk0LdvX1fTzVNPPRV268klS5ZE\n3LLSKxMnTiQ5OZn+/ftz4YUX8uKLLwJwzTXXMHv2bAB27drFu+++y5VXXnn0D6/UarRPQFHiwHff\nfUf79u1JSqpcvWvz5s3MmjWLxx9/3H/vyJEjFBcXY4wJu2Wllz6BzMxMGjduHPQ/e+vFq666iu7d\nu3PgwAFeeukl+vfvT25ubqXkVuoO2hJQlDjQrl07tmzZQllZWaX+l5eXF3HryUhbVnoxB+3evZsD\nBw4E/c9WKG3btqVPnz7MmzeP2bNn1+sNVRRVAooSF0499VRatWrF2LFjOXDgAAcPHuSjjz6q4C83\nNzdoy8RoW09G2rLSK/feey9Hjhxh8eLFzJ8/n8GDB/vdrrnmGh566CHWrFnD5ZdfXrWHV2o1qgQU\nJQ4kJSXxxhtvsHHjRvLy8mjXrh1z586tMK6/sLCQESNGkJmZycsvvxxx60mIvmVlNHw+H61atSIz\nM5PWrVszfPhwpk6dyrHHHuv3c/nll7NlyxYuu+wyGjVqVP0JotQa6uKsEt1PQIlIuLXU0zLSZK5A\njEhNT2Xvz/Vv97IuXbowdepU3dayjqHbSypKCPWxgI418+bNw+fzqQJIANQcpCj1kKps+VhQUMCN\nN97I3/72tzhIqtQ0ag5S6hW6vaSS6Oj2koqiKIpnVAkoiqIkMKoEFEVREhhVAoqiKAmMKgFFUZQE\nRpWAoihxITU1lU2bNkV0z8/PZ8GCBfETqBJs2bKF1NTUah15dsEFF/Dss89WW3hHiyoBpd6TlpYV\n2+0l07Jq+hHrBCUlJeTn5wPh90KO1daYzo17jpa8vDxKSkr88hUUFDB9+nTP/y8sLKywEN9bb71V\nKxbni7USeBr4AVjtuFcIbAVWWMf5Drc7gQ3AeuCcGMumJAglJbsBE7NDwq/7lJaW1rQIdQbdx9k7\nZwAnEawE7gVuD+O3G/A5kAzkAxsJr6SMokQiXP4ADJgYHt7y5KRJk0ynTp1Mamqq6datm3n11VeN\nMcbMmDHDnH766eamm24y6enp5rjjjjMLFizw/2/AgAFm7Nixpnfv3iYtLc1ccsklZteuXX73JUuW\nmNNOO81kZGSYE0880RQVFfndnn76aXP88ceb1NRU07FjRzN16lS/28KFC02bNm3MQw89ZFq2bGmu\nueYas2PHDnPhhReajIwMk5WVZc444wxTXl4e8Zmefvppc9FFF/mvO3fubAYPHuy/btu2rVm5cqUx\nxhifz2c2btxopk6dapKTk01KSopp1qyZufjii40xxuTn55vJkyebHj16mPT0dHPFFVeYgwcP+sN6\n6qmnTOfOnU1WVpa5+OKLzfbt240xxnz77bfG5/OZsrKyoDT7xz/+YdatW2eOOeYY06BBA9OsWTOT\nmZkZ9R0dOHDA3H777aZ9+/YmPT3d9OvXzxw8eNAfR2lpqbnrrrtMgwYNTKNGjUyzZs3MmDF
jjDHG\n3HzzzaZdu3YmLS3NnHzyyWbx4sXGGGPefvttk5KSYpKTk02zZs1Mz549g2Q0xpjy8nIzceJE0759\ne9OiRQtzzTXXmD179gQ93zPPPGPy8vJMTk6O+ctf/hLxGSLlR/kOaoZ8KiqBP4bxdydwh+P6HaBP\nGH9RX6KS2ITLH9QSJTB37lxTXFxsjDHmxRdfNE2bNjXFxcVmxowZpmHDhubRRx81paWl5sUXXzTp\n6elm9+7dxhgpLNq0aWO++OILs3//fjNo0CBz9dVXG2OM2bp1q8nOzjZvv/22McaY9957z2RnZ5sd\nO3YYY4yZP3+++eabb4wxxixatMg0adLEfPbZZ8YYUQINGzY0Y8eONYcPHza//PKLGTt2rLnhhhtM\naWmpKS0tNR9++GHUZ/rmm29MRkaGMcaYbdu2mfbt25t27doZY4z5+uuvgwpdn89nvv76a2OMMSNH\njjTjxo0LCqt9+/bm1FNPNcXFxWbXrl3m+OOPN08++aQxxpgFCxaYnJwcs2LFCnPo0CEzZswY079/\nf2NMeCVQUFBgpk+fbowxZubMmaZfv36e3tGNN95oBg4caLZv327KysrMkiVLzKFDhyrE4QzfZvbs\n2WbXrl2mrKzMTJkyxbRs2dIcOnTIGGNMYWGhGT58eJB/ZxjTp083nTt3Nt9++63Zt2+fufzyy/3+\n7bivu+46c/DgQbNy5UpzzDHHmHXr1oV9hkj5kQhKoKb6BMYAK4HpQIZ1rzViJrLZCrRBUeoJv/3t\nb2nZsiUAQ4YMoUuXLixfvhyAFi1a+LeJHDJkCF27duXNN98E8G8r2a1bN5o0acLEiRN56aWXKC8v\nZ/bs2VxwwQWcd955AJx99tmccsopzJ8/H5DOxw4dOgCykf0555zD4sWL/TIlJSUxYcIEkpOTadSo\nESkpKRQXF7Np0yYaNGhA3759oz5Thw4dSE1NZcWKFXzwwQece+65tG7dmi+//JJFixbRv3//iP81\nIZ2sPp+Pm2++mZYtW5KZmclFF13E559/DsBzzz3H6NGj6dmzJykpKUyaNIklS5awZcsW13QPjScS\n5eXlzJgxg//93/+lVatWJCUl0adPH1JSUjyFe9VVV5GZmUlSUhK33347hw4d4ssvv/T7jSbHc889\nxx//+Efy8/Np2rQpkyZN4oUXXqC8vNzv59577+WYY46hR48enHjiiaxcudLTc7nhRQk8DKQhZpoF\nwA6gKr0Zfwc6AD2BYmBKFL+6CIxSb5g1axYnnXQSmZmZZGZmsmbNGnbs2IHP5wu7TWRxcbH/ul27\ndv7zvLw8jhw5wo4dO9i8eTNz5871h5mZmclHH33E999/D8Dbb79Nnz59yM7OJjMzk7feeoudO3f6\nw2revHlQIfenP/2Jzp07c84559CpUyceeugh1+caMGAARUVFLF68mAEDBjBgwAAWLVrEBx98wIAB\nAyqVRraSBGjcuDH79+8HoLi4mPbt2/vdmjZtSnZ2doWd1arCjh07OHjwIJ06dfLkP7RfYPLkyXTr\n1o2MjAwyMzPZs2cPO3bs8BRW6PPl5eVRWlrKDz/84L/nTJsmTZr406aqeFlK+hzgT8BlwCbgcmAx\ncLRjm350nP8DeMM63wa0c7i1te5VoLCw0H9eUFBAQUHBUYqiKPFh8+bNXHfddbz//vucdtpp+Hw+\nTjrpJEBqieG2ibzkkkv8184a75YtW0hOTqZ58+bk5eUxfPhwnnrqqQpxHjp0iEGDBjF79mwuueQS\nGjRowGWXXRZUIw0tyJo1a8bkyZOZPHkyX3zxBWeeeSa9evWKuqT0gAEDeP3119m0aRN33303GRkZ\nzJ49m6VLlzJmzJiw/6lsx2rr1q2Dhpfu37+fnTt30qZNG/9eyQcOHKBZs2YAfiVYmbhycnJo1KgR\nGzdupEePHlH9hoa5ePFiHn74Yd5//326d+8OQFZWlj+
t3WQIfb4tW7bQsGFDcnNzPbV2wlFUVERR\nUZGrPy8tAVtR/AZ4GdhD1WrorRznlxHoL3gdGAqkIC2FLsDycAEUFhb6D1UASl1g//79+Hw+cnJy\n/GaHNWvW+N1//PFH/zaRc+fOZf369VxwwQWAKInZs2ezbt06Dhw4wPjx4xk8eDA+n4+rr76aN954\ng3fffZeysjIOHjxIUVER27Zt4/Dhwxw+fJicnBySkpJ4++23effdd6PKOX/+fDZu3IgxhrS0NBo0\naECDBg2i/mfAgAEsXLiQgwcP0rp1a/r168c777zDrl27/IoulNzcXL755hvXdLML0WHDhjFjxgxW\nrlzJoUOHuOuuu+jTpw95eXk0b96cNm3a8Oyzz1JWVsbTTz8dtEVnbm4uW7du5ciRI1HjSkpKYtSo\nUdx+++0UFxdTVlbGkiVLOHz4cFj5nXGUlJTQsGFDcnJyOHz4MPfddx979wb2sWjZsiWbNm2KaBIa\nNmwYjzzyCJs2bWLfvn3cddddDB06lKSkyEW0m5mroKAgqKyM+NxRQxHeQIZsnoyYg1oABz38D2AO\n8DHQFfgOGAU8BKxC+gQGALdZftcCL1m/bwM3ouYgpZ7QrVs3/vjHP3LaaafRsmVL1qxZQ79+/QCp\nJZ566qls2LCB5s2bM27cOF555RUyMzP97sOHD2fkyJG0atWKw4cP89hjjwGyKfxrr73GAw88QIsW\nLcjLy2PKlCkYY0hNTeWxxx5jyJAhZGVlMWfOnKDWhR22kw0bNvDrX/+a1NRUTj/9dP7whz+4mnS6\ndOlCamqqfyx+WloanTp1om/fvkHhO89Hjx7N2rVryczMjLiHsXPewFlnncXEiRMZNGgQrVu35ttv\nv+WFF17w+502bRoPP/wwOTk5rF27Nqgv46yzzqJ79+60bNmSFi1aRH2WyZMnc8IJJ9CrVy+ys7O5\n8847w9bmb7nlFl5++WWysrK49dZbOe+88zjvvPM49thjyc/Pp3HjxuTl5fn92/s3Z2dnc8opp1SI\nd9SoUQwfPpz+/fvTsWNHmjRpwuOPPx427aLdOxq8hpIN/AyUAU2BVOD7qP+IHcZrR4+SeITdXjIt\nK6Zj+VNTM9m7d9dR/3/mzJlMnz49qMPWycCBAxk+fDijRo066jiUxCFW20seB7RHOodBauizjk5E\nRYkvVSmgawta8VFihRdz0GxkhFA/4BTr6BVLoRQlkfCyXEJNzlC1180JPdLS0ti6dat7ALWM7t27\nh32eOXPm1LRoNYKXnLUOmc1bW6oiag5SIqLbSyqJTiy2l1xD8IgeRVEUpZ7gpU+gOTJiZzlwyLpn\ngItjJZSiKIoSH7wogULr125f+Kg9piFFURSlCnhRAkVAS6Qz2CAtgh+j/UFRaorMzExd5ldJaOz5\nJV7x8rUMQUYHLbKu+yPLSMytVEzVh3YMK4oDW+lF+i58Pp+05wuPfqipWxxK7acq8wTuQVoBdu2/\nOTJzuKaUgKIoilJNeBkd5AN+clzvxPtMY0VRFKUW46Ul8A7wL+B5pPC/AlnbR1EURanjeFECf0aW\nj+6HdAxPBV6NpVCKoihKfPCiBAzwinUoiqIo9YhofQIfWb/7gJKQY2+kPymKoih1h2gtAXtB7mbx\nEERRFEWJP15GB4XbRvJot5ZUFEVRahFelMD/C7luiOwypiiKotRxoimBuxD7/wkE9wf8iOwHrCiK\notRxoimBB5BtJCdbv/aRBYyNvWiKoihKrPEyRHQskAl0ARo57n8QE4kURVGUuOFFCfwOuBloB6wA\n+gBLgDNjKJeiKIoSB7x0DN8C9AY2AQOBk4A9MZRJURRFiRNelMBB4BfrvBGwHugaM4kURVGUuOHF\nHPQd0ifwT+A9YDfSKlAURVHqOF6UwGXWbyGyy1gasrKooiiKUsfxYg7qgxT8IEqgCOkXUBRFUeo4\nXpTAk8gicjb7rXu
KoihKHceLEgAod5yXAQ1iIIuiKIoSZ7wogW+ReQLJQAoyZPSbWAqlKIqixAcv\nSuAGZFnpbcBWpI/gulgKpSiKosQHL6ODfkD2FVYURVHqGdGUwB3AQ8DjYdwMYiJSFEVR6jDRlMBa\n6/c/YdxMDGRRFEVR4kw0JfCG9TszDnIoiqIoNYAXJRAOA1xczbIoiqIocSaaEpgSxU3NQYqiKPWA\naEqgyHF+DHAcMmnsS+BwDGVSFEVR4oSXIaIXIstE2BPEOgLXA2/FSihFURQlPnhRAv+DbCaz0bru\nhCgAVQKKoih1HC8zhvcSUAAgLYK9sRFHURRFiSfRWgKDrN9PkVr/S9b1YOueoiiKUseJpgQuIjAK\n6EdggHX+E7LNpKIoilLHiaYERsZLCEVRFKVm8NIxPCPk2m4djKpmWRRFUZQ440UJzCdQ8DdG9hze\nHjOJEpS0jDRK9pSQmp7K3p+1311RlPjgRQm8HHL9PPBRDGRJaEr2lEAhlBSW1LQoiqIkEF63l3Ry\nLNC8ugVRFEVR4o8XJbAPKLGOvcjCcnd4DP9pZFOa1Y57WcB7wFfAu0CGw+1OYAOwHjjHYxyKoijK\nUeJFCTQDUq0jDegCvOIx/BnAeSH3xiJK4FhggXUN0A3Zwayb9Z8nPMqnKIqiHCVeClkfMnHsEWRl\n0csqEf5iYHfIvYuBZ6zzZ4BLrfNLgDnAEWATMku5dyXiUhRFUSqJFyXwBLJg3CrgC2Tj+SeqEGcu\nYiLC+s21zlsjG9nbbAXaVCEeRVEUxQUvo4MGIiaacut6JoGtJ6uKIfreBGHdCgsL/ecFBQUUFBRU\nkziKoij1g6KiIoqKilz9eVECG4E8xESDdb4xom93fgBaAt8DrZAlKQC2Ae0c/tpa9yrgVAKKoihK\nRUIryBMmTAjrz4s5KA1YByxCNppZi3QSvwG8fhSyvQ6MsM5HAP903B8KpAAdkA7o5UcRvqIoiuIR\nLy2B8WHuGaTD2G2byTnIwnM5wHdWWA8iK5KORloXQyy/a637a4FS4EYP4SuKoihVwIsSKKpC+MMi\n3D87wv0HrENRFEWJAzoOX1EUJYFRJaAoipLARFMCC6zfv8ZDEEVRFCX+ROsTaAWcjszwfYGKHcGf\nxVAuRVEUJQ5EUwL3IqN52iDLRYQyMCYSKYqiKHEjmhKYax3jgfviI46iKIoST7wMEb0PWdytP2IO\nWoRMFFMURVHqOF5GBz0I3IwsHrfOOp8US6EURVGU+OClJXAh0BMos65nAp8jG8AoiqIodRgvLQFD\n8O5fGeiFLMvbAAAdrUlEQVRyDoqiKPUCLy2BSchw0IXIMNEBBHYDUxRFUeowXpTAHKQzuBfSAhgL\nFMdSKEVRFCU+eFECANuB12IpiKIoihJ/dO0gRVGUBEaVgKIoSgLjpgQaAl/GQxBFURQl/rgpgVJg\nPdA+DrIoiqIoccZLx3AWMlt4ObDfumeQ1UUVRVGUOowXJTAuzD2dLKYoilIP8LrHcD7QGfg30MTj\n/xRFUZRajpfRQdchS0pPta7bAq/GTCJFSSDSMtJIy0iraTGUBMZLjf4PQG9gqXX9FdAiZhIpSgJR\nsqekpkVQEhwvLYFD1mHTEO0TUBRFqRd4UQKLgLuRvoBfI6Yh3VRGURSlHuBFCYwFfgJWA9cDbwH3\nxFIoRVEUJT546RMoA54BliFmoPWoOUhRFKVe4KUlcCGwEXgMeBz4GrgglkIpiqLEgrSMNHw+X8QR\nWW7u9REvSuB/gIHIZjIDgALgkRjKpCiKEhNK9pRAYeRRWW7u9REvSmAv0hKw+ca6pyiKotRxovUJ\nDLJ+P0U6g1+yrgdb9xRFUZQ6TjQlcBGBDuAfEVMQyEihRrEUSlEURYkP0ZTAyHgJoSiKotQMXoaI\ndgTGIIvI2f51KWlFUZR6gBcl8E/gH8gs4XLrns4TUBRFqQd4UQIHkTkCiqIoSj3Di
xJ4HCgE/kXw\nQnKfxUIgRVEUJX54UQLdgeHIhLFyx/2BMZFIiYg9i3Hvz+GnaaRlpFGyp4TU9NSwftzcFUVJPLwo\ngcFAB+BwjGVRXHCbxeif7VjoMhsygruiKImHlxnDq4HMWAuiKIqixB8vLYFMZOXQTwj0CegQUUVR\nlHqAFyVwb8ylUBRFUWoEL0qgKNZCKIqiKDWDFyWwj8DksBQg2bqXOAtuK4qi1FO8KIFmjvMkpC+g\nT2zEURRFUeKJl9FBTsqRZSTOi4EsiqIoSpzx0hIY5DhPAk4GfomNOIqiKEo88aIEnPsKlAKbgEti\nJZCiKIoSP7wogZExinsTsk1lGXAE6A1kAS8C7S33IcDPMYpfURQl4YmmBCLND7BbBfdVMW6DbFq/\ny3FvLPAe8FfgDut6bBXjURSlhtF1q2ov0TqG9yNDQZ2HAUYjBXR14Au5vhh4xjp/Bri0muJRFKUG\n8a9b5bL+VSKQlpGGz+fzLwhZ00RTApOBKdYxDWgMXAu8gCwoV1UM8G9k0/rfWfdygR+s8x+sa0VR\nlHpDbVOIbn0C2cBtwFXALOBXwO5qirsvUAw0R0xA60PcDRF2MCssLPSfFxQUUFBQUE0iKYqi1A+K\nioooKipy9RdNCUwGLgOeAnoA1a22iq3fn4BXkY7hH4CWwPdAK+DHcH90KgFFURSlIqEV5AkTJoT1\nF80cdDvQBrgH2I4oAfuoas9OEyDVOm8KnIMsWf06MMK6PwKZmKYoiqLEiGgtgcrOJq4MuUjt35bh\nOeBdpH/gJaTzeRMyRFRRFEWJEV7mCcSCb4GeYe7vAs6OsyyKoigJSyxr+4qiKEotR5WAoihKAqNK\nQFEUJYFRJaAoipLAqBJQFEVJYFQJKIqiJDCqBBRFURIYVQKKoigJjCoBpVLUtmVwj5b68hyKUlVU\nCSiVorYtg3u01JfnUJSqokpAURQlgVEloCiKksCoElAURUlgVAkoiqIkMKoEFEVREhhVAoqiKAmM\nKgFFUZQERpWAoihKAqNKQFEUJYFRJaAoipLAqBJQFEVJYFQJKIqiJDCqBBRFURIYVQKKotQK0jLS\ndGnvGqBhTQugKIoCuqx3TaEtAUVRlARGlYCiKEoCo0pAURQlgVEloCiKksCoElAURUlgVAkoiqIk\nMKoEFEVREhhVAoqiKAmMKgGlXpKWkYbP59MZqPUIfacBqjMtVAko9ZKSPSVQqLNQ6xP6TgNUZ1qo\nElAURUlgVAkoiqIkMKoEPFAd9je1ZyYedSXf1Je8WV+eI96oEvBAddjf1J6ZeNSVfFNf8ma8nsNt\nyevqUv7xUmaqBBRFUSpByZ6SqIrGuzJqKMoiLavScVQnqgQUpRaQlpYVsUBQ6iulgKGkZHeNSqGb\nyihKLUAKAkNJia+mRVESDG0JKNWKF3toddhUa8NWhNoRqdQHVAko1YoXe2h12FTjaTONJkN96FBV\nEpt6rwS0tqYoihKZeq8E6kNtLSstjay0yEosK00UXYDgUQcV3SNTG8wsiqLEj9qoBM4D1gMbgDtq\nWJZawe6SEnaXRFZiu0tKMEF3gkcdVHSPTG0wsyiKEj9qmxJoAPwfogi6AcOA42MVmV3DLioq8viP\n8DXs1CZNYiWiUgNUPl8osUyryrRklcpT25RAb2AjsAk4ArwAXBKryOwadqQMXDHzha9h7/vll1iJ\nqNQAbvmiZog8sag2YKdVLOY7uLVkvSiJyptUq5/aqsxqmxJoA3znuN5q3as01ZHglTGj1HeOJj3T\n0rJqXaFVWz9Ed4IrIPZzOAu20PR2K/gqUlHRhIbh9k4D8x2C5YxG5eUMxst3WtGkGvysFcOofqVb\nW8uT2qYEqi2NKvNSJz04qcqdoZFGIMWiBhKLwtVNzqOpjZWU7A6aDRmuQKlMB3a4gs+LHJV5Did2\nvghXuHqRMxpVzRf2czgLttD0dutLcmvphgsjN
A43vHyHbnLGBrfZuu5KNxS3ARpeqOogkKOhtlWJ\n+gCFSJ8AwJ1AOfCQw8/nwInxFUtRFKXOsxLoWdNCuNEQ+BrIB1KQAj9mHcOKoihK7eN84Eukg/jO\nGpZFURRFURRFURRFURSlnlHbOoarShLwW+AlFz99gI+j+BkRcm0PcJh19KLVWxoBB138tCN46C9A\nS+D7mEikANjDRXbVqBSxZRAVRxTuAVYDP3oMoyPwjcu9rsB/I32V9vL7BjjTOn/DuvY53JznF3uU\npbI8BVwXo7DrNP/x4OdzF/f/Ax63jmlIhnjZ4X4rkI686OnACuDckDBODhPubxznLYC7rfBnWMfT\nDvck4HQXOd3keBx4zPEsoecAjYE/Aq8C84DbkILd5kPgL8iIrdQwMnyNKNSHgAsteUIpRSb+OadW\nrwDyrPsfAncByQ73f0Z6aAerrf/cALxjXa+2zm8ICe9/gO4u4bml52pglSOe1ZbsjwE3Amdb/q4C\n/gb8IUQGgNMA5xjANOBUpGJip10T4D7gTSRd7ftu6dXecv8J6VPbaJ2/gBRgNr+KmgrC2UjeCMfx\nwFlAs5D754Xx6+TXIX5Hh8gFMCrkui+SniOs4xqH23xEyb1iHTuB95DnvoaKZAE9Qu6tCOMvtAxZ\nBfweeU+nWIfz+34MeBG4CCnw5wCPAgOs42HkPScDC4AdwPCQOBaEkWOBJXO4IxvYFuY/CvAgorXb\nEZxoTiYjLQavLaEM4F+O61XW77lI4fn/qJiZPgNOcFwPA5Y7rpcgH/gQS5bfIjUbJ27Kyk2OaUiB\nMQa4GVgMTCXwQQHMRQq8gUjN5h/WPZuOlt9pwBrgUySDO2mPfKh/BzaHkXsFUiCuADo77v0bKaxP\nQhTvx0COwx0kTUKPy63fHUgB93ekEG1nHacBTyIfps3vgI+Qd3AD4ZWVW3o+DExC3msP4AErLT5H\nPsg3gGet/w4HnrEOJ58TPD+ngRXHWgK1zGlWuP2QIdPzrPtu6bUUuILgzaIaAkMtN5siZH2uidYz\nhmMW8BWwzHrui4BMJB99iSidzcCljv+EK1Cd2K3BScAH1jN+bYUZLozZyDM+QaDy8rjD/V0g13Gd\na93LBr6w7i1CCuAs4Fvk/T+CKLJBSAXPzk+XAyMd/7Vxq1iGc3feW2n9XoZ8a+kE8lpjS95VBJdX\n+cg7KrfkDnccdpErYdlE+ARzsg9J3CNAiXXsjRJmCvJB2Ky2fh9DMg5U/AA6IorgOKQAWkxwweNW\nwIO7snKTYxnBtcVk656TtWHCDb3XGilIngDWEawQ2wJXIoXuUuAtKo7qsmXqa/3/IuveyhB/V1tx\nd3L85whSkM4IOWYi73FDGPltwrkdh1QUtgDPI8rPxi09wxVyKwi0ChoiZgi7EPY5wrQJ995XIeli\n81mI+8qQX5vQ9KpMWrQCbkEU42pgXIT/tUYK6S1Ii24NgRZAPlIpuNW6XoEowkjHAcvfGgL5MgN4\nG1EIPoLTeB3RK2rrQq59jnt2OHZ6/xcwwTpfjSxHMxNpPTjz1WNUbIEXIpWYVoSvWK5D3oFNxxDZ\nbKUyHRn9CIF3eStSPh0iuLxaBdyEtGraE55QE6tSjTgz7nzkpTgnrM1EahwbgaZITSNcbaArkhne\nIdgUAnA/Yj6JhpuycpPjS6SWYZNl3XMyG6k52/RBarM2XyOK4xakCRw6y7zccr+UyB+s88NuhbRO\nfkE+jkYhfs+2nqfYug5tUTn5zop7SIhcSUiNOFThNbDkfA1JpzuQd2y3GGYSPT1XISYBm97Ix/wF\nUthkIu/ITvPGVKxVvooUqslI5eIWpFb9MgFTyAygl3V+LPCJde6WXi8iivpUpPBujbzPvxO5n+wE\nJA8cCbk/HGk1LgFeB/6MFI6hz9MMqRQ8YqXBbsTsWeA4Bli/tp0+tPBuiJhCXw4Jf671DJF4Avk+\nRyA1+DeQZ
20KLLT8rEby3LvI+4JALRzcTa4QvmLp7DM4D1GSi6xjM8FmxAeRWv3nyDtvQcW8eTPh\nuYnIE7wi/UdBXuyViF3QPpz4kObfI8AUpJnmpMBx9ENMDE4aIHbVDOs6m8As5tUhxw9IwWvbk/cR\nKNDLkU5VL62RcESTA+BaJEPaZolNyMfilHOdJcdmy72c4I/0FuTjXGqFMYqASQcrvpuQAmgJYkb4\nrxA5Qz/kZKA/cDuSxqGchNh2sfxFqgn1AjogBdxPSG13g3X+kuVm8whSWD5FoDCwsRWjW3r2Qmqx\nm6xjtRXWeALv+Tqkdv4Py++fQ+LKRdLqR+uYgxQKGUj6foMUEEeQwuYDhwxu6XUM0jcR2j9yo+Vm\n0w2p3a5BCq0bLRmc7ERMJ9cSnI4LqVgoJSPvvdyK70zCs9j6nY8ohlDut8KwKQJ+Rgpwu1L2usPd\nHgjyiHWEazUPRr67v1vXnZD+AxzXbyCmxZ+QCkLHCPJHoxGSLidSUVGD5CW7hdgUGRhh05ZAy6IT\nUjYdexQyHBX1bXQQSK2mI6J1yxz3xzjO/44k9hwkDa5Aarw3eozDh5gL+iG9/4uR2pyhYkeXcfwH\npPDwijOecqQG/apHOWxaESj0lhEYkRMqZyihcjZDCoQ/IYv6NXC4pSKmnv6IiQKkE9PmLaT5bdc2\nWyEFgZcOSq/4CNTAd1Jx1Mi1iGLYH+a/GUhh4yU9bf9Y/7HJR5T4LiRvnYwohVATjhvpSKHbAFlA\n8YcwfrKoOOrHOaIlG0kDJx0ImEWXIn0pc4ncuehDOtLPsI7OiEn0TuQ9ho7s8iF54MMI4TmxO5zD\nLb/bFnluCK/wQJRDdbEM6V95wbq+Aikr7BZfW8SMZb/Xnohy/SqMe2fLfRXB5uPBSGtpL2J2+xXS\nH/MZUsm6DbHvP4qYhxYjafkA0Uck/ppAZemoqY9KYB1S04m2Tth6y49d60hCam/nAH9FXuxbSIeY\nXXD9k0AnWKgSGYJ8gKFK5GQCBfhHBNt6+0eQ7QPHuZuycpMjXKHmVCJY/9+GtEgGIuaBWQQKuClI\nIdAM6aRbjHzoX1vu/0FqmR9bsi9GWhVOfgdcgNTU2iG1uf9G+gYiYZDm7uMe/ETC+ZGcTPjhhJsR\nWze4p+fXSAG62DpCTSN2PHZ6f0hF+/4sS2Y7fTORNHaOiomWb0DS+nxLfpC8PJfA6Cc3dy+kWTL0\nt44c5NntVvVpyPPvdfg/nopmjmh4yZ/hyMPbdwryPh+15DVI2txGQGGuouKIoZVIjd4uoI8gLY3Q\nAjrTxd0uwFcj31U/pLUzGWk99kbSsDeiGLdY8hZbYb+PtPIi8R0VrRQK7nZEkKF3+Y7rfOuel9Eq\nIEok1Aa9PiSO8cjLn4AM91tJcOfbmwSauO8hH+z7IWG4xePm/nekKX0tUsi8g9hRnaxEmql2Te9h\n5MOy+S3BIzBCyQtzL9yShjchz7wa+UhATFP2SKWRIceISviJhLPjbCnysf7HOg4j7/QbAvZbt/Rs\nhJgx7kY6M78meCir2zuH8B3DzntewrgQUbjNEIXxBcEmmkjuvRBzzmyk8LDz3SdULGxWI/nnSqSw\nDSdzuFFONiVhjq1IIW+bW9zyZ6QwfgTuwf07BVFKwxGTVTLSUl1GYJjlQ0jrJt867kBs+CDp1tQK\n+wDSggUpoFd4cHemFVa4V4XI6fTn7Kuw/+elo12xsBNmIZHtiPb1IqQpughpWh6wzr2MVoHISsTJ\nVwTbBhsT3EQMpR2BoYBe43Fz96Ks7Of6MwGTmfNZT0aar86jEwH75lsEj0BqRaDm+kfruN36XYkU\nQPa9UNIJPxchmh+vH8k8gmvC3RDbcCcC790tPRsi/U1jEXPWUqTz1MbLO19JsJLMIngEkdd8cxnS\nB7MaGYDgxf0TpIUwDClMByO18bMsv5Uh0ignm/uB65EWQhrSV/IQMsqsyPL
jlj8jhbGZYJNQpO80\nVCablUQeRWjfB/cC2s3dZj7SF/UtYkpsRCDP/YfA9+NUto0tP1462qtEQ3cvdYYp1q9zth6Oe178\nPEHwDNjZiO3zX4jGf8O6n4qYnZZb/+tNYASHzTbkRdphNSJg6wzHVgIrprrF41WOjUhNfZN1nWfd\nc3KYQCe6bZ5xFup/QxSBnclPQGpA6cgEmlcRW3uoqceWz2mCedW6Dr3fCxkdYk+i+hmZSPSpBz/9\nkJrePodf+/06R/J0Jdh8sxYZLmqvWvsG7um5FylU/wfp+N1BMF7e+RSkwH3JknEwMhnPSxihprE0\nS/6bCJ+nQ90bIi0YkMLUng+ygMC38b+IGeQNKmKQvoapSIF2M1Kb9yF5wTli5mKCzSxPIQXjHQSG\nELvlz0hh7CNYkYZ+p1juPut570RMfCAm1bcRRe5GOfItHEHMmTaNrbDLXNztfprBiPJ9GMm3rZC+\nNQgMRYbgvJKFVJb+G6nMFIWRL3Sk31FRn5RAkfXbEbGp2Z1OjQn0xLv5mY4MqbP9gZiIBiM2SC+K\nxv5Q9yCFzrvW9a8Jnizm/KCTkOa6PRzRLZ5w09Od7pVRVqMQE9hfkA+7I/JR2WxHClu7AO2GdGr9\nGaldn4j0CbyGjOK5AbFjg4xAcZJuyRE6CuppxO5ujx7pZ93r4cHPMrx9JF8gBdYLBOz9ay3Zi5E0\nj/ZeQWrQZ1hy/I5AP4i9BWq0d24XCLOQ93ymFfZllhxe8s1/QmR0Xof+hnPvg5i+7FnRlyGKeQAy\nTh0Cdmw7D4aSjQxhLUDG1N9j3V9A8BIGB5AC11Y0vyWg2PLxpnQjhTGLiv1vzu8UpDXqfHe2bHZa\nfBLiHso83AvoDS7uc5EK1BtIa8ummMAQaLv/LLRM2oUo8GizsM+I4uaZ+tgx/B+kE8ieTXcM0kHX\ny8XPR8h0cC9EUiLfIrbq0BFBNobADNKRjvulSG0odGRFtHiiudtDKiMVaosiPllFvqBih+IXSAF8\nO1JY+JCWxGqkiWyQ2rKNW01/BRVt0p8RPHrIi59oNEEKDrs/4iOk5XcQqT2W4J7eNschNb9bkaGV\nN+D+zv+DFAgLCC4QbEZ6CKOq9EYKyWKkJjydwL7e1xHc8orGRUh/QrQ1ozohrYo+1vVSJL22IUOI\nV+OeP6OFcTLeRiJFYibRlcC1jvOj/Q5fRRTB75Hvwfmsod+IW7kVLo5cKjfaMCz1UQl8TsVxzHZv\nv1c/jZFCqjsB+6whMIKjqkrEK27xuLl7KdRCCziQZ7U7715CTADOGnRzxHY7CmmiQ8Xa6ARHeKup\nWIt/gkBN/1FLNmeT/SCBSWufefDzc5hnrexH4paeryD55msCI6GWE36oYyif471AcCNSAQrhWwSh\n7qGjYY4mjqbI8FU7HT4kMBrJK16VbiSifaeDcK/pR2IUwet4uRXQkdyHIyOVbkFm1Ifi/EbcyqSY\nlTn1yRxkswNpnr9mXV9CRdutm59nkWbquciLuprgCVQNCF634xAVFwrrB9xLxZUH7cI13Edmj9S4\nHyl43eJxc59L8GzgcuueM+M4W0iNkCa3c5bxSKQAt5cG+AixUx5EapUl1v1Iph6Qls5ix/WHBIZl\ngmR+g6QXIfdBhq66+Umj4rO+TOBZ3d4HuKfnJORjdcruJFocQ5ECoQHRO7+9yPmOde9ZJP/YI06e\nsK7/4OKeYcVhD1MuQkYiOQtxtzg2IS3OfkjH5RNIJ6b9Prpa91oihXQPxMZ/vyMOt/zpFka07/Qi\njl4JTCBYCYTLFyke3NcjI4JWETziDioulOdWJnkpcxSLzoid+DvrWELwDFcvfuyefbszNHTNnX8T\nsANjnYeuAvgl0hmUiwwhsw+bSIuRjSVg03eLx8093AgOL5OXQselR6MXotA2W8dKKtZOHkU6Ewus\n4+/IuGp7tFE4Qj8SNz9uz+r2PsA9Pe8
nuOKUjpgVKhPHBVTE+Rxewgj3rOGGJEZyn4cUdB0Rk0sh\nFQtFtzDc1oz6AOmYt//jo+K8Crd35haG23caiVFUnNnvPEIXZqvqdxiO0HV/3Mqko4kjIbGXuE11\nHBDcueLFj90RtxgppJsTPPLBi6Jxy4yRFiODwJBBt3iqI+M4h4Cegti3nR9iP8T+u4Hw66asJriD\nqh8Vh8sVIUN3Ix3h8LI4ltOP27N6KRzc0nMSoiBPRCYWfknwTPTKTJRy4nwOL2GsRNLZpi/BBaoX\n93BhViYOtzWjnP09NqGFvts7cwvD7TuNxHeIKeskAvMDnMf2EP9H+x16VTReyiQvZU7C42WJW6/L\n4P4X0sM/ACn0fkIKR5APo5t1/htkdMTZjv+ebB0PIrX90wgeY28TaTGyUFkgOGNcS0UiuXvJOEUE\nCuP3kGWMnePO3Wqm4ZSZl5aE19qY1w8p0rN6fR9ePkSQd/0LUlB0se55icPtObzKace3iuDW168q\n4b6Uioo7dJ6AWxhua0a9jaS/nT9+S2B4qo1b/nQLI9p36pbeTxN5dM0cx7lbvojm7kXReCmTvObN\nhMdtiVuvfo5HPvTQDTPOR2qCSxG7/V+t83FIs9Ue91tE5Bqvc0ZwpMXImiKdr5FwqyGHc3dTIqE4\n/bjVTI/G1GPL6eUj8eIn2kdShPv78Fo5GIAM57wLKSjeRtZR8hKH23N4CSOUDALrGIV7r5Hc7fVt\n7AL+c4IHTniNIxVJ4weQJQ+2ONw6IbX6A8jzfUTk9aoi5c9oYUT7TqFyNf1IuOULN3cvisatTKrK\n/g0Jh9sSt178uCW4vfFHE6RDNN1ya0z4mYmhhLNzh/vI3GoxlbFnhuLVzOK1ZlpE5MJrn4ucXj4S\nNz9V+Ujs9+GlcgBifujmuL4c9wk7dhxea57RwojG0VQO0gnkYS+VA2cYnyLp9hTSIds+7D8kXe2h\nwZWNI1IYXt55VdLbxi1feM030XArk6ojjqjUpyGiC5HFnJw2w2RkLPTVyIQsNz9rkTHJ+5AEfxmZ\nOPUokuA+AqMfQod0hRviFYrbgk+2+w9IDWt3GD8fIyMFormHriDppCsycmG1i5+PiTy6whB5uWAI\nDLFzew63NZ68sIbo78zLAlyhcyGaIcNB1xIYmQRSAQgdGfRnAhOUosVRFeww3N5ZNIVkv/eqxmGH\n0YKKyxZci0wkq644IoWxh6N/55XBLV8ku7i7lQfgXiatq4Y4olKfhoheQ8WNMY4gi4w95dHPVALL\nD2xCzBuvILUcHzIsqwnSPHXWhjMIrEgaLYPnenAHWWukGeE1/SLEJh3N/UyiF74gH3E0PwVR5HSr\nmdpD7NyeozrwEf2deUnvH5GPyf4Q9yH9PdMJHlMfbmjoGCruFxsuDje8yOn2ztzcqyMOm3Dr1txH\n+DWhKhuHm5x7if7Oqwu3fLHIxd0LbmXSxGqIQ6kEC4m+YUa4zSJAOkrt3a/cbJHVYat0ozrMLNH4\njqqZpKoTt3fmJb3bEbzJh40P6TR1e9bqeKdewnB7Z27u1RFHtLQ4VE1xuIXh9s6rC7d84eYeDxmU\naqY6EryqH2ptIR4FX3Xg9s6qI73dnrU64ohHvqgvaaEFo6LEgXh87HWFRHpWNzQtFCVB0I9dURRF\nURRFURRFURRFURRFURRFURRFURRFURRFUeos/x89dX2lUPvP0QAAAABJRU5ErkJggg==\n", 434 | "text": [ 435 | "" 436 | ] 437 | } 438 | ], 439 | "prompt_number": 18 440 | }, 441 | { 442 | "cell_type": "markdown", 443 | "metadata": {}, 444 | "source": [ 445 | "\n", 446 | "\n", 447 | "Do number of publications correlate with year 
of deposition of the entry? From the data we already fetched from the API, we have publication years - the earliest of which would be close to the year of deposition.\n", 448 | "\n", 449 | "Let us reorder the X axis of the plot based on this year." 450 | ] 451 | }, 452 | { 453 | "cell_type": "code", 454 | "collapsed": false, 455 | "input": [ 456 | "def earliest_year(pdb_id) :\n", 457 | " ret_year = 3000\n", 458 | " for pub_cat in entry_pub_keys[pdb_id] :\n", 459 | " for pub_key in entry_pub_keys[pdb_id][pub_cat] :\n", 460 | " if pub_key.year == None :\n", 461 | " logging.warn(\"Missing year publication! \" + str(pub_key))\n", 462 | " continue\n", 463 | " entry_year = int(pub_key.year)\n", 464 | " if ret_year > entry_year :\n", 465 | " ret_year = entry_year\n", 466 | " return ret_year\n", 467 | "\n", 468 | "pdbids_with_publications = list( sorted(entry_pub_keys.keys(), key=earliest_year) )\n", 469 | "make_publications_bar_plot(pdbids_with_publications, lambda pid:pid+\":\"+str(earliest_year(pid)))" 470 | ], 471 | "language": "python", 472 | "metadata": {}, 473 | "outputs": [ 474 | { 475 | "metadata": {}, 476 | "output_type": "display_data", 477 | "png": 
"iVBORw0KGgoAAAANSUhEUgAAAYEAAAEsCAYAAAAl2w8UAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzsnXl4FUX2sN8bILJlDzuEIOACI4IIoiAEdZRxQ0UQFwTl\nG3Uc0VF/MzKuuG8wOjrjiIqAoii4Ky64BVFRVBBZFYSwu7AIAWQJqe+P053u27n3die5uSHJeZ+n\nn3u763RXdVV1nVpOVYGiKIqiKIqiKIqiKIqiKIqiKIqiKIqiKIqiKIqiKIqiBKYN8DGwGFgEXG1d\nzwTeB34AZgLprnv+CSwHlgEnJyykiqIoStxpDnS1/jcGvgcOBx4A/mFdvwG4z/rfCfgWqAfkAiuA\npASFVVEURalkXgNOQmr5zaxrza1zkFbADS75d4FeCQudoihKLSRRNe1coBvwJaIAfrau/4yjEFoC\n61z3rANaJSh8iqIotZJEKIHGwMvANUChx81YRzRiuSmKoigVpG4lP78eogCeRbqDQGr/zYGfgBbA\nL9b19chgsk1r61oYRx55pFmwYEFlhVdRFKWmsgBnnLaEymwJhIAJwBLgYdf1N4Dh1v/hOMrhDWAo\nkAy0AzoCc70PXbBgAcaYwMdtt91WYZmKuh8oz6gpflSXcGpc1Dw/qlM4vQdwZKSCujJbAr2Bi4Dv\ngPnWtX8i1kDTgJFAATDEcltiXV8CFAFXot1BiqIolUplKoFPid7SOCnK9XusQ1EURUkAdao6AOVg\nzJgxY8p0Q25uboVlKup+oDyjpvgRj2fUFD/i8Qz1I7HPSFQ43dx+++0At3uvh8r0lAMDY/VvKYqi\nKAEJhUIQocyvbOsgRUkomZmZbN26taqDoShVRkZGBlu2bAksry0BpUYRCoXQ/KHUZqJ9A9FaAro2\nj6IoSi1GlYCiKEotRpWAoihKLUaVgKJUESNGjOCWW26pEr//8pe/cNddd1WJ3zannnoqzz77bNye\n99xzz3HKKafE7Xm1BR0YVmoUkQbFMlNT2VroXbswfmSkpLBl+/Yy33fJJZfQpk0b7rjjjkoIVXDy\n8/MZNmwYa9eurbIwTJo0iQkTJjB79uxA8gUFBRx88MEUFRWRlKR1WTdlHRhWE1GlxrO1sLBS1x8J\nVUDB1IQKTVFREXXrVk1RUhPir6pRFaooCWL+/PkcddRRpKamMnToUHbv3l3i9tZbb9G1a1cyMjLo\n3bs3CxcuLHHLzc3lvvvuo3PnzmRmZnLppZeyZ8+eEvcnn3ySjh07kpWVxcCBA9m4cWOJ27XXXkuz\nZs1IS0ujS5cuLFmyBHC6onbt2sWf/vQnNmzYQEpKCqmpqfz000/s2bOHv/3tb7Rq1YpWrVpx7bXX\nsnfvXkBaDq1bt+aBBx6gRYsWjBw5MuZ7v/7663Tt2pW0tDQ6dOjAzJkzAcjLy2PChAksW7aMK664\ngjlz5pCSkkJmZiYAM2bMoFu3bqSlpZGTk2PPeAWgb9++AKSnp5OamsoXX3zBpEmTOP7440tkPv/8\nc3r06EF6ejo9e/Zkzpw5JW55eXnceuut9OnTh9TUVE455RQ2b94MwO7du7nooovIzs4mIyODnj17\n8ssvv6AcOBhFiUak/AEYU4lHkDy5Z88ek5OTYx5++GFTVFRkXnrpJVOvXj1zyy23mHnz5pmmTZua\nuXPnmuLiYjN58mSTm5tr9u7da4wxpm3btuaII44w69atM1u2bDG9e/c2N998szHGmA8//NBkZ2eb\n+fPnmz179phRo0aZvn37GmOMeffdd0337t3Ntm3bjDHGLFu2zGzcuNEYY8yIESPMLbfcYowxJj8/\n37Ru3TosvLfccos59thjza+//mp+/fVXc9xxx5XIf/zxx6Zu3bpm9OjRZu/eveb333+P+t5ffvml\nSUtLMx988IExxpj169ebZcuWGWOMycvLMxMmTDDGGDNp0iTTp
0+fsHvz8/PNokWLjDHGfPfdd6ZZ\ns2bmtddeM8YYU1BQYEKhkNm/f3+J/MSJE0uesXnzZpOenm6mTJli9u/fb6ZOnWoyMjLMli1bjDHG\n9OvXz3To0MEsX77c/P777yYvL8+MHj3aGGPM448/bs444wzz+++/m+LiYjNv3jyzfft23zQ+UIiW\nH4myIKe2BBQlAXzxxRcUFRVxzTXXUKdOHQYNGkSPHj0wxvDkk09y+eWX06NHD0KhEBdffDEHHXQQ\nX3zxBSB9uVdddRWtWrUiIyODm266ialTpwIyGDpy5Ei6du1KcnIy9957L3PmzGHNmjUkJydTWFjI\n0qVLKS4u5tBDD6V58+YlYTJWV4qJ0KXy/PPPc+utt5KdnU12dja33XZb2CBuUlISt99+O/Xq1aN+\n/fpR33vChAmMHDmSE088EYCWLVty6KGHlpKLFIZ+/frRuXNnAI444giGDh3KrFmzosq7mTFjBoce\neigXXnghSUlJDB06lMMOO4w33nijJE4vueQSOnToQP369RkyZAjffvstAMnJyWzevJnly5cTCoXo\n1q0bKSkpMf2rzqgSUJQEsGHDBlq1Ct8ttW3btgCsXr2acePGkZGRUXKsW7eODRs2lMi2aePst5ST\nk1PitnHjxpLnADRq1IisrCzWr19P//79ueqqq/jrX/9Ks2bNuPzyyykMOH6xYcOGsOe6/QRo0qQJ\nycnJvs9Zt24d7du3D+Snly+//JL+/fvTtGlT0tPTGT9+fEmXjR8bNmwgJycn7Frbtm3D3sGtEBs0\naMCOHTsAGDZsGKeccgpDhw6lVatW3HDDDRQVFZXrHaoDqgQUJQG0aNGC9evDN8pbvXo1IAX8TTfd\nxNatW0uOHTt2cN5555XIrlmzJuy/rVBatmxJQUFBidvOnTvZvHlzifuoUaP4+uuvWbJkCT/88AMP\nPvhgiaxlLVLy68b73DVr1tCyZctS9/rRpk0bVqxY4SsX6XkXXHABZ511FuvWreO3337jiiuuoLi4\nOJD/rVq1Kolfm9WrV5dSxJGoW7cut956K4sXL+bzzz/nrbfe4plnnvG9r7qiSkBREsBxxx1H3bp1\neeSRR9i3bx+vvPIKX331FaFQiD//+c88/vjjzJ07F2MMO3fuZMaMGSU1U2MMjz32GOvXr2fLli3c\nfffdJQri/PPPZ+LEiSxYsIA9e/Zw44030qtXL3Jycvj666/58ssv2bdvHw0bNqR+/frUqVOn5Jl2\nl0qzZs3YvHkz211mrueffz533XUXmzZtYtOmTdxxxx0MGzaszO89cuRIJk6cyEcffURxcTHr16/n\n+++/LyXXrFkz1q1bx759+0qu7dixg4yMDJKTk5k7dy7PP/98SeHfpEkTkpKS+PHHHyP6+6c//Ykf\nfviBqVOnUlRUxIsvvsiyZcs4/fTTS2SidSl9/PHHLFy4kP3795OSkkK9evVK4q0mokpAURJAvXr1\neOWVV5g0aRJZWVlMmzaNQYMGAdC9e3eefPJJrrrqKjIzM+nYsSPPPPNMWE39ggsu4OSTT6Z9+/Z0\n7NiRm2++GYATTzyRO++8k0GDBtGyZUtWrVrFCy+8AMD27du57LLLyMzMJDc3l+zsbP7+97+XPNN+\n/mGHHcb555/PwQcfTGZmJj/99BM333wzRx99NF26dKFLly4cffTRJX7a9wehR48eTJw4kWuvvZb0\n9HTy8vLCWjU2J554Ip07d6Z58+Y0bdoUgMcee4xbb72V1NRU7rzzzrCWUcOGDbnpppvo3bs3mZmZ\nfPnll2HvlJWVxVtvvcW4cePIzs5m7NixvPXWWyWWR953cN/7888/M3jwYNLS0ujUqRN5eXnlUoDV\nBZ0sptQoqtNksaC0a9eOCRMmcMIJJ1SaH0rNQSeLKYqHyiygFaW6o91BiqJUiHvuuYeUlJRSx2mn\nnVbVQVMCoN1BSo1CN5VRa
ju6qYyiKIoSGFUCiqIotRhVAoqiKLUYVQKKoii1GFUCiqIotRhVAopS\nhcR7S8QgW1bm5+eHLUin1G5UCSg1ntT01JJlASrjSE1PLXfYLrzwQt57772S86SkJFauXFnu57mX\nP1CUIOiMYaXGU7itEMZU4vPHxHdJiorOc9B5EkpZ0JaAoiSItWvXcs4559C0aVOys7MZNWpU2JaI\n9paJRx55JCkpKUyfPh2IvfVkrC0r/bj33ntp0qQJ7dq14/nnnwfgq6++onnz5mGK5JVXXqFr164V\nfn/lwESVgKIkgP3793P66afTrl07Vq9ezYYNGxg6dGhY180nn3wCwHfffUdhYSGDBw9m/vz5jBw5\nkieffJItW7Zw+eWXc+aZZ7Jv3z727t3LWWedxfDhw9m6dSuDBw/m5ZdfDtQd9NNPP7F582Y2bNjA\n5MmTueyyy1i+fDk9evQgKysrrIvq2WefZfjw4fGPFOWAQJWAoiSAuXPnsnHjRh588EEaNGhAcnIy\nvXv39u26eeKJJyJuPTlnzpyoW1YG5c4776RevXr07duX0047jRdffBGAiy++mClTpgCwZcsWZs6c\nyQUXXFD+l1cOaHRMQFESwNq1a2nbti1JSWWrd61evZpnnnmGRx99tOTavn372LhxI8aYiFtWBhkT\nyMjIoEGDBmH32VsvXnjhhXTu3Jldu3Yxbdo0+vbtS7NmzcoUbqX6oC0BRUkAbdq0Yc2aNezfv79M\n9+Xk5ETdejLalpVBuoO2bt3Krl27wu6zFUrr1q3p1asXr7zyClOmTKnRG6ooqgQUJSEcc8wxtGjR\ngtGjR7Nr1y52797NZ599VkquWbNmYVsmxtp6MtqWlUG57bbb2LdvH7Nnz2bGjBkMHjy4xO3iiy/m\n/vvvZ9GiRZxzzjkVe3nlgEaVgKIkgKSkJN58801WrFhBTk4Obdq0Yfr06aXs+seMGcPw4cPJyMjg\npZdeirr1JMTesjIWoVCIFi1akJGRQcuWLRk2bBjjx4/nkEMOKZE555xzWLNmDWeffTb169ePf4Qo\nBwzVcVaJ7iegRCXSWuqp6akyV6CSSElLYftvNW/3so4dOzJ+/Hjd1rKaodtLKoqHmlhAVzavvPIK\noVBIFUAtQLuDFKUGUpEtH/Py8rjyyiv573//m4CQKlWNdgcpNQrdXlKp7ej2koqiKEpgVAkoiqLU\nYlQJKIqi1GJUCSiKotRiVAkoiqLUYlQJKIqSEFJSUigoKIjqnpuby4cffpi4AJWBNWvWkJKSElfL\ns1NPPZVnn302bs8rL6oElBpPampm5W4vmZpZ1a9YLSgsLCQ3NxeIvBdyZW2N6d64p7zk5ORQWFhY\nEr68vDwmTJgQ+P4xY8aUWojv7bffPiAW56tsJfA08DOw0HVtDLAOmG8df3K5/RNYDiwDTq7ksCm1\nhMLCrYCptEOeX/0pKiqq6iBUG3Qf5+AcD3QjXAncBlwXQbYT8C1QD8gFVhBZSZmaSEpaigFMSlpK\nTBk/d79n1HQi5Q/AgKnEI1ievPfee0379u1NSkqK6dSpk3n11VeNMcZMnDjRHHfcceaqq64yaWlp\n5rDDDjMffvhhyX39+vUzo0ePNj179jSpqalm4MCBZsuWLSXuc+bMMccee6xJT083Rx55pMnPzy9x\ne/rpp83hhx9uUlJSzMEHH2zGjx9f4vbxxx+bVq1amfvvv980b97cXHzxxWbTpk3mtNNOM+np6SYz\nM9Mcf/zxpri4OOo7Pf300+aMM84oOe/QoYMZPHhwyXnr1q3NggULjDHGhEIhs2LFCjN+/HhTr149\nk5ycbBo3bmzOPPNMY4wxubm5ZuzYsaZLly4mLS3NnHfeeWb37t0lz3riiSdMhw4dTGZmpjnzzDPN\nhg0bjDHGrFq1yoRCIbN///6wOHvqqafM0qVLzUEHHWTq1KljGjdubDIyMmKm0a5du8x1111
n2rZt\na9LS0kyfPn3M7t27S/woKioyN954o6lTp46pX7++ady4sRk1apQxxpirr77atGnTxqSmppru3bub\n2bNnG2OMeeedd0xycrKpV6+eady4senatWtYGI0xpri42Nx5552mbdu2pmnTpubiiy8227ZtC3u/\nyZMnm5ycHJOdnW3uvvvuqO8QLT/Kd1A15FJaCVwfQe6fwA2u83eBXhHkYiZidQUwjCFmgQIB3H2e\nUdOJ9O4cIEpg+vTpZuPGjcYYY1588UXTqFEjs3HjRjNx4kRTt25d8/DDD5uioiLz4osvmrS0NLN1\n61ZjjBQWrVq1MosXLzY7d+40gwYNMhdddJExxph169aZrKws88477xhjjHn//fdNVlaW2bRpkzHG\nmBkzZpiVK1caY4yZNWuWadiwoZk3b54xRpRA3bp1zejRo83evXvN77//bkaPHm2uuOIKU1RUZIqK\nisynn34a851Wrlxp0tPTjTHGrF+/3rRt29a0adPGGGPMjz/+GFbohkIh8+OPPxpjjBkxYoS55ZZb\nwp7Vtm1bc8wxx5iNGzeaLVu2mMMPP9w8/vjjxhhjPvzwQ5OdnW3mz59v9uzZY0aNGmX69u1rjIms\nBPLy8syECROMMcZMmjTJ9OnTJ1AaXXnllaZ///5mw4YNZv/+/WbOnDlmz549pfxwP99mypQpZsuW\nLWb//v1m3Lhxpnnz5mbPnj3GGGPGjBljhg0bFibvfsaECRNMhw4dzKpVq8yOHTvMOeecUyJv+33Z\nZZeZ3bt3mwULFpiDDjrILF26NOI7RMuPRFECVTUmMApYAEwA0q1rLZFuIpt1QCsUpYZw7rnn0rx5\ncwCGDBlCx44dmTt3LgBNmzYt2SZyyJAhHHroobz11lsAJdtKdurUiYYNG3LnnXcybdo0iouLmTJl\nCqeeeioDBgwA4KSTTuLoo49mxowZgAw+tmvXDpCN7E8++WRmz55dEqakpCRuv/126tWrR/369UlO\nTmbjxo0UFBRQp04devfuHfOd2rVrR0pKCvPnz+eTTz7hlFNOoWXLlnz//ffMmjWLvn37Rr3XeAZZ\nQ6EQV199Nc2bNycjI4MzzjiDb7/9FoDnnnuOkSNH0rVrV5KTk7n33nuZM2cOa9as8Y13rz/RKC4u\nZuLEifz73/+mRYsWJCUl0atXL5KTkwM998ILLyQjI4OkpCSuu+469uzZw/fff18iGysczz33HNdf\nfz25ubk0atSIe++9lxdeeIHi4uISmdtuu42DDjqILl26cOSRR7JgwYJA7+VHECXwIJCKdNN8CGwC\nKjKa8T+gHdAV2AiMiyGri8AoNYZnnnmGbt26kZGRQUZGBosWLWLTpk2EQqGI20Ru3Lix5LxNmzYl\n/3Nycti3bx+bNm1i9erVTJ8+veSZGRkZfPbZZ/z0008AvPPOO/Tq1YusrCwyMjJ4++232bx5c8mz\nmjRpElbI/f3vf6dDhw6cfPLJtG/fnvvvv9/3vfr160d+fj6zZ8+mX79+9OvXj1mzZvHJJ5/Qr1+/\nMsWRrSQBGjRowM6dOwHYuHEjbdu2LXFr1KgRWVlZpXZWqwibNm1i9+7dtG/fPpC8d1xg7NixdOrU\nifT0dDIyMti2bRubNm0K9Czv++Xk5FBUVMTPP/9ccs0dNw0bNiyJm4oSZCnpk4G/A2cDBcA5wGyg\nvLZNv7j+PwW8af1fD7RxubW2rpVizJgxJf/z8vLIy8srZ1AUJTGsXr2ayy67jI8++ohjjz2WUChE\nt27dAKklRtomcuDAgSXn7hrvmjVrqFevHk2aNCEnJ4dhw4bxxBNPlPJzz549DBo0iClTpjBw4EDq\n1KnD2WefHVYj9RZkjRs3ZuzYsYwdO5bFixdzwgkn0KNHj5hLSvfr14833niDgoICbrrpJtLT05ky\nZQpffPEFo0aNinhPWQdWW7ZsGWZeunPnTjZv3kyrVq1
K9kretWsXjRs3BihRgmXxKzs7m/r167Ni\nxQq6dOkSU9b7zNmzZ/Pggw/y0Ucf0blzZwAyMzNL4tovDN73W7NmDXXr1qVZs2aBWjuRyM/PJz8/\n31cuSEvAVhSnAy8B26hYDb2F6//ZOOMFbwBDgWSkpdARmBvpAWPGjCk5VAEo1YGdO3cSCoXIzs4u\n6XZYtGhRifsvv/xSsk3k9OnTWbZsGaeeeiogSmLKlCksXbqUXbt2ceuttzJ48GBCoRAXXXQRb775\nJjNnzmT//v3s3r2b/Px81q9fz969e9m7dy/Z2dkkJSXxzjvvMHPmzJjhnDFjBitWrMAYQ2pqKnXq\n1KFOnTox7+nXrx8ff/wxu3fvpmXLlvTp04d3332XLVu2lCg6L82aNWPlypW+8WYXoueffz4TJ05k\nwYIF7NmzhxtvvJFevXqRk5NDkyZNaNWqFc8++yz79+/n6aefDtuis1mzZqxbt459+/bF9CspKYlL\nL72U6667jo0bN7J//37mzJnD3r17I4bf7UdhYSF169YlOzubvXv3cscdd7B9u7OPRfPmzSkoKIja\nJXT++efz0EMPUVBQwI4dO7jxxhsZOnQoSUnRi2i/bq68vLywsjLqe8d8ivAmYrLZHekOagrsDnAf\nwFTgc+BQYC1wKXA/8B0yJtAPuNaSXQJMs37fAa5Eu4OUGkKnTp24/vrrOfbYY2nevDmLFi2iT58+\ngNQSjznmGJYvX06TJk245ZZbePnll8nIyChxHzZsGCNGjKBFixbs3buXRx55BJBN4V9//XXuuece\nmjZtSk5ODuPGjcMYQ0pKCo888ghDhgwhMzOTqVOnhrUu7Ge7Wb58OX/84x9JSUnhuOOO469//atv\nl07Hjh1JSUkpscVPTU2lffv29O7dO+z57v8jR45kyZIlZGRkRN3D2D1v4MQTT+TOO+9k0KBBtGzZ\nklWrVvHCCy+UyD755JM8+OCDZGdns2TJkrCxjBNPPJHOnTvTvHlzmjZtGvNdxo4dyxFHHEGPHj3I\nysrin//8Z8Ta/DXXXMNLL71EZmYmf/vb3xgwYAADBgzgkEMOITc3lwYNGpCTk1Mib+/fnJWVxdFH\nH13K30svvZRhw4bRt29fDj74YBo2bMijjz4aMe5iXSsPQZ+SBfwG7AcaASnATzHvqDxM0IGe6kQo\nFJIZFGOia3g70WO6+zyjphNxe8nUzEq15U9JyWD79i3lvn/SpElMmDAhbMDWTf/+/Rk2bBiXXnpp\nuf1Qag+Vtb3kYUBbZHAYpIb+TPmCqCiJpSIF9IFCbVXqSuUTpDtoCmIh1Ac42jp6VGagFKU2EWS5\nhKqcoWqvm+M9UlNTWbdunf8DDjA6d+4c8X2mTp1a1UGrEoLkrKXIbN4DpSqi3UHaHRQV3V5Sqe1U\nxvaSiwi36FEURVFqCEHGBJogFjtzgT3WNQOcWVmBUhRFURJDECUwxvq12xchDpyuIUVRFKUCBFEC\n+UBzZDDYIC2CX2LdoChVRUZGhi7zq9Rq7PklQQmiBIYg1kGzrPP/IMtITC+TT4qSALZsqf7moIqS\nSIIogZuRVoBd+2+CzBxWJaAoilLNCWIdFAJ+dZ1vJvhMY0VRFOUAJkhL4F3gPeB5pPA/D1nbR1EU\nRanmBFEC/0CWj+6DDAyPB16tzEApiqIoiSGIEjDAy9ahKIqi1CBijQl8Zv3uAAo9x/ZoNymKoijV\nh1gtAXtB7saJCIiiKIqSeIJYB0XaRrK8W0sqiqIoBxBBlMAfPOd1kV3GFEVRlGpOLCVwI9L/fwTh\n4wG/IPsBK4qiKNWcWErgHmQbybHWr31kAqMrP2iKoihKZRPERHQ0kAF0BOq7rn9SKSFSFEVREkYQ\nJfBn4GqgDTAf6AXMAU6oxHApiqIoCSDIwPA1QE+gAOgPdAO2VWKYFEVRlAQRRAnsBn63/tcHlgGH\nVlqIFEVRlIQRpDt
oLTIm8BrwPrAVaRUoiqIo1ZwgSuBs63cMsstYKrKyqKIoilLNCdId1Asp+EGU\nQD4yLqAoiqJUc4IogceRReRsdlrXFEVRlGpOECUAUOz6vx+oUwlhURRFURJMECWwCpknUA9IRkxG\nV1ZmoBRFUZTEEEQJXIEsK70eWIeMEVxWmYFSFEVREkMQ66CfkX2FFUVRlBpGLCVwA3A/8GgEN4N0\nESmKoijVmFhKYIn1+00EN1MJYVEURVESTCwl8Kb1OykB4VAURVGqgCBKIBIGODPOYVEURVESTCwl\nMC6Gm3YHKYqi1ABiKYF81/+DgMOQSWPfA3srMUyKoihKgghiInoaskyEPUHsYOBy4O3KCpSiKIqS\nGIIogX8hm8mssM7bIwpAlYCiKEo1J8iM4e04CgCkRbC9coKjKIqiJJJYLYFB1u/XSK1/mnU+2Lqm\nKIqiVHNiKYEzcKyAfgH6Wf9/RbaZVBRFUao5sZTAiEQFQlEURakaggwMT/Sc262DS+McFkVRFCXB\nBFECM3AK/gbInsMbKi1ESo0nNV12K93+m9oXKEpVE0QJvOQ5fx74rBLCotQSCrcVVnUQFEWxCLq9\npJtDgCbxDoiiKIqSeIIogR1AoXVsRxaWuyHg859GNqVZ6LqWCbwP/ADMBNJdbv8ElgPLgJMD+qEo\niqKUkyBKoDGQYh2pQEfg5YDPnwgM8FwbjSiBQ4APrXOATsgOZp2sex4LGD5FURSlnAQpZEPIxLGH\nkJVFzy7D82cDWz3XzgQmW/8nA2dZ/wcCU4F9QAEyS7lnGfxSFEVRykgQJfAYsmDcd8BiZOP5xyrg\nZzOkiwjrt5n1vyWykb3NOqBVBfxRFEVRfAhiHdQf6aIpts4n4Ww9WVEMsfcmiOg2ZsyYkv95eXnk\n5eXFKTiKoig1g/z8fPLz833lgiiBFUAO0kWD9X9FVGl/fgaaAz8BLZAlKQDWA21ccq2ta6VwKwFF\nURSlNN4K8u233x5RLkh3UCqwFJiFbDSzBBkkfhN4oxxhewMYbv0fDrzmuj4USAbaIQPQc8vxfEVR\nFCUgQVoCt0a4ZpABY79tJqciC89lA2utZ92HrEg6EmldDLFkl1jXlwBFwJUBnq8oiqJUgCBKIL8C\nzz8/yvWToly/xzoURVGUBKB2+IqiKLUYVQKKoii1mFhK4EPr94FEBERRFEVJPLHGBFoAxyEzfF+g\n9EDwvEoMl6IoipIAYimB2xBrnlbIchFe+ldKiBRFUZSEEUsJTLeOW4E7EhMcRVEUJZEEMRG9A1nc\nrS/SHTTsQ33vAAAgAElEQVQLmSimKIqiVHOCWAfdB1yNLB631Pp/b2UGSlEURUkMQVoCpwFdgf3W\n+STgW2QDGEVRFKUaE6QlYAjf/SsdXc5BURSlRhCkJXAvYg76MWIm2g9nNzBFURSlGhNECUxFBoN7\nIC2A0cDGygyUoiiKkhiCKAGADcDrlRkQRVEUJfHo2kGKoii1GFUCiqIotRg/JVAX+D4RAVEURVES\nj58SKAKWAW0TEBZFURQlwQQZGM5EZgvPBXZa1wyyuqiiKIpSjQmiBG6JcE0niymKotQAgu4xnAt0\nAD4AGga8T1EURTnACWIddBmypPR467w18GqlhUhRFEVJGEGUwF+BPsB26/wHoGmlhUhRFEVJGEGU\nwB7rsKmLjgkoiqLUCIIogVnATchYwB+RriHdVEZREkBqeiqhUIjU9NSqDopSQwmiBEYDvwILgcuB\nt4GbKzNQiqIIhdsKYYz1qyiVQBArn/3AZOBLpBtoGdodpCiKUiMIurPY48BK6/xgnBaBoiiKUo0J\nogT+BfQHVljn7REFoEpAURSlmhNkTGA7jgIAaRFsjyKrKIqiVCNitQQGWb9fI7X+adb5YOuaoigx\nSE1PpXBbISlpKWz/LXK9ybb6ieauKJVNLCVwBs4A8C/I3sIglkL1KzNQilITKLHsG
RPdsketfpSq\nJpYSGJGoQCiKoihVQ5CB4YOBUcgicra8LiWtKIpSAwiiBF4DnkJmCRdb13SegKIoSg0giBLYDTxS\n2QFRFEVREk8QJfAoMAZ4j/CF5OZVRoAURVGUxBFECXQGhiETxopd1/tXSojiTDzM9II8Q1EUpToS\nRAkMBtoBeys5LJVCPMz0gjxDURSlOhJkxvBCIKOyA6IoiqIkniAtgQxk5dCvcMYE1ERUURSlBhBE\nCdxW6aFQFEVRqoQgSiC/sgOhKIqiVA1BlMAOnMlhyUA965rud6coilLNCaIEGrv+JyFjAb0qJziK\noihKIgliHeSmGFlGYkAlhEVRFEVJMEFaAoNc/5OA7sDvlRMcRVEUJZEEUQLufQWKgAJgYGUFSFEU\nRUkcQZTAiEryuwDZpnI/sA/oCWQCLwJtLfchwG+V5L+iKEqtJ5YSiDY/wG4V3FFBvw2QB2xxXRsN\nvA88ANxgnY+uoD9KNUPXalKUxBFrYHgnYgrqPgwwEimg40HIc34mMNn6Pxk4K07+KNWIkrWadOtF\nRal0YrUExrr+pwJXA5cALwDj4uC3AT5AuoPGA08CzYCfLfefrXMlgejG54pSu/AbE8gCrgUuBJ4B\njgK2xsnv3sBGoAnSBbTM426IsoPZmDFjSv7n5eWRl5cXpyApWvtWlJpBfn4++fn5vnJ+LYGzgSeA\nLkC8S4eN1u+vwKvIwPDPQHPgJ6AF8EukG91KQFEURSmNt4J8++23R5SLNSZwHdAKuBnYgCgB+6ho\nX0FDIMX63wg4GVmy+g1guHV9ODIxTVEURakkYrUEyjqbuCw0Q2r/dhieA2YCXwPTkMHnAsREVFEU\nRakkgswTqAxWAV0jXN8CnJTgsCiKotRaKrO2ryiKohzgqBJQFEWpxagSUBRFqcWoElAURanFqBJQ\nFEWpxagSUBRFqcWoElAURanFqBJQFEWpxagSUBRFqcWoElAURanFqBKIA6npqYRCoZK1+MsroyiK\nkmhUCcSBIDth6W5ZiqIciKgSUBRFqcWoElAURanFqBJQFEWpxagSUBRFqcWoElAURanFqBJQFEWp\nxagSUBRFqcWoElAURanFqBJQFEWpxagSUBRFqcWoElAURanFqBKoRegidoqieFElUIvQRewURfGi\nSkBRFKUWo0pAURSlFqNKQFEUpRajSkBRFKUWo0pAURSlFqNKQFEUpRajSkBRFKUWo0pAURSlFqNK\nQFHKic7ADk6QuEpNT9W4rAJUCShKOdEZ2MEJEleF2wo1LqsAVQKKoii1GFUCiqIotRhVAopSzfHr\nS/frj9f++tqNKgFFqeb49aX79cdrf31iiYfSjadRgioBJSKpqZmkpmaWvh6HWqWi1GbioXTjaZSg\nSkCJSGHhVgoLt5a+HodapVIe6opyjaCYFaUi1K3qACiKEoQiwFBYGKrqgCg1DG0JKIpSLYjXALcO\nkoejSkBRlGpBvAa4dZA8HFUCSrWkptXGFKWqqLFKIDNVmnV+MpmpTkGSmpoZNvgW5BlK1VDTamPV\nHbUKq74ciEpgALAMWA7cUN6HbC0sxASQ2VroFCRiDWNKrGKCPENRFLUKq84caEqgDvAfRBF0As4H\nDq/SEClRODBNFu3WW0rDhuVyt2UyU1PJz8+P+QxFiUS0fHOgcqApgZ7ACqAA2Ae8AAysygAlnqop\nXL1dY5Hcwws+22RxaxT3qsFuve34/fdyudsyWwsLo37Mfi3E8nRFHhgcmIq9umHnG2/38oHKgaYE\nWgFrXefrrGsHJPH42P0K1/I9o26Zxza8XWOR3GMVfPHoOvMLp+1+4BWe4USKC+8MbG98HxgFRum8\n5yXaTPLEUj2Ulbd7+UDlQFMCldgFH/+MU55xh8Q8I/xjPlDGNvyUWelwhqeZ7e4tPKOlqd9ApZ/7\nvffdK1ZIcSj4os3AdruXpWUVRCGWvfLg/wzve
wQNZ1n88DfQ8G+FetOsonERjwpI5VQaK07Vt9/D\n6QWMQcYEAP4JFAP3u2S+BY5MbLAURVGqPQuArlUdCD/qAj8CuUAyUuDrwLCiKEot4k/A98gA8T+r\nOCyKoiiKoiiKoiiKoihKtSK7qgOglJmsADJn+rgf5eN+ZQA/gshUJw6roHt5OR64Hji5gjJB8sWB\nRi8gzfrfELgDeAsxdEkrg4ybNKA7kFE5Qa7e/AlYBXwKdAMWI4PM64GTXHI5QLr1vx0wGPiDy70f\ncKj1vw/wd+A0l/sA4HHgTet4HMeayU0zJLGOsv7b1AOuAN4FFlrHu9a1emWQieUH1jsORT6u64Hz\nXO/txx+BLsAXyFyNJwjPdHMDuNu0BmxbvQ7AucAh1vkJyNjPF8hEwe+RNPsR6GHJnAMMsg77/8/W\n/3Osdz8KJx66I2luX78+wrEZuM46CCjjxU8RfeTjDuGKxp2H0oEJSLo/T+m0jcStPu5rArofCXyA\nTNRsB3wMbANmI+kXCXdcuNP+z4hxx23AZzhjfH4y9wNNLPejgZVIPlkD5AFtgQaWexJwKbLSwF9w\n9kgZ5XpGB+AT4DfgS+CIAO4g+ctWPk2BZ4BFwItIvj6IcDP7E4D/Q8oigCWu8DwJPIyUKWOAVwLK\nPIdTmT3FioMPrN8hxIEDzUS0IixACr10YAZwKlK4HI58SN2A0cDlwF7gQSTBPkO08dOIguiBFLTv\nAicC7yCK4VvrekckM6y3/G0NDEMy6dWWP/+zwrHOJfMb8tH/A9gKTPY8YzhSkJ6HfICxZO7z8eMP\nyEf1vsu9DVK43249NxZrkUx2J/JRjEQ+tDOt95wP7PJx7wZcA1yLzP5+CPgbUpj0Bu6x3EcAjZF4\nPsNyPwr4N1JDLELS4lcrbCFEEbxknQ9H0nmPK/y9rGsg6TkD+djs+69BPjas+NjhI7MQ51sx1v/H\nkELHfoYh/Hs6BPjBut4FUSxebgTutv4PQ+IMRAFsBJ4Czkby31kR7nezFngthvsIYJKPewowB0mb\nxsC/ECX4IlIRGoVUemLFxS2u9/gaKRB/BRoheeUPOPkjmgw4FbN8pCL2FRKnU4H6SLruAh4ADrbe\n/UQrTJciadnJesbbSAH7GhKXdyPfUSz33sBSHOvEaVbcvGT5cyGinPsh3+rfkbR627r2DbLagX3/\nPMJbqAsQhbvUR6aOKy7mIEvpFCCK4SMkbykW813/13rcvrV+lyA1iGzkw7drAo2QlsMSRLM3QgrU\nRpZ7Pct9eRS/Q0jhB5Jwx0SQ6WW5RXsGLjc/GT8/fiByrT/D9ew3Yxy7gO889/ZH3rEXEtd+7iBx\n1giJ711AC1c45hOeZks9z7PdeiCZ/UqcwmeVS24QUos71XXN7Z6DfLgPIM1tr3sQmSKkiT7ROiYB\nha7zN5Aa2+FILTUXyYP2f5D89iKinG9DanpbXefuuFhAuEJZYP0WxjiKrN/LkQJ9uOsYgbRs/Nzx\nhGMF4cwPEBffIS2/LM+zwPkO/WSW4rR4v/C4L8RR1iAFZx3XuZ0vv3dd+8rzjO8CuHuf8Y1HZgHS\nKnC7262TulY4X0IUEkjc2K3bQ1x++sksxukW+pTwd12MEsZsJIP/A4mca5ElJ4YjtQlwErcO8Avh\nEbrQui+E1DS24hQIdZAEX4h0W3g5xnKD2AX4CqSmM4TwZmQS0gKwa0F+Mn5+RFMC6a57twKnI81r\n++hn/f6CZHJvn2QX6/mbA7hD+AfuVRrf4hRuEF7TDRH+gdVBWhEfI3HtLcRTkFr7dKTg9brbz/8c\n6f6L5B5Lxk8RgXQdzMZZ66qsimYdUuu+HqnpuZWAHXdrgOZRwr4WiZ/eUdwLAri7/YLS4yKL8I+L\nAut8FdKN09K6noKjBPxkRiGt2BMQZflvJG/eDjwLzERq4wAv4yjabJw8dTeioA5GWlzXInnjEkSJ\n+bmDdHPeg
RTu45A0BqnwzEJq5nbX0bs43Z4NrLhKR1rdK5Hvdp/1zp/gTHj1kxmCKLpLkW6yl3Fa\ndeNQwuiARMy9QCrSlF6C9Ku1t2SmWscbwBQkQi9CuoKmAI8g2nYucBfSVXQzkiEfQvqb5yI1lfet\nYymSeN0tPx5BmoTnAcchH91Q69p/kH7WaUjzd7l1/Gpda2c9w0/Gz4/hSL/6/4CbrONxJKNdYvnx\nLvKRRWI20tw9NoJbDtJ09nMHqR3ZNbrWLpkGyMc6EKe15aY9osy9tEIK+pVRwn0UovB/jeLeGBiL\nfGDRiCbjp4jsex8CXsfpxvMSTdGMwWkV3Ib0QYO0np6x/t9N5EoIiHLJxFEwkfBzBxl3SolwvQNO\n91iQuPDSEClwg8r0R/L7fKSC9Q5SyauH5LF8JJ++ibTa8xEF4h7/uwT5NjchrZWlSPmQFtA9GVE8\na6yjGGnRTbXC0AXJx88iabQSKYO+Qb4PmzRkpu7RRFfisWQ6Iun7KqKg/oeMD8SFmjQmEIT6SGG5\nEXgPUQDHIfsXjEfGCvohNeElQF+ke2MZojhsWiAFkgE2WM9zcypSwNk1nPXW/W+7ZEJIc9gAW4i8\nblIsGT8/MpGM4nafaT0nUbRF4mef53orpOvkg0rwM4QUYtsr4dkgYX8YUfrRCrWuSL55PIp7Y6TQ\n74nksepKrLhohrP443pkMN9LEJlYdEK6TeoiLamvgP1lfEZQ0i1/NhP+HdZFrJrscKxFvjP3QlHN\nkEqQIXZc+MlUCjVJCWQBVyER+DRiZXAcUpjfQ3iiBCWENDG9NcseSILtR7pelpXxuY2RwTD3M2Yi\nNY2yyAQh07p/WwyZDEvGXXDWQVoUg5BB5f1IH+njSK3Lzz0efgSVOcFyd8fVU0jXVAoyaGffv9e6\n/jjOQGkQGT9CSC2uIvmiPdLl0BpJ5+8RowZ3nIWQ2rddCVmPtE4NUlCNRlobzaxrvyADnvdZ98dy\n/82SGWDJuAvo15DWox9+hhHzAsocjlRy7DCsQyo53rEjdyXJS7r1Lu6K0Huu9/RzB4nvnoTHhR3f\nfsQrLrzpsQ5pbQZJD19qkhJ4B+nPTENsnhciXQe2ueNApPB+AEnIGxErjJ7IB3sZ0gx7zHK/Gmnm\n1UdMwUYgNdpxSAJ1R5r16db1YUgtwF1ouT9mu9AaglglfYc0eecg6dAFaUJ+F0BmsY8frZBm7UCk\ngLO7JiYgXQr7Asg8CaxGauvnIgXRbKSb5g2k6yWW+yNx8OMRpBCOJdPSSrcPkQ9lFZKef7H8vghp\nRn+AdME0RqyvbkY+phut58SSuYXYiqgf/vnCT9Fcg4zRzEIsceZbzzsbKQw+Rmqcj1n3uQuMjpbM\n9VY8TEZqkgZptQ7H6fqL5X4y0v8eywLuesQaLFKhNAGx9rkMZ3zLphfS2j4S6UKJJfM8YgXzAuHW\nbechg+vPI/3jJ+JUbtKsdxuNjDlcTGwLOePjPhn/+P6S2Ep1VhziIh9/i0TFwh4QCiFdEJHcvkJq\n1+cjiTrYkj8RKWgXIDWQY5GaRS/rvsORj/JbHIuidjgmeX9EaukgH/TtiHnjvxEzypORAuZqRDnZ\n/bLZrvu6IIUHAWT8/PgYUR4hpGb5MFKw3Y0MdhFAxh7otrEz6UFIDdfPPR5+EEDGPYBcFycOMxBl\n6R2Q/tr6TcKx/vCTmUTs+A6SL95A+qDbIAPAtyJdCM8gLdVFOIYKDZECBKTv2R5QXYYzCOqmneX2\nQwQ3mx8CuIO/BdwLSM21l/UubZDv5XGkgPYzWojlhy2znPD5MDbJOPNKzsOxr8f6PxTHmsjPQi6I\nBZ1ffM9EtsBtjlOhboEohpnELy4i4bZIVCwWIl0fOUht0R5kzcYpSNzWKt7JM9/ib2bqLizqeORt\ns7Ughadt9dPA8wzb5MtPxs+PBR73ea7/dsHnJ/MNzuSg7oQPlC4J4B4PPwg
gswBnQk9bwk0KFyPK\n/XjrfCDS3HeHgQAyfvEdJF/4KZqFSKsTJB9/7ZK184Vf4fg+0kJyTy5rjhRUHwRwB38LOD/zZT+j\nBQLIRCt8c5G4CmJm7WchF8SCzi++/ZRqPOIiiEVihajrL1Jt+BeSaFuRmv4HSNfAYUiTH6R5fgrS\ndAwhTe1Xkeb8HqSJfrnlvh0xG5uGWBz8Zj1vAlLDPdP6BbFwsQvtvUihtQIptOxJTHuQbpu3kb68\nT5C+vumWu3tavJ+Mnx+bkObiR0j3g229kYRTY/GT+bvlthenlgVS433LCl8s93j4QQCZbxDlshyZ\n6W1PWmqKKIj7kfGBjkhheqnr/v9a/6/wkRlO7Pj+Bv98sRNRNLYZqW1Ga4/xPIW0VL+05Ow9NJq6\nZJ+2ZKYS3oUx1HJ7HKmFzsIp6H9GWiH27FI/9xFITT+F8C6Q7ZbbY5bsS66wJyGt6i1IyyiS0cJ/\ncIwW/GQGIN/vCpzKWBskfa5yhWOyyz0HSSdbAd+NpMtMT1ydjLTkin3cwT+++yNK1e5eA1Gqw5FK\nZjziojux00PxkIzz0aUi2rKJy70n0sc2Fak1foBE5jxkUM/PzDQZ+CuSQH/Gab43wKm5nIBkAHuv\nZLtLqQkyHgHS5/t/SHeBTRJOTdBPxs+PtojiWIRMYrInaWUhhTEBZZIIjz8vfu7x8COITBYy3hN0\nWYyy4hffQfLFkUiB8htienyo6xl2v+4fkMI01jo+nRCjh0etYzTOzNd40gIpgLoTbrIYxMQ5HtRB\nupnsJUN64VRaD0L65L3LqlxpudlkIhVCeymQoTi2/EHcIXZ8ZyLpvwypfG61/j8Q4TkVJVp6VJia\nNDBsczTO4F15LDTiQRJSMEWzV7eJZDETVCaoHxXBbaFkDz67LZT83OPhR1CZWBZbQaxu/GQSEd8g\nNf9WyHusROzSy4Lbqsa2HnJb1fi5Q7gFEkgN1GsRE8Ip6Nzmy/UoPXBsWxdNQFrjQWRACjt3GMpr\nNmm3oDeX0728xCsugqRHualJSiCIhQaEf+x2geH+2G1zQ7cVSCxzQ3tS1iRXWGIVWvGw3Nnn44f7\nPdzu9nsQQMbPQukwH/fv4uBHEEupDGKn+zn4W90EscyJFd9B80UsRdMZGXTORVpQ85FWwiwrfNvw\nNwG9nHCrmpDll21VU+zjfi/+FjHvId2lfyJckdimlX7rXsVjbawgZpNtiW1BZHzcCwLE92/EVqrx\niIsJ+KeHYhHEQuMaZHDsZqQweQyxzFiKFDD3IR/tMGQ28VjEfGs+Uhj5WXhgyc1FCrofkZnIzyFN\n1i7Ex3LHzw+/9yCAjJ+FUhArp4r6QQAZv3QPYnXjJ+MX30HyhV/e+xKni6gnzizhP+MslhfEGiXW\nQKafO/hbxFxsxcHj1rvcjJgyrkQKrkSsjfVvpL98KDJ+cjyi3N5GBlrB34IoiIWRX3zfgOSP0Ygp\n8jCk6+hb6zceceGXHoqLIBYafh97PMwN/QqteFju+Pnh9x4EkAlioeRn5VRRPwgg45fuQaxu/GT8\n4jtIvvDLe940d7+H/bH7WaP4WdX4uUMwi5hYppWJWBsriNmkX+Ea1MIoGj/gH1fxigs/xV0hapJ1\nUBALDYNE6H7ko7fXrVljXS9C+gc3I807+z57trGfhYfNbpe8XUu1J7L9RMUtd/z88HsPrDiIJeNn\noRTEyqmifgSR8Uv38fhb3fhZ5mQQO763458v/PLeD8iktI+R1p+tBJJx0nw1sa1RxhLbqsb4uIO/\nRcwIImP3Tw9F4u+/hM/M/RjHsstP5lok3W3rn5AVhouRvNAPaS259yXAuva79X8esS2IjI87+Md3\nayRfF3jC0RJJ53jExVBip4fiIoiFxjVIre4ppOZjmwI2RQqZ83Bmp65F+olt9+cJZuFxP1JTvBlZ\njM42T81CapWxLGbOtf77yfj54fceBJT
xs2Lyc4+HH34yQdL9D0i8xbK6iSXjF99B8oVf3stA9riw\nV7i0F3FLw1moL4g1im1Vcy6lrWqCuENsi5ggixOCs+RKNtHHHmPJnIoo8DdxNm+ylwvvjv9Cjn4W\nREEsjPziewCiUN9FZr8/af3/EWdjmXjERaVahNWkgeGg/AH50BcRuU8tC1kMawXlW28IpNA6HGni\nv29dS0IKrN3RboqzH0HeIx7v6kci/CgvjfG3vrFl4pGmfnmvuhBpccL3cNI3FVGAP3ru64LTdRZE\nxg97IUc7DN6FHBNBHZy1heyB4a+R1jgkLi4UH94JINPYx91vD9ZLfNyD4LdFYFCZWATZS9ZPxm+m\nYpCZjBX1I4iMX7p7Z42XVyYWQfKFX957wvX/MMSixXvPAOKzJWgaMqA/BbjA48djPuEE6d/egIxz\nLCF8tuv8gDJ1kQl8d1J6/4ObXf8j9ZW79xUfgJhf5npkLg3oDrHj2494xEVF08OXmtQSOCrK9RCy\ndaDfBIs1SJ9gNNYifXHldQcptI6I4R7kGX4y8fLjbxGuGyQ+xyPmiLHcsyO4l9WPbJxJZdFkom1O\nbqf7gzHCcDNSEF4fQCYa8YjvNcjy05EIIbXBVkjX0l+Rrg97+07bGmo+/lt++rl3QyZG/mDJXIqY\nu16ItHbmI2MeDyD94W8j8Wvb9b+GWK0MQGrltpXTjdZzbT8W+MjMQ7rzvkKsbmbh7PU83/r/rCXz\nDZIXV7ncuyHmrr2tZ52BWBQ94pJ518e9W4D4Ho4oUzsubsBpDc1FupUqGhf2gojR0sPeprPc1KSB\n4a+IvlmIvUlErI89Belvi0Y6sWue9iYgsQqtFsjmFdGwt6fzk/Hzw+89CCDzAtJv7x30DiF98X7u\n8fCDADJ+6X43MmDq3dMghDNQ7SfjF99B8oVf3tuEjJ9Ewh6Ivgzp896B1F5fsn7tzV5ScOzkxyIF\n5LtIQRrEHZy5DCBLqtyE2M4PtK49bflrK5JZiCLZhIxl1cHplpmLmL++Rbgi9JPpiaNY/4PUeF/B\nqQk/iHRHLUHS5n3EkGKOy48zkAJyH7J3w1SkW/LagO7gH9//s+614+IzHKVaL05x4ZceFaYmtQQW\nI5N7Ipl12bWx3UT/2K9FPvj/Q9aE8c6OHIf08w0gct/250gf6T6iF1rnIrMreyJWQtHCucZHprmP\nH8bnPbIQRRNLZjVS04lUwK1FZs7Gcm8TBz/aIDW1WDLbiZ3u65DtCr+O4t4GKTxiyfjF907884Vf\n3vsV6XaIpAjscC5GJpXZNEbmXyzBmVfSl/C9I7ogBWgGEhex3LOQWm9nz7uOQCbDNUYGv490uV2E\n1FzPQArJ35EC2d2/nYoUYMcjYyif+8ispHR34W1Iq6+p5Yd7g/XO1jvcYMl1I3wDd5AK7xOWP4cj\n33os9874x3eSJxz9kcHhixAFEY+4+JHY6dEWpYRYa66cbf3OQZaViESQPVqfxllt0stU63ce0bsH\n1uK/RSABZPz88HsPAsj0JXoG6xHAPR5+EEDGL90PI/q6Q3YXoZ+MX3wHyRd+ee8qoncJ2RZGH0eQ\nqYd0IRQTe8vPpwK4g9Sy/xhBZgBis76YcMstkAUWVyA12q6IyamXZJwWh5/Mc4Rb19j8P0SJfk3p\n7t3WSNeKPdA/AzEl9XIXEld+7uAf3377bMcjLvzSQykjfh97kD1Y/QhSsFUUPz+CvEc83tWPRPiR\nCOKRpkGUkR9tosiGgD4Bn1FRrgPyIlzvhmM1Vdn8kcgKMx1n4LgBTveql9YB3ME/voMoVaWK6O5z\nrpSNMyp4Hg8/gsj4pbt3MDvS4HYQmcrGW/CUZ9VIv/cI8p7e+ItmfBGNJ33Og8jEIy4OBOIRFxVN\nj4gk+YtUS67wOQf/j+B2n/MZPufgX2jN9zkPIuPnh997BJHxdmOU9TwefgSRCZLuFcUvvoPkC7+8\nN8H
nPKg/FcUbf3/xnHvf/XTP+XjP+eMR/PBe897jFxdB8rffNxTkO/SLb7809b6X9zyIjF96KIhS\nOy6grF+i+X3sLTznLSlNkAxaUfz8iEcNOx5UxI8knIXvorkHTfeK4hff3nwQKV/Eo7Xhfa43P1aU\nILXMys7f3rGHSCQi74J/uh4ILchyUZOsg2y+JfoAW7z4FDGNm42YhUUy6fwXUmtZHMGtLLjXbPdy\nEmJdsKuCfsRiOI45JDhWPvYql4Mova75NsSa55cK+PtHwvuYvyF2t55fuv8NmIhYEz2FFHKjCV+K\nN4hMUDKRvmXvjM+DEeuXaNeORfKMvbS5ba3i3oi8OxIfbk5H4msoMmv1A6TP+jjEmuUJ676lSPo0\nRN7tKMu/ewi3GspHul6mI8tMuxcDjEXQvQKi8QRimgliGfMz8p19gnx37jC2ofQ2sC2ompnDsTgU\nsZLLxTHLN8gy6zZvUvo7c/9vhFiPufkwwrUyUxOVwFhkVuTLRN90we9jf5TYCfIwYg3SB/lodyMZ\n1Iid5uMAABR2SURBVD356c+IKVc9xHpkKpKBc4g92eYs/NdCL7CuPYOs/bIV+UjsD2UrYj3QyrrH\nlgeZcOJeeOpBZALR74jN+JGIyeKzlvt/cOKxvhWmeTjrHM2w4sBeuC3Pcm8H3GGFsakVH7mEfwTu\nmZlevJOs7kNs0V9ETDJtbOXol+7fIZYbpyDN6lusd+xWRpneEd7DVoizkJpoXaSQ/hWpJLhtzyNN\n8HEruG+R/GhbqNRBrGHc93jNZs+3/FhhyTdETDkbI6aTJ1lyPaz3K0L6m3ciZp0nWddte3SbFkgL\nbAiijKbhbL3YAFlnp48VB7Nx9k/wW0M/2q5b7olxNm0tP/ogawdtxVH2RVb4L8WpCM1D4i/IdxYN\n7wTAhYSXASDf5VdIfmyEKGn3EiQDcOZkfIfEzTxkYTms57kV+SPIngVTLH/ORxTgDOS7u5vwAflU\n6/lBVgCodexAPqB9SA29kNK7ctm1s1MQe9w/EN4P+CRSmI5CzPNmI/1zw60DpDk4FJnIspTotcXD\nkAJsDWJr/g1SwHRDCtjPcWbX2mEIsta5m5ZWONcgH8a9iEJ4GKlNXe2S9fZ32ksYn43U1NKIvV5J\nOuHvOpPwjcubWdfsxdVAzCPvRwqTc3EWL3szxuFt3RQgsye9h41futsF5iM4hZ03LvxkpiDp9RjO\nYl7uCXH2/gT/D6drxH7m4dY7r7SePcj6HUF4a/FbShOpNTEPyVt/RvJnmsuvukgrzM4/IcvNvXuY\ne3lyKL2UtZsjkHd31+KnI/mlP1Kjfcq6FmSJ5mIip+UqZEasTWtkgtjjSL5/G1lIzWY+MqN3PrI1\nrH0NpCUU6zsbFOGw02WTJ9wPIt/UEYiyvAf5tt5A8t1ryPwOt2Jx5xtvqy0SkWS+QSqWq5C5Nu54\n+g5n5VelHPh97F8Svi5JPcKb4z9a59cgNbhoA+x1kIzxOpKgNyA1iBddMhchzfX2rjAE+ZBAJpmM\nRwrZN5Blb49Dmu52+NORNXQeRgoDb8FnF0ATcGyzYxUIyYRPzFrqcQ+5rtl+RSrYQGp1pyM1HPvo\nZ/1WpCspEpMQ5bQCqbmlUvrD85NZSuzW80Kk9jwTZ56HXYAPtJ6/GWmF2scjhI9nvIoo7XpIXLuX\nKnBzqBWed3HMcBcjSxVkIErQXm67geVm15qx/LbNWw9BarVuOiGzYRchLZwrcWY/g7NHB55rQdbQ\nX0F0k1t3906xdc9ZRI53O3/1RuLiDNc1bx72fmf7kNbKRM8xidKLCkYz2FiE8/3kIi22v0W4Zwyi\nrFogrSD7cLPUCpvNwYR/W1ejBCaEaPOHkFmpZ0eQmUTsj/17wte0z8TZdAPkw3wJqZ1MRj6sDoTz\nkPX8Jwif+LWY0rNb3ZNtQJTEY8juSi2toxfSpJzmum8zMtX8EsI3+
fYWzHVxpvt7xyjuQ1a0/BYp\ndJoSrvDctfMZSC3kfpf7Y9b14Uit9k0rnI1wuojuQlbh9PIu4f2ibmZHuHYcUjO82HXY+KV7HaSb\nwF42I4vwma9BZKYTeaDXZjBO0x/ko345wjvEohmS/r9Yx1Scwneh5/gZZ5Oh75Ba8krr2mVIofcU\nUlj9w3qvyZbMl0hBuAppNXrj4gukQGtFZKYQbiPfC+k6a4f/RvRBJsZhhekqKz7mIN1u/8/l7i5o\nWyCtd3s/Ab9JbX4TAN18R/hOZz0RJbPYE4bGSCv5IcIrPgWUbvF4x4UGIC35WdaxGumpAGkR2Uqj\nPZLPD4kSdgX5AGciBeOlSKJ4V9vz+9gvQRJhsnUUEHkzjcZIl9EanL4+9zMalbpDJtt4TekgfLJN\nkLXOQQq+PyCmYs8jCmEKwWZDusnC6TpoRLgtdp7r6EPpxdCSkO6dh6zjXJxa2w6crpliZOwkWhed\nH35dMX7p7lYS/0KUhLd26SeTj/S1z8RRjG+U8T3aW/dtQgrH15FaXxByPUdb67DPbRl3gTGE0gV8\nGlIIdye8Ky8Idn5ciqTpauT7KKb0ZvV+a+gHIQUpIO9BvjP3qq5ehVwPmdgH/pPayjIBsAeiSAus\nYyGiCGYhyzd4w2DPKC4r9ZF0ORJHgV1j+fkD8v3/gLTa7a0+lQgso3RT1Lt2e5ACoQXShB9I6Qkq\n45AC165pDSe8KQfygR3lOdrjFLaRBse8hUFWBBl3jT8VGSy7Dykgf0AyYJDZkDaDreeADIS+Spwm\nocQZv64Yv3T3Kol3KV058JPJi3LYBCngv0S68epZx0WEt7yeIXz7xgwi7yDVHSkgRhE5vWz3q33c\nvff3QFpwUxCF/z7OIGg3Sisi7xGLSMsfxJL5Bil8n0DiyVtov014t20LSo91xJN0wtMmyAzuILV4\nt0wHpCJlyyxGKmbZyDiZbQqcQeRuKgVZgS/XdZ5rXXPj97FHUhJuzsW/BvUF0tz+xjr2Iom2Emnm\nfU74uiOdKN1V4yez0HqXCyhduAd5D/sZIJk2H2mlzEWsK17A2UnL/bG9FsDdTd8oh01hhGMdopDs\nQtSvK8Yv3YNUDoLIxMKvgIfIg+7u/utI4yfea7ci6XY7YoG1AFHg8XD/ChkbOh9Jg8FIPjqR8FU6\n2+PUVvsjyibS3sNuvN0sfjKRlnZ3V57+jOSROkh6f4eztPijMY5HAri7+RFZz+gKwheUszkWpyKF\n9f8YnFr8cqLX4v1k3AW9N+9EG2srEzXJRPRN6zcVZ/9RY/3/ivDukWVIgWo32ZKQWr1tbvU/JJNP\nReJoCFJ4X2m5dyeybfxqnB2FXkE+LLvQ7oSY1/3DcrsRGSg+FRnkewax63Yn7GkBZGLh9x7g2Nff\nhxQMzyEZbzPhSwYfhbNkcBB3t0njW4SbmfZEFKM9HnAX8vHbi60NxRnA+w9S+DS2njkXsZSAcLM9\nv3R/C+lfLrDOc61nu7vm/GQizQexa8nXI8qvi8d9AdK8z7TC+g+kS8l+1/OQWt1ol3x/HNPXTKTb\nwd1//YPlj72jWQPrvkPi4O5OO+8eG+65GAuQ7yAXqZG/jhSQ3m5RNycig9hvBpDBeu5AHKukFkhX\np7vlchXSXdQWKaQ/s66PwMlz3nLOzjex3Ce7zusjhbptqnoI8q3Y1kDRzHqTkXzYAInL9sh4RAay\nf3g3pHyIJVOMjLfsQyp69j7DDZCKprerr8zUpP0Exlm/Xnte+5qbFUjmLrDOc6xrNv0JVxKTCLeG\n+C/yAdia+QgkMdOQ/vn3kELbXWu3lcyPVnhmIJnkfaSAO4fwwWdiyPwbqUFE+pgMUkCPD/AeILbc\nTyDN8PuQDJ+ELHZmT+m/CqnVfoIzI9PP3Y13DKSN9Q42ZxJeeD6BfFg3IApiHNHTNdJcDre7HUcp\nSJeSV0kQUAYrzJGUld0N9w4yO
Osu4O3dzeYRng/tCVF2YWQrgXGI0ptmuQ1GbMTdrEcKAbsQr49T\nOFTUfR/SUk2z/D8bqW33w1G+IHmqCMmTdg16PqIUhhFuYWOnjT242ieADJa/05CWdxtk/OX/cPZm\nsO9pgyilXtb9/0Lyups0K8yRFLmfexESL/stmV9xNp63cY8B7EcUwV5kLsZOwg0/tuLkVT8Z99wN\ndxpmEnuPisDUJCWQb/0ejESkbSXQAKffLujH7qckNiC132i1/Pcst/8hXSZ2LXwJokDcG7+kIorh\nKissV1N6MxavjF1LGUdkshBzt1jvYc9SHYw0/x9EaqgtkMGuh5DCwS4opiD7G7yH9FFu8XGPxTrC\n13LfhRSY063zc13P3YGkbbR0tecKRHMPUjkIWoGIpqzOI3y/20gFfC6xsdPjGZxWkkEKYVtx2/li\nG5K/Zlrnf0TyckXdQfLYA0hc9kG6JiYj+cZ+L5DCy7bUshW/bUq9C+d7dGNXcoLIgMzXOQhpZbhr\n+scRni6vWucplK7w9UDGVOzumt+Qb/frgO4gRgwLEeXyFKXnEaxCvtv/IWn+FyQt2yBxsg9pzds0\nwMlnxT4yq61zb/7eQuk9ictFTeoOsvkG6aOzJ50chGSco3EG8aJ97P9n/ffrWvBuNuG+ZjeZGyLd\nLvZ6+p8h4w7nIbV7d63K3TydjNOUDcWQicWbyJjF7zHew56lGm3q+XVI7TXfc70bUki84+PuHuBz\nK7UkJH5W4ayp3h6pZfeyzm3zxPVWGD8lcrp+imPJESvdwV+JBJH5AlGObmV1nRVuv2UrBhF9BjvI\njlGx0gMq3sVRli4QPzojhfLnOLtyDUFakxXFW9O/GCmE51vX/uWRT7OuR7I4W4h8h7bJcR/kO+wS\n0B2kS+p4JK/tQ975E2RCGsi39gjS8gZJw2uQ/LOB0ktltEIqQR8gys1PBvzzf7mpiUog0sdo98va\nRPvYbeuDaEpilvV/GtLl4q7lN0EKtbgkTEAWEjmsjXCWZYj2Ht8ihdlfkI/Kq3C8H1pFGOH6X4S0\nTD4t4zP80tXP3U9JBJGJpqx6IFZb0QaRX0G6J2IpgW4kLj2CkI7s0mUP4Ocjg8jbot3gIdI31ozw\nJUyiyYyg9E50biV2u3U9SC0+0jId9tISQdzdHIbU1v+GzN2wB8bdLeJIxKMCEqRcKxc1qTvIZhOi\nuV+3zgdSuvk2nfCJLsXWNb9ao80IpPZgzw78DGlF7MMZ7OyDfES5hK8zY1u7RCvAbbkuAWTetX6f\ntWQutNwew9kfN9p7DEUGtuogzehoNEA+rM44md5WMEHcoXT/rJdDrTA3t57TBel6ucsl45eufu52\nH63NHsItmoLI/EjkOR43I+8caYcoECUwIoobSFzdT7D0AP+8VVF3kMJ1IY510DCki9Huo3YXYDbu\nZ0T6xl4iXOkGkYHoNf2nKV2Lf5rwWvwsZHzMPU4zC6eQ93Ofh0z464qk/ydIXNjdZyC9AD9bbrMp\nvdBdpPecRnhl0U8mSLmmWHRA+hzXWsccSs/mjWRd4zbT+wbpsrE5iMj7z8bie6SvvRnOpJlsl/uD\nSLeJvR7J/dZhT/4JIhPpPbxrlvi9x6mUxl2Av4SMd6xE5kO8T7gJnZ87ODNa3bNdP0W6VrKQj+cY\nV9hDlDaX9UtXP/cPCN+ceyDSbKcMModa53bYuuDsZBWNWIvk2bjNIv3SA/zzVkXdIfLSIe5r7vta\nIxWiO13uft9YEJkeSF5ZbR0LCFcQkezkvfME8pF5D9EOP3csP/0qzG2RStj/rLC63y0ecRGkXFM8\npODUqC7xuPl97H4J0gcp7JYTfRq41z7ci18BHkRmAeHbCvam7JkvEu5CyX6GbQnlXUfJzx2iL8A1\nGhm/sBWT+93cYT8c6SdPITxdBwR0h2AfkZ9MEGXlxY5L75IP7mNv5FtLPcPGL29V1B2kq8u9b3I
f\nwucJRMJdAMdD6S6MEAa3rfzDSC0+zzr+h1Qs7MmZ0fBTzF73uwhXAmmEt279FrqraFwEyd+KD96P\nqKK1xlg1qe7WcR9S+B1L+KxhG78CPIiMbabqrim5/Yj1HkELJbvZOxspxJsQrvD83CH2jmkLkUHm\nDq5r5+KYVl6NxHe0lRr93EHisJP1/3Sk9n4S4QT50KIpqyBx+TPRZ9xuCPgMv7xVUXc3XQnPW/+/\nvbMHsSIJ4vhvTw1ONzDyogPBQIz8QDTy9tILzEwEETQQwchEUETQZAMRDIwWzQzvQrmLzjVQRBQF\nRRANFEX8QFQUQRQ0+L9mZvrNTPVsz3s+H/VLdpfqndfdr7q7urqr5g5V/3M5In4rOiTuarlaZSxL\nfxHbiq/DClqL5fODz92IgtEeoEjrgJXoLqcvUvQ7i2k6E7jbIoujex8hiy4M9A9otxCuTx5EQVPn\nBn8/Q37AwDuKSSom3GkPlLev3yjODPYjH2uICH7H8I7FKnMLWdarI3kYKG3tWIMmuLc1bbhW+n0B\n3Uk+ju5pz1KNPrXkID/3dgoLdBtFZO5XdC1xAblbnqOdVTjfOIAmnI9owvx78PNsonwe3dpYhiaG\nP1D8xQk0KZ9GA+0Quja8mWrmznmKvPCvqQ7eXejMZQt2X14a9E3dwL2C9MJ6hqVbv2TKy8n87iDd\nCrr3HulWmOjLdQmH/eHtbxtQdHd5of2A2veoQxnLX/8nw4T3ZYQFtI7fWmRBXuYoMp6uo+9njmo2\n381ox7IbxbY8RLvG8+T3haXfTgnL0rKosw5il1IXS6qOum1oOR9JvAh0KRNIaQdooOyoKQvFoNuA\nFHM2kv+VKA80JeBaRfW1kbMUNz1CPWN3S5yp0ZLfR8bOSjSwwqT2K4Vr4V6pDWtpTgm8Dk0Gn5BO\nXR2UT+lLi9xndHVxdJWDbUHvI81yTbVuF+lu6Yc6WvNBl/liDunRMfRd/MtwdtW6RHd99IWl306J\nlEGU65ddpFkh/0+oY9dtaFOZPv3LTfThholZymJ2meGrceVMjZa87Ywk/N11oNUtVj+aXN3q6+wi\nZUFNXXTr2J9YR2s+6LLo3qBwJ8JwdP9N6hPd9dEXln5nM41xAm28pH3L/ablf9dTvWkTU96Gtj0j\nTg1R9xnWM94yunYE7qH78OVt6EW0Db2NFLFNHt+9jnmK3FdWPX9HV29fRPIZdEbyxJCfQe6gT8gd\nEgbOarRwb0ED7TDVCX8FipbdQ/NLg0I74vTaoyJXt1J0L3eMPKQaSDmLrljeR9/DJoaDLevKNPF0\nUM+2OrYlGlwKyylyggWOoJt7INdq/BKkfejaeG5f7KRdv7vG2wwxTWcCKfThl23iJFoELF97ii/e\nKvMfo2tHYIYit8tj5H/9B1k5MwlysP2u1mCGZgv2G+0DIMjnKAJ5ypbTcopXhe5lOGLzy0C+QDf/\n8SjJ1a0U3csdI6/Q5BYW1I/oMP4Cxf19q4zV3xeNOvZNvACADobDIlD3FrxTyOef2xdL1X9niVhb\nxHFtQ3P9w334qHPdMGD7Xfuo5zjIPW/qi1zdGsfZRUqOfavMpPS3Nd7b5J/ppy+cCWNSlHMcWMqZ\norw/yyRvMS3t+FmYlP7u83DZmRImRTkdxxk949hZOY7jOI7jOI7jOI7jOI7jOI7jOI7jOI7jOI7j\njIjvTha9wDdGyGEAAAAASUVORK5CYII=\n", 478 | "text": [ 479 | "" 480 | ] 481 | } 482 | ], 483 | "prompt_number": 41 484 | }, 485 | { 486 | "cell_type": "heading", 487 | "level": 3, 488 | "metadata": {}, 489 | "source": [ 490 | "Your turn!" 
491 | ] 492 | }, 493 | { 494 | "cell_type": "markdown", 495 | "metadata": {}, 496 | "source": [ 497 | "\n" 505 | ] 506 | } 507 | ], 508 | "metadata": {} 509 | } 510 | ] 511 | } 512 | -------------------------------------------------------------------------------- /REST_API/notebooks/tutorial_utils.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "", 4 | "signature": "sha256:edb7573496142fa1568b286b58fa094bf7bba760b23353c43d3063e253e852f4" 5 | }, 6 | "nbformat": 3, 7 | "nbformat_minor": 0, 8 | "worksheets": [ 9 | { 10 | "cells": [ 11 | { 12 | "cell_type": "heading", 13 | "level": 1, 14 | "metadata": {}, 15 | "source": [ 16 | "Tutorial Utilities" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "This notebook is a collection of useful functions used in other notebooks." 24 | ] 25 | }, 26 | { 27 | "cell_type": "heading", 28 | "level": 3, 29 | "metadata": {}, 30 | "source": [ 31 | "Which API URL?" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "You should generally use the production URL of the API." 
39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "collapsed": false, 44 | "input": [ 45 | "PDBE_API_URL = \"http://www.ebi.ac.uk/pdbe/api\"" 46 | ], 47 | "language": "python", 48 | "metadata": {}, 49 | "outputs": [], 50 | "prompt_number": 1 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "The API also has a development instance meant for beta testing and pre-release hosting, available at :" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "collapsed": false, 62 | "input": [ 63 | "PDBE_API_URL = \"http://www.ebi.ac.uk/pdbe/api\"" 64 | ], 65 | "language": "python", 66 | "metadata": {}, 67 | "outputs": [], 68 | "prompt_number": 8 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "At the time of writing this notebook (Nov 2014), the API was not released on www. So let us use the wwwdev instance, otherwise just comment out the line above.\n", 75 | "\n", 76 | "Browse the API documentation interactively at PDBE_API_URL + \"/doc\", i.e. http://www.ebi.ac.uk/pdbe/api/doc" 77 | ] 78 | }, 79 | { 80 | "cell_type": "heading", 81 | "level": 3, 82 | "metadata": {}, 83 | "source": [ 84 | "Logger setup" 85 | ] 86 | }, 87 | { 88 | "cell_type": "raw", 89 | "metadata": {}, 90 | "source": [ 91 | "It is a good practice to setup a logger." 
92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "collapsed": false, 97 | "input": [ 98 | "import logging, sys\n", 99 | "\n", 100 | "# configure the logger\n", 101 | "# btw, reload is just a hack to make logging work in the notebook, it's usually unnecessary\n", 102 | "reload(logging)\n", 103 | "logging.basicConfig(\n", 104 | " level=logging.DEBUG, stream=sys.stdout,\n", 105 | " format='LOG|%(asctime)s|%(levelname)s %(message)s', datefmt='%d-%b-%Y %H:%M:%S'\n", 106 | ")" 107 | ], 108 | "language": "python", 109 | "metadata": {}, 110 | "outputs": [], 111 | "prompt_number": 9 112 | }, 113 | { 114 | "cell_type": "heading", 115 | "level": 3, 116 | "metadata": {}, 117 | "source": [ 118 | "Function to get API data" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "The API call you made can fail (very rarely) due to a variety of reasons beyond your control, e.g. network, overloading of our http servers, temporary failure of our databases, etc. Hence it is always more robust to try the call, say thrice, before raising an exception, or reporting a bug to the PDBe. Let us write a function for this which will return a Python object equivalent to the JSON returned by the API." 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "collapsed": false, 131 | "input": [ 132 | "import json, urllib2\n", 133 | "\n", 134 | "def get_PDBe_API_data(api_url) :\n", 135 | " num_trials = 3\n", 136 | " for trial_number in range(num_trials) :\n", 137 | " try :\n", 138 | " return json.loads(urllib2.urlopen(api_url).read())\n", 139 | " except Exception, err :\n", 140 | " logging.warn(\"Error fetching PDBe-API data! 
Trial number %d for call %s\" % (trial_number, api_url))\n", 141 | " if trial_number == num_trials-1 :\n", 142 | " raise err" 143 | ], 144 | "language": "python", 145 | "metadata": {}, 146 | "outputs": [], 147 | "prompt_number": 10 148 | } 149 | ], 150 | "metadata": {} 151 | } 152 | ] 153 | } 154 | -------------------------------------------------------------------------------- /REST_API/presentations/20141120_PDBe_workshop.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PDBeurope/PDBe_Programming/a2a5be13e94eb44365e2db495a2812b26b3ebe54/REST_API/presentations/20141120_PDBe_workshop.pdf -------------------------------------------------------------------------------- /REST_API/snippets/basic_get_post.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import argparse 4 | import sys 5 | 6 | PY3 = sys.version > '3' 7 | 8 | if PY3: 9 | import urllib.request as urllib2 10 | else: 11 | import urllib2 12 | 13 | SERVER_URL = "https://www.ebi.ac.uk/pdbe/api" 14 | 15 | SUMMARY = "/pdb/entry/summary" 16 | 17 | def make_request(url, data): 18 | request = urllib2.Request(url) 19 | 20 | try: 21 | url_file = urllib2.urlopen(request, data) 22 | except urllib2.HTTPError as e: 23 | if e.code == 404: 24 | print("[NOTFOUND %d] %s" % (e.code, url)) 25 | else: 26 | print("[ERROR %d] %s" % (e.code, url)) 27 | 28 | return None 29 | 30 | return url_file.read().decode() 31 | 32 | def get_request(url, arg, pretty=False): 33 | full_url = "%s/%s/%s?pretty=%s" % (SERVER_URL, url, arg, str(pretty).lower()) 34 | 35 | return make_request(full_url, None) 36 | 37 | def post_request(url, data, pretty=False): 38 | full_url = "%s/%s/?pretty=%s" % (SERVER_URL, url, str(pretty).lower()) 39 | 40 | if isinstance(data, (list, tuple)): 41 | data = ",".join(data) 42 | 43 | return make_request(full_url, data.encode()) 44 | 45 | if __name__ == '__main__': 46 | parser = 
argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter) 47 | parser.add_argument('-e', type=str, default=None, action='store', help='the pdbid') 48 | parser.add_argument('-p', type=str, default=None, action='store', help='the comma-separated list of pdbids') 49 | args = parser.parse_args() 50 | 51 | if args.e: 52 | response = get_request(SUMMARY, args.e, True) 53 | elif args.p: 54 | response = post_request(SUMMARY, args.p, True) 55 | else: 56 | parser.print_help() 57 | sys.exit(1) 58 | 59 | if response: 60 | print(response) 61 | 62 | -------------------------------------------------------------------------------- /Vagrantfile: -------------------------------------------------------------------------------- 1 | # -*- mode: ruby -*- 2 | # vi: set ft=ruby : 3 | 4 | # All Vagrant configuration is done below. The "2" in Vagrant.configure 5 | # configures the configuration version (we support older styles for 6 | # backwards compatibility). Please don't change it unless you know what 7 | # you're doing. 8 | Vagrant.configure(2) do |config| 9 | # The most common configuration options are documented and commented below. 10 | # For a complete reference, please see the online documentation at 11 | # https://docs.vagrantup.com. 12 | 13 | # Without this, vagrant ssh logs you in as "vagrant". 14 | # This is the user that runs jupyter, so that seems good. 15 | #config.ssh.username='root' 16 | #config.ssh.password='vagrant' 17 | #config.ssh.insert_key='true' 18 | 19 | # Every Vagrant development environment requires a box. You can search for 20 | # boxes at https://atlas.hashicorp.com/search. 21 | config.vm.box = "bento/centos-7.4" 22 | config.vm.box_download_insecure=true 23 | # often use config.vm.box = "centos/7" 24 | 25 | # Disable automatic box update checking. 26 | # These configurations are fragile, 27 | # Let's not invite the framework to break it. 
28 | config.vm.box_check_update = false 29 | 30 | # Create a forwarded port mapping which allows access to a specific port 31 | # within the machine from a port on the host machine. In the example below, 32 | # accessing "localhost:8888" will access port 8888 on the guest machine. 33 | # This is the port for jupyter 34 | config.vm.network "forwarded_port", guest: 8888, host: 8888 35 | 36 | # or create a private network, which allows host-only access to the machine 37 | # using a specific IP. 38 | # config.vm.network "private_network", ip: "10.0.0.2" 39 | 40 | # Do not do this, since the jupyter installation is unsecure: 41 | #config.vm.network "public_network" 42 | 43 | if Vagrant.has_plugin?("vagrant-vbguest") 44 | puts "Use of vbguest with this VM is not recommended." 45 | # vbguest succeeds only after kernel is updated 46 | config.vm.provision "shell", inline: "yum -y update kernel" 47 | config.vm.provision :reload 48 | end 49 | 50 | # pass any proxy details to guest 51 | if Vagrant.has_plugin?("vagrant-proxyconf") 52 | config.proxy.http = ENV['http_proxy'] || "" 53 | config.proxy.https = ENV['https_proxy'] || "" 54 | config.proxy.no_proxy = "localhost,127.0.0.1" 55 | end 56 | 57 | 58 | # Share an additional folder to the guest VM. The first argument is 59 | # the path on the host to the actual folder. The second argument is 60 | # the path on the guest to mount the folder. And the optional third 61 | # argument is a set of non-required options. 62 | config.vm.synced_folder ".", "/home/vagrant", type: "virtualbox" 63 | 64 | # Provider-specific configuration so you can fine-tune various 65 | # backing providers for Vagrant. These expose provider-specific options. 
66 | # Example for VirtualBox: 67 | # 68 | config.vm.provider "virtualbox" do |vb| 69 | # # Display the VirtualBox GUI when booting the machine 70 | # vb.gui = true 71 | # 72 | # Customize the amount of memory on the VM: 73 | vb.memory = "8192" #"4096" 74 | vb.linked_clone = true 75 | end 76 | # 77 | # View the documentation for the provider you are using for more 78 | # information on available options. 79 | 80 | # Define a Vagrant Push strategy for pushing to Atlas. Other push strategies 81 | # such as FTP and Heroku are also available. See the documentation at 82 | # https://docs.vagrantup.com/v2/push/atlas.html for more information. 83 | # config.push.define "atlas" do |push| 84 | # push.app = "YOUR_ATLAS_USERNAME/YOUR_APPLICATION_NAME" 85 | # end 86 | 87 | 88 | # Enable provisioning with a shell script. Additional provisioners such as 89 | # Puppet, Chef, Ansible, Salt, and Docker are also available. Please see the 90 | # documentation for more information about their specific syntax and use. 91 | # config.vm.provision "shell", inline: <<-SHELL 92 | # sudo apt-get update 93 | # sudo apt-get install -y apache2 94 | # SHELL 95 | config.vm.provision :shell, path: "bootstrap.sh" 96 | 97 | # start jupyter 98 | config.trigger.after :up, :stderr => true do 99 | #if Vagrant.has_plugin?("vagrant-vbguest") 100 | #run "vagrant vbguest --auto-reboot --no-provision" 101 | #run "mount -t vboxsf -o uid=1000,gid=1000 vagrant /vagrant" 102 | #end 103 | run_remote "systemctl start jupyter" 104 | end 105 | 106 | 107 | end 108 | -------------------------------------------------------------------------------- /bootstrap.sh: -------------------------------------------------------------------------------- 1 | # Run from Vagrantfile the first time a VM is provisioned. 
2 | # To run again, you must do vagrant destroy 3 | username=${1:-vagrant} 4 | 5 | echo `pwd` 6 | 7 | # faster installations of updates 8 | yum install -y deltarpm 9 | 10 | # operating system dependencies for virtualbox additions 11 | # Note do not yum update -y 12 | yum install -y kernel-headers gcc 13 | # note that dkms, often recommended for virtualbox, is unsstable in Centos 14 | 15 | # could mount iso and 16 | # cd /media/VirtualBoxGuestAdditions 17 | # ./VBoxLinuxAdditions.run 18 | 19 | # requirements for rdkit.Chem 20 | yum install -y libXrender libXext 21 | 22 | # requirement for pydot 23 | yum install -y graphviz 24 | 25 | # requirement for Miniconda 26 | yum install -y bzip2 27 | 28 | # requirement for Theano 29 | #yum install -y gcc-c++ epel-release 30 | #yum install -y openblas 31 | 32 | #Install miniconda 33 | for i in 1 2 3 4 5; do 34 | wget -c https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && break 35 | sleep 15 36 | echo 'Retrying bash download' 37 | done 38 | 39 | bash Miniconda3-latest-Linux-x86_64.sh -b 40 | 41 | #bash Miniconda3-latest-Linux-x86_64.sh < /etc/jupyter/jupyter_notebook_config.py < /usr/lib/systemd/system/jupyter.service <> ../.bashrc 107 | echo 'source activate root' >> ../.bashrc 108 | # jupyter will use root environment when started by systemd 109 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: root 2 | channels: 3 | - rdkit 4 | - defaults 5 | dependencies: 6 | # - h5py 7 | # - keras 8 | # - mkl 9 | #- m2w64-toolchain 10 | - matplotlib 11 | - numpy 12 | - pandas 13 | - pillow 14 | - pydot 15 | - python=3.6.3 16 | - scikit-learn 17 | - scipy 18 | - seaborn 19 | #- tensorflow 20 | #- tensorflow-base 21 | #- tensorflow-tensorboard 22 | #- rdkit 23 | - jupyter 24 | - cython 25 | - pip 26 | - pip: 27 | - mysolr 28 | #- molvs 29 | 
-------------------------------------------------------------------------------- /images/README.txt: -------------------------------------------------------------------------------- 1 | For images saved by notebooks 2 | -------------------------------------------------------------------------------- /search_interface/notebooks/search_facets.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Search with facetting and grouping" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Introduction" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "In [search_introduction](search_introduction.ipynb), we saw how basic selectors can be progressively added to a Solr query to find entries of interest.
\n", 22 | "Now we will see how facetting, grouping and pivoting can be used to find interesting facts about your favorite protein." 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "## Getting started" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "Let us setup logger and create mysolr instance for the Solr core." 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 4, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "from mysolr import Solr\n", 46 | "PDBE_SOLR_URL = \"http://wwwdev.ebi.ac.uk/pdbe/search/pdb\"\n", 47 | "solr = Solr(PDBE_SOLR_URL, version=4)\n", 48 | "\n", 49 | "UNLIMITED_ROWS = 10000000 # necessary because default in mysolr is mere 10\n", 50 | "\n", 51 | "import logging, sys\n", 52 | "#reload(logging) # reload is just a hack to make logging work in the notebook, it's usually unnecessary\n", 53 | "logging.basicConfig( level=logging.INFO, stream=sys.stdout,\n", 54 | " format='LOG|%(asctime)s|%(levelname)s %(message)s', datefmt='%d-%b-%Y %H:%M:%S' )\n", 55 | "logging.getLogger(\"requests\").setLevel(logging.WARNING)\n", 56 | "\n", 57 | "def join_with_AND(selectors) :\n", 58 | " return \" AND \".join(\n", 59 | " [\"%s:%s\" % (k,v) for k,v in selectors]\n", 60 | " )" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "## Find your protein" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "Identifying previous instances of your protein in the PDB is not an easy task because molecule names given by depositors can differ slightly.\n", 75 | "The SIFTS project assigns UniProt cross-references to proteins in PDB entries and names them consistently.\n", 76 | "The following function searches and facets on UniProt name to find proteins of our interest.\n", 77 | "Note how we are using facet options to identify all distinct values of molecule_name." 
78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 5, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "def molecule_name_facet_search(selectors) :\n", 87 | " response = solr.search(**{\n", 88 | " \"rows\" : UNLIMITED_ROWS, \"fl\" : \"pdb_id, entity_id\", \"q\" : join_with_AND(selectors),\n", 89 | " \"facet\" : \"true\", \"facet.limit\" : UNLIMITED_ROWS, \"facet.mincount\" : 1,\n", 90 | " \"facet.field\" : \"molecule_name\",\n", 91 | " })\n", 92 | " num_mols = len(response.documents)\n", 93 | " mol_name_counts = response.facets['facet_fields']['molecule_name']\n", 94 | " logging.info(\"%d molecules found with %d distinct molecule_names.\" % (num_mols, len(mol_name_counts.keys())))\n", 95 | " for mol_name, nmol in mol_name_counts.items() :\n", 96 | " logging.info(\"%3d molecules are named as %s\" % (nmol, mol_name))" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "Let us assume we are interested in carbonic anhydrases. We write the protein name as a regular expression allowing for case changes on start of word." 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 6, 109 | "metadata": {}, 110 | "outputs": [ 111 | { 112 | "name": "stdout", 113 | "output_type": "stream", 114 | "text": [ 115 | "LOG|11-Jul-2018 09:53:09|INFO 0 molecules found with 0 distinct molecule_names.\n" 116 | ] 117 | } 118 | ], 119 | "source": [ 120 | "molecule_name_facet_search([\n", 121 | " ( 'molecule_name' , '/.*[Cc]arbonic.*[aA]nhydrase.*/'),\n", 122 | "])" 123 | ] 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "metadata": {}, 128 | "source": [ 129 | "Note that there are some unintended hits - one putative and another inhibitor. Let us filter those out." 
130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 7, 135 | "metadata": {}, 136 | "outputs": [ 137 | { 138 | "name": "stdout", 139 | "output_type": "stream", 140 | "text": [ 141 | "LOG|11-Jul-2018 09:53:13|INFO 0 molecules found with 0 distinct molecule_names.\n" 142 | ] 143 | } 144 | ], 145 | "source": [ 146 | "selectors = [\n", 147 | " ( 'molecule_name' , '/.*[Cc]arbonic.*[aA]nhydrase.*/'),\n", 148 | " ('NOT molecule_name' , '(/.*Putative.*/ OR /.*Inhibitor.*/)'),\n", 149 | "]\n", 150 | "molecule_name_facet_search(selectors)" 151 | ] 152 | }, 153 | { 154 | "cell_type": "markdown", 155 | "metadata": {}, 156 | "source": [ 157 | "We can also sharpen our search considerably by using annotations like GO, SCOP etc. But the filters should strike a balance between removing spurious hits and keeping genuine ones.\n", 158 | "Often optimal filters are found through multiple trials." 159 | ] 160 | }, 161 | { 162 | "cell_type": "markdown", 163 | "metadata": {}, 164 | "source": [ 165 | "## Count entries by experiment type" 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "metadata": {}, 171 | "source": [ 172 | "Now let us see a summary of experiment types that have been used to solve carbonic anhydrases.\n", 173 | "Since the experimental method is a property of an entry, and not of the molecules within it, we need to group on pdb_id and facet in a group-sensitive way so that the counts we get are for entries." 
174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": 8, 179 | "metadata": {}, 180 | "outputs": [ 181 | { 182 | "name": "stdout", 183 | "output_type": "stream", 184 | "text": [ 185 | "LOG|11-Jul-2018 09:53:16|INFO There are 0 experimental methods with this protein's structure has been studied.\n" 186 | ] 187 | } 188 | ], 189 | "source": [ 190 | "response = solr.search(**{\n", 191 | " \"rows\" : UNLIMITED_ROWS, \"fl\" : \"pdb_id, entity_id\",\n", 192 | " \"q\" : join_with_AND(selectors),\n", 193 | " \"facet\" : \"true\", \"facet.limit\" : UNLIMITED_ROWS, \"facet.mincount\" : 1,\n", 194 | " \"facet.field\" : \"experimental_method\",\n", 195 | " \"group\" : \"true\", \"group.facet\" : \"true\",\n", 196 | " \"group.field\" : \"pdb_id\",\n", 197 | "})\n", 198 | "\n", 199 | "expt_counts = response.facets['facet_fields']['experimental_method']\n", 200 | "logging.info(\"There are %d experimental methods with this protein's structure has been studied.\" % len(expt_counts))\n", 201 | "for expt, count in expt_counts.items() :\n", 202 | " logging.info(\"%s : %d\" % (expt,count))" 203 | ] 204 | }, 205 | { 206 | "cell_type": "markdown", 207 | "metadata": {}, 208 | "source": [ 209 | "## Count entries by year of deposition" 210 | ] 211 | }, 212 | { 213 | "cell_type": "markdown", 214 | "metadata": {}, 215 | "source": [ 216 | "Let us now facet on year of deposition and see the years in which an entry was deposited for carbonic anhydrases." 
217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": 9, 222 | "metadata": {}, 223 | "outputs": [ 224 | { 225 | "name": "stdout", 226 | "output_type": "stream", 227 | "text": [ 228 | "LOG|11-Jul-2018 09:53:19|INFO There are 0 years in which this protein's structure has been studied.\n" 229 | ] 230 | } 231 | ], 232 | "source": [ 233 | "response = solr.search(**{\n", 234 | " \"rows\" : UNLIMITED_ROWS, \"fl\" : \"pdb_id, entity_id\",\n", 235 | " \"q\" : join_with_AND(selectors),\n", 236 | " \"facet\" : \"true\", \"facet.limit\" : UNLIMITED_ROWS, \"facet.mincount\" : 1,\n", 237 | " \"facet.field\" : \"deposition_year\",\n", 238 | " \"group\" : \"true\", \"group.facet\" : \"true\",\n", 239 | " \"group.field\" : \"pdb_id\",\n", 240 | "})\n", 241 | "year_counts = response.facets['facet_fields']['deposition_year']\n", 242 | "logging.info(\"There are %d years in which this protein's structure has been studied.\" % len(year_counts))\n", 243 | "for year in sorted(year_counts.keys(), key=lambda x : int(x)) :\n", 244 | " logging.info(\"%s : %d\" % (year,year_counts[year]))" 245 | ] 246 | }, 247 | { 248 | "cell_type": "markdown", 249 | "metadata": {}, 250 | "source": [ 251 | "Note that we do not have to facet on one field at a time - we could have facetted on multiple fields individually in the same call - just provide comma-separated fields list." 252 | ] 253 | }, 254 | { 255 | "cell_type": "markdown", 256 | "metadata": {}, 257 | "source": [ 258 | "## Range-based facets" 259 | ] 260 | }, 261 | { 262 | "cell_type": "markdown", 263 | "metadata": {}, 264 | "source": [ 265 | "Facets can be defined to be range based, e.g. this is useful for fields like resolution, year, length of crystallographic cell, etc." 
266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": 10, 271 | "metadata": {}, 272 | "outputs": [ 273 | { 274 | "name": "stdout", 275 | "output_type": "stream", 276 | "text": [ 277 | "LOG|11-Jul-2018 09:53:22|INFO Resolutions at which this protein has been solved is as follows:\n" 278 | ] 279 | } 280 | ], 281 | "source": [ 282 | "response = solr.search(**{\n", 283 | " \"rows\" : UNLIMITED_ROWS, \"fl\" : \"pdb_id, entity_id\",\n", 284 | " \"q\" : join_with_AND(selectors),\n", 285 | " \"facet\" : \"true\", \"facet.limit\" : UNLIMITED_ROWS, \"facet.mincount\" : 1,\n", 286 | " \"facet.field\" : \"resolution\",\n", 287 | " \"facet.range\" : \"resolution\",\n", 288 | " \"f.resolution.facet.range.start\" : \"0.0\",\n", 289 | " \"f.resolution.facet.range.end\" : \"100\",\n", 290 | " \"f.resolution.facet.range.gap\" : \"0.5\",\n", 291 | " \"f.resolution.facet.range.other\" : \"between\",\n", 292 | " \"f.resolution.facet.range.include\" : \"upper\",\n", 293 | " \"group\" : \"true\", \"group.facet\" : \"true\",\n", 294 | " \"group.field\" : \"pdb_id\",\n", 295 | "})\n", 296 | "\n", 297 | "import string, collections\n", 298 | "\n", 299 | "resol_counts = response.facets['facet_ranges']['resolution']['counts']\n", 300 | "resol_counts = collections.OrderedDict([(resol_counts[rci], resol_counts[rci+1]) for rci in range(0, len(resol_counts), 2)])\n", 301 | "logging.info(\"Resolutions at which this protein has been solved is as follows:\")\n", 302 | "for resol in sorted(resol_counts.keys(), key=float) : # float() replaces string.atof, which does not exist in Python 3\n", 303 | " logging.info(\"%3d entries in resolution bin starting %s\" % (resol_counts[resol], resol))" 304 | ] 305 | }, 306 | { 307 | "cell_type": "markdown", 308 | "metadata": {}, 309 | "source": [ 310 | "## Hierarchical facetting" 311 | ] 312 | }, 313 | { 314 | "cell_type": "markdown", 315 | "metadata": {}, 316 | "source": [ 317 | "Facets can be used hierarchically too, e.g. 
facet first on resolution, then on year, etc.\n", 318 | "Unfortunately mysolr does not support this feature, but the good news is that you can write simple python on documents returned and achieve the same effect.\n", 319 | "e.g. let us see how to find distribution of resolution vs deposition year in this set of entries." 320 | ] 321 | }, 322 | { 323 | "cell_type": "code", 324 | "execution_count": 12, 325 | "metadata": {}, 326 | "outputs": [ 327 | { 328 | "name": "stdout", 329 | "output_type": "stream", 330 | "text": [ 331 | "LOG|11-Jul-2018 11:59:04|INFO \n" 332 | ] 333 | } 334 | ], 335 | "source": [ 336 | "response = solr.search(**{\n", 337 | " \"rows\" : UNLIMITED_ROWS,\n", 338 | " \"fl\" : \"pdb_id, entity_id, deposition_year, resolution\",\n", 339 | " \"q\" : join_with_AND(selectors),\n", 340 | "})\n", 341 | "\n", 342 | "resbin_width = 0.5\n", 343 | "def resol_bin(resol) :\n", 344 | " import decimal\n", 345 | " return decimal.Decimal(int(resol/resbin_width) * resbin_width)\n", 346 | "\n", 347 | "yearbin_width = 5\n", 348 | "def depyear_bin(year) :\n", 349 | " return (year // yearbin_width) * yearbin_width # floor division: true division under Python 3 would leave the year unbinned\n", 350 | "\n", 351 | "entry_counted = set()\n", 352 | "counts = collections.defaultdict( lambda : collections.defaultdict( lambda: 0 ) )\n", 353 | "for adoc in response.documents :\n", 354 | " if adoc['pdb_id'] not in entry_counted :\n", 355 | " res_bin = resol_bin(adoc['resolution'])\n", 356 | " year_bin = depyear_bin(adoc['deposition_year'])\n", 357 | " counts[year_bin][res_bin] += 1; entry_counted.add(adoc['pdb_id']) # remember the entry so each is counted only once\n", 358 | "\n", 359 | "import itertools\n", 360 | "year_bins = sorted(counts.keys())\n", 361 | "resol_bins = sorted(set( itertools.chain(*[v.keys() for v in counts.values()]) ))\n", 362 | "\n", 363 | "logging.info(\" \" + \" \".join(\"%.1f-%.1f\" % (rb,float(rb)+resbin_width) for rb in resol_bins))\n", 364 | "for year in year_bins :\n", 365 | " to_print = [\"%d-%d\" % (year,year+yearbin_width)]\n", 366 | " total = 0\n", 367 | " for resol in resol_bins :\n", 368 | " total += 
counts.get(year, {}).get(resol, 0)\n", 369 | " for resol in resol_bins :\n", 370 | " count = counts.get(year, {}).get(resol, 0)\n", 371 | " to_print.append(count) #, #int(count*100./total),\n", 372 | " logging.info(to_print[0] + \" \".join([\"%5d\" % tp for tp in to_print[1:]]))" 373 | ] 374 | }, 375 | { 376 | "cell_type": "markdown", 377 | "metadata": {}, 378 | "source": [ 379 | "Note how higher resolution structures have increased over the years." 380 | ] 381 | }, 382 | { 383 | "cell_type": "markdown", 384 | "metadata": {}, 385 | "source": [ 386 | "## Your turn!" 387 | ] 388 | }, 389 | { 390 | "cell_type": "markdown", 391 | "metadata": {}, 392 | "source": [ 393 | "Find entries with protein of your interest, and facet by organism, genus, etc." 394 | ] 395 | }, 396 | { 397 | "cell_type": "code", 398 | "execution_count": null, 399 | "metadata": {}, 400 | "outputs": [], 401 | "source": [] 402 | } 403 | ], 404 | "metadata": { 405 | "kernelspec": { 406 | "display_name": "Python 3", 407 | "language": "python", 408 | "name": "python3" 409 | }, 410 | "language_info": { 411 | "codemirror_mode": { 412 | "name": "ipython", 413 | "version": 3 414 | }, 415 | "file_extension": ".py", 416 | "mimetype": "text/x-python", 417 | "name": "python", 418 | "nbconvert_exporter": "python", 419 | "pygments_lexer": "ipython3", 420 | "version": "3.6.3" 421 | } 422 | }, 423 | "nbformat": 4, 424 | "nbformat_minor": 1 425 | } 426 | -------------------------------------------------------------------------------- /search_interface/notebooks/search_introduction.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Introduction" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "The new search service under development at PDBe is powered by Apache Solr.\n", 15 | "\n", 16 | "A pre-release version of user interface is available 
here: http://wwwdev.ebi.ac.uk/pdbe/entry/search/index\n", 17 | "\n", 18 | "For programmatic usage, a Solr instance is available here: http://wwwdev.ebi.ac.uk/pdbe/search/pdb\n", 19 | "\n", 20 | "Please note that the search service will be released in 2015 - at that point, it would be better to use URLs similar to those above, but hosted from www instead of wwwdev." 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "# Getting started" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "To avoid writing long Solr URLs by hand and having to encode them etc., we will use a Solr client library called mysolr. It is pretty lightweight and easy to install, e.g. I installed it on my Redhat (Enterprise 6.6) machine as follows:\n", 35 | "\n", 36 | "easy_install mysolr==0.7\n", 37 | "\n", 38 | "There are many such client libraries available for python as well as other languages." 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "Let us now make a simple query - let us look for a PDB entry." 
46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 1, 51 | "metadata": { 52 | "collapsed": false 53 | }, 54 | "outputs": [ 55 | { 56 | "name": "stdout", 57 | "output_type": "stream", 58 | "text": [ 59 | "Number of results: 0\n" 60 | ] 61 | }, 62 | { 63 | "data": { 64 | "text/plain": [ 65 | "{'response': {'docs': [], 'numFound': 237765, 'start': 0},\n", 66 | " 'responseHeader': {'QTime': 3,\n", 67 | " 'params': {'q': 'status:REL', 'rows': '0', 'wt': 'json'},\n", 68 | " 'status': 0}}" 69 | ] 70 | }, 71 | "execution_count": 1, 72 | "metadata": {}, 73 | "output_type": "execute_result" 74 | } 75 | ], 76 | "source": [ 77 | "PDBE_SOLR_URL = \"http://www.ebi.ac.uk/pdbe/search/pdb\" \n", 78 | "# or https://www.ebi.ac.uk/pdbe/search/pdb/select?rows=0&q=status:REL&wt=json\n", 79 | "\n", 80 | "from mysolr import Solr\n", 81 | "solr = Solr(PDBE_SOLR_URL, version=4)\n", 82 | "\n", 83 | "response = solr.search(q='status:REL', rows=0)\n", 84 | "\n", 85 | "documents = response.documents\n", 86 | "print(\"Number of results:\", len(documents))\n", 87 | "\n", 88 | "#fields = response.documents[0].keys()\n", 89 | "#print(\"Number of fields in the documents:\", [len(rd.keys()) for rd in documents])\n", 90 | "\n", 91 | "response.raw_content" 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "The query above uses rows=0, so it returns no documents, only the total count of matches (numFound). If we instead query for a single PDB id (e.g. q='pdb_id:2qk9' with a non-zero rows value), there are 3 documents in the Solr response, and each has >75 fields. At this juncture, it is essential to understand what the document represents and contains before proceeding further." 99 | ] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": {}, 104 | "source": [ 105 | "# Entity document" 106 | ] 107 | }, 108 | { 109 | "cell_type": "markdown", 110 | "metadata": {}, 111 | "source": [ 112 | "The PDBe Solr instance serves documents based on polymeric entities in PDB entries, i.e. each document indexed by Solr represents polymeric molecules of type protein, sugar, DNA, RNA or DNA/RNA hybrid. 
This is why for entry 2qk9 we get 3 documents in the response, one each for the protein, RNA and DNA molecules in that entry.\n", 113 | "\n", 114 | "Fields in PDBe's entity-based Solr documents cover a wide range of properties, such as the entry's experimental details, details of deposition and primary publication, the entity's taxonomy, the entry's quality, the entity's cross references to UniProt and popular domain databases, biological assembly, etc. They are documented here: http://wwwdev.ebi.ac.uk/pdbe/api/doc/search.html" 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "# Solr features" 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "metadata": {}, 127 | "source": [ 128 | "It is also useful now to understand a little more about Solr querying. Solr has a rich and complex query syntax, described at http://wiki.apache.org/solr/CommonQueryParameters and elsewhere.\n", 129 | "\n", 130 | "The query parameters of immediate relevance to us in this tutorial are:\n", 131 | " * q - the query itself. There is a lot of flexibility in describing a query, e.g. fields, wildcards, case-insensitivity, logical operators, ranges, etc.\n", 132 | " * rows - number of results returned by Solr. Needs to be explicitly set in mysolr because it defaults to 10. Useful if only part of the results is desired. \n", 133 | " * fl - fields returned in each document. 
This is useful to reduce the size of response.\n", 134 | "\n", 135 | "Solr capabilities combined with the wide-ranging description in entity document can help us write really powerful Solr queries to find precisely the entries or polymers of interest.\n", 136 | "\n" 137 | ] 138 | }, 139 | { 140 | "cell_type": "markdown", 141 | "metadata": {}, 142 | "source": [ 143 | "# Examples" 144 | ] 145 | }, 146 | { 147 | "cell_type": "markdown", 148 | "metadata": {}, 149 | "source": [ 150 | "Now let us write a query to find entities containing a Pfam domain called \"Lipocalin\" in X-ray entries of decent resolution (1Å - 2Å)." 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 2, 156 | "metadata": { 157 | "collapsed": false 158 | }, 159 | "outputs": [ 160 | { 161 | "name": "stdout", 162 | "output_type": "stream", 163 | "text": [ 164 | "Found 292 matching entities in 292 entries.\n" 165 | ] 166 | } 167 | ], 168 | "source": [ 169 | "def join_with_AND(query_params) :\n", 170 | " '''convenience function to create query string with AND'''\n", 171 | " return \" AND \".join([\"%s:%s\" % (k,v) for k,v in query_params.items()])\n", 172 | "\n", 173 | "def execute_solr_query(query, query_fields) :\n", 174 | " '''convenience function'''\n", 175 | " query[\"q\"] = join_with_AND(query_fields) # add q\n", 176 | " response = solr.search(**query)\n", 177 | " documents = response.documents\n", 178 | " print(\"Found %d matching entities in %d entries.\" % (len(documents), len({rd[\"pdb_id\"] for rd in documents})))\n", 179 | " return documents\n", 180 | "\n", 181 | "query_detail = { \n", 182 | " \"pfam_name\" : \"Lipocalin\",\n", 183 | " \"resolution\" : \"[1 TO 2]\",\n", 184 | "}\n", 185 | "query = { \n", 186 | " \"rows\" : pow(10,8), # i.e. 
all matching documents are required in response\n", 187 | " \"fl\" : \"pdb_id, entity_id\", # restrict the returned documents to these fields only\n", 188 | "}\n", 189 | "\n", 190 | "docs = execute_solr_query(query, query_detail)" 191 | ] 192 | }, 193 | { 194 | "cell_type": "markdown", 195 | "metadata": {}, 196 | "source": [ 197 | "Let us narrow down to proteins of human origin." 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": 3, 203 | "metadata": { 204 | "collapsed": false 205 | }, 206 | "outputs": [ 207 | { 208 | "name": "stdout", 209 | "output_type": "stream", 210 | "text": [ 211 | "Found 171 matching entities in 171 entries.\n" 212 | ] 213 | } 214 | ], 215 | "source": [ 216 | "query_detail = { \n", 217 | " \"pfam_name\" : \"Lipocalin\",\n", 218 | " \"resolution\" : \"[1 TO 2]\",\n", 219 | " \"tax_id\" : \"9606\",\n", 220 | "}\n", 221 | "query = { \n", 222 | " \"rows\" : pow(10,8), # i.e. all matching documents are required in response\n", 223 | " \"fl\" : \"pdb_id, entity_id\", # restrict the returned documents to these fields only\n", 224 | "}\n", 225 | "\n", 226 | "docs = execute_solr_query(query, query_detail)" 227 | ] 228 | }, 229 | { 230 | "cell_type": "markdown", 231 | "metadata": {}, 232 | "source": [ 233 | "Let us look for entries deposited by Kleywegt." 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 4, 239 | "metadata": { 240 | "collapsed": false 241 | }, 242 | "outputs": [ 243 | { 244 | "name": "stdout", 245 | "output_type": "stream", 246 | "text": [ 247 | "Found 2 matching entities in 2 entries.\n" 248 | ] 249 | } 250 | ], 251 | "source": [ 252 | "query_detail = { \n", 253 | " \"pfam_name\" : \"Lipocalin\",\n", 254 | " \"resolution\" : \"[1 TO 2]\",\n", 255 | " \"tax_id\" : \"9606\",\n", 256 | " \"entry_authors\" : \"*Kleywegt*\",\n", 257 | "}\n", 258 | "query = { \n", 259 | " \"rows\" : pow(10,8), # i.e. 
all matching documents are required in response\n", 260 | " \"fl\" : \"pdb_id, entity_id\", # restrict the returned documents to these fields only\n", 261 | "}\n", 262 | "\n", 263 | "docs = execute_solr_query(query, query_detail)" 264 | ] 265 | }, 266 | { 267 | "cell_type": "markdown", 268 | "metadata": {}, 269 | "source": [ 270 | "# Your turn!" 271 | ] 272 | }, 273 | { 274 | "cell_type": "markdown", 275 | "metadata": {}, 276 | "source": [ 277 | "Can you now query the PDBe Solr instance to answer the following questions?\n", 278 | "* Which entries were published in Nature and contain a transmembrane protein?\n", 279 | "* How many SCOP domain families occur in entries that have a homo-tetramer as the most likely assembly?" 280 | ] 281 | } 282 | ], 283 | "metadata": { 284 | "kernelspec": { 285 | "display_name": "Python 3", 286 | "language": "python", 287 | "name": "python3" 288 | }, 289 | "language_info": { 290 | "codemirror_mode": { 291 | "name": "ipython", 292 | "version": 3 293 | }, 294 | "file_extension": ".py", 295 | "mimetype": "text/x-python", 296 | "name": "python", 297 | "nbconvert_exporter": "python", 298 | "pygments_lexer": "ipython3", 299 | "version": "3.5.3" 300 | } 301 | }, 302 | "nbformat": 4, 303 | "nbformat_minor": 1 304 | } 305 | --------------------------------------------------------------------------------