├── .gitignore ├── LICENSE ├── chembl ├── chembl.py ├── chembl_proc.py └── split_chembl.py ├── combind ├── dock ├── dock.py ├── grid.py ├── ligprep.py ├── struct_align.py ├── struct_process.py └── struct_sort.py ├── features ├── features.py ├── ifp.py ├── ifp_similarity.py ├── mcss.py ├── mcss16.typ ├── shape.py └── test │ ├── 3ZPR_lig-to-2VT4_ifp.csv │ ├── 3ZPR_lig-to-2VT4_ifp_raw.csv │ ├── 3ZPR_lig-to-2VT4_pv.maegz │ ├── 6IBL-to-2VT4_pv.maegz │ ├── __pycache__ │ └── ifp_test.cpython-36-pytest-5.2.4.pyc │ └── ifp_test.py ├── pymol ├── .DS_Store ├── interactions.py ├── view_complexes.py └── view_poses.py ├── readme.md ├── score ├── density_estimate.py ├── pose_prediction.py ├── screen.py ├── statistics.py └── tests │ ├── __pycache__ │ ├── test_density_estimate.cpython-36-pytest-5.2.4.pyc │ ├── test_lig_pair.cpython-36-pytest-5.2.4.pyc │ ├── test_pose_pair.cpython-36-pytest-5.2.4.pyc │ └── test_prob_opt.cpython-36-pytest-5.2.4.pyc │ ├── test_density_estimate.py │ ├── test_lig_pair.py │ ├── test_pose_pair.py │ └── test_prob_opt.py ├── setup.sh ├── stats_data ├── default │ ├── ._stats.pdf │ ├── native_contact.txt │ ├── native_hbond.txt │ ├── native_mcss.txt │ ├── native_saltbridge.txt │ ├── native_shape.txt │ ├── reference_contact.txt │ ├── reference_hbond.txt │ ├── reference_mcss.txt │ ├── reference_saltbridge.txt │ ├── reference_shape.txt │ └── stats.pdf ├── helper_best_affinity_diverse.csv ├── helper_best_mcss.csv ├── mcss_sizes.pkl ├── pdbs.txt ├── pdbs_for_benchmark.csv ├── structures.tar.gz └── systems.txt └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | *.py[cod] 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Creative Commons Legal Code 2 | 3 | Attribution-NonCommercial-ShareAlike 3.0 Unported 4 | 5 | CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND 
DOES NOT PROVIDE 6 | LEGAL SERVICES. DISTRIBUTION OF THIS LICENSE DOES NOT CREATE AN 7 | ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS 8 | INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES 9 | REGARDING THE INFORMATION PROVIDED, AND DISCLAIMS LIABILITY FOR 10 | DAMAGES RESULTING FROM ITS USE. 11 | 12 | License 13 | 14 | THE WORK (AS DEFINED BELOW) IS PROVIDED UNDER THE TERMS OF THIS CREATIVE 15 | COMMONS PUBLIC LICENSE ("CCPL" OR "LICENSE"). THE WORK IS PROTECTED BY 16 | COPYRIGHT AND/OR OTHER APPLICABLE LAW. ANY USE OF THE WORK OTHER THAN AS 17 | AUTHORIZED UNDER THIS LICENSE OR COPYRIGHT LAW IS PROHIBITED. 18 | 19 | BY EXERCISING ANY RIGHTS TO THE WORK PROVIDED HERE, YOU ACCEPT AND AGREE 20 | TO BE BOUND BY THE TERMS OF THIS LICENSE. TO THE EXTENT THIS LICENSE MAY 21 | BE CONSIDERED TO BE A CONTRACT, THE LICENSOR GRANTS YOU THE RIGHTS 22 | CONTAINED HERE IN CONSIDERATION OF YOUR ACCEPTANCE OF SUCH TERMS AND 23 | CONDITIONS. 24 | 25 | 1. Definitions 26 | 27 | a. "Adaptation" means a work based upon the Work, or upon the Work and 28 | other pre-existing works, such as a translation, adaptation, 29 | derivative work, arrangement of music or other alterations of a 30 | literary or artistic work, or phonogram or performance and includes 31 | cinematographic adaptations or any other form in which the Work may be 32 | recast, transformed, or adapted including in any form recognizably 33 | derived from the original, except that a work that constitutes a 34 | Collection will not be considered an Adaptation for the purpose of 35 | this License. For the avoidance of doubt, where the Work is a musical 36 | work, performance or phonogram, the synchronization of the Work in 37 | timed-relation with a moving image ("synching") will be considered an 38 | Adaptation for the purpose of this License. 39 | b. 
"Collection" means a collection of literary or artistic works, such as 40 | encyclopedias and anthologies, or performances, phonograms or 41 | broadcasts, or other works or subject matter other than works listed 42 | in Section 1(g) below, which, by reason of the selection and 43 | arrangement of their contents, constitute intellectual creations, in 44 | which the Work is included in its entirety in unmodified form along 45 | with one or more other contributions, each constituting separate and 46 | independent works in themselves, which together are assembled into a 47 | collective whole. A work that constitutes a Collection will not be 48 | considered an Adaptation (as defined above) for the purposes of this 49 | License. 50 | c. "Distribute" means to make available to the public the original and 51 | copies of the Work or Adaptation, as appropriate, through sale or 52 | other transfer of ownership. 53 | d. "License Elements" means the following high-level license attributes 54 | as selected by Licensor and indicated in the title of this License: 55 | Attribution, Noncommercial, ShareAlike. 56 | e. "Licensor" means the individual, individuals, entity or entities that 57 | offer(s) the Work under the terms of this License. 58 | f. "Original Author" means, in the case of a literary or artistic work, 59 | the individual, individuals, entity or entities who created the Work 60 | or if no individual or entity can be identified, the publisher; and in 61 | addition (i) in the case of a performance the actors, singers, 62 | musicians, dancers, and other persons who act, sing, deliver, declaim, 63 | play in, interpret or otherwise perform literary or artistic works or 64 | expressions of folklore; (ii) in the case of a phonogram the producer 65 | being the person or legal entity who first fixes the sounds of a 66 | performance or other sounds; and, (iii) in the case of broadcasts, the 67 | organization that transmits the broadcast. 68 | g. 
"Work" means the literary and/or artistic work offered under the terms 69 | of this License including without limitation any production in the 70 | literary, scientific and artistic domain, whatever may be the mode or 71 | form of its expression including digital form, such as a book, 72 | pamphlet and other writing; a lecture, address, sermon or other work 73 | of the same nature; a dramatic or dramatico-musical work; a 74 | choreographic work or entertainment in dumb show; a musical 75 | composition with or without words; a cinematographic work to which are 76 | assimilated works expressed by a process analogous to cinematography; 77 | a work of drawing, painting, architecture, sculpture, engraving or 78 | lithography; a photographic work to which are assimilated works 79 | expressed by a process analogous to photography; a work of applied 80 | art; an illustration, map, plan, sketch or three-dimensional work 81 | relative to geography, topography, architecture or science; a 82 | performance; a broadcast; a phonogram; a compilation of data to the 83 | extent it is protected as a copyrightable work; or a work performed by 84 | a variety or circus performer to the extent it is not otherwise 85 | considered a literary or artistic work. 86 | h. "You" means an individual or entity exercising rights under this 87 | License who has not previously violated the terms of this License with 88 | respect to the Work, or who has received express permission from the 89 | Licensor to exercise rights under this License despite a previous 90 | violation. 91 | i. 
"Publicly Perform" means to perform public recitations of the Work and 92 | to communicate to the public those public recitations, by any means or 93 | process, including by wire or wireless means or public digital 94 | performances; to make available to the public Works in such a way that 95 | members of the public may access these Works from a place and at a 96 | place individually chosen by them; to perform the Work to the public 97 | by any means or process and the communication to the public of the 98 | performances of the Work, including by public digital performance; to 99 | broadcast and rebroadcast the Work by any means including signs, 100 | sounds or images. 101 | j. "Reproduce" means to make copies of the Work by any means including 102 | without limitation by sound or visual recordings and the right of 103 | fixation and reproducing fixations of the Work, including storage of a 104 | protected performance or phonogram in digital form or other electronic 105 | medium. 106 | 107 | 2. Fair Dealing Rights. Nothing in this License is intended to reduce, 108 | limit, or restrict any uses free from copyright or rights arising from 109 | limitations or exceptions that are provided for in connection with the 110 | copyright protection under copyright law or other applicable laws. 111 | 112 | 3. License Grant. Subject to the terms and conditions of this License, 113 | Licensor hereby grants You a worldwide, royalty-free, non-exclusive, 114 | perpetual (for the duration of the applicable copyright) license to 115 | exercise the rights in the Work as stated below: 116 | 117 | a. to Reproduce the Work, to incorporate the Work into one or more 118 | Collections, and to Reproduce the Work as incorporated in the 119 | Collections; 120 | b. 
to create and Reproduce Adaptations provided that any such Adaptation, 121 | including any translation in any medium, takes reasonable steps to 122 | clearly label, demarcate or otherwise identify that changes were made 123 | to the original Work. For example, a translation could be marked "The 124 | original work was translated from English to Spanish," or a 125 | modification could indicate "The original work has been modified."; 126 | c. to Distribute and Publicly Perform the Work including as incorporated 127 | in Collections; and, 128 | d. to Distribute and Publicly Perform Adaptations. 129 | 130 | The above rights may be exercised in all media and formats whether now 131 | known or hereafter devised. The above rights include the right to make 132 | such modifications as are technically necessary to exercise the rights in 133 | other media and formats. Subject to Section 8(f), all rights not expressly 134 | granted by Licensor are hereby reserved, including but not limited to the 135 | rights described in Section 4(e). 136 | 137 | 4. Restrictions. The license granted in Section 3 above is expressly made 138 | subject to and limited by the following restrictions: 139 | 140 | a. You may Distribute or Publicly Perform the Work only under the terms 141 | of this License. You must include a copy of, or the Uniform Resource 142 | Identifier (URI) for, this License with every copy of the Work You 143 | Distribute or Publicly Perform. You may not offer or impose any terms 144 | on the Work that restrict the terms of this License or the ability of 145 | the recipient of the Work to exercise the rights granted to that 146 | recipient under the terms of the License. You may not sublicense the 147 | Work. You must keep intact all notices that refer to this License and 148 | to the disclaimer of warranties with every copy of the Work You 149 | Distribute or Publicly Perform. 
When You Distribute or Publicly 150 | Perform the Work, You may not impose any effective technological 151 | measures on the Work that restrict the ability of a recipient of the 152 | Work from You to exercise the rights granted to that recipient under 153 | the terms of the License. This Section 4(a) applies to the Work as 154 | incorporated in a Collection, but this does not require the Collection 155 | apart from the Work itself to be made subject to the terms of this 156 | License. If You create a Collection, upon notice from any Licensor You 157 | must, to the extent practicable, remove from the Collection any credit 158 | as required by Section 4(d), as requested. If You create an 159 | Adaptation, upon notice from any Licensor You must, to the extent 160 | practicable, remove from the Adaptation any credit as required by 161 | Section 4(d), as requested. 162 | b. You may Distribute or Publicly Perform an Adaptation only under: (i) 163 | the terms of this License; (ii) a later version of this License with 164 | the same License Elements as this License; (iii) a Creative Commons 165 | jurisdiction license (either this or a later license version) that 166 | contains the same License Elements as this License (e.g., 167 | Attribution-NonCommercial-ShareAlike 3.0 US) ("Applicable License"). 168 | You must include a copy of, or the URI, for Applicable License with 169 | every copy of each Adaptation You Distribute or Publicly Perform. You 170 | may not offer or impose any terms on the Adaptation that restrict the 171 | terms of the Applicable License or the ability of the recipient of the 172 | Adaptation to exercise the rights granted to that recipient under the 173 | terms of the Applicable License. You must keep intact all notices that 174 | refer to the Applicable License and to the disclaimer of warranties 175 | with every copy of the Work as included in the Adaptation You 176 | Distribute or Publicly Perform. 
When You Distribute or Publicly 177 | Perform the Adaptation, You may not impose any effective technological 178 | measures on the Adaptation that restrict the ability of a recipient of 179 | the Adaptation from You to exercise the rights granted to that 180 | recipient under the terms of the Applicable License. This Section 4(b) 181 | applies to the Adaptation as incorporated in a Collection, but this 182 | does not require the Collection apart from the Adaptation itself to be 183 | made subject to the terms of the Applicable License. 184 | c. You may not exercise any of the rights granted to You in Section 3 185 | above in any manner that is primarily intended for or directed toward 186 | commercial advantage or private monetary compensation. The exchange of 187 | the Work for other copyrighted works by means of digital file-sharing 188 | or otherwise shall not be considered to be intended for or directed 189 | toward commercial advantage or private monetary compensation, provided 190 | there is no payment of any monetary compensation in con-nection with 191 | the exchange of copyrighted works. 192 | d. 
If You Distribute, or Publicly Perform the Work or any Adaptations or 193 | Collections, You must, unless a request has been made pursuant to 194 | Section 4(a), keep intact all copyright notices for the Work and 195 | provide, reasonable to the medium or means You are utilizing: (i) the 196 | name of the Original Author (or pseudonym, if applicable) if supplied, 197 | and/or if the Original Author and/or Licensor designate another party 198 | or parties (e.g., a sponsor institute, publishing entity, journal) for 199 | attribution ("Attribution Parties") in Licensor's copyright notice, 200 | terms of service or by other reasonable means, the name of such party 201 | or parties; (ii) the title of the Work if supplied; (iii) to the 202 | extent reasonably practicable, the URI, if any, that Licensor 203 | specifies to be associated with the Work, unless such URI does not 204 | refer to the copyright notice or licensing information for the Work; 205 | and, (iv) consistent with Section 3(b), in the case of an Adaptation, 206 | a credit identifying the use of the Work in the Adaptation (e.g., 207 | "French translation of the Work by Original Author," or "Screenplay 208 | based on original Work by Original Author"). The credit required by 209 | this Section 4(d) may be implemented in any reasonable manner; 210 | provided, however, that in the case of a Adaptation or Collection, at 211 | a minimum such credit will appear, if a credit for all contributing 212 | authors of the Adaptation or Collection appears, then as part of these 213 | credits and in a manner at least as prominent as the credits for the 214 | other contributing authors. 
For the avoidance of doubt, You may only 215 | use the credit required by this Section for the purpose of attribution 216 | in the manner set out above and, by exercising Your rights under this 217 | License, You may not implicitly or explicitly assert or imply any 218 | connection with, sponsorship or endorsement by the Original Author, 219 | Licensor and/or Attribution Parties, as appropriate, of You or Your 220 | use of the Work, without the separate, express prior written 221 | permission of the Original Author, Licensor and/or Attribution 222 | Parties. 223 | e. For the avoidance of doubt: 224 | 225 | i. Non-waivable Compulsory License Schemes. In those jurisdictions in 226 | which the right to collect royalties through any statutory or 227 | compulsory licensing scheme cannot be waived, the Licensor 228 | reserves the exclusive right to collect such royalties for any 229 | exercise by You of the rights granted under this License; 230 | ii. Waivable Compulsory License Schemes. In those jurisdictions in 231 | which the right to collect royalties through any statutory or 232 | compulsory licensing scheme can be waived, the Licensor reserves 233 | the exclusive right to collect such royalties for any exercise by 234 | You of the rights granted under this License if Your exercise of 235 | such rights is for a purpose or use which is otherwise than 236 | noncommercial as permitted under Section 4(c) and otherwise waives 237 | the right to collect royalties through any statutory or compulsory 238 | licensing scheme; and, 239 | iii. Voluntary License Schemes. The Licensor reserves the right to 240 | collect royalties, whether individually or, in the event that the 241 | Licensor is a member of a collecting society that administers 242 | voluntary licensing schemes, via that society, from any exercise 243 | by You of the rights granted under this License that is for a 244 | purpose or use which is otherwise than noncommercial as permitted 245 | under Section 4(c). 
246 | f. Except as otherwise agreed in writing by the Licensor or as may be 247 | otherwise permitted by applicable law, if You Reproduce, Distribute or 248 | Publicly Perform the Work either by itself or as part of any 249 | Adaptations or Collections, You must not distort, mutilate, modify or 250 | take other derogatory action in relation to the Work which would be 251 | prejudicial to the Original Author's honor or reputation. Licensor 252 | agrees that in those jurisdictions (e.g. Japan), in which any exercise 253 | of the right granted in Section 3(b) of this License (the right to 254 | make Adaptations) would be deemed to be a distortion, mutilation, 255 | modification or other derogatory action prejudicial to the Original 256 | Author's honor and reputation, the Licensor will waive or not assert, 257 | as appropriate, this Section, to the fullest extent permitted by the 258 | applicable national law, to enable You to reasonably exercise Your 259 | right under Section 3(b) of this License (right to make Adaptations) 260 | but not otherwise. 261 | 262 | 5. Representations, Warranties and Disclaimer 263 | 264 | UNLESS OTHERWISE MUTUALLY AGREED TO BY THE PARTIES IN WRITING AND TO THE 265 | FULLEST EXTENT PERMITTED BY APPLICABLE LAW, LICENSOR OFFERS THE WORK AS-IS 266 | AND MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE 267 | WORK, EXPRESS, IMPLIED, STATUTORY OR OTHERWISE, INCLUDING, WITHOUT 268 | LIMITATION, WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR 269 | PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, 270 | ACCURACY, OR THE PRESENCE OF ABSENCE OF ERRORS, WHETHER OR NOT 271 | DISCOVERABLE. SOME JURISDICTIONS DO NOT ALLOW THE EXCLUSION OF IMPLIED 272 | WARRANTIES, SO THIS EXCLUSION MAY NOT APPLY TO YOU. 273 | 274 | 6. Limitation on Liability. 
EXCEPT TO THE EXTENT REQUIRED BY APPLICABLE 275 | LAW, IN NO EVENT WILL LICENSOR BE LIABLE TO YOU ON ANY LEGAL THEORY FOR 276 | ANY SPECIAL, INCIDENTAL, CONSEQUENTIAL, PUNITIVE OR EXEMPLARY DAMAGES 277 | ARISING OUT OF THIS LICENSE OR THE USE OF THE WORK, EVEN IF LICENSOR HAS 278 | BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 279 | 280 | 7. Termination 281 | 282 | a. This License and the rights granted hereunder will terminate 283 | automatically upon any breach by You of the terms of this License. 284 | Individuals or entities who have received Adaptations or Collections 285 | from You under this License, however, will not have their licenses 286 | terminated provided such individuals or entities remain in full 287 | compliance with those licenses. Sections 1, 2, 5, 6, 7, and 8 will 288 | survive any termination of this License. 289 | b. Subject to the above terms and conditions, the license granted here is 290 | perpetual (for the duration of the applicable copyright in the Work). 291 | Notwithstanding the above, Licensor reserves the right to release the 292 | Work under different license terms or to stop distributing the Work at 293 | any time; provided, however that any such election will not serve to 294 | withdraw this License (or any other license that has been, or is 295 | required to be, granted under the terms of this License), and this 296 | License will continue in full force and effect unless terminated as 297 | stated above. 298 | 299 | 8. Miscellaneous 300 | 301 | a. Each time You Distribute or Publicly Perform the Work or a Collection, 302 | the Licensor offers to the recipient a license to the Work on the same 303 | terms and conditions as the license granted to You under this License. 304 | b. Each time You Distribute or Publicly Perform an Adaptation, Licensor 305 | offers to the recipient a license to the original Work on the same 306 | terms and conditions as the license granted to You under this License. 307 | c. 
If any provision of this License is invalid or unenforceable under 308 | applicable law, it shall not affect the validity or enforceability of 309 | the remainder of the terms of this License, and without further action 310 | by the parties to this agreement, such provision shall be reformed to 311 | the minimum extent necessary to make such provision valid and 312 | enforceable. 313 | d. No term or provision of this License shall be deemed waived and no 314 | breach consented to unless such waiver or consent shall be in writing 315 | and signed by the party to be charged with such waiver or consent. 316 | e. This License constitutes the entire agreement between the parties with 317 | respect to the Work licensed here. There are no understandings, 318 | agreements or representations with respect to the Work not specified 319 | here. Licensor shall not be bound by any additional provisions that 320 | may appear in any communication from You. This License may not be 321 | modified without the mutual written agreement of the Licensor and You. 322 | f. The rights granted under, and the subject matter referenced, in this 323 | License were drafted utilizing the terminology of the Berne Convention 324 | for the Protection of Literary and Artistic Works (as amended on 325 | September 28, 1979), the Rome Convention of 1961, the WIPO Copyright 326 | Treaty of 1996, the WIPO Performances and Phonograms Treaty of 1996 327 | and the Universal Copyright Convention (as revised on July 24, 1971). 328 | These rights and subject matter take effect in the relevant 329 | jurisdiction in which the License terms are sought to be enforced 330 | according to the corresponding provisions of the implementation of 331 | those treaty provisions in the applicable national law. 
If the 332 | standard suite of rights granted under applicable copyright law 333 | includes additional rights not granted under this License, such 334 | additional rights are deemed to be included in the License; this 335 | License is not intended to restrict the license of any rights under 336 | applicable law. 337 | 338 | 339 | Creative Commons Notice 340 | 341 | Creative Commons is not a party to this License, and makes no warranty 342 | whatsoever in connection with the Work. Creative Commons will not be 343 | liable to You or any party on any legal theory for any damages 344 | whatsoever, including without limitation any general, special, 345 | incidental or consequential damages arising in connection to this 346 | license. Notwithstanding the foregoing two (2) sentences, if Creative 347 | Commons has expressly identified itself as the Licensor hereunder, it 348 | shall have all rights and obligations of Licensor. 349 | 350 | Except for the limited purpose of indicating to the public that the 351 | Work is licensed under the CCPL, Creative Commons does not authorize 352 | the use by either party of the trademark "Creative Commons" or any 353 | related trademark or logo of Creative Commons without the prior 354 | written consent of Creative Commons. Any permitted use will be in 355 | compliance with Creative Commons' then-current trademark usage 356 | guidelines, as may be published on its website or otherwise made 357 | available upon request from time to time. For the avoidance of doubt, 358 | this trademark restriction does not form part of this License. 359 | 360 | Creative Commons may be contacted at https://creativecommons.org/. 
361 | -------------------------------------------------------------------------------- /chembl/chembl.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | import os 3 | import pandas as pd 4 | import numpy as np 5 | from schrodinger.structure import SmilesStructure 6 | import click 7 | 8 | # Macrocycles are hard to dock. 9 | MACROCYCLE_THRESH = 8 10 | 11 | class CHEMBLDB: 12 | def __init__(self, chembldb, uniprot_chembl): 13 | # CHEMBL database was downloaded from the chembl website in sql format. 14 | self.conn = sqlite3.connect('file:{}?mode=ro'.format(chembldb), uri=True) 15 | self.cur = self.conn.cursor() 16 | 17 | # File of links from uniprot id to chembl id was downloaded from uniprot. 18 | # I just searched for all uniprot entriies with a chembl cross-reference 19 | # available. 20 | self.uniprot_chembl = pd.read_csv(uniprot_chembl, sep='\t', index_col=0) 21 | self.uniprot_chembl = self.uniprot_chembl['Cross-reference (ChEMBL)'].apply(lambda x: x.strip(';').split(';')) 22 | 23 | def __enter__ (self): 24 | return self 25 | def __exit__ (self, *exc): 26 | self.conn.close() 27 | 28 | def chembl_to_tid(self, chembl): 29 | self.cur.execute("SELECT tid FROM target_dictionary WHERE chembl_id=?", (chembl,)) 30 | rows = self.cur.fetchall() 31 | assert len(rows) == 1, rows 32 | return rows[0][0] 33 | 34 | def tid_to_target_type(self, tid): 35 | self.cur.execute("SELECT target_type FROM target_dictionary WHERE tid=?", (tid,)) 36 | rows = self.cur.fetchall() 37 | assert len(rows) == 1, rows 38 | return rows[0][0] 39 | 40 | def tid_to_assays(self, tid, protein_complex, homologous): 41 | if protein_complex and homologous: 42 | confidence = '(confidence_score=6 OR confidence_score=7)' 43 | elif protein_complex and not homologous: 44 | confidence = 'confidence_score=7' 45 | elif not protein_complex and homologous: 46 | confidence = '(confidence_score=8 OR confidence_score=9)' 47 | else: 48 | confidence = 
'confidence_score=9' 49 | 50 | self.cur.execute("SELECT assay_id FROM assays WHERE tid=? AND "+confidence, (tid,)) 51 | return [row[0] for row in self.cur.fetchall()] 52 | 53 | def assay_to_chemblid(self, assay): 54 | self.cur.execute("SELECT chembl_id FROM assays WHERE assay_id=?", (assay,)) 55 | rows = self.cur.fetchall() 56 | assert len(rows) == 1, rows 57 | return rows[0][0] 58 | 59 | def assay_to_molregnos(self, assay): 60 | self.cur.execute("SELECT molregno FROM activities WHERE assay_id=?", (assay,)) 61 | return [row[0] for row in self.cur.fetchall()] 62 | 63 | def molregno_to_smiles(self, molregno): 64 | self.cur.execute("SELECT canonical_smiles FROM compound_structures WHERE molregno=?", (molregno,)) 65 | rows = self.cur.fetchall() 66 | assert len(rows) == 1, rows 67 | return rows[0][0] 68 | 69 | def molregno_to_molw(self, molregno): 70 | self.cur.execute("SELECT mw_freebase FROM compound_properties WHERE molregno=?", (molregno,)) 71 | rows = self.cur.fetchall() 72 | if not len(rows): 73 | return 0 74 | return rows[0][0] 75 | 76 | def molregno_to_smiles(self, molregno): 77 | self.cur.execute("SELECT canonical_smiles FROM compound_structures WHERE molregno=?", (molregno,)) 78 | rows = self.cur.fetchall() 79 | if not rows: 80 | return None 81 | return rows[0][0] 82 | 83 | def molregno_to_chemblid(self, molregno): 84 | self.cur.execute("SELECT chembl_id FROM molecule_dictionary WHERE molregno=?", (molregno,)) 85 | rows = self.cur.fetchall() 86 | assert len(rows) == 1, rows 87 | return rows[0][0] 88 | 89 | def molregno_and_assay_to_activities(self, molregno, assay): 90 | self.cur.execute("SELECT standard_type, standard_value, standard_units, relation, activity_comment FROM activities WHERE molregno=? 
AND assay_id=?", (molregno, assay)) 91 | return self.cur.fetchall() 92 | 93 | def chembl_to_activities(self, chembl, protein_complex, homologous): 94 | # chemblID, SMILES, MOLW, affinity 95 | activities = [] 96 | tid = self.chembl_to_tid(chembl) 97 | for assay in self.tid_to_assays(tid, protein_complex, homologous): 98 | assay_chembl_id = self.assay_to_chemblid(assay) 99 | for molregno in self.assay_to_molregnos(assay): 100 | molw = self.molregno_to_molw(molregno) 101 | smiles = self.molregno_to_smiles(molregno) 102 | chembl_id = self.molregno_to_chemblid(molregno) 103 | for activity in self.molregno_and_assay_to_activities(molregno, assay): 104 | activities += [[assay_chembl_id, chembl_id, molw, smiles] + list(activity)] 105 | return pd.DataFrame(activities, 106 | columns=['assay_chembl_id', 'ligand_chembl_id', 'mw_freebase', 107 | 'canonical_smiles', 'standard_type', 'standard_value', 108 | 'standard_units', 'relation', 'comment']) 109 | 110 | def uniprot_to_chembl(self, uniprot): 111 | if uniprot not in self.uniprot_chembl: 112 | return None 113 | 114 | for chembl_id in self.uniprot_chembl.loc[uniprot]: 115 | tid = self.chembl_to_tid(chembl_id) 116 | target_type = self.tid_to_target_type(tid) 117 | if target_type == 'SINGLE PROTEIN': 118 | return chembl_id 119 | 120 | ################################################################################ 121 | 122 | def get_chembl_id(uniprot, chembldb, uniprot_chembl): 123 | with CHEMBLDB(chembldb, uniprot_chembl) as chembldb: 124 | chembl = chembldb.uniprot_to_chembl(uniprot) 125 | return chembl 126 | 127 | def standardize_nonbinders(activities, affinity_thresh): 128 | # Set 'Not Active's to affinity_thresh 129 | activities.loc[activities['comment'].isin([None]), 'comment'] = '' 130 | duds = [('Not Active' in s) for s in activities['comment']] 131 | duds = np.array(duds) 132 | activities.loc[duds, 'standard_units'] = 'nM' 133 | activities.loc[duds, 'standard_value'] = affinity_thresh 134 | activities.loc[duds, 
'relation'] = '=' 135 | 136 | # Most nonbinders don't have equality relation. 137 | mask = activities['standard_value'] >= affinity_thresh 138 | mask *= activities['relation'].isin(['>', '>=']) 139 | activities.loc[mask, 'relation'] = '=' 140 | 141 | # Cap affinity values. 142 | mask = activities['standard_value'] >= affinity_thresh 143 | activities.loc[mask, 'standard_value'] = affinity_thresh 144 | return activities 145 | 146 | def get_activities(chembl, chembldb, uniprot_chembl, protein_complex, homologous, affinity_thresh): 147 | with CHEMBLDB(chembldb, uniprot_chembl) as chembldb: 148 | activities = chembldb.chembl_to_activities(chembl, protein_complex, homologous) 149 | activities['target_chembl_id'] = chembl 150 | 151 | # Standardize units to nM 152 | m = {'M': 10**9, 'mM': 10**6, 'uM': 10**3, 'pM': 10**-3} 153 | for unit, relation in m.items(): 154 | mask = activities['standard_units'] == unit 155 | activities.loc[mask, 'standard_value'] *= relation 156 | activities.loc[mask, 'standard_units'] = 'nM' 157 | 158 | activities = standardize_nonbinders(activities, affinity_thresh) 159 | 160 | return activities 161 | 162 | ################################################################################ 163 | 164 | def filter_activities(activities, activity_type, molw_thresh): 165 | if activity_type == 'all': 166 | activity_types = ['IC50', 'Ki', 'Kd'] 167 | else: 168 | activity_types = [activity_type] 169 | mask = activities['standard_type'].isin(activity_types) 170 | print('Removing {} rows b/c standard_type not in {}'.format(len(mask)-sum(mask), 171 | activity_types)) 172 | print('Set of offending values is {}'.format(set(activities[~mask]['standard_type']))) 173 | activities = activities.loc[mask] 174 | 175 | mask = activities['standard_value'].notna() 176 | print('Removing {} rows b/c standard_value is na'.format(len(mask)-sum(mask))) 177 | activities = activities.loc[mask] 178 | 179 | mask = activities['standard_value'] != 0 180 | print('Removing {} rows b/c 
def get_structure(smiles):
    """Generate a 3D structure for a SMILES string.

    ``get3dStructure`` is tried with ``True`` first and then with ``False``;
    the flag of the first successful call is returned alongside the structure.
    (Presumably the flag requires fully specified stereochemistry -- confirm
    against the Schrodinger ``SmilesStructure`` API; callers use the returned
    flag to drop ligands with ambiguous stereochemistry.)

    If both attempts fail, an error is printed and a placeholder structure
    built from the SMILES 'C' is returned with a ``False`` flag so that batch
    processing can continue.

    Args:
        smiles: SMILES string of a single molecule.

    Returns:
        Tuple ``(structure, stereo)``.
    """
    smi = SmilesStructure(smiles)
    for stereo in (True, False):
        try:
            return smi.get3dStructure(stereo), stereo
        except Exception:
            # Only ordinary errors are swallowed; KeyboardInterrupt and
            # SystemExit must still be able to stop a long batch run.
            continue

    print('Error processing {}'.format(smiles))
    return SmilesStructure('C').get3dStructure(), False
def collapse_duplicates(activities, seperate_activity_types):
    """Merge rows describing the same ligand into a single row.

    Rows are grouped by 'SMILES' (and also by 'standard_type' when
    ``seperate_activity_types`` is true).  Each group keeps the first value
    of every column, except 'standard_value', which becomes the group mean.

    Args:
        activities: DataFrame with at least 'SMILES' and 'standard_value'
            columns (plus 'standard_type' when separating activity types).
        seperate_activity_types: if true, measurements of different
            'standard_type' are not merged with each other.

    Returns:
        A new DataFrame with one row per group, ordered by the group keys.
    """
    keys = ['SMILES', 'standard_type'] if seperate_activity_types else ['SMILES']

    averages = activities.groupby(keys)['standard_value'].mean()
    collapsed = activities.groupby(keys, as_index=False).first()

    # Both group-bys use the same keys and the default key sorting, so their
    # rows line up positionally; assign the means in one vectorised step
    # instead of looking each row up individually.
    collapsed['standard_value'] = averages.to_numpy()
    return collapsed
@click.option('--affinity-thresh', default=10000) 286 | @click.option('--molw-thresh', default=500) 287 | @click.option('--output-fname') 288 | @click.argument('uniprot_or_chembl') 289 | @click.argument('chembldb', default='/oak/stanford/groups/rondror/users/jpaggi/pldb_data/raw/chembl_27.db') 290 | @click.argument('uniprot_chembl', default='/oak/stanford/groups/rondror/users/jpaggi/pldb_data/raw/uniprot-chembl.tsv') 291 | def query(protein_complex, homologous, ambiguous_stereo, activity_type, 292 | affinity_thresh, molw_thresh, output_fname, 293 | uniprot_or_chembl, chembldb, uniprot_chembl): 294 | """ 295 | protein_complex (bool): protein complexes will necessarily have a lower 296 | confidence score in CHEMBL, so when looking for data for a complex 297 | you have to lower the threshold. 298 | homologous (bool): allow matches with a confidence score of 8. This means 299 | that they weren't able to map the activity to a particular version of a 300 | protein. For instance, if a paper reports "binding affinity to the D2 301 | dopamine receptor", but doesn't specify human, rat, etc.. 302 | ambiguous_stereo (bool): allow ligands where the stereochemistry isn't 303 | completely deteremined. 304 | activity_type (str: IC50, Ki, Kd, EC50): specifies what kinds of measurements 305 | to accept. If not specified, will give [IC50, Ki, Kd]. 306 | affinity_thresh (float): affinity value to assign to "Not actives" and 307 | minimum affinity for which to accept ligands with a "greater than" 308 | relation. 10000 (10 micro-molar) is generally a good option because this 309 | is a standard threshold for binding in screens. 310 | molw-thresh (float): specifies how large of molecules to include in the 311 | results. 500 Da is a reasonable threshold to get "small molecules" 312 | output_fname (str): where to put the results. By default, 313 | CHEMBLID_ACTIVITYTYPE.csv 314 | uniprot_or_chembl (str): target for which to get affinities. 
@main.command()
@click.argument('input_csv')
@click.argument('output_csv')
@click.option('--seperate-activity-types', is_flag=True)
@click.option('--IC50-factor', default=2.3)
def unique(input_csv, output_csv, seperate_activity_types, ic50_factor):
    """
    Collapse duplicate ligands in INPUT_CSV and write the result to OUTPUT_CSV.

    IC50 measurements with a standard_value of at least 1000 are divided by
    the IC50 factor before merging.

    If seperate_activity_types, don't merge e.g. IC50's and Ki's.
    """
    table = pd.read_csv(input_csv)

    weak_ic50 = (table['standard_type'] == 'IC50') & (table['standard_value'] >= 1000)
    table.loc[weak_ic50, 'standard_value'] = (
        table.loc[weak_ic50, 'standard_value'] / ic50_factor)

    # Values from different assays are about to be merged, so a per-row
    # assay_chembl_id would no longer be meaningful.
    merged = collapse_duplicates(table.drop(columns='assay_chembl_id'),
                                 seperate_activity_types)
    merged.to_csv(output_csv, index=False)
@main.command()
@click.argument('struct', default='')
@click.option('--grid-struct')
def structprep(struct, grid_struct):
    """
    Prepare structures and make a docking grid.

    "struct" specifies the name of the structure for which to make a docking
    grid. (Not the full path, generally just the PDB code.) Defaults to the
    structure with alphabetically lowest name.

    All structures are aligned to "struct". The docking grid is made for
    "grid-struct", which defaults to "struct".

    The following directory structure is required:

    \b
    structures/
        raw/
            structure_name_prot.mae
            structure_name_lig.mae
            ...
        processed/
            structure_name/structure_name_out.mae
            ...
        aligned/
            structure_name/rot-structure_name_query.mae
            ...
        proteins/
            structure_name_prot.mae
            ...
        ligands/
            structure_name_lig.mae
            ...
        grids/
            structure_name/structure_name.zip
            ...

    The process can be started from any step, e.g. if you have processed
    versions of your structures, you can place these in the processed directory.

    Files ending with _lig contain only the small molecule ligand present in the
    structure, and files ending with _prot contain everything else.
    """
    from dock.struct_align import struct_align
    from dock.struct_sort import struct_sort
    from dock.struct_process import struct_process
    from dock.grid import make_grid

    # Report user errors through click rather than `assert`, which is
    # stripped under `python -O` and surfaces as a traceback.
    if not os.path.exists('structures'):
        raise click.ClickException('No structures directory.')

    raw_paths = sorted(glob('structures/raw/*_prot.mae*'))
    structs = [path.split('/')[-1].split('_prot')[0] for path in raw_paths]

    if not struct:
        # The default can only be derived from the raw structures; fail with
        # a clear message instead of an IndexError when there are none.
        if not structs:
            raise click.ClickException(
                'No structures found in structures/raw; please specify STRUCT.')
        struct = structs[0]

    if not grid_struct:
        grid_struct = struct

    print(f'Processing {structs}, aligning to {struct}, and creating a docking'
          f' grid for {grid_struct}')

    struct_process(structs)
    struct_align(struct, structs)
    struct_sort(structs)
    make_grid(grid_struct)
@main.command()
@click.argument('ligands', nargs=-1)
@click.option('--root', default='docking')
@click.option('--grid')
@click.option('--screen', is_flag=True)
@click.option('--processes', default=1)
def dock(grid, root, ligands, screen, processes):
    """
    Dock "ligands" to "grid".

    "root" specifies where the docking results will be written.

    Setting "screen" limits the thoroughness of the pose sampling. Recommended
    for screening, but not pose prediction.

    "ligands" are paths to prepared ligand files. Multiple can be specified.

    If "grid" is not given, the alphabetically first zip file matching
    structures/grids/*/*.zip is used.
    """
    # The local import deliberately shadows this command's own name.
    from dock.dock import dock

    if grid is None:
        # Sort so the default grid does not depend on directory listing order.
        candidates = sorted(glob('structures/grids/*/*.zip'))
        if not candidates:
            print('No grids in default location (structures/grids)'
                  ', please specify path.')
            # Exit with a failure status; the bare `exit()` helper reports
            # success and is only available when the `site` module is loaded.
            raise SystemExit(1)
        grid = candidates[0]

    # Intermediate '*nonames*' files produced by ligand preparation are skipped.
    ligands = [os.path.abspath(lig) for lig in ligands if 'nonames' not in lig]
    grid = os.path.abspath(grid)
    root = os.path.abspath(root)

    mkdir(root)
    unfinished = []
    for ligand in ligands:
        name = '{}-to-{}'.format(basename(ligand), basename(grid))
        _root = '{}/{}'.format(root, name)
        # Enhanced sampling is used unless --screen was requested.
        unfinished.append((grid, ligand, _root, name, not screen))
    mp(dock, unfinished, processes)
@main.command()
@click.argument('root')
@click.argument('out')
@click.argument('ligands', nargs=-1)
@click.option('--features', default='shape,mcss,hbond,saltbridge,contact')
@click.option('--alpha', default=1.0)
@click.option('--stats-root', default=stats_root)
@click.option('--restart', default=500)
@click.option('--max-iterations', default=1000)
def pose_prediction(root, out, ligands, alpha, stats_root,
                    features, restart, max_iterations):
    """
    Run ComBind pose prediction.

    Features are loaded from "root". If no "ligands" are given, every ligand
    found in the loaded features is used. One row per ligand is written to
    "out" with the selected pose index and the RMSDs of the selected pose,
    of pose 0 and of the lowest-RMSD pose.
    """
    from score.pose_prediction import PosePrediction
    from score.statistics import read_stats
    from features.features import Features

    feature_names = features.split(',')

    protein = Features(root)
    protein.load_features()

    ligand_names = sorted(ligands if ligands else set(protein.raw['name1']))

    data = protein.get_view(ligand_names, feature_names)
    stats = read_stats(stats_root, feature_names)

    predictor = PosePrediction(ligand_names, feature_names, data, stats, alpha)
    best_poses = predictor.max_posterior(max_iterations, restart)

    with open(out, 'w') as fp:
        fp.write('ID,POSE,COMBIND_RMSD,GLIDE_RMSD,BEST_RMSD\n')
        for ligand in best_poses:
            pose = best_poses[ligand]
            rmsds = data['rmsd'][ligand]
            # Column order: ID, POSE, COMBIND_RMSD, GLIDE_RMSD, BEST_RMSD.
            row = [ligand.replace('_pv', ''), pose,
                   rmsds[pose], rmsds[0], min(rmsds)]
            fp.write(','.join(str(value) for value in row) + '\n')
@main.command()
@click.argument('scores')
@click.argument('original_pvs', nargs=-1)
def extract_top_poses(scores, original_pvs):
    """
    Write top-scoring poses to a single file.

    "scores" is a csv with ID and POSE columns. For each ID, the pose whose
    zero-based index among the structures with that title equals POSE is
    copied from "original_pvs". The output is written next to "scores" with
    '.csv' replaced by '_pv.maegz', and starts with the first structure of
    the first poseviewer file.
    """
    out = scores.replace('.csv', '_pv.maegz')
    scores = pd.read_csv(scores).set_index('ID')

    with StructureWriter(out) as writer:
        # The first entry of the first poseviewer file heads the output.
        with StructureReader(original_pvs[0]) as sts:
            writer.append(next(sts))

        counts = {}
        written = []
        for pv in original_pvs:
            # Read the current file; the previous code re-opened
            # original_pvs[0] on every iteration, so poses stored in the
            # other files were never examined.
            with StructureReader(pv) as sts:
                next(sts)  # Skip the leading entry of each poseviewer file.
                for st in sts:
                    name = st.title
                    # counts is zero indexed.
                    counts[name] = counts[name] + 1 if name in counts else 0

                    if name in scores.index and scores.loc[name, 'POSE'] == counts[name]:
                        writer.append(st)
                        written.append(name)

        # Every scored ligand must have been written exactly once.
        assert len(written) == len(scores), written
        for name in scores.index:
            assert name in written
@main.command()
@click.argument('pv')
@click.argument('out', default=None)
def scores_to_csv(pv, out):
    """
    Write docking and ComBind scores to text.

    Thin command-line wrapper around score.screen.scores_to_csv.
    """
    # Import the module under an alias so the helper is not confused with
    # this command, which has the same name.
    import score.screen as _screen
    _screen.scores_to_csv(pv, out)
def dock(grid, ligands, root, name, enhanced, infile=None, reference=None):
    """Run a Glide docking job in ``root`` unless it already finished.

    Args:
        grid: value substituted for ``{grid}`` in the input template.
        ligands: value substituted for ``{ligands}`` in the input template.
        root: working directory of the job; created if missing.
        name: job name; the files ``<name>.in``, ``<name>.log`` and
            ``<name>_pv.maegz`` inside ``root`` are derived from it.
        enhanced: selects the ``GLIDE_ES4`` template over ``GLIDE`` when no
            ``infile`` is given, and skips the job if the existing log shows
            that a previous attempt produced no poses.
        infile: optional input template overriding the built-in ones.
        reference: value substituted for ``{reference}`` in the template.

    Returns:
        None.  Nothing is run if ``<name>_pv.maegz`` already exists.
    """
    if infile is None:
        infile = GLIDE_ES4 if enhanced else GLIDE
    glide_in = '{}/{}.in'.format(root, name)
    glide_pv = '{}/{}_pv.maegz'.format(root, name)
    glide_log = '{}/{}.log'.format(root, name)
    glide_cmd = 'glide -WAIT -LOCAL -RESTART {}'.format(os.path.basename(glide_in))

    if os.path.exists(glide_pv):
        return

    if enhanced and docking_failed(glide_log):
        return

    # makedirs copes with spaces in the path, missing parent directories and
    # several workers creating the directory at once; a shelled-out
    # `mkdir` handles none of these.
    os.makedirs(root, exist_ok=True)
    with open(glide_in, 'w') as fp:
        fp.write(infile.format(grid=grid, ligands=ligands, reference=reference))

    subprocess.run(glide_cmd, cwd=root, shell=True)
def ligprocess(input_file, output_file):
    """Copy structures from ``input_file`` to ``output_file`` after clean-up.

    For every structure:
      * properties whose name contains 's_st_EZ_' or 'Chiral' are removed;
      * atoms with a blank PDB name are named ``<element><n>``, where ``n``
        counts the blank-named atoms of that element within the structure.

    Raises:
        AssertionError: if two atoms of one structure end up with the same
            PDB name.
    """
    with StructureReader(input_file) as reader, \
         StructureWriter(output_file) as writer:
        for st in reader:
            # Remove explicit stereochemistry specifications. These cause
            # errors in downstream steps.  Snapshot the keys first so that
            # removing entries cannot disturb the iteration, whether or not
            # keys() returns a live view.
            for key in list(st.property.keys()):
                if 's_st_EZ_' in key or 'Chiral' in key:
                    st.property.pop(key)

            # Give each atom a unique name, ligands generated from smiles
            # strings will not have any atom name by default.
            names = set()
            counts = {}
            for atom in st.atom:
                if not atom.pdbname.strip():
                    counts[atom.element] = counts.get(atom.element, 0) + 1
                    atom.pdbname = atom.element + str(counts[atom.element])

                assert atom.pdbname not in names, atom.pdbname
                names.add(atom.pdbname)
            writer.append(st)
def align_successful(out_dir, struct):
    """Report whether the alignment stored under ``out_dir/struct`` is usable.

    Returns False when the rotated query file is missing.  Returns True
    without further checks when ``<struct>_template.mae`` is present (query
    and template are the same structure).  Otherwise the first line of
    ``align.out`` starting with 'Alignment' decides: its third
    whitespace-separated field is read as a float and must not exceed 0.4.
    If there is no such line the alignment counts as failed.
    """
    workdir = '{}/{}'.format(out_dir, struct)

    if not os.path.exists('{}/rot-{}_query.mae'.format(workdir, struct)):
        return False

    # query = template so we don't need to check alignment
    if os.path.exists('{}/{}_template.mae'.format(workdir, struct)):
        return True

    alignment_fields = None
    with open('{}/align.out'.format(workdir), 'r') as log:
        for line in log:
            fields = line.split()
            if fields and fields[0] == 'Alignment':
                alignment_fields = fields
                break

    if alignment_fields is None:
        print('alignment failure', struct)
        return False

    value = float(alignment_fields[2])
    if value > 0.4:
        print('-- Alignment warning!', struct, value)
        return False
    return True
def load_complex(prot_in, lig_in, struct):
    """Load a protein and, if present, its ligand as one titled structure.

    Only the first structure of each file is used.  When ``lig_in`` does not
    exist the protein is returned on its own.  Otherwise the ligand's single
    chain is renamed 'L', every protein chain with a non-blank name is
    renamed in order using letters that exclude 'L', and the protein is
    merged into the ligand structure.

    Args:
        prot_in: path to the protein structure file.
        lig_in: path to the ligand structure file; may be missing.
        struct: title given to the returned structure.

    Returns:
        The protein structure, or the merged ligand + protein structure.

    Raises:
        AssertionError: if the ligand does not have exactly one chain.
        IndexError: if the protein has more named chains than there are
            letters available.
    """
    # Close the readers as soon as the first structure has been read.
    with StructureReader(prot_in) as reader:
        prot_st = next(reader)

    if not os.path.exists(lig_in):
        prot_st.title = struct
        return prot_st

    with StructureReader(lig_in) as reader:
        lig_st = next(reader)

    assert len(lig_st.chain) == 1, struct
    for c in lig_st.chain:
        c.name = 'L'

    # 'L' is left out because it is reserved for the ligand chain.
    alpha = 'ABCDEFGHIJKMNOPQRST'
    alpha_count = 0
    for c in prot_st.chain:
        if c.name.strip() == '':
            continue

        c.name = alpha[alpha_count]
        alpha_count += 1

    merged_st = lig_st.merge(prot_st)
    merged_st.title = struct
    return merged_st
def split_complex(st, pdb_id):
    """Write the ligand and protein parts of ``st`` to separate files.

    Atoms on chain 'L' go to ``structures/ligands/<pdb_id>_lig.mae`` and all
    other atoms to ``structures/proteins/<pdb_id>_prot.mae``.  Files that
    already exist are left untouched, and no ligand file is written when the
    structure has no chain 'L' atoms.

    Args:
        st: structure of the aligned complex.
        pdb_id: name used for the output files and structure titles.
    """
    # Create the output directories directly instead of spawning a shell.
    os.makedirs('structures/proteins', exist_ok=True)
    os.makedirs('structures/ligands', exist_ok=True)
    lig_path = 'structures/ligands/{}_lig.mae'.format(pdb_id)
    prot_path = 'structures/proteins/{}_prot.mae'.format(pdb_id)

    if not os.path.exists(lig_path):
        # Collect the ligand atoms once and reuse them for the extraction.
        lig_atoms = [a.index for a in st.atom if a.chain == 'L']
        if lig_atoms:
            lig_st = st.extract(lig_atoms)
            lig_st.title = '{}_lig'.format(pdb_id)
            lig_st.write(lig_path)

    if not os.path.exists(prot_path):
        prot_st = st.extract([a.index for a in st.atom if a.chain != 'L'])
        prot_st.title = '{}_prot'.format(pdb_id)
        prot_st.write(prot_path)
class Features:
    """
    Organize feature computation and loading.

    Per-pose ("single") features are stored next to each poseviewer file,
    with the ``_pv.maegz`` suffix replaced by a feature-specific one. When a
    custom ``pv_root`` is supplied, that prefix is rewritten to
    ``<root>/single``. All other features are stored as ``<root>/<name>.npy``.
    """
    def __init__(self, root, ifp_version='rd1', shape_version='pharm_max',
                 mcss_version='mcss16', max_poses=10000, pv_root=None,
                 ifp_features=['hbond', 'saltbridge', 'contact']):
        """
        root: directory holding the feature files (made absolute).
        ifp_version: key into the module-level ``IFP`` settings table.
        shape_version: version string handed to the shape computation.
        mcss_version: base name of the ``.typ`` file under
            ``$COMBINDHOME/features``.
        max_poses: maximum number of poses kept per ligand.
        pv_root: prefix of the poseviewer paths; defaults to
            ``<root>/docking``.
        ifp_features: interaction labels to compute pairwise similarities for.
        """
        self.root = os.path.abspath(root)
        # Always define pv_root; previously it was only set when the caller
        # left it as None, so any explicit value made path() fail.
        if pv_root is None:
            self.pv_root = self.root + '/docking'
        else:
            self.pv_root = pv_root

        self.ifp_version = ifp_version
        self.shape_version = shape_version
        self.mcss_version = mcss_version
        self.mcss_file = '{}/features/{}.typ'.format(os.environ['COMBINDHOME'], mcss_version)
        self.max_poses = max_poses
        # Copy so instances never share (or mutate) the default list.
        self.ifp_features = list(ifp_features)

        self.raw = {}

    def path(self, name, base=False, pv=None, pv2=None):
        """
        Return the file path for feature ``name``.

        base: if true, return ``<root>/<name>`` verbatim.
        pv: poseviewer path; required for the single features
            ('rmsd', 'gscore', 'name', 'ifp').
        pv2: accepted for interface compatibility; no branch reads it.
        """
        if base:
            return '{}/{}'.format(self.root, name)

        # With a non-default pv_root, single features live under
        # <root>/single. This is a plain string substitution, so ``pv`` must
        # be spelled with the same prefix as ``pv_root`` for it to apply.
        if pv is not None and self.pv_root != self.root+'/docking':
            pv = pv.replace(self.pv_root, self.root+'/single')

        # single features
        if name == 'ifp':
            suffix = '_ifp_{}.csv'.format(self.ifp_version)
            return pv.replace('_pv.maegz', suffix)
        if name in ('rmsd', 'gscore', 'name'):
            return pv.replace('_pv.maegz', '_{}.npy'.format(name))

        # pair features (and anything else) are stored directly under root
        return f'{self.root}/{name}.npy'

    def load_features(self):
        """Load every ``<root>/*.npy`` file into ``self.raw`` keyed by base name."""
        paths = glob(f'{self.root}/*.npy')
        for path in paths:
            name = path.split('/')[-1][:-4]
            self.raw[name] = np.load(path)

    def get_view(self, ligands, features):
        """
        Slice the loaded features down to the given ligands.

        Returns a dict with 'gscore' and 'rmsd' entries keyed by ligand, and
        one entry per feature keyed by ``(ligand1, ligand2)`` for every
        unordered pair in ``ligands``.
        """
        # One boolean mask per ligand, computed once and reused below.
        masks = {}
        for ligand in ligands:
            mask = self.raw['name1'] == ligand
            assert sum(mask)
            masks[ligand] = mask

        data = {}
        data['gscore'] = {}
        data['rmsd'] = {}
        for ligand in ligands:
            data['gscore'][ligand] = self.raw['gscore1'][masks[ligand]]
            data['rmsd'][ligand] = self.raw['rmsd1'][masks[ligand]]

        for feature in features:
            data[feature] = {}
            for i, ligand1 in enumerate(ligands):
                for ligand2 in ligands[i+1:]:
                    mask1 = masks[ligand1]
                    mask2 = masks[ligand2]
                    data[feature][(ligand1, ligand2)] = self.raw[feature][mask1, :][:, mask2]

        return data

    def load_single_features(self, pvs, ligands=None):
        """
        Load the per-pose features for each poseviewer file in ``pvs``.

        ligands: optional collection of names to keep; None keeps all.

        Returns ``(rmsds, gscores, poses, names, ifps)``. The three arrays
        are concatenated over all files; ``poses`` and ``ifps`` are lists
        with one entry per kept pose.
        """
        rmsds, gscores, poses, names, ifps = [], [], [], [], []
        for pv in pvs:
            _rmsds = np.load(self.path('rmsd', pv=pv))
            _gscores = np.load(self.path('gscore', pv=pv))
            _names = np.load(self.path('name', pv=pv))

            _ifps = pd.read_csv(self.path('ifp', pv=pv))
            # One frame per pose index. Sizing by the number of names (rather
            # than the largest pose index present in the file) gives poses
            # without any recorded interaction an empty frame instead of an
            # IndexError below.
            _ifps = [_ifps.loc[_ifps.pose==p] for p in range(len(_names))]

            with StructureReader(pv) as sts:
                # The first structure is not a pose (the original bound it
                # to ``protein``); skip it.
                next(sts)
                _poses = [st for st in sts]

            # Keep at most max_poses entries per name. ``seen`` counts every
            # earlier occurrence of a name, whether or not it was kept.
            seen = {}
            keep = []
            for i, name in enumerate(_names):
                earlier = seen.get(name, 0)
                seen[name] = earlier + 1
                if ((ligands is None or name in ligands)
                        and earlier < self.max_poses):
                    keep += [i]
            rmsds += [_rmsds[keep]]
            gscores += [_gscores[keep]]
            names += [_names[keep]]
            poses += [_poses[i] for i in keep]
            ifps += [_ifps[i] for i in keep]

        rmsds = np.hstack(rmsds)
        names = np.hstack(names)
        gscores = np.hstack(gscores)
        return rmsds, gscores, poses, names, ifps

    def compute_single_features(self, pvs, native_poses):
        """
        Compute any missing per-pose feature files for ``pvs``.

        pvs: list of poseviewer paths, or a list of such lists.
        native_poses: mapping from pose title to the native structure,
            passed on to ``compute_rmsd``.
        """
        # For single features, there is no need to keep sub-sets of ligands
        # separated, so just merge them at the outset to simplify the rest of
        # the method.
        if pvs and isinstance(pvs[0], list):
            pvs = [pv for _pvs in pvs for pv in _pvs]

        pvs = [os.path.abspath(pv) for pv in pvs]

        print('Extracting glide scores.')
        for pv in pvs:
            out = self.path('gscore', pv=pv)
            if not os.path.exists(out):
                self.compute_gscore(pv, out)

        print('Extracting names.')
        for pv in pvs:
            out = self.path('name', pv=pv)
            if not os.path.exists(out):
                self.compute_name(pv, out)

        print('Computing RMSDs to native poses')
        for pv in pvs:
            out = self.path('rmsd', pv=pv)
            if not os.path.exists(out):
                self.compute_rmsd(pv, native_poses, out)

        print('Computing interaction fingerprints.')
        for pv in pvs:
            out = self.path('ifp', pv=pv)
            if not os.path.exists(out):
                self.compute_ifp(pv, out)

    def compute_pair_features(self, pvs, pvs2=None, ifp=True, shape=True, mcss=True):
        """
        Compute and save pairwise features between the poses of ``pvs`` and
        ``pvs2`` (or ``pvs`` with itself when ``pvs2`` is None).

        The merged single features are saved as rmsd1/gscore1/name1 (and
        rmsd2/gscore2/name2 when ``pvs2`` is given). The ``ifp``, ``shape``
        and ``mcss`` flags select which similarities are computed.
        """
        mkdir(self.root)
        rmsds1, gscores1, poses1, names1, ifps1 = self.load_single_features(pvs)
        out = self.path('rmsd1')
        np.save(out, rmsds1)
        out = self.path('gscore1')
        np.save(out, gscores1)
        out = self.path('name1')
        np.save(out, names1)
        if pvs2 is None:
            (rmsds2, gscores2, poses2, names2, ifps2
                ) = rmsds1, gscores1, poses1, names1, ifps1
        else:
            rmsds2, gscores2, poses2, names2, ifps2 = self.load_single_features(pvs2)
            out = self.path('rmsd2')
            np.save(out, rmsds2)
            out = self.path('gscore2')
            np.save(out, gscores2)
            out = self.path('name2')
            np.save(out, names2)

        if ifp:
            print('Computing interaction similarities.')
            for feature in self.ifp_features:
                out = self.path(feature)
                self.compute_ifp_pair(ifps1, ifps2, feature, out)

        if shape:
            print('Computing shape similarities.')
            out = self.path('shape')
            self.compute_shape(poses1, poses2, out)

        if mcss:
            print('Computing mcss similarities.')
            out = self.path('mcss')
            self.compute_mcss(poses1, poses2, out)

    # Methods to calculate features
    def compute_name(self, pv, out):
        """Save the title of each pose in ``pv`` (at most ``max_poses``) to ``out``."""
        names = []
        with StructureReader(pv) as sts:
            next(sts)
            for st in sts:
                names += [st.property['s_m_title']]
                if len(names) == self.max_poses:
                    break
        np.save(out, names)

    def compute_gscore(self, pv, out):
        """Save the docking score of each pose in ``pv`` (at most ``max_poses``) to ``out``."""
        gscores = []
        with StructureReader(pv) as sts:
            next(sts)
            for st in sts:
                gscores += [st.property['r_i_docking_score']]
                if len(gscores) == self.max_poses:
                    break
        np.save(out, gscores)

    def compute_rmsd(self, pv, native_poses, out):
        """
        Save the RMSD of each pose in ``pv`` to its native pose.

        Poses whose title is missing from ``native_poses``, or for which the
        RMSD calculation raises, are recorded as -1.
        """
        rmsds = []
        with StructureReader(pv) as sts:
            next(sts)
            for st in sts:
                name = st.property['s_m_title']
                if name in native_poses:
                    native = native_poses[name]
                    try:
                        conf_rmsd = ConformerRmsd(native, st).calculate()
                    except Exception:
                        # Best effort: flag the pose instead of aborting the
                        # file, but let KeyboardInterrupt/SystemExit through.
                        print(f'RMSD failed for {name}')
                        conf_rmsd = -1
                else:
                    conf_rmsd = -1
                rmsds += [conf_rmsd]
        np.save(out, rmsds)

    def compute_ifp(self, pv, out):
        """Compute interaction fingerprints for ``pv`` and write them to ``out``."""
        from features.ifp import ifp
        settings = IFP[self.ifp_version]
        ifp(settings, pv, out, self.max_poses)

    def compute_ifp_pair(self, ifps1, ifps2, feature, out):
        """Save the pairwise fingerprint similarity for ``feature`` to ``out``."""
        from features.ifp_similarity import ifp_tanimoto
        tanimotos = ifp_tanimoto(ifps1, ifps2, feature)
        np.save(out, tanimotos)

    def compute_shape(self, poses1, poses2, out):
        """Save the pairwise shape similarities to ``out``."""
        from features.shape import shape
        # More efficient to have longer pose list provided as second argument.
        # This only matters for screening.
        sims = shape(poses2, poses1, version=self.shape_version).T
        np.save(out, sims)

    def compute_mcss(self, poses1, poses2, out):
        """Save the pairwise MCSS RMSDs to ``out``."""
        from features.mcss import mcss
        rmsds = mcss(poses1, poses2, self.mcss_file)
        np.save(out, rmsds)
def angle_vector(v1, v2):
    """
    Return the angle in degrees between the lines along v1 and v2.

    The result is folded into [0, 90], so direction is ignored: vectors at
    135 degrees to each other give 45. The inputs are not modified and may
    be integer arrays or plain sequences.
    """
    # Normalize copies; the previous in-place ``/=`` rescaled the caller's
    # arrays and raised on integer dtypes.
    u1 = np.asarray(v1, dtype=float)
    u2 = np.asarray(v2, dtype=float)
    u1 = u1 / np.linalg.norm(u1)
    u2 = u2 / np.linalg.norm(u2)
    # Clip guards arccos against round-off pushing the dot product past +/-1.
    angle = np.arccos(np.clip(np.dot(u1, u2), -1.0, 1.0))
    angle *= 180 / np.pi
    if angle > 90:
        angle = 180 - angle
    assert 0 <= angle <= 90, angle
    return angle
get_normal(self, ring): 106 | centroid = self.get_centroid(ring) 107 | coords1 = coords(self.mol.GetAtomWithIdx(ring[0])) - centroid 108 | coords2 = coords(self.mol.GetAtomWithIdx(ring[1])) - centroid 109 | 110 | normal = np.cross(coords1, coords2) 111 | normal /= np.linalg.norm(normal) 112 | return normal 113 | 114 | def init_hbond(self): 115 | donors = [atom for atom in self.mol.GetAtoms() if self._is_donor(atom)] 116 | acceptors = [atom for atom in self.mol.GetAtoms() if self._is_acceptor(atom)] 117 | return donors, acceptors 118 | 119 | def _is_donor(self, atom): 120 | if atom.GetAtomicNum() in [7, 8]: 121 | if _get_bonded_hydrogens(atom): 122 | return True 123 | return False 124 | 125 | def _is_acceptor(self, atom): 126 | if atom.GetAtomicNum() == 8: 127 | return True 128 | if atom.GetAtomicNum() == 7 and atom.GetExplicitValence() < 4: 129 | return True 130 | return False 131 | 132 | def init_saltbridge(self): 133 | charged = [atom for atom in self.mol.GetAtoms() 134 | if atom.GetFormalCharge() != 0] 135 | if self.is_protein: 136 | charge_groups = self._symmetric_charged_protein_atoms() 137 | else: 138 | charge_groups = self._symmetric_charged_ligand_atoms() 139 | return charged, charge_groups 140 | 141 | def _symmetric_charged_protein_atoms(self): 142 | protein_groups = {} 143 | for protein_atom in self.mol.GetAtoms(): 144 | if atomname(protein_atom) in ['OD1', 'OD2', 'OE1', 'OE2', 'NH1', 'NH2']: 145 | if resname(protein_atom) not in protein_groups: 146 | protein_groups[resname(protein_atom)] = [] 147 | protein_groups[(resname(protein_atom))] += [protein_atom] 148 | return protein_groups 149 | 150 | def _symmetric_charged_ligand_atoms(self): 151 | ligand_groups = {} 152 | smartss = [('[CX3](=O)[O-]', 2, [1, 2]), 153 | ('[CX3](=[NH2X3+])[NH2X3]', 1, [1, 2])] 154 | 155 | idx_to_atom = {atom.GetIdx(): atom for atom in self.mol.GetAtoms()} 156 | 157 | for smarts, k, v in smartss: 158 | mol = MolFromSmarts(smarts) 159 | matches = self.mol.GetSubstructMatches(mol) 
160 | for match in matches: 161 | ligand_groups[match[k]] = [idx_to_atom[match[_v]] for _v in v] 162 | return ligand_groups 163 | 164 | ################################################################################ 165 | # Compute atom-level interactions 166 | 167 | def _get_bonded_hydrogens(atom): 168 | hydrogens = [] 169 | for bond in atom.GetBonds(): 170 | if bond.GetBeginAtomIdx() != atom.GetIdx(): 171 | hydrogen = bond.GetBeginAtom() 172 | else: 173 | hydrogen = bond.GetEndAtom() 174 | 175 | if hydrogen.GetAtomicNum() == 1: 176 | hydrogens += [hydrogen] 177 | return hydrogens 178 | 179 | def _hbond_hydrogen_angle(acceptor, donor): 180 | best_angle, best_hydrogen = 0, None 181 | for hydrogen in _get_bonded_hydrogens(donor): 182 | _angle = angle_atom(donor, hydrogen, acceptor) 183 | if _angle > best_angle: 184 | best_angle = _angle 185 | best_hydrogen = hydrogen 186 | return best_hydrogen, best_angle 187 | 188 | def _hbond_compute(donor_mol, acceptor_mol, settings, protein_is_donor): 189 | hbonds = [] 190 | for donor in donor_mol.hbond_donors: 191 | for acceptor in acceptor_mol.hbond_acceptors: 192 | for hydrogen in _get_bonded_hydrogens(donor): 193 | dist = distance(acceptor, hydrogen) 194 | if dist > settings['hbond_dist_cut']: continue 195 | angle = angle_atom(donor, hydrogen, acceptor) 196 | if angle < settings['hbond_angle_cut']: continue 197 | 198 | if protein_is_donor: 199 | label = 'hbond_donor' 200 | protein_atom = donor 201 | ligand_atom = acceptor 202 | else: 203 | label = 'hbond_acceptor' 204 | protein_atom = acceptor 205 | ligand_atom = donor 206 | 207 | hbonds += [{'label': label, 208 | 'protein_res': resname(protein_atom), 209 | 'protein_atom': atomname(protein_atom), 210 | 'ligand_atom': atomname(ligand_atom), 211 | 'dist': dist, 212 | 'angle': angle, 213 | 'hydrogen': atomname(hydrogen)}] 214 | return hbonds 215 | 216 | def hbond_compute(protein, ligand, settings): 217 | donor = _hbond_compute(protein, ligand, settings, True) 218 | acceptor = 
_hbond_compute(ligand, protein, settings, False) 219 | return acceptor + donor 220 | 221 | def saltbridge_compute(protein, ligand, settings): 222 | # Note that much of the complexity here stems from taking into account 223 | # symetric atoms. Specifically for carboxylate and guanidinium groups, 224 | # we consider not just the atom that is arbitrarily assigned a formal 225 | # charge, but also the atom that is charged in the other resonance 226 | # structure. 227 | 228 | saltbridges = [] 229 | for protein_atom in protein.charged: 230 | for ligand_atom in ligand.charged: 231 | lig_charge = ligand_atom.GetFormalCharge() 232 | protein_charge = protein_atom.GetFormalCharge() 233 | if lig_charge * protein_charge >= 0: continue 234 | 235 | # Expand protein_atom and ligand_atom to all symetric atoms 236 | # ... think carboxylates and guanidiniums. 237 | if ('saltbridge_resonance' in settings and 238 | ligand_atom.GetIdx() in ligand.charge_groups): 239 | ligand_atoms = ligand.charge_groups[ligand_atom.GetIdx()] 240 | else: 241 | ligand_atoms = [ligand_atom] 242 | 243 | if ('saltbridge_resonance' in settings and 244 | resname(protein_atom) in protein.charge_groups): 245 | protein_atoms = protein.charge_groups[resname(protein_atom)] 246 | else: 247 | protein_atoms = [protein_atom] 248 | 249 | # Get minimum distance between any pair of protein and ligand 250 | # atoms in the groups. 
def contact_compute(protein, ligand, settings):
    """
    List nonpolar contacts between protein and ligand atoms.

    protein, ligand: objects whose ``contacts`` attribute is the tuple
        ``(coords, vdw radii, residue names, atom names)``.
    settings: must provide 'contact_scale_cut'.

    A pair is reported when its distance is below the sum of the two radii
    times ``contact_scale_cut``. Returns a list of dicts with the keys
    label, protein_res, protein_atom, ligand_atom, dist and vdw.
    """
    prot_xyz, prot_vdw, prot_res, prot_atom = protein.contacts
    lig_xyz, lig_vdw, _, lig_atom = ligand.contacts

    # Molecule.init_contacts leaves plain empty lists when a molecule has no
    # nonpolar atoms; there is nothing to pair up in that case.
    if len(prot_atom) == 0 or len(lig_atom) == 0:
        return []

    prot_xyz = np.asarray(prot_xyz, dtype=float).reshape(1, -1, 3)
    lig_xyz = np.asarray(lig_xyz, dtype=float).reshape(-1, 1, 3)
    # Rows index ligand atoms, columns index protein atoms.
    dists = np.linalg.norm(prot_xyz - lig_xyz, axis=2)
    vdw = (np.asarray(prot_vdw, dtype=float).reshape(1, -1)
           + np.asarray(lig_vdw, dtype=float).reshape(-1, 1))
    contact_idx = np.argwhere(dists < vdw*settings['contact_scale_cut'])

    contacts = []
    for i, j in contact_idx:
        contacts += [{'label': 'contact',
                      'protein_res': prot_res[j],
                      'protein_atom': prot_atom[j],
                      'ligand_atom': lig_atom[i],
                      'dist': dists[i, j],
                      'vdw': vdw[i, j]}]
    return contacts
289 | 290 | def _piecewise(data, opt, cut): 291 | slope = 1 / (cut-opt) 292 | intercept = cut * slope 293 | 294 | data = intercept - slope * data 295 | data[data > 1] = 1 296 | data[data < 0] = 0 297 | return data 298 | 299 | def _groupby_subset(df, index, col): 300 | return df[index+[col]].groupby(index) 301 | 302 | def nodigits(s): 303 | return ''.join([i for i in s if not i.isdigit()]) 304 | 305 | def compute_scores(raw, settings): 306 | if settings['level'] == 'atom': 307 | raw['protein_res'] = [r['protein_res']+':'+nodigits(r['protein_atom']) 308 | for _, r in raw.iterrows()] 309 | 310 | scores = [] 311 | for label, group in raw.groupby('label'): 312 | group = group.copy() 313 | if label == 'contact': 314 | group['score'] = _piecewise(group['dist'] / group['vdw'], 315 | settings['contact_scale_opt'], 316 | settings['contact_scale_cut']) 317 | elif label == 'saltbridge': 318 | group['score'] = _piecewise(group['dist'], 319 | settings['sb_dist_opt'], 320 | settings['sb_dist_cut']) 321 | elif label in ['hbond_donor', 'hbond_acceptor']: 322 | group['score'] = ( _piecewise(group['dist'], 323 | settings['hbond_dist_opt'], 324 | settings['hbond_dist_cut']) 325 | * _piecewise(180 - group['angle'], 326 | settings['hbond_angle_opt'], 327 | settings['hbond_angle_cut'])) 328 | 329 | # One hydrogen bond per hydrogen 330 | if label == 'hbond_donor': 331 | idx = _groupby_subset(group, 332 | ['pose', 'protein_res', 'hydrogen'], 333 | 'score').idxmax() 334 | else: 335 | idx = _groupby_subset(group, 336 | ['pose', 'hydrogen'], 337 | 'score').idxmax() 338 | idx = idx['score'] 339 | group = group.loc[idx] 340 | group = _groupby_subset(group, ['pose', 'label', 'protein_res'], 'score').sum() 341 | scores += [group] 342 | return pd.concat(scores).sort_index() 343 | 344 | ################################################################################ 345 | 346 | def fingerprint(protein, ligand, settings): 347 | fp = hbond_compute(protein, ligand, settings) 348 | fp += 
def ifp(settings, input_file, output_file, poses, convert=False):
    """
    Compute interaction fingerprints for a poseviewer file and write them out.

    settings: fingerprint thresholds; not modified (a copy is extended with
        the 'nonpolar' radii table).
    input_file: poseviewer file to read.
    output_file: CSV receiving the residue-level scores. The atom-level
        interactions are written next to it with ``_raw`` inserted before
        the extension.
    poses: maximum number of poses to process.
    convert: if true, first pass the input through ``convert_mae`` into a
        temporary file and fingerprint that instead.
    """
    # Work on a copy so the caller's dictionary does not silently gain a key.
    settings = dict(settings)
    settings['nonpolar'] = {6:1.7, 9:1.47, 17:1.75, 35:1.85, 53:1.98}

    temp = None
    try:
        if convert:
            temp = tempfile.NamedTemporaryFile(suffix='.maegz')
            convert_mae(input_file, temp.name, poses)
            input_file = temp.name

        # Compute atom-level interactions.
        fps = fingerprint_poseviewer(input_file, poses, settings)
    finally:
        # The temporary file is only needed while it is being read.
        if temp is not None:
            temp.close()

    # Compute residue-level scores.
    scores = compute_scores(fps, settings)

    # Write to files
    fps = fps.set_index(['pose', 'label', 'protein_res', 'protein_atom', 'ligand_atom'])
    fps = fps.sort_index()
    # splitext only looks at the final path component, so outputs without an
    # extension or with dots in a directory name still get a sensible name.
    base, ext = os.path.splitext(output_file)
    raw_file = base + '_raw' + ext

    fps.to_csv(raw_file)
    scores.to_csv(output_file)
def ifp_tanimoto(ifps1, ifps2, feature):
    """
    Computes the tanimoto similarity between each ifp in ifps1 and each ifp
    in ifps2 for feature.

    ifps1, ifps2: lists of DataFrames with at least the columns 'label',
        'protein_res' and 'score'.
    feature: label to compare; 'hbond' first merges donors and acceptors
        with ``merge_hbonds``.

    Returns a len(ifps1) x len(ifps2) array. One pseudo-count of overlap is
    added on each side, so two fingerprints without the feature score 0.5.
    """
    if feature == 'hbond':
        ifps1 = [merge_hbonds(ifp) for ifp in ifps1]
        ifps2 = [merge_hbonds(ifp) for ifp in ifps2]

    ifps1 = [ifp.loc[ifp.label == feature].set_index('protein_res') for ifp in ifps1]
    ifps2 = [ifp.loc[ifp.label == feature].set_index('protein_res') for ifp in ifps2]

    # The per-fingerprint totals do not depend on the partner, so compute
    # them once instead of inside the pair loop.
    totals1 = [ifp['score'].sum() for ifp in ifps1]
    totals2 = [ifp['score'].sum() for ifp in ifps2]

    sims = np.zeros((len(ifps1), len(ifps2)))
    for i, ifp1 in enumerate(ifps1):
        for j, ifp2 in enumerate(ifps2):
            total = totals1[i] + totals2[j]
            # Residues present in both fingerprints contribute the geometric
            # mean of their two scores.
            overlap = ifp1.join(ifp2, rsuffix='_2', how='inner')
            overlap = overlap['score']**0.5 * overlap['score_2']**0.5
            overlap = overlap.sum()

            sims[i, j] = (1 + overlap) / (2 + total - overlap)
    return sims
13 | 14 | Returns a (# poses in pv1) x (# poses in pv2) np.array of rmsds. 15 | """ 16 | memo = {} 17 | sts1 = [merge_halogens(st.copy()) for st in sts1] 18 | sts2 = [merge_halogens(st.copy()) for st in sts2] 19 | 20 | rmsds = [] 21 | for j, st1 in enumerate(sts1): 22 | smi1 = generate_smiles(st1) 23 | n_st1_atoms = n_atoms(st1) 24 | rmsds += [np.zeros(len(sts2)) + float('inf')] 25 | for i, st2 in enumerate(sts2): 26 | smi2 = generate_smiles(st2) 27 | n_st2_atoms = n_atoms(st2) 28 | 29 | if (smi1, smi2) in memo: 30 | mcss, n_mcss_atoms = memo[(smi1, smi2)] 31 | else: 32 | mcss = compute_mcss(st1, st2, mcss_types_file) 33 | # Capitalizing is useful to prevent invalid structures. 34 | # This structure isn't used for anything other than 35 | # counting atoms. 36 | mcss_st = SmilesStructure(mcss['st1'][0].split(',')[0].upper()).get2dStructure() 37 | n_mcss_atoms = n_atoms(mcss_st) 38 | memo[(smi1, smi2)] = (mcss, n_mcss_atoms) 39 | memo[(smi2, smi1)] = ({'st1': mcss['st2'], 'st2': mcss['st1']}, n_mcss_atoms) 40 | 41 | if (2*n_mcss_atoms <= min(n_st1_atoms, n_st2_atoms) 42 | or n_mcss_atoms <= 10): 43 | continue 44 | 45 | rmsds[-1][i] = compute_mcss_rmsd(st1, st2, mcss) 46 | 47 | return np.vstack(rmsds) 48 | 49 | def compute_mcss_rmsd(st1, st2, mcss): 50 | """ 51 | Compute minimum rmsd between mcss(s). 52 | 53 | Takes into account that there can be multiple mcss smarts patterns ( 54 | i.e. two patterns that are the same size) and each smarts pattern could 55 | map to multiple atom indices (e.g. symetric groups). 
def calculate_rmsd(pose1, pose2, atom_idx1, atom_idx2):
    """
    Calculates the RMSD between the atoms atom_idx1 in pose1
    and the atoms atom_idx2 in pose2.

    pose1, pose2: schrodinger.structure
    atom_idx1, atom_idx2: [int, ...]

    Returns the value of ConformerRmsd.calculate() on the two extracted
    substructures.
    """
    substructure1 = pose1.extract(atom_idx1)
    substructure2 = pose2.extract(atom_idx2)
    try:
        calc = ConformerRmsd(substructure1, substructure2)
        calc.use_heavy_atom_graph = True
        calc.renumber_structures = True
        rmsd = calc.calculate()
    except Exception:
        # This is necessary because there is a bug in the
        # Schrodinger software that results in incorrect
        # atom indices being used when the heavy_atom_graph
        # is used. That being said, the above is more reliable
        # than the below, so should be tried first.
        # (Only Exception is caught so that KeyboardInterrupt and
        # SystemExit are not swallowed by the retry.)
        calc = ConformerRmsd(substructure1, substructure2)
        calc.use_heavy_atom_graph = True
        calc.renumber_structures = False
        rmsd = calc.calculate()
    return rmsd
def shape(conformers1, conformers2, version='pharm_max'):
    """
    Run shape_screen with conformers1 as the shapes and conformers2 as the
    screened structures, and collect the reported similarities.

    version: '<typing>_<norm>' where typing is one of pharm, mmod, element,
        qsar and norm is max or min.

    Returns a len(conformers1) x len(conformers2) array. If no output file is
    produced, the log must contain the "at least 3 spheres" message and an
    array filled with 0.5 is returned instead.
    """
    typing_flags = {'pharm': '-pharm',
                    'mmod': '-atomtypes mmod',
                    'element': '-atomtypes element',
                    'qsar': '-atomtypes qsar'}
    norm_flags = {'max': '-norm 1',
                  'min': '-norm 2'}

    typing, norm = version.split('_')

    assert typing in typing_flags, 'Typing {} not supported.'.format(typing)
    typing = typing_flags.get(typing, typing)

    assert norm in norm_flags, 'Norm {} not supported.'.format(norm)
    norm = norm_flags.get(norm, norm)

    with tempfile.TemporaryDirectory() as wd:
        poses1 = wd+'/poses1.maegz'
        poses2 = wd+'/poses2.maegz'
        output = wd+'/poses1_align.maegz'
        log = wd+'/poses1_shape.log'

        ligands1 = write_and_name(conformers1, poses1)
        ligands2 = write_and_name(conformers2, poses2)
        n_shapes, n_screened = len(ligands1), len(ligands2)

        # The command runs inside the temporary directory, so it refers to
        # the pose files by base name only.
        cmd = CMD.format(poses1=os.path.basename(poses1),
                         poses2=os.path.basename(poses2),
                         typing=typing, norm=norm)
        print(cmd)
        subprocess.run(cmd, shell=True, cwd=wd)
        print('subprocess complete')

        if not os.path.exists(output):
            with open(log) as fp:
                txt = fp.read()
            assert 'Reference shape must contain at least 3 spheres' in txt, txt
            return 0.5*np.ones((n_shapes, n_screened))

        sims = np.zeros((n_shapes, n_screened))
        with StructureReader(output) as sts:
            for k, st in enumerate(sts):
                # Row from the position in the output, column from the
                # '-conf-<j>' tag that write_and_name appended to the title.
                row = k % n_shapes
                col = int(st.title.split('-conf-')[-1])
                sims[row, col] = st.property['r_phase_Shape_Sim']
    return sims
import gzip
import os

import pytest
import ifp
from rdkit.Chem.rdmolfiles import MaeMolSupplier

# Fixtures live next to this file; resolving them here means the tests no
# longer depend on the directory pytest is launched from.
DATA_DIR = os.path.dirname(os.path.abspath(__file__))

settings = {'version': 'rd1',
            'level': 'residue',
            'hbond_dist_opt': 2.5,
            'hbond_dist_cut': 3.0,
            'hbond_angle_opt': 60.0,
            'hbond_angle_cut': 90.0,
            'sb_dist_opt': 4.0,
            'sb_dist_cut': 5.0,
            'contact_scale_opt': 1.25,
            'contact_scale_cut': 1.75,
            'pipi_dist_cut': 7.0,
            'pipi_dist_opt': 7.0,
            'pipi_norm_norm_angle_cut': 30.0,
            'pipi_norm_centroid_angle_cut': 45.0,
            'pipi_t_dist_cut': 6.0,
            'pipi_t_dist_opt': 5.0,
            'pipi_t_norm_norm_angle_cut': 60.0,
            'pipi_t_norm_centroid_angle_cut': 45.0}

# Keyed by atomic number (6=C, 9=F, 17=Cl, 35=Br, 53=I).
# NOTE(review): values look like van der Waals radii -- confirm against ifp.
settings['nonpolar'] = {6: 1.7, 9: 1.47, 17: 1.75, 35: 1.85, 53: 1.98}


def _load_poseviewer(fname):
    """
    Read a poseviewer fixture: the first entry is the protein, the remaining
    entries are ligand poses. Returns (protein, ligands).
    """
    with gzip.open(os.path.join(DATA_DIR, fname)) as fp:
        mols = MaeMolSupplier(fp, removeHs=False)
        # Everything is built inside the `with` block because the supplier
        # reads from the open file handle.
        protein = ifp.Molecule(next(mols), True, settings)
        ligands = [ifp.Molecule(mol, False, settings) for mol in mols]
    return protein, ligands


protein, ligands = _load_poseviewer('3ZPR_lig-to-2VT4_pv.maegz')


def test_version():
    import rdkit
    assert rdkit.__version__ == '2020.03.1'


def test_hydrogenbond():
    i = ifp.hbond_compute(protein, ligands[0], settings)
    assert len(i) == 2


def test_saltbridge_none():
    i = ifp.saltbridge_compute(protein, ligands[0], settings)
    assert len(i) == 0


def test_saltbridge_one():
    i = ifp.saltbridge_compute(protein, ligands[3], settings)
    assert len(i) == 1


def test_contact():
    # Smoke test only: checks that contact computation runs without raising.
    ifp.contact_compute(protein, ligands[0], settings)


def test_pipi_tstack():
    i = ifp.pipi_compute(protein, ligands[0], settings)
    assert len(i) == 4
    i = ifp.pipi_compute(protein, ligands[3], settings)
    assert len(i) == 4


def test_pipi_pstack():
    i = ifp.pipi_compute(protein, ligands[173], settings)
    print(i)
    assert len(i) == 4

    i = ifp.pipi_compute(protein, ligands[180], settings)
    print(i)
    assert len(i) == 2


def test_pipi_tstack_6IBL():
    protein, ligands = _load_poseviewer('6IBL-to-2VT4_pv.maegz')
    i = ifp.pipi_compute(protein, ligands[3], settings)
    assert len(i) == 2
from pymol import cmd
import sys
import pandas as pd

# interaction name -> (labels in the ifp csv, distance threshold, dash color).
# For 'contact' the threshold is a multiple of the row's 'vdw' value; for the
# others it is compared with 'dist' directly.
_INTERACTIONS = {
    'hbond': (('hbond_acceptor', 'hbond_donor'), 3.5, 'yellow'),
    'sb': (('saltbridge',), 4, 'magenta'),
    'contact': (('contact',), 1.25, 'smudge'),
    'pipi': (('pipi', 'pi-t'), 7.0, 'green'),
}

# Order in which the individual interaction types are drawn for 'all'.
_ALL_INTERACTIONS = ('sb', 'hbond', 'contact', 'pipi')


def style():
    """Apply the default representation: cartoon + lines, slate ligand carbons."""
    cmd.show('cartoon')
    cmd.show('lines')
    cmd.hide('sticks')
    cmd.util.cbaw()
    cmd.color('slate', 'het and element C')
    cmd.hide('everything', 'element H and (element C extend 1)')


def pose_name(group, pose):
    """
    Return the PyMOL selection pattern '<group>*.*<suffix>' for a pose.

    Pose 0 uses the part of "group" before the first '-' as the suffix;
    other poses use '_NN' (zero-padded below 10).
    """
    if pose == 0:
        pose = group.split('-')[0]
    elif pose < 10:
        pose = '_0{}'.format(pose)
    else:
        pose = '_{}'.format(pose)
    return '{}*.*{}'.format(group, pose)


def enable(group, pose, prot=True):
    """Enable the group and the requested pose (and optionally the protein)."""
    cmd.enable(group+'*-to-*')
    cmd.disable(group+'*-to-*.*')
    cmd.enable(pose_name(group, pose))
    if prot:
        cmd.enable('{}*.*_prot'.format(group))


def show_interactions(ifp_file, interaction, lig, pose, delete=True, disable=True):
    """
    Draw the interactions recorded in an ifp csv as dashed distances.

    ifp_file (str): csv read with pandas; uses the columns 'label', 'pose',
        'dist', 'vdw', 'protein_res', 'protein_atom' and 'ligand_atom'.
    interaction (str): 'hbond', 'sb', 'contact', 'pipi' or 'all'.
    lig (str): ligand/group name used to build selections.
    pose (int or str): pose number (PyMOL passes command arguments as str).
    delete (bool): remove previously drawn 'dist*' and 'ps*' objects first.
    disable (bool): disable all objects and re-apply style() first.

    Raises ValueError for an unknown interaction name.
    """
    pose = int(pose)

    if delete:
        cmd.delete('dist*')
        cmd.delete('ps*')
    if disable:
        cmd.disable('*')
        style()

    enable(lig, pose)

    if interaction == 'all':
        for kind in _ALL_INTERACTIONS:
            show_interactions(ifp_file, kind, lig, pose,
                              delete=False, disable=False)
        # Each recursive call already drew its interactions and applied the
        # final display settings. Falling through (as the code used to) drew
        # 'pipi' a second time because the loop variable shadowed
        # `interaction`.
        return

    if interaction not in _INTERACTIONS:
        raise ValueError('Unknown interaction {!r}; expected one of {} or '
                         "'all'.".format(interaction, sorted(_INTERACTIONS)))
    labels, thresh, color = _INTERACTIONS[interaction]

    df = pd.read_csv(ifp_file)
    idx = df['label'].isin(labels) & (df['pose'] == pose)

    for i, row in df[idx].iterrows():
        limit = thresh*row['vdw'] if interaction == 'contact' else thresh
        if row['dist'] > limit:
            continue

        chain, resid, _, _ = row['protein_res'].split(':')
        # Comma-separated atom names become a PyMOL '+' list.
        prot = '{}*.*prot and chain {} and resid {} and name {}'.format(
            lig, chain, resid, row['protein_atom'].replace(',', '+'))
        ligand = '{} and name {}'.format(pose_name(lig, pose),
                                         row['ligand_atom'].replace(',', '+'))

        print(prot, ligand)

        # Pseudoatoms are placed on each atom selection so that one dash is
        # drawn per interaction even when it involves several atoms.
        ps_prot = 'ps{}{}prot'.format(interaction, i)
        ps_lig = 'ps{}{}lig'.format(interaction, i)
        cmd.pseudoatom(ps_prot, prot)
        cmd.pseudoatom(ps_lig, ligand)

        dist = 'dist'+interaction+str(i)
        cmd.dist(dist, ps_prot, ps_lig)
        cmd.color(color, dist)

    cmd.set('dash_width', 6)
    cmd.set('dash_width', 3, 'distcontact*')

    cmd.enable('{}*.*prot'.format(lig))
    cmd.enable(pose_name(lig, pose))
    cmd.enable(lig)


cmd.extend('show_interactions', show_interactions)
from pymol import cmd
from glob import glob


def load_complexes(protein, n=1):
    """
    Load the first "n" crystal complexes (sorted by file name) found under
    '<protein>/structures/proteins' and apply a default style.
    """
    n = int(n)
    for prot in sorted(glob('{}/structures/proteins/*_prot.mae'.format(protein)))[:n]:
        pdb = prot.split('/')[-1].split('_')[0]
        load_crystal_protein(protein, pdb)
        load_crystal_pose(protein, pdb)

    cmd.util.cbao("prot_*")
    cmd.util.cbay("het and crystal_*")
    cmd.show('sticks', "het and crystal_*")
    cmd.hide('lines', 'element h')
    cmd.show('spheres', 'het and prot* and (crystal* expand 5)')

    cmd.show('cartoon')
    cmd.set('cartoon_oval_length', '0.5')
    cmd.set('cartoon_transparency', '0.5')
    cmd.hide('everything', 'element H and not (element N+O extend 1)')
    cmd.hide('everything', 'name H')


def load_crystal_protein(protein, ligand):
    """Load '<ligand>_prot.mae' and rename the object to 'prot_<ligand>'."""
    cmd.load('{}/structures/proteins/{}_prot.mae'.format(protein, ligand))
    cmd.set_name('{}_prot'.format(ligand), 'prot_{}'.format(ligand))


def load_crystal_pose(protein, ligand):
    """Load '<ligand>_lig.mae' and rename the object to 'crystal_<ligand>'."""
    cmd.load('{}/structures/ligands/{}_lig.mae'.format(protein, ligand))
    cmd.set_name('{}_lig'.format(ligand), 'crystal_{}'.format(ligand))


###################################################################
def load_pose(protein, ligand, struct, pose, prefix):
    """
    Load one docked pose from the first matching poseviewer file under
    '<protein>/docking/confgen_es4' and rename it to '<prefix>_<ligand>'.

    pose (int): pose number; 0 selects the object without a numeric suffix.
    Raises IndexError if no poseviewer file matches.
    """
    pv = glob('{}/docking/{}/{}*{}/*pv.maegz'.format(protein, 'confgen_es4', ligand, struct))[0]
    name = pv.split('/')[-1].split('.')[0]

    cmd.load(pv)
    cmd.ungroup('*')
    if pose == 0:
        obj = '{}.{}_lig'.format(name, ligand)
    elif pose < 10:
        obj = '{}.{}_lig_0{}'.format(name, ligand, pose)
    else:
        obj = '{}.{}_lig_{}'.format(name, ligand, pose)
    cmd.set_name(obj, '{}_{}'.format(prefix, ligand))
    # Drop the remaining objects that came from this poseviewer file.
    cmd.delete(name + '*')


def load_top_glide(protein, n=1):
    """
    Load pose 0 for the first "n" ligands, all docked to the grid named after
    the alphabetically first protein structure.
    """
    n = int(n)
    grid = None
    for prot in sorted(glob('{}/structures/proteins/*_prot.mae'.format(protein)))[:n]:
        pdb = prot.split('/')[-1].split('_')[0]
        if grid is None:
            grid = pdb
        print(pdb)
        load_pose(protein, pdb, grid, 0, 'glide')
    cmd.show('sticks', "glide_*")
    cmd.hide('lines', 'element h')
    cmd.hide('everything', 'element H and not (element N+O extend 1)')


def load_results(protein, scores):
    """
    Load the ComBind and Glide poses listed in a scores csv, plus crystal
    poses for ligands whose name does not start with 'CHEMBL'.

    scores (str): csv with a header line and rows of
        ligand, combind_rank, combind_rmsd, glide_rank, glide_rmsd,
        best_rank, best_rmsd. Rows starting with 'com' are skipped.
    """
    struct = glob('{}/docking/grids/*'.format(protein))[0].split('/')[-1]
    load_crystal_protein(protein, struct)
    with open(scores) as fp:
        fp.readline()
        for line in fp:
            if line[:3] == 'com':
                continue
            (ligand,
             combind_rank, combind_rmsd,
             glide_rank, glide_rmsd,
             best_rank, best_rmsd) = line.strip().split(',')
            ligand = ligand.replace('_lig', '')
            load_pose(protein, ligand, struct, int(combind_rank), 'combind')
            load_pose(protein, ligand, struct, int(glide_rank), 'glide')
            if ligand[:6] != 'CHEMBL':
                load_crystal_pose(protein, ligand)

    cmd.show('sticks', "glide_*")
    cmd.show('sticks', "combind_*")
    cmd.show('sticks', "crystal_*")
    cmd.hide('lines', 'element h')
    cmd.hide('everything', 'element H and not (element N+O extend 1)')

    cmd.util.cbaw('*')
    cmd.color('yellow', 'glide* and element c')
    cmd.color('cyan', 'combind* and element c')
    cmd.set('stick_radius', '0.13')

    cmd.show('cartoon')
    cmd.set('cartoon_oval_length', '0.5')
    cmd.set('cartoon_transparency', '0.5')

###############################################################


def parse_fp_file(fp_file):
    """
    Parse a fingerprint file into {pose_num: {(interaction, residue): score}}.

    A line starting with 'Pose' opens a new pose; the pose number is its
    second space-separated field. Every other non-blank line has the form
    '<i>-<r>-<ss>=<score>'. For each pose, the maximum score per (int(i), r)
    is kept, with a floor of 0.

    On a read or parse error, the error is printed and whatever was parsed
    so far is returned ({} if nothing was parsed).
    """
    ifps = {}
    try:
        with open(fp_file) as f:
            pose_num = 0
            for line in f:
                if line.strip() == '':
                    continue
                if line[:4] == 'Pose':
                    pose_num = int(line.strip().split(' ')[1])
                    ifps[pose_num] = {}
                    continue
                sc_key, sc = line.strip().split('=')
                i, r, ss = sc_key.split('-')
                key = (int(i), r)
                # Look up the previous score inside the current pose. The
                # original indexed the outer dict, which raised KeyError for
                # every repeated key and truncated the parse.
                scores = ifps[pose_num]
                scores[key] = max(scores.get(key, 0), float(sc))
    except (OSError, ValueError, KeyError) as e:
        # Best effort: a missing file, a malformed line, or a score line
        # before the first 'Pose' header ends parsing here.
        print(e)
        print(fp_file, 'fp not found')
    if len(ifps) == 0:
        print('check', fp_file)
        return {}
    return ifps


def show_interactions(protein, ligand, struct, ifp, pose):
    """
    Label residue CA atoms with the fingerprint scores of one pose.

    Only interaction types 2 and 3 with a score of at least 0.5 are labelled.
    """
    ifp_file = '{}/ifp/{}/{}_lig-to-{}-confgen_es4.fp'.format(protein, ifp, ligand, struct)
    print(ifp_file)
    ifps = parse_fp_file(ifp_file)
    pose = int(pose)
    if pose not in ifps:
        # parse_fp_file has already reported unreadable files; avoid a bare
        # KeyError inside the PyMOL command.
        print('no fingerprint for pose', pose, 'in', ifp_file)
        return
    cmd.hide('labels')
    cmd.set('label_size', 50)
    for (i, r), score in ifps[pose].items():
        if i not in [2, 3]:
            continue
        if score < 0.5:
            continue
        # The residue id is the text between the first ':' and the next '('.
        res = r.split(':')[1].split('(')[0]
        cmd.label('{}/ca'.format(res), score)


cmd.extend('load_complexes', load_complexes)
cmd.extend('load_top_glide', load_top_glide)
cmd.extend('load_results', load_results)
cmd.extend('show_interactions', show_interactions)
from pymol import cmd
import sys


def read_results(fname):
    """
    Read a results csv into {ligand: (combind, glide, best)}.

    The first line is treated as a header and rows starting with 'com' are
    skipped. Each row has seven comma-separated fields; the 2nd, 4th and 6th
    are the ComBind, Glide and best pose numbers. "best" is None when the
    field is the literal string 'None'.
    """
    data = {}
    with open(fname) as fp:
        fp.readline()
        for line in fp:
            if line[:3] == 'com':
                continue
            fields = line.strip().split(',')
            print(fields)
            ligand, combind, _, glide, _, best, _ = fields

            data[ligand] = (int(combind),
                            int(glide),
                            int(best) if best != 'None' else None)
    return data


def pose(poseviewer, pose_number, label='pose'):
    """
    Load one pose and its docking structure from a poseviewer file.

    poseviewer (str): path named '<ligand>-to-<struct>_pv.<ext>'.
    pose_number (int or str): pose to keep; 0 selects the object without a
        numeric suffix.
    label (str): prefix of the resulting pose object, '<label>_<ligand>'.

    The protein is renamed to 'grid_<ligand>'; every other object whose name
    starts with the ligand name is deleted.
    """
    # PyMOL passes command-line arguments as strings; the comparisons below
    # need an int.
    pose_number = int(pose_number)

    name = '.'.join(poseviewer.split('/')[-1].split('.')[:-1])
    struct = name.split('-to-')[-1].split('_')[0] + '_prot'
    ligand = name.split('-to-')[0]
    if pose_number == 0:
        pose_number = ''
    elif pose_number < 10:
        pose_number = '0' + str(pose_number)
    else:
        pose_number = str(pose_number)
    print(name, struct, ligand, pose_number)
    cmd.load(poseviewer)
    cmd.split_states(name)
    cmd.delete(name)

    pose_name = label + '_' + ligand
    grid_name = 'grid_' + ligand
    cmd.set_name(ligand + pose_number, pose_name)
    cmd.set_name(struct, grid_name)
    cmd.delete(ligand + '*')

    cmd.show_as('cartoon', grid_name)
    cmd.show_as('sticks', pose_name)


def results(fname, protein, struct, docking):
    """
    Load the ComBind and Glide poses of every ligand listed in "fname" from
    '<protein>/docking/<docking>/<ligand>-to-<struct>/' and group the
    resulting objects into 'combind', 'glide' and 'grid'.
    """
    data = read_results(fname)
    for ligand, (combind, glide, best) in data.items():
        print(ligand)
        poseviewer = '{}/docking/{}/{}-to-{}/{}-to-{}_pv.maegz'.format(
            protein, docking, ligand, struct, ligand, struct)
        pose(poseviewer, combind, 'combind')
        pose(poseviewer, glide, 'glide')

    cmd.util.cbam('combind*')
    cmd.util.cbag('glide*')
    cmd.util.cbaw('grid*')
    cmd.hide('sticks', 'element h and not (element o+n extend 1)')

    cmd.group('glide', 'glide*')
    cmd.group('combind', 'combind*')
    cmd.group('grid', 'grid*')


cmd.extend('pose', pose)
cmd.extend('results', results)
To predict binding affinities, poses are predicted for 14 | the known binders using ComBind, and then the candidate molecule is scored 15 | according to the ComBind score w.r.t. the selected poses. 16 | 17 | ## Predicting poses for known binders 18 | 19 | First, see instructions for software installation at the bottom of this page. 20 | 21 | Running ComBind can be broken into several components: data curation, 22 | data preparation (including docking), featurization of docked poses, 23 | and the ComBind scoring itself. 24 | 25 | Note that if you already have docked poses for your molecules of interest, you 26 | can proceed to the featurization step. If you are knowledgeable about your target 27 | protein, you may well be able to get better docking results by manually 28 | preparing the data than would be obtained using the automated procedure 29 | implemented here. 30 | 31 | ### Curation of raw data 32 | 33 | To produce poses for a particular protein, you'll need to provide a 3D structure 34 | of the target protein and chemical structures of ligands to dock. 35 | 36 | These raw inputs need to be properly stored so that the rest of the pipeline 37 | can recognize them. 38 | 39 | The structure(s) should be stored in a directory `structures/raw`. 40 | Each structure should be split into two files `NAME_prot.mae` and `NAME_lig.mae` 41 | containing only the protein and only the ligand, respectively. 42 | 43 | If you'd prefer to prepare your structures yourself, save your 44 | prepared files to `structures/proteins` and `structures/ligands`. Moreover, 45 | you could even just begin with a Glide docking grid which you prepared yourself 46 | by placing it in `docking/grids`. 47 | 48 | Ligands should be specified in a csv file with a header line containing at 49 | least the entries "ID" and "SMILES", specifying the ligand name and the ligand 50 | chemical structure. 
51 | 52 | ### Data preparation and docking 53 | 54 | Use the following command to prepare the structural data using Schrodinger's 55 | prepwizard, align the structures to each other, and produce a docking grid. 56 | 57 | ``` 58 | combind structprep 59 | ``` 60 | 61 | In parallel, you can prepare the ligand data using the following command. 62 | By default, the ligands will be written to separate files (one ligand per file). 63 | You can specify the `--multiplex` flag to write all of the ligands to the same 64 | file. 65 | 66 | ``` 67 | combind ligprep ligands.csv 68 | ``` 69 | 70 | Once the docking grid and ligand data have been prepared, you can run the 71 | docking. The arguments to the dock command are a list of ligand files to be 72 | docked. By default, the docking grid is the alphabetically first grid present 73 | in `docking/grids`; use the `--grid` option to specify a different grid. 74 | 75 | ``` 76 | combind dock ligands/*/*.maegz 77 | ``` 78 | 79 | ### Featurization 80 | 81 | ``` 82 | combind featurize features docking/*/*_pv.maegz 83 | ``` 84 | 85 | ### Pose prediction with ComBind 86 | 87 | ``` 88 | combind pose-prediction features poses.csv 89 | ``` 90 | 91 | Optionally, you can extract the poses selected by ComBind to a single file. 92 | The resulting file will contain the protein structure followed by one pose (the 93 | one selected by ComBind) for each ligand. 94 | 95 | ``` 96 | combind extract-top-poses poses.csv docking/*/*_pv.maegz 97 | ``` 98 | 99 | ## ComBindVS 100 | 101 | To run virtual screening using ComBindVS, you must begin with a structure of the 102 | target protein, a set of helper ligands, and a library of compounds to screen. 103 | 104 | The first two steps, which can be done in parallel, are to determine poses for 105 | the helper ligands using ComBind and to produce an initial set of docked poses 106 | for the library to be screened. 
Then, ComBindVS can be used to score each docked library pose against the helper ligand poses. 107 | 108 | ### Use ComBind to solve for poses of a set of helper ligands 109 | 110 | Use ComBind to predict poses for the known binders and extract the selected 111 | poses to a single file, as described above. Below, we'll assume that this 112 | file is named `helpers_pv.maegz`. 113 | 114 | ### Dock the library to be screened 115 | 116 | The library to be screened can be docked the same way as described above, 117 | but here it is highly recommended that you use the `--multiplex` option during 118 | ligprep (to write all the compounds to one file) and the `--screen` option 119 | during docking, which will limit the number of poses per compound to 30 and 120 | not use enhanced pose sampling. 121 | 122 | ``` 123 | combind ligprep library.csv --multiplex 124 | combind dock ligands/library/library.maegz --screen 125 | ``` 126 | 127 | ### ComBindVS 128 | 129 | To compute the ComBind scores for each pose, we need to compute the pairwise 130 | features between each candidate pose and the helper ligand poses. 131 | 132 | ``` 133 | combind featurize --no-mcss --screen --max-poses 100000 features_screen docking/library-to-grid/library-to-grid_pv.maegz helpers_pv.maegz 134 | ``` 135 | 136 | With these features in hand, you can then compute the ComBind scores. The ComBind 137 | scores for each pose will be written to the indicated numpy file (here screen.npy). 138 | 139 | ``` 140 | combind screen screen.npy features_screen 141 | ``` 142 | 143 | It is often convenient to apply the scores to the original poseviewer file and 144 | use existing schrodinger utilities to sort the results. 
145 | 146 | ``` 147 | combind apply-scores docking/library-to-grid/library-to-grid_pv.maegz screen.npy combind_scores_added_pv.maegz 148 | $SCHRODINGER/utilities/glide_sort -best_by_title -use_prop_d r_i_combind_score -o combind_pv.maegz combind_scores_added_pv.maegz 149 | ``` 150 | 151 | ## Benchmarking data 152 | 153 | See `stats_data/pdbs_for_benchmark.csv` for a list of PDBs used for benchmarking 154 | ComBind. The "query" column gives the PDB for the ligand being docked, the 155 | "grid" column gives the structure the query is docked to, and the "mcss<0.5" 156 | column indicates whether the query ligand shares a common substructure with 157 | the co-crystal ligand in the structure being docked to. 158 | 159 | See `stats_data/structures.tar.gz` for the raw structural data used for 160 | benchmarking ComBind. 161 | 162 | See `stats_data/helper_best_affinity_diverse.csv` and `stats_data/helper_best_mcss.csv` 163 | for a list of the "helper ligands" used when benchmarking ComBind. Each row 164 | lists a query ligand and one helper ligand; all the entries for each query ligand 165 | should be aggregated. (Most query ligands have 20 associated helper ligands.) 166 | 167 | ## Installation 168 | 169 | Start by cloning this git repository. 170 | 171 | ComBind requires access to Glide along with several other Schrodinger tools 172 | and the Schrodinger Python API. 173 | 174 | First, make sure that you have a SCHRODINGER environment variable set 175 | pointing to the root of the schrodinger software installation. 176 | 177 | You can only access the Schrodinger Python API using their interpreter. 178 | Creating a virtual environment that makes their interpreter the default 179 | python interpreter is the simplest way to do this. 
import numpy as np


class DensityEstimate:
    """
    Computes and stores density estimates f(x) for input values x.

    DensityEstimates are stored in the form of a probability density;
    to get a value in the form of counts at a position, just multiply
    by n_samples.

    DensityEstimates can be averaged.
    """
    def __init__(self, points=100, domain=None, sd=1.0,
                 reflect=True, out_of_bounds=None):
        """
        points (int): number of values at which to compute density.
        domain ((float, float)): range of values at which to compute density
            if left as None, use min and max of input data.
        sd (float): standard deviation of gaussian kernel.
        reflect (bool): If True compute density for domain + left and right
            flanks, then reflect flanks and add to center. (This provides
            better behaviour at boundaries.)
        out_of_bounds (None or float): stored on the instance and copied to
            averaged estimates, but not consulted by __call__, which always
            returns the closest density for values outside the domain.
        """
        self.points = points
        self.out_of_bounds = out_of_bounds
        self.sd = sd
        self.reflect = reflect
        self.domain = domain
        self.n_samples = 0

    # I/O
    # File format: first line -> n_samples, sd, reflect, remaining -> x, fx
    def __str__(self):
        """Serialize to the text format understood by read()."""
        header = ','.join([str(self.n_samples), str(self.sd),
                           str(self.reflect)])
        rows = ['{},{}'.format(_x, _fx) for _x, _fx in zip(self.x, self.fx)]
        return '\n'.join([header] + rows)

    def write(self, fname):
        """Write the estimate to "fname" in the format produced by __str__."""
        with open(fname, 'w') as fp:
            fp.write(str(self))

    @classmethod
    def read(cls, fname):
        """
        Load an estimate previously saved with write().

        The domain and out_of_bounds settings are not part of the file format
        and keep their defaults.
        """
        x, fx = [], []
        with open(fname) as fp:
            n_samples, sd, reflect = fp.readline().strip().split(',')
            for line in fp:
                if not line.strip():
                    continue
                _x, _fx = line.strip().split(',')
                x.append(float(_x))
                fx.append(float(_fx))
        # The flag is stored as the text 'True' / 'False'; bool('False') is
        # True, so the text has to be compared explicitly.
        de = cls(points=len(x),
                 sd=float(sd),
                 reflect=reflect.strip() == 'True')
        de.n_samples = float(n_samples)
        de.x, de.fx = np.array(x), np.array(fx)
        return de

    # Core methods
    def __call__(self, x):
        """
        Returns f(x) for the given value of x by linear interpolation.
        For x outside the stored domain, np.interp returns the density at
        the nearest end of the domain.
        """
        return np.interp(x, self.x, self.fx)

    def fit(self, X, weights=1):
        """
        Given an array of values X and weights weights,
        compute a density estimate with standard deviation self.sd.
        If reflect, compute densities for each flank and add
        computed densities back to the center.
        The result is normalized so that the mean of fx times the width of
        the domain equals 1.

        X is not modified. Returns self.
        """
        X = np.asarray(X, dtype=float)

        if self.domain is None:
            self.x = np.linspace(X.min(), X.max(), self.points)
        else:
            self.x = np.linspace(self.domain[0], self.domain[1], self.points)

        if not X.shape[0]:
            return self._uniform()

        if self.reflect:
            lo, hi = self.x[0], self.x[-1]
            if X.max() > hi or X.min() < lo:
                print('Warning: Data out of domain of density estimate'
                      ' with reflected boundary conditions. Squishing'
                      ' data to be on specified domain.')
                # np.clip returns a new array; assigning into X would
                # silently clamp the caller's data as well.
                X = np.clip(X, lo, hi)
            r = hi - lo
            self.x = np.hstack([self.x-r, self.x, self.x+r])

        self._kde(X, weights)

        if self.reflect:
            # left, center, right
            # NOTE(review): the left slice starts at index `points` (the
            # first center point) while the right slice mirrors exactly;
            # this looks off by one grid step. Left as is because the
            # shipped statistics were computed with it -- confirm.
            self.fx = (self.fx[self.points:0:-1]
                       + self.fx[self.points:2*self.points]
                       + self.fx[-1:2*self.points-1:-1])
            self.x = self.x[self.points:2*self.points]

        self.fx *= (self.x.shape[0] / (self.x[-1]-self.x[0])) / self.fx.sum()
        self.n_samples = (weights*np.ones(X.shape)).sum()
        return self

    def data_loglikelihood(self, X, weights=1):
        """Return the weighted sum of log f(x) over the values in X."""
        return np.sum(np.log(self(X))*weights)

    def _gauss(self, mean, x):
        """
        Return PDF of N(mean, sd**2) at x.
        """
        return (np.exp(-0.5*((x - mean)/self.sd)**2)
                / (self.sd*np.sqrt(2*np.pi)))

    def _kde(self, X, weights):
        """
        Stores in self.fx the density estimate at each point in self.x for
        the input data X weighted by weights.
        """
        self.fx = np.array([(weights*self._gauss(mean, X)).sum()
                            for mean in self.x])

    def _uniform(self):
        """Set a uniform density over self.x with n_samples = 0."""
        self.n_samples = 0
        self.fx = np.ones(self.x.shape)
        self.fx /= self.x[-1]-self.x[0]
        return self

    def _average(self, other):
        """
        Returns a function representing the average of the self and other
        functions, weighted by their n_samples. The domain of the new
        function covers the domain of both input functions with the same
        number of points as self.

        If either estimate has no samples, the other one is returned as is
        (not a copy).
        """
        assert self.reflect == other.reflect, "Either reflect or don't."
        if not other.n_samples:
            return self
        if not self.n_samples:
            return other

        # sd is carried over so that a merged estimate written to disk does
        # not report the constructor default instead of the kernel width.
        de = DensityEstimate(points=self.points,
                             sd=self.sd,
                             out_of_bounds=self.out_of_bounds,
                             reflect=self.reflect)
        de.x = np.linspace(min(self.x[0], other.x[0]),
                           max(self.x[-1], other.x[-1]), self.points)
        de.n_samples = self.n_samples + other.n_samples
        de.fx = (self(de.x)*self.n_samples
                 + other(de.x)*other.n_samples) / de.n_samples
        return de

    @classmethod
    def merge(cls, des):
        """
        Average a non-empty sequence of estimates, left to right.
        Raises IndexError if "des" is empty.
        """
        out = des[0]
        for de in des[1:]:
            out = out._average(de)
        return out
import numpy as np


class PosePrediction:
    """
    Compute sets of poses that optimize the ComBind scoring function.

    ligands ([str, ]): names of ligands for which to predict poses.
    features ([str, ]): names of features for computing similarity scores.
    data ({}): Raw data.
    stats ({feature: {'native': score.DensityEstimate,
                      'reference': score.DensityEstimate}})
    alpha (float): Factor to which to weight the glide scores.

    max_poses (int): largest number of poses for any ligand.
    single (np.array, # ligands x max_poses)
    pair (np.array, # ligands x # ligands x max_poses x max_poses)
    """
    def __init__(self, ligands, features, data, stats, alpha):
        self.ligands = ligands
        self.features = features
        self.data = data
        self.stats = stats
        self.alpha = float(alpha)

        self.max_poses = self._get_max_poses()
        self.single = self._get_single()
        self.pair = self._get_pair()

    def _get_max_poses(self):
        """
        Get largest number of poses present for any ligand.
        """
        return max(len(self.data['gscore'][ligand]) for ligand in self.ligands)

    def _get_single(self):
        """
        Transform docking scores into a # ligands x max_poses array.

        * Scale docking scores by - self.alpha
        * Pad ligands with fewer than max_poses poses with -inf so that a
          missing pose can never be the argmax.
        """
        rows = []
        for ligand in self.ligands:
            scores = np.asarray(self.data['gscore'][ligand], dtype=float)
            # Scale before padding: padding with +inf and multiplying by
            # -alpha afterwards yields NaN when alpha == 0 (inf * 0).
            rows.append(self.pad(-self.alpha * scores, self.max_poses,
                                 C=-float('inf')))
        return np.vstack(rows)

    def _get_pair(self):
        """
        Transform pairwise similarities into pairwise energy terms stored
        in a # ligands x # ligands x max_poses x max_poses array.

        For each pairwise feature, compute the log ratio of feature
        likelihood in the native v. reference distribution.

        Sum over feature types to get a single energy term for each pose pair.
        """
        pair = np.zeros((len(self.ligands), len(self.ligands),
                         self.max_poses, self.max_poses))
        for i, ligand1 in enumerate(self.ligands):
            for j, ligand2 in enumerate(self.ligands[i+1:], start=i+1):
                for feature in self.features:
                    stats = self.stats[feature]
                    raw = self.data[feature][(ligand1, ligand2)]

                    if raw[0, 0] == float('inf'):
                        # Features should either all be there or all be absent.
                        assert np.all(raw == float('inf'))
                        continue

                    energy = (np.log(stats['native'](raw))
                              - np.log(stats['reference'](raw)))
                    energy = self.pad(energy, self.max_poses, self.max_poses)
                    pair[i, j] += energy
                    # Mirror so that pair[a, b, p, q] == pair[b, a, q, p].
                    pair[j, i] += energy.T
        return pair

    def max_posterior(self, max_iterations, restart):
        """
        Compute (probably) globally optimal pose set.

        Perform coordinate ascent from "restart" initial configurations: the
        first run starts from pose 0 for every ligand, later runs from random
        poses.

        max_iterations (int): Maximum number of iterations to attempt before exiting.
        restart (int): Number of times to run the optimization

        Returns {ligand_name: pose number} for the best-scoring run, or None
        if restart is 0.
        """
        if len(self.ligands) == 1:
            return {self.ligands[0]: 0}

        best_score, best_poses = -float('inf'), None
        for i in range(restart):
            if i == 0:
                poses = {lig: 0 for lig in self.ligands}
            else:
                poses = {lig: np.random.randint(self.max_poses)
                         for lig in self.ligands}

            poses = self.optimize_poses(poses, max_iterations)
            score = self.log_posterior(poses)
            if score > best_score:
                best_score = score
                best_poses = poses.copy()

            print(poses)
            print('run {}, score {}'.format(i, score))
        return best_poses

    def optimize_poses(self, poses, max_iterations):
        """
        Find (local) optimum by performing coordinate ascent starting from
        "poses". "poses" is updated in place and returned.

        poses ({ligand_name: current pose number, })
        max_iterations (int): maximum number of sweeps over all ligands.
        """
        for _ in range(max_iterations):
            update = False
            # Ligands are visited in a fresh random order on every sweep.
            for query in np.random.permutation(list(poses.keys())):
                plp = self.partial_log_posterior(poses, query)
                best_pose = np.argmax(plp)
                if best_pose != poses[query]:
                    update = True
                    poses[query] = best_pose
            if not update:
                break
        else:
            # Only reached when no sweep finished without an update.
            print('Maximum iteractions reached.')
        return poses

    def partial_log_posterior(self, poses, query):
        """
        Returns the terms of the log posterior involving "query" as an array
        with one entry per candidate pose of "query".

        poses ({ligand_name: current pose number, })
        query (ligand_name)
        """
        iposes = {self.ligands.index(lig): pose
                  for lig, pose in poses.items()
                  if lig != query}
        iquery = self.ligands.index(query)

        # Copy so that the in-place additions below never touch self.single.
        plp = self.single[iquery, :].copy()
        for lig, pose in iposes.items():
            plp += self.pair[iquery, lig, :, pose] / len(iposes)
        return plp

    def log_posterior(self, poses):
        """
        Returns the log posterior for pose cluster.

        poses ({ligand_name: current pose number, })
        """
        iposes = [(self.ligands.index(lig), pose) for lig, pose in poses.items()]
        lp = 0
        for lig, pose in iposes:
            lp += self.single[lig, pose]

        # Each unordered ligand pair is counted once.
        for i, (lig1, pose1) in enumerate(iposes):
            for lig2, pose2 in iposes[i+1:]:
                lp += self.pair[lig1, lig2, pose1, pose2] / (len(poses)-1)
        return lp

    def pad(self, x, shape1, shape2=0, C=float('inf')):
        """
        Expand array to ("shape1",) if 1D or ("shape1", "shape2") if 2D.

        1D arrays are filled with "C" where values are missing. 2D arrays are
        filled with 0 and "C" is ignored, so that missing pose pairs
        contribute no pairwise energy. Inputs larger than the target shape
        are truncated.
        """
        if len(x.shape) == 1:
            y = np.full(shape1, C, dtype=float)
            y[:x.shape[0]] = x[:shape1]
        elif len(x.shape) == 2:
            y = np.zeros((shape1, shape2))
            y[:x.shape[0], :x.shape[1]] = x[:shape1, :shape2]
        else:
            assert False
        return y
167 | """ 168 | if len(x.shape) == 1: 169 | y = np.zeros(shape1)+C 170 | y[:x.shape[0]] = x[:shape1] 171 | elif len(x.shape) == 2: 172 | y = np.zeros((shape1, shape2)) 173 | y[:x.shape[0], :x.shape[1]] = x[:shape1, :shape2] 174 | else: 175 | assert False 176 | return y 177 | -------------------------------------------------------------------------------- /score/screen.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from utils import np_load 4 | from schrodinger.structure import StructureReader, StructureWriter 5 | 6 | def load_features_screen(features, root): 7 | single = np.load(f'{root}/gscore1.npy') 8 | 9 | raw = {} 10 | for feature in features: 11 | raw[feature] = np_load(f'{root}/{feature}.npy') 12 | return single, raw 13 | 14 | def scores_to_csv(pv, out): 15 | """ 16 | Write docking and ComBind scores to text. 17 | """ 18 | titles, glide, combind = [], [], [] 19 | with StructureReader(pv) as reader: 20 | next(reader) 21 | for st in reader: 22 | titles += [st.title] 23 | glide += [st.property['r_i_docking_score']] 24 | combind += [st.property['r_i_combind_score']] 25 | 26 | df = pd.DataFrame(np.vstack([titles, glide, combind]).T, 27 | columns = ['ID', 'GLIDE', 'COMBIND']) 28 | df.to_csv(out, index=False) 29 | 30 | def apply_scores(pv, scores, out): 31 | """ 32 | Add ComBind screening scores to a poseviewer. 
def screen(single, raw, stats, alpha):
    """
    Combine per-feature log-likelihood ratios with the single-ligand term.

    For every feature in "raw", the native and reference estimates from
    "stats" are evaluated on the raw values and the difference of their
    logs is accumulated. The accumulated array is averaged over axis 1,
    divided by a rescaled "alpha", and "single" is subtracted.

    single: array subtracted from the scaled pair energy.
    raw ({feature: 2D array}): feature values; axis 1 is averaged out
        (presumably one column per reference ligand - confirm with caller).
    stats ({feature: {'native': callable, 'reference': callable}})
    alpha (float): divisor for the pair term; rescaled by
        0.5*n / (1 + (n-1)*0.5), where n is the size of axis 1.
    """
    pair_energy = 0
    for feature, values in raw.items():
        densities = stats[feature]
        log_native = np.log(densities['native'](values))
        log_reference = np.log(densities['reference'](values))
        pair_energy += log_native - log_reference

    n = pair_energy.shape[1]
    rescale = 0.5 * n / (1 + (n-1)*0.5)
    alpha /= rescale

    return pair_energy.mean(axis=1)/alpha - single
def compute_stats(protein, pairs_root, stats_root, features):
    """
    Fit native and reference density estimates for one protein.

    Reads the pose-pair table "{pairs_root}/{protein}.csv". For each
    feature two DensityEstimate objects are fit and written:
      native_{feature}.de     rows where rmsd1 and rmsd2 are both <= 2.0
      reference_{feature}.de  all rows
    Both files go to "{stats_root}/{protein}/".

    'mcss' is fit on domain (0, 6) with sd 0.03*6; every other feature
    uses domain (0, 1) with sd 0.03.
    """
    pairs = pd.read_csv('{}/{}.csv'.format(pairs_root, protein))
    for feature in features:
        # The kernel width stays at 3% of the domain width in both cases.
        if feature != 'mcss':
            sd, domain = 0.03, (0, 1)
        else:
            sd, domain = 0.03*6, (0, 6)

        both_near_native = (pairs.rmsd1 <= 2.0)&(pairs.rmsd2 <= 2.0)
        native_values = pairs.loc[both_near_native, feature]
        reference_values = pairs.loc[:, feature]

        native = DensityEstimate(domain=domain, sd=sd).fit(native_values)
        reference = DensityEstimate(domain=domain, sd=sd).fit(reference_values)
        native.write('{}/{}/native_{}.de'.format(stats_root, protein, feature))
        reference.write('{}/{}/reference_{}.de'.format(stats_root, protein, feature))
ref_fname = '{}/{}/reference_{}.de'.format(stats_root, protein, feature) 82 | nat_des += [DensityEstimate.read(nat_fname)] 83 | ref_des += [DensityEstimate.read(ref_fname)] 84 | 85 | nat_fname = merged_stats_fname.format('native', feature) 86 | ref_fname = merged_stats_fname.format('reference', feature) 87 | DensityEstimate.merge(nat_des).write(nat_fname) 88 | DensityEstimate.merge(ref_des).write(ref_fname) 89 | -------------------------------------------------------------------------------- /score/tests/__pycache__/test_density_estimate.cpython-36-pytest-5.2.4.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drorlab/combind/be0f5bbf3d141a30c441345e7670a1481cf1941f/score/tests/__pycache__/test_density_estimate.cpython-36-pytest-5.2.4.pyc -------------------------------------------------------------------------------- /score/tests/__pycache__/test_lig_pair.cpython-36-pytest-5.2.4.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drorlab/combind/be0f5bbf3d141a30c441345e7670a1481cf1941f/score/tests/__pycache__/test_lig_pair.cpython-36-pytest-5.2.4.pyc -------------------------------------------------------------------------------- /score/tests/__pycache__/test_pose_pair.cpython-36-pytest-5.2.4.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drorlab/combind/be0f5bbf3d141a30c441345e7670a1481cf1941f/score/tests/__pycache__/test_pose_pair.cpython-36-pytest-5.2.4.pyc -------------------------------------------------------------------------------- /score/tests/__pycache__/test_prob_opt.cpython-36-pytest-5.2.4.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drorlab/combind/be0f5bbf3d141a30c441345e7670a1481cf1941f/score/tests/__pycache__/test_prob_opt.cpython-36-pytest-5.2.4.pyc 
def test_merge():
    """
    Merge three estimates with 5, 15 and 0 samples.

    The expected 0.75 equals the sample-count-weighted mean of the first
    two heights, (5*0 + 15*1) / 20; the zero-sample estimate must not
    contribute.
    """
    estimates = []
    for n_samples, height in [(5, 0), (15, 1), (0, 2)]:
        de = DensityEstimate(points = 3, domain = (0, 2))
        de.n_samples = n_samples
        de.x = np.array([0, 1, 2])
        de.fx = np.array([height, height, height])
        estimates += [de]

    merged = DensityEstimate.merge(estimates)

    assert np.all(merged.fx == [0.75, 0.75, 0.75])
def test_empty_tanimoto():
    """
    Two ligands with five empty fingerprints each, capped at 4 poses.

    Expects 4 x 4 pose pairs and a feature value of 1/2 for both feature
    types on every pair.
    """
    empty_fps = [{}]*5
    first = create_ligand('lig1', empty_fps)
    second = create_ligand('lig2', [{}]*5)

    lp = LigPair(first, second, {'sb': [1], 'hbond': [2]}, None, 4)
    lp.init_pose_pairs()

    assert len(lp.pose_pairs) == 16

    ranks = [(i, j) for i in range(4) for j in range(4)]
    for i, j in ranks:
        for feature in ('sb', 'hbond'):
            assert lp.get_feature(feature, i, j) == 1/2
def test_max_greater_than_one_tanimoto():
    """
    Pairwise 'sb' values when some poses carry two interactions of the
    same type; 'hbond' has no interactions and stays at 1/2 throughout.
    """
    lig1 = create_ligand('lig1', [{(1, 23): 1.0, (1, 20): 1.0},
                                  {(1, 23): 0.5},
                                  {(1, 20): 1.0}])
    lig2 = create_ligand('lig2', [{},
                                  {(1, 23): 0.5},
                                  {(1, 23): 1.0, (1, 20): 1.0}])

    lp = LigPair(lig1, lig2, {'sb': [1], 'hbond': [2]}, None, 4)
    lp.init_pose_pairs()

    assert len(lp.pose_pairs) == 9

    # (pose of lig1, pose of lig2, expected 'sb' value)
    expected_sb = [
        (0, 0, 1/4),
        (1, 0, 1/2.5),
        (2, 0, 1/3),
        (0, 1, (1 + 0.5**0.5) / (4.5 - 0.5**0.5)),
        (1, 1, 1.5 / 2.5),
        (2, 1, 1/3.5),
        (0, 2, 3/4),
        (1, 2, (1 + 0.5**0.5)/ (4.5 - 0.5**0.5)),
        (2, 2, 2/4),
    ]
    for i, j, value in expected_sb:
        assert lp.get_feature('sb', i, j) == value

    for i in range(3):
        for j in range(3):
            assert lp.get_feature('hbond', i, j) == 1/2
def test_get_feature_single():
    """
    Both poses share one identical (1, 23) interaction: full 'sb'
    overlap, no overlap for the other types, mcss score passed through.
    """
    shared = pose(fp={(1, 23): 1.0})
    other = pose(fp={(1, 23): 1.0})
    pp = PosePair(shared, other, 4.0, features, 2.0)

    expected = [('sb', 1.0), ('hbond', 0.0), ('contact', 0.0)]
    for feature, value in expected:
        assert pp.overlap(feature) == value
    assert pp.mcss_score == 4.0
def test_tanimoto_single():
    """
    Both poses share one identical (1, 23) interaction: 'sb' tanimoto is
    2/3, the other types stay at 1/2, mcss score passed through.
    """
    shared = pose(fp={(1, 23): 1.0})
    other = pose(fp={(1, 23): 1.0})
    pp = PosePair(shared, other, 4.0, features, 2.0)

    expected = [('sb', 2/3), ('hbond', 1/2), ('contact', 1/2)]
    for feature, value in expected:
        assert pp.tanimoto(feature) == value
    assert pp.mcss_score == 4.0
def basic_ps():
    """
    Build a two-ligand PredictStructs fixture scored on 'sb' only.

    The native 'sb' density rises linearly from 0 to 2 over [0, 1] and
    the reference density falls from 2 to 0. The 'hbond' estimates are
    created but left without x/fx values.
    """
    ligands = {}
    ligands['lig1'] = create_ligand('lig1', [0, 1], [-2, -1.5], [{}, {(1, 23): 1.0}])
    ligands['lig2'] = create_ligand('lig2', [0, 1], [-10, -3.5], [{(1, 23): 1.0}, {}])

    stats = {dist: {feature: DensityEstimate(domain = (0, 1))
                    for feature in ('sb', 'hbond')}
             for dist in ('native', 'reference')}

    for dist, (start, stop) in [('native', (0, 2)), ('reference', (2, 0))]:
        estimate = stats[dist]['sb']
        estimate.x = np.linspace(0, 1, 100)
        estimate.fx = np.linspace(start, stop, 100)

    features = {'sb': [1]}
    return PredictStructs(ligands, None, stats, features, 3, 1.0)
def test_get():
    """
    Check the physics-score, pose-count and pairwise-feature accessors
    on the basic two-ligand fixture, in both ligand orders.
    """
    ps = basic_ps()

    # (ligand, pose, expected physics score)
    physics = [('lig1', 0, -2.0), ('lig1', 1, -1.5),
               ('lig2', 0, -10.0), ('lig2', 1, -3.5)]
    for name, rank, value in physics:
        assert ps._get_physics_score(name, rank) == value

    for name in ('lig1', 'lig2'):
        assert ps._num_poses(name) == 2
    with pytest.raises(KeyError):
        ps._num_poses('lig3')

    # (first ligand, second ligand, pose 1, pose 2, expected 'sb' value)
    overlaps = [
        ('lig1', 'lig2', 0, 0, 1/3),
        ('lig1', 'lig2', 0, 1, 1/2),
        ('lig1', 'lig2', 1, 0, 2/3),
        ('lig1', 'lig2', 1, 1, 1/3),
        ('lig2', 'lig1', 0, 0, 1/3),
        ('lig2', 'lig1', 0, 1, 2/3),
        ('lig2', 'lig1', 1, 0, 1/2),
        ('lig2', 'lig1', 1, 1, 1/3),
    ]
    for first, second, r1, r2, value in overlaps:
        assert ps._get_feature('sb', first, second, r1, r2) == value
def test_like_none():
    """
    For 'hbond' on the basic fixture, every pose pair must yield
    (0.0, 1.0, 1.0) whichever ligand is named first.
    """
    ps = basic_ps()

    def likelihoods(first, second, p1, p2):
        return ps._likelihoods_for_pair_and_single_feature(
            'hbond', {'lig1':p1, 'lig2':p2}, first, second)

    for i in range(2):
        for j in range(2):
            assert likelihoods('lig1', 'lig2', i, j) == (0.0, 1.0, 1.0)
            assert likelihoods('lig2', 'lig1', i, j) == (0.0, 1.0, 1.0)
def test_posterior():
    """
    Full log posterior on the basic fixture for all four pose choices:
    the pairwise 'sb' log ratio plus one physics term per ligand.
    """
    ps = basic_ps()

    # ((pose of lig1, pose of lig2), expected log posterior)
    cases = [
        ((0, 0), np.log(2/3) - np.log(2 - 2/3) + 2*1.0 + 10*1.0),
        ((0, 1), np.log(1) - np.log(1) + 2*1.0 + 3.5*1.0),
        ((1, 0), np.log(2 - 2/3) - np.log(2/3) + 1.5*1.0 + 10.0*1.0),
        ((1, 1), np.log(2/3) - np.log(2 - 2/3) + 1.5*1.0 + 3.5*1.0),
    ]
    for (p1, p2), value in cases:
        assert ps.log_posterior({'lig1':p1, 'lig2':p2}) == value
def test_max_three():
    """
    The three-ligand fixture must give the same optimum on each of four
    consecutive max_posterior calls.
    """
    ps = three_ligands()

    opt = {'lig1':1, 'lig2':0, 'lig3': 1}
    for _ in range(4):
        assert ps.max_posterior() == opt
-------------------------------------------------------------------------------- /stats_data/default/._stats.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drorlab/combind/be0f5bbf3d141a30c441345e7670a1481cf1941f/stats_data/default/._stats.pdf -------------------------------------------------------------------------------- /stats_data/default/native_contact.txt: -------------------------------------------------------------------------------- 1 | 4057443.0,1.0,True 2 | 0.0,7.931199405991599e-23 3 | 0.010101010101010102,7.932392177327258e-22 4 | 0.020202020202020204,1.2804755495041573e-20 5 | 0.030303030303030304,1.9448260636688402e-19 6 | 0.04040404040404041,2.6417616989448778e-18 7 | 0.05050505050505051,3.209192260163893e-17 8 | 0.06060606060606061,3.486904814063895e-16 9 | 0.07070707070707072,3.3891091523810837e-15 10 | 0.08080808080808081,2.947121470900516e-14 11 | 0.09090909090909091,2.293256013592048e-13 12 | 0.10101010101010102,1.597115031197628e-12 13 | 0.11111111111111112,9.957557636836938e-12 14 | 0.12121212121212122,5.559427743596778e-11 15 | 0.13131313131313133,2.7805509050956685e-10 16 | 0.14141414141414144,1.2464411852606958e-09 17 | 0.15151515151515152,5.011345450739143e-09 18 | 0.16161616161616163,1.8089096149130816e-08 19 | 0.17171717171717174,5.8711509400000836e-08 20 | 0.18181818181818182,1.7175623132853304e-07 21 | 0.19191919191919193,4.546171958174369e-07 22 | 0.20202020202020204,1.0954500873736845e-06 23 | 0.21212121212121213,2.4264063971464253e-06 24 | 0.22222222222222224,5.012543911692823e-06 25 | 0.23232323232323235,9.848208237668704e-06 26 | 0.24242424242424243,1.8807742997203098e-05 27 | 0.25252525252525254,3.553334051940606e-05 28 | 0.26262626262626265,6.685528238017555e-05 29 | 0.27272727272727276,0.00012466373049683157 30 | 0.2828282828282829,0.00022817713555464748 31 | 0.29292929292929293,0.00040712832462749815 32 | 0.30303030303030304,0.0007071756167437544 33 | 
0.31313131313131315,0.001198133049337264 34 | 0.32323232323232326,0.0019812777088562858 35 | 0.33333333333333337,0.003184863790102991 36 | 0.3434343434343435,0.004933767354817196 37 | 0.3535353535353536,0.007289271366781339 38 | 0.36363636363636365,0.010179695966777567 39 | 0.37373737373737376,0.013366621046568557 40 | 0.38383838383838387,0.016490216755815166 41 | 0.393939393939394,0.01919920607993784 42 | 0.4040404040404041,0.021315134293828412 43 | 0.4141414141414142,0.02294716780469094 44 | 0.42424242424242425,0.024492007598285473 45 | 0.43434343434343436,0.02651497980243599 46 | 0.4444444444444445,0.029570849165141445 47 | 0.4545454545454546,0.03404546955855856 48 | 0.4646464646464647,0.040074696576555525 49 | 0.4747474747474748,0.04755270807451973 50 | 0.48484848484848486,0.05621523008838535 51 | 0.494949494949495,0.06579166035627243 52 | 0.5050505050505051,0.07624316734521265 53 | 0.5151515151515152,0.08808921253404327 54 | 0.5252525252525253,0.10273631317535958 55 | 0.5353535353535354,0.12260348946271438 56 | 0.5454545454545455,0.15081065827292595 57 | 0.5555555555555556,0.19035828357673354 58 | 0.5656565656565657,0.24303898366523877 59 | 0.5757575757575758,0.3085951873144033 60 | 0.5858585858585859,0.38464483353955814 61 | 0.595959595959596,0.4675375218447179 62 | 0.6060606060606061,0.5537274810768354 63 | 0.6161616161616162,0.640852317373469 64 | 0.6262626262626263,0.7278722560976993 65 | 0.6363636363636365,0.8143286434098852 66 | 0.6464646464646465,0.8994597624088858 67 | 0.6565656565656566,0.9819213123115434 68 | 0.6666666666666667,1.0601696738399544 69 | 0.6767676767676768,1.1329015720489155 70 | 0.686868686868687,1.1990435234117611 71 | 0.696969696969697,1.2575384900908244 72 | 0.7070707070707072,1.3076734252370363 73 | 0.7171717171717172,1.3502454478071537 74 | 0.7272727272727273,1.3888832031102936 75 | 0.7373737373737375,1.4304281800320868 76 | 0.7474747474747475,1.4839529192911967 77 | 0.7575757575757577,1.5590322826096892 78 | 
0.7676767676767677,1.6641896214232115 79 | 0.7777777777777778,1.8058320864360704 80 | 0.787878787878788,1.9873257510504552 81 | 0.797979797979798,2.2078987394461134 82 | 0.8080808080808082,2.4616268505568697 83 | 0.8181818181818182,2.737175430115117 84 | 0.8282828282828284,3.01889161635998 85 | 0.8383838383838385,3.2893633132003464 86 | 0.8484848484848485,3.532902101493697 87 | 0.8585858585858587,3.738825637697399 88 | 0.8686868686868687,3.903263598812447 89 | 0.8787878787878789,4.028679777170414 90 | 0.888888888888889,4.1211659138764345 91 | 0.8989898989898991,4.186386685981709 92 | 0.9090909090909092,4.225561299681266 93 | 0.9191919191919192,4.2329873853423035 94 | 0.9292929292929294,4.1963853126379 95 | 0.9393939393939394,4.100850789207737 96 | 0.9494949494949496,3.936378885354642 97 | 0.9595959595959597,3.707374009454414 98 | 0.9696969696969697,3.440223740462425 99 | 0.9797979797979799,3.1833453843192383 100 | 0.98989898989899,2.995822710174527 101 | 1.0,2.926904828860239 102 | -------------------------------------------------------------------------------- /stats_data/default/native_hbond.txt: -------------------------------------------------------------------------------- 1 | 4057443.0,1.0,True 2 | 0.0,4.6566727881762684e-05 3 | 0.010101010101010102,0.00010082525101397038 4 | 0.020202020202020204,0.0002457868909882953 5 | 0.030303030303030304,0.0006821492025952912 6 | 0.04040404040404041,0.0017793396208461193 7 | 0.05050505050505051,0.004273035957551662 8 | 0.06060606060606061,0.009437557614930125 9 | 0.07070707070707072,0.019202332325378636 10 | 0.08080808080808081,0.03608402024472747 11 | 0.09090909090909091,0.0628073961199734 12 | 0.10101010101010102,0.10156366280367246 13 | 0.11111111111111112,0.15299961364154369 14 | 0.12121212121212122,0.2152167885783857 15 | 0.13131313131313133,0.2832479733211803 16 | 0.14141414141414144,0.349571736073927 17 | 0.15151515151515152,0.40601579637912677 18 | 0.16161616161616163,0.4467191772171671 19 | 
0.17171717171717174,0.4709094665009181 20 | 0.18181818181818182,0.48384726189050775 21 | 0.19191919191919193,0.49502937384615386 22 | 0.20202020202020204,0.5143259193817141 23 | 0.21212121212121213,0.5479865065626772 24 | 0.22222222222222224,0.5964489220690248 25 | 0.23232323232323235,0.6547670347998268 26 | 0.24242424242424243,0.7151324369938852 27 | 0.25252525252525254,0.7701356613413132 28 | 0.26262626262626265,0.8153262631183751 29 | 0.27272727272727276,0.8501663501217752 30 | 0.2828282828282829,0.8772884957314988 31 | 0.29292929292929293,0.9006401634165155 32 | 0.30303030303030304,0.9234206831678854 33 | 0.31313131313131315,0.9467711585953521 34 | 0.32323232323232326,0.9699378179732312 35 | 0.33333333333333337,0.9918909080775313 36 | 0.3434343434343435,1.0133520296545977 37 | 0.3535353535353536,1.0377173230288108 38 | 0.36363636363636365,1.0700622713521457 39 | 0.37373737373737376,1.1147289831749547 40 | 0.38383838383838387,1.172812168369721 41 | 0.393939393939394,1.2408124893072163 42 | 0.4040404040404041,1.3113312016511747 43 | 0.4141414141414142,1.3761928124754796 44 | 0.42424242424242425,1.4313393447129759 45 | 0.43434343434343436,1.4811492919666918 46 | 0.4444444444444445,1.5387717426937195 47 | 0.4545454545454546,1.6203954100860876 48 | 0.4646464646464647,1.7352419284425409 49 | 0.4747474747474748,1.8770330188727589 50 | 0.48484848484848486,2.0232178372679566 51 | 0.494949494949495,2.1438786371557783 52 | 0.5050505050505051,2.21554447828835 53 | 0.5151515151515152,2.231384143940097 54 | 0.5252525252525253,2.2018522058324894 55 | 0.5353535353535354,2.1468339785965918 56 | 0.5454545454545455,2.0857370732869285 57 | 0.5555555555555556,2.0316201116337447 58 | 0.5656565656565657,1.990793952929495 59 | 0.5757575757575758,1.965307044503155 60 | 0.5858585858585859,1.9553019697776715 61 | 0.595959595959596,1.9601043085439442 62 | 0.6060606060606061,1.9782724102628124 63 | 0.6161616161616162,2.0069154931377375 64 | 0.6262626262626263,2.040576849580249 65 | 
0.6363636363636365,2.0705233161482375 66 | 0.6464646464646465,2.0855086157337888 67 | 0.6565656565656566,2.0742333084402795 68 | 0.6666666666666667,2.028522087095529 69 | 0.6767676767676768,1.9459004456722688 70 | 0.686868686868687,1.830866859904786 71 | 0.696969696969697,1.694762863127316 72 | 0.7070707070707072,1.5541747441650062 73 | 0.7171717171717172,1.4277313347500553 74 | 0.7272727272727273,1.3316397047715476 75 | 0.7373737373737375,1.275152605120614 76 | 0.7474747474747475,1.2575812307515069 77 | 0.7575757575757577,1.2679964594348037 78 | 0.7676767676767677,1.2877947617393974 79 | 0.7777777777777778,1.2955373732064457 80 | 0.787878787878788,1.2730428204249555 81 | 0.797979797979798,1.2112159107766844 82 | 0.8080808080808082,1.113513812101092 83 | 0.8181818181818182,0.9951131066251601 84 | 0.8282828282828284,0.8774282370631381 85 | 0.8383838383838385,0.7799443622801157 86 | 0.8484848484848485,0.7127576478172472 87 | 0.8585858585858587,0.6729124183287181 88 | 0.8686868686868687,0.646063416554479 89 | 0.8787878787878789,0.6129503167844988 90 | 0.888888888888889,0.5580785534177288 91 | 0.8989898989898991,0.47658399301405513 92 | 0.9090909090909092,0.37578683122714707 93 | 0.9191919191919192,0.27081822868069805 94 | 0.9292929292929294,0.17721556962857837 95 | 0.9393939393939394,0.10484609920674463 96 | 0.9494949494949496,0.05592171174665679 97 | 0.9595959595959597,0.026837413400736167 98 | 0.9696969696969697,0.01157804559798932 99 | 0.9797979797979799,0.004511256722432466 100 | 0.98989898989899,0.0016888542813479825 101 | 1.0,0.0009669921732633977 102 | -------------------------------------------------------------------------------- /stats_data/default/native_mcss.txt: -------------------------------------------------------------------------------- 1 | 1939377.0,1.0,True 2 | 0.0,0.32878265338268353 3 | 0.06060606060606061,0.3978150215193773 4 | 0.12121212121212122,0.41765611361586646 5 | 0.18181818181818182,0.4531910407804616 6 | 
0.24242424242424243,0.49763847469461336 7 | 0.30303030303030304,0.5437866020264748 8 | 0.36363636363636365,0.5858071806472706 9 | 0.42424242424242425,0.6200148361436659 10 | 0.48484848484848486,0.6446254037572055 11 | 0.5454545454545454,0.6590928202800026 12 | 0.6060606060606061,0.6636373782668727 13 | 0.6666666666666667,0.6591689390093457 14 | 0.7272727272727273,0.6473304110219211 15 | 0.7878787878787878,0.6302752788889588 16 | 0.8484848484848485,0.6101354869308298 17 | 0.9090909090909092,0.5885360176637592 18 | 0.9696969696969697,0.5665044073633344 19 | 1.0303030303030303,0.5447156926136694 20 | 1.0909090909090908,0.5236795163768825 21 | 1.1515151515151516,0.5035895189085622 22 | 1.2121212121212122,0.48397219544835 23 | 1.2727272727272727,0.4635402648584988 24 | 1.3333333333333335,0.440523634955882 25 | 1.393939393939394,0.4133808649533193 26 | 1.4545454545454546,0.38152133786270037 27 | 1.5151515151515151,0.34567080194172023 28 | 1.5757575757575757,0.30772236371514866 29 | 1.6363636363636365,0.27016152558733886 30 | 1.696969696969697,0.2353176899567646 31 | 1.7575757575757576,0.2047360455575414 32 | 1.8181818181818183,0.17888715217738627 33 | 1.878787878787879,0.15727337285041917 34 | 1.9393939393939394,0.13882561928451523 35 | 2.0,0.12239026495738173 36 | 2.0606060606060606,0.10711483359584138 37 | 2.121212121212121,0.09262143237747553 38 | 2.1818181818181817,0.07895798104021184 39 | 2.2424242424242427,0.06640250480008478 40 | 2.303030303030303,0.05523996820582325 41 | 2.3636363636363638,0.04561623621722948 42 | 2.4242424242424243,0.03750836163863141 43 | 2.484848484848485,0.030779064148655475 44 | 2.5454545454545454,0.025253408180139908 45 | 2.606060606060606,0.020774762494278017 46 | 2.666666666666667,0.017232196208627426 47 | 2.7272727272727275,0.014568235525184986 48 | 2.787878787878788,0.012769540598603055 49 | 2.8484848484848486,0.011833398199131696 50 | 2.909090909090909,0.011709054114172423 51 | 2.9696969696969697,0.012234532113227888 52 | 
3.0303030303030303,0.013107935604446914 53 | 3.090909090909091,0.013926592116900935 54 | 3.1515151515151514,0.014293803609182832 55 | 3.2121212121212124,0.013950331706212888 56 | 3.272727272727273,0.012865606047856944 57 | 3.3333333333333335,0.011239938894282856 58 | 3.393939393939394,0.009415577994433665 59 | 3.4545454545454546,0.007742242246643128 60 | 3.515151515151515,0.00646296915566598 61 | 3.5757575757575757,0.005669128977303403 62 | 3.6363636363636367,0.005332569748656659 63 | 3.6969696969696972,0.005382526288530581 64 | 3.757575757575758,0.005775942935417541 65 | 3.8181818181818183,0.0065199861160292714 66 | 3.878787878787879,0.007637236985625563 67 | 3.9393939393939394,0.009098775673446045 68 | 4.0,0.010768717189682527 69 | 4.0606060606060606,0.012397027456089225 70 | 4.121212121212121,0.013671760593637238 71 | 4.181818181818182,0.014312210395727339 72 | 4.242424242424242,0.014165337528700342 73 | 4.303030303030303,0.013267355256795272 74 | 4.363636363636363,0.011849747582742478 75 | 4.424242424242425,0.010293515547684502 76 | 4.484848484848485,0.009050846182190925 77 | 4.545454545454546,0.008550508526933407 78 | 4.606060606060606,0.00909143973588014 79 | 4.666666666666667,0.010730582606210367 80 | 4.7272727272727275,0.013197201754863492 81 | 4.787878787878788,0.015895248253351667 82 | 4.848484848484849,0.018044772950763382 83 | 4.909090909090909,0.01894275463836446 84 | 4.96969696969697,0.01823259793166956 85 | 5.03030303030303,0.016038236639056135 86 | 5.090909090909091,0.012886865847725889 87 | 5.151515151515151,0.009470858396099124 88 | 5.212121212121212,0.0063880516797256886 89 | 5.2727272727272725,0.003989739555190843 90 | 5.333333333333334,0.002383189549092172 91 | 5.3939393939393945,0.0015558192715563414 92 | 5.454545454545455,0.0015600073386146359 93 | 5.515151515151516,0.0027049298903416893 94 | 5.575757575757576,0.005692125839380314 95 | 5.636363636363637,0.011580410707994622 96 | 5.696969696969697,0.021430943511553897 97 | 
5.757575757575758,0.035587568876787004 98 | 5.818181818181818,0.0528495416283254 99 | 5.878787878787879,0.0701205430862925 100 | 5.9393939393939394,0.08309281360227382 101 | 6.0,0.08793079555869358 102 | -------------------------------------------------------------------------------- /stats_data/default/native_saltbridge.txt: -------------------------------------------------------------------------------- 1 | 4057443.0,1.0,True 2 | 0.0,1.0012759372368253e-08 3 | 0.010101010101010102,2.5441675583893503e-08 4 | 0.020202020202020204,7.631404855319173e-08 5 | 0.030303030303030304,2.5020410444379615e-07 6 | 0.04040404040404041,7.518975471237738e-07 7 | 0.05050505050505051,2.0508037012430198e-06 8 | 0.06060606060606061,5.086603322693296e-06 9 | 0.07070707070707072,1.1507952084120828e-05 10 | 0.08080808080808081,2.383354855388671e-05 11 | 0.09090909090909091,4.537041306067053e-05 12 | 0.10101010101010102,7.975589885621709e-05 13 | 0.11111111111111112,0.00013017213238503706 14 | 0.12121212121212122,0.00019867647634145888 15 | 0.13131313131313133,0.0002869116289661496 16 | 0.14141414141414144,0.00040145666515268576 17 | 0.15151515151515152,0.0005721488922319743 18 | 0.16161616161616163,0.0009031826337533888 19 | 0.17171717171717174,0.0016974650488661518 20 | 0.18181818181818182,0.003718530828335796 21 | 0.19191919191919193,0.008650363193725114 22 | 0.20202020202020204,0.01971945746136518 23 | 0.21212121212121213,0.04220010074230021 24 | 0.22222222222222224,0.08321482047479557 25 | 0.23232323232323235,0.15021202413591456 26 | 0.24242424242424243,0.24820637574479437 27 | 0.25252525252525254,0.37723981500418785 28 | 0.26262626262626265,0.532363994558918 29 | 0.27272727272727276,0.7070367181890584 30 | 0.2828282828282829,0.8971652241893665 31 | 0.29292929292929293,1.1004826260126004 32 | 0.30303030303030304,1.308848163146236 33 | 0.31313131313131315,1.4989375202613198 34 | 0.32323232323232326,1.6320732032170733 35 | 0.33333333333333337,1.6688795601605644 36 | 
0.3434343434343435,1.5913209211751485 37 | 0.3535353535353536,1.4160115571965202 38 | 0.36363636363636365,1.1882108309731971 39 | 0.37373737373737376,0.9610777394784819 40 | 0.38383838383838387,0.7754156718438573 41 | 0.393939393939394,0.6529524708724824 42 | 0.4040404040404041,0.6058992960822793 43 | 0.4141414141414142,0.6568656939159725 44 | 0.42424242424242425,0.8595408219762607 45 | 0.43434343434343436,1.3079274952494446 46 | 0.4444444444444445,2.118464080042587 47 | 0.4545454545454546,3.372920765157817 48 | 0.4646464646464647,5.031981880564873 49 | 0.4747474747474748,6.866010080196249 50 | 0.48484848484848486,8.46958253451741 51 | 0.494949494949495,9.392972023592069 52 | 0.5050505050505051,9.340911059116669 53 | 0.5151515151515152,8.32073078901045 54 | 0.5252525252525253,6.6396365231586545 55 | 0.5353535353535354,4.752622002009416 56 | 0.5454545454545455,3.0631660089058355 57 | 0.5555555555555556,1.794215621905502 58 | 0.5656565656565657,0.9771211038006036 59 | 0.5757575757575758,0.5234514869293582 60 | 0.5858585858585859,0.31237325153673523 61 | 0.595959595959596,0.24743747671170144 62 | 0.6060606060606061,0.2736489236102665 63 | 0.6161616161616162,0.3677020508318832 64 | 0.6262626262626263,0.5175017575627185 65 | 0.6363636363636365,0.7029551671503412 66 | 0.6464646464646465,0.8866576218501251 67 | 0.6565656565656566,1.0195203269732842 68 | 0.6666666666666667,1.0591330999976671 69 | 0.6767676767676768,0.9899043638372463 70 | 0.686868686868687,0.8313107769979287 71 | 0.696969696969697,0.6282080820516605 72 | 0.7070707070707072,0.4297039036428865 73 | 0.7171717171717172,0.2700090001479875 74 | 0.7272727272727273,0.16097235253917136 75 | 0.7373737373737375,0.09653750938782385 76 | 0.7474747474747475,0.06259469595676274 77 | 0.7575757575757577,0.045419728861507254 78 | 0.7676767676767677,0.03567653010320099 79 | 0.7777777777777778,0.02861003726518826 80 | 0.787878787878788,0.0224939439538812 81 | 0.797979797979798,0.017010522020019285 82 | 
0.8080808080808082,0.012257982809276941 83 | 0.8181818181818182,0.008351545198060646 84 | 0.8282828282828284,0.005331415442742806 85 | 0.8383838383838385,0.003158207073994727 86 | 0.8484848484848485,0.001720458296840133 87 | 0.8585858585858587,0.000855469320683005 88 | 0.8686868686868687,0.00038604243638382514 89 | 0.8787878787878789,0.00015744405551147947 90 | 0.888888888888889,5.7862310450797376e-05 91 | 0.8989898989898991,1.9122421649051636e-05 92 | 0.9090909090909092,5.6745554538703144e-06 93 | 0.9191919191919192,1.51045174233152e-06 94 | 0.9292929292929294,3.6035758614839065e-07 95 | 0.9393939393939394,7.70123672949477e-08 96 | 0.9494949494949496,1.4736425038211685e-08 97 | 0.9595959595959597,2.5239069642809412e-09 98 | 0.9696969696969697,3.8679241341601827e-10 99 | 0.9797979797979799,5.303335817391146e-11 100 | 0.98989898989899,6.571921741285701e-12 101 | 1.0,1.4258722347300215e-12 102 | -------------------------------------------------------------------------------- /stats_data/default/native_shape.txt: -------------------------------------------------------------------------------- 1 | 1293964.0,1.0,True 2 | 0.0,0.17192844168891067 3 | 0.010101010101010102,0.20049028931200377 4 | 0.020202020202020204,0.21244095077324524 5 | 0.030303030303030304,0.23932523113834298 6 | 0.04040404040404041,0.28650387468991656 7 | 0.05050505050505051,0.3606533367104576 8 | 0.06060606060606061,0.46833291307421177 9 | 0.07070707070707072,0.6141167854011884 10 | 0.08080808080808081,0.7983325509980003 11 | 0.09090909090909091,1.0149372954984903 12 | 0.10101010101010102,1.2505901897648193 13 | 0.11111111111111112,1.485966858637736 14 | 0.12121212121212122,1.6995232461078396 15 | 0.13131313131313133,1.8726198497718878 16 | 0.14141414141414144,1.9939984380787834 17 | 0.15151515151515152,2.061760993539287 18 | 0.16161616161616163,2.082219772655708 19 | 0.17171717171717174,2.0665054013932522 20 | 0.18181818181818182,2.026694520580991 21 | 0.19191919191919193,1.9730219938173583 22 | 
0.20202020202020204,1.9128003475743078 23 | 0.21212121212121213,1.8507009433011816 24 | 0.22222222222222224,1.7896018003570704 25 | 0.23232323232323235,1.731342575998158 26 | 0.24242424242424243,1.6771468828616376 27 | 0.25252525252525254,1.6278186029317931 28 | 0.26262626262626265,1.583913664631342 29 | 0.27272727272727276,1.545975282432119 30 | 0.2828282828282829,1.5147636093110752 31 | 0.29292929292929293,1.491355031800701 32 | 0.30303030303030304,1.4770623187213678 33 | 0.31313131313131315,1.4732497046297035 34 | 0.32323232323232326,1.4811697759118547 35 | 0.33333333333333337,1.5018814343389624 36 | 0.3434343434343435,1.5361763233066748 37 | 0.3535353535353536,1.584364176469807 38 | 0.36363636363636365,1.6458363656062307 39 | 0.37373737373737376,1.7185240767528467 40 | 0.38383838383838387,1.798557164095354 41 | 0.393939393939394,1.8804435661590675 42 | 0.4040404040404041,1.9578663309829931 43 | 0.4141414141414142,2.0248604185899333 44 | 0.42424242424242425,2.0769146411272406 45 | 0.43434343434343436,2.111590205590979 46 | 0.4444444444444445,2.1284972087549536 47 | 0.4545454545454546,2.128739029476166 48 | 0.4646464646464647,2.114090249839347 49 | 0.4747474747474748,2.0862120481173156 50 | 0.48484848484848486,2.0461686230511256 51 | 0.494949494949495,1.9943882978865766 52 | 0.5050505050505051,1.9310173766817798 53 | 0.5151515151515152,1.8564305613361192 54 | 0.5252525252525253,1.7716153681620868 55 | 0.5353535353535354,1.6782718956174434 56 | 0.5454545454545455,1.578656068751306 57 | 0.5555555555555556,1.4753040924290424 58 | 0.5656565656565657,1.3707633634959069 59 | 0.5757575757575758,1.2673810586644014 60 | 0.5858585858585859,1.1671453809473038 61 | 0.595959595959596,1.071564181802536 62 | 0.6060606060606061,0.9815817740852201 63 | 0.6161616161616162,0.8975504140824112 64 | 0.6262626262626263,0.8192765742869674 65 | 0.6363636363636365,0.7461514874614462 66 | 0.6464646464646465,0.677350397250517 67 | 0.6565656565656566,0.6120542682686568 68 | 
0.6666666666666667,0.5496325422023411 69 | 0.6767676767676768,0.48974264885162005 70 | 0.686868686868687,0.4323426896427717 71 | 0.696969696969697,0.3776476506064962 72 | 0.7070707070707072,0.3260632787375579 73 | 0.7171717171717172,0.2781109535958641 74 | 0.7272727272727273,0.23433811134560412 75 | 0.7373737373737375,0.19521204879892742 76 | 0.7474747474747475,0.1610152682972194 77 | 0.7575757575757577,0.13177532059436908 78 | 0.7676767676767677,0.10725473396568332 79 | 0.7777777777777778,0.08700178368383026 80 | 0.787878787878788,0.07043927423733894 81 | 0.797979797979798,0.056959638299130706 82 | 0.8080808080808082,0.045999892288504346 83 | 0.8181818181818182,0.037082261080566635 84 | 0.8282828282828284,0.02982079703570828 85 | 0.8383838383838385,0.023907091095952036 86 | 0.8484848484848485,0.019092752984601006 87 | 0.8585858585858587,0.015179140324232343 88 | 0.8686868686868687,0.012012204462827017 89 | 0.8787878787878789,0.009474236409585555 90 | 0.888888888888889,0.007469456759837752 91 | 0.8989898989898991,0.00590934237144968 92 | 0.9090909090909092,0.004705954452314976 93 | 0.9191919191919192,0.003775516475600296 94 | 0.9292929292929294,0.0030470831644632797 95 | 0.9393939393939394,0.002469064075329767 96 | 0.9494949494949496,0.0020097083149810424 97 | 0.9595959595959597,0.0016521541438332013 98 | 0.9696969696969697,0.0013871914641008839 99 | 0.9797979797979799,0.001207270375652738 100 | 0.98989898989899,0.0011042101987808364 101 | 1.0,0.0010708320994633114 102 | -------------------------------------------------------------------------------- /stats_data/default/reference_contact.txt: -------------------------------------------------------------------------------- 1 | 26512769.0,1.0,True 2 | 0.0,0.06522608184945236 3 | 0.010101010101010102,0.0778506341071309 4 | 0.020202020202020204,0.08128226476824156 5 | 0.030303030303030304,0.08786796999954401 6 | 0.04040404040404041,0.09712546748657205 7 | 0.05050505050505051,0.10847081258703362 8 | 
0.06060606060606061,0.12130112638931351 9 | 0.07070707070707072,0.13503284943580207 10 | 0.08080808080808081,0.14910260916536539 11 | 0.09090909090909091,0.1629507240585623 12 | 0.10101010101010102,0.17600623587648723 13 | 0.11111111111111112,0.18769235919054433 14 | 0.12121212121212122,0.19747233786270244 15 | 0.13131313131313133,0.20494312778745108 16 | 0.14141414141414144,0.2099532330233527 17 | 0.15151515151515152,0.2126908606726735 18 | 0.16161616161616163,0.21368729570520756 19 | 0.17171717171717174,0.21371630618803916 20 | 0.18181818181818182,0.21362232318487342 21 | 0.19191919191919193,0.21414469951539764 22 | 0.20202020202020204,0.2158023577213106 23 | 0.21212121212121213,0.2188679013958521 24 | 0.22222222222222224,0.22341713451388454 25 | 0.23232323232323235,0.22941455125306665 26 | 0.24242424242424243,0.23679701268903122 27 | 0.25252525252525254,0.24553439616445272 28 | 0.26262626262626265,0.25565804386336716 29 | 0.27272727272727276,0.26725006460761086 30 | 0.2828282828282829,0.28039224422879533 31 | 0.29292929292929293,0.2950938018304605 32 | 0.30303030303030304,0.31124070961670774 33 | 0.31313131313131315,0.32860856971479907 34 | 0.32323232323232326,0.3469447367154837 35 | 0.33333333333333337,0.3660761466439081 36 | 0.3434343434343435,0.385976003253047 37 | 0.3535353535353536,0.4067449194081699 38 | 0.36363636363636365,0.42851472783373173 39 | 0.37373737373737376,0.45132972898551155 40 | 0.38383838383838387,0.4750719560127285 41 | 0.393939393939394,0.4994710259978389 42 | 0.4040404040404041,0.5241952569266175 43 | 0.4141414141414142,0.5489841981680926 44 | 0.42424242424242425,0.5737677329030896 45 | 0.43434343434343436,0.5987248992620512 46 | 0.4444444444444445,0.6242623629736507 47 | 0.4545454545454546,0.6509285820199868 48 | 0.4646464646464647,0.6793060748152228 49 | 0.4747474747474748,0.7099220040059351 50 | 0.48484848484848486,0.7431901606084008 51 | 0.494949494949495,0.779374593734305 52 | 0.5050505050505051,0.8185704409940894 53 | 
0.5151515151515152,0.8607192558314651 54 | 0.5252525252525253,0.9056788515964518 55 | 0.5353535353535354,0.953334101790132 56 | 0.5454545454545455,1.0036882820052648 57 | 0.5555555555555556,1.0568587823601334 58 | 0.5656565656565657,1.1129458299310124 59 | 0.5757575757575758,1.1718360711370925 60 | 0.5858585858585859,1.23308344545097 61 | 0.595959595959596,1.2959954668106999 62 | 0.6060606060606061,1.3599129510530519 63 | 0.6161616161616162,1.4244930230647277 64 | 0.6262626262626263,1.4897568341786271 65 | 0.6363636363636365,1.5558321484555837 66 | 0.6464646464646465,1.622583851111181 67 | 0.6565656565656566,1.6894333793394287 68 | 0.6666666666666667,1.755501152005624 69 | 0.6767676767676768,1.8199212045611544 70 | 0.686868686868687,1.8820541748199076 71 | 0.696969696969697,1.9414670713769664 72 | 0.7070707070707072,1.997781763150654 73 | 0.7171717171717172,2.0505766253605735 74 | 0.7272727272727273,2.09941276753266 75 | 0.7373737373737375,2.1439157819685826 76 | 0.7474747474747475,2.183830099104771 77 | 0.7575757575757577,2.2190430617889505 78 | 0.7676767676767677,2.2496111251895865 79 | 0.7777777777777778,2.2757657454720563 80 | 0.787878787878788,2.2978252198009717 81 | 0.797979797979798,2.315979241563567 82 | 0.8080808080808082,2.330014219760066 83 | 0.8181818181818182,2.3391150596351236 84 | 0.8282828282828284,2.341862503740862 85 | 0.8383838383838385,2.336454463857836 86 | 0.8484848484848485,2.321055145370009 87 | 0.8585858585858587,2.294085048212994 88 | 0.8686868686868687,2.2542826797134086 89 | 0.8787878787878789,2.200504561744796 90 | 0.888888888888889,2.1314014497852978 91 | 0.8989898989898991,2.0452120712003983 92 | 0.9090909090909092,1.9399010429092387 93 | 0.9191919191919192,1.813747205527124 94 | 0.9292929292929294,1.6663247303696083 95 | 0.9393939393939394,1.49969937952718 96 | 0.9494949494949496,1.319612345180145 97 | 0.9595959595959597,1.1363455085795007 98 | 0.9696969696969697,0.9647651308207964 99 | 0.9797979797979799,0.8228961097122532 100 | 
0.98989898989899,0.7286932178996137 101 | 1.0,0.695619160926954 102 | -------------------------------------------------------------------------------- /stats_data/default/reference_hbond.txt: -------------------------------------------------------------------------------- 1 | 26512769.0,1.0,True 2 | 0.0,0.034344964245007485 3 | 0.010101010101010102,0.05459429079425639 4 | 0.020202020202020204,0.08218491648185963 5 | 0.030303030303030304,0.14152841631380342 6 | 0.04040404040404041,0.23950920185663285 7 | 0.05050505050505051,0.38323829003972426 8 | 0.06060606060606061,0.57711348552417 9 | 0.07070707070707072,0.8201045014079976 10 | 0.08080808080808081,1.1039513433947221 11 | 0.09090909090909091,1.4129937392231584 12 | 0.10101010101010102,1.7261620212367998 13 | 0.11111111111111112,2.020961361188576 14 | 0.12121212121212122,2.2782303740342815 15 | 0.13131313131313133,2.4857978558504406 16 | 0.14141414141414144,2.6395805334938487 17 | 0.15151515151515152,2.7420016447676225 18 | 0.16161616161616163,2.7989443270987695 19 | 0.17171717171717174,2.8169652653139723 20 | 0.18181818181818182,2.802032462525722 21 | 0.19191919191919193,2.759920810081216 22 | 0.20202020202020204,2.697112260808228 23 | 0.21212121212121213,2.6204935437048964 24 | 0.22222222222222224,2.5352304247321156 25 | 0.23232323232323235,2.4424763203838196 26 | 0.24242424242424243,2.3397543882363974 27 | 0.25252525252525254,2.225009666457726 28 | 0.26262626262626265,2.10169202500028 29 | 0.27272727272727276,1.9804829999493543 30 | 0.2828282828282829,1.875494162320335 31 | 0.29292929292929293,1.7970158382822177 32 | 0.30303030303030304,1.7454945405600657 33 | 0.31313131313131315,1.7106234254208867 34 | 0.32323232323232326,1.6762848103366164 35 | 0.33333333333333337,1.628668899568167 36 | 0.3434343434343435,1.5629853782963932 37 | 0.3535353535353536,1.484931404003755 38 | 0.36363636363636365,1.4063291317886786 39 | 0.37373737373737376,1.338045199806398 40 | 0.38383838383838387,1.2847198296478095 41 | 
0.393939393939394,1.244028694169386 42 | 0.4040404040404041,1.209924954936114 43 | 0.4141414141414142,1.177166435060848 44 | 0.42424242424242425,1.1445057122859006 45 | 0.43434343434343436,1.1153192301675054 46 | 0.4444444444444445,1.095759237205349 47 | 0.4545454545454546,1.091220815678617 48 | 0.4646464646464647,1.1024354167473254 49 | 0.4747474747474748,1.1230723003605216 50 | 0.48484848484848486,1.1407029902483106 51 | 0.494949494949495,1.1415164020419102 52 | 0.5050505050505051,1.116662926877878 53 | 0.5151515151515152,1.0665010440661804 54 | 0.5252525252525253,1.000077500739114 55 | 0.5353535353535354,0.9303681311107098 56 | 0.5454545454545455,0.8685123897057797 57 | 0.5555555555555556,0.8203295855420994 58 | 0.5656565656565657,0.7861986089593924 59 | 0.5757575757575758,0.7631315407314815 60 | 0.5858585858585859,0.7472026486591586 61 | 0.595959595959596,0.7351937695668335 62 | 0.6060606060606061,0.725211929773962 63 | 0.6161616161616162,0.7164502231377263 64 | 0.6262626262626263,0.7083763458927097 65 | 0.6363636363636365,0.6997938181616087 66 | 0.6464646464646465,0.6883744753343561 67 | 0.6565656565656566,0.671097757245316 68 | 0.6666666666666667,0.6454807906610011 69 | 0.6767676767676768,0.6109031295109829 70 | 0.686868686868687,0.5692071007657769 71 | 0.696969696969697,0.5242194989165547 72 | 0.7070707070707072,0.4805140219994058 73 | 0.7171717171717172,0.44208790483099514 74 | 0.7272727272727273,0.4114778758094888 75 | 0.7373737373737375,0.3894439341233146 76 | 0.7474747474747475,0.3750540897600721 77 | 0.7575757575757577,0.36595088033114653 78 | 0.7676767676767677,0.35871500193984873 79 | 0.7777777777777778,0.34942410634993065 80 | 0.787878787878788,0.33454695622363156 81 | 0.797979797979798,0.3120754985563085 82 | 0.8080808080808082,0.2824128407836781 83 | 0.8181818181818182,0.24840866265272266 84 | 0.8282828282828284,0.21432489036307353 85 | 0.8383838383838385,0.18416818166024632 86 | 0.8484848484848485,0.16019993825834075 87 | 
0.8585858585858587,0.14227888193971253 88 | 0.8686868686868687,0.1282302827128383 89 | 0.8787878787878789,0.1150035708173485 90 | 0.888888888888889,0.10009984002218376 91 | 0.8989898989898991,0.08263781254313952 92 | 0.9090909090909092,0.0635702667709057 93 | 0.9191919191919192,0.04500206744782883 94 | 0.9292929292929294,0.029068756417275535 95 | 0.9393939393939394,0.01703521256114745 96 | 0.9494949494949496,0.009021995931850355 97 | 0.9595959595959597,0.004306626107074036 98 | 0.9696969696969697,0.0018502842706237886 99 | 0.9797979797979799,0.0007185824357759826 100 | 0.98989898989899,0.0002682552509000156 101 | 1.0,0.00015332269205798046 102 | -------------------------------------------------------------------------------- /stats_data/default/reference_mcss.txt: -------------------------------------------------------------------------------- 1 | 5544816.0,1.0,True 2 | 0.0,0.12565327211501376 3 | 0.06060606060606061,0.15272746804713655 4 | 0.12121212121212122,0.16111389615852637 5 | 0.18181818181818182,0.17609376170927 6 | 0.24242424242424243,0.1947364819238003 7 | 0.30303030303030304,0.21394897624095813 8 | 0.36363636363636365,0.23130782647654038 9 | 0.42424242424242425,0.24542511900046807 10 | 0.48484848484848486,0.2558192555354896 11 | 0.5454545454545454,0.26252642400023685 12 | 0.6060606060606061,0.26576815960942546 13 | 0.6666666666666667,0.265848833196443 14 | 0.7272727272727273,0.26321868112836244 15 | 0.7878787878787878,0.2585114316740948 16 | 0.8484848484848485,0.25245010234670584 17 | 0.9090909090909092,0.24569025217630644 18 | 0.9696969696969697,0.23873229582330988 19 | 1.0303030303030303,0.23193230951611896 20 | 1.0909090909090908,0.22552136950366738 21 | 1.1515151515151516,0.21955554247978376 22 | 1.2121212121212122,0.2138356160370443 23 | 1.2727272727272727,0.20790930071418634 24 | 1.3333333333333335,0.20121473943212606 25 | 1.393939393939394,0.19330561363991358 26 | 1.4545454545454546,0.18403510135709514 27 | 1.5151515151515151,0.17361542017966902 
28 | 1.5757575757575757,0.16255607359655033 29 | 1.6363636363636365,0.15153569901329964 30 | 1.696969696969697,0.141255362912694 31 | 1.7575757575757576,0.1322946906226738 32 | 1.8181818181818183,0.12498916861742189 33 | 1.878787878787879,0.119363827922759 34 | 1.9393939393939394,0.11515902089757256 35 | 2.0,0.11194904657823788 36 | 2.0606060606060606,0.10930531500210133 37 | 2.121212121212121,0.10693053924978836 38 | 2.1818181818181817,0.10470818815644359 39 | 2.2424242424242427,0.10266053202564308 40 | 2.303030303030303,0.10085817695606779 41 | 2.3636363636363638,0.09934274128952753 42 | 2.4242424242424243,0.09810031344575294 43 | 2.484848484848485,0.09707645279973262 44 | 2.5454545454545454,0.09619440737092802 45 | 2.606060606060606,0.09535230408254966 46 | 2.666666666666667,0.0944142757112354 47 | 2.7272727272727275,0.09322917560940326 48 | 2.787878787878788,0.09168485470042086 49 | 2.8484848484848486,0.08976241498530972 50 | 2.909090909090909,0.08754249726091146 51 | 2.9696969696969697,0.08515203024527042 52 | 3.0303030303030303,0.08269146430522334 53 | 3.090909090909091,0.08020027157849173 54 | 3.1515151515151514,0.07768672809472739 55 | 3.2121212121212124,0.07519581030503253 56 | 3.272727272727273,0.07285932872115941 57 | 3.3333333333333335,0.07088525540687819 58 | 3.393939393939394,0.06948554136227546 59 | 3.4545454545454546,0.0687829492024579 60 | 3.515151515151515,0.068751454791834 61 | 3.5757575757575757,0.06922435981261932 62 | 3.6363636363636367,0.06996359733558662 63 | 3.6969696969696972,0.07074943121233918 64 | 3.757575757575758,0.07144360964462945 65 | 3.8181818181818183,0.07200228091276864 66 | 3.878787878787879,0.07244752775116302 67 | 3.9393939393939394,0.07282420928718664 68 | 4.0,0.07316472211211894 69 | 4.0606060606060606,0.07347085430504506 70 | 4.121212121212121,0.07371378371762562 71 | 4.181818181818182,0.07385101184862354 72 | 4.242424242424242,0.07385401811330858 73 | 4.303030303030303,0.07373206092614519 74 | 
4.363636363636363,0.07353679420063919 75 | 4.424242424242425,0.07334571948005664 76 | 4.484848484848485,0.07323925713630598 77 | 4.545454545454546,0.07328843458225397 78 | 4.606060606060606,0.07355414192031802 79 | 4.666666666666667,0.07408132214329852 80 | 4.7272727272727275,0.07487233131940885 81 | 4.787878787878788,0.07584509722596292 82 | 4.848484848484849,0.07680517043829464 83 | 4.909090909090909,0.07746461562270746 84 | 4.96969696969697,0.07751884909014901 85 | 5.03030303030303,0.0767581331319698 86 | 5.090909090909091,0.07516509562808577 87 | 5.151515151515151,0.07295098912862678 88 | 5.212121212121212,0.07051904161366138 89 | 5.2727272727272725,0.0684101459310255 90 | 5.333333333333334,0.06737224182477584 91 | 5.3939393939393945,0.06877398164942387 92 | 5.454545454545455,0.07558502834539456 93 | 5.515151515151516,0.09390954688183199 94 | 5.575757575757576,0.13439040729345422 95 | 5.636363636363637,0.21178705041130963 96 | 5.696969696969697,0.34051197003053096 97 | 5.757575757575758,0.5253741932395047 98 | 5.818181818181818,0.7508464003588142 99 | 5.878787878787879,0.976514878294994 100 | 5.9393939393939394,1.1460566182716172 101 | 6.0,1.2092945816510254 102 | -------------------------------------------------------------------------------- /stats_data/default/reference_saltbridge.txt: -------------------------------------------------------------------------------- 1 | 26512769.0,1.0,True 2 | 0.0,8.922739129943497e-06 3 | 0.010101010101010102,2.0980548134600753e-05 4 | 0.020202020202020204,5.6797020285268994e-05 5 | 0.030303030303030304,0.00017155445932645112 6 | 0.04040404040404041,0.0004814291629838028 7 | 0.05050505050505051,0.0012382502773174184 8 | 0.06060606060606061,0.00292181072809832 9 | 0.07070707070707072,0.006339466145240168 10 | 0.08080808080808081,0.012681237876009584 11 | 0.09090909090909091,0.023457766375142106 12 | 0.10101010101010102,0.040264377428540984 13 | 0.11111111111111112,0.06438459348355247 14 | 
0.12121212121212122,0.09635041843327954 15 | 0.13131313131313133,0.13565155101236598 16 | 0.14141414141414144,0.18076368302884283 17 | 0.15151515151515152,0.22954099617213009 18 | 0.16161616161616163,0.2798727930187219 19 | 0.17171717171717174,0.33043427679242776 20 | 0.18181818181818182,0.38134131973620095 21 | 0.19191919191919193,0.4344129012050518 22 | 0.20202020202020204,0.49257880632929746 23 | 0.21212121212121213,0.5581512789251252 24 | 0.22222222222222224,0.6306223396133213 25 | 0.23232323232323235,0.7059437918053555 26 | 0.24242424242424243,0.7794905167335298 27 | 0.25252525252525254,0.8529275691073068 28 | 0.26262626262626265,0.9415864324610641 29 | 0.27272727272727276,1.076082898923594 30 | 0.2828282828282829,1.2924606261176674 31 | 0.29292929292929293,1.6105740566676767 32 | 0.30303030303030304,2.009594773182302 33 | 0.31313131313131315,2.417158098069862 34 | 0.32323232323232326,2.7264649418832754 35 | 0.33333333333333337,2.839683880044002 36 | 0.3434343434343435,2.715175083827248 37 | 0.3535353535353536,2.3886740190773845 38 | 0.36363636363636365,1.9545303345231555 39 | 0.37373737373737376,1.5206942336345128 40 | 0.38383838383838387,1.1670022081961002 41 | 0.393939393939394,0.9290701087514139 42 | 0.4040404040404041,0.8100746222105292 43 | 0.4141414141414142,0.8078035067552035 44 | 0.42424242424242425,0.9414228863574172 45 | 0.43434343434343436,1.265039019312422 46 | 0.4444444444444445,1.8565995794874461 47 | 0.4545454545454546,2.77468201241797 48 | 0.4646464646464647,3.9918540257229997 49 | 0.4747474747474748,5.340227292563282 50 | 0.48484848484848486,6.519836848829771 51 | 0.494949494949495,7.195472445837659 52 | 0.5050505050505051,7.145231627766019 53 | 0.5151515151515152,6.372344971235799 54 | 0.5252525252525253,5.104541172877366 55 | 0.5353535353535354,3.681307948301177 56 | 0.5454545454545455,2.4050814553926068 57 | 0.5555555555555556,1.4443593961493335 58 | 0.5656565656565657,0.8247602562421652 59 | 0.5757575757575758,0.482166394683785 60 | 
0.5858585858585859,0.3277982572392194 61 | 0.595959595959596,0.29065927764665894 62 | 0.6060606060606061,0.33026086407702604 63 | 0.6161616161616162,0.4289107395876772 64 | 0.6262626262626263,0.5754929457871599 65 | 0.6363636363636365,0.7501548891712397 66 | 0.6464646464646465,0.917351144876767 67 | 0.6565656565656566,1.0318654065362074 68 | 0.6666666666666667,1.0558531149524661 69 | 0.6767676767676768,0.9769735284254799 70 | 0.686868686868687,0.8153128755690346 71 | 0.696969696969697,0.6138664121211813 72 | 0.7070707070707072,0.4187995007333594 73 | 0.7171717171717172,0.2618768838341281 74 | 0.7272727272727273,0.15385858435020414 75 | 0.7373737373737375,0.08888254451416713 76 | 0.7474747474747475,0.053690185464726876 77 | 0.7575757575757577,0.03541426672818899 78 | 0.7676767676767677,0.025224567516597952 79 | 0.7777777777777778,0.01848717637705601 80 | 0.787878787878788,0.013353527955956524 81 | 0.797979797979798,0.009290749992212863 82 | 0.8080808080808082,0.006170108250876852 83 | 0.8181818181818182,0.003894776010365466 84 | 0.8282828282828284,0.002326004080651499 85 | 0.8383838383838385,0.0013057816922769953 86 | 0.8484848484848485,0.000683929562649074 87 | 0.8585858585858587,0.0003318663892584457 88 | 0.8686868686868687,0.00014835719298813592 89 | 0.8787878787878789,6.088114366814788e-05 90 | 0.888888888888889,2.2896511137554844e-05 91 | 0.8989898989898991,7.891822081735198e-06 92 | 0.9090909090909092,2.496050848358181e-06 93 | 0.9191919191919192,7.258470375167357e-07 94 | 0.9292929292929294,1.9445777639212346e-07 95 | 0.9393939393939394,4.806294777542046e-08 96 | 0.9494949494949496,1.0962994449817125e-08 97 | 0.9595959595959597,2.3051964538374132e-09 98 | 0.9696969696969697,4.4576472845107974e-10 99 | 0.9797979797979799,7.903525092162891e-11 100 | 0.98989898989899,1.3036119249551122e-11 101 | 1.0,3.763681791575151e-12 102 | -------------------------------------------------------------------------------- /stats_data/default/reference_shape.txt: 
-------------------------------------------------------------------------------- 1 | 36496990.0,1.0,True 2 | 0.0,4.547699886855362 3 | 0.010101010101010102,5.021717759289014 4 | 0.020202020202020204,4.9253641731323405 5 | 0.030303030303030304,4.7699130352776455 6 | 0.04040404040404041,4.608781985943345 7 | 0.05050505050505051,4.484142886407559 8 | 0.06060606060606061,4.410278501237078 9 | 0.07070707070707072,4.374428491235633 10 | 0.08080808080808081,4.349495644581144 11 | 0.09090909090909091,4.308203949590727 12 | 0.10101010101010102,4.231725234808695 13 | 0.11111111111111112,4.111758156585997 14 | 0.12121212121212122,3.94861142321478 15 | 0.13131313131313133,3.7481309353345145 16 | 0.14141414141414144,3.519026120874482 17 | 0.15151515151515152,3.2709739856075877 18 | 0.16161616161616163,3.01337112387502 19 | 0.17171717171717174,2.7545538086861887 20 | 0.18181818181818182,2.5013779211336917 21 | 0.19191919191919193,2.2590851059724963 22 | 0.20202020202020204,2.031376464551598 23 | 0.21212121212121213,1.820608259403095 24 | 0.22222222222222224,1.6280319820791298 25 | 0.23232323232323235,1.4540214114290715 26 | 0.24242424242424243,1.298257574047256 27 | 0.25252525252525254,1.1598731533815037 28 | 0.26262626262626265,1.037579523783695 29 | 0.27272727272727276,0.929801545780796 30 | 0.2828282828282829,0.8348287199967916 31 | 0.29292929292929293,0.7509695905738804 32 | 0.30303030303030304,0.6766834934093352 33 | 0.31313131313131315,0.6106651059946795 34 | 0.32323232323232326,0.551869784979146 35 | 0.33333333333333337,0.49948520948753494 36 | 0.3434343434343435,0.45287004189340774 37 | 0.3535353535353536,0.41148582860830873 38 | 0.36363636363636365,0.374841797028601 39 | 0.37373737373737376,0.3424599589903197 40 | 0.38383838383838387,0.3138598055643663 41 | 0.393939393939394,0.2885606547877383 42 | 0.4040404040404041,0.26609936631203884 43 | 0.4141414141414142,0.24605657267834716 44 | 0.42424242424242425,0.22807924069803154 45 | 0.43434343434343436,0.21188817654834882 
46 | 0.4444444444444445,0.19726673010758314 47 | 0.4545454545454546,0.1840355341512223 48 | 0.4646464646464647,0.17202294015139682 49 | 0.4747474747474748,0.1610420507003485 50 | 0.48484848484848486,0.15088376462381592 51 | 0.494949494949495,0.14132975035818304 52 | 0.5050505050505051,0.13218005861305362 53 | 0.5151515151515152,0.12328243230210757 54 | 0.5252525252525253,0.11455029314989255 55 | 0.5353535353535354,0.10596380946519367 56 | 0.5454545454545455,0.09755709465454845 57 | 0.5555555555555556,0.08939866296372835 58 | 0.5656565656565657,0.08157139289951128 59 | 0.5757575757575758,0.07415562500442457 60 | 0.5858585858585859,0.06721697917399694 61 | 0.595959595959596,0.06079910176876537 62 | 0.6060606060606061,0.054920471076488286 63 | 0.6161616161616162,0.04957401191447431 64 | 0.6262626262626263,0.04472894639752107 65 | 0.6363636363636365,0.04033527939631826 66 | 0.6464646464646465,0.03633129207693522 67 | 0.6565656565656566,0.03265309170946166 68 | 0.6666666666666667,0.02924383185740367 69 | 0.6767676767676768,0.02606014342106234 70 | 0.686868686868687,0.02307475541120281 71 | 0.696969696969697,0.020275922266352706 72 | 0.7070707070707072,0.017664813481003357 73 | 0.7171717171717172,0.015251536592687728 74 | 0.7272727272727273,0.01305007625707146 75 | 0.7373737373737375,0.01107281780616632 76 | 0.7474747474747475,0.009325957371401123 77 | 0.7575757575757577,0.007807041088487378 78 | 0.7676767676767677,0.0065049682832128185 79 | 0.7777777777777778,0.005401796344323756 80 | 0.787878787878788,0.004475325054124275 81 | 0.797979797979798,0.003701681950017292 82 | 0.8080808080808082,0.0030574780698982436 83 | 0.8181818181818182,0.002521301511510099 84 | 0.8282828282828284,0.002074494715812368 85 | 0.8383838383838385,0.0017013989896493616 86 | 0.8484848484848485,0.0013893576308964565 87 | 0.8585858585858587,0.001128600575457331 88 | 0.8686868686868687,0.0009118792217376505 89 | 0.8787878787878789,0.0007337100947197865 90 | 0.888888888888889,0.0005893709313145707 
91 | 0.8989898989898991,0.00047406925557859266 92 | 0.9090909090909092,0.00038266539807461485 93 | 0.9191919191919192,0.00030999856125356594 94 | 0.9292929292929294,0.0002515226248364577 95 | 0.9393939393939394,0.00020384856874516985 96 | 0.9494949494949496,0.0001649201455517543 97 | 0.9595959595959597,0.00013377747852576992 98 | 0.9696969696969697,0.0001100649098787532 99 | 0.9797979797979799,9.354996175574382e-05 100 | 0.98989898989899,8.389260947284295e-05 101 | 1.0,8.07272515237269e-05 102 | -------------------------------------------------------------------------------- /stats_data/default/stats.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drorlab/combind/be0f5bbf3d141a30c441345e7670a1481cf1941f/stats_data/default/stats.pdf -------------------------------------------------------------------------------- /stats_data/mcss_sizes.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drorlab/combind/be0f5bbf3d141a30c441345e7670a1481cf1941f/stats_data/mcss_sizes.pkl -------------------------------------------------------------------------------- /stats_data/pdbs.txt: -------------------------------------------------------------------------------- 1 | 5HT2B:4IB4,5TVN,6DRX,6DRY,6DRZ,6DS0 2 | B1AR:2VT4,2Y00,2Y02,2Y03,2Y04,2YCW,2YCZ,3ZPQ,3ZPR,4AMI,4AMJ,5A8E 3 | B2AR:2RH1,3D4S,3NY8,3NY9,3NYA,3P0G,4LDL,4LDO 4 | MGLUR5:4OO9,5CGC,5CGD,6FFH,6FFI 5 | SMO:4JKV,4N4W,4QIM,5L7I,5V57 6 | AR:1E3G,1T5Z,2AM9,2AMB,2AX9,2AXA,2HVC,3B5R,3B65,3B66,3B67,3B68,3RLJ,3V49,3V4A,4QL8,5CJ6,5T8E,5T8J,5V8Q 7 | ER:1A52,1ERR,1GWQ,1L2I,1R5K,1SJ0,1UOM,1X7E,1X7R,1XP1,1XP6,1XP9,1XPC,1XQC,1YIM,1YIN,1ZKY,2AYR,2B1V,2B1Z,2FAI 8 | GR:1P93,3BQD,3CLD,3E7C,3K22,4CSJ,4MDD,4P6W,4P6X,4UDD,5G3J,5G5W,5NFP,5NFT,6EL6,6EL7,6EL9 9 | MR:2A3I,2AA2,2AA5,2AA7,3VHV,3WFF,3WFG,4PF3,5HCV,5L7E,5L7G,5MWP,5MWY 10 | 
VDR:1DB1,1IE8,1S0Z,1S19,1TXI,2HAM,2HAR,2HAS,2HB7,2HB8,3A2I,3A3Z,3A40,3A78,3AUQ,3AX8,3AZ1,3AZ2,3AZ3,3B0T,3CS4 11 | DAT:4M48,4XNU,4XNX,4XP1,4XP4,4XP5,4XP6,4XP9,4XPA 12 | GLUT1:5EQG,5EQH,5EQI 13 | SERT:5I6X,5I71,5I74,6AWO,6AWP 14 | BACE1:1FKN,1M4H,2FDP,2G94,2P4J,2QMG,2VKM,3BRA,3BUF,3BUG,3BUH,3CKP,3I25,3KMX,3KMY,3L59,3LPI,3LPK,3RSX,3RU1,3UDH 15 | F10:1EZQ,1F0R,1F0S,1FJS,1G2L,1KSN,1LPG,1LPK,1LPZ,1MQ5,1MQ6,1NFU,1NFW,1NFX,1NFY,1XKA,1Z6E,2BOH,2BOK,2BQ7,2J2U 16 | F2:1A4W,1BCU,1BHX,1C1U,1C1V,1C4U,1C5N,1C5O,1D3D,1D3P,1D4P,1D6W,1D9I,1FPC,1G30,1G32,1GHV,1GHW,1GHY,1K21,1K22 17 | P00760:1BJU,1BJV,1BTY,1C1R,1C5Q,1C5S,1C5T,1EB2,1F0T,1F0U,1G36,1G3D,1G3E,1GHZ,1GI1,1GJ6,1K1I,1K1J,1K1L,1K1M,1K1N 18 | PLAU:1C5X,1C5Y,1C5Z,1EJN,1F5L,1GI7,1GJ7,1GJ8,1GJA,1GJC,1GJD,1O3P,1O5A,1O5C,1OWE,1OWH,1SQA,1SQO,1SQT,1W0Z,1W11 19 | BRD4:2YEL,3P5O,3U5J,3U5L,3ZYU,4LYW,4LZR,4LZS,4MR3,4NUC,4NUE,4OGJ,4QB3,4WIV,4XYA,4Z93,5ACY 20 | CDK2:1AQ1,1CKP,1DI8,1DM2,1E1V,1E1X,1E9H,1FIN,1FVT,1FVV,1G5S,1GIH,1GIJ,1GZ8,1H0V,1H1Q,1H1R,1H1S,1JSV,1JVP,1KE5 21 | DHFR:1BOZ,1DLR,1DLS,1HFP,1KMS,1KMV,1OHJ,1PD8,2W3A,3GHC,3GHW,3GYF,3NTZ,3NU0,3NXO,3NXR,3NXT,3NXV,3NXY,4DDR,4KAK 22 | ELANE:3Q77,5A09,5A0A,5A0B,5A0C,5A8X,5A8Y,5A8Z,5ABW 23 | HSP90AA1:1YC1,1YC4,1YET,2QFO,2QG0,2QG2,2UWD,2XAB,2XDK,2XDL,2XDX,2XHT,2XJG,2XJJ,2XJX,2YI0,2YI7,2YKI,3B24,3B25,3B26 24 | PDE10A:3UI7,3UUO,4DFF,4HF4,4LKQ,4LLJ,4LLK,4LLP,4LLX,4LM0,4LM1,4LM2,4LM3,4LM4,4MRW,4MRZ,4MSA,4MSC,4MSN,4ZO5,5C1W 25 | PTPN1:1BZC,1BZJ,1C83,1C84,1C86,1C88,1ECV,1G7F,1G7G,1GFY,1KAV,1L8G,1NL9,1NNY,1NO6,1NWL,1NZ7,1ONY,1ONZ,1PYN,1QXK 26 | PYGM:1A8I,2G9Q,2PYI,2QRG,2QRH,2QRM,2QRP,2QRQ,3G2H,3G2I,3G2J,3G2K,3G2L,3G2N,3L79,3L7A,3L7B,3L7C,3L7D,3SYM,3SYR 27 | SIGMAR1:5HK1,5HK2,6DJZ,6DK0,6DK1 28 | GLUR2:1FTM,1M5B,1M5C,1M5E,1MQD,1MQG,1MQH,1MQI,1MQJ,1N0T,1SYH,1WVJ,2P2A,3BFT,3H03,3H06 29 | GLUK1:1VSO,2F34,2F35,2PBW,2QS1,2QS2,2QS3,2QS4,2WKY,3GBA,3GBB,3S2V,4QF9,4YMB,5M2V,5NEB,5NF5,6FZ4,6SBT 30 | GLUN12A:5H8H,5H8N,5H8Q,5I2K,5I2N,5KCJ,5KDT,5TP9,5TPA 31 | 
-------------------------------------------------------------------------------- /stats_data/structures.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drorlab/combind/be0f5bbf3d141a30c441345e7670a1481cf1941f/stats_data/structures.tar.gz -------------------------------------------------------------------------------- /stats_data/systems.txt: -------------------------------------------------------------------------------- 1 | Family Protein Name Gene Name UNIPROT CHEMBL 2 | GPCR A 5-hydroxytryptamine receptor 2B 5HT2B P41595 CHEMBL1833 3 | GPCR A Adenosine receptor A2a A2AR P29274 CHEMBL251 4 | GPCR A Beta-1 adrenergic receptor B1AR P07700 CHEMBL213 5 | GPCR A Beta-2 adrenergic receptor B2AR P07550 CHEMBL210 6 | GPCR A Cannabinoid receptor 1 CB1R P21554 CHEMBL218 7 | GPCR A Melatonin receptor type 1A MT1R P48039 CHEMBL1945 8 | GPCR A Muscarinic acetylcholine receptor M2 M2R P08172 CHEMBL211 9 | GPCR C Gamma-aminobutyric acid type B receptor subunit 1 GABBR1 Q9UBS5 CHEMBL2064 10 | GPCR C Metabotropic glutamate receptor 2 MGLUR2 Q14416 CHEMBL5137 11 | GPCR C Metabotropic glutamate receptor 3 MGLUR3 Q14832 CHEMBL2888 12 | GPCR C Metabotropic glutamate receptor 5 MGLUR5 P41594 CHEMBL3227 13 | GPCR F Smoothened homolog SMO Q99835 CHEMBL5971 14 | Ion channel Glutamate receptor 2 GLU2 P19491 CHEMBL3503 15 | Ion channel Glutamate receptor ionotropic, kainate 1 GLUK1 P22756 CHEMBL2919 16 | Ion channel Glutamate receptor ionotropic, kainate 3 GLUK3 P42264 CHEMBL3744 17 | Ion channel Glutamate receptor ionotropic, NMDA GLUN1 Q05586-Q12879 CHEMBL1907604 18 | Transporter Sodium-dependent dopamine transporter DAT Q7K4Y6 CHEMBL238 19 | Transporter Sodium-dependent serotonin transporter SERT P31645 CHEMBL228 20 | Transporter glucose transporter member 1 GLUT1 P11166 CHEMBL2535 21 | Nuclear Receptor Androgen receptor AR P10275 CHEMBL1871 22 | Nuclear Receptor Bile acid receptor NR1H4 Q96RI1 CHEMBL2047 23 | 
Nuclear Receptor Estrogen receptor ER P03372 CHEMBL206 24 | Nuclear Receptor Glucocorticoid receptor GR P04150 CHEMBL2034 25 | Nuclear Receptor Mineralocorticoid receptor MR P08235 CHEMBL1994 26 | Nuclear Receptor Vitamin D3 receptor VDR P11473 CHEMBL1977 27 | Protease Beta-secretase 1 BACE1 P56817 CHEMBL4822 28 | Protease Cationic trypsin P00760 P00760 CHEMBL3769 29 | Protease Coagulation factor X F10 P00742 CHEMBL244 30 | Protease Prothrombin F2 P00734 CHEMBL204 31 | Protease Urokinase-type plasminogen activator PLAU P00749 CHEMBL3286 32 | Reductase Aldo-keto reductase family 1 member B1 ALDR1 P15121 CHEMBL1900 33 | Reductase Corticosteroid 11-beta-dehydrogenase isozyme 1 HSD11B1 P28845 CHEMBL4235 34 | Reductase Dihydrofolate reductase DHFR P00374 CHEMBL202 35 | Reductase Ribosyldihydronicotinamide dehydrogenase NQO2 P16083 CHEMBL3959 36 | Receptor Acetylcholine-binding protein ACHP P58154 CHEMBL6084 37 | Receptor Sigma non-opioid intracellular receptor 1 SIGMAR1 Q99720 CHEMBL287 38 | Receptor TGF-beta receptor type-1 TGFR1 P36897 CHEMBL4439 39 | Hydrolase Bifunctional epoxide hydrolase 2 EPHX2 P34913 CHEMBL2409 40 | Hydrolase Leukotriene A-4 hydrolase LTA4H P09960 CHEMBL4618 41 | Kinase Cyclin-dependent kinase 2 CDK2 P24941 CHEMBL301 42 | Kinase Mitogen-activated protein kinase 1 MAPK1 P28482 CHEMBL4040 43 | Elatase Neutrophil elastase ELANE P08246 CHEMBL248 44 | Phosphorylase Glycogen phosphorylase PYGM P00489 CHEMBL4696 45 | Phosphatase Tyrosine-protein phosphatase non-receptor type 1 PTPN1 P18031 CHEMBL335 46 | Transcription factor Bromodomain-containing protein 4 BRD4 O60885 CHEMBL1163125 47 | Chaperone Heat shock protein HSP 90-alpha HSP90AA P07900 CHEMBL3880 48 | Phosphodiesterase cAMP 3',5'-cyclic phosphodiesterase 10A PDE10A Q9Y233 CHEMBL4409 49 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | from multiprocessing import Pool 
import os
import numpy as np
from schrodinger.structure import StructureReader, StructureWriter

# NOTE: `Pool` (used by `mp` below) is provided by the
# `from multiprocessing import Pool` line that opens this module.


def np_load(fname, halt=True, delete=False):
    """Load a NumPy file, reporting a ValueError instead of propagating it.

    Args:
        fname: Path of the file to load. It is converted to an absolute
            path first so the printed messages identify it unambiguously.
        halt: If true, terminate the process after a failed load.
        delete: If true, remove the file when NumPy refuses it with the
            "pickled data" error, so that it can be regenerated.

    Returns:
        The value returned by ``np.load``; ``None`` if loading raised
        ``ValueError`` and ``halt`` is false.

    Raises:
        SystemExit: With status 1 if loading raised ``ValueError`` and
            ``halt`` is true.

    Exceptions other than ``ValueError`` (for example a missing file) are
    not handled here and propagate to the caller.
    """
    # Local import keeps this fix self-contained within the function.
    import sys

    fname = os.path.abspath(fname)
    try:
        return np.load(fname)
    except ValueError as e:
        # This is the message NumPy raises when the file content is not a
        # plain array; the module treats that as a corrupt file.
        m = 'Cannot load file containing pickled data when allow_pickle=False'
        if m in str(e):
            print('{} is corrupt. Regenerate and try again.'.format(fname))
            if delete:
                os.remove(fname)
        else:
            print("Can't open {}".format(fname))
            print(str(e))

        if halt:
            # `sys.exit` does not depend on the `site` module the way the
            # bare `exit()` helper does, and a non-zero status tells the
            # calling shell or scheduler that the load failed.
            sys.exit(1)
    return None


def pv_path(root, name):
    """Build the path of the ``_pv.maegz`` file for ``name`` under ``root``.

    If ``name`` contains ``_native``, every occurrence is stripped to get
    the base name and the result is
    ``<root>/<base>/<base>_native_pv.maegz``. Otherwise the result is
    ``<root>/<name>/<name>_pv.maegz``.
    """
    if '_native' in name:
        name = name.replace('_native', '')
        return '{}/{}/{}_native_pv.maegz'.format(root, name, name)
    return '{}/{}/{}_pv.maegz'.format(root, name, name)


def get_pose(pv, pose):
    """Return the structure at 0-based position ``pose + 1`` in file ``pv``.

    The first ``pose + 1`` structures are read and discarded, then the next
    one is returned.
    NOTE(review): presumably the first entry of a ``_pv`` file is the
    receptor, which is why pose 0 maps to the second structure -- confirm.

    If the file holds too few structures, the exception raised by ``next``
    on the exhausted reader propagates to the caller.
    """
    with StructureReader(pv) as sts:
        for _ in range(pose+1):
            next(sts)
        st = next(sts)
    return st


def basename(path):
    """Return the last component of ``path`` without its final extension."""
    x = os.path.basename(path)
    x = os.path.splitext(x)[0]
    return x


def mp(function, unfinished, processes):
    """Apply ``function`` to each argument tuple in ``unfinished`` in parallel.

    Args:
        function: Callable invoked as ``function(*args)`` for every entry.
        unfinished: Sequence of argument tuples, passed to ``Pool.starmap``.
        processes: Number of worker processes for the pool.

    Returns:
        The list returned by ``Pool.starmap``, or ``None`` when
        ``unfinished`` is empty (no pool is started in that case).
    """
    if unfinished:
        with Pool(processes=processes) as pool:
            x = pool.starmap(function, unfinished)
        return x
    return None


def mkdir(path):
    """Create the directory ``path`` unless something already exists there.

    Only the final path component is created; missing parent directories
    still raise, as ``os.mkdir`` does.
    """
    try:
        os.mkdir(path)
    except FileExistsError:
        # Deliberately ignored: an existing path is the desired end state.
        # Attempting the creation directly (rather than testing
        # os.path.exists first) avoids a race when several workers try to
        # create the same directory at once.
        pass