├── LICENSE
├── README.md
├── data
│   ├── .pdb_pre.py.swp
│   ├── README.md
│   ├── determine_ss.py
│   ├── domain_list.txt
│   ├── example
│   │   ├── 1aj4A00-2-161
│   │   ├── 1bzqK00-801-924
│   │   ├── 1ge0A00-1-130
│   │   └── domain_list.txt
│   ├── fold2seq2.png
│   ├── fold_feat_gen.py
│   ├── fold_features
│   │   └── 1ab0A00-1-131.npy
│   ├── pdb_lists
│   │   ├── id_domain.txt
│   │   ├── od_domain.txt
│   │   ├── train_domain.txt
│   │   └── val_domain.txt
│   ├── pdb_pre.py
│   └── ss_dense_gen.py
├── environment.yml
├── fold2seq1.png
├── fold2seq3.png
└── src
    ├── Utils
    │   ├── __pycache__
    │   │   ├── amino_acid.cpython-37.pyc
    │   │   ├── amino_acid.cpython-38.pyc
    │   │   ├── model_statistics.cpython-37.pyc
    │   │   └── model_statistics.cpython-38.pyc
    │   ├── amino_acid.py
    │   ├── hparam.py
    │   └── model_statistics.py
    ├── fold_encoder.py
    ├── generator.py
    ├── inference.py
    ├── seq_decoder.py
    └── train.py

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 | 
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 | 
7 | 1. Definitions.
8 | 
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 | 
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 | 
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 | 
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 | 
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 | 
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 | 
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 | 
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 | 
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 | 
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 | 
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 | 
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 | 
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 | 
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 | 
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 | 
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 | 
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 | 
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 | 
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 | 
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 | 
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 | 
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 | 
176 | END OF TERMS AND CONDITIONS
177 | 
178 | APPENDIX: How to apply the Apache License to your work.
179 | 
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 | 
189 | Copyright [yyyy] [name of copyright owner]
190 | 
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 | 
195 | http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 | 

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # [ICML2021] Fold2Seq: A Joint Sequence(1D)-Fold(3D) Embedding-based Generative Model for Protein Design
2 | 
3 | ![Fold2Seq Architecture](/fold2seq1.png)
4 | 
5 | ## Environment file:
6 | * [environment.yml](environment.yml)
7 | 
8 | ## Data and Feature Generation:
9 | * Go to [data/](data/) and check the README there.
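* Both the training and inference commands below read the processed data through `--data_path`. As orientation only, here is a minimal sketch of inspecting such a data dictionary, assuming it is stored as a pickled Python `dict` keyed by CATH domain ID (the storage format and key layout are assumptions, not documented behavior):

```python
import pickle

# Hypothetical peek at the object passed via --data_path; the pickle
# format and the domain-ID keys are assumptions for illustration only.
with open("path_to_the_data_dictionary", "rb") as f:
    data_dict = pickle.load(f)

print("entries:", len(data_dict))
print("example keys:", list(data_dict)[:3])
```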
10 | 
11 | ## How to train the model:
12 | * Go to [src/](src/) and run:
13 | 
14 | `python train.py --data_path $path_to_the_data_dictionary --lr $learning_rate --model_save $path_to_the_saved_model`
15 | 
16 | ## How to generate sequences:
17 | * Go to [src/](src/) and run:
18 | 
19 | `python inference.py --trained_model $path_to_the_trained_model --output $path_to_the_output_file --data_path $path_to_the_data_dictionary`
20 | 
21 | 
22 | ## Fold2Seq generated structures against natural structures:
23 | ![Fold2Seq structures](/fold2seq3.png)

--------------------------------------------------------------------------------
/data/.pdb_pre.py.swp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/fold2seq/b9a97d81eac329b5259ad10e2a6f4fe80ade542f/data/.pdb_pre.py.swp

--------------------------------------------------------------------------------
/data/README.md:
--------------------------------------------------------------------------------
1 | # Fold2Seq Data and Feature Generation
2 | 
3 | ![Fold2Seq Architecture](/data/fold2seq2.png)
4 | 
5 | ## Data
6 | The CATH IDs of the protein domains in the training, validation, and two test sets are in [pdb_lists/](pdb_lists/).
7 | 
8 | 
9 | ## Feature Generation:
10 | ### Input File:
11 | * To generate SSE (secondary structure element) density features, you first need to provide a file listing all input proteins. Each row describes one protein domain, and the columns are:
12 |   * Column 1: The path to the PDB file
13 |   * Column 2: The PDB ID
14 |   * Column 3: The chain ID
15 |   * Column 4: The starting residue ID
16 |   * Column 5: The ending residue ID
17 | * An example of this input file is [example/domain_list.txt](example/domain_list.txt).
18 | 
19 | ### Secondary Structure Assignment:
20 | * You also need to pre-assign a secondary structure element to each residue. We provide an assignment file ([ss.txt](https://drive.google.com/file/d/1B_9JdT43-l0sVOgBJCdCRAN31tOGX8VA/view?usp=sharing)) obtained from the RCSB PDB that covers most existing PDB entries. First check whether your protein is in this file; if not, append it following the format used in the file.
21 | 
22 | ### Generating features:
23 | * To generate SSE density features, run:
24 | 
25 | `python fold_feat_gen.py --domain_list example/domain_list.txt --ss ss.txt --out $path_to_the_output_dictionary`
26 | 
27 | * It will generate a Python dictionary containing the input information and fold features in `fold_features/`.
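* As a quick sanity check on the output, here is a minimal sketch of loading one generated feature file, assuming (as the `fold_features/1ab0A00-1-131.npy` layout suggests) that each output is a NumPy `.npy` file; `allow_pickle=True` matters only if it stores a dictionary rather than a plain array:

```python
import numpy as np

# Hypothetical sanity check; the file name follows the repository's
# fold_features/ layout, and the storage format is an assumption.
feat = np.load("fold_features/1ab0A00-1-131.npy", allow_pickle=True)
print(type(feat), getattr(feat, "shape", None))
```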
28 | 
29 | 

--------------------------------------------------------------------------------
/data/determine_ss.py:
--------------------------------------------------------------------------------
import pickle
import os

# The 20 standard amino-acid one-letter codes; sequences containing
# anything else are dropped below.
amino_acid = ['A', 'R', 'N', 'D', 'C', 'E', 'Q', 'G', 'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V']

def read_ss(path):
    """Parse the RCSB ss.txt file into {'XXXX:C': {'seq': ..., 'ss': ...}}.

    The file alternates FASTA-style records such as '>101M:A:sequence'
    and '>101M:A:secstr'; line[1:7] is the 'XXXX:C' PDB-ID/chain label.
    """
    seq_ss = {}
    in_secstr = False  # False while reading a sequence record, True for secstr
    with open(path, "r") as f:
        for line in f:
            if line[0] == '>':
                if 'sequence' in line:
                    label = line[1:7]
                    seq_ss[label] = {'seq': '', 'ss': ''}
                    in_secstr = False
                elif 'secstr' in line:
                    in_secstr = True
                else:
                    raise ValueError("unrecognized header line: " + line)
            elif in_secstr:
                seq_ss[label]['ss'] += line.strip('\n')
            else:
                seq_ss[label]['seq'] += line.strip('\n')
    print("total number of seqs:", len(seq_ss))

    # Drop entries whose sequence contains a non-standard residue.
    remove = []
    for i in seq_ss:
        assert len(seq_ss[i]['seq']) == len(seq_ss[i]['ss'])
        for j in seq_ss[i]['seq']:
            if j not in amino_acid:
                remove.append(i)
                break
    for i in remove:
        del seq_ss[i]

    print("removed # seqs in ss.txt:", len(remove))
    return seq_ss

def test_seq_identity(seq_exp, seq_ref):
    """Return 1 if seq_exp occurs as a contiguous substring of seq_ref, else 0."""
    for i in range(len(seq_ref) - len(seq_exp) + 1):
        if seq_exp == seq_ref[i: i + len(seq_exp)]:
            return 1
    return 0

if __name__ == '__main__':

    with open("seq_dict.pkl", "rb") as f:
        seq_dict = pickle.load(f)

    seq_ss = read_ss("ss.txt")

    os.makedirs("pdbs", exist_ok=True)

    n1 = 0  # domains with a secondary structure assignment in ss.txt
    n2 = 0  # domains without one
    for i in seq_dict:
        # Keys are CATH-style domain IDs such as '1aj4A00...'; build 'XXXX:C'.
        label = i[0:4].upper() + ':' + i[4]

        if label in seq_ss:
            n1 += 1

            # Download the structure and keep only its ATOM records, unless
            # pdbs/ already holds a copy; curl saves to a scratch file named
            # '1', which grep then filters.
            if i[0:4] + ".pdb" not in os.listdir("pdbs/"):
                os.system("curl https://files.rcsb.org/download/" + i[0:4] + ".pdb > 1")
                os.system("grep ATOM 1 > pdbs/" + i[0:4] + ".pdb")
        else:
            n2 += 1
    print(n1, n2, n1 + n2)

--------------------------------------------------------------------------------
/data/domain_list.txt:
--------------------------------------------------------------------------------
1 | example/1aj4A00-2-161 1aj4 A 2 161
2 | example/1bzqK00-801-924 1bzq K 801 924
3 | example/1ge0A00-1-130 1ge0 A 1 130

--------------------------------------------------------------------------------
/data/example/1bzqK00-801-924:
--------------------------------------------------------------------------------
1 | ATOM 3809 N GLN K 801 63.141 16.501 32.533 1.00 38.57 N
2 | ATOM 3810 CA GLN K 801 63.165 17.784 33.286 1.00 38.50 C
3 | ATOM 3811 C GLN K 801 63.261 18.999 32.364 1.00 37.32 C
4 | ATOM 3812 O GLN K 801 64.323 19.405 31.901 1.00 37.30 O
5 | ATOM 3813 CB GLN K 801 64.276 17.787 34.330 1.00 39.80 C
6 | ATOM 3814 CG GLN K 801 65.137 16.539 34.384 1.00 41.11 C
7 | ATOM 3815 CD GLN K 801 66.276 16.538 33.382 1.00 41.75 C
8 | ATOM 3816 OE1 GLN K 801 67.391 16.141 33.738 1.00 42.17 O
9 | ATOM 3817 NE2 GLN K 801 66.008 16.969 32.152 1.00 41.81 N
10 | ATOM 3818 N VAL K 802 62.106 19.595 32.075 1.00 35.75 N
11 | ATOM 3819 CA VAL K 802 62.063 20.784 31.238 1.00 34.51 C
12 | ATOM 3820 C VAL K 802 61.637 21.957 32.118 1.00 33.55 C
13 | ATOM 3821 O VAL K 802 60.650 21.839 32.846 1.00 33.76 O
14 | ATOM 3822 CB VAL K 802 61.156 20.663 30.012 1.00 34.62 C
15 | ATOM 3823 CG1 VAL K 802 59.723 20.320 30.373 1.00 34.81 C
16 | ATOM 3824 CG2 VAL K 802 61.188 21.973 29.229 1.00 35.10 C
17 | ATOM 3825 N GLN K 803 62.414 23.033 32.110 1.00 32.50 N
18 | ATOM 3826 CA GLN K 803 62.045 24.184 32.923 1.00
31.82 C 19 | ATOM 3827 C GLN K 803 61.707 25.309 31.938 1.00 30.63 C 20 | ATOM 3828 O GLN K 803 62.182 25.353 30.806 1.00 30.61 O 21 | ATOM 3829 CB GLN K 803 63.024 24.605 33.990 1.00 33.34 C 22 | ATOM 3830 CG GLN K 803 64.114 23.640 34.392 1.00 35.86 C 23 | ATOM 3831 CD GLN K 803 65.476 24.304 34.508 1.00 37.35 C 24 | ATOM 3832 OE1 GLN K 803 65.825 24.829 35.571 1.00 38.45 O 25 | ATOM 3833 NE2 GLN K 803 66.255 24.303 33.428 1.00 37.36 N 26 | ATOM 3834 N LEU K 804 60.806 26.165 32.377 1.00 28.78 N 27 | ATOM 3835 CA LEU K 804 60.373 27.311 31.573 1.00 27.60 C 28 | ATOM 3836 C LEU K 804 60.840 28.536 32.351 1.00 26.99 C 29 | ATOM 3837 O LEU K 804 60.397 28.753 33.481 1.00 27.44 O 30 | ATOM 3838 CB LEU K 804 58.877 27.213 31.410 1.00 27.29 C 31 | ATOM 3839 CG LEU K 804 58.102 27.712 30.212 1.00 26.84 C 32 | ATOM 3840 CD1 LEU K 804 58.905 27.657 28.926 1.00 27.05 C 33 | ATOM 3841 CD2 LEU K 804 56.811 26.913 30.050 1.00 25.70 C 34 | ATOM 3842 N VAL K 805 61.845 29.257 31.872 1.00 26.27 N 35 | ATOM 3843 CA VAL K 805 62.355 30.383 32.644 1.00 25.76 C 36 | ATOM 3844 C VAL K 805 61.876 31.719 32.090 1.00 24.52 C 37 | ATOM 3845 O VAL K 805 61.971 32.013 30.904 1.00 24.06 O 38 | ATOM 3846 CB VAL K 805 63.887 30.328 32.794 1.00 26.30 C 39 | ATOM 3847 CG1 VAL K 805 64.236 29.057 33.568 1.00 26.52 C 40 | ATOM 3848 CG2 VAL K 805 64.592 30.351 31.457 1.00 25.90 C 41 | ATOM 3849 N GLU K 806 61.312 32.526 32.984 1.00 23.41 N 42 | ATOM 3850 CA GLU K 806 60.806 33.835 32.596 1.00 23.02 C 43 | ATOM 3851 C GLU K 806 61.570 34.973 33.255 1.00 22.14 C 44 | ATOM 3852 O GLU K 806 61.969 34.894 34.410 1.00 22.87 O 45 | ATOM 3853 CB GLU K 806 59.311 33.979 32.911 1.00 22.43 C 46 | ATOM 3854 CG GLU K 806 58.867 33.099 34.054 1.00 21.81 C 47 | ATOM 3855 CD GLU K 806 57.431 32.677 34.028 1.00 21.25 C 48 | ATOM 3856 OE1 GLU K 806 56.939 32.078 33.064 1.00 21.93 O 49 | ATOM 3857 OE2 GLU K 806 56.711 32.930 34.997 1.00 21.22 O 50 | ATOM 3858 N SER K 807 61.733 36.055 32.506 1.00 21.74 N 51 | ATOM 3859 CA SER K 807 62.409 37.224 33.052 1.00 21.34 C 52 | ATOM 3860 C SER K 807 61.803 38.496 32.495 1.00 20.25 C 53 | ATOM 3861 O SER K 807 61.138 38.479 31.465 1.00 19.75 O 54 | ATOM 3862 CB SER K 807 63.910 37.132 32.759 1.00 22.01 C 55 | ATOM 3863 OG SER K 807 64.146 36.999 31.366 1.00 23.31 O 56 | ATOM 3864 N GLY K 808 62.099 39.617 33.132 1.00 20.25 N 57 | ATOM 3865 CA GLY K 808 61.683 40.915 32.618 1.00 19.89 C 58 | ATOM 3866 C GLY K 808 60.670 41.572 33.532 1.00 20.08 C 59 | ATOM 3867 O GLY K 808 60.290 42.708 33.292 1.00 20.76 O 60 | ATOM 3868 N GLY K 809 60.233 40.900 34.584 1.00 20.44 N 61 | ATOM 3869 CA GLY K 809 59.268 41.495 35.498 1.00 21.31 C 62 | ATOM 3870 C GLY K 809 59.840 42.794 36.062 1.00 22.73 C 63 | ATOM 3871 O GLY K 809 61.040 43.086 35.986 1.00 23.66 O 64 | ATOM 3872 N GLY K 810 58.962 43.593 36.654 1.00 22.50 N 65 | ATOM 3873 CA GLY K 810 59.381 44.845 37.237 1.00 22.45 C 66 | ATOM 3874 C GLY K 810 58.188 45.631 37.747 1.00 22.52 C 67 | ATOM 3875 O GLY K 810 57.044 45.225 37.653 1.00 22.34 O 68 | ATOM 3876 N LEU K 811 58.520 46.770 38.320 1.00 23.15 N 69 | ATOM 3877 CA LEU K 811 57.534 47.683 38.907 1.00 23.77 C 70 | ATOM 3878 C LEU K 811 57.674 48.957 38.087 1.00 23.94 C 71 | ATOM 3879 O LEU K 811 58.789 49.424 37.882 1.00 24.48 O 72 | ATOM 3880 CB LEU K 811 57.811 47.747 40.389 1.00 24.33 C 73 | ATOM 3881 CG LEU K 811 57.041 48.629 41.340 1.00 26.72 C 74 | ATOM 3882 CD1 LEU K 811 55.567 48.767 40.953 1.00 27.26 C 75 | ATOM 3883 CD2 LEU K 811 57.092 48.158 42.798 1.00 26.79 C 76 | ATOM 3884 N VAL K 812 
56.595 49.416 37.470 1.00 23.61 N 77 | ATOM 3885 CA VAL K 812 56.679 50.572 36.589 1.00 23.16 C 78 | ATOM 3886 C VAL K 812 55.569 51.558 36.887 1.00 23.21 C 79 | ATOM 3887 O VAL K 812 54.682 51.267 37.680 1.00 22.70 O 80 | ATOM 3888 CB VAL K 812 56.678 50.134 35.117 1.00 23.14 C 81 | ATOM 3889 CG1 VAL K 812 55.284 49.985 34.537 1.00 22.78 C 82 | ATOM 3890 CG2 VAL K 812 57.509 51.106 34.289 1.00 23.26 C 83 | ATOM 3891 N GLN K 813 55.641 52.721 36.252 1.00 24.10 N 84 | ATOM 3892 CA GLN K 813 54.617 53.738 36.470 1.00 24.80 C 85 | ATOM 3893 C GLN K 813 53.536 53.609 35.415 1.00 24.53 C 86 | ATOM 3894 O GLN K 813 53.861 53.284 34.273 1.00 24.70 O 87 | ATOM 3895 CB GLN K 813 55.279 55.108 36.417 1.00 25.24 C 88 | ATOM 3896 CG GLN K 813 56.496 55.247 37.326 1.00 25.36 C 89 | ATOM 3897 CD GLN K 813 56.869 56.722 37.440 1.00 25.94 C 90 | ATOM 3898 OE1 GLN K 813 57.198 57.370 36.442 1.00 25.93 O 91 | ATOM 3899 NE2 GLN K 813 56.810 57.248 38.656 1.00 25.71 N 92 | ATOM 3900 N ALA K 814 52.292 53.852 35.809 1.00 24.16 N 93 | ATOM 3901 CA ALA K 814 51.182 53.819 34.851 1.00 23.36 C 94 | ATOM 3902 C ALA K 814 51.588 54.521 33.559 1.00 23.27 C 95 | ATOM 3903 O ALA K 814 52.180 55.601 33.575 1.00 23.23 O 96 | ATOM 3904 CB ALA K 814 49.978 54.473 35.488 1.00 22.58 C 97 | ATOM 3905 N GLY K 815 51.437 53.836 32.436 1.00 22.92 N 98 | ATOM 3906 CA GLY K 815 51.786 54.386 31.143 1.00 22.34 C 99 | ATOM 3907 C GLY K 815 53.144 53.976 30.630 1.00 22.23 C 100 | ATOM 3908 O GLY K 815 53.451 54.268 29.470 1.00 21.97 O 101 | ATOM 3909 N GLY K 816 53.957 53.311 31.445 1.00 22.61 N 102 | ATOM 3910 CA GLY K 816 55.279 52.867 31.036 1.00 22.25 C 103 | ATOM 3911 C GLY K 816 55.290 51.518 30.334 1.00 22.26 C 104 | ATOM 3912 O GLY K 816 54.245 50.916 30.076 1.00 22.78 O 105 | ATOM 3913 N SER K 817 56.485 51.011 30.029 1.00 21.71 N 106 | ATOM 3914 CA SER K 817 56.655 49.749 29.350 1.00 21.05 C 107 | ATOM 3915 C SER K 817 57.527 48.736 30.079 1.00 21.18 C 108 | ATOM 3916 O SER K 817 58.482 49.079 30.762 1.00 21.69 O 109 | ATOM 3917 CB SER K 817 57.322 49.952 27.985 1.00 20.62 C 110 | ATOM 3918 OG SER K 817 56.386 50.496 27.086 1.00 21.21 O 111 | ATOM 3919 N LEU K 818 57.253 47.464 29.811 1.00 20.70 N 112 | ATOM 3920 CA LEU K 818 58.043 46.349 30.288 1.00 20.61 C 113 | ATOM 3921 C LEU K 818 58.168 45.344 29.139 1.00 20.74 C 114 | ATOM 3922 O LEU K 818 57.365 45.377 28.213 1.00 20.62 O 115 | ATOM 3923 CB LEU K 818 57.451 45.612 31.488 1.00 19.62 C 116 | ATOM 3924 CG LEU K 818 57.454 46.289 32.854 1.00 19.23 C 117 | ATOM 3925 CD1 LEU K 818 56.682 45.467 33.876 1.00 18.58 C 118 | ATOM 3926 CD2 LEU K 818 58.875 46.548 33.333 1.00 19.02 C 119 | ATOM 3927 N ARG K 819 59.135 44.442 29.255 1.00 21.36 N 120 | ATOM 3928 CA ARG K 819 59.299 43.389 28.251 1.00 21.31 C 121 | ATOM 3929 C ARG K 819 59.441 42.044 28.947 1.00 19.98 C 122 | ATOM 3930 O ARG K 819 60.433 41.907 29.665 1.00 19.84 O 123 | ATOM 3931 CB ARG K 819 60.539 43.655 27.402 1.00 22.57 C 124 | ATOM 3932 CG ARG K 819 60.922 42.483 26.502 1.00 24.68 C 125 | ATOM 3933 CD ARG K 819 61.035 42.969 25.065 1.00 26.56 C 126 | ATOM 3934 NE ARG K 819 61.517 41.954 24.134 1.00 28.42 N 127 | ATOM 3935 CZ ARG K 819 61.153 41.916 22.850 1.00 29.41 C 128 | ATOM 3936 NH1 ARG K 819 60.296 42.810 22.367 1.00 29.88 N 129 | ATOM 3937 NH2 ARG K 819 61.653 40.992 22.034 1.00 29.56 N 130 | ATOM 3938 N LEU K 820 58.496 41.128 28.774 1.00 18.67 N 131 | ATOM 3939 CA LEU K 820 58.651 39.821 29.421 1.00 18.00 C 132 | ATOM 3940 C LEU K 820 59.286 38.824 28.449 1.00 17.19 C 133 | ATOM 3941 O LEU K 820 
59.036 38.845 27.245 1.00 16.52 O 134 | ATOM 3942 CB LEU K 820 57.338 39.303 29.964 1.00 18.07 C 135 | ATOM 3943 CG LEU K 820 56.681 40.054 31.117 1.00 18.24 C 136 | ATOM 3944 CD1 LEU K 820 55.250 39.555 31.290 1.00 18.81 C 137 | ATOM 3945 CD2 LEU K 820 57.436 39.861 32.417 1.00 18.86 C 138 | ATOM 3946 N SER K 821 60.178 38.014 28.995 1.00 16.41 N 139 | ATOM 3947 CA SER K 821 60.918 37.009 28.245 1.00 15.87 C 140 | ATOM 3948 C SER K 821 60.696 35.615 28.824 1.00 15.18 C 141 | ATOM 3949 O SER K 821 60.658 35.440 30.038 1.00 13.50 O 142 | ATOM 3950 CB SER K 821 62.407 37.345 28.373 1.00 16.28 C 143 | ATOM 3951 OG SER K 821 62.991 37.694 27.143 1.00 16.87 O 144 | ATOM 3952 N CYS K 822 60.595 34.624 27.952 1.00 15.23 N 145 | ATOM 3953 CA CYS K 822 60.474 33.240 28.387 1.00 15.60 C 146 | ATOM 3954 C CYS K 822 61.162 32.292 27.411 1.00 16.38 C 147 | ATOM 3955 O CYS K 822 60.952 32.380 26.204 1.00 16.56 O 148 | ATOM 3956 CB CYS K 822 59.002 32.845 28.493 1.00 14.74 C 149 | ATOM 3957 SG CYS K 822 58.767 31.084 28.793 1.00 13.65 S 150 | ATOM 3958 N ALA K 823 61.948 31.362 27.939 1.00 17.25 N 151 | ATOM 3959 CA ALA K 823 62.580 30.350 27.110 1.00 17.77 C 152 | ATOM 3960 C ALA K 823 62.372 29.000 27.790 1.00 18.79 C 153 | ATOM 3961 O ALA K 823 62.372 28.969 29.023 1.00 19.43 O 154 | ATOM 3962 CB ALA K 823 64.060 30.574 26.942 1.00 17.16 C 155 | ATOM 3963 N ALA K 824 62.152 27.977 26.976 1.00 18.86 N 156 | ATOM 3964 CA ALA K 824 62.043 26.639 27.545 1.00 18.95 C 157 | ATOM 3965 C ALA K 824 63.446 26.053 27.479 1.00 20.21 C 158 | ATOM 3966 O ALA K 824 64.063 26.191 26.418 1.00 20.85 O 159 | ATOM 3967 CB ALA K 824 61.071 25.772 26.773 1.00 17.58 C 160 | ATOM 3968 N SER K 825 63.924 25.482 28.574 1.00 21.32 N 161 | ATOM 3969 CA SER K 825 65.235 24.823 28.508 1.00 21.75 C 162 | ATOM 3970 C SER K 825 65.015 23.333 28.774 1.00 21.47 C 163 | ATOM 3971 O SER K 825 64.207 22.920 29.606 1.00 20.65 O 164 | ATOM 3972 CB SER K 825 66.277 25.432 29.420 1.00 22.30 C 165 | ATOM 3973 OG SER K 825 66.351 24.732 30.650 1.00 24.12 O 166 | ATOM 3974 N GLY K 826 65.735 22.515 28.007 1.00 21.68 N 167 | ATOM 3975 CA GLY K 826 65.663 21.072 28.134 1.00 20.53 C 168 | ATOM 3976 C GLY K 826 64.670 20.356 27.269 1.00 20.00 C 169 | ATOM 3977 O GLY K 826 64.588 19.135 27.410 1.00 21.19 O 170 | ATOM 3978 N TYR K 827 63.904 21.015 26.416 1.00 19.87 N 171 | ATOM 3979 CA TYR K 827 62.893 20.339 25.608 1.00 18.39 C 172 | ATOM 3980 C TYR K 827 63.492 20.050 24.243 1.00 17.87 C 173 | ATOM 3981 O TYR K 827 64.029 20.899 23.559 1.00 17.26 O 174 | ATOM 3982 CB TYR K 827 61.586 21.127 25.538 1.00 17.00 C 175 | ATOM 3983 CG TYR K 827 60.447 20.401 24.859 1.00 15.89 C 176 | ATOM 3984 CD1 TYR K 827 60.081 19.124 25.254 1.00 14.79 C 177 | ATOM 3985 CD2 TYR K 827 59.735 20.984 23.817 1.00 15.83 C 178 | ATOM 3986 CE1 TYR K 827 59.047 18.458 24.639 1.00 15.30 C 179 | ATOM 3987 CE2 TYR K 827 58.673 20.341 23.194 1.00 14.99 C 180 | ATOM 3988 CZ TYR K 827 58.336 19.069 23.612 1.00 15.73 C 181 | ATOM 3989 OH TYR K 827 57.333 18.356 22.988 1.00 14.26 O 182 | ATOM 3990 N ALA K 828 63.435 18.798 23.834 1.00 18.98 N 183 | ATOM 3991 CA ALA K 828 64.012 18.295 22.611 1.00 19.64 C 184 | ATOM 3992 C ALA K 828 63.319 18.766 21.353 1.00 20.24 C 185 | ATOM 3993 O ALA K 828 63.952 18.712 20.297 1.00 21.28 O 186 | ATOM 3994 CB ALA K 828 64.028 16.769 22.642 1.00 19.29 C 187 | ATOM 3995 N TYR K 829 62.042 19.116 21.415 1.00 20.20 N 188 | ATOM 3996 CA TYR K 829 61.311 19.555 20.230 1.00 19.88 C 189 | ATOM 3997 C TYR K 829 60.850 20.997 20.349 1.00 19.30 C 190 | ATOM 
3998 O TYR K 829 61.284 21.738 21.232 1.00 18.54 O 191 | ATOM 3999 CB TYR K 829 60.140 18.574 20.024 1.00 20.38 C 192 | ATOM 4000 CG TYR K 829 60.705 17.208 19.683 1.00 21.11 C 193 | ATOM 4001 CD1 TYR K 829 61.261 16.988 18.430 1.00 21.38 C 194 | ATOM 4002 CD2 TYR K 829 60.775 16.193 20.636 1.00 21.62 C 195 | ATOM 4003 CE1 TYR K 829 61.815 15.765 18.105 1.00 21.58 C 196 | ATOM 4004 CE2 TYR K 829 61.319 14.959 20.317 1.00 21.58 C 197 | ATOM 4005 CZ TYR K 829 61.832 14.765 19.052 1.00 22.27 C 198 | ATOM 4006 OH TYR K 829 62.376 13.550 18.699 1.00 24.23 O 199 | ATOM 4007 N THR K 830 59.978 21.443 19.450 1.00 18.65 N 200 | ATOM 4008 CA THR K 830 59.436 22.791 19.527 1.00 18.46 C 201 | ATOM 4009 C THR K 830 57.955 22.765 19.926 1.00 17.14 C 202 | ATOM 4010 O THR K 830 57.133 22.089 19.306 1.00 16.89 O 203 | ATOM 4011 CB THR K 830 59.627 23.542 18.208 1.00 19.24 C 204 | ATOM 4012 OG1 THR K 830 58.348 23.690 17.565 1.00 21.06 O 205 | ATOM 4013 CG2 THR K 830 60.532 22.782 17.253 1.00 20.51 C 206 | ATOM 4014 N TYR K 831 57.619 23.468 21.009 1.00 15.47 N 207 | ATOM 4015 CA TYR K 831 56.227 23.506 21.453 1.00 14.50 C 208 | ATOM 4016 C TYR K 831 55.414 24.300 20.429 1.00 14.65 C 209 | ATOM 4017 O TYR K 831 55.791 25.404 20.023 1.00 14.19 O 210 | ATOM 4018 CB TYR K 831 56.093 24.149 22.808 1.00 13.01 C 211 | ATOM 4019 CG TYR K 831 56.633 23.421 24.004 1.00 12.60 C 212 | ATOM 4020 CD1 TYR K 831 56.034 22.263 24.496 1.00 12.58 C 213 | ATOM 4021 CD2 TYR K 831 57.738 23.926 24.672 1.00 12.35 C 214 | ATOM 4022 CE1 TYR K 831 56.535 21.624 25.614 1.00 12.67 C 215 | ATOM 4023 CE2 TYR K 831 58.241 23.303 25.800 1.00 12.55 C 216 | ATOM 4024 CZ TYR K 831 57.631 22.157 26.266 1.00 12.67 C 217 | ATOM 4025 OH TYR K 831 58.130 21.567 27.400 1.00 12.21 O 218 | ATOM 4026 N ILE K 832 54.315 23.726 19.945 1.00 14.63 N 219 | ATOM 4027 CA ILE K 832 53.571 24.415 18.895 1.00 14.71 C 220 | ATOM 4028 C ILE K 832 53.080 25.754 19.417 1.00 15.23 C 221 | ATOM 4029 O ILE K 832 53.338 26.768 18.770 1.00 16.25 O 222 | ATOM 4030 CB ILE K 832 52.420 23.573 18.337 1.00 13.83 C 223 | ATOM 4031 CG1 ILE K 832 53.050 22.322 17.730 1.00 12.88 C 224 | ATOM 4032 CG2 ILE K 832 51.580 24.362 17.345 1.00 12.52 C 225 | ATOM 4033 CD1 ILE K 832 52.273 21.056 17.981 1.00 12.88 C 226 | ATOM 4034 N TYR K 833 52.455 25.761 20.576 1.00 14.96 N 227 | ATOM 4035 CA TYR K 833 51.939 26.951 21.200 1.00 14.28 C 228 | ATOM 4036 C TYR K 833 52.649 27.322 22.492 1.00 14.09 C 229 | ATOM 4037 O TYR K 833 52.829 26.551 23.428 1.00 13.16 O 230 | ATOM 4038 CB TYR K 833 50.457 26.711 21.506 1.00 14.21 C 231 | ATOM 4039 CG TYR K 833 49.526 27.451 20.576 1.00 14.55 C 232 | ATOM 4040 CD1 TYR K 833 49.397 28.833 20.664 1.00 14.26 C 233 | ATOM 4041 CD2 TYR K 833 48.752 26.767 19.651 1.00 14.40 C 234 | ATOM 4042 CE1 TYR K 833 48.518 29.496 19.831 1.00 14.83 C 235 | ATOM 4043 CE2 TYR K 833 47.901 27.438 18.800 1.00 14.16 C 236 | ATOM 4044 CZ TYR K 833 47.779 28.801 18.902 1.00 14.59 C 237 | ATOM 4045 OH TYR K 833 46.948 29.491 18.046 1.00 15.03 O 238 | ATOM 4046 N MET K 834 53.005 28.606 22.571 1.00 14.17 N 239 | ATOM 4047 CA MET K 834 53.529 29.201 23.793 1.00 13.66 C 240 | ATOM 4048 C MET K 834 52.695 30.454 24.037 1.00 13.52 C 241 | ATOM 4049 O MET K 834 52.288 31.051 23.034 1.00 12.56 O 242 | ATOM 4050 CB MET K 834 55.010 29.536 23.704 1.00 13.14 C 243 | ATOM 4051 CG MET K 834 55.896 28.311 23.659 1.00 13.76 C 244 | ATOM 4052 SD MET K 834 57.638 28.691 23.712 1.00 15.09 S 245 | ATOM 4053 CE MET K 834 57.938 28.957 25.463 1.00 13.74 C 246 | ATOM 4054 N GLY K 835 52.444 
30.796 25.298 1.00 13.31 N 247 | ATOM 4055 CA GLY K 835 51.625 31.984 25.536 1.00 13.81 C 248 | ATOM 4056 C GLY K 835 51.836 32.515 26.937 1.00 14.42 C 249 | ATOM 4057 O GLY K 835 52.572 31.938 27.720 1.00 15.68 O 250 | ATOM 4058 N TRP K 836 51.170 33.613 27.251 1.00 14.42 N 251 | ATOM 4059 CA TRP K 836 51.248 34.261 28.547 1.00 13.96 C 252 | ATOM 4060 C TRP K 836 49.882 34.299 29.227 1.00 14.71 C 253 | ATOM 4061 O TRP K 836 48.823 34.527 28.646 1.00 14.28 O 254 | ATOM 4062 CB TRP K 836 51.764 35.698 28.390 1.00 12.59 C 255 | ATOM 4063 CG TRP K 836 53.194 35.787 27.955 1.00 11.68 C 256 | ATOM 4064 CD1 TRP K 836 53.674 35.910 26.682 1.00 10.83 C 257 | ATOM 4065 CD2 TRP K 836 54.339 35.745 28.818 1.00 11.03 C 258 | ATOM 4066 NE1 TRP K 836 55.045 35.944 26.701 1.00 10.27 N 259 | ATOM 4067 CE2 TRP K 836 55.482 35.846 27.996 1.00 10.53 C 260 | ATOM 4068 CE3 TRP K 836 54.494 35.638 30.204 1.00 10.45 C 261 | ATOM 4069 CZ2 TRP K 836 56.773 35.847 28.517 1.00 9.97 C 262 | ATOM 4070 CZ3 TRP K 836 55.777 35.633 30.721 1.00 10.61 C 263 | ATOM 4071 CH2 TRP K 836 56.897 35.743 29.875 1.00 10.37 C 264 | ATOM 4072 N PHE K 837 49.883 33.984 30.513 1.00 15.28 N 265 | ATOM 4073 CA PHE K 837 48.691 33.988 31.348 1.00 15.92 C 266 | ATOM 4074 C PHE K 837 49.013 34.937 32.501 1.00 17.48 C 267 | ATOM 4075 O PHE K 837 50.197 35.196 32.692 1.00 18.98 O 268 | ATOM 4076 CB PHE K 837 48.325 32.608 31.907 1.00 14.47 C 269 | ATOM 4077 CG PHE K 837 47.866 31.674 30.827 1.00 13.41 C 270 | ATOM 4078 CD1 PHE K 837 48.760 31.067 29.966 1.00 12.45 C 271 | ATOM 4079 CD2 PHE K 837 46.511 31.435 30.652 1.00 13.60 C 272 | ATOM 4080 CE1 PHE K 837 48.313 30.254 28.949 1.00 12.12 C 273 | ATOM 4081 CE2 PHE K 837 46.058 30.603 29.643 1.00 12.50 C 274 | ATOM 4082 CZ PHE K 837 46.966 30.010 28.801 1.00 12.00 C 275 | ATOM 4083 N ARG K 838 48.037 35.444 33.225 1.00 18.53 N 276 | ATOM 4084 CA ARG K 838 48.333 36.311 34.352 1.00 19.46 C 277 | ATOM 4085 C ARG K 838 47.294 36.100 35.442 1.00 20.58 C 278 | ATOM 4086 O ARG K 838 46.156 35.768 35.140 1.00 20.86 O 279 | ATOM 4087 CB ARG K 838 48.321 37.778 33.967 1.00 19.78 C 280 | ATOM 4088 CG ARG K 838 46.991 38.275 33.403 1.00 19.38 C 281 | ATOM 4089 CD ARG K 838 46.981 39.785 33.521 1.00 19.17 C 282 | ATOM 4090 NE ARG K 838 46.441 40.470 32.361 1.00 19.57 N 283 | ATOM 4091 CZ ARG K 838 45.861 41.662 32.428 1.00 19.58 C 284 | ATOM 4092 NH1 ARG K 838 45.727 42.220 33.624 1.00 21.38 N 285 | ATOM 4093 NH2 ARG K 838 45.383 42.310 31.390 1.00 19.42 N 286 | ATOM 4094 N GLN K 839 47.678 36.346 36.677 1.00 22.25 N 287 | ATOM 4095 CA GLN K 839 46.742 36.211 37.784 1.00 24.07 C 288 | ATOM 4096 C GLN K 839 46.808 37.388 38.749 1.00 24.92 C 289 | ATOM 4097 O GLN K 839 47.849 37.567 39.393 1.00 25.50 O 290 | ATOM 4098 CB GLN K 839 47.035 34.887 38.489 1.00 23.56 C 291 | ATOM 4099 CG GLN K 839 45.988 34.555 39.537 1.00 23.93 C 292 | ATOM 4100 CD GLN K 839 46.365 33.330 40.330 1.00 24.55 C 293 | ATOM 4101 OE1 GLN K 839 45.493 32.658 40.864 1.00 25.68 O 294 | ATOM 4102 NE2 GLN K 839 47.651 33.034 40.425 1.00 25.04 N 295 | ATOM 4103 N ALA K 840 45.779 38.224 38.812 1.00 25.67 N 296 | ATOM 4104 CA ALA K 840 45.773 39.327 39.778 1.00 26.49 C 297 | ATOM 4105 C ALA K 840 45.504 38.750 41.169 1.00 27.38 C 298 | ATOM 4106 O ALA K 840 44.946 37.668 41.332 1.00 27.60 O 299 | ATOM 4107 CB ALA K 840 44.731 40.392 39.514 1.00 25.82 C 300 | ATOM 4108 N PRO K 841 45.992 39.432 42.188 1.00 28.11 N 301 | ATOM 4109 CA PRO K 841 45.879 38.977 43.565 1.00 28.63 C 302 | ATOM 4110 C PRO K 841 44.442 38.707 43.969 1.00 29.27 C 303 
| ATOM 4111 O PRO K 841 43.539 39.544 43.959 1.00 28.61 O 304 | ATOM 4112 CB PRO K 841 46.561 40.062 44.373 1.00 28.72 C 305 | ATOM 4113 CG PRO K 841 46.347 41.271 43.507 1.00 28.84 C 306 | ATOM 4114 CD PRO K 841 46.699 40.737 42.123 1.00 28.34 C 307 | ATOM 4115 N GLY K 842 44.231 37.434 44.306 1.00 30.16 N 308 | ATOM 4116 CA GLY K 842 42.941 36.892 44.700 1.00 31.10 C 309 | ATOM 4117 C GLY K 842 42.219 36.238 43.525 1.00 32.09 C 310 | ATOM 4118 O GLY K 842 41.745 35.115 43.633 1.00 32.33 O 311 | ATOM 4119 N LYS K 843 42.087 36.937 42.411 1.00 32.37 N 312 | ATOM 4120 CA LYS K 843 41.422 36.535 41.207 1.00 32.47 C 313 | ATOM 4121 C LYS K 843 41.862 35.200 40.631 1.00 32.70 C 314 | ATOM 4122 O LYS K 843 42.753 34.528 41.139 1.00 32.43 O 315 | ATOM 4123 CB LYS K 843 41.684 37.620 40.144 1.00 32.65 C 316 | ATOM 4124 CG LYS K 843 40.416 38.315 39.676 1.00 33.44 C 317 | ATOM 4125 CD LYS K 843 40.389 39.787 40.062 1.00 33.21 C 318 | ATOM 4126 CE LYS K 843 39.878 40.629 38.903 1.00 33.89 C 319 | ATOM 4127 NZ LYS K 843 40.452 40.244 37.581 1.00 33.55 N 320 | ATOM 4128 N GLU K 844 41.226 34.829 39.522 1.00 33.36 N 321 | ATOM 4129 CA GLU K 844 41.584 33.614 38.807 1.00 34.33 C 322 | ATOM 4130 C GLU K 844 42.500 33.905 37.620 1.00 33.42 C 323 | ATOM 4131 O GLU K 844 42.379 34.929 36.959 1.00 33.40 O 324 | ATOM 4132 CB GLU K 844 40.351 32.833 38.366 1.00 36.01 C 325 | ATOM 4133 CG GLU K 844 39.304 33.590 37.584 1.00 38.23 C 326 | ATOM 4134 CD GLU K 844 38.456 32.679 36.705 1.00 40.17 C 327 | ATOM 4135 OE1 GLU K 844 38.942 31.570 36.348 1.00 40.50 O 328 | ATOM 4136 OE2 GLU K 844 37.313 33.094 36.374 1.00 40.45 O 329 | ATOM 4137 N ARG K 845 43.420 32.989 37.345 1.00 32.62 N 330 | ATOM 4138 CA ARG K 845 44.351 33.147 36.244 1.00 31.79 C 331 | ATOM 4139 C ARG K 845 43.623 33.328 34.921 1.00 30.09 C 332 | ATOM 4140 O ARG K 845 42.626 32.673 34.664 1.00 30.13 O 333 | ATOM 4141 CB ARG K 845 45.315 31.962 36.137 1.00 33.03 C 334 | ATOM 4142 CG ARG K 845 46.548 32.318 35.317 1.00 35.03 C 335 | ATOM 4143 CD ARG K 845 47.298 31.069 34.893 1.00 37.56 C 336 | ATOM 4144 NE ARG K 845 47.640 30.249 36.055 1.00 39.63 N 337 | ATOM 4145 CZ ARG K 845 47.343 28.961 36.135 1.00 40.40 C 338 | ATOM 4146 NH1 ARG K 845 46.718 28.372 35.130 1.00 40.42 N 339 | ATOM 4147 NH2 ARG K 845 47.630 28.252 37.213 1.00 41.35 N 340 | ATOM 4148 N GLU K 846 44.090 34.252 34.089 1.00 28.36 N 341 | ATOM 4149 CA GLU K 846 43.498 34.489 32.784 1.00 26.41 C 342 | ATOM 4150 C GLU K 846 44.554 34.576 31.694 1.00 24.53 C 343 | ATOM 4151 O GLU K 846 45.691 34.924 31.964 1.00 24.37 O 344 | ATOM 4152 CB GLU K 846 42.690 35.766 32.760 1.00 27.21 C 345 | ATOM 4153 CG GLU K 846 43.300 36.915 33.539 1.00 28.75 C 346 | ATOM 4154 CD GLU K 846 42.771 38.227 32.976 1.00 29.90 C 347 | ATOM 4155 OE1 GLU K 846 41.555 38.247 32.692 1.00 30.42 O 348 | ATOM 4156 OE2 GLU K 846 43.590 39.160 32.842 1.00 30.87 O 349 | ATOM 4157 N GLY K 847 44.167 34.238 30.478 1.00 22.84 N 350 | ATOM 4158 CA GLY K 847 45.064 34.232 29.334 1.00 20.98 C 351 | ATOM 4159 C GLY K 847 45.288 35.647 28.831 1.00 19.94 C 352 | ATOM 4160 O GLY K 847 44.365 36.434 28.767 1.00 20.49 O 353 | ATOM 4161 N VAL K 848 46.519 35.965 28.490 1.00 19.58 N 354 | ATOM 4162 CA VAL K 848 46.940 37.271 28.028 1.00 18.48 C 355 | ATOM 4163 C VAL K 848 47.185 37.259 26.532 1.00 18.69 C 356 | ATOM 4164 O VAL K 848 46.588 38.072 25.836 1.00 19.38 O 357 | ATOM 4165 CB VAL K 848 48.225 37.718 28.759 1.00 17.80 C 358 | ATOM 4166 CG1 VAL K 848 48.770 39.033 28.224 1.00 17.83 C 359 | ATOM 4167 CG2 VAL K 848 47.958 37.858 
30.247 1.00 16.81 C 360 | ATOM 4168 N ALA K 849 48.085 36.402 26.079 1.00 18.58 N 361 | ATOM 4169 CA ALA K 849 48.415 36.320 24.660 1.00 18.22 C 362 | ATOM 4170 C ALA K 849 49.106 34.997 24.339 1.00 17.71 C 363 | ATOM 4171 O ALA K 849 49.687 34.378 25.220 1.00 17.27 O 364 | ATOM 4172 CB ALA K 849 49.305 37.478 24.243 1.00 17.68 C 365 | ATOM 4173 N ALA K 850 49.001 34.539 23.107 1.00 17.53 N 366 | ATOM 4174 CA ALA K 850 49.548 33.264 22.686 1.00 17.96 C 367 | ATOM 4175 C ALA K 850 49.891 33.273 21.196 1.00 18.66 C 368 | ATOM 4176 O ALA K 850 49.299 34.000 20.398 1.00 19.39 O 369 | ATOM 4177 CB ALA K 850 48.552 32.163 23.010 1.00 17.01 C 370 | ATOM 4178 N MET K 851 50.906 32.498 20.835 1.00 18.36 N 371 | ATOM 4179 CA MET K 851 51.356 32.414 19.459 1.00 18.48 C 372 | ATOM 4180 C MET K 851 51.890 31.032 19.112 1.00 18.36 C 373 | ATOM 4181 O MET K 851 52.456 30.349 19.963 1.00 18.68 O 374 | ATOM 4182 CB MET K 851 52.437 33.472 19.214 1.00 17.93 C 375 | ATOM 4183 CG MET K 851 52.751 33.702 17.742 1.00 16.82 C 376 | ATOM 4184 SD MET K 851 54.155 34.819 17.534 1.00 15.64 S 377 | ATOM 4185 CE MET K 851 53.281 36.296 17.023 1.00 16.41 C 378 | ATOM 4186 N ASP K 852 51.712 30.606 17.868 1.00 18.13 N 379 | ATOM 4187 CA ASP K 852 52.245 29.296 17.486 1.00 18.20 C 380 | ATOM 4188 C ASP K 852 53.687 29.432 17.023 1.00 18.84 C 381 | ATOM 4189 O ASP K 852 54.263 30.505 16.923 1.00 18.71 O 382 | ATOM 4190 CB ASP K 852 51.319 28.552 16.557 1.00 16.84 C 383 | ATOM 4191 CG ASP K 852 51.313 28.890 15.090 1.00 15.50 C 384 | ATOM 4192 OD1 ASP K 852 52.355 29.269 14.535 1.00 14.61 O 385 | ATOM 4193 OD2 ASP K 852 50.232 28.740 14.479 1.00 15.12 O 386 | ATOM 4194 N SER K 853 54.309 28.296 16.758 1.00 20.08 N 387 | ATOM 4195 CA SER K 853 55.706 28.199 16.356 1.00 21.12 C 388 | ATOM 4196 C SER K 853 56.013 28.820 15.012 1.00 21.92 C 389 | ATOM 4197 O SER K 853 57.148 29.218 14.754 1.00 22.30 O 390 | ATOM 4198 CB SER K 853 56.078 26.701 16.310 1.00 20.68 C 391 | ATOM 4199 OG SER K 853 55.216 26.082 15.360 1.00 20.37 O 392 | ATOM 4200 N GLY K 854 55.036 28.857 14.123 1.00 23.23 N 393 | ATOM 4201 CA GLY K 854 55.272 29.448 12.795 1.00 25.24 C 394 | ATOM 4202 C GLY K 854 55.048 30.940 12.984 1.00 26.46 C 395 | ATOM 4203 O GLY K 854 54.896 31.353 14.138 1.00 27.06 O 396 | ATOM 4204 N GLY K 855 54.884 31.707 11.918 1.00 27.32 N 397 | ATOM 4205 CA GLY K 855 54.583 33.131 12.103 1.00 27.80 C 398 | ATOM 4206 C GLY K 855 53.668 33.276 13.333 1.00 26.73 C 399 | ATOM 4207 O GLY K 855 54.086 33.774 14.371 1.00 26.35 O 400 | ATOM 4208 N GLY K 856 52.434 32.792 13.201 1.00 25.83 N 401 | ATOM 4209 CA GLY K 856 51.570 32.798 14.360 1.00 25.56 C 402 | ATOM 4210 C GLY K 856 50.119 32.521 14.091 1.00 25.00 C 403 | ATOM 4211 O GLY K 856 49.626 32.661 12.979 1.00 27.17 O 404 | ATOM 4212 N GLY K 857 49.440 32.148 15.151 1.00 23.66 N 405 | ATOM 4213 CA GLY K 857 47.980 31.972 15.176 1.00 22.22 C 406 | ATOM 4214 C GLY K 857 47.679 32.758 16.473 1.00 22.19 C 407 | ATOM 4215 O GLY K 857 47.337 32.215 17.502 1.00 22.27 O 408 | ATOM 4216 N THR K 858 48.109 34.005 16.411 1.00 21.50 N 409 | ATOM 4217 CA THR K 858 48.104 34.957 17.491 1.00 21.13 C 410 | ATOM 4218 C THR K 858 46.728 35.208 18.066 1.00 21.65 C 411 | ATOM 4219 O THR K 858 45.734 35.242 17.354 1.00 22.72 O 412 | ATOM 4220 CB THR K 858 48.697 36.273 16.957 1.00 20.86 C 413 | ATOM 4221 OG1 THR K 858 49.932 35.941 16.287 1.00 21.84 O 414 | ATOM 4222 CG2 THR K 858 48.991 37.230 18.087 1.00 20.96 C 415 | ATOM 4223 N LEU K 859 46.654 35.348 19.381 1.00 21.36 N 416 | ATOM 4224 CA LEU K 859 
45.422 35.583 20.109 1.00 20.55 C 417 | ATOM 4225 C LEU K 859 45.742 36.545 21.249 1.00 20.04 C 418 | ATOM 4226 O LEU K 859 46.847 36.487 21.777 1.00 20.33 O 419 | ATOM 4227 CB LEU K 859 44.905 34.301 20.727 1.00 21.27 C 420 | ATOM 4228 CG LEU K 859 43.587 33.681 20.299 1.00 21.61 C 421 | ATOM 4229 CD1 LEU K 859 43.787 32.965 18.972 1.00 22.51 C 422 | ATOM 4230 CD2 LEU K 859 43.090 32.687 21.343 1.00 21.24 C 423 | ATOM 4231 N TYR K 860 44.846 37.461 21.564 1.00 19.18 N 424 | ATOM 4232 CA TYR K 860 45.061 38.435 22.617 1.00 18.22 C 425 | ATOM 4233 C TYR K 860 43.792 38.534 23.449 1.00 19.97 C 426 | ATOM 4234 O TYR K 860 42.717 38.386 22.869 1.00 21.03 O 427 | ATOM 4235 CB TYR K 860 45.325 39.819 22.029 1.00 15.13 C 428 | ATOM 4236 CG TYR K 860 46.723 40.003 21.478 1.00 13.22 C 429 | ATOM 4237 CD1 TYR K 860 47.793 40.258 22.337 1.00 12.45 C 430 | ATOM 4238 CD2 TYR K 860 46.970 39.916 20.120 1.00 11.48 C 431 | ATOM 4239 CE1 TYR K 860 49.075 40.426 21.854 1.00 11.72 C 432 | ATOM 4240 CE2 TYR K 860 48.248 40.082 19.631 1.00 11.57 C 433 | ATOM 4241 CZ TYR K 860 49.293 40.329 20.497 1.00 11.93 C 434 | ATOM 4242 OH TYR K 860 50.564 40.481 19.994 1.00 11.77 O 435 | ATOM 4243 N ALA K 861 43.913 38.771 24.740 1.00 21.45 N 436 | ATOM 4244 CA ALA K 861 42.754 39.015 25.581 1.00 22.70 C 437 | ATOM 4245 C ALA K 861 42.227 40.390 25.165 1.00 24.93 C 438 | ATOM 4246 O ALA K 861 43.005 41.214 24.686 1.00 24.05 O 439 | ATOM 4247 CB ALA K 861 43.137 39.027 27.036 1.00 21.57 C 440 | ATOM 4248 N ASP K 862 40.932 40.649 25.351 1.00 28.03 N 441 | ATOM 4249 CA ASP K 862 40.343 41.928 24.968 1.00 30.06 C 442 | ATOM 4250 C ASP K 862 40.928 43.115 25.708 1.00 30.02 C 443 | ATOM 4251 O ASP K 862 41.071 44.181 25.096 1.00 30.57 O 444 | ATOM 4252 CB ASP K 862 38.818 41.909 25.124 1.00 32.07 C 445 | ATOM 4253 CG ASP K 862 38.124 41.361 23.889 1.00 34.06 C 446 | ATOM 4254 OD1 ASP K 862 38.143 42.045 22.837 1.00 34.80 O 447 | ATOM 4255 OD2 ASP K 862 37.574 40.235 23.945 1.00 35.02 O 448 | ATOM 4256 N SER K 863 41.337 42.961 26.958 1.00 29.63 N 449 | ATOM 4257 CA SER K 863 41.952 44.029 27.718 1.00 29.51 C 450 | ATOM 4258 C SER K 863 43.340 44.471 27.281 1.00 29.32 C 451 | ATOM 4259 O SER K 863 43.766 45.552 27.735 1.00 29.54 O 452 | ATOM 4260 CB SER K 863 42.080 43.603 29.190 1.00 29.47 C 453 | ATOM 4261 OG SER K 863 42.258 42.197 29.271 1.00 30.19 O 454 | ATOM 4262 N VAL K 864 44.087 43.682 26.510 1.00 28.55 N 455 | ATOM 4263 CA VAL K 864 45.446 44.089 26.156 1.00 28.29 C 456 | ATOM 4264 C VAL K 864 45.654 44.337 24.670 1.00 28.21 C 457 | ATOM 4265 O VAL K 864 46.742 44.772 24.279 1.00 28.28 O 458 | ATOM 4266 CB VAL K 864 46.538 43.115 26.651 1.00 28.16 C 459 | ATOM 4267 CG1 VAL K 864 46.345 42.752 28.122 1.00 27.96 C 460 | ATOM 4268 CG2 VAL K 864 46.628 41.847 25.814 1.00 27.83 C 461 | ATOM 4269 N LYS K 865 44.650 44.048 23.855 1.00 27.99 N 462 | ATOM 4270 CA LYS K 865 44.769 44.231 22.413 1.00 27.95 C 463 | ATOM 4271 C LYS K 865 45.175 45.673 22.119 1.00 27.76 C 464 | ATOM 4272 O LYS K 865 44.593 46.596 22.677 1.00 27.87 O 465 | ATOM 4273 CB LYS K 865 43.467 43.888 21.696 1.00 28.54 C 466 | ATOM 4274 CG LYS K 865 43.660 43.013 20.471 1.00 29.60 C 467 | ATOM 4275 CD LYS K 865 42.394 42.266 20.073 1.00 30.46 C 468 | ATOM 4276 CE LYS K 865 41.935 41.300 21.156 1.00 30.86 C 469 | ATOM 4277 NZ LYS K 865 41.690 39.927 20.639 1.00 31.14 N 470 | ATOM 4278 N GLY K 866 46.187 45.862 21.285 1.00 26.86 N 471 | ATOM 4279 CA GLY K 866 46.682 47.164 20.924 1.00 25.86 C 472 | ATOM 4280 C GLY K 866 47.729 47.734 21.861 1.00 25.35 C 473 
| ATOM 4281 O GLY K 866 48.338 48.749 21.504 1.00 26.06 O 474 | ATOM 4282 N ARG K 867 47.940 47.165 23.047 1.00 23.77 N 475 | ATOM 4283 CA ARG K 867 48.954 47.695 23.948 1.00 21.80 C 476 | ATOM 4284 C ARG K 867 50.078 46.688 24.165 1.00 21.31 C 477 | ATOM 4285 O ARG K 867 51.187 47.095 24.522 1.00 21.42 O 478 | ATOM 4286 CB ARG K 867 48.435 48.108 25.320 1.00 21.20 C 479 | ATOM 4287 CG ARG K 867 47.189 48.971 25.373 1.00 19.64 C 480 | ATOM 4288 CD ARG K 867 46.872 49.343 26.821 1.00 18.41 C 481 | ATOM 4289 NE ARG K 867 46.200 48.248 27.492 1.00 18.07 N 482 | ATOM 4290 CZ ARG K 867 46.583 47.645 28.604 1.00 17.80 C 483 | ATOM 4291 NH1 ARG K 867 47.684 48.020 29.231 1.00 17.19 N 484 | ATOM 4292 NH2 ARG K 867 45.855 46.630 29.058 1.00 18.00 N 485 | ATOM 4293 N PHE K 868 49.759 45.397 24.117 1.00 20.01 N 486 | ATOM 4294 CA PHE K 868 50.759 44.364 24.358 1.00 18.14 C 487 | ATOM 4295 C PHE K 868 51.075 43.643 23.062 1.00 18.43 C 488 | ATOM 4296 O PHE K 868 50.168 43.400 22.272 1.00 19.25 O 489 | ATOM 4297 CB PHE K 868 50.234 43.353 25.371 1.00 16.56 C 490 | ATOM 4298 CG PHE K 868 50.102 43.749 26.808 1.00 14.61 C 491 | ATOM 4299 CD1 PHE K 868 50.248 45.053 27.251 1.00 13.74 C 492 | ATOM 4300 CD2 PHE K 868 49.840 42.776 27.754 1.00 13.31 C 493 | ATOM 4301 CE1 PHE K 868 50.110 45.392 28.570 1.00 13.10 C 494 | ATOM 4302 CE2 PHE K 868 49.704 43.087 29.086 1.00 12.74 C 495 | ATOM 4303 CZ PHE K 868 49.827 44.402 29.482 1.00 13.19 C 496 | ATOM 4304 N THR K 869 52.326 43.275 22.849 1.00 18.83 N 497 | ATOM 4305 CA THR K 869 52.707 42.520 21.661 1.00 18.86 C 498 | ATOM 4306 C THR K 869 53.412 41.233 22.066 1.00 19.16 C 499 | ATOM 4307 O THR K 869 54.326 41.288 22.879 1.00 18.89 O 500 | ATOM 4308 CB THR K 869 53.658 43.334 20.771 1.00 18.95 C 501 | ATOM 4309 OG1 THR K 869 52.943 44.498 20.336 1.00 19.31 O 502 | ATOM 4310 CG2 THR K 869 54.143 42.525 19.580 1.00 18.71 C 503 | ATOM 4311 N ILE K 870 53.021 40.121 21.463 1.00 19.88 N 504 | ATOM 4312 CA ILE K 870 53.671 38.841 21.731 1.00 19.81 C 505 | ATOM 4313 C ILE K 870 54.532 38.487 20.526 1.00 20.49 C 506 | ATOM 4314 O ILE K 870 54.210 38.920 19.416 1.00 20.63 O 507 | ATOM 4315 CB ILE K 870 52.655 37.721 21.991 1.00 19.19 C 508 | ATOM 4316 CG1 ILE K 870 53.377 36.475 22.485 1.00 18.62 C 509 | ATOM 4317 CG2 ILE K 870 51.835 37.439 20.733 1.00 19.70 C 510 | ATOM 4318 CD1 ILE K 870 52.516 35.248 22.627 1.00 18.36 C 511 | ATOM 4319 N SER K 871 55.605 37.739 20.733 1.00 21.32 N 512 | ATOM 4320 CA SER K 871 56.468 37.368 19.617 1.00 22.22 C 513 | ATOM 4321 C SER K 871 57.253 36.126 20.016 1.00 22.71 C 514 | ATOM 4322 O SER K 871 57.462 35.915 21.205 1.00 22.90 O 515 | ATOM 4323 CB SER K 871 57.421 38.497 19.232 1.00 22.40 C 516 | ATOM 4324 OG SER K 871 57.877 39.147 20.421 1.00 23.72 O 517 | ATOM 4325 N ARG K 872 57.618 35.339 19.013 1.00 23.22 N 518 | ATOM 4326 CA ARG K 872 58.369 34.115 19.224 1.00 23.90 C 519 | ATOM 4327 C ARG K 872 59.492 34.003 18.198 1.00 25.14 C 520 | ATOM 4328 O ARG K 872 59.247 34.380 17.049 1.00 25.96 O 521 | ATOM 4329 CB ARG K 872 57.469 32.915 18.925 1.00 22.67 C 522 | ATOM 4330 CG ARG K 872 56.408 32.535 19.932 1.00 20.93 C 523 | ATOM 4331 CD ARG K 872 56.823 31.204 20.547 1.00 19.90 C 524 | ATOM 4332 NE ARG K 872 55.981 30.136 20.078 1.00 18.98 N 525 | ATOM 4333 CZ ARG K 872 56.233 28.842 20.100 1.00 18.17 C 526 | ATOM 4334 NH1 ARG K 872 57.335 28.292 20.551 1.00 17.45 N 527 | ATOM 4335 NH2 ARG K 872 55.256 28.074 19.649 1.00 18.55 N 528 | ATOM 4336 N ASP K 873 60.639 33.448 18.555 1.00 26.61 N 529 | ATOM 4337 CA ASP K 873 61.664 
33.221 17.527 1.00 27.71 C 530 | ATOM 4338 C ASP K 873 61.152 32.054 16.680 1.00 28.03 C 531 | ATOM 4339 O ASP K 873 60.865 30.999 17.255 1.00 28.25 O 532 | ATOM 4340 CB ASP K 873 63.012 32.875 18.138 1.00 27.91 C 533 | ATOM 4341 CG ASP K 873 63.529 33.945 19.071 1.00 28.59 C 534 | ATOM 4342 OD1 ASP K 873 63.551 35.120 18.641 1.00 29.24 O 535 | ATOM 4343 OD2 ASP K 873 63.893 33.624 20.222 1.00 29.11 O 536 | ATOM 4344 N LYS K 874 60.899 32.253 15.397 1.00 28.58 N 537 | ATOM 4345 CA LYS K 874 60.338 31.174 14.580 1.00 28.70 C 538 | ATOM 4346 C LYS K 874 61.072 29.861 14.835 1.00 28.05 C 539 | ATOM 4347 O LYS K 874 62.304 29.841 14.798 1.00 27.86 O 540 | ATOM 4348 CB LYS K 874 60.432 31.443 13.075 1.00 29.33 C 541 | ATOM 4349 CG LYS K 874 59.131 31.973 12.515 1.00 30.75 C 542 | ATOM 4350 CD LYS K 874 59.165 33.483 12.316 1.00 31.29 C 543 | ATOM 4351 CE LYS K 874 58.804 33.850 10.883 1.00 32.19 C 544 | ATOM 4352 NZ LYS K 874 59.012 32.739 9.913 1.00 32.20 N 545 | ATOM 4353 N GLY K 875 60.313 28.811 15.101 1.00 26.83 N 546 | ATOM 4354 CA GLY K 875 60.853 27.503 15.346 1.00 26.28 C 547 | ATOM 4355 C GLY K 875 61.589 27.288 16.645 1.00 26.40 C 548 | ATOM 4356 O GLY K 875 62.150 26.198 16.813 1.00 27.43 O 549 | ATOM 4357 N LYS K 876 61.584 28.206 17.592 1.00 26.37 N 550 | ATOM 4358 CA LYS K 876 62.266 27.997 18.861 1.00 26.33 C 551 | ATOM 4359 C LYS K 876 61.312 28.064 20.045 1.00 25.05 C 552 | ATOM 4360 O LYS K 876 60.221 28.620 19.925 1.00 24.79 O 553 | ATOM 4361 CB LYS K 876 63.417 28.994 19.048 1.00 27.84 C 554 | ATOM 4362 CG LYS K 876 64.703 28.316 19.504 1.00 29.96 C 555 | ATOM 4363 CD LYS K 876 64.593 27.663 20.879 1.00 30.45 C 556 | ATOM 4364 CE LYS K 876 64.390 26.157 20.772 1.00 31.05 C 557 | ATOM 4365 NZ LYS K 876 64.423 25.489 22.109 1.00 31.28 N 558 | ATOM 4366 N ASN K 877 61.718 27.550 21.199 1.00 23.18 N 559 | ATOM 4367 CA ASN K 877 60.905 27.560 22.404 1.00 21.13 C 560 | ATOM 4368 C ASN K 877 61.120 28.801 23.244 1.00 20.38 C 561 | ATOM 4369 O ASN K 877 61.400 28.723 24.437 1.00 19.86 O 562 | ATOM 4370 CB ASN K 877 61.139 26.280 23.217 1.00 20.01 C 563 | ATOM 4371 CG ASN K 877 60.795 25.052 22.399 1.00 19.72 C 564 | ATOM 4372 OD1 ASN K 877 59.697 24.848 21.883 1.00 19.70 O 565 | ATOM 4373 ND2 ASN K 877 61.742 24.147 22.223 1.00 20.04 N 566 | ATOM 4374 N THR K 878 60.841 29.971 22.666 1.00 20.29 N 567 | ATOM 4375 CA THR K 878 60.898 31.245 23.360 1.00 20.28 C 568 | ATOM 4376 C THR K 878 59.673 32.107 23.045 1.00 20.69 C 569 | ATOM 4377 O THR K 878 59.192 32.141 21.918 1.00 20.87 O 570 | ATOM 4378 CB THR K 878 62.097 32.140 22.990 1.00 19.46 C 571 | ATOM 4379 OG1 THR K 878 62.049 32.304 21.567 1.00 19.09 O 572 | ATOM 4380 CG2 THR K 878 63.417 31.562 23.454 1.00 19.08 C 573 | ATOM 4381 N VAL K 879 59.208 32.846 24.048 1.00 20.40 N 574 | ATOM 4382 CA VAL K 879 58.102 33.766 23.893 1.00 19.65 C 575 | ATOM 4383 C VAL K 879 58.450 35.103 24.560 1.00 19.44 C 576 | ATOM 4384 O VAL K 879 58.963 35.153 25.668 1.00 19.47 O 577 | ATOM 4385 CB VAL K 879 56.760 33.356 24.523 1.00 19.00 C 578 | ATOM 4386 CG1 VAL K 879 55.719 33.080 23.453 1.00 18.20 C 579 | ATOM 4387 CG2 VAL K 879 56.903 32.245 25.543 1.00 18.58 C 580 | ATOM 4388 N TYR K 880 58.088 36.185 23.889 1.00 19.14 N 581 | ATOM 4389 CA TYR K 880 58.300 37.532 24.388 1.00 18.81 C 582 | ATOM 4390 C TYR K 880 56.970 38.261 24.492 1.00 18.40 C 583 | ATOM 4391 O TYR K 880 56.057 37.947 23.740 1.00 18.90 O 584 | ATOM 4392 CB TYR K 880 59.284 38.320 23.518 1.00 18.50 C 585 | ATOM 4393 CG TYR K 880 60.546 37.549 23.192 1.00 18.78 C 586 | ATOM 4394 
CD1 TYR K 880 61.598 37.452 24.087 1.00 18.24 C 587 | ATOM 4395 CD2 TYR K 880 60.640 36.862 21.980 1.00 19.27 C 588 | ATOM 4396 CE1 TYR K 880 62.731 36.721 23.764 1.00 18.88 C 589 | ATOM 4397 CE2 TYR K 880 61.770 36.136 21.646 1.00 18.88 C 590 | ATOM 4398 CZ TYR K 880 62.805 36.060 22.557 1.00 19.07 C 591 | ATOM 4399 OH TYR K 880 63.915 35.313 22.243 1.00 19.26 O 592 | ATOM 4400 N LEU K 881 56.861 39.184 25.427 1.00 18.01 N 593 | ATOM 4401 CA LEU K 881 55.661 39.989 25.572 1.00 17.90 C 594 | ATOM 4402 C LEU K 881 56.045 41.444 25.810 1.00 18.87 C 595 | ATOM 4403 O LEU K 881 56.438 41.841 26.908 1.00 19.71 O 596 | ATOM 4404 CB LEU K 881 54.766 39.484 26.702 1.00 17.50 C 597 | ATOM 4405 CG LEU K 881 53.378 40.134 26.783 1.00 18.08 C 598 | ATOM 4406 CD1 LEU K 881 52.482 39.691 25.630 1.00 17.73 C 599 | ATOM 4407 CD2 LEU K 881 52.713 39.858 28.127 1.00 17.63 C 600 | ATOM 4408 N GLN K 882 56.045 42.240 24.750 1.00 19.74 N 601 | ATOM 4409 CA GLN K 882 56.245 43.681 24.876 1.00 20.24 C 602 | ATOM 4410 C GLN K 882 54.979 44.292 25.474 1.00 20.22 C 603 | ATOM 4411 O GLN K 882 53.879 44.082 24.951 1.00 19.62 O 604 | ATOM 4412 CB GLN K 882 56.541 44.307 23.521 1.00 20.77 C 605 | ATOM 4413 CG GLN K 882 56.946 45.765 23.645 1.00 22.76 C 606 | ATOM 4414 CD GLN K 882 58.347 45.944 24.193 1.00 24.18 C 607 | ATOM 4415 OE1 GLN K 882 58.540 46.732 25.126 1.00 24.57 O 608 | ATOM 4416 NE2 GLN K 882 59.315 45.233 23.609 1.00 24.59 N 609 | ATOM 4417 N MET K 883 55.113 44.951 26.619 1.00 20.41 N 610 | ATOM 4418 CA MET K 883 53.955 45.546 27.279 1.00 20.22 C 611 | ATOM 4419 C MET K 883 54.072 47.068 27.318 1.00 20.44 C 612 | ATOM 4420 O MET K 883 54.738 47.621 28.182 1.00 20.54 O 613 | ATOM 4421 CB MET K 883 53.772 45.005 28.693 1.00 19.90 C 614 | ATOM 4422 CG MET K 883 53.601 43.501 28.773 1.00 19.91 C 615 | ATOM 4423 SD MET K 883 53.966 42.852 30.412 1.00 19.87 S 616 | ATOM 4424 CE MET K 883 52.423 43.231 31.240 1.00 19.85 C 617 | ATOM 4425 N ASP K 884 53.375 47.731 26.401 1.00 20.73 N 618 | ATOM 4426 CA ASP K 884 53.329 49.182 26.335 1.00 20.30 C 619 | ATOM 4427 C ASP K 884 52.055 49.777 26.922 1.00 20.16 C 620 | ATOM 4428 O ASP K 884 50.984 49.176 26.928 1.00 19.12 O 621 | ATOM 4429 CB ASP K 884 53.484 49.628 24.877 1.00 20.32 C 622 | ATOM 4430 CG ASP K 884 54.838 49.262 24.310 1.00 21.16 C 623 | ATOM 4431 OD1 ASP K 884 55.887 49.520 24.935 1.00 21.08 O 624 | ATOM 4432 OD2 ASP K 884 54.894 48.686 23.201 1.00 22.64 O 625 | ATOM 4433 N SER K 885 52.157 51.003 27.422 1.00 20.62 N 626 | ATOM 4434 CA SER K 885 51.043 51.747 27.994 1.00 20.77 C 627 | ATOM 4435 C SER K 885 50.428 50.955 29.141 1.00 21.00 C 628 | ATOM 4436 O SER K 885 49.232 50.684 29.175 1.00 20.32 O 629 | ATOM 4437 CB SER K 885 49.948 52.044 26.971 1.00 20.42 C 630 | ATOM 4438 OG SER K 885 50.456 52.674 25.823 1.00 20.72 O 631 | ATOM 4439 N LEU K 886 51.304 50.559 30.056 1.00 21.78 N 632 | ATOM 4440 CA LEU K 886 50.858 49.707 31.159 1.00 22.39 C 633 | ATOM 4441 C LEU K 886 49.876 50.494 32.010 1.00 22.79 C 634 | ATOM 4442 O LEU K 886 49.969 51.720 32.041 1.00 23.61 O 635 | ATOM 4443 CB LEU K 886 52.040 49.214 31.989 1.00 22.07 C 636 | ATOM 4444 CG LEU K 886 52.850 48.078 31.372 1.00 21.79 C 637 | ATOM 4445 CD1 LEU K 886 54.293 48.118 31.826 1.00 21.70 C 638 | ATOM 4446 CD2 LEU K 886 52.209 46.741 31.716 1.00 22.22 C 639 | ATOM 4447 N LYS K 887 48.965 49.789 32.654 1.00 22.93 N 640 | ATOM 4448 CA LYS K 887 48.011 50.465 33.520 1.00 23.25 C 641 | ATOM 4449 C LYS K 887 47.837 49.647 34.787 1.00 23.46 C 642 | ATOM 4450 O LYS K 887 48.045 48.430 34.822 
1.00 23.31 O 643 | ATOM 4451 CB LYS K 887 46.694 50.738 32.839 1.00 23.63 C 644 | ATOM 4452 CG LYS K 887 46.032 49.570 32.164 1.00 24.85 C 645 | ATOM 4453 CD LYS K 887 44.781 49.989 31.420 1.00 26.27 C 646 | ATOM 4454 CE LYS K 887 45.044 51.052 30.372 1.00 27.05 C 647 | ATOM 4455 NZ LYS K 887 46.467 51.103 29.930 1.00 27.75 N 648 | ATOM 4456 N PRO K 888 47.383 50.304 35.847 1.00 23.76 N 649 | ATOM 4457 CA PRO K 888 47.182 49.660 37.138 1.00 24.22 C 650 | ATOM 4458 C PRO K 888 46.510 48.310 37.056 1.00 25.01 C 651 | ATOM 4459 O PRO K 888 46.927 47.340 37.688 1.00 25.12 O 652 | ATOM 4460 CB PRO K 888 46.357 50.693 37.895 1.00 23.52 C 653 | ATOM 4461 CG PRO K 888 46.869 51.992 37.361 1.00 22.98 C 654 | ATOM 4462 CD PRO K 888 47.089 51.750 35.895 1.00 22.56 C 655 | ATOM 4463 N GLU K 889 45.504 48.145 36.207 1.00 25.77 N 656 | ATOM 4464 CA GLU K 889 44.794 46.934 35.909 1.00 26.32 C 657 | ATOM 4465 C GLU K 889 45.667 45.768 35.454 1.00 25.28 C 658 | ATOM 4466 O GLU K 889 45.266 44.608 35.595 1.00 25.35 O 659 | ATOM 4467 CB GLU K 889 43.778 47.160 34.775 1.00 27.55 C 660 | ATOM 4468 CG GLU K 889 42.516 47.887 35.170 1.00 29.80 C 661 | ATOM 4469 CD GLU K 889 42.739 49.375 35.386 1.00 31.51 C 662 | ATOM 4470 OE1 GLU K 889 43.217 50.061 34.452 1.00 32.12 O 663 | ATOM 4471 OE2 GLU K 889 42.435 49.815 36.520 1.00 32.26 O 664 | ATOM 4472 N ASP K 890 46.842 46.035 34.897 1.00 23.39 N 665 | ATOM 4473 CA ASP K 890 47.744 44.997 34.440 1.00 21.77 C 666 | ATOM 4474 C ASP K 890 48.617 44.435 35.554 1.00 20.45 C 667 | ATOM 4475 O ASP K 890 49.441 43.570 35.268 1.00 20.23 O 668 | ATOM 4476 CB ASP K 890 48.656 45.522 33.325 1.00 21.57 C 669 | ATOM 4477 CG ASP K 890 47.919 46.156 32.171 1.00 20.94 C 670 | ATOM 4478 OD1 ASP K 890 46.889 45.631 31.708 1.00 21.53 O 671 | ATOM 4479 OD2 ASP K 890 48.373 47.202 31.681 1.00 20.66 O 672 | ATOM 4480 N THR K 891 48.521 44.963 36.758 1.00 19.40 N 673 | ATOM 4481 CA THR K 891 49.323 44.517 37.887 1.00 18.83 C 674 | ATOM 4482 C THR K 891 48.955 43.078 38.191 1.00 19.05 C 675 | ATOM 4483 O THR K 891 47.795 42.838 38.507 1.00 19.72 O 676 | ATOM 4484 CB THR K 891 49.096 45.414 39.111 1.00 17.50 C 677 | ATOM 4485 OG1 THR K 891 49.761 46.654 38.818 1.00 17.96 O 678 | ATOM 4486 CG2 THR K 891 49.657 44.850 40.395 1.00 16.25 C 679 | ATOM 4487 N ALA K 892 49.906 42.163 38.076 1.00 18.71 N 680 | ATOM 4488 CA ALA K 892 49.618 40.762 38.324 1.00 18.37 C 681 | ATOM 4489 C ALA K 892 50.871 39.927 38.135 1.00 19.03 C 682 | ATOM 4490 O ALA K 892 51.873 40.468 37.668 1.00 18.84 O 683 | ATOM 4491 CB ALA K 892 48.575 40.322 37.302 1.00 18.61 C 684 | ATOM 4492 N THR K 893 50.800 38.635 38.500 1.00 19.15 N 685 | ATOM 4493 CA THR K 893 52.019 37.839 38.274 1.00 19.01 C 686 | ATOM 4494 C THR K 893 51.736 37.144 36.940 1.00 18.69 C 687 | ATOM 4495 O THR K 893 50.629 36.656 36.754 1.00 18.07 O 688 | ATOM 4496 CB THR K 893 52.520 36.969 39.411 1.00 18.54 C 689 | ATOM 4497 OG1 THR K 893 52.619 35.576 39.070 1.00 18.02 O 690 | ATOM 4498 CG2 THR K 893 51.649 37.167 40.637 1.00 18.18 C 691 | ATOM 4499 N TYR K 894 52.670 37.330 36.011 1.00 18.21 N 692 | ATOM 4500 CA TYR K 894 52.539 36.791 34.667 1.00 17.45 C 693 | ATOM 4501 C TYR K 894 53.245 35.443 34.584 1.00 17.17 C 694 | ATOM 4502 O TYR K 894 54.286 35.240 35.206 1.00 17.62 O 695 | ATOM 4503 CB TYR K 894 53.091 37.747 33.597 1.00 17.02 C 696 | ATOM 4504 CG TYR K 894 52.176 38.940 33.374 1.00 16.29 C 697 | ATOM 4505 CD1 TYR K 894 52.020 39.881 34.394 1.00 16.08 C 698 | ATOM 4506 CD2 TYR K 894 51.449 39.109 32.213 1.00 15.48 C 699 | ATOM 4507 CE1 TYR 
K 894 51.155 40.946 34.250 1.00 16.53 C 700 | ATOM 4508 CE2 TYR K 894 50.590 40.183 32.059 1.00 15.71 C 701 | ATOM 4509 CZ TYR K 894 50.434 41.095 33.074 1.00 16.22 C 702 | ATOM 4510 OH TYR K 894 49.597 42.184 32.954 1.00 15.80 O 703 | ATOM 4511 N TYR K 895 52.611 34.496 33.906 1.00 16.40 N 704 | ATOM 4512 CA TYR K 895 53.165 33.168 33.753 1.00 15.81 C 705 | ATOM 4513 C TYR K 895 53.354 32.788 32.288 1.00 15.14 C 706 | ATOM 4514 O TYR K 895 52.423 32.989 31.517 1.00 14.87 O 707 | ATOM 4515 CB TYR K 895 52.202 32.131 34.331 1.00 15.68 C 708 | ATOM 4516 CG TYR K 895 51.988 32.192 35.823 1.00 15.27 C 709 | ATOM 4517 CD1 TYR K 895 50.973 33.004 36.327 1.00 14.73 C 710 | ATOM 4518 CD2 TYR K 895 52.764 31.456 36.702 1.00 14.37 C 711 | ATOM 4519 CE1 TYR K 895 50.728 33.062 37.679 1.00 14.93 C 712 | ATOM 4520 CE2 TYR K 895 52.520 31.512 38.060 1.00 15.03 C 713 | ATOM 4521 CZ TYR K 895 51.505 32.310 38.542 1.00 15.74 C 714 | ATOM 4522 OH TYR K 895 51.245 32.382 39.895 1.00 16.27 O 715 | ATOM 4523 N CYS K 896 54.491 32.202 31.973 1.00 14.80 N 716 | ATOM 4524 CA CYS K 896 54.762 31.699 30.636 1.00 14.91 C 717 | ATOM 4525 C CYS K 896 54.268 30.258 30.587 1.00 15.76 C 718 | ATOM 4526 O CYS K 896 54.378 29.530 31.589 1.00 15.39 O 719 | ATOM 4527 CB CYS K 896 56.247 31.741 30.325 1.00 13.78 C 720 | ATOM 4528 SG CYS K 896 56.771 30.814 28.873 1.00 14.85 S 721 | ATOM 4529 N ALA K 897 53.711 29.849 29.454 1.00 16.13 N 722 | ATOM 4530 CA ALA K 897 53.238 28.458 29.380 1.00 16.92 C 723 | ATOM 4531 C ALA K 897 53.470 27.886 27.998 1.00 16.94 C 724 | ATOM 4532 O ALA K 897 53.430 28.642 27.036 1.00 17.29 O 725 | ATOM 4533 CB ALA K 897 51.780 28.412 29.816 1.00 17.37 C 726 | ATOM 4534 N ALA K 898 53.682 26.583 27.896 1.00 17.48 N 727 | ATOM 4535 CA ALA K 898 53.985 25.979 26.604 1.00 18.22 C 728 | ATOM 4536 C ALA K 898 53.291 24.638 26.428 1.00 18.90 C 729 | ATOM 4537 O ALA K 898 52.936 23.956 27.380 1.00 20.21 O 730 | ATOM 4538 CB ALA K 898 55.493 25.820 26.448 1.00 17.27 C 731 | ATOM 4539 N GLY K 899 53.070 24.259 25.182 1.00 18.97 N 732 | ATOM 4540 CA GLY K 899 52.524 22.988 24.803 1.00 18.99 C 733 | ATOM 4541 C GLY K 899 51.020 22.852 24.903 1.00 19.23 C 734 | ATOM 4542 O GLY K 899 50.529 21.708 24.932 1.00 19.58 O 735 | ATOM 4543 N GLY K 900 50.302 23.961 24.960 1.00 18.57 N 736 | ATOM 4544 CA GLY K 900 48.856 23.935 25.024 1.00 18.17 C 737 | ATOM 4545 C GLY K 900 48.217 24.308 23.695 1.00 18.23 C 738 | ATOM 4546 O GLY K 900 48.788 24.166 22.620 1.00 17.32 O 739 | ATOM 4547 N TYR K 901 47.012 24.858 23.790 1.00 19.36 N 740 | ATOM 4548 CA TYR K 901 46.265 25.317 22.634 1.00 19.76 C 741 | ATOM 4549 C TYR K 901 45.647 26.692 22.853 1.00 20.23 C 742 | ATOM 4550 O TYR K 901 44.905 26.887 23.819 1.00 20.05 O 743 | ATOM 4551 CB TYR K 901 45.134 24.332 22.347 1.00 19.55 C 744 | ATOM 4552 CG TYR K 901 44.319 24.601 21.110 1.00 20.54 C 745 | ATOM 4553 CD1 TYR K 901 44.925 24.732 19.870 1.00 21.12 C 746 | ATOM 4554 CD2 TYR K 901 42.930 24.709 21.169 1.00 20.90 C 747 | ATOM 4555 CE1 TYR K 901 44.186 24.969 18.729 1.00 21.64 C 748 | ATOM 4556 CE2 TYR K 901 42.172 24.937 20.041 1.00 20.84 C 749 | ATOM 4557 CZ TYR K 901 42.812 25.052 18.832 1.00 21.95 C 750 | ATOM 4558 OH TYR K 901 42.086 25.301 17.686 1.00 23.61 O 751 | ATOM 4559 N GLU K 902 45.912 27.609 21.925 1.00 21.13 N 752 | ATOM 4560 CA GLU K 902 45.302 28.939 21.957 1.00 20.67 C 753 | ATOM 4561 C GLU K 902 45.406 29.556 23.327 1.00 19.99 C 754 | ATOM 4562 O GLU K 902 46.532 29.630 23.809 1.00 20.66 O 755 | ATOM 4563 CB GLU K 902 43.854 28.798 21.465 1.00 20.98 C 
756 | ATOM 4564 CG GLU K 902 43.633 29.511 20.146 1.00 22.75 C 757 | ATOM 4565 CD GLU K 902 42.654 28.878 19.199 1.00 23.33 C 758 | ATOM 4566 OE1 GLU K 902 43.044 27.892 18.556 1.00 24.65 O 759 | ATOM 4567 OE2 GLU K 902 41.504 29.302 19.000 1.00 23.81 O 760 | ATOM 4568 N LEU K 903 44.340 29.955 23.998 1.00 20.11 N 761 | ATOM 4569 CA LEU K 903 44.442 30.489 25.349 1.00 19.89 C 762 | ATOM 4570 C LEU K 903 43.742 29.583 26.356 1.00 20.16 C 763 | ATOM 4571 O LEU K 903 43.406 30.025 27.446 1.00 19.92 O 764 | ATOM 4572 CB LEU K 903 43.909 31.913 25.519 1.00 19.02 C 765 | ATOM 4573 CG LEU K 903 44.928 32.999 25.133 1.00 19.14 C 766 | ATOM 4574 CD1 LEU K 903 44.315 34.380 25.230 1.00 18.51 C 767 | ATOM 4575 CD2 LEU K 903 46.193 32.872 25.976 1.00 18.40 C 768 | ATOM 4576 N ARG K 904 43.558 28.323 25.985 1.00 20.90 N 769 | ATOM 4577 CA ARG K 904 42.903 27.349 26.841 1.00 21.60 C 770 | ATOM 4578 C ARG K 904 43.844 26.924 27.960 1.00 22.43 C 771 | ATOM 4579 O ARG K 904 44.629 25.997 27.856 1.00 22.43 O 772 | ATOM 4580 CB ARG K 904 42.494 26.107 26.064 1.00 22.21 C 773 | ATOM 4581 CG ARG K 904 41.094 26.013 25.490 1.00 21.92 C 774 | ATOM 4582 CD ARG K 904 41.048 26.759 24.181 1.00 22.66 C 775 | ATOM 4583 NE ARG K 904 40.220 26.297 23.122 1.00 23.53 N 776 | ATOM 4584 CZ ARG K 904 39.626 25.179 22.769 1.00 23.67 C 777 | ATOM 4585 NH1 ARG K 904 39.692 24.042 23.450 1.00 23.80 N 778 | ATOM 4586 NH2 ARG K 904 38.887 25.183 21.662 1.00 23.14 N 779 | ATOM 4587 N ASP K 905 43.702 27.536 29.113 1.00 23.86 N 780 | ATOM 4588 CA ASP K 905 44.560 27.282 30.265 1.00 24.65 C 781 | ATOM 4589 C ASP K 905 44.682 25.822 30.621 1.00 25.36 C 782 | ATOM 4590 O ASP K 905 45.821 25.362 30.833 1.00 25.49 O 783 | ATOM 4591 CB ASP K 905 44.145 28.251 31.340 1.00 25.14 C 784 | ATOM 4592 CG ASP K 905 43.961 27.765 32.742 1.00 26.11 C 785 | ATOM 4593 OD1 ASP K 905 42.805 27.341 32.976 1.00 26.80 O 786 | ATOM 4594 OD2 ASP K 905 44.914 27.861 33.539 1.00 26.33 O 787 | ATOM 4595 N ARG K 906 43.634 25.005 30.591 1.00 25.79 N 788 | ATOM 4596 CA ARG K 906 43.716 23.600 30.939 1.00 25.71 C 789 | ATOM 4597 C ARG K 906 44.544 22.764 29.973 1.00 24.35 C 790 | ATOM 4598 O ARG K 906 44.953 21.659 30.358 1.00 24.87 O 791 | ATOM 4599 CB ARG K 906 42.327 22.978 31.076 1.00 27.84 C 792 | ATOM 4600 CG ARG K 906 41.559 22.926 29.766 1.00 30.61 C 793 | ATOM 4601 CD ARG K 906 40.171 22.324 29.885 1.00 31.25 C 794 | ATOM 4602 NE ARG K 906 39.426 22.436 28.645 1.00 33.04 N 795 | ATOM 4603 CZ ARG K 906 39.081 23.557 28.022 1.00 34.51 C 796 | ATOM 4604 NH1 ARG K 906 39.463 24.738 28.498 1.00 34.64 N 797 | ATOM 4605 NH2 ARG K 906 38.328 23.497 26.922 1.00 35.09 N 798 | ATOM 4606 N THR K 907 44.807 23.233 28.764 1.00 21.75 N 799 | ATOM 4607 CA THR K 907 45.594 22.476 27.805 1.00 19.87 C 800 | ATOM 4608 C THR K 907 47.083 22.723 27.901 1.00 19.03 C 801 | ATOM 4609 O THR K 907 47.853 22.110 27.171 1.00 18.98 O 802 | ATOM 4610 CB THR K 907 45.077 22.726 26.381 1.00 19.59 C 803 | ATOM 4611 OG1 THR K 907 45.138 24.107 26.031 1.00 19.82 O 804 | ATOM 4612 CG2 THR K 907 43.613 22.327 26.281 1.00 19.48 C 805 | ATOM 4613 N TYR K 908 47.552 23.564 28.796 1.00 18.33 N 806 | ATOM 4614 CA TYR K 908 48.962 23.906 28.971 1.00 16.99 C 807 | ATOM 4615 C TYR K 908 49.570 23.111 30.104 1.00 17.55 C 808 | ATOM 4616 O TYR K 908 49.121 23.261 31.239 1.00 18.01 O 809 | ATOM 4617 CB TYR K 908 49.048 25.414 29.256 1.00 15.32 C 810 | ATOM 4618 CG TYR K 908 48.898 26.244 27.993 1.00 14.02 C 811 | ATOM 4619 CD1 TYR K 908 47.675 26.667 27.483 1.00 12.69 C 812 | ATOM 4620 CD2 TYR K 908 
50.059 26.565 27.293 1.00 12.99 C 813 | ATOM 4621 CE1 TYR K 908 47.605 27.417 26.318 1.00 11.73 C 814 | ATOM 4622 CE2 TYR K 908 49.994 27.312 26.133 1.00 12.27 C 815 | ATOM 4623 CZ TYR K 908 48.772 27.742 25.653 1.00 11.64 C 816 | ATOM 4624 OH TYR K 908 48.791 28.461 24.487 1.00 10.62 O 817 | ATOM 4625 N GLY K 909 50.551 22.262 29.844 1.00 18.94 N 818 | ATOM 4626 CA GLY K 909 51.134 21.422 30.875 1.00 19.61 C 819 | ATOM 4627 C GLY K 909 52.481 21.905 31.367 1.00 20.26 C 820 | ATOM 4628 O GLY K 909 53.011 21.341 32.323 1.00 21.13 O 821 | ATOM 4629 N GLN K 910 53.041 22.901 30.709 1.00 20.44 N 822 | ATOM 4630 CA GLN K 910 54.359 23.399 31.140 1.00 21.00 C 823 | ATOM 4631 C GLN K 910 54.214 24.862 31.537 1.00 20.06 C 824 | ATOM 4632 O GLN K 910 53.615 25.658 30.802 1.00 19.77 O 825 | ATOM 4633 CB GLN K 910 55.361 23.102 30.035 1.00 22.04 C 826 | ATOM 4634 CG GLN K 910 56.700 22.580 30.488 1.00 23.52 C 827 | ATOM 4635 CD GLN K 910 56.639 21.380 31.402 1.00 25.22 C 828 | ATOM 4636 OE1 GLN K 910 55.981 20.367 31.149 1.00 25.29 O 829 | ATOM 4637 NE2 GLN K 910 57.410 21.492 32.489 1.00 26.28 N 830 | ATOM 4638 N TRP K 911 54.509 25.182 32.795 1.00 18.83 N 831 | ATOM 4639 CA TRP K 911 54.206 26.480 33.363 1.00 18.63 C 832 | ATOM 4640 C TRP K 911 55.421 27.138 33.982 1.00 19.17 C 833 | ATOM 4641 O TRP K 911 56.138 26.491 34.743 1.00 19.69 O 834 | ATOM 4642 CB TRP K 911 53.115 26.401 34.445 1.00 17.43 C 835 | ATOM 4643 CG TRP K 911 51.764 26.094 33.857 1.00 16.84 C 836 | ATOM 4644 CD1 TRP K 911 51.264 24.854 33.573 1.00 16.06 C 837 | ATOM 4645 CD2 TRP K 911 50.788 27.042 33.410 1.00 16.30 C 838 | ATOM 4646 NE1 TRP K 911 50.029 24.988 32.997 1.00 16.24 N 839 | ATOM 4647 CE2 TRP K 911 49.712 26.313 32.884 1.00 16.10 C 840 | ATOM 4648 CE3 TRP K 911 50.718 28.438 33.412 1.00 16.34 C 841 | ATOM 4649 CZ2 TRP K 911 48.570 26.929 32.381 1.00 16.56 C 842 | ATOM 4650 CZ3 TRP K 911 49.585 29.058 32.924 1.00 16.18 C 843 | ATOM 4651 CH2 TRP K 911 48.523 28.300 32.421 1.00 16.74 C 844 | ATOM 4652 N GLY K 912 55.607 28.427 33.693 1.00 19.80 N 845 | ATOM 4653 CA GLY K 912 56.717 29.150 34.317 1.00 20.07 C 846 | ATOM 4654 C GLY K 912 56.386 29.303 35.801 1.00 21.02 C 847 | ATOM 4655 O GLY K 912 55.278 29.015 36.251 1.00 20.66 O 848 | ATOM 4656 N GLN K 913 57.333 29.804 36.584 1.00 22.58 N 849 | ATOM 4657 CA GLN K 913 57.085 29.983 38.012 1.00 24.02 C 850 | ATOM 4658 C GLN K 913 56.412 31.311 38.304 1.00 23.53 C 851 | ATOM 4659 O GLN K 913 55.884 31.506 39.400 1.00 24.43 O 852 | ATOM 4660 CB GLN K 913 58.384 29.842 38.809 1.00 26.18 C 853 | ATOM 4661 CG GLN K 913 58.093 29.398 40.238 1.00 29.33 C 854 | ATOM 4662 CD GLN K 913 58.359 27.908 40.415 1.00 31.04 C 855 | ATOM 4663 OE1 GLN K 913 59.520 27.492 40.317 1.00 31.49 O 856 | ATOM 4664 NE2 GLN K 913 57.292 27.151 40.671 1.00 31.10 N 857 | ATOM 4665 N GLY K 914 56.439 32.255 37.371 1.00 22.64 N 858 | ATOM 4666 CA GLY K 914 55.754 33.516 37.572 1.00 21.82 C 859 | ATOM 4667 C GLY K 914 56.677 34.691 37.807 1.00 21.13 C 860 | ATOM 4668 O GLY K 914 57.689 34.599 38.484 1.00 21.98 O 861 | ATOM 4669 N THR K 915 56.326 35.811 37.199 1.00 20.06 N 862 | ATOM 4670 CA THR K 915 57.036 37.067 37.302 1.00 19.20 C 863 | ATOM 4671 C THR K 915 56.075 38.189 37.668 1.00 19.00 C 864 | ATOM 4672 O THR K 915 55.066 38.354 36.990 1.00 18.95 O 865 | ATOM 4673 CB THR K 915 57.682 37.403 35.950 1.00 18.65 C 866 | ATOM 4674 OG1 THR K 915 58.099 36.171 35.356 1.00 19.33 O 867 | ATOM 4675 CG2 THR K 915 58.893 38.268 36.197 1.00 19.88 C 868 | ATOM 4676 N GLN K 916 56.339 38.894 38.752 1.00 18.72 
N 869 | ATOM 4677 CA GLN K 916 55.473 39.971 39.193 1.00 18.35 C 870 | ATOM 4678 C GLN K 916 55.697 41.168 38.282 1.00 17.66 C 871 | ATOM 4679 O GLN K 916 56.819 41.505 37.922 1.00 17.15 O 872 | ATOM 4680 CB GLN K 916 55.775 40.389 40.622 1.00 19.21 C 873 | ATOM 4681 CG GLN K 916 54.597 40.638 41.546 1.00 21.59 C 874 | ATOM 4682 CD GLN K 916 55.061 40.478 42.986 1.00 23.90 C 875 | ATOM 4683 OE1 GLN K 916 55.811 39.519 43.260 1.00 25.65 O 876 | ATOM 4684 NE2 GLN K 916 54.714 41.372 43.905 1.00 23.35 N 877 | ATOM 4685 N VAL K 917 54.616 41.786 37.875 1.00 17.34 N 878 | ATOM 4686 CA VAL K 917 54.586 42.981 37.043 1.00 17.10 C 879 | ATOM 4687 C VAL K 917 53.663 43.931 37.801 1.00 17.76 C 880 | ATOM 4688 O VAL K 917 52.493 43.600 38.030 1.00 17.44 O 881 | ATOM 4689 CB VAL K 917 54.059 42.650 35.644 1.00 16.80 C 882 | ATOM 4690 CG1 VAL K 917 53.588 43.874 34.874 1.00 16.48 C 883 | ATOM 4691 CG2 VAL K 917 55.130 41.908 34.846 1.00 16.68 C 884 | ATOM 4692 N THR K 918 54.191 45.038 38.302 1.00 18.45 N 885 | ATOM 4693 CA THR K 918 53.289 45.912 39.084 1.00 20.53 C 886 | ATOM 4694 C THR K 918 53.348 47.311 38.508 1.00 21.89 C 887 | ATOM 4695 O THR K 918 54.406 47.836 38.170 1.00 21.36 O 888 | ATOM 4696 CB THR K 918 53.518 45.784 40.588 1.00 20.03 C 889 | ATOM 4697 OG1 THR K 918 53.261 47.011 41.271 1.00 20.09 O 890 | ATOM 4698 CG2 THR K 918 54.947 45.393 40.875 1.00 20.24 C 891 | ATOM 4699 N VAL K 919 52.158 47.848 38.251 1.00 24.12 N 892 | ATOM 4700 CA VAL K 919 52.035 49.172 37.638 1.00 26.46 C 893 | ATOM 4701 C VAL K 919 51.363 50.086 38.652 1.00 28.81 C 894 | ATOM 4702 O VAL K 919 50.299 49.778 39.178 1.00 29.01 O 895 | ATOM 4703 CB VAL K 919 51.283 49.122 36.306 1.00 25.72 C 896 | ATOM 4704 CG1 VAL K 919 51.271 50.475 35.615 1.00 25.39 C 897 | ATOM 4705 CG2 VAL K 919 51.873 48.095 35.345 1.00 25.20 C 898 | ATOM 4706 N SER K 920 52.024 51.181 38.990 1.00 32.33 N 899 | ATOM 4707 CA SER K 920 51.489 52.101 39.993 1.00 35.68 C 900 | ATOM 4708 C SER K 920 50.765 53.283 39.378 1.00 38.11 C 901 | ATOM 4709 O SER K 920 51.213 53.907 38.414 1.00 38.39 O 902 | ATOM 4710 CB SER K 920 52.607 52.521 40.942 1.00 35.94 C 903 | ATOM 4711 OG SER K 920 52.952 51.390 41.738 1.00 36.70 O 904 | ATOM 4712 N SER K 921 49.602 53.584 39.954 1.00 41.18 N 905 | ATOM 4713 CA SER K 921 48.767 54.684 39.490 1.00 44.05 C 906 | ATOM 4714 C SER K 921 49.556 55.982 39.331 1.00 46.10 C 907 | ATOM 4715 O SER K 921 50.594 56.191 39.964 1.00 46.36 O 908 | ATOM 4716 CB SER K 921 47.599 54.948 40.442 1.00 44.05 C 909 | ATOM 4717 OG SER K 921 47.727 54.245 41.666 1.00 44.52 O 910 | ATOM 4718 N ARG K 922 49.022 56.873 38.498 1.00 48.40 N 911 | ATOM 4719 CA ARG K 922 49.631 58.176 38.288 1.00 50.82 C 912 | ATOM 4720 C ARG K 922 49.235 59.171 39.378 1.00 52.15 C 913 | ATOM 4721 O ARG K 922 49.988 60.109 39.640 1.00 52.64 O 914 | ATOM 4722 CB ARG K 922 49.253 58.797 36.951 1.00 51.40 C 915 | ATOM 4723 CG ARG K 922 49.451 57.887 35.759 1.00 52.45 C 916 | ATOM 4724 CD ARG K 922 48.137 57.730 35.010 1.00 53.09 C 917 | ATOM 4725 NE ARG K 922 48.365 57.181 33.678 1.00 53.81 N 918 | ATOM 4726 CZ ARG K 922 47.396 56.606 32.975 1.00 54.60 C 919 | ATOM 4727 NH1 ARG K 922 46.174 56.522 33.490 1.00 54.86 N 920 | ATOM 4728 NH2 ARG K 922 47.664 56.124 31.769 1.00 55.19 N 921 | ATOM 4729 N GLY K 923 48.069 58.982 39.974 1.00 53.38 N 922 | ATOM 4730 CA GLY K 923 47.594 59.847 41.049 1.00 54.64 C 923 | ATOM 4731 C GLY K 923 48.376 59.512 42.320 1.00 55.54 C 924 | ATOM 4732 O GLY K 923 49.613 59.524 42.270 1.00 55.57 O 925 | ATOM 4733 N ARG K 924 47.679 
59.221 43.417 1.00 56.35 N 926 | ATOM 4734 CA ARG K 924 48.370 58.875 44.661 1.00 57.17 C 927 | ATOM 4735 C ARG K 924 47.384 58.556 45.783 1.00 57.28 C 928 | ATOM 4736 O ARG K 924 47.498 57.517 46.439 1.00 57.26 O 929 | ATOM 4737 CB ARG K 924 49.316 59.985 45.099 1.00 57.94 C 930 | ATOM 4738 CG ARG K 924 50.768 59.674 45.342 1.00 58.84 C 931 | ATOM 4739 CD ARG K 924 51.388 58.576 44.521 1.00 59.52 C 932 | ATOM 4740 NE ARG K 924 52.115 58.955 43.332 1.00 60.27 N 933 | ATOM 4741 CZ ARG K 924 53.109 59.809 43.153 1.00 60.81 C 934 | ATOM 4742 NH1 ARG K 924 53.585 60.525 44.169 1.00 61.10 N 935 | ATOM 4743 NH2 ARG K 924 53.643 59.980 41.944 1.00 60.74 N 936 | -------------------------------------------------------------------------------- /data/example/domain_list.txt: -------------------------------------------------------------------------------- 1 | example/1aj4A00-2-161 1aj4 A 2 161 2 | example/1bzqK00-801-924 1bzq K 801 924 3 | example/1ge0A00-1-130 1ge0 A 1 130 4 | -------------------------------------------------------------------------------- /data/fold2seq2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/fold2seq/b9a97d81eac329b5259ad10e2a6f4fe80ade542f/data/fold2seq2.png -------------------------------------------------------------------------------- /data/fold_feat_gen.py: -------------------------------------------------------------------------------- 1 | ########################################################################## 2 | # Load pdbs with the chain and residue constraints. 3 | ########################################################################## 4 | 5 | import numpy as np 6 | import os 7 | import sys 8 | import pickle 9 | import determine_ss 10 | import argparse 11 | import ss_dense_gen 12 | 13 | threetoone={ 14 | 'ALA':'A', 15 | 'ARG':'R', 16 | 'ASN':'N', 17 | 'ASP':'D', 18 | 'CYS':'C', 19 | 'GLU':'E', 20 | 'GLN':'Q', 21 | 'GLY':'G', 22 | 'HIS':'H', 23 | 'ILE':'I', 24 | 'LEU':'L', 25 | 'LYS':'K', 26 | 'MET':'M', 27 | 'PHE':'F', 28 | 'PRO':'P', 29 | 'SER':'S', 30 | 'THR':'T', 31 | 'TRP':'W', 32 | 'TYR':'Y', 33 | 'VAL':'V', 34 | 'MSE':'M' 35 | } 36 | 37 | def selection(pdb_path, chain, start, end, ss): 38 | b=0 39 | jd=0 40 | jd1=0 41 | outs=[] 42 | start = start.replace(')','') 43 | start = start.replace('(','') 44 | seqs='' 45 | ca_coor=[] 46 | #print (start, start.strip('(')) 47 | #exit(0) 48 | end = end.replace(')','') 49 | end = end.replace('(','') 50 | with open(pdb_path, "r") as f: 51 | for lines in f: 52 | if len(lines)<5 or lines[0:4]!='ATOM': 53 | if b==2: 54 | break 55 | continue 56 | if lines[21]!=chain: 57 | if b==2: 58 | break 59 | continue 60 | resi = lines[22:27].strip(' ') 61 | if b==2 and resi!=end: 62 | break 63 | if resi==start: 64 | b=1 65 | jd=1 66 | if resi==end: 67 | b=2 68 | jd1=1 69 | elif b==2: 70 | break 71 | if b==1 or b==2: 72 | 73 | if lines[13:16]=='CA ': 74 | resi={'name': lines[17:20]} 75 | resi['CA'] = [float(lines[30:38]), float(lines[38:46]), float(lines[46:54])] 76 | ca_coor.append(resi) 77 | seqs+=threetoone[lines[17:20]] 78 | if jd*jd1!=1: 79 | raise ValueError("encounter inconsistent pdb structure:"+pdb_path+chain+" "+start+','+end) 80 | 81 | 82 | #print(seqs) 83 | start,end = dp(seqs, ss['seq']) 84 | 85 | j=start 86 | for i in range(len(ca_coor)): 87 | while ss['seq'][j]!=threetoone[ca_coor[i]['name']]: 88 | j+=1 89 | ca_coor[i]['ss'] = ss['ss'][j] 90 | j+=1 91 | return ca_coor, ss['seq'][start:end] 92 | 93 | 94 | def dp(cst, cseq): 95 | 96 | k=0 97 | 
best_start=0 98 | best_end=0 99 | best_score=10000 100 | # assumed behavior (reconstructed): slide cst along cseq and keep the window 101 | # with the fewest mismatches, returning its [start, end) indices into cseq. 102 | while k <= len(cseq)-len(cst): 103 | score=0 104 | for i in range(len(cst)): 105 | if cst[i]!=cseq[k+i]: 106 | score+=1 107 | if score < best_score: 108 | best_score=score 109 | best_start=k 110 | best_end=k+len(cst) 111 | k+=1 112 | 113 | return best_start, best_end 114 | 115 | 116 | # # pymol=[] 117 | # # pymol.append("load temp.pdb") 118 | # # pymol.append("select "+sele) 119 | # # pymol.append("save "+save_path+", sele") 120 | # # np.savetxt("load_pdb.pml", pymol, fmt="%s") 121 | # # os.system("/Applications/PyMOL.app/Contents/MacOS/PyMOL -cq load_pdb.pml") 122 | # #os.system("rm temp.pdb") 123 | # #os.system("rm load_pdb.pml") 124 | 125 | if __name__=='__main__': 126 | with open("seq_dict.pkl", "rb") as f: 127 | seq_dict = pickle.load(f) 128 | seq_dict['4g50A01/-77--1']['start']='-77' 129 | seq_dict['4g50A01/-77--1']['end']='-1' 130 | del(seq_dict['4z54B00/6-291']) 131 | del(seq_dict['4ztbA01/1-135']) 132 | del(seq_dict['4ztbA02/136-321']) 133 | del(seq_dict['5dejA00/10-125']) 134 | del(seq_dict['5fojA00/1-131']) 135 | 136 | domain_seq={} 137 | 138 | seq_ss = determine_ss.read_ss("ss.txt") 139 | num=0 140 | for i in seq_dict: 141 | label = i[0:4].upper()+':'+i[4] 142 | print(i) 143 | if label in seq_ss: 144 | #seq_dict_new[i] = seq_dict[i] 145 | 146 | #x1,x2 = read_domain("domains/"+i.replace('/','-'), seq_ss[label]) 147 | x1,x2 = selection("pdbs/"+seq_dict[i]['pdb']+'.pdb' , seq_dict[i]['chain'],\ 148 | seq_dict[i]['start'], seq_dict[i]['end'], seq_ss[label]) 149 | 150 | domain_seq[i]={} 151 | domain_seq[i]['seq'] =x2 152 | domain_seq[i]['3d'] = x1 153 | domain_seq[i]['fold'] = seq_dict[i]['fold'] 154 | 155 | 156 | print ("processed seqs: %d/%d" %(num, len(seq_dict))) 157 | num+=1 158 | 159 | with open("domain_dict.pkl", "wb") as f: 160 | pickle.dump(domain_seq, f) 161 | 162 | 163 | -------------------------------------------------------------------------------- /data/ss_dense_gen.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pickle 3 | import torch 4 | from collections import OrderedDict 5 | import matplotlib.pyplot as plt 6 | from scipy import stats 7 | from scipy.stats import norm 8 | from scipy.stats import multivariate_normal 9 | amino_acid = ['A', 'R', 'N', 'D', 'C', 'E', 'Q', 'G', 'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V'] 10 | IUPAC_VOCAB = OrderedDict([ 11 | ("<pad>", 0), 12 | ("<mask>", 1), 13 | ("<cls>", 2), 14 | ("<sep>", 3), 15 | ("<unk>", 4), 16 | ("A", 5), 17 | ("B", 6), 18 | ("C", 7), 19 | ("D", 8), 20 | ("E", 9), 21 | ("F", 10), 22 | ("G", 11), 23 | ("H", 12), 24 | ("I", 13), 25 | ("K", 14), 26 | ("L", 15), 27 | ("M", 16), 28 | ("N", 17), 29 | ("O", 18), 30 | ("P", 19), 31 | ("Q", 20), 32 | ("R", 21), 33 | ("S", 22), 34 | ("T", 23), 35 | ("U", 24), 36 | ("V", 25), 37 | ("W", 26), 38 | ("X", 27), 39 | ("Y", 28), 40 | ("Z", 29)]) 41 | 42 | def to_intger_padding(seq, maxlen): 43 | vec = np.zeros((maxlen), dtype=np.int32) 44 | for i in range(len(seq)): 45 | vec[i] = IUPAC_VOCAB[seq[i]] 46 | 47 | return vec 48 | 49 | 50 | 51 | def cal_mean(coor1): 52 | coor=[] 53 | for i in coor1: 54 | coor.append(i['CA']) 55 | return np.mean(coor, axis=0) 56 | 57 | def rot_matrix(theta, ux, uy, uz): 58 | 59 | sint = np.sin(theta) 60 | cost = np.cos(theta) 61 | 62 | rot =[ [ cost + ux**2*(1-cost), ux*uy*(1-cost) - uz*sint, ux*uz*(1-cost)+uy*sint ] , 63 | [ uy*ux*(1-cost) + uz*sint, cost + uy**2*(1-cost), uy*uz*(1-cost) - ux*sint], 64 | [ uz*ux*(1-cost) - uy*sint, uz*uy*(1-cost) + ux*sint, cost + uz**2*(1-cost)] 65 | ] 66 | return np.array(rot) 67 | 68 | def feat_gen(domain_dict): 69 | 70 | print ("start translating proteins to make the center of mass to be at the origin of the coordinate system.") 71 | 
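# A quick self-contained check of rot_matrix above (illustrative sketch with
# made-up coordinates): rotating a point about the in-plane axis
# (-y, x, 0)/|(x, y)| by theta = arccos(-z/|r|) lands it on the z-axis,
# which is exactly what the loop below asserts for the first CA atom.
# ca = np.array([1., 2., 3.])
# theta = np.arccos(-ca[2] / np.sqrt(np.sum(ca**2)))
# ux = -ca[1] / np.sqrt(ca[0]**2 + ca[1]**2)
# uy = ca[0] / np.sqrt(ca[0]**2 + ca[1]**2)
# c1 = np.matmul(rot_matrix(theta, ux, uy, 0.), ca)
# assert c1[0]**2 + c1[1]**2 < 1E-20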
print ("start rotating proteins to make the first CA atom on the z-axis......") 72 | print ("total number of proteins: ", len(domain_dict)) 73 | 74 | maxd =[] 75 | for i in domain_dict: 76 | camean = cal_mean(domain_dict[i]['3d']) 77 | 78 | for j in range(len(domain_dict[i]['3d'])): 79 | domain_dict[i]['3d'][j]['CA'] -= camean 80 | 81 | 82 | first_residue_ca_coor = domain_dict[i]['3d'][0]['CA'] 83 | 84 | theta = np.arccos(-first_residue_ca_coor[2] / np.sqrt(np.sum(first_residue_ca_coor**2))) 85 | x1 = -first_residue_ca_coor[1] / np.sqrt(first_residue_ca_coor[0]**2 + first_residue_ca_coor[1]**2) 86 | y1 = first_residue_ca_coor[0] / np.sqrt(first_residue_ca_coor[0]**2 + first_residue_ca_coor[1]**2) 87 | 88 | rot = rot_matrix(theta, x1, y1, 0.) 89 | 90 | c1 = np.matmul(rot, first_residue_ca_coor.T) 91 | assert c1[0]**2 + c1[1]**2 < 1E-20 92 | 93 | 94 | for j in range(len(domain_dict[i]['3d'])): 95 | domain_dict[i]['3d'][j]['CA'] = np.matmul(rot, domain_dict[i]['3d'][j]['CA'].T).T 96 | 97 | return domain_dict 98 | 99 | 100 | 101 | def featurization(domain_dict, keys): 102 | 103 | print ('start generating features......') 104 | box_size = 40. 105 | voxel_size= 2. 106 | std_gaussian = 2.0 107 | var_gaussian = std_gaussian**2 108 | threshold = (std_gaussian*3)**2 109 | numbox = int(box_size/voxel_size) 110 | 111 | ss_map = {'H': 0, 'G': 0, 'I': 0, 'B': 1, 'E':1, ' ':2, 'T':3, 'S':3} 112 | ratio_all=[] 113 | 114 | for i in keys: 115 | new_coor=[] 116 | features = np.zeros((numbox, numbox, numbox, 4), dtype=float) 117 | 118 | for j in range(len(domain_dict[i]['3d'])): 119 | new_coor.append(domain_dict[i]['3d'][j]['CA']) 120 | 121 | minc = np.min(new_coor, axis=0) 122 | maxc = np.max(new_coor, axis=0) 123 | 124 | 125 | 126 | ratio = box_size/(maxc-minc) 127 | 128 | print ('ratio: ', ratio) 129 | ratio_all.extend(ratio) 130 | centerx = np.linspace(minc[0]*ratio[0]+voxel_size/2, minc[0]*ratio[0]+box_size-voxel_size/2, numbox) 131 | centery = np.linspace(minc[1]*ratio[1]+voxel_size/2, minc[1]*ratio[1]+box_size-voxel_size/2, numbox) 132 | centerz = np.linspace(minc[2]*ratio[2]+voxel_size/2, minc[2]*ratio[2]+box_size-voxel_size/2, numbox) 133 | 134 | 135 | for j in range(len(domain_dict[i]['3d'])): 136 | atom_pos = domain_dict[i]['3d'][j]['CA']*ratio 137 | type_ss = ss_map[domain_dict[i]['3d'][j]['ss']] 138 | for i1 in range(numbox): 139 | for i2 in range(numbox): 140 | for i3 in range(numbox): 141 | 142 | d2square = (atom_pos[0]-centerx[i1])**2+(atom_pos[1]-centery[i2])**2+(atom_pos[2]-centerz[i3])**2 143 | 144 | 145 | density = np.exp(-d2square/var_gaussian) 146 | 147 | features[i1][i2][i3][type_ss] += density 148 | 149 | 150 | np.save("fold_features/"+i.replace('/','-'), features) 151 | 152 | 153 | 154 | 155 | if __name__=='__main__': 156 | with open("domain_dict.pkl", "rb") as f: 157 | domain_dict = pickle.load(f) 158 | domain_dict = feat_gen(domain_dict) 159 | keys=[] 160 | for i in domain_dict: 161 | keys.append(i) 162 | featurization(domain_dict, keys) 163 | 164 | 165 | 166 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | # This file may be used to create an environment using: 2 | # $ conda create --name <env> --file <this file> 3 | # platform: linux-64 4 | _libgcc_mutex=0.1=conda_forge 5 | _openmp_mutex=4.5=1_gnu 6 | ase=3.21.1=pypi_0 7 | biopython=1.77=pypi_0 8 | blas=1.0=mkl 9 | bzip2=1.0.8=h7f98852_4 10 | ca-certificates=2020.12.5=ha878542_0 11 | certifi=2020.6.20=pypi_0 12 | 
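# (key pins in this export: python=3.8.1 and pytorch=1.7.1 built against
#  cudatoolkit=10.2; entries with the pypi_0 build string were installed
#  with pip rather than conda)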
chardet=4.0.0=pypi_0 13 | click=7.1.2=pypi_0 14 | cudatoolkit=10.2.89=hfd86e86_1 15 | cycler=0.10.0=pypi_0 16 | decorator=4.4.2=pypi_0 17 | esm=0.2.0=pypi_0 18 | filelock=3.0.12=pypi_0 19 | freetype=2.9.1=h8a8886c_1 20 | gawk=5.1.0=h7f98852_0 21 | gettext=0.19.8.1=hf34092f_1004 22 | googledrivedownloader=0.4=pypi_0 23 | h5py=3.2.0=pypi_0 24 | idna=2.10=pypi_0 25 | intel-openmp=2019.4=243 26 | isodate=0.6.0=pypi_0 27 | jinja2=2.11.3=pypi_0 28 | joblib=1.0.0=pypi_0 29 | jpeg=9b=h024ee3a_2 30 | kiwisolver=1.2.0=pypi_0 31 | ld_impl_linux-64=2.33.1=h53a641e_7 32 | libedit=3.1.20181209=hc058e9b_0 33 | libffi=3.2.1=hd88cf55_4 34 | libgcc-ng=9.3.0=h2828fa1_18 35 | libgfortran-ng=7.3.0=hdf63c60_0 36 | libgomp=9.3.0=h2828fa1_18 37 | libidn2=2.3.0=h516909a_0 38 | libpng=1.6.37=hbc83047_0 39 | libstdcxx-ng=9.3.0=h6de172a_18 40 | libtiff=4.1.0=h2733197_0 41 | libunistring=0.9.10=h14c3975_0 42 | libuv=1.40.0=h7b6447c_0 43 | llvmlite=0.35.0=pypi_0 44 | markupsafe=1.1.1=pypi_0 45 | matplotlib=3.3.1=pypi_0 46 | mkl=2019.4=243 47 | mkl-service=2.3.0=py38he904b0f_0 48 | mkl_fft=1.0.15=py38ha843d7b_0 49 | mkl_random=1.1.0=py38h962f231_0 50 | mmseqs2=13.45111=h95f258a_1 51 | ncurses=6.1=he6710b0_1 52 | networkx=2.5=pypi_0 53 | ninja=1.9.0=py38hfd86e86_0 54 | numba=0.52.0=pypi_0 55 | numpy=1.18.1=py38h4f9e942_0 56 | numpy-base=1.18.1=py38hde5b4d6_1 57 | olefile=0.46=py_0 58 | openssl=1.1.1k=h7f98852_0 59 | packaging=20.9=pypi_0 60 | pandas=1.2.3=pypi_0 61 | pillow=7.0.0=py38hb39fc2d_0 62 | pip=20.0.2=py38_1 63 | pyparsing=2.4.7=pypi_0 64 | python=3.8.1=h0371630_1 65 | python-dateutil=2.8.1=pypi_0 66 | python-louvain=0.15=pypi_0 67 | python_abi=3.8=1_cp38 68 | pytorch=1.7.1=py3.8_cuda10.2.89_cudnn7.6.5_0 69 | pytz=2021.1=pypi_0 70 | rdflib=5.0.0=pypi_0 71 | readline=7.0=h7b6447c_5 72 | regex=2020.11.13=pypi_0 73 | requests=2.25.1=pypi_0 74 | sacremoses=0.0.43=pypi_0 75 | scikit-learn=0.24.0=pypi_0 76 | scipy=1.5.4=pypi_0 77 | sentencepiece=0.1.95=pypi_0 78 | setuptools=45.1.0=py38_0 79 | six=1.14.0=py38_0 80 | sklearn=0.0=pypi_0 81 | sqlite=3.30.1=h7b6447c_0 82 | threadpoolctl=2.1.0=pypi_0 83 | tk=8.6.8=hbc83047_0 84 | tokenizers=0.8.0rc4=pypi_0 85 | torch-cluster=1.5.9=pypi_0 86 | torch-geometric=1.6.3=pypi_0 87 | torch-scatter=2.0.6=pypi_0 88 | torch-sparse=0.6.9=pypi_0 89 | torch-spline-conv=1.2.1=pypi_0 90 | torchaudio=0.7.2=py38 91 | torchvision=0.8.2=py38_cu102 92 | tqdm=4.58.0=pypi_0 93 | transformers=3.0.0=pypi_0 94 | typing_extensions=3.7.4.3=py_0 95 | urllib3=1.26.3=pypi_0 96 | wget=1.20.1=h22169c7_0 97 | wheel=0.34.1=py38_0 98 | xz=5.2.4=h14c3975_4 99 | zlib=1.2.11=h7b6447c_3 100 | zstd=1.3.7=h0b5b093_0 101 | -------------------------------------------------------------------------------- /fold2seq1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/fold2seq/b9a97d81eac329b5259ad10e2a6f4fe80ade542f/fold2seq1.png -------------------------------------------------------------------------------- /fold2seq3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/fold2seq/b9a97d81eac329b5259ad10e2a6f4fe80ade542f/fold2seq3.png -------------------------------------------------------------------------------- /src/Utils/__pycache__/amino_acid.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/fold2seq/b9a97d81eac329b5259ad10e2a6f4fe80ade542f/src/Utils/__pycache__/amino_acid.cpython-37.pyc 
-------------------------------------------------------------------------------- /src/Utils/__pycache__/amino_acid.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/fold2seq/b9a97d81eac329b5259ad10e2a6f4fe80ade542f/src/Utils/__pycache__/amino_acid.cpython-38.pyc -------------------------------------------------------------------------------- /src/Utils/__pycache__/model_statistics.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/fold2seq/b9a97d81eac329b5259ad10e2a6f4fe80ade542f/src/Utils/__pycache__/model_statistics.cpython-37.pyc -------------------------------------------------------------------------------- /src/Utils/__pycache__/model_statistics.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/fold2seq/b9a97d81eac329b5259ad10e2a6f4fe80ade542f/src/Utils/__pycache__/model_statistics.cpython-38.pyc -------------------------------------------------------------------------------- /src/Utils/amino_acid.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import OrderedDict 3 | my_seqlabel = ["!","A","C", "D", "E", "F", "G", "H", "I", "K", "L", "M", "N", "P", "Q", "R", 4 | "S", "T", "V", "W", "Y", "*", '0' ] # * is the start symbol ! is the end symbol 5 | 6 | my_seqlabel_reverse = OrderedDict([ 7 | ("A", 1), 8 | ("C", 2), 9 | ("D", 3), 10 | ("E", 4), 11 | ("F", 5), 12 | ("G", 6), 13 | ("H", 7), 14 | ("I", 8), 15 | ("K", 9), 16 | ("L", 10), 17 | ("M", 11), 18 | ("N", 12), 19 | ("P", 13), 20 | ("Q", 14), 21 | ("R", 15), 22 | ("S", 16), 23 | ("T", 17), 24 | ("V", 18), 25 | ("W", 19), 26 | ("Y", 20),]) 27 | 28 | def seqlabel(seq): 29 | vec = np.zeros(len(seq)+1, dtype=np.int32) 30 | for i in range(len(seq)): 31 | vec[i] = my_seqlabel_reverse[seq[i]] 32 | return vec 33 | 34 | 35 | def Nature_seq(seq): 36 | 37 | for i in seq: 38 | if i not in amino_acid: 39 | return False 40 | 41 | return True 42 | 43 | 44 | 45 | import matplotlib.pyplot as plt 46 | def seq_length_plot(seq,name='seq_dis', maxlen=2000): 47 | 48 | length = [] 49 | for i in seq: 50 | length.append(len(i['seq'])) 51 | 52 | plt.hist(length, bins=100, range=[0,maxlen]) 53 | plt.xlabel("seq length") 54 | plt.ylabel("number") 55 | plt.savefig(name+".eps", format='eps') 56 | plt.show() 57 | 58 | 59 | def transformer_integer_padding(seq, maxlen=502): 60 | vec = np.zeros((maxlen)+2, dtype=np.int32)+my_seqlabel.index('0') 61 | vec[0] = my_seqlabel.index('*') 62 | vec[len(seq)+1] =my_seqlabel.index('!') 63 | 64 | for i in range(0, len(seq)): 65 | vec[i+1] = my_seqlabel.index(seq[i]) 66 | 67 | return vec 68 | 69 | 70 | def transformer_integer(seq): 71 | vec = np.zeros(len(seq)+2, dtype=np.int32) 72 | vec[0]=len(my_seqlabel)-1 73 | vec[-1] = 0 74 | 75 | for i in range(0, len(seq)): 76 | vec[i+1] = my_seqlabel_reverse[seq[i]] 77 | 78 | return vec 79 | 80 | # interger encoding and padding 81 | def to_intger_padding(seq, maxlen): 82 | vec = np.zeros((maxlen), dtype=np.int32) 83 | for i in range(len(seq)): 84 | #print (np.where(amino_acid==seq[i]), seq[i], seq) 85 | vec[i] = amino_acid.index(seq[i])+1 86 | 87 | return vec 88 | 89 | 90 | def to_onehot(seq, hparam, start=0): 91 | onehot = np.zeros((hparam['MAXLEN'], hparam['vocab_size']), dtype=np.int32) 92 | l = min(MAXLEN, len(seq)) 93 | for i in range(start, start + l): 94 
| onehot[i, AAINDEX.get(seq[i - start], 0)] = 1 95 | onehot[0:start, 0] = 1 96 | onehot[start + l:, 0] = 1 97 | return onehot 98 | 99 | 100 | def to_int(seq, hparam): 101 | 102 | out = np.zeros(hparam['MAXLEN'], dtype=np.int32) 103 | for i in range(len(seq)): 104 | out[i] = AAINDEX[seq[i]] 105 | 106 | return out -------------------------------------------------------------------------------- /src/Utils/hparam.py: -------------------------------------------------------------------------------- 1 | def hparam_pretrain_seq(): 2 | hparam={} 3 | hparam['ntoken']=20 4 | hparam['nhidden']=128 5 | hparam['nfeedfoward']=512 6 | hparam['nlayers'] =4 7 | hparam['dropout'] = 0.5 8 | 9 | hparam['seq_embedding'] = 'ProteinSeqTransformer' 10 | return hparam 11 | -------------------------------------------------------------------------------- /src/Utils/model_statistics.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | def net_param_num(model): 4 | num=0 5 | for i in model.parameters(): 6 | v=1 7 | for j in i.shape: 8 | v*=j 9 | 10 | num+=v 11 | return num 12 | 13 | def multi_class_accuracy(target, preds_score): 14 | top1=0. 15 | top3=0. 16 | top5=0. 17 | top10=0. 18 | 19 | preds_score = preds_score.detach().cpu().numpy() 20 | target = target.cpu().numpy() 21 | 22 | for i in range(len(preds_score)): 23 | ind_sorted = np.argsort(-preds_score[i]) 24 | 25 | if target[i] in ind_sorted[:1]: 26 | top1+=1 27 | if target[i] in ind_sorted[:3]: 28 | top3+=1 29 | if target[i] in ind_sorted[:5]: 30 | top5+=1 31 | if target[i] in ind_sorted[:10]: 32 | top10+=1 33 | 34 | #print ("top1 acc: %.4f" %(top1/len(preds_score))) 35 | #print ("top3 acc: %.4f" %(top3/len(preds_score))) 36 | #print ("top5 acc: %.4f" %(top5/len(preds_score))) 37 | #print ("top10 acc: %.4f" %(top10/len(preds_score))) 38 | 39 | return top1,top3,top5,top10 40 | -------------------------------------------------------------------------------- /src/fold_encoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.utils.data import Dataset, DataLoader 5 | import argparse 6 | import numpy as np 7 | import math 8 | import os 9 | import pickle 10 | import numpy as np 11 | from Utils import model_statistics 12 | 13 | 14 | class residual_block(nn.Module): 15 | def __init__(self, inc=4, outc=4, ks=(3,3,3), pads=(1,1,1)): 16 | super(residual_block, self).__init__() 17 | 18 | self.conv1 = nn.Conv3d(in_channels=inc, out_channels=outc, kernel_size=ks, padding=pads) 19 | self.bn1 = nn.BatchNorm3d(outc) 20 | self.avt1 = nn.ELU() 21 | 22 | self.conv2 = nn.Conv3d(in_channels=inc, out_channels=outc, kernel_size=ks, padding=pads) 23 | self.bn2 = nn.BatchNorm3d(outc) 24 | self.avt2 = nn.ELU() 25 | 26 | def forward(self, x): # x shape: [bs, channel, x, y, z] 27 | out = self.conv1(x) 28 | out = self.bn1(out) 29 | out = self.avt1(out) 30 | #print(out.shape) 31 | out = self.conv2(out) 32 | out = self.bn2(out) 33 | out = self.avt2(out) 34 | 35 | #print (out.shape) 36 | return x+out 37 | 38 | class transformer_encoder(nn.Module): 39 | def __init__(self, args): 40 | super(transformer_encoder, self).__init__() 41 | 42 | encoder_layer = nn.TransformerEncoderLayer(d_model=args.nhidden, nhead=8, dim_feedforward=args.fold_encoder_feedforward) 43 | self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=4) 44 | 45 | def forward(self, x): 46 | # x shape: [5*5*5, batch_size, 
nhidden] 47 | output = self.transformer_encoder(x) 48 | return output 49 | 50 | class PositionalEncoding_3D(nn.Module): 51 | 52 | def __init__(self, nhidden, dropout=0.1, max_len=10): 53 | super(PositionalEncoding_3D, self).__init__() 54 | self.dropout = nn.Dropout(p=dropout) 55 | 56 | pe = torch.zeros(max_len, max_len, max_len, nhidden) 57 | 58 | pe_1d = torch.zeros(max_len, nhidden) 59 | position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1) 60 | div_term = torch.exp(torch.arange(0, nhidden, 2).float() * (-math.log(10000.0) / nhidden)) 61 | # print (position.shape, div_term.shape, (position*div_term).shape) 62 | 63 | 64 | pe_1d[:, 0::2] = torch.sin(position * div_term) 65 | pe_1d[:, 1::2] = torch.cos(position * div_term) 66 | 67 | #print (pe_1d.shape, pe_1d.unsqueeze(1).unsqueeze(1).shape) 68 | 69 | pe += pe_1d.unsqueeze(0).unsqueeze(0).repeat(max_len, max_len, 1, 1) 70 | pe += pe_1d.unsqueeze(1).unsqueeze(0).repeat(max_len, 1, max_len, 1) 71 | pe += pe_1d.unsqueeze(1).unsqueeze(1).repeat(1, max_len, max_len, 1) 72 | 73 | # print (pe_1d.shape, pe_1d.unsqueeze(0).unsqueeze(0).repeat(max_len, max_len, 1, 1).shape, \ 74 | # pe_1d.unsqueeze(1).unsqueeze(0).repeat(max_len, 1, max_len, 1).shape, \ 75 | # pe_1d.unsqueeze(1).unsqueeze(1).repeat(1, max_len, max_len, 1).shape, pe.shape) 76 | 77 | #pe = pe.unsqueeze(0).transpose(0, 1) 78 | self.register_buffer('pe', pe) 79 | # pe [5, 5, 5, 64] 80 | 81 | def forward(self, x): 82 | # x [bsz, 5, 5, 5, 64] 83 | x = x + self.pe[:x.size(1), :x.size(2), :x.size(3), :] 84 | return self.dropout(x) 85 | 86 | 87 | class fold_encoder(nn.Module): 88 | def __init__(self, args): 89 | super(fold_encoder, self).__init__() 90 | 91 | # convolutional block 92 | self.rb_4 = nn.ModuleList([residual_block(inc=4, outc=4) for i in range(2)]) 93 | self.downsmp1 = nn.Conv3d(in_channels=4, out_channels=16, kernel_size=(3,3,3), stride = (2,2,2), padding=(1,1,1)) 94 | self.bn1 = nn.BatchNorm3d(16) 95 | self.avt1 = nn.ELU() 96 | 97 | self.rb_8 = nn.ModuleList([residual_block(inc=16, outc=16) for i in range(2)]) 98 | self.downsmp2 = nn.Conv3d(in_channels=16, out_channels=args.nhidden, kernel_size=(3,3,3), stride = (2,2,2), padding=(1,1,1)) 99 | self.bn2 = nn.BatchNorm3d(args.nhidden) 100 | self.avt2 = nn.ELU() 101 | 102 | self.rb_16 = nn.ModuleList([residual_block(inc=args.nhidden, outc=args.nhidden) for i in range(2)]) 103 | 104 | self.pos_encoder_3d = PositionalEncoding_3D(args.nhidden, args.dropout) 105 | self.transformer_encoder = transformer_encoder(args) 106 | #self.linear = nn.Linear(16, 64) 107 | 108 | # transformer block 109 | 110 | def forward(self, x):# x shpae: [batch_size, 20, 20, 20, 4] 111 | x = torch.transpose(x, 1, -1) 112 | out = x 113 | 114 | #print (out.shape) 115 | for i in range(2): 116 | out = self.rb_4[i](out) 117 | 118 | #print("before downsampling: ", out.shape) 119 | out = self.avt1(self.bn1(self.downsmp1(out))) 120 | 121 | #print ("after downsampling: ", out.shape) 122 | for i in range(2): 123 | out = self.rb_8[i](out) 124 | 125 | #print("before downsampling: ", out.shape) 126 | out = self.avt2(self.bn2(self.downsmp2(out))) 127 | #print ("after downsampling: ", out.shape) 128 | 129 | for i in range(2): 130 | out = self.rb_16[i](out) 131 | 132 | out = torch.transpose(out, -1, 1) 133 | #print ("before positional encoding: ", out.shape) # [bsz, 5, 5, 5, 64] 134 | 135 | out = self.pos_encoder_3d(out) 136 | 137 | out = torch.flatten(out, start_dim=1, end_dim=3) 138 | out = torch.transpose(out, 0, 1) 139 | out = self.transformer_encoder(out) 140 
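# Standalone shape check for this encoder (sketch; the args values below are
# the argparse defaults at the bottom of this file):
# enc = fold_encoder(argparse.Namespace(nhidden=128, dropout=0.1, fold_encoder_feedforward=512))
# voxels = torch.randn(2, 20, 20, 20, 4)    # [bsz, x, y, z, 4 ss channels]
# the two stride-2 convolutions reduce the 20x20x20 grid to 5x5x5,
# i.e. 125 "fold tokens" per protein for the transformer:
# assert enc(voxels).shape == (125, 2, 128) # [5*5*5, bsz, nhidden]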
| #print ("fold encoder out shape:", out.shape) 141 | # The output shape should be [125, batch_size, 64] 142 | return out 143 | 144 | 145 | class fold_classification(nn.Module): 146 | def __init__(self, args, hs=432, share_hs=960, nfolds=1227): 147 | super(fold_classification, self).__init__() 148 | self.fold_encoder = fold_encoder(args) 149 | self.l1 = nn.Linear(args.nhidden*15, nfolds) 150 | self.flat = nn.Flatten() 151 | self.avgpool = nn.AvgPool1d(8) 152 | def forward(self, x): 153 | out = self.fold_encoder(x) # x [125, bsz, 64] 154 | out = out.transpose(0, 1).transpose(1,2) # out[bsz, 64, 125] 155 | print (out.shape) 156 | out = self.avgpool(out) 157 | print (out.shape) 158 | out = self.flat(out) 159 | print(out.shape) 160 | out = self.l1(out) 161 | 162 | return out 163 | 164 | 165 | class fold_dataset(Dataset): 166 | def __init__(self, path="../cath_data/domain_dict.pkl", mode='train'): 167 | self.mode = mode 168 | with open(path, "rb") as f: 169 | domain_dict = pickle.load(f) 170 | 171 | fold_index= np.loadtxt("../cath_data/fold_index.txt", dtype='str') 172 | self.name_list=[] 173 | self.label=[] 174 | for i in domain_dict: 175 | if domain_dict[i]['train']==True and mode=='train': 176 | self.name_list.append(i.replace('/','-')) 177 | self.label.append( fold_index.tolist().index(domain_dict[i]['fold']) ) 178 | 179 | elif domain_dict[i]['train']==False and mode=='test': 180 | self.name_list.append(i.replace('/','-')) 181 | self.label.append( fold_index.tolist().index(domain_dict[i]['fold']) ) 182 | 183 | def __len__(self): 184 | return len(self.name_list) 185 | 186 | def __getitem__(self, idx): 187 | x = np.load("../cath_data/fold_features/"+self.name_list[idx]+".npy") 188 | y = self.label[idx] 189 | return {'x':x, 'y': y} 190 | 191 | def data_split(): 192 | 193 | # split training and test data: 194 | with open("../cath_data/domain_dict.pkl", "rb") as f: 195 | domain_data = pickle.load(f) 196 | 197 | fold_dict={} 198 | for i in domain_data: 199 | if domain_data[i]['fold'] not in fold_dict: 200 | fold_dict[domain_data[i]['fold']] = [i] 201 | else: 202 | fold_dict[domain_data[i]['fold']].append(i) 203 | 204 | fold_train=[] 205 | fold_test = [] 206 | for i in fold_dict: 207 | ind_list=[] 208 | for j in fold_dict[i]: 209 | ind_list.append(j) 210 | ind_list=np.array(ind_list) 211 | np.random.shuffle(ind_list) 212 | for i in range(len(ind_list)): 213 | if i<0.9*float(len(ind_list)): 214 | domain_data[ind_list[i]]['train']=True 215 | fold_train.append(ind_list[i]) 216 | else: 217 | domain_data[ind_list[i]]['train']=False 218 | fold_test.append(ind_list[i]) 219 | 220 | np.savetxt("../cath_data/fold_train.txt",fold_train, fmt='%s') 221 | np.savetxt("../cath_data/fold_test.txt",fold_test, fmt='%s') 222 | with open("../cath_data/domain_dict.pkl", "wb") as f: 223 | pickle.dump(domain_data, f) 224 | 225 | def train_epoch(model, Adam_opt, dataLoader, e): 226 | 227 | #bsz=args.batch_size 228 | model.train() 229 | if torch. cuda. 
is_available() and 'K40' not in torch.cuda.get_device_name(0): 230 | device = 'cuda' 231 | else: 232 | device = 'cpu' 233 | 234 | 235 | model.to(device) 236 | model = model.double() 237 | 238 | for bt_ind, smp in enumerate(dataLoader): 239 | 240 | x = smp['x'].to(device).double() 241 | y = smp['y'].to(device) 242 | Adam_opt.zero_grad() 243 | preds = model(x) 244 | 245 | loss = nn.CrossEntropyLoss()(preds, y) 246 | loss.backward() 247 | Adam_opt.step() 248 | 249 | with open(args.model_save+".train.txt", "a") as f: 250 | f.write("epochs:%d batch %d/%d loss: %.4f\n" %(e, bt_ind, len(dataLoader), loss)) 251 | print ("epochs:%d batch %d/%d loss: %.4f" %(e, bt_ind, len(dataLoader), loss)) 252 | 253 | def eval(model, dataLoader, e, numofsamples): 254 | model.eval() 255 | 256 | if torch.cuda.is_available() and 'K40' not in torch.cuda.get_device_name(0): 257 | device = 'cuda' 258 | else: 259 | device = 'cpu' 260 | 261 | model.to(device) 262 | model = model.double() 263 | top1=0. 264 | top3=0. 265 | top5=0. 266 | top10=0. 267 | 268 | for bt_ind, smp in enumerate(dataLoader): 269 | 270 | x = smp['x'].to(device).double() 271 | y = smp['y'].to(device) 272 | 273 | preds = model(x) 274 | am = model_statistics.multi_class_accuracy(y, preds) 275 | 276 | top1+=am[0] 277 | top3+=am[1] 278 | top5+=am[2] 279 | top10+=am[3] 280 | 281 | print ("test batch %d/%d" %(bt_ind, len(dataLoader))) 282 | 283 | 284 | print ("overall performance: top1 %.3f\n top3 %.3f\n top5 %.3f\n top10 %.3f\n" %(top1/numofsamples \ 285 | , top3/numofsamples, top5/numofsamples, top10/numofsamples)) 286 | 287 | 288 | 289 | with open(args.model_save+".eval.txt", "a") as f: 290 | f.write("epoch %d overall performance: top1 %.3f top3 %.3f top5 %.3f top10 %.3f\n" %(e, top1/numofsamples \ 291 | , top3/numofsamples, top5/numofsamples, top10/numofsamples)) 292 | 293 | 294 | return top1/numofsamples 295 | 296 | if __name__ == "__main__": 297 | parser = argparse.ArgumentParser(description='Arguments for fold_encoder.py') 298 | parser.add_argument('--nhidden', default=128, type=int) 299 | parser.add_argument('--dropout', default=0.1, type=float) 300 | parser.add_argument('--nfolds', default=1227, type=int) 301 | parser.add_argument('--batch_size', default=128, type=int) 302 | parser.add_argument('--epochs', default=100, type=int) 303 | parser.add_argument('--lr', default=1E-3, type=float) 304 | parser.add_argument('--fold_encoder_feedforward', default=512, type=int) 305 | parser.add_argument('--model_save', default="pretrained_models/foldmodel1", type=str) 306 | parser.add_argument('--pretrained_model', default=None, type=str) 307 | parser.add_argument('--wd', default=0., type=float) 308 | args = parser.parse_args() 309 | 310 | 311 | rb1 = fold_encoder(args) 312 | 313 | num_params=0 314 | for i in rb1.state_dict(): 315 | j=1 316 | for k in rb1.state_dict()[i].shape: 317 | j*=k 318 | num_params+=j 319 | 320 | print ("#num_params:", num_params) 321 | 322 | trainset = fold_dataset(mode='train') 323 | testset = fold_dataset(mode='test') 324 | trainset_loader = DataLoader(trainset, batch_size=args.batch_size, shuffle=True, num_workers=8) 325 | testset_loader = DataLoader(testset, batch_size=args.batch_size, shuffle=False, num_workers=8) 326 | 327 | model = fold_classification(args, nfolds=args.nfolds) 328 | Adam_opt = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay = args.wd) 329 | 330 | best_acc=0. 
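# To resume from a checkpoint written by the torch.save call below, the
# --pretrained_model flag defined in the argparse block above could be
# wired up like this (sketch):
# if args.pretrained_model is not None:
#     ckpt = torch.load(args.pretrained_model, map_location='cpu')
#     model.load_state_dict(ckpt['model_state_dict'])
#     Adam_opt.load_state_dict(ckpt['optimizer_state_dict'])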
331 | for e in range(1, args.epochs+1): 332 | train_epoch(model, Adam_opt, trainset_loader, e) 333 | acc = eval(model, testset_loader, e, len(testset)) 334 | 335 | 336 | if acc > best_acc: 337 | best_acc=acc 338 | torch.save({ 339 | 'epoch': e, 340 | 'model_state_dict': model.state_dict(), 341 | 'optimizer_state_dict': Adam_opt.state_dict(), 342 | 'args': args 343 | }, args.model_save) 344 | 345 | -------------------------------------------------------------------------------- /src/generator.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.nn import TransformerEncoder, TransformerEncoderLayer 5 | import argparse 6 | import numpy as np 7 | import math 8 | import os 9 | import pickle 10 | import numpy as np 11 | import random 12 | from Utils import amino_acid 13 | from Utils import model_statistics 14 | import seq_decoder 15 | import fold_encoder 16 | class ProteinSeqTransformer(nn.Module): 17 | 18 | def __init__(self, args): 19 | # nhidden: the size of hidden state 20 | 21 | super(ProteinSeqTransformer, self).__init__() 22 | 23 | self.model_type = 'ProteinSeqTransformer' 24 | 25 | encoder_layers = TransformerEncoderLayer(args.nhidden, args.nhead, args.seq_encoder_feedforward, args.dropout) 26 | self.transformer_encoder = TransformerEncoder(encoder_layers, args.nlayers) 27 | 28 | 29 | def forward(self, src, padding_masking=None): 30 | 31 | 32 | output = self.transformer_encoder(src, src_key_padding_mask = padding_masking) 33 | 34 | return output 35 | 36 | class cosine_similarity(nn.Module): 37 | def __init__(self, args): 38 | super (cosine_similarity, self).__init__() 39 | self.row_wise_avgpool = nn.AvgPool1d(kernel_size = 3, stride=1) 40 | 41 | def forward(self, x, y): 42 | # x shape [len_x, bsz, hidden] seq 43 | # y shape [len_y, bsz, hidden] fold 44 | x = x.transpose(0,1) 45 | y = y.transpose(0,1) 46 | #print (x,y) 47 | cos_mat = torch.softmax(torch.bmm(y, x.transpose(1,2)), dim=2) # cos_mat shape [bsz, len_y, len_x] 48 | #print (x.shape, y.shape, cos_mat.shape, cos_mat) 49 | cos_mat = self.row_wise_avgpool(cos_mat) 50 | #print (cos_mat.shape) 51 | cos_vec, t = torch.max(cos_mat, dim=2) 52 | cos_vec = torch.mean(cos_vec, dim=1) 53 | print ("cos_vec", cos_vec.shape) 54 | return cos_vec 55 | class PositionalEncoding(nn.Module): 56 | 57 | def __init__(self, d_model, dropout=0.1, max_len=502): 58 | super(PositionalEncoding, self).__init__() 59 | self.dropout = nn.Dropout(p=dropout) 60 | 61 | pe = torch.zeros(max_len, d_model) 62 | position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1) 63 | div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model)) 64 | pe[:, 0::2] = torch.sin(position * div_term) 65 | pe[:, 1::2] = torch.cos(position * div_term) 66 | pe = pe.unsqueeze(0).transpose(0, 1) 67 | self.register_buffer('pe', pe) 68 | 69 | def forward(self, x): 70 | x = x + self.pe[:x.size(0), :] 71 | return self.dropout(x) 72 | 73 | 74 | def zero_padding(hidden, padding_masking): 75 | c = 1-padding_masking.int() 76 | #print (c[0], padding_masking[0]) 77 | hidden*=c.transpose(0,1).unsqueeze(-1) 78 | 79 | 80 | 81 | class fold_classification_generator(nn.Module): 82 | def __init__(self, args): 83 | super(fold_classification_generator, self).__init__() 84 | 85 | self.args = args 86 | 87 | #self.lineartape_to_hidden = nn.Linear(768, args.nhidden) 88 | 89 | #self.linear1 = nn.Linear(args.nhidden, args.nfolds) 90 | #-------------fold 
encoder------------------------------------------ 91 | #################################################################### 92 | if args.lba0 !=0 or args.lba2 !=0 or args.lba4!=0: 93 | self.fold_encoder = fold_encoder.fold_encoder(args) 94 | 95 | #-------------seq encoder------------------------------------------ 96 | #################################################################### 97 | if args.lba1 !=0 or args.lba3 !=0 or args.lba4!=0: 98 | self.seq_encoder = ProteinSeqTransformer(args) 99 | self.seq_embedding = nn.Embedding(args.ntokens, args.nhidden) 100 | self.positional_embedding = PositionalEncoding(args.nhidden,args.dropout) 101 | 102 | #-------------seq decoder------------------------------------------ 103 | #################################################################### 104 | if args.lba0 !=0 or args.lba1 !=0: 105 | self.seq_decoder = seq_decoder.transformer_decoder(args) 106 | self.seq_embedding = nn.Embedding(args.ntokens, args.nhidden) 107 | self.positional_embedding = PositionalEncoding(args.nhidden, args.dropout) 108 | self.decoder_out_linear = nn.Linear(args.nhidden, args.ntokens-2) 109 | #-------------similar measure------------------------------------------ 110 | #################################################################### 111 | if args.lba4!=0: 112 | self.cosine_similarity = cosine_similarity(args) 113 | 114 | #------------- fold/seq classification------------------------------------------ 115 | #################################################################### 116 | if args.lba2!=0 or args.lba3!=0: 117 | self.fold_classification_linear = nn.Linear(args.nhidden, args.nfolds) 118 | 119 | 120 | self.dropout = nn.Dropout(args.dropout) 121 | 122 | 123 | def forward(self, seq, fold, padding_masking=None, mode='train'): 124 | # seq: shape: [batch_size, seq_len] 125 | # fold shape [bsz, 20, 20, 20, 4] 126 | token_preds_from_fold=None 127 | token_preds_from_seq=None 128 | foldclass_preds=None 129 | seqclass_preds=None 130 | sim_score0=None 131 | sim_score1=None 132 | seqclass_preds1=None 133 | 134 | 135 | seq=seq.transpose(0,1) 136 | 137 | seq = self.seq_embedding(seq) 138 | seq = self.positional_embedding(seq) 139 | 140 | #-------------fold encoder------------------------------------------ 141 | #################################################################### 142 | if self.args.lba0+self.args.lba2+self.args.lba4!=0: 143 | hidden_state_fold = self.fold_encoder(fold) # hidden: [seq_len, batch_size, hidden states] 144 | mean_hidden_state_fold = torch.mean(hidden_state_fold, dim=0) 145 | 146 | #-------------seq encoder------------------------------------------ 147 | #################################################################### 148 | if self.args.lba1+self.args.lba3+self.args.lba4!=0: 149 | hidden_state_seq = self.seq_encoder(seq, padding_masking=padding_masking) 150 | #print (hidden_state_seq, 'sdd') 151 | zero_padding(hidden_state_seq, padding_masking) 152 | #print (hidden_state_seq) 153 | mean_hidden_state_seq = torch.mean(hidden_state_seq, dim=0) 154 | 155 | #-------------fold2seq ------------------------------------------ 156 | #################################################################### 157 | if self.args.lba0!=0: 158 | decoder_out_from_fold = self.seq_decoder(seq, hidden_state_fold, padding_masking, mode) 159 | token_preds_from_fold = self.decoder_out_linear(decoder_out_from_fold).transpose(0,1) 160 | 161 | #-------------seq2seq decoder------------------------------------------ 162 | 
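# (how the lba* gates map to branches, per the argparse comments in
#  inference.py: lba0 = fold2seq decoding, lba1 = this seq2seq branch,
#  lba2 = fold classification from the fold encoding, lba3 = fold
#  classification from the sequence encoding, lba4 = the fold/sequence
#  similarity terms)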
#################################################################### 163 | if self.args.lba1!=0: 164 | decoder_out_from_seq = self.seq_decoder(seq, hidden_state_seq, padding_masking, mode) 165 | token_preds_from_seq = self.decoder_out_linear(decoder_out_from_seq).transpose(0,1) 166 | 167 | #-------------fold2 fold classification------------------------------------------ 168 | #################################################################### 169 | if self.args.lba2!=0: 170 | foldclass_preds = self.fold_classification_linear(mean_hidden_state_fold) 171 | 172 | #-------------seq2 fold classification------------------------------------------ 173 | #################################################################### 174 | if self.args.lba3!=0: 175 | seqclass_preds = self.fold_classification_linear(mean_hidden_state_seq) 176 | 177 | #-------------similarity measure------------------------------------------ 178 | #################################################################### 179 | if self.args.lba4!=0: 180 | #sim_score = torch.sum((mean_hidden_state_seq - mean_hidden_state_fold)**2, dim=1)/(self.args.nhidden**2) 181 | sim_score0 = -self.cosine_similarity(hidden_state_seq, hidden_state_fold) 182 | pred_seq = torch.max(token_preds_from_fold.transpose(0,1) , dim=2).indices # shape [ seq_len, bsz] 183 | start = torch.ones((1, pred_seq.size(1)) , dtype=torch.long, device=self.args.device)+20 184 | pred_seq = torch.cat( (start, pred_seq[:-1]), dim=0 ) 185 | pred_seq = self.seq_embedding(pred_seq.long()) 186 | pred_seq = self.positional_embedding(pred_seq) 187 | hidden_state_seq1 = self.seq_encoder(pred_seq, padding_masking=padding_masking) 188 | #print (hidden_state_seq, 'sdd') 189 | zero_padding(hidden_state_seq1, padding_masking) 190 | #print (hidden_state_seq) 191 | mean_hidden_state_seq1 = torch.mean(hidden_state_seq1, dim=0) 192 | seqclass_preds1 = self.fold_classification_linear(mean_hidden_state_seq1) 193 | sim_score1 = torch.mean((hidden_state_seq1 - hidden_state_seq)**2, dim=[0,2]) 194 | 195 | #print("simscore shape", sim_score0.shape, sim_score1.shape, seqclass_preds1.shape) 196 | 197 | return token_preds_from_fold, token_preds_from_seq, foldclass_preds,seqclass_preds, sim_score0,sim_score1, seqclass_preds1 198 | 199 | 200 | 201 | -------------------------------------------------------------------------------- /src/inference.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.utils.data import Dataset, DataLoader 5 | from torch.nn import TransformerEncoder, TransformerEncoderLayer 6 | from collections import OrderedDict 7 | import argparse 8 | import numpy as np 9 | import math 10 | import os 11 | import pickle 12 | import numpy as np 13 | import random 14 | from Utils import amino_acid 15 | from Utils import model_statistics 16 | import train 17 | import time 18 | 19 | def top_k_sampling(token_preds, k=5): 20 | token_preds = torch.topk(token_preds, k=k, dim=-1) 21 | tokens_dis = torch.distributions.Categorical(token_preds.values) 22 | smp = tokens_dis.sample() 23 | token_smp = token_preds.indices[torch.arange(0, smp.size(0)) , smp] 24 | 25 | return token_smp 26 | 27 | def greedy_sampling(token_preds): 28 | return torch.max(token_preds, 1).indices 29 | 30 | class fold_dataset(Dataset): 31 | def __init__(self, args): 32 | 33 | 34 | self.name_list=[] 35 | self.args=args 36 | 37 | with open(args.data_path, "rb") as f: 38 | domain = pickle.load(f) 39 | 40 | 41 | 
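# top_k_sampling above keeps the k highest-scoring tokens per position and
# samples among them in proportion to their scores (Categorical normalizes
# the top-k values). Toy sketch with made-up numbers:
# probs = torch.softmax(torch.tensor([[0.1, 2.0, 0.3, 1.5]]), dim=-1)
# tok = top_k_sampling(probs, k=2)   # returns index 1 or 3, never 0 or 2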
41 |         for i in domain:
42 |             for j in range(args.n):
43 |                 self.name_list.append(i.replace('/','-'))
44 |         print ("mode=", args.mode, " ", len(self.name_list))
45 | 
46 |     def __len__(self):
47 |         return len(self.name_list)
48 | 
49 |     def __getitem__(self, idx):
50 |         x = np.load(self.args.fold_path+self.name_list[idx]+".npy")
51 |         return [torch.tensor(x).float(), self.name_list[idx]]
52 | 
53 | def inference(model, args, fold):
54 |     # fold should be [bsz, 20, 20, 20, 4]
55 |     model.eval()
56 | 
57 |     if torch.cuda.is_available() and 'K40' not in torch.cuda.get_device_name(0):
58 |         device = 'cuda'
59 |     else:
60 |         device = 'cpu'
61 |     #device='cpu'
62 |     model.to(device)
63 |     fold = fold.to(device)
64 |     #tape_token_encode=[2] # 2 is the starting encode in tape
65 | 
66 |     seq_embed = torch.tensor([[21] for i in range(fold.size(0))]).to(device)  # 21 is the start token
67 |     start_time = time.time()
68 | 
69 |     t1 = time.time()
70 |     with torch.no_grad():
71 |         for i in range(args.maxlen):
72 |             #tape_out = tape_model(torch.tensor([tape_token_encode]).to(device))
73 |             print (seq_embed.shape, fold.shape)
74 |             c1, c2, c3, c4, c5, c6, c7 = model(seq_embed, fold, mode='inference')
75 | 
76 |             c1 = c1.transpose(0,1)
77 |             print ("c1 shape", c1.shape)
78 |             # c1 shape: [seq_len, bsz, 21]
79 |             c1 = torch.softmax(c1, dim=-1)
80 |             if len(args.decodetype)<=2:  # a short numeric string such as "5" selects top-k sampling
81 |                 tokens = top_k_sampling(c1[-1], int(args.decodetype))  # tokens has shape [bsz]
82 |             elif args.decodetype=='greedy':
83 |                 tokens = greedy_sampling(c1[-1])
84 |             seq_embed = torch.cat((seq_embed, tokens.unsqueeze(1)), 1)
85 |             print (seq_embed.shape)
86 |             t3 = time.time()
87 |             with open("runtime_fold2seq.txt_cpu", "a") as fout:
88 |                 fout.write ("%d %.4f\n" %(i+1, (t3-t1)/args.batch_size))
89 |     end_time = time.time()
90 |     print ("batch time", end_time-start_time)
91 |     return seq_embed
92 | 
93 | 
94 | def main():
95 |     parser = argparse.ArgumentParser(description='Arguments for inference.py')
96 |     parser.add_argument('--data_path', default="./domain_dict.pkl", type=str)
97 |     parser.add_argument('--fold_path', default="../data/fold_features/", type=str)
98 |     parser.add_argument('--trained_model', default=None, type=str)
99 |     parser.add_argument('--batch_size', default=128, type=int)
100 |     parser.add_argument('--n', default=100, type=int)  # number of sequences to sample per fold
101 |     parser.add_argument('--output', default='./gen_seq.txt', type=str)
102 |     parser.add_argument('--mode', default='test2', type=str)
103 |     parser.add_argument('--maxlen', default=200, type=int)
104 |     parser.add_argument('--decodetype', default="5", type=str)  # 'greedy', or an integer k (as a string) for top-k sampling
105 |     parser.add_argument('--lba0', default=1, type=float)  # coefficient before fold2seq_loss
106 |     parser.add_argument('--lba1', default=0, type=float)  # coefficient before seq2seq_loss
107 |     parser.add_argument('--lba2', default=0, type=float)  # coefficient before foldclass_loss
108 |     parser.add_argument('--lba3', default=0, type=float)  # coefficient before seqclass_loss
109 |     parser.add_argument('--lba4', default=0, type=float)  # coefficient before sim_loss
110 |     parser.add_argument('--lba5', default=0, type=float)
111 |     parser.add_argument('--lba6', default=0, type=float)
112 | 
113 |     args = parser.parse_args()
114 | 
115 |     if args.trained_model is None:
116 |         raise ValueError("Must specify a trained model for inference")
117 | 
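    # Device selection: prefer CUDA but skip Tesla K40 nodes (presumably too old
    # or too slow on the original cluster; the same check appears in inference()
    # above and in train.py).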
118 |     if torch.cuda.is_available() and 'K40' not in torch.cuda.get_device_name(0):
119 |         device = 'cuda'
120 |     else:
121 |         device = 'cpu'
122 | 
123 |     device='cpu'  # hard-coded override of the detection above, apparently so the CPU runtime benchmark in inference() applies
124 |     trained_dict = torch.load(args.trained_model, map_location = 'cpu')
125 |     trained_dict['args'].device = device
126 |     trained_dict['args'].lba0=1
127 |     trained_dict['args'].lba1=0
128 |     trained_dict['args'].lba2=0
129 |     trained_dict['args'].lba3=0
130 |     trained_dict['args'].lba4=0
131 |     trained_dict['args'].lba5=0
132 |     trained_dict['args'].lba6=0
133 |     print (trained_dict['args'], trained_dict['epoch'], trained_dict['metric'])
134 | 
135 |     model_name = args.trained_model.split('/')[1]
136 |     print ("output path: ", args.output)
137 | 
138 |     model = train.fold_classification_generator(trained_dict['args'])
139 | 
140 |     new_state_dict = OrderedDict()
141 |     for k,v in trained_dict['model_state_dict'].items():
142 |         if 'module' == k[:6]:  # the checkpoint was saved under nn.DataParallel
143 |             new_state_dict[k[7:]] = v  # strip the 'module.' prefix
144 |         else:
145 |             new_state_dict[k] = v
146 | 
147 |     model.load_state_dict(new_state_dict, strict=False)
148 | 
149 |     if torch.cuda.device_count() > 1:
150 |         print("Let's use ", torch.cuda.device_count(), " GPUs!")
151 |         model = nn.DataParallel(model)
152 |         args.batch_size *= torch.cuda.device_count()
153 | 
154 |     testset = fold_dataset(args)
155 |     testset_loader = DataLoader(testset, batch_size=args.batch_size, shuffle=False, num_workers=8)
156 | 
157 |     for bt_ind, fold in enumerate(testset_loader):
158 |         gen_seq_tokens = inference(model, args, fold[0])
159 |         t = gen_seq_tokens.detach().cpu().numpy()
160 | 
161 | 
162 |         with open(args.output, "a") as f:
163 |             for i in range(len(fold[1])):
164 |                 s=''
165 |                 for j in range(args.maxlen):
166 |                     if t[i][j]==0:  # token id 0 terminates the sequence
167 |                         break
168 |                     s += amino_acid.my_seqlabel[t[i][j]]
169 |                 print (s)
170 |                 f.write(fold[1][i]+" "+s+"\n")
171 | 
172 | 
173 | if __name__=='__main__':
174 |     main()
175 | 
--------------------------------------------------------------------------------
/src/seq_decoder.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | from torch.utils.data import Dataset, DataLoader
5 | import argparse
6 | import numpy as np
7 | import math
8 | import os
9 | import pickle
10 | 
11 | from Utils import model_statistics
12 | 
13 | # autoregressive sequence decoder
14 | # two versions were planned:
15 | # 1. Transformer (implemented below)
16 | # 2. LSTM (not implemented in this file)
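# A worked example of the causal mask built by _generate_square_subsequent_mask
# below, for sz=3:
#     [[0., -inf, -inf],
#      [0.,   0., -inf],
#      [0.,   0.,   0.]]
# Row i is a query position; the -inf entries block attention to positions
# j > i, which keeps the decoder autoregressive during training.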
17 | class transformer_decoder(nn.Module):
18 |     def __init__(self, args):
19 |         super(transformer_decoder, self).__init__()
20 | 
21 |         decoder_layer = nn.TransformerDecoderLayer(d_model=args.nhidden, nhead=8, dim_feedforward=args.decoder_feedforward)
22 |         self.transformer_decoder = nn.TransformerDecoder(decoder_layer, num_layers=4)
23 |         self.args = args
24 |         self.nhidden = args.nhidden
25 |         #self.pos_encoder = PositionalEncoding(args.nhidden, args.dropout)
26 |         #self.encoder = nn.Embedding(args.ntokens, args.nhidden)
27 |         #self.init_weights()
28 | 
29 |     def _generate_square_subsequent_mask(self, sz):
30 |         mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
31 |         mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
32 |         return mask
33 | 
34 |     # def init_weights(self):
35 |     #     initrange = 0.1
36 |     #     self.encoder.weight.data.uniform_(-initrange, initrange)
37 | 
38 | 
39 |     def forward(self, seq, fold_embed, tgt_padding_masking = None, mode='train'):
40 |         # seq shape: [seq_length, batch_size, nhidden] (already embedded by the caller)
41 |         if mode=='train':
42 |             mask = self._generate_square_subsequent_mask(len(seq)).to(self.args.device)
43 |         else:
44 |             mask = None  # at generation time the input holds only already-generated tokens, so no causal mask is needed
45 |         #print (mask)
46 |         #print (tgt_padding_masking)
47 |         # seq = self.encoder(seq) * math.sqrt(self.nhidden)
48 |         # seq = self.pos_encoder(seq)
49 |         #print ("before decoder. seq:", seq.shape, "fold embed shape:", fold_embed.shape)
50 | 
51 |         # ad hoc memory-key padding mask: a memory of length 125 is presumably the fold-encoder latent (5x5x5 positions)
52 |         if fold_embed.size(0)==125:  # fold memory carries no padding
53 |             memory_key_padding_mask = None
54 |         else:  # otherwise the memory is a padded sequence embedding; reuse the target padding mask
55 |             memory_key_padding_mask = tgt_padding_masking
56 |         output = self.transformer_decoder(seq, fold_embed, tgt_mask = mask, tgt_key_padding_mask = tgt_padding_masking, \
57 |                 memory_key_padding_mask=memory_key_padding_mask)
58 |         return output
59 | 
60 | 
61 | 
--------------------------------------------------------------------------------
/src/train.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | from torch.utils.data import Dataset, DataLoader
5 | from torch.nn import TransformerEncoder, TransformerEncoderLayer
6 | import argparse
7 | from collections import OrderedDict
8 | import numpy as np
9 | import math
10 | import os
11 | import pickle
12 | 
13 | import random
14 | from Utils import amino_acid
15 | from Utils import model_statistics
16 | from generator import fold_classification_generator
17 | 
18 | class generator_dataset(Dataset):
19 |     def __init__(self, args, domain_data, mode):
20 |         # NOTE: 'mode' only appears in the log line below; the dataset does not filter domain_data by it
21 |         self.args=args
22 |         self.domain_data = {}
23 |         self.name_list=[]
24 |         self.permute=[]
25 |         for i in domain_data:
26 |             self.domain_data[i]=domain_data[i]
27 |             self.name_list.append(i)
28 |             self.permute.append((0,1,2,3))
29 | 
30 |         if args.augmentation==1:  # augment with all six permutations of the three spatial axes (the channel axis stays last)
31 |             self.permute=[(0,1,2,3) for i in range(len(self.name_list))] + [(0,2,1,3) for i in range(len(self.name_list))] + [(1,0,2,3) for i in range(len(self.name_list))] \
32 |                 + [(1,2,0,3) for i in range(len(self.name_list))] + [(2,0,1,3) for i in range(len(self.name_list))] + [(2,1,0,3) for i in range(len(self.name_list))]
33 |             self.name_list = self.name_list+self.name_list+self.name_list+self.name_list+self.name_list+self.name_list
34 |         print (mode+" dataset: "+str(len(self.name_list)))
35 | 
36 | 
37 |     def __len__(self):
38 |         return len(self.name_list)
39 | 
40 |     def __getitem__(self, idx):
41 |         fold_feat = np.load("../data/fold_features/"+self.name_list[idx].replace('/','-')+".npy")
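        # One sample: 'fold_feat' is the 20x20x20x4 voxel fold feature with its
        # spatial axes permuted per self.permute[idx] for augmentation;
        # 'seq_feat' holds token ids cut to maxlen+2 (presumably leaving room
        # for the start and stop tokens); 'pad' marks the padded positions.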
42 |         seq_feat = self.domain_data[self.name_list[idx]]['embed'][:self.args.maxlen+2]
43 |         classlabel = self.domain_data[self.name_list[idx]]['fold_index']
44 |         seq_padding = self.domain_data[self.name_list[idx]]['padding'][:self.args.maxlen+2]
45 |         return {'fold_feat': torch.tensor(fold_feat).float().permute(self.permute[idx]), 'seq_feat': torch.tensor(seq_feat).long(), "classlabel": torch.tensor(classlabel), 'pad': torch.tensor(seq_padding)}
46 | 
47 | def var_len_data_pre(domain_data, batch_size, mode='train'):
48 |     # Group sequences of identical length into batches of at most batch_size,
49 |     # ordered from short to long, so no in-batch padding is needed.
50 | 
51 | 
52 | 
53 |     fold_index = np.loadtxt("../data/fold_index.txt", dtype=str)
54 | 
55 |     keys=[]
56 |     for i in domain_data:
57 |         if domain_data[i]['mode'] == mode:
58 |             keys.append(i)
59 | 
60 |             domain_data[i]['fold_index'] = fold_index.tolist().index(domain_data[i]['fold'])
61 |             domain_data[i]['decoderlabel'] = amino_acid.seqlabel(domain_data[i]['seq'])
62 | 
63 |     sorted_keys = sorted(keys, key=lambda x: len(domain_data[x]['seq']))
64 | 
65 |     batch_ind_all = []
66 |     bz=0
67 |     clen = len(domain_data[sorted_keys[0]]['seq'])
68 |     bsz_list=[]
69 |     for i in range(len(sorted_keys)):
70 |         key = sorted_keys[i]
71 |         if len(domain_data[key]['seq']) == clen and bz < batch_size:  # same length and the batch is not yet full
72 |             bsz_list.append(i)
73 |             bz+=1
74 |         else:
75 |             batch_ind_all.append(bsz_list)
76 |             bsz_list=[i]
77 |             bz=1
78 |             clen = len(domain_data[key]['seq'])
79 | 
80 |     batch_ind_all.append(bsz_list)
81 | 
82 |     print (mode, "#proteins:", len(sorted_keys), "#batches:", len(batch_ind_all))
83 | 
84 |     return sorted_keys, batch_ind_all
85 | 
86 | # -----sorted_keys: the sorted list of domain keys
87 | # -----bt: the indices (into sorted_keys) forming one batch
88 | # -----domain_data: the domain data
89 | 
90 | def dataloader(args, domain_data, sorted_keys, bt, device):
91 | 
92 |     seq = []
93 |     fold = []
94 |     foldlabel=[]
95 |     decoderlabel=[]
96 |     for i in bt:
97 |         keys = sorted_keys[i]
98 |         #print (len(domain_data[keys]['seq']))
99 |         #seq.append(torch.load("../data/seq_features_nopads/"+keys.replace('/','-')+".pt", map_location=device))
100 |         #if args.encoder == 'fold' or args.encoder == 'both':
101 |         seq_tokens = amino_acid.transformer_integer(domain_data[keys]['seq'])
102 |         seq.append(seq_tokens)
103 |         fold.append(np.load("../data/fold_features/"+keys.replace('/','-')+".npy"))
104 |         foldlabel.append(domain_data[keys]['fold_index'])
105 |         decoderlabel.append(seq_tokens[1:])  # next-token targets: the input shifted left by one
106 | 
107 |     return torch.tensor(seq).long(), torch.tensor(fold).float(), \
108 |         torch.tensor(foldlabel), torch.tensor(decoderlabel)
109 | 
110 | def loss(args, preds, label, added_loss=[0,0,0,0,0,0], bsz=0):  # with the default bsz=0 the shared added_loss accumulator is a no-op
111 |     # L = lba0*fold2seq_loss + lba1*seq2seq_loss + lba2*foldclass_loss + lba3*seqclass_loss
112 |     #     + lba4*sim_score
113 |     L = torch.tensor(0.).to(args.device)
114 |     #label['seqlabel'] = label['seqlabel'].transpose(0,1).flatten() # labels shape [(seqs+1)*bsz]
115 |     loss_list=[None, None, None, None, None, None]
116 | 
117 |     if args.lba0!=0:
118 |         preds['fold2seq_preds'] = (preds['fold2seq_preds'].transpose(0,1).contiguous())[:-1].view(-1, args.ntokens-2)  # preds shape: [(seqs+1)*bsz, ntokens-2]
119 |         fold2seq_loss = nn.CrossEntropyLoss(ignore_index=22)(preds['fold2seq_preds'], label['seqlabel'].long())  # 22 is the padding token id
120 |         L += args.lba0*fold2seq_loss
121 |         loss_list[0]=fold2seq_loss.detach().cpu().numpy()
122 |         added_loss[0]+=loss_list[0]*bsz
123 |         # the batch-size-weighted sums in added_loss are averaged by the caller
124 | 
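    # Both token-prediction losses share one reshaping convention: predictions
    # arrive as [bsz, seq, ntokens-2], are transposed back to sequence-major,
    # the final position is dropped (it has no next-token target), and the rest
    # is flattened to line up with 'seqlabel', which the callers build as the
    # input tokens shifted left by one (teacher forcing).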
125 |     if args.lba1!=0:
126 |         preds['seq2seq_preds'] = (preds['seq2seq_preds'].transpose(0,1).contiguous())[:-1].view(-1, args.ntokens-2)  # preds shape: [(seqs+1)*bsz, ntokens-2]
127 |         seq2seq_loss = nn.CrossEntropyLoss(ignore_index=22)(preds['seq2seq_preds'], label['seqlabel'].long())
128 |         L += args.lba1*seq2seq_loss
129 |         loss_list[1]=seq2seq_loss.detach().cpu().numpy()
130 |         added_loss[1]+=loss_list[1]*bsz
131 | 
132 |     if args.lba2!=0:
133 |         foldclass_loss = nn.CrossEntropyLoss()(preds['foldclass_preds'], label['classlabel'].long())
134 |         L += args.lba2*foldclass_loss
135 |         loss_list[2]=foldclass_loss.detach().cpu().numpy()
136 |         added_loss[2]+=loss_list[2]*bsz
137 | 
138 |     if args.lba3!=0:
139 |         seqclass_loss = nn.CrossEntropyLoss()(preds['seqclass_preds'], label['classlabel'].long())
140 |         L += args.lba3*seqclass_loss
141 |         loss_list[3]=seqclass_loss.detach().cpu().numpy()
142 |         added_loss[3]+=loss_list[3]*bsz
143 | 
144 |     if args.lba4!=0:
145 |         l40 = torch.mean(preds['sim_score'][0])  # negative cosine similarity between sequence and fold embeddings
146 |         L += args.lba4*(l40)
147 |         loss_list[4]=l40.detach().cpu().numpy()
148 |         added_loss[4]+=loss_list[4]*bsz
149 |         l41 = torch.mean(preds['sim_score'][1])  # cyclic reconstruction term
150 |         L += args.lba4*(l41)
151 |         loss_list[4]=loss_list[4]+l41.detach().cpu().numpy()  # report both similarity terms together
152 |         added_loss[4]+=l41.detach().cpu().numpy()*bsz
153 | 
154 |     loss_list[5] = L.detach().cpu().numpy()  # total weighted loss
155 |     added_loss[5]+=loss_list[5]*bsz
156 |     return loss_list, L
157 | 
158 | 
159 | def result_print(args, epoch, loss_list, batch=None, am=None, mode='train'):
160 |     with open(args.model_save+"."+mode+".txt", "a") as f:
161 |         f.write("epochs:%d " %(epoch))
162 |         print("epochs:%d " %(epoch), end='')
163 |         if batch is not None:
164 |             f.write("batch %d/%d: " %(batch[0], batch[1]))
165 |             print("batch %d/%d: " %(batch[0], batch[1]), end='')
166 |         else:
167 |             f.write("overall: ")
168 |             print("overall: ", end='')
169 |         for i in loss_list:
170 |             if i is None:
171 |                 f.write("None ")
172 |                 print("None ", end='')
173 |             else:
174 |                 f.write("%.4f " %(i))
175 |                 print("%.4f " %(i), end='')
176 |         print('\n', end='')
177 |         f.write('\n')
178 | 
179 |         if am is not None:
180 |             f.write("epoch %d fold2class performance: top1 %.3f top3 %.3f top5 %.3f top10 %.3f\n" %(epoch, \
181 |                 am[0][0], am[0][1], am[0][2], am[0][3]))
182 |             f.write("epoch %d seqclass performance: top1 %.3f top3 %.3f top5 %.3f top10 %.3f\n" %(epoch, \
183 |                 am[1][0], am[1][1], am[1][2], am[1][3]))
184 | 
185 | 
186 | 
187 | def eval(model, args, dataloader, e):
188 | 
189 |     model.eval()
190 |     model.to(args.device)
191 |     seqclass_acc=np.zeros(4, dtype=float)
192 |     foldclass_acc=np.zeros(4, dtype=float)
193 | 
194 |     add_loss=np.zeros(6, dtype=float)
195 | 
196 |     n=0.
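    # n counts the examples seen so the loss sums accumulated in add_loss (and
    # the accuracy counters) can be averaged per example after the loop.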
197 |     for bt_ind, smp in enumerate(dataloader):
198 |         #seqs, fold, classlabel, seqlabel = dataloader(args, domain_data, sorted_keys, batch_ind[idx1], args.device)
199 |         seqs = smp['seq_feat'].to(args.device)
200 |         fold = smp['fold_feat'].to(args.device)
201 |         classlabel = smp['classlabel'].to(args.device)
202 |         seqpads = smp['pad'].to(args.device)
203 |         seqlabel = seqs.transpose(0,1)[1:].flatten()  # next-token targets: the input shifted left by one
204 |         #seqlabel = seqlabel.to(args.device)
205 |         #fold = fold.to(args.device)
206 |         print (seqs.shape, fold.shape, classlabel.shape, seqlabel.shape, seqpads.shape)
207 |         c0,c1,c2,c3,c4,c5,c6 = model(seqs, fold, padding_masking=seqpads)  # c0 shape: [bsz, maxlen+1, ntokens-2]
208 |         preds = {'fold2seq_preds': c0, 'seq2seq_preds': c1,
209 |                  'foldclass_preds': c2, 'seqclass_preds': c3, 'sim_score': [c4,c5,c6]}
210 | 
211 |         label={'classlabel':classlabel, 'seqlabel':seqlabel}
212 | 
213 |         loss_list,L = loss(args, preds, label, add_loss, fold.size(0))
214 | 
215 |         if args.lba2!=0:
216 |             am = model_statistics.multi_class_accuracy(classlabel, preds['foldclass_preds'])
217 |             foldclass_acc += am
218 |         if args.lba3!=0:
219 |             am = model_statistics.multi_class_accuracy(classlabel, preds['seqclass_preds'])
220 |             seqclass_acc += am
221 | 
222 |         n+=fold.size(0)
223 | 
224 |         print ("test batch %d/%d " %(bt_ind, len(dataloader)))
225 |         print ("batch_loss:", loss_list)
226 |     add_loss=(add_loss/n).tolist()
227 |     seqclass_acc/=n
228 |     foldclass_acc/=n
229 |     if args.lba0==0:
230 |         add_loss[0]=None
231 |     if args.lba1==0:
232 |         add_loss[1]=None
233 |     if args.lba2==0:
234 |         add_loss[2]=None
235 |     if args.lba3==0:
236 |         add_loss[3]=None
237 |     if args.lba4==0:
238 |         add_loss[4]=None
239 |     result_print(args, e, add_loss, batch=None, am=[foldclass_acc, seqclass_acc], mode='eval')
240 | 
241 |     if args.lba0!=0:
242 |         return add_loss[0]
243 |     if args.lba1!=0:
244 |         return add_loss[1]
245 |     if args.lba2!=0:
246 |         return -foldclass_acc[0]
247 |     if args.lba3!=0:
248 |         return -seqclass_acc[0]
249 |     # falls through (returns None) if no objective is enabled
250 | 
251 | def train_epoch(model, args, Adam_opt, scheduler, dataloader, e):
252 |     # L = lba0*fold2seq_loss + lba1*seq2seq_loss + lba2*foldclass_loss + lba3*seqclass_loss
253 |     #     + lba4*sim_score
254 | 
255 |     model.train()
256 |     model.to(args.device)
257 | 
258 |     #bt_ind=1
259 |     #for idx1 in idx:
260 |     for bt_ind, smp in enumerate(dataloader):
261 |         #seqs, fold, classlabel, seqlabel = dataloader(args, domain_data, sorted_keys, batch_ind[idx1], args.device)
262 |         seqs = smp['seq_feat'].to(args.device)
263 |         fold = smp['fold_feat'].to(args.device)
264 |         classlabel = smp['classlabel'].to(args.device)
265 |         seqlabel = seqs.transpose(0,1)[1:].flatten()
266 |         seqpads = smp['pad'].to(args.device)
267 |         #seqlabel = seqlabel.to(args.device)
268 |         #fold = fold.to(args.device)
269 |         print (seqs.shape, fold.shape, classlabel.shape, seqlabel.shape, seqpads.shape)
270 | 
271 |         label={'classlabel':classlabel, 'seqlabel':seqlabel}
272 | 
273 |         c0,c1,c2,c3,c4,c5,c6 = model(seqs, fold, padding_masking=seqpads)  # c0 shape: [bsz, maxlen+1, ntokens-2]
274 |         preds = {'fold2seq_preds': c0, 'seq2seq_preds': c1, 'foldclass_preds': c2, 'seqclass_preds': c3, 'sim_score': [c4,c5,c6]}
275 | 
276 | 
277 |         print ('seq input shape:', seqs.shape, 'fold input shape', fold.shape, 'decoder label shape', seqlabel.shape)
278 | 
279 |         loss_list,L = loss(args, preds, label)
280 | 
281 |         Adam_opt.zero_grad()
282 |         L.backward()
283 |         #torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
284 |         Adam_opt.step()
285 |         if args.lr==-1:
286 |             scheduler.step()  # the warmup schedule (selected by lr=-1) steps once per batch
287 | 
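        # Per-batch logging: result_print appends one line per batch to
        # <model_save>.train.txt (epoch, batch index, then the six loss fields).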
288 |         result_print(args, e, loss_list, batch=[bt_ind, len(dataloader)], am=None, mode='train')
289 |         #bt_ind+=1
290 | 
291 | def var_len_shuffle(domain_data, sorted_keys, idx):
292 |     # shuffle keys within each equal-length group, then shuffle the batch order
293 |     cl = len(domain_data[sorted_keys[0]]['seq'])
294 |     st=0
295 |     for i in range(len(sorted_keys)):
296 |         key = sorted_keys[i]
297 |         if len(domain_data[key]['seq']) != cl:
298 |             cl = len(domain_data[key]['seq'])
299 |             c = sorted_keys[st:i]
300 |             random.shuffle(c)
301 |             sorted_keys[st:i] = c
302 |             st = i
303 |     sorted_keys[st:] = random.sample(sorted_keys[st:], len(sorted_keys)-st)  # the last group also gets shuffled
304 |     np.random.shuffle(idx)
305 | 
306 | def load_pretrained_model(model, args):
307 |     if args.pretrained_model is not None:
308 |         model_pre = torch.load(args.pretrained_model, map_location='cpu')
309 | 
310 |         print ("loading model:", model_pre['args'], '\nepoch:', model_pre['epoch'], 'loss:', model_pre['metric'])
311 |         new_state_dict = OrderedDict()
312 |         for k,v in model_pre['model_state_dict'].items():
313 |             if 'module' == k[:6]:  # the pretrained model was saved under nn.DataParallel
314 |                 new_state_dict[k[7:]] = v  # strip the 'module.' prefix
315 |             else:
316 |                 new_state_dict[k] = v
317 | 
318 | 
319 |         print (model.decoder_out_linear.bias)  # sanity check: printed before ...
320 |         model.load_state_dict(new_state_dict, strict=False)
321 |         print (model.decoder_out_linear.bias)  # ... and after loading, to confirm the weights changed
322 | 
323 | 
324 |     if args.freeze_seq_encoder == 1:  # freeze the sequence encoder
325 |         for param in model.seq_encoder.parameters():
326 |             param.requires_grad = False
327 |         for param in model.seq_embedding.parameters():
328 |             param.requires_grad = False
329 |         for param in model.positional_embedding.parameters():
330 |             param.requires_grad = False
331 |         for param in model.fold_classification_linear.parameters():
332 |             param.requires_grad = False
333 |     if args.freeze_seq_decoder == 1:  # freeze the sequence decoder
334 |         for param in model.seq_decoder.parameters():
335 |             param.requires_grad = False
336 |         for param in model.seq_embedding.parameters():
337 |             param.requires_grad = False
338 |         for param in model.positional_embedding.parameters():
339 |             param.requires_grad = False
340 |         for param in model.decoder_out_linear.parameters():
341 |             param.requires_grad = False
342 | 
343 | 
344 | def main():
345 | 
346 | 
347 |     parser = argparse.ArgumentParser(description='Arguments for train.py')
348 |     parser.add_argument('--ntokens', default=23, type=int)  # 23 = 20 amino acids + stop/start/padding tokens (0, 21, 22; see loss() and inference.py)
349 |     parser.add_argument('--nhidden', default=128, type=int)
350 |     parser.add_argument('--nhead', default=4, type=int)
351 |     parser.add_argument('--decoder_feedforward', default=512, type=int)
352 |     parser.add_argument('--seq_encoder_feedforward', default=512, type=int)
353 |     parser.add_argument('--fold_encoder_feedforward', default=512, type=int)
354 |     parser.add_argument('--nlayers', default=6, type=int)
355 |     parser.add_argument('--dropout', default=0.1, type=float)
356 |     #parser.add_argument('--seq_embedding', default='ProteinSeqTransformer', type=str)
357 |     parser.add_argument('--nfolds', default=1227, type=int)  # number of fold classes
358 |     parser.add_argument('--data_path', default="../data/domain_dict_full.pkl", type=str)
359 |     parser.add_argument('--maxlen', default=200, type=int)
360 |     parser.add_argument('--batch_size', default=64, type=int)
361 |     parser.add_argument('--epochs', default=200, type=int)
362 |     parser.add_argument('--encoder', default='both', type=str)
363 |     parser.add_argument('--lr', default=1E-3, type=float)  # -1 selects the warmup schedule defined below
364 |     parser.add_argument('--model_save', default="trained_models/model1", type=str)
365 |     parser.add_argument('--pretrained_model', default=None, type=str)
366 |     parser.add_argument('--freeze_seq_encoder', default=1, type=int)
367 |     parser.add_argument('--freeze_seq_decoder', default=0, type=int)
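    # The lba* coefficients below weight the terms of the composite objective
    # computed in loss():
    #   L = lba0*fold2seq + lba1*seq2seq + lba2*foldclass + lba3*seqclass
    #       + lba4*(sim_score0 + sim_score1)
    # (lba5 and lba6 are parsed but unused in this file.)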
368 |     parser.add_argument('--augmentation', default=0, type=int)  # 1 enables the six-fold axis-permutation augmentation
369 |     parser.add_argument('--lba0', default=1, type=float)  # coefficient before fold2seq_loss
370 |     parser.add_argument('--lba1', default=0, type=float)  # coefficient before seq2seq_loss
371 |     parser.add_argument('--lba2', default=0, type=float)  # coefficient before foldclass_loss
372 |     parser.add_argument('--lba3', default=0, type=float)  # coefficient before seqclass_loss
373 |     parser.add_argument('--lba4', default=0, type=float)  # coefficient before sim_loss
374 |     parser.add_argument('--lba5', default=0, type=float)
375 |     parser.add_argument('--lba6', default=0, type=float)
376 |     args = parser.parse_args()
377 | 
378 |     with open(args.data_path, "rb") as f:
379 |         domain_data = pickle.load(f)
380 | 
381 |     if torch.cuda.is_available() and 'K40' not in torch.cuda.get_device_name(0):
382 |         device = 'cuda'
383 |     else:
384 |         device = 'cpu'
385 |     args.device = device
386 |     with open("check_device", "w") as f:
387 |         f.write(device)
388 | 
389 |     model = fold_classification_generator(args)
390 |     print ("model params:", model_statistics.net_param_num(model))
391 |     md = model.state_dict()
392 |     for i in md:
393 |         print (i, md[i].shape)  # list every parameter name and shape for inspection
394 |     load_pretrained_model(model, args)
395 | 
396 |     if torch.cuda.device_count() > 1:
397 |         print("Let's use ", torch.cuda.device_count(), " GPUs!")
398 |         with open("check_device", "w") as f:
399 |             f.write("Let's use "+str(torch.cuda.device_count())+" GPUs!")
400 |         model = nn.DataParallel(model)
401 |         args.batch_size *= torch.cuda.device_count()
402 | 
403 |     trainset = generator_dataset(args, domain_data, mode='train')
404 |     trainset_loader = DataLoader(trainset, batch_size=args.batch_size, shuffle=True, num_workers=8)
405 |     valset = generator_dataset(args, domain_data, mode='val')  # assumed fix: the original referenced an undefined val_loader below; note generator_dataset does not filter by mode
406 |     val_loader = DataLoader(valset, batch_size=args.batch_size, shuffle=False, num_workers=8)
407 | 
408 |     if args.lr!=-1:
409 |         Adam_opt = torch.optim.Adam(model.parameters(), lr=args.lr)
410 |         scheduler = None
411 |     else:
412 |         Adam_opt = torch.optim.Adam(model.parameters(), lr=1, betas=(0.9, 0.98), eps=1E-09)
413 |         lambdalr = lambda x: args.nhidden**(-0.5)*min((x+0.1)**(-0.5), x*((4000)**-1.5))  # Transformer-style warmup over 4000 steps; the +0.1 avoids a zero division at step 0
414 |         scheduler = torch.optim.lr_scheduler.LambdaLR(Adam_opt, lr_lambda=lambdalr)
415 |     best_m = eval(model, args, val_loader, 0)  # baseline validation metric before training
416 |     for e in range(1, args.epochs+1):
417 | 
418 |         if args.lba4!=0:
419 |             args.lba4 = 1.0/2**(e-3.)  # halve the similarity-loss weight each epoch (2**(3-e), i.e. 4 at epoch 1)
420 |         train_epoch(model, args, Adam_opt, scheduler, trainset_loader, e)
421 |         best_m = eval(model, args, val_loader, e)  # assumed intent: track the validation metric each epoch
422 |         torch.save({
423 |             'epoch': e,
424 |             'model_state_dict': model.state_dict(),
425 |             'optimizer_state_dict': Adam_opt.state_dict(),
426 |             'lrschedule_state_dict': scheduler.state_dict() if scheduler is not None else None,
427 |             'args': args,
428 |             'metric': best_m,  # inference.py reads this key back from the checkpoint
429 |         }, args.model_save+".e"+str(e))
430 | 
431 | 
432 | if __name__=='__main__':
433 |     main()
434 | 
--------------------------------------------------------------------------------