├── .gitignore ├── LICENSE ├── README.md ├── config_generator.py ├── datasets ├── ReadME ├── test.txt ├── test_length.txt ├── test_mat_Y.npy ├── train.txt └── train_length.txt ├── deepprime2sec.py ├── installations ├── deepprime2sec.yml └── requirements.txt ├── layers ├── crf.py └── utility.py ├── models ├── a_cnn_bilstm.py ├── b_cnn_bilstm_highway.py ├── c_cnn_bilstm_crf.py ├── d_cnn_bilstm_attention.py ├── e_cnn.py └── f_multiscale_cnn.py ├── sample_configs ├── model_a.yaml ├── model_b.yaml ├── model_c.yaml ├── model_d.yaml ├── model_e.yaml └── model_f.yaml └── utility ├── feed_generation_utility.py ├── file_utility.py ├── labeling_utility.py ├── list_set_util.py ├── training.py └── vis_utility.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | .idea/ 14 | dist/ 15 | datasets/*train* 16 | results/* 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | MANIFEST 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | db.sqlite3 61 | 62 | # Flask stuff: 63 | instance/ 64 | .webassets-cache 65 | 66 | # Scrapy stuff: 67 | .scrapy 68 | 69 | # Sphinx documentation 70 | docs/_build/ 71 | 72 | # PyBuilder 73 | target/ 74 | 75 | # Jupyter Notebook 76 | .ipynb_checkpoints 77 | 78 | # pyenv 79 | .python-version 80 | 81 | # celery beat schedule file 82 | celerybeat-schedule 83 | 84 | # SageMath parsed files 85 | *.sage.py 86 | 87 | # Environments 88 | .env 89 | .venv 90 | env/ 91 | venv/ 92 | ENV/ 93 | env.bak/ 94 | venv.bak/ 95 | 96 | # Spyder project settings 97 | .spyderproject 98 | .spyproject 99 | 100 | # Rope project settings 101 | .ropeproject 102 | 103 | # mkdocs documentation 104 | /site 105 | 106 | # mypy 107 | .mypy_cache/ 108 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2019 Ehsaneddin Asgari 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DeepPrime2Sec 2 | 3 | 4 | 5 | ## Table of Content 6 | 7 | [1. Summary](#Summary) 8 | 9 | [2. Installation](#Installation) 10 | 11 | [3. Running Configuration](#Configuration) 12 | 13 | [3.1 Features](#Features) 14 | 15 | [3.2 Training parameters](#Training) 16 | 17 | [3.3 Model specific parameters](#Models) 18 | 19 | [4. Output](##Output) 20 | 21 |
22 | 23 | # Summary 24 |
25 | 26 | DeepPrime2Sec is a deep learning-based tool for the prediction of protein secondary structure from the protein primary sequence. 27 | It facilitates evaluating the contribution of different features to this task, including one-hot vectors, biophysical features, 28 | protein sequence embedding (ProtVec), deep contextualized embedding (known as ELMo), and the Position Specific Scoring Matrix (PSSM). 29 | 30 | In addition to the role of features, it allows for the evaluation of various deep learning architectures, including the following models/mechanisms and 31 | certain combinations thereof: Bidirectional Long Short-Term Memory (BiLSTM), convolutional neural network (CNN), highway connections, 32 | attention mechanism, recurrent neural random fields, and gated multi-scale CNN. 33 | 34 | Our results suggest that PSSM concatenated with one-hot vectors is the most important feature set for the task of secondary structure prediction. 35 | Using the CNN-BiLSTM network, we achieved accuracies of 69.9% (single model) and 70.4% (ensemble of top-k models) for 8-class protein secondary structure prediction on the CB513 dataset, the most challenging dataset for protein secondary structure prediction. 36 | 37 | ``` 38 | @article {Asgari705426, 39 | author = {Asgari, Ehsaneddin and Poerner, Nina and McHardy, Alice C. and Mofrad, Mohammad R.K.}, 40 | title = {DeepPrime2Sec: Deep Learning for Protein Secondary Structure Prediction from the Primary Sequences}, 41 | elocation-id = {705426}, 42 | year = {2019}, 43 | doi = {10.1101/705426}, 44 | publisher = {Cold Spring Harbor Laboratory}, 45 | URL = {https://www.biorxiv.org/content/early/2019/07/18/705426}, 46 | eprint = {https://www.biorxiv.org/content/early/2019/07/18/705426.full.pdf}, 47 | journal = {bioRxiv} 48 | } 49 | ``` 50 | 51 | 52 | Through error analysis on the best performing model, we showed that misclassifications are significantly more common at positions that undergo secondary structure transitions, which is most likely due to inaccurate assignments of the secondary structure at the boundary regions. Notably, when amino acids at secondary structure transitions are ignored in the evaluation, the accuracy increases to 90.3%. Furthermore, the best performing model mostly mistook similar structures for one another, indicating that the deep learning model inferred high-level information on the secondary structure. 53 | 54 | 55 | DeepPrime2Sec and the datasets used are available here under the Apache 2 license. 56 | 57 | Return to the [table of content ↑](#table-of-content). 58 | 59 | 60 | 61 | # Installation 62 | 63 | ## Pip installation 64 | 65 | 66 | In order to install the required libraries for running DeepPrime2Sec, use the following command: 67 | 68 | ``` 69 | pip install -r installations/requirements.txt 70 | ``` 71 | 72 | Alternatively, you may use the conda installation. 73 | 74 | ## Conda installation 75 | 76 | In order to install the required libraries for running DeepPrime2Sec, use the following conda command: 77 | 78 | ``` 79 | conda create --name deepprime2sec --file installations/deepprime2sec.yml 80 | ``` 81 | 82 | Subsequently, you need to activate the created virtual environment before running: 83 | 84 | ``` 85 | source activate deepprime2sec 86 | ``` 87 | 88 | ## Download the training files 89 | 90 | 91 | Before running the software, make sure to download the training dataset (which was too large for git) from the following URL, 92 | then extract the files and copy them to the `datasets` directory.
93 | 94 | ``` 95 | http://deepbio.info/proteomics/datasets/deepprime2sec/train_files.tar.gz 96 | ``` 97 | 98 | 99 | Return to the [table of content ↑](#table-of-content). 100 | 101 |
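If you prefer to script this step, the following snippet (not part of the repository; `fetch_train_files` is a hypothetical helper) downloads the archive and unpacks it into `datasets/`, assuming the tarball contains the training files directly:

```
import tarfile
import urllib.request
from pathlib import Path

URL = "http://deepbio.info/proteomics/datasets/deepprime2sec/train_files.tar.gz"

def fetch_train_files(dest="datasets"):
    # Download the archive next to the repository and extract it into `dest`.
    Path(dest).mkdir(exist_ok=True)
    archive, _ = urllib.request.urlretrieve(URL, "train_files.tar.gz")
    with tarfile.open(archive, "r:gz") as tar:
        tar.extractall(dest)  # assumption: the archive holds the train_* files at its top level

if __name__ == "__main__":
    fetch_train_files()
```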
102 | 103 | 104 |
105 | # Running Configuration 106 | 107 | ### Running example 108 | 109 | In order to run the DeepPrime2Sec, you can simply use the following command. 110 | Every details on different deep learning models: architecture, hyper parameter, training parameters, will be provided in the yaml config file. 111 | Here we detail how this file should be created. Examples are also provided in `sample_configs/*.yaml`. 112 | 113 | ``` 114 | python deepprime2sec.py --config sample_configs/model_a.yaml 115 | ``` 116 | 117 | 118 | # Features to use 119 | 120 | 121 | We experiment on five sets of protein features to understand what are essential features for the task of protein secondary structure prediction. Although in 1999, PSSM was reported as an important feature to the secondary structure prediction (Jones et al, 1999), 122 | this was still unclear whether recently introduced distributed representations can outperform PSSM in such a task. For a systematic comparison, the features detailed as follows are used: 123 | 124 | 131 | 132 | In order to use combinations of features in the software please use the following keywords for the key of `features_to_use`. `features_to_use` is part of model parameters. 133 | The included features in the config will be concatenated as input: 134 | 135 | ``` 136 | model_paramters: 137 | features_to_use: 138 | - onehot 139 | - embedding 140 | - elmo 141 | - pssm 142 | - biophysical 143 | ``` 144 | 145 | 146 | Return to the [table of content ↑](#tableofcontent). 147 | 148 |
149 | 150 | ## Training parameters 151 |
152 | 153 | The following is an example of the parameters for running the training and storing the results (`run_parameters`). 154 | 155 | ``` 156 | run_parameters: 157 | domain_name: baseline 158 | setting_name: baseline 159 | epochs: 100 160 | test_batch_size: 100 161 | train_batch_size: 64 162 | patience: 10 163 | gpu: 1 164 | ``` 165 | 166 | 167 | ### `domain_name` and `setting_name` 168 | 169 | The results of the model are saved to the `results` directory. The `domain_name` and `setting_name` parameters are used as directory and sub-directory names inside `results` to store the model weights 170 | and results. 171 | 172 | ### `epochs` and batch sizes 173 | 174 | `epochs` refers to the number of times to iterate over the training data, and the batch sizes refer to the size of the data split used in each optimization step. 175 | For proper and faster learning we have already performed bucketing (sorting the training sequences according to their lengths), which also minimizes the padding operations. 176 | 177 | ### `patience` 178 | 179 | To avoid overfitting we perform early stopping: if the performance keeps improving on the training set but not on the test set for a number of epochs, we stop the training, 180 | because this indicates that the model is memorizing the training data and can no longer generalize to the test set. `patience` determines for how many epochs we wait for an improvement on the test set. 181 | 182 | ### `gpu` 183 | 184 | The GPU device ID to use for training/testing the model. 185 | 186 | Return to the [table of content ↑](#table-of-content). 187 | 188 |
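For reference, the `patience` behaviour corresponds to standard Keras-style early stopping. A minimal sketch (assuming a compiled Keras model and a held-out validation split; the checkpoint path is illustrative, not the repository's exact output layout):

```
from keras.callbacks import EarlyStopping, ModelCheckpoint

# Stop once the validation loss has not improved for `patience` epochs and
# keep the weights of the best epoch (mirrors run_parameters above).
callbacks = [
    EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True),
    ModelCheckpoint("results/baseline/baseline/best_weights.h5", save_best_only=True),
]

# model.fit(x_train, y_train, epochs=100, batch_size=64,
#           validation_data=(x_val, y_val), callbacks=callbacks)
```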
189 | 190 | ## How to configure input for different deep learning models 191 |
192 | 193 | ## Model (a) CNN + BiLSTM 194 | 195 | For the details of the CNN + BiLSTM model please refer to the paper. To specify this model, use `deep_learning_model: model_a_cnn_bilstm`. 196 | 197 | ![model_a](https://user-images.githubusercontent.com/8551117/61132550-e0457a00-a4bb-11e9-84e9-538d6455ce98.png) 198 | 199 | `convs` refers to the convolution window sizes (in the following example we use 5 window sizes of 3, 5, 7, 11, and 21). 200 | 201 | `filter_size` is the number of convolutional filters. 202 | 203 | `dense_size` is the size of the feed-forward layers used before and after the LSTM. 204 | 205 | `dropout_rate` is the dropout rate. 206 | 207 | `lstm_size` is the hidden size of the bidirectional LSTM. 208 | 209 | `lr` is the learning rate. 210 | 211 | `features_to_use` is already covered at [3.1 Features](#Features). 212 | 213 | 214 | Sample config file 215 | ``` 216 | deep_learning_model: model_a_cnn_bilstm 217 | model_paramters: 218 | convs: 219 | - 3 220 | - 5 221 | - 7 222 | - 11 223 | - 21 224 | filter_size: 256 225 | dense_size: 1000 226 | dropout_rate: 0.5 227 | lstm_size: 1000 228 | lr: 0.001 229 | features_to_use: 230 | - onehot 231 | - pssm 232 | ``` 233 | 234 | 235 | 236 | ## Model (b) CNN + BiLSTM + Highway Connection of PSSM 237 | 238 | For the details of the CNN + BiLSTM + Highway Connection of PSSM model please refer to the paper. To specify this model, use `deep_learning_model: model_b_cnn_bilstm_highway`. 239 | 240 | ![model_b](https://user-images.githubusercontent.com/8551117/61133494-d91f6b80-a4bd-11e9-8999-4ce501289ec2.png) 241 | 242 | `convs` refers to the convolution window sizes (in the following example we use 5 window sizes of 3, 5, 7, 11, and 21). 243 | 244 | `filter_size` is the number of convolutional filters. 245 | 246 | `dense_size` is the size of the feed-forward layers used before and after the LSTM. 247 | 248 | `dropout_rate` is the dropout rate. 249 | 250 | `lstm_size` is the hidden size of the bidirectional LSTM. 251 | 252 | `lr` is the learning rate. 253 | 254 | `features_to_use` is already covered at [3.1 Features](#Features). 255 | 256 | `use_CRF` indicates whether you would like to include a CRF layer at the end. 257 | 258 | 259 | Sample config file 260 | ``` 261 | deep_learning_model: model_b_cnn_bilstm_highway 262 | model_paramters: 263 | convs: 264 | - 3 265 | - 5 266 | - 7 267 | - 11 268 | - 21 269 | filter_size: 256 270 | dense_size: 1000 271 | dropout_rate: 0.5 272 | lstm_size: 1000 273 | lr: 0.001 274 | features_to_use: 275 | - onehot 276 | - pssm 277 | use_CRF: false 278 | ``` 279 | 280 | 281 | ## Model (c) CNN + BiLSTM + Conditional Random Field Layer 282 | 283 | For the details of the CNN + BiLSTM + Conditional Random Field Layer model please refer to the paper. To specify this model, use `deep_learning_model: model_c_cnn_bilstm_crf`. 284 | 285 | ![model_c](https://user-images.githubusercontent.com/8551117/61134185-54355180-a4bf-11e9-9586-d7b996f205a7.png) 286 | 287 | `convs` refers to the convolution window sizes (in the following example we use 5 window sizes of 3, 5, 7, 11, and 21). 288 | 289 | `filter_size` is the number of convolutional filters. 290 | 291 | `dense_size` is the size of the feed-forward layers used before and after the LSTM. 292 | 293 | `dropout_rate` is the dropout rate. 294 | 295 | `lstm_size` is the hidden size of the bidirectional LSTM. 296 | 297 | `lr` is the learning rate. 298 | 299 | `features_to_use` is already covered at [3.1 Features](#Features). 300 | 301 | `CRF_input_dim` is the input dimension of the CRF layer.
302 | 303 | 304 | Sample config file 305 | ``` 306 | deep_learning_model: model_c_cnn_bilstm_crf 307 | model_paramters: 308 | convs: 309 | - 3 310 | - 5 311 | - 7 312 | - 11 313 | - 21 314 | filter_size: 256 315 | dense_size: 1000 316 | dropout_rate: 0.5 317 | lstm_size: 1000 318 | lr: 0.001 319 | features_to_use: 320 | - onehot 321 | - pssm 323 | CRF_input_dim: 200 324 | ``` 325 | 326 | ## Model (d) CNN + BiLSTM + Attention mechanism 327 | 328 | For the details of the CNN + BiLSTM + Attention mechanism model please refer to the paper. To specify this model, use `deep_learning_model: model_d_cnn_bilstm_attention`. 329 | 330 | ![model_d-2](https://user-images.githubusercontent.com/8551117/61134627-4f24d200-a4c0-11e9-982b-49279a5da669.png) 331 | 332 | `attention_type` is the attention type, selected from `additive` or `multiplicative`. 333 | 334 | `attention_units` is the number of attention units. 335 | 336 | `convs` refers to the convolution window sizes (in the following example we use 5 window sizes of 3, 5, 7, 11, and 21). 337 | 338 | `filter_size` is the number of convolutional filters. 339 | 340 | `dense_size` is the size of the feed-forward layers used before and after the LSTM. 341 | 342 | `dropout_rate` is the dropout rate. 343 | 344 | `lstm_size` is the hidden size of the bidirectional LSTM. 345 | 346 | `lr` is the learning rate. 347 | 348 | `features_to_use` is already covered at [3.1 Features](#Features). 349 | 350 | `use_CRF` indicates whether you would like to include a CRF layer at the end. 351 | 352 | 353 | 354 | Sample config file 355 | ``` 356 | deep_learning_model: model_d_cnn_bilstm_attention 357 | model_paramters: 358 | attention_type: additive 359 | attention_units: 32 360 | convs: 361 | - 3 362 | - 5 363 | - 7 364 | - 11 365 | - 21 366 | filter_size: 256 367 | dense_size: 1000 368 | dropout_rate: 0.5 369 | lstm_size: 1000 370 | lr: 0.001 371 | features_to_use: 372 | - onehot 373 | - pssm 375 | use_CRF: false 376 | ``` 377 | 378 | ## Model (e) CNN 379 | 380 | For the details of the CNN model please refer to the paper. To specify this model, use `deep_learning_model: model_e_cnn`. 381 | 382 | ![model_e](https://user-images.githubusercontent.com/8551117/61135353-b42cf780-a4c1-11e9-87aa-fdcc13a2892f.png) 383 | 384 | `convs` refers to the convolution window sizes (in the following example we use 5 window sizes of 3, 5, 7, 11, and 21). 385 | 386 | `filter_size` is the number of convolutional filters. 387 | 388 | `dense_size` is the size of the feed-forward layers used after the concatenation of the convolution results. 389 | 390 | `dropout_rate` is the dropout rate. 391 | 392 | `lr` is the learning rate. 393 | 394 | `features_to_use` is already covered at [3.1 Features](#Features). 395 | 396 | `use_CRF` indicates whether you would like to include a CRF layer at the end.
397 | 398 | Sample config file 399 | ``` 400 | deep_learning_model: model_e_cnn 401 | model_paramters: 402 | convs: 403 | - 3 404 | - 5 405 | - 7 406 | - 11 407 | - 21 408 | filter_size: 256 409 | dense_size: 1000 410 | dropout_rate: 0.5 412 | lr: 0.001 413 | features_to_use: 414 | - onehot 415 | - pssm 417 | use_CRF: false 418 | ``` 419 | 420 | ## Model (f) Multiscale CNN 421 | 422 | For the details of the Multiscale CNN model please refer to the paper. To specify this model, use `deep_learning_model: model_f_multiscale_cnn`. 423 | 424 | ![model_f](https://user-images.githubusercontent.com/8551117/61135721-85fbe780-a4c2-11e9-8f65-3ea3ac2b17ee.png) 425 | 426 | `multiscalecnn_layers` is how many gated multiscale CNN layers should be stacked. 427 | 428 | `cnn_regularizer` is the regularization parameter for the CNN. 429 | 430 | `convs` refers to the convolution window sizes (in the following example we use 5 window sizes of 3, 5, 7, 11, and 21). 431 | 432 | `filter_size` is the number of convolutional filters. 433 | 434 | `dense_size` is the size of the feed-forward layers used after the concatenation of the convolution results. 435 | 436 | `dropout_rate` is the dropout rate. 437 | 438 | `lr` is the learning rate. 439 | 440 | `features_to_use` is already covered at [3.1 Features](#Features). 441 | 442 | `use_CRF` indicates whether you would like to include a CRF layer at the end. 443 | 444 | Sample config file 445 | ``` 446 | deep_learning_model: model_f_multiscale_cnn 447 | model_paramters: 448 | cnn_regularizer: 5.0e-05 449 | multiscalecnn_layers: 3 450 | convs: 451 | - 3 452 | - 5 453 | - 7 454 | - 11 455 | - 21 456 | filter_size: 256 457 | dense_size: 1000 458 | dropout_rate: 0.5 460 | lr: 0.001 461 | features_to_use: 462 | - onehot 463 | - pssm 465 | use_CRF: false 466 | ``` 467 | 468 | Return to the [table of content ↑](#table-of-content). 469 | 470 |
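To give a concrete feel for how the parameters above map onto an architecture, here is a heavily simplified Keras sketch in the spirit of model (a), CNN + BiLSTM. This is not the repository's implementation in `models/a_cnn_bilstm.py`; layer sizes follow the sample config, and the feature width shown is only an assumption for one-hot + PSSM input:

```
from keras.layers import (Input, Conv1D, Concatenate, Dense, Dropout,
                          Bidirectional, LSTM, TimeDistributed)
from keras.models import Model

n_features = 41   # assumed width of the concatenated onehot + pssm features
n_classes = 8     # 8-class secondary structure labels

inp = Input(shape=(None, n_features))
# One convolution per window size in `convs`, applied in parallel over the sequence.
conv_outputs = [Conv1D(filters=256, kernel_size=k, padding="same", activation="relu")(inp)
                for k in (3, 5, 7, 11, 21)]
x = Concatenate()(conv_outputs)
x = TimeDistributed(Dense(1000, activation="relu"))(x)   # dense_size before the LSTM
x = Dropout(0.5)(x)                                      # dropout_rate
x = Bidirectional(LSTM(1000, return_sequences=True))(x)  # lstm_size
x = TimeDistributed(Dense(1000, activation="relu"))(x)   # dense_size after the LSTM
out = TimeDistributed(Dense(n_classes, activation="softmax"))(x)

model = Model(inp, out)
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])  # lr defaults to 0.001
```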
471 | 472 | ## Your own model 473 | 474 | Create your own model by following the templates of models (a) to (f) in the `models` directory, and test its performance against the existing methods. 475 | 476 | Return to the [table of content ↑](#table-of-content). 477 | 478 | 479 | ## Output 480 |
481 | 482 | Finally, after completion of training, DeepPrime2Sec generates a PDF report with the following information at `results/$domain/$setting/report.pdf`: 483 | 484 | - [x] The accuracy of the trained model on the standard test set of the task (CB513) 485 | - [x] Confusion matrix of the model 486 | - [x] Contingency matrix of the errors at secondary structure transition boundaries, along with the p-values of the Chi-square and G-tests. 487 | - [x] The learning curve 488 | - [x] The neural network weights for the best models 489 | 490 | 491 | ![Screen Shot 2019-07-12 at 5 33 30 PM](https://user-images.githubusercontent.com/8551117/61140191-45ed3280-a4cb-11e9-95f5-e12745b5de61.png) 492 | -------------------------------------------------------------------------------- /config_generator.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | 3 | config_model_a = {'run_parameters': 4 | {'domain_name': 'baseline', 'gpu': 1, 'setting_name': 'baseline', 'train_batch_size': 64, 5 | 'test_batch_size': 100, 'patience': 10, 'epochs': 100}, 6 | 'deep_learning_model': 'model_a_cnn_bilstm', 7 | 'model_paramters': {'convs': [3, 5, 7, 11, 21], 'dense_size': 1000, 'lstm_size': 1000, 8 | 'dropout_rate' : 0.5, 'filter_size':256,'lr' : 0.001, 'features_to_use': ['onehot', 9 | 'pssm']}} 10 | 11 | config_model_b = {'run_parameters': 12 | {'domain_name': 'baseline', 'gpu': 1, 'setting_name': 'baseline', 'train_batch_size': 64, 13 | 'test_batch_size': 100, 'patience': 10, 'epochs': 100}, 14 | 'deep_learning_model': 'model_b_cnn_bilstm_highway', 15 | 'model_paramters': {'convs': [3, 5, 7, 11, 21], 'dense_size': 1000, 'lstm_size': 1000, 16 | 'dropout_rate' : 0.5,'filter_size':256, 'lr' : 0.001, 'features_to_use': ['onehot', 17 | 'pssm'], 'use_CRF':False}} 18 | 19 | config_model_c = {'run_parameters': 20 | {'domain_name': 'baseline', 'gpu': 1, 'setting_name': 'baseline', 'train_batch_size': 64, 21 | 'test_batch_size': 100, 'patience': 10, 'epochs': 100}, 22 | 'deep_learning_model': 'model_c_cnn_bilstm_crf', 23 | 'model_paramters': {'convs': [3, 5, 7, 11, 21], 'dense_size': 1000, 'lstm_size': 1000, 24 | 'dropout_rate' : 0.5, 'filter_size':256, 'lr' : 0.001, 'features_to_use': ['onehot', 25 | 'pssm'], 'CRF_input_dim':200}} 26 | config_model_d = {'run_parameters': 27 | {'domain_name': 'baseline', 'gpu': 1, 'setting_name': 'baseline', 'train_batch_size': 64, 28 | 'test_batch_size': 100, 'patience': 10, 'epochs': 100}, 29 | 'deep_learning_model': 'model_d_cnn_bilstm_attention', 30 | 'model_paramters': {'convs': [3, 5, 7, 11, 21], 'dense_size': 1000, 'lstm_size': 1000, 31 | 'dropout_rate' : 0.5, 'filter_size':256,'lr' : 0.001, 'features_to_use': ['onehot', 32 | 'pssm'], 'use_CRF':False, 'attention_units':32, 'attention_type':'additive'}} 33 | 34 | config_model_e = {'run_parameters': 35 | {'domain_name': 'baseline', 'gpu': 1, 'setting_name': 'baseline', 'train_batch_size': 64, 36 | 'test_batch_size': 100, 'patience': 10, 'epochs': 100}, 37 | 'deep_learning_model': 'model_e_cnn', 38 | 'model_paramters': {'convs': [3, 5, 7, 11, 21], 'dense_size': 1000, 39 | 'dropout_rate' : 0.5, 'lr' : 0.001, 'filter_size':256,'features_to_use': ['onehot', 40 | 'pssm'], 'use_CRF':False}} 41 | 42 | #multiplicative 43 | 44 | config_model_f = {'run_parameters': 45 | {'domain_name': 'baseline', 'gpu': 1, 'setting_name': 'baseline', 'train_batch_size': 64, 46 | 'test_batch_size': 100, 'patience': 10, 'epochs': 100}, 47 | 'deep_learning_model': 'model_f_multiscale_cnn', 48 | 'model_paramters':
{'convs': [3, 5, 7, 11, 21], 49 | 'dropout_rate' : 0.5, 'lr' : 0.001, 'filter_size':256, 'features_to_use': ['onehot', 50 | 'pssm'], 'use_CRF':False, 'lr':0.001, 'cnn_regularizer':0.00005, 'multiscalecnn_layers':3}} 51 | 52 | models = ['a','b','c','d','e','f'] 53 | 54 | for idx, config in enumerate([config_model_a,config_model_b, config_model_c, config_model_d, config_model_e, config_model_f]): 55 | c = yaml.dump(config) 56 | f = open('sample_configs/model_'+models[idx]+'.yaml', 'w') 57 | f.write(c) 58 | f.close() 59 | 60 | 61 | -------------------------------------------------------------------------------- /datasets/ReadME: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /datasets/test_length.txt: -------------------------------------------------------------------------------- 1 | 20 2 | 20 3 | 21 4 | 21 5 | 24 6 | 24 7 | 25 8 | 26 9 | 27 10 | 28 11 | 28 12 | 29 13 | 29 14 | 30 15 | 30 16 | 30 17 | 31 18 | 33 19 | 36 20 | 36 21 | 36 22 | 36 23 | 36 24 | 37 25 | 37 26 | 37 27 | 39 28 | 40 29 | 40 30 | 43 31 | 43 32 | 43 33 | 44 34 | 45 35 | 46 36 | 47 37 | 48 38 | 48 39 | 49 40 | 50 41 | 50 42 | 51 43 | 51 44 | 51 45 | 52 46 | 53 47 | 53 48 | 53 49 | 53 50 | 54 51 | 54 52 | 55 53 | 56 54 | 56 55 | 57 56 | 57 57 | 58 58 | 58 59 | 59 60 | 60 61 | 60 62 | 61 63 | 62 64 | 63 65 | 63 66 | 63 67 | 64 68 | 64 69 | 66 70 | 67 71 | 67 72 | 67 73 | 68 74 | 68 75 | 69 76 | 69 77 | 69 78 | 70 79 | 70 80 | 71 81 | 71 82 | 71 83 | 73 84 | 73 85 | 73 86 | 73 87 | 73 88 | 73 89 | 74 90 | 74 91 | 74 92 | 74 93 | 75 94 | 75 95 | 76 96 | 76 97 | 77 98 | 77 99 | 78 100 | 78 101 | 78 102 | 79 103 | 80 104 | 81 105 | 81 106 | 82 107 | 82 108 | 82 109 | 83 110 | 83 111 | 83 112 | 83 113 | 84 114 | 84 115 | 85 116 | 85 117 | 85 118 | 85 119 | 85 120 | 86 121 | 86 122 | 86 123 | 87 124 | 87 125 | 87 126 | 87 127 | 87 128 | 87 129 | 88 130 | 89 131 | 89 132 | 89 133 | 91 134 | 92 135 | 93 136 | 94 137 | 95 138 | 95 139 | 96 140 | 96 141 | 96 142 | 96 143 | 97 144 | 97 145 | 98 146 | 98 147 | 98 148 | 98 149 | 99 150 | 99 151 | 99 152 | 100 153 | 100 154 | 100 155 | 100 156 | 101 157 | 101 158 | 101 159 | 101 160 | 102 161 | 102 162 | 102 163 | 102 164 | 102 165 | 102 166 | 103 167 | 103 168 | 103 169 | 103 170 | 104 171 | 104 172 | 104 173 | 104 174 | 105 175 | 105 176 | 106 177 | 106 178 | 107 179 | 107 180 | 107 181 | 107 182 | 107 183 | 108 184 | 108 185 | 108 186 | 109 187 | 109 188 | 110 189 | 110 190 | 110 191 | 111 192 | 111 193 | 111 194 | 111 195 | 112 196 | 112 197 | 113 198 | 114 199 | 114 200 | 114 201 | 114 202 | 114 203 | 114 204 | 114 205 | 115 206 | 115 207 | 115 208 | 116 209 | 116 210 | 116 211 | 117 212 | 117 213 | 117 214 | 118 215 | 118 216 | 119 217 | 119 218 | 119 219 | 119 220 | 120 221 | 120 222 | 120 223 | 120 224 | 120 225 | 121 226 | 122 227 | 122 228 | 122 229 | 122 230 | 122 231 | 123 232 | 123 233 | 123 234 | 123 235 | 124 236 | 124 237 | 125 238 | 125 239 | 126 240 | 126 241 | 127 242 | 127 243 | 128 244 | 128 245 | 128 246 | 129 247 | 129 248 | 129 249 | 129 250 | 130 251 | 130 252 | 130 253 | 130 254 | 131 255 | 131 256 | 131 257 | 132 258 | 132 259 | 134 260 | 134 261 | 135 262 | 136 263 | 136 264 | 136 265 | 136 266 | 136 267 | 137 268 | 137 269 | 138 270 | 138 271 | 139 272 | 140 273 | 141 274 | 141 275 | 142 276 | 142 277 | 142 278 | 143 279 | 143 280 | 144 281 | 144 282 | 145 283 | 145 284 | 146 285 | 147 286 | 147 287 | 148 288 | 148 289 | 149 290 | 149 
291 | 151 292 | 151 293 | 152 294 | 152 295 | 153 296 | 153 297 | 153 298 | 153 299 | 153 300 | 154 301 | 154 302 | 154 303 | 154 304 | 154 305 | 155 306 | 155 307 | 157 308 | 157 309 | 158 310 | 158 311 | 158 312 | 158 313 | 159 314 | 159 315 | 162 316 | 163 317 | 164 318 | 164 319 | 164 320 | 166 321 | 166 322 | 166 323 | 169 324 | 169 325 | 171 326 | 172 327 | 173 328 | 173 329 | 174 330 | 174 331 | 175 332 | 175 333 | 176 334 | 177 335 | 177 336 | 177 337 | 178 338 | 178 339 | 179 340 | 180 341 | 181 342 | 181 343 | 182 344 | 182 345 | 183 346 | 184 347 | 185 348 | 185 349 | 185 350 | 185 351 | 185 352 | 186 353 | 186 354 | 186 355 | 187 356 | 188 357 | 188 358 | 190 359 | 190 360 | 191 361 | 191 362 | 195 363 | 195 364 | 197 365 | 197 366 | 198 367 | 198 368 | 198 369 | 200 370 | 200 371 | 200 372 | 204 373 | 204 374 | 206 375 | 206 376 | 206 377 | 208 378 | 208 379 | 209 380 | 210 381 | 210 382 | 211 383 | 211 384 | 212 385 | 212 386 | 213 387 | 213 388 | 214 389 | 215 390 | 216 391 | 216 392 | 216 393 | 218 394 | 218 395 | 220 396 | 220 397 | 220 398 | 225 399 | 226 400 | 228 401 | 228 402 | 228 403 | 228 404 | 229 405 | 229 406 | 230 407 | 230 408 | 230 409 | 230 410 | 233 411 | 236 412 | 237 413 | 239 414 | 241 415 | 241 416 | 243 417 | 247 418 | 248 419 | 249 420 | 250 421 | 252 422 | 253 423 | 253 424 | 255 425 | 256 426 | 257 427 | 264 428 | 264 429 | 266 430 | 269 431 | 273 432 | 273 433 | 273 434 | 275 435 | 280 436 | 280 437 | 283 438 | 283 439 | 285 440 | 285 441 | 286 442 | 289 443 | 289 444 | 291 445 | 291 446 | 291 447 | 293 448 | 293 449 | 293 450 | 295 451 | 296 452 | 296 453 | 298 454 | 298 455 | 299 456 | 302 457 | 306 458 | 307 459 | 308 460 | 309 461 | 310 462 | 311 463 | 314 464 | 316 465 | 316 466 | 317 467 | 317 468 | 317 469 | 319 470 | 328 471 | 329 472 | 330 473 | 333 474 | 334 475 | 337 476 | 339 477 | 340 478 | 342 479 | 344 480 | 349 481 | 354 482 | 358 483 | 360 484 | 363 485 | 374 486 | 374 487 | 381 488 | 385 489 | 388 490 | 388 491 | 391 492 | 393 493 | 396 494 | 399 495 | 405 496 | 414 497 | 426 498 | 429 499 | 433 500 | 449 501 | 456 502 | 461 503 | 468 504 | 481 505 | 483 506 | 490 507 | 498 508 | 506 509 | 526 510 | 534 511 | 544 512 | 576 513 | 700 514 | 700 515 | -------------------------------------------------------------------------------- /datasets/test_mat_Y.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ehsanasgari/DeepPrime2Sec/b0932214b85a6d949caf4348b78b01b207f266c7/datasets/test_mat_Y.npy -------------------------------------------------------------------------------- /datasets/train_length.txt: -------------------------------------------------------------------------------- 1 | 12 2 | 14 3 | 15 4 | 17 5 | 18 6 | 18 7 | 20 8 | 20 9 | 21 10 | 21 11 | 21 12 | 24 13 | 25 14 | 25 15 | 27 16 | 27 17 | 28 18 | 29 19 | 29 20 | 31 21 | 31 22 | 32 23 | 33 24 | 33 25 | 34 26 | 35 27 | 35 28 | 35 29 | 38 30 | 39 31 | 39 32 | 39 33 | 39 34 | 39 35 | 40 36 | 41 37 | 42 38 | 42 39 | 43 40 | 43 41 | 43 42 | 43 43 | 43 44 | 43 45 | 43 46 | 43 47 | 43 48 | 45 49 | 45 50 | 45 51 | 45 52 | 45 53 | 45 54 | 46 55 | 46 56 | 46 57 | 47 58 | 47 59 | 47 60 | 47 61 | 47 62 | 48 63 | 48 64 | 48 65 | 48 66 | 48 67 | 48 68 | 48 69 | 48 70 | 49 71 | 49 72 | 49 73 | 49 74 | 49 75 | 50 76 | 50 77 | 50 78 | 50 79 | 51 80 | 51 81 | 51 82 | 51 83 | 51 84 | 51 85 | 51 86 | 51 87 | 52 88 | 52 89 | 52 90 | 52 91 | 52 92 | 52 93 | 52 94 | 52 95 | 53 96 | 53 97 | 53 98 | 53 99 | 53 100 | 53 101 | 53 102 
| 53 103 | 53 104 | 53 105 | 54 106 | 54 107 | 54 108 | 54 109 | 54 110 | 54 111 | 54 112 | 54 113 | 54 114 | 54 115 | 54 116 | 54 117 | 55 118 | 55 119 | 55 120 | 56 121 | 56 122 | 56 123 | 56 124 | 56 125 | 56 126 | 56 127 | 56 128 | 56 129 | 56 130 | 57 131 | 57 132 | 57 133 | 57 134 | 57 135 | 57 136 | 57 137 | 57 138 | 57 139 | 57 140 | 57 141 | 57 142 | 57 143 | 57 144 | 57 145 | 57 146 | 58 147 | 58 148 | 58 149 | 58 150 | 58 151 | 58 152 | 58 153 | 58 154 | 58 155 | 58 156 | 59 157 | 59 158 | 59 159 | 59 160 | 59 161 | 59 162 | 59 163 | 59 164 | 59 165 | 59 166 | 60 167 | 60 168 | 60 169 | 60 170 | 60 171 | 60 172 | 60 173 | 60 174 | 60 175 | 60 176 | 60 177 | 60 178 | 60 179 | 61 180 | 61 181 | 61 182 | 61 183 | 61 184 | 61 185 | 61 186 | 61 187 | 61 188 | 61 189 | 61 190 | 61 191 | 61 192 | 61 193 | 61 194 | 62 195 | 62 196 | 62 197 | 62 198 | 62 199 | 62 200 | 62 201 | 62 202 | 62 203 | 62 204 | 62 205 | 62 206 | 62 207 | 62 208 | 63 209 | 63 210 | 63 211 | 63 212 | 63 213 | 63 214 | 63 215 | 63 216 | 63 217 | 63 218 | 63 219 | 63 220 | 63 221 | 63 222 | 63 223 | 63 224 | 63 225 | 64 226 | 64 227 | 64 228 | 64 229 | 64 230 | 64 231 | 64 232 | 64 233 | 64 234 | 64 235 | 65 236 | 65 237 | 65 238 | 65 239 | 65 240 | 65 241 | 65 242 | 65 243 | 65 244 | 65 245 | 65 246 | 65 247 | 65 248 | 65 249 | 65 250 | 65 251 | 65 252 | 66 253 | 66 254 | 66 255 | 66 256 | 66 257 | 66 258 | 66 259 | 66 260 | 66 261 | 66 262 | 66 263 | 66 264 | 66 265 | 66 266 | 66 267 | 66 268 | 66 269 | 66 270 | 66 271 | 66 272 | 66 273 | 67 274 | 67 275 | 67 276 | 67 277 | 67 278 | 67 279 | 67 280 | 67 281 | 67 282 | 67 283 | 67 284 | 67 285 | 67 286 | 67 287 | 67 288 | 67 289 | 67 290 | 68 291 | 68 292 | 68 293 | 68 294 | 68 295 | 68 296 | 68 297 | 68 298 | 68 299 | 68 300 | 68 301 | 68 302 | 68 303 | 69 304 | 69 305 | 69 306 | 69 307 | 69 308 | 69 309 | 69 310 | 69 311 | 69 312 | 69 313 | 69 314 | 69 315 | 69 316 | 69 317 | 69 318 | 69 319 | 69 320 | 70 321 | 70 322 | 70 323 | 70 324 | 70 325 | 70 326 | 70 327 | 70 328 | 70 329 | 70 330 | 70 331 | 70 332 | 70 333 | 70 334 | 71 335 | 71 336 | 71 337 | 71 338 | 71 339 | 71 340 | 71 341 | 72 342 | 72 343 | 72 344 | 72 345 | 72 346 | 72 347 | 72 348 | 72 349 | 72 350 | 72 351 | 72 352 | 73 353 | 73 354 | 73 355 | 73 356 | 73 357 | 73 358 | 73 359 | 73 360 | 73 361 | 73 362 | 73 363 | 73 364 | 73 365 | 73 366 | 73 367 | 73 368 | 73 369 | 74 370 | 74 371 | 74 372 | 74 373 | 74 374 | 74 375 | 74 376 | 74 377 | 74 378 | 74 379 | 74 380 | 74 381 | 74 382 | 74 383 | 74 384 | 74 385 | 74 386 | 74 387 | 75 388 | 75 389 | 75 390 | 75 391 | 75 392 | 75 393 | 75 394 | 75 395 | 75 396 | 75 397 | 75 398 | 75 399 | 75 400 | 75 401 | 75 402 | 75 403 | 75 404 | 75 405 | 76 406 | 76 407 | 76 408 | 76 409 | 76 410 | 76 411 | 76 412 | 76 413 | 76 414 | 76 415 | 76 416 | 76 417 | 76 418 | 76 419 | 77 420 | 77 421 | 77 422 | 77 423 | 77 424 | 77 425 | 77 426 | 77 427 | 77 428 | 77 429 | 77 430 | 77 431 | 77 432 | 77 433 | 77 434 | 78 435 | 78 436 | 78 437 | 78 438 | 78 439 | 78 440 | 78 441 | 78 442 | 78 443 | 78 444 | 78 445 | 78 446 | 78 447 | 78 448 | 78 449 | 78 450 | 78 451 | 78 452 | 78 453 | 78 454 | 79 455 | 79 456 | 79 457 | 79 458 | 79 459 | 79 460 | 79 461 | 79 462 | 79 463 | 79 464 | 79 465 | 79 466 | 79 467 | 79 468 | 79 469 | 80 470 | 80 471 | 80 472 | 80 473 | 80 474 | 80 475 | 80 476 | 80 477 | 80 478 | 80 479 | 80 480 | 80 481 | 80 482 | 80 483 | 80 484 | 80 485 | 80 486 | 80 487 | 80 488 | 80 489 | 80 490 | 80 491 | 80 492 | 80 493 | 80 494 | 81 495 | 81 496 | 81 497 
| 81 498 | 81 499 | 81 500 | 81 501 | 81 502 | 81 503 | 81 504 | 81 505 | 81 506 | 81 507 | 81 508 | 81 509 | 81 510 | 81 511 | 81 512 | 81 513 | 81 514 | 81 515 | 81 516 | 81 517 | 82 518 | 82 519 | 82 520 | 82 521 | 82 522 | 82 523 | 82 524 | 82 525 | 82 526 | 82 527 | 82 528 | 82 529 | 82 530 | 82 531 | 82 532 | 82 533 | 82 534 | 82 535 | 82 536 | 82 537 | 82 538 | 82 539 | 83 540 | 83 541 | 83 542 | 83 543 | 83 544 | 83 545 | 83 546 | 83 547 | 83 548 | 83 549 | 83 550 | 83 551 | 83 552 | 83 553 | 84 554 | 84 555 | 84 556 | 84 557 | 84 558 | 84 559 | 84 560 | 84 561 | 84 562 | 84 563 | 84 564 | 84 565 | 84 566 | 84 567 | 84 568 | 84 569 | 84 570 | 85 571 | 85 572 | 85 573 | 85 574 | 85 575 | 85 576 | 85 577 | 85 578 | 85 579 | 85 580 | 85 581 | 85 582 | 85 583 | 85 584 | 85 585 | 85 586 | 85 587 | 85 588 | 85 589 | 85 590 | 86 591 | 86 592 | 86 593 | 86 594 | 86 595 | 86 596 | 86 597 | 86 598 | 86 599 | 86 600 | 86 601 | 86 602 | 86 603 | 86 604 | 86 605 | 86 606 | 86 607 | 86 608 | 87 609 | 87 610 | 87 611 | 87 612 | 87 613 | 87 614 | 87 615 | 87 616 | 87 617 | 87 618 | 87 619 | 87 620 | 87 621 | 87 622 | 87 623 | 87 624 | 87 625 | 87 626 | 88 627 | 88 628 | 88 629 | 88 630 | 88 631 | 88 632 | 88 633 | 88 634 | 88 635 | 88 636 | 89 637 | 89 638 | 89 639 | 89 640 | 89 641 | 89 642 | 89 643 | 89 644 | 89 645 | 89 646 | 89 647 | 89 648 | 89 649 | 89 650 | 89 651 | 89 652 | 89 653 | 89 654 | 89 655 | 89 656 | 89 657 | 89 658 | 89 659 | 89 660 | 89 661 | 89 662 | 89 663 | 89 664 | 89 665 | 90 666 | 90 667 | 90 668 | 90 669 | 90 670 | 90 671 | 90 672 | 90 673 | 90 674 | 90 675 | 90 676 | 90 677 | 90 678 | 90 679 | 90 680 | 90 681 | 90 682 | 90 683 | 90 684 | 90 685 | 90 686 | 90 687 | 90 688 | 90 689 | 90 690 | 90 691 | 90 692 | 90 693 | 90 694 | 90 695 | 90 696 | 90 697 | 91 698 | 91 699 | 91 700 | 91 701 | 91 702 | 91 703 | 91 704 | 91 705 | 91 706 | 91 707 | 91 708 | 91 709 | 91 710 | 91 711 | 91 712 | 91 713 | 91 714 | 91 715 | 91 716 | 91 717 | 91 718 | 92 719 | 92 720 | 92 721 | 92 722 | 92 723 | 92 724 | 92 725 | 92 726 | 92 727 | 92 728 | 92 729 | 92 730 | 92 731 | 92 732 | 92 733 | 92 734 | 92 735 | 92 736 | 92 737 | 92 738 | 92 739 | 92 740 | 92 741 | 92 742 | 92 743 | 92 744 | 92 745 | 92 746 | 93 747 | 93 748 | 93 749 | 93 750 | 93 751 | 93 752 | 93 753 | 93 754 | 93 755 | 93 756 | 93 757 | 93 758 | 93 759 | 93 760 | 93 761 | 94 762 | 94 763 | 94 764 | 94 765 | 94 766 | 94 767 | 94 768 | 94 769 | 94 770 | 94 771 | 94 772 | 94 773 | 94 774 | 94 775 | 94 776 | 94 777 | 94 778 | 94 779 | 94 780 | 94 781 | 94 782 | 94 783 | 95 784 | 95 785 | 95 786 | 95 787 | 95 788 | 95 789 | 95 790 | 95 791 | 95 792 | 95 793 | 95 794 | 95 795 | 95 796 | 95 797 | 95 798 | 95 799 | 95 800 | 95 801 | 95 802 | 95 803 | 96 804 | 96 805 | 96 806 | 96 807 | 96 808 | 96 809 | 96 810 | 96 811 | 96 812 | 96 813 | 96 814 | 96 815 | 96 816 | 96 817 | 96 818 | 96 819 | 96 820 | 96 821 | 96 822 | 97 823 | 97 824 | 97 825 | 97 826 | 97 827 | 97 828 | 97 829 | 97 830 | 97 831 | 97 832 | 97 833 | 97 834 | 97 835 | 97 836 | 97 837 | 97 838 | 97 839 | 97 840 | 97 841 | 97 842 | 97 843 | 97 844 | 97 845 | 97 846 | 97 847 | 97 848 | 97 849 | 97 850 | 97 851 | 98 852 | 98 853 | 98 854 | 98 855 | 98 856 | 98 857 | 98 858 | 98 859 | 98 860 | 98 861 | 98 862 | 98 863 | 98 864 | 98 865 | 98 866 | 98 867 | 98 868 | 98 869 | 98 870 | 98 871 | 98 872 | 98 873 | 98 874 | 98 875 | 98 876 | 98 877 | 98 878 | 98 879 | 99 880 | 99 881 | 99 882 | 99 883 | 99 884 | 99 885 | 99 886 | 99 887 | 99 888 | 99 889 | 99 890 | 99 891 | 99 892 
| 99 893 | 99 894 | 99 895 | 99 896 | 99 897 | 99 898 | 99 899 | 100 900 | 100 901 | 100 902 | 100 903 | 100 904 | 100 905 | 100 906 | 100 907 | 100 908 | 100 909 | 100 910 | 100 911 | 100 912 | 100 913 | 100 914 | 100 915 | 100 916 | 100 917 | 100 918 | 101 919 | 101 920 | 101 921 | 101 922 | 101 923 | 101 924 | 101 925 | 101 926 | 101 927 | 101 928 | 101 929 | 101 930 | 101 931 | 101 932 | 101 933 | 101 934 | 101 935 | 101 936 | 101 937 | 101 938 | 101 939 | 102 940 | 102 941 | 102 942 | 102 943 | 102 944 | 102 945 | 102 946 | 102 947 | 102 948 | 102 949 | 102 950 | 102 951 | 102 952 | 102 953 | 102 954 | 102 955 | 102 956 | 102 957 | 102 958 | 102 959 | 102 960 | 102 961 | 102 962 | 103 963 | 103 964 | 103 965 | 103 966 | 103 967 | 103 968 | 103 969 | 103 970 | 103 971 | 103 972 | 103 973 | 103 974 | 103 975 | 103 976 | 103 977 | 103 978 | 103 979 | 103 980 | 103 981 | 103 982 | 104 983 | 104 984 | 104 985 | 104 986 | 104 987 | 104 988 | 104 989 | 104 990 | 104 991 | 104 992 | 104 993 | 104 994 | 104 995 | 104 996 | 104 997 | 104 998 | 104 999 | 104 1000 | 104 1001 | 104 1002 | 104 1003 | 104 1004 | 104 1005 | 104 1006 | 105 1007 | 105 1008 | 105 1009 | 105 1010 | 105 1011 | 105 1012 | 105 1013 | 105 1014 | 105 1015 | 105 1016 | 105 1017 | 105 1018 | 105 1019 | 105 1020 | 105 1021 | 105 1022 | 105 1023 | 105 1024 | 105 1025 | 105 1026 | 105 1027 | 105 1028 | 105 1029 | 105 1030 | 105 1031 | 106 1032 | 106 1033 | 106 1034 | 106 1035 | 106 1036 | 106 1037 | 106 1038 | 106 1039 | 106 1040 | 106 1041 | 106 1042 | 106 1043 | 106 1044 | 106 1045 | 106 1046 | 106 1047 | 106 1048 | 106 1049 | 106 1050 | 106 1051 | 106 1052 | 106 1053 | 106 1054 | 106 1055 | 106 1056 | 106 1057 | 106 1058 | 106 1059 | 106 1060 | 106 1061 | 106 1062 | 106 1063 | 106 1064 | 106 1065 | 106 1066 | 107 1067 | 107 1068 | 107 1069 | 107 1070 | 107 1071 | 107 1072 | 107 1073 | 107 1074 | 107 1075 | 107 1076 | 107 1077 | 107 1078 | 107 1079 | 107 1080 | 107 1081 | 107 1082 | 108 1083 | 108 1084 | 108 1085 | 108 1086 | 108 1087 | 108 1088 | 108 1089 | 108 1090 | 108 1091 | 108 1092 | 108 1093 | 108 1094 | 108 1095 | 108 1096 | 108 1097 | 108 1098 | 108 1099 | 108 1100 | 108 1101 | 108 1102 | 108 1103 | 108 1104 | 108 1105 | 108 1106 | 109 1107 | 109 1108 | 109 1109 | 109 1110 | 109 1111 | 109 1112 | 109 1113 | 109 1114 | 109 1115 | 109 1116 | 109 1117 | 109 1118 | 109 1119 | 109 1120 | 109 1121 | 109 1122 | 109 1123 | 109 1124 | 109 1125 | 109 1126 | 109 1127 | 109 1128 | 109 1129 | 109 1130 | 109 1131 | 109 1132 | 109 1133 | 109 1134 | 109 1135 | 109 1136 | 109 1137 | 110 1138 | 110 1139 | 110 1140 | 110 1141 | 110 1142 | 110 1143 | 110 1144 | 110 1145 | 110 1146 | 110 1147 | 110 1148 | 110 1149 | 110 1150 | 110 1151 | 110 1152 | 110 1153 | 110 1154 | 110 1155 | 110 1156 | 110 1157 | 110 1158 | 110 1159 | 111 1160 | 111 1161 | 111 1162 | 111 1163 | 111 1164 | 111 1165 | 111 1166 | 111 1167 | 111 1168 | 111 1169 | 111 1170 | 111 1171 | 111 1172 | 111 1173 | 111 1174 | 111 1175 | 111 1176 | 111 1177 | 111 1178 | 111 1179 | 111 1180 | 111 1181 | 111 1182 | 111 1183 | 111 1184 | 111 1185 | 111 1186 | 112 1187 | 112 1188 | 112 1189 | 112 1190 | 112 1191 | 112 1192 | 112 1193 | 112 1194 | 112 1195 | 112 1196 | 112 1197 | 112 1198 | 112 1199 | 112 1200 | 112 1201 | 112 1202 | 112 1203 | 112 1204 | 112 1205 | 113 1206 | 113 1207 | 113 1208 | 113 1209 | 113 1210 | 113 1211 | 113 1212 | 113 1213 | 113 1214 | 113 1215 | 113 1216 | 113 1217 | 113 1218 | 113 1219 | 113 1220 | 113 1221 | 113 1222 | 113 1223 | 113 1224 | 113 1225 | 113 
1226 | 113 1227 | 113 1228 | 114 1229 | 114 1230 | 114 1231 | 114 1232 | 114 1233 | 114 1234 | 114 1235 | 114 1236 | 114 1237 | 114 1238 | 114 1239 | 114 1240 | 114 1241 | 114 1242 | 114 1243 | 114 1244 | 114 1245 | 114 1246 | 114 1247 | 114 1248 | 114 1249 | 114 1250 | 114 1251 | 114 1252 | 114 1253 | 114 1254 | 114 1255 | 114 1256 | 114 1257 | 114 1258 | 115 1259 | 115 1260 | 115 1261 | 115 1262 | 115 1263 | 115 1264 | 115 1265 | 115 1266 | 115 1267 | 115 1268 | 115 1269 | 115 1270 | 115 1271 | 115 1272 | 115 1273 | 115 1274 | 115 1275 | 115 1276 | 115 1277 | 115 1278 | 115 1279 | 115 1280 | 115 1281 | 115 1282 | 115 1283 | 115 1284 | 115 1285 | 115 1286 | 115 1287 | 116 1288 | 116 1289 | 116 1290 | 116 1291 | 116 1292 | 116 1293 | 116 1294 | 116 1295 | 116 1296 | 116 1297 | 116 1298 | 116 1299 | 116 1300 | 116 1301 | 116 1302 | 116 1303 | 116 1304 | 117 1305 | 117 1306 | 117 1307 | 117 1308 | 117 1309 | 117 1310 | 117 1311 | 117 1312 | 117 1313 | 117 1314 | 117 1315 | 117 1316 | 117 1317 | 117 1318 | 117 1319 | 117 1320 | 117 1321 | 117 1322 | 117 1323 | 118 1324 | 118 1325 | 118 1326 | 118 1327 | 118 1328 | 118 1329 | 118 1330 | 118 1331 | 118 1332 | 118 1333 | 118 1334 | 118 1335 | 118 1336 | 118 1337 | 118 1338 | 118 1339 | 118 1340 | 118 1341 | 118 1342 | 118 1343 | 118 1344 | 118 1345 | 118 1346 | 118 1347 | 118 1348 | 118 1349 | 118 1350 | 118 1351 | 118 1352 | 118 1353 | 118 1354 | 118 1355 | 118 1356 | 118 1357 | 118 1358 | 118 1359 | 118 1360 | 118 1361 | 118 1362 | 118 1363 | 119 1364 | 119 1365 | 119 1366 | 119 1367 | 119 1368 | 119 1369 | 119 1370 | 119 1371 | 119 1372 | 119 1373 | 119 1374 | 119 1375 | 119 1376 | 119 1377 | 119 1378 | 119 1379 | 119 1380 | 119 1381 | 119 1382 | 119 1383 | 119 1384 | 119 1385 | 119 1386 | 119 1387 | 119 1388 | 119 1389 | 119 1390 | 120 1391 | 120 1392 | 120 1393 | 120 1394 | 120 1395 | 120 1396 | 120 1397 | 120 1398 | 120 1399 | 120 1400 | 120 1401 | 120 1402 | 120 1403 | 120 1404 | 120 1405 | 120 1406 | 120 1407 | 120 1408 | 120 1409 | 120 1410 | 120 1411 | 120 1412 | 120 1413 | 120 1414 | 120 1415 | 120 1416 | 121 1417 | 121 1418 | 121 1419 | 121 1420 | 121 1421 | 121 1422 | 121 1423 | 121 1424 | 121 1425 | 121 1426 | 121 1427 | 121 1428 | 121 1429 | 121 1430 | 121 1431 | 121 1432 | 121 1433 | 121 1434 | 121 1435 | 121 1436 | 121 1437 | 121 1438 | 121 1439 | 121 1440 | 121 1441 | 121 1442 | 121 1443 | 122 1444 | 122 1445 | 122 1446 | 122 1447 | 122 1448 | 122 1449 | 122 1450 | 122 1451 | 122 1452 | 122 1453 | 122 1454 | 122 1455 | 122 1456 | 122 1457 | 122 1458 | 122 1459 | 122 1460 | 122 1461 | 122 1462 | 122 1463 | 122 1464 | 122 1465 | 122 1466 | 122 1467 | 122 1468 | 122 1469 | 122 1470 | 123 1471 | 123 1472 | 123 1473 | 123 1474 | 123 1475 | 123 1476 | 123 1477 | 123 1478 | 123 1479 | 123 1480 | 123 1481 | 123 1482 | 123 1483 | 123 1484 | 123 1485 | 123 1486 | 123 1487 | 123 1488 | 123 1489 | 123 1490 | 123 1491 | 123 1492 | 123 1493 | 123 1494 | 123 1495 | 124 1496 | 124 1497 | 124 1498 | 124 1499 | 124 1500 | 124 1501 | 124 1502 | 124 1503 | 124 1504 | 124 1505 | 124 1506 | 124 1507 | 124 1508 | 124 1509 | 124 1510 | 124 1511 | 124 1512 | 124 1513 | 124 1514 | 124 1515 | 124 1516 | 124 1517 | 124 1518 | 124 1519 | 124 1520 | 124 1521 | 124 1522 | 124 1523 | 125 1524 | 125 1525 | 125 1526 | 125 1527 | 125 1528 | 125 1529 | 125 1530 | 125 1531 | 125 1532 | 125 1533 | 125 1534 | 125 1535 | 125 1536 | 125 1537 | 125 1538 | 125 1539 | 125 1540 | 125 1541 | 125 1542 | 125 1543 | 125 1544 | 126 1545 | 126 1546 | 126 1547 | 126 1548 | 126 
1549 | 126 1550 | 126 1551 | 126 1552 | 126 1553 | 126 1554 | 126 1555 | 126 1556 | 126 1557 | 126 1558 | 126 1559 | 126 1560 | 126 1561 | 126 1562 | 126 1563 | 126 1564 | 126 1565 | 126 1566 | 126 1567 | 126 1568 | 126 1569 | 126 1570 | 126 1571 | 127 1572 | 127 1573 | 127 1574 | 127 1575 | 127 1576 | 127 1577 | 127 1578 | 127 1579 | 127 1580 | 127 1581 | 127 1582 | 127 1583 | 127 1584 | 127 1585 | 127 1586 | 127 1587 | 127 1588 | 127 1589 | 127 1590 | 127 1591 | 127 1592 | 127 1593 | 127 1594 | 127 1595 | 127 1596 | 127 1597 | 127 1598 | 128 1599 | 128 1600 | 128 1601 | 128 1602 | 128 1603 | 128 1604 | 128 1605 | 128 1606 | 128 1607 | 128 1608 | 128 1609 | 128 1610 | 128 1611 | 128 1612 | 128 1613 | 128 1614 | 128 1615 | 128 1616 | 128 1617 | 128 1618 | 128 1619 | 128 1620 | 128 1621 | 128 1622 | 129 1623 | 129 1624 | 129 1625 | 129 1626 | 129 1627 | 129 1628 | 129 1629 | 129 1630 | 129 1631 | 129 1632 | 129 1633 | 129 1634 | 129 1635 | 129 1636 | 129 1637 | 129 1638 | 129 1639 | 129 1640 | 129 1641 | 129 1642 | 129 1643 | 129 1644 | 129 1645 | 130 1646 | 130 1647 | 130 1648 | 130 1649 | 130 1650 | 130 1651 | 130 1652 | 130 1653 | 130 1654 | 130 1655 | 130 1656 | 130 1657 | 130 1658 | 130 1659 | 130 1660 | 130 1661 | 130 1662 | 131 1663 | 131 1664 | 131 1665 | 131 1666 | 131 1667 | 131 1668 | 131 1669 | 131 1670 | 131 1671 | 131 1672 | 131 1673 | 131 1674 | 131 1675 | 131 1676 | 131 1677 | 131 1678 | 131 1679 | 131 1680 | 131 1681 | 131 1682 | 131 1683 | 131 1684 | 131 1685 | 131 1686 | 131 1687 | 131 1688 | 131 1689 | 132 1690 | 132 1691 | 132 1692 | 132 1693 | 132 1694 | 132 1695 | 132 1696 | 132 1697 | 132 1698 | 132 1699 | 132 1700 | 132 1701 | 132 1702 | 132 1703 | 132 1704 | 132 1705 | 132 1706 | 132 1707 | 132 1708 | 132 1709 | 132 1710 | 132 1711 | 132 1712 | 132 1713 | 132 1714 | 132 1715 | 132 1716 | 133 1717 | 133 1718 | 133 1719 | 133 1720 | 133 1721 | 133 1722 | 133 1723 | 133 1724 | 133 1725 | 133 1726 | 133 1727 | 133 1728 | 133 1729 | 133 1730 | 133 1731 | 133 1732 | 133 1733 | 133 1734 | 133 1735 | 133 1736 | 133 1737 | 133 1738 | 133 1739 | 133 1740 | 133 1741 | 133 1742 | 133 1743 | 133 1744 | 133 1745 | 134 1746 | 134 1747 | 134 1748 | 134 1749 | 134 1750 | 134 1751 | 134 1752 | 134 1753 | 134 1754 | 134 1755 | 134 1756 | 134 1757 | 134 1758 | 134 1759 | 134 1760 | 134 1761 | 134 1762 | 134 1763 | 134 1764 | 134 1765 | 134 1766 | 134 1767 | 134 1768 | 134 1769 | 134 1770 | 134 1771 | 134 1772 | 134 1773 | 134 1774 | 135 1775 | 135 1776 | 135 1777 | 135 1778 | 135 1779 | 135 1780 | 135 1781 | 135 1782 | 135 1783 | 135 1784 | 135 1785 | 135 1786 | 135 1787 | 135 1788 | 135 1789 | 135 1790 | 135 1791 | 135 1792 | 135 1793 | 135 1794 | 135 1795 | 135 1796 | 135 1797 | 135 1798 | 135 1799 | 136 1800 | 136 1801 | 136 1802 | 136 1803 | 136 1804 | 136 1805 | 136 1806 | 136 1807 | 136 1808 | 136 1809 | 136 1810 | 136 1811 | 136 1812 | 136 1813 | 136 1814 | 136 1815 | 137 1816 | 137 1817 | 137 1818 | 137 1819 | 137 1820 | 137 1821 | 137 1822 | 137 1823 | 137 1824 | 137 1825 | 137 1826 | 137 1827 | 137 1828 | 137 1829 | 137 1830 | 137 1831 | 137 1832 | 137 1833 | 137 1834 | 137 1835 | 137 1836 | 137 1837 | 137 1838 | 137 1839 | 138 1840 | 138 1841 | 138 1842 | 138 1843 | 138 1844 | 138 1845 | 138 1846 | 138 1847 | 138 1848 | 138 1849 | 138 1850 | 138 1851 | 138 1852 | 138 1853 | 138 1854 | 138 1855 | 138 1856 | 138 1857 | 138 1858 | 138 1859 | 138 1860 | 138 1861 | 138 1862 | 138 1863 | 138 1864 | 138 1865 | 138 1866 | 139 1867 | 139 1868 | 139 1869 | 139 1870 | 139 1871 | 139 
1872 | 139 1873 | 139 1874 | 139 1875 | 139 1876 | 139 1877 | 139 1878 | 139 1879 | 139 1880 | 139 1881 | 139 1882 | 139 1883 | 139 1884 | 139 1885 | 139 1886 | 139 1887 | 139 1888 | 140 1889 | 140 1890 | 140 1891 | 140 1892 | 140 1893 | 140 1894 | 140 1895 | 140 1896 | 140 1897 | 140 1898 | 140 1899 | 140 1900 | 140 1901 | 140 1902 | 140 1903 | 140 1904 | 140 1905 | 140 1906 | 140 1907 | 140 1908 | 140 1909 | 140 1910 | 140 1911 | 140 1912 | 141 1913 | 141 1914 | 141 1915 | 141 1916 | 141 1917 | 141 1918 | 141 1919 | 141 1920 | 141 1921 | 141 1922 | 141 1923 | 141 1924 | 141 1925 | 141 1926 | 141 1927 | 141 1928 | 141 1929 | 141 1930 | 142 1931 | 142 1932 | 142 1933 | 142 1934 | 142 1935 | 142 1936 | 142 1937 | 142 1938 | 142 1939 | 142 1940 | 142 1941 | 142 1942 | 142 1943 | 142 1944 | 142 1945 | 142 1946 | 142 1947 | 142 1948 | 142 1949 | 142 1950 | 142 1951 | 142 1952 | 142 1953 | 142 1954 | 142 1955 | 142 1956 | 143 1957 | 143 1958 | 143 1959 | 143 1960 | 143 1961 | 143 1962 | 143 1963 | 143 1964 | 143 1965 | 143 1966 | 143 1967 | 143 1968 | 143 1969 | 143 1970 | 143 1971 | 143 1972 | 143 1973 | 143 1974 | 143 1975 | 143 1976 | 143 1977 | 143 1978 | 143 1979 | 143 1980 | 143 1981 | 143 1982 | 143 1983 | 144 1984 | 144 1985 | 144 1986 | 144 1987 | 144 1988 | 144 1989 | 144 1990 | 144 1991 | 144 1992 | 144 1993 | 144 1994 | 144 1995 | 144 1996 | 144 1997 | 144 1998 | 144 1999 | 144 2000 | 144 2001 | 144 2002 | 144 2003 | 144 2004 | 144 2005 | 144 2006 | 144 2007 | 144 2008 | 144 2009 | 144 2010 | 145 2011 | 145 2012 | 145 2013 | 145 2014 | 145 2015 | 145 2016 | 145 2017 | 145 2018 | 145 2019 | 145 2020 | 145 2021 | 145 2022 | 145 2023 | 145 2024 | 145 2025 | 145 2026 | 145 2027 | 145 2028 | 145 2029 | 145 2030 | 145 2031 | 145 2032 | 145 2033 | 145 2034 | 145 2035 | 145 2036 | 145 2037 | 145 2038 | 145 2039 | 145 2040 | 146 2041 | 146 2042 | 146 2043 | 146 2044 | 146 2045 | 146 2046 | 146 2047 | 146 2048 | 146 2049 | 146 2050 | 146 2051 | 146 2052 | 146 2053 | 146 2054 | 146 2055 | 146 2056 | 146 2057 | 146 2058 | 146 2059 | 146 2060 | 146 2061 | 146 2062 | 146 2063 | 146 2064 | 146 2065 | 146 2066 | 146 2067 | 146 2068 | 146 2069 | 146 2070 | 146 2071 | 147 2072 | 147 2073 | 147 2074 | 147 2075 | 147 2076 | 147 2077 | 147 2078 | 147 2079 | 147 2080 | 147 2081 | 147 2082 | 147 2083 | 147 2084 | 147 2085 | 147 2086 | 147 2087 | 147 2088 | 147 2089 | 147 2090 | 147 2091 | 147 2092 | 147 2093 | 147 2094 | 147 2095 | 147 2096 | 147 2097 | 147 2098 | 148 2099 | 148 2100 | 148 2101 | 148 2102 | 148 2103 | 148 2104 | 148 2105 | 148 2106 | 148 2107 | 148 2108 | 148 2109 | 148 2110 | 148 2111 | 148 2112 | 148 2113 | 148 2114 | 148 2115 | 149 2116 | 149 2117 | 149 2118 | 149 2119 | 149 2120 | 149 2121 | 149 2122 | 149 2123 | 149 2124 | 149 2125 | 149 2126 | 149 2127 | 149 2128 | 149 2129 | 149 2130 | 149 2131 | 149 2132 | 149 2133 | 149 2134 | 149 2135 | 149 2136 | 149 2137 | 149 2138 | 149 2139 | 149 2140 | 149 2141 | 149 2142 | 149 2143 | 149 2144 | 149 2145 | 149 2146 | 150 2147 | 150 2148 | 150 2149 | 150 2150 | 150 2151 | 150 2152 | 150 2153 | 150 2154 | 150 2155 | 150 2156 | 150 2157 | 150 2158 | 150 2159 | 150 2160 | 150 2161 | 150 2162 | 150 2163 | 150 2164 | 150 2165 | 150 2166 | 150 2167 | 150 2168 | 150 2169 | 151 2170 | 151 2171 | 151 2172 | 151 2173 | 151 2174 | 151 2175 | 151 2176 | 151 2177 | 151 2178 | 151 2179 | 151 2180 | 151 2181 | 151 2182 | 151 2183 | 151 2184 | 151 2185 | 151 2186 | 151 2187 | 152 2188 | 152 2189 | 152 2190 | 152 2191 | 152 2192 | 152 2193 | 152 2194 | 152 
2195 | 152 2196 | 152 2197 | 152 2198 | 152 2199 | 152 2200 | 152 2201 | 152 2202 | 152 2203 | 152 2204 | 152 2205 | 152 2206 | 152 2207 | 152 2208 | 152 2209 | 152 2210 | 152 2211 | 152 2212 | 152 2213 | 152 2214 | 152 2215 | 152 2216 | 152 2217 | 152 2218 | 152 2219 | 152 2220 | 152 2221 | 152 2222 | 153 2223 | 153 2224 | 153 2225 | 153 2226 | 153 2227 | 153 2228 | 153 2229 | 153 2230 | 153 2231 | 153 2232 | 153 2233 | 153 2234 | 153 2235 | 153 2236 | 153 2237 | 153 2238 | 153 2239 | 153 2240 | 153 2241 | 153 2242 | 153 2243 | 153 2244 | 153 2245 | 153 2246 | 153 2247 | 153 2248 | 153 2249 | 153 2250 | 154 2251 | 154 2252 | 154 2253 | 154 2254 | 154 2255 | 154 2256 | 154 2257 | 154 2258 | 154 2259 | 154 2260 | 154 2261 | 154 2262 | 154 2263 | 154 2264 | 154 2265 | 154 2266 | 154 2267 | 154 2268 | 154 2269 | 154 2270 | 154 2271 | 155 2272 | 155 2273 | 155 2274 | 155 2275 | 155 2276 | 155 2277 | 155 2278 | 155 2279 | 155 2280 | 155 2281 | 155 2282 | 155 2283 | 155 2284 | 155 2285 | 155 2286 | 155 2287 | 155 2288 | 155 2289 | 156 2290 | 156 2291 | 156 2292 | 156 2293 | 156 2294 | 156 2295 | 156 2296 | 156 2297 | 156 2298 | 156 2299 | 156 2300 | 156 2301 | 156 2302 | 156 2303 | 156 2304 | 156 2305 | 156 2306 | 156 2307 | 156 2308 | 156 2309 | 156 2310 | 156 2311 | 156 2312 | 156 2313 | 156 2314 | 156 2315 | 156 2316 | 156 2317 | 156 2318 | 157 2319 | 157 2320 | 157 2321 | 157 2322 | 157 2323 | 157 2324 | 157 2325 | 157 2326 | 157 2327 | 157 2328 | 157 2329 | 157 2330 | 157 2331 | 157 2332 | 157 2333 | 157 2334 | 157 2335 | 157 2336 | 157 2337 | 157 2338 | 157 2339 | 157 2340 | 158 2341 | 158 2342 | 158 2343 | 158 2344 | 158 2345 | 158 2346 | 158 2347 | 158 2348 | 158 2349 | 158 2350 | 158 2351 | 158 2352 | 158 2353 | 158 2354 | 158 2355 | 158 2356 | 158 2357 | 158 2358 | 158 2359 | 158 2360 | 158 2361 | 158 2362 | 158 2363 | 158 2364 | 159 2365 | 159 2366 | 159 2367 | 159 2368 | 159 2369 | 159 2370 | 159 2371 | 159 2372 | 159 2373 | 159 2374 | 159 2375 | 159 2376 | 159 2377 | 159 2378 | 159 2379 | 159 2380 | 159 2381 | 159 2382 | 159 2383 | 159 2384 | 159 2385 | 160 2386 | 160 2387 | 160 2388 | 160 2389 | 160 2390 | 160 2391 | 160 2392 | 160 2393 | 160 2394 | 160 2395 | 160 2396 | 160 2397 | 160 2398 | 160 2399 | 161 2400 | 161 2401 | 161 2402 | 161 2403 | 161 2404 | 161 2405 | 161 2406 | 161 2407 | 161 2408 | 161 2409 | 161 2410 | 161 2411 | 161 2412 | 161 2413 | 161 2414 | 161 2415 | 161 2416 | 161 2417 | 161 2418 | 161 2419 | 162 2420 | 162 2421 | 162 2422 | 162 2423 | 162 2424 | 162 2425 | 162 2426 | 162 2427 | 162 2428 | 162 2429 | 162 2430 | 162 2431 | 162 2432 | 162 2433 | 162 2434 | 162 2435 | 162 2436 | 162 2437 | 162 2438 | 162 2439 | 162 2440 | 162 2441 | 162 2442 | 162 2443 | 162 2444 | 162 2445 | 162 2446 | 163 2447 | 163 2448 | 163 2449 | 163 2450 | 163 2451 | 163 2452 | 163 2453 | 163 2454 | 163 2455 | 163 2456 | 163 2457 | 163 2458 | 163 2459 | 163 2460 | 163 2461 | 164 2462 | 164 2463 | 164 2464 | 164 2465 | 164 2466 | 164 2467 | 164 2468 | 164 2469 | 164 2470 | 164 2471 | 164 2472 | 164 2473 | 164 2474 | 164 2475 | 164 2476 | 164 2477 | 164 2478 | 164 2479 | 165 2480 | 165 2481 | 165 2482 | 165 2483 | 165 2484 | 165 2485 | 165 2486 | 165 2487 | 165 2488 | 166 2489 | 166 2490 | 166 2491 | 166 2492 | 166 2493 | 166 2494 | 166 2495 | 166 2496 | 166 2497 | 166 2498 | 166 2499 | 166 2500 | 166 2501 | 166 2502 | 166 2503 | 166 2504 | 166 2505 | 166 2506 | 166 2507 | 166 2508 | 167 2509 | 167 2510 | 167 2511 | 167 2512 | 167 2513 | 167 2514 | 167 2515 | 167 2516 | 167 2517 | 167 
2518 | 167 2519 | 167 2520 | 167 2521 | 167 2522 | 168 2523 | 168 2524 | 168 2525 | 168 2526 | 168 2527 | 168 2528 | 168 2529 | 168 2530 | 168 2531 | 168 2532 | 168 2533 | 168 2534 | 169 2535 | 169 2536 | 169 2537 | 169 2538 | 169 2539 | 169 2540 | 169 2541 | 169 2542 | 169 2543 | 169 2544 | 169 2545 | 169 2546 | 169 2547 | 169 2548 | 170 2549 | 170 2550 | 170 2551 | 170 2552 | 170 2553 | 170 2554 | 170 2555 | 170 2556 | 170 2557 | 170 2558 | 170 2559 | 170 2560 | 170 2561 | 171 2562 | 171 2563 | 171 2564 | 171 2565 | 171 2566 | 171 2567 | 171 2568 | 171 2569 | 171 2570 | 171 2571 | 171 2572 | 171 2573 | 171 2574 | 171 2575 | 171 2576 | 171 2577 | 171 2578 | 171 2579 | 171 2580 | 172 2581 | 172 2582 | 172 2583 | 172 2584 | 172 2585 | 172 2586 | 172 2587 | 172 2588 | 172 2589 | 172 2590 | 172 2591 | 172 2592 | 172 2593 | 172 2594 | 172 2595 | 172 2596 | 173 2597 | 173 2598 | 173 2599 | 173 2600 | 173 2601 | 173 2602 | 173 2603 | 173 2604 | 173 2605 | 173 2606 | 173 2607 | 173 2608 | 173 2609 | 174 2610 | 174 2611 | 174 2612 | 174 2613 | 174 2614 | 174 2615 | 174 2616 | 174 2617 | 174 2618 | 174 2619 | 175 2620 | 175 2621 | 175 2622 | 175 2623 | 175 2624 | 175 2625 | 175 2626 | 175 2627 | 175 2628 | 175 2629 | 175 2630 | 175 2631 | 175 2632 | 175 2633 | 175 2634 | 175 2635 | 175 2636 | 176 2637 | 176 2638 | 176 2639 | 176 2640 | 176 2641 | 176 2642 | 176 2643 | 176 2644 | 176 2645 | 176 2646 | 176 2647 | 176 2648 | 176 2649 | 176 2650 | 176 2651 | 176 2652 | 176 2653 | 176 2654 | 176 2655 | 176 2656 | 176 2657 | 176 2658 | 176 2659 | 176 2660 | 176 2661 | 177 2662 | 177 2663 | 177 2664 | 177 2665 | 177 2666 | 177 2667 | 177 2668 | 177 2669 | 177 2670 | 177 2671 | 177 2672 | 177 2673 | 177 2674 | 177 2675 | 177 2676 | 177 2677 | 178 2678 | 178 2679 | 178 2680 | 178 2681 | 178 2682 | 178 2683 | 178 2684 | 178 2685 | 178 2686 | 178 2687 | 178 2688 | 178 2689 | 178 2690 | 178 2691 | 178 2692 | 179 2693 | 179 2694 | 179 2695 | 179 2696 | 179 2697 | 179 2698 | 179 2699 | 179 2700 | 179 2701 | 179 2702 | 179 2703 | 179 2704 | 179 2705 | 179 2706 | 179 2707 | 180 2708 | 180 2709 | 180 2710 | 180 2711 | 180 2712 | 180 2713 | 180 2714 | 180 2715 | 180 2716 | 180 2717 | 180 2718 | 180 2719 | 180 2720 | 180 2721 | 180 2722 | 180 2723 | 180 2724 | 180 2725 | 180 2726 | 180 2727 | 180 2728 | 180 2729 | 181 2730 | 181 2731 | 181 2732 | 181 2733 | 181 2734 | 181 2735 | 181 2736 | 181 2737 | 181 2738 | 181 2739 | 181 2740 | 181 2741 | 181 2742 | 181 2743 | 181 2744 | 181 2745 | 181 2746 | 181 2747 | 182 2748 | 182 2749 | 182 2750 | 182 2751 | 182 2752 | 182 2753 | 182 2754 | 182 2755 | 182 2756 | 182 2757 | 182 2758 | 182 2759 | 182 2760 | 182 2761 | 182 2762 | 182 2763 | 182 2764 | 182 2765 | 182 2766 | 183 2767 | 183 2768 | 183 2769 | 183 2770 | 183 2771 | 183 2772 | 183 2773 | 183 2774 | 183 2775 | 183 2776 | 183 2777 | 183 2778 | 183 2779 | 183 2780 | 183 2781 | 183 2782 | 183 2783 | 184 2784 | 184 2785 | 184 2786 | 184 2787 | 184 2788 | 184 2789 | 184 2790 | 184 2791 | 184 2792 | 184 2793 | 184 2794 | 184 2795 | 184 2796 | 184 2797 | 184 2798 | 184 2799 | 184 2800 | 184 2801 | 184 2802 | 184 2803 | 185 2804 | 185 2805 | 185 2806 | 185 2807 | 185 2808 | 185 2809 | 185 2810 | 185 2811 | 185 2812 | 185 2813 | 185 2814 | 185 2815 | 185 2816 | 185 2817 | 185 2818 | 185 2819 | 185 2820 | 185 2821 | 185 2822 | 185 2823 | 186 2824 | 186 2825 | 186 2826 | 186 2827 | 186 2828 | 186 2829 | 186 2830 | 186 2831 | 186 2832 | 186 2833 | 186 2834 | 186 2835 | 186 2836 | 186 2837 | 186 2838 | 186 2839 | 186 2840 | 186 
2841 | 186 2842 | 186 2843 | 186 2844 | 187 2845 | 187 2846 | 187 2847 | 187 2848 | 187 2849 | 187 2850 | 187 2851 | 187 2852 | 187 2853 | 187 2854 | 188 2855 | 188 2856 | 188 2857 | 188 2858 | 188 2859 | 188 2860 | 188 2861 | 188 2862 | 188 2863 | 188 2864 | 188 2865 | 188 2866 | 188 2867 | 188 2868 | 188 2869 | 188 2870 | 188 2871 | 188 2872 | 188 2873 | 189 2874 | 189 2875 | 189 2876 | 189 2877 | 189 2878 | 189 2879 | 189 2880 | 189 2881 | 189 2882 | 189 2883 | 189 2884 | 189 2885 | 189 2886 | 189 2887 | 189 2888 | 189 2889 | 190 2890 | 190 2891 | 190 2892 | 190 2893 | 190 2894 | 190 2895 | 190 2896 | 190 2897 | 190 2898 | 190 2899 | 190 2900 | 190 2901 | 190 2902 | 190 2903 | 190 2904 | 190 2905 | 190 2906 | 190 2907 | 190 2908 | 191 2909 | 191 2910 | 191 2911 | 191 2912 | 191 2913 | 191 2914 | 191 2915 | 191 2916 | 191 2917 | 191 2918 | 191 2919 | 191 2920 | 192 2921 | 192 2922 | 192 2923 | 192 2924 | 192 2925 | 192 2926 | 192 2927 | 192 2928 | 192 2929 | 192 2930 | 192 2931 | 192 2932 | 192 2933 | 192 2934 | 192 2935 | 192 2936 | 192 2937 | 192 2938 | 192 2939 | 192 2940 | 192 2941 | 193 2942 | 193 2943 | 193 2944 | 193 2945 | 193 2946 | 193 2947 | 193 2948 | 193 2949 | 193 2950 | 193 2951 | 193 2952 | 193 2953 | 193 2954 | 193 2955 | 193 2956 | 193 2957 | 193 2958 | 193 2959 | 193 2960 | 193 2961 | 193 2962 | 193 2963 | 193 2964 | 194 2965 | 194 2966 | 194 2967 | 194 2968 | 194 2969 | 194 2970 | 194 2971 | 194 2972 | 194 2973 | 194 2974 | 194 2975 | 194 2976 | 194 2977 | 194 2978 | 194 2979 | 194 2980 | 194 2981 | 194 2982 | 194 2983 | 194 2984 | 194 2985 | 194 2986 | 195 2987 | 195 2988 | 195 2989 | 195 2990 | 195 2991 | 195 2992 | 195 2993 | 195 2994 | 195 2995 | 195 2996 | 195 2997 | 195 2998 | 195 2999 | 195 3000 | 195 3001 | 196 3002 | 196 3003 | 196 3004 | 196 3005 | 196 3006 | 196 3007 | 196 3008 | 196 3009 | 196 3010 | 196 3011 | 196 3012 | 196 3013 | 196 3014 | 196 3015 | 196 3016 | 196 3017 | 197 3018 | 197 3019 | 197 3020 | 197 3021 | 197 3022 | 197 3023 | 197 3024 | 197 3025 | 197 3026 | 197 3027 | 197 3028 | 198 3029 | 198 3030 | 198 3031 | 198 3032 | 198 3033 | 198 3034 | 198 3035 | 198 3036 | 198 3037 | 198 3038 | 198 3039 | 198 3040 | 198 3041 | 198 3042 | 198 3043 | 198 3044 | 199 3045 | 199 3046 | 199 3047 | 199 3048 | 199 3049 | 199 3050 | 199 3051 | 199 3052 | 199 3053 | 199 3054 | 200 3055 | 200 3056 | 200 3057 | 200 3058 | 200 3059 | 200 3060 | 200 3061 | 200 3062 | 200 3063 | 200 3064 | 200 3065 | 200 3066 | 200 3067 | 200 3068 | 200 3069 | 200 3070 | 201 3071 | 201 3072 | 201 3073 | 201 3074 | 201 3075 | 201 3076 | 201 3077 | 201 3078 | 201 3079 | 201 3080 | 201 3081 | 201 3082 | 201 3083 | 201 3084 | 202 3085 | 202 3086 | 202 3087 | 202 3088 | 202 3089 | 202 3090 | 202 3091 | 202 3092 | 202 3093 | 202 3094 | 202 3095 | 202 3096 | 202 3097 | 203 3098 | 203 3099 | 203 3100 | 203 3101 | 203 3102 | 203 3103 | 203 3104 | 203 3105 | 203 3106 | 203 3107 | 203 3108 | 203 3109 | 203 3110 | 203 3111 | 203 3112 | 203 3113 | 204 3114 | 204 3115 | 204 3116 | 204 3117 | 204 3118 | 204 3119 | 204 3120 | 204 3121 | 204 3122 | 204 3123 | 204 3124 | 204 3125 | 204 3126 | 204 3127 | 205 3128 | 205 3129 | 205 3130 | 205 3131 | 205 3132 | 205 3133 | 205 3134 | 205 3135 | 205 3136 | 205 3137 | 205 3138 | 205 3139 | 205 3140 | 205 3141 | 205 3142 | 205 3143 | 205 3144 | 205 3145 | 205 3146 | 205 3147 | 205 3148 | 206 3149 | 206 3150 | 206 3151 | 206 3152 | 206 3153 | 206 3154 | 206 3155 | 206 3156 | 206 3157 | 206 3158 | 206 3159 | 206 3160 | 206 3161 | 206 3162 | 206 3163 | 206 
3164 | 206 3165 | 206 3166 | 206 3167 | 206 3168 | 206 3169 | 206 3170 | 207 3171 | 207 3172 | 207 3173 | 207 3174 | 207 3175 | 207 3176 | 207 3177 | 207 3178 | 207 3179 | 207 3180 | 207 3181 | 207 3182 | 207 3183 | 208 3184 | 208 3185 | 208 3186 | 208 3187 | 208 3188 | 208 3189 | 208 3190 | 208 3191 | 208 3192 | 208 3193 | 208 3194 | 208 3195 | 208 3196 | 208 3197 | 208 3198 | 208 3199 | 208 3200 | 208 3201 | 208 3202 | 208 3203 | 209 3204 | 209 3205 | 209 3206 | 209 3207 | 209 3208 | 209 3209 | 209 3210 | 209 3211 | 209 3212 | 209 3213 | 209 3214 | 209 3215 | 210 3216 | 210 3217 | 210 3218 | 210 3219 | 210 3220 | 210 3221 | 210 3222 | 210 3223 | 210 3224 | 210 3225 | 210 3226 | 210 3227 | 210 3228 | 211 3229 | 211 3230 | 211 3231 | 211 3232 | 211 3233 | 211 3234 | 211 3235 | 211 3236 | 211 3237 | 211 3238 | 211 3239 | 211 3240 | 211 3241 | 211 3242 | 211 3243 | 211 3244 | 211 3245 | 211 3246 | 211 3247 | 211 3248 | 211 3249 | 211 3250 | 211 3251 | 211 3252 | 211 3253 | 212 3254 | 212 3255 | 212 3256 | 212 3257 | 212 3258 | 212 3259 | 212 3260 | 212 3261 | 212 3262 | 212 3263 | 212 3264 | 212 3265 | 212 3266 | 212 3267 | 213 3268 | 213 3269 | 213 3270 | 213 3271 | 213 3272 | 213 3273 | 213 3274 | 213 3275 | 213 3276 | 213 3277 | 213 3278 | 213 3279 | 213 3280 | 213 3281 | 213 3282 | 213 3283 | 213 3284 | 213 3285 | 214 3286 | 214 3287 | 214 3288 | 214 3289 | 214 3290 | 214 3291 | 214 3292 | 214 3293 | 214 3294 | 214 3295 | 214 3296 | 214 3297 | 214 3298 | 215 3299 | 215 3300 | 215 3301 | 215 3302 | 215 3303 | 215 3304 | 215 3305 | 215 3306 | 215 3307 | 215 3308 | 215 3309 | 215 3310 | 215 3311 | 216 3312 | 216 3313 | 216 3314 | 216 3315 | 216 3316 | 216 3317 | 216 3318 | 216 3319 | 216 3320 | 216 3321 | 216 3322 | 216 3323 | 216 3324 | 216 3325 | 216 3326 | 216 3327 | 216 3328 | 217 3329 | 217 3330 | 217 3331 | 217 3332 | 217 3333 | 217 3334 | 217 3335 | 217 3336 | 217 3337 | 217 3338 | 217 3339 | 217 3340 | 217 3341 | 217 3342 | 218 3343 | 218 3344 | 218 3345 | 218 3346 | 218 3347 | 218 3348 | 218 3349 | 218 3350 | 218 3351 | 218 3352 | 218 3353 | 218 3354 | 218 3355 | 218 3356 | 218 3357 | 219 3358 | 219 3359 | 219 3360 | 219 3361 | 219 3362 | 219 3363 | 219 3364 | 219 3365 | 219 3366 | 219 3367 | 219 3368 | 220 3369 | 220 3370 | 220 3371 | 220 3372 | 220 3373 | 220 3374 | 220 3375 | 220 3376 | 220 3377 | 220 3378 | 220 3379 | 220 3380 | 220 3381 | 221 3382 | 221 3383 | 221 3384 | 221 3385 | 221 3386 | 221 3387 | 221 3388 | 221 3389 | 221 3390 | 221 3391 | 221 3392 | 221 3393 | 221 3394 | 222 3395 | 222 3396 | 222 3397 | 222 3398 | 222 3399 | 222 3400 | 222 3401 | 222 3402 | 222 3403 | 222 3404 | 222 3405 | 222 3406 | 222 3407 | 222 3408 | 223 3409 | 223 3410 | 223 3411 | 223 3412 | 223 3413 | 223 3414 | 223 3415 | 223 3416 | 223 3417 | 223 3418 | 223 3419 | 224 3420 | 224 3421 | 224 3422 | 224 3423 | 224 3424 | 224 3425 | 224 3426 | 224 3427 | 224 3428 | 224 3429 | 225 3430 | 225 3431 | 225 3432 | 225 3433 | 225 3434 | 225 3435 | 225 3436 | 225 3437 | 225 3438 | 225 3439 | 225 3440 | 225 3441 | 226 3442 | 226 3443 | 226 3444 | 226 3445 | 226 3446 | 226 3447 | 226 3448 | 226 3449 | 227 3450 | 227 3451 | 227 3452 | 227 3453 | 227 3454 | 227 3455 | 227 3456 | 227 3457 | 227 3458 | 227 3459 | 227 3460 | 227 3461 | 228 3462 | 228 3463 | 228 3464 | 228 3465 | 228 3466 | 228 3467 | 228 3468 | 228 3469 | 228 3470 | 228 3471 | 228 3472 | 228 3473 | 228 3474 | 229 3475 | 229 3476 | 229 3477 | 229 3478 | 229 3479 | 229 3480 | 229 3481 | 229 3482 | 229 3483 | 230 3484 | 230 3485 | 230 3486 | 230 
3487 | 230 3488 | 230 3489 | 230 3490 | 230 3491 | 230 3492 | 231 3493 | 231 3494 | 231 3495 | 231 3496 | 231 3497 | 231 3498 | 231 3499 | 231 3500 | 231 3501 | 231 3502 | 231 3503 | 231 3504 | 231 3505 | 232 3506 | 232 3507 | 232 3508 | 232 3509 | 232 3510 | 232 3511 | 232 3512 | 232 3513 | 232 3514 | 232 3515 | 232 3516 | 232 3517 | 232 3518 | 232 3519 | 233 3520 | 233 3521 | 233 3522 | 233 3523 | 233 3524 | 233 3525 | 233 3526 | 233 3527 | 233 3528 | 233 3529 | 233 3530 | 233 3531 | 233 3532 | 233 3533 | 234 3534 | 234 3535 | 234 3536 | 234 3537 | 234 3538 | 234 3539 | 234 3540 | 234 3541 | 235 3542 | 235 3543 | 235 3544 | 235 3545 | 235 3546 | 235 3547 | 235 3548 | 235 3549 | 235 3550 | 235 3551 | 235 3552 | 235 3553 | 235 3554 | 235 3555 | 235 3556 | 236 3557 | 236 3558 | 236 3559 | 236 3560 | 236 3561 | 236 3562 | 236 3563 | 236 3564 | 236 3565 | 236 3566 | 236 3567 | 236 3568 | 236 3569 | 236 3570 | 236 3571 | 237 3572 | 237 3573 | 237 3574 | 237 3575 | 237 3576 | 237 3577 | 237 3578 | 237 3579 | 237 3580 | 237 3581 | 237 3582 | 238 3583 | 238 3584 | 238 3585 | 238 3586 | 238 3587 | 238 3588 | 238 3589 | 238 3590 | 238 3591 | 238 3592 | 238 3593 | 239 3594 | 239 3595 | 239 3596 | 239 3597 | 239 3598 | 239 3599 | 239 3600 | 240 3601 | 240 3602 | 240 3603 | 240 3604 | 240 3605 | 240 3606 | 240 3607 | 240 3608 | 241 3609 | 241 3610 | 241 3611 | 241 3612 | 241 3613 | 241 3614 | 242 3615 | 242 3616 | 242 3617 | 242 3618 | 242 3619 | 242 3620 | 242 3621 | 242 3622 | 242 3623 | 243 3624 | 243 3625 | 243 3626 | 243 3627 | 243 3628 | 243 3629 | 243 3630 | 243 3631 | 243 3632 | 243 3633 | 243 3634 | 243 3635 | 243 3636 | 243 3637 | 243 3638 | 243 3639 | 243 3640 | 243 3641 | 244 3642 | 244 3643 | 244 3644 | 244 3645 | 244 3646 | 244 3647 | 244 3648 | 244 3649 | 244 3650 | 244 3651 | 244 3652 | 244 3653 | 244 3654 | 244 3655 | 244 3656 | 244 3657 | 244 3658 | 245 3659 | 245 3660 | 245 3661 | 245 3662 | 245 3663 | 245 3664 | 245 3665 | 245 3666 | 245 3667 | 245 3668 | 245 3669 | 245 3670 | 245 3671 | 245 3672 | 245 3673 | 246 3674 | 246 3675 | 246 3676 | 246 3677 | 246 3678 | 246 3679 | 246 3680 | 246 3681 | 246 3682 | 246 3683 | 246 3684 | 247 3685 | 247 3686 | 247 3687 | 247 3688 | 247 3689 | 247 3690 | 247 3691 | 247 3692 | 247 3693 | 247 3694 | 247 3695 | 247 3696 | 247 3697 | 247 3698 | 247 3699 | 247 3700 | 248 3701 | 248 3702 | 248 3703 | 248 3704 | 248 3705 | 248 3706 | 248 3707 | 248 3708 | 248 3709 | 248 3710 | 248 3711 | 248 3712 | 248 3713 | 248 3714 | 248 3715 | 249 3716 | 249 3717 | 249 3718 | 249 3719 | 249 3720 | 249 3721 | 249 3722 | 249 3723 | 249 3724 | 249 3725 | 249 3726 | 249 3727 | 250 3728 | 250 3729 | 250 3730 | 250 3731 | 250 3732 | 250 3733 | 250 3734 | 250 3735 | 250 3736 | 250 3737 | 250 3738 | 250 3739 | 250 3740 | 250 3741 | 250 3742 | 250 3743 | 250 3744 | 250 3745 | 251 3746 | 251 3747 | 251 3748 | 251 3749 | 251 3750 | 251 3751 | 251 3752 | 251 3753 | 251 3754 | 251 3755 | 251 3756 | 251 3757 | 251 3758 | 251 3759 | 251 3760 | 252 3761 | 252 3762 | 252 3763 | 252 3764 | 252 3765 | 252 3766 | 252 3767 | 252 3768 | 252 3769 | 252 3770 | 252 3771 | 252 3772 | 252 3773 | 252 3774 | 252 3775 | 253 3776 | 253 3777 | 253 3778 | 253 3779 | 253 3780 | 253 3781 | 253 3782 | 253 3783 | 253 3784 | 253 3785 | 254 3786 | 254 3787 | 254 3788 | 254 3789 | 254 3790 | 254 3791 | 254 3792 | 254 3793 | 254 3794 | 254 3795 | 254 3796 | 254 3797 | 254 3798 | 254 3799 | 254 3800 | 255 3801 | 255 3802 | 255 3803 | 255 3804 | 255 3805 | 255 3806 | 255 3807 | 255 3808 | 256 3809 | 256 
3810 | 256 3811 | 256 3812 | 256 3813 | 256 3814 | 256 3815 | 256 3816 | 256 3817 | 256 3818 | 256 3819 | 256 3820 | 257 3821 | 257 3822 | 257 3823 | 257 3824 | 257 3825 | 257 3826 | 257 3827 | 257 3828 | 257 3829 | 257 3830 | 257 3831 | 258 3832 | 258 3833 | 258 3834 | 258 3835 | 258 3836 | 258 3837 | 258 3838 | 258 3839 | 258 3840 | 258 3841 | 258 3842 | 258 3843 | 258 3844 | 258 3845 | 258 3846 | 258 3847 | 258 3848 | 258 3849 | 259 3850 | 259 3851 | 259 3852 | 259 3853 | 259 3854 | 259 3855 | 259 3856 | 259 3857 | 259 3858 | 259 3859 | 259 3860 | 259 3861 | 259 3862 | 259 3863 | 259 3864 | 259 3865 | 259 3866 | 260 3867 | 260 3868 | 260 3869 | 260 3870 | 260 3871 | 260 3872 | 260 3873 | 260 3874 | 260 3875 | 260 3876 | 260 3877 | 260 3878 | 260 3879 | 260 3880 | 260 3881 | 260 3882 | 260 3883 | 261 3884 | 261 3885 | 261 3886 | 261 3887 | 261 3888 | 261 3889 | 261 3890 | 261 3891 | 261 3892 | 261 3893 | 261 3894 | 261 3895 | 261 3896 | 261 3897 | 261 3898 | 261 3899 | 261 3900 | 261 3901 | 261 3902 | 262 3903 | 262 3904 | 262 3905 | 262 3906 | 262 3907 | 262 3908 | 262 3909 | 262 3910 | 262 3911 | 262 3912 | 262 3913 | 262 3914 | 262 3915 | 262 3916 | 263 3917 | 263 3918 | 263 3919 | 263 3920 | 263 3921 | 263 3922 | 263 3923 | 263 3924 | 263 3925 | 263 3926 | 263 3927 | 263 3928 | 263 3929 | 263 3930 | 263 3931 | 263 3932 | 263 3933 | 264 3934 | 264 3935 | 264 3936 | 264 3937 | 264 3938 | 264 3939 | 264 3940 | 264 3941 | 264 3942 | 264 3943 | 264 3944 | 264 3945 | 264 3946 | 264 3947 | 264 3948 | 264 3949 | 264 3950 | 265 3951 | 265 3952 | 265 3953 | 265 3954 | 265 3955 | 265 3956 | 265 3957 | 265 3958 | 265 3959 | 265 3960 | 265 3961 | 266 3962 | 266 3963 | 266 3964 | 266 3965 | 266 3966 | 266 3967 | 266 3968 | 266 3969 | 266 3970 | 266 3971 | 266 3972 | 267 3973 | 267 3974 | 267 3975 | 267 3976 | 267 3977 | 267 3978 | 267 3979 | 267 3980 | 268 3981 | 268 3982 | 268 3983 | 268 3984 | 268 3985 | 268 3986 | 268 3987 | 268 3988 | 268 3989 | 268 3990 | 268 3991 | 268 3992 | 268 3993 | 269 3994 | 269 3995 | 269 3996 | 269 3997 | 269 3998 | 269 3999 | 269 4000 | 269 4001 | 269 4002 | 269 4003 | 270 4004 | 270 4005 | 270 4006 | 270 4007 | 270 4008 | 270 4009 | 270 4010 | 270 4011 | 270 4012 | 270 4013 | 270 4014 | 270 4015 | 270 4016 | 270 4017 | 271 4018 | 271 4019 | 271 4020 | 271 4021 | 271 4022 | 271 4023 | 271 4024 | 271 4025 | 271 4026 | 271 4027 | 272 4028 | 272 4029 | 272 4030 | 272 4031 | 272 4032 | 272 4033 | 272 4034 | 272 4035 | 272 4036 | 273 4037 | 273 4038 | 273 4039 | 273 4040 | 273 4041 | 273 4042 | 273 4043 | 274 4044 | 274 4045 | 274 4046 | 274 4047 | 274 4048 | 274 4049 | 274 4050 | 274 4051 | 274 4052 | 274 4053 | 274 4054 | 274 4055 | 274 4056 | 274 4057 | 274 4058 | 274 4059 | 275 4060 | 275 4061 | 275 4062 | 275 4063 | 275 4064 | 275 4065 | 275 4066 | 275 4067 | 275 4068 | 275 4069 | 275 4070 | 275 4071 | 275 4072 | 275 4073 | 276 4074 | 276 4075 | 276 4076 | 276 4077 | 276 4078 | 276 4079 | 276 4080 | 276 4081 | 276 4082 | 276 4083 | 276 4084 | 276 4085 | 277 4086 | 277 4087 | 277 4088 | 277 4089 | 277 4090 | 277 4091 | 277 4092 | 278 4093 | 278 4094 | 278 4095 | 278 4096 | 278 4097 | 278 4098 | 278 4099 | 278 4100 | 278 4101 | 278 4102 | 278 4103 | 278 4104 | 279 4105 | 279 4106 | 279 4107 | 279 4108 | 279 4109 | 279 4110 | 279 4111 | 279 4112 | 280 4113 | 280 4114 | 280 4115 | 280 4116 | 280 4117 | 280 4118 | 280 4119 | 280 4120 | 280 4121 | 280 4122 | 280 4123 | 280 4124 | 280 4125 | 280 4126 | 281 4127 | 281 4128 | 281 4129 | 281 4130 | 281 4131 | 281 4132 | 281 
4133 | 282 4134 | 282 4135 | 282 4136 | 282 4137 | 282 4138 | 282 4139 | 282 4140 | 282 4141 | 282 4142 | 282 4143 | 282 4144 | 282 4145 | 282 4146 | 282 4147 | 283 4148 | 283 4149 | 283 4150 | 283 4151 | 283 4152 | 283 4153 | 283 4154 | 283 4155 | 283 4156 | 283 4157 | 283 4158 | 283 4159 | 283 4160 | 283 4161 | 283 4162 | 284 4163 | 284 4164 | 284 4165 | 284 4166 | 284 4167 | 284 4168 | 284 4169 | 284 4170 | 284 4171 | 284 4172 | 285 4173 | 285 4174 | 285 4175 | 285 4176 | 285 4177 | 285 4178 | 285 4179 | 285 4180 | 285 4181 | 285 4182 | 285 4183 | 286 4184 | 286 4185 | 286 4186 | 286 4187 | 286 4188 | 286 4189 | 286 4190 | 286 4191 | 286 4192 | 286 4193 | 286 4194 | 286 4195 | 287 4196 | 287 4197 | 287 4198 | 287 4199 | 287 4200 | 288 4201 | 288 4202 | 288 4203 | 288 4204 | 288 4205 | 288 4206 | 288 4207 | 288 4208 | 288 4209 | 288 4210 | 288 4211 | 288 4212 | 288 4213 | 288 4214 | 289 4215 | 289 4216 | 289 4217 | 289 4218 | 289 4219 | 289 4220 | 289 4221 | 289 4222 | 289 4223 | 289 4224 | 290 4225 | 290 4226 | 290 4227 | 290 4228 | 290 4229 | 290 4230 | 290 4231 | 290 4232 | 290 4233 | 291 4234 | 291 4235 | 291 4236 | 291 4237 | 291 4238 | 291 4239 | 291 4240 | 291 4241 | 291 4242 | 291 4243 | 292 4244 | 292 4245 | 292 4246 | 292 4247 | 293 4248 | 293 4249 | 293 4250 | 293 4251 | 293 4252 | 293 4253 | 293 4254 | 293 4255 | 293 4256 | 293 4257 | 293 4258 | 294 4259 | 294 4260 | 294 4261 | 294 4262 | 294 4263 | 294 4264 | 294 4265 | 294 4266 | 294 4267 | 294 4268 | 294 4269 | 295 4270 | 295 4271 | 295 4272 | 295 4273 | 295 4274 | 295 4275 | 295 4276 | 295 4277 | 295 4278 | 296 4279 | 296 4280 | 296 4281 | 296 4282 | 296 4283 | 296 4284 | 296 4285 | 296 4286 | 296 4287 | 297 4288 | 297 4289 | 297 4290 | 297 4291 | 297 4292 | 297 4293 | 297 4294 | 297 4295 | 298 4296 | 298 4297 | 298 4298 | 298 4299 | 298 4300 | 298 4301 | 298 4302 | 298 4303 | 299 4304 | 299 4305 | 299 4306 | 299 4307 | 299 4308 | 299 4309 | 299 4310 | 299 4311 | 299 4312 | 299 4313 | 299 4314 | 299 4315 | 300 4316 | 300 4317 | 300 4318 | 300 4319 | 300 4320 | 300 4321 | 300 4322 | 300 4323 | 300 4324 | 300 4325 | 301 4326 | 301 4327 | 301 4328 | 301 4329 | 301 4330 | 301 4331 | 301 4332 | 302 4333 | 302 4334 | 302 4335 | 302 4336 | 302 4337 | 302 4338 | 302 4339 | 302 4340 | 302 4341 | 302 4342 | 302 4343 | 302 4344 | 302 4345 | 302 4346 | 302 4347 | 302 4348 | 302 4349 | 302 4350 | 303 4351 | 303 4352 | 303 4353 | 303 4354 | 303 4355 | 303 4356 | 304 4357 | 304 4358 | 304 4359 | 304 4360 | 304 4361 | 305 4362 | 305 4363 | 305 4364 | 305 4365 | 305 4366 | 305 4367 | 306 4368 | 306 4369 | 306 4370 | 306 4371 | 306 4372 | 306 4373 | 306 4374 | 306 4375 | 306 4376 | 306 4377 | 306 4378 | 306 4379 | 306 4380 | 307 4381 | 307 4382 | 307 4383 | 307 4384 | 307 4385 | 307 4386 | 307 4387 | 308 4388 | 308 4389 | 308 4390 | 308 4391 | 308 4392 | 308 4393 | 308 4394 | 308 4395 | 308 4396 | 309 4397 | 309 4398 | 309 4399 | 309 4400 | 309 4401 | 309 4402 | 309 4403 | 309 4404 | 309 4405 | 309 4406 | 309 4407 | 309 4408 | 309 4409 | 310 4410 | 310 4411 | 310 4412 | 310 4413 | 310 4414 | 310 4415 | 310 4416 | 311 4417 | 311 4418 | 311 4419 | 311 4420 | 311 4421 | 311 4422 | 311 4423 | 311 4424 | 311 4425 | 312 4426 | 312 4427 | 312 4428 | 312 4429 | 312 4430 | 312 4431 | 312 4432 | 313 4433 | 313 4434 | 313 4435 | 313 4436 | 313 4437 | 313 4438 | 313 4439 | 313 4440 | 314 4441 | 314 4442 | 314 4443 | 314 4444 | 314 4445 | 314 4446 | 314 4447 | 314 4448 | 314 4449 | 314 4450 | 314 4451 | 315 4452 | 315 4453 | 315 4454 | 315 4455 | 315 
4456 | 315 4457 | 315 4458 | 315 4459 | 315 4460 | 315 4461 | 315 4462 | 316 4463 | 316 4464 | 316 4465 | 316 4466 | 316 4467 | 316 4468 | 316 4469 | 316 4470 | 316 4471 | 317 4472 | 317 4473 | 317 4474 | 317 4475 | 317 4476 | 317 4477 | 317 4478 | 317 4479 | 317 4480 | 317 4481 | 317 4482 | 317 4483 | 317 4484 | 317 4485 | 318 4486 | 318 4487 | 318 4488 | 318 4489 | 318 4490 | 318 4491 | 318 4492 | 318 4493 | 318 4494 | 319 4495 | 319 4496 | 319 4497 | 319 4498 | 319 4499 | 319 4500 | 319 4501 | 319 4502 | 319 4503 | 319 4504 | 320 4505 | 320 4506 | 320 4507 | 320 4508 | 320 4509 | 320 4510 | 321 4511 | 321 4512 | 321 4513 | 321 4514 | 321 4515 | 321 4516 | 321 4517 | 321 4518 | 322 4519 | 322 4520 | 322 4521 | 322 4522 | 322 4523 | 322 4524 | 323 4525 | 323 4526 | 323 4527 | 323 4528 | 323 4529 | 323 4530 | 323 4531 | 323 4532 | 323 4533 | 324 4534 | 324 4535 | 324 4536 | 324 4537 | 324 4538 | 324 4539 | 324 4540 | 324 4541 | 324 4542 | 325 4543 | 325 4544 | 325 4545 | 325 4546 | 325 4547 | 325 4548 | 325 4549 | 325 4550 | 326 4551 | 326 4552 | 326 4553 | 326 4554 | 326 4555 | 326 4556 | 326 4557 | 326 4558 | 327 4559 | 327 4560 | 327 4561 | 327 4562 | 327 4563 | 327 4564 | 328 4565 | 328 4566 | 328 4567 | 328 4568 | 328 4569 | 328 4570 | 328 4571 | 328 4572 | 329 4573 | 329 4574 | 329 4575 | 329 4576 | 329 4577 | 329 4578 | 329 4579 | 329 4580 | 329 4581 | 330 4582 | 330 4583 | 330 4584 | 330 4585 | 330 4586 | 330 4587 | 330 4588 | 330 4589 | 330 4590 | 330 4591 | 330 4592 | 330 4593 | 330 4594 | 330 4595 | 331 4596 | 331 4597 | 331 4598 | 331 4599 | 331 4600 | 331 4601 | 331 4602 | 332 4603 | 332 4604 | 332 4605 | 332 4606 | 332 4607 | 333 4608 | 333 4609 | 333 4610 | 333 4611 | 333 4612 | 333 4613 | 333 4614 | 333 4615 | 333 4616 | 334 4617 | 334 4618 | 334 4619 | 334 4620 | 334 4621 | 334 4622 | 334 4623 | 334 4624 | 334 4625 | 334 4626 | 335 4627 | 335 4628 | 335 4629 | 335 4630 | 335 4631 | 335 4632 | 335 4633 | 336 4634 | 336 4635 | 336 4636 | 336 4637 | 337 4638 | 337 4639 | 337 4640 | 337 4641 | 337 4642 | 337 4643 | 337 4644 | 337 4645 | 338 4646 | 338 4647 | 338 4648 | 338 4649 | 338 4650 | 338 4651 | 339 4652 | 340 4653 | 340 4654 | 340 4655 | 340 4656 | 340 4657 | 340 4658 | 340 4659 | 340 4660 | 340 4661 | 340 4662 | 340 4663 | 340 4664 | 341 4665 | 341 4666 | 341 4667 | 341 4668 | 341 4669 | 341 4670 | 341 4671 | 341 4672 | 341 4673 | 342 4674 | 342 4675 | 342 4676 | 342 4677 | 342 4678 | 342 4679 | 342 4680 | 342 4681 | 342 4682 | 342 4683 | 342 4684 | 342 4685 | 342 4686 | 342 4687 | 342 4688 | 343 4689 | 343 4690 | 344 4691 | 344 4692 | 344 4693 | 344 4694 | 344 4695 | 344 4696 | 344 4697 | 345 4698 | 345 4699 | 345 4700 | 345 4701 | 346 4702 | 346 4703 | 346 4704 | 346 4705 | 346 4706 | 346 4707 | 347 4708 | 347 4709 | 347 4710 | 347 4711 | 347 4712 | 348 4713 | 348 4714 | 348 4715 | 349 4716 | 349 4717 | 349 4718 | 349 4719 | 349 4720 | 350 4721 | 350 4722 | 350 4723 | 350 4724 | 350 4725 | 351 4726 | 351 4727 | 351 4728 | 351 4729 | 351 4730 | 351 4731 | 351 4732 | 351 4733 | 351 4734 | 351 4735 | 351 4736 | 351 4737 | 351 4738 | 352 4739 | 352 4740 | 352 4741 | 352 4742 | 352 4743 | 352 4744 | 352 4745 | 353 4746 | 353 4747 | 353 4748 | 353 4749 | 353 4750 | 353 4751 | 354 4752 | 354 4753 | 354 4754 | 354 4755 | 354 4756 | 354 4757 | 355 4758 | 355 4759 | 355 4760 | 356 4761 | 356 4762 | 356 4763 | 356 4764 | 356 4765 | 356 4766 | 357 4767 | 357 4768 | 357 4769 | 357 4770 | 357 4771 | 358 4772 | 358 4773 | 358 4774 | 358 4775 | 358 4776 | 358 4777 | 358 4778 | 358 
4779 | 359 4780 | 359 4781 | 359 4782 | 359 4783 | 359 4784 | 359 4785 | 359 4786 | 359 4787 | 359 4788 | 359 4789 | 360 4790 | 360 4791 | 360 4792 | 360 4793 | 360 4794 | 360 4795 | 360 4796 | 360 4797 | 361 4798 | 361 4799 | 361 4800 | 361 4801 | 362 4802 | 362 4803 | 362 4804 | 362 4805 | 362 4806 | 362 4807 | 362 4808 | 362 4809 | 362 4810 | 362 4811 | 362 4812 | 362 4813 | 363 4814 | 363 4815 | 363 4816 | 363 4817 | 363 4818 | 364 4819 | 364 4820 | 364 4821 | 364 4822 | 364 4823 | 364 4824 | 364 4825 | 365 4826 | 365 4827 | 365 4828 | 365 4829 | 365 4830 | 365 4831 | 365 4832 | 366 4833 | 366 4834 | 366 4835 | 366 4836 | 366 4837 | 366 4838 | 366 4839 | 366 4840 | 366 4841 | 367 4842 | 367 4843 | 367 4844 | 367 4845 | 367 4846 | 367 4847 | 368 4848 | 368 4849 | 368 4850 | 368 4851 | 369 4852 | 369 4853 | 369 4854 | 369 4855 | 369 4856 | 369 4857 | 370 4858 | 370 4859 | 370 4860 | 371 4861 | 371 4862 | 371 4863 | 371 4864 | 371 4865 | 371 4866 | 371 4867 | 371 4868 | 372 4869 | 372 4870 | 372 4871 | 372 4872 | 372 4873 | 373 4874 | 373 4875 | 373 4876 | 373 4877 | 373 4878 | 373 4879 | 374 4880 | 374 4881 | 374 4882 | 374 4883 | 374 4884 | 374 4885 | 374 4886 | 375 4887 | 375 4888 | 375 4889 | 375 4890 | 375 4891 | 375 4892 | 375 4893 | 376 4894 | 376 4895 | 376 4896 | 376 4897 | 376 4898 | 376 4899 | 376 4900 | 376 4901 | 376 4902 | 376 4903 | 376 4904 | 377 4905 | 377 4906 | 377 4907 | 377 4908 | 378 4909 | 378 4910 | 378 4911 | 378 4912 | 378 4913 | 379 4914 | 379 4915 | 379 4916 | 379 4917 | 379 4918 | 380 4919 | 381 4920 | 381 4921 | 381 4922 | 381 4923 | 381 4924 | 381 4925 | 381 4926 | 381 4927 | 381 4928 | 382 4929 | 382 4930 | 382 4931 | 382 4932 | 382 4933 | 383 4934 | 383 4935 | 383 4936 | 383 4937 | 383 4938 | 383 4939 | 383 4940 | 384 4941 | 384 4942 | 384 4943 | 384 4944 | 385 4945 | 385 4946 | 385 4947 | 385 4948 | 385 4949 | 385 4950 | 386 4951 | 386 4952 | 386 4953 | 386 4954 | 386 4955 | 386 4956 | 386 4957 | 386 4958 | 386 4959 | 386 4960 | 387 4961 | 387 4962 | 387 4963 | 388 4964 | 388 4965 | 388 4966 | 388 4967 | 388 4968 | 388 4969 | 388 4970 | 388 4971 | 388 4972 | 389 4973 | 389 4974 | 389 4975 | 389 4976 | 389 4977 | 389 4978 | 390 4979 | 390 4980 | 390 4981 | 390 4982 | 390 4983 | 391 4984 | 391 4985 | 391 4986 | 391 4987 | 391 4988 | 391 4989 | 391 4990 | 391 4991 | 391 4992 | 392 4993 | 392 4994 | 392 4995 | 392 4996 | 392 4997 | 392 4998 | 392 4999 | 392 5000 | 393 5001 | 393 5002 | 393 5003 | 393 5004 | 393 5005 | 393 5006 | 393 5007 | 393 5008 | 394 5009 | 394 5010 | 394 5011 | 395 5012 | 395 5013 | 395 5014 | 395 5015 | 395 5016 | 396 5017 | 397 5018 | 397 5019 | 397 5020 | 398 5021 | 398 5022 | 398 5023 | 398 5024 | 399 5025 | 399 5026 | 399 5027 | 399 5028 | 400 5029 | 400 5030 | 400 5031 | 400 5032 | 401 5033 | 401 5034 | 401 5035 | 401 5036 | 401 5037 | 401 5038 | 402 5039 | 402 5040 | 402 5041 | 402 5042 | 402 5043 | 402 5044 | 403 5045 | 403 5046 | 404 5047 | 404 5048 | 404 5049 | 404 5050 | 404 5051 | 404 5052 | 405 5053 | 405 5054 | 405 5055 | 405 5056 | 406 5057 | 406 5058 | 407 5059 | 407 5060 | 407 5061 | 407 5062 | 407 5063 | 407 5064 | 408 5065 | 408 5066 | 408 5067 | 408 5068 | 409 5069 | 410 5070 | 410 5071 | 410 5072 | 411 5073 | 411 5074 | 411 5075 | 411 5076 | 411 5077 | 412 5078 | 412 5079 | 412 5080 | 412 5081 | 412 5082 | 413 5083 | 413 5084 | 413 5085 | 413 5086 | 413 5087 | 413 5088 | 414 5089 | 414 5090 | 414 5091 | 414 5092 | 415 5093 | 415 5094 | 415 5095 | 416 5096 | 416 5097 | 416 5098 | 416 5099 | 417 5100 | 417 5101 | 418 
5102 | 418 5103 | 418 5104 | 418 5105 | 419 5106 | 419 5107 | 419 5108 | 419 5109 | 419 5110 | 420 5111 | 420 5112 | 421 5113 | 421 5114 | 422 5115 | 422 5116 | 422 5117 | 422 5118 | 422 5119 | 423 5120 | 423 5121 | 424 5122 | 424 5123 | 424 5124 | 424 5125 | 424 5126 | 424 5127 | 424 5128 | 424 5129 | 424 5130 | 425 5131 | 425 5132 | 425 5133 | 425 5134 | 425 5135 | 425 5136 | 426 5137 | 426 5138 | 426 5139 | 426 5140 | 428 5141 | 428 5142 | 429 5143 | 429 5144 | 429 5145 | 429 5146 | 430 5147 | 431 5148 | 431 5149 | 431 5150 | 431 5151 | 431 5152 | 432 5153 | 432 5154 | 433 5155 | 433 5156 | 433 5157 | 433 5158 | 433 5159 | 434 5160 | 434 5161 | 434 5162 | 434 5163 | 434 5164 | 435 5165 | 435 5166 | 435 5167 | 435 5168 | 435 5169 | 435 5170 | 435 5171 | 436 5172 | 436 5173 | 436 5174 | 436 5175 | 437 5176 | 437 5177 | 437 5178 | 437 5179 | 437 5180 | 437 5181 | 437 5182 | 437 5183 | 438 5184 | 438 5185 | 438 5186 | 438 5187 | 439 5188 | 439 5189 | 439 5190 | 439 5191 | 440 5192 | 440 5193 | 440 5194 | 440 5195 | 441 5196 | 441 5197 | 441 5198 | 442 5199 | 442 5200 | 443 5201 | 444 5202 | 444 5203 | 444 5204 | 444 5205 | 445 5206 | 445 5207 | 445 5208 | 445 5209 | 446 5210 | 446 5211 | 447 5212 | 447 5213 | 449 5214 | 449 5215 | 449 5216 | 449 5217 | 450 5218 | 451 5219 | 451 5220 | 451 5221 | 451 5222 | 451 5223 | 452 5224 | 452 5225 | 453 5226 | 453 5227 | 453 5228 | 453 5229 | 453 5230 | 454 5231 | 454 5232 | 454 5233 | 454 5234 | 454 5235 | 455 5236 | 455 5237 | 455 5238 | 455 5239 | 456 5240 | 456 5241 | 456 5242 | 456 5243 | 456 5244 | 457 5245 | 457 5246 | 457 5247 | 457 5248 | 457 5249 | 458 5250 | 458 5251 | 458 5252 | 459 5253 | 459 5254 | 460 5255 | 460 5256 | 460 5257 | 461 5258 | 461 5259 | 461 5260 | 461 5261 | 462 5262 | 463 5263 | 463 5264 | 464 5265 | 464 5266 | 464 5267 | 465 5268 | 465 5269 | 465 5270 | 466 5271 | 466 5272 | 466 5273 | 467 5274 | 467 5275 | 467 5276 | 468 5277 | 470 5278 | 470 5279 | 471 5280 | 471 5281 | 471 5282 | 471 5283 | 471 5284 | 471 5285 | 472 5286 | 472 5287 | 472 5288 | 472 5289 | 473 5290 | 474 5291 | 474 5292 | 475 5293 | 475 5294 | 475 5295 | 476 5296 | 476 5297 | 477 5298 | 477 5299 | 477 5300 | 478 5301 | 479 5302 | 479 5303 | 479 5304 | 480 5305 | 480 5306 | 481 5307 | 482 5308 | 482 5309 | 482 5310 | 482 5311 | 482 5312 | 483 5313 | 483 5314 | 483 5315 | 484 5316 | 486 5317 | 487 5318 | 487 5319 | 487 5320 | 488 5321 | 489 5322 | 489 5323 | 489 5324 | 489 5325 | 490 5326 | 490 5327 | 491 5328 | 491 5329 | 492 5330 | 492 5331 | 492 5332 | 492 5333 | 493 5334 | 493 5335 | 493 5336 | 493 5337 | 494 5338 | 494 5339 | 494 5340 | 494 5341 | 494 5342 | 494 5343 | 495 5344 | 495 5345 | 496 5346 | 496 5347 | 496 5348 | 497 5349 | 497 5350 | 497 5351 | 497 5352 | 498 5353 | 498 5354 | 499 5355 | 499 5356 | 500 5357 | 500 5358 | 500 5359 | 501 5360 | 501 5361 | 503 5362 | 504 5363 | 504 5364 | 504 5365 | 504 5366 | 505 5367 | 505 5368 | 505 5369 | 506 5370 | 508 5371 | 508 5372 | 508 5373 | 508 5374 | 509 5375 | 509 5376 | 509 5377 | 510 5378 | 511 5379 | 512 5380 | 512 5381 | 512 5382 | 512 5383 | 513 5384 | 513 5385 | 513 5386 | 514 5387 | 515 5388 | 515 5389 | 516 5390 | 516 5391 | 516 5392 | 518 5393 | 518 5394 | 519 5395 | 520 5396 | 520 5397 | 522 5398 | 522 5399 | 522 5400 | 523 5401 | 523 5402 | 524 5403 | 524 5404 | 525 5405 | 525 5406 | 525 5407 | 525 5408 | 526 5409 | 526 5410 | 529 5411 | 529 5412 | 529 5413 | 531 5414 | 534 5415 | 534 5416 | 535 5417 | 536 5418 | 537 5419 | 537 5420 | 538 5421 | 538 5422 | 538 5423 | 538 5424 | 539 
5425 | 539 5426 | 541 5427 | 542 5428 | 543 5429 | 543 5430 | 543 5431 | 544 5432 | 544 5433 | 545 5434 | 546 5435 | 546 5436 | 546 5437 | 546 5438 | 547 5439 | 549 5440 | 551 5441 | 551 5442 | 552 5443 | 553 5444 | 554 5445 | 554 5446 | 554 5447 | 555 5448 | 557 5449 | 558 5450 | 559 5451 | 562 5452 | 564 5453 | 565 5454 | 567 5455 | 571 5456 | 572 5457 | 572 5458 | 574 5459 | 575 5460 | 576 5461 | 579 5462 | 579 5463 | 580 5464 | 581 5465 | 581 5466 | 582 5467 | 582 5468 | 583 5469 | 585 5470 | 586 5471 | 586 5472 | 587 5473 | 588 5474 | 588 5475 | 588 5476 | 589 5477 | 589 5478 | 590 5479 | 594 5480 | 595 5481 | 597 5482 | 597 5483 | 601 5484 | 602 5485 | 602 5486 | 604 5487 | 604 5488 | 604 5489 | 608 5490 | 610 5491 | 612 5492 | 612 5493 | 615 5494 | 616 5495 | 617 5496 | 617 5497 | 618 5498 | 619 5499 | 620 5500 | 627 5501 | 627 5502 | 628 5503 | 628 5504 | 629 5505 | 630 5506 | 630 5507 | 632 5508 | 633 5509 | 633 5510 | 633 5511 | 634 5512 | 637 5513 | 638 5514 | 640 5515 | 644 5516 | 646 5517 | 649 5518 | 654 5519 | 659 5520 | 661 5521 | 661 5522 | 664 5523 | 666 5524 | 669 5525 | 670 5526 | 671 5527 | 674 5528 | 674 5529 | 677 5530 | 683 5531 | 687 5532 | 691 5533 | 693 5534 | 696 5535 | -------------------------------------------------------------------------------- /deepprime2sec.py: -------------------------------------------------------------------------------- 1 | __author__ = "Ehsaneddin Asgari" 2 | __license__ = "Apache 2" 3 | __version__ = "1.0.0" 4 | __maintainer__ = "Ehsaneddin Asgari" 5 | __email__ = "asgari@berkeley.edu" 6 | __project__ = "LLP - DeepPrime2Sec" 7 | __website__ = "https://llp.berkeley.edu/deepprime2sec/" 8 | 9 | import argparse 10 | import os 11 | import os.path 12 | import sys 13 | import warnings 14 | from utility.training import training_loop 15 | import yaml 16 | 17 | def checkArgs(args): 18 | ''' 19 | This function checks the input argument and returns the parameters 20 | ''' 21 | parser = argparse.ArgumentParser() 22 | 23 | 24 | # input config ################################################################################################# 25 | parser.add_argument('--config', action='store', dest='config_file', default='sample_configs/model_a.yaml', type=str, 26 | help='The config file for secondary structure prediction / please see the examples in the sample_configs/') 27 | 28 | 29 | parsedArgs = parser.parse_args() 30 | 31 | if (not os.access(parsedArgs.config_file, os.F_OK)): 32 | print("\nError: Permission denied or could not find the config file!") 33 | return False 34 | return parsedArgs.config_file 35 | 36 | if __name__ == '__main__': 37 | warnings.filterwarnings('ignore') 38 | res = checkArgs(sys.argv) 39 | if res != False: 40 | f = open(res, 'r') 41 | config=yaml.load(f) 42 | training_loop(**config) 43 | else: 44 | print(res) 45 | exit() 46 | 47 | 48 | -------------------------------------------------------------------------------- /installations/deepprime2sec.yml: -------------------------------------------------------------------------------- 1 | name: keras 2 | channels: 3 | - aaronzs 4 | - anaconda 5 | - conda-forge 6 | - defaults 7 | dependencies: 8 | - _tflow_select=2.1.0=gpu 9 | - absl-py=0.7.1=py36_0 10 | - asn1crypto=0.24.0=py36_1003 11 | - astor=0.7.1=py_0 12 | - atk=2.25.90=hb9dd440_1002 13 | - attrs=19.1.0=py_0 14 | - backcall=0.1.0=py_0 15 | - biopython=1.73=py36h14c3975_0 16 | - blas=2.8=openblas 17 | - bleach=1.5.0=py36_0 18 | - boto=2.49.0=py_0 19 | - boto3=1.9.141=py_0 20 | - botocore=1.12.141=py_0 21 | - 
bz2file=0.98=py_0 22 | - bzip2=1.0.6=h14c3975_1002 23 | - c-ares=1.15.0=h14c3975_1001 24 | - ca-certificates=2019.3.9=hecc5488_0 25 | - cairo=1.16.0=ha4e643d_1000 26 | - certifi=2019.3.9=py36_0 27 | - cffi=1.12.3=py36h8022711_0 28 | - chardet=3.0.4=py36_1003 29 | - cryptography=2.6.1=py36h72c5cf5_0 30 | - cudatoolkit=9.0=h13b8566_0 31 | - cudnn=7.3.1=cuda9.0_0 32 | - dbus=1.13.6=he372182_0 33 | - decorator=4.4.0=py_0 34 | - defusedxml=0.5.0=py_1 35 | - docutils=0.14=py36_1001 36 | - entrypoints=0.3=py36_1000 37 | - expat=2.2.5=hf484d3e_1002 38 | - fontconfig=2.13.1=he4413a7_1000 39 | - freetype=2.10.0=he983fc9_0 40 | - gast=0.2.2=py_0 41 | - gdk-pixbuf=2.36.12=h49783d7_1002 42 | - gensim=3.7.1=py36he1b5a44_1 43 | - gettext=0.19.8.1=hc5be6a0_1002 44 | - glib=2.58.3=hf63aee3_1001 45 | - gobject-introspection=1.58.2=py36h2da5eee_1000 46 | - graphite2=1.3.13=hf484d3e_1000 47 | - grpcio=1.16.1=py36hf8bcb03_1 48 | - gst-plugins-base=1.14.4=hdf3bae2_1001 49 | - gstreamer=1.14.4=h66beb1c_1001 50 | - gtk2=2.24.31=hb68c50a_1001 51 | - h5py=2.9.0=nompi_py36hf008753_1102 52 | - harfbuzz=2.4.0=h37c48d4_0 53 | - hdf5=1.10.4=nompi_h3c11f04_1106 54 | - html5lib=0.9999999=py36_0 55 | - icu=58.2=hf484d3e_1000 56 | - idna=2.8=py36_1000 57 | - ipykernel=5.1.0=py36h24bf2e0_1002 58 | - ipython=7.5.0=py36h24bf2e0_0 59 | - ipython_genutils=0.2.0=py_1 60 | - ipywidgets=7.4.2=py_0 61 | - jedi=0.13.3=py36_0 62 | - jinja2=2.10.1=py_0 63 | - jmespath=0.9.4=py_0 64 | - jpeg=9c=h14c3975_1001 65 | - jsonschema=3.0.1=py36_0 66 | - jupyter=1.0.0=py_2 67 | - jupyter_client=5.2.4=py_3 68 | - jupyter_console=6.0.0=py_0 69 | - jupyter_core=4.4.0=py_0 70 | - keras-applications=1.0.7=py_1 71 | - keras-base=2.2.4=py36_0 72 | - keras-gpu=2.2.4=0 73 | - keras-preprocessing=1.0.9=py_1 74 | - libblas=3.8.0=8_openblas 75 | - libcblas=3.8.0=8_openblas 76 | - libffi=3.2.1=he1b5a44_1006 77 | - libgcc-ng=8.2.0=hdf63c60_1 78 | - libgfortran-ng=7.3.0=hdf63c60_0 79 | - libgpuarray=0.7.6=h14c3975_1003 80 | - libiconv=1.15=h516909a_1005 81 | - liblapack=3.8.0=8_openblas 82 | - liblapacke=3.8.0=8_openblas 83 | - libopenblas=0.2.20=h9ac9557_7 84 | - libpng=1.6.37=hed695b0_0 85 | - libprotobuf=3.7.1=h8b12597_0 86 | - libsodium=1.0.16=h14c3975_1001 87 | - libstdcxx-ng=8.2.0=hdf63c60_1 88 | - libtiff=4.0.10=h648cc4a_1001 89 | - libuuid=2.32.1=h14c3975_1000 90 | - libxcb=1.13=h14c3975_1002 91 | - libxml2=2.9.9=h13577e0_0 92 | - mako=1.0.7=py_1 93 | - markdown=2.6.11=py_0 94 | - markupsafe=1.1.1=py36h14c3975_0 95 | - mistune=0.8.4=py36h14c3975_1000 96 | - mock=3.0.3=py36_0 97 | - nbconvert=5.5.0=py_0 98 | - nbformat=4.4.0=py_1 99 | - ncurses=6.1=hf484d3e_1002 100 | - notebook=5.7.8=py36_0 101 | - numpy=1.14.3=py36h28100ab_1 102 | - numpy-base=1.14.3=py36h0ea5e3f_1 103 | - openblas=0.3.6=h6e990d7_1 104 | - openssl=1.1.1b=h14c3975_1 105 | - pandoc=2.7.2=0 106 | - pandocfilters=1.4.2=py_1 107 | - pango=1.40.14=h4ea9474_1004 108 | - parso=0.4.0=py_0 109 | - pcre=8.41=hf484d3e_1003 110 | - pexpect=4.7.0=py36_0 111 | - pickleshare=0.7.5=py36_1000 112 | - pip=19.1=py36_0 113 | - pixman=0.34.0=h14c3975_1003 114 | - prometheus_client=0.6.0=py_0 115 | - prompt_toolkit=2.0.9=py_0 116 | - protobuf=3.7.1=py36he1b5a44_0 117 | - pthread-stubs=0.4=h14c3975_1001 118 | - ptyprocess=0.6.0=py_1001 119 | - pycparser=2.19=py36_1 120 | - pygments=2.3.1=py_0 121 | - pygpu=0.7.6=py36h3010b51_1000 122 | - pyopenssl=19.0.0=py36_0 123 | - pyqt=5.9.2=py36h05f1152_2 124 | - pyrsistent=0.15.1=py36h516909a_0 125 | - pysocks=1.6.8=py36_1002 126 | - python=3.6.7=h381d211_1004 127 | - 
python-dateutil=2.8.0=py_0 128 | - pyyaml=5.1=py36h14c3975_0 129 | - pyzmq=18.0.1=py36hc4ba49a_1 130 | - qt=5.9.7=h52cfd70_1 131 | - qtconsole=4.4.3=py_0 132 | - readline=7.0=hf8c457e_1001 133 | - requests=2.21.0=py36_1000 134 | - s3transfer=0.2.0=py36_0 135 | - scikit-learn=0.20.3=py36ha8026db_1 136 | - scipy=1.2.1=py36h09a28d5_1 137 | - send2trash=1.5.0=py_0 138 | - setuptools=41.0.1=py36_0 139 | - sip=4.19.8=py36hf484d3e_1000 140 | - six=1.12.0=py36_1000 141 | - smart_open=1.8.3=py_0 142 | - sqlite=3.26.0=h67949de_1001 143 | - tensorboard=1.10.0=py36_0 144 | - tensorflow=1.10.0=py36_0 145 | - tensorflow-estimator=1.13.0=py_0 146 | - tensorflow-gpu=1.10.0=py36_0 147 | - termcolor=1.1.0=py_2 148 | - terminado=0.8.2=py36_0 149 | - testpath=0.4.2=py_1001 150 | - theano=1.0.3=py36_0 151 | - tk=8.6.9=h84994c4_1001 152 | - tornado=6.0.2=py36h516909a_0 153 | - tqdm=4.31.1=py_0 154 | - traitlets=4.3.2=py36_1000 155 | - urllib3=1.24.2=py36_0 156 | - wcwidth=0.1.7=py_1 157 | - webencodings=0.5.1=py_1 158 | - werkzeug=0.15.2=py_0 159 | - wheel=0.33.1=py36_0 160 | - widgetsnbextension=3.4.2=py36_1000 161 | - xorg-kbproto=1.0.7=h14c3975_1002 162 | - xorg-libice=1.0.9=h516909a_1004 163 | - xorg-libsm=1.2.3=h84519dc_1000 164 | - xorg-libx11=1.6.7=h14c3975_1000 165 | - xorg-libxau=1.0.9=h14c3975_0 166 | - xorg-libxdmcp=1.1.3=h516909a_0 167 | - xorg-libxext=1.3.4=h516909a_0 168 | - xorg-libxrender=0.9.10=h516909a_1002 169 | - xorg-libxt=1.1.5=h14c3975_1002 170 | - xorg-renderproto=0.11.1=h14c3975_1002 171 | - xorg-xextproto=7.3.0=h14c3975_1002 172 | - xorg-xproto=7.0.31=h14c3975_1007 173 | - xz=5.2.4=h14c3975_1001 174 | - yaml=0.1.7=h14c3975_1001 175 | - zeromq=4.3.1=hf484d3e_1000 176 | - zlib=1.2.11=h14c3975_1004 177 | - pip: 178 | - cycler==0.10.0 179 | - keras-multi-head==0.19.0 180 | - keras-pos-embd==0.10.0 181 | - keras-self-attention==0.41.0 182 | - kiwisolver==1.1.0 183 | - matplotlib==3.1.0 184 | - pandas==0.24.2 185 | - pyparsing==2.4.0 186 | - pytz==2019.1 187 | - tensorflow-hub==0.4.0 188 | 189 | -------------------------------------------------------------------------------- /installations/requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==0.7.1 2 | asn1crypto==0.24.0 3 | astor==0.7.1 4 | attrs==19.1.0 5 | backcall==0.1.0 6 | biopython==1.73 7 | bleach==1.5.0 8 | boto==2.49.0 9 | boto3==1.9.141 10 | botocore==1.12.141 11 | bz2file==0.98 12 | certifi==2019.3.9 13 | cffi==1.12.3 14 | chardet==3.0.4 15 | cryptography==2.6.1 16 | cycler==0.10.0 17 | decorator==4.4.0 18 | defusedxml==0.5.0 19 | docutils==0.14 20 | entrypoints==0.3 21 | gast==0.2.2 22 | gensim==3.7.1 23 | grpcio==1.16.1 24 | h5py==2.9.0 25 | html5lib==0.9999999 26 | idna==2.8 27 | ipykernel==5.1.0 28 | ipython==7.5.0 29 | ipython-genutils==0.2.0 30 | ipywidgets==7.4.2 31 | jedi==0.13.3 32 | Jinja2==2.10.1 33 | jmespath==0.9.4 34 | jsonschema==3.0.1 35 | jupyter-client==5.2.4 36 | jupyter-console==6.0.0 37 | jupyter-core==4.4.0 38 | Keras==2.2.4 39 | Keras-Applications==1.0.7 40 | keras-multi-head==0.19.0 41 | keras-pos-embd==0.10.0 42 | Keras-Preprocessing==1.0.9 43 | keras-self-attention==0.41.0 44 | kiwisolver==1.1.0 45 | Mako==1.0.7 46 | Markdown==2.6.11 47 | MarkupSafe==1.1.1 48 | matplotlib==3.1.0 49 | mistune==0.8.4 50 | mock==3.0.3 51 | nbconvert==5.5.0 52 | nbformat==4.4.0 53 | notebook==5.7.8 54 | numpy==1.14.3 55 | pandas==0.24.2 56 | pandocfilters==1.4.2 57 | parso==0.4.0 58 | pexpect==4.7.0 59 | pickleshare==0.7.5 60 | prometheus-client==0.6.0 61 | 
prompt-toolkit==2.0.9 62 | protobuf==3.7.1 63 | ptyprocess==0.6.0 64 | pycparser==2.19 65 | Pygments==2.3.1 66 | pygpu==0.7.6 67 | pyOpenSSL==19.0.0 68 | pyparsing==2.4.0 69 | pyrsistent==0.15.1 70 | PySocks==1.6.8 71 | python-dateutil==2.8.0 72 | pytz==2019.1 73 | PyYAML==5.1 74 | pyzmq==18.0.1 75 | qtconsole==4.4.3 76 | requests==2.21.0 77 | s3transfer==0.2.0 78 | scikit-learn==0.20.3 79 | scipy==1.2.1 80 | seaborn==0.9.0 81 | Send2Trash==1.5.0 82 | six==1.12.0 83 | smart-open==1.8.3 84 | tensorboard==1.10.0 85 | tensorflow==1.10.0 86 | tensorflow-estimator==1.13.0 87 | tensorflow-gpu==1.10.0 88 | tensorflow-hub==0.4.0 89 | termcolor==1.1.0 90 | terminado==0.8.2 91 | testpath==0.4.2 92 | Theano==1.0.3 93 | tornado==6.0.2 94 | tqdm==4.31.1 95 | traitlets==4.3.2 96 | urllib3==1.24.2 97 | wcwidth==0.1.7 98 | webencodings==0.5.1 99 | Werkzeug==0.15.2 100 | widgetsnbextension==3.4.2 101 | -------------------------------------------------------------------------------- /layers/crf.py: -------------------------------------------------------------------------------- 1 | 2 | # -*- coding: utf-8 -*- 3 | from __future__ import absolute_import 4 | 5 | """ 6 | Author: Philipp Gross, https://github.com/phipleg/keras/blob/crf/keras/layers/crf.py 7 | ==== We performed slight modifications 8 | """ 9 | 10 | from keras import backend as K 11 | from keras import initializers, regularizers, constraints 12 | from keras.engine import Layer, InputSpec 13 | 14 | class ChainCRF(Layer): 15 | 16 | """A Linear Chain Conditional Random Field output layer. 17 | It carries the loss function and its weights for computing 18 | the global tag sequence scores. While training it acts as 19 | the identity function that passes the inputs to the subsequently 20 | used loss function. While testing it applies Viterbi decoding 21 | and returns the best scoring tag sequence as one-hot encoded vectors. 22 | # Arguments 23 | init: weight initialization function for chain energies U. 24 | Can be the name of an existing function (str), 25 | or a Theano function (see: [initializers](../initializers.md)). 26 | U_regularizer: instance of [WeightRegularizer](../regularizers.md) 27 | (eg. L1 or L2 regularization), applied to the transition weight matrix. 28 | b_start_regularizer: instance of [WeightRegularizer](../regularizers.md), 29 | applied to the start bias b. 30 | b_end_regularizer: instance of [WeightRegularizer](../regularizers.md) 31 | module, applied to the end bias b. 32 | b_start_constraint: instance of the [constraints](../constraints.md) 33 | module, applied to the start bias b. 34 | b_end_constraint: instance of the [constraints](../constraints.md) 35 | module, applied to the end bias b. 36 | weights: list of Numpy arrays for initializing [U, b_start, b_end]. 37 | Thus it should be a list of 3 elements of shape 38 | [(n_classes, n_classes), (n_classes, ), (n_classes, )] 39 | # Input shape 40 | 3D tensor with shape `(nb_samples, timesteps, nb_classes)`, where 41 | ´timesteps >= 2`and `nb_classes >= 2`. 42 | # Output shape 43 | Same shape as input. 44 | # Masking 45 | This layer supports masking for input sequences of variable length. 
46 | # Example 47 | ```python 48 | # As the last layer of sequential layer with 49 | # model.output_shape == (None, timesteps, nb_classes) 50 | crf = ChainCRF() 51 | model.add(crf) 52 | # now: model.output_shape == (None, timesteps, nb_classes) 53 | # Compile model with chain crf loss (and one-hot encoded labels) and accuracy 54 | model.compile(loss=crf.loss, optimizer='sgd', metrics=['accuracy']) 55 | # Alternatively, compile model with sparsely encoded labels and sparse accuracy: 56 | model.compile(loss=crf.sparse_loss, optimizer='sgd', metrics=['sparse_categorical_accuracy']) 57 | ``` 58 | # Gotchas 59 | ## Model loading 60 | When you want to load a saved model that has a crf output, then loading 61 | the model with 'keras.models.load_model' won't work properly because 62 | the reference of the loss function to the transition parameters is lost. To 63 | fix this, you need to use the parameter 'custom_objects' as follows: 64 | ```python 65 | from keras.layer.crf import create_custom_objects: 66 | model = keras.models.load_model(filename, custom_objects=create_custom_objects()) 67 | ``` 68 | ## Temporal sample weights 69 | Given a ChainCRF instance crf both loss functions, crf.loss and crf.sparse_loss 70 | return a tensor of shape (batch_size, 1) and not (batch_size, maxlen). 71 | that sample weighting in temporal mode. 72 | """ 73 | 74 | def __init__(self, init='glorot_uniform', 75 | U_regularizer=None, 76 | b_start_regularizer=None, 77 | b_end_regularizer=None, 78 | U_constraint=None, 79 | b_start_constraint=None, 80 | b_end_constraint=None, 81 | weights=None, 82 | **kwargs): 83 | super(ChainCRF, self).__init__(**kwargs) 84 | self.init = initializers.get(init) 85 | self.U_regularizer = regularizers.get(U_regularizer) 86 | self.b_start_regularizer = regularizers.get(b_start_regularizer) 87 | self.b_end_regularizer = regularizers.get(b_end_regularizer) 88 | self.U_constraint = constraints.get(U_constraint) 89 | self.b_start_constraint = constraints.get(b_start_constraint) 90 | self.b_end_constraint = constraints.get(b_end_constraint) 91 | 92 | self.initial_weights = weights 93 | 94 | self.supports_masking = True 95 | self.uses_learning_phase = True 96 | self.input_spec = [InputSpec(ndim=3)] 97 | 98 | def compute_output_shape(self, input_shape): 99 | assert input_shape and len(input_shape) == 3 100 | return (input_shape[0], input_shape[1], input_shape[2]) 101 | 102 | def compute_mask(self, input, mask=None): 103 | if mask is not None: 104 | return K.any(mask, axis=1) 105 | return mask 106 | 107 | def _fetch_mask(self): 108 | mask = None 109 | if self._inbound_nodes: 110 | mask = self._inbound_nodes[0].input_masks[0] 111 | return mask 112 | 113 | def build(self, input_shape): 114 | assert len(input_shape) == 3 115 | n_classes = input_shape[2] 116 | n_steps = input_shape[1] 117 | assert n_steps is None or n_steps >= 2 118 | self.input_spec = [InputSpec(dtype=K.floatx(), 119 | shape=(None, n_steps, n_classes))] 120 | 121 | self.U = self.add_weight((n_classes, n_classes), 122 | initializer=self.init, 123 | name='U', 124 | regularizer=self.U_regularizer, 125 | constraint=self.U_constraint) 126 | 127 | self.b_start = self.add_weight((n_classes,), 128 | initializer='zero', 129 | name='b_start', 130 | regularizer=self.b_start_regularizer, 131 | constraint=self.b_start_constraint) 132 | 133 | self.b_end = self.add_weight((n_classes,), 134 | initializer='zero', 135 | name='b_end', 136 | regularizer=self.b_end_regularizer, 137 | constraint=self.b_end_constraint) 138 | 139 | if self.initial_weights is 
not None: 140 | self.set_weights(self.initial_weights) 141 | del self.initial_weights 142 | 143 | self.built = True 144 | 145 | def call(self, x, mask=None): 146 | y_pred = viterbi_decode(x, self.U, self.b_start, self.b_end, mask) 147 | nb_classes = self.input_spec[0].shape[2] 148 | y_pred_one_hot = K.one_hot(y_pred, nb_classes) 149 | return K.in_train_phase(x, y_pred_one_hot) 150 | 151 | def loss(self, y_true, y_pred): 152 | """Linear Chain Conditional Random Field loss function. 153 | """ 154 | mask = self._fetch_mask() 155 | return chain_crf_loss(y_true, y_pred, self.U, self.b_start, self.b_end, mask) 156 | 157 | def sparse_loss(self, y_true, y_pred): 158 | """Linear Chain Conditional Random Field loss function with sparse 159 | tag sequences. 160 | """ 161 | y_true = K.cast(y_true, 'int32') 162 | y_true = K.squeeze(y_true, 2) 163 | mask = self._fetch_mask() 164 | return sparse_chain_crf_loss(y_true, y_pred, self.U, self.b_start, self.b_end, mask) 165 | 166 | def get_config(self): 167 | config = { 168 | 'init': initializers.serialize(self.init), 169 | 'U_regularizer': regularizers.serialize(self.U_regularizer), 170 | 'b_start_regularizer': regularizers.serialize(self.b_start_regularizer), 171 | 'b_end_regularizer': regularizers.serialize(self.b_end_regularizer), 172 | 'U_constraint': constraints.serialize(self.U_constraint), 173 | 'b_start_constraint': constraints.serialize(self.b_start_constraint), 174 | 'b_end_constraint': constraints.serialize(self.b_end_constraint) 175 | } 176 | base_config = super(ChainCRF, self).get_config() 177 | return dict(list(base_config.items()) + list(config.items())) 178 | 179 | 180 | 181 | def path_energy(y, x, U, b_start=None, b_end=None, mask=None): 182 | """Calculates the energy of a tag path y for a given input x (with mask), 183 | transition energies U and boundary energies b_start, b_end.""" 184 | x = add_boundary_energy(x, b_start, b_end, mask) 185 | return path_energy0(y, x, U, mask) 186 | 187 | 188 | def path_energy0(y, x, U, mask=None): 189 | """Path energy without boundary potential handling.""" 190 | n_classes = K.shape(x)[2] 191 | y_one_hot = K.one_hot(y, n_classes) 192 | 193 | # Tag path energy 194 | energy = K.sum(x * y_one_hot, 2) 195 | energy = K.sum(energy, 1) 196 | 197 | # Transition energy 198 | y_t = y[:, :-1] 199 | y_tp1 = y[:, 1:] 200 | U_flat = K.reshape(U, [-1]) 201 | # Convert 2-dim indices (y_t, y_tp1) of U to 1-dim indices of U_flat: 202 | flat_indices = y_t * n_classes + y_tp1 203 | U_y_t_tp1 = K.gather(U_flat, flat_indices) 204 | 205 | if mask is not None: 206 | mask = K.cast(mask, K.floatx()) 207 | y_t_mask = mask[:, :-1] 208 | y_tp1_mask = mask[:, 1:] 209 | U_y_t_tp1 *= y_t_mask * y_tp1_mask 210 | 211 | energy += K.sum(U_y_t_tp1, axis=1) 212 | 213 | return energy 214 | 215 | 216 | def sparse_chain_crf_loss(y, x, U, b_start=None, b_end=None, mask=None): 217 | """Given the true sparsely encoded tag sequence y, input x (with mask), 218 | transition energies U, boundary energies b_start and b_end, it computes 219 | the loss function of a Linear Chain Conditional Random Field: 220 | loss(y, x) = NNL(P(y|x)), where P(y|x) = exp(E(y, x)) / Z. 221 | So, loss(y, x) = - E(y, x) + log(Z) 222 | Here, E(y, x) is the tag path energy, and Z is the normalization constant. 223 | The values log(Z) is also called free energy. 
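A minimal NumPy sketch of this loss on a single, unmasked sequence (illustrative only; the helper names `path_energy_np` and `crf_nll_np` are ad hoc, and the brute-force enumeration of Z stands in for the masked, batched logsumexp recurrence used by the functions below):
```python
# Illustrative NumPy sketch of the linear-chain CRF loss described above:
# E(y, x) = sum_t x[t, y_t] + sum_t U[y_t, y_{t+1}] + b_start[y_0] + b_end[y_T],
# loss(y, x) = -E(y, x) + log(Z), with Z summed over all tag paths.
import itertools
import numpy as np

def path_energy_np(y, x, U, b_start, b_end):
    # emission scores of the chosen tags plus boundary energies
    energy = x[np.arange(len(y)), y].sum() + b_start[y[0]] + b_end[y[-1]]
    # transition energies between consecutive tags
    energy += U[y[:-1], y[1:]].sum()
    return energy

def crf_nll_np(y, x, U, b_start, b_end):
    n_steps, n_classes = x.shape
    # log(Z): enumerate every possible tag path (feasible only for tiny toy inputs)
    energies = [path_energy_np(np.array(p), x, U, b_start, b_end)
                for p in itertools.product(range(n_classes), repeat=n_steps)]
    log_z = np.logaddexp.reduce(energies)
    return -path_energy_np(y, x, U, b_start, b_end) + log_z

# toy check: 4 positions, 3 tag classes (e.g. per-residue scores from a BiLSTM)
rng = np.random.RandomState(0)
x, U = rng.randn(4, 3), rng.randn(3, 3)
b_start, b_end = np.zeros(3), np.zeros(3)
print(crf_nll_np(np.array([0, 1, 1, 2]), x, U, b_start, b_end))
```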
224 | """ 225 | x = add_boundary_energy(x, b_start, b_end, mask) 226 | energy = path_energy0(y, x, U, mask) 227 | energy -= free_energy0(x, U, mask) 228 | return K.expand_dims(-energy, -1) 229 | 230 | 231 | def chain_crf_loss(y, x, U, b_start=None, b_end=None, mask=None): 232 | """Variant of sparse_chain_crf_loss but with one-hot encoded tags y.""" 233 | y_sparse = K.argmax(y, -1) 234 | y_sparse = K.cast(y_sparse, 'int32') 235 | return sparse_chain_crf_loss(y_sparse, x, U, b_start, b_end, mask) 236 | 237 | 238 | def add_boundary_energy(x, b_start=None, b_end=None, mask=None): 239 | """Given the observations x, it adds the start boundary energy b_start (resp. 240 | end boundary energy b_end on the start (resp. end) elements and multiplies 241 | the mask.""" 242 | if mask is None: 243 | if b_start is not None: 244 | x = K.concatenate([x[:, :1, :] + b_start, x[:, 1:, :]], axis=1) 245 | if b_end is not None: 246 | x = K.concatenate([x[:, :-1, :], x[:, -1:, :] + b_end], axis=1) 247 | else: 248 | mask = K.cast(mask, K.floatx()) 249 | mask = K.expand_dims(mask, 2) 250 | x *= mask 251 | if b_start is not None: 252 | mask_r = K.concatenate([K.zeros_like(mask[:, :1]), mask[:, :-1]], axis=1) 253 | start_mask = K.cast(K.greater(mask, mask_r), K.floatx()) 254 | x = x + start_mask * b_start 255 | if b_end is not None: 256 | mask_l = K.concatenate([mask[:, 1:], K.zeros_like(mask[:, -1:])], axis=1) 257 | end_mask = K.cast(K.greater(mask, mask_l), K.floatx()) 258 | x = x + end_mask * b_end 259 | return x 260 | 261 | 262 | def viterbi_decode(x, U, b_start=None, b_end=None, mask=None): 263 | """Computes the best tag sequence y for a given input x, i.e. the one that 264 | maximizes the value of path_energy.""" 265 | x = add_boundary_energy(x, b_start, b_end, mask) 266 | 267 | alpha_0 = x[:, 0, :] 268 | gamma_0 = K.zeros_like(alpha_0) 269 | initial_states = [gamma_0, alpha_0] 270 | _, gamma = _forward(x, 271 | lambda B: [K.cast(K.argmax(B, axis=1), K.floatx()), K.max(B, axis=1)], 272 | initial_states, 273 | U, 274 | mask) 275 | y = _backward(gamma, mask) 276 | return y 277 | 278 | 279 | def free_energy(x, U, b_start=None, b_end=None, mask=None): 280 | """Computes efficiently the sum of all path energies for input x, when 281 | runs over all possible tag sequences.""" 282 | x = add_boundary_energy(x, b_start, b_end, mask) 283 | return free_energy0(x, U, mask) 284 | 285 | 286 | def free_energy0(x, U, mask=None): 287 | """Free energy without boundary potential handling.""" 288 | initial_states = [x[:, 0, :]] 289 | last_alpha, _ = _forward(x, 290 | lambda B: [K.logsumexp(B, axis=1)], 291 | initial_states, 292 | U, 293 | mask) 294 | return last_alpha[:, 0] 295 | 296 | 297 | def _forward(x, reduce_step, initial_states, U, mask=None): 298 | """Forward recurrence of the linear chain crf.""" 299 | 300 | def _forward_step(energy_matrix_t, states): 301 | alpha_tm1 = states[-1] 302 | new_states = reduce_step(K.expand_dims(alpha_tm1, 2) + energy_matrix_t) 303 | return new_states[0], new_states 304 | 305 | U_shared = K.expand_dims(K.expand_dims(U, 0), 0) 306 | 307 | if mask is not None: 308 | mask = K.cast(mask, K.floatx()) 309 | mask_U = K.expand_dims(K.expand_dims(mask[:, :-1] * mask[:, 1:], 2), 3) 310 | U_shared = U_shared * mask_U 311 | 312 | inputs = K.expand_dims(x[:, 1:, :], 2) + U_shared 313 | inputs = K.concatenate([inputs, K.zeros_like(inputs[:, -1:, :, :])], axis=1) 314 | 315 | last, values, _ = K.rnn(_forward_step, inputs, initial_states) 316 | return last, values 317 | 318 | 319 | def batch_gather(reference, 
indices): 320 | ref_shape = K.shape(reference) 321 | batch_size = ref_shape[0] 322 | n_classes = ref_shape[1] 323 | flat_indices = K.arange(0, batch_size) * n_classes + K.flatten(indices) 324 | return K.gather(K.flatten(reference), flat_indices) 325 | 326 | 327 | def _backward(gamma, mask): 328 | """Backward recurrence of the linear chain crf.""" 329 | gamma = K.cast(gamma, 'int32') 330 | 331 | def _backward_step(gamma_t, states): 332 | y_tm1 = K.squeeze(states[0], 0) 333 | y_t = batch_gather(gamma_t, y_tm1) 334 | return y_t, [K.expand_dims(y_t, 0)] 335 | 336 | initial_states = [K.expand_dims(K.zeros_like(gamma[:, 0, 0]), 0)] 337 | _, y_rev, _ = K.rnn(_backward_step, 338 | gamma, 339 | initial_states, 340 | go_backwards=True) 341 | y = K.reverse(y_rev, 1) 342 | 343 | if mask is not None: 344 | mask = K.cast(mask, dtype='int32') 345 | # mask output 346 | y *= mask 347 | # set masked values to -1 348 | y += -(1 - mask) 349 | return y 350 | 351 | def create_custom_objects(): 352 | """Returns the custom objects, needed for loading a persisted model.""" 353 | instanceHolder = {'instance': None} 354 | 355 | class ClassWrapper(ChainCRF): 356 | def __init__(self, *args, **kwargs): 357 | instanceHolder['instance'] = self 358 | super(ClassWrapper, self).__init__(*args, **kwargs) 359 | 360 | def loss(*args): 361 | method = getattr(instanceHolder['instance'], 'loss') 362 | return method(*args) 363 | 364 | def sparse_loss(*args): 365 | method = getattr(instanceHolder['instance'], 'sparse_loss') 366 | return method(*args) 367 | 368 | return {'ChainCRF': ClassWrapper, 'loss': loss, 'sparse_loss': sparse_loss} 369 | -------------------------------------------------------------------------------- /layers/utility.py: -------------------------------------------------------------------------------- 1 | from keras import regularizers 2 | from keras.layers import Lambda, concatenate, Conv1D 3 | 4 | 5 | def slice_tensor(dimension, start, end, name='sliced_layer'): 6 | ''' 7 | :param dimension: 8 | :param start: 9 | :param end: 10 | :return: 11 | ''' 12 | 13 | # Crops (or slices) a Tensor on a given dimension from start to end 14 | # example : to crop tensor x[:, :, 5:10] 15 | # call slice(2, 5, 10) as you want to crop on the second dimension 16 | def func(x): 17 | if dimension == 0: 18 | return x[start: end] 19 | if dimension == 1: 20 | return x[:, start: end] 21 | if dimension == 2: 22 | return x[:, :, start: end] 23 | if dimension == 3: 24 | return x[:, :, :, start: end] 25 | if dimension == 4: 26 | return x[:, :, :, :, start: end] 27 | 28 | return Lambda(func, name=name) 29 | 30 | 31 | def multiscale_CNN(input_layer, gating_layer, filter_size, convs, kernel_regularizer=0.00005): 32 | ''' 33 | :param input_layer: 34 | :param gating_layer: 35 | :param filter_size: 36 | :param convs: 37 | :param kernel_regularizer: 38 | :return: 39 | ''' 40 | z_t = gating_layer(input_layer) 41 | conclayers = [] 42 | for idx, conv in enumerate(convs): 43 | conclayers.append(Conv1D(filter_size, conv, activation="relu", padding="same", 44 | kernel_regularizer=regularizers.l2(kernel_regularizer))(input_layer)) 45 | conc = concatenate(conclayers) 46 | output = Lambda(lambda a: z_t * a[0] + (1 - z_t) * a[1])([input_layer, conc]) 47 | return output 48 | -------------------------------------------------------------------------------- /models/a_cnn_bilstm.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | import os 3 | import sys 4 | 5 | currentdir = 
os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) 6 | parentdir = os.path.dirname(currentdir) 7 | sys.path.insert(0, parentdir) 8 | 9 | import numpy as np 10 | from keras.models import Model 11 | from keras.layers import Dense, CuDNNLSTM, Bidirectional, Input, Dropout, concatenate, Conv1D, \ 12 | BatchNormalization 13 | from keras.layers.wrappers import TimeDistributed 14 | from layers.utility import slice_tensor 15 | from keras import optimizers 16 | from keras import regularizers 17 | 18 | np.random.seed(0) 19 | 20 | 21 | def model_a_cnn_bilstm(n_classes, convs=[3, 5, 7], dense_size=200, lstm_size=400, dropout_rate=0.5, 22 | features_to_use=['onehot', 'pssm'], filter_size=256, lr=0.001): 23 | ''' 24 | :param n_classes: 25 | :param convs: 26 | :param dense_size: 27 | :param lstm_size: 28 | :param dropout_rate: 29 | :param features_to_use: 30 | :param filter_size: 31 | :return: 32 | ''' 33 | visible = Input(shape=(None, 408)) 34 | 35 | # slice different feature types 36 | biophysical = slice_tensor(2, 0, 16, name='biophysicalfeatures')(visible) 37 | embedding = slice_tensor(2, 16, 66, name='skipgramembd')(visible) 38 | onehot = slice_tensor(2, 66, 87, name='onehot')(visible) 39 | pssm = slice_tensor(2, 87, 108, name='pssm')(visible) 40 | elmo = slice_tensor(2, 108, 408, name='elmo')(visible) 41 | 42 | # create input based-on selected features 43 | input_dict = {'pssm': pssm, 'onehot': onehot, 'embedding': embedding, 'elmo': elmo, 44 | 'biophysical': biophysical} 45 | features = [] 46 | for feature in features_to_use: 47 | features.append(input_dict[feature]) 48 | 49 | ## batch normalization on the input features 50 | if len(features_to_use) == 1: 51 | conclayers = features 52 | input = BatchNormalization(name='batchnorm_input')(features[0]) 53 | else: 54 | input = BatchNormalization(name='batchnorm_input')(concatenate(features)) 55 | conclayers = [input] 56 | 57 | # performing the conlvolutions 58 | for idx, conv in enumerate(convs): 59 | idx = str(idx + 1) 60 | conclayers.append(BatchNormalization(name='batch_norm_conv' + idx)( 61 | Conv1D(filter_size, conv, activation="relu", padding="same", name='conv' + idx, 62 | kernel_regularizer=regularizers.l2(0.001))(input))) 63 | conc = concatenate(conclayers) 64 | 65 | # Dropout and Dense Layer before LSTM 66 | if dropout_rate > 0: 67 | drop_before = Dropout(dropout_rate, name='dropoutonconvs')(conc) 68 | dense_convinp = Dense(dense_size, activation='relu', name='denseonconvs')(drop_before) 69 | else: 70 | dense_convinp = Dense(dense_size, activation='relu', name='denseonconvs')(conc) 71 | 72 | # Batch normalize the results of dropout 73 | dense_convinpn = BatchNormalization(name='batch_norm_dense')(dense_convinp) 74 | 75 | # LSTM 76 | lstm = Bidirectional(CuDNNLSTM(lstm_size, return_sequences=True, name='bilstm'))(dense_convinpn) 77 | drop_after_lstm = Dropout(dropout_rate)(lstm) 78 | dense_out = Dense(dense_size, activation='relu')(drop_after_lstm) 79 | 80 | # Labeling layer layer 81 | timedist = TimeDistributed(Dense(n_classes, activation='softmax'))(dense_out) 82 | model = Model(inputs=visible, outputs=timedist) 83 | adam = optimizers.Adam(lr=lr) 84 | model.compile(loss='categorical_crossentropy', optimizer=adam, weighted_metrics=['accuracy'], 85 | sample_weight_mode='temporal') 86 | 87 | # print model 88 | print(model.summary()) 89 | return model, 'model_a_cnn_bilstm#' + '#'.join(features_to_use) + '@conv' + '_'.join( 90 | [str(c) for c in convs]) + '@dense_' + str(dense_size) + '@lstm' + str(lstm_size) + 
'@drop_rate' + str( 91 | dropout_rate) + '@filtersize_' + str(filter_size) + '@lr_' + str(lr) 92 | -------------------------------------------------------------------------------- /models/b_cnn_bilstm_highway.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | import os 3 | import sys 4 | 5 | currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) 6 | parentdir = os.path.dirname(currentdir) 7 | sys.path.insert(0, parentdir) 8 | import numpy as np 9 | from keras.models import Model 10 | from keras.layers import Dense, CuDNNLSTM, Bidirectional, Input, Dropout, concatenate, Conv1D, \ 11 | BatchNormalization 12 | from keras.layers.wrappers import TimeDistributed 13 | from layers.crf import ChainCRF 14 | from layers.utility import slice_tensor 15 | from keras import optimizers 16 | from keras import regularizers 17 | 18 | np.random.seed(0) 19 | 20 | 21 | def model_b_cnn_bilstm_highway(n_classes, convs=[3, 5, 7], dense_size=200, lstm_size=400, dropout_rate=0.5, 22 | features_to_use=['onehot', 'pssm'], filter_size=256, lr=0.001, 23 | use_CRF=False): 24 | ''' 25 | :param n_classes: 26 | :param convs: 27 | :param dense_size: 28 | :param lstm_size: 29 | :param dropout_rate: 30 | :param features_to_use: 31 | :param filter_size: 32 | :param lr: 33 | :param use_CRF: 34 | :return: 35 | ''' 36 | 37 | visible = Input(shape=(None, 408)) 38 | 39 | # slice different feature types 40 | biophysical = slice_tensor(2, 0, 16, name='biophysicalfeatures')(visible) 41 | embedding = slice_tensor(2, 16, 66, name='skipgramembd')(visible) 42 | onehot = slice_tensor(2, 66, 87, name='onehot')(visible) 43 | pssm = slice_tensor(2, 87, 108, name='pssm')(visible) 44 | # we need batchnorm for the highway 45 | batchnorm_profile = BatchNormalization(name='batchnormseqprof')(pssm) 46 | elmo = slice_tensor(2, 108, 408, name='elmo')(visible) 47 | 48 | # create input based-on selected features 49 | input_dict = {'pssm': pssm, 'onehot': onehot, 'embedding': embedding, 'elmo': elmo, 50 | 'biophysical': biophysical} 51 | features = [] 52 | for feature in features_to_use: 53 | features.append(input_dict[feature]) 54 | 55 | ## batch normalization on the input features 56 | if len(features_to_use) == 1: 57 | conclayers = features 58 | input = BatchNormalization(name='batchnorm_input')(features[0]) 59 | else: 60 | input = BatchNormalization(name='batchnorm_input')(concatenate(features)) 61 | conclayers = [input] 62 | 63 | # performing the conlvolutions 64 | for idx, conv in enumerate(convs): 65 | idx = str(idx + 1) 66 | conclayers.append(BatchNormalization(name='batch_norm_conv' + idx)( 67 | Conv1D(filter_size, conv, activation="relu", padding="same", name='conv' + idx, 68 | kernel_regularizer=regularizers.l2(0.001))(input))) 69 | conc = concatenate(conclayers) 70 | 71 | # Dropout and Dense Layer before LSTM 72 | if dropout_rate > 0: 73 | drop_before = Dropout(dropout_rate, name='dropoutonconvs')(conc) 74 | dense_convinp = Dense(dense_size, activation='relu', name='denseonconvs')(drop_before) 75 | else: 76 | dense_convinp = Dense(dense_size, activation='relu', name='denseonconvs')(conc) 77 | 78 | # Batch normalize the results of dropout 79 | dense_convinpn = BatchNormalization(name='batch_norm_dense')(dense_convinp) 80 | 81 | # LSTM 82 | lstm = Bidirectional(CuDNNLSTM(lstm_size, return_sequences=True, name='bilstm'))(dense_convinpn) 83 | drop_after_lstm = Dropout(dropout_rate)(lstm) 84 | 85 | # Highway 86 | dense_out = Dense(dense_size, 
activation='relu')(drop_after_lstm) 87 | highway_layer = concatenate([dense_out, batchnorm_profile]) 88 | highway_out = Dense(dense_size, activation='relu')(highway_layer) 89 | 90 | if use_CRF: 91 | timedist = TimeDistributed(Dense(n_classes, name='crf_in'))(highway_out) 92 | crf = ChainCRF(name="crf1") 93 | crf_output = crf(timedist) 94 | model = Model(inputs=visible, outputs=crf_output) 95 | adam = optimizers.Adam(lr=lr) 96 | model.compile(loss=crf.loss, optimizer=adam, weighted_metrics=['accuracy'], sample_weight_mode='temporal') 97 | else: 98 | timedist = TimeDistributed(Dense(n_classes, activation='softmax'))(highway_out) 99 | model = Model(inputs=visible, outputs=timedist) 100 | adam = optimizers.Adam(lr=lr) 101 | model.compile(loss='categorical_crossentropy', optimizer=adam, weighted_metrics=['accuracy'], 102 | sample_weight_mode='temporal') 103 | print(model.summary()) 104 | return model, 'model_b_cnn_bilstm_highway#' + '#'.join(features_to_use) + '@conv' + '_'.join( 105 | [str(c) for c in convs]) + '@dense_' + str(dense_size) + '@lstm' + str(lstm_size) + '@droplstm' + str( 106 | dropout_rate) + '@filtersize_' + str(filter_size) + '@lr_' + str(lr) + '@crf_' + str(use_CRF) 107 | -------------------------------------------------------------------------------- /models/c_cnn_bilstm_crf.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | import os 3 | import sys 4 | 5 | currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) 6 | parentdir = os.path.dirname(currentdir) 7 | sys.path.insert(0,parentdir) 8 | 9 | import numpy as np 10 | from keras.models import Model 11 | from keras.layers import Dense, CuDNNLSTM, Bidirectional, Input, Dropout, concatenate, Conv1D, \ 12 | BatchNormalization 13 | from keras.layers.wrappers import TimeDistributed 14 | from layers.crf import ChainCRF 15 | from layers.utility import slice_tensor 16 | from keras import optimizers 17 | from keras import regularizers 18 | np.random.seed(0) 19 | 20 | 21 | 22 | def model_c_cnn_bilstm_crf(n_classes, convs=[3, 5, 7], dense_size=200, lstm_size=400, dropout_rate=0.5, 23 | features_to_use=['onehot', 'pssm'], filter_size=256, CRF_input_dim=200, lr=0.001): 24 | ''' 25 | :param n_classes: 26 | :param convs: 27 | :param dense_size: 28 | :param lstm_size: 29 | :param dropout_rate: 30 | :param features_to_use: 31 | :param filter_size: 32 | :return: 33 | ''' 34 | visible = Input(shape=(None, 408)) 35 | 36 | # slice different feature types 37 | biophysical = slice_tensor(2, 0, 16, name='biophysicalfeatures')(visible) 38 | embedding = slice_tensor(2, 16, 66, name='skipgramembd')(visible) 39 | onehot = slice_tensor(2, 66, 87, name='onehot')(visible) 40 | pssm = slice_tensor(2, 87, 108, name='pssm')(visible) 41 | elmo = slice_tensor(2, 108, 408, name='elmo')(visible) 42 | 43 | # create input based-on selected features 44 | input_dict = {'pssm': pssm, 'onehot': onehot, 'embedding': embedding, 'elmo': elmo, 45 | 'biophysical': biophysical} 46 | features = [] 47 | for feature in features_to_use: 48 | features.append(input_dict[feature]) 49 | 50 | ## batch normalization on the input features 51 | if len(features_to_use) == 1: 52 | conclayers = features 53 | input = BatchNormalization(name='batchnorm_input')(features[0]) 54 | else: 55 | input = BatchNormalization(name='batchnorm_input')(concatenate(features)) 56 | conclayers = [input] 57 | 58 | # performing the conlvolutions 59 | for idx, conv in enumerate(convs): 60 | idx = str(idx + 1) 61 | 
conclayers.append(BatchNormalization(name='batch_norm_conv' + idx)( 62 | Conv1D(filter_size, conv, activation="relu", padding="same", name='conv' + idx, 63 | kernel_regularizer=regularizers.l2(0.001))(input))) 64 | conc = concatenate(conclayers) 65 | 66 | # Dropout and Dense Layer before LSTM 67 | if dropout_rate > 0: 68 | drop_before = Dropout(dropout_rate, name='dropoutonconvs')(conc) 69 | dense_convinp = Dense(dense_size, activation='relu', name='denseonconvs')(drop_before) 70 | else: 71 | dense_convinp = Dense(dense_size, activation='relu', name='denseonconvs')(conc) 72 | 73 | # Batch normalize the results of dropout 74 | dense_convinpn = BatchNormalization(name='batch_norm_dense')(dense_convinp) 75 | 76 | # LSTM 77 | lstm = Bidirectional(CuDNNLSTM(lstm_size, return_sequences=True, name='bilstm'))(dense_convinpn) 78 | drop_after_lstm = Dropout(dropout_rate)(lstm) 79 | dense_out = Dense(CRF_input_dim, activation='relu')(drop_after_lstm) 80 | 81 | timedist = TimeDistributed(Dense(n_classes, name='crf_in'))(dense_out) 82 | crf = ChainCRF(name="crf1") 83 | crf_output = crf(timedist) 84 | model = Model(inputs=visible, outputs=crf_output) 85 | adam=optimizers.Adam(lr=lr) 86 | model.compile(loss=crf.loss, optimizer=adam, weighted_metrics= ['accuracy'], sample_weight_mode='temporal') 87 | print(model.summary()) 88 | return model, 'model_c_cnn_bilstm_CRF#'+'#'.join(features_to_use)+'@conv'+'_'.join([str(c) for c in convs])+'@dense_'+str(dense_size)+'@lstm'+str(lstm_size)+'@droplstm'+str(dropout_rate)+'@filtersize_'+str(filter_size)+ '@lr_' + str(lr) 89 | 90 | -------------------------------------------------------------------------------- /models/d_cnn_bilstm_attention.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | import os 3 | import sys 4 | 5 | currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) 6 | parentdir = os.path.dirname(currentdir) 7 | sys.path.insert(0, parentdir) 8 | 9 | import numpy as np 10 | 11 | np.random.seed(7) 12 | from keras.models import Model 13 | from keras.layers import Dense, CuDNNLSTM, Bidirectional, Input, Dropout, concatenate, Conv1D, \ 14 | BatchNormalization 15 | from keras.layers.wrappers import TimeDistributed 16 | from layers.crf import ChainCRF 17 | from layers.utility import slice_tensor 18 | from keras import optimizers 19 | from keras import regularizers 20 | from keras_self_attention import SeqSelfAttention 21 | 22 | 23 | def model_d_cnn_bilstm_attention(n_classes, convs=[3, 5, 7], dense_size=200, lstm_size=400, dropout_rate=0.5, 24 | features_to_use=['onehot', 'pssm'], filter_size=256, lr=0.001, 25 | use_CRF=False, attention_units=32, attention_type='additive'): 26 | ''' 27 | :param n_classes: 28 | :param convs: 29 | :param dense_size: 30 | :param lstm_size: 31 | :param dropout_rate: 32 | :param features_to_use: 33 | :param filter_size: 34 | :param lr: 35 | :param use_CRF: 36 | :return: 37 | ''' 38 | 39 | visible = Input(shape=(None, 408)) 40 | 41 | # slice different feature types 42 | biophysical = slice_tensor(2, 0, 16, name='biophysicalfeatures')(visible) 43 | embedding = slice_tensor(2, 16, 66, name='skipgramembd')(visible) 44 | onehot = slice_tensor(2, 66, 87, name='onehot')(visible) 45 | pssm = slice_tensor(2, 87, 108, name='pssm')(visible) 46 | elmo = slice_tensor(2, 108, 408, name='elmo')(visible) 47 | 48 | # create input based-on selected features 49 | input_dict = {'pssm': pssm, 'onehot': onehot, 'embedding': embedding, 'elmo': elmo, 50 | 
'biophysical': biophysical} 51 | features = [] 52 | for feature in features_to_use: 53 | features.append(input_dict[feature]) 54 | 55 | ## batch normalization on the input features 56 | if len(features_to_use) == 1: 57 | conclayers = features 58 | input = BatchNormalization(name='batchnorm_input')(features[0]) 59 | else: 60 | input = BatchNormalization(name='batchnorm_input')(concatenate(features)) 61 | conclayers = [input] 62 | 63 | # performing the conlvolutions 64 | for idx, conv in enumerate(convs): 65 | idx = str(idx + 1) 66 | conclayers.append(BatchNormalization(name='batch_norm_conv' + idx)( 67 | Conv1D(filter_size, conv, activation="relu", padding="same", name='conv' + idx, 68 | kernel_regularizer=regularizers.l2(0.001))(input))) 69 | conc = concatenate(conclayers) 70 | 71 | # Dropout and Dense Layer before LSTM 72 | if dropout_rate > 0: 73 | drop_before = Dropout(dropout_rate, name='dropoutonconvs')(conc) 74 | dense_convinp = Dense(dense_size, activation='relu', name='denseonconvs')(drop_before) 75 | else: 76 | dense_convinp = Dense(dense_size, activation='relu', name='denseonconvs')(conc) 77 | 78 | # Batch normalize the results of dropout 79 | dense_convinpn = BatchNormalization(name='batch_norm_dense')(dense_convinp) 80 | 81 | # LSTM 82 | lstm = Bidirectional(CuDNNLSTM(lstm_size, return_sequences=True, name='bilstm'))(dense_convinpn) 83 | drop_after_lstm = Dropout(dropout_rate)(lstm) 84 | lstm_out = Dense(dense_size, activation='relu')(drop_after_lstm) 85 | 86 | # Attention layer 87 | seq_representation = SeqSelfAttention(units=attention_units, attention_type=attention_type, 88 | name='Attention')(lstm_out) 89 | if use_CRF: 90 | timedist = TimeDistributed(Dense(n_classes, name='timedist'))(seq_representation) 91 | crf = ChainCRF(name="crf1") 92 | crf_output = crf(timedist) 93 | model = Model(inputs=visible, outputs=crf_output) 94 | adam = optimizers.Adam(lr=0.001) 95 | model.compile(loss=crf.loss, optimizer=adam, weighted_metrics=['accuracy'], sample_weight_mode='temporal') 96 | else: 97 | timedist = TimeDistributed(Dense(n_classes, activation='softmax'))(seq_representation) 98 | model = Model(inputs=visible, outputs=timedist) 99 | adam = optimizers.Adam(lr=0.001) 100 | model.compile(loss='categorical_crossentropy', optimizer=adam, weighted_metrics=['accuracy'], 101 | sample_weight_mode='temporal') 102 | print(model.summary()) 103 | return model, 'model_d_cnn_bilstm_attention#' + '#'.join(features_to_use) + '@conv' + '_'.join( 104 | [str(c) for c in convs]) + '@dense_' + str(dense_size) + '@lstm' + str(lstm_size) + '@droplstm' + str( 105 | dropout_rate) + '@filtersize_' + str(filter_size) + '@lr_' + str(lr) + '@use_CRF_' + str( 106 | use_CRF) + '@attention_units_' + str(attention_units) + '@attention_type_' + str(attention_type) 107 | -------------------------------------------------------------------------------- /models/e_cnn.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | import os 3 | import sys 4 | 5 | currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) 6 | parentdir = os.path.dirname(currentdir) 7 | sys.path.insert(0, parentdir) 8 | 9 | import numpy as np 10 | 11 | np.random.seed(7) 12 | from keras.models import Model 13 | from keras.layers import Dense, Input, Dropout, concatenate, Conv1D, \ 14 | BatchNormalization 15 | from keras.layers.wrappers import TimeDistributed 16 | from layers.crf import ChainCRF 17 | from layers.utility import slice_tensor 18 | from keras import 
optimizers 19 | from keras import regularizers 20 | 21 | 22 | def model_e_cnn(n_classes, convs=[3, 5, 7], dense_size=200, dropout_rate=0.5, 23 | features_to_use=['onehot', 'pssm'], filter_size=256, lr=0.001, 24 | use_CRF=False): 25 | ''' 26 | :param n_classes: 27 | :param convs: 28 | :param dense_size: 29 | :param dropout_rate: 30 | :param features_to_use: 31 | :param filter_size: 32 | :param lr: 33 | :param use_CRF: 34 | :return: 35 | ''' 36 | visible = Input(shape=(None, 408)) 37 | # slice different feature types 38 | biophysical = slice_tensor(2, 0, 16, name='biophysicalfeatures')(visible) 39 | embedding = slice_tensor(2, 16, 66, name='skipgramembd')(visible) 40 | onehot = slice_tensor(2, 66, 87, name='onehot')(visible) 41 | pssm = slice_tensor(2, 87, 108, name='pssm')(visible) 42 | elmo = slice_tensor(2, 108, 408, name='elmo')(visible) 43 | 44 | # create input based-on selected features 45 | input_dict = {'pssm': pssm, 'onehot': onehot, 'embedding': embedding, 'elmo': elmo, 46 | 'biophysical': biophysical} 47 | features = [] 48 | for feature in features_to_use: 49 | features.append(input_dict[feature]) 50 | 51 | ## batch normalization on the input features 52 | if len(features_to_use) == 1: 53 | conclayers = features 54 | input = BatchNormalization(name='batchnorm_input')(features[0]) 55 | else: 56 | input = BatchNormalization(name='batchnorm_input')(concatenate(features)) 57 | conclayers = [input] 58 | 59 | # performing the conlvolutions 60 | for idx, conv in enumerate(convs): 61 | idx = str(idx + 1) 62 | conclayers.append(BatchNormalization(name='batch_norm_conv' + idx)( 63 | Conv1D(filter_size, conv, activation="relu", padding="same", name='conv' + idx, 64 | kernel_regularizer=regularizers.l2(0.001))(input))) 65 | conc = concatenate(conclayers) 66 | 67 | dropped = Dropout(dropout_rate, name='dropoutonconvs')(conc) 68 | dense_convinp = Dense(dense_size, activation='relu', name='denseonconvs')(dropped) 69 | dense_convinpn = BatchNormalization(name='batch_norm_dense')(dense_convinp) 70 | 71 | if use_CRF: 72 | timedist = TimeDistributed(Dense(n_classes, name='dense'))(dense_convinpn) 73 | crf = ChainCRF(name="crf1") 74 | crf_output = crf(timedist) 75 | model = Model(inputs=visible, outputs=crf_output) 76 | adam = optimizers.Adam(lr=lr) 77 | model.compile(loss=crf.loss, optimizer=adam, weighted_metrics=['accuracy'], sample_weight_mode='temporal') 78 | else: 79 | timedist = TimeDistributed(Dense(n_classes, activation='softmax'))(dense_convinpn) 80 | model = Model(inputs=visible, outputs=timedist) 81 | adam = optimizers.Adam(lr=lr) 82 | model.compile(loss='categorical_crossentropy', optimizer=adam, weighted_metrics=['accuracy'], 83 | sample_weight_mode='temporal') 84 | 85 | print(model.summary()) 86 | return model, 'model_e_cnn#' + '#'.join(features_to_use) + '@conv' + '_'.join( 87 | [str(c) for c in convs]) + '@dense_' + str(dense_size) + '@droplstm' + str( 88 | dropout_rate) + '@filtersize_' + str(filter_size) + '@lr_' + str(lr) + '@use_CRF_' + str( 89 | use_CRF) 90 | -------------------------------------------------------------------------------- /models/f_multiscale_cnn.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | import os 3 | import sys 4 | 5 | currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) 6 | parentdir = os.path.dirname(currentdir) 7 | sys.path.insert(0, parentdir) 8 | 9 | import numpy as np 10 | 11 | np.random.seed(7) 12 | from keras.models import Model 13 | from keras.layers 
import Dense, Input, Dropout, concatenate, Conv1D, \ 14 | BatchNormalization 15 | from keras.layers.wrappers import TimeDistributed 16 | from layers.crf import ChainCRF 17 | from layers.utility import slice_tensor, multiscale_CNN 18 | from keras import optimizers 19 | from keras import regularizers 20 | 21 | 22 | 23 | def model_f_multiscale_cnn(n_classes, convs=[3, 5, 7], dropout_rate=0.5, 24 | features_to_use=['onehot', 'pssm'], filter_size=256, lr=0.001, multiscalecnn_layers=3, cnn_regularizer=0.00005, 25 | use_CRF=False): 26 | ''' 27 | :param n_classes: 28 | :param convs: 29 | :param dropout_rate: 30 | :param features_to_use: 31 | :param filter_size: 32 | :param lr: 33 | :param multicnn_layers: 34 | :param cnn_regularizer: 35 | :param use_CRF: 36 | :return: 37 | ''' 38 | visible = Input(shape=(None, 408)) 39 | 40 | # slice different feature types 41 | biophysical = slice_tensor(2, 0, 16, name='biophysicalfeatures')(visible) 42 | embedding = slice_tensor(2, 16, 66, name='skipgramembd')(visible) 43 | onehot = slice_tensor(2, 66, 87, name='onehot')(visible) 44 | pssm = slice_tensor(2, 87, 108, name='pssm')(visible) 45 | elmo = slice_tensor(2, 108, 408, name='elmo')(visible) 46 | 47 | input_dict = {'pssm': pssm, 'onehot': onehot, 'embedding': embedding, 'elmo': elmo, 48 | 'biophysical': biophysical} 49 | 50 | gating = Dense(len(convs) * filter_size, activation='sigmoid') 51 | 52 | # create input 53 | features = [] 54 | for feature in features_to_use: 55 | features.append(input_dict[feature]) 56 | 57 | if len(features_to_use) == 1: 58 | conclayers = features 59 | input = BatchNormalization(name='batchnorm_input')(features[0]) 60 | else: 61 | input = BatchNormalization(name='batchnorm_input')(concatenate(features)) 62 | conclayers = [] 63 | 64 | # performing the conlvolutions 65 | for idx, conv in enumerate(convs): 66 | idx = str(idx + 1) 67 | conclayers.append(Conv1D(filter_size, conv, activation="relu", padding="same", name='conv' + idx, 68 | kernel_regularizer=regularizers.l2(cnn_regularizer))(input)) 69 | current_multi_cnn_input = concatenate(conclayers) 70 | 71 | # Multiscale CNN application 72 | for layer_idx in range(multiscalecnn_layers-1): 73 | current_multi_cnn_output = multiscale_CNN(current_multi_cnn_input, gating, filter_size, convs, cnn_regularizer) 74 | current_multi_cnn_input = Dropout(dropout_rate)(current_multi_cnn_output) 75 | dense_out = Dense(len(convs) * filter_size, activation='relu')(current_multi_cnn_input) 76 | 77 | if use_CRF: 78 | timedist = TimeDistributed(Dense(n_classes, name='timedist'))(dense_out) 79 | crf = ChainCRF(name="crf1") 80 | crf_output = crf(timedist) 81 | model = Model(inputs=visible, outputs=crf_output) 82 | adam = optimizers.Adam(lr=lr) 83 | model.compile(loss=crf.loss, optimizer=adam, weighted_metrics=['accuracy'], sample_weight_mode='temporal') 84 | else: 85 | timedist = TimeDistributed(Dense(n_classes, activation='softmax'))(dense_out) 86 | model = Model(inputs=visible, outputs=timedist) 87 | adam = optimizers.Adam(lr=lr) 88 | model.compile(loss='categorical_crossentropy', optimizer=adam, weighted_metrics=['accuracy'], 89 | sample_weight_mode='temporal') 90 | print(model.summary()) 91 | return model, 'model_f_multiscale_cnn#' + '#'.join(features_to_use) + '@conv' + '_'.join( 92 | [str(c) for c in convs]) + '@dropout_rate' + str( 93 | dropout_rate) + '@filtersize_' + str(filter_size) + '@lr_' + str(lr) + '@use_CRF_' + str( 94 | use_CRF) + '@multiscalecnn_layers' + str(multiscalecnn_layers) + '@cnn_regularizer' + str(cnn_regularizer) 95 | 
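The six builders under models/ are not called directly during training; utility/training.py picks one by name from a YAML config (the deep_learning_model key) and unpacks the accompanying model_paramters block as keyword arguments. As a minimal sketch of that wiring, the hypothetical snippet below loads sample_configs/model_a.yaml and builds the corresponding Keras model by hand. It assumes PyYAML is installed, the script is run from the repository root on a CUDA-capable machine (the builders use CuDNNLSTM), and that n_classes is 9 (the eight secondary-structure states plus the padding label produced by LabelingData); treat those as assumptions rather than guarantees.

```python
# Minimal sketch (not part of the repository): build a model straight from a sample config.
import yaml  # assumes PyYAML is available

from models.a_cnn_bilstm import model_a_cnn_bilstm

with open('sample_configs/model_a.yaml') as fh:
    config = yaml.safe_load(fh)

# 'model_paramters' (spelled this way) is the key used by the configs and by training.py.
params = config['model_paramters']

# 9 = 8 secondary-structure states + 1 padding class; an assumption made for this sketch.
model, run_name = model_a_cnn_bilstm(n_classes=9, **params)
print(run_name)  # e.g. 'model_a_cnn_bilstm#onehot#pssm@conv3_5_7_11_21@...'
```

For the CRF-based variants (model c always, and models b, d, e, f when use_CRF is enabled), reloading a checkpoint written by ModelCheckpoint needs the custom objects defined at the bottom of layers/crf.py, because the CRF loss function keeps a reference to the transition weights that plain keras.models.load_model cannot restore. The ChainCRF docstring imports them from keras.layer.crf; in this repository the layer lives in layers/crf.py, so a sketch against this code base would look like the following (the checkpoint path is a hypothetical placeholder):

```python
# Minimal sketch (not part of the repository): reload a saved CRF-based model.
import keras

from layers.crf import create_custom_objects

model = keras.models.load_model(
    'results/cnn_bilstm_crf/pssm_onehot/weights-improvement-10-0.700-0.680.hdf5',  # hypothetical path
    custom_objects=create_custom_objects())
```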
-------------------------------------------------------------------------------- /sample_configs/model_a.yaml: -------------------------------------------------------------------------------- 1 | deep_learning_model: model_a_cnn_bilstm 2 | model_paramters: 3 | convs: 4 | - 3 5 | - 5 6 | - 7 7 | - 11 8 | - 21 9 | dense_size: 1000 10 | dropout_rate: 0.5 11 | features_to_use: 12 | - onehot 13 | - pssm 14 | filter_size: 256 15 | lr: 0.001 16 | lstm_size: 1000 17 | run_parameters: 18 | domain_name: cnnbilstm 19 | epochs: 100 20 | gpu: 1 21 | patience: 5 22 | setting_name: pssm_onehot 23 | test_batch_size: 100 24 | train_batch_size: 64 25 | -------------------------------------------------------------------------------- /sample_configs/model_b.yaml: -------------------------------------------------------------------------------- 1 | deep_learning_model: model_b_cnn_bilstm_highway 2 | model_paramters: 3 | convs: 4 | - 3 5 | - 5 6 | - 7 7 | - 11 8 | - 21 9 | dense_size: 1000 10 | dropout_rate: 0.5 11 | features_to_use: 12 | - onehot 13 | - pssm 14 | filter_size: 256 15 | lr: 0.001 16 | lstm_size: 1000 17 | use_CRF: false 18 | run_parameters: 19 | domain_name: cnn_bilstm_highway 20 | epochs: 100 21 | gpu: 1 22 | patience: 5 23 | setting_name: pssm_onhot 24 | test_batch_size: 100 25 | train_batch_size: 64 26 | -------------------------------------------------------------------------------- /sample_configs/model_c.yaml: -------------------------------------------------------------------------------- 1 | deep_learning_model: model_c_cnn_bilstm_crf 2 | model_paramters: 3 | CRF_input_dim: 200 4 | convs: 5 | - 3 6 | - 5 7 | - 7 8 | - 11 9 | - 21 10 | dense_size: 1000 11 | dropout_rate: 0.5 12 | features_to_use: 13 | - onehot 14 | - pssm 15 | filter_size: 256 16 | lr: 0.0005 17 | lstm_size: 1000 18 | run_parameters: 19 | domain_name: cnn_bilstm_crf 20 | epochs: 100 21 | gpu: 1 22 | patience: 10 23 | setting_name: pssm_onehot 24 | test_batch_size: 100 25 | train_batch_size: 64 26 | -------------------------------------------------------------------------------- /sample_configs/model_d.yaml: -------------------------------------------------------------------------------- 1 | deep_learning_model: model_d_cnn_bilstm_attention 2 | model_paramters: 3 | attention_type: additive 4 | attention_units: 32 5 | convs: 6 | - 3 7 | - 5 8 | - 7 9 | - 11 10 | - 21 11 | dense_size: 1000 12 | dropout_rate: 0.5 13 | features_to_use: 14 | - onehot 15 | - pssm 16 | filter_size: 256 17 | lr: 0.001 18 | lstm_size: 1000 19 | use_CRF: false 20 | run_parameters: 21 | domain_name: baseline 22 | epochs: 100 23 | gpu: 1 24 | patience: 10 25 | setting_name: baseline 26 | test_batch_size: 100 27 | train_batch_size: 64 28 | -------------------------------------------------------------------------------- /sample_configs/model_e.yaml: -------------------------------------------------------------------------------- 1 | deep_learning_model: model_e_cnn 2 | model_paramters: 3 | convs: 4 | - 3 5 | - 5 6 | - 7 7 | - 11 8 | - 21 9 | dense_size: 1000 10 | dropout_rate: 0.5 11 | features_to_use: 12 | - onehot 13 | - pssm 14 | filter_size: 256 15 | lr: 0.001 16 | use_CRF: false 17 | run_parameters: 18 | domain_name: baseline 19 | epochs: 100 20 | gpu: 1 21 | patience: 10 22 | setting_name: baseline 23 | test_batch_size: 100 24 | train_batch_size: 64 25 | -------------------------------------------------------------------------------- /sample_configs/model_f.yaml: -------------------------------------------------------------------------------- 1 | 
deep_learning_model: model_f_multiscale_cnn 2 | model_paramters: 3 | cnn_regularizer: 5.0e-05 4 | convs: 5 | - 3 6 | - 5 7 | - 7 8 | - 11 9 | - 21 10 | dropout_rate: 0.5 11 | features_to_use: 12 | - onehot 13 | - pssm 14 | filter_size: 256 15 | lr: 0.001 16 | multiscalecnn_layers: 3 17 | use_CRF: false 18 | run_parameters: 19 | domain_name: baseline 20 | epochs: 100 21 | gpu: 1 22 | patience: 10 23 | setting_name: baseline 24 | test_batch_size: 100 25 | train_batch_size: 64 26 | -------------------------------------------------------------------------------- /utility/feed_generation_utility.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from utility.file_utility import FileUtility 4 | 5 | 6 | def train_batch_generator_408(batch_size=64): 7 | ''' 8 | :param batch_size: 9 | :return: 10 | ''' 11 | start_idx = 0 12 | train_lengths = [int(j) for j in FileUtility.load_list( 13 | 'datasets/train_length.txt')] 14 | X_train = np.load('datasets/X_train_408.npy') 15 | Y_train = np.array( 16 | np.load('datasets/train_mat_Y.npy')) 17 | while True: 18 | if not start_idx < len(train_lengths): 19 | start_idx = 0 20 | X = X_train[start_idx:(min(start_idx + batch_size, len(train_lengths))), 21 | 0:train_lengths[min(start_idx + batch_size, len(train_lengths)) - 1]] 22 | Y = Y_train[start_idx:(min(start_idx + batch_size, len(train_lengths))), 23 | 0:train_lengths[min(start_idx + batch_size, len(train_lengths)) - 1], :] 24 | 25 | W = [] 26 | for idx in range(start_idx, (min(start_idx + batch_size, len(train_lengths)))): 27 | W.append([1 if l < train_lengths[idx] else 0 for l in 28 | range(0, train_lengths[min(start_idx + batch_size, len(train_lengths)) - 1])]) 29 | 30 | start_idx += batch_size 31 | 32 | yield X, Y, np.array(W) 33 | 34 | 35 | def validation_batch_generator_408(batch_size=100): 36 | ''' 37 | :param batch_size: 38 | :return: 39 | ''' 40 | test_lengths = [int(i) for i in FileUtility.load_list( 41 | 'datasets/test_length.txt')] 42 | X_test = np.load('datasets/X_test_408.npy') 43 | Y_test = np.array( 44 | np.load('datasets/test_mat_Y.npy')) 45 | start_idx = 0 46 | while True: 47 | if not start_idx < len(test_lengths): 48 | start_idx = 0 49 | X = X_test[start_idx:(min(start_idx + batch_size, len(test_lengths))), 50 | 0:test_lengths[min(start_idx + batch_size, len(test_lengths)) - 1]] 51 | Y = Y_test[start_idx:(min(start_idx + batch_size, len(test_lengths))), 52 | 0:test_lengths[min(start_idx + batch_size, len(test_lengths)) - 1], :] 53 | W = [] 54 | for idx in range(start_idx, (min(start_idx + batch_size, len(test_lengths)))): 55 | W.append([1 if l < test_lengths[idx] else 0 for l in 56 | range(0, test_lengths[min(start_idx + batch_size, len(test_lengths)) - 1])]) 57 | 58 | start_idx += batch_size 59 | yield X, Y, np.array(W) 60 | 61 | 62 | def validation_batches_fortest_408(batchsize=100): 63 | ''' 64 | :param batchsize: 65 | :return: 66 | ''' 67 | test_lengths = [int(i) for i in FileUtility.load_list( 68 | 'datasets/test_length.txt')] 69 | X_test = np.load('datasets/X_test_408.npy') 70 | Y_test = np.array( 71 | np.load('datasets/test_mat_Y.npy')) 72 | start_idx = 0 73 | while start_idx < len(test_lengths): 74 | X = X_test[start_idx:(min(start_idx + batchsize, len(test_lengths))), 75 | 0:test_lengths[min(start_idx + batchsize, len(test_lengths)) - 1]] 76 | Y = Y_test[start_idx:(min(start_idx + batchsize, len(test_lengths))), 77 | 0:test_lengths[min(start_idx + batchsize, len(test_lengths)) - 1], :] 78 | W = [] 79 | for idx in 
range(start_idx, (min(start_idx + batchsize, len(test_lengths)))): 80 | W.append([1 if l < test_lengths[idx] else 0 for l in 81 | range(0, test_lengths[min(start_idx + batchsize, len(test_lengths)) - 1])]) 82 | 83 | start_idx += batchsize 84 | yield X, Y, np.array(W) 85 | -------------------------------------------------------------------------------- /utility/file_utility.py: -------------------------------------------------------------------------------- 1 | import _pickle as pickle 2 | import codecs 3 | import fnmatch 4 | import os 5 | import h5py 6 | import numpy as np 7 | from Bio import SeqIO 8 | from Bio.Alphabet import generic_dna 9 | from Bio.Seq import Seq 10 | from Bio.SeqRecord import SeqRecord 11 | from scipy import sparse 12 | 13 | 14 | class FileUtility(object): 15 | def __init__(self): 16 | print('File utility object created..') 17 | 18 | @staticmethod 19 | def create_fasta_file(file_address, corpus, label): 20 | seq_id_pairs = [('.'.join([str(idx + 1), label[idx]]), x) for idx, x in enumerate(corpus)] 21 | seq_recs = [SeqRecord(Seq(seq, generic_dna), id=id, description='') for id, seq in seq_id_pairs] 22 | SeqIO.write(seq_recs, file_address, "fasta") 23 | 24 | @staticmethod 25 | def read_sequence_file(file_name_sample): 26 | ''' 27 | :param file_name_sample: 28 | :return: 29 | ''' 30 | corpus = [] 31 | if file_name_sample[-1] == 'q': 32 | for cur_record in SeqIO.parse(file_name_sample, "fastq"): 33 | corpus.append(str(cur_record.seq).lower()) 34 | else: 35 | for cur_record in SeqIO.parse(file_name_sample, "fasta"): 36 | corpus.append(str(cur_record.seq).lower()) 37 | return file_name_sample.split('/')[-1], corpus 38 | 39 | @staticmethod 40 | def read_sequence_file_length(file_name_sample): 41 | ''' 42 | :param file_name_sample: 43 | :return: 44 | ''' 45 | corpus = [] 46 | if file_name_sample[-1] == 'q': 47 | for cur_record in SeqIO.parse(file_name_sample, "fastq"): 48 | corpus.append(str(cur_record.seq).lower()) 49 | else: 50 | for cur_record in SeqIO.parse(file_name_sample, "fasta"): 51 | corpus.append(str(cur_record.seq).lower()) 52 | return file_name_sample.split('/')[-1], len(corpus) 53 | 54 | @staticmethod 55 | def read_fasta_directory(file_directory, file_extenstion, only_files=[]): 56 | ''' 57 | :param file_directory: 58 | :param file_extenstion: 59 | :param only_files: 60 | :return: list of fasta files, and a dic to map file to index 61 | ''' 62 | if len(only_files) > 0: 63 | fasta_files = [x for x in FileUtility.recursive_glob(file_directory, '*.' + file_extenstion) if 64 | x.split('/')[-1] in only_files] 65 | else: 66 | fasta_files = [x for x in FileUtility.recursive_glob(file_directory, '*.' 
+ file_extenstion)] 67 | 68 | fasta_files.sort() 69 | mapping = {v: k for k, v in enumerate(fasta_files)} 70 | return fasta_files, mapping 71 | 72 | @staticmethod 73 | def save_obj(filename, value): 74 | with open(filename + '.pickle', 'wb') as f: 75 | pickle.dump(value, f) 76 | 77 | @staticmethod 78 | def load_obj(filename): 79 | return pickle.load(open(filename, "rb")) 80 | 81 | @staticmethod 82 | def ensure_dir(file_path): 83 | directory = os.path.dirname(file_path) 84 | if not os.path.exists(directory): 85 | os.makedirs(directory) 86 | 87 | @staticmethod 88 | def exists(file_path): 89 | return os.path.exists(file_path) 90 | 91 | @staticmethod 92 | def remove(file_path): 93 | os.remove(file_path) 94 | 95 | @staticmethod 96 | def save_list(filename, list_names): 97 | # FileUtility.ensure_dir(filename) 98 | f = codecs.open(filename, 'w', 'utf-8') 99 | for x in list_names: 100 | f.write(x + '\n') 101 | f.close() 102 | 103 | @staticmethod 104 | def load_list(filename): 105 | return [line.strip() for line in codecs.open(filename, 'r', 'utf-8').readlines()] 106 | 107 | @staticmethod 108 | def save_sparse_csr(filename, array): 109 | np.savez(filename, data=array.data, indices=array.indices, 110 | indptr=array.indptr, shape=array.shape) 111 | 112 | @staticmethod 113 | def load_sparse_csr(filename): 114 | loader = np.load(filename) 115 | return sparse.csr_matrix((loader['data'], loader['indices'], loader['indptr']), shape=loader['shape']) 116 | 117 | @staticmethod 118 | def _float_or_zero(value): 119 | try: 120 | return float(value) 121 | except: 122 | return 0.0 123 | 124 | @staticmethod 125 | def recursive_glob(treeroot, pattern): 126 | ''' 127 | :param treeroot: the path to the directory 128 | :param pattern: the pattern of files 129 | :return: 130 | ''' 131 | results = [] 132 | for base, dirs, files in os.walk(treeroot): 133 | good_files = fnmatch.filter(files, pattern) 134 | results.extend(os.path.join(base, f) for f in good_files) 135 | return results 136 | 137 | @staticmethod 138 | def read_fasta_sequences(file_name): 139 | corpus = [] 140 | for cur_record in SeqIO.parse(file_name, "fasta"): 141 | corpus.append(str(cur_record.seq).lower()) 142 | return corpus 143 | 144 | @staticmethod 145 | def read_fasta_sequences_ids(file_name): 146 | corpus = dict() 147 | for cur_record in SeqIO.parse(file_name, "fasta"): 148 | corpus[str(cur_record.id)] = (str(cur_record.seq).lower(), str(cur_record.description)) 149 | return corpus 150 | 151 | @staticmethod 152 | def loadH5file(filename): 153 | f = h5py.File(filename, 'r') 154 | a_group_key = list(f.keys())[0] 155 | return list(f[a_group_key]) 156 | -------------------------------------------------------------------------------- /utility/labeling_utility.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | import os 3 | import sys 4 | 5 | currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) 6 | parentdir = os.path.dirname(currentdir) 7 | sys.path.insert(0, parentdir) 8 | 9 | import numpy as np 10 | from keras.preprocessing.text import Tokenizer 11 | from collections import Counter 12 | from keras.preprocessing.sequence import pad_sequences 13 | from keras.utils.np_utils import to_categorical 14 | from gensim.models import KeyedVectors 15 | from keras.layers import Embedding 16 | from utility.file_utility import FileUtility 17 | from utility.list_set_util import argsort 18 | 19 | np.random.seed(7) 20 | 21 | 22 | class LabelingData(object): 23 | def __init__(self, 
train_file, test_file): 24 | print('Labeling utility object created..') 25 | ## read train ## 26 | self.X_train, self.y_train, self.train_lengths = LabelingData.labeling_file_reader(train_file) 27 | ## read test## 28 | self.X_test, self.y_test , self.test_lengths= LabelingData.labeling_file_reader(test_file) 29 | ## data loading 30 | self.load_data() 31 | 32 | def load_data(self): 33 | words = list(set([elem for sublist in (self.X_train + self.X_test) for elem in sublist])) 34 | self.vocab_size = len(words) + 2 # because of and pseudo words 35 | self.n_classes = len(set([elem for sublist in (self.y_train + self.y_test) for elem in 36 | sublist])) + 1 # add 1 because of zero padding 37 | 38 | # assign a unique integer to each word/label 39 | self.w2idx = LabelingData.encode(self.X_train + self.X_test) 40 | self.l2idx = LabelingData.encode(self.y_train + self.y_test) 41 | 42 | # encode() maps each word to a unique index, starting from 1. We additionally incerement all the 43 | # values by 1, so that we can save space for 0 and 1 to be assigned to and later 44 | self.w2idx = Counter(self.w2idx) 45 | self.w2idx.update(self.w2idx.keys()) 46 | self.w2idx = dict( 47 | self.w2idx) # convert back to regular dict (to avoid erroneously assigning 0 to unknown words) 48 | 49 | self.w2idx[''] = 0 50 | self.w2idx[''] = 1 51 | 52 | # on the label side we only have the to add 53 | self.l2idx[''] = 0 54 | 55 | # keep the reverse to be able to decode back 56 | self.idx2w = {v: k for k, v in self.w2idx.items()} 57 | self.idx2l = {v: k for k, v in self.l2idx.items()} 58 | 59 | X_train_enc = [[self.w2idx[w] for w in sent] for sent in self.X_train] 60 | X_test_enc = [[self.w2idx[w] for w in sent] for sent in self.X_test] 61 | 62 | y_train_enc = [[self.l2idx[l] for l in labels] for labels in self.y_train] 63 | y_test_enc = [[self.l2idx[l] for l in labels] for labels in self.y_test] 64 | 65 | # zero-pad all the sequences 66 | self.max_length = len(max(self.X_train + self.X_test, key=len)) 67 | 68 | self.X_train_enc = pad_sequences(X_train_enc, maxlen=self.max_length, padding='post') 69 | self.X_test_enc = pad_sequences(X_test_enc, maxlen=self.max_length, padding='post') 70 | 71 | y_train_enc = pad_sequences(y_train_enc, maxlen=self.max_length, padding='post') 72 | y_test_enc = pad_sequences(y_test_enc, maxlen=self.max_length, padding='post') 73 | 74 | # one-hot encode the labels 75 | idx = np.array(list(self.idx2l.keys())) 76 | vec = to_categorical(idx) 77 | one_hot = dict(zip(idx, vec)) 78 | self.inv_one_hot = {tuple(v): k for k, v in one_hot.items()} # keep the inverse dict 79 | 80 | self.y_train_enc = np.array([[one_hot[l] for l in labels] for labels in y_train_enc]) 81 | self.y_test_enc = np.array([[one_hot[l] for l in labels] for labels in y_test_enc]) 82 | 83 | print('Training y encoded shape is ', y_train_enc.shape) 84 | print('Maximum sequence length is', self.max_length) 85 | 86 | def get_embedding_layer(self, embedding_file, embedding_dim, trainable=False): 87 | wvmodel = KeyedVectors.load_word2vec_format(embedding_file) 88 | 89 | embedding_dimension = embedding_dim 90 | embedding_matrix = np.zeros((self.vocab_size, embedding_dimension)) 91 | 92 | UNKOWN = np.random.uniform(-1, 1, embedding_dimension) # assumes that '' does not exist in the embed vocab 93 | 94 | for word, i in self.w2idx.items(): 95 | if word in wvmodel.vocab: 96 | embedding_matrix[i] = wvmodel[word] 97 | else: 98 | embedding_matrix[i] = UNKOWN 99 | 100 | embedding_matrix[self.w2idx['']] = np.zeros((embedding_dimension)) 101 | 102 | 
embedding_layer = Embedding(embedding_matrix.shape[0], 103 | embedding_matrix.shape[1], 104 | weights=[embedding_matrix], 105 | trainable=trainable, 106 | name='embed_layer') 107 | return embedding_layer 108 | 109 | 110 | @staticmethod 111 | def tolower(file): 112 | lines=[l.lower() for l in FileUtility.load_list(file)] 113 | FileUtility.save_list(file+'new',lines) 114 | 115 | 116 | @staticmethod 117 | def labeling_file_reader(file): 118 | with open(file, 'r') as f: 119 | train = f.read().splitlines() 120 | X, y = [], [] 121 | sent = [] 122 | sent_labels = [] 123 | for elem in train: 124 | if elem == '': 125 | X.append(sent) 126 | y.append(sent_labels) 127 | sent = [] 128 | sent_labels = [] 129 | else: 130 | xx, yy = elem.split() 131 | sent.append(xx) 132 | sent_labels.append(yy) 133 | 134 | lengths = LabelingData.sequence_lengths(file) 135 | sorted_idxs = argsort(lengths) 136 | lengths.sort() 137 | X = [X[i] for i in sorted_idxs] 138 | y = [y[i] for i in sorted_idxs] 139 | return X, y, lengths 140 | 141 | @staticmethod 142 | def convert_to_kmer(input_file, out_file, n=3): 143 | train = FileUtility.load_list(input_file) 144 | training_data = [line.split() for line in train] 145 | final_list = list() 146 | temp = [] 147 | for x in training_data: 148 | if x == []: 149 | final_list.append(temp) 150 | temp = [] 151 | else: 152 | temp.append(x) 153 | res = [] 154 | for prot in final_list: 155 | sentence = ''.join(['$'] + [aa[0] for aa in prot] + ['#']) 156 | res += [(sentence[i:i + n], prot[i][1]) for i in range(len(sentence) - n + 1)] 157 | res += [''] 158 | FileUtility.save_list(out_file, [' '.join(list(x)) for x in res]) 159 | 160 | @staticmethod 161 | def sequence_lengths(input_file): 162 | train = FileUtility.load_list(input_file) 163 | training_data = [line.split() for line in train] 164 | final_list = list() 165 | temp = [] 166 | for x in training_data: 167 | if x == []: 168 | final_list.append(temp) 169 | temp = [] 170 | else: 171 | temp.append(x) 172 | return [len(prot) for prot in final_list] 173 | 174 | @staticmethod 175 | def encode(sequence): 176 | ''' 177 | Encoding sequence to integers 178 | :param sents: 179 | :return: 180 | ''' 181 | t = Tokenizer(filters='\t\n', lower=False) 182 | t.fit_on_texts([" ".join(seq) for seq in sequence]) 183 | return t.word_index 184 | 185 | @staticmethod 186 | def numpy2trainfiles(file,name,out='../data/s8_features/'): 187 | ''' 188 | test_file='/mounts/data/proj/asgari/dissertation/datasets/deepbio/protein_general/ss/data/cb513+profile_split1.npy' 189 | train_file='/mounts/data/proj/asgari/dissertation/datasets/deepbio/protein_general/ss/data/cullpdb+profile_6133_filtered.npy' 190 | :param name: 191 | :param out: 192 | :return: 193 | ''' 194 | db=np.load(file) 195 | a = np.arange(0,21) 196 | b = np.arange(35,56) 197 | c = np.hstack((a,b)) 198 | db = np.reshape(db, (db.shape[0], int(db.shape[1] / 57), 57)) 199 | seq=['A', 'C', 'E', 'D', 'G', 'F', 'I', 'H', 'K', 'M', 'L', 'N', 'Q', 'P', 'S', 'R', 'T', 'W', 'V', 'Y', 'X','NoSeq'] 200 | label=['L', 'B', 'E', 'G', 'I', 'H', 'S', 'T'] 201 | sequences=[] 202 | labels=[] 203 | possible_features=dict() 204 | for i in range(0,db.shape[0]): 205 | sequences.append(''.join([seq[np.argmax(x)] if np.max(x)==1 else '' for x in db[i,:,0:21]]).lower()) 206 | labels.append(''.join([label[np.argmax(y)] if np.max(y)==1 else '' for y in db[i,:,22:30]]).lower()) 207 | lengths=[len(x) for x in sequences] 208 | sorted_idxs = argsort(lengths) 209 | lengths.sort() 210 | sequences = [sequences[i] for i in sorted_idxs] 211 | 
labels = [labels[i] for i in sorted_idxs] 212 | FileUtility.save_list(out+name,['\n'.join([' '.join([elx,labels[idx][idy]]) for idy,elx in enumerate(list(seq))]+['']) for idx,seq in enumerate(sequences)]) 213 | db_new=db[sorted_idxs,:,:] 214 | label_encoding=[[([0] if np.max(row)==1 else [1])+row for row in db_new[i,:,22:30].tolist()] for i in range(0,db.shape[0])] 215 | np.save(out+name+'_mat_Y',label_encoding) 216 | db_new =db_new[:,:,c] 217 | np.save(out+name+'_mat_X',db_new) 218 | FileUtility.save_list(out+name+'_length.txt',[str(l) for l in lengths]) 219 | 220 | @staticmethod 221 | def X2extended(X): 222 | EMB=np.load('/mounts/data/proj/asgari/dissertation/git_repos/DeepSeq2Sec/pretrained_embeddings/emb2features.npy') 223 | x_new=[] 224 | for i in range(0,X.shape[0]): 225 | temp=[] 226 | for j in range(0,700): 227 | temp.append(X[i,j,0:21].dot(EMB).tolist()+X[i,j,:].tolist()) 228 | x_new.append(temp) 229 | return np.array(X_new) 230 | 231 | 232 | if __name__ == '__main__': 233 | LabelingData.tolower('/mounts/data/proj/asgari/dissertation/git_repos/DeepSeq2Sec/data/epitopes/test_epitopes.txt') 234 | LabelingData.tolower('/mounts/data/proj/asgari/dissertation/git_repos/DeepSeq2Sec/data/epitopes/train_epitopes.txt') 235 | -------------------------------------------------------------------------------- /utility/list_set_util.py: -------------------------------------------------------------------------------- 1 | __author__ = "Ehsaneddin Asgari" 2 | __license__ = "Apache 2" 3 | __version__ = "1.0.0" 4 | __maintainer__ = "Ehsaneddin Asgari" 5 | __email__ = "asgari@berkeley.edu" 6 | __project__ = "LLP - DeepPrime2Sec" 7 | __website__ = "https://llp.berkeley.edu/DeepPrime2Sec/" 8 | 9 | import operator 10 | import numpy as np 11 | 12 | def get_intersection_of_list(list_of_list_features): 13 | return list(set.intersection(*map(set, list_of_list_features))) 14 | 15 | def get_max_of_dict(inp): 16 | return max(inp.items(), key=operator.itemgetter(1))[0] 17 | 18 | def argsort(seq, rev=False): 19 | # http://stackoverflow.com/questions/3071415/efficient-method-to-calculate-the-rank-vector-of-a-list-in-python 20 | return sorted(range(len(seq)), key=seq.__getitem__, reverse=rev) 21 | 22 | def sampling_from_dict(score_dict, N): 23 | summation=np.sum(list(score_dict.values())) 24 | keys=list(score_dict.keys()) 25 | keys.sort() 26 | probDict={k:(s/summation) for k,s in score_dict.items()} 27 | prob_list=[probDict[k] for k in keys] 28 | return np.random.choice(keys, N, prob_list).tolist() 29 | -------------------------------------------------------------------------------- /utility/training.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | import os 3 | import sys 4 | 5 | currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) 6 | parentdir = os.path.dirname(currentdir) 7 | sys.path.insert(0, parentdir) 8 | import scipy 9 | 10 | from keras.callbacks import ModelCheckpoint, EarlyStopping 11 | from utility.file_utility import FileUtility 12 | from utility.labeling_utility import LabelingData 13 | from utility.feed_generation_utility import train_batch_generator_408, validation_batch_generator_408, validation_batches_fortest_408 14 | from utility.vis_utility import create_mat_plot 15 | import tqdm 16 | import numpy as np 17 | import itertools 18 | from sklearn.metrics import accuracy_score, f1_score 19 | from sklearn.metrics import confusion_matrix 20 | from fpdf import FPDF, HTMLMixin 21 | import seaborn as sns; 
sns.set() 22 | import matplotlib.pyplot as plt 23 | import matplotlib 24 | 25 | class MyFPDF(FPDF, HTMLMixin): 26 | pass 27 | 28 | # predefined models 29 | from models.a_cnn_bilstm import model_a_cnn_bilstm 30 | from models.b_cnn_bilstm_highway import model_b_cnn_bilstm_highway 31 | from models.c_cnn_bilstm_crf import model_c_cnn_bilstm_crf 32 | from models.d_cnn_bilstm_attention import model_d_cnn_bilstm_attention 33 | from models.e_cnn import model_e_cnn 34 | from models.f_multiscale_cnn import model_f_multiscale_cnn 35 | 36 | def training_loop(**kwargs): 37 | run_parameters = kwargs['run_parameters'] 38 | model_paramters = kwargs['model_paramters'] 39 | model = eval(kwargs['deep_learning_model']) 40 | 41 | # which GPU to use 42 | os.environ["CUDA_VISIBLE_DEVICES"] = str(run_parameters['gpu']) 43 | 44 | # read files 45 | train_file = 'datasets/train.txt' 46 | test_file = 'datasets/test.txt' 47 | LD = LabelingData(train_file, test_file) 48 | train_lengths = [int(j) for j in FileUtility.load_list('/'.join(train_file.split('/')[0:-1]) + '/train_length.txt')] 49 | test_lengths = [int(i) for i in FileUtility.load_list('/'.join(test_file.split('/')[0:-1]) + '/test_length.txt')] 50 | 51 | # train/test batch parameters 52 | train_batch_size = run_parameters['train_batch_size'] 53 | test_batch_size = run_parameters['test_batch_size'] 54 | patience = run_parameters['patience'] 55 | epochs = run_parameters['epochs'] 56 | 57 | # model 58 | model, params = model(LD.n_classes, **model_paramters) 59 | 60 | # output directory 61 | FileUtility.ensure_dir('results/') 62 | FileUtility.ensure_dir('results/' + run_parameters['domain_name'] + '/') 63 | FileUtility.ensure_dir('results/' + run_parameters['domain_name'] + '/' + run_parameters['setting_name'] + '/') 64 | FileUtility.ensure_dir( 65 | 'results/' + run_parameters['domain_name'] + '/' + run_parameters['setting_name'] + '/' + params + '/') 66 | full_path = 'results/' + run_parameters['domain_name'] + '/' + run_parameters['setting_name'] + '/' + params + '/' 67 | 68 | # save model 69 | with open(full_path + 'config.txt', 'w') as fh: 70 | model.summary(print_fn=lambda x: fh.write(x + '\n')) 71 | 72 | # check points 73 | filepath = full_path + "/weights-improvement-{epoch:02d}-{weighted_acc:.3f}-{val_weighted_acc:.3f}.hdf5" 74 | 75 | checkpoint = ModelCheckpoint(filepath, monitor='val_weighted_acc', verbose=1, save_best_only=True, mode='max', 76 | period=1) 77 | earlystopping = EarlyStopping(monitor='val_weighted_acc', min_delta=0, patience=patience, verbose=0, mode='max', 78 | baseline=None) 79 | callbacks_list = [checkpoint, earlystopping] 80 | 81 | # calculate the sizes 82 | steps_per_epoch = len(train_lengths) / train_batch_size if len(train_lengths) % train_batch_size == 0 else int( 83 | len(train_lengths) / train_batch_size) + 1 84 | validation_steps = int(len(test_lengths) / test_batch_size) if len(test_lengths) % test_batch_size == 0 else int( 85 | len(test_lengths) / test_batch_size) + 1 86 | 87 | # feed model 88 | h = model.fit_generator(train_batch_generator_408(train_batch_size), steps_per_epoch=steps_per_epoch, 89 | validation_data=validation_batch_generator_408(test_batch_size), 90 | validation_steps=validation_steps, 91 | shuffle=False, epochs=epochs, verbose=1, callbacks=callbacks_list) 92 | 93 | # save the history 94 | FileUtility.save_obj(full_path + 'history', h.history) 95 | 96 | 97 | # Analysis of the performance 98 | pred_test = [(model.predict_on_batch(x),y,w) for x,y,w in tqdm.tqdm(validation_batches_fortest_408(1))] 99 | 100 | 
100 |     acc_test, conf_mat, conf_mat_column_mapping, contingency_metric, chi2_res_pval, gtest_res_pval = generate_report(full_path, pred_test, run_parameters['domain_name'], run_parameters['setting_name'])
101 | 
102 | 
103 | 
104 | def generate_report(full_path, pred_test, domain, setting):
105 |     '''
106 |     :param pred_test: list of (prediction, ground-truth, weight) batches on the test set
107 |     :return: accuracy, confusion matrix and its column mapping, contingency table, and chi-square / G-test p-values
108 |     '''
109 |     # Error location analysis
110 |     error_edge=0
111 |     error_NOTedge=0
112 |     correct_edge=0
113 |     correct_NOTedge=0
114 | 
115 |     all_pred = []
116 |     all_true = []
117 | 
118 |     for i in tqdm.tqdm(range(0,514)):
119 |         pred=np.array([np.argmax(x, axis=1) for x in pred_test[i][0]])
120 |         true=np.array([np.argmax(x, axis=1) for x in pred_test[i][1]])
121 |         all_pred = all_pred + pred.tolist()
122 |         all_true = all_true + true.tolist()
123 |         diff=np.diff(true)
124 |         errors = [y for x,y in np.argwhere(pred!=true)]
125 |         corrects = list(set(list(range(len(pred[0]))))-set(errors))
126 |         edges_edge = [y for x,y in np.argwhere(diff!=0)]
127 |         edges_before = [x-1 for x in edges_edge if x-1>=0]
128 |         edges_after = [x+1 for x in edges_edge if x+1<len(pred[0])]
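    # ------------------------------------------------------------------------------
    # NOTE: the source lines that originally followed here are not present in this
    # listing. The unnumbered block below is an illustrative sketch -- NOT the
    # repository's original code -- of how the quantities consumed by the report
    # further down (acc_test, f1_micro, f1_macro, conf_mat, conf_mat_column_mapping,
    # contingency_metric, chi2_res_pval, gtest_res_pval, the confusion-matrix figure,
    # and the `pdf` object) could be derived from the loop variables above, using only
    # modules already imported in this file plus an explicit scipy.stats import.
    # ------------------------------------------------------------------------------
        # location analysis: count (mis)classified residues at / away from
        # secondary-structure (PSS) transitions for the current sequence
        transition_positions = set(edges_edge + edges_before + edges_after)
        error_edge += len([e for e in errors if e in transition_positions])
        error_NOTedge += len([e for e in errors if e not in transition_positions])
        correct_edge += len([c for c in corrects if c in transition_positions])
        correct_NOTedge += len([c for c in corrects if c not in transition_positions])

    # flatten the per-sequence label lists (padding positions are not excluded here)
    flat_true = list(itertools.chain(*all_true))
    flat_pred = list(itertools.chain(*all_pred))

    # overall test-set scores
    acc_test = accuracy_score(flat_true, flat_pred)
    f1_micro = f1_score(flat_true, flat_pred, average='micro')
    f1_macro = f1_score(flat_true, flat_pred, average='macro')

    # confusion matrix, the class indices used as its row/column order,
    # and the heatmap consumed by pdf.image(...) below
    conf_mat_column_mapping = sorted(set(flat_true))
    conf_mat = confusion_matrix(flat_true, flat_pred, labels=conf_mat_column_mapping)
    create_mat_plot(conf_mat, conf_mat_column_mapping, 'Confusion matrix',
                    full_path + 'confusion' + F"{domain}_{setting}",
                    'Predicted', 'True', filetype='png')

    # 2x2 contingency table and the chi-square / G-test p-values
    from scipy.stats import chi2_contingency  # the file only does `import scipy` at the top
    contingency_metric = [[error_edge, error_NOTedge], [correct_edge, correct_NOTedge]]
    chi2_res_pval = chi2_contingency(contingency_metric)[1]
    gtest_res_pval = chi2_contingency(contingency_metric, lambda_="log-likelihood")[1]

    # open the PDF report
    pdf = MyFPDF()
    pdf.add_page()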

    html=F"""
    <h1>DeepPrime2Sec Report on Protein Secondary Structure Prediction</h1>
    <h2>Experiment name: {domain} - {setting}</h2>
    <br>
    <h2>The performance on CB513</h2>
    <h3>Report on the accuracy</h3>
    <table border="1">
    <tr>
    <th width="34%">Test-set Accuracy</th>
    <th width="33%">Test-set micro F1</th>
    <th width="33%">Test-set macro F1</th>
    </tr>
    <tr>
    <td width="34%">{round(acc_test,3)}</td>
    <td width="33%">{round(f1_micro,3)}</td>
    <td width="33%">{round(f1_macro,3)}</td>
    </tr>
    </table>
    <br>
    <h3>Confusion matrix</h3>
    """

    pdf.write_html(html)
    pdf.image(full_path+'confusion'+F"{domain}_{setting}"+'.png', x = 50, y = None, w = 100, h = 0, type = '', link = '')

    html=F"""
    <br>
    <h2>Error analysis</h2>
    <h3>Contingency table for location analysis of the misclassified amino acids</h3>
    <table border="1">
    <tr>
    <th width="30%"> </th>
    <th width="35%">Located at the PSS transition</th>
    <th width="35%">NOT located at the PSS transition</th>
    </tr>
    <tr>
    <td width="30%">Misclassified</td>
    <td width="35%">{error_edge}</td>
    <td width="35%">{error_NOTedge}</td>
    </tr>
    <tr>
    <td width="30%">Correctly classified</td>
    <td width="35%">{correct_edge}</td>
    <td width="35%">{correct_NOTedge}</td>
    </tr>
    </table>
    <br>
    P-value for Chi-square test = {chi2_res_pval}
    <br>
    P-value for G-test = {gtest_res_pval}
    <br>
    <h2>Learning curve</h2>
220 | """ 221 | pdf.write_html(html) 222 | 223 | # learning curve 224 | history_dict=FileUtility.load_obj(full_path+'history.pickle') 225 | plt.clf() 226 | loss_values = history_dict['loss'] 227 | val_loss_values = history_dict['val_loss'] 228 | epochs = range(1, len(loss_values) + 1) 229 | matplotlib.rcParams['mathtext.fontset'] = 'stix' 230 | matplotlib.rcParams['font.family'] = 'STIXGeneral' 231 | matplotlib.rcParams['mathtext.fontset'] = 'custom' 232 | matplotlib.rcParams['mathtext.rm'] = 'Bitstream Vera Sans' 233 | matplotlib.rcParams['mathtext.it'] = 'Bitstream Vera Sans:italic' 234 | matplotlib.rcParams['mathtext.bf'] = 'Bitstream Vera Sans:bold' 235 | matplotlib.rcParams["axes.edgecolor"] = "black" 236 | matplotlib.rcParams["axes.linewidth"] = 0.6 237 | plt.plot(epochs, loss_values, 'ro', label='Loss for train set') 238 | plt.plot(epochs, val_loss_values, 'b', label='Loss for test set') 239 | plt.xlabel('Epochs') 240 | plt.ylabel('Loss') 241 | plt.legend(loc=1, prop={'size': 8},ncol=1, edgecolor='black', facecolor='white', frameon=True) 242 | plt.title('Loss with respect to the number of epochs for train and test sets') 243 | plt.savefig(full_path + 'learning_curve'+F"{domain}_{setting}"+'.png', dpi=300) 244 | pdf.image(full_path + 'learning_curve'+F"{domain}_{setting}"+'.png', x = 50, y = None, w = 100, h = 0, type = '', link = '') 245 | 246 | 247 | pdf.output(full_path+'final_report.pdf', 'F') 248 | 249 | return acc_test, conf_mat, conf_mat_column_mapping, contingency_metric, chi2_res_pval, gtest_res_pval 250 | -------------------------------------------------------------------------------- /utility/vis_utility.py: -------------------------------------------------------------------------------- 1 | import seaborn as sns; sns.set() 2 | import sys 3 | sys.path.append('../') 4 | import matplotlib 5 | import matplotlib.pyplot as plt 6 | 7 | global color_schemes 8 | color_schemes=[['green','blue','red','gold', 'cyan'], ['#ff0505', '#f2a041', '#cdff05', '#04d9cb', '#45a8ff', '#8503a6', '#590202', '#734d02', '#4ab304', '#025359', '#0454cc', '#ff45da', '#993829', '#ffda45', '#1c661c', '#05cdff', '#1c2f66', '#731f57', '#b24a04', '#778003', '#0e3322', '#024566', '#0404d9', '#e5057d', '#66391c', '#31330e', '#3ee697', '#2d7da6', '#20024d', '#33011c']+list(({'aliceblue': '#F0F8FF','antiquewhite': '#FAEBD7','aqua': '#00FFFF','aquamarine': '#7FFFD4','azure': '#F0FFFF','beige': '#F5F5DC','bisque': '#FFE4C4','black': '#000000','blanchedalmond': '#FFEBCD','blue': '#0000FF','blueviolet': '#8A2BE2','brown': '#A52A2A','burlywood': '#DEB887','cadetblue': '#5F9EA0','chartreuse': '#7FFF00','chocolate': '#D2691E','coral': '#FF7F50','cornflowerblue': '#6495ED','cornsilk': '#FFF8DC','crimson': '#DC143C','cyan': '#00FFFF','darkblue': '#00008B','darkcyan': '#008B8B','darkgoldenrod': '#B8860B','darkgray': '#A9A9A9','darkgreen': '#006400','darkkhaki': '#BDB76B','darkmagenta': '#8B008B','darkolivegreen': '#556B2F','darkorange': '#FF8C00','darkorchid': '#9932CC','darkred': '#8B0000','darksalmon': '#E9967A','darkseagreen': '#8FBC8F','darkslateblue': '#483D8B','darkslategray': '#2F4F4F','darkturquoise': '#00CED1','darkviolet': '#9400D3','deeppink': '#FF1493','deepskyblue': '#00BFFF','dimgray': '#696969','dodgerblue': '#1E90FF','firebrick': '#B22222','floralwhite': '#FFFAF0','forestgreen': '#228B22','fuchsia': '#FF00FF','gainsboro': '#DCDCDC','ghostwhite': '#F8F8FF','gold': '#FFD700','goldenrod': '#DAA520','gray': '#808080','green': '#008000','greenyellow': '#ADFF2F','honeydew': '#F0FFF0','hotpink': 
'#FF69B4','indianred': '#CD5C5C','indigo': '#4B0082','ivory': '#FFFFF0','khaki': '#F0E68C','lavender': '#E6E6FA','lavenderblush': '#FFF0F5','lawngreen': '#7CFC00','lemonchiffon': '#FFFACD','lightblue': '#ADD8E6','lightcoral': '#F08080','lightcyan': '#E0FFFF','lightgoldenrodyellow': '#FAFAD2','lightgreen': '#90EE90','lightgray': '#D3D3D3','lightpink': '#FFB6C1','lightsalmon': '#FFA07A','lightseagreen': '#20B2AA','lightskyblue': '#87CEFA','lightslategray': '#778899','lightsteelblue': '#B0C4DE','lightyellow': '#FFFFE0','lime': '#00FF00','limegreen': '#32CD32','linen': '#FAF0E6','magenta': '#FF00FF','maroon': '#800000','mediumaquamarine': '#66CDAA','mediumblue': '#0000CD','mediumorchid': '#BA55D3','mediumpurple': '#9370DB','mediumseagreen': '#3CB371','mediumslateblue': '#7B68EE','mediumspringgreen': '#00FA9A','mediumturquoise': '#48D1CC','mediumvioletred': '#C71585','midnightblue': '#191970','mintcream': '#F5FFFA','mistyrose': '#FFE4E1','moccasin': '#FFE4B5','navajowhite': '#FFDEAD','navy': '#000080','oldlace': '#FDF5E6','olive': '#808000','olivedrab': '#6B8E23','orange': '#FFA500','orangered': '#FF4500','orchid': '#DA70D6','palegoldenrod': '#EEE8AA','palegreen': '#98FB98','paleturquoise': '#AFEEEE','palevioletred': '#DB7093','papayawhip': '#FFEFD5','peachpuff': '#FFDAB9','peru': '#CD853F','pink': '#FFC0CB','plum': '#DDA0DD','powderblue': '#B0E0E6','purple': '#800080','red': '#FF0000','rosybrown': '#BC8F8F','royalblue': '#4169E1','saddlebrown': '#8B4513','salmon': '#FA8072','sandybrown': '#FAA460','seagreen': '#2E8B57','seashell': '#FFF5EE','sienna': '#A0522D','silver': '#C0C0C0','skyblue': '#87CEEB','slateblue': '#6A5ACD','slategray': '#708090','snow': '#FFFAFA','springgreen': '#00FF7F','steelblue': '#4682B4','tan': '#D2B48C','teal': '#008080','thistle': '#D8BFD8','tomato': '#FF6347','turquoise': '#40E0D0','violet': '#EE82EE','wheat': '#F5DEB3','white': '#FFFFFF','whitesmoke': '#F5F5F5','yellow': '#FFFF00','yellowgreen': '#9ACD32'}).keys()),['#ff0505', '#f2a041', '#cdff05', '#04d9cb', '#45a8ff', '#8503a6', '#590202', '#734d02', '#4ab304', '#025359', '#0454cc', '#ff45da', '#993829', '#ffda45', '#1c661c', '#05cdff', '#1c2f66', '#731f57', '#b24a04', '#778003', '#0e3322', '#024566', '#0404d9', '#e5057d', '#66391c', '#31330e', '#3ee697', '#2d7da6', '#20024d', '#33011c']] 9 | 10 | def create_mat_plot(mat, axis_names, title, filename, xlab, ylab, cmap='inferno', filetype='pdf', rx=0, ry=0, font_s=10, annot=True): 11 | ''' 12 | :param mat: divergence matrix 13 | :param axis_names: axis_names 14 | :param title 15 | :param filename: where to be saved 16 | :return: 17 | ''' 18 | plt.rc('text') 19 | ax = sns.heatmap(mat,annot=annot, yticklabels=axis_names, xticklabels=axis_names, cmap=cmap) 20 | plt.title(title) 21 | params = { 22 | 'legend.fontsize': font_s, 23 | 'xtick.labelsize': font_s, 24 | 'ytick.labelsize': font_s, 25 | } 26 | matplotlib.rcParams['mathtext.fontset'] = 'stix' 27 | matplotlib.rcParams['font.family'] = 'STIXGeneral' 28 | matplotlib.rcParams['mathtext.fontset'] = 'custom' 29 | matplotlib.rcParams['mathtext.rm'] = 'Bitstream Vera Sans' 30 | matplotlib.rcParams['mathtext.it'] = 'Bitstream Vera Sans:italic' 31 | matplotlib.rcParams['mathtext.bf'] = 'Bitstream Vera Sans:bold' 32 | plt.xlabel(xlab) 33 | plt.ylabel(ylab) 34 | plt.xticks(rotation=rx) 35 | plt.yticks(rotation=ry) 36 | plt.rcParams.update(params) 37 | plt.tight_layout() 38 | plt.savefig(filename + '.'+filetype, dpi=300) 39 | plt.clf() 40 | 41 | --------------------------------------------------------------------------------
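A minimal, illustrative call to create_mat_plot (toy labels and values, not taken from the repository; it assumes the repository root is on sys.path so that the utility package can be imported):

import numpy as np
from utility.vis_utility import create_mat_plot

# toy 3x3 "confusion matrix" over three hypothetical secondary-structure classes
mat = np.array([[50, 3, 2],
                [4, 40, 6],
                [1, 5, 45]])

# draws an annotated heatmap and writes toy_confusion.png next to the script
create_mat_plot(mat, ['H', 'E', 'C'], 'Toy confusion matrix',
                'toy_confusion', 'Predicted', 'True', filetype='png')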