├── .gitignore
├── LICENSE
├── README.md
├── config_generator.py
├── datasets
│   ├── ReadME
│   ├── test.txt
│   ├── test_length.txt
│   ├── test_mat_Y.npy
│   ├── train.txt
│   └── train_length.txt
├── deepprime2sec.py
├── installations
│   ├── deepprime2sec.yml
│   └── requirements.txt
├── layers
│   ├── crf.py
│   └── utility.py
├── models
│   ├── a_cnn_bilstm.py
│   ├── b_cnn_bilstm_highway.py
│   ├── c_cnn_bilstm_crf.py
│   ├── d_cnn_bilstm_attention.py
│   ├── e_cnn.py
│   └── f_multiscale_cnn.py
├── sample_configs
│   ├── model_a.yaml
│   ├── model_b.yaml
│   ├── model_c.yaml
│   ├── model_d.yaml
│   ├── model_e.yaml
│   └── model_f.yaml
└── utility
├── feed_generation_utility.py
├── file_utility.py
├── labeling_utility.py
├── list_set_util.py
├── training.py
└── vis_utility.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | .idea/
14 | dist/
15 | datasets/*train*
16 | results/*
17 | downloads/
18 | eggs/
19 | .eggs/
20 | lib/
21 | lib64/
22 | parts/
23 | sdist/
24 | var/
25 | wheels/
26 | *.egg-info/
27 | .installed.cfg
28 | *.egg
29 | MANIFEST
30 |
31 | # PyInstaller
32 | # Usually these files are written by a python script from a template
33 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
34 | *.manifest
35 | *.spec
36 |
37 | # Installer logs
38 | pip-log.txt
39 | pip-delete-this-directory.txt
40 |
41 | # Unit test / coverage reports
42 | htmlcov/
43 | .tox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | .hypothesis/
51 | .pytest_cache/
52 |
53 | # Translations
54 | *.mo
55 | *.pot
56 |
57 | # Django stuff:
58 | *.log
59 | local_settings.py
60 | db.sqlite3
61 |
62 | # Flask stuff:
63 | instance/
64 | .webassets-cache
65 |
66 | # Scrapy stuff:
67 | .scrapy
68 |
69 | # Sphinx documentation
70 | docs/_build/
71 |
72 | # PyBuilder
73 | target/
74 |
75 | # Jupyter Notebook
76 | .ipynb_checkpoints
77 |
78 | # pyenv
79 | .python-version
80 |
81 | # celery beat schedule file
82 | celerybeat-schedule
83 |
84 | # SageMath parsed files
85 | *.sage.py
86 |
87 | # Environments
88 | .env
89 | .venv
90 | env/
91 | venv/
92 | ENV/
93 | env.bak/
94 | venv.bak/
95 |
96 | # Spyder project settings
97 | .spyderproject
98 | .spyproject
99 |
100 | # Rope project settings
101 | .ropeproject
102 |
103 | # mkdocs documentation
104 | /site
105 |
106 | # mypy
107 | .mypy_cache/
108 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright 2019 Ehsaneddin Asgari
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # DeepPrime2Sec
2 |
3 |
4 |
5 | ## Table of Contents
6 |
7 | [1. Summary](#summary)
8 |
9 | [2. Installation](#installation)
10 |
11 | [3. Running Configuration](#running-configuration)
12 |
13 | [3.1 Features](#features-to-use)
14 |
15 | [3.2 Training parameters](#training-parameters)
16 |
17 | [3.3 Model specific parameters](#how-to-configure-input-for-different-deep-learning-models)
18 |
19 | [4. Output](#output)
20 |
21 |
22 |
23 | # Summary
24 |
25 |
26 | DeepPrime2Sec is a deep learning framework for the prediction of protein secondary structure from the protein primary sequence.
27 | It facilitates evaluating the contribution of different features to this task, including one-hot vectors, biophysical features,
28 | protein sequence embedding (ProtVec), deep contextualized embedding (known as ELMo), and the Position Specific Scoring Matrix (PSSM).
29 |
30 | In addition to the role of features, it allows for the evaluation of various deep learning architectures including the following models/mechanisms and
31 | certain combinations: Bidirectional Long Short-Term Memory (BiLSTM), convolutional neural network (CNN), highway connections,
32 | attention mechanism, recurrent neural random fields, and gated multi-scale CNN.
33 |
34 | Our results suggest that the PSSM concatenated with one-hot vectors is the most important feature set for the task of secondary structure prediction.
35 | Utilizing the CNN-BiLSTM network, we achieved accuracies of 69.9% and, using an ensemble of the top-k models, 70.4% for 8-class protein secondary structure prediction on the CB513 dataset, the most challenging dataset for protein secondary structure prediction.
36 |
37 | ```
38 | @article {Asgari705426,
39 | author = {Asgari, Ehsaneddin and Poerner, Nina and McHardy, Alice C. and Mofrad, Mohammad R.K.},
40 | title = {DeepPrime2Sec: Deep Learning for Protein Secondary Structure Prediction from the Primary Sequences},
41 | elocation-id = {705426},
42 | year = {2019},
43 | doi = {10.1101/705426},
44 | publisher = {Cold Spring Harbor Laboratory},
45 | URL = {https://www.biorxiv.org/content/early/2019/07/18/705426},
46 | eprint = {https://www.biorxiv.org/content/early/2019/07/18/705426.full.pdf},
47 | journal = {bioRxiv}
48 | }
49 | ```
50 |
51 |
52 | Through error analysis on the best performing model, we showed that the misclassification is significantly more common at positions that undergo secondary structure transitions, which is most likely due to the inaccurate assignments of the secondary structure at the boundary regions. Notably, when ignoring amino acids at secondary structure transitions in the evaluation, the accuracy increases to 90.3%. Furthermore, the best performing model mostly mistook similar structures for one another, indicating that the deep learning model inferred high-level information on the secondary structure.
53 |
54 |
55 | DeepPrime2Sec and the datasets used in this work are available here under the Apache 2 license.
56 |
57 | Return to the [table of contents ↑](#table-of-contents).
58 |
59 |
60 |
61 | # Installation
62 |
63 | ## Pip installation
64 |
65 |
66 | In order to install the required libraries for running DeepPrime2Sec use the following command:
67 |
68 | ```
69 | pip install -r installations/requirements.txt
70 | ```
71 |
72 | Alternatively, you may use the conda installation.
73 |
74 | ## Conda installation
75 |
76 | In order to install the required libraries for running DeepPrime2Sec use the following conda command:
77 |
78 | ```
79 | conda create --name deepprime2sec --file installations/deepprime2sec.yml
80 | ```
81 |
82 | Subsequently, you need to activate the created virtual environment before running:
83 |
84 | ```
85 | source activate deepprime2sec
86 | ```
87 |
88 | ## Download the training files
89 |
90 |
91 | Before running the software, make sure to download the training dataset (which was too large for git) from the following link,
92 | then extract the files and copy them to the `datasets` directory.
93 |
94 | ```
95 | http://deepbio.info/proteomics/datasets/deepprime2sec/train_files.tar.gz
96 | ```
97 |
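If you prefer to script this step, here is a minimal Python sketch (equivalent to `wget` followed by `tar -xzf`) that downloads the archive and unpacks it into `datasets/`; the URL and paths are taken from above and may need adjusting:

```
import tarfile
import urllib.request

# Convenience sketch only: fetch the training archive and unpack it into datasets/.
url = "http://deepbio.info/proteomics/datasets/deepprime2sec/train_files.tar.gz"
archive = "train_files.tar.gz"

urllib.request.urlretrieve(url, archive)         # download the tarball
with tarfile.open(archive, "r:gz") as tar:
    tar.extractall(path="datasets")              # extract into the datasets directory
```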
98 |
99 | Return to the [table of contents ↑](#table-of-contents).
100 |
101 |
102 |
103 |
104 |
105 | # Running Configuration
106 |
107 | ### Running example
108 |
109 | In order to run DeepPrime2Sec, you can simply use the following command.
110 | All details of the different deep learning models (architecture, hyperparameters, and training parameters) are provided in the yaml config file.
111 | Here we detail how this file should be created. Examples are also provided in `sample_configs/*.yaml`.
112 |
113 | ```
114 | python deepprime2sec.py --config sample_configs/model_a.yaml
115 | ```
116 |
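For reference, a config of this form can be read with PyYAML roughly as follows; this is only a sketch of how the two top-level keys are consumed, not the exact loading code inside `deepprime2sec.py`:

```
import yaml

# Minimal sketch: load a sample config and access its two top-level keys.
with open("sample_configs/model_a.yaml") as f:
    config = yaml.safe_load(f)

model_name = config["deep_learning_model"]   # which architecture to build
params = config["model_paramters"]           # note: the key is spelled this way in the configs
print(model_name, params["features_to_use"])
```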
117 |
118 | # Features to use
119 |
120 |
121 | We experiment with five sets of protein features to understand which features are essential for the task of protein secondary structure prediction. Although PSSM was reported as an important feature for secondary structure prediction as early as 1999 (Jones et al., 1999),
122 | it was still unclear whether recently introduced distributed representations can outperform PSSM in such a task. For a systematic comparison, the following features are used:
123 |
124 |
125 | - One-hot vector representation (length: 21) --- onehot: a vector indicating which amino acid is present at each specific position, where each index in the vector marks the presence or absence of that amino acid (see the sketch after this list).
126 | - ProtVec embedding (length: 50) --- protvec: a representation trained using a Skip-gram neural network on protein amino acid sequences (ProtVec). The only difference from the original ProtVec is character-level training instead of n-gram-based training.
127 | - Contextualized embedding (length: 300) --- elmo: we use the contextualized embedding of the amino acids trained in the course of language modeling, known as ELMo, as a new feature for the secondary structure task. Contextualized embedding is the concatenation of the hidden states of a deep bidirectional language model. The main difference between the ProtVec embedding and the ELMo embedding is that the ProtVec embedding for a given amino acid or amino acid k-mer is fixed and the representation would be the same in different sequences. However, the contextualized embedding, as is clear from its name, is an embedding of a word that changes based on its context. We train the ELMo embedding of amino acids on the UniRef50 dataset with a dimension size of 300.
128 | - Position Specific Scoring Matrix (PSSM) features (length: 21) --- pssm: the PSSM consists of amino acid substitution scores calculated from a multiple sequence alignment of homologous sequences for each position in the protein sequence.
129 | - Biophysical features (length: 16) --- biophysical: for each amino acid we create a normalized vector of its biophysical properties, e.g., flexibility, instability, surface accessibility, kd-hydrophobicity, hydrophilicity, etc.
130 |
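As a concrete illustration of the simplest feature, the sketch below one-hot encodes a sequence over a hypothetical 21-symbol alphabet (20 standard amino acids plus one extra symbol); the repository's own feed-generation code may use a different ordering:

```
import numpy as np

# Hypothetical 21-symbol alphabet: 20 standard amino acids plus 'X' for anything else.
ALPHABET = "ACDEFGHIKLMNPQRSTVWYX"
AA_INDEX = {aa: i for i, aa in enumerate(ALPHABET)}

def one_hot(sequence):
    """Encode a protein sequence as an (L, 21) one-hot matrix."""
    mat = np.zeros((len(sequence), len(ALPHABET)))
    for pos, aa in enumerate(sequence):
        mat[pos, AA_INDEX.get(aa, AA_INDEX["X"])] = 1.0
    return mat

print(one_hot("MKV").shape)  # (3, 21)
```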
131 |
132 | In order to use combinations of features in the software, list the desired keywords under the `features_to_use` key, which is part of the model parameters (`model_paramters`).
133 | The features included in the config will be concatenated as the input:
134 |
135 | ```
136 | model_paramters:
137 | features_to_use:
138 | - onehot
139 | - embedding
140 | - elmo
141 | - pssm
142 | - biophysical
143 | ```
144 |
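Concatenation here simply means joining the selected per-residue feature vectors along the feature axis, e.g. (a sketch with made-up shapes):

```
import numpy as np

# Hypothetical per-residue feature matrices for a protein of length 100.
onehot = np.zeros((100, 21))    # one-hot vectors, 21 dimensions per residue
pssm = np.zeros((100, 21))      # PSSM scores, 21 dimensions per residue

features = np.concatenate([onehot, pssm], axis=-1)   # shape (100, 42)
```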
145 |
146 | Return to the [table of contents ↑](#table-of-contents).
147 |
148 |
149 |
150 | ## Training parameters
151 |
152 |
153 | The following is an example of parameters for running the training and storing the results (`run_parameters`).
154 |
155 | ```
156 | run_parameters:
157 | domain_name: baseline
158 | setting_name: baseline
159 | epochs: 100
160 | test_batch_size: 100
161 | train_batch_size: 64
162 | patience: 10
163 | gpu: 1
164 | ```
165 |
166 |
167 | ### `domain_name` and `setting_name`
168 |
169 | The results of the model will be saved to the `results` directory. The `domain_name` and `setting_name` parameters are used as directory and sub-directory names inside `results` to store the model weights
170 | and results.
171 |
172 | ### `epochs` and batch sizes
173 |
174 | `epochs` refers to the number of times to iterate over the training data, and the batch sizes refer to the number of sequences used in each optimization step.
175 | For proper and faster learning we have already performed bucketing (sorting the training sequences according to their lengths, as sketched below), which also minimizes the padding operations.
176 |
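A minimal sketch of such length-based bucketing (not the repository's exact feed-generation code) could look like this:

```
import numpy as np

def length_buckets(sequences, labels, batch_size):
    """Yield batches of similar-length sequences so that padding per batch is minimal."""
    order = np.argsort([len(seq) for seq in sequences])   # sort indices by sequence length
    for start in range(0, len(order), batch_size):
        idx = order[start:start + batch_size]
        yield [sequences[i] for i in idx], [labels[i] for i in idx]
```
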
177 | ### `patience`
178 |
179 | To avoid overfitting we perform early stopping, meaning that if the performance improves only on the training set and not on the test set over a number of epochs, we stop the training.
180 | Otherwise the model has merely memorized the training data and cannot generalize to the test set. `patience` determines for how many epochs we should wait for an improvement on the test set.
181 |
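In Keras terms this corresponds roughly to the standard EarlyStopping callback (a sketch assuming a compiled Keras model and a held-out evaluation split; the repository's training loop may differ in detail):

```
from tensorflow.keras.callbacks import EarlyStopping

# Stop once the monitored metric has not improved for `patience` consecutive epochs,
# and restore the best weights observed so far.
early_stop = EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True)

# model.fit(x_train, y_train, validation_data=(x_val, y_val),
#           epochs=100, batch_size=64, callbacks=[early_stop])
```
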
182 | ### `gpu`
183 |
184 | Which GPU device ID to use for training/testing the model.
185 |
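A common way to honor such a setting (an assumption about the mechanism, not necessarily what `deepprime2sec.py` does internally) is to restrict the visible CUDA devices before the deep learning framework is imported:

```
import os

# Expose only the requested GPU (here device ID 1) to the framework.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
```
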
186 | Return to the [table of contents ↑](#table-of-contents).
187 |
188 |
189 |
190 | ## How to configure input for different deep learning models
191 |
192 |
193 | ### Model (a) CNN + BiLSTM
194 |
195 | For the details of the CNN + BiLSTM model please refer to the paper; to specify this model in the config file use `deep_learning_model: model_a_cnn_bilstm`.
196 |
197 | 
198 |
199 | `convs` refers to the convolution window sizes (in the following example we use 5 window sizes: 3, 5, 7, 11, and 21).
200 |
201 | `filter_size` is the size of convolutional filters.
202 |
203 | `dense_size` is the size of the feed-forward layers used before and after the LSTM.
204 |
205 | `dropout_rate` is the dropout rate.
206 |
207 | `lstm_size` is the hidden size of bidirectional LSTM.
208 |
209 | `lr` is the learning rate.
210 |
211 | `features_to_use` is already covered in [3.1 Features](#features-to-use).
212 |
213 |
214 | Sample config file
215 | ```
216 | deep_learning_model: model_a_cnn_bilstm
217 | model_paramters:
218 | convs:
219 | - 3
220 | - 5
221 | - 7
222 | - 11
223 | - 21
224 | filter_size: 256
225 | dense_size: 1000
226 | dropout_rate: 0.5
227 | lstm_size: 1000
228 | lr: 0.001
229 | features_to_use:
230 | - onehot
231 | - pssm
232 | ```
233 |
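To make the roles of these parameters concrete, here is a minimal Keras sketch of a CNN + BiLSTM block wired up from values like those above (an illustrative sketch assuming `tensorflow.keras` and an 8-class per-residue output; it is not the exact implementation in `models/a_cnn_bilstm.py`):

```
from tensorflow.keras import Model, layers, optimizers

def build_cnn_bilstm(n_features, convs=(3, 5, 7, 11, 21), filter_size=256,
                     dense_size=1000, lstm_size=1000, dropout_rate=0.5, lr=0.001,
                     n_classes=8):
    # Input: a variable-length sequence of per-residue feature vectors (e.g. onehot + pssm).
    inputs = layers.Input(shape=(None, n_features))

    # One Conv1D per window size in `convs`, each with `filter_size` filters.
    conv_outs = [layers.Conv1D(filter_size, k, padding="same", activation="relu")(inputs)
                 for k in convs]
    x = layers.concatenate(conv_outs)

    # Feed-forward layer of size `dense_size` before the LSTM, with dropout.
    x = layers.TimeDistributed(layers.Dense(dense_size, activation="relu"))(x)
    x = layers.Dropout(dropout_rate)(x)

    # Bidirectional LSTM with hidden size `lstm_size`, then a feed-forward layer after it.
    x = layers.Bidirectional(layers.LSTM(lstm_size, return_sequences=True))(x)
    x = layers.TimeDistributed(layers.Dense(dense_size, activation="relu"))(x)

    # Per-residue softmax over the secondary structure classes.
    outputs = layers.TimeDistributed(layers.Dense(n_classes, activation="softmax"))(x)

    model = Model(inputs, outputs)
    model.compile(optimizer=optimizers.Adam(learning_rate=lr),
                  loss="categorical_crossentropy", metrics=["accuracy"])
    return model
```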
234 |
235 |
236 | ## Model (b) CNN + BiLSTM + Highway Connection of PSSM
237 |
238 | For the details of the CNN + BiLSTM + Highway Connection of PSSM model please refer to the paper; to specify this model in the config file use `deep_learning_model: model_b_cnn_bilstm_highway`.
239 |
240 | 
241 |
242 | `convs` refers to the convolution window sizes (in the following example we use 5 window sizes: 3, 5, 7, 11, and 21).
243 |
244 | `filter_size` is the size of convolutional filters.
245 |
246 | `dense_size` is the size of the feed-forward layers used before and after the LSTM.
247 |
248 | `dropout_rate` is the dropout rate.
249 |
250 | `lstm_size` is the hidden size of bidirectional LSTM.
251 |
252 | `lr` is the learning rate.
253 |
254 | `features_to_use` is already covered in [3.1 Features](#features-to-use).
255 |
256 | `use_CRF` indicates whether you would like to include a CRF layer at the end.
257 |
258 |
259 | Sample config file
260 | ```
261 | deep_learning_model: model_b_cnn_bilstm_highway
262 | model_paramters:
263 | convs:
264 | - 3
265 | - 5
266 | - 7
267 | - 11
268 | - 21
269 | filter_size: 256
270 | dense_size: 1000
271 | dropout_rate: 0.5
272 | lstm_size: 1000
273 | lr: 0.001
274 | features_to_use:
275 | - onehot
276 | - pssm
277 | use_CRF: false
278 | ```
279 |
280 |
281 | ## Model (c) CNN + BiLSTM + Conditional Random Field Layer
282 |
283 | For the details of the CNN + BiLSTM + Conditional Random Field Layer model please refer to the paper; to specify this model in the config file use `deep_learning_model: model_c_cnn_bilstm_crf`.
284 |
285 | 
286 |
287 | `convs` refers to the convolution window sizes (in the following example we use 5 window sizes: 3, 5, 7, 11, and 21).
288 |
289 | `filter_size` is the size of convolutional filters.
290 |
291 | `dense_size` is the size of the feed-forward layers used before and after the LSTM.
292 |
293 | `dropout_rate` is the dropout rate.
294 |
295 | `lstm_size` is the hidden size of bidirectional LSTM.
296 |
297 | `lr` is the learning rate.
298 |
299 | `features_to_use` is already covered in [3.1 Features](#features-to-use).
300 |
301 | `CRF_input_dim` is the input dimension of the CRF layer.
302 |
303 |
304 | Sample config file
305 | ```
306 | deep_learning_model: model_c_cnn_bilstm_crf
307 | model_paramters:
308 | convs:
309 | - 3
310 | - 5
311 | - 7
312 | - 11
313 | - 21
314 | filter_size: 256
315 | dense_size: 1000
316 | dropout_rate: 0.5
317 | lstm_size: 1000
318 | lr: 0.001
319 | features_to_use:
320 | - onehot
321 | - pssm
323 | CRF_input_dim: 200
324 | ```
325 |
326 | ## Model (d) CNN + BiLSTM + Attention mechanism
327 |
328 | For the details of the CNN + BiLSTM + Attention mechanism model please refer to the paper; to specify this model in the config file use `deep_learning_model: model_d_cnn_bilstm_attention`.
329 |
330 | 
331 |
332 | `attention_type` is the attention type to be selected from `additive` or `multiplicative`.
333 |
334 | `attention_units` is the number of attention units.
335 |
336 | `convs` refers to the convolution window sizes (in the following example we use 5 window sizes: 3, 5, 7, 11, and 21).
337 |
338 | `filter_size` is the size of convolutional filters.
339 |
340 | `dense_size` is the size of the feed-forward layers used before and after the LSTM.
341 |
342 | `dropout_rate` is the dropout rate.
343 |
344 | `lstm_size` is the hidden size of bidirectional LSTM.
345 |
346 | `lr` is the learning rate.
347 |
348 | `features_to_use` is already covered in [3.1 Features](#features-to-use).
349 |
350 | `use_CRF` indicates whether you would like to include a CRF layer at the end.
351 |
352 |
353 |
354 | Sample config file
355 | ```
356 | deep_learning_model: model_d_cnn_bilstm_attention
357 | model_paramters:
358 | attention_type: additive
359 | attention_units: 32
360 | convs:
361 | - 3
362 | - 5
363 | - 7
364 | - 11
365 | - 21
366 | filter_size: 256
367 | dense_size: 1000
368 | dropout_rate: 0.5
369 | lstm_size: 1000
370 | lr: 0.001
371 | features_to_use:
372 | - onehot
373 | - pssm
375 | use_CRF: false
376 | ```
377 |
378 | ## Model (e) CNN
379 |
380 | For the details of the CNN model please refer to the paper; to specify this model in the config file use `deep_learning_model: model_e_cnn`.
381 |
382 | 
383 |
384 | `convs` refers to the convolution window sizes (in the following example we use 5 window sizes: 3, 5, 7, 11, and 21).
385 |
386 | `filter_size` is the size of convolutional filters.
387 |
388 | `dense_size` is the size of the feed-forward layers used after the concatenation of the convolution results.
389 |
390 | `dropout_rate` is the dropout rate.
391 |
392 | `lr` is the learning rate.
393 |
394 | `features_to_use` is already covered in [3.1 Features](#features-to-use).
395 |
396 | `use_CRF` indicates whether you would like to include a CRF layer at the end.
397 |
398 | Sample config file
399 | ```
400 | deep_learning_model: model_e_cnn
401 | model_paramters:
402 | convs:
403 | - 3
404 | - 5
405 | - 7
406 | - 11
407 | - 21
408 | filter_size: 256
409 | dense_size: 1000
410 | dropout_rate: 0.5
411 | lstm_size: 1000
412 | lr: 0.001
413 | features_to_use:
414 | - onehot
415 | - pssm
417 | use_CRF: false
418 | ```
419 |
420 | ## Model (f) Multiscale CNN
421 |
422 | For the details of the Multiscale CNN model please refer to the paper; to specify this model in the config file use `deep_learning_model: model_f_multiscale_cnn`.
423 |
424 | 
425 |
426 | `multiscalecnn_layers` specifies how many gated multiscale CNN layers should be stacked.
427 |
428 | `cnn_regularizer` is the regularization parameter for the CNN.
429 |
430 | `convs` refers to the convolution window sizes (in the following example we use 5 window sizes: 3, 5, 7, 11, and 21).
431 |
432 | `filter_size` is the size of convolutional filters.
433 |
434 | `dense_size` is the size of the feed-forward layers used after the concatenation of the convolution results.
435 |
436 | `dropout_rate` is the dropout rate.
437 |
438 | `lr` is the learning rate.
439 |
440 | `features_to_use` is already covered in [3.1 Features](#features-to-use).
441 |
442 | `use_CRF` indicates whether you would like to include a CRF layer at the end.
443 |
444 | Sample config file
445 | ```
446 | deep_learning_model: model_f_multiscale_cnn
447 | model_paramters:
448 | cnn_regularizer: 5.0e-05
449 | multiscalecnn_layers: 3
450 | convs:
451 | - 3
452 | - 5
453 | - 7
454 | - 11
455 | - 21
456 | filter_size: 256
457 | dense_size: 1000
458 | dropout_rate: 0.5
459 | lstm_size: 1000
460 | lr: 0.001
461 | features_to_use:
462 | - onehot
463 | - pssm
465 | use_CRF: false
466 | ```
467 |
468 | Return to the [table of contents ↑](#table-of-contents).
469 |
470 |
471 |
472 | ## Your own model
473 |
474 | Create your own model by using the templates of model_a to model_f, and test its performance against the existing methods.
475 |
476 | Return to the [table of contents ↑](#table-of-contents).
477 |
478 |
479 | ## Output
480 |
481 |
482 | Finally, after completion of training, DeepPrime2Sec generates a PDF report with the following information at `results/$domain/$setting/report.pdf`:
483 |
484 | - [x] The accuracy of trained model on the standard test set of the task (CB513)
485 | - [x] Confusion matrix of the model
486 | - [x] Contingency table of errors at the boundaries of secondary structure transitions, along with the p-values of the Chi-square and G-tests.
487 | - [x] The learning curve
488 | - [x] The neural network weights for the best models
489 |
490 |
491 | 
492 |
--------------------------------------------------------------------------------
/config_generator.py:
--------------------------------------------------------------------------------
1 | import yaml
2 |
3 | config_model_a = {'run_parameters':
4 | {'domain_name': 'baseline', 'gpu': 1, 'setting_name': 'baseline', 'train_batch_size': 64,
5 | 'test_batch_size': 100, 'patience': 10, 'epochs': 100},
6 | 'deep_learning_model': 'model_a_cnn_bilstm',
7 | 'model_paramters': {'convs': [3, 5, 7, 11, 21], 'dense_size': 1000, 'lstm_size': 1000,
8 | 'dropout_rate' : 0.5, 'filter_size':256,'lr' : 0.001, 'features_to_use': ['onehot',
9 | 'pssm']}}
10 |
11 | config_model_b = {'run_parameters':
12 | {'domain_name': 'baseline', 'gpu': 1, 'setting_name': 'baseline', 'train_batch_size': 64,
13 | 'test_batch_size': 100, 'patience': 10, 'epochs': 100},
14 | 'deep_learning_model': 'model_b_cnn_bilstm_highway',
15 | 'model_paramters': {'convs': [3, 5, 7, 11, 21], 'dense_size': 1000, 'lstm_size': 1000,
16 | 'dropout_rate' : 0.5,'filter_size':256, 'lr' : 0.001, 'features_to_use': ['onehot',
17 | 'pssm'], 'use_CRF':False}}
18 |
19 | config_model_c = {'run_parameters':
20 | {'domain_name': 'baseline', 'gpu': 1, 'setting_name': 'baseline', 'train_batch_size': 64,
21 | 'test_batch_size': 100, 'patience': 10, 'epochs': 100},
22 | 'deep_learning_model': 'model_c_cnn_bilstm_crf',
23 | 'model_paramters': {'convs': [3, 5, 7, 11, 21], 'dense_size': 1000, 'lstm_size': 1000,
24 | 'dropout_rate' : 0.5, 'filter_size':256, 'lr' : 0.001, 'features_to_use': ['onehot',
25 | 'pssm'], 'CRF_input_dim':200}}
26 | config_model_d = {'run_parameters':
27 | {'domain_name': 'baseline', 'gpu': 1, 'setting_name': 'baseline', 'train_batch_size': 64,
28 | 'test_batch_size': 100, 'patience': 10, 'epochs': 100},
29 | 'deep_learning_model': 'model_d_cnn_bilstm_attention',
30 | 'model_paramters': {'convs': [3, 5, 7, 11, 21], 'dense_size': 1000, 'lstm_size': 1000,
31 | 'dropout_rate' : 0.5, 'filter_size':256,'lr' : 0.001, 'features_to_use': ['onehot',
32 | 'pssm'], 'use_CRF':False, 'attention_units':32, 'attention_type':'additive'}}
33 |
34 | config_model_e = {'run_parameters':
35 | {'domain_name': 'baseline', 'gpu': 1, 'setting_name': 'baseline', 'train_batch_size': 64,
36 | 'test_batch_size': 100, 'patience': 10, 'epochs': 100},
37 | 'deep_learning_model': 'model_e_cnn',
38 | 'model_paramters': {'convs': [3, 5, 7, 11, 21], 'dense_size': 1000,
39 | 'dropout_rate' : 0.5, 'lr' : 0.001, 'filter_size':256,'features_to_use': ['onehot',
40 | 'pssm'], 'use_CRF':False}}
41 |
42 | #multiplicative
43 |
44 | config_model_f = {'run_parameters':
45 | {'domain_name': 'baseline', 'gpu': 1, 'setting_name': 'baseline', 'train_batch_size': 64,
46 | 'test_batch_size': 100, 'patience': 10, 'epochs': 100},
47 | 'deep_learning_model': 'model_f_multiscale_cnn',
48 | 'model_paramters': {'convs': [3, 5, 7, 11, 21],
49 | 'dropout_rate' : 0.5, 'lr' : 0.001, 'filter_size':256, 'features_to_use': ['onehot',
50 |                                       'pssm'], 'use_CRF':False, 'cnn_regularizer':0.00005, 'multiscalecnn_layers':3}}
51 |
52 | models = ['a','b','c','d','e','f']
53 |
54 | # Write each model configuration to its YAML file under sample_configs/.
55 | for idx, config in enumerate([config_model_a, config_model_b, config_model_c, config_model_d, config_model_e, config_model_f]):
56 |     with open('sample_configs/model_' + models[idx] + '.yaml', 'w') as f:
57 |         f.write(yaml.dump(config))
58 |
59 |
60 |
61 |
--------------------------------------------------------------------------------
/datasets/ReadME:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/datasets/test_length.txt:
--------------------------------------------------------------------------------
1 | 20
2 | 20
3 | 21
4 | 21
5 | 24
6 | 24
7 | 25
8 | 26
9 | 27
10 | 28
11 | 28
12 | 29
13 | 29
14 | 30
15 | 30
16 | 30
17 | 31
18 | 33
19 | 36
20 | 36
21 | 36
22 | 36
23 | 36
24 | 37
25 | 37
26 | 37
27 | 39
28 | 40
29 | 40
30 | 43
31 | 43
32 | 43
33 | 44
34 | 45
35 | 46
36 | 47
37 | 48
38 | 48
39 | 49
40 | 50
41 | 50
42 | 51
43 | 51
44 | 51
45 | 52
46 | 53
47 | 53
48 | 53
49 | 53
50 | 54
51 | 54
52 | 55
53 | 56
54 | 56
55 | 57
56 | 57
57 | 58
58 | 58
59 | 59
60 | 60
61 | 60
62 | 61
63 | 62
64 | 63
65 | 63
66 | 63
67 | 64
68 | 64
69 | 66
70 | 67
71 | 67
72 | 67
73 | 68
74 | 68
75 | 69
76 | 69
77 | 69
78 | 70
79 | 70
80 | 71
81 | 71
82 | 71
83 | 73
84 | 73
85 | 73
86 | 73
87 | 73
88 | 73
89 | 74
90 | 74
91 | 74
92 | 74
93 | 75
94 | 75
95 | 76
96 | 76
97 | 77
98 | 77
99 | 78
100 | 78
101 | 78
102 | 79
103 | 80
104 | 81
105 | 81
106 | 82
107 | 82
108 | 82
109 | 83
110 | 83
111 | 83
112 | 83
113 | 84
114 | 84
115 | 85
116 | 85
117 | 85
118 | 85
119 | 85
120 | 86
121 | 86
122 | 86
123 | 87
124 | 87
125 | 87
126 | 87
127 | 87
128 | 87
129 | 88
130 | 89
131 | 89
132 | 89
133 | 91
134 | 92
135 | 93
136 | 94
137 | 95
138 | 95
139 | 96
140 | 96
141 | 96
142 | 96
143 | 97
144 | 97
145 | 98
146 | 98
147 | 98
148 | 98
149 | 99
150 | 99
151 | 99
152 | 100
153 | 100
154 | 100
155 | 100
156 | 101
157 | 101
158 | 101
159 | 101
160 | 102
161 | 102
162 | 102
163 | 102
164 | 102
165 | 102
166 | 103
167 | 103
168 | 103
169 | 103
170 | 104
171 | 104
172 | 104
173 | 104
174 | 105
175 | 105
176 | 106
177 | 106
178 | 107
179 | 107
180 | 107
181 | 107
182 | 107
183 | 108
184 | 108
185 | 108
186 | 109
187 | 109
188 | 110
189 | 110
190 | 110
191 | 111
192 | 111
193 | 111
194 | 111
195 | 112
196 | 112
197 | 113
198 | 114
199 | 114
200 | 114
201 | 114
202 | 114
203 | 114
204 | 114
205 | 115
206 | 115
207 | 115
208 | 116
209 | 116
210 | 116
211 | 117
212 | 117
213 | 117
214 | 118
215 | 118
216 | 119
217 | 119
218 | 119
219 | 119
220 | 120
221 | 120
222 | 120
223 | 120
224 | 120
225 | 121
226 | 122
227 | 122
228 | 122
229 | 122
230 | 122
231 | 123
232 | 123
233 | 123
234 | 123
235 | 124
236 | 124
237 | 125
238 | 125
239 | 126
240 | 126
241 | 127
242 | 127
243 | 128
244 | 128
245 | 128
246 | 129
247 | 129
248 | 129
249 | 129
250 | 130
251 | 130
252 | 130
253 | 130
254 | 131
255 | 131
256 | 131
257 | 132
258 | 132
259 | 134
260 | 134
261 | 135
262 | 136
263 | 136
264 | 136
265 | 136
266 | 136
267 | 137
268 | 137
269 | 138
270 | 138
271 | 139
272 | 140
273 | 141
274 | 141
275 | 142
276 | 142
277 | 142
278 | 143
279 | 143
280 | 144
281 | 144
282 | 145
283 | 145
284 | 146
285 | 147
286 | 147
287 | 148
288 | 148
289 | 149
290 | 149
291 | 151
292 | 151
293 | 152
294 | 152
295 | 153
296 | 153
297 | 153
298 | 153
299 | 153
300 | 154
301 | 154
302 | 154
303 | 154
304 | 154
305 | 155
306 | 155
307 | 157
308 | 157
309 | 158
310 | 158
311 | 158
312 | 158
313 | 159
314 | 159
315 | 162
316 | 163
317 | 164
318 | 164
319 | 164
320 | 166
321 | 166
322 | 166
323 | 169
324 | 169
325 | 171
326 | 172
327 | 173
328 | 173
329 | 174
330 | 174
331 | 175
332 | 175
333 | 176
334 | 177
335 | 177
336 | 177
337 | 178
338 | 178
339 | 179
340 | 180
341 | 181
342 | 181
343 | 182
344 | 182
345 | 183
346 | 184
347 | 185
348 | 185
349 | 185
350 | 185
351 | 185
352 | 186
353 | 186
354 | 186
355 | 187
356 | 188
357 | 188
358 | 190
359 | 190
360 | 191
361 | 191
362 | 195
363 | 195
364 | 197
365 | 197
366 | 198
367 | 198
368 | 198
369 | 200
370 | 200
371 | 200
372 | 204
373 | 204
374 | 206
375 | 206
376 | 206
377 | 208
378 | 208
379 | 209
380 | 210
381 | 210
382 | 211
383 | 211
384 | 212
385 | 212
386 | 213
387 | 213
388 | 214
389 | 215
390 | 216
391 | 216
392 | 216
393 | 218
394 | 218
395 | 220
396 | 220
397 | 220
398 | 225
399 | 226
400 | 228
401 | 228
402 | 228
403 | 228
404 | 229
405 | 229
406 | 230
407 | 230
408 | 230
409 | 230
410 | 233
411 | 236
412 | 237
413 | 239
414 | 241
415 | 241
416 | 243
417 | 247
418 | 248
419 | 249
420 | 250
421 | 252
422 | 253
423 | 253
424 | 255
425 | 256
426 | 257
427 | 264
428 | 264
429 | 266
430 | 269
431 | 273
432 | 273
433 | 273
434 | 275
435 | 280
436 | 280
437 | 283
438 | 283
439 | 285
440 | 285
441 | 286
442 | 289
443 | 289
444 | 291
445 | 291
446 | 291
447 | 293
448 | 293
449 | 293
450 | 295
451 | 296
452 | 296
453 | 298
454 | 298
455 | 299
456 | 302
457 | 306
458 | 307
459 | 308
460 | 309
461 | 310
462 | 311
463 | 314
464 | 316
465 | 316
466 | 317
467 | 317
468 | 317
469 | 319
470 | 328
471 | 329
472 | 330
473 | 333
474 | 334
475 | 337
476 | 339
477 | 340
478 | 342
479 | 344
480 | 349
481 | 354
482 | 358
483 | 360
484 | 363
485 | 374
486 | 374
487 | 381
488 | 385
489 | 388
490 | 388
491 | 391
492 | 393
493 | 396
494 | 399
495 | 405
496 | 414
497 | 426
498 | 429
499 | 433
500 | 449
501 | 456
502 | 461
503 | 468
504 | 481
505 | 483
506 | 490
507 | 498
508 | 506
509 | 526
510 | 534
511 | 544
512 | 576
513 | 700
514 | 700
515 |
--------------------------------------------------------------------------------
/datasets/test_mat_Y.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ehsanasgari/DeepPrime2Sec/b0932214b85a6d949caf4348b78b01b207f266c7/datasets/test_mat_Y.npy
--------------------------------------------------------------------------------
/datasets/train_length.txt:
--------------------------------------------------------------------------------
1 | 12
2 | 14
3 | 15
4 | 17
5 | 18
6 | 18
7 | 20
8 | 20
9 | 21
10 | 21
11 | 21
12 | 24
13 | 25
14 | 25
15 | 27
16 | 27
17 | 28
18 | 29
19 | 29
20 | 31
21 | 31
22 | 32
23 | 33
24 | 33
25 | 34
26 | 35
27 | 35
28 | 35
29 | 38
30 | 39
31 | 39
32 | 39
33 | 39
34 | 39
35 | 40
36 | 41
37 | 42
38 | 42
39 | 43
40 | 43
41 | 43
42 | 43
43 | 43
44 | 43
45 | 43
46 | 43
47 | 43
48 | 45
49 | 45
50 | 45
51 | 45
52 | 45
53 | 45
54 | 46
55 | 46
56 | 46
57 | 47
58 | 47
59 | 47
60 | 47
61 | 47
62 | 48
63 | 48
64 | 48
65 | 48
66 | 48
67 | 48
68 | 48
69 | 48
70 | 49
71 | 49
72 | 49
73 | 49
74 | 49
75 | 50
76 | 50
77 | 50
78 | 50
79 | 51
80 | 51
81 | 51
82 | 51
83 | 51
84 | 51
85 | 51
86 | 51
87 | 52
88 | 52
89 | 52
90 | 52
91 | 52
92 | 52
93 | 52
94 | 52
95 | 53
96 | 53
97 | 53
98 | 53
99 | 53
100 | 53
101 | 53
102 | 53
103 | 53
104 | 53
105 | 54
106 | 54
107 | 54
108 | 54
109 | 54
110 | 54
111 | 54
112 | 54
113 | 54
114 | 54
115 | 54
116 | 54
117 | 55
118 | 55
119 | 55
120 | 56
121 | 56
122 | 56
123 | 56
124 | 56
125 | 56
126 | 56
127 | 56
128 | 56
129 | 56
130 | 57
131 | 57
132 | 57
133 | 57
134 | 57
135 | 57
136 | 57
137 | 57
138 | 57
139 | 57
140 | 57
141 | 57
142 | 57
143 | 57
144 | 57
145 | 57
146 | 58
147 | 58
148 | 58
149 | 58
150 | 58
151 | 58
152 | 58
153 | 58
154 | 58
155 | 58
156 | 59
157 | 59
158 | 59
159 | 59
160 | 59
161 | 59
162 | 59
163 | 59
164 | 59
165 | 59
166 | 60
167 | 60
168 | 60
169 | 60
170 | 60
171 | 60
172 | 60
173 | 60
174 | 60
175 | 60
176 | 60
177 | 60
178 | 60
179 | 61
180 | 61
181 | 61
182 | 61
183 | 61
184 | 61
185 | 61
186 | 61
187 | 61
188 | 61
189 | 61
190 | 61
191 | 61
192 | 61
193 | 61
194 | 62
195 | 62
196 | 62
197 | 62
198 | 62
199 | 62
200 | 62
201 | 62
202 | 62
203 | 62
204 | 62
205 | 62
206 | 62
207 | 62
208 | 63
209 | 63
210 | 63
211 | 63
212 | 63
213 | 63
214 | 63
215 | 63
216 | 63
217 | 63
218 | 63
219 | 63
220 | 63
221 | 63
222 | 63
223 | 63
224 | 63
225 | 64
226 | 64
227 | 64
228 | 64
229 | 64
230 | 64
231 | 64
232 | 64
233 | 64
234 | 64
235 | 65
236 | 65
237 | 65
238 | 65
239 | 65
240 | 65
241 | 65
242 | 65
243 | 65
244 | 65
245 | 65
246 | 65
247 | 65
248 | 65
249 | 65
250 | 65
251 | 65
252 | 66
253 | 66
254 | 66
255 | 66
256 | 66
257 | 66
258 | 66
259 | 66
260 | 66
261 | 66
262 | 66
263 | 66
264 | 66
265 | 66
266 | 66
267 | 66
268 | 66
269 | 66
270 | 66
271 | 66
272 | 66
273 | 67
274 | 67
275 | 67
276 | 67
277 | 67
278 | 67
279 | 67
280 | 67
281 | 67
282 | 67
283 | 67
284 | 67
285 | 67
286 | 67
287 | 67
288 | 67
289 | 67
290 | 68
291 | 68
292 | 68
293 | 68
294 | 68
295 | 68
296 | 68
297 | 68
298 | 68
299 | 68
300 | 68
301 | 68
302 | 68
303 | 69
304 | 69
305 | 69
306 | 69
307 | 69
308 | 69
309 | 69
310 | 69
311 | 69
312 | 69
313 | 69
314 | 69
315 | 69
316 | 69
317 | 69
318 | 69
319 | 69
320 | 70
321 | 70
322 | 70
323 | 70
324 | 70
325 | 70
326 | 70
327 | 70
328 | 70
329 | 70
330 | 70
331 | 70
332 | 70
333 | 70
334 | 71
335 | 71
336 | 71
337 | 71
338 | 71
339 | 71
340 | 71
341 | 72
342 | 72
343 | 72
344 | 72
345 | 72
346 | 72
347 | 72
348 | 72
349 | 72
350 | 72
351 | 72
352 | 73
353 | 73
354 | 73
355 | 73
356 | 73
357 | 73
358 | 73
359 | 73
360 | 73
361 | 73
362 | 73
363 | 73
364 | 73
365 | 73
366 | 73
367 | 73
368 | 73
369 | 74
370 | 74
371 | 74
372 | 74
373 | 74
374 | 74
375 | 74
376 | 74
377 | 74
378 | 74
379 | 74
380 | 74
381 | 74
382 | 74
383 | 74
384 | 74
385 | 74
386 | 74
387 | 75
388 | 75
389 | 75
390 | 75
391 | 75
392 | 75
393 | 75
394 | 75
395 | 75
396 | 75
397 | 75
398 | 75
399 | 75
400 | 75
401 | 75
402 | 75
403 | 75
404 | 75
405 | 76
406 | 76
407 | 76
408 | 76
409 | 76
410 | 76
411 | 76
412 | 76
413 | 76
414 | 76
415 | 76
416 | 76
417 | 76
418 | 76
419 | 77
420 | 77
421 | 77
422 | 77
423 | 77
424 | 77
425 | 77
426 | 77
427 | 77
428 | 77
429 | 77
430 | 77
431 | 77
432 | 77
433 | 77
434 | 78
435 | 78
436 | 78
437 | 78
438 | 78
439 | 78
440 | 78
441 | 78
442 | 78
443 | 78
444 | 78
445 | 78
446 | 78
447 | 78
448 | 78
449 | 78
450 | 78
451 | 78
452 | 78
453 | 78
454 | 79
455 | 79
456 | 79
457 | 79
458 | 79
459 | 79
460 | 79
461 | 79
462 | 79
463 | 79
464 | 79
465 | 79
466 | 79
467 | 79
468 | 79
469 | 80
470 | 80
471 | 80
472 | 80
473 | 80
474 | 80
475 | 80
476 | 80
477 | 80
478 | 80
479 | 80
480 | 80
481 | 80
482 | 80
483 | 80
484 | 80
485 | 80
486 | 80
487 | 80
488 | 80
489 | 80
490 | 80
491 | 80
492 | 80
493 | 80
494 | 81
495 | 81
496 | 81
497 | 81
498 | 81
499 | 81
500 | 81
501 | 81
502 | 81
503 | 81
504 | 81
505 | 81
506 | 81
507 | 81
508 | 81
509 | 81
510 | 81
511 | 81
512 | 81
513 | 81
514 | 81
515 | 81
516 | 81
517 | 82
518 | 82
519 | 82
520 | 82
521 | 82
522 | 82
523 | 82
524 | 82
525 | 82
526 | 82
527 | 82
528 | 82
529 | 82
530 | 82
531 | 82
532 | 82
533 | 82
534 | 82
535 | 82
536 | 82
537 | 82
538 | 82
539 | 83
540 | 83
541 | 83
542 | 83
543 | 83
544 | 83
545 | 83
546 | 83
547 | 83
548 | 83
549 | 83
550 | 83
551 | 83
552 | 83
553 | 84
554 | 84
555 | 84
556 | 84
557 | 84
558 | 84
559 | 84
560 | 84
561 | 84
562 | 84
563 | 84
564 | 84
565 | 84
566 | 84
567 | 84
568 | 84
569 | 84
570 | 85
571 | 85
572 | 85
573 | 85
574 | 85
575 | 85
576 | 85
577 | 85
578 | 85
579 | 85
580 | 85
581 | 85
582 | 85
583 | 85
584 | 85
585 | 85
586 | 85
587 | 85
588 | 85
589 | 85
590 | 86
591 | 86
592 | 86
593 | 86
594 | 86
595 | 86
596 | 86
597 | 86
598 | 86
599 | 86
600 | 86
601 | 86
602 | 86
603 | 86
604 | 86
605 | 86
606 | 86
607 | 86
608 | 87
609 | 87
610 | 87
611 | 87
612 | 87
613 | 87
614 | 87
615 | 87
616 | 87
617 | 87
618 | 87
619 | 87
620 | 87
621 | 87
622 | 87
623 | 87
624 | 87
625 | 87
626 | 88
627 | 88
628 | 88
629 | 88
630 | 88
631 | 88
632 | 88
633 | 88
634 | 88
635 | 88
636 | 89
637 | 89
638 | 89
639 | 89
640 | 89
641 | 89
642 | 89
643 | 89
644 | 89
645 | 89
646 | 89
647 | 89
648 | 89
649 | 89
650 | 89
651 | 89
652 | 89
653 | 89
654 | 89
655 | 89
656 | 89
657 | 89
658 | 89
659 | 89
660 | 89
661 | 89
662 | 89
663 | 89
664 | 89
665 | 90
666 | 90
667 | 90
668 | 90
669 | 90
670 | 90
671 | 90
672 | 90
673 | 90
674 | 90
675 | 90
676 | 90
677 | 90
678 | 90
679 | 90
680 | 90
681 | 90
682 | 90
683 | 90
684 | 90
685 | 90
686 | 90
687 | 90
688 | 90
689 | 90
690 | 90
691 | 90
692 | 90
693 | 90
694 | 90
695 | 90
696 | 90
697 | 91
698 | 91
699 | 91
700 | 91
701 | 91
702 | 91
703 | 91
704 | 91
705 | 91
706 | 91
707 | 91
708 | 91
709 | 91
710 | 91
711 | 91
712 | 91
713 | 91
714 | 91
715 | 91
716 | 91
717 | 91
718 | 92
719 | 92
720 | 92
721 | 92
722 | 92
723 | 92
724 | 92
725 | 92
726 | 92
727 | 92
728 | 92
729 | 92
730 | 92
731 | 92
732 | 92
733 | 92
734 | 92
735 | 92
736 | 92
737 | 92
738 | 92
739 | 92
740 | 92
741 | 92
742 | 92
743 | 92
744 | 92
745 | 92
746 | 93
747 | 93
748 | 93
749 | 93
750 | 93
751 | 93
752 | 93
753 | 93
754 | 93
755 | 93
756 | 93
757 | 93
758 | 93
759 | 93
760 | 93
761 | 94
762 | 94
763 | 94
764 | 94
765 | 94
766 | 94
767 | 94
768 | 94
769 | 94
770 | 94
771 | 94
772 | 94
773 | 94
774 | 94
775 | 94
776 | 94
777 | 94
778 | 94
779 | 94
780 | 94
781 | 94
782 | 94
783 | 95
784 | 95
785 | 95
786 | 95
787 | 95
788 | 95
789 | 95
790 | 95
791 | 95
792 | 95
793 | 95
794 | 95
795 | 95
796 | 95
797 | 95
798 | 95
799 | 95
800 | 95
801 | 95
802 | 95
803 | 96
804 | 96
805 | 96
806 | 96
807 | 96
808 | 96
809 | 96
810 | 96
811 | 96
812 | 96
813 | 96
814 | 96
815 | 96
816 | 96
817 | 96
818 | 96
819 | 96
820 | 96
821 | 96
822 | 97
823 | 97
824 | 97
825 | 97
826 | 97
827 | 97
828 | 97
829 | 97
830 | 97
831 | 97
832 | 97
833 | 97
834 | 97
835 | 97
836 | 97
837 | 97
838 | 97
839 | 97
840 | 97
841 | 97
842 | 97
843 | 97
844 | 97
845 | 97
846 | 97
847 | 97
848 | 97
849 | 97
850 | 97
851 | 98
852 | 98
853 | 98
854 | 98
855 | 98
856 | 98
857 | 98
858 | 98
859 | 98
860 | 98
861 | 98
862 | 98
863 | 98
864 | 98
865 | 98
866 | 98
867 | 98
868 | 98
869 | 98
870 | 98
871 | 98
872 | 98
873 | 98
874 | 98
875 | 98
876 | 98
877 | 98
878 | 98
879 | 99
880 | 99
881 | 99
882 | 99
883 | 99
884 | 99
885 | 99
886 | 99
887 | 99
888 | 99
889 | 99
890 | 99
891 | 99
892 | 99
893 | 99
894 | 99
895 | 99
896 | 99
897 | 99
898 | 99
899 | 100
900 | 100
901 | 100
902 | 100
903 | 100
904 | 100
905 | 100
906 | 100
907 | 100
908 | 100
909 | 100
910 | 100
911 | 100
912 | 100
913 | 100
914 | 100
915 | 100
916 | 100
917 | 100
918 | 101
919 | 101
920 | 101
921 | 101
922 | 101
923 | 101
924 | 101
925 | 101
926 | 101
927 | 101
928 | 101
929 | 101
930 | 101
931 | 101
932 | 101
933 | 101
934 | 101
935 | 101
936 | 101
937 | 101
938 | 101
939 | 102
940 | 102
941 | 102
942 | 102
943 | 102
944 | 102
945 | 102
946 | 102
947 | 102
948 | 102
949 | 102
950 | 102
951 | 102
952 | 102
953 | 102
954 | 102
955 | 102
956 | 102
957 | 102
958 | 102
959 | 102
960 | 102
961 | 102
962 | 103
963 | 103
964 | 103
965 | 103
966 | 103
967 | 103
968 | 103
969 | 103
970 | 103
971 | 103
972 | 103
973 | 103
974 | 103
975 | 103
976 | 103
977 | 103
978 | 103
979 | 103
980 | 103
981 | 103
982 | 104
983 | 104
984 | 104
985 | 104
986 | 104
987 | 104
988 | 104
989 | 104
990 | 104
991 | 104
992 | 104
993 | 104
994 | 104
995 | 104
996 | 104
997 | 104
998 | 104
999 | 104
1000 | 104
1001 | 104
1002 | 104
1003 | 104
1004 | 104
1005 | 104
1006 | 105
1007 | 105
1008 | 105
1009 | 105
1010 | 105
1011 | 105
1012 | 105
1013 | 105
1014 | 105
1015 | 105
1016 | 105
1017 | 105
1018 | 105
1019 | 105
1020 | 105
1021 | 105
1022 | 105
1023 | 105
1024 | 105
1025 | 105
1026 | 105
1027 | 105
1028 | 105
1029 | 105
1030 | 105
1031 | 106
1032 | 106
1033 | 106
1034 | 106
1035 | 106
1036 | 106
1037 | 106
1038 | 106
1039 | 106
1040 | 106
1041 | 106
1042 | 106
1043 | 106
1044 | 106
1045 | 106
1046 | 106
1047 | 106
1048 | 106
1049 | 106
1050 | 106
1051 | 106
1052 | 106
1053 | 106
1054 | 106
1055 | 106
1056 | 106
1057 | 106
1058 | 106
1059 | 106
1060 | 106
1061 | 106
1062 | 106
1063 | 106
1064 | 106
1065 | 106
1066 | 107
1067 | 107
1068 | 107
1069 | 107
1070 | 107
1071 | 107
1072 | 107
1073 | 107
1074 | 107
1075 | 107
1076 | 107
1077 | 107
1078 | 107
1079 | 107
1080 | 107
1081 | 107
1082 | 108
1083 | 108
1084 | 108
1085 | 108
1086 | 108
1087 | 108
1088 | 108
1089 | 108
1090 | 108
1091 | 108
1092 | 108
1093 | 108
1094 | 108
1095 | 108
1096 | 108
1097 | 108
1098 | 108
1099 | 108
1100 | 108
1101 | 108
1102 | 108
1103 | 108
1104 | 108
1105 | 108
1106 | 109
1107 | 109
1108 | 109
1109 | 109
1110 | 109
1111 | 109
1112 | 109
1113 | 109
1114 | 109
1115 | 109
1116 | 109
1117 | 109
1118 | 109
1119 | 109
1120 | 109
1121 | 109
1122 | 109
1123 | 109
1124 | 109
1125 | 109
1126 | 109
1127 | 109
1128 | 109
1129 | 109
1130 | 109
1131 | 109
1132 | 109
1133 | 109
1134 | 109
1135 | 109
1136 | 109
1137 | 110
1138 | 110
1139 | 110
1140 | 110
1141 | 110
1142 | 110
1143 | 110
1144 | 110
1145 | 110
1146 | 110
1147 | 110
1148 | 110
1149 | 110
1150 | 110
1151 | 110
1152 | 110
1153 | 110
1154 | 110
1155 | 110
1156 | 110
1157 | 110
1158 | 110
1159 | 111
1160 | 111
1161 | 111
1162 | 111
1163 | 111
1164 | 111
1165 | 111
1166 | 111
1167 | 111
1168 | 111
1169 | 111
1170 | 111
1171 | 111
1172 | 111
1173 | 111
1174 | 111
1175 | 111
1176 | 111
1177 | 111
1178 | 111
1179 | 111
1180 | 111
1181 | 111
1182 | 111
1183 | 111
1184 | 111
1185 | 111
1186 | 112
1187 | 112
1188 | 112
1189 | 112
1190 | 112
1191 | 112
1192 | 112
1193 | 112
1194 | 112
1195 | 112
1196 | 112
1197 | 112
1198 | 112
1199 | 112
1200 | 112
1201 | 112
1202 | 112
1203 | 112
1204 | 112
1205 | 113
1206 | 113
1207 | 113
1208 | 113
1209 | 113
1210 | 113
1211 | 113
1212 | 113
1213 | 113
1214 | 113
1215 | 113
1216 | 113
1217 | 113
1218 | 113
1219 | 113
1220 | 113
1221 | 113
1222 | 113
1223 | 113
1224 | 113
1225 | 113
1226 | 113
1227 | 113
1228 | 114
1229 | 114
1230 | 114
1231 | 114
1232 | 114
1233 | 114
1234 | 114
1235 | 114
1236 | 114
1237 | 114
1238 | 114
1239 | 114
1240 | 114
1241 | 114
1242 | 114
1243 | 114
1244 | 114
1245 | 114
1246 | 114
1247 | 114
1248 | 114
1249 | 114
1250 | 114
1251 | 114
1252 | 114
1253 | 114
1254 | 114
1255 | 114
1256 | 114
1257 | 114
1258 | 115
1259 | 115
1260 | 115
1261 | 115
1262 | 115
1263 | 115
1264 | 115
1265 | 115
1266 | 115
1267 | 115
1268 | 115
1269 | 115
1270 | 115
1271 | 115
1272 | 115
1273 | 115
1274 | 115
1275 | 115
1276 | 115
1277 | 115
1278 | 115
1279 | 115
1280 | 115
1281 | 115
1282 | 115
1283 | 115
1284 | 115
1285 | 115
1286 | 115
1287 | 116
1288 | 116
1289 | 116
1290 | 116
1291 | 116
1292 | 116
1293 | 116
1294 | 116
1295 | 116
1296 | 116
1297 | 116
1298 | 116
1299 | 116
1300 | 116
1301 | 116
1302 | 116
1303 | 116
1304 | 117
1305 | 117
1306 | 117
1307 | 117
1308 | 117
1309 | 117
1310 | 117
1311 | 117
1312 | 117
1313 | 117
1314 | 117
1315 | 117
1316 | 117
1317 | 117
1318 | 117
1319 | 117
1320 | 117
1321 | 117
1322 | 117
1323 | 118
1324 | 118
1325 | 118
1326 | 118
1327 | 118
1328 | 118
1329 | 118
1330 | 118
1331 | 118
1332 | 118
1333 | 118
1334 | 118
1335 | 118
1336 | 118
1337 | 118
1338 | 118
1339 | 118
1340 | 118
1341 | 118
1342 | 118
1343 | 118
1344 | 118
1345 | 118
1346 | 118
1347 | 118
1348 | 118
1349 | 118
1350 | 118
1351 | 118
1352 | 118
1353 | 118
1354 | 118
1355 | 118
1356 | 118
1357 | 118
1358 | 118
1359 | 118
1360 | 118
1361 | 118
1362 | 118
1363 | 119
1364 | 119
1365 | 119
1366 | 119
1367 | 119
1368 | 119
1369 | 119
1370 | 119
1371 | 119
1372 | 119
1373 | 119
1374 | 119
1375 | 119
1376 | 119
1377 | 119
1378 | 119
1379 | 119
1380 | 119
1381 | 119
1382 | 119
1383 | 119
1384 | 119
1385 | 119
1386 | 119
1387 | 119
1388 | 119
1389 | 119
1390 | 120
1391 | 120
1392 | 120
1393 | 120
1394 | 120
1395 | 120
1396 | 120
1397 | 120
1398 | 120
1399 | 120
1400 | 120
1401 | 120
1402 | 120
1403 | 120
1404 | 120
1405 | 120
1406 | 120
1407 | 120
1408 | 120
1409 | 120
1410 | 120
1411 | 120
1412 | 120
1413 | 120
1414 | 120
1415 | 120
1416 | 121
1417 | 121
1418 | 121
1419 | 121
1420 | 121
1421 | 121
1422 | 121
1423 | 121
1424 | 121
1425 | 121
1426 | 121
1427 | 121
1428 | 121
1429 | 121
1430 | 121
1431 | 121
1432 | 121
1433 | 121
1434 | 121
1435 | 121
1436 | 121
1437 | 121
1438 | 121
1439 | 121
1440 | 121
1441 | 121
1442 | 121
1443 | 122
1444 | 122
1445 | 122
1446 | 122
1447 | 122
1448 | 122
1449 | 122
1450 | 122
1451 | 122
1452 | 122
1453 | 122
1454 | 122
1455 | 122
1456 | 122
1457 | 122
1458 | 122
1459 | 122
1460 | 122
1461 | 122
1462 | 122
1463 | 122
1464 | 122
1465 | 122
1466 | 122
1467 | 122
1468 | 122
1469 | 122
1470 | 123
1471 | 123
1472 | 123
1473 | 123
1474 | 123
1475 | 123
1476 | 123
1477 | 123
1478 | 123
1479 | 123
1480 | 123
1481 | 123
1482 | 123
1483 | 123
1484 | 123
1485 | 123
1486 | 123
1487 | 123
1488 | 123
1489 | 123
1490 | 123
1491 | 123
1492 | 123
1493 | 123
1494 | 123
1495 | 124
1496 | 124
1497 | 124
1498 | 124
1499 | 124
1500 | 124
1501 | 124
1502 | 124
1503 | 124
1504 | 124
1505 | 124
1506 | 124
1507 | 124
1508 | 124
1509 | 124
1510 | 124
1511 | 124
1512 | 124
1513 | 124
1514 | 124
1515 | 124
1516 | 124
1517 | 124
1518 | 124
1519 | 124
1520 | 124
1521 | 124
1522 | 124
1523 | 125
1524 | 125
1525 | 125
1526 | 125
1527 | 125
1528 | 125
1529 | 125
1530 | 125
1531 | 125
1532 | 125
1533 | 125
1534 | 125
1535 | 125
1536 | 125
1537 | 125
1538 | 125
1539 | 125
1540 | 125
1541 | 125
1542 | 125
1543 | 125
1544 | 126
1545 | 126
1546 | 126
1547 | 126
1548 | 126
1549 | 126
1550 | 126
1551 | 126
1552 | 126
1553 | 126
1554 | 126
1555 | 126
1556 | 126
1557 | 126
1558 | 126
1559 | 126
1560 | 126
1561 | 126
1562 | 126
1563 | 126
1564 | 126
1565 | 126
1566 | 126
1567 | 126
1568 | 126
1569 | 126
1570 | 126
1571 | 127
1572 | 127
1573 | 127
1574 | 127
1575 | 127
1576 | 127
1577 | 127
1578 | 127
1579 | 127
1580 | 127
1581 | 127
1582 | 127
1583 | 127
1584 | 127
1585 | 127
1586 | 127
1587 | 127
1588 | 127
1589 | 127
1590 | 127
1591 | 127
1592 | 127
1593 | 127
1594 | 127
1595 | 127
1596 | 127
1597 | 127
1598 | 128
1599 | 128
1600 | 128
1601 | 128
1602 | 128
1603 | 128
1604 | 128
1605 | 128
1606 | 128
1607 | 128
1608 | 128
1609 | 128
1610 | 128
1611 | 128
1612 | 128
1613 | 128
1614 | 128
1615 | 128
1616 | 128
1617 | 128
1618 | 128
1619 | 128
1620 | 128
1621 | 128
1622 | 129
1623 | 129
1624 | 129
1625 | 129
1626 | 129
1627 | 129
1628 | 129
1629 | 129
1630 | 129
1631 | 129
1632 | 129
1633 | 129
1634 | 129
1635 | 129
1636 | 129
1637 | 129
1638 | 129
1639 | 129
1640 | 129
1641 | 129
1642 | 129
1643 | 129
1644 | 129
1645 | 130
1646 | 130
1647 | 130
1648 | 130
1649 | 130
1650 | 130
1651 | 130
1652 | 130
1653 | 130
1654 | 130
1655 | 130
1656 | 130
1657 | 130
1658 | 130
1659 | 130
1660 | 130
1661 | 130
1662 | 131
1663 | 131
1664 | 131
1665 | 131
1666 | 131
1667 | 131
1668 | 131
1669 | 131
1670 | 131
1671 | 131
1672 | 131
1673 | 131
1674 | 131
1675 | 131
1676 | 131
1677 | 131
1678 | 131
1679 | 131
1680 | 131
1681 | 131
1682 | 131
1683 | 131
1684 | 131
1685 | 131
1686 | 131
1687 | 131
1688 | 131
1689 | 132
1690 | 132
1691 | 132
1692 | 132
1693 | 132
1694 | 132
1695 | 132
1696 | 132
1697 | 132
1698 | 132
1699 | 132
1700 | 132
1701 | 132
1702 | 132
1703 | 132
1704 | 132
1705 | 132
1706 | 132
1707 | 132
1708 | 132
1709 | 132
1710 | 132
1711 | 132
1712 | 132
1713 | 132
1714 | 132
1715 | 132
1716 | 133
1717 | 133
1718 | 133
1719 | 133
1720 | 133
1721 | 133
1722 | 133
1723 | 133
1724 | 133
1725 | 133
1726 | 133
1727 | 133
1728 | 133
1729 | 133
1730 | 133
1731 | 133
1732 | 133
1733 | 133
1734 | 133
1735 | 133
1736 | 133
1737 | 133
1738 | 133
1739 | 133
1740 | 133
1741 | 133
1742 | 133
1743 | 133
1744 | 133
1745 | 134
1746 | 134
1747 | 134
1748 | 134
1749 | 134
1750 | 134
1751 | 134
1752 | 134
1753 | 134
1754 | 134
1755 | 134
1756 | 134
1757 | 134
1758 | 134
1759 | 134
1760 | 134
1761 | 134
1762 | 134
1763 | 134
1764 | 134
1765 | 134
1766 | 134
1767 | 134
1768 | 134
1769 | 134
1770 | 134
1771 | 134
1772 | 134
1773 | 134
1774 | 135
1775 | 135
1776 | 135
1777 | 135
1778 | 135
1779 | 135
1780 | 135
1781 | 135
1782 | 135
1783 | 135
1784 | 135
1785 | 135
1786 | 135
1787 | 135
1788 | 135
1789 | 135
1790 | 135
1791 | 135
1792 | 135
1793 | 135
1794 | 135
1795 | 135
1796 | 135
1797 | 135
1798 | 135
1799 | 136
1800 | 136
1801 | 136
1802 | 136
1803 | 136
1804 | 136
1805 | 136
1806 | 136
1807 | 136
1808 | 136
1809 | 136
1810 | 136
1811 | 136
1812 | 136
1813 | 136
1814 | 136
1815 | 137
1816 | 137
1817 | 137
1818 | 137
1819 | 137
1820 | 137
1821 | 137
1822 | 137
1823 | 137
1824 | 137
1825 | 137
1826 | 137
1827 | 137
1828 | 137
1829 | 137
1830 | 137
1831 | 137
1832 | 137
1833 | 137
1834 | 137
1835 | 137
1836 | 137
1837 | 137
1838 | 137
1839 | 138
1840 | 138
1841 | 138
1842 | 138
1843 | 138
1844 | 138
1845 | 138
1846 | 138
1847 | 138
1848 | 138
1849 | 138
1850 | 138
1851 | 138
1852 | 138
1853 | 138
1854 | 138
1855 | 138
1856 | 138
1857 | 138
1858 | 138
1859 | 138
1860 | 138
1861 | 138
1862 | 138
1863 | 138
1864 | 138
1865 | 138
1866 | 139
1867 | 139
1868 | 139
1869 | 139
1870 | 139
1871 | 139
1872 | 139
1873 | 139
1874 | 139
1875 | 139
1876 | 139
1877 | 139
1878 | 139
1879 | 139
1880 | 139
1881 | 139
1882 | 139
1883 | 139
1884 | 139
1885 | 139
1886 | 139
1887 | 139
1888 | 140
1889 | 140
1890 | 140
1891 | 140
1892 | 140
1893 | 140
1894 | 140
1895 | 140
1896 | 140
1897 | 140
1898 | 140
1899 | 140
1900 | 140
1901 | 140
1902 | 140
1903 | 140
1904 | 140
1905 | 140
1906 | 140
1907 | 140
1908 | 140
1909 | 140
1910 | 140
1911 | 140
1912 | 141
1913 | 141
1914 | 141
1915 | 141
1916 | 141
1917 | 141
1918 | 141
1919 | 141
1920 | 141
1921 | 141
1922 | 141
1923 | 141
1924 | 141
1925 | 141
1926 | 141
1927 | 141
1928 | 141
1929 | 141
1930 | 142
1931 | 142
1932 | 142
1933 | 142
1934 | 142
1935 | 142
1936 | 142
1937 | 142
1938 | 142
1939 | 142
1940 | 142
1941 | 142
1942 | 142
1943 | 142
1944 | 142
1945 | 142
1946 | 142
1947 | 142
1948 | 142
1949 | 142
1950 | 142
1951 | 142
1952 | 142
1953 | 142
1954 | 142
1955 | 142
1956 | 143
1957 | 143
1958 | 143
1959 | 143
1960 | 143
1961 | 143
1962 | 143
1963 | 143
1964 | 143
1965 | 143
1966 | 143
1967 | 143
1968 | 143
1969 | 143
1970 | 143
1971 | 143
1972 | 143
1973 | 143
1974 | 143
1975 | 143
1976 | 143
1977 | 143
1978 | 143
1979 | 143
1980 | 143
1981 | 143
1982 | 143
1983 | 144
1984 | 144
1985 | 144
1986 | 144
1987 | 144
1988 | 144
1989 | 144
1990 | 144
1991 | 144
1992 | 144
1993 | 144
1994 | 144
1995 | 144
1996 | 144
1997 | 144
1998 | 144
1999 | 144
2000 | 144
2001 | 144
2002 | 144
2003 | 144
2004 | 144
2005 | 144
2006 | 144
2007 | 144
2008 | 144
2009 | 144
2010 | 145
2011 | 145
2012 | 145
2013 | 145
2014 | 145
2015 | 145
2016 | 145
2017 | 145
2018 | 145
2019 | 145
2020 | 145
2021 | 145
2022 | 145
2023 | 145
2024 | 145
2025 | 145
2026 | 145
2027 | 145
2028 | 145
2029 | 145
2030 | 145
2031 | 145
2032 | 145
2033 | 145
2034 | 145
2035 | 145
2036 | 145
2037 | 145
2038 | 145
2039 | 145
2040 | 146
2041 | 146
2042 | 146
2043 | 146
2044 | 146
2045 | 146
2046 | 146
2047 | 146
2048 | 146
2049 | 146
2050 | 146
2051 | 146
2052 | 146
2053 | 146
2054 | 146
2055 | 146
2056 | 146
2057 | 146
2058 | 146
2059 | 146
2060 | 146
2061 | 146
2062 | 146
2063 | 146
2064 | 146
2065 | 146
2066 | 146
2067 | 146
2068 | 146
2069 | 146
2070 | 146
2071 | 147
2072 | 147
2073 | 147
2074 | 147
2075 | 147
2076 | 147
2077 | 147
2078 | 147
2079 | 147
2080 | 147
2081 | 147
2082 | 147
2083 | 147
2084 | 147
2085 | 147
2086 | 147
2087 | 147
2088 | 147
2089 | 147
2090 | 147
2091 | 147
2092 | 147
2093 | 147
2094 | 147
2095 | 147
2096 | 147
2097 | 147
2098 | 148
2099 | 148
2100 | 148
2101 | 148
2102 | 148
2103 | 148
2104 | 148
2105 | 148
2106 | 148
2107 | 148
2108 | 148
2109 | 148
2110 | 148
2111 | 148
2112 | 148
2113 | 148
2114 | 148
2115 | 149
2116 | 149
2117 | 149
2118 | 149
2119 | 149
2120 | 149
2121 | 149
2122 | 149
2123 | 149
2124 | 149
2125 | 149
2126 | 149
2127 | 149
2128 | 149
2129 | 149
2130 | 149
2131 | 149
2132 | 149
2133 | 149
2134 | 149
2135 | 149
2136 | 149
2137 | 149
2138 | 149
2139 | 149
2140 | 149
2141 | 149
2142 | 149
2143 | 149
2144 | 149
2145 | 149
2146 | 150
2147 | 150
2148 | 150
2149 | 150
2150 | 150
2151 | 150
2152 | 150
2153 | 150
2154 | 150
2155 | 150
2156 | 150
2157 | 150
2158 | 150
2159 | 150
2160 | 150
2161 | 150
2162 | 150
2163 | 150
2164 | 150
2165 | 150
2166 | 150
2167 | 150
2168 | 150
2169 | 151
2170 | 151
2171 | 151
2172 | 151
2173 | 151
2174 | 151
2175 | 151
2176 | 151
2177 | 151
2178 | 151
2179 | 151
2180 | 151
2181 | 151
2182 | 151
2183 | 151
2184 | 151
2185 | 151
2186 | 151
2187 | 152
2188 | 152
2189 | 152
2190 | 152
2191 | 152
2192 | 152
2193 | 152
2194 | 152
2195 | 152
2196 | 152
2197 | 152
2198 | 152
2199 | 152
2200 | 152
2201 | 152
2202 | 152
2203 | 152
2204 | 152
2205 | 152
2206 | 152
2207 | 152
2208 | 152
2209 | 152
2210 | 152
2211 | 152
2212 | 152
2213 | 152
2214 | 152
2215 | 152
2216 | 152
2217 | 152
2218 | 152
2219 | 152
2220 | 152
2221 | 152
2222 | 153
2223 | 153
2224 | 153
2225 | 153
2226 | 153
2227 | 153
2228 | 153
2229 | 153
2230 | 153
2231 | 153
2232 | 153
2233 | 153
2234 | 153
2235 | 153
2236 | 153
2237 | 153
2238 | 153
2239 | 153
2240 | 153
2241 | 153
2242 | 153
2243 | 153
2244 | 153
2245 | 153
2246 | 153
2247 | 153
2248 | 153
2249 | 153
2250 | 154
2251 | 154
2252 | 154
2253 | 154
2254 | 154
2255 | 154
2256 | 154
2257 | 154
2258 | 154
2259 | 154
2260 | 154
2261 | 154
2262 | 154
2263 | 154
2264 | 154
2265 | 154
2266 | 154
2267 | 154
2268 | 154
2269 | 154
2270 | 154
2271 | 155
2272 | 155
2273 | 155
2274 | 155
2275 | 155
2276 | 155
2277 | 155
2278 | 155
2279 | 155
2280 | 155
2281 | 155
2282 | 155
2283 | 155
2284 | 155
2285 | 155
2286 | 155
2287 | 155
2288 | 155
2289 | 156
2290 | 156
2291 | 156
2292 | 156
2293 | 156
2294 | 156
2295 | 156
2296 | 156
2297 | 156
2298 | 156
2299 | 156
2300 | 156
2301 | 156
2302 | 156
2303 | 156
2304 | 156
2305 | 156
2306 | 156
2307 | 156
2308 | 156
2309 | 156
2310 | 156
2311 | 156
2312 | 156
2313 | 156
2314 | 156
2315 | 156
2316 | 156
2317 | 156
2318 | 157
2319 | 157
2320 | 157
2321 | 157
2322 | 157
2323 | 157
2324 | 157
2325 | 157
2326 | 157
2327 | 157
2328 | 157
2329 | 157
2330 | 157
2331 | 157
2332 | 157
2333 | 157
2334 | 157
2335 | 157
2336 | 157
2337 | 157
2338 | 157
2339 | 157
2340 | 158
2341 | 158
2342 | 158
2343 | 158
2344 | 158
2345 | 158
2346 | 158
2347 | 158
2348 | 158
2349 | 158
2350 | 158
2351 | 158
2352 | 158
2353 | 158
2354 | 158
2355 | 158
2356 | 158
2357 | 158
2358 | 158
2359 | 158
2360 | 158
2361 | 158
2362 | 158
2363 | 158
2364 | 159
2365 | 159
2366 | 159
2367 | 159
2368 | 159
2369 | 159
2370 | 159
2371 | 159
2372 | 159
2373 | 159
2374 | 159
2375 | 159
2376 | 159
2377 | 159
2378 | 159
2379 | 159
2380 | 159
2381 | 159
2382 | 159
2383 | 159
2384 | 159
2385 | 160
2386 | 160
2387 | 160
2388 | 160
2389 | 160
2390 | 160
2391 | 160
2392 | 160
2393 | 160
2394 | 160
2395 | 160
2396 | 160
2397 | 160
2398 | 160
2399 | 161
2400 | 161
2401 | 161
2402 | 161
2403 | 161
2404 | 161
2405 | 161
2406 | 161
2407 | 161
2408 | 161
2409 | 161
2410 | 161
2411 | 161
2412 | 161
2413 | 161
2414 | 161
2415 | 161
2416 | 161
2417 | 161
2418 | 161
2419 | 162
2420 | 162
2421 | 162
2422 | 162
2423 | 162
2424 | 162
2425 | 162
2426 | 162
2427 | 162
2428 | 162
2429 | 162
2430 | 162
2431 | 162
2432 | 162
2433 | 162
2434 | 162
2435 | 162
2436 | 162
2437 | 162
2438 | 162
2439 | 162
2440 | 162
2441 | 162
2442 | 162
2443 | 162
2444 | 162
2445 | 162
2446 | 163
2447 | 163
2448 | 163
2449 | 163
2450 | 163
2451 | 163
2452 | 163
2453 | 163
2454 | 163
2455 | 163
2456 | 163
2457 | 163
2458 | 163
2459 | 163
2460 | 163
2461 | 164
2462 | 164
2463 | 164
2464 | 164
2465 | 164
2466 | 164
2467 | 164
2468 | 164
2469 | 164
2470 | 164
2471 | 164
2472 | 164
2473 | 164
2474 | 164
2475 | 164
2476 | 164
2477 | 164
2478 | 164
2479 | 165
2480 | 165
2481 | 165
2482 | 165
2483 | 165
2484 | 165
2485 | 165
2486 | 165
2487 | 165
2488 | 166
2489 | 166
2490 | 166
2491 | 166
2492 | 166
2493 | 166
2494 | 166
2495 | 166
2496 | 166
2497 | 166
2498 | 166
2499 | 166
2500 | 166
2501 | 166
2502 | 166
2503 | 166
2504 | 166
2505 | 166
2506 | 166
2507 | 166
2508 | 167
2509 | 167
2510 | 167
2511 | 167
2512 | 167
2513 | 167
2514 | 167
2515 | 167
2516 | 167
2517 | 167
2518 | 167
2519 | 167
2520 | 167
2521 | 167
2522 | 168
2523 | 168
2524 | 168
2525 | 168
2526 | 168
2527 | 168
2528 | 168
2529 | 168
2530 | 168
2531 | 168
2532 | 168
2533 | 168
2534 | 169
2535 | 169
2536 | 169
2537 | 169
2538 | 169
2539 | 169
2540 | 169
2541 | 169
2542 | 169
2543 | 169
2544 | 169
2545 | 169
2546 | 169
2547 | 169
2548 | 170
2549 | 170
2550 | 170
2551 | 170
2552 | 170
2553 | 170
2554 | 170
2555 | 170
2556 | 170
2557 | 170
2558 | 170
2559 | 170
2560 | 170
2561 | 171
2562 | 171
2563 | 171
2564 | 171
2565 | 171
2566 | 171
2567 | 171
2568 | 171
2569 | 171
2570 | 171
2571 | 171
2572 | 171
2573 | 171
2574 | 171
2575 | 171
2576 | 171
2577 | 171
2578 | 171
2579 | 171
2580 | 172
2581 | 172
2582 | 172
2583 | 172
2584 | 172
2585 | 172
2586 | 172
2587 | 172
2588 | 172
2589 | 172
2590 | 172
2591 | 172
2592 | 172
2593 | 172
2594 | 172
2595 | 172
2596 | 173
2597 | 173
2598 | 173
2599 | 173
2600 | 173
2601 | 173
2602 | 173
2603 | 173
2604 | 173
2605 | 173
2606 | 173
2607 | 173
2608 | 173
2609 | 174
2610 | 174
2611 | 174
2612 | 174
2613 | 174
2614 | 174
2615 | 174
2616 | 174
2617 | 174
2618 | 174
2619 | 175
2620 | 175
2621 | 175
2622 | 175
2623 | 175
2624 | 175
2625 | 175
2626 | 175
2627 | 175
2628 | 175
2629 | 175
2630 | 175
2631 | 175
2632 | 175
2633 | 175
2634 | 175
2635 | 175
2636 | 176
2637 | 176
2638 | 176
2639 | 176
2640 | 176
2641 | 176
2642 | 176
2643 | 176
2644 | 176
2645 | 176
2646 | 176
2647 | 176
2648 | 176
2649 | 176
2650 | 176
2651 | 176
2652 | 176
2653 | 176
2654 | 176
2655 | 176
2656 | 176
2657 | 176
2658 | 176
2659 | 176
2660 | 176
2661 | 177
2662 | 177
2663 | 177
2664 | 177
2665 | 177
2666 | 177
2667 | 177
2668 | 177
2669 | 177
2670 | 177
2671 | 177
2672 | 177
2673 | 177
2674 | 177
2675 | 177
2676 | 177
2677 | 178
2678 | 178
2679 | 178
2680 | 178
2681 | 178
2682 | 178
2683 | 178
2684 | 178
2685 | 178
2686 | 178
2687 | 178
2688 | 178
2689 | 178
2690 | 178
2691 | 178
2692 | 179
2693 | 179
2694 | 179
2695 | 179
2696 | 179
2697 | 179
2698 | 179
2699 | 179
2700 | 179
2701 | 179
2702 | 179
2703 | 179
2704 | 179
2705 | 179
2706 | 179
2707 | 180
2708 | 180
2709 | 180
2710 | 180
2711 | 180
2712 | 180
2713 | 180
2714 | 180
2715 | 180
2716 | 180
2717 | 180
2718 | 180
2719 | 180
2720 | 180
2721 | 180
2722 | 180
2723 | 180
2724 | 180
2725 | 180
2726 | 180
2727 | 180
2728 | 180
2729 | 181
2730 | 181
2731 | 181
2732 | 181
2733 | 181
2734 | 181
2735 | 181
2736 | 181
2737 | 181
2738 | 181
2739 | 181
2740 | 181
2741 | 181
2742 | 181
2743 | 181
2744 | 181
2745 | 181
2746 | 181
2747 | 182
2748 | 182
2749 | 182
2750 | 182
2751 | 182
2752 | 182
2753 | 182
2754 | 182
2755 | 182
2756 | 182
2757 | 182
2758 | 182
2759 | 182
2760 | 182
2761 | 182
2762 | 182
2763 | 182
2764 | 182
2765 | 182
2766 | 183
2767 | 183
2768 | 183
2769 | 183
2770 | 183
2771 | 183
2772 | 183
2773 | 183
2774 | 183
2775 | 183
2776 | 183
2777 | 183
2778 | 183
2779 | 183
2780 | 183
2781 | 183
2782 | 183
2783 | 184
2784 | 184
2785 | 184
2786 | 184
2787 | 184
2788 | 184
2789 | 184
2790 | 184
2791 | 184
2792 | 184
2793 | 184
2794 | 184
2795 | 184
2796 | 184
2797 | 184
2798 | 184
2799 | 184
2800 | 184
2801 | 184
2802 | 184
2803 | 185
2804 | 185
2805 | 185
2806 | 185
2807 | 185
2808 | 185
2809 | 185
2810 | 185
2811 | 185
2812 | 185
2813 | 185
2814 | 185
2815 | 185
2816 | 185
2817 | 185
2818 | 185
2819 | 185
2820 | 185
2821 | 185
2822 | 185
2823 | 186
2824 | 186
2825 | 186
2826 | 186
2827 | 186
2828 | 186
2829 | 186
2830 | 186
2831 | 186
2832 | 186
2833 | 186
2834 | 186
2835 | 186
2836 | 186
2837 | 186
2838 | 186
2839 | 186
2840 | 186
2841 | 186
2842 | 186
2843 | 186
2844 | 187
2845 | 187
2846 | 187
2847 | 187
2848 | 187
2849 | 187
2850 | 187
2851 | 187
2852 | 187
2853 | 187
2854 | 188
2855 | 188
2856 | 188
2857 | 188
2858 | 188
2859 | 188
2860 | 188
2861 | 188
2862 | 188
2863 | 188
2864 | 188
2865 | 188
2866 | 188
2867 | 188
2868 | 188
2869 | 188
2870 | 188
2871 | 188
2872 | 188
2873 | 189
2874 | 189
2875 | 189
2876 | 189
2877 | 189
2878 | 189
2879 | 189
2880 | 189
2881 | 189
2882 | 189
2883 | 189
2884 | 189
2885 | 189
2886 | 189
2887 | 189
2888 | 189
2889 | 190
2890 | 190
2891 | 190
2892 | 190
2893 | 190
2894 | 190
2895 | 190
2896 | 190
2897 | 190
2898 | 190
2899 | 190
2900 | 190
2901 | 190
2902 | 190
2903 | 190
2904 | 190
2905 | 190
2906 | 190
2907 | 190
2908 | 191
2909 | 191
2910 | 191
2911 | 191
2912 | 191
2913 | 191
2914 | 191
2915 | 191
2916 | 191
2917 | 191
2918 | 191
2919 | 191
2920 | 192
2921 | 192
2922 | 192
2923 | 192
2924 | 192
2925 | 192
2926 | 192
2927 | 192
2928 | 192
2929 | 192
2930 | 192
2931 | 192
2932 | 192
2933 | 192
2934 | 192
2935 | 192
2936 | 192
2937 | 192
2938 | 192
2939 | 192
2940 | 192
2941 | 193
2942 | 193
2943 | 193
2944 | 193
2945 | 193
2946 | 193
2947 | 193
2948 | 193
2949 | 193
2950 | 193
2951 | 193
2952 | 193
2953 | 193
2954 | 193
2955 | 193
2956 | 193
2957 | 193
2958 | 193
2959 | 193
2960 | 193
2961 | 193
2962 | 193
2963 | 193
2964 | 194
2965 | 194
2966 | 194
2967 | 194
2968 | 194
2969 | 194
2970 | 194
2971 | 194
2972 | 194
2973 | 194
2974 | 194
2975 | 194
2976 | 194
2977 | 194
2978 | 194
2979 | 194
2980 | 194
2981 | 194
2982 | 194
2983 | 194
2984 | 194
2985 | 194
2986 | 195
2987 | 195
2988 | 195
2989 | 195
2990 | 195
2991 | 195
2992 | 195
2993 | 195
2994 | 195
2995 | 195
2996 | 195
2997 | 195
2998 | 195
2999 | 195
3000 | 195
3001 | 196
3002 | 196
3003 | 196
3004 | 196
3005 | 196
3006 | 196
3007 | 196
3008 | 196
3009 | 196
3010 | 196
3011 | 196
3012 | 196
3013 | 196
3014 | 196
3015 | 196
3016 | 196
3017 | 197
3018 | 197
3019 | 197
3020 | 197
3021 | 197
3022 | 197
3023 | 197
3024 | 197
3025 | 197
3026 | 197
3027 | 197
3028 | 198
3029 | 198
3030 | 198
3031 | 198
3032 | 198
3033 | 198
3034 | 198
3035 | 198
3036 | 198
3037 | 198
3038 | 198
3039 | 198
3040 | 198
3041 | 198
3042 | 198
3043 | 198
3044 | 199
3045 | 199
3046 | 199
3047 | 199
3048 | 199
3049 | 199
3050 | 199
3051 | 199
3052 | 199
3053 | 199
3054 | 200
3055 | 200
3056 | 200
3057 | 200
3058 | 200
3059 | 200
3060 | 200
3061 | 200
3062 | 200
3063 | 200
3064 | 200
3065 | 200
3066 | 200
3067 | 200
3068 | 200
3069 | 200
3070 | 201
3071 | 201
3072 | 201
3073 | 201
3074 | 201
3075 | 201
3076 | 201
3077 | 201
3078 | 201
3079 | 201
3080 | 201
3081 | 201
3082 | 201
3083 | 201
3084 | 202
3085 | 202
3086 | 202
3087 | 202
3088 | 202
3089 | 202
3090 | 202
3091 | 202
3092 | 202
3093 | 202
3094 | 202
3095 | 202
3096 | 202
3097 | 203
3098 | 203
3099 | 203
3100 | 203
3101 | 203
3102 | 203
3103 | 203
3104 | 203
3105 | 203
3106 | 203
3107 | 203
3108 | 203
3109 | 203
3110 | 203
3111 | 203
3112 | 203
3113 | 204
3114 | 204
3115 | 204
3116 | 204
3117 | 204
3118 | 204
3119 | 204
3120 | 204
3121 | 204
3122 | 204
3123 | 204
3124 | 204
3125 | 204
3126 | 204
3127 | 205
3128 | 205
3129 | 205
3130 | 205
3131 | 205
3132 | 205
3133 | 205
3134 | 205
3135 | 205
3136 | 205
3137 | 205
3138 | 205
3139 | 205
3140 | 205
3141 | 205
3142 | 205
3143 | 205
3144 | 205
3145 | 205
3146 | 205
3147 | 205
3148 | 206
3149 | 206
3150 | 206
3151 | 206
3152 | 206
3153 | 206
3154 | 206
3155 | 206
3156 | 206
3157 | 206
3158 | 206
3159 | 206
3160 | 206
3161 | 206
3162 | 206
3163 | 206
3164 | 206
3165 | 206
3166 | 206
3167 | 206
3168 | 206
3169 | 206
3170 | 207
3171 | 207
3172 | 207
3173 | 207
3174 | 207
3175 | 207
3176 | 207
3177 | 207
3178 | 207
3179 | 207
3180 | 207
3181 | 207
3182 | 207
3183 | 208
3184 | 208
3185 | 208
3186 | 208
3187 | 208
3188 | 208
3189 | 208
3190 | 208
3191 | 208
3192 | 208
3193 | 208
3194 | 208
3195 | 208
3196 | 208
3197 | 208
3198 | 208
3199 | 208
3200 | 208
3201 | 208
3202 | 208
3203 | 209
3204 | 209
3205 | 209
3206 | 209
3207 | 209
3208 | 209
3209 | 209
3210 | 209
3211 | 209
3212 | 209
3213 | 209
3214 | 209
3215 | 210
3216 | 210
3217 | 210
3218 | 210
3219 | 210
3220 | 210
3221 | 210
3222 | 210
3223 | 210
3224 | 210
3225 | 210
3226 | 210
3227 | 210
3228 | 211
3229 | 211
3230 | 211
3231 | 211
3232 | 211
3233 | 211
3234 | 211
3235 | 211
3236 | 211
3237 | 211
3238 | 211
3239 | 211
3240 | 211
3241 | 211
3242 | 211
3243 | 211
3244 | 211
3245 | 211
3246 | 211
3247 | 211
3248 | 211
3249 | 211
3250 | 211
3251 | 211
3252 | 211
3253 | 212
3254 | 212
3255 | 212
3256 | 212
3257 | 212
3258 | 212
3259 | 212
3260 | 212
3261 | 212
3262 | 212
3263 | 212
3264 | 212
3265 | 212
3266 | 212
3267 | 213
3268 | 213
3269 | 213
3270 | 213
3271 | 213
3272 | 213
3273 | 213
3274 | 213
3275 | 213
3276 | 213
3277 | 213
3278 | 213
3279 | 213
3280 | 213
3281 | 213
3282 | 213
3283 | 213
3284 | 213
3285 | 214
3286 | 214
3287 | 214
3288 | 214
3289 | 214
3290 | 214
3291 | 214
3292 | 214
3293 | 214
3294 | 214
3295 | 214
3296 | 214
3297 | 214
3298 | 215
3299 | 215
3300 | 215
3301 | 215
3302 | 215
3303 | 215
3304 | 215
3305 | 215
3306 | 215
3307 | 215
3308 | 215
3309 | 215
3310 | 215
3311 | 216
3312 | 216
3313 | 216
3314 | 216
3315 | 216
3316 | 216
3317 | 216
3318 | 216
3319 | 216
3320 | 216
3321 | 216
3322 | 216
3323 | 216
3324 | 216
3325 | 216
3326 | 216
3327 | 216
3328 | 217
3329 | 217
3330 | 217
3331 | 217
3332 | 217
3333 | 217
3334 | 217
3335 | 217
3336 | 217
3337 | 217
3338 | 217
3339 | 217
3340 | 217
3341 | 217
3342 | 218
3343 | 218
3344 | 218
3345 | 218
3346 | 218
3347 | 218
3348 | 218
3349 | 218
3350 | 218
3351 | 218
3352 | 218
3353 | 218
3354 | 218
3355 | 218
3356 | 218
3357 | 219
3358 | 219
3359 | 219
3360 | 219
3361 | 219
3362 | 219
3363 | 219
3364 | 219
3365 | 219
3366 | 219
3367 | 219
3368 | 220
3369 | 220
3370 | 220
3371 | 220
3372 | 220
3373 | 220
3374 | 220
3375 | 220
3376 | 220
3377 | 220
3378 | 220
3379 | 220
3380 | 220
3381 | 221
3382 | 221
3383 | 221
3384 | 221
3385 | 221
3386 | 221
3387 | 221
3388 | 221
3389 | 221
3390 | 221
3391 | 221
3392 | 221
3393 | 221
3394 | 222
3395 | 222
3396 | 222
3397 | 222
3398 | 222
3399 | 222
3400 | 222
3401 | 222
3402 | 222
3403 | 222
3404 | 222
3405 | 222
3406 | 222
3407 | 222
3408 | 223
3409 | 223
3410 | 223
3411 | 223
3412 | 223
3413 | 223
3414 | 223
3415 | 223
3416 | 223
3417 | 223
3418 | 223
3419 | 224
3420 | 224
3421 | 224
3422 | 224
3423 | 224
3424 | 224
3425 | 224
3426 | 224
3427 | 224
3428 | 224
3429 | 225
3430 | 225
3431 | 225
3432 | 225
3433 | 225
3434 | 225
3435 | 225
3436 | 225
3437 | 225
3438 | 225
3439 | 225
3440 | 225
3441 | 226
3442 | 226
3443 | 226
3444 | 226
3445 | 226
3446 | 226
3447 | 226
3448 | 226
3449 | 227
3450 | 227
3451 | 227
3452 | 227
3453 | 227
3454 | 227
3455 | 227
3456 | 227
3457 | 227
3458 | 227
3459 | 227
3460 | 227
3461 | 228
3462 | 228
3463 | 228
3464 | 228
3465 | 228
3466 | 228
3467 | 228
3468 | 228
3469 | 228
3470 | 228
3471 | 228
3472 | 228
3473 | 228
3474 | 229
3475 | 229
3476 | 229
3477 | 229
3478 | 229
3479 | 229
3480 | 229
3481 | 229
3482 | 229
3483 | 230
3484 | 230
3485 | 230
3486 | 230
3487 | 230
3488 | 230
3489 | 230
3490 | 230
3491 | 230
3492 | 231
3493 | 231
3494 | 231
3495 | 231
3496 | 231
3497 | 231
3498 | 231
3499 | 231
3500 | 231
3501 | 231
3502 | 231
3503 | 231
3504 | 231
3505 | 232
3506 | 232
3507 | 232
3508 | 232
3509 | 232
3510 | 232
3511 | 232
3512 | 232
3513 | 232
3514 | 232
3515 | 232
3516 | 232
3517 | 232
3518 | 232
3519 | 233
3520 | 233
3521 | 233
3522 | 233
3523 | 233
3524 | 233
3525 | 233
3526 | 233
3527 | 233
3528 | 233
3529 | 233
3530 | 233
3531 | 233
3532 | 233
3533 | 234
3534 | 234
3535 | 234
3536 | 234
3537 | 234
3538 | 234
3539 | 234
3540 | 234
3541 | 235
3542 | 235
3543 | 235
3544 | 235
3545 | 235
3546 | 235
3547 | 235
3548 | 235
3549 | 235
3550 | 235
3551 | 235
3552 | 235
3553 | 235
3554 | 235
3555 | 235
3556 | 236
3557 | 236
3558 | 236
3559 | 236
3560 | 236
3561 | 236
3562 | 236
3563 | 236
3564 | 236
3565 | 236
3566 | 236
3567 | 236
3568 | 236
3569 | 236
3570 | 236
3571 | 237
3572 | 237
3573 | 237
3574 | 237
3575 | 237
3576 | 237
3577 | 237
3578 | 237
3579 | 237
3580 | 237
3581 | 237
3582 | 238
3583 | 238
3584 | 238
3585 | 238
3586 | 238
3587 | 238
3588 | 238
3589 | 238
3590 | 238
3591 | 238
3592 | 238
3593 | 239
3594 | 239
3595 | 239
3596 | 239
3597 | 239
3598 | 239
3599 | 239
3600 | 240
3601 | 240
3602 | 240
3603 | 240
3604 | 240
3605 | 240
3606 | 240
3607 | 240
3608 | 241
3609 | 241
3610 | 241
3611 | 241
3612 | 241
3613 | 241
3614 | 242
3615 | 242
3616 | 242
3617 | 242
3618 | 242
3619 | 242
3620 | 242
3621 | 242
3622 | 242
3623 | 243
3624 | 243
3625 | 243
3626 | 243
3627 | 243
3628 | 243
3629 | 243
3630 | 243
3631 | 243
3632 | 243
3633 | 243
3634 | 243
3635 | 243
3636 | 243
3637 | 243
3638 | 243
3639 | 243
3640 | 243
3641 | 244
3642 | 244
3643 | 244
3644 | 244
3645 | 244
3646 | 244
3647 | 244
3648 | 244
3649 | 244
3650 | 244
3651 | 244
3652 | 244
3653 | 244
3654 | 244
3655 | 244
3656 | 244
3657 | 244
3658 | 245
3659 | 245
3660 | 245
3661 | 245
3662 | 245
3663 | 245
3664 | 245
3665 | 245
3666 | 245
3667 | 245
3668 | 245
3669 | 245
3670 | 245
3671 | 245
3672 | 245
3673 | 246
3674 | 246
3675 | 246
3676 | 246
3677 | 246
3678 | 246
3679 | 246
3680 | 246
3681 | 246
3682 | 246
3683 | 246
3684 | 247
3685 | 247
3686 | 247
3687 | 247
3688 | 247
3689 | 247
3690 | 247
3691 | 247
3692 | 247
3693 | 247
3694 | 247
3695 | 247
3696 | 247
3697 | 247
3698 | 247
3699 | 247
3700 | 248
3701 | 248
3702 | 248
3703 | 248
3704 | 248
3705 | 248
3706 | 248
3707 | 248
3708 | 248
3709 | 248
3710 | 248
3711 | 248
3712 | 248
3713 | 248
3714 | 248
3715 | 249
3716 | 249
3717 | 249
3718 | 249
3719 | 249
3720 | 249
3721 | 249
3722 | 249
3723 | 249
3724 | 249
3725 | 249
3726 | 249
3727 | 250
3728 | 250
3729 | 250
3730 | 250
3731 | 250
3732 | 250
3733 | 250
3734 | 250
3735 | 250
3736 | 250
3737 | 250
3738 | 250
3739 | 250
3740 | 250
3741 | 250
3742 | 250
3743 | 250
3744 | 250
3745 | 251
3746 | 251
3747 | 251
3748 | 251
3749 | 251
3750 | 251
3751 | 251
3752 | 251
3753 | 251
3754 | 251
3755 | 251
3756 | 251
3757 | 251
3758 | 251
3759 | 251
3760 | 252
3761 | 252
3762 | 252
3763 | 252
3764 | 252
3765 | 252
3766 | 252
3767 | 252
3768 | 252
3769 | 252
3770 | 252
3771 | 252
3772 | 252
3773 | 252
3774 | 252
3775 | 253
3776 | 253
3777 | 253
3778 | 253
3779 | 253
3780 | 253
3781 | 253
3782 | 253
3783 | 253
3784 | 253
3785 | 254
3786 | 254
3787 | 254
3788 | 254
3789 | 254
3790 | 254
3791 | 254
3792 | 254
3793 | 254
3794 | 254
3795 | 254
3796 | 254
3797 | 254
3798 | 254
3799 | 254
3800 | 255
3801 | 255
3802 | 255
3803 | 255
3804 | 255
3805 | 255
3806 | 255
3807 | 255
3808 | 256
3809 | 256
3810 | 256
3811 | 256
3812 | 256
3813 | 256
3814 | 256
3815 | 256
3816 | 256
3817 | 256
3818 | 256
3819 | 256
3820 | 257
3821 | 257
3822 | 257
3823 | 257
3824 | 257
3825 | 257
3826 | 257
3827 | 257
3828 | 257
3829 | 257
3830 | 257
3831 | 258
3832 | 258
3833 | 258
3834 | 258
3835 | 258
3836 | 258
3837 | 258
3838 | 258
3839 | 258
3840 | 258
3841 | 258
3842 | 258
3843 | 258
3844 | 258
3845 | 258
3846 | 258
3847 | 258
3848 | 258
3849 | 259
3850 | 259
3851 | 259
3852 | 259
3853 | 259
3854 | 259
3855 | 259
3856 | 259
3857 | 259
3858 | 259
3859 | 259
3860 | 259
3861 | 259
3862 | 259
3863 | 259
3864 | 259
3865 | 259
3866 | 260
3867 | 260
3868 | 260
3869 | 260
3870 | 260
3871 | 260
3872 | 260
3873 | 260
3874 | 260
3875 | 260
3876 | 260
3877 | 260
3878 | 260
3879 | 260
3880 | 260
3881 | 260
3882 | 260
3883 | 261
3884 | 261
3885 | 261
3886 | 261
3887 | 261
3888 | 261
3889 | 261
3890 | 261
3891 | 261
3892 | 261
3893 | 261
3894 | 261
3895 | 261
3896 | 261
3897 | 261
3898 | 261
3899 | 261
3900 | 261
3901 | 261
3902 | 262
3903 | 262
3904 | 262
3905 | 262
3906 | 262
3907 | 262
3908 | 262
3909 | 262
3910 | 262
3911 | 262
3912 | 262
3913 | 262
3914 | 262
3915 | 262
3916 | 263
3917 | 263
3918 | 263
3919 | 263
3920 | 263
3921 | 263
3922 | 263
3923 | 263
3924 | 263
3925 | 263
3926 | 263
3927 | 263
3928 | 263
3929 | 263
3930 | 263
3931 | 263
3932 | 263
3933 | 264
3934 | 264
3935 | 264
3936 | 264
3937 | 264
3938 | 264
3939 | 264
3940 | 264
3941 | 264
3942 | 264
3943 | 264
3944 | 264
3945 | 264
3946 | 264
3947 | 264
3948 | 264
3949 | 264
3950 | 265
3951 | 265
3952 | 265
3953 | 265
3954 | 265
3955 | 265
3956 | 265
3957 | 265
3958 | 265
3959 | 265
3960 | 265
3961 | 266
3962 | 266
3963 | 266
3964 | 266
3965 | 266
3966 | 266
3967 | 266
3968 | 266
3969 | 266
3970 | 266
3971 | 266
3972 | 267
3973 | 267
3974 | 267
3975 | 267
3976 | 267
3977 | 267
3978 | 267
3979 | 267
3980 | 268
3981 | 268
3982 | 268
3983 | 268
3984 | 268
3985 | 268
3986 | 268
3987 | 268
3988 | 268
3989 | 268
3990 | 268
3991 | 268
3992 | 268
3993 | 269
3994 | 269
3995 | 269
3996 | 269
3997 | 269
3998 | 269
3999 | 269
4000 | 269
4001 | 269
4002 | 269
4003 | 270
4004 | 270
4005 | 270
4006 | 270
4007 | 270
4008 | 270
4009 | 270
4010 | 270
4011 | 270
4012 | 270
4013 | 270
4014 | 270
4015 | 270
4016 | 270
4017 | 271
4018 | 271
4019 | 271
4020 | 271
4021 | 271
4022 | 271
4023 | 271
4024 | 271
4025 | 271
4026 | 271
4027 | 272
4028 | 272
4029 | 272
4030 | 272
4031 | 272
4032 | 272
4033 | 272
4034 | 272
4035 | 272
4036 | 273
4037 | 273
4038 | 273
4039 | 273
4040 | 273
4041 | 273
4042 | 273
4043 | 274
4044 | 274
4045 | 274
4046 | 274
4047 | 274
4048 | 274
4049 | 274
4050 | 274
4051 | 274
4052 | 274
4053 | 274
4054 | 274
4055 | 274
4056 | 274
4057 | 274
4058 | 274
4059 | 275
4060 | 275
4061 | 275
4062 | 275
4063 | 275
4064 | 275
4065 | 275
4066 | 275
4067 | 275
4068 | 275
4069 | 275
4070 | 275
4071 | 275
4072 | 275
4073 | 276
4074 | 276
4075 | 276
4076 | 276
4077 | 276
4078 | 276
4079 | 276
4080 | 276
4081 | 276
4082 | 276
4083 | 276
4084 | 276
4085 | 277
4086 | 277
4087 | 277
4088 | 277
4089 | 277
4090 | 277
4091 | 277
4092 | 278
4093 | 278
4094 | 278
4095 | 278
4096 | 278
4097 | 278
4098 | 278
4099 | 278
4100 | 278
4101 | 278
4102 | 278
4103 | 278
4104 | 279
4105 | 279
4106 | 279
4107 | 279
4108 | 279
4109 | 279
4110 | 279
4111 | 279
4112 | 280
4113 | 280
4114 | 280
4115 | 280
4116 | 280
4117 | 280
4118 | 280
4119 | 280
4120 | 280
4121 | 280
4122 | 280
4123 | 280
4124 | 280
4125 | 280
4126 | 281
4127 | 281
4128 | 281
4129 | 281
4130 | 281
4131 | 281
4132 | 281
4133 | 282
4134 | 282
4135 | 282
4136 | 282
4137 | 282
4138 | 282
4139 | 282
4140 | 282
4141 | 282
4142 | 282
4143 | 282
4144 | 282
4145 | 282
4146 | 282
4147 | 283
4148 | 283
4149 | 283
4150 | 283
4151 | 283
4152 | 283
4153 | 283
4154 | 283
4155 | 283
4156 | 283
4157 | 283
4158 | 283
4159 | 283
4160 | 283
4161 | 283
4162 | 284
4163 | 284
4164 | 284
4165 | 284
4166 | 284
4167 | 284
4168 | 284
4169 | 284
4170 | 284
4171 | 284
4172 | 285
4173 | 285
4174 | 285
4175 | 285
4176 | 285
4177 | 285
4178 | 285
4179 | 285
4180 | 285
4181 | 285
4182 | 285
4183 | 286
4184 | 286
4185 | 286
4186 | 286
4187 | 286
4188 | 286
4189 | 286
4190 | 286
4191 | 286
4192 | 286
4193 | 286
4194 | 286
4195 | 287
4196 | 287
4197 | 287
4198 | 287
4199 | 287
4200 | 288
4201 | 288
4202 | 288
4203 | 288
4204 | 288
4205 | 288
4206 | 288
4207 | 288
4208 | 288
4209 | 288
4210 | 288
4211 | 288
4212 | 288
4213 | 288
4214 | 289
4215 | 289
4216 | 289
4217 | 289
4218 | 289
4219 | 289
4220 | 289
4221 | 289
4222 | 289
4223 | 289
4224 | 290
4225 | 290
4226 | 290
4227 | 290
4228 | 290
4229 | 290
4230 | 290
4231 | 290
4232 | 290
4233 | 291
4234 | 291
4235 | 291
4236 | 291
4237 | 291
4238 | 291
4239 | 291
4240 | 291
4241 | 291
4242 | 291
4243 | 292
4244 | 292
4245 | 292
4246 | 292
4247 | 293
4248 | 293
4249 | 293
4250 | 293
4251 | 293
4252 | 293
4253 | 293
4254 | 293
4255 | 293
4256 | 293
4257 | 293
4258 | 294
4259 | 294
4260 | 294
4261 | 294
4262 | 294
4263 | 294
4264 | 294
4265 | 294
4266 | 294
4267 | 294
4268 | 294
4269 | 295
4270 | 295
4271 | 295
4272 | 295
4273 | 295
4274 | 295
4275 | 295
4276 | 295
4277 | 295
4278 | 296
4279 | 296
4280 | 296
4281 | 296
4282 | 296
4283 | 296
4284 | 296
4285 | 296
4286 | 296
4287 | 297
4288 | 297
4289 | 297
4290 | 297
4291 | 297
4292 | 297
4293 | 297
4294 | 297
4295 | 298
4296 | 298
4297 | 298
4298 | 298
4299 | 298
4300 | 298
4301 | 298
4302 | 298
4303 | 299
4304 | 299
4305 | 299
4306 | 299
4307 | 299
4308 | 299
4309 | 299
4310 | 299
4311 | 299
4312 | 299
4313 | 299
4314 | 299
4315 | 300
4316 | 300
4317 | 300
4318 | 300
4319 | 300
4320 | 300
4321 | 300
4322 | 300
4323 | 300
4324 | 300
4325 | 301
4326 | 301
4327 | 301
4328 | 301
4329 | 301
4330 | 301
4331 | 301
4332 | 302
4333 | 302
4334 | 302
4335 | 302
4336 | 302
4337 | 302
4338 | 302
4339 | 302
4340 | 302
4341 | 302
4342 | 302
4343 | 302
4344 | 302
4345 | 302
4346 | 302
4347 | 302
4348 | 302
4349 | 302
4350 | 303
4351 | 303
4352 | 303
4353 | 303
4354 | 303
4355 | 303
4356 | 304
4357 | 304
4358 | 304
4359 | 304
4360 | 304
4361 | 305
4362 | 305
4363 | 305
4364 | 305
4365 | 305
4366 | 305
4367 | 306
4368 | 306
4369 | 306
4370 | 306
4371 | 306
4372 | 306
4373 | 306
4374 | 306
4375 | 306
4376 | 306
4377 | 306
4378 | 306
4379 | 306
4380 | 307
4381 | 307
4382 | 307
4383 | 307
4384 | 307
4385 | 307
4386 | 307
4387 | 308
4388 | 308
4389 | 308
4390 | 308
4391 | 308
4392 | 308
4393 | 308
4394 | 308
4395 | 308
4396 | 309
4397 | 309
4398 | 309
4399 | 309
4400 | 309
4401 | 309
4402 | 309
4403 | 309
4404 | 309
4405 | 309
4406 | 309
4407 | 309
4408 | 309
4409 | 310
4410 | 310
4411 | 310
4412 | 310
4413 | 310
4414 | 310
4415 | 310
4416 | 311
4417 | 311
4418 | 311
4419 | 311
4420 | 311
4421 | 311
4422 | 311
4423 | 311
4424 | 311
4425 | 312
4426 | 312
4427 | 312
4428 | 312
4429 | 312
4430 | 312
4431 | 312
4432 | 313
4433 | 313
4434 | 313
4435 | 313
4436 | 313
4437 | 313
4438 | 313
4439 | 313
4440 | 314
4441 | 314
4442 | 314
4443 | 314
4444 | 314
4445 | 314
4446 | 314
4447 | 314
4448 | 314
4449 | 314
4450 | 314
4451 | 315
4452 | 315
4453 | 315
4454 | 315
4455 | 315
4456 | 315
4457 | 315
4458 | 315
4459 | 315
4460 | 315
4461 | 315
4462 | 316
4463 | 316
4464 | 316
4465 | 316
4466 | 316
4467 | 316
4468 | 316
4469 | 316
4470 | 316
4471 | 317
4472 | 317
4473 | 317
4474 | 317
4475 | 317
4476 | 317
4477 | 317
4478 | 317
4479 | 317
4480 | 317
4481 | 317
4482 | 317
4483 | 317
4484 | 317
4485 | 318
4486 | 318
4487 | 318
4488 | 318
4489 | 318
4490 | 318
4491 | 318
4492 | 318
4493 | 318
4494 | 319
4495 | 319
4496 | 319
4497 | 319
4498 | 319
4499 | 319
4500 | 319
4501 | 319
4502 | 319
4503 | 319
4504 | 320
4505 | 320
4506 | 320
4507 | 320
4508 | 320
4509 | 320
4510 | 321
4511 | 321
4512 | 321
4513 | 321
4514 | 321
4515 | 321
4516 | 321
4517 | 321
4518 | 322
4519 | 322
4520 | 322
4521 | 322
4522 | 322
4523 | 322
4524 | 323
4525 | 323
4526 | 323
4527 | 323
4528 | 323
4529 | 323
4530 | 323
4531 | 323
4532 | 323
4533 | 324
4534 | 324
4535 | 324
4536 | 324
4537 | 324
4538 | 324
4539 | 324
4540 | 324
4541 | 324
4542 | 325
4543 | 325
4544 | 325
4545 | 325
4546 | 325
4547 | 325
4548 | 325
4549 | 325
4550 | 326
4551 | 326
4552 | 326
4553 | 326
4554 | 326
4555 | 326
4556 | 326
4557 | 326
4558 | 327
4559 | 327
4560 | 327
4561 | 327
4562 | 327
4563 | 327
4564 | 328
4565 | 328
4566 | 328
4567 | 328
4568 | 328
4569 | 328
4570 | 328
4571 | 328
4572 | 329
4573 | 329
4574 | 329
4575 | 329
4576 | 329
4577 | 329
4578 | 329
4579 | 329
4580 | 329
4581 | 330
4582 | 330
4583 | 330
4584 | 330
4585 | 330
4586 | 330
4587 | 330
4588 | 330
4589 | 330
4590 | 330
4591 | 330
4592 | 330
4593 | 330
4594 | 330
4595 | 331
4596 | 331
4597 | 331
4598 | 331
4599 | 331
4600 | 331
4601 | 331
4602 | 332
4603 | 332
4604 | 332
4605 | 332
4606 | 332
4607 | 333
4608 | 333
4609 | 333
4610 | 333
4611 | 333
4612 | 333
4613 | 333
4614 | 333
4615 | 333
4616 | 334
4617 | 334
4618 | 334
4619 | 334
4620 | 334
4621 | 334
4622 | 334
4623 | 334
4624 | 334
4625 | 334
4626 | 335
4627 | 335
4628 | 335
4629 | 335
4630 | 335
4631 | 335
4632 | 335
4633 | 336
4634 | 336
4635 | 336
4636 | 336
4637 | 337
4638 | 337
4639 | 337
4640 | 337
4641 | 337
4642 | 337
4643 | 337
4644 | 337
4645 | 338
4646 | 338
4647 | 338
4648 | 338
4649 | 338
4650 | 338
4651 | 339
4652 | 340
4653 | 340
4654 | 340
4655 | 340
4656 | 340
4657 | 340
4658 | 340
4659 | 340
4660 | 340
4661 | 340
4662 | 340
4663 | 340
4664 | 341
4665 | 341
4666 | 341
4667 | 341
4668 | 341
4669 | 341
4670 | 341
4671 | 341
4672 | 341
4673 | 342
4674 | 342
4675 | 342
4676 | 342
4677 | 342
4678 | 342
4679 | 342
4680 | 342
4681 | 342
4682 | 342
4683 | 342
4684 | 342
4685 | 342
4686 | 342
4687 | 342
4688 | 343
4689 | 343
4690 | 344
4691 | 344
4692 | 344
4693 | 344
4694 | 344
4695 | 344
4696 | 344
4697 | 345
4698 | 345
4699 | 345
4700 | 345
4701 | 346
4702 | 346
4703 | 346
4704 | 346
4705 | 346
4706 | 346
4707 | 347
4708 | 347
4709 | 347
4710 | 347
4711 | 347
4712 | 348
4713 | 348
4714 | 348
4715 | 349
4716 | 349
4717 | 349
4718 | 349
4719 | 349
4720 | 350
4721 | 350
4722 | 350
4723 | 350
4724 | 350
4725 | 351
4726 | 351
4727 | 351
4728 | 351
4729 | 351
4730 | 351
4731 | 351
4732 | 351
4733 | 351
4734 | 351
4735 | 351
4736 | 351
4737 | 351
4738 | 352
4739 | 352
4740 | 352
4741 | 352
4742 | 352
4743 | 352
4744 | 352
4745 | 353
4746 | 353
4747 | 353
4748 | 353
4749 | 353
4750 | 353
4751 | 354
4752 | 354
4753 | 354
4754 | 354
4755 | 354
4756 | 354
4757 | 355
4758 | 355
4759 | 355
4760 | 356
4761 | 356
4762 | 356
4763 | 356
4764 | 356
4765 | 356
4766 | 357
4767 | 357
4768 | 357
4769 | 357
4770 | 357
4771 | 358
4772 | 358
4773 | 358
4774 | 358
4775 | 358
4776 | 358
4777 | 358
4778 | 358
4779 | 359
4780 | 359
4781 | 359
4782 | 359
4783 | 359
4784 | 359
4785 | 359
4786 | 359
4787 | 359
4788 | 359
4789 | 360
4790 | 360
4791 | 360
4792 | 360
4793 | 360
4794 | 360
4795 | 360
4796 | 360
4797 | 361
4798 | 361
4799 | 361
4800 | 361
4801 | 362
4802 | 362
4803 | 362
4804 | 362
4805 | 362
4806 | 362
4807 | 362
4808 | 362
4809 | 362
4810 | 362
4811 | 362
4812 | 362
4813 | 363
4814 | 363
4815 | 363
4816 | 363
4817 | 363
4818 | 364
4819 | 364
4820 | 364
4821 | 364
4822 | 364
4823 | 364
4824 | 364
4825 | 365
4826 | 365
4827 | 365
4828 | 365
4829 | 365
4830 | 365
4831 | 365
4832 | 366
4833 | 366
4834 | 366
4835 | 366
4836 | 366
4837 | 366
4838 | 366
4839 | 366
4840 | 366
4841 | 367
4842 | 367
4843 | 367
4844 | 367
4845 | 367
4846 | 367
4847 | 368
4848 | 368
4849 | 368
4850 | 368
4851 | 369
4852 | 369
4853 | 369
4854 | 369
4855 | 369
4856 | 369
4857 | 370
4858 | 370
4859 | 370
4860 | 371
4861 | 371
4862 | 371
4863 | 371
4864 | 371
4865 | 371
4866 | 371
4867 | 371
4868 | 372
4869 | 372
4870 | 372
4871 | 372
4872 | 372
4873 | 373
4874 | 373
4875 | 373
4876 | 373
4877 | 373
4878 | 373
4879 | 374
4880 | 374
4881 | 374
4882 | 374
4883 | 374
4884 | 374
4885 | 374
4886 | 375
4887 | 375
4888 | 375
4889 | 375
4890 | 375
4891 | 375
4892 | 375
4893 | 376
4894 | 376
4895 | 376
4896 | 376
4897 | 376
4898 | 376
4899 | 376
4900 | 376
4901 | 376
4902 | 376
4903 | 376
4904 | 377
4905 | 377
4906 | 377
4907 | 377
4908 | 378
4909 | 378
4910 | 378
4911 | 378
4912 | 378
4913 | 379
4914 | 379
4915 | 379
4916 | 379
4917 | 379
4918 | 380
4919 | 381
4920 | 381
4921 | 381
4922 | 381
4923 | 381
4924 | 381
4925 | 381
4926 | 381
4927 | 381
4928 | 382
4929 | 382
4930 | 382
4931 | 382
4932 | 382
4933 | 383
4934 | 383
4935 | 383
4936 | 383
4937 | 383
4938 | 383
4939 | 383
4940 | 384
4941 | 384
4942 | 384
4943 | 384
4944 | 385
4945 | 385
4946 | 385
4947 | 385
4948 | 385
4949 | 385
4950 | 386
4951 | 386
4952 | 386
4953 | 386
4954 | 386
4955 | 386
4956 | 386
4957 | 386
4958 | 386
4959 | 386
4960 | 387
4961 | 387
4962 | 387
4963 | 388
4964 | 388
4965 | 388
4966 | 388
4967 | 388
4968 | 388
4969 | 388
4970 | 388
4971 | 388
4972 | 389
4973 | 389
4974 | 389
4975 | 389
4976 | 389
4977 | 389
4978 | 390
4979 | 390
4980 | 390
4981 | 390
4982 | 390
4983 | 391
4984 | 391
4985 | 391
4986 | 391
4987 | 391
4988 | 391
4989 | 391
4990 | 391
4991 | 391
4992 | 392
4993 | 392
4994 | 392
4995 | 392
4996 | 392
4997 | 392
4998 | 392
4999 | 392
5000 | 393
5001 | 393
5002 | 393
5003 | 393
5004 | 393
5005 | 393
5006 | 393
5007 | 393
5008 | 394
5009 | 394
5010 | 394
5011 | 395
5012 | 395
5013 | 395
5014 | 395
5015 | 395
5016 | 396
5017 | 397
5018 | 397
5019 | 397
5020 | 398
5021 | 398
5022 | 398
5023 | 398
5024 | 399
5025 | 399
5026 | 399
5027 | 399
5028 | 400
5029 | 400
5030 | 400
5031 | 400
5032 | 401
5033 | 401
5034 | 401
5035 | 401
5036 | 401
5037 | 401
5038 | 402
5039 | 402
5040 | 402
5041 | 402
5042 | 402
5043 | 402
5044 | 403
5045 | 403
5046 | 404
5047 | 404
5048 | 404
5049 | 404
5050 | 404
5051 | 404
5052 | 405
5053 | 405
5054 | 405
5055 | 405
5056 | 406
5057 | 406
5058 | 407
5059 | 407
5060 | 407
5061 | 407
5062 | 407
5063 | 407
5064 | 408
5065 | 408
5066 | 408
5067 | 408
5068 | 409
5069 | 410
5070 | 410
5071 | 410
5072 | 411
5073 | 411
5074 | 411
5075 | 411
5076 | 411
5077 | 412
5078 | 412
5079 | 412
5080 | 412
5081 | 412
5082 | 413
5083 | 413
5084 | 413
5085 | 413
5086 | 413
5087 | 413
5088 | 414
5089 | 414
5090 | 414
5091 | 414
5092 | 415
5093 | 415
5094 | 415
5095 | 416
5096 | 416
5097 | 416
5098 | 416
5099 | 417
5100 | 417
5101 | 418
5102 | 418
5103 | 418
5104 | 418
5105 | 419
5106 | 419
5107 | 419
5108 | 419
5109 | 419
5110 | 420
5111 | 420
5112 | 421
5113 | 421
5114 | 422
5115 | 422
5116 | 422
5117 | 422
5118 | 422
5119 | 423
5120 | 423
5121 | 424
5122 | 424
5123 | 424
5124 | 424
5125 | 424
5126 | 424
5127 | 424
5128 | 424
5129 | 424
5130 | 425
5131 | 425
5132 | 425
5133 | 425
5134 | 425
5135 | 425
5136 | 426
5137 | 426
5138 | 426
5139 | 426
5140 | 428
5141 | 428
5142 | 429
5143 | 429
5144 | 429
5145 | 429
5146 | 430
5147 | 431
5148 | 431
5149 | 431
5150 | 431
5151 | 431
5152 | 432
5153 | 432
5154 | 433
5155 | 433
5156 | 433
5157 | 433
5158 | 433
5159 | 434
5160 | 434
5161 | 434
5162 | 434
5163 | 434
5164 | 435
5165 | 435
5166 | 435
5167 | 435
5168 | 435
5169 | 435
5170 | 435
5171 | 436
5172 | 436
5173 | 436
5174 | 436
5175 | 437
5176 | 437
5177 | 437
5178 | 437
5179 | 437
5180 | 437
5181 | 437
5182 | 437
5183 | 438
5184 | 438
5185 | 438
5186 | 438
5187 | 439
5188 | 439
5189 | 439
5190 | 439
5191 | 440
5192 | 440
5193 | 440
5194 | 440
5195 | 441
5196 | 441
5197 | 441
5198 | 442
5199 | 442
5200 | 443
5201 | 444
5202 | 444
5203 | 444
5204 | 444
5205 | 445
5206 | 445
5207 | 445
5208 | 445
5209 | 446
5210 | 446
5211 | 447
5212 | 447
5213 | 449
5214 | 449
5215 | 449
5216 | 449
5217 | 450
5218 | 451
5219 | 451
5220 | 451
5221 | 451
5222 | 451
5223 | 452
5224 | 452
5225 | 453
5226 | 453
5227 | 453
5228 | 453
5229 | 453
5230 | 454
5231 | 454
5232 | 454
5233 | 454
5234 | 454
5235 | 455
5236 | 455
5237 | 455
5238 | 455
5239 | 456
5240 | 456
5241 | 456
5242 | 456
5243 | 456
5244 | 457
5245 | 457
5246 | 457
5247 | 457
5248 | 457
5249 | 458
5250 | 458
5251 | 458
5252 | 459
5253 | 459
5254 | 460
5255 | 460
5256 | 460
5257 | 461
5258 | 461
5259 | 461
5260 | 461
5261 | 462
5262 | 463
5263 | 463
5264 | 464
5265 | 464
5266 | 464
5267 | 465
5268 | 465
5269 | 465
5270 | 466
5271 | 466
5272 | 466
5273 | 467
5274 | 467
5275 | 467
5276 | 468
5277 | 470
5278 | 470
5279 | 471
5280 | 471
5281 | 471
5282 | 471
5283 | 471
5284 | 471
5285 | 472
5286 | 472
5287 | 472
5288 | 472
5289 | 473
5290 | 474
5291 | 474
5292 | 475
5293 | 475
5294 | 475
5295 | 476
5296 | 476
5297 | 477
5298 | 477
5299 | 477
5300 | 478
5301 | 479
5302 | 479
5303 | 479
5304 | 480
5305 | 480
5306 | 481
5307 | 482
5308 | 482
5309 | 482
5310 | 482
5311 | 482
5312 | 483
5313 | 483
5314 | 483
5315 | 484
5316 | 486
5317 | 487
5318 | 487
5319 | 487
5320 | 488
5321 | 489
5322 | 489
5323 | 489
5324 | 489
5325 | 490
5326 | 490
5327 | 491
5328 | 491
5329 | 492
5330 | 492
5331 | 492
5332 | 492
5333 | 493
5334 | 493
5335 | 493
5336 | 493
5337 | 494
5338 | 494
5339 | 494
5340 | 494
5341 | 494
5342 | 494
5343 | 495
5344 | 495
5345 | 496
5346 | 496
5347 | 496
5348 | 497
5349 | 497
5350 | 497
5351 | 497
5352 | 498
5353 | 498
5354 | 499
5355 | 499
5356 | 500
5357 | 500
5358 | 500
5359 | 501
5360 | 501
5361 | 503
5362 | 504
5363 | 504
5364 | 504
5365 | 504
5366 | 505
5367 | 505
5368 | 505
5369 | 506
5370 | 508
5371 | 508
5372 | 508
5373 | 508
5374 | 509
5375 | 509
5376 | 509
5377 | 510
5378 | 511
5379 | 512
5380 | 512
5381 | 512
5382 | 512
5383 | 513
5384 | 513
5385 | 513
5386 | 514
5387 | 515
5388 | 515
5389 | 516
5390 | 516
5391 | 516
5392 | 518
5393 | 518
5394 | 519
5395 | 520
5396 | 520
5397 | 522
5398 | 522
5399 | 522
5400 | 523
5401 | 523
5402 | 524
5403 | 524
5404 | 525
5405 | 525
5406 | 525
5407 | 525
5408 | 526
5409 | 526
5410 | 529
5411 | 529
5412 | 529
5413 | 531
5414 | 534
5415 | 534
5416 | 535
5417 | 536
5418 | 537
5419 | 537
5420 | 538
5421 | 538
5422 | 538
5423 | 538
5424 | 539
5425 | 539
5426 | 541
5427 | 542
5428 | 543
5429 | 543
5430 | 543
5431 | 544
5432 | 544
5433 | 545
5434 | 546
5435 | 546
5436 | 546
5437 | 546
5438 | 547
5439 | 549
5440 | 551
5441 | 551
5442 | 552
5443 | 553
5444 | 554
5445 | 554
5446 | 554
5447 | 555
5448 | 557
5449 | 558
5450 | 559
5451 | 562
5452 | 564
5453 | 565
5454 | 567
5455 | 571
5456 | 572
5457 | 572
5458 | 574
5459 | 575
5460 | 576
5461 | 579
5462 | 579
5463 | 580
5464 | 581
5465 | 581
5466 | 582
5467 | 582
5468 | 583
5469 | 585
5470 | 586
5471 | 586
5472 | 587
5473 | 588
5474 | 588
5475 | 588
5476 | 589
5477 | 589
5478 | 590
5479 | 594
5480 | 595
5481 | 597
5482 | 597
5483 | 601
5484 | 602
5485 | 602
5486 | 604
5487 | 604
5488 | 604
5489 | 608
5490 | 610
5491 | 612
5492 | 612
5493 | 615
5494 | 616
5495 | 617
5496 | 617
5497 | 618
5498 | 619
5499 | 620
5500 | 627
5501 | 627
5502 | 628
5503 | 628
5504 | 629
5505 | 630
5506 | 630
5507 | 632
5508 | 633
5509 | 633
5510 | 633
5511 | 634
5512 | 637
5513 | 638
5514 | 640
5515 | 644
5516 | 646
5517 | 649
5518 | 654
5519 | 659
5520 | 661
5521 | 661
5522 | 664
5523 | 666
5524 | 669
5525 | 670
5526 | 671
5527 | 674
5528 | 674
5529 | 677
5530 | 683
5531 | 687
5532 | 691
5533 | 693
5534 | 696
5535 |
--------------------------------------------------------------------------------
/deepprime2sec.py:
--------------------------------------------------------------------------------
1 | __author__ = "Ehsaneddin Asgari"
2 | __license__ = "Apache 2"
3 | __version__ = "1.0.0"
4 | __maintainer__ = "Ehsaneddin Asgari"
5 | __email__ = "asgari@berkeley.edu"
6 | __project__ = "LLP - DeepPrime2Sec"
7 | __website__ = "https://llp.berkeley.edu/deepprime2sec/"
8 |
9 | import argparse
10 | import os
11 | import os.path
12 | import sys
13 | import warnings
14 | from utility.training import training_loop
15 | import yaml
16 |
17 | def checkArgs(args):
18 | '''
19 |     This function checks the input arguments and returns the path to the config file
20 | '''
21 | parser = argparse.ArgumentParser()
22 |
23 |
24 | # input config #################################################################################################
25 | parser.add_argument('--config', action='store', dest='config_file', default='sample_configs/model_a.yaml', type=str,
26 | help='The config file for secondary structure prediction / please see the examples in the sample_configs/')
27 |
28 |
29 | parsedArgs = parser.parse_args()
30 |
31 | if (not os.access(parsedArgs.config_file, os.F_OK)):
32 | print("\nError: Permission denied or could not find the config file!")
33 | return False
34 | return parsedArgs.config_file
35 |
36 | if __name__ == '__main__':
37 | warnings.filterwarnings('ignore')
38 | res = checkArgs(sys.argv)
39 |     if res is not False:
40 |         with open(res, 'r') as f:
41 |             config = yaml.safe_load(f)
42 |         training_loop(**config)
43 |     else:
44 |         # checkArgs has already printed the error message
45 |         sys.exit(1)
46 |
47 |
48 |
--------------------------------------------------------------------------------
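
Note on usage: deepprime2sec.py is config-driven, so a full training run is described by a single YAML file (the argparse default is sample_configs/model_a.yaml) whose keys are unpacked into training_loop. The sketch below mirrors that flow directly from Python; it assumes it is run from the repository root, and the valid config keys are whatever the files in sample_configs/ define (not reproduced here).

```python
# Minimal sketch mirroring deepprime2sec.py: load a training config and
# hand its keys to training_loop. Assumes the repository root is the CWD.
import yaml

from utility.training import training_loop

with open('sample_configs/model_a.yaml') as f:  # any of model_a.yaml .. model_f.yaml
    config = yaml.safe_load(f)                  # parsed into a plain dict

training_loop(**config)  # the dict keys must match training_loop's keyword arguments
```
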
/installations/deepprime2sec.yml:
--------------------------------------------------------------------------------
1 | name: keras
2 | channels:
3 | - aaronzs
4 | - anaconda
5 | - conda-forge
6 | - defaults
7 | dependencies:
8 | - _tflow_select=2.1.0=gpu
9 | - absl-py=0.7.1=py36_0
10 | - asn1crypto=0.24.0=py36_1003
11 | - astor=0.7.1=py_0
12 | - atk=2.25.90=hb9dd440_1002
13 | - attrs=19.1.0=py_0
14 | - backcall=0.1.0=py_0
15 | - biopython=1.73=py36h14c3975_0
16 | - blas=2.8=openblas
17 | - bleach=1.5.0=py36_0
18 | - boto=2.49.0=py_0
19 | - boto3=1.9.141=py_0
20 | - botocore=1.12.141=py_0
21 | - bz2file=0.98=py_0
22 | - bzip2=1.0.6=h14c3975_1002
23 | - c-ares=1.15.0=h14c3975_1001
24 | - ca-certificates=2019.3.9=hecc5488_0
25 | - cairo=1.16.0=ha4e643d_1000
26 | - certifi=2019.3.9=py36_0
27 | - cffi=1.12.3=py36h8022711_0
28 | - chardet=3.0.4=py36_1003
29 | - cryptography=2.6.1=py36h72c5cf5_0
30 | - cudatoolkit=9.0=h13b8566_0
31 | - cudnn=7.3.1=cuda9.0_0
32 | - dbus=1.13.6=he372182_0
33 | - decorator=4.4.0=py_0
34 | - defusedxml=0.5.0=py_1
35 | - docutils=0.14=py36_1001
36 | - entrypoints=0.3=py36_1000
37 | - expat=2.2.5=hf484d3e_1002
38 | - fontconfig=2.13.1=he4413a7_1000
39 | - freetype=2.10.0=he983fc9_0
40 | - gast=0.2.2=py_0
41 | - gdk-pixbuf=2.36.12=h49783d7_1002
42 | - gensim=3.7.1=py36he1b5a44_1
43 | - gettext=0.19.8.1=hc5be6a0_1002
44 | - glib=2.58.3=hf63aee3_1001
45 | - gobject-introspection=1.58.2=py36h2da5eee_1000
46 | - graphite2=1.3.13=hf484d3e_1000
47 | - grpcio=1.16.1=py36hf8bcb03_1
48 | - gst-plugins-base=1.14.4=hdf3bae2_1001
49 | - gstreamer=1.14.4=h66beb1c_1001
50 | - gtk2=2.24.31=hb68c50a_1001
51 | - h5py=2.9.0=nompi_py36hf008753_1102
52 | - harfbuzz=2.4.0=h37c48d4_0
53 | - hdf5=1.10.4=nompi_h3c11f04_1106
54 | - html5lib=0.9999999=py36_0
55 | - icu=58.2=hf484d3e_1000
56 | - idna=2.8=py36_1000
57 | - ipykernel=5.1.0=py36h24bf2e0_1002
58 | - ipython=7.5.0=py36h24bf2e0_0
59 | - ipython_genutils=0.2.0=py_1
60 | - ipywidgets=7.4.2=py_0
61 | - jedi=0.13.3=py36_0
62 | - jinja2=2.10.1=py_0
63 | - jmespath=0.9.4=py_0
64 | - jpeg=9c=h14c3975_1001
65 | - jsonschema=3.0.1=py36_0
66 | - jupyter=1.0.0=py_2
67 | - jupyter_client=5.2.4=py_3
68 | - jupyter_console=6.0.0=py_0
69 | - jupyter_core=4.4.0=py_0
70 | - keras-applications=1.0.7=py_1
71 | - keras-base=2.2.4=py36_0
72 | - keras-gpu=2.2.4=0
73 | - keras-preprocessing=1.0.9=py_1
74 | - libblas=3.8.0=8_openblas
75 | - libcblas=3.8.0=8_openblas
76 | - libffi=3.2.1=he1b5a44_1006
77 | - libgcc-ng=8.2.0=hdf63c60_1
78 | - libgfortran-ng=7.3.0=hdf63c60_0
79 | - libgpuarray=0.7.6=h14c3975_1003
80 | - libiconv=1.15=h516909a_1005
81 | - liblapack=3.8.0=8_openblas
82 | - liblapacke=3.8.0=8_openblas
83 | - libopenblas=0.2.20=h9ac9557_7
84 | - libpng=1.6.37=hed695b0_0
85 | - libprotobuf=3.7.1=h8b12597_0
86 | - libsodium=1.0.16=h14c3975_1001
87 | - libstdcxx-ng=8.2.0=hdf63c60_1
88 | - libtiff=4.0.10=h648cc4a_1001
89 | - libuuid=2.32.1=h14c3975_1000
90 | - libxcb=1.13=h14c3975_1002
91 | - libxml2=2.9.9=h13577e0_0
92 | - mako=1.0.7=py_1
93 | - markdown=2.6.11=py_0
94 | - markupsafe=1.1.1=py36h14c3975_0
95 | - mistune=0.8.4=py36h14c3975_1000
96 | - mock=3.0.3=py36_0
97 | - nbconvert=5.5.0=py_0
98 | - nbformat=4.4.0=py_1
99 | - ncurses=6.1=hf484d3e_1002
100 | - notebook=5.7.8=py36_0
101 | - numpy=1.14.3=py36h28100ab_1
102 | - numpy-base=1.14.3=py36h0ea5e3f_1
103 | - openblas=0.3.6=h6e990d7_1
104 | - openssl=1.1.1b=h14c3975_1
105 | - pandoc=2.7.2=0
106 | - pandocfilters=1.4.2=py_1
107 | - pango=1.40.14=h4ea9474_1004
108 | - parso=0.4.0=py_0
109 | - pcre=8.41=hf484d3e_1003
110 | - pexpect=4.7.0=py36_0
111 | - pickleshare=0.7.5=py36_1000
112 | - pip=19.1=py36_0
113 | - pixman=0.34.0=h14c3975_1003
114 | - prometheus_client=0.6.0=py_0
115 | - prompt_toolkit=2.0.9=py_0
116 | - protobuf=3.7.1=py36he1b5a44_0
117 | - pthread-stubs=0.4=h14c3975_1001
118 | - ptyprocess=0.6.0=py_1001
119 | - pycparser=2.19=py36_1
120 | - pygments=2.3.1=py_0
121 | - pygpu=0.7.6=py36h3010b51_1000
122 | - pyopenssl=19.0.0=py36_0
123 | - pyqt=5.9.2=py36h05f1152_2
124 | - pyrsistent=0.15.1=py36h516909a_0
125 | - pysocks=1.6.8=py36_1002
126 | - python=3.6.7=h381d211_1004
127 | - python-dateutil=2.8.0=py_0
128 | - pyyaml=5.1=py36h14c3975_0
129 | - pyzmq=18.0.1=py36hc4ba49a_1
130 | - qt=5.9.7=h52cfd70_1
131 | - qtconsole=4.4.3=py_0
132 | - readline=7.0=hf8c457e_1001
133 | - requests=2.21.0=py36_1000
134 | - s3transfer=0.2.0=py36_0
135 | - scikit-learn=0.20.3=py36ha8026db_1
136 | - scipy=1.2.1=py36h09a28d5_1
137 | - send2trash=1.5.0=py_0
138 | - setuptools=41.0.1=py36_0
139 | - sip=4.19.8=py36hf484d3e_1000
140 | - six=1.12.0=py36_1000
141 | - smart_open=1.8.3=py_0
142 | - sqlite=3.26.0=h67949de_1001
143 | - tensorboard=1.10.0=py36_0
144 | - tensorflow=1.10.0=py36_0
145 | - tensorflow-estimator=1.13.0=py_0
146 | - tensorflow-gpu=1.10.0=py36_0
147 | - termcolor=1.1.0=py_2
148 | - terminado=0.8.2=py36_0
149 | - testpath=0.4.2=py_1001
150 | - theano=1.0.3=py36_0
151 | - tk=8.6.9=h84994c4_1001
152 | - tornado=6.0.2=py36h516909a_0
153 | - tqdm=4.31.1=py_0
154 | - traitlets=4.3.2=py36_1000
155 | - urllib3=1.24.2=py36_0
156 | - wcwidth=0.1.7=py_1
157 | - webencodings=0.5.1=py_1
158 | - werkzeug=0.15.2=py_0
159 | - wheel=0.33.1=py36_0
160 | - widgetsnbextension=3.4.2=py36_1000
161 | - xorg-kbproto=1.0.7=h14c3975_1002
162 | - xorg-libice=1.0.9=h516909a_1004
163 | - xorg-libsm=1.2.3=h84519dc_1000
164 | - xorg-libx11=1.6.7=h14c3975_1000
165 | - xorg-libxau=1.0.9=h14c3975_0
166 | - xorg-libxdmcp=1.1.3=h516909a_0
167 | - xorg-libxext=1.3.4=h516909a_0
168 | - xorg-libxrender=0.9.10=h516909a_1002
169 | - xorg-libxt=1.1.5=h14c3975_1002
170 | - xorg-renderproto=0.11.1=h14c3975_1002
171 | - xorg-xextproto=7.3.0=h14c3975_1002
172 | - xorg-xproto=7.0.31=h14c3975_1007
173 | - xz=5.2.4=h14c3975_1001
174 | - yaml=0.1.7=h14c3975_1001
175 | - zeromq=4.3.1=hf484d3e_1000
176 | - zlib=1.2.11=h14c3975_1004
177 | - pip:
178 | - cycler==0.10.0
179 | - keras-multi-head==0.19.0
180 | - keras-pos-embd==0.10.0
181 | - keras-self-attention==0.41.0
182 | - kiwisolver==1.1.0
183 | - matplotlib==3.1.0
184 | - pandas==0.24.2
185 | - pyparsing==2.4.0
186 | - pytz==2019.1
187 | - tensorflow-hub==0.4.0
188 |
189 |
--------------------------------------------------------------------------------
/installations/requirements.txt:
--------------------------------------------------------------------------------
1 | absl-py==0.7.1
2 | asn1crypto==0.24.0
3 | astor==0.7.1
4 | attrs==19.1.0
5 | backcall==0.1.0
6 | biopython==1.73
7 | bleach==1.5.0
8 | boto==2.49.0
9 | boto3==1.9.141
10 | botocore==1.12.141
11 | bz2file==0.98
12 | certifi==2019.3.9
13 | cffi==1.12.3
14 | chardet==3.0.4
15 | cryptography==2.6.1
16 | cycler==0.10.0
17 | decorator==4.4.0
18 | defusedxml==0.5.0
19 | docutils==0.14
20 | entrypoints==0.3
21 | gast==0.2.2
22 | gensim==3.7.1
23 | grpcio==1.16.1
24 | h5py==2.9.0
25 | html5lib==0.9999999
26 | idna==2.8
27 | ipykernel==5.1.0
28 | ipython==7.5.0
29 | ipython-genutils==0.2.0
30 | ipywidgets==7.4.2
31 | jedi==0.13.3
32 | Jinja2==2.10.1
33 | jmespath==0.9.4
34 | jsonschema==3.0.1
35 | jupyter-client==5.2.4
36 | jupyter-console==6.0.0
37 | jupyter-core==4.4.0
38 | Keras==2.2.4
39 | Keras-Applications==1.0.7
40 | keras-multi-head==0.19.0
41 | keras-pos-embd==0.10.0
42 | Keras-Preprocessing==1.0.9
43 | keras-self-attention==0.41.0
44 | kiwisolver==1.1.0
45 | Mako==1.0.7
46 | Markdown==2.6.11
47 | MarkupSafe==1.1.1
48 | matplotlib==3.1.0
49 | mistune==0.8.4
50 | mock==3.0.3
51 | nbconvert==5.5.0
52 | nbformat==4.4.0
53 | notebook==5.7.8
54 | numpy==1.14.3
55 | pandas==0.24.2
56 | pandocfilters==1.4.2
57 | parso==0.4.0
58 | pexpect==4.7.0
59 | pickleshare==0.7.5
60 | prometheus-client==0.6.0
61 | prompt-toolkit==2.0.9
62 | protobuf==3.7.1
63 | ptyprocess==0.6.0
64 | pycparser==2.19
65 | Pygments==2.3.1
66 | pygpu==0.7.6
67 | pyOpenSSL==19.0.0
68 | pyparsing==2.4.0
69 | pyrsistent==0.15.1
70 | PySocks==1.6.8
71 | python-dateutil==2.8.0
72 | pytz==2019.1
73 | PyYAML==5.1
74 | pyzmq==18.0.1
75 | qtconsole==4.4.3
76 | requests==2.21.0
77 | s3transfer==0.2.0
78 | scikit-learn==0.20.3
79 | scipy==1.2.1
80 | seaborn==0.9.0
81 | Send2Trash==1.5.0
82 | six==1.12.0
83 | smart-open==1.8.3
84 | tensorboard==1.10.0
85 | tensorflow==1.10.0
86 | tensorflow-estimator==1.13.0
87 | tensorflow-gpu==1.10.0
88 | tensorflow-hub==0.4.0
89 | termcolor==1.1.0
90 | terminado==0.8.2
91 | testpath==0.4.2
92 | Theano==1.0.3
93 | tornado==6.0.2
94 | tqdm==4.31.1
95 | traitlets==4.3.2
96 | urllib3==1.24.2
97 | wcwidth==0.1.7
98 | webencodings==0.5.1
99 | Werkzeug==0.15.2
100 | widgetsnbextension==3.4.2
101 |
--------------------------------------------------------------------------------
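
Both installation files pin the same deep-learning stack (Keras 2.2.4 on TensorFlow 1.10.0, with Python 3.6 in the conda environment). An optional sanity check after installing from either file is to import the stack and compare the reported versions against the pins above:

```python
# Optional sanity check: confirm the pinned stack imports and report its versions.
# Expected values follow installations/requirements.txt (Keras 2.2.4, TF 1.10.0).
import keras
import tensorflow as tf

print('keras     :', keras.__version__)
print('tensorflow:', tf.__version__)
```
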
/layers/crf.py:
--------------------------------------------------------------------------------
1 |
2 | # -*- coding: utf-8 -*-
3 | from __future__ import absolute_import
4 |
5 | """
6 | Author: Philipp Gross, https://github.com/phipleg/keras/blob/crf/keras/layers/crf.py
7 | ==== We performed slight modifications
8 | """
9 |
10 | from keras import backend as K
11 | from keras import initializers, regularizers, constraints
12 | from keras.engine import Layer, InputSpec
13 |
14 | class ChainCRF(Layer):
15 |
16 | """A Linear Chain Conditional Random Field output layer.
17 | It carries the loss function and its weights for computing
18 |     the global tag sequence scores. During training it acts as
19 |     the identity function, passing the inputs through to the
20 |     loss function. At test time it applies Viterbi decoding
21 |     and returns the best-scoring tag sequence as one-hot encoded vectors.
22 | # Arguments
23 | init: weight initialization function for chain energies U.
24 | Can be the name of an existing function (str),
25 | or a Theano function (see: [initializers](../initializers.md)).
26 | U_regularizer: instance of [WeightRegularizer](../regularizers.md)
27 |         (e.g. L1 or L2 regularization), applied to the transition weight matrix.
28 | b_start_regularizer: instance of [WeightRegularizer](../regularizers.md),
29 | applied to the start bias b.
30 | b_end_regularizer: instance of [WeightRegularizer](../regularizers.md)
31 | module, applied to the end bias b.
32 | b_start_constraint: instance of the [constraints](../constraints.md)
33 | module, applied to the start bias b.
34 | b_end_constraint: instance of the [constraints](../constraints.md)
35 | module, applied to the end bias b.
36 | weights: list of Numpy arrays for initializing [U, b_start, b_end].
37 | Thus it should be a list of 3 elements of shape
38 | [(n_classes, n_classes), (n_classes, ), (n_classes, )]
39 | # Input shape
40 | 3D tensor with shape `(nb_samples, timesteps, nb_classes)`, where
41 |         `timesteps >= 2` and `nb_classes >= 2`.
42 | # Output shape
43 | Same shape as input.
44 | # Masking
45 | This layer supports masking for input sequences of variable length.
46 | # Example
47 | ```python
48 | # As the last layer of sequential layer with
49 | # model.output_shape == (None, timesteps, nb_classes)
50 | crf = ChainCRF()
51 | model.add(crf)
52 | # now: model.output_shape == (None, timesteps, nb_classes)
53 | # Compile model with chain crf loss (and one-hot encoded labels) and accuracy
54 | model.compile(loss=crf.loss, optimizer='sgd', metrics=['accuracy'])
55 | # Alternatively, compile model with sparsely encoded labels and sparse accuracy:
56 | model.compile(loss=crf.sparse_loss, optimizer='sgd', metrics=['sparse_categorical_accuracy'])
57 | ```
58 | # Gotchas
59 | ## Model loading
60 | When you want to load a saved model that has a crf output, then loading
61 | the model with 'keras.models.load_model' won't work properly because
62 | the reference of the loss function to the transition parameters is lost. To
63 | fix this, you need to use the parameter 'custom_objects' as follows:
64 | ```python
65 |         from keras.layers.crf import create_custom_objects
66 | model = keras.models.load_model(filename, custom_objects=create_custom_objects())
67 | ```
68 | ## Temporal sample weights
69 |     Given a ChainCRF instance crf, both loss functions, crf.loss and crf.sparse_loss,
70 |     return a tensor of shape (batch_size, 1) and not (batch_size, maxlen).
71 |     This means that sample weighting in temporal mode is not supported.
72 | """
73 |
74 | def __init__(self, init='glorot_uniform',
75 | U_regularizer=None,
76 | b_start_regularizer=None,
77 | b_end_regularizer=None,
78 | U_constraint=None,
79 | b_start_constraint=None,
80 | b_end_constraint=None,
81 | weights=None,
82 | **kwargs):
83 | super(ChainCRF, self).__init__(**kwargs)
84 | self.init = initializers.get(init)
85 | self.U_regularizer = regularizers.get(U_regularizer)
86 | self.b_start_regularizer = regularizers.get(b_start_regularizer)
87 | self.b_end_regularizer = regularizers.get(b_end_regularizer)
88 | self.U_constraint = constraints.get(U_constraint)
89 | self.b_start_constraint = constraints.get(b_start_constraint)
90 | self.b_end_constraint = constraints.get(b_end_constraint)
91 |
92 | self.initial_weights = weights
93 |
94 | self.supports_masking = True
95 | self.uses_learning_phase = True
96 | self.input_spec = [InputSpec(ndim=3)]
97 |
98 | def compute_output_shape(self, input_shape):
99 | assert input_shape and len(input_shape) == 3
100 | return (input_shape[0], input_shape[1], input_shape[2])
101 |
102 | def compute_mask(self, input, mask=None):
103 | if mask is not None:
104 | return K.any(mask, axis=1)
105 | return mask
106 |
107 | def _fetch_mask(self):
108 | mask = None
109 | if self._inbound_nodes:
110 | mask = self._inbound_nodes[0].input_masks[0]
111 | return mask
112 |
113 | def build(self, input_shape):
114 | assert len(input_shape) == 3
115 | n_classes = input_shape[2]
116 | n_steps = input_shape[1]
117 | assert n_steps is None or n_steps >= 2
118 | self.input_spec = [InputSpec(dtype=K.floatx(),
119 | shape=(None, n_steps, n_classes))]
120 |
121 | self.U = self.add_weight((n_classes, n_classes),
122 | initializer=self.init,
123 | name='U',
124 | regularizer=self.U_regularizer,
125 | constraint=self.U_constraint)
126 |
127 | self.b_start = self.add_weight((n_classes,),
128 | initializer='zero',
129 | name='b_start',
130 | regularizer=self.b_start_regularizer,
131 | constraint=self.b_start_constraint)
132 |
133 | self.b_end = self.add_weight((n_classes,),
134 | initializer='zero',
135 | name='b_end',
136 | regularizer=self.b_end_regularizer,
137 | constraint=self.b_end_constraint)
138 |
139 | if self.initial_weights is not None:
140 | self.set_weights(self.initial_weights)
141 | del self.initial_weights
142 |
143 | self.built = True
144 |
145 | def call(self, x, mask=None):
146 | y_pred = viterbi_decode(x, self.U, self.b_start, self.b_end, mask)
147 | nb_classes = self.input_spec[0].shape[2]
148 | y_pred_one_hot = K.one_hot(y_pred, nb_classes)
149 | return K.in_train_phase(x, y_pred_one_hot)
150 |
151 | def loss(self, y_true, y_pred):
152 | """Linear Chain Conditional Random Field loss function.
153 | """
154 | mask = self._fetch_mask()
155 | return chain_crf_loss(y_true, y_pred, self.U, self.b_start, self.b_end, mask)
156 |
157 | def sparse_loss(self, y_true, y_pred):
158 | """Linear Chain Conditional Random Field loss function with sparse
159 | tag sequences.
160 | """
161 | y_true = K.cast(y_true, 'int32')
162 | y_true = K.squeeze(y_true, 2)
163 | mask = self._fetch_mask()
164 | return sparse_chain_crf_loss(y_true, y_pred, self.U, self.b_start, self.b_end, mask)
165 |
166 | def get_config(self):
167 | config = {
168 | 'init': initializers.serialize(self.init),
169 | 'U_regularizer': regularizers.serialize(self.U_regularizer),
170 | 'b_start_regularizer': regularizers.serialize(self.b_start_regularizer),
171 | 'b_end_regularizer': regularizers.serialize(self.b_end_regularizer),
172 | 'U_constraint': constraints.serialize(self.U_constraint),
173 | 'b_start_constraint': constraints.serialize(self.b_start_constraint),
174 | 'b_end_constraint': constraints.serialize(self.b_end_constraint)
175 | }
176 | base_config = super(ChainCRF, self).get_config()
177 | return dict(list(base_config.items()) + list(config.items()))
178 |
179 |
180 |
181 | def path_energy(y, x, U, b_start=None, b_end=None, mask=None):
182 | """Calculates the energy of a tag path y for a given input x (with mask),
183 | transition energies U and boundary energies b_start, b_end."""
184 | x = add_boundary_energy(x, b_start, b_end, mask)
185 | return path_energy0(y, x, U, mask)
186 |
187 |
188 | def path_energy0(y, x, U, mask=None):
189 | """Path energy without boundary potential handling."""
190 | n_classes = K.shape(x)[2]
191 | y_one_hot = K.one_hot(y, n_classes)
192 |
193 | # Tag path energy
194 | energy = K.sum(x * y_one_hot, 2)
195 | energy = K.sum(energy, 1)
196 |
197 | # Transition energy
198 | y_t = y[:, :-1]
199 | y_tp1 = y[:, 1:]
200 | U_flat = K.reshape(U, [-1])
201 | # Convert 2-dim indices (y_t, y_tp1) of U to 1-dim indices of U_flat:
202 | flat_indices = y_t * n_classes + y_tp1
203 | U_y_t_tp1 = K.gather(U_flat, flat_indices)
204 |
205 | if mask is not None:
206 | mask = K.cast(mask, K.floatx())
207 | y_t_mask = mask[:, :-1]
208 | y_tp1_mask = mask[:, 1:]
209 | U_y_t_tp1 *= y_t_mask * y_tp1_mask
210 |
211 | energy += K.sum(U_y_t_tp1, axis=1)
212 |
213 | return energy
214 |
215 |
216 | def sparse_chain_crf_loss(y, x, U, b_start=None, b_end=None, mask=None):
217 | """Given the true sparsely encoded tag sequence y, input x (with mask),
218 | transition energies U, boundary energies b_start and b_end, it computes
219 | the loss function of a Linear Chain Conditional Random Field:
220 |     loss(y, x) = NLL(P(y|x)), where P(y|x) = exp(E(y, x)) / Z.
221 | So, loss(y, x) = - E(y, x) + log(Z)
222 | Here, E(y, x) is the tag path energy, and Z is the normalization constant.
223 |     The value log(Z) is also called the free energy.
224 | """
225 | x = add_boundary_energy(x, b_start, b_end, mask)
226 | energy = path_energy0(y, x, U, mask)
227 | energy -= free_energy0(x, U, mask)
228 | return K.expand_dims(-energy, -1)
229 |
230 |
231 | def chain_crf_loss(y, x, U, b_start=None, b_end=None, mask=None):
232 | """Variant of sparse_chain_crf_loss but with one-hot encoded tags y."""
233 | y_sparse = K.argmax(y, -1)
234 | y_sparse = K.cast(y_sparse, 'int32')
235 | return sparse_chain_crf_loss(y_sparse, x, U, b_start, b_end, mask)
236 |
237 |
238 | def add_boundary_energy(x, b_start=None, b_end=None, mask=None):
239 |     """Given the observations x, it adds the start boundary energy b_start (resp.
240 |     the end boundary energy b_end) to the start (resp. end) elements and multiplies
241 |     by the mask."""
242 | if mask is None:
243 | if b_start is not None:
244 | x = K.concatenate([x[:, :1, :] + b_start, x[:, 1:, :]], axis=1)
245 | if b_end is not None:
246 | x = K.concatenate([x[:, :-1, :], x[:, -1:, :] + b_end], axis=1)
247 | else:
248 | mask = K.cast(mask, K.floatx())
249 | mask = K.expand_dims(mask, 2)
250 | x *= mask
251 | if b_start is not None:
252 | mask_r = K.concatenate([K.zeros_like(mask[:, :1]), mask[:, :-1]], axis=1)
253 | start_mask = K.cast(K.greater(mask, mask_r), K.floatx())
254 | x = x + start_mask * b_start
255 | if b_end is not None:
256 | mask_l = K.concatenate([mask[:, 1:], K.zeros_like(mask[:, -1:])], axis=1)
257 | end_mask = K.cast(K.greater(mask, mask_l), K.floatx())
258 | x = x + end_mask * b_end
259 | return x
260 |
261 |
262 | def viterbi_decode(x, U, b_start=None, b_end=None, mask=None):
263 | """Computes the best tag sequence y for a given input x, i.e. the one that
264 | maximizes the value of path_energy."""
265 | x = add_boundary_energy(x, b_start, b_end, mask)
266 |
267 | alpha_0 = x[:, 0, :]
268 | gamma_0 = K.zeros_like(alpha_0)
269 | initial_states = [gamma_0, alpha_0]
270 | _, gamma = _forward(x,
271 | lambda B: [K.cast(K.argmax(B, axis=1), K.floatx()), K.max(B, axis=1)],
272 | initial_states,
273 | U,
274 | mask)
275 | y = _backward(gamma, mask)
276 | return y
277 |
278 |
279 | def free_energy(x, U, b_start=None, b_end=None, mask=None):
280 |     """Computes efficiently the sum of all path energies for input x, when
281 |     running over all possible tag sequences."""
282 | x = add_boundary_energy(x, b_start, b_end, mask)
283 | return free_energy0(x, U, mask)
284 |
285 |
286 | def free_energy0(x, U, mask=None):
287 | """Free energy without boundary potential handling."""
288 | initial_states = [x[:, 0, :]]
289 | last_alpha, _ = _forward(x,
290 | lambda B: [K.logsumexp(B, axis=1)],
291 | initial_states,
292 | U,
293 | mask)
294 | return last_alpha[:, 0]
295 |
296 |
297 | def _forward(x, reduce_step, initial_states, U, mask=None):
298 | """Forward recurrence of the linear chain crf."""
299 |
300 | def _forward_step(energy_matrix_t, states):
301 | alpha_tm1 = states[-1]
302 | new_states = reduce_step(K.expand_dims(alpha_tm1, 2) + energy_matrix_t)
303 | return new_states[0], new_states
304 |
305 | U_shared = K.expand_dims(K.expand_dims(U, 0), 0)
306 |
307 | if mask is not None:
308 | mask = K.cast(mask, K.floatx())
309 | mask_U = K.expand_dims(K.expand_dims(mask[:, :-1] * mask[:, 1:], 2), 3)
310 | U_shared = U_shared * mask_U
311 |
312 | inputs = K.expand_dims(x[:, 1:, :], 2) + U_shared
313 | inputs = K.concatenate([inputs, K.zeros_like(inputs[:, -1:, :, :])], axis=1)
314 |
315 | last, values, _ = K.rnn(_forward_step, inputs, initial_states)
316 | return last, values
317 |
318 |
319 | def batch_gather(reference, indices):
320 | ref_shape = K.shape(reference)
321 | batch_size = ref_shape[0]
322 | n_classes = ref_shape[1]
323 | flat_indices = K.arange(0, batch_size) * n_classes + K.flatten(indices)
324 | return K.gather(K.flatten(reference), flat_indices)
325 |
326 |
327 | def _backward(gamma, mask):
328 | """Backward recurrence of the linear chain crf."""
329 | gamma = K.cast(gamma, 'int32')
330 |
331 | def _backward_step(gamma_t, states):
332 | y_tm1 = K.squeeze(states[0], 0)
333 | y_t = batch_gather(gamma_t, y_tm1)
334 | return y_t, [K.expand_dims(y_t, 0)]
335 |
336 | initial_states = [K.expand_dims(K.zeros_like(gamma[:, 0, 0]), 0)]
337 | _, y_rev, _ = K.rnn(_backward_step,
338 | gamma,
339 | initial_states,
340 | go_backwards=True)
341 | y = K.reverse(y_rev, 1)
342 |
343 | if mask is not None:
344 | mask = K.cast(mask, dtype='int32')
345 | # mask output
346 | y *= mask
347 | # set masked values to -1
348 | y += -(1 - mask)
349 | return y
350 |
351 | def create_custom_objects():
352 | """Returns the custom objects, needed for loading a persisted model."""
353 | instanceHolder = {'instance': None}
354 |
355 | class ClassWrapper(ChainCRF):
356 | def __init__(self, *args, **kwargs):
357 | instanceHolder['instance'] = self
358 | super(ClassWrapper, self).__init__(*args, **kwargs)
359 |
360 | def loss(*args):
361 | method = getattr(instanceHolder['instance'], 'loss')
362 | return method(*args)
363 |
364 | def sparse_loss(*args):
365 | method = getattr(instanceHolder['instance'], 'sparse_loss')
366 | return method(*args)
367 |
368 | return {'ChainCRF': ClassWrapper, 'loss': loss, 'sparse_loss': sparse_loss}
369 |
--------------------------------------------------------------------------------
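Note on usage: the docstring above shows how `ChainCRF` is compiled into a model; as a minimal end-to-end sketch (toy dimensions assumed, mirroring how the model files below wire the layer), saving and reloading a CRF model looks roughly like this:

```python
# Minimal sketch with assumed toy dimensions; mirrors models/c_cnn_bilstm_crf.py:
# a TimeDistributed(Dense) projects to per-position class scores and ChainCRF
# decodes them. This is an illustration, not the project's entry point.
from keras.models import Model, load_model
from keras.layers import Input, Dense, TimeDistributed
from layers.crf import ChainCRF, create_custom_objects

n_classes = 9                                  # assumed number of tags
visible = Input(shape=(None, 32))              # assumed per-position feature size
scores = TimeDistributed(Dense(n_classes))(visible)
crf = ChainCRF(name='crf1')
output = crf(scores)
model = Model(inputs=visible, outputs=output)
model.compile(loss=crf.loss, optimizer='adam', metrics=['accuracy'])

model.save('crf_model.h5')
# Plain load_model loses the link between crf.loss and the transition weights,
# hence the custom_objects mapping described in the Gotchas section above.
restored = load_model('crf_model.h5', custom_objects=create_custom_objects())
```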
/layers/utility.py:
--------------------------------------------------------------------------------
1 | from keras import regularizers
2 | from keras.layers import Lambda, concatenate, Conv1D
3 |
4 |
5 | def slice_tensor(dimension, start, end, name='sliced_layer'):
6 | '''
7 |     :param dimension: index of the axis (0-based) along which to slice
8 |     :param start: start index of the slice
9 |     :param end: end index of the slice (exclusive)
10 |     :return: a Lambda layer that performs the slicing
11 | '''
12 |
13 | # Crops (or slices) a Tensor on a given dimension from start to end
14 |     # example : to crop tensor x[:, :, 5:10]
15 |     # call slice_tensor(2, 5, 10), i.e. crop along axis 2
16 | def func(x):
17 | if dimension == 0:
18 | return x[start: end]
19 | if dimension == 1:
20 | return x[:, start: end]
21 | if dimension == 2:
22 | return x[:, :, start: end]
23 | if dimension == 3:
24 | return x[:, :, :, start: end]
25 | if dimension == 4:
26 | return x[:, :, :, :, start: end]
27 |
28 | return Lambda(func, name=name)
29 |
30 |
31 | def multiscale_CNN(input_layer, gating_layer, filter_size, convs, kernel_regularizer=0.00005):
32 | '''
33 |     :param input_layer: input tensor with len(convs) * filter_size channels
34 |     :param gating_layer: shared Dense layer producing the gate z
35 |     :param filter_size: number of filters per convolution
36 |     :param convs: list of convolution kernel sizes
37 |     :param kernel_regularizer: L2 regularization coefficient for the convolutions
38 |     :return: gated mix z * input + (1 - z) * conv, with the same shape as the input
39 | '''
40 | z_t = gating_layer(input_layer)
41 | conclayers = []
42 | for idx, conv in enumerate(convs):
43 | conclayers.append(Conv1D(filter_size, conv, activation="relu", padding="same",
44 | kernel_regularizer=regularizers.l2(kernel_regularizer))(input_layer))
45 | conc = concatenate(conclayers)
46 | output = Lambda(lambda a: z_t * a[0] + (1 - z_t) * a[1])([input_layer, conc])
47 | return output
48 |
--------------------------------------------------------------------------------
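As a rough usage sketch for `multiscale_CNN` (shapes assumed; this mirrors how `models/f_multiscale_cnn.py` below stacks the block): the shared gating layer produces `z`, and the block returns `z * input + (1 - z) * conv`, so the input must already carry `len(convs) * filter_size` channels for the element-wise mix to be shape-compatible.

```python
# Sketch of a single gated multiscale block with assumed hyper-parameters.
from keras.layers import Dense, Input
from keras.models import Model

from layers.utility import multiscale_CNN

convs, filter_size = [3, 5, 7], 64                     # assumed kernel sizes / filters
inp = Input(shape=(None, len(convs) * filter_size))    # channels must match the gate
gating = Dense(len(convs) * filter_size, activation='sigmoid')
out = multiscale_CNN(inp, gating, filter_size, convs, kernel_regularizer=5e-5)
block = Model(inputs=inp, outputs=out)                 # output shape equals input shape
```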
/models/a_cnn_bilstm.py:
--------------------------------------------------------------------------------
1 | import inspect
2 | import os
3 | import sys
4 |
5 | currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
6 | parentdir = os.path.dirname(currentdir)
7 | sys.path.insert(0, parentdir)
8 |
9 | import numpy as np
10 | from keras.models import Model
11 | from keras.layers import Dense, CuDNNLSTM, Bidirectional, Input, Dropout, concatenate, Conv1D, \
12 | BatchNormalization
13 | from keras.layers.wrappers import TimeDistributed
14 | from layers.utility import slice_tensor
15 | from keras import optimizers
16 | from keras import regularizers
17 |
18 | np.random.seed(0)
19 |
20 |
21 | def model_a_cnn_bilstm(n_classes, convs=[3, 5, 7], dense_size=200, lstm_size=400, dropout_rate=0.5,
22 | features_to_use=['onehot', 'pssm'], filter_size=256, lr=0.001):
23 | '''
24 |     :param n_classes: number of output classes per residue
25 |     :param convs: list of convolution kernel sizes
26 |     :param dense_size: size of the dense layers
27 |     :param lstm_size: number of units per LSTM direction
28 |     :param dropout_rate: dropout rate before and after the BiLSTM
29 |     :param features_to_use: subset of ['onehot', 'pssm', 'embedding', 'elmo', 'biophysical']
30 |     :param filter_size: number of filters per convolution
31 |     :return: the compiled model and a descriptive run name
32 | '''
33 | visible = Input(shape=(None, 408))
34 |
35 | # slice different feature types
36 | biophysical = slice_tensor(2, 0, 16, name='biophysicalfeatures')(visible)
37 | embedding = slice_tensor(2, 16, 66, name='skipgramembd')(visible)
38 | onehot = slice_tensor(2, 66, 87, name='onehot')(visible)
39 | pssm = slice_tensor(2, 87, 108, name='pssm')(visible)
40 | elmo = slice_tensor(2, 108, 408, name='elmo')(visible)
41 |
42 |     # create input based on the selected features
43 | input_dict = {'pssm': pssm, 'onehot': onehot, 'embedding': embedding, 'elmo': elmo,
44 | 'biophysical': biophysical}
45 | features = []
46 | for feature in features_to_use:
47 | features.append(input_dict[feature])
48 |
49 | ## batch normalization on the input features
50 | if len(features_to_use) == 1:
51 | conclayers = features
52 | input = BatchNormalization(name='batchnorm_input')(features[0])
53 | else:
54 | input = BatchNormalization(name='batchnorm_input')(concatenate(features))
55 | conclayers = [input]
56 |
57 |     # performing the convolutions
58 | for idx, conv in enumerate(convs):
59 | idx = str(idx + 1)
60 | conclayers.append(BatchNormalization(name='batch_norm_conv' + idx)(
61 | Conv1D(filter_size, conv, activation="relu", padding="same", name='conv' + idx,
62 | kernel_regularizer=regularizers.l2(0.001))(input)))
63 | conc = concatenate(conclayers)
64 |
65 | # Dropout and Dense Layer before LSTM
66 | if dropout_rate > 0:
67 | drop_before = Dropout(dropout_rate, name='dropoutonconvs')(conc)
68 | dense_convinp = Dense(dense_size, activation='relu', name='denseonconvs')(drop_before)
69 | else:
70 | dense_convinp = Dense(dense_size, activation='relu', name='denseonconvs')(conc)
71 |
72 | # Batch normalize the results of dropout
73 | dense_convinpn = BatchNormalization(name='batch_norm_dense')(dense_convinp)
74 |
75 | # LSTM
76 | lstm = Bidirectional(CuDNNLSTM(lstm_size, return_sequences=True, name='bilstm'))(dense_convinpn)
77 | drop_after_lstm = Dropout(dropout_rate)(lstm)
78 | dense_out = Dense(dense_size, activation='relu')(drop_after_lstm)
79 |
80 |     # Labeling layer
81 | timedist = TimeDistributed(Dense(n_classes, activation='softmax'))(dense_out)
82 | model = Model(inputs=visible, outputs=timedist)
83 | adam = optimizers.Adam(lr=lr)
84 | model.compile(loss='categorical_crossentropy', optimizer=adam, weighted_metrics=['accuracy'],
85 | sample_weight_mode='temporal')
86 |
87 | # print model
88 | print(model.summary())
89 | return model, 'model_a_cnn_bilstm#' + '#'.join(features_to_use) + '@conv' + '_'.join(
90 | [str(c) for c in convs]) + '@dense_' + str(dense_size) + '@lstm' + str(lstm_size) + '@drop_rate' + str(
91 | dropout_rate) + '@filtersize_' + str(filter_size) + '@lr_' + str(lr)
92 |
--------------------------------------------------------------------------------
/models/b_cnn_bilstm_highway.py:
--------------------------------------------------------------------------------
1 | import inspect
2 | import os
3 | import sys
4 |
5 | currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
6 | parentdir = os.path.dirname(currentdir)
7 | sys.path.insert(0, parentdir)
8 | import numpy as np
9 | from keras.models import Model
10 | from keras.layers import Dense, CuDNNLSTM, Bidirectional, Input, Dropout, concatenate, Conv1D, \
11 | BatchNormalization
12 | from keras.layers.wrappers import TimeDistributed
13 | from layers.crf import ChainCRF
14 | from layers.utility import slice_tensor
15 | from keras import optimizers
16 | from keras import regularizers
17 |
18 | np.random.seed(0)
19 |
20 |
21 | def model_b_cnn_bilstm_highway(n_classes, convs=[3, 5, 7], dense_size=200, lstm_size=400, dropout_rate=0.5,
22 | features_to_use=['onehot', 'pssm'], filter_size=256, lr=0.001,
23 | use_CRF=False):
24 | '''
25 | :param n_classes:
26 | :param convs:
27 | :param dense_size:
28 | :param lstm_size:
29 | :param dropout_rate:
30 | :param features_to_use:
31 | :param filter_size:
32 | :param lr:
33 | :param use_CRF:
34 | :return:
35 | '''
36 |
37 | visible = Input(shape=(None, 408))
38 |
39 | # slice different feature types
40 | biophysical = slice_tensor(2, 0, 16, name='biophysicalfeatures')(visible)
41 | embedding = slice_tensor(2, 16, 66, name='skipgramembd')(visible)
42 | onehot = slice_tensor(2, 66, 87, name='onehot')(visible)
43 | pssm = slice_tensor(2, 87, 108, name='pssm')(visible)
44 | # we need batchnorm for the highway
45 | batchnorm_profile = BatchNormalization(name='batchnormseqprof')(pssm)
46 | elmo = slice_tensor(2, 108, 408, name='elmo')(visible)
47 |
48 |     # create input based on the selected features
49 | input_dict = {'pssm': pssm, 'onehot': onehot, 'embedding': embedding, 'elmo': elmo,
50 | 'biophysical': biophysical}
51 | features = []
52 | for feature in features_to_use:
53 | features.append(input_dict[feature])
54 |
55 | ## batch normalization on the input features
56 | if len(features_to_use) == 1:
57 | conclayers = features
58 | input = BatchNormalization(name='batchnorm_input')(features[0])
59 | else:
60 | input = BatchNormalization(name='batchnorm_input')(concatenate(features))
61 | conclayers = [input]
62 |
63 |     # performing the convolutions
64 | for idx, conv in enumerate(convs):
65 | idx = str(idx + 1)
66 | conclayers.append(BatchNormalization(name='batch_norm_conv' + idx)(
67 | Conv1D(filter_size, conv, activation="relu", padding="same", name='conv' + idx,
68 | kernel_regularizer=regularizers.l2(0.001))(input)))
69 | conc = concatenate(conclayers)
70 |
71 | # Dropout and Dense Layer before LSTM
72 | if dropout_rate > 0:
73 | drop_before = Dropout(dropout_rate, name='dropoutonconvs')(conc)
74 | dense_convinp = Dense(dense_size, activation='relu', name='denseonconvs')(drop_before)
75 | else:
76 | dense_convinp = Dense(dense_size, activation='relu', name='denseonconvs')(conc)
77 |
78 | # Batch normalize the results of dropout
79 | dense_convinpn = BatchNormalization(name='batch_norm_dense')(dense_convinp)
80 |
81 | # LSTM
82 | lstm = Bidirectional(CuDNNLSTM(lstm_size, return_sequences=True, name='bilstm'))(dense_convinpn)
83 | drop_after_lstm = Dropout(dropout_rate)(lstm)
84 |
85 | # Highway
86 | dense_out = Dense(dense_size, activation='relu')(drop_after_lstm)
87 | highway_layer = concatenate([dense_out, batchnorm_profile])
88 | highway_out = Dense(dense_size, activation='relu')(highway_layer)
89 |
90 | if use_CRF:
91 | timedist = TimeDistributed(Dense(n_classes, name='crf_in'))(highway_out)
92 | crf = ChainCRF(name="crf1")
93 | crf_output = crf(timedist)
94 | model = Model(inputs=visible, outputs=crf_output)
95 | adam = optimizers.Adam(lr=lr)
96 | model.compile(loss=crf.loss, optimizer=adam, weighted_metrics=['accuracy'], sample_weight_mode='temporal')
97 | else:
98 | timedist = TimeDistributed(Dense(n_classes, activation='softmax'))(highway_out)
99 | model = Model(inputs=visible, outputs=timedist)
100 | adam = optimizers.Adam(lr=lr)
101 | model.compile(loss='categorical_crossentropy', optimizer=adam, weighted_metrics=['accuracy'],
102 | sample_weight_mode='temporal')
103 | print(model.summary())
104 | return model, 'model_b_cnn_bilstm_highway#' + '#'.join(features_to_use) + '@conv' + '_'.join(
105 | [str(c) for c in convs]) + '@dense_' + str(dense_size) + '@lstm' + str(lstm_size) + '@droplstm' + str(
106 | dropout_rate) + '@filtersize_' + str(filter_size) + '@lr_' + str(lr) + '@crf_' + str(use_CRF)
107 |
--------------------------------------------------------------------------------
/models/c_cnn_bilstm_crf.py:
--------------------------------------------------------------------------------
1 | import inspect
2 | import os
3 | import sys
4 |
5 | currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
6 | parentdir = os.path.dirname(currentdir)
7 | sys.path.insert(0,parentdir)
8 |
9 | import numpy as np
10 | from keras.models import Model
11 | from keras.layers import Dense, CuDNNLSTM, Bidirectional, Input, Dropout, concatenate, Conv1D, \
12 | BatchNormalization
13 | from keras.layers.wrappers import TimeDistributed
14 | from layers.crf import ChainCRF
15 | from layers.utility import slice_tensor
16 | from keras import optimizers
17 | from keras import regularizers
18 | np.random.seed(0)
19 |
20 |
21 |
22 | def model_c_cnn_bilstm_crf(n_classes, convs=[3, 5, 7], dense_size=200, lstm_size=400, dropout_rate=0.5,
23 | features_to_use=['onehot', 'pssm'], filter_size=256, CRF_input_dim=200, lr=0.001):
24 | '''
25 | :param n_classes:
26 | :param convs:
27 | :param dense_size:
28 | :param lstm_size:
29 | :param dropout_rate:
30 | :param features_to_use:
31 | :param filter_size:
32 | :return:
33 | '''
34 | visible = Input(shape=(None, 408))
35 |
36 | # slice different feature types
37 | biophysical = slice_tensor(2, 0, 16, name='biophysicalfeatures')(visible)
38 | embedding = slice_tensor(2, 16, 66, name='skipgramembd')(visible)
39 | onehot = slice_tensor(2, 66, 87, name='onehot')(visible)
40 | pssm = slice_tensor(2, 87, 108, name='pssm')(visible)
41 | elmo = slice_tensor(2, 108, 408, name='elmo')(visible)
42 |
43 |     # create input based on the selected features
44 | input_dict = {'pssm': pssm, 'onehot': onehot, 'embedding': embedding, 'elmo': elmo,
45 | 'biophysical': biophysical}
46 | features = []
47 | for feature in features_to_use:
48 | features.append(input_dict[feature])
49 |
50 | ## batch normalization on the input features
51 | if len(features_to_use) == 1:
52 | conclayers = features
53 | input = BatchNormalization(name='batchnorm_input')(features[0])
54 | else:
55 | input = BatchNormalization(name='batchnorm_input')(concatenate(features))
56 | conclayers = [input]
57 |
58 |     # performing the convolutions
59 | for idx, conv in enumerate(convs):
60 | idx = str(idx + 1)
61 | conclayers.append(BatchNormalization(name='batch_norm_conv' + idx)(
62 | Conv1D(filter_size, conv, activation="relu", padding="same", name='conv' + idx,
63 | kernel_regularizer=regularizers.l2(0.001))(input)))
64 | conc = concatenate(conclayers)
65 |
66 | # Dropout and Dense Layer before LSTM
67 | if dropout_rate > 0:
68 | drop_before = Dropout(dropout_rate, name='dropoutonconvs')(conc)
69 | dense_convinp = Dense(dense_size, activation='relu', name='denseonconvs')(drop_before)
70 | else:
71 | dense_convinp = Dense(dense_size, activation='relu', name='denseonconvs')(conc)
72 |
73 | # Batch normalize the results of dropout
74 | dense_convinpn = BatchNormalization(name='batch_norm_dense')(dense_convinp)
75 |
76 | # LSTM
77 | lstm = Bidirectional(CuDNNLSTM(lstm_size, return_sequences=True, name='bilstm'))(dense_convinpn)
78 | drop_after_lstm = Dropout(dropout_rate)(lstm)
79 | dense_out = Dense(CRF_input_dim, activation='relu')(drop_after_lstm)
80 |
81 | timedist = TimeDistributed(Dense(n_classes, name='crf_in'))(dense_out)
82 | crf = ChainCRF(name="crf1")
83 | crf_output = crf(timedist)
84 | model = Model(inputs=visible, outputs=crf_output)
85 | adam=optimizers.Adam(lr=lr)
86 | model.compile(loss=crf.loss, optimizer=adam, weighted_metrics= ['accuracy'], sample_weight_mode='temporal')
87 | print(model.summary())
88 | return model, 'model_c_cnn_bilstm_CRF#'+'#'.join(features_to_use)+'@conv'+'_'.join([str(c) for c in convs])+'@dense_'+str(dense_size)+'@lstm'+str(lstm_size)+'@droplstm'+str(dropout_rate)+'@filtersize_'+str(filter_size)+ '@lr_' + str(lr)
89 |
90 |
--------------------------------------------------------------------------------
/models/d_cnn_bilstm_attention.py:
--------------------------------------------------------------------------------
1 | import inspect
2 | import os
3 | import sys
4 |
5 | currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
6 | parentdir = os.path.dirname(currentdir)
7 | sys.path.insert(0, parentdir)
8 |
9 | import numpy as np
10 |
11 | np.random.seed(7)
12 | from keras.models import Model
13 | from keras.layers import Dense, CuDNNLSTM, Bidirectional, Input, Dropout, concatenate, Conv1D, \
14 | BatchNormalization
15 | from keras.layers.wrappers import TimeDistributed
16 | from layers.crf import ChainCRF
17 | from layers.utility import slice_tensor
18 | from keras import optimizers
19 | from keras import regularizers
20 | from keras_self_attention import SeqSelfAttention
21 |
22 |
23 | def model_d_cnn_bilstm_attention(n_classes, convs=[3, 5, 7], dense_size=200, lstm_size=400, dropout_rate=0.5,
24 | features_to_use=['onehot', 'pssm'], filter_size=256, lr=0.001,
25 | use_CRF=False, attention_units=32, attention_type='additive'):
26 | '''
27 | :param n_classes:
28 | :param convs:
29 | :param dense_size:
30 | :param lstm_size:
31 | :param dropout_rate:
32 | :param features_to_use:
33 | :param filter_size:
34 | :param lr:
35 | :param use_CRF:
36 | :return:
37 | '''
38 |
39 | visible = Input(shape=(None, 408))
40 |
41 | # slice different feature types
42 | biophysical = slice_tensor(2, 0, 16, name='biophysicalfeatures')(visible)
43 | embedding = slice_tensor(2, 16, 66, name='skipgramembd')(visible)
44 | onehot = slice_tensor(2, 66, 87, name='onehot')(visible)
45 | pssm = slice_tensor(2, 87, 108, name='pssm')(visible)
46 | elmo = slice_tensor(2, 108, 408, name='elmo')(visible)
47 |
48 |     # create input based on the selected features
49 | input_dict = {'pssm': pssm, 'onehot': onehot, 'embedding': embedding, 'elmo': elmo,
50 | 'biophysical': biophysical}
51 | features = []
52 | for feature in features_to_use:
53 | features.append(input_dict[feature])
54 |
55 | ## batch normalization on the input features
56 | if len(features_to_use) == 1:
57 | conclayers = features
58 | input = BatchNormalization(name='batchnorm_input')(features[0])
59 | else:
60 | input = BatchNormalization(name='batchnorm_input')(concatenate(features))
61 | conclayers = [input]
62 |
63 |     # performing the convolutions
64 | for idx, conv in enumerate(convs):
65 | idx = str(idx + 1)
66 | conclayers.append(BatchNormalization(name='batch_norm_conv' + idx)(
67 | Conv1D(filter_size, conv, activation="relu", padding="same", name='conv' + idx,
68 | kernel_regularizer=regularizers.l2(0.001))(input)))
69 | conc = concatenate(conclayers)
70 |
71 | # Dropout and Dense Layer before LSTM
72 | if dropout_rate > 0:
73 | drop_before = Dropout(dropout_rate, name='dropoutonconvs')(conc)
74 | dense_convinp = Dense(dense_size, activation='relu', name='denseonconvs')(drop_before)
75 | else:
76 | dense_convinp = Dense(dense_size, activation='relu', name='denseonconvs')(conc)
77 |
78 | # Batch normalize the results of dropout
79 | dense_convinpn = BatchNormalization(name='batch_norm_dense')(dense_convinp)
80 |
81 | # LSTM
82 | lstm = Bidirectional(CuDNNLSTM(lstm_size, return_sequences=True, name='bilstm'))(dense_convinpn)
83 | drop_after_lstm = Dropout(dropout_rate)(lstm)
84 | lstm_out = Dense(dense_size, activation='relu')(drop_after_lstm)
85 |
86 | # Attention layer
87 | seq_representation = SeqSelfAttention(units=attention_units, attention_type=attention_type,
88 | name='Attention')(lstm_out)
89 | if use_CRF:
90 | timedist = TimeDistributed(Dense(n_classes, name='timedist'))(seq_representation)
91 | crf = ChainCRF(name="crf1")
92 | crf_output = crf(timedist)
93 | model = Model(inputs=visible, outputs=crf_output)
94 |         adam = optimizers.Adam(lr=lr)
95 | model.compile(loss=crf.loss, optimizer=adam, weighted_metrics=['accuracy'], sample_weight_mode='temporal')
96 | else:
97 | timedist = TimeDistributed(Dense(n_classes, activation='softmax'))(seq_representation)
98 | model = Model(inputs=visible, outputs=timedist)
99 |         adam = optimizers.Adam(lr=lr)
100 | model.compile(loss='categorical_crossentropy', optimizer=adam, weighted_metrics=['accuracy'],
101 | sample_weight_mode='temporal')
102 | print(model.summary())
103 | return model, 'model_d_cnn_bilstm_attention#' + '#'.join(features_to_use) + '@conv' + '_'.join(
104 | [str(c) for c in convs]) + '@dense_' + str(dense_size) + '@lstm' + str(lstm_size) + '@droplstm' + str(
105 | dropout_rate) + '@filtersize_' + str(filter_size) + '@lr_' + str(lr) + '@use_CRF_' + str(
106 | use_CRF) + '@attention_units_' + str(attention_units) + '@attention_type_' + str(attention_type)
107 |
--------------------------------------------------------------------------------
/models/e_cnn.py:
--------------------------------------------------------------------------------
1 | import inspect
2 | import os
3 | import sys
4 |
5 | currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
6 | parentdir = os.path.dirname(currentdir)
7 | sys.path.insert(0, parentdir)
8 |
9 | import numpy as np
10 |
11 | np.random.seed(7)
12 | from keras.models import Model
13 | from keras.layers import Dense, Input, Dropout, concatenate, Conv1D, \
14 | BatchNormalization
15 | from keras.layers.wrappers import TimeDistributed
16 | from layers.crf import ChainCRF
17 | from layers.utility import slice_tensor
18 | from keras import optimizers
19 | from keras import regularizers
20 |
21 |
22 | def model_e_cnn(n_classes, convs=[3, 5, 7], dense_size=200, dropout_rate=0.5,
23 | features_to_use=['onehot', 'pssm'], filter_size=256, lr=0.001,
24 | use_CRF=False):
25 | '''
26 | :param n_classes:
27 | :param convs:
28 | :param dense_size:
29 | :param dropout_rate:
30 | :param features_to_use:
31 | :param filter_size:
32 | :param lr:
33 | :param use_CRF:
34 | :return:
35 | '''
36 | visible = Input(shape=(None, 408))
37 | # slice different feature types
38 | biophysical = slice_tensor(2, 0, 16, name='biophysicalfeatures')(visible)
39 | embedding = slice_tensor(2, 16, 66, name='skipgramembd')(visible)
40 | onehot = slice_tensor(2, 66, 87, name='onehot')(visible)
41 | pssm = slice_tensor(2, 87, 108, name='pssm')(visible)
42 | elmo = slice_tensor(2, 108, 408, name='elmo')(visible)
43 |
44 |     # create input based on the selected features
45 | input_dict = {'pssm': pssm, 'onehot': onehot, 'embedding': embedding, 'elmo': elmo,
46 | 'biophysical': biophysical}
47 | features = []
48 | for feature in features_to_use:
49 | features.append(input_dict[feature])
50 |
51 | ## batch normalization on the input features
52 | if len(features_to_use) == 1:
53 | conclayers = features
54 | input = BatchNormalization(name='batchnorm_input')(features[0])
55 | else:
56 | input = BatchNormalization(name='batchnorm_input')(concatenate(features))
57 | conclayers = [input]
58 |
59 |     # performing the convolutions
60 | for idx, conv in enumerate(convs):
61 | idx = str(idx + 1)
62 | conclayers.append(BatchNormalization(name='batch_norm_conv' + idx)(
63 | Conv1D(filter_size, conv, activation="relu", padding="same", name='conv' + idx,
64 | kernel_regularizer=regularizers.l2(0.001))(input)))
65 | conc = concatenate(conclayers)
66 |
67 | dropped = Dropout(dropout_rate, name='dropoutonconvs')(conc)
68 | dense_convinp = Dense(dense_size, activation='relu', name='denseonconvs')(dropped)
69 | dense_convinpn = BatchNormalization(name='batch_norm_dense')(dense_convinp)
70 |
71 | if use_CRF:
72 | timedist = TimeDistributed(Dense(n_classes, name='dense'))(dense_convinpn)
73 | crf = ChainCRF(name="crf1")
74 | crf_output = crf(timedist)
75 | model = Model(inputs=visible, outputs=crf_output)
76 | adam = optimizers.Adam(lr=lr)
77 | model.compile(loss=crf.loss, optimizer=adam, weighted_metrics=['accuracy'], sample_weight_mode='temporal')
78 | else:
79 | timedist = TimeDistributed(Dense(n_classes, activation='softmax'))(dense_convinpn)
80 | model = Model(inputs=visible, outputs=timedist)
81 | adam = optimizers.Adam(lr=lr)
82 | model.compile(loss='categorical_crossentropy', optimizer=adam, weighted_metrics=['accuracy'],
83 | sample_weight_mode='temporal')
84 |
85 | print(model.summary())
86 | return model, 'model_e_cnn#' + '#'.join(features_to_use) + '@conv' + '_'.join(
87 | [str(c) for c in convs]) + '@dense_' + str(dense_size) + '@droplstm' + str(
88 | dropout_rate) + '@filtersize_' + str(filter_size) + '@lr_' + str(lr) + '@use_CRF_' + str(
89 | use_CRF)
90 |
--------------------------------------------------------------------------------
/models/f_multiscale_cnn.py:
--------------------------------------------------------------------------------
1 | import inspect
2 | import os
3 | import sys
4 |
5 | currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
6 | parentdir = os.path.dirname(currentdir)
7 | sys.path.insert(0, parentdir)
8 |
9 | import numpy as np
10 |
11 | np.random.seed(7)
12 | from keras.models import Model
13 | from keras.layers import Dense, Input, Dropout, concatenate, Conv1D, \
14 | BatchNormalization
15 | from keras.layers.wrappers import TimeDistributed
16 | from layers.crf import ChainCRF
17 | from layers.utility import slice_tensor, multiscale_CNN
18 | from keras import optimizers
19 | from keras import regularizers
20 |
21 |
22 |
23 | def model_f_multiscale_cnn(n_classes, convs=[3, 5, 7], dropout_rate=0.5,
24 | features_to_use=['onehot', 'pssm'], filter_size=256, lr=0.001, multiscalecnn_layers=3, cnn_regularizer=0.00005,
25 | use_CRF=False):
26 | '''
27 |     :param n_classes: number of output classes per residue
28 |     :param convs: list of convolution kernel sizes
29 |     :param dropout_rate: dropout rate between multiscale CNN blocks
30 |     :param features_to_use: subset of ['onehot', 'pssm', 'embedding', 'elmo', 'biophysical']
31 |     :param filter_size: number of filters per convolution
32 |     :param lr: learning rate of the Adam optimizer
33 |     :param multiscalecnn_layers: number of stacked multiscale CNN blocks
34 |     :param cnn_regularizer: L2 regularization coefficient for the convolutions
35 |     :param use_CRF: whether to use a CRF output layer instead of softmax
36 |     :return: the compiled model and a descriptive run name
37 | '''
38 | visible = Input(shape=(None, 408))
39 |
40 | # slice different feature types
41 | biophysical = slice_tensor(2, 0, 16, name='biophysicalfeatures')(visible)
42 | embedding = slice_tensor(2, 16, 66, name='skipgramembd')(visible)
43 | onehot = slice_tensor(2, 66, 87, name='onehot')(visible)
44 | pssm = slice_tensor(2, 87, 108, name='pssm')(visible)
45 | elmo = slice_tensor(2, 108, 408, name='elmo')(visible)
46 |
47 | input_dict = {'pssm': pssm, 'onehot': onehot, 'embedding': embedding, 'elmo': elmo,
48 | 'biophysical': biophysical}
49 |
50 | gating = Dense(len(convs) * filter_size, activation='sigmoid')
51 |
52 | # create input
53 | features = []
54 | for feature in features_to_use:
55 | features.append(input_dict[feature])
56 |
57 | if len(features_to_use) == 1:
58 | conclayers = features
59 | input = BatchNormalization(name='batchnorm_input')(features[0])
60 | else:
61 | input = BatchNormalization(name='batchnorm_input')(concatenate(features))
62 | conclayers = []
63 |
64 |     # performing the convolutions
65 | for idx, conv in enumerate(convs):
66 | idx = str(idx + 1)
67 | conclayers.append(Conv1D(filter_size, conv, activation="relu", padding="same", name='conv' + idx,
68 | kernel_regularizer=regularizers.l2(cnn_regularizer))(input))
69 | current_multi_cnn_input = concatenate(conclayers)
70 |
71 | # Multiscale CNN application
72 | for layer_idx in range(multiscalecnn_layers-1):
73 | current_multi_cnn_output = multiscale_CNN(current_multi_cnn_input, gating, filter_size, convs, cnn_regularizer)
74 | current_multi_cnn_input = Dropout(dropout_rate)(current_multi_cnn_output)
75 | dense_out = Dense(len(convs) * filter_size, activation='relu')(current_multi_cnn_input)
76 |
77 | if use_CRF:
78 | timedist = TimeDistributed(Dense(n_classes, name='timedist'))(dense_out)
79 | crf = ChainCRF(name="crf1")
80 | crf_output = crf(timedist)
81 | model = Model(inputs=visible, outputs=crf_output)
82 | adam = optimizers.Adam(lr=lr)
83 | model.compile(loss=crf.loss, optimizer=adam, weighted_metrics=['accuracy'], sample_weight_mode='temporal')
84 | else:
85 | timedist = TimeDistributed(Dense(n_classes, activation='softmax'))(dense_out)
86 | model = Model(inputs=visible, outputs=timedist)
87 | adam = optimizers.Adam(lr=lr)
88 | model.compile(loss='categorical_crossentropy', optimizer=adam, weighted_metrics=['accuracy'],
89 | sample_weight_mode='temporal')
90 | print(model.summary())
91 | return model, 'model_f_multiscale_cnn#' + '#'.join(features_to_use) + '@conv' + '_'.join(
92 | [str(c) for c in convs]) + '@dropout_rate' + str(
93 | dropout_rate) + '@filtersize_' + str(filter_size) + '@lr_' + str(lr) + '@use_CRF_' + str(
94 | use_CRF) + '@multiscalecnn_layers' + str(multiscalecnn_layers) + '@cnn_regularizer' + str(cnn_regularizer)
95 |
--------------------------------------------------------------------------------
/sample_configs/model_a.yaml:
--------------------------------------------------------------------------------
1 | deep_learning_model: model_a_cnn_bilstm
2 | model_paramters:
3 | convs:
4 | - 3
5 | - 5
6 | - 7
7 | - 11
8 | - 21
9 | dense_size: 1000
10 | dropout_rate: 0.5
11 | features_to_use:
12 | - onehot
13 | - pssm
14 | filter_size: 256
15 | lr: 0.001
16 | lstm_size: 1000
17 | run_parameters:
18 | domain_name: cnnbilstm
19 | epochs: 100
20 | gpu: 1
21 | patience: 5
22 | setting_name: pssm_onehot
23 | test_batch_size: 100
24 | train_batch_size: 64
25 |
--------------------------------------------------------------------------------
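The keys under `model_paramters` match the keyword arguments of the corresponding model builder (`model_a_cnn_bilstm` above), while `run_parameters` controls training. As a hedged sketch only (the actual dispatch lives in `deepprime2sec.py` and may differ), the configuration could be mapped onto the builder like this:

```python
# Illustration only: turning sample_configs/model_a.yaml into a model object.
# n_classes=9 is an assumption (8 secondary-structure states plus padding).
import yaml

from models.a_cnn_bilstm import model_a_cnn_bilstm

with open('sample_configs/model_a.yaml') as f:
    config = yaml.safe_load(f)

params = config['model_paramters']   # key spelled as in the shipped configs
model, run_name = model_a_cnn_bilstm(n_classes=9, **params)
print(run_name)
```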
/sample_configs/model_b.yaml:
--------------------------------------------------------------------------------
1 | deep_learning_model: model_b_cnn_bilstm_highway
2 | model_paramters:
3 | convs:
4 | - 3
5 | - 5
6 | - 7
7 | - 11
8 | - 21
9 | dense_size: 1000
10 | dropout_rate: 0.5
11 | features_to_use:
12 | - onehot
13 | - pssm
14 | filter_size: 256
15 | lr: 0.001
16 | lstm_size: 1000
17 | use_CRF: false
18 | run_parameters:
19 | domain_name: cnn_bilstm_highway
20 | epochs: 100
21 | gpu: 1
22 | patience: 5
23 |   setting_name: pssm_onehot
24 | test_batch_size: 100
25 | train_batch_size: 64
26 |
--------------------------------------------------------------------------------
/sample_configs/model_c.yaml:
--------------------------------------------------------------------------------
1 | deep_learning_model: model_c_cnn_bilstm_crf
2 | model_paramters:
3 | CRF_input_dim: 200
4 | convs:
5 | - 3
6 | - 5
7 | - 7
8 | - 11
9 | - 21
10 | dense_size: 1000
11 | dropout_rate: 0.5
12 | features_to_use:
13 | - onehot
14 | - pssm
15 | filter_size: 256
16 | lr: 0.0005
17 | lstm_size: 1000
18 | run_parameters:
19 | domain_name: cnn_bilstm_crf
20 | epochs: 100
21 | gpu: 1
22 | patience: 10
23 | setting_name: pssm_onehot
24 | test_batch_size: 100
25 | train_batch_size: 64
26 |
--------------------------------------------------------------------------------
/sample_configs/model_d.yaml:
--------------------------------------------------------------------------------
1 | deep_learning_model: model_d_cnn_bilstm_attention
2 | model_paramters:
3 | attention_type: additive
4 | attention_units: 32
5 | convs:
6 | - 3
7 | - 5
8 | - 7
9 | - 11
10 | - 21
11 | dense_size: 1000
12 | dropout_rate: 0.5
13 | features_to_use:
14 | - onehot
15 | - pssm
16 | filter_size: 256
17 | lr: 0.001
18 | lstm_size: 1000
19 | use_CRF: false
20 | run_parameters:
21 | domain_name: baseline
22 | epochs: 100
23 | gpu: 1
24 | patience: 10
25 | setting_name: baseline
26 | test_batch_size: 100
27 | train_batch_size: 64
28 |
--------------------------------------------------------------------------------
/sample_configs/model_e.yaml:
--------------------------------------------------------------------------------
1 | deep_learning_model: model_e_cnn
2 | model_paramters:
3 | convs:
4 | - 3
5 | - 5
6 | - 7
7 | - 11
8 | - 21
9 | dense_size: 1000
10 | dropout_rate: 0.5
11 | features_to_use:
12 | - onehot
13 | - pssm
14 | filter_size: 256
15 | lr: 0.001
16 | use_CRF: false
17 | run_parameters:
18 | domain_name: baseline
19 | epochs: 100
20 | gpu: 1
21 | patience: 10
22 | setting_name: baseline
23 | test_batch_size: 100
24 | train_batch_size: 64
25 |
--------------------------------------------------------------------------------
/sample_configs/model_f.yaml:
--------------------------------------------------------------------------------
1 | deep_learning_model: model_f_multiscale_cnn
2 | model_paramters:
3 | cnn_regularizer: 5.0e-05
4 | convs:
5 | - 3
6 | - 5
7 | - 7
8 | - 11
9 | - 21
10 | dropout_rate: 0.5
11 | features_to_use:
12 | - onehot
13 | - pssm
14 | filter_size: 256
15 | lr: 0.001
16 | multiscalecnn_layers: 3
17 | use_CRF: false
18 | run_parameters:
19 | domain_name: baseline
20 | epochs: 100
21 | gpu: 1
22 | patience: 10
23 | setting_name: baseline
24 | test_batch_size: 100
25 | train_batch_size: 64
26 |
--------------------------------------------------------------------------------
/utility/feed_generation_utility.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from utility.file_utility import FileUtility
4 |
5 |
6 | def train_batch_generator_408(batch_size=64):
7 | '''
8 | :param batch_size:
9 | :return:
10 | '''
11 | start_idx = 0
12 | train_lengths = [int(j) for j in FileUtility.load_list(
13 | 'datasets/train_length.txt')]
14 | X_train = np.load('datasets/X_train_408.npy')
15 | Y_train = np.array(
16 | np.load('datasets/train_mat_Y.npy'))
17 | while True:
18 | if not start_idx < len(train_lengths):
19 | start_idx = 0
20 | X = X_train[start_idx:(min(start_idx + batch_size, len(train_lengths))),
21 | 0:train_lengths[min(start_idx + batch_size, len(train_lengths)) - 1]]
22 | Y = Y_train[start_idx:(min(start_idx + batch_size, len(train_lengths))),
23 | 0:train_lengths[min(start_idx + batch_size, len(train_lengths)) - 1], :]
24 |
25 | W = []
26 | for idx in range(start_idx, (min(start_idx + batch_size, len(train_lengths)))):
27 | W.append([1 if l < train_lengths[idx] else 0 for l in
28 | range(0, train_lengths[min(start_idx + batch_size, len(train_lengths)) - 1])])
29 |
30 | start_idx += batch_size
31 |
32 | yield X, Y, np.array(W)
33 |
34 |
35 | def validation_batch_generator_408(batch_size=100):
36 | '''
37 | :param batch_size:
38 | :return:
39 | '''
40 | test_lengths = [int(i) for i in FileUtility.load_list(
41 | 'datasets/test_length.txt')]
42 | X_test = np.load('datasets/X_test_408.npy')
43 | Y_test = np.array(
44 | np.load('datasets/test_mat_Y.npy'))
45 | start_idx = 0
46 | while True:
47 | if not start_idx < len(test_lengths):
48 | start_idx = 0
49 | X = X_test[start_idx:(min(start_idx + batch_size, len(test_lengths))),
50 | 0:test_lengths[min(start_idx + batch_size, len(test_lengths)) - 1]]
51 | Y = Y_test[start_idx:(min(start_idx + batch_size, len(test_lengths))),
52 | 0:test_lengths[min(start_idx + batch_size, len(test_lengths)) - 1], :]
53 | W = []
54 | for idx in range(start_idx, (min(start_idx + batch_size, len(test_lengths)))):
55 | W.append([1 if l < test_lengths[idx] else 0 for l in
56 | range(0, test_lengths[min(start_idx + batch_size, len(test_lengths)) - 1])])
57 |
58 | start_idx += batch_size
59 | yield X, Y, np.array(W)
60 |
61 |
62 | def validation_batches_fortest_408(batchsize=100):
63 | '''
64 | :param batchsize:
65 | :return:
66 | '''
67 | test_lengths = [int(i) for i in FileUtility.load_list(
68 | 'datasets/test_length.txt')]
69 | X_test = np.load('datasets/X_test_408.npy')
70 | Y_test = np.array(
71 | np.load('datasets/test_mat_Y.npy'))
72 | start_idx = 0
73 | while start_idx < len(test_lengths):
74 | X = X_test[start_idx:(min(start_idx + batchsize, len(test_lengths))),
75 | 0:test_lengths[min(start_idx + batchsize, len(test_lengths)) - 1]]
76 | Y = Y_test[start_idx:(min(start_idx + batchsize, len(test_lengths))),
77 | 0:test_lengths[min(start_idx + batchsize, len(test_lengths)) - 1], :]
78 | W = []
79 | for idx in range(start_idx, (min(start_idx + batchsize, len(test_lengths)))):
80 | W.append([1 if l < test_lengths[idx] else 0 for l in
81 | range(0, test_lengths[min(start_idx + batchsize, len(test_lengths)) - 1])])
82 |
83 | start_idx += batchsize
84 | yield X, Y, np.array(W)
85 |
--------------------------------------------------------------------------------
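Because the models are compiled with `sample_weight_mode='temporal'`, the third array yielded by these generators is a per-position weight matrix that zeroes out padded positions. A rough training sketch (batch sizes taken from the sample configs; assumes the `datasets/` files referenced above are present and n_classes=9, i.e. 8 states plus padding):

```python
# Sketch: wiring the 408-feature generators into Keras training.
import math

from models.a_cnn_bilstm import model_a_cnn_bilstm
from utility.feed_generation_utility import (train_batch_generator_408,
                                             validation_batch_generator_408)
from utility.file_utility import FileUtility

n_train = len(FileUtility.load_list('datasets/train_length.txt'))
n_test = len(FileUtility.load_list('datasets/test_length.txt'))

model, run_name = model_a_cnn_bilstm(n_classes=9)
model.fit_generator(train_batch_generator_408(batch_size=64),
                    steps_per_epoch=math.ceil(n_train / 64),
                    validation_data=validation_batch_generator_408(batch_size=100),
                    validation_steps=math.ceil(n_test / 100),
                    epochs=100)
```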
/utility/file_utility.py:
--------------------------------------------------------------------------------
1 | import _pickle as pickle
2 | import codecs
3 | import fnmatch
4 | import os
5 | import h5py
6 | import numpy as np
7 | from Bio import SeqIO
8 | from Bio.Alphabet import generic_dna
9 | from Bio.Seq import Seq
10 | from Bio.SeqRecord import SeqRecord
11 | from scipy import sparse
12 |
13 |
14 | class FileUtility(object):
15 | def __init__(self):
16 | print('File utility object created..')
17 |
18 | @staticmethod
19 | def create_fasta_file(file_address, corpus, label):
20 | seq_id_pairs = [('.'.join([str(idx + 1), label[idx]]), x) for idx, x in enumerate(corpus)]
21 | seq_recs = [SeqRecord(Seq(seq, generic_dna), id=id, description='') for id, seq in seq_id_pairs]
22 | SeqIO.write(seq_recs, file_address, "fasta")
23 |
24 | @staticmethod
25 | def read_sequence_file(file_name_sample):
26 | '''
27 | :param file_name_sample:
28 | :return:
29 | '''
30 | corpus = []
31 | if file_name_sample[-1] == 'q':
32 | for cur_record in SeqIO.parse(file_name_sample, "fastq"):
33 | corpus.append(str(cur_record.seq).lower())
34 | else:
35 | for cur_record in SeqIO.parse(file_name_sample, "fasta"):
36 | corpus.append(str(cur_record.seq).lower())
37 | return file_name_sample.split('/')[-1], corpus
38 |
39 | @staticmethod
40 | def read_sequence_file_length(file_name_sample):
41 | '''
42 | :param file_name_sample:
43 | :return:
44 | '''
45 | corpus = []
46 | if file_name_sample[-1] == 'q':
47 | for cur_record in SeqIO.parse(file_name_sample, "fastq"):
48 | corpus.append(str(cur_record.seq).lower())
49 | else:
50 | for cur_record in SeqIO.parse(file_name_sample, "fasta"):
51 | corpus.append(str(cur_record.seq).lower())
52 | return file_name_sample.split('/')[-1], len(corpus)
53 |
54 | @staticmethod
55 | def read_fasta_directory(file_directory, file_extenstion, only_files=[]):
56 | '''
57 | :param file_directory:
58 | :param file_extenstion:
59 | :param only_files:
60 | :return: list of fasta files, and a dic to map file to index
61 | '''
62 | if len(only_files) > 0:
63 | fasta_files = [x for x in FileUtility.recursive_glob(file_directory, '*.' + file_extenstion) if
64 | x.split('/')[-1] in only_files]
65 | else:
66 | fasta_files = [x for x in FileUtility.recursive_glob(file_directory, '*.' + file_extenstion)]
67 |
68 | fasta_files.sort()
69 | mapping = {v: k for k, v in enumerate(fasta_files)}
70 | return fasta_files, mapping
71 |
72 | @staticmethod
73 | def save_obj(filename, value):
74 | with open(filename + '.pickle', 'wb') as f:
75 | pickle.dump(value, f)
76 |
77 | @staticmethod
78 | def load_obj(filename):
79 | return pickle.load(open(filename, "rb"))
80 |
81 | @staticmethod
82 | def ensure_dir(file_path):
83 | directory = os.path.dirname(file_path)
84 | if not os.path.exists(directory):
85 | os.makedirs(directory)
86 |
87 | @staticmethod
88 | def exists(file_path):
89 | return os.path.exists(file_path)
90 |
91 | @staticmethod
92 | def remove(file_path):
93 | os.remove(file_path)
94 |
95 | @staticmethod
96 | def save_list(filename, list_names):
97 | # FileUtility.ensure_dir(filename)
98 | f = codecs.open(filename, 'w', 'utf-8')
99 | for x in list_names:
100 | f.write(x + '\n')
101 | f.close()
102 |
103 | @staticmethod
104 | def load_list(filename):
105 | return [line.strip() for line in codecs.open(filename, 'r', 'utf-8').readlines()]
106 |
107 | @staticmethod
108 | def save_sparse_csr(filename, array):
109 | np.savez(filename, data=array.data, indices=array.indices,
110 | indptr=array.indptr, shape=array.shape)
111 |
112 | @staticmethod
113 | def load_sparse_csr(filename):
114 | loader = np.load(filename)
115 | return sparse.csr_matrix((loader['data'], loader['indices'], loader['indptr']), shape=loader['shape'])
116 |
117 | @staticmethod
118 | def _float_or_zero(value):
119 | try:
120 | return float(value)
121 | except:
122 | return 0.0
123 |
124 | @staticmethod
125 | def recursive_glob(treeroot, pattern):
126 | '''
127 | :param treeroot: the path to the directory
128 | :param pattern: the pattern of files
129 | :return:
130 | '''
131 | results = []
132 | for base, dirs, files in os.walk(treeroot):
133 | good_files = fnmatch.filter(files, pattern)
134 | results.extend(os.path.join(base, f) for f in good_files)
135 | return results
136 |
137 | @staticmethod
138 | def read_fasta_sequences(file_name):
139 | corpus = []
140 | for cur_record in SeqIO.parse(file_name, "fasta"):
141 | corpus.append(str(cur_record.seq).lower())
142 | return corpus
143 |
144 | @staticmethod
145 | def read_fasta_sequences_ids(file_name):
146 | corpus = dict()
147 | for cur_record in SeqIO.parse(file_name, "fasta"):
148 | corpus[str(cur_record.id)] = (str(cur_record.seq).lower(), str(cur_record.description))
149 | return corpus
150 |
151 | @staticmethod
152 | def loadH5file(filename):
153 | f = h5py.File(filename, 'r')
154 | a_group_key = list(f.keys())[0]
155 | return list(f[a_group_key])
156 |
--------------------------------------------------------------------------------
/utility/labeling_utility.py:
--------------------------------------------------------------------------------
1 | import inspect
2 | import os
3 | import sys
4 |
5 | currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
6 | parentdir = os.path.dirname(currentdir)
7 | sys.path.insert(0, parentdir)
8 |
9 | import numpy as np
10 | from keras.preprocessing.text import Tokenizer
11 | from collections import Counter
12 | from keras.preprocessing.sequence import pad_sequences
13 | from keras.utils.np_utils import to_categorical
14 | from gensim.models import KeyedVectors
15 | from keras.layers import Embedding
16 | from utility.file_utility import FileUtility
17 | from utility.list_set_util import argsort
18 |
19 | np.random.seed(7)
20 |
21 |
22 | class LabelingData(object):
23 | def __init__(self, train_file, test_file):
24 | print('Labeling utility object created..')
25 | ## read train ##
26 | self.X_train, self.y_train, self.train_lengths = LabelingData.labeling_file_reader(train_file)
27 | ## read test##
28 | self.X_test, self.y_test , self.test_lengths= LabelingData.labeling_file_reader(test_file)
29 | ## data loading
30 | self.load_data()
31 |
32 | def load_data(self):
33 | words = list(set([elem for sublist in (self.X_train + self.X_test) for elem in sublist]))
34 |         self.vocab_size = len(words) + 2  # because of the <PAD> and <UNK> pseudo words
35 | self.n_classes = len(set([elem for sublist in (self.y_train + self.y_test) for elem in
36 | sublist])) + 1 # add 1 because of zero padding
37 |
38 | # assign a unique integer to each word/label
39 | self.w2idx = LabelingData.encode(self.X_train + self.X_test)
40 | self.l2idx = LabelingData.encode(self.y_train + self.y_test)
41 |
42 |         # encode() maps each word to a unique index, starting from 1. We additionally increment all the
43 |         # values by 1, so that we can save space for 0 and 1 to be assigned to <PAD> and <UNK> later
44 | self.w2idx = Counter(self.w2idx)
45 | self.w2idx.update(self.w2idx.keys())
46 | self.w2idx = dict(
47 | self.w2idx) # convert back to regular dict (to avoid erroneously assigning 0 to unknown words)
48 |
49 |         self.w2idx['<PAD>'] = 0
50 |         self.w2idx['<UNK>'] = 1
51 |
52 |         # on the label side we only have the <PAD> to add
53 |         self.l2idx['<PAD>'] = 0
54 |
55 | # keep the reverse to be able to decode back
56 | self.idx2w = {v: k for k, v in self.w2idx.items()}
57 | self.idx2l = {v: k for k, v in self.l2idx.items()}
58 |
59 | X_train_enc = [[self.w2idx[w] for w in sent] for sent in self.X_train]
60 | X_test_enc = [[self.w2idx[w] for w in sent] for sent in self.X_test]
61 |
62 | y_train_enc = [[self.l2idx[l] for l in labels] for labels in self.y_train]
63 | y_test_enc = [[self.l2idx[l] for l in labels] for labels in self.y_test]
64 |
65 | # zero-pad all the sequences
66 | self.max_length = len(max(self.X_train + self.X_test, key=len))
67 |
68 | self.X_train_enc = pad_sequences(X_train_enc, maxlen=self.max_length, padding='post')
69 | self.X_test_enc = pad_sequences(X_test_enc, maxlen=self.max_length, padding='post')
70 |
71 | y_train_enc = pad_sequences(y_train_enc, maxlen=self.max_length, padding='post')
72 | y_test_enc = pad_sequences(y_test_enc, maxlen=self.max_length, padding='post')
73 |
74 | # one-hot encode the labels
75 | idx = np.array(list(self.idx2l.keys()))
76 | vec = to_categorical(idx)
77 | one_hot = dict(zip(idx, vec))
78 | self.inv_one_hot = {tuple(v): k for k, v in one_hot.items()} # keep the inverse dict
79 |
80 | self.y_train_enc = np.array([[one_hot[l] for l in labels] for labels in y_train_enc])
81 | self.y_test_enc = np.array([[one_hot[l] for l in labels] for labels in y_test_enc])
82 |
83 | print('Training y encoded shape is ', y_train_enc.shape)
84 | print('Maximum sequence length is', self.max_length)
85 |
86 | def get_embedding_layer(self, embedding_file, embedding_dim, trainable=False):
87 | wvmodel = KeyedVectors.load_word2vec_format(embedding_file)
88 |
89 | embedding_dimension = embedding_dim
90 | embedding_matrix = np.zeros((self.vocab_size, embedding_dimension))
91 |
92 |         UNKNOWN = np.random.uniform(-1, 1, embedding_dimension)  # assumes that '<UNK>' does not exist in the embedding vocab
93 |
94 | for word, i in self.w2idx.items():
95 | if word in wvmodel.vocab:
96 | embedding_matrix[i] = wvmodel[word]
97 | else:
98 |                 embedding_matrix[i] = UNKNOWN
99 |
100 |         embedding_matrix[self.w2idx['<PAD>']] = np.zeros((embedding_dimension))
101 |
102 | embedding_layer = Embedding(embedding_matrix.shape[0],
103 | embedding_matrix.shape[1],
104 | weights=[embedding_matrix],
105 | trainable=trainable,
106 | name='embed_layer')
107 | return embedding_layer
108 |
109 |
110 | @staticmethod
111 | def tolower(file):
112 | lines=[l.lower() for l in FileUtility.load_list(file)]
113 | FileUtility.save_list(file+'new',lines)
114 |
115 |
116 | @staticmethod
117 | def labeling_file_reader(file):
118 | with open(file, 'r') as f:
119 | train = f.read().splitlines()
120 | X, y = [], []
121 | sent = []
122 | sent_labels = []
123 | for elem in train:
124 | if elem == '':
125 | X.append(sent)
126 | y.append(sent_labels)
127 | sent = []
128 | sent_labels = []
129 | else:
130 | xx, yy = elem.split()
131 | sent.append(xx)
132 | sent_labels.append(yy)
133 |
134 | lengths = LabelingData.sequence_lengths(file)
135 | sorted_idxs = argsort(lengths)
136 | lengths.sort()
137 | X = [X[i] for i in sorted_idxs]
138 | y = [y[i] for i in sorted_idxs]
139 | return X, y, lengths
140 |
141 | @staticmethod
142 | def convert_to_kmer(input_file, out_file, n=3):
143 | train = FileUtility.load_list(input_file)
144 | training_data = [line.split() for line in train]
145 | final_list = list()
146 | temp = []
147 | for x in training_data:
148 | if x == []:
149 | final_list.append(temp)
150 | temp = []
151 | else:
152 | temp.append(x)
153 | res = []
154 | for prot in final_list:
155 | sentence = ''.join(['$'] + [aa[0] for aa in prot] + ['#'])
156 | res += [(sentence[i:i + n], prot[i][1]) for i in range(len(sentence) - n + 1)]
157 | res += ['']
158 | FileUtility.save_list(out_file, [' '.join(list(x)) for x in res])
159 |
160 | @staticmethod
161 | def sequence_lengths(input_file):
162 | train = FileUtility.load_list(input_file)
163 | training_data = [line.split() for line in train]
164 | final_list = list()
165 | temp = []
166 | for x in training_data:
167 | if x == []:
168 | final_list.append(temp)
169 | temp = []
170 | else:
171 | temp.append(x)
172 | return [len(prot) for prot in final_list]
173 |
174 | @staticmethod
175 | def encode(sequence):
176 | '''
177 | Encoding sequence to integers
178 |         :param sequence: list of sequences, each a list of tokens
179 |         :return: the tokenizer's word-to-index mapping
180 | '''
181 | t = Tokenizer(filters='\t\n', lower=False)
182 | t.fit_on_texts([" ".join(seq) for seq in sequence])
183 | return t.word_index
184 |
185 | @staticmethod
186 | def numpy2trainfiles(file,name,out='../data/s8_features/'):
187 | '''
188 | test_file='/mounts/data/proj/asgari/dissertation/datasets/deepbio/protein_general/ss/data/cb513+profile_split1.npy'
189 | train_file='/mounts/data/proj/asgari/dissertation/datasets/deepbio/protein_general/ss/data/cullpdb+profile_6133_filtered.npy'
190 | :param name:
191 | :param out:
192 | :return:
193 | '''
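# per-residue feature layout assumed by the slices below: columns 0:21 hold the one-hot
# amino-acid encoding, 22:30 the secondary-structure labels, 35:56 the sequence-profile
# features; a, b and c select the columns kept as model input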
194 | db=np.load(file)
195 | a = np.arange(0,21)
196 | b = np.arange(35,56)
197 | c = np.hstack((a,b))
198 | db = np.reshape(db, (db.shape[0], int(db.shape[1] / 57), 57))
199 | seq=['A', 'C', 'E', 'D', 'G', 'F', 'I', 'H', 'K', 'M', 'L', 'N', 'Q', 'P', 'S', 'R', 'T', 'W', 'V', 'Y', 'X','NoSeq']
200 | label=['L', 'B', 'E', 'G', 'I', 'H', 'S', 'T']
201 | sequences=[]
202 | labels=[]
203 | possible_features=dict()
204 | for i in range(0,db.shape[0]):
205 | sequences.append(''.join([seq[np.argmax(x)] if np.max(x)==1 else '' for x in db[i,:,0:21]]).lower())
206 | labels.append(''.join([label[np.argmax(y)] if np.max(y)==1 else '' for y in db[i,:,22:30]]).lower())
207 | lengths=[len(x) for x in sequences]
208 | sorted_idxs = argsort(lengths)
209 | lengths.sort()
210 | sequences = [sequences[i] for i in sorted_idxs]
211 | labels = [labels[i] for i in sorted_idxs]
212 | FileUtility.save_list(out+name,['\n'.join([' '.join([elx,labels[idx][idy]]) for idy,elx in enumerate(list(seq))]+['']) for idx,seq in enumerate(sequences)])
213 | db_new=db[sorted_idxs,:,:]
214 | label_encoding=[[([0] if np.max(row)==1 else [1])+row for row in db_new[i,:,22:30].tolist()] for i in range(0,db.shape[0])]
215 | np.save(out+name+'_mat_Y',label_encoding)
216 | db_new =db_new[:,:,c]
217 | np.save(out+name+'_mat_X',db_new)
218 | FileUtility.save_list(out+name+'_length.txt',[str(l) for l in lengths])
219 |
220 | @staticmethod
221 | def X2extended(X):
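# prepend an embedding-based projection of the 21 one-hot residue columns (emb2features.npy)
# to the original per-position feature vector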
222 | EMB=np.load('/mounts/data/proj/asgari/dissertation/git_repos/DeepSeq2Sec/pretrained_embeddings/emb2features.npy')
223 | x_new=[]
224 | for i in range(0,X.shape[0]):
225 | temp=[]
226 | for j in range(0,700):
227 | temp.append(X[i,j,0:21].dot(EMB).tolist()+X[i,j,:].tolist())
228 | x_new.append(temp)
229 | return np.array(x_new)
230 |
231 |
232 | if __name__ == '__main__':
233 | LabelingData.tolower('/mounts/data/proj/asgari/dissertation/git_repos/DeepSeq2Sec/data/epitopes/test_epitopes.txt')
234 | LabelingData.tolower('/mounts/data/proj/asgari/dissertation/git_repos/DeepSeq2Sec/data/epitopes/train_epitopes.txt')
235 |
--------------------------------------------------------------------------------
/utility/list_set_util.py:
--------------------------------------------------------------------------------
1 | __author__ = "Ehsaneddin Asgari"
2 | __license__ = "Apache 2"
3 | __version__ = "1.0.0"
4 | __maintainer__ = "Ehsaneddin Asgari"
5 | __email__ = "asgari@berkeley.edu"
6 | __project__ = "LLP - DeepPrime2Sec"
7 | __website__ = "https://llp.berkeley.edu/DeepPrime2Sec/"
8 |
9 | import operator
10 | import numpy as np
11 |
12 | def get_intersection_of_list(list_of_list_features):
13 | return list(set.intersection(*map(set, list_of_list_features)))
14 |
15 | def get_max_of_dict(inp):
16 | return max(inp.items(), key=operator.itemgetter(1))[0]
17 |
18 | def argsort(seq, rev=False):
19 | # http://stackoverflow.com/questions/3071415/efficient-method-to-calculate-the-rank-vector-of-a-list-in-python
20 | return sorted(range(len(seq)), key=seq.__getitem__, reverse=rev)
21 |
22 | def sampling_from_dict(score_dict, N):
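# normalise the scores into a probability distribution over the sorted keys and draw N samples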
23 | summation=np.sum(list(score_dict.values()))
24 | keys=list(score_dict.keys())
25 | keys.sort()
26 | probDict={k:(s/summation) for k,s in score_dict.items()}
27 | prob_list=[probDict[k] for k in keys]
28 | return np.random.choice(keys, N, p=prob_list).tolist()
29 |
--------------------------------------------------------------------------------
/utility/training.py:
--------------------------------------------------------------------------------
1 | import inspect
2 | import os
3 | import sys
4 |
5 | currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
6 | parentdir = os.path.dirname(currentdir)
7 | sys.path.insert(0, parentdir)
8 | import scipy
9 |
10 | from keras.callbacks import ModelCheckpoint, EarlyStopping
11 | from utility.file_utility import FileUtility
12 | from utility.labeling_utility import LabelingData
13 | from utility.feed_generation_utility import train_batch_generator_408, validation_batch_generator_408, validation_batches_fortest_408
14 | from utility.vis_utility import create_mat_plot
15 | import tqdm
16 | import numpy as np
17 | import itertools
18 | from sklearn.metrics import accuracy_score, f1_score
19 | from sklearn.metrics import confusion_matrix
20 | from fpdf import FPDF, HTMLMixin
21 | import seaborn as sns; sns.set()
22 | import matplotlib.pyplot as plt
23 | import matplotlib
24 |
25 | class MyFPDF(FPDF, HTMLMixin):
26 | pass
27 |
28 | # predefined models
29 | from models.a_cnn_bilstm import model_a_cnn_bilstm
30 | from models.b_cnn_bilstm_highway import model_b_cnn_bilstm_highway
31 | from models.c_cnn_bilstm_crf import model_c_cnn_bilstm_crf
32 | from models.d_cnn_bilstm_attention import model_d_cnn_bilstm_attention
33 | from models.e_cnn import model_e_cnn
34 | from models.f_multiscale_cnn import model_f_multiscale_cnn
35 |
36 | def training_loop(**kwargs):
37 | run_parameters = kwargs['run_parameters']
38 | model_paramters = kwargs['model_paramters']
39 | model = eval(kwargs['deep_learning_model'])
40 |
41 | # which GPU to use
42 | os.environ["CUDA_VISIBLE_DEVICES"] = str(run_parameters['gpu'])
43 |
44 | # read files
45 | train_file = 'datasets/train.txt'
46 | test_file = 'datasets/test.txt'
47 | LD = LabelingData(train_file, test_file)
48 | train_lengths = [int(j) for j in FileUtility.load_list('/'.join(train_file.split('/')[0:-1]) + '/train_length.txt')]
49 | test_lengths = [int(i) for i in FileUtility.load_list('/'.join(test_file.split('/')[0:-1]) + '/test_length.txt')]
50 |
51 | # train/test batch parameters
52 | train_batch_size = run_parameters['train_batch_size']
53 | test_batch_size = run_parameters['test_batch_size']
54 | patience = run_parameters['patience']
55 | epochs = run_parameters['epochs']
56 |
57 | # model
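# each model factory (models a-f) returns the compiled Keras model together with a string
# describing its hyper-parameters; that string names the results sub-directory below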
58 | model, params = model(LD.n_classes, **model_paramters)
59 |
60 | # output directory
61 | FileUtility.ensure_dir('results/')
62 | FileUtility.ensure_dir('results/' + run_parameters['domain_name'] + '/')
63 | FileUtility.ensure_dir('results/' + run_parameters['domain_name'] + '/' + run_parameters['setting_name'] + '/')
64 | FileUtility.ensure_dir(
65 | 'results/' + run_parameters['domain_name'] + '/' + run_parameters['setting_name'] + '/' + params + '/')
66 | full_path = 'results/' + run_parameters['domain_name'] + '/' + run_parameters['setting_name'] + '/' + params + '/'
67 |
68 | # save model
69 | with open(full_path + 'config.txt', 'w') as fh:
70 | model.summary(print_fn=lambda x: fh.write(x + '\n'))
71 |
72 | # check points
73 | filepath = full_path + "/weights-improvement-{epoch:02d}-{weighted_acc:.3f}-{val_weighted_acc:.3f}.hdf5"
74 |
75 | checkpoint = ModelCheckpoint(filepath, monitor='val_weighted_acc', verbose=1, save_best_only=True, mode='max',
76 | period=1)
77 | earlystopping = EarlyStopping(monitor='val_weighted_acc', min_delta=0, patience=patience, verbose=0, mode='max',
78 | baseline=None)
79 | callbacks_list = [checkpoint, earlystopping]
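# keep only checkpoints that improve validation weighted accuracy and stop training once it
# has not improved for 'patience' consecutive epochs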
80 |
81 | # calculate the sizes
82 | steps_per_epoch = len(train_lengths) // train_batch_size if len(train_lengths) % train_batch_size == 0 else len(
83 | train_lengths) // train_batch_size + 1
84 | validation_steps = len(test_lengths) // test_batch_size if len(test_lengths) % test_batch_size == 0 else len(
85 | test_lengths) // test_batch_size + 1
86 |
87 | # feed model
88 | h = model.fit_generator(train_batch_generator_408(train_batch_size), steps_per_epoch=steps_per_epoch,
89 | validation_data=validation_batch_generator_408(test_batch_size),
90 | validation_steps=validation_steps,
91 | shuffle=False, epochs=epochs, verbose=1, callbacks=callbacks_list)
92 |
93 | # save the history
94 | FileUtility.save_obj(full_path + 'history', h.history)
95 |
96 |
97 | # Analysis of the performance
98 | pred_test = [(model.predict_on_batch(x),y,w) for x,y,w in tqdm.tqdm(validation_batches_fortest_408(1))]
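# test batches of size 1, so each entry holds the prediction, the one-hot truth and the
# sample weights for a single protein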
99 |
100 | acc_test, conf_mat, conf_mat_column_mapping, contingency_metric, chi2_res_pval, gtest_res_pval = generate_report(full_path, pred_test, run_parameters['domain_name'], run_parameters['setting_name'])
101 |
102 |
103 |
104 | def generate_report(full_path, pred_test, domain, setting):
105 | '''
106 | :param pred_test: list of (prediction, one-hot truth, sample weight) batches over the test set
107 | :return: accuracy, confusion matrix, error-location contingency table and chi-square/G-test p-values
108 | '''
109 | # Error location analysis
110 | error_edge=0
111 | error_NOTedge=0
112 | correct_edge=0
113 | correct_NOTedge=0
114 |
115 | all_pred = []
116 | all_true = []
117 |
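# iterate over the 514 CB513 test proteins; positions where the true label changes ('edges')
# mark secondary-structure transitions, used for the error-location analysis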
118 | for i in tqdm.tqdm(range(0,514)):
119 | pred=np.array([np.argmax(x, axis=1) for x in pred_test[i][0]])
120 | true=np.array([np.argmax(x, axis=1) for x in pred_test[i][1]])
121 | all_pred = all_pred + pred.tolist()
122 | all_true = all_true + true.tolist()
123 | diff=np.diff(true)
124 | errors = [y for x,y in np.argwhere(pred!=true)]
125 | corrects = list(set(list(range(len(pred[0]))))-set(errors))
126 | edges_edge = [y for x,y in np.argwhere(diff!=0)]
127 | edges_before = [x-1 for x in edges_edge if x-1>=0]
128 | edges_after = [x+1 for x in edges_edge if x+1<len(pred[0])]
168 | html = F"""DeepPrime2Sec Report on Protein Secondary Structure Prediction
169 | Experiment name: {domain} - {setting}
170 |
171 |
172 | The performance on CB513
173 | Report on the accuracy
174 |
175 |
176 | Test-set Accuracy | Test-set micro F1 | Test-set macro F1 |
177 |
178 | {round(acc_test,3)} | {round(f1_micro,3)} | {round(f1_macro,3)} |
179 |
180 |
181 |
182 |
183 |
184 | Confusion matrix
185 |
186 |
187 | """
188 |
189 | pdf.write_html(html)
190 | pdf.image(full_path+'confusion'+F"{domain}_{setting}"+'.png', x = 50, y = None, w = 100, h = 0, type = '', link = '')
191 |
192 | html=F"""
193 |
194 |
195 |
196 |
197 |
198 |
199 | Error analysis
200 |
201 | Contingency table for location analysis of the misclassified amino acids
202 |
203 | \ | Located at the PSS transition | NOT Located at the PSS transition |
204 |
205 | Misclassified | {error_edge} | {error_NOTedge} |
206 | Correctly classified | {correct_edge} | {correct_NOTedge} |
207 |
208 |
209 |
210 | P-value for Chi-square test = {chi2_res_pval}
211 |
212 | P-value for G-test = {gtest_res_pval}
213 |
214 |
215 |
216 |
217 |
218 |
219 | Learning curve
220 | """
221 | pdf.write_html(html)
222 |
223 | # learning curve
224 | history_dict=FileUtility.load_obj(full_path+'history.pickle')
225 | plt.clf()
226 | loss_values = history_dict['loss']
227 | val_loss_values = history_dict['val_loss']
228 | epochs = range(1, len(loss_values) + 1)
229 | matplotlib.rcParams['mathtext.fontset'] = 'stix'
230 | matplotlib.rcParams['font.family'] = 'STIXGeneral'
231 | matplotlib.rcParams['mathtext.fontset'] = 'custom'
232 | matplotlib.rcParams['mathtext.rm'] = 'Bitstream Vera Sans'
233 | matplotlib.rcParams['mathtext.it'] = 'Bitstream Vera Sans:italic'
234 | matplotlib.rcParams['mathtext.bf'] = 'Bitstream Vera Sans:bold'
235 | matplotlib.rcParams["axes.edgecolor"] = "black"
236 | matplotlib.rcParams["axes.linewidth"] = 0.6
237 | plt.plot(epochs, loss_values, 'ro', label='Loss for train set')
238 | plt.plot(epochs, val_loss_values, 'b', label='Loss for test set')
239 | plt.xlabel('Epochs')
240 | plt.ylabel('Loss')
241 | plt.legend(loc=1, prop={'size': 8},ncol=1, edgecolor='black', facecolor='white', frameon=True)
242 | plt.title('Loss with respect to the number of epochs for train and test sets')
243 | plt.savefig(full_path + 'learning_curve'+F"{domain}_{setting}"+'.png', dpi=300)
244 | pdf.image(full_path + 'learning_curve'+F"{domain}_{setting}"+'.png', x = 50, y = None, w = 100, h = 0, type = '', link = '')
245 |
246 |
247 | pdf.output(full_path+'final_report.pdf', 'F')
248 |
249 | return acc_test, conf_mat, conf_mat_column_mapping, contingency_metric, chi2_res_pval, gtest_res_pval
250 |
--------------------------------------------------------------------------------
/utility/vis_utility.py:
--------------------------------------------------------------------------------
1 | import seaborn as sns; sns.set()
2 | import sys
3 | sys.path.append('../')
4 | import matplotlib
5 | import matplotlib.pyplot as plt
6 |
7 | global color_schemes
8 | color_schemes=[['green','blue','red','gold', 'cyan'], ['#ff0505', '#f2a041', '#cdff05', '#04d9cb', '#45a8ff', '#8503a6', '#590202', '#734d02', '#4ab304', '#025359', '#0454cc', '#ff45da', '#993829', '#ffda45', '#1c661c', '#05cdff', '#1c2f66', '#731f57', '#b24a04', '#778003', '#0e3322', '#024566', '#0404d9', '#e5057d', '#66391c', '#31330e', '#3ee697', '#2d7da6', '#20024d', '#33011c']+list(({'aliceblue': '#F0F8FF','antiquewhite': '#FAEBD7','aqua': '#00FFFF','aquamarine': '#7FFFD4','azure': '#F0FFFF','beige': '#F5F5DC','bisque': '#FFE4C4','black': '#000000','blanchedalmond': '#FFEBCD','blue': '#0000FF','blueviolet': '#8A2BE2','brown': '#A52A2A','burlywood': '#DEB887','cadetblue': '#5F9EA0','chartreuse': '#7FFF00','chocolate': '#D2691E','coral': '#FF7F50','cornflowerblue': '#6495ED','cornsilk': '#FFF8DC','crimson': '#DC143C','cyan': '#00FFFF','darkblue': '#00008B','darkcyan': '#008B8B','darkgoldenrod': '#B8860B','darkgray': '#A9A9A9','darkgreen': '#006400','darkkhaki': '#BDB76B','darkmagenta': '#8B008B','darkolivegreen': '#556B2F','darkorange': '#FF8C00','darkorchid': '#9932CC','darkred': '#8B0000','darksalmon': '#E9967A','darkseagreen': '#8FBC8F','darkslateblue': '#483D8B','darkslategray': '#2F4F4F','darkturquoise': '#00CED1','darkviolet': '#9400D3','deeppink': '#FF1493','deepskyblue': '#00BFFF','dimgray': '#696969','dodgerblue': '#1E90FF','firebrick': '#B22222','floralwhite': '#FFFAF0','forestgreen': '#228B22','fuchsia': '#FF00FF','gainsboro': '#DCDCDC','ghostwhite': '#F8F8FF','gold': '#FFD700','goldenrod': '#DAA520','gray': '#808080','green': '#008000','greenyellow': '#ADFF2F','honeydew': '#F0FFF0','hotpink': '#FF69B4','indianred': '#CD5C5C','indigo': '#4B0082','ivory': '#FFFFF0','khaki': '#F0E68C','lavender': '#E6E6FA','lavenderblush': '#FFF0F5','lawngreen': '#7CFC00','lemonchiffon': '#FFFACD','lightblue': '#ADD8E6','lightcoral': '#F08080','lightcyan': '#E0FFFF','lightgoldenrodyellow': '#FAFAD2','lightgreen': '#90EE90','lightgray': '#D3D3D3','lightpink': '#FFB6C1','lightsalmon': '#FFA07A','lightseagreen': '#20B2AA','lightskyblue': '#87CEFA','lightslategray': '#778899','lightsteelblue': '#B0C4DE','lightyellow': '#FFFFE0','lime': '#00FF00','limegreen': '#32CD32','linen': '#FAF0E6','magenta': '#FF00FF','maroon': '#800000','mediumaquamarine': '#66CDAA','mediumblue': '#0000CD','mediumorchid': '#BA55D3','mediumpurple': '#9370DB','mediumseagreen': '#3CB371','mediumslateblue': '#7B68EE','mediumspringgreen': '#00FA9A','mediumturquoise': '#48D1CC','mediumvioletred': '#C71585','midnightblue': '#191970','mintcream': '#F5FFFA','mistyrose': '#FFE4E1','moccasin': '#FFE4B5','navajowhite': '#FFDEAD','navy': '#000080','oldlace': '#FDF5E6','olive': '#808000','olivedrab': '#6B8E23','orange': '#FFA500','orangered': '#FF4500','orchid': '#DA70D6','palegoldenrod': '#EEE8AA','palegreen': '#98FB98','paleturquoise': '#AFEEEE','palevioletred': '#DB7093','papayawhip': '#FFEFD5','peachpuff': '#FFDAB9','peru': '#CD853F','pink': '#FFC0CB','plum': '#DDA0DD','powderblue': '#B0E0E6','purple': '#800080','red': '#FF0000','rosybrown': '#BC8F8F','royalblue': '#4169E1','saddlebrown': '#8B4513','salmon': '#FA8072','sandybrown': '#FAA460','seagreen': '#2E8B57','seashell': '#FFF5EE','sienna': '#A0522D','silver': '#C0C0C0','skyblue': '#87CEEB','slateblue': '#6A5ACD','slategray': '#708090','snow': '#FFFAFA','springgreen': '#00FF7F','steelblue': '#4682B4','tan': '#D2B48C','teal': '#008080','thistle': '#D8BFD8','tomato': '#FF6347','turquoise': '#40E0D0','violet': '#EE82EE','wheat': '#F5DEB3','white': '#FFFFFF','whitesmoke': 
'#F5F5F5','yellow': '#FFFF00','yellowgreen': '#9ACD32'}).keys()),['#ff0505', '#f2a041', '#cdff05', '#04d9cb', '#45a8ff', '#8503a6', '#590202', '#734d02', '#4ab304', '#025359', '#0454cc', '#ff45da', '#993829', '#ffda45', '#1c661c', '#05cdff', '#1c2f66', '#731f57', '#b24a04', '#778003', '#0e3322', '#024566', '#0404d9', '#e5057d', '#66391c', '#31330e', '#3ee697', '#2d7da6', '#20024d', '#33011c']]
9 |
10 | def create_mat_plot(mat, axis_names, title, filename, xlab, ylab, cmap='inferno', filetype='pdf', rx=0, ry=0, font_s=10, annot=True):
11 | '''
12 | :param mat: matrix to plot (e.g. a confusion or divergence matrix)
13 | :param axis_names: tick labels for both axes
14 | :param title: plot title
15 | :param filename: output path without the file extension
16 | :return:
17 | '''
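# draw an annotated heatmap of the matrix and save it as <filename>.<filetype>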
18 | plt.rc('text')
19 | ax = sns.heatmap(mat,annot=annot, yticklabels=axis_names, xticklabels=axis_names, cmap=cmap)
20 | plt.title(title)
21 | params = {
22 | 'legend.fontsize': font_s,
23 | 'xtick.labelsize': font_s,
24 | 'ytick.labelsize': font_s,
25 | }
26 | matplotlib.rcParams['mathtext.fontset'] = 'stix'
27 | matplotlib.rcParams['font.family'] = 'STIXGeneral'
28 | matplotlib.rcParams['mathtext.fontset'] = 'custom'
29 | matplotlib.rcParams['mathtext.rm'] = 'Bitstream Vera Sans'
30 | matplotlib.rcParams['mathtext.it'] = 'Bitstream Vera Sans:italic'
31 | matplotlib.rcParams['mathtext.bf'] = 'Bitstream Vera Sans:bold'
32 | plt.xlabel(xlab)
33 | plt.ylabel(ylab)
34 | plt.xticks(rotation=rx)
35 | plt.yticks(rotation=ry)
36 | plt.rcParams.update(params)
37 | plt.tight_layout()
38 | plt.savefig(filename + '.'+filetype, dpi=300)
39 | plt.clf()
40 |
41 |
--------------------------------------------------------------------------------