├── .gitattributes
├── LICENSE
├── README.md
├── bin
│   ├── gtf_extract_gene_regions.py
│   ├── gtf_extract_transcript_regions.py
│   └── rnaprot
├── docs
│   └── framework_overview.png
├── rnaprot
│   ├── RNNNets.py
│   ├── __init__.py
│   ├── content
│   │   ├── logo1.png
│   │   ├── logo2.png
│   │   ├── plotly-latest.min.js
│   │   └── sorttable.js
│   ├── rnn_util.py
│   ├── rplib.py
│   └── test_data
│       ├── empty_file
│       ├── feat_666.bed
│       ├── feat_666_1.exp.out
│       ├── feat_666_2.exp.out
│       ├── feat_in.bed
│       ├── feat_old_nick.bed
│       ├── feat_old_nick_1.exp.out
│       ├── feat_old_nick_2.exp.out
│       ├── file1
│       ├── file2
│       ├── gene_test_in.gtf
│       ├── gtf_exon_out.bed
│       ├── gtf_exon_out_exp.bed
│       ├── gtf_gene_out.exp.bed
│       ├── gtf_intron_out.bed
│       ├── gtf_intron_out_exp.bed
│       ├── gtf_transcript_out.exp.bed
│       ├── map_test_in.bed
│       ├── map_test_in.gtf
│       ├── map_test_out_all_unique.bed
│       ├── map_test_out_transcript_stats.out
│       ├── negatives.bed
│       ├── negatives.bpp
│       ├── negatives.con
│       ├── negatives.entr
│       ├── negatives.fa
│       ├── negatives.sf
│       ├── negatives.str_elem.up
│       ├── negatives.up
│       ├── new_format.eia
│       ├── positives.bed
│       ├── positives.bpp
│       ├── positives.con
│       ├── positives.entr
│       ├── positives.fa
│       ├── positives.sf
│       ├── positives.str_elem.up
│       ├── positives.up
│       ├── test.bpp
│       ├── test.con
│       ├── test.elem_p.str
│       ├── test.entr
│       ├── test.exon_intron_labels
│       ├── test.fa
│       ├── test.ids
│       ├── test.pp.con
│       ├── test.region_labels
│       ├── test.sf
│       ├── test.sorted.bed
│       ├── test.sorted.merged.exp.bed
│       ├── test.str_elem.up
│       ├── test.tra
│       ├── test.up
│       ├── test1.bed
│       ├── test2.bed
│       ├── test2.bpp
│       ├── test2.con
│       ├── test2.exon_intron_labels
│       ├── test2.exp.bed
│       ├── test2.exp.con
│       ├── test2.exp.exon_intron_labels
│       ├── test2.fa
│       ├── test2.pp.con
│       ├── test2.up
│       ├── test3.bed
│       ├── test3.fa
│       ├── test4.bed
│       ├── test4.exp.fa
│       ├── test4.fa
│       ├── test5.bed
│       ├── test5.centered_zero_sc.bed
│       ├── test5.exp.bed
│       ├── test5.exp.fa
│       ├── test6.bed
│       ├── test7.exp.out
│       ├── test7.feat.bed
│       ├── test7.in.bed
│       ├── test8.bed
│       ├── test8.exp.rra
│       ├── test_border_annot.exp.bed
│       ├── test_border_annot.gtf
│       ├── test_con.bed
│       ├── test_con.bw
│       ├── test_con.exp.con
│       ├── test_con2.exp.con
│       ├── test_eia.bed
│       ├── test_eia.exp1.eia
│       ├── test_eia.exp2.eia
│       ├── test_eia.exp3.eia
│       ├── test_eia.exp4.eia
│       ├── test_eia.gtf
│       ├── test_intersect.genes.bed
│       ├── test_intersect.sites.bed
│       ├── test_lengths_to_bed.exp.bed
│       ├── test_most_prom_select.gtf
│       ├── test_new_format.tra
│       ├── test_seq_extr.chr_sizes
│       ├── test_seq_extr.sequences.2bit
│       ├── test_seq_extr.sequences.fa
│       ├── test_seq_extr.sites.bed
│       ├── test_seq_extr.sites.exp.fa
│       ├── test_seq_feat.in
│       ├── test_seq_feat_gp.exp.out
│       ├── test_seq_feat_gp.in
│       ├── test_seq_feat_neg.exp.out
│       ├── test_seq_feat_pos.exp.out
│       ├── test_settings.out
│       ├── test_start_end.bed
│       ├── test_start_end.exp.bed
│       ├── test_tr2gen.bed
│       ├── test_tr2gen.exp.bed
│       ├── test_tr2gen.gtf
│       ├── test_tr_annot.bed
│       ├── test_tr_annot.codons.exp.tra
│       ├── test_tr_annot.exp.tra
│       ├── test_tr_annot.gtf
│       ├── test_tr_annot_gtf.exp.bed
│       ├── test_tsl_genes.exp.bed
│       └── test_tsl_genes.gtf
├── setup.py
└── test
    ├── CDE_sites.bed
    ├── ENST00000314032.fa
    ├── NORAD_lncRNA.fa
    ├── PUM2_PARCLIP.negatives.fa
    ├── PUM2_PARCLIP.positives.fa
    └── cde_sites_str_model_folder.zip

/.gitattributes:
--------------------------------------------------------------------------------
1 | supplements export-ignore
2 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2021 Michael Uhl
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/bin/gtf_extract_gene_regions.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | 
3 | from rnaprot import rplib
4 | import subprocess
5 | import argparse
6 | import os
7 | import re
8 | 
9 | 
10 | def setup_argument_parser():
11 |     """Setup argparse parser."""
12 |     help_description = """
13 | Extract gene regions in BED format (6-col) from (Ensembl) GTF.
14 | 
15 | """
16 |     # Define argument parser.
17 |     p = argparse.ArgumentParser(add_help=False,
18 |                                 prog="gtf_extract_gene_regions.py",
19 |                                 description=help_description,
20 |                                 formatter_class=argparse.MetavarTypeHelpFormatter)
21 | 
22 |     # Required arguments.
23 |     p.add_argument("-h", "--help",
24 |                    action="help",
25 |                    help="Print help message")
26 |     p.add_argument("--ids",
27 |                    dest="in_gene_ids",
28 |                    type=str,
29 |                    nargs='+',
30 |                    required = True,
31 |                    help = "Gene ID(s) to extract BED regions for from --gtf (e.g. --ids ENSG00000223972 ENSG00000227232). NOTE1: provide IDs without version numbers. NOTE2: --ids also accepts a file with gene IDs (one ID per row)")
32 |     p.add_argument("--gtf",
33 |                    dest="in_gtf",
34 |                    type=str,
35 |                    metavar='str',
36 |                    required = True,
37 |                    help = "Genomic annotations GTF file (.gtf or .gtf.gz)")
38 |     p.add_argument("--out",
39 |                    dest="out_bed",
40 |                    type=str,
41 |                    required = True,
42 |                    help = "Output BED file")
43 |     return p
44 | 
45 | 
46 | 
47 | if __name__ == '__main__':
48 | 
49 |     parser = setup_argument_parser()
50 |     args = parser.parse_args()
51 | 
52 |     assert os.path.exists(args.in_gtf), "input .gtf file \"%s\" not found" %(args.in_gtf)
53 | 
54 |     gene_ids_dic = {}
55 |     if len(args.in_gene_ids) == 1 and os.path.exists(args.in_gene_ids[0]):
56 |         gene_ids_dic = rplib.read_ids_into_dic(args.in_gene_ids[0],
57 |                                                check_dic=False)
58 |         assert gene_ids_dic, "no gene IDs read in from %s" %(args.in_gene_ids[0])
59 |     else:
60 |         for gene_id in args.in_gene_ids:
61 |             gene_ids_dic[gene_id] = 1
62 |     assert gene_ids_dic, "no gene IDs read into gene_ids_dic"
63 | 
64 |     print("Extracting gene regions from GTF ... ")
65 |     rplib.gtf_extract_gene_bed(args.in_gtf, args.out_bed,
66 |                                gene_ids_dic=gene_ids_dic)
67 | 
68 |     bed_ids_dic = rplib.bed_get_region_ids(args.out_bed,
69 |                                            check_dic=False)
70 | 
71 |     assert bed_ids_dic, "no gene regions extracted from --gtf. Gene IDs provided via --ids must be present in --gtf"
72 | 
73 |     c_extracted = 0
74 |     for gene_id in gene_ids_dic:
75 |         if gene_id not in bed_ids_dic:
76 |             print("WARNING: no gene region extracted for --ids gene ID %s" %(gene_id))
77 |         else:
78 |             c_extracted += 1
79 |     if c_extracted:
80 |         print("# of extracted gene regions: %i" %(c_extracted))
81 |     print("Gene regions written to:\n%s" %(args.out_bed))
82 | 
--------------------------------------------------------------------------------
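The rplib helpers used above (read_ids_into_dic(), gtf_extract_gene_bed(), bed_get_region_ids()) live in rnaprot/rplib.py, which is not part of this section. As a rough sketch of the --ids file handling: read_ids_into_dic() presumably reads one ID per row into a dictionary keyed by ID. The function name and check_dic keyword are taken from the calls above; the body below is an assumption, not the actual rplib implementation:

    def read_ids_into_dic(ids_file, check_dic=True):
        """Read IDs (one per row) into a dictionary (sketch of assumed behavior)."""
        ids_dic = {}
        with open(ids_file) as f:
            for line in f:
                row_id = line.strip()
                if row_id:
                    ids_dic[row_id] = 1
        if check_dic:
            assert ids_dic, "no IDs read in from %s" %(ids_file)
        return ids_dic

This mirrors the script's fallback branch: if --ids is not an existing file, each given ID is stored directly with value 1.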
") 65 | rplib.gtf_extract_gene_bed(args.in_gtf, args.out_bed, 66 | gene_ids_dic=gene_ids_dic) 67 | 68 | bed_ids_dic = rplib.bed_get_region_ids(args.out_bed, 69 | check_dic=False) 70 | 71 | assert bed_ids_dic, "no gene regions extracted from --gtf. Gene IDs provided via --ids must be present in --gtf" 72 | 73 | c_extracted = 0 74 | for gene_id in gene_ids_dic: 75 | if gene_id not in bed_ids_dic: 76 | print("WARNING: no gene region extracted for --in gene ID %s" %(gene_id)) 77 | else: 78 | c_extracted += 1 79 | if c_extracted: 80 | print("# of extracted gene regions: %i" %(c_extracted)) 81 | print("Gene regions written to:\n%s" %(args.out_bed)) 82 | -------------------------------------------------------------------------------- /bin/gtf_extract_transcript_regions.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from rnaprot import rplib 4 | import subprocess 5 | import argparse 6 | import os 7 | import re 8 | 9 | 10 | def setup_argument_parser(): 11 | """Setup argparse parser.""" 12 | help_description = """ 13 | Extract transcript regions in BED format (6-col) from (Ensembl) GTF. 14 | 15 | """ 16 | # Define argument parser. 17 | p = argparse.ArgumentParser(add_help=False, 18 | prog="gtf_extract_transcript_regions.py", 19 | description=help_description, 20 | formatter_class=argparse.MetavarTypeHelpFormatter) 21 | 22 | # Required arguments. 23 | p.add_argument("-h", "--help", 24 | action="help", 25 | help="Print help message") 26 | p.add_argument("--ids", 27 | dest="in_tr_ids", 28 | type=str, 29 | nargs='+', 30 | required = True, 31 | help = "Provide transcript ID(s) to extract BED regions for (e.g. --ids ENST00000456328 ENST00000488147) from --gtf. NOTE1 that IDs must be provided without version number. NOTE2 --ids also accepts a file with transcript IDs (one ID per row)") 32 | p.add_argument("--gtf", 33 | dest="in_gtf", 34 | type=str, 35 | metavar='str', 36 | required = True, 37 | help = "Genomic annotations GTF file (.gtf or .gtf.gz)") 38 | p.add_argument("--out", 39 | dest="out_bed", 40 | type=str, 41 | required = True, 42 | help = "Output BED file") 43 | return p 44 | 45 | 46 | 47 | if __name__ == '__main__': 48 | 49 | parser = setup_argument_parser() 50 | args = parser.parse_args() 51 | 52 | assert os.path.exists(args.in_gtf), "input .gtf file \"%s\" not found" %(args.in_gtf) 53 | 54 | tr_ids_dic = {} 55 | if len(args.in_tr_ids) == 1 and os.path.exists(args.in_tr_ids[0]): 56 | tr_ids_dic = rplib.read_ids_into_dic(args.in_tr_ids[0], 57 | check_dic=False) 58 | assert tr_ids_dic, "no transcript IDs read in from %s" %(args.in_tr_ids[0]) 59 | else: 60 | for tr_id in args.in_tr_ids: 61 | tr_ids_dic[tr_id] = 1 62 | assert tr_ids_dic, "no transcript IDs read into tr_ids_dic" 63 | 64 | print("Extracting transcript regions from GTF ... 
") 65 | tr_len_dic = rplib.gtf_get_transcript_lengths(args.in_gtf) 66 | assert tr_len_dic, "no transcript lengths extracted from --gtf %s" %(args.in_gtf) 67 | 68 | c_miss = 0 69 | c_found = 0 70 | for tr_id in tr_ids_dic: 71 | if tr_id not in tr_len_dic: 72 | print("WARNING: --in transcript ID %s has no entry in --gtf" %(tr_id)) 73 | else: 74 | c_found += 1 75 | assert c_found, "no --gtf entries for any --in transcript IDs" 76 | rplib.bed_sequence_lengths_to_bed(tr_len_dic, args.out_bed, 77 | ids_dic=tr_ids_dic) 78 | 79 | print("# of extracted transcript regions: %i" %(c_found)) 80 | print("Transcript regions written to:\n%s" %(args.out_bed)) 81 | 82 | -------------------------------------------------------------------------------- /docs/framework_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BackofenLab/RNAProt/6091c9882d0355e3457607f12ee43ddf82cbb11b/docs/framework_overview.png -------------------------------------------------------------------------------- /rnaprot/RNNNets.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data import Dataset 3 | from torch.nn.utils.rnn import pack_padded_sequence 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | import sys 7 | 8 | ############################################################################### 9 | 10 | class RNNDataset(Dataset): 11 | def __init__(self, in_data, in_labels): 12 | self.data = in_data 13 | self.labels = in_labels 14 | 15 | def __len__(self): 16 | return len(self.data) 17 | 18 | def __getitem__(self, index): 19 | return self.data[index], self.labels[index] 20 | 21 | 22 | ############################################################################### 23 | 24 | class LSTMModel_old(nn.Module): 25 | def __init__(self, input_dim, hidden_dim, n_layer, n_class, device): 26 | super(LSTMModel_old, self).__init__() 27 | self.input_dim = input_dim 28 | self.hidden_dim = hidden_dim 29 | self.n_layer = n_layer 30 | self.lstm = nn.LSTM(self.input_dim, self.hidden_dim, self.n_layer, batch_first=True) 31 | self.fc = nn.Linear(self.hidden_dim, n_class) 32 | self.device = device 33 | 34 | def zero_state(self, batch_size): 35 | h0 = torch.zeros(self.n_layer, batch_size, self.hidden_dim, dtype=torch.float).requires_grad_().to(self.device) 36 | c0 = torch.zeros(self.n_layer, batch_size, self.hidden_dim, dtype=torch.float).requires_grad_().to(self.device) 37 | return h0, c0 38 | 39 | def forward(self, embed): 40 | embed = embed.cuda() 41 | batch_size = embed.batch_sizes[0].item() 42 | h0, c0 = self.zero_state(batch_size) 43 | out, (hidden, cell) = self.lstm(embed, (h0, c0)) 44 | x = F.log_softmax(self.fc(hidden.float()), dim=-1) 45 | return x 46 | 47 | 48 | ############################################################################### 49 | 50 | class GRUModel_old(nn.Module): 51 | def __init__(self, input_dim, hidden_dim, n_layer, n_class, device, 52 | dr=0.5): 53 | super(GRUModel_old, self).__init__() 54 | self.hidden_dim = hidden_dim 55 | self.n_layer = n_layer 56 | self.gru = nn.GRU(input_dim, hidden_dim, n_layer, batch_first=True) 57 | self.fc = nn.Linear(hidden_dim, n_class) 58 | self.device = device 59 | self.dr = dr 60 | 61 | def zero_state(self, batch_size): 62 | h0 = torch.zeros(self.n_layer, batch_size, self.hidden_dim, dtype=torch.float).requires_grad_().to(self.device) 63 | return h0 64 | 65 | def forward(self, embed): 66 | embed = embed.cuda() 67 | batch_size = 
24 | class LSTMModel_old(nn.Module):
25 |     def __init__(self, input_dim, hidden_dim, n_layer, n_class, device):
26 |         super(LSTMModel_old, self).__init__()
27 |         self.input_dim = input_dim
28 |         self.hidden_dim = hidden_dim
29 |         self.n_layer = n_layer
30 |         self.lstm = nn.LSTM(self.input_dim, self.hidden_dim, self.n_layer, batch_first=True)
31 |         self.fc = nn.Linear(self.hidden_dim, n_class)
32 |         self.device = device
33 | 
34 |     def zero_state(self, batch_size):
35 |         h0 = torch.zeros(self.n_layer, batch_size, self.hidden_dim, dtype=torch.float).requires_grad_().to(self.device)
36 |         c0 = torch.zeros(self.n_layer, batch_size, self.hidden_dim, dtype=torch.float).requires_grad_().to(self.device)
37 |         return h0, c0
38 | 
39 |     def forward(self, embed):
40 |         embed = embed.to(self.device)  # move packed batch to the model's device
41 |         batch_size = embed.batch_sizes[0].item()
42 |         h0, c0 = self.zero_state(batch_size)
43 |         out, (hidden, cell) = self.lstm(embed, (h0, c0))
44 |         x = F.log_softmax(self.fc(hidden.float()), dim=-1)
45 |         return x
46 | 
47 | 
48 | ###############################################################################
49 | 
50 | class GRUModel_old(nn.Module):
51 |     def __init__(self, input_dim, hidden_dim, n_layer, n_class, device,
52 |                  dr=0.5):
53 |         super(GRUModel_old, self).__init__()
54 |         self.hidden_dim = hidden_dim
55 |         self.n_layer = n_layer
56 |         self.gru = nn.GRU(input_dim, hidden_dim, n_layer, batch_first=True)
57 |         self.fc = nn.Linear(hidden_dim, n_class)
58 |         self.device = device
59 |         self.dr = dr
60 | 
61 |     def zero_state(self, batch_size):
62 |         h0 = torch.zeros(self.n_layer, batch_size, self.hidden_dim, dtype=torch.float).requires_grad_().to(self.device)
63 |         return h0
64 | 
65 |     def forward(self, embed):
66 |         embed = embed.to(self.device)  # move packed batch to the model's device
67 |         batch_size = embed.batch_sizes[0].item()
68 |         h0 = self.zero_state(batch_size)
69 |         out, hidden = self.gru(embed, h0.detach())
70 |         x = F.dropout(hidden, p=self.dr, training=self.training)
71 |         x = F.log_softmax(self.fc(x), dim=-1)
72 |         return x
73 | 
74 | 
75 | ###############################################################################
76 | 
77 | class GRUModel(nn.Module):
78 |     def __init__(self, input_dim, n_class, device,
79 |                  gru_n_layers=2,
80 |                  gru_hidden_dim=32,
81 |                  bidirect=True,
82 |                  add_feat=False,
83 |                  dropout_rate=0.5,
84 |                  add_fc_layer=True,
85 |                  embed=True,
86 |                  embed_vocab_size=5,
87 |                  embed_dim=10):
88 | 
89 |         super(GRUModel, self).__init__()
90 | 
91 |         self.bidirect = bidirect
92 |         self.embed = embed
93 |         self.add_feat = add_feat
94 |         self.gru_n_layers = gru_n_layers
95 |         self.gru_hidden_dim = gru_hidden_dim
96 |         self.device = device
97 |         self.add_fc_layer = add_fc_layer
98 |         self.embedding = nn.Embedding(embed_vocab_size, embed_dim)
99 |         # Dropout.
100 |         self.dropout = nn.Dropout(dropout_rate)
101 |         # GRU.
102 |         if embed:
103 |             if add_feat:
104 |                 self.gru = nn.GRU(embed_dim + (input_dim - 1), gru_hidden_dim, gru_n_layers,
105 |                                   bidirectional=bidirect, bias=True,
106 |                                   batch_first=True).to(device)
107 |             else:
108 |                 self.gru = nn.GRU(embed_dim, gru_hidden_dim, gru_n_layers,
109 |                                   bidirectional=bidirect, bias=True,
110 |                                   batch_first=True).to(device)
111 |         else:
112 |             self.gru = nn.GRU(input_dim, gru_hidden_dim, gru_n_layers,
113 |                               bidirectional=bidirect, bias=True,
114 |                               batch_first=True).to(device)
115 |         if bidirect:
116 |             if add_fc_layer:
117 |                 self.fc1 = nn.Linear(2*gru_hidden_dim, gru_hidden_dim)
118 |                 self.fc2 = nn.Linear(gru_hidden_dim, n_class)
119 |             else:
120 |                 self.fc = nn.Linear(2*gru_hidden_dim, n_class)
121 |         else:
122 |             if add_fc_layer:
123 |                 self.fc1 = nn.Linear(gru_hidden_dim, int(gru_hidden_dim/2))
124 |                 self.fc2 = nn.Linear(int(gru_hidden_dim/2), n_class)
125 |             else:
126 |                 self.fc = nn.Linear(gru_hidden_dim, n_class)
127 | 
128 | 
129 |     def zero_state(self, batch_size):
130 |         if self.bidirect:
131 |             h0 = torch.zeros(2*self.gru_n_layers, batch_size, self.gru_hidden_dim, dtype=torch.float).requires_grad_().to(self.device)
132 |         else:
133 |             h0 = torch.zeros(self.gru_n_layers, batch_size, self.gru_hidden_dim, dtype=torch.float).requires_grad_().to(self.device)
134 |         return h0
135 | 
136 | 
137 |     def forward(self, batch_data, batch_lens, batch_size):
138 |         h0 = self.zero_state(batch_size)
139 | 
140 |         if self.embed:
141 |             x_embed = self.embedding(batch_data[:, :, 0].long()).clone().detach().requires_grad_(True)
142 |             if self.add_feat:
143 |                 x_embed = torch.cat([x_embed, batch_data[:, :, 1:]], dim=2)
144 |         else:
145 |             # Without embedding.
146 |             x_embed = batch_data.clone().detach().requires_grad_(True)
147 | 
148 |         x_packed = pack_padded_sequence(x_embed, batch_lens, batch_first=True, enforce_sorted=False)
149 | 
150 |         if self.bidirect:
151 |             out, hidden = self.gru(x_packed, h0)
152 |             hidden = torch.cat([hidden[0], hidden[1]], dim=1).unsqueeze(0)  # h_n is (n_layers*2, B, H); indices 0/1 pick the first layer's fwd/bwd states
153 |         else:
154 |             out, hidden = self.gru(x_packed, h0)
155 | 
156 |         x = self.dropout(hidden)
157 | 
158 |         if self.add_fc_layer:
159 |             x = self.dropout(self.fc1(x))
160 |             x = self.fc2(x)
161 |         else:
162 |             x = self.fc(x)
163 | 
164 |         return x, x_embed
165 | 
166 | 
167 | ###############################################################################
168 | 
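# Input feature layout assumed by GRUModel (and the classes below) when
# embed=True: channel 0 of batch_data holds an integer token index per
# position, which is fed to nn.Embedding (embed_vocab_size=5 presumably
# covers the 4 RNA bases plus one padding index -- an assumption), and with
# add_feat=True the remaining input_dim - 1 channels (e.g. structure or
# conservation scores) are concatenated onto the embedding. A toy batch in
# that layout:
#
#     tokens = torch.randint(1, 5, (2, 10, 1)).float()  # channel 0: base indices
#     extra = torch.rand(2, 10, 3)                      # 3 extra feature channels
#     batch_data = torch.cat([tokens, extra], dim=2)    # input_dim = 1 + 3 = 4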
169 | class RNNModel(nn.Module):
170 |     def __init__(self, input_dim, n_class, device,
171 |                  rnn_type=1,
172 |                  rnn_n_layers=2,
173 |                  rnn_hidden_dim=32,
174 |                  bidirect=True,
175 |                  add_feat=False,
176 |                  dropout_rate=0.5,
177 |                  add_fc_layer=True,
178 |                  embed=True,
179 |                  embed_vocab_size=5,
180 |                  embed_dim=10):
181 | 
182 |         super(RNNModel, self).__init__()
183 | 
184 |         self.bidirect = bidirect
185 |         self.embed = embed
186 |         self.add_feat = add_feat
187 |         self.rnn_type = rnn_type
188 |         self.rnn_n_layers = rnn_n_layers
189 |         self.rnn_hidden_dim = rnn_hidden_dim
190 |         self.device = device
191 |         self.add_fc_layer = add_fc_layer
192 | 
193 |         # Embedding.
194 |         self.embedding = nn.Embedding(embed_vocab_size, embed_dim)
195 | 
196 |         # Dropout.
197 |         self.dropout = nn.Dropout(dropout_rate)
198 | 
199 |         # RNN layer.
200 |         if embed:
201 |             if add_feat:
202 |                 if rnn_type == 1:
203 |                     self.gru = nn.GRU(embed_dim + (input_dim - 1),
204 |                                       rnn_hidden_dim, rnn_n_layers,
205 |                                       bidirectional=bidirect, bias=True,
206 |                                       batch_first=True).to(device)
207 |                 else:
208 |                     self.lstm = nn.LSTM(embed_dim + (input_dim - 1),
209 |                                         rnn_hidden_dim, rnn_n_layers,
210 |                                         bidirectional=bidirect, bias=True,
211 |                                         batch_first=True).to(device)
212 | 
213 |             else:
214 |                 if rnn_type == 1:
215 |                     self.gru = nn.GRU(embed_dim, rnn_hidden_dim, rnn_n_layers,
216 |                                       bidirectional=bidirect, bias=True,
217 |                                       batch_first=True).to(device)
218 |                 else:
219 |                     self.lstm = nn.LSTM(embed_dim, rnn_hidden_dim, rnn_n_layers,
220 |                                         bidirectional=bidirect, bias=True,
221 |                                         batch_first=True).to(device)
222 |         else:
223 |             if rnn_type == 1:
224 |                 self.gru = nn.GRU(input_dim, rnn_hidden_dim, rnn_n_layers,
225 |                                   bidirectional=bidirect, bias=True,
226 |                                   batch_first=True).to(device)
227 |             else:
228 |                 self.lstm = nn.LSTM(input_dim, rnn_hidden_dim, rnn_n_layers,
229 |                                     bidirectional=bidirect, bias=True,
230 |                                     batch_first=True).to(device)
231 |         # FC layers.
232 |         if bidirect:
233 |             if add_fc_layer:
234 |                 self.fc1 = nn.Linear(2*rnn_hidden_dim, rnn_hidden_dim)
235 |                 self.fc2 = nn.Linear(rnn_hidden_dim, n_class)
236 |             else:
237 |                 self.fc = nn.Linear(2*rnn_hidden_dim, n_class)
238 |         else:
239 |             if add_fc_layer:
240 |                 self.fc1 = nn.Linear(rnn_hidden_dim, int(rnn_hidden_dim/2))
241 |                 self.fc2 = nn.Linear(int(rnn_hidden_dim/2), n_class)
242 |             else:
243 |                 self.fc = nn.Linear(rnn_hidden_dim, n_class)
244 | 
245 | 
246 |     def zero_state(self, batch_size):
247 |         if self.rnn_type == 1:
248 |             if self.bidirect:
249 |                 h0 = torch.zeros(2*self.rnn_n_layers, batch_size, self.rnn_hidden_dim, dtype=torch.float).requires_grad_().to(self.device)
250 |             else:
251 |                 h0 = torch.zeros(self.rnn_n_layers, batch_size, self.rnn_hidden_dim, dtype=torch.float).requires_grad_().to(self.device)
252 |             return h0
253 |         else:
254 |             if self.bidirect:
255 |                 h0 = torch.zeros(2*self.rnn_n_layers, batch_size, self.rnn_hidden_dim, dtype=torch.float).requires_grad_().to(self.device)
256 |                 c0 = torch.zeros(2*self.rnn_n_layers, batch_size, self.rnn_hidden_dim, dtype=torch.float).requires_grad_().to(self.device)
257 |             else:
258 |                 h0 = torch.zeros(self.rnn_n_layers, batch_size, self.rnn_hidden_dim, dtype=torch.float).requires_grad_().to(self.device)
259 |                 c0 = torch.zeros(self.rnn_n_layers, batch_size, self.rnn_hidden_dim, dtype=torch.float).requires_grad_().to(self.device)
260 |             return h0, c0
261 | 
262 | 
263 |     def forward(self, batch_data, batch_lens, batch_size):
264 |         if self.rnn_type == 1:
265 |             h0 = self.zero_state(batch_size)
266 |         else:
267 |             h0, c0 = self.zero_state(batch_size)
268 | 
269 |         if self.embed:
270 |             x_embed = self.embedding(batch_data[:, :, 0].long()).clone().detach().requires_grad_(True)
271 |             if self.add_feat:
272 |                 x_embed = torch.cat([x_embed, batch_data[:, :, 1:]], dim=2)
273 |                 x_embed = x_embed.clone().detach().requires_grad_(True)
274 |         else:
275 |             # Without embedding.
276 |             x_embed = batch_data.clone().detach().requires_grad_(True)
277 | 
278 |         x_packed = pack_padded_sequence(x_embed, batch_lens, batch_first=True, enforce_sorted=False)
279 | 
280 |         if self.rnn_type == 1:
281 |             if self.bidirect:
282 |                 out, hidden = self.gru(x_packed, h0)
283 |                 hidden = torch.cat([hidden[0], hidden[1]], dim=1).unsqueeze(0)
284 |             else:
285 |                 out, hidden = self.gru(x_packed, h0)
286 |         else:
287 |             if self.bidirect:
288 |                 out, (hidden, cell) = self.lstm(x_packed, (h0, c0))
289 |                 hidden = torch.cat([hidden[0], hidden[1]], dim=1).unsqueeze(0)
290 |             else:
291 |                 out, (hidden, cell) = self.lstm(x_packed, (h0, c0))
292 | 
293 |         x = self.dropout(hidden)
294 | 
295 |         if self.add_fc_layer:
296 |             x = self.dropout(self.fc1(x))
297 |             x = self.fc2(x)
298 |         else:
299 |             x = self.fc(x)
300 | 
301 |         #x = F.log_softmax(x, dim=-1)
302 | 
303 |         return x, x_embed
304 | 
305 | 
306 | ###############################################################################
307 | 
308 | class LSTMModel(nn.Module):
309 |     def __init__(self, input_dim, n_class, device,
310 |                  lstm_n_layers=2,
311 |                  lstm_hidden_dim=32,
312 |                  bidirect=True,
313 |                  add_feat=False,
314 |                  dropout_rate=0.5,
315 |                  add_fc_layer=True,
316 |                  embed=True,
317 |                  embed_vocab_size=5,
318 |                  embed_dim=10):
319 | 
320 |         super(LSTMModel, self).__init__()
321 | 
322 |         self.bidirect = bidirect
323 |         self.embed = embed
324 |         self.add_feat = add_feat
325 |         self.lstm_n_layers = lstm_n_layers
326 |         self.lstm_hidden_dim = lstm_hidden_dim
327 |         self.device = device
328 |         self.add_fc_layer = add_fc_layer
329 |         self.embedding = nn.Embedding(embed_vocab_size, embed_dim)
330 |         # Dropout.
331 |         self.dropout = nn.Dropout(dropout_rate)
332 |         # LSTM.
333 |         if embed:
334 |             if add_feat:
335 |                 self.lstm = nn.LSTM(embed_dim + (input_dim - 1), lstm_hidden_dim, lstm_n_layers,
336 |                                     bidirectional=bidirect, bias=True,
337 |                                     batch_first=True).to(device)
338 |             else:
339 |                 self.lstm = nn.LSTM(embed_dim, lstm_hidden_dim, lstm_n_layers,
340 |                                     bidirectional=bidirect, bias=True,
341 |                                     batch_first=True).to(device)
342 |         else:
343 |             self.lstm = nn.LSTM(input_dim, lstm_hidden_dim, lstm_n_layers,
344 |                                 bidirectional=bidirect, bias=True,
345 |                                 batch_first=True).to(device)
346 |         if bidirect:
347 |             if add_fc_layer:
348 |                 self.fc1 = nn.Linear(2*lstm_hidden_dim, lstm_hidden_dim)
349 |                 self.fc2 = nn.Linear(lstm_hidden_dim, n_class)
350 |             else:
351 |                 self.fc = nn.Linear(2*lstm_hidden_dim, n_class)
352 |         else:
353 |             if add_fc_layer:
354 |                 self.fc1 = nn.Linear(lstm_hidden_dim, int(lstm_hidden_dim/2))
355 |                 self.fc2 = nn.Linear(int(lstm_hidden_dim/2), n_class)
356 |             else:
357 |                 self.fc = nn.Linear(lstm_hidden_dim, n_class)
358 | 
359 | 
360 |     def zero_state(self, batch_size):
361 |         if self.bidirect:
362 |             h0 = torch.zeros(2*self.lstm_n_layers, batch_size, self.lstm_hidden_dim, dtype=torch.float).requires_grad_().to(self.device)
363 |             c0 = torch.zeros(2*self.lstm_n_layers, batch_size, self.lstm_hidden_dim, dtype=torch.float).requires_grad_().to(self.device)
364 |         else:
365 |             h0 = torch.zeros(self.lstm_n_layers, batch_size, self.lstm_hidden_dim, dtype=torch.float).requires_grad_().to(self.device)
366 |             c0 = torch.zeros(self.lstm_n_layers, batch_size, self.lstm_hidden_dim, dtype=torch.float).requires_grad_().to(self.device)
367 |         return h0, c0
368 | 
369 | 
370 |     def forward(self, batch_data, batch_lens, batch_size):
371 |         h0, c0 = self.zero_state(batch_size)
372 | 
373 |         if self.embed:
374 |             x_embed = self.embedding(batch_data[:, :, 0].long()).clone().detach().requires_grad_(True)
375 |             if self.add_feat:
376 |                 x_embed = torch.cat([x_embed, batch_data[:, :, 1:]], dim=2)
377 |         else:
378 |             x_embed = batch_data.clone().detach().requires_grad_(True)
379 | 
380 |         x_packed = pack_padded_sequence(x_embed, batch_lens, batch_first=True, enforce_sorted=False)
381 | 
382 |         if self.bidirect:
383 |             out, (hidden, cell) = self.lstm(x_packed, (h0, c0))
384 |             hidden = torch.cat([hidden[0], hidden[1]], dim=1).unsqueeze(0)
385 |         else:
386 |             out, (hidden, cell) = self.lstm(x_packed, (h0, c0))
387 | 
388 |         x = self.dropout(hidden)
389 | 
390 |         if self.add_fc_layer:
391 |             x = self.dropout(self.fc1(x))
392 |             x = self.fc2(x)
393 |         else:
394 |             x = self.fc(x)
395 | 
396 |         return x, x_embed
397 | 
398 | ###############################################################################
399 | 
--------------------------------------------------------------------------------
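A minimal end-to-end sketch of driving the RNNModel class from RNNNets.py above (toy shapes and values; rnaprot's real training and evaluation code lives in rnaprot/rnn_util.py, which is not included in this section):

    import torch
    from rnaprot.RNNNets import RNNModel

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # input_dim = 4: channel 0 = token index, plus 3 extra feature channels.
    model = RNNModel(input_dim=4, n_class=2, device=device,
                     rnn_type=1, add_feat=True).to(device)  # rnn_type=1 -> GRU

    batch_size, max_len = 4, 50
    tokens = torch.randint(1, 5, (batch_size, max_len, 1)).float()
    extra = torch.rand(batch_size, max_len, 3)
    batch_data = torch.cat([tokens, extra], dim=2).to(device)
    batch_lens = torch.tensor([50, 42, 37, 20])  # true lengths; stays on CPU for packing
    logits, x_embed = model(batch_data, batch_lens, batch_size)
    print(logits.shape)  # torch.Size([1, 4, 2]); leading 1 from the unsqueeze(0)

Note that x_embed comes back with requires_grad=True, presumably so that gradients with respect to the (embedded) input can be used for saliency-style position profiles.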
/rnaprot/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BackofenLab/RNAProt/6091c9882d0355e3457607f12ee43ddf82cbb11b/rnaprot/__init__.py
--------------------------------------------------------------------------------
/rnaprot/content/logo1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BackofenLab/RNAProt/6091c9882d0355e3457607f12ee43ddf82cbb11b/rnaprot/content/logo1.png
--------------------------------------------------------------------------------
/rnaprot/content/logo2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BackofenLab/RNAProt/6091c9882d0355e3457607f12ee43ddf82cbb11b/rnaprot/content/logo2.png
--------------------------------------------------------------------------------
/rnaprot/content/sorttable.js:
--------------------------------------------------------------------------------
1 | /*
2 |   SortTable
3 |   version 2
4 |   7th April 2007
5 |   Stuart Langridge, http://www.kryogenix.org/code/browser/sorttable/
6 | 
7 |   Instructions:
8 |   Download this file
9 |   Add <script src="sorttable.js"></script> to your HTML
10 |   Add class="sortable" to any table you'd like to make sortable
11 |   Click on the headers to sort
12 | 
13 |   Thanks to many, many people for contributions and suggestions.
14 |   Licenced as X11: http://www.kryogenix.org/code/browser/licence.html
15 |   This basically means: do what you want with it.
16 | */
17 | 
18 | 
19 | var stIsIE = /*@cc_on!@*/false;
20 | 
21 | sorttable = {
22 |   init: function() {
23 |     // quit if this function has already been called
24 |     if (arguments.callee.done) return;
25 |     // flag this function so we don't do the same thing twice
26 |     arguments.callee.done = true;
27 |     // kill the timer
28 |     if (_timer) clearInterval(_timer);
29 | 
30 |     if (!document.createElement || !document.getElementsByTagName) return;
31 | 
32 |     sorttable.DATE_RE = /^(\d\d?)[\/\.-](\d\d?)[\/\.-]((\d\d)?\d\d)$/;
33 | 
34 |     forEach(document.getElementsByTagName('table'), function(table) {
35 |       if (table.className.search(/\bsortable\b/) != -1) {
36 |         sorttable.makeSortable(table);
37 |       }
38 |     });
39 | 
40 |   },
41 | 
42 |   makeSortable: function(table) {
43 |     if (table.getElementsByTagName('thead').length == 0) {
44 |       // table doesn't have a tHead. Since it should have, create one and
45 |       // put the first table row in it.
46 |       the = document.createElement('thead');
47 |       the.appendChild(table.rows[0]);
48 |       table.insertBefore(the,table.firstChild);
49 |     }
50 |     // Safari doesn't support table.tHead, sigh
51 |     if (table.tHead == null) table.tHead = table.getElementsByTagName('thead')[0];
52 | 
53 |     if (table.tHead.rows.length != 1) return; // can't cope with two header rows
54 | 
55 |     // Sorttable v1 put rows with a class of "sortbottom" at the bottom (as
56 |     // "total" rows, for example). This is B&R, since what you're supposed
57 |     // to do is put them in a tfoot. So, if there are sortbottom rows,
58 |     // for backwards compatibility, move them to tfoot (creating it if needed).
59 |     sortbottomrows = [];
60 |     for (var i=0; i<table.rows.length; i++) {
61 |       if (table.rows[i].className.search(/\bsortbottom\b/) != -1) {
62 |         sortbottomrows[sortbottomrows.length] = table.rows[i];
63 |       }
64 |     }
65 |     if (sortbottomrows) {
66 |       if (table.tFoot == null) {
67 |         // table doesn't have a tfoot. Create one.
68 |         tfo = document.createElement('tfoot');
69 |         table.appendChild(tfo);
70 |       }
71 |       for (var i=0; i<sortbottomrows.length; i++) {
72 |         tfo.appendChild(sortbottomrows[i]);
73 |       }
74 |       delete sortbottomrows;
75 |     }
76 | 
77 |     // work through each column and calculate its type
78 |     headrow = table.tHead.rows[0].cells;
79 |     for (var i=0; i<headrow.length; i++) {
80 |       // manually override the type with a sorttable_type attribute
81 |       if (!headrow[i].className.match(/\bsorttable_nosort\b/)) { // skip this col
82 |         mtch = headrow[i].className.match(/\bsorttable_([a-z0-9]+)\b/);
83 |         if (mtch) { override = mtch[1]; }
84 |         if (mtch && typeof sorttable["sort_"+override] == 'function') {
85 |           headrow[i].sorttable_sortfunction = sorttable["sort_"+override];
86 |         } else {
87 |           headrow[i].sorttable_sortfunction = sorttable.guessType(table,i);
88 |         }
89 |         // make it clickable to sort
90 |         headrow[i].sorttable_columnindex = i;
91 |         headrow[i].sorttable_tbody = table.tBodies[0];
92 |         dean_addEvent(headrow[i],"click", sorttable.innerSortFunction = function(e) {
93 | 
94 |           if (this.className.search(/\bsorttable_sorted\b/) != -1) {
95 |             // if we're already sorted by this column, just
96 |             // reverse the table, which is quicker
97 |             sorttable.reverse(this.sorttable_tbody);
98 |             this.className = this.className.replace('sorttable_sorted',
99 |                                                     'sorttable_sorted_reverse');
100 |             this.removeChild(document.getElementById('sorttable_sortfwdind'));
101 |             sortrevind = document.createElement('span');
102 |             sortrevind.id = "sorttable_sortrevind";
103 |             sortrevind.innerHTML = stIsIE ? '&nbsp<font face="webdings">5</font>' : '&nbsp;&#x25B4;';
104 |             this.appendChild(sortrevind);
105 |             return;
106 |           }
107 |           if (this.className.search(/\bsorttable_sorted_reverse\b/) != -1) {
108 |             // if we're already sorted by this column in reverse, just
109 |             // re-reverse the table, which is quicker
110 |             sorttable.reverse(this.sorttable_tbody);
111 |             this.className = this.className.replace('sorttable_sorted_reverse',
112 |                                                     'sorttable_sorted');
113 |             this.removeChild(document.getElementById('sorttable_sortrevind'));
114 |             sortfwdind = document.createElement('span');
115 |             sortfwdind.id = "sorttable_sortfwdind";
116 |             sortfwdind.innerHTML = stIsIE ? '&nbsp<font face="webdings">6</font>' : '&nbsp;&#x25BE;';
117 |             this.appendChild(sortfwdind);
118 |             return;
119 |           }
120 | 
121 |           // remove sorttable_sorted classes
122 |           theadrow = this.parentNode;
123 |           forEach(theadrow.childNodes, function(cell) {
124 |             if (cell.nodeType == 1) { // an element
125 |               cell.className = cell.className.replace('sorttable_sorted_reverse','');
126 |               cell.className = cell.className.replace('sorttable_sorted','');
127 |             }
128 |           });
129 |           sortfwdind = document.getElementById('sorttable_sortfwdind');
130 |           if (sortfwdind) { sortfwdind.parentNode.removeChild(sortfwdind); }
131 |           sortrevind = document.getElementById('sorttable_sortrevind');
132 |           if (sortrevind) { sortrevind.parentNode.removeChild(sortrevind); }
133 | 
134 |           this.className += ' sorttable_sorted';
135 |           sortfwdind = document.createElement('span');
136 |           sortfwdind.id = "sorttable_sortfwdind";
137 |           sortfwdind.innerHTML = stIsIE ? '&nbsp<font face="webdings">6</font>' : '&nbsp;&#x25BE;';
138 |           this.appendChild(sortfwdind);
139 | 
140 |           // build an array to sort. This is a Schwartzian transform thing,
141 |           // i.e., we "decorate" each row with the actual sort key,
142 |           // sort based on the sort keys, and then put the rows back in order
143 |           // which is a lot faster because you only do getInnerText once per row
144 |           row_array = [];
145 |           col = this.sorttable_columnindex;
146 |           rows = this.sorttable_tbody.rows;
147 |           for (var j=0; j<rows.length; j++) {
148 |             row_array[row_array.length] = [sorttable.getInnerText(rows[j].cells[col]), rows[j]];
149 |           }
150 |           /* If you want a stable sort, uncomment the following line */
151 |           //sorttable.shaker_sort(row_array, this.sorttable_sortfunction);
152 |           /* and comment out this one */
153 |           row_array.sort(this.sorttable_sortfunction);
154 | 
155 |           tb = this.sorttable_tbody;
156 |           for (var j=0; j<row_array.length; j++) {
157 |             tb.appendChild(row_array[j][1]);
158 |           }
159 | 
160 |           delete row_array;
161 |         });
162 |       }
163 |     }
164 |   },
165 | 
166 |   guessType: function(table, column) {
167 |     // guess the type of a column based on its first non-blank row
168 |     sortfn = sorttable.sort_alpha;
169 |     for (var i=0; i<table.tBodies[0].rows.length; i++) {
170 |       text = sorttable.getInnerText(table.tBodies[0].rows[i].cells[column]);
171 |       if (text != '') {
172 |         if (text.match(/^-?[£$¤]?[\d,.]+%?$/)) {
173 |           return sorttable.sort_numeric;
174 |         }
175 |         // check for a date: dd/mm/yyyy or dd/mm/yy
176 |         // can have / or . or - as separator
177 |         // can be mm/dd as well
178 |         possdate = text.match(sorttable.DATE_RE)
179 |         if (possdate) {
180 |           // looks like a date
181 |           first = parseInt(possdate[1]);
182 |           second = parseInt(possdate[2]);
183 |           if (first > 12) {
184 |             // definitely dd/mm
185 |             return sorttable.sort_ddmm;
186 |           } else if (second > 12) {
187 |             return sorttable.sort_mmdd;
188 |           } else {
189 |             // looks like a date, but we can't tell which, so assume
190 |             // that it's dd/mm (English imperialism!) and keep looking
191 |             sortfn = sorttable.sort_ddmm;
192 |           }
193 |         }
194 |       }
195 |     }
196 |     return sortfn;
197 |   },
198 | 
199 |   getInnerText: function(node) {
200 |     // gets the text we want to use for sorting for a cell.
201 |     // strips leading and trailing whitespace.
202 |     // this is *not* a generic getInnerText function; it's special to sorttable.
203 |     // for example, you can override the cell text with a customkey attribute.
204 |     // it also gets .value for <input> fields.
205 | 
206 |     if (!node) return "";
207 | 
208 |     hasInputs = (typeof node.getElementsByTagName == 'function') &&
209 |                  node.getElementsByTagName('input').length;
210 | 
211 |     if (node.getAttribute("sorttable_customkey") != null) {
212 |       return node.getAttribute("sorttable_customkey");
213 |     }
214 |     else if (typeof node.textContent != 'undefined' && !hasInputs) {
215 |       return node.textContent.replace(/^\s+|\s+$/g, '');
216 |     }
217 |     else if (typeof node.innerText != 'undefined' && !hasInputs) {
218 |       return node.innerText.replace(/^\s+|\s+$/g, '');
219 |     }
220 |     else if (typeof node.text != 'undefined' && !hasInputs) {
221 |       return node.text.replace(/^\s+|\s+$/g, '');
222 |     }
223 |     else {
224 |       switch (node.nodeType) {
225 |         case 3:
226 |           if (node.nodeName.toLowerCase() == 'input') {
227 |             return node.value.replace(/^\s+|\s+$/g, '');
228 |           }
229 |         case 4:
230 |           return node.nodeValue.replace(/^\s+|\s+$/g, '');
231 |           break;
232 |         case 1:
233 |         case 11:
234 |           var innerText = '';
235 |           for (var i = 0; i < node.childNodes.length; i++) {
236 |             innerText += sorttable.getInnerText(node.childNodes[i]);
237 |           }
238 |           return innerText.replace(/^\s+|\s+$/g, '');
239 |           break;
240 |         default:
241 |           return '';
242 |       }
243 |     }
244 |   },
245 | 
246 |   reverse: function(tbody) {
247 |     // reverse the rows in a tbody
248 |     newrows = [];
249 |     for (var i=0; i<tbody.rows.length; i++) {
250 |       newrows[newrows.length] = tbody.rows[i];
251 |     }
252 |     for (var i=newrows.length-1; i>=0; i--) {
253 |        tbody.appendChild(newrows[i]);
254 |     }
255 |     delete newrows;
256 |   },
257 | 
258 |   /* sort functions
259 |      each sort function takes two parameters, a and b
260 |      you are comparing a[0] and b[0] */
261 |   sort_numeric: function(a,b) {
262 |     aa = parseFloat(a[0].replace(/[^0-9.-]/g,''));
263 |     if (isNaN(aa)) aa = 0;
264 |     bb = parseFloat(b[0].replace(/[^0-9.-]/g,''));
265 |     if (isNaN(bb)) bb = 0;
266 |     return aa-bb;
267 |   },
268 |   sort_alpha: function(a,b) {
269 |     if (a[0]==b[0]) return 0;
270 |     if (a[0]<b[0]) return -1;
271 |     return 1;
272 |   },
273 |   sort_ddmm: function(a,b) {
274 |     mtch = a[0].match(sorttable.DATE_RE);
275 |     y = mtch[3]; m = mtch[2]; d = mtch[1];
276 |     if (m.length == 1) m = '0'+m;
277 |     if (d.length == 1) d = '0'+d;
278 |     dt1 = y+m+d;
279 |     mtch = b[0].match(sorttable.DATE_RE);
280 |     y = mtch[3]; m = mtch[2]; d = mtch[1];
281 |     if (m.length == 1) m = '0'+m;
282 |     if (d.length == 1) d = '0'+d;
283 |     dt2 = y+m+d;
284 |     if (dt1==dt2) return 0;
285 |     if (dt1<dt2) return -1;
286 |     return 1;
287 |   },
288 |   sort_mmdd: function(a,b) {
289 |     mtch = a[0].match(sorttable.DATE_RE);
290 |     y = mtch[3]; d = mtch[2]; m = mtch[1];
291 |     if (m.length == 1) m = '0'+m;
292 |     if (d.length == 1) d = '0'+d;
293 |     dt1 = y+m+d;
294 |     mtch = b[0].match(sorttable.DATE_RE);
295 |     y = mtch[3]; d = mtch[2]; m = mtch[1];
296 |     if (m.length == 1) m = '0'+m;
297 |     if (d.length == 1) d = '0'+d;
298 |     dt2 = y+m+d;
299 |     if (dt1==dt2) return 0;
300 |     if (dt1<dt2) return -1;
301 |     return 1;
302 |   },
303 | 
304 |   shaker_sort: function(list, comp_func) {
305 |     // A stable sort function to allow multi-level sorting of data
306 |     // see: http://en.wikipedia.org/wiki/Cocktail_sort
307 |     // thanks to Joseph Nahmias
308 |     var b = 0;
309 |     var t = list.length - 1;
310 |     var swap = true;
311 | 
312 |     while(swap) {
313 |         swap = false;
314 |         for(var i = b; i < t; ++i) {
315 |             if ( comp_func(list[i], list[i+1]) > 0 ) {
316 |                 var q = list[i]; list[i] = list[i+1]; list[i+1] = q;
317 |                 swap = true;
318 |             }
319 |         } // for
320 |         t--;
321 | 
322 |         if (!swap) break;
323 | 
324 |         for(var i = t; i > b; --i) {
325 |             if ( comp_func(list[i], list[i-1]) < 0 ) {
326 |                 var q = list[i]; list[i] = list[i-1]; list[i-1] = q;
327 |                 swap = true;
328 |             }
329 |         } // for
330 |         b++;
331 | 
332 |     } // while(swap)
333 |   }
334 | }
335 | 
336 | /* ******************************************************************
337 |    Supporting functions: bundled here to avoid depending on a library
338 |    ****************************************************************** */
339 | 
340 | // Dean Edwards/Matthias Miller/John Resig
341 | 
342 | /* for Mozilla/Opera9 */
343 | if (document.addEventListener) {
344 |     document.addEventListener("DOMContentLoaded", sorttable.init, false);
345 | }
346 | 
347 | /* for Internet Explorer */
348 | /*@cc_on @*/
349 | /*@if (@_win32)
350 |     document.write("<script id=__ie_onload defer src=javascript:void(0)><\/script>");