├── Gui_pipeline.png
├── git_hub_modiDeC.png
├── data_curation_tutorial
    ├── Analysis_GUI.png
    ├── analysis_plot1.png
    ├── analysis_plot2.png
    ├── Training_figure_2.png
    ├── Figura_data_generation.png
    ├── data_creation_example.png
    ├── tutorial_training.md
    ├── tutorial_ModiDeC_analysis.md
    └── tutorial_data_creation.md
├── CITATION.cff
├── LICENSE
├── Coverage_check.py
├── Load_data_for_training_V2.py
├── README.md
├── ModiDec_NN.py
├── remora_TF2_env.yml
├── Analyze_data_NN.py
├── Analyze_data_NN_V2.py
├── Resquigle_remora_GUI.py
├── Training_NN_GUI.py
├── Analysis_platform_GUI.py
└── Remora_resquigle_generate_data.py


/Gui_pipeline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mem3nto0/ModiDeC-RNA-modification-classifier/HEAD/Gui_pipeline.png


--------------------------------------------------------------------------------
/git_hub_modiDeC.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mem3nto0/ModiDeC-RNA-modification-classifier/HEAD/git_hub_modiDeC.png


--------------------------------------------------------------------------------
/data_curation_tutorial/Analysis_GUI.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mem3nto0/ModiDeC-RNA-modification-classifier/HEAD/data_curation_tutorial/Analysis_GUI.png


--------------------------------------------------------------------------------
/data_curation_tutorial/analysis_plot1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mem3nto0/ModiDeC-RNA-modification-classifier/HEAD/data_curation_tutorial/analysis_plot1.png


--------------------------------------------------------------------------------
/data_curation_tutorial/analysis_plot2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mem3nto0/ModiDeC-RNA-modification-classifier/HEAD/data_curation_tutorial/analysis_plot2.png


--------------------------------------------------------------------------------
/data_curation_tutorial/Training_figure_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mem3nto0/ModiDeC-RNA-modification-classifier/HEAD/data_curation_tutorial/Training_figure_2.png


--------------------------------------------------------------------------------
/data_curation_tutorial/Figura_data_generation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mem3nto0/ModiDeC-RNA-modification-classifier/HEAD/data_curation_tutorial/Figura_data_generation.png


--------------------------------------------------------------------------------
/data_curation_tutorial/data_creation_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mem3nto0/ModiDeC-RNA-modification-classifier/HEAD/data_curation_tutorial/data_creation_example.png


--------------------------------------------------------------------------------
/CITATION.cff:
--------------------------------------------------------------------------------
 1 | cff-version: 1.1.0
 2 | message: "If you use this software, please cite it as below."
 3 | authors:
 4 | - family-names: Alagna 
 5 |   given-names: Nicolo
 6 |   orcid: https://orcid.org/0009-0006-0804-5774
 7 | title: "mem3nto0/ModiDeC-RNA-modification-classifier: ModiDeC_RNA_modification_classifier"
 8 | version: ModiDeC V1
 9 | date-released: 2025-06-23
10 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) Nicolò Alagna 2024
 2 | 
 3 | Permission is granted to use, copy, and modify this software and its documentation for academic and non-commercial research purposes only, provided that the following conditions are met:
 4 | 
 5 | 1. The software is used for scientific or educational purposes only.
 6 | 2. Proper citation of the original author and source must be included in any publications or presentations using this software.
 7 | 3. Commercial use, redistribution, or modification is prohibited without prior written permission from the author.
 8 | 4. For any derivative works or reuse in future research projects, users must contact the original author for permission.
 9 | 
10 | This software is provided "as is", without warranty of any kind, express or implied.
11 | 


--------------------------------------------------------------------------------
/Coverage_check.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | # /// Function to penalize low-coverage sites while calculating frequency ///
 4 | 
 5 | def Coverage_analysis(N_max, Data, Track_coverage, threshold = 0.2):
 6 | 
 7 |     Filtered_covarage = Track_coverage[ Track_coverage > 1]  
 8 |     median_covarage = np.median(Filtered_covarage)    
 9 |     MAD_covarage = np.median(np.absolute(Filtered_covarage - median_covarage))    
10 | 
11 |     if median_covarage > 2*N_max*threshold:
12 | 
13 |         lower_bound = median_covarage - MAD_covarage
14 |         mask = np.where((Track_coverage >= lower_bound), 1, 0)
15 | 
16 |         Division_factor = N_max*(1 - (1 - Track_coverage/N_max)*mask)
17 |         Final_results = Data / Division_factor[:,np.newaxis]
18 | 
19 |     else:
20 | 
21 |         if median_covarage < N_max*threshold:
22 | 
23 |             Final_results = (Data/N_max)         
24 | 
25 |         else:
26 | 
27 |             lower_bound = median_covarage
28 |             mask = np.where((Track_coverage >= lower_bound), 1, 0)
29 | 
30 |             Division_factor = N_max*(1 - (1 - Track_coverage/N_max)*mask)
31 |             Final_results = Data / Division_factor[:,np.newaxis]
32 | 
33 | 
34 |     return Final_results


--------------------------------------------------------------------------------
/data_curation_tutorial/tutorial_training.md:
--------------------------------------------------------------------------------
 1 | # Tutorial Part 2: Training of the neural network 
 2 | 
 3 | ![GUI for retraining ModiDeC](https://github.com/mem3nto0/ModiDeC-RNA-modification-classifier/blob/main/data_curation_tutorial/Training_figure_2.png)
 4 | 
 5 | 
 6 | ## Training Data Input
 7 | For training the neural network with your own data, the directories containing the data need to be specified. The data have to be created with "Resquigle_remora_GUI.py" before the training user interface can be used.
 8 |   1) select the folder where the training data were created by pressing the button "training data folder".
 9 |   2) (Optional) If validation data were also created with "Resquigle_remora_GUI.py", select the validation folder using the "validation data folder" button. If you don't have validation data, remember to set the "validation during training" variable to "no".
10 |   3) Select the folder where the retrained ModiDeC model will be saved by pressing the button "save model folder". This will specify the path where the model will be stored.
11 | 
12 | ## General Variables
13 | Additional specifications are needed for training the neural network:
14 |   1) The batch size (1) specifies the number of samples, which are propagated through the network during training. Batch sizes like 128 or 256 are recommended depending on the memory available for training.
15 |   2) The k-mer model (2) can be adjusted to the type of data used for training. Both data sequenced from RNA002 and RNA004 flowcells from Oxford Nanopore Technologies can be used for training the model: for RNA002, insert the number 5 (5-mer). For RNA004, insert the number 9 (9-mer).
16 |   3)  Insert the number of epochs for the training (3). We recommend setting the number of epochs to three.
17 |   4)  The variable name NN specifies the name of the saved model (4).
18 |   5)  The user can also specify if a validation of the retrained model is needed, by typing yes or no (5). 
19 | 
20 | After the settings are settled, press the button "Start training" to retrain ModiDeC. When the training is finished, a folder with the name of the neural network will be created in the "save folder" that contains the trained neural network.
21 | 


--------------------------------------------------------------------------------
/Load_data_for_training_V2.py:
--------------------------------------------------------------------------------
 1 | from tensorflow import keras
 2 | import numpy as np
 3 | import os
 4 | 
 5 | class Load_data_RNA(keras.utils.Sequence):
 6 | 
 7 |     """generate data in sequence mode for training the neural network"""
 8 | 
 9 |     def __init__(self, batch_size, N_batches, path, files_list, chunck_size, labels, batch_loading, max_seq_len):
10 | 
11 |         self.batch_size = batch_size
12 |         self.N_batches = N_batches
13 |         self.batch_loading = batch_loading
14 | 
15 |         self.X_train = np.zeros([self.batch_size,chunck_size,1])
16 |         self.X_train2 = np.zeros([self.batch_size,max_seq_len,4,1])
17 |         self.labels = np.zeros([self.batch_size,max_seq_len,labels])
18 | 
19 |         # The set of characters accepted in the transcription.
20 |         self.path = path
21 |         self.files_list = files_list
22 |         self.ind_rand = np.arange(0,self.N_batches,1)
23 |         np.random.shuffle(self.ind_rand)
24 |         np.random.shuffle(self.files_list)
25 | 
26 |     def __len__(self):
27 |         '''
28 |         Denotes the number of batches per epoch
29 |         '''
30 |         return int(self.N_batches)
31 |     
32 |     def __getitem__(self, index):
33 |  
34 |         selected_ind = self.ind_rand[index]
35 | 
36 |         const = self.batch_loading 
37 | 
38 |         for i in range(int(self.batch_size/const)):
39 | 
40 |             try:
41 |                 with np.load(self.path + "/" + self.files_list[int(self.batch_size/const)*selected_ind + i]) as data:
42 |                     
43 |         
44 |                     new_x_train = data["train_input"]
45 |                     new_x_train2 = data["train_input2"]
46 |                     y_train = data["train_output"]
47 | 
48 |             except:
49 | 
50 |                 with np.load(self.path + "/" + self.files_list[0]) as data:
51 |                             
52 |                     new_x_train = data["train_input"]
53 |                     new_x_train2 = data["train_input2"]
54 |                     y_train = data["train_output"]
55 | 
56 |             self.X_train[i*const:(i+1)*const,:,0] = new_x_train
57 |             self.X_train2[i*const:(i+1)*const,:,:,0] = new_x_train2
58 | 
59 |             self.labels[i*const:(i+1)*const,:,:] = y_train 
60 | 
61 |         X_total = {
62 |             "Input_1": self.X_train,
63 |             "Input_2": self.X_train2
64 |             }
65 |             
66 |         return X_total,  self.labels
67 | 
68 | 
69 |     def on_epoch_end(self):
70 |         """Shuffle indices after each epoch"""
71 |         np.random.shuffle(self.ind_rand)   
72 | 


--------------------------------------------------------------------------------
/data_curation_tutorial/tutorial_ModiDeC_analysis.md:
--------------------------------------------------------------------------------
 1 | # Tutorial Part 3: Data Analysis with ModiDeC 
 2 | 
 3 | ![GUI for retraining ModiDeC](https://github.com/mem3nto0/ModiDeC-RNA-modification-classifier/blob/main/data_curation_tutorial/Analysis_GUI.png)
 4 | 
 5 | The data analysis GUI allows us to visualize the ModiDeC analysis in a simple way.
 6 | 
 7 | ## Starting the analysis using ModiDeC neural network
 8 | 
 9 | From the figure above, it is possible to see that several buttons can be pressed to load the data, the neural network, and the reference for the data analysis.
10 | ModiDeC reconstructs the analysis directly on the reference. The data has to be basecalled using Dorado (with the --emit-moves flag) and aligned
11 | using samtools.
12 | 
13 | This user interface allows the analysis of one reference at a time.
14 | 
15 | Here is a list of what each button does:
16 | 
17 |   1) "Pod5 file folder": load the folder containing the pod5 files that you want to analyze.
18 |   2) "bam file": load the bam file for the analysis of the transcript
19 |   3) "Neural network folder": load the folder where the model is stored.
20 |   4) "kmer-level table file": load the k-mer level table provided by ONT.
21 |   5) "reference": load the reference for your single transcript.
22 | 
23 | After these steps, press the button "initialize the data". This can take a few seconds while the model is loaded.
24 | When the initialization is finished, you can select the total amount of reads to analyze. For example, if you want to analyze the first
25 | 1000 reads, set start_index = 0 and end_index = 1000. If you want to analyze all the reads, set start_index = 0 and end_index = -1.
26 | The analysis of a lot of reads can take a lot of time. For a good statistical analysis, we suggest a value of 5000 for the first analysis.
27 | 
28 | Press the "start analysis with Neural Network" to let ModiDeC analyze your data. At the end of the analysis, a "ModiDeC_analysis.npz" file
29 | will be created in the current working folder. The file contains the analysis of ModiDeC, which shows the modification frequency for each
30 | modification that ModiDeC was trained on and for each nucleotide. 
31 | 
32 | The results can also be visualized using the GUI by pressing the "visualize results" button. A new window opens where the data can
33 | be visualized.
34 | 
35 | ![figure plot](https://github.com/mem3nto0/ModiDeC-RNA-modification-classifier/blob/main/data_curation_tutorial/analysis_plot1.png)
36 | 
37 | Select the start and end reference points to visualize and press "plot". The window will change and the results for the selected reference
38 | region are shown.
39 | 
40 | ![figure plot 2](https://github.com/mem3nto0/ModiDeC-RNA-modification-classifier/blob/main/data_curation_tutorial/analysis_plot2.png)
41 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # ModiDeC-RNA-modification-classifier
 2 | 
 3 | ![GUI for retraining ModiDeC](https://github.com/mem3nto0/ModiDeC-RNA-modification-classifier/blob/main/git_hub_modiDeC.png)
 4 | 
 5 | ModiDeC is a personalized two-input neural network that was designed to identify RNA modifications from direct RNA sequencing using
 6 | RNA002 or RNA004 Oxford Nanopore technology (ONT) kits. In detail, ModiDeC combines LSTM and a newly designed inception-res-net block for
 7 | the multi-classification process. In this GitHub repository, we offer the ModiDeC models and several user graphic interfaces to retrain 
 8 | from scratch the neural network to readapt ModiDeC to your specific problem.
 9 | 
10 | ## Requirements and Installation
11 | 
12 | ModiDeC uses simple libraries such as NumPy and TensorFlow. It also uses the pre-compiled library "ont-remora" from ONT.
13 | Here below is a list of the libraries used for ModiDeC creation:
14 | 
15 |      python == 3.10.14
16 |      TensorFlow == 2.15
17 |      pyqt5 == 5.15.11
18 |      matplotlib == 3.9.1
19 |      numpy == 1.26.4 
20 |      ont-remora == 3.2.0 
21 | 
22 | To install the Conda environment to run the ModiDeC GUI, we suggest running the two following command lines in the prompt
23 | of Linux or WSL.
24 | 
25 |      sudo apt install gcc -y
26 |      conda env create -f /path_to_ModiDeC_folder/remora_TF2_env.yml
27 | 
28 | These two command lines will install the GCC compiler (needed for Remora) and create a new conda environment called
29 | "remora_TF2", which contains all the libraries necessary to run ModiDeC.
30 | 
31 | IMPORTANT: The ont-remora library is a Linux-based library, which means that ModiDeC can be used on Linux or on Windows with WSL.
32 | 
33 | ## General information ModiDeC GUI
34 | 
35 | The ModiDeC GUI is divided into three sub-interfaces (see figure below), each of which has a specific design. The ModiDeC GUI can be used in several ways, from retraining the neural network to directly 
36 | analyzing an aligned sample using a pre-trained neural network. We decided to create the GUIs to give the opportunity to adapt and customize ModiDeC for specific problems.
37 | 
38 | The figure below shows a general overview of the ModiDeC GUIs. The first thing that can be observed is that the GUIs are divided into “ModiDeC data curation”, “ModiDeC training” and “ModiDeC analysis”. 
39 | 
40 | ![GUI for retraining ModiDeC](https://github.com/mem3nto0/ModiDeC-RNA-modification-classifier/blob/main/Gui_pipeline.png)
41 | 
42 | In the "data_curation_tutorial" folder, detailed tutorials can be found for each of the three GUIs.
43 | 
44 | ## Epi2Me pipeline link
45 | 
46 | We also implemented ModiDeC in Epi2Me. Links to the Epi2Me repositories can be found below.
47 | 
48 | https://github.com/Nanopore-Hackathon/wf-modidec_data-curation
49 | 
50 | https://github.com/Nanopore-Hackathon/wf-modidec_training
51 | 
52 | https://github.com/Nanopore-Hackathon/wf-modidec_analysis
53 | 
54 | ## Collaboration
55 | 
56 | This work is a collaboration partnership with the group of Prof. Dr. Susanne Gerber, Uni Medical Center, Mainz. https://csg.uni-mainz.de/group-member/susanne-gerber/
57 | 
58 | ## Credit and Licence
59 | 
60 | This code is provided by Dr. Nicolo Alagna and the Computational Systems Genetics Group of the University Medical Center of Mainz. © 2024 All rights reserved.
61 | 
62 | This code is licensed for non-commercial academic use only. See LICENSE file.
63 | 


--------------------------------------------------------------------------------
/ModiDec_NN.py:
--------------------------------------------------------------------------------
  1 | import tensorflow as tf
  2 | from keras.models import Model
  3 | from keras.layers import Conv1D, MaxPooling1D , Add, Dropout , Dense , Conv2D
  4 | from keras.layers import Input, Activation , Concatenate, LSTM ,  BatchNormalization, Reshape
  5 | from keras.layers import Resizing , Masking, Multiply
  6 | 
  7 | 
  8 | 
  9 | def Conv1D_swish_bn(x, N_filters, kernel, strides):
 10 | 
 11 |     x = Conv1D(N_filters, kernel, strides=strides, padding="same")(x)
 12 |     x = BatchNormalization()(x)
 13 |     x = Activation("swish")(x)
 14 | 
 15 |     return x
 16 | 
 17 | 
 18 | def Conv2D_swish_bn(x, N_filters, kernel, strides):
 19 | 
 20 |     x = Conv2D(N_filters, kernel, strides=strides, padding="same")(x)
 21 |     x = BatchNormalization()(x)
 22 |     x = Activation("swish")(x)
 23 | 
 24 |     return x
 25 | 
 26 | 
 27 | def Inception_res_block(x, N_filters):
 28 | 
 29 |     short = Conv1D(N_filters, 1, strides = 1, padding="same")(x)
 30 |     short = BatchNormalization()(short)
 31 | 
 32 |     x_Inc_1 = Conv1D_swish_bn(x, int(0.2*N_filters), kernel= 1, strides= 1)
 33 |     x_Inc_2 = Conv1D_swish_bn(x_Inc_1, int(0.35*N_filters), kernel= 3, strides= 1)
 34 |     x_Inc_3 = Conv1D_swish_bn(x_Inc_2, int(0.5*N_filters), kernel= 5, strides= 1)
 35 | 
 36 |     x_conc = Concatenate(axis=-1)([x_Inc_1, x_Inc_2, x_Inc_3])
 37 |     x_conc = Conv1D(N_filters, 1, strides=1, padding="same")(x_conc)
 38 |     x_conc = BatchNormalization()(x_conc)
 39 | 
 40 |     out = Add()([short,x_conc])
 41 |     out = BatchNormalization()(out)
 42 |     out = Activation("swish")(out)
 43 | 
 44 |     return out
 45 | 
 46 | def Inception_res_block_2D(x, N_filters, kernel):
 47 | 
 48 |     short = Conv2D(N_filters, 1, strides = 1, padding="same")(x)
 49 |     short = BatchNormalization()(short)
 50 | 
 51 |     x_Inc_1 = Conv2D_swish_bn(x, int(0.2*N_filters), kernel= 1, strides= 1)
 52 |     x_Inc_2 = Conv2D_swish_bn(x_Inc_1, int(0.35*N_filters), kernel= kernel, strides= 1)
 53 |     x_Inc_3 = Conv2D_swish_bn(x_Inc_2, int(0.5*N_filters) + 1, kernel= kernel, strides= 1)
 54 | 
 55 |     x_conc = Concatenate(axis=-1)([x_Inc_1, x_Inc_2, x_Inc_3])
 56 | 
 57 |     # Final 1x1 Conv to reduce dimensions to N_filters
 58 |     x_conc = Conv2D(N_filters, 1, strides=1, padding="same")(x_conc)
 59 |     x_conc = BatchNormalization()(x_conc)
 60 | 
 61 |     out = Add()([short,x_conc])
 62 |     out = BatchNormalization()(out)
 63 |     out = Activation("swish")(out)
 64 | 
 65 |     return out
 66 | 
 67 | 
 68 | def ModiDeC_model(Inp_1, Inp_2, labels, kmer_model):
 69 | 
 70 |     input_layer1 = Input((Inp_1,1), name='Input_1')
 71 | 
 72 |     x1 = Inception_res_block(input_layer1,256)
 73 |     x1 = MaxPooling1D(2)(x1)
 74 |     x1 = tf.keras.layers.Bidirectional(LSTM(128, return_sequences=True))(x1)
 75 | 
 76 |     x1 = Inception_res_block(x1,512)
 77 |     x1 = MaxPooling1D(2)(x1)
 78 |     x1 = tf.keras.layers.Bidirectional(LSTM(256, return_sequences=True))(x1)
 79 | 
 80 |     x1 = Inception_res_block(x1,1024)
 81 |     x1 = MaxPooling1D(2)(x1)
 82 |     x1 = tf.keras.layers.Bidirectional(LSTM(512, return_sequences=True))(x1)
 83 | 
 84 |     x1 = Reshape((Inp_2, int(x1.shape[1]*1024/Inp_2)))(x1)
 85 | 
 86 |     input_layer2 = Input((Inp_2,4,1), name='Input_2')
 87 |     masked_input = Masking(mask_value=0.0)(input_layer2)
 88 | 
 89 |     x2 = Inception_res_block_2D(masked_input , 128, (kmer_model,4))
 90 |     x2 = Inception_res_block_2D(x2 , 256, (3,3))
 91 | 
 92 |     x2 = Reshape((x2.shape[-3], x2.shape[-2] * x2.shape[-1]))(x2)
 93 |     x2 = Inception_res_block(x2,512) # 512
 94 | 
 95 |     x_con = Concatenate(axis=-1)([x1,x2])
 96 | 
 97 |     x_con = Dense(1024)(x_con)
 98 |     x_con = BatchNormalization()(x_con)
 99 |     x_con = Activation("swish")(x_con)
100 | 
101 |     x_con = Dropout(0.2)(x_con)
102 | 
103 |     x_con = Dense(1024)(x_con)
104 |     x_con = BatchNormalization()(x_con)
105 |     x_con = Activation("swish")(x_con)
106 | 
107 |     x_con = Dropout(0.2)(x_con)
108 | 
109 |     x_LSTM = tf.keras.layers.Bidirectional(LSTM(256, return_sequences=True))(x_con)
110 | 
111 |     out_2 = Dense(labels, activation="sigmoid")(x_LSTM)
112 |     model = Model(inputs = [input_layer1, input_layer2] , outputs = [out_2])
113 | 
114 |     return model
115 |  


--------------------------------------------------------------------------------
/remora_TF2_env.yml:
--------------------------------------------------------------------------------
  1 | name: remora_TF2
  2 | channels:
  3 |   - conda-forge
  4 |   - defaults
  5 | dependencies:
  6 |   - _libgcc_mutex=0.1=conda_forge
  7 |   - _openmp_mutex=4.5=2_gnu
  8 |   - asttokens=2.4.1=pyhd8ed1ab_0
  9 |   - bzip2=1.0.8=h5eee18b_6
 10 |   - ca-certificates=2024.7.4=hbcca054_0
 11 |   - comm=0.2.2=pyhd8ed1ab_0
 12 |   - debugpy=1.6.7=py310h6a678d5_0
 13 |   - decorator=5.1.1=pyhd8ed1ab_0
 14 |   - exceptiongroup=1.2.2=pyhd8ed1ab_0
 15 |   - executing=2.0.1=pyhd8ed1ab_0
 16 |   - importlib-metadata=8.2.0=pyha770c72_0
 17 |   - importlib_metadata=8.2.0=hd8ed1ab_0
 18 |   - ipykernel=6.29.5=pyh3099207_0
 19 |   - ipython=8.26.0=pyh707e725_0
 20 |   - jedi=0.19.1=pyhd8ed1ab_0
 21 |   - jupyter_client=8.6.2=pyhd8ed1ab_0
 22 |   - jupyter_core=5.7.2=py310hff52083_0
 23 |   - ld_impl_linux-64=2.38=h1181459_1
 24 |   - libffi=3.4.4=h6a678d5_1
 25 |   - libgcc-ng=14.1.0=h77fa898_0
 26 |   - libgomp=14.1.0=h77fa898_0
 27 |   - libsodium=1.0.18=h36c2ea0_1
 28 |   - libstdcxx-ng=11.2.0=h1234567_1
 29 |   - libuuid=1.41.5=h5eee18b_0
 30 |   - matplotlib-inline=0.1.7=pyhd8ed1ab_0
 31 |   - ncurses=6.4=h6a678d5_0
 32 |   - nest-asyncio=1.6.0=pyhd8ed1ab_0
 33 |   - openssl=3.3.1=h4bc722e_2
 34 |   - packaging=24.1=pyhd8ed1ab_0
 35 |   - parso=0.8.4=pyhd8ed1ab_0
 36 |   - pexpect=4.9.0=pyhd8ed1ab_0
 37 |   - pickleshare=0.7.5=py_1003
 38 |   - pip=24.0=py310h06a4308_0
 39 |   - platformdirs=4.2.2=pyhd8ed1ab_0
 40 |   - prompt-toolkit=3.0.47=pyha770c72_0
 41 |   - psutil=6.0.0=py310hc51659f_0
 42 |   - ptyprocess=0.7.0=pyhd3deb0d_0
 43 |   - pure_eval=0.2.3=pyhd8ed1ab_0
 44 |   - pygments=2.18.0=pyhd8ed1ab_0
 45 |   - python=3.10.14=h955ad1f_1
 46 |   - python_abi=3.10=2_cp310
 47 |   - pyzmq=25.1.2=py310h6a678d5_0
 48 |   - readline=8.2=h5eee18b_0
 49 |   - setuptools=69.5.1=py310h06a4308_0
 50 |   - six=1.16.0=pyh6c4a22f_0
 51 |   - sqlite=3.45.3=h5eee18b_0
 52 |   - stack_data=0.6.2=pyhd8ed1ab_0
 53 |   - tk=8.6.14=h39e8969_0
 54 |   - tornado=6.4.1=py310hc51659f_0
 55 |   - traitlets=5.14.3=pyhd8ed1ab_0
 56 |   - typing_extensions=4.12.2=pyha770c72_0
 57 |   - wcwidth=0.2.13=pyhd8ed1ab_0
 58 |   - wheel=0.43.0=py310h06a4308_0
 59 |   - xz=5.4.6=h5eee18b_1
 60 |   - zeromq=4.3.5=h6a678d5_0
 61 |   - zipp=3.19.2=pyhd8ed1ab_0
 62 |   - zlib=1.2.13=h5eee18b_1
 63 |   - pip:
 64 |       - absl-py==2.1.0
 65 |       - astunparse==1.6.3
 66 |       - cachetools==5.4.0
 67 |       - certifi==2024.7.4
 68 |       - charset-normalizer==3.3.2
 69 |       - contourpy==1.2.1
 70 |       - cycler==0.12.1
 71 |       - filelock==3.15.4
 72 |       - flatbuffers==24.3.25
 73 |       - fonttools==4.53.1
 74 |       - fsspec==2024.6.1
 75 |       - gast==0.6.0
 76 |       - google-auth==2.32.0
 77 |       - google-auth-oauthlib==1.2.1
 78 |       - google-pasta==0.2.0
 79 |       - grpcio==1.65.1
 80 |       - h5py==3.11.0
 81 |       - idna==3.7
 82 |       - importlib-resources==6.4.0
 83 |       - iso8601==2.1.0
 84 |       - jinja2==3.1.4
 85 |       - joblib==1.4.2
 86 |       - keras==2.15.0
 87 |       - kiwisolver==1.4.5
 88 |       - lib-pod5==0.3.12
 89 |       - libclang==18.1.1
 90 |       - markdown==3.6
 91 |       - markdown-it-py==3.0.0
 92 |       - markupsafe==2.1.5
 93 |       - matplotlib==3.9.1
 94 |       - mdurl==0.1.2
 95 |       - mizani==0.9.3
 96 |       - ml-dtypes==0.2.0
 97 |       - more-itertools==10.3.0
 98 |       - mpmath==1.3.0
 99 |       - namex==0.0.8
100 |       - networkx==3.3
101 |       - numpy==1.26.4
102 |       - nvidia-cublas-cu12==12.1.3.1
103 |       - nvidia-cuda-cupti-cu12==12.1.105
104 |       - nvidia-cuda-nvcc-cu12==12.3.107
105 |       - nvidia-cuda-nvrtc-cu12==12.1.105
106 |       - nvidia-cuda-runtime-cu12==12.1.105
107 |       - nvidia-cudnn-cu12==8.9.2.26
108 |       - nvidia-cufft-cu12==11.0.2.54
109 |       - nvidia-curand-cu12==10.3.2.106
110 |       - nvidia-cusolver-cu12==11.4.5.107
111 |       - nvidia-cusparse-cu12==12.1.0.106
112 |       - nvidia-nccl-cu12==2.20.5
113 |       - nvidia-nvjitlink-cu12==12.3.101
114 |       - nvidia-nvtx-cu12==12.1.105
115 |       - oauthlib==3.2.2
116 |       - ont-remora==3.2.0
117 |       - opt-einsum==3.3.0
118 |       - optree==0.12.1
119 |       - pandas==2.2.2
120 |       - parasail==1.3.4
121 |       - patsy==0.5.6
122 |       - pillow==10.4.0
123 |       - plotnine==0.12.4
124 |       - pod5==0.3.12
125 |       - polars==0.20.31
126 |       - protobuf==4.25.3
127 |       - pyarrow==16.1.0
128 |       - pyasn1==0.6.0
129 |       - pyasn1-modules==0.4.0
130 |       - pyparsing==3.1.2
131 |       - pyqt5==5.15.11
132 |       - pyqt5-qt5==5.15.14
133 |       - pyqt5-sip==12.15.0
134 |       - pysam==0.22.1
135 |       - python-dateutil==2.9.0.post0
136 |       - pytz==2024.1
137 |       - requests==2.32.3
138 |       - requests-oauthlib==2.0.0
139 |       - rich==13.7.1
140 |       - rsa==4.9
141 |       - scikit-learn==1.5.1
142 |       - scipy==1.14.0
143 |       - statsmodels==0.14.2
144 |       - sympy==1.13.1
145 |       - tensorboard==2.15.2
146 |       - tensorboard-data-server==0.7.2
147 |       - tensorflow==2.15.0
148 |       - tensorflow-estimator==2.15.0
149 |       - tensorflow-io-gcs-filesystem==0.37.1
150 |       - termcolor==2.4.0
151 |       - thop==0.1.1-2209072238
152 |       - threadpoolctl==3.5.0
153 |       - toml==0.10.2
154 |       - torch==2.3.1
155 |       - tqdm==4.66.4
156 |       - triton==2.3.1
157 |       - tzdata==2024.1
158 |       - urllib3==2.2.2
159 |       - vbz-h5py-plugin==1.0.1
160 |       - werkzeug==3.0.3
161 |       - wrapt==1.14.1
162 | prefix: /home/nicolo/anaconda3/envs/remora_TF2
163 | 


--------------------------------------------------------------------------------
/data_curation_tutorial/tutorial_data_creation.md:
--------------------------------------------------------------------------------
 1 | # Tutorial Part 1: Data creation for training ModiDeC 
 2 | 
 3 | ![GUI for retraining ModiDeC](https://github.com/mem3nto0/ModiDeC-RNA-modification-classifier/blob/main/data_curation_tutorial/Figura_data_generation.png)
 4 | 
 5 | The "Data Curation" (or data creation) GUI was created to give the opportunity to the user to generate personalized training data for ModiDeC, which can be used
 6 | for further steps and retraining the neural network.
 7 | 
 8 | The figure shows three sections with several variables as inputs. In this Tutorial, we will explain the several steps to do to correctly generate your own
 9 | dataset for training ModiDeC for your specific problem.
10 | 
11 | Initially, we will give a description of the inputs that can be entered in the GUI for data creation. In the second part of the file, an example will be
12 | provided to show the steps needed to create the training data.
13 | 
14 | ## Important Steps for running the GUI
15 | 
16 | The data has to be basecalled using Dorado and aligned using samtools:
17 | 
18 |   1) Basecall your data using Dorado with the --emit-moves flag. It is necessary for the resquiggling process.
19 |   2) Align the reads using "samtools" to generate a .bam file.
20 | 
21 | ## Select Input files and save-directory (Section 1)
22 | 
23 | In this section, the input files can be selected using the several Gui buttons.
24 | 
25 |   1) "Pod5 file folder" button: Select the folder where the pod5 files are stored. The folder must contain only pod5 files.
26 |   2) "bam file folder" button: Select the folder where the bam files are stored. The folder must contain only bam files.
27 |   3) "Save path" button: select the folder where the training data will be saved. Create a specific folder for it.
28 |   4) "kmer-level table file" button: select the k-mer level table for the 004 or 002 kit. These files are provided by ONT.
29 | 
30 | We use the bam folder instead of a single file selection because, in certain cases, multiple .bam files can be obtained by the same pod5 measurement.
31 | If this is the case, the GUI automatically analyzes all the .bam files without any data overwriting.
32 | 
33 | Example for multiple bam files: if you used the first alignment flag during the alignment using samtools, use samtools to generate a single .bam file for each
34 | reference. Then create a folder containing all the bam files created in this way. Use this folder in the GUI and all the bam files will be used for the generation of the training data.
35 | 
36 | ## General variable for training data (Section 2)
37 | 
38 | This second section of the GUI focuses on giving sequence information for the data sets the user wants to use for training. Information like "modification position" or "modified data"
39 | can be specified, which lets the users use their own oligos for retraining the neural network. Below, a description of the inputs is provided:
40 | 
41 |   1) "modification_data?": it is a yes or no question. The user can specify if the data are modified or not. It is useful if the user wants to add unmodified reads for the training.
42 |   2) "take_modification_region?": it is a yes or no question. The user can decide to use the whole read for the analysis or to use only the signal region around the
43 |      modification position, which is selected a few steps later. For example, using the whole read is useful for unmodified data, because more k-mers are taken for the analysis.
44 |   3) "name_save_file": specify the name of the file that will be saved. For each modification that you want to analyze or if the data are modified or not, give a new name.
45 |   4) "What type of modification?": it is a string linked to the modification dictionary. For example, if you have two modifications in your dictionary (m6A and Gm), type Gm if you want
46 |      to create training data for Gm, or type m6A to create training data for m6A.
47 |   5) "Bases before modification": It can be a positive or negative integer. Choose the number of bases to consider before (positive values) or after (negative values) for the resquiggle. Use 0
48 |      if you want to take only a few bases around the modification position. This feature can be useful depending on the oligos design.
49 |   6) "Modification dictionary": comma-separated list of all the modifications that ModiDeC has to learn. For example, for Gm and m6A write "Gm,m6A" in the box.
50 | 
51 | ## Segmentation variables for training data (Section 3)
52 | 
53 | This third section focuses on raw signal and neural network features that can be personalized by the user. A description of the input is provided:
54 | 
55 |   1) "batch size": it is the number of raw signals that will be saved in a single file. This is helpful to reduce memory problems during the saving process. Recommended value: 16.
56 |   2) "max seq. length": it is an integer linked to one of the inputs of the neural network. It is the maximum number of bases to use for the input. A good value is "chunk length" divided by 10.
57 |   3) "chunk length": it is an integer that defines how big the time window extracted from the raw signal is. It is linked to one of the inputs of the neural network.
58 |   4) "shift in time": indicates how many time points to move for creating a new representation of the modified raw signal. The suggested value is "chunk length" divided by "batch size".
59 |   5) "start read number" and "end read number": Integers to select the pod5 reads indexes to use for generating data.
60 | 
61 | After filling in all the variables, press the button "Start resquigle" and .npz files will be generated in the save folder.
62 | 
63 | ## Practical example training data generation: Create a training data set containing Gm and m6A modification
64 | 
65 | We want to give a practical example on how to fill the GUI for generating training data for ModiDeC. We have two oligos, one containing one Gm modification at the reference position 64 and another one
66 | containing m6A at the reference position 75. Additionally, we also have an un-modified oligo as well.
67 | 
68 | First step: basecall the pod5 files of each of the three oligos independently using Dorado with the --emit-move flag. This means that we will have one .ubam file for Gm, one for m6A and one for the unmodified oligo. After that,
69 | use samtools to align each basecalled data set to its corresponding sequence to obtain three .bam files. In the end, we should have something like this:
70 | 
71 |   1) Gm_pod5_folder + Gm_bam_folder(containing "Gm_aligned.bam" file)
72 |   2) m6A_pod5_folder + m6A_bam_folder(containing "m6A_aligned.bam" file)
73 |   3) unmodified_pod5_folder + unmodified_bam_folder(containing "unmodified_aligned.bam" file)
74 | 
75 | Since we want to save all of them in the same folder for the training, we create a folder called "training_data".
76 | 
77 | Now, for this case we want ModiDeC to analyze the modified signal closely. For this purpose, we can set the "chunk length" parameter to 400, which means that the "max seq. length" is 40.
78 | Additionally, we want to save 16 raw signals per file, which means that "batch size" is 16 and consequently "shift in time" is 25 (400/16). With these values in mind, we can run the GUI and start
79 | to fill in the variables, analyzing first the Gm, then the m6A, and finally the unmodified data. In the figure below you can see how the GUI was filled in for the three runs. The red squares
80 | show what was changed in the GUI between each run. In each run, press the button "Start resquigle" to create the training data.
81 | 
82 | ![generating the data](https://github.com/mem3nto0/ModiDeC-RNA-modification-classifier/blob/main/data_curation_tutorial/data_creation_example.png)
83 | 
84 | In the figure it is possible to observe that "pod5 file folder" and "bam file folder" are also marked in red. This is because for each run you have to load the corresponding pod5 and bam folder.
85 | The "training_data" folder that we created will be filled with .npz files containing the resquiggled modification signals for the training phase.
86 | 


--------------------------------------------------------------------------------
/Analyze_data_NN.py:
--------------------------------------------------------------------------------
  1 | import tensorflow as tf
  2 | import numpy as np
  3 | import pod5
  4 | from remora import io
  5 | import matplotlib.pyplot as plt
  6 | 
  7 | def NN_analyzer(variables, pod5_dr, bam_fh, read_id, sig_map_refiner, model, reference, labels_mod = 4):
  8 | 
  9 |     chunck_size = variables[2]
 10 |     max_seq_len = variables[3]
 11 |     labels = 4
 12 |     N_miss = 0
 13 |     
 14 |     reference_track_mod = np.zeros([len(reference), labels_mod])     
 15 | 
 16 |     if variables[1] == -1:
 17 | 
 18 |         variables[1] = len(read_id)
 19 | 
 20 |     if len(read_id) > variables[1]:
 21 | 
 22 |         end_reads = variables[1]
 23 | 
 24 |     else:
 25 |         end_reads = len(read_id)
 26 | 
 27 |     if end_reads < variables[0]:
 28 | 
 29 |         if end_reads - np.abs(variables[0] - variables[1]) < 0:
 30 |             start_reads = 0
 31 |     
 32 |         else:
 33 |             start_reads = end_reads - np.abs(variables[0] - variables[1])
 34 | 
 35 |     else:
 36 |         start_reads = variables[0]
 37 | 
 38 |     #print(len(read_id))
 39 |     #print(start_reads, end_reads)
 40 | 
 41 |     for name_id in read_id[start_reads: end_reads]:
 42 | 
 43 |         pod5_read = pod5_dr.get_read(name_id)
 44 |         bam_read = bam_fh.get_first_alignment(name_id)
 45 | 
 46 |         seq_resquigle = ""
 47 |         position_adjusting = 0
 48 |         Error_read = False
 49 | 
 50 |         if bam_read.is_reverse: #correct the signal for forward direction
 51 |             flip = False
 52 |         else:
 53 |             flip = True
 54 | 
 55 |         try:
 56 |             #/// read data
 57 |             read_analysed = io.Read.from_pod5_and_alignment(pod5_read, bam_read, reverse_signal = flip)
 58 |             
 59 |             #/// If data were aligned with U, U in sequence will be replaced by the T. Important for resquiggle
 60 |             prob_ref = read_analysed.ref_seq
 61 |             prob_ref = prob_ref.replace("U", "T")
 62 |             read_analysed.ref_seq = prob_ref
 63 |             
 64 |             # // resquigle the data with the reference
 65 |             read_analysed.set_refine_signal_mapping(sig_map_refiner, ref_mapping=True)
 66 |             
 67 |             start_of_mapping = read_analysed.extract_ref_reg(
 68 |                 read_analysed.ref_reg.adjust(start_adjust = 0, end_adjust=read_analysed.ref_reg.len))
 69 | 
 70 |             Raw_signal = start_of_mapping.norm_signal
 71 |             seq_resquigle = start_of_mapping.seq
 72 |             start_end_resquigle = start_of_mapping.seq_to_sig_map
 73 | 
 74 |             # /// check if the modification position has to be adjusted ///
 75 |             position_adjusting = start_of_mapping.ref_reg.start
 76 |         
 77 |         except:
 78 |             position_adjusting = 0
 79 |             seq_resquigle = ""
 80 |             Error_read = True
 81 |         
 82 |         if Error_read == False:
 83 |             
 84 |             base_dict = {"A":1, "C":2, "G":3, "T":4}
 85 |             bases_onehot = np.zeros([len(Raw_signal),4 + 1])
 86 | 
 87 |             try:
 88 | 
 89 |                 for k in range(len(seq_resquigle)):
 90 | 
 91 |                     start_resq = start_end_resquigle[k]
 92 |                     bases_onehot[start_resq,base_dict[seq_resquigle[k]]] = 1
 93 | 
 94 | 
 95 |                 N_segments = int(len(Raw_signal)/chunck_size)
 96 |                 Input_1 = np.zeros([N_segments +1,chunck_size])            # initialize the first input of the NN
 97 |                 Input_2 = np.zeros([N_segments +1,max_seq_len,labels])     # initialize the second input of the NN
 98 | 
 99 |                 for k in range (N_segments):
100 | 
101 |                     start = k*chunck_size
102 |                     Input_1[k] = Raw_signal[start: start + chunck_size]
103 | 
104 |                     window_onehot = bases_onehot[start: start + chunck_size,:]
105 |                     probe = np.argmax(window_onehot, axis=-1)
106 |                     probe = probe[probe != 0]
107 |                     probe = probe -1
108 | 
109 |                     for kk in range(len(probe)):
110 | 
111 |                         Input_2[k, kk, probe[kk]] = 1
112 | 
113 |                 #find the number of point not overlapping
114 |                 not_overlaping_last_seg = len(Raw_signal) - (start + chunck_size)
115 | 
116 |                 # the extention to +1 is for keeping the full dimention of the output
117 |                 Input_1[N_segments] = Raw_signal[-chunck_size:]
118 | 
119 |                 Additional_window = bases_onehot[-chunck_size:,:]
120 |                 probe = np.argmax(Additional_window, axis = -1)
121 |                 probe = probe[probe != 0]
122 |                 probe = probe - 1
123 | 
124 |                 for kk in range (len(probe)):
125 | 
126 |                     Input_2[N_segments, kk, probe[kk]] = 1 
127 | 
128 |                 #probe the overlapping bases for the last segment
129 |                 Window_overlap = bases_onehot[-chunck_size:-not_overlaping_last_seg,:]
130 |                 seq_overlap = np.zeros([Window_overlap.shape[0],4])
131 |                 probe = np.argmax(Window_overlap, axis = -1)
132 |                 probe = probe[probe != 0]
133 |                 probe = probe - 1
134 | 
135 |                 for kk in range (len(probe)):
136 | 
137 |                     seq_overlap[kk, probe[kk]] = 1 
138 | 
139 |                 seq_overlap = np.sum(seq_overlap, axis = 1)
140 |                 seq_overlap = np.where(seq_overlap > 0.5)[0] 
141 |                 len_overlap = len(seq_overlap)
142 | 
143 |                 Input_1 = np.expand_dims(Input_1, axis=-1) 
144 |                 #Input_2 = np.expand_dims(Input_2, axis=-1) 
145 | 
146 |                 X_total ={"Input_1": Input_1, "Input_2": Input_2}
147 | 
148 |                 #analyze the read with the NN
149 | 
150 |                 prediction = model.predict(X_total, verbose=0) #  
151 | 
152 |                 # reconstruct the final output removing the null part of the predictions
153 |                 Final_seq_binary = []
154 | 
155 |                 for kk in range(N_segments): #
156 | 
157 |                     full_position = np.sum(prediction[kk], axis = 1)
158 |                     full_position = np.where(full_position> 0.5)[0]
159 | 
160 |                     real_part =  np.argmax(prediction[kk,:len(full_position)], axis=-1)
161 |                     Final_seq_binary = np.concatenate((Final_seq_binary,real_part), axis=0)
162 | 
163 |                 full_position = np.sum(prediction[N_segments], axis = 1)
164 |                 full_position = np.where(full_position> 0.5)[0]
165 | 
166 |                 real_part = np.argmax(prediction[N_segments,:len(full_position)], axis=-1)
167 |                 not_overlaping_part = real_part[len_overlap:]
168 |                 Final_seq_binary = np.concatenate((Final_seq_binary,not_overlaping_part), axis=0)
169 | 
170 |                 if (len(Final_seq_binary) - len(seq_resquigle)) != 0:
171 | 
172 |                     N_miss += 1
173 | 
174 |                 else:
175 | 
176 |                     where_mod = np.where(Final_seq_binary >= 1)[0]
177 |                     modific_detec = np.zeros(len(where_mod))
178 | 
179 |                     for j in range(len(where_mod)):
180 | 
181 |                         modific_detec[j] = Final_seq_binary[where_mod[j]]
182 |                 
183 |                     if len(modific_detec) > 1:
184 | 
185 |                         for n in range(len(modific_detec)):
186 | 
187 |                             mod_probe_position = where_mod[n]
188 |                             mod_probe_predicted = modific_detec[n]
189 | 
190 |                             reference_track_mod[int(mod_probe_position) + int(position_adjusting), int(mod_probe_predicted -1)] += 1
191 | 
192 |                     else:
193 | 
194 |                         mod_probe_position = where_mod[0]
195 |                         mod_probe_predicted = modific_detec[0]
196 | 
197 |                         reference_track_mod[int(mod_probe_position) + int(position_adjusting), int(mod_probe_predicted -1)] += 1   
198 | 
199 |             except:
200 | 
201 |                 None
202 |    
203 |    
204 |     print("analysis finished")
205 |     print("Total data to analyize:", np.abs(end_reads - start_reads))
206 |     print("data analyized:", np.abs(end_reads - start_reads) - N_miss)
207 | 
208 |     #return reference_track_mod   
209 | 
210 |     # /// calculate the modification frequency lust by the number or reads analyzed///
211 | 
212 |     return (reference_track_mod)/(np.abs(end_reads - start_reads - N_miss))
213 | 


--------------------------------------------------------------------------------
/Analyze_data_NN_V2.py:
--------------------------------------------------------------------------------
  1 | import tensorflow as tf
  2 | import numpy as np
  3 | import pod5
  4 | from remora import io
  5 | import matplotlib.pyplot as plt
  6 | from Coverage_check import Coverage_analysis
  7 | 
  8 | def NN_analyzer(variables, pod5_dr, bam_fh, read_id, sig_map_refiner, model, reference, labels_mod):
  9 | 
 10 |     chunck_size = variables[2]
 11 |     max_seq_len = variables[3]
 12 |     labels = 4
 13 |     N_miss = 0
 14 |     
 15 |     reference_track_mod = np.zeros([len(reference), labels_mod]) # matrix to track the modification     
 16 |     Track_coverage = np.zeros([len(reference)])
 17 | 
 18 |     if variables[1] == -1:
 19 | 
 20 |         variables[1] = len(read_id)
 21 | 
 22 |     if len(read_id) > variables[1]:
 23 | 
 24 |         end_reads = variables[1]
 25 | 
 26 |     else:
 27 |         end_reads = len(read_id)
 28 | 
 29 |     if end_reads < variables[0]:
 30 | 
 31 |         if end_reads - np.abs(variables[0] - variables[1]) < 0:
 32 |             start_reads = 0
 33 |     
 34 |         else:
 35 |             start_reads = end_reads - np.abs(variables[0] - variables[1])
 36 | 
 37 |     else:
 38 |         start_reads = variables[0]
 39 | 
 40 |     #print(len(read_id))
 41 |     #print(start_reads, end_reads)
 42 | 
 43 |     for name_id in read_id[start_reads: end_reads]:
 44 | 
 45 |         pod5_read = pod5_dr.get_read(name_id)
 46 |         bam_read = bam_fh.get_first_alignment(name_id)
 47 | 
 48 |         seq_resquigle = ""
 49 |         position_adjusting = 0
 50 |         Error_read = False
 51 | 
 52 |         if bam_read.is_reverse: #correct the signal for forward direction
 53 |             flip = False
 54 |         else:
 55 |             flip = True
 56 | 
 57 |         try:
 58 |             #/// read data
 59 |             read_analysed = io.Read.from_pod5_and_alignment(pod5_read, bam_read, reverse_signal = flip)
 60 | 
 61 |             #/// If data were aligned with U, U in sequence will be replaced by the T. Important for resquiggle
 62 |             prob_ref = read_analysed.ref_seq
 63 |             prob_ref = prob_ref.replace("U", "T")
 64 |             read_analysed.ref_seq = prob_ref
 65 | 
 66 |             # // resquigle the data with the refence
 67 |             read_analysed.set_refine_signal_mapping(sig_map_refiner, ref_mapping=True)
 68 |             
 69 |             start_of_mapping = read_analysed.extract_ref_reg(
 70 |                 read_analysed.ref_reg.adjust(start_adjust = 0, end_adjust=read_analysed.ref_reg.len))
 71 | 
 72 |             Raw_signal = start_of_mapping.norm_signal
 73 |             seq_resquigle = start_of_mapping.seq
 74 |             start_end_resquigle = start_of_mapping.seq_to_sig_map
 75 | 
 76 |             # /// check if the modification position has to be adjusted ///
 77 |             position_adjusting = start_of_mapping.ref_reg.start
 78 |             end_of_sequence = start_of_mapping.ref_reg.len
 79 | 
 80 |         except:
 81 |             position_adjusting = 0
 82 |             seq_resquigle = ""
 83 |             Error_read = True
 84 |         
 85 |         if Error_read == False:
 86 |             
 87 |             base_dict = {"A":1, "C":2, "G":3, "T":4}
 88 |             bases_onehot = np.zeros([len(Raw_signal),4 + 1])
 89 | 
 90 |             try:
 91 | 
 92 |                 for k in range(len(seq_resquigle)):
 93 | 
 94 |                     start_resq = start_end_resquigle[k]
 95 |                     bases_onehot[start_resq,base_dict[seq_resquigle[k]]] = 1
 96 | 
 97 | 
 98 |                 N_segments = int(len(Raw_signal)/chunck_size)
 99 |                 Input_1 = np.zeros([N_segments +1,chunck_size])            # initialize the first input of the NN
100 |                 Input_2 = np.zeros([N_segments +1,max_seq_len,labels])     # initialize the second input of the NN
101 | 
102 |                 for k in range (N_segments):
103 | 
104 |                     start = k*chunck_size
105 |                     Input_1[k] = Raw_signal[start: start + chunck_size]
106 | 
107 |                     window_onehot = bases_onehot[start: start + chunck_size,:]
108 |                     probe = np.argmax(window_onehot, axis=-1)
109 |                     probe = probe[probe != 0]
110 |                     probe = probe -1
111 | 
112 |                     for kk in range(len(probe)):
113 | 
114 |                         Input_2[k, kk, probe[kk]] = 1
115 | 
116 |                 #find the number of point not overlapping
117 |                 not_overlaping_last_seg = len(Raw_signal) - (start + chunck_size)
118 | 
119 |                 # the extention to +1 is for keeping the full dimention of the output
120 |                 Input_1[N_segments] = Raw_signal[-chunck_size:]
121 | 
122 |                 Additional_window = bases_onehot[-chunck_size:,:]
123 |                 probe = np.argmax(Additional_window, axis = -1)
124 |                 probe = probe[probe != 0]
125 |                 probe = probe - 1
126 | 
127 |                 for kk in range (len(probe)):
128 | 
129 |                     Input_2[N_segments, kk, probe[kk]] = 1 
130 | 
131 |                 #probe the overlapping bases for the last segment
132 |                 Window_overlap = bases_onehot[-chunck_size:-not_overlaping_last_seg,:]
133 |                 seq_overlap = np.zeros([Window_overlap.shape[0],4])
134 |                 probe = np.argmax(Window_overlap, axis = -1)
135 |                 probe = probe[probe != 0]
136 |                 probe = probe - 1
137 | 
138 |                 for kk in range (len(probe)):
139 | 
140 |                     seq_overlap[kk, probe[kk]] = 1 
141 | 
142 |                 seq_overlap = np.sum(seq_overlap, axis = 1)
143 |                 seq_overlap = np.where(seq_overlap > 0.5)[0] 
144 |                 len_overlap = len(seq_overlap)
145 | 
146 |                 Input_1 = np.expand_dims(Input_1, axis=-1) 
147 | 
148 |                 X_total ={"Input_1": Input_1, "Input_2": Input_2}
149 | 
150 |                 #analyze the read with the NN
151 | 
152 |                 prediction = model.predict(X_total, verbose=0) #  
153 | 
154 |                 # reconstruct the final output removing the null part of the predictions
155 |                 Final_seq_binary = []
156 | 
157 |                 for kk in range(N_segments): #
158 | 
159 |                     full_position = np.sum(Input_2[kk], axis = 1)
160 |                     full_position = np.where(full_position> 0.5)[0]
161 | 
162 |                     real_part =  np.argmax(prediction[kk,:len(full_position)], axis=-1)
163 |                     Final_seq_binary = np.concatenate((Final_seq_binary,real_part), axis=0)
164 | 
165 |                 full_position = np.sum(Input_2[N_segments], axis = 1)
166 |                 full_position = np.where(full_position> 0.5)[0]
167 | 
168 |                 real_part = np.argmax(prediction[N_segments,:len(full_position)], axis=-1)
169 |                 not_overlaping_part = real_part[len_overlap:]
170 |                 Final_seq_binary = np.concatenate((Final_seq_binary,not_overlaping_part), axis=0)
171 | 
172 |                 if (len(Final_seq_binary) - len(seq_resquigle)) != 0:
173 | 
174 |                     N_miss += 1
175 | 
176 |                 else:
177 | 
178 |                     where_mod = np.where(Final_seq_binary >= 1)[0]
179 |                     modific_detec = np.zeros(len(where_mod))
180 | 
181 |                     for j in range(len(where_mod)):
182 | 
183 |                         modific_detec[j] = Final_seq_binary[where_mod[j]]
184 |                 
185 |                     if len(modific_detec) > 1:
186 | 
187 |                         for n in range(len(modific_detec)):
188 | 
189 |                             mod_probe_position = where_mod[n]
190 |                             mod_probe_predicted = modific_detec[n]
191 | 
192 |                             reference_track_mod[int(mod_probe_position) + int(position_adjusting), int(mod_probe_predicted -1)] += 1
193 | 
194 |                     else:
195 | 
196 |                         mod_probe_position = where_mod[0]
197 |                         mod_probe_predicted = modific_detec[0]
198 | 
199 |                         reference_track_mod[int(mod_probe_position) + int(position_adjusting), int(mod_probe_predicted -1)] += 1   
200 | 
201 |                     Track_coverage[ int(position_adjusting): int(position_adjusting) +  int(end_of_sequence)] += 1
202 | 
203 |             except:
204 | 
205 |                 None
206 |    
207 |     
208 |     # //////// caculate the weight for each base calculating the distance from avarage covarage ////////
209 |     # the median seems to work bad in few cases. need to check where is the error.
210 | 
211 |     N_tot_analyzed = np.abs(end_reads - start_reads - N_miss)
212 |     Final_results = Coverage_analysis(N_tot_analyzed, reference_track_mod, Track_coverage, threshold = 0.2)
213 | 
214 |     print("analysis finished")
215 |     print("Total data to analyize:", np.abs(end_reads - start_reads))
216 |     print("data analyized:", N_tot_analyzed)
217 | 
218 |     # /// calculate the modification frequency lust by the number or reads analyzed///
219 | 
220 |     return Final_results


--------------------------------------------------------------------------------
/Resquigle_remora_GUI.py:
--------------------------------------------------------------------------------
  1 | #%%
  2 | import sys
  3 | from PyQt5.QtWidgets import QApplication, QMainWindow, QPushButton, QFileDialog, QVBoxLayout, QWidget, QLineEdit, QLabel, QHBoxLayout, QCheckBox
  4 | from Remora_resquigle_generate_data import Remora_resquigle_Generation_data
  5 | import json
  6 | import os
  7 | import numpy as np
  8 | 
  9 | class MainWindow(QMainWindow):
 10 |     def __init__(self):
 11 |         super().__init__()
 12 | 
 13 |         # list of variables
 14 |         self.paths = {"folder1": None, "folder2": None, "folder3": None, "folder4": None, "folder5": None}
 15 | 
 16 |         # Set up the main window
 17 |         self.setWindowTitle('Remora Resquigle - Generata training data for NN')
 18 |         self.setGeometry(100, 100, 320, 100)
 19 | 
 20 |         # Create a QWidget and set it as the central widget
 21 |         self.central_widget = QWidget()
 22 |         self.setCentralWidget(self.central_widget)
 23 | 
 24 |         # Create a vertical layout
 25 |         layout = QVBoxLayout()
 26 | 
 27 |         # Create buttons and add them to the layout
 28 |         self.button1 = QPushButton('Pod5 file folder')
 29 |         self.button1.clicked.connect(lambda: self.open_directory_dialog('folder1'))
 30 |         layout.addWidget(self.button1)
 31 | 
 32 |         self.button2 = QPushButton('bam file folder')
 33 |         self.button2.clicked.connect(lambda: self.open_directory_dialog('folder2'))
 34 |         layout.addWidget(self.button2)
 35 | 
 36 |         self.button3 = QPushButton('Save path')
 37 |         self.button3.clicked.connect(lambda: self.open_directory_dialog('folder4'))
 38 |         layout.addWidget(self.button3)
 39 | 
 40 |         self.button3 = QPushButton('kmer-level table file')
 41 |         self.button3.clicked.connect(lambda: self.open_filename_dialog('folder5'))
 42 |         layout.addWidget(self.button3)
 43 | 
 44 |         # set the first set of variables
 45 |         textbox1 = QLabel("General variables for training data:")
 46 |         layout.addWidget(textbox1)
 47 |         self.setup_variables(layout)
 48 | 
 49 |         # set the second set of variables
 50 |         textbox1 = QLabel("segmentation variables for training data:")
 51 |         layout.addWidget(textbox1)
 52 |         self.setup_variables_segmentation(layout)
 53 | 
 54 |         # Create buttons and add them to the layout
 55 |         self.button4 = QPushButton('Start resquigle')
 56 |         self.button4.clicked.connect(lambda: self.start_resquigle())
 57 |         layout.addWidget(self.button4)
 58 | 
 59 | 
 60 |         # Set the layout on the central widget
 61 |         self.central_widget.setLayout(layout)
 62 | 
 63 | 
 64 |     """ list of function used in the main"""
 65 | 
 66 |     def open_directory_dialog(self, folder_name):
 67 |         # Open a dialog to choose a directory
 68 |         directory = QFileDialog.getExistingDirectory(self, f"Select {folder_name}")
 69 |         if directory:
 70 |             self.paths[folder_name] = directory
 71 |             print(f"Selected path for {folder_name}: {directory}")
 72 | 
 73 | 
 74 |     def open_filename_dialog(self, file_type):
 75 |         # Open a dialog to choose a file
 76 |         options = QFileDialog.Options()
 77 |         options |= QFileDialog.ReadOnly
 78 | 
 79 |         file_name, _ = QFileDialog.getOpenFileName(self, f"Select {file_type}", "", "All Files (*);;FASTA Files (*.fasta)", options=options)        
 80 |         if file_name:
 81 |             self.paths[file_type] = file_name
 82 |             print(f"Selected path for {file_type}: {file_name}")
 83 | 
 84 |     def setup_variables(self, layout):
 85 |         # Creating layout and widgets for each variable in Variables tuple # "mod_mapping or basecalling?", 
 86 |         labels = ["modified_data? (bool)", 
 87 |                   "take_modifed_region? (bool)", "name_save_file (str)", 
 88 |                   "what type of modification? (str)", 
 89 |                   "modification pos. (int)", "Bases before modfication (int)", "modification dictionary (str)"]
 90 |         
 91 |         self.vars_entries = ["mod_mapping"] # "mod_mapping"
 92 |         for i, label in enumerate(labels):
 93 |             row_layout = QHBoxLayout()
 94 |             label_widget = QLabel(label + ":")
 95 |             input_widget = QLineEdit()
 96 |             row_layout.addWidget(label_widget)
 97 |             row_layout.addWidget(input_widget)
 98 |             layout.addLayout(row_layout)
 99 |             self.vars_entries.append(input_widget)
100 | 
101 | 
102 |     def setup_variables_segmentation(self, layout):
103 |         labels_segmentation = ["batch size (int)", "max seq. length (int)", 
104 |                                "chunk length (int)", "shift in time (int)", 
105 |                                "start read number (int)", "end read number (int)"]
106 |         
107 |         self.segmentation_entries = []
108 |         for label in labels_segmentation:
109 |             row_layout = QHBoxLayout()
110 |             label_widget = QLabel(label + ":")
111 |             input_widget = QLineEdit()
112 |             row_layout.addWidget(label_widget)
113 |             row_layout.addWidget(input_widget)
114 |             layout.addLayout(row_layout)
115 |             self.segmentation_entries.append(input_widget)
116 | 
117 |     """ """
118 |     def start_resquigle(self):
119 | 
120 |         #level_table_folder = self.paths["folder5"]
121 |         #level_table_list = os.listdir(level_table_folder) #maybe to change to read the file and not the folder                
122 |         #level_table_file = level_table_folder + "/" + level_table_list[0]
123 | 
124 |         level_table_file = self.paths["folder5"]
125 | 
126 |         save_path = self.paths["folder4"]
127 | 
128 |         pod5_folder = self.paths["folder1"]
129 |         bam_folder = self.paths["folder2"]
130 |         bam_list = os.listdir(bam_folder)
131 | 
132 |         var1_bool = []
133 |         var2_bool = []
134 | 
135 |         if self.vars_entries[1].text() == "yes" or self.vars_entries[1].text() == "Yes":
136 |             
137 |             var1_bool = True
138 | 
139 |         else:
140 | 
141 |             var1_bool = False
142 | 
143 | 
144 |         if self.vars_entries[2].text() == "yes" or self.vars_entries[2].text() == "Yes":
145 |             
146 |             var2_bool = True
147 | 
148 |         else:
149 | 
150 |             var2_bool = False
151 | 
152 | 
153 |         Variables = (self.vars_entries[0], #.text()
154 |                      var1_bool, #bool, 
155 |                      var2_bool, #bool, 
156 |                      self.vars_entries[3].text(), 
157 |                      self.vars_entries[4].text(),
158 |                      int( self.vars_entries[5].text()), 
159 |                      int( self.vars_entries[6].text())
160 |                      )
161 | 
162 |         variables_segmentation = (int( self.segmentation_entries[0].text()), 
163 |                                   int( self.segmentation_entries[1].text()), 
164 |                                   int( self.segmentation_entries[2].text()), 
165 |                                   int( self.segmentation_entries[3].text())
166 |                                   )
167 | 
168 |         Indexes = (int( self.segmentation_entries[4].text()),
169 |                     int( self.segmentation_entries[5].text()))
170 | 
171 |         # /// create a dictionary ///
172 | 
173 |         probe_names = self.vars_entries[7].text()
174 |         probe_names = probe_names.split(',')
175 | 
176 |         values = np.arange(2, len(probe_names) + 2, 1)
177 | 
178 |         # Convert the list into a dictionary with default values
179 |         mod_dictionary = {probe_names[i]: values[i] for i in range(len(probe_names))}
180 | 
181 | 
182 |         print(mod_dictionary)
183 |         print(Variables)
184 |         print(variables_segmentation)
185 |         print(Indexes)
186 |          
187 |         for i in range (len(bam_list)):
188 | 
189 |             bam_file = bam_folder + "/" +  bam_list[i]
190 |             
191 |             Remora_resquigle_Generation_data(pod5_folder, bam_file, 
192 |                                              level_table_file, save_path,
193 |                                              Variables, variables_segmentation, 
194 |                                              Indexes, mod_dictionary, i)
195 | 
196 |             """
197 |             # /// save each bam file in a different generated folder
198 |             bam_file = bam_folder + "/" +  bam_list[i]
199 |             
200 |             #this creates several folder and save inside the data
201 |             Directory = self.vars_entries[3].text() + f"reference_{i}"
202 |             Final_path = os.path.join(save_path, Directory) 
203 |             os.mkdir(Final_path)
204 | 
205 |             Remora_resquigle_Generation_data(pod5_folder, bam_file, 
206 |                                              level_table_file, Final_path, #save_path
207 |                                              Variables, variables_segmentation, 
208 |                                              Indexes, mod_dictionary, i)            
209 |             """
210 |             
211 |         print("Resquigle finished")                     
212 | 
def main():
    """Start the Qt application, show the main window and run its event loop."""
    qt_app = QApplication(sys.argv)
    main_window = MainWindow()
    main_window.show()
    # exec_() blocks until the application quits; its return value becomes
    # the exit status of the process.
    exit_code = qt_app.exec_()
    sys.exit(exit_code)


if __name__ == "__main__":
    main()


--------------------------------------------------------------------------------
/Training_NN_GUI.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | from PyQt5.QtWidgets import QApplication, QMainWindow, QPushButton, QFileDialog, QVBoxLayout, QWidget, QLineEdit, QLabel, QHBoxLayout, QCheckBox
  3 | import tensorflow as tf
  4 | from keras.callbacks import LearningRateScheduler
  5 | from Load_data_for_training_V2 import  Load_data_RNA
  6 | from ModiDec_NN import ModiDeC_model
  7 | import os
  8 | import numpy as np
  9 | 
 10 | class MainWindow(QMainWindow):
 11 |     def __init__(self):
 12 |         super().__init__()
 13 | 
 14 |         # list of variables
 15 |         self.paths = {"folder1": None, "folder2": None , "folder3": None}
 16 | 
 17 |         # Set up the main window
 18 |         self.setWindowTitle("Training Nueral network - modification classifier")
 19 |         self.setGeometry(100, 100, 320, 100)
 20 | 
 21 |         # Create a QWidget and set it as the central widget
 22 |         self.central_widget = QWidget()
 23 |         self.setCentralWidget(self.central_widget)
 24 | 
 25 |         # Create a vertical layout
 26 |         layout = QVBoxLayout()
 27 | 
 28 |         # Create buttons and add them to the layout
 29 |         self.button1 = QPushButton('training data folder')
 30 |         self.button1.clicked.connect(lambda: self.open_directory_dialog('folder1'))
 31 |         layout.addWidget(self.button1)
 32 | 
 33 |         # Create buttons and add them to the layout
 34 |         self.button2 = QPushButton('Validation data folder')
 35 |         self.button2.clicked.connect(lambda: self.open_directory_dialog('folder2'))
 36 |         layout.addWidget(self.button2)
 37 | 
 38 |         self.button3 = QPushButton('save model folder')
 39 |         self.button3.clicked.connect(lambda: self.open_directory_dialog('folder3'))
 40 |         layout.addWidget(self.button3)
 41 | 
 42 |         # set the first set of variables
 43 |         textbox1 = QLabel("General variables for training data:")
 44 |         layout.addWidget(textbox1)
 45 |         self.setup_variables(layout)
 46 | 
 47 |         # Create buttons and add them to the layout
 48 |         self.button4 = QPushButton('Start training')
 49 |         self.button4.clicked.connect(lambda: self.start_training())
 50 |         layout.addWidget(self.button4)
 51 | 
 52 |         # Set the layout on the central widget
 53 |         self.central_widget.setLayout(layout)
 54 | 
 55 | 
 56 |     """ list of function used in the main"""
 57 | 
 58 |     def open_directory_dialog(self, folder_name):
 59 |         # Open a dialog to choose a directory
 60 |         directory = QFileDialog.getExistingDirectory(self, f"Select {folder_name}")
 61 |         if directory:
 62 |             self.paths[folder_name] = directory
 63 |             print(f"Selected path for {folder_name}: {directory}")
 64 | 
 65 |     """
 66 |     labels = ["chunck_size (int)", "batch_size (int)", 
 67 |                 "single_data_size (int)", "max seq. length (int)", "k-mer model (int)",
 68 |                 "labels (int)", "epoches (suggeste 4) (int)", "name NN (str)" ]
 69 |     """
 70 | 
 71 |     def setup_variables(self, layout):
 72 |         # Creating layout and widgets for each variable in Variables tuple
 73 |         labels = ["batch_size (int)", "k-mer model (int)", "epoches (suggeste 4) (int)", "name NN (str)", "validation during training? (bool)" ]
 74 |         
 75 |         self.vars_entries = []
 76 |         for i, label in enumerate(labels):
 77 |             row_layout = QHBoxLayout()
 78 |             label_widget = QLabel(label + ":")
 79 |             input_widget = QLineEdit()
 80 |             row_layout.addWidget(label_widget)
 81 |             row_layout.addWidget(input_widget)
 82 |             layout.addLayout(row_layout)
 83 |             self.vars_entries.append(input_widget)
 84 | 
 85 |     def start_training(self):
 86 | 
 87 |         """load the variables"""
 88 | 
 89 |         path_data = self.paths["folder1"]
 90 |         data_list = os.listdir(path_data)
 91 | 
 92 | 
 93 |         path_eval = self.paths["folder2"]
 94 |         eval_list = os.listdir(path_data)
 95 | 
 96 |         var1_bool = []
 97 | 
 98 |         # //// extract variable for training from data training datasets ///
 99 | 
100 |         probe_data = np.load(path_data + "/" + data_list[0])
101 | 
102 |         probe_x1_data = probe_data["train_input"]
103 |         probe_y_data = probe_data["train_output"]
104 | 
105 |         chunck_size = int(probe_x1_data.shape[1])
106 |         single_data_size = int(probe_x1_data.shape[0])
107 |         labels = int(probe_y_data.shape[2])
108 |         max_seq_len = int(probe_y_data.shape[1])
109 | 
110 |         batch_size = int(self.vars_entries[0].text())
111 |         k_mer = int( self.vars_entries[1].text())
112 |         N_epoch = int( self.vars_entries[2].text())
113 | 
114 |         "validation during training? (bool)" 
115 | 
116 |         if self.vars_entries[4].text() == "yes" or self.vars_entries[4].text() == "Yes":
117 |             
118 |             var1_bool = True
119 | 
120 |         else:
121 | 
122 |             var1_bool = False
123 | 
124 | 
125 |         """ /////define the model /////"""
126 | 
127 |         model = ModiDeC_model(Inp_1 = chunck_size, Inp_2 = max_seq_len, labels = labels, kmer_model=k_mer)
128 | 
129 |         """ /////compile the model for the training ///"""
130 | 
131 |         opt_adam =tf.keras.optimizers.Adam(learning_rate= 0.0001)
132 | 
133 |         model.compile(optimizer=opt_adam, 
134 |                 loss= tf.losses.binary_crossentropy, 
135 |                 metrics=["accuracy"])
136 | 
137 |         def lr_schedule(epoch, optimizer):
138 | 
139 |             min_lr = 0.0000125  # Set the minimum learning rate
140 | 
141 |             # Update the learning rate if needed (similar to your original code)       
142 |             if epoch % 2 == 0 and epoch > 0:
143 | 
144 |                 new_lr = tf.keras.backend.get_value(model.optimizer.lr) * 0.5  # You can adjust the decay factor as needed
145 |                 model.optimizer.lr.assign(new_lr)
146 |                 return max(new_lr, min_lr)
147 |             
148 |             else:
149 |                 return tf.keras.backend.get_value(model.optimizer.lr)
150 |     
151 |         lr_scheduler = LearningRateScheduler(lambda epoch: lr_schedule(epoch, optimizer=opt_adam))
152 | 
153 |         if var1_bool == False: 
154 | 
155 |             N_batches = int(len(data_list)/(batch_size/single_data_size))
156 | 
157 |             """loading function used for training"""
158 | 
159 |             training_generator =  Load_data_RNA(batch_size, N_batches,
160 |                                                 path_data, 
161 |                                                 data_list, 
162 |                                                 chunck_size = chunck_size, 
163 |                                                 labels= labels , 
164 |                                                 batch_loading = single_data_size,
165 |                                                 max_seq_len= max_seq_len)
166 | 
167 |             """start the training"""
168 | 
169 |             model.fit(training_generator, 
170 |                         shuffle = True, 
171 |                         epochs=N_epoch, 
172 |                         workers= 6, 
173 |                         max_queue_size=128,
174 |                         callbacks= [lr_scheduler]) 
175 | 
176 |             """save the model"""
177 | 
178 |             model.save( self.paths["folder3"] + "/" + self.vars_entries[3].text())
179 | 
180 |             print("training complete")
181 | 
182 |         else: 
183 | 
184 |             N_batches = int(len(data_list)/(batch_size/single_data_size))
185 |             N_batches_2 = int(len(eval_list)/(batch_size/single_data_size))
186 | 
187 |             """loading function used for training"""
188 | 
189 |             training_generator =  Load_data_RNA(batch_size, N_batches,
190 |                                                 path_data, 
191 |                                                 data_list, 
192 |                                                 chunck_size = chunck_size, 
193 |                                                 labels= labels , 
194 |                                                 batch_loading = single_data_size,
195 |                                                 max_seq_len= max_seq_len)
196 | 
197 |             validation_generator =  Load_data_RNA(batch_size, N_batches_2,
198 |                                                 path_eval, 
199 |                                                 eval_list, 
200 |                                                 chunck_size = chunck_size, 
201 |                                                 labels= labels, 
202 |                                                 batch_loading = single_data_size,
203 |                                                 max_seq_len= max_seq_len)
204 | 
205 |             """start the training"""
206 | 
207 |             model.fit(training_generator, 
208 |                         validation_data = validation_generator,
209 |                         shuffle = True, 
210 |                         epochs=N_epoch, 
211 |                         workers= 6, 
212 |                         max_queue_size=256,
213 |                         callbacks= [lr_scheduler]) 
214 | 
215 |             """save the model"""
216 | 
217 |             model.save( self.paths["folder3"] + "/" + self.vars_entries[3].text())
218 | 
219 |             print("training complete")
220 | 
221 | 
def main():
    """Launch the training GUI and block until its window is closed."""
    qt_app = QApplication(sys.argv)
    training_window = MainWindow()
    training_window.show()
    # The event loop's return code is forwarded as the process exit status.
    status = qt_app.exec_()
    sys.exit(status)


if __name__ == "__main__":
    main()
230 | 


--------------------------------------------------------------------------------
/Analysis_platform_GUI.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import sys
  3 | from PyQt5.QtWidgets import QApplication, QMainWindow, QPushButton, QVBoxLayout, QWidget, QLabel, QSlider, QLineEdit, QHBoxLayout , QFileDialog
  4 | from PyQt5.QtCore import Qt
  5 | import numpy as np
  6 | import matplotlib.pyplot as plt
  7 | from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas
  8 | import pod5
  9 | from remora import io , refine_signal_map, util
 10 | import tensorflow as tf
 11 | from Analyze_data_NN_V2 import NN_analyzer
 12 | 
 13 | 
 14 | """ to generate a window into a window two main class has to be defined"""
 15 | """ second window generator"""
 16 | class RNA_analysis_platform(QWidget):
 17 |     def __init__(self, Analysis_NN):
 18 |         super().__init__()
 19 |         
 20 |         self.Analysis_NN  = Analysis_NN
 21 |         self.initUI()
 22 | 
 23 |     def initUI(self):
 24 |         self.layout = QVBoxLayout()
 25 | 
 26 |         self.start_label = QLabel('Start Point:')
 27 |         self.end_label = QLabel('End Point:')
 28 |         self.start_input = QLineEdit(self)
 29 |         self.end_input = QLineEdit(self)
 30 |         self.start_slider = QSlider(Qt.Horizontal, self)
 31 |         self.end_slider = QSlider(Qt.Horizontal, self)
 32 |         
 33 |         self.start_input.setText('0')
 34 |         self.end_input.setText('10')
 35 |         self.start_slider.setMinimum(0)
 36 |         self.start_slider.setMaximum(self.Analysis_NN.shape[0])
 37 |         self.start_slider.setValue(0)
 38 |         self.end_slider.setMinimum(0)
 39 |         self.end_slider.setMaximum(self.Analysis_NN.shape[0])
 40 |         self.end_slider.setValue(10)
 41 | 
 42 |         self.start_input.textChanged.connect(self.update_start_slider)
 43 |         self.end_input.textChanged.connect(self.update_end_slider)
 44 |         self.start_slider.valueChanged.connect(self.update_start_input)
 45 |         self.end_slider.valueChanged.connect(self.update_end_input)
 46 | 
 47 |         self.hbox1 = QHBoxLayout()
 48 |         self.hbox1.addWidget(self.start_label)
 49 |         self.hbox1.addWidget(self.start_input)
 50 | 
 51 |         self.hbox2 = QHBoxLayout()
 52 |         self.hbox2.addWidget(self.end_label)
 53 |         self.hbox2.addWidget(self.end_input)
 54 | 
 55 |         self.layout.addLayout(self.hbox1)
 56 |         self.layout.addWidget(self.start_slider)
 57 |         self.layout.addLayout(self.hbox2)
 58 |         self.layout.addWidget(self.end_slider)
 59 | 
 60 |         self.plot_button = QPushButton('Plot', self)
 61 |         self.plot_button.clicked.connect(self.plot)
 62 | 
 63 |         self.layout.addWidget(self.plot_button)
 64 | 
 65 |         self.figure, self.ax = plt.subplots()
 66 |         self.canvas = FigureCanvas(self.figure)
 67 |         self.layout.addWidget(self.canvas)
 68 | 
 69 |         self.setLayout(self.layout)
 70 | 
 71 |     def update_start_slider(self, text):
 72 |         try:
 73 |             value = int(text)
 74 |             self.start_slider.setValue(value)
 75 |         except ValueError:
 76 |             pass
 77 | 
 78 |     def update_end_slider(self, text):
 79 |         try:
 80 |             value = int(text)
 81 |             self.end_slider.setValue(value)
 82 |         except ValueError:
 83 |             pass
 84 | 
 85 |     def update_start_input(self, value):
 86 |         self.start_input.setText(str(value))
 87 | 
 88 |     def update_end_input(self, value):
 89 |         self.end_input.setText(str(value))
 90 | 
 91 |     def plot(self):
 92 |         start = int(self.start_input.text())
 93 |         end = int(self.end_input.text())
 94 | 
 95 |         x_axis = np.arange(0,self.Analysis_NN.shape[0],1)
 96 | 
 97 |         self.ax.clear()
 98 | 
 99 |         """
100 |         self.ax.plot(x_axis,self.Analysis_NN[:,0], marker= "o" , label="Gm")
101 |         self.ax.plot(x_axis,self.Analysis_NN[:,1], marker= "o"  , label="$m^6A$")
102 |         self.ax.plot(x_axis,self.Analysis_NN[:,2], marker= "o"  , label="Ino")
103 |         self.ax.plot(x_axis,self.Analysis_NN[:,3], marker= "o"  , label="Psi")
104 |         """
105 |         self.ax.plot(x_axis,self.Analysis_NN, marker= "o" , label="label")
106 | 
107 |         self.ax.set_xlim(start,end)
108 |         self.ax.set_ylim(-0.05,1)
109 |         #ax2.set_xlim(0,len(reference)) #len(reference)
110 |         self.ax.set_xlabel("ref. seq. position")
111 |         self.ax.set_ylabel("freq. modif.")
112 |         self.ax.legend()
113 |         self.canvas.draw()
114 | 
115 | 
116 | """ first window generator"""
117 | 
class MainWindow(QMainWindow):
    """First window: selects the inputs and runs the neural-network analysis.

    The intended order of use follows the buttons from top to bottom: pick
    the five inputs, press "Initialize the data", fill in the start and end
    index, start the analysis and finally open the visualization window.
    """

    def __init__(self):
        super().__init__()

        # Inputs picked through the dialogs: "folder1" pod5 folder,
        # "folder2" bam file, "folder3" neural network folder,
        # "folder4" kmer-level table file, "folder5" reference file.
        self.paths = {"folder1": None, "folder2": None, "folder3": None, "folder4": None, "folder5": None}

        # Filled in by Initialize_Analysis / Analysis_Neural_network. They
        # start as None so later steps can tell whether the earlier ones ran.
        self.NN_model = None
        self.Analysis_NN = None

        # Set up the main window
        self.setWindowTitle('Analysis data Neural network')
        self.setGeometry(100, 100, 320, 100)

        # Create a QWidget and set it as the central widget
        self.central_widget = QWidget()
        self.setCentralWidget(self.central_widget)

        # Create a vertical layout
        layout = QVBoxLayout()

        # Create buttons and add them to the layout. Every button gets its
        # own attribute instead of re-using one name for several widgets.
        self.button1 = QPushButton('Pod5 file folder')
        self.button1.clicked.connect(lambda: self.open_directory_dialog('folder1'))
        layout.addWidget(self.button1)

        self.button2 = QPushButton('bam file')
        self.button2.clicked.connect(lambda: self.open_filename_dialog('folder2'))
        layout.addWidget(self.button2)

        self.button3 = QPushButton('Neural Network folder')
        self.button3.clicked.connect(lambda: self.open_directory_dialog('folder3'))
        layout.addWidget(self.button3)

        self.button4 = QPushButton('kmer-level table file')
        self.button4.clicked.connect(lambda: self.open_filename_dialog('folder4'))
        layout.addWidget(self.button4)

        self.button5 = QPushButton('reference')
        self.button5.clicked.connect(lambda: self.open_filename_dialog('folder5'))
        layout.addWidget(self.button5)

        self.button6 = QPushButton('Initialize the data')
        self.button6.clicked.connect(lambda: self.Initialize_Analysis())
        layout.addWidget(self.button6)

        # Text fields for the analysis settings
        textbox1 = QLabel("General variables for the analysis:")
        layout.addWidget(textbox1)
        self.setup_variables(layout)

        self.button7 = QPushButton('start analysis with Neural network')
        self.button7.clicked.connect(lambda: self.Analysis_Neural_network())
        layout.addWidget(self.button7)

        self.button8 = QPushButton('Visualize results')
        self.button8.clicked.connect(lambda: self.open_visualization_results())
        layout.addWidget(self.button8)

        # Set the layout on the central widget
        self.central_widget.setLayout(layout)

    # ---- functions used by the main window ----

    def open_directory_dialog(self, folder_name):
        """Ask the user for a directory and store it in ``self.paths``.

        Args:
            folder_name: key of ``self.paths`` under which the selection is
                stored. Nothing is stored when the dialog is cancelled.
        """
        directory = QFileDialog.getExistingDirectory(self, f"Select {folder_name}")
        if directory:
            self.paths[folder_name] = directory
            print(f"Selected path for {folder_name}: {directory}")

    def open_filename_dialog(self, file_type):
        """Ask the user for a file and store it in ``self.paths``.

        Args:
            file_type: key of ``self.paths`` under which the selection is
                stored. Nothing is stored when the dialog is cancelled.
        """
        options = QFileDialog.Options()
        options |= QFileDialog.ReadOnly

        file_name, _ = QFileDialog.getOpenFileName(self, f"Select {file_type}", "", "All Files (*);;FASTA Files (*.fasta)", options=options)
        if file_name:
            self.paths[file_type] = file_name
            print(f"Selected path for {file_type}: {file_name}")

    def setup_variables(self, layout):
        """Add one labelled text field per analysis setting to ``layout``.

        The created ``QLineEdit`` widgets are kept, in label order, in
        ``self.vars_entries``.
        """
        labels = ["start_index", "end_index"]

        self.vars_entries = []
        for label in labels:
            row_layout = QHBoxLayout()
            label_widget = QLabel(label + ":")
            input_widget = QLineEdit()
            row_layout.addWidget(label_widget)
            row_layout.addWidget(input_widget)
            layout.addLayout(row_layout)
            self.vars_entries.append(input_widget)

    def Initialize_Analysis(self):
        """Open the pod5/bam data, build the signal refiner and load the model.

        The chunk size and the maximum sequence length are read from the
        input shape of the loaded model, the number of modifications from
        its output shape. When one of the four required inputs has not been
        selected, a message is printed and nothing is initialized.
        """
        required = ("folder1", "folder2", "folder3", "folder4")
        missing = [key for key in required if self.paths[key] is None]
        if missing:
            print(f"Select the following inputs first: {', '.join(missing)}")
            return

        pod5_path = self.paths["folder1"]
        bam_path = self.paths["folder2"]
        model_path = self.paths["folder3"]
        level_table_file = self.paths["folder4"]

        self.pod5_dr = pod5.DatasetReader(pod5_path)
        self.bam_fh = io.ReadIndexedBam(bam_path)

        self.read_id = self.bam_fh.read_ids

        self.sig_map_refiner = refine_signal_map.SigMapRefiner(
                    kmer_model_filename=level_table_file,
                    do_rough_rescale=True,
                    scale_iters=0,
                    do_fix_guage=True)

        self.NN_model = tf.keras.models.load_model(model_path)

        input_shapes = self.NN_model.input_shape
        output_shape = self.NN_model.output_shape

        self.chunck_size = int(input_shapes[0][1])
        self.max_seq_len = int(input_shapes[1][1])
        self.total_mod = output_shape[2] - 1

        print("initialize: Done")

    def Analysis_Neural_network(self):
        """Run the analysis and keep the result in ``self.Analysis_NN``.

        Requires ``Initialize_Analysis`` to have run and a reference file to
        be selected; otherwise a message is printed and nothing is analysed.
        """
        if self.NN_model is None:
            print("Initialize the data before starting the analysis")
            return

        reference_path = self.paths["folder5"]
        if reference_path is None:
            print("Select the reference file before starting the analysis")
            return

        try:
            start_index = int(self.vars_entries[0].text())
            end_index = int(self.vars_entries[1].text())
        except ValueError:
            print("start_index and end_index must be integers")
            return

        Variables = (start_index,
                     end_index,
                     int(self.chunck_size),
                     int(self.max_seq_len))

        # The context manager closes the reference file once it is read.
        with open(reference_path) as reference_file:
            reference = reference_file.read()

        self.Analysis_NN = NN_analyzer(Variables,
                                       self.pod5_dr,
                                       self.bam_fh,
                                       self.read_id,
                                       self.sig_map_refiner,
                                       self.NN_model,
                                       reference,
                                       labels_mod=self.total_mod)

        print("Analysis finished")

    def open_visualization_results(self):
        """Open the plotting window for the latest analysis result."""
        if self.Analysis_NN is None:
            print("Run the analysis before visualizing the results")
            return

        # Kept as an attribute so the window object stays referenced while
        # it is shown.
        self.gaussian_plot = RNA_analysis_platform(self.Analysis_NN)
        self.gaussian_plot.show()
267 | 
268 | 
def main():
    """Launch the analysis GUI and block until its main window is closed."""
    qt_app = QApplication(sys.argv)
    analysis_window = MainWindow()
    analysis_window.show()
    # Hand the event loop's return code back to the operating system.
    return_code = qt_app.exec_()
    sys.exit(return_code)


if __name__ == "__main__":
    main()
277 | 


--------------------------------------------------------------------------------
/Remora_resquigle_generate_data.py:
--------------------------------------------------------------------------------
  1 | import pod5
  2 | from remora import io , refine_signal_map, util
  3 | import os
  4 | import numpy as np
  5 | 
  6 | 
  7 | def Remora_resquigle_Generation_data(data_path, bam_file, level_table_file, save_path, Variables, variables_segmentation, Indexes, mod_dictionary, ind_loop):
  8 | 
  9 |     #initial variable
 10 |     type_analysis = Variables[0]
 11 |     modified_data = Variables[1]
 12 |     take_mod_region = Variables[2]
 13 |     name_save_file = Variables[3]
 14 |     Modfied_base = Variables[4]
 15 |     mod_pos_initial = Variables[5]
 16 |     start_base_resquigle = Variables[6]
 17 | 
 18 |     #second variable for chunk size creations
 19 |     batch_size = variables_segmentation[0]
 20 |     max_label_length = variables_segmentation[1]
 21 |     time_segment = variables_segmentation[2]
 22 |     shift = variables_segmentation[3]
 23 | 
 24 |     # /////// read the files //////
 25 | 
 26 |     pod5_dr = pod5.DatasetReader(data_path)
 27 |     bam_fh = io.ReadIndexedBam(bam_file)
 28 | 
 29 |     # /////// take the name of reads////
 30 | 
 31 |     read_id = bam_fh.read_ids
 32 | 
 33 |     # /// define the function for resquile from Remora ///
 34 |     # // old version used for DNA. maybe DNA data has to be analysed again //
 35 | 
 36 |     sig_map_refiner = refine_signal_map.SigMapRefiner(
 37 |                         kmer_model_filename=level_table_file,
 38 |                         do_rough_rescale=True,
 39 |                         scale_iters=0,
 40 |                         do_fix_guage=True)
 41 |     
 42 |     if type_analysis == "mod_mapping":
 43 | 
 44 |         labels = len(mod_dictionary)
 45 | 
 46 |     if type_analysis == "basecalling":
 47 | 
 48 |         labels = 4
 49 | 
 50 |     start_Index = Indexes[0]
 51 | 
 52 |     for name_id in read_id[Indexes[0]: Indexes[1]]: #need to find a way to choose the ids.
 53 | 
 54 |         start_Index += 1
 55 |         print(start_Index)
 56 |         seq_resquigle = ""
 57 |         position_adjusting = 0
 58 |         Error_read = False
 59 | 
 60 |         # /// extract the select read and info from bam file ///
 61 | 
 62 |         pod5_read = pod5_dr.get_read(name_id)
 63 |         bam_read = bam_fh.get_first_alignment(name_id)
 64 | 
 65 |         # /// after extraction, obtain the basecalling information ///
 66 | 
 67 |         if bam_read.is_reverse: #correct the signal for forward direction
 68 |             flip = False
 69 |         else:
 70 |             flip = True
 71 | 
 72 |         try:
 73 |             #/// read data
 74 |             read_analysed = io.Read.from_pod5_and_alignment(pod5_read, bam_read, reverse_signal = flip)
 75 |             
 76 |             #/// If data were aligned with U, U in sequence will be replaced by the T. Important for resquiggle
 77 |             prob_ref = read_analysed.ref_seq
 78 |             prob_ref = prob_ref.replace("U", "T")
 79 |             read_analysed.ref_seq = prob_ref
 80 |             
 81 |             # // resquigle the data with the refence
 82 |             read_analysed.set_refine_signal_mapping(sig_map_refiner, ref_mapping=True)
 83 | 
 84 |             start_of_mapping = read_analysed.extract_ref_reg(
 85 |                 read_analysed.ref_reg.adjust(start_adjust = 0, end_adjust=read_analysed.ref_reg.len))
 86 | 
 87 |             Raw_signal = start_of_mapping.norm_signal
 88 |             seq_resquigle = start_of_mapping.seq
 89 |             start_end_resquigle = start_of_mapping.seq_to_sig_map
 90 | 
 91 |             # /// check if the modification position has to be adjusted ///
 92 |             position_adjusting =start_of_mapping.ref_reg.start
 93 |             
 94 |         except:
 95 | 
 96 |             print("error")
 97 |             position_adjusting = 0
 98 |             seq_resquigle = ""
 99 |             Error_read = True
100 | 
101 |         """
102 |         mod_pos = mod_pos_initial - position_adjusting - 1            
103 |         max_signal_length = Raw_signal[0 : mod_pos + time_segment]
104 |         """
105 |         
106 |         val_total_seq = position_adjusting + len(seq_resquigle)
107 |         high_threshold = mod_pos_initial + 20
108 |         
109 |         # // select only high score quality, extrapolate signal and save data //
110 | 
111 |         start_analysis = False
112 | 
113 |         if take_mod_region == True:
114 | 
115 |             if high_threshold < val_total_seq and position_adjusting < mod_pos_initial and Error_read == False: 
116 | 
117 |                 start_analysis = True
118 | 
119 |         else:
120 | 
121 |             if  Error_read == False:
122 | 
123 |                     start_analysis = True
124 | 
125 | 
126 |         if  start_analysis == True: # ///////// TO CHECK !!! ////////////
127 | 
128 |             Signal_onehot = np.zeros([len(Raw_signal),4 + 1])
129 |             Output_onehot = np.zeros([len(Raw_signal), labels + 2])
130 | 
131 |             mod_pos = mod_pos_initial - position_adjusting - 1            
132 | 
133 |             if modified_data == True:
134 | 
135 |                 seq_resquigle_mod = seq_resquigle[:mod_pos] + "X" + seq_resquigle[mod_pos +1:] 
136 | 
137 |             else:
138 | 
139 |                 seq_resquigle_mod = seq_resquigle
140 | 
141 |             if type_analysis == "mod_mapping":
142 |                 
143 |                 #modification_dict = {"G":2, "M":3, "I":4, "P":5}
144 |                 value_modification = int(mod_dictionary[Modfied_base])
145 |                 base_dict_output = { "A":1, "C":1, "G":1, "T":1,"X":value_modification} # variable
146 | 
147 |             if type_analysis == "basecalling":
148 | 
149 |                 base_dict_output = { "A":1, "C":2, "G":3, "T":4, "X":5}
150 |                 
151 |             base_dict = {"A":1, "C":2, "G":3, "T":4}
152 | 
153 |             try:
154 | 
155 |                 for k in range(len(seq_resquigle)):
156 | 
157 |                     start_resq = start_end_resquigle[k]
158 |                     Signal_onehot[start_resq,base_dict[seq_resquigle[k]]] = 1
159 |                     Output_onehot[start_resq,base_dict_output[seq_resquigle_mod[k]]] = 1
160 | 
161 |                 if type_analysis == "mod_mapping" and modified_data == True:
162 | 
163 |                     mod_position = np.where(Output_onehot[:,value_modification] > 0)[0][0]
164 | 
165 |                 if type_analysis == "mod_mapping" and modified_data == False:
166 | 
167 |                     if take_mod_region == True:
168 | 
169 |                         mod_position = np.where(Output_onehot[:,1] > 0)[0][mod_pos]
170 | 
171 |                     else:
172 |                         
173 |                         mod_position = 0
174 |                         
175 |                 if type_analysis == "basecalling" and modified_data == True:
176 | 
177 |                     mod_position = np.where(Output_onehot[:,5] > 0)[0][0]
178 | 
179 |                 if type_analysis == "basecalling" and modified_data == False: # to check for the others
180 | 
181 |                     if take_mod_region == True:
182 | 
183 |                         mod_position = np.where(Output_onehot[:,1] > 0)[0][mod_pos]
184 | 
185 |                     else:
186 |                         
187 |                         mod_position = 0
188 | 
189 |                 if take_mod_region == True:
190 | 
191 |                     minus_start = np.abs(start_end_resquigle[mod_pos - start_base_resquigle] - mod_position)
192 | 
193 |                     N_shift = int((time_segment + minus_start)/shift)
194 | 
195 |                 else:
196 | 
197 |                     N_shift = int((len(Raw_signal) - time_segment)/shift)
198 | 
199 |                 for n in range(int(N_shift/batch_size)):
200 | 
201 |                     train1_batch = np.zeros([batch_size, time_segment])
202 |                     train2_batch = np.zeros([batch_size, max_label_length, 4])
203 |                     output_batch = np.zeros([batch_size, max_label_length, 1 + labels])
204 | 
205 |                     for m in range(batch_size):
206 | 
207 |                         if take_mod_region == True:
208 |                             
209 |                             midlle_mod_position = mod_position #+ int(0.5*np.abs(start_end_resquigle[mod_pos + 1] - start_end_resquigle[mod_pos]))
210 |                             start = midlle_mod_position - n*batch_size*shift - m*shift
211 |                             end = start + time_segment
212 | 
213 |                         else:
214 | 
215 |                             start = n*batch_size*shift + m*shift
216 |                             end = start + time_segment
217 | 
218 |                         output_for_batch = np.zeros([max_label_length,1 + labels])
219 |                         train2_for_batch = np.zeros([max_label_length,4])
220 | 
221 |                         # // Trick: all bases have a non-zero value in the one-hot encoding.
222 |                         # Converting the one-hot rows back into an index array (argmax) and removing the 0 values,
223 |                         # I obtain the indices used to fill the final one-hot sequences for train2 and output.
224 | 
225 |                         probe_1 = np.argmax(Signal_onehot[start:end,:], axis = -1)
226 |                         probe_1 = probe_1[probe_1 != 0]
227 |                         probe_1 = probe_1 - 1
228 | 
229 |                         probe_2 = np.argmax(Output_onehot[start:end,:], axis = -1)
230 |                         probe_2 = probe_2[probe_2 != 0]
231 |                         probe_2 = probe_2 - 1
232 | 
233 |                         try:
234 | 
235 |                             for kk in range(len(probe_1)):
236 |                                 
237 |                                 train2_for_batch[kk, probe_1[kk]] = 1
238 |                                 output_for_batch[kk, probe_2[kk]] = 1
239 | 
240 |                         except:
241 | 
242 |                             for kk in range(max_label_length):
243 |                                 
244 |                                 train2_for_batch[kk, probe_1[kk]] = 1
245 |                                 output_for_batch[kk, probe_2[kk]] = 1
246 | 
247 |                         # the try/except is placed here for data that are too short to be stored;
248 |                         # the problem only occurs with modified data.
249 | 
250 |                         try:
251 | 
252 |                             train1_batch[m] = Raw_signal[start:end]
253 |                             train2_batch[m] = train2_for_batch
254 |                             output_batch[m] = output_for_batch
255 | 
256 |                         except:
257 | 
258 |                             if mod_position < int(time_segment/2):                            
259 |                                 start = mod_position
260 |                                 end = start + time_segment
261 | 
262 |                             else:     
263 |                                 start = mod_position - int(time_segment/2)
264 |                                 end = start + time_segment
265 | 
266 |                             probe_1 = np.argmax(Signal_onehot[start:end,:], axis = -1)
267 |                             probe_1 = probe_1[probe_1 != 0]
268 |                             probe_1 = probe_1 - 1
269 | 
270 |                             probe_2 = np.argmax(Output_onehot[start:end,:], axis = -1)
271 |                             probe_2 = probe_2[probe_2 != 0]
272 |                             probe_2 = probe_2 - 1
273 | 
274 |                             try:
275 | 
276 |                                 for kk in range(len(probe_1)):
277 |                                     
278 |                                     train2_for_batch[kk, probe_1[kk]] = 1
279 |                                     output_for_batch[kk, probe_2[kk]] = 1
280 | 
281 |                             except:
282 | 
283 |                                 for kk in range(max_label_length):
284 |                                     
285 |                                     train2_for_batch[kk, probe_1[kk]] = 1
286 |                                     output_for_batch[kk, probe_2[kk]] = 1
287 | 
288 |                             train1_batch[m] = Raw_signal[start:end]
289 |                             train2_batch[m] = train2_for_batch
290 |                             output_batch[m] = output_for_batch
291 | 
292 |                     file_name = name_save_file + f"{int(ind_loop)}_{int(start_Index)}" + f"_{n}.npz"
293 | 
294 |                     np.savez_compressed(os.path.join(save_path,file_name), 
295 |                                         train_input = train1_batch,
296 |                                         train_input2 = train2_batch,
297 |                                         train_output = output_batch)
298 |                                             
299 |             # // save long rads enter in the quality check. maybe is not necessary
300 | 
301 |             
302 |             except:
303 |                 print("resquigle error")
304 |             
305 | 


--------------------------------------------------------------------------------