├── DeepBaseEditor ├── ABE_Efficiency │ ├── ABE_Efficiency_Weight │ │ ├── PreTrain-Final-3-3-3-20-20-20-0.001-1085-500-200.data-00000-of-00001 │ │ ├── PreTrain-Final-3-3-3-20-20-20-0.001-1085-500-200.index │ │ ├── PreTrain-Final-3-3-3-20-20-20-0.001-1085-500-200.meta │ │ └── checkpoint │ ├── ABE_Efficiency_sample.txt │ ├── TEST_ABE_Efficiency.py │ └── outputs │ │ └── TEST_OUTPUT_fortest.xlsx ├── ABE_Proportion │ ├── ABE_Proportion_Weight │ │ ├── PreTrain-Final-3-3-3-50-50-50-0.0005-458-256-256.data-00000-of-00001 │ │ ├── PreTrain-Final-3-3-3-50-50-50-0.0005-458-256-256.index │ │ ├── PreTrain-Final-3-3-3-50-50-50-0.0005-458-256-256.meta │ │ └── checkpoint │ ├── ABE_Proportion_sample.txt │ ├── TEST_ABE_Proportion.py │ └── outputs │ │ └── TEST_OUTPUT_fortest.xlsx ├── CBE_Efficiency │ ├── CBE_Efficiency_Weight │ │ ├── PreTrain-Final-3-3-3-50-50-50-0.001-1968-500-50.data-00000-of-00001 │ │ ├── PreTrain-Final-3-3-3-50-50-50-0.001-1968-500-50.index │ │ ├── PreTrain-Final-3-3-3-50-50-50-0.001-1968-500-50.meta │ │ └── checkpoint │ ├── CBE_Efficiency_sample.txt │ ├── TEST_CBE_Efficiency.py │ └── outputs │ │ └── TEST_OUTPUT_fortest.xlsx ├── CBE_Efficiency_CA │ ├── CBE_Efficiency_CA_Weight │ │ ├── PreTrain-Final-3-3-3-50-50-50-0.001-186-500-50.data-00000-of-00001 │ │ ├── PreTrain-Final-3-3-3-50-50-50-0.001-186-500-50.index │ │ ├── PreTrain-Final-3-3-3-50-50-50-0.001-186-500-50.meta │ │ └── checkpoint │ ├── CBE_Efficiency_CA_sample.txt │ ├── TEST_CBE_Efficiency_CA.py │ └── outputs │ │ └── TEST_OUTPUT_fortest.xlsx ├── CBE_Proportion │ ├── CBE_Proportion_Weight │ │ ├── PreTrain-Final-3-3-3-20-20-20-0.0005-214-256-256.data-00000-of-00001 │ │ ├── PreTrain-Final-3-3-3-20-20-20-0.0005-214-256-256.index │ │ ├── PreTrain-Final-3-3-3-20-20-20-0.0005-214-256-256.meta │ │ └── checkpoint │ ├── CBE_Proportion_sample.txt │ ├── TEST_CBE_Proportion.py │ └── outputs │ │ └── TEST_OUTPUT_fortest.xlsx ├── LICENSE.txt └── README.txt ├── DeepCas9-NG ├── DeepCas9-NG_weight │ ├── PreTrain-Final-4-6-8-180-150-120-0.001-879-120-60.data-00000-of-00001 │ ├── PreTrain-Final-4-6-8-180-150-120-0.001-879-120-60.index │ ├── PreTrain-Final-4-6-8-180-150-120-0.001-879-120-60.meta │ └── checkpoint ├── README.txt ├── Test.py └── dataset │ └── sample.txt ├── DeepCas9 ├── DeepCas9_Final │ ├── PreTrain-Final-False-3-5-7-100-70-40-0.001-550-True-80-60.data-00000-of-00001 │ ├── PreTrain-Final-False-3-5-7-100-70-40-0.001-550-True-80-60.index │ └── PreTrain-Final-False-3-5-7-100-70-40-0.001-550-True-80-60.meta ├── DeepCas9_TestCode.py ├── LICENSE ├── README.txt └── dataset │ └── sample.txt ├── DeepCpf1 ├── Analysis of indel frequency │ ├── analyser_v3.exe │ ├── analyser_v3.py │ ├── extractor_v3.exe │ └── extractor_v3.py ├── DeepCpf1.py ├── input_example.txt ├── output_example.txt └── weights │ ├── DeepCpf1_weights.h5 │ └── Seq_deepCpf1_weights.h5 └── DeepxCas9 ├── DeepxCas9_weight ├── PreTrain-Final-4-6-8-180-150-120-0.001-880-120-80.data-00000-of-00001 ├── PreTrain-Final-4-6-8-180-150-120-0.001-880-120-80.index ├── PreTrain-Final-4-6-8-180-150-120-0.001-880-120-80.meta └── checkpoint ├── README.txt ├── Test.py └── dataset └── sample.txt /DeepBaseEditor/ABE_Efficiency/ABE_Efficiency_Weight/PreTrain-Final-3-3-3-20-20-20-0.001-1085-500-200.data-00000-of-00001: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MyungjaeSong/Paired-Library/9aa18cd418a0258382c02846604d3d399c8ab020/DeepBaseEditor/ABE_Efficiency/ABE_Efficiency_Weight/PreTrain-Final-3-3-3-20-20-20-0.001-1085-500-200.data-00000-of-00001 -------------------------------------------------------------------------------- /DeepBaseEditor/ABE_Efficiency/ABE_Efficiency_Weight/PreTrain-Final-3-3-3-20-20-20-0.001-1085-500-200.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyungjaeSong/Paired-Library/9aa18cd418a0258382c02846604d3d399c8ab020/DeepBaseEditor/ABE_Efficiency/ABE_Efficiency_Weight/PreTrain-Final-3-3-3-20-20-20-0.001-1085-500-200.index -------------------------------------------------------------------------------- /DeepBaseEditor/ABE_Efficiency/ABE_Efficiency_Weight/PreTrain-Final-3-3-3-20-20-20-0.001-1085-500-200.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyungjaeSong/Paired-Library/9aa18cd418a0258382c02846604d3d399c8ab020/DeepBaseEditor/ABE_Efficiency/ABE_Efficiency_Weight/PreTrain-Final-3-3-3-20-20-20-0.001-1085-500-200.meta -------------------------------------------------------------------------------- /DeepBaseEditor/ABE_Efficiency/ABE_Efficiency_Weight/checkpoint: -------------------------------------------------------------------------------- 1 | model_checkpoint_path: "PreTrain-Final-3-3-3-20-20-20-0.001-1085-500-200" 2 | all_model_checkpoint_paths: "PreTrain-Final-3-3-3-20-20-20-0.001-1085-500-200" 3 | -------------------------------------------------------------------------------- /DeepBaseEditor/ABE_Efficiency/ABE_Efficiency_sample.txt: -------------------------------------------------------------------------------- 1 | Target number 24 bp target sequence (1 bp + PAM + 20 bp protospacer + 3 bp) "Chromatin accessibility (1= DNase I hypersensitive sites, 0 = Dnase I non-sensitive sites)" 2 | 1 TGAAGGCTGAACAGCAGGGGTGGG -------------------------------------------------------------------------------- /DeepBaseEditor/ABE_Efficiency/TEST_ABE_Efficiency.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | from os import system 3 | import tensorflow as tf 4 | import numpy as np 5 | from numpy import * 6 | import xlsxwriter 7 | import pyexcel as pe 8 | from random import shuffle 9 | 10 | ############################################################################## 11 | 12 | 13 | 14 | 15 | ############################################################################## 16 | ## System Paths ## 17 | path = './' 18 | parameters = {'0': 'ABE_Efficiency_sample.txt'} # Dictionary can be expanded for multiple test parameters 19 | 20 | ## Run Parameters ## 21 | TEST_NUM_SET = [0] # List can be expanded in case of multiple test parameters 22 | best_model_path_list = ['./ABE_Efficiency_Weight'] 23 | 24 | # Model 25 | length = 25 26 | 27 | class Deep_xCas9(object): 28 | def __init__(self, filter_size, filter_num, node_1 = 80, node_2 = 60, l_rate = 0.005): 29 | self.inputs = tf.placeholder(tf.float32, [None, 1, length, 4]) 30 | self.targets = tf.placeholder(tf.float32, [None, 1]) 31 | self.is_training = tf.placeholder(tf.bool) 32 | def create_new_conv_layer(input_data, num_input_channels, num_filters, filter_shape, pool_shape, name): 33 | # setup the filter input shape for tf.nn.conv_2d 34 | conv_filt_shape = [filter_shape[0], filter_shape[1], num_input_channels, 35 | num_filters] 36 | 37 | # initialise 
weights and bias for the filter 38 | weights = tf.Variable(tf.truncated_normal(conv_filt_shape, stddev=0.03), 39 | name=name+'_W') 40 | bias = tf.Variable(tf.truncated_normal([num_filters]), name=name+'_b') 41 | 42 | # setup the convolutional layer operation 43 | out_layer = tf.nn.conv2d(input_data, weights, [1, 1, 1, 1], padding='VALID') 44 | #out_layer = tf.nn.conv2d(input_data, weights, [1, 1, 1, 1], padding='SAME') 45 | 46 | # add the bias 47 | out_layer += bias 48 | 49 | # apply a ReLU non-linear activation 50 | out_layer = tf.layers.dropout(tf.nn.relu(out_layer), 0.3, self.is_training) 51 | 52 | # now perform max pooling 53 | #ksize = [1, pool_shape[0], pool_shape[1], 1] 54 | #strides = [1, 1, 2, 1] 55 | #out_layer = tf.nn.max_pool(out_layer, ksize=ksize, strides=strides, 56 | # padding='SAME') 57 | return out_layer 58 | #def end: create_new_conv_layer 59 | 60 | L_filter_num = 4 61 | L_inputs = self.inputs 62 | L_pool_0 = create_new_conv_layer(L_inputs, L_filter_num, filter_num[0] * 3, [1, filter_size[0]], [1, 2], name='conv1') 63 | 64 | with tf.variable_scope('Fully_Connected_Layer1'): 65 | layer_node_0 = int((length-filter_size[0])/1)+1 66 | node_num_0 = layer_node_0*filter_num[0] * 3 67 | 68 | L_flatten_0 = tf.reshape(L_pool_0, [-1, node_num_0]) 69 | W_fcl1 = tf.get_variable("W_fcl1", shape=[node_num_0, node_1]) 70 | B_fcl1 = tf.get_variable("B_fcl1", shape=[node_1]) 71 | L_fcl1_pre = tf.nn.bias_add(tf.matmul(L_flatten_0, W_fcl1), B_fcl1) 72 | L_fcl1 = tf.nn.relu(L_fcl1_pre) 73 | L_fcl1_drop = tf.layers.dropout(L_fcl1, 0.3, self.is_training) 74 | 75 | with tf.variable_scope('Output_Layer'): 76 | W_out = tf.get_variable("W_out", shape=[node_1, 1]) 77 | B_out = tf.get_variable("B_out", shape=[1]) 78 | self.outputs = tf.nn.bias_add(tf.matmul(L_fcl1_drop, W_out), B_out) 79 | 80 | # Define loss function and optimizer 81 | self.obj_loss = tf.reduce_mean(tf.square(self.targets - self.outputs)) 82 | self.optimizer = tf.train.AdamOptimizer(l_rate).minimize(self.obj_loss) 83 | #def end: def __init__ 84 | #class end: Deep_xCas9 85 | 86 | # Test Model 87 | def Model_Inference(sess, TEST_X, model, args, load_episode, test_data_num, testvalbook, testvalsheet, col_index=1): 88 | test_batch = 500 89 | test_spearman = 0.0 90 | optimizer = model.optimizer 91 | TEST_Z = zeros((TEST_X.shape[0], 1), dtype=float) 92 | 93 | for i in range(int(ceil(float(TEST_X.shape[0])/float(test_batch)))): 94 | Dict = {model.inputs: TEST_X[i*test_batch:(i+1)*test_batch], model.is_training: False} 95 | TEST_Z[i*test_batch:(i+1)*test_batch] = sess.run([model.outputs], feed_dict=Dict)[0] 96 | 97 | testval_row = 0 98 | testval_col = 2 99 | sheet_index = 0 100 | 101 | for test_value in (TEST_Z): 102 | testvalsheet[sheet_index].write(testval_row, testval_col, 100*test_value[0]) 103 | testval_row += 1 104 | 105 | return 106 | 107 | 108 | def preprocess_seq(data): 109 | print("Start preprocessing the sequence done 2d") 110 | length = 24 111 | 112 | DATA_X = np.zeros((len(data),1,length,4), dtype=int) 113 | print(np.shape(data), len(data), length) 114 | for l in range(len(data)): 115 | for i in range(length): 116 | 117 | try: data[l][i] 118 | except: print(data[l], i, length, len(data)) 119 | 120 | if data[l][i]in "Aa": DATA_X[l, 0, i, 0] = 1 121 | elif data[l][i] in "Cc": DATA_X[l, 0, i, 1] = 1 122 | elif data[l][i] in "Gg": DATA_X[l, 0, i, 2] = 1 123 | elif data[l][i] in "Tt": DATA_X[l, 0, i, 3] = 1 124 | else: 125 | print "Non-ATGC character " + data[l] 126 | print i 127 | print data[l][i] 128 | sys.exit() 129 | #loop end: 
i 130 | #loop end: l 131 | print("Preprocessing the sequence done") 132 | return DATA_X 133 | #def end: preprocess_seq 134 | 135 | 136 | def getfile_inference(filenum): 137 | param = parameters['%s' % filenum] 138 | FILE = open(path+param, "r") 139 | data = FILE.readlines() 140 | data_n = len(data) - 1 141 | seq = [] 142 | 143 | for l in range(1, data_n+1): 144 | try: 145 | data_split = data[l].split() 146 | seq.append(data_split[1]) 147 | except: 148 | print data[l] 149 | raise ValueError 150 | #loop end: l 151 | FILE.close() 152 | processed_full_seq = preprocess_seq(seq) 153 | 154 | return processed_full_seq, seq 155 | #def end: getseq 156 | 157 | 158 | if "outputs" not in os.listdir(os.getcwd()): 159 | os.makedirs('outputs') 160 | 161 | #TensorFlow config 162 | conf = tf.ConfigProto() 163 | conf.gpu_options.allow_growth = True 164 | os.environ['CUDA_VISIBLE_DEVICES'] = '0' 165 | best_model_cv = 0.0 166 | best_model_list = [] 167 | 168 | testbook = xlsxwriter.Workbook('outputs/TEST_OUTPUT_fortest.xlsx') 169 | 170 | TEST_X = [] 171 | testsheet = [] 172 | for TEST_NUM_index in range(len(TEST_NUM_SET)): 173 | TEST_NUM = TEST_NUM_SET[TEST_NUM_index] 174 | testsheet.append([testbook.add_worksheet('{}'.format(TEST_NUM))]) 175 | tmp_X, pre_X = getfile_inference(TEST_NUM) 176 | TEST_X.append(tmp_X) 177 | test_row = 0 178 | for index_X in range(np.shape(pre_X)[0]): 179 | testsheet[-1][-1].write(test_row, 0, pre_X[index_X]) 180 | test_row += 1 181 | 182 | for best_model_path in best_model_path_list: 183 | for modelname in os.listdir(best_model_path): 184 | if "meta" in modelname: 185 | best_model_list.append(modelname[:-5]) 186 | #loop end: best_model_path 187 | 188 | for index in range(len(best_model_list)): 189 | best_model_path = best_model_path_list[index] 190 | best_model = best_model_list[index] 191 | valuelist = best_model.split('-') 192 | fulllist = [] 193 | 194 | for value in valuelist: 195 | try: 196 | value=int(value) 197 | except: 198 | try: value=float(value) 199 | except: pass 200 | fulllist.append(value) 201 | 202 | print(fulllist[2:]) 203 | 204 | filter_size_1, filter_size_2, filter_size_3, filter_num_1, filter_num_2, filter_num_3, l_rate, load_episode, node_1, node_2 = fulllist[2:] 205 | filter_size = [filter_size_1, filter_size_2, filter_size_3] 206 | filter_num = [filter_num_1, filter_num_2, filter_num_3] 207 | 208 | args = [filter_size, filter_num, l_rate, 0, None, node_1, node_2] 209 | # Loading the model with the best validation score and test 210 | tf.reset_default_graph() 211 | with tf.Session(config=conf) as sess: 212 | sess.run(tf.global_variables_initializer()) 213 | model = Deep_xCas9(filter_size, filter_num, node_1, node_2, args[2]) 214 | saver = tf.train.Saver() 215 | saver.restore(sess, best_model_path+"/PreTrain-Final-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}".format(args[0][0], args[0][1], args[0][2], args[1][0], args[1][1], args[1][2], args[2], load_episode, args[5], args[6])) 216 | for i in range(len(TEST_NUM_SET)): 217 | Model_Inference(sess, TEST_X[i], model, args, load_episode, TEST_NUM_SET[i], testbook, testsheet[i]) 218 | testbook.close() 219 | -------------------------------------------------------------------------------- /DeepBaseEditor/ABE_Efficiency/outputs/TEST_OUTPUT_fortest.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyungjaeSong/Paired-Library/9aa18cd418a0258382c02846604d3d399c8ab020/DeepBaseEditor/ABE_Efficiency/outputs/TEST_OUTPUT_fortest.xlsx 
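Note (not part of the original repository): a minimal sketch of how an input file for TEST_ABE_Efficiency.py above could be prepared and the script run. It assumes only what the code shows — getfile_inference() skips the header row and reads the target sequence from the second whitespace-separated column, and Model_Inference() writes the predictions (multiplied by 100) to outputs/TEST_OUTPUT_fortest.xlsx. The target sequence below is the one from ABE_Efficiency_sample.txt; everything else is illustrative.

# make_abe_efficiency_input.py -- illustrative helper only; it mirrors the column
# layout that getfile_inference() in TEST_ABE_Efficiency.py expects.
targets = [
    (1, "TGAAGGCTGAACAGCAGGGGTGGG"),  # target number, 24 nt target sequence as in the sample file
]

# 'ABE_Efficiency_sample.txt' is the path registered in the script's `parameters` dictionary;
# writing to it replaces the shipped sample input.
with open("ABE_Efficiency_sample.txt", "w") as handle:
    handle.write("Target number\t24 bp target sequence\n")  # header row, skipped by the script
    for number, sequence in targets:
        handle.write("{}\t{}\n".format(number, sequence))

# With the weights in ./ABE_Efficiency_Weight and the environment from README.txt
# (Python 2.7, TensorFlow 1.4), the script is then run in place:
#   python TEST_ABE_Efficiency.py
# Predicted editing efficiencies appear in the third column of outputs/TEST_OUTPUT_fortest.xlsx,
# next to the input sequences.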
-------------------------------------------------------------------------------- /DeepBaseEditor/ABE_Proportion/ABE_Proportion_Weight/PreTrain-Final-3-3-3-50-50-50-0.0005-458-256-256.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyungjaeSong/Paired-Library/9aa18cd418a0258382c02846604d3d399c8ab020/DeepBaseEditor/ABE_Proportion/ABE_Proportion_Weight/PreTrain-Final-3-3-3-50-50-50-0.0005-458-256-256.data-00000-of-00001 -------------------------------------------------------------------------------- /DeepBaseEditor/ABE_Proportion/ABE_Proportion_Weight/PreTrain-Final-3-3-3-50-50-50-0.0005-458-256-256.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyungjaeSong/Paired-Library/9aa18cd418a0258382c02846604d3d399c8ab020/DeepBaseEditor/ABE_Proportion/ABE_Proportion_Weight/PreTrain-Final-3-3-3-50-50-50-0.0005-458-256-256.index -------------------------------------------------------------------------------- /DeepBaseEditor/ABE_Proportion/ABE_Proportion_Weight/PreTrain-Final-3-3-3-50-50-50-0.0005-458-256-256.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyungjaeSong/Paired-Library/9aa18cd418a0258382c02846604d3d399c8ab020/DeepBaseEditor/ABE_Proportion/ABE_Proportion_Weight/PreTrain-Final-3-3-3-50-50-50-0.0005-458-256-256.meta -------------------------------------------------------------------------------- /DeepBaseEditor/ABE_Proportion/ABE_Proportion_Weight/checkpoint: -------------------------------------------------------------------------------- 1 | model_checkpoint_path: "PreTrain-Final-3-3-3-50-50-50-0.0005-458-256-256" 2 | all_model_checkpoint_paths: "PreTrain-Final-3-3-3-50-50-50-0.0005-458-256-256" 3 | -------------------------------------------------------------------------------- /DeepBaseEditor/ABE_Proportion/ABE_Proportion_sample.txt: -------------------------------------------------------------------------------- 1 | Target number 34 bp target sequence (4 bp + PAM + 23 bp protospacer + 3 bp) "Chromatin accessibility (1= DNase I hypersensitive sites, 0 = Dnase I non-sensitive sites)" 2 | 1 ACTGAAGGCTGAACAGCAGGGGTGGG -------------------------------------------------------------------------------- /DeepBaseEditor/ABE_Proportion/TEST_ABE_Proportion.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | from os import system 3 | import tensorflow as tf 4 | import numpy as np 5 | from numpy import * 6 | import xlsxwriter 7 | import pyexcel as pe 8 | from random import shuffle 9 | 10 | ############################################################################## 11 | 12 | 13 | 14 | 15 | ############################################################################## 16 | ## System Paths ## 17 | path = './' 18 | parameters = {'0': 'ABE_Proportion_sample.txt'} # Dictionary can be expanded for multiple test parameters 19 | 20 | ## Run Parameters ## 21 | TEST_NUM_SET = [0] # List can be expanded in case of multiple test parameters 22 | best_model_path_list = ['./ABE_Proportion_Weight'] 23 | 24 | # Model 25 | length = 26 26 | window_start = 5 27 | window_size = 8 28 | 29 | class Deep_xCas9(object): 30 | def __init__(self, filter_size, filter_num, node_1 = 80, node_2 = 60, l_rate = 0.005, window_size = 5): 31 | self.inputs = tf.placeholder(tf.float32, [None, 1, length, 4]) 32 | self.targets = tf.placeholder(tf.float32, 
[None, 2**window_size-1]) 33 | self.wow = tf.placeholder(tf.float32, [None, 2**window_size-1]) 34 | self.possible_labels = tf.placeholder(tf.float32, [None, 2**window_size-1]) 35 | self.is_training = tf.placeholder(tf.bool) 36 | 37 | def create_new_conv_layer(input_data, num_input_channels, num_filters, filter_shape, pool_shape, name): 38 | # setup the filter input shape for tf.nn.conv_2d 39 | conv_filt_shape = [filter_shape[0], filter_shape[1], num_input_channels, 40 | num_filters] 41 | 42 | # initialise weights and bias for the filter 43 | weights = tf.Variable(tf.truncated_normal(conv_filt_shape, stddev=0.03), 44 | name=name+'_W') 45 | bias = tf.Variable(tf.truncated_normal([num_filters]), name=name+'_b') 46 | 47 | # setup the convolutional layer operation 48 | out_layer = tf.nn.conv2d(input_data, weights, [1, 1, 1, 1], padding='VALID') 49 | #out_layer = tf.nn.conv2d(input_data, weights, [1, 1, 1, 1], padding='SAME') 50 | 51 | # add the bias 52 | out_layer += bias 53 | 54 | # apply a ReLU non-linear activation 55 | out_layer = tf.layers.dropout(tf.nn.relu(out_layer), 0.3, self.is_training) 56 | 57 | # now perform max pooling 58 | #ksize = [1, pool_shape[0], pool_shape[1], 1] 59 | #strides = [1, 1, 2, 1] 60 | #out_layer = tf.nn.max_pool(out_layer, ksize=ksize, strides=strides, 61 | # padding='SAME') 62 | return out_layer 63 | #def end: create_new_conv_layer 64 | 65 | L_filter_num = 4 66 | L_inputs = self.inputs 67 | L_pool_0 = create_new_conv_layer(L_inputs, L_filter_num, filter_num[0] * 3, [1, filter_size[0]], [1, 2], name='conv1') 68 | 69 | with tf.variable_scope('Fully_Connected_Layer1'): 70 | layer_node_0 = int((length-filter_size[0])/1)+1 71 | node_num_0 = layer_node_0*filter_num[0] * 3 72 | 73 | L_flatten_0 = tf.reshape(L_pool_0, [-1, node_num_0]) 74 | W_fcl1 = tf.get_variable("W_fcl1", shape=[node_num_0, node_1]) 75 | B_fcl1 = tf.get_variable("B_fcl1", shape=[node_1]) 76 | L_fcl1_pre = tf.nn.bias_add(tf.matmul(L_flatten_0, W_fcl1), B_fcl1) 77 | L_fcl1 = tf.nn.relu(L_fcl1_pre) 78 | L_fcl1_drop = tf.layers.dropout(L_fcl1, 0.3, self.is_training) 79 | 80 | with tf.variable_scope('Output_Layer'): 81 | W_out = tf.get_variable("W_out", shape=[node_1, 2**window_size-1])#, initializer=tf.contrib.layers.xavier_initializer()) 82 | B_out = tf.get_variable("B_out", shape=[2**window_size-1])#, initializer=tf.contrib.layers.xavier_initializer()) 83 | self.outputs = tf.nn.bias_add(tf.matmul(L_fcl1_drop, W_out), B_out) 84 | 85 | #self.possible_outputs = self.outputs 86 | self.possible_outputs = tf.nn.softmax(self.outputs)#tf.multiply(self.outputs, self.possible_labels)) 87 | # Define loss function and optimizer 88 | self.obj_loss = tf.reduce_mean(-tf.reduce_sum(self.targets * tf.log(self.possible_outputs) - self.targets * tf.log(self.targets), reduction_indices=[1])) 89 | self.obj_loss1 = tf.reduce_mean(-tf.reduce_sum(self.targets * tf.log(self.wow) - self.targets * tf.log(self.targets), reduction_indices=[1])) 90 | self.optimizer = tf.train.AdamOptimizer(l_rate).minimize(self.obj_loss) 91 | #def end: def __init__ 92 | #class end: Deep_xCas9 93 | 94 | def Model_Inference(sess, TEST_X, TEST_Label, model, args, load_episode, test_data_num, testvalbook, testvalsheet, window_size=8): 95 | test_batch = 1024 96 | optimizer = model.optimizer 97 | TEST_Z = np.zeros((TEST_X.shape[0], 2**window_size - 1), dtype=float) 98 | 99 | for i in range(int(ceil(float(TEST_X.shape[0])/float(test_batch)))): 100 | Dict = {model.inputs: TEST_X[i*test_batch:(i+1)*test_batch], model.is_training: False} 101 | 
TEST_Z[i*test_batch:(i+1)*test_batch] = sess.run([model.possible_outputs], feed_dict=Dict)[0] 102 | 103 | testval_row = 0 104 | testval_col = 3 105 | sheet_index = 0 106 | 107 | sum = 0 108 | for test_index in range(len(TEST_Z)): 109 | sum += TEST_Z[test_index][TEST_Label[test_index]] 110 | TEST_Z /= sum 111 | 112 | for test_index in range(len(TEST_Z)): 113 | test_value = TEST_Z[test_index][TEST_Label[test_index]] 114 | testvalsheet[sheet_index].write(testval_row, testval_col, test_value) 115 | testval_row += 1 116 | return 117 | 118 | 119 | # One hot encoding for DNA Sequence 120 | def preprocess_seq(data): 121 | print("Start preprocessing the sequence done 2d") 122 | length = 26 123 | 124 | DATA_X = np.zeros((len(data),1,length,4), dtype=int) 125 | print(np.shape(data), len(data), length) 126 | for l in range(len(data)): 127 | for i in range(length): 128 | 129 | try: data[l][i] 130 | except: print(data[l], i, length, len(data)) 131 | 132 | if data[l][i] in "Aa": DATA_X[l, 0, i, 0] = 1 133 | elif data[l][i] in "Cc": DATA_X[l, 0, i, 1] = 1 134 | elif data[l][i] in "Gg": DATA_X[l, 0, i, 2] = 1 135 | elif data[l][i] in "Tt": DATA_X[l, 0, i, 3] = 1 136 | else: 137 | print "Non-ATGC character " + data[l] 138 | print i 139 | print data[l][i] 140 | sys.exit() 141 | #loop end: i 142 | #loop end: l 143 | print("Preprocessing the sequence done") 144 | return DATA_X 145 | #def end: preprocess_seq 146 | 147 | def inference_index(orig_seq, req_seq): 148 | window_start = 5 149 | window_size = 8 150 | index = [] 151 | for seq_index in range(len(list(req_seq))): 152 | labels_index = -1 153 | for ind in range(window_size): #change to 2 154 | if req_seq[seq_index][window_start + ind] == orig_seq[seq_index][window_start + ind]: pass 155 | else: 156 | labels_index += 2**(window_size - 1-ind) 157 | if labels_index < 0: 158 | print("WT INCLUDED IN REQUIRED SEQUENCE") 159 | #raise ValueError 160 | labels_index = 0 161 | index.append(labels_index) 162 | return index 163 | 164 | def req_seq_produce(seq): 165 | window_start = 5 166 | window_size = 8 167 | req_seq = [] 168 | full_seq = [] 169 | for indiv_seq in seq: 170 | tmp_seq = [indiv_seq] 171 | for index in range(window_size): 172 | if indiv_seq[window_start+index] == 'A': 173 | print(index, indiv_seq[window_start:window_start+index]) 174 | tmp = [] 175 | for tmp_indiv_seq in tmp_seq: 176 | tmp.append(tmp_indiv_seq[:window_start+index]+str("G")+tmp_indiv_seq[window_start+index+1:]) 177 | full_seq.append(indiv_seq) 178 | for sequence in tmp: 179 | tmp_seq.append(sequence) 180 | for req_sequence in tmp_seq: 181 | if req_sequence != tmp_seq[0]: 182 | req_seq.append(req_sequence) 183 | return full_seq, req_seq 184 | 185 | def getfile_inference(filenum): 186 | param = parameters['%s'%filenum] 187 | FILE = open(path+param, "r") 188 | data = FILE.readlines() 189 | data_n = len(data) - 1 190 | seq = [] 191 | req_seq = [] 192 | 193 | for l in range(1, data_n+1): 194 | try: 195 | data_split = data[l].split() 196 | seq.append(data_split[1]) 197 | except: 198 | print data[l] 199 | seq.append(data[l]) 200 | #loop end: l 201 | FILE.close() 202 | full_seq, req_seq = req_seq_produce(seq) 203 | processed_full_seq = preprocess_seq(full_seq) 204 | processed_full_req_seq = inference_index(full_seq, req_seq) 205 | return processed_full_seq, full_seq, processed_full_req_seq, req_seq 206 | 207 | if "outputs" not in os.listdir(os.getcwd()): 208 | os.makedirs('outputs') 209 | 210 | #TensorFlow config 211 | conf = tf.ConfigProto() 212 | conf.gpu_options.allow_growth = True 213 | 
os.environ['CUDA_VISIBLE_DEVICES'] = '0' 214 | best_model_cv = 0.0 215 | best_model_list = [] 216 | 217 | testbook = xlsxwriter.Workbook('outputs/TEST_OUTPUT_fortest.xlsx') 218 | 219 | TEST_X = [] 220 | TEST_Label = [] 221 | testsheet = [] 222 | for TEST_NUM_index in range(len(TEST_NUM_SET)): 223 | TEST_NUM = TEST_NUM_SET[TEST_NUM_index] 224 | testsheet.append([testbook.add_worksheet('{}'.format(TEST_NUM))]) 225 | tmp_X, pre_X, tmp_Label, pre_Label = getfile_inference(TEST_NUM) 226 | TEST_X.append(tmp_X) 227 | TEST_Label.append(tmp_Label) 228 | test_row = 0 229 | for index_X in range(np.shape(pre_X)[0]): 230 | testsheet[-1][-1].write(test_row, 0, pre_X[index_X]) 231 | testsheet[-1][-1].write(test_row, 1, pre_Label[index_X]) 232 | test_row += 1 233 | 234 | for best_model_path in best_model_path_list: 235 | for modelname in os.listdir(best_model_path): 236 | print(modelname) 237 | if "meta" in modelname: 238 | best_model_list.append(modelname[:-5]) 239 | 240 | print(best_model_list) 241 | for index in range(len(best_model_list)): 242 | best_model_path = best_model_path_list[index] 243 | best_model = best_model_list[index] 244 | valuelist = best_model.split('-') 245 | fulllist = [] 246 | 247 | for value in valuelist: 248 | try: 249 | value=int(value) 250 | except: 251 | try: value=float(value) 252 | except: pass 253 | fulllist.append(value) 254 | 255 | print(fulllist[2:]) 256 | 257 | filter_size_1, filter_size_2, filter_size_3, filter_num_1, filter_num_2, filter_num_3, l_rate, load_episode, node_1, node_2 = fulllist[2:] 258 | filter_size = [filter_size_1, filter_size_2, filter_size_3] 259 | filter_num = [filter_num_1, filter_num_2, filter_num_3] 260 | 261 | args = [filter_size, filter_num, l_rate, 0, None, node_1, node_2] 262 | # Loading the model with the best validation score and test 263 | tf.reset_default_graph() 264 | with tf.Session(config=conf) as sess: 265 | sess.run(tf.global_variables_initializer()) 266 | model = Deep_xCas9(filter_size, filter_num, node_1, node_2, args[2], window_size) 267 | saver = tf.train.Saver() 268 | saver.restore(sess, best_model_path+"/PreTrain-Final-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}".format(args[0][0], args[0][1], args[0][2], args[1][0], args[1][1], args[1][2], args[2], load_episode, args[5], args[6])) 269 | for i in range(len(TEST_NUM_SET)): 270 | Model_Inference(sess, TEST_X[i], TEST_Label[i], model, args, load_episode, TEST_NUM_SET[i], testbook, testsheet[i], window_size) 271 | testbook.close() 272 | -------------------------------------------------------------------------------- /DeepBaseEditor/ABE_Proportion/outputs/TEST_OUTPUT_fortest.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyungjaeSong/Paired-Library/9aa18cd418a0258382c02846604d3d399c8ab020/DeepBaseEditor/ABE_Proportion/outputs/TEST_OUTPUT_fortest.xlsx -------------------------------------------------------------------------------- /DeepBaseEditor/CBE_Efficiency/CBE_Efficiency_Weight/PreTrain-Final-3-3-3-50-50-50-0.001-1968-500-50.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyungjaeSong/Paired-Library/9aa18cd418a0258382c02846604d3d399c8ab020/DeepBaseEditor/CBE_Efficiency/CBE_Efficiency_Weight/PreTrain-Final-3-3-3-50-50-50-0.001-1968-500-50.data-00000-of-00001 -------------------------------------------------------------------------------- 
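Note (not part of the original repository): the checkpoint names used throughout DeepBaseEditor encode the model hyperparameters, and every TEST_*.py script recovers them by splitting the file name on '-' (the valuelist/fulllist logic above). A small stand-alone sketch of that convention, using the CBE_Efficiency checkpoint name just listed:

def parse_checkpoint_name(name):
    # "PreTrain-Final-<f1>-<f2>-<f3>-<n1>-<n2>-<n3>-<l_rate>-<episode>-<node_1>-<node_2>"
    fields = []
    for token in name.split('-')[2:]:
        try:
            fields.append(int(token))
        except ValueError:
            fields.append(float(token))
    f1, f2, f3, n1, n2, n3, l_rate, episode, node_1, node_2 = fields
    return {'filter_size': [f1, f2, f3], 'filter_num': [n1, n2, n3],
            'l_rate': l_rate, 'load_episode': episode,
            'node_1': node_1, 'node_2': node_2}

print(parse_checkpoint_name("PreTrain-Final-3-3-3-50-50-50-0.001-1968-500-50"))
# -> filter sizes [3, 3, 3], filter counts [50, 50, 50], learning rate 0.001,
#    trained for 1968 episodes, fully connected layers of 500 and 50 nodes.

Only the first filter size and filter count are actually consumed by the single convolutional layer built in these test scripts; the remaining fields are still parsed because they are needed to reconstruct the exact checkpoint file name passed to saver.restore().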
/DeepBaseEditor/CBE_Efficiency/CBE_Efficiency_Weight/PreTrain-Final-3-3-3-50-50-50-0.001-1968-500-50.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyungjaeSong/Paired-Library/9aa18cd418a0258382c02846604d3d399c8ab020/DeepBaseEditor/CBE_Efficiency/CBE_Efficiency_Weight/PreTrain-Final-3-3-3-50-50-50-0.001-1968-500-50.index -------------------------------------------------------------------------------- /DeepBaseEditor/CBE_Efficiency/CBE_Efficiency_Weight/PreTrain-Final-3-3-3-50-50-50-0.001-1968-500-50.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyungjaeSong/Paired-Library/9aa18cd418a0258382c02846604d3d399c8ab020/DeepBaseEditor/CBE_Efficiency/CBE_Efficiency_Weight/PreTrain-Final-3-3-3-50-50-50-0.001-1968-500-50.meta -------------------------------------------------------------------------------- /DeepBaseEditor/CBE_Efficiency/CBE_Efficiency_Weight/checkpoint: -------------------------------------------------------------------------------- 1 | model_checkpoint_path: "PreTrain-Final-3-3-3-50-50-50-0.001-1968-500-50" 2 | all_model_checkpoint_paths: "PreTrain-Final-3-3-3-50-50-50-0.001-1968-500-50" 3 | -------------------------------------------------------------------------------- /DeepBaseEditor/CBE_Efficiency/CBE_Efficiency_sample.txt: -------------------------------------------------------------------------------- 1 | Target number 24 bp target sequence (1 bp + 20 bp protospacer + PAM + 3 bp) 2 | 1 GGGCTGAACTAAAGCCTCCAGGGG -------------------------------------------------------------------------------- /DeepBaseEditor/CBE_Efficiency/TEST_CBE_Efficiency.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | from os import system 3 | import tensorflow as tf 4 | import numpy as np 5 | from numpy import * 6 | import xlsxwriter 7 | import pyexcel as pe 8 | from random import shuffle 9 | 10 | #np.set_printoptions(threshold='nan') 11 | 12 | ############################################################################## 13 | 14 | 15 | 16 | 17 | ############################################################################## 18 | ## System Paths ## 19 | path = './' 20 | parameters = {'0': 'CBE_Efficiency_sample.txt'} # Dictionary can be expanded for multiple test parameters 21 | 22 | ## Run Parameters ## 23 | TEST_NUM_SET = [0] # List can be expanded in case of multiple test parameters 24 | best_model_path_list = ['./CBE_Efficiency_Weight'] 25 | 26 | # Model 27 | length = 24 28 | 29 | class Deep_xCas9(object): 30 | def __init__(self, filter_size, filter_num, node_1 = 80, node_2 = 60, l_rate = 0.005): 31 | self.inputs = tf.placeholder(tf.float32, [None, 1, length, 4]) 32 | self.targets = tf.placeholder(tf.float32, [None, 1]) 33 | self.is_training = tf.placeholder(tf.bool) 34 | def create_new_conv_layer(input_data, num_input_channels, num_filters, filter_shape, pool_shape, name): 35 | # setup the filter input shape for tf.nn.conv_2d 36 | conv_filt_shape = [filter_shape[0], filter_shape[1], num_input_channels, 37 | num_filters] 38 | 39 | # initialise weights and bias for the filter 40 | weights = tf.Variable(tf.truncated_normal(conv_filt_shape, stddev=0.03), 41 | name=name+'_W') 42 | bias = tf.Variable(tf.truncated_normal([num_filters]), name=name+'_b') 43 | 44 | # setup the convolutional layer operation 45 | out_layer = tf.nn.conv2d(input_data, weights, [1, 1, 1, 1], padding='VALID') 46 | #out_layer = 
tf.nn.conv2d(input_data, weights, [1, 1, 1, 1], padding='SAME') 47 | 48 | # add the bias 49 | out_layer += bias 50 | 51 | # apply a ReLU non-linear activation 52 | out_layer = tf.layers.dropout(tf.nn.relu(out_layer), 0.3, self.is_training) 53 | 54 | # now perform max pooling 55 | #ksize = [1, pool_shape[0], pool_shape[1], 1] 56 | #strides = [1, 1, 2, 1] 57 | #out_layer = tf.nn.max_pool(out_layer, ksize=ksize, strides=strides, 58 | # padding='SAME') 59 | return out_layer 60 | #def end: create_new_conv_layer 61 | 62 | L_filter_num = 4 63 | L_inputs = self.inputs 64 | L_pool_0 = create_new_conv_layer(L_inputs, L_filter_num, filter_num[0] * 3, [1, filter_size[0]], [1, 2], name='conv1') 65 | 66 | with tf.variable_scope('Fully_Connected_Layer1'): 67 | layer_node_0 = int((length-filter_size[0])/1)+1 68 | node_num_0 = layer_node_0*filter_num[0] * 3 69 | 70 | L_flatten_0 = tf.reshape(L_pool_0, [-1, node_num_0]) 71 | W_fcl1 = tf.get_variable("W_fcl1", shape=[node_num_0, node_1]) 72 | B_fcl1 = tf.get_variable("B_fcl1", shape=[node_1]) 73 | L_fcl1_pre = tf.nn.bias_add(tf.matmul(L_flatten_0, W_fcl1), B_fcl1) 74 | L_fcl1 = tf.nn.relu(L_fcl1_pre) 75 | L_fcl1_drop = tf.layers.dropout(L_fcl1, 0.3, self.is_training) 76 | 77 | with tf.variable_scope('Fully_Connected_Layer2'): 78 | W_fcl2 = tf.get_variable("W_fcl2", shape=[node_1, node_2]) 79 | B_fcl2 = tf.get_variable("B_fcl2", shape=[node_2]) 80 | L_fcl2_pre = tf.nn.bias_add(tf.matmul(L_fcl1_drop, W_fcl2), B_fcl2) 81 | L_fcl2 = tf.nn.relu(L_fcl2_pre) 82 | L_fcl2_drop = tf.layers.dropout(L_fcl2, 0.3, self.is_training) 83 | 84 | with tf.variable_scope('Output_Layer'): 85 | W_out = tf.get_variable("W_out", shape=[node_2, 1])#, initializer=tf.contrib.layers.xavier_initializer()) 86 | B_out = tf.get_variable("B_out", shape=[1])#, initializer=tf.contrib.layers.xavier_initializer()) 87 | self.outputs = tf.nn.bias_add(tf.matmul(L_fcl2_drop, W_out), B_out) 88 | 89 | # Define loss function and optimizer 90 | self.obj_loss = tf.reduce_mean(tf.square(self.targets - self.outputs)) 91 | self.optimizer = tf.train.AdamOptimizer(l_rate).minimize(self.obj_loss) 92 | #def end: def __init__ 93 | #class end: Deep_xCas9 94 | 95 | # Test Model 96 | def Model_Inference(sess, TEST_X, model, args, load_episode, test_data_num, testvalbook, testvalsheet, col_index=1): 97 | test_batch = 500 98 | test_spearman = 0.0 99 | optimizer = model.optimizer 100 | TEST_Z = zeros((TEST_X.shape[0], 1), dtype=float) 101 | 102 | for i in range(int(ceil(float(TEST_X.shape[0])/float(test_batch)))): 103 | Dict = {model.inputs: TEST_X[i*test_batch:(i+1)*test_batch], model.is_training: False} 104 | TEST_Z[i*test_batch:(i+1)*test_batch] = sess.run([model.outputs], feed_dict=Dict)[0] 105 | 106 | testval_row = 0 107 | testval_col = 2 108 | sheet_index = 0 109 | 110 | for test_value in (TEST_Z): 111 | testvalsheet[sheet_index].write(testval_row, testval_col, 100*test_value[0]) 112 | testval_row += 1 113 | 114 | return 115 | 116 | 117 | def preprocess_seq(data): 118 | print("Start preprocessing the sequence done 2d") 119 | length = 24 120 | 121 | DATA_X = np.zeros((len(data),1,length,4), dtype=int) 122 | print(np.shape(data), len(data), length) 123 | for l in range(len(data)): 124 | for i in range(length): 125 | 126 | try: data[l][i] 127 | except: print(data[l], i, length, len(data)) 128 | 129 | if data[l][i]in "Aa": DATA_X[l, 0, i, 0] = 1 130 | elif data[l][i] in "Cc": DATA_X[l, 0, i, 1] = 1 131 | elif data[l][i] in "Gg": DATA_X[l, 0, i, 2] = 1 132 | elif data[l][i] in "Tt": DATA_X[l, 0, i, 3] = 1 133 | else: 
134 | print "Non-ATGC character " + data[l] 135 | print i 136 | print data[l][i] 137 | sys.exit() 138 | #loop end: i 139 | #loop end: l 140 | print("Preprocessing the sequence done") 141 | return DATA_X 142 | #def end: preprocess_seq 143 | 144 | 145 | def getfile_inference(filenum): 146 | param = parameters['%s' % filenum] 147 | FILE = open(path+param, "r") 148 | data = FILE.readlines() 149 | data_n = len(data) - 1 150 | seq = [] 151 | 152 | for l in range(1, data_n+1): 153 | try: 154 | data_split = data[l].split() 155 | seq.append(data_split[1]) 156 | except: 157 | print data[l] 158 | raise ValueError 159 | #loop end: l 160 | FILE.close() 161 | processed_full_seq = preprocess_seq(seq) 162 | 163 | return processed_full_seq, seq 164 | #def end: getseq 165 | 166 | 167 | if "outputs" not in os.listdir(os.getcwd()): 168 | os.makedirs('outputs') 169 | 170 | #TensorFlow config 171 | conf = tf.ConfigProto() 172 | conf.gpu_options.allow_growth = True 173 | os.environ['CUDA_VISIBLE_DEVICES'] = '0' 174 | best_model_cv = 0.0 175 | best_model_list = [] 176 | 177 | testbook = xlsxwriter.Workbook('outputs/TEST_OUTPUT_fortest.xlsx') 178 | 179 | TEST_X = [] 180 | testsheet = [] 181 | for TEST_NUM_index in range(len(TEST_NUM_SET)): 182 | TEST_NUM = TEST_NUM_SET[TEST_NUM_index] 183 | testsheet.append([testbook.add_worksheet('{}'.format(TEST_NUM))]) 184 | tmp_X, pre_X = getfile_inference(TEST_NUM) 185 | TEST_X.append(tmp_X) 186 | test_row = 0 187 | for index_X in range(np.shape(pre_X)[0]): 188 | testsheet[-1][-1].write(test_row, 0, pre_X[index_X]) 189 | test_row += 1 190 | 191 | for best_model_path in best_model_path_list: 192 | for modelname in os.listdir(best_model_path): 193 | if "meta" in modelname: 194 | best_model_list.append(modelname[:-5]) 195 | #loop end: best_model_path 196 | 197 | for index in range(len(best_model_list)): 198 | best_model_path = best_model_path_list[index] 199 | best_model = best_model_list[index] 200 | valuelist = best_model.split('-') 201 | fulllist = [] 202 | 203 | for value in valuelist: 204 | try: 205 | value=int(value) 206 | except: 207 | try: value=float(value) 208 | except: pass 209 | fulllist.append(value) 210 | 211 | print(fulllist[2:]) 212 | 213 | filter_size_1, filter_size_2, filter_size_3, filter_num_1, filter_num_2, filter_num_3, l_rate, load_episode, node_1, node_2 = fulllist[2:] 214 | filter_size = [filter_size_1, filter_size_2, filter_size_3] 215 | filter_num = [filter_num_1, filter_num_2, filter_num_3] 216 | 217 | args = [filter_size, filter_num, l_rate, 0, None, node_1, node_2] 218 | # Loading the model with the best validation score and test 219 | tf.reset_default_graph() 220 | with tf.Session(config=conf) as sess: 221 | sess.run(tf.global_variables_initializer()) 222 | model = Deep_xCas9(filter_size, filter_num, node_1, node_2, args[2]) 223 | saver = tf.train.Saver() 224 | saver.restore(sess, best_model_path+"/PreTrain-Final-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}".format(args[0][0], args[0][1], args[0][2], args[1][0], args[1][1], args[1][2], args[2], load_episode, args[5], args[6])) 225 | for i in range(len(TEST_NUM_SET)): 226 | Model_Inference(sess, TEST_X[i], model, args, load_episode, TEST_NUM_SET[i], testbook, testsheet[i]) 227 | testbook.close() 228 | -------------------------------------------------------------------------------- /DeepBaseEditor/CBE_Efficiency/outputs/TEST_OUTPUT_fortest.xlsx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MyungjaeSong/Paired-Library/9aa18cd418a0258382c02846604d3d399c8ab020/DeepBaseEditor/CBE_Efficiency/outputs/TEST_OUTPUT_fortest.xlsx -------------------------------------------------------------------------------- /DeepBaseEditor/CBE_Efficiency_CA/CBE_Efficiency_CA_Weight/PreTrain-Final-3-3-3-50-50-50-0.001-186-500-50.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyungjaeSong/Paired-Library/9aa18cd418a0258382c02846604d3d399c8ab020/DeepBaseEditor/CBE_Efficiency_CA/CBE_Efficiency_CA_Weight/PreTrain-Final-3-3-3-50-50-50-0.001-186-500-50.data-00000-of-00001 -------------------------------------------------------------------------------- /DeepBaseEditor/CBE_Efficiency_CA/CBE_Efficiency_CA_Weight/PreTrain-Final-3-3-3-50-50-50-0.001-186-500-50.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyungjaeSong/Paired-Library/9aa18cd418a0258382c02846604d3d399c8ab020/DeepBaseEditor/CBE_Efficiency_CA/CBE_Efficiency_CA_Weight/PreTrain-Final-3-3-3-50-50-50-0.001-186-500-50.index -------------------------------------------------------------------------------- /DeepBaseEditor/CBE_Efficiency_CA/CBE_Efficiency_CA_Weight/PreTrain-Final-3-3-3-50-50-50-0.001-186-500-50.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyungjaeSong/Paired-Library/9aa18cd418a0258382c02846604d3d399c8ab020/DeepBaseEditor/CBE_Efficiency_CA/CBE_Efficiency_CA_Weight/PreTrain-Final-3-3-3-50-50-50-0.001-186-500-50.meta -------------------------------------------------------------------------------- /DeepBaseEditor/CBE_Efficiency_CA/CBE_Efficiency_CA_Weight/checkpoint: -------------------------------------------------------------------------------- 1 | model_checkpoint_path: "PreTrain-Final-3-3-3-50-50-50-0.001-186-500-50" 2 | all_model_checkpoint_paths: "PreTrain-Final-3-3-3-50-50-50-0.001-186-500-50" 3 | -------------------------------------------------------------------------------- /DeepBaseEditor/CBE_Efficiency_CA/CBE_Efficiency_CA_sample.txt: -------------------------------------------------------------------------------- 1 | reference seqeucne "Chromatin accessibility (1= DNase I hypersensitive sites, 0 = Dnase I non-sensitive sites)" 2 | 1 GGGCTGAACTAAAGCCTCCAGGGG 1 3 | -------------------------------------------------------------------------------- /DeepBaseEditor/CBE_Efficiency_CA/TEST_CBE_Efficiency_CA.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | from os import system 3 | import tensorflow as tf 4 | import numpy as np 5 | from numpy import * 6 | import xlsxwriter 7 | import pyexcel as pe 8 | from random import shuffle 9 | 10 | #np.set_printoptions(threshold='nan') 11 | 12 | ############################################################################## 13 | 14 | 15 | 16 | 17 | ############################################################################## 18 | ## System Paths ## 19 | path = './' 20 | parameters = {'0': 'CBE_Efficiency_CA_sample.txt'} # Dictionary can be expanded for multiple test parameters 21 | 22 | ## Run Parameters ## 23 | TEST_NUM_SET = [0] # List can be expanded in case of multiple test parameters 24 | best_model_path_list = ['./CBE_Efficiency_CA_Weight'] 25 | 26 | # Model 27 | length = 24 28 | 29 | class Deep_xCas9(object): 30 | def __init__(self, filter_size, filter_num, node_1 = 80, 
node_2 = 60, l_rate = 0.005): 31 | self.inputs = tf.placeholder(tf.float32, [None, 1, length, 4]) 32 | self.targets = tf.placeholder(tf.float32, [None, 1]) 33 | self.ca = tf.placeholder(tf.float32, [None, 1]) 34 | self.is_training = tf.placeholder(tf.bool) 35 | def create_new_conv_layer(input_data, num_input_channels, num_filters, filter_shape, pool_shape, name): 36 | # setup the filter input shape for tf.nn.conv_2d 37 | conv_filt_shape = [filter_shape[0], filter_shape[1], num_input_channels, 38 | num_filters] 39 | 40 | # initialise weights and bias for the filter 41 | weights = tf.Variable(tf.truncated_normal(conv_filt_shape, stddev=0.03), 42 | name=name+'_W') 43 | bias = tf.Variable(tf.truncated_normal([num_filters]), name=name+'_b') 44 | 45 | # setup the convolutional layer operation 46 | out_layer = tf.nn.conv2d(input_data, weights, [1, 1, 1, 1], padding='VALID') 47 | #out_layer = tf.nn.conv2d(input_data, weights, [1, 1, 1, 1], padding='SAME') 48 | 49 | # add the bias 50 | out_layer += bias 51 | 52 | # apply a ReLU non-linear activation 53 | out_layer = tf.layers.dropout(tf.nn.relu(out_layer), 0.3, self.is_training) 54 | 55 | # now perform max pooling 56 | #ksize = [1, pool_shape[0], pool_shape[1], 1] 57 | #strides = [1, 1, 2, 1] 58 | #out_layer = tf.nn.max_pool(out_layer, ksize=ksize, strides=strides, 59 | # padding='SAME') 60 | return out_layer 61 | #def end: create_new_conv_layer 62 | 63 | L_filter_num = 4 64 | L_inputs = self.inputs 65 | L_pool_0 = create_new_conv_layer(L_inputs, L_filter_num, filter_num[0] * 3, [1, filter_size[0]], [1, 2], name='conv1') 66 | 67 | with tf.variable_scope('Fully_Connected_Layer1'): 68 | layer_node_0 = int((length-filter_size[0])/1)+1 69 | node_num_0 = layer_node_0*filter_num[0] * 3 70 | 71 | L_flatten_0 = tf.reshape(L_pool_0, [-1, node_num_0]) 72 | W_fcl1 = tf.get_variable("W_fcl1", shape=[node_num_0, node_1]) 73 | B_fcl1 = tf.get_variable("B_fcl1", shape=[node_1]) 74 | L_fcl1_pre = tf.nn.bias_add(tf.matmul(L_flatten_0, W_fcl1), B_fcl1) 75 | L_fcl1 = tf.nn.relu(L_fcl1_pre) 76 | L_fcl1_drop = tf.layers.dropout(L_fcl1, 0.3, self.is_training) 77 | 78 | with tf.variable_scope('Fully_Connected_Layer2'): 79 | W_fcl2 = tf.get_variable("W_fcl2", shape=[node_1, node_2]) 80 | B_fcl2 = tf.get_variable("B_fcl2", shape=[node_2]) 81 | L_fcl2_pre = tf.nn.bias_add(tf.matmul(L_fcl1_drop, W_fcl2), B_fcl2) 82 | L_fcl2 = tf.nn.relu(L_fcl2_pre) 83 | L_fcl2_drop = tf.layers.dropout(L_fcl2, 0.3, self.is_training) 84 | 85 | with tf.variable_scope('CA_Layer'): 86 | W_acc = tf.get_variable("W_acc", shape=[1, node_2])#, initializer=tf.contrib.layers.xavier_initializer()) 87 | B_acc = tf.get_variable("B_acc", shape=[node_2])#, initializer=tf.contrib.layers.xavier_initializer()) 88 | L_ca_pre = tf.nn.bias_add(tf.matmul(self.ca, W_acc), B_acc) 89 | 90 | L_fcl2_drop = tf.multiply(L_fcl2_drop, L_ca_pre) 91 | 92 | with tf.variable_scope('Output_Layer'): 93 | W_out = tf.get_variable("W_out", shape=[node_2, 1])#, initializer=tf.contrib.layers.xavier_initializer()) 94 | B_out = tf.get_variable("B_out", shape=[1])#, initializer=tf.contrib.layers.xavier_initializer()) 95 | self.outputs = tf.nn.bias_add(tf.matmul(L_fcl2_drop, W_out), B_out) 96 | 97 | # Define loss function and optimizer 98 | self.obj_loss = tf.reduce_mean(tf.square(self.targets - self.outputs)) 99 | self.optimizer = tf.train.AdamOptimizer(l_rate).minimize(self.obj_loss, 100 | var_list= (tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='CA_Layer') 101 | +tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, 
scope='Output_Layer'))) 102 | #def end: def __init__ 103 | #class end: Deep_xCas9 104 | 105 | # Test Model 106 | def Model_Inference(sess, TEST_X, TEST_CA, model, args, load_episode, test_data_num, testvalbook, testvalsheet, col_index=1): 107 | test_batch = 500 108 | test_spearman = 0.0 109 | optimizer = model.optimizer 110 | TEST_Z = zeros((TEST_X.shape[0], 1), dtype=float) 111 | 112 | for i in range(int(ceil(float(TEST_X.shape[0])/float(test_batch)))): 113 | Dict = {model.inputs: TEST_X[i*test_batch:(i+1)*test_batch], model.ca: TEST_CA[i*test_batch:(i+1)*test_batch], model.is_training: False} 114 | TEST_Z[i*test_batch:(i+1)*test_batch] = sess.run([model.outputs], feed_dict=Dict)[0] 115 | 116 | testval_row = 0 117 | testval_col = 2 118 | sheet_index = 0 119 | 120 | for test_value in (TEST_Z): 121 | testvalsheet[sheet_index].write(testval_row, testval_col, 100*test_value[0]) 122 | testval_row += 1 123 | 124 | return 125 | 126 | 127 | def preprocess_seq(data): 128 | print("Start preprocessing the sequence done 2d") 129 | length = 24 130 | 131 | DATA_X = np.zeros((len(data),1,length,4), dtype=int) 132 | print(np.shape(data), len(data), length) 133 | for l in range(len(data)): 134 | for i in range(length): 135 | 136 | try: data[l][i] 137 | except: print(data[l], i, length, len(data)) 138 | 139 | if data[l][i]in "Aa": DATA_X[l, 0, i, 0] = 1 140 | elif data[l][i] in "Cc": DATA_X[l, 0, i, 1] = 1 141 | elif data[l][i] in "Gg": DATA_X[l, 0, i, 2] = 1 142 | elif data[l][i] in "Tt": DATA_X[l, 0, i, 3] = 1 143 | else: 144 | print "Non-ATGC character " + data[l] 145 | print i 146 | print data[l][i] 147 | sys.exit() 148 | #loop end: i 149 | #loop end: l 150 | print("Preprocessing the sequence done") 151 | return DATA_X 152 | #def end: preprocess_seq 153 | 154 | 155 | def getfile_inference(filenum): 156 | param = parameters['%s' % filenum] 157 | FILE = open(path+param, "r") 158 | data = FILE.readlines() 159 | data_n = len(data) - 1 160 | seq = [] 161 | ca = [] 162 | 163 | for l in range(1, data_n+1): 164 | try: 165 | data_split = data[l].split() 166 | seq.append(data_split[1]) 167 | ca.append(data_split[2]) 168 | except: 169 | print data[l] 170 | raise ValueError 171 | #loop end: l 172 | FILE.close() 173 | processed_full_seq = preprocess_seq(seq) 174 | ca = expand_dims(array(ca), 1) 175 | 176 | return processed_full_seq, seq, ca 177 | #def end: getseq 178 | 179 | 180 | if "outputs" not in os.listdir(os.getcwd()): 181 | os.makedirs('outputs') 182 | 183 | #TensorFlow config 184 | conf = tf.ConfigProto() 185 | conf.gpu_options.allow_growth = True 186 | os.environ['CUDA_VISIBLE_DEVICES'] = '0' 187 | best_model_cv = 0.0 188 | best_model_list = [] 189 | 190 | testbook = xlsxwriter.Workbook('outputs/TEST_OUTPUT_fortest.xlsx') 191 | 192 | TEST_X = [] 193 | TEST_CA = [] 194 | testsheet = [] 195 | for TEST_NUM_index in range(len(TEST_NUM_SET)): 196 | TEST_NUM = TEST_NUM_SET[TEST_NUM_index] 197 | testsheet.append([testbook.add_worksheet('{}'.format(TEST_NUM))]) 198 | tmp_X, pre_X, tmp_CA = getfile_inference(TEST_NUM) 199 | TEST_X.append(tmp_X) 200 | TEST_CA.append(tmp_CA) 201 | test_row = 0 202 | for index_X in range(np.shape(pre_X)[0]): 203 | testsheet[-1][-1].write(test_row, 0, pre_X[index_X]) 204 | testsheet[-1][-1].write(test_row, 1, tmp_CA[index_X][0]) 205 | test_row += 1 206 | 207 | for best_model_path in best_model_path_list: 208 | for modelname in os.listdir(best_model_path): 209 | if "meta" in modelname: 210 | best_model_list.append(modelname[:-5]) 211 | #loop end: best_model_path 212 | 213 | for index in 
range(len(best_model_list)): 214 | best_model_path = best_model_path_list[index] 215 | best_model = best_model_list[index] 216 | valuelist = best_model.split('-') 217 | fulllist = [] 218 | 219 | for value in valuelist: 220 | try: 221 | value=int(value) 222 | except: 223 | try: value=float(value) 224 | except: pass 225 | fulllist.append(value) 226 | 227 | print(fulllist[2:]) 228 | 229 | filter_size_1, filter_size_2, filter_size_3, filter_num_1, filter_num_2, filter_num_3, l_rate, load_episode, node_1, node_2 = fulllist[2:] 230 | filter_size = [filter_size_1, filter_size_2, filter_size_3] 231 | filter_num = [filter_num_1, filter_num_2, filter_num_3] 232 | 233 | args = [filter_size, filter_num, l_rate, 0, None, node_1, node_2] 234 | # Loading the model with the best validation score and test 235 | tf.reset_default_graph() 236 | with tf.Session(config=conf) as sess: 237 | sess.run(tf.global_variables_initializer()) 238 | model = Deep_xCas9(filter_size, filter_num, node_1, node_2, args[2]) 239 | saver = tf.train.Saver() 240 | saver.restore(sess, best_model_path+"/PreTrain-Final-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}".format(args[0][0], args[0][1], args[0][2], args[1][0], args[1][1], args[1][2], args[2], load_episode, args[5], args[6])) 241 | for i in range(len(TEST_NUM_SET)): 242 | Model_Inference(sess, TEST_X[i], TEST_CA[i], model, args, load_episode, TEST_NUM_SET[i], testbook, testsheet[i]) 243 | testbook.close() 244 | -------------------------------------------------------------------------------- /DeepBaseEditor/CBE_Efficiency_CA/outputs/TEST_OUTPUT_fortest.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyungjaeSong/Paired-Library/9aa18cd418a0258382c02846604d3d399c8ab020/DeepBaseEditor/CBE_Efficiency_CA/outputs/TEST_OUTPUT_fortest.xlsx -------------------------------------------------------------------------------- /DeepBaseEditor/CBE_Proportion/CBE_Proportion_Weight/PreTrain-Final-3-3-3-20-20-20-0.0005-214-256-256.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyungjaeSong/Paired-Library/9aa18cd418a0258382c02846604d3d399c8ab020/DeepBaseEditor/CBE_Proportion/CBE_Proportion_Weight/PreTrain-Final-3-3-3-20-20-20-0.0005-214-256-256.data-00000-of-00001 -------------------------------------------------------------------------------- /DeepBaseEditor/CBE_Proportion/CBE_Proportion_Weight/PreTrain-Final-3-3-3-20-20-20-0.0005-214-256-256.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyungjaeSong/Paired-Library/9aa18cd418a0258382c02846604d3d399c8ab020/DeepBaseEditor/CBE_Proportion/CBE_Proportion_Weight/PreTrain-Final-3-3-3-20-20-20-0.0005-214-256-256.index -------------------------------------------------------------------------------- /DeepBaseEditor/CBE_Proportion/CBE_Proportion_Weight/PreTrain-Final-3-3-3-20-20-20-0.0005-214-256-256.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyungjaeSong/Paired-Library/9aa18cd418a0258382c02846604d3d399c8ab020/DeepBaseEditor/CBE_Proportion/CBE_Proportion_Weight/PreTrain-Final-3-3-3-20-20-20-0.0005-214-256-256.meta -------------------------------------------------------------------------------- /DeepBaseEditor/CBE_Proportion/CBE_Proportion_Weight/checkpoint: -------------------------------------------------------------------------------- 1 | model_checkpoint_path: 
"PreTrain-Final-3-3-3-20-20-20-0.0005-214-256-256" 2 | all_model_checkpoint_paths: "PreTrain-Final-3-3-3-20-20-20-0.0005-214-256-256" 3 | -------------------------------------------------------------------------------- /DeepBaseEditor/CBE_Proportion/CBE_Proportion_sample.txt: -------------------------------------------------------------------------------- 1 | reference seqeucne 2 | 1 AGGGCTGAACTAAAGCCTCCAGGGG 3 | -------------------------------------------------------------------------------- /DeepBaseEditor/CBE_Proportion/TEST_CBE_Proportion.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | from os import system 3 | import tensorflow as tf 4 | import numpy as np 5 | from numpy import * 6 | import xlsxwriter 7 | import pyexcel as pe 8 | from random import shuffle 9 | 10 | #np.set_printoptions(threshold='nan') 11 | 12 | ############################################################################## 13 | 14 | 15 | 16 | 17 | ############################################################################## 18 | ## System Paths ## 19 | path = './' 20 | parameters = {'0': 'CBE_Proportion_sample.txt'} # Dictionary can be expanded for multiple test parameters 21 | 22 | ## Run Parameters ## 23 | TEST_NUM_SET = [0] # List can be expanded in case of multiple test parameters 24 | best_model_path_list = ['./CBE_Proportion_Weight'] 25 | 26 | # Model 27 | length = 25 28 | window_start = 4 29 | window_size = 8 30 | 31 | class Deep_xCas9(object): 32 | def __init__(self, filter_size, filter_num, node_1 = 80, node_2 = 60, l_rate = 0.005, window_size = 5): 33 | self.inputs = tf.placeholder(tf.float32, [None, 1, length, 4]) 34 | self.targets = tf.placeholder(tf.float32, [None, 2**window_size-1]) 35 | self.wow = tf.placeholder(tf.float32, [None, 2**window_size-1]) 36 | self.possible_labels = tf.placeholder(tf.float32, [None, 2**window_size-1]) 37 | self.is_training = tf.placeholder(tf.bool) 38 | 39 | def create_new_conv_layer(input_data, num_input_channels, num_filters, filter_shape, pool_shape, name): 40 | # setup the filter input shape for tf.nn.conv_2d 41 | conv_filt_shape = [filter_shape[0], filter_shape[1], num_input_channels, 42 | num_filters] 43 | 44 | # initialise weights and bias for the filter 45 | weights = tf.Variable(tf.truncated_normal(conv_filt_shape, stddev=0.03), 46 | name=name+'_W') 47 | bias = tf.Variable(tf.truncated_normal([num_filters]), name=name+'_b') 48 | 49 | # setup the convolutional layer operation 50 | out_layer = tf.nn.conv2d(input_data, weights, [1, 1, 1, 1], padding='VALID') 51 | #out_layer = tf.nn.conv2d(input_data, weights, [1, 1, 1, 1], padding='SAME') 52 | 53 | # add the bias 54 | out_layer += bias 55 | 56 | # apply a ReLU non-linear activation 57 | out_layer = tf.layers.dropout(tf.nn.relu(out_layer), 0.3, self.is_training) 58 | 59 | # now perform max pooling 60 | #ksize = [1, pool_shape[0], pool_shape[1], 1] 61 | #strides = [1, 1, 2, 1] 62 | #out_layer = tf.nn.max_pool(out_layer, ksize=ksize, strides=strides, 63 | # padding='SAME') 64 | return out_layer 65 | #def end: create_new_conv_layer 66 | 67 | L_filter_num = 4 68 | L_inputs = self.inputs 69 | L_pool_0 = create_new_conv_layer(L_inputs, L_filter_num, filter_num[0] * 3, [1, filter_size[0]], [1, 2], name='conv1') 70 | 71 | with tf.variable_scope('Fully_Connected_Layer1'): 72 | layer_node_0 = int((length-filter_size[0])/1)+1 73 | node_num_0 = layer_node_0*filter_num[0] * 3 74 | 75 | L_flatten_0 = tf.reshape(L_pool_0, [-1, node_num_0]) 76 | W_fcl1 = 
tf.get_variable("W_fcl1", shape=[node_num_0, node_1]) 77 | B_fcl1 = tf.get_variable("B_fcl1", shape=[node_1]) 78 | L_fcl1_pre = tf.nn.bias_add(tf.matmul(L_flatten_0, W_fcl1), B_fcl1) 79 | L_fcl1 = tf.nn.relu(L_fcl1_pre) 80 | L_fcl1_drop = tf.layers.dropout(L_fcl1, 0.3, self.is_training) 81 | 82 | with tf.variable_scope('Fully_Connected_Layer2'): 83 | W_fcl2 = tf.get_variable("W_fcl2", shape=[node_1, node_2]) 84 | B_fcl2 = tf.get_variable("B_fcl2", shape=[node_2]) 85 | L_fcl2_pre = tf.nn.bias_add(tf.matmul(L_fcl1_drop, W_fcl2), B_fcl2) 86 | L_fcl2 = tf.nn.relu(L_fcl2_pre) 87 | L_fcl2_drop = tf.layers.dropout(L_fcl2, 0.3, self.is_training) 88 | 89 | with tf.variable_scope('Output_Layer'): 90 | W_out = tf.get_variable("W_out", shape=[node_2, 2**window_size-1])#, initializer=tf.contrib.layers.xavier_initializer()) 91 | B_out = tf.get_variable("B_out", shape=[2**window_size-1])#, initializer=tf.contrib.layers.xavier_initializer()) 92 | self.outputs = tf.nn.bias_add(tf.matmul(L_fcl2_drop, W_out), B_out) 93 | 94 | #self.possible_outputs = self.outputs 95 | self.possible_outputs = tf.nn.softmax(self.outputs)#tf.multiply(self.outputs, self.possible_labels)) 96 | # Define loss function and optimizer 97 | self.obj_loss = tf.reduce_mean(-tf.reduce_sum(self.targets * tf.log(self.possible_outputs) - self.targets * tf.log(self.targets), reduction_indices=[1])) 98 | self.obj_loss1 = tf.reduce_mean(-tf.reduce_sum(self.targets * tf.log(self.wow) - self.targets * tf.log(self.targets), reduction_indices=[1])) 99 | self.optimizer = tf.train.AdamOptimizer(l_rate).minimize(self.obj_loss) 100 | #def end: def __init__ 101 | #class end: Deep_xCas9 102 | 103 | def Model_Inference(sess, TEST_X, TEST_Label, model, args, load_episode, test_data_num, testvalbook, testvalsheet, window_size=8): 104 | test_batch = 1024 105 | optimizer = model.optimizer 106 | TEST_Z = np.zeros((TEST_X.shape[0], 2**window_size - 1), dtype=float) 107 | 108 | for i in range(int(ceil(float(TEST_X.shape[0])/float(test_batch)))): 109 | Dict = {model.inputs: TEST_X[i*test_batch:(i+1)*test_batch], model.is_training: False} 110 | TEST_Z[i*test_batch:(i+1)*test_batch] = sess.run([model.possible_outputs], feed_dict=Dict)[0] 111 | 112 | testval_row = 0 113 | testval_col = 3 114 | sheet_index = 0 115 | 116 | sum = 0 117 | for test_index in range(len(TEST_Z)): 118 | sum += TEST_Z[test_index][TEST_Label[test_index]] 119 | TEST_Z /= sum 120 | 121 | for test_index in range(len(TEST_Z)): 122 | test_value = TEST_Z[test_index][TEST_Label[test_index]] 123 | testvalsheet[sheet_index].write(testval_row, testval_col, test_value) 124 | testval_row += 1 125 | return 126 | 127 | 128 | # One hot encoding for DNA Sequence 129 | def preprocess_seq(data): 130 | print("Start preprocessing the sequence done 2d") 131 | length = 25 132 | 133 | DATA_X = np.zeros((len(data),1,length,4), dtype=int) 134 | print(np.shape(data), len(data), length) 135 | for l in range(len(data)): 136 | for i in range(length): 137 | 138 | try: data[l][i] 139 | except: print(data[l], i, length, len(data)) 140 | 141 | if data[l][i] in "Aa": DATA_X[l, 0, i, 0] = 1 142 | elif data[l][i] in "Cc": DATA_X[l, 0, i, 1] = 1 143 | elif data[l][i] in "Gg": DATA_X[l, 0, i, 2] = 1 144 | elif data[l][i] in "Tt": DATA_X[l, 0, i, 3] = 1 145 | else: 146 | print "Non-ATGC character " + data[l] 147 | print i 148 | print data[l][i] 149 | sys.exit() 150 | #loop end: i 151 | #loop end: l 152 | print("Preprocessing the sequence done") 153 | return DATA_X 154 | #def end: preprocess_seq 155 | 156 | def 
inference_index(orig_seq, req_seq): 157 | window_start = 4 158 | window_size = 8 159 | index = [] 160 | for seq_index in range(len(list(req_seq))): 161 | labels_index = -1 162 | for ind in range(window_size): #change to 2 163 | if req_seq[seq_index][window_start + ind] == orig_seq[seq_index][window_start + ind]: pass 164 | else: 165 | labels_index += 2**(window_size - 1-ind) 166 | if labels_index < 0: 167 | print("WT INCLUDED IN REQUIRED SEQUENCE") 168 | #raise ValueError 169 | labels_index = 0 170 | index.append(labels_index) 171 | return index 172 | 173 | def req_seq_produce(seq): 174 | window_start = 4 175 | window_size = 8 176 | req_seq = [] 177 | full_seq = [] 178 | for indiv_seq in seq: 179 | tmp_seq = [indiv_seq] 180 | for index in range(window_size): 181 | if indiv_seq[window_start+index] == 'C': 182 | print(index, indiv_seq[window_start:window_start+index]) 183 | tmp = [] 184 | for tmp_indiv_seq in tmp_seq: 185 | tmp.append(tmp_indiv_seq[:window_start+index]+str("T")+tmp_indiv_seq[window_start+index+1:]) 186 | full_seq.append(indiv_seq) 187 | for sequence in tmp: 188 | tmp_seq.append(sequence) 189 | for req_sequence in tmp_seq: 190 | if req_sequence != tmp_seq[0]: 191 | req_seq.append(req_sequence) 192 | return full_seq, req_seq 193 | 194 | def getfile_inference(filenum): 195 | param = parameters['%s'%filenum] 196 | FILE = open(path+param, "r") 197 | data = FILE.readlines() 198 | data_n = len(data) - 1 199 | seq = [] 200 | req_seq = [] 201 | 202 | for l in range(1, data_n+1): 203 | try: 204 | data_split = data[l].split() 205 | seq.append(data_split[1]) 206 | except: 207 | print data[l] 208 | seq.append(data[l]) 209 | #loop end: l 210 | FILE.close() 211 | full_seq, req_seq = req_seq_produce(seq) 212 | processed_full_seq = preprocess_seq(full_seq) 213 | processed_full_req_seq = inference_index(full_seq, req_seq) 214 | return processed_full_seq, full_seq, processed_full_req_seq, req_seq 215 | 216 | if "outputs" not in os.listdir(os.getcwd()): 217 | os.makedirs('outputs') 218 | 219 | #TensorFlow config 220 | conf = tf.ConfigProto() 221 | conf.gpu_options.allow_growth = True 222 | os.environ['CUDA_VISIBLE_DEVICES'] = '0' 223 | best_model_cv = 0.0 224 | best_model_list = [] 225 | 226 | testbook = xlsxwriter.Workbook('outputs/TEST_OUTPUT_fortest.xlsx') 227 | 228 | TEST_X = [] 229 | TEST_Label = [] 230 | testsheet = [] 231 | for TEST_NUM_index in range(len(TEST_NUM_SET)): 232 | TEST_NUM = TEST_NUM_SET[TEST_NUM_index] 233 | testsheet.append([testbook.add_worksheet('{}'.format(TEST_NUM))]) 234 | tmp_X, pre_X, tmp_Label, pre_Label = getfile_inference(TEST_NUM) 235 | TEST_X.append(tmp_X) 236 | TEST_Label.append(tmp_Label) 237 | test_row = 0 238 | for index_X in range(np.shape(pre_X)[0]): 239 | testsheet[-1][-1].write(test_row, 0, pre_X[index_X]) 240 | testsheet[-1][-1].write(test_row, 1, pre_Label[index_X]) 241 | test_row += 1 242 | 243 | for best_model_path in best_model_path_list: 244 | for modelname in os.listdir(best_model_path): 245 | print(modelname) 246 | if "meta" in modelname: 247 | best_model_list.append(modelname[:-5]) 248 | 249 | print(best_model_list) 250 | for index in range(len(best_model_list)): 251 | best_model_path = best_model_path_list[index] 252 | best_model = best_model_list[index] 253 | valuelist = best_model.split('-') 254 | fulllist = [] 255 | 256 | for value in valuelist: 257 | try: 258 | value=int(value) 259 | except: 260 | try: value=float(value) 261 | except: pass 262 | fulllist.append(value) 263 | 264 | print(fulllist[2:]) 265 | 266 | filter_size_1, 
filter_size_2, filter_size_3, filter_num_1, filter_num_2, filter_num_3, l_rate, load_episode, node_1, node_2 = fulllist[2:] 267 | filter_size = [filter_size_1, filter_size_2, filter_size_3] 268 | filter_num = [filter_num_1, filter_num_2, filter_num_3] 269 | 270 | args = [filter_size, filter_num, l_rate, 0, None, node_1, node_2] 271 | # Loading the model with the best validation score and test 272 | tf.reset_default_graph() 273 | with tf.Session(config=conf) as sess: 274 | sess.run(tf.global_variables_initializer()) 275 | model = Deep_xCas9(filter_size, filter_num, node_1, node_2, args[2], window_size) 276 | saver = tf.train.Saver() 277 | saver.restore(sess, best_model_path+"/PreTrain-Final-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}".format(args[0][0], args[0][1], args[0][2], args[1][0], args[1][1], args[1][2], args[2], load_episode, args[5], args[6])) 278 | for i in range(len(TEST_NUM_SET)): 279 | Model_Inference(sess, TEST_X[i], TEST_Label[i], model, args, load_episode, TEST_NUM_SET[i], testbook, testsheet[i], window_size) 280 | testbook.close() 281 | -------------------------------------------------------------------------------- /DeepBaseEditor/CBE_Proportion/outputs/TEST_OUTPUT_fortest.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyungjaeSong/Paired-Library/9aa18cd418a0258382c02846604d3d399c8ab020/DeepBaseEditor/CBE_Proportion/outputs/TEST_OUTPUT_fortest.xlsx -------------------------------------------------------------------------------- /DeepBaseEditor/LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Hyoungbum (Henry) Kim, Myungjae Song, Hui Kwon Kim, and Sungtae Lee 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /DeepBaseEditor/README.txt: -------------------------------------------------------------------------------- 1 | 1. System Requirements: 2 | Ubuntu 16.04 3 | Python 2.7.12 4 | Python Packages: 5 | numpy 1.14.5 6 | scipy 1.1.0 7 | 8 | Tensorflow and dependencies: 9 | Tensorflow 1.4.1 10 | CUDA 8.0.61 11 | cuDNN 6.0.21 12 | 13 | 2. 
Installation Guide (required time, <120 minutes): 14 | 15 | - Operation System 16 | Ubuntu 16.04 download from https://www.ubuntu.com/download/desktop 17 | 18 | - Python and packages 19 | Download Python 2.7.12 tarball on https://www.python.org/downloads/release/python-2712/ 20 | Unzip and install: 21 | tar -zxvf Python-2.7.12.tgz 22 | cd ./Python-2.7.12 23 | ./configure 24 | make 25 | 26 | Package Installation: 27 | pip install numpy==1.14.5 28 | pip install scipy==1.1.0 29 | 30 | Tensorflow Installation: 31 | (for GPU use) 32 | pip install tensorflow-gpu==1.4.1 33 | (for CPU only) 34 | pip install tensorflow==1.4.1 35 | 36 | 37 | (for GPU use) 38 | 39 | - CUDA Toolkit 8.0 40 | wget -O cuda_8_linux.run https://developer.nvidia.com/compute/cuda/8.0/Prod2/local_installers/cuda_8.0.61_375.26_linux-run 41 | sudo chmod +x cuda_8_linux.run 42 | ./cuda_8_linux.run 43 | 44 | - cuDNN 6.0.21 45 | Download CUDNN tarball on https://developer.nvidia.com/cudnn 46 | Unzip and install: 47 | tar -zxvf cudnn-8.0-linux-x64-v6.0.tgz 48 | 49 | For more details, please refer to CUDA, CuDNN, and Tensorflow installation guide on Github: 50 | https://gist.github.com/ksopyla/813a62d6afc4307755e5832a3b62f432 51 | 52 | 53 | 3. Demo Instructions (required time, <1 min): 54 | 55 | (1) ABE_Efficiency 56 | 57 | Input1: ./ABE_Efficiency_sample.txt # List of Target Sequence(s) 58 | File format: 59 | Target number 30 bp target sequence (4 bp + 20 bp protospacer + PAM + 3 bp) 60 | 1 AACTGAAGGCTGAACAGCAGGGGTGGGAGA 61 | 62 | Input2: ./ABE_Efficiency_Weight/ # Pre-trained Weight Files 63 | 64 | Output: outputs/TEST_OUTPUT_for_test.xlsx 65 | Predicted activity score for sequence 1 : 66 | 25.79517365 67 | 68 | Run script: 69 | python ./TEST_ABE_Efficiency.py 70 | 71 | Modification for personalized runs: 72 | 73 | 74 | ## System Paths ## 75 | path = './' 76 | parameters = {'0': 'ABE_Efficiency_sample.txt'} 77 | 78 | ## Run Parameters ## 79 | TEST_NUM_SET = [0] # List can be expanded in case of multiple test parameters 80 | best_model_path_list = ['./ABE_Efficiency_Weight/'] 81 | 82 | ABE_Efficiency_Sample.txt can be replaced or modified to include target sequence of interest 83 | 84 | 85 | 86 | (2) ABE_Proportion 87 | 88 | Input1: ./ABE_Proportion_sample.txt # List of Target Sequence(s) 89 | File format: 90 | Target number 30 bp target sequence (4 bp + 20 bp protospacer + PAM + 3 bp) 91 | 1 AACTGAAGGCTGAACAGCAGGGGTGGGAGA 92 | 93 | Input2: ./ABE_Proportion_Weight/ # Pre-trained Weight Files 94 | 95 | Output: outputs/TEST_OUTPUT_for_test.xlsx 96 | 30 bp target sequence (4 bp + 20 bp protospacer + PAM + 3 bp) Outcome seqeuence Proportion 97 | AACTGAAGGCTGAACAGCAGGGGTGGGAGA AACTGAGGGCTGAACAGCAGGGGTGGGAGA 0.068301663 98 | AACTGAAGGCTGAACAGCAGGGGTGGGAGA AACTGAAGGCTGGACAGCAGGGGTGGGAGA 0.346454144 99 | AACTGAAGGCTGAACAGCAGGGGTGGGAGA AACTGAGGGCTGGACAGCAGGGGTGGGAGA 0.006036451 100 | AACTGAAGGCTGAACAGCAGGGGTGGGAGA AACTGAAGGCTGAGCAGCAGGGGTGGGAGA 0.433071852 101 | AACTGAAGGCTGAACAGCAGGGGTGGGAGA AACTGAGGGCTGAGCAGCAGGGGTGGGAGA 0.023255052 102 | AACTGAAGGCTGAACAGCAGGGGTGGGAGA AACTGAAGGCTGGGCAGCAGGGGTGGGAGA 0.11875882 103 | AACTGAAGGCTGAACAGCAGGGGTGGGAGA AACTGAGGGCTGGGCAGCAGGGGTGGGAGA 0.000885288 104 | 105 | 106 | Run script: 107 | python ./TEST_ABE_Proportion.py 108 | 109 | Modification for personalized runs: 110 | 111 | 112 | ## System Paths ## 113 | path = './' 114 | parameters = {'0': 'ABE_Proportion_sample.txt'} 115 | 116 | ## Run Parameters ## 117 | TEST_NUM_SET = [0] # List can be expanded in case of multiple test parameters 118 | 
best_model_path_list = ['./ABE_Proportion_Weight/'] 119 | 120 | ABE_Proportion_sample.txt can be replaced or modified to include target sequence of interest 121 | 122 | 123 | 124 | (3) CBE_Efficiency 125 | 126 | Input1: ./CBE_Efficiency_sample.txt # List of Target Sequence(s) 127 | File format: 128 | Target number 30 bp target sequence (4 bp + 20 bp protospacer + PAM + 3 bp) 129 | 1 TCAGGGCTGAACTAAAGCCTCCAGGGGGCC 130 | 131 | Input2: ./CBE_Efficiency_Weight/ # Pre-trained Weight Files 132 | 133 | Output: outputs/TEST_OUTPUT_for_test.xlsx 134 | Predicted activity score for sequence 1: 135 | 4.856938124 136 | 137 | Run script: 138 | python ./TEST_CBE_Efficiency.py 139 | 140 | Modification for personalized runs: 141 | 142 | 143 | ## System Paths ## 144 | path = './' 145 | parameters = {'0': 'CBE_Efficiency_sample.txt'} 146 | 147 | ## Run Parameters ## 148 | TEST_NUM_SET = [0] # List can be expanded in case of multiple test parameters 149 | best_model_path_list = ['./CBE_Efficiency_Weight/'] 150 | 151 | CBE_Efficiency_sample.txt can be replaced or modified to include target sequence of interest 152 | 153 | 154 | 155 | (4) CBE_Efficiency_CA 156 | 157 | Input1: ./CBE_Efficiency_CA_sample.txt # List of Target Sequence(s) 158 | File format: 159 | Target number 30 bp target sequence (4 bp + 20 bp protospacer + PAM + 3 bp) "Chromatin accessibility (1= DNase I hypersensitive sites, 0 = Dnase I non-sensitive sites)" 160 | 1 TCAGGGCTGAACTAAAGCCTCCAGGGGGCC 1 161 | 162 | Input2: ./CBE_Efficiency_CA_Weight/ # Pre-trained Weight Files 163 | 164 | Output: outputs/TEST_OUTPUT_for_test.xlsx 165 | Predicted activity score for sequence 1: 166 | 17.0684725 167 | 168 | Run script: 169 | python ./TEST_CBE_Efficiency_CA.py 170 | 171 | Modification for personalized runs: 172 | 173 | 174 | ## System Paths ## 175 | path = './' 176 | parameters = {'0': 'CBE_Efficiency_CA_sample.txt'} 177 | 178 | ## Run Parameters ## 179 | TEST_NUM_SET = [0] # List can be expanded in case of multiple test parameters 180 | best_model_path_list = ['./CBE_Efficiency_CA_Weight/'] 181 | 182 | CBE_Efficiency_CA_sample.txt can be replaced or modified to include target sequence of interest 183 | 184 | 185 | 186 | (5) CBE_Proportion 187 | 188 | Input1: ./CBE_Proportion_sample.txt # List of Target Sequence(s) 189 | File format: 190 | Target number 30 bp target sequence (4 bp + 20 bp protospacer + PAM + 3 bp) 191 | 1 TCAGGGCTGAACTAAAGCCTCCAGGGGGCC 192 | 193 | 194 | Input2: ./CBE_Proportion_Weight/ # Pre-trained Weight Files 195 | 196 | Output: outputs/TEST_OUTPUT_for_test.xlsx 197 | 30 bp target sequence (4 bp + 20 bp protospacer + PAM + 3 bp) Outcome seqeuence Proportion 198 | TCAGGGCTGAACTAAAGCCTCCAGGGGGCC TCAGGGTTGAACTAAAGCCTCCAGGGGGCC 0.103429772 199 | TCAGGGCTGAACTAAAGCCTCCAGGGGGCC TCAGGGCTGAATTAAAGCCTCCAGGGGGCC 0.846258879 200 | TCAGGGCTGAACTAAAGCCTCCAGGGGGCC TCAGGGTTGAATTAAAGCCTCCAGGGGGCC 0.048627082 201 | 202 | 203 | Run script: 204 | python ./TEST_CBE_Proportion.py 205 | 206 | Modification for personalized runs: 207 | 208 | 209 | ## System Paths ## 210 | path = './' 211 | parameters = {'0': 'CBE_Proportion_sample.txt'} 212 | 213 | ## Run Parameters ## 214 | TEST_NUM_SET = [0] # List can be expanded in case of multiple test parameters 215 | best_model_path_list = ['./CBE_Proportion_Weight/'] 216 | 217 | CBE_Proportion_sample.txt can be replaced or modified to include target sequence of interest 218 | -------------------------------------------------------------------------------- 
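Note: the TEST_* scripts read each sample file by skipping the header row, taking the second whitespace-separated column as the target sequence, and calling sys.exit() from preprocess_seq on any non-ATGC character, with the expected length fixed by the length constant at the top of each script (25 in TEST_CBE_Proportion.py above). A minimal pre-flight check of a sample file is sketched below; the script name check_sample.py and its command-line interface are illustrative assumptions, not part of the repository.

# check_sample.py -- illustrative helper, not part of the original scripts.
# Flags rows whose sequence has the wrong length or contains characters other
# than A/C/G/T, since the TEST_* scripts abort on such input.
import sys

def check_sample(filename, expected_length):
    with open(filename) as handle:
        lines = handle.readlines()
    for number, line in enumerate(lines[1:], start=2):   # row 1 is the header
        fields = line.split()
        if len(fields) < 2:
            print("line {}: no sequence column found".format(number))
            continue
        seq = fields[1].upper()
        if len(seq) != expected_length:
            print("line {}: length {} (expected {})".format(number, len(seq), expected_length))
        if set(seq) - set("ACGT"):
            print("line {}: non-ATGC character in {}".format(number, seq))

if __name__ == "__main__":
    # example: python check_sample.py CBE_Proportion_sample.txt 25
    check_sample(sys.argv[1], int(sys.argv[2]))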
/DeepCas9-NG/DeepCas9-NG_weight/PreTrain-Final-4-6-8-180-150-120-0.001-879-120-60.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyungjaeSong/Paired-Library/9aa18cd418a0258382c02846604d3d399c8ab020/DeepCas9-NG/DeepCas9-NG_weight/PreTrain-Final-4-6-8-180-150-120-0.001-879-120-60.data-00000-of-00001 -------------------------------------------------------------------------------- /DeepCas9-NG/DeepCas9-NG_weight/PreTrain-Final-4-6-8-180-150-120-0.001-879-120-60.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyungjaeSong/Paired-Library/9aa18cd418a0258382c02846604d3d399c8ab020/DeepCas9-NG/DeepCas9-NG_weight/PreTrain-Final-4-6-8-180-150-120-0.001-879-120-60.index -------------------------------------------------------------------------------- /DeepCas9-NG/DeepCas9-NG_weight/PreTrain-Final-4-6-8-180-150-120-0.001-879-120-60.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyungjaeSong/Paired-Library/9aa18cd418a0258382c02846604d3d399c8ab020/DeepCas9-NG/DeepCas9-NG_weight/PreTrain-Final-4-6-8-180-150-120-0.001-879-120-60.meta -------------------------------------------------------------------------------- /DeepCas9-NG/DeepCas9-NG_weight/checkpoint: -------------------------------------------------------------------------------- 1 | model_checkpoint_path: "PreTrain-Final-4-6-8-180-150-120-0.001-879-120-60" 2 | all_model_checkpoint_paths: "PreTrain-Final-4-6-8-180-150-120-0.001-879-120-60" 3 | -------------------------------------------------------------------------------- /DeepCas9-NG/README.txt: -------------------------------------------------------------------------------- 1 | 1. System Requirements: 2 | Ubuntu 16.04 3 | Python 2.7.12 4 | Python Packages: 5 | numpy 1.14.5 6 | scipy 1.1.0 7 | 8 | Tensorflow and dependencies: 9 | Tensorflow 1.4.1 10 | CUDA 8.0.61 11 | cuDNN 6.0.21 12 | 13 | 2. Installation Guide (required time, <120 minutes): 14 | 15 | - Operation System 16 | Ubuntu 16.04 download from https://www.ubuntu.com/download/desktop 17 | 18 | - Python and packages 19 | Download Python 2.7.12 tarball on https://www.python.org/downloads/release/python-2712/ 20 | Unzip and install: 21 | tar -zxvf Python-2.7.12.tgz 22 | cd ./Python-2.7.12 23 | ./configure 24 | make 25 | 26 | Package Installation: 27 | pip install numpy==1.14.5 28 | pip install scipy==1.1.0 29 | 30 | Tensorflow Installation: 31 | (for GPU use) 32 | pip install tensorflow-gpu==1.4.1 33 | (for CPU only) 34 | pip install tensorflow==1.4.1 35 | 36 | 37 | (for GPU use) 38 | 39 | - CUDA Toolkit 8.0 40 | wget -O cuda_8_linux.run https://developer.nvidia.com/compute/cuda/8.0/Prod2/local_installers/cuda_8.0.61_375.26_linux-run 41 | sudo chmod +x cuda_8_linux.run 42 | ./cuda_8_linux.run 43 | 44 | - cuDNN 6.0.21 45 | Download CUDNN tarball on https://developer.nvidia.com/cudnn 46 | Unzip and install: 47 | tar -zxvf cudnn-8.0-linux-x64-v6.0.tgz 48 | 49 | For more details, please refer to CUDA, CuDNN, and Tensorflow installation guide on Github: 50 | https://gist.github.com/ksopyla/813a62d6afc4307755e5832a3b62f432 51 | 52 | 53 | 3. 
Demo Instructions (required time, <1 min): 54 | 55 | Input1: ./dataset/ # List of Target Sequence(s) 56 | File format: 57 | Target number 30 bp target sequence (4 bp + 20 bp protospacer + PAM + 3 bp) 58 | 1 CTCAGTACCTATCTGAGCATACCGTGTGTT 59 | 2 GATTGCAGAGGTAGAATCAGCAGGTGCTGT 60 | 61 | Input2: ./DeepCas9-NG_Final/ # Pre-trained Weight Files 62 | 63 | Output: RANK_final_DeepCas9-NG_Final.txt 64 | Predicted activity score for sequence 1 and 2: 65 | 84.10478973, 56.13136673 66 | 67 | Run script: 68 | python ./Test.py 69 | 70 | Modification for personalized runs: 71 | 72 | 73 | ## System Paths ## 74 | path = './dataset/' 75 | parameters = {'0': 'sample.txt'} 76 | 77 | ## Run Parameters ## 78 | TEST_NUM_SET = [0] # List can be expanded in case of multiple test parameters 79 | best_model_path_list = ['./DeepCas9-NG_Final/'] 80 | 81 | sample.txt can be replaced or modified to include target sequence of interest 82 | 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /DeepCas9-NG/Test.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | from os import system 3 | import tensorflow as tf 4 | import numpy as np 5 | 6 | np.set_printoptions(threshold='nan') 7 | 8 | ############################################################################## 9 | 10 | 11 | 12 | 13 | ############################################################################## 14 | ## System Paths ## 15 | path = './dataset/' 16 | parameters = {'0': 'sample.txt'} # Dictionary can be expanded for multiple test parameters 17 | 18 | ## Run Parameters ## 19 | TEST_NUM_SET = [0] # List can be expanded in case of multiple test parameters 20 | best_model_path_list = ['./DeepCas9-NG_weight/'] 21 | 22 | # Model 23 | length = 30 24 | 25 | class DeepCas9_NG(object): 26 | def __init__(self, filter_size, filter_num, node_1 = 80, node_2 = 60, l_rate = 0.005): 27 | self.inputs = tf.placeholder(tf.float32, [None, 1, length, 4]) 28 | self.targets = tf.placeholder(tf.float32, [None, 1]) 29 | self.is_training = tf.placeholder(tf.bool) 30 | def create_new_conv_layer(input_data, num_input_channels, num_filters, filter_shape, pool_shape, name): 31 | # setup the filter input shape for tf.nn.conv_2d 32 | conv_filt_shape = [filter_shape[0], filter_shape[1], num_input_channels, 33 | num_filters] 34 | 35 | # initialise weights and bias for the filter 36 | weights = tf.Variable(tf.truncated_normal(conv_filt_shape, stddev=0.03), 37 | name=name+'_W') 38 | bias = tf.Variable(tf.truncated_normal([num_filters]), name=name+'_b') 39 | 40 | # setup the convolutional layer operation 41 | out_layer = tf.nn.conv2d(input_data, weights, [1, 1, 1, 1], padding='VALID') 42 | 43 | # add the bias 44 | out_layer += bias 45 | 46 | # apply a ReLU non-linear activation 47 | out_layer = tf.layers.dropout(tf.nn.relu(out_layer), 0.3, self.is_training) 48 | 49 | # now perform max pooling 50 | ksize = [1, pool_shape[0], pool_shape[1], 1] 51 | strides = [1, 1, 2, 1] 52 | out_layer = tf.nn.avg_pool(out_layer, ksize=ksize, strides=strides, 53 | padding='SAME') 54 | return out_layer 55 | #def end: create_new_conv_layer 56 | 57 | L_pool_0 = create_new_conv_layer(self.inputs, 4, filter_num[0], [1, filter_size[0]], [1, 2], name='conv1') 58 | L_pool_1 = create_new_conv_layer(self.inputs, 4, filter_num[1], [1, filter_size[1]], [1, 2], name='conv2') 59 | L_pool_2 = create_new_conv_layer(self.inputs, 4, filter_num[2], [1, filter_size[2]], [1, 2], name='conv3') 60 | with 
tf.variable_scope('Fully_Connected_Layer1'): 61 | layer_node_0 = int((length-filter_size[0])/2)+1 62 | node_num_0 = layer_node_0*filter_num[0] 63 | layer_node_1 = int((length-filter_size[1])/2)+1 64 | node_num_1 = layer_node_1*filter_num[1] 65 | layer_node_2 = int((length-filter_size[2])/2)+1 66 | node_num_2 = layer_node_2*filter_num[2] 67 | L_flatten_0 = tf.reshape(L_pool_0, [-1, node_num_0]) 68 | L_flatten_1 = tf.reshape(L_pool_1, [-1, node_num_1]) 69 | L_flatten_2 = tf.reshape(L_pool_2, [-1, node_num_2]) 70 | L_flatten = tf.concat([L_flatten_0, L_flatten_1, L_flatten_2], 1, name='concat') 71 | node_num = node_num_0 + node_num_1 + node_num_2 72 | W_fcl1 = tf.get_variable("W_fcl1", shape=[node_num, node_1]) 73 | B_fcl1 = tf.get_variable("B_fcl1", shape=[node_1]) 74 | L_fcl1_pre = tf.nn.bias_add(tf.matmul(L_flatten, W_fcl1), B_fcl1) 75 | L_fcl1 = tf.nn.relu(L_fcl1_pre) 76 | L_fcl1_drop = tf.layers.dropout(L_fcl1, 0.3, self.is_training) 77 | 78 | with tf.variable_scope('Fully_Connected_Layer2'): 79 | W_fcl2 = tf.get_variable("W_fcl2", shape=[node_1, node_2]) 80 | B_fcl2 = tf.get_variable("B_fcl2", shape=[node_2]) 81 | L_fcl2_pre = tf.nn.bias_add(tf.matmul(L_fcl1_drop, W_fcl2), B_fcl2) 82 | L_fcl2 = tf.nn.relu(L_fcl2_pre) 83 | L_fcl2_drop = tf.layers.dropout(L_fcl2, 0.3, self.is_training) 84 | 85 | with tf.variable_scope('Output_Layer'): 86 | W_out = tf.get_variable("W_out", shape=[node_2, 1])#, initializer=tf.contrib.layers.xavier_initializer()) 87 | B_out = tf.get_variable("B_out", shape=[1])#, initializer=tf.contrib.layers.xavier_initializer()) 88 | self.outputs = tf.nn.bias_add(tf.matmul(L_fcl2_drop, W_out), B_out) 89 | 90 | # Define loss function and optimizer 91 | self.obj_loss = tf.reduce_mean(tf.square(self.targets - self.outputs)) 92 | self.optimizer = tf.train.AdamOptimizer(l_rate).minimize(self.obj_loss) 93 | #def end: def __init__ 94 | #class end: DeepCas9-NG 95 | 96 | def Model_Finaltest(sess, TEST_X, filter_size, filter_num, model, load_episode, best_model_path): 97 | test_batch = 500 98 | test_spearman = 0.0 99 | optimizer = model.optimizer 100 | TEST_Z = np.zeros((TEST_X.shape[0], 1), dtype=float) 101 | 102 | for i in range(int(np.ceil(float(TEST_X.shape[0])/float(test_batch)))): 103 | Dict = {model.inputs: TEST_X[i*test_batch:(i+1)*test_batch], model.is_training: False} 104 | TEST_Z[i*test_batch:(i+1)*test_batch] = sess.run([model.outputs], feed_dict=Dict)[0] 105 | 106 | OUT = open("RANK_final_{}.txt".format(best_model_path.split('/')[1]), "a") 107 | OUT.write("Testing final \n {} ".format(tuple(TEST_Z.reshape([np.shape(TEST_Z)[0]])))) 108 | OUT.write("\n") 109 | OUT.close() 110 | return 111 | #def end: Model_Finaltest 112 | 113 | 114 | def preprocess_seq(data): 115 | print("Start preprocessing the sequence done 2d") 116 | length = 30 117 | 118 | DATA_X = np.zeros((len(data),1,length,4), dtype=int) 119 | print(np.shape(data), len(data), length) 120 | for l in range(len(data)): 121 | for i in range(length): 122 | 123 | try: data[l][i] 124 | except: print(data[l], i, length, len(data)) 125 | 126 | if data[l][i]in "Aa": DATA_X[l, 0, i, 0] = 1 127 | elif data[l][i] in "Cc": DATA_X[l, 0, i, 1] = 1 128 | elif data[l][i] in "Gg": DATA_X[l, 0, i, 2] = 1 129 | elif data[l][i] in "Tt": DATA_X[l, 0, i, 3] = 1 130 | else: 131 | print "Non-ATGC character " + data[l] 132 | print i 133 | print data[l][i] 134 | sys.exit() 135 | #loop end: i 136 | #loop end: l 137 | print("Preprocessing the sequence done") 138 | return DATA_X 139 | #def end: preprocess_seq 140 | 141 | 142 | def 
getseq(filenum): 143 | param = parameters['%s' % filenum] 144 | FILE = open(path+param, "r") 145 | data = FILE.readlines() 146 | data_n = len(data) - 1 147 | seq = [] 148 | 149 | for l in range(1, data_n+1): 150 | try: 151 | data_split = data[l].split() 152 | seq.append(data_split[1]) 153 | except: 154 | print data[l] 155 | seq.append(data[l]) 156 | #loop end: l 157 | FILE.close() 158 | processed_full_seq = preprocess_seq(seq) 159 | 160 | return processed_full_seq, seq 161 | #def end: getseq 162 | 163 | 164 | #TensorFlow config 165 | conf = tf.ConfigProto() 166 | conf.gpu_options.allow_growth = True 167 | os.environ['CUDA_VISIBLE_DEVICES'] = '0' 168 | best_model_cv = 0.0 169 | best_model_list = [] 170 | 171 | for best_model_path in best_model_path_list: 172 | for modelname in os.listdir(best_model_path): 173 | if "meta" in modelname: 174 | best_model_list.append(modelname[:-5]) 175 | #loop end: best_model_path 176 | 177 | TEST_X = [] 178 | TEST_X_nohot = [] 179 | for TEST_NUM in TEST_NUM_SET: 180 | tmp_X, tmp_X_nohot = getseq(TEST_NUM) 181 | TEST_X.append(tmp_X) 182 | TEST_X_nohot.append(tmp_X_nohot) 183 | #loop end: TEST_NUM 184 | 185 | 186 | for index in range(len(best_model_list)): 187 | best_model_path = best_model_path_list[index] 188 | best_model = best_model_list[index] 189 | valuelist = best_model.split('-') 190 | fulllist = [] 191 | 192 | for value in valuelist: 193 | if value == 'True': value=True 194 | elif value == 'False': value=False 195 | else: 196 | try: 197 | value=int(value) 198 | except: 199 | try: value=float(value) 200 | except: pass 201 | fulllist.append(value) 202 | #loop end: value 203 | 204 | print(fulllist[2:]) 205 | 206 | filter_size_1, filter_size_2, filter_size_3, filter_num_1, filter_num_2, filter_num_3, l_rate, load_episode, node_1, node_2 = fulllist[2:] 207 | filter_size = [filter_size_1, filter_size_2, filter_size_3] 208 | filter_num = [filter_num_1, filter_num_2, filter_num_3] 209 | #if end: fulllist[2:][-3] is True: 210 | 211 | args = [filter_size, filter_num, l_rate, load_episode] 212 | tf.reset_default_graph() 213 | with tf.Session(config=conf) as sess: 214 | sess.run(tf.global_variables_initializer()) 215 | model = DeepCas9_NG(filter_size, filter_num, node_1, node_2, args[2]) 216 | 217 | saver = tf.train.Saver() 218 | saver.restore(sess, best_model_path + best_model) 219 | 220 | OUT = open("RANK_final_{}.txt".format(best_model_path.split('/')[1]), "a") 221 | OUT.write("{}".format(best_model)) 222 | OUT.write("\n") 223 | OUT.close() 224 | 225 | TEST_Y = [] 226 | for i in range(len(TEST_NUM_SET)): 227 | print ("TEST_NUM : {}".format(TEST_NUM_SET[i])) 228 | 229 | OUT = open("RANK_final_{}.txt".format(best_model_path.split('/')[1]), "a") 230 | OUT.write("\n") 231 | OUT.write("TEST_FILE : {}".format(parameters['{}'.format(TEST_NUM_SET[i])])) 232 | OUT.write("\n") 233 | OUT.close() 234 | Model_Finaltest(sess, TEST_X[i], filter_size, filter_num, model, load_episode, best_model_path) 235 | #loop end: i 236 | 237 | OUT = open("RANK_final_{}.txt".format(best_model_path.split('/')[1]), "a") 238 | OUT.write("\n") 239 | OUT.close() -------------------------------------------------------------------------------- /DeepCas9-NG/dataset/sample.txt: -------------------------------------------------------------------------------- 1 | Target number 30 bp target sequence (4 bp + 20 bp protospacer + PAM + 3 bp) 2 | 1 CTCAGTACCTATCTGAGCATACCGTGTGTT 3 | 2 GATTGCAGAGGTAGAATCAGCAGGTGCTGT -------------------------------------------------------------------------------- 
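Note: Test.py above appends its predictions to RANK_final_<weight-folder>.txt as a printed Python tuple on the line after "Testing final", rather than one score per row. A minimal sketch for recovering the scores from that file is given below, assuming a single test file per run; the helper name, the regular-expression parsing, and the example file name are illustrative assumptions rather than part of the repository.

# parse_rank_output.py -- illustrative sketch, not part of the original code.
import re

def read_scores(rank_file):
    # everything after the last "Testing final" marker is the tuple of predictions
    with open(rank_file) as handle:
        tail = handle.read().split("Testing final")[-1]
    return [float(value) for value in re.findall(r"-?\d+\.\d+", tail)]

if __name__ == "__main__":
    # scores come back in the same order as the targets in dataset/sample.txt
    print(read_scores("RANK_final_DeepCas9-NG_weight.txt"))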
/DeepCas9/DeepCas9_Final/PreTrain-Final-False-3-5-7-100-70-40-0.001-550-True-80-60.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyungjaeSong/Paired-Library/9aa18cd418a0258382c02846604d3d399c8ab020/DeepCas9/DeepCas9_Final/PreTrain-Final-False-3-5-7-100-70-40-0.001-550-True-80-60.data-00000-of-00001 -------------------------------------------------------------------------------- /DeepCas9/DeepCas9_Final/PreTrain-Final-False-3-5-7-100-70-40-0.001-550-True-80-60.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyungjaeSong/Paired-Library/9aa18cd418a0258382c02846604d3d399c8ab020/DeepCas9/DeepCas9_Final/PreTrain-Final-False-3-5-7-100-70-40-0.001-550-True-80-60.index -------------------------------------------------------------------------------- /DeepCas9/DeepCas9_Final/PreTrain-Final-False-3-5-7-100-70-40-0.001-550-True-80-60.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyungjaeSong/Paired-Library/9aa18cd418a0258382c02846604d3d399c8ab020/DeepCas9/DeepCas9_Final/PreTrain-Final-False-3-5-7-100-70-40-0.001-550-True-80-60.meta -------------------------------------------------------------------------------- /DeepCas9/DeepCas9_TestCode.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | from os.path import exists 3 | from os import system 4 | 5 | import tensorflow as tf 6 | import numpy as np 7 | import scipy.misc 8 | import scipy.stats 9 | from scipy.stats import rankdata 10 | 11 | np.set_printoptions(threshold='nan') 12 | 13 | ############################################################################## 14 | 15 | 16 | 17 | 18 | ############################################################################## 19 | ## System Paths ## 20 | path = './dataset/' 21 | parameters = {'0': 'sample.txt'} # Dictionary can be expanded for multiple test parameters 22 | 23 | ## Run Parameters ## 24 | TEST_NUM_SET = [0] # List can be expanded in case of multiple test parameters 25 | best_model_path_list = ['./DeepCas9_Final/'] 26 | 27 | # Model 28 | length = 30 29 | 30 | class DeepCas9(object): 31 | def __init__(self, filter_size, filter_num, node_1 = 80, node_2 = 60, l_rate = 0.005): 32 | self.inputs = tf.placeholder(tf.float32, [None, 1, length, 4]) 33 | self.targets = tf.placeholder(tf.float32, [None, 1]) 34 | self.is_training = tf.placeholder(tf.bool) 35 | def create_new_conv_layer(input_data, num_input_channels, num_filters, filter_shape, pool_shape, name): 36 | # setup the filter input shape for tf.nn.conv_2d 37 | conv_filt_shape = [filter_shape[0], filter_shape[1], num_input_channels, 38 | num_filters] 39 | 40 | # initialise weights and bias for the filter 41 | weights = tf.Variable(tf.truncated_normal(conv_filt_shape, stddev=0.03), 42 | name=name+'_W') 43 | bias = tf.Variable(tf.truncated_normal([num_filters]), name=name+'_b') 44 | 45 | # setup the convolutional layer operation 46 | out_layer = tf.nn.conv2d(input_data, weights, [1, 1, 1, 1], padding='VALID') 47 | 48 | # add the bias 49 | out_layer += bias 50 | 51 | # apply a ReLU non-linear activation 52 | out_layer = tf.layers.dropout(tf.nn.relu(out_layer), 0.3, self.is_training) 53 | 54 | # now perform max pooling 55 | ksize = [1, pool_shape[0], pool_shape[1], 1] 56 | strides = [1, 1, 2, 1] 57 | out_layer = tf.nn.avg_pool(out_layer, ksize=ksize, strides=strides, 58 
| padding='SAME') 59 | return out_layer 60 | #def end: create_new_conv_layer 61 | 62 | L_pool_0 = create_new_conv_layer(self.inputs, 4, filter_num[0], [1, filter_size[0]], [1, 2], name='conv1') 63 | L_pool_1 = create_new_conv_layer(self.inputs, 4, filter_num[1], [1, filter_size[1]], [1, 2], name='conv2') 64 | L_pool_2 = create_new_conv_layer(self.inputs, 4, filter_num[2], [1, filter_size[2]], [1, 2], name='conv3') 65 | with tf.variable_scope('Fully_Connected_Layer1'): 66 | layer_node_0 = int((length-filter_size[0])/2)+1 67 | node_num_0 = layer_node_0*filter_num[0] 68 | layer_node_1 = int((length-filter_size[1])/2)+1 69 | node_num_1 = layer_node_1*filter_num[1] 70 | layer_node_2 = int((length-filter_size[2])/2)+1 71 | node_num_2 = layer_node_2*filter_num[2] 72 | L_flatten_0 = tf.reshape(L_pool_0, [-1, node_num_0]) 73 | L_flatten_1 = tf.reshape(L_pool_1, [-1, node_num_1]) 74 | L_flatten_2 = tf.reshape(L_pool_2, [-1, node_num_2]) 75 | L_flatten = tf.concat([L_flatten_0, L_flatten_1, L_flatten_2], 1, name='concat') 76 | node_num = node_num_0 + node_num_1 + node_num_2 77 | W_fcl1 = tf.get_variable("W_fcl1", shape=[node_num, node_1]) 78 | B_fcl1 = tf.get_variable("B_fcl1", shape=[node_1]) 79 | L_fcl1_pre = tf.nn.bias_add(tf.matmul(L_flatten, W_fcl1), B_fcl1) 80 | L_fcl1 = tf.nn.relu(L_fcl1_pre) 81 | L_fcl1_drop = tf.layers.dropout(L_fcl1, 0.3, self.is_training) 82 | 83 | with tf.variable_scope('Fully_Connected_Layer2'): 84 | W_fcl2 = tf.get_variable("W_fcl2", shape=[node_1, node_2]) 85 | B_fcl2 = tf.get_variable("B_fcl2", shape=[node_2]) 86 | L_fcl2_pre = tf.nn.bias_add(tf.matmul(L_fcl1_drop, W_fcl2), B_fcl2) 87 | L_fcl2 = tf.nn.relu(L_fcl2_pre) 88 | L_fcl2_drop = tf.layers.dropout(L_fcl2, 0.3, self.is_training) 89 | 90 | with tf.variable_scope('Output_Layer'): 91 | W_out = tf.get_variable("W_out", shape=[node_2, 1])#, initializer=tf.contrib.layers.xavier_initializer()) 92 | B_out = tf.get_variable("B_out", shape=[1])#, initializer=tf.contrib.layers.xavier_initializer()) 93 | self.outputs = tf.nn.bias_add(tf.matmul(L_fcl2_drop, W_out), B_out) 94 | 95 | # Define loss function and optimizer 96 | self.obj_loss = tf.reduce_mean(tf.square(self.targets - self.outputs)) 97 | self.optimizer = tf.train.AdamOptimizer(l_rate).minimize(self.obj_loss) 98 | #def end: def __init__ 99 | #class end: DeepCas9 100 | 101 | def Model_Finaltest(sess, TEST_X, filter_size, filter_num, if3d, model, args, load_episode, best_model_path): 102 | test_batch = 500 103 | test_spearman = 0.0 104 | optimizer = model.optimizer 105 | TEST_Z = np.zeros((TEST_X.shape[0], 1), dtype=float) 106 | 107 | for i in range(int(np.ceil(float(TEST_X.shape[0])/float(test_batch)))): 108 | Dict = {model.inputs: TEST_X[i*test_batch:(i+1)*test_batch], model.is_training: False} 109 | TEST_Z[i*test_batch:(i+1)*test_batch] = sess.run([model.outputs], feed_dict=Dict)[0] 110 | 111 | OUT = open("RANK_final_{}.txt".format(best_model_path.split('/')[1]), "a") 112 | OUT.write("Testing final \n {} ".format(tuple(TEST_Z.reshape([np.shape(TEST_Z)[0]])))) 113 | OUT.write("\n") 114 | OUT.close() 115 | return 116 | #def end: Model_Finaltest 117 | 118 | 119 | def preprocess_seq(data): 120 | print("Start preprocessing the sequence done 2d") 121 | length = 30 122 | 123 | DATA_X = np.zeros((len(data),1,length,4), dtype=int) 124 | print(np.shape(data), len(data), length) 125 | for l in range(len(data)): 126 | for i in range(length): 127 | 128 | try: data[l][i] 129 | except: print(data[l], i, length, len(data)) 130 | 131 | if data[l][i]in "Aa": DATA_X[l, 0, i, 0] = 1 
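                # one-hot channel order along the last axis: 0 = A, 1 = C, 2 = G, 3 = T; any other character aborts the run below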
132 | elif data[l][i] in "Cc": DATA_X[l, 0, i, 1] = 1 133 | elif data[l][i] in "Gg": DATA_X[l, 0, i, 2] = 1 134 | elif data[l][i] in "Tt": DATA_X[l, 0, i, 3] = 1 135 | else: 136 | print "Non-ATGC character " + data[l] 137 | print i 138 | print data[l][i] 139 | sys.exit() 140 | #loop end: i 141 | #loop end: l 142 | print("Preprocessing the sequence done") 143 | return DATA_X 144 | #def end: preprocess_seq 145 | 146 | 147 | def getseq(filenum): 148 | param = parameters['%s' % filenum] 149 | FILE = open(path+param, "r") 150 | data = FILE.readlines() 151 | data_n = len(data) - 1 152 | seq = [] 153 | 154 | for l in range(1, data_n+1): 155 | try: 156 | data_split = data[l].split() 157 | seq.append(data_split[1]) 158 | except: 159 | print data[l] 160 | seq.append(data[l]) 161 | #loop end: l 162 | FILE.close() 163 | processed_full_seq = preprocess_seq(seq) 164 | 165 | return processed_full_seq, seq 166 | #def end: getseq 167 | 168 | 169 | #TensorFlow config 170 | conf = tf.ConfigProto() 171 | conf.gpu_options.allow_growth = True 172 | os.environ['CUDA_VISIBLE_DEVICES'] = '0' 173 | best_model_cv = 0.0 174 | best_model_list = [] 175 | 176 | for best_model_path in best_model_path_list: 177 | for modelname in os.listdir(best_model_path): 178 | if "meta" in modelname: 179 | best_model_list.append(modelname[:-5]) 180 | #loop end: best_model_path 181 | 182 | TEST_X = [] 183 | TEST_X_nohot = [] 184 | for TEST_NUM in TEST_NUM_SET: 185 | tmp_X, tmp_X_nohot = getseq(TEST_NUM) 186 | TEST_X.append(tmp_X) 187 | TEST_X_nohot.append(tmp_X_nohot) 188 | #loop end: TEST_NUM 189 | 190 | 191 | for index in range(len(best_model_list)): 192 | best_model_path = best_model_path_list[index] 193 | best_model = best_model_list[index] 194 | valuelist = best_model.split('-') 195 | fulllist = [] 196 | 197 | for value in valuelist: 198 | if value == 'True': value=True 199 | elif value == 'False': value=False 200 | else: 201 | try: 202 | value=int(value) 203 | except: 204 | try: value=float(value) 205 | except: pass 206 | fulllist.append(value) 207 | #loop end: value 208 | 209 | print(fulllist[2:]) 210 | 211 | if fulllist[2:][-3] is True: 212 | if3d, filter_size_1, filter_size_2, filter_size_3, filter_num_1, filter_num_2, filter_num_3, l_rate, load_episode, inception, node_1, node_2 = fulllist[2:] 213 | filter_size = [filter_size_1, filter_size_2, filter_size_3] 214 | filter_num = [filter_num_1, filter_num_2, filter_num_3] 215 | else: 216 | if3d, filter_size, filter_num, l_rate, load_episode, inception, node_1, node_2 = fulllist[2:] 217 | #if end: fulllist[2:][-3] is True: 218 | 219 | args = [filter_size, filter_num, l_rate, load_episode] 220 | tf.reset_default_graph() 221 | with tf.Session(config=conf) as sess: 222 | sess.run(tf.global_variables_initializer()) 223 | model = DeepCas9(filter_size, filter_num, node_1, node_2, args[2]) 224 | 225 | saver = tf.train.Saver() 226 | saver.restore(sess, best_model_path + best_model) 227 | 228 | OUT = open("RANK_final_{}.txt".format(best_model_path.split('/')[1]), "a") 229 | OUT.write("{}".format(best_model)) 230 | OUT.write("\n") 231 | OUT.close() 232 | 233 | TEST_Y = [] 234 | for i in range(len(TEST_NUM_SET)): 235 | print ("TEST_NUM : {}".format(TEST_NUM_SET[i])) 236 | 237 | OUT = open("RANK_final_{}.txt".format(best_model_path.split('/')[1]), "a") 238 | OUT.write("\n") 239 | OUT.write("TEST_FILE : {}".format(parameters['{}'.format(TEST_NUM_SET[i])])) 240 | OUT.write("\n") 241 | OUT.close() 242 | Model_Finaltest(sess, TEST_X[i], filter_size, filter_num, if3d, model, args, load_episode, 
best_model_path) 243 | #loop end: i 244 | 245 | OUT = open("RANK_final_{}.txt".format(best_model_path.split('/')[1]), "a") 246 | OUT.write("\n") 247 | OUT.close() 248 | -------------------------------------------------------------------------------- /DeepCas9/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Hyoungbum (Henry) Kim, Hui Kwon Kim, Younggwang Kim, and Sungtae Lee 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /DeepCas9/README.txt: -------------------------------------------------------------------------------- 1 | 1. System Requirements: 2 | Ubuntu 16.04 3 | Python 2.7.12 4 | Python Packages: 5 | numpy 1.14.5 6 | scipy 1.1.0 7 | 8 | Tensorflow and dependencies: 9 | Tensorflow 1.4.1 10 | CUDA 8.0.61 11 | cuDNN 5.1.10 12 | 13 | 2. Installation Guide (required time, <120 minutes): 14 | 15 | - Operation System 16 | Ubuntu 16.04 download from https://www.ubuntu.com/download/desktop 17 | 18 | - Python and packages 19 | Download Python 2.7.12 tarball on https://www.python.org/downloads/release/python-2712/ 20 | Unzip and install: 21 | tar -zxvf Python-2.7.12.tgz 22 | cd ./Python-2.7.12 23 | ./configure 24 | make 25 | 26 | Package Installation: 27 | pip install numpy==1.14.5 28 | pip install scipy==1.1.0 29 | 30 | Tensorflow Installation: 31 | (for GPU use) 32 | pip install tensorflow-gpu==1.4.1 33 | (for CPU only) 34 | pip install tensorflow==1.4.1 35 | 36 | 37 | (for GPU use) 38 | 39 | - CUDA Toolkit 8.0 40 | wget -O cuda_8_linux.run https://developer.nvidia.com/compute/cuda/8.0/Prod2/local_installers/cuda_8.0.61_375.26_linux-run 41 | sudo chmod +x cuda_8_linux.run 42 | ./cuda_8.0.61_375.26_linux.run 43 | 44 | - cuDNN 5.1.10 45 | Download CUDNN tarball on https://developer.nvidia.com/cudnn 46 | Unzip and install: 47 | tar -zxvf cudnn-8.0-linux-x64-v5.1.tgz 48 | 49 | For more details, please refer to CUDA, CuDNN, and Tensorflow installation guide on Github: 50 | https://gist.github.com/ksopyla/813a62d6afc4307755e5832a3b62f432 51 | 52 | 53 | 3. 
Demo Instructions (required time, <1 min): 54 | 55 | Input1: ./dataset/ # List of Target Sequence(s) 56 | File format: 57 | Target number 30 bp target sequence (4 bp + 20 bp protospacer + PAM + 3 bp) 58 | 1 TAAGAGAGTGGTAATAGAAGTGCCAGGTAT 59 | 2 CCCTCATGGTGCAGCTAAAGGCCCAGGAGC 60 | 61 | Input2: ./DeepCas9_Final/ # Pre-trained Weight Files 62 | 63 | Output: RANK_final_DeepCas9_Final.txt 64 | Predicted activity score for sequence 1 and 2: 65 | 67.5565185546875, 56.930904388427734 66 | 67 | Run script: 68 | python ./DeepCas9_TestCode.py 69 | 70 | Modification for personalized runs: 71 | 72 | 73 | ## System Paths ## 74 | path = './dataset/' 75 | parameters = {'0': 'sample.txt'} 76 | 77 | ## Run Parameters ## 78 | TEST_NUM_SET = [0] # List can be expanded in case of multiple test parameters 79 | best_model_path_list = ['./DeepCas9_Final/'] 80 | 81 | sample.txt can be replaced or modified to include target sequence of interest 82 | 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /DeepCas9/dataset/sample.txt: -------------------------------------------------------------------------------- 1 | Target number 30 bp target sequence (4 bp + 20 bp protospacer + PAM + 3 bp) 2 | 1 TAAGAGAGTGGTAATAGAAGTGCCAGGTAT 3 | 2 CCCTCATGGTGCAGCTAAAGGCCCAGGAGC -------------------------------------------------------------------------------- /DeepCpf1/Analysis of indel frequency/analyser_v3.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyungjaeSong/Paired-Library/9aa18cd418a0258382c02846604d3d399c8ab020/DeepCpf1/Analysis of indel frequency/analyser_v3.exe -------------------------------------------------------------------------------- /DeepCpf1/Analysis of indel frequency/analyser_v3.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | #-*- coding: utf-8 -*- 3 | __author__ = 'forestkeep21@naver.com' 4 | 5 | import re 6 | import sys 7 | import os 8 | 9 | from Levenshtein import editops 10 | # 해당 라이브러리 도큐먼트 11 | # https://rawgit.com/ztane/python-Levenshtein/master/docs/Levenshtein.html 12 | 13 | BASE_DIR = os.path.dirname(sys.executable) 14 | 15 | #for debug 16 | # BASE_DIR = os.path.dirname(os.path.realpath(__file__)) 17 | 18 | 19 | def seq_validator(data): 20 | # 문자열이 시퀀스인지 판별. 판별 끝난 후 비교를 쉽게 하기 위해 모두 대문자로 변환한다. 21 | m = re.findall(r'^[A|a|T|t|C|c|G|g]+$', data) 22 | return m[0].upper() if m else None 23 | 24 | 25 | def count_line_in_file(file_name): 26 | # 파일내 라인 카운터 27 | count = 0 28 | for line in open(file_name, 'r'): 29 | count += 1 30 | return count 31 | 32 | 33 | def do(input_file_name, backward_target_length, dest_folder_path): 34 | cur_cnt = 0 35 | target_cnt = count_line_in_file(input_file_name) 36 | final_results = {} 37 | 38 | for target_set in open(input_file_name, 'r'): 39 | # 인풋 파일은 이름 : 와일드시퀀스 : 타겟 의 형태를 띈다. 
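        # Each input line is "name : wild-type sequence : target".
        # For every entry the script reads <name>.txt from the user-supplied results folder,
        # finds the target inside the wild-type sequence, and counts a read as mutated when
        # Levenshtein editops() reports an insertion or deletion (substitutions are ignored)
        # within the last backward_target_length bases of the target; per-target results go
        # to analyse_results/<name>.txt and a summary to analyse_results/result_info.txt.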
40 | tmp = target_set.split(':') 41 | file_name_no_ext = tmp[0].strip() 42 | file_name = '{}.txt'.format(file_name_no_ext) 43 | wild_seq = tmp[1].strip() 44 | target = tmp[2].strip() 45 | 46 | # 타겟 검사 47 | target = seq_validator(target) 48 | if not target: 49 | continue 50 | 51 | # 와일드 시퀀스 검사 52 | wild_seq = seq_validator(wild_seq) 53 | if not wild_seq: 54 | continue 55 | 56 | # 결과 저장용 폴더 생성 57 | result_folder_name = os.path.join(BASE_DIR, 'analyse_results') 58 | if not os.path.exists(result_folder_name): 59 | os.makedirs(result_folder_name) 60 | 61 | try: 62 | # 결과 임시 저장 dict 63 | result = { 64 | 'total_cnt': count_line_in_file(os.path.join(dest_folder_path, file_name)), 65 | 'mutated_cnt': 0, 66 | 'mutated_rates': 0.0, 67 | 'mutated_dict': {} 68 | } 69 | 70 | for line in open(os.path.join(dest_folder_path, file_name), 'r'): 71 | # 대상 시퀀스 검사 72 | line = seq_validator(line) 73 | if not line: 74 | continue 75 | 76 | # 와일드 시퀀스와 타겟을 이용하여 와일드 시퀀스에서 타겟의 시작, 종료 위치를 파악한다. editops에서 사용. 77 | target_start_pos_in_wild = int(wild_seq.find(target)) 78 | target_end_pos = target_start_pos_in_wild + len(target) 79 | 80 | # 와일드 시퀀스를 기준으로 대상 시퀀스와 비교하여 레벤슈타인 유사도 측정에서 editops를 뽑아낸다. 81 | # editops는 (변형방법, 와일드시퀀스 기준 위치, 대상시퀀스 기준 위치) 의 형태로 결과가 나온다. 82 | # 예를 들어, editops('test', 'teaasz') 의 경우 [('insert', 2, 2), ('insert', 2, 3), ('replace', 3, 5)] 83 | # 1번 인덱스 : 삽입이 와일드시퀀스 기준 2번째, 대상시퀀스 기준 2번째에서 발생 84 | # 2번 인덱스 : 삽입이 와일드시퀀스 기준 2번째, 대상시퀀스 기준 3번째에서 발생 85 | # 3번 인덱스 : 교체가 와일드시퀀스 기준 3번째, 대상시퀀스 기준 5번째에서 발생 86 | # 때문에 와일드시퀀스에서 타겟의 위치만 정확히 파악한다면 대상시퀀스에서 변형이 어느부분에 일어났는지 87 | # 몰라도 사용자가 지정한 위치에서의 변형 여부를 충분히 잡아낼 수 있다. 88 | for mutation_info in editops(wild_seq, line): 89 | # 사용자 지정 위치 검사(타겟의 뒤에서부터 backward_target_length 번째까지) 90 | if target_end_pos - int(backward_target_length) <= mutation_info[1] <= target_end_pos: 91 | # 교체는 변형으로 치지 않는다. 92 | if mutation_info[0] != 'replace': 93 | # 여기까지 왔다면 변형으로 쳐서 카운트+1 94 | result['mutated_cnt'] += 1 95 | # 변형된 대상시퀀스를 결과 출력을 위해 저장하고 동일 시퀀스 갯수 조사를 위해 카운팅한다. 96 | if line not in result['mutated_dict'].keys(): 97 | result['mutated_dict'][line] = 1 98 | else: 99 | result['mutated_dict'][line] += 1 100 | break 101 | 102 | # 변형 퍼센티지 계산 103 | try: 104 | result['mutated_rates'] = float(result['mutated_cnt']) / result['total_cnt'] * 100 105 | except: 106 | result['mutated_rates'] = 0 107 | 108 | # 각 결과값 저장. 109 | with open(os.path.join(result_folder_name, file_name), 'w') as f: 110 | for mutated_seq, cnt in result['mutated_dict'].items(): 111 | f.write('{} X {}\n'.format(mutated_seq, cnt)) 112 | f.write('--------\n') 113 | f.write('mutation rates : {} %'.format(result['mutated_rates'])) 114 | 115 | except Exception as e: 116 | print e 117 | print file_name, ' not found.' 118 | pass 119 | else: 120 | # 문제 없다면 결과물을 모은다. 121 | final_results[file_name_no_ext] = result 122 | 123 | # 타겟 하나 분석 종료 카운트+1 124 | cur_cnt += 1 125 | # 진행율 화면 표시 126 | progress_percentage = float(cur_cnt) / target_cnt * 100 127 | print '{} % done'.format(progress_percentage) 128 | 129 | # 최종 결과물 파일 저장. 130 | with open(os.path.join(result_folder_name, 'result_info.txt'), 'w') as f: 131 | for name, data in final_results.items(): 132 | f.write('{} : {} : {}/{}\n'.format(name, data['mutated_rates'], data['mutated_cnt'], data['total_cnt'])) 133 | 134 | if __name__ == '__main__': 135 | print u'Input file name with extension: ' 136 | # 이름 : 와일드시퀀스 : 타겟 으로 구성된 파일을 입력받는다. 
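    # prompt for the analysis list: one "name : wild-type sequence : target" entry per line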
137 | input_file_name = raw_input() 138 | input_file_name = os.path.join(BASE_DIR, input_file_name) 139 | 140 | if not os.path.isfile(input_file_name): 141 | print u'File Not Found. Check it is in same folder' 142 | raise 143 | 144 | print u'Input length to check mutation from backward of target: ' 145 | # 사용자 지정 위치를 입력받는다. 타겟의 제일 위에서부터 ~번째이다. 146 | backward_target_length = raw_input() 147 | 148 | print u'Input result folder name: ' 149 | # 추출기가 뽑아낸 대상시퀀스들이 모여있는 폴더 이름 입력. 150 | dest_folder_name = raw_input() 151 | dest_folder_name = os.path.join(BASE_DIR, dest_folder_name) 152 | 153 | if not os.path.isdir(dest_folder_name): 154 | print u'Folder Not Found' 155 | raise 156 | 157 | # 분석시작 158 | do(input_file_name, backward_target_length, dest_folder_name) 159 | 160 | print u'Well done. Press any key' 161 | raw_input() 162 | 163 | -------------------------------------------------------------------------------- /DeepCpf1/Analysis of indel frequency/extractor_v3.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyungjaeSong/Paired-Library/9aa18cd418a0258382c02846604d3d399c8ab020/DeepCpf1/Analysis of indel frequency/extractor_v3.exe -------------------------------------------------------------------------------- /DeepCpf1/Analysis of indel frequency/extractor_v3.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | #-*- coding: utf-8 -*- 3 | __author__ = 'forestkeep21@naver.com' 4 | 5 | import re 6 | import sys 7 | import os 8 | 9 | BASE_DIR = os.path.dirname(sys.executable) 10 | 11 | #for debug 12 | # BASE_DIR = os.path.dirname(os.path.realpath(__file__)) 13 | 14 | 15 | def seq_validator(data): 16 | m = re.findall(r'^[A|a|T|t|C|c|G|g]+$', data) 17 | return m[0] if m else None 18 | 19 | 20 | def count_line_in_file(file_name): 21 | count = 0 22 | for line in open(file_name, 'r'): 23 | count += 1 24 | return count 25 | 26 | 27 | def do(src_file_name, dest_file_name): 28 | # 프로그램 진행율을 계산하기 위해 파일의 라인수를 센다. 29 | src_line_cnt = count_line_in_file(src_file_name) 30 | if src_line_cnt == 0: 31 | print u'File Not Found' 32 | raise 33 | current_cnt = 0 34 | extracted_line_index = [] 35 | 36 | # 결과가 저장될 폴더 지정 37 | result_folder_name = os.path.join(BASE_DIR, 'results') 38 | # 추출할 시퀸스가 있는 파일을 읽어온다. 39 | data = [line.strip() for line in open(dest_file_name, 'r') if seq_validator(line)] 40 | # 바코드가 있는 파일을 읽어온다. 41 | barcode_data = [line for line in open(src_file_name, 'r')] 42 | 43 | # 결과가 저장될 폴더가 없다면 하나 생성 44 | if not os.path.exists(result_folder_name): 45 | os.makedirs(result_folder_name) 46 | 47 | try: 48 | # 읽어온 바코드를 속도를 위해 모두 메모리에 올려놓고 분석을 시작한다. 49 | for barcode in barcode_data: 50 | # 바코드셋은 :를 구분자로 앞은 파일명, 뒤는 바코드로 되어있다. 51 | barcode_set = barcode.split(':') 52 | if len(barcode_set) < 2: 53 | continue 54 | # 파일명에서 화이트 스페이스 삭제 55 | file_name = barcode_set[0].strip() 56 | # 바코드가 valid한지 검증 57 | barcode = seq_validator(barcode_set[1].strip()) 58 | 59 | used_data = [] 60 | # 대상이 되는 시퀸스들을 하나하나 분석한다. 61 | for line in data: 62 | # 대상 시퀸스 valid 검증 63 | line = seq_validator(line) 64 | if line is None: 65 | continue 66 | 67 | # 비교를 위해 바코드, 대상 시퀸스 둘다 소문자로 변환하여 바코드가 대상 시퀸스 내에 존재하는지 검사 68 | if barcode.lower() in line.lower(): 69 | # 존재한다면 대상 시퀸스는 이제 필요없으므로 추후 메모리에서 제거하기 위해 따로 보관한다. 70 | used_data.append(line) 71 | 72 | # 결과가 저장될 파일명 지정 73 | file_name = os.path.join(result_folder_name, '{}.txt'.format(file_name)) 74 | # 결과 파일 쓰기 시작 75 | with open(file_name, 'w') as f: 76 | # 추출된 대상 시퀸스들을 파일에 쓴다. 
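                # write every read that contained this barcode into <name>.txt;
                # those reads are then removed from the in-memory pool just below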
77 | for datum in used_data: 78 | f.write('{}\n'.format(datum)) 79 | 80 | # 파일에 전부 옮겨담았다면 메모리에 올라간 전체 대상 시퀸스들에서 파일에 쓴 대상 시퀸스를 뺀다. 81 | [data.remove(used_datum) for used_datum in used_data] 82 | 83 | # 프로그램 진행율 계산 부분 84 | current_cnt += 1 85 | progress_percentage = (float(current_cnt) / src_line_cnt * 100) 86 | print u'{} %'.format(progress_percentage) 87 | 88 | except Exception as e: 89 | print e 90 | print u'Extraction Failure.' 91 | raise 92 | 93 | try: 94 | # 모든 바코드의 분석이 종료되었다면 총 결과파일을 쓴다. 총 결과 파일명 지정 95 | result_info_file_name = os.path.join(result_folder_name, 'result_info.txt') 96 | with open(result_info_file_name, 'w') as f: 97 | # 각 개별 결과 파일을 열어서 98 | for line in open(src_file_name, 'r'): 99 | barcode_set = line.split(':') 100 | file_name = barcode_set[0].strip() 101 | # 라인 수를 센다음에 102 | count = count_line_in_file(os.path.join(BASE_DIR, result_folder_name, '{}.txt'.format(file_name))) 103 | # 총 결과 파일에 파일명 : 라인수 형식으로 쓴다. 104 | f.write('{} : {}\n'.format(file_name, count)) 105 | except Exception as e: 106 | print e 107 | print u'Extraction has been done. But Making a result-info.txt is failed.' 108 | raise 109 | 110 | if __name__ == "__main__": 111 | print u'Input barcode file name with extension: ' 112 | src_file_name = raw_input() 113 | src_file_name = os.path.join(BASE_DIR, src_file_name) 114 | 115 | if not os.path.isfile(src_file_name): 116 | print u'File Not Found. Check it is in same folder' 117 | raise 118 | 119 | print u'Input sequence file name with extension: ' 120 | dest_file_name = raw_input() 121 | dest_file_name = os.path.join(BASE_DIR, dest_file_name) 122 | 123 | if not os.path.isfile(dest_file_name): 124 | print u'File Not Found. Check it is in same folder' 125 | raise 126 | 127 | do(src_file_name, dest_file_name) 128 | 129 | print u'Well done. Press any key' 130 | raw_input() 131 | 132 | -------------------------------------------------------------------------------- /DeepCpf1/DeepCpf1.py: -------------------------------------------------------------------------------- 1 | from numpy import * 2 | import sys; 3 | 4 | from keras import backend as K 5 | from keras.models import Model 6 | from keras.layers import Input 7 | from keras.layers.merge import Multiply 8 | from keras.layers.core import Dense, Dropout, Activation, Flatten 9 | from keras.layers.convolutional import Convolution1D, AveragePooling1D 10 | 11 | def main(): 12 | print "Usage: python DeepCpf1.py input.txt output.txt" 13 | print "input.txt must include 3 columns with single header row" 14 | print "\t1st column: sequence index" 15 | print "\t2nd column: 34bp target sequence" 16 | print "\t3rd column: binary chromain information of the target sequence\n" 17 | 18 | print "DeepCpf1 currently requires python=2.7.12, theano=1.0.1, keras=2.1.5" 19 | print "DeepCpf1 available on GitHub requires pre-obtained binary chromatin information (DNase-seq narraow peak data from ENCODE)" 20 | print "DeepCpf1 web tool, available at http://data.snu.ac.kr/DeepCpf1, provides entire pipeline including binary chromatin accessibility for 125 cell lines\n" 21 | 22 | if len(sys.argv) < 3: 23 | print "ERROR: Not enough arguments for DeepCpf1.py; Check the usage." 24 | sys.exit() 25 | elif K.backend() != "theano": 26 | print "ERROR: Not using the theano backend. Check the requirements." 
27 | sys.exit() 28 | 29 | print "Building models" 30 | Seq_deepCpf1_Input_SEQ = Input(shape=(34,4)) 31 | Seq_deepCpf1_C1 = Convolution1D(80, 5, activation='relu')(Seq_deepCpf1_Input_SEQ) 32 | Seq_deepCpf1_P1 = AveragePooling1D(2)(Seq_deepCpf1_C1) 33 | Seq_deepCpf1_F = Flatten()(Seq_deepCpf1_P1) 34 | Seq_deepCpf1_DO1= Dropout(0.3)(Seq_deepCpf1_F) 35 | Seq_deepCpf1_D1 = Dense(80, activation='relu')(Seq_deepCpf1_DO1) 36 | Seq_deepCpf1_DO2= Dropout(0.3)(Seq_deepCpf1_D1) 37 | Seq_deepCpf1_D2 = Dense(40, activation='relu')(Seq_deepCpf1_DO2) 38 | Seq_deepCpf1_DO3= Dropout(0.3)(Seq_deepCpf1_D2) 39 | Seq_deepCpf1_D3 = Dense(40, activation='relu')(Seq_deepCpf1_DO3) 40 | Seq_deepCpf1_DO4= Dropout(0.3)(Seq_deepCpf1_D3) 41 | Seq_deepCpf1_Output = Dense(1, activation='linear')(Seq_deepCpf1_DO4) 42 | Seq_deepCpf1 = Model(inputs=[Seq_deepCpf1_Input_SEQ], outputs=[Seq_deepCpf1_Output]) 43 | 44 | DeepCpf1_Input_SEQ = Input(shape=(34,4)) 45 | DeepCpf1_C1 = Convolution1D(80, 5, activation='relu')(DeepCpf1_Input_SEQ) 46 | DeepCpf1_P1 = AveragePooling1D(2)(DeepCpf1_C1) 47 | DeepCpf1_F = Flatten()(DeepCpf1_P1) 48 | DeepCpf1_DO1= Dropout(0.3)(DeepCpf1_F) 49 | DeepCpf1_D1 = Dense(80, activation='relu')(DeepCpf1_DO1) 50 | DeepCpf1_DO2= Dropout(0.3)(DeepCpf1_D1) 51 | DeepCpf1_D2 = Dense(40, activation='relu')(DeepCpf1_DO2) 52 | DeepCpf1_DO3= Dropout(0.3)(DeepCpf1_D2) 53 | DeepCpf1_D3_SEQ = Dense(40, activation='relu')(DeepCpf1_DO3) 54 | 55 | DeepCpf1_Input_CA = Input(shape=(1,)) 56 | DeepCpf1_D3_CA = Dense(40, activation='relu')(DeepCpf1_Input_CA) 57 | DeepCpf1_M = Multiply()([DeepCpf1_D3_SEQ, DeepCpf1_D3_CA]) 58 | 59 | DeepCpf1_DO4= Dropout(0.3)(DeepCpf1_M) 60 | DeepCpf1_Output = Dense(1, activation='linear')(DeepCpf1_DO4) 61 | DeepCpf1 = Model(inputs=[DeepCpf1_Input_SEQ, DeepCpf1_Input_CA], outputs=[DeepCpf1_Output]) 62 | 63 | print "Loading weights for the models" 64 | Seq_deepCpf1.load_weights('weights/Seq_deepCpf1_weights.h5') 65 | DeepCpf1.load_weights('weights/DeepCpf1_weights.h5') 66 | 67 | print "Loading test data" 68 | FILE = open(sys.argv[1], "r") 69 | data = FILE.readlines() 70 | SEQ, CA = PREPROCESS(data) 71 | FILE.close() 72 | 73 | print "Predicting on test data" 74 | Seq_deepCpf1_SCORE = Seq_deepCpf1.predict([SEQ], batch_size=50, verbose=0) 75 | DeepCpf1_SCORE = DeepCpf1.predict([SEQ, CA], batch_size=50, verbose=0) * 3 76 | 77 | print "Saving to " + sys.argv[2] 78 | OUTPUT = open(sys.argv[2], "w") 79 | for l in range(len(data)): 80 | if l == 0: 81 | OUTPUT.write(data[l].strip()) 82 | OUTPUT.write("\tSeq-deepCpf1 Score\tDeepCpf1 Score\n") 83 | else: 84 | OUTPUT.write(data[l].strip()) 85 | OUTPUT.write("\t%f\t%f\n" % (Seq_deepCpf1_SCORE[l-1], DeepCpf1_SCORE[l-1])) 86 | OUTPUT.close() 87 | 88 | def PREPROCESS(lines): 89 | data_n = len(lines) - 1 90 | SEQ = zeros((data_n, 34, 4), dtype=int) 91 | CA = zeros((data_n, 1), dtype=int) 92 | 93 | for l in range(1, data_n+1): 94 | data = lines[l].split() 95 | seq = data[1] 96 | for i in range(34): 97 | if seq[i] in "Aa": 98 | SEQ[l-1, i, 0] = 1 99 | elif seq[i] in "Cc": 100 | SEQ[l-1, i, 1] = 1 101 | elif seq[i] in "Gg": 102 | SEQ[l-1, i, 2] = 1 103 | elif seq[i] in "Tt": 104 | SEQ[l-1, i, 3] = 1 105 | CA[l-1,0] = int(data[2])*100 106 | 107 | return SEQ, CA 108 | 109 | if __name__ == '__main__': 110 | main() 111 | 112 | -------------------------------------------------------------------------------- /DeepCpf1/input_example.txt: -------------------------------------------------------------------------------- 1 | Target number 34 bp target sequence (4 bp + PAM + 23 
bp protospacer + 3 bp) "Chromatin accessibility (1= DNase I hypersensitive sites, 0 = Dnase I non-sensitive sites)" 2 | 1 TGACTTTGAATGGAGTCGTGAGCGCAAGAACGCT 1 3 | 2 GTTATTTGAGCAATGCCACTTAATAAACATGTAA 0 -------------------------------------------------------------------------------- /DeepCpf1/output_example.txt: -------------------------------------------------------------------------------- 1 | Target number 34 bp target sequence (4 bp + PAM + 23 bp protospacer + 3 bp) "Chromatin accessibility (1= DNase I hypersensitive sites, 0 = Dnase I non-sensitive sites)" Seq-deepCpf1 Score DeepCpf1 Score 2 | 1 TGACTTTGAATGGAGTCGTGAGCGCAAGAACGCT 1 55.699318 46.077488 3 | 2 GTTATTTGAGCAATGCCACTTAATAAACATGTAA 0 53.469837 7.932923 4 | 5 | -------------------------------------------------------------------------------- /DeepCpf1/weights/DeepCpf1_weights.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyungjaeSong/Paired-Library/9aa18cd418a0258382c02846604d3d399c8ab020/DeepCpf1/weights/DeepCpf1_weights.h5 -------------------------------------------------------------------------------- /DeepCpf1/weights/Seq_deepCpf1_weights.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyungjaeSong/Paired-Library/9aa18cd418a0258382c02846604d3d399c8ab020/DeepCpf1/weights/Seq_deepCpf1_weights.h5 -------------------------------------------------------------------------------- /DeepxCas9/DeepxCas9_weight/PreTrain-Final-4-6-8-180-150-120-0.001-880-120-80.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyungjaeSong/Paired-Library/9aa18cd418a0258382c02846604d3d399c8ab020/DeepxCas9/DeepxCas9_weight/PreTrain-Final-4-6-8-180-150-120-0.001-880-120-80.data-00000-of-00001 -------------------------------------------------------------------------------- /DeepxCas9/DeepxCas9_weight/PreTrain-Final-4-6-8-180-150-120-0.001-880-120-80.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyungjaeSong/Paired-Library/9aa18cd418a0258382c02846604d3d399c8ab020/DeepxCas9/DeepxCas9_weight/PreTrain-Final-4-6-8-180-150-120-0.001-880-120-80.index -------------------------------------------------------------------------------- /DeepxCas9/DeepxCas9_weight/PreTrain-Final-4-6-8-180-150-120-0.001-880-120-80.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyungjaeSong/Paired-Library/9aa18cd418a0258382c02846604d3d399c8ab020/DeepxCas9/DeepxCas9_weight/PreTrain-Final-4-6-8-180-150-120-0.001-880-120-80.meta -------------------------------------------------------------------------------- /DeepxCas9/DeepxCas9_weight/checkpoint: -------------------------------------------------------------------------------- 1 | model_checkpoint_path: "PreTrain-Final-4-6-8-180-150-120-0.001-880-120-80" 2 | all_model_checkpoint_paths: "PreTrain-Final-4-6-8-180-150-120-0.001-880-120-80" 3 | -------------------------------------------------------------------------------- /DeepxCas9/README.txt: -------------------------------------------------------------------------------- 1 | 1. System Requirements: 2 | Ubuntu 16.04 3 | Python 2.7.12 4 | Python Packages: 5 | numpy 1.14.5 6 | scipy 1.1.0 7 | 8 | Tensorflow and dependencies: 9 | Tensorflow 1.4.1 10 | CUDA 8.0.61 11 | cuDNN 6.0.21 12 | 13 | 2. 
Installation Guide (required time, <120 minutes): 14 | 15 | - Operating System 16 | Download Ubuntu 16.04 from https://www.ubuntu.com/download/desktop 17 | 18 | - Python and packages 19 | Download the Python 2.7.12 tarball from https://www.python.org/downloads/release/python-2712/ 20 | Unzip and install: 21 | tar -zxvf Python-2.7.12.tgz 22 | cd ./Python-2.7.12 23 | ./configure 24 | make 25 | 26 | Package Installation: 27 | pip install numpy==1.14.5 28 | pip install scipy==1.1.0 29 | 30 | Tensorflow Installation: 31 | (for GPU use) 32 | pip install tensorflow-gpu==1.4.1 33 | (for CPU only) 34 | pip install tensorflow==1.4.1 35 | 36 | 37 | (for GPU use) 38 | 39 | - CUDA Toolkit 8.0 40 | wget -O cuda_8_linux.run https://developer.nvidia.com/compute/cuda/8.0/Prod2/local_installers/cuda_8.0.61_375.26_linux-run 41 | sudo chmod +x cuda_8_linux.run 42 | ./cuda_8_linux.run 43 | 44 | - cuDNN 6.0.21 45 | Download the cuDNN tarball from https://developer.nvidia.com/cudnn 46 | Unzip and install: 47 | tar -zxvf cudnn-8.0-linux-x64-v6.0.tgz 48 | 49 | For more details, please refer to the CUDA, cuDNN, and TensorFlow installation guide on GitHub: 50 | https://gist.github.com/ksopyla/813a62d6afc4307755e5832a3b62f432 51 | 52 | 53 | 3. Demo Instructions (required time, <1 min): 54 | 55 | Input1: ./dataset/ # List of Target Sequence(s) 56 | File format: 57 | Target number 30 bp target sequence (4 bp + 20 bp protospacer + PAM + 3 bp) 58 | 1 TCTGGCGGTCTCAAGCACTACCTACGTCAG 59 | 2 GGTGGGGGTTAGACGAATATCAGGAGACTA 60 | 61 | Input2: ./DeepxCas9_weight/ # Pre-trained Weight Files 62 | 63 | Output: RANK_final_DeepxCas9_weight.txt 64 | Predicted activity scores for sequences 1 and 2: 65 | 25.55347824, 22.9860401153564 66 | 67 | Run script: 68 | python ./Test.py 69 | 70 | Modification for personalized runs: 71 | 72 | 73 | ## System Paths ## 74 | path = './dataset/' 75 | parameters = {'0': 'sample.txt'} 76 | 77 | ## Run Parameters ## 78 | TEST_NUM_SET = [0] # List can be expanded in case of multiple test parameters 79 | best_model_path_list = ['./DeepxCas9_weight/'] 80 | 81 | sample.txt can be replaced or modified to include the target sequences of interest 82 | 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /DeepxCas9/Test.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | from os import system 3 | import tensorflow as tf 4 | import numpy as np 5 | 6 | np.set_printoptions(threshold='nan') 7 | 8 | ############################################################################## 9 | 10 | 11 | 12 | 13 | ############################################################################## 14 | ## System Paths ## 15 | path = './dataset/' 16 | parameters = {'0': 'sample.txt'} # Dictionary can be expanded for multiple test parameters 17 | 18 | ## Run Parameters ## 19 | TEST_NUM_SET = [0] # List can be expanded in case of multiple test parameters 20 | best_model_path_list = ['./DeepxCas9_weight/'] 21 | 22 | # Model 23 | length = 30 24 | 25 | class Deep_xCas9(object): 26 | def __init__(self, filter_size, filter_num, node_1 = 80, node_2 = 60, l_rate = 0.005): 27 | self.inputs = tf.placeholder(tf.float32, [None, 1, length, 4]) 28 | self.targets = tf.placeholder(tf.float32, [None, 1]) 29 | self.is_training = tf.placeholder(tf.bool) 30 | def create_new_conv_layer(input_data, num_input_channels, num_filters, filter_shape, pool_shape, name): 31 | # set up the filter input shape for tf.nn.conv2d 32 | conv_filt_shape = [filter_shape[0], filter_shape[1],
num_input_channels, 33 | num_filters] 34 | 35 | # initialise weights and bias for the filter 36 | weights = tf.Variable(tf.truncated_normal(conv_filt_shape, stddev=0.03), 37 | name=name+'_W') 38 | bias = tf.Variable(tf.truncated_normal([num_filters]), name=name+'_b') 39 | 40 | # set up the convolutional layer operation 41 | out_layer = tf.nn.conv2d(input_data, weights, [1, 1, 1, 1], padding='VALID') 42 | 43 | # add the bias 44 | out_layer += bias 45 | 46 | # apply a ReLU non-linear activation followed by dropout 47 | out_layer = tf.layers.dropout(tf.nn.relu(out_layer), 0.3, self.is_training) 48 | 49 | # now perform average pooling 50 | ksize = [1, pool_shape[0], pool_shape[1], 1] 51 | strides = [1, 1, 2, 1] 52 | out_layer = tf.nn.avg_pool(out_layer, ksize=ksize, strides=strides, 53 | padding='SAME') 54 | return out_layer 55 | #def end: create_new_conv_layer 56 | 57 | L_pool_0 = create_new_conv_layer(self.inputs, 4, filter_num[0], [1, filter_size[0]], [1, 2], name='conv1') 58 | L_pool_1 = create_new_conv_layer(self.inputs, 4, filter_num[1], [1, filter_size[1]], [1, 2], name='conv2') 59 | L_pool_2 = create_new_conv_layer(self.inputs, 4, filter_num[2], [1, filter_size[2]], [1, 2], name='conv3') 60 | with tf.variable_scope('Fully_Connected_Layer1'): 61 | layer_node_0 = int((length-filter_size[0])/2)+1 62 | node_num_0 = layer_node_0*filter_num[0] 63 | layer_node_1 = int((length-filter_size[1])/2)+1 64 | node_num_1 = layer_node_1*filter_num[1] 65 | layer_node_2 = int((length-filter_size[2])/2)+1 66 | node_num_2 = layer_node_2*filter_num[2] 67 | L_flatten_0 = tf.reshape(L_pool_0, [-1, node_num_0]) 68 | L_flatten_1 = tf.reshape(L_pool_1, [-1, node_num_1]) 69 | L_flatten_2 = tf.reshape(L_pool_2, [-1, node_num_2]) 70 | L_flatten = tf.concat([L_flatten_0, L_flatten_1, L_flatten_2], 1, name='concat') 71 | node_num = node_num_0 + node_num_1 + node_num_2 72 | W_fcl1 = tf.get_variable("W_fcl1", shape=[node_num, node_1]) 73 | B_fcl1 = tf.get_variable("B_fcl1", shape=[node_1]) 74 | L_fcl1_pre = tf.nn.bias_add(tf.matmul(L_flatten, W_fcl1), B_fcl1) 75 | L_fcl1 = tf.nn.relu(L_fcl1_pre) 76 | L_fcl1_drop = tf.layers.dropout(L_fcl1, 0.3, self.is_training) 77 | 78 | with tf.variable_scope('Fully_Connected_Layer2'): 79 | W_fcl2 = tf.get_variable("W_fcl2", shape=[node_1, node_2]) 80 | B_fcl2 = tf.get_variable("B_fcl2", shape=[node_2]) 81 | L_fcl2_pre = tf.nn.bias_add(tf.matmul(L_fcl1_drop, W_fcl2), B_fcl2) 82 | L_fcl2 = tf.nn.relu(L_fcl2_pre) 83 | L_fcl2_drop = tf.layers.dropout(L_fcl2, 0.3, self.is_training) 84 | 85 | with tf.variable_scope('Output_Layer'): 86 | W_out = tf.get_variable("W_out", shape=[node_2, 1])#, initializer=tf.contrib.layers.xavier_initializer()) 87 | B_out = tf.get_variable("B_out", shape=[1])#, initializer=tf.contrib.layers.xavier_initializer()) 88 | self.outputs = tf.nn.bias_add(tf.matmul(L_fcl2_drop, W_out), B_out) 89 | 90 | # Define loss function and optimizer 91 | self.obj_loss = tf.reduce_mean(tf.square(self.targets - self.outputs)) 92 | self.optimizer = tf.train.AdamOptimizer(l_rate).minimize(self.obj_loss) 93 | #def end: def __init__ 94 | #class end: Deep_xCas9 95 | 96 | def Model_Finaltest(sess, TEST_X, filter_size, filter_num, model, load_episode, best_model_path): 97 | test_batch = 500 98 | test_spearman = 0.0 99 | optimizer = model.optimizer 100 | TEST_Z = np.zeros((TEST_X.shape[0], 1), dtype=float) 101 | 102 | for i in range(int(np.ceil(float(TEST_X.shape[0])/float(test_batch)))): 103 | Dict = {model.inputs: TEST_X[i*test_batch:(i+1)*test_batch], model.is_training: False} 104 |
TEST_Z[i*test_batch:(i+1)*test_batch] = sess.run([model.outputs], feed_dict=Dict)[0] 105 | 106 | OUT = open("RANK_final_{}.txt".format(best_model_path.split('/')[1]), "a") 107 | OUT.write("Testing final \n {} ".format(tuple(TEST_Z.reshape([np.shape(TEST_Z)[0]])))) 108 | OUT.write("\n") 109 | OUT.close() 110 | return 111 | #def end: Model_Finaltest 112 | 113 | 114 | def preprocess_seq(data): 115 | print("Start preprocessing the sequences") 116 | length = 30 117 | 118 | DATA_X = np.zeros((len(data),1,length,4), dtype=int) 119 | print(np.shape(data), len(data), length) 120 | for l in range(len(data)): 121 | for i in range(length): 122 | 123 | try: data[l][i] 124 | except: print(data[l], i, length, len(data)) 125 | 126 | if data[l][i] in "Aa": DATA_X[l, 0, i, 0] = 1 127 | elif data[l][i] in "Cc": DATA_X[l, 0, i, 1] = 1 128 | elif data[l][i] in "Gg": DATA_X[l, 0, i, 2] = 1 129 | elif data[l][i] in "Tt": DATA_X[l, 0, i, 3] = 1 130 | else: 131 | print "Non-ATGC character " + data[l] 132 | print i 133 | print data[l][i] 134 | sys.exit() 135 | #loop end: i 136 | #loop end: l 137 | print("Preprocessing the sequence done") 138 | return DATA_X 139 | #def end: preprocess_seq 140 | 141 | 142 | def getseq(filenum): 143 | param = parameters['%s' % filenum] 144 | FILE = open(path+param, "r") 145 | data = FILE.readlines() 146 | data_n = len(data) - 1 147 | seq = [] 148 | 149 | for l in range(1, data_n+1): 150 | try: 151 | data_split = data[l].split() 152 | seq.append(data_split[1]) 153 | except: 154 | print data[l] 155 | seq.append(data[l]) 156 | #loop end: l 157 | FILE.close() 158 | processed_full_seq = preprocess_seq(seq) 159 | 160 | return processed_full_seq, seq 161 | #def end: getseq 162 | 163 | 164 | #TensorFlow config 165 | conf = tf.ConfigProto() 166 | conf.gpu_options.allow_growth = True 167 | os.environ['CUDA_VISIBLE_DEVICES'] = '0' 168 | best_model_cv = 0.0 169 | best_model_list = [] 170 | 171 | for best_model_path in best_model_path_list: 172 | for modelname in os.listdir(best_model_path): 173 | if "meta" in modelname: 174 | best_model_list.append(modelname[:-5]) 175 | #loop end: best_model_path 176 | 177 | TEST_X = [] 178 | TEST_X_nohot = [] 179 | for TEST_NUM in TEST_NUM_SET: 180 | tmp_X, tmp_X_nohot = getseq(TEST_NUM) 181 | TEST_X.append(tmp_X) 182 | TEST_X_nohot.append(tmp_X_nohot) 183 | #loop end: TEST_NUM 184 | 185 | 186 | for index in range(len(best_model_list)): 187 | best_model_path = best_model_path_list[index] 188 | best_model = best_model_list[index] 189 | valuelist = best_model.split('-') 190 | fulllist = [] 191 | 192 | for value in valuelist: 193 | if value == 'True': value=True 194 | elif value == 'False': value=False 195 | else: 196 | try: 197 | value=int(value) 198 | except: 199 | try: value=float(value) 200 | except: pass 201 | fulllist.append(value) 202 | #loop end: value 203 | 204 | print(fulllist[2:]) 205 | 206 | filter_size_1, filter_size_2, filter_size_3, filter_num_1, filter_num_2, filter_num_3, l_rate, load_episode, node_1, node_2 = fulllist[2:] 207 | filter_size = [filter_size_1, filter_size_2, filter_size_3] 208 | filter_num = [filter_num_1, filter_num_2, filter_num_3] 209 | 210 | args = [filter_size, filter_num, l_rate, load_episode] 211 | tf.reset_default_graph() 212 | with tf.Session(config=conf) as sess: 213 | sess.run(tf.global_variables_initializer()) 214 | model = Deep_xCas9(filter_size, filter_num, node_1, node_2, args[2]) 215 | 216 | saver = tf.train.Saver() 217 | saver.restore(sess, best_model_path + best_model) 218 | 219 | OUT =
open("RANK_final_{}.txt".format(best_model_path.split('/')[1]), "a") 220 | OUT.write("{}".format(best_model)) 221 | OUT.write("\n") 222 | OUT.close() 223 | 224 | TEST_Y = [] 225 | for i in range(len(TEST_NUM_SET)): 226 | print ("TEST_NUM : {}".format(TEST_NUM_SET[i])) 227 | 228 | OUT = open("RANK_final_{}.txt".format(best_model_path.split('/')[1]), "a") 229 | OUT.write("\n") 230 | OUT.write("TEST_FILE : {}".format(parameters['{}'.format(TEST_NUM_SET[i])])) 231 | OUT.write("\n") 232 | OUT.close() 233 | Model_Finaltest(sess, TEST_X[i], filter_size, filter_num, model, load_episode, best_model_path) 234 | #loop end: i 235 | 236 | OUT = open("RANK_final_{}.txt".format(best_model_path.split('/')[1]), "a") 237 | OUT.write("\n") 238 | OUT.close() -------------------------------------------------------------------------------- /DeepxCas9/dataset/sample.txt: -------------------------------------------------------------------------------- 1 | Target number 30 bp target sequence (4 bp + 20 bp protospacer + PAM + 3 bp) 2 | 1 TCTGGCGGTCTCAAGCACTACCTACGTCAG 3 | 2 GGTGGGGGTTAGACGAATATCAGGAGACTA --------------------------------------------------------------------------------