├── GRUI
│   ├── Run_GAN_imputed.py
│   ├── __init__.py
│   ├── gru_delta_forGAN.py
│   ├── gru_impute_lastValue.py
│   ├── gru_impute_zero.py
│   ├── max_auc
│   ├── mygru_cell.py
│   ├── tune_lastValue_imputed.py
│   ├── tune_mean_imputed.py
│   ├── tune_zero_imputed.py
│   └── untitled1.py
├── Gan_Imputation
│   ├── Physionet_main.py
│   ├── WGAN_GRUI.py
│   ├── __init__.py
│   ├── meanAndstd
│   ├── ops.py
│   ├── ops.pyc
│   ├── readMe
│   ├── utils.py
│   └── utils.pyc
├── KDD_dataset
│   ├── Beijing_AirQuality_Stations_en.xlsx
│   ├── beijing_17_18_aq.csv
│   ├── beijing_17_18_meo.csv
│   ├── bj_aq_online.csv
│   ├── holiday_bj.csv
│   ├── holiday_ld.csv
│   ├── ld_aq_online.csv
│   ├── station_beijing.txt
│   ├── station_london.txt
│   └── tmp
│       └── rate.pkl
├── Physionet2012Data
│   ├── __init__.py
│   ├── __init__.pyc
│   ├── calculateMissingRate.py
│   ├── meanAndstd
│   ├── readData.py
│   ├── readData.pyc
│   ├── readTestData.py
│   └── readTestData.pyc
├── Physionet2012ImputedData
│   ├── __init__.py
│   ├── __init__.pyc
│   ├── readImputed.py
│   └── readImputed.pyc
├── README.md
├── requirements.txt
└── set-a
    ├── data_loader.py
    ├── test.py
    ├── test.zip
    └── train.zip

/GRUI/Run_GAN_imputed.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python2
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Mon Mar 26 10:47:41 2018
5 | 
6 | @author: yonghong
7 | """
8 | 
9 | from __future__ import print_function
10 | import sys
11 | sys.path.append("..")
12 | import argparse
13 | import os
14 | import tensorflow as tf
15 | from Physionet2012ImputedData import readImputed
16 | import gru_delta_forGAN
17 | if __name__ == '__main__':
18 |     parser = argparse.ArgumentParser(description='manual to this script')
19 |     parser.add_argument('--gpus', type=str, default = None)
20 |     parser.add_argument('--batch-size', type=int, default=128)
21 |     parser.add_argument('--run-type', type=str, default='test')
22 |     parser.add_argument('--data-path', type=str, default="../Gan_Imputation/imputation_train_results/WGAN_no_mask/")
23 |     # full path of the imputed training dataset, e.g. Gan_Imputation/imputation_train_results/WGAN_no_mask/30_8_128_64_0.001_400_True_True_True_0.15_0.5
24 |     parser.add_argument('--model-path', type=str, default=None)
25 |     parser.add_argument('--result-path', type=str, default=None)
26 |     parser.add_argument('--lr', type=float, default=0.01)
27 |     parser.add_argument('--epoch', type=int, default=30)
28 |     parser.add_argument('--n-inputs', type=int, default=41)
29 |     parser.add_argument('--n-hidden-units', type=int, default=64)
30 |     parser.add_argument('--n-classes', type=int, default=2)
31 |     parser.add_argument('--checkpoint-dir', type=str, default='checkpoint_physionet_imputed',
32 |                         help='Directory name to save the checkpoints')
33 |     parser.add_argument('--log-dir', type=str, default='logs_physionet_imputed',
34 |                         help='Directory name to save training logs')
35 |     parser.add_argument('--isNormal',type=int,default=1)
36 |     parser.add_argument('--isSlicing',type=int,default=1)
37 |     # 0: false, 1: true
38 |     parser.add_argument('--isBatch-normal',type=int,default=1)
39 |     args = parser.parse_args()
40 | 
41 | 
42 |     if args.isBatch_normal==0:
43 |         args.isBatch_normal=False
44 |     if args.isBatch_normal==1:
45 |         args.isBatch_normal=True
46 |     if args.isNormal==0:
47 |         args.isNormal=False
48 |     if args.isNormal==1:
49 |         args.isNormal=True
50 |     if args.isSlicing==0:
51 |         args.isSlicing=False
52 |     if args.isSlicing==1:
53 |         args.isSlicing=True
54 | 
55 | 
56 |     checkdir=args.checkpoint_dir
57 |     logdir=args.log_dir
58 |     base=args.data_path
59 |     data_paths=["30_8_128_64_0.001_400_True_True_True_0.15_0.5"]
60 |     max_auc = 0.0
61 |     for d in data_paths:
62 |         args.data_path=os.path.join(base,d)
63 |         path_splits=args.data_path.split("/")
64 |         if len(path_splits[-1])==0:
65 |             datasetName=path_splits[-2]
66 |         else:
67 |             datasetName=path_splits[-1]
68 |         args.checkpoint_dir=checkdir+"/"+datasetName
69 |         args.log_dir=logdir+"/"+datasetName
70 | 
71 |         dt_train=readImputed.ReadImputedPhysionetData(args.data_path)
72 |         dt_train.load()
73 | 
74 |         dt_test=readImputed.ReadImputedPhysionetData(args.data_path.replace("imputation_train_results","imputation_test_results"))
75 |         dt_test.load()
76 | 
77 |         lrs=[0.004,0.003,0.005,0.006,0.007,0.008,0.009,0.01,0.012,0.015]
78 |         #lrs = [0.0075,0.0085]
79 |         for lr in lrs:
80 |             args.lr=lr
81 |             epoch= args.epoch
82 |             #epoch=30
83 |             args.epoch=epoch
84 |             print("epoch: %2d"%(epoch))
85 |             tf.reset_default_graph()
86 |             config = tf.ConfigProto()
87 |             config.gpu_options.allow_growth = True
88 |             with tf.Session(config=config) as sess:
89 |                 model = gru_delta_forGAN.grui(sess,
90 |                                               args=args,
91 |                                               dataset=dt_train,
92 |                                               test_set = dt_test
93 |                                               )
94 | 
95 |                 # build graph
96 |                 model.build()
97 | 
98 |                 auc = model.train()
99 |                 if auc > max_auc:
100 |                     max_auc = auc
101 | 
102 |     print("")
103 |     print("max auc is: " + str(max_auc))
104 |     f2 = open("max_auc","w")
105 |     f2.write(str(max_auc))
106 |     f2.close()
107 | 
108 | 
109 | 
--------------------------------------------------------------------------------
/GRUI/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Mon Jan 29 22:55:25 2018
5 | 
6 | @author: lyh
7 | """
8 | 
9 | 
--------------------------------------------------------------------------------
/GRUI/gru_delta_forGAN.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Thu Jan 25 21:52:13 2018
5 | gru for imputed data
6 | @author: lyh
7 | """
8 | 
9 | 
10 | from __future__ import print_function
11 | import os
12 | import numpy as np
13 | from sklearn import metrics
14 | import time
15 | import mygru_cell
16 | import tensorflow as tf
17 | from tensorflow.python.ops import math_ops
18 | tf.set_random_seed(1)   # set random seed
19 | 
20 | class grui(object):
21 |     model_name = "GRU_I"
22 |     def __init__(self, sess, args, dataset, test_set):
23 |         self.lr = args.lr
24 |         self.sess=sess
25 |         self.isbatch_normal=args.isBatch_normal
26 |         self.isNormal=args.isNormal
27 |         self.isSlicing=args.isSlicing
28 |         self.dataset=dataset
29 |         self.test_set = test_set
30 |         self.epoch = args.epoch
31 |         self.batch_size = args.batch_size
32 |         self.n_inputs = args.n_inputs              # number of input features per time step
33 |         self.n_steps = dataset.maxLength           # time steps
34 |         self.n_hidden_units = args.n_hidden_units  # neurons in hidden layer
35 |         self.n_classes = args.n_classes            # number of output classes
36 |         self.run_type=args.run_type
37 |         self.result_path=args.result_path
38 |         self.model_path=args.model_path
39 |         self.log_dir=args.log_dir
40 |         self.checkpoint_dir=args.checkpoint_dir
41 |         self.num_batches = len(dataset.x) // self.batch_size
42 |         # x, y placeholders
43 |         self.keep_prob = tf.placeholder(tf.float32)
44 |         self.x = tf.placeholder(tf.float32, [None, self.n_steps, self.n_inputs])
45 |         self.y = tf.placeholder(tf.float32, [None, self.n_classes])
46 |         self.m = tf.placeholder(tf.float32, [None, self.n_steps, self.n_inputs])
47 |         self.delta = tf.placeholder(tf.float32, [None, self.n_steps, self.n_inputs])
48 |         self.mean = tf.placeholder(tf.float32, [self.n_inputs,])
49 |         self.lastvalues = tf.placeholder(tf.float32, [None, self.n_steps, self.n_inputs])
50 |         self.x_lengths = tf.placeholder(tf.int32, shape=[self.batch_size,])
51 |         # definition of the initial values of weights and biases
52 | 
53 | 
54 |     #concatenate x and m
55 |     #rth should also be concatenated after x, then decays the older state
56 |     #rth's length is n_hidden_units
57 | 
58 | 
59 | 
60 |     def RNN(self,X, M, Delta, Mean, Lastvalues, X_lengths,Keep_prob, reuse=False):
61 |         # 2*3*2
62 |         # X: batches * steps, n_inputs
63 |         # m: batches * steps, n_inputs
64 |         # delta: batches * steps, n_inputs
65 |         # mean: n_inputs, mean of all observations, does not contain the imputations
66 |         # lastvalues: batches * steps, n_inputs, last observed value of x if x is missing;
67 |         #   if lastvalues is zero, take the mean instead
68 | 
69 |         with tf.variable_scope("grui", reuse=reuse):
70 | 
71 |             # then wr_x should be transformed into a diag matrix: tf.matrix_diag(wr_x)
72 |             wr_h=tf.get_variable('wr_h',shape=[self.n_inputs,self.n_hidden_units],initializer=tf.random_normal_initializer())
73 |             w_out=tf.get_variable('w_out', shape=[self.n_hidden_units, self.n_classes],initializer=tf.random_normal_initializer())
74 | 
75 |             br_h=tf.get_variable('br_h', shape=[self.n_hidden_units, ],initializer=tf.constant_initializer(0.001))
76 |             b_out=tf.get_variable('b_out', shape=[self.n_classes, ],initializer=tf.constant_initializer(0.001))
77 | 
78 | 
79 | 
80 |             Lastvalues=tf.reshape(Lastvalues,[-1,self.n_inputs])
81 |             #M=tf.reshape(M,[-1,self.n_inputs])
82 |             X = tf.reshape(X, [-1, self.n_inputs])
83 |             Delta=tf.reshape(Delta,[-1,self.n_inputs])
84 | 
85 | 
86 |             rth= tf.matmul( Delta, wr_h)+br_h
87 |             rth=math_ops.exp(-tf.maximum(0.0,rth))
88 | 
89 |             #X = tf.reshape(X, [-1, n_inputs])
90 |             #print(X.get_shape(),M.get_shape(),rth.get_shape())
91 |             X=tf.concat([X,rth],1)
92 | 
93 |             X_in = tf.reshape(X, [-1, self.n_steps, self.n_inputs+self.n_hidden_units])
94 | 
95 |             #print(X_in.get_shape())
96 |             # X_in = W*X + b
97 |             #X_in = tf.matmul(X, weights['in']) + biases['in']
98 |             # X_in ==> (128 batches, 28 steps, 128 hidden), reshape back to 3-D
99 |             #X_in = tf.reshape(X_in, [-1, n_steps, n_hidden_units])
100 | 
101 |             if "1.5" in tf.__version__ or "1.7" in tf.__version__ :
102 |                 grud_cell = mygru_cell.MyGRUCell15(self.n_hidden_units)
103 |             elif "1.4" in tf.__version__:
104 |                 grud_cell = mygru_cell.MyGRUCell4(self.n_hidden_units)
105 |             elif "1.2" in tf.__version__:
106 |                 grud_cell = mygru_cell.MyGRUCell2(self.n_hidden_units)
107 |             init_state = grud_cell.zero_state(self.batch_size, dtype=tf.float32)  # initialize an all-zero state
108 |             outputs, final_state = tf.nn.dynamic_rnn(grud_cell, X_in, \
109 |                 initial_state=init_state,\
110 |                 sequence_length=X_lengths,
111 |                 time_major=False)
112 | 
113 |             factor=tf.matrix_diag([1.0/9,1])
114 |             tempout=tf.matmul(tf.nn.dropout(final_state,Keep_prob), w_out) + b_out
115 |             results =tf.nn.softmax(tf.matmul(tempout,factor))  # take the last output
116 |             #todo: dropout of 0.5 and batch normalization
117 |             return results
118 |     def build(self):
119 | 
120 |         self.pred = self.RNN(self.x, self.m, self.delta, self.mean, self.lastvalues, self.x_lengths, self.keep_prob)
121 |         self.cross_entropy = -tf.reduce_sum(self.y*tf.log(self.pred))
122 |         self.train_op = tf.train.AdamOptimizer(self.lr).minimize(self.cross_entropy)
123 | 
124 | 
125 |         self.correct_pred = tf.equal(tf.argmax(self.pred, 1), tf.argmax(self.y, 1))
126 |         self.accuracy = tf.reduce_mean(tf.cast(self.correct_pred, tf.float32))
127 |         self.saver = tf.train.Saver(max_to_keep=None)
128 | 
129 |         loss_sum = tf.summary.scalar("loss", self.cross_entropy)
130 |         acc_sum = tf.summary.scalar("acc", self.accuracy)
131 | 
132 |         self.sum=tf.summary.merge([loss_sum, acc_sum])
133 | 
134 | 
135 |     def model_dir(self,epoch):
136 |         return "{}_{}_{}_{}_{}_{}/epoch{}".format(
137 |             self.model_name, self.lr,
138 |             self.batch_size, self.isNormal,
139 |             self.isbatch_normal,self.isSlicing,
140 |             epoch
141 |         )
142 | 
143 |     def save(self, checkpoint_dir, step, epoch):
144 |         checkpoint_dir = os.path.join(checkpoint_dir, self.model_dir(epoch), self.model_name)
145 | 
146 |         if not os.path.exists(checkpoint_dir):
147 |             os.makedirs(checkpoint_dir)
148 | 
149 |         self.saver.save(self.sess,os.path.join(checkpoint_dir, self.model_name+'.model'), global_step=step)
150 | 
151 |     def load(self, checkpoint_dir, epoch):
152 |         import re
153 |         checkpoint_dir = os.path.join(checkpoint_dir, self.model_dir(epoch), self.model_name)
154 | 
155 |         ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
156 |         if ckpt and ckpt.model_checkpoint_path:
157 |             ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
158 |             self.saver.restore(self.sess, os.path.join(checkpoint_dir, ckpt_name))
159 |             counter = int(next(re.finditer("(\d+)(?!.*\d)",ckpt_name)).group(0))
160 |             print(" [*] Success to read {}".format(ckpt_name))
161 |             return True, counter
162 |         else:
163 |             #print(" [*] Failed to find a checkpoint")
164 |             return False, 0
165 | 
166 |     def train(self):
167 | 
168 |         max_auc = 0.5
169 |         model_dir2= "{}_{}_{}_{}_{}_{}".format(
170 |             self.model_name, self.lr,
171 |             self.batch_size, self.isNormal,
172 |             self.isbatch_normal,self.isSlicing
173 |         )
174 |         if not os.path.exists(os.path.join(self.checkpoint_dir, model_dir2)):
175 |             os.makedirs(os.path.join(self.checkpoint_dir, model_dir2))
176 |         result_file=open(os.path.join(self.checkpoint_dir, model_dir2, "result"),"a+")
177 | 
178 |         if os.path.exists(os.path.join(self.checkpoint_dir, self.model_dir(self.epoch), self.model_name)):
179 |             for nowepoch in range(1,self.epoch+1):
180 |                 print(" [*] Load SUCCESS")
181 |                 print("epoch: "+str(nowepoch))
182 |                 self.load(self.checkpoint_dir,nowepoch)
183 |                 acc,auc,model_name=self.test(self.test_set,nowepoch)
184 |                 if auc > max_auc :
185 |                     max_auc = auc
186 |                 result_file.write("epoch: "+str(nowepoch)+","+str(acc)+","+str(auc)+"\r\n")
187 |                 print("")
188 |             result_file.close()
189 |             return max_auc
190 |         else:
191 |             # initialize all variables
192 |             tf.global_variables_initializer().run()
193 |             counter = 1
194 |             print(" [!] Load failed...")
195 | 
196 |         start_time=time.time()
197 |         idx = 0
198 |         epochcount=0
199 |         dataset=self.dataset
200 |         while epochcount<self.epoch:
221 |                 if auc > max_auc :
222 |                     max_auc = auc
223 |                 result_file.write("epoch: "+str(epochcount)+","+str(acc)+","+str(auc)+"\r\n")
224 |                 print("")
225 | 
226 |         result_file.close()
227 |         return max_auc
228 | 
229 |     def test(self,dataset, epoch):
230 |         start_time=time.time()
231 |         counter=0
232 |         dataset.shuffle(self.batch_size,False)
233 |         totalacc=0.0
234 |         totalauc=0.0
235 |         auccounter=0
236 |         for data_x,data_y,data_mean,data_m,data_delta,data_x_lengths,data_lastvalues,_,_,_ in dataset.nextBatch():
237 |             summary_str,acc,pred = self.sess.run([self.sum, self.accuracy,self.pred], feed_dict={\
238 |                 self.x: data_x,\
239 |                 self.y: data_y,\
240 |                 self.m: data_m,\
241 |                 self.delta: data_delta,\
242 |                 self.mean: data_mean,\
243 |                 self.x_lengths: data_x_lengths,\
244 |                 self.lastvalues: data_lastvalues,\
245 |                 self.keep_prob: 1.0})
246 | 
247 |             try:
248 |                 auc = metrics.roc_auc_score(np.array(data_y),np.array(pred))
249 |                 totalauc+=auc
250 |                 auccounter+=1
251 |                 print("Batch: %4d time: %4.4f, acc: %.8f, auc: %.8f" \
252 |                     % ( counter, time.time() - start_time, acc, auc))
253 |             except ValueError:
254 |                 print("Batch: %4d time: %4.4f, acc: %.8f " \
255 |                     % ( counter, time.time() - start_time, acc))
256 |                 pass
257 |             totalacc+=acc
258 |             counter += 1
259 |         totalacc=totalacc/counter
260 |         try:
261 |             totalauc=totalauc/auccounter
262 |         except:
263 |             pass
264 |         print("epoch is : %2.2f, Total acc: %.8f, Total auc: %.8f , counter is : %.2f , auccounter is %.2f" % (epoch, totalacc,totalauc,counter,auccounter))
265 |         f=open(os.path.join(self.checkpoint_dir, self.model_dir(epoch), self.model_name,"final_acc_and_auc"),"w")
266 |         f.write(str(totalacc)+","+str(totalauc))
267 |         f.close()
268 |         return totalacc,totalauc,self.model_name
269 | 
--------------------------------------------------------------------------------
/GRUI/gru_impute_lastValue.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Thu Jan 25 21:52:13 2018
5 | 
6 | @author: lyh
7 | """
8 | from __future__ import print_function
9 | import os
10 | import numpy as np
11 | from sklearn import metrics
12 | import time
13 | import mygru_cell
14 | import tensorflow as tf
15 | from tensorflow.python.ops import math_ops
16 | tf.set_random_seed(1)   # set random seed
17 | 
18 | class grud(object):
19 |     model_name = "GRU_ImputeLastValue"
20 |     def __init__(self, sess, args, dataset):
21 |         self.lr = args.lr
22 |         self.sess=sess
23 |         self.isbatch_normal=args.isBatch_normal
24 |         self.isNormal=args.isNormal
25 |         self.isSlicing=args.isSlicing
26 |         self.dataset=dataset
27 |         self.epoch = args.epoch
28 |         self.batch_size = args.batch_size
29 |         self.n_inputs = args.n_inputs              # number of input features per time step
30 |         self.n_steps = dataset.maxLength           # time steps
31 |         self.n_hidden_units = args.n_hidden_units  # neurons in hidden layer
32 |         self.n_classes = args.n_classes            # number of output classes
33 |         self.run_type=args.run_type
34 |         self.result_path=args.result_path
35 |         self.model_path=args.model_path
36 |         self.log_dir=args.log_dir
37 |         self.checkpoint_dir=args.checkpoint_dir
38 |         self.num_batches = len(dataset.x) // self.batch_size
39 |         # x, y placeholders
40 |         self.keep_prob = tf.placeholder(tf.float32)
41 |         self.x = tf.placeholder(tf.float32, [None, self.n_steps, self.n_inputs])
42 |         self.y = tf.placeholder(tf.float32, [None, self.n_classes])
43 |         self.m = tf.placeholder(tf.float32, [None, self.n_steps, self.n_inputs])
44 |         self.delta = tf.placeholder(tf.float32, [None, self.n_steps, self.n_inputs])
45 |         self.mean = tf.placeholder(tf.float32, [self.n_inputs,])
46 |         self.lastvalues = tf.placeholder(tf.float32, [None, self.n_steps, self.n_inputs])
47 |         self.x_lengths = tf.placeholder(tf.int32, shape=[self.batch_size,])
48 |         # definition of the initial values of weights and biases
49 | 
50 | 
51 |     #concatenate x and m
52 |     #rth should also be concatenated after x, then decays the older state
53 |     #rth's length is n_hidden_units
54 | 
55 | 
56 | 
57 |     def RNN(self,X, M, Delta, Mean, Lastvalues, X_lengths,Keep_prob, reuse=False):
58 |         # 2*3*2
59 |         # X: batches * steps, n_inputs
60 |         # m: batches * steps, n_inputs
61 |         # delta: batches * steps, n_inputs
62 |         # mean: n_inputs, mean of all observations, does not contain the imputations
63 |         # lastvalues: batches * steps, n_inputs, last observed value of x if x is missing;
64 |         #   if lastvalues is zero, take the mean instead
65 | 
66 |         with tf.variable_scope("grud", reuse=reuse):
67 | 
68 |             # then wr_x should be transformed into a diag matrix: tf.matrix_diag(wr_x)
69 |             wr_h=tf.get_variable('wr_h',shape=[self.n_inputs,self.n_hidden_units],initializer=tf.random_normal_initializer())
70 |             w_out=tf.get_variable('w_out', shape=[self.n_hidden_units, self.n_classes],initializer=tf.random_normal_initializer())
71 | 
72 |             br_h=tf.get_variable('br_h', shape=[self.n_hidden_units, ],initializer=tf.constant_initializer(0.001))
73 |             b_out=tf.get_variable('b_out', shape=[self.n_classes, ],initializer=tf.constant_initializer(0.001))
74 | 
75 | 
76 | 
77 |             Lastvalues=tf.reshape(Lastvalues,[-1,self.n_inputs])
78 |             M=tf.reshape(M,[-1,self.n_inputs])
79 |             X = tf.reshape(X, [-1, self.n_inputs])
80 |             Delta=tf.reshape(Delta,[-1,self.n_inputs])
81 | 
82 |             X=math_ops.multiply(X,M)+math_ops.multiply((1-M),Lastvalues)
83 | 
84 |             rth= tf.matmul( Delta, wr_h)+br_h
85 |             rth=math_ops.exp(-tf.maximum(0.0,rth))
86 | 
87 |             #X = tf.reshape(X, [-1, n_inputs])
88 |             #print(X.get_shape(),M.get_shape(),rth.get_shape())
89 |             X=tf.concat([X,rth],1)
90 | 
91 |             X_in = tf.reshape(X, [-1, self.n_steps, self.n_inputs+self.n_hidden_units])
92 | 
93 |             #print(X_in.get_shape())
94 |             # X_in = W*X + b
95 |             #X_in = tf.matmul(X, weights['in']) + biases['in']
96 |             # X_in ==> (128 batches, 28 steps, 128 hidden), reshape back to 3-D
97 |             #X_in = tf.reshape(X_in, [-1, n_steps, n_hidden_units])
98 | 
99 |             if "1.5" in tf.__version__ or "1.7" in tf.__version__ :
100 |                 grud_cell = mygru_cell.MyGRUCell15(self.n_hidden_units)
101 |             elif "1.4" in tf.__version__:
102 |                 grud_cell = mygru_cell.MyGRUCell4(self.n_hidden_units)
103 |             elif "1.2" in tf.__version__:
104 |                 grud_cell = mygru_cell.MyGRUCell2(self.n_hidden_units)
105 |             init_state = grud_cell.zero_state(self.batch_size, dtype=tf.float32)  # initialize an all-zero state
106 |             outputs, final_state = tf.nn.dynamic_rnn(grud_cell, X_in, \
107 |                 initial_state=init_state,\
108 |                 sequence_length=X_lengths,
109 |                 time_major=False)
110 | 
111 |             factor=tf.matrix_diag([1.0/9,1])
112 |             tempout=tf.matmul(tf.nn.dropout(final_state,Keep_prob), w_out) + b_out
113 |             results =tf.nn.softmax(tf.matmul(tempout,factor))  # take the last output
114 |             #todo: dropout of 0.5 and batch normalization
115 |             return results
116 |     def build(self):
117 | 
118 |         self.pred = self.RNN(self.x, self.m, self.delta, self.mean, self.lastvalues, self.x_lengths, self.keep_prob)
119 |         self.cross_entropy = -tf.reduce_sum(self.y*tf.log(self.pred))
120 |         self.train_op = tf.train.AdamOptimizer(self.lr).minimize(self.cross_entropy)
121 | 
122 | 
123 |         self.correct_pred = tf.equal(tf.argmax(self.pred, 1), tf.argmax(self.y, 1))
124 |         self.accuracy = tf.reduce_mean(tf.cast(self.correct_pred, tf.float32))
125 |         self.saver = tf.train.Saver()
126 | 
127 |         loss_sum = tf.summary.scalar("loss", self.cross_entropy)
128 |         acc_sum = tf.summary.scalar("acc", self.accuracy)
129 | 
130 |         self.sum=tf.summary.merge([loss_sum, acc_sum])
131 |         self.writer = tf.summary.FileWriter(self.log_dir + '/' + self.model_dir, self.sess.graph)
132 | 
133 | 
134 |     @property
135 |     def model_dir(self):
136 |         return "{}_{}_{}_{}_{}_{}/epoch{}".format(
137 |             self.model_name, self.lr,
138 |             self.batch_size, self.isNormal,
139 |             self.isbatch_normal,self.isSlicing,
140 |             self.epoch
141 |         )
142 | 
143 |     def save(self, checkpoint_dir, step):
144 |         checkpoint_dir = os.path.join(checkpoint_dir, self.model_dir, self.model_name)
145 | 
146 |         if not os.path.exists(checkpoint_dir):
147 |             os.makedirs(checkpoint_dir)
148 | 
149 |         self.saver.save(self.sess,os.path.join(checkpoint_dir, self.model_name+'.model'), global_step=step)
150 | 
151 |     def load(self, checkpoint_dir):
152 |         import re
153 |         checkpoint_dir = os.path.join(checkpoint_dir, self.model_dir, self.model_name)
154 | 
155 |         ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
156 |         if ckpt and ckpt.model_checkpoint_path:
157 |             ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
158 |             self.saver.restore(self.sess, os.path.join(checkpoint_dir, ckpt_name))
159 |             counter = int(next(re.finditer("(\d+)(?!.*\d)",ckpt_name)).group(0))
160 |             print(" [*] Success to read {}".format(ckpt_name))
161 |             return True, counter
162 |         else:
163 |             #print(" [*] Failed to find a checkpoint")
164 |             return False, 0
165 | 
166 |     def train(self):
167 | 
168 |         could_load, checkpoint_counter = self.load(self.checkpoint_dir)
169 |         if could_load:
170 |             start_epoch = (int)(checkpoint_counter / self.num_batches)
171 |             #start_batch_id = checkpoint_counter - start_epoch * self.num_batches
172 |             start_batch_id=0
173 |             #counter = checkpoint_counter
174 |             counter=start_epoch*self.num_batches
175 |             print(" [*] Load SUCCESS")
176 |             return
177 |         else:
178 |             # initialize all variables
179 |             tf.global_variables_initializer().run()
180 |             counter = 1
181 |             print(" [!] Load failed...")
182 |         start_time=time.time()
183 |         idx = 0
184 |         epochcount=0
185 |         # X: batches * steps, n_inputs   2*3*2
186 |         # m: batches * steps, n_inputs
187 |         # delta: batches * steps, n_inputs
188 |         # mean: n_inputs, mean of all observations, does not contain the imputations
189 |         # lastvalues: batches * steps, n_inputs, last observed value of x if x is missing;
190 |         #   if lastvalues is zero, take the mean instead
191 |         # assume series1's timestamps: 0, 0.8, 2; series2's timestamps: 0, 1
192 |         #data_x=[[[1,0],[3,2],[2,0]],[[0,2],[1,1],[0,0]]]
193 |         #data_y=[[1,0],[0,1]]
194 |         #data_m=[[[1,0],[1,1],[1,0]],[[0,1],[1,1],[0,0]]]
195 |         #data_delta=[[[0,0],[0.8,0.8],[1.2,1.2]],[[0,0],[1,1],[0,0]]]
196 |         #data_mean=[1.75,1.66667]
197 |         #data_lastvalues=[[[1,1.66667],[3,2],[2,2]],[[1.75,2],[1,1],[0,0]]]
198 |         #data_x_lengths=[3,2]
199 |         dataset=self.dataset
200 |         while epochcount