├── myThread.py
├── data
    └── config.json
├── log.py
├── customized_gru.py
├── param.py
├── linear.py
├── test.py
├── config.py
├── util.py
├── plt.py
├── model.py
├── trajectoryNet.py
└── data_funs.py


/myThread.py:
--------------------------------------------------------------------------------
 1 | import threading
 2 | 
 3 | class MyThread(threading.Thread):
 4 |   def __init__(self, func, args):
 5 |     threading.Thread.__init__(self)
 6 |     self.func = func
 7 |     self.args = args
 8 |     self.res = None
 9 | 
10 |   def get_result(self):
11 |     return self.res
12 | 
13 |   def run(self):
14 |     self.res = self.func(*self.args)


--------------------------------------------------------------------------------
/data/config.json:
--------------------------------------------------------------------------------
 1 | {
 2 | "task": "trajectoryNet",
 3 | "testmode": "lobo",
 4 | "val_id": [6], 
 5 | "test_id": [0,1,2,3,4,5],
 6 | "hidden_size": 100,
 7 | "learning_rate": 0.1,
 8 | "batch_size": 128,
 9 | "num_layers": 1,
10 | "num_epochs": 100,
11 | "activation": "maxout",
12 | "deep_gate": false,
13 | "checkpoint": true,
14 | "restore":false,
15 | "exp_seq_len":100,
16 | "init_scale": 0.001,
17 | "weight_initializer": "uniform",
18 | "evaluate_freq": 50,
19 | "num_threads": 100,
20 | "tensorboard":false,
21 | "useGPU":true,
22 | "test_mode": false,
23 | "num_classes":4,
24 |   "maxOut_numUnits":5,
25 |   "num_features":5,
26 |   "embeded_dims":50,
27 |   "l2_preparam":0.001,
28 |   "rnn_type":"gru_b"
29 | 
30 | }
31 | 


--------------------------------------------------------------------------------
/log.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import time
 3 | 
 4 | class Log(object):
 5 | 
 6 |     def __init__(self,path,name):
 7 |         self.train_log_path = path
 8 |         second = time.localtime(time.time())
 9 |         time_str = time.strftime('%Y-%m-%d-%H-%M-%S',second)
10 |         #summary_log_file 初始化   由当前时间命名
11 |         self.summary_log_file_name =  "summary"+time_str +name+".csv"
12 |         self.summary_log_file = open(path +self.summary_log_file_name,"w+")
13 | 
14 |         #training_log_file 由training+当前时间命名
15 |         self.train_log_file_name =  "training" + time_str + name+".csv"
16 |         self.train_log_file = open(path +self.train_log_file_name,"w+")
17 | 
18 |         self.addheader()
19 | 
20 |     def addheader(self):
21 |         self.summary_log_file.write("iteration, trainLoss, valLoss, testLoss, trainAcc, valAcc, testAcc\n")
22 |         #self.train_log_file.write("iteration, trainLoss,trainAcc\n")
23 | 
24 |     def summary_log(self,data,batch_iter):
25 |         (cost_train, acc_train,  cost_val, acc_val,cost_test, acc_test) = data
26 |         self.summary_log_file.write("{0}, {1:0.3f}, {2:0.3f}, {3:0.3f}, {4:0.3f}, {5:0.3f}, {6:0.3f}\n".format(batch_iter, cost_train, cost_val, cost_test, acc_train, acc_val, acc_test))
27 |         self.summary_log_file.flush()
28 | 
29 | 
30 |     def training_log(self,data):
31 |         #trainLoss,trainAcc =
32 |         self.train_log_file.write(data)
33 |         self.train_log_file.write("\n")
34 |         self.train_log_file.flush()
35 | 
36 | 
37 |     def close(self):
38 |         self.summary_log_file.close()
39 |         self.train_log_file.close()
40 | 
41 | 
42 | if __name__ == "__main__":
43 | 
44 |     print(str(3)+"dd")


--------------------------------------------------------------------------------
/customized_gru.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | import linear
 3 | from tensorflow.python.ops.rnn_cell import RNNCell
 4 | from tensorflow.python.ops.math_ops import tanh
 5 | from tensorflow.python.platform import tf_logging as logging
 6 | from tensorflow.python.ops import variable_scope as vs
 7 | from tensorflow.python.ops import array_ops
 8 | from tensorflow.python.ops.math_ops import sigmoid
 9 | from tensorflow.python.ops.math_ops import tanh
10 | 
11 | 
12 | class CustomizedGRU(RNNCell):
13 | 
14 |     def __init__(self,num_units,maxOut_numUnits,activation = tanh):
15 |         self._num_units = num_units
16 |         self._activation = activation
17 |         self._maxOut_numUnits = maxOut_numUnits
18 | 
19 | 
20 |     @property
21 |     def state_size(self):
22 |         return self._num_units
23 | 
24 |     @property
25 |     def output_size(self):
26 |         return self._num_units
27 | 
28 |     def __call__(self, inputs, state, scope=None):
29 | 
30 |         with vs.variable_scope(scope or "gru_cell"):
31 |             with vs.variable_scope("gates"):
32 |                 gate = linear._linear([inputs,state],2*self._num_units,True,1.0,scope = scope)
33 | 
34 |                 r,u = array_ops.split(gate,num_or_size_splits=2,axis=1)
35 | 
36 |                 r,u = sigmoid(r),sigmoid(u)
37 |                 #r,u = tanh(r),tanh(u)
38 | 
39 |             with vs.variable_scope("candidate"):
40 | 
41 |                 c = self.maxout(inputs,r*state,self._maxOut_numUnits,0,self._num_units,scope= scope)
42 | 
43 |             new_h = u*state +(1-u)*c
44 | 
45 |         return new_h,new_h
46 | 
47 | 
48 | 
49 |     def maxout(self, input1, input2, num_units, ini_value, output_size, scope=None):
50 |         shape = input1.get_shape().as_list()
51 |         dim = shape[-1]
52 |         outputs = None
53 |         for i in range(num_units):
54 |             with tf.variable_scope(str(i)):
55 |                 y = self._activation(linear._linear([input1, input2],output_size, True, ini_value,scope=scope))
56 |                 if outputs is None:
57 |                     outputs = y
58 |                 else:
59 |                     outputs = tf.maximum(outputs, y)
60 |         c = outputs
61 |         return c
62 | 


--------------------------------------------------------------------------------
/param.py:
--------------------------------------------------------------------------------
 1 | from enum import Enum
 2 | import tensorflow as tf
 3 | 
 4 | WIDTH = 30
 5 | FENWEI_MAX = 0.95
 6 | FILTER_K = 10
 7 | 
 8 | 
 9 | train_file_pattern = "interval_[5]_train_*.tfrecords"
10 | valid_file_pattern = "interval_[5]_valid_*.tfrecords"
11 | test_file_pattern = "interval_[5]_test_*.tfrecords"
12 | 
13 | 
14 | SPEED_SEC = "speed_sec"
15 | ACC_SEC = "acc_sec"
16 | AVG_SPEED = "avg_speed"
17 | STD_SPEED = "std_speed"
18 | MEAN_ACC = "mean_acc"
19 | STD_ACC = "std_acc"
20 | HEAD = "head"
21 | HEAD_MEAN = "head_mean"
22 | STD_HEAD = "std_head"
23 | MAX_ACC = "max_acc"
24 | MAX_SPEED = "max_speed"
25 | MAX_HEAD = "max_head"
26 | EARLY = "early"
27 | LABEL = "label"
28 | 
29 | feature = {
30 |         SPEED_SEC: tf.FixedLenFeature([],tf.string),
31 |         AVG_SPEED : tf.FixedLenFeature([],tf.string),
32 |         STD_SPEED : tf.FixedLenFeature([],tf.string),
33 |         ACC_SEC   : tf.FixedLenFeature([],tf.string),
34 |         MEAN_ACC  : tf.FixedLenFeature([],tf.string),
35 |         STD_ACC   : tf.FixedLenFeature([],tf.string),
36 |         HEAD      : tf.FixedLenFeature([],tf.string),
37 |         HEAD_MEAN : tf.FixedLenFeature([],tf.string),
38 |         STD_HEAD  : tf.FixedLenFeature([],tf.string),
39 |         MAX_SPEED  : tf.FixedLenFeature([],tf.string),
40 |         MAX_ACC  : tf.FixedLenFeature([],tf.string),
41 |         MAX_HEAD  : tf.FixedLenFeature([],tf.string),
42 |         EARLY   :   tf.FixedLenFeature([],tf.int64),
43 |         LABEL:tf.FixedLenFeature([],tf.int64)
44 |     }
45 | 
46 | class RNNType(Enum):
47 |     LSTM = 1 # LSTM unidirectional
48 |     LSTM_b = 2 # LSTM bidirectional
49 |     GRU = 3 # GRU
50 |     GRU_b = 4 # GRU, bidirectional
51 |     NORM_GRU = 5
52 |     NORM_GRU_b = 6
53 | 
54 | class NetType(Enum):
55 |     DNN_MAXOUT = 0
56 |     DNN  = 1
57 |     CNN  = 2
58 |     RNN_NV1 = 3
59 |     RNN_NVN = 4
60 | 
61 | class DirName(Enum):
62 |     DNN = "dnn/"
63 |     DNN_MAXOUT = "dnn_maxout/"
64 |     CNN = "cnn/"
65 |     RNN_NV1 = "rnn_nv1/"
66 |     RNN_NVN = "rnn_nvn/"
67 | 
68 | class FeatureName(Enum):
69 |     SPEED_SEC = "speed_sec"
70 |     ACC_SEC = "acc_sec"
71 |     AVG_SPEED = "avg_speed"
72 |     STD_SPEED = "std_speed"
73 |     MEAN_ACC = "mean_acc"
74 |     STD_ACC = "std_acc"


--------------------------------------------------------------------------------
/linear.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import
 2 | from __future__ import division
 3 | from __future__ import print_function
 4 | 
 5 | 
 6 | from tensorflow.python.ops import array_ops
 7 | from tensorflow.python.ops import init_ops
 8 | from tensorflow.python.ops import math_ops
 9 | from tensorflow.python.ops import nn_ops
10 | from tensorflow.python.ops import variable_scope as vs
11 | 
12 | 
13 | from tensorflow.python.util import nest
14 | 
15 | 
16 | def _linear(args, output_size, bias, bias_start=0.0, scope=None):
17 |   """Linear map: sum_i(args[i] * W[i]), where W[i] is a variable.
18 |   Args:
19 |     args: a 2D Tensor or a list of 2D, batch x n, Tensors.
20 |     output_size: int, second dimension of W[i].
21 |     bias: boolean, whether to add a bias term or not.
22 |     bias_start: starting value to initialize the bias; 0 by default.
23 |     scope: (optional) Variable scope to create parameters in.
24 |   Returns:
25 |     A 2D Tensor with shape [batch x output_size] equal to
26 |     sum_i(args[i] * W[i]), where W[i]s are newly created matrices.
27 |   Raises:
28 |     ValueError: if some of the arguments has unspecified or wrong shape.
29 |   """
30 |   if args is None or (nest.is_sequence(args) and not args):
31 |     raise ValueError("`args` must be specified")
32 |   if not nest.is_sequence(args):
33 |     args = [args]
34 | 
35 |   # Calculate the total size of arguments on dimension 1.
36 |   total_arg_size = 0
37 |   shapes = [a.get_shape() for a in args]
38 |   for shape in shapes:
39 |     if shape.ndims != 2:
40 |       raise ValueError("linear is expecting 2D arguments: %s" % shapes)
41 |     if shape[1].value is None:
42 |       raise ValueError("linear expects shape[1] to be provided for shape %s, "
43 |                        "but saw %s" % (shape, shape[1]))
44 |     else:
45 |       total_arg_size += shape[1].value
46 | 
47 |   dtype = [a.dtype for a in args][0]
48 | 
49 |   # Now the computation.
50 |   scope = vs.get_variable_scope()
51 |   with vs.variable_scope(scope) as outer_scope:
52 |     weights = vs.get_variable(
53 |         "weights", [total_arg_size, output_size], dtype=dtype)
54 |     if len(args) == 1:
55 |       res = math_ops.matmul(args[0], weights)
56 |     else:
57 |       res = math_ops.matmul(array_ops.concat(args, 1), weights)
58 |     if not bias:
59 |       return res
60 |     with vs.variable_scope(outer_scope) as inner_scope:
61 |       inner_scope.set_partitioner(None)
62 |       biases = vs.get_variable(
63 |           "biases", [output_size],
64 |           dtype=dtype,
65 |           initializer=init_ops.constant_initializer(bias_start, dtype=dtype))
66 |   return nn_ops.bias_add(res, biases)
67 | 


--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import numpy as np
 3 | import random
 4 | import math
 5 | import config
 6 | import pandas as pd
 7 | from param import FeatureName
 8 | from param import RNNType
 9 | import tensorflow as tf
10 | import os
11 | import param
12 | 
13 | a = np.random.randint(0,10,[2,2])
14 | b = np.random.randint(0,10,[2,2])
15 | c = np.random.randint(0,10,[2,2])
16 | 
17 | a_df = pd.DataFrame(a)
18 | b_df = pd.DataFrame(b)
19 | c_df = pd.DataFrame(c)
20 | cv = pd.concat([a_df,b_df,c_df],axis=1)
21 | print(24%10)
22 | 
23 | # data_dir ="G:/all_data/tfrecords/"
24 | # filenames = os.listdir(data_dir)
25 | # filenames = [os.path.join(data_dir,i) for i in filenames]
26 | #
27 | # feature = {
28 | #     FeatureName.SPEED_SEC.value : tf.FixedLenFeature([],tf.string),
29 | #     FeatureName.AVG_SPEED.value : tf.FixedLenFeature([],tf.string),
30 | #     FeatureName.STD_SPEED.value : tf.FixedLenFeature([],tf.string),
31 | #     FeatureName.ACC_SEC.value   : tf.FixedLenFeature([],tf.string),
32 | #     FeatureName.MEAN_ACC.value  : tf.FixedLenFeature([],tf.string),
33 | #     FeatureName.STD_ACC.value   : tf.FixedLenFeature([],tf.string),
34 | #     "label":tf.FixedLenFeature([],tf.int64)
35 | # }
36 | #
37 | # filename_queue = tf.train.string_input_producer(filenames,num_epochs=1)
38 | # reader = tf.TFRecordReader()
39 | # _,serialized_example = reader.read(filename_queue)
40 | #
41 | # features = tf.parse_single_example(serialized_example,features= feature)
42 | # speed_sec_flat = tf.decode_raw(features[param.SPEED_SEC],tf.int64)
43 | # speed_sec = tf.reshape(speed_sec_flat,[-1,param.width])
44 | # label = tf.cast(features[param.LABEL],tf.int64)
45 | #
46 | #
47 | #
48 | # with tf.Session() as sess:
49 | #     sess.run(tf.global_variables_initializer())
50 | #     sess.run(tf.local_variables_initializer())
51 | #     coord = tf.train.Coordinator()
52 | #     threads = tf.train.start_queue_runners(coord=coord)
53 | #
54 | #     for i in range(2):
55 | #         speed_sec1,label1 = sess.run([speed_sec,label])
56 | #         print(type(speed_sec1))
57 | #         full_seq_num = speed_sec1.shape[0] // 100
58 | #         print(full_seq_num)
59 | #         list = []
60 | #         begin = 0
61 | #         if full_seq_num >0:
62 | #             for e in range(full_seq_num):
63 | #                 begin = e*30
64 | #                 list.append(tf.slice(speed_sec1,[begin,30],[100,30]))
65 | #         remain = speed_sec1.shape[0] - full_seq_num*100
66 | #         remain_tensor = tf.slice(speed_sec1,[begin,30],[remain,30])
67 | #
68 | #         remain_tensor_pad = tf.pad(remain_tensor,[[0,100 - remain],[0,0]])
69 | #         list.append(remain_tensor_pad)
70 | #         for h in list:
71 | #             print(h)
72 | #
73 | #         tf.train.shuffle_batch()
74 | #
75 | #
76 | #
77 | #     coord.request_stop()
78 | #     coord.join(threads)
79 | 


--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | from tensorflow.python.ops.math_ops import tanh
 3 | from tensorflow.python.ops.math_ops import sigmoid
 4 | from enum import Enum
 5 | from param import RNNType
 6 | from param import NetType
 7 | 
 8 | 
 9 | class Config(object):
10 |     def __init__(self,configFile="data/config.json"):
11 |         dconf = json.load(open(configFile))
12 |         #测试集ID
13 |         self.test_id = dconf['test_id']
14 |         #验证集ID
15 |         self.val_id = dconf['val_id']
16 |         #预期序列长度
17 |         self.exp_seq_len = dconf["exp_seq_len"]
18 |         #学习速率
19 |         self.learning_rate = dconf["learning_rate"]
20 |         #批数据尺寸
21 |         self.batch_size = dconf["batch_size"]
22 |         #隐藏层数
23 |         self.num_layers = dconf["num_layers"]
24 |         #迭代周期
25 |         self.num_epochs = dconf["num_epochs"]
26 |         #是否开启tensorboard
27 |         self.tensorboard = dconf["tensorboard"]
28 |         self.init_scale = dconf["init_scale"]
29 |         #线程数
30 |         self.num_threads = dconf["num_threads"]
31 |         #gru中的隐藏节点
32 |         self.hidden_size = dconf["hidden_size"]
33 |         #任务名称
34 |         self.task = dconf["task"]
35 |         #是否用GPU加速
36 |         self.useGPU = dconf["useGPU"]
37 |         #weight初始化方式
38 |         self.weight_initializer = dconf["weight_initializer"]
39 |         #评估频率
40 |         self.evaluate_freq = dconf["evaluate_freq"]
41 |         self.testmode = dconf["testmode"]
42 |         #是否有检查点
43 |         self.checkpoint = dconf["checkpoint"]
44 |         #是否重载变量
45 |         self.restore = dconf["restore"]
46 |         #激励函数
47 |         self.activation = dconf["activation"]
48 |         self.test_mode = dconf["test_mode"]
49 |         #分类个数
50 |         self.num_classes = dconf["num_classes"]
51 |         #maxout中的单元个数
52 |         self.maxOut_numUnits = dconf["maxOut_numUnits"]
53 |         #特征数量
54 |         self.num_features = dconf["num_features"]
55 |         #嵌入后的向量维度
56 |         self.embeded_dims = dconf["embeded_dims"]
57 |         #L2正则化超参数
58 |         self.l2_preparam = dconf["l2_preparam"]
59 |         #
60 |         self.rnn_type = dconf["rnn_type"]
61 | 
62 |         self.use_tfrecord = dconf["use_tfrecord"]
63 | 
64 |         self.tfrecord_path = dconf["tfrecord_path"]
65 | 
66 |         self.shuffle = dconf["shuffle"]
67 | 
68 |         self.keep_prob = dconf["keep_prob"]
69 | 
70 |         self.discretization_width = dconf["discretization_width"]
71 | 
72 |         self.net_type = dconf["net_type"]
73 | 
74 |         self.use_dropout = dconf["use_dropout"]
75 | 
76 | 
77 | 
78 | class TrainingConfig(object):
79 |     def __init__(self,is_training,is_validation,is_test,batch_size,len_features,net_type = NetType.RNN_NV1,rnn_type = RNNType.GRU):
80 |         self.is_training = is_training
81 |         self.is_validation = is_validation
82 |         self.is_test = is_test
83 |         self.batch_size = batch_size
84 |         self.rnn_type = rnn_type
85 |         self.net_type = net_type
86 | 
87 |         #特征长度即onehot总长度
88 |         self.len_features = len_features
89 |         self.train_seq_len = []
90 |         self.val_seq_len = []
91 |         self.test_seq_len = []
92 |         self.activation = tanh
93 | 
94 | 


--------------------------------------------------------------------------------
/util.py:
--------------------------------------------------------------------------------
  1 | from math import radians, cos, sin, asin, sqrt,atan,pi
  2 | import  os
  3 | from glob import glob
  4 | import param
  5 | from param import RNNType
  6 | from param import NetType
  7 | from param import DirName
  8 | from log import Log
  9 | 
 10 | def jwd2dis(lat1,lon1,lat2,lon2):
 11 |     lat1,lon1,lat2,lon2 = map(radians,[lat1,lon1,lat2,lon2])
 12 |     dlon = lon2 - lon1
 13 |     dlat = lat2 - lat1
 14 |     a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
 15 |     c = 2 * asin(sqrt(a))
 16 |     r = 6371  # 地球平均半径，单位为公里
 17 |     return c * r * 1000
 18 | 
 19 | def jwd2angle(lat1,lon1,lat2,lon_2):
 20 |     dy = lon_2 - lon1
 21 |     dx = lat2 - lat1
 22 |     angle = 0
 23 | 
 24 |     if dx == 0 and dy ==0:
 25 |         angle = 0
 26 |     elif dx == 0 and dy >0:
 27 |         angle = 90
 28 |     elif dx == 0 and dy <0:
 29 |         angle = 270
 30 |     elif dy == 0 and dx >0:
 31 |         angle = 0
 32 |     elif dy == 0 and dx <0:
 33 |         angle = 180
 34 |     elif dy > 0 and dx >0:
 35 |         angle = atan(dy/dx) * 180/pi
 36 |     elif dy >0 and dx <0:
 37 |         angle = atan(abs(dy/dx))* 180/pi + 90
 38 |     elif dy <0 and dx <0:
 39 |         angle = atan(abs(dy / dx))* 180/pi + 180
 40 |     else:
 41 |         #小于零
 42 |         angle = atan(abs(dy / dx))* 180/pi + 270
 43 | 
 44 |     return angle
 45 | 
 46 | def timestamp2second(time1,time2):
 47 | 
 48 |     return abs(time1-time2)*3600*24
 49 | 
 50 | def switch_mode(str):
 51 |     str = str.strip()
 52 |     if(str == "bike"):
 53 |         return "0"
 54 | 
 55 |     if(str == "car"):
 56 |         return "1"
 57 | 
 58 |     if (str == "walk"):
 59 |         return "2"
 60 | 
 61 |     if (str == "bus"):
 62 |         return "3"
 63 | 
 64 |     if (str == "train"):
 65 |         return "4"
 66 | 
 67 |     if (str == "subway"):
 68 |         return "5"
 69 | 
 70 |     if (str == "airplane"):
 71 |         return "6"
 72 | 
 73 |     if (str == "taxi"):
 74 |         return "7"
 75 |     if (str == "boat"):
 76 |         return "8"
 77 |     if (str == "run"):
 78 |         return "9"
 79 |     if (str == "motorcycle"):
 80 |         return "10"
 81 |     else:
 82 |         print(str)
 83 |         return "11"
 84 | 
 85 | def rename_file():
 86 | 
 87 |     data_dir = "G:/新建文件夹/Geolife Trajectories 1.3/Data/"
 88 | 
 89 |     valiable_user_data = open("./data/have_label_user.txt", "r")
 90 |     user_list = valiable_user_data.readlines()
 91 |     for i in user_list[1:]:
 92 |         user_id = i[0:3]
 93 |         data_txt_name = data_dir + user_id + "/userdata.csv"
 94 |         features_name = data_dir+user_id + "/user_features.csv"
 95 |         new_data_name = data_dir + user_id + "/userdata_interval_1.csv"
 96 |         new_features_name = data_dir + user_id + "/user_features_interval_1.csv"
 97 |         os.rename(data_txt_name,new_data_name)
 98 |         os.rename(features_name,new_features_name)
 99 | 
100 | def delete_file():
101 |     data_dir = "G:/新建文件夹/Geolife Trajectories 1.3/Data/"
102 | 
103 |     valiable_user_data = open("./data/have_label_user.txt", "r")
104 |     user_list = valiable_user_data.readlines()
105 |     for i in user_list:
106 |         user_id = i[0:3]
107 |         data_txt_name = data_dir + user_id + "/user_features_interval_2.csv"
108 | 
109 |         os.remove(data_txt_name)
110 | 
111 | def search_file(pattern,path):
112 |     paths  = glob(os.path.join(path,pattern))
113 |     filenames = [ path.split("\\")[1]  for path in paths]
114 |     filenames = [os.path.join(path,name) for name in filenames]
115 |     return filenames
116 | 
117 | def get_net_type(net_type):
118 |     return param.NetType(net_type)
119 | 
120 | def get_rnn_type(rnn_type):
121 |     return param.RNNType(rnn_type)
122 | 
123 | 
124 | def init_environment(net_type,rnn_type):
125 |     log_path = "./logdir/shiyanxiuzheng/"
126 |     data_path = "./data/tfrecord12/"
127 |     if net_type == NetType.DNN:
128 |         data_dir = DirName.DNN.value
129 |         log_dir = DirName.DNN.value
130 |     elif net_type == NetType.DNN_MAXOUT:
131 |         data_dir = DirName.DNN_MAXOUT.value
132 |         log_dir = DirName.DNN_MAXOUT.value
133 |     elif net_type == NetType.RNN_NV1:
134 |         data_dir = DirName.RNN_NV1.value
135 |         log_dir = DirName.RNN_NV1.value
136 |     else:
137 |         data_dir = DirName.RNN_NVN.value
138 |         log_dir = DirName.RNN_NVN.value
139 | 
140 |     log_path = log_path + log_dir
141 |     data_path = data_path + data_dir
142 |     net_name = str(NetType(net_type)).split(".")[-1]
143 |     nn_name = str(RNNType(rnn_type)).split(".")[-1]
144 |     LOGGER = Log(log_path, "_" + net_name + "_" + nn_name)
145 | 
146 |     return log_path,data_path,LOGGER
147 | 
148 | if __name__ == "__main__":
149 |     #print(search_file("interval_[0-1]_*_train.tfrecords","G:/all_data/tfrecords/"))
150 |     print(jwd2angle(39.974879,116.33258899999998,39.97487,116.332673))


--------------------------------------------------------------------------------
/plt.py:
--------------------------------------------------------------------------------
  1 | import matplotlib
  2 | import matplotlib.pyplot as plt
  3 | import pandas as pd
  4 | import numpy as np
  5 | 
  6 | matplotlib.rcParams['font.family']='SimHei'
  7 | 
  8 | logdir = "./logdir/shiyanxiuzheng/"
  9 | 
 10 | def read_csv_file(name):
 11 | 
 12 |     file = open(name)
 13 | 
 14 |     df = pd.read_csv(file)
 15 | 
 16 |     result_arr = np.array(df,np.float64)
 17 | 
 18 |     return result_arr
 19 | 
 20 | 
 21 | def draw_chart(arr):
 22 |     plt.plot(arr[:,-1])
 23 |     plt.show()
 24 | 
 25 | def draw_4_features():
 26 |     temp_dir = "12features/"
 27 |     features_3 = read_csv_file(logdir +temp_dir+"RNN_NV1_GRU_b_3.csv")
 28 |     features_6 = read_csv_file(logdir + temp_dir+"RNN_NV1_GRU_b_6.csv")
 29 |     features_9 = read_csv_file(logdir + temp_dir+"RNN_NV1_GRU_b_9.csv")
 30 |     features_12 = read_csv_file(logdir + temp_dir+"RNN_NV1_GRU_b_12.csv")
 31 | 
 32 |     limit = 20
 33 |     x = range(1,limit+1)
 34 | 
 35 |     plt.plot(x,features_3[0:limit, -1], "bx-", label="3个特征")
 36 |     plt.plot(x,features_6[0:limit, -1], "rx-", label="6个特征")
 37 |     plt.plot(x,features_9[0:limit, -1], "gx-", label="9个特征")
 38 |     plt.plot(x,features_12[0:limit, -1], "yx-", label="12个特征")
 39 | 
 40 |     plt.xlabel("mini-batch")
 41 |     plt.ylabel("accuarcy")
 42 |     plt.ylim(0.8, 0.95)
 43 |     plt.xlim(0, limit+2)
 44 |     plt.legend(loc=0)
 45 |     plt.title("RNN_Nv1 双层双向MaxoutGRU模型不同特征的精度（测试集）")
 46 |     plt.savefig(logdir +temp_dir +"rnn_nv1_features")
 47 | 
 48 | 
 49 |     plt.show()
 50 | 
 51 | def draw_width():
 52 |     temp_dir = "width/"
 53 |     features_3 = read_csv_file(logdir +temp_dir+"RNN_NV1_GRU_b_10.csv")
 54 |     features_6 = read_csv_file(logdir + temp_dir+"RNN_NV1_GRU_b_20.csv")
 55 |     features_9 = read_csv_file(logdir + temp_dir+"RNN_NV1_GRU_b_30.csv")
 56 |     features_12 = read_csv_file(logdir + temp_dir+"RNN_NV1_GRU_b_40.csv")
 57 | 
 58 |     limit = 20
 59 |     x = range(1,limit+1)
 60 | 
 61 |     plt.plot(x,features_3[0:limit, -1], "bx-", label="10")
 62 |     plt.plot(x,features_6[0:limit, -1], "rx-", label="20")
 63 |     plt.plot(x,features_9[0:limit, -1], "gx-", label="30")
 64 |     plt.plot(x,features_12[0:limit, -1], "yx-", label="40")
 65 | 
 66 |     plt.xlabel("mini-batch")
 67 |     plt.ylabel("accuarcy")
 68 |     plt.ylim(0.8, 0.96)
 69 |     plt.xlim(0, limit+2)
 70 |     plt.legend(loc=0)
 71 |     plt.title("RNN_Nv1 双层双向MaxoutGRU模型 离散宽度（测试集）")
 72 |     plt.savefig(logdir +temp_dir +"rnn_nv1_width")
 73 | 
 74 | 
 75 |     plt.show()
 76 | 
 77 | def draw_dnn():
 78 |     dnn = read_csv_file(logdir + "result_dnn/dnn.csv")
 79 |     dnn_dropout = read_csv_file(logdir + "result_dnn/dnn_dropout.csv")
 80 |     dnn_maxout = read_csv_file(logdir + "result_dnn/dnn_maxout.csv")
 81 | 
 82 |     x = range(1,20)
 83 | 
 84 |     plt.plot(x,dnn[:, -1], "bx-", label="dnn")
 85 |     plt.plot(x,dnn_dropout[:, -1], "rx-", label="dnn_dropout")
 86 |     plt.plot(x,dnn_maxout[:, -1], "gx-", label="dnn_maxout")
 87 | 
 88 |     plt.xlabel("mini-batch")
 89 |     plt.ylabel("accuarcy")
 90 |     plt.ylim(0.5, 1)
 91 |     plt.xlim(0, 21)
 92 |     plt.legend(loc=1)
 93 |     plt.title("三种DNN模型的精度（测试集）")
 94 |     plt.savefig(logdir + "result_dnn/dnn_3")
 95 | 
 96 |     plt.show()
 97 | 
 98 | def rnn_3():
 99 | 
100 |     temp_dir = "3_rnn/"
101 | 
102 |     lstm = read_csv_file(logdir + temp_dir+"RNN_NV13 2.csv")
103 |     maxoutgru = read_csv_file(logdir + temp_dir+ "RNN_NV13 4.csv")
104 |     normal_gru = read_csv_file(logdir + temp_dir+ "RNN_NV13 6.csv")
105 | 
106 |     limit = 29
107 |     x = range(1,limit+1)
108 | 
109 |     plt.plot(x,lstm[0:limit, -1], "bx-", label="lstm")
110 |     plt.plot(x,normal_gru[0:limit, -1], "rx-", label="normal_gru")
111 |     plt.plot(x,maxoutgru[0:limit, -1], "gx-", label="maxout_gru")
112 | 
113 |     plt.xlabel("mini-batch")
114 |     plt.ylabel("accuarcy")
115 |     plt.ylim(0.8, 0.95)
116 |     plt.xlim(0, limit+2)
117 |     plt.legend(loc=1)
118 |     plt.title("三种RNN模型的精度（测试集）")
119 |     plt.savefig(logdir +temp_dir +"3_rnn")
120 | 
121 |     plt.show()
122 | 
123 | def gru_2():
124 |     temp_dir = "3_rnn/"
125 | 
126 |     gru = read_csv_file(logdir + temp_dir + "RNN_NV13 3.csv")
127 |     gru_b= read_csv_file(logdir + temp_dir + "RNN_NV13 4.csv")
128 | 
129 |     limit = 48
130 |     x = range(1, limit + 1)
131 | 
132 |     plt.plot(x, gru[0:limit, -1], "bx-", label="单向MaxoutGRU")
133 |     plt.plot(x, gru_b[0:limit, -1], "rx-", label="双向MaxoutGRU")
134 | 
135 |     plt.xlabel("mini-batch")
136 |     plt.ylabel("accuarcy")
137 |     plt.ylim(0.8, 0.95)
138 |     plt.xlim(0, limit + 2)
139 |     plt.legend(loc=1)
140 |     plt.title("单双向MaxoutGRU模型的精度（测试集）")
141 |     plt.savefig(logdir + temp_dir + "gru_2")
142 | 
143 |     plt.show()
144 | 
145 | def rnn_nvn():
146 |     temp_dir = "3nvn_gru/"
147 | 
148 |     gru = read_csv_file(logdir + temp_dir+"RNN_NVN_GRU.csv")
149 |     gru_b_3 = read_csv_file(logdir + temp_dir+ "RNN_NVN_GRU_b 3 feature.csv")
150 |     gru_b_9 = read_csv_file(logdir + temp_dir+ "RNN_NVN_GRU_b 9 feature.csv")
151 | 
152 |     limit = 30
153 |     x = range(1,limit+1)
154 | 
155 |     plt.plot(x,gru[0:limit, -1], "bx-", label="单向MaxoutGRU")
156 |     plt.plot(x,gru_b_3[0:limit, -1], "rx-", label="双向MaxoutGRU 3个特征")
157 |     plt.plot(x,gru_b_9[0:limit, -1], "gx-", label="双向MaxoutGRU 9个特征")
158 | 
159 |     plt.xlabel("mini-batch")
160 |     plt.ylabel("accuarcy")
161 |     plt.ylim(0.8, 0.95)
162 |     plt.xlim(0, limit+2)
163 |     plt.legend(loc=1)
164 |     plt.title("RNN_NVN模型的精度（测试集）")
165 |     plt.savefig(logdir +temp_dir +"3_nvn_gru")
166 | 
167 |     plt.show()
168 | 
169 | def draw_3_model():
170 |     temp_dir = "3_model/"
171 | 
172 |     gru = read_csv_file(logdir + temp_dir+"dnn no dropout.csv")
173 |     gru_b_3 = read_csv_file(logdir + temp_dir+ "rnn_nvn.csv")
174 |     gru_b_9 = read_csv_file(logdir + temp_dir+ "rnn_nv1.csv")
175 | 
176 |     limit = 39
177 |     x = range(1,limit+1)
178 | 
179 |     plt.plot(x,gru[0:limit, -1], "bx-", label="双层DNN")
180 |     plt.plot(x,gru_b_3[0:limit, -1], "rx-", label="双层双向MaxoutGRU RNN_NVN")
181 |     plt.plot(x,gru_b_9[0:limit, -1], "gx-", label="双层双向MaxoutGRU RNN_NV1")
182 | 
183 |     plt.xlabel("mini-batch")
184 |     plt.ylabel("accuarcy")
185 |     plt.ylim(0.8, 0.95)
186 |     plt.xlim(0, limit+2)
187 |     plt.legend(loc=0)
188 |     plt.title("三种模型的精度（测试集）")
189 |     plt.savefig(logdir +temp_dir +"3_model")
190 | 
191 |     plt.show()
192 | 
193 | def draw_nvn_2():
194 |     temp_dir = "3nvn_gru/"
195 | 
196 |     gru = read_csv_file(logdir + temp_dir+"RNN_NVN_LSTM_b.csv")
197 |     gru_b_3 = read_csv_file(logdir + temp_dir+ "RNN_NVN_GRU_b 9 feature.csv")
198 |     #gru_b_9 = read_csv_file(logdir + temp_dir+ "rnn_nv1.csv")
199 | 
200 |     limit = 39
201 |     x = range(1,limit+1)
202 | 
203 |     plt.plot(x,gru[0:limit, -1], "bx-", label="双层双向LSTM RNN_NVN")
204 |     plt.plot(x,gru_b_3[0:limit, -1], "rx-", label="双层双向MaxoutGRU RNN_NVN")
205 |     #plt.plot(x,gru_b_9[0:limit, -1], "gx-", label="双层双向MaxoutGRU RNN_NV1")
206 | 
207 |     plt.xlabel("mini-batch")
208 |     plt.ylabel("accuarcy")
209 |     plt.ylim(0.8, 0.95)
210 |     plt.xlim(0, limit+2)
211 |     plt.legend(loc=0)
212 |     plt.title("RNN_NvN模型两种网络结构的精度（测试集）")
213 |     plt.savefig(logdir +temp_dir +"2_rnn")
214 | 
215 |     plt.show()
216 | 
217 | def draw_baifenwei():
218 |     temp_dir = "baifenweiduibi/"
219 | 
220 |     gru = read_csv_file(logdir + temp_dir+"baifenwei95.csv")
221 |     gru_b_3 = read_csv_file(logdir + temp_dir+ "baifenwei99.csv")
222 |     #gru_b_9 = read_csv_file(logdir + temp_dir+ "rnn_nv1.csv")
223 | 
224 |     limit = 30
225 |     x = range(1,limit+1)
226 | 
227 |     plt.plot(x,gru[0:limit, -1], "bx-", label="百分位95")
228 |     plt.plot(x,gru_b_3[0:limit, -1], "rx-", label="百分位99")
229 |     #plt.plot(x,gru_b_9[0:limit, -1], "gx-", label="双层双向MaxoutGRU RNN_NV1")
230 | 
231 |     plt.xlabel("mini-batch")
232 |     plt.ylabel("accuarcy")
233 |     plt.ylim(0.84, 0.95)
234 |     plt.xlim(0, limit+2)
235 |     plt.legend(loc=0)
236 |     plt.title("百分位对比（测试集）")
237 |     plt.savefig(logdir +temp_dir +"baifenwei")
238 | 
239 |     plt.show()
240 | 
241 | def draw_batch_size():
242 | 
243 |     temp_dir = "3_model/"
244 | 
245 |     gru = read_csv_file(logdir + temp_dir+"rnn_nv1.csv")
246 |     gru_b_3 = read_csv_file(logdir + temp_dir+ "rnn_nv1_256.csv")
247 |     #gru_b_9 = read_csv_file(logdir + temp_dir+ "rnn_nv1.csv")
248 | 
249 |     limit = 30
250 |     x = range(1,limit+1)
251 | 
252 |     plt.plot(x,gru[0:limit, -1], "bx-", label="batch size 128")
253 |     plt.plot(x,gru_b_3[0:limit, -1], "rx-", label="batch size 256")
254 |     #plt.plot(x,gru_b_9[0:limit, -1], "gx-", label="双层双向MaxoutGRU RNN_NV1")
255 | 
256 |     plt.xlabel("mini-batch")
257 |     plt.ylabel("accuarcy")
258 |     plt.ylim(0.84, 0.95)
259 |     plt.xlim(0, limit+2)
260 |     plt.legend(loc=0)
261 |     plt.title("batch size 对比（测试集）")
262 |     plt.savefig(logdir +temp_dir +"batch size")
263 | 
264 |     plt.show()
265 | 
266 | def draw_hidden():
267 |     temp_dir = "3_model/"
268 | 
269 |     gru = read_csv_file(logdir + temp_dir+"rnn_nv1_hidden_50.csv")
270 |     gru_b_3 = read_csv_file(logdir + temp_dir+ "rnn_nv1_hidden_200.csv")
271 |     gru_b_9 = read_csv_file(logdir + temp_dir+ "rnn_nv1.csv")
272 | 
273 |     limit = 41
274 |     x = range(1,limit+1)
275 | 
276 |     plt.plot(x,gru[0:limit, -1], "bx-", label="hidden size 50")
277 |     plt.plot(x,gru_b_3[0:limit, -1], "rx-", label="hidden size 200")
278 |     plt.plot(x,gru_b_9[0:limit, -1], "gx-", label="hidden size 100")
279 | 
280 |     plt.xlabel("mini-batch")
281 |     plt.ylabel("accuarcy")
282 |     plt.ylim(0.8, 0.95)
283 |     plt.xlim(0, limit+2)
284 |     plt.legend(loc=0)
285 |     plt.title("HIDDEN SIZE（测试集）")
286 |     plt.savefig(logdir +temp_dir +"2_rnn")
287 | 
288 |     plt.show()
289 | 
290 | def draw_activation():
291 |     temp_dir = "3_model/"
292 | 
293 |     gru = read_csv_file(logdir + temp_dir+"rnn_nv1_tahn.csv")
294 |     gru_b_3 = read_csv_file(logdir + temp_dir+ "rnn_nv1.csv")
295 |     #gru_b_9 = read_csv_file(logdir + temp_dir+ "rnn_nv1.csv")
296 | 
297 |     limit = 66
298 |     x = range(1,limit+1)
299 | 
300 |     plt.plot(x,gru[0:limit, -1], "bx-", label="tahn")
301 |     plt.plot(x,gru_b_3[0:limit, -1], "rx-", label="sigmod")
302 |     #plt.plot(x,gru_b_9[0:limit, -1], "gx-", label="hidden size 100")
303 | 
304 |     plt.xlabel("mini-batch")
305 |     plt.ylabel("accuarcy")
306 |     plt.ylim(0.8, 0.95)
307 |     plt.xlim(0, limit+2)
308 |     plt.legend(loc=0)
309 |     plt.title("MaxoutGRU的两种activation（测试集）")
310 |     plt.savefig(logdir +temp_dir +"activation")
311 | 
312 |     plt.show()
313 | 
314 | if __name__ == "__main__":
315 |     draw_width()


--------------------------------------------------------------------------------
/model.py:
--------------------------------------------------------------------------------
  1 | import config
  2 | import tensorflow as tf
  3 | from customized_gru import CustomizedGRU as GRUCell
  4 | from tensorflow.python.ops.rnn_cell import GRUCell as BasicGRUCell
  5 | import tensorflow.contrib as tf_ct
  6 | from tensorflow.contrib.rnn import BasicLSTMCell
  7 | from param import RNNType
  8 | from param import NetType
  9 | import linear
 10 | import param
 11 | import util
 12 | 
 13 | 
 14 | 
 15 | 
 16 | class Model(object):
 17 | 
 18 |     def __init__(self,conf,config):
 19 | 
 20 |         self.init_conf(conf)
 21 |         self.init_config(config)
 22 | 
 23 |         self.current_step = tf.Variable(0,trainable=False)
 24 |         self._learning_rate  = tf.train.exponential_decay(self.learning_rate,self.current_step,decay_steps=10,decay_rate=0.98,staircase=True)
 25 | 
 26 | 
 27 |         #self.current_step = tf.Variable(0)
 28 |         if self.net_type == NetType.DNN:
 29 |             self.init_dnn_type()
 30 |         elif self.net_type == NetType.CNN:
 31 |             self.init_cnn_type()
 32 |         elif self.net_type == NetType.RNN_NV1:
 33 |             self.init_rnn_type_nv1()
 34 |         elif self.net_type == NetType.RNN_NVN:
 35 |             self.init_rnn_type_nvn()
 36 |         elif self.net_type == NetType.DNN_MAXOUT:
 37 |             self.init_dnn_type_with_maxout()
 38 | 
 39 |     #init 文件里的配置
 40 |     def init_conf(self,conf):
 41 |         self.num_threads = conf.num_threads
 42 |         self.hidden_size = conf.hidden_size  # 隐藏层节点
 43 |         self.learning_rate = conf.learning_rate  # 学习速率
 44 |         self.num_layers = conf.num_layers  # 隐藏层数
 45 |         self.num_epochs = conf.num_epochs  # 训练周期
 46 |         self.exp_seq_len = conf.exp_seq_len  # 序列长度
 47 |         self.num_classes = conf.num_classes  # 分类个数
 48 |         self.num_features = conf.num_features  # 特征数量
 49 |         self.maxOut_numUnits = conf.maxOut_numUnits  # maxout节点
 50 |         self.embeded_dims = conf.embeded_dims  # 嵌入维数
 51 |         self.bias_initializer = tf.random_uniform_initializer(0, 0.001)  # bias初始器
 52 |         self.l2_preparam = conf.l2_preparam  # l2正则化超参数
 53 |         self.tensorboard  =conf.tensorboard
 54 |         self.use_tfrecord = conf.use_tfrecord
 55 |         self.tfrecord_path = conf.tfrecord_path
 56 |         self.shuffle = conf.shuffle
 57 |         self.keep_prob = conf.keep_prob
 58 |         self.use_dropout = conf.use_dropout
 59 | 
 60 |     #init 创建模型时的配置
 61 |     def init_config(self,config):
 62 |         # 将一些要创建时的数据通过config类传进来 包括模式，数据长度等等
 63 |         self.net_type = config.net_type  # 网络类型
 64 |         self.rnn_type = config.rnn_type  # rnn类型
 65 |         self.is_training = config.is_training  # 是否为训练模式
 66 |         self.is_test = config.is_test  # 是否为测试模式
 67 |         self.is_validation = config.is_validation  # 是否为验证模式
 68 |         self.len_features = config.len_features  # 特征长度
 69 |         self.train_seq_len = config.train_seq_len  # 训练集序列长度列表
 70 |         self.valid_seq_len = config.val_seq_len
 71 |         self.test_seq_len = config.test_seq_len
 72 |         self.activation = config.activation  # 激励函数
 73 |         self.batch_size = config.batch_size  # batch尺寸
 74 | 
 75 |     def init_rnn_type_nv1(self):
 76 | 
 77 |         # 输入数据
 78 |         self._input_data = tf.placeholder(tf.float32, [self.exp_seq_len, self.batch_size, self.len_features],
 79 |                                           name="input_data")
 80 |         self._targets = tf.placeholder(tf.int32,[self.batch_size],name="label")
 81 |         #self._valid_target = self._targets
 82 | 
 83 |         # 用于提前结束每个batch
 84 |         self._early_stop = tf.placeholder(tf.int32, shape=[self.batch_size], name="early-stop")
 85 | 
 86 |         if self.is_training:
 87 |             self.seq_len = self.exp_seq_len * self.batch_size
 88 |         elif self.is_validation:
 89 |             self.seq_len = sum(self.valid_seq_len)
 90 |         else:
 91 |             self.seq_len = sum(self.test_seq_len)
 92 | 
 93 |         # 获得多层双向gru的cell
 94 |         with tf.name_scope("mutil_rnn_cell"):
 95 |             cell = self.get_mutil_rnn_cell()
 96 | 
 97 |         # with tf.name_scope("embeded"):
 98 |         #   self.get_embeded_vec()
 99 | 
100 |         # 初始化cell
101 |         self.set_initial_states(cell)
102 | 
103 |         # 获得gru的输出
104 |         with tf.name_scope("rnn_outputs"):
105 |             self.get_rnn_outputs(cell)
106 | 
107 |         #w,b=self.init_mutil_dnn_weights(self.hidden_size*2,self.hidden_size*2,self.hidden_size*2,self.num_layers -1)
108 |         #self._output = self.mlp(self._output,w,b,0.5)
109 | 
110 |             # softmax层的权重
111 |         with tf.name_scope("softmax_layer") as scope:
112 |             self.get_softmax_layer_output()
113 | 
114 |         # 获得混淆矩阵
115 |         with tf.name_scope("confusion_matrix") as scope:
116 |             self._confusion_matrix = tf.confusion_matrix(self._targets, self._digit_predictions,self.num_classes)
117 | 
118 |         with tf.name_scope("cross_entropy") as scope:
119 |             self._onehot_labels = tf.one_hot(self._targets,depth=self.num_classes)
120 |             self._loss = tf.nn.softmax_cross_entropy_with_logits(labels=self._onehot_labels,logits=self._predictions)
121 |             self._cross_entropy = tf.reduce_sum(self._loss)
122 |             self._cost = tf.reduce_mean(self._loss)
123 |             self.add_l2_regulation()
124 | 
125 |         with tf.name_scope("accuracy") as scope:
126 |             self._correct_prediction = tf.equal(self._targets, self._digit_predictions)
127 |             self._accuracy = tf.reduce_mean(tf.cast(self._correct_prediction,tf.float32))
128 |             #self._accuracy = tf.metrics.accuracy( self._target,self._digit_predictions)[1]
129 | 
130 |         with tf.name_scope("optimization") as scope:
131 |             self._train_op = tf.train.AdamOptimizer(self._learning_rate).minimize(self._cost,global_step=self.current_step)
132 | 
133 |         if self.tensorboard:
134 |             self.w_hist = tf.summary.histogram("weights", self._softmax_w)
135 |             self.b_hist = tf.summary.histogram("biases", self._softmax_b)
136 |             self.y_hist_train = tf.summary.histogram("train-predictions", self._predictions)
137 |             self.y_hist_test = tf.summary.histogram("test-predictions", self._predictions)
138 |             self.mse_summary_train = tf.summary.scalar("train-cross-entropy-cost", self._cost)
139 |             self.mse_summary_test = tf.summary.scalar("test-cross-entropy-cost", self._cost)
140 | 
141 |     def init_rnn_type_nvn(self):
142 |         self._input_data = tf.placeholder(tf.float32, [self.exp_seq_len, self.batch_size, self.len_features],
143 |                                           name="input_data")
144 |         self._targets = tf.placeholder(tf.int32, [self.batch_size, self.exp_seq_len], name="targets")
145 | 
146 |         self._weight_sequence_loss = tf.placeholder(tf.float32,[self.batch_size,self.exp_seq_len],name="weight_sequence_loss")
147 | 
148 |         if self.is_training:
149 |             self.seq_len = self.exp_seq_len * self.batch_size
150 |         elif self.is_validation:
151 |             self.seq_len = sum(self.valid_seq_len)
152 |         else:
153 |             self.seq_len = sum(self.test_seq_len)
154 | 
155 |         # 获得多层双向gru的cell
156 |         with tf.name_scope("mutil_rnn_cell"):
157 |             cell = self.get_mutil_rnn_cell()
158 | 
159 |         # 用于提前结束每个batch
160 |         self._early_stop = tf.placeholder(tf.int32, shape=[self.batch_size], name="early-stop")
161 | 
162 |         # with tf.name_scope("embeded"):
163 |         #   self.get_embeded_vec()
164 | 
165 |         # 初始化cell
166 |         self.set_initial_states(cell)
167 | 
168 |         # 获得gru的输出
169 |         with tf.name_scope("rnn_outputs"):
170 |             self.get_rnn_outputs(cell)
171 |         # 获得去除padding的标签
172 |         # self._valid_target = self.get_valid_sequence(
173 |         #     tf.reshape(self._targets, [self.exp_seq_len * self.batch_size]),
174 |         #     self.num_classes)
175 |         # softmax层的权重
176 |         with tf.name_scope("softmax-layer") as scope:
177 |             self.get_softmax_layer_output()
178 | 
179 |         # 获得混淆矩阵
180 |         with tf.name_scope("confusion-matrix") as scope:
181 |             self._confusion_matrix = tf.confusion_matrix(tf.reshape(self._targets,[self.batch_size*self.exp_seq_len]), self._digit_predictions,self.num_classes)
182 | 
183 |         with tf.name_scope("seq2seq-loss-by-example") as scpoe:
184 |             self._loss = tf_ct.legacy_seq2seq.sequence_loss_by_example([self._predictions],
185 |                                         [tf.reshape(self.targets,[self.batch_size*self.exp_seq_len])],
186 |                                         [tf.reshape(self._weight_sequence_loss,[self.batch_size*self.exp_seq_len])])
187 |             # self._loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example(
188 |             #     [self._predictions],
189 |             #     [self._targets],
190 |             #     [tf.ones([tf.cast(self.getTensorShape(self._targets)[0],tf.int32)])])
191 |             self._cross_entropy = tf.reduce_sum(self._loss)
192 |             self._cost = tf.reduce_mean(self._loss)
193 |             self.add_l2_regulation()
194 |             # 计算l2cost
195 |             # tv = tf.trainable_variables()
196 |             # #tf_ct.layers.l2_regularizer()
197 |             #
198 |             # self._regularization_cost = self.l2_preparam*tf.reduce_sum([tf.nn.l2_loss(v) for v in tv])
199 |             # #总cost为 基础cost + l2cost
200 |             # #self._regularization_cost = tf.reduce_sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
201 |             # self._cost = self._cost+self._regularization_cost
202 |             self._accuracy = tf_ct.metrics.accuracy(self._digit_predictions, tf.reshape(self._targets,[self.batch_size*self.exp_seq_len]))
203 | 
204 |         with tf.name_scope("optimization") as scope:
205 |             self._train_op = tf.train.AdamOptimizer(self._learning_rate).minimize(self._cost,global_step=self.current_step)
206 | 
207 |         if self.tensorboard:
208 |             self.w_hist = tf.summary.histogram("weights", self._softmax_w)
209 |             self.b_hist = tf.summary.histogram("biases", self._softmax_b)
210 |             self.y_hist_train = tf.summary.histogram("train-predictions", self._predictions)
211 |             self.y_hist_test = tf.summary.histogram("test-predictions", self._predictions)
212 |             self.mse_summary_train = tf.summary.scalar("train-cross-entropy-cost", self._cost)
213 |             self.mse_summary_test = tf.summary.scalar("test-cross-entropy-cost", self._cost)
214 | 
215 |     def init_dnn_type(self):
216 |         # 输入数据
217 |         self._input_data = tf.placeholder(tf.float32, [None, self.len_features],
218 |                                           name="input_data")
219 |         self._early_stop = tf.placeholder(tf.int32, shape=[None], name="early-stop")
220 |         self._targets = tf.placeholder(tf.int32, [None], name="label")
221 | 
222 |         with tf.name_scope("init_weights") as scope:
223 |             weights,biases = self.init_mutil_dnn_weights(self.len_features,self.hidden_size,self.num_classes,self.num_layers)
224 | 
225 |         with tf.name_scope("mlp") as scope:
226 | 
227 |             self._predictions = self.mlp(self._input_data,weights,biases,self.keep_prob)
228 | 
229 |         with tf.name_scope("cost") as scope:
230 | 
231 |             self._onehot_labels = tf.one_hot(self._targets, depth=self.num_classes)
232 | 
233 |             self._loss = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits_v2(labels=self._onehot_labels,logits=self._predictions))
234 |             self._cross_entropy = tf.reduce_sum(self._loss)
235 |             self._cost = tf.reduce_mean(self._loss)
236 | 
237 |             self.add_l2_regulation()
238 | 
239 |         with tf.name_scope("accuracy") as scope:
240 |             self._prob_predictions = tf.nn.softmax(self._predictions)
241 |             # 获得每个数据最大的索引
242 |             self._digit_predictions = tf.argmax(self._prob_predictions, axis=1, output_type=tf.int32)
243 | 
244 |             self._correct_prediction = tf.equal(self._targets, self._digit_predictions)
245 |             self._accuracy = tf.reduce_mean(tf.cast(self._correct_prediction,tf.float32))
246 |             #self._accuracy = tf.metrics.accuracy( self._valid_target,self._digit_predictions)[1]
247 | 
248 |         with tf.name_scope("confusion-matrix") as scope:
249 |             self._confusion_matrix = tf.confusion_matrix(self._targets, self._digit_predictions,self.num_classes)
250 | 
251 |         with tf.name_scope("optimization") as scope:
252 |             self._train_op = tf.train.AdamOptimizer(self._learning_rate).minimize(self._cost,global_step=self.current_step)
253 | 
254 |     def init_dnn_type_with_maxout(self):
255 |         # 输入数据
256 |         self._input_data = tf.placeholder(tf.float32, [None, self.len_features],
257 |                                           name="input_data")
258 |         self._early_stop = tf.placeholder(tf.int32, shape=[None], name="early-stop")
259 |         self._targets = tf.placeholder(tf.int32, [None], name="label")
260 | 
261 |         with tf.name_scope("init_hidden_weights") as scope:
262 |             weights,biases = self.init_mutil_dnn_weights(self.len_features,self.hidden_size,self.hidden_size,self.num_layers -1)
263 | 
264 |         with tf.name_scope("hidden_mlp") as scope:
265 | 
266 |             self._hidden_outputs = self.mlp(self._input_data,weights,biases,self.keep_prob)
267 | 
268 |         with tf.name_scope("maxout_layer") as scope:
269 |             self._maxout_output = self.maxout(self._hidden_outputs,self.maxOut_numUnits,self.hidden_size,0,scope)
270 | 
271 | 
272 |         with tf.name_scope("softmax_layer") as scope:
273 |             self._softmax_w = tf.get_variable("softmax_w", [self.hidden_size, self.num_classes])
274 | 
275 |             self._softmax_b = tf.get_variable("softmax_b", [self.num_classes], initializer=self.bias_initializer)
276 | 
277 |             self._predictions = tf.matmul(self._maxout_output,self._softmax_w) + self._softmax_b
278 | 
279 |             self._prob_predictions = tf.nn.softmax(self._predictions)
280 | 
281 |         with tf.name_scope("cost") as scope:
282 | 
283 |             self._onehot_labels = tf.one_hot(self._targets, depth=self.num_classes)
284 | 
285 |             self._loss = tf.nn.softmax_cross_entropy_with_logits_v2(labels=self._onehot_labels,logits=self._predictions)
286 | 
287 |             self._cross_entropy = tf.reduce_sum(self._loss)
288 | 
289 |             self._cost = tf.reduce_mean(self._loss)
290 | 
291 |             self.add_l2_regulation()
292 | 
293 |         with tf.name_scope("accuracy") as scope:
294 |             # 获得每个数据最大的索引
295 |             self._digit_predictions = tf.argmax(self._prob_predictions, axis=1, output_type=tf.int32)
296 | 
297 |             self._correct_prediction = tf.equal(self._targets, self._digit_predictions)
298 |             self._accuracy = tf.reduce_mean(tf.cast(self._correct_prediction,tf.float32))
299 |             #self._accuracy = tf.metrics.accuracy( self._valid_target,self._digit_predictions)[1]
300 | 
301 |         with tf.name_scope("confusion-matrix") as scope:
302 |             self._confusion_matrix = tf.confusion_matrix(self._targets, self._digit_predictions,self.num_classes)
303 | 
304 |         with tf.name_scope("optimization") as scope:
305 |             self._train_op = tf.train.AdamOptimizer(self._learning_rate).minimize(self._cost,global_step=self.current_step)
306 | 
307 | 
308 |     def init_cnn_type(self):
309 |         pass
310 | 
311 |     def init_mutil_dnn_weights(self,ils, hls, ols,hl_num):
312 |         weights, bias = {}, {}
313 |         stddev = 0.1
314 |         for i in range(hl_num + 1):
315 |             fan_in = ils if i == 0 else hls
316 |             fan_out = ols if i == hl_num else hls
317 |             weights[i] = tf.get_variable("weight_" + str(i),shape=[fan_in, fan_out])
318 |             bias[i] = tf.get_variable("bias_"+str(i),shape = [fan_out])
319 |         return weights, bias
320 | 
321 |     def mlp(self,_x, _w, _b, _keep_prob):
322 |         layers = {}
323 |         for i in range(len(_w)):
324 |             if i == 0:
325 |                 if self.use_dropout:
326 |                     layers[i] = tf.nn.dropout(self.activation(tf.add(tf.matmul(_x, _w[i]), _b[i])), _keep_prob)
327 |                 else:
328 |                     layers[i] = self.activation(tf.add(tf.matmul(_x, _w[i]), _b[i]))
329 |             elif i < len(_w) - 1:
330 |                 if self.use_dropout:
331 |                     layers[i] = tf.nn.dropout(self.activation(tf.add(tf.matmul(layers[i - 1], _w[i]), _b[i])), _keep_prob)
332 |                 else:
333 |                     layers[i] = self.activation(tf.add(tf.matmul(layers[i - 1], _w[i]), _b[i]))
334 |             else:
335 |                 layers[i] = tf.add(tf.matmul(layers[i - 1], _w[i]), _b[i])
336 |         return layers[len(_w) - 1]
337 | 
338 |     def get_embeded_vec(self):
339 | 
340 |         self._embeding_weights = tf.get_variable(name="embeding",shape=[self.len_features,self.embeded_dims],dtype=tf.float32)
341 | 
342 |         embed_input = tf.reshape(self._input_data,[self.exp_seq_len*self.batch_size,self.len_features])
343 | 
344 |         #embeding_bias = tf.get_variable(name="embeding_bias",shape=[self.embeded_dims],dtype=tf.float32,initializer=self.bias_initializer)
345 | 
346 |         embed_result = tf.matmul(embed_input,self.embeding_weights)# + embeding_bias
347 | 
348 |         self._embeded_result = tf.reshape(embed_result,[self.exp_seq_len,self.batch_size,self.embeded_dims])
349 | 
350 |     def get_mutil_rnn_cell(self):
351 |         if self.use_dropout:
352 |             if self.rnn_type == RNNType.GRU:
353 |                 cell = tf_ct.rnn.MultiRNNCell(
354 |                     [tf_ct.rnn.DropoutWrapper(GRUCell(self.hidden_size, self.maxOut_numUnits, activation=self.activation),self.keep_prob,self.keep_prob,self.keep_prob) for _ in range(self.num_layers)])
355 |                 return cell
356 |             elif self.rnn_type == RNNType.GRU_b:
357 |                 cell_fw = tf_ct.rnn.MultiRNNCell(
358 |                     [tf_ct.rnn.DropoutWrapper(GRUCell(self.hidden_size, self.maxOut_numUnits, activation=self.activation),self.keep_prob,self.keep_prob,self.keep_prob) for _ in range(self.num_layers)])
359 |                 cell_bw = tf_ct.rnn.MultiRNNCell(
360 |                     [tf_ct.rnn.DropoutWrapper(GRUCell(self.hidden_size, self.maxOut_numUnits, activation=self.activation),self.keep_prob,self.keep_prob,self.keep_prob) for _ in range(self.num_layers)])
361 |                 return (cell_fw,cell_bw)
362 |             elif self.rnn_type == RNNType.LSTM:
363 |                 cell = tf_ct.rnn.MultiRNNCell(
364 |                     [tf_ct.rnn.DropoutWrapper(BasicLSTMCell(self.hidden_size,activation=self.activation),self.keep_prob,self.keep_prob,self.keep_prob) for _ in range(self.num_layers)])
365 |                 return cell
366 |             elif self.rnn_type == RNNType.LSTM_b:
367 |                 cell_fw = tf_ct.rnn.MultiRNNCell(
368 |                     [tf_ct.rnn.DropoutWrapper(BasicLSTMCell(self.hidden_size,activation=self.activation),self.keep_prob,self.keep_prob,self.keep_prob) for _ in range(self.num_layers)])
369 |                 cell_bw = tf_ct.rnn.MultiRNNCell(
370 |                     [tf_ct.rnn.DropoutWrapper(BasicLSTMCell(self.hidden_size,activation=self.activation),self.keep_prob,self.keep_prob,self.keep_prob) for _ in range(self.num_layers)])
371 |                 return (cell_fw,cell_bw)
372 |             elif self.rnn_type == RNNType.NORM_GRU:
373 |                 cell = tf_ct.rnn.MultiRNNCell(
374 |                     [tf_ct.rnn.DropoutWrapper(
375 |                         BasicGRUCell(self.hidden_size, activation=self.activation), self.keep_prob,
376 |                         self.keep_prob, self.keep_prob) for _ in range(self.num_layers)])
377 |                 return cell
378 |             elif self.rnn_type == RNNType.NORM_GRU_b:
379 |                 cell_fw = tf_ct.rnn.MultiRNNCell(
380 |                     [tf_ct.rnn.DropoutWrapper(BasicGRUCell(self.hidden_size, activation=self.activation),
381 |                                               self.keep_prob, self.keep_prob, self.keep_prob) for _ in
382 |                      range(self.num_layers)])
383 |                 cell_bw = tf_ct.rnn.MultiRNNCell(
384 |                     [tf_ct.rnn.DropoutWrapper(BasicGRUCell(self.hidden_size, activation=self.activation),
385 |                                               self.keep_prob, self.keep_prob, self.keep_prob) for _ in
386 |                      range(self.num_layers)])
387 |                 return (cell_fw, cell_bw)
388 |         else:
389 | 
390 |             if self.rnn_type == RNNType.GRU:
391 |                 cell = tf_ct.rnn.MultiRNNCell(
392 |                     [GRUCell(self.hidden_size, self.maxOut_numUnits, activation=self.activation) for _ in range(self.num_layers)])
393 |                 return cell
394 |             elif self.rnn_type == RNNType.GRU_b:
395 |                 cell_fw = tf_ct.rnn.MultiRNNCell(
396 |                     [GRUCell(self.hidden_size, self.maxOut_numUnits, activation=self.activation) for _ in range(self.num_layers)])
397 |                 cell_bw = tf_ct.rnn.MultiRNNCell(
398 |                     [GRUCell(self.hidden_size, self.maxOut_numUnits, activation=self.activation) for _ in range(self.num_layers)])
399 |                 return (cell_fw,cell_bw)
400 |             elif self.rnn_type == RNNType.LSTM:
401 |                 cell = tf_ct.rnn.MultiRNNCell(
402 |                     [BasicLSTMCell(self.hidden_size,activation=self.activation) for _ in range(self.num_layers)])
403 |                 return cell
404 |             elif self.rnn_type == RNNType.LSTM_b:
405 |                 cell_fw = tf_ct.rnn.MultiRNNCell(
406 |                     [BasicLSTMCell(self.hidden_size,activation=self.activation) for _ in range(self.num_layers)])
407 |                 cell_bw = tf_ct.rnn.MultiRNNCell(
408 |                     [BasicLSTMCell(self.hidden_size,activation=self.activation) for _ in range(self.num_layers)])
409 |                 return (cell_fw,cell_bw)
410 |             elif self.rnn_type == RNNType.NORM_GRU:
411 |                 cell = tf_ct.rnn.MultiRNNCell(
412 |                     [BasicGRUCell(self.hidden_size, activation=self.activation) for _ in range(self.num_layers)])
413 |                 return cell
414 |             elif self.rnn_type == RNNType.NORM_GRU_b:
415 |                 cell_fw = tf_ct.rnn.MultiRNNCell(
416 |                     [BasicGRUCell(self.hidden_size, activation=self.activation) for _ in range(self.num_layers)])
417 |                 cell_bw = tf_ct.rnn.MultiRNNCell(
418 |                     [BasicGRUCell(self.hidden_size, activation=self.activation) for _ in range(self.num_layers)])
419 |                 return (cell_fw, cell_bw)
420 | 
421 |     #初始化cell的状态
422 |     def set_initial_states(self, cell):
423 |         if self.rnn_type == RNNType.GRU or self.rnn_type == RNNType.LSTM or self.rnn_type == RNNType.NORM_GRU:
424 |             self._initial_state = cell.zero_state(self.batch_size,tf.float32)
425 |         elif self.rnn_type == RNNType.GRU_b or self.rnn_type == RNNType.LSTM_b or self.rnn_type == RNNType.NORM_GRU_b:
426 |             (cell_fw, cell_bw) = cell
427 |             self._initial_state_fw = cell_fw.zero_state(self.batch_size, tf.float32)
428 |             self._initial_state_bw = cell_bw.zero_state(self.batch_size, tf.float32)
429 | 
430 |     def get_rnn_outputs(self,cell):
431 |         if self.rnn_type == RNNType.LSTM or self.rnn_type == RNNType.GRU or self.rnn_type == RNNType.NORM_GRU:
432 |             self._outputs,self._state = tf.nn.dynamic_rnn(cell,self._input_data,sequence_length=self._early_stop,
433 |                                               initial_state=self.initial_state,
434 |                                               time_major=True,dtype=tf.float32)
435 |             if self.net_type == NetType.RNN_NVN:
436 |                 outputs = tf.transpose(self._outputs, perm=[1, 0, 2])
437 | 
438 |                 self._output = tf.reshape(tf.concat(axis=0, values=outputs),
439 |                                          [self.exp_seq_len * self.batch_size, self.hidden_size])
440 | 
441 |                 #self._valid_output = self.get_valid_sequence(self._output, self.hidden_size)
442 | 
443 |             elif self.net_type == NetType.RNN_NV1:
444 |                 if self.rnn_type == RNNType.LSTM :
445 |                     state_h = self._state[-1]
446 |                     self._output = state_h[-1]
447 |                 elif self.rnn_type == RNNType.GRU or self.rnn_type == RNNType.NORM_GRU:
448 |                     self._output = self._state[-1]
449 | 
450 | 
451 |         elif self.rnn_type == RNNType.LSTM_b or self.rnn_type == RNNType.GRU_b or self.rnn_type == RNNType.NORM_GRU_b:
452 |             (cell_fw, cell_bw) = cell
453 |             self._outputs, self._state = tf.nn.bidirectional_dynamic_rnn(cell_fw, cell_bw, self._input_data,
454 |                                                                        sequence_length=self._early_stop,
455 |                                                                        initial_state_fw=self._initial_state_fw,
456 |                                                                        initial_state_bw=self._initial_state_bw,
457 |                                                                        time_major=True, dtype=tf.float32)
458 | 
459 |             if self.net_type == NetType.RNN_NVN:
460 | 
461 |                 output_fw, output_bw = self._outputs
462 |                 output_fw = tf.transpose(output_fw, perm=[1, 0, 2])
463 |                 output_bw = tf.transpose(output_bw, perm=[1, 0, 2])
464 |                 outputs = tf.concat(axis=2, values=[output_fw, output_bw])
465 |                 # Concatenates tensors along one dimension.
466 |                 # this will flatten the dimension of the matrix to [batch_size * num_steps, num_hidden_nodes]
467 |                 # However, this is not the true output sequence, since padding added a number of empty elements
468 |                 # Extra padding elements should be removed from the output sequence.
469 |                 # Here first concatenate all vessels into one long sequence, including paddings
470 |                 self._output = tf.reshape(tf.concat(axis=0, values=outputs),
471 |                                          [self.exp_seq_len * self.batch_size, self.hidden_size * 2])
472 |                 # Remove padding here
473 |                 #self._valid_output = self.get_valid_sequence(self._output, self.hidden_size * 2)
474 |             elif self.net_type == NetType.RNN_NV1:
475 |                 state_fw, state_bw = self._state
476 |                 if self.rnn_type == RNNType.LSTM_b :
477 |                     state_fw_h = state_fw[-1]
478 |                     state_bw_h = state_bw[-1]
479 |                     self._output = tf.concat(axis=1, values=[state_fw_h[-1], state_bw_h[-1]])
480 |                 elif self.rnn_type == RNNType.GRU_b or self.rnn_type==RNNType.NORM_GRU_b:
481 |                     self._output = tf.concat(axis=1,values=[state_fw[-1],state_bw[-1]])
482 | 
483 |     def get_softmax_layer_output(self):
484 |         if self.net_type == NetType.DNN:
485 |             self._softmax_w = tf.get_variable("softmax_w", [self.hidden_size, self.num_classes])
486 |         elif self.net_type == NetType.RNN_NV1 or self.net_type == NetType.RNN_NVN:
487 | 
488 |             if self.rnn_type == RNNType.GRU or self.rnn_type == RNNType.LSTM or self.rnn_type == RNNType.NORM_GRU:
489 |                 self._softmax_w = tf.get_variable("softmax_w", [self.hidden_size , self.num_classes])
490 |             elif self.rnn_type == RNNType.GRU_b or self.rnn_type == RNNType.LSTM_b or self.rnn_type == RNNType.NORM_GRU_b:
491 | 
492 |                 self._softmax_w = tf.get_variable("softmax_w", [self.hidden_size * 2, self.num_classes])
493 | 
494 |         # softmax层的bias
495 |         self._softmax_b = tf.get_variable("softmax_b", [self.num_classes], initializer=self.bias_initializer)
496 | 
497 |         self._predictions = tf.matmul(self._output, self._softmax_w) + self._softmax_b
498 |         # 概率
499 |         self._prob_predictions = tf.nn.softmax(self._predictions)
500 |         # 获得每个数据最大的索引
501 |         self._digit_predictions = tf.argmax(self._prob_predictions, axis=1,output_type=tf.int32)
502 | 
503 |     def add_l2_regulation(self):
504 | 
505 |         # 计算l2cost
506 |         tv = tf.trainable_variables()
507 | 
508 |         # #tf_ct.layers.l2_regularizer()
509 |         #
510 |         self._l2_regularization_cost = self.l2_preparam*tf.reduce_sum([tf.nn.l2_loss(v) for v in tv])
511 |         # #总cost为 基础cost + l2cost
512 |         self._cost = self._cost+self._l2_regularization_cost
513 | 
514 |     def maxout(self,input,num_units,output_size,ini_value,scope):
515 |         outputs = None
516 |         for i in range(num_units):
517 |             with tf.variable_scope(str(i)):
518 |                 y = self.activation(linear._linear(input, output_size, True, ini_value, scope=scope))
519 |                 if outputs is None:
520 |                     outputs = y
521 |                 else:
522 |                     outputs = tf.maximum(outputs, y)
523 |         return outputs
524 | 
525 |     def get_valid_sequence(self, seq, feature_size):
526 |         """remove padding from sequences"""
527 |         if self.is_training:
528 |             stop = self._early_stop
529 |         elif self.is_validation:
530 |             stop = self._early_stop
531 |         else:
532 |             stop = self._early_stop
533 |         valid_sequence_list = []
534 |         for i in range(self.batch_size):
535 |             if len(tf.Tensor.get_shape(seq)) == 2:
536 |                 sub_seq = tf.slice(seq, [self.exp_seq_len * i, 0], [stop[i], feature_size])
537 |             else:
538 |                 sub_seq = tf.slice(seq, [self.exp_seq_len * i], [stop[i]])
539 |             valid_sequence_list.append(sub_seq)
540 |         valid_sequence = tf.concat(axis=0, values=valid_sequence_list)
541 |         return valid_sequence
542 | 
543 |     def getTensorShape(this, tensor):
544 |         return tf.Tensor.get_shape(tensor)
545 | 
546 |     @property
547 |     def embeding_weights(self):
548 |         return self._embeding_weights
549 | 
550 |     @property
551 |     def embeded_result(self):
552 |         return self._embeded_result
553 | 
554 |     @property
555 |     def digit_predictions(self):
556 |         return self._digit_predictions
557 | 
558 |     @property
559 |     def confusion_matrix(self):
560 |         return self._confusion_matrix
561 | 
562 |     @property
563 |     def prob_predictions(self):
564 |         return self._prob_predictions
565 | 
566 |     @property
567 |     def input_data(self):
568 |         return self._input_data
569 | 
570 |     @property
571 |     def weight_sequence_loss(self):
572 |         return self._weight_sequence_loss
573 | 
574 |     @property
575 |     def targets(self):
576 |         return self._targets
577 | 
578 |     @property
579 |     def predictions(self):
580 |         return self._predictions
581 | 
582 |     @property
583 |     def early_stop(self):
584 |         return self._early_stop
585 | 
586 |     @property
587 |     def initial_state(self):
588 |         if self.rnn_type == RNNType.GRU or self.rnn_type == RNNType.LSTM:
589 |             return self._initial_state
590 |         elif self.rnn_type == RNNType.GRU_b or self.rnn_type == RNNType.LSTM_b:
591 |             return (self._initial_state_fw ,self._initial_state_bw)
592 | 
593 |     @property
594 |     def cost(self):
595 |         return self._cost
596 | 
597 |     @property
598 |     def accuracy(self):
599 |         return self._accuracy
600 | 
601 |     @property
602 |     def train_op(self):
603 |         return self._train_op
604 | 
605 |     @property
606 |     def final_state(self):
607 |         return self._final_state
608 | 
609 | 


--------------------------------------------------------------------------------
/trajectoryNet.py:
--------------------------------------------------------------------------------
   1 | import numpy as np
   2 | import tensorflow as tf
   3 | import pandas as pd
   4 | import config
   5 | from config import TrainingConfig
   6 | from model import Model
   7 | from config import NetType
   8 | from config import RNNType
   9 | from tensorflow.python.ops.math_ops import tanh
  10 | from log import Log
  11 | from myThread import MyThread
  12 | import random
  13 | from tensorflow.contrib import layers
  14 | from data_funs import Data
  15 | import util
  16 | import param
  17 | from param import DirName
  18 | from sklearn.metrics import confusion_matrix
  19 | from util import get_net_type
  20 | from util import get_rnn_type
  21 | 
  22 | 
  23 | conf = config.Config("data/config.json")
  24 | 
  25 | if conf.use_tfrecord:
  26 |     log_path,data_path,LOGGER = util.init_environment(get_net_type(conf.net_type),get_rnn_type(conf.rnn_type))
  27 |     task = conf.task
  28 |     tfrecords_data_path = conf.tfrecord_path
  29 |     len_features = conf.num_features * conf.discretization_width
  30 | else:
  31 |     log_path = "./logdir/shiyan/rnn_nvn/"
  32 |     data_path = "./data/tfrecord9_data/rnn_nvn/"
  33 |     task = conf.task
  34 |     net_name = str(NetType(conf.net_type)).split(".")[-1]
  35 |     nn_name = str(RNNType(conf.rnn_type)).split(".")[-1]
  36 |     LOGGER = Log(log_path, "_" + net_name + "_" + nn_name)
  37 |     len_features = conf.num_features * conf.discretization_width
  38 | 
  39 | #从npy加载数据
  40 | def loadData_rnn_nvn():
  41 |     x_file = 'data/x_mobility.npy'
  42 |     y_file = 'data/y_mobility.npy'
  43 |     mmsi_file = 'data/mmsi_mobility.npy'
  44 | 
  45 | 
  46 |     #加载数据
  47 |     print("加载数据中......")
  48 |     x = np.load(x_file)
  49 |     y = np.load(y_file)
  50 |     mmsi = np.load(mmsi_file)
  51 |     print("加载完毕......")
  52 | 
  53 |     #x中数据格式如下
  54 |     # shape = [总序列个数，序列长度，特征数]
  55 |     #y中数据格式如下
  56 |     #shape= [总序列个数，序列长度]
  57 |     #mmsi数据格式如下
  58 |     #shape = [2，总序列个数]
  59 |     #mmsi[0] 中储存着用户编号
  60 |     #mmsi[1] 中储存着有效序列长度（因为是padding之后的切割，所以用户的一个序列会出现不满足序列长度的数据，故记录有效的序列长度，
  61 |     (x,y,mmsi) =  Data.reorganizeSeq(x, y, mmsi, conf.exp_seq_len)
  62 | 
  63 |     #序列的总个数
  64 |     num_examples = x.shape[0]
  65 |     #用户编号的不重复列表
  66 |     unique_mmsi = np.unique(mmsi[0])
  67 |     #分类个数
  68 |     num_classes = conf.num_classes
  69 | 
  70 |     #test_and_val = random.sample(range(23),6)
  71 | 
  72 |     #测试集
  73 |     #test_vessel = test_and_val[0:5]
  74 |     test_vessel = conf.test_id
  75 |     #验证集
  76 |     #val_vessel = test_and_val[5:6]
  77 |     val_vessel = conf.val_id
  78 | 
  79 |     #分割数据，将数据分割成 训练集，测试集，验证集，返回这些数据集的索引
  80 |     #test_index 的格式
  81 |     #test_vessel = [0,1] 即前两名用户的索引 则test_index = [0,1,2,3,4,5,6......]
  82 |     (train_index, test_index, valid_index) = Data.splitDataset(mmsi[0], test_vessel, val_vessel)
  83 | 
  84 |     #提前停止也即有效序列
  85 |     early_stop = mmsi[1]
  86 |     x = x.transpose([1, 0, 2])
  87 | 
  88 |     np.random.shuffle(train_index)
  89 | 
  90 | 
  91 |     # X_train shape = [序列长度，训练集序列总个数，特征]
  92 |     X_train = x[:, train_index, :]
  93 |     y_train = y[train_index, :]
  94 |     stop_train = early_stop[train_index]
  95 | 
  96 |     np.random.shuffle(test_index)
  97 | 
  98 |     X_test = x[:, test_index, :]
  99 |     y_test = y[test_index, :]
 100 |     stop_test = early_stop[test_index]
 101 | 
 102 |     np.random.shuffle(valid_index)
 103 | 
 104 |     X_valid = x[:, valid_index, :]
 105 |     y_valid = y[valid_index, :]
 106 |     stop_valid = early_stop[valid_index]
 107 | 
 108 |     train_data = (X_train, y_train, stop_train)
 109 |     test_data = (X_test, y_test, stop_test)
 110 |     val_data = (X_valid, y_valid, stop_valid)
 111 | 
 112 |     #获得训练集，测试集，验证集的序列长度数组
 113 |     #eg train_seq_len  value = [250,250,250,55,250,250,250......]
 114 |     train_seq_len = mmsi[1][train_index]
 115 |     test_seq_len = mmsi[1][test_index]
 116 |     valid_seq_len = mmsi[1][valid_index]
 117 | 
 118 |     train_config = config.TrainingConfig(True, False,False, conf.batch_size,len_features=x.shape[2],rnn_type=RNNType.GRU_b)
 119 |     train_config.train_seq_len = train_seq_len
 120 | 
 121 |     test_config = config.TrainingConfig(False,False,True,len(test_index),len_features=x.shape[2],rnn_type=RNNType.GRU_b)
 122 |     test_config.test_seq_len = test_seq_len
 123 | 
 124 |     valid_config = config.TrainingConfig(False, True, False, len(valid_index),len_features=x.shape[2],rnn_type=RNNType.GRU_b)
 125 |     valid_config.val_seq_len = valid_seq_len
 126 | 
 127 |     return (train_data,test_data,val_data,train_config,test_config,valid_config)
 128 | 
 129 | #加载rnn_nv1数据从npy文件
 130 | def load_data_rnn_nv1(classes):
 131 |     # 分训练集与测试集 验证集 8：1：1
 132 |     train_data_all = None
 133 |     train_label_all = None
 134 |     train_early_all = None
 135 |     valid_data_all = None
 136 |     valid_label_all = None
 137 |     valid_early_all = None
 138 |     test_data_all = None
 139 |     test_label_all = None
 140 |     test_early_all = None
 141 |     features_arr_list = []
 142 |     index_arr_list = []
 143 |     label_arr_list = []
 144 |     data_file_name_exp = data_path +"transportation_mode"
 145 |     for i in range(classes):
 146 |         print("加载" + str(i))
 147 |         # data_file  = data_file_name +str(i) +".npy"
 148 |         index_df = pd.DataFrame(pd.read_csv(data_file_name_exp +"_"+ str(i) + "_seg_index.csv"))
 149 |         features_arr = np.load(data_file_name_exp + str(i) + ".npy")
 150 |         print(features_arr.shape)
 151 |         features_arr = features_arr[:, 0:len_features]
 152 |         index_arr = np.array(index_df.iloc[:, [1, 2]].T)
 153 |         # index shape = [2,总个数]
 154 |         # 第一维是第几段轨迹 第二维是在固定长度为exp_seq_len中的实际长度
 155 |         # data shape =[seq_nums,exp_seq_len,feature_len]   切出相等的数据长度 不足的padding
 156 |         (data, index_arr) = Data.slice_seq(features_arr, index_arr, conf.exp_seq_len)
 157 |         #切割后删除features_arr index
 158 |         del features_arr
 159 |         del index_df
 160 |         label_arr = np.zeros(shape=[index_arr.shape[1]], dtype=np.int32)
 161 |         label_arr[:] = i
 162 |         # features_arr_list.append(data)
 163 |         # index_arr_list.append(index)
 164 |         # label_arr_list.append(label)
 165 |         #划分训练集，验证集，测试集
 166 |         print("划分训练集，验证集，测试集   " + str(i))
 167 |         seq_nums = index_arr.shape[1]
 168 |         # 控制变量
 169 |         np.random.seed(2)
 170 |         index_perm = np.random.permutation(range(seq_nums))
 171 |         train_count = int(np.floor(seq_nums * 0.8))
 172 |         valid_count = int(np.floor(seq_nums * 0.9))
 173 |         test_count = seq_nums
 174 |         train_index = index_perm[0:train_count]
 175 |         valid_index = index_perm[train_count + 1:valid_count]
 176 |         test_index = index_perm[valid_count + 1:seq_nums]
 177 | 
 178 |         # train_set valid_set test_set
 179 |         train_data = data[train_index, :, :]
 180 |         train_label = label_arr[train_index]
 181 |         train_early = index_arr[1, train_index]
 182 | 
 183 |         valid_data = data[valid_index, :, :]
 184 |         valid_label = label_arr[valid_index]
 185 |         valid_early = index_arr[1, valid_index]
 186 | 
 187 |         test_data = data[test_index, :, :]
 188 |         test_label = label_arr[test_index]
 189 |         test_early = index_arr[1, test_index]
 190 | 
 191 |         #删除读取到的data.
 192 |         del data
 193 |         del label_arr
 194 |         del index_arr
 195 | 
 196 |         if train_data_all is None:
 197 |             train_data_all = train_data
 198 |             train_label_all = train_label
 199 |             train_early_all = train_early
 200 | 
 201 |             valid_data_all = valid_data
 202 |             valid_label_all = valid_label
 203 |             valid_early_all = valid_early
 204 | 
 205 |             test_data_all = test_data
 206 |             test_label_all = test_label
 207 |             test_early_all = test_early
 208 |         else:
 209 |             train_data_all = np.concatenate((train_data_all, train_data), axis=0)
 210 |             train_label_all = np.concatenate((train_label_all, train_label), axis=0)
 211 |             train_early_all = np.concatenate((train_early_all, train_early), axis=0)
 212 | 
 213 |             valid_data_all = np.concatenate((valid_data_all, valid_data), axis=0)
 214 |             valid_label_all = np.concatenate((valid_label_all, valid_label), axis=0)
 215 |             valid_early_all = np.concatenate((valid_early_all, valid_early), axis=0)
 216 | 
 217 |             test_data_all = np.concatenate((test_data_all, test_data), axis=0)
 218 |             test_label_all = np.concatenate((test_label_all, test_label), axis=0)
 219 |             test_early_all = np.concatenate((test_early_all, test_early), axis=0)
 220 |     #打乱数据
 221 |     np.random.seed(1)
 222 |     train_perm = np.random.permutation(range(train_early_all.shape[0]))
 223 |     np.random.seed(1)
 224 |     valid_perm = np.random.permutation(range(valid_early_all.shape[0]))
 225 |     np.random.seed(1)
 226 |     test_perm = np.random.permutation(range(test_early_all.shape[0]))
 227 | 
 228 |     #shape=[序列长度，总个数，特征长度]   TimeMajor
 229 |     train_data_all = np.transpose(train_data_all, [1, 0, 2])
 230 |     valid_data_all = np.transpose(valid_data_all, [1, 0, 2])
 231 |     test_data_all = np.transpose(test_data_all, [1, 0, 2])
 232 | 
 233 |     # train_data_all = train_data_all[:, train_perm, :]
 234 |     # train_label_all = train_label_all[train_perm]
 235 |     # train_early_all = train_early_all[train_perm]
 236 | 
 237 |     valid_data_all = valid_data_all[:, valid_perm, :]
 238 |     valid_label_all = valid_label_all[valid_perm]
 239 |     valid_early_all = valid_early_all[valid_perm]
 240 | 
 241 |     test_data_all = test_data_all[:, test_perm, :]
 242 |     test_label_all = test_label_all[test_perm]
 243 |     test_early_all = test_early_all[test_perm]
 244 | 
 245 |     train_set = (train_data_all, train_label_all, train_early_all)
 246 |     valid_set = (valid_data_all, valid_label_all, valid_early_all)
 247 |     test_set = (test_data_all, test_label_all, test_early_all)
 248 |     return train_set,valid_set,test_set
 249 | 
 250 | def load_data_rnn_nv1_other(classes):
 251 |     # 分训练集与测试集 验证集 8：1：1
 252 |     train_data_all = None
 253 |     train_label_all = None
 254 |     train_early_all = None
 255 |     valid_data_all = None
 256 |     valid_label_all = None
 257 |     valid_early_all = None
 258 |     test_data_all = None
 259 |     test_label_all = None
 260 |     test_early_all = None
 261 |     data_file_name_exp = data_path + "transportation_mode"
 262 |     for i in range(classes):
 263 |         data = np.load(data_path + "slice_label" + str(i) + "_" + str(conf.exp_seq_len) + ".npy")
 264 |         index_arr = np.load(data_path + "slice_index" + str(i) + ".npy")
 265 | 
 266 |         # 切割后删除features_arr index
 267 |         label_arr = np.zeros(shape=[index_arr.shape[1]], dtype=np.int32)
 268 |         label_arr[:] = i
 269 |         # features_arr_list.append(data)
 270 |         # index_arr_list.append(index)
 271 |         # label_arr_list.append(label)
 272 |         # 划分训练集，验证集，测试集
 273 |         print("划分训练集，验证集，测试集   " + str(i))
 274 |         seq_nums = index_arr.shape[1]
 275 |         # 控制变量
 276 |         np.random.seed(2)
 277 |         index_perm = np.random.permutation(range(seq_nums))
 278 |         train_count = int(np.floor(seq_nums * 0.8))
 279 |         valid_count = int(np.floor(seq_nums * 0.9))
 280 |         test_count = seq_nums
 281 |         train_index = index_perm[0:train_count]
 282 |         valid_index = index_perm[train_count + 1:valid_count]
 283 |         test_index = index_perm[valid_count + 1:seq_nums]
 284 | 
 285 |         # train_set valid_set test_set
 286 |         train_data = data[train_index, :, :]
 287 |         train_label = label_arr[train_index]
 288 |         train_early = index_arr[1, train_index]
 289 | 
 290 |         valid_data = data[valid_index, :, :]
 291 |         valid_label = label_arr[valid_index]
 292 |         valid_early = index_arr[1, valid_index]
 293 | 
 294 |         test_data = data[test_index, :, :]
 295 |         test_label = label_arr[test_index]
 296 |         test_early = index_arr[1, test_index]
 297 | 
 298 |         # 删除读取到的data.
 299 |         del data
 300 |         del label_arr
 301 |         del index_arr
 302 | 
 303 |         if train_data_all is None:
 304 |             train_data_all = train_data
 305 |             train_label_all = train_label
 306 |             train_early_all = train_early
 307 | 
 308 |             valid_data_all = valid_data
 309 |             valid_label_all = valid_label
 310 |             valid_early_all = valid_early
 311 | 
 312 |             test_data_all = test_data
 313 |             test_label_all = test_label
 314 |             test_early_all = test_early
 315 |         else:
 316 |             train_data_all = np.concatenate((train_data_all, train_data), axis=0)
 317 |             train_label_all = np.concatenate((train_label_all, train_label), axis=0)
 318 |             train_early_all = np.concatenate((train_early_all, train_early), axis=0)
 319 | 
 320 |             valid_data_all = np.concatenate((valid_data_all, valid_data), axis=0)
 321 |             valid_label_all = np.concatenate((valid_label_all, valid_label), axis=0)
 322 |             valid_early_all = np.concatenate((valid_early_all, valid_early), axis=0)
 323 | 
 324 |             test_data_all = np.concatenate((test_data_all, test_data), axis=0)
 325 |             test_label_all = np.concatenate((test_label_all, test_label), axis=0)
 326 |             test_early_all = np.concatenate((test_early_all, test_early), axis=0)
 327 |     # 打乱数据
 328 |     np.random.seed(1)
 329 |     train_perm = np.random.permutation(range(train_early_all.shape[0]))
 330 |     np.random.seed(1)
 331 |     valid_perm = np.random.permutation(range(valid_early_all.shape[0]))
 332 |     np.random.seed(1)
 333 |     test_perm = np.random.permutation(range(test_early_all.shape[0]))
 334 | 
 335 |     # shape=[序列长度，总个数，特征长度]   TimeMajor
 336 |     train_data_all = np.transpose(train_data_all, [1, 0, 2])
 337 |     valid_data_all = np.transpose(valid_data_all, [1, 0, 2])
 338 |     test_data_all = np.transpose(test_data_all, [1, 0, 2])
 339 | 
 340 |     # train_data_all = train_data_all[:, train_perm, :]
 341 |     # train_label_all = train_label_all[train_perm]
 342 |     # train_early_all = train_early_all[train_perm]
 343 | 
 344 |     valid_data_all = valid_data_all[:, valid_perm, :]
 345 |     valid_label_all = valid_label_all[valid_perm]
 346 |     valid_early_all = valid_early_all[valid_perm]
 347 | 
 348 |     test_data_all = test_data_all[:, test_perm, :]
 349 |     test_label_all = test_label_all[test_perm]
 350 |     test_early_all = test_early_all[test_perm]
 351 | 
 352 |     train_set = (train_data_all, train_label_all, train_early_all)
 353 |     valid_set = (valid_data_all, valid_label_all, valid_early_all)
 354 |     test_set = (test_data_all, test_label_all, test_early_all)
 355 |     return train_set, valid_set, test_set
 356 | 
 357 | #直接读取整个data npz  noTranspose
 358 | def load_data_rnn_nv1_quick(classes):
 359 |     data_dir = "G:/all_data/"
 360 |     train_data_set_name = data_dir + "train_data_set.npz"
 361 |     valid_data_set_name = data_dir + "valid_data_set.npz"
 362 |     test_data_set_name = data_dir + "test_data_set.npz"
 363 |     train_data_set = np.load(train_data_set_name)
 364 |     valid_data_set = np.load(valid_data_set_name)
 365 |     test_data_set = np.load(test_data_set_name)
 366 | 
 367 |     return train_data_set,valid_data_set,test_data_set
 368 | 
 369 | def evaluate_model(sess, minibatch):
 370 |     # test and validate model
 371 |     #if conf.test_mode:
 372 |     #    run_batch(sess, mtest, test_data, tf.no_op(), minibatch)
 373 | 
 374 |     result_train = run_batch(sess,train_model,train_data,tf.no_op(),minibatch)
 375 |     result_test = run_batch(sess,test_model,test_data,tf.no_op(),minibatch)
 376 |     result_valid = run_batch(sess,valid_model,valid_data,tf.no_op(),minibatch)
 377 | 
 378 |     #t_train = MyThread(run_batch, (sess, train_model, train_data, tf.no_op(), minibatch))
 379 |     #t_test = MyThread(run_batch, (sess, test_model, test_data, tf.no_op(), minibatch))
 380 |     #t_val = MyThread(run_batch, (sess, valid_model, valid_data, tf.no_op(), minibatch))
 381 | 
 382 |     #t_train.start()
 383 |     #t_test.start()
 384 |     #t_val.start()
 385 | 
 386 |     #t_train.join()
 387 |     #result_train = t_train.get_result()
 388 |     #t_test.join()
 389 |     #result_test = t_test.get_result()
 390 |     #t_val.join()
 391 |     #result_val = t_val.get_result()
 392 | 
 393 |     print("Train cost {0:0.3f}, Acc {1:0.3f}".format(
 394 |         result_train[0], result_train[1]))
 395 |     print("Valid cost {0:0.3f}, Acc {1:0.3f}".format(
 396 |         result_valid[0], result_valid[1]))
 397 |     print("Test  cost {0:0.3f}, Acc {1:0.3f}".format(
 398 |         result_test[0], result_test[1]))
 399 | 
 400 |     return result_train + result_test + result_valid
 401 | 
 402 | #
 403 | def evaluate_model_all(sess,epoch):
 404 |     result_train = run_batch_all(sess, train_model, train_data, tf.no_op(), epoch)
 405 |     result_valid = run_batch_all(sess, valid_model, valid_data, tf.no_op(), epoch)
 406 |     result_test = run_batch_all(sess, test_model, test_data, tf.no_op(), epoch)
 407 | 
 408 | 
 409 |     LOGGER.summary_log(result_train+result_valid+result_test,epoch)
 410 | 
 411 |     print("Train cost {0:0.3f}, Acc {1:0.3f}".format(
 412 |         result_train[0], result_train[1]))
 413 |     print("Valid cost {0:0.3f}, Acc {1:0.3f}".format(
 414 |         result_valid[0], result_valid[1]))
 415 |     print("Test  cost {0:0.3f}, Acc {1:0.3f}".format(
 416 |         result_test[0], result_test[1]))
 417 | 
 418 |     return result_train + result_test + result_valid
 419 | 
 420 | #npz文件方式
 421 | def evaluate_model_quick(sess,epoch):
 422 |     print("开始测试训练集")
 423 |     result_train = run_batch_quick(sess, train_model, train_data, tf.no_op(), epoch)
 424 |     print("开始测试验证集")
 425 |     result_valid = run_batch_quick(sess, valid_model, valid_data, tf.no_op(), epoch)
 426 |     print("开始测试测试集")
 427 |     result_test = run_batch_quick(sess, test_model, test_data, tf.no_op(), epoch)
 428 | 
 429 |     LOGGER.summary_log(result_train + result_valid + result_test, epoch)
 430 | 
 431 |     print("Train cost {0:0.3f}, Acc {1:0.3f}".format(
 432 |         result_train[0], result_train[1]))
 433 |     print("Valid cost {0:0.3f}, Acc {1:0.3f}".format(
 434 |         result_valid[0], result_valid[1]))
 435 |     print("Test  cost {0:0.3f}, Acc {1:0.3f}".format(
 436 |         result_test[0], result_test[1]))
 437 | 
 438 |     return result_train + result_test + result_valid
 439 | 
 440 | #队列版 未完成
 441 | def evaluate_from_tfrecords(iter):
 442 |     with tf.Session() as sess:
 443 |         sess.run(tf.global_variables_initializer())
 444 |         sess.run(tf.local_variables_initializer())
 445 |         coord = tf.train.Coordinator()
 446 |         threads = tf.train.start_queue_runners(sess=sess, coord=coord)
 447 |         print(len(threads))
 448 |         train_cost, train_acc = run_batch_from_tfrecords(sess, coord, train_model, tf.no_op())
 449 | 
 450 |         coord.request_stop()
 451 |         coord.join(threads)
 452 | 
 453 |     with tf.Session() as sess:
 454 |         sess.run(tf.global_variables_initializer())
 455 |         sess.run(tf.local_variables_initializer())
 456 |         coord = tf.train.Coordinator()
 457 |         threads = tf.train.start_queue_runners(sess=sess, coord=coord)
 458 | 
 459 |         valid_cost, valid_acc = run_batch_from_tfrecords(sess, coord, valid_model, tf.no_op())
 460 | 
 461 |         coord.request_stop()
 462 |         coord.join(threads)
 463 | 
 464 |     with tf.Session() as sess:
 465 |         sess.run(tf.global_variables_initializer())
 466 |         sess.run(tf.local_variables_initializer())
 467 |         coord = tf.train.Coordinator()
 468 |         threads = tf.train.start_queue_runners(sess=sess, coord=coord)
 469 | 
 470 |         test_cost, test_acc = run_batch_from_tfrecords(sess, coord, test_model, tf.no_op())
 471 | 
 472 |         coord.request_stop()
 473 |         coord.join(threads)
 474 | 
 475 |     LOGGER.summary_log((train_cost, train_acc, valid_cost, valid_acc, test_cost, test_acc),iter)
 476 | 
 477 |     print("Train cost {0:0.3f}, Acc {1:0.3f}".format(
 478 |         train_cost, train_acc))
 479 |     print("Valid cost {0:0.3f}, Acc {1:0.3f}".format(
 480 |         valid_cost, valid_acc))
 481 |     print("Test  cost {0:0.3f}, Acc {1:0.3f}".format(
 482 |         test_cost, test_acc))
 483 | 
 484 | #dataset版
 485 | def evaluate_from_tfrecord_dataset(net_type,sess, model,next_element,eval_op,epoch):
 486 | 
 487 |     cost_list = []
 488 |     acc_list = []
 489 |     confus_list = []
 490 |     count = 0
 491 |     try:
 492 |         while True:
 493 |             input, early, label = sess.run(next_element)
 494 | 
 495 |             if net_type == NetType.RNN_NV1:
 496 | 
 497 |                 if input.shape[0] < conf.batch_size:
 498 |                     print(input.shape)
 499 |                     break
 500 |                 input = np.transpose(input, [1, 0, 2])
 501 |                 batch_size = input.shape[1]
 502 | 
 503 |                 cost, acc, confus_mat = sess.run(fetches=[model.cost, model.accuracy, model.confusion_matrix],
 504 |                                                  feed_dict={model.input_data: input,
 505 |                                                             model.early_stop: early,
 506 |                                                             model.targets: label})
 507 | 
 508 |             elif net_type == NetType.RNN_NVN:
 509 |                 if input.shape[0] < conf.batch_size:
 510 |                     print(input.shape)
 511 |                     break
 512 |                 new_label = np.zeros([conf.batch_size, conf.exp_seq_len], np.int32)
 513 |                 for batch in range(conf.batch_size):
 514 |                     new_label[batch, 0:early[batch]] = label[batch]
 515 |                     new_label[batch, early[batch]:] = 0
 516 |                 label = new_label
 517 |                 input = np.transpose(input, [1, 0, 2])
 518 |                 batch_size = input.shape[1]
 519 | 
 520 |                 weight_sequence_loss = np.zeros([conf.batch_size, conf.exp_seq_len], np.float32)
 521 |                 for k in range(conf.batch_size):
 522 |                     weight_sequence_loss[k, 0:early[k]] = 1
 523 | 
 524 |                 cost,digit_predictions = sess.run(fetches = [model.cost,model.digit_predictions],feed_dict={
 525 |                                                                 model.input_data:input,
 526 |                                                                 model.early_stop:early,
 527 |                                                                 model.weight_sequence_loss:weight_sequence_loss,
 528 |                                                                 model.targets:label
 529 |                                                             })
 530 | 
 531 | 
 532 |                 batch_acc_list = []
 533 |                 confus_mat_list = []
 534 |                 for k in range(conf.batch_size):
 535 |                     start = k*conf.exp_seq_len
 536 |                     end = k*conf.exp_seq_len + early[k]
 537 |                     seq_acc = np.equal(digit_predictions[start:end],label[k,0:early[k]])
 538 |                     seq_acc = seq_acc.astype(np.float32)
 539 |                     batch_acc_list.append(np.mean(seq_acc))
 540 |                     confus = confusion_matrix(label[k,0:early[k]],digit_predictions[start:end],labels = [0,1,2,3])
 541 |                     confus_mat_list.append(confus)
 542 |                 acc = sum(batch_acc_list)/conf.batch_size
 543 |                 confus_mat = sum(confus_mat_list)
 544 |             elif net_type == NetType.DNN or net_type == NetType.DNN_MAXOUT:
 545 |                 list_input = []
 546 |                 list_label = []
 547 |                 for batch in range(input.shape[0]):
 548 |                     list_input.append(input[batch, 0:early[batch], :])
 549 |                     new_label = np.zeros([early[batch]], np.int32)
 550 |                     new_label[:] = label[batch]
 551 |                     list_label.append(new_label)
 552 |                 input = np.concatenate(tuple(list_input), axis=0)
 553 |                 label = np.concatenate(tuple(list_label), axis=0)
 554 |                 batch_size = input.shape[0]
 555 |                 cost, acc, confus_mat = sess.run(fetches=[model.cost, model.accuracy, model.confusion_matrix],
 556 |                                                  feed_dict={model.input_data: input,
 557 |                                                             model.early_stop: early,
 558 |                                                             model.targets: label})
 559 | 
 560 |             #print(input.shape)
 561 | 
 562 |             confus_list.append(confus_mat)
 563 |             cost_list.append(cost)
 564 |             acc_list.append(acc)
 565 |             count += 1
 566 | 
 567 |     except tf.errors.OutOfRangeError:
 568 | 
 569 |         print("超出界限！！！")
 570 | 
 571 |     if model.is_training:
 572 |         LOGGER.training_log("训练集：\n")
 573 |     elif model.is_validation:
 574 |         LOGGER.training_log("验证集：\n")
 575 |     else:
 576 |         LOGGER.training_log("测试集：\n")
 577 |     LOGGER.training_log(str(sum(confus_list)))
 578 |     print( count)
 579 |     return sum(cost_list) / len(cost_list), sum(acc_list) / len(acc_list)
 580 | 
 581 | #minbatch训练方法
 582 | def run_batch(session, model, data, eval_op, minibatch):
 583 |     # 准备数据
 584 |     x, y, e_stop = data
 585 |     epoch_size = x.shape[1] // model.batch_size
 586 | 
 587 |     # 记录结果
 588 |     costs = []
 589 |     correct = []
 590 | 
 591 |     for batch in range(epoch_size):
 592 |         x_batch = x[:, batch * model.batch_size: (batch + 1) * model.batch_size, :]
 593 |         y_batch = y[batch * model.batch_size: (batch + 1) * model.batch_size]
 594 |         e_batch = e_stop[batch * model.batch_size: (batch + 1) * model.batch_size]
 595 | 
 596 |         temp_dict = {model.input_data: x_batch}
 597 |         temp_dict.update({model.targets: y_batch})
 598 |         temp_dict.update({model.early_stop: e_batch})
 599 | 
 600 | 
 601 |         if model.is_training and eval_op == model.train_op:
 602 |             #如果是训练模式，且op正常 则正常训练
 603 |             print("开始训练第 %d 个batch" % batch)
 604 |             _, cost, accuracy = session.run([eval_op, model.cost, model.accuracy],
 605 |                                             feed_dict=temp_dict)
 606 | 
 607 |             if minibatch % conf.evaluate_freq == 0:
 608 |                 result = evaluate_model(session, minibatch)  #评估模型，返回结果
 609 |                 LOGGER.summary_log(result, minibatch)
 610 |             minibatch += 1
 611 | 
 612 | 
 613 |         else:
 614 |             cost, confusion, accuracy, _ = session.run([model.cost, model.confusion_matrix, model.accuracy, eval_op],
 615 |                                                        feed_dict=temp_dict)
 616 | 
 617 |             if model.net_type == NetType.RNN_NVN:
 618 |                 # keep results for this minibatch
 619 |                 costs.append(cost)
 620 |                 correct.append(accuracy * sum(e_batch))
 621 | 
 622 |                 # print test confusion matrix
 623 |                 if not model.is_training and not model.is_validation:
 624 | 
 625 |                     LOGGER.training_log(str(minibatch) + "测试集的混淆矩阵")
 626 |                     LOGGER.training_log(str(confusion))
 627 |                     # output predictions in test mode
 628 |                     # if conf.test_mode:
 629 |                     #     pred = session.run([m._prob_predictions], feed_dict=temp_dict)
 630 |                     #     pred = np.array(pred)
 631 |                     #     np.set_printoptions(threshold=np.nan)
 632 |                     #     # results = np.column_stack((tar, pred))
 633 |                     #     # np.savetxt("results/prediction.result", pred)#, fmt='%.3f')
 634 |                     #     #print("output target and predictions to file prediction.csv")
 635 |                     #     #exit()
 636 | 
 637 |                 #计算平均精度与损失
 638 |                 if batch == epoch_size - 1:
 639 |                     accuracy = sum(correct) / float(sum(e_stop))
 640 |                     return (sum(costs) / float(epoch_size), accuracy)
 641 |             elif model.net_type == NetType.RNN_NV1:
 642 |                 costs.append(cost)
 643 |                 correct.append(accuracy)
 644 | 
 645 |                 # print test confusion matrix
 646 |                 if not model.is_training and not model.is_validation:
 647 |                     LOGGER.training_log(str(minibatch) + "测试集的混淆矩阵")
 648 |                     LOGGER.training_log(str(confusion))
 649 |                     # output predictions in test mode
 650 |                     # if conf.test_mode:
 651 |                     #     pred = session.run([m._prob_predictions], feed_dict=temp_dict)
 652 |                     #     pred = np.array(pred)
 653 |                     #     np.set_printoptions(threshold=np.nan)
 654 |                     #     # results = np.column_stack((tar, pred))
 655 |                     #     # np.savetxt("results/prediction.result", pred)#, fmt='%.3f')
 656 |                     #     #print("output target and predictions to file prediction.csv")
 657 |                     #     #exit()
 658 | 
 659 |                 # 计算平均精度与损失
 660 |                 if batch == epoch_size - 1:
 661 |                     cost_mean = (sum(costs) )/ float(epoch_size)
 662 |                     accuracy_mean = sum(correct) / float(epoch_size)
 663 |                     return (cost_mean,accuracy_mean)
 664 | 
 665 |     # training: keep track of minibatch number
 666 |     return (minibatch)
 667 | 
 668 | def run_batch_all(session,model,data,eval_op,epoch):
 669 |     x, y, e_stop = data
 670 |     epoch_size = x.shape[1] // model.batch_size
 671 |     shuffle_perm = None
 672 |     if model.is_training:
 673 |         shuffle_perm = np.random.permutation(range(e_stop.shape[0]))
 674 | 
 675 | 
 676 |     # 记录结果
 677 |     costs = []
 678 |     correct = []
 679 |     for batch in range(epoch_size):
 680 | 
 681 |         if model.is_training:
 682 |             batch_perm = shuffle_perm[batch * model.batch_size: (batch + 1) * model.batch_size]
 683 |             x_batch = x[:, batch_perm, :]
 684 |             y_batch = y[batch_perm]
 685 |             e_batch = e_stop[batch_perm]
 686 |         else:
 687 |             x_batch = x[:, batch * model.batch_size: (batch + 1) * model.batch_size, :]
 688 |             y_batch = y[batch * model.batch_size: (batch + 1) * model.batch_size]
 689 |             e_batch = e_stop[batch * model.batch_size: (batch + 1) * model.batch_size]
 690 | 
 691 |         temp_dict = {model.input_data: x_batch}
 692 |         temp_dict.update({model.targets: y_batch})
 693 |         temp_dict.update({model.early_stop: e_batch})
 694 | 
 695 |         if model.is_training and eval_op == model.train_op:
 696 |             _= session.run([eval_op],feed_dict=temp_dict)
 697 | 
 698 |         else:
 699 |             cost, confusion, accuracy, _ = session.run([model.cost, model.confusion_matrix, model.accuracy, eval_op],feed_dict=temp_dict)
 700 | 
 701 |             if model.is_test:
 702 |                 LOGGER.training_log(str(epoch) + "测试集的混淆矩阵")
 703 |                 LOGGER.training_log(str(confusion))
 704 |             elif model.is_validation:
 705 |                 LOGGER.training_log(str(epoch) + "验证集的混淆矩阵")
 706 |                 LOGGER.training_log(str(confusion))
 707 | 
 708 | 
 709 |             if model.net_type == NetType.RNN_NVN:
 710 |                 # keep results for this minibatch
 711 |                 costs.append(cost)
 712 |                 correct.append(accuracy * sum(e_batch))
 713 | 
 714 |                 #计算平均精度与损失
 715 |                 if batch == epoch_size - 1:
 716 |                     accuracy = sum(correct) / float(sum(e_stop))
 717 |                     return (sum(costs) / float(epoch_size), accuracy)
 718 |             elif model.net_type == NetType.RNN_NV1:
 719 |                 costs.append(cost)
 720 |                 correct.append(accuracy)
 721 | 
 722 |                 # 计算平均精度与损失
 723 |                 if batch == epoch_size - 1:
 724 |                     cost_mean = sum(costs) / float(epoch_size)
 725 |                     accuracy_mean = sum(correct) / float(epoch_size)
 726 |                     return (cost_mean, accuracy_mean)
 727 | 
 728 | #quick 代表所有数据在npz文件里
 729 | def run_batch_quick(session,model,data,eval_op,epoch):
 730 | 
 731 |     if model.is_training:
 732 |         x = data["train_data"]
 733 |         y = data["train_label"]
 734 |         e_stop = data["train_early_stop"]
 735 |     elif model.is_validation:
 736 |         x = data["valid_data"]
 737 |         y = data["valid_label"]
 738 |         e_stop = data["valid_early_stop"]
 739 |     else:
 740 |         x = data["test_data"]
 741 |         y = data["test_label"]
 742 |         e_stop = data["test_early_stop"]
 743 | 
 744 |     epoch_size = x.shape[0] // model.batch_size
 745 |     shuffle_perm = None
 746 |     if model.is_training:
 747 |         shuffle_perm = np.random.permutation(range(e_stop.shape[0]))
 748 | 
 749 |     # 记录结果
 750 |     costs = []
 751 |     correct = []
 752 |     for batch in range(epoch_size):
 753 | 
 754 |         if model.is_training:
 755 |             batch_perm = shuffle_perm[batch * model.batch_size: (batch + 1) * model.batch_size]
 756 |             x_batch = x[batch_perm,:, :]
 757 |             y_batch = y[batch_perm]
 758 |             e_batch = e_stop[batch_perm]
 759 |         else:
 760 |             x_batch = x[batch * model.batch_size: (batch + 1) * model.batch_size,: , :]
 761 |             y_batch = y[batch * model.batch_size: (batch + 1) * model.batch_size]
 762 |             e_batch = e_stop[batch * model.batch_size: (batch + 1) * model.batch_size]
 763 | 
 764 |         x_batch = np.transpose(x_batch,[1,0,2])
 765 | 
 766 |         temp_dict = {model.input_data: x_batch}
 767 |         temp_dict.update({model.targets: y_batch})
 768 |         temp_dict.update({model.early_stop: e_batch})
 769 | 
 770 |         if model.is_training and eval_op == model.train_op:
 771 |             _ = session.run([eval_op], feed_dict=temp_dict)
 772 | 
 773 |         else:
 774 |             cost, confusion, accuracy, _ = session.run([model.cost, model.confusion_matrix, model.accuracy, eval_op],
 775 |                                                        feed_dict=temp_dict)
 776 | 
 777 |             if model.is_test:
 778 |                 LOGGER.training_log(str(epoch) + "测试集的混淆矩阵")
 779 |                 LOGGER.training_log(str(confusion))
 780 |             elif model.is_validation:
 781 |                 LOGGER.training_log(str(epoch) + "验证集的混淆矩阵")
 782 |                 LOGGER.training_log(str(confusion))
 783 | 
 784 |             if model.net_type == NetType.RNN_NVN:
 785 |                 # keep results for this minibatch
 786 |                 costs.append(cost)
 787 |                 correct.append(accuracy * sum(e_batch))
 788 | 
 789 |                 # 计算平均精度与损失
 790 |                 if batch == epoch_size - 1:
 791 |                     accuracy = sum(correct) / float(sum(e_stop))
 792 |                     return (sum(costs) / float(epoch_size), accuracy)
 793 |             elif model.net_type == NetType.RNN_NV1:
 794 |                 costs.append(cost)
 795 |                 correct.append(accuracy)
 796 | 
 797 |                 # 计算平均精度与损失
 798 |                 if batch == epoch_size - 1:
 799 |                     cost_mean = sum(costs) / float(epoch_size)
 800 |                     accuracy_mean = sum(correct) / float(epoch_size)
 801 |                     return (cost_mean, accuracy_mean)
 802 | 
 803 |     print("训练完毕  " + str(epoch))
 804 | 
 805 | #队列版未完成
 806 | def run_batch_from_tfrecords(sess, coord, model, eval_op):
 807 |     if model.is_training and eval_op == model.train_op:
 808 |         count = 1
 809 |         iter = 0
 810 |         while not coord.should_stop():
 811 |             if count % conf.evaluate_freq != 0:
 812 |                 _ = sess.run(model.train_op)
 813 | 
 814 |             else:
 815 |                 coord.request_stop()
 816 |                 print("第%d次测试精度" % (iter))
 817 |                 evaluate_from_tfrecords(iter)
 818 |                 coord.clear_stop()
 819 |                 iter += 1
 820 |             count+=1
 821 | 
 822 |     else:
 823 |         accuracy_list = []
 824 |         cost_list = []
 825 |         try:
 826 |             while not coord.should_stop():
 827 |                 cost, accuracy = sess.run([model.cost, model.accuracy])
 828 |                 print(cost,accuracy)
 829 |                 accuracy_list.append(accuracy)
 830 |                 cost_list.append(cost)
 831 |         except tf.errors.OutOfRangeError:
 832 |             print("测试完成")
 833 |         acc_mean = sum(accuracy_list) / len(accuracy_list)
 834 |         cost_mean = sum(cost_list) / len(cost_list)
 835 |         return cost_mean, acc_mean
 836 | 
 837 | #nvn model
 838 | def rnn_nvn_model():
 839 |     #1 处理数据
 840 |     #2 设置模型
 841 |     #3 训练模型
 842 |     #4 测试模型
 843 | 
 844 |     global train_data
 845 |     global test_data
 846 |     global val_data
 847 |     #x shape = [序列长度，总的序列个数，特征长度]
 848 |     #y shape = [总的序列个数，1}
 849 |     #early_stop  shape = [总的序列个数]  [250,250,250,250,50,.........]
 850 |     #train_index  训练集的索引 [10,11,12,13,......]
 851 |     train_data, test_data, val_data, train_config, test_config, valid_config = loadData_rnn_nvn()
 852 | 
 853 |     minibatch = 0
 854 | 
 855 |     with tf.Session() as sess:
 856 |         tf.set_random_seed(0)
 857 | 
 858 |         #变量初始化
 859 |         initializer = tf.random_uniform_initializer(0,0.001)
 860 |         #正则化
 861 |         regularizer = layers.l2_regularizer(conf.l2_preparam)
 862 | 
 863 |         with tf.variable_scope("model",reuse=False,initializer=initializer,dtype=tf.float32): #,regularizer = regularizer):
 864 |             global train_model
 865 |             train_model = Model(conf,train_config)
 866 |         with tf.variable_scope("model",reuse=True,initializer=initializer,dtype=tf.float32): #,regularizer = regularizer):
 867 |             global test_model
 868 |             test_model = Model(conf,test_config)
 869 |         with tf.variable_scope("model",reuse=True,initializer=initializer,dtype=tf.float32): #,regularizer = regularizer):
 870 |             global valid_model
 871 |             valid_model = Model(conf,valid_config)
 872 | 
 873 |         saver  = None
 874 |         if conf.checkpoint or conf.restore:
 875 |             saver = tf.train.Saver()
 876 | 
 877 |         if conf.tensorboard:
 878 |             global writer
 879 |             writer = tf.summary.FileWriter(log_path, sess.graph)
 880 | 
 881 |         if not conf.restore:
 882 |             tf.global_variables_initializer().run()  # initialize all variables in the model
 883 |         else:
 884 |             saver.restore(sess, data_path + task)
 885 |             print("装载变量......")
 886 | 
 887 |         for i in range(conf.num_epochs):
 888 |             print("第 {0}次epoch".format(i))
 889 |             minibatch = run_batch(sess,train_model,train_data,train_model.train_op,minibatch)
 890 |             if (i+1)%10 == 0:
 891 |                 saver.save(sess,data_path+task)
 892 | 
 893 |         if conf.checkpoint:
 894 |             save_path = saver.save(sess,data_path+task)
 895 | 
 896 | #nv1 model
 897 | def rnn_nv1_model(is_quick):
 898 |     global train_data
 899 |     global test_data
 900 |     global valid_data
 901 |     if is_quick:
 902 |         train_data,valid_data,test_data=load_data_rnn_nv1_quick(conf.num_classes)
 903 |     else:
 904 |         train_data, valid_data, test_data = load_data_rnn_nv1(conf.num_classes)
 905 |     #print("数据加载完毕......")
 906 |     train_conf = None
 907 |     valid_conf = None
 908 |     test_conf = None
 909 | 
 910 |     if conf.rnn_type == "lstm_b":
 911 |         train_conf = TrainingConfig(True,False,False,conf.batch_size,len_features,net_type=NetType.RNN_NV1,rnn_type=RNNType.LSTM_b)
 912 |         valid_conf = TrainingConfig(False,True,False,conf.batch_size,len_features,net_type=NetType.RNN_NV1,rnn_type=RNNType.LSTM_b)
 913 |         test_conf = TrainingConfig(False,False,True,conf.batch_size,len_features,net_type=NetType.RNN_NV1,rnn_type=RNNType.LSTM_b)
 914 |     elif conf.rnn_type == "gru_b":
 915 |         train_conf = TrainingConfig(True, False, False, conf.batch_size, len_features, net_type=NetType.RNN_NV1,
 916 |                                     rnn_type=RNNType.GRU_b)
 917 |         valid_conf = TrainingConfig(False, True, False, conf.batch_size, len_features, net_type=NetType.RNN_NV1,
 918 |                                     rnn_type=RNNType.GRU_b)
 919 |         test_conf = TrainingConfig(False, False, True, conf.batch_size, len_features, net_type=NetType.RNN_NV1,
 920 |                                    rnn_type=RNNType.GRU_b)
 921 |     elif conf.rnn_type == "gru":
 922 |         train_conf = TrainingConfig(True, False, False, conf.batch_size, len_features, net_type=NetType.RNN_NV1,
 923 |                                     rnn_type=RNNType.GRU)
 924 |         valid_conf = TrainingConfig(False, True, False, conf.batch_size, len_features, net_type=NetType.RNN_NV1,
 925 |                                     rnn_type=RNNType.GRU)
 926 |         test_conf = TrainingConfig(False, False, True, conf.batch_size, len_features, net_type=NetType.RNN_NV1,
 927 |                                    rnn_type=RNNType.GRU)
 928 | 
 929 |     config = tf.ConfigProto()
 930 |     config.gpu_options.allow_growth = True
 931 |     #config.gpu_options.per_process_gpu_memory_fraction = 0.7  # 占用GPU90%的显存
 932 |     initializer = tf.random_uniform_initializer(0, conf.init_scale)
 933 | 
 934 |     with tf.variable_scope("model", reuse=False, initializer=initializer):
 935 |         global train_model
 936 |         train_model = Model(conf, train_conf)
 937 |     with tf.variable_scope("model", reuse=True, initializer=initializer):
 938 |         global test_model
 939 |         test_model = Model(conf, test_conf)
 940 |     with tf.variable_scope("model", reuse=True, initializer=initializer):
 941 |         global valid_model
 942 |         valid_model = Model(conf, valid_conf)
 943 | 
 944 |     minibatch = 0
 945 |     with tf.Session(config=config) as sess:
 946 | 
 947 |         saver = None
 948 |         if conf.checkpoint or conf.restore:
 949 |             saver = tf.train.Saver()
 950 | 
 951 |         if conf.tensorboard:
 952 |             global writer
 953 |             writer = tf.summary.FileWriter(log_path, sess.graph)
 954 | 
 955 |         if not conf.restore:
 956 |             tf.global_variables_initializer().run()  # initialize all variables in the model
 957 |         else:
 958 |             saver.restore(sess, data_path + task)
 959 |             print("装载变量......")
 960 | 
 961 |         LOGGER.training_log(str(conf.__dict__))
 962 |         LOGGER.training_log("activation = tanh")
 963 | 
 964 |         if is_quick:
 965 | 
 966 |             for i in range(conf.num_epochs):
 967 |                 print("第 {0}次epoch".format(i))
 968 |                 #minibatch = run_batch(sess, train_model, train_data, train_model.train_op, minibatch)
 969 |                 run_batch_quick(sess,train_model,train_data,train_model.train_op,i)
 970 |                 evaluate_model_quick(sess,i)
 971 |         else:
 972 |             for i in range(conf.num_epochs):
 973 |                 print("第 {0}次epoch".format(i))
 974 |                 #minibatch = run_batch(sess, train_model, train_data, train_model.train_op, minibatch)
 975 |                 run_batch_all(sess,train_model,train_data,train_model.train_op,i)
 976 |                 evaluate_model_all(sess,i)
 977 | 
 978 | #队列版  未完善
 979 | def rnn_nv1_model_tfrecord():
 980 |     global train_data
 981 |     global test_data
 982 |     global valid_data
 983 |     # train_data,valid_data,test_data=load_data_rnn_nv1_quick(conf.num_classes)
 984 |     # print("数据加载完毕......")
 985 |     train_conf = None
 986 |     valid_conf = None
 987 |     test_conf = None
 988 | 
 989 |     if conf.rnn_type == "lstm_b":
 990 |         train_conf = TrainingConfig(True, False, False, conf.batch_size, len_features, net_type=NetType.RNN_NV1,
 991 |                                     rnn_type=RNNType.LSTM_b)
 992 |         valid_conf = TrainingConfig(False, True, False, conf.batch_size, len_features, net_type=NetType.RNN_NV1,
 993 |                                     rnn_type=RNNType.LSTM_b)
 994 |         test_conf = TrainingConfig(False, False, True, conf.batch_size, len_features, net_type=NetType.RNN_NV1,
 995 |                                    rnn_type=RNNType.LSTM_b)
 996 |     elif conf.rnn_type == "gru_b":
 997 |         train_conf = TrainingConfig(True, False, False, conf.batch_size, len_features, net_type=NetType.RNN_NV1,
 998 |                                     rnn_type=RNNType.GRU_b)
 999 |         valid_conf = TrainingConfig(False, True, False, conf.batch_size, len_features, net_type=NetType.RNN_NV1,
1000 |                                     rnn_type=RNNType.GRU_b)
1001 |         test_conf = TrainingConfig(False, False, True, conf.batch_size, len_features, net_type=NetType.RNN_NV1,
1002 |                                    rnn_type=RNNType.GRU_b)
1003 |     elif conf.rnn_type == "gru":
1004 |         train_conf = TrainingConfig(True, False, False, conf.batch_size, len_features, net_type=NetType.RNN_NV1,
1005 |                                     rnn_type=RNNType.GRU)
1006 |         valid_conf = TrainingConfig(False, True, False, conf.batch_size, len_features, net_type=NetType.RNN_NV1,
1007 |                                     rnn_type=RNNType.GRU)
1008 |         test_conf = TrainingConfig(False, False, True, conf.batch_size, len_features, net_type=NetType.RNN_NV1,
1009 |                                    rnn_type=RNNType.GRU)
1010 | 
1011 |     config = tf.ConfigProto()
1012 |     config.gpu_options.allow_growth = True
1013 |     # config.gpu_options.per_process_gpu_memory_fraction = 0.7  # 占用GPU90%的显存
1014 |     initializer = tf.random_uniform_initializer(0, conf.init_scale)
1015 | 
1016 |     with tf.variable_scope("model", reuse=False, initializer=initializer):
1017 |         global train_model
1018 |         train_model = Model(conf, train_conf)
1019 | 
1020 |     # with tf.variable_scope("model", reuse=True, initializer=initializer):
1021 |     #     global test_model
1022 |     #     test_model = Model(conf, test_conf)
1023 |     #
1024 |     # with tf.variable_scope("model", reuse=True, initializer=initializer):
1025 |     #     global valid_model
1026 |     #     valid_model = Model(conf, valid_conf)
1027 | 
1028 | 
1029 |     train_filenames = np.array(util.search_file("interval_[1-5]_label_[0-3]_train.tfrecords", tfrecords_data_path))
1030 |     valid_filenames = np.array(util.search_file("interval_[1-5]_label_[0-3]_valid.tfrecords", tfrecords_data_path))
1031 |     test_filenames = np.array(util.search_file("interval_[1-5]_label_[0-3]_test.tfrecords", tfrecords_data_path))
1032 | 
1033 |     minibatch = 0
1034 |     with tf.Session(config=config) as sess:
1035 | 
1036 |         saver = None
1037 |         if conf.checkpoint or conf.restore:
1038 |             saver = tf.train.Saver()
1039 | 
1040 |         if conf.tensorboard:
1041 |             global writer
1042 |             writer = tf.summary.FileWriter(log_path, sess.graph)
1043 | 
1044 |         sess.run(tf.local_variables_initializer())
1045 |         if not conf.restore:
1046 |             sess.run(tf.global_variables_initializer())  # initialize all variables in the model
1047 |         else:
1048 |             saver.restore(sess, data_path + task)
1049 |             print("装载变量......")
1050 | 
1051 |         LOGGER.training_log(str(conf.__dict__))
1052 | 
1053 |         coord = tf.train.Coordinator()
1054 |         threads = tf.train.start_queue_runners(sess=sess, coord=coord,start=True)
1055 |         #print("a")
1056 |         for i in range(10000):
1057 |             _  = sess.run(train_model.train_op)
1058 | 
1059 |         #run_batch_from_tfrecords(sess, coord, train_model, train_model.train_op)
1060 | 
1061 |         coord.request_stop()
1062 |         coord.join(threads)
1063 | 
1064 | def init_model_config(batch_size,len_features,net_type,rnn_type):
1065 |     train_conf = TrainingConfig(True, False, False, batch_size, len_features, net_type,rnn_type)
1066 |     valid_conf = TrainingConfig(False, True, False, batch_size, len_features, net_type,rnn_type)
1067 |     test_conf = TrainingConfig(False, False, True, batch_size, len_features, net_type,rnn_type)
1068 | 
1069 |     return train_conf,valid_conf,test_conf
1070 | 
1071 | #dataset版
1072 | def model_tfrecord_dataset(net_type,rnn_type):
1073 | 
1074 |     #初始文件路径等等
1075 |     #init_environment(net_type)
1076 | 
1077 |     train_conf,valid_conf,test_conf = init_model_config(conf.batch_size,len_features,net_type,rnn_type)
1078 | 
1079 |     config = tf.ConfigProto()
1080 |     config.gpu_options.allow_growth = True
1081 |     #config.gpu_options.per_process_gpu_memory_fraction = 0.25  # 占用GPU90%的显存
1082 |     initializer = tf.random_uniform_initializer(0, conf.init_scale)
1083 | 
1084 |     with tf.variable_scope("model", reuse=False, initializer=initializer):
1085 |         global train_model
1086 |         train_model = Model(conf, train_conf)
1087 | 
1088 |     with tf.variable_scope("model", reuse=True, initializer=initializer):
1089 |         global test_model
1090 |         test_model = Model(conf, test_conf)
1091 | 
1092 |     with tf.variable_scope("model", reuse=True, initializer=initializer):
1093 |         global valid_model
1094 |         valid_model = Model(conf, valid_conf)
1095 | 
1096 |     LOGGER.training_log(str(conf.__dict__))
1097 |     LOGGER.training_log(str(train_conf.activation))
1098 | 
1099 |     train_data_set = make_dataset_from_tfrecord_file(param.train_file_pattern,conf.batch_size,True,1)
1100 |     train_data_iterator = train_data_set.make_initializable_iterator()
1101 |     train_next_element = train_data_iterator.get_next()
1102 | 
1103 |     train_data_no_op_set = make_dataset_from_tfrecord_file(param.train_file_pattern, conf.batch_size, False, 1)
1104 |     train_data_no_op_iterator = train_data_no_op_set.make_initializable_iterator()
1105 |     train_no_op_next_element = train_data_no_op_iterator.get_next()
1106 | 
1107 | 
1108 |     valid_data_set = make_dataset_from_tfrecord_file(param.valid_file_pattern, conf.batch_size, False,1)
1109 |     valid_data_iterator = valid_data_set.make_initializable_iterator()
1110 |     valid_next_element = valid_data_iterator.get_next()
1111 | 
1112 |     test_data_set = make_dataset_from_tfrecord_file(param.test_file_pattern, conf.batch_size, False,1)
1113 |     test_data_iterator = test_data_set.make_initializable_iterator()
1114 |     test_next_element = test_data_iterator.get_next()
1115 | 
1116 |     saver = tf.train.Saver()
1117 | 
1118 |     with tf.Session(config=config) as sess:
1119 | 
1120 |         if conf.restore:
1121 |             saver.restore(sess,data_path + task + str(net_type.value)+str(rnn_type.value))
1122 |         else:
1123 |             sess.run(tf.global_variables_initializer())
1124 |             sess.run(tf.local_variables_initializer())
1125 | 
1126 |         for epoch in range(conf.num_epochs):
1127 |             sess.run(train_data_iterator.initializer)
1128 |             i = 0
1129 |             try:
1130 |                 while True:
1131 |                     input, early, label = sess.run(fetches=train_next_element)
1132 |                     # 通过session每次从数据集中取值
1133 |                     #RNN_NV1网络
1134 |                     #print(input.shape)
1135 |                     if  net_type == NetType.RNN_NV1:
1136 | 
1137 |                         if input.shape[0] < conf.batch_size:
1138 |                             print(input.shape)
1139 |                             break
1140 |                         input = np.transpose(input, [1, 0, 2])
1141 | 
1142 |                         sess.run(fetches=train_model.train_op, feed_dict={train_model.input_data: input,
1143 |                                                                           train_model.early_stop: early,
1144 |                                                                           train_model.targets: label})
1145 | 
1146 |                     #rnn_nvn网络
1147 |                     elif net_type == NetType.RNN_NVN:
1148 |                         if input.shape[0] < conf.batch_size:
1149 |                             break
1150 |                         new_label = np.zeros([conf.batch_size,conf.exp_seq_len],np.int32)
1151 |                         for batch in range(conf.batch_size):
1152 |                             new_label[batch,0:early[batch]] = label[batch]
1153 |                             new_label[batch,early[batch]:] = 0
1154 |                         label = new_label
1155 |                         input = np.transpose(input, [1, 0, 2])
1156 |                         weight_sequence_loss = np.zeros([conf.batch_size,conf.exp_seq_len],np.float32)
1157 |                         for k in range(conf.batch_size):
1158 |                             weight_sequence_loss[k,0:early[k]] = 1
1159 | 
1160 |                         sess.run(fetches=train_model.train_op, feed_dict={  train_model.input_data:input,
1161 |                                                                             train_model.early_stop:early,
1162 |                                                                             train_model.targets:label,
1163 |                                                                             train_model.weight_sequence_loss:weight_sequence_loss})
1164 | 
1165 |                     #dnn 网络
1166 |                     elif net_type == NetType.DNN or net_type == NetType.DNN_MAXOUT:
1167 |                         list_input = []
1168 |                         list_label = []
1169 |                         for batch in range(input.shape[0]):
1170 |                             list_input.append(input[batch,0:early[batch],:])
1171 |                             new_label = np.zeros([early[batch]],np.int32)
1172 |                             new_label[:] = label[batch]
1173 |                             list_label.append(new_label)
1174 |                         input = np.concatenate(tuple(list_input),axis=0)
1175 |                         label = np.concatenate(tuple(list_label),axis=0)
1176 | 
1177 |                         sess.run(fetches=train_model.train_op, feed_dict={train_model.input_data:input,
1178 |                                                                           train_model.early_stop:early,
1179 |                                                                           train_model.targets:label})
1180 |                     print("训练集第%d个batch" %(i))
1181 |                     if i % 100 == 0 and i >0:
1182 |                         train_cost = 0
1183 |                         train_acc = 0
1184 |                         valid_cost = 0
1185 |                         valid_acc = 0
1186 |                         #sess.run(train_data_no_op_iterator.initializer)
1187 |                         #train_cost, train_acc = evaluate_from_tfrecord_dataset(net_type, sess, train_model,train_no_op_next_element, tf.no_op(), i / 100)
1188 |                         #sess.run(valid_data_iterator.initializer)
1189 |                         #valid_cost,valid_acc = evaluate_from_tfrecord_dataset(net_type,sess,valid_model,valid_next_element,tf.no_op(),i/100)
1190 |                         sess.run(test_data_iterator.initializer)
1191 |                         test_cost,test_acc = evaluate_from_tfrecord_dataset(net_type,sess,test_model,test_next_element,tf.no_op(),i/100)
1192 |                         print("训练集cost:%f,acc:%f" %(train_cost,train_acc))
1193 |                         print("验证集cost:%f,acc:%f" %(valid_cost,valid_acc))
1194 |                         print("测试集cost:%f,acc:%f" %(test_cost,test_acc))
1195 |                         LOGGER.summary_log((train_cost,train_acc,valid_cost,valid_acc,test_cost,test_acc),i)
1196 |                     i = i + 1
1197 |             except tf.errors.OutOfRangeError:
1198 |                 print("第%d  epoch end!" % (epoch))
1199 |             print("共 %d 个batch" %(i))
1200 |             print("第%d  epoch end!" % (epoch))
1201 |             save_path = saver.save(sess, data_path + task + str(net_type.value)+str(rnn_type.value))
1202 |             if save_path is not None:
1203 |                 LOGGER.training_log(str(save_path))
1204 | 
1205 | #dataset 解析函数
1206 | def __parse_function_9_features(example_proto):
1207 |     feature = param.feature
1208 |     features = tf.parse_single_example(example_proto,feature)
1209 |     speed_sec = tf.reshape(tf.decode_raw(features[param.SPEED_SEC], tf.int64), [conf.exp_seq_len, param.WIDTH])
1210 |     avg_speed = tf.reshape(tf.decode_raw(features[param.AVG_SPEED], tf.int64), [conf.exp_seq_len, param.WIDTH])
1211 |     std_speed = tf.reshape(tf.decode_raw(features[param.STD_SPEED], tf.int64), [conf.exp_seq_len, param.WIDTH])
1212 |     acc_sec = tf.reshape(tf.decode_raw(features[param.ACC_SEC], tf.int64), [conf.exp_seq_len, param.WIDTH])
1213 |     mean_acc = tf.reshape(tf.decode_raw(features[param.MEAN_ACC], tf.int64), [conf.exp_seq_len, param.WIDTH])
1214 |     std_acc = tf.reshape(tf.decode_raw(features[param.STD_ACC], tf.int64), [conf.exp_seq_len, param.WIDTH])
1215 |     head = tf.reshape(tf.decode_raw(features[param.HEAD], tf.int64), [conf.exp_seq_len, param.WIDTH])
1216 |     head_mean = tf.reshape(tf.decode_raw(features[param.HEAD_MEAN], tf.int64), [conf.exp_seq_len, param.WIDTH])
1217 |     std_head = tf.reshape(tf.decode_raw(features[param.STD_HEAD], tf.int64), [conf.exp_seq_len, param.WIDTH])
1218 |     early = tf.cast(features[param.EARLY], tf.int32)
1219 |     label = tf.cast(features[param.LABEL], tf.int32)
1220 | 
1221 |     seq = tf.concat([speed_sec, avg_speed, std_speed, acc_sec, mean_acc, std_acc,head,head_mean,std_head], axis=1)
1222 |     seq_float32 = tf.cast(seq, tf.float32)
1223 | 
1224 |     return seq_float32,early,label
1225 | 
1226 | def __parse_function_3_features(example_proto):
1227 |     feature = param.feature
1228 |     features = tf.parse_single_example(example_proto, feature)
1229 |     speed_sec = tf.reshape(tf.decode_raw(features[param.SPEED_SEC], tf.int64), [conf.exp_seq_len, param.WIDTH])
1230 |     avg_speed = tf.reshape(tf.decode_raw(features[param.AVG_SPEED], tf.int64), [conf.exp_seq_len, param.WIDTH])
1231 |     std_speed = tf.reshape(tf.decode_raw(features[param.STD_SPEED], tf.int64), [conf.exp_seq_len, param.WIDTH])
1232 |     # acc_sec = tf.reshape(tf.decode_raw(features[param.ACC_SEC], tf.int64), [conf.exp_seq_len, param.WIDTH])
1233 |     # mean_acc = tf.reshape(tf.decode_raw(features[param.MEAN_ACC], tf.int64), [conf.exp_seq_len, param.WIDTH])
1234 |     # std_acc = tf.reshape(tf.decode_raw(features[param.STD_ACC], tf.int64), [conf.exp_seq_len, param.WIDTH])
1235 |     # head = tf.reshape(tf.decode_raw(features[param.HEAD], tf.int64), [conf.exp_seq_len, param.WIDTH])
1236 |     # head_mean = tf.reshape(tf.decode_raw(features[param.HEAD_MEAN], tf.int64), [conf.exp_seq_len, param.WIDTH])
1237 |     # std_head = tf.reshape(tf.decode_raw(features[param.STD_HEAD], tf.int64), [conf.exp_seq_len, param.WIDTH])
1238 |     early = tf.cast(features[param.EARLY], tf.int32)
1239 |     label = tf.cast(features[param.LABEL], tf.int32)
1240 | 
1241 |     seq = tf.concat([speed_sec, avg_speed, std_speed], axis=1)
1242 |     seq_float32 = tf.cast(seq, tf.float32)
1243 | 
1244 |     return seq_float32, early, label
1245 | 
1246 | def __parse_function_6_features(example_proto):
1247 |     feature = param.feature
1248 |     features = tf.parse_single_example(example_proto, feature)
1249 |     speed_sec = tf.reshape(tf.decode_raw(features[param.SPEED_SEC], tf.int64), [conf.exp_seq_len, conf.discretization_width])
1250 |     avg_speed = tf.reshape(tf.decode_raw(features[param.AVG_SPEED], tf.int64), [conf.exp_seq_len, conf.discretization_width])
1251 |     std_speed = tf.reshape(tf.decode_raw(features[param.STD_SPEED], tf.int64), [conf.exp_seq_len, conf.discretization_width])
1252 |     acc_sec = tf.reshape(tf.decode_raw(features[param.ACC_SEC], tf.int64), [conf.exp_seq_len, conf.discretization_width])
1253 |     mean_acc = tf.reshape(tf.decode_raw(features[param.MEAN_ACC], tf.int64), [conf.exp_seq_len, conf.discretization_width])
1254 |     std_acc = tf.reshape(tf.decode_raw(features[param.STD_ACC], tf.int64), [conf.exp_seq_len, conf.discretization_width])
1255 |     # head = tf.reshape(tf.decode_raw(features[param.HEAD], tf.int64), [conf.exp_seq_len, param.WIDTH])
1256 |     # head_mean = tf.reshape(tf.decode_raw(features[param.HEAD_MEAN], tf.int64), [conf.exp_seq_len, param.WIDTH])
1257 |     # std_head = tf.reshape(tf.decode_raw(features[param.STD_HEAD], tf.int64), [conf.exp_seq_len, param.WIDTH])
1258 |     early = tf.cast(features[param.EARLY], tf.int32)
1259 |     label = tf.cast(features[param.LABEL], tf.int32)
1260 | 
1261 |     seq = tf.concat([speed_sec, avg_speed, std_speed,acc_sec,mean_acc,std_acc], axis=1)
1262 |     seq_float32 = tf.cast(seq, tf.float32)
1263 | 
1264 |     return seq_float32, early, label
1265 | 
1266 | def __parse_function_12_features(example_proto):
1267 |     feature = param.feature
1268 |     features = tf.parse_single_example(example_proto, feature)
1269 |     speed_sec = tf.reshape(tf.decode_raw(features[param.SPEED_SEC], tf.int64),
1270 |                            [conf.exp_seq_len, conf.discretization_width])
1271 |     avg_speed = tf.reshape(tf.decode_raw(features[param.AVG_SPEED], tf.int64),
1272 |                            [conf.exp_seq_len, conf.discretization_width])
1273 |     std_speed = tf.reshape(tf.decode_raw(features[param.STD_SPEED], tf.int64),
1274 |                            [conf.exp_seq_len, conf.discretization_width])
1275 |     acc_sec = tf.reshape(tf.decode_raw(features[param.ACC_SEC], tf.int64),
1276 |                          [conf.exp_seq_len, conf.discretization_width])
1277 |     mean_acc = tf.reshape(tf.decode_raw(features[param.MEAN_ACC], tf.int64),
1278 |                           [conf.exp_seq_len, conf.discretization_width])
1279 |     std_acc = tf.reshape(tf.decode_raw(features[param.STD_ACC], tf.int64),
1280 |                          [conf.exp_seq_len, conf.discretization_width])
1281 |     head = tf.reshape(tf.decode_raw(features[param.HEAD], tf.int64), [conf.exp_seq_len, param.WIDTH])
1282 |     head_mean = tf.reshape(tf.decode_raw(features[param.HEAD_MEAN], tf.int64), [conf.exp_seq_len, param.WIDTH])
1283 |     std_head = tf.reshape(tf.decode_raw(features[param.STD_HEAD], tf.int64), [conf.exp_seq_len, param.WIDTH])
1284 | 
1285 |     max_speed = tf.reshape(tf.decode_raw(features[param.MAX_SPEED], tf.int64),
1286 |                          [conf.exp_seq_len, conf.discretization_width])
1287 |     max_acc = tf.reshape(tf.decode_raw(features[param.MAX_ACC], tf.int64),
1288 |                          [conf.exp_seq_len, conf.discretization_width])
1289 |     max_head = tf.reshape(tf.decode_raw(features[param.MAX_HEAD], tf.int64),
1290 |                          [conf.exp_seq_len, conf.discretization_width])
1291 | 
1292 |     early = tf.cast(features[param.EARLY], tf.int32)
1293 |     label = tf.cast(features[param.LABEL], tf.int32)
1294 | 
1295 |     seq = tf.concat([speed_sec, avg_speed, std_speed, acc_sec, mean_acc, std_acc,head,head_mean,std_head,max_speed,max_acc,max_head], axis=1)
1296 |     seq_float32 = tf.cast(seq, tf.float32)
1297 | 
1298 |     return seq_float32, early, label
1299 | 
1300 | #创建dataset
1301 | def make_dataset_from_tfrecord_file(file_name_pattern,batch_size=32,is_shuffle=True,repeat = 1):
1302 |     filenames = util.search_file(file_name_pattern, tfrecords_data_path)
1303 |     filenames = np.array(filenames)
1304 |     perm = np.random.permutation(len(filenames))
1305 |     dataset = tf.data.TFRecordDataset(filenames[perm])
1306 |     if conf.num_features == 9:
1307 |         dataset = dataset.map(__parse_function_9_features)
1308 |     elif conf.num_features == 6:
1309 |         dataset = dataset.map(__parse_function_6_features)
1310 |     elif conf.num_features == 3:
1311 |         dataset = dataset.map(__parse_function_3_features)
1312 |     elif conf.num_features == 12:
1313 |         dataset = dataset.map(__parse_function_12_features)
1314 |     if is_shuffle:
1315 |         dataset = dataset.shuffle(100000)
1316 |     dataset = dataset.batch(batch_size)
1317 |     dataset = dataset.repeat(repeat)
1318 | 
1319 |     return dataset
1320 | 
1321 | #从npy获取数据 切割数据为预期长度的npy文件
1322 | def slice_seq(classes):
1323 |     # 分训练集与测试集 验证集 8：1：1
1324 |     train_data_all = None
1325 |     train_label_all = None
1326 |     train_early_all = None
1327 |     valid_data_all = None
1328 |     valid_label_all = None
1329 |     valid_early_all = None
1330 |     test_data_all = None
1331 |     test_label_all = None
1332 |     test_early_all = None
1333 |     features_arr_list = []
1334 |     index_arr_list = []
1335 |     label_arr_list = []
1336 |     data_file_name_exp = data_path +"transportation_mode"
1337 |     for i in range(classes):
1338 |         print("加载" + str(i))
1339 |         # data_file  = data_file_name +str(i) +".npy"
1340 |         index_df = pd.DataFrame(pd.read_csv(data_file_name_exp +"_"+ str(i) + "_seg_index.csv"))
1341 |         features_arr = np.load(data_file_name_exp + str(i) + ".npy")
1342 |         features_arr = features_arr[:, 0:len_features]
1343 |         index_arr = np.array(index_df.iloc[:, [1, 2]].T)
1344 |         # index shape = [2,总个数]
1345 |         # 第一维是第几段轨迹 第二维是在固定长度为exp_seq_len中的实际长度
1346 |         # data shape =[seq_nums,exp_seq_len,feature_len]   切出相等的数据长度 不足的padding
1347 |         (data, index_arr) = Data.slice_seq(features_arr, index_arr, conf.exp_seq_len)
1348 | 
1349 |         np.save(data_path+"slice_label" + str(i)+"_"+str(conf.exp_seq_len)+".npy",data)
1350 |         np.save(data_path+"slice_index"+str(i)+".npy",index_arr)
1351 | 
1352 | #分割数据集，合并数据 并写为npz文件
1353 | def partition_data_set(classes):
1354 |     out_data_path = "G:/all_data/"
1355 |     # 分训练集与测试集 验证集 8：1：1
1356 |     train_data_all = None
1357 |     train_label_all = None
1358 |     train_early_all = None
1359 |     valid_data_all = None
1360 |     valid_label_all = None
1361 |     valid_early_all = None
1362 |     test_data_all = None
1363 |     test_label_all = None
1364 |     test_early_all = None
1365 |     data_file_name_exp = data_path + "transportation_mode"
1366 |     for i in range(classes):
1367 |         data = np.load(data_path + "slice_label" + str(i) + "_" + str(conf.exp_seq_len) + ".npy")
1368 |         index_arr = np.load(data_path + "slice_index" + str(i) + ".npy")
1369 | 
1370 |         # 切割后删除features_arr index
1371 |         label_arr = np.zeros(shape=[index_arr.shape[1]], dtype=np.int32)
1372 |         label_arr[:] = i
1373 |         # features_arr_list.append(data)
1374 |         # index_arr_list.append(index)
1375 |         # label_arr_list.append(label)
1376 |         # 划分训练集，验证集，测试集
1377 |         print("划分训练集，验证集，测试集   " + str(i))
1378 |         seq_nums = index_arr.shape[1]
1379 |         # 控制变量
1380 |         np.random.seed(2)
1381 |         index_perm = np.random.permutation(range(seq_nums))
1382 |         train_count = int(np.floor(seq_nums * 0.8))
1383 |         valid_count = int(np.floor(seq_nums * 0.9))
1384 |         test_count = seq_nums
1385 |         train_index = index_perm[0:train_count]
1386 |         valid_index = index_perm[train_count + 1:valid_count]
1387 |         test_index = index_perm[valid_count + 1:seq_nums]
1388 | 
1389 |         # train_set valid_set test_set
1390 |         train_data = data[train_index, :, :]
1391 |         train_label = label_arr[train_index]
1392 |         train_early = index_arr[1, train_index]
1393 | 
1394 |         valid_data = data[valid_index, :, :]
1395 |         valid_label = label_arr[valid_index]
1396 |         valid_early = index_arr[1, valid_index]
1397 | 
1398 |         test_data = data[test_index, :, :]
1399 |         test_label = label_arr[test_index]
1400 |         test_early = index_arr[1, test_index]
1401 | 
1402 |         # 删除读取到的data.
1403 |         del data
1404 |         del label_arr
1405 |         del index_arr
1406 | 
1407 |         print("连接")
1408 |         if train_data_all is None:
1409 |             train_data_all = train_data
1410 |             train_label_all = train_label
1411 |             train_early_all = train_early
1412 | 
1413 |             valid_data_all = valid_data
1414 |             valid_label_all = valid_label
1415 |             valid_early_all = valid_early
1416 | 
1417 |             test_data_all = test_data
1418 |             test_label_all = test_label
1419 |             test_early_all = test_early
1420 |         else:
1421 |             train_data_all = np.concatenate((train_data_all, train_data), axis=0)
1422 |             train_label_all = np.concatenate((train_label_all, train_label), axis=0)
1423 |             train_early_all = np.concatenate((train_early_all, train_early), axis=0)
1424 | 
1425 |             valid_data_all = np.concatenate((valid_data_all, valid_data), axis=0)
1426 |             valid_label_all = np.concatenate((valid_label_all, valid_label), axis=0)
1427 |             valid_early_all = np.concatenate((valid_early_all, valid_early), axis=0)
1428 | 
1429 |             test_data_all = np.concatenate((test_data_all, test_data), axis=0)
1430 |             test_label_all = np.concatenate((test_label_all, test_label), axis=0)
1431 |             test_early_all = np.concatenate((test_early_all, test_early), axis=0)
1432 | 
1433 |     np.savez(out_data_path+"valid_data_set.npz",valid_data=valid_data_all,valid_label = valid_label_all,valid_early_stop = valid_early_all)
1434 |     del valid_label_all
1435 |     del valid_data_all
1436 |     del valid_early_all
1437 |     np.savez(out_data_path+"test_data_set.npz",test_data=test_data_all,test_label = test_label_all,test_early_stop = test_early_all)
1438 |     del test_label_all
1439 |     del test_early_all
1440 |     del test_data_all
1441 |     np.savez(out_data_path + "train_data_set.npz", train_data=train_data_all, train_label=train_label_all,train_early_stop=train_early_all)
1442 | 
1443 | def main():
1444 |     model_tfrecord_dataset(get_net_type(conf.net_type),get_rnn_type(conf.rnn_type))
1445 |     #rnn_nv1_model(False)
1446 | 
1447 | if __name__ == "__main__":
1448 |     #slice_seq(4)
1449 |     main()
1450 |     #partition_data_set(4)
1451 | 


--------------------------------------------------------------------------------
/data_funs.py:
--------------------------------------------------------------------------------
   1 | from __future__ import division
   2 | import tensorflow as tf
   3 | import sys
   4 | import csv
   5 | 
   6 | import numpy as np
   7 | import math
   8 | import sklearn.preprocessing
   9 | import os
  10 | import time
  11 | import pandas as pd
  12 | import util
  13 | from param import WIDTH
  14 | from param import FeatureName
  15 | import config
  16 | import param
  17 | from param import FENWEI_MAX
  18 | from param import FILTER_K
  19 | 
  20 | 
  21 | class Data:
  22 |     @staticmethod
  23 |     def splitDataset(mmsi, tr_mmsi, vl_tmmsi):
  24 |         test_index = Data.get_match_index(mmsi, tr_mmsi)
  25 |         val_index = Data.get_match_index(mmsi, vl_tmmsi)
  26 |         train_index = np.delete(np.array(range(len(mmsi))), np.concatenate([test_index, val_index]))
  27 |         return (train_index, test_index, val_index)
  28 | 
  29 |     @staticmethod
  30 |     def randomSplitDataset(mmsi, train_perc=0.5, val_perc=0.1):
  31 |         mmsi = np.array(mmsi)
  32 |         seq_len = mmsi.shape[0]
  33 |         test_perc = 1 - train_perc - val_perc
  34 |         rdn_perm = np.random.permutation(seq_len)
  35 |         train_index = rdn_perm[0:int(seq_len * train_perc)]
  36 |         test_index = rdn_perm[int(seq_len * train_perc): int(seq_len * (train_perc + test_perc))]
  37 |         val_index = rdn_perm[int(seq_len * (train_perc + test_perc)): seq_len]
  38 |         return (train_index, test_index, val_index)
  39 | 
  40 |     @staticmethod
  41 |     def get_match_index(mmsi, target):
  42 |         unique_mmsi = np.unique(mmsi)
  43 |         result = np.concatenate([np.where(mmsi == unique_mmsi[i]) for i in target], axis=1)[0]
  44 |         return result
  45 | 
  46 |     @staticmethod
  47 |     def upsample(data, cls, times):
  48 |         (X_train, y_train, stop_train) = data
  49 |         labels = [set(i) for i in y_train]
  50 |         samples = [cls in i for i in labels]
  51 |         sample_index = np.where(samples)[0]
  52 |         sample_x = np.repeat(X_train[:, sample_index, :], times - 1, axis=1)
  53 |         sample_y = np.repeat(y_train[sample_index, :], times - 1, axis=0)
  54 |         sample_stop = np.repeat(stop_train[sample_index], times - 1, axis=0)
  55 |         X_train = np.concatenate((X_train, sample_x), axis=1)
  56 |         y_train = np.vstack((y_train, sample_y))
  57 |         stop_train = np.hstack((stop_train, sample_stop))
  58 |         return (X_train, y_train, stop_train)
  59 | 
  60 |     # cut sequence into smaller sequences specified by the conf
  61 |     # 将序列切成指定长度的
  62 |     @staticmethod
  63 |     def reorganizeSeq(x, y, mmsi, exp_seq_len):
  64 |         num_features = x.shape[2]
  65 |         # 总共可以切出的序列个数
  66 |         num_total_seq = int(sum([math.ceil(i) for i in mmsi[1] / exp_seq_len]))
  67 |         new_data = np.zeros((num_total_seq, exp_seq_len, num_features))
  68 |         new_label = np.zeros((num_total_seq, exp_seq_len))
  69 |         # 0行存放编号 1行存放序列长度
  70 |         new_mmsi = np.zeros((2, num_total_seq)).astype(int)
  71 |         count = 0
  72 |         for v in range(len(mmsi[0])):  # iterate each vessel
  73 |             # 每个用户的数据
  74 |             # print v
  75 |             vessel_data = x[v]
  76 |             vessel_lab = y[v]
  77 |             # 用户编号
  78 |             vessel_mmsi = mmsi[0][v]
  79 |             # print(mmsi[0][v])
  80 |             # get full sequences first
  81 |             # 各个用户能切出的序列个数
  82 |             num_full_seq = mmsi[1][v] // exp_seq_len
  83 |             if num_full_seq:
  84 |                 # full_seq的shape为当前用户的（总个数，序列长度，特征）
  85 |                 full_seq = vessel_data[0:num_full_seq * exp_seq_len].reshape((num_full_seq, exp_seq_len, num_features))
  86 |                 full_lab = vessel_lab[0:num_full_seq * exp_seq_len].reshape((num_full_seq, exp_seq_len))
  87 |                 new_data[count:(count + num_full_seq)] = full_seq
  88 |                 new_label[count:(count + num_full_seq)] = full_lab
  89 |                 new_mmsi[0][count:(count + num_full_seq)] = vessel_mmsi
  90 |                 new_mmsi[1][count:(count + num_full_seq)] = exp_seq_len
  91 |                 count += num_full_seq
  92 | 
  93 |             # 序列切片多出来的长度保存起来
  94 |             remain_seq = np.zeros((exp_seq_len, num_features))
  95 |             remain_seq[0:(mmsi[1][v] - num_full_seq * exp_seq_len)] = vessel_data[num_full_seq * exp_seq_len:mmsi[1][v]]
  96 |             remain_lab = np.zeros(exp_seq_len)
  97 |             remain_lab[0:(mmsi[1][v] - num_full_seq * exp_seq_len)] = vessel_lab[num_full_seq * exp_seq_len:mmsi[1][v]]
  98 |             new_data[count] = remain_seq
  99 |             new_label[count] = remain_lab
 100 |             new_mmsi[0][count] = vessel_mmsi
 101 |             new_mmsi[1][count] = mmsi[1][v] - num_full_seq * exp_seq_len
 102 |             count += 1
 103 |         return (new_data, new_label, new_mmsi)
 104 | 
 105 |     #处理原始数据，提取经纬度，时间，与标签对应
 106 |     @staticmethod
 107 |     def sovle_row_data(interval):
 108 |         datadir = "G:/新建文件夹/Geolife Trajectories 1.3/Data/"
 109 | 
 110 |         valiable_user_data = open("./data/have_label_user.txt","r")
 111 |         user_list = valiable_user_data.readlines()
 112 |         for i in user_list:
 113 |             user_id = i[0:3]
 114 |             label_txt_name = datadir + user_id+"/labels.txt"
 115 |             label_file = open(label_txt_name,"r")
 116 |             #label文件 数据还是字符串
 117 |             list_label = label_file.readlines()[1:]
 118 |             #label_list 数据是label数组
 119 |             label_list = []
 120 |             for i in list_label:
 121 |                 l = i[0:len(i)-1].split("\t")
 122 |                 label_list.append(l)
 123 | 
 124 |             plt_path = datadir + user_id + "/Trajectory"
 125 |             list_plt_name = os.listdir(plt_path)
 126 | 
 127 |             user_data = datadir + user_id + "/userdata_interval_"+str(interval)+".csv"
 128 |             user_data_file = open(user_data,"w")
 129 | 
 130 |             label_time_index = 0
 131 | 
 132 |             #循环处理所有plt文件
 133 |             i = 0
 134 |             while(i < len(list_plt_name)):
 135 | 
 136 |                 is_finish = False
 137 |                 plt_name = list_plt_name[i]
 138 |                 print("处理", plt_name)
 139 | 
 140 |                 plt_file_name = plt_path + "/" + plt_name
 141 |                 #plt_time_str = plt_name[0:4] + "/" +plt_name[4:6] + "/" +plt_name[6:8] +" " + plt_name[8:10] +":"+plt_name[10:12]+":"+plt_name[12:14]
 142 |                 #plt_time = time.strptime(plt_time_str,'%Y/%m/%d %H:%M:%S')
 143 |                 #if plt_time
 144 |                 plt_file = open(plt_file_name,"r")
 145 |                 data = plt_file.readlines()
 146 |                 data = data[6:len(data)]
 147 | 
 148 |                 #plt文件的起始时间
 149 |                 plt_start_time_str = data[0]
 150 |                 plt_end_time_str = data[-1]
 151 |                 plt_start_time_list = plt_start_time_str[0:len(plt_start_time_str)-1].split(",")
 152 |                 plt_start_time = time.strptime(plt_start_time_list[-2] + " " + plt_start_time_list[-1],'%Y-%m-%d %H:%M:%S')
 153 |                 plt_end_time_list = plt_end_time_str[0:len(plt_end_time_str)-1].split(",")
 154 |                 plt_end_time = time.strptime(plt_end_time_list[-2] + " " + plt_end_time_list[-1],'%Y-%m-%d %H:%M:%S')
 155 | 
 156 |                 #label 当前起始时间
 157 |                 label_start_time = time.strptime(label_list[label_time_index][0], '%Y/%m/%d %H:%M:%S')
 158 |                 label_end_time = time.strptime(label_list[label_time_index][1], '%Y/%m/%d %H:%M:%S')
 159 | 
 160 |                 #如果plt_end_time < 当前label_start_time 处理下一个plt文件
 161 |                 if plt_end_time <= label_start_time:
 162 |                     i+=1
 163 |                     continue
 164 |                 elif plt_start_time >= label_end_time :
 165 |                     #重复此次循环
 166 |                     i-=1
 167 |                     label_time_index += 1
 168 |                     if label_time_index > len(label_list)-1:
 169 |                         is_finish = True
 170 |                 else:
 171 |                     #处理plt文件中的内容
 172 |                     print("处理有标签的文件",plt_name)
 173 | 
 174 |                     last_time = None
 175 |                     k = 0
 176 |                     while(k < len(data)):
 177 |                         line = data[k]
 178 |                         line_time_list = line[0:len(line)-1].split(",")
 179 |                         line_time = time.strptime(line_time_list[-2] + " " + line_time_list[-1],'%Y-%m-%d %H:%M:%S')
 180 |                         #print(line_time,label_start_time,label_end_time)
 181 | 
 182 |                         if line_time >= label_start_time and line_time <= label_end_time:
 183 |                             if k == 0:
 184 |                                 last_time = line_time
 185 |                             else:
 186 |                                 if line_time == last_time:
 187 |                                     last_time = line_time
 188 |                                     k+=1
 189 |                                     continue
 190 |                             result_line = user_id +"," + line[0:len(line)-1] + "," + label_list[label_time_index][-1] + "," +str(label_time_index)
 191 |                             user_data_file.write(result_line + "\n")
 192 |                             last_time = line_time
 193 |                             k+=interval
 194 |                         elif line_time >label_end_time:
 195 | 
 196 |                             label_time_index += 1
 197 |                             if label_time_index > len(label_list)-1:
 198 |                                 is_finish = True
 199 |                                 break
 200 |                             label_start_time = time.strptime(label_list[label_time_index][0], '%Y/%m/%d %H:%M:%S')
 201 |                             label_end_time = time.strptime(label_list[label_time_index][1], '%Y/%m/%d %H:%M:%S')
 202 |                         elif line_time <label_start_time:
 203 |                             k+=1
 204 |                     #处理下一个文件
 205 |                 #关闭当前plt文件
 206 |                 if is_finish:
 207 |                     print("当前用户处理完毕",user_id)
 208 |                     plt_file.close()
 209 |                     break
 210 |                 i+=1
 211 | 
 212 |             label_file.close()
 213 |             #plt_file.close()
 214 |             user_data_file.close()
 215 | 
 216 |     #计算特征
 217 |     @staticmethod
 218 |     def caculate_feature(interval_list):
 219 |         datadir = "G:/新建文件夹/Geolife Trajectories 1.3/Data/"
 220 |         feature_num = 9
 221 |         valiable_user_data = open("./data/have_label_user.txt", "r")
 222 |         user_list = valiable_user_data.readlines()
 223 |         for interval in interval_list:
 224 |             print("处理%d"%(interval))
 225 |             for user in user_list:
 226 |                 user_id = user[0:3]
 227 |                 user_data_name = datadir + user_id + "/userdata_interval_"+str(interval)+".csv"
 228 |                 #user_data_name = datadir + user_id + "/userdata.csv"
 229 |                 print("开始处理",user_id)
 230 |                 user_data_file = open(user_data_name,"r")
 231 | 
 232 |                 # user_data_file = np.loadtxt(user_data_name,dtype=np.str,delimiter=",")
 233 |                 # label_list = user_data_file[:,-1]
 234 |                 # label_list = label_list.astype(int)
 235 |                 # label_unique,label_index,label_count = np.unique(label_list, return_counts=True, return_index=True)
 236 |                 # #print(label_unique,label_index,label_count)
 237 |                 #
 238 |                 #
 239 |                 # for i in range(1):
 240 |                 #     #一个label要使用的数组
 241 |                 #     #result = np.empty(shape=[label_count[i],feature_num],dtype=np.str_)
 242 |                 #     #一个label的索引在一个用户文件中
 243 |                 #     start = label_index[i]
 244 |                 #     end = label_index[i] + label_count[i]
 245 |                 #     #一个label索引对应的原始数据
 246 |                 #     data = user_data_file[start:end,:]
 247 |                 #     #经纬度 以及时间
 248 |                 #     lat_lon_time = data[:,[1,2,5]]
 249 |                 #     #将user_id,经纬度赋值给结果数组
 250 |                 #     #result[:,0:3] = data[:,0:3]
 251 |                 #
 252 |                 #     #计算特征  速度 加速度  开始点没有速度，第一个点没有加速度， 所以最后数组比原始数组少两个点
 253 |                 #     for i in range(1,len(lat_lon_time)):
 254 |                 #         dis = util.jwd2dis(lat_lon_time[i][0],lat_lon_time[i][1],lat_lon_time[i-1][0],lat_lon_time[i-1][1])
 255 |                 #         t = util.timestamp2second(lat_lon_time[i],lat_lon_time[i-1])
 256 |                 #
 257 |                 #     print(lat_lon_time)
 258 | 
 259 | 
 260 | 
 261 |                 # #user_data = user_data_file.readlines()
 262 |                 #列名
 263 |                 col_name = ["user_id","lat","lon","non-use","alt","timestamp","date","time","label","label_count"]
 264 |                 #原始数据
 265 |                 raw_data_df = pd.DataFrame(pd.read_csv(user_data_file,header=None,names=col_name))
 266 |                 #结果列名
 267 |                 result_col_name = ["user_id","lat","lon","speed_sec","acc_sec","std_speed","avg_speed","mean_acc","std_acc","head","head_mean","std_head","date","time","label","seg_label"]
 268 |                 #结果数据
 269 |                 result_df = pd.DataFrame(columns=result_col_name)
 270 | 
 271 |                 #通过标签分组轨迹
 272 |                 label_gp = raw_data_df.groupby(by=col_name[-1])
 273 | 
 274 |                 for label_count,group in label_gp:
 275 |                     #print(group)
 276 |                     #print(len(group.index))
 277 |                     #temp_result = pd.DataFrame(columns = result_col_name)
 278 |                     #特征数组
 279 |                     #print("label_count",label_count)
 280 |                     if (group.index[-1] - group.index[0]) < 2:
 281 |                         print("丢弃本组数据")
 282 |                         continue
 283 |                     feature_arr = np.zeros(shape=[group.index[-1] - group.index[0] +1,feature_num],dtype=np.float64)
 284 |                     fangweijiao = np.zeros(shape=[group.index[-1] - group.index[0] +1],dtype=np.float64)
 285 |                     #print(group)
 286 |                     #print(len(group.index))
 287 |                     offset =  group.index[0]
 288 |                     for ii in  group.index[1:]:
 289 |                         #row_result = pd.Series(index=result_col_name)
 290 |                         dis = util.jwd2dis(group.loc[ii,"lat"],group.loc[ii,"lon"],group.loc[ii-1,"lat"],group.loc[ii-1,"lon"])
 291 |                         t = util.timestamp2second(group.loc[ii,"timestamp"],group.loc[ii-1,"timestamp"])
 292 |                         #速度
 293 |                         feature_arr[ii - offset][0] = dis/t
 294 |                         if(ii > offset+1):
 295 |                             #加速度
 296 |                             #a = (v1-v0)/t
 297 |                             feature_arr[ii- offset][1] = (feature_arr[ii- offset][0] - feature_arr[ii-1-offset][0]) / t
 298 | 
 299 |                         fangweijiao[ii-offset] = util.jwd2angle(group.loc[ii,"lat"],group.loc[ii,"lon"],group.loc[ii-1,"lat"],group.loc[ii-1,"lon"])
 300 | 
 301 |                     #方向转换  正数代表作，负数代表右
 302 |                     for k in range(2,len(fangweijiao)):
 303 |                         if fangweijiao[k] - fangweijiao[k-1] <= 180:
 304 |                             feature_arr[k][6] = fangweijiao[k] - fangweijiao[k-1]
 305 |                         else:
 306 |                             feature_arr[k][6] = -(360 - (fangweijiao[k] - fangweijiao[k-1]))
 307 | 
 308 |                     #0 放的是速度 1放的是加速度
 309 |                     avg_speed = np.mean(feature_arr[2:,0],axis=0)
 310 |                     acc_mean = np.mean(feature_arr[2:,1],axis=0)
 311 |                     std_speed = np.std(feature_arr[2:,0],axis=0)
 312 |                     std_acc = np.std(feature_arr[2:,1],axis=0)
 313 |                     head_mean = np.mean(np.abs(feature_arr[2:,6]),axis=0)
 314 |                     std_head = np.std(feature_arr[2:,6],axis=0)
 315 |                     feature_arr[2:,2] = std_speed
 316 |                     feature_arr[2:,3] = avg_speed
 317 |                     feature_arr[2:,4] = acc_mean
 318 |                     feature_arr[2:,5] = std_acc
 319 |                     feature_arr[2:,7] = head_mean
 320 |                     feature_arr[2:,8] = std_head
 321 |                     feature_arr = feature_arr[2:,:]
 322 | 
 323 |                     #print(feature_arr)
 324 |                     result = pd.DataFrame(columns=result_col_name)
 325 |                     #result["user_id"] = group["user_id"][2:len(group.index)]
 326 |                     start = group.index[0] + 2
 327 |                     end = group.index[-1]
 328 |                     result["user_id"] = group.loc[start:end,"user_id"]
 329 |                     result["lat"] = group.loc[start:end,"lat"]
 330 |                     result["lon"] = group.loc[start:end,"lon"]
 331 |                     #print(result.info(),length,feature_arr.shape)
 332 |                     result["speed_sec"] = feature_arr[:,0]
 333 |                     result["acc_sec"] = feature_arr[:,1]
 334 |                     result["std_speed"] = feature_arr[:,2]
 335 |                     result["avg_speed"] = feature_arr[:,3]
 336 |                     result["mean_acc"] = feature_arr[:,4]
 337 |                     result["std_acc"] = feature_arr[:,5]
 338 |                     result["head"] = feature_arr[:,6]
 339 |                     result["head_mean"] = feature_arr[:,7]
 340 |                     result["std_head"] = feature_arr[:,8]
 341 |                     result["date"] = group.loc[start:end,"date"]
 342 |                     result["time"] = group.loc[start:end,"time"]
 343 |                     result["label"] = util.switch_mode(group.loc[start,"label"])
 344 |                     result["seg_label"] = user_id +" " + str(group.loc[start,"label_count"])
 345 |                     #一组label最终结果dataframe
 346 |                     result_df = result_df.append(result)
 347 | 
 348 |                 result_df.index = range(0,result_df.shape[0])
 349 |                 #result_df.to_csv(datadir + user_id + "/user_features.csv", index=False)
 350 |                 result_df.to_csv(datadir + user_id +"/user_features_interval_"+str(interval) +".csv",index=False,mode="w+")
 351 |                 user_data_file.close()
 352 | 
 353 |         # 计算特征
 354 |     @staticmethod
 355 |     def caculate_feature_12(interval_list):
 356 |         datadir = "G:/新建文件夹/Geolife Trajectories 1.3/Data/"
 357 |         feature_num = 12
 358 |         valiable_user_data = open("./data/have_label_user.txt", "r")
 359 |         user_list = valiable_user_data.readlines()
 360 |         for interval in interval_list:
 361 |             print("处理%d" % (interval))
 362 |             for user in user_list:
 363 |                 user_id = user[0:3]
 364 |                 user_data_name = datadir + user_id + "/userdata_interval_" + str(interval) + ".csv"
 365 |                 # user_data_name = datadir + user_id + "/userdata.csv"
 366 |                 print("开始处理", user_id)
 367 |                 user_data_file = open(user_data_name, "r")
 368 | 
 369 |                 # user_data_file = np.loadtxt(user_data_name,dtype=np.str,delimiter=",")
 370 |                 # label_list = user_data_file[:,-1]
 371 |                 # label_list = label_list.astype(int)
 372 |                 # label_unique,label_index,label_count = np.unique(label_list, return_counts=True, return_index=True)
 373 |                 # #print(label_unique,label_index,label_count)
 374 |                 #
 375 |                 #
 376 |                 # for i in range(1):
 377 |                 #     #一个label要使用的数组
 378 |                 #     #result = np.empty(shape=[label_count[i],feature_num],dtype=np.str_)
 379 |                 #     #一个label的索引在一个用户文件中
 380 |                 #     start = label_index[i]
 381 |                 #     end = label_index[i] + label_count[i]
 382 |                 #     #一个label索引对应的原始数据
 383 |                 #     data = user_data_file[start:end,:]
 384 |                 #     #经纬度 以及时间
 385 |                 #     lat_lon_time = data[:,[1,2,5]]
 386 |                 #     #将user_id,经纬度赋值给结果数组
 387 |                 #     #result[:,0:3] = data[:,0:3]
 388 |                 #
 389 |                 #     #计算特征  速度 加速度  开始点没有速度，第一个点没有加速度， 所以最后数组比原始数组少两个点
 390 |                 #     for i in range(1,len(lat_lon_time)):
 391 |                 #         dis = util.jwd2dis(lat_lon_time[i][0],lat_lon_time[i][1],lat_lon_time[i-1][0],lat_lon_time[i-1][1])
 392 |                 #         t = util.timestamp2second(lat_lon_time[i],lat_lon_time[i-1])
 393 |                 #
 394 |                 #     print(lat_lon_time)
 395 | 
 396 |                 # #user_data = user_data_file.readlines()
 397 |                 # 列名
 398 |                 col_name = ["user_id", "lat", "lon", "non-use", "alt", "timestamp", "date", "time", "label",
 399 |                             "label_count"]
 400 |                 # 原始数据
 401 |                 raw_data_df = pd.DataFrame(pd.read_csv(user_data_file, header=None, names=col_name))
 402 |                 # 结果列名
 403 |                 result_col_name = ["user_id", "lat", "lon", "speed_sec", "acc_sec", "std_speed", "avg_speed",
 404 |                                    "mean_acc", "std_acc", "head", "head_mean", "std_head","max_speed","max_acc","max_head", "date", "time", "label",
 405 |                                    "seg_label"]
 406 |                 # 结果数据
 407 |                 result_df = pd.DataFrame(columns=result_col_name)
 408 | 
 409 |                 # 通过标签分组轨迹
 410 |                 label_gp = raw_data_df.groupby(by=col_name[-1])
 411 | 
 412 |                 for label_count, group in label_gp:
 413 |                     # print(group)
 414 |                     # print(len(group.index))
 415 |                     # temp_result = pd.DataFrame(columns = result_col_name)
 416 |                     # 特征数组
 417 |                     # print("label_count",label_count)
 418 |                     if (group.index[-1] - group.index[0]) < 2:
 419 |                         print("丢弃本组数据")
 420 |                         continue
 421 |                     feature_arr = np.zeros(shape=[group.index[-1] - group.index[0] + 1, feature_num],
 422 |                                            dtype=np.float64)
 423 |                     fangweijiao = np.zeros(shape=[group.index[-1] - group.index[0] + 1], dtype=np.float64)
 424 |                     # print(group)
 425 |                     # print(len(group.index))
 426 |                     offset = group.index[0]
 427 |                     for ii in group.index[1:]:
 428 |                         # row_result = pd.Series(index=result_col_name)
 429 |                         dis = util.jwd2dis(group.loc[ii, "lat"], group.loc[ii, "lon"], group.loc[ii - 1, "lat"],
 430 |                                            group.loc[ii - 1, "lon"])
 431 |                         t = util.timestamp2second(group.loc[ii, "timestamp"], group.loc[ii - 1, "timestamp"])
 432 |                         # 速度
 433 |                         feature_arr[ii - offset][0] = dis / t
 434 |                         if (ii > offset + 1):
 435 |                             # 加速度
 436 |                             # a = (v1-v0)/t
 437 |                             feature_arr[ii - offset][1] = (feature_arr[ii - offset][0] -
 438 |                                                            feature_arr[ii - 1 - offset][0]) / t
 439 | 
 440 |                         fangweijiao[ii - offset] = util.jwd2angle(group.loc[ii, "lat"], group.loc[ii, "lon"],
 441 |                                                                   group.loc[ii - 1, "lat"],
 442 |                                                                   group.loc[ii - 1, "lon"])
 443 | 
 444 |                     # 方向转换  正数代表作，负数代表右
 445 |                     #print(fangweijiao)
 446 |                     for k in range(2, len(fangweijiao)):
 447 |                         #print(fangweijiao[k],fangweijiao[k-1])
 448 |                         #print(fangweijiao[k] - fangweijiao[k-1])
 449 |                         if fangweijiao[k] >= fangweijiao[k-1]:
 450 | 
 451 |                             if fangweijiao[k] - fangweijiao[k - 1] <= 180:
 452 |                                 feature_arr[k][6] = fangweijiao[k] - fangweijiao[k - 1]
 453 |                             else:
 454 |                                 feature_arr[k][6] = -(360 - (fangweijiao[k] - fangweijiao[k - 1]))
 455 |                         else:
 456 |                             if fangweijiao[k-1] - fangweijiao[k] <=180:
 457 |                                 feature_arr[k][6] = fangweijiao[k-1] - fangweijiao[k]
 458 |                             else:
 459 |                                 feature_arr[k][6] = -(360 - (fangweijiao[k-1] - fangweijiao[k]))
 460 | 
 461 | 
 462 |                     # 0 放的是速度 1放的是加速度
 463 |                     avg_speed = np.mean(feature_arr[2:, 0], axis=0)
 464 |                     acc_mean = np.mean(feature_arr[2:, 1], axis=0)
 465 |                     std_speed = np.std(feature_arr[2:, 0], axis=0)
 466 |                     std_acc = np.std(feature_arr[2:, 1], axis=0)
 467 |                     head_mean = np.mean(np.abs(feature_arr[2:, 6]), axis=0)
 468 |                     std_head = np.std(feature_arr[2:, 6], axis=0)
 469 |                     max_speed = np.max(np.abs(feature_arr[2:,0]),axis=0)
 470 |                     max_acc = np.max(np.abs(feature_arr[2:,1]),axis=0)
 471 |                     max_head = np.max(np.abs(feature_arr[2:,6]),axis=0)
 472 |                     #print(feature_arr[2:,6])
 473 |                     feature_arr[2:, 2] = std_speed
 474 |                     feature_arr[2:, 3] = avg_speed
 475 |                     feature_arr[2:, 4] = acc_mean
 476 |                     feature_arr[2:, 5] = std_acc
 477 |                     feature_arr[2:, 7] = head_mean
 478 |                     feature_arr[2:, 8] = std_head
 479 |                     feature_arr[2:,9]  = max_speed
 480 |                     feature_arr[2:, 10]= max_acc
 481 |                     feature_arr[2:,11] = max_head
 482 |                     feature_arr = feature_arr[2:, :]
 483 | 
 484 |                     # print(feature_arr)
 485 |                     result = pd.DataFrame(columns=result_col_name)
 486 |                     # result["user_id"] = group["user_id"][2:len(group.index)]
 487 |                     start = group.index[0] + 2
 488 |                     end = group.index[-1]
 489 |                     result["user_id"] = group.loc[start:end, "user_id"]
 490 |                     result["lat"] = group.loc[start:end, "lat"]
 491 |                     result["lon"] = group.loc[start:end, "lon"]
 492 |                     # print(result.info(),length,feature_arr.shape)
 493 |                     result["speed_sec"] = feature_arr[:, 0]
 494 |                     result["acc_sec"] = feature_arr[:, 1]
 495 |                     result["std_speed"] = feature_arr[:, 2]
 496 |                     result["avg_speed"] = feature_arr[:, 3]
 497 |                     result["mean_acc"] = feature_arr[:, 4]
 498 |                     result["std_acc"] = feature_arr[:, 5]
 499 |                     result["head"] = feature_arr[:, 6]
 500 |                     result["head_mean"] = feature_arr[:, 7]
 501 |                     result["std_head"] = feature_arr[:, 8]
 502 |                     result["max_speed"] = feature_arr[:,9]
 503 |                     result["max_acc"] = feature_arr[:,10]
 504 |                     result["max_head"] = feature_arr[:,11]
 505 |                     result["date"] = group.loc[start:end, "date"]
 506 |                     result["time"] = group.loc[start:end, "time"]
 507 |                     result["label"] = util.switch_mode(group.loc[start, "label"])
 508 |                     result["seg_label"] = user_id + " " + str(group.loc[start, "label_count"])
 509 |                     # 一组label最终结果dataframe
 510 |                     result_df = result_df.append(result)
 511 | 
 512 |                 result_df.index = range(0, result_df.shape[0])
 513 |                 # result_df.to_csv(datadir + user_id + "/user_features.csv", index=False)
 514 |                 result_df.to_csv(datadir + user_id + "/user_features_interval_" + str(interval) + ".csv",
 515 |                                  index=False, mode="w+")
 516 |                 user_data_file.close()
 517 | 
 518 |     @staticmethod
 519 |     def caculate_feature_max_min():
 520 |         datadir = "G:/新建文件夹/Geolife Trajectories 1.3/Data/"
 521 |         feature_num = 10
 522 |         valiable_user_data = open("./data/have_label_user.txt", "r")
 523 |         user_list = valiable_user_data.readlines()
 524 |         for user in user_list:
 525 |             user_id = user[0:3]
 526 |             user_feature_name = datadir + user_id + "/user_features.csv"
 527 |             user_feature_file = open(user_feature_name,"r")
 528 |             user_feature_df = pd.DataFrame(pd.read_csv(user_feature_file))
 529 | 
 530 |             user_feature_max_min_name = datadir + user_id +"/user_features_max_min.csv"
 531 |             label_group = user_feature_df.groupby(by="label")
 532 | 
 533 |             #result = np.zeros(shape=[10,len(label_group)+1])
 534 |             result_df = pd.DataFrame(columns=["speed_sec","acc_sec","std_speed","avg_speed","mean_acc","max_or_min","label"])
 535 | 
 536 |             print(user_id)
 537 | 
 538 |             for name,group in label_group:
 539 |                 #print(type(group))
 540 |                 #series_max = group.iloc[:,[3,4,5,6,7]].idxmax()
 541 |                 #series_min = group.iloc[:,[3,4,5,6,7]].idxmin()
 542 |                 max = group.iloc[:,[3,4,5,6,7,-2]].max()
 543 |                 min = group.iloc[:,[3,4,5,6,7,-2]].min()
 544 |                 max["max_or_min"] = "max"
 545 |                 min["max_or_min"] = "min"
 546 |                 #max_list = max.tolist()
 547 |                 #max_list.append("max")
 548 |                 df_max = pd.DataFrame(max)
 549 |                 df_max = df_max.T
 550 |                 df_min = pd.DataFrame(min)
 551 |                 df_min = df_min.T
 552 |                 result_df = result_df.append(df_max)
 553 |                 result_df = result_df.append(df_min)
 554 |                 # df.append(pd.DataFrame(max))
 555 |                 #dict = max.to_dict()
 556 |                 #max.to_csv(user_feature_max_name,mode= "a+",index =True)
 557 |                 #min.to_csv(user_feature_min_name,mode = "a+",index = True)
 558 |                 # print(name)
 559 |                 # print(group.describe())
 560 |                 # print(group.iloc[:,[3,4,5,6,7]].quantile(0.95))
 561 |                 # #print(group.loc[237777,"speed_sec"])
 562 |                 # #print(series_max[[0,1]])
 563 |                 # #print(type(list(series_max.index)))
 564 |                 # #print(group.iloc[series_max,series_max.index])
 565 |                 # max_list = []
 566 |                 # min_list = []
 567 |                 # for i in range(len(series_max)):
 568 |                 #     #print(series_max[i])
 569 |                 #     #print(series_max.index[i])
 570 |                 #     #print(series_max.iloc[i])
 571 |                 #     max_list.append(group.loc[series_max.iloc[i],series_max.index[i]])
 572 |                 #     min_list.append(group.loc[series_min.iloc[i],series_min.index[i]])
 573 |                 #
 574 |                 # print(max_list,min_list)
 575 | 
 576 |             #print(result_df)
 577 |             result_df.to_csv(user_feature_max_min_name,index=False)
 578 |             user_feature_file.close()
 579 | 
 580 |         valiable_user_data.close()
 581 | 
 582 |     @staticmethod
 583 |     def caculate_all_max_min():
 584 |         datadir = "G:/新建文件夹/Geolife Trajectories 1.3/Data/"
 585 |         feature_num = 10
 586 |         valiable_user_data = open("./data/have_label_user.txt", "r")
 587 |         user_list = valiable_user_data.readlines()
 588 |         col_name = ["speed_sec", "acc_sec", "std_speed", "avg_speed", "mean_acc", "max_or_min", "label"]
 589 |         df = pd.DataFrame()
 590 |         #status = open(datadir+"status.csv","w+")
 591 | 
 592 | 
 593 |         for user in user_list:
 594 |             user_id = user[0:3]
 595 |             # user_features_max_min_name = datadir + user_id + "/user_features_max_min.csv"
 596 |             # user_features_max_min_file = open(user_features_max_min_name,"r")
 597 |             # # 原始数据
 598 |             # raw_data_df = pd.DataFrame(pd.read_csv(user_features_max_min_file))
 599 |             # max_min_df = max_min_df.append(raw_data_df)
 600 |             #
 601 |             # user_features_max_min_file.close()
 602 |             user_feature_file_name = datadir + user_id +"/user_features.csv"
 603 |             user_feature_file = open(user_feature_file_name,"r")
 604 |             raw_data_df = pd.DataFrame(pd.read_csv(user_feature_file))
 605 |             df = df.append(raw_data_df)
 606 | 
 607 |         df_label_groups = df.groupby("label")
 608 | 
 609 | 
 610 |         result_df = pd.DataFrame()
 611 |         for name,group in df_label_groups:
 612 |             df_gp_desc = group.iloc[:,[3,4,5,6,7]].describe()
 613 |             baifenwei_95 = group.iloc[:,[3,4,5,6,7]].quantile(0.95)
 614 |             baifenwei_96 = group.iloc[:,[3,4,5,6,7]].quantile(0.96)
 615 |             baifenwei_97 = group.iloc[:, [3, 4, 5, 6, 7]].quantile(0.97)
 616 |             baifenwei_98 = group.iloc[:, [3, 4, 5, 6, 7]].quantile(0.98)
 617 |             baifenwei_99 = group.iloc[:, [3, 4, 5, 6, 7]].quantile(0.99)
 618 |             #result_df = result_df.append(df_gp_desc)
 619 |             #print(name,"\n",baifenwei_95,baifenwei_96,baifenwei_97,baifenwei_98,baifenwei_99)
 620 |             file_name_99 = datadir + "baifenwei_99"  + ".csv"
 621 |             file_name_98 = datadir + "baifenwei_98" + ".csv"
 622 |             file_name_97 = datadir + "baifenwei_97" + ".csv"
 623 |             file_name_96 = datadir + "baifenwei_96" + ".csv"
 624 |             file_name_95 = datadir + "baifenwei_95" + ".csv"
 625 |             baifenwei_99.to_csv(file_name_99,mode = "a+")
 626 |             baifenwei_98.to_csv(file_name_98,mode = "a+")
 627 |             baifenwei_97.to_csv(file_name_97,mode = "a+")
 628 |             baifenwei_96.to_csv(file_name_96,mode = "a+")
 629 |             baifenwei_95.to_csv(file_name_95,mode = "a+")
 630 |             file_name = datadir+"status_label_" +str(name) + ".csv"
 631 |             df_gp_desc.to_csv(file_name,index=True,mode = "w+")
 632 | 
 633 | 
 634 |         #print(result_df)
 635 |         #result_df.to_csv(datadir+"status.csv",mode="w+")
 636 |         # max_min_groups = max_min_df.groupby(by = "max_or_min")
 637 |         #
 638 |         # max_group = max_min_groups.get_group(name="max")
 639 |         # min_group = max_min_groups.get_group(name="min")
 640 |         #
 641 |         # label_max_groups = max_group.groupby(by="label")
 642 |         # label_min_groups = min_group.groupby(by= "label")
 643 |         #
 644 |         # for name,group in label_max_groups:
 645 |         #     df_desc = group.describe()
 646 |         #     baifenwei_75 = df_desc.loc["75%"]
 647 |         #     baifenwei_25 = df_desc.loc["25%"]
 648 |         #     delta_Q = baifenwei_75 - baifenwei_25
 649 |         #     max = baifenwei_75 + delta_Q*1.5
 650 |         #     print(max)
 651 |         #for name,group in label_min_groups:
 652 |         #    print(name,group.describe())
 653 | 
 654 | 
 655 |         valiable_user_data.close()
 656 | 
 657 |     @staticmethod
 658 |     def features_status(interval_list):
 659 |         datadir = "G:/新建文件夹/Geolife Trajectories 1.3/Data/"
 660 |         out_path = "G:/新建文件夹/Geolife Trajectories 1.3/gps_en_discrezation/features_status/"
 661 |         valiable_user_data = open("./data/have_label_user.txt", "r")
 662 |         user_list = valiable_user_data.readlines()
 663 | 
 664 |         for interval in interval_list:
 665 |             print("处理%d" %(interval))
 666 |             users_df = pd.DataFrame()
 667 |             for user in user_list:
 668 |                 user_id = user[0:3]
 669 |                 # user_features_max_min_name = datadir + user_id + "/user_features_max_min.csv"
 670 |                 # user_features_max_min_file = open(user_features_max_min_name,"r")
 671 |                 # # 原始数据
 672 |                 # raw_data_df = pd.DataFrame(pd.read_csv(user_features_max_min_file))
 673 |                 # max_min_df = max_min_df.append(raw_data_df)
 674 |                 #
 675 |                 # user_features_max_min_file.close()
 676 |                 user_feature_file_name = datadir + user_id +"/user_features_interval_" + str(interval)+".csv"
 677 |                 user_feature_file = open(user_feature_file_name,"r")
 678 |                 raw_data_df = pd.DataFrame(pd.read_csv(user_feature_file))
 679 |                 users_df = users_df.append(raw_data_df)
 680 | 
 681 |             users_df.reset_index(drop=True)
 682 | 
 683 |             pd.DataFrame(users_df[param.SPEED_SEC].describe()).to_csv(out_path+"before_" +param.SPEED_SEC + ".csv")
 684 |             pd.DataFrame(users_df[param.AVG_SPEED].describe()).to_csv(out_path+"before_" +param.AVG_SPEED + ".csv")
 685 |             pd.DataFrame(users_df[param.STD_SPEED].describe()).to_csv(out_path+"before_" +param.STD_SPEED + ".csv")
 686 |             pd.DataFrame(users_df[param.ACC_SEC].describe()).to_csv(out_path+"before_" +param.ACC_SEC + ".csv")
 687 |             pd.DataFrame(users_df[param.MEAN_ACC].describe()).to_csv(out_path+"before_" +param.MEAN_ACC + ".csv")
 688 |             pd.DataFrame(users_df[param.STD_ACC].describe()).to_csv(out_path+"before_" +param.STD_ACC + ".csv")
 689 |             pd.DataFrame(users_df[param.HEAD].describe()).to_csv(out_path+"before_" +param.HEAD + ".csv")
 690 |             pd.DataFrame(users_df[param.HEAD_MEAN].describe()).to_csv(out_path+"before_" +param.HEAD_MEAN + ".csv")
 691 |             pd.DataFrame(users_df[param.STD_HEAD].describe()).to_csv(out_path+"before_" +param.STD_HEAD + ".csv")
 692 | 
 693 | 
 694 | 
 695 |             speed_sec = pd.DataFrame(Data.filter_box_quantile(users_df["speed_sec"], FILTER_K)).describe()
 696 |             acc_sec = pd.DataFrame(Data.filter_box_quantile(users_df["acc_sec"], FILTER_K)).describe()
 697 |             avg_speed = pd.DataFrame(Data.filter_box_quantile(users_df["avg_speed"], FILTER_K)).describe()
 698 |             std_speed = pd.DataFrame(Data.filter_box_quantile(users_df["std_speed"], FILTER_K)).describe()
 699 |             mean_acc = pd.DataFrame(Data.filter_box_quantile(users_df["mean_acc"], FILTER_K)).describe()
 700 |             std_acc = pd.DataFrame(Data.filter_box_quantile(users_df["std_acc"], FILTER_K)).describe()
 701 |             head = pd.DataFrame(Data.filter_box_quantile(users_df["head"], FILTER_K)).describe()
 702 |             head_mean = pd.DataFrame(Data.filter_box_quantile(users_df["head_mean"], FILTER_K)).describe()
 703 |             std_head = pd.DataFrame(Data.filter_box_quantile(users_df["std_head"], FILTER_K)).describe()
 704 | 
 705 |             pd.DataFrame(speed_sec).to_csv(out_path+"after_"+param.SPEED_SEC +".csv")
 706 |             pd.DataFrame(avg_speed).to_csv(out_path+"after_"+param.AVG_SPEED +".csv")
 707 |             pd.DataFrame(std_speed).to_csv(out_path+"after_"+param.STD_SPEED +".csv")
 708 |             pd.DataFrame(acc_sec).to_csv(out_path+"after_"+param.ACC_SEC +".csv")
 709 |             pd.DataFrame(mean_acc).to_csv(out_path+"after_"+param.MEAN_ACC +".csv")
 710 |             pd.DataFrame(std_acc).to_csv(out_path+"after_"+param.STD_ACC +".csv")
 711 |             pd.DataFrame(head).to_csv(out_path+"after_"+param.HEAD +".csv")
 712 |             pd.DataFrame(head_mean).to_csv(out_path+"after_"+param.HEAD_MEAN +".csv")
 713 |             pd.DataFrame(std_head).to_csv(out_path+"after_"+param.STD_HEAD +".csv")
 714 | 
 715 |     #离散化
 716 |     @staticmethod
 717 |     def discretization(interval_list):
 718 |         datadir = "G:/新建文件夹/Geolife Trajectories 1.3/Data/"
 719 |         out_path = "G:/新建文件夹/Geolife Trajectories 1.3/gps_en_discrezation/features_95_15/"
 720 |         feature_num = 9
 721 |         valiable_user_data = open("./data/have_label_user.txt", "r")
 722 |         user_list = valiable_user_data.readlines()
 723 |         #col_name = ["speed_sec", "acc_sec", "std_speed", "avg_speed", "mean_acc", "max_or_min", "label"]
 724 |         #所有数据
 725 | 
 726 |         # status = open(datadir+"status.csv","w+")
 727 |         for interval in interval_list:
 728 |             print("处理%d" %(interval))
 729 |             users_df = pd.DataFrame()
 730 |             for user in user_list:
 731 |                 user_id = user[0:3]
 732 |                 # user_features_max_min_name = datadir + user_id + "/user_features_max_min.csv"
 733 |                 # user_features_max_min_file = open(user_features_max_min_name,"r")
 734 |                 # # 原始数据
 735 |                 # raw_data_df = pd.DataFrame(pd.read_csv(user_features_max_min_file))
 736 |                 # max_min_df = max_min_df.append(raw_data_df)
 737 |                 #
 738 |                 # user_features_max_min_file.close()
 739 |                 user_feature_file_name = datadir + user_id +"/user_features_interval_" + str(interval)+".csv"
 740 |                 user_feature_file = open(user_feature_file_name,"r")
 741 |                 raw_data_df = pd.DataFrame(pd.read_csv(user_feature_file))
 742 |                 users_df = users_df.append(raw_data_df)
 743 | 
 744 |             users_df.reset_index(drop=True)
 745 |             # print("离散化")
 746 |             #
 747 |             # file = open(out_path+"status"+str(interval)+".txt",mode="w+")
 748 |             # file.write("interval_%d \n"%(interval))
 749 |             # for i in [0,0.95,0.96,0.97,0.98,0.99]:
 750 |             #     file.write("%s %f  %f\n" % (param.SPEED_SEC,i,users_df[param.SPEED_SEC].quantile(i)))
 751 |             #     file.write("%s %f  %f\n" % (param.AVG_SPEED,i,users_df[param.AVG_SPEED].quantile(i)))
 752 |             #     file.write("%s %f  %f\n" % (param.STD_SPEED,i,users_df[param.STD_SPEED].quantile(i)))
 753 |             #     file.write("%s %f  %f\n" % (param.ACC_SEC,i,users_df[param.ACC_SEC].quantile(i)))
 754 |             #     file.write("%s %f  %f\n" % (param.MEAN_ACC,i,users_df[param.MEAN_ACC].quantile(i)))
 755 |             #     file.write("%s %f  %f\n" % (param.STD_ACC,i,users_df[param.STD_ACC].quantile(i)))
 756 |             #     file.write("\n")
 757 |             #
 758 |             # file.close()
 759 |             speed_sec = pd.DataFrame(Data.equal_width(users_df["speed_sec"],WIDTH))
 760 |             acc_sec = pd.DataFrame(Data.equal_width(users_df["acc_sec"],WIDTH))
 761 |             avg_speed = pd.DataFrame(Data.equal_width(users_df["avg_speed"],WIDTH))
 762 |             std_speed = pd.DataFrame(Data.equal_width(users_df["std_speed"],WIDTH))
 763 |             mean_acc = pd.DataFrame(Data.equal_width(users_df["mean_acc"],WIDTH))
 764 |             std_acc = pd.DataFrame(Data.equal_width(users_df["std_acc"],WIDTH))
 765 |             head = pd.DataFrame(Data.equal_width(users_df["head"],WIDTH))
 766 |             head_mean = pd.DataFrame(Data.equal_width(users_df["head_mean"],WIDTH))
 767 |             std_head = pd.DataFrame(Data.equal_width(users_df["std_head"],WIDTH))
 768 | 
 769 |             print("连接矩阵")
 770 |             #features_en = np.concatenate((speed_sec,avg_speed,std_speed,acc_sec,mean_acc,std_acc),axis=1)
 771 |             result_df = pd.concat([speed_sec,avg_speed,std_speed,acc_sec,mean_acc,std_acc,head,head_mean,std_head],axis=1)
 772 | 
 773 |             #result_df = pd.DataFrame(features_en)
 774 |             result_df["label"] = users_df["label"].values
 775 |             result_df["seg_label"] = users_df["seg_label"].values
 776 |             #col_name = result_df.columns.tolist()
 777 |             #col_name.insert(col_name.index(0),"user_id")
 778 |             #result_df.reindex(columns=col_name)
 779 |             result_df["user_id"] = users_df["user_id"].values
 780 |             #result_df    columns =[userid(1),speed_sec(width),avg_speed(width),std_speed(width),acc_sec(width),mean_acc(width),label(1),seg_label(1)]
 781 | 
 782 |             #result_file = open(datadir+"user_features_data_en.csv",mode="w+")
 783 |             result_df.to_csv(out_path+"user_features_data_en_1_interval_"+str(interval)+".csv",mode="w+",header=True,index=False)
 784 | 
 785 |             valiable_user_data.close()
 786 | 
 787 |     @staticmethod
 788 |     def discretization_12(interval_list):
 789 |         datadir = "G:/新建文件夹/Geolife Trajectories 1.3/Data/"
 790 |         out_path = "G:/新建文件夹/Geolife Trajectories 1.3/gps_en_discrezation/features_12_95_30/"
 791 |         feature_num = 12
 792 |         valiable_user_data = open("./data/have_label_user.txt", "r")
 793 |         user_list = valiable_user_data.readlines()
 794 |         # col_name = ["speed_sec", "acc_sec", "std_speed", "avg_speed", "mean_acc", "max_or_min", "label"]
 795 |         # 所有数据
 796 | 
 797 |         # status = open(datadir+"status.csv","w+")
 798 |         for interval in interval_list:
 799 |             print("处理%d" % (interval))
 800 |             users_df = pd.DataFrame()
 801 |             for user in user_list:
 802 |                 user_id = user[0:3]
 803 |                 # user_features_max_min_name = datadir + user_id + "/user_features_max_min.csv"
 804 |                 # user_features_max_min_file = open(user_features_max_min_name,"r")
 805 |                 # # 原始数据
 806 |                 # raw_data_df = pd.DataFrame(pd.read_csv(user_features_max_min_file))
 807 |                 # max_min_df = max_min_df.append(raw_data_df)
 808 |                 #
 809 |                 # user_features_max_min_file.close()
 810 |                 user_feature_file_name = datadir + user_id + "/user_features_interval_" + str(interval)+ ".csv"
 811 |                 user_feature_file = open(user_feature_file_name, "r")
 812 |                 raw_data_df = pd.DataFrame(pd.read_csv(user_feature_file))
 813 |                 users_df = users_df.append(raw_data_df)
 814 | 
 815 |             users_df.reset_index(drop=True)
 816 |             # print("离散化")
 817 |             #
 818 |             # file = open(out_path+"status"+str(interval)+".txt",mode="w+")
 819 |             # file.write("interval_%d \n"%(interval))
 820 |             # for i in [0,0.95,0.96,0.97,0.98,0.99]:
 821 |             #     file.write("%s %f  %f\n" % (param.SPEED_SEC,i,users_df[param.SPEED_SEC].quantile(i)))
 822 |             #     file.write("%s %f  %f\n" % (param.AVG_SPEED,i,users_df[param.AVG_SPEED].quantile(i)))
 823 |             #     file.write("%s %f  %f\n" % (param.STD_SPEED,i,users_df[param.STD_SPEED].quantile(i)))
 824 |             #     file.write("%s %f  %f\n" % (param.ACC_SEC,i,users_df[param.ACC_SEC].quantile(i)))
 825 |             #     file.write("%s %f  %f\n" % (param.MEAN_ACC,i,users_df[param.MEAN_ACC].quantile(i)))
 826 |             #     file.write("%s %f  %f\n" % (param.STD_ACC,i,users_df[param.STD_ACC].quantile(i)))
 827 |             #     file.write("\n")
 828 |             #
 829 |             # file.close()
 830 |             speed_sec = pd.DataFrame(Data.equal_width(users_df["speed_sec"], WIDTH))
 831 |             acc_sec = pd.DataFrame(Data.equal_width(users_df["acc_sec"], WIDTH))
 832 |             avg_speed = pd.DataFrame(Data.equal_width(users_df["avg_speed"], WIDTH))
 833 |             std_speed = pd.DataFrame(Data.equal_width(users_df["std_speed"], WIDTH))
 834 |             mean_acc = pd.DataFrame(Data.equal_width(users_df["mean_acc"], WIDTH))
 835 |             std_acc = pd.DataFrame(Data.equal_width(users_df["std_acc"], WIDTH))
 836 |             head = pd.DataFrame(Data.equal_width(users_df["head"], WIDTH))
 837 |             head_mean = pd.DataFrame(Data.equal_width(users_df["head_mean"], WIDTH))
 838 |             std_head = pd.DataFrame(Data.equal_width(users_df["std_head"], WIDTH))
 839 |             max_speed = pd.DataFrame(Data.equal_width(users_df["max_speed"], WIDTH))
 840 |             max_acc = pd.DataFrame(Data.equal_width(users_df["max_acc"], WIDTH))
 841 |             max_head = pd.DataFrame(Data.equal_width(users_df["max_head"], WIDTH))
 842 | 
 843 |             print("连接矩阵")
 844 |             # features_en = np.concatenate((speed_sec,avg_speed,std_speed,acc_sec,mean_acc,std_acc),axis=1)
 845 |             result_df = pd.concat(
 846 |                 [speed_sec, avg_speed, std_speed, acc_sec, mean_acc, std_acc, head, head_mean, std_head,max_speed,max_acc,max_head], axis=1)
 847 | 
 848 |             # result_df = pd.DataFrame(features_en)
 849 |             result_df["label"] = users_df["label"].values
 850 |             result_df["seg_label"] = users_df["seg_label"].values
 851 |             # col_name = result_df.columns.tolist()
 852 |             # col_name.insert(col_name.index(0),"user_id")
 853 |             # result_df.reindex(columns=col_name)
 854 |             result_df["user_id"] = users_df["user_id"].values
 855 |             # result_df    columns =[userid(1),speed_sec(width),avg_speed(width),std_speed(width),acc_sec(width),mean_acc(width),label(1),seg_label(1)]
 856 | 
 857 |             # result_file = open(datadir+"user_features_data_en.csv",mode="w+")
 858 |             result_df.to_csv(out_path + "user_features_data_en_1_interval_" + str(interval) + ".csv", mode="w+",
 859 |                              header=True, index=False)
 860 | 
 861 |             valiable_user_data.close()
 862 | 
 863 |     #盒状过滤
 864 |     @staticmethod
 865 |     def filter_box_quantile(x,k):
 866 |         print(x.name)
 867 |         #不同的特征不同过滤
 868 |         min = 0
 869 |         max = 0
 870 |         if x.name == param.SPEED_SEC or x.name == param.AVG_SPEED \
 871 |                 or x.name == param.STD_SPEED or x.name == param.MEAN_ACC  or x.name == param.STD_ACC\
 872 |                 or x.name == param.HEAD_MEAN or x.name == param.STD_HEAD:
 873 |             min = x.quantile(0)
 874 |             max = x.quantile(FENWEI_MAX)
 875 |         elif x.name == param.ACC_SEC or x.name == param.HEAD:
 876 |             min = x.quantile(0.01)
 877 |             max = x.quantile(FENWEI_MAX)
 878 |         n = len(x.index)
 879 |         y = np.array(x.values)
 880 | 
 881 |         for i in range(k+1,n-k):
 882 | 
 883 |             if y[i] >min and y[i] <max:
 884 |                 continue
 885 |             y[i] = np.median(y[i-k:i+k])
 886 | 
 887 |             if y[i] > max:
 888 |                 y[i] = max
 889 |             if y[i] < min:
 890 |                 y[i] = min
 891 |         series_y = pd.Series(data=y)
 892 | 
 893 |         return series_y
 894 | 
 895 |     #等宽离散
 896 |     @staticmethod
 897 |     def equal_width(x,width):
 898 |         x = Data.filter_box_quantile(x,10)
 899 | 
 900 |         min = x.min()
 901 |         max = x.max()
 902 |         interval = (max - min + 0.001)/width
 903 |         x_arr = np.array(x.values)
 904 |         x_arr = (x_arr - min) / interval
 905 |         x_arr = np.floor(x_arr).astype(np.int64)
 906 |         x_result = np.zeros(shape=[len(x_arr),width],dtype=np.int32)
 907 |         for i in  range(len(x_arr)):
 908 |             x_result[i][x_arr[i]] = 1
 909 | 
 910 |         return x_result
 911 | 
 912 |     #制作npy文件
 913 |     @staticmethod
 914 |     def create_npy(interval):
 915 |         datadir = "G:/新建文件夹/Geolife Trajectories 1.3/Data/"
 916 |         self_data_dir = "./data/transportation_feature_en_1_interval_2/"
 917 |         user_data_file_name = datadir + "user_features_data_en_1_interval_"+str(interval)+".csv"
 918 |         user_data_file = open(user_data_file_name, "r")
 919 |         user_data_df = pd.DataFrame(pd.read_csv(user_data_file))
 920 |         classes = 4
 921 |         #0-99 特征one-hot编码后数据 100 label 101 seg_label 102 user_id
 922 |         user_data_label_groups = user_data_df.groupby(by="label")
 923 | 
 924 |         for name,group in user_data_label_groups:
 925 |             #if int(name) < 7:
 926 |             #    continue
 927 |             print("处理label  ",name)
 928 |             mode_file_name = self_data_dir + "transportation_mode" + str(name) +".npy"
 929 |             features_arr = np.array(group.iloc[:,0:100])
 930 |             seg_label_arr = np.array(group.iloc[:,-2])
 931 |             seg_label_unique,seg_label_index,seg_label_count = np.unique(seg_label_arr,return_index=True,return_counts=True)
 932 |             index_file_name = self_data_dir + "transportation_mode_" + str(name) +"_seg_index.csv"
 933 |             index_df = pd.DataFrame()
 934 |             index_df["seg_label_unique"] = seg_label_unique
 935 |             index_df["seg_label_index"] = seg_label_index.astype(np.int32)
 936 |             index_df["seg_label_count"] = seg_label_count.astype(np.int32)
 937 |             index_df = index_df.sort_values(by="seg_label_index")
 938 | 
 939 |             index_df.to_csv(index_file_name,mode="w+",index=False)
 940 |             del index_df
 941 |             del seg_label_arr
 942 |             np.save(mode_file_name,features_arr)
 943 | 
 944 | 
 945 | 
 946 |         user_data_file.close()
 947 | 
 948 |         #user_data_df_classes_4 = user_data_df[user_data_df["label"]<4]
 949 |         #data_classes_4_groups = user_data_df_classes_4.groupby(by="label")
 950 |         #for name,group in data_classes_4_groups:
 951 | 
 952 |     #切割序列为指定长度
 953 |     @staticmethod
 954 |     def slice_seq(x,index,exp_seq_len):
 955 |         #index 第一维是索引，第二维是长度
 956 | 
 957 |         #特征长度
 958 |         features_len = x.shape[1]
 959 |         #每一段可以切出的序列个数
 960 |         seq_num_list = np.array([math.ceil(i) for i in (index[1]/exp_seq_len)])
 961 |         #总序列个数
 962 |         num_total_seq = int(sum(seq_num_list))
 963 |         #结果矩阵
 964 |         new_data = np.zeros(shape=[num_total_seq,exp_seq_len,features_len],dtype=np.float64)
 965 |         #new_label = np.zeros(shape=[num_total_seq,exp_seq_len])
 966 |         new_index = np.zeros(shape=[2,num_total_seq],dtype=np.int64)
 967 | 
 968 |         count = 0
 969 |         for i in range(len(seq_num_list)):
 970 |             #该段轨迹的长度
 971 |             seg_len = index[1][i]
 972 |             #索引开始
 973 |             seg_start = index[0][i]
 974 |             seg_end = seg_start + seg_len
 975 |             #二维数组
 976 |             seg_data = x[seg_start:seg_end]
 977 | 
 978 |             num_full_seq = seg_len // exp_seq_len
 979 |             if num_full_seq:
 980 |                 full_seq = seg_data[0:num_full_seq * exp_seq_len].reshape((num_full_seq, exp_seq_len, features_len))
 981 |                 new_data[count:(count + num_full_seq)] = full_seq
 982 |                 #new_label[count:(count + num_full_seq)] = full_lab
 983 |                 new_index[0][count:(count + num_full_seq)] = i
 984 |                 new_index[1][count:(count + num_full_seq)] = exp_seq_len
 985 |                 count += num_full_seq
 986 |             #如果序列没有对齐
 987 |             if num_full_seq <seq_num_list[i]:
 988 |                 remain_seq = np.zeros((exp_seq_len, features_len))
 989 |                 remain_seq[0:(seg_len - num_full_seq * exp_seq_len)] = seg_data[num_full_seq * exp_seq_len:seg_len]
 990 |                 new_data[count] = remain_seq
 991 |                 #new_label[count] = remain_lab
 992 |                 new_index[0][count] = i
 993 |                 new_index[1][count] = seg_len - num_full_seq * exp_seq_len
 994 |                 count += 1
 995 |         return (new_data,new_index)
 996 | 
 997 |     #扩展数据  将原始seq打乱成新数据
 998 |     @staticmethod
 999 |     def expand_data_npy(classes,len_features):
1000 |         data_path  = "./data/transportation_feature_en_1_interval_1&2/"
1001 |         out_data_path = "./data/transportation_feature_en_1_interval_1&2_expand_all/"
1002 |         data_file_name_exp = data_path + "transportation_mode"
1003 |         for i in range(classes):
1004 |             print("处理" + str(i))
1005 |             # data_file  = data_file_name +str(i) +".npy"
1006 |             index_df = pd.DataFrame(pd.read_csv(data_file_name_exp + "_" + str(i) + "_seg_index.csv"))
1007 |             features_arr = np.load(data_file_name_exp + str(i) + ".npy")
1008 |             features_arr = features_arr[:, 0:len_features]
1009 |             index_arr = np.array(index_df.iloc[:, [1, 2]].T)
1010 |             # index shape = [2,总个数]
1011 |             # 第一维是第几段轨迹 第二维是在固定长度为exp_seq_len中的实际长度
1012 |             # data shape =[seq_nums,exp_seq_len,feature_len]
1013 |             features_arr_shuffle = np.zeros(shape=features_arr.shape,dtype=features_arr.dtype)
1014 | 
1015 |             start = 0
1016 |             end = 0
1017 |             for k in range(index_arr.shape[1] -1):
1018 |                 perm = np.random.permutation(range(index_arr[0][k],index_arr[0][k+1]))
1019 |                 end = start + index_arr[1][k]
1020 |                 features_arr_shuffle[start:end,:] = features_arr[perm,:]
1021 |                 start = end
1022 |             #连接新的矩阵
1023 |             features_arr_all = np.concatenate((features_arr,features_arr_shuffle),axis=0)
1024 |             #构造新的index
1025 |             index_df_expand = index_df.copy()
1026 |             index_df_expand["seg_label_index"] = index_df["seg_label_index"] + features_arr.shape[0]
1027 |             index_df_all = index_df.append(index_df_expand)
1028 | 
1029 |             features_arr_file_name = out_data_path + "transportation_mode" + str(i) +".npy"
1030 |             index_arr_file_name = out_data_path + "transportation_mode_" + str(i) + "_seg_index.csv"
1031 |             np.save(features_arr_file_name,features_arr_all)
1032 |             index_df_all.to_csv(index_arr_file_name,index = False)
1033 | 
1034 |     #制作所有数据的npz文件  包括原始数据与混淆数据
1035 |     @staticmethod
1036 |     def create_all_data_npy(classes,len_features):
1037 |         conf = config.Config("data/config.json")
1038 |         log_path = "./logdir/transportation_feature_en_1_expand/"
1039 |         data_path = "./data/transportation_feature_en_1_expand/"
1040 |         # 分训练集与测试集 验证集 8：1：1
1041 |         train_data_all = None
1042 |         train_label_all = None
1043 |         train_early_all = None
1044 |         valid_data_all = None
1045 |         valid_label_all = None
1046 |         valid_early_all = None
1047 |         test_data_all = None
1048 |         test_label_all = None
1049 |         test_early_all = None
1050 |         features_arr_list = []
1051 |         index_arr_list = []
1052 |         label_arr_list = []
1053 |         data_file_name_exp = data_path + "transportation_mode"
1054 |         for i in range(classes):
1055 |             print("加载" + str(i))
1056 |             # data_file  = data_file_name +str(i) +".npy"
1057 |             index_df = pd.DataFrame(pd.read_csv(data_file_name_exp + "_" + str(i) + "_seg_index.csv"))
1058 |             features_arr = np.load(data_file_name_exp + str(i) + ".npy")
1059 |             features_arr = features_arr[:, 0:len_features]
1060 |             index_arr = np.array(index_df.iloc[:, [1, 2]].T)
1061 |             # index shape = [2,总个数]
1062 |             # 第一维是第几段轨迹 第二维是在固定长度为exp_seq_len中的实际长度
1063 |             # data shape =[seq_nums,exp_seq_len,feature_len]   切出相等的数据长度 不足的padding
1064 |             (data, index_arr) = Data.slice_seq(features_arr, index_arr, conf.exp_seq_len)
1065 |             # 切割后删除features_arr index
1066 |             del features_arr
1067 |             del index_df
1068 |             label_arr = np.zeros(shape=[index_arr.shape[1]], dtype=np.int32)
1069 |             label_arr[:] = i
1070 |             # features_arr_list.append(data)
1071 |             # index_arr_list.append(index)
1072 |             # label_arr_list.append(label)
1073 |             # 划分训练集，验证集，测试集
1074 |             print("划分训练集，验证集，测试集   " + str(i))
1075 |             seq_nums = index_arr.shape[1]
1076 |             # 控制变量
1077 |             np.random.seed(2)
1078 |             index_perm = np.random.permutation(range(seq_nums))
1079 |             train_count = int(np.floor(seq_nums * 0.8))
1080 |             valid_count = int(np.floor(seq_nums * 0.9))
1081 |             test_count = seq_nums
1082 |             train_index = index_perm[0:train_count]
1083 |             valid_index = index_perm[train_count + 1:valid_count]
1084 |             test_index = index_perm[valid_count + 1:seq_nums]
1085 | 
1086 |             # train_set valid_set test_set
1087 |             train_data = data[train_index, :, :]
1088 |             train_label = label_arr[train_index]
1089 |             train_early = index_arr[1, train_index]
1090 | 
1091 |             valid_data = data[valid_index, :, :]
1092 |             valid_label = label_arr[valid_index]
1093 |             valid_early = index_arr[1, valid_index]
1094 | 
1095 |             test_data = data[test_index, :, :]
1096 |             test_label = label_arr[test_index]
1097 |             test_early = index_arr[1, test_index]
1098 | 
1099 |             # 删除读取到的data.
1100 |             del data
1101 |             del label_arr
1102 |             del index_arr
1103 | 
1104 |             if train_data_all is None:
1105 |                 train_data_all = train_data
1106 |                 train_label_all = train_label
1107 |                 train_early_all = train_early
1108 | 
1109 |                 valid_data_all = valid_data
1110 |                 valid_label_all = valid_label
1111 |                 valid_early_all = valid_early
1112 | 
1113 |                 test_data_all = test_data
1114 |                 test_label_all = test_label
1115 |                 test_early_all = test_early
1116 |             else:
1117 |                 train_data_all = np.concatenate((train_data_all, train_data), axis=0)
1118 |                 train_label_all = np.concatenate((train_label_all, train_label), axis=0)
1119 |                 train_early_all = np.concatenate((train_early_all, train_early), axis=0)
1120 | 
1121 |                 valid_data_all = np.concatenate((valid_data_all, valid_data), axis=0)
1122 |                 valid_label_all = np.concatenate((valid_label_all, valid_label), axis=0)
1123 |                 valid_early_all = np.concatenate((valid_early_all, valid_early), axis=0)
1124 | 
1125 |                 test_data_all = np.concatenate((test_data_all, test_data), axis=0)
1126 |                 test_label_all = np.concatenate((test_label_all, test_label), axis=0)
1127 |                 test_early_all = np.concatenate((test_early_all, test_early), axis=0)
1128 |         # 打乱数据
1129 |         np.random.seed(1)
1130 |         train_perm = np.random.permutation(range(train_early_all.shape[0]))
1131 |         np.random.seed(1)
1132 |         valid_perm = np.random.permutation(range(valid_early_all.shape[0]))
1133 |         np.random.seed(1)
1134 |         test_perm = np.random.permutation(range(test_early_all.shape[0]))
1135 | 
1136 |         # shape=[序列长度，总个数，特征长度]   TimeMajor
1137 |         train_data_all = np.transpose(train_data_all, [1, 0, 2])
1138 |         valid_data_all = np.transpose(valid_data_all, [1, 0, 2])
1139 |         test_data_all = np.transpose(test_data_all, [1, 0, 2])
1140 | 
1141 |         train_data_all = train_data_all[:, train_perm, :]
1142 |         train_label_all = train_label_all[train_perm]
1143 |         train_early_all = train_early_all[train_perm]
1144 | 
1145 |         valid_data_all = valid_data_all[:, valid_perm, :]
1146 |         valid_label_all = valid_label_all[valid_perm]
1147 |         valid_early_all = valid_early_all[valid_perm]
1148 | 
1149 |         test_data_all = test_data_all[:, test_perm, :]
1150 |         test_label_all = test_label_all[test_perm]
1151 |         test_early_all = test_early_all[test_perm]
1152 | 
1153 |         train_data_file = data_path + "train_data_set.npz"
1154 |         np.savez(train_data_file,train_data = train_data_all,train_label = train_label_all,train_early = train_early_all)
1155 |         del train_data_all
1156 |         del train_label_all
1157 |         del train_early_all
1158 | 
1159 |         valid_data_file = data_path + "valid_data_set.npz"
1160 |         np.savez(valid_data_file, valid_data=valid_data_all, valid_label=valid_label_all, valid_early=valid_early_all)
1161 |         del valid_data_all
1162 |         del valid_label_all
1163 |         del valid_early_all
1164 |         test_data_file = data_path + "test_data_set.npz"
1165 |         np.savez(test_data_file, test_data=test_data_all, test_label=test_label_all, test_early=test_early_all)
1166 | 
1167 |     #连接数据
1168 |     @staticmethod
1169 |     def concat_data(classes,len_features):
1170 |         data_path1 = "./data/transportation_feature_en_1/"
1171 |         data_path2 = "./data/transportation_feature_en_1_interval_2/"
1172 |         out_data_path = "./data/transportation_feature_en_1_interval_1&2/"
1173 |         data_file_name_exp1 = data_path1 + "transportation_mode"
1174 |         data_file_name_exp2 = data_path2 + "transportation_mode"
1175 |         for i in range(1,4):
1176 |             index_df1 = pd.DataFrame(pd.read_csv(data_file_name_exp1 + "_" + str(i) + "_seg_index.csv"))
1177 |             features_arr1 = np.load(data_file_name_exp1 + str(i) + ".npy")
1178 |             features_arr1 = features_arr1[:, 0:len_features]
1179 |             #index_arr1 = np.array(index_df1.iloc[:, [1, 2]].T)
1180 | 
1181 |             index_df2 = pd.DataFrame(pd.read_csv(data_file_name_exp2 + "_" + str(i) + "_seg_index.csv"))
1182 |             features_arr2 = np.load(data_file_name_exp2 + str(i) + ".npy")
1183 |             features_arr2 = features_arr2[:, 0:len_features]
1184 |             #index_arr2 = np.array(index_df2.iloc[:, [1, 2]].T)
1185 | 
1186 |             index_df2["seg_label_index"] = index_df2["seg_label_index"] + features_arr1.shape[0]
1187 |             index_df_all = index_df1.append(index_df2)
1188 |             index_file_name = out_data_path + "transportation_mode_" + str(i) +"_seg_index.csv"
1189 |             index_df_all.to_csv(index_file_name, mode="w+", index=False)
1190 | 
1191 |             features_arr_all = np.concatenate((features_arr1,features_arr2),axis=0)
1192 |             del features_arr1
1193 |             del features_arr2
1194 |             mode_file_name = out_data_path + "transportation_mode"+str(i)+".npy"
1195 |             np.save(mode_file_name,features_arr_all)
1196 | 
1197 |     @staticmethod
1198 |     def _int64_feature(value):
1199 |         return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
1200 | 
1201 |     @staticmethod
1202 |     def _bytes_feature(value):
1203 |         return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
1204 | 
1205 |     @staticmethod
1206 |     def _float_feature(value):
1207 |         return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))
1208 | 
1209 |     #制作tfrecord 以段分类
1210 |     @staticmethod
1211 |     def make_tfrecord(interval_list):
1212 |         data_dir = "G:/新建文件夹/Geolife Trajectories 1.3/gps_en_discrezation/"
1213 |         for interval in interval_list:
1214 |             print("处理"+str(interval))
1215 |             # train_writer = tf.python_io.TFRecordWriter("G:/all_data/tfrecords/interval_"+str(interval)+"_train.tfrecords")
1216 |             # valid_writer = tf.python_io.TFRecordWriter("G:/all_data/tfrecords/interval_"+str(interval)+"_valid.tfrecords")
1217 |             # test_writer = tf.python_io.TFRecordWriter("G:/all_data/tfrecords/interval_"+str(interval)+"_test.tfrecords")
1218 |             data_file_name = data_dir + "user_features_data_en_1_interval_" + str(interval) + ".csv"
1219 |             data_file = open(data_file_name,mode="r")
1220 |             data_df = pd.DataFrame(pd.read_csv(data_file))
1221 | 
1222 |             data_label_groups = data_df.groupby(by="label")
1223 |             k = 0
1224 |             for label_name,label_group in data_label_groups:
1225 | 
1226 |                 # if k < 6:
1227 |                 #     k+=1
1228 |                 #     continue
1229 |                 # if k > 5:
1230 |                 #     break
1231 |                 file_group_count = 0
1232 | 
1233 |                 print("处理label"+str(label_name))
1234 |                 train_writer = tf.python_io.TFRecordWriter(
1235 |                     "G:/all_data/tfrecords/interval_" + str(interval)+"_label_"+str(label_name) + "_train_0.tfrecords")
1236 |                 valid_writer = tf.python_io.TFRecordWriter(
1237 |                     "G:/all_data/tfrecords/interval_" + str(interval) +"_label_"+str(label_name)+ "_valid_0.tfrecords")
1238 |                 test_writer = tf.python_io.TFRecordWriter(
1239 |                     "G:/all_data/tfrecords/interval_" + str(interval)+"_label_"+str(label_name) + "_test_0.tfrecords")
1240 |                 seg_groups = label_group.groupby(by="seg_label")
1241 |                 count = 0
1242 |                 for seg_name,seg_group in seg_groups:
1243 |                     #seg_group 存放每段的轨迹点的特征，每个特征长30
1244 |                     speed_sec = np.array(seg_group.iloc[:,0:1*WIDTH])
1245 |                     avg_speed = np.array(seg_group.iloc[:1*WIDTH:2*WIDTH])
1246 |                     std_speed = np.array(seg_group.iloc[:,2*WIDTH:3*WIDTH])
1247 |                     acc_sec = np.array(seg_group.iloc[:, 3 * WIDTH:4 * WIDTH])
1248 |                     mean_acc = np.array(seg_group.iloc[:, 4 * WIDTH:5 * WIDTH])
1249 |                     std_acc = np.array(seg_group.iloc[:, 5 * WIDTH:6 * WIDTH])
1250 |                     feature = {
1251 |                         FeatureName.SPEED_SEC.value : Data._bytes_feature(speed_sec.tobytes()),
1252 |                         FeatureName.AVG_SPEED.value : Data._bytes_feature(avg_speed.tobytes()),
1253 |                         FeatureName.STD_SPEED.value : Data._bytes_feature(std_speed.tobytes()),
1254 |                         FeatureName.ACC_SEC.value   : Data._bytes_feature(acc_sec.tobytes()),
1255 |                         FeatureName.MEAN_ACC.value  : Data._bytes_feature(mean_acc.tobytes()),
1256 |                         FeatureName.STD_ACC.value   : Data._bytes_feature(std_acc.tobytes()),
1257 |                         "label":Data._int64_feature(label_name)
1258 |                     }
1259 |                     example = tf.train.Example(features = tf.train.Features(feature = feature))
1260 | 
1261 |                     if count % 1000 == 0 and count > 0:
1262 |                         train_writer.close()
1263 |                         valid_writer.close()
1264 |                         test_writer.close()
1265 |                         sys.stdout.flush()
1266 |                         file_group_count += 1
1267 | 
1268 |                         train_writer = tf.python_io.TFRecordWriter(
1269 |                             "G:/all_data/tfrecords/interval_" + str(interval) + "_label_" + str(
1270 |                                 label_name) + "_train_" + str(file_group_count)+ ".tfrecords")
1271 |                         valid_writer = tf.python_io.TFRecordWriter(
1272 |                             "G:/all_data/tfrecords/interval_" + str(interval) + "_label_" + str(
1273 |                                 label_name) + "_valid_" + str(file_group_count)+ ".tfrecords")
1274 |                         test_writer = tf.python_io.TFRecordWriter(
1275 |                             "G:/all_data/tfrecords/interval_" + str(interval) + "_label_" + str(
1276 |                                 label_name) + "_test_" + str(file_group_count)+ ".tfrecords")
1277 | 
1278 | 
1279 |                     t = count % 10
1280 |                     if t >=0 and t <8 :
1281 |                         train_writer.write(example.SerializeToString())
1282 |                     elif t == 8:
1283 |                         valid_writer.write(example.SerializeToString())
1284 |                     else:
1285 |                         test_writer.write(example.SerializeToString())
1286 | 
1287 |                     count += 1
1288 |                 k+=1
1289 |                 train_writer.close()
1290 |                 valid_writer.close()
1291 |                 test_writer.close()
1292 |                 sys.stdout.flush()
1293 | 
1294 |     #补零 规定长度 未分开
1295 |     @staticmethod
1296 |     def pad_seqs(x,exp_seq_len):
1297 |         seq_nums = int(np.ceil(x.shape[0]/exp_seq_len))
1298 |         seq_len = x.shape[0]
1299 |         early = np.zeros([seq_nums],dtype=np.int64)
1300 | 
1301 |         remain_len = seq_len % exp_seq_len
1302 |         if remain_len != 0:
1303 |             x_pad = np.pad(x,[[0,exp_seq_len-remain_len],[0,0]],"constant",constant_values=0)
1304 |             early[0:seq_nums-1] = exp_seq_len
1305 |             early[-1] = remain_len
1306 |             return x_pad,early
1307 |         else:
1308 |             early[:] = exp_seq_len
1309 |             return x,early
1310 | 
1311 |     #未完成
1312 |     @staticmethod
1313 |     def pad_slice_seqs(x,exp_seq_len):
1314 |         #未完成
1315 |         seq_nums = int(np.ceil(x.shape[0] / exp_seq_len))
1316 |         seq_len = x.shape[0]
1317 |         early = np.zeros([seq_nums], dtype=np.int32)
1318 |         remain_len = seq_len % exp_seq_len
1319 |         full_seq_nums = seq_len // exp_seq_len
1320 |         pass
1321 | 
1322 |     #制作规定长度的tfrecord
1323 |     @staticmethod
1324 |     def make_tfrecord_seq(interval_list,exp_seq_len):
1325 |         data_dir = "G:/新建文件夹/Geolife Trajectories 1.3/gps_en_discrezation/"
1326 |         out_path = "G:/all_data/tfrecords/"
1327 |         for interval in interval_list:
1328 |             print("处理" + str(interval))
1329 |             # train_writer = tf.python_io.TFRecordWriter("G:/all_data/tfrecords/interval_"+str(interval)+"_train.tfrecords")
1330 |             # valid_writer = tf.python_io.TFRecordWriter("G:/all_data/tfrecords/interval_"+str(interval)+"_valid.tfrecords")
1331 |             # test_writer = tf.python_io.TFRecordWriter("G:/all_data/tfrecords/interval_"+str(interval)+"_test.tfrecords")
1332 |             data_file_name = data_dir + "user_features_data_en_1_interval_" + str(interval) + ".csv"
1333 |             data_file = open(data_file_name, mode="r")
1334 |             data_df = pd.DataFrame(pd.read_csv(data_file))
1335 | 
1336 |             data_label_groups = data_df.groupby(by="label")
1337 |             k = 0
1338 |             for label_name, label_group in data_label_groups:
1339 | 
1340 |                 # if k < 7:
1341 |                 #     k+=1
1342 |                 #     continue
1343 |                 if k > 3:
1344 |                     return
1345 |                 file_group_count = 0
1346 |                 print("处理label" + str(label_name))
1347 |                 train_writer = tf.python_io.TFRecordWriter(
1348 |                     out_path + "interval_"+str(interval) + "_label_" + str(
1349 |                         label_name) + "_train_0.tfrecords")
1350 |                 valid_writer = tf.python_io.TFRecordWriter(
1351 |                     out_path + "interval_"+ str(interval) + "_label_" + str(
1352 |                         label_name) + "_valid_0.tfrecords")
1353 |                 test_writer = tf.python_io.TFRecordWriter(
1354 |                     out_path + "interval_"+ str(interval) + "_label_" + str(label_name) + "_test_0.tfrecords")
1355 |                 seg_groups = label_group.groupby(by="seg_label")
1356 |                 count = 0
1357 |                 for seg_name, seg_group in seg_groups:
1358 |                     # seg_group 存放每段的轨迹点的特征，每个特征长30
1359 |                     speed_sec = np.array(seg_group.iloc[:, 0 : 1 * WIDTH],dtype=np.int64)
1360 |                     avg_speed = np.array(seg_group.iloc[:, 1* WIDTH : 2 * WIDTH],dtype=np.int64)
1361 |                     std_speed = np.array(seg_group.iloc[:, 2* WIDTH : 3 * WIDTH],dtype=np.int64)
1362 |                     acc_sec = np.array(seg_group.iloc[:, 3* WIDTH : 4 * WIDTH],dtype=np.int64)
1363 |                     mean_acc = np.array(seg_group.iloc[:, 4* WIDTH : 5 * WIDTH],dtype=np.int64)
1364 |                     std_acc = np.array(seg_group.iloc[:, 5* WIDTH : 6 * WIDTH],dtype=np.int64)
1365 | 
1366 |                     speed_sec_pad,speed_sec_early = Data.pad_seqs(speed_sec,exp_seq_len)
1367 |                     avg_speed_pad,avg_speed_early = Data.pad_seqs(avg_speed,exp_seq_len)
1368 |                     std_speed_pad,std_speed_early = Data.pad_seqs(std_speed,exp_seq_len)
1369 |                     acc_sec_pad,acc_sec_early = Data.pad_seqs(acc_sec,exp_seq_len)
1370 |                     mean_acc_pad,mean_acc_early = Data.pad_seqs(mean_acc,exp_seq_len)
1371 |                     std_acc_pad,std_acc_early = Data.pad_seqs(std_acc,exp_seq_len)
1372 | 
1373 |                     label = np.zeros(speed_sec_early.shape,np.int64)
1374 |                     label[:] = int(label_name)
1375 | 
1376 |                     for i in range(len(speed_sec_early)):
1377 |                         start = i*exp_seq_len
1378 |                         end = (i+1)*exp_seq_len
1379 | 
1380 |                         feature = {
1381 |                             param.SPEED_SEC: Data._bytes_feature(speed_sec_pad[start:end].tobytes()),
1382 |                             param.AVG_SPEED: Data._bytes_feature(avg_speed_pad[start:end].tobytes()),
1383 |                             param.STD_SPEED: Data._bytes_feature(std_speed_pad[start:end].tobytes()),
1384 |                             param.ACC_SEC: Data._bytes_feature(acc_sec_pad[start:end].tobytes()),
1385 |                             param.MEAN_ACC: Data._bytes_feature(mean_acc_pad[start:end].tobytes()),
1386 |                             param.STD_ACC: Data._bytes_feature(std_acc_pad[start:end].tobytes()),
1387 |                             param.EARLY:Data._int64_feature(speed_sec_early[i]),
1388 |                             param.LABEL: Data._int64_feature(label[i])
1389 |                         }
1390 |                         example = tf.train.Example(features=tf.train.Features(feature=feature))
1391 | 
1392 |                         if count % 1000 == 0 and count > 0:
1393 |                             print("1000")
1394 |                             train_writer.close()
1395 |                             valid_writer.close()
1396 |                             test_writer.close()
1397 |                             #sys.stdout.flush()
1398 |                             file_group_count += 1
1399 | 
1400 |                             train_writer = tf.python_io.TFRecordWriter(
1401 |                                 out_path + "interval_" + str(interval) + "_label_" + str(
1402 |                                     label_name) + "_train_" + str(file_group_count) + ".tfrecords")
1403 |                             valid_writer = tf.python_io.TFRecordWriter(
1404 |                                 out_path + "interval_" + str(interval) + "_label_" + str(
1405 |                                     label_name) + "_valid_" + str(file_group_count) + ".tfrecords")
1406 |                             test_writer = tf.python_io.TFRecordWriter(
1407 |                                 out_path + "interval_" + str(interval) + "_label_" + str(
1408 |                                     label_name) + "_test_" + str(file_group_count) + ".tfrecords")
1409 | 
1410 |                         t = count % 10
1411 |                         if t >= 0 and t < 8:
1412 |                             train_writer.write(example.SerializeToString())
1413 |                         elif t == 8:
1414 |                             valid_writer.write(example.SerializeToString())
1415 |                         else:
1416 |                             test_writer.write(example.SerializeToString())
1417 | 
1418 |                         count += 1
1419 | 
1420 |                 print(count)
1421 |                 k += 1
1422 |                 train_writer.close()
1423 |                 valid_writer.close()
1424 |                 test_writer.close()
1425 |                 sys.stdout.flush()
1426 | 
1427 |     #制作规定长度的tfrecord
1428 |     @staticmethod
1429 |     def make_tfrecord_seq_shuffle(interval_list,exp_seq_len,dirname):
1430 |         data_dir = "G:/新建文件夹/Geolife Trajectories 1.3/gps_en_discrezation/features_12_95_30/"
1431 |         out_path = "G:/all_data/"+dirname
1432 |         for interval in interval_list:
1433 |             print("处理" + str(interval))
1434 |             # train_writer = tf.python_io.TFRecordWriter("G:/all_data/tfrecords/interval_"+str(interval)+"_train.tfrecords")
1435 |             # valid_writer = tf.python_io.TFRecordWriter("G:/all_data/tfrecords/interval_"+str(interval)+"_valid.tfrecords")
1436 |             # test_writer = tf.python_io.TFRecordWriter("G:/all_data/tfrecords/interval_"+str(interval)+"_test.tfrecords")
1437 |             data_file_name = data_dir + "user_features_data_en_1_interval_" + str(interval) + ".csv"
1438 |             data_file = open(data_file_name, mode="r")
1439 |             data_df = pd.DataFrame(pd.read_csv(data_file))
1440 | 
1441 |             data_label_groups = data_df.groupby(by="label")
1442 |             k = 0
1443 |             #for label_name, label_group in data_label_groups:
1444 | 
1445 |                 # if k < 7:
1446 |                 #     k+=1
1447 |                 #     continue
1448 |                 # if k > 3:
1449 |                 #     return
1450 |             file_group_count = 0
1451 |             train_writer = tf.python_io.TFRecordWriter(
1452 |                 out_path + "interval_"+str(interval) + "_train_0.tfrecords")
1453 |             valid_writer = tf.python_io.TFRecordWriter(
1454 |                 out_path + "interval_"+ str(interval)  + "_valid_0.tfrecords")
1455 |             test_writer = tf.python_io.TFRecordWriter(
1456 |                 out_path + "interval_"+ str(interval) + "_test_0.tfrecords")
1457 |             seg_groups = data_df.groupby(by="seg_label")
1458 |             count = 0
1459 |             for seg_name, seg_group in seg_groups:
1460 |                 if int(seg_group.iloc[0,-3]) > 3:
1461 |                     continue
1462 | 
1463 |                 # seg_group 存放每段的轨迹点的特征，每个特征长30
1464 |                 speed_sec = np.array(seg_group.iloc[:, 0 : 1 * WIDTH],dtype=np.int64)
1465 |                 avg_speed = np.array(seg_group.iloc[:, 1* WIDTH : 2 * WIDTH],dtype=np.int64)
1466 |                 std_speed = np.array(seg_group.iloc[:, 2* WIDTH : 3 * WIDTH],dtype=np.int64)
1467 |                 acc_sec = np.array(seg_group.iloc[:, 3* WIDTH : 4 * WIDTH],dtype=np.int64)
1468 |                 mean_acc = np.array(seg_group.iloc[:, 4* WIDTH : 5 * WIDTH],dtype=np.int64)
1469 |                 std_acc = np.array(seg_group.iloc[:, 5* WIDTH : 6 * WIDTH],dtype=np.int64)
1470 |                 head = np.array(seg_group.iloc[:, 6* WIDTH : 7 * WIDTH],dtype=np.int64)
1471 |                 head_mean = np.array(seg_group.iloc[:, 7* WIDTH : 8 * WIDTH],dtype=np.int64)
1472 |                 std_head = np.array(seg_group.iloc[:, 8* WIDTH : 9 * WIDTH],dtype=np.int64)
1473 |                 max_speed = np.array(seg_group.iloc[:, 9* WIDTH : 10 * WIDTH],dtype=np.int64)
1474 |                 max_acc = np.array(seg_group.iloc[:, 10* WIDTH : 11 * WIDTH],dtype=np.int64)
1475 |                 max_head = np.array(seg_group.iloc[:, 11* WIDTH : 12 * WIDTH],dtype=np.int64)
1476 | 
1477 | 
1478 |                 speed_sec_pad,speed_sec_early = Data.pad_seqs(speed_sec,exp_seq_len)
1479 |                 avg_speed_pad,avg_speed_early = Data.pad_seqs(avg_speed,exp_seq_len)
1480 |                 std_speed_pad,std_speed_early = Data.pad_seqs(std_speed,exp_seq_len)
1481 |                 acc_sec_pad,acc_sec_early = Data.pad_seqs(acc_sec,exp_seq_len)
1482 |                 mean_acc_pad,mean_acc_early = Data.pad_seqs(mean_acc,exp_seq_len)
1483 |                 std_acc_pad,std_acc_early = Data.pad_seqs(std_acc,exp_seq_len)
1484 |                 head_pad,head_early = Data.pad_seqs(head,exp_seq_len)
1485 |                 head_mean_pad,head_mean_early = Data.pad_seqs(head_mean,exp_seq_len)
1486 |                 std_head_pad,std_head_early = Data.pad_seqs(std_head,exp_seq_len)
1487 |                 max_speed_pad,max_speed_early = Data.pad_seqs(max_speed,exp_seq_len)
1488 |                 max_acc_pad,max_acc_early = Data.pad_seqs(max_acc,exp_seq_len)
1489 |                 max_head_pad,max_head_early = Data.pad_seqs(max_head,exp_seq_len)
1490 | 
1491 |                 label = np.zeros(speed_sec_early.shape,np.int64)
1492 |                 #print(int(seg_group.iloc[0,-3]))
1493 |                 label[:] = int(seg_group.iloc[0,-3])
1494 | 
1495 |                 for i in range(len(speed_sec_early)):
1496 |                     start = i*exp_seq_len
1497 |                     end = (i+1)*exp_seq_len
1498 | 
1499 |                     feature = {
1500 |                         param.SPEED_SEC: Data._bytes_feature(speed_sec_pad[start:end].tobytes()),
1501 |                         param.AVG_SPEED: Data._bytes_feature(avg_speed_pad[start:end].tobytes()),
1502 |                         param.STD_SPEED: Data._bytes_feature(std_speed_pad[start:end].tobytes()),
1503 |                         param.ACC_SEC: Data._bytes_feature(acc_sec_pad[start:end].tobytes()),
1504 |                         param.MEAN_ACC: Data._bytes_feature(mean_acc_pad[start:end].tobytes()),
1505 |                         param.STD_ACC: Data._bytes_feature(std_acc_pad[start:end].tobytes()),
1506 |                         param.HEAD: Data._bytes_feature(head_pad[start:end].tobytes()),
1507 |                         param.HEAD_MEAN: Data._bytes_feature(head_mean_pad[start:end].tobytes()),
1508 |                         param.STD_HEAD: Data._bytes_feature(std_head_pad[start:end].tobytes()),
1509 |                         param.MAX_SPEED: Data._bytes_feature(max_speed_pad[start:end].tobytes()),
1510 |                         param.MAX_ACC: Data._bytes_feature(max_acc_pad[start:end].tobytes()),
1511 |                         param.MAX_HEAD: Data._bytes_feature(max_head_pad[start:end].tobytes()),
1512 |                         param.EARLY: Data._int64_feature(std_head_early[i]),
1513 |                         param.LABEL: Data._int64_feature(label[i])
1514 |                     }
1515 |                     example = tf.train.Example(features=tf.train.Features(feature=feature))
1516 | 
1517 |                     if count % 1000 == 0 and count > 0:
1518 |                         print("1000")
1519 |                         train_writer.close()
1520 |                         valid_writer.close()
1521 |                         test_writer.close()
1522 |                         #sys.stdout.flush()
1523 |                         file_group_count += 1
1524 | 
1525 |                         train_writer = tf.python_io.TFRecordWriter(
1526 |                             out_path + "interval_" + str(interval) + "_train_" + str(file_group_count) + ".tfrecords")
1527 |                         valid_writer = tf.python_io.TFRecordWriter(
1528 |                             out_path + "interval_" + str(interval) + "_valid_" + str(file_group_count) + ".tfrecords")
1529 |                         test_writer = tf.python_io.TFRecordWriter(
1530 |                             out_path + "interval_" + str(interval) + "_test_" + str(file_group_count) + ".tfrecords")
1531 | 
1532 |                     t = count % 10
1533 |                     if t >= 0 and t < 8:
1534 |                         train_writer.write(example.SerializeToString())
1535 |                     elif t == 8:
1536 |                         valid_writer.write(example.SerializeToString())
1537 |                     else:
1538 |                         test_writer.write(example.SerializeToString())
1539 | 
1540 |                     count += 1
1541 | 
1542 |             print(count)
1543 |             k += 1
1544 |             train_writer.close()
1545 |             valid_writer.close()
1546 |             test_writer.close()
1547 |             sys.stdout.flush()
1548 | 
1549 |     #未完成
1550 |     @staticmethod
1551 |     def tf_slice_seq(input,exp_len_seq,has_early):
1552 | 
1553 |         shape = tf.shape(input)
1554 |         full_seq_nums = tf.floordiv(shape[0],exp_len_seq)
1555 |         # ?tf.zeros()
1556 |         result_list = []
1557 |         early_stop = []
1558 |         zero_constant = tf.zeros_like(full_seq_nums)
1559 |         is_zero = tf.equal(full_seq_nums,zero_constant)
1560 |         if not is_zero :
1561 |             for i in range(full_seq_nums):
1562 |                 result_list.append(tf.slice(input,[i*exp_len_seq,WIDTH],[exp_len_seq,WIDTH]))
1563 |                 if has_early:
1564 |                     early_stop.append(exp_len_seq)
1565 | 
1566 |         remain_length = shape[0] - full_seq_nums*exp_len_seq
1567 |         remain = tf.slice(input,[full_seq_nums*exp_len_seq,WIDTH],[shape[0]-remain_length,WIDTH])
1568 |         remain_padding = tf.pad(remain,[[0,exp_len_seq - remain_length],[0,0]])
1569 |         result_list.append(remain_padding)
1570 |         if has_early:
1571 |             early_stop.append(remain_length)
1572 | 
1573 |         if has_early:
1574 |             e = np.reshape(np.array(early_stop,np.int32),[len(early_stop),1])
1575 | 
1576 |             earlys = tf.Constant(e)
1577 |             return result_list,earlys
1578 | 
1579 |         return result_list
1580 | 
1581 |     #未完成
1582 |     @staticmethod
1583 |     def tf_slice_examples(features,feature_name_list,label_name,has_early,exp_seq_len):
1584 | 
1585 |         #feature_sliced_list  [[speed_sec_tensor*full_seq_num]]
1586 |         feature_sliced_list = []
1587 |         seqs = None
1588 |         early_seqs = None
1589 |         for feature_name in feature_name_list:
1590 |             feature = tf.decode_raw(features[feature_name],tf.int32)
1591 |             if has_early:
1592 |                 seqs,early_seqs = Data.tf_slice_seq(feature,exp_seq_len,has_early)
1593 |             else:
1594 |                 seqs = Data.tf_slice_seq(feature,exp_seq_len, has_early)
1595 |             feature_sliced_list.append(seqs)
1596 | 
1597 |         if len(feature_sliced_list)>0:
1598 |             seq_nums = len(feature_sliced_list[0])
1599 |         else:
1600 |             return
1601 | 
1602 |         features_seqs = None
1603 |         for i in range(seq_nums):
1604 |             single_seq = None
1605 |             for j in range(len[feature_name_list]):
1606 |                 if single_seq is None:
1607 |                     single_seq = feature_sliced_list[i][j]
1608 |                 else:
1609 |                     single_seq = tf.concat([single_seq,feature_sliced_list[i]][j],axis=1)
1610 | 
1611 |             single_seq = tf.expand_dims(single_seq,axis=0)
1612 |             if features_seqs is None:
1613 |                 features_seqs = single_seq
1614 |             else:
1615 |                 features_seqs = tf.concat([features_seqs,single_seq],axis=0)
1616 | 
1617 |         label = tf.cast(features[label_name], tf.int32)
1618 |         label_arr = np.zeros(shape=[seq_nums,1],dtype=tf.int32)
1619 |         label_arr[:] = label
1620 |         label_seqs = tf.Constant(label_arr,dtype=tf.int32)
1621 |         if has_early:
1622 |             return features_seqs,early_seqs,label_seqs
1623 |         else:
1624 |             return features_seqs,label_seqs
1625 | 
1626 | if __name__ == "__main__":
1627 |     #Data.sovle_row_data(5)
1628 |     #Data.caculate_feature([1,2,3,4,5])
1629 |     #Data.caculate_feature_max_min()
1630 |     #Data.caculate_all_max_min()
1631 |     #Data.discretization([1,2,3,4,5])
1632 |     #Data.create_npy(2)
1633 |     #Data.expand_data_npy(4,100)
1634 |     #Data.create_all_data_npy(4,100)
1635 |     #Data.concat_data(4,100)
1636 |     #Data.make_tfrecord([5])
1637 |     #Data.make_tfrecord_seq_shuffle([1,2,3,4,5],50,"tfrecords_95_15/")
1638 |     #Data.features_status([5])
1639 |     #Data.caculate_feature_12([1,2,3,4,5])
1640 |     #Data.discretization_12([1,2,3,4])
1641 |     Data.make_tfrecord_seq_shuffle([4], 50, "tfrecords_95_30_12/")


--------------------------------------------------------------------------------