├── LICENSE
├── README.md
└── stock_image_clf
    ├── cnn_hyper.py
    ├── main.py
    ├── random_forest.py
    ├── somemodels.py
    ├── someplots.py
    └── vectorization.py


/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 ernest222
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # stock_img_clf
2 | CNN、随机森林应用于股票四种形态A形（先涨后跌），U形（先跌后涨），R形（上涨），D形（下跌）的识别，基于keras，后端为tensorflow。准确率为96%左右。
3 | cnn_hyper.py 为超参数优化；someplots.py为cnn层输出、通道、热力图可视化等；vectorization.py为数据预处理；random_forest.py为cnn特征提取，拟合随机森林分类器。
4 | 
5 | 


--------------------------------------------------------------------------------
/stock_image_clf/cnn_hyper.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import vectorization as vr
 3 | from keras import layers
 4 | from keras import models
 5 | import numpy as np
 6 | from hyperopt import Trials, STATUS_OK, tpe
 7 | from hyperas import optim
 8 | from hyperas.distributions import choice, uniform
 9 | 
10 | # 超参数优化
11 | 
def data():
    """Load the complete training and validation sets for the hyper-parameter search.

    Passed as ``data=`` to ``optim.minimize`` next to ``create_model``, whose
    parameters carry the same four names as the values produced here.
    NOTE(review): hyperas is understood to work on this function's source text,
    so the local names and the final statement are deliberately left untouched;
    confirm against the hyperas documentation before refactoring.
    """
    # vec_all() reads every image of the split in one batch.
    x_train, y_train = vr.ImgVectorization('train').vec_all()
    # The 'val' folder plays the role of the test data during the search.
    x_test, y_test = vr.ImgVectorization('val').vec_all()
    return x_train, y_train, x_test, y_test
16 | 
def create_model(x_train, y_train, x_test, y_test):
    """Build, train and score one CNN candidate for the hyperas search.

    The double-brace expressions are hyperas search-space templates built from
    the imported ``choice`` and ``uniform`` distributions; they are not meant
    to be evaluated as plain Python.

    The result is a dict for hyperopt: 'loss' is the negated best validation
    accuracy (so that minimising it maximises accuracy), 'status' is STATUS_OK
    and 'model' is the trained Keras model.
    """
    model = models.Sequential()
    # Three conv + max-pool stages; the filter count of each stage is searched.
    model.add(layers.Conv2D({{choice([16, 32, 64, 128])}}, (3, 3), activation='relu', input_shape=(80,80,3)))
    model.add(layers.MaxPool2D((2, 2)))
    model.add(layers.Conv2D({{choice([16, 32, 64, 128])}}, (3, 3), activation='relu'))
    model.add(layers.MaxPool2D((2, 2)))
    model.add(layers.Conv2D({{choice([16, 32, 64, 128])}}, (3, 3), activation='relu'))
    model.add(layers.MaxPool2D((2, 2)))
    model.add(layers.Flatten())
    # Dropout rate and width of the hidden dense layer are searched as well.
    model.add(layers.Dropout({{uniform(0, 1)}}))
    model.add(layers.Dense({{choice([16, 32, 64, 128])}}, activation='relu'))
    # Four output units, one per chart-shape class.
    model.add(layers.Dense(4, activation='softmax'))
    model.compile(loss='categorical_crossentropy', metrics=['accuracy'],
                  optimizer='rmsprop')
    # Batch size and epoch count are part of the search space too.
    result = model.fit(x_train, y_train,
                       batch_size={{choice([32, 64, 128])}},
                       epochs={{choice([10,20])}},
                       validation_data=(x_test, y_test))
    # NOTE(review): newer Keras releases appear to record this metric under
    # 'val_accuracy' instead of 'val_acc' - confirm against the installed version.
    val_acc = np.amax(result.history['val_acc'])
    print('Best validation acc of epoch:', val_acc)
    return {'loss': -val_acc, 'status': STATUS_OK, 'model': model}
38 | 
39 | 
if __name__ == '__main__':
    # The best model is written to ./result relative to the working directory.
    this_dir = os.getcwd()
    result_dir = this_dir + os.sep + 'result'
    # Create the output folder up front so that saving cannot fail on a
    # missing directory after the whole search has already run.
    os.makedirs(result_dir, exist_ok=True)
    # The data is obtained by the search itself through the ``data`` callable,
    # so it is not loaded a second time here (the previous eager call kept a
    # full, unused copy of both splits in memory).
    best_run, best_model = optim.minimize(model=create_model,
                                          data=data,
                                          algo=tpe.suggest,
                                          max_evals=20,
                                          trials=Trials())
    best_model.save(result_dir + os.sep + 'cnn_hyper.h5')
    print("Best performing model chosen hyper-parameters:")
    print(best_run)
52 | 


--------------------------------------------------------------------------------
/stock_image_clf/main.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import somemodels as md
 3 | import vectorization as vr
 4 | from keras.models import load_model
 5 | import matplotlib.pyplot as plt
 6 | import numpy as np
 7 | 
 8 | 
 9 | class FitModel():
10 |     def __init__(self,data_enhance=False,img_size=(80,80),epochs=10,batch_size=32):
11 |         self.img_size=img_size
12 |         self.epochs=epochs
13 |         self.batch_size=batch_size
14 |         if data_enhance:
15 |             self.l=[0,0.1,0.1,0.1]
16 |         else:
17 |             self.l=[0,0,0,0]
18 | 
19 |     def fm(self):
20 |         train_input = vr.ImgVectorization('train',img_size=self.img_size,batch_size=self.batch_size,
21 |                                           rotation=self.l[0],hs=self.l[1],ws=self.l[2],zr=self.l[3]).vec_generator()
22 |         val_input = vr.ImgVectorization('val',img_size=self.img_size,batch_size=self.batch_size).vec_generator()
23 |         model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])  # 损失函数为分类交叉熵
24 |         history = model.fit_generator(train_input, steps_per_epoch=50, epochs=self.epochs, validation_data=val_input, validation_steps=12)
25 |         best_acc=np.amax(history.history['val_acc'])
26 |         print(best_acc)
27 |         model.save(result_dir + os.sep + model_name + '_'+str(pic_size)+'_'+str(dropout)+'_'+str(batch_size)+'_'+str(self.epochs)+'.h5')
28 |         self.plot(history)
29 | 
30 |     def plot(self,history):
31 |         acc = history.history['acc']
32 |         val_acc = history.history['val_acc']
33 |         loss = history.history['loss']
34 |         val_loss = history.history['val_loss']
35 |         epochs = range(1, len(acc) + 1)
36 |         plt.figure()
37 |         plt.subplot(2, 1, 1)
38 |         plt.plot(epochs, acc, 'bo', label='train')
39 |         plt.plot(epochs, val_acc, 'b', label='val')
40 |         plt.title('accuracy')
41 |         plt.subplot(2, 1, 2)
42 |         plt.plot(epochs, loss, 'bo', label='train')
43 |         plt.plot(epochs, val_loss, 'b', label='val')
44 |         plt.title('loss')
45 |         plt.legend()
46 |         plt.savefig(result_dir + os.sep + model_name + str(pic_size)+'_'+str(self.epochs)+'_plot')
47 |         plt.show()
48 | 
49 | 
def evaluate_on_test(model_file, img_size=(80, 80), batch_size=32):
    """Load the saved model at ``model_file`` and print its score on the 'test' images.

    ``img_size`` and ``batch_size`` are forwarded to the image loader; the
    whole test split is read in a single batch through ``vec_all``.
    """
    network = load_model(model_file)
    test_loader = vr.ImgVectorization('test', img_size=img_size, batch_size=batch_size)
    images, labels = test_loader.vec_all()  # evaluate on the test split
    print(network.evaluate(images, labels))
55 | 
if __name__ == '__main__':
    # Run configuration. FitModel reads several of these names as module
    # globals, so they must keep these exact names.
    pic_size, epochs, batch_size, dropout = 80, 10, 16, 0.2
    this_dir = os.getcwd()
    result_dir = os.sep.join([this_dir, 'result'])
    # Choose the architecture; md.dense_model() is the alternative.
    model_name, model = md.cnn_model(shape=(pic_size, pic_size, 3), dropout=dropout)
    trainer = FitModel(data_enhance=False, img_size=(pic_size, pic_size),
                       epochs=epochs, batch_size=batch_size)
    trainer.fm()
    # Same file name that FitModel.fm() wrote: <model>_<size>_<dropout>_<batch>_<epochs>.h5
    name_parts = [model_name, str(pic_size), str(dropout), str(batch_size), str(epochs)]
    model_file = result_dir + os.sep + '_'.join(name_parts) + '.h5'
    # Score the freshly saved model on the test set.
    evaluate_on_test(model_file, img_size=(pic_size, pic_size), batch_size=batch_size)
68 | 
69 | 
70 | 
71 | 


--------------------------------------------------------------------------------
/stock_image_clf/random_forest.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from sklearn.ensemble import RandomForestClassifier
 3 | from sklearn.tree import DecisionTreeClassifier
 4 | from sklearn.metrics import accuracy_score
 5 | from sklearn.model_selection import GridSearchCV
 6 | import numpy as np
 7 | from keras.models import load_model
 8 | from keras import models
 9 | import vectorization as vr
10 | import pandas as pd
11 | from sklearn import tree
12 | import matplotlib.pyplot as plt
13 | import pydotplus
14 | 
15 | # 将cnn的第一层dense层的特征提取，输入随机森林和决策树分类器
16 | 
def data(shuff=False):
    """Load images and labels of the 'train' and 'val' splits.

    Returns, in this order: train images, train integer labels, val images,
    val integer labels, train one-hot labels, val one-hot labels. The integer
    labels come from ``lable_list`` (folder names), the one-hot labels from
    the image generator. ``shuff`` is forwarded to the image loaders only.
    """
    train_images, train_onehot = vr.ImgVectorization('train', shuff=shuff).vec_all()
    val_images, val_onehot = vr.ImgVectorization('val', shuff=shuff).vec_all()
    train_labels = vr.ImgVectorization('train').lable_list()
    val_labels = vr.ImgVectorization('val').lable_list()
    return train_images, train_labels, val_images, val_labels, train_onehot, val_onehot
23 | 
24 | 
def get_layer_features(model, x_data, layer_name, channels):
    """Extract the activations of one layer of ``model`` for every sample.

    Parameters:
        model: Keras model to read activations from.
        x_data: array of input samples, first axis indexes the samples.
        layer_name: name of the layer whose output is extracted.
        channels: number of features that layer produces per sample.

    Returns:
        A DataFrame with one row per sample and columns named '0' .. str(channels - 1).

    Raises:
        ValueError: if the layer output does not have shape (samples, channels).
    """
    layer_model = models.Model(inputs=model.input, outputs=model.get_layer(layer_name).output)
    feature_col = [str(r) for r in range(channels)]
    n_samples = x_data.shape[0]
    if n_samples == 0:
        # Nothing to predict; keep the column layout of the regular result.
        return pd.DataFrame(data=np.zeros(shape=(0, channels)), columns=feature_col)
    # One batched predict call replaces the former per-sample calls; the values
    # are stored as Python floats (float64), as before.
    features = np.asarray(layer_model.predict(x_data), dtype=float)
    if features.shape != (n_samples, channels):
        raise ValueError('layer %r produced output of shape %s, expected %s'
                         % (layer_name, features.shape, (n_samples, channels)))
    return pd.DataFrame(data=features, columns=feature_col)
40 | 
def fit_random_forest(x_train, y_train, x_val, y_val):
    """Fit a random forest on the training features and print its validation accuracy."""
    forest_settings = dict(max_depth=11, min_samples_leaf=40, min_samples_split=6,
                           n_estimators=30, max_features='sqrt')
    forest = RandomForestClassifier(**forest_settings)
    forest.fit(x_train, y_train)
    # plot(forest, x_train) can be called here to render the trees and importances.
    print(forest)
    val_predictions = forest.predict(x_val)
    print(accuracy_score(val_predictions, y_val))
49 | 
def plot(clf, x):
    """Export every tree of the fitted forest as a PDF and chart the top-10 importances.

    ``clf`` is a fitted forest exposing ``estimators_`` and
    ``feature_importances_``; ``x`` is the feature DataFrame it was trained on
    (its column names label the tree nodes). PDFs are written to the
    'randomforest' folder under the module-level ``this_dir``.
    """
    shape_names = ['A_shape', 'D_shape', 'R_shape', 'U_shape']
    for index, estimator in enumerate(clf.estimators_):
        dot_data = tree.export_graphviz(estimator, out_file=None,
                                        feature_names=x.columns,
                                        class_names=shape_names,
                                        filled=True, rounded=True,
                                        special_characters=True)
        pdf_name = 'tree_' + str(index) + '.pdf'
        pydotplus.graph_from_dot_data(dot_data).write_pdf(
            this_dir + os.sep + 'randomforest' + os.sep + pdf_name)
    # Rank the features and keep the ten most important ones for the bar chart.
    ranking = pd.DataFrame({'features': x.columns, 'importances': clf.feature_importances_})
    top_ten = ranking.sort_values(by='importances', ascending=False).head(10)
    plt.bar(top_ten.features, top_ten.importances)
    plt.xticks(np.arange(len(top_ten.features)), top_ten.features)
    plt.ylabel('Importances')
    plt.title('Features Importances')
    plt.show()
68 | 
69 | 
def find_param(x, y):
    """Grid-search random-forest hyper-parameters and print the best setting and score.

    Uses 5-fold cross-validation with the 'roc_auc' scorer over depth,
    split size, leaf size and number of trees.
    """
    search_space = {
        'max_depth': range(10, 20),
        'min_samples_split': range(5, 15),
        'min_samples_leaf': range(10, 60, 10),
        'n_estimators': range(10, 71, 10),
    }
    search = GridSearchCV(estimator=RandomForestClassifier(), param_grid=search_space,
                          scoring='roc_auc', cv=5)
    search.fit(x, y)
    print(search.best_params_, search.best_score_)
76 | 
def fit_tree(x_train, y_train, x_val, y_val):
    """Fit a default decision tree on the training features and print its validation accuracy."""
    decision_tree = DecisionTreeClassifier()
    decision_tree.fit(x_train, y_train)
    print(decision_tree)
    val_predictions = decision_tree.predict(x_val)
    print(accuracy_score(val_predictions, y_val))
84 | 
if __name__ == '__main__':
    this_dir = os.getcwd()
    result_dir = this_dir + os.sep + 'result'
    model_path = result_dir + os.sep + 'cnn_80_0.2_16_10.h5'
    model = load_model(model_path)
    model.summary()
    # The previous code appended an undefined name ``path`` to PATH and stopped
    # with a NameError. The extra directory (presumably the Graphviz binaries
    # needed by plot()) is now read from the optional GRAPHVIZ_BIN environment
    # variable and only appended when it is set.
    graphviz_bin = os.environ.get('GRAPHVIZ_BIN')
    if graphviz_bin:
        os.environ["PATH"] = os.environ.get("PATH", "") + os.pathsep + graphviz_bin
    # Tuning (find_param) should use shuffled data. For fitting, the integer
    # labels are read folder by folder, so shuffling must stay off here.
    x_train, y_train, x_val, y_val, Y_onehot, y_onehot = data(shuff=False)
    # Features are the 128 activations of the CNN's first dense layer.
    train_features = get_layer_features(model, x_train, 'dense_1', 128)
    val_features = get_layer_features(model, x_val, 'dense_1', 128)
    fit_random_forest(train_features, y_train, val_features, y_val)
    # find_param(train_features,Y_onehot)
    # fit_tree(train_features,y_train,val_features,y_val)


--------------------------------------------------------------------------------
/stock_image_clf/somemodels.py:
--------------------------------------------------------------------------------
 1 | from keras import layers
 2 | from keras import models
 3 | 
 4 | 
 5 | 
 6 | 
 7 | 
 8 | def cnn_model(shape=(80,80,3),dropout=0.5,last_activation='softmax'):
 9 |     model=models.Sequential()
10 |     model.add(layers.Conv2D(64,(3,3),activation='relu',input_shape=shape))
11 |     model.add(layers.MaxPool2D((2,2)))
12 |     model.add(layers.Conv2D(64,(3,3),activation='relu'))
13 |     model.add(layers.MaxPool2D((2,2)))
14 |     model.add(layers.Conv2D(128, (3, 3), activation='relu'))
15 |     model.add(layers.MaxPool2D((2, 2)))
16 |     model.add(layers.Flatten())
17 |     model.add(layers.Dropout(dropout))
18 |     model.add(layers.Dense(128,activation='relu'))
19 |     model.add(layers.Dense(4,activation=last_activation))
20 |     model.summary()
21 |     return 'cnn', model
22 | 
23 | 
def dense_model(shape=(80 * 80,), last_activation='softmax'):
    """Build a single-hidden-layer dense classifier and return ('simple_dense', model).

    ``shape`` is the flat input shape and ``last_activation`` the activation
    of the 4-unit output layer. The model summary is printed; the model is
    returned uncompiled.
    """
    model = models.Sequential()
    hidden = layers.Dense(128, activation='relu', input_shape=shape)
    output = layers.Dense(4, activation=last_activation)
    for layer in (hidden, output):
        model.add(layer)
    model.summary()
    return 'simple_dense', model
30 | 
def inception_model():
    """Placeholder for an Inception-style model; nothing is built and None is returned."""
    return None
33 | 
34 | 
35 | 
36 | 
37 | 


--------------------------------------------------------------------------------
/stock_image_clf/someplots.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | from keras.models import load_model
  3 | from keras import models
  4 | from keras import backend as K
  5 | from keras_preprocessing import image
  6 | import numpy as np
  7 | import matplotlib.pyplot as plt
  8 | from keras.models import load_model
  9 | import cv2
 10 | 
 11 | # cnn可视化，参考deep learning with python一书
 12 | 
 13 | class PredictImg():
 14 |     def __init__(self,model_path,img_path):
 15 |         self.model_path=model_path
 16 |         self.img_path=img_path
 17 | 
 18 |     def img_to_tensor(self):
 19 |         img = image.load_img(self.img_path, target_size=(80, 80))
 20 |         img_tensor = image.img_to_array(img)
 21 |         img_tensor = np.expand_dims(img_tensor, axis=0)
 22 |         img_tensor /= 255
 23 |         print(img_tensor.shape)
 24 |         return img_tensor
 25 | 
 26 |     def predict(self):
 27 |         model = load_model(self.model_path)
 28 |         pred=model.predict(self.img_to_tensor())
 29 |         print(pred)
 30 |         result=np.argmax(pred[0])
 31 |         print(result)
 32 |         if result==0:
 33 |             print('A_shape')
 34 |         elif result==1:
 35 |             print('down_shape')
 36 |         elif result==2:
 37 |             print('rise_shape')
 38 |         elif result==3:
 39 |             print('U_shape')
 40 |         return result
 41 | 
 42 |     def plot_img_tensor(self):
 43 |         plt.imshow(self.img_to_tensor()[0])
 44 |         print(self.img_to_tensor().shape)
 45 |         plt.show()
 46 | 
 47 |     def plot_onelayer_onechannel(self,layerid,channel_id,layer_before):
 48 |         model = load_model(self.model_path)
 49 |         layer_outputs = [layer.output for layer in model.layers[:layer_before]]  # layer_before 前多少层，layer_before=5，获取前5层
 50 |         activation_model = models.Model(inputs=model.input, outputs=layer_outputs)
 51 |         activations = activation_model.predict(self.img_to_tensor())
 52 |         choose_layer_activation = activations[layerid]  # layerid=0 查看第一层激活输出
 53 |         print(choose_layer_activation.shape)
 54 |         plt.matshow(choose_layer_activation[0, :, :, channel_id], cmap='viridis')   # channel_id 查看layerid层的第channel_id通道图片
 55 |         plt.show()
 56 | 
 57 |     def plot_layer_allchannel(self,begin_layer,end_layer):  # 查看层数越多，打印图片对内存要求越高
 58 |         model = load_model(self.model_path)
 59 |         layer_outputs = [layer.output for layer in model.layers[begin_layer:end_layer]]
 60 |         layer_names = []
 61 |         for layer in model.layers[begin_layer:end_layer]:
 62 |             layer_names.append(layer.name)
 63 |         images_per_row = 16
 64 |         activation_model = models.Model(inputs=model.input, outputs=layer_outputs)
 65 |         activations = activation_model.predict(self.img_to_tensor())
 66 |         for layer_name, layer_activation in zip(layer_names, activations):
 67 |             n_features = layer_activation.shape[-1]
 68 |             size = layer_activation.shape[1]
 69 |             n_cols = n_features // images_per_row
 70 |             display_grid = np.zeros((size * n_cols, images_per_row * size))
 71 |             for col in range(n_cols):
 72 |                 for row in range(images_per_row):
 73 |                     channel_image = layer_activation[0, :, :,col * images_per_row + row]
 74 |                     channel_image -= channel_image.mean()
 75 |                     channel_image /= channel_image.std()
 76 |                     channel_image *= 64
 77 |                     channel_image += 128
 78 |                     channel_image = np.clip(channel_image, 0, 255).astype('uint8')
 79 |                     display_grid[col * size: (col + 1) * size,
 80 |                     row * size: (row + 1) * size] = channel_image
 81 |             scale = 1. / size
 82 |             plt.figure(figsize=(scale * display_grid.shape[1],
 83 |                                 scale * display_grid.shape[0]))
 84 |             plt.title(layer_name)
 85 |             plt.grid(False)
 86 |             plt.imshow(display_grid, aspect='auto', cmap='viridis')
 87 |             plt.show()
 88 | 
 89 | 
 90 |     def plot_heatmaps(self,con2dlayer_name,con2dlayer_channel): # 打印某层的热力图
 91 |         model = load_model(self.model_path)
 92 |         predict_result=self.predict()
 93 |         shape_output = model.output[:, predict_result]
 94 |         last_conv_layer = model.get_layer(con2dlayer_name)
 95 |         grads = K.gradients(shape_output, last_conv_layer.output)[0]
 96 |         pooled_grads = K.mean(grads, axis=(0, 1, 2))
 97 |         iterate = K.function([model.input], [pooled_grads, last_conv_layer.output[0]])
 98 |         pooled_grads_value, conv_layer_output_value = iterate([self.img_to_tensor()])
 99 |         for i in range(con2dlayer_channel):
100 |             conv_layer_output_value[:, :, i] *= pooled_grads_value[i]
101 |         heatmap = np.mean(conv_layer_output_value, axis=-1)
102 |         heatmap = np.maximum(heatmap, 0)
103 |         heatmap /= np.max(heatmap)
104 |         # print(heatmap)
105 |         plt.matshow(heatmap)
106 |         plt.show()
107 |         plt.matshow(heatmap)
108 |         img = cv2.imread(self.img_path)
109 |         heatmap = cv2.resize(heatmap, (img.shape[1], img.shape[0]))
110 |         heatmap = np.uint8(255 * heatmap)
111 |         heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
112 |         superimposed_img = heatmap * 0.9 + img
113 |         cv2.imwrite(result_dir+os.sep+str(predict_result)+'_heatmap.jpg', superimposed_img)
114 | 
115 | 
if __name__ == '__main__':
    # Paths are relative to the working directory; plot_heatmaps reads
    # ``result_dir`` as a module global, so the name must stay as it is.
    this_dir = os.getcwd()
    result_dir = os.sep.join([this_dir, 'result'])
    img_path = os.sep.join([this_dir, 'U_shape.png'])
    model_path = os.sep.join([result_dir, 'cnn_80_0.2_16_10.h5'])
    PI = PredictImg(model_path, img_path)
    PI.predict()
    # Other visualisations, enable as needed:
    # PI.plot_heatmaps('conv2d_5',64)
    # PI.plot_layer_allchannel(begin_layer=0,end_layer=6)
    # PI.plot_onelayer_onechannel(layerid=2,channel_id=5,layer_before=3)
    # PI.img_to_tensor()
    # PI.plot_img_tensor()


--------------------------------------------------------------------------------
/stock_image_clf/vectorization.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from keras.preprocessing.image import ImageDataGenerator
 3 | 
 4 | this_dir=os.getcwd()
 5 | 
 6 | 
 7 | # 图像数据预处理
 8 | class ImgVectorization():
 9 |     def __init__(self,datatype='train',img_size=(80,80),batch_size=20,mode='categorical',rotation=0,ws=0,hs=0,zr=0,hf=False,shuff=True):
10 |         self.data_dir=this_dir+os.sep+datatype
11 |         self.img_size = img_size
12 |         self.shuff=shuff
13 |         self.batch_size=batch_size
14 |         self.mode=mode
15 |         self.rotation=rotation
16 |         self.ws=ws
17 |         self.hs=hs
18 |         self.zr=zr
19 |         self.hf=hf
20 | 
21 |     def check_img_amount(self):  # 检查各类图片数量一致
22 |         list=[]
23 |         for folder in os.listdir(self.data_dir):
24 |             img_amount = len(os.listdir(self.data_dir + os.sep + folder))
25 |             img_class = folder
26 |             print(img_class, img_amount)
27 |             list.append(img_amount)
28 |         if len(set(list))==1:
29 |             return list[0]*len(list)    # 返回该目录图片数量
30 |         else:
31 |             print('样本分类数量不一致')
32 | 
33 |     def lable_list(self):     # 随机森林使用的label_list
34 |         list_lable=[]
35 |         for folder in os.listdir(self.data_dir):
36 |             img_amount = len(os.listdir(self.data_dir + os.sep + folder))
37 |             for im in range(img_amount):
38 |                 list_lable.append(int(folder))
39 |         return list_lable
40 | 
41 |     def vec_generator(self):      # 按批 向量化生成器
42 |         data = ImageDataGenerator(rescale=1. / 255, rotation_range=self.rotation, width_shift_range=self.ws,
43 |                                   height_shift_range=self.hs, zoom_range=self.zr,
44 |                                   horizontal_flip=self.hf)
45 |         data_generator = data.flow_from_directory(self.data_dir, target_size=self.img_size, batch_size=self.batch_size,
46 |                                                   class_mode=self.mode,shuffle=self.shuff)
47 |         return data_generator
48 | 
49 |     def check_input_type(self):   # 检查生成数据格式
50 |         for data_batch,lables_batch in self.vec_generator():
51 |             print(data_batch.shape)
52 |             print(lables_batch.shape)
53 |             break
54 | 
55 |     def vec_all(self):  # 向量化全部数据
56 |         data_amount=self.check_img_amount()
57 |         self.batch_size=data_amount
58 |         list=[]
59 |         for data_batch,data_label in self.vec_generator():
60 |             list.append(data_batch)
61 |             list.append(data_label)
62 |             break
63 |         return list[0],list[1]
64 | 
65 | 
66 | 
67 | 
68 | # 文本数据预处理
class TextVectorization():
    """Placeholder for text preprocessing; no behaviour is implemented yet."""


--------------------------------------------------------------------------------