├── README.md
└── Main_model.py

/README.md:
--------------------------------------------------------------------------------
# TCN-in-stock-prices-prediction
Stock price prediction with a TCN (Temporal Convolutional Network).

Reference implementation: https://github.com/locuslab/TCN/tree/master/TCN

Detailed walkthrough & paper notes (Chinese): https://blog.csdn.net/qq_33331451/article/details/104810419
--------------------------------------------------------------------------------

/Main_model.py:
--------------------------------------------------------------------------------
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif'] = ['SimHei']  # render CJK characters in plot titles
from copy import deepcopy
from tqdm import trange
from talib.abstract import *
from sklearn.preprocessing import MinMaxScaler
import torch
from torch import nn, optim
from torch.utils.data import TensorDataset, DataLoader
from tsfresh.feature_extraction import extract_features, MinimalFCParameters, EfficientFCParameters


df = pd.read_csv("../../paper-related-codev1/data/hs300_domains_v5/保险/sh601318_中国平安_byd.csv")
df.head(3)


# ------ basic modules ------
class GELU(nn.Module):
    """Nonlinear activation: Gaussian Error Linear Units (tanh approximation).
    The output is determined jointly by the nonlinear activation and a
    stochastic-regularization view of the input."""
    def __init__(self):
        super(GELU, self).__init__()

    def forward(self, x):
        return 0.5 * x * (1 + torch.tanh(np.sqrt(2 / np.pi) * (x + 0.044715 * torch.pow(x, 3))))


class ModelPatch(nn.Module):
    """A 1x1-conv bottleneck residual 'patch' used to bridge non-adjacent TCN layers."""
    def __init__(self, in_chs, mid_chs):
        super(ModelPatch, self).__init__()
        # nn.Sequential is an ordered container: modules are added to the
        # computation graph in the order they are passed to the constructor.
        # in_channels depends on the input (3 for RGB images, 1 for grayscale);
        # out_channels equals the number of filters. In 1-D, filter = kernel.
        self.residual = nn.Sequential(
            nn.Conv1d(in_channels=in_chs, out_channels=mid_chs, kernel_size=1),
            GELU(),
            nn.Conv1d(in_channels=mid_chs, out_channels=in_chs, kernel_size=1)
        )

    def forward(self, x):
        return x + self.residual(x)


class ModelPatch_wo_identify(nn.Module):
    """Same as ModelPatch but without the identity (skip) connection."""
    def __init__(self, in_chs, mid_chs):
        super(ModelPatch_wo_identify, self).__init__()
        self.residual = nn.Sequential(
            nn.Conv1d(in_channels=in_chs, out_channels=mid_chs, kernel_size=1),
            GELU(),
            nn.Conv1d(in_channels=mid_chs, out_channels=in_chs, kernel_size=1)
        )

    def forward(self, x):
        return self.residual(x)


class Chomp1d(nn.Module):
    """Cropping module: cut off the extra right-side padding after a padded
    convolution. tensor.contiguous() returns an equivalent tensor laid out
    contiguously in memory; some view() operations require contiguous storage.
    Padding plus this crop is what turns a plain Conv1d into a causal one."""
    def __init__(self, chomp_size):
        super(Chomp1d, self).__init__()  # initialize the parent class
        self.chomp_size = chomp_size

    def forward(self, x):
        return x[:, :, :-self.chomp_size].contiguous()
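
# --- Illustrative sketch (not part of the original pipeline) ---
# A minimal sanity check, under the definitions above, that padding by
# (kernel_size - 1) * dilation and then chomping the same amount keeps the
# sequence length fixed: that is the causal-convolution trick Chomp1d
# implements. The function name `_demo_causal_chomp` and all shapes here are
# ours, purely for illustration.
def _demo_causal_chomp(seq_len=30, kernel_size=3, dilation=2):
    pad = (kernel_size - 1) * dilation
    conv = nn.Conv1d(1, 1, kernel_size, padding=pad, dilation=dilation)
    chomp = Chomp1d(pad)
    x = torch.randn(1, 1, seq_len)   # (batch, channels, time)
    y = chomp(conv(x))               # conv output has seq_len + pad steps; chomp crops pad
    assert y.shape[-1] == seq_len    # length preserved after the crop
    return y.shape
# _demo_causal_chomp()  # -> torch.Size([1, 1, 30])
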
# Two dilated 1-D convolutions, each followed by Chomp1d, LayerNorm, GELU and
# dropout (p=0.2), wrapped in a residual connection. Every custom PyTorch
# module subclasses nn.Module and must call the parent constructor; network
# structure is best defined in __init__ (whose first argument is always self).
class TemporalBlock(nn.Module):
    def __init__(self, n_inputs, n_outputs,
                 kernel_size=[3, 3], stride=[1, 1],
                 dilation=[1, 2],
                 dropout=0.2):
        """
        One residual block of the TCN.
        n_inputs:    int, number of input channels
        n_outputs:   int, number of output channels
        kernel_size: list of int, kernel size per conv
        stride:      list of int, stride per conv
        dilation:    list of int, dilation factor per conv
        dropout:     float, dropout rate
        """
        super(TemporalBlock, self).__init__()

        # The padding can be calculated from the dilation. With
        # pre-conv width N + 2*padding = (post-conv width M - 1)*stride + effective kernel width,
        # choosing padding = (kernel_size - 1) * dilation keeps the length
        # fixed once the extra right-side padding is chomped off.
        padding = [0, 0]
        padding[0] = (kernel_size[0] - 1) * dilation[0]
        padding[1] = (kernel_size[1] - 1) * dilation[1]

        self.conv1 = nn.Conv1d(n_inputs, n_outputs, kernel_size[0],
                               stride=stride[0], padding=padding[0], dilation=dilation[0])
        # after conv1 the output size is (batch, n_outputs, seq_len + padding)
        self.chomp1 = Chomp1d(padding[0])  # crop the extra padding; keep seq_len time steps
        self.ln1 = nn.LayerNorm(n_outputs)
        self.gelu1 = GELU()
        self.dropout1 = nn.Dropout(dropout)

        self.conv2 = nn.Conv1d(n_outputs, n_outputs, kernel_size[1],
                               stride=stride[1], padding=padding[1], dilation=dilation[1])
        self.chomp2 = Chomp1d(padding[1])  # crop the extra padding; keep seq_len time steps
        self.ln2 = nn.LayerNorm(n_outputs)
        self.gelu2 = GELU()
        self.dropout2 = nn.Dropout(dropout)

        # 1x1 conv to match channel counts on the residual path when needed
        self.downsample = nn.Conv1d(n_inputs, n_outputs, 1) if n_inputs != n_outputs else None

        self.init_weights()

    def forward(self, x):
        """
        x: (batch, input_channel, seq_len)
        """
        # conv1 block
        tmp = self.conv1(x)
        tmp = self.chomp1(tmp)                            # N,C,L
        tmp = self.ln1(tmp.transpose(1, 2).contiguous())  # N,C,L -> N,L,C (LayerNorm over C)
        tmp = tmp.transpose(1, 2).contiguous()            # N,L,C -> N,C,L
        tmp = self.gelu1(tmp)
        tmp = self.dropout1(tmp)

        # conv2 block
        tmp = self.conv2(tmp)
        tmp = self.chomp2(tmp)
        tmp = self.ln2(tmp.transpose(1, 2).contiguous())  # N,C,L -> N,L,C
        tmp = tmp.transpose(1, 2).contiguous()            # N,L,C -> N,C,L
        tmp = self.gelu2(tmp)
        tmp = self.dropout2(tmp)

        out = tmp
        res = x if self.downsample is None else self.downsample(x)
        return out + res

    def init_weights(self):
        """Parameter initialization."""
        self.conv1.weight.data.normal_(0, 0.01)
        self.conv2.weight.data.normal_(0, 0.01)
        if self.downsample is not None:
            self.downsample.weight.data.normal_(0, 0.01)
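
# --- Illustrative sketch (not part of the original pipeline) ---
# With the defaults above (kernel_size=[3,3], dilation=[1,2]), each
# TemporalBlock adds (3-1)*1 + (3-1)*2 = 6 time steps of history, so a stack
# of n identical blocks sees 1 + 6*n steps. The helper name `_receptive_field`
# is ours, written against those defaults.
def _receptive_field(n_blocks, kernel_size=(3, 3), dilation=(1, 2)):
    per_block = sum((k - 1) * d for k, d in zip(kernel_size, dilation))
    return 1 + per_block * n_blocks
# _receptive_field(5)  # -> 31, i.e. five blocks cover the 30-day input window
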
# Temporal convolution network: a for loop builds the hidden layers
# (e.g. 8 layers of 25 channels each); they are registered in an nn.ModuleList.
class TemporalConvNet(nn.Module):
    def __init__(self, num_inputs, num_channels,
                 model_patch=ModelPatch,  # model_patch should be an nn.Module
                 dropout=0.2,
                 ):
        """
        TCN. The structure given in the paper handles the one-value-per-time-step
        (pure sequence) case well; when each time step is a vector, the vector
        can be split into that step's input channels; per-step matrices or
        higher-dimensional inputs are harder to accommodate.

        num_inputs:   int, number of input channels
        num_channels: list, hidden channels per layer; e.g. [25,25,25,25]
                      means 4 hidden layers with 25 channels each
        dropout:      float, dropout rate
        """
        super().__init__()
        # initialize the transfer-learning flag _tl to False
        self._tl = False

        layers = []
        num_levels = len(num_channels)
        for i in range(num_levels):
            # the dilations are a fixed [1,2] pair for every level, independent
            # of i (cf. the receptive-field sketch above)
            dilation_size = [1, 2]
            # input channels per layer: num_inputs for the first layer,
            # the previous layer's hidden size afterwards
            in_channels = num_inputs if i == 0 else num_channels[i - 1]
            out_channels = num_channels[i]  # output channels of this layer
            layers += [TemporalBlock(in_channels, out_channels, kernel_size=[3, 3],
                                     stride=[1, 1], dilation=dilation_size, dropout=dropout)]

        self.layers = nn.ModuleList(layers)

        # build the patches between non-adjacent layers with model_patch
        self.model_patchs = nn.ModuleDict()
        for i in range(num_levels):
            for j in range(i + 1, num_levels):
                if j != i + 1:
                    patch_name = f"{i}t{j}"
                    self.model_patchs[patch_name] = model_patch(in_chs=num_channels[i],
                                                                mid_chs=num_channels[i] // 2)

        # linear output layer
        self.linear = nn.Linear(num_channels[-1], 1)

    def _set2tl(self):
        # transfer-learning mode: freeze the backbone, train only the patches
        self._tl = True
        for param in self.layers.parameters():
            param.requires_grad = False
        for param in self.linear.parameters():
            param.requires_grad = False
        for param in self.model_patchs.parameters():
            param.requires_grad = True

    def _set2dl(self):
        # direct-learning mode: everything is trainable
        self._tl = False
        for param in self.layers.parameters():
            param.requires_grad = True
        for param in self.linear.parameters():
            param.requires_grad = True
        for param in self.model_patchs.parameters():
            param.requires_grad = True

    def forward(self, x):
        if self._tl == False:
            input_ = x
            for idx, layer in enumerate(self.layers, 0):
                output_ = layer(input_)
                input_ = output_
            output_ = self.linear(input_[:, :, -1])
            return torch.sigmoid(output_)

        # transfer-learning path: each layer's input is the previous layer's
        # output plus patched versions of all earlier layers' outputs
        output_list = []
        for idx, layer in enumerate(self.layers, 0):
            if idx == 0:
                input_ = x
            else:
                input_ = 0
                for i in range(idx):
                    if i != idx - 1:
                        patch_name = f"{i}t{idx}"
                        delta_ = self.model_patchs[patch_name](output_list[i])
                        # addition is used as the fusion method here; if the
                        # fusion were changed to concatenation, torch.cat
                        # would be needed instead
                        input_ = input_ + delta_
                    else:
                        input_ = input_ + output_list[i]
            output_ = layer(input_)
            output_list.append(output_)

        output_ = self.linear(output_[:, :, -1])
        return torch.sigmoid(output_)
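
# --- Illustrative sketch (not part of the original pipeline) ---
# A minimal forward-pass check of the network above, with made-up shapes:
# 16 windows, 20 features per day, 30 days. The output is one sigmoid
# probability per window, on both the direct and the transfer-learning path.
# The function name `_demo_tcn_forward` is ours.
def _demo_tcn_forward():
    net = TemporalConvNet(num_inputs=20, num_channels=[32, 32, 32, 32, 32])
    x = torch.randn(16, 20, 30)   # (batch, channels/features, seq_len)
    p = net(x)                    # direct-learning path (_tl is False)
    assert p.shape == (16, 1)
    net._set2tl()                 # freeze the backbone, route through the patches
    p_tl = net(x)
    assert p_tl.shape == (16, 1)
    return p.shape, p_tl.shape
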
def get_tsfeatures(df, use_volume=False, dropna=True, scaled=True, mode="Minimal"):
    window_size = 30

    # build one tsfresh window per trading day: tag the previous 30 rows with
    # "<code>_<date>" so each day gets its own group id covering those 30 days
    dfs = []
    for i in range(window_size, len(df)):
        date = df.iloc[i, 0]
        sdf = df.iloc[i-window_size:i].copy()
        sdf['code'] = sdf['code'] + "_" + date
        dfs.append(sdf)
    dfm = pd.concat(dfs)

    # select the raw columns to featurize
    use_cols = ['code', 'date', 'open', 'high', 'low', 'close', 'preclose', 'volume', 'amount']
    if not use_volume:
        use_cols.remove("volume")
    df_feature = dfm[use_cols]

    # per id ("code"), compute sum, mean, median, length (=30), std, var,
    # root_mean_square, maximum, absolute_maximum, minimum, ...
    settings = MinimalFCParameters() if mode == "Minimal" else EfficientFCParameters()
    features = extract_features(df_feature, column_id="code", column_sort="date",
                                default_fc_parameters=settings)
    features = features.reset_index()

    # split the synthetic id (now the index) back into code and date
    features['code'] = features['index'].str.split("_").str[0]
    features['date'] = features['index'].str.split("_").str[1]

    dff = pd.merge(df[use_cols], features.drop(columns="index"), on=['code', 'date'], how="left")
    drop_cols = [col for col in dff.columns if col.endswith("length")]
    dff = dff.drop(columns=drop_cols)

    if dropna:
        dff = dff.dropna(axis=1, thresh=int(0.9*len(dff))).dropna()
    dff.fillna(0, inplace=True)
    if scaled:
        dff.iloc[:, 2:] = MinMaxScaler().fit_transform(dff.iloc[:, 2:])
    return dff
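
# --- Illustrative sketch (not part of the original pipeline) ---
# How the rolling ids in get_tsfeatures work, on a toy frame with a 3-day
# window: each day from index `window_size` on gets its own tagged copy of the
# preceding rows, so tsfresh aggregates exactly one window per day. All data
# and the name `_demo_window_ids` here are made up for illustration.
def _demo_window_ids():
    toy = pd.DataFrame({
        "date": ["d1", "d2", "d3", "d4", "d5"],
        "code": ["sh000001"] * 5,
        "close": [1.0, 1.1, 1.2, 1.3, 1.4],
    })
    window_size = 3
    dfs = []
    for i in range(window_size, len(toy)):
        date = toy.iloc[i, 0]
        sdf = toy.iloc[i - window_size:i].copy()
        sdf['code'] = sdf['code'] + "_" + date
        dfs.append(sdf)
    dfm = pd.concat(dfs)
    # two group ids, "sh000001_d4" and "sh000001_d5", with three rows each
    return dfm['code'].value_counts()
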
def get_Technical_Indicator(df, timeperiod=14, use_volume=False, dropna=True, scaled=True):
    drop_cols = ["adjustflag", "turn", "tradestatus", "pctChg", "isST"]
    if not use_volume:
        drop_cols.append("volume")
    df = df.sort_values("date")

    df['adx'] = ADX(df['high'], df['low'], df['close'], timeperiod=timeperiod)
    df['apo'] = APO(df['close'], fastperiod=12, slowperiod=26, matype=0)
    df['bop'] = BOP(df['open'], df['high'], df['low'], df['close'])
    df['mfi'] = MFI(df['high'], df['low'], df['close'], df['volume'], timeperiod=timeperiod)
    df['mom'] = MOM(df['close'], timeperiod=timeperiod)
    df['ppo'] = PPO(df['close'], fastperiod=12, slowperiod=26, matype=0)
    df['rsi'] = RSI(df['close'], timeperiod=timeperiod)

    fastk, fastd = STOCHF(df['high'], df['low'], df['close'], fastk_period=5, fastd_period=3, fastd_matype=0)
    df['fastk'] = fastk
    df['fastd'] = fastd

    slowk, slowd = STOCH(df['high'], df['low'], df['close'], fastk_period=5, slowk_period=3, slowk_matype=0, slowd_period=3, slowd_matype=0)
    df['slowk'] = slowk
    df['slowd'] = slowd

    df['willr'] = WILLR(df['high'], df['low'], df['close'], timeperiod=timeperiod)

    macd, macdsignal, macdhist = MACD(df['close'], fastperiod=12, slowperiod=26, signalperiod=9)
    df['macd'] = macd

    df['roc_close'] = ROC(df['close'], timeperiod=timeperiod)
    df['cci'] = CCI(df['high'], df['low'], df['close'], timeperiod=timeperiod)

    if use_volume:
        df['obv'] = OBV(df['close'], df['volume'])

    # 1- to 5-day close-to-close returns
    df['p1ccr'] = df['close'] / df['close'].shift(1) - 1
    df['p2ccr'] = df['close'] / df['close'].shift(2) - 1
    df['p3ccr'] = df['close'] / df['close'].shift(3) - 1
    df['p4ccr'] = df['close'] / df['close'].shift(4) - 1
    df['p5ccr'] = df['close'] / df['close'].shift(5) - 1

    df = df.drop(columns=drop_cols)
    if dropna:
        df.dropna(inplace=True)
    if scaled:
        df.iloc[:, 2:] = MinMaxScaler().fit_transform(df.iloc[:, 2:])
    return df


def get_dataset(df):
    features = []
    labels = []
    window = 30
    for i in range(window, len(df)):
        feature = df.iloc[i-window:i, 2:].values.T  # C * L
        # label: direction of the close on day i relative to day i-1,
        # i.e. the day after the feature window ends
        ratio = df['close'].iloc[i] / (df['close'].iloc[i-1] + 1e-9)
        label = 1 if ratio >= 1 else 0
        features.append(feature)
        labels.append(label)

    features = torch.tensor(np.stack(features), dtype=torch.float)
    labels = torch.tensor(labels, dtype=torch.float)

    # chronological 70/10/20 train/val/test split
    N1 = int(len(features)*0.7)
    N2 = int(len(features)*0.8)
    N = len(features)
    idx_train = list(range(N1))
    idx_val = list(range(N1, N2))
    idx_test = list(range(N2, N))
    X_train, y_train = features[idx_train], labels[idx_train]
    X_val, y_val = features[idx_val], labels[idx_val]
    X_test, y_test = features[idx_test], labels[idx_test]

    return X_train, X_val, X_test, y_train, y_val, y_test


def cal_acc(pred, label):
    pred = (pred.squeeze() > 0.5).float()
    acc = (pred == label).float().mean()
    return acc


def train(df, file=None, num_channels=[32, 32, 32, 32, 32]):
    # datasets
    X_train, X_val, X_test, y_train, y_val, y_test = get_dataset(df)

    # One epoch is a full forward-and-backward pass over the training set.
    # batch_size is the number of samples per update; it affects training
    # speed and optimization, and interacts with the number of epochs.
    train_batch_size = 32
    # DataLoader is an iterable (like a built-in list) over the batched
    # TensorDataset and can use worker processes to speed up batching
    train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=train_batch_size, shuffle=True)

    # model
    num_inputs = X_train.shape[1]
    model = TemporalConvNet(num_inputs, num_channels)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)  # Adam with the default lr=1e-3
    criterion = nn.BCELoss()  # binary cross-entropy: loss = -w*[p*log(q) + (1-p)*log(1-q)]

    # training
    best_acc = 0.0
    best_model = None
    logs = {}
    for epoch in trange(50):  # trange(i) is tqdm(range(i)): range with a progress bar
        for x, y in train_loader:
            y_pred = model(x)
            y_pred = y_pred.view(-1)
            loss = criterion(y_pred, y)
            optimizer.zero_grad()  # reset parameter gradients to zero
            loss.backward()
            optimizer.step()
        with torch.no_grad():
            y_train_pred = model(X_train)
            acc_train = cal_acc(y_train_pred, y_train)
            y_val_pred = model(X_val)
            acc_val = cal_acc(y_val_pred, y_val)
        if acc_val > best_acc:
            best_acc = acc_val
            best_model = deepcopy(model)
        logs[epoch] = {"train_loss": loss.item(), "acc_train": acc_train.item(), "acc_val": acc_val.item()}

    # metrics on the test set
    with torch.no_grad():
        pred_y_test = best_model(X_test)
    acc_test = cal_acc(pred_y_test, y_test)
    print(f"file: {file}, acc on test: {acc_test}")
    df_log = pd.DataFrame(logs).T
    return acc_test, df_log
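
# --- Illustrative sketch (not part of the original pipeline) ---
# A naive always-predict-up baseline over the same 0/1 labels that
# get_dataset produces; test accuracies printed by train() are only
# meaningful relative to this. The helper name `_baseline_up_acc` is ours.
def _baseline_up_acc(y):
    return (y == 1.0).float().mean()
# e.g. _baseline_up_acc(y_test) after get_dataset(df)
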
"sz002179_中航光电":"data/sz002179_中航光电_byd.csv", 480 | "sh601601_中国太保":"data/sh601601_中国太保_byd.csv", 481 | "sh600029_南方航空":"data/sh600029_南方航空_byd.csv", 482 | "sh601808_中海油服":"data/sh601808_中海油服_byd.csv", 483 | "sh601818_光大银行":"data/sh601818_光大银行_byd.csv", 484 | "sh601377_兴业证券":"data/sh601377_兴业证券_byd.csv" 485 | } 486 | 487 | 488 | 489 | 490 | fig, axes = plt.subplots(2, 3, figsize=(12, 8)) 491 | for i, (file, path) in enumerate(files.items()): 492 | df = pd.read_csv(path) 493 | dfti = get_Technical_Indicator(df) 494 | # dfts = get_tsfeatures(df) 495 | _, df_log = train(dfti, file) 496 | df_log.plot(title=file, ax=axes.flatten()[i]) 497 | 498 | 499 | 500 | 501 | fig, axes = plt.subplots(2, 3, figsize=(12, 8)) 502 | for i, (file, path) in enumerate(files.items()): 503 | df = pd.read_csv(path) 504 | # dfti = get_Technical_Indicator(df) 505 | dfts = get_tsfeatures(df) 506 | _, df_log = train(dfts, file) 507 | df_log.plot(title=file, ax=axes.flatten()[i]) 508 | 509 | 510 | 511 | fig, axes = plt.subplots(2, 3, figsize=(12, 8)) 512 | for i, (file, path) in enumerate(files.items()): 513 | df = pd.read_csv(path) 514 | # dfti = get_Technical_Indicator(df) 515 | dfts = get_tsfeatures(df, mode="Efficient") 516 | _, df_log = train(dfts, file) 517 | df_log.plot(title=file, ax=axes.flatten()[i]) 518 | 519 | 520 | 521 | 522 | fig, axes = plt.subplots(2, 3, figsize=(12, 8)) 523 | for i, (file, path) in enumerate(files.items()): 524 | df = pd.read_csv(path) 525 | dfti = get_Technical_Indicator(df) 526 | dfts = get_tsfeatures(df) 527 | redundant_cols = ['open', 'high', 'low', 'close', 'preclose', 'volume', 'amount'] 528 | sel_cols = [col for col in dfts.columns if col not in redundant_cols] 529 | dff = pd.merge(dfti, dfts[sel_cols]) 530 | num_channels = [64, 32, 32, 32, 32] 531 | _, df_log = train(dff, file, num_channels) 532 | df_log.plot(title=file, ax=axes.flatten()[i]) 533 | 534 | 535 | 536 | 537 | 538 | 539 | 540 | 541 | 542 | fig, axes = plt.subplots(2, 3, figsize=(12, 8)) 543 | for i, (file, path) in enumerate(files.items()): 544 | df = pd.read_csv(path) 545 | dfti = get_Technical_Indicator(df) 546 | dfts = get_tsfeatures(df, mode="Efficient") 547 | redundant_cols = ['open', 'high', 'low', 'close', 'preclose', 'volume', 'amount'] 548 | sel_cols = [col for col in dfts.columns if col not in redundant_cols] 549 | dff = pd.merge(dfti, dfts[sel_cols]) 550 | _, df_log = train(dff, file) 551 | df_log.plot(title=file, ax=axes.flatten()[i]) 552 | 553 | 554 | 555 | 556 | files = os.listdir("data/") 557 | scores = {} 558 | 559 | 560 | 561 | for file in files: 562 | df = pd.read_csv("data/"+file) 563 | dfti = get_Technical_Indicator(df) 564 | dfts = get_tsfeatures(df) 565 | redundant_cols = ['open', 'high', 'low', 'close', 'preclose', 'volume', 'amount'] 566 | sel_cols = [col for col in dfts.columns if col not in redundant_cols] 567 | dff = pd.merge(dfti, dfts[sel_cols]) 568 | acc_test, _ = train(dff, file) 569 | scores[file[:-8]] = acc_test.item() 570 | 571 | 572 | acc_scores = pd.Series(scores) 573 | acc_scores.describe() 574 | acc_scores[acc_scores>0.55].sort_values(ascending=False) 575 | acc_scores[acc_scores>0.55].sort_values(ascending=True).plot(kind='barh') 576 | acc_scores.hist(bins=30) 577 | sr[sr>0.55].plot(kind="barh") 578 | --------------------------------------------------------------------------------