├── test_re
    ├── sh000904.day
    ├── sh000905.day
    ├── sh204001.day
    ├── sh204003.day
    ├── sh501005.day
    ├── sh501007.day
    ├── sh603708.day
    ├── sh603709.day
    ├── sh688001.day
    ├── sh688003.day
    ├── sh688081.day
    ├── sh880001.day
    ├── sh880002.day
    ├── sh880003.day
    ├── sz000012.day
    ├── sz000014.day
    ├── sz000028.day
    ├── sz000028.lc5
    ├── sz150030.day
    ├── sz150031.day
    ├── sz200512.day
    ├── sz200521.day
    ├── sz300045.day
    ├── sz300046.day
    ├── sz399608.day
    ├── sz399612.day
    ├── sh6037072.day
    └── sh688002sw.day
├── test_talib
    ├── sh601330.lc5
    └── sh60133033.day
├── README.md
├── .gitignore
├── test_talib.py
├── 通达信数据文件结构.txt
├── test_unpack_data_tdx.py
├── stock_data_collect.py
├── stock_data_process.py
└── Bak.py


/test_re/sh000904.day:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sh000904.day


--------------------------------------------------------------------------------
/test_re/sh000905.day:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sh000905.day


--------------------------------------------------------------------------------
/test_re/sh204001.day:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sh204001.day


--------------------------------------------------------------------------------
/test_re/sh204003.day:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sh204003.day


--------------------------------------------------------------------------------
/test_re/sh501005.day:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sh501005.day


--------------------------------------------------------------------------------
/test_re/sh501007.day:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sh501007.day


--------------------------------------------------------------------------------
/test_re/sh603708.day:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sh603708.day


--------------------------------------------------------------------------------
/test_re/sh603709.day:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sh603709.day


--------------------------------------------------------------------------------
/test_re/sh688001.day:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sh688001.day


--------------------------------------------------------------------------------
/test_re/sh688003.day:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sh688003.day


--------------------------------------------------------------------------------
/test_re/sh688081.day:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sh688081.day


--------------------------------------------------------------------------------
/test_re/sh880001.day:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sh880001.day


--------------------------------------------------------------------------------
/test_re/sh880002.day:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sh880002.day


--------------------------------------------------------------------------------
/test_re/sh880003.day:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sh880003.day


--------------------------------------------------------------------------------
/test_re/sz000012.day:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sz000012.day


--------------------------------------------------------------------------------
/test_re/sz000014.day:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sz000014.day


--------------------------------------------------------------------------------
/test_re/sz000028.day:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sz000028.day


--------------------------------------------------------------------------------
/test_re/sz000028.lc5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sz000028.lc5


--------------------------------------------------------------------------------
/test_re/sz150030.day:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sz150030.day


--------------------------------------------------------------------------------
/test_re/sz150031.day:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sz150031.day


--------------------------------------------------------------------------------
/test_re/sz200512.day:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sz200512.day


--------------------------------------------------------------------------------
/test_re/sz200521.day:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sz200521.day


--------------------------------------------------------------------------------
/test_re/sz300045.day:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sz300045.day


--------------------------------------------------------------------------------
/test_re/sz300046.day:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sz300046.day


--------------------------------------------------------------------------------
/test_re/sz399608.day:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sz399608.day


--------------------------------------------------------------------------------
/test_re/sz399612.day:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sz399612.day


--------------------------------------------------------------------------------
/test_re/sh6037072.day:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sh6037072.day


--------------------------------------------------------------------------------
/test_re/sh688002sw.day:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sh688002sw.day


--------------------------------------------------------------------------------
/test_talib/sh601330.lc5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_talib/sh601330.lc5


--------------------------------------------------------------------------------
/test_talib/sh60133033.day:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_talib/sh60133033.day


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # gpfx
2 | 通达信股票离线数据的分析与筛选。
3 | 主要使用 talib 金融指标库计算技术指标，测试并筛选符合指定要求的股票。
4 | stock_data_collect.py 用于提取通达信股票离线数据，并转换成可以直接使用的数据。
5 | stock_data_process.py 用于处理提取出来的数据，并制定筛选条件。
6 | 其他文件为测试文件。
7 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # pycharm
 2 | /.idea/
 3 | 
 4 | # Python:
 5 | *.py[cod]
 6 | 
 7 | # Environments
 8 | .env
 9 | .venv
10 | env/
11 | venv/
12 | ENV/
13 | env.bak/
14 | venv.bak/


--------------------------------------------------------------------------------
/test_talib.py:
--------------------------------------------------------------------------------
 1 | import pretty_errors
 2 | import numpy
 3 | import talib
# Smoke test for talib: run three indicators over 100 random values and print the results.
close = numpy.random.random(100)

# Moving-average family:
sma= talib.SMA(close, timeperiod=30) # simple moving average
# (author's note) In talib, MA and SMA are exactly the same function; EMA is the
# exponential moving average, commonly used in calculations.
u, m, l= talib.BBANDS(close, timeperiod=5, nbdevup=2, nbdevdn=2, matype=0)
# (author's note) Same as computing by hand: the upper/lower bands are the given
# multiple of the standard deviation of the current window.
dif, dem, histogram = talib.MACD(close, fastperiod=12, slowperiod=26, signalperiod=9)

print(sma)
print(u, m, l)
print(dif, dem, histogram)
16 | 


--------------------------------------------------------------------------------
/通达信数据文件结构.txt:
--------------------------------------------------------------------------------
 1 | 日线文件中，每32字节（32bytes）为一天的记录。
 2 | 　　每4个字节为一项数据：
 3 | 　　第1项，交易日期
 4 | 　　第2项，开盘价*100
 5 | 　　第3项，最高价*100
 6 | 　　第4项，最低价*100
 7 | 　　第5项，收盘价*100
 8 | 　　第6项，成交金额
 9 | 　　第7项，成交量
10 | 　　第8项，未使用
11 | 
12 | .day文件的格式为：
13 | {                 C++             python          unpack 参数
14 | 日期：          unsigned int       integer             I
15 | 开盘价*100      unsigned int       integer             I
16 | 最高价*100      unsigned int       integer             I
17 | 最低价*100      unsigned int       integer             I
18 | 收盘价*100      unsigned int       integer             I
19 | 成交额            float             float              f
20 | 成交量          unsigned int       integer             I
21 | 保留值          unsigned int       integer             I
22 | }
23 | 
24 | 通达信5分钟线*.lc5文件和*.lc1文件
25 |     文件名即股票代码
26 |     每32个字节为一个5分钟数据，每字段内低字节在前 时间日期合成4字节
27 |     00 ~ 01 字节：日期，整型，设其值为num，则日期计算方法为：
28 |                   year=floor(num/2048)+2004;
29 |                   month=floor(mod(num,2048)/100);
30 |                   day=mod(mod(num,2048),100);
 31 |     # 计算日期：floor() 为向下取整，如 Python 3 中 math.floor(100.72) 返回 100
32 |     ss = str(int(a[0] / 2048) + 2004) + '-' + str(int(a[0] % 2048 / 100)).zfill(2) + '-' + str(
33 |          a[0] % 2048 % 100).zfill(2), str(int(a[1] / 60)).zfill(2) + ':' + str(a[1] % 60).zfill(2) + ':00'
34 |     02 ~ 03 字节： 从0点开始至目前的分钟数，整型
35 | 
36 |     04 ~ 07 字节：开盘价，float型
37 |     08 ~ 11 字节：最高价，float型
38 |     12 ~ 15 字节：最低价，float型
39 |     16 ~ 19 字节：收盘价，float型
40 |     20 ~ 23 字节：成交额，float型
41 |     24 ~ 27 字节：成交量（股），整型
42 |     28 ~ 31 字节：（保留）
43 | 
44 | .lc5文件的格式为：
45 | {                 C++             python          unpack 参数
46 | 日期：          unsigned short     integer             H
47 | 时间：          unsigned short     integer             H
 48 | 开盘价            float             float              f
 49 | 最高价            float             float              f
 50 | 最低价            float             float              f
 51 | 收盘价            float             float              f
52 | 成交额            float             float              f
53 | 成交量          unsigned int       integer             I
54 | 保留值          unsigned int       integer             I
55 | }
56 | 


--------------------------------------------------------------------------------
/test_unpack_data_tdx.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import pretty_errors
  3 | from struct import unpack
  4 | import re
  5 | 
  6 | # sh600074.day sh688012.day
  7 | # re_rule = r'^s[hz][603][08]\d{4}\.[dl][ac][y5]'
  8 | re_rule = r'^sh6[08]\d{4}\.[dl][ac][y5]|^sz[03][0]\d{4}\.[dl][ac][y5]'
  9 | 
 10 | re_rule_sh = r'^sh6[08]\d{4}\.[dl][ac][y5]'
 11 | re_rule_sz = r'^sz[03]0\d{4}\.[dl][ac][y5]'
 12 | 
 13 | file_rule = re.compile(re_rule)
 14 | file_path = r"D:\通达信\new_tdx\vipdoc\sh\lday"
 15 | 
 16 | stock_day = {
 17 |     "stock_date": [],
 18 |     "stock_open": [],
 19 |     "stock_high": [],
 20 |     "stock_low": [],
 21 |     "stock_close": [],
 22 |     "stock_amount": [],
 23 |     "stock_vol": [],
 24 |     "stock_reservation": []
 25 | }
 26 | # 数据共用结构 参照数据结构文件说明
 27 | data_structure = {
 28 |     "stock_date": [],
 29 |     "stock_open": [],
 30 |     "stock_high": [],
 31 |     "stock_low": [],
 32 |     "stock_close": [],
 33 |     "stock_amount": [],
 34 |     "stock_vol": [],
 35 |     "stock_reservation": []
 36 | }
 37 | data_size = 32
 38 | analysis_cycle = 5
 39 | 
 40 | 
 41 | def open_file_directory(_file_directory):
 42 |     # print(tuple(os.walk(_file_directory)))
 43 |     root, dirs, files = tuple(os.walk(_file_directory))[0]
 44 |     print(os.path.splitext(files[0]))
 45 |     # for root, dirs, files in os.walk(_file_directory):
 46 |     # print(root)
 47 |     # print(dirs)
 48 |     # print(files)
 49 | 
 50 | 
 51 | def unpack_stock_day(file_url):
 52 |     with open(file_url, r'rb') as f:
 53 |         f.seek(-data_size * analysis_cycle, 2)
 54 |         for i in range(0, analysis_cycle):
 55 |             buff = f.read(32)
 56 |             _date, _open, _high, _low, _close, _amount, _vol, _reservation = unpack(r"IIIIIfII", buff)
 57 |             # print(_date)
 58 |             # print(_open/100)
 59 |             stock_day['stock_date'].append(_date)  # 4字节 如20091229
 60 |             stock_day['stock_open'].append(_open / 100)  # 开盘价*100
 61 |             stock_day['stock_high'].append(_high / 100)  # 最高价*100
 62 |             stock_day['stock_low'].append(_low / 100)  # 最低价*100
 63 |             stock_day['stock_close'].append(_close / 100)  # 收盘价*100
 64 |             stock_day['stock_amount'].append(_amount)  # 成交额
 65 |             stock_day['stock_vol'].append(_vol)  # 成交量
 66 |             stock_day['stock_reservation'].append(_reservation)  # 保留值
 67 |         # while f:
 68 |         #     stock_date = f.read(4)
 69 |         #     stock_open = f.read(4)
 70 |         #     stock_high = f.read(4)
 71 |         #     stock_low = f.read(4)
 72 |         #     stock_close = f.read(4)
 73 |         #     stock_amount = f.read(4)
 74 |         #     stock_vol = f.read(4)
 75 |         #     stock_reservation = f.read(4)
 76 |         #
 77 |         #     if not stock_date or i >= 5:
 78 |         #         break
 79 |         #     stock_day['stock_date'].append(unpack("I", stock_date)[0])  # 4字节 如20091229
 80 |         #     stock_day['stock_open'].append(unpack("I", stock_open)[0] / 100)  # 开盘价*100
 81 |         #     stock_day['stock_high'].append(unpack("I", stock_high)[0] / 100)  # 最高价*100
 82 |         #     stock_day['stock_low'].append(unpack("I", stock_low)[0] / 100)  # 最低价*100
 83 |         #     stock_day['stock_close'].append(unpack("I", stock_close)[0] / 100)  # 收盘价*100
 84 |         #     stock_day['stock_amount'].append(unpack("f", stock_amount))  # 成交额
 85 |         #     stock_day['stock_vol'].append(unpack("I", stock_vol))  # 成交量
 86 |         #     stock_day['stock_reservation'].append(unpack("I", stock_reservation))  # 保留值
 87 |         #     i = i + 1
 88 | 
 89 | 
 90 | def unpack_stock_lc5(file_url, _data_size=32, _analysis_cycle=5):
 91 |     with open(file_url, r'rb') as f:
 92 |         f.seek(-_data_size * _analysis_cycle, 2)
 93 |         for i in range(0, _analysis_cycle):
 94 |             buff = f.read(32)
 95 |             a0, a1, _open, _high, _low, _close, _amount, _vol, _reservation = unpack(r"HHfffffII", buff)
 96 |             # print(_date)
 97 |             # print(_open/100)
 98 |             # a = unpack("HH", stock_date)
 99 |             _date = str(int(a0 / 2048) + 2004) + '-' + str(int(a0 % 2048 / 100)).zfill(2) + '-' + str(
100 |                 a0 % 2048 % 100).zfill(2), str(int(a1 / 60)).zfill(2) + ':' + str(a1 % 60).zfill(
101 |                 2) + ':00'
102 |             stock_day['stock_date'].append(_date)  #
103 |             stock_day['stock_open'].append(round(_open, 2))  # 开盘价
104 |             stock_day['stock_high'].append(round(_high, 2))  # 最高价
105 |             stock_day['stock_low'].append(round(_low, 2))  # 最低价
106 |             stock_day['stock_close'].append(round(_close, 2))  # 收盘价
107 |             stock_day['stock_amount'].append(_amount)  # 成交额
108 |             stock_day['stock_vol'].append(_vol)  # 成交量
109 |             stock_day['stock_reservation'].append(_reservation)  # 保留值
110 | 
111 | 
def my_walk(_file_directory):
    """Print every matching data file of a directory with its size and seek branch.

    For each top-level file accepted by ``file_rule`` this prints the path, the
    size in bytes and whether the file is at least ``data_size * analysis_cycle``
    bytes long, then prints the list of matched paths.

    :param _file_directory: directory to scan (sub-directories are not entered)
    :return: None
    """
    # next() stops os.walk after the top-level entry instead of walking the
    # whole tree; the default covers a directory that does not exist.
    _root, _dirs, files = next(os.walk(_file_directory), (_file_directory, [], []))
    file_set = [os.path.join(_file_directory, file) for file in files if file_rule.match(file)]
    window = data_size * analysis_cycle
    for file_url in file_set:
        print(file_url)
        file_size = os.path.getsize(file_url)
        print(file_size)
        with open(file_url, 'rb') as fp:
            if file_size >= window:
                # Enough data: position at the start of the trailing window.
                fp.seek(-window, os.SEEK_END)
                print('大于')
            else:
                # Short file: start from the beginning.
                fp.seek(0, os.SEEK_SET)
                print('小于size')

    print(file_set)
131 | 
132 | 
if __name__ == "__main__":
    # Ad-hoc check of how a data-file path splits into base name, stem and extension.
    file_path_day_sh = r"F:\PycharmProjects\gpfx\test_re"
    file_url = r"F:\PycharmProjects\gpfx\test_re\sh000904.day"
    name2 = os.path.basename(file_url)
    name, ext = os.path.splitext(name2)
    for shown in (name, f"ext: {ext}", name2):
        print(shown)
154 | 


--------------------------------------------------------------------------------
/stock_data_collect.py:
--------------------------------------------------------------------------------
  1 | import pretty_errors
  2 | import os
  3 | from struct import unpack
  4 | import re
  5 | 
  6 | # 个股的匹配规则
  7 | re_rule = r'^sh6[08]\d{4}\.[dl][ac][y5]|^sz[03][0]\d{4}\.[dl][ac][y5]'
  8 | stock_file_rule = re.compile(re_rule)
  9 | # 板块指数的匹配规则
 10 | # todo: 修改板块指数的匹配模式
 11 | block_re_rule = r'^sh6[08]\d{4}\.[dl][ac][y5]|^sz[03][0]\d{4}\.[dl][ac][y5]'
 12 | block_rule = re.compile(block_re_rule)
 13 | 
 14 | # 数据共用结构 参照数据结构文件说明
 15 | data_structure = {
 16 |     "stock_date": [],
 17 |     "stock_open": [],
 18 |     "stock_high": [],
 19 |     "stock_low": [],
 20 |     "stock_close": [],
 21 |     "stock_amount": [],
 22 |     "stock_vol": [],
 23 |     "stock_reservation": [],
 24 |     "stock_num": []  # 增加一个股票代码的参数，方便满足条件时记录股票编码
 25 | }
 26 | data_ext = {'day': r".day", 'lc5': r".lc5"}
 27 | # 一个大的列表 储存stock_day stock_lc5 一个表 包含所有股票，股票的子表包含数据
 28 | # 每32字节（32bytes）为一组记录
 29 | data_size = 32
 30 | # 默认分析周期
 31 | analysis_cycle = 500
 32 | # 文件目录文件安装路径需自行修改
 33 | file_path_set = {
 34 |     'sh_day': r"D:\通达信\new_tdx\vipdoc\sh\lday",
 35 |     'sh_lc5': r"D:\通达信\new_tdx\vipdoc\sh\fzline",
 36 |     'sz_day': r"D:\通达信\new_tdx\vipdoc\sz\lday",
 37 |     'sz_lc5': r"D:\通达信\new_tdx\vipdoc\sz\fzline"
 38 | }
 39 | 
 40 | 
 41 | # 取得当前文件夹所有文件的列表
 42 | def walk_directory(_file_directory):
 43 |     """
 44 |     遍历文件夹中的文件，取得文件列表
 45 |     :param _file_directory: 文件夹
 46 |     :return: 返回文件夹中文件列表
 47 |     """
 48 |     # 取得文件夹里面的所有子文件
 49 |     root, dirs, files = tuple(os.walk(_file_directory))[0]
 50 |     # 取得判断扩展名
 51 |     # name, ext = os.path.splitext(files[0])
 52 | 
 53 |     # 生成文件列表
 54 |     _file_urls = []
 55 |     for file in files:
 56 |         # re 匹配文件
 57 |         if stock_file_rule.match(file):
 58 |             _file_urls.append(os.path.join(_file_directory, file))
 59 |     # print(file_urls)
 60 |     return _file_urls
 61 | 
 62 | 
 63 | # 直接把数据填充在data_structure中,而并不以返回值的方式传递
 64 | def unpack_stock_data(_file_url, _data_size=32, _analysis_cycle=1):
 65 |     """
 66 |     解析本地数据 .day .lc5 文件 最新数据需要自己手动从通达信软件中离线数据中下载
 67 |     :param _file_url: 数据文件
 68 |     :param _data_size: 数据结构大小 默认 32
 69 |     :param _analysis_cycle: 需要分析的数据周期 默认 1 只能是正整数(int)
 70 |     :return: 直接把数据填充在data_structure中,而并不以返回值的方式传递
 71 |     """
 72 |     name, ext = os.path.splitext(os.path.basename(_file_url))
 73 |     for key in data_structure.keys():
 74 |         data_structure[key] = []
 75 |     # data_copy = {}
 76 |     # 得到解析文件的大小
 77 |     file_size = os.path.getsize(_file_url)
 78 | 
 79 |     if data_ext['day'] == ext:
 80 |         # file_size = os.path.getsize(_file_url)
 81 |         with open(_file_url, r'rb') as fp:
 82 |             if file_size > _data_size * _analysis_cycle:
 83 |                 fp.seek(-_data_size * _analysis_cycle, os.SEEK_END)
 84 |                 range_cycle = _analysis_cycle
 85 |                 # print('大于')
 86 |             else:
 87 |                 fp.seek(0, os.SEEK_SET)
 88 |                 range_cycle = int(file_size / _data_size)
 89 |                 # print('小于size')
 90 | 
 91 |             # 提取数据
 92 |             for i in range(0, range_cycle):
 93 |                 buff = fp.read(32)
 94 |                 _date, _open, _high, _low, _close, _amount, _vol, _reservation = unpack(r"IIIIIfII", buff)
 95 |                 # print(_date)
 96 |                 # print(_open/100)
 97 |                 data_structure['stock_date'].append(_date)  # 4字节 如20091229
 98 |                 data_structure['stock_open'].append(_open / 100)  # 开盘价*100
 99 |                 data_structure['stock_high'].append(_high / 100)  # 最高价*100
100 |                 data_structure['stock_low'].append(_low / 100)  # 最低价*100
101 |                 data_structure['stock_close'].append(_close / 100)  # 收盘价*100
102 |                 data_structure['stock_amount'].append(_amount)  # 成交额
103 |                 data_structure['stock_vol'].append(_vol)  # 成交量
104 |                 data_structure['stock_reservation'].append(_reservation)  # 保留值
105 |                 data_structure['stock_num'].append(name)  # 股票代码
106 | 
107 |     # 按天算 5 分钟数据 天数*48(48 是一天的5分钟周期总和)
108 |     elif data_ext['lc5'] == ext:
109 |         # print('lc5')
110 |         # 打开读取文件 设置文件指针 设置读取文件的循环周期
111 |         # 48个周期为一天4小时的5分钟数据
112 |         _analysis_cycle = _analysis_cycle * 48
113 |         with open(_file_url, r'rb') as fp:
114 |             if file_size > _data_size * _analysis_cycle:
115 |                 fp.seek(-_data_size * _analysis_cycle, os.SEEK_END)
116 |                 range_cycle = _analysis_cycle
117 |                 # print('大于')
118 |             else:
119 |                 fp.seek(0, os.SEEK_SET)
120 |                 range_cycle = int(file_size / _data_size)
121 |                 # print('小于size')
122 | 
123 |             for i in range(0, range_cycle):
124 |                 buff = fp.read(32)
125 |                 a0, a1, _open, _high, _low, _close, _amount, _vol, _reservation = unpack(r"HHfffffII", buff)
126 | 
127 |                 _date = str(int(a0 / 2048) + 2004) + '-' + str(int(a0 % 2048 / 100)).zfill(2) + '-' + str(
128 |                     a0 % 2048 % 100).zfill(2), str(int(a1 / 60)).zfill(2) + ':' + str(a1 % 60).zfill(
129 |                     2) + ':00'
130 |                 data_structure['stock_date'].append(_date)  # (日期, 时间)
131 |                 data_structure['stock_open'].append(round(_open, 2))  # 开盘价
132 |                 data_structure['stock_high'].append(round(_high, 2))  # 最高价
133 |                 data_structure['stock_low'].append(round(_low, 2))  # 最低价
134 |                 data_structure['stock_close'].append(round(_close, 2))  # 收盘价
135 |                 data_structure['stock_amount'].append(round(_amount, 2))  # 成交额
136 |                 data_structure['stock_vol'].append(_vol)  # 成交量
137 |                 data_structure['stock_reservation'].append(_reservation)  # 保留值
138 |                 data_structure['stock_num'].append(name)
139 | 
140 | 
if __name__ == "__main__":
    # Point the Shanghai daily slot at the project's test_talib directory, then
    # parse every matching file found there and dump the parsed columns.
    file_path_set.update(sh_day=r'F:\PycharmProjects\gpfx\test_talib')
    file_urls = walk_directory(file_path_set['sh_day'])

    for file_url in file_urls:
        unpack_stock_data(file_url, _analysis_cycle=50)
        print(data_structure)
161 | 


--------------------------------------------------------------------------------
/stock_data_process.py:
--------------------------------------------------------------------------------
  1 | # import pretty_errors
  2 | import numpy
  3 | # talib 金融分析库
  4 | import talib
  5 | from stock_data_collect import *
  6 | 
  7 | # 思路：用指标一层一层筛选，先用macd选择大范围
  8 | 
  9 | # 通达信 macd 指标
 10 | # macd筛选策略
 11 | set_macd_30 = set()
 12 | set_macd_60 = set()
 13 | set_macd_day = set()
 14 | 
 15 | 
 16 | def stock_filter_macd(_data_structure, end):
 17 |     """
 18 |     :param _data_structure: 数据的结构
 19 |     :param end: end 代表离最后周期的参数，筛选最近的
 20 |     :return: 符合条件的数据集合
 21 |     """
 22 |     # MACD = 2 * histogram 周期参数(12,26,9)
 23 |     dif, dea, histogram = talib.MACD(numpy.array(_data_structure['stock_close']), fastperiod=12, slowperiod=26,
 24 |                                      signalperiod=9)
 25 |     # 周期参数(6, 13, 9)
 26 |     dif2, dea2, histogram2 = talib.MACD(numpy.array(_data_structure['stock_close']), fastperiod=6, slowperiod=13,
 27 |                                         signalperiod=4)
 28 |     # dif, dea, macd = talib.MACDEXT(numpy.array(_data_structure['stock_close']), fastperiod=12, fastmatype=0,
 29 |     #                                slowperiod=26, slowmatype=0, signalperiod=9, signalmatype=0)
 30 |     # dif, dea, macd = talib.MACDEXT(numpy.array(_data_structure['stock_close']), signalperiod=9)
 31 |     # 1、金叉, 二次金叉
 32 |     # 记录金叉时间
 33 |     dif_cross_dea = []
 34 |     # cross_resonance 周期共振
 35 |     cross_resonance = []
 36 |     # 背离位置记录
 37 |     deviate_from_macd = []
 38 |     deviate_from_macd2 = []
 39 |     tmp = set()
 40 |     for i in range(len(dif)):
 41 |         if dif[i - 1] <= dea[i - 1] and dif[i] > dea[i]:
 42 |             pass
 43 |             # dif_cross_dea.append((_data_structure['stock_date'], i))  # 记录金叉的时间跟位置元组 (时间，位置)
 44 |             # print('金叉')
 45 |         # 2、背离
 46 |         # for i in range(len(histogram)):
 47 |         elif histogram[i] >= histogram[i - 1] and \
 48 |                 _data_structure['stock_close'][i] <= _data_structure['stock_close'][i - 1]:
 49 |             pass
 50 |             # deviate_from_macd.append((_data_structure['stock_date'], i))  # 记录背离的时间跟位置元组 (时间，位置)
 51 |             # print('macd 背离')
 52 |         # 3、共振
 53 |         # 短周期dea2走平上拐，长周期dif周平上拐，买点
 54 |         elif dea2[i - 1] <= dea2[i] and dif[i - 1] <= dif[i] and len(dif) <= i + end:
 55 |             # cross_resonance.append((_data_structure['stock_date'], i))  # 记录共振的时间跟位置元组 (时间，位置)
 56 |             tmp.add(_data_structure['stock_num'])
 57 | 
 58 |     return tmp
 59 | 
 60 | 
 61 | # 通达信 trix指标
 62 | set_trix_30 = set()
 63 | set_trix_60 = set()
 64 | set_trix_day = set()
 65 | 
 66 | 
 67 | def stock_filter_trix(_data_structure, end):
 68 |     """
 69 |     :param _data_structure: 数据的结构
 70 |     :param end: end 代表离最后周期的参数，筛选最近的
 71 |     :return: 符合条件的数据集合
 72 |     """
 73 |     trix = talib.TRIX(numpy.array(_data_structure['stock_close']), timeperiod=12)
 74 |     matrix = talib.MA(trix, timeperiod=9, matype=0)
 75 |     # 记录 trix 走平 金叉位置
 76 |     tmp = set()
 77 |     for i in range(len(trix)):
 78 |         if trix[i - 1] <= trix[i] and len(trix) <= i + end:
 79 |             tmp.add(_data_structure['stock_num'])
 80 | 
 81 |     # return tmp
 82 | 
 83 |     tmp1 = set()
 84 |     for i in range(len(matrix)):
 85 |         if matrix[i - 1] <= matrix[i] and len(matrix) <= i + end:
 86 |             tmp1.add(_data_structure['stock_num'])
 87 | 
 88 |     return tmp1
 89 |     # print(trix)
 90 |     # print(matrix)
 91 | 
 92 | 
 93 | # 通达信 marsi 指标
 94 | set_marsi_30 = set()
 95 | set_marsi_60 = set()
 96 | set_marsi_day = set()
 97 | 
 98 | 
 99 | # 筛选marsi10 最后end 周期内拐头向上的
100 | def stock_filter_marsi(_data_structure, end):
101 |     # rsi6 = talib.RSI(numpy.array(_data_structure['stock_close']), timeperiod=6)
102 |     # marsi6 = talib.MA(rsi6, timeperiod=6, matype=0)
103 |     rsi10 = talib.RSI(numpy.array(_data_structure['stock_close']), timeperiod=10)
104 |     marsi10 = talib.MA(rsi10, timeperiod=10, matype=0)
105 |     tmp = set()
106 |     for i in range(len(marsi10)):
107 |         if marsi10[i - 1] <= marsi10[i] and len(marsi10) <= i + end:
108 |             tmp.add(_data_structure['stock_num'])
109 | 
110 |     return tmp
111 |     # print(rsi10)
112 |     # print(marsi6)
113 |     # print(marsi10)
114 | 
115 | 
def stock_filter_cci(_data_structure):
    """Placeholder for the CCI filter; not implemented yet, so it returns ``None``."""
    return None
118 | 
119 | 
120 | # def stock_filter_fsl(_data_structure):
121 | #     pass
122 | 
def stock_filter_average(_data_structure):
    """
    Compute the 10-, 20- and 30-period moving averages of the closing prices.

    The original computed the three averages and discarded them (returning
    ``None``); they are now returned so the work is usable.
    :param _data_structure: shared data structure; only 'stock_close' is read
    :return: tuple ``(ma10, ma20, ma30)`` of the arrays produced by ``talib.MA``
    """
    # Convert once instead of once per average.
    closes = numpy.array(_data_structure['stock_close'])
    return tuple(talib.MA(closes, timeperiod=period) for period in (10, 20, 30))
127 | 
128 | 
# Mirrors the most recent cycle_transform result, so the source data is never modified.
data_copy = {}


# Merge 5-minute bars into 30/60-minute bars; daily bars work the same way.
def cycle_transform(_data_structure, _timeperiod=6):
    """
    Merge every ``_timeperiod`` consecutive records into one longer-period record.

    When the record count is not a multiple of ``_timeperiod``, the oldest
    surplus records are dropped so that the newest group is always complete.
    Each merged record takes the date and close of its last source record, the
    open of its first, the highest high, the lowest low and the summed amount
    and volume. 'stock_reservation' is not carried over and 'stock_num' is
    passed through unchanged.
    :param _data_structure: shared data structure holding the source records
    :param _timeperiod: records per merged bar: 6 for 30 minutes (default),
        12 for 60 minutes, 5 for weekly bars from daily data
    :return: a new dict per call with the merged columns; ``data_copy`` is
        updated to mirror it
    :raises ValueError: if ``_timeperiod`` is smaller than 1
    """
    if _timeperiod < 1:
        raise ValueError("_timeperiod must be a positive integer")

    il = len(_data_structure['stock_date'])
    # Number of oldest records that do not fill a whole group.
    ir = il % _timeperiod

    # Build a fresh dict with every column present. The module-level dict
    # started out empty, so the first append raised KeyError, and returning the
    # shared dict made successive results alias each other.
    result = {
        key: []
        for key in ('stock_date', 'stock_open', 'stock_high', 'stock_low',
                    'stock_close', 'stock_amount', 'stock_vol')
    }
    result['stock_num'] = _data_structure.get('stock_num', [])

    # Each group covers the source records [base:stop].
    for base in range(ir, il, _timeperiod):
        stop = base + _timeperiod
        result['stock_date'].append(_data_structure['stock_date'][stop - 1])
        result['stock_open'].append(_data_structure['stock_open'][base])
        result['stock_close'].append(_data_structure['stock_close'][stop - 1])
        result['stock_high'].append(max(_data_structure['stock_high'][base:stop]))
        result['stock_low'].append(min(_data_structure['stock_low'][base:stop]))
        result['stock_amount'].append(sum(_data_structure['stock_amount'][base:stop]))
        result['stock_vol'].append(sum(_data_structure['stock_vol'][base:stop]))

    # Keep the module-level mirror in step for code that reads data_copy directly.
    data_copy.clear()
    data_copy.update(result)
    return result
175 | 
176 | 
if __name__ == '__main__':
    # Manual smoke run against a local directory of test data.
    # Earlier experiment with the daily files and the trix / marsi filters,
    # kept for reference:
    # file_path_set['sh_day'] = r'F:\PycharmProjects\gpfx\test_talib'
    # unpack_stock_data(file_path_set['sh_day'], _analysis_cycle=150)

    # print(data_structure['stock_close'])
    # stock_filter_trix(data_structure)
    # stock_filter_marsi(data_structure)

    # Point the 'sh_lc5' entry at the local test directory and list its files.
    file_path_set['sh_lc5'] = r'F:\PycharmProjects\gpfx\test_talib'
    file_urls = walk_directory(file_path_set['sh_lc5'])

    for file_url in file_urls:
        # Load the file into the shared data_structure.
        # NOTE(review): _analysis_cycle=2 presumably means two days of bars -
        # confirm against unpack_stock_data.
        unpack_stock_data(file_url, _analysis_cycle=2)
        # Convert the data period: groups of 6 bars (30-minute data) ...
        date_transform_30 = cycle_transform(data_structure, 6)
        # print(date_transform_30['stock_date'])
        # print(date_transform_30['stock_open'])
        # ... and groups of 12 bars (60-minute data). Neither result is used yet.
        date_transform_60 = cycle_transform(data_structure, 12)
        # print(date_transform_60['stock_date'])
        # print(date_transform_60['stock_open'])
197 | 


--------------------------------------------------------------------------------
/Bak.py:
--------------------------------------------------------------------------------
  1 | import pretty_errors
  2 | import os
  3 | from struct import unpack
  4 | import re
  5 | 
  6 | # talib 金融分析库
  7 | # import talib
  8 | # import numpy
  9 | # import matplotlib.pyplot as plt
 10 | # import matplotlib as mpl
 11 | 
 12 | # 使用ggplot样式，好看些
 13 | # mpl.style.use("ggplot")
 14 | 
 15 | # 个股的匹配规则
 16 | re_rule = r'^sh6[08]\d{4}\.[dl][ac][y5]|^sz[03][0]\d{4}\.[dl][ac][y5]'
 17 | stock_file_rule = re.compile(re_rule)
 18 | # 板块指数的匹配规则
 19 | # todo: 修改板块指数的匹配模式
 20 | block_re_rule = r'^sh6[08]\d{4}\.[dl][ac][y5]|^sz[03][0]\d{4}\.[dl][ac][y5]'
 21 | block_rule = re.compile(block_re_rule)
 22 | 
 23 | # 数据共用结构 参照数据结构文件说明
 24 | data_structure = {
 25 |     "stock_date": [],
 26 |     "stock_open": [],
 27 |     "stock_high": [],
 28 |     "stock_low": [],
 29 |     "stock_close": [],
 30 |     "stock_amount": [],
 31 |     "stock_vol": [],
 32 |     "stock_reservation": [],
 33 |     "stock_num": []  # 增加一个股票代码的参数，方便满足条件时记录股票编码
 34 | }
 35 | data_ext = {'day': r".day", 'lc5': r".lc5"}
 36 | # 一个大的列表 储存stock_day stock_lc5 一个表 包含所有股票，股票的子表包含数据
 37 | # 每32字节（32bytes）为一组记录
 38 | data_size = 32
 39 | # 默认分析周期
 40 | analysis_cycle = 500
 41 | # 文件目录
 42 | file_path_set = {
 43 |     'sh_day': r"D:\通达信\new_tdx\vipdoc\sh\lday",
 44 |     'sh_lc5': r"D:\通达信\new_tdx\vipdoc\sh\fzline",
 45 |     'sz_day': r"D:\通达信\new_tdx\vipdoc\sz\lday",
 46 |     'sz_lc5': r"D:\通达信\new_tdx\vipdoc\sz\fzline"
 47 | }
 48 | 
 49 | 
 50 | def walk_directory(_file_directory):
 51 |     # 取得文件夹里面的所有子文件
 52 |     root, dirs, files = tuple(os.walk(_file_directory))[0]
 53 |     # 取得判断扩展名
 54 |     # name, ext = os.path.splitext(files[0])
 55 | 
 56 |     # 生成文件列表
 57 |     file_urls = []
 58 |     for file in files:
 59 |         # re 匹配文件
 60 |         if stock_file_rule.match(file):
 61 |             file_urls.append(os.path.join(_file_directory, file))
 62 |     # print(file_urls)
 63 |     return file_urls
 64 | 
 65 | 
 66 | def unpack_stock_data(_file_url, _data_size=32, _analysis_cycle=1):
 67 |     """
 68 |     # 解析本地数据 .day .lc5 文件
 69 |     :param _file_url: 数据文件
 70 |     :param _data_size: 数据结构大小 默认 32
 71 |     :param _analysis_cycle: 需要分析的数据周期 默认 1 只能是正整数(int)
 72 |     :return:
 73 |     """
 74 | 
 75 |     # # 取得文件夹里面的所有子文件
 76 |     # root, dirs, files = tuple(os.walk(_file_directory))[0]
 77 |     # 取得判断扩展名
 78 |     # name, ext = os.path.splitext(files[0])
 79 |     # name, ext = os.path.splitext(_file_urls)
 80 |     #
 81 |     # # 生成文件列表
 82 |     # file_urls = []
 83 |     # for file in files:
 84 |     #     # re 匹配文件
 85 |     #     if stock_file_rule.match(file):
 86 |     #         file_urls.append(os.path.join(_file_directory, file))
 87 |     # # print(file_urls)
 88 | 
 89 |     name, ext = os.path.splitext(os.path.basename(_file_url))
 90 |     for key in data_structure.keys():
 91 |         data_structure[key] = []
 92 | 
 93 |     if data_ext['day'] == ext:
 94 |         # 遍历文件
 95 |         # for file_url in _file_url:
 96 |             # 取得判断扩展名
 97 |             # name2 = os.path.basename(file_url)
 98 |             # name, ext = os.path.splitext(os.path.basename(file_url))
 99 |             # name, ext = os.path.splitext(file_url)
100 | 
101 |             file_size = os.path.getsize(_file_url)
102 |             # print(file_url)
103 |             # 清空原本的数据
104 |             # data_structure.clear()
105 |             # for key in data_structure.keys():
106 |             #     data_structure[key] = []
107 |             # 打开读取文件 设置文件指针 设置读取文件的循环周期
108 |             # 解决文件根本不够分析周期的问题
109 |             # 获取文件内容大小，区分读取方式
110 |             # if data_ext['day'] == ext:
111 |             #     pass
112 |             with open(_file_url, r'rb') as fp:
113 |                 if file_size > _data_size * _analysis_cycle:
114 |                     fp.seek(-_data_size * _analysis_cycle, os.SEEK_END)
115 |                     range_cycle = _analysis_cycle
116 |                     # print('大于')
117 |                 else:
118 |                     fp.seek(0, os.SEEK_SET)
119 |                     range_cycle = int(file_size / _data_size)
120 |                     # print('小于size')
121 | 
122 |                 # 提取数据
123 |                 for i in range(0, range_cycle):
124 |                     buff = fp.read(32)
125 |                     _date, _open, _high, _low, _close, _amount, _vol, _reservation = unpack(r"IIIIIfII", buff)
126 |                     # print(_date)
127 |                     # print(_open/100)
128 |                     data_structure['stock_date'].append(_date)  # 4字节 如20091229
129 |                     data_structure['stock_open'].append(_open / 100)  # 开盘价*100
130 |                     data_structure['stock_high'].append(_high / 100)  # 最高价*100
131 |                     data_structure['stock_low'].append(_low / 100)  # 最低价*100
132 |                     data_structure['stock_close'].append(_close / 100)  # 收盘价*100
133 |                     data_structure['stock_amount'].append(_amount)  # 成交额
134 |                     data_structure['stock_vol'].append(_vol)  # 成交量
135 |                     data_structure['stock_reservation'].append(_reservation)  # 保留值
136 |                     data_structure['stock_num'].append(name)  # 股票代码
137 | 
138 |             # stock_ta_lib(data_structure)
139 |             # fp.close()
140 |     # 按天算 5 分钟数据 天数*48(48 是一天的5分钟周期总和)
141 |     elif data_ext['lc5'] == ext:
142 |         # print('lc5')
143 |         for file_url in _file_url:
144 |             # 取得判断扩展名
145 |             # name2 = os.path.basename(file_url)
146 |             # name, ext = os.path.splitext(os.path.basename(file_url))
147 |             # name, ext = os.path.splitext(file_url)
148 |             file_size = os.path.getsize(file_url)
149 |             # print(file_url)
150 |             # 清空原本的数据
151 |             # data_structure.clear()
152 |             for key in data_structure.keys():
153 |                 data_structure[key] = []
154 | 
155 |             # 打开读取文件 设置文件指针 设置读取文件的循环周期
156 |             # 48个周期为一天4小时的5分钟数据
157 |             _analysis_cycle = _analysis_cycle * 48
158 |             with open(file_url, r'rb') as fp:
159 |                 if file_size > _data_size * _analysis_cycle:
160 |                     fp.seek(-_data_size * _analysis_cycle, os.SEEK_END)
161 |                     range_cycle = _analysis_cycle
162 |                     # print('大于')
163 |                 else:
164 |                     fp.seek(0, os.SEEK_SET)
165 |                     range_cycle = int(file_size / _data_size)
166 |                     # print('小于size')
167 |                 for i in range(0, range_cycle):
168 |                     buff = fp.read(32)
169 |                     a0, a1, _open, _high, _low, _close, _amount, _vol, _reservation = unpack(r"HHfffffII", buff)
170 |                     # print(_date)
171 |                     # print(_open/100)
172 |                     # a = unpack("HH", stock_date)
173 |                     _date = str(int(a0 / 2048) + 2004) + '-' + str(int(a0 % 2048 / 100)).zfill(2) + '-' + str(
174 |                         a0 % 2048 % 100).zfill(2), str(int(a1 / 60)).zfill(2) + ':' + str(a1 % 60).zfill(
175 |                         2) + ':00'
176 |                     data_structure['stock_date'].append(_date)  # (日期, 时间)
177 |                     data_structure['stock_open'].append(round(_open, 2))  # 开盘价
178 |                     data_structure['stock_high'].append(round(_high, 2))  # 最高价
179 |                     data_structure['stock_low'].append(round(_low, 2))  # 最低价
180 |                     data_structure['stock_close'].append(round(_close, 2))  # 收盘价
181 |                     data_structure['stock_amount'].append(round(_amount, 2))  # 成交额
182 |                     data_structure['stock_vol'].append(_vol)  # 成交量
183 |                     data_structure['stock_reservation'].append(_reservation)  # 保留值
184 |                     data_structure['stock_num'].append(name)
185 | 
186 | 
if __name__ == "__main__":
    # Manual smoke run: point the 'sh_day' entry at a local test directory,
    # call the parser and dump the shared structure.
    # NOTE(review): a directory path is passed here, while unpack_stock_data
    # takes the extension of a single file path. 'test_talib' has no extension,
    # so neither the .day nor the .lc5 branch runs and the printed structure
    # holds only empty lists - confirm whether this should loop over
    # walk_directory() instead.
    file_path_set['sh_day'] = r'F:\PycharmProjects\gpfx\test_talib'
    unpack_stock_data(file_path_set['sh_day'], _analysis_cycle=50)
    print(data_structure)
    pass
    # Leftover calls and debug prints from an earlier version of this script:
    # unpack_stock_day(r"F:\PycharmProjects\gpfx\sz000028.day")
    # unpack_stock_lc5(r"F:\PycharmProjects\gpfx\sz000028.lc5")
    # print(stock_lc5['stock_date'])
    # print(stock_lc5['stock_open'])
    # print(stock_day['stock_date'])
    # print(stock_day['stock_open'])
    # print(stock_day['stock_high'])
    # print(stock_day['stock_low'])
    # print(stock_day['stock_close'])
    # print(stock_day['stock_amount'])
    # print(stock_day['stock_vol'])
    # print(stock_day['stock_reservation'])
204 | 


--------------------------------------------------------------------------------