├── test_re ├── sh000904.day ├── sh000905.day ├── sh204001.day ├── sh204003.day ├── sh501005.day ├── sh501007.day ├── sh603708.day ├── sh603709.day ├── sh688001.day ├── sh688003.day ├── sh688081.day ├── sh880001.day ├── sh880002.day ├── sh880003.day ├── sz000012.day ├── sz000014.day ├── sz000028.day ├── sz000028.lc5 ├── sz150030.day ├── sz150031.day ├── sz200512.day ├── sz200521.day ├── sz300045.day ├── sz300046.day ├── sz399608.day ├── sz399612.day ├── sh6037072.day └── sh688002sw.day ├── test_talib ├── sh601330.lc5 └── sh60133033.day ├── README.md ├── .gitignore ├── test_talib.py ├── 通达信数据文件结构.txt ├── test_unpack_data_tdx.py ├── stock_data_collect.py ├── stock_data_process.py └── Bak.py /test_re/sh000904.day: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sh000904.day -------------------------------------------------------------------------------- /test_re/sh000905.day: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sh000905.day -------------------------------------------------------------------------------- /test_re/sh204001.day: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sh204001.day -------------------------------------------------------------------------------- /test_re/sh204003.day: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sh204003.day -------------------------------------------------------------------------------- /test_re/sh501005.day: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sh501005.day -------------------------------------------------------------------------------- /test_re/sh501007.day: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sh501007.day -------------------------------------------------------------------------------- /test_re/sh603708.day: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sh603708.day -------------------------------------------------------------------------------- /test_re/sh603709.day: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sh603709.day -------------------------------------------------------------------------------- /test_re/sh688001.day: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sh688001.day -------------------------------------------------------------------------------- /test_re/sh688003.day: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sh688003.day -------------------------------------------------------------------------------- /test_re/sh688081.day: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sh688081.day -------------------------------------------------------------------------------- /test_re/sh880001.day: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sh880001.day -------------------------------------------------------------------------------- /test_re/sh880002.day: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sh880002.day -------------------------------------------------------------------------------- /test_re/sh880003.day: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sh880003.day -------------------------------------------------------------------------------- /test_re/sz000012.day: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sz000012.day -------------------------------------------------------------------------------- /test_re/sz000014.day: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sz000014.day -------------------------------------------------------------------------------- /test_re/sz000028.day: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sz000028.day -------------------------------------------------------------------------------- /test_re/sz000028.lc5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sz000028.lc5 -------------------------------------------------------------------------------- /test_re/sz150030.day: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sz150030.day -------------------------------------------------------------------------------- /test_re/sz150031.day: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sz150031.day -------------------------------------------------------------------------------- /test_re/sz200512.day: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sz200512.day -------------------------------------------------------------------------------- /test_re/sz200521.day: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sz200521.day -------------------------------------------------------------------------------- /test_re/sz300045.day: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sz300045.day -------------------------------------------------------------------------------- /test_re/sz300046.day: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sz300046.day -------------------------------------------------------------------------------- /test_re/sz399608.day: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sz399608.day -------------------------------------------------------------------------------- /test_re/sz399612.day: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sz399612.day -------------------------------------------------------------------------------- /test_re/sh6037072.day: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sh6037072.day -------------------------------------------------------------------------------- /test_re/sh688002sw.day: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_re/sh688002sw.day -------------------------------------------------------------------------------- /test_talib/sh601330.lc5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_talib/sh601330.lc5 -------------------------------------------------------------------------------- /test_talib/sh60133033.day: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/niaicmy/gpfx/HEAD/test_talib/sh60133033.day -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # gpfx 2 | 通达信股票离线数据分析,筛选 3 | 主要使用talib 金融数据库指标 测试 筛选符合指定要求的股票 4 | stock_data_collect.py 用于提取通达信股票离线数据转换成可以直接使用的数据 5 | stock_data_process.py 用于处理提取出来的数据,制定筛选条件 6 | 其他文件为测试文件 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # pycharm 2 | /.idea/ 3 | 4 | # Python: 5 | *.py[cod] 6 | 7 | # Environments 8 | .env 9 | .venv 10 | env/ 11 | venv/ 12 | ENV/ 13 | env.bak/ 14 | venv.bak/ -------------------------------------------------------------------------------- /test_talib.py: -------------------------------------------------------------------------------- 1 | import pretty_errors 2 | import numpy 3 | import talib 4 | close = numpy.random.random(100) 5 | 6 | # 均线系: 7 | sma= talib.SMA(close, timeperiod=30) #simple moving average 8 | # talib中MA与SMA是完全相同的函数, EMA是指数滑动平均, 常用于计算 9 | u, m, l= talib.BBANDS(close, timeperiod=5, nbdevup=2, nbdevdn=2, matype=0) 10 | # 与手动运算的相同, 其上下界是当前段标准差的指定倍数 11 | dif, dem, histogram = talib.MACD(close, fastperiod=12, slowperiod=26, signalperiod=9) 12 | 13 | print(sma) 14 | print(u, m, l) 15 | print(dif, dem, histogram) 16 | -------------------------------------------------------------------------------- /通达信数据文件结构.txt: -------------------------------------------------------------------------------- 1 | 日线文件中,每32字节(32bytes)为一天的记录。 2 |   每4个字节为一项数据: 3 |   第1项,交易日期 4 |   第2项,开盘价*100 5 |   第3项,最高价*100 6 |   第4项,最低价*100 7 |   第5项,收盘价*100 8 |   第6项,成交金额 9 |   第7项,成交量 10 |   第8项,未使用 11 | 12 | .day文件的格式为: 13 | { C++ python unpack 参数 14 | 日期: unsigned int integer I 15 | 开盘价*100 unsigned int integer I 16 | 最高价*100 unsigned int integer I 17 | 最低价*100 unsigned int integer I 18 | 收盘价*100 unsigned int integer I 19 | 成交额 float float f 20 | 成交量 unsigned int integer I 21 | 保留值 unsigned int integer I 22 | } 23 | 24 | 通达信5分钟线*.lc5文件和*.lc1文件 25 | 文件名即股票代码 26 | 每32个字节为一个5分钟数据,每字段内低字节在前 时间日期合成4字节 27 | 00 ~ 01 字节:日期,整型,设其值为num,则日期计算方法为: 28 | year=floor(num/2048)+2004; 29 | month=floor(mod(num,2048)/100); 30 | day=mod(mod(num,2048),100); 31 | # 计算日期 floor() 返回数字的下舍整数 如 math.floor(100.72) 输出 100.0 相当于取整 32 | ss = str(int(a[0] / 2048) + 2004) + '-' + str(int(a[0] % 2048 / 100)).zfill(2) + '-' + str( 33 | a[0] % 2048 % 100).zfill(2), str(int(a[1] / 60)).zfill(2) + ':' + str(a[1] % 60).zfill(2) + ':00' 34 | 02 ~ 03 字节: 从0点开始至目前的分钟数,整型 35 | 36 | 04 ~ 07 字节:开盘价,float型 37 | 08 ~ 11 字节:最高价,float型 38 | 12 ~ 15 字节:最低价,float型 39 | 16 ~ 19 字节:收盘价,float型 40 | 20 ~ 23 字节:成交额,float型 41 | 24 ~ 27 字节:成交量(股),整型 42 | 28 ~ 31 字节:(保留) 43 | 44 | .lc5文件的格式为: 45 | { C++ python unpack 参数 46 | 日期: unsigned short integer H 47 | 时间: unsigned short integer H 48 | 开盘价*100 float float f 49 | 最高价*100 float float f 50 | 最低价*100 float float f 51 | 收盘价*100 float float f 52 | 成交额 float float f 53 | 成交量 unsigned int integer I 54 | 保留值 unsigned int integer I 55 | } 56 | -------------------------------------------------------------------------------- /test_unpack_data_tdx.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pretty_errors 3 | from struct import unpack 4 | import re 5 | 6 | # sh600074.day sh688012.day 7 | # re_rule = r'^s[hz][603][08]\d{4}\.[dl][ac][y5]' 8 | re_rule = r'^sh6[08]\d{4}\.[dl][ac][y5]|^sz[03][0]\d{4}\.[dl][ac][y5]' 9 | 10 | re_rule_sh = r'^sh6[08]\d{4}\.[dl][ac][y5]' 11 | re_rule_sz = r'^sz[03]0\d{4}\.[dl][ac][y5]' 12 | 13 | file_rule = re.compile(re_rule) 14 | file_path = r"D:\通达信\new_tdx\vipdoc\sh\lday" 15 | 16 | stock_day = { 17 | "stock_date": [], 18 | "stock_open": [], 19 | "stock_high": [], 20 | "stock_low": [], 21 | "stock_close": [], 22 | "stock_amount": [], 23 | "stock_vol": [], 24 | "stock_reservation": [] 25 | } 26 | # 数据共用结构 参照数据结构文件说明 27 | data_structure = { 28 | "stock_date": [], 29 | "stock_open": [], 30 | "stock_high": [], 31 | "stock_low": [], 32 | "stock_close": [], 33 | "stock_amount": [], 34 | "stock_vol": [], 35 | "stock_reservation": [] 36 | } 37 | data_size = 32 38 | analysis_cycle = 5 39 | 40 | 41 | def open_file_directory(_file_directory): 42 | # print(tuple(os.walk(_file_directory))) 43 | root, dirs, files = tuple(os.walk(_file_directory))[0] 44 | print(os.path.splitext(files[0])) 45 | # for root, dirs, files in os.walk(_file_directory): 46 | # print(root) 47 | # print(dirs) 48 | # print(files) 49 | 50 | 51 | def unpack_stock_day(file_url): 52 | with open(file_url, r'rb') as f: 53 | f.seek(-data_size * analysis_cycle, 2) 54 | for i in range(0, analysis_cycle): 55 | buff = f.read(32) 56 | _date, _open, _high, _low, _close, _amount, _vol, _reservation = unpack(r"IIIIIfII", buff) 57 | # print(_date) 58 | # print(_open/100) 59 | stock_day['stock_date'].append(_date) # 4字节 如20091229 60 | stock_day['stock_open'].append(_open / 100) # 开盘价*100 61 | stock_day['stock_high'].append(_high / 100) # 最高价*100 62 | stock_day['stock_low'].append(_low / 100) # 最低价*100 63 | stock_day['stock_close'].append(_close / 100) # 收盘价*100 64 | stock_day['stock_amount'].append(_amount) # 成交额 65 | stock_day['stock_vol'].append(_vol) # 成交量 66 | stock_day['stock_reservation'].append(_reservation) # 保留值 67 | # while f: 68 | # stock_date = f.read(4) 69 | # stock_open = f.read(4) 70 | # stock_high = f.read(4) 71 | # stock_low = f.read(4) 72 | # stock_close = f.read(4) 73 | # stock_amount = f.read(4) 74 | # stock_vol = f.read(4) 75 | # stock_reservation = f.read(4) 76 | # 77 | # if not stock_date or i >= 5: 78 | # break 79 | # stock_day['stock_date'].append(unpack("I", stock_date)[0]) # 4字节 如20091229 80 | # stock_day['stock_open'].append(unpack("I", stock_open)[0] / 100) # 开盘价*100 81 | # stock_day['stock_high'].append(unpack("I", stock_high)[0] / 100) # 最高价*100 82 | # stock_day['stock_low'].append(unpack("I", stock_low)[0] / 100) # 最低价*100 83 | # stock_day['stock_close'].append(unpack("I", stock_close)[0] / 100) # 收盘价*100 84 | # stock_day['stock_amount'].append(unpack("f", stock_amount)) # 成交额 85 | # stock_day['stock_vol'].append(unpack("I", stock_vol)) # 成交量 86 | # stock_day['stock_reservation'].append(unpack("I", stock_reservation)) # 保留值 87 | # i = i + 1 88 | 89 | 90 | def unpack_stock_lc5(file_url, _data_size=32, _analysis_cycle=5): 91 | with open(file_url, r'rb') as f: 92 | f.seek(-_data_size * _analysis_cycle, 2) 93 | for i in range(0, _analysis_cycle): 94 | buff = f.read(32) 95 | a0, a1, _open, _high, _low, _close, _amount, _vol, _reservation = unpack(r"HHfffffII", buff) 96 | # print(_date) 97 | # print(_open/100) 98 | # a = unpack("HH", stock_date) 99 | _date = str(int(a0 / 2048) + 2004) + '-' + str(int(a0 % 2048 / 100)).zfill(2) + '-' + str( 100 | a0 % 2048 % 100).zfill(2), str(int(a1 / 60)).zfill(2) + ':' + str(a1 % 60).zfill( 101 | 2) + ':00' 102 | stock_day['stock_date'].append(_date) # 103 | stock_day['stock_open'].append(round(_open, 2)) # 开盘价 104 | stock_day['stock_high'].append(round(_high, 2)) # 最高价 105 | stock_day['stock_low'].append(round(_low, 2)) # 最低价 106 | stock_day['stock_close'].append(round(_close, 2)) # 收盘价 107 | stock_day['stock_amount'].append(_amount) # 成交额 108 | stock_day['stock_vol'].append(_vol) # 成交量 109 | stock_day['stock_reservation'].append(_reservation) # 保留值 110 | 111 | 112 | def my_walk(_file_directory): 113 | root, dirs, files = tuple(os.walk(_file_directory))[0] 114 | file_set = [] 115 | for file in files: 116 | if file_rule.match(file): 117 | file_set.append(os.path.join(_file_directory, file)) 118 | for file_url in file_set: 119 | print(file_url) 120 | file_size = os.path.getsize(file_url) 121 | print(file_size) 122 | with open(file_url, r'rb') as fp: 123 | if file_size >= data_size * analysis_cycle: 124 | fp.seek(-data_size * analysis_cycle, os.SEEK_END) 125 | print('大于') 126 | else: 127 | fp.seek(0, os.SEEK_SET) 128 | print('小于size') 129 | 130 | print(file_set) 131 | 132 | 133 | if __name__ == "__main__": 134 | # file_path_day_sh = r"D:\通达信\new_tdx\vipdoc\sh\lday" 135 | file_path_day_sh = r"F:\PycharmProjects\gpfx\test_re" 136 | # my_walk(file_path_day_sh) 137 | file_url = r"F:\PycharmProjects\gpfx\test_re\sh000904.day" 138 | # name, ext = os.path.splitext(file_url) 139 | name2 = os.path.basename(file_url) 140 | name, ext = os.path.splitext(name2) 141 | print(name) 142 | print('ext:', ext) 143 | print(name2) 144 | # print(re_rule) 145 | # open_file_directory(file_path) 146 | # unpack_stock_day(r"F:\PycharmProjects\gpfx\sz000028.day") 147 | # unpack_stock_lc5(r"F:\PycharmProjects\gpfx\sz000028.lc5") 148 | # print(stock_day['stock_date']) 149 | # print(stock_day['stock_open']) 150 | # print(stock_day['stock_high']) 151 | # print(stock_day['stock_low']) 152 | # print(stock_day['stock_close']) 153 | # print(stock_day['stock_amount']) 154 | -------------------------------------------------------------------------------- /stock_data_collect.py: -------------------------------------------------------------------------------- 1 | import pretty_errors 2 | import os 3 | from struct import unpack 4 | import re 5 | 6 | # 个股的匹配规则 7 | re_rule = r'^sh6[08]\d{4}\.[dl][ac][y5]|^sz[03][0]\d{4}\.[dl][ac][y5]' 8 | stock_file_rule = re.compile(re_rule) 9 | # 板块指数的匹配规则 10 | # todo: 修改板块指数的匹配模式 11 | block_re_rule = r'^sh6[08]\d{4}\.[dl][ac][y5]|^sz[03][0]\d{4}\.[dl][ac][y5]' 12 | block_rule = re.compile(block_re_rule) 13 | 14 | # 数据共用结构 参照数据结构文件说明 15 | data_structure = { 16 | "stock_date": [], 17 | "stock_open": [], 18 | "stock_high": [], 19 | "stock_low": [], 20 | "stock_close": [], 21 | "stock_amount": [], 22 | "stock_vol": [], 23 | "stock_reservation": [], 24 | "stock_num": [] # 增加一个股票代码的参数,方便满足条件时记录股票编码 25 | } 26 | data_ext = {'day': r".day", 'lc5': r".lc5"} 27 | # 一个大的列表 储存stock_day stock_lc5 一个表 包含所有股票,股票的子表包含数据 28 | # 每32字节(32bytes)为一组记录 29 | data_size = 32 30 | # 默认分析周期 31 | analysis_cycle = 500 32 | # 文件目录文件安装路径需自行修改 33 | file_path_set = { 34 | 'sh_day': r"D:\通达信\new_tdx\vipdoc\sh\lday", 35 | 'sh_lc5': r"D:\通达信\new_tdx\vipdoc\sh\fzline", 36 | 'sz_day': r"D:\通达信\new_tdx\vipdoc\sz\lday", 37 | 'sz_lc5': r"D:\通达信\new_tdx\vipdoc\sz\fzline" 38 | } 39 | 40 | 41 | # 取得当前文件夹所有文件的列表 42 | def walk_directory(_file_directory): 43 | """ 44 | 遍历文件夹中的文件,取得文件列表 45 | :param _file_directory: 文件夹 46 | :return: 返回文件夹中文件列表 47 | """ 48 | # 取得文件夹里面的所有子文件 49 | root, dirs, files = tuple(os.walk(_file_directory))[0] 50 | # 取得判断扩展名 51 | # name, ext = os.path.splitext(files[0]) 52 | 53 | # 生成文件列表 54 | _file_urls = [] 55 | for file in files: 56 | # re 匹配文件 57 | if stock_file_rule.match(file): 58 | _file_urls.append(os.path.join(_file_directory, file)) 59 | # print(file_urls) 60 | return _file_urls 61 | 62 | 63 | # 直接把数据填充在data_structure中,而并不以返回值的方式传递 64 | def unpack_stock_data(_file_url, _data_size=32, _analysis_cycle=1): 65 | """ 66 | 解析本地数据 .day .lc5 文件 最新数据需要自己手动从通达信软件中离线数据中下载 67 | :param _file_url: 数据文件 68 | :param _data_size: 数据结构大小 默认 32 69 | :param _analysis_cycle: 需要分析的数据周期 默认 1 只能是正整数(int) 70 | :return: 直接把数据填充在data_structure中,而并不以返回值的方式传递 71 | """ 72 | name, ext = os.path.splitext(os.path.basename(_file_url)) 73 | for key in data_structure.keys(): 74 | data_structure[key] = [] 75 | # data_copy = {} 76 | # 得到解析文件的大小 77 | file_size = os.path.getsize(_file_url) 78 | 79 | if data_ext['day'] == ext: 80 | # file_size = os.path.getsize(_file_url) 81 | with open(_file_url, r'rb') as fp: 82 | if file_size > _data_size * _analysis_cycle: 83 | fp.seek(-_data_size * _analysis_cycle, os.SEEK_END) 84 | range_cycle = _analysis_cycle 85 | # print('大于') 86 | else: 87 | fp.seek(0, os.SEEK_SET) 88 | range_cycle = int(file_size / _data_size) 89 | # print('小于size') 90 | 91 | # 提取数据 92 | for i in range(0, range_cycle): 93 | buff = fp.read(32) 94 | _date, _open, _high, _low, _close, _amount, _vol, _reservation = unpack(r"IIIIIfII", buff) 95 | # print(_date) 96 | # print(_open/100) 97 | data_structure['stock_date'].append(_date) # 4字节 如20091229 98 | data_structure['stock_open'].append(_open / 100) # 开盘价*100 99 | data_structure['stock_high'].append(_high / 100) # 最高价*100 100 | data_structure['stock_low'].append(_low / 100) # 最低价*100 101 | data_structure['stock_close'].append(_close / 100) # 收盘价*100 102 | data_structure['stock_amount'].append(_amount) # 成交额 103 | data_structure['stock_vol'].append(_vol) # 成交量 104 | data_structure['stock_reservation'].append(_reservation) # 保留值 105 | data_structure['stock_num'].append(name) # 股票代码 106 | 107 | # 按天算 5 分钟数据 天数*48(48 是一天的5分钟周期总和) 108 | elif data_ext['lc5'] == ext: 109 | # print('lc5') 110 | # 打开读取文件 设置文件指针 设置读取文件的循环周期 111 | # 48个周期为一天4小时的5分钟数据 112 | _analysis_cycle = _analysis_cycle * 48 113 | with open(_file_url, r'rb') as fp: 114 | if file_size > _data_size * _analysis_cycle: 115 | fp.seek(-_data_size * _analysis_cycle, os.SEEK_END) 116 | range_cycle = _analysis_cycle 117 | # print('大于') 118 | else: 119 | fp.seek(0, os.SEEK_SET) 120 | range_cycle = int(file_size / _data_size) 121 | # print('小于size') 122 | 123 | for i in range(0, range_cycle): 124 | buff = fp.read(32) 125 | a0, a1, _open, _high, _low, _close, _amount, _vol, _reservation = unpack(r"HHfffffII", buff) 126 | 127 | _date = str(int(a0 / 2048) + 2004) + '-' + str(int(a0 % 2048 / 100)).zfill(2) + '-' + str( 128 | a0 % 2048 % 100).zfill(2), str(int(a1 / 60)).zfill(2) + ':' + str(a1 % 60).zfill( 129 | 2) + ':00' 130 | data_structure['stock_date'].append(_date) # (日期, 时间) 131 | data_structure['stock_open'].append(round(_open, 2)) # 开盘价 132 | data_structure['stock_high'].append(round(_high, 2)) # 最高价 133 | data_structure['stock_low'].append(round(_low, 2)) # 最低价 134 | data_structure['stock_close'].append(round(_close, 2)) # 收盘价 135 | data_structure['stock_amount'].append(round(_amount, 2)) # 成交额 136 | data_structure['stock_vol'].append(_vol) # 成交量 137 | data_structure['stock_reservation'].append(_reservation) # 保留值 138 | data_structure['stock_num'].append(name) 139 | 140 | 141 | if __name__ == "__main__": 142 | file_path_set['sh_day'] = r'F:\PycharmProjects\gpfx\test_talib' 143 | file_urls = walk_directory(file_path_set['sh_day']) 144 | 145 | for file_url in file_urls: 146 | unpack_stock_data(file_url, _analysis_cycle=50) 147 | print(data_structure) 148 | 149 | # unpack_stock_day(r"F:\PycharmProjects\gpfx\sz000028.day") 150 | # unpack_stock_lc5(r"F:\PycharmProjects\gpfx\sz000028.lc5") 151 | # print(stock_lc5['stock_date']) 152 | # print(stock_lc5['stock_open']) 153 | # print(stock_day['stock_date']) 154 | # print(stock_day['stock_open']) 155 | # print(stock_day['stock_high']) 156 | # print(stock_day['stock_low']) 157 | # print(stock_day['stock_close']) 158 | # print(stock_day['stock_amount']) 159 | # print(stock_day['stock_vol']) 160 | # print(stock_day['stock_reservation']) 161 | -------------------------------------------------------------------------------- /stock_data_process.py: -------------------------------------------------------------------------------- 1 | # import pretty_errors 2 | import numpy 3 | # talib 金融分析库 4 | import talib 5 | from stock_data_collect import * 6 | 7 | # 思路:用指标一层一层筛选,先用macd选择大范围 8 | 9 | # 通达信 macd 指标 10 | # macd筛选策略 11 | set_macd_30 = set() 12 | set_macd_60 = set() 13 | set_macd_day = set() 14 | 15 | 16 | def stock_filter_macd(_data_structure, end): 17 | """ 18 | :param _data_structure: 数据的结构 19 | :param end: end 代表离最后周期的参数,筛选最近的 20 | :return: 符合条件的数据集合 21 | """ 22 | # MACD = 2 * histogram 周期参数(12,26,9) 23 | dif, dea, histogram = talib.MACD(numpy.array(_data_structure['stock_close']), fastperiod=12, slowperiod=26, 24 | signalperiod=9) 25 | # 周期参数(6, 13, 9) 26 | dif2, dea2, histogram2 = talib.MACD(numpy.array(_data_structure['stock_close']), fastperiod=6, slowperiod=13, 27 | signalperiod=4) 28 | # dif, dea, macd = talib.MACDEXT(numpy.array(_data_structure['stock_close']), fastperiod=12, fastmatype=0, 29 | # slowperiod=26, slowmatype=0, signalperiod=9, signalmatype=0) 30 | # dif, dea, macd = talib.MACDEXT(numpy.array(_data_structure['stock_close']), signalperiod=9) 31 | # 1、金叉, 二次金叉 32 | # 记录金叉时间 33 | dif_cross_dea = [] 34 | # cross_resonance 周期共振 35 | cross_resonance = [] 36 | # 背离位置记录 37 | deviate_from_macd = [] 38 | deviate_from_macd2 = [] 39 | tmp = set() 40 | for i in range(len(dif)): 41 | if dif[i - 1] <= dea[i - 1] and dif[i] > dea[i]: 42 | pass 43 | # dif_cross_dea.append((_data_structure['stock_date'], i)) # 记录金叉的时间跟位置元组 (时间,位置) 44 | # print('金叉') 45 | # 2、背离 46 | # for i in range(len(histogram)): 47 | elif histogram[i] >= histogram[i - 1] and \ 48 | _data_structure['stock_close'][i] <= _data_structure['stock_close'][i - 1]: 49 | pass 50 | # deviate_from_macd.append((_data_structure['stock_date'], i)) # 记录背离的时间跟位置元组 (时间,位置) 51 | # print('macd 背离') 52 | # 3、共振 53 | # 短周期dea2走平上拐,长周期dif周平上拐,买点 54 | elif dea2[i - 1] <= dea2[i] and dif[i - 1] <= dif[i] and len(dif) <= i + end: 55 | # cross_resonance.append((_data_structure['stock_date'], i)) # 记录共振的时间跟位置元组 (时间,位置) 56 | tmp.add(_data_structure['stock_num']) 57 | 58 | return tmp 59 | 60 | 61 | # 通达信 trix指标 62 | set_trix_30 = set() 63 | set_trix_60 = set() 64 | set_trix_day = set() 65 | 66 | 67 | def stock_filter_trix(_data_structure, end): 68 | """ 69 | :param _data_structure: 数据的结构 70 | :param end: end 代表离最后周期的参数,筛选最近的 71 | :return: 符合条件的数据集合 72 | """ 73 | trix = talib.TRIX(numpy.array(_data_structure['stock_close']), timeperiod=12) 74 | matrix = talib.MA(trix, timeperiod=9, matype=0) 75 | # 记录 trix 走平 金叉位置 76 | tmp = set() 77 | for i in range(len(trix)): 78 | if trix[i - 1] <= trix[i] and len(trix) <= i + end: 79 | tmp.add(_data_structure['stock_num']) 80 | 81 | # return tmp 82 | 83 | tmp1 = set() 84 | for i in range(len(matrix)): 85 | if matrix[i - 1] <= matrix[i] and len(matrix) <= i + end: 86 | tmp1.add(_data_structure['stock_num']) 87 | 88 | return tmp1 89 | # print(trix) 90 | # print(matrix) 91 | 92 | 93 | # 通达信 marsi 指标 94 | set_marsi_30 = set() 95 | set_marsi_60 = set() 96 | set_marsi_day = set() 97 | 98 | 99 | # 筛选marsi10 最后end 周期内拐头向上的 100 | def stock_filter_marsi(_data_structure, end): 101 | # rsi6 = talib.RSI(numpy.array(_data_structure['stock_close']), timeperiod=6) 102 | # marsi6 = talib.MA(rsi6, timeperiod=6, matype=0) 103 | rsi10 = talib.RSI(numpy.array(_data_structure['stock_close']), timeperiod=10) 104 | marsi10 = talib.MA(rsi10, timeperiod=10, matype=0) 105 | tmp = set() 106 | for i in range(len(marsi10)): 107 | if marsi10[i - 1] <= marsi10[i] and len(marsi10) <= i + end: 108 | tmp.add(_data_structure['stock_num']) 109 | 110 | return tmp 111 | # print(rsi10) 112 | # print(marsi6) 113 | # print(marsi10) 114 | 115 | 116 | def stock_filter_cci(_data_structure): 117 | pass 118 | 119 | 120 | # def stock_filter_fsl(_data_structure): 121 | # pass 122 | 123 | def stock_filter_average(_data_structure): 124 | ma10 = talib.MA(numpy.array(_data_structure['stock_close']), timeperiod=10) 125 | ma20 = talib.MA(numpy.array(_data_structure['stock_close']), timeperiod=20) 126 | ma30 = talib.MA(numpy.array(_data_structure['stock_close']), timeperiod=30) 127 | 128 | 129 | # 用于数据转换的存储,而不修改原始的数据 130 | data_copy = {} 131 | 132 | 133 | # 30 & 60分钟数据转换 日线也可以(但要求数据是_timeperiod的倍数) 134 | def cycle_transform(_data_structure, _timeperiod=6): 135 | """ 136 | :param _data_structure: 数据共用结构 137 | :param _timeperiod: 30分钟数据是6个周期(默认分析30分钟数据), 60分钟数据是12个周期 138 | :return: 组合好的data_structure 数据共用结构 139 | """ 140 | # 30 分钟数据是 6 个周期 141 | # 60 分钟数据是 12 个周期 142 | # 周线数据是日线数据的5个周期 143 | # i = 0 144 | # range_cycle = 0 145 | il = len(_data_structure['stock_date']) 146 | ir = il % _timeperiod 147 | # 构造循环的周期数 148 | if ir == 0: 149 | range_cycle = il // _timeperiod 150 | else: 151 | range_cycle = (il - ir) // _timeperiod 152 | # 清空原本的数据 153 | # data_structure.clear() 154 | # data_copy = data_structure.copy() 155 | for key in data_copy.keys(): 156 | data_copy[key] = [] 157 | # data_copy = {} 158 | # 组合数据 159 | for i in range(range_cycle): 160 | # 数据起步基数 数据取段为[base:base+_timeperiod] 161 | base = ir + i * _timeperiod 162 | # 30分钟数据 163 | # if _timeperiod == 6: 164 | # print(_data_structure['stock_date']) 165 | data_copy['stock_date'].append(_data_structure['stock_date'][base:base + _timeperiod][-1]) 166 | data_copy['stock_open'].append(_data_structure['stock_open'][base:base + _timeperiod][0]) 167 | data_copy['stock_close'].append(_data_structure['stock_close'][base:base + _timeperiod][-1]) 168 | data_copy['stock_high'].append(max(_data_structure['stock_high'][base:base + _timeperiod])) 169 | data_copy['stock_low'].append(min(_data_structure['stock_low'][base:base + _timeperiod])) 170 | data_copy['stock_amount'].append(sum(_data_structure['stock_amount'][base:base + _timeperiod])) 171 | data_copy['stock_vol'].append(sum(_data_structure['stock_vol'][base:base + _timeperiod])) 172 | # data_structure['stock_reservation'] = _data_structure['stock_reservation'][base::_timeperiod] 173 | data_copy['stock_num'] = _data_structure['stock_num'] 174 | return data_copy 175 | 176 | 177 | if __name__ == '__main__': 178 | # file_path_set['sh_day'] = r'F:\PycharmProjects\gpfx\test_talib' 179 | # unpack_stock_data(file_path_set['sh_day'], _analysis_cycle=150) 180 | 181 | # print(data_structure['stock_close']) 182 | # stock_filter_trix(data_structure) 183 | # stock_filter_marsi(data_structure) 184 | 185 | file_path_set['sh_lc5'] = r'F:\PycharmProjects\gpfx\test_talib' 186 | file_urls = walk_directory(file_path_set['sh_lc5']) 187 | 188 | for file_url in file_urls: 189 | unpack_stock_data(file_url, _analysis_cycle=2) 190 | # 转换数据周期 191 | date_transform_30 = cycle_transform(data_structure, 6) 192 | # print(date_transform_30['stock_date']) 193 | # print(date_transform_30['stock_open']) 194 | date_transform_60 = cycle_transform(data_structure, 12) 195 | # print(date_transform_60['stock_date']) 196 | # print(date_transform_60['stock_open']) 197 | -------------------------------------------------------------------------------- /Bak.py: -------------------------------------------------------------------------------- 1 | import pretty_errors 2 | import os 3 | from struct import unpack 4 | import re 5 | 6 | # talib 金融分析库 7 | # import talib 8 | # import numpy 9 | # import matplotlib.pyplot as plt 10 | # import matplotlib as mpl 11 | 12 | # 使用ggplot样式,好看些 13 | # mpl.style.use("ggplot") 14 | 15 | # 个股的匹配规则 16 | re_rule = r'^sh6[08]\d{4}\.[dl][ac][y5]|^sz[03][0]\d{4}\.[dl][ac][y5]' 17 | stock_file_rule = re.compile(re_rule) 18 | # 板块指数的匹配规则 19 | # todo: 修改板块指数的匹配模式 20 | block_re_rule = r'^sh6[08]\d{4}\.[dl][ac][y5]|^sz[03][0]\d{4}\.[dl][ac][y5]' 21 | block_rule = re.compile(block_re_rule) 22 | 23 | # 数据共用结构 参照数据结构文件说明 24 | data_structure = { 25 | "stock_date": [], 26 | "stock_open": [], 27 | "stock_high": [], 28 | "stock_low": [], 29 | "stock_close": [], 30 | "stock_amount": [], 31 | "stock_vol": [], 32 | "stock_reservation": [], 33 | "stock_num": [] # 增加一个股票代码的参数,方便满足条件时记录股票编码 34 | } 35 | data_ext = {'day': r".day", 'lc5': r".lc5"} 36 | # 一个大的列表 储存stock_day stock_lc5 一个表 包含所有股票,股票的子表包含数据 37 | # 每32字节(32bytes)为一组记录 38 | data_size = 32 39 | # 默认分析周期 40 | analysis_cycle = 500 41 | # 文件目录 42 | file_path_set = { 43 | 'sh_day': r"D:\通达信\new_tdx\vipdoc\sh\lday", 44 | 'sh_lc5': r"D:\通达信\new_tdx\vipdoc\sh\fzline", 45 | 'sz_day': r"D:\通达信\new_tdx\vipdoc\sz\lday", 46 | 'sz_lc5': r"D:\通达信\new_tdx\vipdoc\sz\fzline" 47 | } 48 | 49 | 50 | def walk_directory(_file_directory): 51 | # 取得文件夹里面的所有子文件 52 | root, dirs, files = tuple(os.walk(_file_directory))[0] 53 | # 取得判断扩展名 54 | # name, ext = os.path.splitext(files[0]) 55 | 56 | # 生成文件列表 57 | file_urls = [] 58 | for file in files: 59 | # re 匹配文件 60 | if stock_file_rule.match(file): 61 | file_urls.append(os.path.join(_file_directory, file)) 62 | # print(file_urls) 63 | return file_urls 64 | 65 | 66 | def unpack_stock_data(_file_url, _data_size=32, _analysis_cycle=1): 67 | """ 68 | # 解析本地数据 .day .lc5 文件 69 | :param _file_url: 数据文件 70 | :param _data_size: 数据结构大小 默认 32 71 | :param _analysis_cycle: 需要分析的数据周期 默认 1 只能是正整数(int) 72 | :return: 73 | """ 74 | 75 | # # 取得文件夹里面的所有子文件 76 | # root, dirs, files = tuple(os.walk(_file_directory))[0] 77 | # 取得判断扩展名 78 | # name, ext = os.path.splitext(files[0]) 79 | # name, ext = os.path.splitext(_file_urls) 80 | # 81 | # # 生成文件列表 82 | # file_urls = [] 83 | # for file in files: 84 | # # re 匹配文件 85 | # if stock_file_rule.match(file): 86 | # file_urls.append(os.path.join(_file_directory, file)) 87 | # # print(file_urls) 88 | 89 | name, ext = os.path.splitext(os.path.basename(_file_url)) 90 | for key in data_structure.keys(): 91 | data_structure[key] = [] 92 | 93 | if data_ext['day'] == ext: 94 | # 遍历文件 95 | # for file_url in _file_url: 96 | # 取得判断扩展名 97 | # name2 = os.path.basename(file_url) 98 | # name, ext = os.path.splitext(os.path.basename(file_url)) 99 | # name, ext = os.path.splitext(file_url) 100 | 101 | file_size = os.path.getsize(_file_url) 102 | # print(file_url) 103 | # 清空原本的数据 104 | # data_structure.clear() 105 | # for key in data_structure.keys(): 106 | # data_structure[key] = [] 107 | # 打开读取文件 设置文件指针 设置读取文件的循环周期 108 | # 解决文件根本不够分析周期的问题 109 | # 获取文件内容大小,区分读取方式 110 | # if data_ext['day'] == ext: 111 | # pass 112 | with open(_file_url, r'rb') as fp: 113 | if file_size > _data_size * _analysis_cycle: 114 | fp.seek(-_data_size * _analysis_cycle, os.SEEK_END) 115 | range_cycle = _analysis_cycle 116 | # print('大于') 117 | else: 118 | fp.seek(0, os.SEEK_SET) 119 | range_cycle = int(file_size / _data_size) 120 | # print('小于size') 121 | 122 | # 提取数据 123 | for i in range(0, range_cycle): 124 | buff = fp.read(32) 125 | _date, _open, _high, _low, _close, _amount, _vol, _reservation = unpack(r"IIIIIfII", buff) 126 | # print(_date) 127 | # print(_open/100) 128 | data_structure['stock_date'].append(_date) # 4字节 如20091229 129 | data_structure['stock_open'].append(_open / 100) # 开盘价*100 130 | data_structure['stock_high'].append(_high / 100) # 最高价*100 131 | data_structure['stock_low'].append(_low / 100) # 最低价*100 132 | data_structure['stock_close'].append(_close / 100) # 收盘价*100 133 | data_structure['stock_amount'].append(_amount) # 成交额 134 | data_structure['stock_vol'].append(_vol) # 成交量 135 | data_structure['stock_reservation'].append(_reservation) # 保留值 136 | data_structure['stock_num'].append(name) # 股票代码 137 | 138 | # stock_ta_lib(data_structure) 139 | # fp.close() 140 | # 按天算 5 分钟数据 天数*48(48 是一天的5分钟周期总和) 141 | elif data_ext['lc5'] == ext: 142 | # print('lc5') 143 | for file_url in _file_url: 144 | # 取得判断扩展名 145 | # name2 = os.path.basename(file_url) 146 | # name, ext = os.path.splitext(os.path.basename(file_url)) 147 | # name, ext = os.path.splitext(file_url) 148 | file_size = os.path.getsize(file_url) 149 | # print(file_url) 150 | # 清空原本的数据 151 | # data_structure.clear() 152 | for key in data_structure.keys(): 153 | data_structure[key] = [] 154 | 155 | # 打开读取文件 设置文件指针 设置读取文件的循环周期 156 | # 48个周期为一天4小时的5分钟数据 157 | _analysis_cycle = _analysis_cycle * 48 158 | with open(file_url, r'rb') as fp: 159 | if file_size > _data_size * _analysis_cycle: 160 | fp.seek(-_data_size * _analysis_cycle, os.SEEK_END) 161 | range_cycle = _analysis_cycle 162 | # print('大于') 163 | else: 164 | fp.seek(0, os.SEEK_SET) 165 | range_cycle = int(file_size / _data_size) 166 | # print('小于size') 167 | for i in range(0, range_cycle): 168 | buff = fp.read(32) 169 | a0, a1, _open, _high, _low, _close, _amount, _vol, _reservation = unpack(r"HHfffffII", buff) 170 | # print(_date) 171 | # print(_open/100) 172 | # a = unpack("HH", stock_date) 173 | _date = str(int(a0 / 2048) + 2004) + '-' + str(int(a0 % 2048 / 100)).zfill(2) + '-' + str( 174 | a0 % 2048 % 100).zfill(2), str(int(a1 / 60)).zfill(2) + ':' + str(a1 % 60).zfill( 175 | 2) + ':00' 176 | data_structure['stock_date'].append(_date) # (日期, 时间) 177 | data_structure['stock_open'].append(round(_open, 2)) # 开盘价 178 | data_structure['stock_high'].append(round(_high, 2)) # 最高价 179 | data_structure['stock_low'].append(round(_low, 2)) # 最低价 180 | data_structure['stock_close'].append(round(_close, 2)) # 收盘价 181 | data_structure['stock_amount'].append(round(_amount, 2)) # 成交额 182 | data_structure['stock_vol'].append(_vol) # 成交量 183 | data_structure['stock_reservation'].append(_reservation) # 保留值 184 | data_structure['stock_num'].append(name) 185 | 186 | 187 | if __name__ == "__main__": 188 | file_path_set['sh_day'] = r'F:\PycharmProjects\gpfx\test_talib' 189 | unpack_stock_data(file_path_set['sh_day'], _analysis_cycle=50) 190 | print(data_structure) 191 | pass 192 | # unpack_stock_day(r"F:\PycharmProjects\gpfx\sz000028.day") 193 | # unpack_stock_lc5(r"F:\PycharmProjects\gpfx\sz000028.lc5") 194 | # print(stock_lc5['stock_date']) 195 | # print(stock_lc5['stock_open']) 196 | # print(stock_day['stock_date']) 197 | # print(stock_day['stock_open']) 198 | # print(stock_day['stock_high']) 199 | # print(stock_day['stock_low']) 200 | # print(stock_day['stock_close']) 201 | # print(stock_day['stock_amount']) 202 | # print(stock_day['stock_vol']) 203 | # print(stock_day['stock_reservation']) 204 | --------------------------------------------------------------------------------