├── .gitignore ├── Classes ├── Bonus.py ├── Data.py ├── Info.py ├── Updata_today.py ├── convert_data_for_PyAlgoTrade.py ├── get_cal.py ├── management.py └── real_time_data.py ├── LICENSE ├── MA.py ├── README.md ├── get_data.py ├── test.py └── test2.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | -------------------------------------------------------------------------------- /Classes/Bonus.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """ 3 |   __title__ = '' 4 |   __file__ = '' 5 |   __author__ = 'tianmuchunxiao' 6 |   __mtime__ = '2019/7/4' 7 | """ 8 | 9 | import requests 10 | from lxml import etree 11 | import pandas as pd 12 | 13 | class Bonus(object): 14 | URL = 'http://quotes.money.163.com/f10/fhpg_{}.html' 15 | HEADERS = {} 16 | file_path = '' 17 | def __init__(self, symbol): 18 | self.symbol = symbol 19 | self.URL = self.URL.format(symbol) 20 | print('正在获取{}数据……'.format(symbol)) 21 | response = requests.get(url=self.URL, 22 | headers=self.HEADERS) 23 | self.html = etree.HTML(response.content.decode('gbk')) 24 | 25 | def get_data(self, content='bonus', num_div='4', num_col=8): 26 | if content == 'bonus': 27 | title = self.html.xpath('/html/body/div[2]/div[4]/table/thead/tr/th/text()') 28 | del title[2] 29 | title[2], title[3], title[4], title[5], title[6], title[7] =title[5], title[6], title[7], title[2], title[3], title[4] 30 | columns = title 31 | else: 32 | columns = self.html.xpath('/html/body/div[2]/div[{}]/table/thead/tr/th/text()'.format(num_div)) 33 | 34 | tds = self.html.xpath('/html/body/div[2]/div[{}]/table/tr/td'.format(num_div)) 35 | data_list = [] 36 | for t in tds: 37 | data_list.append(t.text) 38 | if not ('暂无数据' in data_list): 39 | data = pd.DataFrame(columns=columns) 40 | for i in 
range(0, len(data_list), num_col): 41 | data_dict = {} 42 | for j in range(num_col): 43 | data_dict[columns[j]] = data_list[i + j] 44 | data_df = pd.DataFrame(data_dict, columns=columns, index=[i // num_col]) 45 | data = data.append(data_df) 46 | data.to_csv(self.file_path + self.symbol + '.csv', encoding='gbk', index=False) 47 | else: 48 | print('暂无数据') 49 | print('{}数据处理完成'.format(self.symbol)) 50 | 51 | def get_bonus(self): 52 | self.file_path = 'F:/Stock_Data/bonus/' 53 | self.get_data(content='bonus', num_div='4', num_col=8) 54 | 55 | def get_allot(self): 56 | self.file_path = 'F:/Stock_Data/allot/' 57 | self.get_data(content='allot', num_div='6', num_col=8) 58 | 59 | def get_issue(self): 60 | self.file_path = 'F:/Stock_Data/issue/' 61 | self.get_data(content='issue', num_div='8', num_col=8) 62 | 63 | def get_financing(self): 64 | self.file_path = 'F:/Stock_Data/financing/' 65 | self.get_data(content='finacing', num_div='10', num_col=6) 66 | -------------------------------------------------------------------------------- /Classes/Data.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """ 3 |   __title__ = '' 4 |   __file__ = '' 5 |   __author__ = 'tianmuchunxiao' 6 |   __mtime__ = '2019/7/4' 7 | """ 8 | import requests 9 | import datetime 10 | import pandas as pd 11 | from io import StringIO 12 | 13 | TODAY = datetime.date.strftime(datetime.date.today(), '%Y%m%d') 14 | 15 | class Data(object): 16 | URL = '' 17 | PARAMS = {} 18 | HEADERS = {} 19 | file_path = '' 20 | 21 | def get_data(self, code, end=TODAY): 22 | self.PARAMS['code'] = code 23 | self.PARAMS['end'] = end 24 | print('正在获取{}数据……'.format(code)) 25 | response = requests.get(url=self.URL, 26 | params=self.PARAMS, 27 | headers=self.HEADERS) 28 | print('正在处理{}数据...'.format(code)) 29 | data_df = pd.read_csv(StringIO(response.content.decode('gbk')), skip_blank_lines=True) 30 | print(data_df) 31 | data_df = data_df.sort_values(by='日期') 32 | if data_df.empty: 33 | print('空数据', code) 34 | else: 35 | data_df.to_csv(self.file_path + str(code[1:]) + '.csv', encoding='gbk',index=False) 36 | print('{}数据处理完成!!'.format(code)) 37 | 38 | class Stock_data(Data): 39 | URL = 'http://quotes.money.163.com/service/chddata.html' 40 | PARAMS = { 41 | 'code': '', 42 | 'start': '19900101', 43 | 'end': '', 44 | 'fields': 'TCLOSE;HIGH;LOW;TOPEN;LCLOSE;CHG;PCHG;TURNOVER;VOTURNOVER;VATURNOVER;TCAP;MCAP' 45 | } 46 | HEADERS = { 47 | 'Cookie': 'Province=0; City=0; UM_distinctid=16c05496622f1-00e8d8cb7044e48-4c312272-15f900-16c054966245cc; _ntes_nnid=0213f9288c03916f18ed2634a6a3506d,1563456793050; vjuids=1be4f793f.16c054a41b6.0.6b5b7a77d19a78; vjlast=1563456848.1563930352.13; vinfo_n_f_l_n3=ad2a50d90e25c7dc.1.4.1563456848324.1563950911150.1563963465898; usertrack=ezq0ZV03rush6S+BCCg6Ag==; _ntes_nuid=0213f9288c03916f18ed2634a6a3506d; NNSSPID=bcf860b5427949c599552390d570c1d0; _ntes_stock_recent_plate_=%7Chy006000%3A%E6%89%B9%E5%8F%91%E9%9B%B6%E5%94%AE; _ntes_stock_recent_=0601857%7C0601326%7C0600682; _ntes_stock_recent_=0601857%7C0601326%7C0600682; _ntes_stock_recent_=0601857%7C0601326%7C0600682; ne_analysis_trace_id=1563963422398; s_n_f_l_n3=ad2a50d90e25c7dc1563963422401; _antanalysis_s_id=1563963428611; pgr_n_f_l_n3=ad2a50d90e25c7dc15639634493333113', 48 | 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,' 49 | 'application/signed-exchange;v=b3', 50 | 'Accept-Encoding': 'gzip, deflate', 51 | 'Accept-Language': 
'zh,en-US;q=0.9,en;q=0.8,zh-TW;q=0.7,zh-CN;q=0.6', 52 | 'Connection': 'keep-alive', 53 | 'Host': 'quotes.money.163.com', 54 | 'Referer': 'http://quotes.money.163.com / trade / lsjysj_601857.html', 55 | 'Upgrade-Insecure-Requests': '1', 56 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) ' 57 | 'Chrome/75.0.3770.100 Safari/537.36' 58 | } 59 | file_path = 'F:/Stock_Data/stock_data/' 60 | 61 | 62 | class Index_data(Data): 63 | URL = 'http://quotes.money.163.com/service/chddata.html' 64 | HEADERS = { 65 | 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,' 66 | 'application/signed-exchange;v=b3', 67 | 'Accept-Encoding': 'gzip, deflate', 68 | 'Accept-Language': 'zh,en-US;q=0.9,en;q=0.8,zh-TW;q=0.7,zh-CN;q=0.6', 69 | 'Connection': 'keep-alive', 70 | 'Host': 'quotes.money.163.com', 71 | 'Referer': 'http://quotes.money.163.com/trade/lsjysj_zhishu_000003.html', 72 | 'Upgrade-Insecure-Requests': '1', 73 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) ' 74 | 'Chrome/75.0.3770.100 Safari/537.36' 75 | } 76 | PARAMS = { 77 | 'start': '19900101', 78 | 'fields': 'TCLOSE;HIGH;LOW;TOPEN;LCLOSE;CHG;PCHG;VOTURNOVER;VATURNOVER ' 79 | } 80 | file_path = 'F:/Stock_Data/index_data/' 81 | 82 | -------------------------------------------------------------------------------- /Classes/Info.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """ 3 |   __title__ = '' 4 |   __file__ = '' 5 |   __author__ = 'tianmuchunxiao' 6 |   __mtime__ = '2019/7/4' 7 | """ 8 | import requests 9 | import io 10 | import json 11 | import pandas as pd 12 | 13 | class Info_base(object): 14 | URL = '' 15 | PARAMS = {} 16 | HEADERS = {} 17 | file_path = '' 18 | 19 | def __init__(self): 20 | try: 21 | self.data_df = pd.read_csv(self.file_path) 22 | except: 23 | self.get_data() 24 | 25 | 26 | def get_data(self): 27 | response = requests.get(url=self.URL, 28 | params=self.PARAMS, 29 | headers= self.HEADERS) 30 | 31 | data_json = json.loads(io.StringIO(response.text).read()) 32 | self.PARAMS['count'] = data_json['total'] 33 | print('正在获取数据……') 34 | response = requests.get(url=self.URL, 35 | params=self.PARAMS, 36 | headers=self.HEADERS) 37 | 38 | data_json = json.loads(io.StringIO(response.content.decode('gbk')).read()) 39 | print(data_json) 40 | self.data = data_json['list'] 41 | 42 | self.columns = list(self.data[0].keys()) 43 | 44 | self.data_df = pd.DataFrame(self.data, columns=self.columns) 45 | 46 | self.data_df.to_csv(self.file_path, encoding='gbk', index=False) 47 | print('数据处理完成') 48 | 49 | 50 | class Stock_info(Info_base): 51 | PARAMS = { 52 | 'host': 'http://quotes.money.163.com/hs/service/diyrank.php', 53 | 'page': '0', 54 | 'query': 'STYPE:EQA', 55 | 'fields': 'NO,SYMBOL,NAME,PRICE,PERCENT,UPDOWN,FIVE_MINUTE,OPEN,YESTCLOSE,HIGH,LOW,VOLUME,TURNOVER,HS,LB,WB,' 56 | 'ZF,PE,MCAP,TCAP,MFSUM,MFRATIO.MFRATIO2,MFRATIO.MFRATIO10,SNAME,CODE,ANNOUNMT,UVSNEWS', 57 | 'sort': 'SYMBOL', 58 | 'order': 'asc', 59 | 'count': '20', 60 | 'type': 'query' 61 | } 62 | HEADERS = { 63 | 'Accept': 'application/json, text/javascript, */*; q=0.01', 64 | 'Accept-Encoding': 'gzip, deflate', 65 | 'Accept-Language': 'zh,en-US;q=0.9,en;q=0.8,zh-TW;q=0.7,zh-CN;q=0.6', 66 | 'Connection': 'keep-alive', 67 | 'Host': 'quotes.money.163.com', 68 | 'Referer': 'http://quotes.money.163.com/old/', 69 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 
(KHTML, like Gecko) ' 70 | 'Chrome/75.0.3770.100 Safari/537.36', 71 | 'X-Requested-With': 'XMLHttpRequest' 72 | } 73 | URL = PARAMS['host'] 74 | file_path = 'F:/Stock_Data/stock_info.csv' 75 | 76 | class Index_info(Info_base): 77 | PARAMS = { 78 | 'host': '/hs/service/hsindexrank.php', 79 | 'page': '0', 80 | 'query': 'IS_INDEX:true', 81 | 'fields': 'no,SYMBOL,NAME,PRICE,UPDOWN,PERCENT,zhenfu,VOLUME,TURNOVER,YESTCLOSE,OPEN,HIGH,LOW', 82 | 'sort': 'SYMBOL', 83 | 'order': 'asc', 84 | 'count': '25', 85 | 'type': 'query', 86 | 'callback': '', 87 | 'req': '31254' 88 | } 89 | HEADERS = { 90 | 'Accept': '*/*', 91 | 'Accept-Encoding': 'gzip, deflate', 92 | 'Accept-Language': 'zh,en-US;q=0.9,en;q=0.8,zh-TW;q=0.7,zh-CN;q=0.6', 93 | 'Connection': 'keep-alive', 94 | 'Host': 'quotes.money.163.com', 95 | 'Referer': 'http://quotes.money.163.com/old/', 96 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36' 97 | } 98 | URL = 'http://quotes.money.163.com/hs/service/hsindexrank.php' 99 | file_path = 'F:/Stock_Data/index_info.csv' -------------------------------------------------------------------------------- /Classes/Updata_today.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """ 3 |   __title__ = '' 4 |   __file__ = '' 5 |   __author__ = 'tianmuchunxiao' 6 |   __mtime__ = '2019/7/5' 7 | """ 8 | import pandas as pd 9 | import datetime 10 | 11 | class Update_today(object): 12 | symbol = '' 13 | file_path = 'F:/Stock_Data/stock_data/' 14 | 15 | 16 | def __init__(self, symbol): 17 | self.symbol = symbol 18 | 19 | def update(self, recode): 20 | try: 21 | self.data_df = pd.read_csv(self.file_path + symbol + '.csv', 22 | encoding='gbk') 23 | trade_cal_df = pd.read_csv('F:/Stock_Data/trade_cal.csv') 24 | today = datetime.date.strftime(datetime.date.today(), '%Y-%m-%d') 25 | 26 | last_date = max(self.data_df['日期']) 27 | last_trade_date = trade_cal_df[trade_cal_df['cal_date'] == today]['pretrade_date'].values[0] 28 | 29 | trade_date = datetime.datetime.strptime(last_date, '%Y-%m-%d') + datetime.timedelta(days=1) 30 | trade_date = datetime.date.strftime(trade_date, '%Y-%m-%d') 31 | 32 | if last_trade_date <= trade_date: 33 | print('正在更新{}的数据'.format(self.symbol)) 34 | self.data_df = self.data_df.sort_values(by='日期') 35 | if max(self.data_df['日期'].values) != today: 36 | self.data_df = self.data_df.append(Recode(recode).recode, 37 | ignore_index=True) 38 | print(self.data_df) 39 | # self.data_df.to_csv(self.file_path + symbol + '.csv', encoding='gbk', index=False) 40 | else: 41 | print('已经有多天未更新了,请使用get_data重新获取全部数据!') 42 | except: 43 | self.data_df = self.data_df.append(Recode(recode).recode, 44 | ignore_index=True) 45 | print('创建新文件') 46 | print(self.data_df) 47 | 48 | class Recode(object): 49 | recode = {} 50 | def __init__(self, recode): 51 | self.recode['日期'] = datetime.date.strftime(datetime.date.today(), '%Y-%m-%d') 52 | self.recode['股票代码'] = recode['SYMBOL'] 53 | self.recode['名称'] = recode['NAME'] 54 | self.recode['最高价'] = recode['HIGH'] 55 | self.recode['收盘价'] = recode['PRICE'] 56 | self.recode['最低价'] = recode['LOW'] 57 | self.recode['开盘价'] = recode['OPEN'] 58 | self.recode['前收盘'] = recode['YESTCLOSE'] 59 | self.recode['涨跌额'] = recode['UPDOWN'] 60 | self.recode['涨跌幅'] = recode['PERCENT'] / 100 61 | self.recode['换手率'] = recode['HS'] * 100 62 | self.recode['成交量'] = recode['VOLUME'] 63 | self.recode['成交金额'] = recode['TURNOVER'] 64 | self.recode['总市值'] = recode['TCAP'] 65 | 
self.recode['流通市值'] = recode['MCAP'] 66 | 67 | info_df = pd.read_csv('F:/Stock_Data/stock_info.csv', 68 | encoding='gbk') 69 | for row in info_df.iterrows(): 70 | symbol = '{:0>6}'.format(row[1]['SYMBOL']) 71 | recode = row[1] 72 | recode['SYMBOL'] = symbol 73 | stock = Update_today(symbol) 74 | stock.update(recode) 75 | 76 | 77 | -------------------------------------------------------------------------------- /Classes/convert_data_for_PyAlgoTrade.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """ 3 |   __title__ = '' 4 |   __file__ = '' 5 |   __author__ = 'tianmuchunxiao' 6 |   __mtime__ = '2019/7/12' 7 | """ 8 | 9 | import pandas as pd 10 | from multiprocessing import Pool 11 | 12 | 13 | class convert_to_PyAlgoTrade(object): 14 | symbol = '' 15 | source_path = 'F:/Stock_Data/analysis/' 16 | destination_path = 'F:/Stock_Data/PyAlgoTrade/' 17 | drop_labels = ['股票代码', '名称', '前收盘', '涨跌额', '涨跌幅', '换手率', '成交金额', '总市值', '流通市值'] 18 | columns = ['Date Time', 'Close', 'High', 'Low', 'Open', 'Volume', 'Adj Close'] 19 | 20 | def __init__(self, symbol): 21 | self.symbol = symbol 22 | 23 | def convert(self): 24 | data_df = pd.read_csv(self.source_path + self.symbol + '.csv', 25 | encoding='gbk') 26 | 27 | for label in self.drop_labels: 28 | data_df = data_df.drop(label, axis=1) 29 | data_df.columns = self.columns 30 | 31 | data_df.to_csv(self.destination_path + self.symbol + '.csv', 32 | encoding='gbk', 33 | index=False) 34 | 35 | def convert_PyAloTrade(symbol): 36 | print('正在转换{}的数据...'.format(symbol)) 37 | convert = convert_to_PyAlgoTrade(symbol) 38 | convert.convert() 39 | print('{}的数据转换完成!'.format(symbol)) 40 | 41 | def main(): 42 | info_df = pd.read_csv('F:/Stock_Data/stock_info.csv', 43 | encoding='gbk') 44 | symbols = info_df['SYMBOL'].values 45 | pool = Pool(10) 46 | for symbol in symbols: 47 | symbol = '{:0>6}'.format(symbol) 48 | pool.apply_async(func=convert_PyAloTrade, args=(symbol,)) 49 | pool.close() 50 | pool.join() 51 | print('全部PyAlgoTrade数据转换完成...') 52 | 53 | if __name__ == '__main__': 54 | main() -------------------------------------------------------------------------------- /Classes/get_cal.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """ 3 |   __title__ = '' 4 |   __file__ = '' 5 |   __author__ = 'tianmuchunxiao' 6 |   __mtime__ = '2019/7/6' 7 | """ 8 | 9 | import tushare as ts 10 | import pandas as pd 11 | 12 | pro_api = ts.pro_api(token='3531aac4e2b7e3752304be0e83df5c39a2977fa57aa0e5e43fe16a38') 13 | trade_cal = pro_api.trade_cal(start='19900101', fields=['cal_date', 'is_open', 'pretrade_date']) 14 | trade_cal['cal_date'] = pd.to_datetime(trade_cal['cal_date']) 15 | trade_cal['pretrade_date'] = pd.to_datetime(trade_cal['pretrade_date']) 16 | trade_cal.to_csv('F:/Stock_Data/trade_cal.csv', 17 | encoding='utf-8', 18 | index=False) 19 | 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /Classes/management.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """ 3 |   __title__ = '' 4 |   __file__ = '' 5 |   __author__ = 'tianmuchunxiao' 6 |   __mtime__ = '2019/7/5' 7 | """ 8 | import pandas as pd 9 | 10 | class reinstate(object): 11 | source_path = 'F:/Stock_Data/stock_data/' 12 | data_df = pd.DataFrame() 13 | destination_path = 'F/Stock_Data/analysis/' 14 | symbol = '' 15 | 16 | 
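Note: Update_today.update above finds the previous trading day by looking it up in the calendar that get_cal.py saves to F:/Stock_Data/trade_cal.csv. A minimal, self-contained sketch of that lookup; the helper name prev_trade_date is hypothetical and not part of the repository:

import datetime
import pandas as pd

def prev_trade_date(day=None, cal_path='F:/Stock_Data/trade_cal.csv'):
    # Look up the previous trading day in the calendar written by Classes/get_cal.py.
    day = day or datetime.date.today()
    cal = pd.read_csv(cal_path, parse_dates=['cal_date', 'pretrade_date'])
    row = cal.loc[cal['cal_date'].dt.date == day]
    if row.empty:
        raise ValueError('{} is not covered by trade_cal.csv'.format(day))
    return row['pretrade_date'].iloc[0].strftime('%Y-%m-%d')

# e.g. prev_trade_date(datetime.date(2019, 7, 8)) -> '2019-07-05'

Parsing both date columns on read sidesteps the string-format mismatch that can occur when the saved calendar is compared against plain '%Y-%m-%d' strings.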
-------------------------------------------------------------------------------- /Classes/real_time_data.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """ 3 |   __title__ = '' 4 |   __file__ = '' 5 |   __author__ = 'tianmuchunxiao' 6 |   __mtime__ = '2019/7/24' 7 | """ 8 | 9 | import requests 10 | import re 11 | import pandas as pd 12 | 13 | URL = 'http://api.money.126.net/data/feed/{},money.api' 14 | HEADERS = { 15 | 'Referer': 'http://quotes.money.163.com/0601326.html', 16 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36' 17 | } 18 | 19 | response = requests.get(url=URL.format('0601390'), headers=HEADERS) 20 | data_str = response.content.decode('gbk') 21 | reg = re.compile(r"\{[^{}]*\}") 22 | data_str = reg.search(data_str).group(0) 23 | data_dict = eval(data_str) 24 | 25 | re_keys = list(data_dict.keys()) 26 | new_keys = ['查询代码', '涨跌幅', '最高', '卖三量', '卖二量', '卖五量', '卖四量', '现价', '今开', '买五价', '买四价', '买三价', '买二价', '买一价', '最低', '涨跌', '类型', '股票代码', '状态', '卖四价', '买三量', '买二量', '买一量', '更新时间', '买五量', '买四量', '昨收', '卖一量', '卖五价', '成交量', '卖一价', '股票名称', '卖三价', '卖二价', '箭头', '时刻', '成交额'] 27 | data_df = pd.DataFrame(columns=new_keys) 28 | data = {} 29 | for i in range(len(re_keys)): 30 | data[new_keys[i]] = data_dict[re_keys[i]] 31 | data_df = data_df.append(data,ignore_index=True) 32 | print(data_df) -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 panxiaochun 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 
--------------------------------------------------------------------------------
/MA.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | """
3 |   __title__ = ''
4 |   __file__ = ''
5 |   __author__ = 'tianmuchunxiao'
6 |   __mtime__ = '2019/7/4'
7 | """
8 | 
9 | import pandas as pd
10 | import numpy as np
11 | from multiprocessing import Pool
12 | 
13 | class data_handle(object):
14 | 
15 |     def __init__(self, symbol):
16 |         self.symbol = symbol
17 |         self.data_df = pd.read_csv('F:/Stock_Data/stock_data/{}.csv'.format(self.symbol),
18 |                                    encoding='gbk')
19 |         self.data_df = self.data_df.sort_values(by='日期')
20 |         self.bonus_df = pd.read_csv('F:/Stock_Data/bonus/{}.csv'.format(self.symbol),
21 |                                     encoding='gbk')
22 |         self.info_df = pd.read_csv('F:/Stock_Data/stock_info.csv',
23 |                                    encoding='gbk')
24 | 
25 |     def reinstate(self, source, dest):
26 |         self.data_df[dest] = self.data_df[source]
27 |         print('正在处理{}的复权数据...'.format(self.symbol))
28 |         # name = self.info_df['NAME'][self.info_df['SYMBOL'] == int(self.symbol)]
29 |         # name = name[0]
30 |         # self.data_df = self.data_df.drop(self.data_df[self.data_df['名称'] != name].index)
31 |         for row in self.bonus_df.iterrows():
32 |             bonus = row[1]
33 |             offer = 0 if bonus['送股'] == '--' else int(bonus['送股'])
34 |             transfer = 0 if bonus['转增'] == '--' else int(bonus['转增'])
35 |             dividend = 0. if bonus['派息'] == '--' else float(bonus['派息'])
36 |             ex_dividend_date = bonus['除权除息日']
37 |             bonus_list_date = bonus['红股上市日']
38 |             if (ex_dividend_date != '--') & (bonus_list_date != '--'):
39 |                 self.data_df[dest] = np.where(self.data_df['日期'] < ex_dividend_date,
40 |                                               self.data_df[dest] - dividend / 10,  # 派息 is quoted per 10 shares
41 |                                               self.data_df[dest])
42 |                 self.data_df[dest] = np.where(self.data_df['日期'] < ex_dividend_date,
43 |                                               self.data_df[dest] * 10 / (10 + transfer + offer),
44 |                                               self.data_df[dest])
45 |             elif ex_dividend_date != '--':
46 |                 self.data_df[dest] = np.where(self.data_df['日期'] < ex_dividend_date,
47 |                                               self.data_df[dest] - dividend / 10,  # 派息 is quoted per 10 shares
48 |                                               self.data_df[dest])
49 |             elif bonus_list_date != '--':
50 |                 self.data_df[dest] = np.where(self.data_df['日期'] < bonus_list_date,
51 |                                               self.data_df[dest] * 10 / (10 + transfer + offer),
52 |                                               self.data_df[dest])
53 | 
54 |     def save(self):
55 |         print('正在保存{}的复权数据...'.format(self.symbol))
56 |         self.data_df.to_csv('F:/Stock_Data/analysis/{}.csv'.format(self.symbol),
57 |                             encoding='gbk',
58 |                             index=False)
59 | 
60 |     def cal_MA(self, source):
61 |         self.data_df['MA5'] = self.data_df[source].rolling(window=5).mean()
62 |         self.data_df['MA20'] = self.data_df[source].rolling(window=20).mean()
63 |         self.data_df['MA60'] = self.data_df[source].rolling(window=60).mean()
64 |         self.data_df['MA120'] = self.data_df[source].rolling(window=120).mean()
65 | 
66 |     def MACD(self, source):
67 |         self.data_df['EMA12'] = self.data_df[source].ewm(span=12, adjust=False).mean()  # exponential, not simple, MA
68 |         self.data_df['EMA26'] = self.data_df[source].ewm(span=26, adjust=False).mean()
69 |         self.data_df['DIF'] = self.data_df['EMA12'] - self.data_df['EMA26']
70 |         self.data_df['MACD'] = self.data_df['DIF'].ewm(span=9, adjust=False).mean()  # signal line: 9-period EMA of DIF
71 | 
72 |     def KDJ(self, source, n):
73 |         self.data_df['RSV'] = (self.data_df[source] - self.data_df[source].rolling(window=n).min()) / (self.data_df[source].rolling(window=n).max() - self.data_df[source].rolling(window=n).min()) * 100.
74 |         # recursive smoothing: K_t = 2/3 * K_{t-1} + 1/3 * RSV_t and D_t = 2/3 * D_{t-1} + 1/3 * K_t;
75 |         # ewm(com=2, adjust=False) applies exactly that recursion, with J = 3*K - 2*D
76 |         self.data_df['K'] = self.data_df['RSV'].ewm(com=2, adjust=False).mean()
77 |         self.data_df['D'] = self.data_df['K'].ewm(com=2, adjust=False).mean()
78 |         self.data_df['J'] = 3 * self.data_df['K'] - 2 * self.data_df['D']
79 | 
80 | def rein(symbol):
81 |     data = data_handle(symbol)
82 |     data.reinstate('收盘价', 'Adj Close')
83 |     data.save()
84 |     print('{}的复权数据处理完成!'.format(symbol))
85 | 
86 | def main():
87 |     info_df = pd.read_csv('F:/Stock_Data/stock_info.csv',
88 |                           encoding='gbk')
89 |     symbols = info_df['SYMBOL'].values
90 |     pool = Pool(10)
91 |     for symbol in symbols:
92 |         symbol = '{:0>6}'.format(symbol)
93 |         pool.apply_async(func=rein, args=(symbol,))
94 |     pool.close()
95 |     pool.join()
96 |     print('全部复权数据处理完成...')
97 | 
98 | 
99 | # data = data_handle('000001')
100 | # data.reinstate('收盘价', 'Adj Close')
101 | # data.reinstate('开盘价', '新开盘')
102 | # data.reinstate('最高价', '新最高')
103 | # data.reinstate('最低价', '新最低')
104 | # data.cal_MA('新收盘')
105 | # data.MACD('新收盘')
106 | # data.KDJ('新收盘', 9)
107 | # data.save()
108 | 
109 | if __name__ == '__main__':
110 |     main()
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # stock_study
2 | Learning about stocks: using Python to study stock investing, roughly covering data scraping, technical-indicator analysis and quantitative trading, through to neural networks (deep learning).
3 | These are only notes from a self-study process, shared for discussion; pointers from anyone more experienced are very welcome.
4 | 
--------------------------------------------------------------------------------
/get_data.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | """
3 |   __title__ = ''
4 |   __file__ = ''
5 |   __author__ = 'tianmuchunxiao'
6 |   __mtime__ = '2019/7/4'
7 | """
8 | 
9 | from Classes.Info import Stock_info, Index_info
10 | from Classes.Data import Stock_data, Index_data
11 | from Classes.Bonus import Bonus
12 | from multiprocessing import Pool
13 | import threading
14 | 
15 | def get_bonus_data(symbol):
16 |     bonus = Bonus(symbol)
17 |     bonus.get_bonus()
18 |     bonus.get_allot()
19 |     bonus.get_issue()
20 |     bonus.get_financing()
21 | 
22 | def get_all_bonus_data():
23 |     stock_info = Stock_info()
24 |     info_df = stock_info.data_df
25 |     pool = Pool(10)
26 |     for row in info_df.iterrows():
27 |         symbol = row[1]['SYMBOL']
28 |         pool.apply_async(func=get_bonus_data, args=(symbol,))
29 |     pool.close()
30 |     pool.join()
31 | 
32 | def get_index_data(code):
33 |     index_data = Index_data()
34 |     index_data.get_data(code)
35 | 
36 | def get_all_index_data():
37 |     index_info = Index_info()
38 |     info_df = index_info.data_df
39 |     pool = Pool(10)
40 |     for row in info_df.iterrows():
41 |         code = row[1]['CODE']
42 |         pool.apply_async(func=get_index_data, args=(code,))
43 |     pool.close()
44 |     pool.join()
45 | 
46 | def get_stock_data(code):
47 |     stock_data = Stock_data()
48 |     stock_data.get_data(code)
49 | 
50 | def get_all_stock_data():
51 |     stock_info = Stock_info()
52 |     info_df = stock_info.data_df
53 |     pool = Pool(10)
54 |     for row in info_df.iterrows():
55 |         code = row[1]['CODE']
56 |         pool.apply_async(func=get_stock_data, args=(code,))
57 |     pool.close()
58 |     pool.join()
59 | 
60 | def get_info():
61 |     stock_info = Stock_info()
62 |     index_info = Index_info()
63 |     stock_info.get_data()
64 |     index_info.get_data()
65 | 
66 | if __name__ == '__main__':
67 |     get_info()
68 |     get_all_stock_data()
69 |     get_all_index_data()
70 |     get_all_bonus_data()
71 | 
--------------------------------------------------------------------------------
/test.py:
-------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """ 3 |   __title__ = '' 4 |   __file__ = '' 5 |   __author__ = 'tianmuchunxiao' 6 |   __mtime__ = '2019/8/2' 7 | """ 8 | 9 | import requests 10 | import json 11 | 12 | URL = 'http://quotes.money.163.com/hs/service/diyrank.php' 13 | 14 | HEADERS = { 15 | 'Accept': 'application/json, text/javascript, */*; q=0.01', 16 | 'Accept-Encoding': 'gzip, deflate', 17 | 'Accept-Language': 'zh,en-US;q=0.9,en;q=0.8,zh-TW;q=0.7,zh-CN;q=0.6', 18 | 'Connection': 'keep-alive', 19 | 'Host': 'quotes.money.163.com', 20 | 'Referer': 'http://quotes.money.163.com/old/', 21 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36', 22 | 'X-Requested-With': 'XMLHttpRequest' 23 | } 24 | 25 | PARAMS = { 26 | 'host': 'http://quotes.money.163.com/hs/service/diyrank.php', 27 | 'page': '0', 28 | 'query': 'STYPE:EQA', 29 | 'fields': 'NO,SYMBOL,NAME,PRICE,PERCENT,UPDOWN,FIVE_MINUTE,OPEN,YESTCLOSE,HIGH,LOW,VOLUME,TURNOVER,HS,LB,WB,ZF,PE,MCAP,TCAP,MFSUM,MFRATIO.MFRATIO2,MFRATIO.MFRATIO10,SNAME,CODE,ANNOUNMT,UVSNEWS', 30 | 'sort': 'SYMBOL', 31 | 'order': 'asc', 32 | 'count': '10240', 33 | 'type': 'query' 34 | } 35 | 36 | response = requests.get(url=URL, params=PARAMS, headers=HEADERS) 37 | total = json.loads(response.content.decode('gbk'))['total'] 38 | PARAMS['count'] = total 39 | 40 | response = requests.get(url=URL, params=PARAMS, headers=HEADERS) 41 | data_list = json.loads(response.content.decode('gbk'))['list'] 42 | 43 | for row in data_list: 44 | print('CODE','代码',row['CODE']) 45 | print('FIVE_MINUTE','五分钟涨跌',row['FIVE_MINUTE']) 46 | print('HIGH','最高价',row['HIGH']) 47 | print('LOW','最低价',row['LOW']) 48 | print('NAME','股票名称',row['NAME']) 49 | print('OPEN','开盘价',row['OPEN']) 50 | print('PERCENT','涨跌幅',row['PERCENT']) 51 | print('PRICE','价格',row['PRICE']) 52 | print('SYMBOL','股票代码',row['SYMBOL']) 53 | print('TURNOVER','成交额',row['TURNOVER']) 54 | print('UPDOWN','涨跌额',row['UPDOWN']) 55 | print('VOLUME','成交量',row['VOLUME']) 56 | print('YESTCLOSE','昨收',row['YESTCLOSE']) 57 | print('ZF','振幅',row['ZF']) 58 | 59 | 60 | -------------------------------------------------------------------------------- /test2.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """ 3 |   __title__ = '' 4 |   __file__ = '' 5 |   __author__ = 'tianmuchunxiao' 6 |   __mtime__ = '2019/8/3' 7 | """ 8 | 9 | import pandas as pd 10 | import requests 11 | from lxml import etree 12 | 13 | info_df = pd.read_csv('F:/Stock_Data/stock_info.csv', encoding='gbk') 14 | for row in info_df.iterrows(): 15 | symbol = row[1]['SYMBOL'] 16 | url = 'http://quotes.money.163.com/trade/lsjysj_{}.html'.format('{:0>6}'.format(symbol)) 17 | response = requests.get(url=url) 18 | html = response.content.decode('utf8') 19 | data = etree.HTML(html) 20 | columns = data.xpath('/html/body/div[2]/div[4]/table/thead/tr/th/text()') 21 | trs = data.xpath('/html/body/div[2]/div[4]/table/tr') 22 | data_df = pd.DataFrame(columns=columns) 23 | for tr in trs: 24 | row = tr.xpath('./td/text()') 25 | data = {} 26 | for i in range(len(columns)): 27 | data[columns[i]] = row[i] 28 | data_df = data_df.append(data, ignore_index=True) 29 | data_df = data_df.sort_values(by='日期') 30 | 31 | data_df_saved = pd.read_csv('F:/Stock_Data/stock_data/{}.csv'.format('{:0>6}'.format(symbol)), 32 | encoding='gbk') 33 | print(data_df) 34 | print(data_df_saved) 35 | break 36 | 37 | 38 | 
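test2.py prints the freshly scraped table and the saved CSV side by side and leaves the comparison to the eye. A small sketch of how that check could be automated; it assumes two frames shaped like data_df and data_df_saved above, and that the scraped columns arrive as text, possibly with thousands separators:

import numpy as np

def closes_match(scraped_df, saved_df, tol=1e-4):
    # Compare closing prices scraped from the web page with the saved CSV.
    web = scraped_df[['日期', '收盘价']].copy()
    # the HTML table yields strings such as '1,234.56'; normalise before comparing
    web['收盘价'] = web['收盘价'].astype(str).str.replace(',', '').astype(float)
    merged = web.merge(saved_df[['日期', '收盘价']], on='日期', suffixes=('_web', '_csv'))
    return bool(np.isclose(merged['收盘价_web'], merged['收盘价_csv'], atol=tol).all())

# closes_match(data_df, data_df_saved) -> True when the scraped closes agree with
# the CSV written earlier by get_data.py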
--------------------------------------------------------------------------------
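The files written to F:/Stock_Data/PyAlgoTrade/ by convert_data_for_PyAlgoTrade.py use the column names that PyAlgoTrade's GenericBarFeed expects ('Date Time', 'Open', 'High', 'Low', 'Close', 'Volume', 'Adj Close'). A minimal backtest sketch that consumes one of them; the calls follow the PyAlgoTrade 0.20 csvfeed documentation and are worth checking against the installed version, and the instrument code and the 20-day window are chosen purely for illustration:

from pyalgotrade import bar, strategy
from pyalgotrade.barfeed.csvfeed import GenericBarFeed
from pyalgotrade.technical import ma

class MA20Watch(strategy.BacktestingStrategy):
    def __init__(self, feed, instrument):
        super(MA20Watch, self).__init__(feed)
        self.__instrument = instrument
        # 20-day simple moving average of the close series
        self.__ma20 = ma.SMA(feed[instrument].getCloseDataSeries(), 20)

    def onBars(self, bars):
        close = bars[self.__instrument].getClose()
        if self.__ma20[-1] is not None and close > self.__ma20[-1]:
            self.info('close {:.2f} above MA20 {:.2f}'.format(close, self.__ma20[-1]))

feed = GenericBarFeed(bar.Frequency.DAY)
feed.setDateTimeFormat('%Y-%m-%d')  # the converter writes date-only values
feed.addBarsFromCSV('000001', 'F:/Stock_Data/PyAlgoTrade/000001.csv')
MA20Watch(feed, '000001').run()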