├── .gitignore ├── ExampleData ├── 伊利股份.csv ├── 宁沪高速.csv ├── 宇通客车.csv ├── 宇通客车产销数据 │ ├── 2017年10月份产销快报.xlsx │ ├── 2017年11月份产销快报.xlsx │ ├── 2017年12月份产销快报.xlsx │ ├── 2017年1月份产销快报.xlsx │ ├── 2017年2月产销快报.xlsx │ ├── 2017年3月份产销快报.xlsx │ ├── 2017年4月份产销快报.xlsx │ ├── 2017年5月份产销快报.xlsx │ ├── 2017年6月份产销快报.xlsx │ ├── 2017年7月份产销快报.xlsx │ ├── 2017年8月份产销快报.xlsx │ └── 2017年9月份产销快报.xlsx ├── 格力电器.csv ├── 福耀玻璃.csv └── 隆基股份.csv ├── LICENSE ├── M1809 ├── doc │ ├── M1809使用说明.txt │ ├── item_reports │ │ ├── M1809-个股基本面分析.xmind │ │ ├── M1809_20180521.ppt │ │ ├── M1809_20180530.ppt │ │ ├── M1809_20180606.ppt │ │ └── M1809_20180611.ppt │ ├── report │ │ └── M1809_20180612.ppt │ ├── xst │ │ ├── M1809_finance_analysis.py │ │ ├── M1809_finance_crawling.py │ │ └── M1809_finance_crawling_target.py │ └── 表头说明 │ │ ├── balance_columns.txt │ │ ├── cash_flow_columns.txt │ │ ├── profit_columns │ │ ├── profit_columns.txt │ │ ├── 开发建议.txt │ │ └── 新建 Microsoft Excel 工作表.xlsx ├── src │ ├── App.py │ ├── Config.py │ ├── CoreAnalyse.py │ ├── GetItemInfo.py │ ├── M1809_finance_weight.py │ ├── PlotAnalyse.py │ ├── UserApi.py │ ├── crawling_finance_table.py │ ├── crawling_finance_table_v1_7.py │ ├── get_dividends_history.py │ ├── get_price.py │ ├── trade_day.py │ └── txttoexcel.py └── sys_config │ └── 账户配置.txt ├── README.md ├── Release ├── HK_insider.py ├── HK_insider_v1.1.py ├── HK_insider_v1.4.py ├── ReleaseNote.txt ├── YTProductionAndSale │ ├── DataAnalyze.py │ ├── DataToSql.py │ ├── PdfDown.py │ ├── __init__.py │ └── 使用说明.txt ├── YT_produce_sell.py ├── crawling_finance_table_v1.2.py ├── get_dividends_history.py ├── kday │ ├── get_price.py │ └── k_day.py ├── pdf_decoder.py ├── wechat.py ├── 安居客爬虫框架 │ ├── ReadMe.txt │ └── crawl_anjuke_v1.311.py └── 树莓派信息实时推送示例 │ ├── App.py │ ├── PageDecoder.py │ ├── PushMessage.py │ ├── StockClass.py │ └── 使用说明.txt ├── _config.yml ├── anack ├── .gitignore ├── App │ ├── Detail_Stock_Selector.py │ ├── HK_insider.py │ ├── IndustryEstimation.py │ ├── IndustryEstimation_detail.py │ ├── M1808 │ │ ├── M1808.py │ │ ├── early_warning.py │ │ ├── protocol.py │ │ ├── wechat.py │ │ └── 命令示例.txt │ ├── StockAnalyser.py │ ├── YT_produce_sell.py │ ├── __init__.py │ └── 实时推送 │ │ ├── App.py │ │ ├── PageDecoder.py │ │ ├── PushMessage.py │ │ ├── StockClass.py │ │ └── 使用说明.txt ├── SQL │ ├── StockSelector.py │ ├── __init__.py │ ├── classify_to_sql.py │ ├── finnance_to_sql.py │ ├── glo.py │ ├── k_data_to_sql.py │ ├── macro_to_sql.py │ ├── sql.py │ ├── update.py │ └── 感兴趣的个股列表.txt ├── Tushare │ ├── basic.py │ ├── classify.py │ ├── finance.py │ ├── information.py │ └── macro.py ├── __init__.py └── main.py ├── anack_study_case ├── balance_columns.txt ├── cash_flow_columns.txt ├── cash_flow_statements_balance_profit_columns.xlsx ├── crawling_finance_table_v1.5.py ├── crawling_finance_table_v1.6.py ├── crawling_finance_table_v1.7.2.1.py ├── crawling_finance_table_v1.7.py ├── dividend_rate_v2.py ├── lirunbiao.csv ├── liuliang.csv ├── profit_columns.txt ├── sql常用模板.txt ├── zichanfuzhai.csv └── 财务表精简表头.xlsx ├── anack_test.py ├── anack数据字典_v1.0.xlsx ├── anjuke_readme.txt ├── crawl_anjuke_v1.311.py └── raw_modules ├── ReadMe.txt ├── __init__.py └── get_price.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.bin 2 | debug/ 3 | data/ 4 | __pycache__/ 5 | *.pyc 6 | *.cpython-36.pyc 7 | output/ 8 | *.csv 9 | *.cfg 10 | config/ 11 | PdfDownload/ 12 | ExeFile/ 13 | -------------------------------------------------------------------------------- /ExampleData/伊利股份.csv: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/ExampleData/伊利股份.csv -------------------------------------------------------------------------------- /ExampleData/宁沪高速.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/ExampleData/宁沪高速.csv -------------------------------------------------------------------------------- /ExampleData/宇通客车.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/ExampleData/宇通客车.csv -------------------------------------------------------------------------------- /ExampleData/宇通客车产销数据/2017年10月份产销快报.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/ExampleData/宇通客车产销数据/2017年10月份产销快报.xlsx -------------------------------------------------------------------------------- /ExampleData/宇通客车产销数据/2017年11月份产销快报.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/ExampleData/宇通客车产销数据/2017年11月份产销快报.xlsx -------------------------------------------------------------------------------- /ExampleData/宇通客车产销数据/2017年12月份产销快报.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/ExampleData/宇通客车产销数据/2017年12月份产销快报.xlsx -------------------------------------------------------------------------------- /ExampleData/宇通客车产销数据/2017年1月份产销快报.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/ExampleData/宇通客车产销数据/2017年1月份产销快报.xlsx -------------------------------------------------------------------------------- /ExampleData/宇通客车产销数据/2017年2月产销快报.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/ExampleData/宇通客车产销数据/2017年2月产销快报.xlsx -------------------------------------------------------------------------------- /ExampleData/宇通客车产销数据/2017年3月份产销快报.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/ExampleData/宇通客车产销数据/2017年3月份产销快报.xlsx -------------------------------------------------------------------------------- /ExampleData/宇通客车产销数据/2017年4月份产销快报.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/ExampleData/宇通客车产销数据/2017年4月份产销快报.xlsx -------------------------------------------------------------------------------- /ExampleData/宇通客车产销数据/2017年5月份产销快报.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/ExampleData/宇通客车产销数据/2017年5月份产销快报.xlsx -------------------------------------------------------------------------------- 
/ExampleData/宇通客车产销数据/2017年6月份产销快报.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/ExampleData/宇通客车产销数据/2017年6月份产销快报.xlsx -------------------------------------------------------------------------------- /ExampleData/宇通客车产销数据/2017年7月份产销快报.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/ExampleData/宇通客车产销数据/2017年7月份产销快报.xlsx -------------------------------------------------------------------------------- /ExampleData/宇通客车产销数据/2017年8月份产销快报.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/ExampleData/宇通客车产销数据/2017年8月份产销快报.xlsx -------------------------------------------------------------------------------- /ExampleData/宇通客车产销数据/2017年9月份产销快报.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/ExampleData/宇通客车产销数据/2017年9月份产销快报.xlsx -------------------------------------------------------------------------------- /ExampleData/格力电器.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/ExampleData/格力电器.csv -------------------------------------------------------------------------------- /ExampleData/福耀玻璃.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/ExampleData/福耀玻璃.csv -------------------------------------------------------------------------------- /ExampleData/隆基股份.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/ExampleData/隆基股份.csv -------------------------------------------------------------------------------- /M1809/doc/M1809使用说明.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/M1809/doc/M1809使用说明.txt -------------------------------------------------------------------------------- /M1809/doc/item_reports/M1809-个股基本面分析.xmind: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/M1809/doc/item_reports/M1809-个股基本面分析.xmind -------------------------------------------------------------------------------- /M1809/doc/item_reports/M1809_20180521.ppt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/M1809/doc/item_reports/M1809_20180521.ppt -------------------------------------------------------------------------------- /M1809/doc/item_reports/M1809_20180530.ppt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/M1809/doc/item_reports/M1809_20180530.ppt 
-------------------------------------------------------------------------------- /M1809/doc/item_reports/M1809_20180606.ppt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/M1809/doc/item_reports/M1809_20180606.ppt -------------------------------------------------------------------------------- /M1809/doc/item_reports/M1809_20180611.ppt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/M1809/doc/item_reports/M1809_20180611.ppt -------------------------------------------------------------------------------- /M1809/doc/report/M1809_20180612.ppt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/M1809/doc/report/M1809_20180612.ppt -------------------------------------------------------------------------------- /M1809/doc/xst/M1809_finance_analysis.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | ''' 3 | 类名:M1809_finance_analysis(财务数据分析) 4 | 作者:徐抒田 5 | 日期:2018-5-28 6 | 描述: 7 | 1、初步调试机器学习方法 8 | 2、参数设置,1的准确率增加 9 | 版本号:V0.1 10 | ''' 11 | 12 | 13 | import pandas as pd 14 | import numpy as np 15 | import matplotlib.pyplot as plt 16 | 17 | result_yinli = pd.read_csv('D:/999github/anack/M1809/result_yinli.csv') 18 | result_yingyun = pd.read_csv('D:/999github/anack/M1809/result_yingyun.csv') 19 | result_chengzhang = pd.read_csv('D:/999github/anack/M1809/result_chengzhang.csv') 20 | result_changzhai = pd.read_csv('D:/999github/anack/M1809/result_changzhai.csv') 21 | result_xianjin = pd.read_csv('D:/999github/anack/M1809/result_xianjin.csv') 22 | 23 | 24 | df_final = pd.read_csv('D:/999github/anack/M1809/target.csv') 25 | df_final = df_final[(df_final.firstincrase > 0.1) & (df_final.secondincrase > 0.1)] 26 | df_final = pd.DataFrame({'code' : df_final['code'], 27 | 'label' : 1, 28 | }) 29 | 30 | 31 | data = result_yinli 32 | data = pd.merge(data, result_yingyun, on=['code','name']) 33 | data = pd.merge(data, result_chengzhang, on=['code','name']) 34 | data = pd.merge(data, result_changzhai, on=['code','name']) 35 | data = pd.merge(data, result_xianjin, on=['code','name']) 36 | data = pd.merge(data, df_final, on='code',how = 'left') 37 | 38 | # ============================================================================= 39 | # null_counts = data.isnull().sum() 40 | # print(null_counts) 41 | # ============================================================================= 42 | 43 | data = data.fillna(0) 44 | data = data.dropna(axis=0) 45 | 46 | 47 | 48 | orig_columns = data.columns 49 | drop_columns = [] 50 | for col in orig_columns: 51 | col_series = data[col].dropna().unique() 52 | if len(col_series) == 1: 53 | drop_columns.append(col) 54 | data = data.drop(drop_columns, axis=1) 55 | print(drop_columns) 56 | 57 | 58 | 59 | target = data['label'] 60 | code = data['code'] 61 | name = data['name'] 62 | features = data.drop(['code','name','label'],axis=1) 63 | 64 | features[features.currentratio20161 == '--'] = 0 65 | features[features.quickratio20161=='--']=0 66 | features[features.cashratio20161=='--']=0 67 | features[features.icratio20161=='--']=0 68 | features[features.sheqratio20161=='--']=0 69 | features[features.adratio20161=='--']=0 70 | 
features[features.currentratio20162=='--']=0 71 | features[features.quickratio20162=='--']=0 72 | features[features.cashratio20162=='--']=0 73 | features[features.icratio20162=='--']=0 74 | features[features.sheqratio20162=='--']=0 75 | features[features.adratio20162=='--']=0 76 | features[features.currentratio20163=='--']=0 77 | features[features.quickratio20163=='--']=0 78 | features[features.cashratio20163=='--']=0 79 | features[features.icratio20163=='--']=0 80 | features[features.sheqratio20163=='--']=0 81 | features[features.adratio20163=='--']=0 82 | features[features.currentratio20164=='--']=0 83 | features[features.quickratio20164=='--']=0 84 | features[features.cashratio20164=='--']=0 85 | features[features.icratio20164=='--']=0 86 | features[features.currentratio20171=='--']=0 87 | features[features.quickratio20171=='--']=0 88 | features[features.cashratio20171=='--']=0 89 | features[features.icratio20171=='--']=0 90 | features[features.sheqratio20171=='--']=0 91 | features[features.adratio20171=='--']=0 92 | features[features.currentratio20172=='--']=0 93 | features[features.quickratio20172=='--']=0 94 | features[features.cashratio20172=='--']=0 95 | features[features.icratio20172=='--']=0 96 | features[features.currentratio20173=='--']=0 97 | features[features.quickratio20173=='--']=0 98 | features[features.cashratio20173=='--']=0 99 | features[features.icratio20173=='--']=0 100 | features[features.currentratio20174=='--']=0 101 | features[features.quickratio20174=='--']=0 102 | features[features.cashratio20174=='--']=0 103 | features[features.icratio20174=='--']=0 104 | features[features.currentratio20181=='--']=0 105 | features[features.quickratio20181=='--']=0 106 | features[features.cashratio20181=='--']=0 107 | features[features.icratio20181=='--']=0 108 | features = features.astype('float64') 109 | 110 | 111 | ##基于树的方法不用做标准化、归一化处理 112 | from sklearn import preprocessing 113 | min_max_scaler = preprocessing.MinMaxScaler() 114 | features_new = min_max_scaler.fit_transform(features) 115 | features = pd.DataFrame(features_new, columns=features.columns) 116 | 117 | 118 | features = features[['roe20181' ,'esp20181' ,'esp20174' ,'roe20174' ,'net_profits20174' ,'net_profits20181' ,'esp20173' ,'net_profits20172' ,'roe20173' ,'net_profits20173' ,'net_profits20163' ,'esp20172' ,'business_income20174' ,'roe20172' ,'net_profits20171' ,'net_profits20164' ,'rateofreturn20173' ,'seg20181' ,'net_profits20162' ,'business_income20173' ,'roe20171' ,'business_income20171' ,'nprg20181' ,'business_income20181' ,'nav20181' ,'rateofreturn20174' ,'epsg20181' ,'seg20174' ,'business_income20172' ,'esp20171']] 119 | 120 | from sklearn.model_selection import cross_val_score 121 | from sklearn.datasets import make_blobs 122 | from sklearn.ensemble import RandomForestClassifier 123 | from sklearn.model_selection import train_test_split 124 | from sklearn.metrics import classification_report 125 | from sklearn import metrics 126 | 127 | 128 | X_train,X_test,y_train,y_test = train_test_split( 129 | features,target,test_size=0.25,random_state=42) 130 | 131 | ''' 132 | Random_forset 133 | ''' 134 | clf = RandomForestClassifier(n_estimators=200,max_depth = 7,min_samples_split = 5,min_samples_leaf = 20,random_state=2018,class_weight={1:1.5}) 135 | 136 | clf = clf.fit(X_train, y_train) 137 | y_pre = clf.predict(X_test) 138 | 139 | y_pre_pro = clf.predict_proba(X_test)[:, 1] 140 | print(y_pre_pro) 141 | print(classification_report(y_test,y_pre)) 142 | print(metrics.roc_auc_score(y_test,y_pre)) #预测Y值得分 143 | 
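# Side notes on the pipeline above (hedged sketches, not part of the original
# script):
#
# 1. The long run of `features[features.col == '--'] = 0` statements indexes
#    with a boolean Series, so pandas zeroes the *entire matching row*, not
#    just the offending cell; replacing the sentinel directly touches only
#    the '--' cells and covers every season column in one call:
#
#        features = features.replace('--', 0).astype('float64')
#
# 2. `roc_auc_score(y_test, y_pre)` just above scores the hard 0/1 labels;
#    passing the probabilities gives the usual threshold-free AUC:
#
#        print(metrics.roc_auc_score(y_test, y_pre_pro))
#
# 3. `cross_val_score` is imported but never called; a minimal 5-fold check
#    of the same classifier would be:
#
#        scores = cross_val_score(clf, features, target, cv=5, scoring='roc_auc')
#        print('CV AUC: %.3f +/- %.3f' % (scores.mean(), scores.std()))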
144 | def aucfun(act,pred): 145 | fpr,tpr,thresholds = metrics.roc_curve(act,pred) 146 | plt.plot(fpr, tpr, color='darkorange',lw=2) 147 | plt.xlim([0.0, 1.0]) 148 | plt.ylim([0.0, 1.05]) 149 | plt.xlabel('False Positive Rate') 150 | plt.ylabel('True Positive Rate') 151 | # ============================================================================= 152 | # print(fpr) 153 | # print(tpr) 154 | # print(thresholds) 155 | # ============================================================================= 156 | return metrics.auc(fpr,tpr) 157 | 158 | 159 | aucfun(y_test,y_pre_pro) 160 | 161 | 162 | importances = clf.feature_importances_ 163 | std = np.std([tree.feature_importances_ for tree in clf.estimators_],axis=0) 164 | indices = np.argsort(importances)[::-1] 165 | print("Feature ranking:") 166 | for f in range(features.shape[1]): 167 | print("%d. feature %d (%f): %s" % (f + 1, indices[f], importances[indices[f]] , features.columns[indices[f]] )) 168 | 169 | 170 | 171 | y_pre_pro_f = clf.predict_proba(features)[:, 1] 172 | 173 | y_pre_pro_f = pd.DataFrame({'code' : code, 174 | 'name' : name, 175 | 'gailv' : y_pre_pro_f 176 | }) 177 | 178 | y_pre_pro_f.to_csv('D:/999github/anack/M1809/y_pre_pro_f.csv',index =False) -------------------------------------------------------------------------------- /M1809/doc/xst/M1809_finance_crawling.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | ''' 3 | 类名:M1809_finance_crawling(财务分析数据爬虫) 4 | 作者:徐抒田 5 | 日期:2018-5-20 6 | 描述: 7 | 1、获取财务数据模块,存入本地CSV; 8 | 2、后续盈利表里增加:市盈率、市净率、PEG;现金流量增加:当期现金流入; 9 | 3、后续版本增加表入库,增加数据更新,模块化 10 | 方法:使用TUSHARE 11 | 版本号:V0.1 12 | ''' 13 | import pandas as pd 14 | import tushare as ts 15 | ''' 16 | 盈利能力表 17 | 18 | code,代码 19 | name,名称 20 | roe,净资产收益率(%) 21 | net_profit_ratio,净利率(%) 22 | gross_profit_rate,毛利率(%) 23 | net_profits,净利润(万元) 24 | esp,每股收益 25 | business_income,营业收入(百万元) 26 | bips,每股主营业务收入(元) 27 | season,年+季度 28 | ''' 29 | result_yinli = ts.get_profit_data(2018,1).loc[:,['code','name']] 30 | print(result_yinli) 31 | for i in [2016,2017,2018]: 32 | if i != 2018: 33 | for j in [1,2,3,4]: 34 | columns = ['code','name','roe'+str(i)+str(j),'net_profit_ratio'+str(i)+str(j),'gross_profit_rate'+str(i)+str(j),'net_profits'+str(i)+str(j),'esp'+str(i)+str(j),'business_income'+str(i)+str(j),'bips'+str(i)+str(j)] 35 | result_1 = pd.DataFrame(ts.get_profit_data(i,j).values,columns = columns) 36 | result_1 = result_1.drop(['name'],axis=1) 37 | result_yinli = pd.merge(result_yinli, result_1, on='code',how='left') 38 | print(str(i)+str(j)) 39 | print(len(result_yinli)) 40 | if i == 2018: 41 | j = 1 42 | columns = ['code','name','roe'+str(i)+str(j),'net_profit_ratio'+str(i)+str(j),'gross_profit_rate'+str(i)+str(j),'net_profits'+str(i)+str(j),'esp'+str(i)+str(j),'business_income'+str(i)+str(j),'bips'+str(i)+str(j)] 43 | result_1 = pd.DataFrame(ts.get_profit_data(i,j).values,columns = columns) 44 | result_1 = result_1.drop(['name'],axis=1) 45 | result_yinli = pd.merge(result_yinli, result_1, on='code',how='left') 46 | print(str(i)+str(j)) 47 | print(len(result_yinli)) 48 | result_yinli = result_yinli.drop_duplicates() 49 | result_yinli.to_csv('D:/999github/anack/M1809/result_yinli.csv',index =False) 50 | ''' 51 | 营运能力表 52 | 53 | code,代码 54 | name,名称 55 | arturnover,应收账款周转率(次) 56 | arturndays,应收账款周转天数(天) 57 | inventory_turnover,存货周转率(次) 58 | inventory_days,存货周转天数(天) 59 | currentasset_turnover,流动资产周转率(次) 60 | currentasset_days,流动资产周转天数(天) 61 | season,年+季度 62 | ''' 63 | result_yingyun = 
ts.get_operation_data(2018,1).loc[:,['code','name']] 64 | for i in [2016,2017,2018]: 65 | if i != 2018: 66 | for j in [1,2,3,4]: 67 | columns = ['code','name','arturnover'+str(i)+str(j),'arturndays'+str(i)+str(j),'inventory_turnover'+str(i)+str(j),'inventory_days'+str(i)+str(j),'currentasset_turnover'+str(i)+str(j),'currentasset_days'+str(i)+str(j)] 68 | result_1 = pd.DataFrame(ts.get_operation_data(i,j).values,columns = columns) 69 | result_1 = result_1.drop(['name'],axis=1) 70 | result_yingyun = pd.merge(result_yingyun, result_1, on='code',how='left') 71 | if i == 2018: 72 | j = 1 73 | columns = ['code','name','arturnover'+str(i)+str(j),'arturndays'+str(i)+str(j),'inventory_turnover'+str(i)+str(j),'inventory_days'+str(i)+str(j),'currentasset_turnover'+str(i)+str(j),'currentasset_days'+str(i)+str(j)] 74 | result_1 = pd.DataFrame(ts.get_operation_data(i,j).values,columns = columns) 75 | result_1 = result_1.drop(['name'],axis=1) 76 | result_yingyun = pd.merge(result_yingyun, result_1, on='code',how='left') 77 | result_yingyun = result_yingyun.drop_duplicates() 78 | result_yingyun.to_csv('D:/999github/anack/M1809/result_yingyun.csv',index =False) 79 | ''' 80 | 成长能力表 81 | 82 | code,代码 83 | name,名称 84 | mbrg,主营业务收入增长率(%) 85 | nprg,净利润增长率(%) 86 | nav,净资产增长率 87 | targ,总资产增长率 88 | epsg,每股收益增长率 89 | seg,股东权益增长率 90 | season,年+季度 91 | ''' 92 | result_chengzhang = ts.get_growth_data(2018,1).loc[:,['code','name']] 93 | for i in [2016,2017,2018]: 94 | if i != 2018: 95 | for j in [1,2,3,4]: 96 | columns = ['code','name','mbrg'+str(i)+str(j),'nprg'+str(i)+str(j),'nav'+str(i)+str(j),'targ'+str(i)+str(j),'epsg'+str(i)+str(j),'seg'+str(i)+str(j)] 97 | result_1 = pd.DataFrame(ts.get_growth_data(i,j).values,columns = columns) 98 | result_1 = result_1.drop(['name'],axis=1) 99 | result_chengzhang = pd.merge(result_chengzhang, result_1, on='code',how='left') 100 | if i == 2018: 101 | j = 1 102 | columns = ['code','name','mbrg'+str(i)+str(j),'nprg'+str(i)+str(j),'nav'+str(i)+str(j),'targ'+str(i)+str(j),'epsg'+str(i)+str(j),'seg'+str(i)+str(j)] 103 | result_1 = pd.DataFrame(ts.get_growth_data(i,j).values,columns = columns) 104 | result_1 = result_1.drop(['name'],axis=1) 105 | result_chengzhang = pd.merge(result_chengzhang, result_1, on='code',how='left') 106 | result_chengzhang = result_chengzhang.drop_duplicates() 107 | result_chengzhang.to_csv('D:/999github/anack/M1809/result_chengzhang.csv',index =False) 108 | ''' 109 | 偿债能力表 110 | 111 | code,代码 112 | name,名称 113 | currentratio,流动比率 114 | quickratio,速动比率 115 | cashratio,现金比率 116 | icratio,利息支付倍数 117 | sheqratio,股东权益比率 118 | adratio,股东权益增长率 119 | season,年+季度 120 | ''' 121 | result_changzhai = ts.get_debtpaying_data(2018,1).loc[:,['code','name']] 122 | for i in [2016,2017,2018]: 123 | if i != 2018: 124 | for j in [1,2,3,4]: 125 | columns = ['code','name','currentratio'+str(i)+str(j),'quickratio'+str(i)+str(j),'cashratio'+str(i)+str(j),'icratio'+str(i)+str(j),'sheqratio'+str(i)+str(j),'adratio'+str(i)+str(j)] 126 | result_1 = pd.DataFrame(ts.get_debtpaying_data(i,j).values,columns = columns) 127 | result_1 = result_1.drop(['name'],axis=1) 128 | result_changzhai = pd.merge(result_changzhai, result_1, on='code',how='left') 129 | if i == 2018: 130 | j = 1 131 | columns = ['code','name','currentratio'+str(i)+str(j),'quickratio'+str(i)+str(j),'cashratio'+str(i)+str(j),'icratio'+str(i)+str(j),'sheqratio'+str(i)+str(j),'adratio'+str(i)+str(j)] 132 | result_1 = pd.DataFrame(ts.get_debtpaying_data(i,j).values,columns = columns) 133 | result_1 = 
result_1.drop(['name'],axis=1) 134 | result_changzhai = pd.merge(result_changzhai, result_1, on='code',how='left') 135 | result_changzhai = result_changzhai.drop_duplicates() 136 | result_changzhai.to_csv('D:/999github/anack/M1809/result_changzhai.csv',index =False) 137 | ''' 138 | 现金流量表 139 | 140 | code,代码 141 | name,名称 142 | cf_sales,经营现金净流量对销售收入比率 143 | rateofreturn,资产的经营现金流量回报率 144 | cf_nm,经营现金净流量与净利润的比率 145 | cf_liabilities,经营现金净流量对负债比率 146 | cashflowratio,现金流量比率 147 | season,年+季度 148 | ''' 149 | result_xianjin = ts.get_cashflow_data(2018,1).loc[:,['code','name']] 150 | for i in [2016,2017,2018]: 151 | if i != 2018: 152 | for j in [1,2,3,4]: 153 | columns = ['code','name','cf_sales'+str(i)+str(j),'rateofreturn'+str(i)+str(j),'cf_nm'+str(i)+str(j),'cf_liabilities'+str(i)+str(j),'cashflowratio'+str(i)+str(j)] 154 | result_1 = pd.DataFrame(ts.get_cashflow_data(i,j).values,columns = columns) 155 | result_1 = result_1.drop(['name'],axis=1) 156 | result_xianjin = pd.merge(result_xianjin, result_1, on='code',how='left') 157 | if i == 2018: 158 | j = 1 159 | columns = ['code','name','cf_sales'+str(i)+str(j),'rateofreturn'+str(i)+str(j),'cf_nm'+str(i)+str(j),'cf_liabilities'+str(i)+str(j),'cashflowratio'+str(i)+str(j)] 160 | result_1 = pd.DataFrame(ts.get_cashflow_data(i,j).values,columns = columns) 161 | result_1 = result_1.drop(['name'],axis=1) 162 | result_xianjin = pd.merge(result_xianjin, result_1, on='code',how='left') 163 | result_xianjin = result_xianjin.drop_duplicates() 164 | result_xianjin.to_csv('D:/999github/anack/M1809/result_xianjin.csv',index =False) 165 | 166 | -------------------------------------------------------------------------------- /M1809/doc/xst/M1809_finance_crawling_target.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | ''' 3 | 类名:M1809_finance_crawling_target(财务分析数据爬虫) 4 | 作者:徐抒田 5 | 日期:2018-5-28 6 | 描述: 7 | 1、筛选17年和18年复合增长率大于10%的股票作为TARGET 8 | 2、选取close为当日的股价 9 | 3、年股价计算(12个月均价) 10 | 版本号:V0.1 11 | ''' 12 | 13 | import tushare as ts 14 | import pandas as pd 15 | 16 | stock_code_num = ts.get_profit_data(2018,1)['code'].tolist() 17 | 18 | a=[] 19 | data = pd.DataFrame() 20 | for j in stock_code_num: 21 | try: 22 | df = ts.get_k_data(j,ktype = 'M')[['date','close','code']] 23 | for i in df['date']: 24 | a.append(i[0:4]) 25 | df['date'] = a 26 | a=[] 27 | df = pd.concat([df[df.date == '2016'],df[df.date == '2017'],df[df.date == '2018']]) 28 | data = pd.concat([data,df]) 29 | print(j) 30 | except: 31 | stock_code_num.remove(j) 32 | print('error!') 33 | print(j) 34 | 35 | data_mean = data.pivot_table('close',index='code',columns=['date'],aggfunc='mean',fill_value=0) 36 | data_var = data.pivot_table('close',index='code',columns=['date'],aggfunc='std',fill_value=0) 37 | 38 | 39 | df_final = pd.DataFrame({'code' : data_mean.index, 40 | 'yiliu_mean' : data_mean['2016'], 41 | 'yiqi_mean' : data_mean['2017'], 42 | 'yiba_mean' : data_mean['2018'], 43 | 'yiliu_var' : data_var['2016'], 44 | 'yiqi_var' : data_var['2017'], 45 | 'yiba_var' : data_var['2018'] 46 | }) 47 | 48 | df_final = df_final[df_final.yiliu_mean != 0] 49 | df_final['firstincrase'] = (df_final['yiqi_mean'] - df_final['yiliu_mean'])/df_final['yiliu_mean'] 50 | df_final['secondincrase'] = (df_final['yiba_mean'] - df_final['yiqi_mean'])/df_final['yiba_mean'] 51 | 52 | df_final[(df_final.firstincrase > 0.1) & (df_final.secondincrase > 0.1)& (df_final.yiliu_var < 15)& (df_final.yiqi_var < 15)& (df_final.yiba_var < 15)] 53 | 
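# Side notes on the crawlers above (sketches only; `bad_codes` and `seasons`
# are hypothetical names, not part of the original scripts):
#
# 1. The download loop calls `stock_code_num.remove(j)` inside `except` while
#    iterating over that same list, which makes the iterator silently skip
#    the element that follows each removal. Collecting failures and filtering
#    afterwards avoids this:
#
#        bad_codes = []
#        for j in stock_code_num:
#            try:
#                ...  # fetch monthly k-data and concat, as above
#            except Exception:
#                bad_codes.append(j)
#        stock_code_num = [c for c in stock_code_num if c not in bad_codes]
#
# 2. Each table crawler in M1809_finance_crawling.py repeats the same body
#    once per quarter with a special case for 2018; a single season list
#    removes the duplicated branches:
#
#        seasons = [(y, q) for y in (2016, 2017) for q in (1, 2, 3, 4)] + [(2018, 1)]
#        for y, q in seasons:
#            ...  # build the suffixed columns, fetch, merge on 'code'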
df_final.to_csv('D:/999github/anack/M1809/target.csv',index =False) 54 | -------------------------------------------------------------------------------- /M1809/doc/表头说明/balance_columns.txt: -------------------------------------------------------------------------------- 1 | 货币资金 h1 2 | 交易性金融资产 h2 3 | 衍生金融资产 h3 4 | 应收票据 h4 5 | 应收账款 h5 6 | 预付款项 h6 7 | 应收利息 h7 8 | 应收股利 h8 9 | 其他应收款 h9 10 | 买入返售金融资产 h10 11 | 存货 h11 12 | 划分为持有待售的资产 h12 13 | 一年内到期的非流动资产 h13 14 | 待摊费用 h14 15 | 待处理流动资产损益 h15 16 | 其他流动资产 h16 17 | 流动资产合计 h17 18 | 发放贷款及垫款 h18 19 | 可供出售金融资产 h19 20 | 持有至到期投资 h20 21 | 长期应收款 h21 22 | 长期股权投资 h22 23 | 投资性房地产 h23 24 | 固定资产净额 h24 25 | 在建工程 h25 26 | 工程物资 h26 27 | 固定资产清理 h27 28 | 生产性生物资产 h28 29 | 公益性生物资产 h29 30 | 油气资产 h30 31 | 无形资产 h31 32 | 开发支出 h32 33 | 商誉 h33 34 | 长期待摊费用 h34 35 | 递延所得税资产 h35 36 | 其他非流动资产 h36 37 | 非流动资产合计 h37 38 | 资产总计 h38 39 | 短期借款 h39 40 | 交易性金融负债 h40 41 | 应付票据 h41 42 | 应付账款 h42 43 | 预收款项 h43 44 | 应付手续费及佣金 h44 45 | 应付职工薪酬 h45 46 | 应交税费 h46 47 | 应付利息 h47 48 | 应付股利 h48 49 | 其他应付款 h49 50 | 预提费用 h50 51 | 一年内的递延收益 h51 52 | 应付短期债券 h52 53 | 一年内到期的非流动负债 h53 54 | 其他流动负债 h54 55 | 流动负债合计 h55 56 | 长期借款 h56 57 | 应付债券 h57 58 | 长期应付款 h58 59 | 长期应付职工薪酬 h59 60 | 专项应付款 h60 61 | 预计非流动负债 h61 62 | 递延所得税负债 h62 63 | 长期递延收益 h63 64 | 其他非流动负债 h64 65 | 非流动负债合计 h65 66 | 负债合计 h66 67 | 实收资本(或股本) h67 68 | 资本公积 h68 69 | 减:库存股 h69 70 | 其他综合收益 h70 71 | 专项储备 h71 72 | 盈余公积 h72 73 | 一般风险准备 h73 74 | 未分配利润 h74 75 | 归属于母公司股东权益合计 h75 76 | 少数股东权益 h76 77 | 所有者权益(或股东权益)合计 h77 78 | 负债和所有者权益(或股东权益)总计 h78 79 | stock_code h79 80 | Date h80 -------------------------------------------------------------------------------- /M1809/doc/表头说明/cash_flow_columns.txt: -------------------------------------------------------------------------------- 1 | 销售商品、提供劳务收到的现金 h1 2 | 收到的税费返还 h2 3 | 收到的其他与经营活动有关的现金 h3 4 | 经营活动现金流入小计 h4 5 | 购买商品、接受劳务支付的现金 h5 6 | 支付给职工以及为职工支付的现金 h6 7 | 支付的各项税费 h7 8 | 支付的其他与经营活动有关的现金 h8 9 | 经营活动现金流出小计 h9 10 | 经营活动产生的现金流量净额 h10 11 | 收回投资所收到的现金 h11 12 | 取得投资收益所收到的现金 h12 13 | 处置固定资产、无形资产和其他长期资产所收回的现金净额 h13 14 | 处置子公司及其他营业单位收到的现金净额 h14 15 | 收到的其他与投资活动有关的现金 h15 16 | 投资活动现金流入小计 h16 17 | 购建固定资产、无形资产和其他长期资产所支付的现金 h17 18 | 投资所支付的现金 h18 19 | 取得子公司及其他营业单位支付的现金净额 h19 20 | 支付的其他与投资活动有关的现金 h20 21 | 投资活动现金流出小计 h21 22 | 投资活动产生的现金流量净额 h22 23 | 吸收投资收到的现金 h23 24 | 其中:子公司吸收少数股东投资收到的现金 h24 25 | 取得借款收到的现金 h25 26 | 发行债券收到的现金 h26 27 | 收到其他与筹资活动有关的现金 h27 28 | 筹资活动现金流入小计 h28 29 | 偿还债务支付的现金 h29 30 | 分配股利、利润或偿付利息所支付的现金 h30 31 | 其中:子公司支付给少数股东的股利、利润 h31 32 | 支付其他与筹资活动有关的现金 h32 33 | 筹资活动现金流出小计 h33 34 | 筹资活动产生的现金流量净额 h34 35 | 四、汇率变动对现金及现金等价物的影响 h35 36 | 五、现金及现金等价物净增加额 h36 37 | 加:期初现金及现金等价物余额 h37 38 | 六、期末现金及现金等价物余额 h38 39 | 净利润 h39 40 | 少数股东权益 h40 41 | 未确认的投资损失 h41 42 | 资产减值准备 h42 43 | 固定资产折旧、油气资产折耗、生产性物资折旧 h43 44 | 无形资产摊销 h44 45 | 长期待摊费用摊销 h45 46 | 待摊费用的减少 h46 47 | 预提费用的增加 h47 48 | 处置固定资产、无形资产和其他长期资产的损失 h48 49 | 固定资产报废损失 h49 50 | 公允价值变动损失 h50 51 | 递延收益增加(减:减少) h51 52 | 预计负债 h52 53 | 财务费用 h53 54 | 投资损失 h54 55 | 递延所得税资产减少 h55 56 | 递延所得税负债增加 h56 57 | 存货的减少 h57 58 | 经营性应收项目的减少 h58 59 | 经营性应付项目的增加 h59 60 | 已完工尚未结算款的减少(减:增加) h60 61 | 已结算尚未完工款的增加(减:减少) h61 62 | 其他 h62 63 | 经营活动产生现金流量净额 h63 64 | 债务转为资本 h64 65 | 一年内到期的可转换公司债券 h65 66 | 融资租入固定资产 h66 67 | 现金的期末余额 h67 68 | 现金的期初余额 h68 69 | 现金等价物的期末余额 h69 70 | 现金等价物的期初余额 h70 71 | 现金及现金等价物的净增加额 h71 72 | stock_code h72 73 | Date h73 -------------------------------------------------------------------------------- /M1809/doc/表头说明/profit_columns: -------------------------------------------------------------------------------- 1 | 一、营业总收入 h1 2 | 营业收入 h2 3 | 二、营业总成本 h3 4 | 营业成本 h4 5 | 营业税金及附加 h5 6 | 销售费用 h6 7 | 
管理费用 h7 8 | 财务费用 h8 9 | 资产减值损失 h9 10 | 公允价值变动收益 h10 11 | 投资收益 h11 12 | 其中:对联营企业和合营企业的投资收益 h12 13 | 汇兑收益 h13 14 | 三、营业利润 h14 15 | 加:营业外收入 h15 16 | 减:营业外支出 h16 17 | 其中:非流动资产处置损失 h17 18 | 四、利润总额 h18 19 | 减:所得税费用 h19 20 | 五、净利润 h20 21 | 归属于母公司所有者的净利润 h21 22 | 少数股东损益 h22 23 | 基本每股收益(元/股) h23 24 | 稀释每股收益(元/股) h24 25 | 七、其他综合收益 h25 26 | 八、综合收益总额 h26 27 | 归属于母公司所有者的综合收益总额 h27 28 | 归属于少数股东的综合收益总额 h28 29 | stock_code h29 30 | Date h30 31 | -------------------------------------------------------------------------------- /M1809/doc/表头说明/profit_columns.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/M1809/doc/表头说明/profit_columns.txt -------------------------------------------------------------------------------- /M1809/doc/表头说明/开发建议.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/M1809/doc/表头说明/开发建议.txt -------------------------------------------------------------------------------- /M1809/doc/表头说明/新建 Microsoft Excel 工作表.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/M1809/doc/表头说明/新建 Microsoft Excel 工作表.xlsx -------------------------------------------------------------------------------- /M1809/src/App.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sat Sep 1 19:35:20 2018 4 | 5 | @author: yinchao 6 | """ 7 | 8 | import UserApi 9 | 10 | 11 | id_list = ['000651', '000333', '600690', '600522'] 12 | if __name__ =='__main__': 13 | UserApi.Init(id_list,'SQL') 14 | UserApi.GetData('ON') 15 | UserApi.Analyse() -------------------------------------------------------------------------------- /M1809/src/Config.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Thu Jun 14 01:47:40 2018 4 | 5 | @author: yinchao 6 | """ 7 | import sys 8 | sys.path.append('../..') 9 | 10 | from datetime import datetime 11 | import crawling_finance_table 12 | import crawling_finance_table_v1_7 13 | import pymysql 14 | import os 15 | def Connect_sql(account): 16 | conn = pymysql.connect( 17 | host = account[0].strip(), 18 | port = 3306, 19 | user = account[1].strip(), 20 | passwd = account[2].strip(), 21 | db = account[3].strip(), 22 | charset = "utf8" 23 | ) 24 | 25 | cur = conn.cursor() 26 | # print(account) 27 | print("\nconnect to aliyun success!\n") 28 | return cur 29 | 30 | 31 | 32 | 33 | global parameter 34 | parameter = [ 35 | '总资产', 36 | '净资产', 37 | '资产负债比', 38 | '流动资产', 39 | '一年内到期的长期负债', 40 | '应收账款', 41 | '预收账款', 42 | '存货', 43 | '营业收入', 44 | '营业成本', 45 | '营业税金及附加', 46 | '财务费用', 47 | '营业外收入', 48 | '净利润', 49 | '除非净利润', 50 | '每股收益', 51 | '经营净额', 52 | '投资净额', 53 | '筹资净额', 54 | '汇率影响', 55 | '现金净增加额', 56 | '期末现金余额', 57 | '流动比率', 58 | '资产周转率', 59 | '存货周转率', 60 | '溢价比', 61 | '市盈率', 62 | '市净率', 63 | '名义净资产收益率', 64 | '真实净资产收益率', 65 | '毛利率', 66 | '营收增长率', 67 | '除非净利润增长率', 68 | '股息率', 69 | '分红率'] 70 | 71 | global company_id_list 72 | company_id_list = ['000651', '000333', '600690'] #此处可以修改 73 | global data_base_path 74 | data_base_path = '../history_data/' 75 | 76 | global data_src 77 | global cur 78 | 79 | def M1809_config(company_list, mode = 'CSV'): 80 | ''' 81 | 本地模式配置 82 | 
只需要提供感兴趣的对比公司即可,如果只有一个,说明只进行自主分析 83 | ''' 84 | global data_base_path 85 | global data_src 86 | global cur 87 | global parameter 88 | global company_id_list 89 | data_src = mode 90 | company_id_list = company_list 91 | 92 | print('please wait, check for updating...') 93 | 94 | try: #自动检查并创建文件夹 95 | os.mkdir('../history_data') 96 | except: 97 | pass 98 | try: #自动检查并创建文件夹 99 | os.mkdir('../sys_config') 100 | except: 101 | pass 102 | try: #自动检查并创建文件夹 103 | os.mkdir('../output') 104 | except: 105 | pass 106 | 107 | if len(company_list) < 2: 108 | print('最少需要输入2个id作为对比') 109 | return 110 | #此处增加id合法性检查 111 | 112 | 113 | if data_src == 'SQL' or data_src == 'sql': 114 | ''' 115 | 网络模式配置 116 | 以读文件的方式获取配置参数 117 | 1. 读取待考察的参数 118 | 2. 读取公司名称列表,并转换成id(如果输入无法解析成id,会自动剔除) 119 | 3. 更新该公司的财务报表,以备以后使用 120 | 注意:文件名不可改 121 | ''' 122 | try: 123 | with open('../sys_config/account.cfg', 'r') as fh: 124 | account = fh.readlines() 125 | except: 126 | print('fail to initialize.') 127 | return 128 | 129 | cur = Connect_sql(account) 130 | # 此处增加ID合法性检查 131 | # id_list = [] 132 | # for name in company_id_list: 133 | # cmd = "select * from anack_classify where name = \'"+name+"\';" 134 | # cur.execute(cmd) 135 | # result = cur.fetchall() 136 | # try: 137 | # id = result[0][0] 138 | # id_list.append(id) 139 | # 140 | # except: #错误的ID号不会被解析(刚上市的,不会出现在anack_classify里,需要更新) 141 | # print(name+' is not in list') 142 | # pass 143 | M1809_Update(cur, company_list) 144 | 145 | elif data_src == 'CSV' or data_src == 'csv': 146 | for item in company_list: 147 | try: 148 | file_name = data_base_path + item + '_profit.csv' 149 | # print(file_name) 150 | with open(file_name, 'r') as fh: 151 | from datetime import datetime 152 | from dateutil.parser import parse 153 | from dateutil.relativedelta import relativedelta 154 | content = fh.readlines() 155 | s = content[-1].split(',') 156 | latest_record = parse(s[0]) #获取最新时间 157 | 158 | current_day = datetime.now() - relativedelta(months=+12) 159 | if latest_record > current_day: 160 | pass 161 | else: 162 | cbfx = crawling_finance_table_v1_7.crawling_finance(data_base_path,item) 163 | cbfx.crawling_update() 164 | except: 165 | cbfx = crawling_finance_table_v1_7.crawling_finance(data_base_path,item) 166 | cbfx.crawling_update() 167 | else: 168 | print('模式设置错误,请二选一:CSV/SQL') 169 | 170 | print('finish init!') 171 | 172 | def M1809_Update(cur, id_list): 173 | ''' 174 | 更新数据库 175 | ''' 176 | print('check for update,please wait...') 177 | # print(id_list) 178 | for item in id_list: 179 | try: 180 | 181 | cmd = "select * from zichanfuzhai where h79 = \'" + item + "\' and h80 = \'" + str(datetime.now().year - 1)+"-12-31\';" 182 | cur.execute(cmd) 183 | result1 = cur.fetchall() 184 | except: 185 | print('updating ', item) 186 | cbfx = crawling_finance_table.crawling_finance('',item,'') 187 | cbfx.crawling_update() 188 | continue 189 | 190 | try: 191 | cmd2 = "select * from cashFlow where h72 = \'" + item + "\' and h73 = \'" + str(datetime.now().year - 1)+"-12-31\';" 192 | cur.execute(cmd2) 193 | result2 = cur.fetchall() 194 | except: 195 | print('updating ', item) 196 | cbfx = crawling_finance_table.crawling_finance('',item,'') 197 | cbfx.crawling_update() 198 | continue 199 | 200 | try: 201 | cmd3 = "select * from Profit where h29 = \'" + item + "\' and h30 = \'" + str(datetime.now().year - 1)+"-12-31\';" 202 | cur.execute(cmd3) 203 | result3 = cur.fetchall() 204 | trash_data = result3[0] #获得资产负债表信息 205 | except: 206 | print('updating ', item) 207 | cbfx = 
crawling_finance_table.crawling_finance('',item,'') 208 | cbfx.crawling_update() 209 | continue 210 | 211 | print('update check finished!') 212 | 213 | ############################################################################# 214 | if __name__ =='__main__': 215 | id_list = ['000651', '000333', '600690', '600522'] 216 | #网络测试 217 | M1809_config(id_list, 'SQL') 218 | 219 | #本地测试 220 | # M1809_config(id_list, 'CSV') 221 | -------------------------------------------------------------------------------- /M1809/src/M1809_finance_weight.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | ''' 3 | 类名:M1809_finance_weight 4 | 作者:徐抒田 5 | 日期:2018-7-9 6 | 描述: 7 | 机器学习方法确定,权重() 8 | 遗留问题: 9 | 1、资产负债比,营业税增长率,营业现金增长率,现金增长净额,期末现金字段待解决 10 | 2、从库读文件待解决 11 | 版本号:V0.1 12 | ''' 13 | 14 | 15 | import pandas as pd 16 | import numpy as np 17 | import matplotlib.pyplot as plt 18 | from sklearn import preprocessing 19 | from sklearn.model_selection import cross_val_score 20 | from sklearn.datasets import make_blobs 21 | from sklearn.ensemble import RandomForestClassifier 22 | from sklearn.model_selection import train_test_split 23 | from sklearn.metrics import classification_report 24 | from sklearn import metrics 25 | import lightgbm as lgb 26 | 27 | 28 | ''' 29 | 修改成从数据库中读取文件 30 | ''' 31 | result_yinli = pd.read_csv('D:/999github/anack/M1809/xst/result_yinli.csv') 32 | result_yingyun = pd.read_csv('D:/999github/anack/M1809/xst/result_yingyun.csv') 33 | result_chengzhang = pd.read_csv('D:/999github/anack/M1809/xst/result_chengzhang.csv') 34 | result_changzhai = pd.read_csv('D:/999github/anack/M1809/xst/result_changzhai.csv') 35 | result_xianjin = pd.read_csv('D:/999github/anack/M1809/xst/result_xianjin.csv') 36 | 37 | df_final = pd.read_csv('D:/999github/anack/M1809/xst/target.csv') 38 | 39 | df_final = df_final[(df_final.firstincrase > 0.1) & (df_final.secondincrase > 0.1)] 40 | df_final = pd.DataFrame({'code' : df_final['code'], 41 | 'label' : 1, 42 | }) 43 | 44 | 45 | data = result_yinli 46 | data = pd.merge(data, result_yingyun, on=['code','name']) 47 | data = pd.merge(data, result_chengzhang, on=['code','name']) 48 | data = pd.merge(data, result_changzhai, on=['code','name']) 49 | data = pd.merge(data, result_xianjin, on=['code','name']) 50 | data = pd.merge(data, df_final, on='code',how = 'left') 51 | 52 | # ============================================================================= 53 | # null_counts = data.isnull().sum() 54 | # print(null_counts) 55 | # ============================================================================= 56 | 57 | data = data.fillna(0) 58 | data = data.dropna(axis=0) 59 | 60 | 61 | 62 | 63 | orig_columns = data.columns 64 | drop_columns = [] 65 | for col in orig_columns: 66 | col_series = data[col].dropna().unique() 67 | if len(col_series) == 1: 68 | drop_columns.append(col) 69 | data = data.drop(drop_columns, axis=1) 70 | print(drop_columns) 71 | 72 | 73 | target = data['label'] 74 | code = data['code'] 75 | name = data['name'] 76 | features = data.drop(['code','name','label'],axis=1) 77 | 78 | features[features.currentratio20161 == '--'] = 0 79 | features[features.quickratio20161=='--']=0 80 | features[features.cashratio20161=='--']=0 81 | features[features.icratio20161=='--']=0 82 | features[features.sheqratio20161=='--']=0 83 | features[features.adratio20161=='--']=0 84 | features[features.currentratio20162=='--']=0 85 | features[features.quickratio20162=='--']=0 86 | features[features.cashratio20162=='--']=0 
87 | features[features.icratio20162=='--']=0 88 | features[features.sheqratio20162=='--']=0 89 | features[features.adratio20162=='--']=0 90 | features[features.currentratio20163=='--']=0 91 | features[features.quickratio20163=='--']=0 92 | features[features.cashratio20163=='--']=0 93 | features[features.icratio20163=='--']=0 94 | features[features.sheqratio20163=='--']=0 95 | features[features.adratio20163=='--']=0 96 | features[features.currentratio20164=='--']=0 97 | features[features.quickratio20164=='--']=0 98 | features[features.cashratio20164=='--']=0 99 | features[features.icratio20164=='--']=0 100 | features[features.currentratio20171=='--']=0 101 | features[features.quickratio20171=='--']=0 102 | features[features.cashratio20171=='--']=0 103 | features[features.icratio20171=='--']=0 104 | features[features.sheqratio20171=='--']=0 105 | features[features.adratio20171=='--']=0 106 | features[features.currentratio20172=='--']=0 107 | features[features.quickratio20172=='--']=0 108 | features[features.cashratio20172=='--']=0 109 | features[features.icratio20172=='--']=0 110 | features[features.currentratio20173=='--']=0 111 | features[features.quickratio20173=='--']=0 112 | features[features.cashratio20173=='--']=0 113 | features[features.icratio20173=='--']=0 114 | features[features.currentratio20174=='--']=0 115 | features[features.quickratio20174=='--']=0 116 | features[features.cashratio20174=='--']=0 117 | features[features.icratio20174=='--']=0 118 | features[features.currentratio20181=='--']=0 119 | features[features.quickratio20181=='--']=0 120 | features[features.cashratio20181=='--']=0 121 | features[features.icratio20181=='--']=0 122 | features = features.astype('float64') 123 | 124 | 125 | 126 | 127 | 128 | ##基于树的方法不用做标准化、归一化处理 129 | 130 | 131 | ''' 132 | 资产负债比,营业税增长率,营业现金增长率,现金增长净额,期末现金 133 | ''' 134 | features = features[['targ20174','nav20174','gross_profit_rate20174','cashflowratio20174','net_profit_ratio20174','mbrg20174','currentratio20174','currentasset_turnover20174','inventory_days20174']] 135 | 136 | 137 | 138 | def aucfun(act,pred): 139 | fpr,tpr,thresholds = metrics.roc_curve(act,pred) 140 | plt.plot(fpr, tpr, color='darkorange',lw=2) 141 | plt.xlim([0.0, 1.0]) 142 | plt.ylim([0.0, 1.05]) 143 | plt.xlabel('False Positive Rate') 144 | plt.ylabel('True Positive Rate') 145 | # ============================================================================= 146 | # print(fpr) 147 | # print(tpr) 148 | # print(thresholds) 149 | # ============================================================================= 150 | return metrics.auc(fpr,tpr) 151 | 152 | 153 | 154 | 155 | 156 | 157 | def ml_for_weight(features,target): 158 | 159 | 160 | min_max_scaler = preprocessing.MinMaxScaler() 161 | features_new = min_max_scaler.fit_transform(features) 162 | features = pd.DataFrame(features_new, columns=features.columns) 163 | X_train,X_test,y_train,y_test = train_test_split(features,target,test_size=0.25,random_state=42) 164 | 165 | ''' 166 | 调参 167 | ''' 168 | clf = lgb.LGBMClassifier( 169 | boosting_type='gbdt', num_leaves=31, reg_alpha=0, reg_lambda=1, 170 | max_depth=-1, n_estimators=800, objective='binary', 171 | subsample=0.7, colsample_bytree=0.7, subsample_freq=2, 172 | learning_rate=0.05, min_child_weight=20, random_state=2018, n_jobs=-1,class_weight = 'balanced' 173 | ) 174 | 175 | clf = clf.fit(X_train, y_train, eval_set=[(X_train, y_train),(X_test, y_test)], eval_names = ['train','test'],eval_metric='auc',early_stopping_rounds=100) 176 | 177 | y_pre = clf.predict(X_test) 
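    # Side note (a sketch, not part of the original file): `clf.predict`
    # above applies LightGBM's default 0.5 probability cutoff, while
    # `predict_proba` below returns the raw scores, so a custom cutoff is a
    # one-liner if positive-class recall needs tuning. The 0.3 here is an
    # arbitrary illustration, not a value taken from this project:
    #
    #     y_pre_custom = (clf.predict_proba(X_test)[:, 1] >= 0.3).astype(int)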
178 | 179 | y_pre_pro = clf.predict_proba(X_test)[:, 1] 180 | # ============================================================================= 181 | # print(y_pre_pro) 182 | # ============================================================================= 183 | print(classification_report(y_test,y_pre)) 184 | print(metrics.roc_auc_score(y_test,y_pre_pro)) #预测Y值得分 185 | aucfun(y_test,y_pre_pro) 186 | 187 | importances = clf.feature_importances_ 188 | indices = np.argsort(importances)[::-1] 189 | print("Feature ranking:") 190 | for f in range(features.shape[1]): 191 | print("%d. feature %d (%f): %s" % (f + 1, indices[f], importances[indices[f]] , features.columns[indices[f]] )) 192 | 193 | 194 | return features.columns,importances 195 | 196 | 197 | a,b = ml_for_weight(features,target) 198 | 199 | 200 | 201 | # ============================================================================= 202 | # y_pre_pro_f = clf.predict_proba(features)[:, 1] 203 | # 204 | # y_pre_pro_f = pd.DataFrame({'code' : code, 205 | # 'name' : name, 206 | # 'gailv' : y_pre_pro_f 207 | # }) 208 | # 209 | # y_pre_pro_f.to_csv('D:/999github/anack/M1809/y_pre_pro_f.csv',index =False) 210 | # ============================================================================= 211 | -------------------------------------------------------------------------------- /M1809/src/PlotAnalyse.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Tue July 24 08:26:21 2018 4 | 5 | @author: guqiuyang 6 | """ 7 | 8 | import numpy as np 9 | import CoreAnalyse 10 | from matplotlib import pyplot as plt 11 | 12 | 13 | # 绘制分组柱状图的函数 14 | def groupedbarplot(ax, x_data, x_data_name, y_data_list, y_data_names, colors, x_label, y_label, title): 15 | ''' 16 | 绘制输出报告个分析指标的柱状图 17 | ''' 18 | # 设置每一组柱状图的宽度 19 | total_width = 0.8 20 | # 设置每一个柱状图的宽度 21 | ind_width = total_width / len(y_data_list) 22 | # 计算每一个柱状图的中心偏移 23 | alteration = np.arange(-total_width / 2 + ind_width / 2, total_width / 2 + ind_width / 2, ind_width) 24 | 25 | # 分别绘制每一个柱状图 26 | for i in range(0, len(y_data_list)): 27 | # 横向散开绘制 28 | ax.bar(x_data + alteration[i], y_data_list[i], color=colors[i], label=y_data_names[i], width=ind_width) 29 | ax.set_ylabel(y_label) 30 | ax.set_xlabel(x_label) 31 | ax.set_xticks(x_data) 32 | ax.set_xticklabels(x_data_name) 33 | ax.set_title(title) 34 | ax.legend(loc='upper right') 35 | 36 | 37 | # 4. 
绘图分析 38 | def PlotAnalyse(data): 39 | ''' 40 | 个股纵向对比绘图逻辑 41 | ''' 42 | # 设置图片尺寸 20" x 15" 43 | plt.rc('figure', figsize=(14, 14)) 44 | # 设置字体 14 45 | plt.rc('font', size=14) 46 | # 不显示网格 47 | plt.rc('axes', grid=False) 48 | # 设置背景颜色是白色 49 | plt.rc('axes', facecolor='white') 50 | # 显示中文标签 51 | plt.rcParams['font.sans-serif'] = ['SimHei'] 52 | # 正常显示正负号 53 | plt.rcParams['axes.unicode_minus'] = False 54 | 55 | # 资产水平分析 56 | avg, last, level = CoreAnalyse.GetGrowth(data, 0) # 总资产_复合增长率 57 | avg_, last_, level_ = CoreAnalyse.GetGrowth(data, 1) # 净资产_复合增长率 58 | rate = CoreAnalyse.GetRate(data, 3, 0) # 流动资产_总资产占比 59 | debt_avg, debt_last = CoreAnalyse.GetAverage(data, 2) # 资产负债比_平均水平 60 | 61 | x1 = data.iloc[:, [0]].index.tolist() 62 | x2 = np.arange(4) 63 | x2_data_name = ['总资产增长率', '净资产增长率', '流动资产占比', '资产负债比'] 64 | y1 = data.iloc[:, [0, 1, 3]] 65 | y2 = [[avg, avg_, rate, debt_avg], [last, last_, 0, debt_last]] 66 | 67 | _, axs = plt.subplots(2, 1, figsize=(14, 14)) 68 | axs[0].plot(x1, y1, 'o-') 69 | axs[0].set_title('体量') 70 | axs[0].set_ylabel('元') 71 | axs[0].set_xlabel('年份') 72 | axs[0].legend(loc='upper left') 73 | 74 | groupedbarplot(axs[1] 75 | , x_data=x2 76 | , x_data_name=x2_data_name 77 | , y_data_list=y2 78 | , y_data_names=['长期', '去年'] 79 | , colors=['#539caf', '#7663b0'] 80 | , x_label='数据指标' 81 | , y_label='增幅比例' 82 | , title='资产水平分析') 83 | 84 | # 经营质量分析 85 | avg1, last1, _ = CoreAnalyse.GetGrowth(data, 8) # 营业收入_复合增长率 86 | avg2, last2 = CoreAnalyse.GetAverage(data, 30) # 毛利率 87 | avg3, last3, _ = CoreAnalyse.GetGrowth(data, 14) # 除非净利润 88 | avg4, last4, _ = CoreAnalyse.GetGrowth(data, 10) # 营业税 89 | rate = CoreAnalyse.GetRate(data, 12, 8) # 现金与净资产的占比关系 90 | avg5, last5 = CoreAnalyse.GetAverage(data, 33) #股息率 91 | avg6, last6 = CoreAnalyse.GetAverage(data, 34) #分红率 92 | 93 | x1 = np.arange(3) 94 | x1_data_name = ['现金/净资产', '股息率', '分红率'] 95 | x2 = np.arange(4) 96 | x2_data_name = ['营收增长率', '毛利率', '除非净利润增长率', '营业税增长率'] 97 | y1 = [[0, avg5, avg6], [rate, last5, last6]] 98 | y2 = [[avg1, avg2, avg3, avg4], [last1, last2, last3, last4]] 99 | 100 | _, axs = plt.subplots(2, 1, figsize=(14, 14)) 101 | groupedbarplot(axs[0] 102 | , x_data=x1 103 | , x_data_name=x1_data_name 104 | , y_data_list=y1 105 | , y_data_names=['长期', '去年'] 106 | , colors=['#539caf', '#7663b0'] 107 | , x_label='数据指标' 108 | , y_label='增幅比例' 109 | , title='经营质量分析') 110 | 111 | groupedbarplot(axs[1] 112 | , x_data=x2 113 | , x_data_name=x2_data_name 114 | , y_data_list=y2 115 | , y_data_names=['长期', '去年'] 116 | , colors=['#539caf', '#7663b0'] 117 | , x_label='数据指标' 118 | , y_label='增幅比例' 119 | , title='经营质量分析') 120 | 121 | # 现金流分析 122 | avg1, last1, _ = CoreAnalyse.GetGrowth(data, 16) # 营业现金 123 | avg2, last2, _ = CoreAnalyse.GetGrowth(data, 20) # 增加的现金 124 | avg3, last3, _ = CoreAnalyse.GetGrowth(data, 21) # 期末现金 125 | rate = CoreAnalyse.GetRate(data, 21, 1) # 现金与净资产的占比关系 126 | 127 | x1 = np.arange(4) 128 | x1_data_name = ['营业现金增长率', '现金增长净额', '期末现金', '现金与净资产的占比'] 129 | y1 = [[avg1, avg2, avg3, 0], [last1, last2, last3, rate]] 130 | 131 | _, axs = plt.subplots(1, 1, figsize=(10, 7)) 132 | groupedbarplot(axs 133 | , x_data=x1 134 | , x_data_name=x1_data_name 135 | , y_data_list=y1 136 | , y_data_names=['长期', '去年'] 137 | , colors=['#539caf', '#7663b0'] 138 | , x_label='数据指标' 139 | , y_label='增幅比例' 140 | , title='现金流分析') 141 | 142 | # 4.营运质量分析 143 | avg1, last1 = CoreAnalyse.GetAverage(data, 22) # 流动比率 144 | avg2, last2 = CoreAnalyse.GetAverage(data, 23) # 资产周转率 145 | avg3, last3 = CoreAnalyse.GetAverage(data, 24) # 
存货周转率 146 | 147 | x1 = np.arange(3) 148 | x1_data_name = ['流动比率', '资产周转率', '存货周转率'] 149 | y1 = [[avg1, avg2, avg3], [last1, last2, last3]] 150 | 151 | _, axs = plt.subplots(1, 1, figsize=(10, 7)) 152 | groupedbarplot(axs 153 | , x_data=x1 154 | , x_data_name=x1_data_name 155 | , y_data_list=y1 156 | , y_data_names=['长期', '去年'] 157 | , colors=['#539caf', '#7663b0'] 158 | , x_label='数据指标' 159 | , y_label='增幅比例' 160 | , title='营运参数分析') 161 | plt.show() -------------------------------------------------------------------------------- /M1809/src/UserApi.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sat Sep 1 19:22:58 2018 4 | 5 | @author: yinchao 6 | """ 7 | import PlotAnalyse 8 | import CoreAnalyse 9 | import Config 10 | 11 | global self_result 12 | global compare_result 13 | 14 | def Init(company_id_list, data_src = 'CSV'): 15 | ''' 16 | 初始化配置函数 17 | company_id_list:待考察的id列表(1个到n个,eg: ['000651','00124','600660'] 18 | data_src: 'SQL'数据来源是数据库, 'CSV'数据来源是读文件 19 | ''' 20 | Config.M1809_config(company_id_list, data_src) 21 | 22 | def GetData(file_switch = 'ON'): 23 | ''' 24 | 获取财务原始数据 25 | file_switch: 'ON'结果输出到文本(默认) 'OFF'原始结果不输出 26 | 返回值: a->自身对比原始结果 b->同行业对比结果(归一化处理) 27 | 备注:a,b两个返回值原封不动交给Analyse函数进行分析即可 28 | ''' 29 | global self_result 30 | global compare_result 31 | 32 | self_result = CoreAnalyse.Compare2Themself(Config.company_id_list[0]) #自身对比 33 | b1= CoreAnalyse.Compare2Industry(Config.company_id_list) #同行业对比 34 | compare_result = CoreAnalyse.data_normalize(b1) #归一化的同行业对比 35 | if file_switch == 'ON': 36 | self_result.to_csv('../output/compare_self.csv', encoding= 'gbk') 37 | b1.to_csv('../output/compare_industry.csv', encoding = 'gbk') 38 | compare_result.to_csv('../output/normalize.csv', encoding = 'gbk') 39 | 40 | return self_result, compare_result 41 | 42 | def Analyse(): 43 | ''' 44 | 对比分析,并输出 45 | 1. ../output/文件夹下会生成诊断报告 46 | 2. 
the console prints the comparison charts (saving them as image files could be added later)
47 |     '''
48 |     global self_result
49 |     global compare_result
50 |     CoreAnalyse.Analyse(self_result, compare_result)
51 |     PlotAnalyse.PlotAnalyse(self_result)
52 | 
53 | 
54 | if __name__ =='__main__':
55 |     id_list = ['000651', '000333', '600690']
56 |     Init(id_list,'CSV')  # Init has no return value
57 |     a = CoreAnalyse.Compare2Themself(id_list[0])  # analyse the first id, mirroring GetData
--------------------------------------------------------------------------------
/M1809/src/get_dividends_history.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Mon Mar 26 21:29:43 2018
4 | 
5 | @author: 尹超
6 | # This module fetches a stock's historical dividend records and returns them as a DataFrame
7 | """
8 | import pandas as pd
9 | import requests
10 | from requests.exceptions import RequestException
11 | from bs4 import BeautifulSoup
12 | 
13 | 
14 | headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36'}
15 | 
16 | def get_one_page(url):
17 |     try:
18 |         response = requests.get(url,headers = headers)
19 |         response.encoding = 'GB2312'
20 |         if response.status_code == 200:
21 |             return response.text
22 |         return None
23 |     except RequestException:
24 |         return None
25 | 
26 | def parse(html):
27 |     raw_data = []
28 |     try:
29 |         year_raw = []
30 |         year = []
31 |         bonus_share = []
32 |         bonus_convert = []
33 |         profit_send = []
34 |         ex_rights = []
35 |         register_day = []
36 | 
37 |         soup = BeautifulSoup(html,'html5lib')
38 |         l = soup.select('table#sharebonus_1')
39 |         ls = l[0].tbody
40 |         lls = ls.select('td')
41 |         for l in lls:
42 |             if (l.get_text().strip()) != '预案' and \
43 |                (l.get_text().strip()) != '实施' and \
44 |                (l.get_text().strip()) != '不分配' and \
45 |                (l.get_text().strip()) != '查看':
46 |                 raw_data.append(l.get_text().strip())
47 | 
48 |         year_raw = raw_data[::7]
49 | #        print(raw_data)   # if parsing fails, inspect this output
50 | #        print(year_raw)   # if parsing fails, inspect this output
51 |         for item in year_raw:
52 |             a = pd.to_datetime(item).year - 1
53 |             year.append(a)
54 |         bonus_share = raw_data[1::7]
55 |         bonus_convert = raw_data[2::7]
56 |         profit_send = raw_data[3::7]
57 |         ex_rights = raw_data[4::7]
58 |         register_day = raw_data[5::7]
59 | #        print(register_day)
60 |         data = {'年度':year,
61 |                 '送股':bonus_share,
62 |                 '转股':bonus_convert,
63 |                 '派息':profit_send,
64 |                 '除权日':ex_rights,
65 |                 '登记日':register_day
66 |                 }
67 |         frame = pd.DataFrame(data)
68 |         return frame
69 |     except:
70 |         print('cannot parse this page')
71 | 
72 | def parse_single_year(html,Year):
73 |     raw_data = []
74 |     try:
75 |         year_raw = []
76 |         year = []
77 |         bonus_share = []
78 |         bonus_convert = []
79 |         profit_send = []
80 |         ex_rights = []
81 |         register_day = []
82 | #        print('it is ',Year)
83 |         soup = BeautifulSoup(html,'html5lib')
84 |         l = soup.select('table#sharebonus_1')
85 |         ls = l[0].tbody
86 |         lls = ls.select('td')
87 |         for l in lls:
88 |             if (l.get_text().strip()) != '预案' and \
89 |                (l.get_text().strip()) != '实施' and \
90 |                (l.get_text().strip()) != '不分配' and \
91 |                (l.get_text().strip()) != '查看':
92 |                 raw_data.append(l.get_text().strip())
93 | 
94 |         year_raw = raw_data[::7]
95 | #        print(raw_data)   # if parsing fails, inspect this output
96 | #        print(year_raw)   # if parsing fails, inspect this output
97 |         for item in year_raw:
98 |             a = pd.to_datetime(item).year - 1
99 |             year.append(a)
100 |         bonus_share = raw_data[1::7]
101 |         bonus_convert = raw_data[2::7]
102 |         profit_send = raw_data[3::7]
103 |         ex_rights = raw_data[4::7]
104 |         register_day = raw_data[5::7]
105 | #        print(register_day)
106 |         data = {'年度':year,
107 |                 '送股':bonus_share,
108 |                 '转股':bonus_convert,
109 |                 '派息':profit_send,
110 |                 '除权日':ex_rights,
111 |                 '登记日':register_day
112 |                 }
113 | 
114 |         frame = pd.DataFrame(data)
115 | 
116 |         Len=len(frame)
117 |         for i in range(Len):
118 |             s=int(frame.iloc[i,[0]])
119 |             Date=frame.iloc[i,[2]]
120 |             date2=Date.loc['登记日']
121 | #            print(s,date2)
122 |             if s == Year:
123 |                 px=float(frame.iloc[i,[1]])
124 |                 date2=date2[:4]+date2[5:7]+date2[8:]
125 | #                print(s,'px money is ',px,date2)
126 |                 return px,date2
127 |         return -1
128 |     except:
129 |         print('cannot parse this page')
130 | 
131 | # Get the cash payout per 10 shares and the record date for one year
132 | def get_px_single_year(id,Year):
133 |     url = 'http://vip.stock.finance.sina.com.cn/corp/go.php/vISSUE_ShareBonus/stockid/'
134 |     url += str(id)
135 |     url += '.phtml'
136 |     html = get_one_page(url)
137 |     return parse_single_year(html,Year)
138 | # User-facing helper: takes a stock ID and parses the full dividend history table
139 | def get_bonus_table(id):
140 |     url = 'http://vip.stock.finance.sina.com.cn/corp/go.php/vISSUE_ShareBonus/stockid/'
141 |     url += str(id)
142 |     url += '.phtml'
143 |     html = get_one_page(url)
144 |     return parse(html)
145 | 
146 | ###############################################################################
147 | ###############################################################################
148 | # Example app code; re-comment it after use (600066)
149 | #s = get_bonus_table('601012')
150 | #print(s)
151 | # The 2017 dividend is actually paid out in 2018, so its record date falls in 2018
152 | # test the dividend yield helpers
153 | #PX,Date=get_px_single_year('601012',2017)
154 | #print(PX,Date)
--------------------------------------------------------------------------------
/M1809/src/get_price.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Fetch K-line data for a given stock ID over a given period from the 163 (NetEase) quote service
4 | """
5 | import requests
6 | import re
7 | import datetime
8 | import pandas as pd
9 | '''
10 | 
11 | Full URL template:
12 | http://quotes.money.163.com/service/chddata.html?code=0%06d&start=%d&end=%d&fields=TCLOSE;HIGH;LOW;TOPEN;LCLOSE;VOTURNOVER;VATURNOVER
13 | '''
14 | 
15 | 
16 | def get_close_price(id, day = 0):
17 |     '''
18 |     Get the closing price of a given stock ID on a given date
19 |     Input:   id  -> stock ID as a str: '600660'
20 |              day -> date as a str: '20180626'
21 |     Returns: price as a str: '25.54'; 0 if that day is a market holiday
22 |     '''
23 |     if day == 0:
24 |         day = datetime.datetime.now() - datetime.timedelta(days=1)
25 |         day = day.strftime("%Y%m%d")
26 |     if id[:3] == '000' or id[:3] == '002' or id[:3] == '300':  # Shenzhen codes get prefix 1
27 |         nid = '1' + id
28 |     else:  # Shanghai main-board codes get prefix 0
29 |         nid = '0' + id
30 |     url = "http://quotes.money.163.com/service/chddata.html?code=%s&start=%s&end=%s&\
31 | fields=TCLOSE" %(nid, day,day)
32 |     res = requests.get(url)
33 |     res.raise_for_status()
34 | 
35 |     for chunk in res.iter_content(100000):
36 | #        print(chunk)
37 |         pattern = '[^,\r\n]+'
38 |         obj = re.compile(pattern)
39 |         match = obj.findall(chunk.decode('gbk'))
40 |         #print(match)
41 |         if len(match) < 8:
42 |             return 0
43 |         else:
44 |             return match[-1]
45 | 
46 | def get_period_k_day(id, start_day, stop_day = 0):
47 |     '''
48 |     Get K-line data for a given stock ID over a period
49 |     Input:   id        -> stock ID as a str: '600660'
50 |              start_day -> date as a str: '20180626'
51 |              stop_day  -> same format; defaults to yesterday
52 |     Returns: a DataFrame
53 |     '''
54 |     if stop_day == 0:
55 |         day = datetime.datetime.now() - datetime.timedelta(days=1)
56 |         stop_day = day.strftime("%Y%m%d")  # assign back to stop_day so the default actually reaches the URL
57 | 
58 |     if id[:3] == '000' or id[:3] == '002' or id[:3] == '300':  # Shenzhen codes get prefix 1
59 |         nid = '1' + id
60 |     else:  # Shanghai main-board codes get prefix 0
61 |         nid = '0' + id
62 |     url = "http://quotes.money.163.com/service/chddata.html?code=%s&start=%s&end=%s&\
63 | fields=TCLOSE;HIGH;LOW;TOPEN;LCLOSE;VOTURNOVER;VATURNOVER" %(nid, start_day, stop_day)
64 | 
65 | 
66 | # url = 
"http://quotes.money.163.com/service/chddata.html?code=0%s&start=%s&end=%s&\ 67 | # fields=TCLOSE;HIGH;LOW;TOPEN;LCLOSE;VOTURNOVER;VATURNOVER" %(id, start_day,stop_day) 68 | res = requests.get(url) 69 | res.raise_for_status() 70 | # playFile = open(file_name, 'wb') 71 | 72 | raw_data = [] 73 | for chunk in res.iter_content(1000000): 74 | # playFile.write(chunk) 75 | chunk = chunk.decode('gbk') 76 | pattern = '[^,\r\n]+' 77 | obj = re.compile(pattern) 78 | match = obj.findall(chunk) 79 | if len(match) < 8: #如果没有数据 80 | return 0 81 | 82 | header = match[:10] #如果增加字段,则此处以下需要相应修改 83 | # print(header) 84 | raw_data = match[10:] 85 | date = raw_data[::10] 86 | idc = raw_data[1::10] 87 | name = raw_data[2::10] 88 | price = raw_data[3::10] 89 | high = raw_data[4::10] 90 | lopen = raw_data[5::10] 91 | yesterday_close = raw_data[6::10] 92 | low = raw_data[7::10] 93 | vol = raw_data[8::10] 94 | mount = raw_data[9::10] 95 | 96 | data = { 97 | # header[0]:date, 98 | header[1]:idc, 99 | header[2]:name, 100 | header[3]:price, 101 | header[4]:high, 102 | header[5]:lopen, 103 | header[6]:yesterday_close, 104 | header[7]:low, 105 | header[8]:vol, 106 | header[9]:mount 107 | } 108 | df = pd.DataFrame(data,index = date) 109 | # playFile.close() 110 | return df 111 | 112 | 113 | def k_day_to_csv(code, stop_day = 0): 114 | ''' 115 | 更新k线数据,并保存到本地,默认为更新到昨天 116 | code:目标个股,只能为'000xxx'形式 117 | stop_day: 0->昨天, 20170101:更新到指定的一天 118 | @更新逻辑: 119 | 1. 如果无记录,则自动创建csv文件,默认为:ID.kday 120 | 2. 如果有部分记录,则自动分析,并将后续的内容更新 121 | 3. 如果记录比需要更新的更新,则直接返回 122 | 123 | 缺陷:得到的数据是没有复权的,应该进行前复权 124 | ''' 125 | base_path = './' #修改此处可以更改文件存放路径,可以考虑作为一个配置参数 126 | start_day = '19970101' #start时间统一从1997年开始 127 | #参数合法性检查 128 | if isinstance(code,list): 129 | print('is a list') 130 | elif isinstance(code,str): 131 | file_name = code + '.csv' 132 | # print(file_name) 133 | else: 134 | print('bad input. please check it') 135 | return 136 | 137 | file_name = base_path + file_name 138 | # print(file_name) 139 | 140 | update_flag = 1 #1代表重新生成, 2代表更新 3代表无需处理 141 | #判断最新的是第几天 142 | try: 143 | with open(file_name,'r') as fh: 144 | content = fh.readlines() 145 | if len(content) > 2: #获取最新记录,总是在第二行 146 | latest_record = content[1].split(',') 147 | 148 | from datetime import datetime 149 | from dateutil.parser import parse 150 | latest_day = parse(latest_record[0]) 151 | now = datetime.now().strftime('%Y-%m-%d') 152 | yesterday = parse(now) 153 | 154 | if yesterday > latest_day: 155 | update_flag = 2 156 | print('not the latest') 157 | else: 158 | update_flag = 3 159 | print(code + ' already the latest') 160 | return 161 | except: 162 | update_flag = 1 163 | print('no record') 164 | 165 | #不同的情况适用不同更新逻辑 166 | if update_flag == 1: #完全更新 167 | r = get_period_k_day(code, start_day) 168 | r.to_csv(file_name, encoding= 'gbk') 169 | elif update_flag == 2: 170 | r = get_period_k_day(code, start_day) #此处没有办法在首部添加 171 | r.to_csv(file_name, encoding= 'gbk') #如果可以的话,则不必每次重写 172 | return 173 | print('finish ' + code + ' update') 174 | return 175 | 176 | def k_day_update(id_list, stop_day = 0): 177 | ''' 178 | 用户API,更新个股的K线数据,可以是列表,也可以是str 179 | ''' 180 | #参数合法性检查 181 | if isinstance(id_list,list): 182 | print('is a list') 183 | for s in id_list: 184 | k_day_to_csv(s,stop_day) 185 | elif isinstance(id_list,str): 186 | k_day_to_csv(id_list,stop_day) 187 | else: 188 | print('bad input. 
please check it') 189 | return 190 | 191 | if __name__ == '__main__': 192 | id = '601012' 193 | start_day = '20100625' 194 | stop_day = '20180904' 195 | 196 | #获取昨天的收盘价 197 | # price = get_close_price(id) 198 | # print(price) 199 | 200 | # #获取指定一天的收盘价 201 | # price = get_close_price('600660','20170209') 202 | # print(price) 203 | # 204 | # #获取从start_day开始直到昨天的收盘价 205 | # s = get_period_price('600660',start_day) 206 | # print(s) 207 | # 208 | # #获取指定时间段内的收盘价 209 | # s = get_period_k_day('601012',start_day,stop_day) 210 | # print(s) 211 | # s.to_csv('test.csv', encoding= 'gbk') 212 | 213 | #更新K线数据并存文档 214 | company_list = ['600660', '600066', '000651', '600522', '601012', '600887'] 215 | k_day_update(company_list) 216 | k_day_update('600066') 217 | 218 | -------------------------------------------------------------------------------- /M1809/src/trade_day.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sun May 20 16:23:28 2018 4 | 5 | @author: YinChao 6 | @date: 20180520 7 | """ 8 | 9 | import urllib.request as request 10 | import datetime 11 | import time 12 | ''' 13 | @query a single date: string '20170401'; 14 | @api return day_type: 0 workday 1 weekend 2 holiday -1 err 15 | @function return day_type: 1 workday 0 weekend&holiday 16 | ''' 17 | 18 | 19 | def get_day_type(query_date): 20 | ''' 21 | 节假日求取辅助函数,从指定网址上获取当日状态 22 | 0工作日 1周末 2节假日 23 | http://tool.bitefu.net/jiari/?d=20181009 返回0(工作日) 24 | http://tool.bitefu.net/jiari/?d=20181014 返回1(周末) 25 | http://tool.bitefu.net/jiari/?d=20181001 返回2(国庆节) 26 | ''' 27 | url = 'http://tool.bitefu.net/jiari/?d=' + query_date 28 | resp = request.urlopen(url) 29 | content = resp.read() 30 | if content: 31 | try: 32 | day_type = int(content) 33 | except ValueError: 34 | return -1 35 | else: 36 | return day_type 37 | else: 38 | return -1 39 | 40 | 41 | def isWorkingTime(): 42 | ''' 43 | 判断当前时刻是否工作日上班时间(未考虑节假日影响) 44 | ''' 45 | workTime=['09:00:00','18:00:00'] 46 | dayOfWeek = datetime.datetime.now().weekday() 47 | beginWork=datetime.datetime.now().strftime("%Y-%m-%d")+' '+workTime[0] 48 | endWork=datetime.datetime.now().strftime("%Y-%m-%d")+' '+workTime[1] 49 | beginWorkSeconds=time.time()-time.mktime(time.strptime(beginWork, '%Y-%m-%d %H:%M:%S')) 50 | endWorkSeconds=time.time()-time.mktime(time.strptime(endWork, '%Y-%m-%d %H:%M:%S')) 51 | if (int(dayOfWeek) in range(5)) and int(beginWorkSeconds)>0 and int(endWorkSeconds)<0: 52 | return 1 53 | else: 54 | return 0 55 | 56 | def isWorkingDay(): 57 | ''' 58 | 判断今天是否工作日 59 | ''' 60 | dayOfWeek = datetime.datetime.now().weekday() #今天星期几? 
61 | if dayOfWeek < 5: #weekday()返回0(周一)~6(周日),工作日为0~4 62 | return 1 63 | else: 64 | return 0 65 | 66 | 67 | def is_tradeday(query_date): 68 | ''' 69 | 判断给定日期是否股市交易日(考虑了节假日的影响) 70 | ''' 71 | weekday = datetime.datetime.strptime(query_date, '%Y%m%d').isoweekday() 72 | if weekday <= 5 and get_day_type(query_date) == 0: 73 | return 1 74 | else: 75 | return 0 76 | 77 | 78 | def today_is_tradeday(): 79 | ''' 80 | 判断今天是否股市交易日(考虑了节假日的影响) 81 | ''' 82 | query_date = datetime.datetime.strftime(datetime.datetime.today(), '%Y%m%d') 83 | return is_tradeday(query_date) 84 | 85 | 86 | if __name__ == '__main__': 87 | print(is_tradeday('20171229')) 88 | print(today_is_tradeday()) -------------------------------------------------------------------------------- /M1809/src/txttoexcel.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Thu Jul 12 20:17:19 2018 4 | @author: John 5 | """ 6 | 7 | from openpyxl import Workbook 8 | from openpyxl.utils import get_column_letter 9 | import re 10 | import time 11 | 12 | # read txt data 13 | 14 | 15 | def read_txt(inputfiles): 16 | p1 = r"(.*)[0-9](.*?).*" #仅保留含有数字的行 17 | temp_list = [] 18 | with open(inputfiles,'r',encoding='gbk') as f: 19 | for line in f: 20 | matchObj = re.match(p1,line) 21 | if matchObj: 22 | line = line.strip() 23 | temp_list.append(line) 24 | return temp_list 25 | 26 | 27 | 28 | def parse_line(aline): 29 | aline = aline.replace(":"," ") 30 | aline = aline.replace(":"," ") 31 | aline = aline.replace(","," ") 32 | aline = aline.split() 33 | return aline 34 | 35 | 36 | # create excel files 37 | def generate_excel(temp_list,inputfiles): 38 | rows = len(temp_list) 39 | 40 | wb = Workbook() 41 | 42 | dest_filename = inputfiles[:-4] + '.xlsx' 43 | ws1 = wb.active 44 | 45 | ws1.title = "Analysis report V1" 46 | 47 | for row in range(rows): 48 | aline = temp_list[row] 49 | aline = parse_line(aline) 50 | for col in range(len(aline)): 51 | ws1.cell(column=col + 1, row=row + 1, value="{0}".format(aline[col])) 52 | 53 | wb.save(filename = dest_filename) 54 | 55 | if __name__ == "__main__": 56 | inputfiles = r"D:\600522_20180714.txt" 57 | file_list = read_txt(inputfiles) 58 | generate_excel(file_list,inputfiles) 59 | -------------------------------------------------------------------------------- /M1809/sys_config/账户配置.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/M1809/sys_config/账户配置.txt -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Welcome to anack 2 | 3 | ## anack是什么? 4 | anack是一款金融数据分析工具,用于实现股市投资中的基本面分析,提供投资建议,最终形成一种量化交易工具 5 | 6 | ## anack具备什么功能?
7 | * 获取多种金融原始数据 8 | * 实现个股基本面分析 9 | * 实现个股价值估计并提供投资建议 10 | * 实现宏观经济形势分析与A股趋势预判 11 | * 实现量化交易功能 12 | 13 | ## 谁会对anack有兴趣 14 | * 广大股民 15 | * 人工智能、大数据工程师 16 | * 量化交易开发者 17 | 18 | 19 | ## 已发布工具速查 20 | * HK_insider。 实现港股持股披露信息分析 21 | * YT_produce_sell。实现宇通客车的产销数据分析 22 | 23 | 24 | ## 数据接口速览(持续添加): 25 | * [实时数据_福耀玻璃](http://hq.sinajs.cn/list=sh600660) 26 | * [实时数据_上证综指](http://hq.sinajs.cn/list=s_sh000001) 27 | * [实时数据_深成指数](http://hq.sinajs.cn/list=s_sz399001) 28 | * [日线图_福耀玻璃](http://image.sinajs.cn/newchart/daily/n/sh600660.gif) 29 | * [月线图_福耀玻璃](http://image.sinajs.cn/newchart/monthly/n/sh600660.gif) 30 | * [成交明细](http://market.finance.sina.com.cn/downxls.php?date=2011-07-08&symbol=sh600660) 31 | * [当日分价表](http://vip.stock.finance.sina.com.cn/quotes_service/view/cn_price.php?symbol=sh600660) 32 | * [多日分价表](http://market.finance.sina.com.cn/pricehis.php?symbol=sh600660&startdate=2011-08-17&enddate=2011-08-19) 33 | 34 | 35 | >## 反馈交流 36 | >在使用中有任何问题,欢迎反馈给我,可以用以下邮件跟我交流 37 | 38 | >*yc86247931@126.com* 39 | 40 | >*shutian318@163.com* 41 | 42 | ## SubProject1 基于PYTHON和树莓派的盈亏分析平台设计 43 | 44 | 45 | #1.通过Python爬取网页获取实时金融指标数据 46 |  http://hq.sinajs.cn/list=sz000651(每天更新) 47 | choice(需要付费) 48 | http://sc.hkexnews.hk/TuniS/www.hkexnews.hk/sdw/search/mutualmarket_c.aspx?t=sh (含历史数据) 49 | 50 |  http://money.finance.sina.com.cn/corp/go.php/vFD_CashFlow/stockid/000651/ctrl/2017/displaytype/4.phtml (爬取历史的报表数据) 51 | 52 | #2.指标的实时监控,有预警信息后推送手机 53 | 54 | #3.经过历史指标筛选出值得投资的长期股票及适合买入时机(具体算法再商议),历史数据的存储放在数据库或者树莓派上完成。 55 | 同时建模获取短期投资股票时机,短信提示手机 56 | 57 | #4.训练一个模拟操盘手,按每周/每月进行操作,最后按照盈亏指标来验证训练模型好坏 58 | 59 | 60 | ## 理念 61 | 62 | 63 | 把炒股的经验做成算法,利用软件来实现。同时利用软件来发掘新的机会(机器学习)。 64 | 其次可以发布推荐信息。设置自己的持仓后,一方面根据算法向用户发布买卖信号。另一方面算法 65 | 在内部自己计算操作盈亏(用户可以无视买卖信号),最终可以通过比对二者差异来确定算法的好坏 66 | 67 | 开发阶段可以设置多种算法同时运行,针对某一个具体指标。可以通过对比来确定使用哪种策略更有效 68 | 直接利用已经发生了的数据进行海量机器学习。 69 | 70 | 可以实时模拟投资,看最终的投资结果 71 | 72 | 73 | ## 架构设计 74 | 75 | 76 | 整体框架,需要实现的功能规划好 77 | 功能: 78 | 1. 能够实时监控数据变化 79 | 2. 能够根据指定的算法进行相关的输出 80 | 3. 能够根据算法进行模拟操盘并可以自己分析收益 81 | 4. 能够自主学习,用以验证经验的有效性 82 | -------------------------------------------------------------------------------- /Release/HK_insider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/Release/HK_insider.py -------------------------------------------------------------------------------- /Release/ReleaseNote.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/Release/ReleaseNote.txt -------------------------------------------------------------------------------- /Release/YTProductionAndSale/DataAnalyze.py: -------------------------------------------------------------------------------- 1 | # 作者:尹超 2 | # 更新日期:2018-5-13 3 | # 版本号:V0.3 4 | # 描述:本程序用于宇通客车产销快报数据的分析 5 | # 内容:系统初始化、数据读入、数据处理、绘图、统计分析 6 | # 备注: 7 | # 1. 务必保持工程下有连续的xlsx文件,并确保文件名有效 8 | # 2. 确保year/lastmonth和文件一致,否则会出现错误 9 | # 3. 
暂时只能靠手工将pdf文件转成xlsx文件,以后可以考虑做成全自动的 10 | 11 | # 修改记录:2018-1-6.修复了产销比计算错误的bug 12 | #修改记录:2018-5-13.更改数据来源,由离线数据变为数据库 13 | from pylab import * 14 | mpl.rcParams['font.sans-serif'] = ['SimHei'] 15 | 16 | 17 | import DataToSql 18 | import pandas as pd 19 | from pandas import Series, DataFrame 20 | import matplotlib.pyplot as plt 21 | class DataAnalyze: 22 | def __init__(self,year,month): 23 | self.year=year 24 | self.month = month 25 | 26 | def run(self): 27 | 28 | dataBase=DataToSql.ProductionSaleToSql(YearBegin = 2018,MonthBegin = 7) 29 | # data handle 30 | CurSale = [] #今年销量明细 31 | CurTotalSale = [] #今年销量累计 32 | CurProduce = [] #今年产量 33 | CurTotalProduce = [] #今年产量累计 34 | 35 | LastSale = [] 36 | LastTotalSale = [] 37 | LastProduce = [] 38 | LastTotalProduce = [] 39 | 40 | CurBigSale = [] #大车销量 41 | CurMidSale = [] #中车销量 42 | CurSmallSale = [] #小车销量 43 | 44 | LastBigSale = [] #大车销量 45 | LastMidSale = [] #中车销量 46 | LastSmallSale = [] #小车销量 47 | 48 | sum_cur0 = 0 49 | sum_last0 = 0 50 | sum_cur1 = 0 51 | sum_last1 = 0 52 | idx = [] 53 | 54 | for i in range(1,self.month+1): 55 | CurProduceData=dataBase.QueryPSData(str(self.year),str(i),"production") 56 | CurProduce.append(CurProduceData) 57 | 58 | LastProducData=dataBase.QueryPSData(str(self.year),str(i),"SPLY_production") 59 | LastProduce.append(LastProducData) 60 | 61 | sum_cur0+=CurProduceData 62 | sum_last0+=LastProducData 63 | CurTotalProduce.append(sum_cur0) 64 | LastTotalProduce.append(sum_last0) 65 | 66 | CurSaleData=dataBase.QueryPSData(str(self.year),str(i),"sale") 67 | CurSale.append(CurSaleData) 68 | 69 | LastSaleData=dataBase.QueryPSData(str(self.year),str(i),"SPLY_sale") 70 | LastSale.append(LastSaleData) 71 | 72 | sum_cur1 += CurSaleData 73 | sum_last1 += LastSaleData 74 | CurTotalSale.append(sum_cur1) 75 | LastTotalSale.append(sum_last1) 76 | 77 | CurBigSaleData=dataBase.QueryPSData(str(self.year),str(i),"large_sale") 78 | CurBigSale.append(CurBigSaleData) 79 | 80 | CurMidSaleData=dataBase.QueryPSData(str(self.year),str(i),"mid_sale") 81 | CurMidSale.append(CurMidSaleData) 82 | 83 | CurSmallSaleData=dataBase.QueryPSData(str(self.year),str(i),"small_sale") 84 | CurSmallSale.append(CurSmallSaleData) 85 | 86 | LastBigSaleData=dataBase.QueryPSData(str(self.year),str(i),"SPLY_sale_large") 87 | LastBigSale.append(LastBigSaleData) 88 | 89 | LastMidSaleData=dataBase.QueryPSData(str(self.year),str(i),"SPLY_sale_mid") 90 | LastMidSale.append(LastMidSaleData) 91 | 92 | LastSmallSaleData=dataBase.QueryPSData(str(self.year),str(i),"SPLY_sale_small") 93 | LastSmallSale.append(LastSmallSaleData) 94 | 95 | idx.append(str(i)+'月') 96 | print (CurSale) 97 | #汇总数据,什么都有 98 | Stat = DataFrame([CurProduce,LastProduce,CurTotalProduce,LastTotalProduce,CurSale,LastSale,CurTotalSale,LastTotalSale,CurBigSale,CurMidSale,CurSmallSale,LastBigSale,LastMidSale,LastSmallSale]) 99 | Stat = Stat.T 100 | Stat.index = idx 101 | Stat.columns=['今年产量','去年产量','今年产量累计','去年产量累计', '今年销量','去年销量','今年销量累计','去年销量累计','今年大车产量','今年中车产量','今年小车产量','去年大车产量','去年中车产量','去年小车产量'] 102 | 103 | #-------------------------------------------------------------------- 104 | # plot 105 | 106 | #不同年份的对比 107 | DiffYearCmp = Stat.iloc[:,[0,1]] 108 | DiffYearCmp.plot(kind='bar') 109 | plt.xlabel('month') #横坐标标签 110 | plt.ylabel('quantity') #纵坐标标签 111 | #plt.xticks(rotation=45) #坐标标号旋转 112 | plt.title('宇通客车月产量对比') 113 | 114 | DiffYearTotal = Stat.iloc[:,[2,3]] 115 | DiffYearTotal.plot() 116 | plt.xlabel('month') #横坐标标签 117 | plt.ylabel('quantity') #纵坐标标签 118 | plt.title('宇通客车总产量对比') 119 | 120 | 
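# 注意(存疑,供核对):上方 Stat 中“今年大车产量/今年中车产量/今年小车产量”及对应“去年…产量”各列,
# 实际由 CurBigSale/CurMidSale/CurSmallSale、LastBigSale 等销量数据填充,列名疑似应为“销量”;
# 下方“产品结构对比”图(Stat.iloc[:,[8,9,10]])使用的正是这些销量列。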
#相同年份的对比 121 | SameYearCmp = Stat.iloc[:,[0,4]] 122 | SameYearCmp.plot(kind='bar') 123 | plt.xlabel('month') #横坐标标签 124 | plt.ylabel('quantity') #纵坐标标签 125 | plt.title('宇通客车产销量对比') 126 | 127 | SameYearDiff = Stat.iloc[:,[8,9,10]] 128 | SameYearDiff.plot(kind='bar') 129 | plt.xlabel('month') #横坐标标签 130 | plt.ylabel('quantity') #纵坐标标签 131 | plt.title('产品结构对比') 132 | plt.show() 133 | 134 | #-------------------------------------------------------------------- 135 | #analyse 136 | #1.今年和往年相比的增量 137 | print('统计汇总报告,截止'+str(self.year)+'年'+str(self.month)+'月。。。') 138 | print('-----------------------------------------------') 139 | print('1:产销同比') 140 | IncRate = DiffYearTotal.iloc[self.month-1,:].pct_change() * -100 141 | a = IncRate.round(2) #保留两位小数 142 | print('产量同比增长:'+str(a[1])+'%') 143 | IncRate = (Stat.iloc[self.month-1,6] - Stat.iloc[self.month-1,7])/Stat.iloc[self.month-1,7]*100 144 | a = ("%.2f" % IncRate) #保留两位小数 145 | print('销量同比增长:'+ a +'%') 146 | print('-----------------------------------------------') 147 | #2.产销比是否健康? 148 | print('2.产销结构统计') 149 | total = SameYearCmp.sum() 150 | rate = total.pct_change() * 100 151 | a = rate.round(2) #保留两位小数 152 | print('产销差异:'+str(abs(a[1]))+'%') 153 | if(abs(a[1]) <= 1): 154 | print('产销结构很健康') 155 | print('-----------------------------------------------') 156 | #3.月产量是否有异动 157 | print('3.月产量波动情况') 158 | diff = DiffYearCmp.pct_change().round(2) * 100 159 | s1=diff.今年产量 160 | s2=diff.去年产量 161 | print('每月产量增幅') 162 | for i in range(1,self.month): 163 | print(s1[i],end='\t') 164 | print('') 165 | print('-----------------------------------------------') 166 | #4.产品结构是否发生了重大变化? 167 | print('4.产品结构变化') 168 | s=SameYearDiff.T 169 | s_sum=s.sum() 170 | Rate = (s/s_sum).round(2) 171 | print(Rate) 172 | 173 | #--------------------------------------------------------------------------- 174 | # 用户代码示例 175 | # 使用前确保数据库中有相应数据 176 | 177 | if __name__ == "__main__": 178 | DA=DataAnalyze(2017,2) 179 | DA.run() 180 | -------------------------------------------------------------------------------- /Release/YTProductionAndSale/PdfDown.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sat Apr 7 18:35:34 2018 4 | 5 | @author: 54206 6 | """ 7 | import requests 8 | import datetime 9 | from requests.exceptions import RequestException 10 | import re 11 | import urllib 12 | import os 13 | import shutil 14 | def save_to_file(file_name, contents): 15 | fh = open(file_name, 'w') 16 | fh.write(contents) 17 | fh.close() 18 | 19 | class FolderNotCleanException (Exception): 20 | pass 21 | 22 | class PdfDownLoad: 23 | def __init__(self,year=2016,month=1,downloadAdrr = 'D:/downloadTest/'): 24 | self.headers = {'Accept':'*/*', 25 | 'Accept-Encoding':'gzip, deflate', 26 | 'Accept-Language':'zh-CN,zh;q=0.9', 27 | 'Connection': 'keep-alive', 28 | 'Cookie': 'yfx_c_g_u_id_10000042=_ck18030722220815231570139781377; VISITED_STOCK_CODE=%5B%22600066%22%5D; VISITED_MENU=%5B%229062%22%2C%229729%22%2C%228307%22%5D; UM_distinctid=1629a80cb0b7e3-0da2cab7416fbd-c343567-144000-1629a80cb0c185; websearch=%22900957%22%3A%22%u51CC%u4E91B%u80A1%22%2C%22603966%22%3A%22%u6CD5%u5170%u6CF0%u514B%22%2C%22603933%22%3A%22%u777F%u80FD%u79D1%u6280%22%2C%22603955%22%3A%22%u5927%u5343%u751F%u6001%22%2C%22600066%22%3A%22%u5B87%u901A%u5BA2%u8F66%22; VISITED_COMPANY_CODE=%5B%22600066%22%2C%22%5Bobject%20Object%5D%22%5D; 
seecookie=%5B900957%5D%3A%u51CC%u4E91B%u80A1%2C%5B603966%5D%3A%u6CD5%u5170%u6CF0%u514B%2C%5B603933%5D%3A%u777F%u80FD%u79D1%u6280%2C%5B603955%5D%3A%u5927%u5343%u751F%u6001%2C%5B600066%5D%3A%u5B87%u901A%u5BA2%u8F66%2C%u5B87%u901A%u5BA2%u8F66%u4EA7%u9500%u5FEB%u62A5; yfx_f_l_v_t_10000042=f_t_1520432528520__r_t_1522998662756__v_t_1523023134976__r_c_7', 29 | 'Host':'query.sse.com.cn', 30 | 'Referer':'http://www.sse.com.cn/home/search/?webswd=%E5%AE%87%E9%80%9A%E5%AE%A2%E8%BD%A6%E4%BA%A7%E9%94%80%E5%BF%AB%E6%8A%A5', 31 | 'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.146 Safari/537.36'} 32 | self.year = year 33 | self.month = month 34 | self.downloadAdrr=downloadAdrr 35 | self.pdfList=[] 36 | 37 | 38 | def get_one_page(self,url): 39 | try: 40 | response = requests.get(url,headers = self.headers) 41 | response.encoding = 'utf-8' 42 | if response.status_code == 200: 43 | return response.text 44 | return None 45 | except RequestException as e: 46 | print (e) 47 | return None 48 | 49 | def getCurrentPage(self,url,beginDate): 50 | RList=[] 51 | html = self.get_one_page(url) 52 | Reguler =r"\\/disclosure\\/listedinfo\\/announcement\\/c\\/.*?pdf" 53 | pattern = re.compile(Reguler) 54 | ls = pattern.findall(html) 55 | #print (ls) 56 | for eachLink in ls: 57 | element = eachLink.split('\\/') 58 | YMD=element[-2].split("-") 59 | year = int(YMD[0]) 60 | month = int(YMD[1]) 61 | day = int(YMD[2]) 62 | eachDate = datetime.date(year,month,day) 63 | if(eachDate.__ge__(beginDate)): 64 | RList.append(eachLink) 65 | else: 66 | break 67 | #print (RList) 68 | return RList 69 | 70 | 71 | 72 | def getAllPDFAdd(self): 73 | AllList = [] 74 | beginDate = datetime.date(self.year,self.month+1,1) 75 | beginNum =1 76 | RLength = 10 77 | url1=r"http://query.sse.com.cn/search/getSearchResult.do?search=qwjs&jsonCallBack=jQuery111205573825303579625_1523023138864&page=" 78 | url2=r"&searchword=T_L+CTITLE+T_D+E_KEYWORDS+T_JT_E+likeT_L%E5%AE%87%E9%80%9A%E5%AE%A2%E8%BD%A6%E4%BA%A7%E9%94%80%E5%BF%AB%E6%8A%A5T_RT_R&orderby=-CRELEASETIME&perpage=10&_=1523023138865" 79 | while (RLength==10): 80 | url = url1+str(beginNum)+url2 81 | Rlist = self.getCurrentPage(url,beginDate) 82 | RLength = len(Rlist) 83 | AllList =AllList+Rlist 84 | beginNum=beginNum+1 85 | return AllList 86 | 87 | 88 | 89 | def getFile(self,url): 90 | pdf_name = url.split('/')[-1] 91 | file_name = self.downloadAdrr+pdf_name 92 | u = urllib.request.urlopen(url) 93 | f = open(file_name, 'wb') 94 | 95 | block_sz = 8192 96 | while True: 97 | buffer = u.read(block_sz) 98 | if not buffer: 99 | break 100 | 101 | f.write(buffer) 102 | f.close() 103 | print ("Sucessful to download" + " " + pdf_name) 104 | return pdf_name 105 | 106 | 107 | 108 | 109 | def FolderClean(self): 110 | for i in os.listdir(self.downloadAdrr): 111 | path_file = os.path.join(self.downloadAdrr,i) # 取文件路径 112 | if os.path.isfile(path_file): 113 | os.remove(path_file) 114 | if os.path.isdir(path_file): 115 | shutil.rmtree(path_file) 116 | if os.listdir(self.downloadAdrr): #如果文件夹没有清理干净,抛出异常 117 | raise FolderNotCleanException 118 | 119 | 120 | def GetAllPdfFile(self): 121 | self.FolderClean() 122 | AllList = self.getAllPDFAdd() 123 | baseUrl = r"http://www.sse.com.cn" 124 | for EachList in AllList: 125 | url=baseUrl+EachList 126 | #url.replace('\','/') 127 | urlList = url.split('/') 128 | url = "" 129 | for Each in urlList: 130 | if (Each==urlList[0]): 131 | url=Each 132 | elif(Each==urlList[-1]): 133 | url=url+'/'+Each 134 | else: 135 | 
url=url+'/'+Each[:-1] 136 | pdf_name=self.getFile(url) 137 | self.pdfList.append(pdf_name) 138 | 139 | def RPdfList(self): 140 | return self.pdfList 141 | 142 | if __name__ == "__main__": 143 | i = PdfDownLoad() 144 | i.GetAllPdfFile() 145 | print (i.RPdfList()) 146 | -------------------------------------------------------------------------------- /Release/YTProductionAndSale/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------- /Release/YTProductionAndSale/使用说明.txt: -------------------------------------------------------------------------------- 1 | 模块中包含两个单独功能: 2 | 1、宇通客车采销数据入库 3 | 2、宇通客车采销数据分析 4 | 5 | 一、采销数据入库 6 | 环境搭建步骤: 7 | 1、下载pdf2htmlex 8 | 下载地址:http://soft.rubypdf.com/software/pdf2htmlex-windows-version,下载后缀为win32-static的版本 9 | 2、解压pdf2htmlex到\anack\Release\YTProductionAndSale\ExeFile下,保证exe文件在ExeFile文件下,而不要在ExeFile下建文件夹保存 10 | 运行脚本步骤 11 | 1、在anack\Release\YTProductionAndSale\config下新建account.txt文件,用于登录远程数据库(与M1809的文件一致) 12 | 2、调用DataToSql.py 13 | Update = ProductionSaleToSql(YearBegin = 2016,MonthBegin = 5) #新建实例,参数代表对2016年5月以上的数据进行入库处理 14 | Update.ProSaleUpdate() #调用入库方法 15 | 16 | 二、采销数据分析 17 | 运行脚本步骤: 18 | DA=DataAnalyze(2017,2) #初始化实例,参数代表对2017.1-2017.2的数据进行分析 19 | DA.run() -------------------------------------------------------------------------------- /Release/YT_produce_sell.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/Release/YT_produce_sell.py -------------------------------------------------------------------------------- /Release/get_dividends_history.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Mon Mar 26 21:29:43 2018 4 | 5 | @author: 尹超 6 | # 该模块用于获取指定个股的历史分红记录,以DataFrame形式给出 7 | """ 8 | import pandas as pd 9 | import requests 10 | from requests.exceptions import RequestException 11 | from bs4 import BeautifulSoup 12 | 13 | 14 | headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36'} 15 | 16 | def get_one_page(url): 17 | try: 18 | response = requests.get(url,headers = headers) 19 | response.encoding = 'GB2312' 20 | if response.status_code == 200: 21 | return response.text 22 | return None 23 | except RequestException: 24 | return None 25 | 26 | def parse(html): 27 | raw_data = [] 28 | try: 29 | year_raw = [] 30 | year = [] 31 | bonus_share = [] 32 | bonus_convert = [] 33 | profit_send = [] 34 | ex_rights = [] 35 | register_day = [] 36 | 37 | soup = BeautifulSoup(html,'html5lib') 38 | l = soup.select('table#sharebonus_1') 39 | ls = l[0].tbody 40 | lls = ls.select('td') 41 | for l in lls: 42 | if (l.get_text().strip()) != '预案' and \ 43 | (l.get_text().strip()) != '实施' and \ 44 | (l.get_text().strip()) != '不分配' and \ 45 | (l.get_text().strip()) != '查看': 46 | raw_data.append(l.get_text().strip()) 47 | 48 | year_raw = raw_data[::7] 49 | # print(raw_data) #出错的话请检查此处的输出 50 | # print(year_raw) #出错的话请检查此处的输出 51 | for item in year_raw: 52 | a = pd.to_datetime(item).year - 1 53 | year.append(a) 54 | bonus_share = raw_data[1::7] 55 | bonus_convert = raw_data[2::7] 56 | profit_send = raw_data[3::7] 57 | ex_rights = raw_data[4::7] 58 | register_day = raw_data[5::7] 59 | # print(register_day) 60 | data = {'年度':year, 61 | '送股':bonus_share, 62 | 
'转股':bonus_convert, 63 | '派息':profit_send, 64 | '除权日':ex_rights, 65 | '登记日':register_day 66 | } 67 | frame = pd.DataFrame(data) 68 | d = pd.DataFrame(columns = frame.columns.values.tolist()) 69 | for i in range(len(frame)): #删除无效的记录并重新排序,保证按时间顺序来 70 | if frame.iloc[len(frame) - 1 - i]['除权日'] != '--': 71 | d = d.append(frame.iloc[len(frame) - 1 - i],ignore_index=True) 72 | return d 73 | except: 74 | print('cannot parse this page') 75 | 76 | def parse_single_year(html,Year): 77 | raw_data = [] 78 | try: 79 | year_raw = [] 80 | year = [] 81 | bonus_share = [] 82 | bonus_convert = [] 83 | profit_send = [] 84 | ex_rights = [] 85 | register_day = [] 86 | # print('it is ',Year) 87 | soup = BeautifulSoup(html,'html5lib') 88 | l = soup.select('table#sharebonus_1') 89 | ls = l[0].tbody 90 | lls = ls.select('td') 91 | for l in lls: 92 | if (l.get_text().strip()) != '预案' and \ 93 | (l.get_text().strip()) != '实施' and \ 94 | (l.get_text().strip()) != '不分配' and \ 95 | (l.get_text().strip()) != '查看': 96 | raw_data.append(l.get_text().strip()) 97 | 98 | year_raw = raw_data[::7] 99 | # print(raw_data) #出错的话请检查此处的输出 100 | # print(year_raw) #出错的话请检查此处的输出 101 | for item in year_raw: 102 | a = pd.to_datetime(item).year - 1 103 | year.append(a) 104 | bonus_share = raw_data[1::7] 105 | bonus_convert = raw_data[2::7] 106 | profit_send = raw_data[3::7] 107 | ex_rights = raw_data[4::7] 108 | register_day = raw_data[5::7] 109 | # print(register_day) 110 | data = {'年度':year, 111 | '送股':bonus_share, 112 | '转股':bonus_convert, 113 | '派息':profit_send, 114 | '除权日':ex_rights, 115 | '登记日':register_day 116 | } 117 | 118 | frame = pd.DataFrame(data) 119 | 120 | Len=len(frame) 121 | for i in range(Len): 122 | s=int(frame.iloc[i,[0]]) 123 | Date=frame.iloc[i,[2]] 124 | date2=Date.loc['登记日'] 125 | # print(s,date2) 126 | if s == Year: 127 | px=float(frame.iloc[i,[1]]) 128 | date2=date2[:4]+date2[5:7]+date2[8:] 129 | # print(s,'px money is ',px,date2) 130 | return px,date2 131 | return -1 132 | except: 133 | print('cannot parse this page') 134 | 135 | #获取每10股派现金,及股权登记日 136 | def get_px_single_year(id,Year): 137 | url = 'http://vip.stock.finance.sina.com.cn/corp/go.php/vISSUE_ShareBonus/stockid/' 138 | url += str(id) 139 | url += '.phtml' 140 | html = get_one_page(url) 141 | return parse_single_year(html,Year) 142 | # 提供给用户的函数,输入ID,解析出历史分红列表 143 | def get_bonus_table(id): 144 | url = 'http://vip.stock.finance.sina.com.cn/corp/go.php/vISSUE_ShareBonus/stockid/' 145 | url += str(id) 146 | url += '.phtml' 147 | html = get_one_page(url) 148 | return parse(html) 149 | 150 | ############################################################################### 151 | ############################################################################### 152 | # APP示例代码,用完了请关闭 600066 153 | #s = get_bonus_table('601012') 154 | #print(s) 155 | #2017年的派息实际是在2018派发,所以登记日时间上是2018年 156 | #测试股息率 157 | #PX,Date=get_px_single_year('601012',2017) 158 | #print(PX,Date) -------------------------------------------------------------------------------- /Release/kday/get_price.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | 从163网址上获取指定ID指定时间段的K线数据 4 | """ 5 | import requests 6 | import re 7 | import datetime 8 | import pandas as pd 9 | ''' 10 | 11 | 完整网址: 12 | http://quotes.money.163.com/service/chddata.html?code=0%06d&start=%d&end=%d&fields=TCLOSE;HIGH;LOW;TOPEN;LCLOSE;VOTURNOVER;VATURNOVER 13 | ''' 14 | 15 | 16 | def get_close_price(id, day = 0): 17 | ''' 18 | 获取指定ID指定日期的收盘价 19 | 输入:id -> 
str形式的ID号: '600660' 20 | day -> str形式的日期: '20180626' 21 | 返回值:str形式的价格: '25.54', 如果当天为节假日,则返回0 22 | ''' 23 | if day == 0: 24 | day = datetime.datetime.now() - datetime.timedelta(days=1) 25 | day = day.strftime("%Y%m%d") 26 | if id[:3] == '000' or id[:3] == '002' or id[:3] == '300': #如果是深市,则前缀为1 27 | nid = '1' + id 28 | else: #如果是沪市主板,则前缀为0 29 | nid = '0' + id 30 | url = "http://quotes.money.163.com/service/chddata.html?code=%s&start=%s&end=%s&\ 31 | fields=TCLOSE" %(nid, day,day) 32 | res = requests.get(url) 33 | res.raise_for_status() 34 | 35 | for chunk in res.iter_content(100000): 36 | # print(chunk) 37 | pattern = '[^,\r\n]+' 38 | obj = re.compile(pattern) 39 | match = obj.findall(chunk.decode('gbk')) 40 | #print(match) 41 | if len(match) < 8: 42 | return 0 43 | else: 44 | return match[-1] 45 | 46 | def get_period_k_day(id, start_day, stop_day = 0): 47 | ''' 48 | 获取指定ID一个时间段内的K线数据 49 | 输入:id -> str形式的ID号: '600660' 50 | start_day -> str形式的日期: '20180626' 51 | stop_day -> 同上, 默认到昨天 52 | 返回值:一个dataframe 53 | ''' 54 | if stop_day == 0: 55 | day = datetime.datetime.now() - datetime.timedelta(days=1) 56 | day = day.strftime("%Y%m%d") 57 | 58 | if id[:3] == '000' or id[:3] == '002' or id[:3] == '300': #如果是深市,则前缀为1 59 | nid = '1' + id 60 | else: #如果是沪市主板,则前缀为0 61 | nid = '0' + id 62 | url = "http://quotes.money.163.com/service/chddata.html?code=%s&start=%s&end=%s&\ 63 | fields=TCLOSE;HIGH;LOW;TOPEN;LCLOSE;VOTURNOVER;VATURNOVER" %(nid, start_day, stop_day) 64 | 65 | 66 | # url = "http://quotes.money.163.com/service/chddata.html?code=0%s&start=%s&end=%s&\ 67 | # fields=TCLOSE;HIGH;LOW;TOPEN;LCLOSE;VOTURNOVER;VATURNOVER" %(id, start_day,stop_day) 68 | res = requests.get(url) 69 | res.raise_for_status() 70 | # playFile = open(file_name, 'wb') 71 | 72 | raw_data = [] 73 | for chunk in res.iter_content(1000000): 74 | # playFile.write(chunk) 75 | chunk = chunk.decode('gbk') 76 | pattern = '[^,\r\n]+' 77 | obj = re.compile(pattern) 78 | match = obj.findall(chunk) 79 | if len(match) < 8: #如果没有数据 80 | return 0 81 | 82 | header = match[:10] #如果增加字段,则此处以下需要相应修改 83 | # print(header) 84 | raw_data = match[10:] 85 | date = raw_data[::10] 86 | idc = raw_data[1::10] 87 | name = raw_data[2::10] 88 | price = raw_data[3::10] 89 | high = raw_data[4::10] 90 | lopen = raw_data[5::10] 91 | yesterday_close = raw_data[6::10] 92 | low = raw_data[7::10] 93 | vol = raw_data[8::10] 94 | mount = raw_data[9::10] 95 | 96 | data = { 97 | # header[0]:date, 98 | header[1]:idc, 99 | header[2]:name, 100 | header[3]:price, 101 | header[4]:high, 102 | header[5]:lopen, 103 | header[6]:yesterday_close, 104 | header[7]:low, 105 | header[8]:vol, 106 | header[9]:mount 107 | } 108 | df = pd.DataFrame(data,index = date) 109 | # playFile.close() 110 | return df 111 | 112 | 113 | def k_day_to_csv(code, stop_day = 0): 114 | ''' 115 | 更新k线数据,并保存到本地,默认为更新到昨天 116 | code:目标个股,只能为'000xxx'形式 117 | stop_day: 0->昨天, 20170101:更新到指定的一天 118 | @更新逻辑: 119 | 1. 如果无记录,则自动创建csv文件,默认为:ID.kday 120 | 2. 如果有部分记录,则自动分析,并将后续的内容更新 121 | 3. 如果记录比需要更新的更新,则直接返回 122 | 123 | 缺陷:得到的数据是没有复权的,应该进行前复权 124 | ''' 125 | base_path = './' #修改此处可以更改文件存放路径,可以考虑作为一个配置参数 126 | start_day = '19970101' #start时间统一从1997年开始 127 | #参数合法性检查 128 | if isinstance(code,list): 129 | print('is a list') 130 | elif isinstance(code,str): 131 | file_name = code + '.csv' 132 | # print(file_name) 133 | else: 134 | print('bad input. 
please check it') 135 | return 136 | 137 | file_name = base_path + file_name 138 | # print(file_name) 139 | 140 | update_flag = 1 #1代表重新生成, 2代表更新 3代表无需处理 141 | #判断最新的是第几天 142 | try: 143 | with open(file_name,'r') as fh: 144 | content = fh.readlines() 145 | if len(content) > 2: #获取最新记录,总是在第二行 146 | latest_record = content[1].split(',') 147 | 148 | from datetime import datetime 149 | from dateutil.parser import parse 150 | latest_day = parse(latest_record[0]) 151 | now = datetime.now().strftime('%Y-%m-%d') 152 | yesterday = parse(now) 153 | 154 | if yesterday > latest_day: 155 | update_flag = 2 156 | print('not the latest') 157 | else: 158 | update_flag = 3 159 | print(code + ' already the latest') 160 | return 161 | except: 162 | update_flag = 1 163 | print('no record') 164 | 165 | #不同的情况适用不同更新逻辑 166 | if update_flag == 1: #完全更新 167 | r = get_period_k_day(code, start_day) 168 | r.to_csv(file_name, encoding= 'gbk') 169 | elif update_flag == 2: 170 | r = get_period_k_day(code, start_day) #此处没有办法在首部添加 171 | r.to_csv(file_name, encoding= 'gbk') #如果可以的话,则不必每次重写 172 | return 173 | print('finish ' + code + ' update') 174 | return 175 | 176 | def k_day_update(id_list, stop_day = 0): 177 | ''' 178 | 用户API,更新个股的K线数据,可以是列表,也可以是str 179 | ''' 180 | #参数合法性检查 181 | if isinstance(id_list,list): 182 | print('is a list') 183 | for s in id_list: 184 | k_day_to_csv(s,stop_day) 185 | elif isinstance(id_list,str): 186 | k_day_to_csv(id_list,stop_day) 187 | else: 188 | print('bad input. please check it') 189 | return 190 | 191 | if __name__ == '__main__': 192 | id = '601012' 193 | start_day = '20100625' 194 | stop_day = '20180904' 195 | 196 | #获取昨天的收盘价 197 | # price = get_close_price(id) 198 | # print(price) 199 | 200 | # #获取指定一天的收盘价 201 | # price = get_close_price('600660','20170209') 202 | # print(price) 203 | # 204 | # #获取从start_day开始直到昨天的收盘价 205 | # s = get_period_price('600660',start_day) 206 | # print(s) 207 | # 208 | # #获取指定时间段内的收盘价 209 | # s = get_period_k_day('601012',start_day,stop_day) 210 | # print(s) 211 | # s.to_csv('test.csv', encoding= 'gbk') 212 | 213 | #更新K线数据并存文档 214 | company_list = ['600660', '600066', '000651', '600522', '601012', '600887'] 215 | k_day_update(company_list) 216 | k_day_update('600066') 217 | 218 | -------------------------------------------------------------------------------- /Release/kday/k_day.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Mon Sep 10 20:46:24 2018 4 | 本模块用于实现k线数据的入库/本地存储 5 | @author: yinchao 6 | """ 7 | # ============================================================================= 8 | # 1. sql账户配置 9 | # 2. k_day数据更新 10 | # 3. k_day数据提取 11 | # ============================================================================= 12 | 13 | import get_price 14 | import pymysql 15 | import os 16 | from sqlalchemy import create_engine 17 | 18 | 19 | hosts = '47.98.216.118' 20 | users = 'yc' 21 | passwds = 'yc123!' 
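# 建议:hosts/users/passwds 等账号信息不宜硬编码在源码中,可改为从 sys_config/账户配置.txt 一类的配置文件读取。
# 下面是一个极简示意(按本仓库惯例以注释给出;假设配置文件为每行 key=value 的文本,键名 host/user/passwd 为示意假设):
# def load_account(path='../sys_config/账户配置.txt'):
#     cfg = {}
#     with open(path, encoding='utf-8') as f:
#         for line in f:
#             if '=' in line:
#                 k, v = line.strip().split('=', 1)
#                 cfg[k.strip()] = v.strip()
#     return cfg
# 用法示意:acc = load_account(); hosts, users, passwds = acc['host'], acc['user'], acc['passwd']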
22 | databases = 'test' 23 | def pymysql_connect(): 24 | return pymysql.connect( 25 | host=hosts, 26 | database=databases, 27 | user=users, 28 | password=passwds, 29 | port=3306, 30 | charset='utf8' 31 | ) 32 | def connect_sql(): 33 | return create_engine("mysql+pymysql://"+ users + ":"+ passwds + "@" + hosts + ":3306/" + databases + "?charset=utf8") 34 | 35 | def df_to_mysql(table, code_id, start_day = '19970101'): 36 | connect = connect_sql() 37 | df = get_price.get_period_k_day(code_id, start_day) 38 | df.to_sql(name=table,con=connect,if_exists='append') 39 | 40 | 41 | def get_data_from_mysql(code_id): 42 | try: 43 | cmd = "select * from k_day2 where 股票代码 = \'"+code_id+"\';" 44 | print(cmd) 45 | conn = pymysql.connect( 46 | host = hosts, 47 | port = 3306, 48 | user = users, 49 | passwd = passwds, 50 | db = databases, 51 | charset = "utf8" 52 | ) 53 | 54 | cur = conn.cursor() 55 | cur.execute(cmd) 56 | result = cur.fetchall() 57 | print(result) #此处无法获取正确的数据 58 | return result 59 | except: 60 | print('get nothing') 61 | 62 | # ============================================================================= 63 | # 64 | # ============================================================================= 65 | if __name__ == '__main__': 66 | # df_to_mysql('k_day2', '601012', '20180801') 67 | get_data_from_mysql('601012') -------------------------------------------------------------------------------- /Release/pdf_decoder.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sun Mar 25 20:52:13 2018 4 | 5 | @author: Administrator 6 | # make sure pdfminer3k has been installed 7 | # otherwise: pip install pdfminer3k 8 | """ 9 | 10 | import sys 11 | import importlib 12 | importlib.reload(sys) 13 | 14 | from pdfminer.pdfparser import PDFParser,PDFDocument 15 | from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter 16 | from pdfminer.converter import PDFPageAggregator 17 | from pdfminer.layout import LTTextBoxHorizontal,LAParams 18 | from pdfminer.pdfinterp import PDFTextExtractionNotAllowed 19 | 20 | ''' 21 | 解析pdf 文本,保存到txt文件中 22 | ''' 23 | path = 'test.pdf' 24 | def parse(): 25 | fp = open(path, 'rb') # 以二进制读模式打开 26 | #用文件对象来创建一个pdf文档分析器 27 | praser = PDFParser(fp) 28 | # 创建一个PDF文档 29 | doc = PDFDocument() 30 | # 连接分析器 与文档对象 31 | praser.set_document(doc) 32 | doc.set_parser(praser) 33 | 34 | # 提供初始化密码 35 | # 如果没有密码 就创建一个空的字符串 36 | doc.initialize() 37 | 38 | # 检测文档是否提供txt转换,不提供就忽略 39 | if not doc.is_extractable: 40 | raise PDFTextExtractionNotAllowed 41 | else: 42 | # 创建PDf 资源管理器 来管理共享资源 43 | rsrcmgr = PDFResourceManager() 44 | # 创建一个PDF设备对象 45 | laparams = LAParams() 46 | device = PDFPageAggregator(rsrcmgr, laparams=laparams) 47 | # 创建一个PDF解释器对象 48 | interpreter = PDFPageInterpreter(rsrcmgr, device) 49 | 50 | # 循环遍历列表,每次处理一个page的内容 51 | for page in doc.get_pages(): # doc.get_pages() 获取page列表 52 | interpreter.process_page(page) 53 | # 接受该页面的LTPage对象 54 | layout = device.get_result() 55 | # 这里layout是一个LTPage对象 里面存放着 这个page解析出的各种对象 一般包括LTTextBox, LTFigure, LTImage, LTTextBoxHorizontal 等等 想要获取文本就获得对象的text属性, 56 | for x in layout: 57 | if (isinstance(x, LTTextBoxHorizontal)): 58 | with open(r'out.txt', 'a') as f: 59 | results = x.get_text() 60 | print(results) 61 | f.write(results + '\n') 62 | 63 | if __name__ == '__main__': 64 | parse() -------------------------------------------------------------------------------- /Release/wechat.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Tue Mar 20 19:54:14 2018 4 | 5 | @author: Administrator 6 | # note: key 可以从"http://www.tuling123.com/"处免费注册获得 7 | """ 8 | 9 | import itchat 10 | import requests 11 | import json 12 | key = '' 13 | 14 | # 1. 单独发送 15 | #itchat.auto_login() 16 | #users = itchat.search_friends(name=u'阿狸') 17 | #print(users) 18 | #who = users[0]['UserName'] 19 | #print(who) 20 | #itchat.send('进入自动回复模式,和我对话试试看',toUserName = who) 21 | 22 | #2. 自动回复 23 | 24 | #itchat.auto_login() 25 | #@itchat.msg_register('Text',isGroupChat = True)#群回复 26 | #def text_reply(msg): 27 | # return '新年快乐!(回复群消息)' 28 | #@itchat.msg_register('Text')#个人回复 29 | #def text_reply(msg): 30 | # print(msg['Text']) 31 | # print(type(msg)) 32 | # return '新年快乐!(回复好友消息)' 33 | #itchat.auto_login(hotReload=True) 34 | #itchat.run() 35 | 36 | 37 | #3. 实现了机器人对话 38 | #import requests 39 | #import json 40 | #key = 'aa7ab198e85e4ba3bec6622654789472' 41 | #while True: 42 | # info = input('\n我:') 43 | # url = 'http://www.tuling123.com/openapi/api?key='+key+'&info='+info 44 | # res = requests.get(url) 45 | # res.encoding = 'utf-8' 46 | # jd = json.loads(res.text)#将得到的json格式的信息转换为Python的字典格式 47 | # print('\nTuling: '+jd['text'])#输出结果 48 | 49 | 50 | #4. 个人图灵测试成功 51 | itchat.auto_login() 52 | @itchat.msg_register('Text')#个人回复 53 | def text_reply(msg): 54 | # print(msg['Text']) 55 | url = 'http://www.tuling123.com/openapi/api?key='+key+'&info='+msg['Text'] 56 | res = requests.get(url) 57 | res.encoding = 'utf-8' 58 | jd = json.loads(res.text)#将得到的json格式的信息转换为Python的字典格式 59 | return jd['text'] #输出结果 60 | itchat.auto_login(hotReload=True) 61 | itchat.run() 62 | 63 | #5. 群回复测试成功 64 | #itchat.auto_login() 65 | #@itchat.msg_register('Text',isGroupChat = True)#群回复 66 | #def text_reply(msg): 67 | ## print(msg['Text']) 68 | # url = 'http://www.tuling123.com/openapi/api?key='+key+'&info='+msg['Text'] 69 | # res = requests.get(url) 70 | # res.encoding = 'utf-8' 71 | # jd = json.loads(res.text)#将得到的json格式的信息转换为Python的字典格式 72 | # return jd['text'] #输出结果 73 | #itchat.auto_login(hotReload=True) 74 | #itchat.run() -------------------------------------------------------------------------------- /Release/安居客爬虫框架/ReadMe.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/Release/安居客爬虫框架/ReadMe.txt -------------------------------------------------------------------------------- /Release/安居客爬虫框架/crawl_anjuke_v1.311.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Mon Dec 11 14:30:50 2017 4 | @author: 1707501 5 | """ 6 | 7 | """ 8 | crawling anjuke house price 9 | GuiYang 10 | 20171212 add proxy and detailed the parse of house information 11 | 20171214 add spidertime and multiprocess 12 | """ 13 | 14 | import requests 15 | from bs4 import BeautifulSoup 16 | import pymysql 17 | import random,time 18 | 19 | def parse_detial(html): 20 | soup = BeautifulSoup(html.text,'html5lib') 21 | houseinfo = soup.select('div.houseInfoBox') 22 | houseinfotitle = houseinfo[0].h4 23 | an_xian = houseinfotitle.select('span.anxian')[0].get_text() 24 | if '假一赔百' in an_xian: 25 | an_xian = "Yes" 26 | else: 27 | an_xian = "No" 28 | houseencode= houseinfotitle.select('span.house-encode')[0].get_text() 29 | houseinfoV2 = 
houseinfo[0].select('div.houseInfoV2-desc')[0].get_text() 30 | housedetail1 = houseinfoV2.split() 31 | housedetail2 = ':'.join(housedetail1) 32 | housedetail = housedetail2.replace('\ue092','').replace('\u200b','').replace('\ue094','').replace('\ue093','').replace('\ue095','') 33 | housefirstv = soup.select('div.first-col.detail-col')[0].find_all('dl') 34 | house_estate = ''.join(housefirstv[0].get_text().split())[3:] 35 | house_add = ''.join(housefirstv[1].get_text().split())[3:] 36 | house_build_time = ''.join(housefirstv[2].get_text().split())[3:] 37 | house_type = ''.join(housefirstv[3].get_text().split())[3:] 38 | housesecondv = soup.select('div.second-col.detail-col')[0].find_all('dl') 39 | house_model_detail = ''.join(housesecondv[0].get_text().split())[3:] 40 | house_size = ''.join(housesecondv[1].get_text().split())[3:] 41 | house_orientation = ''.join(housesecondv[2].get_text().split())[3:] 42 | house_floor = ''.join(housesecondv[3].get_text().split())[3:] 43 | housethirdv = soup.select('div.third-col.detail-col')[0].find_all('dl') 44 | house_decorate = ''.join(housethirdv[0].get_text().split())[5:] 45 | house_univalence = ''.join(housethirdv[1].get_text().split())[5:] 46 | down_payment = ''.join(housethirdv[2].get_text().split())[5:] 47 | # monthly_payment = ''.join(housethirdv[3].get_text().split())[5:] #javescript loading data 48 | salerinfo = soup.select('p.broker-mobile') 49 | salerphone = salerinfo[0].get_text().replace('\ue047','') 50 | housetitle = ''.join(soup.select('h3.long-title')[0].get_text().split()) 51 | houseinfov1 = soup.select('div.basic-info.clearfix')[0].find_all('span') 52 | housetotleprice = houseinfov1[0].get_text() 53 | #============================================================================== 54 | # housemodel = houseinfov1[1].get_text() 55 | # housesize = houseinfov1[2].get_text() 56 | #============================================================================== 57 | line = [] 58 | line.append(housetitle) 59 | line.append(an_xian) 60 | line.append(houseencode) 61 | line.append(housetotleprice) 62 | line.append(house_model_detail) 63 | line.append(house_size) 64 | line.append(house_estate) 65 | line.append(house_add) 66 | line.append(house_build_time) 67 | line.append(house_type) 68 | line.append(house_orientation) 69 | line.append(house_floor) 70 | line.append(house_decorate) 71 | line.append(house_univalence) 72 | line.append(down_payment) 73 | line.append(housedetail) 74 | line.append(salerphone) 75 | result = '\t'.join(line) 76 | print(result) 77 | return result 78 | 79 | def parse_list(html): 80 | secondurl = [] 81 | soup = BeautifulSoup(html.text,'html5lib') 82 | houselists = soup.select('a.houseListTitle') 83 | for houseid in houselists: 84 | houseurl = houseid['href'] 85 | secondurl.append(houseurl) 86 | return secondurl 87 | 88 | def downloadhtml(url,proxy_ip): 89 | response = requests.get(url,headers=header,proxies={"http":proxy_ip}) 90 | if response.status_code == 200: 91 | return response 92 | else: 93 | print("download html error!") 94 | 95 | 96 | def Create_table(): 97 | query = """CREATE TABLE IF NOT EXISTS `anjuke_collecter_original_test` ( 98 | `No` int(10) unsigned NOT NULL AUTO_INCREMENT, 99 | `housetitle` varchar(255) DEFAULT NULL, 100 | `an_xian` varchar(255) DEFAULT NULL, 101 | `houseencode` varchar(255) DEFAULT NULL, 102 | `housetotleprice` varchar(255) DEFAULT NULL, 103 | `house_model_detail` varchar(255) DEFAULT NULL, 104 | `house_size` varchar(255) DEFAULT NULL, 105 | `house_estate` varchar(255) DEFAULT NULL, 106 | 
`house_add` varchar(255) DEFAULT NULL, 107 | `house_build_time` varchar(255) DEFAULT NULL, 108 | `house_type` varchar(255) DEFAULT NULL, 109 | `house_orientation` varchar(255) DEFAULT NULL, 110 | `house_floor` varchar(255) DEFAULT NULL, 111 | `house_decorate` varchar(255) DEFAULT NULL, 112 | `house_univalence` varchar(255) DEFAULT NULL, 113 | `down_payment` varchar(255) DEFAULT NULL, 114 | `housedetail` text DEFAULT NULL, 115 | `salerphone` varchar(255) DEFAULT NULL, 116 | `Url` varchar(255) DEFAULT NULL, 117 | `SpiderTime` varchar(255) DEFAULT NULL, 118 | PRIMARY KEY (`No`) 119 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8""" 120 | db = pymysql.connect(host = hosts,user = users, password = passwords, database = databases,charset='utf8') 121 | cursor = db.cursor() 122 | cursor.execute(query) 123 | db.commit() 124 | cursor.close() 125 | db.close() 126 | 127 | 128 | def etl_mysql(result): 129 | db = pymysql.connect(host = hosts,user = users, password = passwords, database = databases,charset='utf8') 130 | cursor = db.cursor() 131 | result = tuple(result) 132 | query = "insert into anjuke_collecter_original_test(housetitle,an_xian,houseencode,housetotleprice,house_model_detail,house_size,house_estate,house_add,house_build_time,house_type,house_orientation,house_floor,house_decorate,house_univalence,down_payment,housedetail,salerphone,Url,SpiderTime) values('%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s')" % result 133 | cursor.execute(query) 134 | db.commit() 135 | cursor.close() 136 | db.close() 137 | 138 | 139 | def get_next_page(html): 140 | soup = BeautifulSoup(html.text,'html5lib') 141 | nexturl = soup.select('a.aNxt')[0]['href'] 142 | return nexturl 143 | 144 | def get_proxy_ip(): 145 | db = pymysql.connect(host = hosts,user = users, password = passwords, database = databases,charset='utf8') 146 | cursor = db.cursor() 147 | query = "select ip,port from ip_collecter_original_test limit 17000" 148 | cursor.execute(query) 149 | ip_result = cursor.fetchall() 150 | IPList = [] 151 | for i in ip_result: 152 | Ip = i[0] + ":" + i[1] 153 | IPList.append(Ip) 154 | return IPList 155 | 156 | def check_ip(IPList): 157 | url = "https://www.baidu.com/" 158 | proxy_ip = random.choice(IPList) 159 | res = requests.get(url,headers=header,proxies={"http":proxy_ip}) 160 | if res.status_code == 200: 161 | print(proxy_ip) 162 | return proxy_ip 163 | else: 164 | return None # 后期修改成迭代 165 | 166 | header = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0', 167 | 'Connection':'keep-alive' } 168 | 169 | # https://gy.anjuke.com/sale/p1/#filtersort 170 | # https://shanghai.anjuke.com/sale/p1/#filtersort 171 | # https://hangzhou.anjuke.com/sale/ 172 | url = "https://shanghai.anjuke.com/sale/p1/#filtersort" 173 | 174 | hosts = 175 | users = 176 | passwords = 177 | databases = 178 | 179 | if __name__ == '__main__': 180 | List_ip = get_proxy_ip() 181 | next_url = url 182 | Create_table() 183 | while next_url != None: 184 | proxy_ip = check_ip(List_ip) 185 | res = downloadhtml(url,proxy_ip) 186 | if res != None: 187 | try: 188 | urllist = parse_list(res) 189 | except: 190 | print('house url list parsing error!') 191 | if urllist != None: 192 | for houseurl in urllist: 193 | proxy_ip = check_ip(List_ip) 194 | houseinfor = downloadhtml(houseurl,proxy_ip) 195 | try: 196 | results = parse_detial(houseinfor) 197 | except: 198 | results = None 199 | with 
open(r'E:\documents\personal\python\crawler\anjuke\anjuke_error_shanghai.txt','a',encoding='utf-8') as f: 200 | f.write(houseurl +"\n") 201 | print("parse hosue detial infor error!") 202 | continue 203 | with open(r'E:\documents\personal\python\crawler\anjuke\anjuke_shanghai_v15.txt','a',encoding='utf-8') as f: 204 | f.write(results + '\n') 205 | try: 206 | line = results.split('\t') 207 | ts = time.strftime('%Y%m%d%H%M%S',time.localtime(time.time())) 208 | line.append(houseurl) 209 | line.append(ts) 210 | # print(line) 211 | etl_mysql(line) 212 | except: 213 | print("data insert into mysql error!") 214 | continue 215 | try: 216 | next_url = get_next_page(res) 217 | except: 218 | next_url = None 219 | print("crawling end!") -------------------------------------------------------------------------------- /Release/树莓派信息实时推送示例/App.py: -------------------------------------------------------------------------------- 1 | from PageDecoder import * 2 | from StockClass import * 3 | from PushMessage import * 4 | import time 5 | 6 | 7 | my_interest = ['000651','600660','600887','600377','601012'] 8 | for interest in my_interest: 9 | data = GetTotalData(interest) 10 | istock = stock() 11 | istock.SetData(data) 12 | str1 = interest + '.CurPrice = ' + str(istock.CurPrice) 13 | print(str1) 14 | push(str1) 15 | time.sleep(1) 16 | -------------------------------------------------------------------------------- /Release/树莓派信息实时推送示例/PageDecoder.py: -------------------------------------------------------------------------------- 1 | import re 2 | import urllib.request 3 | 4 | base = 'http://hq.sinajs.cn/list=' 5 | bios = 'sh600660' 6 | 7 | def GetTotalData(inputstr): 8 | ''' 9 | input a number serial, ex:600660 10 | ''' 11 | pattern_id = '\d{6}' 12 | reobj = re.compile(pattern_id) 13 | id = reobj.findall(inputstr) 14 | id = "".join(id) 15 | page = '' 16 | if id != '': 17 | flag = int(id) 18 | if flag >= 600000: 19 | bios = 'sh' + id 20 | else: 21 | bios = 'sz' + id 22 | inputstr = base + bios 23 | # print(inputstr) 24 | page = urllib.request.urlopen(inputstr).read() 25 | # print(page) 26 | if len(page) < 30: 27 | print('error, invalid id') 28 | return 0 29 | s = page[30:] 30 | s = str(s) 31 | 32 | pattern_data = '\d+\.*\d*(?=,)' 33 | reobj = re.compile(pattern_data) 34 | data = reobj.findall(s) 35 | data.pop() 36 | data.pop() 37 | 38 | pattern_data = '\d\d\d\d-\d\d-\d\d' 39 | reobj = re.compile(pattern_data) 40 | date = reobj.findall(s) 41 | data.append(date) 42 | 43 | pattern_data = '\d\d:\d\d:\d\d' 44 | reobj = re.compile(pattern_data) 45 | time = reobj.findall(s) 46 | data.append(time) 47 | data.append(id) 48 | return data 49 | else: 50 | print('invalid id') 51 | return 0 52 | -------------------------------------------------------------------------------- /Release/树莓派信息实时推送示例/PushMessage.py: -------------------------------------------------------------------------------- 1 | # Author:YinChao 2 | # Date:2017-12-21 3 | # ver:V0.1 4 | 5 | import pycurl,json 6 | 7 | def push(str): 8 | appID = "59edd424a4c48aee80d6dd4a" 9 | appSecret = "05cc2a44d97e361f14d28c0ab8ff4acd" 10 | pushEvent = "DoorAlert" 11 | pushMessage = str 12 | 13 | c = pycurl.Curl() 14 | c.setopt(c.URL, 'https://api.instapush.im/v1/post') 15 | c.setopt(c.HTTPHEADER,['x-instapush-appid:' + appID,'x-instapush-appsecret:' + appSecret, 'Content-Type:application/json']) 16 | 17 | json_fields = {} 18 | json_fields['event'] = pushEvent 19 | json_fields['trackers'] = {} 20 | json_fields['trackers']['message'] = pushMessage 21 | 22 | postfields = 
json.dumps(json_fields) 23 | 24 | c.setopt(c.POSTFIELDS, postfields) 25 | 26 | c.perform() 27 | 28 | c.close() 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /Release/树莓派信息实时推送示例/StockClass.py: -------------------------------------------------------------------------------- 1 | #from DistinguishData import DataType 2 | 3 | class stock: 4 | ''' 5 | # 参数定义 6 | # ID 代码 600660 由单独变量给出 7 | # 0 TdyOpen 今开盘价 由List变量给出 8 | # 1 YdyClose 昨天收盘 9 | # 2 CurPrice 现价 10 | # 3 HighPrice 最高价 11 | # 4 LowPrice 最低价 12 | # 5 CurBuyPrice 竞买价 13 | # 6 CurSellPrice 竞卖价 14 | # 7 CurQuantity 成交量 15 | # 8 CurMoney 成交额 16 | # 9 Buy1_quant 买一数量 17 | # 10Buy1_price 买一报价 18 | # 11Buy2_quant 买一数量 19 | # 12Buy2_price 以此类推。。。 20 | # 13Buy3_quant 21 | # 14Buy3_price 22 | # 15Buy4_quant 23 | # 16Buy4_price 24 | # 17Buy5_quant 25 | # 18Buy5_price 26 | # 19Sell1_quant 27 | # 20Sell1_price 28 | # 21Sell2_quant 29 | # 22Sell2_price 30 | # 23Sell3_quant 31 | # 24Sell3_price 32 | # 25Sell4_quant 33 | # 26Sell4_price 34 | # 27Sell5_quant 35 | # 28Sell5_price 36 | ''' 37 | def SetData(self, ldata): 38 | ''' 39 | 一次性设置所有的信息 40 | :param id: 股票代码,{'gldq',000651} 41 | :param lista: 输入结构体 只能通过正则表达式获得 42 | :return:无 43 | ''' 44 | if ldata == 0: 45 | return 0 46 | self.TdyOpen = float(ldata[0]) 47 | self.YdyClose = float(ldata[1]) 48 | self.CurPrice = float(ldata[2]) 49 | self.HighPrice = float(ldata[3]) 50 | self.LowPrice = float(ldata[4]) 51 | self.CurBuyPrice = float(ldata[5]) 52 | self.CurSellPrice = float(ldata[6]) 53 | self.CurQuantity = int(ldata[7])/1000000 54 | self.CurMoney = float(ldata[8])/100000000 55 | self.Buy1_quant = int(int(ldata[9])/100) 56 | self.Buy1_price = float(ldata[10]) 57 | self.Buy2_quant = int(int(ldata[11])/100) 58 | self.Buy2_price = float(ldata[12]) 59 | self.Buy3_quant = int(int(ldata[13])/100) 60 | self.Buy3_price = float(ldata[14]) 61 | self.Buy4_quant = int(int(ldata[15])/100) 62 | self.Buy4_price = float(ldata[16]) 63 | self.Buy5_quant = int(int(ldata[17])/100) 64 | self.Buy5_price = float(ldata[18]) 65 | self.Sell1_quant = int(int(ldata[19])/100) 66 | self.Sell1_price = float(ldata[20]) 67 | self.Sell2_quant = int(int(ldata[21])/100) 68 | self.Sell2_price = float(ldata[22]) 69 | self.Sell3_quant = int(int(ldata[23])/100) 70 | self.Sell3_price = float(ldata[24]) 71 | self.Sell4_quant = int(int(ldata[25])/100) 72 | self.Sell4_price = float(ldata[26]) 73 | self.Sell5_quant = int(int(ldata[27])/100) 74 | self.Sell5_price = float(ldata[28]) 75 | self.date = ldata[29] 76 | self.time = ldata[30] 77 | self.id = ldata[31] 78 | 79 | def PrintAllData(self): 80 | ''' 81 | 一次性打印所有信息(仅用于调试) 82 | :return: 83 | ''' 84 | print('ID:\t'+self.id) 85 | print('今开:\t'+str(self.TdyOpen)) 86 | print('昨收:\t'+str(self.YdyClose)) 87 | print('现价:\t'+str(self.CurPrice)) 88 | print('最高价:\t'+str(self.HighPrice)) 89 | print('最低价:\t'+str(self.LowPrice)) 90 | print('竞买:\t'+str(self.CurBuyPrice)) 91 | print('竞卖:\t'+str(self.CurSellPrice)) 92 | print('成交量(万手):\t'+str(self.CurQuantity)) 93 | print('成交额(亿元):\t'+str(self.CurMoney)) 94 | print('买一/手:\t'+str(self.Buy1_quant)) 95 | print('买一/价:\t'+str(self.Buy1_price)) 96 | print('买二/手:\t'+str(self.Buy2_quant)) 97 | print('买二/价:\t'+str(self.Buy2_price)) 98 | print('买三/手:\t'+str(self.Buy3_quant)) 99 | print('买三/价:\t'+str(self.Buy3_price)) 100 | print('买四/手:\t'+str(self.Buy4_quant)) 101 | print('买四/价:\t'+str(self.Buy4_price)) 102 | print('买五/手:\t'+str(self.Buy5_quant)) 103 | print('买五/价:\t'+str(self.Buy5_price)) 104 | 
print('卖一/手:\t'+str(self.Sell1_quant)) 105 | print('卖一/价:\t'+str(self.Sell1_price)) 106 | print('卖二/手:\t'+str(self.Sell2_quant)) 107 | print('卖二/价:\t'+str(self.Sell2_price)) 108 | print('卖三/手:\t'+str(self.Sell3_quant)) 109 | print('卖三/价:\t'+str(self.Sell3_price)) 110 | print('卖四/手:\t'+str(self.Sell4_quant)) 111 | print('卖四/价:\t'+str(self.Sell4_price)) 112 | print('卖五/手:\t'+str(self.Sell5_quant)) 113 | print('卖五/价:\t'+str(self.Sell5_price)) 114 | print(self.date) 115 | print(self.time) 116 | 117 | def RiseRate(self): 118 | ''' 119 | 获取股票实时涨幅 120 | :return: 121 | ''' 122 | rate = (self.CurPrice - self.YdyClose)/self.YdyClose * 100 123 | rate = round(rate,2) 124 | return rate 125 | 126 | 127 | 128 | -------------------------------------------------------------------------------- /Release/树莓派信息实时推送示例/使用说明.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/Release/树莓派信息实时推送示例/使用说明.txt -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-slate -------------------------------------------------------------------------------- /anack/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | *.pyc 3 | Debug/ 4 | -------------------------------------------------------------------------------- /anack/App/Detail_Stock_Selector.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Mon Mar 12 15:57:22 2018 4 | 5 | @author: 10191773 6 | """ 7 | 8 | import sys 9 | sys.path.append("..") 10 | import pandas as pd 11 | import pymysql 12 | import tushare as ts 13 | from SQL.sql import pymysql_connect 14 | from SQL.sql import df_to_mysql 15 | 16 | 17 | ''' 18 | code,代码 19 | name,名称 20 | industry,所属行业 21 | area,地区 22 | pe,市盈率 23 | outstanding,流通股本(亿) 24 | totals,总股本(亿) 25 | totalAssets,总资产(万) 26 | liquidAssets,流动资产 27 | fixedAssets,固定资产 28 | reserved,公积金 29 | reservedPerShare,每股公积金 30 | esp,每股收益 31 | bvps,每股净资 32 | pb,市净率 33 | timeToMarket,上市日期 34 | undp,未分利润 35 | perundp, 每股未分配 36 | rev,收入同比(%) 37 | profit,利润同比(%) 38 | gpr,毛利率(%) 39 | npr,净利润率(%) 40 | holders,股东人数 41 | ''' 42 | 43 | dbconn=pymysql_connect() 44 | 45 | def create_stock_select_table(): 46 | #db = pymysql.connect(host = hosts,user = users, password = passwords, database = databases,charset='utf8') 47 | db = pymysql_connect() 48 | cursor = db.cursor() 49 | cursor.execute('DROP TABLE IF EXISTS all_stock_select') 50 | stock_select_sql = """CREATE TABLE IF NOT EXISTS `all_stock_select` ( 51 | `code` varchar(255) DEFAULT NULL, 52 | `name` varchar(255) DEFAULT NULL, 53 | `industry` varchar(255) DEFAULT NULL, 54 | `area` varchar(255) DEFAULT NULL, 55 | `pe` float(25) DEFAULT NULL, #市盈率 56 | `outstanding` varchar(255) DEFAULT NULL, 57 | `totals` varchar(255) DEFAULT NULL, 58 | `totalAssets` float(25) DEFAULT NULL, #总资产(万) 59 | `liquidAssets` varchar(255) DEFAULT NULL, 60 | `fixedAssets` varchar(255) DEFAULT NULL, 61 | `reserved` varchar(255) DEFAULT NULL, 62 | `reservedPerShare` varchar(255) DEFAULT NULL, 63 | `esp` varchar(255) DEFAULT NULL, 64 | `bvps` varchar(255) DEFAULT NULL, 65 | `pb` float(25) DEFAULT NULL, #市净率 66 | `timeToMarket` varchar(255) DEFAULT NULL, 67 | `undp` varchar(255) DEFAULT NULL, 68 | `perundp` varchar(255) DEFAULT NULL, 69 | 
`rev` float(25) DEFAULT NULL, #收入同比 70 | `profit` float(25) DEFAULT NULL, #利润同比 71 | `gpr` float(25) DEFAULT NULL, #毛利率 72 | `npr` float(25) DEFAULT NULL, #净利润率 73 | `holders` varchar(255) DEFAULT NULL 74 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8""" 75 | cursor.execute(stock_select_sql) 76 | db.commit() 77 | cursor.close() 78 | db.close() 79 | 80 | 81 | def create_stock_detail_select_table(): 82 | #db = pymysql.connect(host = hosts,user = users, password = passwords, database = databases,charset='utf8') 83 | db = pymysql_connect() 84 | cursor = db.cursor() 85 | cursor.execute('DROP TABLE IF EXISTS detail_stock_select') 86 | stock_select_sql = """CREATE TABLE IF NOT EXISTS `detail_stock_select` ( 87 | `code` varchar(255) DEFAULT NULL, 88 | `name` varchar(255) DEFAULT NULL, 89 | `industry` varchar(255) DEFAULT NULL, 90 | `area` varchar(255) DEFAULT NULL, 91 | `pe` float(25) DEFAULT NULL, #市盈率 92 | `outstanding` varchar(255) DEFAULT NULL, 93 | `totals` varchar(255) DEFAULT NULL, 94 | `totalAssets` float(25) DEFAULT NULL, #总资产(万) 95 | `liquidAssets` varchar(255) DEFAULT NULL, 96 | `fixedAssets` varchar(255) DEFAULT NULL, 97 | `reserved` varchar(255) DEFAULT NULL, 98 | `reservedPerShare` varchar(255) DEFAULT NULL, 99 | `esp` varchar(255) DEFAULT NULL, 100 | `bvps` varchar(255) DEFAULT NULL, 101 | `pb` float(25) DEFAULT NULL, #市净率 102 | `timeToMarket` varchar(255) DEFAULT NULL, 103 | `undp` varchar(255) DEFAULT NULL, 104 | `perundp` varchar(255) DEFAULT NULL, 105 | `rev` float(25) DEFAULT NULL, #收入同比 106 | `profit` float(25) DEFAULT NULL, #利润同比 107 | `gpr` float(25) DEFAULT NULL, #毛利率 108 | `npr` float(25) DEFAULT NULL, #净利润率 109 | `holders` varchar(255) DEFAULT NULL 110 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8""" 111 | cursor.execute(stock_select_sql) 112 | db.commit() 113 | cursor.close() 114 | db.close() 115 | 116 | 117 | #初步筛选 118 | def stock_select_to_sql(PE,TotalAssists): 119 | create_stock_select_table() 120 | 121 | df=ts.get_stock_basics() 122 | #df.to_excel('c:/python/all_stock_list.xlsx') 123 | df= df[df['pe'] < PE] 124 | df= df[df['pe'] > 0] 125 | print(df) 126 | #df.to_excel('c:/python/all_stock_pe50.xlsx') 127 | df= df[df['totalAssets'] >= TotalAssists] 128 | df= df[df['rev'] >= 0] 129 | df= df[df['profit'] >= 0] 130 | #df.to_excel('c:/python/all_stock_assets100.xlsx') 131 | print(df) 132 | print('...........................before') 133 | #df=df.iloc[1:] 134 | #df.to_excel('c:/python/all_stock_assets100head.xlsx') 135 | #sql.df_to_mysql('all_stock_select',df) 136 | df_to_mysql('all_stock_select',df) 137 | print('...........................after') 138 | 139 | def GetIndustryData(id): 140 | sqlcmd="select * from industry_estimation_avg where industry ='%s'" %(id) 141 | try: 142 | a=pd.read_sql(sqlcmd,dbconn) 143 | return a 144 | except: 145 | print('invalid input') 146 | return pd.DataFrame() 147 | 148 | 149 | #仔细筛选并入库----执行前提是industry_estimation_avg表已存在 150 | #PE,TotalAssists参数暂时没有用到 151 | def stock_detail_select(PE,TotalAssists): 152 | #stock_select_to_sql(PE,TotalAssists) 153 | create_stock_detail_select_table() 154 | #sqlcmd="select * from all_stock_select ORDER BY pe" 155 | #try: 156 | #a=pd.read_sql(sqlcmd,dbconn) 157 | a=ts.get_stock_basics() 158 | target = pd.DataFrame() #创建一个空的dataframe 159 | i=0 160 | for i in range(0,len(a)): 161 | ''' 162 | #测试输出某一个行业的所有股票数据 163 | c=a.iloc[i,1] 164 | print('****',c) 165 | if (c=='元器件'): 166 | print('get---->',a.iloc[i],i) 167 | if(c=='农药化肥'): 168 | print('get...2>',a.iloc[i],i) 169 | 170 | ''' 171 | 172 | ''' 173 | 
#测试输出数据库(行业平均值数据库)中指定行业的平均统计数据 174 | c='农药化肥' 175 | result=GetIndustryData(c) 176 | if not result.empty: 177 | #print(result) 178 | print(result.iloc[0]['avg_pe'],result.iloc[0]['avg_pb'],result.iloc[0]['avg_rev'], \ 179 | result.iloc[0]['avg_profit'],result.iloc[0]['avg_gpr'],result.iloc[0]['avg_npr']) 180 | #print(result.iloc[0,5],result.iloc[0,6],result.iloc[0,7], \ 181 | # result.iloc[0,8],result.iloc[0,9],result.iloc[0,10]) 182 | else: 183 | print('找不到行业名称...',i) 184 | ''' 185 | 186 | #正式逻辑代码 187 | c=a.iloc[i,1] 188 | result=GetIndustryData(c) 189 | if not result.empty: 190 | cnt=0 191 | #print('#########',result.iloc[0],'pe:',a.iloc[i].pe) 192 | 193 | #此处判断条件可调,eg:判断条件中5/6的数据优于平均水平则认为值得研究,此处判断条件可操作范围较大,可以再讨论 194 | if a.iloc[i].pe<result.iloc[0]['avg_pe']: 195 | cnt+=1 196 | if a.iloc[i].pb<result.iloc[0]['avg_pb']: 197 | cnt+=1 198 | if a.iloc[i].rev>result.iloc[0]['avg_rev']: 199 | cnt+=1 200 | if a.iloc[i].gpr>result.iloc[0]['avg_gpr']: 201 | cnt+=1 202 | if a.iloc[i].profit>result.iloc[0]['avg_profit']: 203 | cnt+=1 204 | if a.iloc[i].npr>result.iloc[0]['avg_npr']: 205 | cnt+=1 206 | 207 | # 5/6的参数优于平均水平,则认为值得研究,保存入库 208 | if cnt>=6: 209 | print('find industry data,avg ok data num is:',cnt) 210 | target = target.append(a.iloc[i]) 211 | #else: 212 | # print('item ok num not enough,which is:',cnt) 213 | 214 | #else: 215 | # print('找不到行业名称...',i) 216 | 217 | i=i+1 218 | 219 | print(target) 220 | df_to_mysql('detail_stock_select',target) #筛选结果入库 221 | return target 222 | 223 | #to test run this fun 224 | #stock_detail_select(300,50) 225 | -------------------------------------------------------------------------------- /anack/App/HK_insider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/anack/App/HK_insider.py -------------------------------------------------------------------------------- /anack/App/IndustryEstimation.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | import pandas as pd 3 | #import pymysql 4 | #from sqlalchemy import create_engine 5 | import tushare as ts 6 | 7 | from SQL.sql import pymysql_connect 8 | from SQL.sql import df_to_mysql 9 | #from SQL.glo import get_value 10 | #import requests 11 | ## 加上字符集参数,防止中文乱码 12 | 13 | #确定输出表头信息: 14 | #基础:总市值、平均市值、市盈率、市净率、收入增长、净利增长、毛利率、净利率 15 | #扩展:资产负债率、市净率、市现率、市销率(需要根据财务报表获取) 16 | #自定义: 17 | 18 | 19 | dbconn=pymysql_connect() 20 | 21 | clm = ['行业','年度','企业数量','总市值','平均市值','平均市盈率','平均市净率', 22 | '收入增长率','利润增长率','毛利率','净利润率'] 23 | headers = ['name','industry','totalAssets','pe','pb','rev','profit','gpr','npr'] 24 | #sql语句示例 25 | #select 字段 from 表名 where 条件; 26 | #eg:select * from student where sex='男' and age>20; //查询性别是男,并且年龄大于20岁的人。 27 | 28 | #创建industry_estimation表头 29 | def CreateTable(): 30 | db = pymysql_connect() 31 | cursor = db.cursor() 32 | cursor.execute('DROP TABLE IF EXISTS industry_estimation') 33 | estimation = """CREATE TABLE IF NOT EXISTS `industry_estimation` ( 34 | `行业` varchar(25) DEFAULT NULL, 35 | `年度` varchar(25) DEFAULT NULL, 36 | `企业数量` int(25) DEFAULT NULL, 37 | `总市值` float(25) DEFAULT NULL, 38 | `平均市值` float(25) DEFAULT NULL, 39 | `平均市盈率` float(25) DEFAULT NULL, 40 | `平均市净率` float(25) DEFAULT NULL, 41 | `收入增长率` float(25) DEFAULT NULL, 42 | `利润增长率` float(25) DEFAULT NULL, 43 | `毛利率` float(25) DEFAULT NULL, 44 | `净利润率` float(25) DEFAULT NULL 45 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8""" 46 | cursor.execute(estimation) 47 | db.commit() 48 | cursor.close() 49 | db.close() 50 | 51 | #函数名:GetIndustryName 52 | #更新时间:2018-3-17
#描述:行业翻译器,输入ID或者股票名称,解析其在anack_classify数据库中所在的行业名 54 | #输入:股票名称或者代码, 比如 "福耀玻璃"或者"600660"都可以 55 | #输出:行业名称 比如:汽车制造 56 | #异常处理:如果没有对应信息,输出invalid id input错误提示信息 57 | def GetIndustryName(id): 58 | sqlcmd="select code,name,industry from anack_classify where code ='%s'" %(id) 59 | try: 60 | a=pd.read_sql(sqlcmd,dbconn) 61 | return a.iloc[0]['industry'] 62 | except: 63 | sqlcmd="select code,name,industry from anack_classify where name ='%s'" %(id) 64 | try: 65 | a=pd.read_sql(sqlcmd,dbconn) 66 | return a.iloc[0]['industry'] 67 | except: 68 | print('invalid id input') 69 | return 70 | 71 | #描述:输入行业名,计算出该行业的平均水平 72 | #输入:数据库用户信息, 行业名, 年度 73 | def Estimation(dbconn,industry_name, year): 74 | ''' 75 | 年度信息还没有用上 76 | ''' 77 | sqlcmd="select code,name from anack_classify where industry ='%s'" %(industry_name) 78 | 79 | #利用pandas 模块导入mysql数据 80 | a=pd.read_sql(sqlcmd,dbconn) 81 | industry_id_list=a[:] 82 | # print(a) 83 | 84 | if len(a) == 0: 85 | print('行业名称输入错误,请重试') 86 | return 0 87 | else: 88 | a = ts.get_stock_basics() #获取的数据 89 | tushare_data=a.loc[:,headers] 90 | target = pd.DataFrame(columns = ['行业','industry','totalAssets','pe','pb','rev','profit','gpr','npr']) #创建一个空的dataframe 91 | 92 | for names in industry_id_list.name: 93 | target = target.append(tushare_data.loc[tushare_data.name == names], ignore_index=True) 94 | #print(target) 95 | 96 | 总市值 = 0 97 | 企业数量 = 0 98 | for sums in target.totalAssets: 99 | 总市值 += sums 100 | 企业数量 += 1 101 | print('行业名:' + industry_name) 102 | print('行业数量(家) = ' + str(企业数量)) 103 | print('行业总市值(万) = ' + str(总市值)) 104 | 平均市值 = 总市值/企业数量 105 | print('平均市值(万) = ' + str(平均市值)) 106 | 107 | weight = [] 108 | for each in target.totalAssets: 109 | weight.append(each/总市值) 110 | target['weight'] = weight 111 | 112 | # 求平均市盈率 113 | 平均市盈率 = 0 114 | num = 企业数量 115 | i = 0 116 | for each in target.pe: 117 | if each == 0 or each > 100: #排除异常情况 118 | num -= 1 119 | else: 120 | 平均市盈率 += each * target.iloc[i]['weight'] 121 | i+=1 122 | print('平均市盈率(%) = ' + str(平均市盈率)) 123 | 124 | 平均市净率 = 0 125 | num = 企业数量 126 | i = 0 127 | for each in target.pb: 128 | if each < 0 or each > 10: #排除异常情况 129 | num -= 1 130 | else: 131 | 平均市净率 += each * target.iloc[i]['weight'] 132 | i+=1 133 | print('平均市净率(%) = ' + str(平均市净率)) 134 | 135 | 收入增长率 = 0 136 | num = 企业数量 137 | i = 0 138 | for each in target.rev: 139 | if each < -1000 or each > 1000: #排除异常情况 140 | num -= 1 141 | else: 142 | 收入增长率 += each * target.iloc[i]['weight'] 143 | i+=1 144 | print('收入增长率(%) = '+str(收入增长率)) 145 | 146 | 利润增长率 = 0 147 | num = 企业数量 148 | i = 0 149 | for each in target.profit: 150 | if each < -1000 or each > 1000: #排除异常情况 151 | num -= 1 152 | else: 153 | 利润增长率 += each * target.iloc[i]['weight'] 154 | i+=1 155 | print('利润增长率(%) = ' + str(利润增长率)) 156 | 157 | 158 | 毛利率 = 0 159 | num = 企业数量 160 | i = 0 161 | for each in target.gpr: 162 | if each < -1000 or each > 1000: #排除异常情况 163 | num -= 1 164 | else: 165 | 毛利率 += each * target.iloc[i]['weight'] 166 | i+=1 167 | print('毛利率(%) = ' + str(毛利率)) 168 | 169 | 净利润率 = 0 170 | num = 企业数量 171 | i = 0 172 | for each in target.npr: 173 | if each < -1000 or each > 1000: #排除异常情况 174 | num -= 1 175 | else: 176 | 净利润率 += each * target.iloc[i]['weight'] 177 | i+=1 178 | print('净利润率(%) = ' + str(净利润率)) 179 | data = {'行业':industry_name,'年度':str(year),'企业数量':企业数量, 180 | '总市值':round(总市值/10000,4),'平均市值':round(平均市值/10000,4),'平均市盈率':round(平均市盈率,2), 181 | '平均市净率':round(平均市净率,2),'收入增长率':round(收入增长率,2),'利润增长率':round(利润增长率,2), 182 | '毛利率':round(毛利率,2),'净利润率':round(净利润率,2)} 183 | result_df = 
pd.DataFrame(data,columns = clm, index=["0"]) 184 | # print(result_df) 185 | df_to_mysql('industry_estimation',result_df) 186 | return result_df 187 | 188 | #获取所有行业平均数据用于测试 189 | def Get_all_industry_average_data(): 190 | a = ts.get_stock_basics() 191 | for i in range(0,len(a)): 192 | print('industry:',a.iloc[i,1]) 193 | test=Estimation(dbconn,a.iloc[i,1],2017) 194 | # App示例代码,用完删掉 195 | 196 | 197 | #Estimation(dbconn,'家电行业') 198 | #print(GetIndustryName('福耀玻璃')) 199 | #CreateTable() 200 | #Estimation(dbconn,GetIndustryName('宁沪高速'),2017) 201 | #Estimation(dbconn,GetIndustryName('格力电器'),2017) 202 | #Estimation(dbconn,GetIndustryName('福耀玻璃'),2017) 203 | #Estimation(dbconn,GetIndustryName('隆基股份'),2017) 204 | 205 | #def get_interest_list(): 206 | # ''' 207 | # 解析"感兴趣的个股列表.txt",返回list类型的数据供其他模块使用 208 | # ''' 209 | # list_id = [] 210 | # with open('../SQL/感兴趣的个股列表.txt','r') as fh: 211 | # s = fh.readline() #获取更新时间 212 | # s = fh.readline() #获取目标长度 213 | # 214 | # lines = fh.readlines() #获取目标内容 215 | # for s in lines: 216 | # code = s[:6] 217 | # list_id.append(code) 218 | # list_id.sort() 219 | # return list_id 220 | # 221 | #for s in get_interest_list(): 222 | # Estimation(dbconn,GetIndustryName(s),2017) 223 | -------------------------------------------------------------------------------- /anack/App/IndustryEstimation_detail.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | import sys 3 | sys.path.append("..") 4 | import pandas as pd 5 | #import pymysql 6 | #from sqlalchemy import create_engine 7 | import tushare as ts 8 | 9 | from SQL.sql import pymysql_connect 10 | from SQL.sql import df_to_mysql 11 | 12 | 13 | 14 | #from SQL.glo import get_value 15 | #import requests 16 | ## 加上字符集参数,防止中文乱码 17 | 18 | #确定输出表头信息: 19 | #基础:总市值、平均市值、市盈率、市净率、收入增长、净利增长、毛利率、净利率 20 | #扩展:资产负债率、市净率、市现率、市销率(需要根据财务报表获取) 21 | #自定义: 22 | #sql语句示例 23 | #select 字段 from 表名 where 条件; 24 | #eg:select * from student where sex='男' and age>20; //查询性别是男,并且年龄大于20岁的人。 25 | 26 | #创建industry_estimation表头 27 | ''' 28 | code,代码 29 | name,名称 30 | industry,所属行业 31 | area,地区 32 | pe,市盈率 33 | outstanding,流通股本(亿) 34 | totals,总股本(亿) 35 | totalAssets,总资产(万) 36 | liquidAssets,流动资产 37 | fixedAssets,固定资产 38 | reserved,公积金 39 | reservedPerShare,每股公积金 40 | esp,每股收益 41 | bvps,每股净资 42 | pb,市净率 43 | timeToMarket,上市日期 44 | undp,未分利润 45 | perundp, 每股未分配 46 | rev,收入同比(%) 47 | profit,利润同比(%) 48 | gpr,毛利率(%) 49 | npr,净利润率(%) 50 | holders,股东人数 51 | ''' 52 | 53 | #作用:行业平均值明细数据入库 54 | #输出:入库行业平均值明细,便于后续分析 55 | def CreateTable(): 56 | db = pymysql_connect() 57 | cursor = db.cursor() 58 | cursor.execute('DROP TABLE IF EXISTS industry_estimation_detail') 59 | estimation = """CREATE TABLE IF NOT EXISTS `industry_estimation_detail` ( 60 | `code` varchar(25) DEFAULT NULL, 61 | `name` varchar(25) DEFAULT NULL, 62 | `industry` varchar(25) DEFAULT NULL, 63 | `area` varchar(25) DEFAULT NULL, 64 | `pe` varchar(25) DEFAULT NULL, 65 | `outstanding` varchar(25) DEFAULT NULL, 66 | `totals` varchar(25) DEFAULT NULL, 67 | `totalAssets` varchar(25) DEFAULT NULL, 68 | `liquidAssets` varchar(25) DEFAULT NULL, 69 | `fixedAssets` varchar(25) DEFAULT NULL, 70 | `reserved` varchar(25) DEFAULT NULL, 71 | `reservedPerShare` varchar(25) DEFAULT NULL, 72 | `esp` varchar(25) DEFAULT NULL, 73 | `bvps` varchar(25) DEFAULT NULL, 74 | `pb` varchar(25) DEFAULT NULL, 75 | `timeToMarket` varchar(25) DEFAULT NULL, 76 | `undp` varchar(25) DEFAULT NULL, 77 | `perundp` varchar(25) DEFAULT NULL, 78 | `rev` varchar(25) DEFAULT NULL, 
79 | `profit` varchar(25) DEFAULT NULL, 80 | `gpr` varchar(25) DEFAULT NULL, 81 | `npr` varchar(25) DEFAULT NULL, 82 | `holders` varchar(25) DEFAULT NULL 83 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8""" 84 | cursor.execute(estimation) 85 | db.commit() 86 | cursor.close() 87 | db.close() 88 | 89 | def Estimation(): 90 | 91 | result_df = pd.DataFrame(ts.get_stock_basics().values,columns = ts.get_stock_basics().columns) 92 | df_to_mysql('industry_estimation_detail',result_df) 93 | 94 | return result_df 95 | 96 | 97 | 98 | 99 | #作用:查看行业平均值统计 100 | #输入:行业名称 101 | #输出:行业平均统计数 102 | def industry_stat(industry): 103 | df = pd.DataFrame(ts.get_stock_basics().values,columns = ts.get_stock_basics().columns) 104 | pe_stat = df[df.industry == industry].drop(['name','industry','area'], axis = 1).astype('float') 105 | # ============================================================================= 106 | # print(pe_stat.dtypes) 107 | # ============================================================================= 108 | result_df = pe_stat.describe() 109 | print(result_df) 110 | return result_df 111 | 112 | 113 | 114 | 115 | #作用:查看行业平均值统计 116 | #输出:所有行业平均统计数(筛选条件:PE <100,pb <10,1000>rev>-1000,1000>profit>-1000,,1000>gpr>-1000,,1000>npr>-1000) 117 | def CreateTable_industry_avg(): 118 | db = pymysql_connect() 119 | cursor = db.cursor() 120 | cursor.execute('DROP TABLE IF EXISTS industry_estimation_avg') 121 | estimation = """CREATE TABLE IF NOT EXISTS `industry_estimation_avg` as 122 | select industry,avg(pe) as avg_pe,avg(outstanding) as avg_outstanding, 123 | avg(totals) as avg_totals ,avg(totalAssets) as avg_totalAssets, 124 | avg(liquidAssets) as avg_liquidAssets,avg(fixedAssets) as avg_fixedAssets, 125 | avg(reserved) as avg_reserved,avg(reservedPerShare) as avg_reservedPerShare, 126 | avg(esp) as avg_esp,avg(bvps) as avg_bvps,avg(pb) as avg_pb, 127 | avg(timeToMarket) as avg_timeToMarket,avg(undp) as avg_undp, 128 | avg(perundp) as avg_perundp,avg(rev) as avg_rev ,avg(profit) as avg_profit 129 | ,avg(gpr) as avg_gpr ,avg(npr) as avg_npr ,avg(holders) as avg_holders 130 | from industry_estimation_detail where pe < 100 and pb < 10 and rev <1000 and rev > -1000 and profit < 1000 and profit > -1000 and gpr < 1000 and gpr > -1000 and npr < 1000 and npr > -1000 131 | group by industry 132 | """ 133 | cursor.execute(estimation) 134 | db.commit() 135 | cursor.close() 136 | db.close() 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | # ============================================================================= 147 | # #调试使用 148 | #CreateTable() 149 | #Estimation() 150 | # industry_stat('通信设备') 151 | # CreateTable_industry_avg() 152 | # ============================================================================= 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | -------------------------------------------------------------------------------- /anack/App/M1808/M1808.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Tue Apr 3 21:02:35 2018 4 | 5 | @author: Administrator 6 | """ 7 | from datetime import datetime 8 | from datetime import timedelta 9 | import time 10 | import threading 11 | from threading import Thread 12 | L = threading.Lock() # 引入锁 13 | 14 | #import imp #防止重复调用导致全局变量设置无效 15 | #try: 16 | # imp.find_module('protocol') 17 | # found = True 18 | # print('arleady imported protocol') 19 | #except ImportError: 20 | # from protocol import * 21 | 22 | from protocol import ATDecoder 23 | 24 
| #try: 25 | # imp.find_module('wechat') 26 | # found = True 27 | # print('already imported wechat') 28 | #except ImportError: 29 | # from wechat import * 30 | # 参数初始化设置 31 | now = datetime.now() 32 | open_call_time = datetime(now.year,now.month,now.day,9,15) 33 | close_call_time = datetime(now.year,now.month,now.day,9,25) 34 | morning_open_time = datetime(now.year,now.month,now.day,9,30) 35 | morning_close_time = datetime(now.year,now.month,now.day,11,30) 36 | afternoon_open_time = datetime(now.year,now.month,now.day,13,00) 37 | afternoon_close_time = datetime(now.year,now.month,now.day,23,00) 38 | 39 | # 周期性调用该函数以实现完整的预警监测功能 40 | def watch_dog_one_time(): 41 | now = datetime.now() 42 | if close_call_time <= now < morning_open_time: # 集合竞价结束到开盘之间,获取大盘和个股的开盘信息并输出 43 | print(str(now)+'快开盘了') 44 | elif (now >= morning_open_time and now < morning_close_time) or \ 45 | (now >= afternoon_open_time and now < afternoon_close_time): 46 | # 获取 47 | # rand = ran 48 | print('主线程休眠') 49 | time.sleep(get_sleep_time() * 60) #休眠 50 | clear_sleep_time() 51 | print('主线程休眠完毕') 52 | print(str(now)+'检查一次') 53 | test_str='AT:run' 54 | result = ATDecoder(test_str) 55 | # print(result) 56 | SendText2ChatRoom(result,'啊啊啊') #给指定群聊 57 | elif now >= afternoon_close_time: #收盘以后停止运行 58 | print(str(now)+'停止运行') 59 | return 1 60 | else: #中场休息,直接sleep 61 | time.sleep(1) 62 | print(str(now)+'休息') 63 | return 0 64 | 65 | # 外界的API接口,调用run函数以实现完整的监测 66 | def M1808_run(): 67 | while 1: 68 | L.acquire() 69 | ret = watch_dog_one_time() 70 | L.release() 71 | # if ret == 1: 72 | # return 73 | # else: 74 | time.sleep(30) 75 | 76 | ############################################################################### 77 | #import imp #官方提供的加载方法,仍然没用 78 | #import sys 79 | #def __import__(name, globals=None, locals=None, fromlist=None): 80 | # try: 81 | # return sys.modules['wechat'] 82 | # except KeyError: 83 | # pass 84 | # 85 | # fp,pathname,description = imp.find_module('wechat') 86 | # try: 87 | # imp.load_module('wechat',fp,pathname,description) 88 | # finally: 89 | # if fp: 90 | # fp.close() 91 | # 92 | #itchat.auto_login(hotReload=True) 93 | 94 | 95 | ############################################################################### 96 | 97 | from wechat import * 98 | def test(): 99 | n = 1 100 | while n > 0: 101 | print(n) 102 | n = n + 1 103 | time.sleep(2) 104 | #t1 = Thread(target=M1808_run, args=()) 105 | itchat.auto_login(hotReload=True) 106 | #t1 = Thread(target=test, args=()) #仅供测试 107 | t1 = Thread(target=M1808_run, args=()) 108 | t2 = Thread(target=itchat.run,args=()) 109 | t1.start() 110 | t2.start() 111 | ############################################################################### 112 | #from wechat import * 113 | #from protocol import * 114 | #import itchat 115 | # 116 | #itchat.auto_login(hotReload=True) 117 | #itchat.run() 118 | 119 | #cmd='AT:set_target_id=600660,000651,601012,000002,000333' 120 | #ATDecoder(cmd) 121 | #cmd='AT:run' 122 | #s = ATDecoder(cmd) 123 | #cmd='AT:get_target_id?' 124 | #s = ATDecoder(cmd) 125 | ###############################################################################
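# [补充示例] 上面注释中给出了若干 AT 命令;下面的草图(非原项目代码)把它们
# 串成一个离线自检,不经微信直接调用 protocol.ATDecoder,便于单独验证协议层;
# 函数名 demo_at_commands 为示例命名。
from protocol import ATDecoder

def demo_at_commands():
    for cmd in ['AT:set_target_id=600660,000651',  # 设置监控列表
                'AT:get_target_id?',               # 查看监控列表
                'AT:get_para?',                    # 查看预警参数
                'AT:test']:                        # 连通性测试
        print(cmd, '->', ATDecoder(cmd))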
-------------------------------------------------------------------------------- /anack/App/M1808/protocol.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sat Apr 7 10:23:33 2018 4 | 5 | @author: Administrator 6 | """ 7 | import time 8 | import re 9 | 10 | from early_warning import * 11 | #import imp #防止重复调用导致全局变量设置无效 12 | #try: 13 | # imp.find_module('early_warning') 14 | # found = True 15 | # print('already imported early_warning') 16 | #except ImportError: 17 | # from early_warning import * 18 | 19 | def ATDecoder(strin): 20 | pattern_id = '(?<=AT:)[^=?]+' 21 | reobj = re.compile(pattern_id) 22 | cmd = reobj.findall(strin) 23 | cmd = "".join(cmd) #list to str 24 | print('\ncmd =',cmd) #该句话只做调试用 25 | 26 | # if cmd in cmd_list: #找到合适的命令 27 | if cmd == 'set_target_id': #设置感兴趣的股票列表 28 | # print('set target_id\n') 29 | pattern_id = '\d{6}' 30 | reobj = re.compile(pattern_id) 31 | stock_id = reobj.findall(strin) 32 | set_target_id(stock_id) 33 | return ('stock id set ok') 34 | 35 | elif cmd == 'get_target_id': #显示股票列表 36 | # print(show_target_id()) 37 | return show_target_id() 38 | 39 | elif cmd == 'clear_target': #清空股票列表 40 | clear_target_id() 41 | return 'target id cleared' 42 | 43 | elif cmd == 'set_para': #设置参数 44 | pattern_id = '[0-9.]+' 45 | reobj = re.compile(pattern_id) 46 | result = reobj.findall(strin) 47 | if len(result) == 1: 48 | set_param(float(result[0])); return 'set para th = %s' % result[0] 49 | else: 50 | set_param(float(result[0]),float(result[1])) 51 | return 'set para th = %s, quantity = %s' % (result[0],result[1]) 52 | 53 | elif cmd == 'get_para': #查看设置的预警参数 54 | th, quantity = get_param() 55 | return 'raise th, quantity = %.2f, %.2f' % (th, quantity) 56 | 57 | elif cmd == 'check': #主动查询当前个股状态 58 | r = str(get_main_market()) 59 | r += '\n\n' 60 | r += str(get_stock_market()) 61 | return r 62 | 63 | elif cmd == 'sleep': #让主机休眠x分钟 64 | pattern_id = '(?<=sleep=)[0-9]+' 65 | reobj = re.compile(pattern_id) 66 | result = reobj.findall(strin) 67 | result = "".join(result) #list to str 68 | print('主机休眠',result,'分钟\n') 69 | set_sleep_time(int(result)) 70 | # time.sleep(int(result)*60) 71 | 72 | return '开始休眠' #仅供测试 73 | 74 | elif cmd == 'level': #设置预警模式 75 | pattern_id = '(?<=level=)[0-9]' 76 | reobj = re.compile(pattern_id) 77 | result = reobj.findall(strin) 78 | result = "".join(result) #list to str 79 | set_warning_level(int(result)) 80 | return ('设置预警模式') 81 | 82 | #测试指令,正式使用时请注释-------------------------------------------------- 83 | elif cmd == 'run': 84 | init() 85 | market_info = get_stock_market() 86 | # print(market_info) 87 | warning_info = check(market_info) 88 | print(warning_info) 89 | return warning_info 90 | 91 | elif cmd == 'test': 92 | return 'still connecting...' 93 | 94 | ############################################################################### 95 | #test_str='AT:set_target_id=600660,000651,601012,000002,000333' 96 | #print(ATDecoder(test_str)) 97 | # 98 | #test_str='AT:get_target_id?' 99 | #print(ATDecoder(test_str)) 100 | 101 | #test_str='AT:clear_target' 102 | #print(ATDecoder(test_str)) 103 | 104 | #test_str='AT:get_target_id?' 105 | #print(ATDecoder(test_str)) 106 | 107 | #test_str='AT:set_para=1.0,0.9' 108 | #print(ATDecoder(test_str)) 109 | # 110 | #test_str='AT:get_para?'
111 | #print(ATDecoder(test_str)) 112 | 113 | #test_str='AT:sleep=1' 114 | #print(ATDecoder(test_str)) 115 | 116 | #test_str='AT:check?' 117 | #print(ATDecoder(test_str)) 118 | 119 | #test_str='AT:level=3' 120 | #print(ATDecoder(test_str)) 121 | # 122 | #test_str='AT:run' 123 | #print(ATDecoder(test_str)) 124 | # 125 | #test_str='AT:test' 126 | #print(ATDecoder(test_str)) 127 | 128 | #print(get_main_market()) 129 | #print(get_stock_market()) 130 | ################################### 131 | #from test import * #此处就是设置的全局变量不起作用的原因 132 | #import test 133 | #test.set_a() 134 | #test.a = 1 135 | #print(test.a) -------------------------------------------------------------------------------- /anack/App/M1808/wechat.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sat Apr 21 10:08:24 2018 4 | 5 | @author: Administrator 6 | """ 7 | import time 8 | import itchat 9 | from protocol import * 10 | #import imp #防止重复调用导致全局变量设置无效 11 | #try: 12 | # imp.find_module('protocol') 13 | # found = True 14 | # print('arleady imported protocol') 15 | #except ImportError: 16 | # from protocol import * 17 | ''' 18 | 1. 给单个人发消息 19 | 2. 给指定群发消息 20 | 3. 实现消息注册 21 | 4. 显示当前可用的群聊 22 | ''' 23 | def WechatLogin(): 24 | itchat.auto_login(hotReload=True) 25 | 26 | def SendText2Friend(msg,nick_name='filehelper'): #已经测试成功,可用 27 | ''' 28 | @ 发送文本消息给指定好友,如果不指定nick_name则发送给自己的文件助手 29 | ''' 30 | if nick_name == 'filehelper': 31 | itchat.send(msg,toUserName = 'filehelper') 32 | else: 33 | 34 | users = itchat.search_friends(name=nick_name) 35 | # print(users) 36 | who = users[0]['UserName'] 37 | # print(who) 38 | itchat.send(msg,toUserName = who) 39 | 40 | def SendText2ChatRoom(context, name): 41 | ''' 42 | @ 发送消息到特定群聊内 43 | @ 备注:1.确定该群聊存在(可调用PrintChatRoomList查看) 44 | @ 2.切记把群聊加入通讯录,否则只能显示活跃的前几个群聊 45 | ''' 46 | itchat.get_chatrooms(update=True) 47 | iRoom = itchat.search_chatrooms(name) 48 | for room in iRoom: 49 | if room['NickName'] == name: 50 | userName = room['UserName'] 51 | break 52 | try: 53 | itchat.send_msg(context, userName) 54 | except: 55 | print('warning: no this chatrooms') 56 | 57 | def PrintChatRoomList(): 58 | ''' 59 | @ 显示当前可见的群聊名 60 | ''' 61 | rooms = itchat.get_chatrooms(update=True) 62 | for s in rooms: 63 | print(s['NickName']) 64 | 65 | @itchat.msg_register('Text',isGroupChat = True)#群回复 66 | def text_reply(msg): 67 | # msg.user.send('%s: %s' % (msg.type, msg.text)) #终于发出消息了 68 | who = msg['ActualNickName'] #获取发送人的名称 69 | content = msg['Text'] 70 | print(who,'call me') 71 | if content == 'logout' or content == 'quit' or content == 'exit': 72 | itchat.logout() 73 | return 74 | ### 发送内容有三种方式:给自己、给别人、给群聊(示例程序),测试成功 75 | # if who == '尹超': 76 | # SendText2Friend('yc send') #给自己(文件助手) 77 | # SendTxet2ChatRoom('yc send','啊啊啊') #给指定群聊 78 | # else: 79 | # SendText2Friend('ali send','阿狸') #给指定的人 80 | # SendTxet2ChatRoom('ali send','啊啊啊') #给指定群聊 81 | 82 | #------------------------------------------------- 83 | authority = ['尹超','徐抒田','李航','李繁','鹏','顾秋杨'] 84 | # if who in authority: #此处有bug,自己先发送的话who为空,必须别人先发信息 85 | if 1: 86 | # print(content) 87 | result = ATDecoder(content) 88 | # print(result) 89 | if result != None: 90 | SendText2ChatRoom(result,'啊啊啊') #给指定群聊 91 | # else: 92 | # print('no reply') 93 | #------------------------------------------------------------------- 94 | time.sleep(1) 95 | ######################################################################## 96 | #WechatLogin() 97 | #SendText2Friend('test') 98 | 
#SendText2Friend('test','阿狸') 99 | #SendTxet2ChatRoom('test','啊啊啊') 100 | #itchat.run() 101 | -------------------------------------------------------------------------------- /anack/App/M1808/命令示例.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/anack/App/M1808/命令示例.txt -------------------------------------------------------------------------------- /anack/App/StockAnalyser.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- -------------------------------------------------------------------------------- /anack/App/YT_produce_sell.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/anack/App/YT_produce_sell.py -------------------------------------------------------------------------------- /anack/App/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/anack/App/__init__.py -------------------------------------------------------------------------------- /anack/App/实时推送/App.py: -------------------------------------------------------------------------------- 1 | from PageDecoder import * 2 | from StockClass import * 3 | from PushMessage import * 4 | import time 5 | 6 | 7 | my_interest = ['000651','600660','600887','600377','601012'] 8 | for interest in my_interest: 9 | data = GetTotalData(interest) 10 | istock = stock() 11 | istock.SetData(data) 12 | str1 = interest + '.CurPrice = ' + str(istock.CurPrice) 13 | print(str1) 14 | push(str1) 15 | time.sleep(1) 16 | -------------------------------------------------------------------------------- /anack/App/实时推送/PageDecoder.py: -------------------------------------------------------------------------------- 1 | import re 2 | import urllib.request 3 | 4 | base = 'http://hq.sinajs.cn/list=' 5 | bios = 'sh600660' 6 | 7 | def GetTotalData(inputstr): 8 | ''' 9 | input a number serial, ex:600660 10 | ''' 11 | pattern_id = '\d{6}' 12 | reobj = re.compile(pattern_id) 13 | id = reobj.findall(inputstr) 14 | id = "".join(id) 15 | page = '' 16 | if id != '': 17 | flag = int(id) 18 | if flag >= 600000: 19 | bios = 'sh' + id 20 | else: 21 | bios = 'sz' + id 22 | inputstr = base + bios 23 | # print(inputstr) 24 | page = urllib.request.urlopen(inputstr).read() 25 | # print(page) 26 | if len(page) < 30: 27 | print('error, invalid id') 28 | return 0 29 | s = page[30:] 30 | s = str(s) 31 | 32 | pattern_data = '\d+\.*\d*(?=,)' 33 | reobj = re.compile(pattern_data) 34 | data = reobj.findall(s) 35 | data.pop() 36 | data.pop() 37 | 38 | pattern_data = '\d\d\d\d-\d\d-\d\d' 39 | reobj = re.compile(pattern_data) 40 | date = reobj.findall(s) 41 | data.append(date) 42 | 43 | pattern_data = '\d\d:\d\d:\d\d' 44 | reobj = re.compile(pattern_data) 45 | time = reobj.findall(s) 46 | data.append(time) 47 | data.append(id) 48 | return data 49 | else: 50 | print('invalid id') 51 | return 0 52 | -------------------------------------------------------------------------------- /anack/App/实时推送/PushMessage.py: -------------------------------------------------------------------------------- 1 | # Author:YinChao 2 | # Date:2017-12-21 3 | # ver:V0.1 4 | 5 | import pycurl,json 6 | 7 | def push(str): 8 | appID = "59edd424a4c48aee80d6dd4a" 9 | appSecret = 
"05cc2a44d97e361f14d28c0ab8ff4acd" 10 | pushEvent = "DoorAlert" 11 | pushMessage = str 12 | 13 | c = pycurl.Curl() 14 | c.setopt(c.URL, 'https://api.instapush.im/v1/post') 15 | c.setopt(c.HTTPHEADER,['x-instapush-appid:' + appID,'x-instapush-appsecret:' + appSecret, 'Content-Type:application/json']) 16 | 17 | json_fields = {} 18 | json_fields['event'] = pushEvent 19 | json_fields['trackers'] = {} 20 | json_fields['trackers']['message'] = pushMessage 21 | 22 | postfields = json.dumps(json_fields) 23 | 24 | c.setopt(c.POSTFIELDS, postfields) 25 | 26 | c.perform() 27 | 28 | c.close() 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /anack/App/实时推送/StockClass.py: -------------------------------------------------------------------------------- 1 | #from DistinguishData import DataType 2 | 3 | class stock: 4 | ''' 5 | # 参数定义 6 | # ID 代码 600660 由单独变量给出 7 | # 0 TdyOpen 今开盘价 由List变量给出 8 | # 1 YdyClose 昨天收盘 9 | # 2 CurPrice 现价 10 | # 3 HighPrice 最高价 11 | # 4 LowPrice 最低价 12 | # 5 CurBuyPrice 竞买价 13 | # 6 CurSellPrice 竞卖价 14 | # 7 CurQuantity 成交量 15 | # 8 CurMoney 成交额 16 | # 9 Buy1_quant 买一数量 17 | # 10Buy1_price 买一报价 18 | # 11Buy2_quant 买一数量 19 | # 12Buy2_price 以此类推。。。 20 | # 13Buy3_quant 21 | # 14Buy3_price 22 | # 15Buy4_quant 23 | # 16Buy4_price 24 | # 17Buy5_quant 25 | # 18Buy5_price 26 | # 19Sell1_quant 27 | # 20Sell1_price 28 | # 21Sell2_quant 29 | # 22Sell2_price 30 | # 23Sell3_quant 31 | # 24Sell3_price 32 | # 25Sell4_quant 33 | # 26Sell4_price 34 | # 27Sell5_quant 35 | # 28Sell5_price 36 | ''' 37 | def SetData(self, ldata): 38 | ''' 39 | 一次性设置所有的信息 40 | :param id: 股票代码,{'gldq',000651} 41 | :param lista: 输入结构体 只能通过正则表达式获得 42 | :return:无 43 | ''' 44 | if ldata == 0: 45 | return 0 46 | self.TdyOpen = float(ldata[0]) 47 | self.YdyClose = float(ldata[1]) 48 | self.CurPrice = float(ldata[2]) 49 | self.HighPrice = float(ldata[3]) 50 | self.LowPrice = float(ldata[4]) 51 | self.CurBuyPrice = float(ldata[5]) 52 | self.CurSellPrice = float(ldata[6]) 53 | self.CurQuantity = int(ldata[7])/1000000 54 | self.CurMoney = float(ldata[8])/100000000 55 | self.Buy1_quant = int(int(ldata[9])/100) 56 | self.Buy1_price = float(ldata[10]) 57 | self.Buy2_quant = int(int(ldata[11])/100) 58 | self.Buy2_price = float(ldata[12]) 59 | self.Buy3_quant = int(int(ldata[13])/100) 60 | self.Buy3_price = float(ldata[14]) 61 | self.Buy4_quant = int(int(ldata[15])/100) 62 | self.Buy4_price = float(ldata[16]) 63 | self.Buy5_quant = int(int(ldata[17])/100) 64 | self.Buy5_price = float(ldata[18]) 65 | self.Sell1_quant = int(int(ldata[19])/100) 66 | self.Sell1_price = float(ldata[20]) 67 | self.Sell2_quant = int(int(ldata[21])/100) 68 | self.Sell2_price = float(ldata[22]) 69 | self.Sell3_quant = int(int(ldata[23])/100) 70 | self.Sell3_price = float(ldata[24]) 71 | self.Sell4_quant = int(int(ldata[25])/100) 72 | self.Sell4_price = float(ldata[26]) 73 | self.Sell5_quant = int(int(ldata[27])/100) 74 | self.Sell5_price = float(ldata[28]) 75 | self.date = ldata[29] 76 | self.time = ldata[30] 77 | self.id = ldata[31] 78 | 79 | def PrintAllData(self): 80 | ''' 81 | 一次性打印所有信息(仅用于调试) 82 | :return: 83 | ''' 84 | print('ID:\t'+self.id) 85 | print('今开:\t'+str(self.TdyOpen)) 86 | print('昨收:\t'+str(self.YdyClose)) 87 | print('现价:\t'+str(self.CurPrice)) 88 | print('最高价:\t'+str(self.HighPrice)) 89 | print('最低价:\t'+str(self.LowPrice)) 90 | print('竞买:\t'+str(self.CurBuyPrice)) 91 | print('竞卖:\t'+str(self.CurSellPrice)) 92 | print('成交量(万手):\t'+str(self.CurQuantity)) 93 | 
print('成交额(亿元):\t'+str(self.CurMoney)) 94 | print('买一/手:\t'+str(self.Buy1_quant)) 95 | print('买一/价:\t'+str(self.Buy1_price)) 96 | print('买二/手:\t'+str(self.Buy2_quant)) 97 | print('买二/价:\t'+str(self.Buy2_price)) 98 | print('买三/手:\t'+str(self.Buy3_quant)) 99 | print('买三/价:\t'+str(self.Buy3_price)) 100 | print('买四/手:\t'+str(self.Buy4_quant)) 101 | print('买四/价:\t'+str(self.Buy4_price)) 102 | print('买五/手:\t'+str(self.Buy5_quant)) 103 | print('买五/价:\t'+str(self.Buy5_price)) 104 | print('卖一/手:\t'+str(self.Sell1_quant)) 105 | print('卖一/价:\t'+str(self.Sell1_price)) 106 | print('卖二/手:\t'+str(self.Sell2_quant)) 107 | print('卖二/价:\t'+str(self.Sell2_price)) 108 | print('卖三/手:\t'+str(self.Sell3_quant)) 109 | print('卖三/价:\t'+str(self.Sell3_price)) 110 | print('卖四/手:\t'+str(self.Sell4_quant)) 111 | print('卖四/价:\t'+str(self.Sell4_price)) 112 | print('卖五/手:\t'+str(self.Sell5_quant)) 113 | print('卖五/价:\t'+str(self.Sell5_price)) 114 | print(self.date) 115 | print(self.time) 116 | 117 | def RiseRate(self): 118 | ''' 119 | 获取股票实时涨幅 120 | :return: 121 | ''' 122 | rate = (self.CurPrice - self.YdyClose)/self.YdyClose * 100 123 | rate = round(rate,2) 124 | return rate 125 | 126 | 127 | 128 | -------------------------------------------------------------------------------- /anack/App/实时推送/使用说明.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/anack/App/实时推送/使用说明.txt -------------------------------------------------------------------------------- /anack/SQL/StockSelector.py: -------------------------------------------------------------------------------- 1 | # This tool is used to update the file "感兴趣的个股列表.txt" 2 | # syntax: 3 | # first line: update time: 4 | # second line: total: 5 | # other line: <code>/t<name> 6 | # 7 | # eg: 8 | # update time:2018/3/4 9 | # total:33 10 | # 000651 格力电器 11 | # ... other 32 items 12 | 13 | # -*- coding: utf-8 -*- 14 | import pandas as pd 15 | 16 | #------------------------------------------------------------------------------ 17 | # change here 18 | # 用于筛选个股的各项参数 19 | # 筛股逻辑: 20 | # 1. 初筛:调用ts.get_stock_basics()即可 21 | # 动态市盈率60以下,日成交量大于1亿,市值大于100亿,收入同比、净利润率为正 22 | # 2. 仔细筛查:同行比对排名前5,个股历年同比连续增长 23 | # 同行业对比(从大到小排列):pe倒数前五,毛利率顺数前5。pb排名靠后,利润同比、 24 | # 收入同比排名靠前 25 | # 自己同比:现金流为正、利润同比有增长 26 | # 27 | parameter = [] 28 | pe = 50 29 | #pb = ... 30 | #and so on ...(其余筛选参数待定,初筛思路的示例见下方)
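# [补充示例] 上面"初筛"一节的思路可以直接落成代码。下面是一个草图,并非原项目
# 实现:函数名 rough_screen 为示例命名,阈值取自上文注释(市盈率60以下、市值
# 100亿即1000000万元、收入同比与净利润率为正),列名沿用 ts.get_stock_basics();
# 日成交量条件需要行情数据,此处从略。
import tushare as ts

def rough_screen(pe_max=60, total_assets_min=1000000):
    df = ts.get_stock_basics()                      # 全部A股基本面,索引为股票代码
    df = df[(df['pe'] > 0) & (df['pe'] < pe_max)]   # 剔除亏损股,限制动态市盈率
    df = df[df['totalAssets'] >= total_assets_min]  # 总资产下限(单位:万元)
    df = df[(df['rev'] > 0) & (df['npr'] > 0)]      # 收入同比、净利润率为正
    return list(df.index)                           # 返回代码列表,便于写回txt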
31 | #------------------------------------------------------------------------------ 32 | 33 | def update_interest_list(): 34 | ''' 35 | 根据指定的逻辑遍历A股,找出符合条件的个股,更新“感兴趣的个股列表.txt”文件, 36 | 同时以列表形式返回 37 | ''' 38 | interest_list = [] # TODO: 筛选逻辑待实现,先返回空列表占位 39 | return interest_list 40 | 41 | def get_interest_list(): 42 | ''' 43 | 解析"感兴趣的个股列表.txt",返回list类型的数据供其他模块使用 44 | ''' 45 | list_id = [] 46 | with open('yourpath/感兴趣的个股列表.txt','r') as fh: 47 | s = fh.readline() #获取更新时间 48 | s = fh.readline() #获取目标长度 49 | 50 | lines = fh.readlines() #获取目标内容 51 | for s in lines: 52 | code = s[:6] 53 | list_id.append(code) 54 | list_id.sort() 55 | return list_id -------------------------------------------------------------------------------- /anack/SQL/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/anack/SQL/__init__.py -------------------------------------------------------------------------------- /anack/SQL/classify_to_sql.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sun Mar 4 14:21:14 2018 4 | 5 | @author: Administrator 6 | """ 7 | import pandas as pd 8 | import tushare as ts 9 | from SQL.sql import pymysql_connect 10 | from SQL.sql import df_to_mysql 11 | 12 | def create_classify_table(): 13 | db = pymysql_connect() 14 | cursor = db.cursor() 15 | cursor.execute('DROP TABLE IF EXISTS anack_classify') 16 | classify = """CREATE TABLE IF NOT EXISTS `anack_classify` ( 17 | `code` varchar(255) DEFAULT NULL, 18 | `name` varchar(255) DEFAULT NULL, 19 | `industry` varchar(255) DEFAULT NULL, 20 | `area` varchar(255) DEFAULT NULL, 21 | `sz50` varchar(255) DEFAULT NULL, 22 | `hs300_weight` FLOAT(10) DEFAULT NULL, 23 | `zz500_weight` FLOAT(10) DEFAULT NULL 24 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8""" 25 | cursor.execute(classify) 26 | db.commit() 27 | cursor.close() 28 | db.close() 29 | 30 | def classify_info_to_sql(): 31 | create_classify_table() 32 | 33 | a = ts.get_industry_classified() 34 | a.columns = ['code', 'name', 'industry'] 35 | b = ts.get_area_classified() 36 | c = ts.get_sz50s() 37 | c = c.iloc[:,1::] 38 | c['sz50'] = '1' 39 | d = ts.get_hs300s() 40 | d = d.iloc[:,1::] 41 | d.columns = ['code','name','hs300_weight'] 42 | e = ts.get_zz500s() 43 | e = e.iloc[:,1::] 44 | e.columns = ['code','name','zz500_weight'] 45 | result = pd.merge(a, b, how='left', on=None, left_on=None, right_on=None, 46 | left_index=False, right_index=False, sort=True, 47 | suffixes=('_x', '_y'), copy=True, indicator=False) 48 | result = pd.merge(result, c, how='left', on=None, left_on=None, right_on=None, 49 | left_index=False, right_index=False, sort=True, 50 | suffixes=('_x', '_y'), copy=True, indicator=False) 51 | result = pd.merge(result, d, how='left', on=None, left_on=None, right_on=None, 52 | left_index=False, right_index=False, sort=True, 53 | suffixes=('_x', '_y'), copy=True, indicator=False) 54 | result = pd.merge(result, e, how='left', on=None, left_on=None, right_on=None, 55 | left_index=False, right_index=False, sort=True, 56 | suffixes=('_x', '_y'), copy=True, indicator=False) 57 | df_to_mysql('anack_classify',result) 58 | 59 | # ------------------------------------------------------------- 60 | classify_info_to_sql()#每次调用都会更新 61 | -------------------------------------------------------------------------------- /anack/SQL/glo.py: -------------------------------------------------------------------------------- 1 | # 本模块实现全局变量 2 | # -*- 
coding:utf-8 -*- 3 | 4 | def _init(): 5 | global _global_dict 6 | _global_dict = {} 7 | 8 | 9 | def set_value(key,value): 10 | _global_dict[key] = value 11 | 12 | def get_value(key, default_value = None): 13 | try: 14 | return _global_dict[key] 15 | except KeyError: 16 | return default_value -------------------------------------------------------------------------------- /anack/SQL/k_data_to_sql.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sun Mar 4 14:21:14 2018 4 | 5 | @author: Administrator 6 | """ 7 | 8 | import tushare as ts 9 | from SQL.sql import pymysql_connect 10 | from SQL.sql import df_to_mysql 11 | 12 | def create_k_table(): 13 | db = pymysql_connect() 14 | cursor = db.cursor() 15 | 16 | sql1 = """CREATE TABLE IF NOT EXISTS `anack_d_k_data` ( 17 | `date` varchar(255) DEFAULT NULL, 18 | `open` varchar(255) DEFAULT NULL, 19 | `close` varchar(255) DEFAULT NULL, 20 | `high` varchar(255) DEFAULT NULL, 21 | `low` varchar(255) DEFAULT NULL, 22 | `volume` varchar(255) DEFAULT NULL, 23 | `code` varchar(255) DEFAULT NULL 24 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8""" 25 | 26 | sql2 = """CREATE TABLE IF NOT EXISTS `anack_m_k_data` ( 27 | `date` varchar(255) DEFAULT NULL, 28 | `open` varchar(255) DEFAULT NULL, 29 | `close` varchar(255) DEFAULT NULL, 30 | `high` varchar(255) DEFAULT NULL, 31 | `low` varchar(255) DEFAULT NULL, 32 | `volume` varchar(255) DEFAULT NULL, 33 | `code` varchar(255) DEFAULT NULL 34 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8""" 35 | 36 | cursor.execute(sql1) 37 | cursor.execute(sql2) 38 | db.commit() 39 | cursor.close() 40 | db.close() 41 | 42 | def k_data(index,mode='D'): 43 | 44 | if mode == 'D': 45 | df_to_mysql('anack_d_k_data',ts.get_k_data(index)) 46 | elif mode == 'M': 47 | df_to_mysql('anack_m_k_data',ts.get_k_data(index,ktype='M')) 48 | 49 | #------------------------------------------------------------------------------ 50 | #create_k_table() 51 | #k_data('600660') 52 | #k_data('600660','M') 53 | -------------------------------------------------------------------------------- /anack/SQL/macro_to_sql.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sun Mar 4 14:21:14 2018 4 | 5 | @author: Administrator 6 | """ 7 | 8 | import pandas as pd 9 | import pymysql 10 | 11 | import tushare as ts 12 | from SQL.sql import pymysql_connect 13 | from SQL.sql import df_to_mysql 14 | # 15 | def create_classify_table(): 16 | db = pymysql_connect() 17 | cursor = db.cursor() 18 | cursor.execute('DROP TABLE IF EXISTS anack_macro_data') 19 | macro = """CREATE TABLE IF NOT EXISTS `anack_macro_data` ( 20 | `month` varchar(255) DEFAULT NULL, 21 | `cpi` varchar(16) DEFAULT NULL, 22 | `ppi` varchar(16) DEFAULT NULL, 23 | `m2` varchar(16) DEFAULT NULL, 24 | `m1` varchar(16) DEFAULT NULL, 25 | `m0` varchar(16) DEFAULT NULL 26 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8""" 27 | cursor.execute(macro) 28 | db.commit() 29 | cursor.close() 30 | db.close() 31 | 32 | def macro_info_to_sql(): 33 | create_classify_table() 34 | 35 | a = ts.get_cpi() 36 | b = ts.get_ppi() 37 | c = ts.get_money_supply() 38 | c = c.iloc[:,[0,1,3,5]] 39 | b = b.iloc[:,[0,2]] 40 | result = pd.merge(a, b, how='left', on=None, left_on=None, right_on=None, 41 | left_index=False, right_index=False, sort=False, 42 | suffixes=('_x', '_y'), copy=True, indicator=False) 43 | result = pd.merge(result, c, how='left', on=None, left_on=None, right_on=None, 44 | 
left_index=False, right_index=False, sort=False, 45 | suffixes=('_x', '_y'), copy=True, indicator=False) 46 | df_to_mysql('anack_macro_data',result) 47 | 48 | 49 | # ------------------------------------------------------------- 50 | macro_info_to_sql() #每次调用都会更新 -------------------------------------------------------------------------------- /anack/SQL/sql.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | import pymysql 3 | from sqlalchemy import create_engine 4 | #import glo 5 | # 6 | #glo._init() 7 | 8 | hosts = '' 9 | users = '' 10 | passwds = '' 11 | databases = '' 12 | 13 | #glo.set_value('host',host) 14 | #glo.set_value('user',user) 15 | #glo.set_value('passwd',passwd) 16 | #glo.set_value('database',database) 17 | #glo.set_value('charset','utf8') 18 | def pymysql_connect(): 19 | return pymysql.connect( 20 | host=hosts, 21 | database=databases, 22 | user=users, 23 | password=passwds, 24 | port=3306, 25 | charset='utf8' 26 | ) 27 | def connect_sql(): 28 | return create_engine("mysql+pymysql://"+ users + ":"+ passwds + "@" + hosts + ":3306/" + databases + "?charset=utf8") 29 | 30 | def df_to_mysql(table,df): 31 | connect = connect_sql() 32 | df.to_sql(name=table,con=connect,if_exists='append',index=False,index_label=False) 33 | -------------------------------------------------------------------------------- /anack/SQL/update.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 本模块用于更新数据库 3 | ''' 4 | from SQL.classify_to_sql import classify_info_to_sql 5 | from SQL.macro_to_sql import macro_info_to_sql 6 | from SQL.k_data_to_sql import create_k_table 7 | from SQL.k_data_to_sql import k_data 8 | 9 | 10 | def get_interest_list(filename): 11 | ''' 12 | 解析"感兴趣的个股列表.txt",返回list类型的数据供其他模块使用 13 | ''' 14 | list_id = [] 15 | with open(filename,'r') as fh: 16 | s = fh.readline() #获取更新时间 17 | s = fh.readline() #获取目标长度 18 | 19 | lines = fh.readlines() #获取目标内容 20 | for s in lines: 21 | code = s[:6] 22 | list_id.append(code) 23 | list_id.sort() 24 | return list_id 25 | 26 | def sql_update(): 27 | classify_info_to_sql() #update classify data 28 | 29 | macro_info_to_sql() #update macro data 30 | 31 | lls = [] #update k_data, both day and month 32 | lls = get_interest_list('./SQL/感兴趣的个股列表.txt') #文件路径与main.py保持一致 33 | create_k_table() 34 | for l in lls: 35 | k_data(l) 36 | k_data(l,'M') 37 | 38 | # update finance data here... 
39 | # 在代码执行路径自动生成输入路径 40 | 41 | column_interest = ['货币资金','应收账款','存货','流动资产合计','固定资产净额','无形资产','资产总计','短期借款','预收款项','流动负债合计','长期借款','一年内到期的非流动负债','负债合计','盈余公积','所有者权益(或股东权益)合计'] 42 | for i in lls: 43 | try: 44 | cbfx = f.crawling_finance(path,i,column_interest) 45 | cbfx.crawling_update() 46 | f.Data_extract_balance() 47 | except: 48 | print(i) 49 | #------------------------------------------------------------------------------ 50 | #sql_update() #一条更新语句完成所有事情 51 | -------------------------------------------------------------------------------- /anack/SQL/感兴趣的个股列表.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/anack/SQL/感兴趣的个股列表.txt -------------------------------------------------------------------------------- /anack/Tushare/basic.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | import tushare as ts 3 | def info(): 4 | print('本模块用于获取实时交易信息') 5 | print('k_day 获取个股的K线图') 6 | print('k_today 获取当日所有股票的K线图') 7 | print('index 获取今日指数信息') 8 | print('ddjy 获取指定日期下的大单交易信息') 9 | 10 | def k_day(index,mode='D'): 11 | 12 | if mode == 'D': 13 | return ts.get_k_data(index) 14 | elif mode == 'M': 15 | return ts.get_k_data(index,ktype='M') 16 | 17 | def k_today(): 18 | 19 | return ts.get_today_all() 20 | 21 | def index(): 22 | 23 | return ts.get_index() 24 | 25 | def ddjy(id,time,hand=400): 26 | 27 | return ts.get_sina_dd(id, date=time, vol=hand) 28 | 29 | -------------------------------------------------------------------------------- /anack/Tushare/classify.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | import tushare as ts 3 | 4 | def info(): 5 | ''' 6 | 本模块用于获取分类信息 7 | ''' 8 | print('本模块用于获取分类信息') 9 | print('industry 行业分类数据') 10 | print('concept 概念分类数据') 11 | print('area 地域分类数据') 12 | print('zxb 中小板列表') 13 | print('cyb 创业板列表') 14 | print('st ST列表') 15 | print('hs300 沪深300列表') 16 | print('sz50 上证50列表') 17 | print('zz500 中证500列表') 18 | 19 | def industry(): 20 | return ts.get_industry_classified() 21 | 22 | def concept(): 23 | return ts.get_concept_classified() 24 | 25 | def area(): 26 | return ts.get_area_classified() 27 | 28 | def zxb(): 29 | return ts.get_sme_classified() 30 | 31 | def cyb(): 32 | return ts.get_gem_classified() 33 | 34 | def st(): 35 | return ts.get_st_classified() 36 | 37 | def hs300(): 38 | return ts.get_hs300s() 39 | 40 | def sz50(): 41 | return ts.get_sz50s() 42 | 43 | def zz500(): 44 | return ts.get_zz500s() -------------------------------------------------------------------------------- /anack/Tushare/finance.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | import tushare as ts 3 | def info(): 4 | ''' 5 | ''' 6 | print('本模块用于获取基本面信息') 7 | print('basic_info 获取股票列表') 8 | print('finance_report 所有季度报表') 9 | print('profit 盈利能力数据') 10 | print('cashflow 现金流量数据') 11 | 12 | def basic_info(): 13 | ''' 14 | 获取股票列表 15 | ''' 16 | return ts.get_stock_basics() 17 | 18 | def finance_report(year, month): 19 | ''' 20 | 季度报主表 21 | ''' 22 | return ts.get_report_data(year,month) 23 | 24 | def profit(year, month): 25 | return ts.get_profit_data(year, month) 26 | 27 | def cashflow(year, month): 28 | return ts.get_cashflow_data(year, month) 29 | 
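# [补充示例] Tushare 封装层的调用草图(非原项目代码):finance_report/profit/
# cashflow 的第二个参数为季度(1~4),与 tushare 的 get_report_data 约定一致;
# 假设从 anack 目录下运行,Tushare 包可直接导入,函数名 demo_finance 为示例命名。
from Tushare import finance

def demo_finance():
    basics = finance.basic_info()             # 全部A股的基本面列表
    report = finance.finance_report(2017, 4)  # 2017年第4季度业绩主表
    print(basics.shape, report.shape)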
-------------------------------------------------------------------------------- /anack/Tushare/information.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | import tushare as ts 4 | 5 | 6 | def info(): 7 | ''' 8 | 9 | ''' 10 | print('本模块用于输出各种消息') 11 | print('fund_holdings 基金持股') 12 | print('forecast_info 业绩预告') 13 | print('xsg_info 限售股信息') 14 | 15 | def fund_holdings(year,month): 16 | ''' 17 | 基金持股消息披露 18 | year:年 19 | month:季度 只可取【1,2,3,4】 20 | ''' 21 | try: 22 | return ts.fund_holdings(year,month) 23 | except: 24 | print('error, month=[1,4], please check your parameter') 25 | 26 | def forecast_info(year,month): 27 | ''' 28 | 业绩预告 29 | ''' 30 | try: 31 | return ts.forecast_data(year,month) 32 | except: 33 | print('error, month=[1,4], please check your parameter') 34 | 35 | def xsg_info(): 36 | ''' 37 | 限售股信息 38 | ''' 39 | return ts.xsg_data() -------------------------------------------------------------------------------- /anack/Tushare/macro.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | import tushare as ts 3 | 4 | def info(): 5 | ''' 6 | 7 | ''' 8 | print('本模块用于获取宏观经济数据') 9 | print('deposit 存款利率一览表') 10 | print('loan 贷款利率一览表') 11 | print('rrr 存款准备金率') 12 | print('money_supply 货币供应量') 13 | print('gdp 国内生产总值') 14 | print('cpi 居民消费价格指数') 15 | print('ppi 工业品出厂价格指数') 16 | print('gdp_contribute 三大产业对GDP的贡献率') 17 | 18 | def deposit(): 19 | return ts.get_deposit_rate() 20 | 21 | def loan(): 22 | return ts.get_loan_rate() 23 | 24 | def rrr(): 25 | return ts.get_rrr() 26 | 27 | def money_supply(): 28 | return ts.get_money_supply() 29 | 30 | def gdp(): 31 | return ts.get_gdp_year() 32 | 33 | def cpi(): 34 | return ts.get_cpi() 35 | 36 | def ppi(): 37 | return ts.get_ppi() 38 | 39 | def gdp_contribute(): 40 | return ts.get_gdp_contrib() 41 | -------------------------------------------------------------------------------- /anack/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/anack/__init__.py -------------------------------------------------------------------------------- /anack/main.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | import App.IndustryEstimation 4 | from SQL.sql import pymysql_connect 5 | from SQL.update import get_interest_list 6 | import App.IndustryEstimation_detail 7 | import App.Detail_Stock_Selector 8 | 9 | industry_check = [] 10 | dbconn=pymysql_connect() 11 | filename = './SQL/感兴趣的个股列表.txt' 12 | stock_code_num = ['600000' ,'600004' ,'600005' ,'600006' ,'600007' ,'600008' ,'600009' ,'600010' ,'600011' ,'600012' ,'600015' ,'600016' ,'600017' ,'600018' ,'600019' ,'600020' ,'600021' ,'600022' ,'600026' ,'600027' ,'600028' ,'600029' ,'600030' ,'600031' ,'600033' ,'600035' ,'600036' ,'600037' ,'600038' ,'600039' ,'600048' ,'600050' ,'600051' ,'600052' ,'600053' ,'600054' ,'600055' ,'600056' ,'600058' ,'600059' ,'600060' ,'600061' ,'600062' ,'600063' ,'600064' ,'600066' ,'600067' ,'600068' ,'600069' ,'600070' ,'600071' ,'600072' ,'600073' ,'600074' ,'600075' ,'600076' ,'600077' ,'600078' ,'600079' ,'600080' ,'600081' ,'600082' ,'600083' ,'600084' ,'600085' ,'600086' ,'600087' ,'600088' ,'600089' ,'600090' ,'600091' ,'600093' ,'600095' ,'600096' ,'600097' ,'600098' ,'600099' ,'600100' ,'600101' ,'600102' ,'600103' ,'600104' ,'600105' 
,'600106' ,'600107' ,'600108' ,'600109' ,'600110' ,'600111' ,'600112' ,'600113' ,'600114' ,'600115' ,'600116' ,'600117' ,'600118' ,'600119' ,'600120' ,'600121' ,'600122' ,'600123' ,'600125' ,'600126' ,'600127' ,'600128' ,'600129' ,'600130' ,'600131' ,'600132' ,'600133' ,'600135' ,'600136' ,'600137' ,'600138' ,'600139' ,'600141' ,'600143' ,'600145' ,'600146' ,'600148' ,'600149' ,'600150' ,'600151' ,'600152' ,'600153' ,'600155' ,'600156' ,'600157' ,'600158' ,'600159' ,'600160' ,'600161' ,'600162' ,'600163' ,'600165' ,'600166' ,'600167' ,'600168' ,'600169' ,'600170' ,'600171' ,'600172' ,'600173' ,'600175' ,'600176' ,'600177' ,'600178' ,'600179' ,'600180' ,'600182' ,'600183' ,'600184' ,'600185' ,'600186' ,'600187' ,'600188' ,'600189' ,'600190' ,'600191' ,'600192' ,'600193' ,'600195' ,'600196' ,'600197' ,'600198' ,'600199' ,'600200' ,'600201' ,'600202' ,'600203' ,'600206' ,'600207' ,'600208' ,'600209' ,'600210' ,'600211' ,'600212' ,'600213' ,'600215' ,'600216' ,'600217' ,'600218' ,'600219' ,'600220' ,'600221' ,'600222' ,'600223' ,'600225' ,'600226' ,'600227' ,'600228' ,'600229' ,'600230' ,'600231' ,'600232' ,'600233' ,'600234' ,'600235' ,'600236' ,'600237' ,'600238' ,'600239' ,'600240' ,'600241' ,'600242' ,'600243' ,'600246' ,'600247' ,'600248' ,'600249' ,'600250' ,'600251' ,'600252' ,'600253' ,'600255' ,'600256' ,'600257' ,'600258' ,'600259' ,'600260' ,'600261' ,'600262' ,'600263' ,'600265' ,'600266' ,'600267' ,'600268' ,'600269' ,'600270' ,'600271' ,'600272' ,'600273' ,'600275' ,'600276' ,'600277' ,'600278' ,'600279' ,'600280' ,'600281' ,'600282' ,'600283' ,'600284' ,'600285' ,'600287' ,'600288' ,'600289' ,'600290' ,'600291' ,'600292' ,'600293' ,'600295' ,'600297' ,'600298' ,'600299' ,'600300' ,'600301' ,'600302' ,'600303' ,'600305' ,'600306' ,'600307' ,'600308' ,'600309' ,'600310' ,'600311' ,'600312' ,'600313' ,'600315' ,'600316' ,'600317' ,'600318' ,'600319' ,'600320' ,'600321' ,'600322' ,'600323' ,'600325' ,'600326' ,'600327' ,'600328' ,'600329' ,'600330' ,'600331' ,'600332' ,'600333' ,'600335' ,'600336' ,'600337' ,'600338' ,'600339' ,'600340' ,'600343' ,'600345' ,'600346' ,'600348' ,'600350' ,'600351' ,'600352' ,'600353' ,'600354' ,'600355' ,'600356' ,'600358' ,'600359' ,'600360' ,'600361' ,'600362' ,'600363' ,'600365' ,'600366' ,'600367' ,'600368' ,'600369' ,'600370' ,'600371' ,'600373' ,'600375' ,'600376' ,'600377' ,'600378' ,'600379' ,'600380' ,'600381' ,'600382' ,'600383' ,'600385' ,'600386' ,'600387' ,'600388' ,'600389' ,'600390' ,'600391' ,'600392' ,'600393' ,'600395' ,'600396' ,'600397' ,'600398' ,'600399' ,'600400' ,'600403' ,'600405' ,'600406' ,'600408' ,'600409' ,'600410' ,'600415' ,'600416' ,'600418' ,'600419' ,'600420' ,'600421' ,'600422' ,'600423' ,'600425' ,'600426' ,'600428' ,'600429' ,'600432' ,'600433' ,'600435' ,'600436' ,'600438' ,'600439' ,'600444' ,'600446' ,'600448' ,'600449' ,'600452' ,'600455' ,'600456' ,'600458' ,'600459' ,'600460' ,'600461' ,'600462' ,'600463' ,'600466' ,'600467' ,'600468' ,'600469' ,'600470' ,'600475' ,'600476' ,'600477' ,'600478' ,'600479' ,'600480' ,'600481' ,'600482' ,'600483' ,'600485' ,'600486' ,'600487' ,'600488' ,'600489' ,'600490' ,'600491' ,'600493' ,'600495' ,'600496' ,'600497' ,'600498' ,'600499' ,'600500' ,'600501' ,'600502' ,'600503' ,'600505' ,'600506' ,'600507' ,'600508' ,'600509' ,'600510' ,'600511' ,'600512' ,'600513' ,'600515' ,'600516' ,'600517' ,'600518' ,'600519' ,'600520' ,'600521' ,'600522' ,'600523' ,'600525' ,'600526' ,'600527' ,'600528' ,'600529' ,'600530' ,'600531' ,'600532' ,'600533' ,'600535' ,'600536' ,'600537' 
,'600538' ,'600539' ,'600540' ,'600543' ,'600545' ,'600546' ,'600547' ,'600548' ,'600549' ,'600550' ,'600551' ,'600552' ,'600553' ,'600555' ,'600557' ,'600558' ,'600559' ,'600560' ,'600561' ,'600562' ,'600563' ,'600565' ,'600566' ,'600567' ,'600568' ,'600569' ,'600570' ,'600571' ,'600572' ,'600573' ,'600575' ,'600576' ,'600577' ,'600578' ,'600579' ,'600580' ,'600581' ,'600582' ,'600583' ,'600584' ,'600585' ,'600586' ,'600587' ,'600588' ,'600589' ,'600590' ,'600592' ,'600593' ,'600594' ,'600595' ,'600596' ,'600597' ,'600598' ,'600599' ,'600600' ,'600601' ,'600602' ,'600603' ,'600604' ,'600605' ,'600606' ,'600608' ,'600609' ,'600610' ,'600611' ,'600612' ,'600613' ,'600614' ,'600615' ,'600616' ,'600617' ,'600618' ,'600619' ,'600620' ,'600621' ,'600622' ,'600623' ,'600624' ,'600626' ,'600628' ,'600629' ,'600630' ,'600631' ,'600633' ,'600634' ,'600635' ,'600636' ,'600637' ,'600638' ,'600639' ,'600640' ,'600641' ,'600642' ,'600643' ,'600644' ,'600645' ,'600647' ,'600648' ,'600649' ,'600650' ,'600651' ,'600652' ,'600653' ,'600654' ,'600655' ,'600656' ,'600657' ,'600658' ,'600660' ,'600661' ,'600662' ,'600663' ,'600664' ,'600665' ,'600666' ,'600667' ,'600668' ,'600671' ,'600673' ,'600674' ,'600675' ,'600676' ,'600677' ,'600678' ,'600679' ,'600680' ,'600682' ,'600683' ,'600684' ,'600685' ,'600686' ,'600687' ,'600688' ,'600689' ,'600690' ,'600691' ,'600692' ,'600693' ,'600694' ,'600695' ,'600696' ,'600697' ,'600698' ,'600699' ,'600701' ,'600702' ,'600703' ,'600704' ,'600706' ,'600707' ,'600708' ,'600710' ,'600711' ,'600712' ,'600713' ,'600714' ,'600715' ,'600716' ,'600717' ,'600718' ,'600719' ,'600720' ,'600721' ,'600722' ,'600723' ,'600724' ,'600725' ,'600726' ,'600727' ,'600728' ,'600729' ,'600730' ,'600731' ,'600732' ,'600733' ,'600734' ,'600735' ,'600736' ,'600737' ,'600738' ,'600739' ,'600740' ,'600741' ,'600742' ,'600743' ,'600744' ,'600745' ,'600746' ,'600747' ,'600748' ,'600749' ,'600750' ,'600751' ,'600753' ,'600754' ,'600755' ,'600756' ,'600757' ,'600758' ,'600759' ,'600760' ,'600761' ,'600763' ,'600764' ,'600765' ,'600766' ,'600767' ,'600768' ,'600769' ,'600770' ,'600771' ,'600773' ,'600774' ,'600775' ,'600776' ,'600777' ,'600778' ,'600779' ,'600780' ,'600781' ,'600782' ,'600783' ,'600784' ,'600785' ,'600787' ,'600789' ,'600790' ,'600791' ,'600792' ,'600793' ,'600794' ,'600795' ,'600796' ,'600797' ,'600798' ,'600800' ,'600801' ,'600802' ,'600803' ,'600804' ,'600805' ,'600806' ,'600807' ,'600808' ,'600809' ,'600810' ,'600811' ,'600812' ,'600814' ,'600815' ,'600816' ,'600817' ,'600818' ,'600819' ,'600820' ,'600821' ,'600822' ,'600823' ,'600824' ,'600825' ,'600826' ,'600827' ,'600828' ,'600829' ,'600830' ,'600831' ,'600832' ,'600833' ,'600834' ,'600835' ,'600836' ,'600837' ,'600838' ,'600839' ,'600841' ,'600843' ,'600844' ,'600845' ,'600846' ,'600847' ,'600848' ,'600850' ,'600851' ,'600853' ,'600854' ,'600855' ,'600856' ,'600857' ,'600858' ,'600859' ,'600860' ,'600861' ,'600862' ,'600863' ,'600864' ,'600865' ,'600866' ,'600867' ,'600868' ,'600869' ,'600871' ,'600872' ,'600873' ,'600874' ,'600875' ,'600876' ,'600877' ,'600879' ,'600880' ,'600881' ,'600882' ,'600883' ,'600884' ,'600885' ,'600886' ,'600887' ,'600888' ,'600889' ,'600890' ,'600891' ,'600892' ,'600893' ,'600894' ,'600895' ,'600896' ,'600897' ,'600898' ,'600900' ,'600960' ,'600961' ,'600962' ,'600963' ,'600965' ,'600966' ,'600967' ,'600969' ,'600970' ,'600971' ,'600973' ,'600975' ,'600976' ,'600978' ,'600979' ,'600980' ,'600981' ,'600982' ,'600983' ,'600984' ,'600985' ,'600986' ,'600987' ,'600988' ,'600990' ,'600991' ,'600992' 
,'600993' ,'600995' ,'600997' ,'600999' ,'601001' ,'601002' ,'601003' ,'601005' ,'601006' ,'601007' ,'601008' ,'601009' ,'601088' ,'601099' ,'601106' ,'601107' ,'601111' ,'601117' ,'601139' ,'601166' ,'601168' ,'601169' ,'601179' ,'601186' ,'601268' ,'601299' ,'601318' ,'601328' ,'601333' ,'601390' ,'601398' ,'601588' ,'601600' ,'601601' ,'601607' ,'601618' ,'601628' ,'601666' ,'601668' ,'601678' ,'601688' ,'601699' ,'601727' ,'601766' ,'601788' ,'601801' ,'601808' ,'601857' ,'601866' ,'601872' ,'601877' ,'601888' ,'601898' ,'601899' ,'601918' ,'601919' ,'601939' ,'601958' ,'601988' ,'601989' ,'601991' ,'601998' ,'601999' ,'000958' ,'601188' ,'601518'] 13 | 14 | 15 | 16 | 17 | # ============================================================================= 18 | # #行业平均数据 19 | # App.IndustryEstimation.CreateTable() #此处开启则清空此前所有内容 20 | # for stock_id in get_interest_list(filename): 21 | # name = App.IndustryEstimation.GetIndustryName(stock_id) #根据id获取行业名 22 | # 23 | # if name in industry_check: #去重检查 24 | # continue 25 | # else: 26 | # industry_check.append(name) 27 | # 28 | # App.IndustryEstimation.Estimation(dbconn,name,2017) #入库 29 | # ============================================================================= 30 | 31 | 32 | 33 | 34 | 35 | #行业平均数据明细 36 | # ============================================================================= 37 | App.IndustryEstimation_detail.CreateTable() #此处开启则清空此前所有内容 38 | App.IndustryEstimation_detail.Estimation() #入库 39 | # ============================================================================= 40 | #App.IndustryEstimation_detail.industry_stat('通信设备') 41 | 42 | 43 | #行业平均数据统计值入库 44 | App.IndustryEstimation_detail.CreateTable_industry_avg() 45 | 46 | 47 | # ============================================================================= 48 | # #筛选基本面数据优于行业平均值的股票并入库,20170330 49 | # App.Detail_Stock_Selector.stock_detail_select(300,50) 50 | # 51 | # ============================================================================= 52 | -------------------------------------------------------------------------------- /anack_study_case/balance_columns.txt: -------------------------------------------------------------------------------- 1 | 货币资金 h1 2 | 交易性金融资产 h2 3 | 衍生金融资产 h3 4 | 应收票据 h4 5 | 应收账款 h5 6 | 预付款项 h6 7 | 应收利息 h7 8 | 应收股利 h8 9 | 其他应收款 h9 10 | 买入返售金融资产 h10 11 | 存货 h11 12 | 划分为持有待售的资产 h12 13 | 一年内到期的非流动资产 h13 14 | 待摊费用 h14 15 | 待处理流动资产损益 h15 16 | 其他流动资产 h16 17 | 流动资产合计 h17 18 | 发放贷款及垫款 h18 19 | 可供出售金融资产 h19 20 | 持有至到期投资 h20 21 | 长期应收款 h21 22 | 长期股权投资 h22 23 | 投资性房地产 h23 24 | 固定资产净额 h24 25 | 在建工程 h25 26 | 工程物资 h26 27 | 固定资产清理 h27 28 | 生产性生物资产 h28 29 | 公益性生物资产 h29 30 | 油气资产 h30 31 | 无形资产 h31 32 | 开发支出 h32 33 | 商誉 h33 34 | 长期待摊费用 h34 35 | 递延所得税资产 h35 36 | 其他非流动资产 h36 37 | 非流动资产合计 h37 38 | 资产总计 h38 39 | 短期借款 h39 40 | 交易性金融负债 h40 41 | 应付票据 h41 42 | 应付账款 h42 43 | 预收款项 h43 44 | 应付手续费及佣金 h44 45 | 应付职工薪酬 h45 46 | 应交税费 h46 47 | 应付利息 h47 48 | 应付股利 h48 49 | 其他应付款 h49 50 | 预提费用 h50 51 | 一年内的递延收益 h51 52 | 应付短期债券 h52 53 | 一年内到期的非流动负债 h53 54 | 其他流动负债 h54 55 | 流动负债合计 h55 56 | 长期借款 h56 57 | 应付债券 h57 58 | 长期应付款 h58 59 | 长期应付职工薪酬 h59 60 | 专项应付款 h60 61 | 预计非流动负债 h61 62 | 递延所得税负债 h62 63 | 长期递延收益 h63 64 | 其他非流动负债 h64 65 | 非流动负债合计 h65 66 | 负债合计 h66 67 | 实收资本(或股本) h67 68 | 资本公积 h68 69 | 减:库存股 h69 70 | 其他综合收益 h70 71 | 专项储备 h71 72 | 盈余公积 h72 73 | 一般风险准备 h73 74 | 未分配利润 h74 75 | 归属于母公司股东权益合计 h75 76 | 少数股东权益 h76 77 | 所有者权益(或股东权益)合计 h77 78 | 负债和所有者权益(或股东权益)总计 h78 79 | stock_code h79 80 | Date h80 -------------------------------------------------------------------------------- 
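补充示例:balance_columns.txt 每行是"中文表头 h编号"的映射(h1~h80 对应资产负债表入库后的字段名)。下面是读取该映射并用于重命名 DataFrame 列的示意(非仓库原有代码;文件路径与 utf-8 编码均为假设,按实际情况调整):

# 把 "货币资金 h1" 这样的行解析成 {中文表头: h编号} 字典
mapping = {}
with open('anack_study_case/balance_columns.txt', 'r', encoding='utf-8') as fh:
    for line in fh:
        parts = line.split()            # 中文表头内部无空白,按空白切分恰好得到两段
        if len(parts) == 2:
            mapping[parts[0]] = parts[1]
# 之后对爬取到的资产负债表 df 执行 df.rename(columns=mapping),列名即变为 h1~h80,便于建表入库
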
/anack_study_case/cash_flow_columns.txt: -------------------------------------------------------------------------------- 1 | 销售商品、提供劳务收到的现金 h1 2 | 收到的税费返还 h2 3 | 收到的其他与经营活动有关的现金 h3 4 | 经营活动现金流入小计 h4 5 | 购买商品、接受劳务支付的现金 h5 6 | 支付给职工以及为职工支付的现金 h6 7 | 支付的各项税费 h7 8 | 支付的其他与经营活动有关的现金 h8 9 | 经营活动现金流出小计 h9 10 | 经营活动产生的现金流量净额 h10 11 | 收回投资所收到的现金 h11 12 | 取得投资收益所收到的现金 h12 13 | 处置固定资产、无形资产和其他长期资产所收回的现金净额 h13 14 | 处置子公司及其他营业单位收到的现金净额 h14 15 | 收到的其他与投资活动有关的现金 h15 16 | 投资活动现金流入小计 h16 17 | 购建固定资产、无形资产和其他长期资产所支付的现金 h17 18 | 投资所支付的现金 h18 19 | 取得子公司及其他营业单位支付的现金净额 h19 20 | 支付的其他与投资活动有关的现金 h20 21 | 投资活动现金流出小计 h21 22 | 投资活动产生的现金流量净额 h22 23 | 吸收投资收到的现金 h23 24 | 其中:子公司吸收少数股东投资收到的现金 h24 25 | 取得借款收到的现金 h25 26 | 发行债券收到的现金 h26 27 | 收到其他与筹资活动有关的现金 h27 28 | 筹资活动现金流入小计 h28 29 | 偿还债务支付的现金 h29 30 | 分配股利、利润或偿付利息所支付的现金 h30 31 | 其中:子公司支付给少数股东的股利、利润 h31 32 | 支付其他与筹资活动有关的现金 h32 33 | 筹资活动现金流出小计 h33 34 | 筹资活动产生的现金流量净额 h34 35 | 四、汇率变动对现金及现金等价物的影响 h35 36 | 五、现金及现金等价物净增加额 h36 37 | 加:期初现金及现金等价物余额 h37 38 | 六、期末现金及现金等价物余额 h38 39 | 净利润 h39 40 | 少数股东权益 h40 41 | 未确认的投资损失 h41 42 | 资产减值准备 h42 43 | 固定资产折旧、油气资产折耗、生产性物资折旧 h43 44 | 无形资产摊销 h44 45 | 长期待摊费用摊销 h45 46 | 待摊费用的减少 h46 47 | 预提费用的增加 h47 48 | 处置固定资产、无形资产和其他长期资产的损失 h48 49 | 固定资产报废损失 h49 50 | 公允价值变动损失 h50 51 | 递延收益增加(减:减少) h51 52 | 预计负债 h52 53 | 财务费用 h53 54 | 投资损失 h54 55 | 递延所得税资产减少 h55 56 | 递延所得税负债增加 h56 57 | 存货的减少 h57 58 | 经营性应收项目的减少 h58 59 | 经营性应付项目的增加 h59 60 | 已完工尚未结算款的减少(减:增加) h60 61 | 已结算尚未完工款的增加(减:减少) h61 62 | 其他 h62 63 | 经营活动产生现金流量净额 h63 64 | 债务转为资本 h64 65 | 一年内到期的可转换公司债券 h65 66 | 融资租入固定资产 h66 67 | 现金的期末余额 h67 68 | 现金的期初余额 h68 69 | 现金等价物的期末余额 h69 70 | 现金等价物的期初余额 h70 71 | 现金及现金等价物的净增加额 h71 72 | stock_code h72 73 | Date h73 -------------------------------------------------------------------------------- /anack_study_case/cash_flow_statements_balance_profit_columns.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/anack_study_case/cash_flow_statements_balance_profit_columns.xlsx -------------------------------------------------------------------------------- /anack_study_case/dividend_rate_v2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Mon Mar 26 21:29:43 2018 4 | 5 | @author: lh 6 | @version: 1.0 7 | @time:20180403 8 | @detail:实现模块化功能,计算股息率、分红率 9 | """ 10 | import tushare as ts 11 | import pandas as pd 12 | import numpy as np 13 | import requests 14 | from requests.exceptions import RequestException 15 | from bs4 import BeautifulSoup 16 | headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36'} 17 | 18 | class dividend_rate: 19 | 20 | def __init__(self,id): 21 | self.id =id 22 | 23 | def get_one_page(url): 24 | try: 25 | response = requests.get(url,headers = headers) 26 | response.encoding = 'GB2312' 27 | if response.status_code == 200: 28 | return response.text 29 | return None 30 | except RequestException: 31 | return None 32 | 33 | def parse(html): 34 | raw_data = [] 35 | try: 36 | year_raw = [] 37 | year = [] 38 | bonus_share = [] 39 | bonus_convert = [] 40 | profit_send = [] 41 | ex_rights = [] 42 | register_day = [] 43 | 44 | soup = BeautifulSoup(html,'html5lib') 45 | l = soup.select('table#sharebonus_1') 46 | ls = l[0].tbody 47 | lls = ls.select('td') 48 | for l in lls: 49 | if (l.get_text().strip()) != '预案' and \ 50 | (l.get_text().strip()) != '实施' and \ 51 | 
(l.get_text().strip()) != '不分配' and \ 52 | (l.get_text().strip()) != '查看': 53 | raw_data.append(l.get_text().strip()) 54 | 55 | year_raw = raw_data[::7] 56 | # print(raw_data) #出错的话请检查此处的输出 57 | # print(year_raw) #出错的话请检查此处的输出 58 | for item in year_raw: 59 | a = pd.to_datetime(item).year - 1 60 | year.append(a) 61 | bonus_share = raw_data[1::7] 62 | bonus_convert = raw_data[2::7] 63 | profit_send = raw_data[3::7] 64 | ex_rights = raw_data[4::7] 65 | register_day = raw_data[5::7] 66 | # print(register_day) 67 | data = {'年度':year, 68 | '送股':bonus_share, 69 | '转股':bonus_convert, 70 | '派息':profit_send, 71 | '除权日':ex_rights, 72 | '登记日':register_day 73 | } 74 | frame = pd.DataFrame(data) 75 | return frame 76 | except: 77 | print('cannot parse this page') 78 | 79 | 80 | # 提供给用户的函数,输入ID,解析出历史分红列表 81 | 82 | def get_bonus_table(self): 83 | url = 'http://vip.stock.finance.sina.com.cn/corp/go.php/vISSUE_ShareBonus/stockid/' 84 | url += str(self.id) 85 | url += '.phtml' 86 | html = dividend_rate.get_one_page(url) 87 | return dividend_rate.parse(html) 88 | 89 | 90 | 91 | @property 92 | def divident_rate(self): 93 | stock = ts.get_hist_data(self.id) 94 | df = dividend_rate.get_bonus_table(self) 95 | df_dividend = df[['年度','派息','登记日']] 96 | # print(df_dividend) 97 | stock_close_price = stock["close"] 98 | sIndex = stock_close_price.index.tolist() 99 | # 获取登记日 100 | regis = df_dividend['登记日'].tolist() 101 | # print(sIndex) 102 | # print(regis) 103 | close_price = [] 104 | diVi = [] 105 | aPe = [] 106 | bonus = [] 107 | div_year = [] 108 | for i in regis: 109 | if i != "--" and i in sIndex: 110 | cprice = stock_close_price.loc[i] 111 | close_price.append(cprice) 112 | aDiv = df_dividend[df_dividend['登记日'] == i]['派息'].tolist()[0] 113 | year = df_dividend[df_dividend['登记日'] == i]['年度'].values #获得年份 114 | div_year.append(year[0]) 115 | 116 | #此处的bonus暂时通过ts获得,以后可以直接搜索本地数据库 117 | profit_table = ts.get_report_data(year[0],4) #获取年度eps 118 | print('') 119 | target_eps = profit_table[profit_table['code'] == self.id]['eps'].values 120 | eps = target_eps[0].item() #numpy.float64 -> float 121 | per_bonus = round(float(aDiv) / 10 / eps * 100, 2) 122 | # per_bonus = 1 #测试时开启 123 | 124 | bonus.append(per_bonus) 125 | 126 | diVi.append(float(aDiv)/10) #10股派息转1股派息 127 | div_ratio = [] 128 | for i,j in zip(diVi,close_price): 129 | adivr = float(i) / float(j) * 100 130 | div_ratio.append(round(adivr,2)) 131 | aPe.append(round(100/adivr,2)) 132 | 133 | reDf = pd.DataFrame({"cash_div":diVi, #每股派现方案 134 | "div_ratio(%)":div_ratio, #股息率 135 | 'ape':aPe, #真实市盈率 136 | 'bonus_ratio(%)':bonus #分红率 137 | },index = div_year) 138 | 139 | # 统计输出 140 | print(self.id + '分红情况统计如下:') 141 | avg_bonus = round(sum(bonus)/len(bonus),2) 142 | print('1.平均分红率:',avg_bonus,'%') 143 | avg_div = round(sum(div_ratio)/len(div_ratio),2) 144 | print('2.平均股息率:',avg_div,'%') 145 | print('3.详细列表如下所示') 146 | return reDf 147 | 148 | ############################################## 149 | a = dividend_rate('601012') 150 | s = a.divident_rate 151 | print(s) 152 | -------------------------------------------------------------------------------- /anack_study_case/lirunbiao.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/anack_study_case/lirunbiao.csv -------------------------------------------------------------------------------- /anack_study_case/liuliang.csv: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/anack_study_case/liuliang.csv
--------------------------------------------------------------------------------
/anack_study_case/profit_columns.txt:
--------------------------------------------------------------------------------
1 | 一、营业总收入 h1
2 | 营业收入 h2
3 | 二、营业总成本 h3
4 | 营业成本 h4
5 | 营业税金及附加 h5
6 | 销售费用 h6
7 | 管理费用 h7
8 | 财务费用 h8
9 | 资产减值损失 h9
10 | 公允价值变动收益 h10
11 | 投资收益 h11
12 | 其中:对联营企业和合营企业的投资收益 h12
13 | 汇兑收益 h13
14 | 三、营业利润 h14
15 | 加:营业外收入 h15
16 | 减:营业外支出 h16
17 | 其中:非流动资产处置损失 h17
18 | 四、利润总额 h18
19 | 减:所得税费用 h19
20 | 五、净利润 h20
21 | 归属于母公司所有者的净利润 h21
22 | 少数股东损益 h22
23 | 基本每股收益(元/股) h23
24 | 稀释每股收益(元/股) h24
25 | 七、其他综合收益 h25
26 | 八、综合收益总额 h26
27 | 归属于母公司所有者的综合收益总额 h27
28 | 归属于少数股东的综合收益总额 h28
29 | stock_code h29
30 | Date h30
--------------------------------------------------------------------------------
/anack_study_case/sql常用模板.txt:
--------------------------------------------------------------------------------
1 | Mysql 基础常用操作
2 | 
3 | 连接Mysql:
4 | 格式: mysql -h主机地址 -u用户名 -p用户密码
5 | 
6 | 连接到本机上的MYSQL
7 | 
8 | 键入命令 mysql -uroot -p,回车后提示你输密码,如果刚安装好MYSQL,超级用户root是没有密码的,故直接回车即可进入到MYSQL中,MYSQL的提示符是:mysql>
9 | 
10 | 连接到远程主机上的MYSQL
11 | 
12 | 假设远程主机的IP为:10.0.0.1,用户名为root,密码为12356。则键入以下命令:
13 | mysql -h10.0.0.1 -uroot -p12356
14 | 
15 | 退出MYSQL命令
16 | exit (回车)
17 | 
18 | 
19 | 
20 | 库操作:
21 | 
22 | --- 创建数据库:
23 | CREATE DATABASE mysqltest;
24 | 
25 | --- 查看数据库
26 | show databases;
27 | 
28 | --- 查看库中的数据表:
29 | use mysql;
30 | show tables;
31 | 
32 | --- 创建带字符集的数据库(存储数据中含有中文时常用)
33 | CREATE DATABASE my_chinese CHARACTER SET=utf8;
34 | 
35 | --- 创建带校验的数据库
36 | CREATE DATABASE mydb CHARACTER SET=utf8 COLLATE utf8_general_ci;
37 | 
38 | --- 显示数据库创建语句(详情):
39 | SHOW CREATE DATABASE mydb3;
40 | 
41 | --- 数据库删除语句:
42 | DROP DATABASE mydb3;
43 | 
44 | 
45 | --- 修改数据库的库字符编码
46 | ALTER DATABASE mydb2 character set gb2312;
47 | 
48 | 
49 | --- 表操作
50 | 
51 | 
52 | 
53 | ---创建数据库表
54 | create table employee
55 | (
56 | id int,
57 | name varchar(40),
58 | sex char(4),
59 | birthday date,
60 | Entry_date date,
61 | job varchar(100),
62 | salary Decimal(8,2),
63 | resume Text
64 | );
65 | 
66 | 
67 | --- 在上面员工表的基础上增加一个image列。
68 | alter table employee add image blob;
69 | 
70 | --- 修改job列,使其长度为60。
71 | alter table employee modify job varchar(60);
72 | 
73 | --- 删除image列。
74 | alter table employee drop image;
75 | 
76 | --- 表名改为user。
77 | rename table employee to user;
78 | 
79 | --- 修改表的字符集为utf-8
80 | alter table user character set gbk;
81 | alter table user character set utf8;
82 | 
83 | --- 列名name修改为username
84 | alter table user change column name username varchar(100);
85 | --- 删除表
86 | drop table user;
87 | 
88 | --- 增删改查
89 | --- 准备表
90 | create table employee
91 | (
92 | id int,
93 | name varchar(40),
94 | sex varchar(4),
95 | birthday date,
96 | entry_date date,
97 | salary decimal(8,2),
98 | resume text
99 | );
100 | 
101 | --- 插入数据
102 | insert into employee(id,name,sex,birthday,entry_date,salary,resume) values(1,'zhangsan','male','1993-03-04','2016-11-10','1000','i am a developer');
103 | 
104 | --- 可以省略表字段,但是必须插入全部字段
105 | insert into employee values(null,null,'male','1993-03-04','2016-11-10','1000','i am a developer');
106 | 
107 | ---指定某些列插入数据
108 | insert into employee(id) values(6);
109 | 
110 | ---插入汉字
111 | insert into employee(id,name) values(6,'张三');
112 | 
113 | --- mysql客户端采用gb2312编码
114 | show variables like 'chara%';
115 | set character_set_client=gb2312;
116 | insert into employee(id,name) values('3','张三');
117 | 
118 | 
119 | ---查看时不乱码
120 | show variables like 'chara%';
121 | set character_set_results=gb2312;
122 | select * from employee;
123 | 
124 | ---修改表数据
125 | ---将所有员工薪水修改为5000元。
126 | update employee set salary=5000;
127 | 
128 | 
129 | ---将姓名为'zhangsan'的员工薪水修改为3000元。
130 | update employee set salary = 3000 where name='zhangsan';
131 | 
132 | 
133 | ---将姓名为'张三'的员工薪水修改为4000元,job改为ccc。
134 | update employee set salary = 4000,job='ccc' where name='张三';
135 | 
136 | 
137 | --- 将张三的薪水在原有基础上增加1000元。
138 | update employee set salary = salary+1000 where name='张三';
139 | 
140 | ---删除
141 | ---删除表中job为'ccc'的记录。
142 | delete from employee where job='ccc';
143 | 
144 | 
145 | ---删除表中所有记录。
146 | delete from employee;
147 | 
148 | 
149 | ---使用truncate删除表中记录
150 | truncate table employee;
151 | 
152 | ---查询
153 | 
154 | ---查询表中所有学生的信息。
155 | select id,name,chinese,english,math from student;
156 | 
157 | 
158 | ---查询表中所有学生的姓名和对应的英语成绩。
159 | select name,english from student;
160 | 
161 | 
162 | ---过滤表中重复数据。
163 | select distinct english from student;
164 | 
165 | ---在所有学生分数上加10分特长分。
166 | select name,(chinese+english+math)+10 from student;
167 | 
168 | 
169 | ---统计每个学生的总分。
170 | select name,(chinese+english+math) from student;
171 | 
172 | 
173 | ---使用别名表示学生分数
174 | select name,(chinese+english+math) as 总分 from student;
175 | ---可以不用as
176 | select name,(chinese+english+math) 总分 from student;
177 | 
178 | 
179 | ---查询姓名为'张三'的学生成绩
180 | select * from student where name='张三';
181 | 
182 | 
183 | ---查询英语成绩大于90分的同学
184 | select * from student where english>'90';
185 | 
186 | 
187 | --- 显示数据表的结构:
188 | describe 表名;
189 | --- 简写
190 | desc 表名;
191 | 
192 | --- 建表:
193 | use 库名;
194 | create table 表名 (字段设定列表);
195 | 
196 | --- 删库和删表:
197 | drop database 库名;
198 | drop table 表名;
199 | 
200 | --- 表中记录清空:
201 | truncate table wp_comments;
202 | delete from wp_comments;
203 | --- 2种操作模式的区别,目标对象是表wp_comments,其中truncate操作中的table可以省略。这两者都是将wp_comments表中数据清空,不过也是有区别的,如下:truncate是整体删除(速度较快),delete是逐条删除(速度较慢)。truncate不写服务器log,delete写服务器log,也就是truncate效率比delete高的原因。truncate不激活trigger(触发器),但是会重置Identity(标识列、自增字段),相当于自增列会被置为初始值,又重新从1开始记录,而不是接着原来的ID数。而delete删除以后,Identity依旧是接着被删除的最近的那一条记录ID加1后进行记录。如果只需删除表中的部分记录,只能使用DELETE语句配合where条件。 DELETE FROM wp_comments WHERE……
204 | 
205 | --- 显示表中的记录:
206 | select * from 表名;
207 | 
208 | 
209 | --- 数据库备份与恢复
210 | 
211 | --- 备份:从数据库导出数据:
212 | --- 格式:mysqldump -h连接ip -P(大写)端口 -u用户名 -p密码 数据库名 > d:XX.sql(路径)
213 | mysqldump -h132.72.192.43 -P3307 -uroot -p8888 htgl>d:\htgl.sql;
214 | 
215 | 
216 | --- 备份导出示例
217 | ---导出数据和表结构——将特定数据库特定表的表结构和数据全部导出
218 | mysqldump -u b_user -h 101.3.20.33 -p'H_password' -P3306 database_di up_subjects > 0101_0630_up_subjects.sql
219 | 
220 | 
221 | --- 导出表结构却不导出表数据——只返回特定数据库特定表格的表格结构,不返回数据,添加"-d"命令参数
222 | mysqldump -u b_user -h 101.3.20.33 -p'H_password' -P3306 -d database_di up_subjects > 0101_0630_up_subjects.sql
223 | 
224 | 
225 | ---导出表结构和满足特定条件的表数据——只返回特定数据库中特定表的表格结构和满足特定条件的数据
226 | mysqldump -u b_user -h 101.3.20.33 -p'H_password' -P3306 database_di up_subjects --where=" ctime>'2017-01-01' and ctime<'2017-06-30'" > 0101_0630_up_subjects.sql
227 | 
228 | --- 导出数据却不导出表结构——只返回特定数据库中特定表格的数据,不返回表格结构,添加"-t"命令参数
229 | mysqldump -u b_user -h 101.3.20.33 -p'H_password' -t -P3306 database_di up_subjects >0101_0630_up_subjects.sql
230 | 
231 | --- 导出特定数据库的所有表格的表结构及其数据,添加"--databases "命令参数
232 | mysqldump -u b_user -h 101.3.20.33 -p'H_password' -P3306
233 | --databases test > all_database.sql
234 | 
235 | --- 恢复,导入数据库数据
236 | --- 将导出的本地文件导入到指定数据库
237 | --- 系统命令行
238 | --- 格式:mysql -h连接ip -P(大写)端口 -u用户名 -p密码 数据库名 < d:XX.sql(路径)
239 | mysql -uusername -ppassword db1 < tb1.sql
252 | 
253 | --- mysqldump客户端可用来转储数据库或搜集数据库进行备份或将数据转移到另一个sql服务器(不一定是一个mysql服务器)。转储包含创建表和/或装载表的sql语句。
254 | 
255 | 
256 | --- 导出全部数据库
257 | --all-databases , -A
258 | mysqldump -uroot -p --all-databases
259 | 
260 | 
261 | ---导出全部表空间。
262 | --all-tablespaces , -Y
263 | mysqldump -uroot -p --all-databases --all-tablespaces
264 | 
265 | 
266 | 
267 | 
--------------------------------------------------------------------------------
/anack_study_case/zichanfuzhai.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/anack_study_case/zichanfuzhai.csv
--------------------------------------------------------------------------------
/anack_study_case/财务表精简表头.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/anack_study_case/财务表精简表头.xlsx
--------------------------------------------------------------------------------
/anack_test.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Thu May 31 17:12:12 2018
4 | 
5 | @author: 1707501
6 | """
7 | 
8 | import pymysql
9 | 
10 | # 连接信息留空,使用前请自行填写(与 sql.py 的做法一致)
11 | mysqlip = ''
12 | uusername = ''
13 | upassword = ''
14 | 
15 | conn = pymysql.connect(
16 | host = mysqlip,
17 | port = 3306,
18 | user = uusername,
19 | passwd = upassword,
20 | db = "test",
21 | charset = "utf8"
22 | )
23 | 
24 | cur = conn.cursor()
25 | print("OK!")
26 | # 查看库里的表
27 | sql = "show tables;"
28 | cur.execute(sql)
29 | result = cur.fetchall()
30 | print(result)
31 | 
32 | # 查询数据
33 | sql = "select * from target limit 100;"
34 | cur.execute(sql)
35 | result = cur.fetchall()
36 | print(result)
--------------------------------------------------------------------------------
/anack数据字典_v1.0.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/anack数据字典_v1.0.xlsx
--------------------------------------------------------------------------------
/anjuke_readme.txt:
--------------------------------------------------------------------------------
1 | crawl_anjuke_v1.311.py 说明
2 | 
3 | 
4 | 本脚本实现爬取安居客二手房信息,可将爬取下来的信息存储到本地并导入mysql数据库。
5 | 使用前需要注意两点:
6 | 
7 | 1、ip_collecter_original_test 是mysql数据库中存放代理ip地址和port的表,脚本从该表中读取代理信息。
8 | 如果不需要代理,修改相应代码即可。
9 | 
10 | 2、配置mysql数据库的连接信息:
11 | hosts =
12 | users =
13 | passwords =
14 | databases =
--------------------------------------------------------------------------------
/crawl_anjuke_v1.311.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Mon Dec 11 14:30:50 2017
4 | 
5 | @author:xx
6 | """
7 | 
8 | """
9 | crawling anjuke house price
10 | GuiYang
11 | 20171212 add proxy and detailed the parse of house information
12 | 20171214 add spidertime and multiprocess
13 | """
14 | 
15 | import requests
16 | from bs4 import BeautifulSoup
17 | import pymysql
18 | import random,time
19 | 
20 | def parse_detial(html):
21 | soup = BeautifulSoup(html.text,'html5lib')
22 | houseinfo = soup.select('div.houseInfoBox')
23 | houseinfotitle = houseinfo[0].h4
24 | an_xian = houseinfotitle.select('span.anxian')[0].get_text()
25 | if '假一赔百' in an_xian:
26 | an_xian = "Yes"
27 | else:
28 | an_xian = "No"
29 | houseencode= houseinfotitle.select('span.house-encode')[0].get_text()
30 | houseinfoV2 = houseinfo[0].select('div.houseInfoV2-desc')[0].get_text() 31 | housedetail1 = houseinfoV2.split() 32 | housedetail2 = ':'.join(housedetail1) 33 | housedetail = housedetail2.replace('\ue092','').replace('\u200b','').replace('\ue094','').replace('\ue093','').replace('\ue095','') 34 | housefirstv = soup.select('div.first-col.detail-col')[0].find_all('dl') 35 | house_estate = ''.join(housefirstv[0].get_text().split())[3:] 36 | house_add = ''.join(housefirstv[1].get_text().split())[3:] 37 | house_build_time = ''.join(housefirstv[2].get_text().split())[3:] 38 | house_type = ''.join(housefirstv[3].get_text().split())[3:] 39 | housesecondv = soup.select('div.second-col.detail-col')[0].find_all('dl') 40 | house_model_detail = ''.join(housesecondv[0].get_text().split())[3:] 41 | house_size = ''.join(housesecondv[1].get_text().split())[3:] 42 | house_orientation = ''.join(housesecondv[2].get_text().split())[3:] 43 | house_floor = ''.join(housesecondv[3].get_text().split())[3:] 44 | housethirdv = soup.select('div.third-col.detail-col')[0].find_all('dl') 45 | house_decorate = ''.join(housethirdv[0].get_text().split())[5:] 46 | house_univalence = ''.join(housethirdv[1].get_text().split())[5:] 47 | down_payment = ''.join(housethirdv[2].get_text().split())[5:] 48 | # monthly_payment = ''.join(housethirdv[3].get_text().split())[5:] #javescript loading data 49 | salerinfo = soup.select('p.broker-mobile') 50 | salerphone = salerinfo[0].get_text().replace('\ue047','') 51 | housetitle = ''.join(soup.select('h3.long-title')[0].get_text().split()) 52 | houseinfov1 = soup.select('div.basic-info.clearfix')[0].find_all('span') 53 | housetotleprice = houseinfov1[0].get_text() 54 | #============================================================================== 55 | # housemodel = houseinfov1[1].get_text() 56 | # housesize = houseinfov1[2].get_text() 57 | #============================================================================== 58 | line = [] 59 | line.append(housetitle) 60 | line.append(an_xian) 61 | line.append(houseencode) 62 | line.append(housetotleprice) 63 | line.append(house_model_detail) 64 | line.append(house_size) 65 | line.append(house_estate) 66 | line.append(house_add) 67 | line.append(house_build_time) 68 | line.append(house_type) 69 | line.append(house_orientation) 70 | line.append(house_floor) 71 | line.append(house_decorate) 72 | line.append(house_univalence) 73 | line.append(down_payment) 74 | line.append(housedetail) 75 | line.append(salerphone) 76 | result = '\t'.join(line) 77 | print(result) 78 | return result 79 | 80 | def parse_list(html): 81 | secondurl = [] 82 | soup = BeautifulSoup(html.text,'html5lib') 83 | houselists = soup.select('a.houseListTitle') 84 | for houseid in houselists: 85 | houseurl = houseid['href'] 86 | secondurl.append(houseurl) 87 | return secondurl 88 | 89 | def downloadhtml(url,proxy_ip): 90 | response = requests.get(url,headers=header,proxies={"http":proxy_ip}) 91 | if response.status_code == 200: 92 | return response 93 | else: 94 | print("download html error!") 95 | 96 | 97 | def Create_table(): 98 | query = """CREATE TABLE IF NOT EXISTS `anjuke_collecter_original_test` ( 99 | `No` int(10) unsigned NOT NULL AUTO_INCREMENT, 100 | `housetitle` varchar(255) DEFAULT NULL, 101 | `an_xian` varchar(255) DEFAULT NULL, 102 | `houseencode` varchar(255) DEFAULT NULL, 103 | `housetotleprice` varchar(255) DEFAULT NULL, 104 | `house_model_detail` varchar(255) DEFAULT NULL, 105 | `house_size` varchar(255) DEFAULT NULL, 106 | `house_estate` varchar(255) 
DEFAULT NULL,
107 | `house_add` varchar(255) DEFAULT NULL,
108 | `house_build_time` varchar(255) DEFAULT NULL,
109 | `house_type` varchar(255) DEFAULT NULL,
110 | `house_orientation` varchar(255) DEFAULT NULL,
111 | `house_floor` varchar(255) DEFAULT NULL,
112 | `house_decorate` varchar(255) DEFAULT NULL,
113 | `house_univalence` varchar(255) DEFAULT NULL,
114 | `down_payment` varchar(255) DEFAULT NULL,
115 | `housedetail` text DEFAULT NULL,
116 | `salerphone` varchar(255) DEFAULT NULL,
117 | `Url` varchar(255) DEFAULT NULL,
118 | `SpiderTime` varchar(255) DEFAULT NULL,
119 | PRIMARY KEY (`No`)
120 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8"""
121 | db = pymysql.connect(host = hosts,user = users, password = passwords, database = databases,charset='utf8')
122 | cursor = db.cursor()
123 | cursor.execute(query)
124 | db.commit()
125 | cursor.close()
126 | db.close()
127 | 
128 | 
129 | def etl_mysql(result):
130 | db = pymysql.connect(host = hosts,user = users, password = passwords, database = databases,charset='utf8')
131 | cursor = db.cursor()
132 | result = tuple(result)
133 | query = "insert into anjuke_collecter_original_test(housetitle,an_xian,houseencode,housetotleprice,house_model_detail,house_size,house_estate,house_add,house_build_time,house_type,house_orientation,house_floor,house_decorate,house_univalence,down_payment,housedetail,salerphone,Url,SpiderTime) values('%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s')" % result
134 | cursor.execute(query)
135 | db.commit()
136 | cursor.close()
137 | db.close()
138 | 
139 | 
140 | def get_next_page(html):
141 | soup = BeautifulSoup(html.text,'html5lib')
142 | nexturl = soup.select('a.aNxt')[0]['href']
143 | return nexturl
144 | 
145 | def get_proxy_ip():
146 | db = pymysql.connect(host = hosts,user = users, password = passwords, database = databases,charset='utf8')
147 | cursor = db.cursor()
148 | query = "select ip,port from ip_collecter_original_test limit 17000"
149 | cursor.execute(query)
150 | ip_result = cursor.fetchall()
151 | IPList = []
152 | for i in ip_result:
153 | Ip = i[0] + ":" + i[1]
154 | IPList.append(Ip)
155 | return IPList
156 | 
157 | def check_ip(IPList):
158 | url = "https://www.baidu.com/"
159 | proxy_ip = random.choice(IPList)
160 | res = requests.get(url,headers=header,proxies={"http":proxy_ip})
161 | if res.status_code == 200:
162 | print(proxy_ip)
163 | return proxy_ip
164 | else:
165 | return None # 后期修改成迭代
166 | 
167 | header = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0',
168 | 'Connection':'keep-alive' }
169 | 
170 | # https://gy.anjuke.com/sale/p1/#filtersort
171 | # https://shanghai.anjuke.com/sale/p1/#filtersort
172 | # https://hangzhou.anjuke.com/sale/
173 | url = "https://shanghai.anjuke.com/sale/p1/#filtersort"
174 | 
175 | hosts = '' #数据库连接信息,使用前按 anjuke_readme.txt 填写
176 | users = ''
177 | passwords = ''
178 | databases = ''
179 | 
180 | if __name__ == '__main__':
181 | List_ip = get_proxy_ip()
182 | next_url = url
183 | Create_table()
184 | while next_url != None:
185 | proxy_ip = check_ip(List_ip)
186 | res = downloadhtml(next_url,proxy_ip) #抓取当前页,翻页由 next_url 驱动
187 | if res != None:
188 | urllist = None #解析失败时跳过本页
189 | try: urllist = parse_list(res)
190 | except Exception:
191 | print('house url list parsing error!')
192 | if urllist != None:
193 | for houseurl in urllist:
194 | proxy_ip = check_ip(List_ip)
195 | houseinfor = downloadhtml(houseurl,proxy_ip)
196 | try:
197 | results = parse_detial(houseinfor)
198 | except Exception:
199 | results = None
200 | with open(r'E:\documents\personal\python\crawler\anjuke\anjuke_error_shanghai.txt','a',encoding='utf-8') as f:
201 | f.write(houseurl +"\n")
202 | print("parse house detail info error!")
203 | continue
204 | with open(r'E:\documents\personal\python\crawler\anjuke\anjuke_shanghai_v15.txt','a',encoding='utf-8') as f:
205 | f.write(results + '\n')
206 | try:
207 | line = results.split('\t')
208 | ts = time.strftime('%Y%m%d%H%M%S',time.localtime(time.time()))
209 | line.append(houseurl)
210 | line.append(ts)
211 | # print(line)
212 | etl_mysql(line)
213 | except Exception:
214 | print("data insert into mysql error!")
215 | continue
216 | try:
217 | next_url = get_next_page(res)
218 | except Exception:
219 | next_url = None
220 | print("crawling end!")
221 | 
--------------------------------------------------------------------------------
/raw_modules/ReadMe.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/raw_modules/ReadMe.txt
--------------------------------------------------------------------------------
/raw_modules/__init__.py:
--------------------------------------------------------------------------------
1 | #
--------------------------------------------------------------------------------
/raw_modules/get_price.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | 从163网址上获取指定ID指定时间段的K线数据
4 | """
5 | import requests
6 | import re
7 | import datetime
8 | import pandas as pd
9 | '''
10 | 
11 | 完整网址:
12 | http://quotes.money.163.com/service/chddata.html?code=0%06d&start=%d&end=%d&fields=TCLOSE;HIGH;LOW;TOPEN;LCLOSE;VOTURNOVER;VATURNOVER
13 | '''
14 | 
15 | 
16 | def get_close_price(id, day = 0):
17 | '''
18 | 获取指定ID指定日期的收盘价
19 | 输入:id -> str形式的ID号: '600660'
20 | day -> str形式的日期: '20180626'
21 | 返回值:str形式的价格: '25.54', 如果当天为节假日,则返回0
22 | '''
23 | if day == 0:
24 | day = datetime.datetime.now() - datetime.timedelta(days=1)
25 | day = day.strftime("%Y%m%d")
26 | if id[:3] == '000' or id[:3] == '002' or id[:3] == '300': #如果是深市,则前缀为1
27 | nid = '1' + id
28 | else: #如果是沪市主板,则前缀为0
29 | nid = '0' + id
30 | url = "http://quotes.money.163.com/service/chddata.html?code=%s&start=%s&end=%s&\
31 | fields=TCLOSE" %(nid, day,day)
32 | res = requests.get(url)
33 | res.raise_for_status()
34 | 
35 | for chunk in res.iter_content(100000):
36 | # print(chunk)
37 | pattern = '[^,\r\n]+'
38 | obj = re.compile(pattern)
39 | match = obj.findall(chunk.decode('gbk'))
40 | #print(match)
41 | if len(match) < 8:
42 | return 0
43 | else:
44 | return match[-1]
45 | 
46 | def get_period_k_day(id, start_day, stop_day = 0):
47 | '''
48 | 获取指定ID一个时间段内的K线数据
49 | 输入:id -> str形式的ID号: '600660'
50 | start_day -> str形式的日期: '20180626'
51 | stop_day -> 同上, 默认到昨天
52 | 返回值:一个dataframe
53 | '''
54 | if stop_day == 0:
55 | stop_day = datetime.datetime.now() - datetime.timedelta(days=1) #默认截止到昨天,需赋给 stop_day 才能生效
56 | stop_day = stop_day.strftime("%Y%m%d")
57 | 
58 | if id[:3] == '000' or id[:3] == '002' or id[:3] == '300': #如果是深市,则前缀为1
59 | nid = '1' + id
60 | else: #如果是沪市主板,则前缀为0
61 | nid = '0' + id
62 | url = "http://quotes.money.163.com/service/chddata.html?code=%s&start=%s&end=%s&\
63 | fields=TCLOSE;HIGH;LOW;TOPEN;LCLOSE;VOTURNOVER;VATURNOVER" %(nid, start_day, stop_day)
64 | 
65 | 
66 | # url = "http://quotes.money.163.com/service/chddata.html?code=0%s&start=%s&end=%s&\
67 | # fields=TCLOSE;HIGH;LOW;TOPEN;LCLOSE;VOTURNOVER;VATURNOVER" %(id, start_day,stop_day)
68 | res = requests.get(url)
69 | 
res.raise_for_status() 70 | # playFile = open(file_name, 'wb') 71 | 72 | raw_data = [] 73 | for chunk in res.iter_content(1000000): 74 | # playFile.write(chunk) 75 | chunk = chunk.decode('gbk') 76 | pattern = '[^,\r\n]+' 77 | obj = re.compile(pattern) 78 | match = obj.findall(chunk) 79 | if len(match) < 8: #如果没有数据 80 | return 0 81 | 82 | header = match[:10] #如果增加字段,则此处以下需要相应修改 83 | # print(header) 84 | raw_data = match[10:] 85 | date = raw_data[::10] 86 | idc = raw_data[1::10] 87 | name = raw_data[2::10] 88 | price = raw_data[3::10] 89 | high = raw_data[4::10] 90 | lopen = raw_data[5::10] 91 | yesterday_close = raw_data[6::10] 92 | low = raw_data[7::10] 93 | vol = raw_data[8::10] 94 | mount = raw_data[9::10] 95 | 96 | data = { 97 | # header[0]:date, 98 | header[1]:idc, 99 | header[2]:name, 100 | header[3]:price, 101 | header[4]:high, 102 | header[5]:lopen, 103 | header[6]:yesterday_close, 104 | header[7]:low, 105 | header[8]:vol, 106 | header[9]:mount 107 | } 108 | df = pd.DataFrame(data,index = date) 109 | # playFile.close() 110 | return df 111 | 112 | 113 | def k_day_to_csv(code, stop_day = 0): 114 | ''' 115 | 更新k线数据,并保存到本地,默认为更新到昨天 116 | code:目标个股,只能为'000xxx'形式 117 | stop_day: 0->昨天, 20170101:更新到指定的一天 118 | @更新逻辑: 119 | 1. 如果无记录,则自动创建csv文件,默认为:ID.kday 120 | 2. 如果有部分记录,则自动分析,并将后续的内容更新 121 | 3. 如果记录比需要更新的更新,则直接返回 122 | 123 | 缺陷:得到的数据是没有复权的,应该进行前复权 124 | ''' 125 | base_path = './' #修改此处可以更改文件存放路径,可以考虑作为一个配置参数 126 | start_day = '19970101' #start时间统一从1997年开始 127 | #参数合法性检查 128 | if isinstance(code,list): 129 | print('is a list') 130 | elif isinstance(code,str): 131 | file_name = code + '.csv' 132 | # print(file_name) 133 | else: 134 | print('bad input. please check it') 135 | return 136 | 137 | file_name = base_path + file_name 138 | # print(file_name) 139 | 140 | update_flag = 1 #1代表重新生成, 2代表更新 3代表无需处理 141 | #判断最新的是第几天 142 | try: 143 | with open(file_name,'r') as fh: 144 | content = fh.readlines() 145 | if len(content) > 2: #获取最新记录,总是在第二行 146 | latest_record = content[1].split(',') 147 | 148 | from datetime import datetime 149 | from dateutil.parser import parse 150 | latest_day = parse(latest_record[0]) 151 | now = datetime.now().strftime('%Y-%m-%d') 152 | yesterday = parse(now) 153 | 154 | if yesterday > latest_day: 155 | update_flag = 2 156 | print('not the latest') 157 | else: 158 | update_flag = 3 159 | print(code + ' already the latest') 160 | return 161 | except: 162 | update_flag = 1 163 | print('no record') 164 | 165 | #不同的情况适用不同更新逻辑 166 | if update_flag == 1: #完全更新 167 | r = get_period_k_day(code, start_day) 168 | r.to_csv(file_name, encoding= 'gbk') 169 | elif update_flag == 2: 170 | r = get_period_k_day(code, start_day) #此处没有办法在首部添加 171 | r.to_csv(file_name, encoding= 'gbk') #如果可以的话,则不必每次重写 172 | return 173 | print('finish ' + code + ' update') 174 | return 175 | 176 | def k_day_update(id_list, stop_day = 0): 177 | ''' 178 | 用户API,更新个股的K线数据,可以是列表,也可以是str 179 | ''' 180 | #参数合法性检查 181 | if isinstance(id_list,list): 182 | print('is a list') 183 | for s in id_list: 184 | k_day_to_csv(s,stop_day) 185 | elif isinstance(id_list,str): 186 | k_day_to_csv(id_list,stop_day) 187 | else: 188 | print('bad input. 
please check it') 189 | return 190 | 191 | if __name__ == '__main__': 192 | id = '601012' 193 | start_day = '20100625' 194 | stop_day = '20180904' 195 | 196 | #获取昨天的收盘价 197 | # price = get_close_price(id) 198 | # print(price) 199 | 200 | # #获取指定一天的收盘价 201 | # price = get_close_price('600660','20170209') 202 | # print(price) 203 | # 204 | # #获取从start_day开始直到昨天的收盘价 205 | # s = get_period_price('600660',start_day) 206 | # print(s) 207 | # 208 | # #获取指定时间段内的收盘价 209 | # s = get_period_k_day('601012',start_day,stop_day) 210 | # print(s) 211 | # s.to_csv('test.csv', encoding= 'gbk') 212 | 213 | #更新K线数据并存文档 214 | company_list = ['600660', '600066', '000651', '600522', '601012', '600887'] 215 | k_day_update(company_list) 216 | k_day_update('600066') 217 | 218 | --------------------------------------------------------------------------------
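补充示例:k_day_to_csv 的文档串指出其数据未做前复权。在补齐复权逻辑之前,一个过渡做法是改用 tushare 直接取前复权K线(get_k_data 的 autype 参数默认即 'qfq' 前复权),示意如下(非仓库原有代码,假设本机已安装 tushare):

import tushare as ts

df = ts.get_k_data('600660', start='1997-01-01', autype='qfq')  # 前复权日K,起始日沿用本模块的 start_day 约定
df.to_csv('600660.csv', encoding='gbk')                          # 存盘编码沿用本模块的 gbk 约定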