├── .gitignore ├── ExampleData ├── 伊利股份.csv ├── 宁沪高速.csv ├── 宇通客车.csv ├── 宇通客车产销数据 │ ├── 2017年10月份产销快报.xlsx │ ├── 2017年11月份产销快报.xlsx │ ├── 2017年12月份产销快报.xlsx │ ├── 2017年1月份产销快报.xlsx │ ├── 2017年2月产销快报.xlsx │ ├── 2017年3月份产销快报.xlsx │ ├── 2017年4月份产销快报.xlsx │ ├── 2017年5月份产销快报.xlsx │ ├── 2017年6月份产销快报.xlsx │ ├── 2017年7月份产销快报.xlsx │ ├── 2017年8月份产销快报.xlsx │ └── 2017年9月份产销快报.xlsx ├── 格力电器.csv ├── 福耀玻璃.csv └── 隆基股份.csv ├── LICENSE ├── M1809 ├── doc │ ├── M1809使用说明.txt │ ├── item_reports │ │ ├── M1809-个股基本面分析.xmind │ │ ├── M1809_20180521.ppt │ │ ├── M1809_20180530.ppt │ │ ├── M1809_20180606.ppt │ │ └── M1809_20180611.ppt │ ├── report │ │ └── M1809_20180612.ppt │ ├── xst │ │ ├── M1809_finance_analysis.py │ │ ├── M1809_finance_crawling.py │ │ └── M1809_finance_crawling_target.py │ └── 表头说明 │ │ ├── balance_columns.txt │ │ ├── cash_flow_columns.txt │ │ ├── profit_columns │ │ ├── profit_columns.txt │ │ ├── 开发建议.txt │ │ └── 新建 Microsoft Excel 工作表.xlsx ├── src │ ├── App.py │ ├── Config.py │ ├── CoreAnalyse.py │ ├── GetItemInfo.py │ ├── M1809_finance_weight.py │ ├── PlotAnalyse.py │ ├── UserApi.py │ ├── crawling_finance_table.py │ ├── crawling_finance_table_v1_7.py │ ├── get_dividends_history.py │ ├── get_price.py │ ├── trade_day.py │ └── txttoexcel.py └── sys_config │ └── 账户配置.txt ├── README.md ├── Release ├── HK_insider.py ├── HK_insider_v1.1.py ├── HK_insider_v1.4.py ├── ReleaseNote.txt ├── YTProductionAndSale │ ├── DataAnalyze.py │ ├── DataToSql.py │ ├── PdfDown.py │ ├── __init__.py │ └── 使用说明.txt ├── YT_produce_sell.py ├── crawling_finance_table_v1.2.py ├── get_dividends_history.py ├── kday │ ├── get_price.py │ └── k_day.py ├── pdf_decoder.py ├── wechat.py ├── 安居客爬虫框架 │ ├── ReadMe.txt │ └── crawl_anjuke_v1.311.py └── 树莓派信息实时推送示例 │ ├── App.py │ ├── PageDecoder.py │ ├── PushMessage.py │ ├── StockClass.py │ └── 使用说明.txt ├── _config.yml ├── anack ├── .gitignore ├── App │ ├── Detail_Stock_Selector.py │ ├── HK_insider.py │ ├── IndustryEstimation.py │ ├── IndustryEstimation_detail.py │ ├── M1808 │ │ ├── M1808.py │ │ ├── early_warning.py │ │ ├── protocol.py │ │ ├── wechat.py │ │ └── 命令示例.txt │ ├── StockAnalyser.py │ ├── YT_produce_sell.py │ ├── __init__.py │ └── 实时推送 │ │ ├── App.py │ │ ├── PageDecoder.py │ │ ├── PushMessage.py │ │ ├── StockClass.py │ │ └── 使用说明.txt ├── SQL │ ├── StockSelector.py │ ├── __init__.py │ ├── classify_to_sql.py │ ├── finnance_to_sql.py │ ├── glo.py │ ├── k_data_to_sql.py │ ├── macro_to_sql.py │ ├── sql.py │ ├── update.py │ └── 感兴趣的个股列表.txt ├── Tushare │ ├── basic.py │ ├── classify.py │ ├── finance.py │ ├── information.py │ └── macro.py ├── __init__.py └── main.py ├── anack_study_case ├── balance_columns.txt ├── cash_flow_columns.txt ├── cash_flow_statements_balance_profit_columns.xlsx ├── crawling_finance_table_v1.5.py ├── crawling_finance_table_v1.6.py ├── crawling_finance_table_v1.7.2.1.py ├── crawling_finance_table_v1.7.py ├── dividend_rate_v2.py ├── lirunbiao.csv ├── liuliang.csv ├── profit_columns.txt ├── sql常用模板.txt ├── zichanfuzhai.csv └── 财务表精简表头.xlsx ├── anack_test.py ├── anack数据字典_v1.0.xlsx ├── anjuke_readme.txt ├── crawl_anjuke_v1.311.py └── raw_modules ├── ReadMe.txt ├── __init__.py └── get_price.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.bin 2 | debug/ 3 | data/ 4 | __pycache__/ 5 | *.pyc 6 | *.cpython-36.pyc 7 | output/ 8 | *.csv 9 | *.cfg 10 | config/ 11 | PdfDownload/ 12 | ExeFile/ 13 | -------------------------------------------------------------------------------- /ExampleData/伊利股份.csv: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/ExampleData/伊利股份.csv -------------------------------------------------------------------------------- /ExampleData/宁沪高速.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/ExampleData/宁沪高速.csv -------------------------------------------------------------------------------- /ExampleData/宇通客车.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/ExampleData/宇通客车.csv -------------------------------------------------------------------------------- /ExampleData/宇通客车产销数据/2017年10月份产销快报.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/ExampleData/宇通客车产销数据/2017年10月份产销快报.xlsx -------------------------------------------------------------------------------- /ExampleData/宇通客车产销数据/2017年11月份产销快报.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/ExampleData/宇通客车产销数据/2017年11月份产销快报.xlsx -------------------------------------------------------------------------------- /ExampleData/宇通客车产销数据/2017年12月份产销快报.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/ExampleData/宇通客车产销数据/2017年12月份产销快报.xlsx -------------------------------------------------------------------------------- /ExampleData/宇通客车产销数据/2017年1月份产销快报.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/ExampleData/宇通客车产销数据/2017年1月份产销快报.xlsx -------------------------------------------------------------------------------- /ExampleData/宇通客车产销数据/2017年2月产销快报.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/ExampleData/宇通客车产销数据/2017年2月产销快报.xlsx -------------------------------------------------------------------------------- /ExampleData/宇通客车产销数据/2017年3月份产销快报.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/ExampleData/宇通客车产销数据/2017年3月份产销快报.xlsx -------------------------------------------------------------------------------- /ExampleData/宇通客车产销数据/2017年4月份产销快报.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/ExampleData/宇通客车产销数据/2017年4月份产销快报.xlsx -------------------------------------------------------------------------------- /ExampleData/宇通客车产销数据/2017年5月份产销快报.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/ExampleData/宇通客车产销数据/2017年5月份产销快报.xlsx -------------------------------------------------------------------------------- 
/ExampleData/宇通客车产销数据/2017年6月份产销快报.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/ExampleData/宇通客车产销数据/2017年6月份产销快报.xlsx -------------------------------------------------------------------------------- /ExampleData/宇通客车产销数据/2017年7月份产销快报.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/ExampleData/宇通客车产销数据/2017年7月份产销快报.xlsx -------------------------------------------------------------------------------- /ExampleData/宇通客车产销数据/2017年8月份产销快报.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/ExampleData/宇通客车产销数据/2017年8月份产销快报.xlsx -------------------------------------------------------------------------------- /ExampleData/宇通客车产销数据/2017年9月份产销快报.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/ExampleData/宇通客车产销数据/2017年9月份产销快报.xlsx -------------------------------------------------------------------------------- /ExampleData/格力电器.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/ExampleData/格力电器.csv -------------------------------------------------------------------------------- /ExampleData/福耀玻璃.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/ExampleData/福耀玻璃.csv -------------------------------------------------------------------------------- /ExampleData/隆基股份.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/ExampleData/隆基股份.csv -------------------------------------------------------------------------------- /M1809/doc/M1809使用说明.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/M1809/doc/M1809使用说明.txt -------------------------------------------------------------------------------- /M1809/doc/item_reports/M1809-个股基本面分析.xmind: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/M1809/doc/item_reports/M1809-个股基本面分析.xmind -------------------------------------------------------------------------------- /M1809/doc/item_reports/M1809_20180521.ppt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/M1809/doc/item_reports/M1809_20180521.ppt -------------------------------------------------------------------------------- /M1809/doc/item_reports/M1809_20180530.ppt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/M1809/doc/item_reports/M1809_20180530.ppt 
-------------------------------------------------------------------------------- /M1809/doc/item_reports/M1809_20180606.ppt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/M1809/doc/item_reports/M1809_20180606.ppt -------------------------------------------------------------------------------- /M1809/doc/item_reports/M1809_20180611.ppt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/M1809/doc/item_reports/M1809_20180611.ppt -------------------------------------------------------------------------------- /M1809/doc/report/M1809_20180612.ppt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/M1809/doc/report/M1809_20180612.ppt -------------------------------------------------------------------------------- /M1809/doc/xst/M1809_finance_analysis.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | ''' 3 | 类名:M1809_finance_analysis(财务数据分析) 4 | 作者:徐抒田 5 | 日期:2018-5-28 6 | 描述: 7 | 1、初步调试机器学习方法 8 | 2、参数设置,1的准确率增加 9 | 版本号:V0.1 10 | ''' 11 | 12 | 13 | import pandas as pd 14 | import numpy as np 15 | import matplotlib.pyplot as plt 16 | 17 | result_yinli = pd.read_csv('D:/999github/anack/M1809/result_yinli.csv') 18 | result_yingyun = pd.read_csv('D:/999github/anack/M1809/result_yingyun.csv') 19 | result_chengzhang = pd.read_csv('D:/999github/anack/M1809/result_chengzhang.csv') 20 | result_changzhai = pd.read_csv('D:/999github/anack/M1809/result_changzhai.csv') 21 | result_xianjin = pd.read_csv('D:/999github/anack/M1809/result_xianjin.csv') 22 | 23 | 24 | df_final = pd.read_csv('D:/999github/anack/M1809/target.csv') 25 | df_final = df_final[(df_final.firstincrase > 0.1) & (df_final.secondincrase > 0.1)] 26 | df_final = pd.DataFrame({'code' : df_final['code'], 27 | 'label' : 1, 28 | }) 29 | 30 | 31 | data = result_yinli 32 | data = pd.merge(data, result_yingyun, on=['code','name']) 33 | data = pd.merge(data, result_chengzhang, on=['code','name']) 34 | data = pd.merge(data, result_changzhai, on=['code','name']) 35 | data = pd.merge(data, result_xianjin, on=['code','name']) 36 | data = pd.merge(data, df_final, on='code',how = 'left') 37 | 38 | # ============================================================================= 39 | # null_counts = data.isnull().sum() 40 | # print(null_counts) 41 | # ============================================================================= 42 | 43 | data = data.fillna(0) 44 | data = data.dropna(axis=0) 45 | 46 | 47 | 48 | orig_columns = data.columns 49 | drop_columns = [] 50 | for col in orig_columns: 51 | col_series = data[col].dropna().unique() 52 | if len(col_series) == 1: 53 | drop_columns.append(col) 54 | data = data.drop(drop_columns, axis=1) 55 | print(drop_columns) 56 | 57 | 58 | 59 | target = data['label'] 60 | code = data['code'] 61 | name = data['name'] 62 | features = data.drop(['code','name','label'],axis=1) 63 | 64 | features[features.currentratio20161 == '--'] = 0 65 | features[features.quickratio20161=='--']=0 66 | features[features.cashratio20161=='--']=0 67 | features[features.icratio20161=='--']=0 68 | features[features.sheqratio20161=='--']=0 69 | features[features.adratio20161=='--']=0 70 | 
features[features.currentratio20162=='--']=0 71 | features[features.quickratio20162=='--']=0 72 | features[features.cashratio20162=='--']=0 73 | features[features.icratio20162=='--']=0 74 | features[features.sheqratio20162=='--']=0 75 | features[features.adratio20162=='--']=0 76 | features[features.currentratio20163=='--']=0 77 | features[features.quickratio20163=='--']=0 78 | features[features.cashratio20163=='--']=0 79 | features[features.icratio20163=='--']=0 80 | features[features.sheqratio20163=='--']=0 81 | features[features.adratio20163=='--']=0 82 | features[features.currentratio20164=='--']=0 83 | features[features.quickratio20164=='--']=0 84 | features[features.cashratio20164=='--']=0 85 | features[features.icratio20164=='--']=0 86 | features[features.currentratio20171=='--']=0 87 | features[features.quickratio20171=='--']=0 88 | features[features.cashratio20171=='--']=0 89 | features[features.icratio20171=='--']=0 90 | features[features.sheqratio20171=='--']=0 91 | features[features.adratio20171=='--']=0 92 | features[features.currentratio20172=='--']=0 93 | features[features.quickratio20172=='--']=0 94 | features[features.cashratio20172=='--']=0 95 | features[features.icratio20172=='--']=0 96 | features[features.currentratio20173=='--']=0 97 | features[features.quickratio20173=='--']=0 98 | features[features.cashratio20173=='--']=0 99 | features[features.icratio20173=='--']=0 100 | features[features.currentratio20174=='--']=0 101 | features[features.quickratio20174=='--']=0 102 | features[features.cashratio20174=='--']=0 103 | features[features.icratio20174=='--']=0 104 | features[features.currentratio20181=='--']=0 105 | features[features.quickratio20181=='--']=0 106 | features[features.cashratio20181=='--']=0 107 | features[features.icratio20181=='--']=0 108 | features = features.astype('float64') 109 | 110 | 111 | ##基于树的方法不用做标准化、归一化处理 112 | from sklearn import preprocessing 113 | min_max_scaler = preprocessing.MinMaxScaler() 114 | features_new = min_max_scaler.fit_transform(features) 115 | features = pd.DataFrame(features_new, columns=features.columns) 116 | 117 | 118 | features = features[['roe20181' ,'esp20181' ,'esp20174' ,'roe20174' ,'net_profits20174' ,'net_profits20181' ,'esp20173' ,'net_profits20172' ,'roe20173' ,'net_profits20173' ,'net_profits20163' ,'esp20172' ,'business_income20174' ,'roe20172' ,'net_profits20171' ,'net_profits20164' ,'rateofreturn20173' ,'seg20181' ,'net_profits20162' ,'business_income20173' ,'roe20171' ,'business_income20171' ,'nprg20181' ,'business_income20181' ,'nav20181' ,'rateofreturn20174' ,'epsg20181' ,'seg20174' ,'business_income20172' ,'esp20171']] 119 | 120 | from sklearn.model_selection import cross_val_score 121 | from sklearn.datasets import make_blobs 122 | from sklearn.ensemble import RandomForestClassifier 123 | from sklearn.model_selection import train_test_split 124 | from sklearn.metrics import classification_report 125 | from sklearn import metrics 126 | 127 | 128 | X_train,X_test,y_train,y_test = train_test_split( 129 | features,target,test_size=0.25,random_state=42) 130 | 131 | ''' 132 | Random_forset 133 | ''' 134 | clf = RandomForestClassifier(n_estimators=200,max_depth = 7,min_samples_split = 5,min_samples_leaf = 20,random_state=2018,class_weight={1:1.5}) 135 | 136 | clf = clf.fit(X_train, y_train) 137 | y_pre = clf.predict(X_test) 138 | 139 | y_pre_pro = clf.predict_proba(X_test)[:, 1] 140 | print(y_pre_pro) 141 | print(classification_report(y_test,y_pre)) 142 | print(metrics.roc_auc_score(y_test,y_pre)) #预测Y值得分 143 | 
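# Side notes on the pipeline above (hedged sketches, not part of the original
# script):
#
# 1. The long run of `features[features.col == '--'] = 0` statements indexes
#    with a boolean Series, so pandas zeroes the *entire matching row*, not
#    just the offending cell; replacing the sentinel directly touches only
#    the '--' cells and covers every season column in one call:
#
#        features = features.replace('--', 0).astype('float64')
#
# 2. `roc_auc_score(y_test, y_pre)` just above scores the hard 0/1 labels;
#    passing the probabilities gives the usual threshold-free AUC:
#
#        print(metrics.roc_auc_score(y_test, y_pre_pro))
#
# 3. `cross_val_score` is imported but never called; a minimal 5-fold check
#    of the same classifier would be:
#
#        scores = cross_val_score(clf, features, target, cv=5, scoring='roc_auc')
#        print('CV AUC: %.3f +/- %.3f' % (scores.mean(), scores.std()))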
144 | def aucfun(act,pred): 145 | fpr,tpr,thresholds = metrics.roc_curve(act,pred) 146 | plt.plot(fpr, tpr, color='darkorange',lw=2) 147 | plt.xlim([0.0, 1.0]) 148 | plt.ylim([0.0, 1.05]) 149 | plt.xlabel('False Positive Rate') 150 | plt.ylabel('True Positive Rate') 151 | # ============================================================================= 152 | # print(fpr) 153 | # print(tpr) 154 | # print(thresholds) 155 | # ============================================================================= 156 | return metrics.auc(fpr,tpr) 157 | 158 | 159 | aucfun(y_test,y_pre_pro) 160 | 161 | 162 | importances = clf.feature_importances_ 163 | std = np.std([tree.feature_importances_ for tree in clf.estimators_],axis=0) 164 | indices = np.argsort(importances)[::-1] 165 | print("Feature ranking:") 166 | for f in range(features.shape[1]): 167 | print("%d. feature %d (%f): %s" % (f + 1, indices[f], importances[indices[f]] , features.columns[indices[f]] )) 168 | 169 | 170 | 171 | y_pre_pro_f = clf.predict_proba(features)[:, 1] 172 | 173 | y_pre_pro_f = pd.DataFrame({'code' : code, 174 | 'name' : name, 175 | 'gailv' : y_pre_pro_f 176 | }) 177 | 178 | y_pre_pro_f.to_csv('D:/999github/anack/M1809/y_pre_pro_f.csv',index =False) -------------------------------------------------------------------------------- /M1809/doc/xst/M1809_finance_crawling.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | ''' 3 | 类名:M1809_finance_crawling(财务分析数据爬虫) 4 | 作者:徐抒田 5 | 日期:2018-5-20 6 | 描述: 7 | 1、获取财务数据模块,存入本地CSV; 8 | 2、后续盈利表里增加:市盈率、市净率、PEG;现金流量增加:当期现金流入; 9 | 3、后续版本增加表入库,增加数据更新,模块化 10 | 方法:使用TUSHARE 11 | 版本号:V0.1 12 | ''' 13 | import pandas as pd 14 | import tushare as ts 15 | ''' 16 | 盈利能力表 17 | 18 | code,代码 19 | name,名称 20 | roe,净资产收益率(%) 21 | net_profit_ratio,净利率(%) 22 | gross_profit_rate,毛利率(%) 23 | net_profits,净利润(万元) 24 | esp,每股收益 25 | business_income,营业收入(百万元) 26 | bips,每股主营业务收入(元) 27 | season,年+季度 28 | ''' 29 | result_yinli = ts.get_profit_data(2018,1).loc[:,['code','name']] 30 | print(result_yinli) 31 | for i in [2016,2017,2018]: 32 | if i != 2018: 33 | for j in [1,2,3,4]: 34 | columns = ['code','name','roe'+str(i)+str(j),'net_profit_ratio'+str(i)+str(j),'gross_profit_rate'+str(i)+str(j),'net_profits'+str(i)+str(j),'esp'+str(i)+str(j),'business_income'+str(i)+str(j),'bips'+str(i)+str(j)] 35 | result_1 = pd.DataFrame(ts.get_profit_data(i,j).values,columns = columns) 36 | result_1 = result_1.drop(['name'],axis=1) 37 | result_yinli = pd.merge(result_yinli, result_1, on='code',how='left') 38 | print(str(i)+str(j)) 39 | print(len(result_yinli)) 40 | if i == 2018: 41 | j = 1 42 | columns = ['code','name','roe'+str(i)+str(j),'net_profit_ratio'+str(i)+str(j),'gross_profit_rate'+str(i)+str(j),'net_profits'+str(i)+str(j),'esp'+str(i)+str(j),'business_income'+str(i)+str(j),'bips'+str(i)+str(j)] 43 | result_1 = pd.DataFrame(ts.get_profit_data(i,j).values,columns = columns) 44 | result_1 = result_1.drop(['name'],axis=1) 45 | result_yinli = pd.merge(result_yinli, result_1, on='code',how='left') 46 | print(str(i)+str(j)) 47 | print(len(result_yinli)) 48 | result_yinli = result_yinli.drop_duplicates() 49 | result_yinli.to_csv('D:/999github/anack/M1809/result_yinli.csv',index =False) 50 | ''' 51 | 营运能力表 52 | 53 | code,代码 54 | name,名称 55 | arturnover,应收账款周转率(次) 56 | arturndays,应收账款周转天数(天) 57 | inventory_turnover,存货周转率(次) 58 | inventory_days,存货周转天数(天) 59 | currentasset_turnover,流动资产周转率(次) 60 | currentasset_days,流动资产周转天数(天) 61 | season,年+季度 62 | ''' 63 | result_yingyun = 
ts.get_operation_data(2018,1).loc[:,['code','name']] 64 | for i in [2016,2017,2018]: 65 | if i != 2018: 66 | for j in [1,2,3,4]: 67 | columns = ['code','name','arturnover'+str(i)+str(j),'arturndays'+str(i)+str(j),'inventory_turnover'+str(i)+str(j),'inventory_days'+str(i)+str(j),'currentasset_turnover'+str(i)+str(j),'currentasset_days'+str(i)+str(j)] 68 | result_1 = pd.DataFrame(ts.get_operation_data(i,j).values,columns = columns) 69 | result_1 = result_1.drop(['name'],axis=1) 70 | result_yingyun = pd.merge(result_yingyun, result_1, on='code',how='left') 71 | if i == 2018: 72 | j = 1 73 | columns = ['code','name','arturnover'+str(i)+str(j),'arturndays'+str(i)+str(j),'inventory_turnover'+str(i)+str(j),'inventory_days'+str(i)+str(j),'currentasset_turnover'+str(i)+str(j),'currentasset_days'+str(i)+str(j)] 74 | result_1 = pd.DataFrame(ts.get_operation_data(i,j).values,columns = columns) 75 | result_1 = result_1.drop(['name'],axis=1) 76 | result_yingyun = pd.merge(result_yingyun, result_1, on='code',how='left') 77 | result_yingyun = result_yingyun.drop_duplicates() 78 | result_yingyun.to_csv('D:/999github/anack/M1809/result_yingyun.csv',index =False) 79 | ''' 80 | 成长能力表 81 | 82 | code,代码 83 | name,名称 84 | mbrg,主营业务收入增长率(%) 85 | nprg,净利润增长率(%) 86 | nav,净资产增长率 87 | targ,总资产增长率 88 | epsg,每股收益增长率 89 | seg,股东权益增长率 90 | season,年+季度 91 | ''' 92 | result_chengzhang = ts.get_growth_data(2018,1).loc[:,['code','name']] 93 | for i in [2016,2017,2018]: 94 | if i != 2018: 95 | for j in [1,2,3,4]: 96 | columns = ['code','name','mbrg'+str(i)+str(j),'nprg'+str(i)+str(j),'nav'+str(i)+str(j),'targ'+str(i)+str(j),'epsg'+str(i)+str(j),'seg'+str(i)+str(j)] 97 | result_1 = pd.DataFrame(ts.get_growth_data(i,j).values,columns = columns) 98 | result_1 = result_1.drop(['name'],axis=1) 99 | result_chengzhang = pd.merge(result_chengzhang, result_1, on='code',how='left') 100 | if i == 2018: 101 | j = 1 102 | columns = ['code','name','mbrg'+str(i)+str(j),'nprg'+str(i)+str(j),'nav'+str(i)+str(j),'targ'+str(i)+str(j),'epsg'+str(i)+str(j),'seg'+str(i)+str(j)] 103 | result_1 = pd.DataFrame(ts.get_growth_data(i,j).values,columns = columns) 104 | result_1 = result_1.drop(['name'],axis=1) 105 | result_chengzhang = pd.merge(result_chengzhang, result_1, on='code',how='left') 106 | result_chengzhang = result_chengzhang.drop_duplicates() 107 | result_chengzhang.to_csv('D:/999github/anack/M1809/result_chengzhang.csv',index =False) 108 | ''' 109 | 偿债能力表 110 | 111 | code,代码 112 | name,名称 113 | currentratio,流动比率 114 | quickratio,速动比率 115 | cashratio,现金比率 116 | icratio,利息支付倍数 117 | sheqratio,股东权益比率 118 | adratio,股东权益增长率 119 | season,年+季度 120 | ''' 121 | result_changzhai = ts.get_debtpaying_data(2018,1).loc[:,['code','name']] 122 | for i in [2016,2017,2018]: 123 | if i != 2018: 124 | for j in [1,2,3,4]: 125 | columns = ['code','name','currentratio'+str(i)+str(j),'quickratio'+str(i)+str(j),'cashratio'+str(i)+str(j),'icratio'+str(i)+str(j),'sheqratio'+str(i)+str(j),'adratio'+str(i)+str(j)] 126 | result_1 = pd.DataFrame(ts.get_debtpaying_data(i,j).values,columns = columns) 127 | result_1 = result_1.drop(['name'],axis=1) 128 | result_changzhai = pd.merge(result_changzhai, result_1, on='code',how='left') 129 | if i == 2018: 130 | j = 1 131 | columns = ['code','name','currentratio'+str(i)+str(j),'quickratio'+str(i)+str(j),'cashratio'+str(i)+str(j),'icratio'+str(i)+str(j),'sheqratio'+str(i)+str(j),'adratio'+str(i)+str(j)] 132 | result_1 = pd.DataFrame(ts.get_debtpaying_data(i,j).values,columns = columns) 133 | result_1 = 
result_1.drop(['name'],axis=1) 134 | result_changzhai = pd.merge(result_changzhai, result_1, on='code',how='left') 135 | result_changzhai = result_changzhai.drop_duplicates() 136 | result_changzhai.to_csv('D:/999github/anack/M1809/result_changzhai.csv',index =False) 137 | ''' 138 | 现金流量表 139 | 140 | code,代码 141 | name,名称 142 | cf_sales,经营现金净流量对销售收入比率 143 | rateofreturn,资产的经营现金流量回报率 144 | cf_nm,经营现金净流量与净利润的比率 145 | cf_liabilities,经营现金净流量对负债比率 146 | cashflowratio,现金流量比率 147 | season,年+季度 148 | ''' 149 | result_xianjin = ts.get_cashflow_data(2018,1).loc[:,['code','name']] 150 | for i in [2016,2017,2018]: 151 | if i != 2018: 152 | for j in [1,2,3,4]: 153 | columns = ['code','name','cf_sales'+str(i)+str(j),'rateofreturn'+str(i)+str(j),'cf_nm'+str(i)+str(j),'cf_liabilities'+str(i)+str(j),'cashflowratio'+str(i)+str(j)] 154 | result_1 = pd.DataFrame(ts.get_cashflow_data(i,j).values,columns = columns) 155 | result_1 = result_1.drop(['name'],axis=1) 156 | result_xianjin = pd.merge(result_xianjin, result_1, on='code',how='left') 157 | if i == 2018: 158 | j = 1 159 | columns = ['code','name','cf_sales'+str(i)+str(j),'rateofreturn'+str(i)+str(j),'cf_nm'+str(i)+str(j),'cf_liabilities'+str(i)+str(j),'cashflowratio'+str(i)+str(j)] 160 | result_1 = pd.DataFrame(ts.get_cashflow_data(i,j).values,columns = columns) 161 | result_1 = result_1.drop(['name'],axis=1) 162 | result_xianjin = pd.merge(result_xianjin, result_1, on='code',how='left') 163 | result_xianjin = result_xianjin.drop_duplicates() 164 | result_xianjin.to_csv('D:/999github/anack/M1809/result_xianjin.csv',index =False) 165 | 166 | -------------------------------------------------------------------------------- /M1809/doc/xst/M1809_finance_crawling_target.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | ''' 3 | 类名:M1809_finance_crawling_target(财务分析数据爬虫) 4 | 作者:徐抒田 5 | 日期:2018-5-28 6 | 描述: 7 | 1、筛选17年和18年复合增长率大于10%的股票作为TARGET 8 | 2、选取close为当日的股价 9 | 3、年股价计算(12个月均价) 10 | 版本号:V0.1 11 | ''' 12 | 13 | import tushare as ts 14 | import pandas as pd 15 | 16 | stock_code_num = ts.get_profit_data(2018,1)['code'].tolist() 17 | 18 | a=[] 19 | data = pd.DataFrame() 20 | for j in stock_code_num: 21 | try: 22 | df = ts.get_k_data(j,ktype = 'M')[['date','close','code']] 23 | for i in df['date']: 24 | a.append(i[0:4]) 25 | df['date'] = a 26 | a=[] 27 | df = pd.concat([df[df.date == '2016'],df[df.date == '2017'],df[df.date == '2018']]) 28 | data = pd.concat([data,df]) 29 | print(j) 30 | except: 31 | stock_code_num.remove(j) 32 | print('error!') 33 | print(j) 34 | 35 | data_mean = data.pivot_table('close',index='code',columns=['date'],aggfunc='mean',fill_value=0) 36 | data_var = data.pivot_table('close',index='code',columns=['date'],aggfunc='std',fill_value=0) 37 | 38 | 39 | df_final = pd.DataFrame({'code' : data_mean.index, 40 | 'yiliu_mean' : data_mean['2016'], 41 | 'yiqi_mean' : data_mean['2017'], 42 | 'yiba_mean' : data_mean['2018'], 43 | 'yiliu_var' : data_var['2016'], 44 | 'yiqi_var' : data_var['2017'], 45 | 'yiba_var' : data_var['2018'] 46 | }) 47 | 48 | df_final = df_final[df_final.yiliu_mean != 0] 49 | df_final['firstincrase'] = (df_final['yiqi_mean'] - df_final['yiliu_mean'])/df_final['yiliu_mean'] 50 | df_final['secondincrase'] = (df_final['yiba_mean'] - df_final['yiqi_mean'])/df_final['yiba_mean'] 51 | 52 | df_final[(df_final.firstincrase > 0.1) & (df_final.secondincrase > 0.1)& (df_final.yiliu_var < 15)& (df_final.yiqi_var < 15)& (df_final.yiba_var < 15)] 53 | 
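# Side notes on the crawlers above (sketches only; `bad_codes` and `seasons`
# are hypothetical names, not part of the original scripts):
#
# 1. The download loop calls `stock_code_num.remove(j)` inside `except` while
#    iterating over that same list, which makes the iterator silently skip
#    the element that follows each removal. Collecting failures and filtering
#    afterwards avoids this:
#
#        bad_codes = []
#        for j in stock_code_num:
#            try:
#                ...  # fetch monthly k-data and concat, as above
#            except Exception:
#                bad_codes.append(j)
#        stock_code_num = [c for c in stock_code_num if c not in bad_codes]
#
# 2. Each table crawler in M1809_finance_crawling.py repeats the same body
#    once per quarter with a special case for 2018; a single season list
#    removes the duplicated branches:
#
#        seasons = [(y, q) for y in (2016, 2017) for q in (1, 2, 3, 4)] + [(2018, 1)]
#        for y, q in seasons:
#            ...  # build the suffixed columns, fetch, merge on 'code'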
df_final.to_csv('D:/999github/anack/M1809/target.csv',index =False) 54 | -------------------------------------------------------------------------------- /M1809/doc/表头说明/balance_columns.txt: -------------------------------------------------------------------------------- 1 | 货币资金 h1 2 | 交易性金融资产 h2 3 | 衍生金融资产 h3 4 | 应收票据 h4 5 | 应收账款 h5 6 | 预付款项 h6 7 | 应收利息 h7 8 | 应收股利 h8 9 | 其他应收款 h9 10 | 买入返售金融资产 h10 11 | 存货 h11 12 | 划分为持有待售的资产 h12 13 | 一年内到期的非流动资产 h13 14 | 待摊费用 h14 15 | 待处理流动资产损益 h15 16 | 其他流动资产 h16 17 | 流动资产合计 h17 18 | 发放贷款及垫款 h18 19 | 可供出售金融资产 h19 20 | 持有至到期投资 h20 21 | 长期应收款 h21 22 | 长期股权投资 h22 23 | 投资性房地产 h23 24 | 固定资产净额 h24 25 | 在建工程 h25 26 | 工程物资 h26 27 | 固定资产清理 h27 28 | 生产性生物资产 h28 29 | 公益性生物资产 h29 30 | 油气资产 h30 31 | 无形资产 h31 32 | 开发支出 h32 33 | 商誉 h33 34 | 长期待摊费用 h34 35 | 递延所得税资产 h35 36 | 其他非流动资产 h36 37 | 非流动资产合计 h37 38 | 资产总计 h38 39 | 短期借款 h39 40 | 交易性金融负债 h40 41 | 应付票据 h41 42 | 应付账款 h42 43 | 预收款项 h43 44 | 应付手续费及佣金 h44 45 | 应付职工薪酬 h45 46 | 应交税费 h46 47 | 应付利息 h47 48 | 应付股利 h48 49 | 其他应付款 h49 50 | 预提费用 h50 51 | 一年内的递延收益 h51 52 | 应付短期债券 h52 53 | 一年内到期的非流动负债 h53 54 | 其他流动负债 h54 55 | 流动负债合计 h55 56 | 长期借款 h56 57 | 应付债券 h57 58 | 长期应付款 h58 59 | 长期应付职工薪酬 h59 60 | 专项应付款 h60 61 | 预计非流动负债 h61 62 | 递延所得税负债 h62 63 | 长期递延收益 h63 64 | 其他非流动负债 h64 65 | 非流动负债合计 h65 66 | 负债合计 h66 67 | 实收资本(或股本) h67 68 | 资本公积 h68 69 | 减:库存股 h69 70 | 其他综合收益 h70 71 | 专项储备 h71 72 | 盈余公积 h72 73 | 一般风险准备 h73 74 | 未分配利润 h74 75 | 归属于母公司股东权益合计 h75 76 | 少数股东权益 h76 77 | 所有者权益(或股东权益)合计 h77 78 | 负债和所有者权益(或股东权益)总计 h78 79 | stock_code h79 80 | Date h80 -------------------------------------------------------------------------------- /M1809/doc/表头说明/cash_flow_columns.txt: -------------------------------------------------------------------------------- 1 | 销售商品、提供劳务收到的现金 h1 2 | 收到的税费返还 h2 3 | 收到的其他与经营活动有关的现金 h3 4 | 经营活动现金流入小计 h4 5 | 购买商品、接受劳务支付的现金 h5 6 | 支付给职工以及为职工支付的现金 h6 7 | 支付的各项税费 h7 8 | 支付的其他与经营活动有关的现金 h8 9 | 经营活动现金流出小计 h9 10 | 经营活动产生的现金流量净额 h10 11 | 收回投资所收到的现金 h11 12 | 取得投资收益所收到的现金 h12 13 | 处置固定资产、无形资产和其他长期资产所收回的现金净额 h13 14 | 处置子公司及其他营业单位收到的现金净额 h14 15 | 收到的其他与投资活动有关的现金 h15 16 | 投资活动现金流入小计 h16 17 | 购建固定资产、无形资产和其他长期资产所支付的现金 h17 18 | 投资所支付的现金 h18 19 | 取得子公司及其他营业单位支付的现金净额 h19 20 | 支付的其他与投资活动有关的现金 h20 21 | 投资活动现金流出小计 h21 22 | 投资活动产生的现金流量净额 h22 23 | 吸收投资收到的现金 h23 24 | 其中:子公司吸收少数股东投资收到的现金 h24 25 | 取得借款收到的现金 h25 26 | 发行债券收到的现金 h26 27 | 收到其他与筹资活动有关的现金 h27 28 | 筹资活动现金流入小计 h28 29 | 偿还债务支付的现金 h29 30 | 分配股利、利润或偿付利息所支付的现金 h30 31 | 其中:子公司支付给少数股东的股利、利润 h31 32 | 支付其他与筹资活动有关的现金 h32 33 | 筹资活动现金流出小计 h33 34 | 筹资活动产生的现金流量净额 h34 35 | 四、汇率变动对现金及现金等价物的影响 h35 36 | 五、现金及现金等价物净增加额 h36 37 | 加:期初现金及现金等价物余额 h37 38 | 六、期末现金及现金等价物余额 h38 39 | 净利润 h39 40 | 少数股东权益 h40 41 | 未确认的投资损失 h41 42 | 资产减值准备 h42 43 | 固定资产折旧、油气资产折耗、生产性物资折旧 h43 44 | 无形资产摊销 h44 45 | 长期待摊费用摊销 h45 46 | 待摊费用的减少 h46 47 | 预提费用的增加 h47 48 | 处置固定资产、无形资产和其他长期资产的损失 h48 49 | 固定资产报废损失 h49 50 | 公允价值变动损失 h50 51 | 递延收益增加(减:减少) h51 52 | 预计负债 h52 53 | 财务费用 h53 54 | 投资损失 h54 55 | 递延所得税资产减少 h55 56 | 递延所得税负债增加 h56 57 | 存货的减少 h57 58 | 经营性应收项目的减少 h58 59 | 经营性应付项目的增加 h59 60 | 已完工尚未结算款的减少(减:增加) h60 61 | 已结算尚未完工款的增加(减:减少) h61 62 | 其他 h62 63 | 经营活动产生现金流量净额 h63 64 | 债务转为资本 h64 65 | 一年内到期的可转换公司债券 h65 66 | 融资租入固定资产 h66 67 | 现金的期末余额 h67 68 | 现金的期初余额 h68 69 | 现金等价物的期末余额 h69 70 | 现金等价物的期初余额 h70 71 | 现金及现金等价物的净增加额 h71 72 | stock_code h72 73 | Date h73 -------------------------------------------------------------------------------- /M1809/doc/表头说明/profit_columns: -------------------------------------------------------------------------------- 1 | 一、营业总收入 h1 2 | 营业收入 h2 3 | 二、营业总成本 h3 4 | 营业成本 h4 5 | 营业税金及附加 h5 6 | 销售费用 h6 7 | 
管理费用 h7 8 | 财务费用 h8 9 | 资产减值损失 h9 10 | 公允价值变动收益 h10 11 | 投资收益 h11 12 | 其中:对联营企业和合营企业的投资收益 h12 13 | 汇兑收益 h13 14 | 三、营业利润 h14 15 | 加:营业外收入 h15 16 | 减:营业外支出 h16 17 | 其中:非流动资产处置损失 h17 18 | 四、利润总额 h18 19 | 减:所得税费用 h19 20 | 五、净利润 h20 21 | 归属于母公司所有者的净利润 h21 22 | 少数股东损益 h22 23 | 基本每股收益(元/股) h23 24 | 稀释每股收益(元/股) h24 25 | 七、其他综合收益 h25 26 | 八、综合收益总额 h26 27 | 归属于母公司所有者的综合收益总额 h27 28 | 归属于少数股东的综合收益总额 h28 29 | stock_code h29 30 | Date h30 31 | -------------------------------------------------------------------------------- /M1809/doc/表头说明/profit_columns.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/M1809/doc/表头说明/profit_columns.txt -------------------------------------------------------------------------------- /M1809/doc/表头说明/开发建议.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/M1809/doc/表头说明/开发建议.txt -------------------------------------------------------------------------------- /M1809/doc/表头说明/新建 Microsoft Excel 工作表.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/M1809/doc/表头说明/新建 Microsoft Excel 工作表.xlsx -------------------------------------------------------------------------------- /M1809/src/App.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sat Sep 1 19:35:20 2018 4 | 5 | @author: yinchao 6 | """ 7 | 8 | import UserApi 9 | 10 | 11 | id_list = ['000651', '000333', '600690', '600522'] 12 | if __name__ =='__main__': 13 | UserApi.Init(id_list,'SQL') 14 | UserApi.GetData('ON') 15 | UserApi.Analyse() -------------------------------------------------------------------------------- /M1809/src/Config.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Thu Jun 14 01:47:40 2018 4 | 5 | @author: yinchao 6 | """ 7 | import sys 8 | sys.path.append('../..') 9 | 10 | from datetime import datetime 11 | import crawling_finance_table 12 | import crawling_finance_table_v1_7 13 | import pymysql 14 | import os 15 | def Connect_sql(account): 16 | conn = pymysql.connect( 17 | host = account[0].strip(), 18 | port = 3306, 19 | user = account[1].strip(), 20 | passwd = account[2].strip(), 21 | db = account[3].strip(), 22 | charset = "utf8" 23 | ) 24 | 25 | cur = conn.cursor() 26 | # print(account) 27 | print("\nconnect to aliyun success!\n") 28 | return cur 29 | 30 | 31 | 32 | 33 | global parameter 34 | parameter = [ 35 | '总资产', 36 | '净资产', 37 | '资产负债比', 38 | '流动资产', 39 | '一年内到期的长期负债', 40 | '应收账款', 41 | '预收账款', 42 | '存货', 43 | '营业收入', 44 | '营业成本', 45 | '营业税金及附加', 46 | '财务费用', 47 | '营业外收入', 48 | '净利润', 49 | '除非净利润', 50 | '每股收益', 51 | '经营净额', 52 | '投资净额', 53 | '筹资净额', 54 | '汇率影响', 55 | '现金净增加额', 56 | '期末现金余额', 57 | '流动比率', 58 | '资产周转率', 59 | '存货周转率', 60 | '溢价比', 61 | '市盈率', 62 | '市净率', 63 | '名义净资产收益率', 64 | '真实净资产收益率', 65 | '毛利率', 66 | '营收增长率', 67 | '除非净利润增长率', 68 | '股息率', 69 | '分红率'] 70 | 71 | global company_id_list 72 | company_id_list = ['000651', '000333', '600690'] #此处可以修改 73 | global data_base_path 74 | data_base_path = '../history_data/' 75 | 76 | global data_src 77 | global cur 78 | 79 | def M1809_config(company_list, mode = 'CSV'): 80 | ''' 81 | 本地模式配置 82 | 
只需要提供感兴趣的对比公司即可,如果只有一个,说明只进行自主分析 83 | ''' 84 | global data_base_path 85 | global data_src 86 | global cur 87 | global parameter 88 | global company_id_list 89 | data_src = mode 90 | company_id_list = company_list 91 | 92 | print('please wait, check for updating...') 93 | 94 | try: #自动检查并创建文件夹 95 | os.mkdir('../history_data') 96 | except: 97 | pass 98 | try: #自动检查并创建文件夹 99 | os.mkdir('../sys_config') 100 | except: 101 | pass 102 | try: #自动检查并创建文件夹 103 | os.mkdir('../output') 104 | except: 105 | pass 106 | 107 | if len(company_list) < 2: 108 | print('最少需要输入2个id作为对比') 109 | return 110 | #此处增加id合法性检查 111 | 112 | 113 | if data_src == 'SQL' or data_src == 'sql': 114 | ''' 115 | 网络模式配置 116 | 以读文件的方式获取配置参数 117 | 1. 读取待考察的参数 118 | 2. 读取公司名称列表,并转换成id(如果输入无法解析成id,会自动剔除) 119 | 3. 更新该公司的财务报表,以备以后使用 120 | 注意:文件名不可改 121 | ''' 122 | try: 123 | with open('../sys_config/account.cfg', 'r') as fh: 124 | account = fh.readlines() 125 | except: 126 | print('fail to initialize.') 127 | return 128 | 129 | cur = Connect_sql(account) 130 | # 此处增加ID合法性检查 131 | # id_list = [] 132 | # for name in company_id_list: 133 | # cmd = "select * from anack_classify where name = \'"+name+"\';" 134 | # cur.execute(cmd) 135 | # result = cur.fetchall() 136 | # try: 137 | # id = result[0][0] 138 | # id_list.append(id) 139 | # 140 | # except: #错误的ID号不会被解析(刚上市的,不会出现在anack_classify里,需要更新) 141 | # print(name+' is not in list') 142 | # pass 143 | M1809_Update(cur, company_list) 144 | 145 | elif data_src == 'CSV' or data_src == 'csv': 146 | for item in company_list: 147 | try: 148 | file_name = data_base_path + item + '_profit.csv' 149 | # print(file_name) 150 | with open(file_name, 'r') as fh: 151 | from datetime import datetime 152 | from dateutil.parser import parse 153 | from dateutil.relativedelta import relativedelta 154 | content = fh.readlines() 155 | s = content[-1].split(',') 156 | latest_record = parse(s[0]) #获取最新时间 157 | 158 | current_day = datetime.now() - relativedelta(months=+12) 159 | if latest_record > current_day: 160 | pass 161 | else: 162 | cbfx = crawling_finance_table_v1_7.crawling_finance(data_base_path,item) 163 | cbfx.crawling_update() 164 | except: 165 | cbfx = crawling_finance_table_v1_7.crawling_finance(data_base_path,item) 166 | cbfx.crawling_update() 167 | else: 168 | print('模式设置错误,请二选一:CSV/SQL') 169 | 170 | print('finish init!') 171 | 172 | def M1809_Update(cur, id_list): 173 | ''' 174 | 更新数据库 175 | ''' 176 | print('check for update,please wait...') 177 | # print(id_list) 178 | for item in id_list: 179 | try: 180 | 181 | cmd = "select * from zichanfuzhai where h79 = \'" + item + "\' and h80 = \'" + str(datetime.now().year - 1)+"-12-31\';" 182 | cur.execute(cmd) 183 | result1 = cur.fetchall() 184 | except: 185 | print('updating ', item) 186 | cbfx = crawling_finance_table.crawling_finance('',item,'') 187 | cbfx.crawling_update() 188 | continue 189 | 190 | try: 191 | cmd2 = "select * from cashFlow where h72 = \'" + item + "\' and h73 = \'" + str(datetime.now().year - 1)+"-12-31\';" 192 | cur.execute(cmd2) 193 | result2 = cur.fetchall() 194 | except: 195 | print('updating ', item) 196 | cbfx = crawling_finance_table.crawling_finance('',item,'') 197 | cbfx.crawling_update() 198 | continue 199 | 200 | try: 201 | cmd3 = "select * from Profit where h29 = \'" + item + "\' and h30 = \'" + str(datetime.now().year - 1)+"-12-31\';" 202 | cur.execute(cmd3) 203 | result3 = cur.fetchall() 204 | trash_data = result3[0] #获得资产负债表信息 205 | except: 206 | print('updating ', item) 207 | cbfx = 
crawling_finance_table.crawling_finance('',item,'') 208 | cbfx.crawling_update() 209 | continue 210 | 211 | print('update check finished!') 212 | 213 | ############################################################################# 214 | if __name__ =='__main__': 215 | id_list = ['000651', '000333', '600690', '600522'] 216 | #网络测试 217 | M1809_config(id_list, 'SQL') 218 | 219 | #本地测试 220 | # M1809_config(id_list, 'CSV') 221 | -------------------------------------------------------------------------------- /M1809/src/M1809_finance_weight.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | ''' 3 | 类名:M1809_finance_weight 4 | 作者:徐抒田 5 | 日期:2018-7-9 6 | 描述: 7 | 机器学习方法确定,权重() 8 | 遗留问题: 9 | 1、资产负债比,营业税增长率,营业现金增长率,现金增长净额,期末现金字段待解决 10 | 2、从库读文件待解决 11 | 版本号:V0.1 12 | ''' 13 | 14 | 15 | import pandas as pd 16 | import numpy as np 17 | import matplotlib.pyplot as plt 18 | from sklearn import preprocessing 19 | from sklearn.model_selection import cross_val_score 20 | from sklearn.datasets import make_blobs 21 | from sklearn.ensemble import RandomForestClassifier 22 | from sklearn.model_selection import train_test_split 23 | from sklearn.metrics import classification_report 24 | from sklearn import metrics 25 | import lightgbm as lgb 26 | 27 | 28 | ''' 29 | 修改成从数据库中读取文件 30 | ''' 31 | result_yinli = pd.read_csv('D:/999github/anack/M1809/xst/result_yinli.csv') 32 | result_yingyun = pd.read_csv('D:/999github/anack/M1809/xst/result_yingyun.csv') 33 | result_chengzhang = pd.read_csv('D:/999github/anack/M1809/xst/result_chengzhang.csv') 34 | result_changzhai = pd.read_csv('D:/999github/anack/M1809/xst/result_changzhai.csv') 35 | result_xianjin = pd.read_csv('D:/999github/anack/M1809/xst/result_xianjin.csv') 36 | 37 | df_final = pd.read_csv('D:/999github/anack/M1809/xst/target.csv') 38 | 39 | df_final = df_final[(df_final.firstincrase > 0.1) & (df_final.secondincrase > 0.1)] 40 | df_final = pd.DataFrame({'code' : df_final['code'], 41 | 'label' : 1, 42 | }) 43 | 44 | 45 | data = result_yinli 46 | data = pd.merge(data, result_yingyun, on=['code','name']) 47 | data = pd.merge(data, result_chengzhang, on=['code','name']) 48 | data = pd.merge(data, result_changzhai, on=['code','name']) 49 | data = pd.merge(data, result_xianjin, on=['code','name']) 50 | data = pd.merge(data, df_final, on='code',how = 'left') 51 | 52 | # ============================================================================= 53 | # null_counts = data.isnull().sum() 54 | # print(null_counts) 55 | # ============================================================================= 56 | 57 | data = data.fillna(0) 58 | data = data.dropna(axis=0) 59 | 60 | 61 | 62 | 63 | orig_columns = data.columns 64 | drop_columns = [] 65 | for col in orig_columns: 66 | col_series = data[col].dropna().unique() 67 | if len(col_series) == 1: 68 | drop_columns.append(col) 69 | data = data.drop(drop_columns, axis=1) 70 | print(drop_columns) 71 | 72 | 73 | target = data['label'] 74 | code = data['code'] 75 | name = data['name'] 76 | features = data.drop(['code','name','label'],axis=1) 77 | 78 | features[features.currentratio20161 == '--'] = 0 79 | features[features.quickratio20161=='--']=0 80 | features[features.cashratio20161=='--']=0 81 | features[features.icratio20161=='--']=0 82 | features[features.sheqratio20161=='--']=0 83 | features[features.adratio20161=='--']=0 84 | features[features.currentratio20162=='--']=0 85 | features[features.quickratio20162=='--']=0 86 | features[features.cashratio20162=='--']=0 
87 | features[features.icratio20162=='--']=0 88 | features[features.sheqratio20162=='--']=0 89 | features[features.adratio20162=='--']=0 90 | features[features.currentratio20163=='--']=0 91 | features[features.quickratio20163=='--']=0 92 | features[features.cashratio20163=='--']=0 93 | features[features.icratio20163=='--']=0 94 | features[features.sheqratio20163=='--']=0 95 | features[features.adratio20163=='--']=0 96 | features[features.currentratio20164=='--']=0 97 | features[features.quickratio20164=='--']=0 98 | features[features.cashratio20164=='--']=0 99 | features[features.icratio20164=='--']=0 100 | features[features.currentratio20171=='--']=0 101 | features[features.quickratio20171=='--']=0 102 | features[features.cashratio20171=='--']=0 103 | features[features.icratio20171=='--']=0 104 | features[features.sheqratio20171=='--']=0 105 | features[features.adratio20171=='--']=0 106 | features[features.currentratio20172=='--']=0 107 | features[features.quickratio20172=='--']=0 108 | features[features.cashratio20172=='--']=0 109 | features[features.icratio20172=='--']=0 110 | features[features.currentratio20173=='--']=0 111 | features[features.quickratio20173=='--']=0 112 | features[features.cashratio20173=='--']=0 113 | features[features.icratio20173=='--']=0 114 | features[features.currentratio20174=='--']=0 115 | features[features.quickratio20174=='--']=0 116 | features[features.cashratio20174=='--']=0 117 | features[features.icratio20174=='--']=0 118 | features[features.currentratio20181=='--']=0 119 | features[features.quickratio20181=='--']=0 120 | features[features.cashratio20181=='--']=0 121 | features[features.icratio20181=='--']=0 122 | features = features.astype('float64') 123 | 124 | 125 | 126 | 127 | 128 | ##基于树的方法不用做标准化、归一化处理 129 | 130 | 131 | ''' 132 | 资产负债比,营业税增长率,营业现金增长率,现金增长净额,期末现金 133 | ''' 134 | features = features[['targ20174','nav20174','gross_profit_rate20174','cashflowratio20174','net_profit_ratio20174','mbrg20174','currentratio20174','currentasset_turnover20174','inventory_days20174']] 135 | 136 | 137 | 138 | def aucfun(act,pred): 139 | fpr,tpr,thresholds = metrics.roc_curve(act,pred) 140 | plt.plot(fpr, tpr, color='darkorange',lw=2) 141 | plt.xlim([0.0, 1.0]) 142 | plt.ylim([0.0, 1.05]) 143 | plt.xlabel('False Positive Rate') 144 | plt.ylabel('True Positive Rate') 145 | # ============================================================================= 146 | # print(fpr) 147 | # print(tpr) 148 | # print(thresholds) 149 | # ============================================================================= 150 | return metrics.auc(fpr,tpr) 151 | 152 | 153 | 154 | 155 | 156 | 157 | def ml_for_weight(features,target): 158 | 159 | 160 | min_max_scaler = preprocessing.MinMaxScaler() 161 | features_new = min_max_scaler.fit_transform(features) 162 | features = pd.DataFrame(features_new, columns=features.columns) 163 | X_train,X_test,y_train,y_test = train_test_split(features,target,test_size=0.25,random_state=42) 164 | 165 | ''' 166 | 调参 167 | ''' 168 | clf = lgb.LGBMClassifier( 169 | boosting_type='gbdt', num_leaves=31, reg_alpha=0, reg_lambda=1, 170 | max_depth=-1, n_estimators=800, objective='binary', 171 | subsample=0.7, colsample_bytree=0.7, subsample_freq=2, 172 | learning_rate=0.05, min_child_weight=20, random_state=2018, n_jobs=-1,class_weight = 'balanced' 173 | ) 174 | 175 | clf = clf.fit(X_train, y_train, eval_set=[(X_train, y_train),(X_test, y_test)], eval_names = ['train','test'],eval_metric='auc',early_stopping_rounds=100) 176 | 177 | y_pre = clf.predict(X_test) 
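    # Side note (a sketch, not part of the original file): `clf.predict`
    # above applies LightGBM's default 0.5 probability cutoff, while
    # `predict_proba` below returns the raw scores, so a custom cutoff is a
    # one-liner if positive-class recall needs tuning. The 0.3 here is an
    # arbitrary illustration, not a value taken from this project:
    #
    #     y_pre_custom = (clf.predict_proba(X_test)[:, 1] >= 0.3).astype(int)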
178 | 179 | y_pre_pro = clf.predict_proba(X_test)[:, 1] 180 | # ============================================================================= 181 | # print(y_pre_pro) 182 | # ============================================================================= 183 | print(classification_report(y_test,y_pre)) 184 | print(metrics.roc_auc_score(y_test,y_pre_pro)) #预测Y值得分 185 | aucfun(y_test,y_pre_pro) 186 | 187 | importances = clf.feature_importances_ 188 | indices = np.argsort(importances)[::-1] 189 | print("Feature ranking:") 190 | for f in range(features.shape[1]): 191 | print("%d. feature %d (%f): %s" % (f + 1, indices[f], importances[indices[f]] , features.columns[indices[f]] )) 192 | 193 | 194 | return features.columns,importances 195 | 196 | 197 | a,b = ml_for_weight(features,target) 198 | 199 | 200 | 201 | # ============================================================================= 202 | # y_pre_pro_f = clf.predict_proba(features)[:, 1] 203 | # 204 | # y_pre_pro_f = pd.DataFrame({'code' : code, 205 | # 'name' : name, 206 | # 'gailv' : y_pre_pro_f 207 | # }) 208 | # 209 | # y_pre_pro_f.to_csv('D:/999github/anack/M1809/y_pre_pro_f.csv',index =False) 210 | # ============================================================================= 211 | -------------------------------------------------------------------------------- /M1809/src/PlotAnalyse.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Tue July 24 08:26:21 2018 4 | 5 | @author: guqiuyang 6 | """ 7 | 8 | import numpy as np 9 | import CoreAnalyse 10 | from matplotlib import pyplot as plt 11 | 12 | 13 | # 绘制分组柱状图的函数 14 | def groupedbarplot(ax, x_data, x_data_name, y_data_list, y_data_names, colors, x_label, y_label, title): 15 | ''' 16 | 绘制输出报告个分析指标的柱状图 17 | ''' 18 | # 设置每一组柱状图的宽度 19 | total_width = 0.8 20 | # 设置每一个柱状图的宽度 21 | ind_width = total_width / len(y_data_list) 22 | # 计算每一个柱状图的中心偏移 23 | alteration = np.arange(-total_width / 2 + ind_width / 2, total_width / 2 + ind_width / 2, ind_width) 24 | 25 | # 分别绘制每一个柱状图 26 | for i in range(0, len(y_data_list)): 27 | # 横向散开绘制 28 | ax.bar(x_data + alteration[i], y_data_list[i], color=colors[i], label=y_data_names[i], width=ind_width) 29 | ax.set_ylabel(y_label) 30 | ax.set_xlabel(x_label) 31 | ax.set_xticks(x_data) 32 | ax.set_xticklabels(x_data_name) 33 | ax.set_title(title) 34 | ax.legend(loc='upper right') 35 | 36 | 37 | # 4. 
绘图分析 38 | def PlotAnalyse(data): 39 | ''' 40 | 个股纵向对比绘图逻辑 41 | ''' 42 | # 设置图片尺寸 20" x 15" 43 | plt.rc('figure', figsize=(14, 14)) 44 | # 设置字体 14 45 | plt.rc('font', size=14) 46 | # 不显示网格 47 | plt.rc('axes', grid=False) 48 | # 设置背景颜色是白色 49 | plt.rc('axes', facecolor='white') 50 | # 显示中文标签 51 | plt.rcParams['font.sans-serif'] = ['SimHei'] 52 | # 正常显示正负号 53 | plt.rcParams['axes.unicode_minus'] = False 54 | 55 | # 资产水平分析 56 | avg, last, level = CoreAnalyse.GetGrowth(data, 0) # 总资产_复合增长率 57 | avg_, last_, level_ = CoreAnalyse.GetGrowth(data, 1) # 净资产_复合增长率 58 | rate = CoreAnalyse.GetRate(data, 3, 0) # 流动资产_总资产占比 59 | debt_avg, debt_last = CoreAnalyse.GetAverage(data, 2) # 资产负债比_平均水平 60 | 61 | x1 = data.iloc[:, [0]].index.tolist() 62 | x2 = np.arange(4) 63 | x2_data_name = ['总资产增长率', '净资产增长率', '流动资产占比', '资产负债比'] 64 | y1 = data.iloc[:, [0, 1, 3]] 65 | y2 = [[avg, avg_, rate, debt_avg], [last, last_, 0, debt_last]] 66 | 67 | _, axs = plt.subplots(2, 1, figsize=(14, 14)) 68 | axs[0].plot(x1, y1, 'o-') 69 | axs[0].set_title('体量') 70 | axs[0].set_ylabel('元') 71 | axs[0].set_xlabel('年份') 72 | axs[0].legend(loc='upper left') 73 | 74 | groupedbarplot(axs[1] 75 | , x_data=x2 76 | , x_data_name=x2_data_name 77 | , y_data_list=y2 78 | , y_data_names=['长期', '去年'] 79 | , colors=['#539caf', '#7663b0'] 80 | , x_label='数据指标' 81 | , y_label='增幅比例' 82 | , title='资产水平分析') 83 | 84 | # 经营质量分析 85 | avg1, last1, _ = CoreAnalyse.GetGrowth(data, 8) # 营业收入_复合增长率 86 | avg2, last2 = CoreAnalyse.GetAverage(data, 30) # 毛利率 87 | avg3, last3, _ = CoreAnalyse.GetGrowth(data, 14) # 除非净利润 88 | avg4, last4, _ = CoreAnalyse.GetGrowth(data, 10) # 营业税 89 | rate = CoreAnalyse.GetRate(data, 12, 8) # 现金与净资产的占比关系 90 | avg5, last5 = CoreAnalyse.GetAverage(data, 33) #股息率 91 | avg6, last6 = CoreAnalyse.GetAverage(data, 34) #分红率 92 | 93 | x1 = np.arange(3) 94 | x1_data_name = ['现金/净资产', '股息率', '分红率'] 95 | x2 = np.arange(4) 96 | x2_data_name = ['营收增长率', '毛利率', '除非净利润增长率', '营业税增长率'] 97 | y1 = [[0, avg5, avg6], [rate, last5, last6]] 98 | y2 = [[avg1, avg2, avg3, avg4], [last1, last2, last3, last4]] 99 | 100 | _, axs = plt.subplots(2, 1, figsize=(14, 14)) 101 | groupedbarplot(axs[0] 102 | , x_data=x1 103 | , x_data_name=x1_data_name 104 | , y_data_list=y1 105 | , y_data_names=['长期', '去年'] 106 | , colors=['#539caf', '#7663b0'] 107 | , x_label='数据指标' 108 | , y_label='增幅比例' 109 | , title='经营质量分析') 110 | 111 | groupedbarplot(axs[1] 112 | , x_data=x2 113 | , x_data_name=x2_data_name 114 | , y_data_list=y2 115 | , y_data_names=['长期', '去年'] 116 | , colors=['#539caf', '#7663b0'] 117 | , x_label='数据指标' 118 | , y_label='增幅比例' 119 | , title='经营质量分析') 120 | 121 | # 现金流分析 122 | avg1, last1, _ = CoreAnalyse.GetGrowth(data, 16) # 营业现金 123 | avg2, last2, _ = CoreAnalyse.GetGrowth(data, 20) # 增加的现金 124 | avg3, last3, _ = CoreAnalyse.GetGrowth(data, 21) # 期末现金 125 | rate = CoreAnalyse.GetRate(data, 21, 1) # 现金与净资产的占比关系 126 | 127 | x1 = np.arange(4) 128 | x1_data_name = ['营业现金增长率', '现金增长净额', '期末现金', '现金与净资产的占比'] 129 | y1 = [[avg1, avg2, avg3, 0], [last1, last2, last3, rate]] 130 | 131 | _, axs = plt.subplots(1, 1, figsize=(10, 7)) 132 | groupedbarplot(axs 133 | , x_data=x1 134 | , x_data_name=x1_data_name 135 | , y_data_list=y1 136 | , y_data_names=['长期', '去年'] 137 | , colors=['#539caf', '#7663b0'] 138 | , x_label='数据指标' 139 | , y_label='增幅比例' 140 | , title='现金流分析') 141 | 142 | # 4.营运质量分析 143 | avg1, last1 = CoreAnalyse.GetAverage(data, 22) # 流动比率 144 | avg2, last2 = CoreAnalyse.GetAverage(data, 23) # 资产周转率 145 | avg3, last3 = CoreAnalyse.GetAverage(data, 24) # 
存货周转率 146 | 147 | x1 = np.arange(3) 148 | x1_data_name = ['流动比率', '资产周转率', '存货周转率'] 149 | y1 = [[avg1, avg2, avg3], [last1, last2, last3]] 150 | 151 | _, axs = plt.subplots(1, 1, figsize=(10, 7)) 152 | groupedbarplot(axs 153 | , x_data=x1 154 | , x_data_name=x1_data_name 155 | , y_data_list=y1 156 | , y_data_names=['长期', '去年'] 157 | , colors=['#539caf', '#7663b0'] 158 | , x_label='数据指标' 159 | , y_label='增幅比例' 160 | , title='营运参数分析') 161 | plt.show() -------------------------------------------------------------------------------- /M1809/src/UserApi.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sat Sep 1 19:22:58 2018 4 | 5 | @author: yinchao 6 | """ 7 | import PlotAnalyse 8 | import CoreAnalyse 9 | import Config 10 | 11 | global self_result 12 | global compare_result 13 | 14 | def Init(company_id_list, data_src = 'CSV'): 15 | ''' 16 | 初始化配置函数 17 | company_id_list:待考察的id列表(1个到n个,eg: ['000651','00124','600660'] 18 | data_src: 'SQL'数据来源是数据库, 'CSV'数据来源是读文件 19 | ''' 20 | Config.M1809_config(company_id_list, data_src) 21 | 22 | def GetData(file_switch = 'ON'): 23 | ''' 24 | 获取财务原始数据 25 | file_switch: 'ON'结果输出到文本(默认) 'OFF'原始结果不输出 26 | 返回值: a->自身对比原始结果 b->同行业对比结果(归一化处理) 27 | 备注:a,b两个返回值原封不动交给Analyse函数进行分析即可 28 | ''' 29 | global self_result 30 | global compare_result 31 | 32 | self_result = CoreAnalyse.Compare2Themself(Config.company_id_list[0]) #自身对比 33 | b1= CoreAnalyse.Compare2Industry(Config.company_id_list) #同行业对比 34 | compare_result = CoreAnalyse.data_normalize(b1) #归一化的同行业对比 35 | if file_switch == 'ON': 36 | self_result.to_csv('../output/compare_self.csv', encoding= 'gbk') 37 | b1.to_csv('../output/compare_industry.csv', encoding = 'gbk') 38 | compare_result.to_csv('../output/normalize.csv', encoding = 'gbk') 39 | 40 | return self_result, compare_result 41 | 42 | def Analyse(): 43 | ''' 44 | 对比分析,并输出 45 | 1. ../output/文件夹下会生成诊断报告 46 | 2. 
the console prints the comparison charts (saving them as image files could be added later)
47 |     '''
48 |     global self_result
49 |     global compare_result
50 |     CoreAnalyse.Analyse(self_result, compare_result)
51 |     PlotAnalyse.PlotAnalyse(self_result)
52 | 
53 | 
54 | if __name__ =='__main__':
55 |     id_list = ['000651', '000333', '600690']
56 |     Init(id_list,'CSV')  # Init has no return value
57 |     a = CoreAnalyse.Compare2Themself(id_list[0])  # analyse the first id, mirroring GetData
--------------------------------------------------------------------------------
/M1809/src/get_dividends_history.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Mon Mar 26 21:29:43 2018
4 | 
5 | @author: 尹超
6 | # This module fetches a stock's historical dividend records and returns them as a DataFrame
7 | """
8 | import pandas as pd
9 | import requests
10 | from requests.exceptions import RequestException
11 | from bs4 import BeautifulSoup
12 | 
13 | 
14 | headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36'}
15 | 
16 | def get_one_page(url):
17 |     try:
18 |         response = requests.get(url,headers = headers)
19 |         response.encoding = 'GB2312'
20 |         if response.status_code == 200:
21 |             return response.text
22 |         return None
23 |     except RequestException:
24 |         return None
25 | 
26 | def parse(html):
27 |     raw_data = []
28 |     try:
29 |         year_raw = []
30 |         year = []
31 |         bonus_share = []
32 |         bonus_convert = []
33 |         profit_send = []
34 |         ex_rights = []
35 |         register_day = []
36 | 
37 |         soup = BeautifulSoup(html,'html5lib')
38 |         l = soup.select('table#sharebonus_1')
39 |         ls = l[0].tbody
40 |         lls = ls.select('td')
41 |         for l in lls:
42 |             if (l.get_text().strip()) != '预案' and \
43 |                (l.get_text().strip()) != '实施' and \
44 |                (l.get_text().strip()) != '不分配' and \
45 |                (l.get_text().strip()) != '查看':
46 |                 raw_data.append(l.get_text().strip())
47 | 
48 |         year_raw = raw_data[::7]
49 | #        print(raw_data)   # if parsing fails, inspect this output
50 | #        print(year_raw)   # if parsing fails, inspect this output
51 |         for item in year_raw:
52 |             a = pd.to_datetime(item).year - 1
53 |             year.append(a)
54 |         bonus_share = raw_data[1::7]
55 |         bonus_convert = raw_data[2::7]
56 |         profit_send = raw_data[3::7]
57 |         ex_rights = raw_data[4::7]
58 |         register_day = raw_data[5::7]
59 | #        print(register_day)
60 |         data = {'年度':year,
61 |                 '送股':bonus_share,
62 |                 '转股':bonus_convert,
63 |                 '派息':profit_send,
64 |                 '除权日':ex_rights,
65 |                 '登记日':register_day
66 |                 }
67 |         frame = pd.DataFrame(data)
68 |         return frame
69 |     except:
70 |         print('cannot parse this page')
71 | 
72 | def parse_single_year(html,Year):
73 |     raw_data = []
74 |     try:
75 |         year_raw = []
76 |         year = []
77 |         bonus_share = []
78 |         bonus_convert = []
79 |         profit_send = []
80 |         ex_rights = []
81 |         register_day = []
82 | #        print('it is ',Year)
83 |         soup = BeautifulSoup(html,'html5lib')
84 |         l = soup.select('table#sharebonus_1')
85 |         ls = l[0].tbody
86 |         lls = ls.select('td')
87 |         for l in lls:
88 |             if (l.get_text().strip()) != '预案' and \
89 |                (l.get_text().strip()) != '实施' and \
90 |                (l.get_text().strip()) != '不分配' and \
91 |                (l.get_text().strip()) != '查看':
92 |                 raw_data.append(l.get_text().strip())
93 | 
94 |         year_raw = raw_data[::7]
95 | #        print(raw_data)   # if parsing fails, inspect this output
96 | #        print(year_raw)   # if parsing fails, inspect this output
97 |         for item in year_raw:
98 |             a = pd.to_datetime(item).year - 1
99 |             year.append(a)
100 |         bonus_share = raw_data[1::7]
101 |         bonus_convert = raw_data[2::7]
102 |         profit_send = raw_data[3::7]
103 |         ex_rights = raw_data[4::7]
104 |         register_day = raw_data[5::7]
105 | #        print(register_day)
106 |         data = {'年度':year,
107 |                 '送股':bonus_share,
108 |                 '转股':bonus_convert,
109 |                 '派息':profit_send,
110 |                 '除权日':ex_rights,
111 |                 '登记日':register_day
112 |                 }
113 | 
114 |         frame = pd.DataFrame(data)
115 | 
116 |         Len=len(frame)
117 |         for i in range(Len):
118 |             s=int(frame.iloc[i,[0]])
119 |             Date=frame.iloc[i,[2]]
120 |             date2=Date.loc['登记日']
121 | #            print(s,date2)
122 |             if s == Year:
123 |                 px=float(frame.iloc[i,[1]])
124 |                 date2=date2[:4]+date2[5:7]+date2[8:]
125 | #                print(s,'px money is ',px,date2)
126 |                 return px,date2
127 |         return -1
128 |     except:
129 |         print('cannot parse this page')
130 | 
131 | # Get the cash payout per 10 shares and the record date for one year
132 | def get_px_single_year(id,Year):
133 |     url = 'http://vip.stock.finance.sina.com.cn/corp/go.php/vISSUE_ShareBonus/stockid/'
134 |     url += str(id)
135 |     url += '.phtml'
136 |     html = get_one_page(url)
137 |     return parse_single_year(html,Year)
138 | # User-facing helper: takes a stock ID and parses the full dividend history table
139 | def get_bonus_table(id):
140 |     url = 'http://vip.stock.finance.sina.com.cn/corp/go.php/vISSUE_ShareBonus/stockid/'
141 |     url += str(id)
142 |     url += '.phtml'
143 |     html = get_one_page(url)
144 |     return parse(html)
145 | 
146 | ###############################################################################
147 | ###############################################################################
148 | # Example app code; re-comment it after use (600066)
149 | #s = get_bonus_table('601012')
150 | #print(s)
151 | # The 2017 dividend is actually paid out in 2018, so its record date falls in 2018
152 | # test the dividend yield helpers
153 | #PX,Date=get_px_single_year('601012',2017)
154 | #print(PX,Date)
--------------------------------------------------------------------------------
/M1809/src/get_price.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Fetch K-line data for a given stock ID over a given period from the 163 (NetEase) quote service
4 | """
5 | import requests
6 | import re
7 | import datetime
8 | import pandas as pd
9 | '''
10 | 
11 | Full URL template:
12 | http://quotes.money.163.com/service/chddata.html?code=0%06d&start=%d&end=%d&fields=TCLOSE;HIGH;LOW;TOPEN;LCLOSE;VOTURNOVER;VATURNOVER
13 | '''
14 | 
15 | 
16 | def get_close_price(id, day = 0):
17 |     '''
18 |     Get the closing price of a given stock ID on a given date
19 |     Input:   id  -> stock ID as a str: '600660'
20 |              day -> date as a str: '20180626'
21 |     Returns: price as a str: '25.54'; 0 if that day is a market holiday
22 |     '''
23 |     if day == 0:
24 |         day = datetime.datetime.now() - datetime.timedelta(days=1)
25 |         day = day.strftime("%Y%m%d")
26 |     if id[:3] == '000' or id[:3] == '002' or id[:3] == '300':  # Shenzhen codes get prefix 1
27 |         nid = '1' + id
28 |     else:  # Shanghai main-board codes get prefix 0
29 |         nid = '0' + id
30 |     url = "http://quotes.money.163.com/service/chddata.html?code=%s&start=%s&end=%s&\
31 | fields=TCLOSE" %(nid, day,day)
32 |     res = requests.get(url)
33 |     res.raise_for_status()
34 | 
35 |     for chunk in res.iter_content(100000):
36 | #        print(chunk)
37 |         pattern = '[^,\r\n]+'
38 |         obj = re.compile(pattern)
39 |         match = obj.findall(chunk.decode('gbk'))
40 |         #print(match)
41 |         if len(match) < 8:
42 |             return 0
43 |         else:
44 |             return match[-1]
45 | 
46 | def get_period_k_day(id, start_day, stop_day = 0):
47 |     '''
48 |     Get K-line data for a given stock ID over a period
49 |     Input:   id        -> stock ID as a str: '600660'
50 |              start_day -> date as a str: '20180626'
51 |              stop_day  -> same format; defaults to yesterday
52 |     Returns: a DataFrame
53 |     '''
54 |     if stop_day == 0:
55 |         day = datetime.datetime.now() - datetime.timedelta(days=1)
56 |         stop_day = day.strftime("%Y%m%d")  # assign back to stop_day so the default actually reaches the URL
57 | 
58 |     if id[:3] == '000' or id[:3] == '002' or id[:3] == '300':  # Shenzhen codes get prefix 1
59 |         nid = '1' + id
60 |     else:  # Shanghai main-board codes get prefix 0
61 |         nid = '0' + id
62 |     url = "http://quotes.money.163.com/service/chddata.html?code=%s&start=%s&end=%s&\
63 | fields=TCLOSE;HIGH;LOW;TOPEN;LCLOSE;VOTURNOVER;VATURNOVER" %(nid, start_day, stop_day)
64 | 
65 | 
66 | # url = 
"http://quotes.money.163.com/service/chddata.html?code=0%s&start=%s&end=%s&\ 67 | # fields=TCLOSE;HIGH;LOW;TOPEN;LCLOSE;VOTURNOVER;VATURNOVER" %(id, start_day,stop_day) 68 | res = requests.get(url) 69 | res.raise_for_status() 70 | # playFile = open(file_name, 'wb') 71 | 72 | raw_data = [] 73 | for chunk in res.iter_content(1000000): 74 | # playFile.write(chunk) 75 | chunk = chunk.decode('gbk') 76 | pattern = '[^,\r\n]+' 77 | obj = re.compile(pattern) 78 | match = obj.findall(chunk) 79 | if len(match) < 8: #如果没有数据 80 | return 0 81 | 82 | header = match[:10] #如果增加字段,则此处以下需要相应修改 83 | # print(header) 84 | raw_data = match[10:] 85 | date = raw_data[::10] 86 | idc = raw_data[1::10] 87 | name = raw_data[2::10] 88 | price = raw_data[3::10] 89 | high = raw_data[4::10] 90 | lopen = raw_data[5::10] 91 | yesterday_close = raw_data[6::10] 92 | low = raw_data[7::10] 93 | vol = raw_data[8::10] 94 | mount = raw_data[9::10] 95 | 96 | data = { 97 | # header[0]:date, 98 | header[1]:idc, 99 | header[2]:name, 100 | header[3]:price, 101 | header[4]:high, 102 | header[5]:lopen, 103 | header[6]:yesterday_close, 104 | header[7]:low, 105 | header[8]:vol, 106 | header[9]:mount 107 | } 108 | df = pd.DataFrame(data,index = date) 109 | # playFile.close() 110 | return df 111 | 112 | 113 | def k_day_to_csv(code, stop_day = 0): 114 | ''' 115 | 更新k线数据,并保存到本地,默认为更新到昨天 116 | code:目标个股,只能为'000xxx'形式 117 | stop_day: 0->昨天, 20170101:更新到指定的一天 118 | @更新逻辑: 119 | 1. 如果无记录,则自动创建csv文件,默认为:ID.kday 120 | 2. 如果有部分记录,则自动分析,并将后续的内容更新 121 | 3. 如果记录比需要更新的更新,则直接返回 122 | 123 | 缺陷:得到的数据是没有复权的,应该进行前复权 124 | ''' 125 | base_path = './' #修改此处可以更改文件存放路径,可以考虑作为一个配置参数 126 | start_day = '19970101' #start时间统一从1997年开始 127 | #参数合法性检查 128 | if isinstance(code,list): 129 | print('is a list') 130 | elif isinstance(code,str): 131 | file_name = code + '.csv' 132 | # print(file_name) 133 | else: 134 | print('bad input. please check it') 135 | return 136 | 137 | file_name = base_path + file_name 138 | # print(file_name) 139 | 140 | update_flag = 1 #1代表重新生成, 2代表更新 3代表无需处理 141 | #判断最新的是第几天 142 | try: 143 | with open(file_name,'r') as fh: 144 | content = fh.readlines() 145 | if len(content) > 2: #获取最新记录,总是在第二行 146 | latest_record = content[1].split(',') 147 | 148 | from datetime import datetime 149 | from dateutil.parser import parse 150 | latest_day = parse(latest_record[0]) 151 | now = datetime.now().strftime('%Y-%m-%d') 152 | yesterday = parse(now) 153 | 154 | if yesterday > latest_day: 155 | update_flag = 2 156 | print('not the latest') 157 | else: 158 | update_flag = 3 159 | print(code + ' already the latest') 160 | return 161 | except: 162 | update_flag = 1 163 | print('no record') 164 | 165 | #不同的情况适用不同更新逻辑 166 | if update_flag == 1: #完全更新 167 | r = get_period_k_day(code, start_day) 168 | r.to_csv(file_name, encoding= 'gbk') 169 | elif update_flag == 2: 170 | r = get_period_k_day(code, start_day) #此处没有办法在首部添加 171 | r.to_csv(file_name, encoding= 'gbk') #如果可以的话,则不必每次重写 172 | return 173 | print('finish ' + code + ' update') 174 | return 175 | 176 | def k_day_update(id_list, stop_day = 0): 177 | ''' 178 | 用户API,更新个股的K线数据,可以是列表,也可以是str 179 | ''' 180 | #参数合法性检查 181 | if isinstance(id_list,list): 182 | print('is a list') 183 | for s in id_list: 184 | k_day_to_csv(s,stop_day) 185 | elif isinstance(id_list,str): 186 | k_day_to_csv(id_list,stop_day) 187 | else: 188 | print('bad input. 
please check it') 189 | return 190 | 191 | if __name__ == '__main__': 192 | id = '601012' 193 | start_day = '20100625' 194 | stop_day = '20180904' 195 | 196 | #获取昨天的收盘价 197 | # price = get_close_price(id) 198 | # print(price) 199 | 200 | # #获取指定一天的收盘价 201 | # price = get_close_price('600660','20170209') 202 | # print(price) 203 | # 204 | # #获取从start_day开始直到昨天的收盘价 205 | # s = get_period_price('600660',start_day) 206 | # print(s) 207 | # 208 | # #获取指定时间段内的收盘价 209 | # s = get_period_k_day('601012',start_day,stop_day) 210 | # print(s) 211 | # s.to_csv('test.csv', encoding= 'gbk') 212 | 213 | #更新K线数据并存文档 214 | company_list = ['600660', '600066', '000651', '600522', '601012', '600887'] 215 | k_day_update(company_list) 216 | k_day_update('600066') 217 | 218 | -------------------------------------------------------------------------------- /M1809/src/trade_day.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sun May 20 16:23:28 2018 4 | 5 | @author: YinChao 6 | @date: 20180520 7 | """ 8 | 9 | import urllib.request as request 10 | import datetime 11 | import time 12 | ''' 13 | @query a single date: string '20170401'; 14 | @api return day_type: 0 workday 1 weekend 2 holiday -1 err 15 | @function return day_type: 1 workday 0 weekend&holiday 16 | ''' 17 | 18 | 19 | def get_day_type(query_date): 20 | ''' 21 | 节假日求取辅助函数,从指定网址上获取当日状态 22 | 0工作日 1周末 2节假日 23 | http://tool.bitefu.net/jiari/?d=20181009 返回0(工作日) 24 | http://tool.bitefu.net/jiari/?d=20181014 返回1(周末) 25 | http://tool.bitefu.net/jiari/?d=20181001 返回2(国庆节) 26 | ''' 27 | url = 'http://tool.bitefu.net/jiari/?d=' + query_date 28 | resp = request.urlopen(url) 29 | content = resp.read() 30 | if content: 31 | try: 32 | day_type = int(content) 33 | except ValueError: 34 | return -1 35 | else: 36 | return day_type 37 | else: 38 | return -1 39 | 40 | 41 | def isWorkingTime(): 42 | ''' 43 | 判断当前时刻是否工作日上班时间(未考虑节假日影响) 44 | ''' 45 | workTime=['09:00:00','18:00:00'] 46 | dayOfWeek = datetime.datetime.now().weekday() 47 | beginWork=datetime.datetime.now().strftime("%Y-%m-%d")+' '+workTime[0] 48 | endWork=datetime.datetime.now().strftime("%Y-%m-%d")+' '+workTime[1] 49 | beginWorkSeconds=time.time()-time.mktime(time.strptime(beginWork, '%Y-%m-%d %H:%M:%S')) 50 | endWorkSeconds=time.time()-time.mktime(time.strptime(endWork, '%Y-%m-%d %H:%M:%S')) 51 | if (int(dayOfWeek) in range(5)) and int(beginWorkSeconds)>0 and int(endWorkSeconds)<0: 52 | return 1 53 | else: 54 | return 0 55 | 56 | def isWorkingDay(): 57 | ''' 58 | 判断今天是否工作日 59 | ''' 60 | dayOfWeek = datetime.datetime.now().weekday() #今天星期几? 
61 | if dayOfWeek < 5: #weekday()返回0(周一)~6(周日),工作日为0~4 62 | return 1 63 | else: 64 | return 0 65 | 66 | 67 | def is_tradeday(query_date): 68 | ''' 69 | 判断给定日期是否股市交易日(考虑了节假日的影响) 70 | ''' 71 | weekday = datetime.datetime.strptime(query_date, '%Y%m%d').isoweekday() 72 | if weekday <= 5 and get_day_type(query_date) == 0: 73 | return 1 74 | else: 75 | return 0 76 | 77 | 78 | def today_is_tradeday(): 79 | ''' 80 | 判断今天是否股市交易日(考虑了节假日的影响) 81 | ''' 82 | query_date = datetime.datetime.strftime(datetime.datetime.today(), '%Y%m%d') 83 | return is_tradeday(query_date) 84 | 85 | 86 | if __name__ == '__main__': 87 | print(is_tradeday('20171229')) 88 | print(today_is_tradeday()) -------------------------------------------------------------------------------- /M1809/src/txttoexcel.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Thu Jul 12 20:17:19 2018 4 | @author: John 5 | """ 6 | 7 | from openpyxl import Workbook 8 | from openpyxl.utils import get_column_letter 9 | import re 10 | import time 11 | 12 | # read txt data 13 | 14 | 15 | def read_txt(inputfiles): 16 | p1 = r"(.*)[0-9](.*?).*" #仅保留含有数字的行 17 | temp_list = [] 18 | with open(inputfiles,'r',encoding='gbk') as f: 19 | for line in f: 20 | matchObj = re.match(p1,line) 21 | if matchObj: 22 | line = line.strip() 23 | temp_list.append(line) 24 | return temp_list 25 | 26 | 27 | 28 | def parse_line(aline): 29 | aline = aline.replace(":"," ") 30 | aline = aline.replace(":"," ") 31 | aline = aline.replace(","," ") 32 | aline = aline.split() 33 | return aline 34 | 35 | 36 | # create excel files 37 | def generate_excel(temp_list,inputfiles): 38 | rows = len(temp_list) 39 | 40 | wb = Workbook() 41 | 42 | dest_filename = inputfiles[:-4] + '.xlsx' 43 | ws1 = wb.active 44 | 45 | ws1.title = "Analysis report V1" 46 | 47 | for row in range(rows): 48 | aline = temp_list[row] 49 | aline = parse_line(aline) 50 | for col in range(len(aline)): 51 | ws1.cell(column=col + 1, row=row + 1, value="{0}".format(aline[col])) 52 | 53 | wb.save(filename = dest_filename) 54 | 55 | if __name__ == "__main__": 56 | inputfiles = r"D:\600522_20180714.txt" 57 | file_list = read_txt(inputfiles) 58 | generate_excel(file_list,inputfiles) 59 | -------------------------------------------------------------------------------- /M1809/sys_config/账户配置.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/M1809/sys_config/账户配置.txt -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Welcome to anack 2 | 3 | ## anack是什么? 4 | anack是一款金融数据分析工具,用于实现股市投资中的基本面分析,提供投资建议,最终形成一种量化交易工具 5 | 6 | ## anack具备什么功能?
7 | * 获取多种金融原始数据 8 | * 实现个股基本面分析 9 | * 实现个股价值估计并提供投资建议 10 | * 实现宏观经济形势分析与A股趋势预判 11 | * 实现量化交易功能 12 | 13 | ## 谁会对anack有兴趣 14 | * 广大股民 15 | * 人工智能、大数据工程师 16 | * 量化交易开发者 17 | 18 | 19 | ## 已发布工具速查 20 | * HK_insider。 实现港股持股披露信息分析 21 | * YT_produce_sell。实现宇通客车的产销数据分析 22 | 23 | 24 | ## 数据接口速览(持续添加): 25 | * [实时数据_福耀玻璃](http://hq.sinajs.cn/list=sh600660) 26 | * [实时数据_上证综指](http://hq.sinajs.cn/list=s_sh000001) 27 | * [实时数据_深成指数](http://hq.sinajs.cn/list=s_sz399001) 28 | * [日线图_福耀玻璃](http://image.sinajs.cn/newchart/daily/n/sh600660.gif) 29 | * [月线图_福耀玻璃](http://image.sinajs.cn/newchart/monthly/n/sh600660.gif) 30 | * [成交明细](http://market.finance.sina.com.cn/downxls.php?date=2011-07-08&symbol=sh600660) 31 | * [当日分价表](http://vip.stock.finance.sina.com.cn/quotes_service/view/cn_price.php?symbol=sh600660) 32 | * [多日分价表](http://market.finance.sina.com.cn/pricehis.php?symbol=sh600660&startdate=2011-08-17&enddate=2011-08-19) 33 | 34 | 35 | >## 反馈交流 36 | >在使用中有任何问题,欢迎反馈给我,可以用以下邮件跟我交流 37 | 38 | >*yc86247931@126.com* 39 | 40 | >*shutian318@163.com* 41 | 42 | ## SubProject1 基于PYTHON和树莓派的盈亏分析平台设计 43 | 44 | 45 | #1.通过Python爬取网页获取实时金融指标数据 46 |  http://hq.sinajs.cn/list=sz000651(每天更新) 47 | choice(需要付费) 48 | http://sc.hkexnews.hk/TuniS/www.hkexnews.hk/sdw/search/mutualmarket_c.aspx?t=sh (含历史数据) 49 | 50 |  http://money.finance.sina.com.cn/corp/go.php/vFD_CashFlow/stockid/000651/ctrl/2017/displaytype/4.phtml (爬取历史的报表数据) 51 | 52 | #2.指标的实时监控,有预警信息后推送手机 53 | 54 | #3.经过历史指标筛选出值得投资的长期股票及适合买入时机(具体算法再商议),历史数据的存储放在数据库或者树莓派上完成。 55 | 同时建模获取短期投资股票时机,短信提示手机 56 | 57 | #4.训练一个模拟操盘手,按每周/每月进行操作,最后按照盈亏指标来验证训练模型好坏 58 | 59 | 60 | ## 理念 61 | 62 | 63 | 把炒股的经验做成算法,利用软件来实现。同时利用软件来发掘新的机会(机器学习)。 64 | 其次可以发布推荐信息。设置自己的持仓后,一方面根据算法向用户发布买卖信号。另一方面算法 65 | 在内部自己计算操作盈亏(用户可以无视买卖信号),最终可以通过比对二者差异来确定算法的好坏 66 | 67 | 开发阶段可以设置多种算法同时运行,针对某一个具体指标。可以通过对比来确定使用哪种策略更有效 68 | 直接利用已经发生了的数据进行海量机器学习。 69 | 70 | 可以实时模拟投资,看最终的投资结果 71 | 72 | 73 | ## 架构设计 74 | 75 | 76 | 整体框架,需要实现的功能规划好 77 | 功能: 78 | 1. 能够实时监控数据变化 79 | 2. 能够根据指定的算法进行相关的输出 80 | 3. 能够根据算法进行模拟操盘并可以自己分析收益 81 | 4. 能够自主学习,用以验证经验的有效性 82 | -------------------------------------------------------------------------------- /Release/HK_insider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/Release/HK_insider.py -------------------------------------------------------------------------------- /Release/ReleaseNote.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/Release/ReleaseNote.txt -------------------------------------------------------------------------------- /Release/YTProductionAndSale/DataAnalyze.py: -------------------------------------------------------------------------------- 1 | # 作者:尹超 2 | # 更新日期:2018-5-13 3 | # 版本号:V0.3 4 | # 描述:本程序用于宇通客车产销快报数据的分析 5 | # 内容:系统初始化、数据读入、数据处理、绘图、统计分析 6 | # 备注: 7 | # 1. 务必保持工程下有连续的xlsx文件,并确保文件名有效 8 | # 2. 确保year/lastmonth和文件一致,否则会出现错误 9 | # 3. 
暂时只能靠手工将pdf文件转成xlsx文件,以后可以考虑做成全自动的 10 | 11 | # 修改记录:2018-1-6.修复了产销比计算错误的bug 12 | #修改记录:2018-5-13.更改数据来源,由离线数据变为数据库 13 | from pylab import * 14 | mpl.rcParams['font.sans-serif'] = ['SimHei'] 15 | 16 | 17 | import DataToSql 18 | import pandas as pd 19 | from pandas import Series, DataFrame 20 | import matplotlib.pyplot as plt 21 | class DataAnalyze: 22 | def __init__(self,year,month): 23 | self.year=year 24 | self.month = month 25 | 26 | def run(self): 27 | 28 | dataBase=DataToSql.ProductionSaleToSql(YearBegin = 2018,MonthBegin = 7) 29 | # data handle 30 | CurSale = [] #今年销量明细 31 | CurTotalSale = [] #今年销量累计 32 | CurProduce = [] #今年产量 33 | CurTotalProduce = [] #今年产量累计 34 | 35 | LastSale = [] 36 | LastTotalSale = [] 37 | LastProduce = [] 38 | LastTotalProduce = [] 39 | 40 | CurBigSale = [] #大车销量 41 | CurMidSale = [] #中车销量 42 | CurSmallSale = [] #小车销量 43 | 44 | LastBigSale = [] #大车销量 45 | LastMidSale = [] #中车销量 46 | LastSmallSale = [] #小车销量 47 | 48 | sum_cur0 = 0 49 | sum_last0 = 0 50 | sum_cur1 = 0 51 | sum_last1 = 0 52 | idx = [] 53 | 54 | for i in range(1,self.month+1): 55 | CurProduceData=dataBase.QueryPSData(str(self.year),str(i),"production") 56 | CurProduce.append(CurProduceData) 57 | 58 | LastProducData=dataBase.QueryPSData(str(self.year),str(i),"SPLY_production") 59 | LastProduce.append(LastProducData) 60 | 61 | sum_cur0+=CurProduceData 62 | sum_last0+=LastProducData 63 | CurTotalProduce.append(sum_cur0) 64 | LastTotalProduce.append(sum_last0) 65 | 66 | CurSaleData=dataBase.QueryPSData(str(self.year),str(i),"sale") 67 | CurSale.append(CurSaleData) 68 | 69 | LastSaleData=dataBase.QueryPSData(str(self.year),str(i),"SPLY_sale") 70 | LastSale.append(LastSaleData) 71 | 72 | sum_cur1 += CurSaleData 73 | sum_last1 += LastSaleData 74 | CurTotalSale.append(sum_cur1) 75 | LastTotalSale.append(sum_last1) 76 | 77 | CurBigSaleData=dataBase.QueryPSData(str(self.year),str(i),"large_sale") 78 | CurBigSale.append(CurBigSaleData) 79 | 80 | CurMidSaleData=dataBase.QueryPSData(str(self.year),str(i),"mid_sale") 81 | CurMidSale.append(CurMidSaleData) 82 | 83 | CurSmallSaleData=dataBase.QueryPSData(str(self.year),str(i),"small_sale") 84 | CurSmallSale.append(CurSmallSaleData) 85 | 86 | LastBigSaleData=dataBase.QueryPSData(str(self.year),str(i),"SPLY_sale_large") 87 | LastBigSale.append(LastBigSaleData) 88 | 89 | LastMidSaleData=dataBase.QueryPSData(str(self.year),str(i),"SPLY_sale_mid") 90 | LastMidSale.append(LastMidSaleData) 91 | 92 | LastSmallSaleData=dataBase.QueryPSData(str(self.year),str(i),"SPLY_sale_small") 93 | LastSmallSale.append(LastSmallSaleData) 94 | 95 | idx.append(str(i)+'月') 96 | print (CurSale) 97 | #汇总数据,什么都有 98 | Stat = DataFrame([CurProduce,LastProduce,CurTotalProduce,LastTotalProduce,CurSale,LastSale,CurTotalSale,LastTotalSale,CurBigSale,CurMidSale,CurSmallSale,LastBigSale,LastMidSale,LastSmallSale]) 99 | Stat = Stat.T 100 | Stat.index = idx 101 | Stat.columns=['今年产量','去年产量','今年产量累计','去年产量累计', '今年销量','去年销量','今年销量累计','去年销量累计','今年大车产量','今年中车产量','今年小车产量','去年大车产量','去年中车产量','去年小车产量'] 102 | 103 | #-------------------------------------------------------------------- 104 | # plot 105 | 106 | #不同年份的对比 107 | DiffYearCmp = Stat.iloc[:,[0,1]] 108 | DiffYearCmp.plot(kind='bar') 109 | plt.xlabel('month') #横坐标标签 110 | plt.ylabel('quantity') #纵坐标标签 111 | #plt.xticks(rotation=45) #坐标标号旋转 112 | plt.title('宇通客车月产量对比') 113 | 114 | DiffYearTotal = Stat.iloc[:,[2,3]] 115 | DiffYearTotal.plot() 116 | plt.xlabel('month') #横坐标标签 117 | plt.ylabel('quantity') #纵坐标标签 118 | plt.title('宇通客车总产量对比') 119 | 120 | 
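# 注意(存疑,供核对):上方 Stat 中“今年大车产量/今年中车产量/今年小车产量”及对应“去年…产量”各列,
# 实际由 CurBigSale/CurMidSale/CurSmallSale、LastBigSale 等销量数据填充,列名疑似应为“销量”;
# 下方“产品结构对比”图(Stat.iloc[:,[8,9,10]])使用的正是这些销量列。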
#相同年份的对比 121 | SameYearCmp = Stat.iloc[:,[0,4]] 122 | SameYearCmp.plot(kind='bar') 123 | plt.xlabel('month') #横坐标标签 124 | plt.ylabel('quantity') #纵坐标标签 125 | plt.title('宇通客车产销量对比') 126 | 127 | SameYearDiff = Stat.iloc[:,[8,9,10]] 128 | SameYearDiff.plot(kind='bar') 129 | plt.xlabel('month') #横坐标标签 130 | plt.ylabel('quantity') #纵坐标标签 131 | plt.title('产品结构对比') 132 | plt.show() 133 | 134 | #-------------------------------------------------------------------- 135 | #analyse 136 | #1.今年和往年相比的增量 137 | print('统计汇总报告,截止'+str(self.year)+'年'+str(self.month)+'月。。。') 138 | print('-----------------------------------------------') 139 | print('1:产销同比') 140 | IncRate = DiffYearTotal.iloc[self.month-1,:].pct_change() * -100 141 | a = IncRate.round(2) #保留两位小数 142 | print('产量同比增长:'+str(a[1])+'%') 143 | IncRate = (Stat.iloc[self.month-1,6] - Stat.iloc[self.month-1,7])/Stat.iloc[self.month-1,7]*100 144 | a = ("%.2f" % IncRate) #保留两位小数 145 | print('销量同比增长:'+ a +'%') 146 | print('-----------------------------------------------') 147 | #2.产销比是否健康? 148 | print('2.产销结构统计') 149 | total = SameYearCmp.sum() 150 | rate = total.pct_change() * 100 151 | a = rate.round(2) #保留两位小数 152 | print('产销差异:'+str(abs(a[1]))+'%') 153 | if(abs(a[1]) <= 1): 154 | print('产销结构很健康') 155 | print('-----------------------------------------------') 156 | #3.月产量是否有异动 157 | print('3.月产量波动情况') 158 | diff = DiffYearCmp.pct_change().round(2) * 100 159 | s1=diff.今年产量 160 | s2=diff.去年产量 161 | print('每月产量增幅') 162 | for i in range(1,self.month): 163 | print(s1[i],end='\t') 164 | print('') 165 | print('-----------------------------------------------') 166 | #4.产品结构是否发生了重大变化? 167 | print('4.产品结构变化') 168 | s=SameYearDiff.T 169 | s_sum=s.sum() 170 | Rate = (s/s_sum).round(2) 171 | print(Rate) 172 | 173 | #--------------------------------------------------------------------------- 174 | # 用户代码示例 175 | # 使用前确保数据库中有相应数据 176 | 177 | if __name__ == "__main__": 178 | DA=DataAnalyze(2017,2) 179 | DA.run() 180 | -------------------------------------------------------------------------------- /Release/YTProductionAndSale/PdfDown.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sat Apr 7 18:35:34 2018 4 | 5 | @author: 54206 6 | """ 7 | import requests 8 | import datetime 9 | from requests.exceptions import RequestException 10 | import re 11 | import urllib 12 | import os 13 | import shutil 14 | def save_to_file(file_name, contents): 15 | fh = open(file_name, 'w') 16 | fh.write(contents) 17 | fh.close() 18 | 19 | class FolderNotCleanException (Exception): 20 | pass 21 | 22 | class PdfDownLoad: 23 | def __init__(self,year=2016,month=1,downloadAdrr = 'D:/downloadTest/'): 24 | self.headers = {'Accept':'*/*', 25 | 'Accept-Encoding':'gzip, deflate', 26 | 'Accept-Language':'zh-CN,zh;q=0.9', 27 | 'Connection': 'keep-alive', 28 | 'Cookie': 'yfx_c_g_u_id_10000042=_ck18030722220815231570139781377; VISITED_STOCK_CODE=%5B%22600066%22%5D; VISITED_MENU=%5B%229062%22%2C%229729%22%2C%228307%22%5D; UM_distinctid=1629a80cb0b7e3-0da2cab7416fbd-c343567-144000-1629a80cb0c185; websearch=%22900957%22%3A%22%u51CC%u4E91B%u80A1%22%2C%22603966%22%3A%22%u6CD5%u5170%u6CF0%u514B%22%2C%22603933%22%3A%22%u777F%u80FD%u79D1%u6280%22%2C%22603955%22%3A%22%u5927%u5343%u751F%u6001%22%2C%22600066%22%3A%22%u5B87%u901A%u5BA2%u8F66%22; VISITED_COMPANY_CODE=%5B%22600066%22%2C%22%5Bobject%20Object%5D%22%5D; 
seecookie=%5B900957%5D%3A%u51CC%u4E91B%u80A1%2C%5B603966%5D%3A%u6CD5%u5170%u6CF0%u514B%2C%5B603933%5D%3A%u777F%u80FD%u79D1%u6280%2C%5B603955%5D%3A%u5927%u5343%u751F%u6001%2C%5B600066%5D%3A%u5B87%u901A%u5BA2%u8F66%2C%u5B87%u901A%u5BA2%u8F66%u4EA7%u9500%u5FEB%u62A5; yfx_f_l_v_t_10000042=f_t_1520432528520__r_t_1522998662756__v_t_1523023134976__r_c_7', 29 | 'Host':'query.sse.com.cn', 30 | 'Referer':'http://www.sse.com.cn/home/search/?webswd=%E5%AE%87%E9%80%9A%E5%AE%A2%E8%BD%A6%E4%BA%A7%E9%94%80%E5%BF%AB%E6%8A%A5', 31 | 'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.146 Safari/537.36'} 32 | self.year = year 33 | self.month = month 34 | self.downloadAdrr=downloadAdrr 35 | self.pdfList=[] 36 | 37 | 38 | def get_one_page(self,url): 39 | try: 40 | response = requests.get(url,headers = self.headers) 41 | response.encoding = 'utf-8' 42 | if response.status_code == 200: 43 | return response.text 44 | return None 45 | except RequestException as e: 46 | print (e) 47 | return None 48 | 49 | def getCurrentPage(self,url,beginDate): 50 | RList=[] 51 | html = self.get_one_page(url) 52 | Reguler =r"\\/disclosure\\/listedinfo\\/announcement\\/c\\/.*?pdf" 53 | pattern = re.compile(Reguler) 54 | ls = pattern.findall(html) 55 | #print (ls) 56 | for eachLink in ls: 57 | element = eachLink.split('\\/') 58 | YMD=element[-2].split("-") 59 | year = int(YMD[0]) 60 | month = int(YMD[1]) 61 | day = int(YMD[2]) 62 | eachDate = datetime.date(year,month,day) 63 | if(eachDate.__ge__(beginDate)): 64 | RList.append(eachLink) 65 | else: 66 | break 67 | #print (RList) 68 | return RList 69 | 70 | 71 | 72 | def getAllPDFAdd(self): 73 | AllList = [] 74 | beginDate = datetime.date(self.year,self.month+1,1) 75 | beginNum =1 76 | RLength = 10 77 | url1=r"http://query.sse.com.cn/search/getSearchResult.do?search=qwjs&jsonCallBack=jQuery111205573825303579625_1523023138864&page=" 78 | url2=r"&searchword=T_L+CTITLE+T_D+E_KEYWORDS+T_JT_E+likeT_L%E5%AE%87%E9%80%9A%E5%AE%A2%E8%BD%A6%E4%BA%A7%E9%94%80%E5%BF%AB%E6%8A%A5T_RT_R&orderby=-CRELEASETIME&perpage=10&_=1523023138865" 79 | while (RLength==10): 80 | url = url1+str(beginNum)+url2 81 | Rlist = self.getCurrentPage(url,beginDate) 82 | RLength = len(Rlist) 83 | AllList =AllList+Rlist 84 | beginNum=beginNum+1 85 | return AllList 86 | 87 | 88 | 89 | def getFile(self,url): 90 | pdf_name = url.split('/')[-1] 91 | file_name = self.downloadAdrr+pdf_name 92 | u = urllib.request.urlopen(url) 93 | f = open(file_name, 'wb') 94 | 95 | block_sz = 8192 96 | while True: 97 | buffer = u.read(block_sz) 98 | if not buffer: 99 | break 100 | 101 | f.write(buffer) 102 | f.close() 103 | print ("Sucessful to download" + " " + pdf_name) 104 | return pdf_name 105 | 106 | 107 | 108 | 109 | def FolderClean(self): 110 | for i in os.listdir(self.downloadAdrr): 111 | path_file = os.path.join(self.downloadAdrr,i) # 取文件路径 112 | if os.path.isfile(path_file): 113 | os.remove(path_file) 114 | if os.path.isdir(path_file): 115 | shutil.rmtree(path_file) 116 | if os.listdir(self.downloadAdrr): #如果文件夹没有清理干净,抛出异常 117 | raise FolderNotCleanException 118 | 119 | 120 | def GetAllPdfFile(self): 121 | self.FolderClean() 122 | AllList = self.getAllPDFAdd() 123 | baseUrl = r"http://www.sse.com.cn" 124 | for EachList in AllList: 125 | url=baseUrl+EachList 126 | #url.replace('\','/') 127 | urlList = url.split('/') 128 | url = "" 129 | for Each in urlList: 130 | if (Each==urlList[0]): 131 | url=Each 132 | elif(Each==urlList[-1]): 133 | url=url+'/'+Each 134 | else: 135 | 
url=url+'/'+Each[:-1] 136 | pdf_name=self.getFile(url) 137 | self.pdfList.append(pdf_name) 138 | 139 | def RPdfList(self): 140 | return self.pdfList 141 | 142 | if __name__ == "__main__": 143 | i = PdfDownLoad() 144 | i.GetAllPdfFile() 145 | print (i.RPdfList()) 146 | -------------------------------------------------------------------------------- /Release/YTProductionAndSale/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------- /Release/YTProductionAndSale/使用说明.txt: -------------------------------------------------------------------------------- 1 | 模块中包含两个单独功能: 2 | 1、宇通客车采销数据入库 3 | 2、宇通客车采销数据分析 4 | 5 | 一、采销数据入库 6 | 环境搭建步骤: 7 | 1、下载pdf2htmlex 8 | 下载地址:http://soft.rubypdf.com/software/pdf2htmlex-windows-version,下载后缀为win32-static的版本 9 | 2、解压pdf2htmlex到\anack\Release\YTProductionAndSale\ExeFile下,保证exe文件在ExeFile文件下,而不要在ExeFile下建文件夹保存 10 | 运行脚本步骤 11 | 1、在anack\Release\YTProductionAndSale\config下新建account.txt文件,用于登录远程数据库(与M1809的文件一致) 12 | 2、调用DataToSql.py 13 | Update = ProductionSaleToSql(YearBegin = 2016,MonthBegin = 5) #新建实例,参数代表对2016年5月以上的数据进行入库处理 14 | Update.ProSaleUpdate() #调用入库方法 15 | 16 | 二、采销数据分析 17 | 运行脚本步骤: 18 | DA=DataAnalyze(2017,2) #初始化实例,参数代表对2017.1-2017.2的数据进行分析 19 | DA.run() -------------------------------------------------------------------------------- /Release/YT_produce_sell.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/Release/YT_produce_sell.py -------------------------------------------------------------------------------- /Release/get_dividends_history.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Mon Mar 26 21:29:43 2018 4 | 5 | @author: 尹超 6 | # 该模块用于获取指定个股的历史分红记录,以DataFrame形式给出 7 | """ 8 | import pandas as pd 9 | import requests 10 | from requests.exceptions import RequestException 11 | from bs4 import BeautifulSoup 12 | 13 | 14 | headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36'} 15 | 16 | def get_one_page(url): 17 | try: 18 | response = requests.get(url,headers = headers) 19 | response.encoding = 'GB2312' 20 | if response.status_code == 200: 21 | return response.text 22 | return None 23 | except RequestException: 24 | return None 25 | 26 | def parse(html): 27 | raw_data = [] 28 | try: 29 | year_raw = [] 30 | year = [] 31 | bonus_share = [] 32 | bonus_convert = [] 33 | profit_send = [] 34 | ex_rights = [] 35 | register_day = [] 36 | 37 | soup = BeautifulSoup(html,'html5lib') 38 | l = soup.select('table#sharebonus_1') 39 | ls = l[0].tbody 40 | lls = ls.select('td') 41 | for l in lls: 42 | if (l.get_text().strip()) != '预案' and \ 43 | (l.get_text().strip()) != '实施' and \ 44 | (l.get_text().strip()) != '不分配' and \ 45 | (l.get_text().strip()) != '查看': 46 | raw_data.append(l.get_text().strip()) 47 | 48 | year_raw = raw_data[::7] 49 | # print(raw_data) #出错的话请检查此处的输出 50 | # print(year_raw) #出错的话请检查此处的输出 51 | for item in year_raw: 52 | a = pd.to_datetime(item).year - 1 53 | year.append(a) 54 | bonus_share = raw_data[1::7] 55 | bonus_convert = raw_data[2::7] 56 | profit_send = raw_data[3::7] 57 | ex_rights = raw_data[4::7] 58 | register_day = raw_data[5::7] 59 | # print(register_day) 60 | data = {'年度':year, 61 | '送股':bonus_share, 62 | 
'转股':bonus_convert, 63 | '派息':profit_send, 64 | '除权日':ex_rights, 65 | '登记日':register_day 66 | } 67 | frame = pd.DataFrame(data) 68 | d = pd.DataFrame(columns = frame.columns.values.tolist()) 69 | for i in range(len(frame)): #删除无效的记录并重新排序,保证按时间顺序来 70 | if frame.iloc[len(frame) - 1 - i]['除权日'] != '--': 71 | d = d.append(frame.iloc[len(frame) - 1 - i],ignore_index=True) 72 | return d 73 | except: 74 | print('cannot parse this page') 75 | 76 | def parse_single_year(html,Year): 77 | raw_data = [] 78 | try: 79 | year_raw = [] 80 | year = [] 81 | bonus_share = [] 82 | bonus_convert = [] 83 | profit_send = [] 84 | ex_rights = [] 85 | register_day = [] 86 | # print('it is ',Year) 87 | soup = BeautifulSoup(html,'html5lib') 88 | l = soup.select('table#sharebonus_1') 89 | ls = l[0].tbody 90 | lls = ls.select('td') 91 | for l in lls: 92 | if (l.get_text().strip()) != '预案' and \ 93 | (l.get_text().strip()) != '实施' and \ 94 | (l.get_text().strip()) != '不分配' and \ 95 | (l.get_text().strip()) != '查看': 96 | raw_data.append(l.get_text().strip()) 97 | 98 | year_raw = raw_data[::7] 99 | # print(raw_data) #出错的话请检查此处的输出 100 | # print(year_raw) #出错的话请检查此处的输出 101 | for item in year_raw: 102 | a = pd.to_datetime(item).year - 1 103 | year.append(a) 104 | bonus_share = raw_data[1::7] 105 | bonus_convert = raw_data[2::7] 106 | profit_send = raw_data[3::7] 107 | ex_rights = raw_data[4::7] 108 | register_day = raw_data[5::7] 109 | # print(register_day) 110 | data = {'年度':year, 111 | '送股':bonus_share, 112 | '转股':bonus_convert, 113 | '派息':profit_send, 114 | '除权日':ex_rights, 115 | '登记日':register_day 116 | } 117 | 118 | frame = pd.DataFrame(data) 119 | 120 | Len=len(frame) 121 | for i in range(Len): 122 | s=int(frame.iloc[i,[0]]) 123 | Date=frame.iloc[i,[2]] 124 | date2=Date.loc['登记日'] 125 | # print(s,date2) 126 | if s == Year: 127 | px=float(frame.iloc[i,[1]]) 128 | date2=date2[:4]+date2[5:7]+date2[8:] 129 | # print(s,'px money is ',px,date2) 130 | return px,date2 131 | return -1 132 | except: 133 | print('cannot parse this page') 134 | 135 | #获取每10股派现金,及股权登记日 136 | def get_px_single_year(id,Year): 137 | url = 'http://vip.stock.finance.sina.com.cn/corp/go.php/vISSUE_ShareBonus/stockid/' 138 | url += str(id) 139 | url += '.phtml' 140 | html = get_one_page(url) 141 | return parse_single_year(html,Year) 142 | # 提供给用户的函数,输入ID,解析出历史分红列表 143 | def get_bonus_table(id): 144 | url = 'http://vip.stock.finance.sina.com.cn/corp/go.php/vISSUE_ShareBonus/stockid/' 145 | url += str(id) 146 | url += '.phtml' 147 | html = get_one_page(url) 148 | return parse(html) 149 | 150 | ############################################################################### 151 | ############################################################################### 152 | # APP示例代码,用完了请关闭 600066 153 | #s = get_bonus_table('601012') 154 | #print(s) 155 | #2017年的派息实际是在2018派发,所以登记日时间上是2018年 156 | #测试股息率 157 | #PX,Date=get_px_single_year('601012',2017) 158 | #print(PX,Date) -------------------------------------------------------------------------------- /Release/kday/get_price.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | 从163网址上获取指定ID指定时间段的K线数据 4 | """ 5 | import requests 6 | import re 7 | import datetime 8 | import pandas as pd 9 | ''' 10 | 11 | 完整网址: 12 | http://quotes.money.163.com/service/chddata.html?code=0%06d&start=%d&end=%d&fields=TCLOSE;HIGH;LOW;TOPEN;LCLOSE;VOTURNOVER;VATURNOVER 13 | ''' 14 | 15 | 16 | def get_close_price(id, day = 0): 17 | ''' 18 | 获取指定ID指定日期的收盘价 19 | 输入:id -> 
str形式的ID号: '600660' 20 | day -> str形式的日期: '20180626' 21 | 返回值:str形式的价格: '25.54', 如果当天为节假日,则返回0 22 | ''' 23 | if day == 0: 24 | day = datetime.datetime.now() - datetime.timedelta(days=1) 25 | day = day.strftime("%Y%m%d") 26 | if id[:3] == '000' or id[:3] == '002' or id[:3] == '300': #如果是深市,则前缀为1 27 | nid = '1' + id 28 | else: #如果是沪市主板,则前缀为0 29 | nid = '0' + id 30 | url = "http://quotes.money.163.com/service/chddata.html?code=%s&start=%s&end=%s&\ 31 | fields=TCLOSE" %(nid, day,day) 32 | res = requests.get(url) 33 | res.raise_for_status() 34 | 35 | for chunk in res.iter_content(100000): 36 | # print(chunk) 37 | pattern = '[^,\r\n]+' 38 | obj = re.compile(pattern) 39 | match = obj.findall(chunk.decode('gbk')) 40 | #print(match) 41 | if len(match) < 8: 42 | return 0 43 | else: 44 | return match[-1] 45 | 46 | def get_period_k_day(id, start_day, stop_day = 0): 47 | ''' 48 | 获取指定ID一个时间段内的K线数据 49 | 输入:id -> str形式的ID号: '600660' 50 | start_day -> str形式的日期: '20180626' 51 | stop_day -> 同上, 默认到昨天 52 | 返回值:一个dataframe 53 | ''' 54 | if stop_day == 0: 55 | day = datetime.datetime.now() - datetime.timedelta(days=1) 56 | day = day.strftime("%Y%m%d") 57 | 58 | if id[:3] == '000' or id[:3] == '002' or id[:3] == '300': #如果是深市,则前缀为1 59 | nid = '1' + id 60 | else: #如果是沪市主板,则前缀为0 61 | nid = '0' + id 62 | url = "http://quotes.money.163.com/service/chddata.html?code=%s&start=%s&end=%s&\ 63 | fields=TCLOSE;HIGH;LOW;TOPEN;LCLOSE;VOTURNOVER;VATURNOVER" %(nid, start_day, stop_day) 64 | 65 | 66 | # url = "http://quotes.money.163.com/service/chddata.html?code=0%s&start=%s&end=%s&\ 67 | # fields=TCLOSE;HIGH;LOW;TOPEN;LCLOSE;VOTURNOVER;VATURNOVER" %(id, start_day,stop_day) 68 | res = requests.get(url) 69 | res.raise_for_status() 70 | # playFile = open(file_name, 'wb') 71 | 72 | raw_data = [] 73 | for chunk in res.iter_content(1000000): 74 | # playFile.write(chunk) 75 | chunk = chunk.decode('gbk') 76 | pattern = '[^,\r\n]+' 77 | obj = re.compile(pattern) 78 | match = obj.findall(chunk) 79 | if len(match) < 8: #如果没有数据 80 | return 0 81 | 82 | header = match[:10] #如果增加字段,则此处以下需要相应修改 83 | # print(header) 84 | raw_data = match[10:] 85 | date = raw_data[::10] 86 | idc = raw_data[1::10] 87 | name = raw_data[2::10] 88 | price = raw_data[3::10] 89 | high = raw_data[4::10] 90 | lopen = raw_data[5::10] 91 | yesterday_close = raw_data[6::10] 92 | low = raw_data[7::10] 93 | vol = raw_data[8::10] 94 | mount = raw_data[9::10] 95 | 96 | data = { 97 | # header[0]:date, 98 | header[1]:idc, 99 | header[2]:name, 100 | header[3]:price, 101 | header[4]:high, 102 | header[5]:lopen, 103 | header[6]:yesterday_close, 104 | header[7]:low, 105 | header[8]:vol, 106 | header[9]:mount 107 | } 108 | df = pd.DataFrame(data,index = date) 109 | # playFile.close() 110 | return df 111 | 112 | 113 | def k_day_to_csv(code, stop_day = 0): 114 | ''' 115 | 更新k线数据,并保存到本地,默认为更新到昨天 116 | code:目标个股,只能为'000xxx'形式 117 | stop_day: 0->昨天, 20170101:更新到指定的一天 118 | @更新逻辑: 119 | 1. 如果无记录,则自动创建csv文件,默认为:ID.kday 120 | 2. 如果有部分记录,则自动分析,并将后续的内容更新 121 | 3. 如果记录比需要更新的更新,则直接返回 122 | 123 | 缺陷:得到的数据是没有复权的,应该进行前复权 124 | ''' 125 | base_path = './' #修改此处可以更改文件存放路径,可以考虑作为一个配置参数 126 | start_day = '19970101' #start时间统一从1997年开始 127 | #参数合法性检查 128 | if isinstance(code,list): 129 | print('is a list') 130 | elif isinstance(code,str): 131 | file_name = code + '.csv' 132 | # print(file_name) 133 | else: 134 | print('bad input. 
please check it') 135 | return 136 | 137 | file_name = base_path + file_name 138 | # print(file_name) 139 | 140 | update_flag = 1 #1代表重新生成, 2代表更新 3代表无需处理 141 | #判断最新的是第几天 142 | try: 143 | with open(file_name,'r') as fh: 144 | content = fh.readlines() 145 | if len(content) > 2: #获取最新记录,总是在第二行 146 | latest_record = content[1].split(',') 147 | 148 | from datetime import datetime 149 | from dateutil.parser import parse 150 | latest_day = parse(latest_record[0]) 151 | now = datetime.now().strftime('%Y-%m-%d') 152 | yesterday = parse(now) 153 | 154 | if yesterday > latest_day: 155 | update_flag = 2 156 | print('not the latest') 157 | else: 158 | update_flag = 3 159 | print(code + ' already the latest') 160 | return 161 | except: 162 | update_flag = 1 163 | print('no record') 164 | 165 | #不同的情况适用不同更新逻辑 166 | if update_flag == 1: #完全更新 167 | r = get_period_k_day(code, start_day) 168 | r.to_csv(file_name, encoding= 'gbk') 169 | elif update_flag == 2: 170 | r = get_period_k_day(code, start_day) #此处没有办法在首部添加 171 | r.to_csv(file_name, encoding= 'gbk') #如果可以的话,则不必每次重写 172 | return 173 | print('finish ' + code + ' update') 174 | return 175 | 176 | def k_day_update(id_list, stop_day = 0): 177 | ''' 178 | 用户API,更新个股的K线数据,可以是列表,也可以是str 179 | ''' 180 | #参数合法性检查 181 | if isinstance(id_list,list): 182 | print('is a list') 183 | for s in id_list: 184 | k_day_to_csv(s,stop_day) 185 | elif isinstance(id_list,str): 186 | k_day_to_csv(id_list,stop_day) 187 | else: 188 | print('bad input. please check it') 189 | return 190 | 191 | if __name__ == '__main__': 192 | id = '601012' 193 | start_day = '20100625' 194 | stop_day = '20180904' 195 | 196 | #获取昨天的收盘价 197 | # price = get_close_price(id) 198 | # print(price) 199 | 200 | # #获取指定一天的收盘价 201 | # price = get_close_price('600660','20170209') 202 | # print(price) 203 | # 204 | # #获取从start_day开始直到昨天的收盘价 205 | # s = get_period_price('600660',start_day) 206 | # print(s) 207 | # 208 | # #获取指定时间段内的收盘价 209 | # s = get_period_k_day('601012',start_day,stop_day) 210 | # print(s) 211 | # s.to_csv('test.csv', encoding= 'gbk') 212 | 213 | #更新K线数据并存文档 214 | company_list = ['600660', '600066', '000651', '600522', '601012', '600887'] 215 | k_day_update(company_list) 216 | k_day_update('600066') 217 | 218 | -------------------------------------------------------------------------------- /Release/kday/k_day.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Mon Sep 10 20:46:24 2018 4 | 本模块用于实现k线数据的入库/本地存储 5 | @author: yinchao 6 | """ 7 | # ============================================================================= 8 | # 1. sql账户配置 9 | # 2. k_day数据更新 10 | # 3. k_day数据提取 11 | # ============================================================================= 12 | 13 | import get_price 14 | import pymysql 15 | import os 16 | from sqlalchemy import create_engine 17 | 18 | 19 | hosts = '47.98.216.118' 20 | users = 'yc' 21 | passwds = 'yc123!' 
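# 建议:hosts/users/passwds 等账号信息不宜硬编码在源码中,可改为从 sys_config/账户配置.txt 一类的配置文件读取。
# 下面是一个极简示意(按本仓库惯例以注释给出;假设配置文件为每行 key=value 的文本,键名 host/user/passwd 为示意假设):
# def load_account(path='../sys_config/账户配置.txt'):
#     cfg = {}
#     with open(path, encoding='utf-8') as f:
#         for line in f:
#             if '=' in line:
#                 k, v = line.strip().split('=', 1)
#                 cfg[k.strip()] = v.strip()
#     return cfg
# 用法示意:acc = load_account(); hosts, users, passwds = acc['host'], acc['user'], acc['passwd']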
22 | databases = 'test' 23 | def pymysql_connect(): 24 | return pymysql.connect( 25 | host=hosts, 26 | database=databases, 27 | user=users, 28 | password=passwds, 29 | port=3306, 30 | charset='utf8' 31 | ) 32 | def connect_sql(): 33 | return create_engine("mysql+pymysql://"+ users + ":"+ passwds + "@" + hosts + ":3306/" + databases + "?charset=utf8") 34 | 35 | def df_to_mysql(table, code_id, start_day = '19970101'): 36 | connect = connect_sql() 37 | df = get_price.get_period_k_day(code_id, start_day) 38 | df.to_sql(name=table,con=connect,if_exists='append') 39 | 40 | 41 | def get_data_from_mysql(code_id): 42 | try: 43 | cmd = "select * from k_day2 where 股票代码 = \'"+code_id+"\';" 44 | print(cmd) 45 | conn = pymysql.connect( 46 | host = hosts, 47 | port = 3306, 48 | user = users, 49 | passwd = passwds, 50 | db = databases, 51 | charset = "utf8" 52 | ) 53 | 54 | cur = conn.cursor() 55 | cur.execute(cmd) 56 | result = cur.fetchall() 57 | print(result) #此处无法获取正确的数据 58 | return result 59 | except: 60 | print('get nothing') 61 | 62 | # ============================================================================= 63 | # 64 | # ============================================================================= 65 | if __name__ == '__main__': 66 | # df_to_mysql('k_day2', '601012', '20180801') 67 | get_data_from_mysql('601012') -------------------------------------------------------------------------------- /Release/pdf_decoder.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sun Mar 25 20:52:13 2018 4 | 5 | @author: Administrator 6 | # make sure pdfminer3k has been installed 7 | # otherwise: pip install pdfminer3k 8 | """ 9 | 10 | import sys 11 | import importlib 12 | importlib.reload(sys) 13 | 14 | from pdfminer.pdfparser import PDFParser,PDFDocument 15 | from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter 16 | from pdfminer.converter import PDFPageAggregator 17 | from pdfminer.layout import LTTextBoxHorizontal,LAParams 18 | from pdfminer.pdfinterp import PDFTextExtractionNotAllowed 19 | 20 | ''' 21 | 解析pdf 文本,保存到txt文件中 22 | ''' 23 | path = 'test.pdf' 24 | def parse(): 25 | fp = open(path, 'rb') # 以二进制读模式打开 26 | #用文件对象来创建一个pdf文档分析器 27 | praser = PDFParser(fp) 28 | # 创建一个PDF文档 29 | doc = PDFDocument() 30 | # 连接分析器 与文档对象 31 | praser.set_document(doc) 32 | doc.set_parser(praser) 33 | 34 | # 提供初始化密码 35 | # 如果没有密码 就创建一个空的字符串 36 | doc.initialize() 37 | 38 | # 检测文档是否提供txt转换,不提供就忽略 39 | if not doc.is_extractable: 40 | raise PDFTextExtractionNotAllowed 41 | else: 42 | # 创建PDf 资源管理器 来管理共享资源 43 | rsrcmgr = PDFResourceManager() 44 | # 创建一个PDF设备对象 45 | laparams = LAParams() 46 | device = PDFPageAggregator(rsrcmgr, laparams=laparams) 47 | # 创建一个PDF解释器对象 48 | interpreter = PDFPageInterpreter(rsrcmgr, device) 49 | 50 | # 循环遍历列表,每次处理一个page的内容 51 | for page in doc.get_pages(): # doc.get_pages() 获取page列表 52 | interpreter.process_page(page) 53 | # 接受该页面的LTPage对象 54 | layout = device.get_result() 55 | # 这里layout是一个LTPage对象 里面存放着 这个page解析出的各种对象 一般包括LTTextBox, LTFigure, LTImage, LTTextBoxHorizontal 等等 想要获取文本就获得对象的text属性, 56 | for x in layout: 57 | if (isinstance(x, LTTextBoxHorizontal)): 58 | with open(r'out.txt', 'a') as f: 59 | results = x.get_text() 60 | print(results) 61 | f.write(results + '\n') 62 | 63 | if __name__ == '__main__': 64 | parse() -------------------------------------------------------------------------------- /Release/wechat.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Tue Mar 20 19:54:14 2018 4 | 5 | @author: Administrator 6 | # note: key 可以从"http://www.tuling123.com/"处免费注册获得 7 | """ 8 | 9 | import itchat 10 | import requests 11 | import json 12 | key = '' 13 | 14 | # 1. 单独发送 15 | #itchat.auto_login() 16 | #users = itchat.search_friends(name=u'阿狸') 17 | #print(users) 18 | #who = users[0]['UserName'] 19 | #print(who) 20 | #itchat.send('进入自动回复模式,和我对话试试看',toUserName = who) 21 | 22 | #2. 自动回复 23 | 24 | #itchat.auto_login() 25 | #@itchat.msg_register('Text',isGroupChat = True)#群回复 26 | #def text_reply(msg): 27 | # return '新年快乐!(回复群消息)' 28 | #@itchat.msg_register('Text')#个人回复 29 | #def text_reply(msg): 30 | # print(msg['Text']) 31 | # print(type(msg)) 32 | # return '新年快乐!(回复好友消息)' 33 | #itchat.auto_login(hotReload=True) 34 | #itchat.run() 35 | 36 | 37 | #3. 实现了机器人对话 38 | #import requests 39 | #import json 40 | #key = 'aa7ab198e85e4ba3bec6622654789472' 41 | #while True: 42 | # info = input('\n我:') 43 | # url = 'http://www.tuling123.com/openapi/api?key='+key+'&info='+info 44 | # res = requests.get(url) 45 | # res.encoding = 'utf-8' 46 | # jd = json.loads(res.text)#将得到的json格式的信息转换为Python的字典格式 47 | # print('\nTuling: '+jd['text'])#输出结果 48 | 49 | 50 | #4. 个人图灵测试成功 51 | itchat.auto_login() 52 | @itchat.msg_register('Text')#个人回复 53 | def text_reply(msg): 54 | # print(msg['Text']) 55 | url = 'http://www.tuling123.com/openapi/api?key='+key+'&info='+msg['Text'] 56 | res = requests.get(url) 57 | res.encoding = 'utf-8' 58 | jd = json.loads(res.text)#将得到的json格式的信息转换为Python的字典格式 59 | return jd['text'] #输出结果 60 | itchat.auto_login(hotReload=True) 61 | itchat.run() 62 | 63 | #5. 群回复测试成功 64 | #itchat.auto_login() 65 | #@itchat.msg_register('Text',isGroupChat = True)#群回复 66 | #def text_reply(msg): 67 | ## print(msg['Text']) 68 | # url = 'http://www.tuling123.com/openapi/api?key='+key+'&info='+msg['Text'] 69 | # res = requests.get(url) 70 | # res.encoding = 'utf-8' 71 | # jd = json.loads(res.text)#将得到的json格式的信息转换为Python的字典格式 72 | # return jd['text'] #输出结果 73 | #itchat.auto_login(hotReload=True) 74 | #itchat.run() -------------------------------------------------------------------------------- /Release/安居客爬虫框架/ReadMe.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/Release/安居客爬虫框架/ReadMe.txt -------------------------------------------------------------------------------- /Release/安居客爬虫框架/crawl_anjuke_v1.311.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Mon Dec 11 14:30:50 2017 4 | @author: 1707501 5 | """ 6 | 7 | """ 8 | crawling anjuke house price 9 | GuiYang 10 | 20171212 add proxy and detailed the parse of house information 11 | 20171214 add spidertime and multiprocess 12 | """ 13 | 14 | import requests 15 | from bs4 import BeautifulSoup 16 | import pymysql 17 | import random,time 18 | 19 | def parse_detial(html): 20 | soup = BeautifulSoup(html.text,'html5lib') 21 | houseinfo = soup.select('div.houseInfoBox') 22 | houseinfotitle = houseinfo[0].h4 23 | an_xian = houseinfotitle.select('span.anxian')[0].get_text() 24 | if '假一赔百' in an_xian: 25 | an_xian = "Yes" 26 | else: 27 | an_xian = "No" 28 | houseencode= houseinfotitle.select('span.house-encode')[0].get_text() 29 | houseinfoV2 = 
houseinfo[0].select('div.houseInfoV2-desc')[0].get_text() 30 | housedetail1 = houseinfoV2.split() 31 | housedetail2 = ':'.join(housedetail1) 32 | housedetail = housedetail2.replace('\ue092','').replace('\u200b','').replace('\ue094','').replace('\ue093','').replace('\ue095','') 33 | housefirstv = soup.select('div.first-col.detail-col')[0].find_all('dl') 34 | house_estate = ''.join(housefirstv[0].get_text().split())[3:] 35 | house_add = ''.join(housefirstv[1].get_text().split())[3:] 36 | house_build_time = ''.join(housefirstv[2].get_text().split())[3:] 37 | house_type = ''.join(housefirstv[3].get_text().split())[3:] 38 | housesecondv = soup.select('div.second-col.detail-col')[0].find_all('dl') 39 | house_model_detail = ''.join(housesecondv[0].get_text().split())[3:] 40 | house_size = ''.join(housesecondv[1].get_text().split())[3:] 41 | house_orientation = ''.join(housesecondv[2].get_text().split())[3:] 42 | house_floor = ''.join(housesecondv[3].get_text().split())[3:] 43 | housethirdv = soup.select('div.third-col.detail-col')[0].find_all('dl') 44 | house_decorate = ''.join(housethirdv[0].get_text().split())[5:] 45 | house_univalence = ''.join(housethirdv[1].get_text().split())[5:] 46 | down_payment = ''.join(housethirdv[2].get_text().split())[5:] 47 | # monthly_payment = ''.join(housethirdv[3].get_text().split())[5:] #javescript loading data 48 | salerinfo = soup.select('p.broker-mobile') 49 | salerphone = salerinfo[0].get_text().replace('\ue047','') 50 | housetitle = ''.join(soup.select('h3.long-title')[0].get_text().split()) 51 | houseinfov1 = soup.select('div.basic-info.clearfix')[0].find_all('span') 52 | housetotleprice = houseinfov1[0].get_text() 53 | #============================================================================== 54 | # housemodel = houseinfov1[1].get_text() 55 | # housesize = houseinfov1[2].get_text() 56 | #============================================================================== 57 | line = [] 58 | line.append(housetitle) 59 | line.append(an_xian) 60 | line.append(houseencode) 61 | line.append(housetotleprice) 62 | line.append(house_model_detail) 63 | line.append(house_size) 64 | line.append(house_estate) 65 | line.append(house_add) 66 | line.append(house_build_time) 67 | line.append(house_type) 68 | line.append(house_orientation) 69 | line.append(house_floor) 70 | line.append(house_decorate) 71 | line.append(house_univalence) 72 | line.append(down_payment) 73 | line.append(housedetail) 74 | line.append(salerphone) 75 | result = '\t'.join(line) 76 | print(result) 77 | return result 78 | 79 | def parse_list(html): 80 | secondurl = [] 81 | soup = BeautifulSoup(html.text,'html5lib') 82 | houselists = soup.select('a.houseListTitle') 83 | for houseid in houselists: 84 | houseurl = houseid['href'] 85 | secondurl.append(houseurl) 86 | return secondurl 87 | 88 | def downloadhtml(url,proxy_ip): 89 | response = requests.get(url,headers=header,proxies={"http":proxy_ip}) 90 | if response.status_code == 200: 91 | return response 92 | else: 93 | print("download html error!") 94 | 95 | 96 | def Create_table(): 97 | query = """CREATE TABLE IF NOT EXISTS `anjuke_collecter_original_test` ( 98 | `No` int(10) unsigned NOT NULL AUTO_INCREMENT, 99 | `housetitle` varchar(255) DEFAULT NULL, 100 | `an_xian` varchar(255) DEFAULT NULL, 101 | `houseencode` varchar(255) DEFAULT NULL, 102 | `housetotleprice` varchar(255) DEFAULT NULL, 103 | `house_model_detail` varchar(255) DEFAULT NULL, 104 | `house_size` varchar(255) DEFAULT NULL, 105 | `house_estate` varchar(255) DEFAULT NULL, 106 | 
`house_add` varchar(255) DEFAULT NULL, 107 | `house_build_time` varchar(255) DEFAULT NULL, 108 | `house_type` varchar(255) DEFAULT NULL, 109 | `house_orientation` varchar(255) DEFAULT NULL, 110 | `house_floor` varchar(255) DEFAULT NULL, 111 | `house_decorate` varchar(255) DEFAULT NULL, 112 | `house_univalence` varchar(255) DEFAULT NULL, 113 | `down_payment` varchar(255) DEFAULT NULL, 114 | `housedetail` text DEFAULT NULL, 115 | `salerphone` varchar(255) DEFAULT NULL, 116 | `Url` varchar(255) DEFAULT NULL, 117 | `SpiderTime` varchar(255) DEFAULT NULL, 118 | PRIMARY KEY (`No`) 119 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8""" 120 | db = pymysql.connect(host = hosts,user = users, password = passwords, database = databases,charset='utf8') 121 | cursor = db.cursor() 122 | cursor.execute(query) 123 | db.commit() 124 | cursor.close() 125 | db.close() 126 | 127 | 128 | def etl_mysql(result): 129 | db = pymysql.connect(host = hosts,user = users, password = passwords, database = databases,charset='utf8') 130 | cursor = db.cursor() 131 | result = tuple(result) 132 | query = "insert into anjuke_collecter_original_test(housetitle,an_xian,houseencode,housetotleprice,house_model_detail,house_size,house_estate,house_add,house_build_time,house_type,house_orientation,house_floor,house_decorate,house_univalence,down_payment,housedetail,salerphone,Url,SpiderTime) values('%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s')" % result 133 | cursor.execute(query) 134 | db.commit() 135 | cursor.close() 136 | db.close() 137 | 138 | 139 | def get_next_page(html): 140 | soup = BeautifulSoup(html.text,'html5lib') 141 | nexturl = soup.select('a.aNxt')[0]['href'] 142 | return nexturl 143 | 144 | def get_proxy_ip(): 145 | db = pymysql.connect(host = hosts,user = users, password = passwords, database = databases,charset='utf8') 146 | cursor = db.cursor() 147 | query = "select ip,port from ip_collecter_original_test limit 17000" 148 | cursor.execute(query) 149 | ip_result = cursor.fetchall() 150 | IPList = [] 151 | for i in ip_result: 152 | Ip = i[0] + ":" + i[1] 153 | IPList.append(Ip) 154 | return IPList 155 | 156 | def check_ip(IPList): 157 | url = "https://www.baidu.com/" 158 | proxy_ip = random.choice(IPList) 159 | res = requests.get(url,headers=header,proxies={"http":proxy_ip}) 160 | if res.status_code == 200: 161 | print(proxy_ip) 162 | return proxy_ip 163 | else: 164 | return None # 后期修改成迭代 165 | 166 | header = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0', 167 | 'Connection':'keep-alive' } 168 | 169 | # https://gy.anjuke.com/sale/p1/#filtersort 170 | # https://shanghai.anjuke.com/sale/p1/#filtersort 171 | # https://hangzhou.anjuke.com/sale/ 172 | url = "https://shanghai.anjuke.com/sale/p1/#filtersort" 173 | 174 | hosts = 175 | users = 176 | passwords = 177 | databases = 178 | 179 | if __name__ == '__main__': 180 | List_ip = get_proxy_ip() 181 | next_url = url 182 | Create_table() 183 | while next_url != None: 184 | proxy_ip = check_ip(List_ip) 185 | res = downloadhtml(url,proxy_ip) 186 | if res != None: 187 | try: 188 | urllist = parse_list(res) 189 | except: 190 | print('house url list parsing error!') 191 | if urllist != None: 192 | for houseurl in urllist: 193 | proxy_ip = check_ip(List_ip) 194 | houseinfor = downloadhtml(houseurl,proxy_ip) 195 | try: 196 | results = parse_detial(houseinfor) 197 | except: 198 | results = None 199 | with 
open(r'E:\documents\personal\python\crawler\anjuke\anjuke_error_shanghai.txt','a',encoding='utf-8') as f: 200 | f.write(houseurl +"\n") 201 | print("parse hosue detial infor error!") 202 | continue 203 | with open(r'E:\documents\personal\python\crawler\anjuke\anjuke_shanghai_v15.txt','a',encoding='utf-8') as f: 204 | f.write(results + '\n') 205 | try: 206 | line = results.split('\t') 207 | ts = time.strftime('%Y%m%d%H%M%S',time.localtime(time.time())) 208 | line.append(houseurl) 209 | line.append(ts) 210 | # print(line) 211 | etl_mysql(line) 212 | except: 213 | print("data insert into mysql error!") 214 | continue 215 | try: 216 | next_url = get_next_page(res) 217 | except: 218 | next_url = None 219 | print("crawling end!") -------------------------------------------------------------------------------- /Release/树莓派信息实时推送示例/App.py: -------------------------------------------------------------------------------- 1 | from PageDecoder import * 2 | from StockClass import * 3 | from PushMessage import * 4 | import time 5 | 6 | 7 | my_interest = ['000651','600660','600887','600377','601012'] 8 | for interest in my_interest: 9 | data = GetTotalData(interest) 10 | istock = stock() 11 | istock.SetData(data) 12 | str1 = interest + '.CurPrice = ' + str(istock.CurPrice) 13 | print(str1) 14 | push(str1) 15 | time.sleep(1) 16 | -------------------------------------------------------------------------------- /Release/树莓派信息实时推送示例/PageDecoder.py: -------------------------------------------------------------------------------- 1 | import re 2 | import urllib.request 3 | 4 | base = 'http://hq.sinajs.cn/list=' 5 | bios = 'sh600660' 6 | 7 | def GetTotalData(inputstr): 8 | ''' 9 | input a number serial, ex:600660 10 | ''' 11 | pattern_id = '\d{6}' 12 | reobj = re.compile(pattern_id) 13 | id = reobj.findall(inputstr) 14 | id = "".join(id) 15 | page = '' 16 | if id != '': 17 | flag = int(id) 18 | if flag >= 600000: 19 | bios = 'sh' + id 20 | else: 21 | bios = 'sz' + id 22 | inputstr = base + bios 23 | # print(inputstr) 24 | page = urllib.request.urlopen(inputstr).read() 25 | # print(page) 26 | if len(page) < 30: 27 | print('error, invalid id') 28 | return 0 29 | s = page[30:] 30 | s = str(s) 31 | 32 | pattern_data = '\d+\.*\d*(?=,)' 33 | reobj = re.compile(pattern_data) 34 | data = reobj.findall(s) 35 | data.pop() 36 | data.pop() 37 | 38 | pattern_data = '\d\d\d\d-\d\d-\d\d' 39 | reobj = re.compile(pattern_data) 40 | date = reobj.findall(s) 41 | data.append(date) 42 | 43 | pattern_data = '\d\d:\d\d:\d\d' 44 | reobj = re.compile(pattern_data) 45 | time = reobj.findall(s) 46 | data.append(time) 47 | data.append(id) 48 | return data 49 | else: 50 | print('invalid id') 51 | return 0 52 | -------------------------------------------------------------------------------- /Release/树莓派信息实时推送示例/PushMessage.py: -------------------------------------------------------------------------------- 1 | # Author:YinChao 2 | # Date:2017-12-21 3 | # ver:V0.1 4 | 5 | import pycurl,json 6 | 7 | def push(str): 8 | appID = "59edd424a4c48aee80d6dd4a" 9 | appSecret = "05cc2a44d97e361f14d28c0ab8ff4acd" 10 | pushEvent = "DoorAlert" 11 | pushMessage = str 12 | 13 | c = pycurl.Curl() 14 | c.setopt(c.URL, 'https://api.instapush.im/v1/post') 15 | c.setopt(c.HTTPHEADER,['x-instapush-appid:' + appID,'x-instapush-appsecret:' + appSecret, 'Content-Type:application/json']) 16 | 17 | json_fields = {} 18 | json_fields['event'] = pushEvent 19 | json_fields['trackers'] = {} 20 | json_fields['trackers']['message'] = pushMessage 21 | 22 | postfields = 
json.dumps(json_fields) 23 | 24 | c.setopt(c.POSTFIELDS, postfields) 25 | 26 | c.perform() 27 | 28 | c.close() 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /Release/树莓派信息实时推送示例/StockClass.py: -------------------------------------------------------------------------------- 1 | #from DistinguishData import DataType 2 | 3 | class stock: 4 | ''' 5 | # 参数定义 6 | # ID 代码 600660 由单独变量给出 7 | # 0 TdyOpen 今开盘价 由List变量给出 8 | # 1 YdyClose 昨天收盘 9 | # 2 CurPrice 现价 10 | # 3 HighPrice 最高价 11 | # 4 LowPrice 最低价 12 | # 5 CurBuyPrice 竞买价 13 | # 6 CurSellPrice 竞卖价 14 | # 7 CurQuantity 成交量 15 | # 8 CurMoney 成交额 16 | # 9 Buy1_quant 买一数量 17 | # 10Buy1_price 买一报价 18 | # 11Buy2_quant 买一数量 19 | # 12Buy2_price 以此类推。。。 20 | # 13Buy3_quant 21 | # 14Buy3_price 22 | # 15Buy4_quant 23 | # 16Buy4_price 24 | # 17Buy5_quant 25 | # 18Buy5_price 26 | # 19Sell1_quant 27 | # 20Sell1_price 28 | # 21Sell2_quant 29 | # 22Sell2_price 30 | # 23Sell3_quant 31 | # 24Sell3_price 32 | # 25Sell4_quant 33 | # 26Sell4_price 34 | # 27Sell5_quant 35 | # 28Sell5_price 36 | ''' 37 | def SetData(self, ldata): 38 | ''' 39 | 一次性设置所有的信息 40 | :param id: 股票代码,{'gldq',000651} 41 | :param lista: 输入结构体 只能通过正则表达式获得 42 | :return:无 43 | ''' 44 | if ldata == 0: 45 | return 0 46 | self.TdyOpen = float(ldata[0]) 47 | self.YdyClose = float(ldata[1]) 48 | self.CurPrice = float(ldata[2]) 49 | self.HighPrice = float(ldata[3]) 50 | self.LowPrice = float(ldata[4]) 51 | self.CurBuyPrice = float(ldata[5]) 52 | self.CurSellPrice = float(ldata[6]) 53 | self.CurQuantity = int(ldata[7])/1000000 54 | self.CurMoney = float(ldata[8])/100000000 55 | self.Buy1_quant = int(int(ldata[9])/100) 56 | self.Buy1_price = float(ldata[10]) 57 | self.Buy2_quant = int(int(ldata[11])/100) 58 | self.Buy2_price = float(ldata[12]) 59 | self.Buy3_quant = int(int(ldata[13])/100) 60 | self.Buy3_price = float(ldata[14]) 61 | self.Buy4_quant = int(int(ldata[15])/100) 62 | self.Buy4_price = float(ldata[16]) 63 | self.Buy5_quant = int(int(ldata[17])/100) 64 | self.Buy5_price = float(ldata[18]) 65 | self.Sell1_quant = int(int(ldata[19])/100) 66 | self.Sell1_price = float(ldata[20]) 67 | self.Sell2_quant = int(int(ldata[21])/100) 68 | self.Sell2_price = float(ldata[22]) 69 | self.Sell3_quant = int(int(ldata[23])/100) 70 | self.Sell3_price = float(ldata[24]) 71 | self.Sell4_quant = int(int(ldata[25])/100) 72 | self.Sell4_price = float(ldata[26]) 73 | self.Sell5_quant = int(int(ldata[27])/100) 74 | self.Sell5_price = float(ldata[28]) 75 | self.date = ldata[29] 76 | self.time = ldata[30] 77 | self.id = ldata[31] 78 | 79 | def PrintAllData(self): 80 | ''' 81 | 一次性打印所有信息(仅用于调试) 82 | :return: 83 | ''' 84 | print('ID:\t'+self.id) 85 | print('今开:\t'+str(self.TdyOpen)) 86 | print('昨收:\t'+str(self.YdyClose)) 87 | print('现价:\t'+str(self.CurPrice)) 88 | print('最高价:\t'+str(self.HighPrice)) 89 | print('最低价:\t'+str(self.LowPrice)) 90 | print('竞买:\t'+str(self.CurBuyPrice)) 91 | print('竞卖:\t'+str(self.CurSellPrice)) 92 | print('成交量(万手):\t'+str(self.CurQuantity)) 93 | print('成交额(亿元):\t'+str(self.CurMoney)) 94 | print('买一/手:\t'+str(self.Buy1_quant)) 95 | print('买一/价:\t'+str(self.Buy1_price)) 96 | print('买二/手:\t'+str(self.Buy2_quant)) 97 | print('买二/价:\t'+str(self.Buy2_price)) 98 | print('买三/手:\t'+str(self.Buy3_quant)) 99 | print('买三/价:\t'+str(self.Buy3_price)) 100 | print('买四/手:\t'+str(self.Buy4_quant)) 101 | print('买四/价:\t'+str(self.Buy4_price)) 102 | print('买五/手:\t'+str(self.Buy5_quant)) 103 | print('买五/价:\t'+str(self.Buy5_price)) 104 | 
print('卖一/手:\t'+str(self.Sell1_quant)) 105 | print('卖一/价:\t'+str(self.Sell1_price)) 106 | print('卖二/手:\t'+str(self.Sell2_quant)) 107 | print('卖二/价:\t'+str(self.Sell2_price)) 108 | print('卖三/手:\t'+str(self.Sell3_quant)) 109 | print('卖三/价:\t'+str(self.Sell3_price)) 110 | print('卖四/手:\t'+str(self.Sell4_quant)) 111 | print('卖四/价:\t'+str(self.Sell4_price)) 112 | print('卖五/手:\t'+str(self.Sell5_quant)) 113 | print('卖五/价:\t'+str(self.Sell5_price)) 114 | print(self.date) 115 | print(self.time) 116 | 117 | def RiseRate(self): 118 | ''' 119 | 获取股票实时涨幅 120 | :return: 121 | ''' 122 | rate = (self.CurPrice - self.YdyClose)/self.YdyClose * 100 123 | rate = round(rate,2) 124 | return rate 125 | 126 | 127 | 128 | -------------------------------------------------------------------------------- /Release/树莓派信息实时推送示例/使用说明.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/Release/树莓派信息实时推送示例/使用说明.txt -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-slate -------------------------------------------------------------------------------- /anack/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | *.pyc 3 | Debug/ 4 | -------------------------------------------------------------------------------- /anack/App/Detail_Stock_Selector.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Mon Mar 12 15:57:22 2018 4 | 5 | @author: 10191773 6 | """ 7 | 8 | import sys 9 | sys.path.append("..") 10 | import pandas as pd 11 | import pymysql 12 | import tushare as ts 13 | from SQL.sql import pymysql_connect 14 | from SQL.sql import df_to_mysql 15 | 16 | 17 | ''' 18 | code,代码 19 | name,名称 20 | industry,所属行业 21 | area,地区 22 | pe,市盈率 23 | outstanding,流通股本(亿) 24 | totals,总股本(亿) 25 | totalAssets,总资产(万) 26 | liquidAssets,流动资产 27 | fixedAssets,固定资产 28 | reserved,公积金 29 | reservedPerShare,每股公积金 30 | esp,每股收益 31 | bvps,每股净资 32 | pb,市净率 33 | timeToMarket,上市日期 34 | undp,未分利润 35 | perundp, 每股未分配 36 | rev,收入同比(%) 37 | profit,利润同比(%) 38 | gpr,毛利率(%) 39 | npr,净利润率(%) 40 | holders,股东人数 41 | ''' 42 | 43 | dbconn=pymysql_connect() 44 | 45 | def create_stock_select_table(): 46 | #db = pymysql.connect(host = hosts,user = users, password = passwords, database = databases,charset='utf8') 47 | db = pymysql_connect() 48 | cursor = db.cursor() 49 | cursor.execute('DROP TABLE IF EXISTS all_stock_select') 50 | stock_select_sql = """CREATE TABLE IF NOT EXISTS `all_stock_select` ( 51 | `code` varchar(255) DEFAULT NULL, 52 | `name` varchar(255) DEFAULT NULL, 53 | `industry` varchar(255) DEFAULT NULL, 54 | `area` varchar(255) DEFAULT NULL, 55 | `pe` float(25) DEFAULT NULL, #市盈率 56 | `outstanding` varchar(255) DEFAULT NULL, 57 | `totals` varchar(255) DEFAULT NULL, 58 | `totalAssets` float(25) DEFAULT NULL, #总资产(万) 59 | `liquidAssets` varchar(255) DEFAULT NULL, 60 | `fixedAssets` varchar(255) DEFAULT NULL, 61 | `reserved` varchar(255) DEFAULT NULL, 62 | `reservedPerShare` varchar(255) DEFAULT NULL, 63 | `esp` varchar(255) DEFAULT NULL, 64 | `bvps` varchar(255) DEFAULT NULL, 65 | `pb` float(25) DEFAULT NULL, #市净率 66 | `timeToMarket` varchar(255) DEFAULT NULL, 67 | `undp` varchar(255) DEFAULT NULL, 68 | `perundp` varchar(255) DEFAULT NULL, 69 | 
`rev` float(25) DEFAULT NULL, #收入同比 70 | `profit` float(25) DEFAULT NULL, #利润同比 71 | `gpr` float(25) DEFAULT NULL, #毛利率 72 | `npr` float(25) DEFAULT NULL, #净利润率 73 | `holders` varchar(255) DEFAULT NULL 74 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8""" 75 | cursor.execute(stock_select_sql) 76 | db.commit() 77 | cursor.close() 78 | db.close() 79 | 80 | 81 | def create_stock_detail_select_table(): 82 | #db = pymysql.connect(host = hosts,user = users, password = passwords, database = databases,charset='utf8') 83 | db = pymysql_connect() 84 | cursor = db.cursor() 85 | cursor.execute('DROP TABLE IF EXISTS detail_stock_select') 86 | stock_select_sql = """CREATE TABLE IF NOT EXISTS `detail_stock_select` ( 87 | `code` varchar(255) DEFAULT NULL, 88 | `name` varchar(255) DEFAULT NULL, 89 | `industry` varchar(255) DEFAULT NULL, 90 | `area` varchar(255) DEFAULT NULL, 91 | `pe` float(25) DEFAULT NULL, #市盈率 92 | `outstanding` varchar(255) DEFAULT NULL, 93 | `totals` varchar(255) DEFAULT NULL, 94 | `totalAssets` float(25) DEFAULT NULL, #总资产(万) 95 | `liquidAssets` varchar(255) DEFAULT NULL, 96 | `fixedAssets` varchar(255) DEFAULT NULL, 97 | `reserved` varchar(255) DEFAULT NULL, 98 | `reservedPerShare` varchar(255) DEFAULT NULL, 99 | `esp` varchar(255) DEFAULT NULL, 100 | `bvps` varchar(255) DEFAULT NULL, 101 | `pb` float(25) DEFAULT NULL, #市净率 102 | `timeToMarket` varchar(255) DEFAULT NULL, 103 | `undp` varchar(255) DEFAULT NULL, 104 | `perundp` varchar(255) DEFAULT NULL, 105 | `rev` float(25) DEFAULT NULL, #收入同比 106 | `profit` float(25) DEFAULT NULL, #利润同比 107 | `gpr` float(25) DEFAULT NULL, #毛利率 108 | `npr` float(25) DEFAULT NULL, #净利润率 109 | `holders` varchar(255) DEFAULT NULL 110 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8""" 111 | cursor.execute(stock_select_sql) 112 | db.commit() 113 | cursor.close() 114 | db.close() 115 | 116 | 117 | #初步筛选 118 | def stock_select_to_sql(PE,TotalAssists): 119 | create_stock_select_table() 120 | 121 | df=ts.get_stock_basics() 122 | #df.to_excel('c:/python/all_stock_list.xlsx') 123 | df= df[df['pe'] < PE] 124 | df= df[df['pe'] > 0] 125 | print(df) 126 | #df.to_excel('c:/python/all_stock_pe50.xlsx') 127 | df= df[df['totalAssets'] >= TotalAssists] 128 | df= df[df['rev'] >= 0] 129 | df= df[df['profit'] >= 0] 130 | #df.to_excel('c:/python/all_stock_assets100.xlsx') 131 | print(df) 132 | print('...........................before') 133 | #df=df.iloc[1:] 134 | #df.to_excel('c:/python/all_stock_assets100head.xlsx') 135 | #sql.df_to_mysql('all_stock_select',df) 136 | df_to_mysql('all_stock_select',df) 137 | print('...........................after') 138 | 139 | def GetIndustryData(id): 140 | sqlcmd="select * from industry_estimation_avg where industry ='%s'" %(id) 141 | try: 142 | a=pd.read_sql(sqlcmd,dbconn) 143 | return a 144 | except: 145 | print('invalid input') 146 | return pd.DataFrame() 147 | 148 | 149 | #仔细筛选并入库----执行前提是industry_estimation_avg表已存在 150 | #PE,TotalAssists参数暂时没有用到 151 | def stock_detail_select(PE,TotalAssists): 152 | #stock_select_to_sql(PE,TotalAssists) 153 | create_stock_detail_select_table() 154 | #sqlcmd="select * from all_stock_select ORDER BY pe" 155 | #try: 156 | #a=pd.read_sql(sqlcmd,dbconn) 157 | a=ts.get_stock_basics() 158 | target = pd.DataFrame() #创建一个空的dataframe 159 | i=0 160 | for i in range(0,len(a)): 161 | ''' 162 | #测试输出某一个行业的所有股票数据 163 | c=a.iloc[i,1] 164 | print('****',c) 165 | if (c=='元器件'): 166 | print('get---->',a.iloc[i],i) 167 | if(c=='农药化肥'): 168 | print('get...2>',a.iloc[i],i) 169 | 170 | ''' 171 | 172 | ''' 173 | 
#测试输出数据库(行业平均值数据库)中指定行业的平均统计数据 174 | c='农药化肥' 175 | result=GetIndustryData(c) 176 | if not result.empty: 177 | #print(result) 178 | print(result.iloc[0]['avg_pe'],result.iloc[0]['avg_pb'],result.iloc[0]['avg_rev'], \ 179 | result.iloc[0]['avg_profit'],result.iloc[0]['avg_gpr'],result.iloc[0]['avg_npr']) 180 | #print(result.iloc[0,5],result.iloc[0,6],result.iloc[0,7], \ 181 | # result.iloc[0,8],result.iloc[0,9],result.iloc[0,10]) 182 | else: 183 | print('找不到行业名称...',i) 184 | ''' 185 | 186 | #正式逻辑代码 187 | c=a.iloc[i,1] 188 | result=GetIndustryData(c) 189 | if not result.empty: 190 | cnt=0 191 | #print('#########',result.iloc[0],'pe:',a.iloc[i].pe) 192 | 193 | #此处判断条件可调,eg:判断条件中5/6的数据优于平均水平则认为值得研究,此处判断条件可操作范围较大,可以再讨论 194 | if a.iloc[i].pe<result.iloc[0]['avg_pe']: 195 | cnt+=1 196 | if a.iloc[i].pb<result.iloc[0]['avg_pb']: 197 | cnt+=1 198 | if a.iloc[i].rev>result.iloc[0]['avg_rev']: 199 | cnt+=1 200 | if a.iloc[i].gpr>result.iloc[0]['avg_gpr']: 201 | cnt+=1 202 | if a.iloc[i].profit>result.iloc[0]['avg_profit']: 203 | cnt+=1 204 | if a.iloc[i].npr>result.iloc[0]['avg_npr']: 205 | cnt+=1 206 | 207 | # 5/6的参数优于平均水平,则认为值得研究,保存入库 208 | if cnt>=6: 209 | print('find industry data,avg ok data num is:',cnt) 210 | target = target.append(a.iloc[i]) 211 | #else: 212 | # print('item ok num not enough,which is:',cnt) 213 | 214 | #else: 215 | # print('找不到行业名称...',i) 216 | 217 | i=i+1 218 | 219 | print(target) 220 | df_to_mysql('detail_stock_select',target) #筛选结果入库 221 | return target 222 | 223 | #to test run this fun 224 | #stock_detail_select(300,50) 225 | -------------------------------------------------------------------------------- /anack/App/HK_insider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/anack/App/HK_insider.py -------------------------------------------------------------------------------- /anack/App/IndustryEstimation.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | import pandas as pd 3 | #import pymysql 4 | #from sqlalchemy import create_engine 5 | import tushare as ts 6 | 7 | from SQL.sql import pymysql_connect 8 | from SQL.sql import df_to_mysql 9 | #from SQL.glo import get_value 10 | #import requests 11 | ## 加上字符集参数,防止中文乱码 12 | 13 | #确定输出表头信息: 14 | #基础:总市值、平均市值、市盈率、市净率、收入增长、净利增长、毛利率、净利率 15 | #扩展:资产负债率、市净率、市现率、市销率(需要根据财务报表获取) 16 | #自定义: 17 | 18 | 19 | dbconn=pymysql_connect() 20 | 21 | clm = ['行业','年度','企业数量','总市值','平均市值','平均市盈率','平均市净率', 22 | '收入增长率','利润增长率','毛利率','净利润率'] 23 | headers = ['name','industry','totalAssets','pe','pb','rev','profit','gpr','npr'] 24 | #sql语句示例 25 | #select 字段 from 表名 where 条件; 26 | #eg:select * from student where sex='男' and age>20; //查询性别是男,并且年龄大于20岁的人。 27 | 28 | #创建industry_estimation表头 29 | def CreateTable(): 30 | db = pymysql_connect() 31 | cursor = db.cursor() 32 | cursor.execute('DROP TABLE IF EXISTS industry_estimation') 33 | estimation = """CREATE TABLE IF NOT EXISTS `industry_estimation` ( 34 | `行业` varchar(25) DEFAULT NULL, 35 | `年度` varchar(25) DEFAULT NULL, 36 | `企业数量` int(25) DEFAULT NULL, 37 | `总市值` float(25) DEFAULT NULL, 38 | `平均市值` float(25) DEFAULT NULL, 39 | `平均市盈率` float(25) DEFAULT NULL, 40 | `平均市净率` float(25) DEFAULT NULL, 41 | `收入增长率` float(25) DEFAULT NULL, 42 | `利润增长率` float(25) DEFAULT NULL, 43 | `毛利率` float(25) DEFAULT NULL, 44 | `净利润率` float(25) DEFAULT NULL 45 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8""" 46 | cursor.execute(estimation) 47 | db.commit() 48 | cursor.close() 49 | db.close() 50 | 51 | #函数名:GetIndustryName 52 | #更新时间:2018-3-17
#描述:行业翻译器,输入ID或者股票名称,解析其在anack_classify数据库中所在的行业名 54 | #输入:股票名称或者代码, 比如 "福耀玻璃"或者"600660"都可以 55 | #输出:行业名称 比如:汽车制造 56 | #异常处理:如果没有对应信息,输出invalid id input错误提示信息 57 | def GetIndustryName(id): 58 | sqlcmd="select code,name,industry from anack_classify where code ='%s'" %(id) 59 | try: 60 | a=pd.read_sql(sqlcmd,dbconn) 61 | return a.iloc[0]['industry'] 62 | except: 63 | sqlcmd="select code,name,industry from anack_classify where name ='%s'" %(id) 64 | try: 65 | a=pd.read_sql(sqlcmd,dbconn) 66 | return a.iloc[0]['industry'] 67 | except: 68 | print('invalid id input') 69 | return 70 | 71 | #描述:输入行业名,计算出该行业的平均水平 72 | #输入:数据库用户信息, 行业名, 年度 73 | def Estimation(dbconn,industry_name, year): 74 | ''' 75 | 年度信息还没有用上 76 | ''' 77 | sqlcmd="select code,name from anack_classify where industry ='%s'" %(industry_name) 78 | 79 | #利用pandas 模块导入mysql数据 80 | a=pd.read_sql(sqlcmd,dbconn) 81 | industry_id_list=a[:] 82 | # print(a) 83 | 84 | if len(a) == 0: 85 | print('行业名称输入错误,请重试') 86 | return 0 87 | else: 88 | a = ts.get_stock_basics() #获取的数据 89 | tushare_data=a.loc[:,headers] 90 | target = pd.DataFrame(columns = ['行业','industry','totalAssets','pe','pb','rev','profit','gpr','npr']) #创建一个空的dataframe 91 | 92 | for names in industry_id_list.name: 93 | target = target.append(tushare_data.loc[tushare_data.name == names], ignore_index=True) 94 | #print(target) 95 | 96 | 总市值 = 0 97 | 企业数量 = 0 98 | for sums in target.totalAssets: 99 | 总市值 += sums 100 | 企业数量 += 1 101 | print('行业名:' + industry_name) 102 | print('行业数量(家) = ' + str(企业数量)) 103 | print('行业总市值(万) = ' + str(总市值)) 104 | 平均市值 = 总市值/企业数量 105 | print('平均市值(万) = ' + str(平均市值)) 106 | 107 | weight = [] 108 | for each in target.totalAssets: 109 | weight.append(each/总市值) 110 | target['weight'] = weight 111 | 112 | # 求平均市盈率 113 | 平均市盈率 = 0 114 | num = 企业数量 115 | i = 0 116 | for each in target.pe: 117 | if each == 0 or each > 100: #排除异常情况 118 | num -= 1 119 | else: 120 | 平均市盈率 += each * target.iloc[i]['weight'] 121 | i+=1 122 | print('平均市盈率(%) = ' + str(平均市盈率)) 123 | 124 | 平均市净率 = 0 125 | num = 企业数量 126 | i = 0 127 | for each in target.pb: 128 | if each < 0 or each > 10: #排除异常情况 129 | num -= 1 130 | else: 131 | 平均市净率 += each * target.iloc[i]['weight'] 132 | i+=1 133 | print('平均市净率(%) = ' + str(平均市净率)) 134 | 135 | 收入增长率 = 0 136 | num = 企业数量 137 | i = 0 138 | for each in target.rev: 139 | if each < -1000 or each > 1000: #排除异常情况 140 | num -= 1 141 | else: 142 | 收入增长率 += each * target.iloc[i]['weight'] 143 | i+=1 144 | print('收入增长率(%) = '+str(收入增长率)) 145 | 146 | 利润增长率 = 0 147 | num = 企业数量 148 | i = 0 149 | for each in target.profit: 150 | if each < -1000 or each > 1000: #排除异常情况 151 | num -= 1 152 | else: 153 | 利润增长率 += each * target.iloc[i]['weight'] 154 | i+=1 155 | print('利润增长率(%) = ' + str(利润增长率)) 156 | 157 | 158 | 毛利率 = 0 159 | num = 企业数量 160 | i = 0 161 | for each in target.gpr: 162 | if each < -1000 or each > 1000: #排除异常情况 163 | num -= 1 164 | else: 165 | 毛利率 += each * target.iloc[i]['weight'] 166 | i+=1 167 | print('毛利率(%) = ' + str(毛利率)) 168 | 169 | 净利润率 = 0 170 | num = 企业数量 171 | i = 0 172 | for each in target.npr: 173 | if each < -1000 or each > 1000: #排除异常情况 174 | num -= 1 175 | else: 176 | 净利润率 += each * target.iloc[i]['weight'] 177 | i+=1 178 | print('净利润率(%) = ' + str(净利润率)) 179 | data = {'行业':industry_name,'年度':str(year),'企业数量':企业数量, 180 | '总市值':round(总市值/10000,4),'平均市值':round(平均市值/10000,4),'平均市盈率':round(平均市盈率,2), 181 | '平均市净率':round(平均市净率,2),'收入增长率':round(收入增长率,2),'利润增长率':round(利润增长率,2), 182 | '毛利率':round(毛利率,2),'净利润率':round(净利润率,2)} 183 | result_df = 
pd.DataFrame(data,columns = clm, index=["0"]) 184 | # print(result_df) 185 | df_to_mysql('industry_estimation',result_df) 186 | return result_df 187 | 188 | #获取所有行业平均数据用于测试 189 | def Get_all_industry_average_data(): 190 | a = ts.get_stock_basics() 191 | for i in range(0,len(a)): 192 | print('industry:',a.iloc[i,1]) 193 | test=Estimation(dbconn,a.iloc[i,1],2017) 194 | # App示例代码,用完删掉 195 | 196 | 197 | #Estimation(dbconn,'家电行业') 198 | #print(GetIndustryName('福耀玻璃')) 199 | #CreateTable() 200 | #Estimation(dbconn,GetIndustryName('宁沪高速'),2017) 201 | #Estimation(dbconn,GetIndustryName('格力电器'),2017) 202 | #Estimation(dbconn,GetIndustryName('福耀玻璃'),2017) 203 | #Estimation(dbconn,GetIndustryName('隆基股份'),2017) 204 | 205 | #def get_interest_list(): 206 | # ''' 207 | # 解析"感兴趣的个股列表.txt",返回list类型的数据供其他模块使用 208 | # ''' 209 | # list_id = [] 210 | # with open('../SQL/感兴趣的个股列表.txt','r') as fh: 211 | # s = fh.readline() #获取更新时间 212 | # s = fh.readline() #获取目标长度 213 | # 214 | # lines = fh.readlines() #获取目标内容 215 | # for s in lines: 216 | # code = s[:6] 217 | # list_id.append(code) 218 | # list_id.sort() 219 | # return list_id 220 | # 221 | #for s in get_interest_list(): 222 | # Estimation(dbconn,GetIndustryName(s),2017) 223 | -------------------------------------------------------------------------------- /anack/App/IndustryEstimation_detail.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | import sys 3 | sys.path.append("..") 4 | import pandas as pd 5 | #import pymysql 6 | #from sqlalchemy import create_engine 7 | import tushare as ts 8 | 9 | from SQL.sql import pymysql_connect 10 | from SQL.sql import df_to_mysql 11 | 12 | 13 | 14 | #from SQL.glo import get_value 15 | #import requests 16 | ## 加上字符集参数,防止中文乱码 17 | 18 | #确定输出表头信息: 19 | #基础:总市值、平均市值、市盈率、市净率、收入增长、净利增长、毛利率、净利率 20 | #扩展:资产负债率、市净率、市现率、市销率(需要根据财务报表获取) 21 | #自定义: 22 | #sql语句示例 23 | #select 字段 from 表名 where 条件; 24 | #eg:select * from student where sex='男' and age>20; //查询性别是男,并且年龄大于20岁的人。 25 | 26 | #创建industry_estimation表头 27 | ''' 28 | code,代码 29 | name,名称 30 | industry,所属行业 31 | area,地区 32 | pe,市盈率 33 | outstanding,流通股本(亿) 34 | totals,总股本(亿) 35 | totalAssets,总资产(万) 36 | liquidAssets,流动资产 37 | fixedAssets,固定资产 38 | reserved,公积金 39 | reservedPerShare,每股公积金 40 | esp,每股收益 41 | bvps,每股净资 42 | pb,市净率 43 | timeToMarket,上市日期 44 | undp,未分利润 45 | perundp, 每股未分配 46 | rev,收入同比(%) 47 | profit,利润同比(%) 48 | gpr,毛利率(%) 49 | npr,净利润率(%) 50 | holders,股东人数 51 | ''' 52 | 53 | #作用:行业平均值明细数据入库 54 | #输出:入库行业平均值明细,便于后续分析 55 | def CreateTable(): 56 | db = pymysql_connect() 57 | cursor = db.cursor() 58 | cursor.execute('DROP TABLE IF EXISTS industry_estimation_detail') 59 | estimation = """CREATE TABLE IF NOT EXISTS `industry_estimation_detail` ( 60 | `code` varchar(25) DEFAULT NULL, 61 | `name` varchar(25) DEFAULT NULL, 62 | `industry` varchar(25) DEFAULT NULL, 63 | `area` varchar(25) DEFAULT NULL, 64 | `pe` varchar(25) DEFAULT NULL, 65 | `outstanding` varchar(25) DEFAULT NULL, 66 | `totals` varchar(25) DEFAULT NULL, 67 | `totalAssets` varchar(25) DEFAULT NULL, 68 | `liquidAssets` varchar(25) DEFAULT NULL, 69 | `fixedAssets` varchar(25) DEFAULT NULL, 70 | `reserved` varchar(25) DEFAULT NULL, 71 | `reservedPerShare` varchar(25) DEFAULT NULL, 72 | `esp` varchar(25) DEFAULT NULL, 73 | `bvps` varchar(25) DEFAULT NULL, 74 | `pb` varchar(25) DEFAULT NULL, 75 | `timeToMarket` varchar(25) DEFAULT NULL, 76 | `undp` varchar(25) DEFAULT NULL, 77 | `perundp` varchar(25) DEFAULT NULL, 78 | `rev` varchar(25) DEFAULT NULL, 
79 | `profit` varchar(25) DEFAULT NULL, 80 | `gpr` varchar(25) DEFAULT NULL, 81 | `npr` varchar(25) DEFAULT NULL, 82 | `holders` varchar(25) DEFAULT NULL 83 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8""" 84 | cursor.execute(estimation) 85 | db.commit() 86 | cursor.close() 87 | db.close() 88 | 89 | def Estimation(): 90 | 91 | result_df = pd.DataFrame(ts.get_stock_basics().values,columns = ts.get_stock_basics().columns) 92 | df_to_mysql('industry_estimation_detail',result_df) 93 | 94 | return result_df 95 | 96 | 97 | 98 | 99 | #作用:查看行业平均值统计 100 | #输入:行业名称 101 | #输出:行业平均统计数 102 | def industry_stat(industry): 103 | df = pd.DataFrame(ts.get_stock_basics().values,columns = ts.get_stock_basics().columns) 104 | pe_stat = df[df.industry == industry].drop(['name','industry','area'], axis = 1).astype('float') 105 | # ============================================================================= 106 | # print(pe_stat.dtypes) 107 | # ============================================================================= 108 | result_df = pe_stat.describe() 109 | print(result_df) 110 | return result_df 111 | 112 | 113 | 114 | 115 | #作用:查看行业平均值统计 116 | #输出:所有行业平均统计数(筛选条件:PE <100,pb <10,1000>rev>-1000,1000>profit>-1000,,1000>gpr>-1000,,1000>npr>-1000) 117 | def CreateTable_industry_avg(): 118 | db = pymysql_connect() 119 | cursor = db.cursor() 120 | cursor.execute('DROP TABLE IF EXISTS industry_estimation_avg') 121 | estimation = """CREATE TABLE IF NOT EXISTS `industry_estimation_avg` as 122 | select industry,avg(pe) as avg_pe,avg(outstanding) as avg_outstanding, 123 | avg(totals) as avg_totals ,avg(totalAssets) as avg_totalAssets, 124 | avg(liquidAssets) as avg_liquidAssets,avg(fixedAssets) as avg_fixedAssets, 125 | avg(reserved) as avg_reserved,avg(reservedPerShare) as avg_reservedPerShare, 126 | avg(esp) as avg_esp,avg(bvps) as avg_bvps,avg(pb) as avg_pb, 127 | avg(timeToMarket) as avg_timeToMarket,avg(undp) as avg_undp, 128 | avg(perundp) as avg_perundp,avg(rev) as avg_rev ,avg(profit) as avg_profit 129 | ,avg(gpr) as avg_gpr ,avg(npr) as avg_npr ,avg(holders) as avg_holders 130 | from industry_estimation_detail where pe < 100 and pb < 10 and rev <1000 and rev > -1000 and profit < 1000 and profit > -1000 and gpr < 1000 and gpr > -1000 and npr < 1000 and npr > -1000 131 | group by industry 132 | """ 133 | cursor.execute(estimation) 134 | db.commit() 135 | cursor.close() 136 | db.close() 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | # ============================================================================= 147 | # #调试使用 148 | #CreateTable() 149 | #Estimation() 150 | # industry_stat('通信设备') 151 | # CreateTable_industry_avg() 152 | # ============================================================================= 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | -------------------------------------------------------------------------------- /anack/App/M1808/M1808.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Tue Apr 3 21:02:35 2018 4 | 5 | @author: Administrator 6 | """ 7 | from datetime import datetime 8 | from datetime import timedelta 9 | import time 10 | import threading 11 | from threading import Thread 12 | L = threading.Lock() # 引入锁 13 | 14 | #import imp #防止重复调用导致全局变量设置无效 15 | #try: 16 | # imp.find_module('protocol') 17 | # found = True 18 | # print('arleady imported protocol') 19 | #except ImportError: 20 | # from protocol import * 21 | 22 | from protocol import ATDecoder 23 | 24 
| #try: 25 | # imp.find_module('wechat') 26 | # found = True 27 | # print('already imported wechat') 28 | #except ImportError: 29 | # from wechat import * 30 | # 参数初始化设置 31 | now = datetime.now() 32 | open_call_time = datetime(now.year,now.month,now.day,9,15) 33 | close_call_time = datetime(now.year,now.month,now.day,9,25) 34 | morning_open_time = datetime(now.year,now.month,now.day,9,30) 35 | morning_close_time = datetime(now.year,now.month,now.day,11,30) 36 | afternoon_open_time = datetime(now.year,now.month,now.day,13,00) 37 | afternoon_close_time = datetime(now.year,now.month,now.day,23,00) 38 | 39 | # 周期性调用该函数以实现完整的预警监测功能 40 | def watch_dog_one_time(): 41 | now = datetime.now() 42 | if close_call_time <= now < morning_open_time: # 集合竞价结束到开盘之间,获取大盘和个股的开盘信息并输出 43 | print(str(now)+'快开盘了') 44 | elif (now >= morning_open_time and now < morning_close_time) or \ 45 | (now >= afternoon_open_time and now < afternoon_close_time): 46 | # 获取 47 | # rand = ran 48 | print('主线程休眠') 49 | time.sleep(get_sleep_time() * 60) #休眠 50 | clear_sleep_time() 51 | print('主线程休眠完毕') 52 | print(str(now)+'检查一次') 53 | test_str='AT:run' 54 | result = ATDecoder(test_str) 55 | # print(result) 56 | SendText2ChatRoom(result,'啊啊啊') #给指定群聊 57 | elif now >= afternoon_close_time: #收盘以后停止运行 58 | print(str(now)+'停止运行') 59 | return 1 60 | else: #中场休息,直接sleep 61 | time.sleep(1) 62 | print(str(now)+'休息') 63 | return 0 64 | 65 | # 外界的API接口,调用run函数以实现完整的监测 66 | def M1808_run(): 67 | while 1: 68 | L.acquire() 69 | ret = watch_dog_one_time() 70 | L.release() 71 | # if ret == 1: 72 | # return 73 | # else: 74 | time.sleep(30) 75 | 76 | ############################################################################### 77 | #import imp #官方提供的加载方法,仍然没用 78 | #import sys 79 | #def __import__(name, globals=None, locals=None, fromlist=None): 80 | # try: 81 | # return sys.modules['wechat'] 82 | # except KeyError: 83 | # pass 84 | # 85 | # fp,pathname,description = imp.find_module('wechat') 86 | # try: 87 | # imp.load_module('wechat',fp,pathname,description) 88 | # finally: 89 | # if fp: 90 | # fp.close() 91 | # 92 | #itchat.auto_login(hotReload=True) 93 | 94 | 95 | ############################################################################### 96 | 97 | from wechat import * 98 | def test(): 99 | n = 1 100 | while n > 0: 101 | print(n) 102 | n = n + 1 103 | time.sleep(2) 104 | #t1 = Thread(target=M1808_run, args=()) 105 | itchat.auto_login(hotReload=True) 106 | #t1 = Thread(target=test, args=()) #仅供测试 107 | t1 = Thread(target=M1808_run, args=()) 108 | t2 = Thread(target=itchat.run,args=()) 109 | t1.start() 110 | t2.start() 111 | ############################################################################### 112 | #from wechat import * 113 | #from protocol import * 114 | #import itchat 115 | # 116 | #itchat.auto_login(hotReload=True) 117 | #itchat.run() 118 | 119 | #cmd='AT:set_target_id=600660,000651,601012,000002,000333' 120 | #ATDecoder(cmd) 121 | #cmd='AT:run' 122 | #s = ATDecoder(cmd) 123 | #cmd='AT:get_target_id?' 124 | #s = ATDecoder(cmd) 125 | ###############################################################################
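# [补充示例] 上面注释中给出了若干 AT 命令;下面的草图(非原项目代码)把它们
# 串成一个离线自检,不经微信直接调用 protocol.ATDecoder,便于单独验证协议层;
# 函数名 demo_at_commands 为示例命名。
from protocol import ATDecoder

def demo_at_commands():
    for cmd in ['AT:set_target_id=600660,000651',  # 设置监控列表
                'AT:get_target_id?',               # 查看监控列表
                'AT:get_para?',                    # 查看预警参数
                'AT:test']:                        # 连通性测试
        print(cmd, '->', ATDecoder(cmd))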
-------------------------------------------------------------------------------- /anack/App/M1808/protocol.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sat Apr 7 10:23:33 2018 4 | 5 | @author: Administrator 6 | """ 7 | import time 8 | import re 9 | 10 | from early_warning import * 11 | #import imp #防止重复调用导致全局变量设置无效 12 | #try: 13 | # imp.find_module('early_warning') 14 | # found = True 15 | # print('already imported early_warning') 16 | #except ImportError: 17 | # from early_warning import * 18 | 19 | def ATDecoder(strin): 20 | pattern_id = '(?<=AT:)[^=?]+' 21 | reobj = re.compile(pattern_id) 22 | cmd = reobj.findall(strin) 23 | cmd = "".join(cmd) #list to str 24 | print('\ncmd =',cmd) #该句话只做调试用 25 | 26 | # if cmd in cmd_list: #找到合适的命令 27 | if cmd == 'set_target_id': #设置感兴趣的股票列表 28 | # print('set target_id\n') 29 | pattern_id = '\d{6}' 30 | reobj = re.compile(pattern_id) 31 | stock_id = reobj.findall(strin) 32 | set_target_id(stock_id) 33 | return ('stock id set ok') 34 | 35 | elif cmd == 'get_target_id': #显示股票列表 36 | # print(show_target_id()) 37 | return show_target_id() 38 | 39 | elif cmd == 'clear_target': #清空股票列表 40 | clear_target_id() 41 | return 'target id cleared' 42 | 43 | elif cmd == 'set_para': #设置参数 44 | pattern_id = '[0-9.]+' 45 | reobj = re.compile(pattern_id) 46 | result = reobj.findall(strin) 47 | if len(result) == 1: 48 | set_param(float(result[0])); return 'set para th = %s' % result[0] 49 | else: 50 | set_param(float(result[0]),float(result[1])) 51 | return 'set para th = %s, quantity = %s' % (result[0],result[1]) 52 | 53 | elif cmd == 'get_para': #查看设置的预警参数 54 | th, quantity = get_param() 55 | return 'raise th, quantity = %.2f, %.2f' % (th, quantity) 56 | 57 | elif cmd == 'check': #主动查询当前个股状态 58 | r = str(get_main_market()) 59 | r += '\n\n' 60 | r += str(get_stock_market()) 61 | return r 62 | 63 | elif cmd == 'sleep': #让主机休眠x分钟 64 | pattern_id = '(?<=sleep=)[0-9]+' 65 | reobj = re.compile(pattern_id) 66 | result = reobj.findall(strin) 67 | result = "".join(result) #list to str 68 | print('主机休眠',result,'分钟\n') 69 | set_sleep_time(int(result)) 70 | # time.sleep(int(result)*60) 71 | 72 | return '开始休眠' #仅供测试 73 | 74 | elif cmd == 'level': #设置预警模式 75 | pattern_id = '(?<=level=)[0-9]' 76 | reobj = re.compile(pattern_id) 77 | result = reobj.findall(strin) 78 | result = "".join(result) #list to str 79 | set_warning_level(int(result)) 80 | return ('设置预警模式') 81 | 82 | #测试指令,正式使用时请注释-------------------------------------------------- 83 | elif cmd == 'run': 84 | init() 85 | market_info = get_stock_market() 86 | # print(market_info) 87 | warning_info = check(market_info) 88 | print(warning_info) 89 | return warning_info 90 | 91 | elif cmd == 'test': 92 | return 'still connecting...' 93 | 94 | ############################################################################### 95 | #test_str='AT:set_target_id=600660,000651,601012,000002,000333' 96 | #print(ATDecoder(test_str)) 97 | # 98 | #test_str='AT:get_target_id?' 99 | #print(ATDecoder(test_str)) 100 | 101 | #test_str='AT:clear_target' 102 | #print(ATDecoder(test_str)) 103 | 104 | #test_str='AT:get_target_id?' 105 | #print(ATDecoder(test_str)) 106 | 107 | #test_str='AT:set_para=1.0,0.9' 108 | #print(ATDecoder(test_str)) 109 | # 110 | #test_str='AT:get_para?'
111 | #print(ATDecoder(test_str)) 112 | 113 | #test_str='AT:sleep=1' 114 | #print(ATDecoder(test_str)) 115 | 116 | #test_str='AT:check?' 117 | #print(ATDecoder(test_str)) 118 | 119 | #test_str='AT:level=3' 120 | #print(ATDecoder(test_str)) 121 | # 122 | #test_str='AT:run' 123 | #print(ATDecoder(test_str)) 124 | # 125 | #test_str='AT:test' 126 | #print(ATDecoder(test_str)) 127 | 128 | #print(get_main_market()) 129 | #print(get_stock_market()) 130 | ################################### 131 | #from test import * #此处就是设置的全局变量不起作用的原因 132 | #import test 133 | #test.set_a() 134 | #test.a = 1 135 | #print(test.a) -------------------------------------------------------------------------------- /anack/App/M1808/wechat.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sat Apr 21 10:08:24 2018 4 | 5 | @author: Administrator 6 | """ 7 | import time 8 | import itchat 9 | from protocol import * 10 | #import imp #防止重复调用导致全局变量设置无效 11 | #try: 12 | # imp.find_module('protocol') 13 | # found = True 14 | # print('arleady imported protocol') 15 | #except ImportError: 16 | # from protocol import * 17 | ''' 18 | 1. 给单个人发消息 19 | 2. 给指定群发消息 20 | 3. 实现消息注册 21 | 4. 显示当前可用的群聊 22 | ''' 23 | def WechatLogin(): 24 | itchat.auto_login(hotReload=True) 25 | 26 | def SendText2Friend(msg,nick_name='filehelper'): #已经测试成功,可用 27 | ''' 28 | @ 发送文本消息给指定好友,如果不指定nick_name则发送给自己的文件助手 29 | ''' 30 | if nick_name == 'filehelper': 31 | itchat.send(msg,toUserName = 'filehelper') 32 | else: 33 | 34 | users = itchat.search_friends(name=nick_name) 35 | # print(users) 36 | who = users[0]['UserName'] 37 | # print(who) 38 | itchat.send(msg,toUserName = who) 39 | 40 | def SendText2ChatRoom(context, name): 41 | ''' 42 | @ 发送消息到特定群聊内 43 | @ 备注:1.确定该群聊存在(可调用PrintChatRoomList查看) 44 | @ 2.切记把群聊加入通讯录,否则只能显示活跃的前几个群聊 45 | ''' 46 | itchat.get_chatrooms(update=True) 47 | iRoom = itchat.search_chatrooms(name) 48 | for room in iRoom: 49 | if room['NickName'] == name: 50 | userName = room['UserName'] 51 | break 52 | try: 53 | itchat.send_msg(context, userName) 54 | except: 55 | print('warning: no this chatrooms') 56 | 57 | def PrintChatRoomList(): 58 | ''' 59 | @ 显示当前可见的群聊名 60 | ''' 61 | rooms = itchat.get_chatrooms(update=True) 62 | for s in rooms: 63 | print(s['NickName']) 64 | 65 | @itchat.msg_register('Text',isGroupChat = True)#群回复 66 | def text_reply(msg): 67 | # msg.user.send('%s: %s' % (msg.type, msg.text)) #终于发出消息了 68 | who = msg['ActualNickName'] #获取发送人的名称 69 | content = msg['Text'] 70 | print(who,'call me') 71 | if content == 'logout' or content == 'quit' or content == 'exit': 72 | itchat.logout() 73 | return 74 | ### 发送内容有三种方式:给自己、给别人、给群聊(示例程序),测试成功 75 | # if who == '尹超': 76 | # SendText2Friend('yc send') #给自己(文件助手) 77 | # SendTxet2ChatRoom('yc send','啊啊啊') #给指定群聊 78 | # else: 79 | # SendText2Friend('ali send','阿狸') #给指定的人 80 | # SendTxet2ChatRoom('ali send','啊啊啊') #给指定群聊 81 | 82 | #------------------------------------------------- 83 | authority = ['尹超','徐抒田','李航','李繁','鹏','顾秋杨'] 84 | # if who in authority: #此处有bug,自己先发送的话who为空,必须别人先发信息 85 | if 1: 86 | # print(content) 87 | result = ATDecoder(content) 88 | # print(result) 89 | if result != None: 90 | SendText2ChatRoom(result,'啊啊啊') #给指定群聊 91 | # else: 92 | # print('no reply') 93 | #------------------------------------------------------------------- 94 | time.sleep(1) 95 | ######################################################################## 96 | #WechatLogin() 97 | #SendText2Friend('test') 98 | 
#SendText2Friend('test','阿狸') 99 | #SendTxet2ChatRoom('test','啊啊啊') 100 | #itchat.run() 101 | -------------------------------------------------------------------------------- /anack/App/M1808/命令示例.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/anack/App/M1808/命令示例.txt -------------------------------------------------------------------------------- /anack/App/StockAnalyser.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- -------------------------------------------------------------------------------- /anack/App/YT_produce_sell.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/anack/App/YT_produce_sell.py -------------------------------------------------------------------------------- /anack/App/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/anack/App/__init__.py -------------------------------------------------------------------------------- /anack/App/实时推送/App.py: -------------------------------------------------------------------------------- 1 | from PageDecoder import * 2 | from StockClass import * 3 | from PushMessage import * 4 | import time 5 | 6 | 7 | my_interest = ['000651','600660','600887','600377','601012'] 8 | for interest in my_interest: 9 | data = GetTotalData(interest) 10 | istock = stock() 11 | istock.SetData(data) 12 | str1 = interest + '.CurPrice = ' + str(istock.CurPrice) 13 | print(str1) 14 | push(str1) 15 | time.sleep(1) 16 | -------------------------------------------------------------------------------- /anack/App/实时推送/PageDecoder.py: -------------------------------------------------------------------------------- 1 | import re 2 | import urllib.request 3 | 4 | base = 'http://hq.sinajs.cn/list=' 5 | bios = 'sh600660' 6 | 7 | def GetTotalData(inputstr): 8 | ''' 9 | input a number serial, ex:600660 10 | ''' 11 | pattern_id = '\d{6}' 12 | reobj = re.compile(pattern_id) 13 | id = reobj.findall(inputstr) 14 | id = "".join(id) 15 | page = '' 16 | if id != '': 17 | flag = int(id) 18 | if flag >= 600000: 19 | bios = 'sh' + id 20 | else: 21 | bios = 'sz' + id 22 | inputstr = base + bios 23 | # print(inputstr) 24 | page = urllib.request.urlopen(inputstr).read() 25 | # print(page) 26 | if len(page) < 30: 27 | print('error, invalid id') 28 | return 0 29 | s = page[30:] 30 | s = str(s) 31 | 32 | pattern_data = '\d+\.*\d*(?=,)' 33 | reobj = re.compile(pattern_data) 34 | data = reobj.findall(s) 35 | data.pop() 36 | data.pop() 37 | 38 | pattern_data = '\d\d\d\d-\d\d-\d\d' 39 | reobj = re.compile(pattern_data) 40 | date = reobj.findall(s) 41 | data.append(date) 42 | 43 | pattern_data = '\d\d:\d\d:\d\d' 44 | reobj = re.compile(pattern_data) 45 | time = reobj.findall(s) 46 | data.append(time) 47 | data.append(id) 48 | return data 49 | else: 50 | print('invalid id') 51 | return 0 52 | -------------------------------------------------------------------------------- /anack/App/实时推送/PushMessage.py: -------------------------------------------------------------------------------- 1 | # Author:YinChao 2 | # Date:2017-12-21 3 | # ver:V0.1 4 | 5 | import pycurl,json 6 | 7 | def push(str): 8 | appID = "59edd424a4c48aee80d6dd4a" 9 | appSecret = 
"05cc2a44d97e361f14d28c0ab8ff4acd" 10 | pushEvent = "DoorAlert" 11 | pushMessage = str 12 | 13 | c = pycurl.Curl() 14 | c.setopt(c.URL, 'https://api.instapush.im/v1/post') 15 | c.setopt(c.HTTPHEADER,['x-instapush-appid:' + appID,'x-instapush-appsecret:' + appSecret, 'Content-Type:application/json']) 16 | 17 | json_fields = {} 18 | json_fields['event'] = pushEvent 19 | json_fields['trackers'] = {} 20 | json_fields['trackers']['message'] = pushMessage 21 | 22 | postfields = json.dumps(json_fields) 23 | 24 | c.setopt(c.POSTFIELDS, postfields) 25 | 26 | c.perform() 27 | 28 | c.close() 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /anack/App/实时推送/StockClass.py: -------------------------------------------------------------------------------- 1 | #from DistinguishData import DataType 2 | 3 | class stock: 4 | ''' 5 | # 参数定义 6 | # ID 代码 600660 由单独变量给出 7 | # 0 TdyOpen 今开盘价 由List变量给出 8 | # 1 YdyClose 昨天收盘 9 | # 2 CurPrice 现价 10 | # 3 HighPrice 最高价 11 | # 4 LowPrice 最低价 12 | # 5 CurBuyPrice 竞买价 13 | # 6 CurSellPrice 竞卖价 14 | # 7 CurQuantity 成交量 15 | # 8 CurMoney 成交额 16 | # 9 Buy1_quant 买一数量 17 | # 10Buy1_price 买一报价 18 | # 11Buy2_quant 买一数量 19 | # 12Buy2_price 以此类推。。。 20 | # 13Buy3_quant 21 | # 14Buy3_price 22 | # 15Buy4_quant 23 | # 16Buy4_price 24 | # 17Buy5_quant 25 | # 18Buy5_price 26 | # 19Sell1_quant 27 | # 20Sell1_price 28 | # 21Sell2_quant 29 | # 22Sell2_price 30 | # 23Sell3_quant 31 | # 24Sell3_price 32 | # 25Sell4_quant 33 | # 26Sell4_price 34 | # 27Sell5_quant 35 | # 28Sell5_price 36 | ''' 37 | def SetData(self, ldata): 38 | ''' 39 | 一次性设置所有的信息 40 | :param id: 股票代码,{'gldq',000651} 41 | :param lista: 输入结构体 只能通过正则表达式获得 42 | :return:无 43 | ''' 44 | if ldata == 0: 45 | return 0 46 | self.TdyOpen = float(ldata[0]) 47 | self.YdyClose = float(ldata[1]) 48 | self.CurPrice = float(ldata[2]) 49 | self.HighPrice = float(ldata[3]) 50 | self.LowPrice = float(ldata[4]) 51 | self.CurBuyPrice = float(ldata[5]) 52 | self.CurSellPrice = float(ldata[6]) 53 | self.CurQuantity = int(ldata[7])/1000000 54 | self.CurMoney = float(ldata[8])/100000000 55 | self.Buy1_quant = int(int(ldata[9])/100) 56 | self.Buy1_price = float(ldata[10]) 57 | self.Buy2_quant = int(int(ldata[11])/100) 58 | self.Buy2_price = float(ldata[12]) 59 | self.Buy3_quant = int(int(ldata[13])/100) 60 | self.Buy3_price = float(ldata[14]) 61 | self.Buy4_quant = int(int(ldata[15])/100) 62 | self.Buy4_price = float(ldata[16]) 63 | self.Buy5_quant = int(int(ldata[17])/100) 64 | self.Buy5_price = float(ldata[18]) 65 | self.Sell1_quant = int(int(ldata[19])/100) 66 | self.Sell1_price = float(ldata[20]) 67 | self.Sell2_quant = int(int(ldata[21])/100) 68 | self.Sell2_price = float(ldata[22]) 69 | self.Sell3_quant = int(int(ldata[23])/100) 70 | self.Sell3_price = float(ldata[24]) 71 | self.Sell4_quant = int(int(ldata[25])/100) 72 | self.Sell4_price = float(ldata[26]) 73 | self.Sell5_quant = int(int(ldata[27])/100) 74 | self.Sell5_price = float(ldata[28]) 75 | self.date = ldata[29] 76 | self.time = ldata[30] 77 | self.id = ldata[31] 78 | 79 | def PrintAllData(self): 80 | ''' 81 | 一次性打印所有信息(仅用于调试) 82 | :return: 83 | ''' 84 | print('ID:\t'+self.id) 85 | print('今开:\t'+str(self.TdyOpen)) 86 | print('昨收:\t'+str(self.YdyClose)) 87 | print('现价:\t'+str(self.CurPrice)) 88 | print('最高价:\t'+str(self.HighPrice)) 89 | print('最低价:\t'+str(self.LowPrice)) 90 | print('竞买:\t'+str(self.CurBuyPrice)) 91 | print('竞卖:\t'+str(self.CurSellPrice)) 92 | print('成交量(万手):\t'+str(self.CurQuantity)) 93 | 
print('成交额(亿元):\t'+str(self.CurMoney)) 94 | print('买一/手:\t'+str(self.Buy1_quant)) 95 | print('买一/价:\t'+str(self.Buy1_price)) 96 | print('买二/手:\t'+str(self.Buy2_quant)) 97 | print('买二/价:\t'+str(self.Buy2_price)) 98 | print('买三/手:\t'+str(self.Buy3_quant)) 99 | print('买三/价:\t'+str(self.Buy3_price)) 100 | print('买四/手:\t'+str(self.Buy4_quant)) 101 | print('买四/价:\t'+str(self.Buy4_price)) 102 | print('买五/手:\t'+str(self.Buy5_quant)) 103 | print('买五/价:\t'+str(self.Buy5_price)) 104 | print('卖一/手:\t'+str(self.Sell1_quant)) 105 | print('卖一/价:\t'+str(self.Sell1_price)) 106 | print('卖二/手:\t'+str(self.Sell2_quant)) 107 | print('卖二/价:\t'+str(self.Sell2_price)) 108 | print('卖三/手:\t'+str(self.Sell3_quant)) 109 | print('卖三/价:\t'+str(self.Sell3_price)) 110 | print('卖四/手:\t'+str(self.Sell4_quant)) 111 | print('卖四/价:\t'+str(self.Sell4_price)) 112 | print('卖五/手:\t'+str(self.Sell5_quant)) 113 | print('卖五/价:\t'+str(self.Sell5_price)) 114 | print(self.date) 115 | print(self.time) 116 | 117 | def RiseRate(self): 118 | ''' 119 | 获取股票实时涨幅 120 | :return: 121 | ''' 122 | rate = (self.CurPrice - self.YdyClose)/self.YdyClose * 100 123 | rate = round(rate,2) 124 | return rate 125 | 126 | 127 | 128 | -------------------------------------------------------------------------------- /anack/App/实时推送/使用说明.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/anack/App/实时推送/使用说明.txt -------------------------------------------------------------------------------- /anack/SQL/StockSelector.py: -------------------------------------------------------------------------------- 1 | # This tool is used to update the file "感兴趣的个股列表.txt" 2 | # syntax: 3 | # first line: update time: 4 | # second line: total: 5 | # other line: <code>/t<name> 6 | # 7 | # eg: 8 | # update time:2018/3/4 9 | # total:33 10 | # 000651 格力电器 11 | # ... other 32 items 12 | 13 | # -*- coding: utf-8 -*- 14 | import pandas as pd 15 | 16 | #------------------------------------------------------------------------------ 17 | # change here 18 | # 用于筛选个股的各项参数 19 | # 筛股逻辑: 20 | # 1. 初筛:调用ts.get_stock_basics()即可 21 | # 动态市盈率60以下,日成交量大于1亿,市值大于100亿,收入同比、净利润率为正 22 | # 2. 仔细筛查:同行比对排名前5,个股历年同比连续增长 23 | # 同行业对比(从大到小排列):pe倒数前五,毛利率顺数前5。pb排名靠后,利润同比、 24 | # 收入同比排名靠前 25 | # 自己同比:现金流为正、利润同比有增长 26 | # 27 | parameter = [] 28 | pe = 50 29 | #pb = ... 30 | #and so on ...(其余筛选参数待定,初筛思路的示例见下方)
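# [补充示例] 上面"初筛"一节的思路可以直接落成代码。下面是一个草图,并非原项目
# 实现:函数名 rough_screen 为示例命名,阈值取自上文注释(市盈率60以下、市值
# 100亿即1000000万元、收入同比与净利润率为正),列名沿用 ts.get_stock_basics();
# 日成交量条件需要行情数据,此处从略。
import tushare as ts

def rough_screen(pe_max=60, total_assets_min=1000000):
    df = ts.get_stock_basics()                      # 全部A股基本面,索引为股票代码
    df = df[(df['pe'] > 0) & (df['pe'] < pe_max)]   # 剔除亏损股,限制动态市盈率
    df = df[df['totalAssets'] >= total_assets_min]  # 总资产下限(单位:万元)
    df = df[(df['rev'] > 0) & (df['npr'] > 0)]      # 收入同比、净利润率为正
    return list(df.index)                           # 返回代码列表,便于写回txt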
31 | #------------------------------------------------------------------------------ 32 | 33 | def update_interest_list(): 34 | ''' 35 | 根据指定的逻辑遍历A股,找出符合条件的个股,更新“感兴趣的个股列表.txt”文件, 36 | 同时以列表形式返回 37 | ''' 38 | interest_list = [] # TODO: 筛选逻辑待实现,先返回空列表占位 39 | return interest_list 40 | 41 | def get_interest_list(): 42 | ''' 43 | 解析"感兴趣的个股列表.txt",返回list类型的数据供其他模块使用 44 | ''' 45 | list_id = [] 46 | with open('yourpath/感兴趣的个股列表.txt','r') as fh: 47 | s = fh.readline() #获取更新时间 48 | s = fh.readline() #获取目标长度 49 | 50 | lines = fh.readlines() #获取目标内容 51 | for s in lines: 52 | code = s[:6] 53 | list_id.append(code) 54 | list_id.sort() 55 | return list_id -------------------------------------------------------------------------------- /anack/SQL/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/anack/SQL/__init__.py -------------------------------------------------------------------------------- /anack/SQL/classify_to_sql.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sun Mar 4 14:21:14 2018 4 | 5 | @author: Administrator 6 | """ 7 | import pandas as pd 8 | import tushare as ts 9 | from SQL.sql import pymysql_connect 10 | from SQL.sql import df_to_mysql 11 | 12 | def create_classify_table(): 13 | db = pymysql_connect() 14 | cursor = db.cursor() 15 | cursor.execute('DROP TABLE IF EXISTS anack_classify') 16 | classify = """CREATE TABLE IF NOT EXISTS `anack_classify` ( 17 | `code` varchar(255) DEFAULT NULL, 18 | `name` varchar(255) DEFAULT NULL, 19 | `industry` varchar(255) DEFAULT NULL, 20 | `area` varchar(255) DEFAULT NULL, 21 | `sz50` varchar(255) DEFAULT NULL, 22 | `hs300_weight` FLOAT(10) DEFAULT NULL, 23 | `zz500_weight` FLOAT(10) DEFAULT NULL 24 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8""" 25 | cursor.execute(classify) 26 | db.commit() 27 | cursor.close() 28 | db.close() 29 | 30 | def classify_info_to_sql(): 31 | create_classify_table() 32 | 33 | a = ts.get_industry_classified() 34 | a.columns = ['code', 'name', 'industry'] 35 | b = ts.get_area_classified() 36 | c = ts.get_sz50s() 37 | c = c.iloc[:,1::] 38 | c['sz50'] = '1' 39 | d = ts.get_hs300s() 40 | d = d.iloc[:,1::] 41 | d.columns = ['code','name','hs300_weight'] 42 | e = ts.get_zz500s() 43 | e = e.iloc[:,1::] 44 | e.columns = ['code','name','zz500_weight'] 45 | result = pd.merge(a, b, how='left', on=None, left_on=None, right_on=None, 46 | left_index=False, right_index=False, sort=True, 47 | suffixes=('_x', '_y'), copy=True, indicator=False) 48 | result = pd.merge(result, c, how='left', on=None, left_on=None, right_on=None, 49 | left_index=False, right_index=False, sort=True, 50 | suffixes=('_x', '_y'), copy=True, indicator=False) 51 | result = pd.merge(result, d, how='left', on=None, left_on=None, right_on=None, 52 | left_index=False, right_index=False, sort=True, 53 | suffixes=('_x', '_y'), copy=True, indicator=False) 54 | result = pd.merge(result, e, how='left', on=None, left_on=None, right_on=None, 55 | left_index=False, right_index=False, sort=True, 56 | suffixes=('_x', '_y'), copy=True, indicator=False) 57 | df_to_mysql('anack_classify',result) 58 | 59 | # ------------------------------------------------------------- 60 | classify_info_to_sql()#每次调用都会更新 61 | -------------------------------------------------------------------------------- /anack/SQL/glo.py: -------------------------------------------------------------------------------- 1 | # 本模块实现全局变量 2 | # -*- 
coding:utf-8 -*- 3 | 4 | def _init(): 5 | global _global_dict 6 | _global_dict = {} 7 | 8 | 9 | def set_value(key,value): 10 | _global_dict[key] = value 11 | 12 | def get_value(key, default_value = None): 13 | try: 14 | return _global_dict[key] 15 | except KeyError: 16 | return default_value -------------------------------------------------------------------------------- /anack/SQL/k_data_to_sql.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sun Mar 4 14:21:14 2018 4 | 5 | @author: Administrator 6 | """ 7 | 8 | import tushare as ts 9 | from SQL.sql import pymysql_connect 10 | from SQL.sql import df_to_mysql 11 | 12 | def create_k_table(): 13 | db = pymysql_connect() 14 | cursor = db.cursor() 15 | 16 | sql1 = """CREATE TABLE IF NOT EXISTS `anack_d_k_data` ( 17 | `date` varchar(255) DEFAULT NULL, 18 | `open` varchar(255) DEFAULT NULL, 19 | `close` varchar(255) DEFAULT NULL, 20 | `high` varchar(255) DEFAULT NULL, 21 | `low` varchar(255) DEFAULT NULL, 22 | `volume` varchar(255) DEFAULT NULL, 23 | `code` varchar(255) DEFAULT NULL 24 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8""" 25 | 26 | sql2 = """CREATE TABLE IF NOT EXISTS `anack_m_k_data` ( 27 | `date` varchar(255) DEFAULT NULL, 28 | `open` varchar(255) DEFAULT NULL, 29 | `close` varchar(255) DEFAULT NULL, 30 | `high` varchar(255) DEFAULT NULL, 31 | `low` varchar(255) DEFAULT NULL, 32 | `volume` varchar(255) DEFAULT NULL, 33 | `code` varchar(255) DEFAULT NULL 34 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8""" 35 | 36 | cursor.execute(sql1) 37 | cursor.execute(sql2) 38 | db.commit() 39 | cursor.close() 40 | db.close() 41 | 42 | def k_data(index,mode='D'): 43 | 44 | if mode == 'D': 45 | df_to_mysql('anack_d_k_data',ts.get_k_data(index)) 46 | elif mode == 'M': 47 | df_to_mysql('anack_m_k_data',ts.get_k_data(index,ktype='M')) 48 | 49 | #------------------------------------------------------------------------------ 50 | #create_k_table() 51 | #k_data('600660') 52 | #k_data('600660','M') 53 | -------------------------------------------------------------------------------- /anack/SQL/macro_to_sql.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sun Mar 4 14:21:14 2018 4 | 5 | @author: Administrator 6 | """ 7 | 8 | import pandas as pd 9 | import pymysql 10 | 11 | import tushare as ts 12 | from SQL.sql import pymysql_connect 13 | from SQL.sql import df_to_mysql 14 | # 15 | def create_classify_table(): 16 | db = pymysql_connect() 17 | cursor = db.cursor() 18 | cursor.execute('DROP TABLE IF EXISTS anack_macro_data') 19 | macro = """CREATE TABLE IF NOT EXISTS `anack_macro_data` ( 20 | `month` varchar(255) DEFAULT NULL, 21 | `cpi` varchar(16) DEFAULT NULL, 22 | `ppi` varchar(16) DEFAULT NULL, 23 | `m2` varchar(16) DEFAULT NULL, 24 | `m1` varchar(16) DEFAULT NULL, 25 | `m0` varchar(16) DEFAULT NULL 26 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8""" 27 | cursor.execute(macro) 28 | db.commit() 29 | cursor.close() 30 | db.close() 31 | 32 | def macro_info_to_sql(): 33 | create_classify_table() 34 | 35 | a = ts.get_cpi() 36 | b = ts.get_ppi() 37 | c = ts.get_money_supply() 38 | c = c.iloc[:,[0,1,3,5]] 39 | b = b.iloc[:,[0,2]] 40 | result = pd.merge(a, b, how='left', on=None, left_on=None, right_on=None, 41 | left_index=False, right_index=False, sort=False, 42 | suffixes=('_x', '_y'), copy=True, indicator=False) 43 | result = pd.merge(result, c, how='left', on=None, left_on=None, right_on=None, 44 | 
left_index=False, right_index=False, sort=False, 45 | suffixes=('_x', '_y'), copy=True, indicator=False) 46 | df_to_mysql('anack_macro_data',result) 47 | 48 | 49 | # ------------------------------------------------------------- 50 | macro_info_to_sql() #每次调用都会更新 -------------------------------------------------------------------------------- /anack/SQL/sql.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | import pymysql 3 | from sqlalchemy import create_engine 4 | #import glo 5 | # 6 | #glo._init() 7 | 8 | hosts = '' 9 | users = '' 10 | passwds = '' 11 | databases = '' 12 | 13 | #glo.set_value('host',host) 14 | #glo.set_value('user',user) 15 | #glo.set_value('passwd',passwd) 16 | #glo.set_value('database',database) 17 | #glo.set_value('charset','utf8') 18 | def pymysql_connect(): 19 | return pymysql.connect( 20 | host=hosts, 21 | database=databases, 22 | user=users, 23 | password=passwds, 24 | port=3306, 25 | charset='utf8' 26 | ) 27 | def connect_sql(): 28 | return create_engine("mysql+pymysql://"+ users + ":"+ passwds + "@" + hosts + ":3306/" + databases + "?charset=utf8") 29 | 30 | def df_to_mysql(table,df): 31 | connect = connect_sql() 32 | df.to_sql(name=table,con=connect,if_exists='append',index=False,index_label=False) 33 | -------------------------------------------------------------------------------- /anack/SQL/update.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 本模块用于更新数据库 3 | ''' 4 | from SQL.classify_to_sql import classify_info_to_sql 5 | from SQL.macro_to_sql import macro_info_to_sql 6 | from SQL.k_data_to_sql import create_k_table 7 | from SQL.k_data_to_sql import k_data 8 | 9 | 10 | def get_interest_list(filename): 11 | ''' 12 | 解析"感兴趣的个股列表.txt",返回list类型的数据供其他模块使用 13 | ''' 14 | list_id = [] 15 | with open(filename,'r') as fh: 16 | s = fh.readline() #获取更新时间 17 | s = fh.readline() #获取目标长度 18 | 19 | lines = fh.readlines() #获取目标内容 20 | for s in lines: 21 | code = s[:6] 22 | list_id.append(code) 23 | list_id.sort() 24 | return list_id 25 | 26 | def sql_update(): 27 | classify_info_to_sql() #update classify data 28 | 29 | macro_info_to_sql() #update macro data 30 | 31 | lls = [] #update k_data, both day and month 32 | lls = get_interest_list('./SQL/感兴趣的个股列表.txt') #文件路径与main.py保持一致 33 | create_k_table() 34 | for l in lls: 35 | k_data(l) 36 | k_data(l,'M') 37 | 38 | # update finance data here... 
39 | # 在代码执行路径自动生成输入路径 40 | 41 | column_interest = ['货币资金','应收账款','存货','流动资产合计','固定资产净额','无形资产','资产总计','短期借款','预收款项','流动负债合计','长期借款','一年内到期的非流动负债','负债合计','盈余公积','所有者权益(或股东权益)合计'] 42 | for i in lls: 43 | try: 44 | cbfx = f.crawling_finance(path,i,column_interest) 45 | cbfx.crawling_update() 46 | f.Data_extract_balance() 47 | except: 48 | print(i) 49 | #------------------------------------------------------------------------------ 50 | #sql_update() #一条更新语句完成所有事情 51 | -------------------------------------------------------------------------------- /anack/SQL/感兴趣的个股列表.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/anack/SQL/感兴趣的个股列表.txt -------------------------------------------------------------------------------- /anack/Tushare/basic.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | import tushare as ts 3 | def info(): 4 | print('本模块用于获取实时交易信息') 5 | print('k_day 获取个股的K线图') 6 | print('k_today 获取当日所有股票的K线图') 7 | print('index 获取今日指数信息') 8 | print('ddjy 获取指定日期下的大单交易信息') 9 | 10 | def k_day(index,mode='D'): 11 | 12 | if mode == 'D': 13 | return ts.get_k_data(index) 14 | elif mode == 'M': 15 | return ts.get_k_data(index,ktype='M') 16 | 17 | def k_today(): 18 | 19 | return ts.get_today_all() 20 | 21 | def index(): 22 | 23 | return ts.get_index() 24 | 25 | def ddjy(id,time,hand=400): 26 | 27 | return ts.get_sina_dd(id, date=time, vol=hand) 28 | 29 | -------------------------------------------------------------------------------- /anack/Tushare/classify.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | import tushare as ts 3 | 4 | def info(): 5 | ''' 6 | 本模块用于获取分类信息 7 | ''' 8 | print('本模块用于获取分类信息') 9 | print('industry 行业分类数据') 10 | print('concept 概念分类数据') 11 | print('area 地域分类数据') 12 | print('zxb 中小板列表') 13 | print('cyb 创业板列表') 14 | print('st ST列表') 15 | print('hs300 沪深300列表') 16 | print('sz50 上证50列表') 17 | print('zz500 中证500列表') 18 | 19 | def industry(): 20 | return ts.get_industry_classified() 21 | 22 | def concept(): 23 | return ts.get_concept_classified() 24 | 25 | def area(): 26 | return ts.get_area_classified() 27 | 28 | def zxb(): 29 | return ts.get_sme_classified() 30 | 31 | def cyb(): 32 | return ts.get_gem_classified() 33 | 34 | def st(): 35 | return ts.get_st_classified() 36 | 37 | def hs300(): 38 | return ts.get_hs300s() 39 | 40 | def sz50(): 41 | return ts.get_sz50s() 42 | 43 | def zz500(): 44 | return ts.get_zz500s() -------------------------------------------------------------------------------- /anack/Tushare/finance.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | import tushare as ts 3 | def info(): 4 | ''' 5 | ''' 6 | print('本模块用于获取基本面信息') 7 | print('basic_info 获取股票列表') 8 | print('finance_report 所有季度报表') 9 | print('profit 盈利能力数据') 10 | print('cashflow 现金流量数据') 11 | 12 | def basic_info(): 13 | ''' 14 | 获取股票列表 15 | ''' 16 | return ts.get_stock_basics() 17 | 18 | def finance_report(year, month): 19 | ''' 20 | 季度报主表 21 | ''' 22 | return ts.get_report_data(year,month) 23 | 24 | def profit(year, month): 25 | return ts.get_profit_data(year, month) 26 | 27 | def cashflow(year, month): 28 | return ts.get_cashflow_data(year, month) 29 | 
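# [补充示例] Tushare 封装层的调用草图(非原项目代码):finance_report/profit/
# cashflow 的第二个参数为季度(1~4),与 tushare 的 get_report_data 约定一致;
# 假设从 anack 目录下运行,Tushare 包可直接导入,函数名 demo_finance 为示例命名。
from Tushare import finance

def demo_finance():
    basics = finance.basic_info()             # 全部A股的基本面列表
    report = finance.finance_report(2017, 4)  # 2017年第4季度业绩主表
    print(basics.shape, report.shape)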
-------------------------------------------------------------------------------- /anack/Tushare/information.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | import tushare as ts 4 | 5 | 6 | def info(): 7 | ''' 8 | 9 | ''' 10 | print('本模块用于输出各种消息') 11 | print('fund_holdings 基金持股') 12 | print('forecast_info 业绩预告') 13 | print('xsg_info 限售股信息') 14 | 15 | def fund_holdings(year,month): 16 | ''' 17 | 基金持股消息披露 18 | year:年 19 | month:季度 只可取【1,2,3,4】 20 | ''' 21 | try: 22 | return ts.fund_holdings(year,month) 23 | except: 24 | print('error, month=[1,4], please check your parameter') 25 | 26 | def forecast_info(year,month): 27 | ''' 28 | 业绩预告 29 | ''' 30 | try: 31 | return ts.forecast_data(year,month) 32 | except: 33 | print('error, month=[1,4], please check your parameter') 34 | 35 | def xsg_info(): 36 | ''' 37 | 限售股信息 38 | ''' 39 | return ts.xsg_data() -------------------------------------------------------------------------------- /anack/Tushare/macro.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | import tushare as ts 3 | 4 | def info(): 5 | ''' 6 | 7 | ''' 8 | print('本模块用于获取宏观经济数据') 9 | print('deposit 存款利率一览表') 10 | print('loan 贷款利率一览表') 11 | print('rrr 存款准备金率') 12 | print('money_supply 货币供应量') 13 | print('gdp 国内生产总值') 14 | print('cpi 居民消费价格指数') 15 | print('ppi 工业品出厂价格指数') 16 | print('gdp_contribute 三大产业对GDP的贡献率') 17 | 18 | def deposit(): 19 | return ts.get_deposit_rate() 20 | 21 | def loan(): 22 | return ts.get_loan_rate() 23 | 24 | def rrr(): 25 | return ts.get_rrr() 26 | 27 | def money_supply(): 28 | return ts.get_money_supply() 29 | 30 | def gdp(): 31 | return ts.get_gdp_year() 32 | 33 | def cpi(): 34 | return ts.get_cpi() 35 | 36 | def ppi(): 37 | return ts.get_ppi() 38 | 39 | def gdp_contribute(): 40 | return ts.get_gdp_contrib() 41 | -------------------------------------------------------------------------------- /anack/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/anack/__init__.py -------------------------------------------------------------------------------- /anack/main.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | import App.IndustryEstimation 4 | from SQL.sql import pymysql_connect 5 | from SQL.update import get_interest_list 6 | import App.IndustryEstimation_detail 7 | import App.Detail_Stock_Selector 8 | 9 | industry_check = [] 10 | dbconn=pymysql_connect() 11 | filename = './SQL/感兴趣的个股列表.txt' 12 | stock_code_num = ['600000' ,'600004' ,'600005' ,'600006' ,'600007' ,'600008' ,'600009' ,'600010' ,'600011' ,'600012' ,'600015' ,'600016' ,'600017' ,'600018' ,'600019' ,'600020' ,'600021' ,'600022' ,'600026' ,'600027' ,'600028' ,'600029' ,'600030' ,'600031' ,'600033' ,'600035' ,'600036' ,'600037' ,'600038' ,'600039' ,'600048' ,'600050' ,'600051' ,'600052' ,'600053' ,'600054' ,'600055' ,'600056' ,'600058' ,'600059' ,'600060' ,'600061' ,'600062' ,'600063' ,'600064' ,'600066' ,'600067' ,'600068' ,'600069' ,'600070' ,'600071' ,'600072' ,'600073' ,'600074' ,'600075' ,'600076' ,'600077' ,'600078' ,'600079' ,'600080' ,'600081' ,'600082' ,'600083' ,'600084' ,'600085' ,'600086' ,'600087' ,'600088' ,'600089' ,'600090' ,'600091' ,'600093' ,'600095' ,'600096' ,'600097' ,'600098' ,'600099' ,'600100' ,'600101' ,'600102' ,'600103' ,'600104' ,'600105' 
,'600106' ,'600107' ,'600108' ,'600109' ,'600110' ,'600111' ,'600112' ,'600113' ,'600114' ,'600115' ,'600116' ,'600117' ,'600118' ,'600119' ,'600120' ,'600121' ,'600122' ,'600123' ,'600125' ,'600126' ,'600127' ,'600128' ,'600129' ,'600130' ,'600131' ,'600132' ,'600133' ,'600135' ,'600136' ,'600137' ,'600138' ,'600139' ,'600141' ,'600143' ,'600145' ,'600146' ,'600148' ,'600149' ,'600150' ,'600151' ,'600152' ,'600153' ,'600155' ,'600156' ,'600157' ,'600158' ,'600159' ,'600160' ,'600161' ,'600162' ,'600163' ,'600165' ,'600166' ,'600167' ,'600168' ,'600169' ,'600170' ,'600171' ,'600172' ,'600173' ,'600175' ,'600176' ,'600177' ,'600178' ,'600179' ,'600180' ,'600182' ,'600183' ,'600184' ,'600185' ,'600186' ,'600187' ,'600188' ,'600189' ,'600190' ,'600191' ,'600192' ,'600193' ,'600195' ,'600196' ,'600197' ,'600198' ,'600199' ,'600200' ,'600201' ,'600202' ,'600203' ,'600206' ,'600207' ,'600208' ,'600209' ,'600210' ,'600211' ,'600212' ,'600213' ,'600215' ,'600216' ,'600217' ,'600218' ,'600219' ,'600220' ,'600221' ,'600222' ,'600223' ,'600225' ,'600226' ,'600227' ,'600228' ,'600229' ,'600230' ,'600231' ,'600232' ,'600233' ,'600234' ,'600235' ,'600236' ,'600237' ,'600238' ,'600239' ,'600240' ,'600241' ,'600242' ,'600243' ,'600246' ,'600247' ,'600248' ,'600249' ,'600250' ,'600251' ,'600252' ,'600253' ,'600255' ,'600256' ,'600257' ,'600258' ,'600259' ,'600260' ,'600261' ,'600262' ,'600263' ,'600265' ,'600266' ,'600267' ,'600268' ,'600269' ,'600270' ,'600271' ,'600272' ,'600273' ,'600275' ,'600276' ,'600277' ,'600278' ,'600279' ,'600280' ,'600281' ,'600282' ,'600283' ,'600284' ,'600285' ,'600287' ,'600288' ,'600289' ,'600290' ,'600291' ,'600292' ,'600293' ,'600295' ,'600297' ,'600298' ,'600299' ,'600300' ,'600301' ,'600302' ,'600303' ,'600305' ,'600306' ,'600307' ,'600308' ,'600309' ,'600310' ,'600311' ,'600312' ,'600313' ,'600315' ,'600316' ,'600317' ,'600318' ,'600319' ,'600320' ,'600321' ,'600322' ,'600323' ,'600325' ,'600326' ,'600327' ,'600328' ,'600329' ,'600330' ,'600331' ,'600332' ,'600333' ,'600335' ,'600336' ,'600337' ,'600338' ,'600339' ,'600340' ,'600343' ,'600345' ,'600346' ,'600348' ,'600350' ,'600351' ,'600352' ,'600353' ,'600354' ,'600355' ,'600356' ,'600358' ,'600359' ,'600360' ,'600361' ,'600362' ,'600363' ,'600365' ,'600366' ,'600367' ,'600368' ,'600369' ,'600370' ,'600371' ,'600373' ,'600375' ,'600376' ,'600377' ,'600378' ,'600379' ,'600380' ,'600381' ,'600382' ,'600383' ,'600385' ,'600386' ,'600387' ,'600388' ,'600389' ,'600390' ,'600391' ,'600392' ,'600393' ,'600395' ,'600396' ,'600397' ,'600398' ,'600399' ,'600400' ,'600403' ,'600405' ,'600406' ,'600408' ,'600409' ,'600410' ,'600415' ,'600416' ,'600418' ,'600419' ,'600420' ,'600421' ,'600422' ,'600423' ,'600425' ,'600426' ,'600428' ,'600429' ,'600432' ,'600433' ,'600435' ,'600436' ,'600438' ,'600439' ,'600444' ,'600446' ,'600448' ,'600449' ,'600452' ,'600455' ,'600456' ,'600458' ,'600459' ,'600460' ,'600461' ,'600462' ,'600463' ,'600466' ,'600467' ,'600468' ,'600469' ,'600470' ,'600475' ,'600476' ,'600477' ,'600478' ,'600479' ,'600480' ,'600481' ,'600482' ,'600483' ,'600485' ,'600486' ,'600487' ,'600488' ,'600489' ,'600490' ,'600491' ,'600493' ,'600495' ,'600496' ,'600497' ,'600498' ,'600499' ,'600500' ,'600501' ,'600502' ,'600503' ,'600505' ,'600506' ,'600507' ,'600508' ,'600509' ,'600510' ,'600511' ,'600512' ,'600513' ,'600515' ,'600516' ,'600517' ,'600518' ,'600519' ,'600520' ,'600521' ,'600522' ,'600523' ,'600525' ,'600526' ,'600527' ,'600528' ,'600529' ,'600530' ,'600531' ,'600532' ,'600533' ,'600535' ,'600536' ,'600537' 
,'600538' ,'600539' ,'600540' ,'600543' ,'600545' ,'600546' ,'600547' ,'600548' ,'600549' ,'600550' ,'600551' ,'600552' ,'600553' ,'600555' ,'600557' ,'600558' ,'600559' ,'600560' ,'600561' ,'600562' ,'600563' ,'600565' ,'600566' ,'600567' ,'600568' ,'600569' ,'600570' ,'600571' ,'600572' ,'600573' ,'600575' ,'600576' ,'600577' ,'600578' ,'600579' ,'600580' ,'600581' ,'600582' ,'600583' ,'600584' ,'600585' ,'600586' ,'600587' ,'600588' ,'600589' ,'600590' ,'600592' ,'600593' ,'600594' ,'600595' ,'600596' ,'600597' ,'600598' ,'600599' ,'600600' ,'600601' ,'600602' ,'600603' ,'600604' ,'600605' ,'600606' ,'600608' ,'600609' ,'600610' ,'600611' ,'600612' ,'600613' ,'600614' ,'600615' ,'600616' ,'600617' ,'600618' ,'600619' ,'600620' ,'600621' ,'600622' ,'600623' ,'600624' ,'600626' ,'600628' ,'600629' ,'600630' ,'600631' ,'600633' ,'600634' ,'600635' ,'600636' ,'600637' ,'600638' ,'600639' ,'600640' ,'600641' ,'600642' ,'600643' ,'600644' ,'600645' ,'600647' ,'600648' ,'600649' ,'600650' ,'600651' ,'600652' ,'600653' ,'600654' ,'600655' ,'600656' ,'600657' ,'600658' ,'600660' ,'600661' ,'600662' ,'600663' ,'600664' ,'600665' ,'600666' ,'600667' ,'600668' ,'600671' ,'600673' ,'600674' ,'600675' ,'600676' ,'600677' ,'600678' ,'600679' ,'600680' ,'600682' ,'600683' ,'600684' ,'600685' ,'600686' ,'600687' ,'600688' ,'600689' ,'600690' ,'600691' ,'600692' ,'600693' ,'600694' ,'600695' ,'600696' ,'600697' ,'600698' ,'600699' ,'600701' ,'600702' ,'600703' ,'600704' ,'600706' ,'600707' ,'600708' ,'600710' ,'600711' ,'600712' ,'600713' ,'600714' ,'600715' ,'600716' ,'600717' ,'600718' ,'600719' ,'600720' ,'600721' ,'600722' ,'600723' ,'600724' ,'600725' ,'600726' ,'600727' ,'600728' ,'600729' ,'600730' ,'600731' ,'600732' ,'600733' ,'600734' ,'600735' ,'600736' ,'600737' ,'600738' ,'600739' ,'600740' ,'600741' ,'600742' ,'600743' ,'600744' ,'600745' ,'600746' ,'600747' ,'600748' ,'600749' ,'600750' ,'600751' ,'600753' ,'600754' ,'600755' ,'600756' ,'600757' ,'600758' ,'600759' ,'600760' ,'600761' ,'600763' ,'600764' ,'600765' ,'600766' ,'600767' ,'600768' ,'600769' ,'600770' ,'600771' ,'600773' ,'600774' ,'600775' ,'600776' ,'600777' ,'600778' ,'600779' ,'600780' ,'600781' ,'600782' ,'600783' ,'600784' ,'600785' ,'600787' ,'600789' ,'600790' ,'600791' ,'600792' ,'600793' ,'600794' ,'600795' ,'600796' ,'600797' ,'600798' ,'600800' ,'600801' ,'600802' ,'600803' ,'600804' ,'600805' ,'600806' ,'600807' ,'600808' ,'600809' ,'600810' ,'600811' ,'600812' ,'600814' ,'600815' ,'600816' ,'600817' ,'600818' ,'600819' ,'600820' ,'600821' ,'600822' ,'600823' ,'600824' ,'600825' ,'600826' ,'600827' ,'600828' ,'600829' ,'600830' ,'600831' ,'600832' ,'600833' ,'600834' ,'600835' ,'600836' ,'600837' ,'600838' ,'600839' ,'600841' ,'600843' ,'600844' ,'600845' ,'600846' ,'600847' ,'600848' ,'600850' ,'600851' ,'600853' ,'600854' ,'600855' ,'600856' ,'600857' ,'600858' ,'600859' ,'600860' ,'600861' ,'600862' ,'600863' ,'600864' ,'600865' ,'600866' ,'600867' ,'600868' ,'600869' ,'600871' ,'600872' ,'600873' ,'600874' ,'600875' ,'600876' ,'600877' ,'600879' ,'600880' ,'600881' ,'600882' ,'600883' ,'600884' ,'600885' ,'600886' ,'600887' ,'600888' ,'600889' ,'600890' ,'600891' ,'600892' ,'600893' ,'600894' ,'600895' ,'600896' ,'600897' ,'600898' ,'600900' ,'600960' ,'600961' ,'600962' ,'600963' ,'600965' ,'600966' ,'600967' ,'600969' ,'600970' ,'600971' ,'600973' ,'600975' ,'600976' ,'600978' ,'600979' ,'600980' ,'600981' ,'600982' ,'600983' ,'600984' ,'600985' ,'600986' ,'600987' ,'600988' ,'600990' ,'600991' ,'600992' 
,'600993' ,'600995' ,'600997' ,'600999' ,'601001' ,'601002' ,'601003' ,'601005' ,'601006' ,'601007' ,'601008' ,'601009' ,'601088' ,'601099' ,'601106' ,'601107' ,'601111' ,'601117' ,'601139' ,'601166' ,'601168' ,'601169' ,'601179' ,'601186' ,'601268' ,'601299' ,'601318' ,'601328' ,'601333' ,'601390' ,'601398' ,'601588' ,'601600' ,'601601' ,'601607' ,'601618' ,'601628' ,'601666' ,'601668' ,'601678' ,'601688' ,'601699' ,'601727' ,'601766' ,'601788' ,'601801' ,'601808' ,'601857' ,'601866' ,'601872' ,'601877' ,'601888' ,'601898' ,'601899' ,'601918' ,'601919' ,'601939' ,'601958' ,'601988' ,'601989' ,'601991' ,'601998' ,'601999' ,'000958' ,'601188' ,'601518'] 13 | 14 | 15 | 16 | 17 | # ============================================================================= 18 | # #行业平均数据 19 | # App.IndustryEstimation.CreateTable() #此处开启则清空此前所有内容 20 | # for stock_id in get_interest_list(filename): 21 | # name = App.IndustryEstimation.GetIndustryName(stock_id) #根据id获取行业名 22 | # 23 | # if name in industry_check: #去重检查 24 | # continue 25 | # else: 26 | # industry_check.append(name) 27 | # 28 | # App.IndustryEstimation.Estimation(dbconn,name,2017) #入库 29 | # ============================================================================= 30 | 31 | 32 | 33 | 34 | 35 | #行业平均数据明细 36 | # ============================================================================= 37 | App.IndustryEstimation_detail.CreateTable() #此处开启则清空此前所有内容 38 | App.IndustryEstimation_detail.Estimation() #入库 39 | # ============================================================================= 40 | #App.IndustryEstimation_detail.industry_stat('通信设备') 41 | 42 | 43 | #行业平均数据统计值入库 44 | App.IndustryEstimation_detail.CreateTable_industry_avg() 45 | 46 | 47 | # ============================================================================= 48 | # #筛选基本面数据优于行业平均值的股票并入库,20170330 49 | # App.Detail_Stock_Selector.stock_detail_select(300,50) 50 | # 51 | # ============================================================================= 52 | -------------------------------------------------------------------------------- /anack_study_case/balance_columns.txt: -------------------------------------------------------------------------------- 1 | 货币资金 h1 2 | 交易性金融资产 h2 3 | 衍生金融资产 h3 4 | 应收票据 h4 5 | 应收账款 h5 6 | 预付款项 h6 7 | 应收利息 h7 8 | 应收股利 h8 9 | 其他应收款 h9 10 | 买入返售金融资产 h10 11 | 存货 h11 12 | 划分为持有待售的资产 h12 13 | 一年内到期的非流动资产 h13 14 | 待摊费用 h14 15 | 待处理流动资产损益 h15 16 | 其他流动资产 h16 17 | 流动资产合计 h17 18 | 发放贷款及垫款 h18 19 | 可供出售金融资产 h19 20 | 持有至到期投资 h20 21 | 长期应收款 h21 22 | 长期股权投资 h22 23 | 投资性房地产 h23 24 | 固定资产净额 h24 25 | 在建工程 h25 26 | 工程物资 h26 27 | 固定资产清理 h27 28 | 生产性生物资产 h28 29 | 公益性生物资产 h29 30 | 油气资产 h30 31 | 无形资产 h31 32 | 开发支出 h32 33 | 商誉 h33 34 | 长期待摊费用 h34 35 | 递延所得税资产 h35 36 | 其他非流动资产 h36 37 | 非流动资产合计 h37 38 | 资产总计 h38 39 | 短期借款 h39 40 | 交易性金融负债 h40 41 | 应付票据 h41 42 | 应付账款 h42 43 | 预收款项 h43 44 | 应付手续费及佣金 h44 45 | 应付职工薪酬 h45 46 | 应交税费 h46 47 | 应付利息 h47 48 | 应付股利 h48 49 | 其他应付款 h49 50 | 预提费用 h50 51 | 一年内的递延收益 h51 52 | 应付短期债券 h52 53 | 一年内到期的非流动负债 h53 54 | 其他流动负债 h54 55 | 流动负债合计 h55 56 | 长期借款 h56 57 | 应付债券 h57 58 | 长期应付款 h58 59 | 长期应付职工薪酬 h59 60 | 专项应付款 h60 61 | 预计非流动负债 h61 62 | 递延所得税负债 h62 63 | 长期递延收益 h63 64 | 其他非流动负债 h64 65 | 非流动负债合计 h65 66 | 负债合计 h66 67 | 实收资本(或股本) h67 68 | 资本公积 h68 69 | 减:库存股 h69 70 | 其他综合收益 h70 71 | 专项储备 h71 72 | 盈余公积 h72 73 | 一般风险准备 h73 74 | 未分配利润 h74 75 | 归属于母公司股东权益合计 h75 76 | 少数股东权益 h76 77 | 所有者权益(或股东权益)合计 h77 78 | 负债和所有者权益(或股东权益)总计 h78 79 | stock_code h79 80 | Date h80 -------------------------------------------------------------------------------- 
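补充示例:balance_columns.txt 每行是"中文表头 h编号"的映射(h1~h80 对应资产负债表入库后的字段名)。下面是读取该映射并用于重命名 DataFrame 列的示意(非仓库原有代码;文件路径与 utf-8 编码均为假设,按实际情况调整):

# 把 "货币资金 h1" 这样的行解析成 {中文表头: h编号} 字典
mapping = {}
with open('anack_study_case/balance_columns.txt', 'r', encoding='utf-8') as fh:
    for line in fh:
        parts = line.split()            # 中文表头内部无空白,按空白切分恰好得到两段
        if len(parts) == 2:
            mapping[parts[0]] = parts[1]
# 之后对爬取到的资产负债表 df 执行 df.rename(columns=mapping),列名即变为 h1~h80,便于建表入库
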
/anack_study_case/cash_flow_columns.txt: -------------------------------------------------------------------------------- 1 | 销售商品、提供劳务收到的现金 h1 2 | 收到的税费返还 h2 3 | 收到的其他与经营活动有关的现金 h3 4 | 经营活动现金流入小计 h4 5 | 购买商品、接受劳务支付的现金 h5 6 | 支付给职工以及为职工支付的现金 h6 7 | 支付的各项税费 h7 8 | 支付的其他与经营活动有关的现金 h8 9 | 经营活动现金流出小计 h9 10 | 经营活动产生的现金流量净额 h10 11 | 收回投资所收到的现金 h11 12 | 取得投资收益所收到的现金 h12 13 | 处置固定资产、无形资产和其他长期资产所收回的现金净额 h13 14 | 处置子公司及其他营业单位收到的现金净额 h14 15 | 收到的其他与投资活动有关的现金 h15 16 | 投资活动现金流入小计 h16 17 | 购建固定资产、无形资产和其他长期资产所支付的现金 h17 18 | 投资所支付的现金 h18 19 | 取得子公司及其他营业单位支付的现金净额 h19 20 | 支付的其他与投资活动有关的现金 h20 21 | 投资活动现金流出小计 h21 22 | 投资活动产生的现金流量净额 h22 23 | 吸收投资收到的现金 h23 24 | 其中:子公司吸收少数股东投资收到的现金 h24 25 | 取得借款收到的现金 h25 26 | 发行债券收到的现金 h26 27 | 收到其他与筹资活动有关的现金 h27 28 | 筹资活动现金流入小计 h28 29 | 偿还债务支付的现金 h29 30 | 分配股利、利润或偿付利息所支付的现金 h30 31 | 其中:子公司支付给少数股东的股利、利润 h31 32 | 支付其他与筹资活动有关的现金 h32 33 | 筹资活动现金流出小计 h33 34 | 筹资活动产生的现金流量净额 h34 35 | 四、汇率变动对现金及现金等价物的影响 h35 36 | 五、现金及现金等价物净增加额 h36 37 | 加:期初现金及现金等价物余额 h37 38 | 六、期末现金及现金等价物余额 h38 39 | 净利润 h39 40 | 少数股东权益 h40 41 | 未确认的投资损失 h41 42 | 资产减值准备 h42 43 | 固定资产折旧、油气资产折耗、生产性物资折旧 h43 44 | 无形资产摊销 h44 45 | 长期待摊费用摊销 h45 46 | 待摊费用的减少 h46 47 | 预提费用的增加 h47 48 | 处置固定资产、无形资产和其他长期资产的损失 h48 49 | 固定资产报废损失 h49 50 | 公允价值变动损失 h50 51 | 递延收益增加(减:减少) h51 52 | 预计负债 h52 53 | 财务费用 h53 54 | 投资损失 h54 55 | 递延所得税资产减少 h55 56 | 递延所得税负债增加 h56 57 | 存货的减少 h57 58 | 经营性应收项目的减少 h58 59 | 经营性应付项目的增加 h59 60 | 已完工尚未结算款的减少(减:增加) h60 61 | 已结算尚未完工款的增加(减:减少) h61 62 | 其他 h62 63 | 经营活动产生现金流量净额 h63 64 | 债务转为资本 h64 65 | 一年内到期的可转换公司债券 h65 66 | 融资租入固定资产 h66 67 | 现金的期末余额 h67 68 | 现金的期初余额 h68 69 | 现金等价物的期末余额 h69 70 | 现金等价物的期初余额 h70 71 | 现金及现金等价物的净增加额 h71 72 | stock_code h72 73 | Date h73 -------------------------------------------------------------------------------- /anack_study_case/cash_flow_statements_balance_profit_columns.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/anack_study_case/cash_flow_statements_balance_profit_columns.xlsx -------------------------------------------------------------------------------- /anack_study_case/dividend_rate_v2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Mon Mar 26 21:29:43 2018 4 | 5 | @author: lh 6 | @version: 1.0 7 | @time:20180403 8 | @detail:实现模块化功能,计算股息率、分红率 9 | """ 10 | import tushare as ts 11 | import pandas as pd 12 | import numpy as np 13 | import requests 14 | from requests.exceptions import RequestException 15 | from bs4 import BeautifulSoup 16 | headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36'} 17 | 18 | class dividend_rate: 19 | 20 | def __init__(self,id): 21 | self.id =id 22 | 23 | def get_one_page(url): 24 | try: 25 | response = requests.get(url,headers = headers) 26 | response.encoding = 'GB2312' 27 | if response.status_code == 200: 28 | return response.text 29 | return None 30 | except RequestException: 31 | return None 32 | 33 | def parse(html): 34 | raw_data = [] 35 | try: 36 | year_raw = [] 37 | year = [] 38 | bonus_share = [] 39 | bonus_convert = [] 40 | profit_send = [] 41 | ex_rights = [] 42 | register_day = [] 43 | 44 | soup = BeautifulSoup(html,'html5lib') 45 | l = soup.select('table#sharebonus_1') 46 | ls = l[0].tbody 47 | lls = ls.select('td') 48 | for l in lls: 49 | if (l.get_text().strip()) != '预案' and \ 50 | (l.get_text().strip()) != '实施' and \ 51 | 
(l.get_text().strip()) != '不分配' and \ 52 | (l.get_text().strip()) != '查看': 53 | raw_data.append(l.get_text().strip()) 54 | 55 | year_raw = raw_data[::7] 56 | # print(raw_data) #出错的话请检查此处的输出 57 | # print(year_raw) #出错的话请检查此处的输出 58 | for item in year_raw: 59 | a = pd.to_datetime(item).year - 1 60 | year.append(a) 61 | bonus_share = raw_data[1::7] 62 | bonus_convert = raw_data[2::7] 63 | profit_send = raw_data[3::7] 64 | ex_rights = raw_data[4::7] 65 | register_day = raw_data[5::7] 66 | # print(register_day) 67 | data = {'年度':year, 68 | '送股':bonus_share, 69 | '转股':bonus_convert, 70 | '派息':profit_send, 71 | '除权日':ex_rights, 72 | '登记日':register_day 73 | } 74 | frame = pd.DataFrame(data) 75 | return frame 76 | except: 77 | print('cannot parse this page') 78 | 79 | 80 | # 提供给用户的函数,输入ID,解析出历史分红列表 81 | 82 | def get_bonus_table(self): 83 | url = 'http://vip.stock.finance.sina.com.cn/corp/go.php/vISSUE_ShareBonus/stockid/' 84 | url += str(self.id) 85 | url += '.phtml' 86 | html = dividend_rate.get_one_page(url) 87 | return dividend_rate.parse(html) 88 | 89 | 90 | 91 | @property 92 | def divident_rate(self): 93 | stock = ts.get_hist_data(self.id) 94 | df = dividend_rate.get_bonus_table(self) 95 | df_dividend = df[['年度','派息','登记日']] 96 | # print(df_dividend) 97 | stock_close_price = stock["close"] 98 | sIndex = stock_close_price.index.tolist() 99 | # 获取登记日 100 | regis = df_dividend['登记日'].tolist() 101 | # print(sIndex) 102 | # print(regis) 103 | close_price = [] 104 | diVi = [] 105 | aPe = [] 106 | bonus = [] 107 | div_year = [] 108 | for i in regis: 109 | if i != "--" and i in sIndex: 110 | cprice = stock_close_price.loc[i] 111 | close_price.append(cprice) 112 | aDiv = df_dividend[df_dividend['登记日'] == i]['派息'].tolist()[0] 113 | year = df_dividend[df_dividend['登记日'] == i]['年度'].values #获得年份 114 | div_year.append(year[0]) 115 | 116 | #此处的bonus暂时通过ts获得,以后可以直接搜索本地数据库 117 | profit_table = ts.get_report_data(year[0],4) #获取年度eps 118 | print('') 119 | target_eps = profit_table[profit_table['code'] == self.id]['eps'].values 120 | eps = target_eps[0].item() #numpy.float64 -> float 121 | per_bonus = round(float(aDiv) / 10 / eps * 100, 2) 122 | # per_bonus = 1 #测试时开启 123 | 124 | bonus.append(per_bonus) 125 | 126 | diVi.append(float(aDiv)/10) #10股派息转1股派息 127 | div_ratio = [] 128 | for i,j in zip(diVi,close_price): 129 | adivr = float(i) / float(j) * 100 130 | div_ratio.append(round(adivr,2)) 131 | aPe.append(round(100/adivr,2)) 132 | 133 | reDf = pd.DataFrame({"cash_div":diVi, #每股派现方案 134 | "div_ratio(%)":div_ratio, #股息率 135 | 'ape':aPe, #真实市盈率 136 | 'bonus_ratio(%)':bonus #分红率 137 | },index = div_year) 138 | 139 | # 统计输出 140 | print(self.id + '分红情况统计如下:') 141 | avg_bonus = round(sum(bonus)/len(bonus),2) 142 | print('1.平均分红率:',avg_bonus,'%') 143 | avg_div = round(sum(div_ratio)/len(div_ratio),2) 144 | print('2.平均股息率:',avg_div,'%') 145 | print('3.详细列表如下所示') 146 | return reDf 147 | 148 | ############################################## 149 | a = dividend_rate('601012') 150 | s = a.divident_rate 151 | print(s) 152 | -------------------------------------------------------------------------------- /anack_study_case/lirunbiao.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/anack_study_case/lirunbiao.csv -------------------------------------------------------------------------------- /anack_study_case/liuliang.csv: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/anack_study_case/liuliang.csv
--------------------------------------------------------------------------------
/anack_study_case/profit_columns.txt:
--------------------------------------------------------------------------------
1 | 一、营业总收入 h1
2 | 营业收入 h2
3 | 二、营业总成本 h3
4 | 营业成本 h4
5 | 营业税金及附加 h5
6 | 销售费用 h6
7 | 管理费用 h7
8 | 财务费用 h8
9 | 资产减值损失 h9
10 | 公允价值变动收益 h10
11 | 投资收益 h11
12 | 其中:对联营企业和合营企业的投资收益 h12
13 | 汇兑收益 h13
14 | 三、营业利润 h14
15 | 加:营业外收入 h15
16 | 减:营业外支出 h16
17 | 其中:非流动资产处置损失 h17
18 | 四、利润总额 h18
19 | 减:所得税费用 h19
20 | 五、净利润 h20
21 | 归属于母公司所有者的净利润 h21
22 | 少数股东损益 h22
23 | 基本每股收益(元/股) h23
24 | 稀释每股收益(元/股) h24
25 | 七、其他综合收益 h25
26 | 八、综合收益总额 h26
27 | 归属于母公司所有者的综合收益总额 h27
28 | 归属于少数股东的综合收益总额 h28
29 | stock_code h29
30 | Date h30
--------------------------------------------------------------------------------
/anack_study_case/sql常用模板.txt:
--------------------------------------------------------------------------------
1 | Mysql 基础常用操作
2 | 
3 | 连接Mysql:
4 | 格式: mysql -h主机地址 -u用户名 -p用户密码
5 | 
6 | 连接到本机上的MYSQL
7 | 
8 | 键入命令 mysql -uroot -p,回车后提示你输密码,如果刚安装好MYSQL,超级用户root是没有密码的,故直接回车即可进入到MYSQL中,MYSQL的提示符是:mysql>
9 | 
10 | 连接到远程主机上的MYSQL
11 | 
12 | 假设远程主机的IP为:10.0.0.1,用户名为root,密码为12356。则键入以下命令:
13 | mysql -h10.0.0.1 -uroot -p12356
14 | 
15 | 退出MYSQL命令
16 | exit (回车)
17 | 
18 | 
19 | 
20 | 库操作:
21 | 
22 | --- 创建数据库:
23 | CREATE DATABASE mysqltest;
24 | 
25 | --- 查看数据库
26 | show databases;
27 | 
28 | --- 查看库中的数据表:
29 | use mysql;
30 | show tables;
31 | 
32 | --- 创建带字符集的数据库(存储数据中含有中文时常用)
33 | CREATE DATABASE my_chinese CHARACTER SET=utf8;
34 | 
35 | --- 创建带校验的数据库
36 | CREATE DATABASE mydb CHARACTER SET=utf8 COLLATE utf8_general_ci;
37 | 
38 | --- 显示数据库创建语句(详情):
39 | SHOW CREATE DATABASE mydb3;
40 | 
41 | --- 数据库删除语句:
42 | DROP DATABASE mydb3;
43 | 
44 | 
45 | --- 修改数据库的库字符编码
46 | ALTER DATABASE mydb2 character set gb2312;
47 | 
48 | 
49 | --- 表操作
50 | 
51 | 
52 | 
53 | ---创建数据库表
54 | create table employee
55 | (
56 | id int,
57 | name varchar(40),
58 | sex char(4),
59 | birthday date,
60 | Entry_date date,
61 | job varchar(100),
62 | salary Decimal(8,2),
63 | resume Text
64 | );
65 | 
66 | 
67 | --- 在上面员工表的基础上增加一个image列。
68 | alter table employee add image blob;
69 | 
70 | --- 修改job列,使其长度为60。
71 | alter table employee modify job varchar(60);
72 | 
73 | --- 删除image列。
74 | alter table employee drop image;
75 | 
76 | --- 表名改为user。
77 | rename table employee to user;
78 | 
79 | --- 修改表的字符集为utf-8
80 | alter table user character set gbk;
81 | alter table user character set utf8;
82 | 
83 | --- 列名name修改为username
84 | alter table user change column name username varchar(100);
85 | --- 删除表
86 | drop table user;
87 | 
88 | --- 增删改查
89 | --- 准备表
90 | create table employee
91 | (
92 | id int,
93 | name varchar(40),
94 | sex varchar(4),
95 | birthday date,
96 | entry_date date,
97 | salary decimal(8,2),
98 | resume text
99 | );
100 | 
101 | --- 插入数据
102 | insert into employee(id,name,sex,birthday,entry_date,salary,resume) values(1,'zhangsan','male','1993-03-04','2016-11-10','1000','i am a developer');
103 | 
104 | --- 可以省略表字段,但是必须插入全部字段
105 | insert into employee values(null,null,'male','1993-03-04','2016-11-10','1000','i am a developer');
106 | 
107 | ---指定某些列插入数据
108 | insert into employee(id) values(6);
109 | 
110 | ---插入汉字
111 | insert into employee(id,name) values(6,'张三');
112 | 
113 | --- mysql客户端采用gb2312编码
114 | show variables like 'chara%';
115 | set character_set_client=gb2312;
116 | insert into employee(id,name) values('3','张三');
117 | 
118 | 
119 | ---查看时不乱码
120 | show variables like 'chara%';
121 | set character_set_results=gb2312;
122 | select * from employee;
123 | 
124 | ---修改表数据
125 | ---将所有员工薪水修改为5000元。
126 | update employee set salary=5000;
127 | 
128 | 
129 | ---将姓名为'zhangsan'的员工薪水修改为3000元。
130 | update employee set salary = 3000 where name='zhangsan';
131 | 
132 | 
133 | ---将姓名为'张三'的员工薪水修改为4000元,job改为ccc。
134 | update employee set salary = 4000,job='ccc' where name='张三';
135 | 
136 | 
137 | --- 将张三的薪水在原有基础上增加1000元。
138 | update employee set salary = salary+1000 where name='张三';
139 | 
140 | ---删除
141 | ---删除表中job为'ccc'的记录。
142 | delete from employee where job='ccc';
143 | 
144 | 
145 | ---删除表中所有记录。
146 | delete from employee;
147 | 
148 | 
149 | ---使用truncate删除表中记录
150 | truncate table employee;
151 | 
152 | ---查询
153 | 
154 | ---查询表中所有学生的信息。
155 | select id,name,chinese,english,math from student;
156 | 
157 | 
158 | ---查询表中所有学生的姓名和对应的英语成绩。
159 | select name,english from student;
160 | 
161 | 
162 | ---过滤表中重复数据。
163 | select distinct english from student;
164 | 
165 | ---在所有学生分数上加10分特长分。
166 | select name,(chinese+english+math)+10 from student;
167 | 
168 | 
169 | ---统计每个学生的总分。
170 | select name,(chinese+english+math) from student;
171 | 
172 | 
173 | ---使用别名表示学生分数
174 | select name,(chinese+english+math) as 总分 from student;
175 | ---可以不用as
176 | select name,(chinese+english+math) 总分 from student;
177 | 
178 | 
179 | ---查询姓名为'张三'的学生成绩
180 | select * from student where name='张三';
181 | 
182 | 
183 | ---查询英语成绩大于90分的同学
184 | select * from student where english>'90';
185 | 
186 | 
187 | --- 显示数据表的结构:
188 | describe 表名;
189 | --- 简写
190 | desc 表名;
191 | 
192 | --- 建表:
193 | use 库名;
194 | create table 表名 (字段设定列表);
195 | 
196 | --- 删库和删表:
197 | drop database 库名;
198 | drop table 表名;
199 | 
200 | --- 表中记录清空:
201 | truncate table wp_comments;
202 | delete from wp_comments;
203 | --- 2种操作模式的区别,目标对象是表wp_comments,其中truncate操作中的table可以省略。这两者都是将wp_comments表中数据清空,不过也是有区别的,如下:truncate是整体删除(速度较快),delete是逐条删除(速度较慢)。truncate不写服务器log,delete写服务器log,也就是truncate效率比delete高的原因。truncate不激活trigger(触发器),但是会重置Identity(标识列、自增字段),相当于自增列会被置为初始值,又重新从1开始记录,而不是接着原来的ID数。而delete删除以后,Identity依旧是接着被删除的最近的那一条记录ID加1后进行记录。如果只需删除表中的部分记录,只能使用DELETE语句配合where条件。 DELETE FROM wp_comments WHERE……
204 | 
205 | --- 显示表中的记录:
206 | select * from 表名;
207 | 
208 | 
209 | --- 数据库备份与恢复
210 | 
211 | --- 备份:从数据库导出数据:
212 | --- 格式:mysqldump -h连接ip -P(大写)端口 -u用户名 -p密码 数据库名 > d:XX.sql(路径)
213 | mysqldump -h132.72.192.43 -P3307 -uroot -p8888 htgl>d:\htgl.sql;
214 | 
215 | 
216 | --- 备份导出示例
217 | ---导出数据和表结构——将特定数据库特定表的表结构和数据全部导出
218 | mysqldump -u b_user -h 101.3.20.33 -p'H_password' -P3306 database_di up_subjects > 0101_0630_up_subjects.sql
219 | 
220 | 
221 | --- 导出表结构却不导出表数据——只返回特定数据库特定表格的表格结构,不返回数据,添加"-d"命令参数
222 | mysqldump -u b_user -h 101.3.20.33 -p'H_password' -P3306 -d database_di up_subjects > 0101_0630_up_subjects.sql
223 | 
224 | 
225 | ---导出表结构和满足特定条件的表数据——只返回特定数据库中特定表的表格结构和满足特定条件的数据
226 | mysqldump -u b_user -h 101.3.20.33 -p'H_password' -P3306 database_di up_subjects --where=" ctime>'2017-01-01' and ctime<'2017-06-30'" > 0101_0630_up_subjects.sql
227 | 
228 | --- 导出数据却不导出表结构——只返回特定数据库中特定表格的数据,不返回表格结构,添加"-t"命令参数
229 | mysqldump -u b_user -h 101.3.20.33 -p'H_password' -t -P3306 database_di up_subjects >0101_0630_up_subjects.sql
230 | 
231 | --- 导出特定数据库的所有表格的表结构及其数据,添加"--databases "命令参数
232 | mysqldump -u b_user -h 101.3.20.33 -p'H_password' -P3306
233 | --databases test > all_database.sql
234 | 
235 | --- 恢复,导入数据库数据
236 | --- 将导出的本地文件导入到指定数据库
237 | --- 系统命令行
238 | --- 格式:mysql -h连接ip -P(大写)端口 -u用户名 -p密码 数据库名 < d:XX.sql(路径)
239 | mysql -uusername -ppassword db1 < tb1.sql
252 | 
253 | --- mysqldump客户端可用来转储数据库或搜集数据库进行备份或将数据转移到另一个sql服务器(不一定是一个mysql服务器)。转储包含创建表和/或装载表的sql语句。
254 | 
255 | 
256 | --- 导出全部数据库
257 | --all-databases , -A
258 | mysqldump -uroot -p --all-databases
259 | 
260 | 
261 | ---导出全部表空间。
262 | --all-tablespaces , -Y
263 | mysqldump -uroot -p --all-databases --all-tablespaces
264 | 
265 | 
266 | 
267 | 
--------------------------------------------------------------------------------
/anack_study_case/zichanfuzhai.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/anack_study_case/zichanfuzhai.csv
--------------------------------------------------------------------------------
/anack_study_case/财务表精简表头.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/anack_study_case/财务表精简表头.xlsx
--------------------------------------------------------------------------------
/anack_test.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Thu May 31 17:12:12 2018
4 | 
5 | @author: 1707501
6 | """
7 | 
8 | import pymysql
9 | 
10 | # 连接信息留空,使用前请自行填写(与 sql.py 的做法一致)
11 | mysqlip = ''
12 | uusername = ''
13 | upassword = ''
14 | 
15 | conn = pymysql.connect(
16 | host = mysqlip,
17 | port = 3306,
18 | user = uusername,
19 | passwd = upassword,
20 | db = "test",
21 | charset = "utf8"
22 | )
23 | 
24 | cur = conn.cursor()
25 | print("OK!")
26 | # 查看库里的表
27 | sql = "show tables;"
28 | cur.execute(sql)
29 | result = cur.fetchall()
30 | print(result)
31 | 
32 | # 查询数据
33 | sql = "select * from target limit 100;"
34 | cur.execute(sql)
35 | result = cur.fetchall()
36 | print(result)
--------------------------------------------------------------------------------
/anack数据字典_v1.0.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/anack数据字典_v1.0.xlsx
--------------------------------------------------------------------------------
/anjuke_readme.txt:
--------------------------------------------------------------------------------
1 | crawl_anjuke_v1.311.py 说明
2 | 
3 | 
4 | 本脚本实现爬取安居客二手房信息,可将爬取下来的信息存储到本地并导入mysql数据库。
5 | 使用前需要注意两点:
6 | 
7 | 1、ip_collecter_original_test 是mysql数据库中存放代理ip地址和port的表,脚本从该表中读取代理信息。
8 | 如果不需要代理,修改相应代码即可。
9 | 
10 | 2、配置mysql数据库的连接信息:
11 | hosts =
12 | users =
13 | passwords =
14 | databases =
--------------------------------------------------------------------------------
/crawl_anjuke_v1.311.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Mon Dec 11 14:30:50 2017
4 | 
5 | @author:xx
6 | """
7 | 
8 | """
9 | crawling anjuke house price
10 | GuiYang
11 | 20171212 add proxy and detailed the parse of house information
12 | 20171214 add spidertime and multiprocess
13 | """
14 | 
15 | import requests
16 | from bs4 import BeautifulSoup
17 | import pymysql
18 | import random,time
19 | 
20 | def parse_detial(html):
21 | soup = BeautifulSoup(html.text,'html5lib')
22 | houseinfo = soup.select('div.houseInfoBox')
23 | houseinfotitle = houseinfo[0].h4
24 | an_xian = houseinfotitle.select('span.anxian')[0].get_text()
25 | if '假一赔百' in an_xian:
26 | an_xian = "Yes"
27 | else:
28 | an_xian = "No"
29 | houseencode= houseinfotitle.select('span.house-encode')[0].get_text()
30 | houseinfoV2 = houseinfo[0].select('div.houseInfoV2-desc')[0].get_text() 31 | housedetail1 = houseinfoV2.split() 32 | housedetail2 = ':'.join(housedetail1) 33 | housedetail = housedetail2.replace('\ue092','').replace('\u200b','').replace('\ue094','').replace('\ue093','').replace('\ue095','') 34 | housefirstv = soup.select('div.first-col.detail-col')[0].find_all('dl') 35 | house_estate = ''.join(housefirstv[0].get_text().split())[3:] 36 | house_add = ''.join(housefirstv[1].get_text().split())[3:] 37 | house_build_time = ''.join(housefirstv[2].get_text().split())[3:] 38 | house_type = ''.join(housefirstv[3].get_text().split())[3:] 39 | housesecondv = soup.select('div.second-col.detail-col')[0].find_all('dl') 40 | house_model_detail = ''.join(housesecondv[0].get_text().split())[3:] 41 | house_size = ''.join(housesecondv[1].get_text().split())[3:] 42 | house_orientation = ''.join(housesecondv[2].get_text().split())[3:] 43 | house_floor = ''.join(housesecondv[3].get_text().split())[3:] 44 | housethirdv = soup.select('div.third-col.detail-col')[0].find_all('dl') 45 | house_decorate = ''.join(housethirdv[0].get_text().split())[5:] 46 | house_univalence = ''.join(housethirdv[1].get_text().split())[5:] 47 | down_payment = ''.join(housethirdv[2].get_text().split())[5:] 48 | # monthly_payment = ''.join(housethirdv[3].get_text().split())[5:] #javescript loading data 49 | salerinfo = soup.select('p.broker-mobile') 50 | salerphone = salerinfo[0].get_text().replace('\ue047','') 51 | housetitle = ''.join(soup.select('h3.long-title')[0].get_text().split()) 52 | houseinfov1 = soup.select('div.basic-info.clearfix')[0].find_all('span') 53 | housetotleprice = houseinfov1[0].get_text() 54 | #============================================================================== 55 | # housemodel = houseinfov1[1].get_text() 56 | # housesize = houseinfov1[2].get_text() 57 | #============================================================================== 58 | line = [] 59 | line.append(housetitle) 60 | line.append(an_xian) 61 | line.append(houseencode) 62 | line.append(housetotleprice) 63 | line.append(house_model_detail) 64 | line.append(house_size) 65 | line.append(house_estate) 66 | line.append(house_add) 67 | line.append(house_build_time) 68 | line.append(house_type) 69 | line.append(house_orientation) 70 | line.append(house_floor) 71 | line.append(house_decorate) 72 | line.append(house_univalence) 73 | line.append(down_payment) 74 | line.append(housedetail) 75 | line.append(salerphone) 76 | result = '\t'.join(line) 77 | print(result) 78 | return result 79 | 80 | def parse_list(html): 81 | secondurl = [] 82 | soup = BeautifulSoup(html.text,'html5lib') 83 | houselists = soup.select('a.houseListTitle') 84 | for houseid in houselists: 85 | houseurl = houseid['href'] 86 | secondurl.append(houseurl) 87 | return secondurl 88 | 89 | def downloadhtml(url,proxy_ip): 90 | response = requests.get(url,headers=header,proxies={"http":proxy_ip}) 91 | if response.status_code == 200: 92 | return response 93 | else: 94 | print("download html error!") 95 | 96 | 97 | def Create_table(): 98 | query = """CREATE TABLE IF NOT EXISTS `anjuke_collecter_original_test` ( 99 | `No` int(10) unsigned NOT NULL AUTO_INCREMENT, 100 | `housetitle` varchar(255) DEFAULT NULL, 101 | `an_xian` varchar(255) DEFAULT NULL, 102 | `houseencode` varchar(255) DEFAULT NULL, 103 | `housetotleprice` varchar(255) DEFAULT NULL, 104 | `house_model_detail` varchar(255) DEFAULT NULL, 105 | `house_size` varchar(255) DEFAULT NULL, 106 | `house_estate` varchar(255) 
DEFAULT NULL,
107 | `house_add` varchar(255) DEFAULT NULL,
108 | `house_build_time` varchar(255) DEFAULT NULL,
109 | `house_type` varchar(255) DEFAULT NULL,
110 | `house_orientation` varchar(255) DEFAULT NULL,
111 | `house_floor` varchar(255) DEFAULT NULL,
112 | `house_decorate` varchar(255) DEFAULT NULL,
113 | `house_univalence` varchar(255) DEFAULT NULL,
114 | `down_payment` varchar(255) DEFAULT NULL,
115 | `housedetail` text DEFAULT NULL,
116 | `salerphone` varchar(255) DEFAULT NULL,
117 | `Url` varchar(255) DEFAULT NULL,
118 | `SpiderTime` varchar(255) DEFAULT NULL,
119 | PRIMARY KEY (`No`)
120 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8"""
121 | db = pymysql.connect(host = hosts,user = users, password = passwords, database = databases,charset='utf8')
122 | cursor = db.cursor()
123 | cursor.execute(query)
124 | db.commit()
125 | cursor.close()
126 | db.close()
127 | 
128 | 
129 | def etl_mysql(result):
130 | db = pymysql.connect(host = hosts,user = users, password = passwords, database = databases,charset='utf8')
131 | cursor = db.cursor()
132 | result = tuple(result)
133 | query = "insert into anjuke_collecter_original_test(housetitle,an_xian,houseencode,housetotleprice,house_model_detail,house_size,house_estate,house_add,house_build_time,house_type,house_orientation,house_floor,house_decorate,house_univalence,down_payment,housedetail,salerphone,Url,SpiderTime) values('%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s')" % result
134 | cursor.execute(query)
135 | db.commit()
136 | cursor.close()
137 | db.close()
138 | 
139 | 
140 | def get_next_page(html):
141 | soup = BeautifulSoup(html.text,'html5lib')
142 | nexturl = soup.select('a.aNxt')[0]['href']
143 | return nexturl
144 | 
145 | def get_proxy_ip():
146 | db = pymysql.connect(host = hosts,user = users, password = passwords, database = databases,charset='utf8')
147 | cursor = db.cursor()
148 | query = "select ip,port from ip_collecter_original_test limit 17000"
149 | cursor.execute(query)
150 | ip_result = cursor.fetchall()
151 | IPList = []
152 | for i in ip_result:
153 | Ip = i[0] + ":" + i[1]
154 | IPList.append(Ip)
155 | return IPList
156 | 
157 | def check_ip(IPList):
158 | url = "https://www.baidu.com/"
159 | proxy_ip = random.choice(IPList)
160 | res = requests.get(url,headers=header,proxies={"http":proxy_ip})
161 | if res.status_code == 200:
162 | print(proxy_ip)
163 | return proxy_ip
164 | else:
165 | return None # 后期修改成迭代
166 | 
167 | header = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0',
168 | 'Connection':'keep-alive' }
169 | 
170 | # https://gy.anjuke.com/sale/p1/#filtersort
171 | # https://shanghai.anjuke.com/sale/p1/#filtersort
172 | # https://hangzhou.anjuke.com/sale/
173 | url = "https://shanghai.anjuke.com/sale/p1/#filtersort"
174 | 
175 | hosts = '' #数据库连接信息,使用前按 anjuke_readme.txt 填写
176 | users = ''
177 | passwords = ''
178 | databases = ''
179 | 
180 | if __name__ == '__main__':
181 | List_ip = get_proxy_ip()
182 | next_url = url
183 | Create_table()
184 | while next_url != None:
185 | proxy_ip = check_ip(List_ip)
186 | res = downloadhtml(next_url,proxy_ip) #抓取当前页,翻页由 next_url 驱动
187 | if res != None:
188 | urllist = None #解析失败时跳过本页
189 | try: urllist = parse_list(res)
190 | except Exception:
191 | print('house url list parsing error!')
192 | if urllist != None:
193 | for houseurl in urllist:
194 | proxy_ip = check_ip(List_ip)
195 | houseinfor = downloadhtml(houseurl,proxy_ip)
196 | try:
197 | results = parse_detial(houseinfor)
198 | except Exception:
199 | results = None
200 | with open(r'E:\documents\personal\python\crawler\anjuke\anjuke_error_shanghai.txt','a',encoding='utf-8') as f:
201 | f.write(houseurl +"\n")
202 | print("parse house detail info error!")
203 | continue
204 | with open(r'E:\documents\personal\python\crawler\anjuke\anjuke_shanghai_v15.txt','a',encoding='utf-8') as f:
205 | f.write(results + '\n')
206 | try:
207 | line = results.split('\t')
208 | ts = time.strftime('%Y%m%d%H%M%S',time.localtime(time.time()))
209 | line.append(houseurl)
210 | line.append(ts)
211 | # print(line)
212 | etl_mysql(line)
213 | except Exception:
214 | print("data insert into mysql error!")
215 | continue
216 | try:
217 | next_url = get_next_page(res)
218 | except Exception:
219 | next_url = None
220 | print("crawling end!")
221 | 
--------------------------------------------------------------------------------
/raw_modules/ReadMe.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YinChao126/anack/f38db1d00d83a4c1fd0fd41e5b062bedb42fc947/raw_modules/ReadMe.txt
--------------------------------------------------------------------------------
/raw_modules/__init__.py:
--------------------------------------------------------------------------------
1 | #
--------------------------------------------------------------------------------
/raw_modules/get_price.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | 从163网址上获取指定ID指定时间段的K线数据
4 | """
5 | import requests
6 | import re
7 | import datetime
8 | import pandas as pd
9 | '''
10 | 
11 | 完整网址:
12 | http://quotes.money.163.com/service/chddata.html?code=0%06d&start=%d&end=%d&fields=TCLOSE;HIGH;LOW;TOPEN;LCLOSE;VOTURNOVER;VATURNOVER
13 | '''
14 | 
15 | 
16 | def get_close_price(id, day = 0):
17 | '''
18 | 获取指定ID指定日期的收盘价
19 | 输入:id -> str形式的ID号: '600660'
20 | day -> str形式的日期: '20180626'
21 | 返回值:str形式的价格: '25.54', 如果当天为节假日,则返回0
22 | '''
23 | if day == 0:
24 | day = datetime.datetime.now() - datetime.timedelta(days=1)
25 | day = day.strftime("%Y%m%d")
26 | if id[:3] == '000' or id[:3] == '002' or id[:3] == '300': #如果是深市,则前缀为1
27 | nid = '1' + id
28 | else: #如果是沪市主板,则前缀为0
29 | nid = '0' + id
30 | url = "http://quotes.money.163.com/service/chddata.html?code=%s&start=%s&end=%s&\
31 | fields=TCLOSE" %(nid, day,day)
32 | res = requests.get(url)
33 | res.raise_for_status()
34 | 
35 | for chunk in res.iter_content(100000):
36 | # print(chunk)
37 | pattern = '[^,\r\n]+'
38 | obj = re.compile(pattern)
39 | match = obj.findall(chunk.decode('gbk'))
40 | #print(match)
41 | if len(match) < 8:
42 | return 0
43 | else:
44 | return match[-1]
45 | 
46 | def get_period_k_day(id, start_day, stop_day = 0):
47 | '''
48 | 获取指定ID一个时间段内的K线数据
49 | 输入:id -> str形式的ID号: '600660'
50 | start_day -> str形式的日期: '20180626'
51 | stop_day -> 同上, 默认到昨天
52 | 返回值:一个dataframe
53 | '''
54 | if stop_day == 0:
55 | stop_day = datetime.datetime.now() - datetime.timedelta(days=1) #默认截止到昨天,需赋给 stop_day 才能生效
56 | stop_day = stop_day.strftime("%Y%m%d")
57 | 
58 | if id[:3] == '000' or id[:3] == '002' or id[:3] == '300': #如果是深市,则前缀为1
59 | nid = '1' + id
60 | else: #如果是沪市主板,则前缀为0
61 | nid = '0' + id
62 | url = "http://quotes.money.163.com/service/chddata.html?code=%s&start=%s&end=%s&\
63 | fields=TCLOSE;HIGH;LOW;TOPEN;LCLOSE;VOTURNOVER;VATURNOVER" %(nid, start_day, stop_day)
64 | 
65 | 
66 | # url = "http://quotes.money.163.com/service/chddata.html?code=0%s&start=%s&end=%s&\
67 | # fields=TCLOSE;HIGH;LOW;TOPEN;LCLOSE;VOTURNOVER;VATURNOVER" %(id, start_day,stop_day)
68 | res = requests.get(url)
69 | 
res.raise_for_status() 70 | # playFile = open(file_name, 'wb') 71 | 72 | raw_data = [] 73 | for chunk in res.iter_content(1000000): 74 | # playFile.write(chunk) 75 | chunk = chunk.decode('gbk') 76 | pattern = '[^,\r\n]+' 77 | obj = re.compile(pattern) 78 | match = obj.findall(chunk) 79 | if len(match) < 8: #如果没有数据 80 | return 0 81 | 82 | header = match[:10] #如果增加字段,则此处以下需要相应修改 83 | # print(header) 84 | raw_data = match[10:] 85 | date = raw_data[::10] 86 | idc = raw_data[1::10] 87 | name = raw_data[2::10] 88 | price = raw_data[3::10] 89 | high = raw_data[4::10] 90 | lopen = raw_data[5::10] 91 | yesterday_close = raw_data[6::10] 92 | low = raw_data[7::10] 93 | vol = raw_data[8::10] 94 | mount = raw_data[9::10] 95 | 96 | data = { 97 | # header[0]:date, 98 | header[1]:idc, 99 | header[2]:name, 100 | header[3]:price, 101 | header[4]:high, 102 | header[5]:lopen, 103 | header[6]:yesterday_close, 104 | header[7]:low, 105 | header[8]:vol, 106 | header[9]:mount 107 | } 108 | df = pd.DataFrame(data,index = date) 109 | # playFile.close() 110 | return df 111 | 112 | 113 | def k_day_to_csv(code, stop_day = 0): 114 | ''' 115 | 更新k线数据,并保存到本地,默认为更新到昨天 116 | code:目标个股,只能为'000xxx'形式 117 | stop_day: 0->昨天, 20170101:更新到指定的一天 118 | @更新逻辑: 119 | 1. 如果无记录,则自动创建csv文件,默认为:ID.kday 120 | 2. 如果有部分记录,则自动分析,并将后续的内容更新 121 | 3. 如果记录比需要更新的更新,则直接返回 122 | 123 | 缺陷:得到的数据是没有复权的,应该进行前复权 124 | ''' 125 | base_path = './' #修改此处可以更改文件存放路径,可以考虑作为一个配置参数 126 | start_day = '19970101' #start时间统一从1997年开始 127 | #参数合法性检查 128 | if isinstance(code,list): 129 | print('is a list') 130 | elif isinstance(code,str): 131 | file_name = code + '.csv' 132 | # print(file_name) 133 | else: 134 | print('bad input. please check it') 135 | return 136 | 137 | file_name = base_path + file_name 138 | # print(file_name) 139 | 140 | update_flag = 1 #1代表重新生成, 2代表更新 3代表无需处理 141 | #判断最新的是第几天 142 | try: 143 | with open(file_name,'r') as fh: 144 | content = fh.readlines() 145 | if len(content) > 2: #获取最新记录,总是在第二行 146 | latest_record = content[1].split(',') 147 | 148 | from datetime import datetime 149 | from dateutil.parser import parse 150 | latest_day = parse(latest_record[0]) 151 | now = datetime.now().strftime('%Y-%m-%d') 152 | yesterday = parse(now) 153 | 154 | if yesterday > latest_day: 155 | update_flag = 2 156 | print('not the latest') 157 | else: 158 | update_flag = 3 159 | print(code + ' already the latest') 160 | return 161 | except: 162 | update_flag = 1 163 | print('no record') 164 | 165 | #不同的情况适用不同更新逻辑 166 | if update_flag == 1: #完全更新 167 | r = get_period_k_day(code, start_day) 168 | r.to_csv(file_name, encoding= 'gbk') 169 | elif update_flag == 2: 170 | r = get_period_k_day(code, start_day) #此处没有办法在首部添加 171 | r.to_csv(file_name, encoding= 'gbk') #如果可以的话,则不必每次重写 172 | return 173 | print('finish ' + code + ' update') 174 | return 175 | 176 | def k_day_update(id_list, stop_day = 0): 177 | ''' 178 | 用户API,更新个股的K线数据,可以是列表,也可以是str 179 | ''' 180 | #参数合法性检查 181 | if isinstance(id_list,list): 182 | print('is a list') 183 | for s in id_list: 184 | k_day_to_csv(s,stop_day) 185 | elif isinstance(id_list,str): 186 | k_day_to_csv(id_list,stop_day) 187 | else: 188 | print('bad input. 
please check it') 189 | return 190 | 191 | if __name__ == '__main__': 192 | id = '601012' 193 | start_day = '20100625' 194 | stop_day = '20180904' 195 | 196 | #获取昨天的收盘价 197 | # price = get_close_price(id) 198 | # print(price) 199 | 200 | # #获取指定一天的收盘价 201 | # price = get_close_price('600660','20170209') 202 | # print(price) 203 | # 204 | # #获取从start_day开始直到昨天的收盘价 205 | # s = get_period_price('600660',start_day) 206 | # print(s) 207 | # 208 | # #获取指定时间段内的收盘价 209 | # s = get_period_k_day('601012',start_day,stop_day) 210 | # print(s) 211 | # s.to_csv('test.csv', encoding= 'gbk') 212 | 213 | #更新K线数据并存文档 214 | company_list = ['600660', '600066', '000651', '600522', '601012', '600887'] 215 | k_day_update(company_list) 216 | k_day_update('600066') 217 | 218 | --------------------------------------------------------------------------------
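补充示例:k_day_to_csv 的文档串指出其数据未做前复权。在补齐复权逻辑之前,一个过渡做法是改用 tushare 直接取前复权K线(get_k_data 的 autype 参数默认即 'qfq' 前复权),示意如下(非仓库原有代码,假设本机已安装 tushare):

import tushare as ts

df = ts.get_k_data('600660', start='1997-01-01', autype='qfq')  # 前复权日K,起始日沿用本模块的 start_day 约定
df.to_csv('600660.csv', encoding='gbk')                          # 存盘编码沿用本模块的 gbk 约定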