├── .gitignore
├── .idea
├── EstimateValueData.iml
├── dataSources.xml
├── dataSources
│ └── b8058ac0-c931-4b7b-a6b7-1a4aff69d5fa.xml
├── misc.xml
├── modules.xml
└── vcs.xml
├── BrisionAnys.py
├── CalcEstimateValue.py
├── CalcRegression.py
├── CalcRiskReturn.py
├── DataToMySql
├── DataDic.py
├── GetDataToMysql.py
├── MysqlCon.py
└── mysql.conf
├── DateFormatDf.py
├── EstimateValue.py
├── FamaFrenchRegression.py
├── GetAndSaveWindData
├── ClientWindIfindInit.py
├── GetDataFromWindAndMySql.py
├── GetDataFromWindNNotMysql.py
├── GetDataToMysql.py
├── GetDataTotalMain.py
├── GetFundFinanceReportData.py
├── GetindexName.py
└── MysqlCon.py
├── GetDataFromWindAndMySql.py
├── GetExcelData.py
├── GetFinanceReportData.py
├── GetWindDataToMySql.py
├── JudgeFund
├── CalcJudgeFund.py
├── JudgeAndGetFund.py
├── JudgeFundDC.py
├── JudgeFundImproveBase.py
├── JudgeFundIndexImprove.py
├── JudgeFundMain.py
├── JudgeFundQDII.py
├── JudgeFundTopic.py
└── YunFeiCalc.py
├── JudgeText.py
├── MonthReportData
├── CalcHXBCorr.py
├── FundEst.py
├── FundImproveEst.py
├── GetIndexEst.py
├── GetTableData.py
└── TaoLiJudge.py
├── PrintInfo.py
├── README.md
├── StockFactorCalc.py
├── mylog.py
└── windDemo.py
/.gitignore:
--------------------------------------------------------------------------------
1 | *.log
2 | *.xlsx
3 | *.xls
4 | *.txt
5 | *.png
--------------------------------------------------------------------------------
/.idea/EstimateValueData.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/.idea/dataSources.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | mysql
6 | true
7 | com.mysql.jdbc.Driver
8 | jdbc:mysql://localhost:3306/fund_data
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/BrisionAnys.py:
--------------------------------------------------------------------------------
1 | # -- coding: utf-8 --
2 |
3 | '''
4 | Multi-period Brinson performance attribution analysis
5 | '''
6 |
7 | import pandas as pd
8 | import numpy as np
9 | from WindPy import w
10 | import matplotlib.pyplot as plt
11 | import matplotlib
12 |
13 |
class BrisionAnys:
    """Multi-period Brinson performance attribution against a benchmark index.

    Holdings are supplied as one DataFrame per valuation date.  Industry
    classification, index constituents and close prices are requested from
    the Wind API object ``w``.  Per-period effects are scaled by a
    logarithmic linking factor so that they can be summed across periods.
    """

    def __init__(self, dicStockDf,
                 resultPath='C:\\Users\\Administrator\\Desktop\\乐道4结果图\\'):
        """
        :param dicStockDf: dict mapping a valuation date to the holdings
            DataFrame of that date (13 columns, see ``WashData``).
        :param resultPath: prefix prepended to the saved figure name; the
            default is the location that used to be hard-coded.
        """
        self.benchMark = '000300.SH'  # benchmark index
        self.resultPath = resultPath
        self.dicStockDf = self.WashData(dicStockDf)
        self.myfont = matplotlib.font_manager.FontProperties(fname=r'C:/Windows/Fonts/simkai.ttf')

        # Wind fields used for the industry breakdown.  Alternatives that were
        # tried: INDUSTRY_GICS/INDUSTRY_GICSCODE (Wind),
        # INDUSTRY_CSRCCODE12/INDUSTRY_CSRC12 (CSRC),
        # INDUSTRY_SW/INDUSTRY_SWCODE (Shenwan).  CITIC is the one in use.
        self.indusFieds = ['INDUSTRY_CITIC', 'INDUSTRY_CITICCODE']

    def WashData(self, dicStockDf):
        """Normalise every holdings frame: codes ``'600000 SH'`` become
        ``'600000.SH'`` and the 13 columns get fixed English names.

        :return: dict with the same keys and the cleaned DataFrames.
        """
        columnNames = ['styleCode', 'stockName', 'styleMoney', 'forRate', 'stockNum',
                       'stockUnitBuy',
                       'stockTotalBuy', 'stockBuyWeight0', 'stockClosePrice', 'stockValue',
                       'stockValueWeight', 'stockChange', 'stockTradeFlag']
        dicTotalStock = {}
        for stockDate, stockDf in dicStockDf.items():
            stockList = [stock.replace(' ', '.') for stock in stockDf.index.tolist()]
            dicTotalStock[stockDate] = pd.DataFrame(stockDf.values, index=stockList,
                                                    columns=columnNames)
        return dicTotalStock

    @staticmethod
    def _carinoFactor(portfolioReturn, benchmarkReturn):
        """Logarithmic linking factor (ln(1+R) - ln(1+B)) / (R - B).

        When R equals B the quotient is 0/0; its limit 1 / (1 + R) is
        returned instead of NaN.
        """
        diff = portfolioReturn - benchmarkReturn
        if abs(diff) < 1e-12:
            return 1.0 / (1.0 + portfolioReturn)
        return (np.log(1 + portfolioReturn) - np.log(1 + benchmarkReturn)) / diff

    @staticmethod
    def _sumByIndustry(periodSeries, totalKFactor):
        """Add up per-period effect Series by industry label and rescale.

        Aligning on the label keeps the result correct when periods do not
        contain the same industries or list them in a different order.
        """
        combined = pd.concat(periodSeries, axis=1, sort=True)
        return combined.fillna(0).astype(float).sum(axis=1) / totalKFactor

    def _groupByIndustry(self, df, prefix):
        """Aggregate a per-security frame into one row per industry.

        ``df`` must contain the two ``self.indusFieds`` columns plus
        ``iWeight`` and ``weightReturn``.  The industry return is the
        weighted contribution divided by the industry weight, so that
        ``return * weight`` reproduces the contribution (it used to be the
        contribution itself, which double-weighted every later product).

        :return: DataFrame indexed by the first industry field with columns
            ``<prefix>Return``, ``<prefix>INName`` and ``<prefix>Weight``.
        """
        dicIndustry = {}
        # Scalar key (not a one-element list) so that group labels are plain
        # values on every pandas version.
        for industryKey, groupDf in df.groupby(by=self.indusFieds[0]):
            weightSum = groupDf['iWeight'].sum()
            contribution = groupDf['weightReturn'].sum()
            dicIndustry[industryKey] = {
                prefix + 'Return': contribution / weightSum if weightSum else 0.0,
                prefix + 'INName': list(groupDf[self.indusFieds[1]].unique())[0],
                prefix + 'Weight': weightSum,
            }
        return pd.DataFrame(dicIndustry).T

    def calcPoforlio(self, tradeNum, tradeProcess):
        """Industry weights and returns of the portfolio for one period.

        :param tradeNum: position of the period inside ``tradeProcess``.
        :param tradeProcess: date-sorted list of ``(date, holdingsDf)``.
        :return: per-industry DataFrame (``stockReturn``, ``stockINName``,
            ``stockWeight``) or ``None`` when the Wind request fails.
        """
        tradeDate = tradeProcess[tradeNum][0]  # current valuation date
        stockDf = tradeProcess[tradeNum][1]  # holdings on that date

        wsdata = w.wss(codes=stockDf.index.tolist(), fields=self.indusFieds,
                       options="industryType=1;tradeDate=%s" % tradeDate)
        if wsdata.ErrorCode != 0:
            return None
        industryDf = pd.DataFrame(wsdata.Data, columns=wsdata.Codes, index=wsdata.Fields).T
        stockDf1 = pd.concat([stockDf, industryDf], join='inner', axis=1)
        try:
            # Both operands come from the joined frame so the ratio has
            # exactly one value per remaining row.
            cht = stockDf1['stockClosePrice'] / stockDf1['stockUnitBuy']
        except (TypeError, ValueError, ZeroDivisionError):
            # No usable quote (e.g. IPO subscriptions): treat as flat.
            cht = pd.Series(1.0, index=stockDf1.index)
        stockDf1['logReturn'] = np.log(cht.astype(float).values)
        stockDf1['iWeight'] = stockDf1['stockBuyWeight0'] / stockDf1['stockBuyWeight0'].sum()
        stockDf1['weightReturn'] = stockDf1['logReturn'] * stockDf1['iWeight']
        return self._groupByIndustry(stockDf1, 'stock')

    def calcBenchMark(self, tradeNum, tradeProcess):
        """Industry weights and returns of the benchmark for one period.

        The return runs from the previous valuation date to the current one.

        :return: per-industry DataFrame (``indexReturn``, ``indexINName``,
            ``indexWeight``) or ``None`` when any Wind request fails.
        """
        tradeDate = tradeProcess[tradeNum][0]  # current valuation date
        beforeDate = tradeProcess[tradeNum - 1][0]  # previous valuation date

        # Index constituents and their weights.
        indexWSData = w.wset("indexconstituent", "date=%s;windcode=%s" % (tradeDate, self.benchMark))
        if indexWSData.ErrorCode != 0:
            return None
        indexData = pd.DataFrame(indexWSData.Data, index=['checkDate', 'windCode', 'windName', 'iWeight']).T
        indexData.set_index(keys='windCode', inplace=True)

        # Industry of every constituent.
        wsindexdata = w.wss(codes=indexData.index.tolist(), fields=self.indusFieds,
                            options="industryType=1;tradeDate=%s" % tradeDate)
        if wsindexdata.ErrorCode != 0:
            return None
        tempIndexDf = pd.DataFrame(wsindexdata.Data, columns=wsindexdata.Codes, index=wsindexdata.Fields).T
        codes = tempIndexDf.index.tolist()

        # Close prices on the current and on the previous valuation date.
        closeFrames = []
        for columnName, priceDate in (('beforeClose', beforeDate), ('close', tradeDate)):
            closeData = w.wss(codes=codes, fields=['close'],
                              options="tradeDate=%s;cycle=D;priceAdj=F" % priceDate)
            if closeData.ErrorCode != 0:
                return None
            closeFrames.append(pd.DataFrame(closeData.Data, index=[columnName], columns=closeData.Codes).T)

        indexDF = pd.concat([indexData, tempIndexDf] + closeFrames, axis=1, join='inner')
        indexDF['logReturn'] = np.log((indexDF['close'] / indexDF['beforeClose']).astype(float))
        weights = indexDF['iWeight'].astype(float)
        indexDF['iWeight'] = weights / weights.sum()
        indexDF['weightReturn'] = indexDF['logReturn'] * indexDF['iWeight']
        return self._groupByIndustry(indexDF, 'index')

    def calc(self, dicTotalStock):
        """Run the single-period Brinson decomposition for every period.

        :param dicTotalStock: cleaned holdings keyed by valuation date.
        :return: dict with lists (one entry per processed period) under
            ``SR``/``AR``/``IR`` (selection, allocation, interaction totals),
            ``everySR``/``everyAR``/``everyIR`` (the same per industry, as
            Series), ``pofolioReturn``, ``benchMarkReturn``, plus
            ``industryName`` (industries of the first processed period).
            Periods whose Wind data could not be loaded are skipped.
        """
        w.start()
        tradeProcess = sorted(dicTotalStock.items(), key=lambda x: x[0])
        dicReturnFactor = {key: [] for key in ('SR', 'AR', 'IR', 'pofolioReturn', 'benchMarkReturn',
                                               'everySR', 'everyAR', 'everyIR')}

        # The first date only serves as the starting point of period 1.
        for tradeNum in range(1, len(tradeProcess)):
            tradeDate = tradeProcess[tradeNum][0]
            print('当前交易日期:', tradeDate)

            stockDf = self.calcPoforlio(tradeNum, tradeProcess)  # portfolio side
            indexDf = self.calcBenchMark(tradeNum, tradeProcess)  # benchmark side
            if stockDf is None or indexDf is None:
                print('Wind data unavailable, period skipped:', tradeDate)
                continue

            totaldf = pd.concat([indexDf, stockDf], axis=1, join='outer', sort=True)
            totaldf.fillna(0, inplace=True)
            numeric = totaldf[['stockReturn', 'stockWeight', 'indexReturn', 'indexWeight']].astype(float)
            stockReturn, stockWeight = numeric['stockReturn'], numeric['stockWeight']
            indexReturn, indexWeight = numeric['indexReturn'], numeric['indexWeight']

            pofolioReturn = (stockReturn * stockWeight).sum()
            benchMarkReturn = (indexReturn * indexWeight).sum()
            kFactor = self._carinoFactor(pofolioReturn, benchMarkReturn)

            dicReturnFactor.setdefault('industryName', totaldf.index.tolist())
            everySR = kFactor * ((stockReturn - indexReturn) * indexWeight)  # selection
            everyAR = kFactor * ((stockWeight - indexWeight) * indexReturn)  # allocation
            everyIR = kFactor * ((stockWeight - indexWeight) * (stockReturn - indexReturn))  # interaction
            dicReturnFactor['everySR'].append(everySR)
            dicReturnFactor['everyAR'].append(everyAR)
            dicReturnFactor['everyIR'].append(everyIR)

            dicReturnFactor['SR'].append(everySR.sum())
            dicReturnFactor['AR'].append(everyAR.sum())
            dicReturnFactor['IR'].append(everyIR.sum())
            dicReturnFactor['pofolioReturn'].append(pofolioReturn)
            dicReturnFactor['benchMarkReturn'].append(benchMarkReturn)
        return dicReturnFactor

    def totalTradeAny(self, dicReturnFactor):
        """Link the per-period effects into whole-horizon effects.

        Stores the whole-horizon linking factor in
        ``dicReturnFactor['totalKFactor']`` (read later by ``plotFigure``).

        :return: dict with the linked ``SR``, ``AR`` and ``IR``.
        """
        totalPofolio = (1 + np.array(dicReturnFactor['pofolioReturn'])).cumprod() - 1
        totalBenchMark = (1 + np.array(dicReturnFactor['benchMarkReturn'])).cumprod() - 1
        totalKFactor = self._carinoFactor(totalPofolio[-1], totalBenchMark[-1])
        dicReturnFactor['totalKFactor'] = totalKFactor

        return {key: np.array(dicReturnFactor[key]).sum() / totalKFactor
                for key in ('SR', 'AR', 'IR')}

    def plotFigure(self, dicReturnFactor):
        """Draw the linked allocation / selection / interaction effect per
        industry as three horizontal bar charts, save and show the figure.

        Requires ``totalTradeAny`` to have been called on the same dict.
        """
        plt.rcParams['font.sans-serif'] = ['SimHei']
        plt.rcParams['axes.unicode_minus'] = False

        totalKFactor = dicReturnFactor['totalKFactor']
        SRSeries = self._sumByIndustry(dicReturnFactor['everySR'], totalKFactor)
        ARSeries = self._sumByIndustry(dicReturnFactor['everyAR'], totalKFactor)
        IRSeries = self._sumByIndustry(dicReturnFactor['everyIR'], totalKFactor)

        fig = plt.figure(figsize=(16, 9))
        panels = ((131, ARSeries, u'行业配置收益'),
                  (132, SRSeries, u'个股选择收益'),
                  (133, IRSeries, u'交互收益'))
        for position, series, title in panels:
            ax = fig.add_subplot(position)
            series.sort_values().plot(kind='barh', ax=ax)
            ax.set_title(title)
        plt.savefig(self.resultPath + '归因分析')
        plt.show()

    def calcMain(self):
        """Entry point: compute, link, plot and print the attribution."""
        dicReturnFactor = self.calc(self.dicStockDf)
        result = self.totalTradeAny(dicReturnFactor)
        self.plotFigure(dicReturnFactor)
        print(result)
219 |
220 |
if __name__ == '__main__':
    import os
    from GetExcelData import GetExcelData

    # Folder with the valuation sheets, expected inside the current working
    # directory.  os.path.join replaces the hard-coded '\' separator.
    fileTotalPath = os.path.join(os.getcwd(), '乐道4估值表')
    netAssetDf, dicProduct = GetExcelData(fileTotalPath).getData()
    BrisionAnysDemo = BrisionAnys(dicProduct)
    BrisionAnysDemo.calcMain()
229 |
--------------------------------------------------------------------------------
/CalcEstimateValue.py:
--------------------------------------------------------------------------------
1 | # -- coding: utf-8 --
2 |
3 | '''
4 | 主程序
5 | '''
6 |
7 | import pandas as pd
8 | import matplotlib.pyplot as plt
9 | import numpy as np
10 | from datetime import date, datetime
11 | import matplotlib
12 | from sqlalchemy import create_engine
13 | from GetExcelData import GetExcelData
14 | from BrisionAnys import BrisionAnys
15 |
class CalcEstimateValue:
    """Main program: load a product's valuation data, join index history
    from MySQL and draw the performance, risk and position charts."""

    def __init__(self, fileTotalPath,
                 resultPath='C:\\Users\\Administrator\\Desktop\\乐道4结果图\\'):
        """
        :param fileTotalPath: folder handed to ``GetExcelData``.
        :param resultPath: prefix prepended to every saved figure name; the
            default is the location that used to be hard-coded.
        """
        self.fileTotalPath = fileTotalPath
        self.resultPath = resultPath
        self.myfont = matplotlib.font_manager.FontProperties(fname=r'C:/Windows/Fonts/simkai.ttf')
        self.indexCodeList = ["000001.SH", "399300.SZ", "000016.SH", "000905.SH", "000906.SH"]

    @staticmethod
    def _drawdown(prices):
        """Drawdown from the running maximum, ``price / cummax - 1``.

        Works on a Series or column-wise on a DataFrame; a vectorised
        replacement for the former per-row loop.
        """
        return prices / prices.cummax() - 1

    def controlData(self, netAssetDf, dicProduct):
        """Draw and save three figures (trend, volatility, positions) and
        show them.

        :param netAssetDf: product data joined with the index closes; the
            columns read here are the index codes, ``accNetReturn``,
            ``netValue``, ``netReturn``, ``thisNetReturn``, ``stockRate`` and
            the six ``*Rate`` asset-class columns.  The columns
            ``annualStd`` and ``annualStdHS300`` are added to it.
        :param dicProduct: not used by this method.
        """
        indexPrices = netAssetDf[self.indexCodeList]
        indexReturn = (indexPrices - indexPrices.shift(1)) / indexPrices.shift(1)
        indexDfAcc = ((1 + indexReturn).cumprod() - 1).ffill()

        # ---- figure 1: cumulative return, drawdown, histogram, win/loss pie
        fig = plt.figure(figsize=(16, 9))
        ax1 = fig.add_subplot(221)
        df1 = pd.concat([indexDfAcc, netAssetDf['accNetReturn']], axis=1)
        df1.plot(ax=ax1)
        ax1.set_title(u'累计收益走势图', fontproperties=self.myfont)

        ax2 = fig.add_subplot(222)
        self._drawdown(netAssetDf[['netValue', '399300.SZ']]).plot(ax=ax2)
        ax2.set_title(u'回撤率走势图', fontproperties=self.myfont)

        ax3 = fig.add_subplot(223)
        netAssetDf['netReturn'].plot(ax=ax3, kind='hist', bins=20)
        ax3.set_title(u'收益率分布图', fontproperties=self.myfont)

        ax4 = fig.add_subplot(224)
        tempArr = netAssetDf['thisNetReturn'].values
        upRate = round(len(tempArr[tempArr >= 0]) / len(tempArr) * 100, 2)
        # Rounded as well: 100 - 99.9 would otherwise print 0.0999999...
        downRate = round(100 - upRate, 2)
        labels = [u'Trade + %s' % (str(upRate) + '%'), u'Trade - %s' % (str(downRate) + '%')]
        ax4.pie([upRate, downRate], labels=labels)
        ax4.set_title(u'盈亏状况统计图', fontproperties=self.myfont)
        plt.savefig(self.resultPath + '趋势图')

        # ---- figure 2: stock position and rolling volatility
        fig2 = plt.figure(figsize=(16, 9))
        ax5 = fig2.add_subplot(211)
        netAssetDf['stockRate'].plot(ax=ax5, kind='bar', color='LightGreen')
        ax5.set_title(u'仓位变化图', fontproperties=self.myfont)
        ax5.set_ylabel(u'流通股票占比', fontproperties=self.myfont)

        ax7 = fig2.add_subplot(212)
        # NOTE(review): the product volatility is computed on the net VALUE
        # while the HS300 one is computed on RETURNS - confirm whether
        # 'netReturn' was intended here.
        netAssetDf['annualStd'] = netAssetDf['netValue'].rolling(window=4).std() * np.sqrt(12)
        netAssetDf['annualStdHS300'] = indexReturn['399300.SZ'].rolling(window=4).std() * np.sqrt(12)
        netAssetDf[['annualStd', 'annualStdHS300']].dropna().plot(ax=ax7)
        ax7.set_title(u'滚动年化波动率走势图', fontproperties=self.myfont)
        ax7.set_ylabel(u'年化波动率', fontproperties=self.myfont)
        plt.savefig(self.resultPath + '波动变化图')

        # ---- figure 3: stacked asset-class weights and simulated paths
        fig3 = plt.figure(figsize=(16, 9))
        ax8 = fig3.add_subplot(211)
        rateColumns = ['cashRate', 'ensureMoneyRate', 'antiSaleRate', 'securityRate', 'fundRate', 'otherRate']
        tempDf = netAssetDf[rateColumns].copy()
        tempDf.fillna(0, inplace=True)
        color = ['r', 'g', 'b', 'y', 'k', 'c', 'm']
        xValues = tempDf.index.tolist()
        for i, columnName in enumerate(rateColumns):
            # Each layer sits on top of the sum of the layers before it.
            ax8.bar(xValues, tempDf.iloc[:, i], color=color[i],
                    bottom=tempDf.iloc[:, :i].sum(axis=1), width=3.95, label=columnName)
        # A legend needs per-artist labels; Axes.set_label() only names the
        # axes object and never showed the asset classes.
        ax8.legend()
        for tick in ax8.get_xticklabels():
            tick.set_rotation(90)

        ax9 = fig3.add_subplot(212)
        tempdf = self.similateNet(netAssetDf[['netValue', 'netReturn', 'accNetReturn']])
        tempdf.plot(ax=ax9)
        plt.savefig(self.resultPath + '持仓分布图')
        plt.show()

    def CodeToStr(self, templist):
        """Return the items single-quoted and comma-separated, e.g.
        ``['a', 'b']`` -> ``"'a','b'"`` (empty list -> empty string)."""
        return ",".join("'" + temp + "'" for temp in templist)

    def getIndexData(self, netAssetDf):
        """Append the closes of ``self.indexCodeList`` to ``netAssetDf``.

        Index history between the first and last row of ``netAssetDf`` is
        read from table ``index_data``, forward-filled onto the product's
        dates and inner-joined.

        :return: ``netAssetDf`` plus one column per index code.
        """
        startDate = netAssetDf.index.tolist()[0]
        endDate = netAssetDf.index.tolist()[-1]
        # NOTE(review): credentials are hard-coded; consider moving them to
        # configuration.
        mysqlConfig = ['root', '123456', 'localhost', '3306', 'fund_data', 'utf8']
        mysqlcon = "mysql+pymysql://%s:%s@%s:%s/%s?charset=%s" % tuple(mysqlConfig)

        # The interpolated values are the class's own index codes and the
        # dates of the already loaded frame, not external input.
        sqlStr = "select CODE,CLOSE,`UPDATE` from index_data where CODE in (%s) and `UPDATE`<='%s' and `UPDATE` >='%s'" % (
            self.CodeToStr(self.indexCodeList), endDate, startDate)
        conn = create_engine(mysqlcon)
        try:
            tempDf1 = pd.read_sql(sql=sqlStr, con=conn)
        finally:
            conn.dispose()  # release the pooled connections of this one-off engine

        # Scalar group key so that `code` is a plain string on every pandas
        # version (a one-element list yields 1-tuples on pandas >= 2).
        dflist = [pd.DataFrame(df['CLOSE'].values, index=df['UPDATE'].tolist(), columns=[code])
                  for code, df in tempDf1.groupby('CODE')]
        # The product series contributes its dates so the index closes can be
        # forward-filled onto them.
        dflist.append(netAssetDf['netValue'])
        tempIndexDF = pd.concat(dflist, axis=1).ffill()
        return pd.concat([netAssetDf, tempIndexDF[self.indexCodeList]], axis=1, join='inner')

    def similateNet(self, netDf):
        """Extend ``accNetReturn`` by 24 weekly steps of simulated returns.

        For every step 100 normal draws (mean/std of ``netReturn``) are
        taken and their 25th/50th/75th percentile becomes the low/middle/
        high step return.

        :param netDf: frame with columns ``netReturn`` and ``accNetReturn``.
        :return: DataFrame with columns ``lowMarket``, ``middleMarket`` and
            ``highMarket``: the history followed by the projection.
        """
        period = 6 * 4  # number of weeks to project
        rateMean = netDf['netReturn'].mean()
        rateStd = netDf['netReturn'].std()

        stepQuartiles = []
        for _ in range(period):
            randNum = np.random.normal(loc=rateMean, scale=rateStd, size=(100, 1))
            stepQuartiles.append(np.percentile(randNum, [25, 50, 75], axis=0).ravel())
        stepQuartiles = np.array(stepQuartiles)  # shape (period, 3): low, middle, high

        history = netDf['accNetReturn'].values
        # NOTE(review): the last accumulated return is scaled directly; if
        # 'accNetReturn' is a return (0 = flat) rather than a net value,
        # (1 + last) * cumprod - 1 may have been intended - confirm.
        lastAcc = netDf['accNetReturn'].iloc[-1]
        projected = lastAcc * (1 + stepQuartiles).cumprod(axis=0)

        futureDates = [x.date() for x in pd.date_range(start=netDf.index.tolist()[-1], freq="W", periods=period)]
        indexList = netDf.index.tolist() + futureDates

        values = np.vstack([np.column_stack([history, history, history]), projected])
        return pd.DataFrame(values, index=indexList,
                            columns=['lowMarket', 'middleMarket', 'highMarket'])

    def calcMain(self):
        """Entry point: load the Excel data, add index history and plot."""
        netAssetDf, dicProduct = GetExcelData(self.fileTotalPath).getData()
        netAssetDf = self.getIndexData(netAssetDf)
        self.controlData(netAssetDf, dicProduct)
176 |
177 |
if __name__ == '__main__':
    import os

    # Folder with the valuation sheets, expected inside the current working
    # directory.  os.path.join replaces the hard-coded '\' separator.
    fileTotalPath = os.path.join(os.getcwd(), '乐道4估值表')
    CalcEstimateValueDemo = CalcEstimateValue(fileTotalPath=fileTotalPath)
    CalcEstimateValueDemo.calcMain()
183 |
--------------------------------------------------------------------------------
/CalcRegression.py:
--------------------------------------------------------------------------------
1 | # -- coding: utf-8 --
2 | # Author:zouhao
3 | # email:1084848158@qq.com
4 | '''
5 | 回归类结果分析与保存
6 | 选股择时能力:TM,HM ,CL
7 | 行业回归
8 | 风格回归
9 | '''
10 | import numpy as np
11 | import pandas as pd
12 | import matplotlib.pyplot as plt
13 | import matplotlib
14 | from datetime import datetime, timedelta
15 | import statsmodels.api as sm
16 |
17 | matplotlib.rcParams['font.sans-serif'] = ['SimHei']
18 | matplotlib.rcParams['font.family'] = 'sans-serif'
19 | matplotlib.rcParams['axes.unicode_minus'] = False
20 |
21 |
class CalcRegression:
    """Regression-based analysis of a fund: stock-selection / market-timing
    models (T-M, H-M, C-L) and single-index style / industry regressions.
    Results are written to Excel, text and PNG files under ``resultPath``."""

    def __init__(self):
        pass

    def regression(self, x1, x2, y):
        """Ordinary least squares of ``y`` on a constant, ``x1`` and ``x2``.

        :param x1: first regressor, 1-D array-like.
        :param x2: second regressor, 1-D array-like of the same length.
        :param y: dependent variable.
        :return: fitted statsmodels results; ``params`` is ordered
            ``[const, x1, x2]``.
        """
        x1 = np.asarray(x1).reshape(-1, 1)
        x2 = np.asarray(x2).reshape(-1, 1)
        X = np.hstack((np.ones((len(x1), 1)), x1, x2))
        return sm.OLS(y, X).fit()

    @staticmethod
    def _timingRegressors(marketExcess):
        """Build the regressor pairs of the three timing models.

        :param marketExcess: benchmark return minus risk-free rate.
        :return: dict ``{'TM': (x, x**2), 'HM': (x, D*x), 'CL': (x_up, x_down)}``
            where ``D`` is 1 when ``x > 0`` and ``x_up`` / ``x_down`` hold
            ``x`` in up / non-up markets and 0 otherwise.
        """
        marketExcess = np.asarray(marketExcess, dtype=float)
        upMarket = marketExcess > 0
        return {
            'TM': (marketExcess, marketExcess ** 2),
            # Henriksson-Merton uses D * x; the squared term belongs to T-M.
            'HM': (marketExcess, upMarket * marketExcess),
            'CL': (np.where(upMarket, marketExcess, 0.0),
                   np.where(upMarket, 0.0, marketExcess)),
        }

    def _indexRegression(self, fundIndustryDf, resultPath, fundName, industryDic,
                         benchDf, label, showFigure):
        """Shared implementation of the style and the industry regression.

        The fund's excess return is regressed on the excess return of every
        index in ``industryDic`` separately; the table is saved to Excel and
        the best-fitting index (highest R squared) is plotted twice.

        :param benchDf: optional price frame whose return is added to the
            fund return (hedged products); ``None`` or empty to skip.
        :param label: '风格' or '行业'; inserted into file names and titles.
        :param showFigure: call ``plt.show()`` at the end.
        """
        industryCodeList = list(industryDic.keys())
        targetDf = fundIndustryDf[industryCodeList + [fundName]]
        tempReturn = (targetDf - targetDf.shift(1)) / targetDf.shift(1)
        if benchDf is not None and not benchDf.empty:  # quantitative (hedged) product
            bench_return_df = benchDf / benchDf.shift(1) - 1
            tempReturn[fundName] = pd.concat([bench_return_df, tempReturn[fundName]],
                                             axis=1, sort=True).sum(axis=1)

        riskFree = fundIndustryDf['无风险利率']
        tempExReturn = tempReturn.sub(riskFree, axis=0).dropna()

        list_r2, list_beta, list_tr, list_const = [], [], [], []
        Y = tempExReturn[fundName].values
        for code in industryCodeList:
            x = tempExReturn[code].values
            X = np.column_stack((np.ones(len(x)), x))
            res = sm.OLS(Y, X).fit()
            list_r2.append(res.rsquared)
            list_beta.append(res.params[1])
            list_const.append(res.params[0])
            # .iloc: first / last row by position, whatever the index labels.
            prices = fundIndustryDf[code]
            list_tr.append((prices.iloc[-1] / prices.iloc[0] - 1) - riskFree.mean())

        res_indus = pd.DataFrame([])
        res_indus['指数代码'] = industryCodeList
        res_indus['指数名称'] = [industryDic[code] for code in industryCodeList]
        res_indus['拟合R方'] = list_r2
        res_indus['beta'] = list_beta
        res_indus['alpha'] = list_const
        res_indus['期间总收益'] = list_tr
        res_indus['开始时间'] = tempExReturn.index.tolist()[0]
        res_indus['终止时间'] = tempExReturn.index.tolist()[-1]
        res_indus = res_indus.sort_values('拟合R方', ascending=False)
        res_indus.to_excel(resultPath + label + '回归结果.xlsx', index=False)

        best = res_indus.iloc[0]
        maxR2Code = best['指数代码']
        x = tempExReturn[maxR2Code].values
        title = '拟合效果最好的' + label + '指数:' + industryDic[maxR2Code]

        # Scatter of excess returns with the fitted line.
        plt.style.use('ggplot')
        plt.figure(figsize=(16, 9))
        plt.scatter(x, Y, s=30, color='blue', label='样本实例')
        plt.plot(x, best['alpha'] + best['beta'] * x, linewidth=3, color='red', label='回归线')
        plt.ylabel('产品超额收益')
        plt.xlabel(label + '超额收益')
        plt.title(title, fontsize=13, bbox={'facecolor': '0.8', 'pad': 5})
        plt.grid(True)
        plt.legend(loc='upper left')
        plt.savefig(resultPath + '拟合' + label + '指数效果图.png')

        # Cumulative paths of fund and index plus their ratio.
        plt.style.use('ggplot')
        fig = plt.figure(figsize=(16, 9))
        ax = fig.add_subplot(111)
        indeustryAccDf = (1 + tempReturn[[fundName, maxR2Code]]).cumprod()
        indeustryAccDf['产品' + label + '收益比'] = indeustryAccDf[fundName] / indeustryAccDf[maxR2Code]
        indeustryAccDf.plot(ax=ax)
        ax.set_ylabel('累计收益率')
        ax.set_xlabel('时间')
        ax.set_title(title, fontsize=13, bbox={'facecolor': '0.8', 'pad': 5})
        ax.grid(True)
        ax.legend(loc='lower right')  # 'down right' is not a valid location
        plt.savefig(resultPath + '拟合' + label + '指数累计走势对比图.png')
        if showFigure:
            plt.show()

    def getStyleRegression(self, fundIndustryDf, resultPath, fundName, industryDic, DCIndexDf=None):
        """Style attribution: regress the fund on each style index.

        :param fundIndustryDf: price frame containing the index codes, the
            fund column and the column '无风险利率' (risk-free rate).
        :param resultPath: prefix for all output files.
        :param fundName: name of the fund column.
        :param industryDic: dict mapping index code to display name.
        :param DCIndexDf: optional hedge index prices (see
            ``_indexRegression``); ``None`` or an empty frame disables it.
        """
        self._indexRegression(fundIndustryDf, resultPath, fundName, industryDic,
                              DCIndexDf, '风格', True)

    def getIndustryRegression(self, fundIndustryDf, resultPath, fundName, industryDic, bench_return=None):
        """Industry attribution: regress the fund on each industry index.

        Parameters as for ``getStyleRegression``; ``bench_return`` plays the
        role of ``DCIndexDf``.  The figures are saved but not shown.
        """
        self._indexRegression(fundIndustryDf, resultPath, fundName, industryDic,
                              bench_return, '行业', False)

    def getSelectStockAndTime(self, fundPlotDf, resultPath, fundName, netPeriod, benchMark, DCIndexDf=None):
        """Estimate stock-selection and market-timing ability with the T-M,
        H-M and C-L models and save the results.

        :param fundPlotDf: price frame with the fund, the benchmark and the
            column '无风险利率' (risk-free rate).
        :param resultPath: prefix for the text and Excel output.
        :param fundName: name of the fund column.
        :param netPeriod: 'W' annualises alpha with 52, anything else 250.
        :param benchMark: name of the benchmark column.
        :param DCIndexDf: optional hedge index prices whose return is added
            to the fund return; ``None`` or an empty frame disables it.
        """
        calcPeriod = 52 if netPeriod == 'W' else 250

        hedged = DCIndexDf is not None and not DCIndexDf.empty
        if hedged:
            targetDf = pd.concat([fundPlotDf[[fundName, benchMark]], DCIndexDf], axis=1, sort=True)
        else:
            targetDf = fundPlotDf[[fundName, benchMark]]
        tempReturn = (targetDf - targetDf.shift(1)) / targetDf.shift(1)
        tempReturn.fillna(0, inplace=True)
        if hedged:
            # quantitative hedged product: fund return + hedge index return
            tempReturn[fundName] = tempReturn[[tempReturn.columns[0], tempReturn.columns[-1]]].sum(axis=1)

        riskFree = fundPlotDf['无风险利率']
        Y = (tempReturn[fundName] - riskFree).values
        # Every regressor is derived from this one label-aligned series, so
        # the up/down-market split always refers to the same rows as Y.
        regressors = self._timingRegressors((tempReturn[benchMark] - riskFree).values)

        TMResult = self.regression(regressors['TM'][0], regressors['TM'][1], Y)
        HMResult = self.regression(regressors['HM'][0], regressors['HM'][1], Y)
        CLResult = self.regression(regressors['CL'][0], regressors['CL'][1], Y)

        def summarise(result, timing):
            return {'R方': result.rsquared,
                    '择股指标(年化alpha)': str(round(result.params[0] * calcPeriod * 100, 2)) + '%',
                    '择时指标(beta)': round(timing, 2)}

        dicRegression = {
            'TM回归结果': summarise(TMResult, TMResult.params[2]),
            'HM回归结果': summarise(HMResult, HMResult.params[2]),
            # params[1] is the up-market beta, params[2] the down-market
            # beta; timing ability is up minus down.
            'CL回归结果': summarise(CLResult, CLResult.params[1] - CLResult.params[2]),
        }

        with open(resultPath + "TM,HM,CL模型回归结果.txt", "w+") as f:
            f.write(str(TMResult.summary(title='TM模型回归结果')))
            f.write('\n\n\n')
            f.write(str(HMResult.summary(title='HM模型回归结果')))
            f.write('\n\n\n')
            f.write(str(CLResult.summary(title='CL模型回归结果')))

        pd.DataFrame(dicRegression).to_excel(resultPath + '选股择时能力回归结果.xlsx')
        return
275 |
276 |
# Script entry point: only instantiates the class (no analysis is run).
if __name__ == "__main__":
    CalcRegressionDemo = CalcRegression()
279 |
--------------------------------------------------------------------------------
/CalcRiskReturn.py:
--------------------------------------------------------------------------------
1 | # -- coding: utf-8 --
2 | # Author:zouhao
3 | # email:1084848158@qq.com
4 |
5 | '''
6 | 基于净值类数据的分析
7 | '''
8 |
9 |
10 | import pandas as pd
11 | import numpy as np
12 | import matplotlib.pyplot as plt
13 | import matplotlib
14 | from datetime import datetime,timedelta
15 |
16 | matplotlib.rcParams['font.sans-serif'] = ['SimHei']
17 | matplotlib.rcParams['font.family'] = 'sans-serif'
18 | matplotlib.rcParams['axes.unicode_minus'] = False
19 | from DateFormatDf import DateFormatDf
20 |
21 |
class CalcRiskReturn:
    """Risk/return statistics and charts derived from net-value (NAV) series.

    Value frames hold one column per asset. Methods that call
    ``DateFormatDf.getStrToDate`` expect the index to be ``'YYYY-MM-DD'``
    strings.
    """

    # (label, trailing row count) of each reporting window; None = full history.
    _TIME_WINDOWS = (
        ('近一月', 21),
        ('近三月', 21 * 3),
        ('近六月', 21 * 6),
        ('近一年', 21 * 12),
        ('成立以来', None),
    )
    # Bucket edges (simple returns) and labels of the forecast probability table.
    # Bucket i covers the half-open interval (edge[i], edge[i + 1]].
    _RETURN_BUCKET_EDGES = (-np.inf, -0.01, -0.005, 0.0, 0.005, 0.01, np.inf)
    _RETURN_BUCKET_LABELS = ['<-1.0%', '-1.0%~-0.5%', '-0.5%~-0.0%',
                             '0.0%~0.5%', '0.5%~1.0%', '>1.0%']

    def __init__(self):
        self.DateFormatDfDemo = DateFormatDf()

    # ------------------------------------------------------------------
    # small shared helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _simpleReturns(valueDf):
        """Return period-over-period simple returns; the first row is set to 0."""
        previous = valueDf.shift(1)
        returnDf = (valueDf - previous) / previous
        returnDf.fillna(0, inplace=True)
        return returnDf

    @staticmethod
    def _plotToFile(plotData, savePath, xlabel, ylabel, title, **plotKwargs):
        """Plot ``plotData`` on a fresh 16x9 ggplot figure, save it and close it.

        Closing the figure after saving keeps matplotlib from accumulating
        open figures when many funds are processed in one run.
        """
        plt.style.use('ggplot')
        fig = plt.figure(figsize=(16, 9))
        try:
            ax = fig.add_subplot(111)
            plotData.plot(ax=ax, **plotKwargs)
            ax.grid()
            ax.set_xlabel(xlabel)
            ax.set_ylabel(ylabel)
            ax.set_title(title)
            fig.savefig(savePath)
        finally:
            plt.close(fig)

    @staticmethod
    def _longestRun(flags):
        """Return the length of the longest run of truthy values in ``flags``."""
        longest = current = 0
        for flag in flags:
            current = current + 1 if flag else 0
            if current > longest:
                longest = current
        return longest

    @staticmethod
    def _upAndDownTrade(tempSe):
        """Summarise one return series: loss/win ratios and longest streaks.

        A period counts as a loss when its return is < 0 and as a win when it
        is >= 0. Streaks that run up to the last observation are included.
        """
        failTrade = len(tempSe[tempSe < 0]) / len(tempSe)
        successTrade = 1 - failTrade
        failTimes = CalcRiskReturn._longestRun((tempSe < 0).tolist())
        successTimes = CalcRiskReturn._longestRun((tempSe >= 0).tolist())
        # Values are ordered to match the labels: the winning streak goes under
        # '最大连续上涨周数' and the losing streak under '最大连续下跌周数'.
        return pd.Series([failTrade, successTrade, successTimes, failTimes],
                         index=['负交易周', '正交易周', '最大连续上涨周数', '最大连续下跌周数'],
                         name=tempSe.name)

    @staticmethod
    def _returnBucketProbabilities(forcastRate):
        """Return the share of simulated returns falling into each bucket.

        The denominator is ``len(forcastRate)``, so the shares add up to 1
        when the series contains no NaN.
        """
        edges = CalcRiskReturn._RETURN_BUCKET_EDGES
        shares = [
            len(forcastRate[(lower < forcastRate) & (forcastRate <= upper)]) / len(forcastRate)
            for lower, upper in zip(edges[:-1], edges[1:])
        ]
        rateProSe = pd.Series(shares, index=list(CalcRiskReturn._RETURN_BUCKET_LABELS), name='概率')
        rateProSe.index.name = '收益率区间'
        return rateProSe

    @staticmethod
    def _stackWindowFrames(frameList):
        """Concatenate per-window frames and index them by (window, metric)."""
        stackedDf = pd.concat(frameList, axis=0)
        stackedDf['统计指标'] = stackedDf.index.tolist()
        stackedDf.set_index(keys=['统计周期', '统计指标'], drop=True, inplace=True)
        return stackedDf

    # ------------------------------------------------------------------
    # statistics
    # ------------------------------------------------------------------
    def formaData(self, tempValue, flagP=True):
        """Format a number for display.

        :param tempValue: numeric value to format
        :param flagP: True -> percentage string with two decimals (e.g. '12.34%');
                      False -> value rounded to two decimals
        """
        if flagP:
            return str(round(round(tempValue, 4) * 100, 2)) + '%'
        return round(tempValue, 2)

    def calcMaxdown(self, return_list):
        """Maximum drawdown of a series of simple returns.

        The returns are compounded into a value path first; 0 is returned when
        the largest peak-to-value gap is found at the first observation.
        """
        valuePath = (return_list + 1).cumprod().values
        troughLoc = np.argmax(np.maximum.accumulate(valuePath) - valuePath)
        if troughLoc == 0:
            return 0
        peakLoc = np.argmax(valuePath[:troughLoc])
        return (valuePath[peakLoc] - valuePath[troughLoc]) / valuePath[peakLoc]

    def calcDetail(self, tempValueDf):
        """Compute risk/return metrics for every column of a value frame.

        Annualisation uses 250 periods per year and the Sharpe ratio uses a
        fixed 2% annual risk-free rate.

        :param tempValueDf: value (NAV / index level) frame, one column per asset
        :return: (formatted metrics dict, raw metrics dict), both keyed by metric name
        """
        assetAnnualReturn = (tempValueDf.iloc[-1] / tempValueDf.iloc[0]) ** (250 / tempValueDf.shape[0]) - 1
        tempReturn = self._simpleReturns(tempValueDf)

        # Downside risk: volatility of the returns with gains clipped to 0.
        downsideReturn = tempReturn.copy()
        downsideReturn[downsideReturn > 0] = 0
        assetDownRisk = downsideReturn.std() * np.sqrt(250)
        assetStd = tempReturn.std() * np.sqrt(250)
        assetMaxDown = tempReturn.apply(self.calcMaxdown)
        assetCalmar = assetAnnualReturn / assetMaxDown
        assetSharp = (assetAnnualReturn - 0.02) / assetStd

        dicRightResult = {
            u'年化收益': assetAnnualReturn,
            u'年化波动': assetStd,
            u'最大回撤': assetMaxDown,
            u'夏普比率': assetSharp,
            u'卡玛比率': assetCalmar,
            u'下行风险': assetDownRisk,
        }
        ratioMetrics = (u'夏普比率', u'卡玛比率')  # shown as plain numbers, not percentages
        dicResult = {}
        for metricName, metricSe in dicRightResult.items():
            if metricName in ratioMetrics:
                dicResult[metricName] = metricSe.apply(self.formaData, args=(False,))
            else:
                dicResult[metricName] = metricSe.apply(self.formaData)
        return dicResult, dicRightResult

    def calcRiskReturn(self, fundIndexDf, resultPath):
        """Compute the metrics per reporting window and write them to Excel.

        Writes '风险收益统计指标.xlsx' (formatted) and
        '风险收益统计指标原始数据.xlsx' (raw) under ``resultPath``.

        A window whose calculation fails is logged and left out of the report;
        previously the results of the preceding window were silently reused
        under the failed window's label.

        :raises ValueError: if no window could be calculated
        """
        import logging
        logger = logging.getLogger(__name__)

        fundDfList = []
        fundRightDfList = []
        for timeWindow, timeNum in self._TIME_WINDOWS:
            tempValueDf = fundIndexDf if timeNum is None else fundIndexDf[-timeNum:]
            try:
                tempResult, tempRightResult = self.calcDetail(tempValueDf)
            except Exception:
                logger.exception("risk/return calculation failed for window %s; skipped", timeWindow)
                continue

            cutoffDate = tempValueDf.index.tolist()[-1]
            for metricDic, targetList in ((tempResult, fundDfList), (tempRightResult, fundRightDfList)):
                tempDf = pd.DataFrame(metricDic).T
                tempDf['统计周期'] = timeWindow
                tempDf['数据截止日期'] = cutoffDate
                targetList.append(tempDf)

        if not fundDfList:
            raise ValueError("no reporting window could be calculated")

        self._stackWindowFrames(fundDfList).to_excel(resultPath + '风险收益统计指标.xlsx')
        self._stackWindowFrames(fundRightDfList).to_excel(resultPath + '风险收益统计指标原始数据.xlsx')

    # ------------------------------------------------------------------
    # charts
    # ------------------------------------------------------------------
    def plotDayNetValueFigure(self, fundPlotDf, resultPath, fundName, netPeriod='', marketVolume=None):
        """Plot cumulative return, drawdown, rolling volatility and rolling return.

        :param fundPlotDf: value frame; must contain ``fundName`` and '沪深300' columns
        :param resultPath: output directory prefix (file names are appended to it)
        :param fundName: column name of the fund
        :param netPeriod: 'W' for weekly data (4-period window, 52 periods/year);
                          anything else uses a 21-period window and 250 periods/year
        :param marketVolume: optional frame with a '000300.SH' volume column; when
                             given and non-empty, it is drawn below the cumulative return
        """
        fundPlotFormatDf = self.DateFormatDfDemo.getStrToDate(fundPlotDf)
        tempReturn = self._simpleReturns(fundPlotFormatDf)
        accReturn = (1 + tempReturn).cumprod() - 1

        hasVolume = marketVolume is not None and not marketVolume.empty
        if hasVolume:
            marketVolume = self.DateFormatDfDemo.getStrToDate(marketVolume)
            marketVolume.rename(columns={'000300.SH': '沪深300成交量'}, inplace=True)

        plt.style.use('ggplot')
        fig = plt.figure(figsize=(16, 9))
        try:
            ax = fig.add_subplot(211 if hasVolume else 111)
            accReturn.plot(ax=ax)
            ax.grid()
            ax.set_xlabel('时间')
            ax.set_ylabel('收益率')
            ax.set_title('累计收益走势图')
            if hasVolume:
                volumeAx = fig.add_subplot(212)
                marketVolume['沪深300成交量'].plot(ax=volumeAx)
                volumeAx.grid()
                volumeAx.set_xlabel('时间')
                volumeAx.set_ylabel('成交量')
                volumeAx.set_title('沪深300成交量')
            # Saved for both layouts.
            fig.savefig(resultPath + '累计收益走势图.png')
        finally:
            plt.close(fig)

        def historydownrate(tempdata):
            # Drawdown relative to the running maximum, vectorised with cummax.
            return tempdata / tempdata.cummax() - 1

        tempComDf = tempReturn[[fundName, '沪深300']]
        downDf = (1 + tempComDf).cumprod().apply(historydownrate)
        self._plotToFile(downDf, resultPath + '回撤率走势图.png', '时间', '回撤率', '回撤率走势图')

        if netPeriod == 'W':
            window, calcFreq = 4, 52
        else:
            window, calcFreq = 21, 250

        annualStdDf = tempComDf.rolling(window=window).std() * np.sqrt(calcFreq)
        self._plotToFile(annualStdDf, resultPath + '滚动年化波动率走势图.png',
                         '时间', '年化波动率', '滚动年化波动率走势图')

        # Annualised return of each full `window`-row slice, keyed by the date
        # of the slice's first row; slices start at row `window`.
        rowCount = fundPlotFormatDf.shape[0]
        totalDateList = fundPlotFormatDf.index.tolist()
        dicDf = {}
        for rolLoc in range(window, rowCount - window + 1):
            calcAnnualDf = fundPlotFormatDf.iloc[rolLoc:rolLoc + window]
            dicDf[totalDateList[rolLoc]] = (
                (calcAnnualDf.iloc[-1] / calcAnnualDf.iloc[0]) ** (calcFreq / calcAnnualDf.shape[0]) - 1
            )
        rollAnnualReturnDf = pd.DataFrame(dicDf).T
        self._plotToFile(rollAnnualReturnDf, resultPath + '滚动年化收益率走势图.png',
                         '时间', '滚动年化收益率', '滚动年化收益率走势图')

    def calcWeekNetValueResult(self, weekFundPlotDf, resultPath, fundName):
        """Write weekly win/loss statistics for every column to '周度胜率统计.xlsx'.

        ``fundName`` is accepted for interface compatibility and is not used.
        """
        fundPlotFormatDf = self.DateFormatDfDemo.getStrToDate(weekFundPlotDf)
        tempReturn = self._simpleReturns(fundPlotFormatDf)
        tradeResultDf = tempReturn.apply(self._upAndDownTrade)
        tradeResultDf.to_excel(resultPath + '周度胜率统计.xlsx')

    def plotWeekNetValueFigure(self, weekFundPlotDf, resultPath, fundName):
        """Plot weekly cumulative return, return histogram and win/loss pie chart.

        :param weekFundPlotDf: weekly value frame
        :param resultPath: output directory prefix
        :param fundName: column whose returns feed the histogram and the pie chart
        """
        fundPlotFormatDf = self.DateFormatDfDemo.getStrToDate(weekFundPlotDf)
        tempReturn = self._simpleReturns(fundPlotFormatDf)
        accReturn = (1 + tempReturn).cumprod() - 1
        self._plotToFile(accReturn, resultPath + '周度累计收益走势图.png',
                         '时间', '收益率', '周度累计收益走势图')

        # Split the fund's returns into gains and losses; zero returns stay in both.
        tempSeUp = tempReturn[fundName].copy()
        tempSeUp[tempSeUp < 0] = np.nan
        tempSeUp.name = '正收益'
        tempSeDown = tempReturn[fundName].copy()
        tempSeDown[tempSeDown > 0] = np.nan
        tempSeDown.name = '负收益'
        tempDf = pd.concat([tempSeDown, tempSeUp], axis=1)
        self._plotToFile(tempDf, resultPath + '周度收益率分布图.png',
                         '周度收益率', '频率', '周度收益率分布图', kind='hist', bins=20)

        tempSe = tempReturn[fundName].copy()
        lossRate = len(tempSe[tempSe < 0]) / len(tempSe)
        successRate = 1 - lossRate
        tempPieSe = pd.Series([lossRate, successRate], index=['负交易周', '正交易周'], name='')
        plt.style.use('ggplot')
        fig = plt.figure(figsize=(16, 9))
        try:
            ax = fig.add_subplot(111)
            ax.set_title('周度交易胜负情况')
            tempPieSe.plot(kind='pie', autopct='%.2f%%', ax=ax)
            fig.savefig(resultPath + '周度盈亏状况饼形图.png')
        finally:
            plt.close(fig)

    def plotMonthNetValueFigure(self, monthFundPlotDf, resultPath, fundName):
        """Plot monthly returns of every column as a bar chart.

        The index is used as-is (no string-to-date conversion). ``fundName``
        is accepted for interface compatibility and is not used.
        """
        tempReturn = self._simpleReturns(monthFundPlotDf)
        self._plotToFile(tempReturn, resultPath + '月度收益率表现图.png',
                         '时间', '收益率', '月度收益率表现', kind='bar')

    # ------------------------------------------------------------------
    # Monte-Carlo forecast
    # ------------------------------------------------------------------
    def calcMonteCarlo(self, initValue, tradeDayList, mu, sigma, calcTimes=10000):
        """Simulate ``calcTimes`` value paths over ``tradeDayList``.

        Each step applies ``price += price * (drift + shock)`` with
        ``dt = 1 / len(tradeDayList)``, ``drift = mu * dt`` and
        ``shock ~ N(mu * dt, sigma * sqrt(dt))``.

        NOTE(review): the shock is already centred on ``mu * dt``, so the drift
        is effectively applied twice per step. Kept unchanged so that results
        stay comparable with earlier reports; confirm whether this is intended.

        :param initValue: starting value of every path
        :param tradeDayList: dates of the simulated horizon (first entry = start)
        :param mu: mean return
        :param sigma: standard deviation of returns
        :param calcTimes: number of simulated paths
        :return: DataFrame indexed by ``tradeDayList`` with one column per path
        :raises ValueError: if ``tradeDayList`` is empty
        """
        days = len(tradeDayList)
        if days == 0:
            raise ValueError("tradeDayList must contain at least one date")
        dt = 1 / days
        drift = mu * dt
        # One row of shocks per path, one column per simulated step.
        shocks = np.random.normal(loc=mu * dt, scale=sigma * np.sqrt(dt), size=(calcTimes, days - 1))

        prices = np.empty((days, calcTimes))
        prices[0] = initValue
        # Loop over the (few) steps only; every step updates all paths at once.
        for step in range(1, days):
            prices[step] = prices[step - 1] + prices[step - 1] * (drift + shocks[:, step - 1])
        return pd.DataFrame(prices, index=tradeDayList)

    def getMentoCaloForecast(self, fundPlotDf, resultPath, tradeDayList, fundName):
        """Run the Monte-Carlo forecast for ``fundName`` and save its outputs.

        Saves '模拟净值走势图.png' (history plus 25%/50%/75% quantile paths),
        '净值变动频数分布图.png' (histogram of the simulated final values) and
        '预测收益率概率表.xlsx' (probability of the final return per bucket).

        :param fundPlotDf: value frame indexed by 'YYYY-MM-DD' strings
        :param resultPath: output directory prefix
        :param tradeDayList: dates of the forecast horizon
        :param fundName: column of the fund to simulate
        """
        fundPlotFormatDf = self.DateFormatDfDemo.getStrToDate(fundPlotDf)
        tempReturn = self._simpleReturns(fundPlotFormatDf)
        mu = tempReturn[fundName].mean()
        sigma = tempReturn[fundName].std()
        initValue = fundPlotDf.iloc[-1][fundName]

        resultDf = self.calcMonteCarlo(initValue=initValue, tradeDayList=tradeDayList, mu=mu, sigma=sigma)
        forcastDf = pd.DataFrame({
            '悲观': resultDf.quantile(0.25, axis=1),
            '中性': resultDf.quantile(0.5, axis=1),
            '乐观': resultDf.quantile(0.75, axis=1),
        })

        dfDic = {}
        for colName in forcastDf.columns:
            joinedSe = pd.concat([fundPlotDf[fundName], forcastDf[colName].copy()], axis=0)
            # Drop dates present in both history and forecast, keeping the
            # historical value. De-duplicating on values (as before) also removed
            # every historical date whose NAV repeated an earlier one.
            joinedSe = joinedSe[~joinedSe.index.duplicated(keep='first')].sort_index()
            dfDic[colName] = joinedSe
        dfDic[fundName] = fundPlotDf[fundName]
        resultFinalDf = self.DateFormatDfDemo.getStrToDate(pd.DataFrame(dfDic))
        self._plotToFile(resultFinalDf, resultPath + '模拟净值走势图.png',
                         '时间', '收益率', '模拟净值走势图')

        forcastLast = resultDf.iloc[-1]
        self._plotToFile(forcastLast, resultPath + '净值变动频数分布图.png',
                         '时间', '频率', '10000次模拟的三个月后净值变动频数分布图',
                         kind='hist', bins=50)

        forcastRate = forcastLast / initValue - 1
        rateProSe = self._returnBucketProbabilities(forcastRate)
        rateProSe.to_excel(resultPath + '预测收益率概率表.xlsx')
413 |
if __name__ == '__main__':
    # Smoke check only. calcRiskReturn needs a value DataFrame and an output
    # path (calcRiskReturn(fundIndexDf, resultPath)); the previous argument-less
    # call on the class itself always raised TypeError.
    CalcRiskReturnDemo = CalcRiskReturn()
417 |
--------------------------------------------------------------------------------
/DataToMySql/DataDic.py:
--------------------------------------------------------------------------------
1 | # -- coding: utf-8 --
2 | # Author:zouhao
3 | # email:1084848158@qq.com
4 | '''
5 | 估值表数据字典,用于解析不同估值表格式后,对应到本地mysql数据库的字段
6 | '''
7 |
class DataDic:
    """Maps valuation-sheet column headers (Chinese) to local MySQL field names."""

    def __init__(self):
        pass

    def getDataDic(self, nameList=None):
        """Return the header -> field mapping restricted to ``nameList``.

        :param nameList: headers to look up; None or empty yields an empty dict
        :return: dict of header -> field name, in ``nameList`` order
        :raises KeyError: if a header is not part of the full mapping
        """
        dicTotalResult = self.getTotalDataDic()
        return {nameStr: dicTotalResult[nameStr] for nameStr in (nameList or [])}

    def getTotalDataDic(self):
        """Return the full header -> field mapping.

        Several headers are aliases of the same field because different
        valuation-sheet layouts name the same column differently.
        """
        return {
            '基金代码': 'fund_code',
            '基金名称': 'fund_name',
            '单位净值': 'net_value',
            '基金单位净值': 'net_value',

            '昨日单位净值': 'pre_net_value',
            '累计单位净值': 'acc_net_value',
            '累计净值': 'acc_net_value',
            '日净值增长率': 'rate_net_value',
            '净值日增长率(比)': 'rate_net_value',

            '数据日期': 'update_time',

            '科目代码': 'style_code',
            '科目名称': 'style_name',
            '成本': 'cost',
            '成本占比': 'cost_rate',
            '成本占净值比': 'cost_rate',

            '市值': 'market_value',
            '市值占比': 'market_value_rate',
            '市值占净值比': 'market_value_rate',

            '数量': 'quantity',
            '单位成本': 'unit_cost',
            '行情': 'close_price',
            '市价': 'close_price',
            '估值增值': 'estimate_change',
            '停牌信息': 'trade_flag',
        }
54 |
if __name__ == '__main__':
    DataDicDemo = DataDic()
    # Call through the instance: getTotalDataDic is an instance method, so
    # calling it on the class raised TypeError (missing 'self').
    DataDicDemo.getTotalDataDic()
--------------------------------------------------------------------------------
/DataToMySql/GetDataToMysql.py:
--------------------------------------------------------------------------------
1 | # -- coding: utf-8 --
2 | # Author:zouhao
3 | # email:1084848158@qq.com
4 |
5 | '''
6 | 获取数据(excel,wind)存入本地相应数据库表中,
7 | 每张表构建索性,数据存在时,更新,不存在时,插入
8 | '''
9 | import pandas as pd
10 | from DataToMySql.MysqlCon import MysqlCon
11 | import numpy as np
12 |
class GetDataToMysql:
    """Writes DataFrames into local MySQL tables with ``REPLACE INTO`` semantics."""

    def __init__(self):
        self.conn = MysqlCon().getMysqlCon(flag='connect')

    def GetMain(self, dataDf, tableName):
        """Insert or replace every row of ``dataDf`` into ``tableName``.

        Column names of ``dataDf`` are used as the table's field names. Missing
        values (NaN/NaT/None) are sent as SQL NULL. A row that fails to execute
        is logged and skipped so that the remaining rows are still written;
        the transaction is committed at the end. ``dataDf`` is not modified.

        NOTE: ``tableName`` and the column names are interpolated into the SQL
        text (identifiers cannot be bound as parameters), so they must come
        from trusted code, never from user input. Row values are bound as
        parameters.

        :param dataDf: rows to write
        :param tableName: target table
        """
        import logging
        logger = logging.getLogger(__name__)

        tableList = dataDf.columns.tolist()
        placeholders = ','.join(['%s'] * len(tableList))
        sqlStr = "replace into %s(%s)" % (tableName, ','.join(tableList)) + "VALUES(%s)" % placeholders

        cursor = self.conn.cursor()
        try:
            # Positional iteration replaces DataFrame.ix, which newer pandas
            # versions no longer provide.
            for rowLoc, row in enumerate(dataDf[tableList].itertuples(index=False, name=None)):
                values = tuple(None if pd.isnull(value) else value for value in row)
                try:
                    cursor.execute(sqlStr, values)
                except Exception:
                    logger.exception("failed to write row %s into %s; skipped", rowLoc, tableName)
        finally:
            cursor.close()
        self.conn.commit()
37 |
38 |
if __name__ == "__main__":
    # Connectivity check only. GetMain needs a DataFrame and a table name
    # (GetMain(dataDf, tableName)); the previous argument-less call always
    # raised TypeError.
    GetDataToMysqlDemo = GetDataToMysql()
--------------------------------------------------------------------------------
/DataToMySql/MysqlCon.py:
--------------------------------------------------------------------------------
1 | # -- coding: utf-8 --
2 | # Author:zouhao
3 | # email:1084848158@qq.com
4 | from configparser import ConfigParser
5 | import pymysql
6 | from sqlalchemy import create_engine
7 | import numpy as np
8 | import os
9 |
class MysqlCon:
    """Creates MySQL connections/engines from the ``[db]`` section of mysql.conf."""

    def __init__(self):
        pass

    def _readDbConfig(self):
        """Locate mysql.conf and return its ``[db]`` settings as a dict.

        Candidates are tried in order: ``mysql.conf`` in the working directory,
        ``<cwd>/DataToMySql/mysql.conf`` and finally ``mysql.conf`` next to this
        module. The first file that provides ``db.db_port`` wins.

        :raises configparser.Error: if no candidate provides the settings
        """
        candidates = [
            'mysql.conf',
            os.path.join(os.getcwd(), 'DataToMySql', 'mysql.conf'),
            os.path.join(os.path.dirname(os.path.abspath(__file__)), 'mysql.conf'),
        ]
        parser = ConfigParser()
        for confPath in candidates:
            parser = ConfigParser()
            # ConfigParser.read silently ignores missing files, so probe for
            # the option instead of relying on an exception.
            parser.read(confPath)
            if parser.has_option('db', 'db_port'):
                break
        # If no candidate matched, the lookups below raise the usual
        # NoSectionError / NoOptionError.
        return {
            'db_port': parser.getint('db', 'db_port'),
            'db_user': parser.get('db', 'db_user'),
            'db_pass': parser.get('db', 'db_pass'),
            'db_host': parser.get('db', 'db_host'),
            'db_database': parser.get('db', 'db_database'),
        }

    def getMysqlCon(self, flag='connect'):
        """Return a database handle.

        :param flag: 'connect' -> a pymysql connection;
                     'engine'  -> a SQLAlchemy engine (mysql+pymysql, utf8)
        :raises ValueError: if ``flag`` is neither 'connect' nor 'engine'
        """
        if flag not in ('connect', 'engine'):
            raise ValueError("flag must be 'connect' or 'engine', got %r" % (flag,))

        dbConfig = self._readDbConfig()
        db_port = dbConfig['db_port']
        db_user = dbConfig['db_user']
        db_pass = dbConfig['db_pass']
        db_host = dbConfig['db_host']
        db_database = dbConfig['db_database']

        # Register an encoder for numpy.float64 so such values can be passed
        # as query parameters.
        pymysql.converters.encoders[np.float64] = pymysql.converters.escape_float
        pymysql.converters.conversions = pymysql.converters.encoders.copy()
        pymysql.converters.conversions.update(pymysql.converters.decoders)

        if flag == 'connect':
            return pymysql.connect(host=db_host, user=db_user, passwd=db_pass, db=db_database, port=db_port)

        sqlConStr = "mysql+pymysql://%s:%s@%s:%s/%s?charset=utf8" % (db_user, db_pass, db_host, db_port, db_database)
        return create_engine(sqlConStr)
37 |
if __name__=='__main__':
    # Manual check: builds a handle with the default flag ('connect').
    MysqlConDemo = MysqlCon()
    MysqlConDemo.getMysqlCon()
--------------------------------------------------------------------------------
/DataToMySql/mysql.conf:
--------------------------------------------------------------------------------
1 | [db]
2 | db_port=3306
3 | db_user = root
4 | db_pass = 123456
5 | db_host = localhost
6 | db_database = fund_est
--------------------------------------------------------------------------------
/DateFormatDf.py:
--------------------------------------------------------------------------------
1 | # -- coding: utf-8 --
2 | # Author:zouhao
3 | # email:1084848158@qq.com
4 |
5 | '''
6 | 对时间序列的DataFrame格式转换
7 | '''
8 |
9 | import pandas as pd
10 | import numpy as np
11 | from datetime import datetime
12 |
class DateFormatDf:
    """Converts the string date index of a time-series DataFrame to datetimes."""

    def __init__(self):
        pass

    def getStrToDate(self, tempDf, flag=1):
        """Return a copy of ``tempDf`` whose index holds parsed datetimes.

        :param tempDf: DataFrame indexed by date strings
        :param flag: index format; 1 -> '2019-01-01', 2 -> '20190101'
        :return: new DataFrame with the same values and columns
        :raises ValueError: if ``flag`` is not 1 or 2, or a date cannot be parsed
        """
        dateList = tempDf.index.tolist()
        if flag == 1:
            isoDateList = dateList
        elif flag == 2:
            # Insert the dashes so both formats share a single parser below.
            isoDateList = [dateStr[:4] + '-' + dateStr[4:6] + '-' + dateStr[6:] for dateStr in dateList]
        else:
            raise ValueError("flag must be 1 ('YYYY-MM-DD') or 2 ('YYYYMMDD'), got %r" % (flag,))
        dateNewList = [datetime.strptime(dateStr, "%Y-%m-%d") for dateStr in isoDateList]
        return pd.DataFrame(tempDf.values, index=dateNewList, columns=tempDf.columns)
33 |
if __name__ == '__main__':
    DateFormatDfDemo = DateFormatDf()
    # getStrToDate requires a DataFrame; the previous argument-less call always
    # raised TypeError. Use a one-row sample as a smoke check instead.
    sampleDf = pd.DataFrame({'demo': [1.0]}, index=['2019-01-01'])
    print(DateFormatDfDemo.getStrToDate(sampleDf))
--------------------------------------------------------------------------------
/EstimateValue.py:
--------------------------------------------------------------------------------
1 | # -- coding: utf-8 --
2 | # Author:zouhao
3 | # email:1084848158@qq.com
4 |
5 | import pandas as pd
6 | import numpy as np
7 | from CalcRiskReturn import CalcRiskReturn
8 | import os
9 | from datetime import datetime, timedelta
10 | from CalcRegression import CalcRegression
11 | from FamaFrenchRegression import FamaFrenchRegression
12 | from JudgeText import JudgeText
13 | import mylog as mylog
14 | from GetAndSaveWindData.GetDataTotalMain import GetDataTotalMain
15 |
16 | import warnings
17 |
18 | warnings.filterwarnings('ignore')
19 |
20 |
class EstimateValue:
    """Fetches NAV and index data for one fund and produces the NAV-based analysis."""

    def __init__(self, dicParam):
        """
        :param dicParam: settings dict with keys
            'fundCode' (required), 'netValuePeriod' (default ''),
            'startDate' (default '2014-06-17'), 'endDate' (default today) and
            'DCIndex' (index code hedged by the product; default '' = none)
        """
        self.fundCode = dicParam['fundCode']
        self.netValuePeriod = dicParam.get('netValuePeriod', '')
        self.startDate = dicParam.get('startDate', '2014-06-17')
        self.endDate = dicParam.get('endDate', datetime.today().strftime("%Y-%m-%d"))
        self.indexNameDic = {'000300.SH': '沪深300', '000852.SH': '中证1000', }
        self.totalIndexName = list(self.indexNameDic.values())
        self.logger = mylog.set_log()
        self.GetDataTotalMainDemo = GetDataTotalMain(data_resource='wind')
        # Optional: previously a missing key raised KeyError although the
        # setting is described as empty by default.
        self.DCIndex = dicParam.get('DCIndex', '')

    def _fetchCloseDfList(self, codeList, startDate, endDate):
        """Fetch quotes per code and rename 'close_price' to the code itself."""
        dfList = []
        for indexCode in codeList:
            closeDf = self.GetDataTotalMainDemo.get_hq_data(code=indexCode, start_date=startDate,
                                                            end_date=endDate)
            closeDf.rename(columns={'close_price': indexCode}, inplace=True)
            dfList.append(closeDf)
        return dfList

    def _joinWithFundNav(self, dicNetValueResult, otherKey):
        """Inner-join the fund's adjusted NAV with ``dicNetValueResult[otherKey]``.

        The NAV column is renamed to the fund name.
        """
        joinedDf = pd.concat([dicNetValueResult['netValuedf']['net_value_adj'], dicNetValueResult[otherKey]],
                             axis=1, join='inner')
        joinedDf.rename(columns={'net_value_adj': dicNetValueResult['fundName']}, inplace=True)
        return joinedDf

    def getNetValueDataDic(self, fundName):
        """Fetch all data needed by the analysis.

        Sets ``self.fundName`` and returns a dict with the keys 'fundName',
        'netValuedf', 'DCIndexDf', 'indexDf', 'totalVolumeDf',
        'totalIndustryDf', 'industryDic', 'totalStyleDf' and 'styleDic'.
        Index data is requested for the date span covered by the fund's NAV.

        :param fundName: display name of the fund
        """
        indexCodeList = ['000300.SH']
        dicResult = {}
        self.logger.info("获取基金净值数据...")
        netValuedf = self.GetDataTotalMainDemo.get_hq_data(code=self.fundCode, start_date=self.startDate,
                                                           end_date=self.endDate, code_style='fund',
                                                           name_list=['net_value_adj'])
        self.logger.info("基金净值数据获取成功!")
        self.fundName = fundName
        dicResult['fundName'] = fundName
        dicResult['netValuedf'] = netValuedf

        self.logger.info("获取大盘指数数据...")
        navDateList = netValuedf.index.tolist()
        startDate = navDateList[0]
        endDate = navDateList[-1]

        dfIndexList = []
        dfVolumeList = []
        for indexCode in indexCodeList:
            dfIndexList.extend(self._fetchCloseDfList([indexCode], startDate, endDate))

            volumeDf = self.GetDataTotalMainDemo.get_hq_data(code=indexCode, start_date=startDate,
                                                             end_date=endDate, name_list=['volume'])
            volumeDf.rename(columns={'volume': indexCode}, inplace=True)
            dfVolumeList.append(volumeDf)

        # Hedged index: only fetched when a code was configured.
        dicResult['DCIndexDf'] = pd.DataFrame()
        if self.DCIndex:
            dicResult['DCIndexDf'] = self._fetchCloseDfList([self.DCIndex], startDate, endDate)[0]

        self.logger.info("获取大盘指数数据成功!")
        dicResult['indexDf'] = pd.concat(dfIndexList, axis=1)
        dicResult['totalVolumeDf'] = pd.concat(dfVolumeList, axis=1)

        # Industry indices (codes and labels are paired by position).
        industryList = ['801210.SI', '801050.SI', '801140.SI', '801020.SI', '801170.SI', '801030.SI', '801150.SI',
                        '801010.SI', '801200.SI', '801230.SI', '801770.SI', '801730.SI',
                        '801130.SI', '801880.SI', '801180.SI', '801160.SI', '801780.SI', '801890.SI', '801080.SI',
                        '801760.SI', '801790.SI', '801710.SI', '801740.SI', '801720.SI',
                        '801750.SI', '801110.SI', '801040.SI', '801120.SI']
        industryLabel = ['休闲服务', '有色金属', '轻工制造', '采掘', '交通运输', '化工', '医药生物', '农林牧渔', '商业贸易', '综合', '通信', '电气设备', '纺织服装',
                         '汽车', '房地产', '公用事业',
                         '银行', '机械设备', '电子', '传媒', '非银金融', '建筑材料', '国防军工', '建筑装饰', '计算机', '家用电器', '钢铁', '食品饮料']
        self.logger.info("获取申万一级行业指数数据...")
        dicResult['totalIndustryDf'] = pd.concat(self._fetchCloseDfList(industryList, startDate, endDate), axis=1)
        dicResult['industryDic'] = dict(zip(industryList, industryLabel))
        self.logger.info("获取申万一级行业指数数据成功!")

        # Style indices (codes and labels are paired by position).
        styleList = ['801863.SI', '801822.SI', '801813.SI', '801831.SI', '801812.SI', '801821.SI', '801852.SI',
                     '801842.SI', '801843.SI', '801832.SI', '801851.SI',
                     '801853.SI', '801841.SI', '801833.SI', '801823.SI', '801811.SI']
        styleLabel = ['新股指数', '中市盈率指数', '小盘指数', '高市净率指数', '中盘指数', '高市盈率指数', '微利股指数', '中价股指数', '低价股指数', '中市净率指数',
                      '亏损股指数', '绩优股指数', '高价股指数', '低市净率指数', '低市盈率指数', '大盘指数']
        self.logger.info("获取风格指数数据...")
        dicResult['totalStyleDf'] = pd.concat(self._fetchCloseDfList(styleList, startDate, endDate), axis=1)
        dicResult['styleDic'] = dict(zip(styleList, styleLabel))
        self.logger.info("获取风格指数数据成功")
        return dicResult

    def getRiskFree(self):
        """Return the per-period risk-free rate: 2% a year over 52 weeks or 250 days."""
        periodsPerYear = 52 if self.netValuePeriod == 'W' else 250
        return 0.02 / periodsPerYear

    def calcAndPlotSaveRiskReturn(self, dicNetValueResult, resultPath):
        """Compute and save risk/return metrics, charts and regressions.

        :param dicNetValueResult: data dict produced by ``getNetValueDataDic``
        :param resultPath: output directory prefix (file names are appended to it)
        """
        fundIndexDf = self._joinWithFundNav(dicNetValueResult, 'indexDf')
        fundPlotDf = fundIndexDf.rename(columns=self.indexNameDic)
        plotDateList = fundPlotDf.index.tolist()

        CalcRiskReturnDemo = CalcRiskReturn()
        self.logger.info("计算日频数据相关结论...")
        CalcRiskReturnDemo.calcRiskReturn(fundPlotDf, resultPath)
        marketVolume = dicNetValueResult['totalVolumeDf']
        CalcRiskReturnDemo.plotDayNetValueFigure(fundPlotDf, resultPath, fundName=self.fundName,
                                                 netPeriod=self.netValuePeriod, marketVolume=marketVolume)

        # Forecast horizon: from the last data date to 93 calendar days later.
        startDate = datetime.strptime(plotDateList[-1], "%Y-%m-%d")
        endDate = startDate + timedelta(days=31 * 3)
        tradeDayList = self.GetDataTotalMainDemo.get_tradeday(start_date=startDate, end_date=endDate,
                                                              period=self.netValuePeriod)
        CalcRiskReturnDemo.getMentoCaloForecast(fundPlotDf, resultPath, tradeDayList, fundName=self.fundName)

        self.logger.info("计算周频数据相关结论...")
        tradeWeekList = self.GetDataTotalMainDemo.get_tradeday(start_date=plotDateList[0],
                                                               end_date=plotDateList[-1], period='W')
        weekFundPlotDf = fundPlotDf.loc[tradeWeekList].dropna(axis=0)
        CalcRiskReturnDemo.plotWeekNetValueFigure(weekFundPlotDf, resultPath, fundName=self.fundName)
        CalcRiskReturnDemo.calcWeekNetValueResult(weekFundPlotDf, resultPath, fundName=self.fundName)

        self.logger.info("计算月频数据相关结论...")
        tradeMonthList = self.GetDataTotalMainDemo.get_tradeday(start_date=plotDateList[0],
                                                                end_date=plotDateList[-1], period='M')
        monthFundPlotDf = fundPlotDf.loc[tradeMonthList].dropna(axis=0)
        CalcRiskReturnDemo.plotMonthNetValueFigure(monthFundPlotDf, resultPath, fundName=self.fundName)

        targetDf = fundPlotDf.copy()
        targetDf['无风险利率'] = self.getRiskFree()
        CalcRegressionDemo = CalcRegression()
        self.logger.info("计算选股,择时能力相关结论...")
        CalcRegressionDemo.getSelectStockAndTime(targetDf, resultPath, fundName=self.fundName,
                                                 netPeriod=self.netValuePeriod,
                                                 benchMark=list(self.indexNameDic.values())[0],
                                                 DCIndexDf=dicNetValueResult['DCIndexDf'])

        self.logger.info("计算行业,风格回归相关结论...")
        fundIndustryDf = self._joinWithFundNav(dicNetValueResult, 'totalIndustryDf')
        fundIndustryDf['无风险利率'] = self.getRiskFree()
        CalcRegressionDemo.getIndustryRegression(fundIndustryDf, resultPath, fundName=self.fundName,
                                                 industryDic=dicNetValueResult['industryDic'],
                                                 bench_return=dicNetValueResult['indexDf'])

        fundStyleDf = self._joinWithFundNav(dicNetValueResult, 'totalStyleDf')
        fundStyleDf['无风险利率'] = self.getRiskFree()
        CalcRegressionDemo.getStyleRegression(fundStyleDf, resultPath, fundName=self.fundName,
                                              industryDic=dicNetValueResult['styleDic'],
                                              DCIndexDf=dicNetValueResult['DCIndexDf'])

    def getSavePath(self):
        """Return (creating it if needed) the output folder of the current fund.

        The folder is ``<cwd>/分析结果/<fundName>``. The returned string ends
        with the platform path separator because callers append file names to
        it directly.
        """
        resultDir = os.path.join(os.getcwd(), '分析结果', self.fundName)
        # Creates the base folder as well; safe when the folder already exists.
        os.makedirs(resultDir, exist_ok=True)
        return resultDir + os.sep

    def getMain(self, fundName='缺基金名称'):
        """Run the whole pipeline for the configured fund.

        :param fundName: display name of the fund; also names the output folder
        """
        dicNetValueResult = self.getNetValueDataDic(fundName=fundName)  # fund NAV and index data
        resultPath = self.getSavePath()  # folder the results are saved to

        # NOTE: the Fama-French regression step (FamaFrenchRegression) is
        # currently not part of this pipeline.
        self.calcAndPlotSaveRiskReturn(dicNetValueResult, resultPath)  # NAV statistics per period
        JudgeTextDemo = JudgeText()
        JudgeTextDemo.getNetJudgeText(fundCode=self.fundCode, fundName=self.fundName,
                                      totalIndexName=self.totalIndexName)
        self.logger.info("计算完成!")
216 |
217 |
if __name__ == '__main__':
    # Other product codes analysed before: 乐道 S60034, 宽远 S35529, 000409.OF
    nameDic = {'费曼一号(增强IE500)': 'SEP131', '华量锐天1号(T0对冲IE500)': 'SW7742', '阿尔法对冲': 'SK7720'}
    # Alternative batch: ['SS2221', 'SY3702']
    codeList = ['519062.OF']

    for fundcode in codeList:
        print(fundcode)
        dicParam = {
            'fundCode': fundcode,         # fund code
            'netValuePeriod': 'D',        # NAV disclosure frequency
            'startDate': '2014-11-30',
            'DCIndex': '',                # hedged index code; empty for non-hedged products
        }

        EstimateValueDemo = EstimateValue(dicParam=dicParam)
        EstimateValueDemo.getMain(fundName='海富通阿尔法对冲')
235 |
--------------------------------------------------------------------------------
/FamaFrenchRegression.py:
--------------------------------------------------------------------------------
1 | # -- coding: utf-8 --
2 | # Author:zouhao
3 | # email:1084848158@qq.com
4 | '''
5 | fama french 三因素回归分析
6 | '''
7 |
8 | import pandas as pd
9 | import numpy as np
10 | from GetDataFromWindAndMySql import GetDataFromWindAndMySql
11 | from PrintInfo import PrintInfo
12 | import time
13 |
class FamaFrenchRegression:
    """Build the SMB / HML factor return series for a three-factor analysis.

    Index constituents and stock quotes are obtained through
    ``GetDataFromWindAndMySql``; progress messages go through ``PrintInfo``.
    """

    def __init__(self):
        self.GetDataFromWindAndMySqlDemo = GetDataFromWindAndMySql()
        self.PrintInfoDemo = PrintInfo()

    @staticmethod
    def _meanReturn(closeDf, preDateStr, dateStr, codeList):
        """Return the equal-weighted simple return of a stock group.

        :param closeDf: close prices, one row per date label and one column
            per stock code.
        :param preDateStr: row label of the previous date.
        :param dateStr: row label of the current date.
        :param codeList: stock codes that make up the group.
        :return: mean of ``(close[dateStr] - close[preDateStr]) / close[preDateStr]``
            over ``codeList`` (NaN entries are ignored by ``mean``).
        :raises KeyError: if either date label is missing from ``closeDf``.
        """
        # ``DataFrame.ix`` no longer exists in pandas >= 1.0.  ``reindex``
        # turns codes without a price column into NaN columns instead of
        # raising, and ``loc`` then selects the two rows strictly by label.
        prices = closeDf.reindex(columns=codeList)
        prePrice = prices.loc[preDateStr]
        curPrice = prices.loc[dateStr]
        return ((curPrice - prePrice) / prePrice).mean()

    def getFacrotReturn(self, resultPath, dateList, indexCode):
        """Compute the SMB and HML return for every date in ``dateList``.

        For each date the index constituents are split by the 30th / 70th
        percentile of market value and of 1 / ``pb_lf``; group returns are
        then measured between consecutive dates that produced groups.  The
        result is also written to an Excel file under ``resultPath``.

        :param resultPath: path prefix for the Excel output.
        :param dateList: ordered date labels; first and last bound the quote query.
        :param indexCode: code of the index whose constituents form the universe.
        :return: DataFrame indexed by date with columns ``SMB`` and ``HML``.
        """
        dataSource = self.GetDataFromWindAndMySqlDemo
        log = self.PrintInfoDemo.PrintLog

        totalCodeSet = set()
        dicTempResult = {}
        log("获取宽基指数成分股,并计算产品起止日期内规模因子,账面市值因子的收益")
        log("为减少接口频繁请求成分股数据导致掉线,这里每次调用后采用睡眠函数,间隔0.2秒..")
        for dateStr in dateList:
            universeDf = dataSource.getIndexConstituent(indexCode=indexCode, getDate=dateStr)
            if universeDf.empty:
                # The constituent lookup returns an empty frame on failure;
                # indexing it with ['stock_code'] would raise KeyError.
                continue
            codeList = universeDf['stock_code'].tolist()
            totalCodeSet.update(codeList)
            tempStockDf = dataSource.getCurrentDateData(
                tempCodeList=codeList, getDate=dateStr, tableFlag='stock',
                nameList=['close_price', 'market_value', 'pb_lf'])
            if tempStockDf.empty:
                continue

            # Size groups: bottom 30% / top 30% by market value.
            marketValue = tempStockDf['market_value']
            ME30 = np.percentile(marketValue, 30)
            ME70 = np.percentile(marketValue, 70)

            # Book-to-price groups use only rows whose fields are all positive.
            BP = tempStockDf[tempStockDf > 0].dropna()
            bookToPrice = 1 / BP['pb_lf']
            BP30 = np.percentile(bookToPrice, 30)
            BP70 = np.percentile(bookToPrice, 70)

            dicTempResult[dateStr] = {
                'SM': tempStockDf[marketValue <= ME30].index.tolist(),
                'BM': tempStockDf[marketValue > ME70].index.tolist(),
                'LP': bookToPrice[bookToPrice <= BP30].index.tolist(),
                'HP': bookToPrice[bookToPrice > BP70].index.tolist(),
            }
            # Throttle requests so the data interface does not drop the session.
            time.sleep(0.2)
        log("产品起止日期内规模因子,账面市值因子的收益计算完成")
        log("批量获取产品起止日期内的所有成分股行情数据...")
        totalStockCloseDf = dataSource.getCurrentNameData(
            tempCodeList=list(totalCodeSet), startDate=dateList[0], endDate=dateList[-1],
            tableFlag='stock', nameStr='close_price')
        log("产品起止日期内的所有成分股行情数据获取完成!")

        sortedDates = sorted(dicTempResult)
        dicResult = {}
        for preDateStr, dateStr in zip(sortedDates, sortedDates[1:]):
            groups = dicTempResult[dateStr]
            meanReturn = {
                name: self._meanReturn(totalStockCloseDf, preDateStr, dateStr, groups[name])
                for name in ('SM', 'BM', 'LP', 'HP')
            }
            dicResult[dateStr] = {
                'SMB': meanReturn['SM'] - meanReturn['BM'],
                # NOTE(review): this is low-minus-high book-to-price, the
                # opposite sign of the usual HML definition - confirm intent.
                'HML': meanReturn['LP'] - meanReturn['HP'],
            }

        resultDf = pd.DataFrame(dicResult).T
        resultDf.to_excel(resultPath + '规模因子账面市值因子(%s成分股).xlsx' % indexCode)
        log("产品起止日期内的SMB,HML收益率计算完成,存入本地!")
        return resultDf

    def calcMain(self, closePriceSe, resultPath, indexCode='000016.SH',):
        """Join the product's period returns with the factor returns and save them.

        :param closePriceSe: net-value / close-price Series indexed by date.
        :param resultPath: path prefix for the Excel outputs.
        :param indexCode: index whose constituents define the factor universe.
        :return: None; the joined sample is written to an Excel file.
        """
        self.PrintInfoDemo.PrintLog("开始计算fama-french三因子模型,采用的宽基指数为%s" % indexCode)
        previous = closePriceSe.shift(1)
        tempReturn = (closePriceSe - previous) / previous
        tempReturn.name = closePriceSe.name
        dateList = tempReturn.index.tolist()
        factorReturnDf = self.getFacrotReturn(resultPath, dateList=dateList, indexCode=indexCode)
        # Inner join keeps only dates that have both factor and product returns.
        calcRusultDf = pd.concat([factorReturnDf, tempReturn], axis=1, join='inner')
        calcRusultDf.to_excel(resultPath + '三因子样本数据.xlsx')
99 |
100 |
if __name__ == '__main__':
    # NOTE(review): calcMain() declares closePriceSe and resultPath as
    # required parameters, so this argument-less call raises TypeError as
    # written; pass a net-value Series and an output path to run the demo.
    FamaFrenchRegressionDemo = FamaFrenchRegression()
    FamaFrenchRegressionDemo.calcMain()
104 |
--------------------------------------------------------------------------------
/GetAndSaveWindData/ClientWindIfindInit.py:
--------------------------------------------------------------------------------
1 | # -- coding: utf-8 --
2 | # Author:zouhao
3 | # email:1084848158@qq.com
4 |
5 | '''
6 | wind/ifind 账号登录初始化
7 | '''
8 |
9 | import mylog as mylog
10 | from WindPy import *
11 | import pandas as pd
12 | from GetAndSaveWindData.MysqlCon import MysqlCon
13 | from iFinDPy import *
14 |
class ClientWindIfindInit:
    """Log in to / start the Wind or iFinD client."""

    def __init__(self, data_source='ifind'):
        # ``data_source`` is accepted but not used by the constructor.
        self.logger = mylog.logger

    def log_init(self, data_resource='ifind'):
        """Initialise the client session for the chosen data source.

        :param data_resource: 'ifind' logs in to iFinD, 'wind' starts the
            Wind API; any other value does nothing.
        :return: False only when the iFinD login reports a non-zero state,
            True otherwise.
        """
        if data_resource == 'wind':
            w.start()
            return True
        if data_resource != 'ifind':
            return True

        if THS_iFinDLogin('zszq5072', '754628') == 0:
            self.logger.info("同花顺账号登录成功!")
            return True
        self.logger.error("同花顺账号登录异常,请检查!")
        return False
--------------------------------------------------------------------------------
/GetAndSaveWindData/GetDataFromWindNNotMysql.py:
--------------------------------------------------------------------------------
1 | # -- coding: utf-8 --
2 |
3 | '''
4 | 获取wind数据,不保存mysql,部分非常规的数据
5 | '''
6 |
7 | # from WindPy import w
8 | import pandas as pd
9 | from iFinDPy import *
10 | import mylog as mylog
11 | import WindPy as Wind
12 |
13 |
class GetDataFromWindNotMysql:
    """Fetch fund data directly from Wind / iFinD without storing it in MySQL."""

    def __init__(self, data_resource='ifind'):
        self.logger = mylog.set_log()
        self.dic_init = {}
        self.dic_init['data_resource'] = data_resource
        self.dic_init['data_init_flag'] = self.log_init(data_resource)

    def log_init(self, data_resource='ifind'):
        """Initialise the client session for the chosen data source.

        :param data_resource: 'ifind' logs in to iFinD, 'wind' starts the
            Wind API; any other value does nothing.
        :return: True on success (or when nothing had to be done), False when
            the login / start failed.
        """
        flag = True
        if data_resource == 'ifind':
            # NOTE(review): the account credentials are hard-coded in source;
            # consider loading them from configuration instead.
            log_state = THS_iFinDLogin('zszq5072', '754628')
            if log_state == 0:
                self.logger.info("同花顺账号登录成功!")
            else:
                self.logger.error("同花顺账号登录异常,请检查!")
                flag = False
        elif data_resource == 'wind':
            try:
                Wind.w.start()
            except Exception:
                # Narrowed from a bare ``except`` so KeyboardInterrupt and
                # SystemExit are no longer swallowed.
                self.logger.info("wind启动失败")
                flag = False
        return flag

    @staticmethod
    def _to_wind_code(code):
        """Append the exchange suffix Wind expects to a bare stock code.

        Codes starting with '6' get '.SH', codes starting with '0' or '3'
        get '.SZ'.  Codes that already carry a suffix, or whose prefix is not
        recognised, are returned unchanged.
        """
        code = str(code)
        if '.' in code:
            return code
        if code.startswith('6'):
            return code + '.SH'
        if code.startswith(('0', '3')):
            return code + '.SZ'
        return code

    def get_fund_stock_filed(self, start_date, end_date, fund_code=''):
        """Return the fund's top-10 holdings per quarter with their industry.

        :param start_date: start of the query window passed to ``w.wsd``.
        :param end_date: end of the query window passed to ``w.wsd``.
        :param fund_code: Wind code of the fund.
        :return: DataFrame with one row per (disclosure date, rank); empty
            when no rank could be fetched.  The industry column is None for
            rows whose industry lookup failed.
        """
        fund_df = pd.DataFrame()
        fileds = ['prt_topstockname', 'prt_topstockcode', 'prt_topstockvalue', 'prt_heavilyheldstocktostock',
                  'prt_heavilyheldstocktonav']
        name_Dic = {'prt_topstockname'.upper(): '股票名称', 'prt_topstockcode'.upper(): '股票代码',
                    'prt_topstockvalue'.upper(): '持股市值', 'prt_heavilyheldstocktostock'.upper(): '市值占股票投资市值比',
                    'prt_heavilyheldstocktonav'.upper(): '市值占基金资产净值比'}
        df_list = []
        for order_num in range(1, 11):
            wsddata = Wind.w.wsd(codes=fund_code, fields=fileds, beginTime=start_date, endTime=end_date,
                                 options="order=%s;unit=1;Period=Q;Days=Alldays" % order_num)
            if wsddata.ErrorCode != 0:
                self.logger.error("获取重仓股数据有误,错误代码" + str(wsddata.ErrorCode))
                continue
            temp_fund_df = pd.DataFrame(wsddata.Data, index=wsddata.Fields, columns=wsddata.Times).T
            temp_fund_df['重仓排名'] = order_num
            df_list.append(temp_fund_df)
        if not df_list:
            # Nothing could be fetched: return the empty frame instead of
            # failing later on the missing columns.
            return fund_df

        fund_df = pd.concat(df_list, axis=0, sort=True)
        fund_df.rename(columns=name_Dic, inplace=True)
        fund_df['披露日期'] = fund_df.index.tolist()
        fund_df.dropna(inplace=True)

        # Rows are ordered by rank first, while the industry lookup runs per
        # disclosure date.  Results are therefore written back by row
        # position so each industry lands on the row it belongs to.
        industry_values = [None] * len(fund_df)
        stock_codes = fund_df['股票代码'].tolist()
        for report_date, row_positions in fund_df.groupby(by='披露日期').indices.items():
            wind_codes = [self._to_wind_code(stock_codes[pos]) for pos in row_positions]
            if hasattr(report_date, 'strftime'):
                tradeDate = report_date.strftime("%Y%m%d")
            else:
                # Fallback for 'YYYY-MM-DD...' strings.
                tradeDate = str(report_date)[:10].replace('-', '')
            wssdata = Wind.w.wss(codes=list(set(wind_codes)), fields=['industry_citic'],
                                 options='tradeDate=%s;industryType=1' % tradeDate)
            if wssdata.ErrorCode != 0:
                self.logger.error("获取股票所属行业数据有误,错误代码" + str(wssdata.ErrorCode))
                continue
            industry_by_code = dict(zip(wssdata.Codes, wssdata.Data[0]))
            for pos, wind_code in zip(row_positions, wind_codes):
                industry_values[pos] = industry_by_code.get(wind_code)
        fund_df['所属行业'] = industry_values
        return fund_df

    def get_fund_filed(self, start_date, end_date, fund_code=''):
        """Return quarterly fund data: total shares, net assets and the
        share of net assets held in stocks.

        :param start_date: start of the query window passed to ``w.wsd``.
        :param end_date: end of the query window passed to ``w.wsd``.
        :param fund_code: Wind code of the fund.
        :return: DataFrame indexed by date; empty when the Wind call failed.
        """
        # total_date_list = w.tdays(start_date, end_date, "Days=Alldays;Period=Q")
        fund_df = pd.DataFrame()
        fileds = ['unit_fundshare_total', 'netasset_total', 'prt_stocktonav']
        name_Dic = {'unit_fundshare_total'.upper(): '基金份额_万份', 'netasset_total'.upper(): '基金规模',
                    'prt_stocktonav'.upper(): '股票市值占基金资产净值比'}
        wsddata = Wind.w.wsd(codes=fund_code, fields=fileds, beginTime=start_date, endTime=end_date,
                             options="unit=1;Period=Q;Days=Alldays")
        if wsddata.ErrorCode != 0:
            # The previous message mentioned A-share data, which this call does not fetch.
            self.logger.error("获取基金季度数据有误,错误代码" + str(wsddata.ErrorCode))
            return fund_df
        fund_df = pd.DataFrame(wsddata.Data, index=wsddata.Fields, columns=wsddata.Times).T
        fund_df.rename(columns=name_Dic, inplace=True)
        return fund_df
107 |
--------------------------------------------------------------------------------
/GetAndSaveWindData/GetDataToMysql.py:
--------------------------------------------------------------------------------
1 | # -- coding: utf-8 --
2 | # Author:zouhao
3 | # email:1084848158@qq.com
4 |
5 | '''
6 | 获取数据(excel,wind)存入本地相应数据库表中,
7 | 每张表构建索引,数据存在时,更新,不存在时,插入
8 | '''
9 | import pandas as pd
10 | import numpy as np
11 | from GetAndSaveWindData.MysqlCon import MysqlCon
12 | import mylog as mylog
13 |
14 | # self.logger.basicConfig(format="%(asctime)s %(filename)s:%(levelname)s:%(message)s", datefmt="%d-%M-%Y %H:%M:%S",
15 | # level=self.logger.DEBUG)
16 |
class GetDataToMysql:
    """Write DataFrames into MySQL tables with ``REPLACE INTO`` semantics."""

    def __init__(self):
        self.conn = MysqlCon().getMysqlCon(flag='connect')
        self.logger = mylog.set_log()

    def GetMain(self, dataDf, tableName,):
        """Insert or replace every row of ``dataDf`` into ``tableName``.

        The DataFrame column names are used as the table column names and
        missing values (NaN / NaT) are written as SQL NULL.

        :param dataDf: rows to store.
        :param tableName: target table.  Table and column names are
            interpolated into the statement and must come from trusted code.
        :return: None
        :raises Exception: database errors are re-raised after a rollback.
        """
        tableList = dataDf.columns.tolist()
        placeholders = ','.join(['%s'] * len(tableList))
        sqlStr = "replace into %s(%s)" % (tableName, ','.join(tableList)) + "VALUES(%s)" % placeholders

        # Object dtype is needed so that None survives instead of becoming NaN.
        dataDf = dataDf.astype(object).where(pd.notnull(dataDf), None)
        rows = list(dataDf.itertuples(index=False, name=None))

        cursor = self.conn.cursor()
        try:
            if rows:
                cursor.executemany(sqlStr, rows)
            self.conn.commit()
        except Exception:
            # Do not leave a half-written batch pending on the connection.
            self.conn.rollback()
            raise
        finally:
            cursor.close()
        self.logger.info("数据存入mysql成功!")
38 |
39 |
if __name__=="__main__":
    # NOTE(review): GetMain() declares dataDf and tableName as required
    # parameters, so this argument-less call raises TypeError as written.
    GetDataToMysqlDemo = GetDataToMysql()
    GetDataToMysqlDemo.GetMain()
--------------------------------------------------------------------------------
/GetAndSaveWindData/GetFundFinanceReportData.py:
--------------------------------------------------------------------------------
1 | # -- coding: utf-8 --
2 |
3 | '''
4 | 将wind/ifind的数据导入到本地数据库,并从数据库返回结果
5 | '''
6 |
7 | import pandas as pd
8 | from GetAndSaveWindData.GetDataToMysql import GetDataToMysql
9 | import mylog as mylog
10 | import numpy as np
11 | from datetime import datetime
12 |
13 |
14 |
class GetFundFinanceReportData:
    """Load fund holding details from MySQL, fetching missing report
    periods from Wind and storing them first."""

    def __init__(self):
        self.logger = mylog.set_log()
        self.GetDataToMysqlDemo = GetDataToMysql()

    def get_fund_stock_info(self, third_conn, engine, total_date_list, fund_code='100053.OF'):
        """Return the holding details of ``fund_code`` for the given report dates.

        :param third_conn: object exposing Wind's ``wset`` call.
        :param engine: connection / engine accepted by ``pandas.read_sql``.
        :param total_date_list: report dates as 'YYYY-MM-DD' strings; dates
            ending in '06-30' are treated as interim reports, all others as
            annual reports.
        :param fund_code: fund code to query.
        :return: stored rows plus any newly fetched rows; an empty DataFrame
            when ``total_date_list`` is empty or a Wind request fails.
        """
        rpt_date_str_list = []
        for rpt_date in total_date_list:
            suffix = '年中报' if rpt_date[-5:] == '06-30' else '年年报'
            rpt_date_str_list.append(rpt_date[:4] + suffix)
        if not rpt_date_str_list:
            # An empty IN () list is not valid SQL.
            return pd.DataFrame()

        # Build the IN list explicitly: str(tuple(...)) leaves a trailing
        # comma for a single element, which is a SQL syntax error.
        in_clause = ','.join("'%s'" % name for name in rpt_date_str_list)
        sql_str = "select * from fund_contain_stock_detail where rpt_date in (%s) and fund_code='%s'" % (
            in_clause, fund_code)
        result_df = pd.read_sql(sql=sql_str, con=engine)
        have_rpt_set = set(result_df['rpt_date'].tolist())

        # Report label -> first matching date; also removes duplicate labels
        # so each missing period is requested once.
        label_to_date = {}
        for label, rpt_date in zip(rpt_date_str_list, total_date_list):
            label_to_date.setdefault(label, rpt_date)
        lack_rpt_list = [label for label in label_to_date if label not in have_rpt_set]

        name_mysql_dic = {'sec_name': 'fund_name', 'marketvalueofstockholdings': 'market_value_of_stockholdings',
                          'proportiontototalstockinvestments': 'pro_total_stock_inve',
                          'proportiontonetvalue': 'pro_net_value',
                          'proportiontoshareholdtocirculation': 'pro_sharehold_cir'}
        temp_df_list = []
        for lack_rpt in lack_rpt_list:
            rptdate = ''.join(label_to_date[lack_rpt].split('-'))
            options = "rptdate=%s;windcode=%s" % (rptdate, fund_code)
            wset_data = third_conn.wset(tablename="allfundhelddetail", options=options)
            if wset_data.ErrorCode != 0:
                self.logger.error('wind获取基金持股明细数据错误,错误代码%s,请检查!' % wset_data.ErrorCode)
                return pd.DataFrame()
            temp_rpt_df = pd.DataFrame(wset_data.Data, index=wset_data.Fields, columns=wset_data.Codes).T
            if temp_rpt_df.empty:
                continue
            temp_rpt_df['fund_code'] = fund_code
            temp_rpt_df['record_time'] = datetime.today().strftime("%Y-%m-%d")
            temp_rpt_df.rename(columns=name_mysql_dic, inplace=True)
            self.GetDataToMysqlDemo.GetMain(temp_rpt_df, 'fund_contain_stock_detail')
            self.logger.info("存储%s,报告期%s持股数据成功!" % (fund_code, lack_rpt))
            temp_df_list.append(temp_rpt_df)
        if temp_df_list:
            temp_df = pd.concat(temp_df_list, axis=0, sort=True)
            result_df = pd.concat([result_df, temp_df], axis=0, sort=True)
        return result_df

    def get_main(self):
        """Placeholder entry point; does nothing."""
        pass
63 |
--------------------------------------------------------------------------------
/GetAndSaveWindData/GetindexName.py:
--------------------------------------------------------------------------------
1 | # -- coding: utf-8 --
2 |
3 | import pandas as pd
4 |
5 |
6 | from WindPy import w
7 |
class GetindeName:
    """Append Wind security names to the ETF overview workbooks."""

    def __init__(self):
        pass

    def get_data(self):
        """Look up ``sec_name`` for the codes of an overview workbook and
        write the combined table back to ``<name>.xlsx``.

        All five workbooks are read from the current directory (sheet
        'Sheet1', first column as index) before any processing starts.
        """
        w.start()
        name_list = ['行业指数ETF概况', '策略指数ETF概况', '主题指数ETF概况', '规模指数ETF概况', '风格指数ETF概况']
        df_list = [pd.read_excel("%s.xlsx" % name, sheet_name='Sheet1', index_col=0) for name in name_list]

        for name, df in zip(name_list, df_list):
            aa = w.wss(df.index.tolist(), "sec_name")
            # NOTE(review): aa.ErrorCode is not checked before the data is used.
            tempdf1 = pd.DataFrame(aa.Data, columns=aa.Codes, index=aa.Fields).T
            # Combine with the workbook being processed (previously always
            # the first workbook, regardless of ``name``).
            result = pd.concat([df, tempdf1], axis=1, sort=True)
            result.to_excel("%s.xlsx" % name)
            # NOTE(review): only the first workbook is processed because of
            # this break; remove it to process all five - confirm intent.
            break
30 |
if __name__=='__main__':
    # Script entry point: run the name lookup once.
    GetindeNameDemo = GetindeName()
    GetindeNameDemo.get_data()
--------------------------------------------------------------------------------
/GetAndSaveWindData/MysqlCon.py:
--------------------------------------------------------------------------------
1 | # -- coding: utf-8 --
2 | # Author:zouhao
3 | # email:1084848158@qq.com
4 |
5 | import pymysql
6 | from sqlalchemy import create_engine
7 | import numpy as np
8 |
class MysqlCon:
    """Factory for connections to the local ``fund_est`` MySQL database."""

    def __init__(self):
        pass

    def getMysqlCon(self, flag='connect'):
        """Return a database handle of the requested kind.

        :param flag: 'connect' for a raw ``pymysql`` connection, 'engine'
            for a SQLAlchemy engine.
        :return: the connection or engine.
        :raises ValueError: if ``flag`` is neither 'connect' nor 'engine'.
        """
        if flag not in ('connect', 'engine'):
            # Previously an unknown flag fell through both branches and
            # failed with an unbound local variable at the return statement.
            raise ValueError("flag must be 'connect' or 'engine', got %r" % (flag,))

        # NOTE(review): connection settings, including the password, are
        # hard-coded; consider moving them to configuration.
        db_port = 3306
        db_user = 'root'
        db_pass = '123456'
        db_host = 'localhost'
        db_database = 'fund_est'

        # Register an encoder so numpy float64 values can be sent as parameters.
        pymysql.converters.encoders[np.float64] = pymysql.converters.escape_float
        pymysql.converters.conversions = pymysql.converters.encoders.copy()
        pymysql.converters.conversions.update(pymysql.converters.decoders)

        if flag == 'connect':
            return pymysql.connect(host=db_host, user=db_user, passwd=db_pass, db=db_database, port=db_port)

        sqlConStr = "mysql+pymysql://%s:%s@%s:%s/%s?charset=utf8" % (db_user, db_pass, db_host, db_port, db_database)
        return create_engine(sqlConStr)
30 |
if __name__=='__main__':
    # Manual check: open a connection with the default flag ('connect').
    MysqlConDemo = MysqlCon()
    MysqlConDemo.getMysqlCon()
--------------------------------------------------------------------------------
/GetDataFromWindAndMySql.py:
--------------------------------------------------------------------------------
1 | # -- coding: utf-8 --
2 |
3 | '''
4 | 将wind的数据导入到本地数据库,并从数据库返回结果
5 | '''
6 |
7 | from WindPy import w
8 | import pandas as pd
9 | from DataToMySql.MysqlCon import MysqlCon
10 | from DataToMySql.GetDataToMysql import GetDataToMysql
11 | from PrintInfo import PrintInfo
12 | w.start()
13 |
14 |
class GetDataFromWindAndMySql:
    """Cache Wind data in the local MySQL database and serve queries from it."""

    # tableFlag -> (MySQL table name, name of the code column)
    _TABLE_INFO = {
        'index': ('index_value', 'index_code'),
        'fund': ('fund_net_value', 'fund_code'),
        'stock': ('stock_hq_value', 'stock_code'),
    }

    # tableFlag -> {field name as returned by w.wsd: MySQL column name}
    _WIND_COLUMN_MAP = {
        'index': {"OPEN": "open_price", "HIGH": "high_price", "LOW": "low_price", "CLOSE": "close_price",
                  "VOLUME": "volume", "AMT": "amt", "CHG": "chg", "PCT_CHG": "pct_chg", "TURN": "turn"},
        'fund': {"NAV": "net_value", "NAV_ACC": "acc_net_value", "SEC_NAME": "fund_name"},
        'stock': {"OPEN": "open_price", "HIGH": "high_price", "LOW": "low_price", "CLOSE": "close_price",
                  "VOLUME": "volume", "AMT": "amt", "TURN": "turn", "MKT_CAP_ARD": "market_value",
                  "PE_TTM": "pe_ttm", "PS_TTM": "ps_ttm", "PB_LF": "pb_lf"},
    }

    def __init__(self):
        self.wsetData = ["000001.SH", "399300.SZ", "000016.SH", "000905.SH", "000906.SH"]  # security codes to fetch
        self.indexFieldName = ["open", "high", "low", "close", "volume", "amt", "chg", "pct_chg", "turn"]  # index fields
        self.fundFieldName = ["nav", "NAV_acc", "sec_name"]
        self.stockFieldName = ["open", "high", "low", "close", "volume", "amt", "turn", "mkt_cap_ard", "pe_ttm",
                               "ps_ttm", "pb_lf"]
        self.engine = MysqlCon().getMysqlCon(flag='engine')
        self.conn = MysqlCon().getMysqlCon(flag='connect')
        self.PrintInfoDemo = PrintInfo()
        self.GetDataToMysqlDemo = GetDataToMysql()

    @classmethod
    def _tableInfo(cls, tableFlag):
        """Return ``(table name, code column)`` for ``tableFlag``.

        :raises ValueError: for a flag other than 'index', 'fund' or 'stock'.
        """
        try:
            return cls._TABLE_INFO[tableFlag]
        except KeyError:
            raise ValueError("unsupported tableFlag: %r" % (tableFlag,)) from None

    @staticmethod
    def _missingRanges(startDate, endDate, minDate, maxDate):
        """Return the date ranges to fetch so that ``[startDate, endDate]``
        is covered, given that ``[minDate, maxDate]`` is already stored.

        All four values must be mutually comparable; the callers in this
        class pass 'YYYY-MM-DD' strings (assumed to match how
        ``update_time`` is stored - TODO confirm).

        :return: list of ``(start, end)`` tuples, possibly empty.
        """
        if endDate < minDate or startDate > maxDate:
            # No overlap with the stored range: fetch the whole request.
            return [(startDate, endDate)]
        ranges = []
        if startDate < minDate:
            ranges.append((startDate, minDate))
        if endDate > maxDate:
            ranges.append((maxDate, endDate))
        return ranges

    def _getStoredDateRange(self, tableStr, codeName, tempCode):
        """Return ``(max update_time, min update_time)`` stored for ``tempCode``."""
        sqlStr = "select max(update_time),min(update_time) from %s where %s='%s'" % (tableStr, codeName, tempCode)
        cursor = self.conn.cursor()
        try:
            cursor.execute(sqlStr)
            maxDate, minDate = cursor.fetchall()[0]
        finally:
            cursor.close()
        return maxDate, minDate

    def getIndexConstituent(self, indexCode='000300.SH', getDate='2019-06-06'):
        """Return the constituents of ``indexCode`` on ``getDate``.

        The local table is queried first; when it has no rows the data is
        requested from Wind, stored, and returned.

        :param indexCode: index code.
        :param getDate: date string.
        :return: constituent DataFrame; empty when the Wind request fails.
        """
        sqlStr = "select * from index_constituent where index_code='%s' and update_time='%s'" % (indexCode, getDate)
        resultDf = pd.read_sql(sql=sqlStr, con=self.engine)
        if not resultDf.empty:
            return resultDf

        wsetdata = w.wset("indexconstituent", "date=%s;windcode=%s" % (getDate, indexCode))
        if wsetdata.ErrorCode != 0:
            self.PrintInfoDemo.PrintLog("获取指数成分股数据有误,错误代码" + str(wsetdata.ErrorCode))
            return pd.DataFrame()

        resultDf = pd.DataFrame(wsetdata.Data, index=wsetdata.Fields).T
        resultDf['date'] = [datetampStr.strftime('%Y-%m-%d') for datetampStr in resultDf['date'].tolist()]
        nameDic = {'date': 'adjust_time', 'wind_code': 'stock_code', "sec_name": 'stock_name',
                   'i_weight': 'stock_weight'}
        resultDf.rename(columns=nameDic, inplace=True)
        resultDf['update_time'] = getDate
        resultDf['index_code'] = indexCode

        self.GetDataToMysqlDemo.GetMain(resultDf, 'index_constituent')
        return resultDf

    def getLackDataToMySql(self, tempCode, startDate, endDate, tableFlag='index'):
        """Fetch from Wind whatever part of ``[startDate, endDate]`` is not
        yet stored for ``tempCode`` and save it.

        :param tempCode: security code.
        :param startDate: first requested date.
        :param endDate: last requested date.
        :param tableFlag: 'index', 'fund' or 'stock'.
        :raises ValueError: for an unsupported ``tableFlag``.
        """
        tableStr, codeName = self._tableInfo(tableFlag)
        maxDate, minDate = self._getStoredDateRange(tableStr, codeName, tempCode)

        if not maxDate:
            # Nothing stored for this code yet.
            self.getDataFromWind(tempCode, startDate=startDate, endDate=endDate, tableFlag=tableFlag)
            return

        for rangeStart, rangeEnd in self._missingRanges(startDate, endDate, minDate, maxDate):
            self.getDataFromWind(tempCode, startDate=rangeStart, endDate=rangeEnd, tableFlag=tableFlag)

    def getDataFromWind(self, tempCode, startDate='2019-04-01', endDate='2019-04-30', tableFlag='index'):
        """Download a time series for ``tempCode`` from Wind and store it.

        :param tempCode: security code.
        :param startDate: first date to download.
        :param endDate: last date to download.
        :param tableFlag: 'index', 'fund' or 'stock'; selects fields and table.
        :return: the stored DataFrame, or None when the Wind request fails.
        :raises ValueError: for an unsupported ``tableFlag``.
        """
        tableStr, codeName = self._tableInfo(tableFlag)
        fields = {'index': self.indexFieldName, 'fund': self.fundFieldName,
                  'stock': self.stockFieldName}[tableFlag]
        nameDic = self._WIND_COLUMN_MAP[tableFlag]

        wsetdata = w.wsd(codes=tempCode, fields=fields, beginTime=startDate, endTime=endDate)
        if wsetdata.ErrorCode != 0:
            self.PrintInfoDemo.PrintLog("获取行情数据有误,错误代码" + str(wsetdata.ErrorCode))
            return

        tempDf = pd.DataFrame(wsetdata.Data, index=wsetdata.Fields, columns=wsetdata.Times).T
        tempDf[codeName] = tempCode
        tempDf.rename(columns=nameDic, inplace=True)
        tempDf['update_time'] = [dateStr.strftime("%Y-%m-%d") for dateStr in wsetdata.Times]
        self.GetDataToMysqlDemo.GetMain(tempDf, tableStr)
        return tempDf

    def getDataFromMySql(self, tempCode, startDate, endDate, tableFlag='index', nameList=['close_price']):
        """Read the requested columns for ``tempCode`` from the local table.

        :param tempCode: security code.
        :param startDate: first date (inclusive).
        :param endDate: last date (inclusive).
        :param tableFlag: 'index', 'fund' or 'stock'.
        :param nameList: column names to return (not modified).
        :return: DataFrame indexed by ``update_time``, de-duplicated and
            sorted; empty when ``nameList`` is empty.
        :raises ValueError: for an unsupported ``tableFlag``.
        """
        if not nameList:
            self.PrintInfoDemo.PrintLog('传入获取指数的字段不合法,请检查!')
            # Without columns the SELECT would be malformed.
            return pd.DataFrame()

        tableStr, codeName = self._tableInfo(tableFlag)
        sqlStr = "select %s,update_time from %s where %s='%s' and update_time>='%s'" \
                 " and update_time<='%s'" % (','.join(nameList), tableStr, codeName, tempCode, startDate, endDate)
        resultDf = pd.read_sql(sql=sqlStr, con=self.engine)
        resultDf.set_index(keys='update_time', inplace=True, drop=True)
        resultDf = resultDf.drop_duplicates().sort_index()
        return resultDf

    def getCurrentNameData(self, tempCodeList, startDate, endDate, tableFlag='stock', nameStr='close_price'):
        """Return one field for many stocks as a date-by-code table.

        Missing history is first topped up per stock code, then the field is
        read from ``stock_hq_value``.

        :param tempCodeList: stock codes.
        :param startDate: first date (inclusive).
        :param endDate: last date (inclusive).
        :param tableFlag: only 'stock' is supported.
        :param nameStr: column of ``stock_hq_value`` to return.
        :return: DataFrame indexed by ``update_time`` with one column per
            code; empty when nothing is stored or ``tableFlag`` is not 'stock'.
        """
        if tableFlag != 'stock':
            self.PrintInfoDemo.PrintLog('getCurrentNameData目前仅支持tableFlag=stock')
            return pd.DataFrame()

        # Check coverage code by code: an aggregate min/max over all codes
        # hides codes that have no (or only partial) history stored.
        for tempCode in tempCodeList:
            self.getLackDataToMySql(tempCode, startDate, endDate, tableFlag=tableFlag)

        codeClause = "','".join(tempCodeList)
        sqlStr = "select %s,update_time,stock_code from stock_hq_value where stock_code in ('%s') and update_time<='%s' " \
                 "and update_time>='%s'" % (nameStr, codeClause, endDate, startDate)
        resultDf = pd.read_sql(sql=sqlStr, con=self.engine)
        if resultDf.empty:
            # pd.concat refuses an empty list of frames.
            return pd.DataFrame()

        dfList = [
            pd.DataFrame(tempDf[nameStr].values, index=tempDf['update_time'], columns=[code])
            for code, tempDf in resultDf.groupby('stock_code')
        ]
        return pd.concat(dfList, axis=1)

    def getCurrentDateData(self, tempCodeList, getDate, tableFlag='stock', nameList=['close_price']):
        """Return a cross-section of stock fields for a single date.

        The local table is used when it has any row for the date; otherwise
        the snapshot is requested from Wind, stored, and returned.

        :param tempCodeList: stock codes.
        :param getDate: date string.
        :param tableFlag: only 'stock' is supported.
        :param nameList: columns to return (not modified).
        :return: DataFrame indexed by stock code; empty on failure, when the
            date has no quotes, or when ``tableFlag`` is not 'stock'.
        """
        if tableFlag != 'stock':
            self.PrintInfoDemo.PrintLog('getCurrentDateData目前仅支持tableFlag=stock')
            return pd.DataFrame()

        codeClause = "','".join(tempCodeList)
        sqlStr = "select * from stock_hq_value where stock_code in ('%s') and update_time='%s'" % (
            codeClause, getDate)
        resultDf = pd.read_sql(sql=sqlStr, con=self.engine)
        if not resultDf.empty:
            resultDf.set_index('stock_code', drop=True, inplace=True)
            return resultDf[nameList]

        fields = self.stockFieldName
        wssData = w.wss(codes=tempCodeList, fields=fields, options="tradeDate=%s;priceAdj=F;cycle=D" % getDate)
        if wssData.ErrorCode != 0:
            self.PrintInfoDemo.PrintLog("获取行情数据有误,错误代码" + str(wssData.ErrorCode))
            return pd.DataFrame()
        tempDf = pd.DataFrame(wssData.Data, index=fields, columns=tempCodeList).T
        tempDf.dropna(inplace=True)
        if tempDf.empty:
            self.PrintInfoDemo.PrintLog("当前日期%s无行情" % getDate)
            return pd.DataFrame()

        tempDf['update_time'] = getDate
        nameDic = {"open": "open_price", "high": "high_price", "low": "low_price", "close": "close_price",
                   "mkt_cap_ard": "market_value", }
        tempDf.rename(columns=nameDic, inplace=True)

        tempDf['stock_code'] = tempDf.index.tolist()
        self.GetDataToMysqlDemo.GetMain(tempDf, 'stock_hq_value')
        return tempDf[nameList]

    def getHQData(self, tempCode, startDate='2019-03-01', endDate='2019-05-30', tableFlag='index',
                  nameList=['close_price']):
        """Entry point for quote data: top up the local table, then read from it.

        :return: see ``getDataFromMySql``.
        """
        self.getLackDataToMySql(tempCode, startDate, endDate, tableFlag)
        resultDf = self.getDataFromMySql(tempCode, startDate, endDate, tableFlag=tableFlag, nameList=nameList)
        return resultDf

    def getTradeDay(self, startdate, endDate, Period=''):
        """Return trading days for the given period via Wind's ``tdays``.

        :param startdate: first date.
        :param endDate: last date.
        :param Period: '' daily, 'W' weekly, 'M' monthly, 'Q' quarterly,
            'S' half-yearly, 'Y' yearly.
        :return: list of 'YYYY-MM-DD' strings, or None when Wind reports an error.
        """
        # w.start()
        data = w.tdays(beginTime=startdate, endTime=endDate, options="Period=%s" % Period)
        if data.ErrorCode != 0:
            self.PrintInfoDemo.PrintLog('wind获取交易日期错误,请检查!')
            return
        tradeDayList = [tradeDay.strftime('%Y-%m-%d') for tradeDay in data.Data[0]]
        # w.close()
        return tradeDayList
250 |
251 |
if __name__ == '__main__':
    # Manual smoke test: fetch close prices of 000300.SH for a fixed window
    # and print them.  The commented lines are alternative calls.
    GetDataFromWindAndMySqlDemo = GetDataFromWindAndMySql()
    aa = GetDataFromWindAndMySqlDemo.getHQData(tempCode='000300.SH', startDate='2019-02-01', endDate='2019-05-01')
    # aa = GetDataFromWindAndMySqlDemo.getIndexConstituent(indexCode='000905.SH',getDate='2010-02-03')
    # getHQData(self, tempCode, startDate='2019-04-01', endDate='2019-04-30', tableFlag='index',
    # nameList=['close_price']):
    # aa = GetDataFromWindAndMySqlDemo.getHQData(tempCode='300033.SZ',tableFlag='stock',startDate='2010-01-01',endDate='2010-02-01')
    # aa = GetDataFromWindAndMySqlDemo.getCurrentDateData(tempCodeList=['300033.SZ','600000.SH'],getDate='2012-03-08')
    print(aa)
--------------------------------------------------------------------------------
/GetExcelData.py:
--------------------------------------------------------------------------------
1 | # -- coding: utf-8 --
2 | '''
3 | 解析估值表数据
4 | '''
5 |
6 | import os
7 | import pandas as pd
8 | from datetime import datetime,date
9 | import xlrd
10 |
class GetExcelData:
    """Parse a folder of valuation-table Excel workbooks."""

    # First-column row label -> English field name.
    _FIELD_NAMES = {
        u'单位净值': 'netValue',
        u'累计单位净值': 'accNetValue',
        u'日净值增长率': 'netReturn',
        u'累计净值增长率': 'accNetReturn',
        u'实现收益': 'eargeMoney',
        u'本期净值增长率': 'thisNetReturn',
        u'流通股票投资合计': 'stockRate',
        '1002': 'cashRate',
        '1031': 'ensureMoneyRate',
        u'其中股票投资': 'securityRate',
        u'其中基金投资': 'fundRate',
        '1202': 'antiSaleRate',
        '1203': 'receivableSeRate',   # dividends receivable
        '1204': 'receivableIrRate',   # interest receivable
        '3003': 'securityCalcRate',   # securities settlement
    }

    # Rows whose value is read from column index 10; all other rows use column 1.
    _COLUMN_10_LABELS = frozenset(
        ['流通股票投资合计', '1203', '1002', '1031', '其中股票投资', '其中基金投资', '1202', '3003', '1204'])

    def __init__(self, fileTotalPath):
        self.fileTotalPath = fileTotalPath  # folder holding the valuation tables

    @staticmethod
    def _toFloat(value, ctype):
        """Convert a cell value to float.

        :param value: raw cell value.
        :param ctype: xlrd cell type (0 empty, 1 string, 2 number, 3 date,
            4 boolean, 5 error).
        :return: the number; text ending in '%' is divided by 100 and
            thousands separators are removed.  None when the value cannot
            be interpreted as a number.
        """
        try:
            return float(value)
        except (TypeError, ValueError):
            pass
        if ctype != 1:
            return None
        text = value.replace(',', '').strip()
        try:
            if '%' in text:
                return float(text.rstrip('%')) / 100
            return float(text)
        except ValueError:
            return None

    # Read the Excel data.
    def getData(self):
        """Parse every workbook in the folder.

        The date is taken from the last eight characters of the file name
        (before the extension), formatted as YYYYMMDD.  Only the first sheet
        of each workbook is read; row index 4 supplies the column names of
        the holdings table.

        :return: ``(netAssetDf, dicProduct)`` - a DataFrame of asset /
            net-value fields indexed by date with an added ``otherRate``
            column, and a dict mapping 'YYYY-MM-DD' to that day's stock
            holdings DataFrame.
        """
        dicProduct = {}   # holdings per date
        dicNetAsset = {}  # asset and net-value fields: name -> {date: value}

        for excelName in os.listdir(self.fileTotalPath):
            if not excelName.lower().endswith(('.xls', '.xlsx')):
                # Skip anything in the folder that is not a workbook.
                continue
            dateDigits = os.path.splitext(excelName)[0][-8:]
            upDate = dateDigits[:4] + '-' + dateDigits[4:6] + '-' + dateDigits[6:]
            data = xlrd.open_workbook(os.path.join(self.fileTotalPath, excelName))
            table = data.sheet_by_index(0)

            dicNameCode = {}
            for rowNum in range(table.nrows):
                judgeStr = table.cell(rowNum, 0).value
                if not isinstance(judgeStr, str):
                    # Numeric / empty label cells cannot match any rule below.
                    continue
                if judgeStr[:4] == '1102' and judgeStr[-2:] in ['SH', 'SZ']:  # stock holding row
                    dicNameCode[judgeStr[-9:]] = table.row_values(rowNum)
                elif judgeStr in self._FIELD_NAMES:  # asset / net-value row
                    colNum = 10 if judgeStr in self._COLUMN_10_LABELS else 1
                    cell = table.cell(rowNum, colNum)
                    fieldValues = dicNetAsset.setdefault(self._FIELD_NAMES[judgeStr], {})
                    fieldValues[upDate] = self._toFloat(cell.value, cell.ctype)
            tempDf = pd.DataFrame(dicNameCode, index=table.row_values(4)).T
            dicProduct[upDate] = tempDf

        netAssetDf = pd.DataFrame(dicNetAsset)
        netAssetDf.index = [datetime.strptime(datestr, "%Y-%m-%d").date() for datestr in
                            netAssetDf.index.tolist()]
        netAssetDf.index.name = 'update'
        # reindex tolerates ratio rows that never appeared in any workbook;
        # they count as 0 in the remainder below.
        tempDf = netAssetDf.reindex(
            columns=['cashRate', 'ensureMoneyRate', 'antiSaleRate', 'securityRate', 'fundRate']).fillna(0)
        netAssetDf['otherRate'] = 1 - tempDf.sum(axis=1)
        return netAssetDf, dicProduct
87 |
if __name__=='__main__':
    # Manual run against a valuation-table folder in the working directory.
    fileTotalPath = os.getcwd() + r'\乐道4估值表' # path of the valuation-table folder
    GetExcelDataDemo = GetExcelData(fileTotalPath=fileTotalPath)
    GetExcelDataDemo.getData()
92 |
--------------------------------------------------------------------------------
/GetFinanceReportData.py:
--------------------------------------------------------------------------------
1 | # -- coding: utf-8 --
2 | # Author:zouhao
3 | # email:1084848158@qq.com
4 |
5 | import pandas as pd
6 | import numpy as np
7 | import mylog as mylog
8 | from GetAndSaveWindData.GetDataTotalMain import GetDataTotalMain
9 | import matplotlib.pyplot as plt
10 | import matplotlib
11 | from datetime import datetime,timedelta
12 |
13 | matplotlib.rcParams['font.sans-serif'] = ['SimHei']
14 | matplotlib.rcParams['font.family'] = 'sans-serif'
15 | matplotlib.rcParams['axes.unicode_minus'] = False
16 |
17 |
class GetFinanceReportData:
    """Industry breakdown of the stock holdings a fund disclosed in its reports.

    Holdings and industry classifications are fetched through
    ``GetDataTotalMain`` (created with ``data_resource='wind'``).
    """

    def __init__(self, dic_param, file_path=''):
        """Store the query parameters and create the data-access helper.

        :param dic_param: dict with at least ``fund_code``, ``start_date`` and
            ``end_date`` (read by :meth:`get_main`).
        :param file_path: when non-empty, the two result tables are also
            exported to Excel.  NOTE(review): the value is only used as an
            on/off switch; the workbooks are written to the working directory.
        """
        self.dic_param = dic_param
        self.GetDataTotalMainDemo = GetDataTotalMain(data_resource='wind')
        self.file_path = file_path

    def get_industry_sta(self, dic_df):
        """Aggregate holding weights per industry for every report period.

        :param dic_df: mapping ``report label -> holdings DataFrame``; each frame
            needs the columns ``stock_code``, ``pro_total_stock_inve`` and
            ``pro_net_value``.
        :return: ``(stock_inves_rate_df, net_value_rate_df)``, both indexed by
            report date string with one column per industry, values divided
            by 100 (percent -> fraction).
        """
        dic_stock_weight = {}      # share of total stock investment
        dic_net_value_weight = {}  # share of fund net value
        for rpt_date, holdings in dic_df.items():
            # Labels containing '中报' (interim report) map to June 30; every
            # other label is treated as the annual report (Dec 31).
            # NOTE(review): quarterly labels of the same year would collide on
            # the same key - confirm only interim/annual reports are passed in.
            suffix = '0630' if '中报' in rpt_date else '1231'
            rpt_date_str = rpt_date[:4] + suffix

            industry_df = self.GetDataTotalMainDemo.get_stock_industry(
                industry_flag='中证', code_list=holdings['stock_code'].tolist(),
                industryType=1, tradeDate=rpt_date_str)
            merged = pd.concat([holdings.set_index('stock_code'), industry_df], axis=1, sort=True)

            # The first column of the classification frame holds the industry name.
            by_industry = merged.groupby(industry_df.columns.tolist()[0])
            dic_stock_weight[rpt_date_str] = by_industry['pro_total_stock_inve'].sum().to_dict()
            dic_net_value_weight[rpt_date_str] = by_industry['pro_net_value'].sum().to_dict()

        stock_inves_rate_df = pd.DataFrame(dic_stock_weight).T.fillna(0) / 100
        net_value_rate_df = pd.DataFrame(dic_net_value_weight).T.fillna(0) / 100
        if self.file_path:
            stock_inves_rate_df.to_excel('占股票投资比例.xlsx')
            net_value_rate_df.to_excel("占净值比例.xlsx")

        fig1 = plt.figure(figsize=(16, 9))
        ax_inves = fig1.add_subplot(111)
        # Draw on the prepared axes; without ``ax=`` pandas opens a second
        # figure and the 16x9 one stays empty.
        stock_inves_rate_df.plot(kind='bar', ax=ax_inves)
        plt.show()
        return stock_inves_rate_df, net_value_rate_df

    def get_main(self):
        """Fetch the fund's report holdings and run the industry statistics.

        :return: the tuple returned by :meth:`get_industry_sta`.
        """
        # Use the parameters given to the constructor, not a module-level global
        # that only exists when this file is run as a script.
        params = self.dic_param
        fund_contain_stock_df = self.GetDataTotalMainDemo.get_fund_report_data(
            fund_code=params['fund_code'],
            start_date=params['start_date'],
            end_date=params['end_date'])
        dic_df = {rpt_date: temp_df
                  for rpt_date, temp_df in fund_contain_stock_df.groupby(by='rpt_date')}
        return self.get_industry_sta(dic_df)
70 |
71 |
if __name__ == '__main__':
    # Demo run: industry statistics for one fund from 2011-01-30 up to today.
    dic_param = {
        'fund_code': '110022.OF',
        'fund_name': '富国上证综指',
        'start_date': '2011-01-30',
        'end_date': datetime.today().strftime("%Y-%m-%d"),
    }
    GetFinanceReportDataDemo = GetFinanceReportData(dic_param=dic_param)
    GetFinanceReportDataDemo.get_main()
80 |
--------------------------------------------------------------------------------
/GetWindDataToMySql.py:
--------------------------------------------------------------------------------
1 | # -- coding: utf-8 --
2 |
3 | '''
4 | 将wind的数据导入到本地数据库
5 | '''
6 |
7 | from WindPy import w
8 | import pymysql
9 | from datetime import datetime
10 | import pandas as pd
11 | from sqlalchemy import create_engine
12 |
13 |
class GetWindDataToMySql:
    """Download daily index quotes from Wind and append them to MySQL table ``index_data``."""

    def __init__(self):
        # Database settings.
        # NOTE(review): credentials are hard-coded in source; consider loading
        # them from configuration or the environment.
        self.dataBaseConfig = {
            'host': 'localhost',
            'user': 'root',
            'password': '123456',
            'database': 'fund_data',
        }

        # Security codes to download.
        self.wsetData = ["000001.SH", "399300.SZ", "000016.SH", "000905.SH", "000906.SH"]
        # Quote fields requested for every code.
        self.dataName = ["open", "high", "low", "close", "volume", "amt", "chg", "pct_chg", "turn"]

    def connectMysql(self, dataBase='newOpen'):
        """Open a new connection, or close the one passed in.

        :param dataBase: the sentinel ``'newOpen'`` opens and returns a new
            pymysql connection; any other value is treated as an open
            connection and closed.
        :return: the new connection, or ``None`` after closing.
        """
        if dataBase == 'newOpen':
            cfg = self.dataBaseConfig
            return pymysql.connect(host=cfg['host'], user=cfg['user'],
                                   passwd=cfg['password'], db=cfg['database'])
        dataBase.close()
        return None

    def PrintInfo(self, infostr, otherInfo=''):
        """Print a timestamped ``[INFO]`` line; ``otherInfo`` is appended unless it is ''."""
        prefix = datetime.now().strftime('%H:%M:%S') + '[INFO]:' + infostr
        if isinstance(otherInfo, str) and not otherInfo:
            print(prefix)
        else:
            print(prefix, otherInfo)

    def getDataStartDate(self):
        """Return the date to start downloading from.

        This is the latest ``UPDATE`` value stored in ``index_data``, or
        ``'2007-01-01'`` when the table has none.

        NOTE(review): the returned date is the last one already stored and is
        passed to Wind as the begin time, so that day may be fetched and
        appended again - confirm whether duplicates are handled downstream.
        """
        db = self.connectMysql()
        try:
            cursor = db.cursor()
            try:
                cursor.execute("select max(`UPDATE`) from index_data")
                data = cursor.fetchone()[0]
            finally:
                cursor.close()
        finally:
            # Always release the connection, including when the query fails.
            self.connectMysql(dataBase=db)

        startDate = data if data else '2007-01-01'
        self.PrintInfo("获取数据的开始日期 : %s" % startDate)
        return startDate

    def getDataFromWind(self, startDate):
        """Fetch history for every configured code from ``startDate`` onwards.

        :return: dict ``code -> DataFrame`` (one row per date, plus ``CODE`` and
            ``UPDATE`` columns); codes whose request returned an error are omitted.
        """
        totalData = {}
        w.start()
        try:
            for code in self.wsetData:
                self.PrintInfo("获取当前指数的历史数据 : %s" % code)
                wsetdata = w.wsd(codes=code, fields=self.dataName, beginTime=startDate)
                if wsetdata.ErrorCode != 0:
                    self.PrintInfo("获取当前指数的历史数据异常 : %s" % code)
                    continue

                tempDf = pd.DataFrame(wsetdata.Data, index=wsetdata.Fields, columns=wsetdata.Times).T
                tempDf['CODE'] = code
                tempDf['UPDATE'] = wsetdata.Times
                totalData[code] = tempDf
        finally:
            # Close the Wind session even if a request raises.
            w.close()
        return totalData

    def dataToMysql(self, totalData):
        """Append every downloaded frame to table ``index_data``."""
        if not totalData:
            self.PrintInfo("未获取到任何有效数据,请检查!")
            return

        # Build the URL from the single configuration dict instead of a second
        # hard-coded copy of the credentials; port and charset keep their
        # previous values unless the dict overrides them.
        cfg = self.dataBaseConfig
        mysqlcon = "mysql+pymysql://%s:%s@%s:%s/%s?charset=%s" % (
            cfg['user'], cfg['password'], cfg['host'], cfg.get('port', '3306'),
            cfg['database'], cfg.get('charset', 'utf8'))
        conn = create_engine(mysqlcon)
        try:
            for code, datadf in totalData.items():
                self.PrintInfo('%s历史数据写入数据库。。' % code)
                datadf.to_sql(name='index_data', con=conn, if_exists='append', index=False)
        finally:
            conn.dispose()

    def startMain(self):
        """Run the pipeline: find start date, download, store."""
        startDate = self.getDataStartDate()
        totalData = self.getDataFromWind(startDate)
        self.dataToMysql(totalData)
100 |
if __name__=='__main__':
    # Script entry point: run the full Wind -> MySQL import once.
    GetWindDataToMySqlDemo = GetWindDataToMySql()
    GetWindDataToMySqlDemo.startMain()
--------------------------------------------------------------------------------
/JudgeFund/CalcJudgeFund.py:
--------------------------------------------------------------------------------
1 | # -- coding: utf-8 --
2 |
3 | '''
4 | 基金评价指标排名计算
5 | '''
6 |
7 | import pandas as pd
8 | import numpy as np
9 | from datetime import datetime, timedelta
10 |
11 |
12 | '''
13 | 证券代码 证券简称 基金成立日 '近1月(%)','近3月(%)','近6月(%)','近1年(%)','近3年(%)','基金规模(亿元)','年化收益率(%) '
14 | ,'最大回撤(%)','年化波动率(%)','下行标准差(%)','Sharpe','Alpha(年化)(%)','Sharpe(年化)','Treynor(年化)','Sortino(年化)',
15 | '选时能力','选股能力','信息比率(年化)','基金经理(现任)','基金管理人
16 | '004840.OF','001708.OF','004695.OF','006749.OF','002465.OF','002182.OF','004696.OF','003208.OF'
17 |
18 |
19 | '''
20 |
21 |
class CalcJudgeFund:
    """Rank funds on a set of evaluation indicators read from an Excel sheet."""

    def __init__(self):
        self.file_path = r"D:\\工作文件\\产品评价\\东兴\\"
        # Indicators ranked in descending order: larger is better.
        self.sort_up = ['近1月(%)', '近3月(%)', '近6月(%)', '近1年(%)', '近3年(%)', '基金规模(亿元)', '年化收益率(%)', '最大回撤(%)',
                        'Alpha(年化)(%)', 'Sharpe(年化)', 'Treynor(年化)', 'Sortino(年化)', '选时能力', '选股能力',
                        '信息比率(年化)']
        # Indicators ranked in ascending order: smaller is better.
        self.sort_down = ['年化波动率(%)', '下行标准差(%)']
        self.targe_code_list = ['004840.OF', '001708.OF', '004695.OF', '006749.OF', '002465.OF', '002182.OF',
                                '004696.OF', '003208.OF']

    @staticmethod
    def _rank_labels(values, ascending, name):
        """Turn one indicator column into ``'rank/total'`` labels.

        Funds without a value get ``'--/total'``.  ``total`` is the largest rank
        in the column; a column with no values at all yields ``'--/0'`` instead
        of failing on ``int(NaN)``.
        """
        ranks = values.rank(ascending=ascending)
        total = ranks.max()
        total_text = '0' if np.isnan(total) else '%s' % int(total)
        labels = {}
        for fund_code, rank_num in ranks.to_dict().items():
            rank_text = '--' if np.isnan(rank_num) else '%s' % int(rank_num)
            labels[fund_code] = '%s/%s' % (rank_text, total_text)
        return pd.Series(labels, name=name)

    def calc_sort(self):
        """Rank every fund on every indicator and export the table.

        Reads ``偏股混合型.xlsx`` (sheet ``Sheet1``) from ``self.file_path`` and
        writes ``偏股混合型基金排名.xlsx`` to the same folder.

        :return: DataFrame with the descriptive columns plus one
            ``'rank/total'`` column per indicator.
        """
        total_fund = pd.read_excel(self.file_path + "偏股混合型.xlsx", sheet_name='Sheet1', index_col=0)

        df_list = [self._rank_labels(total_fund[col], False, col) for col in self.sort_up]
        df_list += [self._rank_labels(total_fund[col], True, col) for col in self.sort_down]
        sort_df = pd.concat(df_list, axis=1, sort=True)

        fix_df = total_fund[['证券简称', '基金成立日', '基金经理(现任)', '基金管理人']]
        final_df = pd.concat([fix_df, sort_df], sort=True, axis=1)
        final_df.to_excel(self.file_path + "偏股混合型基金排名.xlsx")
        return final_df

    def get_target(self, df):
        """Return the rows of ``df`` for the configured target funds.

        Codes missing from ``df`` are skipped; the remaining rows keep the order
        of ``self.targe_code_list``.  (Previously the selection was computed
        and then discarded, so the method always returned ``None``.)
        """
        present = [code for code in self.targe_code_list if code in df.index]
        return df.loc[present]
68 |
69 |
if __name__ == '__main__':
    # Script entry point: compute and export the ranking table.
    CalcJudgeFundDemo = CalcJudgeFund()
    CalcJudgeFundDemo.calc_sort()
73 |
--------------------------------------------------------------------------------
/JudgeFund/JudgeAndGetFund.py:
--------------------------------------------------------------------------------
1 | # -- coding: utf-8 --
2 |
3 |
4 | import pandas as pd
5 | import mylog as mylog
6 | import numpy as np
7 | from datetime import datetime,timedelta
8 | from GetAndSaveWindData.GetDataFromWindNNotMysql import GetDataFromWindNotMysql
9 | import matplotlib.pyplot as plt
10 | import matplotlib
11 |
12 | matplotlib.rcParams['font.sans-serif'] = ['SimHei']
13 | matplotlib.rcParams['font.family'] = 'sans-serif'
14 | matplotlib.rcParams['axes.unicode_minus'] = False
15 |
class JudgeAndGetFund:
    """Charts and tables describing one fund: size, shares and top holdings."""

    # Colours for the stacked industry bars; reused cyclically when there are
    # more industries than entries.
    _PALETTE = ['#36648B', '#458B00', '#7A378B', '#8B0A50', '#8FBC8F', '#B8860B', '#FFF68F', '#FFF5EE', '#FFF0F5',
                '#FFEFDB',
                '#F4A460', '#A0522D', '#FFE4E1', '#BC8F8F', '#A52A2A', '#800000', '#F5F5F5', '#DCDCDC', '#808080',
                '#000000',
                '#FFA500', '#F5DEB3', '#DAA520', '#BDB76B', '#556B2F', '#006400', '#98FB98', '#7FFFAA', '#20B2AA',
                '#F0FFFF',
                '#191970', '#BA55D3', '#DDA0DD', '#4B0082', '#8FBC8F', '#B8860B', '#FFF68F', '#FFF5EE', '#FFF0F5',
                '#FFEFDB',
                '#36648B', '#458B00', '#7A378B', '#8B0A50', '#8FBC8F', '#B8860B', '#FFF68F', '#FFF5EE', '#FFF0F5',
                '#FFEFDB']

    def __init__(self):
        self.GetDataFromWindNotMysqlDemo = GetDataFromWindNotMysql(data_resource='wind')

    def get_init_param(self, fund_name='易方达行业领先'):
        """Load the analysis window and fund code from ``参数_<fund_name>.xlsx``.

        Sets ``self.start_date``, ``self.end_date`` and ``self.fund_code`` from
        the first row of sheet ``基础信息``.

        :return: an (empty) parameter dict, kept for the callers that pass it on.
        """
        dic_param = {}
        workbook = "参数_%s.xlsx" % fund_name
        base_df = pd.read_excel(workbook, sheet_name='基础信息')
        # The benchmark sheet is still loaded (so a missing sheet fails here, as
        # before) but its content is not used yet.
        pd.read_excel(workbook, sheet_name='业绩基准')
        first_row = base_df.iloc[0]
        self.start_date = first_row['任职日期']
        self.end_date = first_row['离任日期']
        self.fund_code = first_row['基金代码']
        return dic_param

    def get_fe_change(self, dic_param):
        """Plot fund shares, fund size and stock ratio, then export the table.

        Writes three PNG files and ``<fund_code>份额规模概况.xlsx`` to the
        working directory.  ``dic_param`` is accepted but not read.
        """
        df = self.GetDataFromWindNotMysqlDemo.get_fund_filed(start_date=self.start_date, end_date=self.end_date,
                                                             fund_code=self.fund_code)
        df['基金份额变化率'] = df['基金份额_万份'] / df['基金份额_万份'].shift(1) - 1
        df['基金规模变化率'] = df['基金规模'] / df['基金规模'].shift() - 1

        fig_fe = plt.figure(figsize=(16, 9))
        ax_fe = fig_fe.add_subplot(111)
        df['基金份额_万份'].plot.bar(ax=ax_fe)
        ax_fe.set_title('基金份额_万份')
        fig_fe.savefig('基金份额概况.png')

        fig_size = plt.figure(figsize=(16, 9))
        ax_size = fig_size.add_subplot(111)
        df['基金规模'].plot(kind='bar', ax=ax_size, color='r', width=0.5)
        ax_size.set_xticklabels(df.index, rotation=90)
        ax_size.set_title('基金规模')
        fig_size.savefig('基金规模概况.png')

        fig_stock_rate = plt.figure(figsize=(16, 9))
        ax_stock_rate = fig_stock_rate.add_subplot(111)
        (df['股票市值占基金资产净值比'] / 100).plot(kind='bar', ax=ax_stock_rate, color='b')
        ax_stock_rate.set_title('股票市值占基金资产净值比')
        fig_stock_rate.savefig('股票占比情况.png')
        df.to_excel("%s份额规模概况.xlsx" % self.fund_code)

    def _plot_stacked_rates(self, rate_df, title, file_name):
        """Draw one stacked bar per row of ``rate_df`` (one segment per column) and save it."""
        fig = plt.figure(figsize=(16, 9))
        ax = fig.add_subplot(111)
        dates = rate_df.index.tolist()
        palette = self._PALETTE
        for pos in range(rate_df.shape[1]):
            # Positional access via iloc (DataFrame.ix no longer exists); each
            # segment sits on top of the sum of the columns before it.
            ax.bar(dates, rate_df.iloc[:, pos], color=palette[pos % len(palette)],
                   bottom=rate_df.iloc[:, :pos].sum(axis=1))

        box = ax.get_position()
        ax.set_position([box.x0, box.y0, box.width * 1.02, box.height])
        ax.legend(labels=rate_df.columns.tolist(), bbox_to_anchor=(1, 0.8), ncol=1)
        ax.set_title(title)
        for tick in ax.get_xticklabels():
            tick.set_rotation(90)
        fig.savefig(file_name)

    def sum_plot(self, df):
        """Top-ten holdings: total weights per disclosure date and per industry, with charts.

        :param df: holdings with columns ``披露日期``, ``所属行业``,
            ``市值占基金资产净值比`` and ``市值占股票投资市值比``.
        """
        dic_sum = {}
        dic_indus_fund_sum = {}
        dic_indus_stock_sum = {}
        for datestr, temp_df in df.groupby(by='披露日期'):
            by_industry = temp_df.groupby('所属行业')
            dic_indus_fund_sum[datestr] = by_industry['市值占基金资产净值比'].sum().to_dict()
            dic_indus_stock_sum[datestr] = by_industry['市值占股票投资市值比'].sum().to_dict()
            dic_sum[datestr] = {'十大重仓股市值占基金净值比': temp_df['市值占基金资产净值比'].sum(),
                                '十大重仓股市值占股票投资市值比': temp_df['市值占股票投资市值比'].sum()}

        # Percent -> fraction; industries absent on a date count as 0.
        value_fund_df = (pd.DataFrame(dic_indus_fund_sum).T / 100).fillna(0)
        value_stock_df = (pd.DataFrame(dic_indus_stock_sum).T / 100).fillna(0)

        sum_df = pd.DataFrame(dic_sum).T
        fig = plt.figure(figsize=(16, 9))
        ax = fig.add_subplot(111)
        sum_df.plot.bar(ax=ax)

        self._plot_stacked_rates(value_fund_df, "重仓行业市值占基金资产净值比", '重仓行业市值占基金资产净值比.png')
        self._plot_stacked_rates(value_stock_df, "重仓行业市值占股票投资净值比", '重仓行业市值占股票投资净值比.png')
        plt.show()

    def get_stock_diff(self, df):
        """Walk consecutive disclosure dates and load each pair of holding tables.

        TODO: the comparison itself is not implemented; the method only selects
        the per-date tables (indexed by ``股票代码``) and returns ``None``.
        """
        temp_df = df.copy().set_index(keys=['披露日期', '重仓排名'])
        total_date_list = list(df['披露日期'].unique())
        change_name_list = ['市值占基金资产净值比', '市值占股票投资市值比', '持股市值', '股票代码']

        def holdings_on(date_label):
            return temp_df.loc[date_label][change_name_list].set_index('股票代码')

        for date_num, date_label in enumerate(total_date_list):
            if date_num == 0:
                target_df = holdings_on(date_label)
            else:
                current_df = holdings_on(date_label)
                pre_df = holdings_on(total_date_list[date_num - 1])

    def get_stock_detail(self, dic_param):
        """Load the top-holdings table (local cache first, Wind otherwise) and analyse it.

        ``dic_param`` is accepted but not read.
        """
        cache_file = "%s重仓股概况.xlsx" % self.fund_code
        try:
            df = pd.read_excel(cache_file, index_col=0, converters={'股票代码': str, '重仓排名': int})
        except Exception:
            # Any failure to read the cache falls back to a fresh download,
            # which is then cached for the next run.
            df = self.GetDataFromWindNotMysqlDemo.get_fund_stock_filed(start_date=self.start_date,
                                                                       end_date=self.end_date,
                                                                       fund_code=self.fund_code)
            df.to_excel(cache_file)
        df['披露日期'] = [datetime.strftime(dateStr, "%Y-%m-%d") for dateStr in df.index.tolist()]
        df.dropna(inplace=True)
        self.get_stock_diff(df)

    def get_main(self):
        """Entry point: load parameters, then run the holdings analysis."""
        dic_param = self.get_init_param()
        self.get_stock_detail(dic_param)
159 |
160 |
if __name__=='__main__':
    # Script entry point: run the analysis with the default fund name.
    JudgeAndGetFundDemo = JudgeAndGetFund()
    JudgeAndGetFundDemo.get_main()
--------------------------------------------------------------------------------
/JudgeFund/JudgeFundDC.py:
--------------------------------------------------------------------------------
1 | # -- coding: utf-8 --
2 |
3 | '''
4 | 量化对冲型基金评价
5 | '''
6 |
7 | import pandas as pd
8 | import mylog as mylog
9 | import numpy as np
10 | from datetime import datetime, timedelta
11 | import mylog as mylogdemo
12 | from GetAndSaveWindData.GetDataTotalMain import GetDataTotalMain
13 | import statsmodels.api as sm
14 |
class JudgeFundDC:
    """Text evaluation of quantitative-hedge funds: manager company and fund manager."""

    def __init__(self):
        self.logger = mylogdemo.set_log()
        self.last_date_str = datetime.today().strftime("%Y-%m-%d")
        self.target_code = '519062.OF'

    def get_comyany_info(self, company, df, target_labe=''):
        """Log how ``company`` compares with other managers of this fund type.

        Two percentiles are reported: the number of funds it runs, and how
        early its first fund was established.

        :param company: value of column ``基金管理人`` to evaluate; must be
            present in ``df`` (otherwise a lookup error is raised).
        :param df: fund table indexed by fund code, with columns ``基金管理人``,
            ``基金成立日`` (datetime), ``证券简称`` and ``基金经理(现任)``.
        :param target_labe: unused, kept for interface compatibility.
        """
        self.logger.info("截止当前最新日期%s,现有量化对冲型基金(含A/C类)共%s只。" % (self.last_date_str, df.shape[0]))
        total_company_dic = {company_name: temp_df for company_name, temp_df in df.groupby(by='基金管理人')}

        # Earliest establishment date per company, latest first, so a company
        # that started earlier sits further down and gets a higher percentile.
        total_company_esta = {company_name: temp_df['基金成立日'].min().strftime('%Y-%m-%d')
                              for company_name, temp_df in total_company_dic.items()}
        total_company_esta_se = pd.Series(total_company_esta, name='基金成立日').sort_values(ascending=False)
        esta_percent = (total_company_esta_se.index.tolist().index(company) + 1) / len(total_company_esta_se)
        esta_percent_str = str(np.round(esta_percent * 100, 2)) + '%'

        # Percentile of the company's fund count among the distinct counts.
        num_se = pd.Series({company_name: temp_df.shape[0]
                            for company_name, temp_df in total_company_dic.items()}).sort_values()
        distinct_counts = list(num_se.unique())
        per_rate = (distinct_counts.index(num_se[company]) + 1) / len(distinct_counts)
        per_rate_str = str(np.round(per_rate * 100, 4)) + '%'
        self.logger.info('%s旗下现有量化型产品共%s只(含A类C类),占所有管理人所持数量的%s分位数。' % (company, num_se[company], per_rate_str))
        if per_rate >= 0.7:
            self.logger.info("占比靠前,反映管理人在发行量化对冲型基金上的优秀运作能力。")
        elif per_rate > 0.4:
            self.logger.info("占比中等,管理人发行量化对冲型基金的数量一般。")
        else:
            self.logger.info('占比下游,管理人对量化对冲型基金发行数量较少。')

        company_fund_df = df[df['基金管理人'] == company]
        min_fund_esta = company_fund_df['基金成立日'].min()
        min_fund_esta_se = company_fund_df[company_fund_df['基金成立日'] == min_fund_esta].iloc[0]
        self.logger.info("其中,%s(%s),基金经理%s,于%s成立,为该管理人旗下成立最早的产品;" % (
            min_fund_esta_se['证券简称'], min_fund_esta_se.name, min_fund_esta_se['基金经理(现任)'], min_fund_esta_se['基金成立日']))
        self.logger.info("按各管理人发行最早量化对冲型基金的时间看,该产品发行时间占各管理人同类型的%s分位数" % esta_percent_str)
        if esta_percent >= 0.6:
            self.logger.info("发行时间早与多数管理人,一定程度上反应了管理人更丰富的投资管理经验。")
        elif esta_percent >= 0.3:
            self.logger.info("发行时间排名中等,管理人整体投资管理经验中等水平")
        else:
            self.logger.info("发行时间较晚,需谨慎对待管理人可能对量化对冲型基金投资管理经验较短的问题")

    def get_manager_info(self, company, df, manager_name, target_labe=''):
        """Log the track record of ``manager_name`` and export the return rankings.

        Funds are matched when ``manager_name`` occurs in column
        ``基金经理(现任)``.  When none match, a notice is logged and the method
        returns.  Otherwise the manager's earliest fund and the fund
        ``self.target_code`` are described and ``<fund name>回报排名.xlsx`` is
        written; ``self.target_code`` must be among the manager's funds
        (otherwise a lookup error is raised).

        :param company: unused, kept for interface compatibility.
        :param target_labe: product label inserted into the log text.
        """
        is_managed = df['基金经理(现任)'].map(
            lambda names: isinstance(names, str) and names.find(manager_name) != -1).astype(bool)
        target_df = df[is_managed]
        if target_df.empty:
            # The manager name is now substituted into the message (the
            # placeholder used to be logged verbatim).
            self.logger.info('未查询到基金经理%s管理过的量化对冲型产品,对其投资经验需保持谨慎!' % manager_name)
            return
        self.logger.info('基金经理%s,查询到其管理量化对冲型产品%s只(含A/C类),' % (manager_name, target_df.shape[0]))

        # Earliest fund of this manager.
        last_se = target_df[target_df['基金成立日'] == target_df['基金成立日'].min()].iloc[0]
        self.logger.info("其中,%s(%s),于%s成立,为该基金经理管理的最早的%s产品,最新规模为%s亿元;" % (
            last_se['证券简称'], last_se.name, last_se['基金成立日'], target_labe, last_se['基金规模亿元']))

        annual_alpha = last_se['Alpha(年化)_d_52_hs300百分']
        annual_alpha_str = str(np.round(annual_alpha, 2)) + '%'
        same_style_alpha = last_se['Alpha(年化)同类平均_d_52_hs300百分']
        same_style_alpha_str = str(np.round(same_style_alpha, 2)) + '%'
        total_alpha_str = str(np.round(last_se['Alpha_w_52_hs300百分'], 2)) + '%'
        comparison = '高于' if annual_alpha > same_style_alpha else '低于'
        self.logger.info("该基金自成立以来,近一年超额alpha收益(相对沪深300)为%s,年化alpha收益为%s,%s同类平均的年化alpha收益%s" % (
            total_alpha_str, annual_alpha_str, comparison, same_style_alpha_str))

        current_se = target_df.loc[self.target_code]
        self.logger.info("%s成立于%s,当前规模%s亿元,规模同类排名%s" % (
            current_se['证券简称'], current_se['基金成立日'], current_se['基金规模亿元'], current_se['规模同类排名']))
        rate_name_list = [data_name for data_name in current_se.index.tolist() if data_name.find('回报排名') != -1]
        # NOTE(review): the rankings are taken from the manager's earliest fund
        # (last_se) while the file is named after the target fund - confirm
        # which fund is intended.
        rate_se = last_se.loc[rate_name_list]
        rate_se.name = '回报排名'
        rate_se.to_excel('%s回报排名.xlsx' % current_se['证券简称'])

    def get_main(self):
        """Entry point: read ``量化对冲公募基金.xlsx`` and run both evaluations."""
        df = pd.read_excel("量化对冲公募基金.xlsx", index_col=0)
        self.get_comyany_info(company='海富通基金管理有限公司', df=df)
        self.get_manager_info(company='海富通基金管理有限公司', df=df, manager_name='朱斌全')
95 |
if __name__=='__main__':
    # Script entry point: evaluate the configured company and manager.
    JudgeFundDCDemo = JudgeFundDC()
    JudgeFundDCDemo.get_main()
--------------------------------------------------------------------------------
/JudgeFund/JudgeFundImproveBase.py:
--------------------------------------------------------------------------------
1 | # -- coding: utf-8 --
2 |
3 |
4 | import pandas as pd
5 | import mylog as mylog
6 | import numpy as np
7 | from datetime import datetime,timedelta
8 |
9 |
class JudgeFundImproveBase:
    """Text evaluation of index-enhanced funds: manager company and fund manager."""

    def __init__(self):
        self.logger = mylog.set_log()

    def get_manager_info(self, company, df, target_code_list, manager_name):
        """Log the track record of ``manager_name`` on index-enhanced funds.

        Funds are matched when ``manager_name`` occurs in column
        ``基金经理(现任)``.  When none match, a notice is logged and the method
        returns.

        :param company: unused, kept for interface compatibility.
        :param df: fund table indexed by fund code.
        :param target_code_list: tracked index codes; the first one is quoted in
            the log text (empty text when the list is empty).
        """
        is_managed = df['基金经理(现任)'].map(
            lambda names: isinstance(names, str) and names.find(manager_name) != -1).astype(bool)
        target_df = df[is_managed]
        if target_df.empty:
            # The manager name is now substituted into the message (the
            # placeholder used to be logged verbatim).
            self.logger.info('未查询到基金经理%s管理过的指数增强型产品,对其投资经验需保持谨慎!' % manager_name)
            return
        self.logger.info('基金经理%s,查询到其管理指数增强型产品%s只(含A/C类),反映了该基金经理有一定相关产品的投资经验' % (manager_name, target_df.shape[0]))

        # Earliest fund of this manager.
        last_se = target_df[target_df['基金成立日'] == target_df['基金成立日'].min()].iloc[0]
        tracked_code = target_code_list[0] if target_code_list else ''
        self.logger.info("其中,%s(%s),于%s成立,为该基金经理管理的最早的跟踪%s产品,最新规模为%s亿元;" % (
            last_se['证券简称'], last_se.name, last_se['基金成立日'], tracked_code, last_se['基金规模亿元']))

        annual_alpha = last_se['Alpha(年化)_d_52_hs300百分']
        annual_alpha_str = str(np.round(annual_alpha, 2)) + '%'
        same_style_alpha = last_se['Alpha(年化)同类平均_d_52_hs300百分']
        same_style_alpha_str = str(np.round(same_style_alpha, 2)) + '%'
        total_alpha_str = str(np.round(last_se['Alpha_w_52_hs300百分'], 2)) + '%'
        comparison = '高于' if annual_alpha > same_style_alpha else '低于'
        self.logger.info("该基金自成立以来,近一年超额alpha收益(相对沪深300)为%s,年化alpha收益为%s,%s同类平均的年化alpha收益%s" % (
            total_alpha_str, annual_alpha_str, comparison, same_style_alpha_str))

    def get_target_label_fund(self, company_fund_df, target_code_list=None):
        """Log the company's earliest fund that tracks one of ``target_code_list``.

        Nothing is logged when the company runs no such fund.
        """
        target_code_list = [] if target_code_list is None else target_code_list
        target_df = company_fund_df[company_fund_df['跟踪指数代码'].isin(target_code_list)]
        if target_df.empty:
            return

        min_fund_esta = target_df['基金成立日'].min()
        min_fund_esta_se = target_df[target_df['基金成立日'] == min_fund_esta].iloc[0]
        self.logger.info("其中,%s(%s),基金经理%s,于%s成立,为该管理人旗下成立最早的跟踪%s产品,最新规模为%s亿元;" % (
            min_fund_esta_se['证券简称'], min_fund_esta_se.name, min_fund_esta_se['基金经理(现任)'], min_fund_esta_se['基金成立日'],
            target_code_list[0], min_fund_esta_se['基金规模亿元']))

        annual_alpha = min_fund_esta_se['Alpha(年化)_d_52_hs300百分']
        annual_alpha_str = str(np.round(annual_alpha, 2)) + '%'
        same_style_alpha = min_fund_esta_se['Alpha(年化)同类平均_d_52_hs300百分']
        same_style_alpha_str = str(np.round(same_style_alpha, 2)) + '%'
        comparison = '高于' if annual_alpha > same_style_alpha else '低于'
        self.logger.info("该基金自成立以来,年化alpha收益为%s,%s同类平均的alpha收益%s" % (
            annual_alpha_str, comparison, same_style_alpha_str))
        return

    def get_comyany_info(self, company, df, target_code_list=None):
        """Log how ``company`` compares with other managers of index-enhanced funds.

        Two percentiles are reported: the number of funds it runs, and how
        early its first fund was established.  When ``target_code_list`` is
        non-empty, the number of companies tracking those indices and the
        company's earliest such fund are logged as well.

        :param company: value of column ``基金管理人``; must be present in ``df``.
        :param df: fund table indexed by fund code, ``基金成立日`` as datetime.
        """
        target_code_list = [] if target_code_list is None else target_code_list
        total_company_dic = {company_name: temp_df for company_name, temp_df in df.groupby(by='基金管理人')}

        # Earliest establishment date per company, latest first, so a company
        # that started earlier sits further down and gets a higher percentile.
        total_company_esta = {company_name: temp_df['基金成立日'].min().strftime('%Y-%m-%d')
                              for company_name, temp_df in total_company_dic.items()}
        total_company_esta_se = pd.Series(total_company_esta, name='基金成立日').sort_values(ascending=False)
        esta_percent = (total_company_esta_se.index.tolist().index(company) + 1) / len(total_company_esta_se)
        esta_percent_str = str(np.round(esta_percent * 100, 2)) + '%'

        # Percentile of the company's fund count among the distinct counts.
        num_se = pd.Series({company_name: temp_df.shape[0]
                            for company_name, temp_df in total_company_dic.items()}).sort_values()
        distinct_counts = list(num_se.unique())
        per_rate = (distinct_counts.index(num_se[company]) + 1) / len(distinct_counts)
        per_rate_str = str(np.round(per_rate * 100, 4)) + '%'
        self.logger.info('%s旗下现有指数增强型产品共%s只(含A类C类),占所有管理人所持数量的%s分位数。' % (company, num_se[company], per_rate_str))
        if per_rate >= 0.7:
            self.logger.info("占比靠前,反映管理人在发行指数增强型基金上的优秀运作能力。")
        elif per_rate > 0.4:
            self.logger.info("占比中等,管理人发行指数增强型基金的数量一般。")
        else:
            self.logger.info('占比下游,管理人对指数增强型基金发行数量较少。')

        company_fund_df = df[df['基金管理人'] == company]
        min_fund_esta = company_fund_df['基金成立日'].min()
        min_fund_esta_se = company_fund_df[company_fund_df['基金成立日'] == min_fund_esta].iloc[0]
        self.logger.info("其中,%s(%s),基金经理%s,于%s成立,为该管理人旗下成立最早的增强型产品;" % (
            min_fund_esta_se['证券简称'], min_fund_esta_se.name, min_fund_esta_se['基金经理(现任)'], min_fund_esta_se['基金成立日']))
        self.logger.info("按各管理人发行最早指数增强型基金的时间看,该产品发行时间占各管理人同类型的%s分位数" % esta_percent_str)
        if esta_percent >= 0.6:
            self.logger.info("发行时间早与多数管理人,一定程度上反应了管理人更丰富的投资管理经验。")
        elif esta_percent >= 0.3:
            self.logger.info("发行时间排名中等,管理人整体投资管理经验中等水平")
        else:
            self.logger.info("发行时间较晚,需谨慎对待管理人可能对指数增强型基金投资管理经验较短的问题")

        if target_code_list:
            # Number of funds per company that track one of the target indices.
            dic_target = {company_name: int(temp_df['跟踪指数代码'].isin(target_code_list).sum())
                          for company_name, temp_df in total_company_dic.items()}
            label_se = pd.Series(dic_target, name='产品数量')
            self.logger.info("跟踪%s指数的管理人共%s家" % (target_code_list[0], len(label_se[label_se > 0])))
            self.get_target_label_fund(company_fund_df, target_code_list)
        return

    def get_main(self, company='万家基金管理有限公司', manager_name='乔亮'):
        """Entry point: read ``指数增强评价指标.xlsx`` and run both evaluations."""
        df = pd.read_excel('指数增强评价指标.xlsx', index_col=0)
        self.get_comyany_info(company, df, target_code_list=['000852.SH', ])
        self.get_manager_info(company, df, target_code_list=['000852.SH', ], manager_name=manager_name)
120 |
if __name__=='__main__':
    # Script entry point: evaluate the default company and manager.
    JudgeFundImproveBaseDemo = JudgeFundImproveBase()
    JudgeFundImproveBaseDemo.get_main()
--------------------------------------------------------------------------------
/JudgeFund/JudgeFundIndexImprove.py:
--------------------------------------------------------------------------------
1 | # -- coding: utf-8 --
2 |
3 | '''
4 | 指数增强型基金评价
5 | '''
6 |
7 | import pandas as pd
8 | import mylog as mylog
9 | import numpy as np
10 | from datetime import datetime, timedelta
11 | import mylog as mylogdemo
12 | from GetAndSaveWindData.GetDataTotalMain import GetDataTotalMain
13 | import statsmodels.api as sm
14 |
15 |
16 | class JudgeFundIndexImprove:
17 | def __init__(self):
18 | self.logger = mylogdemo.set_log()
19 | self.last_date_str = datetime.today().strftime("%Y-%m-%d")
20 | self.target_code = '110003.OF'
21 |
22 | def get_total_info(self, df, ):
23 | self.logger.info("截止当前最新日期%s,现有指数增强型基金(含A/C类)共%s只。" % (self.last_date_str, df.shape[0]))
24 | target_bench_code = df.loc[self.target_code]['跟踪指数代码']
25 | for bench_code, temp_df in df.groupby(by='跟踪指数代码'):
26 | if bench_code == target_bench_code:
27 | self.logger.info("其中跟踪指数为%s的增强型基金%s只" % (target_bench_code, temp_df.shape[0]))
28 | break
29 |
30 | def calc_select(self, dic_result_df):
31 | pass
32 |
33 | def get_data(self, df):
34 | '''
35 | 获取跟踪指数和产品复权单位净值数据
36 | :param df:
37 | :return:
38 | '''
39 | dic_fund_index = {}
40 | for fund_code in df.index.tolist():
41 | if df.loc[fund_code]['证券简称'].find('C') == -1:
42 | dic_fund_index[fund_code] = df.loc[fund_code]['跟踪指数代码']
43 |
44 | GetDataTotalMainDemo = GetDataTotalMain(data_resource='wind')
45 | dic_total_index_df = {}
46 | dic_result_df = {}
47 | for fund_code, index_code in dic_fund_index.items():
48 | start_date = df.loc[fund_code]['基金成立日']
49 | if datetime.today() - timedelta(days=365) < start_date:
50 | continue
51 | start_date = start_date.strftime("%Y-%m-%d")
52 | temp_fund_df = GetDataTotalMainDemo.get_hq_data(code=fund_code, code_style='fund', start_date=start_date,
53 | end_date=self.last_date_str, name_list=['net_value_adj'])
54 | temp_fund_df.rename(columns={'net_value_adj': fund_code}, inplace=True)
55 | if index_code not in dic_total_index_df:
56 | temp_index_df = GetDataTotalMainDemo.get_hq_data(code=index_code, code_style='index',
57 | start_date=start_date, end_date=self.last_date_str,
58 | name_list=['close_price'])
59 | temp_index_df.rename(columns={'close_price': index_code}, inplace=True)
60 | dic_total_index_df[index_code] = temp_index_df
61 | else:
62 | temp_index_df = dic_total_index_df[index_code]
63 | dic_result_df[fund_code + '_' + index_code] = pd.concat([temp_fund_df, temp_index_df], axis=1, sort=True)
64 | return dic_result_df
65 |
66 | def regression(self, x1, x2, y):
67 | '''
68 | 最小二乘回归
69 | :param x1:
70 | :param x2:
71 | :param y:
72 | :return:
73 | '''
74 | x1, x2 = x1.reshape(len(x1), 1), x2.reshape(len(x2), 1)
75 | c = np.ones((len(x1), 1))
76 | X = np.hstack((c, x1, x2))
77 | res = (sm.OLS(y, X)).fit()
78 | return res
79 |
def get_select_judge(self, df):
    '''
    Log the stock-selection / market-timing profile of ``self.target_code``.

    The per-fund regression table is read from "择时选股能力.xlsx" in the working
    directory. If that file cannot be read, the table is rebuilt from
    ``self.get_data(df)``: for every fund/index pair the daily excess returns are
    regressed with ``self.regression`` on the benchmark excess return and its
    square, and the table is written back to the same file.

    The target fund is then ranked against its peers (funds tracking the same
    index, founded at least 365 days before ``self.last_date_str``, whose short
    name contains no 'C'); when fewer than five such peers exist, every fund in
    the table is used instead.

    :param df: fund information table indexed by fund code; the columns
        '跟踪指数代码', '基金成立日' and '证券简称' are read here
    :return: None, results are written to ``self.logger``
    :raises ValueError: if ``self.target_code`` is not part of the peer table
    '''
    r2_col = 'R方'
    alpha_col = '择股指标(年化alpha)'
    beta_col = '择时指标(beta)'
    cache_file = "择时选股能力.xlsx"

    try:
        select_df = pd.read_excel(cache_file, index_col=0)
    except Exception:
        # No usable cache: run the regression for every fund/index pair.
        regression_by_fund = {}
        for pair_key, pair_df in self.get_data(df).items():
            key_parts = pair_key.split('_')
            fund_code, index_code = key_parts[0], key_parts[1]
            returns_df = (pair_df - pair_df.shift(1)) / pair_df.shift(1)
            returns_df.fillna(0, inplace=True)
            risk_free = 0.02 / 250
            fund_excess = (returns_df[fund_code] - risk_free).values
            bench_excess = (returns_df[index_code] - risk_free).values
            tm_result = self.regression(bench_excess, np.power(bench_excess, 2), fund_excess)

            regression_by_fund[fund_code] = {
                r2_col: round(tm_result.rsquared, 2),
                # params[0] is the intercept (daily alpha), stored as an annualised percent string.
                alpha_col: str(round(tm_result.params[0] * 252 * 100, 2)) + '%',
                # params[2] is the coefficient of the squared benchmark return.
                beta_col: round(tm_result.params[2], 2),
            }
        select_df = pd.DataFrame(regression_by_fund).T
        select_df.to_excel(cache_file)

    target_bench_code = df.loc[self.target_code]['跟踪指数代码']
    same_total_df = df[df['跟踪指数代码'] == target_bench_code]
    estdate_str = (datetime.strptime(self.last_date_str, "%Y-%m-%d") - timedelta(days=365)).strftime("%Y-%m-%d")
    same_total_df = same_total_df[same_total_df['基金成立日'] <= estdate_str]
    same_code_list = [fund_code for fund_code in same_total_df.index.tolist()
                      if 'C' not in df.loc[fund_code]['证券简称']]
    if len(same_code_list) >= 5:
        same_df = select_df.loc[same_code_list]
        self.logger.info('选取跟踪同样指数即%s' % (target_bench_code,))
    else:
        same_df = select_df
        self.logger.info('选取所有指数增强基金')

    def percentile_text(ordered_codes):
        # 1-based position of the target fund in an ascending ranking, as a percent string.
        position = ordered_codes.index(self.target_code) + 1
        return str(np.round(position / len(ordered_codes) * 100, 2)) + '%'

    # The alpha column holds strings such as '12.3%'. Sorting those directly is
    # lexicographic ('10.5%' < '9.2%'), so rank on the parsed numeric value instead.
    alpha_values = same_df[alpha_col].astype(str).str.rstrip('%').astype(float)
    esta_alpha_percent_str = percentile_text(alpha_values.sort_values().index.tolist())
    esta_beta_percent_str = percentile_text(same_df.sort_values(by=beta_col).index.tolist())

    target_row = same_df.loc[self.target_code]
    self.logger.info("对所有运作时间超1年的的增强型指数基金(A/C类基金只统计数据最长的一类),根据其跟踪指数,利用TM回归模型,对其选股择时能力解析")
    self.logger.info("当前基金TM回归后的年化alpha为%s,选股能力占同类指数增强基金%s分位数。" % (
        target_row[alpha_col], esta_alpha_percent_str))

    self.logger.info(
        '择时指标(beta)回归系数为%s,择时能力占同类%s分位数' % (target_row[beta_col], esta_beta_percent_str))
    self.logger.info('TM回归解释程度%s' % (target_row[r2_col]))
143 |
def get_main(self):
    '''
    Entry point: load the index-enhanced fund list from
    "指数增强基金2020-08-14.xlsx" (first column as index) and pass it to
    ``get_total_info`` and then ``get_select_judge``.
    '''
    fund_list_df = pd.read_excel("指数增强基金2020-08-14.xlsx", index_col=0)
    for report in (self.get_total_info, self.get_select_judge):
        report(fund_list_df)
148 |
149 |
if __name__ == '__main__':
    # Script entry point: build the evaluator and run the full report.
    JudgeFundIndexImprove().get_main()
153 |
--------------------------------------------------------------------------------
/JudgeFund/JudgeFundMain.py:
--------------------------------------------------------------------------------
1 | # -- coding: utf-8 --
2 |
3 |
4 | import pandas as pd
5 | import mylog as mylog
6 | import numpy as np
7 | from datetime import datetime,timedelta
8 |
9 |
class JudgeFundMain:
    '''
    Narrative evaluation of index-enhanced funds for one fund company and one manager.

    Every finding is written to ``self.logger``; the public methods return ``None``.
    '''

    def __init__(self):
        self.logger = mylog.set_log()

    @staticmethod
    def _percent_text(value, digits=2):
        '''Return ``value`` rounded to ``digits`` decimals followed by a percent sign.'''
        return str(np.round(value, digits)) + '%'

    @staticmethod
    def _rows_containing(values, keyword):
        '''Return the positions of the string entries in ``values`` that contain ``keyword``.'''
        return [pos for pos, text in enumerate(values) if isinstance(text, str) and keyword in text]

    def get_manager_info(self, company, df, target_labe, manager_name):
        '''
        Log the track record of ``manager_name`` within ``df``.

        :param company: unused here; kept so existing callers keep working
        :param df: fund table indexed by fund code; the columns '基金经理(现任)',
            '基金全称', '基金成立日', '证券简称', '基金规模亿元' and the three alpha
            columns are read
        :param target_labe: keyword looked up in '基金全称' to select the product type
        :param manager_name: manager name looked up in '基金经理(现任)'
        :return: None
        '''
        target_df = df.iloc[self._rows_containing(df['基金经理(现任)'].tolist(), manager_name)]
        if target_df.empty:
            # The message was previously logged without being formatted.
            self.logger.info('未查询到基金经理%s管理过的指数增强型产品,对其投资经验需保持谨慎!' % (manager_name,))
            return

        # Filter on the requested label (previously hard-coded to '量化').
        target_labe_df = target_df.iloc[self._rows_containing(target_df['基金全称'].tolist(), target_labe)]
        self.logger.info('基金经理%s,查询到其管理指数增强型产品%s只(含A/C类),' % (manager_name, target_df.shape[0]))
        if target_labe_df.empty:
            self.logger.info('但未找到其管理过的%s产品,需对其该方面投资经验保持谨慎!' % (target_labe,))
            return

        self.logger.info('其中%s产品共%s只,反映了该基金经理有一定相关产品的投资经验;' % (target_labe, target_labe_df.shape[0]))
        earliest_date = target_labe_df['基金成立日'].min()
        last_se = target_labe_df[target_labe_df['基金成立日'] == earliest_date].iloc[0]
        self.logger.info("其中,%s(%s),于%s成立,为该基金经理管理的最早的%s产品,最新规模为%s亿元;" % (
            last_se['证券简称'], last_se.name, last_se['基金成立日'], target_labe, last_se['基金规模亿元']))

        annual_alpha = last_se['Alpha(年化)_d_52_hs300百分']
        same_style_alpha = last_se['Alpha(年化)同类平均_d_52_hs300百分']
        total_alpha = last_se['Alpha_w_52_hs300百分']
        relation = '高于' if annual_alpha > same_style_alpha else '低于'
        self.logger.info("该基金自成立以来,近一年超额alpha收益(相对沪深300)为%s,年化alpha收益为%s,%s同类平均的年化alpha收益%s" % (
            self._percent_text(total_alpha), self._percent_text(annual_alpha), relation,
            self._percent_text(same_style_alpha)))

    def get_target_label_fund(self, company_fund_df, target_labe='量化'):
        '''
        Log the earliest fund of a company whose full name contains ``target_labe``.

        :param company_fund_df: the funds of one company (same columns as the main table)
        :param target_labe: keyword looked up in '基金全称'
        :return: None; nothing is logged when no fund matches
        '''
        target_df = company_fund_df.iloc[
            self._rows_containing(company_fund_df['基金全称'].tolist(), target_labe)]
        if target_df.empty:
            return

        min_fund_esta = target_df['基金成立日'].min()
        min_fund_esta_se = target_df[target_df['基金成立日'] == min_fund_esta].iloc[0]
        self.logger.info("其中,%s(%s),基金经理%s,于%s成立,为该管理人旗下成立最早的%s产品,最新规模为%s亿元;" % (
            min_fund_esta_se['证券简称'], min_fund_esta_se.name, min_fund_esta_se['基金经理(现任)'],
            min_fund_esta_se['基金成立日'], target_labe, min_fund_esta_se['基金规模亿元']))

        annual_alpha = min_fund_esta_se['Alpha(年化)_d_52_hs300百分']
        same_style_alpha = min_fund_esta_se['Alpha(年化)同类平均_d_52_hs300百分']
        relation = '高于' if annual_alpha > same_style_alpha else '低于'
        self.logger.info("该基金自成立以来,年化alpha收益为%s,%s同类平均的alpha收益%s" % (
            self._percent_text(annual_alpha), relation, self._percent_text(same_style_alpha)))
        return

    def get_comyany_info(self, company, df, target_labe=''):
        '''
        Log how ``company`` compares with other managers on fund count and launch date.

        :param company: value of '基金管理人' to evaluate
        :param df: fund table; '基金成立日' must hold datetime-like values because
            ``strftime`` is called on the per-company minimum
        :param target_labe: optional keyword; when non-empty the same comparison is
            repeated for funds whose '基金全称' contains it
        :return: None
        :raises ValueError: if ``company`` does not occur in '基金管理人'
        '''
        total_company_dic = {company_name: temp_df for company_name, temp_df in df.groupby(by='基金管理人')}

        # Earliest launch date per company, latest first, so a later position means an earlier launch.
        total_company_esta = {company_name: temp_df['基金成立日'].min().strftime('%Y-%m-%d')
                              for company_name, temp_df in total_company_dic.items()}
        total_company_esta_se = pd.Series(total_company_esta, name='基金成立日').sort_values(ascending=False)
        esta_percent = (total_company_esta_se.index.tolist().index(company) + 1) / len(total_company_esta_se)
        esta_percent_str = self._percent_text(esta_percent * 100)

        # Fund count per company, ranked among the distinct counts in ascending order.
        num_se = pd.Series({company_name: temp_df.shape[0]
                            for company_name, temp_df in total_company_dic.items()}).sort_values()
        distinct_counts = list(num_se.unique())
        per_rate = (distinct_counts.index(num_se[company]) + 1) / len(distinct_counts)
        per_rate_str = self._percent_text(per_rate * 100, 4)
        self.logger.info('%s旗下现有指数增强型产品共%s只(含A类C类),占所有管理人所持数量的%s分位数。' % (company, num_se[company], per_rate_str))
        if per_rate >= 0.7:
            self.logger.info("占比靠前,反映管理人在发行指数增强型基金上的优秀运作能力。")
        elif per_rate > 0.4:
            self.logger.info("占比中等,管理人发行指数增强型基金的数量一般。")
        else:
            self.logger.info('占比下游,管理人对指数增强型基金发行数量较少。')

        company_fund_df = df[df['基金管理人'] == company]
        min_fund_esta = company_fund_df['基金成立日'].min()
        min_fund_esta_se = company_fund_df[company_fund_df['基金成立日'] == min_fund_esta].iloc[0]
        self.logger.info("其中,%s(%s),基金经理%s,于%s成立,为该管理人旗下成立最早的产品;" % (
            min_fund_esta_se['证券简称'], min_fund_esta_se.name, min_fund_esta_se['基金经理(现任)'],
            min_fund_esta_se['基金成立日']))
        self.logger.info("按各管理人发行最早指数增强型基金的时间看,该产品发行时间占各管理人同类型的%s分位数" % esta_percent_str)
        if esta_percent >= 0.6:
            self.logger.info("发行时间早与多数管理人,一定程度上反应了管理人更丰富的投资管理经验。")
        elif 0.3 <= esta_percent < 0.6:
            self.logger.info("发行时间排名中等,管理人整体投资管理经验中等水平")
        else:
            self.logger.info("发行时间较晚,需谨慎对待管理人可能对指数增强型基金投资管理经验较短的问题")

        if target_labe:
            label_counts = {
                company_name: len(self._rows_containing(temp_df['基金全称'].tolist(), target_labe))
                for company_name, temp_df in total_company_dic.items()
            }
            # Sort ascending before ranking; ranking on the unsorted Series gave a
            # position that depended on the alphabetical order of the company names.
            label_se = pd.Series(label_counts, name=target_labe + '产品数量').sort_values()
            self.logger.info("有%s产品(含A/C类)的管理人共%s家" % (target_labe, len(label_se[label_se > 0])))
            distinct_label_counts = list(label_se.unique())
            per_rate_label = (distinct_label_counts.index(label_se[company]) + 1) / len(distinct_label_counts)
            per_rate_label_str = self._percent_text(per_rate_label * 100)
            self.logger.info(
                '其中%s旗下%s产品共%s只,占所有管理人同类型数量的%s分位数' % (company, target_labe, label_se[company], per_rate_label_str))
            self.get_target_label_fund(company_fund_df, target_labe)
        return

    def get_main(self, company='万家基金管理有限公司', manager_name='乔亮'):
        '''
        Load '指数增强评价指标.xlsx' and log the company and manager evaluation.

        :param company: fund company to evaluate
        :param manager_name: fund manager to evaluate
        '''
        df = pd.read_excel('指数增强评价指标.xlsx', index_col=0)
        self.get_comyany_info(company, df, target_labe='量化')
        self.get_manager_info(company, df, target_labe='量化', manager_name=manager_name)
131 |
132 |
if __name__ == '__main__':
    # Script entry point: run the report with the default company and manager.
    JudgeFundMain().get_main()
136 |
--------------------------------------------------------------------------------
/JudgeFund/JudgeFundQDII.py:
--------------------------------------------------------------------------------
1 | # -- coding: utf-8 --
2 |
3 | '''
4 | QDII基金评价
5 | '''
6 |
7 | import pandas as pd
8 | import mylog as mylog
9 | import numpy as np
10 | from datetime import datetime, timedelta
11 | import mylog as mylogdemo
12 | from GetAndSaveWindData.GetDataTotalMain import GetDataTotalMain
13 | import statsmodels.api as sm
14 |
class JudgeFundQDII:
    '''
    Narrative evaluation of a QDII fund (``self.target_code``), its company and its manager.

    Every finding is written to ``self.logger``; the public methods return ``None``.
    '''

    def __init__(self):
        self.logger = mylogdemo.set_log()
        self.last_date_str = datetime.today().strftime("%Y-%m-%d")
        self.target_code = '161130.OF'

    @staticmethod
    def _percent_text(value, digits=2):
        '''Return ``value`` rounded to ``digits`` decimals followed by a percent sign.'''
        return str(np.round(value, digits)) + '%'

    def get_manager_info(self, company, df, manager_name, target_labe=''):
        '''
        Log the track record of ``manager_name`` and export the target fund's return ranks.

        :param company: unused here; kept so existing callers keep working
        :param df: fund table indexed by fund code
        :param manager_name: manager name looked up in '基金经理(现任)'
        :param target_labe: product label inserted into the log text
        :return: None; also writes '<fund short name>回报排名.xlsx'
        :raises KeyError: if ``self.target_code`` is not among the manager's funds
        '''
        fund_num_list = [pos for pos, managers in enumerate(df['基金经理(现任)'].tolist())
                         if isinstance(managers, str) and manager_name in managers]
        target_df = df.iloc[fund_num_list]
        if target_df.empty:
            # The message was previously logged without being formatted.
            self.logger.info('未查询到基金经理%s管理过的QDII型产品,对其投资经验需保持谨慎!' % (manager_name,))
            return
        self.logger.info('基金经理%s,查询到其管理QDII型产品%s只(含A/C类),' % (manager_name, target_df.shape[0]))

        last_se = target_df[target_df['基金成立日'] == target_df['基金成立日'].min()].iloc[0]

        self.logger.info("其中,%s(%s),于%s成立,为该基金经理管理的最早的%s产品,最新规模为%s亿元;" % (
            last_se['证券简称'], last_se.name, last_se['基金成立日'], target_labe, last_se['基金规模亿元']))

        annual_alpha = last_se['Alpha(年化)_d_52_hs300百分']
        same_style_alpha = last_se['Alpha(年化)同类平均_d_52_hs300百分']
        total_alpha = last_se['Alpha_w_52_hs300百分']
        relation = '高于' if annual_alpha > same_style_alpha else '低于'
        self.logger.info("该基金自成立以来,近一年超额alpha收益(相对沪深300)为%s,年化alpha收益为%s,%s同类平均的年化alpha收益%s" % (
            self._percent_text(total_alpha), self._percent_text(annual_alpha), relation,
            self._percent_text(same_style_alpha)))

        current_se = target_df.loc[self.target_code]
        self.logger.info("%s成立于%s,当前规模%s亿元,规模同类排名%s" % (
            current_se['证券简称'], current_se['基金成立日'], current_se['基金规模亿元'], current_se['规模同类排名']))
        rate_name_list = [data_name for data_name in current_se.index.tolist()
                          if isinstance(data_name, str) and '回报排名' in data_name]
        # The file is named after the target fund, so export the target fund's
        # ranks (previously the earliest fund's values were written).
        rate_se = current_se.loc[rate_name_list]
        rate_se.name = '回报排名'
        rate_se.to_excel('%s回报排名.xlsx' % current_se['证券简称'])

    def get_comyany_info(self, company, df, target_labe=''):
        '''
        Log how ``company`` compares with other managers on fund count and launch date.

        :param company: value of '基金管理人' to evaluate
        :param df: fund table; '基金成立日' must hold datetime-like values because
            ``strftime`` is called on the per-company minimum
        :param target_labe: unused; kept for interface compatibility
        :return: None
        :raises ValueError: if ``company`` does not occur in '基金管理人'
        '''
        self.logger.info("截止当前最新日期%s,现有QDII型基金(含A/C类)共%s只。" % (self.last_date_str, df.shape[0]))
        total_company_dic = {company_name: temp_df for company_name, temp_df in df.groupby(by='基金管理人')}

        # Earliest launch date per company, latest first, so a later position means an earlier launch.
        total_company_esta = {company_name: temp_df['基金成立日'].min().strftime('%Y-%m-%d')
                              for company_name, temp_df in total_company_dic.items()}
        total_company_esta_se = pd.Series(total_company_esta, name='基金成立日').sort_values(ascending=False)
        esta_percent = (total_company_esta_se.index.tolist().index(company) + 1) / len(total_company_esta_se)
        esta_percent_str = self._percent_text(esta_percent * 100)

        # Fund count per company, ranked among the distinct counts in ascending order.
        num_se = pd.Series({company_name: temp_df.shape[0]
                            for company_name, temp_df in total_company_dic.items()}).sort_values()
        distinct_counts = list(num_se.unique())
        per_rate = (distinct_counts.index(num_se[company]) + 1) / len(distinct_counts)
        per_rate_str = self._percent_text(per_rate * 100, 4)
        self.logger.info('%s旗下现有QDII型产品共%s只(含A类C类),占所有管理人所持数量的%s分位数。' % (company, num_se[company], per_rate_str))
        if per_rate >= 0.7:
            self.logger.info("占比靠前,反映管理人在发行QDII型基金上的优秀运作能力。")
        elif per_rate > 0.4:
            self.logger.info("占比中等,管理人发行QDII型基金的数量一般。")
        else:
            self.logger.info('占比下游,管理人对QDII型基金发行数量较少。')

        company_fund_df = df[df['基金管理人'] == company]
        min_fund_esta = company_fund_df['基金成立日'].min()
        min_fund_esta_se = company_fund_df[company_fund_df['基金成立日'] == min_fund_esta].iloc[0]
        self.logger.info("其中,%s(%s),基金经理%s,于%s成立,为该管理人旗下成立最早的产品;" % (
            min_fund_esta_se['证券简称'], min_fund_esta_se.name, min_fund_esta_se['基金经理(现任)'],
            min_fund_esta_se['基金成立日']))
        self.logger.info("按各管理人发行最早QDII型基金的时间看,该产品发行时间占各管理人同类型的%s分位数" % esta_percent_str)
        if esta_percent >= 0.6:
            self.logger.info("发行时间早与多数管理人,一定程度上反应了管理人更丰富的投资管理经验。")
        elif 0.3 <= esta_percent < 0.6:
            self.logger.info("发行时间排名中等,管理人整体投资管理经验中等水平")
        else:
            self.logger.info("发行时间较晚,需谨慎对待管理人可能对QDII型基金投资管理经验较短的问题")

    def get_track(self):
        '''
        Log the tracking-error percentile of ``self.target_code``.

        Reads "QDii跟踪.xlsx" and ranks the non-null '跟踪误差(跟踪指数)' values
        from largest to smallest, so a higher percentile means a smaller error.

        :return: None
        :raises ValueError: if the target fund has no tracking-error value
        '''
        df = pd.read_excel("QDii跟踪.xlsx", index_col=0)

        track_error = df['跟踪误差(跟踪指数)'].sort_values(ascending=False).dropna()
        esta_percent = (track_error.index.tolist().index(self.target_code) + 1) / len(track_error)
        esta_percent_str = self._percent_text(esta_percent * 100)
        self.logger.info("从对跟踪指数的表现来看,近一年的周度收益跟踪误差为%s,占同类产品跟踪各自指数的%s分位数" % (
            track_error[self.target_code], esta_percent_str))
        if esta_percent >= 0.7:
            self.logger.info("对指数的跟踪表现优秀,跟踪指数能力居同类上游")
        elif 0.4 <= esta_percent < 0.7:
            self.logger.info("对指数的跟踪表现一般,跟踪指数能力中等水平")
        else:
            self.logger.info("跟踪指数能力排名靠后,跟踪能力较差")

    def get_main(self):
        '''Load "QDII被动.xlsx" and run the company, manager and tracking reports.'''
        df = pd.read_excel("QDII被动.xlsx", index_col=0)
        self.get_comyany_info(company='易方达基金管理有限公司', df=df)
        self.get_manager_info(company='易方达基金管理有限公司', df=df, manager_name='范冰')
        self.get_track()
115 |
if __name__ == '__main__':
    # Script entry point: run the QDII evaluation end to end.
    JudgeFundQDII().get_main()
--------------------------------------------------------------------------------
/JudgeFund/JudgeFundTopic.py:
--------------------------------------------------------------------------------
1 | # -- coding: utf-8 --
2 |
3 | '''
4 | 规模ETF评价
5 | '''
6 |
7 | import pandas as pd
8 | import mylog as mylog
9 | import numpy as np
10 | from datetime import datetime, timedelta
11 | import mylog as mylogdemo
12 | from GetAndSaveWindData.GetDataTotalMain import GetDataTotalMain
13 | import statsmodels.api as sm
14 |
class JudgeFundTopic:
    '''
    Narrative evaluation of an ETF (``self.target_code``), its company and its manager.

    Every finding is written to ``self.logger``; the public methods return ``None``.
    '''

    def __init__(self):
        self.logger = mylogdemo.set_log()
        self.last_date_str = datetime.today().strftime("%Y-%m-%d")
        self.target_code = '159995.SZ'

    @staticmethod
    def _percent_text(value, digits=2):
        '''Return ``value`` rounded to ``digits`` decimals followed by a percent sign.'''
        return str(np.round(value, digits)) + '%'

    def get_manager_info(self, company, df, manager_name, target_labe=''):
        '''
        Log the track record of ``manager_name`` and export the target fund's return ranks.

        :param company: unused here; kept so existing callers keep working
        :param df: fund table indexed by fund code
        :param manager_name: manager name looked up in '基金经理(现任)'
        :param target_labe: product label inserted into the log text
        :return: None; on success also writes '<fund short name>回报排名.xlsx'
        '''
        fund_num_list = [pos for pos, managers in enumerate(df['基金经理(现任)'].tolist())
                         if isinstance(managers, str) and manager_name in managers]
        target_df = df.iloc[fund_num_list]
        if target_df.empty:
            # The message was previously logged without being formatted.
            self.logger.info('未查询到基金经理%s管理过的规模型产品,对其投资经验需保持谨慎!' % (manager_name,))
            return
        self.logger.info('基金经理%s,查询到其管理规模型产品%s只(含A/C类),' % (manager_name, target_df.shape[0]))

        last_se = target_df[target_df['基金成立日'] == target_df['基金成立日'].min()].iloc[0]

        self.logger.info("其中,%s(%s),于%s成立,为该基金经理管理的最早的%s产品,最新规模为%s亿元;" % (
            last_se['证券简称'], last_se.name, last_se['基金成立日'], target_labe, last_se['基金规模亿元']))

        annual_alpha = last_se['Alpha(年化)_d_52_hs300百分']
        same_style_alpha = last_se['Alpha(年化)同类平均_d_52_hs300百分']
        total_alpha = last_se['Alpha_w_52_hs300百分']
        relation = '高于' if annual_alpha > same_style_alpha else '低于'
        self.logger.info("该基金自成立以来,近一年超额alpha收益(相对沪深300)为%s,年化alpha收益为%s,%s同类平均的年化alpha收益%s" % (
            self._percent_text(total_alpha), self._percent_text(annual_alpha), relation,
            self._percent_text(same_style_alpha)))

        try:
            current_se = target_df.loc[self.target_code]
            self.logger.info("%s成立于%s,当前规模%s亿元,规模同类排名%s" % (
                current_se['证券简称'], current_se['基金成立日'], current_se['基金规模亿元'], current_se['规模同类排名']))
            rate_name_list = [data_name for data_name in current_se.index.tolist()
                              if isinstance(data_name, str) and '回报排名' in data_name]
            # The file is named after the target fund, so export the target fund's
            # ranks (previously the earliest fund's values were written).
            rate_se = current_se.loc[rate_name_list]
            rate_se.name = '回报排名'
            rate_se.to_excel('%s回报排名.xlsx' % current_se['证券简称'])
        except Exception:
            # Best effort: the target fund may not be run by this manager. Catching
            # Exception rather than everything lets Ctrl-C and SystemExit propagate.
            self.logger.info("出错,请检查!")

    def get_comyany_info(self, company, df, target_labe=''):
        '''
        Log how ``company`` compares with other managers on fund count and launch date.

        :param company: value of '基金管理人' to evaluate
        :param df: fund table; '基金成立日' must hold datetime-like values because
            ``strftime`` is called on the per-company minimum
        :param target_labe: unused; kept for interface compatibility
        :return: None
        :raises ValueError: if ``company`` does not occur in '基金管理人'
        '''
        self.logger.info("截止当前最新日期%s,现有规模型基金(含A/C类)共%s只。" % (self.last_date_str, df.shape[0]))
        total_company_dic = {company_name: temp_df for company_name, temp_df in df.groupby(by='基金管理人')}

        # Earliest launch date per company, latest first, so a later position means an earlier launch.
        total_company_esta = {company_name: temp_df['基金成立日'].min().strftime('%Y-%m-%d')
                              for company_name, temp_df in total_company_dic.items()}
        total_company_esta_se = pd.Series(total_company_esta, name='基金成立日').sort_values(ascending=False)
        esta_percent = (total_company_esta_se.index.tolist().index(company) + 1) / len(total_company_esta_se)
        esta_percent_str = self._percent_text(esta_percent * 100)

        # Fund count per company, ranked among the distinct counts in ascending order.
        num_se = pd.Series({company_name: temp_df.shape[0]
                            for company_name, temp_df in total_company_dic.items()}).sort_values()
        distinct_counts = list(num_se.unique())
        per_rate = (distinct_counts.index(num_se[company]) + 1) / len(distinct_counts)
        per_rate_str = self._percent_text(per_rate * 100, 4)
        self.logger.info('%s旗下现有规模型产品共%s只(含A类C类),占所有管理人所持数量的%s分位数。' % (company, num_se[company], per_rate_str))
        if per_rate >= 0.7:
            self.logger.info("占比靠前,反映管理人在发行规模型基金上的优秀运作能力。")
        elif per_rate > 0.4:
            self.logger.info("占比中等,管理人发行规模型基金的数量一般。")
        else:
            self.logger.info('占比下游,管理人对规模型基金发行数量较少。')

        company_fund_df = df[df['基金管理人'] == company]
        min_fund_esta = company_fund_df['基金成立日'].min()
        min_fund_esta_se = company_fund_df[company_fund_df['基金成立日'] == min_fund_esta].iloc[0]
        self.logger.info("其中,%s(%s),基金经理%s,于%s成立,为该管理人旗下成立最早的产品;" % (
            min_fund_esta_se['证券简称'], min_fund_esta_se.name, min_fund_esta_se['基金经理(现任)'],
            min_fund_esta_se['基金成立日']))
        self.logger.info("按各管理人发行最早规模型基金的时间看,该产品发行时间占各管理人同类型的%s分位数" % esta_percent_str)
        if esta_percent >= 0.6:
            self.logger.info("发行时间早与多数管理人,一定程度上反应了管理人更丰富的投资管理经验。")
        elif 0.3 <= esta_percent < 0.6:
            self.logger.info("发行时间排名中等,管理人整体投资管理经验中等水平")
        else:
            self.logger.info("发行时间较晚,需谨慎对待管理人可能对规模型基金投资管理经验较短的问题")

    def get_main(self):
        '''Load the sector-ETF workbook and run the company and manager reports.'''
        # Other workbooks this report has been run on: "规模ETF.xlsx" (working
        # directory) and "跨境ETF.xlsx" (same folder as below).
        file_path = "D:\\工作文件\\产品评价\\"
        df = pd.read_excel(file_path + "行业ETF.xlsx", index_col=0)
        self.get_comyany_info(company='华夏基金管理有限公司', df=df)
        self.get_manager_info(company='华夏基金管理有限公司', df=df, manager_name='赵宗庭')
101 |
if __name__ == '__main__':
    # Script entry point: run the ETF evaluation end to end.
    JudgeFundTopic().get_main()
--------------------------------------------------------------------------------
/JudgeFund/YunFeiCalc.py:
--------------------------------------------------------------------------------
1 | # -- coding: utf-8 --
2 |
3 |
4 |
5 | '''
6 | 004253.OF 国泰黄金ETF,投资金额20444.18,成本价1.3430;
7 | 001023.OF 华夏亚债中国C,投资金额14058.63,成本价1.2560;
8 | 005658.OF 华夏沪深300ETFC,投资金额5615.57,成本价1.3580;
9 | 005659.OF 南方恒生ETFC,投资金额7068.28,成本价1.0367;
10 | 004253.OF 标普500ETF,投资金额13560,成本价2.3594;
11 | '''
12 |
13 |
14 | from datetime import datetime,timedelta
15 |
class YunFeiCalc:
    '''Profit calculator for a fixed personal portfolio; all figures are hard-coded.'''

    def __init__(self):
        pass

    def calc_main(self):
        '''
        Print the profit of the five non-money-market positions, the number of
        days since 2020-03-05, the money-market profit over those days, and the total.

        :return: None
        '''
        def gain(amount, cost, price):
            # Profit of a position bought at ``cost`` and now valued at ``price``.
            return amount * (price / cost - 1)

        au9999 = 1.4680
        bond = 1.1290
        hs300 = 1.6341
        bp500 = 2.6630
        hsetf = 1.0160

        asset1 = gain(20444.18, 1.3430, au9999)
        # Bond position: 561.32 is added as-is, the remainder is marked to the current price.
        asset2 = 561.32 + gain(14058.63 * (1.2600 / 1.2560) - 561.32, 1.2560, bond)
        asset3 = gain(5615.57, 1.3580, hs300)
        asset4 = gain(13560, 2.3594, bp500)
        asset5 = gain(7068.28, 1.0367, hsetf)

        total_earn = asset1 + asset2 + asset3 + asset4 + asset5
        print("排除货币基金,共盈利:%s" % total_earn)

        build_date = datetime.strptime("2020-03-05", "%Y-%m-%d")
        jgday = (datetime.today() - build_date).days
        print("货基建仓天数%s" % jgday)

        hbjj = 40000 * 0.0262 * jgday / 365
        print("货基盈利%s" % hbjj)

        print("总共盈利%s" % (total_earn + hbjj))
46 |
47 |
if __name__ == '__main__':
    # Script entry point: print the portfolio profit summary.
    YunFeiCalc().calc_main()
--------------------------------------------------------------------------------
/JudgeText.py:
--------------------------------------------------------------------------------
1 | # -- coding: utf-8 --
2 | # Author:zouhao
3 | # email:1084848158@qq.com
4 | '''
5 | 基于分析结果,给出标准化输出评价
6 | '''
7 |
8 | import pandas as pd
9 | import os
10 |
11 |
class JudgeText:
    '''Builds the standardized written evaluation from previously saved analysis results.'''

    def __init__(self, ):
        pass

    def getCompareIndex(self, tempDic, fundName, compareName, formatFlag=True, rightFlag=True):
        '''
        Compare the fund with the market indices over the same period.

        :param tempDic: mapping of name -> metric value; must contain ``fundName``
        :param fundName: key of the fund inside ``tempDic``
        :param compareName: name of the metric being compared
        :param formatFlag: when True values are shown as percentages with two decimals,
            otherwise as plain numbers with two decimals
        :param rightFlag: True when a larger value is better
        :return: the comparison sentence
        '''
        fundValue = tempDic[fundName]
        valueTemplate = ',%.2f%%' if formatFlag else ',%.2f'
        scale = 100 if formatFlag else 1

        indexParts = []
        lowerNames = []   # indices whose value is below the fund's
        otherNames = []   # indices whose value is equal to or above the fund's
        for name, value in tempDic.items():
            if name == fundName:
                continue
            shown = value * scale if formatFlag else value
            indexParts.append(name + valueTemplate % shown + ',')
            (lowerNames if value < fundValue else otherNames).append(name)
        indexTradeStr = ''.join(indexParts)

        if formatFlag:
            resultText = "该产品的%s为%.2f%%" % (compareName, fundValue * 100) + ',同期市场变现为%s' % (indexTradeStr)
        else:
            resultText = "该产品的%s为%.2f" % (compareName, fundValue) + ',同期市场变现为%s' % (indexTradeStr)

        # When larger is better the fund beats the indices below it; otherwise
        # it beats the indices at or above it.
        strongerThan, weakerThan = (lowerNames, otherNames) if rightFlag else (otherNames, lowerNames)
        if strongerThan:
            resultText = resultText + '即,强于%s' % (','.join(strongerThan))
        if weakerThan:
            resultText = resultText + '较弱与%s' % (','.join(weakerThan))
        return resultText

    @staticmethod
    def _describeFit(regressionDf, maxNum, goodTemplate, weakText, noneText):
        '''
        Pick the detail sentence for a regression table with a '拟合R方' column.

        Returns ``goodTemplate`` filled with at most ``maxNum`` "name(code)" entries
        when some row has R² >= 0.7, ``weakText`` when the best R² is in [0.1, 0.7),
        and ``noneText`` when no row reaches 0.1.
        '''
        goodDf = regressionDf[regressionDf['拟合R方'] >= 0.7]
        if goodDf.empty:
            if regressionDf[regressionDf['拟合R方'] >= 0.1].empty:
                return noneText
            return weakText
        topDf = goodDf.iloc[:maxNum]
        labels = [codeName + '(%s)' % code
                  for code, codeName in zip(topDf['指数代码'].tolist(), topDf['指数名称'].tolist())]
        return goodTemplate % (','.join(labels)) + '\n'

    def getNetJudgeText(self, totalIndexName, fundName='华夏大盘精选', fundCode='000011.OF'):
        '''
        Assemble the overall evaluation text and write it to "综合评价结论.txt".

        Input workbooks are read from the "分析结果/<fundName>" folder under the current
        working directory: weekly win rate, risk/return statistics, industry
        regression and style regression. The first row of each regression workbook
        is reported as the best fit.

        :param totalIndexName: names of the market indices used as the market portfolio
        :param fundName: fund name; also the name of the result folder
        :param fundCode: fund code shown in the overview paragraph
        :return: None
        '''
        resultPath = os.getcwd() + r"\\分析结果\\%s\\" % fundName
        gaiKuoTxt = (
            "基金绩效评价在证券分析是重要的一部分。绩效评价系统性地可分为三个部分:最基本的绩效衡量,"
            "深层次的绩效归因,以及最终成熟的绩效评价。\n本文选择%s(%s)为基金研究对象,"
            "市场组合用%s来近似,对该基金在期间的绩"
            "效进行评估。\n全文分为5个部分:基金绩效衡量概况,在这里最基金绩效进行基本指标的度量;选股择时能力,"
            "利用CAPM的衍生模型来度量该基金的选股择时能力;\n多因子归因,从多个因子的角度来做基金收益率做归因;"
            "行业归因;风格归因。"
        ) % (fundName, fundCode, '、'.join(totalIndexName)) + '\n\n'
        riskReturnText = "风险收益统计指标结果:\n"

        weekSuccessDf = pd.read_excel(resultPath + "周度胜率统计.xlsx", index_col=0)
        trandSuccess = weekSuccessDf.loc['正交易周'].to_dict()
        weekSuccessText = '(1)交易胜率层面来看,' + self.getCompareIndex(
            trandSuccess, fundName, compareName='周度交易胜率') + '\n'

        riskReturnDf = pd.read_excel(resultPath + "风险收益统计指标原始数据.xlsx", index_col=[0, 1])
        chengLiRiskReturnDf = riskReturnDf.loc['成立以来']

        def sinceInception(metricName):
            # One metric row as a dict, without the data cut-off date entry.
            values = chengLiRiskReturnDf.loc[metricName].to_dict()
            values.pop('数据截止日期')
            return values

        annualReturnText = '(2)收益方面,' + self.getCompareIndex(
            sinceInception('年化收益'), fundName, compareName='年化收益') + '\n'

        riskText = '(3)风险方面,' + self.getCompareIndex(
            sinceInception('年化波动'), fundName, compareName='年化波动', rightFlag=False)
        downStdText = self.getCompareIndex(
            sinceInception('下行风险'), fundName, compareName='下行风险', rightFlag=False) + '\n'
        riskText = riskText + '\n 从下行风险角度来看,' + downStdText
        downText = self.getCompareIndex(
            sinceInception('最大回撤'), fundName, compareName='最大回撤', rightFlag=False) + '\n'
        riskText = riskText + ' 最大回撤方面,' + downText

        sharpText = '(4)投资效率来看,' + self.getCompareIndex(
            sinceInception('夏普比率'), fundName, compareName='夏普比率', formatFlag=False) + '\n'

        riskReturnText = riskReturnText + weekSuccessText + annualReturnText + riskText + sharpText
        totalText = gaiKuoTxt + riskReturnText

        indusrtyAndStyleText = '\n行业归因结果:\n'
        indusrtyRegressionDf = pd.read_excel(resultPath + "行业回归结果.xlsx", )
        bestIndustry = indusrtyRegressionDf.iloc[0].to_dict()
        industryText = "该产品拟合效果最好的行业为%s(%s),其回归后的拟合R方为%.2f%%" % (
            bestIndustry['指数名称'], bestIndustry['指数代码'], bestIndustry['拟合R方'] * 100) + '\n'
        totalNum = 5  # maximum number of well-fitting industries listed
        industryDetailTxt = self._describeFit(
            indusrtyRegressionDf, totalNum,
            goodTemplate="从行业回归结果来看,该产品拟合效果较为优秀的行业指数(R方大于0.7)主要有%s,"
                         "可对比该产品的投资类型给出判断。",
            weakText="从行业回归结果来看,该产品无拟合效果较为优秀的行业指数(R方大于0.7),"
                     "这可能是用于产品运作过程中,持仓个股的行业变化较为频繁带来的,可结合进一步的持仓分析综合来看",
            noneText="从行业回归结果来看,该产品不存在具有一定相关性的行业指数(R方大于0.1),"
                     "这可能是用于产品运作过程中,持仓个股的行业变化极其频繁,行业分布极其分散,也可能是产品"
                     "有对冲市场beta风险的操作所带来的,可结合进一步的持仓分析,同时期的市场风险收益指标等综合来看")
        indusrtyAndStyleText = indusrtyAndStyleText + industryText + industryDetailTxt

        indusrtyAndStyleText = indusrtyAndStyleText + '\n风格归因结果:\n'
        styleRegressionDf = pd.read_excel(resultPath + "风格回归结果.xlsx", )
        bestStyle = styleRegressionDf.iloc[0].to_dict()
        styleText = "该产品拟合效果最好的风格指数为%s(%s),其回归后的拟合R方为%.2f%%" % (
            bestStyle['指数名称'], bestStyle['指数代码'], bestStyle['拟合R方'] * 100) + '\n'
        # Maximum number of well-fitting style indices listed. The slice previously
        # used the industry limit (5) by mistake. The two fallback sentences below
        # also gained their missing closing parenthesis.
        totalStyleNum = 3
        styleDetailTxt = self._describeFit(
            styleRegressionDf, totalStyleNum,
            goodTemplate="从风格回归结果来看,该产品拟合效果较为优秀的风格指数(R方大于0.7)主要有%s,"
                         "可对比该产品的投资风格给出判断。",
            weakText="从风格回归结果来看,该产品无拟合效果较为优秀的风格指数(R方大于0.7),"
                     "这可能是用于产品运作过程中,基金经理投资风格较为灵活,可结合进一步的持仓分析综合来看",
            noneText="从风格回归结果来看,该产品不存在具有一定相关性的风格指数(R方大于0.1),"
                     "这可能是用于产品运作过程中,基金经理投资风格极其灵活多变,或采用了衍生品对冲系统风险带来的,可结合进一步的持仓分析综合来看")
        indusrtyAndStyleText = indusrtyAndStyleText + styleText + styleDetailTxt
        totalText = totalText + indusrtyAndStyleText

        # Context manager so the file is closed even if the write fails.
        with open(resultPath + "综合评价结论.txt", "w+") as f:
            f.write(totalText)
168 |
if __name__ == "__main__":
    # Instantiation only; no report is generated when the module is run as a script.
    JudgeTextDemo = JudgeText()
172 |
--------------------------------------------------------------------------------
/MonthReportData/CalcHXBCorr.py:
--------------------------------------------------------------------------------
1 | # -- coding: utf-8 --
2 |
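'''
Compute the correlation between fund returns and CSI index returns,
and export an overview of the passive index products managed by each fund company.
'''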
6 |
7 | import pandas as pd
8 | from GetAndSaveWindData.MysqlCon import MysqlCon
9 | from GetAndSaveWindData.GetDataTotalMain import GetDataTotalMain
10 | from datetime import datetime
11 | import os
12 |
13 |
class CalcHXBCorr:
    '''
    Exports a per-company overview of passive index products and the correlation
    between each fund's returns and a fixed set of CSI indices.
    '''

    def __init__(self):
        self.GetDataTotalMainDemo = GetDataTotalMain(data_resource='wind')

    def get_Data(self):
        '''Placeholder; does nothing.'''
        pass

    def _export_company_overview(self):
        '''
        Write one sheet per fund company to "基金公司管理产品概况.xlsx" in the
        working directory, companies ordered by total '基金规模' (largest first).
        '''
        df1 = pd.read_excel("被动指数产品.xlsx", index_col=0)
        dic_size = {fund_ma: tempdf['基金规模'].sum() for fund_ma, tempdf in df1.groupby(by='基金管理人')}
        size_se = pd.Series(dic_size, name='基金公司管理规模').sort_values(ascending=False)

        save_path = os.getcwd() + '\\基金公司管理产品概况.xlsx'
        # ExcelWriter.save() no longer exists in pandas 2.x; the context manager
        # saves and closes the workbook on every supported pandas version.
        with pd.ExcelWriter(save_path) as writer:
            for manage_name in size_se.index.tolist():
                df1.loc[df1['基金管理人'] == manage_name].to_excel(writer, sheet_name=manage_name)

    def _load_fund_returns(self, df):
        '''Return {fund name: return frame} over each fund's own date window; returns use accumulated NAV.'''
        dic_df = {}
        for code in df.index.tolist():
            fund_row = df.loc[code]
            start_date = fund_row['起始日'].strftime("%Y-%m-%d")
            end_date = fund_row['结尾日'].strftime("%Y-%m-%d")
            temp_df = self.GetDataTotalMainDemo.get_hq_data(code, code_style='fund', start_date=start_date,
                                                            end_date=end_date, name_list=['acc_net_value'])
            temp_df.rename(columns={"acc_net_value": code}, inplace=True)
            temp_return_df = temp_df / temp_df.shift(1) - 1
            temp_return_df.dropna(inplace=True)
            dic_df[fund_row['名称']] = temp_return_df
        return dic_df

    def _load_index_returns(self, min_date, max_date):
        '''Return the return frame of the benchmark indices, columns renamed to index names.'''
        # Insertion order fixes the column order of the exported correlation table.
        name_dic = {'000300.SH': '沪深300', '000905.SH': '中证500', '000852.SH': '中证1000',
                    '000935.SH': '中证信息', '000933.SH': '中证医药', '000932.SH': '中证消费',
                    '000936.CSI': '中证电信', '000934.SH': '中证金融', '000931.CSI': '中证可选',
                    '000930.CSI': '中证工业', '000929.CSI': '中证材料', '000937.CSI': '中证公用',
                    '000928.SH': '中证能源'}

        index_df_list = []
        for code in name_dic:
            temp_df = self.GetDataTotalMainDemo.get_hq_data(code, code_style='index', start_date=min_date,
                                                            end_date=max_date, )
            temp_df.rename(columns={"close_price": code}, inplace=True)
            index_df_list.append(temp_df)
        index_df = pd.concat(index_df_list, axis=1, sort=True)
        index_df.dropna(inplace=True)
        index_return_df = index_df / index_df.shift(1) - 1
        index_return_df.rename(columns=name_dic, inplace=True)
        return index_return_df

    def get_main(self):
        '''
        Run both exports.

        1. Read "被动指数产品.xlsx" and write the per-company overview workbook.
        2. Read "基金发行明细.xlsx" (sheet 'Sheet1'), fetch fund and index prices
           through ``self.GetDataTotalMainDemo.get_hq_data`` and write each fund's
           correlation with every index to "基金相关系数.xlsx" (one row per fund).

        :return: None
        '''
        self._export_company_overview()

        df = pd.read_excel("基金发行明细.xlsx", sheet_name='Sheet1', index_col=0)
        dic_df = self._load_fund_returns(df)

        min_date = df['起始日'].min().strftime("%Y-%m-%d")
        max_date = df['结尾日'].max().strftime("%Y-%m-%d")
        index_return_df = self._load_index_returns(min_date, max_date)

        df_list = []
        for fund_name, fund_df in dic_df.items():
            fund_dates = fund_df.index.tolist()
            start_corr_date, end_corr_date = fund_dates[0], fund_dates[-1]
            in_window = (index_return_df.index >= start_corr_date) & (index_return_df.index <= end_corr_date)
            fund_index_df = pd.concat([fund_df, index_return_df.loc[in_window]], axis=1, sort=True)
            # First row of the correlation matrix is the fund; drop its self-correlation.
            temp_Se = fund_index_df.corr().iloc[0][1:]
            df_list.append(pd.DataFrame(temp_Se.values, columns=[fund_name], index=temp_Se.index.tolist()))
        final_df = pd.concat(df_list, axis=1, sort=True).T
        final_df.to_excel("基金相关系数.xlsx")
83 |
84 |
if __name__ == '__main__':
    # Script entry point: run the whole workflow of this module.
    CalcHXBCorr().get_main()
88 |
--------------------------------------------------------------------------------
/MonthReportData/FundEst.py:
--------------------------------------------------------------------------------
1 | # -- coding: utf-8 --
2 |
3 | '''
4 | 对本月的基金状况统计,输出ppt中所用基金表格数据
5 | '''
6 |
7 | import pandas as pd
8 | from datetime import datetime
9 | import mylog as mylog
10 | from GetAndSaveWindData.MysqlCon import MysqlCon
11 | import numpy as np
12 | from WindPy import w
13 | import os
14 |
15 |
class FundEst:
    """Build the per-category index-fund tables used in the monthly report.

    Every workbook found in ``file_loc`` is loaded and, depending on the
    category keyword contained in its file name, summarised into one or more
    Excel files written to ``ppt_loc``.
    """

    def __init__(self):
        # Input folder (one workbook per fund category) and output folder.
        self.file_loc = r"D:\\工作文件\\指数基金月报\\202011\\基金概况\\"
        self.ppt_loc = r"D:\\工作文件\\指数基金月报\\202011\\基金ppt概况\\"
        # Columns, in output order, written to every report table.
        self.name_se = [
            '基金代码', '基金简称', '基金成立日', '基金规模(亿元)', '基金管理人', '跟踪指数', '近1月(%)', '近3月(%)', '近6月(%)', '近1年(%)', '近3年(%)',
            '今年以来(%)', '近一年最大回撤', '夏普比率', '年化波动', '年化收益', ]

    def get_data(self):
        """Load every Excel workbook found in ``file_loc``.

        Returns:
            dict: file name without extension -> DataFrame read with the first
            column as index; that index is also copied into a '基金代码' column.
        """
        dic_df = {}
        for fund_file_name in os.listdir(self.file_loc):
            stem, ext = os.path.splitext(fund_file_name)
            # Skip Excel lock files ("~$...") and anything that is not a
            # workbook instead of letting read_excel fail on them.
            if fund_file_name.startswith('~$') or ext.lower() not in ('.xlsx', '.xlsm', '.xls'):
                continue
            temp_df = pd.read_excel(self.file_loc + fund_file_name, index_col=0)
            temp_df['基金代码'] = temp_df.index
            dic_df[stem] = temp_df
        return dic_df

    def _export_grouped_by_index(self, fund_df, file_name):
        """Order ``fund_df`` by tracked index code, then fund size, and save it.

        Args:
            fund_df: funds to export; needs the '基金规模(亿元)' and
                '跟踪指数代码' columns plus the columns listed in ``name_se``
                (except '跟踪指数', which is produced by renaming the code
                column).
            file_name: name of the workbook created inside ``ppt_loc``.

        Nothing is written when no fund is left to export.
        """
        sorted_df = fund_df.sort_values(by='基金规模(亿元)')
        df_list = [group_df for _, group_df in sorted_df.groupby(by='跟踪指数代码')]
        if not df_list:
            # pd.concat raises ValueError on an empty list.
            print("无符合条件的基金,跳过生成%s" % file_name)
            return
        total_df = pd.concat(df_list, axis=0, sort=True)
        total_df = total_df.rename(columns={"跟踪指数代码": "跟踪指数"})
        total_df[self.name_se].to_excel(self.ppt_loc + file_name, index=False)

    def calc_size_fund(self, temp_df):
        """Write the tables for funds tracking broad-based (size) indexes.

        One workbook is written for each of the four major indexes, keeping
        the N largest funds ordered by one-month return, plus one combined
        workbook for the remaining listed indexes (funds with size >= 3 only).
        """
        # (tracked index code, display name, number of largest funds kept)
        major_indexes = [
            ('000300.SH', '沪深300', 25),
            ('000905.SH', '中证500', 15),
            ('000016.SH', '上证50', 3),
            ('399006.SZ', '创业板指', 3),
        ]
        for index_code, index_name, top_n in major_indexes:
            index_df = temp_df[temp_df['跟踪指数代码'] == index_code]
            top_df = index_df.sort_values(by='基金规模(亿元)', ascending=False).iloc[:top_n]
            top_df = top_df.sort_values(by='近1月(%)', ascending=False).drop('跟踪指数代码', axis=1)
            top_df['跟踪指数'] = index_name
            top_df[self.name_se].to_excel(self.ppt_loc + "跟踪%s概况.xlsx" % index_name, index=False)

        other_code_dic = {'399330.SZ': '深证100', '000906.SH': '中证800', '000903.SH': "中证100", "399001.SZ": "深证成指",
                          "000010.SH": "上证180", "000001.SH": "上证指数", "399005.SZ": "中小板指", "000688.SH": "科创50"}
        df_list = []
        for index_code, other_df in temp_df.groupby(by='跟踪指数代码'):
            target_df = other_df[other_df['基金规模(亿元)'] >= 3]
            if index_code in other_code_dic and not target_df.empty:
                temp_indexcode_df = target_df.sort_values(by='基金规模(亿元)', ascending=False).sort_values(
                    by='近1月(%)', ascending=False).drop('跟踪指数代码', axis=1)
                temp_indexcode_df['跟踪指数'] = other_code_dic[index_code]
                df_list.append(temp_indexcode_df)

        if df_list:
            other_target_df = pd.concat(df_list, axis=0, sort=True)
            other_target_df[self.name_se].to_excel(self.ppt_loc + "跟踪其他指数概况.xlsx", index=False)

    def calc_topic_fund(self, temp_df):
        """Write the table of thematic-index funds (size >= 20), one per index."""
        # sort_values returns a new frame, so the filtered slice of the
        # caller's DataFrame is never modified in place.
        temp_topic_df = temp_df[temp_df['基金规模(亿元)'] >= 20].sort_values(by='基金规模(亿元)')

        # NOTE(review): rows are sorted by size ascending, so the first row of
        # each group is the SMALLEST qualifying fund per index - confirm that
        # the largest fund was not the intended pick.
        df_list = [group_df.iloc[:1] for _, group_df in temp_topic_df.groupby(by='跟踪指数代码')]
        if not df_list:
            # pd.concat raises ValueError on an empty list.
            print("无符合条件的基金,跳过生成%s" % "跟踪主题指数概况.xlsx")
            return
        total_df = pd.concat(df_list, axis=0, sort=True)
        total_df = total_df.sort_values(by='近1月(%)', ascending=False)
        total_df = total_df.rename(columns={"跟踪指数代码": "跟踪指数"})
        total_df[self.name_se].to_excel(self.ppt_loc + "跟踪主题指数概况.xlsx", index=False)

    def calc_indus_fund(self, temp_df):
        """Write the table of industry-index funds with size >= 10."""
        self._export_grouped_by_index(temp_df[temp_df['基金规模(亿元)'] >= 10], "跟踪行业指数概况.xlsx")

    def calc_strate_fund(self, temp_df):
        """Write the table of strategy-index funds with size >= 5."""
        self._export_grouped_by_index(temp_df[temp_df['基金规模(亿元)'] >= 5], "跟踪策略指数概况.xlsx")

    def calc_style_fund(self, temp_df):
        """Write the table of style-index funds (no size filter)."""
        self._export_grouped_by_index(temp_df, "跟踪风格指数概况.xlsx")

    def get_main(self):
        """Load all category workbooks and dispatch each one by file name."""
        # Checked in this order; the first keyword found in the name wins.
        handlers = (
            ('规模', self.calc_size_fund),
            ('主题', self.calc_topic_fund),
            ('行业', self.calc_indus_fund),
            ('策略', self.calc_strate_fund),
            ('风格', self.calc_style_fund),
        )
        for file_name, temp_df in self.get_data().items():
            for keyword, handler in handlers:
                if keyword in file_name:
                    handler(temp_df)
                    break
139 |
140 |
if __name__ == '__main__':
    # Script entry point: generate all monthly fund tables.
    FundEst().get_main()
144 |
--------------------------------------------------------------------------------
/MonthReportData/FundImproveEst.py:
--------------------------------------------------------------------------------
1 | # -- coding: utf-8 --
2 |
3 | '''
4 | 对本月的增强型指数基金状况统计,输出ppt中所用基金表格数据
5 | '''
6 |
7 | import pandas as pd
8 | from datetime import datetime
9 | import mylog as mylog
10 | from GetAndSaveWindData.MysqlCon import MysqlCon
11 | import numpy as np
12 | from WindPy import w
13 | import os
14 | w.start()
15 |
class FundImproveEst:
    """Monthly statistics for enhanced index funds, exported for the report.

    Fund metrics are downloaded from Wind (``w.wss``), saved as one overview
    workbook and then split into per-benchmark tables with the one-month
    excess return over the tracked index.
    """

    def __init__(self):
        self.file_path = r'D:\\工作文件\\'
        self.file_loc = r"D:\\工作文件\\指数基金月报\\202011\\"
        self.file_target = r"D:\\工作文件\\指数基金月报\\202011\\基金ppt概况\\"
        # Columns, in output order, written to every report table.
        self.name_se = [
            '基金代码', '基金简称', '基金成立日', '基金规模(亿元)', '基金管理人', '跟踪指数', '近一月超额(%)', '近1月(%)', '近3月(%)', '近6月(%)', '近1年(%)', '近3年(%)',
            '今年以来(%)', '近一年最大回撤', '夏普比率', '年化波动', '年化收益', ]

    def get_data(self):
        """Download the fund metrics from Wind and save the overview workbook.

        Returns:
            DataFrame indexed by fund code and sorted by fund size
            (descending), or ``None`` when Wind reports an error.
        """
        total_improve_df = pd.read_excel(r'D:\\工作文件\\增强指数基金11月.xlsx')
        name_dic = {"fund_setupdate": "基金成立日", "netasset_total": "基金规模(亿元)", "fund_trackerror_threshold": "年化跟踪误差(%)",
                    "fund_corp_fundmanagementcompany": "基金管理人", "fund_trackindexcode": "跟踪指数代码",
                    "nav": "单位净值", "return_1m": "近1月(%)", "return_3m": "近3月(%)", "return_ytd": "今年以来(%)",
                    "return_1y": "近1年(%)", "risk_returnyearly": "年化收益", "risk_stdevyearly": "年化波动",
                    "sec_name": "基金简称", "return_6m": "近6月(%)", "return_3y": "近3年(%)", "risk_sharpe": "夏普比率",
                    "risk_maxdownside": "近一年最大回撤"}
        # The result columns are renamed through the upper-cased field names.
        upper_name_dic = {key.upper(): value for key, value in name_dic.items()}

        total_code_list = total_improve_df['证券代码'].tolist()
        fields = "sec_name,fund_setupdate,netasset_total,fund_corp_fundmanagementcompany,fund_trackindexcode," \
                 "return_1m,return_3m,return_6m,return_1y,return_3y,return_ytd,risk_sharpe,risk_maxdownside,risk_returnyearly,risk_stdevyearly"
        options_str = "unit=1;tradeDate=20201101;annualized=0;startDate=20191031;endDate=20201031;period=2;returnType=1;yield=1;riskFreeRate=1"
        wssdata = w.wss(codes=total_code_list, fields=fields, options=options_str)
        if wssdata.ErrorCode != 0:
            print("获取wind数据错误%s" % wssdata.ErrorCode)
            return
        resultDf = pd.DataFrame(wssdata.Data, index=wssdata.Fields, columns=wssdata.Codes).T
        resultDf.index.name = '基金代码'
        resultDf.rename(columns=upper_name_dic, inplace=True)
        # Rescale the size to units of 1e8, matching the '亿元' column name.
        resultDf['基金规模(亿元)'] = resultDf['基金规模(亿元)'] / 100000000
        resultDf.sort_values(by='基金规模(亿元)', ascending=False, inplace=True)
        resultDf.to_excel(self.file_loc + '11月增强型指数基金表现.xlsx')
        return resultDf

    def calc_detail_df(self, temp_df):
        """Split the funds by tracked index and export the excess-return tables.

        Args:
            temp_df: frame returned by :meth:`get_data`; ``None`` or an empty
                frame is accepted and results in no output.

        Funds with size < 2 are dropped.  CSI 300 and CSI 500 each get their
        own workbook; funds tracking any other index share one workbook.
        """
        if temp_df is None or temp_df.empty:
            # get_data returns None when the Wind download failed.
            print("无增强指数基金数据,跳过统计")
            return

        # NaN codes are dropped: groupby below ignores them as well.
        bench_code_list = list(temp_df['跟踪指数代码'].dropna().unique())
        options_str = "startDate=20201001;endDate=20201031"
        wssdata = w.wss(codes=bench_code_list, fields=['pct_chg_per', 'sec_name'], options=options_str)
        if wssdata.ErrorCode != 0:
            print("获取wind数据错误%s" % wssdata.ErrorCode)
            return
        bench_return_df = pd.DataFrame(wssdata.Data, index=wssdata.Fields, columns=wssdata.Codes).T
        bench_return_df.rename(columns={'pct_chg_per'.upper(): "跟踪指数近1月", 'sec_name'.upper(): "证券简称"}, inplace=True)

        # Benchmarks that get a dedicated workbook -> name used in the file name.
        major_names = {'000300.SH': '沪深300', '000905.SH': '中证500'}
        df_other_list = []
        for index_code, df in temp_df.groupby(by='跟踪指数代码'):
            # Copy so that the new columns are not assigned on a slice.
            df = df[df['基金规模(亿元)'] >= 2].copy()
            # The benchmark frame is looked up by the upper-cased code; the
            # workbook name is chosen from the same normalised code.
            index_code = index_code.upper()
            bench_row = bench_return_df.loc[index_code]
            df['近一月超额(%)'] = df['近1月(%)'] - bench_row['跟踪指数近1月']
            df['跟踪指数'] = bench_row['证券简称']
            temp_final_df = df.sort_values(by='近1月(%)').drop('跟踪指数代码', axis=1)
            temp_final_df['基金代码'] = temp_final_df.index
            temp_final_df = temp_final_df[self.name_se]
            if index_code in major_names:
                temp_final_df.to_excel(self.file_target + "增强产品%s概况.xlsx" % major_names[index_code], index=False)
            else:
                df_other_list.append(temp_final_df)

        if df_other_list:
            total_other_df = pd.concat(df_other_list, axis=0, sort=True)
            total_other_df = total_other_df[self.name_se]
            total_other_df.to_excel(self.file_target + "增强产品其他指数概况.xlsx", index=False)

    def get_main(self):
        """Download the data and export all tables."""
        total_df = self.get_data()
        self.calc_detail_df(total_df)
90 |
if __name__ == '__main__':
    # Script entry point: download the data and export the report tables.
    FundImproveEst().get_main()
--------------------------------------------------------------------------------
/MonthReportData/GetIndexEst.py:
--------------------------------------------------------------------------------
1 | # -- coding: utf-8 --
2 |
3 | '''
4 | 获取指数估值与ppt图表数据
5 | '''
6 |
7 | import pandas as pd
8 | from GetAndSaveWindData.MysqlCon import MysqlCon
9 | from GetAndSaveWindData.GetDataTotalMain import GetDataTotalMain
10 |
11 | import numpy as np
12 | from datetime import datetime, timedelta
13 | import os
14 | import matplotlib
15 | import matplotlib.pyplot as plt
16 | import statsmodels.api as sm
17 | from WindPy import w
18 | from MonthReportData.GetTableData import GetTableData
19 |
20 | matplotlib.rcParams['font.sans-serif'] = ['SimHei']
21 | matplotlib.rcParams['font.family'] = 'sans-serif'
22 | matplotlib.rcParams['axes.unicode_minus'] = False
23 |
24 |
class GetIndexEst:
    """Index valuation figures, charts and tables for the monthly report."""

    def __init__(self):
        # Ten-year look-back window measured from today; the end date is fixed.
        self.start_date = (datetime.today() - timedelta(days=365 * 10)).strftime("%Y-%m-%d")
        self.end_date = '2020-08-31'
        self.GetDataTotalMainDemo = GetDataTotalMain(data_resource='wind')
        self.file_path = os.getcwd() + '\\GetDataResult\\估值\\'
        self.file_month_path = os.getcwd() + '\\GetDataResult\\月度表现\\'

    @staticmethod
    def _latest_value_percentile(series):
        """Return ``(last value, rank fraction of the last value)``.

        The fraction is (number of values strictly below the last value + 1)
        divided by the series length, i.e. the 1-based position of the last
        value in the ascending sort.  It is NaN when the last value is NaN.
        """
        # .iloc is explicit positional access; series[-1] is a label lookup
        # on non-integer indexes in current pandas.
        last_value = series.iloc[-1]
        if pd.isna(last_value):
            return last_value, float('nan')
        percent_num = ((series < last_value).sum() + 1) / series.shape[0]
        return last_value, percent_num

    def _load_close_prices(self, code_list):
        """Fetch index quotes for every code and join them column-wise.

        The 'close_price' column of each result is renamed to the index code.
        """
        df_list = []
        for code in code_list:
            temp_df = self.GetDataTotalMainDemo.get_hq_data(code=code, start_date=self.start_date,
                                                            end_date=self.end_date, code_style='index')
            temp_df.rename(columns={'close_price': code}, inplace=True)
            df_list.append(temp_df)
        return pd.concat(df_list, axis=1, sort=True)

    def get_plot_figure(self, dic_df):
        """Save one line chart per entry of ``dic_df`` (name -> DataFrame)."""
        for code, df in dic_df.items():
            temp_fig = plt.figure(figsize=(16, 9))
            temp_ax = temp_fig.add_subplot(111)
            df.plot(ax=temp_ax)
            temp_ax.grid()
            temp_fig.savefig(self.file_path + '%s估值走势图.png' % code)
            # Release the figure; otherwise one stays open per plotted index.
            plt.close(temp_fig)

    def get_regression(self, index_code_list, bench_code_list, total_return_df):
        """Regress each index on every benchmark and save tables and charts.

        For every code in ``index_code_list`` an OLS with intercept is fitted
        against each benchmark separately.  The R-squared/beta/alpha table is
        written to Excel, and a scatter plot plus a cumulative-return
        comparison are saved for the benchmark with the highest R-squared.

        Args:
            index_code_list: columns of ``total_return_df`` used as dependent
                variables.
            bench_code_list: columns of ``total_return_df`` used as regressors.
            total_return_df: return series, one column per code.
        """
        plt.style.use('ggplot')
        for index_code in index_code_list:
            list_r2, list_beta, list_const = [], [], []
            Y = total_return_df[index_code].values
            for bench_code in bench_code_list:
                x = total_return_df[bench_code].values
                x = x.reshape(len(x), 1)
                # Design matrix: intercept column followed by the regressor.
                X = np.hstack((np.ones((len(x), 1)), x))
                res = sm.OLS(Y, X).fit()
                list_r2.append(res.rsquared)
                list_beta.append(res.params[1])
                list_const.append(res.params[0])

            res_indus = pd.DataFrame([])
            res_indus['指数代码'] = bench_code_list
            res_indus['拟合R方'] = list_r2
            res_indus['beta'] = list_beta
            res_indus['alpha'] = list_const
            res_indus = res_indus.sort_values('拟合R方', ascending=False)
            res_indus.to_excel(self.file_path + '%s风格指数回归结果.xlsx' % index_code, index=False)

            # First row after sorting = benchmark with the best fit.
            maxR2Code = res_indus['指数代码'].tolist()[0]
            x = total_return_df[maxR2Code].values
            maxR2Alpha = res_indus['alpha'].tolist()[0]
            maxR2Beta = res_indus['beta'].tolist()[0]

            scatter_fig = plt.figure(figsize=(16, 9))
            plt.scatter(x, Y, s=30, color='blue', label='样本实例')
            plt.plot(x, maxR2Alpha + maxR2Beta * x, linewidth=3, color='red', label='回归线')
            plt.ylabel('宽基指数超额收益')
            plt.xlabel('风格指数超额收益')
            plt.grid(True)
            plt.legend(loc='upper left')
            plt.savefig(self.file_path + '%s拟合风格指数效果图.png' % index_code)
            plt.close(scatter_fig)

            fig = plt.figure(figsize=(16, 9))
            ax = fig.add_subplot(111)
            indeustryAccDf = (1 + total_return_df[[index_code, maxR2Code]]).cumprod()
            indeustryAccDf['指数收益比'] = indeustryAccDf[index_code] / indeustryAccDf[maxR2Code]
            indeustryAccDf.plot(ax=ax)
            ax.set_ylabel('累计收益率')
            ax.set_xlabel('时间')
            ax.grid(True)
            # 'down right' is not a matplotlib legend location.
            ax.legend(loc='lower right')
            fig.savefig(self.file_path + '%s拟合风格指数累计走势对比图.png' % index_code)
            plt.close(fig)

    def get_index_regress(self, index_code_list):
        """Save the return correlation matrix and run the style regressions."""
        bench_code_list = ['399314.SZ', '399315.SZ', '399316.SZ']
        bench_df = self._load_close_prices(bench_code_list)
        index_df = self._load_close_prices(index_code_list)

        total_df = pd.concat([index_df, bench_df], axis=1, sort=True)
        total_return_df = total_df / total_df.shift(1) - 1
        total_return_df.dropna(inplace=True)
        total_return_df.corr().to_excel(self.file_path + '相关系数.xlsx')

        self.get_regression(index_code_list, bench_code_list, total_return_df)

    def get_index_consit(self, index_code='000913.SH', weight=1):
        """Return the industry weights and constituent details of an index.

        Args:
            index_code: index whose constituents are loaded.
            weight: multiplier applied to every industry weight.

        Returns:
            tuple: ``(dic_ind_weight, stock_result_df)`` where the dict maps
            industry name -> sum of 'stock_weight' * weight / 100, and the
            frame joins constituent weights/names with the Wind fields.

        Raises:
            RuntimeError: when the Wind request reports an error.
            KeyError: when the constituent table lacks an expected column.
        """
        temp_df = self.GetDataTotalMainDemo.get_index_constituent(indexCode=index_code)
        wss_data = w.wss(codes=temp_df['stock_code'].tolist(),
                         fields=["industry_sw", "mkt_cap_ard", "roe_ttm", "yoyprofit", "dividendyield"],
                         options="industryType=1;unit=1;tradeDate=20200823;rptDate=20191231;rptYear=2019")
        if wss_data.ErrorCode != 0:
            raise RuntimeError("wind获取成分股数据有误,错误代码" + str(wss_data.ErrorCode))
        code_ind_df = pd.DataFrame(wss_data.Data, index=wss_data.Fields, columns=wss_data.Codes).T
        name_dic = {"industry_sw".upper(): "申万一级行业", "mkt_cap_ard".upper(): "总市值",
                    "dividendyield".upper(): "股息率(2019年)",
                    "yoyprofit".upper(): "净利润同比增长率", "roe_ttm".upper(): "ROE"}
        code_ind_df.rename(columns=name_dic, inplace=True)

        # A missing column now surfaces as KeyError here instead of being
        # swallowed and resurfacing as NameError one line later.
        use_df = temp_df[['stock_code', 'stock_weight', 'stock_name']].set_index('stock_code')

        stock_result_df = pd.concat([use_df, code_ind_df], sort=True, axis=1)
        dic_ind_weight = {}
        for ind, stock_df in stock_result_df.groupby('申万一级行业'):
            dic_ind_weight[ind] = stock_df['stock_weight'].sum() * weight / 100
        return dic_ind_weight, stock_result_df

    def calc_stock_weight(self, dic_stock_weight, index_se):
        """Merge per-index holdings into portfolio weights and save them.

        Args:
            dic_stock_weight: index code -> constituent frame with a
                'stock_weight' column; a 'port_stock_weight' column is added
                to each frame in place.
            index_se: Series of portfolio weights keyed by index code.

        A stock held through several indexes keeps its first row, with the
        portfolio weights summed.  The result is written to "股票持仓数据.xlsx".
        """
        for index_code, temp_df in dic_stock_weight.items():
            temp_df['port_stock_weight'] = temp_df['stock_weight'] * index_se[index_code]
        total_stock_df = pd.concat(list(dic_stock_weight.values()), axis=0, sort=True)
        total_stock_df['stock_code_label'] = total_stock_df.index.tolist()
        df_list = []
        for code, temp_stock_df in total_stock_df.groupby(by='stock_code_label'):
            if temp_stock_df.shape[0] > 1:
                # Copy the row before editing it so no slice is written to.
                target_row = temp_stock_df.iloc[0].copy()
                target_row['port_stock_weight'] = temp_stock_df['port_stock_weight'].sum()
                df_list.append(pd.DataFrame(target_row).T)
            else:
                df_list.append(temp_stock_df)
        total_stock_result = pd.concat(df_list, axis=0, sort=True).sort_values(by='port_stock_weight', ascending=False)
        name_dic = {'port_stock_weight': '权重', 'stock_name': '简称'}
        total_stock_result.rename(columns=name_dic).to_excel("股票持仓数据.xlsx")

    def get_port_weight(self, index_code_list=None, weight_list=None):
        """Compute the portfolio's industry exposure relative to two benchmarks.

        Args:
            index_code_list: index codes held by the portfolio.
            weight_list: portfolio weight of each index, in the same order.

        Writes the merged stock holdings (via :meth:`calc_stock_weight`) and
        the industry exposure versus CSI 300 / CSI 500 ("主题OTC组合暴露.xlsx").
        """
        index_code_list = [] if index_code_list is None else index_code_list
        weight_list = [] if weight_list is None else weight_list
        temp_se = pd.Series(weight_list, index=index_code_list)
        port_df_list = []
        dic_stock_weight = {}
        for index_code in index_code_list:
            dic_ind_weight, stock_weight_df = self.get_index_consit(index_code, weight=temp_se[index_code])
            dic_stock_weight[index_code] = stock_weight_df
            port_df_list.append(pd.Series(dic_ind_weight, name=index_code))

        self.calc_stock_weight(dic_stock_weight, temp_se)
        total_ind = pd.concat(port_df_list, axis=1, sort=True).sum(axis=1)
        total_ind.name = '组合行业暴露'

        bench_code_df_list = []
        for bench_code in ['000300.SH', '000905.SH']:
            dic_bench_weight, _ = self.get_index_consit(bench_code)
            bench_code_df_list.append(pd.Series(dic_bench_weight, name=bench_code))
        bench_code_df = pd.concat(bench_code_df_list, axis=1, sort=True).rename(
            columns={'000300.SH': '沪深300', '000905.SH': "中证500"})
        total_df = pd.concat([total_ind, bench_code_df], axis=1, sort=True).fillna(0)
        total_df['相对沪深300'] = total_df['组合行业暴露'] - total_df['沪深300']
        total_df['相对中证500'] = total_df['组合行业暴露'] - total_df['中证500']
        total_df.to_excel("主题OTC组合暴露.xlsx")

    def get_init_param(self):
        """Return the index codes to analyse, keyed by index category."""
        return {
            # broad-based
            '宽基': ['399006.SZ', '399005.SZ', '000852.SH', '399001.SZ', '000905.SH', '000300.SH', '000001.SH',
                   '000016.SH'],
            # industry
            '行业': ['000935.SH', '000933.SH', '000932.SH', '000936.CSI', '000934.SH', '000931.CSI', '000930.CSI',
                   '000929.CSI', '000937.CSI', '000928.SH'],
            # thematic
            '主题': ['990001.CSI', '980017.CNI', '399803.SZ', '399973.SZ', '399441.SZ', '931066.CSI', '931087.CSI',
                   '000941.CSI', 'H30318.CSI', '931079.CSI', '931071.CSI', '399997.SZ', '399976.SZ', '399362.SZ',
                   'H30533.CSI', '399812.SZ', '399974.SZ', '000860.CSI', '000861.CSI', '000859.CSI',
                   '000015.SH'],
            # strategy
            '策略': ['399673.SZ', '399293.SZ', '399296.SZ', '399295.SZ', '930758.CSI', '399983.SZ', '000984.SH',
                   '000971.SH', '000982.SH', '399990.SZ', '399702.SZ', '000050.SH', '931052.CSI', '930838.CSI',
                   'H30269.CSI', '000925.CSI'],
            # style
            '风格': ['399377.SZ', '399348.SZ', '399919.SZ', '000029.SH'],
        }

    def get_main(self, ):
        """Export, per index category, the monthly table and valuation charts.

        For every index the PE_TTM history is saved to Excel, the percentile
        of its latest value is printed, and the history is plotted.  The
        monthly performance table is extended with the PE columns and written
        to ``file_month_path``.
        """
        GetTableDataDemo = GetTableData()
        for index_type, index_code_list in self.get_init_param().items():
            total_df = GetTableDataDemo.get_data(code_list=index_code_list, index_type=index_type)
            if total_df.empty:
                # get_data returns an empty frame when a Wind request failed.
                print('%s指数表格数据为空,跳过' % index_type)
                continue

            dic_df = {}
            dic_PE = {}
            for code in index_code_list:
                df = self.GetDataTotalMainDemo.get_hq_data(code=code, start_date=self.start_date,
                                                           end_date=self.end_date, code_style='index_daily',
                                                           dic_param={'fields': 'pe_ttm', 'filed_name': 'PE值'})
                df.rename(columns={'update_time': '时间', 'factor_value': "PE_TTM"}, inplace=True)
                last_value, percent_num = self._latest_value_percentile(df['PE_TTM'])
                print('%s当前估值分位数%s' % (code, round(percent_num, 4)))
                df.to_excel(self.file_path + '估值%s.xlsx' % code[:6])
                dic_df[total_df.loc[code]['证券简称']] = df
                dic_PE[code] = {'PE': last_value, 'PE分位数': percent_num}
            pe_df = pd.DataFrame(dic_PE).T
            total_last_df = pd.concat([total_df, pe_df], axis=1, sort=True)

            # NOTE(review): name_list is never applied to total_last_df, so the
            # workbook keeps the alphabetical column order produced by
            # concat(sort=True); presumably this is the intended report column
            # order - confirm before selecting with it.
            name_list = ['证券代码', '证券简称', '近1月(%)', '近3月(%)', '近6月(%)', '近1年(%)', '近3年(%)', '今年以来(%)', '近一年最大回撤(%)', 'Sharp比率',
                         '年化波动(%)', '年化收益(%)', '月度成交额变化(%)', '月度换手率变化(%)', 'PE', 'PE分位数']
            total_last_df.to_excel(self.file_month_path + '%s指数月度表现.xlsx' % index_type, index=False)

            self.get_plot_figure(dic_df)
241 |
242 |
243 |
if __name__ == '__main__':
    # Script entry point: export the valuation data for all index categories.
    index_est = GetIndexEst()
    index_est.get_main()

    # Alternative entry points kept for reference:
    #   index_est.get_index_consit()
    #   index_est.get_index_regress(code_list1)
    #
    # Industry OTC portfolio:
    #   ind_code_list = ['000913.SH', '000932.SH', '000988.CSI', '399986.SZ', '399995.SZ', '931008.CSI', '931009.CSI',
    #                    '000806.CSI']
    #   ind_weight_list = [0.038, 0.3, 0.214, 0.114, 0.06, 0.063, 0.135, 0.075]
    #
    # Thematic OTC portfolio:
    #   zt_code_list = ['399814.SZ', '930653.CSI', '930743.CSI', '930875.CSI', '930914.CSI', 'h11136.CSI']
    #   zt_weight_list = [0.3089, 0.0424, 0.0605, 0.2298, 0.3089, 0.0495]
    #
    #   index_est.get_port_weight(index_code_list=ind_code_list, weight_list=ind_weight_list)
256 |
--------------------------------------------------------------------------------
/MonthReportData/GetTableData.py:
--------------------------------------------------------------------------------
1 | # -- coding: utf-8 --
2 |
3 | '''
4 | 获取月报指数表格数据
5 | '''
6 |
7 | import pandas as pd
8 | from GetAndSaveWindData.MysqlCon import MysqlCon
9 | from GetAndSaveWindData.GetDataTotalMain import GetDataTotalMain
10 |
11 | import numpy as np
12 | from datetime import datetime, timedelta
13 | import os
14 | import matplotlib
15 | import matplotlib.pyplot as plt
16 | import statsmodels.api as sm
17 | from WindPy import w
18 |
19 | w.start()
20 |
21 |
class GetTableData:
    """Collect the index performance table of the monthly report from Wind."""

    def __init__(self):
        # Date windows of the trailing-return columns (column prefix -> range).
        dic_param = {
            '近1月': {'start_date': '2020-10-01', 'end_date': '2020-10-31'},
            '近3月': {'start_date': '2020-08-01', 'end_date': '2020-10-31'},
            '近6月': {'start_date': '2020-05-01', 'end_date': '2020-10-31'},
            '近1年': {'start_date': '2019-11-01', 'end_date': '2020-10-31'},
            '近3年': {'start_date': '2017-11-01', 'end_date': '2020-10-31'},
            '今年以来': {'start_date': '2020-01-01', 'end_date': '2020-10-31'},
        }
        self.dic_total_param = {
            '区间': dic_param,
            # Window of the risk metrics (drawdown, Sharpe, volatility, return).
            '年度其他': {'start_date': '2019-11-01', 'end_date': '2020-10-31'},
            # Previous month, the base of the turnover/amount change columns.
            '上月': {'start_date': '2020-09-01', 'end_date': '2020-09-30'},
        }

    @staticmethod
    def _compact_date(date_str):
        """Turn 'YYYY-MM-DD' into the 'YYYYMMDD' form used in Wind options."""
        return date_str.replace('-', '')

    @staticmethod
    def _wss_to_df(wss_data):
        """Build a DataFrame (rows = codes, columns = fields) from a wss result."""
        return pd.DataFrame(wss_data.Data, index=wss_data.Fields, columns=wss_data.Codes).T

    def get_data(self, code_list=None, end_date='2020-09-30', index_type='宽基', pe_df=None):
        """Download names, returns, risk metrics and activity changes.

        Args:
            code_list: index codes to query (defaults to an empty list).
            end_date, index_type, pe_df: accepted for interface compatibility;
                this method does not read them.

        Returns:
            DataFrame indexed by code with the name, trailing-return, risk and
            month-over-month change columns plus a '证券代码' column; an empty
            DataFrame when any Wind request reports an error.
        """
        code_list = [] if code_list is None else code_list

        # Security short names.
        wss_name_data = w.wss(codes=code_list, fields=['sec_name', ])
        if wss_name_data.ErrorCode != 0:
            print("wind获证券名称数据有误,错误代码" + str(wss_name_data.ErrorCode))
            return pd.DataFrame()
        name_df = self._wss_to_df(wss_name_data)
        name_df.rename(columns={'sec_name'.upper(): '证券简称'}, inplace=True)

        # Percentage change over each configured window.
        df_list = []
        for param_name, param_dic in self.dic_total_param['区间'].items():
            options = "startDate=%s;endDate=%s" % (self._compact_date(param_dic['start_date']),
                                                   self._compact_date(param_dic['end_date']))
            wss_data = w.wss(codes=code_list, fields=['pct_chg_per', ], options=options)
            if wss_data.ErrorCode != 0:
                print("wind获区间涨跌数据有误,错误代码" + str(wss_data.ErrorCode))
                return pd.DataFrame()
            resultDf = self._wss_to_df(wss_data)
            resultDf.rename(columns={'pct_chg_per'.upper(): param_name + '(%)', }, inplace=True)
            df_list.append(resultDf)
        total_data_df = pd.concat(df_list, axis=1, sort=True)
        total_df = pd.concat([total_data_df, name_df], axis=1, sort=True)

        # Max drawdown, Sharpe ratio, volatility and return over the risk window.
        risk_param = self.dic_total_param['年度其他']
        options = "startDate=%s;endDate=%s;period=2;returnType=1;riskFreeRate=1" % (
            self._compact_date(risk_param['start_date']), self._compact_date(risk_param['end_date']))
        wss_data = w.wss(codes=code_list,
                         fields=['risk_maxdownside', 'risk_sharpe', 'risk_stdevyearly', 'risk_returnyearly_index', ],
                         options=options)
        if wss_data.ErrorCode != 0:
            print("wind获年度其他数据有误,错误代码" + str(wss_data.ErrorCode))
            return pd.DataFrame()
        risk_index_df = self._wss_to_df(wss_data)
        risk_index_df.rename(columns={'risk_maxdownside'.upper(): '近一年最大回撤(%)', 'risk_sharpe'.upper(): 'Sharp比率',
                                      'risk_stdevyearly'.upper(): '年化波动(%)',
                                      'risk_returnyearly_index'.upper(): '年化收益(%)'}, inplace=True)
        total_final_df = pd.concat([total_df, risk_index_df], axis=1, sort=True)

        # Month-over-month change of trading amount and turnover.
        this_param = self.dic_total_param['区间']['近1月']
        last_param = self.dic_total_param['上月']
        options = "unit=1;startDate=%s;endDate=%s" % (
            self._compact_date(this_param['start_date']), self._compact_date(this_param['end_date']))
        last_options = "unit=1;startDate=%s;endDate=%s" % (
            self._compact_date(last_param['start_date']), self._compact_date(last_param['end_date']))

        this_month = w.wss(codes=code_list, fields=["amt_per", "turn_per"], options=options)
        if this_month.ErrorCode != 0:
            print("wind获月度成交数据有误,错误代码" + str(this_month.ErrorCode))
            return pd.DataFrame()
        last_month = w.wss(codes=code_list, fields=["amt_per", "turn_per"], options=last_options)
        if last_month.ErrorCode != 0:
            print("wind获月度成交数据有误,错误代码" + str(last_month.ErrorCode))
            return pd.DataFrame()

        change_df = (self._wss_to_df(this_month) / self._wss_to_df(last_month) - 1) * 100
        change_df.rename(columns={'amt_per'.upper(): '月度成交额变化(%)', "turn_per".upper(): "月度换手率变化(%)"}, inplace=True)
        total_last_df = pd.concat([total_final_df, change_df], axis=1, sort=True)
        total_last_df['证券代码'] = total_last_df.index
        return total_last_df
93 |
94 |
95 |
if __name__ == '__main__':
    # Script entry point: smoke run with two broad-based indexes.
    GetTableData().get_data(code_list=['000016.SH', '000300.SH'])
99 |
--------------------------------------------------------------------------------
/MonthReportData/TaoLiJudge.py:
--------------------------------------------------------------------------------
1 | # -- coding: utf-8 --
2 |
3 | '''
4 | 获取指数估值数据
5 | '''
6 |
7 | import pandas as pd
8 | from GetAndSaveWindData.MysqlCon import MysqlCon
9 | from GetAndSaveWindData.GetDataTotalMain import GetDataTotalMain
10 | from datetime import datetime, timedelta
11 | import os
12 | from WindPy import w
13 |
14 |
15 | class TalLiJudge:
16 | def __init__(self):
17 | self.GetDataTotalMainDemo = GetDataTotalMain(data_resource='wind')
18 |
19 | def get_data(self):
20 | target_df = pd.read_excel("基金发行明细.xlsx", sheet_name='Sheet1', )
21 | total_df = pd.read_excel("主动权益类基金.xlsx")
22 | dic_df = {}
23 | for name in target_df['基金经理'].tolist():
24 | name_list = name.split('、')
25 | if len(name_list) == 1:
26 | dic_df[name] = total_df[total_df['基金经理'] == name]
27 | else:
28 | for name in name_list:
29 | temp_df_list = []
30 | for target_name in total_df['基金经理'].tolist():
31 | if target_name.find(name) != -1:
32 | temp_df_list.append(total_df[total_df['基金经理'] == target_name])
33 | temp_df = pd.concat(temp_df_list, axis=0, sort=True)
34 | dic_df[name] = temp_df
35 | return dic_df
36 |
37 | def get_calc_result(self, dic_df):
38 | dic_name_df = {}
39 | dic_name_corr_df = {}
40 | dic_name_poc_df = {}
41 | for name, fund_df in dic_df.items():
42 | temp_df_list = []
43 | for num in range(fund_df.shape[0]):
44 | code = fund_df.iloc[num]['证券代码']
45 | se_name = fund_df.iloc[num]['证券简称']
46 | start_date = fund_df.iloc[num]['任职日期'].strftime("%Y-%m-%d")
47 | end_date = (datetime.today() - timedelta(days=1)).strftime("%Y-%m-%d")
48 | temp_df = self.GetDataTotalMainDemo.get_hq_data(code=code, start_date=start_date, end_date=end_date,
49 | code_style='fund', name_list=['net_value_adj'])
50 | temp_df.rename(columns={'net_value_adj': se_name}, inplace=True)
51 | temp_return_df = temp_df / temp_df.shift(1) - 1
52 | temp_return_df.dropna(inplace=True)
53 | temp_df_list.append(temp_return_df)
54 | temp_total_df = pd.concat(temp_df_list, axis=1, sort=True)
55 | temp_total_df.dropna(axis=1, how='all', inplace=True)
56 | dic_name_df[name] = temp_total_df
57 | dic_name_corr_df[name] = temp_total_df.corr()
58 | fields = ["prt_stockvalue_topindustryname2", "prt_stockvalue_topindustrytonav2",
59 | "prt_stockvalue_topindustrytostock2","sec_name"]
60 | name_dic = {"prt_stockvalue_topindustryname2".upper(): "重仓行业名称",
61 | "prt_stockvalue_topindustrytonav2".upper(): "重仓行业市值占基金资产净值比",
62 | "prt_stockvalue_topindustrytostock2".upper(): "重仓行业市值占股票投资市值比",
63 | "sec_name".upper(): "证券简称"}
64 | poc_df_list = []
65 | for order in range(1,6):
66 | wss_data = w.wss(codes=fund_df['证券代码'].tolist(),fields=fields,options="rptDate=20200630;order=%s"%str(order))
67 | if wss_data.ErrorCode != 0:
68 | print("wind获取因子数据有误,错误代码" + str(wss_data.ErrorCode))
69 | continue
70 | resultDf = pd.DataFrame(wss_data.Data, index=wss_data.Fields, columns=wss_data.Codes).T
71 | resultDf.rename(columns=name_dic,inplace=True)
72 | resultDf['重仓行业排名']=order
73 | poc_df_list.append(resultDf)
74 |
75 | if poc_df_list:
76 | temp_total_poc = pd.concat(poc_df_list,axis=0,sort=True)
77 | dic_name_poc_df[name] = temp_total_poc
78 | save_path = os.getcwd() + '\\HXBFundManager\\基金经理管理产品相关性.xlsx'
79 | poc_save_path = os.getcwd() + '\\HXBFundManager\\基金经理重仓行业概况.xlsx'
80 | writer = pd.ExcelWriter(save_path)
81 | for fund_name, corr_df in dic_name_corr_df.items():
82 | corr_df.to_excel(writer, sheet_name=fund_name)
83 | writer.save()
84 |
85 | writer2 = pd.ExcelWriter(poc_save_path)
86 | for fund_name, poc_df in dic_name_poc_df.items():
87 | poc_df.to_excel(writer2, sheet_name=fund_name)
88 | writer2.save()
89 |
def get_main(self):
    """Run the full workflow for this object.

    Calls ``self.get_data()`` and hands its result straight to
    ``self.get_calc_result()``. Returns ``None``.
    """
    self.get_calc_result(self.get_data())
93 |
94 |
if __name__ == '__main__':
    # Script entry point: build the judge object and run its whole pipeline.
    judge = TalLiJudge()
    judge.get_main()
98 |
--------------------------------------------------------------------------------
/PrintInfo.py:
--------------------------------------------------------------------------------
1 | # -- coding: utf-8 --
2 |
3 | '''
4 | 日志信息打印
5 | '''
6 |
7 | from datetime import datetime
8 |
class PrintInfo:
    """Minimal console logger that prefixes messages with a timestamp."""

    def __init__(self):
        pass

    def PrintLog(self, infostr, otherInfo=''):
        """Print ``infostr`` with a ``YYYY-mm-dd HH:MM:SS[INFO]: `` prefix.

        Args:
            infostr: Message text; it is concatenated to the prefix, so it
                must be a ``str``.
            otherInfo: Optional extra value printed after the message,
                separated by a space. It is omitted only when it is an
                empty string; any other value (including non-string falsy
                values such as ``0`` or ``[]``) is printed.
        """
        stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        message = stamp + '[INFO]: ' + infostr

        # Only the empty-string default suppresses the trailing value.
        if isinstance(otherInfo, str) and not otherInfo:
            print(message)
            return
        print(message, otherInfo)
22 |
23 |
if __name__ == '__main__':
    # Smoke test: print a single timestamped message when run as a script.
    PrintInfo().PrintLog('日期信息打印测试')
27 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # EstimateValueData
2 | ## 基金估值表,深度分析
3 | (1)公募基金和宽基指数数据从本地数据库获取;本地数据库缺失时,从 Wind 的 API 中获取,获取后先存入本地数据库,再从数据库返回;
4 | (2)私募基金净值数据从本地数据库获取;数据库中不存在时,打印错误日志信息;
5 | (3)估值表数据仅限托管在浙商证券的私募基金。
6 | ## 标准化输出基金(公募,私募等净值类产品)的深度分析结果。
7 | ## 基于净值类的各风险收益统计指标,风格回归分析,行业回归分析,蒙特卡洛走势预测等。
8 | ## 。。。。。。
9 |
--------------------------------------------------------------------------------
/StockFactorCalc.py:
--------------------------------------------------------------------------------
1 | # -- coding: utf-8 --
2 | import pandas as pd
3 | import numpy as np
4 |
5 | from GetAndSaveWindData.GetDataTotalMain import GetDataTotalMain
6 |
7 |
class StockFactorCalcl:
    """Skeleton for stock factor calculations.

    Only the constructor does any work; the remaining methods are
    placeholders that currently do nothing and return ``None``.
    """

    def __init__(self):
        # Data-access object, created with data_resource='wind'.
        self.GEtDataTotaMainDemo = GetDataTotalMain(data_resource='wind')

    def get_main(self):
        """Entry point of the calculation; not implemented yet."""

    def get_history_data(self):
        """Placeholder for loading historical data; not implemented yet."""

    def get_wash_data(self):
        """Placeholder for cleaning the loaded data; not implemented yet."""
22 |
23 |
# Script entry point. The guard must compare against '__main__' (two leading
# underscores); the previous '_main__' never matched, so running this file as
# a script silently did nothing.
if __name__ == '__main__':
    StockFactorCalclDemo = StockFactorCalcl()
    StockFactorCalclDemo.get_main()
--------------------------------------------------------------------------------
/mylog.py:
--------------------------------------------------------------------------------
1 | # -- coding: utf-8 --
2 | # Author:zouhao
3 | # email:1084848158@qq.com
4 |
5 | # 日志
6 | import logging
7 | # import coloredlogs
8 | # coloredlogs.install()
9 |
def set_log(file_name=''):
    """Configure the root logger for INFO-level console (and file) output.

    The function is safe to call repeatedly: handlers it installed on a
    previous call are recognised by name and not added a second time, so
    log lines are not duplicated.

    Args:
        file_name: Optional base name of a log file. When non-empty,
            records are additionally written to ``<file_name>.log``.

    Returns:
        The root logger.
    """
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # Day-month-year timestamp. "%m" is the month; the previous "%M" put the
    # minutes into the month position.
    formatter = logging.Formatter(
        "%(asctime)s %(filename)s:%(levelname)s:%(message)s",
        datefmt="%d-%m-%Y %H:%M:%S",
    )

    # Names of handlers already attached, used to avoid duplicates.
    installed = {getattr(handler, 'name', None) for handler in logger.handlers}

    console_name = 'set_log.console'
    if console_name not in installed:
        ch = logging.StreamHandler()
        ch.set_name(console_name)
        ch.setLevel(logging.INFO)
        ch.setFormatter(formatter)
        logger.addHandler(ch)

    if file_name:
        file_path = '%s.log' % file_name
        file_handler_name = 'set_log.file:' + file_path
        if file_handler_name not in installed:
            file_handler = logging.FileHandler(file_path)
            file_handler.set_name(file_handler_name)
            file_handler.setLevel(level=logging.INFO)
            file_handler.setFormatter(formatter)
            logger.addHandler(file_handler)
    return logger
--------------------------------------------------------------------------------
/windDemo.py:
--------------------------------------------------------------------------------
1 | # -- coding: utf-8 --
2 |
3 | from WindPy import w
4 |
# Call w.start() before the queries below are issued
# (presumably this opens the Wind session -- confirm against WindPy docs).
w.start()
6 | # 定义打印输出函数,用来展示数据使用
def printpy(outdata):
    """Print a result object row by row.

    ``outdata`` is expected to expose ``ErrorCode``, ``Data``, ``Times`` and
    ``Fields``. ``Data`` is indexed as ``Data[field][row]``.

    When ``ErrorCode`` is non-zero, only the error code is printed and an
    empty tuple is returned. Otherwise one line is printed per row: the
    row's time (only when there is more than one time entry) followed by
    each field value, every item followed by a single space. Returns
    ``None`` in that case.
    """
    if outdata.ErrorCode != 0:
        print('error code:' + str(outdata.ErrorCode) + '\n')
        return ()

    for row in range(len(outdata.Data[0])):
        # The time column is shown only for multi-row (time-series) results.
        prefix = str(outdata.Times[row]) + ' ' if len(outdata.Times) > 1 else ''
        cells = ''.join(
            str(outdata.Data[col][row]) + ' '
            for col in range(len(outdata.Fields))
        )
        print(prefix + cells)
18 |
# Section banner for the demo output. The banner text mentions wsd, but the
# active query below is a wset 'indexconstituent' request.
banner = '\n\n'+'-----通过wsd来提取时间序列数据,比如取开高低收成交量,成交额数据-----'+'\n'
print(banner)
# wsddata1=w.wsd("000001.SZ", "open,high,low,close,volume,amt", "2015-11-22", "2015-12-22", "Fill=Previous")
constituent_options = 'date=2018-09-11;windcode=%s' % '000300.SZ'
wsddata1 = w.wset('indexconstituent', options=constituent_options)
printpy(wsddata1)
--------------------------------------------------------------------------------