├── .gitignore ├── .idea ├── EstimateValueData.iml ├── dataSources.xml ├── dataSources │ └── b8058ac0-c931-4b7b-a6b7-1a4aff69d5fa.xml ├── misc.xml ├── modules.xml └── vcs.xml ├── BrisionAnys.py ├── CalcEstimateValue.py ├── CalcRegression.py ├── CalcRiskReturn.py ├── DataToMySql ├── DataDic.py ├── GetDataToMysql.py ├── MysqlCon.py └── mysql.conf ├── DateFormatDf.py ├── EstimateValue.py ├── FamaFrenchRegression.py ├── GetAndSaveWindData ├── ClientWindIfindInit.py ├── GetDataFromWindAndMySql.py ├── GetDataFromWindNNotMysql.py ├── GetDataToMysql.py ├── GetDataTotalMain.py ├── GetFundFinanceReportData.py ├── GetindexName.py └── MysqlCon.py ├── GetDataFromWindAndMySql.py ├── GetExcelData.py ├── GetFinanceReportData.py ├── GetWindDataToMySql.py ├── JudgeFund ├── CalcJudgeFund.py ├── JudgeAndGetFund.py ├── JudgeFundDC.py ├── JudgeFundImproveBase.py ├── JudgeFundIndexImprove.py ├── JudgeFundMain.py ├── JudgeFundQDII.py ├── JudgeFundTopic.py └── YunFeiCalc.py ├── JudgeText.py ├── MonthReportData ├── CalcHXBCorr.py ├── FundEst.py ├── FundImproveEst.py ├── GetIndexEst.py ├── GetTableData.py └── TaoLiJudge.py ├── PrintInfo.py ├── README.md ├── StockFactorCalc.py ├── mylog.py └── windDemo.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.log 2 | *.xlsx 3 | *.xls 4 | *.txt 5 | *.png -------------------------------------------------------------------------------- /.idea/EstimateValueData.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 12 | -------------------------------------------------------------------------------- /.idea/dataSources.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | mysql 6 | true 7 | com.mysql.jdbc.Driver 8 | jdbc:mysql://localhost:3306/fund_data 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 
class BrisionAnys:
    """Multi-period Brinson performance attribution of a stock portfolio.

    For every valuation date the holdings and the benchmark index
    (``self.benchMark``) are grouped by industry, and the active return is
    split into an allocation effect (AR), a selection effect (SR) and an
    interaction effect (IR).  Industry classification, index constituents and
    closing prices are requested from the Wind API object ``w``.
    """

    # Column labels assigned positionally to every raw holdings table.
    _HOLDING_COLUMNS = ['styleCode', 'stockName', 'styleMoney', 'forRate', 'stockNum',
                        'stockUnitBuy', 'stockTotalBuy', 'stockBuyWeight0', 'stockClosePrice',
                        'stockValue', 'stockValueWeight', 'stockChange', 'stockTradeFlag']

    def __init__(self, dicStockDf):
        """Store the cleaned holdings and the attribution settings.

        :param dicStockDf: dict mapping a valuation date to the raw holdings
            DataFrame of that date (see ``WashData`` for the expected layout).
        """
        self.benchMark = '000300.SH'  # benchmark index
        self.dicStockDf = self.WashData(dicStockDf)
        self.myfont = matplotlib.font_manager.FontProperties(fname=r'C:/Windows/Fonts/simkai.ttf')

        # Wind fields used for the industry split; element 0 is the grouping
        # key and element 1 is stored as the industry label.  Alternatives
        # that were used before: INDUSTRY_GICS / INDUSTRY_GICSCODE (Wind),
        # INDUSTRY_CSRCCODE12 / INDUSTRY_CSRC12 (CSRC),
        # INDUSTRY_SW / INDUSTRY_SWCODE (Shenwan).
        self.indusFieds = ['INDUSTRY_CITIC', 'INDUSTRY_CITICCODE']  # CITIC industry

    # Data cleaning; may move to another module once it is complete.
    def WashData(self, dicStockDf):
        """Return a copy of the holdings with normalised codes and column names.

        Spaces in the stock codes of the index are replaced by dots and the
        columns are relabelled positionally with ``_HOLDING_COLUMNS``, so every
        input frame must have exactly that many columns.

        :param dicStockDf: dict of valuation date -> raw holdings DataFrame.
        :return: dict of valuation date -> cleaned DataFrame.
        """
        dicTotalStock = {}
        for stockDate, stockDf in dicStockDf.items():
            stockList = [stock.replace(' ', '.') for stock in stockDf.index.tolist()]
            dicTotalStock[stockDate] = pd.DataFrame(stockDf.values, index=stockList,
                                                    columns=self._HOLDING_COLUMNS)
        return dicTotalStock

    @staticmethod
    def _linkingFactor(pofolioReturn, benchMarkReturn):
        """Return (ln(1+p) - ln(1+b)) / (p - b), the log-linking coefficient.

        When both returns coincide the quotient is 0/0; its limit 1 / (1 + p)
        is returned instead so the attribution stays finite.
        """
        diff = pofolioReturn - benchMarkReturn
        if abs(diff) < 1e-12:
            return 1.0 / (1.0 + pofolioReturn)
        return (np.log(1 + pofolioReturn) - np.log(1 + benchMarkReturn)) / diff

    def _aggregateByIndustry(self, detailDf, prefix):
        """Collapse per-stock rows into one row per industry.

        ``detailDf`` must provide ``logReturn``, ``iWeight`` and both industry
        fields.  The result has the columns ``<prefix>Return`` (sum of
        weight * log return), ``<prefix>INName`` and ``<prefix>Weight``.
        """
        dicIndustry = {}
        # Group by the bare column name: a one-element list would produce
        # tuple keys on recent pandas and break the later index alignment.
        for industryCode, tempDf in detailDf.groupby(by=self.indusFieds[0]):
            weightReturn = tempDf['logReturn'] * tempDf['iWeight']
            industryName = list(tempDf[self.indusFieds[1]].unique())[0]
            dicIndustry[industryCode] = {prefix + 'Return': weightReturn.sum(),
                                         prefix + 'INName': industryName,
                                         prefix + 'Weight': tempDf['iWeight'].sum()}
        return pd.DataFrame(dicIndustry).T

    # Portfolio side of the attribution.
    def calcPoforlio(self, tradeNum, tradeProcess):
        """Return the industry table of the portfolio held on one valuation date.

        :param tradeNum: position of the valuation date inside ``tradeProcess``.
        :param tradeProcess: date-sorted list of ``(date, holdingsDf)`` pairs.
        :return: DataFrame indexed by industry with ``stockReturn``,
            ``stockINName`` and ``stockWeight``; ``None`` if Wind reports an error.
        """
        tradeDate = tradeProcess[tradeNum][0]  # current valuation date
        stockDf = tradeProcess[tradeNum][1]  # holdings on that date

        stockList = stockDf.index.tolist()
        wsdata = w.wss(codes=stockList, fields=self.indusFieds,
                       options="industryType=1;tradeDate=%s" % tradeDate)
        if wsdata.ErrorCode != 0:
            return None
        tempDf = pd.DataFrame(wsdata.Data, columns=wsdata.Codes, index=wsdata.Fields).T
        stockDf1 = pd.concat([stockDf, tempDf], join='inner', axis=1)
        try:
            # Both operands come from the joined frame so the ratio has exactly
            # one value per remaining row.
            cht = stockDf1['stockClosePrice'] / stockDf1['stockUnitBuy']
        except (TypeError, ValueError, ZeroDivisionError):
            # New-share subscriptions carry no market quote; fall back to a
            # price ratio of 1 (log return 0) for the whole table.
            cht = pd.Series([1] * stockDf1.shape[0], index=stockDf1.index)
        stockDf1['logReturn'] = np.log(cht.tolist())
        stockDf1['iWeight'] = stockDf1['stockBuyWeight0'] / stockDf1['stockBuyWeight0'].sum()
        return self._aggregateByIndustry(stockDf1, 'stock')

    # Benchmark side of the attribution.
    def calcBenchMark(self, tradeNum, tradeProcess):
        """Return the industry table of the benchmark for one valuation period.

        Constituent returns are measured from the previous valuation date
        (``tradeProcess[tradeNum - 1]``) to the current one.

        :return: DataFrame indexed by industry with ``indexReturn``,
            ``indexINName`` and ``indexWeight``; ``None`` if any Wind request
            reports an error.
        """
        tradeDate = tradeProcess[tradeNum][0]  # current valuation date
        beforeDate = tradeProcess[tradeNum - 1][0]  # previous valuation date

        # Index constituents and their weights.
        indexWSData = w.wset("indexconstituent", "date=%s;windcode=%s" % (tradeDate, self.benchMark))
        if indexWSData.ErrorCode != 0:
            return None
        indexData = pd.DataFrame(indexWSData.Data, index=['checkDate', 'windCode', 'windName', 'iWeight']).T
        indexData.set_index(keys='windCode', inplace=True)

        # Industry of every constituent.
        indexCodeList = indexData.index.tolist()
        wsindexdata = w.wss(codes=indexCodeList, fields=self.indusFieds,
                            options="industryType=1;tradeDate=%s" % tradeDate)
        if wsindexdata.ErrorCode != 0:
            return None
        tempIndexDf = pd.DataFrame(wsindexdata.Data, columns=wsindexdata.Codes, index=wsindexdata.Fields).T

        # Closing prices of the constituents on both dates.
        closeFrames = []
        for quoteDate, columnName in ((beforeDate, 'beforeClose'), (tradeDate, 'close')):
            indexClose = w.wss(codes=tempIndexDf.index.tolist(), fields=['close'],
                               options="tradeDate=%s;cycle=D;priceAdj=F" % quoteDate)
            if indexClose.ErrorCode != 0:
                return None
            closeFrames.append(pd.DataFrame(indexClose.Data, index=[columnName], columns=indexClose.Codes).T)

        indexDF = pd.concat([indexData, tempIndexDf] + closeFrames, axis=1, join='inner')
        indexDF['logReturn'] = np.log(indexDF['close'] / indexDF['beforeClose'])
        indexDF['iWeight'] = indexDF['iWeight'] / indexDF['iWeight'].sum()
        return self._aggregateByIndustry(indexDF, 'index')

    # Calculation entry point.
    def calc(self, dicTotalStock):
        """Run the single-period attribution for every consecutive date pair.

        :param dicTotalStock: dict of valuation date -> cleaned holdings.
        :return: dict with the per-period lists ``SR``, ``AR``, ``IR``,
            ``pofolioReturn``, ``benchMarkReturn``, the per-industry Series
            lists ``everySR``, ``everyAR``, ``everyIR`` and, once at least one
            period was processed, ``industryName`` (industries of the first
            period).
        :raises RuntimeError: if Wind data for a period could not be loaded.
        """
        w.start()
        tradeProcess = sorted(dicTotalStock.items(), key=lambda x: x[0], reverse=False)
        dicReturnFactor = {key: [] for key in ('SR', 'AR', 'IR', 'pofolioReturn', 'benchMarkReturn',
                                               'everySR', 'everyAR', 'everyIR')}

        for tradeNum in range(1, len(tradeProcess)):
            tradeDate = tradeProcess[tradeNum][0]  # current valuation date
            print('当前交易日期：', tradeDate)

            # Industry split and return of the portfolio.
            stockDf = self.calcPoforlio(tradeNum, tradeProcess)
            # Industry split and return of the benchmark constituents.
            indexDf = self.calcBenchMark(tradeNum, tradeProcess)
            if stockDf is None or indexDf is None:
                # Continuing would fail later with an unrelated KeyError.
                raise RuntimeError('Wind data request failed for valuation date %s' % tradeDate)

            totaldf = pd.concat([indexDf, stockDf], axis=1, join='outer')
            totaldf.fillna(0, inplace=True)
            # NOTE(review): stockReturn / indexReturn are already sums of
            # weight * return inside each industry, so multiplying by the
            # industry weight again applies the weight twice - confirm that
            # this is the intended definition before relying on the totals.
            pofolioReturn = (totaldf['stockReturn'] * totaldf['stockWeight']).sum()
            benchMarkReturn = (totaldf['indexReturn'] * totaldf['indexWeight']).sum()
            kFactor = self._linkingFactor(pofolioReturn, benchMarkReturn)

            dicReturnFactor['industryName'] = dicReturnFactor.get('industryName', totaldf.index.tolist())
            returnGap = totaldf['stockReturn'] - totaldf['indexReturn']
            weightGap = totaldf['stockWeight'] - totaldf['indexWeight']
            everySR = kFactor * (returnGap * totaldf['indexWeight'])
            everyAR = kFactor * (weightGap * totaldf['indexReturn'])
            everyIR = kFactor * (weightGap * returnGap)
            dicReturnFactor['everySR'].append(everySR)
            dicReturnFactor['everyAR'].append(everyAR)
            dicReturnFactor['everyIR'].append(everyIR)

            dicReturnFactor['SR'].append(everySR.sum())
            dicReturnFactor['AR'].append(everyAR.sum())
            dicReturnFactor['IR'].append(everyIR.sum())
            dicReturnFactor['pofolioReturn'].append(pofolioReturn)
            dicReturnFactor['benchMarkReturn'].append(benchMarkReturn)
        return dicReturnFactor

    # Decomposition of the total active return over the whole horizon.
    def totalTradeAny(self, dicReturnFactor):
        """Link the per-period effects into whole-horizon SR, AR and IR.

        The period returns are compounded, the whole-horizon linking factor is
        stored in ``dicReturnFactor['totalKFactor']`` (``plotFigure`` reads it)
        and the summed period effects are divided by it.

        :param dicReturnFactor: result of ``calc``.
        :return: dict with the keys ``SR``, ``AR`` and ``IR``.
        """
        totalPofolio = (1 + np.array(dicReturnFactor['pofolioReturn'])).prod() - 1
        totalBenchMark = (1 + np.array(dicReturnFactor['benchMarkReturn'])).prod() - 1
        totalKFactor = self._linkingFactor(totalPofolio, totalBenchMark)
        dicReturnFactor['totalKFactor'] = totalKFactor

        result = {}
        result['SR'] = np.sum(dicReturnFactor['SR']) / totalKFactor
        result['AR'] = np.sum(dicReturnFactor['AR']) / totalKFactor
        result['IR'] = np.sum(dicReturnFactor['IR']) / totalKFactor
        return result

    @staticmethod
    def _totalEffect(everyEffect, totalKFactor):
        """Sum per-period industry Series by industry label and rescale.

        Aligning on the label keeps the totals correct when the set or order
        of industries differs between periods.
        """
        aligned = pd.concat(everyEffect, axis=1).astype(float).fillna(0)
        return aligned.sum(axis=1) / totalKFactor

    # Plot entry point.
    def plotFigure(self, dicReturnFactor, resultPath='C:\\Users\\Administrator\\Desktop\\乐道4结果图\\'):
        """Draw the per-industry allocation, selection and interaction effects.

        :param dicReturnFactor: result of ``calc`` after ``totalTradeAny`` has
            added ``totalKFactor`` to it.
        :param resultPath: prefix of the saved image file.
        """
        plt.rcParams['font.sans-serif'] = ['SimHei']
        plt.rcParams['axes.unicode_minus'] = False

        totalKFactor = dicReturnFactor['totalKFactor']
        panels = ((131, 'everyAR', u'行业配置收益'),
                  (132, 'everySR', u'个股选择收益'),
                  (133, 'everyIR', u'交互收益'))

        fig = plt.figure(figsize=(16, 9))
        for position, key, title in panels:
            ax = fig.add_subplot(position)
            effect = self._totalEffect(dicReturnFactor[key], totalKFactor).sort_values()
            effect.plot(kind='barh', ax=ax)
            ax.set_title(title)
        plt.savefig(resultPath + '归因分析')
        plt.show()

    # Main entry point.
    def calcMain(self):
        """Run the attribution, plot it and print the whole-horizon effects."""
        dicReturnFactor = self.calc(self.dicStockDf)
        result = self.totalTradeAny(dicReturnFactor)
        self.plotFigure(dicReturnFactor)
        print(result)
netAssetDf['accNetReturn']], axis=1) 31 | df1.plot(ax=ax1) 32 | ax1.set_title(u'累计收益走势图', fontproperties=self.myfont) 33 | 34 | def historydownrate(tempdata): 35 | templist = [] 36 | for k in range(len(tempdata)): 37 | downrate = tempdata[k] / tempdata[:k + 1].max() - 1 38 | templist.append(downrate) 39 | tempdf = pd.Series(templist, index=tempdata.index) 40 | tempdf.name = tempdata.name 41 | return tempdf 42 | 43 | downDf = netAssetDf[['netValue', '399300.SZ']].apply(historydownrate) 44 | ax2 = fig.add_subplot(222) 45 | downDf.plot(ax=ax2) 46 | ax2.set_title(u'回撤率走势图', fontproperties=self.myfont) 47 | 48 | ax3 = fig.add_subplot(223) 49 | netAssetDf['netReturn'].plot(ax=ax3, kind='hist', bins=20) 50 | ax3.set_title(u'收益率分布图', fontproperties=self.myfont) 51 | 52 | ax4 = fig.add_subplot(224) 53 | tempArr = netAssetDf['thisNetReturn'].values 54 | tempArrUp = len(tempArr[tempArr >= 0]) 55 | upRate = tempArrUp / len(tempArr) * 100 56 | upRate = round(upRate, 2) 57 | downRate = 100 - upRate 58 | 59 | tempArrUpRate = str(upRate) + '%' 60 | tempArrDownRate = str(downRate) + '%' 61 | labels = [u'Trade + %s' % tempArrUpRate, u'Trade - %s' % tempArrDownRate] 62 | ax4.pie([upRate, downRate], labels=labels) 63 | ax4.set_title(u'盈亏状况统计图', fontproperties=self.myfont) 64 | plt.savefig('C:\\Users\\Administrator\\Desktop\\乐道4结果图\\' + '趋势图') 65 | 66 | fig2 = plt.figure(figsize=(16, 9)) 67 | ax5 = fig2.add_subplot(211) 68 | netAssetDf['stockRate'].plot(ax=ax5, kind='bar', color='LightGreen') 69 | ax5.set_title(u'仓位变化图', fontproperties=self.myfont) 70 | ax5.set_ylabel(u'流通股票占比', fontproperties=self.myfont) 71 | 72 | # ax6 = ax5.twinx() 73 | # df1[['accNetReturn','399300.SZ']].plot(ax=ax6) 74 | # netAssetDf['accNetReturn'].plot(ax=ax6, color='red') 75 | # ax6.set_ylabel(u'累计净值增长率', fontproperties=self.myfont) 76 | 77 | ax7 = fig2.add_subplot(212) 78 | netAssetDf['annualStd'] = netAssetDf['netValue'].rolling(window=4).std() * np.sqrt(12) 79 | netAssetDf['annualStdHS300'] = 
indexReturn['399300.SZ'].rolling(window=4).std() * np.sqrt(12) 80 | netAssetDf[['annualStd', 'annualStdHS300']].dropna().plot(ax=ax7) 81 | ax7.set_title(u'滚动年化波动率走势图', fontproperties=self.myfont) 82 | ax7.set_ylabel(u'年化波动率', fontproperties=self.myfont) 83 | plt.savefig('C:\\Users\\Administrator\\Desktop\\乐道4结果图\\' + '波动变化图') 84 | 85 | fig3 = plt.figure(figsize=(16, 9)) 86 | ax8 = fig3.add_subplot(211) 87 | tempDf = netAssetDf[ 88 | ['cashRate', 'ensureMoneyRate', 'antiSaleRate', 'securityRate', 'fundRate', 'otherRate']].copy() 89 | tempDf.fillna(0, inplace=True) 90 | color = ['r', 'g', 'b', 'y', 'k', 'c', 'm'] 91 | for i in range(tempDf.shape[1]): 92 | ax8.bar(tempDf.index.tolist(), tempDf.ix[:, i], color=color[i], bottom=tempDf.ix[:, :i].sum(axis=1), 93 | width=3.95) 94 | ax8.set_label(['cashRate', 'ensureMoneyRate', 'antiSaleRate', 'securityRate', 'fundRate', 'otherRate']) 95 | for tick in ax8.get_xticklabels(): 96 | tick.set_rotation(90) 97 | 98 | ax9 = fig3.add_subplot(212) 99 | tempdf = self.similateNet(netAssetDf[['netValue', 'netReturn','accNetReturn']]) 100 | tempdf.plot(ax=ax9) 101 | plt.savefig('C:\\Users\\Administrator\\Desktop\\乐道4结果图\\' + '持仓分布图') 102 | plt.show() 103 | 104 | # 字符串拼接 105 | def CodeToStr(self, templist): 106 | tempstr = '' 107 | for temp in templist: 108 | tempstr = tempstr + "'" + temp + "'" + "," 109 | return tempstr[:-1] 110 | 111 | # 按照产品时间周期，获取指数历史数据 112 | def getIndexData(self, netAssetDf): 113 | startDate = netAssetDf.index.tolist()[0] 114 | endDate = netAssetDf.index.tolist()[-1] 115 | mysqlConfig = ['root', '123456', 'localhost', '3306', 'fund_data', 'utf8'] 116 | mysqlcon = "mysql+pymysql://%s:%s@%s:%s/%s?charset=%s" % ( 117 | mysqlConfig[0], mysqlConfig[1], mysqlConfig[2], mysqlConfig[3], mysqlConfig[4], mysqlConfig[5]) 118 | conn = create_engine(mysqlcon) 119 | 120 | sqlStr = "select CODE,CLOSE,`UPDATE` from index_data where CODE in (%s) and `UPDATE`<='%s' and `UPDATE` >='%s'" % ( 121 | self.CodeToStr(self.indexCodeList), 
endDate, startDate) 122 | tempDf1 = pd.read_sql(sql=sqlStr, con=conn) 123 | 124 | dflist = [] 125 | for code, df in tempDf1.groupby(by=['CODE']): 126 | temp = pd.DataFrame(df['CLOSE'].values, index=df['UPDATE'].tolist(), columns=[code]) 127 | dflist.append(temp) 128 | dflist.append(netAssetDf['netValue']) 129 | tempIndexDF = pd.concat(dflist, axis=1).fillna(method='pad') 130 | totalDf = pd.concat([netAssetDf, tempIndexDF[self.indexCodeList]], axis=1, join='inner') 131 | return totalDf 132 | 133 | # 模拟基金净值走势 134 | def similateNet(self, netDf): 135 | period = 6 * 4 # 预测周数 136 | rateMean = netDf['netReturn'].mean() 137 | rateStd = netDf['netReturn'].std() 138 | 139 | dicPredict = {} 140 | dicPredict['lowMarket'] = [] 141 | dicPredict['highMarket'] = [] 142 | dicPredict['middleMarket'] = [] 143 | for i in range(period): 144 | randNum = np.random.normal(loc=rateMean, scale=rateStd, size=(100, 1)) 145 | dicPredict['lowMarket'].append(np.percentile(randNum, 25, axis=0)) 146 | dicPredict['middleMarket'].append(np.percentile(randNum, 50, axis=0)) 147 | dicPredict['highMarket'].append(np.percentile(randNum, 75, axis=0)) 148 | 149 | # lowMarket = netDf['netValue'][-1] * (1 + np.array(dicPredict['lowMarket']).cumsum()) 150 | # highMarket = netDf['netValue'][-1] * (1 + np.array(dicPredict['highMarket']).cumsum()) 151 | # middleMarket = netDf['netValue'][-1] * (1 + np.array(dicPredict['middleMarket']).cumsum()) 152 | 153 | lowMarket = netDf['accNetReturn'][-1] * ((1 + np.array(dicPredict['lowMarket'])).cumprod()) 154 | highMarket = netDf['accNetReturn'][-1] * ((1 + np.array(dicPredict['highMarket'])).cumprod()) 155 | middleMarket = netDf['accNetReturn'][-1] * ((1 + np.array(dicPredict['middleMarket'])).cumprod()) 156 | 157 | temp = list(pd.date_range(start=netDf.index.tolist()[-1], freq="W", periods=period)) 158 | tempdate = [x.date() for x in temp] 159 | 160 | lowMarket = np.concatenate((netDf['accNetReturn'].values, np.array(lowMarket))) 161 | middleMarket = 
class CalcRegression:
    """Regression-based analysis of a fund's net-value series.

    * ``getSelectStockAndTime`` fits the T-M, H-M and C-L timing/selection models.
    * ``getStyleRegression`` / ``getIndustryRegression`` regress the fund's
      excess return on each style or industry index and plot the best fit.

    All methods read the per-period risk-free rate from the ``无风险利率`` column
    of the frame they are given and write their results below ``resultPath``.
    """

    _RISK_FREE = '无风险利率'  # column holding the per-period risk-free rate

    def __init__(self):
        pass

    def regression(self, x1, x2, y):
        '''
        Ordinary least squares of ``y`` on a constant, ``x1`` and ``x2``.

        :param x1: first regressor (1-D array-like)
        :param x2: second regressor (1-D array-like, same length)
        :param y: dependent variable
        :return: fitted statsmodels results; ``params`` is ordered
                 ``[const, x1, x2]``
        '''
        x1 = np.asarray(x1).reshape(-1, 1)
        x2 = np.asarray(x2).reshape(-1, 1)
        X = np.hstack((np.ones((len(x1), 1)), x1, x2))
        return sm.OLS(y, X).fit()

    def _indexRegression(self, fundIndustryDf, resultPath, fundName, industryDic, benchDf, label, showPlot):
        '''
        Shared implementation of the style and industry regressions.

        Regresses the fund's excess return on the excess return of every index
        in ``industryDic``, saves the ranking to ``<label>回归结果.xlsx`` and
        saves two figures for the index with the highest R-squared.

        :param benchDf: optional price frame whose period return is added to
                        the fund return (quantitative products); ``None`` or
                        an empty frame disables it
        :param label: '风格' or '行业'; inserted into file names and captions
        :param showPlot: call ``plt.show()`` after saving the figures
        '''
        industryCodeList = list(industryDic.keys())
        if not industryCodeList:
            raise ValueError('industryDic must contain at least one index code')

        riskFree = fundIndustryDf[self._RISK_FREE]
        targetDf = fundIndustryDf[industryCodeList + [fundName]]
        tempReturn = (targetDf - targetDf.shift(1)) / targetDf.shift(1)
        if benchDf is not None and not benchDf.empty:  # quantitative products
            bench_return_df = benchDf / benchDf.shift(1) - 1
            tempReturn[fundName] = pd.concat([bench_return_df, tempReturn[fundName]],
                                             axis=1, sort=True).sum(axis=1)

        tempExReturn = tempReturn.apply(lambda tempSe: tempSe - riskFree)
        tempExReturn.dropna(inplace=True)

        list_r2, list_beta, list_tr, list_const = [], [], [], []
        Y = tempExReturn[fundName].values
        riskFreeMean = riskFree.mean()
        for code in industryCodeList:
            x = tempExReturn[code].values
            X = np.column_stack((np.ones(len(x)), x))
            res = sm.OLS(Y, X).fit()
            list_r2.append(res.rsquared)
            list_beta.append(res.params[1])
            list_const.append(res.params[0])
            # Whole-period index return in excess of the mean risk-free rate;
            # .iloc keeps the access positional whatever the index labels are.
            codePrice = fundIndustryDf[code]
            list_tr.append((codePrice.iloc[-1] / codePrice.iloc[0] - 1) - riskFreeMean)

        res_indus = pd.DataFrame([])
        res_indus['指数代码'] = industryCodeList
        res_indus['指数名称'] = [industryDic[code] for code in industryCodeList]
        res_indus['拟合R方'] = list_r2
        res_indus['beta'] = list_beta
        res_indus['alpha'] = list_const
        res_indus['期间总收益'] = list_tr
        res_indus['开始时间'] = tempExReturn.index.tolist()[0]
        res_indus['终止时间'] = tempExReturn.index.tolist()[-1]
        res_indus = res_indus.sort_values('拟合R方', ascending=False)
        res_indus.to_excel(resultPath + label + '回归结果.xlsx', index=False)

        # Best-fitting index = first row after sorting by R-squared.
        maxR2Code = res_indus['指数代码'].tolist()[0]
        x = tempExReturn[maxR2Code].values
        maxR2Alpha = res_indus['alpha'].tolist()[0]
        maxR2Beta = res_indus['beta'].tolist()[0]
        bestTitle = '拟合效果最好的' + label + '指数：' + industryDic[maxR2Code]

        plt.style.use('ggplot')
        plt.figure(figsize=(16, 9))
        plt.scatter(x, Y, s=30, color='blue', label='样本实例')
        plt.plot(x, maxR2Alpha + maxR2Beta * x, linewidth=3, color='red', label='回归线')
        plt.ylabel('产品超额收益')
        plt.xlabel(label + '超额收益')
        plt.title(bestTitle, fontsize=13, bbox={'facecolor': '0.8', 'pad': 5})
        plt.grid(True)
        plt.legend(loc='upper left')
        plt.savefig(resultPath + '拟合' + label + '指数效果图.png')

        fig = plt.figure(figsize=(16, 9))
        ax = fig.add_subplot(111)
        indeustryAccDf = (1 + tempReturn[[fundName, maxR2Code]]).cumprod()
        indeustryAccDf['产品' + label + '收益比'] = indeustryAccDf[fundName] / indeustryAccDf[maxR2Code]
        indeustryAccDf.plot(ax=ax)
        ax.set_ylabel('累计收益率')
        ax.set_xlabel('时间')
        ax.set_title(bestTitle, fontsize=13, bbox={'facecolor': '0.8', 'pad': 5})
        ax.grid(True)
        # 'down right' is not a matplotlib location string; 'lower right' is.
        ax.legend(loc='lower right')
        plt.savefig(resultPath + '拟合' + label + '指数累计走势对比图.png')
        if showPlot:
            plt.show()

    def getStyleRegression(self, fundIndustryDf, resultPath, fundName, industryDic, DCIndexDf=None):
        '''
        Style attribution: regress the fund on every style index.

        :param fundIndustryDf: prices of the style indices and of the fund plus
                               the risk-free-rate column
        :param resultPath: prefix of every output file
        :param fundName: column name of the fund inside ``fundIndustryDf``
        :param industryDic: dict of index code -> display name
        :param DCIndexDf: optional hedge-index prices for quantitative products
        :return: None; results are written to ``resultPath`` and shown
        '''
        self._indexRegression(fundIndustryDf, resultPath, fundName, industryDic,
                              DCIndexDf, '风格', True)

    def getIndustryRegression(self, fundIndustryDf, resultPath, fundName, industryDic, bench_return=None):
        '''
        Industry attribution: regress the fund on every industry index.

        :param fundIndustryDf: prices of the industry indices and of the fund
                               plus the risk-free-rate column
        :param resultPath: prefix of every output file
        :param fundName: column name of the fund inside ``fundIndustryDf``
        :param industryDic: dict of index code -> display name
        :param bench_return: optional hedge-index prices for quantitative products
        :return: None; results are written to ``resultPath``
        '''
        self._indexRegression(fundIndustryDf, resultPath, fundName, industryDic,
                              bench_return, '行业', False)

    @staticmethod
    def _timingSummary(result, timing, calcPeriod):
        '''Return the report row (R-squared, annualised alpha, timing) of one model.'''
        return {'R方': result.rsquared,
                '择股指标(年化alpha)': str(round(result.params[0] * calcPeriod * 100, 2)) + '%',
                '择时指标(beta)': round(timing, 2)}

    def getSelectStockAndTime(self, fundPlotDf, resultPath, fundName, netPeriod, benchMark, DCIndexDf=None):
        '''
        Estimate stock-selection and market-timing ability (T-M, H-M, C-L).

        :param fundPlotDf: prices of the fund and the benchmark plus the
                           risk-free-rate column
        :param resultPath: prefix of the two output files
        :param fundName: column name of the fund
        :param netPeriod: 'W' for weekly data (52 periods a year); anything
                          else is treated as daily (250 periods a year)
        :param benchMark: column name of the benchmark
        :param DCIndexDf: optional hedge-index prices for quantitative hedged
                          products; its return is added to the fund return
        :return: None; writes the regression summaries to a text file and the
                 indicator table to an Excel file
        '''
        calcPeriod = 52 if netPeriod == 'W' else 250

        if DCIndexDf is not None and not DCIndexDf.empty:
            target_df = pd.concat([fundPlotDf[[fundName, benchMark]], DCIndexDf], axis=1, sort=True)
            tempReturn = (target_df - target_df.shift(1)) / target_df.shift(1)
            tempReturn.fillna(0, inplace=True)
            # Quantitative hedged product: fund return plus hedge-index return.
            tempReturn[fundName] = tempReturn[[tempReturn.columns[0], tempReturn.columns[-1]]].sum(axis=1)
        else:
            targetDf = fundPlotDf[[fundName, benchMark]]
            tempReturn = (targetDf - targetDf.shift(1)) / targetDf.shift(1)
            tempReturn.fillna(0, inplace=True)

        riskFree = fundPlotDf[self._RISK_FREE]
        Y = (tempReturn[fundName] - riskFree).values
        # Benchmark excess return; derived once (label-aligned) and reused by
        # all three models so their regressors always line up with Y.
        excess = (tempReturn[benchMark] - riskFree).values
        upMarket = excess > 0

        # T-M: timing term is the squared excess return.
        TMResult = self.regression(excess, np.power(excess, 2), Y)
        # H-M: timing term is D * excess with D = 1 in up markets (not squared).
        HMResult = self.regression(excess, np.where(upMarket, excess, 0.0), Y)
        # C-L: separate betas for up markets (params[1]) and down markets (params[2]).
        CLResult = self.regression(np.where(upMarket, excess, 0.0),
                                   np.where(upMarket, 0.0, excess), Y)

        dicRegression = {
            'TM回归结果': self._timingSummary(TMResult, TMResult.params[2], calcPeriod),
            'HM回归结果': self._timingSummary(HMResult, HMResult.params[2], calcPeriod),
            # Timing ability = up-market beta minus down-market beta.
            'CL回归结果': self._timingSummary(CLResult, CLResult.params[1] - CLResult.params[2], calcPeriod),
        }

        with open(resultPath + "TM,HM,CL模型回归结果.txt", "w+") as f:
            f.write(str(TMResult.summary(title='TM模型回归结果')))
            f.write('\n\n\n')
            f.write(str(HMResult.summary(title='HM模型回归结果')))
            f.write('\n\n\n')
            f.write(str(CLResult.summary(title='CL模型回归结果')))

        regressionDf = pd.DataFrame(dicRegression)
        regressionDf.to_excel(resultPath + '选股择时能力回归结果.xlsx')
        return
def calcMaxdown(self, return_list):
    '''
    Maximum drawdown of a return series, as a positive fraction.

    The returns are compounded into a net-value curve; the drawdown at each
    point is measured relative to the running peak, and the largest value is
    returned. The previous version picked the trough with the largest
    *absolute* drop, which understates the result when a smaller absolute
    drop from a low level is the larger percentage loss.

    :param return_list: pandas Series of simple per-period returns
    :return: largest peak-to-trough loss (0 when the curve never falls or
        the series is empty)
    '''
    netValue = (return_list + 1).cumprod().values
    if len(netValue) == 0:
        return 0
    runningPeak = np.maximum.accumulate(netValue)
    drawdown = (runningPeak - netValue) / runningPeak
    return drawdown.max()
def calcRiskReturn(self, fundIndexDf, resultPath):
    '''
    Compute risk/return statistics over several trailing windows and save them.

    For each window (1, 3, 6, 12 months of 21 rows each, and the full
    history) ``self.calcDetail`` is applied to the trailing rows. The
    formatted results go to '风险收益统计指标.xlsx' and the raw numbers to
    '风险收益统计指标原始数据.xlsx', both indexed by (window, statistic).

    A window whose calculation fails is skipped with a warning. Previously the
    error was swallowed and the code went on to use ``tempResult``, which was
    either unbound or still held the previous window's figures.

    :param fundIndexDf: net-value DataFrame, one column per asset
    :param resultPath: output directory prefix (file names are appended directly)
    :return: None
    '''
    import warnings

    timeWindows = [('近一月', 21), ('近三月', 21 * 3), ('近六月', 21 * 6),
                   ('近一年', 21 * 12), ('成立以来', None)]

    fundDfList = []
    fundRightDfList = []
    for timeWindow, timeNum in timeWindows:
        # None means "since inception": use the whole history.
        tempValueDf = fundIndexDf if timeNum is None else fundIndexDf.iloc[-timeNum:]
        try:
            tempResult, tempRightResult = self.calcDetail(tempValueDf)
        except Exception as exc:
            warnings.warn('calcRiskReturn: window %s skipped (%r)' % (timeWindow, exc))
            continue

        lastDate = tempValueDf.index.tolist()[-1]
        for resultDic, collector in ((tempResult, fundDfList), (tempRightResult, fundRightDfList)):
            tempDf = pd.DataFrame(resultDic).T
            tempDf['统计周期'] = timeWindow
            tempDf['数据截止日期'] = lastDate
            collector.append(tempDf)

    outputs = ((fundDfList, '风险收益统计指标.xlsx'),
               (fundRightDfList, '风险收益统计指标原始数据.xlsx'))
    for dfList, fileName in outputs:
        toExcelDf = pd.concat(dfList, axis=0)
        toExcelDf['统计指标'] = toExcelDf.index.tolist()
        toExcelDf.set_index(keys=['统计周期', '统计指标'], drop=True, inplace=True)
        toExcelDf.to_excel(resultPath + fileName)
def calcWeekNetValueResult(self, weekFundPlotDf, resultPath, fundName):
    '''
    Weekly win/loss statistics, saved to '周度胜率统计.xlsx'.

    For every column of the weekly net-value frame this computes the share of
    losing and winning weeks and the longest winning and losing streaks.

    Two defects of the earlier version are fixed: a streak that ran to the
    end of the series was never counted, and the two streak values were
    stored under each other's labels.

    :param weekFundPlotDf: weekly net values indexed by 'YYYY-MM-DD' strings
    :param resultPath: output directory prefix (file name is appended directly)
    :param fundName: unused here; kept for interface compatibility
    :return: None
    '''

    def longestRun(flags):
        # Length of the longest run of True values, including a run that ends the series.
        best = 0
        current = 0
        for flag in flags:
            current = current + 1 if flag else 0
            if current > best:
                best = current
        return best

    def upAndDownTrade(tempSe):
        totalValue = tempSe.tolist()
        failTrade = len(tempSe[tempSe < 0]) / len(tempSe)
        successTrade = 1 - failTrade
        failTimes = longestRun(value < 0 for value in totalValue)
        successTimes = longestRun(value >= 0 for value in totalValue)
        # '最大连续上涨周数' is the winning streak, '最大连续下跌周数' the losing streak.
        return pd.Series([failTrade, successTrade, successTimes, failTimes],
                         index=['负交易周', '正交易周', '最大连续上涨周数', '最大连续下跌周数'],
                         name=tempSe.name)

    fundPlotFormatDf = self.DateFormatDfDemo.getStrToDate(weekFundPlotDf)
    tempReturn = (fundPlotFormatDf - fundPlotFormatDf.shift(1)) / fundPlotFormatDf.shift(1)
    tempReturn.fillna(0, inplace=True)  # the first week has no prior value; treat as 0 return
    tradeResultDf = tempReturn.apply(upAndDownTrade)
    tradeResultDf.to_excel(resultPath + '周度胜率统计.xlsx')
def calcMonteCarlo(self, initValue, tradeDayList, mu, sigma, calcTimes=10000):
    '''
    Monte Carlo simulation of future net-value paths.

    Every path starts at ``initValue`` and each later step multiplies the
    previous value by ``1 + drift + shock``, where ``drift = mu * dt``,
    ``shock ~ Normal(mu * dt, sigma * sqrt(dt))`` and ``dt = 1 / len(tradeDayList)``.
    All paths are drawn in one vectorised call instead of a
    ``calcTimes x days`` Python loop; the distribution is unchanged.

    NOTE(review): the shock is centred on ``mu * dt`` and the drift adds
    ``mu * dt`` again, so the expected step is ``2 * mu * dt``. This is kept
    as in the original; confirm whether it is intended.

    :param initValue: starting value of every path
    :param tradeDayList: dates of the holding period; becomes the result index
    :param mu: mean return
    :param sigma: standard deviation of returns
    :param calcTimes: number of simulated paths
    :return: DataFrame with one row per date and one column per path
    :raises ValueError: if ``tradeDayList`` is empty
    '''
    days = len(tradeDayList)
    if days == 0:
        raise ValueError('tradeDayList must contain at least one date')
    dt = 1 / days
    drift = mu * dt

    shocks = np.random.normal(loc=mu * dt, scale=sigma * np.sqrt(dt), size=(days - 1, calcTimes))

    price = np.empty((days, calcTimes))
    price[0] = initValue
    # price[x] = price[x - 1] * (1 + drift + shock[x]), accumulated down each column.
    price[1:] = initValue * np.cumprod(1 + drift + shocks, axis=0)
    return pd.DataFrame(price, index=tradeDayList)
'净值变动频数分布图.png') 400 | # plt.show() 401 | 402 | forcastRate = forcastLast/initValue-1 403 | condition1 = len(forcastRate[forcastRate<=-0.01])/len(forcastRate) 404 | condition2 = len(forcastRate[(forcastRate <= -0.005)&(-0.01 0.01]) / len(forcastRate) 409 | indexList = ['<-1.0%','-1.0%~-0.5%','-0.5%~-0.0%','0.0%~0.5%','0.5%~1.0%','>1.0%'] 410 | rateProSe = pd.Series([condition1,condition2,condition3,condition4,condition5,condition6],index=indexList,name='概率') 411 | rateProSe.index.name='收益率区间' 412 | rateProSe.to_excel(resultPath + '预测收益率概率表.xlsx') 413 | 414 | if __name__ == '__main__': 415 | CalcRiskReturnDemo = CalcRiskReturn() 416 | CalcRiskReturn.calcRiskReturn() 417 | -------------------------------------------------------------------------------- /DataToMySql/DataDic.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # Author:zouhao 3 | # email:1084848158@qq.com 4 | ''' 5 | 估值表数据字典，用于解析不同估值表格式后，对应到本地mysql数据库的字段 6 | ''' 7 | 8 | class DataDic: 9 | def __init__(self): 10 | pass 11 | 12 | def getDataDic(self,nameList=[]): 13 | dicResult = {} 14 | dicTotalResult = self.getTotalDataDic() 15 | for nameStr in nameList: 16 | dicResult[nameStr] = dicTotalResult[nameStr] 17 | return dicResult 18 | 19 | def getTotalDataDic(self): 20 | dicResult = {} 21 | dicResult['基金代码'] = 'fund_code' 22 | dicResult['基金名称'] = 'fund_name' 23 | dicResult['单位净值']='net_value' 24 | dicResult['基金单位净值'] = 'net_value' 25 | 26 | dicResult['昨日单位净值'] = 'pre_net_value' 27 | dicResult['累计单位净值'] = 'acc_net_value' 28 | dicResult['累计净值'] = 'acc_net_value' 29 | dicResult['日净值增长率'] = 'rate_net_value' 30 | dicResult['净值日增长率(比)'] = 'rate_net_value' 31 | 32 | dicResult['数据日期'] = 'update_time' 33 | 34 | dicResult['科目代码'] = 'style_code' 35 | dicResult['科目名称'] = 'style_name' 36 | dicResult['成本'] = 'cost' 37 | dicResult['成本占比'] = 'cost_rate' 38 | dicResult['成本占净值比'] = 'cost_rate' 39 | 40 | 41 | dicResult['市值'] = 'market_value' 42 | dicResult['市值占比'] 
class GetDataToMysql:
    '''
    Writes DataFrames into local MySQL tables with ``REPLACE INTO``
    (existing rows are updated, missing rows are inserted).
    '''

    def __init__(self):
        self.conn = MysqlCon().getMysqlCon(flag='connect')

    def GetMain(self, dataDf, tableName):
        '''
        Upsert every row of ``dataDf`` into ``tableName``.

        Missing values (NaN/None/NaT) are sent as SQL NULL. A row that the
        database rejects is skipped, as before, but the number of skipped rows
        and the last error are now reported with a warning instead of being
        silently discarded. The caller's DataFrame is no longer modified.

        NOTE(review): the table and column names are interpolated into the
        statement text (identifiers cannot be bound as parameters), so they
        must come from trusted code, never from user input.

        :param dataDf: DataFrame whose column names match the table's columns
        :param tableName: target table name
        :return: None
        '''
        import warnings

        tableList = dataDf.columns.tolist()
        placeholders = ','.join(['%s'] * len(tableList))
        sqlStr = "replace into %s(%s)" % (tableName, ','.join(tableList)) + "VALUES(%s)" % placeholders

        # Cast to object so values come back as plain Python objects; read rows
        # positionally (the removed ``.ix`` indexer is no longer needed).
        rows = dataDf.astype(object).values.tolist()

        failedRows = 0
        lastError = None
        cursor = self.conn.cursor()
        try:
            for row in rows:
                values = tuple(None if pd.isna(value) else value for value in row)
                try:
                    cursor.execute(sqlStr, values)
                except Exception as exc:
                    failedRows += 1
                    lastError = exc
        finally:
            cursor.close()
        self.conn.commit()

        if failedRows:
            warnings.warn('GetMain: %d of %d rows were not written to %s (last error: %r)'
                          % (failedRows, len(rows), tableName, lastError))
class MysqlCon:
    '''
    Creates MySQL connections from the ``[db]`` section of ``mysql.conf``.
    '''

    def __init__(self):
        pass

    def getMysqlCon(self, flag='connect'):
        '''
        Open a connection described by ``mysql.conf``.

        The configuration file is looked up in the working directory, then in
        ``<cwd>/DataToMySql/`` and finally next to this module. The first file
        that provides ``db_port`` wins. Paths are built with ``os.path.join``,
        so the lookup no longer depends on Windows separators.

        :param flag: 'connect' returns a ``pymysql`` connection,
            'engine' returns a SQLAlchemy engine
        :return: the connection or engine
        :raises ValueError: if ``flag`` is neither 'connect' nor 'engine'
            (previously this ended in an unbound-variable error)
        '''
        if flag not in ('connect', 'engine'):
            raise ValueError("flag must be 'connect' or 'engine', got %r" % (flag,))

        configParser = ConfigParser()
        candidatePaths = [
            'mysql.conf',
            os.path.join(os.getcwd(), 'DataToMySql', 'mysql.conf'),
            os.path.join(os.path.dirname(os.path.abspath(__file__)), 'mysql.conf'),
        ]
        for confPath in candidatePaths:
            configParser.read(confPath)  # missing files are ignored by ConfigParser.read
            if configParser.has_option('db', 'db_port'):
                break

        # If no candidate supplied the section, these raise configparser errors as before.
        db_port = configParser.getint('db', 'db_port')
        db_user = configParser.get('db', 'db_user')
        db_pass = configParser.get('db', 'db_pass')
        db_host = configParser.get('db', 'db_host')
        db_database = configParser.get('db', 'db_database')

        # Register an encoder for numpy float64 values, then rebuild pymysql's
        # combined conversion table from the encoders and decoders.
        pymysql.converters.encoders[np.float64] = pymysql.converters.escape_float
        pymysql.converters.conversions = pymysql.converters.encoders.copy()
        pymysql.converters.conversions.update(pymysql.converters.decoders)

        if flag == 'connect':
            return pymysql.connect(host=db_host, user=db_user, passwd=db_pass, db=db_database, port=db_port)

        sqlConStr = "mysql+pymysql://%s:%s@%s:%s/%s?charset=utf8" % (db_user, db_pass, db_host, db_port, db_database)
        return create_engine(sqlConStr)
class DateFormatDf:
    '''
    Converts the string date index of a time-series DataFrame to ``datetime``.
    '''

    def __init__(self):
        pass

    def getStrToDate(self, tempDf, flag=1):
        '''
        Return a copy of ``tempDf`` whose index holds ``datetime`` objects.

        flag=1: index strings look like '2019-01-01'
        flag=2: index strings look like '20190101'

        :param tempDf: DataFrame indexed by date strings
        :param flag: index string format, 1 or 2
        :return: new DataFrame with the same values and columns
        :raises ValueError: if ``flag`` is not 1 or 2 (previously this ended
            in an unbound-variable error)
        '''
        dateList = tempDf.index.tolist()
        if flag == 1:
            dateStrList = dateList
        elif flag == 2:
            # Insert dashes so both formats share one parsing pattern.
            dateStrList = [dateStr[:4] + '-' + dateStr[4:6] + '-' + dateStr[6:] for dateStr in dateList]
        else:
            raise ValueError('flag must be 1 or 2, got %r' % (flag,))

        dateNewList = [datetime.strptime(dateStr, "%Y-%m-%d") for dateStr in dateStrList]
        return pd.DataFrame(tempDf.values, index=dateNewList, columns=tempDf.columns)
# self.indexNameDic = {'000300.SH': '沪深300',} 30 | self.totalIndexName = list(self.indexNameDic.values()) 31 | self.logger = mylog.set_log() 32 | self.GetDataTotalMainDemo = GetDataTotalMain(data_resource='wind') 33 | self.DCIndex = dicParam['DCIndex'] 34 | 35 | def getNetValueDataDic(self,fundName): 36 | # indexCodeList = ['000300.SH', '000016.SH', '000905.SH', '000906.SH'] 37 | indexCodeList = ['000300.SH'] 38 | dicResult = {} 39 | self.logger.info("获取基金净值数据...") 40 | netValuedf = self.GetDataTotalMainDemo.get_hq_data(code=self.fundCode, start_date=self.startDate, 41 | end_date=self.endDate, code_style='fund', 42 | name_list=['net_value_adj']) 43 | self.logger.info("基金净值数据获取成功！") 44 | # self.fundName = netValuedf['fund_name'].unique()[0] 45 | self.fundName = fundName 46 | dicResult['fundName'] = fundName 47 | dicResult['netValuedf'] = netValuedf 48 | 49 | self.logger.info("获取大盘指数数据...") 50 | startDate = netValuedf.index.tolist()[0] 51 | endDate = netValuedf.index.tolist()[-1] 52 | 53 | dfIndexList = [] 54 | dfVolumeList = [] 55 | for indexCode in indexCodeList: 56 | indexDf = self.GetDataTotalMainDemo.get_hq_data(code=indexCode,start_date=startDate,end_date=endDate) 57 | indexDf.rename(columns={'close_price': indexCode}, inplace=True) 58 | dfIndexList.append(indexDf) 59 | 60 | indexDf = self.GetDataTotalMainDemo.get_hq_data(code=indexCode, start_date=startDate, 61 | end_date=endDate, name_list=['volume']) 62 | indexDf.rename(columns={'volume': indexCode}, inplace=True) 63 | dfVolumeList.append(indexDf) 64 | 65 | dicResult['DCIndexDf'] = pd.DataFrame() 66 | if self.DCIndex: 67 | DCIndexDf = self.GetDataTotalMainDemo.get_hq_data(code=self.DCIndex,start_date=startDate,end_date=endDate) 68 | DCIndexDf.rename(columns={'close_price': self.DCIndex}, inplace=True) 69 | dicResult['DCIndexDf'] = DCIndexDf 70 | 71 | self.logger.info("获取大盘指数数据成功！") 72 | totalIndexDf = pd.concat(dfIndexList, axis=1) 73 | totalVolumeDf = pd.concat(dfVolumeList, axis=1) 74 | 
dicResult['indexDf'] = totalIndexDf 75 | dicResult['totalVolumeDf'] = totalVolumeDf 76 | 77 | # 行业指数 78 | industryList = ['801210.SI', '801050.SI', '801140.SI', '801020.SI', '801170.SI', '801030.SI', '801150.SI', 79 | '801010.SI', '801200.SI', '801230.SI', '801770.SI', '801730.SI', \ 80 | '801130.SI', '801880.SI', '801180.SI', '801160.SI', '801780.SI', '801890.SI', '801080.SI', 81 | '801760.SI', '801790.SI', '801710.SI', '801740.SI', '801720.SI', \ 82 | '801750.SI', '801110.SI', '801040.SI', '801120.SI'] 83 | industryLabel = ['休闲服务', '有色金属', '轻工制造', '采掘', '交通运输', '化工', '医药生物', '农林牧渔', '商业贸易', '综合', '通信', '电气设备', '纺织服装', 84 | '汽车', '房地产', '公用事业', \ 85 | '银行', '机械设备', '电子', '传媒', '非银金融', '建筑材料', '国防军工', '建筑装饰', '计算机', '家用电器', '钢铁', '食品饮料'] 86 | industryDic = {industryCode: industryName for industryCode, industryName in zip(industryList, industryLabel)} 87 | dfIndestryList = [] 88 | self.logger.info("获取申万一级行业指数数据...") 89 | for indexCode in industryList: 90 | industryDf = self.GetDataTotalMainDemo.get_hq_data(code=indexCode, start_date=startDate, 91 | end_date=endDate) 92 | industryDf.rename(columns={'close_price': indexCode}, inplace=True) 93 | dfIndestryList.append(industryDf) 94 | 95 | totalIndustryDf = pd.concat(dfIndestryList, axis=1) 96 | dicResult['totalIndustryDf'] = totalIndustryDf 97 | dicResult['industryDic'] = industryDic 98 | self.logger.info("获取申万一级行业指数数据成功！") 99 | 100 | # 风格指数 101 | styleList = ['801863.SI', '801822.SI', '801813.SI', '801831.SI', '801812.SI', '801821.SI', '801852.SI', 102 | '801842.SI', '801843.SI', '801832.SI', '801851.SI', \ 103 | '801853.SI', '801841.SI', '801833.SI', '801823.SI', '801811.SI'] 104 | styleLabel = ['新股指数', '中市盈率指数', '小盘指数', '高市净率指数', '中盘指数', '高市盈率指数', '微利股指数', '中价股指数', '低价股指数', '中市净率指数', 105 | '亏损股指数', '绩优股指数', '高价股指数', '低市净率指数', '低市盈率指数', '大盘指数'] 106 | styleDic = {sylteCode: styleName for sylteCode, styleName in zip(styleList, styleLabel)} 107 | dfStyleList = [] 108 | self.logger.info("获取风格指数数据...") 109 | for indexCode 
def getRiskFree(self):
    '''
    Per-period risk-free rate derived from a 2% annual rate.

    :return: 0.02 / 52 when ``self.netValuePeriod`` is 'W' (weekly data),
        otherwise 0.02 / 250
    '''
    periodsPerYear = 52 if self.netValuePeriod == 'W' else 250
    return 0.02 / periodsPerYear
end_date=fundPlotDf.index.tolist()[-1], 155 | period='W') 156 | weekFundPlotDf = fundPlotDf.loc[tradeWeekList].dropna(axis=0) 157 | CalcRiskReturnDemo.plotWeekNetValueFigure(weekFundPlotDf, resultPath, fundName=self.fundName) 158 | CalcRiskReturnDemo.calcWeekNetValueResult(weekFundPlotDf, resultPath, fundName=self.fundName) 159 | 160 | self.logger.info("计算月频数据相关结论...") 161 | tradeMonthList = self.GetDataTotalMainDemo.get_tradeday(start_date=fundPlotDf.index.tolist()[0], end_date=fundPlotDf.index.tolist()[-1], 162 | period='M') 163 | monthFundPlotDf = fundPlotDf.loc[tradeMonthList].dropna(axis=0) 164 | CalcRiskReturnDemo.plotMonthNetValueFigure(monthFundPlotDf, resultPath, fundName=self.fundName) 165 | 166 | targetDf = fundPlotDf.copy() 167 | targetDf['无风险利率'] = self.getRiskFree() 168 | CalcRegressionDemo = CalcRegression() 169 | self.logger.info("计算选股，择时能力相关结论...") 170 | 171 | 172 | CalcRegressionDemo.getSelectStockAndTime(targetDf, resultPath, fundName=self.fundName, 173 | netPeriod=self.netValuePeriod, benchMark=list(self.indexNameDic.values())[0],DCIndexDf=dicNetValueResult['DCIndexDf']) 174 | 175 | self.logger.info("计算行业，风格回归相关结论...") 176 | fundIndustryDf = pd.concat( 177 | [dicNetValueResult['netValuedf']['net_value_adj'], dicNetValueResult['totalIndustryDf']], 178 | axis=1, join='inner') 179 | fundIndustryDf.rename(columns={'net_value_adj': dicNetValueResult['fundName']}, inplace=True) 180 | fundIndustryDf['无风险利率'] = self.getRiskFree() 181 | CalcRegressionDemo.getIndustryRegression(fundIndustryDf, resultPath, fundName=self.fundName, 182 | industryDic=dicNetValueResult['industryDic'],bench_return=dicNetValueResult['indexDf']) 183 | 184 | fundIndustryDf = pd.concat( 185 | [dicNetValueResult['netValuedf']['net_value_adj'], dicNetValueResult['totalStyleDf']], 186 | axis=1, join='inner') 187 | fundIndustryDf.rename(columns={'net_value_adj': dicNetValueResult['fundName']}, inplace=True) 188 | fundIndustryDf['无风险利率'] = self.getRiskFree() 189 | 
def getSavePath(self):
    '''
    Return the directory in which this product's analysis results are saved,
    creating it when necessary.

    The directory is ``<cwd>/分析结果/<fundName>/``. It is built with
    ``os.path.join`` instead of hard-coded backslashes and created with
    ``os.makedirs(..., exist_ok=True)``, so a missing '分析结果' folder no
    longer raises.

    :return: the directory path, ending with a path separator so callers can
        append file names directly
    '''
    resultDir = os.path.join(os.getcwd(), '分析结果', self.fundName)
    os.makedirs(resultDir, exist_ok=True)
    # Joining with '' appends the trailing separator callers rely on.
    return os.path.join(resultDir, '')
# --------------------------------------------------------------------------------
# /FamaFrenchRegression.py:
# --------------------------------------------------------------------------------
# -- coding: utf-8 --
# Author:zouhao
# email:1084848158@qq.com
'''
Fama-French three-factor regression analysis.
'''

import pandas as pd
import numpy as np
from GetDataFromWindAndMySql import GetDataFromWindAndMySql
from PrintInfo import PrintInfo
import time


class FamaFrenchRegression:
    '''Builds SMB / HML factor returns from index constituents and joins them with a product's returns.'''

    def __init__(self):
        self.GetDataFromWindAndMySqlDemo = GetDataFromWindAndMySql()
        self.PrintInfoDemo = PrintInfo()

    @staticmethod
    def _groupMeanReturn(prevClose, currClose, codeList):
        '''Mean simple return of ``codeList`` between two close-price rows (codes without prices are ignored).'''
        # reindex() tolerates codes that have no price column; the removed
        # DataFrame.ix accessor behaved the same way for missing labels.
        prev = prevClose.reindex(codeList)
        curr = currClose.reindex(codeList)
        return ((curr - prev) / prev).mean()

    def getFacrotReturn(self,resultPath,dateList, indexCode):
        '''
        Compute the size (SMB) and book-to-price (HML) factor returns per date.

        For every date the index constituents are split at the 30th / 70th
        percentiles of market value and of 1 / pb_lf; each factor return is
        the difference of the groups' mean returns since the previous date
        for which data was available.  The table is also written to an Excel
        file under ``resultPath``.

        :param resultPath: directory prefix for the Excel output
        :param dateList: dates to evaluate
        :param indexCode: code of the broad index whose constituents are used
        :return: DataFrame indexed by date with columns ``SMB`` and ``HML``
        '''
        if len(dateList) == 0:
            # Nothing to evaluate; the batch price query below needs dateList[0] / dateList[-1].
            return pd.DataFrame(columns=['SMB', 'HML'])

        totalCodeSet = set()
        dicTempResult = {}
        self.PrintInfoDemo.PrintLog("获取宽基指数成分股，并计算产品起止日期内规模因子，账面市值因子的收益")
        self.PrintInfoDemo.PrintLog("为减少接口频繁请求成分股数据导致掉线，这里每次调用后采用睡眠函数，间隔0.2秒..")
        for dateStr in dateList:
            universeDf = self.GetDataFromWindAndMySqlDemo.getIndexConstituent(indexCode=indexCode, getDate=dateStr)
            if universeDf.empty:
                # The constituent lookup failed for this date; there is no 'stock_code' column to read.
                continue
            codeList = universeDf['stock_code'].tolist()
            totalCodeSet.update(codeList)
            tempStockDf = self.GetDataFromWindAndMySqlDemo.getCurrentDateData(
                tempCodeList=codeList, getDate=dateStr, tableFlag='stock',
                nameList=['close_price', 'market_value', 'pb_lf'])
            if tempStockDf.empty:
                continue

            # Size groups: smallest 30% and largest 30% by market value.
            ME30 = np.percentile(tempStockDf['market_value'], 30)
            ME70 = np.percentile(tempStockDf['market_value'], 70)
            SM = tempStockDf[tempStockDf['market_value'] <= ME30].index.tolist()
            BM = tempStockDf[tempStockDf['market_value'] > ME70].index.tolist()

            # Book-to-price = 1 / pb_lf, restricted to rows where every field is positive.
            BP = tempStockDf[tempStockDf > 0].dropna()
            if BP.empty:
                # np.percentile cannot handle an empty sample; skip the date.
                continue
            BP['pb_lf'] = 1 / BP['pb_lf']
            BP30 = np.percentile(BP['pb_lf'], 30)
            BP70 = np.percentile(BP['pb_lf'], 70)
            LP = BP[BP['pb_lf'] <= BP30].index.tolist()
            HP = BP[BP['pb_lf'] > BP70].index.tolist()

            dicTempResult[dateStr] = {'SM': SM, 'BM': BM, 'LP': LP, 'HP': HP}
            time.sleep(0.2)
        self.PrintInfoDemo.PrintLog("产品起止日期内规模因子，账面市值因子的收益计算完成")
        self.PrintInfoDemo.PrintLog("批量获取产品起止日期内的所有成分股行情数据...")
        totalStockCloseDf = self.GetDataFromWindAndMySqlDemo.getCurrentNameData(tempCodeList=list(totalCodeSet),
                                                                                startDate=dateList[0],
                                                                                endDate=dateList[-1], tableFlag='stock',
                                                                                nameStr='close_price')

        self.PrintInfoDemo.PrintLog("产品起止日期内的所有成分股行情数据获取完成！")
        dicResult = {}
        # getCurrentNameData can come back without a frame when nothing is cached yet.
        if totalStockCloseDf is not None:
            sortedDates = sorted(dicTempResult)
            for preDateStr, dateStr in zip(sortedDates, sortedDates[1:]):
                dicCodeList = dicTempResult[dateStr]
                prevClose = totalStockCloseDf.loc[preDateStr]
                currClose = totalStockCloseDf.loc[dateStr]
                SMMeanReturn = self._groupMeanReturn(prevClose, currClose, dicCodeList['SM'])
                BMMeanReturn = self._groupMeanReturn(prevClose, currClose, dicCodeList['BM'])
                LPMeanReturn = self._groupMeanReturn(prevClose, currClose, dicCodeList['LP'])
                HPMeanReturn = self._groupMeanReturn(prevClose, currClose, dicCodeList['HP'])
                dicResult[dateStr] = {
                    'SMB': SMMeanReturn - BMMeanReturn,
                    # High-minus-low book-to-price; the previous low-minus-high
                    # difference inverted the factor's sign.
                    'HML': HPMeanReturn - LPMeanReturn,
                }

        resultDf = pd.DataFrame(dicResult).T
        resultDf.to_excel(resultPath+'规模因子账面市值因子（%s成分股）.xlsx'%indexCode)
        self.PrintInfoDemo.PrintLog("产品起止日期内的SMB,HML收益率计算完成，存入本地！")
        return resultDf

    def calcMain(self, closePriceSe, resultPath,indexCode='000016.SH',):
        '''
        Build the three-factor sample table for one product and save it to Excel.

        :param closePriceSe: price / net-value Series of the product, indexed by date
        :param resultPath: directory prefix for the Excel outputs
        :param indexCode: broad index whose constituents define the factors
        :return: DataFrame of factor returns joined (inner) with the product's returns
        '''
        self.PrintInfoDemo.PrintLog("开始计算fama-french三因子模型,采用的宽基指数为%s"%indexCode)
        tempReturn = (closePriceSe - closePriceSe.shift(1)) / closePriceSe.shift(1)
        tempReturn.name = closePriceSe.name
        dateList = tempReturn.index.tolist()
        factorReturnDf = self.getFacrotReturn(resultPath,dateList=dateList, indexCode=indexCode)
        calcRusultDf = pd.concat([factorReturnDf,tempReturn],axis=1,join='inner')
        calcRusultDf.to_excel(resultPath+'三因子样本数据.xlsx')
        return calcRusultDf


if __name__ == '__main__':
    # calcMain() needs a price series and an output path, so the former
    # argument-less demo call could only fail with a TypeError.
    raise SystemExit("FamaFrenchRegression.calcMain requires closePriceSe and resultPath; "
                     "import the class and call it with real data.")

# --------------------------------------------------------------------------------
# /GetAndSaveWindData/ClientWindIfindInit.py:
# --------------------------------------------------------------------------------
# -- coding: utf-8 --
# Author:zouhao
# email:1084848158@qq.com

'''
Login initialisation for the Wind / iFinD client accounts.
'''

import mylog as mylog
from WindPy import *
import pandas as pd
from GetAndSaveWindData.MysqlCon import MysqlCon
from iFinDPy import *


class ClientWindIfindInit:
    def __init__(self,data_source='ifind'):
        # data_source is accepted for interface compatibility; it is not used here.
        self.logger = mylog.logger

    def log_init(self, data_resource='ifind'):
        '''
        Log in to / start the selected data client.

        :param data_resource: 'ifind' or 'wind'
        :return: False when the login or start-up failed, otherwise True
        '''
        import os  # local import: only needed for the credential override below

        flag = True
        if data_resource == 'ifind':
            # SECURITY(review): the account is hard-coded in source control.  The
            # environment variables take precedence; the literals stay only as a
            # fallback so existing setups keep working -- move them out of the repo.
            ifind_user = os.environ.get('IFIND_USER', 'zszq5072')
            ifind_password = os.environ.get('IFIND_PASSWORD', '754628')
            log_state = THS_iFinDLogin(ifind_user, ifind_password)
            if log_state == 0:
                self.logger.info("同花顺账号登录成功！")
            else:
                self.logger.error("同花顺账号登录异常，请检查！")
                flag = False
        elif data_resource == 'wind':
            try:
                w.start()
            except Exception:
                # Report a failed start through the flag, as the iFinD branch does.
                self.logger.exception("wind启动失败")
                flag = False
        return flag

# --------------------------------------------------------------------------------
# /GetAndSaveWindData/GetDataFromWindNNotMysql.py:
# --------------------------------------------------------------------------------
# -- coding: utf-8 --

'''
Fetch data from Wind without storing it in MySQL (some non-routine data).
'''

# from WindPy import w
import pandas as pd
from iFinDPy import *
import mylog as mylog
import WindPy as Wind


class GetDataFromWindNotMysql:
    def __init__(self, data_resource='ifind'):
        self.logger = mylog.set_log()
        self.dic_init = {}
        self.dic_init['data_resource'] = data_resource
        self.dic_init['data_init_flag'] = self.log_init(data_resource)

    def log_init(self, data_resource='ifind'):
        '''
        Log in to / start the selected data client.

        :param data_resource: 'ifind' or 'wind'
        :return: False when the login or start-up failed, otherwise True
        '''
        import os  # local import: only needed for the credential override below

        flag = True
        if data_resource == 'ifind':
            # SECURITY(review): the account is hard-coded in source control.  The
            # environment variables take precedence; the literals stay only as a
            # fallback so existing setups keep working -- move them out of the repo.
            ifind_user = os.environ.get('IFIND_USER', 'zszq5072')
            ifind_password = os.environ.get('IFIND_PASSWORD', '754628')
            log_state = THS_iFinDLogin(ifind_user, ifind_password)
            if log_state == 0:
                self.logger.info("同花顺账号登录成功！")
            else:
                self.logger.error("同花顺账号登录异常，请检查！")
                flag = False
        elif data_resource == 'wind':
            try:
                Wind.w.start()
            except Exception:
                # Was a bare except; keep the message but record the traceback.
                self.logger.exception("wind启动失败")
                flag = False
        return flag

    @staticmethod
    def _add_exchange_suffix(code):
        '''Append .SH / .SZ according to the first digit; other codes are returned unchanged.'''
        code = str(code)
        if code[0] == '6':
            return code + '.SH'
        if code[0] in ('0', '3'):
            return code + '.SZ'
        return code

    def get_fund_stock_filed(self, start_date, end_date, fund_code=''):
        '''
        Quarterly top-10 holdings of a fund, with the CITIC industry of each stock.

        :param start_date: first date of the query window
        :param end_date: last date of the query window
        :param fund_code: Wind code of the fund
        :return: DataFrame with one row per (disclosure date, rank); rows with
                 any missing value are dropped.  '所属行业' is None where the
                 industry could not be fetched.
        '''
        fund_df = pd.DataFrame()
        fileds = ['prt_topstockname', 'prt_topstockcode', 'prt_topstockvalue', 'prt_heavilyheldstocktostock',
                  'prt_heavilyheldstocktonav']
        name_Dic = {'prt_topstockname'.upper(): '股票名称', 'prt_topstockcode'.upper(): '股票代码',
                    'prt_topstockvalue'.upper(): '持股市值', 'prt_heavilyheldstocktostock'.upper(): '市值占股票投资市值比',
                    'prt_heavilyheldstocktonav'.upper(): '市值占基金资产净值比'}
        df_list = []
        for order_num in range(1, 11):
            wsddata = Wind.w.wsd(codes=fund_code, fields=fileds, beginTime=start_date, endTime=end_date,
                                 options="order=%s;unit=1;Period=Q;Days=Alldays" % order_num)
            if wsddata.ErrorCode != 0:
                self.logger.error("获取重仓股数据有误，错误代码" + str(wsddata.ErrorCode))
                continue
            temp_fund_df = pd.DataFrame(wsddata.Data, index=wsddata.Fields, columns=wsddata.Times).T
            temp_fund_df['重仓排名'] = order_num
            df_list.append(temp_fund_df)
        if df_list:
            fund_df = pd.concat(df_list, axis=0, sort=True)
        fund_df.rename(columns=name_Dic, inplace=True)
        fund_df['披露日期'] = fund_df.index.tolist()
        fund_df.dropna(inplace=True)

        # Rows are ordered rank-major (all dates of rank 1, then rank 2, ...)
        # while the industry lookup runs per date, so results are written back
        # by row position.  A flat list built in group order would misalign,
        # and would be too short whenever one lookup failed.
        industry_by_row = [None] * len(fund_df)
        for datestr, positions in fund_df.groupby(by='披露日期').indices.items():
            code_list = [self._add_exchange_suffix(code) for code in fund_df['股票代码'].iloc[positions]]
            tradeDate = datestr.strftime("%Y%m%d")
            param_list = list(set(code_list))
            wssdata = Wind.w.wss(codes=param_list, fields=['industry_citic'],
                                 options='tradeDate=%s;industryType=1' % tradeDate)
            if wssdata.ErrorCode != 0:
                self.logger.error("获取股票所属行业数据有误，错误代码" + str(wssdata.ErrorCode))
                continue
            temp_se = pd.Series(wssdata.Data[0], index=wssdata.Codes, name='所属行业')
            for position, code in zip(positions, code_list):
                industry_by_row[position] = temp_se.get(code)
        fund_df['所属行业'] = industry_by_row
        return fund_df

    def get_fund_filed(self, start_date, end_date, fund_code=''):
        '''
        Quarterly fund data: total shares, net assets and the
        stock-to-net-asset ratio.

        :return: DataFrame indexed by date; empty when the Wind request failed
        '''
        # total_date_list = w.tdays(start_date, end_date, "Days=Alldays;Period=Q")
        fund_df = pd.DataFrame()
        fileds = ['unit_fundshare_total', 'netasset_total', 'prt_stocktonav']
        name_Dic = {'unit_fundshare_total'.upper(): '基金份额_万份', 'netasset_total'.upper(): '基金规模',
                    'prt_stocktonav'.upper(): '股票市值占基金资产净值比'}
        wsddata = Wind.w.wsd(codes=fund_code, fields=fileds, beginTime=start_date, endTime=end_date,
                             options="unit=1;Period=Q;Days=Alldays")
        if wsddata.ErrorCode != 0:
            # The previous message was copied from an all-A-share query.
            self.logger.error("获取基金季度数据有误，错误代码" + str(wsddata.ErrorCode))
            return fund_df
        fund_df = pd.DataFrame(wsddata.Data, index=wsddata.Fields, columns=wsddata.Times).T
        fund_df.rename(columns=name_Dic, inplace=True)
        return fund_df

# --------------------------------------------------------------------------------
# /GetAndSaveWindData/GetDataToMysql.py:
# --------------------------------------------------------------------------------
# -- coding: utf-8 --
# Author:zouhao
# email:1084848158@qq.com

'''
Store data (from Excel or Wind) in the matching local database table.
Each table has an index: existing rows are updated, new rows are inserted.
'''
import pandas as pd
import numpy as np
from GetAndSaveWindData.MysqlCon import MysqlCon
import mylog as mylog

# self.logger.basicConfig(format="%(asctime)s %(filename)s:%(levelname)s:%(message)s", datefmt="%d-%M-%Y %H:%M:%S",
#                     level=self.logger.DEBUG)

class GetDataToMysql:
    def __init__(self):
        self.conn = MysqlCon().getMysqlCon(flag='connect')
        self.logger = mylog.set_log()

    def GetMain(self,dataDf,tableName,):
        '''
        Upsert every row of ``dataDf`` into ``tableName`` with REPLACE INTO.

        :param dataDf: DataFrame whose column names match the table's columns
        :param tableName: target table (trusted identifier; it is formatted into the SQL text)
        :raises Exception: re-raises any database error after rolling back
        '''
        tableList = dataDf.columns.tolist()
        placeholders = ','.join(['%s'] * len(tableList))
        sqlStr = "replace into %s(%s)" % (tableName, ','.join(tableList)) + "VALUES(%s)" % placeholders

        # NaN / NaT become None so the driver writes NULL.
        dataDf = dataDf.astype(object).where(pd.notnull(dataDf), None)
        rows = [tuple(row) for row in dataDf[tableList].values.tolist()]

        cursor = self.conn.cursor()
        try:
            cursor.executemany(sqlStr, rows)
            self.conn.commit()
        except Exception:
            # Do not leave a half-written batch pending on the shared connection.
            self.conn.rollback()
            self.logger.exception("数据存入mysql失败！")
            raise
        finally:
            cursor.close()
        self.logger.info("数据存入mysql成功！")


if __name__=="__main__":
    # GetMain() needs a DataFrame and a table name, so the former
    # argument-less demo call could only fail with a TypeError.
    raise SystemExit("GetDataToMysql.GetMain requires dataDf and tableName; import the class instead.")

# --------------------------------------------------------------------------------
# /GetAndSaveWindData/GetFundFinanceReportData.py:
# --------------------------------------------------------------------------------
# -- coding: utf-8 --

'''
Import Wind / iFinD data into the local database and return results from it.
'''

import pandas as pd
from GetAndSaveWindData.GetDataToMysql import GetDataToMysql
import mylog as mylog
import numpy as np
from datetime import datetime



class GetFundFinanceReportData:
    def __init__(self):
        self.logger = mylog.set_log()
        self.GetDataToMysqlDemo = GetDataToMysql()

    def get_fund_stock_info(self, third_conn, engine, total_date_list, fund_code='100053.OF'):
        '''
        Return a fund's stock-holding details for the given report dates.

        Rows already stored in ``fund_contain_stock_detail`` are read from the
        database; missing report periods are fetched through
        ``third_conn.wset``, stored, and appended to the result.

        :param third_conn: Wind client object exposing ``wset``
        :param engine: database connection handed to ``pandas.read_sql``
        :param total_date_list: report dates as 'YYYY-MM-DD' strings
        :param fund_code: fund code
        :return: DataFrame of holdings; empty when a Wind request fails or no dates are given
        '''
        if not total_date_list:
            # An empty IN () list is not valid SQL.
            return pd.DataFrame()

        # '06-30' is the semi-annual report; every other date is labelled as the annual report.
        rpt_date_str_list = [rpt_date[:4] + ('年中报' if rpt_date[-5:] == '06-30' else '年年报')
                             for rpt_date in total_date_list]
        # Bound parameters instead of str(tuple(...)), which produced the
        # invalid "('x',)" for a single report period.
        placeholders = ','.join(['%s'] * len(rpt_date_str_list))
        sql_str = "select * from fund_contain_stock_detail where rpt_date in (%s) and fund_code=%%s" % placeholders
        result_df = pd.read_sql(sql=sql_str, con=engine, params=tuple(rpt_date_str_list) + (fund_code,))

        have_rpt_strs = set(result_df['rpt_date'].tolist())
        # First date seen for each label, matching the former list.index() lookup.
        label_to_date = {}
        for label, rpt_date in zip(rpt_date_str_list, total_date_list):
            label_to_date.setdefault(label, rpt_date)
        lack_rpt_list = [label for label in label_to_date if label not in have_rpt_strs]

        name_mysql_dic = {'sec_name': 'fund_name', 'marketvalueofstockholdings': 'market_value_of_stockholdings',
                          'proportiontototalstockinvestments': 'pro_total_stock_inve',
                          'proportiontonetvalue': 'pro_net_value',
                          'proportiontoshareholdtocirculation': 'pro_sharehold_cir'}
        if lack_rpt_list:
            temp_df_list = []
            for lack_rpt in lack_rpt_list:
                lack_date = label_to_date[lack_rpt]
                rptdate = ''.join(lack_date.split('-'))
                options = "rptdate=%s;windcode=%s" % (rptdate, fund_code)
                wset_data = third_conn.wset(tablename="allfundhelddetail", options=options)
                if wset_data.ErrorCode != 0:
                    self.logger.error('wind获取基金持股明细数据错误，错误代码%s，请检查！' % wset_data.ErrorCode)
                    # NOTE(review): this also discards the rows already read from the database.
                    return pd.DataFrame()
                temp_rpt_df = pd.DataFrame(wset_data.Data, index=wset_data.Fields, columns=wset_data.Codes).T
                if temp_rpt_df.empty:
                    continue
                temp_rpt_df['fund_code'] = fund_code
                temp_rpt_df['record_time'] = datetime.today().strftime("%Y-%m-%d")
                temp_rpt_df.rename(columns=name_mysql_dic, inplace=True)
                self.GetDataToMysqlDemo.GetMain(temp_rpt_df, 'fund_contain_stock_detail')
                self.logger.info("存储%s,报告期%s持股数据成功！" % (fund_code, lack_rpt))
                temp_df_list.append(temp_rpt_df)
            if temp_df_list:
                temp_df = pd.concat(temp_df_list, axis=0, sort=True)
                result_df = pd.concat([result_df, temp_df], axis=0, sort=True)
        return result_df

    def get_main(self):
        pass

# --------------------------------------------------------------------------------
# /GetAndSaveWindData/GetindexName.py:
# --------------------------------------------------------------------------------
# -- coding: utf-8 --

import pandas as pd


from WindPy import w

class GetindeName:
    def __init__(self):
        pass

    def get_data(self):
        '''
        Append the Wind security name to the ETF overview workbook and save it in place.

        All five overview workbooks are read, but only the first one is
        processed because the loop stops after one iteration.
        '''
        w.start()
        # index_code_list = df1.index.tolist()+df2.index.tolist()+df3.index.tolist()+df4.index.tolist()+df5.index.tolist()
        name_list = ['行业指数ETF概况','策略指数ETF概况','主题指数ETF概况','规模指数ETF概况','风格指数ETF概况']
        df_list = [pd.read_excel("%s.xlsx" % name, sheet_name='Sheet1', index_col=0) for name in name_list]
        for name, df in zip(name_list, df_list):
            aa = w.wss(df.index.tolist(), "sec_name")
            if aa.ErrorCode != 0:
                raise RuntimeError("w.wss(sec_name) failed for %s, error code %s" % (name, aa.ErrorCode))
            tempdf1 = pd.DataFrame(aa.Data, columns=aa.Codes, index=aa.Fields).T
            # Join with the workbook of this iteration; the code used df1 for
            # every name, which was only right for the first workbook.
            result = pd.concat([df, tempdf1], axis=1, sort=True)
            result.to_excel("%s.xlsx" % name)
            # NOTE(review): only the first workbook is processed; removing this
            # break would overwrite all five input files -- confirm intent first.
            break

if __name__=='__main__':
    GetindeNameDemo = GetindeName()
    GetindeNameDemo.get_data()

# --------------------------------------------------------------------------------
# /GetAndSaveWindData/MysqlCon.py:
-------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # Author:zouhao 3 | # email:1084848158@qq.com 4 | 5 | import pymysql 6 | from sqlalchemy import create_engine 7 | import numpy as np 8 | 9 | class MysqlCon: 10 | def __init__(self): 11 | pass 12 | 13 | def getMysqlCon(self,flag='connect'): 14 | db_port = 3306 15 | db_user = 'root' 16 | db_pass = '123456' 17 | db_host = 'localhost' 18 | db_database = 'fund_est' 19 | 20 | pymysql.converters.encoders[np.float64] = pymysql.converters.escape_float 21 | pymysql.converters.conversions = pymysql.converters.encoders.copy() 22 | pymysql.converters.conversions.update(pymysql.converters.decoders) 23 | if flag=='connect': 24 | engine = pymysql.connect(host=db_host, user=db_user, passwd=db_pass, db=db_database, port=db_port) 25 | 26 | elif flag=='engine': 27 | sqlConStr = "mysql+pymysql://%s:%s@%s:%s/%s?charset=utf8"%(db_user,db_pass,db_host,db_port,db_database) 28 | engine = create_engine(sqlConStr) 29 | return engine 30 | 31 | if __name__=='__main__': 32 | MysqlConDemo = MysqlCon() 33 | MysqlConDemo.getMysqlCon() -------------------------------------------------------------------------------- /GetDataFromWindAndMySql.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | 3 | ''' 4 | 将wind的数据导入到本地数据库,并从数据库返回结果 5 | ''' 6 | 7 | from WindPy import w 8 | import pandas as pd 9 | from DataToMySql.MysqlCon import MysqlCon 10 | from DataToMySql.GetDataToMysql import GetDataToMysql 11 | from PrintInfo import PrintInfo 12 | w.start() 13 | 14 | 15 | class GetDataFromWindAndMySql: 16 | def __init__(self): 17 | self.wsetData = ["000001.SH", "399300.SZ", "000016.SH", "000905.SH", "000906.SH"] # 要获取数据的证券代码 18 | self.indexFieldName = ["open", "high", "low", "close", "volume", "amt", "chg", "pct_chg", "turn"] # 要获取的数据字段 19 | self.fundFieldName = ["nav", "NAV_acc", "sec_name"] 20 | self.stockFieldName = 
["open","high","low","close","volume","amt","turn","mkt_cap_ard","pe_ttm","ps_ttm","pb_lf"] 21 | self.engine = MysqlCon().getMysqlCon(flag='engine') 22 | self.conn = MysqlCon().getMysqlCon(flag='connect') 23 | self.PrintInfoDemo = PrintInfo() 24 | self.GetDataToMysqlDemo = GetDataToMysql() 25 | 26 | def getIndexConstituent(self,indexCode='000300.SH',getDate='2019-06-06'): 27 | ''' 28 | 获取指数成分股 29 | :param indexCode: 30 | :param getDate: 31 | :return: 32 | ''' 33 | sqlStr = "select * from index_constituent where index_code='%s' and update_time='%s'"%(indexCode,getDate) 34 | resultDf = pd.read_sql(sql=sqlStr, con=self.engine) 35 | if resultDf.empty: 36 | wsetdata = w.wset("indexconstituent", "date=%s;windcode=%s"%(getDate,indexCode)) 37 | if wsetdata.ErrorCode != 0: 38 | self.PrintInfoDemo.PrintLog("获取指数成分股数据有误，错误代码" + str(wsetdata.ErrorCode)) 39 | return pd.DataFrame() 40 | 41 | resultDf = pd.DataFrame(wsetdata.Data, index=wsetdata.Fields).T 42 | dateList = [datetampStr.strftime('%Y-%m-%d') for datetampStr in resultDf['date'].tolist()] 43 | resultDf['date'] = dateList 44 | nameDic = {'date':'adjust_time','wind_code':'stock_code',"sec_name":'stock_name','i_weight':'stock_weight'} 45 | resultDf.rename(columns=nameDic,inplace=True) 46 | resultDf['update_time'] = getDate 47 | resultDf['index_code'] = indexCode 48 | 49 | self.GetDataToMysqlDemo.GetMain(resultDf,'index_constituent') 50 | return resultDf 51 | 52 | def getLackDataToMySql(self, tempCode, startDate, endDate, tableFlag='index'): 53 | if tableFlag == 'index': 54 | tableStr = 'index_value' 55 | codeName = 'index_code' 56 | elif tableFlag == 'fund': 57 | tableStr = 'fund_net_value' 58 | codeName = 'fund_code' 59 | elif tableFlag == 'stock': 60 | tableStr='stock_hq_value' 61 | codeName = 'stock_code' 62 | sqlStr = "select max(update_time),min(update_time) from %s where %s='%s'" % (tableStr, codeName, tempCode) 63 | cursor = self.conn.cursor() 64 | cursor.execute(sqlStr) 65 | dateStrTuple = cursor.fetchall()[0] 66 
| maxDate = dateStrTuple[0] 67 | minDate = dateStrTuple[1] 68 | 69 | if not maxDate: 70 | self.getDataFromWind(tempCode, startDate=startDate, endDate=endDate, tableFlag=tableFlag) 71 | return 72 | 73 | if endDate < minDate or startDate > minDate: 74 | self.getDataFromWind(tempCode, startDate=startDate, endDate=endDate, tableFlag=tableFlag) 75 | elif startDate <= minDate: 76 | if minDate <= endDate < maxDate: 77 | if startDate!=minDate: 78 | self.getDataFromWind(tempCode, startDate=startDate, endDate=minDate, tableFlag=tableFlag) 79 | elif endDate >= maxDate: 80 | self.getDataFromWind(tempCode, startDate=startDate, endDate=minDate, tableFlag=tableFlag) 81 | if endDate!=maxDate: 82 | self.getDataFromWind(tempCode, startDate=maxDate, endDate=endDate, tableFlag=tableFlag) 83 | elif endDate > maxDate: 84 | self.getDataFromWind(tempCode, startDate=maxDate, endDate=endDate, tableFlag=tableFlag) 85 | 86 | def getDataFromWind(self, tempCode, startDate='2019-04-01', endDate='2019-04-30', tableFlag='index'): 87 | if tableFlag == 'index': 88 | tableStr = 'index_value' 89 | nameDic = {"OPEN": "open_price", "HIGH": "high_price", "LOW": "low_price", "CLOSE": "close_price", 90 | "VOLUME": "volume", "AMT": "amt", "CHG": "chg", "PCT_CHG": "pct_chg", "TURN": "turn"} 91 | fields = self.indexFieldName 92 | codeName = 'index_code' 93 | elif tableFlag=='fund': 94 | tableStr = 'fund_net_value' 95 | nameDic = {"NAV": "net_value", "NAV_ACC": "acc_net_value", "SEC_NAME": "fund_name"} 96 | fields = self.fundFieldName 97 | codeName = 'fund_code' 98 | elif tableFlag=='stock': 99 | tableStr = 'stock_hq_value' 100 | nameDic = {"OPEN": "open_price", "HIGH": "high_price", "LOW": "low_price", "CLOSE": "close_price", 101 | "VOLUME": "volume", "AMT": "amt", "TURN": "turn", "MKT_CAP_ARD": "market_value", "PE_TTM": "pe_ttm","PS_TTM": "ps_ttm","PB_LF":"pb_lf"} 102 | fields = self.stockFieldName 103 | codeName = 'stock_code' 104 | 105 | wsetdata = w.wsd(codes=tempCode, fields=fields, beginTime=startDate, 
endTime=endDate) 106 | if wsetdata.ErrorCode != 0: 107 | self.PrintInfoDemo.PrintLog("获取行情数据有误，错误代码" + str(wsetdata.ErrorCode)) 108 | return 109 | 110 | tempDf = pd.DataFrame(wsetdata.Data, index=wsetdata.Fields, columns=wsetdata.Times).T 111 | tempDf[codeName] = tempCode 112 | tempDf['update_time'] = wsetdata.Times 113 | tempDf.rename(columns=nameDic, inplace=True) 114 | dateList = [dateStr.strftime("%Y-%m-%d") for dateStr in tempDf['update_time'].tolist()] 115 | tempDf['update_time'] = dateList 116 | self.GetDataToMysqlDemo.GetMain(tempDf, tableStr) 117 | return tempDf 118 | 119 | def getDataFromMySql(self, tempCode, startDate, endDate, tableFlag='index', nameList=['close_price']): 120 | if not nameList: 121 | self.PrintInfoDemo.PrintLog('传入获取指数的字段不合法，请检查！') 122 | 123 | if tableFlag == 'index': 124 | tableStr = 'index_value' 125 | codeName = 'index_code' 126 | elif tableFlag=='fund': 127 | codeName = 'fund_code' 128 | tableStr = 'fund_net_value' 129 | elif tableFlag=='stock': 130 | codeName = 'stock_code' 131 | tableStr = 'stock_hq_value' 132 | 133 | sqlStr = "select %s,update_time from %s where %s='%s' and update_time>='%s'" \ 134 | " and update_time<='%s'" % (','.join(nameList), tableStr, codeName, tempCode, startDate, endDate) 135 | resultDf = pd.read_sql(sql=sqlStr, con=self.engine) 136 | resultDf.set_index(keys='update_time', inplace=True, drop=True) 137 | resultDf = resultDf.drop_duplicates().sort_index() 138 | return resultDf 139 | 140 | def getCurrentNameData(self,tempCodeList,startDate,endDate,tableFlag='stock',nameStr='close_price'): 141 | ''' 142 | 获取指定字段的数据 143 | ''' 144 | if tableFlag=='stock': 145 | totalCodeStr='' 146 | for stockCode in tempCodeList: 147 | totalCodeStr = totalCodeStr+stockCode+"','" 148 | 149 | sqlStr1= "select max(update_time),min(update_time) from stock_hq_value where stock_code in ('%s')"%totalCodeStr[:-3] 150 | cursor = self.conn.cursor() 151 | cursor.execute(sqlStr1) 152 | dateStrTuple = cursor.fetchall()[0] 153 | maxDate = 
dateStrTuple[0] 154 | minDate = dateStrTuple[1] 155 | 156 | if not maxDate: 157 | for tempCode in tempCodeList: 158 | self.getDataFromWind(tempCode, startDate=startDate, endDate=endDate, tableFlag=tableFlag) 159 | return 160 | else: 161 | if endDate < minDate or startDate > minDate: 162 | for tempCode in tempCodeList: 163 | self.getDataFromWind(tempCode, startDate=startDate, endDate=endDate, tableFlag=tableFlag) 164 | elif startDate <= minDate: 165 | if minDate <= endDate < maxDate: 166 | for tempCode in tempCodeList: 167 | self.getDataFromWind(tempCode, startDate=startDate, endDate=minDate, tableFlag=tableFlag) 168 | elif endDate >= maxDate: 169 | for tempCode in tempCodeList: 170 | self.getDataFromWind(tempCode, startDate=startDate, endDate=minDate, tableFlag=tableFlag) 171 | self.getDataFromWind(tempCode, startDate=maxDate, endDate=endDate, tableFlag=tableFlag) 172 | elif endDate >= maxDate: 173 | for tempCode in tempCodeList: 174 | self.getDataFromWind(tempCode, startDate=maxDate, endDate=endDate, tableFlag=tableFlag) 175 | 176 | sqlStr = "select %s,update_time,stock_code from stock_hq_value where stock_code in ('%s') and update_time<='%s' " \ 177 | "and update_time>='%s'" % (nameStr,totalCodeStr,endDate,startDate) 178 | resultDf = pd.read_sql(sql=sqlStr, con=self.engine) 179 | dfList=[] 180 | for code,tempDf in resultDf.groupby('stock_code'): 181 | df = pd.DataFrame(tempDf[nameStr].values,index=tempDf['update_time'],columns=[code]) 182 | dfList.append(df) 183 | resultDf = pd.concat(dfList,axis=1) 184 | return resultDf 185 | 186 | def getCurrentDateData(self,tempCodeList,getDate,tableFlag='stock',nameList=['close_price']): 187 | ''' 188 | 获取指定日期的截面数据 189 | :return: 190 | ''' 191 | if tableFlag=='stock': 192 | totalCodeStr = "" 193 | for stockCode in tempCodeList: 194 | totalCodeStr = totalCodeStr+stockCode+"','" 195 | 196 | sqlStr = "select * from stock_hq_value where stock_code in ('%s') and update_time='%s'" % (totalCodeStr[:-3], getDate) 197 | resultDf = 
pd.read_sql(sql=sqlStr, con=self.engine) 198 | if resultDf.empty: 199 | codes = tempCodeList 200 | fields=self.stockFieldName 201 | tradeDate = getDate 202 | wssData = w.wss(codes=codes,fields=fields,options="tradeDate=%s;priceAdj=F;cycle=D"%tradeDate) 203 | if wssData.ErrorCode!=0: 204 | self.PrintInfoDemo.PrintLog("获取行情数据有误，错误代码" + str(wssData.ErrorCode)) 205 | return pd.DataFrame() 206 | tempDf =pd.DataFrame(wssData.Data,index=fields,columns=codes).T 207 | tempDf.dropna(inplace=True) 208 | if tempDf.empty: 209 | self.PrintInfoDemo.PrintLog("当前日期%s无行情"%getDate) 210 | return pd.DataFrame() 211 | 212 | tempDf['update_time'] = getDate 213 | nameDic = {"open": "open_price", "high": "high_price", "low": "low_price", "close": "close_price", 214 | "mkt_cap_ard": "market_value",} 215 | tempDf.rename(columns=nameDic,inplace=True) 216 | 217 | tempDf['stock_code'] = tempDf.index.tolist() 218 | self.GetDataToMysqlDemo.GetMain(tempDf, 'stock_hq_value') 219 | returnDf = tempDf[nameList] 220 | return returnDf 221 | else: 222 | resultDf.set_index('stock_code',drop=True,inplace=True) 223 | returnDf = resultDf[nameList] 224 | return returnDf 225 | 226 | def getHQData(self, tempCode, startDate='2019-03-01', endDate='2019-05-30', tableFlag='index', 227 | nameList=['close_price']): 228 | ''' 229 | #获取指数行情数据入口 230 | ''' 231 | self.getLackDataToMySql(tempCode, startDate, endDate, tableFlag) 232 | resultDf = self.getDataFromMySql(tempCode, startDate, endDate, tableFlag=tableFlag, nameList=nameList) 233 | return resultDf 234 | 235 | def getTradeDay(self, startdate, endDate, Period=''): 236 | ''' 237 | 获取指定周期交易日,封装wind接口 238 | :param Period: ''日，W周，M月，Q季，S半年，Y年 239 | :return: 240 | ''' 241 | # w.start() 242 | data = w.tdays(beginTime=startdate, endTime=endDate, options="Period=%s" % Period) 243 | if data.ErrorCode != 0: 244 | self.PrintInfoDemo.PrintLog('wind获取交易日期错误，请检查！') 245 | return 246 | tradeDayList = data.Data[0] 247 | tradeDayList = [tradeDay.strftime('%Y-%m-%d') for tradeDay in 
tradeDayList] 248 | # w.close() 249 | return tradeDayList 250 | 251 | 252 | if __name__ == '__main__': 253 | GetDataFromWindAndMySqlDemo = GetDataFromWindAndMySql() 254 | aa = GetDataFromWindAndMySqlDemo.getHQData(tempCode='000300.SH', startDate='2019-02-01', endDate='2019-05-01') 255 | # aa = GetDataFromWindAndMySqlDemo.getIndexConstituent(indexCode='000905.SH',getDate='2010-02-03') 256 | # getHQData(self, tempCode, startDate='2019-04-01', endDate='2019-04-30', tableFlag='index', 257 | # nameList=['close_price']): 258 | # aa = GetDataFromWindAndMySqlDemo.getHQData(tempCode='300033.SZ',tableFlag='stock',startDate='2010-01-01',endDate='2010-02-01') 259 | # aa = GetDataFromWindAndMySqlDemo.getCurrentDateData(tempCodeList=['300033.SZ','600000.SH'],getDate='2012-03-08') 260 | print(aa) -------------------------------------------------------------------------------- /GetExcelData.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | ''' 3 | 解析估值表数据 4 | ''' 5 | 6 | import os 7 | import pandas as pd 8 | from datetime import datetime,date 9 | import xlrd 10 | 11 | class GetExcelData: 12 | def __init__(self,fileTotalPath): 13 | self.fileTotalPath = fileTotalPath #估值表文件夹 14 | 15 | # 获取excel数据 16 | def getData(self): 17 | # ctype： 0 empty,1 string, 2 number, 3 date, 4 boolean, 5 error 18 | totalExcelNameList = os.listdir(self.fileTotalPath) 19 | dicProduct = {} # 按日期整理的持仓数据 20 | 21 | dicName = {} # 数据字段对应英文名称 22 | dicName[u'单位净值'] = 'netValue' 23 | dicName[u'累计单位净值'] = 'accNetValue' 24 | dicName[u'日净值增长率'] = 'netReturn' 25 | dicName[u'累计净值增长率'] = 'accNetReturn' 26 | dicName[u'实现收益'] = 'eargeMoney' 27 | dicName[u'本期净值增长率'] = 'thisNetReturn' 28 | dicName[u'流通股票投资合计'] = 'stockRate' 29 | 30 | dicName['1002'] = 'cashRate' 31 | dicName['1031'] = 'ensureMoneyRate' 32 | dicName[u'其中股票投资'] = 'securityRate' 33 | dicName[u'其中基金投资'] = 'fundRate' 34 | dicName['1202'] = 'antiSaleRate' 35 | dicName['1203'] = 'receivableSeRate' # 
应收股利 36 | dicName['1204'] = 'receivableIrRate' # 应收利息 37 | dicName['3003'] = 'securityCalcRate' # 证券清算款 38 | 39 | dicNetAsset = {} # 资产及净值类数据 40 | dicAssetType = {} # 资产及其种类比例数据 41 | 42 | for excelName in totalExcelNameList: 43 | upDate = excelName[-12:-4] 44 | upDate = upDate[:4]+'-'+upDate[4:6]+'-'+upDate[6:] 45 | data = xlrd.open_workbook(self.fileTotalPath + '\\' + excelName) 46 | table = data.sheet_by_index(0) 47 | 48 | dicNameCode = {} 49 | for rowNum in range(table.nrows): 50 | judgeStr = table.cell(rowNum, 0).value 51 | if judgeStr[:4] =='1102' and judgeStr[-2:] in ['SH','SZ']: # 股票持仓数据 52 | dicNameCode[judgeStr[-9:]] = table.row_values(rowNum) 53 | elif judgeStr in dicName.keys(): # 资产及净值类数据 54 | dicNetAsset[dicName[judgeStr]] = dicNetAsset.get(dicName[judgeStr], {}) 55 | dicNetAsset[dicName[judgeStr]][upDate] = {} 56 | 57 | if judgeStr not in ['流通股票投资合计', '1203', '1002', '1031', '其中股票投资', '其中基金投资', '1202', '3003', '1204']: 58 | temp = table.cell(rowNum, 1) 59 | else: 60 | temp = table.cell(rowNum, 10) 61 | try: 62 | dicNetAsset[dicName[judgeStr]][upDate] = float(temp.value) 63 | except: 64 | if temp.ctype == 1: 65 | if temp.value.find('%') != -1: 66 | dicNetAsset[dicName[judgeStr]][upDate] = float(temp.value[:-1]) / 100 67 | else: 68 | temp = temp.value.replace(',', '') 69 | dicNetAsset[dicName[judgeStr]][upDate] = float(temp) 70 | tempDf = pd.DataFrame(dicNameCode, index=table.row_values(4)).T 71 | dicProduct[upDate] = tempDf 72 | 73 | netAssetDf = pd.DataFrame(dicNetAsset) 74 | start_date = [datetime.strptime(datestr, "%Y-%m-%d").date() for datestr in 75 | netAssetDf.index.tolist()] 76 | netAssetDf.index = start_date 77 | netAssetDf.index.name = 'update' 78 | # tempDf = netAssetDf[ 79 | # ['cashRate', 'ensureMoneyRate','receivableSeRate', 'antiSaleRate', 'securityRate', 'fundRate', 'receivableIrRate', 'securityCalcRate']].fillna(0) 80 | tempDf = netAssetDf[ 81 | ['cashRate', 'ensureMoneyRate', 'antiSaleRate', 'securityRate', 'fundRate']].fillna(0) 82 | 
netAssetDf['otherRate'] = 1 - tempDf.sum(axis=1) 83 | # writer = pd.ExcelWriter('tempResult.xlsx') 84 | # netAssetDf.to_excel(writer) 85 | # writer.save() 86 | return netAssetDf, dicProduct 87 | 88 | if __name__=='__main__': 89 | fileTotalPath = os.getcwd() + r'\乐道4估值表' # 估值表文件夹路径 90 | GetExcelDataDemo = GetExcelData(fileTotalPath=fileTotalPath) 91 | GetExcelDataDemo.getData() 92 | -------------------------------------------------------------------------------- /GetFinanceReportData.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # Author:zouhao 3 | # email:1084848158@qq.com 4 | 5 | import pandas as pd 6 | import numpy as np 7 | import mylog as mylog 8 | from GetAndSaveWindData.GetDataTotalMain import GetDataTotalMain 9 | import matplotlib.pyplot as plt 10 | import matplotlib 11 | from datetime import datetime,timedelta 12 | 13 | matplotlib.rcParams['font.sans-serif'] = ['SimHei'] 14 | matplotlib.rcParams['font.family'] = 'sans-serif' 15 | matplotlib.rcParams['axes.unicode_minus'] = False 16 | 17 | 18 | class GetFinanceReportData: 19 | def __init__(self, dic_param, file_path=''): 20 | self.dic_param = dic_param 21 | self.GetDataTotalMainDemo = GetDataTotalMain(data_resource='wind') 22 | self.file_path=file_path 23 | 24 | def get_industry_sta(self,dic_df): 25 | ''' 26 | 基金持股行业统计 27 | :param dic_df: 28 | :return: 29 | ''' 30 | dic_stock_weight = {} #占股票投资市值比 31 | dic_net_value_weight={} #占净值比 32 | for rpt_date,temp_df in dic_df.items(): 33 | total_code_list = temp_df['stock_code'].tolist() 34 | if rpt_date.find('中报')!=-1: 35 | rpt_date_str = rpt_date[:4]+'0630' 36 | else: 37 | rpt_date_str = rpt_date[:4]+'1231' 38 | temp_new_df = temp_df.set_index('stock_code') 39 | df = self.GetDataTotalMainDemo.get_stock_industry(industry_flag='中证',code_list=total_code_list,industryType=1,tradeDate=rpt_date_str) 40 | temp_total_df = pd.concat([temp_new_df,df],axis=1,sort=True) 41 | dic_stock_weight[rpt_date_str] = 
{} 42 | dic_net_value_weight[rpt_date_str] = {} 43 | for industry,stock_df in temp_total_df.groupby(df.columns.tolist()[0]): 44 | dic_stock_weight[rpt_date_str][industry]=stock_df['pro_total_stock_inve'].sum() 45 | dic_net_value_weight[rpt_date_str][industry] = stock_df['pro_net_value'].sum() 46 | stock_inves_rate_df = pd.DataFrame(dic_stock_weight).T.fillna(0)/100 47 | net_value_rate_df = pd.DataFrame(dic_net_value_weight).T.fillna(0)/100 48 | if self.file_path: 49 | stock_inves_rate_df.to_excel('占股票投资比例.xlsx') 50 | net_value_rate_df.to_excel("占净值比例.xlsx") 51 | 52 | fig1 = plt.figure(figsize=(16,9)) 53 | ax_inves = fig1.add_subplot(111) 54 | stock_inves_rate_df.plot(kind='bar') 55 | plt.show() 56 | return stock_inves_rate_df,net_value_rate_df 57 | 58 | 59 | 60 | def get_main(self): 61 | 62 | fund_contain_stock_df = self.GetDataTotalMainDemo.get_fund_report_data(fund_code=dic_param['fund_code'], 63 | start_date=dic_param['start_date'], 64 | end_date=dic_param['end_date']) 65 | dic_df = {} 66 | total_rpt_list = fund_contain_stock_df.sort_values("rpt_date")['rpt_date'].tolist() 67 | for rpt_date,temp_df in fund_contain_stock_df.groupby(by='rpt_date'): 68 | dic_df[rpt_date] = temp_df 69 | self.get_industry_sta(dic_df) 70 | 71 | 72 | if __name__ == '__main__': 73 | dic_param = {} 74 | dic_param['fund_code'] = '110022.OF' 75 | dic_param['fund_name'] = '富国上证综指' 76 | dic_param['start_date'] = '2011-01-30' 77 | dic_param['end_date'] = datetime.today().strftime("%Y-%m-%d") 78 | GetFinanceReportDataDemo = GetFinanceReportData(dic_param=dic_param) 79 | GetFinanceReportDataDemo.get_main() 80 | -------------------------------------------------------------------------------- /GetWindDataToMySql.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | 3 | ''' 4 | 将wind的数据导入到本地数据库 5 | ''' 6 | 7 | from WindPy import w 8 | import pymysql 9 | from datetime import datetime 10 | import pandas as pd 11 | from sqlalchemy import 
class GetWindDataToMySql:
    """Download daily index quotes from Wind and append them to the MySQL table ``index_data``."""

    def __init__(self):
        # Database connection settings, shared by the pymysql connection and
        # the SQLAlchemy engine.
        # NOTE(review): credentials are hard-coded in source; consider loading
        # them from a config file or the environment.
        self.dataBaseConfig = {
            'host': 'localhost',
            'user': 'root',
            'password': '123456',
            'database': 'fund_data',
            'port': '3306',
            'charset': 'utf8',
        }

        # Security codes to download.
        self.wsetData = ["000001.SH", "399300.SZ", "000016.SH", "000905.SH", "000906.SH"]
        # Quote fields to download.
        self.dataName = ["open", "high", "low", "close", "volume", "amt", "chg", "pct_chg", "turn"]

    def connectMysql(self, dataBase='newOpen'):
        """Open a new pymysql connection, or close the one passed in.

        :param dataBase: the sentinel ``'newOpen'`` opens and returns a new
            connection; any other value is treated as an open connection and
            closed (``None`` is returned).
        """
        if dataBase == 'newOpen':
            return pymysql.connect(host=self.dataBaseConfig['host'], user=self.dataBaseConfig['user'],
                                   passwd=self.dataBaseConfig['password'], db=self.dataBaseConfig['database'])
        dataBase.close()
        return

    def _engineUrl(self):
        """Build the SQLAlchemy connection URL from ``self.dataBaseConfig``."""
        from urllib.parse import quote_plus  # local import: only needed here

        cfg = self.dataBaseConfig
        # The password is URL-quoted so special characters cannot break the URL.
        return "mysql+pymysql://%s:%s@%s:%s/%s?charset=%s" % (
            cfg['user'], quote_plus(cfg['password']), cfg['host'],
            cfg.get('port', '3306'), cfg['database'], cfg.get('charset', 'utf8'))

    def PrintInfo(self, infostr, otherInfo=''):
        """Print a time-stamped ``[INFO]`` line; ``otherInfo`` is appended unless it is an empty string."""
        prefix = datetime.now().strftime('%H:%M:%S') + '[INFO]:' + infostr
        if isinstance(otherInfo, str) and not otherInfo:
            print(prefix)
        else:
            print(prefix, otherInfo)

    def getDataStartDate(self):
        """Return the latest ``UPDATE`` value stored in ``index_data``, or ``'2007-01-01'`` if there is none."""
        db = self.connectMysql()
        try:
            cursor = db.cursor()
            cursor.execute("select max(`UPDATE`) from index_data")
            data = cursor.fetchone()[0]
        finally:
            # Always release the connection, even when the query fails.
            self.connectMysql(dataBase=db)
        startDate = data if data else '2007-01-01'
        self.PrintInfo("获取数据的开始日期 : %s" % startDate)
        return startDate

    def getDataFromWind(self, startDate):
        """Fetch history for every code in ``self.wsetData`` starting at ``startDate``.

        :return: dict of code -> DataFrame (one row per date, plus ``CODE`` and
            ``UPDATE`` columns). Codes whose request returns a non-zero
            ``ErrorCode`` are skipped.
        """
        totalData = {}
        w.start()
        try:
            for code in self.wsetData:
                self.PrintInfo("获取当前指数的历史数据 : %s" % code)
                wsetdata = w.wsd(codes=code, fields=self.dataName, beginTime=startDate)
                if wsetdata.ErrorCode != 0:
                    self.PrintInfo("获取当前指数的历史数据异常 : %s" % code)
                    continue

                tempDf = pd.DataFrame(wsetdata.Data, index=wsetdata.Fields, columns=wsetdata.Times).T
                tempDf['CODE'] = code
                tempDf['UPDATE'] = wsetdata.Times
                totalData[code] = tempDf
        finally:
            # Close the Wind session even if a request raises.
            w.close()
        return totalData

    def dataToMysql(self, totalData):
        """Append every DataFrame in ``totalData`` to the ``index_data`` table."""
        if not totalData:
            self.PrintInfo("未获取到任何有效数据，请检查！")
            return

        conn = create_engine(self._engineUrl())
        for code, datadf in totalData.items():
            self.PrintInfo('%s历史数据写入数据库。。' % code)
            datadf.to_sql(name='index_data', con=conn, if_exists='append', index=False)

    def startMain(self):
        """Entry point: find the start date, download from Wind, write to MySQL."""
        startDate = self.getDataStartDate()
        totalData = self.getDataFromWind(startDate)
        self.dataToMysql(totalData)
'近1年(%)', '近3年(%)', '基金规模(亿元)', '年化收益率(%)', '最大回撤(%)', 26 | 'Alpha(年化)(%)', 'Sharpe(年化)', 'Treynor(年化)', 'Sortino(年化)', '选时能力', '选股能力', 27 | '信息比率(年化)'] # 倒序排名，越大越好 28 | self.sort_down =['年化波动率(%)','下行标准差(%)'] #逆序排名，越小越好 29 | self.targe_code_list=['004840.OF','001708.OF','004695.OF','006749.OF','002465.OF','002182.OF','004696.OF','003208.OF'] 30 | 31 | def calc_sort(self): 32 | total_fund = pd.read_excel(self.file_path+"偏股混合型.xlsx", sheet_name='Sheet1', index_col=0) 33 | df_list = [] 34 | for up_col in self.sort_up: 35 | temp_se = total_fund[up_col].rank(ascending=False) 36 | temp_dic = temp_se.to_dict() 37 | total_num = temp_se.max() 38 | str_dic={} 39 | for fund_code,rank_num in temp_dic.items(): 40 | if np.isnan(rank_num): 41 | str_dic[fund_code] = '--/%s' % int(total_num) 42 | else: 43 | str_dic[fund_code] = '%s/%s'%(int(rank_num),int(total_num)) 44 | temp_new_se= pd.Series(str_dic,name=up_col) 45 | df_list.append(temp_new_se) 46 | 47 | for down_col in self.sort_down: 48 | temp_se = total_fund[down_col].rank(ascending=True) 49 | temp_dic = temp_se.to_dict() 50 | total_num = temp_se.max() 51 | str_dic = {} 52 | for fund_code, rank_num in temp_dic.items(): 53 | if np.isnan(rank_num): 54 | str_dic[fund_code] = '--/%s' % int(total_num) 55 | else: 56 | str_dic[fund_code] = '%s/%s' % (int(rank_num), int(total_num)) 57 | temp_new_se = pd.Series(str_dic, name=down_col) 58 | df_list.append(temp_new_se) 59 | sort_df = pd.concat(df_list,axis=1,sort=True) 60 | 61 | fix_df = total_fund[['证券简称','基金成立日','基金经理(现任)','基金管理人']] 62 | final_df = pd.concat([fix_df,sort_df],sort=True,axis=1) 63 | final_df.to_excel(self.file_path+"偏股混合型基金排名.xlsx") 64 | return final_df 65 | 66 | def get_target(self,df): 67 | target_df =df.loc[self.targe_code_list] 68 | 69 | 70 | if __name__ == '__main__': 71 | CalcJudgeFundDemo = CalcJudgeFund() 72 | CalcJudgeFundDemo.calc_sort() 73 | -------------------------------------------------------------------------------- /JudgeFund/JudgeAndGetFund.py: 
class JudgeAndGetFund:
    """Collect and chart a single fund's share, size and top-holdings history."""

    # Bar colours for the stacked industry charts; reused cyclically when
    # there are more industries than colours.
    _PALETTE = [
        '#36648B', '#458B00', '#7A378B', '#8B0A50', '#8FBC8F', '#B8860B', '#FFF68F', '#FFF5EE', '#FFF0F5',
        '#FFEFDB',
        '#F4A460', '#A0522D', '#FFE4E1', '#BC8F8F', '#A52A2A', '#800000', '#F5F5F5', '#DCDCDC', '#808080',
        '#000000',
        '#FFA500', '#F5DEB3', '#DAA520', '#BDB76B', '#556B2F', '#006400', '#98FB98', '#7FFFAA', '#20B2AA',
        '#F0FFFF',
        '#191970', '#BA55D3', '#DDA0DD', '#4B0082', '#8FBC8F', '#B8860B', '#FFF68F', '#FFF5EE', '#FFF0F5',
        '#FFEFDB',
        '#36648B', '#458B00', '#7A378B', '#8B0A50', '#8FBC8F', '#B8860B', '#FFF68F', '#FFF5EE', '#FFF0F5',
        '#FFEFDB',
    ]

    def __init__(self):
        self.GetDataFromWindNotMysqlDemo = GetDataFromWindNotMysql(data_resource='wind')

    def get_init_param(self, fund_name='易方达行业领先'):
        """Load the analysis window and fund code from ``参数_<fund_name>.xlsx``.

        Reads the first row of sheet ``基础信息`` and stores ``start_date``,
        ``end_date`` and ``fund_code`` on the instance.

        :return: an empty dict (kept for the callers that pass it on).
        """
        dic_param = {}
        base_df = pd.read_excel("参数_%s.xlsx" % fund_name, sheet_name='基础信息')
        first_row = base_df.iloc[0]
        self.start_date = first_row['任职日期']
        self.end_date = first_row['离任日期']
        self.fund_code = first_row['基金代码']
        return dic_param

    def get_fe_change(self, dic_param):
        """Chart fund shares, fund size and stock ratio; export the table to Excel.

        :param dic_param: unused; accepted for interface compatibility.
        """
        df = self.GetDataFromWindNotMysqlDemo.get_fund_filed(
            start_date=self.start_date, end_date=self.end_date, fund_code=self.fund_code)
        # Period-over-period relative change.
        df['基金份额变化率'] = df['基金份额_万份'] / df['基金份额_万份'].shift(1) - 1
        df['基金规模变化率'] = df['基金规模'] / df['基金规模'].shift() - 1

        fig_fe = plt.figure(figsize=(16, 9))
        ax_fe = fig_fe.add_subplot(111)
        df['基金份额_万份'].plot.bar(ax=ax_fe)
        ax_fe.set_title('基金份额_万份')
        fig_fe.savefig('基金份额概况.png')

        fig_size = plt.figure(figsize=(16, 9))
        ax_size = fig_size.add_subplot(111)
        df['基金规模'].plot(kind='bar', ax=ax_size, color='r', width=0.5)
        ax_size.set_xticklabels(df.index, rotation=90)
        ax_size.set_title('基金规模')
        fig_size.savefig('基金规模概况.png')

        fig_stock_rate = plt.figure(figsize=(16, 9))
        ax_stock_rate = fig_stock_rate.add_subplot(111)
        (df['股票市值占基金资产净值比'] / 100).plot(kind='bar', ax=ax_stock_rate, color='b')
        ax_stock_rate.set_title('股票市值占基金资产净值比')
        fig_stock_rate.savefig('股票占比情况.png')
        df.to_excel("%s份额规模概况.xlsx" % self.fund_code)

    def _plot_stacked_share(self, share_df, title, file_name):
        """Draw one stacked bar chart (rows = dates, columns = industries) and save it."""
        fig = plt.figure(figsize=(16, 9))
        ax = fig.add_subplot(111)
        date_labels = share_df.index.tolist()
        for i in range(share_df.shape[1]):
            # Positional access via iloc (``.ix`` no longer exists in pandas);
            # each bar sits on the sum of the columns drawn before it.
            ax.bar(date_labels, share_df.iloc[:, i],
                   color=self._PALETTE[i % len(self._PALETTE)],
                   bottom=share_df.iloc[:, :i].sum(axis=1))
        box = ax.get_position()
        ax.set_position([box.x0, box.y0, box.width * 1.02, box.height])
        ax.legend(labels=share_df.columns.tolist(), bbox_to_anchor=(1, 0.8), ncol=1)
        ax.set_title(title)
        for tick in ax.get_xticklabels():
            tick.set_rotation(90)
        fig.savefig(file_name)

    def sum_plot(self, df):
        """Chart the top-holdings weights in total and stacked by industry.

        :param df: holdings table with the columns ``披露日期``, ``所属行业``,
            ``市值占基金资产净值比`` and ``市值占股票投资市值比``.
        """
        dic_sum = {}
        dic_indus_fund_sum = {}
        dic_indus_stock_sum = {}
        for datestr, temp_df in df.groupby(by='披露日期'):
            by_industry = temp_df.groupby('所属行业')
            dic_indus_fund_sum[datestr] = by_industry['市值占基金资产净值比'].sum().to_dict()
            dic_indus_stock_sum[datestr] = by_industry['市值占股票投资市值比'].sum().to_dict()
            dic_sum[datestr] = {'十大重仓股市值占基金净值比': temp_df['市值占基金资产净值比'].sum(),
                                '十大重仓股市值占股票投资市值比': temp_df['市值占股票投资市值比'].sum()}

        value_fund_df = (pd.DataFrame(dic_indus_fund_sum).T / 100).fillna(0)
        value_stock_df = (pd.DataFrame(dic_indus_stock_sum).T / 100).fillna(0)

        sum_df = pd.DataFrame(dic_sum).T
        fig = plt.figure(figsize=(16, 9))
        ax = fig.add_subplot(111)
        sum_df.plot.bar(ax=ax)

        self._plot_stacked_share(value_fund_df, "重仓行业市值占基金资产净值比", '重仓行业市值占基金资产净值比.png')
        self._plot_stacked_share(value_stock_df, "重仓行业市值占股票投资净值比", '重仓行业市值占股票投资净值比.png')
        plt.show()

    def get_stock_diff(self, df):
        """Slice the holdings per disclosure date, indexed by stock code.

        NOTE(review): this method is unfinished in the original; it builds the
        per-date frames but neither compares nor returns them. That behaviour
        is kept, so it returns ``None``.
        """
        temp_df = df.copy().set_index(keys=['披露日期', '重仓排名'])
        total_date_list = list(df['披露日期'].unique())
        change_name_list = ['市值占基金资产净值比', '市值占股票投资市值比', '持股市值', '股票代码']
        for date_num, date_str in enumerate(total_date_list):
            current_df = temp_df.loc[date_str][change_name_list].set_index('股票代码')
            if date_num > 0:
                pre_df = temp_df.loc[total_date_list[date_num - 1]][change_name_list].set_index('股票代码')

    def get_stock_detail(self, dic_param):
        """Load the top-holdings table (from the Excel cache if possible) and analyse it.

        :param dic_param: unused; accepted for interface compatibility.
        """
        cache_file = "%s重仓股概况.xlsx" % self.fund_code
        try:
            df = pd.read_excel(cache_file, index_col=0, converters={'股票代码': str, '重仓排名': int})
        except Exception:
            # Cache missing or unreadable: fetch from the data source and
            # rebuild the cache. (Best-effort on purpose, but no longer
            # swallows KeyboardInterrupt/SystemExit like a bare except.)
            df = self.GetDataFromWindNotMysqlDemo.get_fund_stock_filed(
                start_date=self.start_date, end_date=self.end_date, fund_code=self.fund_code)
            df.to_excel('%s重仓股概况.xlsx' % self.fund_code)
        # The index holds the disclosure dates; expose them as a string column.
        df['披露日期'] = [datetime.strftime(dateStr, "%Y-%m-%d") for dateStr in df.index.tolist()]
        df.dropna(inplace=True)
        self.get_stock_diff(df)

    def get_main(self):
        """Entry point: load parameters, then run the top-holdings analysis."""
        dic_param = self.get_init_param()
        self.get_stock_detail(dic_param)
pd.Series(total_company_esta,name='基金成立日').sort_values(ascending=False) 26 | esta_percent = (total_company_esta_se.index.tolist().index(company)+1)/len(total_company_esta_se) 27 | esta_percent_str = str(np.round(esta_percent*100,2))+'%' 28 | 29 | num_dic = {company_name:temp_df.shape[0] for company_name,temp_df in total_company_dic.items()} 30 | company_df = pd.DataFrame() 31 | num_se = pd.Series(num_dic, ).sort_values() 32 | per_rate = (list(num_se.unique()).index(num_se[company]) + 1) / len(list(num_se.unique())) 33 | per_rate_str = str(np.round(per_rate * 100, 4)) + '%' 34 | self.logger.info('%s旗下现有量化型产品共%s只(含A类C类),占所有管理人所持数量的%s分位数。' % (company, num_se[company], per_rate_str)) 35 | if per_rate >= 0.7: 36 | self.logger.info("占比靠前，反映管理人在发行量化对冲型基金上的优秀运作能力。") 37 | elif 0.7 >= per_rate > 0.4: 38 | self.logger.info("占比中等，管理人发行量化对冲型基金的数量一般。") 39 | elif per_rate <= 0.4: 40 | self.logger.info('占比下游，管理人对量化对冲型基金发行数量较少。') 41 | company_fund_df = df[df['基金管理人'] == company] 42 | min_fund_esta = company_fund_df['基金成立日'].min() 43 | min_fund_esta_se = company_fund_df[company_fund_df['基金成立日'] == min_fund_esta].iloc[0] 44 | min_fund_esta_name = min_fund_esta_se['证券简称'] 45 | self.logger.info("其中，%s(%s)，基金经理%s,于%s成立，为该管理人旗下成立最早的产品；" % ( 46 | min_fund_esta_se['证券简称'], min_fund_esta_se.name, min_fund_esta_se['基金经理(现任)'], min_fund_esta_se['基金成立日'])) 47 | self.logger.info("按各管理人发行最早量化对冲型基金的时间看，该产品发行时间占各管理人同类型的%s分位数"%esta_percent_str) 48 | if esta_percent>=0.6: 49 | self.logger.info("发行时间早与多数管理人，一定程度上反应了管理人更丰富的投资管理经验。") 50 | elif 0.3<=esta_percent<0.6: 51 | self.logger.info("发行时间排名中等，管理人整体投资管理经验中等水平") 52 | else: 53 | self.logger.info("发行时间较晚，需谨慎对待管理人可能对量化对冲型基金投资管理经验较短的问题") 54 | 55 | def get_manager_info(self, company, df, manager_name,target_labe=''): 56 | total_manager_list = df['基金经理(现任)'].tolist() 57 | fund_num_list=[] 58 | for num in range(len(total_manager_list)): 59 | if isinstance(total_manager_list[num],str) and total_manager_list[num].find(manager_name) != -1: 60 | 
fund_num_list.append(num) 61 | target_df = df.iloc[fund_num_list] 62 | if target_df.empty: 63 | self.logger.info('未查询到基金经理%s管理过的量化对冲型产品，对其投资经验需保持谨慎！') 64 | return 65 | self.logger.info('基金经理%s,查询到其管理量化对冲型产品%s只(含A/C类),'%(manager_name,target_df.shape[0])) 66 | 67 | last_se = target_df[target_df['基金成立日']==target_df['基金成立日'].min()].iloc[0] 68 | 69 | self.logger.info("其中，%s(%s)，于%s成立，为该基金经理管理的最早的%s产品，最新规模为%s亿元；" % ( 70 | last_se['证券简称'], last_se.name, last_se['基金成立日'],target_labe, last_se['基金规模亿元'])) 71 | 72 | annual_alpha = last_se['Alpha(年化)_d_52_hs300百分'] 73 | annual_alpha_str = str(np.round(annual_alpha, 2)) + '%' 74 | same_style_alpha = last_se['Alpha(年化)同类平均_d_52_hs300百分'] 75 | same_style_alpha_str = str(np.round(same_style_alpha, 2)) + '%' 76 | total_alpha = last_se['Alpha_w_52_hs300百分'] 77 | total_alpha_str = str(np.round(total_alpha,2))+'%' 78 | if annual_alpha > same_style_alpha: 79 | self.logger.info("该基金自成立以来，近一年超额alpha收益（相对沪深300）为%s，年化alpha收益为%s，高于同类平均的年化alpha收益%s" % (total_alpha_str,annual_alpha_str, same_style_alpha_str)) 80 | else: 81 | self.logger.info("该基金自成立以来，近一年超额alpha收益（相对沪深300）为%s,年化alpha收益为%s，低于同类平均的年化alpha收益%s" % (total_alpha_str,annual_alpha_str, same_style_alpha_str)) 82 | 83 | current_se = target_df.loc[self.target_code] 84 | self.logger.info("%s成立于%s,当前规模%s亿元,规模同类排名%s"%(current_se['证券简称'],current_se['基金成立日'],current_se['基金规模亿元'],current_se['规模同类排名'])) 85 | rate_name_list = [data_name for data_name in current_se.index.tolist() if data_name.find('回报排名')!=-1] 86 | rate_se = last_se.loc[rate_name_list] 87 | rate_se.name='回报排名' 88 | rate_se.to_excel('%s回报排名.xlsx'%current_se['证券简称']) 89 | a=0 90 | 91 | def get_main(self): 92 | df = pd.read_excel("量化对冲公募基金.xlsx", index_col=0) 93 | self.get_comyany_info(company='海富通基金管理有限公司',df=df) 94 | self.get_manager_info(company='海富通基金管理有限公司',df=df,manager_name='朱斌全') 95 | 96 | if __name__=='__main__': 97 | JudgeFundDCDemo = JudgeFundDC() 98 | JudgeFundDCDemo.get_main() 
class JudgeFundImproveBase:
    """Log a textual assessment of a fund company and manager for index-enhanced funds."""

    def __init__(self):
        self.logger = mylog.set_log()

    def get_manager_info(self, company, df, target_code_list, manager_name):
        """Log a summary of the index-enhanced funds run by ``manager_name``.

        :param company: unused; kept for interface compatibility.
        :param df: fund table indexed by fund code.
        :param target_code_list: tracked index codes; the first entry is
            interpolated into the log text.
        :param manager_name: substring searched for in column ``基金经理(现任)``.
        """
        # Non-string manager cells (e.g. NaN) are skipped.
        fund_num_list = [pos for pos, managers in enumerate(df['基金经理(现任)'].tolist())
                         if isinstance(managers, str) and manager_name in managers]
        target_df = df.iloc[fund_num_list]
        if target_df.empty:
            # The manager's name is now actually interpolated into the message.
            self.logger.info('未查询到基金经理%s管理过的指数增强型产品，对其投资经验需保持谨慎！' % manager_name)
            return
        self.logger.info('基金经理%s,查询到其管理指数增强型产品%s只(含A/C类),反映了该基金经理有一定相关产品的投资经验' % (manager_name, target_df.shape[0]))
        # Earliest-established fund of this manager.
        last_se = target_df[target_df['基金成立日'] == target_df['基金成立日'].min()].iloc[0]
        self.logger.info("其中，%s(%s)，于%s成立，为该基金经理管理的最早的跟踪%s产品，最新规模为%s亿元；" % (
            last_se['证券简称'], last_se.name, last_se['基金成立日'], target_code_list[0], last_se['基金规模亿元']))

        annual_alpha = last_se['Alpha(年化)_d_52_hs300百分']
        annual_alpha_str = str(np.round(annual_alpha, 2)) + '%'
        same_style_alpha = last_se['Alpha(年化)同类平均_d_52_hs300百分']
        same_style_alpha_str = str(np.round(same_style_alpha, 2)) + '%'
        total_alpha_str = str(np.round(last_se['Alpha_w_52_hs300百分'], 2)) + '%'
        if annual_alpha > same_style_alpha:
            self.logger.info("该基金自成立以来，近一年超额alpha收益（相对沪深300）为%s，年化alpha收益为%s，高于同类平均的年化alpha收益%s" % (
                total_alpha_str, annual_alpha_str, same_style_alpha_str))
        else:
            self.logger.info("该基金自成立以来，近一年超额alpha收益（相对沪深300）为%s,年化alpha收益为%s，低于同类平均的年化alpha收益%s" % (
                total_alpha_str, annual_alpha_str, same_style_alpha_str))

    def get_target_label_fund(self, company_fund_df, target_code_list=None):
        """Log the company's earliest fund that tracks one of ``target_code_list``.

        :param company_fund_df: the funds of a single company.
        :param target_code_list: tracked index codes; ``None`` means none.
        :return: ``None``; nothing is logged when no fund matches.
        """
        target_code_list = target_code_list or []
        target_df = company_fund_df[company_fund_df['跟踪指数代码'].isin(target_code_list)]
        if target_df.empty:
            return

        min_fund_esta_se = target_df[target_df['基金成立日'] == target_df['基金成立日'].min()].iloc[0]
        self.logger.info("其中，%s(%s)，基金经理%s,于%s成立，为该管理人旗下成立最早的跟踪%s产品，最新规模为%s亿元；" % (
            min_fund_esta_se['证券简称'], min_fund_esta_se.name, min_fund_esta_se['基金经理(现任)'], min_fund_esta_se['基金成立日'],
            target_code_list[0], min_fund_esta_se['基金规模亿元']))

        annual_alpha = min_fund_esta_se['Alpha(年化)_d_52_hs300百分']
        annual_alpha_str = str(np.round(annual_alpha, 2)) + '%'
        same_style_alpha = min_fund_esta_se['Alpha(年化)同类平均_d_52_hs300百分']
        same_style_alpha_str = str(np.round(same_style_alpha, 2)) + '%'
        if annual_alpha > same_style_alpha:
            self.logger.info("该基金自成立以来，年化alpha收益为%s，高于同类平均的alpha收益%s" % (annual_alpha_str, same_style_alpha_str))
        else:
            self.logger.info("该基金自成立以来，年化alpha收益为%s，低于同类平均的alpha收益%s" % (annual_alpha_str, same_style_alpha_str))
        return

    def get_comyany_info(self, company, df, target_code_list=None):
        """Log how ``company`` compares with all managers by fund count and first launch.

        :param company: manager name as it appears in column ``基金管理人``.
        :param df: fund table indexed by fund code.
        :param target_code_list: optional tracked index codes; when given, the
            number of managers tracking them and the company's earliest such
            fund are logged as well.
        """
        target_code_list = target_code_list or []
        total_company_dic = {company_name: temp_df for company_name, temp_df in df.groupby(by='基金管理人')}
        if company not in total_company_dic:
            # Guard against the ValueError/KeyError the lookups below would raise.
            self.logger.info('未在数据中找到管理人%s，无法统计。' % company)
            return

        # Earliest launch date per manager, latest first; the position of the
        # company in that order gives its percentile.
        total_company_esta = {company_name: temp_df['基金成立日'].min().strftime('%Y-%m-%d')
                              for company_name, temp_df in total_company_dic.items()}
        total_company_esta_se = pd.Series(total_company_esta, name='基金成立日').sort_values(ascending=False)
        esta_percent = (total_company_esta_se.index.tolist().index(company) + 1) / len(total_company_esta_se)
        esta_percent_str = str(np.round(esta_percent * 100, 2)) + '%'

        # Percentile of the company's fund count among the distinct counts.
        num_se = pd.Series({company_name: temp_df.shape[0]
                            for company_name, temp_df in total_company_dic.items()}).sort_values()
        distinct_counts = list(num_se.unique())
        per_rate = (distinct_counts.index(num_se[company]) + 1) / len(distinct_counts)
        per_rate_str = str(np.round(per_rate * 100, 4)) + '%'
        self.logger.info('%s旗下现有指数增强型产品共%s只(含A类C类),占所有管理人所持数量的%s分位数。' % (company, num_se[company], per_rate_str))
        if per_rate >= 0.7:
            self.logger.info("占比靠前，反映管理人在发行指数增强型基金上的优秀运作能力。")
        elif per_rate > 0.4:
            self.logger.info("占比中等，管理人发行指数增强型基金的数量一般。")
        else:
            self.logger.info('占比下游，管理人对指数增强型基金发行数量较少。')

        company_fund_df = df[df['基金管理人'] == company]
        min_fund_esta_se = company_fund_df[company_fund_df['基金成立日'] == company_fund_df['基金成立日'].min()].iloc[0]
        self.logger.info("其中，%s(%s)，基金经理%s,于%s成立，为该管理人旗下成立最早的增强型产品；" % (
            min_fund_esta_se['证券简称'], min_fund_esta_se.name, min_fund_esta_se['基金经理(现任)'], min_fund_esta_se['基金成立日']))
        self.logger.info("按各管理人发行最早指数增强型基金的时间看，该产品发行时间占各管理人同类型的%s分位数" % esta_percent_str)
        if esta_percent >= 0.6:
            self.logger.info("发行时间早与多数管理人，一定程度上反应了管理人更丰富的投资管理经验。")
        elif esta_percent >= 0.3:
            self.logger.info("发行时间排名中等，管理人整体投资管理经验中等水平")
        else:
            self.logger.info("发行时间较晚，需谨慎对待管理人可能对指数增强型基金投资管理经验较短的问题")

        if target_code_list:
            # Number of funds per manager that track one of the target indices.
            dic_target = {company_name: int(temp_df['跟踪指数代码'].isin(target_code_list).sum())
                          for company_name, temp_df in total_company_dic.items()}
            label_se = pd.Series(dic_target, name='产品数量')
            self.logger.info("跟踪%s指数的管理人共%s家" % (target_code_list[0], len(label_se[label_se > 0])))
            self.get_target_label_fund(company_fund_df, target_code_list)
        return

    def get_main(self, company='万家基金管理有限公司', manager_name='乔亮'):
        """Entry point: load ``指数增强评价指标.xlsx`` and log the company and manager assessment."""
        df = pd.read_excel('指数增强评价指标.xlsx', index_col=0)
        self.get_comyany_info(company, df, target_code_list=['000852.SH',])
        self.get_manager_info(company, df, target_code_list=['000852.SH',], manager_name=manager_name)
df.loc[fund_code]['证券简称'].find('C') == -1: 42 | dic_fund_index[fund_code] = df.loc[fund_code]['跟踪指数代码'] 43 | 44 | GetDataTotalMainDemo = GetDataTotalMain(data_resource='wind') 45 | dic_total_index_df = {} 46 | dic_result_df = {} 47 | for fund_code, index_code in dic_fund_index.items(): 48 | start_date = df.loc[fund_code]['基金成立日'] 49 | if datetime.today() - timedelta(days=365) < start_date: 50 | continue 51 | start_date = start_date.strftime("%Y-%m-%d") 52 | temp_fund_df = GetDataTotalMainDemo.get_hq_data(code=fund_code, code_style='fund', start_date=start_date, 53 | end_date=self.last_date_str, name_list=['net_value_adj']) 54 | temp_fund_df.rename(columns={'net_value_adj': fund_code}, inplace=True) 55 | if index_code not in dic_total_index_df: 56 | temp_index_df = GetDataTotalMainDemo.get_hq_data(code=index_code, code_style='index', 57 | start_date=start_date, end_date=self.last_date_str, 58 | name_list=['close_price']) 59 | temp_index_df.rename(columns={'close_price': index_code}, inplace=True) 60 | dic_total_index_df[index_code] = temp_index_df 61 | else: 62 | temp_index_df = dic_total_index_df[index_code] 63 | dic_result_df[fund_code + '_' + index_code] = pd.concat([temp_fund_df, temp_index_df], axis=1, sort=True) 64 | return dic_result_df 65 | 66 | def regression(self, x1, x2, y): 67 | ''' 68 | 最小二乘回归 69 | :param x1: 70 | :param x2: 71 | :param y: 72 | :return: 73 | ''' 74 | x1, x2 = x1.reshape(len(x1), 1), x2.reshape(len(x2), 1) 75 | c = np.ones((len(x1), 1)) 76 | X = np.hstack((c, x1, x2)) 77 | res = (sm.OLS(y, X)).fit() 78 | return res 79 | 80 | def get_select_judge(self, df): 81 | ''' 82 | 获取所有基金的选股，择时能力 83 | :param df: 84 | :return: 85 | ''' 86 | try: 87 | select_df = pd.read_excel("择时选股能力.xlsx", index_col=0) 88 | except: 89 | dic_result_df = self.get_data(df) 90 | dicRegression = {} 91 | for fund_index_code, fund_index_df in dic_result_df.items(): 92 | fund_code = fund_index_code.split('_')[0] 93 | index_code = fund_index_code.split('_')[1] 94 | 
tempReturn = (fund_index_df - fund_index_df.shift(1)) / fund_index_df.shift(1) 95 | tempReturn.fillna(0, inplace=True) 96 | riskFree = 0.02 / 250 97 | fundReduceRf = tempReturn[fund_code] - riskFree 98 | bencReduceRf = tempReturn[index_code] - riskFree 99 | Y = fundReduceRf.values 100 | tmX1 = bencReduceRf.values 101 | tmX2 = np.power(tmX1, 2) 102 | TMResult = self.regression(tmX1, tmX2, Y) 103 | 104 | dicRegression[fund_code] = {} 105 | dicRegression[fund_code]['R方'] = round(TMResult.rsquared, 2) 106 | dicRegression[fund_code]['择股指标(年化alpha)'] = str(round(TMResult.params[0] * 252 * 100, 2)) + '%' 107 | dicRegression[fund_code]['择时指标(beta)'] = round(TMResult.params[2], 2) 108 | select_df = pd.DataFrame(dicRegression).T 109 | select_df.to_excel("择时选股能力.xlsx") 110 | 111 | target_bench_code = df.loc[self.target_code]['跟踪指数代码'] 112 | same_total_df = df[df['跟踪指数代码'] == target_bench_code] 113 | estdate_str = (datetime.strptime(self.last_date_str,"%Y-%m-%d")-timedelta(days=365)).strftime("%Y-%m-%d") 114 | same_total_df = same_total_df[same_total_df['基金成立日']<=estdate_str] 115 | same_code_list = [fund_code for fund_code in same_total_df.index.tolist() if 116 | df.loc[fund_code]['证券简称'].find('C') == -1] 117 | if len(same_code_list) >= 5: 118 | same_df = select_df.loc[same_code_list] 119 | self.logger.info('选取跟踪同样指数即%s'%(target_bench_code,)) 120 | else: 121 | same_df = select_df 122 | self.logger.info('选取所有指数增强基金') 123 | 124 | alpha_sort_df = same_df.sort_values(by='择股指标(年化alpha)') 125 | esta_alpha_percent = (alpha_sort_df.index.tolist().index(self.target_code) + 1) / alpha_sort_df.shape[0] 126 | esta_alpha_percent_str = str(np.round(esta_alpha_percent * 100, 2)) + '%' 127 | 128 | beta_sort_df = same_df.sort_values(by='择时指标(beta)') 129 | esta_beta_percent = (beta_sort_df.index.tolist().index(self.target_code) + 1) / beta_sort_df.shape[0] 130 | esta_beta_percent_str = str(np.round(esta_beta_percent * 100, 2)) + '%' 131 | 132 | R_sort_df = same_df.sort_values(by='R方') 133 | 
class JudgeFundMain:
    """Write a plain-language assessment of an index-enhanced fund's company and manager.

    Every finding is emitted through ``self.logger.info``; the public methods
    return ``None``.
    """

    def __init__(self):
        self.logger = mylog.set_log()

    @staticmethod
    def _contains_mask(values, keyword):
        """Return a boolean array marking the string entries of *values* containing *keyword*.

        Non-string entries (e.g. NaN coming from empty Excel cells) count as "no match"
        instead of raising.
        """
        return np.array([isinstance(value, str) and keyword in value for value in values], dtype=bool)

    @staticmethod
    def _pct(value, digits=2):
        """Round *value* to *digits* decimals and append a percent sign."""
        return str(np.round(value, digits)) + '%'

    @staticmethod
    def _earliest_fund(fund_df):
        """Return the row of *fund_df* holding the smallest '基金成立日' (first one on ties)."""
        return fund_df[fund_df['基金成立日'] == fund_df['基金成立日'].min()].iloc[0]

    @staticmethod
    def _unique_rank(sorted_se, key):
        """Return the 1-based position of ``sorted_se[key]`` among the distinct values, as a fraction.

        *sorted_se* must already be sorted ascending, otherwise the result is not a rank.
        """
        distinct = list(sorted_se.unique())
        return (distinct.index(sorted_se[key]) + 1) / len(distinct)

    def get_manager_info(self, company, df, target_labe, manager_name):
        """Log the track record of *manager_name* within *df*.

        :param company: unused here; kept for signature compatibility with callers.
        :param df: fund table indexed by fund code.
        :param target_labe: keyword looked for in '基金全称' to pick the product family.
        :param manager_name: substring looked for in '基金经理(现任)'.
        """
        managed_df = df.loc[self._contains_mask(df['基金经理(现任)'], manager_name)]
        if managed_df.empty:
            # The original message had a '%s' placeholder but never supplied the name.
            self.logger.info('未查询到基金经理%s管理过的指数增强型产品，对其投资经验需保持谨慎！' % (manager_name,))
            return

        self.logger.info('基金经理%s,查询到其管理指数增强型产品%s只(含A/C类),' % (manager_name, managed_df.shape[0]))
        # Filter by the requested label (previously hard-coded to '量化' regardless of the argument).
        labelled_df = managed_df.loc[self._contains_mask(managed_df['基金全称'], target_labe)]
        if labelled_df.empty:
            self.logger.info('但未找到其管理过的%s产品,需对其该方面投资经验保持谨慎！' % (target_labe,))
            return

        self.logger.info('其中%s产品共%s只,反映了该基金经理有一定相关产品的投资经验；' % (target_labe, labelled_df.shape[0]))
        last_se = self._earliest_fund(labelled_df)
        self.logger.info("其中，%s(%s)，于%s成立，为该基金经理管理的最早的%s产品，最新规模为%s亿元；" % (
            last_se['证券简称'], last_se.name, last_se['基金成立日'], target_labe, last_se['基金规模亿元']))

        annual_alpha = last_se['Alpha(年化)_d_52_hs300百分']
        same_style_alpha = last_se['Alpha(年化)同类平均_d_52_hs300百分']
        alpha_args = (self._pct(last_se['Alpha_w_52_hs300百分']), self._pct(annual_alpha),
                      self._pct(same_style_alpha))
        if annual_alpha > same_style_alpha:
            self.logger.info("该基金自成立以来，近一年超额alpha收益（相对沪深300）为%s，年化alpha收益为%s，高于同类平均的年化alpha收益%s" % alpha_args)
        else:
            self.logger.info("该基金自成立以来，近一年超额alpha收益（相对沪深300）为%s,年化alpha收益为%s，低于同类平均的年化alpha收益%s" % alpha_args)

    def get_target_label_fund(self, company_fund_df, target_labe='量化'):
        """Log the earliest fund of one company whose full name contains *target_labe*.

        Does nothing when the company has no such fund.
        """
        target_df = company_fund_df.loc[self._contains_mask(company_fund_df['基金全称'], target_labe)]
        if target_df.empty:
            return

        earliest = self._earliest_fund(target_df)
        self.logger.info("其中，%s(%s)，基金经理%s,于%s成立，为该管理人旗下成立最早的%s产品，最新规模为%s亿元；" % (
            earliest['证券简称'], earliest.name, earliest['基金经理(现任)'], earliest['基金成立日'],
            target_labe, earliest['基金规模亿元']))

        annual_alpha = earliest['Alpha(年化)_d_52_hs300百分']
        same_style_alpha = earliest['Alpha(年化)同类平均_d_52_hs300百分']
        if annual_alpha > same_style_alpha:
            self.logger.info("该基金自成立以来，年化alpha收益为%s，高于同类平均的alpha收益%s" % (
                self._pct(annual_alpha), self._pct(same_style_alpha)))
        else:
            self.logger.info("该基金自成立以来，年化alpha收益为%s，低于同类平均的alpha收益%s" % (
                self._pct(annual_alpha), self._pct(same_style_alpha)))
        return

    def get_comyany_info(self, company, df, target_labe=''):
        """Log how *company* compares with all fund companies in *df*.

        Covers the number of products, the launch date of the company's first
        product and, when *target_labe* is non-empty, the number of products whose
        full name contains that label.

        :raises ValueError: if *company* does not appear in '基金管理人'.
        """
        funds_by_company = {name: sub_df for name, sub_df in df.groupby(by='基金管理人')}

        # Newest first, so a company further down the list launched earlier.
        earliest_launch = pd.Series(
            {name: sub_df['基金成立日'].min().strftime('%Y-%m-%d') for name, sub_df in funds_by_company.items()},
            name='基金成立日').sort_values(ascending=False)
        esta_percent = (earliest_launch.index.tolist().index(company) + 1) / len(earliest_launch)
        esta_percent_str = self._pct(esta_percent * 100)

        num_se = pd.Series({name: sub_df.shape[0] for name, sub_df in funds_by_company.items()}).sort_values()
        per_rate = self._unique_rank(num_se, company)
        per_rate_str = self._pct(per_rate * 100, 4)
        self.logger.info('%s旗下现有指数增强型产品共%s只(含A类C类),占所有管理人所持数量的%s分位数。' % (company, num_se[company], per_rate_str))
        if per_rate >= 0.7:
            self.logger.info("占比靠前，反映管理人在发行指数增强型基金上的优秀运作能力。")
        elif per_rate > 0.4:
            self.logger.info("占比中等，管理人发行指数增强型基金的数量一般。")
        else:
            self.logger.info('占比下游，管理人对指数增强型基金发行数量较少。')

        company_fund_df = df[df['基金管理人'] == company]
        first_fund = self._earliest_fund(company_fund_df)
        self.logger.info("其中，%s(%s)，基金经理%s,于%s成立，为该管理人旗下成立最早的产品；" % (
            first_fund['证券简称'], first_fund.name, first_fund['基金经理(现任)'], first_fund['基金成立日']))
        self.logger.info("按各管理人发行最早指数增强型基金的时间看，该产品发行时间占各管理人同类型的%s分位数" % esta_percent_str)
        if esta_percent >= 0.6:
            self.logger.info("发行时间早与多数管理人，一定程度上反应了管理人更丰富的投资管理经验。")
        elif esta_percent >= 0.3:
            self.logger.info("发行时间排名中等，管理人整体投资管理经验中等水平")
        else:
            self.logger.info("发行时间较晚，需谨慎对待管理人可能对指数增强型基金投资管理经验较短的问题")

        if target_labe:
            label_counts = {name: int(self._contains_mask(sub_df['基金全称'], target_labe).sum())
                            for name, sub_df in funds_by_company.items()}
            # Sort before ranking: ranking against unsorted unique() values only
            # reflected groupby order, not the size of the count.
            label_se = pd.Series(label_counts, name=target_labe + '产品数量').sort_values()
            self.logger.info("有%s产品(含A/C类)的管理人共%s家" % (target_labe, len(label_se[label_se > 0])))
            per_rate_label_str = self._pct(self._unique_rank(label_se, company) * 100)
            self.logger.info(
                '其中%s旗下%s产品共%s只，占所有管理人同类型数量的%s分位数' % (company, target_labe, label_se[company], per_rate_label_str))
            self.get_target_label_fund(company_fund_df, target_labe)
        return

    def get_main(self, company='万家基金管理有限公司', manager_name='乔亮'):
        """Load the evaluation workbook from the working directory and log both reports."""
        df = pd.read_excel('指数增强评价指标.xlsx', index_col=0)
        self.get_comyany_info(company, df, target_labe='量化')
        self.get_manager_info(company, df, target_labe='量化', manager_name=manager_name)
def get_manager_info(self, company, df, manager_name, target_labe=''):
    """Log the QDII track record of *manager_name* and export the target fund's return rankings.

    :param company: unused here; kept for signature compatibility with callers.
    :param df: fund table indexed by fund code.
    :param manager_name: substring looked for in '基金经理(现任)'.
    :param target_labe: only interpolated into one log message.
    :raises KeyError: if ``self.target_code`` is not among the manager's funds.

    Side effect: writes '<证券简称>回报排名.xlsx' for the target fund into the
    working directory.
    """
    # Non-string cells (NaN) are treated as "not managed by this person".
    manager_mask = np.array(
        [isinstance(name, str) and manager_name in name for name in df['基金经理(现任)']], dtype=bool)
    target_df = df.loc[manager_mask]
    if target_df.empty:
        # The original message had a '%s' placeholder but never supplied the name.
        self.logger.info('未查询到基金经理%s管理过的QDII型产品，对其投资经验需保持谨慎！' % (manager_name,))
        return
    self.logger.info('基金经理%s,查询到其管理QDII型产品%s只(含A/C类),' % (manager_name, target_df.shape[0]))

    # Earliest fund run by this manager (first row on ties).
    last_se = target_df[target_df['基金成立日'] == target_df['基金成立日'].min()].iloc[0]
    self.logger.info("其中，%s(%s)，于%s成立，为该基金经理管理的最早的%s产品，最新规模为%s亿元；" % (
        last_se['证券简称'], last_se.name, last_se['基金成立日'], target_labe, last_se['基金规模亿元']))

    annual_alpha = last_se['Alpha(年化)_d_52_hs300百分']
    same_style_alpha = last_se['Alpha(年化)同类平均_d_52_hs300百分']
    alpha_args = (
        str(np.round(last_se['Alpha_w_52_hs300百分'], 2)) + '%',
        str(np.round(annual_alpha, 2)) + '%',
        str(np.round(same_style_alpha, 2)) + '%',
    )
    if annual_alpha > same_style_alpha:
        self.logger.info("该基金自成立以来，近一年超额alpha收益（相对沪深300）为%s，年化alpha收益为%s，高于同类平均的年化alpha收益%s" % alpha_args)
    else:
        self.logger.info("该基金自成立以来，近一年超额alpha收益（相对沪深300）为%s,年化alpha收益为%s，低于同类平均的年化alpha收益%s" % alpha_args)

    current_se = target_df.loc[self.target_code]
    self.logger.info("%s成立于%s,当前规模%s亿元,规模同类排名%s" % (
        current_se['证券简称'], current_se['基金成立日'], current_se['基金规模亿元'], current_se['规模同类排名']))
    rate_name_list = [column for column in current_se.index.tolist() if column.find('回报排名') != -1]
    # The export is named after the target fund, so take its values too
    # (previously the earliest fund's rankings were written under that name).
    rate_se = current_se.loc[rate_name_list]
    rate_se.name = '回报排名'
    rate_se.to_excel('%s回报排名.xlsx' % current_se['证券简称'])
def get_track(self):
    """Log how the target fund's tracking error ranks among the funds in 'QDii跟踪.xlsx'.

    The workbook is read from the working directory. Tracking errors are ordered
    from largest to smallest, so a later position yields a higher percentile.

    :raises ValueError: if ``self.target_code`` has no tracking-error value.
    """
    tracking_df = pd.read_excel("QDii跟踪.xlsx", index_col=0)

    ranked_error = tracking_df['跟踪误差(跟踪指数)'].sort_values(ascending=False).dropna()
    position = ranked_error.index.tolist().index(self.target_code) + 1
    esta_percent = position / len(ranked_error)
    esta_percent_str = str(np.round(esta_percent * 100, 2)) + '%'
    self.logger.info("从对跟踪指数的表现来看，近一年的周度收益跟踪误差为%s，占同类产品跟踪各自指数的%s分位数" % (
        ranked_error[self.target_code], esta_percent_str))

    if esta_percent >= 0.7:
        verdict = "对指数的跟踪表现优秀，跟踪指数能力居同类上游"
    elif esta_percent >= 0.4:
        verdict = "对指数的跟踪表现一般，跟踪指数能力中等水平"
    else:
        verdict = "跟踪指数能力排名靠后，跟踪能力较差"
    self.logger.info(verdict)

    # The fee rankings are computed but not reported yet.
    mana_fee = tracking_df['管理费率'].sort_values(ascending=False).dropna()
    tg_fee = tracking_df['托管费率'].sort_values(ascending=False).dropna()
def get_manager_info(self, company, df, manager_name, target_labe=''):
    """Log the ETF track record of *manager_name* and export the target fund's return rankings.

    :param company: unused here; kept for signature compatibility with callers.
    :param df: fund table indexed by fund code.
    :param manager_name: substring looked for in '基金经理(现任)'.
    :param target_labe: only interpolated into one log message.

    Side effect: on success writes '<证券简称>回报排名.xlsx' for the target fund
    into the working directory. Failures in that last step are logged, not raised.
    """
    # Non-string cells (NaN) are treated as "not managed by this person".
    manager_mask = np.array(
        [isinstance(name, str) and manager_name in name for name in df['基金经理(现任)']], dtype=bool)
    target_df = df.loc[manager_mask]
    if target_df.empty:
        # The original message had a '%s' placeholder but never supplied the name.
        self.logger.info('未查询到基金经理%s管理过的规模型产品，对其投资经验需保持谨慎！' % (manager_name,))
        return
    self.logger.info('基金经理%s,查询到其管理规模型产品%s只(含A/C类),' % (manager_name, target_df.shape[0]))

    # Earliest fund run by this manager (first row on ties).
    last_se = target_df[target_df['基金成立日'] == target_df['基金成立日'].min()].iloc[0]
    self.logger.info("其中，%s(%s)，于%s成立，为该基金经理管理的最早的%s产品，最新规模为%s亿元；" % (
        last_se['证券简称'], last_se.name, last_se['基金成立日'], target_labe, last_se['基金规模亿元']))

    annual_alpha = last_se['Alpha(年化)_d_52_hs300百分']
    same_style_alpha = last_se['Alpha(年化)同类平均_d_52_hs300百分']
    alpha_args = (
        str(np.round(last_se['Alpha_w_52_hs300百分'], 2)) + '%',
        str(np.round(annual_alpha, 2)) + '%',
        str(np.round(same_style_alpha, 2)) + '%',
    )
    if annual_alpha > same_style_alpha:
        self.logger.info("该基金自成立以来，近一年超额alpha收益（相对沪深300）为%s，年化alpha收益为%s，高于同类平均的年化alpha收益%s" % alpha_args)
    else:
        self.logger.info("该基金自成立以来，近一年超额alpha收益（相对沪深300）为%s,年化alpha收益为%s，低于同类平均的年化alpha收益%s" % alpha_args)

    try:
        current_se = target_df.loc[self.target_code]
        self.logger.info("%s成立于%s,当前规模%s亿元,规模同类排名%s" % (
            current_se['证券简称'], current_se['基金成立日'], current_se['基金规模亿元'], current_se['规模同类排名']))
        rate_name_list = [column for column in current_se.index.tolist() if column.find('回报排名') != -1]
        # The export is named after the target fund, so take its values too
        # (previously the earliest fund's rankings were written under that name).
        rate_se = current_se.loc[rate_name_list]
        rate_se.name = '回报排名'
        rate_se.to_excel('%s回报排名.xlsx' % current_se['证券简称'])
    except Exception:
        # Best-effort step, as before, but no longer swallows KeyboardInterrupt/SystemExit.
        self.logger.info("出错，请检查！")
def get_main(self):
    """Load the sector-ETF workbook and log the company and manager reports.

    The input directory, company and manager are hard-coded. "规模ETF.xlsx" and
    "跨境ETF.xlsx" were used as alternative inputs in earlier runs.
    """
    data_dir = "D:\\工作文件\\产品评价\\"
    company_name = '华夏基金管理有限公司'
    fund_df = pd.read_excel(data_dir + "行业ETF.xlsx", index_col=0)
    self.get_comyany_info(company=company_name, df=fund_df)
    self.get_manager_info(company=company_name, df=fund_df, manager_name='赵宗庭')
class YunFeiCalc:
    """Quick profit calculator for a small, hard-coded fund portfolio."""

    def __init__(self):
        pass

    def calc_main(self):
        """Print four lines: profit excluding the money-market fund, days held, money-market profit, total."""

        def gain(amount, cost_price, latest_price):
            # Profit of a position bought at cost_price and now valued at latest_price.
            return amount * (latest_price / cost_price - 1)

        gold_gain = gain(20444.18, 1.3430, 1.4680)
        # The bond position uses its own formula with a fixed 561.32 component.
        bond_gain = 561.32 + (14058.63 * (1.2600 / 1.2560) - 561.32) * (1.1290 / 1.2560 - 1)
        hs300_gain = gain(5615.57, 1.3580, 1.6341)
        sp500_gain = gain(13560, 2.3594, 2.6630)
        hang_seng_gain = gain(7068.28, 1.0367, 1.0160)

        # Same left-to-right summation order as the printed figures always used.
        total_earn = gold_gain + bond_gain + hs300_gain + sp500_gain + hang_seng_gain
        print("排除货币基金，共盈利：%s" % total_earn)

        opened_on = datetime.strptime("2020-03-05", "%Y-%m-%d")
        jgday = (datetime.today() - opened_on).days
        print("货基建仓天数%s" % jgday)

        hbjj = 40000 * 0.0262 * jgday / 365
        print("货基盈利%s" % hbjj)

        print("总共盈利%s" % (total_earn + hbjj))
def getNetJudgeText(self, totalIndexName, fundName='华夏大盘精选', fundCode='000011.OF'):
    """Assemble the standard written conclusion for one fund and save it as a text file.

    Reads four result workbooks from ``<cwd>\\分析结果\\<fundName>\\`` (weekly win
    rate, risk/return statistics, industry regression, style regression) and
    writes '综合评价结论.txt' into the same folder.

    :param totalIndexName: names of the market indices used as the market portfolio.
    :param fundName: fund name; also the result sub-folder and the fund's own key
        in the statistics tables.
    :param fundCode: fund code, only quoted in the introduction.
    :raises KeyError: if a statistics row lacks the '数据截止日期' entry.
    """
    resultPath = os.getcwd() + r"\\分析结果\\%s\\" % fundName
    gaiKuoTxt = ("基金绩效评价在证券分析是重要的一部分。绩效评价系统性地可分为三个部分：最基本的绩效衡量，"
                 "深层次的绩效归因，以及最终成熟的绩效评价。\n本文选择%s(%s)为基金研究对象，"
                 "市场组合用%s来近似，对该基金在期间的绩"
                 "效进行评估。\n全文分为5个部分：基金绩效衡量概况，在这里最基金绩效进行基本指标的度量；选股择时能力，"
                 "利用CAPM的衍生模型来度量该基金的选股择时能力；\n多因子归因，从多个因子的角度来做基金收益率做归因；"
                 "行业归因；风格归因。" % (fundName, fundCode, '、'.join(totalIndexName))) + '\n\n'
    riskReturnText = "风险收益统计指标结果：\n"

    weekSuccessDf = pd.read_excel(resultPath + "周度胜率统计.xlsx", index_col=0)
    trandSuccess = weekSuccessDf.loc['正交易周'].to_dict()
    weekSuccessText = '(1)交易胜率层面来看，' + self.getCompareIndex(
        trandSuccess, fundName, compareName='周度交易胜率') + '\n'

    riskReturnDf = pd.read_excel(resultPath + "风险收益统计指标原始数据.xlsx", index_col=[0, 1])
    chengLiRiskReturnDf = riskReturnDf.loc['成立以来']

    def since_inception(metric_name):
        # One statistics row as {name: value}, without the non-numeric cut-off-date entry.
        values = chengLiRiskReturnDf.loc[metric_name].to_dict()
        values.pop('数据截止日期')
        return values

    annualReturnText = '(2)收益方面，' + self.getCompareIndex(
        since_inception('年化收益'), fundName, compareName='年化收益') + '\n'

    # For the three risk measures a smaller value is better, hence rightFlag=False.
    riskText = '(3)风险方面，' + self.getCompareIndex(
        since_inception('年化波动'), fundName, compareName='年化波动', rightFlag=False)
    downStdText = self.getCompareIndex(
        since_inception('下行风险'), fundName, compareName='下行风险', rightFlag=False) + '\n'
    riskText = riskText + '\n 从下行风险角度来看，' + downStdText
    downText = self.getCompareIndex(
        since_inception('最大回撤'), fundName, compareName='最大回撤', rightFlag=False) + '\n'
    riskText = riskText + ' 最大回撤方面，' + downText

    sharpText = '(4)投资效率来看，' + self.getCompareIndex(
        since_inception('夏普比率'), fundName, compareName='夏普比率', formatFlag=False) + '\n'

    riskReturnText = riskReturnText + weekSuccessText + annualReturnText + riskText + sharpText
    totalText = gaiKuoTxt + riskReturnText

    def top_labels(fitted_df, limit):
        # 'name(code)' labels for at most *limit* leading rows of a regression table.
        head_df = fitted_df.iloc[:limit]
        return [name + '(%s)' % code
                for code, name in zip(head_df['指数代码'].tolist(), head_df['指数名称'].tolist())]

    indusrtyAndStyleText = '\n行业归因结果：\n'
    indusrtyRegressionDf = pd.read_excel(resultPath + "行业回归结果.xlsx", )
    bestIndustry = indusrtyRegressionDf.iloc[0].to_dict()
    industryText = "该产品拟合效果最好的行业为%s(%s)，其回归后的拟合R方为%.2f%%" % (
        bestIndustry['指数名称'], bestIndustry['指数代码'], bestIndustry['拟合R方'] * 100) + '\n'

    tempDf = indusrtyRegressionDf[indusrtyRegressionDf['拟合R方'] >= 0.7]
    totalNum = 5  # maximum number of well-fitting industry indices to list
    if tempDf.empty:
        tempDf2 = indusrtyRegressionDf[indusrtyRegressionDf['拟合R方'] >= 0.1]
        if not tempDf2.empty:
            industryDetailTxt = "从行业回归结果来看，该产品无拟合效果较为优秀的行业指数（R方大于0.7），" \
                                "这可能是用于产品运作过程中，持仓个股的行业变化较为频繁带来的，可结合进一步的持仓分析综合来看"
        else:
            industryDetailTxt = "从行业回归结果来看，该产品不存在具有一定相关性的行业指数（R方大于0.1），" \
                                "这可能是用于产品运作过程中，持仓个股的行业变化极其频繁，行业分布极其分散，也可能是产品" \
                                "有对冲市场beta风险的操作所带来的，可结合进一步的持仓分析，同时期的市场风险收益指标等综合来看"
    else:
        industryDetailTxt = "从行业回归结果来看，该产品拟合效果较为优秀的行业指数（R方大于0.7）主要有%s，" \
                            "可对比该产品的投资类型给出判断。" % (','.join(top_labels(tempDf, totalNum))) + '\n'
    indusrtyAndStyleText = indusrtyAndStyleText + industryText + industryDetailTxt

    indusrtyAndStyleText = indusrtyAndStyleText + '\n风格归因结果：\n'
    styleRegressionDf = pd.read_excel(resultPath + "风格回归结果.xlsx", )
    bestStyle = styleRegressionDf.iloc[0].to_dict()
    styleText = "该产品拟合效果最好的风格指数为%s(%s)，其回归后的拟合R方为%.2f%%" % (
        bestStyle['指数名称'], bestStyle['指数代码'], bestStyle['拟合R方'] * 100) + '\n'

    tempStyleDf = styleRegressionDf[styleRegressionDf['拟合R方'] >= 0.7]
    totalStyleNum = 3  # maximum number of well-fitting style indices to list
    if tempStyleDf.empty:
        tempStyleDf2 = styleRegressionDf[styleRegressionDf['拟合R方'] >= 0.1]
        if not tempStyleDf2.empty:
            styleDetailTxt = "从风格回归结果来看，该产品无拟合效果较为优秀的风格指数（R方大于0.7，" \
                             "这可能是用于产品运作过程中，基金经理投资风格较为灵活，可结合进一步的持仓分析综合来看"
        else:
            styleDetailTxt = "从风格回归结果来看，该产品不存在具有一定相关性的风格指数（R方大于0.1，" \
                             "这可能是用于产品运作过程中，基金经理投资风格极其灵活多变，或采用了衍生品对冲系统风险带来的，可结合进一步的持仓分析综合来看"
    else:
        # Cap with the style limit; the industry limit (totalNum) was used here by mistake.
        styleDetailTxt = "从风格回归结果来看，该产品拟合效果较为优秀的风格指数（R方大于0.7）主要有%s，" \
                         "可对比该产品的投资风格给出判断。" % (','.join(top_labels(tempStyleDf, totalStyleNum))) + '\n'
    indusrtyAndStyleText = indusrtyAndStyleText + styleText + styleDetailTxt
    totalText = totalText + indusrtyAndStyleText

    # Context manager closes the file even if the write fails.
    with open(resultPath + "综合评价结论.txt", "w+") as f:
        f.write(totalText)
inplace=True) 46 | temp_return_df = temp_df / temp_df.shift(1) - 1 47 | temp_return_df.dropna(inplace=True) 48 | dic_df[df.loc[code]['名称']] = temp_return_df 49 | 50 | min_date = df['起始日'].min().strftime("%Y-%m-%d") 51 | max_date = df['结尾日'].max().strftime("%Y-%m-%d") 52 | code_list2 = ['000300.SH', '000905.SH', '000852.SH', '000935.SH', '000933.SH', '000932.SH', '000936.CSI', 53 | '000934.SH', '000931.CSI', '000930.CSI','000929.CSI', '000937.CSI', '000928.SH'] # 54 | name_dic = {'000300.SH': '沪深300', '000905.SH': '中证500', '000852.SH': '中证1000', '000935.SH': '中证信息', 55 | '000933.SH': '中证医药', '000932.SH': '中证消费', '000936.CSI': '中证电信','000934.SH': '中证金融', 56 | '000930.CSI':'中证工业','000929.CSI':'中证材料','000937.CSI':'中证公用','000928.SH':'中证能源', 57 | '000931.CSI':'中证可选'} 58 | 59 | index_df_list = [] 60 | for code in code_list2: 61 | temp_df = self.GetDataTotalMainDemo.get_hq_data(code, code_style='index', start_date=min_date, 62 | end_date=max_date, ) 63 | temp_df.rename(columns={"close_price": code}, inplace=True) 64 | index_df_list.append(temp_df) 65 | index_df = pd.concat(index_df_list, axis=1, sort=True) 66 | index_df.dropna(inplace=True) 67 | index_return_df = index_df / index_df.shift(1) - 1 68 | index_return_df.rename(columns=name_dic,inplace=True) 69 | 70 | df_list=[] 71 | for fund_name, fund_df in dic_df.items(): 72 | start_corr_date = fund_df.index.tolist()[0] 73 | end_corr_date = fund_df.index.tolist()[-1] 74 | temp_index_df = index_return_df.loc[ 75 | (index_return_df.index >= start_corr_date) & (index_return_df.index <= end_corr_date)] 76 | fund_index_df = pd.concat([fund_df, temp_index_df], axis=1, sort=True) 77 | corr_df = fund_index_df.corr() 78 | temp_Se = corr_df.iloc[0][1:] 79 | temp_corr_df = pd.DataFrame(temp_Se.values,columns=[fund_name],index=temp_Se.index.tolist()) 80 | df_list.append(temp_corr_df) 81 | final_df = pd.concat(df_list,axis=1,sort=True).T 82 | final_df.to_excel("基金相关系数.xlsx") 83 | 84 | 85 | if __name__ == '__main__': 86 | CalcHXBCorrDemo 
def calc_size_fund(self, temp_df):
    """Export the broad-based (size) index fund tables used in the monthly slides.

    For each of four headline indices the largest funds are kept, ordered by
    one-month return and written to '跟踪<index>概况.xlsx' under ``self.ppt_loc``.
    Funds of at least 3 (亿元) tracking the remaining listed indices go into
    '跟踪其他指数概况.xlsx'; that file is skipped when nothing qualifies.
    """
    size_col, month_col, code_col = '基金规模(亿元)', '近1月(%)', '跟踪指数代码'

    def ranked(fund_df, display_name, top_n=None):
        # Largest first, optionally truncated, then re-ordered by one-month return.
        by_size = fund_df.sort_values(by=size_col, ascending=False)
        if top_n is not None:
            by_size = by_size.iloc[:top_n]
        table = by_size.sort_values(by=month_col, ascending=False).drop(code_col, axis=1)
        table['跟踪指数'] = display_name
        return table

    # (index code, display name, number of largest funds to keep)
    headline_indices = (
        ('000300.SH', '沪深300', 25),
        ('000905.SH', '中证500', 15),
        ('000016.SH', '上证50', 3),
        ('399006.SZ', '创业板指', 3),
    )
    for code, display_name, top_n in headline_indices:
        table = ranked(temp_df[temp_df[code_col] == code], display_name, top_n)
        table[self.name_se].to_excel(self.ppt_loc + "跟踪%s概况.xlsx" % display_name, index=False)

    other_code_dic = {'399330.SZ': '深证100', '000906.SH': '中证800', '000903.SH': "中证100", "399001.SZ": "深证成指",
                      "000010.SH": "上证180", "000001.SH": "上证指数", "399005.SZ": "中小板指", "000688.SH": "科创50"}
    other_tables = []
    for index_code, other_df in temp_df.groupby(by=code_col):
        if index_code not in other_code_dic:
            continue
        big_enough = other_df[other_df[size_col] >= 3]
        if big_enough.empty:
            continue
        other_tables.append(ranked(big_enough, other_code_dic[index_code]))

    if other_tables:
        combined = pd.concat(other_tables, axis=0, sort=True)
        combined[self.name_se].to_excel(self.ppt_loc + "跟踪其他指数概况.xlsx", index=False)
def calc_indus_fund(self, temp_df):
    """Export the sector-index fund table ('跟踪行业指数概况.xlsx') for the monthly slides.

    Keeps funds of at least 10 (亿元), orders them by size within each tracked
    index, renames the index-code column to '跟踪指数' and writes the
    ``self.name_se`` columns under ``self.ppt_loc``. Nothing is written when no
    fund passes the size filter.
    """
    # sort_values returns a new frame, so the caller's data is never sorted in
    # place through a filtered slice (which also triggered SettingWithCopyWarning).
    sized_df = temp_df[temp_df['基金规模(亿元)'] >= 10].sort_values(by='基金规模(亿元)')

    per_index = [group_df for _, group_df in sized_df.groupby(by='跟踪指数代码')]
    if not per_index:
        # pd.concat raises ValueError on an empty list; skip the export instead,
        # as calc_size_fund already does for its "other indices" table.
        return

    total_df = pd.concat(per_index, axis=0, sort=True).rename(columns={"跟踪指数代码": "跟踪指数"})
    total_df[self.name_se].to_excel(self.ppt_loc + "跟踪行业指数概况.xlsx", index=False)
class FundImproveEst:
    """Collect this month's enhanced index fund statistics and export the PPT tables."""

    # Tracked indexes that get their own workbook; every other index is
    # merged into a single "other indexes" workbook.
    _NAMED_INDEXES = {'000300.SH': '沪深300', '000905.SH': '中证500'}
    # Minimum fund size for a fund to be reported (same unit as the size column).
    _MIN_FUND_SIZE = 2

    def __init__(self):
        self.file_path = r'D:\\工作文件\\'
        self.file_loc = r"D:\\工作文件\\指数基金月报\\202011\\"
        self.file_target = r"D:\\工作文件\\指数基金月报\\202011\\基金ppt概况\\"
        # Column order of every exported table.
        self.name_se = [
            '基金代码', '基金简称', '基金成立日', '基金规模(亿元)', '基金管理人', '跟踪指数', '近一月超额(%)', '近1月(%)',
            '近3月(%)', '近6月(%)', '近1年(%)', '近3年(%)',
            '今年以来(%)', '近一年最大回撤', '夏普比率', '年化波动', '年化收益', ]

    def get_data(self):
        """Query Wind for every fund listed in the input workbook.

        :return: DataFrame indexed by fund code with renamed columns, sorted
            by fund size (descending); ``None`` when the Wind query fails.
        """
        total_improve_df = pd.read_excel(r'D:\\工作文件\\增强指数基金11月.xlsx')
        name_dic = {"fund_setupdate": "基金成立日", "netasset_total": "基金规模(亿元)",
                    "fund_trackerror_threshold": "年化跟踪误差(%)",
                    "fund_corp_fundmanagementcompany": "基金管理人", "fund_trackindexcode": "跟踪指数代码",
                    "nav": "单位净值", "return_1m": "近1月(%)", "return_3m": "近3月(%)", "return_ytd": "今年以来(%)",
                    "return_1y": "近1年(%)", "risk_returnyearly": "年化收益", "risk_stdevyearly": "年化波动",
                    "sec_name": "基金简称", "return_6m": "近6月(%)", "return_3y": "近3年(%)", "risk_sharpe": "夏普比率",
                    "risk_maxdownside": "近一年最大回撤"}
        # The result frame is built from wssdata.Fields, so the rename map is
        # keyed by the upper-cased field names.
        name_dic_reuslt = {key.upper(): values for key, values in name_dic.items()}

        total_code_list = total_improve_df['证券代码'].tolist()
        fields = "sec_name,fund_setupdate,netasset_total,fund_corp_fundmanagementcompany,fund_trackindexcode," \
                 "return_1m,return_3m,return_6m,return_1y,return_3y,return_ytd,risk_sharpe,risk_maxdownside,risk_returnyearly,risk_stdevyearly"
        options_str = "unit=1;tradeDate=20201101;annualized=0;startDate=20191031;endDate=20201031;period=2;returnType=1;yield=1;riskFreeRate=1"
        wssdata = w.wss(codes=total_code_list, fields=fields, options=options_str)
        if wssdata.ErrorCode != 0:
            print("获取wind数据错误%s" % wssdata.ErrorCode)
            return None
        resultDf = pd.DataFrame(wssdata.Data, index=wssdata.Fields, columns=wssdata.Codes).T
        resultDf.index.name = '基金代码'
        resultDf.rename(columns=name_dic_reuslt, inplace=True)
        resultDf['基金规模(亿元)'] = resultDf['基金规模(亿元)'] / 100000000
        resultDf.sort_values(by='基金规模(亿元)', ascending=False, inplace=True)
        resultDf.to_excel(self.file_loc + '11月增强型指数基金表现.xlsx')
        return resultDf

    def calc_detail_df(self, temp_df):
        """Split the funds by tracked index and export one table per index group.

        For each tracked index the one-month excess return over the index is
        added. The indexes in ``_NAMED_INDEXES`` are written to their own
        workbook; all others are concatenated into one workbook.

        :param temp_df: frame returned by :meth:`get_data`; ``None`` or an
            empty frame is ignored.
        :return: None.
        """
        if temp_df is None or temp_df.empty:
            return

        bench_code_list = list(temp_df['跟踪指数代码'].unique())
        options_str = "startDate=20201001;endDate=20201031"
        wssdata = w.wss(codes=bench_code_list, fields=['pct_chg_per', 'sec_name'], options=options_str)
        if wssdata.ErrorCode != 0:
            print("获取wind数据错误%s" % wssdata.ErrorCode)
            return
        bench_return_df = pd.DataFrame(wssdata.Data, index=wssdata.Fields, columns=wssdata.Codes).T
        bench_return_df.rename(columns={'pct_chg_per'.upper(): "跟踪指数近1月", 'sec_name'.upper(): "证券简称"},
                               inplace=True)

        df_other_list = []
        for raw_code, group_df in temp_df.groupby(by='跟踪指数代码'):
            # Normalise once, before any comparison, so a lower-case code is
            # routed to the same branch as its upper-case form.
            index_code = raw_code.upper()
            # .copy() gives an independent frame, so the column assignments
            # below never write into a slice of temp_df.
            fund_df = group_df[group_df['基金规模(亿元)'] >= self._MIN_FUND_SIZE].copy()
            bench_row = bench_return_df.loc[index_code]
            fund_df['近一月超额(%)'] = fund_df['近1月(%)'] - bench_row['跟踪指数近1月']
            fund_df['跟踪指数'] = bench_row['证券简称']

            temp_final_df = fund_df.sort_values(by='近1月(%)').drop('跟踪指数代码', axis=1)
            temp_final_df['基金代码'] = temp_final_df.index
            temp_final_df = temp_final_df[self.name_se]

            save_str = self._NAMED_INDEXES.get(index_code)
            if save_str is not None:
                temp_final_df.to_excel(self.file_target + "增强产品%s概况.xlsx" % save_str, index=False)
            else:
                df_other_list.append(temp_final_df)

        if df_other_list:
            total_other_df = pd.concat(df_other_list, axis=0, sort=True)
            total_other_df = total_other_df[self.name_se]
            total_other_df.to_excel(self.file_target + "增强产品其他指数概况.xlsx", index=False)

    def get_main(self):
        """Run the full pipeline: fetch the fund data, then export the tables."""
        total_df = self.get_data()
        if total_df is None:
            # get_data already printed the Wind error code.
            return
        self.calc_detail_df(total_df)
def get_regression(self, index_code_list, bench_code_list, total_return_df):
    """Regress each index on every benchmark and chart the best-fitting one.

    For each code in ``index_code_list`` an OLS fit (with intercept) is run
    against each code in ``bench_code_list``. The fits are ranked by R
    squared and written to a workbook, then two charts are saved for the
    best benchmark: the scatter with its regression line, and the
    cumulative-return comparison.

    :param index_code_list: column names of the dependent series.
    :param bench_code_list: column names of the candidate regressors.
    :param total_return_df: return series holding all of those columns.
    :return: None; results are written under ``self.file_path``.
    """
    if not bench_code_list:
        # Without candidates there is no best fit to rank or plot.
        return

    for index_code in index_code_list:
        y_values = total_return_df[index_code].values

        fit_rows = []
        for bench_code in bench_code_list:
            x_col = total_return_df[bench_code].values.reshape(-1, 1)
            # Leading column of ones: params[0] is the intercept (alpha),
            # params[1] the slope (beta).
            design = np.hstack((np.ones((len(x_col), 1)), x_col))
            res = sm.OLS(y_values, design).fit()
            fit_rows.append((bench_code, res.rsquared, res.params[1], res.params[0]))

        res_indus = pd.DataFrame(fit_rows, columns=['指数代码', '拟合R方', 'beta', 'alpha'])
        res_indus = res_indus.sort_values('拟合R方', ascending=False)
        res_indus.to_excel(self.file_path + '%s风格指数回归结果.xlsx' % index_code, index=False)

        # First row after the descending sort is the best fit.
        maxR2Code = res_indus['指数代码'].iloc[0]
        maxR2Alpha = res_indus['alpha'].iloc[0]
        maxR2Beta = res_indus['beta'].iloc[0]
        x_best = total_return_df[maxR2Code].values

        plt.style.use('ggplot')
        fit_fig = plt.figure(figsize=(16, 9))
        fit_ax = fit_fig.add_subplot(111)
        fit_ax.scatter(x_best, y_values, s=30, color='blue', label='样本实例')
        fit_ax.plot(x_best, maxR2Alpha + maxR2Beta * x_best, linewidth=3, color='red', label='回归线')
        fit_ax.set_ylabel('宽基指数超额收益')
        fit_ax.set_xlabel('风格指数超额收益')
        fit_ax.grid(True)
        fit_ax.legend(loc='upper left')
        fit_fig.savefig(self.file_path + '%s拟合风格指数效果图.png' % index_code)
        # pyplot keeps every figure alive until it is closed explicitly.
        plt.close(fit_fig)

        acc_fig = plt.figure(figsize=(16, 9))
        acc_ax = acc_fig.add_subplot(111)
        indeustryAccDf = (1 + total_return_df[[index_code, maxR2Code]]).cumprod()
        indeustryAccDf['指数收益比'] = indeustryAccDf[index_code] / indeustryAccDf[maxR2Code]
        indeustryAccDf.plot(ax=acc_ax)
        acc_ax.set_ylabel('累计收益率')
        acc_ax.set_xlabel('时间')
        acc_ax.grid(True)
        # 'down right' is not a legend location matplotlib accepts.
        acc_ax.legend(loc='lower right')
        acc_fig.savefig(self.file_path + '%s拟合风格指数累计走势对比图.png' % index_code)
        plt.close(acc_fig)
def get_port_weight(self, index_code_list=None, weight_list=None):
    """Compute the portfolio's industry exposure relative to two benchmark indexes.

    Each index in ``index_code_list`` contributes its constituents' industry
    weights scaled by the matching entry of ``weight_list``. The summed
    exposure is compared with the industry weights of 000300.SH and
    000905.SH and written to an Excel workbook; the per-stock weights are
    exported through ``self.calc_stock_weight``.

    :param index_code_list: index codes held by the portfolio.
    :param weight_list: portfolio weight of each index, same order and length.
    :raises ValueError: if no index is given or the two lists differ in length.
    :return: None.
    """
    # None defaults instead of shared mutable list defaults.
    index_code_list = [] if index_code_list is None else list(index_code_list)
    weight_list = [] if weight_list is None else list(weight_list)
    if not index_code_list:
        raise ValueError("index_code_list must contain at least one index code")
    if len(index_code_list) != len(weight_list):
        raise ValueError("index_code_list and weight_list must have the same length")

    temp_se = pd.Series(weight_list, index=index_code_list)
    port_df_list = []
    dic_stock_weight = {}
    for index_code, index_weight in zip(index_code_list, weight_list):
        dic_ind_weight, stock_weight_df = self.get_index_consit(index_code, weight=index_weight)
        dic_stock_weight[index_code] = stock_weight_df
        port_df_list.append(pd.Series(dic_ind_weight, name=index_code))

    self.calc_stock_weight(dic_stock_weight, temp_se)
    total_ind = pd.concat(port_df_list, axis=1, sort=True).sum(axis=1)
    total_ind.name = '组合行业暴露'

    bench_code_df_list = []
    for bench_code in ['000300.SH', '000905.SH']:
        dic_bench_weight, _ = self.get_index_consit(bench_code)
        bench_code_df_list.append(pd.Series(dic_bench_weight, name=bench_code))
    bench_code_df = pd.concat(bench_code_df_list, axis=1, sort=True).rename(
        columns={'000300.SH': '沪深300', '000905.SH': "中证500"})

    # Industries missing on one side count as zero weight there.
    total_df = pd.concat([total_ind, bench_code_df], axis=1, sort=True).fillna(0)
    total_df['相对沪深300'] = total_df['组合行业暴露'] - total_df['沪深300']
    total_df['相对中证500'] = total_df['组合行业暴露'] - total_df['中证500']
    total_df.to_excel("主题OTC组合暴露.xlsx")
def get_main(self, ):
    """Export the valuation history, monthly table and charts for every index group.

    For each group returned by ``self.get_init_param`` the monthly table is
    fetched through ``GetTableData``. Then, per index, the PE history is
    loaded and saved, and the latest PE and its share of the history at or
    below it are recorded. The combined table is written to
    ``self.file_month_path`` and the PE histories are charted.

    :return: None.
    """
    table_source = GetTableData()
    for index_type, index_code_list in self.get_init_param().items():
        total_df = table_source.get_data(code_list=index_code_list, index_type=index_type)
        if total_df.empty:
            # get_data returns an empty frame (after printing the Wind
            # error) when a query fails; there are no names to look up.
            continue

        dic_df = {}
        dic_PE = {}
        for code in index_code_list:
            df = self.GetDataTotalMainDemo.get_hq_data(code=code, start_date=self.start_date,
                                                       end_date=self.end_date, code_style='index_daily',
                                                       dic_param={'fields': 'pe_ttm', 'filed_name': 'PE值'})
            df.rename(columns={'update_time': '时间', 'factor_value': "PE_TTM"}, inplace=True)
            if df.empty:
                print('%s无估值数据' % code)
                continue

            pe_series = df['PE_TTM']
            # Positional access: the frame's index is not a 0..n-1 range.
            last_value = pe_series.iloc[-1]
            # Rank of the latest value within the ascending history, i.e.
            # the count of strictly smaller observations plus one.
            percent_num = (int((pe_series < last_value).sum()) + 1) / df.shape[0]
            print('%s当前估值分位数%s' % (code, round(percent_num, 4)))
            df.to_excel(self.file_path + '估值%s.xlsx' % code[:6])
            dic_df[total_df.loc[code]['证券简称']] = df
            dic_PE[code] = {'PE': last_value, 'PE分位数': percent_num}

        pe_df = pd.DataFrame(dic_PE).T
        total_last_df = pd.concat([total_df, pe_df], axis=1, sort=True)
        total_last_df.to_excel(self.file_month_path + '%s指数月度表现.xlsx' % index_type, index=False)

        self.get_plot_figure(dic_df)
class GetTableData:
    """Fetch the per-index statistics shown in the monthly report tables from Wind."""

    def __init__(self):
        # Reporting windows; insertion order fixes the column order of the
        # period-return part of the table.
        dic_param = {
            '近1月': {'start_date': '2020-10-01', 'end_date': '2020-10-31'},
            '近3月': {'start_date': '2020-08-01', 'end_date': '2020-10-31'},
            '近6月': {'start_date': '2020-05-01', 'end_date': '2020-10-31'},
            '近1年': {'start_date': '2019-11-01', 'end_date': '2020-10-31'},
            '近3年': {'start_date': '2017-11-01', 'end_date': '2020-10-31'},
            '今年以来': {'start_date': '2020-01-01', 'end_date': '2020-10-31'},
        }
        self.dic_total_param = {
            '区间': dic_param,
            '年度其他': {'start_date': '2019-11-01', 'end_date': '2020-10-31'},
            '上月': {'start_date': '2020-09-01', 'end_date': '2020-09-30'},
        }

    @staticmethod
    def _window_options(window, template="startDate=%s;endDate=%s"):
        """Format a ``{'start_date', 'end_date'}`` window as a Wind options string."""
        return template % (window['start_date'].replace('-', ''), window['end_date'].replace('-', ''))

    @staticmethod
    def _wss_frame(code_list, fields, label, options=None):
        """Run one ``w.wss`` query and return it as a code-indexed frame.

        Prints the Wind error code and returns ``None`` when the query fails.
        """
        kwargs = {'codes': code_list, 'fields': fields}
        if options is not None:
            kwargs['options'] = options
        wss_data = w.wss(**kwargs)
        if wss_data.ErrorCode != 0:
            print("wind获%s数据有误，错误代码" % label + str(wss_data.ErrorCode))
            return None
        return pd.DataFrame(wss_data.Data, index=wss_data.Fields, columns=wss_data.Codes).T

    def get_data(self, code_list=None, end_date='2020-09-30', index_type='宽基', pe_df=None):
        """Build the monthly statistics table for ``code_list``.

        :param code_list: Wind codes to query.
        :param end_date: accepted for interface compatibility; not used.
        :param index_type: accepted for interface compatibility; not used.
        :param pe_df: accepted for interface compatibility; not used.
        :return: frame indexed by code with the security name, period
            returns, one-year risk figures and month-over-month turnover
            changes; an empty frame if any Wind query fails.
        """
        code_list = [] if code_list is None else code_list

        # Security names.
        name_df = self._wss_frame(code_list, ['sec_name', ], '证券简称')
        if name_df is None:
            return pd.DataFrame()
        name_df.rename(columns={'sec_name'.upper(): '证券简称'}, inplace=True)

        # Period returns, one column per configured window.
        df_list = []
        for param_name, param_dic in self.dic_total_param['区间'].items():
            resultDf = self._wss_frame(code_list, ['pct_chg_per', ], '区间涨跌',
                                       options=self._window_options(param_dic))
            if resultDf is None:
                return pd.DataFrame()
            resultDf.rename(columns={'pct_chg_per'.upper(): param_name + '(%)', }, inplace=True)
            df_list.append(resultDf)
        total_data_df = pd.concat(df_list, axis=1, sort=True)
        total_df = pd.concat([total_data_df, name_df], axis=1, sort=True)

        # One-year drawdown, Sharpe ratio, volatility and return.
        risk_options = self._window_options(
            self.dic_total_param['年度其他'],
            template="startDate=%s;endDate=%s;period=2;returnType=1;riskFreeRate=1")
        risk_index_df = self._wss_frame(
            code_list,
            ['risk_maxdownside', 'risk_sharpe', 'risk_stdevyearly', 'risk_returnyearly_index', ],
            '年度其他', options=risk_options)
        if risk_index_df is None:
            return pd.DataFrame()
        risk_index_df.rename(
            columns={'risk_maxdownside'.upper(): '近一年最大回撤(%)', 'risk_sharpe'.upper(): 'Sharp比率',
                     'risk_stdevyearly'.upper(): '年化波动(%)', 'risk_returnyearly_index'.upper(): '年化收益(%)'},
            inplace=True)
        total_final_df = pd.concat([total_df, risk_index_df], axis=1, sort=True)

        # Month-over-month change of traded amount and turnover.
        month_template = "unit=1;startDate=%s;endDate=%s"
        this_month_df = self._wss_frame(
            code_list, ["amt_per", "turn_per"], '月度成交',
            options=self._window_options(self.dic_total_param['区间']['近1月'], template=month_template))
        last_month_df = self._wss_frame(
            code_list, ["amt_per", "turn_per"], '上月成交',
            options=self._window_options(self.dic_total_param['上月'], template=month_template))
        if this_month_df is None or last_month_df is None:
            return pd.DataFrame()
        change_df = (this_month_df / last_month_df - 1) * 100
        change_df.rename(columns={'amt_per'.upper(): '月度成交额变化(%)', "turn_per".upper(): "月度换手率变化(%)"},
                         inplace=True)

        total_last_df = pd.concat([total_final_df, change_df], axis=1, sort=True)
        total_last_df['证券代码'] = total_last_df.index
        return total_last_df
class TalLiJudge:
    """Per fund manager: correlation of the managed funds and their top industries."""

    def __init__(self):
        self.GetDataTotalMainDemo = GetDataTotalMain(data_resource='wind')

    def get_data(self):
        """Map each fund manager in the issuance workbook to the funds they manage.

        A single-manager entry is matched exactly. An entry naming several
        managers (separated by '、') is split, and each name is matched
        against every fund whose manager field contains it.

        :return: dict of manager name -> rows of the fund universe workbook
            (possibly an empty frame when nothing matches).
        """
        target_df = pd.read_excel("基金发行明细.xlsx", sheet_name='Sheet1', )
        total_df = pd.read_excel("主动权益类基金.xlsx")
        managers = total_df['基金经理']

        dic_df = {}
        for manager_field in target_df['基金经理'].tolist():
            name_list = manager_field.split('、')
            if len(name_list) == 1:
                dic_df[manager_field] = total_df[managers == manager_field]
                continue
            for single_name in name_list:
                # One boolean mask selects every matching row exactly once;
                # regex=False keeps the name a literal substring.
                dic_df[single_name] = total_df[managers.str.contains(single_name, regex=False, na=False)]
        return dic_df

    @staticmethod
    def _write_sheets(path, dic_sheet_df):
        """Write each frame of ``dic_sheet_df`` to its own sheet of the workbook at ``path``."""
        if not dic_sheet_df:
            # A workbook needs at least one sheet; nothing to write.
            return
        # The context manager saves and closes the workbook on exit.
        with pd.ExcelWriter(path) as writer:
            for sheet_name, sheet_df in dic_sheet_df.items():
                sheet_df.to_excel(writer, sheet_name=sheet_name)

    def get_calc_result(self, dic_df):
        """Export, per manager, the fund return correlations and top-5 industries.

        :param dic_df: mapping produced by :meth:`get_data`.
        :return: None; two workbooks are written under ``HXBFundManager``
            in the current working directory.
        """
        fields = ["prt_stockvalue_topindustryname2", "prt_stockvalue_topindustrytonav2",
                  "prt_stockvalue_topindustrytostock2", "sec_name"]
        name_dic = {"prt_stockvalue_topindustryname2".upper(): "重仓行业名称",
                    "prt_stockvalue_topindustrytonav2".upper(): "重仓行业市值占基金资产净值比",
                    "prt_stockvalue_topindustrytostock2".upper(): "重仓行业市值占股票投资市值比",
                    "sec_name".upper(): "证券简称"}
        end_date = (datetime.today() - timedelta(days=1)).strftime("%Y-%m-%d")

        dic_name_corr_df = {}
        dic_name_poc_df = {}
        for name, fund_df in dic_df.items():
            if fund_df.empty:
                # No fund matched this manager; pd.concat would raise on an
                # empty list below.
                continue

            temp_df_list = []
            for _, fund_row in fund_df.iterrows():
                se_name = fund_row['证券简称']
                start_date = fund_row['任职日期'].strftime("%Y-%m-%d")
                temp_df = self.GetDataTotalMainDemo.get_hq_data(code=fund_row['证券代码'], start_date=start_date,
                                                                end_date=end_date,
                                                                code_style='fund', name_list=['net_value_adj'])
                temp_df.rename(columns={'net_value_adj': se_name}, inplace=True)
                temp_return_df = temp_df / temp_df.shift(1) - 1
                temp_return_df.dropna(inplace=True)
                temp_df_list.append(temp_return_df)
            temp_total_df = pd.concat(temp_df_list, axis=1, sort=True)
            temp_total_df.dropna(axis=1, how='all', inplace=True)
            dic_name_corr_df[name] = temp_total_df.corr()

            poc_df_list = []
            for order in range(1, 6):
                wss_data = w.wss(codes=fund_df['证券代码'].tolist(), fields=fields,
                                 options="rptDate=20200630;order=%s" % str(order))
                if wss_data.ErrorCode != 0:
                    print("wind获取因子数据有误，错误代码" + str(wss_data.ErrorCode))
                    continue
                resultDf = pd.DataFrame(wss_data.Data, index=wss_data.Fields, columns=wss_data.Codes).T
                resultDf.rename(columns=name_dic, inplace=True)
                resultDf['重仓行业排名'] = order
                poc_df_list.append(resultDf)
            if poc_df_list:
                dic_name_poc_df[name] = pd.concat(poc_df_list, axis=0, sort=True)

        save_path = os.getcwd() + '\\HXBFundManager\\基金经理管理产品相关性.xlsx'
        poc_save_path = os.getcwd() + '\\HXBFundManager\\基金经理重仓行业概况.xlsx'
        self._write_sheets(save_path, dic_name_corr_df)
        self._write_sheets(poc_save_path, dic_name_poc_df)

    def get_main(self):
        """Load the manager/fund mapping and export both workbooks."""
        dic_df = self.get_data()
        self.get_calc_result(dic_df)
class StockFactorCalcl:
    """Skeleton for stock factor calculations; only the data source is wired up so far."""

    def __init__(self):
        self.GEtDataTotaMainDemo = GetDataTotalMain(data_resource='wind')

    def get_main(self):
        """Entry point of the calculation; not implemented yet."""

    def get_history_data(self):
        """Load historical data; not implemented yet."""

    def get_wash_data(self):
        """Clean the loaded data; not implemented yet."""


# The guard must compare against '__main__' (two leading underscores);
# any other string never matches, so the script body would never run.
if __name__ == '__main__':
    StockFactorCalclDemo = StockFactorCalcl()
    StockFactorCalclDemo.get_main()
def set_log(file_name=''):
    """Configure the root logger for INFO-level console (and optional file) output.

    Safe to call repeatedly: the console handler is attached once, and a
    file handler is attached once per distinct log file, so messages are
    not duplicated by repeated calls.

    :param file_name: base name of the log file; ``'<file_name>.log'`` is
        opened when non-empty. An empty string means console output only.
    :return: the configured root logger.
    """
    import os

    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # %m is the month; %M (minute) in the date part would print the minute
    # where the month belongs.
    formatter = logging.Formatter("%(asctime)s %(filename)s:%(levelname)s:%(message)s",
                                  datefmt="%d-%m-%Y %H:%M:%S")

    # Tag the console handler so later calls can recognise it among
    # handlers installed by other code.
    if not any(getattr(handler, '_set_log_console', False) for handler in logger.handlers):
        ch = logging.StreamHandler()
        ch.setLevel(logging.INFO)
        ch.setFormatter(formatter)
        ch._set_log_console = True
        logger.addHandler(ch)

    if file_name:
        # FileHandler stores the absolute path in baseFilename.
        log_path = os.path.abspath('%s.log' % file_name)
        already_attached = any(
            isinstance(handler, logging.FileHandler) and handler.baseFilename == log_path
            for handler in logger.handlers
        )
        if not already_attached:
            file_handler = logging.FileHandler('%s.log' % file_name)
            file_handler.setLevel(level=logging.INFO)
            file_handler.setFormatter(formatter)
            logger.addHandler(file_handler)
    return logger