├── .gitignore
├── DL_Finance_Project_2.pdf
├── README.md
├── R_squares
├── 2023-06-09_05-24-47.json
├── 2023-06-09_06-07-56.json
├── 2023-06-09_09-08-26.json
└── 2023-06-11_18-16-38.json
├── analysis.py
├── data_prepare.py
├── imgs
├── R2_pred_table.png
├── R2_total_table.png
├── alpha
│ ├── CA0_1_inference_alpha_plot.png
│ ├── CA0_2_inference_alpha_plot.png
│ ├── CA0_3_inference_alpha_plot.png
│ ├── CA0_4_inference_alpha_plot.png
│ ├── CA0_5_alpha_plot.png
│ ├── CA0_5_inference_alpha_plot.png
│ ├── CA0_6_inference_alpha_plot.png
│ ├── CA1_1_inference_alpha_plot.png
│ ├── CA1_2_inference_alpha_plot.png
│ ├── CA1_3_inference_alpha_plot.png
│ ├── CA1_4_inference_alpha_plot.png
│ ├── CA1_5_inference_alpha_plot.png
│ ├── CA1_6_inference_alpha_plot.png
│ ├── CA2_1_inference_alpha_plot.png
│ ├── CA2_2_inference_alpha_plot.png
│ ├── CA2_3_inference_alpha_plot.png
│ ├── CA2_4_inference_alpha_plot.png
│ ├── CA2_5_inference_alpha_plot.png
│ ├── CA2_6_inference_alpha_plot.png
│ ├── CA3_1_inference_alpha_plot.png
│ ├── CA3_2_inference_alpha_plot.png
│ ├── CA3_3_inference_alpha_plot.png
│ ├── CA3_4_inference_alpha_plot.png
│ ├── CA3_5_inference_alpha_plot.png
│ ├── CA3_6_inference_alpha_plot.png
│ ├── FF_1_inference_alpha_plot.png
│ ├── FF_2_inference_alpha_plot.png
│ ├── FF_3_inference_alpha_plot.png
│ ├── FF_4_inference_alpha_plot.png
│ ├── FF_5_inference_alpha_plot.png
│ ├── FF_6_inference_alpha_plot.png
│ ├── IPCA_1_inference_alpha_plot.png
│ ├── IPCA_2_inference_alpha_plot.png
│ ├── IPCA_3_inference_alpha_plot.png
│ ├── IPCA_4_inference_alpha_plot.png
│ ├── IPCA_5_inference_alpha_plot.png
│ ├── IPCA_6_inference_alpha_plot.png
│ ├── PCA_1_inference_alpha_plot.png
│ ├── PCA_2_inference_alpha_plot.png
│ ├── PCA_3_inference_alpha_plot.png
│ ├── PCA_4_inference_alpha_plot.png
│ ├── PCA_5_inference_alpha_plot.png
│ └── PCA_6_inference_alpha_plot.png
├── omit_char_R2_bias.png
├── pred_R2.png
└── total_R2.png
├── main.py
├── models
├── CA.py
├── FF.py
├── IPCA.py
├── PCA.py
└── modelBase.py
└── utils.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # raw data
2 | data/
3 | data.zip
4 | __MACOSX/
5 | new_data.zip
6 | __pycache__
7 | models/__pycache__
8 | saved_models
9 | *_loss_*.png
10 | *.ipynb
11 | results/
12 | logs/
13 | R_squares/
14 |
--------------------------------------------------------------------------------
/DL_Finance_Project_2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardS0268/Autoencoder-Asset-Pricing-Models/5aa57e0a1519f1fb23bfef48cdd5cf9cb29e467a/DL_Finance_Project_2.pdf
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Autoencoder-Asset-Pricing-Models
2 |
3 | 🧐 [**Report**](https://www.richardsong.live/autoencoder-asset-pricing-models) | [**Report PDF file**](https://cloud.tsinghua.edu.cn/f/c02804bed00b4083bcb7/?dl=1)
4 | ## Set Up
5 |
6 | ```bash
7 | # generate preprocessed data and download portfolio returns
8 | python data_prepare.py
9 |
10 | # train models (ALL together)
11 | python main.py --Model 'FF PCA IPCA CA0 CA1 CA2 CA3' --K '1 2 3 4 5 6'
12 |
13 | # train models (selected models and K, for example)
14 | python main.py --Model 'IPCA CA3' --K '5 6'
15 |
16 | # analyze characteristics' importance (if needed)
17 | python main.py --Model 'IPCA CA0 CA1 CA2 CA3' --K '5' --omit_char 'absacc acc age agr bm bm_ia cashdebt cashpr cfp cfp_ia chatoia chcsho chempia chinv chpmia convind currat depr divi divo dy egr ep gma grcapx grltnoa herf hire invest lev lgr mve_ia operprof orgcap pchcapx_ia pchcurrat pchdepr pchgm_pchsale pchquick pchsale_pchinvt pchsale_pchrect pchsale_pchxsga pchsaleinv pctacc ps quick rd rd_mve rd_sale realestate roic salecash saleinv salerec secured securedind sgr sin sp tang tb aeavol cash chtx cinvest ear ms nincr roaq roavol roeq rsup stdacc stdcf baspread beta betasq chmom dolvol idiovol ill indmom maxret mom12m mom1m mom36m mom6m mvel1 pricedelay retvol std_dolvol std_turn turn zerotrade'
18 |
19 | # analyze models (calculate R^2, plot R^2 tables, bars and bias heatmap)
20 | python analysis.py
21 | ```
22 | ## Results
23 | ### Total R^2 (%)
24 |
25 |
26 |
27 |
28 | ### Predict R^2 (%)
29 |
30 |
31 |
32 | ### Risk Premia vs. Mispricing
33 |
34 |
35 |  |
36 |  |
37 |
38 |
39 |  |
40 |  |
41 |
42 |
43 |  |
44 |  |
45 |
46 |
47 |
48 | ### Characteristics Importance (reduced total R^2 (%), K=5)
49 |
50 |
--------------------------------------------------------------------------------
/R_squares/2023-06-09_05-24-47.json:
--------------------------------------------------------------------------------
1 | {"models": ["FF_1", "FF_2", "FF_3", "FF_4", "FF_5", "FF_6"], "omit_char": [""], "R2_total": [0.08537139414421824, 0.1576019101919831, 0.1986486217133806, 0.20315476596988524, 0.31397093775365037, 0.3616431120471959]}
--------------------------------------------------------------------------------
/R_squares/2023-06-09_06-07-56.json:
--------------------------------------------------------------------------------
1 | {"models": ["CA0_1", "CA0_2", "CA0_3", "CA0_4", "CA0_5", "CA0_6", "CA1_1", "CA1_2", "CA1_3", "CA1_4", "CA1_5", "CA1_6", "CA2_1", "CA2_2", "CA2_3", "CA2_4", "CA2_5", "CA2_6", "CA3_1", "CA3_2", "CA3_3", "CA3_4", "CA3_5", "CA3_6"], "omit_char": [""], "R2_total": [
2 | 0.5677639760646643, 0.6761207104643877, 0.7076066105263652, 0.6913661386379286, 0.662602500272096, 0.7110612627936461,
3 | 0.5517562872860107, 0.7025783407556893, 0.685776051607686, 0.6664443573030849, 0.7006957708196195, 0.7052861947690043,
4 | 0.5967130036325399, 0.6626964974803786, 0.6608531336078073, 0.7070314610106503, 0.6462021917956272, 0.6767568343936613,
5 | 0.5531676704426002, 0.5249032928672436, 0.5642100044551001, 0.5458004779254889, 0.5558832641978944, 0.5235321637890534]}
--------------------------------------------------------------------------------
/R_squares/2023-06-09_09-08-26.json:
--------------------------------------------------------------------------------
1 | {"models": ["PCA_1", "PCA_2", "PCA_3", "PCA_4", "PCA_5", "PCA_6"], "omit_char": [""], "R2_total": [0.4061160826307103, 0.5300364609271587, 0.5913033228863098, 0.6246396597772854, 0.6467919712825208, 0.6720178863573743]}
--------------------------------------------------------------------------------
/R_squares/2023-06-11_18-16-38.json:
--------------------------------------------------------------------------------
1 | {"models": ["IPCA_5", "CA0_5", "CA1_5", "CA2_5", "CA3_5"], "omit_char": ["absacc", "acc", "age", "agr", "bm", "bm_ia", "cashdebt", "cashpr", "cfp", "cfp_ia", "chatoia", "chcsho", "chempia", "chinv", "chpmia", "convind", "currat", "depr", "divi", "divo", "dy", "egr", "ep", "gma", "grcapx", "grltnoa", "herf", "hire", "invest", "lev", "lgr", "mve_ia", "operprof", "orgcap", "pchcapx_ia", "pchcurrat", "pchdepr", "pchgm_pchsale", "pchquick", "pchsale_pchinvt", "pchsale_pchrect", "pchsale_pchxsga", "pchsaleinv", "pctacc", "ps", "quick", "rd", "rd_mve", "rd_sale", "realestate", "roic", "salecash", "saleinv", "salerec", "secured", "securedind", "sgr", "sin", "sp", "tang", "tb", "aeavol", "cash", "chtx", "cinvest", "ear", "ms", "nincr", "roaq", "roavol", "roeq", "rsup", "stdacc", "stdcf", "baspread", "beta", "betasq", "chmom", "dolvol", "idiovol", "ill", "indmom", "maxret", "mom12m", "mom1m", "mom36m", "mom6m", "mvel1", "pricedelay", "retvol", "std_dolvol", "std_turn", "turn", "zerotrade"], "R2_total": [1.5442819689237552e-05, -0.0008641544212459884, -0.0001747076111721091, -6.028723743389808e-05, 0.0004881307548909586, -9.453040450568828e-05, 0.0011809606037788134, -0.00047160860320083486, 0.0004497328261676703, 0.00035797708987406196, 0.00046514268643882417, 8.010321267071241e-05, 0.0005184883538837948, -0.00037818109386877907, -5.991642326275137e-05, 0.0002601835988962353, 0.0011439216429843801, 0.0017112133538985663, -4.350496459193387e-06, -0.00019380257770462705, 0.00043948388552850215, 0.00031580941297537635, 0.003338161066369527, 0.0021027676465422696, -0.0003171094148499698, 0.0014555971916409005, -0.0007270330656120594, 0.0016980056916262587, 0.0009553837759342931, 0.0027868308676146647, 0.00026402683391868464, 0.0007850527331111357, 0.0020449984309897085, 0.001054644787377823, 5.184771678434785e-06, 0.002178892159566903, -0.00017501728488655832, -0.0002577126732409285, 0.0009543687413273716, 0.0005083110135046809, 7.62809849720325e-05, 8.658720973830913e-05, 
0.0004486453095020604, 0.0008906763104503668, 0.0010605476004982295, 0.0018694816639623912, 0.0002959972532902144, 0.0017008605711600344, 0.0007976861126247625, -0.002652802485675565, 0.002662754828368419, 9.917422749694538e-05, 5.964285215753762e-05, 0.0022189518272991426, -7.435817815093504e-05, -0.001698616694105315, 0.001233404502690938, 0.00047054690788606024, 0.0026004651808273493, 0.0011172780845385422, -0.0001482646856509895, -0.00041623695665415905, -0.0015909825974204095, -0.00031927406061749153, -0.00043655338823822554, 0.0001361268145957384, -0.00032109572478289383, 8.887812899915914e-05, 0.002067090875226163, 0.0010982209839661694, -0.0005637330110257466, -0.0005329012820900481, 0.0009084092837141622, 0.0021178568491788674, 0.0033586699296590528, 0.03875439641206746, 0.007293900327323799, 0.004763913685328358, 0.004817433432478202, 0.007084909806944539, 0.0025907095069127584, 0.0080607854611624, 0.0057093251947115675, 0.009105559086416637, 0.011542299391846145, 0.0028401829485293906, 0.02709723337680492, 0.004118952479980065, -0.00025137933247576516, 0.018393986829754794, 0.003372828092497837, 0.0013541844149635995, 0.00793699314016949, 0.0036518023385704312, -0.01840451865415549, 0.0035504742986338655, -0.02124055707570216, -0.003382787260082454, 0.001661945882751592, -8.216386260140318e-05, -0.011643569891665484, -0.0015886009587249283, -0.0060291572912980484, -0.003934251792188981, -0.0003035868273979503, 0.0025248914169603287, -0.0004996588466505969, 0.0009582432785717465, -0.0017444408411588785, -0.004805523268163303, -0.002931825562557, -0.014321167551977099, -0.005731926091473105, -0.001628883333692932, -0.008380269170191412, -0.002495037120210153, -0.013508701388505795, 0.001503549704314544, -0.0007751891041964942, -0.0007030969454133729, -0.0028340425016759596, 0.002562114878683408, 0.0007168131966963642, 0.0005966917290103346, -0.0009823429746401713, -0.008049996513831648, -0.006935426188849236, -0.013752522526764732, -0.0005069144917650981, 
0.0008889578779507357, -0.004511542543265024, 0.000512296750466934, -0.0002943171885458895, -0.00022271179096300386, -0.0021640651213321593, 0.0002623110944273144, -0.00026747487114753277, -0.0027015552337811277, -0.012375056721493305, -0.00010214235568639651, -0.012322425151997773, -0.004802562616962769, -0.0040789335496304036, 0.0004565130977485232, -0.0004572283160489965, 0.011102582602458222, -0.0020968867403851066, -0.0011853937188981423, -0.005591149809828, -0.010832419289181106, -0.001980246263801777, -0.0017540247099745443, 0.0018022912780594202, -0.0005056805241561158, -0.0017686227072655214, -0.0003527295679881526, 0.016529002299418005, -0.0014105862431510463, -0.0005732444581987295, -0.002869230597707273, -0.004965959446656232, 0.0004066440403122096, -0.009874817195565821, -0.030274345848102402, -0.012926682404285739, -0.0008783158084264553, -0.006107566065733372, -0.010184352477880187, -0.025327970485242712, -0.035590091815137614, -0.0303458179931807, -0.004102272911347127, -0.0004503926531160829, -0.029369754046835173, -0.007045255786105931, 0.0016875597888881266, -0.03481944581257812, -0.006678644726203498, -0.014538583275030104, -0.0007153352724629247, -0.011420023006262658, -0.004652763809388283, -0.005798820819408745, -0.024656178382399974, 0.0011578591100704916, -0.005280277401536693, 0.0010356576649497296, 0.003940580861575116, -0.005276868987642791, 0.00770392753313931, -0.007944469488529449, 0.001991397641811221, 0.003759694548383874, 0.0011195084904335184, -0.007110697689415635, 0.0032203225902148747, -0.007578862375151041, -0.0060194039077946515, 0.00024130283639867134, 0.0013235404395363082, 0.00043243599875075756, 0.001123780074328562, 0.00050973691967926, -0.00034757211361624574, -0.002281972957101308, 0.000845475795447137, -0.0020588684564848414, 0.0021372282150149413, -0.00019856545094609768, -0.0017913068956328937, 0.0006373160107784326, 0.0012739985965753986, -0.001696571054130902, -0.0002589327068805991, -0.00022288072963450034, 
-0.0031104722681001284, -0.0008235753576365523, -0.002447807642767752, 0.002004633800402944, -0.003224266216026561, 0.00864564856685679, -0.00554499989692514, -0.00026444194813035615, -0.0010032832870013886, -0.0032213627206253426, -0.000911160024681501, 0.0004076477827085201, 0.0007995060213979999, -0.0005885363844878588, 0.0032955989913380224, 0.0004894614286667931, 0.002261644799368967, -0.0032746021967355876, -0.0005524850007211368, -0.0031971616217488785, 0.0060628232866996035, -0.0018951843317909223, -0.004804670310596504, -0.016865397416521932, 0.0022279632468085175, 0.0004397955748330906, -0.0012895574791834674, -0.0052936056246406515, 0.001047196960554997, 0.0014035810745537391, -0.0021789054250395123, 0.004835618627879845, -0.004423137045597492, 0.0033839427269372058, 0.0004317648131846319, 0.010148684107688322, -0.0005852067203551137, -0.00023673829544612612, -0.0004969772951760598, -0.0007139106069110612, -0.0006781488781816281, -0.004463513032158084, -0.008314803798795345, 0.005664192158806869, -0.0017831527387602852, -0.005528449668342539, -0.00018355344047649158, -0.013367195182721558, 0.010476427486050932, -0.0030995623273649686, -0.0033128122970741414, 0.004656006266778645, -0.010849289595229128, -0.002397579088451285, -0.009958364378056861, -0.0011444352933996926, 0.0021166335185883733, -0.018000165477872865, -0.0032624694006755384, -0.004127065179238776, 0.012949255788881175, -0.0018095543583533935, -0.005592349116810835, 0.0037267653611244844, -0.0038516820526487416, 0.024489734744573943, 0.004683327531111781, 0.001269388150738071, -0.0017407506416220464, 0.011902507395885942, 0.004639839618832409, 0.002506386658879589, 0.0003685885982728232, 0.009029032176506746, 0.003135888070068371, -0.0016520809387076119, 0.003088001303703236, 0.00014727469647846103, 0.0035561159496605432, -2.876814735663924e-05, -1.1060450691990908e-06, -0.00046745210145082705, 0.0012870350251698026, 0.0042191397766525585, 0.00655774915823315, 0.0005207348452095362, 
0.0023795699587515484, 0.0043810533565454834, 0.004609257547817935, 0.01290056247180027, 0.0014757109716835304, 0.0010271818164879765, 0.0015607194203527408, 0.0004559197274759397, 0.00033707593565324157, 0.002642703381714573, 0.006501328950226926, 0.0008704244332061739, 0.002463581165456641, 0.004364567880446146, -0.0018809110325389566, 0.00017150256113329654, 0.00017711236826822851, -7.174454357028459e-05, 3.2134260813054816e-05, -0.0018074430724653867, 0.0007488238809050252, -0.00021827012815400781, -0.0002562985350954561, 0.00023716735492340657, 0.001413468319080624, 0.006882923410709174, 0.0007931366577244026, -0.0004717657827425503, 0.009100363328674477, 0.005404627376927373, -0.004026623783044081, 0.005351064544848239, -0.0035389768278023537, 0.002068255236817085, -0.002090048742856676, -0.0019271886285645579, 0.0011498135139685894, 0.0018314054258520285, -0.00026283971236906734, 0.0015413975782866407, -0.004150725533632493, -0.001320978589455457, 0.00031339691627352284, -0.0016566925476326766, -0.0006220275171834322, 7.44998579660372e-05, -0.0002999797537633908, 0.0007030142990428478, 0.0007913912947924429, 0.0011556895007599488, 0.011568536396979079, 0.0028225455085252316, 0.00035905603670860486, -0.000841494670420162, 0.009615860981466828, 0.03377755052540243, 0.0789273078045386, 0.05525867670164508, -0.0010166697260519664, 0.00957764643602288, 0.06977357007791107, 0.006012541368369928, 0.002010875608860041, 0.03032162865092458, 0.002846750112344365, -0.00016217114898731122, 0.0060712783132312875, 0.006622682402069313, 0.006219871147256417, 0.0018001652923477218, 0.06992553842572524, 0.006724233973021909, 0.007539747957793552, 0.020538531613926936, 0.007943897075231354, 0.008360812789827254, 0.003290025441256006, 0.020634865090878085, 0.0008080876743085108, -0.0030892486063024416, 0.00041600088024251747, 0.0070829632445147395, 0.0036536124147609206, -0.0016869382360118479, 0.0034263287960714095, -0.0006554657701027811, 0.0029753478023664126, 
0.0006174977901268752, 7.101865790082318e-05, -0.00016257411215181428, -0.0009369820715646737, 0.0005434387564733356, 0.0012271814940856274, 0.0017175994942115747, 0.0005473661944555008, 0.005273938186585614, 0.0027605830798231867, 0.0053267195601733874, -0.0008641356489738072, 0.0003752916592929534, -8.763321084148679e-05, 0.0015245669500787429, -0.00012165228700977693, 0.0007204145703794129, 7.33961001830874e-06, 0.0007303311512011357, 0.002484907097797917, 0.012401690162314405, 0.002021342443194185, -0.00036144987650921223, -0.0012055569846747272, 0.00085115010144865, 2.829758256761572e-05, -3.762854703992513e-05, 0.0006878905429239524, -0.0007663772085586551, 0.0001906678540952722, 0.0001922411569781346, 0.001457809117053288, 0.0061655533145379415, -0.0002077338553436725, 0.004428694252206933, 0.007572691730434067, 0.015522139371950017, -0.0007973496961124482, 0.010913869326709902, -0.0009089608282075723, 0.001704153441209666, -0.001975343179565603, 0.0007767569381108563, 0.0001946668039775057, -0.0003049050619283733, -0.0006077903992949274, -0.0014501145568337481, 0.0024408470743457755, 0.0024867530414642847, -0.00028112355434628533, 0.00029719386409921, -0.0005815474877897131, -9.683347434896739e-05, 0.0015214781874257621, 0.00038445751824878194, -0.0006103274076518783, 0.0016081931094918955, 0.008255889709628095, 0.013087103367242059, 0.0004902027422780675, 0.007683661011998355, 0.0006227685572650632, 0.040959894211064496, 0.04659799783753149, 0.05623489106446333, -0.0011074112724636098, 0.0026292183555661763, 0.06648468153042297, 0.008507328563525984, 0.0012577368022517188, 0.05146040805311081, 0.006280620030596484, 0.0025791825898906495, 0.006777579940425715, 0.009547184931625763, 0.008159048875296504, 0.0008150683146730398, 0.06841724518294723, -0.0003192050956154491, 0.005694154235833304, 0.0024969631810545234, -0.001106586510389418]}
--------------------------------------------------------------------------------
/analysis.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | from utils import *
4 | import seaborn as sns
5 | import matplotlib.pyplot as plt
6 | import plotly.figure_factory as ff
7 |
8 | import warnings
9 | warnings.filterwarnings('ignore')
10 |
11 |
def calculate_R2(model, type, input=None, complete_r=None):
    """Compute the out-of-sample R^2 of a model's portfolio-return fit.

    Two modes:
    * input is None       -> read the model's output csv from results/ and
                             return its R^2 against the realized OOS returns.
    * input is an ndarray -> treat `input` as the model output obtained with
                             one characteristic omitted and `complete_r` as the
                             full-information output; return the R^2 *drop*
                             (complete minus omitted), i.e. the importance of
                             the omitted characteristic.

    `OOS_start`, `OOS_end` and `CHARAS_LIST` are provided by `utils`
    (star-imported at the top of the file).

    Parameters
    ----------
    model : str or object
        Model identifier; a string like 'FF_3', or an object with a `.name`.
    type : str
        Result flavour, used in the csv path ('inference' or 'predict').
    input : np.ndarray, optional
        Omitted-characteristic model output (months x characteristics).
    complete_r : np.ndarray, optional
        Full-information model output, only used when `input` is given.

    Returns
    -------
    float
        The R^2, or the R^2 difference in omitted-characteristic mode.
    """
    portfolio_ret = pd.read_pickle('data/portfolio_ret.pkl')
    in_oos = (portfolio_ret['DATE'] >= OOS_start) & (portfolio_ret['DATE'] <= OOS_end)
    oos_ret = portfolio_ret.loc[in_oos]

    from_array = isinstance(input, np.ndarray)
    if from_array:
        model_output = pd.DataFrame(input, columns=CHARAS_LIST)
        model_output['DATE'] = oos_ret['DATE'].to_list()
    else:
        name = model if isinstance(model, str) else model.name
        model_output = pd.read_csv(f'results/{type}/{name}_{type}.csv')

    # hard code for format error: some cells are saved as '[x]', strip the
    # brackets and coerce every column (DATE included) to float
    to_float = lambda x: float(str(x).replace('[', '').replace(']', ''))
    for col in model_output.columns:
        model_output[col] = model_output[col].apply(to_float)

    residual_square = ((oos_ret.set_index('DATE') - model_output.set_index('DATE')) ** 2).dropna()
    residual_square = (1 - (residual_square == np.inf) * 1.0) * residual_square  # drop Inf outliers

    total_square = oos_ret.set_index('DATE') ** 2
    total_square = (1 - (total_square == np.inf) * 1.0) * total_square  # drop Inf outliers

    model_output_R2 = 1 - np.sum(residual_square.values) / np.sum(total_square.values)

    if not from_array:
        return model_output_R2

    # Omitted-characteristic mode: score the complete (no-omit) output the
    # same way and return the R^2 it loses when the characteristic is dropped.
    no_omit_output = pd.DataFrame(complete_r, columns=CHARAS_LIST)
    no_omit_output['DATE'] = oos_ret['DATE'].to_list()

    no_omit_residual_square = ((oos_ret.set_index('DATE') - no_omit_output.set_index('DATE')) ** 2).dropna()
    no_omit_residual_square = (1 - (no_omit_residual_square == np.inf) * 1.0) * no_omit_residual_square  # drop Inf outliers

    no_omit_model_output_R2 = 1 - np.sum(no_omit_residual_square.values) / np.sum(total_square.values)

    return no_omit_model_output_R2 - model_output_R2  # the difference of R^2, i.e. the importance of characteristics
54 |
55 |
56 |
def alpha_plot(model, type, save_dir='imgs'):
    """Scatter each portfolio's pricing error (alpha) against its raw return.

    For every characteristic-sorted portfolio the function computes the mean
    out-of-sample return, the mean pricing error (alpha) of the model, and the
    t-statistic of that error.  Portfolios with |t| > 3 are drawn as red
    triangles, the rest as blue circles, with the 45-degree line marking
    alpha == raw return.  The figure is written to
    '{save_dir}/alpha/{model.name}_inference_alpha_plot.png'.

    Parameters
    ----------
    model : object
        Fitted model; only `.name` is used to locate
        'results/{type}/{model.name}_{type}.csv'.
    type : str
        Result flavour used in the csv path (e.g. 'inference').
    save_dir : str
        Directory that receives the 'alpha' subfolder.
    """
    if 'alpha' not in os.listdir(save_dir):
        os.mkdir(f'{save_dir}/alpha')

    portfolio_ret = pd.read_pickle('data/portfolio_ret.pkl')
    oos_result = portfolio_ret.loc[(portfolio_ret['DATE'] >= OOS_start) & (portfolio_ret['DATE'] <= OOS_end)].set_index('DATE')

    output_path = f'results/{type}/{model.name}_{type}.csv'
    inference_result = pd.read_csv(output_path)
    inference_result = inference_result.set_index('DATE')

    pricing_error_analysis = []
    for col in CHARAS_LIST:
        raw_return = oos_result[col].mean()
        error = oos_result[col] - inference_result[col]
        alpha = error.mean()
        # t-stat of the mean pricing error: |mean / std| * sqrt(T)
        t_stat = abs(error.mean()/error.std()) * np.sqrt(oos_result.shape[0])
        pricing_error_analysis.append([raw_return, alpha, t_stat])

    pricing_error_analysis = pd.DataFrame(pricing_error_analysis, columns = ['raw ret', 'alpha', 't_stat'], index=CHARAS_LIST)

    # pad the 45-degree reference line by 15% past the extreme points
    lower_point = min(np.min(pricing_error_analysis['raw ret']), np.min(pricing_error_analysis['alpha'])) * 1.15
    upper_point = max(np.max(pricing_error_analysis['raw ret']), np.max(pricing_error_analysis['alpha'])) * 1.15

    significant_mask = pricing_error_analysis['t_stat'] > 3

    plt.scatter(pricing_error_analysis.loc[significant_mask]['raw ret'], pricing_error_analysis.loc[significant_mask]['alpha'], marker='^', color='r', alpha=0.6, label=f'#Alphas(|t|>3.0)={np.sum(significant_mask*1.0)}')
    # derive the insignificant count from the portfolio count rather than the
    # hard-coded 94, so the label stays correct if CHARAS_LIST ever changes
    plt.scatter(pricing_error_analysis.loc[~significant_mask]['raw ret'], pricing_error_analysis.loc[~significant_mask]['alpha'], marker='o', color='b', alpha=0.6, label=f'#Alphas(|t|<3.0)={len(CHARAS_LIST)-np.sum(significant_mask*1.0)}')
    plt.plot(np.linspace(lower_point, upper_point, 10), np.linspace(lower_point, upper_point, 10), color='black')

    plt.ylabel('Alpha (%)')
    plt.xlabel('Raw Return (%)')
    plt.legend()

    plt.title(model.name)
    plt.savefig(f'{save_dir}/alpha/{model.name}_inference_alpha_plot.png')
    plt.close()
94 |
95 |
def plot_R2_bar(R_df, type):
    """Plot grouped R^2 bars (one group per K, one bar per model family).

    Parameters
    ----------
    R_df : pd.DataFrame
        Two-column frame: column 0 holds model ids like 'CA0_3', column 1 the
        corresponding R^2.  A 'Model' column is added in place as a side
        effect (the model family, i.e. the id prefix before '_').
    type : str
        Label used in the y-axis text and in the output path
        'imgs/{type}_R2.png'.
    """
    R_df['Model'] = R_df[0].apply(lambda x: x.split('_')[0])

    labels = ['K=1', 'K=2', 'K=3', 'K=4', 'K=5', 'K=6']
    # bar order within each K group, left to right
    model_names = ['FF', 'PCA', 'IPCA', 'CA0', 'CA1', 'CA2', 'CA3']

    x = np.arange(len(labels))  # label locations
    width = 0.11
    # one shade of the 'OrRd' colormap per model (skip the lightest shade 0)
    colors = plt.get_cmap('OrRd')(np.linspace(0, 1, 8))

    fig, ax = plt.subplots(figsize=(15, 5))
    # replace the seven copy-pasted ax.bar calls with a data-driven loop:
    # model i sits at offset (i - 3) * width, so CA0 is centered on the tick
    for i, name in enumerate(model_names):
        heights = (R_df.loc[R_df['Model'] == name][1] * 100).to_list()
        ax.bar(x + width * (i - 3), heights, width, label=name, color=colors[i + 1])

    ax.set_ylabel(f'Portfolio {type} R^2 (%)')
    ax.set_xticks(x)
    ax.set_xticklabels(labels)
    ax.legend()

    fig.tight_layout()

    plt.savefig(f'imgs/{type}_R2.png')
    plt.close()
132 |
133 |
134 |
def plot_R2_table(R_df, type):
    """Render an R^2 DataFrame (models x K) as a percentage table image.

    Values are formatted to two decimals via `round_number` and the table is
    written to 'imgs/R2_{type}_table.png' with plotly's figure factory.

    Parameters
    ----------
    R_df : pd.DataFrame
        R^2 values indexed by model family with columns K=1..K=6.
    type : str
        Suffix used in the output filename ('total' or 'pred').
    """
    # work on a copy so the caller's frame is not clobbered with strings
    # (the original mutated R_df in place); also drop the stray
    # plt.figure(dpi=200) — plotly never used it and the figure leaked open
    R_df = R_df.copy()
    for col in R_df.columns:
        R_df[col] = R_df[col].apply(lambda x: round_number(x))

    R_df = R_df.reset_index()
    R_df.columns = ['Model', 'K=1', 'K=2', 'K=3', 'K=4', 'K=5', 'K=6']

    fig_total = ff.create_table(R_df,
                                colorscale=[[0, 'white'],
                                            [0.01, 'lightgrey'],
                                            [1.0, 'white']],
                                font_colors=['#000000', '#000000',
                                             '#000000'])
    fig_total.update_layout(
        autosize=False,
        width=500,
        height=200,
    )
    fig_total.write_image(f"imgs/R2_{type}_table.png", scale=4)
157 |
158 |
159 |
def round_number(num):
    """Format a fractional value as a percentage string with two decimals.

    E.g. 0.1234 -> '12.34', 0.5 -> '50.00'.  The value is first rounded to
    two decimals, then the fractional part is right-padded with zeros.
    """
    text = str(round(num * 100, 2))
    whole, _, frac = text.partition('.')
    return whole + '.' + frac.ljust(2, '0')
165 |
166 |
167 |
if __name__=="__main__":
    # Model identifiers: one per (model family, number of factors K) pair.
    CAs = [f'CA{a}_{k}' for a in range(4) for k in range(1, 7)]
    FFs = [f'FF_{k}' for k in range(1, 7)]
    PCAs = [f'PCA_{k}' for k in range(1, 7)]
    IPCAs = [f'IPCA_{k}' for k in range(1, 7)]
    models = FFs + PCAs + IPCAs + CAs

    ## Plot R^2 bars
    total_R2 = [calculate_R2(m, 'inference') for m in models]
    R_total = pd.DataFrame([models, total_R2]).T

    predict_R2 = [calculate_R2(m, 'predict') for m in models]
    R_pred = pd.DataFrame([models, predict_R2]).T

    plot_R2_bar(R_total, 'total')
    plot_R2_bar(R_pred, 'pred')

    ## Save R^2 tables
    family_index = ['FF', 'PCA', 'IPCA', 'CA0', 'CA1', 'CA2', 'CA3']
    K_columns = ['K=1', 'K=2', 'K=3', 'K=4', 'K=5', 'K=6']
    # reshape the flat per-model lists into (family x K) tables
    R_total_df = pd.DataFrame(np.array(total_R2).reshape(-1, 6), columns=K_columns, index=family_index)
    R_pred_df = pd.DataFrame(np.array(predict_R2).reshape(-1, 6), columns=K_columns, index=family_index)

    plot_R2_table(R_total_df, 'total')
    plot_R2_table(R_pred_df, 'pred')


    ## Plot characteristics importance heatmap
    # models = ["IPCA", "CA0_5", "CA1_5", "CA2_5", "CA3_5"]
    # #TODO: paste results from R_squares/
    # R2_omit = []
    # R_minus = pd.DataFrame(np.array(R2_omit).reshape(-1, 94)*100, index=models, columns=CHARAS_LIST).T
    # char_ranks = R_minus.T.sum().argsort().argsort().index.to_list()
    # char_ranks.reverse()

    # plt.figure(figsize=(8, 15), dpi=200)
    # sns.heatmap(R_minus.T[char_ranks].T, cmap='Blues', linewidths=0.6)
    # plt.savefig('imgs/omit_char_R2_bias.png', bbox_inches='tight')
    # plt.close()
209 |
--------------------------------------------------------------------------------
/data_prepare.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | pd.options.mode.chained_assignment = None
4 | from tqdm import tqdm
5 |
6 | import os
7 | import zipfile
8 | from joblib import delayed, Parallel
9 | from itertools import product
10 | from utils import CHARAS_LIST
11 |
12 | import warnings
13 | warnings.filterwarnings('ignore')
14 |
# --- import-time side effects: fetch raw data on first run ------------------
# Download the zipped raw dataset once (skipped if data.zip already exists).
if 'data.zip' not in os.listdir():
    os.system('wget https://cloud.tsinghua.edu.cn/f/07d6a0223d054247af26/?dl=1 -O data.zip')

# First run only: create data/ and download the preprocessed artifacts
# (portfolio returns, Fama-French 5 factors, momentum factor, portfolio
# characteristics), then move them into data/.
if 'data' not in os.listdir():
    os.mkdir('data')
    os.system('wget https://cloud.tsinghua.edu.cn/f/179082ecf0f147a4840c/?dl=1 -O portfolio_ret.pkl')
    os.system('wget https://cloud.tsinghua.edu.cn/f/b93c6ae7e2014d3a951e/?dl=1 -O ff5.csv')
    os.system('wget https://cloud.tsinghua.edu.cn/f/5f077be9eda0428ab7e5/?dl=1 -O UMD.csv')
    os.system('wget https://cloud.tsinghua.edu.cn/f/a916da12d5a9450eb0df/?dl=1 -O p_charas.pkl')

    os.system('mv portfolio_ret.pkl data')
    os.system('mv ff5.csv data')
    os.system('mv UMD.csv data')
    os.system('mv p_charas.pkl data')


# Extract the two pickles used below directly from the zip.  `mon_ret` and
# `datashare` stay in module scope: the helper functions below read them as
# globals, and the unzipped copies are re-pickled into data/.
with zipfile.ZipFile('data.zip', 'r') as z:
    with z.open('data/month_ret.pkl') as f:
        print('Reading month_ret.pkl', end=' ')
        mon_ret = pd.read_pickle(f)
        mon_ret.to_pickle('data/month_ret.pkl')
        print('Done!')

    with z.open('data/datashare.pkl') as f:
        print('Reading datashare.pkl', end=' ')
        datashare = pd.read_pickle(f)
        # persist the list of unique months for later (commented) pipeline steps
        datashare['DATE'].drop_duplicates().reset_index(drop=True).to_pickle('data/mon_list.pkl')
        # datashare.to_pickle('data/datashare.pkl')
        print('Done!')
44 |
45 |
46 |
def pre_process(date):
    """Clean and rank-normalize one month's cross-section of characteristics.

    Missing values are filled with the cross-sectional median of each
    characteristic (zero if the whole column is NaN), then every
    characteristic is mapped to [-1, 1] by its cross-sectional rank.

    Parameters
    ----------
    date : int
        A DATE value present in the module-level `datashare` frame.

    Returns
    -------
    pd.DataFrame
        Columns ['permno', 'DATE'] + CHARAS_LIST, one row per stock,
        characteristics rank-normalized to [-1, 1].
    """
    cross_slice = datashare.loc[datashare.DATE == date].copy(deep=False)
    # 1.0 where a value is missing, 0.0 where it is observed
    omitted_mask = 1.0 * np.isnan(cross_slice.loc[cross_slice['DATE'] == date])
    # fill nan values with each factors median
    cross_slice.loc[cross_slice.DATE == date] = cross_slice.fillna(0) + omitted_mask * cross_slice.median()
    # if all stocks' factor is nan, fill by zero
    cross_slice.loc[cross_slice.DATE == date] = cross_slice.fillna(0)

    re_df = []
    # rank normalization
    for col in CHARAS_LIST:
        series = cross_slice[col]
        # rank the distinct values once, then broadcast the rank back onto the
        # full series via the merge below so ties share a rank
        de_duplicate_slice = pd.DataFrame(series.drop_duplicates().to_list(), columns=['chara'])
        series = pd.DataFrame(series.to_list(), columns=['chara'])
        # sort and assign rank, the same value should have the same rank
        # (double argsort turns values into 0..m-1 ranks)
        de_duplicate_slice['sort_rank'] = de_duplicate_slice['chara'].argsort().argsort()
        # NOTE(review): merge how='right' on the de-duplicated frame — assumes
        # the result keeps one row per original observation; verify row order
        # is preserved for this data
        rank = pd.merge(series, de_duplicate_slice, left_on='chara', right_on='chara', how='right')['sort_rank']
        # if all values are zero, the results will contain nan
        # (min == max makes the denominator zero; fillna(0) below handles it)
        rank_normal = ((rank - rank.min())/(rank.max() - rank.min())*2 - 1)
        re_df.append(rank_normal)
    re_df = pd.DataFrame(re_df, index=CHARAS_LIST).T.fillna(0)
    re_df['permno'] = list(cross_slice['permno'].astype(int))
    re_df['DATE'] = list(cross_slice['DATE'].astype(int))

    return re_df[['permno', 'DATE'] + CHARAS_LIST]
72 |
73 |
74 |
def cal_portfolio_ret(it, df):
    """Compute one month's long-short decile return for one characteristic.

    Stocks are sorted by the characteristic (descending); the long leg is the
    top decile, the short leg the bottom decile, and the portfolio return is
    0.5 * (long mean - short mean) of excess returns taken from the
    module-level `mon_ret` frame.

    Parameters
    ----------
    it : tuple
        (date, characteristic name).
    df : pd.DataFrame
        Pre-processed characteristics with 'permno' and 'DATE' columns.

    Returns
    -------
    float
        The long-short portfolio excess return (NaN if no returns match).
    """
    d, f = it[0], it[1]
    # hoist the month slice: the original recomputed df.loc[df.DATE == d]
    # four times per call
    cross = df.loc[df.DATE == d][['permno', f]].sort_values(by=f, ascending=False)
    permnos = cross['permno'].to_list()
    n_decile = len(permnos) // 10
    # long portfolio: top decile (quantile 0.9~1.0); short: bottom decile.
    # Fix: the original sliced [-len//10:], where unary minus binds before //,
    # so the short leg got ceil(n/10) stocks (one extra when n % 10 != 0) and
    # *all* stocks when n < 10 ([-0:]); both legs now take exactly n // 10.
    long_portfolio = permnos[:n_decile]
    short_portfolio = permnos[len(permnos) - n_decile:]
    # long-short portfolio return, using each stock's excess return this month
    month_ret = mon_ret.loc[mon_ret.date == d].drop_duplicates('permno').set_index('permno')
    long_ret = month_ret.reindex(long_portfolio)['ret-rf'].dropna().mean()
    short_ret = month_ret.reindex(short_portfolio)['ret-rf'].dropna().mean()
    chara_ret = 0.5*(long_ret - short_ret)

    return chara_ret
86 |
87 |
def cal_portfolio_charas(month, df):
    """Compute the characteristics of every long-short portfolio for a month.

    For each characteristic, the long leg is the top decile of stocks sorted
    by that characteristic and the short leg the bottom decile; the managed
    portfolio's characteristics are 0.5 * (long mean - short mean) across all
    of CHARAS_LIST.

    Parameters
    ----------
    month : int
        A DATE value present in `df`.
    df : pd.DataFrame
        Pre-processed characteristics with 'permno' and 'DATE' columns.

    Returns
    -------
    pd.DataFrame
        One row per managed portfolio (index 'p_<chara>'), columns
        ['DATE'] + CHARAS_LIST.
    """
    mon_portfolio_chara = []
    p_name = ['p_' + chara for chara in CHARAS_LIST]
    # hoist the month slice out of the loop (was recomputed 4x per iteration);
    # also rename the loop variable: `chr` shadowed the builtin
    cross = df.loc[df.DATE == month]
    by_permno = cross.set_index('permno')
    n_decile = cross.shape[0] // 10

    for chara in CHARAS_LIST:
        ranked = cross.sort_values(by=chara, ascending=False).reset_index(drop=True)
        permnos = ranked['permno'].to_list()
        # Fix: the original sliced [-len//10:], where unary minus binds before
        # //, giving the short leg ceil(n/10) stocks (one extra when
        # n % 10 != 0) and all stocks when n < 10; both legs now take n // 10.
        long_portfolio = permnos[:n_decile]
        short_portfolio = permnos[len(permnos) - n_decile:]

        long_charas = by_permno.loc[long_portfolio][CHARAS_LIST]
        short_charas = by_permno.loc[short_portfolio][CHARAS_LIST]

        mon_portfolio_chara.append([month] + (0.5*(long_charas.mean() - short_charas.mean())).to_list())

    return pd.DataFrame(mon_portfolio_chara, index=p_name, columns=['DATE']+CHARAS_LIST)
101 |
102 |
103 |
if __name__ == '__main__':
    # Entry point: rank-normalize the raw characteristics month by month in
    # parallel, then (optionally) build portfolio returns/characteristics.
    # pre-process share data
    processed_df = Parallel(n_jobs=-1)(delayed(pre_process)(d) for d in tqdm(datashare.DATE.drop_duplicates().to_list(), colour='green', desc='Processing'))
    processed_df = pd.concat(processed_df)

    ##TODO: calculate portfolio returns (or download preprocessed data)
    # iter_list = list(product(datashare.DATE.drop_duplicates(), CHARAS_LIST))
    # portfolio_rets = Parallel(n_jobs=-1)(delayed(cal_portfolio_ret)(it, df=processed_df) for it in tqdm(iter_list, colour='green', desc='Calculating'))
    # portfolio_rets = pd.DataFrame(np.array(portfolio_rets).reshape(-1, 94), index=datashare.DATE.drop_duplicates(), columns=CHARAS_LIST).reset_index()
    # portfolio_rets[CHARAS_LIST] = portfolio_rets[CHARAS_LIST].astype(np.float16)


    ##TODO: calculate portfolio characteristics (or download preprocessed data)
    # mon_list = pd.read_pickle('data/mon_list.pkl')
    # _portfolio_chara_set = Parallel(n_jobs=-1)(delayed(cal_portfolio_charas)(mon, df=processed_df) for mon in tqdm(mon_list, colour='yellow', desc='Calculating P characteristics'))
    # p_charas = _portfolio_chara_set[0].copy(deep=False)
    # for tdf in _portfolio_chara_set[1:]:
    #     p_charas = pd.concat([p_charas, tdf])


    # persist the rank-normalized characteristics for the model code to load
    processed_df.to_pickle('data/datashare_re.pkl')
    # portfolio_rets.to_pickle('data/portfolio_rets.pkl')
    # p_charas.to_pickle('data/p_charas.pkl')
--------------------------------------------------------------------------------
/imgs/R2_pred_table.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardS0268/Autoencoder-Asset-Pricing-Models/5aa57e0a1519f1fb23bfef48cdd5cf9cb29e467a/imgs/R2_pred_table.png
--------------------------------------------------------------------------------
/imgs/R2_total_table.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardS0268/Autoencoder-Asset-Pricing-Models/5aa57e0a1519f1fb23bfef48cdd5cf9cb29e467a/imgs/R2_total_table.png
--------------------------------------------------------------------------------
/imgs/alpha/CA0_1_inference_alpha_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardS0268/Autoencoder-Asset-Pricing-Models/5aa57e0a1519f1fb23bfef48cdd5cf9cb29e467a/imgs/alpha/CA0_1_inference_alpha_plot.png
--------------------------------------------------------------------------------
/imgs/alpha/CA0_2_inference_alpha_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardS0268/Autoencoder-Asset-Pricing-Models/5aa57e0a1519f1fb23bfef48cdd5cf9cb29e467a/imgs/alpha/CA0_2_inference_alpha_plot.png
--------------------------------------------------------------------------------
/imgs/alpha/CA0_3_inference_alpha_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardS0268/Autoencoder-Asset-Pricing-Models/5aa57e0a1519f1fb23bfef48cdd5cf9cb29e467a/imgs/alpha/CA0_3_inference_alpha_plot.png
--------------------------------------------------------------------------------
/imgs/alpha/CA0_4_inference_alpha_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardS0268/Autoencoder-Asset-Pricing-Models/5aa57e0a1519f1fb23bfef48cdd5cf9cb29e467a/imgs/alpha/CA0_4_inference_alpha_plot.png
--------------------------------------------------------------------------------
/imgs/alpha/CA0_5_alpha_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardS0268/Autoencoder-Asset-Pricing-Models/5aa57e0a1519f1fb23bfef48cdd5cf9cb29e467a/imgs/alpha/CA0_5_alpha_plot.png
--------------------------------------------------------------------------------
/imgs/alpha/CA0_5_inference_alpha_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardS0268/Autoencoder-Asset-Pricing-Models/5aa57e0a1519f1fb23bfef48cdd5cf9cb29e467a/imgs/alpha/CA0_5_inference_alpha_plot.png
--------------------------------------------------------------------------------
/imgs/alpha/CA0_6_inference_alpha_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardS0268/Autoencoder-Asset-Pricing-Models/5aa57e0a1519f1fb23bfef48cdd5cf9cb29e467a/imgs/alpha/CA0_6_inference_alpha_plot.png
--------------------------------------------------------------------------------
/imgs/alpha/CA1_1_inference_alpha_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardS0268/Autoencoder-Asset-Pricing-Models/5aa57e0a1519f1fb23bfef48cdd5cf9cb29e467a/imgs/alpha/CA1_1_inference_alpha_plot.png
--------------------------------------------------------------------------------
/imgs/alpha/CA1_2_inference_alpha_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardS0268/Autoencoder-Asset-Pricing-Models/5aa57e0a1519f1fb23bfef48cdd5cf9cb29e467a/imgs/alpha/CA1_2_inference_alpha_plot.png
--------------------------------------------------------------------------------
/imgs/alpha/CA1_3_inference_alpha_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardS0268/Autoencoder-Asset-Pricing-Models/5aa57e0a1519f1fb23bfef48cdd5cf9cb29e467a/imgs/alpha/CA1_3_inference_alpha_plot.png
--------------------------------------------------------------------------------
/imgs/alpha/CA1_4_inference_alpha_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardS0268/Autoencoder-Asset-Pricing-Models/5aa57e0a1519f1fb23bfef48cdd5cf9cb29e467a/imgs/alpha/CA1_4_inference_alpha_plot.png
--------------------------------------------------------------------------------
/imgs/alpha/CA1_5_inference_alpha_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardS0268/Autoencoder-Asset-Pricing-Models/5aa57e0a1519f1fb23bfef48cdd5cf9cb29e467a/imgs/alpha/CA1_5_inference_alpha_plot.png
--------------------------------------------------------------------------------
/imgs/alpha/CA1_6_inference_alpha_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardS0268/Autoencoder-Asset-Pricing-Models/5aa57e0a1519f1fb23bfef48cdd5cf9cb29e467a/imgs/alpha/CA1_6_inference_alpha_plot.png
--------------------------------------------------------------------------------
/imgs/alpha/CA2_1_inference_alpha_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardS0268/Autoencoder-Asset-Pricing-Models/5aa57e0a1519f1fb23bfef48cdd5cf9cb29e467a/imgs/alpha/CA2_1_inference_alpha_plot.png
--------------------------------------------------------------------------------
/imgs/alpha/CA2_2_inference_alpha_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardS0268/Autoencoder-Asset-Pricing-Models/5aa57e0a1519f1fb23bfef48cdd5cf9cb29e467a/imgs/alpha/CA2_2_inference_alpha_plot.png
--------------------------------------------------------------------------------
/imgs/alpha/CA2_3_inference_alpha_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardS0268/Autoencoder-Asset-Pricing-Models/5aa57e0a1519f1fb23bfef48cdd5cf9cb29e467a/imgs/alpha/CA2_3_inference_alpha_plot.png
--------------------------------------------------------------------------------
/imgs/alpha/CA2_4_inference_alpha_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardS0268/Autoencoder-Asset-Pricing-Models/5aa57e0a1519f1fb23bfef48cdd5cf9cb29e467a/imgs/alpha/CA2_4_inference_alpha_plot.png
--------------------------------------------------------------------------------
/imgs/alpha/CA2_5_inference_alpha_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardS0268/Autoencoder-Asset-Pricing-Models/5aa57e0a1519f1fb23bfef48cdd5cf9cb29e467a/imgs/alpha/CA2_5_inference_alpha_plot.png
--------------------------------------------------------------------------------
/imgs/alpha/CA2_6_inference_alpha_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardS0268/Autoencoder-Asset-Pricing-Models/5aa57e0a1519f1fb23bfef48cdd5cf9cb29e467a/imgs/alpha/CA2_6_inference_alpha_plot.png
--------------------------------------------------------------------------------
/imgs/alpha/CA3_1_inference_alpha_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardS0268/Autoencoder-Asset-Pricing-Models/5aa57e0a1519f1fb23bfef48cdd5cf9cb29e467a/imgs/alpha/CA3_1_inference_alpha_plot.png
--------------------------------------------------------------------------------
/imgs/alpha/CA3_2_inference_alpha_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardS0268/Autoencoder-Asset-Pricing-Models/5aa57e0a1519f1fb23bfef48cdd5cf9cb29e467a/imgs/alpha/CA3_2_inference_alpha_plot.png
--------------------------------------------------------------------------------
/imgs/alpha/CA3_3_inference_alpha_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardS0268/Autoencoder-Asset-Pricing-Models/5aa57e0a1519f1fb23bfef48cdd5cf9cb29e467a/imgs/alpha/CA3_3_inference_alpha_plot.png
--------------------------------------------------------------------------------
/imgs/alpha/CA3_4_inference_alpha_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardS0268/Autoencoder-Asset-Pricing-Models/5aa57e0a1519f1fb23bfef48cdd5cf9cb29e467a/imgs/alpha/CA3_4_inference_alpha_plot.png
--------------------------------------------------------------------------------
/imgs/alpha/CA3_5_inference_alpha_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardS0268/Autoencoder-Asset-Pricing-Models/5aa57e0a1519f1fb23bfef48cdd5cf9cb29e467a/imgs/alpha/CA3_5_inference_alpha_plot.png
--------------------------------------------------------------------------------
/imgs/alpha/CA3_6_inference_alpha_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardS0268/Autoencoder-Asset-Pricing-Models/5aa57e0a1519f1fb23bfef48cdd5cf9cb29e467a/imgs/alpha/CA3_6_inference_alpha_plot.png
--------------------------------------------------------------------------------
/imgs/alpha/FF_1_inference_alpha_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardS0268/Autoencoder-Asset-Pricing-Models/5aa57e0a1519f1fb23bfef48cdd5cf9cb29e467a/imgs/alpha/FF_1_inference_alpha_plot.png
--------------------------------------------------------------------------------
/imgs/alpha/FF_2_inference_alpha_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardS0268/Autoencoder-Asset-Pricing-Models/5aa57e0a1519f1fb23bfef48cdd5cf9cb29e467a/imgs/alpha/FF_2_inference_alpha_plot.png
--------------------------------------------------------------------------------
/imgs/alpha/FF_3_inference_alpha_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardS0268/Autoencoder-Asset-Pricing-Models/5aa57e0a1519f1fb23bfef48cdd5cf9cb29e467a/imgs/alpha/FF_3_inference_alpha_plot.png
--------------------------------------------------------------------------------
/imgs/alpha/FF_4_inference_alpha_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardS0268/Autoencoder-Asset-Pricing-Models/5aa57e0a1519f1fb23bfef48cdd5cf9cb29e467a/imgs/alpha/FF_4_inference_alpha_plot.png
--------------------------------------------------------------------------------
/imgs/alpha/FF_5_inference_alpha_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardS0268/Autoencoder-Asset-Pricing-Models/5aa57e0a1519f1fb23bfef48cdd5cf9cb29e467a/imgs/alpha/FF_5_inference_alpha_plot.png
--------------------------------------------------------------------------------
/imgs/alpha/FF_6_inference_alpha_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardS0268/Autoencoder-Asset-Pricing-Models/5aa57e0a1519f1fb23bfef48cdd5cf9cb29e467a/imgs/alpha/FF_6_inference_alpha_plot.png
--------------------------------------------------------------------------------
/imgs/alpha/IPCA_1_inference_alpha_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardS0268/Autoencoder-Asset-Pricing-Models/5aa57e0a1519f1fb23bfef48cdd5cf9cb29e467a/imgs/alpha/IPCA_1_inference_alpha_plot.png
--------------------------------------------------------------------------------
/imgs/alpha/IPCA_2_inference_alpha_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardS0268/Autoencoder-Asset-Pricing-Models/5aa57e0a1519f1fb23bfef48cdd5cf9cb29e467a/imgs/alpha/IPCA_2_inference_alpha_plot.png
--------------------------------------------------------------------------------
/imgs/alpha/IPCA_3_inference_alpha_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardS0268/Autoencoder-Asset-Pricing-Models/5aa57e0a1519f1fb23bfef48cdd5cf9cb29e467a/imgs/alpha/IPCA_3_inference_alpha_plot.png
--------------------------------------------------------------------------------
/imgs/alpha/IPCA_4_inference_alpha_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardS0268/Autoencoder-Asset-Pricing-Models/5aa57e0a1519f1fb23bfef48cdd5cf9cb29e467a/imgs/alpha/IPCA_4_inference_alpha_plot.png
--------------------------------------------------------------------------------
/imgs/alpha/IPCA_5_inference_alpha_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardS0268/Autoencoder-Asset-Pricing-Models/5aa57e0a1519f1fb23bfef48cdd5cf9cb29e467a/imgs/alpha/IPCA_5_inference_alpha_plot.png
--------------------------------------------------------------------------------
/imgs/alpha/IPCA_6_inference_alpha_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardS0268/Autoencoder-Asset-Pricing-Models/5aa57e0a1519f1fb23bfef48cdd5cf9cb29e467a/imgs/alpha/IPCA_6_inference_alpha_plot.png
--------------------------------------------------------------------------------
/imgs/alpha/PCA_1_inference_alpha_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardS0268/Autoencoder-Asset-Pricing-Models/5aa57e0a1519f1fb23bfef48cdd5cf9cb29e467a/imgs/alpha/PCA_1_inference_alpha_plot.png
--------------------------------------------------------------------------------
/imgs/alpha/PCA_2_inference_alpha_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardS0268/Autoencoder-Asset-Pricing-Models/5aa57e0a1519f1fb23bfef48cdd5cf9cb29e467a/imgs/alpha/PCA_2_inference_alpha_plot.png
--------------------------------------------------------------------------------
/imgs/alpha/PCA_3_inference_alpha_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardS0268/Autoencoder-Asset-Pricing-Models/5aa57e0a1519f1fb23bfef48cdd5cf9cb29e467a/imgs/alpha/PCA_3_inference_alpha_plot.png
--------------------------------------------------------------------------------
/imgs/alpha/PCA_4_inference_alpha_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardS0268/Autoencoder-Asset-Pricing-Models/5aa57e0a1519f1fb23bfef48cdd5cf9cb29e467a/imgs/alpha/PCA_4_inference_alpha_plot.png
--------------------------------------------------------------------------------
/imgs/alpha/PCA_5_inference_alpha_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardS0268/Autoencoder-Asset-Pricing-Models/5aa57e0a1519f1fb23bfef48cdd5cf9cb29e467a/imgs/alpha/PCA_5_inference_alpha_plot.png
--------------------------------------------------------------------------------
/imgs/alpha/PCA_6_inference_alpha_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardS0268/Autoencoder-Asset-Pricing-Models/5aa57e0a1519f1fb23bfef48cdd5cf9cb29e467a/imgs/alpha/PCA_6_inference_alpha_plot.png
--------------------------------------------------------------------------------
/imgs/omit_char_R2_bias.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardS0268/Autoencoder-Asset-Pricing-Models/5aa57e0a1519f1fb23bfef48cdd5cf9cb29e467a/imgs/omit_char_R2_bias.png
--------------------------------------------------------------------------------
/imgs/pred_R2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardS0268/Autoencoder-Asset-Pricing-Models/5aa57e0a1519f1fb23bfef48cdd5cf9cb29e467a/imgs/pred_R2.png
--------------------------------------------------------------------------------
/imgs/total_R2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardS0268/Autoencoder-Asset-Pricing-Models/5aa57e0a1519f1fb23bfef48cdd5cf9cb29e467a/imgs/total_R2.png
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from models.PCA import PCA
3 | from models.FF import FF
4 | from models.IPCA import IPCA
5 | from models.CA import CA0, CA1, CA2, CA3
6 |
7 | import gc
8 | import argparse
9 | import pandas as pd
10 | import numpy as np
11 | import time
12 | import json
13 | from tqdm import tqdm
14 | from utils import *
15 | from analysis import *
16 | import matplotlib.pyplot as plt
17 | from itertools import product
18 | import os
19 |
20 | import warnings
21 | warnings.filterwarnings('ignore')
22 |
23 |
24 |
def model_inference_and_predict(model):
    """
    Rolling inference and prediction for non-NN models (FF / PCA / IPCA).

    For each test year the model is (re)trained, inference and prediction are
    run for every month of that year, and the model is then refit (train/valid
    windows rolled forward).

    Returns:
        inference_result: DataFrame (T x 94) when model.omit_char is empty,
        otherwise the raw list of per-month inference outputs (T * N * m).
    Side effects (only when model.omit_char is empty):
        writes results/inference/{model.name}_inference.csv and
        results/predict/{model.name}_predict.csv.
        (The original docstring named the inference file twice; the second
        output is the *_predict.csv file.)
    """
    mon_list = pd.read_pickle('data/mon_list.pkl')
    test_mons = mon_list.loc[mon_list >= model.test_period[0]]
    inference_result = []
    predict_result = []
    # group test months by calendar year: YYYYMMDD // 10000 -> YYYY
    T_bar = tqdm(test_mons.groupby(test_mons.apply(lambda x: x//10000)), colour='red', desc=f'{model.name} Inferencing & Predicting')

    for g in T_bar: # rolling train, one fit per test year
        T_bar.set_postfix({'Year': g[0]})
        model.train_model()

        for m in g[1].to_list():
            inference_result.append(model.inference(m)) # T * N * m when omit_char is set
            if not len(model.omit_char):
                predict_result.append(model.predict(m))
        # model refit (change train period and valid period)
        model.refit()

    if not len(model.omit_char):
        inference_result = pd.DataFrame(inference_result, index=test_mons, columns=CHARAS_LIST)
        inference_result.to_csv(f'results/inference/{model.name}_inference.csv')
        predict_result = pd.DataFrame(predict_result, index=test_mons, columns=CHARAS_LIST)
        predict_result.to_csv(f'results/predict/{model.name}_predict.csv')

    return inference_result
54 |
55 |
56 |
def model_inference_and_predict_CA(model):
    """
    Rolling inference and prediction for NN (conditional autoencoder) models.

    For each test year: reset weights, retrain, save the loss curve, then run
    per-month inference (and prediction, when model.omit_char is empty) before
    refitting for the next year.

    Returns:
        inference_result: DataFrame (T x 94) when model.omit_char is empty,
        otherwise a list of per-month (N, m) inference arrays (T, N, m).
    Side effects (only when model.omit_char is empty):
        writes results/inference/{model.name}_inference.csv and
        results/predict/{model.name}_predict.csv; always writes per-year loss
        plots to results/train_loss/.
    """
    # NOTE(review): device is hard-coded to 'cuda' here even though the model
    # carries its own device attribute — confirm this is intended.
    model = model.to('cuda')
    mon_list = pd.read_pickle('data/mon_list.pkl')
    test_mons = mon_list.loc[(mon_list >= model.test_period[0])]

    if not len(model.omit_char): # no omit characteristics
        inference_result = pd.DataFrame()
        predict_result = pd.DataFrame()
    else:
        inference_result = []

    # group test months by calendar year: YYYYMMDD // 10000 -> YYYY
    T_bar = tqdm(test_mons.groupby(test_mons.apply(lambda x: x//10000)), colour='red', desc=f'{model.name} Inferencing & Predicting')

    stock_index = pd.Series(dtype=np.int64)
    for g in T_bar: # rolling train, refit once a year
        T_bar.set_postfix({'Year': g[0]})

        model.reset_weight()
        model.release_gpu()
        # release GPU memory
        for _ in range(6): # call function multiple times to clear the cuda cache
            torch.cuda.empty_cache()

        train_loss, val_loss = model.train_model()
        # plot loss curves for this training year
        plt.plot(train_loss, label='train_loss')
        plt.plot(val_loss, label='val_loss')
        plt.legend()
        plt.savefig(f'results/train_loss/{model.name}_loss_{g[0]}.png')
        plt.close()

        for m in g[1].to_list():
            m_stock_index, _, _, _ = model._get_item(m)
            # accumulate the union of stock indices seen over all test months
            stock_index = pd.concat([stock_index, pd.Series(m_stock_index)]).drop_duplicates().astype(int)

            if not len(model.omit_char): # no omit characteristics
                # move inference_R and predict_R to cpu
                inference_R = model.inference(m) # return (N, 1)
                inference_R = inference_R.cpu().detach().numpy()
                inference_R = pd.DataFrame(inference_R, index=m_stock_index, columns=[m])
                inference_result = pd.concat([inference_result.reset_index(drop=True), inference_R.reset_index(drop=True)], axis=1) # (N, T)

                predict_R = model.predict(m) # return (N, 1)
                predict_R = predict_R.cpu().detach().numpy()
                predict_R = pd.DataFrame(predict_R, index=m_stock_index, columns=[m])
                predict_result = pd.concat([predict_result.reset_index(drop=True), predict_R.reset_index(drop=True)], axis=1) # (N, T)

            else:
                inference_R = model.inference(m) # return (N, m), m is the length of omit_char
                inference_result.append(inference_R) # (T, N, m)

        # refit: change train period and valid period
        model.refit()

    if not len(model.omit_char):
        # transpose the accumulated (N, T) frames into (T, 94) result tables
        inference_result = pd.DataFrame(inference_result.values.T, index=test_mons, columns=CHARAS_LIST)
        inference_result.to_csv(f'results/inference/{model.name}_inference.csv')

        predict_result = pd.DataFrame(predict_result.values.T, index=test_mons, columns=CHARAS_LIST)
        predict_result.to_csv(f'results/predict/{model.name}_predict.csv')

    # GC: release RAM memory(model)
    del model
    gc.collect()
    return inference_result
126 |
127 |
128 |
def git_push(msg):
    """Stage the R_squares directory, commit with message `msg`, and push.

    Uses subprocess with an argument list instead of the original
    os.system f-string, so a commit message containing shell metacharacters
    (quotes, `;`, `$(...)`) cannot inject shell commands. Failures are not
    raised (check=False), matching the original os.system behavior of
    ignoring non-zero exit codes.
    """
    import subprocess  # local import keeps this security fix self-contained
    subprocess.run(['git', 'add', 'R_squares'], check=False)
    subprocess.run(['git', 'commit', '-m', msg], check=False)
    subprocess.run(['git', 'push'], check=False)
133 |
134 |
135 |
def model_selection(model_type, model_K, omit_char=None):
    """Build a model descriptor for one (model type, K) combination.

    Args:
        model_type: one of 'FF', 'PCA', 'IPCA', 'CA0', 'CA1', 'CA2', 'CA3'.
        model_K: number of factors / hidden size.
        omit_char: characteristics to omit during inference; defaults to none.
            (The original used a mutable default `[]`; a None sentinel avoids
            the shared-default pitfall while keeping the same behavior.)

    Returns:
        dict with keys 'name', 'omit_char', and 'model' (the instantiated model).

    Raises:
        AssertionError: if model_type is not one of the supported names.
    """
    if omit_char is None:
        omit_char = []
    assert model_type in ['FF', 'PCA', 'IPCA', 'CA0', 'CA1', 'CA2', 'CA3'], f'No Such Model: {model_type}'

    if model_type == 'FF':
        # FF never uses omitted characteristics, so it always gets an empty list
        return {
            'name': f'FF_{model_K}',
            'omit_char': [],
            'model': FF(K=model_K)
        }

    elif model_type == 'PCA':
        return {
            'name': f'PCA_{model_K}',
            'omit_char': omit_char,
            'model': PCA(K=model_K, omit_char=omit_char)
        }

    elif model_type == 'IPCA':
        return {
            'name': f'IPCA_{model_K}',
            'omit_char': omit_char,
            'model': IPCA(K=model_K, omit_char=omit_char)
        }

    elif model_type == 'CA0':
        # CA0 takes no dropout parameter (linear beta network)
        return {
            'name': f'CA0_{model_K}',
            'omit_char': omit_char,
            'model': CA0(hidden_size=model_K, lr=CA_LR, omit_char=omit_char)
        }

    elif model_type == 'CA1':
        return {
            'name': f'CA1_{model_K}',
            'omit_char': omit_char,
            'model': CA1(hidden_size=model_K, dropout=CA_DR, lr=CA_LR, omit_char=omit_char)
        }

    elif model_type == 'CA2':
        return {
            'name': f'CA2_{model_K}',
            'omit_char': omit_char,
            'model': CA2(hidden_size=model_K, dropout=CA_DR, lr=CA_LR, omit_char=omit_char)
        }

    else:
        # remaining case after the assert: 'CA3'
        return {
            'name': f'CA3_{model_K}',
            'omit_char': omit_char,
            'model': CA3(hidden_size=model_K, dropout=CA_DR, lr=CA_LR, omit_char=omit_char)
        }
187 |
188 |
189 |
if __name__ == "__main__":
    # Entry point: run every (model, K) combination, collect total R^2 values,
    # and dump them to a timestamped JSON file under R_squares/.
    parser = argparse.ArgumentParser()
    parser.add_argument('--Model', type=str, default='FF PCA IPCA CA0 CA1 CA2 CA3')
    parser.add_argument('--K', type=str, default='1 2 3 4 5 6')
    parser.add_argument('--omit_char', type=str, default='')

    args = parser.parse_args()

    # create the output directory tree if it does not exist yet
    if 'results' not in os.listdir('./'):
        os.mkdir('results')
    if 'train_loss' not in os.listdir('./results'):
        os.mkdir('results/train_loss')
    if 'inference' not in os.listdir('./results'):
        os.mkdir('results/inference')
    if 'predict' not in os.listdir('./results'):
        os.mkdir('results/predict')
    if 'imgs' not in os.listdir('./'):
        os.mkdir('imgs')


    models_name = []
    R_square = []
    # cartesian product of model types and factor counts, e.g. ('CA1', '3')
    for g in product(args.Model.split(' '), args.K.split(' ')):
        # argparse always yields a str; the split only happens when non-empty
        if isinstance(args.omit_char, str) and len(args.omit_char) > 0:
            omit_chars = args.omit_char.split(' ')
        else:
            omit_chars = []

        model = model_selection(g[0], int(g[1]), omit_chars)

        print(f"{time.strftime('%a, %d %b %Y %H:%M:%S +0800', time.gmtime())} | Model: {model['name']} | {omit_chars}")
        print('name : ', model['name'])
        models_name.append(model['name'])

        # 'CA0_1' -> 'CA0' -> 'CA' : dispatch all CA variants to the NN path
        if model['name'].split('_')[0][:-1] == 'CA':
            print('model_inference_and_predict_CA')
            # if have omit char, inf_ret (T, N, m)
            inf_ret = model_inference_and_predict_CA(model['model'])
        else:
            inf_ret = model_inference_and_predict(model['model'])

        gc.collect()

        # Save total R^2
        if not len(model['omit_char']):
            R_square.append(calculate_R2(model['model'], 'inference'))
            alpha_plot(model['model'], 'inference', save_dir='imgs')
            # alpha_plot(model['model'], 'predict', save_dir='alpha_imgs')
        else:
            # compare each omitted-characteristic inference against the
            # complete (last) slice to measure the R^2 bias
            inf_ret = np.array(inf_ret)
            for i in range(len(model['omit_char'])):
                inference_r = inf_ret[:, :, i] # T * N
                complete_r = inf_ret[:, :, -1]
                R_square.append(calculate_R2(None, None, inference_r, complete_r))

        del model

    # save R_square to json, named by the local timestamp
    p = time.localtime()
    time_str = "{:0>4d}-{:0>2d}-{:0>2d}_{:0>2d}-{:0>2d}-{:0>2d}".format(p.tm_year, p.tm_mon, p.tm_mday, p.tm_hour, p.tm_min, p.tm_sec)
    filename = f"R_squares/{time_str}.json"
    obj = {
        "models": models_name,
        'omit_char': args.omit_char.split(' '),
        "R2_total": R_square,
    }

    with open(filename, "w") as out_file:
        json.dump(obj, out_file)

    # git push
    # git_push(f"Run main.py")
262 |
263 |
264 |
--------------------------------------------------------------------------------
/models/CA.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pandas as pd
3 | import numpy as np
4 | import collections
5 | from .modelBase import modelBase
6 | from utils import CHARAS_LIST
7 |
8 | import torch
9 | from torch import nn
10 | from torch.utils.data import Dataset, DataLoader, TensorDataset
11 |
12 |
13 | MAX_EPOCH = 200
14 |
15 | class CA_base(nn.Module, modelBase):
16 | def __init__(self, name, omit_char=[], device='cuda'):
17 | nn.Module.__init__(self)
18 | modelBase.__init__(self, name)
19 | self.beta_nn = None
20 | self.factor_nn = None
21 | self.optimizer = None
22 | self.criterion = None
23 | self.omit_char = omit_char
24 |
25 | self.factor_nn_pred = []
26 |
27 | self.device = device
28 |
29 | self.datashare_chara = pd.read_pickle('./data/datashare_re.pkl').astype(np.float64)
30 | self.p_charas = pd.read_pickle('./data/p_charas.pkl').astype(np.float64).reset_index()
31 | self.portfolio_ret= pd.read_pickle('./data/portfolio_ret.pkl').astype(np.float64)
32 | self.mon_ret = pd.read_pickle('./data/month_ret.pkl').astype(np.float64)
33 |
34 | self.train_dataloader = None
35 | self.valid_dataloader = None
36 | self.test_dataloader = None
37 |
38 |
39 | def debug(self, month):
40 | beta_nn_input = self.p_charas.loc[self.p_charas['DATE'] == month][CHARAS_LIST]
41 | # beta_nn_input = self.datashare_chara.loc[self.datashare_chara['DATE'] == month].set_index('permno')[charas]
42 | print(beta_nn_input)
43 |
44 |
    def _get_item(self, month):
        """Assemble one month's aligned model inputs.

        Returns:
            tuple of (stock index (L), beta_nn_input (P*N = 94*94),
            factor_nn_input (P*1 = 94*1), labels (N,)), all NaN-free.

        Raises:
            AssertionError: if any NaN survives the alignment.
        """
        if month not in self.p_charas['DATE'].values:
            # find the closest month in p_charas to month
            month = self.p_charas['DATE'].values[np.argmin(np.abs(self.p_charas['DATE'].values - month))]

        beta_nn_input = self.p_charas.loc[self.p_charas['DATE'] == month][CHARAS_LIST] # (94, 94)
        labels = self.portfolio_ret.loc[self.portfolio_ret['DATE'] == month][CHARAS_LIST].T.values # (94, 1)
        # attach labels so dropna removes rows with a missing return OR characteristic
        beta_nn_input['ret-rf'] = labels
        align_df = beta_nn_input.copy(deep=False).dropna()

        factor_nn_input = self.portfolio_ret.loc[self.portfolio_ret['DATE'] == month][CHARAS_LIST]

        # exit(0) if there is any nan in align_df
        if align_df.isnull().values.any():
            assert False, f'There is nan in align_df of : {month}'
        # return stock index (L), beta_nn_input (94*94=P*N), factor_nn_input (94*1=P*1), labels (94, = N,)
        return align_df.index, align_df.values[:, :-1].T, factor_nn_input.T.values , align_df.values[:, -1].T
62 |
63 |
64 | def dataloader(self, period):
65 | mon_list = pd.read_pickle('data/mon_list.pkl')
66 | mon_list = mon_list.loc[(mon_list >= period[0]) & (mon_list <= period[1])]
67 | beta_nn_input_set = []
68 | factor_nn_input_set = []
69 | label_set = []
70 | for mon in mon_list:
71 | _, _beta_input, _factor_input, label = self._get_item(mon)
72 | beta_nn_input_set.append(_beta_input)
73 | factor_nn_input_set.append(_factor_input)
74 | label_set.append(label)
75 |
76 | beta_nn_input_set = torch.tensor(beta_nn_input_set, dtype=torch.float32).to(self.device)
77 | factor_nn_input_set = torch.tensor(factor_nn_input_set, dtype=torch.float32).to(self.device)
78 | label_set = torch.tensor(label_set, dtype=torch.float32).to(self.device)
79 |
80 | dataset = TensorDataset(beta_nn_input_set, factor_nn_input_set, label_set)
81 | return DataLoader(dataset, batch_size=1, shuffle=True)
82 |
83 |
84 | def forward(self, char, pfret):
85 | processed_char = self.beta_nn(char)
86 | processed_pfret = self.factor_nn(pfret)
87 | return torch.sum(processed_char * processed_pfret, dim=1)
88 |
89 |
90 | # train_one_epoch
91 | def __train_one_epoch(self):
92 | epoch_loss = 0.0
93 | for i, (beta_nn_input, factor_nn_input, labels) in enumerate(self.train_dataloader):
94 | self.optimizer.zero_grad()
95 | # beta_nn_input reshape: (1, 94, 94) -> (94, 94) (1*P*N => N*P)
96 | # factor_nn_input reshape: (1, 94, 1) -> (1, 94) (1*P*1 => 1*P)
97 | # labels reshape: (1, 94) -> (94, ) (1*N => N,)
98 | beta_nn_input = beta_nn_input.squeeze(0).T
99 | factor_nn_input = factor_nn_input.squeeze(0).T
100 | labels = labels.squeeze(0)
101 | output = self.forward(beta_nn_input, factor_nn_input)
102 | loss = self.criterion(output, labels)
103 |
104 | loss.backward()
105 | self.optimizer.step()
106 | epoch_loss += loss.item()
107 |
108 | if i % 100 == 0:
109 | # print(f'Batches: {i}, loss: {loss.item()}')
110 | pass
111 |
112 | return epoch_loss / len(self.train_dataloader)
113 |
114 |
115 | def __valid_one_epoch(self):
116 | epoch_loss = 0.0
117 | for i, (beta_nn_input, factor_nn_input, labels) in enumerate(self.valid_dataloader):
118 | # beta_nn_input reshape: (1, 94, 94) -> (94, 94) (1*P*N => N*P)
119 | # factor_nn_input reshape: (1, 94, 1) -> (1, 94) (1*P*1 => 1*P)
120 | # labels reshape: (1, 94) -> (94, ) (1*N => N,)
121 | beta_nn_input = beta_nn_input.squeeze(0).T
122 | factor_nn_input = factor_nn_input.squeeze(0).T
123 | labels = labels.squeeze(0)
124 |
125 | output = self.forward(beta_nn_input, factor_nn_input)
126 | loss = self.criterion(output, labels)
127 | epoch_loss += loss.item()
128 |
129 | return epoch_loss / len(self.valid_dataloader)
130 |
131 |
132 | def train_model(self):
133 | if 'saved_models' not in os.listdir('./'):
134 | os.mkdir('saved_models')
135 |
136 | self.train_dataloader = self.dataloader(self.train_period)
137 | self.valid_dataloader = self.dataloader(self.valid_period)
138 | self.test_dataloader = self.dataloader(self.test_period)
139 |
140 | min_error = np.Inf
141 | no_update_steps = 0
142 | valid_loss = []
143 | train_loss = []
144 | for i in range(MAX_EPOCH):
145 | # print(f'Epoch {i}')
146 | self.train()
147 | train_error = self.__train_one_epoch()
148 | train_loss.append(train_error)
149 |
150 | self.eval()
151 | # valid and early stop
152 | with torch.no_grad():
153 | valid_error = self.__valid_one_epoch()
154 |
155 | valid_loss.append(valid_error)
156 | if valid_error < min_error:
157 | min_error = valid_error
158 | no_update_steps = 0
159 | # save model
160 | torch.save(self.state_dict(), f'./saved_models/{self.name}.pt')
161 | else:
162 | no_update_steps += 1
163 |
164 | if no_update_steps > 2: # early stop, if consecutive 3 epoches no improvement on validation set
165 | print(f'Early stop at epoch {i}')
166 | break
167 | # load from (best) saved model
168 | self.load_state_dict(torch.load(f'./saved_models/{self.name}.pt'))
169 | return train_loss, valid_loss
170 |
171 |
172 | def test_model(self):
173 | # beta, factor, label = self.test_dataset
174 | # i = np.random.randint(len(beta))
175 | # beta_nn_input = beta[i]
176 | # factor_nn_input = factor[i]
177 | # labels = label[i]
178 | output = None
179 | label = None
180 | for i, beta_nn_input, factor_nn_input, labels in enumerate(self.test_dataloader):
181 | # convert to tensor
182 | # beta_nn_input = torch.tensor(beta_nn_input, dtype=torch.float32).T.to(self.device)
183 | # factor_nn_input = torch.tensor(factor_nn_input, dtype=torch.float32).T.to(self.device)
184 | # labels = torch.tensor(labels, dtype=torch.float32).T.to(self.device)
185 | output = self.forward(beta_nn_input, factor_nn_input)
186 | break
187 |
188 | loss = self.criterion(output, labels)
189 | print(f'Test loss: {loss.item()}')
190 | print(f'Predicted: {output}')
191 | print(f'Ground truth: {labels}')
192 | return output, labels
193 |
194 |
195 | def calBeta(self, month, skip_char=[]):
196 | _, beta_nn_input, _, _ = self._get_item(month) # beta input: 94*94 = P*N
197 |
198 | # if some variables need be omitted
199 | if len(skip_char):
200 | beta_nn_input = pd.DataFrame(beta_nn_input.T, columns=CHARAS_LIST) # N*P
201 | beta_nn_input[skip_char] = beta_nn_input[skip_char] * 0.0
202 | beta_nn_input = beta_nn_input.values.T # P*N
203 |
204 | beta_nn_input = torch.tensor(beta_nn_input, dtype=torch.float32).T.to(self.device) # N*P
205 | return self.beta_nn(beta_nn_input) # N*K
206 |
207 |
208 | def calFactor(self, month, skip_char=[]):
209 | _, _, factor_nn_input, _ = self._get_item(month) # factor input: P*1
210 |
211 | # if some variables need be omitted
212 | if len(skip_char):
213 | factor_nn_input = pd.DataFrame(factor_nn_input.T, columns=CHARAS_LIST) # 1*P
214 | factor_nn_input[skip_char] = factor_nn_input[skip_char] * 0.0
215 | factor_nn_input = factor_nn_input.values.T # P*1
216 |
217 | factor_nn_input = torch.tensor(factor_nn_input, dtype=torch.float32).T.to(self.device) # 1*P
218 | factor_pred = self.factor_nn(factor_nn_input).T # K*1
219 |
220 | self.factor_nn_pred.append(factor_pred)
221 |
222 | return factor_pred # K*1
223 |
224 |
225 | def inference(self, month):
226 | if len(self.omit_char) == 0:
227 | assert month >= self.test_period[0], f"Month error, {month} is not in test period {self.test_period}"
228 |
229 | mon_factor, mon_beta = self.calFactor(month), self.calBeta(month)
230 |
231 | assert mon_beta.shape[1] == mon_factor.shape[0], f"Dimension mismatch between mon_factor: {mon_factor.shape} and mon_beta: {mon_beta.shape}"
232 |
233 | # R_{N*1} = Beta_{N*K} @ F_{K*1}
234 | return mon_beta @ mon_factor
235 | else:
236 | ret_R = []
237 | for char in self.omit_char:
238 | mon_factor, mon_beta = self.calFactor(month, [char]), self.calBeta(month, [char])
239 | ret_R.append((mon_beta @ mon_factor).cpu().detach().numpy()) # N*1
240 |
241 | mon_factor, mon_beta = self.calFactor(month), self.calBeta(month)
242 | ret_R.append((mon_beta @ mon_factor).cpu().detach().numpy()) # also add complete result
243 |
244 | return np.array(ret_R).squeeze(2).T # N*m
245 |
246 |
247 | def cal_delayed_Factor(self, month):
248 | # calculate the last day of the previous month
249 | if self.refit_cnt == 0:
250 | avg_f_pred = self.factor_nn_pred[0] # input of the first predict take hat{f}_t
251 | # print(avg_f_pred.shape)
252 | else:
253 | avg_f_pred = torch.mean(torch.stack(self.factor_nn_pred[:self.refit_cnt]), dim=0)
254 |
255 | return avg_f_pred
256 |
257 |
258 | def reset_weight(self):
259 | for layer in self.beta_nn: # reset beta_nn parameters
260 | if hasattr(layer, 'reset_parameters'):
261 | layer.reset_parameters()
262 |
263 | for layer in self.factor_nn: # reset factor_nn parameters
264 | if hasattr(layer, 'reset_parameters'):
265 | layer.reset_parameters()
266 |
267 | self.optimizer.state = collections.defaultdict(dict) # reset optimizer state
268 |
269 |
270 | def release_gpu(self):
271 | if self.train_dataloader is not None:
272 | del self.train_dataloader
273 | if self.valid_dataloader is not None:
274 | del self.valid_dataloader
275 | if self.test_dataloader is not None:
276 | del self.test_dataloader
277 | torch.cuda.empty_cache()
278 |
279 |
280 |
class CA0(CA_base):
    """Conditional autoencoder with purely linear networks.

    Both the beta network and the factor network are a single linear
    projection from the P=94 characteristics to K=hidden_size factors.
    """

    def __init__(self, hidden_size, lr=0.001, omit_char=[], device='cuda'):
        CA_base.__init__(self, name=f'CA0_{hidden_size}', omit_char=omit_char, device=device)
        # single projection P -> K on both sides
        self.beta_nn = nn.Sequential(nn.Linear(94, hidden_size))
        self.factor_nn = nn.Sequential(nn.Linear(94, hidden_size))

        self.optimizer = torch.optim.Adam(self.parameters(), lr=lr)
        self.criterion = nn.MSELoss().to(device)
295 |
296 |
297 |
class CA1(CA_base):
    """Conditional autoencoder with one hidden layer in the beta network.

    Beta network: P -> 32 -> K (BatchNorm + ReLU + Dropout after the
    hidden layer); factor network stays linear (P -> K).
    """

    def __init__(self, hidden_size, dropout=0.5, lr=0.001, omit_char=[], device='cuda'):
        CA_base.__init__(self, name=f'CA1_{hidden_size}', omit_char=omit_char, device=device)
        self.dropout = dropout

        layers = []
        for d_in, d_out in zip([94], [32]):
            layers += [
                nn.Linear(d_in, d_out),
                nn.BatchNorm1d(d_out),
                nn.ReLU(),
                nn.Dropout(self.dropout),
            ]
        layers.append(nn.Linear(32, hidden_size))  # output layer
        self.beta_nn = nn.Sequential(*layers)
        self.factor_nn = nn.Sequential(nn.Linear(94, hidden_size))

        self.optimizer = torch.optim.Adam(self.parameters(), lr=lr)
        self.criterion = nn.MSELoss().to(device)
318 |
319 |
320 |
class CA2(CA_base):
    """Conditional autoencoder with two hidden layers in the beta network.

    Beta network: P -> 32 -> 16 -> K (BatchNorm + ReLU + Dropout after
    each hidden layer); factor network stays linear (P -> K).
    """

    def __init__(self, hidden_size, dropout=0.5, lr=0.001, omit_char=[], device='cuda'):
        CA_base.__init__(self, name=f'CA2_{hidden_size}', omit_char=omit_char, device=device)
        self.dropout = dropout

        dims = [94, 32, 16]
        layers = []
        for d_in, d_out in zip(dims[:-1], dims[1:]):
            layers += [
                nn.Linear(d_in, d_out),
                nn.BatchNorm1d(d_out),
                nn.ReLU(),
                nn.Dropout(self.dropout),
            ]
        layers.append(nn.Linear(dims[-1], hidden_size))  # output layer
        self.beta_nn = nn.Sequential(*layers)
        self.factor_nn = nn.Sequential(nn.Linear(94, hidden_size))

        self.optimizer = torch.optim.Adam(self.parameters(), lr=lr)
        self.criterion = nn.MSELoss().to(device)
346 |
347 |
348 |
class CA3(CA_base):
    """Conditional autoencoder with three hidden layers in the beta network.

    Beta network: P -> 32 -> 16 -> 8 -> K (BatchNorm + ReLU + Dropout after
    each hidden layer); factor network stays linear (P -> K).
    """

    def __init__(self, hidden_size, dropout=0.5, lr=0.001, omit_char=[], device='cuda'):
        CA_base.__init__(self, name=f'CA3_{hidden_size}', omit_char=omit_char, device=device)
        self.dropout = dropout

        dims = [94, 32, 16, 8]
        layers = []
        for d_in, d_out in zip(dims[:-1], dims[1:]):
            layers += [
                nn.Linear(d_in, d_out),
                nn.BatchNorm1d(d_out),
                nn.ReLU(),
                nn.Dropout(self.dropout),
            ]
        layers.append(nn.Linear(dims[-1], hidden_size))  # output layer
        self.beta_nn = nn.Sequential(*layers)
        self.factor_nn = nn.Sequential(nn.Linear(94, hidden_size))

        # NOTE(review): CA3 is the only variant with weight_decay (L2)
        # regularisation — confirm this asymmetry is intentional
        self.optimizer = torch.optim.Adam(self.parameters(), lr=lr, weight_decay=0.01)
        self.criterion = nn.MSELoss().to(device)
--------------------------------------------------------------------------------
/models/FF.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.append('../')
3 |
4 | from utils import CHARAS_LIST
5 | from .modelBase import modelBase
6 |
7 | import pandas as pd
8 | import statsmodels.api as sm
9 | from dateutil.relativedelta import relativedelta
10 |
11 |
class FF(modelBase):
    """Fama-French factor model benchmark.

    Uses the first K of six observable factors (Mkt-RF, SMB, HML, CMA,
    RMW, UMD) and estimates a time-invariant beta for each characteristic
    portfolio by OLS over the training window.
    """

    def __init__(self, K):
        super(FF, self).__init__(f'FF_{K}')
        self.K = K
        self.train_period[0] = 19630731  # ff5 data from FF website is only available from 196307
        self.omit_char = []
        self.__prepare_FFf()

    def __prepare_FFf(self):
        """Load FF5 + momentum factor series and the portfolio return panel."""
        ff5 = pd.read_csv('data/ff5.csv', index_col=0)
        UMD = pd.read_csv('data/UMD.csv', index_col=0)
        UMD.columns = ['UMD']
        FFf = pd.concat([ff5, UMD.loc[196307:]], axis=1)
        self.fname = ['Mkt-RF', 'SMB', 'HML', 'CMA', 'RMW', 'UMD']
        self.FFf = FFf[self.fname]
        self.portfolio_ret = pd.read_pickle('data/portfolio_ret.pkl')
        # portfolio DATE is yyyymmdd; factor data is indexed by yyyymm
        self.portfolio_ret['DATE'] = self.portfolio_ret['DATE'].apply(lambda x: x//100)

    def train_model(self):
        """Regress each characteristic portfolio on the first K factors (OLS)."""
        self.beta_matrix = []
        X = self.FFf[self.fname[:self.K]].loc[self.train_period[0]//100:self.train_period[1]//100]
        for col in CHARAS_LIST:
            y = self.portfolio_ret.set_index('DATE')[col].loc[self.train_period[0]//100:self.train_period[1]//100]
            model = sm.OLS(y.values, X.values).fit()
            self.beta_matrix.append(model.params)
        self.beta_matrix = pd.DataFrame(self.beta_matrix, columns=self.fname[:self.K], index=CHARAS_LIST)

    def calBeta(self, month):  # beta is time invariant
        """Return the fitted loadings, dim N * K."""
        return self.beta_matrix

    def calFactor(self, month):
        """Return the realised factor values for the month, dim K * 1."""
        return self.FFf[self.fname[:self.K]].loc[month//100]

    def cal_delayed_Factor(self, month):
        """Average of realised factors from 198701 up to month t-1.

        The previous month key is computed with strftime('%Y%m') instead of
        the original fragile string-split/replace/slice chain (identical
        result for yyyymmdd inputs).
        """
        last_mon = int((pd.to_datetime(str(month), format='%Y%m%d') - relativedelta(months=1)).strftime('%Y%m'))
        # return average of prevailing sample hat{f} (from 198701) up to t-1
        return self.FFf[self.fname[:self.K]].loc[198701:last_mon].mean()
54 |
55 |
--------------------------------------------------------------------------------
/models/IPCA.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 |
4 | import sys
5 | sys.path.append('../')
6 |
7 | from utils import *
8 | from .modelBase import modelBase
9 |
10 |
class IPCA(modelBase):
    """Instrumented PCA on characteristic-managed portfolios.

    Gamma (P*K) maps the P=94 characteristics into factor loadings
    (beta = Z @ Gamma); Gamma is estimated by alternating least squares
    with early stopping on the validation window.

    Fixes vs. the original: np.Inf -> np.inf (removed in NumPy 2.0) and
    the deprecated np.matrix(...).I inversion replaced by np.linalg.inv,
    with the repeated OLS factor expression factored into _ols_factor.
    """

    def __init__(self, K, omit_char=[]):
        super(IPCA, self).__init__(f'IPCA_{K}')
        self.K = K
        self.omit_char = omit_char
        np.random.seed(10)  # deterministic Gamma initialisation
        self.gamma = np.random.random([94, self.K])  # P = 94, we have total 94 characteristics
        self.valid_error = []
        self.__prepare_data()

    def __prepare_data(self):
        """Load pre-computed portfolio returns, characteristics and month list."""
        self.portfolio_ret = pd.read_pickle('data/portfolio_ret.pkl')
        self.p_charas = pd.read_pickle('data/p_charas.pkl')
        self.mon_list = pd.read_pickle('data/mon_list.pkl')

    @staticmethod
    def _ols_factor(beta, y):
        """Cross-sectional OLS factor estimate (B'B)^{-1} B'y, dim K * 1.

        beta.T @ beta is K*K and assumed invertible, matching the original
        np.matrix(...).I behaviour for square matrices.
        """
        return np.linalg.inv(beta.T @ beta) @ beta.T @ y

    def __valid(self):
        """Sum of squared residuals over the validation window (lower is better)."""
        mse_set = []
        for mon in self.mon_list[(self.mon_list >= self.valid_period[0]) & (self.mon_list <= self.valid_period[1])]:
            Z = self.p_charas.loc[self.p_charas.DATE == mon][CHARAS_LIST].values  # N * P
            y = self.portfolio_ret.loc[self.portfolio_ret.DATE == mon][CHARAS_LIST].values.T  # N * 1
            beta = Z @ self.gamma  # N * K
            f_hat = self._ols_factor(beta, y)  # K * 1
            residual = y - beta @ f_hat
            mse_set.append(np.sum(residual**2))

        valid_error = sum(mse_set)
        self.valid_error.append(valid_error)

        return valid_error

    def __gamma_iter(self, gamma_old):
        """One ALS update of Gamma given the previous estimate (P*K -> P*K)."""
        numer = np.zeros((94*self.K, 1))
        denom = np.zeros((94*self.K, 94*self.K))
        for mon in self.mon_list[(self.mon_list >= self.train_period[0]) & (self.mon_list <= self.train_period[1])]:
            Z = self.p_charas.loc[self.p_charas.DATE == mon][CHARAS_LIST].values  # N * P
            y = self.portfolio_ret.loc[self.portfolio_ret.DATE == mon][CHARAS_LIST].values.T  # N * 1
            beta = Z @ gamma_old  # N * K
            f_hat = self._ols_factor(beta, y)  # K * 1
            # accumulate the normal equations of the vectorised Gamma regression
            numer += (np.kron(f_hat, Z.T) @ y)
            denom += (np.kron(f_hat, Z.T) @ np.kron(f_hat.T, Z))

        gamma_new = (np.linalg.pinv(denom) @ numer).reshape(self.K, 94)
        return gamma_new.T

    def train_model(self):
        """Alternate Gamma updates, keeping the best validation-error Gamma.

        Stops after 5 consecutive updates without improvement.
        """
        update_cnt = 0
        min_valid_err = np.inf  # np.Inf was removed in NumPy 2.0
        best_gamma = np.zeros((94, self.K))
        while update_cnt < 5:
            self.gamma = self.__gamma_iter(self.gamma)
            valid_error = self.__valid()
            if valid_error < min_valid_err:
                min_valid_err = valid_error
                best_gamma = self.gamma
                update_cnt = 0
            else:
                update_cnt += 1

        self.gamma = best_gamma

    def inference(self, month):
        """Fitted returns for `month`.

        Without omit_char: flat N-vector of beta @ f_hat. With omit_char:
        N*m array, one column per omitted characteristic plus a final
        column for the unrestricted model.
        """
        if not len(self.omit_char):
            Z = self.p_charas.loc[self.p_charas.DATE == month][CHARAS_LIST].values  # N * P
            y = self.portfolio_ret.loc[self.portfolio_ret.DATE == month][CHARAS_LIST].values.T  # N * 1
            beta = Z @ self.gamma  # N * K
            f_hat = self._ols_factor(beta, y)  # K * 1
            return (beta @ f_hat).flatten()  # N,
        else:
            inference_R = []
            Z = self.p_charas.loc[self.p_charas.DATE == month][CHARAS_LIST].copy(deep=False)
            y = self.portfolio_ret.loc[self.portfolio_ret.DATE == month][CHARAS_LIST].copy(deep=False)

            for char in self.omit_char:
                # zero out one characteristic in both instruments and returns
                Z_input = Z.copy(deep=False)
                y_input = y.copy(deep=False)
                Z_input[[char]] = Z_input[[char]] * 0.0
                y_input[[char]] = y_input[[char]] * 0.0
                Z_input = Z_input.values
                y_input = y_input.values.T
                beta = Z_input @ self.gamma
                f_hat = self._ols_factor(beta, y_input)  # K * 1
                inference_R.append((beta @ f_hat).flatten())

            # also add the complete (unrestricted) result as the last column
            Z_input = Z.values
            y_input = y.values.T
            beta = Z_input @ self.gamma
            f_hat = self._ols_factor(beta, y_input)  # K * 1
            inference_R.append((beta @ f_hat).flatten())

            return np.array(inference_R).T  # N * m

    def predict(self, month):
        """Out-of-sample prediction: beta_t times the average of prevailing
        factor estimates from 198701 up to t-1 (plain inference on the
        first fit, when no history exists yet)."""
        if self.refit_cnt == 0:
            return self.inference(month)

        lag_f_hat = []
        for mon in self.mon_list[(self.mon_list >= 19870101) & (self.mon_list < month)]:
            Z = self.p_charas.loc[self.p_charas.DATE == mon][CHARAS_LIST].values  # N * P
            y = self.portfolio_ret.loc[self.portfolio_ret.DATE == mon][CHARAS_LIST].values.T  # N * 1
            beta = Z @ self.gamma  # N * K
            lag_f_hat.append(self._ols_factor(beta, y))

        Z = self.p_charas.loc[self.p_charas.DATE == month][CHARAS_LIST].values  # N * P
        beta = Z @ self.gamma  # N * K

        # return average of prevailing sample hat{f} (from 198701) up to t-1
        avg_lag_f = np.mean(lag_f_hat, axis=0)
        return beta @ avg_lag_f
--------------------------------------------------------------------------------
/models/PCA.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.append('../')
3 |
4 | import pandas as pd
5 | import numpy as np
6 |
7 | from utils import *
8 | from .modelBase import modelBase
9 |
10 |
def stock_R_matrix(start_date, end_date):
    """Return the stock return matrix restricted to [start_date, end_date].

    Dates live on the columns of the pickled matrix, so the slice is taken
    on the transpose and flipped back.
    """
    full_matrix = pd.read_pickle('data/stock_R_matrix.pkl')
    return full_matrix.T.loc[start_date:end_date].T
14 |
def portfolio_R_matrix(start_date, end_date):
    """Return portfolio returns (indexed by DATE, transposed) within
    [start_date, end_date], both bounds inclusive."""
    ret = pd.read_pickle('data/portfolio_ret.pkl')
    in_window = (ret['DATE'] >= start_date) & (ret['DATE'] <= end_date)
    return ret.loc[in_window].set_index('DATE').T
18 |
19 |
20 |
class PCA(modelBase):
    """Static PCA benchmark on characteristic-portfolio returns.

    Beta is the set of top-K eigenvectors of the (demeaned) return
    second-moment matrix; factors are recovered by cross-sectional OLS.

    Fixes vs. the original: the per-month outer-product loop is replaced
    by a single `pr.T @ pr` (identical sum, O(1) Python overhead), and the
    deprecated np.matrix(...).I inversion by np.linalg.inv.
    """

    def __init__(self, K, omit_char=[]):
        super(PCA, self).__init__(f'PCA_{K}')
        self.K = K
        self.omit_char = omit_char
        self.portfolio_ret = pd.read_pickle('data/portfolio_ret.pkl')

    def train_model(self):
        """Estimate beta (N*K) from the training-window return panel."""
        pr = self.portfolio_ret.loc[(self.portfolio_ret.DATE >= self.train_period[0]) & (self.portfolio_ret.DATE <= self.train_period[1])][CHARAS_LIST].values
        pr = pr - np.mean(pr, axis=0)  # col demean
        # sum_t y_t y_t' == pr.T @ pr (vectorised; the original looped over t)
        # the scaling constant does not affect the eigenvectors
        ret_cov_matrix = (pr.T @ pr) / (pr.shape[0] * pr.shape[1])  # N * N

        eigVal, eigVec = np.linalg.eig(ret_cov_matrix)
        sorted_indices = np.argsort(eigVal)
        self.beta = eigVec[:, sorted_indices[:-self.K-1:-1]]  # Beta: N * K, largest eigenvalues first

    def calBeta(self, month):
        """Time-invariant loadings, dim N * K."""
        return np.real(self.beta)

    def calFactor(self, month):
        """Cross-sectional OLS factor estimate for the latest month <= `month`."""
        tr = self.portfolio_ret.loc[self.portfolio_ret.DATE <= month].iloc[-1][CHARAS_LIST].values
        tr = tr - np.mean(tr, axis=0)  # col demean
        # (B'B)^{-1} B' r — replaces the deprecated np.matrix(...).I idiom
        factor = np.linalg.inv(self.beta.T @ self.beta) @ self.beta.T @ tr
        return np.real(np.asarray(factor).flatten())

    def cal_delayed_Factor(self, month):
        """Average of factor estimates from 198701 up to month t-1.

        On the first fit (refit_cnt == 0) there is no prevailing-sample
        history, so the contemporaneous factor is returned instead.
        """
        if self.refit_cnt == 0:
            return self.calFactor(month)

        tr = self.portfolio_ret.loc[(self.portfolio_ret.DATE >= 19870101) & (self.portfolio_ret.DATE < month)][CHARAS_LIST].values
        tr = tr - np.mean(tr, axis=0)  # col demean

        # project all months at once: (K*N) @ (N*T) -> K * T
        # (the original solved the same OLS one month at a time)
        proj = np.linalg.inv(self.beta.T @ self.beta) @ self.beta.T
        factors = proj @ tr.T  # K * T

        avg_delay_f = np.mean(factors, axis=1).reshape(-1, 1)  # K * 1
        return np.real(avg_delay_f.flatten())
70 |
--------------------------------------------------------------------------------
/models/modelBase.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | import datetime
4 | from dateutil.relativedelta import relativedelta
5 |
6 | class modelBase:
7 | def __init__(self, name):
8 | self.name = name
9 | self.train_idx = 0
10 | self.refit_cnt = 0
11 |
12 | # initial train, valid and test periods are default accroding to original paper
13 | self.train_period = [19570101, 19741231]
14 | self.valid_period = [19750101, 19861231]
15 | self.test_period = [19870101, 19871231]
16 |
17 |
18 | def train_model(self):
19 | # print('trained')
20 | pass
21 |
22 |
23 | def calBeta(self, month):
24 | """
25 | Calculate specific month's beta. Should be specified by different models
26 | -> return np.array, dim = (N, K)
27 | """
28 | # return np.zeros([13000, 3])
29 | pass
30 |
31 |
32 | def calFactor(self, month):
33 | """
34 | Calculate specific month's factor. Should be specified by different models
35 | -> return np.array, dim = (K, 1)
36 | """
37 | # return np.zeros([3, 1])
38 | pass
39 |
40 |
41 | def cal_delayed_Factor(self, month):
42 | """
43 | Calculate delayed month's factor, i.e. mean average of factors up to t-1. Should be specified by different models
44 | -> return np.array, dim = (K, 1)
45 | """
46 | pass
47 |
48 |
49 | def inference(self, month):
50 | assert month >= self.test_period[0], f"Month error, {month} is not in test period {self.test_period}"
51 |
52 | mon_factor, mon_beta = self.calFactor(month), self.calBeta(month)
53 |
54 | assert mon_beta.shape[1] == mon_factor.shape[0], f"Dimension mismatch between mon_factor: {mon_factor.shape} and mon_beta: {mon_beta.shape}"
55 |
56 | # R_{N*1} = Beta_{N*K} @ F_{K*1}
57 | return mon_beta @ mon_factor
58 |
59 |
60 | def predict(self, month):
61 | assert month >= self.test_period[0] and month <= self.test_period[1], f"Month error, {month} is not in test period {self.test_period}"
62 |
63 | lag_factor, mon_beta = self.cal_delayed_Factor(month), self.calBeta(month)
64 |
65 | assert mon_beta.shape[1] == lag_factor.shape[0], f"Dimension mismatch between lag_factor: {lag_factor.shape} and mon_beta: {mon_beta.shape}"
66 |
67 | # R_{N*1} = Beta_{N*K} @ lag_F_avg{K*1}
68 | return mon_beta @ lag_factor
69 |
70 |
71 | def refit(self):
72 | # self.train_period[1] += 10000 # method in original paper: increase training size by one year each time refit
73 | self.train_period = (pd.Series(self.train_period) + 10000).to_list() # rolling training
74 | self.valid_period = (pd.Series(self.valid_period) + 10000).to_list()
75 | self.test_period = (pd.Series(self.test_period) + 10000).to_list()
76 | self.refit_cnt += 1
77 |
78 |
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import os
3 |
4 | # # stock-level characteristics with index corresponding to original paper
5 | # annual_chara = {
6 | # 'absacc': 1, 'acc': 2, 'age': 4, 'agr': 5, 'bm': 9,
7 | # 'bm_ia': 10, 'cashdebt': 12, 'cashpr': 13, 'cfp': 14, 'cfp_ia': 15,
8 | # 'chatoia': 16, 'chcsho': 17, 'chempia': 18, 'chinv': 19, 'chpmia': 21,
9 | # 'convind': 24, 'currat': 25, 'depr': 26, 'divi': 27, 'divo': 28,
10 | # 'dy': 30, 'egr': 32, 'ep': 33, 'gma': 34, 'grcapx': 35,
11 | # 'grltnoa': 36, 'herf': 37, 'hire': 38, 'invest': 42, 'lev': 43,
12 | # 'lgr': 44, 'mve_ia': 52, 'operprof': 54, 'orgcap': 55, 'pchcapx_ia': 56,
13 | # 'pchcurrat': 57, 'pchdepr': 58, 'pchgm_pchsale': 59, 'pchquick': 60, 'pchsale_pchinvt': 61,
14 | # 'pchsale_pchrect': 62, 'pchsale_pchxsga': 63, 'pchsaleinv': 64, 'pctacc': 65, 'ps': 67,
15 | # 'quick': 68, 'rd': 69, 'rd_mve': 70, 'rd_sale': 71, 'realestate': 72,
16 | # 'roic': 77, 'salecash': 79, 'saleinv': 80, 'salerec': 81, 'secured': 82,
17 | # 'securedind': 83, 'sgr': 84, 'sin': 85, 'sp': 86, 'tang': 91, 'tb': 92
18 | # }
19 |
20 | # quarter_chara = {
21 | # 'aeavol': 3, 'cash': 11, 'chtx': 22, 'cinvest': 23,
22 | # 'ear': 31, 'ms': 50, 'nincr': 53, 'roaq': 74,
23 | # 'roavol': 75, 'roeq': 76, 'rsup': 78, 'stdacc': 89, 'stdcf': 90
24 | # }
25 |
26 | # month_chara = {
27 | # 'baspread': 6, 'beta': 7, 'betasq': 8, 'chmom': 20,
28 | # 'dolvol': 29, 'idiovol': 39, 'ill': 40, 'indmom': 41,
29 | # 'maxret': 45, 'mom12m': 46, 'mom1m': 47, 'mom36m': 48,
30 | # 'mom6m': 49, 'mvel1': 51, 'pricedelay': 66, 'retvol': 73,
31 | # 'std_dolvol': 87, 'std_turn': 88, 'turn': 93, 'zerotrade': 94
32 | # }
33 |
# the 94 stock-level characteristics (P = 94) used across all models;
# the commented tables above map each name to its index in the original paper
CHARAS_LIST = ['absacc','acc','age','agr','bm','bm_ia','cashdebt','cashpr','cfp','cfp_ia','chatoia','chcsho','chempia','chinv','chpmia','convind','currat','depr','divi','divo','dy','egr','ep','gma','grcapx','grltnoa','herf','hire','invest','lev','lgr','mve_ia','operprof','orgcap','pchcapx_ia','pchcurrat','pchdepr','pchgm_pchsale','pchquick','pchsale_pchinvt','pchsale_pchrect','pchsale_pchxsga','pchsaleinv','pctacc','ps','quick','rd','rd_mve','rd_sale','realestate','roic','salecash','saleinv','salerec','secured','securedind','sgr','sin','sp','tang','tb','aeavol','cash','chtx','cinvest','ear','ms','nincr','roaq','roavol','roeq','rsup','stdacc','stdcf','baspread','beta','betasq','chmom','dolvol','idiovol','ill','indmom','maxret','mom12m','mom1m','mom36m','mom6m','mvel1','pricedelay','retvol','std_dolvol','std_turn','turn','zerotrade']


# default hyper-parameters of the CA (conditional autoencoder) models
CA_DR = 0.5 # drop out rate
CA_LR = 0.001 # learning rate

# out of sample period (yyyymmdd ints)
OOS_start = 19870101
OOS_end = 20161231
44 |
45 |
46 |
class HiddenPrints:
    """Context manager that silences stdout while the block is active.

    NOTE(review): the method names are inverted relative to their effect —
    close() starts suppressing output and open() restores it. Kept as-is
    because callers may invoke them directly.
    """

    def __init__(self, activated=True):
        # when activated is False the context manager is a no-op
        self.activated = activated
        self.original_stdout = None

    def open(self):
        """Restore the real stdout (closes the devnull handle first)."""
        sys.stdout.close()
        sys.stdout = self.original_stdout

    def close(self):
        """Begin suppression: remember stdout and point it at os.devnull."""
        self.original_stdout = sys.stdout
        sys.stdout = open(os.devnull, 'w')

    def __enter__(self):
        if self.activated:
            self.close()

    def __exit__(self, exc_type, exc_val, exc_tb):
        if self.activated:
            self.open()
67 |
68 |
69 |
def git_push(message):
    """Commit the results directory and push to the remote.

    Parameters
    ----------
    message : str
        Appended to the fixed 'no_dropout: ' commit-message prefix.

    BUG FIX: the original interpolated `message` into a shell string, so a
    message containing quotes or shell metacharacters broke (or injected
    into) the command. Argument lists with the default shell=False pass the
    message through verbatim. check=False mirrors os.system's ignore-errors
    behaviour.
    """
    import subprocess  # local import keeps module-level dependencies unchanged
    subprocess.run(['git', 'add', 'results'], check=False)
    subprocess.run(['git', 'commit', '-m', f'no_dropout: {message}'], check=False)
    subprocess.run(['git', 'push'], check=False)
--------------------------------------------------------------------------------