├── .gitignore ├── contributing.md ├── cover.png ├── data ├── SPY.csv ├── alternative_data │ ├── AWU.csv │ ├── AXC.csv │ ├── BGN.csv │ ├── BMG.csv │ ├── CHWK.csv │ ├── CUU.csv │ ├── DLVY.csv │ ├── DVRL.csv │ ├── EHH.csv │ ├── EUZ.csv │ ├── EXY.csv │ ├── EZVY.csv │ ├── FJKV.csv │ ├── FJZC.csv │ ├── FRCE.csv │ ├── FSVO.csv │ ├── HECP.csv │ ├── HER.csv │ ├── HESC.csv │ ├── HRVC.csv │ ├── HXX.csv │ ├── IEZI.csv │ ├── IOV.csv │ ├── IRX.csv │ ├── IYPP.csv │ ├── JMS.csv │ ├── JWDV.csv │ ├── JXAN.csv │ ├── KEQ.csv │ ├── KER.csv │ ├── KGO.csv │ ├── KSR.csv │ ├── KTVL.csv │ ├── KUAQ.csv │ ├── LJGB.csv │ ├── LMLK.csv │ ├── MEF.csv │ ├── MHNG.csv │ ├── MTZB.csv │ ├── NDQC.csv │ ├── NEGH.csv │ ├── NKP.csv │ ├── NSLG.csv │ ├── NZSR.csv │ ├── OBAK.csv │ ├── OCG.csv │ ├── OKK.csv │ ├── OLE.csv │ ├── OWKQ.csv │ ├── OZI.csv │ ├── OZMT.csv │ ├── PDX.csv │ ├── PQCE.csv │ ├── PSWA.csv │ ├── PUO.csv │ ├── PYTC.csv │ ├── QEBK.csv │ ├── RALO.csv │ ├── RLZA.csv │ ├── RPEZ.csv │ ├── RQG.csv │ ├── RVWV.csv │ ├── RZW.csv │ ├── SEBI.csv │ ├── SGVQ.csv │ ├── SIP.csv │ ├── SLRR.csv │ ├── SXQ.csv │ ├── TGXX.csv │ ├── TQKA.csv │ ├── TRE.csv │ ├── TUMH.csv │ ├── UAIG.csv │ ├── UFT.csv │ ├── UTQ.csv │ ├── UZS.csv │ ├── VBB.csv │ ├── VCXW.csv │ ├── VDHJ.csv │ ├── VHE.csv │ ├── VLOZ.csv │ ├── WFS.csv │ ├── WHMG.csv │ ├── WNE.csv │ ├── WOXI.csv │ ├── XAU.csv │ ├── XBN.csv │ ├── XJJI.csv │ ├── XSOQ.csv │ ├── XWR.csv │ ├── XYCJ.csv │ ├── XZFM.csv │ ├── YPDA.csv │ ├── ZEA.csv │ ├── ZGL.csv │ ├── ZOE.csv │ ├── ZWH.csv │ ├── ZWNG.csv │ ├── ZXGV.csv │ └── ZZQB.csv └── eod │ ├── AWU.csv │ ├── AXC.csv │ ├── BGN.csv │ ├── BMG.csv │ ├── CHWK.csv │ ├── CUU.csv │ ├── DLVY.csv │ ├── DVRL.csv │ ├── EHH.csv │ ├── EUZ.csv │ ├── EXY.csv │ ├── EZVY.csv │ ├── FJKV.csv │ ├── FJZC.csv │ ├── FRCE.csv │ ├── FSVO.csv │ ├── HECP.csv │ ├── HER.csv │ ├── HESC.csv │ ├── HRVC.csv │ ├── HXX.csv │ ├── IEZI.csv │ ├── IOV.csv │ ├── IRX.csv │ ├── IYPP.csv │ ├── JMS.csv │ ├── JWDV.csv │ ├── JXAN.csv │ ├── KEQ.csv │ ├── KER.csv │ ├── KGO.csv │ ├── KSR.csv │ ├── KTVL.csv │ ├── KUAQ.csv │ ├── LJGB.csv │ ├── LMLK.csv │ ├── MEF.csv │ ├── MHNG.csv │ ├── MTZB.csv │ ├── NDQC.csv │ ├── NEGH.csv │ ├── NKP.csv │ ├── NSLG.csv │ ├── NZSR.csv │ ├── OBAK.csv │ ├── OCG.csv │ ├── OKK.csv │ ├── OLE.csv │ ├── OWKQ.csv │ ├── OZI.csv │ ├── OZMT.csv │ ├── PDX.csv │ ├── PQCE.csv │ ├── PSWA.csv │ ├── PUO.csv │ ├── PYTC.csv │ ├── QEBK.csv │ ├── RALO.csv │ ├── RLZA.csv │ ├── RPEZ.csv │ ├── RQG.csv │ ├── RVWV.csv │ ├── RZW.csv │ ├── SEBI.csv │ ├── SGVQ.csv │ ├── SIP.csv │ ├── SLRR.csv │ ├── SXQ.csv │ ├── TGXX.csv │ ├── TQKA.csv │ ├── TRE.csv │ ├── TUMH.csv │ ├── UAIG.csv │ ├── UFT.csv │ ├── UTQ.csv │ ├── UZS.csv │ ├── VBB.csv │ ├── VCXW.csv │ ├── VDHJ.csv │ ├── VHE.csv │ ├── VLOZ.csv │ ├── WFS.csv │ ├── WHMG.csv │ ├── WNE.csv │ ├── WOXI.csv │ ├── XAU.csv │ ├── XBN.csv │ ├── XJJI.csv │ ├── XSOQ.csv │ ├── XWR.csv │ ├── XYCJ.csv │ ├── XZFM.csv │ ├── YPDA.csv │ ├── ZEA.csv │ ├── ZGL.csv │ ├── ZOE.csv │ ├── ZWH.csv │ ├── ZWNG.csv │ ├── ZXGV.csv │ └── ZZQB.csv ├── license.txt ├── listings ├── chapter_1 │ ├── 1_1_type_hinting_examples.py │ ├── 1_2_pandas_data_types.py │ ├── 1_3_pandas_data_types_part_2.py │ └── 1_4_pandas_indexes.py ├── chapter_2 │ ├── 2_10_maximum_drawdown.py │ ├── 2_11_maximum_drawdown_with_metadata.py │ ├── 2_12_log_max_drawdown_ratio.py │ ├── 2_13_calmar_ratio.py │ ├── 2_14_pure_profit_score.py │ ├── 2_15_jensens_alpha.py │ ├── 2_1_return_series_pure_python.py │ ├── 2_2_return_series_pandas.py │ ├── 2_3_log_return_series.py │ ├── 2_4_annualized_volatility.py │ ├── 
2_5_annualized_volatility_on_awu.py │ ├── 2_6_calculating_cagr.py │ ├── 2_7_calculating_cagr_on_awu.py │ ├── 2_8_calculating_sharpe_ratio.py │ └── 2_9_calculating_downside_volatility.py ├── chapter_3 │ ├── 3_1_calculate_simple_moving_average.py │ ├── 3_2_slow_simple_moving_average.py │ ├── 3_3_fast_simple_moving_average.py │ ├── 3_4_calculating_macd.py │ ├── 3_5_calculate_bollinger_bands.py │ ├── 3_6_calculate_chaikin_money_flow.py │ └── 3_7_example_signals.py ├── chapter_4 │ ├── 4_1_assertions_example.py │ ├── 4_2_position_class.py │ ├── 4_3_position_object_usage.py │ ├── 4_4_portfolio_history_class.py │ ├── 4_5_portfolio_history_usage.py │ ├── 4_6_simple_simulator_class.py │ └── 4_7_simple_simulator_usage.py ├── chapter_5 │ ├── 5_1_grid_search_optimizer.py │ ├── 5_2_grid_search_example.py │ ├── 5_3_white_noise_preference_matrix.py │ └── 5_4_bootstrap_simulated_preference_matrix.py ├── chapter_6 │ ├── 6_1_loading_alternative_data.py │ └── 6_2_exploratory_analysis.py └── chapter_7 │ ├── 7_1_symmetric_cusum_filter_on_revenue.py │ ├── 7_2_computing_triple_barrier_labels.py │ ├── 7_3_computing_average_uniqueness.py │ ├── 7_4_computing_features.py │ ├── 7_5_modeling_and_cross_validation.py │ ├── 7_6_machine_learning_pipeline.py │ └── 7_7_simulation_with_machine_learning_model.py ├── readme.md └── src ├── __init__.py ├── bootstrap_portfolio.py ├── fit_alternative_data_model.py ├── optimize_portfolio.py ├── pypm ├── __init__.py ├── data_io.py ├── filters.py ├── indicators.py ├── labels.py ├── metrics.py ├── ml_model │ ├── __init__.py │ ├── data_io.py │ ├── events.py │ ├── features.py │ ├── labels.py │ ├── model.py │ ├── signals.py │ └── weights.py ├── optimization.py ├── portfolio.py ├── signals.py ├── simulation.py └── weights.py ├── simulate_alternative_data_portfolio.py ├── simulate_portfolio.py └── white_noise_portfolio.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | .Rproj.user 131 | -------------------------------------------------------------------------------- /contributing.md: -------------------------------------------------------------------------------- 1 | 2 | ## Contributing Guidelines 3 | 4 | Contributions are encouraged through pull requests for minor changes, fixes, and improvements that do not materially change the content as it corresponds to the book. 5 | 6 | Major modifications and improvements are encouraged via forks, but will not be pulled into this repository. 7 | 8 | -------------------------------------------------------------------------------- /cover.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisconlan/algorithmic-trading-with-python/ebe01087c7d9172db72bc3c9adc1eee5e882ac49/cover.png -------------------------------------------------------------------------------- /data/alternative_data/HRVC.csv: -------------------------------------------------------------------------------- 1 | date,value 2 | 2017-10-02,199742.0 3 | 2017-10-03,212548.0 4 | 2017-10-04,226800.0 5 | 2017-10-05,228232.0 6 | 2017-10-06,198208.0 7 | 2017-10-09,215753.0 8 | 2017-10-10,194550.0 9 | 2017-10-11,189900.0 10 | 2017-10-12,217522.0 11 | 2017-10-13,205112.0 12 | 2017-10-16,193099.0 13 | 2017-10-17,219615.0 14 | 2017-10-18,209341.0 15 | 2017-10-19,190206.0 16 | 2017-10-20,183482.0 17 | 2017-10-23,169123.0 18 | 2017-10-24,210535.0 19 | 2017-10-25,193737.0 20 | 2017-10-26,185285.0 21 | 2017-10-27,179315.0 22 | 2017-10-30,214524.0 23 | 2017-10-31,219727.0 24 | 2017-11-01,206819.0 25 | 2017-11-02,208283.0 26 | 2017-11-03,153447.0 27 | 2017-11-06,221032.0 28 | 2017-11-07,208531.0 29 | 2017-11-08,208685.0 30 | 2017-11-09,212152.0 31 | 2017-11-10,191342.0 32 | 2017-11-13,250969.0 33 | 2017-11-14,179738.0 34 | 2017-11-15,193917.0 35 | 2017-11-16,173358.0 36 | 2017-11-17,184790.0 37 | 2017-11-20,195448.0 38 | 2017-11-21,226394.0 39 | 2017-11-22,207083.0 40 | 2017-11-24,209734.0 41 | 2017-11-27,230518.0 42 | 2017-11-28,198218.0 43 | 2017-11-29,198546.0 44 | 2017-11-30,179337.0 45 | 2017-12-01,196046.0 46 | 2017-12-04,239066.0 47 | 2017-12-05,216157.0 48 | 2017-12-06,227464.0 49 | 2017-12-07,215148.0 50 | 2017-12-08,212035.0 51 | 2017-12-11,216586.0 52 | 2017-12-12,202155.0 53 | 2017-12-13,238222.0 54 | 2017-12-14,206124.0 55 | 2017-12-15,204381.0 56 | 2017-12-18,157521.0 57 | 2017-12-19,167363.0 58 | 2017-12-20,219725.0 59 | 2017-12-21,238015.0 60 | 2017-12-22,225680.0 61 | 2017-12-26,206946.0 62 | 2017-12-27,198663.0 63 | 2017-12-28,220129.0 64 | 2017-12-29,203207.0 65 | 2018-01-02,243368.0 66 | 2018-01-03,214976.0 67 | 
2018-01-04,178153.0 68 | 2018-01-05,196032.0 69 | 2018-01-08,205801.0 70 | 2018-01-09,179226.0 71 | 2018-01-10,217716.0 72 | 2018-01-11,220956.0 73 | 2018-01-12,214345.0 74 | 2018-01-16,211830.0 75 | 2018-01-17,211417.0 76 | 2018-01-18,189561.0 77 | 2018-01-19,221302.0 78 | 2018-01-22,197326.0 79 | 2018-01-23,200280.0 80 | 2018-01-24,175559.0 81 | 2018-01-25,220274.0 82 | 2018-01-26,178883.0 83 | 2018-01-29,186071.0 84 | 2018-01-30,230154.0 85 | 2018-01-31,187110.0 86 | 2018-02-01,225306.0 87 | 2018-02-02,204518.0 88 | 2018-02-05,186802.0 89 | 2018-02-06,217700.0 90 | 2018-02-07,204664.0 91 | 2018-02-08,223852.0 92 | 2018-02-09,230212.0 93 | 2018-02-12,199052.0 94 | 2018-02-13,178829.0 95 | 2018-02-14,183818.0 96 | 2018-02-15,201350.0 97 | 2018-02-16,213021.0 98 | 2018-02-20,211204.0 99 | 2018-02-21,200563.0 100 | 2018-02-22,225925.0 101 | 2018-02-23,207164.0 102 | 2018-02-26,196511.0 103 | 2018-02-27,227149.0 104 | 2018-02-28,208754.0 105 | 2018-03-01,189602.0 106 | 2018-03-02,176743.0 107 | 2018-03-05,179293.0 108 | 2018-03-06,150733.0 109 | 2018-03-07,205004.0 110 | 2018-03-08,204276.0 111 | 2018-03-09,212610.0 112 | 2018-03-12,204968.0 113 | 2018-03-13,173584.0 114 | 2018-03-14,151350.0 115 | 2018-03-15,209024.0 116 | 2018-03-16,208075.0 117 | 2018-03-19,205328.0 118 | 2018-03-20,202297.0 119 | 2018-03-21,196766.0 120 | 2018-03-22,212564.0 121 | 2018-03-23,178928.0 122 | 2018-03-26,201761.0 123 | 2018-03-27,194611.0 124 | 2018-03-28,207117.0 125 | 2018-03-29,174583.0 126 | 2018-04-02,191255.0 127 | 2018-04-03,181984.0 128 | 2018-04-04,227337.0 129 | 2018-04-05,185858.0 130 | 2018-04-06,222383.0 131 | 2018-04-09,195830.0 132 | 2018-04-10,207175.0 133 | 2018-04-11,197501.0 134 | 2018-04-12,202835.0 135 | 2018-04-13,207782.0 136 | 2018-04-16,204197.0 137 | 2018-04-17,200469.0 138 | 2018-04-18,208110.0 139 | 2018-04-19,157750.0 140 | 2018-04-20,190151.0 141 | 2018-04-23,190046.0 142 | 2018-04-24,198268.0 143 | 2018-04-25,204081.0 144 | 2018-04-26,206102.0 145 | 2018-04-27,211717.0 146 | 2018-04-30,201615.0 147 | 2018-05-01,196476.0 148 | 2018-05-02,215011.0 149 | 2018-05-03,204623.0 150 | 2018-05-04,186403.0 151 | 2018-05-07,196138.0 152 | 2018-05-08,211013.0 153 | 2018-05-09,210175.0 154 | 2018-05-10,200855.0 155 | 2018-05-11,207251.0 156 | 2018-05-14,172186.0 157 | 2018-05-15,194308.0 158 | 2018-05-16,198479.0 159 | 2018-05-17,188118.0 160 | 2018-05-18,223507.0 161 | 2018-05-21,197886.0 162 | 2018-05-22,221335.0 163 | 2018-05-23,181840.0 164 | 2018-05-24,226995.0 165 | 2018-05-25,168377.0 166 | 2018-05-29,224651.0 167 | 2018-05-30,191330.0 168 | 2018-05-31,202417.0 169 | 2018-06-01,199794.0 170 | 2018-06-04,196877.0 171 | 2018-06-05,214168.0 172 | 2018-06-06,180081.0 173 | 2018-06-07,172991.0 174 | 2018-06-08,200362.0 175 | 2018-06-11,182746.0 176 | 2018-06-12,172449.0 177 | 2018-06-13,196593.0 178 | 2018-06-14,235390.0 179 | 2018-06-15,212229.0 180 | 2018-06-18,197484.0 181 | 2018-06-19,196204.0 182 | 2018-06-20,223585.0 183 | 2018-06-21,199418.0 184 | 2018-06-22,200493.0 185 | 2018-06-25,230461.0 186 | 2018-06-26,188471.0 187 | 2018-06-27,187363.0 188 | 2018-06-28,187970.0 189 | 2018-06-29,211436.0 190 | 2018-07-02,215892.0 191 | 2018-07-03,164917.0 192 | 2018-07-05,172871.0 193 | 2018-07-06,179970.0 194 | 2018-07-09,167339.0 195 | 2018-07-10,181429.0 196 | 2018-07-11,199076.0 197 | 2018-07-12,215335.0 198 | 2018-07-13,157689.0 199 | 2018-07-16,208933.0 200 | 2018-07-17,183536.0 201 | 2018-07-18,186206.0 202 | 2018-07-19,184851.0 203 | 2018-07-20,228828.0 204 | 2018-07-23,196867.0 205 
| 2018-07-24,203933.0 206 | 2018-07-25,182385.0 207 | 2018-07-26,188227.0 208 | 2018-07-27,205302.0 209 | 2018-07-30,159383.0 210 | 2018-07-31,161000.0 211 | 2018-08-01,219709.0 212 | 2018-08-02,197273.0 213 | 2018-08-03,179268.0 214 | 2018-08-06,210374.0 215 | 2018-08-07,183179.0 216 | 2018-08-08,216563.0 217 | 2018-08-09,226791.0 218 | 2018-08-10,207543.0 219 | 2018-08-13,187719.0 220 | 2018-08-14,222928.0 221 | 2018-08-15,230737.0 222 | 2018-08-16,196648.0 223 | 2018-08-17,197041.0 224 | 2018-08-20,228850.0 225 | 2018-08-21,224698.0 226 | 2018-08-22,228461.0 227 | 2018-08-23,230094.0 228 | 2018-08-24,178207.0 229 | 2018-08-27,244873.0 230 | 2018-08-28,209088.0 231 | 2018-08-29,230723.0 232 | 2018-08-30,212288.0 233 | 2018-08-31,214426.0 234 | 2018-09-04,207754.0 235 | 2018-09-05,213409.0 236 | 2018-09-06,170465.0 237 | 2018-09-07,182935.0 238 | 2018-09-10,216339.0 239 | 2018-09-11,216836.0 240 | 2018-09-12,208466.0 241 | 2018-09-13,193341.0 242 | 2018-09-14,242216.0 243 | 2018-09-17,196847.0 244 | 2018-09-18,202280.0 245 | 2018-09-19,223302.0 246 | 2018-09-20,203343.0 247 | 2018-09-21,227132.0 248 | 2018-09-24,215823.0 249 | 2018-09-25,169395.0 250 | 2018-09-26,207679.0 251 | 2018-09-27,186355.0 252 | 2018-09-28,210530.0 253 | 2018-10-01,188884.0 254 | 2018-10-02,191762.0 255 | 2018-10-03,232030.0 256 | 2018-10-04,214706.0 257 | 2018-10-05,213685.0 258 | 2018-10-08,205509.0 259 | 2018-10-09,229840.0 260 | 2018-10-10,203251.0 261 | 2018-10-11,217037.0 262 | 2018-10-12,252063.0 263 | 2018-10-15,186625.0 264 | 2018-10-16,214448.0 265 | 2018-10-17,174949.0 266 | 2018-10-18,174781.0 267 | 2018-10-19,213331.0 268 | 2018-10-22,225811.0 269 | 2018-10-23,210716.0 270 | 2018-10-24,226919.0 271 | 2018-10-25,239843.0 272 | 2018-10-26,196001.0 273 | 2018-10-29,190627.0 274 | 2018-10-30,196620.0 275 | 2018-10-31,190284.0 276 | 2018-11-01,232609.0 277 | 2018-11-02,200541.0 278 | 2018-11-05,190302.0 279 | 2018-11-06,195035.0 280 | 2018-11-07,205464.0 281 | 2018-11-08,238503.0 282 | 2018-11-09,233655.0 283 | 2018-11-12,247601.0 284 | 2018-11-13,221816.0 285 | 2018-11-14,203877.0 286 | 2018-11-15,182143.0 287 | 2018-11-16,227373.0 288 | 2018-11-19,219731.0 289 | 2018-11-20,178592.0 290 | 2018-11-21,213573.0 291 | 2018-11-23,200974.0 292 | 2018-11-26,227110.0 293 | 2018-11-27,234092.0 294 | 2018-11-28,233096.0 295 | 2018-11-29,255990.0 296 | 2018-11-30,240890.0 297 | 2018-12-03,233088.0 298 | 2018-12-04,239283.0 299 | 2018-12-06,264711.0 300 | 2018-12-07,234189.0 301 | 2018-12-10,199918.0 302 | 2018-12-11,222631.0 303 | 2018-12-12,207044.0 304 | 2018-12-13,200392.0 305 | 2018-12-14,234567.0 306 | 2018-12-17,256247.0 307 | 2018-12-18,215274.0 308 | 2018-12-19,199063.0 309 | 2018-12-20,224864.0 310 | 2018-12-21,235553.0 311 | 2018-12-24,263157.0 312 | 2018-12-26,222228.0 313 | 2018-12-27,243436.0 314 | 2018-12-28,213643.0 315 | 2018-12-31,225986.0 316 | 2019-01-02,226179.0 317 | 2019-01-03,270371.0 318 | 2019-01-04,196207.0 319 | 2019-01-07,208663.0 320 | 2019-01-08,248797.0 321 | 2019-01-09,227881.0 322 | 2019-01-10,193694.0 323 | 2019-01-11,221644.0 324 | 2019-01-14,226586.0 325 | 2019-01-15,204034.0 326 | 2019-01-16,204868.0 327 | 2019-01-17,204047.0 328 | 2019-01-18,205983.0 329 | 2019-01-22,215795.0 330 | 2019-01-23,177478.0 331 | 2019-01-24,247324.0 332 | 2019-01-25,196729.0 333 | 2019-01-28,233227.0 334 | 2019-01-29,196652.0 335 | 2019-01-30,192321.0 336 | 2019-01-31,226490.0 337 | 2019-02-01,199647.0 338 | 2019-02-04,193555.0 339 | 2019-02-05,217980.0 340 | 2019-02-06,214931.0 341 | 
2019-02-07,233756.0 342 | 2019-02-08,215714.0 343 | 2019-02-11,284872.0 344 | 2019-02-12,212406.0 345 | 2019-02-13,204691.0 346 | 2019-02-14,184231.0 347 | 2019-02-15,219456.0 348 | 2019-02-19,204000.0 349 | 2019-02-20,235486.0 350 | 2019-02-21,212348.0 351 | 2019-02-22,207053.0 352 | 2019-02-25,206841.0 353 | 2019-02-26,178836.0 354 | 2019-02-27,186185.0 355 | 2019-02-28,215154.0 356 | 2019-03-01,203328.0 357 | 2019-03-04,178952.0 358 | 2019-03-05,175307.0 359 | 2019-03-06,208299.0 360 | 2019-03-07,212062.0 361 | 2019-03-08,206425.0 362 | 2019-03-11,164523.0 363 | 2019-03-12,208042.0 364 | 2019-03-13,235979.0 365 | 2019-03-14,191438.0 366 | 2019-03-15,264815.0 367 | 2019-03-18,234634.0 368 | 2019-03-19,215023.0 369 | 2019-03-20,164279.0 370 | 2019-03-21,205571.0 371 | 2019-03-22,204580.0 372 | 2019-03-25,192785.0 373 | 2019-03-26,197547.0 374 | 2019-03-27,210031.0 375 | 2019-03-28,212812.0 376 | 2019-03-29,172052.0 377 | 2019-04-01,236313.0 378 | 2019-04-02,179146.0 379 | 2019-04-03,156313.0 380 | 2019-04-04,201327.0 381 | 2019-04-05,179218.0 382 | 2019-04-08,208690.0 383 | 2019-04-09,201889.0 384 | 2019-04-10,232640.0 385 | 2019-04-11,249136.0 386 | 2019-04-12,216847.0 387 | 2019-04-15,211921.0 388 | 2019-04-16,228320.0 389 | 2019-04-17,195772.0 390 | 2019-04-18,193698.0 391 | 2019-04-22,179383.0 392 | 2019-04-23,218582.0 393 | 2019-04-24,187045.0 394 | 2019-04-25,167232.0 395 | 2019-04-26,169960.0 396 | 2019-04-29,185783.0 397 | 2019-04-30,218060.0 398 | 2019-05-01,222705.0 399 | 2019-05-02,221047.0 400 | 2019-05-03,208116.0 401 | 2019-05-06,221227.0 402 | 2019-05-07,227839.0 403 | 2019-05-08,208982.0 404 | 2019-05-09,241380.0 405 | 2019-05-10,224420.0 406 | 2019-05-13,218874.0 407 | 2019-05-14,233051.0 408 | 2019-05-15,209363.0 409 | 2019-05-16,204598.0 410 | 2019-05-17,191761.0 411 | 2019-05-20,194191.0 412 | 2019-05-21,255307.0 413 | 2019-05-22,222709.0 414 | 2019-05-23,184269.0 415 | 2019-05-24,220323.0 416 | 2019-05-28,207569.0 417 | 2019-05-29,189049.0 418 | 2019-05-30,177797.0 419 | 2019-05-31,215139.0 420 | 2019-06-03,228532.0 421 | 2019-06-04,203860.0 422 | 2019-06-05,212475.0 423 | 2019-06-06,189273.0 424 | 2019-06-07,181573.0 425 | 2019-06-10,216926.0 426 | 2019-06-11,210069.0 427 | 2019-06-12,196536.0 428 | 2019-06-13,232360.0 429 | 2019-06-14,215498.0 430 | 2019-06-17,239570.0 431 | 2019-06-18,213664.0 432 | 2019-06-19,195720.0 433 | 2019-06-20,206962.0 434 | 2019-06-21,208074.0 435 | 2019-06-24,241329.0 436 | 2019-06-25,176407.0 437 | 2019-06-26,262237.0 438 | 2019-06-27,204418.0 439 | 2019-06-28,233336.0 440 | 2019-07-01,213031.0 441 | 2019-07-02,227425.0 442 | 2019-07-03,241044.0 443 | 2019-07-05,202065.0 444 | 2019-07-08,193485.0 445 | 2019-07-09,235103.0 446 | 2019-07-10,184052.0 447 | 2019-07-11,202539.0 448 | 2019-07-12,199740.0 449 | 2019-07-15,230836.0 450 | 2019-07-16,222362.0 451 | 2019-07-17,265203.0 452 | 2019-07-18,207975.0 453 | 2019-07-19,223468.0 454 | 2019-07-22,183314.0 455 | 2019-07-23,238553.0 456 | 2019-07-24,208459.0 457 | 2019-07-25,213064.0 458 | 2019-07-26,209056.0 459 | 2019-07-29,208424.0 460 | 2019-07-30,226658.0 461 | 2019-07-31,221202.0 462 | 2019-08-01,211144.0 463 | 2019-08-02,234430.0 464 | 2019-08-05,201581.0 465 | 2019-08-06,169277.0 466 | 2019-08-07,227123.0 467 | 2019-08-08,244151.0 468 | 2019-08-09,182744.0 469 | 2019-08-12,242729.0 470 | 2019-08-13,208951.0 471 | 2019-08-14,227435.0 472 | 2019-08-15,224195.0 473 | 2019-08-16,197012.0 474 | 2019-08-19,264390.0 475 | 2019-08-20,206969.0 476 | 2019-08-21,207534.0 477 | 
2019-08-22,175696.0 478 | 2019-08-23,234533.0 479 | 2019-08-26,215620.0 480 | 2019-08-27,199266.0 481 | 2019-08-28,174298.0 482 | 2019-08-29,240823.0 483 | 2019-08-30,208684.0 484 | 2019-09-03,264284.0 485 | 2019-09-04,201644.0 486 | 2019-09-05,228044.0 487 | 2019-09-06,201028.0 488 | 2019-09-09,202218.0 489 | 2019-09-10,245567.0 490 | 2019-09-11,240227.0 491 | 2019-09-12,182705.0 492 | 2019-09-13,236634.0 493 | 2019-09-16,205976.0 494 | 2019-09-17,195457.0 495 | 2019-09-18,184070.0 496 | 2019-09-19,206251.0 497 | 2019-09-20,203837.0 498 | 2019-09-23,190928.0 499 | 2019-09-24,232935.0 500 | 2019-09-25,211451.0 501 | 2019-09-26,200314.0 502 | 2019-09-27,195926.0 503 | 2019-09-30,219788.0 504 | 2019-10-01,236744.0 505 | 2019-10-02,222219.0 506 | 2019-10-03,204931.0 507 | 2019-10-04,211875.0 508 | 2019-10-07,233955.0 509 | 2019-10-08,196324.0 510 | 2019-10-09,202477.0 511 | 2019-10-10,228503.0 512 | 2019-10-11,199737.0 513 | 2019-10-14,253851.0 514 | 2019-10-15,228601.0 515 | 2019-10-16,180920.0 516 | 2019-10-17,221938.0 517 | 2019-10-18,215957.0 518 | 2019-10-21,173637.0 519 | 2019-10-22,209806.0 520 | 2019-10-23,205010.0 521 | 2019-10-24,210664.0 522 | 2019-10-25,211773.0 523 | 2019-10-28,211631.0 524 | 2019-10-29,194738.0 525 | 2019-10-30,205982.0 526 | 2019-10-31,191063.0 527 | 2019-11-01,179137.0 528 | 2019-11-04,211371.0 529 | 2019-11-05,223270.0 530 | 2019-11-06,276605.0 531 | 2019-11-07,242609.0 532 | 2019-11-08,204443.0 533 | 2019-11-11,245952.0 534 | 2019-11-12,238041.0 535 | 2019-11-13,222058.0 536 | 2019-11-14,221566.0 537 | 2019-11-15,203806.0 538 | 2019-11-18,219534.0 539 | 2019-11-19,236376.0 540 | 2019-11-20,199045.0 541 | 2019-11-21,246170.0 542 | 2019-11-22,183462.0 543 | 2019-11-25,263499.0 544 | 2019-11-26,202534.0 545 | 2019-11-27,237125.0 546 | 2019-11-29,213578.0 547 | 2019-12-02,199935.0 548 | 2019-12-03,247914.0 549 | 2019-12-04,259856.0 550 | 2019-12-05,227246.0 551 | 2019-12-06,230061.0 552 | 2019-12-09,208392.0 553 | 2019-12-10,194513.0 554 | 2019-12-11,229407.0 555 | 2019-12-12,239324.0 556 | 2019-12-13,237440.0 557 | 2019-12-16,258681.0 558 | 2019-12-17,203218.0 559 | 2019-12-18,261029.0 560 | 2019-12-19,252389.0 561 | 2019-12-20,253307.0 562 | 2019-12-23,221920.0 563 | 2019-12-24,257740.0 564 | 2019-12-26,246320.0 565 | 2019-12-27,210558.0 566 | 2019-12-30,229775.0 567 | 2019-12-31,221618.0 568 | -------------------------------------------------------------------------------- /data/alternative_data/IYPP.csv: -------------------------------------------------------------------------------- 1 | date,value 2 | -------------------------------------------------------------------------------- /data/alternative_data/KER.csv: -------------------------------------------------------------------------------- 1 | date,value 2 | -------------------------------------------------------------------------------- /data/alternative_data/PQCE.csv: -------------------------------------------------------------------------------- 1 | date,value 2 | 2015-03-31,8918.0 3 | 2015-04-01,9746.0 4 | 2015-04-02,8543.0 5 | 2015-04-06,9901.0 6 | 2015-04-07,10013.0 7 | 2015-04-08,8243.0 8 | 2015-04-09,8334.0 9 | 2015-04-10,6892.0 10 | 2015-04-13,8285.0 11 | 2015-04-14,10270.0 12 | 2015-04-15,8838.0 13 | 2015-04-16,9067.0 14 | 2015-04-17,9486.0 15 | 2015-04-20,9324.0 16 | 2015-04-21,8214.0 17 | 2015-04-22,9275.0 18 | 2015-04-23,9267.0 19 | 2015-04-24,10398.0 20 | 2015-04-27,9832.0 21 | 2015-04-28,9846.0 22 | 2015-04-29,11295.0 23 | 2015-04-30,7890.0 24 | 2015-05-01,8118.0 25 | 2015-05-04,8549.0 26 
| 2015-05-05,10004.0 27 | 2015-05-06,9860.0 28 | 2015-05-07,7657.0 29 | 2015-05-08,8967.0 30 | 2015-05-11,8090.0 31 | 2015-05-12,10383.0 32 | 2015-05-13,9607.0 33 | 2015-05-14,8404.0 34 | 2015-05-15,9034.0 35 | 2015-05-18,8596.0 36 | 2015-05-19,9567.0 37 | 2015-05-20,8477.0 38 | 2015-05-21,8980.0 39 | 2015-05-22,8828.0 40 | 2015-05-26,8022.0 41 | 2015-05-27,8833.0 42 | 2015-05-28,8357.0 43 | 2015-05-29,10015.0 44 | 2015-06-01,8437.0 45 | 2015-06-02,7578.0 46 | 2015-06-03,8812.0 47 | 2015-06-04,9669.0 48 | 2015-06-05,10868.0 49 | 2015-06-08,9041.0 50 | 2015-06-09,9050.0 51 | 2015-06-10,9185.0 52 | 2015-06-11,8911.0 53 | 2015-06-12,8739.0 54 | 2015-06-15,8646.0 55 | 2015-06-16,8036.0 56 | 2015-06-17,9720.0 57 | 2015-06-18,9165.0 58 | 2015-06-19,8740.0 59 | 2015-06-22,10133.0 60 | 2015-06-23,8449.0 61 | 2015-06-24,8879.0 62 | 2015-06-25,8881.0 63 | 2015-06-26,8440.0 64 | 2015-06-29,10320.0 65 | 2015-06-30,9085.0 66 | 2015-07-01,9999.0 67 | 2015-07-02,10090.0 68 | 2015-07-06,7232.0 69 | 2015-07-07,8191.0 70 | 2015-07-08,8133.0 71 | 2015-07-09,10156.0 72 | 2015-07-10,8200.0 73 | 2015-07-13,8008.0 74 | 2015-07-14,8462.0 75 | 2015-07-15,8989.0 76 | 2015-07-16,8271.0 77 | 2015-07-17,9009.0 78 | 2015-07-20,8176.0 79 | 2015-07-21,9641.0 80 | 2015-07-22,7780.0 81 | 2015-07-23,9833.0 82 | 2015-07-24,8358.0 83 | 2015-07-27,8301.0 84 | 2015-07-28,10360.0 85 | 2015-07-29,7010.0 86 | 2015-07-30,8872.0 87 | 2015-07-31,8430.0 88 | 2015-08-03,8722.0 89 | 2015-08-04,8833.0 90 | 2015-08-05,7567.0 91 | 2015-08-06,8640.0 92 | 2015-08-07,9284.0 93 | 2015-08-10,8445.0 94 | 2015-08-11,8022.0 95 | 2015-08-12,9009.0 96 | 2015-08-13,8206.0 97 | 2015-08-14,10153.0 98 | 2015-08-17,8287.0 99 | 2015-08-18,8320.0 100 | 2015-08-19,8509.0 101 | 2015-08-20,9595.0 102 | 2015-08-21,8614.0 103 | 2015-08-24,8514.0 104 | 2015-08-25,8834.0 105 | 2015-08-26,9059.0 106 | 2015-08-27,7528.0 107 | 2015-08-28,9942.0 108 | 2015-08-31,10222.0 109 | 2015-09-01,9797.0 110 | 2015-09-02,8442.0 111 | 2015-09-03,9770.0 112 | 2015-09-04,8672.0 113 | 2015-09-08,9403.0 114 | 2015-09-09,7840.0 115 | 2015-09-10,9147.0 116 | 2015-09-11,8743.0 117 | 2015-09-14,9642.0 118 | 2015-09-15,7827.0 119 | 2015-09-16,8484.0 120 | 2015-09-17,9245.0 121 | 2015-09-18,9267.0 122 | 2015-09-21,9700.0 123 | 2015-09-22,10335.0 124 | 2015-09-23,10076.0 125 | 2015-09-24,10096.0 126 | 2015-09-25,8119.0 127 | 2015-09-28,11123.0 128 | 2015-09-29,10667.0 129 | 2015-09-30,10310.0 130 | 2015-10-01,10204.0 131 | 2015-10-02,10888.0 132 | 2015-10-05,9935.0 133 | 2015-10-06,7948.0 134 | 2015-10-07,9655.0 135 | 2015-10-08,9166.0 136 | 2015-10-09,8512.0 137 | 2015-10-12,10348.0 138 | 2015-10-13,9708.0 139 | 2015-10-14,10212.0 140 | 2015-10-15,8877.0 141 | 2015-10-16,10092.0 142 | 2015-10-19,12914.0 143 | 2015-10-20,8727.0 144 | 2015-10-21,8902.0 145 | 2015-10-22,10464.0 146 | 2015-10-23,8915.0 147 | 2015-10-26,12274.0 148 | 2015-10-27,10076.0 149 | 2015-10-28,10792.0 150 | 2015-10-29,9992.0 151 | 2015-10-30,11303.0 152 | 2015-11-02,11491.0 153 | 2015-11-03,11532.0 154 | 2015-11-04,9877.0 155 | 2015-11-05,11495.0 156 | 2015-11-06,10898.0 157 | 2015-11-09,11430.0 158 | 2015-11-10,11608.0 159 | 2015-11-11,11989.0 160 | 2015-11-12,11084.0 161 | 2015-11-13,11942.0 162 | 2015-11-16,9477.0 163 | 2015-11-17,12945.0 164 | 2015-11-18,10983.0 165 | 2015-11-19,10233.0 166 | 2015-11-20,12536.0 167 | 2015-11-23,11742.0 168 | 2015-11-24,13600.0 169 | 2015-11-25,11208.0 170 | 2015-11-27,11378.0 171 | 2015-11-30,12560.0 172 | 2015-12-01,13421.0 173 | 2015-12-02,12945.0 174 | 2015-12-03,13359.0 175 | 
2015-12-04,11871.0 176 | 2015-12-07,11533.0 177 | 2015-12-08,10700.0 178 | 2015-12-09,14087.0 179 | 2015-12-10,11614.0 180 | 2015-12-11,11178.0 181 | 2015-12-14,14839.0 182 | 2015-12-15,13065.0 183 | 2015-12-16,13337.0 184 | 2015-12-17,14097.0 185 | 2015-12-18,14338.0 186 | 2015-12-21,12793.0 187 | 2015-12-22,12544.0 188 | 2015-12-23,12305.0 189 | 2015-12-24,11504.0 190 | 2015-12-28,13444.0 191 | 2015-12-29,12792.0 192 | 2015-12-30,13051.0 193 | 2015-12-31,14256.0 194 | 2016-01-04,13832.0 195 | 2016-01-05,10138.0 196 | 2016-01-06,10782.0 197 | 2016-01-07,11557.0 198 | 2016-01-08,13559.0 199 | 2016-01-11,14624.0 200 | 2016-01-12,11616.0 201 | 2016-01-13,11422.0 202 | 2016-01-14,11328.0 203 | 2016-01-15,13386.0 204 | 2016-01-19,11589.0 205 | 2016-01-20,12970.0 206 | 2016-01-21,12015.0 207 | 2016-01-22,12248.0 208 | 2016-01-25,12767.0 209 | 2016-01-26,14373.0 210 | 2016-01-27,11200.0 211 | 2016-01-28,12198.0 212 | 2016-01-29,11007.0 213 | 2016-02-01,11535.0 214 | 2016-02-02,14260.0 215 | 2016-02-03,11457.0 216 | 2016-02-04,11616.0 217 | 2016-02-05,13165.0 218 | 2016-02-08,13501.0 219 | 2016-02-09,12379.0 220 | 2016-02-10,11022.0 221 | 2016-02-11,13027.0 222 | 2016-02-12,13032.0 223 | 2016-02-16,11451.0 224 | 2016-02-17,13441.0 225 | 2016-02-18,12405.0 226 | 2016-02-19,9190.0 227 | 2016-02-22,12006.0 228 | 2016-02-23,10856.0 229 | 2016-02-24,10540.0 230 | 2016-02-25,12862.0 231 | 2016-02-26,10275.0 232 | 2016-02-29,12544.0 233 | 2016-03-01,11518.0 234 | 2016-03-02,12804.0 235 | 2016-03-03,12009.0 236 | 2016-03-04,12008.0 237 | 2016-03-07,10560.0 238 | 2016-03-08,9732.0 239 | 2016-03-09,12145.0 240 | 2016-03-10,11035.0 241 | 2016-03-11,9080.0 242 | 2016-03-14,9967.0 243 | 2016-03-15,10073.0 244 | 2016-03-16,12505.0 245 | 2016-03-17,10435.0 246 | 2016-03-18,9876.0 247 | 2016-03-21,9666.0 248 | 2016-03-22,11044.0 249 | 2016-03-23,11468.0 250 | 2016-03-24,10880.0 251 | 2016-03-28,10438.0 252 | 2016-03-29,13232.0 253 | 2016-03-30,10670.0 254 | 2016-03-31,12115.0 255 | 2016-04-01,12385.0 256 | 2016-04-04,10121.0 257 | 2016-04-05,12013.0 258 | 2016-04-06,9348.0 259 | 2016-04-07,12190.0 260 | 2016-04-08,12092.0 261 | 2016-04-11,10343.0 262 | 2016-04-12,10474.0 263 | 2016-04-13,10554.0 264 | 2016-04-14,10544.0 265 | 2016-04-15,10192.0 266 | 2016-04-18,9868.0 267 | 2016-04-19,11927.0 268 | 2016-04-20,12020.0 269 | 2016-04-21,11610.0 270 | 2016-04-22,12211.0 271 | 2016-04-25,12230.0 272 | 2016-04-26,11875.0 273 | 2016-04-27,10125.0 274 | 2016-04-28,12775.0 275 | 2016-04-29,11721.0 276 | 2016-05-02,9605.0 277 | 2016-05-03,9769.0 278 | 2016-05-04,11286.0 279 | 2016-05-05,10808.0 280 | 2016-05-06,10818.0 281 | 2016-05-09,9601.0 282 | 2016-05-10,11757.0 283 | 2016-05-11,10976.0 284 | 2016-05-12,10254.0 285 | 2016-05-13,10414.0 286 | 2016-05-16,12058.0 287 | 2016-05-17,10020.0 288 | 2016-05-18,8782.0 289 | 2016-05-19,11814.0 290 | 2016-05-20,10458.0 291 | 2016-05-23,9208.0 292 | 2016-05-24,11170.0 293 | 2016-05-25,10461.0 294 | 2016-05-26,11216.0 295 | 2016-05-27,10700.0 296 | 2016-05-31,11518.0 297 | 2016-06-01,10046.0 298 | 2016-06-02,10643.0 299 | 2016-06-03,10844.0 300 | 2016-06-06,10615.0 301 | 2016-06-07,10279.0 302 | 2016-06-08,10788.0 303 | 2016-06-09,10265.0 304 | 2016-06-10,12697.0 305 | 2016-06-13,10222.0 306 | 2016-06-14,11055.0 307 | 2016-06-15,10362.0 308 | 2016-06-16,9782.0 309 | 2016-06-17,10600.0 310 | 2016-06-20,11854.0 311 | 2016-06-21,10092.0 312 | 2016-06-22,9481.0 313 | 2016-06-23,10054.0 314 | 2016-06-24,13028.0 315 | 2016-06-27,9222.0 316 | 2016-06-28,10142.0 317 | 2016-06-29,10619.0 
318 | 2016-06-30,13576.0 319 | 2016-07-01,10906.0 320 | 2016-07-05,9204.0 321 | 2016-07-06,9588.0 322 | 2016-07-07,11269.0 323 | 2016-07-08,10749.0 324 | 2016-07-11,10189.0 325 | 2016-07-12,11209.0 326 | 2016-07-13,11606.0 327 | 2016-07-14,11379.0 328 | 2016-07-15,10028.0 329 | 2016-07-18,11085.0 330 | 2016-07-19,12476.0 331 | 2016-07-20,14173.0 332 | 2016-07-21,10284.0 333 | 2016-07-22,11413.0 334 | 2016-07-25,11026.0 335 | 2016-07-26,10705.0 336 | 2016-07-27,9962.0 337 | 2016-07-28,13357.0 338 | 2016-07-29,11657.0 339 | 2016-08-01,9360.0 340 | 2016-08-02,11426.0 341 | 2016-08-03,10974.0 342 | 2016-08-04,9522.0 343 | 2016-08-05,10038.0 344 | 2016-08-08,10837.0 345 | 2016-08-09,10326.0 346 | 2016-08-10,10913.0 347 | 2016-08-11,12127.0 348 | 2016-08-12,13590.0 349 | 2016-08-15,9823.0 350 | 2016-08-16,11505.0 351 | 2016-08-17,10018.0 352 | 2016-08-18,9811.0 353 | 2016-08-19,12634.0 354 | 2016-08-22,10934.0 355 | 2016-08-23,10395.0 356 | 2016-08-24,10630.0 357 | 2016-08-25,9019.0 358 | 2016-08-26,11895.0 359 | 2016-08-29,12016.0 360 | 2016-08-30,11500.0 361 | 2016-08-31,9885.0 362 | 2016-09-01,12960.0 363 | 2016-09-02,11081.0 364 | 2016-09-06,10797.0 365 | 2016-09-07,10899.0 366 | 2016-09-08,11746.0 367 | 2016-09-09,12265.0 368 | 2016-09-12,12043.0 369 | 2016-09-13,12541.0 370 | 2016-09-14,7790.0 371 | 2016-09-15,10435.0 372 | 2016-09-16,11709.0 373 | 2016-09-19,11544.0 374 | 2016-09-20,11176.0 375 | 2016-09-21,11707.0 376 | 2016-09-22,11087.0 377 | 2016-09-23,10167.0 378 | 2016-09-26,11724.0 379 | 2016-09-27,11883.0 380 | 2016-09-28,11104.0 381 | 2016-09-29,11960.0 382 | 2016-09-30,11937.0 383 | 2016-10-03,10686.0 384 | 2016-10-04,10560.0 385 | 2016-10-05,12314.0 386 | 2016-10-06,11377.0 387 | 2016-10-07,11053.0 388 | 2016-10-10,12246.0 389 | 2016-10-11,13209.0 390 | 2016-10-12,13468.0 391 | 2016-10-13,12998.0 392 | 2016-10-14,12432.0 393 | 2016-10-17,13028.0 394 | 2016-10-18,11321.0 395 | 2016-10-19,13731.0 396 | 2016-10-20,11505.0 397 | 2016-10-21,12658.0 398 | 2016-10-24,11551.0 399 | 2016-10-25,13157.0 400 | 2016-10-26,12240.0 401 | 2016-10-27,13877.0 402 | 2016-10-28,14056.0 403 | 2016-10-31,13117.0 404 | 2016-11-01,14859.0 405 | 2016-11-02,12957.0 406 | 2016-11-03,13657.0 407 | 2016-11-04,12797.0 408 | 2016-11-07,14438.0 409 | 2016-11-08,14068.0 410 | 2016-11-09,13307.0 411 | 2016-11-10,13186.0 412 | 2016-11-11,11788.0 413 | 2016-11-14,14129.0 414 | 2016-11-15,11714.0 415 | 2016-11-16,13211.0 416 | 2016-11-17,10984.0 417 | 2016-11-18,14109.0 418 | 2016-11-21,10747.0 419 | 2016-11-22,13480.0 420 | 2016-11-23,14349.0 421 | 2016-11-25,16598.0 422 | 2016-11-28,16091.0 423 | 2016-11-29,14914.0 424 | 2016-11-30,13758.0 425 | 2016-12-01,16103.0 426 | 2016-12-02,14532.0 427 | 2016-12-05,14278.0 428 | 2016-12-06,12427.0 429 | 2016-12-07,16579.0 430 | 2016-12-08,14468.0 431 | 2016-12-09,13615.0 432 | 2016-12-12,16672.0 433 | 2016-12-13,15704.0 434 | 2016-12-14,12202.0 435 | 2016-12-15,15582.0 436 | 2016-12-16,14141.0 437 | 2016-12-19,13261.0 438 | 2016-12-20,17105.0 439 | 2016-12-21,12445.0 440 | 2016-12-22,15922.0 441 | 2016-12-23,17369.0 442 | 2016-12-27,17048.0 443 | 2016-12-28,14067.0 444 | 2016-12-29,13305.0 445 | 2016-12-30,15680.0 446 | 2017-01-03,14779.0 447 | 2017-01-04,15696.0 448 | 2017-01-05,16183.0 449 | 2017-01-06,18695.0 450 | 2017-01-09,15383.0 451 | 2017-01-10,16679.0 452 | 2017-01-11,17020.0 453 | 2017-01-12,15720.0 454 | 2017-01-13,15732.0 455 | 2017-01-17,16032.0 456 | 2017-01-18,15810.0 457 | 2017-01-19,14771.0 458 | 2017-01-20,15061.0 459 | 2017-01-23,18517.0 460 | 
2017-01-24,16692.0 461 | 2017-01-25,15253.0 462 | 2017-01-26,12949.0 463 | 2017-01-27,13884.0 464 | 2017-01-30,13233.0 465 | 2017-01-31,14878.0 466 | 2017-02-01,13718.0 467 | 2017-02-02,14725.0 468 | 2017-02-03,14340.0 469 | 2017-02-06,15292.0 470 | 2017-02-07,13666.0 471 | 2017-02-08,16296.0 472 | 2017-02-09,14075.0 473 | 2017-02-10,15432.0 474 | 2017-02-13,13794.0 475 | 2017-02-14,15489.0 476 | 2017-02-15,10478.0 477 | 2017-02-16,15155.0 478 | 2017-02-17,14021.0 479 | 2017-02-21,12115.0 480 | 2017-02-22,17267.0 481 | 2017-02-23,15483.0 482 | 2017-02-24,16357.0 483 | 2017-02-27,16895.0 484 | 2017-02-28,14119.0 485 | 2017-03-01,11546.0 486 | 2017-03-02,12516.0 487 | 2017-03-03,12881.0 488 | 2017-03-06,13578.0 489 | 2017-03-07,12536.0 490 | 2017-03-08,16161.0 491 | 2017-03-09,14955.0 492 | 2017-03-10,13301.0 493 | 2017-03-13,14516.0 494 | 2017-03-14,14274.0 495 | 2017-03-15,13427.0 496 | 2017-03-16,15435.0 497 | 2017-03-17,13874.0 498 | 2017-03-20,15284.0 499 | 2017-03-21,12868.0 500 | 2017-03-22,11912.0 501 | 2017-03-23,13480.0 502 | 2017-03-24,11589.0 503 | 2017-03-27,14357.0 504 | 2017-03-28,11648.0 505 | 2017-03-29,12360.0 506 | 2017-03-30,15072.0 507 | 2017-03-31,12834.0 508 | 2017-04-03,12932.0 509 | 2017-04-04,12522.0 510 | 2017-04-05,12050.0 511 | 2017-04-06,11951.0 512 | 2017-04-07,10518.0 513 | 2017-04-10,13493.0 514 | 2017-04-11,12341.0 515 | 2017-04-12,14569.0 516 | 2017-04-13,13305.0 517 | 2017-04-17,14152.0 518 | 2017-04-18,14492.0 519 | 2017-04-19,13545.0 520 | 2017-04-20,12065.0 521 | 2017-04-21,15071.0 522 | 2017-04-24,12903.0 523 | 2017-04-25,13548.0 524 | 2017-04-26,13676.0 525 | 2017-04-27,14215.0 526 | 2017-04-28,15646.0 527 | 2017-05-01,10927.0 528 | 2017-05-02,12565.0 529 | 2017-05-03,12826.0 530 | 2017-05-04,13520.0 531 | 2017-05-05,12934.0 532 | 2017-05-08,14857.0 533 | 2017-05-09,16156.0 534 | 2017-05-10,14410.0 535 | 2017-05-11,13446.0 536 | 2017-05-12,13161.0 537 | 2017-05-15,13481.0 538 | 2017-05-16,13339.0 539 | 2017-05-17,14934.0 540 | 2017-05-18,11514.0 541 | 2017-05-19,14478.0 542 | 2017-05-22,12585.0 543 | 2017-05-23,12452.0 544 | 2017-05-24,10864.0 545 | 2017-05-25,13407.0 546 | 2017-05-26,13740.0 547 | 2017-05-30,13367.0 548 | 2017-05-31,15078.0 549 | 2017-06-01,15193.0 550 | 2017-06-02,13078.0 551 | 2017-06-05,12520.0 552 | 2017-06-06,15765.0 553 | 2017-06-07,13253.0 554 | 2017-06-08,13306.0 555 | 2017-06-09,13907.0 556 | 2017-06-12,13746.0 557 | 2017-06-13,12905.0 558 | 2017-06-14,13495.0 559 | 2017-06-15,13585.0 560 | 2017-06-16,12762.0 561 | 2017-06-19,13477.0 562 | 2017-06-20,10222.0 563 | 2017-06-21,10314.0 564 | 2017-06-22,12416.0 565 | 2017-06-23,10876.0 566 | 2017-06-26,13908.0 567 | 2017-06-27,11827.0 568 | 2017-06-28,12545.0 569 | 2017-06-29,10144.0 570 | 2017-06-30,10828.0 571 | 2017-07-03,10467.0 572 | 2017-07-05,13210.0 573 | 2017-07-06,10452.0 574 | 2017-07-07,11905.0 575 | 2017-07-10,13350.0 576 | 2017-07-11,14854.0 577 | 2017-07-12,11133.0 578 | 2017-07-13,12699.0 579 | 2017-07-14,13170.0 580 | 2017-07-17,14195.0 581 | 2017-07-18,12583.0 582 | 2017-07-19,17080.0 583 | 2017-07-20,9868.0 584 | 2017-07-21,10412.0 585 | 2017-07-24,11475.0 586 | 2017-07-25,11910.0 587 | 2017-07-26,12758.0 588 | 2017-07-27,12389.0 589 | 2017-07-28,10785.0 590 | 2017-07-31,16414.0 591 | 2017-08-01,13251.0 592 | 2017-08-02,12488.0 593 | 2017-08-03,13885.0 594 | 2017-08-04,12476.0 595 | 2017-08-07,14538.0 596 | 2017-08-08,12000.0 597 | 2017-08-09,12320.0 598 | 2017-08-10,10218.0 599 | 2017-08-11,12823.0 600 | 2017-08-14,12460.0 601 | 2017-08-15,12892.0 602 | 
2017-08-16,13037.0 603 | 2017-08-17,14741.0 604 | 2017-08-18,14738.0 605 | 2017-08-21,15753.0 606 | 2017-08-22,14684.0 607 | 2017-08-23,12186.0 608 | 2017-08-24,12950.0 609 | 2017-08-25,12427.0 610 | 2017-08-28,14034.0 611 | 2017-08-29,15596.0 612 | 2017-08-30,14127.0 613 | 2017-08-31,13232.0 614 | 2017-09-01,15738.0 615 | 2017-09-05,14841.0 616 | 2017-09-06,16079.0 617 | 2017-09-07,10594.0 618 | 2017-09-08,12761.0 619 | 2017-09-11,16136.0 620 | 2017-09-12,11914.0 621 | 2017-09-13,13068.0 622 | 2017-09-14,14854.0 623 | 2017-09-15,15565.0 624 | 2017-09-18,13322.0 625 | 2017-09-19,16797.0 626 | 2017-09-20,12901.0 627 | 2017-09-21,12779.0 628 | 2017-09-22,14607.0 629 | 2017-09-25,15146.0 630 | 2017-09-26,15115.0 631 | 2017-09-27,14358.0 632 | 2017-09-28,13013.0 633 | 2017-09-29,13494.0 634 | 2017-10-02,14292.0 635 | 2017-10-03,16532.0 636 | 2017-10-04,13458.0 637 | 2017-10-05,15794.0 638 | 2017-10-06,13628.0 639 | 2017-10-09,16417.0 640 | 2017-10-10,12574.0 641 | 2017-10-11,16220.0 642 | 2017-10-12,14587.0 643 | 2017-10-13,13998.0 644 | 2017-10-16,15545.0 645 | 2017-10-17,17877.0 646 | 2017-10-18,12571.0 647 | 2017-10-19,14204.0 648 | 2017-10-20,14683.0 649 | 2017-10-23,14256.0 650 | 2017-10-24,17355.0 651 | 2017-10-25,15906.0 652 | 2017-10-26,15540.0 653 | 2017-10-27,13346.0 654 | 2017-10-30,10305.0 655 | 2017-10-31,16487.0 656 | 2017-11-01,14488.0 657 | 2017-11-02,14345.0 658 | 2017-11-03,13343.0 659 | 2017-11-06,17535.0 660 | 2017-11-07,16161.0 661 | 2017-11-08,15014.0 662 | 2017-11-09,12015.0 663 | 2017-11-10,15091.0 664 | 2017-11-13,14079.0 665 | 2017-11-14,14489.0 666 | 2017-11-15,15356.0 667 | 2017-11-16,14271.0 668 | 2017-11-17,15018.0 669 | 2017-11-20,14536.0 670 | 2017-11-21,13259.0 671 | 2017-11-22,14333.0 672 | 2017-11-24,13668.0 673 | 2017-11-27,14514.0 674 | 2017-11-28,15767.0 675 | 2017-11-29,14401.0 676 | 2017-11-30,15443.0 677 | 2017-12-01,15519.0 678 | 2017-12-04,14076.0 679 | 2017-12-05,12515.0 680 | 2017-12-06,15434.0 681 | 2017-12-07,11983.0 682 | 2017-12-08,15244.0 683 | 2017-12-11,15644.0 684 | 2017-12-12,14080.0 685 | 2017-12-13,11868.0 686 | 2017-12-14,15017.0 687 | 2017-12-15,13793.0 688 | 2017-12-18,14330.0 689 | 2017-12-19,12517.0 690 | 2017-12-20,14911.0 691 | 2017-12-21,14078.0 692 | 2017-12-22,15468.0 693 | 2017-12-26,11526.0 694 | 2017-12-27,16095.0 695 | 2017-12-28,17974.0 696 | 2017-12-29,16082.0 697 | 2018-01-02,14649.0 698 | 2018-01-03,15571.0 699 | 2018-01-04,13797.0 700 | 2018-01-05,11547.0 701 | 2018-01-08,13881.0 702 | 2018-01-09,14722.0 703 | 2018-01-10,12580.0 704 | 2018-01-11,17736.0 705 | 2018-01-12,15248.0 706 | 2018-01-16,14378.0 707 | 2018-01-17,16757.0 708 | 2018-01-18,15051.0 709 | 2018-01-19,15882.0 710 | 2018-01-22,15448.0 711 | 2018-01-23,15381.0 712 | 2018-01-24,19326.0 713 | 2018-01-25,16623.0 714 | 2018-01-26,15992.0 715 | 2018-01-29,14019.0 716 | 2018-01-30,13455.0 717 | 2018-01-31,14843.0 718 | 2018-02-01,14431.0 719 | 2018-02-02,14325.0 720 | 2018-02-05,14104.0 721 | 2018-02-06,15815.0 722 | 2018-02-07,14252.0 723 | 2018-02-08,15726.0 724 | 2018-02-09,15578.0 725 | 2018-02-12,14160.0 726 | 2018-02-13,17494.0 727 | 2018-02-14,14778.0 728 | 2018-02-15,16146.0 729 | 2018-02-16,16607.0 730 | 2018-02-20,14174.0 731 | 2018-02-21,16024.0 732 | 2018-02-22,14372.0 733 | 2018-02-23,15545.0 734 | 2018-02-26,16605.0 735 | 2018-02-27,17195.0 736 | 2018-02-28,16831.0 737 | 2018-03-01,14474.0 738 | 2018-03-02,13998.0 739 | 2018-03-05,15446.0 740 | 2018-03-06,13164.0 741 | 2018-03-07,16793.0 742 | 2018-03-08,14721.0 743 | 2018-03-09,12699.0 744 | 
2018-03-12,16085.0 745 | 2018-03-13,13569.0 746 | 2018-03-14,17992.0 747 | 2018-03-15,15485.0 748 | 2018-03-16,15041.0 749 | 2018-03-19,14529.0 750 | 2018-03-20,13751.0 751 | 2018-03-21,15747.0 752 | 2018-03-22,18491.0 753 | 2018-03-23,17875.0 754 | 2018-03-26,14910.0 755 | 2018-03-27,17507.0 756 | 2018-03-28,16663.0 757 | 2018-03-29,18341.0 758 | 2018-04-02,12375.0 759 | 2018-04-03,15121.0 760 | 2018-04-04,13036.0 761 | 2018-04-05,15340.0 762 | 2018-04-06,15234.0 763 | 2018-04-09,14386.0 764 | 2018-04-10,14289.0 765 | 2018-04-11,16782.0 766 | 2018-04-12,17325.0 767 | 2018-04-13,16548.0 768 | 2018-04-16,18073.0 769 | 2018-04-17,16431.0 770 | 2018-04-18,16468.0 771 | 2018-04-19,18137.0 772 | 2018-04-20,13796.0 773 | 2018-04-23,12432.0 774 | 2018-04-24,16881.0 775 | 2018-04-25,18414.0 776 | 2018-04-26,15931.0 777 | 2018-04-27,14471.0 778 | 2018-04-30,14865.0 779 | 2018-05-01,12999.0 780 | 2018-05-02,16903.0 781 | 2018-05-03,12569.0 782 | 2018-05-04,16067.0 783 | 2018-05-07,16152.0 784 | 2018-05-08,11627.0 785 | 2018-05-09,15279.0 786 | 2018-05-10,14828.0 787 | 2018-05-11,16304.0 788 | 2018-05-14,15012.0 789 | 2018-05-15,16069.0 790 | 2018-05-16,14576.0 791 | 2018-05-17,15594.0 792 | 2018-05-18,16375.0 793 | 2018-05-21,14209.0 794 | 2018-05-22,15637.0 795 | 2018-05-23,17881.0 796 | 2018-05-24,13460.0 797 | 2018-05-25,13179.0 798 | 2018-05-29,14338.0 799 | 2018-05-30,12745.0 800 | 2018-05-31,17385.0 801 | 2018-06-01,15859.0 802 | 2018-06-04,13300.0 803 | 2018-06-05,15684.0 804 | 2018-06-06,15425.0 805 | 2018-06-07,14483.0 806 | 2018-06-08,15787.0 807 | 2018-06-11,14562.0 808 | 2018-06-12,14104.0 809 | 2018-06-13,15821.0 810 | 2018-06-14,12716.0 811 | 2018-06-15,16353.0 812 | 2018-06-18,15679.0 813 | 2018-06-19,13990.0 814 | 2018-06-20,14332.0 815 | 2018-06-21,16764.0 816 | 2018-06-22,14741.0 817 | 2018-06-25,15596.0 818 | 2018-06-26,15663.0 819 | 2018-06-27,15113.0 820 | 2018-06-28,16211.0 821 | 2018-06-29,13559.0 822 | 2018-07-02,16316.0 823 | 2018-07-03,15683.0 824 | 2018-07-05,16260.0 825 | 2018-07-06,16225.0 826 | 2018-07-09,13334.0 827 | 2018-07-10,13341.0 828 | 2018-07-11,14705.0 829 | 2018-07-12,15709.0 830 | 2018-07-13,14795.0 831 | 2018-07-16,16337.0 832 | 2018-07-17,14885.0 833 | 2018-07-18,18256.0 834 | 2018-07-19,17893.0 835 | 2018-07-20,16872.0 836 | 2018-07-23,14253.0 837 | 2018-07-24,14227.0 838 | 2018-07-25,16888.0 839 | 2018-07-26,16656.0 840 | 2018-07-27,16974.0 841 | 2018-07-30,19337.0 842 | 2018-07-31,13452.0 843 | 2018-08-01,15350.0 844 | 2018-08-02,16629.0 845 | 2018-08-03,15249.0 846 | 2018-08-06,12811.0 847 | 2018-08-07,15768.0 848 | 2018-08-08,14583.0 849 | 2018-08-09,15534.0 850 | 2018-08-10,14680.0 851 | 2018-08-13,15163.0 852 | 2018-08-14,16373.0 853 | 2018-08-15,16091.0 854 | 2018-08-16,17603.0 855 | 2018-08-17,16467.0 856 | 2018-08-20,13982.0 857 | 2018-08-21,12553.0 858 | 2018-08-22,13796.0 859 | 2018-08-23,14759.0 860 | 2018-08-24,16791.0 861 | 2018-08-27,15747.0 862 | 2018-08-28,16596.0 863 | 2018-08-29,17264.0 864 | 2018-08-30,13083.0 865 | 2018-08-31,17253.0 866 | 2018-09-04,16530.0 867 | 2018-09-05,16146.0 868 | 2018-09-06,16937.0 869 | 2018-09-07,16391.0 870 | 2018-09-10,16532.0 871 | 2018-09-11,12983.0 872 | 2018-09-12,15174.0 873 | 2018-09-13,14345.0 874 | 2018-09-14,17648.0 875 | 2018-09-17,13917.0 876 | 2018-09-18,15958.0 877 | 2018-09-19,15165.0 878 | 2018-09-20,14942.0 879 | 2018-09-21,17308.0 880 | 2018-09-24,13241.0 881 | 2018-09-25,15344.0 882 | 2018-09-26,16047.0 883 | 2018-09-27,15761.0 884 | 2018-09-28,16268.0 885 | 2018-10-01,16573.0 886 | 
2018-10-02,16835.0 887 | 2018-10-03,17679.0 888 | 2018-10-04,14481.0 889 | 2018-10-05,17013.0 890 | 2018-10-08,15994.0 891 | 2018-10-09,16756.0 892 | 2018-10-10,17403.0 893 | 2018-10-11,17336.0 894 | 2018-10-12,13129.0 895 | 2018-10-15,13623.0 896 | 2018-10-16,17312.0 897 | 2018-10-17,15322.0 898 | 2018-10-18,14285.0 899 | 2018-10-19,14896.0 900 | 2018-10-22,16709.0 901 | 2018-10-23,15658.0 902 | 2018-10-24,16332.0 903 | 2018-10-25,16662.0 904 | 2018-10-26,18921.0 905 | 2018-10-29,14958.0 906 | 2018-10-30,13541.0 907 | 2018-10-31,14305.0 908 | 2018-11-01,14241.0 909 | 2018-11-02,15586.0 910 | 2018-11-05,13706.0 911 | 2018-11-06,15996.0 912 | 2018-11-07,16676.0 913 | 2018-11-08,15077.0 914 | 2018-11-09,16878.0 915 | 2018-11-12,16603.0 916 | 2018-11-13,15386.0 917 | 2018-11-14,17210.0 918 | 2018-11-15,15168.0 919 | 2018-11-16,15594.0 920 | 2018-11-19,20269.0 921 | 2018-11-20,20194.0 922 | 2018-11-21,16594.0 923 | 2018-11-23,14663.0 924 | 2018-11-26,16308.0 925 | 2018-11-27,17350.0 926 | 2018-11-28,13819.0 927 | 2018-11-29,18834.0 928 | 2018-11-30,15459.0 929 | 2018-12-03,17413.0 930 | 2018-12-04,15139.0 931 | 2018-12-06,14588.0 932 | 2018-12-07,14397.0 933 | 2018-12-10,14542.0 934 | 2018-12-11,16233.0 935 | 2018-12-12,14931.0 936 | 2018-12-13,15821.0 937 | 2018-12-14,16495.0 938 | 2018-12-17,16390.0 939 | 2018-12-18,20413.0 940 | 2018-12-19,19487.0 941 | 2018-12-20,16694.0 942 | 2018-12-21,14686.0 943 | 2018-12-24,15727.0 944 | 2018-12-26,18060.0 945 | 2018-12-27,18655.0 946 | 2018-12-28,16389.0 947 | 2018-12-31,17031.0 948 | 2019-01-02,13976.0 949 | 2019-01-03,17017.0 950 | 2019-01-04,17286.0 951 | 2019-01-07,16817.0 952 | 2019-01-08,14732.0 953 | 2019-01-09,15285.0 954 | 2019-01-10,18300.0 955 | 2019-01-11,17303.0 956 | 2019-01-14,17571.0 957 | 2019-01-15,16826.0 958 | 2019-01-16,17681.0 959 | 2019-01-17,17547.0 960 | 2019-01-18,17095.0 961 | 2019-01-22,17377.0 962 | 2019-01-23,15812.0 963 | 2019-01-24,15785.0 964 | 2019-01-25,15595.0 965 | 2019-01-28,17413.0 966 | 2019-01-29,13841.0 967 | 2019-01-30,16567.0 968 | 2019-01-31,18302.0 969 | 2019-02-01,17436.0 970 | 2019-02-04,14631.0 971 | 2019-02-05,17581.0 972 | 2019-02-06,18643.0 973 | 2019-02-07,14744.0 974 | 2019-02-08,16296.0 975 | 2019-02-11,15528.0 976 | 2019-02-12,14049.0 977 | 2019-02-13,17126.0 978 | 2019-02-14,19012.0 979 | 2019-02-15,17915.0 980 | 2019-02-19,18106.0 981 | 2019-02-20,20800.0 982 | 2019-02-21,16314.0 983 | 2019-02-22,17912.0 984 | 2019-02-25,17452.0 985 | 2019-02-26,19890.0 986 | 2019-02-27,20312.0 987 | 2019-02-28,19787.0 988 | 2019-03-01,17527.0 989 | 2019-03-04,20882.0 990 | 2019-03-05,20029.0 991 | 2019-03-06,15156.0 992 | 2019-03-07,13578.0 993 | 2019-03-08,14128.0 994 | 2019-03-11,18272.0 995 | 2019-03-12,18826.0 996 | 2019-03-13,15663.0 997 | 2019-03-14,18524.0 998 | 2019-03-15,19051.0 999 | 2019-03-18,14402.0 1000 | 2019-03-19,17954.0 1001 | 2019-03-20,16280.0 1002 | 2019-03-21,16243.0 1003 | 2019-03-22,18688.0 1004 | 2019-03-25,17222.0 1005 | 2019-03-26,18562.0 1006 | 2019-03-27,14478.0 1007 | 2019-03-28,17325.0 1008 | 2019-03-29,16188.0 1009 | 2019-04-01,15432.0 1010 | 2019-04-02,18448.0 1011 | 2019-04-03,19900.0 1012 | 2019-04-04,17792.0 1013 | 2019-04-05,18196.0 1014 | 2019-04-08,19564.0 1015 | 2019-04-09,14419.0 1016 | 2019-04-10,20259.0 1017 | 2019-04-11,17077.0 1018 | 2019-04-12,17315.0 1019 | 2019-04-15,17393.0 1020 | 2019-04-16,19280.0 1021 | 2019-04-17,19128.0 1022 | 2019-04-18,17824.0 1023 | 2019-04-22,20474.0 1024 | 2019-04-23,16924.0 1025 | 2019-04-24,17541.0 1026 | 2019-04-25,18646.0 1027 | 
2019-04-26,17893.0 1028 | 2019-04-29,17568.0 1029 | 2019-04-30,17072.0 1030 | 2019-05-01,19664.0 1031 | 2019-05-02,18367.0 1032 | 2019-05-03,18545.0 1033 | 2019-05-06,14510.0 1034 | 2019-05-07,19047.0 1035 | 2019-05-08,18096.0 1036 | 2019-05-09,19420.0 1037 | 2019-05-10,19795.0 1038 | 2019-05-13,21213.0 1039 | 2019-05-14,17385.0 1040 | 2019-05-15,14789.0 1041 | 2019-05-16,16001.0 1042 | 2019-05-17,16761.0 1043 | 2019-05-20,18635.0 1044 | 2019-05-21,15281.0 1045 | 2019-05-22,20066.0 1046 | 2019-05-23,19112.0 1047 | 2019-05-24,15950.0 1048 | 2019-05-28,18557.0 1049 | 2019-05-29,19066.0 1050 | 2019-05-30,18650.0 1051 | 2019-05-31,17130.0 1052 | 2019-06-03,16609.0 1053 | 2019-06-04,16667.0 1054 | 2019-06-05,17212.0 1055 | 2019-06-06,15483.0 1056 | 2019-06-07,20362.0 1057 | 2019-06-10,15799.0 1058 | 2019-06-11,18053.0 1059 | 2019-06-12,15444.0 1060 | 2019-06-13,16993.0 1061 | 2019-06-14,17468.0 1062 | 2019-06-17,18240.0 1063 | 2019-06-18,19427.0 1064 | 2019-06-19,16622.0 1065 | 2019-06-20,13806.0 1066 | 2019-06-21,16922.0 1067 | 2019-06-24,13448.0 1068 | 2019-06-25,14924.0 1069 | 2019-06-26,16398.0 1070 | 2019-06-27,16054.0 1071 | 2019-06-28,17165.0 1072 | 2019-07-01,17034.0 1073 | 2019-07-02,16055.0 1074 | 2019-07-03,15327.0 1075 | 2019-07-05,14879.0 1076 | 2019-07-08,14642.0 1077 | 2019-07-09,18163.0 1078 | 2019-07-10,19239.0 1079 | 2019-07-11,19080.0 1080 | 2019-07-12,14961.0 1081 | 2019-07-15,17994.0 1082 | 2019-07-16,17274.0 1083 | 2019-07-17,16727.0 1084 | 2019-07-18,16752.0 1085 | 2019-07-19,16894.0 1086 | 2019-07-22,17227.0 1087 | 2019-07-23,13987.0 1088 | 2019-07-24,15561.0 1089 | 2019-07-25,18167.0 1090 | 2019-07-26,16221.0 1091 | 2019-07-29,18683.0 1092 | 2019-07-30,16294.0 1093 | 2019-07-31,14915.0 1094 | 2019-08-01,16009.0 1095 | 2019-08-02,18484.0 1096 | 2019-08-05,17198.0 1097 | 2019-08-06,20213.0 1098 | 2019-08-07,15215.0 1099 | 2019-08-08,17098.0 1100 | 2019-08-09,17230.0 1101 | 2019-08-12,18102.0 1102 | 2019-08-13,18808.0 1103 | 2019-08-14,16780.0 1104 | 2019-08-15,17783.0 1105 | 2019-08-16,13671.0 1106 | 2019-08-19,19047.0 1107 | 2019-08-20,13353.0 1108 | 2019-08-21,15331.0 1109 | 2019-08-22,19669.0 1110 | 2019-08-23,18546.0 1111 | 2019-08-26,16937.0 1112 | 2019-08-27,18241.0 1113 | 2019-08-28,18586.0 1114 | 2019-08-29,17104.0 1115 | 2019-08-30,18199.0 1116 | 2019-09-03,18123.0 1117 | 2019-09-04,16453.0 1118 | 2019-09-05,16983.0 1119 | 2019-09-06,15865.0 1120 | 2019-09-09,16028.0 1121 | 2019-09-10,15518.0 1122 | 2019-09-11,18438.0 1123 | 2019-09-12,18016.0 1124 | 2019-09-13,17516.0 1125 | 2019-09-16,16501.0 1126 | 2019-09-17,16381.0 1127 | 2019-09-18,17174.0 1128 | 2019-09-19,20349.0 1129 | 2019-09-20,15495.0 1130 | 2019-09-23,16590.0 1131 | 2019-09-24,15548.0 1132 | 2019-09-25,17079.0 1133 | 2019-09-26,16676.0 1134 | 2019-09-27,15320.0 1135 | 2019-09-30,18933.0 1136 | -------------------------------------------------------------------------------- /license.txt: -------------------------------------------------------------------------------- 1 | Freeware License, some rights reserved 2 | 3 | Copyright (c) 2020 Christopher Conlan 4 | 5 | Permission is hereby granted, free of charge, to anyone obtaining a copy 6 | of this software and associated documentation files (the "Software"), 7 | to work with the Software within the limits of freeware distribution and fair use. 8 | This includes the rights to use, copy, and modify the Software for personal use. 
9 | Users are also allowed and encouraged to submit corrections and modifications 10 | to the Software for the benefit of other users. 11 | 12 | It is not allowed to reuse, modify, or redistribute the Software for 13 | commercial use in any way, or for a user’s educational materials such as books 14 | or blog articles without prior permission from the copyright holder. 15 | 16 | The above copyright notice and this permission notice need to be included 17 | in all copies or substantial portions of the software. 18 | 19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | AUTHORS OR COPYRIGHT HOLDERS OR APRESS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 25 | SOFTWARE. 26 |
--------------------------------------------------------------------------------
/listings/chapter_1/1_1_type_hinting_examples.py:
--------------------------------------------------------------------------------
from typing import List, Dict, Tuple, Any
import datetime

# A list of floating point numbers
v: List[float] = [i * 1.23 for i in range(10)]

# A list of mixed type values
v: List[Any] = ['apple', 123, 'banana', None]

# A dictionary of floats indexed by dates
v: Dict[datetime.date, float] = {
    datetime.date.today(): 123.456,
    datetime.date(2000, 1, 1): 234.567,
}

# A dictionary of lists of strings indexed by tuples of integers
v: Dict[Tuple[int, int], List[str]] = {
    (2, 3): [
        'apple',
        'banana',
    ],
    (4, 7): [
        'orange',
        'pineapple',
    ]
}

# An incorrect type hint
# Your compiler or IDE might complain about this
v: List[str] = [1, 2, 3]

# A possibly incorrect type hint
# There is no consensus on whether or not this is correct
v: List[float] = [1, None, 3, None, 5]

# This is non-descript but correct
v: List = [(1, 2, 'a'), (4, 5, 'b')]

# This is more descriptive
v: List[Tuple[int, int, str]] = [(1, 2, 'a'), (4, 5, 'b')]

# Custom types are supported
from typing import NewType
StockTicker = NewType('StockTicker', str)
ticker: StockTicker = StockTicker('AAPL')

# Functions can define input and return types
def convert_to_string(value: Any) -> str:
    return str(value)
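
A note on the "no consensus" hint above: typing.Optional states the intent explicitly. A minimal sketch, not part of the book's listing:

from typing import List, Optional

# A list of floats that may contain missing values, annotated explicitly.
# Optional[float] is shorthand for Union[float, None].
v: List[Optional[float]] = [1.0, None, 3.0, None, 5.0]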
--------------------------------------------------------------------------------
/listings/chapter_1/1_2_pandas_data_types.py:
--------------------------------------------------------------------------------
import pandas as pd
import datetime

data = {
    'SPY': {
        datetime.date(2000, 1, 4): 100,
        datetime.date(2000, 1, 5): 101,
    },
    'AAPL': {
        datetime.date(2000, 1, 4): 300,
        datetime.date(2000, 1, 5): 303,
    },
}
df: pd.DataFrame = pd.DataFrame(data=data)
print(df)
# Returns ...
#             SPY  AAPL
# 2000-01-04  100   300
# 2000-01-05  101   303

# Index by column
aapl_series: pd.Series = df['AAPL']
print(aapl_series)
# Returns ...
# 2000-01-04    300
# 2000-01-05    303
# Name: AAPL, dtype: int64

# Index by row
start_of_year_row: pd.Series = df.loc[datetime.date(2000, 1, 4)]
print(start_of_year_row)
# Returns ...
# SPY     100
# AAPL    300
# Name: 2000-01-04, dtype: int64

# Index by both (yields a scalar, not a pd.Series)
start_of_year_price: int = df['AAPL'][datetime.date(2000, 1, 4)]
print(start_of_year_price)
# Returns ...
# 300
--------------------------------------------------------------------------------
/listings/chapter_1/1_3_pandas_data_types_part_2.py:
--------------------------------------------------------------------------------
import pandas as pd
import datetime

data = {
    'SPY': {
        datetime.date(2000, 1, 4): 100,
        datetime.date(2000, 1, 5): 101,
    },
    'AAPL': {
        datetime.date(2000, 1, 4): 300,
        datetime.date(2000, 1, 5): 303,
    },
}

### Begin listing

# Create a series
series = pd.Series(data=data['SPY'])
print(series)
# Returns ...
# 2000-01-04    100
# 2000-01-05    101
# dtype: int64
--------------------------------------------------------------------------------
/listings/chapter_1/1_4_pandas_indexes.py:
--------------------------------------------------------------------------------
import pandas as pd
import datetime

### Begin listing

dates = [datetime.date(2000, 1, i) for i in range(1, 11)]
values = [i**2 for i in range(1, 11)]
series = pd.Series(data=values, index=dates)

# O(n) time complexity search through a list
print(datetime.date(2000, 1, 5) in dates)
# Returns ...
# True

# O(1) time complexity search through an index
print(datetime.date(2000, 1, 5) in series.index)
# Returns ...
# True
--------------------------------------------------------------------------------
/listings/chapter_2/2_10_maximum_drawdown.py:
--------------------------------------------------------------------------------
from typing import Dict, Any, Callable

import numpy as np
import pandas as pd

DRAWDOWN_EVALUATORS: Dict[str, Callable] = {
    'dollar': lambda price, peak: peak - price,
    'percent': lambda price, peak: -((price / peak) - 1),
    'log': lambda price, peak: np.log(peak) - np.log(price),
}

def calculate_drawdown_series(series: pd.Series, method: str='log') -> pd.Series:
    """
    Returns the drawdown series
    """
    assert method in DRAWDOWN_EVALUATORS, \
        f'Method "{method}" must be one of {list(DRAWDOWN_EVALUATORS.keys())}'

    evaluator = DRAWDOWN_EVALUATORS[method]
    return evaluator(series, series.cummax())

def calculate_max_drawdown(series: pd.Series, method: str='log') -> float:
    """
    Simply returns the max drawdown as a float
    """
    return calculate_drawdown_series(series, method).max()
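
A minimal usage sketch of the drawdown functions above, assuming they are in scope; the prices are made up for illustration and are not part of the book's listings:

import pandas as pd

# Hypothetical date-indexed price series
prices = pd.Series(
    [100.0, 110.0, 95.0, 105.0, 90.0],
    index=pd.date_range('2020-01-01', periods=5),
)

# Worst peak-to-trough decline: (110 - 90) / 110, about 18.2 percent
print(calculate_max_drawdown(prices, method='percent'))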
-------------------------------------------------------------------------------- /listings/chapter_2/2_11_maximum_drawdown_with_metadata.py: -------------------------------------------------------------------------------- 1 | def calculate_max_drawdown_with_metadata(series: pd.Series, 2 | method: str='log') -> Dict[str, Any]: 3 | """ 4 | Calculates max_drawdown and stores metadata about when and where. Returns 5 | a dictionary of the form 6 | { 7 | 'max_drawdown': float, 8 | 'peak_date': pd.Timestamp, 9 | 'peak_price': float, 10 | 'trough_date': pd.Timestamp, 11 | 'trough_price': float, 12 | } 13 | """ 14 | 15 | assert method in DRAWDOWN_EVALUATORS, \ 16 | f'Method "{method}" must be one of {list(DRAWDOWN_EVALUATORS.keys())}' 17 | 18 | evaluator = DRAWDOWN_EVALUATORS[method] 19 | 20 | max_drawdown = 0 21 | local_peak_date = peak_date = trough_date = series.index[0] 22 | local_peak_price = peak_price = trough_price = series.iloc[0] 23 | 24 | for date, price in series.items(): 25 | 26 | # Keep track of the rolling max 27 | if price > local_peak_price: 28 | local_peak_date = date 29 | local_peak_price = price 30 | 31 | # Compute the drawdown 32 | drawdown = evaluator(price, local_peak_price) 33 | 34 | # Store new max drawdown values 35 | if drawdown > max_drawdown: 36 | max_drawdown = drawdown 37 | 38 | peak_date = local_peak_date 39 | peak_price = local_peak_price 40 | 41 | trough_date = date 42 | trough_price = price 43 | 44 | return { 45 | 'max_drawdown': max_drawdown, 46 | 'peak_date': peak_date, 47 | 'peak_price': peak_price, 48 | 'trough_date': trough_date, 49 | 'trough_price': trough_price 50 | } -------------------------------------------------------------------------------- /listings/chapter_2/2_12_log_max_drawdown_ratio.py: -------------------------------------------------------------------------------- 1 | def calculate_log_max_drawdown_ratio(series: pd.Series) -> float: 2 | log_drawdown = calculate_max_drawdown(series, method='log') 3 | log_return = np.log(series.iloc[-1]) - np.log(series.iloc[0]) 4 | return log_return - log_drawdown -------------------------------------------------------------------------------- /listings/chapter_2/2_13_calmar_ratio.py: -------------------------------------------------------------------------------- 1 | def calculate_calmar_ratio(series: pd.Series, years_past: int=3) -> float: 2 | """ 3 | Return the percent max drawdown ratio over the trailing years_past years 4 | (three by default) using CAGR as the numerator, otherwise known as the Calmar Ratio 5 | """ 6 | 7 | # Filter series to the trailing years_past years 8 | last_date = series.index[-1] 9 | three_years_ago = last_date - pd.Timedelta(days=years_past*365.25) 10 | series = series[series.index > three_years_ago] 11 | 12 | # Compute annualized percent max drawdown ratio 13 | percent_drawdown = calculate_max_drawdown(series, method='percent') 14 | cagr = calculate_cagr(series) 15 | return cagr / percent_drawdown -------------------------------------------------------------------------------- /listings/chapter_2/2_14_pure_profit_score.py: -------------------------------------------------------------------------------- 1 | from sklearn.linear_model import LinearRegression 2 | 3 | def calculate_pure_profit_score(price_series: pd.Series) -> float: 4 | """ 5 | Calculates the pure profit score 6 | """ 7 | cagr = calculate_cagr(price_series) 8 | 9 | # Build a single column for a predictor, t 10 | t: np.ndarray = np.arange(0, price_series.shape[0]).reshape(-1, 1) 11 | 12 | # Fit the regression 13 | regression = LinearRegression().fit(t, price_series) 14 | 15 | # Get the r-squared value 16 | r_squared = regression.score(t, price_series) 17 | 18 | return cagr * r_squared 19 |
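20 | # Intuition check: a perfectly straight-line price series fits the regression 21 | # exactly (r_squared == 1), so the score collapses to the CAGR, and noise can 22 | # only lower it. Illustrative only, assuming numpy as np and pandas as pd: 23 | # 24 | # dates = pd.date_range('2018-01-01', periods=366) 25 | # series = pd.Series(np.linspace(100, 110, 366), index=dates) 26 | # calculate_pure_profit_score(series) # approximately the CAGR, about 0.10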
-------------------------------------------------------------------------------- /listings/chapter_2/2_15_jensens_alpha.py: -------------------------------------------------------------------------------- 1 | def calculate_jensens_alpha(return_series: pd.Series, 2 | benchmark_return_series: pd.Series) -> float: 3 | """ 4 | Calculates Jensen's alpha. Prefers input series to have the same index. 5 | Handles NAs. 6 | """ 7 | 8 | # Join series along date index and purge NAs 9 | df = pd.concat([return_series, benchmark_return_series], sort=True, axis=1) 10 | df = df.dropna() 11 | 12 | # Get the appropriate data structure for scikit learn 13 | clean_returns: pd.Series = df[return_series.name] 14 | clean_benchmarks = pd.DataFrame(df[benchmark_return_series.name]) 15 | 16 | # Fit a linear regression and return the alpha 17 | regression = LinearRegression().fit(clean_benchmarks, y=clean_returns) 18 | return regression.intercept_ -------------------------------------------------------------------------------- /listings/chapter_2/2_1_return_series_pure_python.py: -------------------------------------------------------------------------------- 1 | def calculate_return_series(prices: List[float]) -> List[float]: 2 | """ 3 | Calculates return series as a parallel list of returns on prices 4 | """ 5 | return_series = [None] 6 | for i in range(1, len(prices)): 7 | return_series.append((prices[i] / prices[i-1]) - 1) 8 | 9 | return return_series -------------------------------------------------------------------------------- /listings/chapter_2/2_2_return_series_pandas.py: -------------------------------------------------------------------------------- 1 | def calculate_return_series(series: pd.Series) -> pd.Series: 2 | """ 3 | Calculates the return series of a time series. 4 | The first value will always be NaN. 5 | Output series retains the index of the input series. 6 | """ 7 | shifted_series = series.shift(1, axis=0) 8 | return series / shifted_series - 1 -------------------------------------------------------------------------------- /listings/chapter_2/2_3_log_return_series.py: -------------------------------------------------------------------------------- 1 | def calculate_log_return_series(series: pd.Series) -> pd.Series: 2 | """ 3 | Same as calculate_return_series but with log returns 4 | """ 5 | shifted_series = series.shift(1, axis=0) 6 | return pd.Series(np.log(series / shifted_series))
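7 | 8 | # The two are linked by log(1 + r): log returns nearly equal simple returns 9 | # for small moves, and they sum across time. A quick check, assuming numpy 10 | # as np and pandas as pd: 11 | # 12 | # series = pd.Series([100.0, 101.0, 102.0]) 13 | # calculate_return_series(series) # NaN, 0.0100, 0.0099... 14 | # calculate_log_return_series(series) # NaN, 0.00995..., 0.00985...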
14 | """ 15 | years_past = get_years_past(return_series) 16 | entries_per_year = return_series.shape[0] / years_past 17 | return return_series.std() * np.sqrt(entries_per_year) -------------------------------------------------------------------------------- /listings/chapter_2/2_5_annualized_volatility_on_awu.py: -------------------------------------------------------------------------------- 1 | from pypm import data_io, metrics 2 | 3 | df = data_io.load_eod_data('AWU') 4 | return_series = metrics.calculate_log_return_series(df['close']) 5 | print(metrics.calculate_annualized_volatility(return_series)) 6 | -------------------------------------------------------------------------------- /listings/chapter_2/2_6_calculating_cagr.py: -------------------------------------------------------------------------------- 1 | def calculate_cagr(series: pd.Series) -> float: 2 | """ 3 | Calculate compounded annual growth rate 4 | """ 5 | value_factor = series.iloc[-1] / series.iloc[0] 6 | year_past = get_years_past(series) 7 | return (value_factor ** (1 / year_past)) - 1 -------------------------------------------------------------------------------- /listings/chapter_2/2_7_calculating_cagr_on_awu.py: -------------------------------------------------------------------------------- 1 | from pypm import data_io, metrics 2 | 3 | df = data_io.load_eod_data('AWU') 4 | print(metrics.calculate_cagr(df['close'])) -------------------------------------------------------------------------------- /listings/chapter_2/2_8_calculating_sharpe_ratio.py: -------------------------------------------------------------------------------- 1 | def calculate_sharpe_ratio(price_series: pd.Series, 2 | benchmark_rate: float=0) -> float: 3 | """ 4 | Calculates the sharpe ratio given a price series. Defaults to benchmark_rate 5 | of zero. 6 | """ 7 | cagr = calculate_cagr(price_series) 8 | return_series = calculate_return_series(price_series) 9 | volatility = calculate_annualized_volatility(return_series) 10 | return (cagr - benchmark_rate) / volatility -------------------------------------------------------------------------------- /listings/chapter_2/2_9_calculating_downside_volatility.py: -------------------------------------------------------------------------------- 1 | def calculate_annualized_downside_deviation(return_series: pd.Series, 2 | benchmark_rate: float=0) -> float: 3 | """ 4 | Calculates the downside deviation for use in the sortino ratio. 5 | 6 | Benchmark rate is assumed to be annualized. It will be adjusted according 7 | to the number of periods per year seen in the data. 8 | """ 9 | 10 | # For both de-annualizing the benchmark rate and annualizing result 11 | years_past = get_years_past(return_series) 12 | entries_per_year = return_series.shape[0] / years_past 13 | 14 | adjusted_benchmark_rate = ((1+benchmark_rate) ** (1/entries_per_year)) - 1 15 | 16 | downside_series = adjusted_benchmark_rate - return_series 17 | downside_sum_of_squares = (downside_series[downside_series > 0] ** 2).sum() 18 | denominator = return_series.shape[0] - 1 19 | downside_deviation = np.sqrt(downside_sum_of_squares / denominator) 20 | 21 | return downside_deviation * np.sqrt(entries_per_year) 22 | 23 | def calculate_sortino_ratio(price_series: pd.Series, 24 | benchmark_rate: float=0) -> float: 25 | """ 26 | Calculates the sortino ratio. 
27 | """ 28 | cagr = calculate_cagr(price_series) 29 | return_series = calculate_return_series(price_series) 30 | downside_deviation = calculate_annualized_downside_deviation(return_series) 31 | return (cagr - benchmark_rate) / downside_deviation -------------------------------------------------------------------------------- /listings/chapter_3/3_1_calculate_simple_moving_average.py: -------------------------------------------------------------------------------- 1 | def calculate_simple_moving_average(series: pd.Series, n: int=20) -> pd.Series: 2 | """Calculates the simple moving average""" 3 | return series.rolling(n).mean() -------------------------------------------------------------------------------- /listings/chapter_3/3_2_slow_simple_moving_average.py: -------------------------------------------------------------------------------- 1 | def slow_moving_average(values: List[float], m: int=20): 2 | """ 3 | This is O(nm) time, because it re-computes the sum at every step 4 | 1 + 2 + 3 + 4 + ... / m 5 | 2 + 3 + 4 + 5 + ... / m 6 | 3 + 4 + 5 + 6 + ... / m 7 | 4 + 5 + 6 + 7 + ... / m 8 | and so on ... 9 | Leading to approx (m-1) * n individual additions. 10 | """ 11 | 12 | # Initial values 13 | moving_average = [None] * (m-1) 14 | 15 | for i in range(m-1, len(values)): 16 | the_average = np.mean(values[(i-m+1):i+1]) 17 | moving_average.append(the_average) 18 | 19 | return moving_average -------------------------------------------------------------------------------- /listings/chapter_3/3_3_fast_simple_moving_average.py: -------------------------------------------------------------------------------- 1 | def fast_moving_average(values: List[float], m: int=20): 2 | """ 3 | This is O(n) time, because it keeps track of the intermediate sum. 4 | Leading to approx 2n individual additions. 
5 | """ 6 | 7 | # Initial values 8 | moving_average = [None] * (m-1) 9 | accumulator = sum(values[:m]) 10 | moving_average.append(accumulator / m) 11 | 12 | for i in range(m, len(values)): 13 | accumulator -= values[i-m] 14 | accumulator += values[i] 15 | moving_average.append(accumulator / m) 16 | 17 | return moving_average -------------------------------------------------------------------------------- /listings/chapter_3/3_4_calculating_macd.py: -------------------------------------------------------------------------------- 1 | def calculate_macd_oscillator(series: pd.Series, 2 | n1: int=5, n2: int=34) -> pd.Series: 3 | """ 4 | Calculate the moving average convergence divergence oscillator, given a 5 | short moving average of length n1 and a long moving average of length n2 6 | """ 7 | assert n1 < n2, f'n1 must be less than n2' 8 | return calculate_simple_moving_average(series, n1) - \ 9 | calculate_simple_moving_average(series, n2) -------------------------------------------------------------------------------- /listings/chapter_3/3_5_calculate_bollinger_bands.py: -------------------------------------------------------------------------------- 1 | def calculate_bollinger_bands(series: pd.Series, n: int=20) -> pd.DataFrame: 2 | """ 3 | Calculates the bollinger bands and returns them as a dataframe 4 | """ 5 | 6 | sma = calculate_simple_moving_average(series, n) 7 | stdev = calculate_simple_moving_sample_stdev(series, n) 8 | 9 | return pd.DataFrame({ 10 | 'middle': sma, 11 | 'upper': sma + 2 * stdev, 12 | 'lower': sma - 2 * stdev 13 | }) -------------------------------------------------------------------------------- /listings/chapter_3/3_6_calculate_chaikin_money_flow.py: -------------------------------------------------------------------------------- 1 | def calculate_money_flow_volume_series(df: pd.DataFrame) -> pd.Series: 2 | """ 3 | Calculates money flow series 4 | """ 5 | mfv = df['volume'] * (2*df['close'] - df['high'] - df['low']) / \ 6 | (df['high'] - df['low']) 7 | return mfv 8 | 9 | def calculate_money_flow_volume(df: pd.DataFrame, n: int=20) -> pd.Series: 10 | """ 11 | Calculates money flow volume, or q_t in our formula 12 | """ 13 | return calculate_money_flow_volume_series(df).rolling(n).sum() 14 | 15 | def calculate_chaikin_money_flow(df: pd.DataFrame, n: int=20) -> pd.Series: 16 | """ 17 | Calculates the Chaikin money flow 18 | """ 19 | return calculate_money_flow_volume(df, n) / df['volume'].rolling(n).sum() -------------------------------------------------------------------------------- /listings/chapter_3/3_7_example_signals.py: -------------------------------------------------------------------------------- 1 | def create_macd_signal(series: pd.Series, n1: int=5, n2: int=34) -> pd.Series: 2 | """ 3 | Create a momentum-based signal based on the MACD crossover principle. 4 | Generate a buy signal when the MACD cross above zero, and a sell signal when 5 | it crosses below zero. 6 | """ 7 | 8 | # Calculate the macd and get the signs of the values. 9 | macd = calculate_macd_oscillator(series, n1, n2) 10 | macd_sign = np.sign(macd) 11 | 12 | # Create a copy shifted by some amount. 13 | macd_shifted_sign = macd_sign.shift(1, axis=0) 14 | 15 | # Multiply by the sign by the boolean. This will have the effect of casting 16 | # the boolean to an integer (either 0 or 1) and then multiply by the sign 17 | # (either -1, 0 or 1). 
-------------------------------------------------------------------------------- /listings/chapter_3/3_7_example_signals.py: -------------------------------------------------------------------------------- 1 | def create_macd_signal(series: pd.Series, n1: int=5, n2: int=34) -> pd.Series: 2 | """ 3 | Create a momentum-based signal based on the MACD crossover principle. 4 | Generate a buy signal when the MACD crosses above zero, and a sell signal 5 | when it crosses below zero. 6 | """ 7 | 8 | # Calculate the MACD and get the signs of the values. 9 | macd = calculate_macd_oscillator(series, n1, n2) 10 | macd_sign = np.sign(macd) 11 | 12 | # Create a copy shifted by some amount. 13 | macd_shifted_sign = macd_sign.shift(1, axis=0) 14 | 15 | # Multiply the sign by the boolean. This has the effect of casting the 16 | # boolean to an integer (either 0 or 1) and then multiplying it by the sign 17 | # (either -1, 0 or 1). 18 | return macd_sign * (macd_sign != macd_shifted_sign) 19 | 20 | 21 | def create_bollinger_band_signal(series: pd.Series, n: int=20) -> pd.Series: 22 | """ 23 | Create a reversal-based signal based on the upper and lower bands of the 24 | Bollinger bands. Generate a buy signal when the price is below the lower 25 | band, and a sell signal when the price is above the upper band. 26 | """ 27 | bollinger_bands = calculate_bollinger_bands(series, n) 28 | sell = series > bollinger_bands['upper'] 29 | buy = series < bollinger_bands['lower'] 30 | return (1*buy - 1*sell) -------------------------------------------------------------------------------- /listings/chapter_4/4_1_assertions_example.py: -------------------------------------------------------------------------------- 1 | assert 2 + 2 == 4, 'The laws of mathematics are crumbling.' 2 | assert 2 + 2 == 5, 'You will see this message in an AssertionError.' -------------------------------------------------------------------------------- /listings/chapter_4/4_2_position_class.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | 4 | from typing import Tuple, List, Dict, Callable, NewType, Any 5 | from collections import OrderedDict, defaultdict 6 | 7 | from pypm import metrics, signals, data_io 8 | 9 | Symbol = NewType('Symbol', str) 10 | Dollars = NewType('Dollars', float) 11 | 12 | DATE_FORMAT_STR = '%a %b %d, %Y' 13 | def _pdate(date: pd.Timestamp): 14 | """Pretty-print a datetime with just the date""" 15 | return date.strftime(DATE_FORMAT_STR) 16 | 17 | class Position(object): 18 | """ 19 | A simple object to hold and manipulate data related to long stock trades. 20 | 21 | Allows a single buy and sell operation on an asset for a constant number of 22 | shares. 23 | 24 | The __init__ method is equivalent to a buy operation. The exit 25 | method is a sell operation. 26 | """ 27 | 28 | def __init__(self, symbol: Symbol, entry_date: pd.Timestamp, 29 | entry_price: Dollars, shares: int): 30 | """ 31 | Equivalent to buying a certain number of shares of the asset 32 | """ 33 | 34 | # Recorded on initialization 35 | self.entry_date = entry_date 36 | self.entry_price = entry_price 37 | self.shares = shares 38 | self.symbol = symbol 39 | 40 | # Recorded on position exit 41 | self.exit_date: pd.Timestamp = None 42 | self.exit_price: Dollars = None 43 | 44 | # For easily getting current portfolio value 45 | self.last_date: pd.Timestamp = None 46 | self.last_price: Dollars = None 47 | 48 | # Updated intermediately 49 | self._dict_series: Dict[pd.Timestamp, Dollars] = OrderedDict() 50 | self.record_price_update(entry_date, entry_price) 51 | 52 | # Cache control for pd.Series representation 53 | self._price_series: pd.Series = None 54 | self._needs_update_pd_series: bool = True 55 | 56 | def exit(self, exit_date, exit_price): 57 | """ 58 | Equivalent to selling a stock holding 59 | """ 60 | assert self.entry_date != exit_date, 'Churned a position same-day.' 61 | assert not self.exit_date, 'Position already closed.'
62 | self.record_price_update(exit_date, exit_price) 63 | self.exit_date = exit_date 64 | self.exit_price = exit_price 65 | 66 | def record_price_update(self, date, price): 67 | """ 68 | Record an intermediate price update on the open position 69 | """ 70 | self.last_date = date 71 | self.last_price = price 72 | self._dict_series[date] = price 73 | 74 | # Invalidate cache on self.price_series 75 | self._needs_update_pd_series = True 76 | 77 | @property 78 | def price_series(self) -> pd.Series: 79 | """ 80 | Returns cached readonly pd.Series 81 | """ 82 | if self._needs_update_pd_series or self._price_series is None: 83 | self._price_series = pd.Series(self._dict_series) 84 | self._needs_update_pd_series = False 85 | return self._price_series 86 | 87 | @property 88 | def last_value(self) -> Dollars: 89 | return self.last_price * self.shares 90 | 91 | @property 92 | def is_active(self) -> bool: 93 | return self.exit_date is None 94 | 95 | @property 96 | def is_closed(self) -> bool: 97 | return not self.is_active 98 | 99 | @property 100 | def value_series(self) -> pd.Series: 101 | """ 102 | Returns the value of the position over time. Ignores self.exit_date. 103 | Used in calculating the equity curve. 104 | """ 105 | assert self.is_closed, 'Position must be closed to access this property' 106 | return self.shares * self.price_series[:-1] 107 | 108 | @property 109 | def percent_return(self) -> float: 110 | return (self.exit_price / self.entry_price) - 1 111 | 112 | @property 113 | def entry_value(self) -> Dollars: 114 | return self.shares * self.entry_price 115 | 116 | @property 117 | def exit_value(self) -> Dollars: 118 | return self.shares * self.exit_price 119 | 120 | @property 121 | def change_in_value(self) -> Dollars: 122 | return self.exit_value - self.entry_value 123 | 124 | @property 125 | def trade_length(self): 126 | return len(self._dict_series) - 1 127 | 128 | def print_position_summary(self): 129 | _entry_date = _pdate(self.entry_date) 130 | _exit_date = _pdate(self.exit_date) 131 | _days = self.trade_length 132 | 133 | _entry_price = round(self.entry_price, 2) 134 | _exit_price = round(self.exit_price, 2) 135 | 136 | _entry_value = round(self.entry_value, 2) 137 | _exit_value = round(self.exit_value, 2) 138 | 139 | _return = round(100 * self.percent_return, 1) 140 | _diff = round(self.change_in_value, 2) 141 | 142 | print(f'{self.symbol:<5} Trade summary') 143 | print(f'Date: {_entry_date} -> {_exit_date} [{_days} days]') 144 | print(f'Price: ${_entry_price} -> ${_exit_price} [{_return}%]') 145 | print(f'Value: ${_entry_value} -> ${_exit_value} [${_diff}]') 146 | print() 147 | 148 | def __hash__(self): 149 | """ 150 | A unique position will be defined by a unique combination of an 151 | entry_date and symbol, in accordance with our constraints regarding 152 | duplicate, variable, and compound positions 153 | """ 154 | return hash((self.entry_date, self.symbol))
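155 | 156 | # Illustrative check of the hashing contract (hypothetical values): two 157 | # positions opened on the same date in the same symbol hash identically. 158 | # 159 | # a = Position('AWU', pd.Timestamp('2020-01-02'), 100.0, 10) 160 | # b = Position('AWU', pd.Timestamp('2020-01-02'), 105.0, 20) 161 | # assert hash(a) == hash(b)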
-------------------------------------------------------------------------------- /listings/chapter_4/4_3_position_object_usage.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from pypm import data_io, portfolio 3 | 4 | symbol = 'AWU' 5 | df = data_io.load_eod_data(symbol) 6 | shares_to_buy = 50 7 | 8 | for i, row in enumerate(df.itertuples()): 9 | date = row.Index 10 | price = row.close 11 | 12 | if i == 123: 13 | position = portfolio.Position(symbol, date, price, shares_to_buy) 14 | elif 123 < i < 234: 15 | position.record_price_update(date, price) 16 | elif i == 234: 17 | position.exit(date, price) 18 | 19 | position.print_position_summary() 20 | 21 | # Returns ... 22 | # AWU Trade summary 23 | # Date: Wed Jun 30, 2010 -> Tue Dec 07, 2010 [111 days] 24 | # Price: $220.34 -> $305.98 [38.9%] 25 | # Value: $11017.0 -> $15299.0 [$4282.0] -------------------------------------------------------------------------------- /listings/chapter_4/4_4_portfolio_history_class.py: -------------------------------------------------------------------------------- 1 | class PortfolioHistory(object): 2 | """ 3 | Holds Position objects and keeps track of portfolio variables. 4 | Produces summary statistics. 5 | """ 6 | 7 | def __init__(self): 8 | # Keep track of positions, recorded in this list after close 9 | self.position_history: List[Position] = [] 10 | self._logged_positions: Set[Position] = set() 11 | 12 | # Keep track of the last seen date 13 | self.last_date: pd.Timestamp = pd.Timestamp.min 14 | 15 | # Readonly fields 16 | self._cash_history: Dict[pd.Timestamp, Dollars] = dict() 17 | self._simulation_finished = False 18 | self._spy: pd.DataFrame = pd.DataFrame() 19 | self._spy_log_returns: pd.Series = pd.Series() 20 | 21 | def add_to_history(self, position: Position): 22 | _log = self._logged_positions 23 | assert not position in _log, 'Recorded the same position twice.' 24 | assert position.is_closed, 'Position is not closed.' 25 | self._logged_positions.add(position) 26 | self.position_history.append(position) 27 | self.last_date = max(self.last_date, position.last_date) 28 | 29 | def record_cash(self, date, cash): 30 | self._cash_history[date] = cash 31 | self.last_date = max(self.last_date, date) 32 | 33 | @staticmethod 34 | def _as_oseries(d: Dict[pd.Timestamp, Any]) -> pd.Series: 35 | return pd.Series(d).sort_index() 36 | 37 | def _compute_cash_series(self): 38 | self._cash_series = self._as_oseries(self._cash_history) 39 | 40 | @property 41 | def cash_series(self) -> pd.Series: 42 | return self._cash_series 43 | 44 | def _compute_portfolio_value_series(self): 45 | value_by_date = defaultdict(float) 46 | last_date = self.last_date 47 | 48 | # Add up value of assets 49 | for position in self.position_history: 50 | for date, value in position.value_series.items(): 51 | value_by_date[date] += value 52 | 53 | # Make sure all dates in cash_series are present 54 | for date in self.cash_series.index: 55 | value_by_date[date] += 0 56 | 57 | self._portfolio_value_series = self._as_oseries(value_by_date) 58 | 59 | @property 60 | def portfolio_value_series(self): 61 | return self._portfolio_value_series 62 | 63 | def _compute_equity_series(self): 64 | c_series = self.cash_series 65 | p_series = self.portfolio_value_series 66 | assert all(c_series.index == p_series.index), \ 67 | 'portfolio_series has dates not in cash_series' 68 | self._equity_series = c_series + p_series 69 | 70 | @property 71 | def equity_series(self): 72 | return self._equity_series 73 | 74 | def _compute_log_return_series(self): 75 | self._log_return_series = \ 76 | metrics.calculate_log_return_series(self.equity_series) 77 | 78 | @property 79 | def log_return_series(self): 80 | return self._log_return_series 81 | 82 | def _assert_finished(self): 83 | assert self._simulation_finished, \ 84 | 'Simulation must be finished by running self.finish() in order ' + \ 85 | 'to access this method or property.'
86 | 87 | def finish(self): 88 | """ 89 | Notate that the simulation is finished and compute readonly values 90 | """ 91 | self._simulation_finished = True 92 | self._compute_cash_series() 93 | self._compute_portfolio_value_series() 94 | self._compute_equity_series() 95 | self._compute_log_return_series() 96 | self._assert_finished() 97 | 98 | def compute_portfolio_size_series(self) -> pd.Series: 99 | size_by_date = defaultdict(int) 100 | for position in self.position_history: 101 | for date in position.value_series.index: 102 | size_by_date[date] += 1 103 | return self._as_oseries(size_by_date) 104 | 105 | @property 106 | def spy(self): 107 | if self._spy.empty: 108 | self._spy = data_io.load_spy_data() 109 | return self._spy 110 | 111 | @property 112 | def spy_log_returns(self): 113 | if self._spy_log_returns.empty: 114 | close = self.spy['close'] 115 | self._spy_log_returns = metrics.calculate_log_return_series(close) 116 | return self._spy_log_returns 117 | 118 | @property 119 | def percent_return(self): 120 | return metrics.calculate_percent_return(self.equity_series) 121 | 122 | @property 123 | def spy_percent_return(self): 124 | return metrics.calculate_percent_return(self.spy['close']) 125 | 126 | @property 127 | def cagr(self): 128 | return metrics.calculate_cagr(self.equity_series) 129 | 130 | @property 131 | def volatility(self): 132 | return metrics.calculate_annualized_volatility(self.log_return_series) 133 | 134 | @property 135 | def sharpe_ratio(self): 136 | return metrics.calculate_sharpe_ratio(self.equity_series) 137 | 138 | @property 139 | def spy_cagr(self): 140 | return metrics.calculate_cagr(self.spy['close']) 141 | 142 | @property 143 | def excess_cagr(self): 144 | return self.cagr - self.spy_cagr 145 | 146 | @property 147 | def jensens_alpha(self): 148 | return metrics.calculate_jensens_alpha( 149 | self.log_return_series, 150 | self.spy_log_returns, 151 | ) 152 | 153 | @property 154 | def dollar_max_drawdown(self): 155 | return metrics.calculate_max_drawdown(self.equity_series, 'dollar') 156 | 157 | @property 158 | def percent_max_drawdown(self): 159 | return metrics.calculate_max_drawdown(self.equity_series, 'percent') 160 | 161 | @property 162 | def log_max_drawdown_ratio(self): 163 | return metrics.calculate_log_max_drawdown_ratio(self.equity_series) 164 | 165 | @property 166 | def number_of_trades(self): 167 | return len(self.position_history) 168 | 169 | @property 170 | def average_active_trades(self): 171 | return self.compute_portfolio_size_series().mean() 172 | 173 | @property 174 | def final_cash(self): 175 | self._assert_finished() 176 | return self.cash_series[-1] 177 | 178 | @property 179 | def final_equity(self): 180 | self._assert_finished() 181 | return self.equity_series[-1] 182 | 183 | def print_position_summaries(self): 184 | for position in self.position_history: 185 | position.print_position_summary() 186 | 187 | def print_summary(self): 188 | self._assert_finished() 189 | s = f'Equity: ${self.final_equity:.2f}\n' \ 190 | f'Percent Return: {100*self.percent_return:.2f}%\n' \ 191 | f'S&P 500 Return: {100*self.spy_percent_return:.2f}%\n\n' \ 192 | f'Number of trades: {self.number_of_trades}\n' \ 193 | f'Average active trades: {self.average_active_trades:.2f}\n\n' \ 194 | f'CAGR: {100*self.cagr:.2f}%\n' \ 195 | f'S&P 500 CAGR: {100*self.spy_cagr:.2f}%\n' \ 196 | f'Excess CAGR: {100*self.excess_cagr:.2f}%\n\n' \ 197 | f'Annualized Volatility: {100*self.volatility:.2f}%\n' \ 198 | f'Sharpe Ratio: {self.sharpe_ratio:.2f}\n' \ 199 | f'Jensen\'s Alpha: 
{self.jensens_alpha:.6f}\n\n' \ 200 | f'Dollar Max Drawdown: ${self.dollar_max_drawdown:.2f}\n' \ 201 | f'Percent Max Drawdown: {100*self.percent_max_drawdown:.2f}%\n' \ 202 | f'Log Max Drawdown Ratio: {self.log_max_drawdown_ratio:.2f}\n' 203 | 204 | print(s) 205 | 206 | def plot(self, show=True) -> plt.Figure: 207 | """ 208 | Plots equity, cash and portfolio value curves. 209 | """ 210 | self._assert_finished() 211 | 212 | figure, axes = plt.subplots(nrows=3, ncols=1) 213 | figure.tight_layout(pad=3.0) 214 | axes[0].plot(self.equity_series) 215 | axes[0].set_title('Equity') 216 | axes[0].grid() 217 | 218 | axes[1].plot(self.cash_series) 219 | axes[1].set_title('Cash') 220 | axes[1].grid() 221 | 222 | axes[2].plot(self.portfolio_value_series) 223 | axes[2].set_title('Portfolio Value') 224 | axes[2].grid() 225 | 226 | if show: 227 | plt.show() 228 | 229 | return figure 230 | -------------------------------------------------------------------------------- /listings/chapter_4/4_5_portfolio_history_usage.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from pypm import data_io 3 | from pypm.portfolio import Position, PortfolioHistory 4 | 5 | symbol = 'AWU' 6 | df = data_io.load_eod_data(symbol) 7 | 8 | portfolio_history = PortfolioHistory() 9 | initial_cash = cash = 10000 10 | 11 | for i, row in enumerate(df.itertuples()): 12 | date = row.Index 13 | price = row.close 14 | 15 | if i == 123: 16 | # Figure out how many shares to buy 17 | shares_to_buy = initial_cash / price 18 | 19 | # Record the position 20 | position = Position(symbol, date, price, shares_to_buy) 21 | 22 | # Spend all of your cash 23 | cash -= initial_cash 24 | 25 | elif 123 < i < 2345: 26 | position.record_price_update(date, price) 27 | 28 | elif i == 2345: 29 | # Sell the asset 30 | position.exit(date, price) 31 | 32 | # Get your cash back 33 | cash += price * shares_to_buy 34 | 35 | # Record the position 36 | portfolio_history.add_to_history(position) 37 | 38 | # Record cash at every step 39 | portfolio_history.record_cash(date, cash) 40 | 41 | portfolio_history.finish() 42 | 43 | portfolio_history.print_position_summaries() 44 | # Returns ... 45 | # AWU Trade summary 46 | # Date: Wed Jun 30, 2010 -> Tue Apr 30, 2019 [2222 days] 47 | # Price: $220.34 -> $386.26 [75.3%] 48 | # Value: $10000.0 -> $17530.18 [$7530.18] 49 | 50 | portfolio_history.print_summary() 51 | # Returns ... 
52 | # Equity: $17530.18 53 | # Percent Return: 75.30% 54 | # S&P 500 Return: 184.00% 55 | # 56 | # Number of trades: 1 57 | # Average active trades: 1.00 58 | # 59 | # CAGR: 5.78% 60 | # S&P 500 CAGR: 11.02% 61 | # Excess CAGR: -5.24% 62 | # 63 | # Annualized Volatility: 29.97% 64 | # Sharpe Ratio: 0.19 65 | # Jensen's Alpha: -0.000198 66 | # 67 | # Dollar Max Drawdown: $9006.08 68 | # Percent Max Drawdown: 60.08% 69 | # Log Max Drawdown Ratio: -0.36 70 | 71 | portfolio_history.plot() -------------------------------------------------------------------------------- /listings/chapter_4/4_6_simple_simulator_class.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple, List, Dict, Callable, NewType, Any, Iterable 2 | 3 | import pandas as pd 4 | import matplotlib.pyplot as plt 5 | 6 | from pypm import metrics, signals, data_io 7 | from pypm.portfolio import PortfolioHistory, Position, Symbol, Dollars 8 | 9 | from collections import OrderedDict, defaultdict 10 | 11 | class SimpleSimulator(object): 12 | """ 13 | A simple trading simulator to work with the PortfolioHistory class 14 | """ 15 | 16 | def __init__(self, initial_cash: float=10000, max_active_positions: int=5, 17 | percent_slippage: float=0.0005, trade_fee: float=1): 18 | 19 | ### Set simulation parameters 20 | 21 | # Initial cash in portfolio 22 | # self.cash will fluctuate 23 | self.initial_cash = self.cash = initial_cash 24 | 25 | # Maximum number of different assets that can be held simultaneously 26 | self.max_active_positions: int = max_active_positions 27 | 28 | # The percentage difference between closing price and fill price for the 29 | # position, to simulate adverse effects of market orders 30 | self.percent_slippage = percent_slippage 31 | 32 | # The fixed fee in order to open a position in dollar terms 33 | self.trade_fee = trade_fee 34 | 35 | # Keep track of live trades 36 | self.active_positions_by_symbol: Dict[Symbol, Position] = OrderedDict() 37 | 38 | # Keep track of portfolio history like cash, equity, and positions 39 | self.portfolio_history = PortfolioHistory() 40 | 41 | @property 42 | def active_positions_count(self): 43 | return len(self.active_positions_by_symbol) 44 | 45 | @property 46 | def free_position_slots(self): 47 | return self.max_active_positions - self.active_positions_count 48 | 49 | @property 50 | def active_symbols(self) -> List[Symbol]: 51 | return list(self.active_positions_by_symbol.keys()) 52 | 53 | def print_initial_parameters(self): 54 | s = f'Initial Cash: ${self.initial_cash} \n' \ 55 | f'Maximum Number of Assets: {self.max_active_positions}\n' 56 | print(s) 57 | return s 58 | 59 | @staticmethod 60 | def make_tuple_lookup(columns) -> Callable[[str, str], int]: 61 | """ 62 | Map a multi-index dataframe to an itertuples-like object. 63 | 64 | The index of the dataframe is always the zero-th element.
65 | """ 66 | 67 | # col is a hierarchical column index represented by a tuple of strings 68 | tuple_lookup: Dict[Tuple[str, str], int] = { 69 | col: i + 1 for i, col in enumerate(columns) 70 | } 71 | 72 | return lambda symbol, metric: tuple_lookup[(symbol, metric)] 73 | 74 | @staticmethod 75 | def make_all_valid_lookup(_idx: Callable): 76 | """ 77 | Return a function that checks for valid data, given a lookup function 78 | """ 79 | return lambda row, symbol: ( 80 | not pd.isna(row[_idx(symbol, 'pref')]) and \ 81 | not pd.isna(row[_idx(symbol, 'signal')]) and \ 82 | not pd.isna(row[_idx(symbol, 'price')]) 83 | ) 84 | 85 | def buy_to_open(self, symbol, date, price): 86 | """ 87 | Keep track of new position, make sure it isn't an existing position. 88 | Verify you have cash. 89 | """ 90 | 91 | # Figure out how much we are willing to spend 92 | cash_to_spend = self.cash / self.free_position_slots 93 | cash_to_spend -= self.trade_fee 94 | 95 | # Calculate buy_price and number of shares. Fractional shares allowed. 96 | purchase_price = (1 + self.percent_slippage) * price 97 | shares = cash_to_spend / purchase_price 98 | 99 | # Spend the cash 100 | self.cash -= cash_to_spend + self.trade_fee 101 | assert self.cash >= 0, 'Spent cash you do not have.' 102 | self.portfolio_history.record_cash(date, self.cash) 103 | 104 | # Record the position 105 | positions_by_symbol = self.active_positions_by_symbol 106 | assert not symbol in positions_by_symbol, 'Symbol already in portfolio.' 107 | position = Position(symbol, date, purchase_price, shares) 108 | positions_by_symbol[symbol] = position 109 | 110 | def sell_to_close(self, symbol, date, price): 111 | """ 112 | Keep track of exit price, recover cash, close position, and record it in 113 | portfolio history. 114 | 115 | Will raise a KeyError if symbol isn't an active position 116 | """ 117 | 118 | # Exit the position 119 | positions_by_symbol = self.active_positions_by_symbol 120 | position = positions_by_symbol[symbol] 121 | position.exit(date, price) 122 | 123 | # Receive the cash 124 | sale_value = position.last_value * (1 - self.percent_slippage) 125 | self.cash += sale_value 126 | self.portfolio_history.record_cash(date, self.cash) 127 | 128 | # Record in portfolio history 129 | self.portfolio_history.add_to_history(position) 130 | del positions_by_symbol[symbol] 131 | 132 | @staticmethod 133 | def _assert_equal_columns(*args: Iterable[pd.DataFrame]): 134 | column_names = set(args[0].columns.values) 135 | for arg in args[1:]: 136 | assert set(arg.columns.values) == column_names, \ 137 | 'Found unequal column names in input data frames.' 138 | 139 | def simulate(self, price: pd.DataFrame, signal: pd.DataFrame, 140 | preference: pd.DataFrame): 141 | """ 142 | Runs the simulation. 143 | 144 | price, signal, and preference are data frames with the column names 145 | represented by the same set of stock symbols. 
146 | """ 147 | 148 | # Create a hierarchical data frame to loop through 149 | self._assert_equal_columns(price, signal, preference) 150 | df = data_io.concatenate_metrics({ 151 | 'price': price, 152 | 'signal': signal, 153 | 'pref': preference, 154 | }) 155 | 156 | # Get list of symbols 157 | all_symbols = list(set(price.columns.values)) 158 | 159 | # Get lookup functions 160 | _idx = self.make_tuple_lookup(df.columns) 161 | _all_valid = self.make_all_valid_lookup(_idx) 162 | 163 | # Store some variables 164 | active_positions_by_symbol = self.active_positions_by_symbol 165 | max_active_positions = self.max_active_positions 166 | 167 | # Iterating over all dates. 168 | # itertuples() is significantly faster than iterrows(), it however comes 169 | # at the cost of being able index easily. In order to get around this 170 | # we use an tuple lookup function: "_idx" 171 | for row in df.itertuples(): 172 | 173 | # date index is always first element of tuple row 174 | date = row[0] 175 | 176 | # Get symbols with valid and tradable data 177 | symbols: List[str] = [s for s in all_symbols if _all_valid(row, s)] 178 | 179 | # Iterate over active positions and sell stocks with a sell signal. 180 | _active = self.active_symbols 181 | to_exit = [s for s in _active if row[_idx(s, 'signal')] == -1] 182 | for s in to_exit: 183 | sell_price = row[_idx(s, 'price')] 184 | self.sell_to_close(s, date, sell_price) 185 | 186 | # Get up to max_active_positions symbols with a buy signal in 187 | # decreasing order of preference 188 | to_buy = [ 189 | s for s in symbols if \ 190 | row[_idx(s, 'signal')] == 1 and \ 191 | not s in active_positions_by_symbol 192 | ] 193 | to_buy.sort(key=lambda s: row[_idx(s, 'pref')], reverse=True) 194 | to_buy = to_buy[:max_active_positions] 195 | 196 | for s in to_buy: 197 | buy_price = row[_idx(s, 'price')] 198 | buy_preference = row[_idx(s, 'pref')] 199 | 200 | # If we have some empty slots, just buy the asset outright 201 | if self.active_positions_count < max_active_positions: 202 | self.buy_to_open(s, date, buy_price) 203 | continue 204 | 205 | # If are holding max_active_positions, evaluate a swap based on 206 | # preference 207 | _active = self.active_symbols 208 | active_prefs = [(s, row[_idx(s, 'pref')]) for s in _active] 209 | 210 | _min = min(active_prefs, key=lambda k: k[1]) 211 | min_active_symbol, min_active_preference = _min 212 | 213 | # If a more preferable symbol exists, then sell an old one 214 | if min_active_preference < buy_preference: 215 | sell_price = row[_idx(min_active_symbol, 'price')] 216 | self.sell_to_close(min_active_symbol, date, sell_price) 217 | self.buy_to_open(s, date, buy_price) 218 | 219 | # Update price data everywhere 220 | for s in self.active_symbols: 221 | price = row[_idx(s, 'price')] 222 | position = active_positions_by_symbol[s] 223 | position.record_price_update(date, price) 224 | 225 | # Sell all positions and mark simulation as complete 226 | for s in self.active_symbols: 227 | self.sell_to_close(s, date, row[_idx(s, 'price')]) 228 | self.portfolio_history.finish() -------------------------------------------------------------------------------- /listings/chapter_4/4_7_simple_simulator_usage.py: -------------------------------------------------------------------------------- 1 | ### pypm/simulate_portfolio.py 2 | from pypm import metrics, signals, data_io, simulation 3 | import pandas as pd 4 | 5 | def simulate_portfolio(): 6 | 7 | bollinger_n = 20 8 | sharpe_n = 20 9 | 10 | # Load in data 11 | symbols: List[str] = 
-------------------------------------------------------------------------------- /listings/chapter_4/4_7_simple_simulator_usage.py: -------------------------------------------------------------------------------- 1 | ### pypm/simulate_portfolio.py 2 | from pypm import metrics, signals, data_io, simulation 3 | import pandas as pd 4 | 5 | def simulate_portfolio(): 6 | 7 | bollinger_n = 20 8 | sharpe_n = 20 9 | 10 | # Load in data 11 | symbols: List[str] = data_io.get_all_symbols() 12 | prices: pd.DataFrame = data_io.load_eod_matrix(symbols) 13 | 14 | # Use the Bollinger band outer-band crossover as a signal 15 | _bollinger = signals.create_bollinger_band_signal 16 | signal = prices.apply(_bollinger, args=(bollinger_n,), axis=0) 17 | 18 | # Use a rolling Sharpe ratio approximation as a preference matrix 19 | _sharpe = metrics.calculate_rolling_sharpe_ratio 20 | preference = prices.apply(_sharpe, args=(sharpe_n, ), axis=0) 21 | 22 | # Run the simulator 23 | simulator = simulation.SimpleSimulator( 24 | initial_cash=10000, 25 | max_active_positions=5, 26 | percent_slippage=0.0005, 27 | trade_fee=1, 28 | ) 29 | simulator.simulate(prices, signal, preference) 30 | 31 | # Print results 32 | simulator.portfolio_history.print_position_summaries() 33 | simulator.print_initial_parameters() 34 | simulator.portfolio_history.print_summary() 35 | simulator.portfolio_history.plot() 36 | 37 | if __name__ == '__main__': 38 | simulate_portfolio() 39 | 40 | # Returns ... 41 | # Initial Cash: $10000 42 | # Maximum Number of Assets: 5 43 | # 44 | # Equity: $39758.61 45 | # Percent Return: 297.59% 46 | # S&P 500 Return: 184.00% 47 | # 48 | # Number of trades: 1835 49 | # Average active trades: 4.83 50 | # 51 | # CAGR: 14.82% 52 | # S&P 500 CAGR: 11.02% 53 | # Excess CAGR: 3.80% 54 | # 55 | # Annualized Volatility: 17.93% 56 | # Sharpe Ratio: 0.83 57 | # Jensen's Alpha: 0.000147 58 | # 59 | # Dollar Max Drawdown: $10594.83 60 | # Percent Max Drawdown: 30.03% 61 | # Log Max Drawdown Ratio: 1.02 -------------------------------------------------------------------------------- /listings/chapter_5/5_1_grid_search_optimizer.py: -------------------------------------------------------------------------------- 1 | from pypm import metrics, signals, data_io, simulation 2 | 3 | import pandas as pd 4 | import numpy as np 5 | from collections import defaultdict, OrderedDict 6 | from itertools import product 7 | from timeit import default_timer 8 | from typing import Dict, Tuple, List, Callable, Iterable, Any, NewType, Mapping 9 | 10 | import matplotlib.pyplot as plt 11 | from matplotlib import cm 12 | from mpl_toolkits.mplot3d import Axes3D 13 | 14 | # Performance data and parameter inputs are dictionaries 15 | Parameters = NewType('Parameters', Dict[str, float]) 16 | Performance = simulation.PortfolioHistory.PerformancePayload # Dict[str, float] 17 | 18 | # Simulation function must take parameters as keyword arguments pointing to 19 | # iterables and return a performance metric dictionary 20 | SimKwargs = NewType('Kwargs', Mapping[str, Iterable[Any]]) 21 | SimFunction = NewType('SimFunction', Callable[[SimKwargs], Performance]) 22 | 23 | class OptimizationResult(object): 24 | """Simple container class for optimization data""" 25 | 26 | def __init__(self, parameters: Parameters, performance: Performance): 27 | 28 | # Make sure no collisions between performance metrics and params 29 | assert len(parameters.keys() & performance.keys()) == 0, \ 30 | 'parameter name matches performance metric name' 31 | 32 | self.parameters = parameters 33 | self.performance = performance 34 | 35 | @property 36 | def as_dict(self) -> Dict[str, float]: 37 | """Combines the dictionaries after we are sure of no collisions""" 38 | return {**self.parameters, **self.performance} 39 | 40 | 41 | class GridSearchOptimizer(object): 42 | """ 43 | A generic grid search optimizer that requires only a simulation function and 44 | a series of parameter ranges.
Provides timing, summary, and plotting 45 | utilities with return data. 46 | """ 47 | 48 | def __init__(self, simulation_function: SimFunction): 49 | 50 | self.simulate = simulation_function 51 | self._results_list: List[OptimizationResult] = list() 52 | self._results_df = pd.DataFrame() 53 | 54 | self._optimization_finished = False 55 | 56 | def add_results(self, parameters: Parameters, performance: Performance): 57 | _results = OptimizationResult(parameters, performance) 58 | self._results_list.append(_results) 59 | 60 | def optimize(self, **optimization_ranges: SimKwargs): 61 | 62 | assert optimization_ranges, 'Must provide non-empty parameters.' 63 | 64 | # Convert all iterables to lists 65 | param_ranges = {k: list(v) for k, v in optimization_ranges.items()} 66 | self.param_names = param_names = list(param_ranges.keys()) 67 | 68 | # Count total simulations 69 | n = total_simulations = np.prod([len(r) for r in param_ranges.values()]) 70 | 71 | total_time_elapsed = 0 72 | 73 | print(f'Starting simulation ...') 74 | print(f'Simulating 1 / {n} ...', end='\r') 75 | for i, params in enumerate(product(*param_ranges.values())): 76 | if i > 0: 77 | _avg = avg_time = total_time_elapsed / i 78 | _rem = remaining_time = (n - (i + 1)) * avg_time 79 | s = f'Simulating {i+1} / {n} ... ' 80 | s += f'{_rem:.0f}s remaining ({_avg:.1f}s avg)' 81 | s += ' '*8 82 | print(s, end='\r') 83 | 84 | timer_start = default_timer() 85 | 86 | parameters = {n: param for n, param in zip(param_names, params)} 87 | results = self.simulate(**parameters) 88 | self.add_results(parameters, results) 89 | 90 | timer_end = default_timer() 91 | total_time_elapsed += timer_end - timer_start 92 | 93 | print(f'Simulated {total_simulations} / {total_simulations} ...') 94 | print(f'Elapsed time: {total_time_elapsed:.0f}s') 95 | print(f'Done.') 96 | 97 | self._optimization_finished = True 98 | 99 | def _assert_finished(self): 100 | assert self._optimization_finished, \ 101 | 'Run self.optimize before accessing this method.'
102 | 103 | @property 104 | def results(self) -> pd.DataFrame: 105 | self._assert_finished() 106 | if self._results_df.empty: 107 | 108 | _results_list = self._results_list 109 | self._results_df = pd.DataFrame([r.as_dict for r in _results_list]) 110 | 111 | _columns = set(list(self._results_df.columns.values)) 112 | _params = set(self.param_names) 113 | self.metric_names = list(_columns - _params) 114 | 115 | return self._results_df 116 | 117 | def print_summary(self): 118 | df = self.results 119 | metric_names = self.metric_names 120 | 121 | print('Summary statistics') 122 | print(df[metric_names].describe().T) 123 | 124 | def get_best(self, metric_name: str) -> pd.DataFrame: 125 | """ 126 | Sort the results by a specific performance metric 127 | """ 128 | self._assert_finished() 129 | 130 | results = self.results 131 | param_names = self.param_names 132 | metric_names = self.metric_names 133 | 134 | assert metric_name in metric_names, 'Not a performance metric' 135 | partial_df = self.results[param_names+[metric_name]] 136 | 137 | return partial_df.sort_values(metric_name, ascending=False) 138 | 139 | def plot_1d_hist(self, x, show=True): 140 | self.results.hist(x) 141 | if show: 142 | plt.show() 143 | 144 | def plot_2d_line(self, x, y, show=True, **filter_kwargs): 145 | _results = self.results 146 | for k, v in filter_kwargs.items(): 147 | _results = _results[getattr(_results, k) == v] 148 | 149 | ax = _results.plot(x, y) 150 | if filter_kwargs: 151 | k_str = ', '.join([f'{k}={v}' for k,v in filter_kwargs.items()]) 152 | ax.legend([f'{x} ({k_str})']) 153 | 154 | if show: 155 | plt.show() 156 | 157 | def plot_2d_violin(self, x, y, show=True): 158 | """ 159 | Group y along x then plot violin charts 160 | """ 161 | x_values = self.results[x].unique() 162 | x_values.sort() 163 | 164 | y_by_x = OrderedDict([(v, []) for v in x_values]) 165 | for _, row in self.results.iterrows(): 166 | y_by_x[row[x]].append(row[y]) 167 | 168 | fig, ax = plt.subplots() 169 | 170 | ax.violinplot(dataset=list(y_by_x.values()), showmedians=True) 171 | ax.set_xlabel(x) 172 | ax.set_ylabel(y) 173 | ax.set_xticks(range(0, len(y_by_x)+1)) 174 | ax.set_xticklabels([''] + list(y_by_x.keys())) 175 | if show: 176 | plt.show() 177 | 178 | def plot_3d_mesh(self, x, y, z, show=True, **filter_kwargs): 179 | """ 180 | Plot interactive 3d mesh. z axis should typically be performance metric 181 | """ 182 | _results = self.results 183 | fig = plt.figure() 184 | ax = Axes3D(fig) 185 | 186 | for k, v in filter_kwargs.items(): 187 | _results = _results[getattr(_results, k) == v] 188 | 189 | X, Y, Z = [getattr(_results, attr) for attr in (x, y, z)] 190 | ax.plot_trisurf(X, Y, Z, cmap=cm.jet, linewidth=0.2) 191 | ax.set_xlabel(x) 192 | ax.set_ylabel(y) 193 | ax.set_zlabel(z) 194 | if show: 195 | plt.show() 196 | 197 | def plot(self, *attrs: Tuple[str], show=True, 198 | **filter_kwargs: Dict[str, Any]): 199 | """ 200 | Attempt to intelligently dispatch plotting functions based on the number 201 | and type of attributes. Last argument should typically be the 202 | performance metric. 203 | """ 204 | self._assert_finished() 205 | param_names = self.param_names 206 | metric_names = self.metric_names 207 | 208 | if len(attrs) == 3: 209 | assert attrs[0] in param_names and attrs[1] in param_names, \ 210 | 'First two positional arguments must be parameter names.' 211 | 212 | assert attrs[2] in metric_names, \ 213 | 'Last positional argument must be a metric name.' 
214 | 215 | assert len(filter_kwargs) + 2 == len(param_names), \ 216 | 'Must filter remaining parameters. e.g. p_three=some_number.' 217 | 218 | self.plot_3d_mesh(*attrs, show=show, **filter_kwargs) 219 | 220 | elif len(attrs) == 2: 221 | if len(param_names) == 1 or filter_kwargs: 222 | self.plot_2d_line(*attrs, show=show, **filter_kwargs) 223 | 224 | elif len(param_names) > 1: 225 | self.plot_2d_violin(*attrs, show=show) 226 | 227 | elif len(attrs) == 1: 228 | self.plot_1d_hist(*attrs, show=show) 229 | 230 | else: 231 | raise ValueError('Must pass between one and three column names.') -------------------------------------------------------------------------------- /listings/chapter_5/5_2_grid_search_example.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | from pypm import metrics, signals, data_io, simulation, optimization 4 | from pypm.optimization import GridSearchOptimizer 5 | 6 | from typing import List, Dict, Tuple, Callable 7 | 8 | Performance = simulation.PortfolioHistory.PerformancePayload # Dict[str, float] 9 | 10 | def bind_simulator(**sim_kwargs) -> Callable: 11 | """ 12 | Create a function with all static simulation data bound to it, where the 13 | arguments are simulation parameters 14 | """ 15 | 16 | symbols: List[str] = data_io.get_all_symbols() 17 | prices: pd.DataFrame = data_io.load_eod_matrix(symbols) 18 | 19 | _bollinger: Callable = signals.create_bollinger_band_signal 20 | _sharpe: Callable = metrics.calculate_rolling_sharpe_ratio 21 | 22 | def _simulate(bollinger_n: int, sharpe_n: int) -> Performance: 23 | 24 | signal = prices.apply(_bollinger, args=(bollinger_n,), axis=0) 25 | preference = prices.apply(_sharpe, args=(sharpe_n, ), axis=0) 26 | 27 | simulator = simulation.SimpleSimulator(**sim_kwargs) 28 | simulator.simulate(prices, signal, preference) 29 | 30 | return simulator.portfolio_history.get_performance_metric_data() 31 | 32 | return _simulate 33 | 34 | if __name__ == '__main__': 35 | 36 | simulate = bind_simulator(initial_cash=10000, max_active_positions=5) 37 | 38 | optimizer = GridSearchOptimizer(simulate) 39 | optimizer.optimize( 40 | bollinger_n=range(10, 110, 10), 41 | sharpe_n=range(10, 110, 10), 42 | ) 43 | 44 | print(optimizer.get_best('excess_cagr')) 45 | optimizer.plot('excess_cagr') 46 | optimizer.plot('bollinger_n', 'excess_cagr') 47 | optimizer.plot('bollinger_n', 'sharpe_n', 'excess_cagr') 48 | 49 | # Returns ... 50 | # bollinger_n sharpe_n excess_cagr 51 | # 17 20 80 0.092841 52 | # 16 20 70 0.062477 53 | # 98 100 90 0.055047 54 | # 19 20 100 0.050255 55 | # 1 10 20 0.043642 56 | # .. ... ... ... 
57 | # 89 90 100 -0.054080 58 | # 69 70 100 -0.054404 59 | # 63 70 40 -0.061105 60 | # 61 70 20 -0.063276 61 | # 50 60 10 -0.065433 -------------------------------------------------------------------------------- /listings/chapter_5/5_3_white_noise_preference_matrix.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | from pypm import metrics, signals, data_io, simulation, optimization 5 | from pypm.optimization import GridSearchOptimizer 6 | 7 | from typing import List, Dict, Tuple, Callable 8 | 9 | Performance = simulation.PortfolioHistory.PerformancePayload # Dict[str, float] 10 | 11 | def bind_simulator(**sim_kwargs) -> Callable: 12 | """ 13 | Create a simulator that uses white noise for the preference matrix 14 | """ 15 | symbols: List[str] = data_io.get_all_symbols() 16 | prices: pd.DataFrame = data_io.load_eod_matrix(symbols) 17 | 18 | _bollinger: Callable = signals.create_bollinger_band_signal 19 | 20 | # Bollinger n is constant throughout 21 | bollinger_n = 20 22 | 23 | def _simulate(white_noise_test_id: int) -> Performance: 24 | 25 | signal = prices.apply(_bollinger, args=(bollinger_n,), axis=0) 26 | 27 | # Build a pile of noise in the same shape as the price data 28 | _noise = np.random.normal(loc=0, scale=1, size=prices.shape) 29 | _cols = prices.columns 30 | _index = prices.index 31 | preference = pd.DataFrame(_noise, columns=_cols, index=_index) 32 | 33 | simulator = simulation.SimpleSimulator(**sim_kwargs) 34 | simulator.simulate(prices, signal, preference) 35 | 36 | return simulator.portfolio_history.get_performance_metric_data() 37 | 38 | return _simulate 39 | 40 | if __name__ == '__main__': 41 | 42 | simulate = bind_simulator(initial_cash=10000, max_active_positions=5) 43 | 44 | optimizer = GridSearchOptimizer(simulate) 45 | optimizer.optimize(white_noise_test_id=range(1000)) 46 | 47 | print(optimizer.get_best('excess_cagr')) 48 | optimizer.print_summary() 49 | optimizer.plot('excess_cagr') -------------------------------------------------------------------------------- /listings/chapter_5/5_4_bootstrap_simulated_preference_matrix.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | from pypm import metrics, signals, data_io, simulation, optimization 5 | from pypm.optimization import GridSearchOptimizer 6 | 7 | from typing import List, Dict, Tuple, Callable 8 | 9 | Performance = simulation.PortfolioHistory.PerformancePayload # Dict[str, float] 10 | 11 | def bind_simulator(**sim_kwargs) -> Callable: 12 | """ 13 | Create a simulator that uses a bootstrapped rolling Sharpe ratio for the 14 | preference matrix 15 | """ 15 | symbols: List[str] = data_io.get_all_symbols() 16 | prices: pd.DataFrame = data_io.load_eod_matrix(symbols) 17 | 18 | _bollinger: Callable = signals.create_bollinger_band_signal 19 | bollinger_n = 20 20 | 21 | returns = metrics.calculate_return_series(prices) 22 | sharpe_n = 20 23 | 24 | def bootstrap_rolling_sharpe_ratio(return_series: pd.Series) -> pd.Series: 25 | _series = return_series.iloc[1:] 26 | _series = _series.sample(n=return_series.shape[0], replace=True) 27 | _series.iloc[:1] = [np.nan] 28 | _series = pd.Series(_series.values, index=return_series.index) 29 | _windowed_series = _series.rolling(sharpe_n) 30 | return _windowed_series.mean() / _windowed_series.std() 31 | 32 | _sharpe: Callable = bootstrap_rolling_sharpe_ratio 33 | 34 | def _simulate(bootstrap_test_id: int) -> Performance: 35 | 36 | signal
= prices.apply(_bollinger, args=(bollinger_n,), axis=0) 37 | preference = returns.apply(_sharpe, axis=0) 38 | 39 | simulator = simulation.SimpleSimulator(**sim_kwargs) 40 | simulator.simulate(prices, signal, preference) 41 | 42 | return simulator.portfolio_history.get_performance_metric_data() 43 | 44 | return _simulate 45 | 46 | if __name__ == '__main__': 47 | 48 | simulate = bind_simulator(initial_cash=10000, max_active_positions=5) 49 | 50 | optimizer = GridSearchOptimizer(simulate) 51 | optimizer.optimize(bootstrap_test_id=range(1000)) 52 | 53 | print(optimizer.get_best('excess_cagr')) 54 | optimizer.print_summary() 55 | optimizer.plot('excess_cagr') -------------------------------------------------------------------------------- /listings/chapter_6/6_1_loading_alternative_data.py: -------------------------------------------------------------------------------- 1 | from pypm import data_io 2 | import numpy as np 3 | import pandas as pd 4 | from typing import List 5 | 6 | # Load in everything 7 | symbols: List[str] = data_io.get_all_symbols() 8 | eod_data: pd.DataFrame = data_io.load_eod_matrix(symbols) 9 | alt_data: pd.DataFrame = data_io.load_alternative_data_matrix(symbols) 10 | 11 | # Our eod_data goes back 10 years, but our alt_data goes back 5 years 12 | eod_data = eod_data[eod_data.index >= alt_data.index.min()] 13 | assert np.all(eod_data.index == alt_data.index) 14 | assert np.all(eod_data.columns == alt_data.columns) -------------------------------------------------------------------------------- /listings/chapter_6/6_2_exploratory_analysis.py: -------------------------------------------------------------------------------- 1 | from pypm import data_io, metrics 2 | import numpy as np 3 | import pandas as pd 4 | from typing import List 5 | 6 | # Load in everything 7 | symbols: List[str] = data_io.get_all_symbols() 8 | eod_data: pd.DataFrame = data_io.load_eod_matrix(symbols) 9 | alt_data: pd.DataFrame = data_io.load_alternative_data_matrix(symbols) 10 | eod_data = eod_data[eod_data.index >= alt_data.index.min()] 11 | 12 | _calc_returns = metrics.calculate_log_return_series 13 | _corr_by_symbol = dict() 14 | 15 | for symbol in symbols: 16 | 17 | alt_series = alt_data[symbol].dropna() 18 | price_series = eod_data[symbol] 19 | 20 | if alt_series.empty: 21 | continue 22 | 23 | # Calculate returns, ensuring each series has the same index 24 | price_return_series = _calc_returns(price_series.loc[alt_series.index]) 25 | alt_return_series = _calc_returns(alt_series) 26 | 27 | # Remove the NA at the front 28 | price_return_series = price_return_series.iloc[1:] 29 | alt_return_series = alt_return_series.iloc[1:] 30 | 31 | # Calculate the correlation 32 | _corr = np.corrcoef(price_return_series, alt_return_series) 33 | 34 | # This element of the correlation matrix is the number we want 35 | _corr_by_symbol[symbol] = _corr[1,0] 36 | 37 | # Describe results 38 | results = pd.Series(_corr_by_symbol) 39 | print(pd.DataFrame(results.describe()).T) 40 | # Returns ...
41 | # count mean std min 25% 50% 75% max 42 | # 97.0 -0.002539 0.032456 -0.065556 -0.024983 -0.003735 0.0174 0.099085 -------------------------------------------------------------------------------- /listings/chapter_7/7_1_symmetric_cusum_filter_on_revenue.py: -------------------------------------------------------------------------------- 1 | # In pypm.filters 2 | import numpy as np 3 | import pandas as pd 4 | 5 | def calculate_non_uniform_lagged_change(series: pd.Series, n_days: int): 6 | """ 7 | Use pd.Series.searchsorted to measure the lagged change in a non-uniformly 8 | spaced time series over n_days of calendar time. 9 | """ 10 | 11 | # Get mapping from now to n_days ago at every point 12 | _timedelta: pd.Timedelta = pd.Timedelta(days=n_days) 13 | _idx: pd.Series = series.index.searchsorted(series.index - _timedelta) 14 | _idx = _idx[_idx > 0] 15 | 16 | # Get the last len(series) - n_days values 17 | _series = series.iloc[-_idx.shape[0]:] 18 | 19 | # Build a padding of NA values 20 | _pad_length = series.shape[0] - _idx.shape[0] 21 | _na_pad = pd.Series(None, index=series.index[:_pad_length]) 22 | 23 | # Get the corresponding lagged values 24 | _lagged_series = series.iloc[_idx] 25 | 26 | # Measure the difference 27 | _diff = pd.Series(_series.values-_lagged_series.values, index=_series.index) 28 | 29 | return pd.concat([_na_pad, _diff]) 30 | 31 | 32 | def calculate_cusum_events(series: pd.Series, 33 | filter_threshold: float) -> pd.DatetimeIndex: 34 | """ 35 | Calculate symmetric cusum filter and corresponding events 36 | """ 37 | 38 | event_dates = list() 39 | s_up = 0 40 | s_down = 0 41 | 42 | for date, price in series.items(): 43 | s_up = max(0, s_up + price) 44 | s_down = min(0, s_down + price) 45 | 46 | if s_up > filter_threshold: 47 | s_up = 0 48 | event_dates.append(date) 49 | 50 | elif s_down < -filter_threshold: 51 | s_down = 0 52 | event_dates.append(date) 53 | 54 | return pd.DatetimeIndex(event_dates) 55 | 56 | # In pypm.ml_model.events 57 | from pypm import filters 58 | 59 | def calculate_events_for_revenue_series(series: pd.Series, 60 | filter_threshold: float, lookback: int=365) -> pd.DatetimeIndex: 61 | """ 62 | Calculate the symmetric cusum filter to generate events on YoY changes in 63 | the log revenue series 64 | """ 65 | series = np.log(series) 66 | series = filters.calculate_non_uniform_lagged_change(series, lookback) 67 | return filters.calculate_cusum_events(series, filter_threshold) 68 | 69 | 70 | def calculate_events(revenue_series: pd.Series): 71 | return calculate_events_for_revenue_series( 72 | revenue_series, 73 | filter_threshold=5, 74 | lookback=365, 75 | ) -------------------------------------------------------------------------------- /listings/chapter_7/7_2_computing_triple_barrier_labels.py: -------------------------------------------------------------------------------- 1 | # See pypm.labels 2 | import numpy as np 3 | import pandas as pd 4 | from typing import List, Tuple 5 | 6 | def compute_triple_barrier_labels( 7 | price_series: pd.Series, 8 | event_index: pd.Series, 9 | time_delta_days: int, 10 | upper_delta: float=None, 11 | lower_delta: float=None, 12 | vol_span: int=20, 13 | upper_z: float=None, 14 | lower_z: float=None, 15 | upper_label: int=1, 16 | lower_label: int=-1) -> Tuple[pd.Series, pd.Series]: 17 | """ 18 | Calculate event labels according to the triple-barrier method. 19 | 20 | Return a series with both the original events and the labels.
Labels 1, 0, 21 | and -1 correspond to upper barrier breach, vertical barrier breach, and 22 | lower barrier breach, respectively. 23 | 24 | Also return a series where the index is the start date of the label and the 25 | values are the end dates of the label. 26 | """ 27 | 28 | timedelta = pd.Timedelta(days=time_delta_days) 29 | series = pd.Series(np.log(price_series.values), index=price_series.index) 30 | 31 | # A list with elements of {-1, 0, 1} indicating the outcome of the events 32 | labels = list() 33 | label_dates = list() 34 | 35 | if upper_z or lower_z: 36 | volatility = series.ewm(span=vol_span).std() 37 | volatility *= np.sqrt(time_delta_days / vol_span) 38 | 39 | for event_date in event_index: 40 | date_barrier = event_date + timedelta 41 | 42 | start_price = series.loc[event_date] 43 | log_returns = series.loc[event_date:date_barrier] - start_price 44 | 45 | # First element of tuple is 1 or -1 indicating upper or lower barrier 46 | # Second element of tuple is first date when barrier was crossed 47 | candidates: List[Tuple[int, pd.Timestamp]] = list() 48 | 49 | # Add the first upper or lower delta crosses to candidates 50 | if upper_delta: 51 | _date = log_returns[log_returns > upper_delta].first_valid_index() 52 | if _date: 53 | candidates.append((upper_label, _date)) 54 | 55 | if lower_delta: 56 | _date = log_returns[log_returns < lower_delta].first_valid_index() 57 | if _date: 58 | candidates.append((lower_label, _date)) 59 | 60 | # Add the first upper_z and lower_z crosses to candidates 61 | if upper_z: 62 | upper_barrier = upper_z * volatility[event_date] 63 | _date = log_returns[log_returns > upper_barrier].first_valid_index() 64 | if _date: 65 | candidates.append((upper_label, _date)) 66 | 67 | if lower_z: 68 | lower_barrier = lower_z * volatility[event_date] 69 | _date = log_returns[log_returns < lower_barrier].first_valid_index() 70 | if _date: 71 | candidates.append((lower_label, _date)) 72 | 73 | if candidates: 74 | # If any candidates, return label for first date 75 | label, label_date = min(candidates, key=lambda x: x[1]) 76 | else: 77 | # If there were no candidates, time barrier was touched 78 | label, label_date = 0, date_barrier 79 | 80 | labels.append(label) 81 | label_dates.append(label_date) 82 | 83 | label_series = pd.Series(labels, index=event_index) 84 | event_spans = pd.Series(label_dates, index=event_index) 85 | 86 | return label_series, event_spans 87 | 88 | 89 | # See pypm.ml_model.labels 90 | from typing import Tuple 91 | from pypm import labels 92 | 93 | def calculate_labels(price_series, event_index) -> Tuple[pd.Series, pd.Series]: 94 | """ 95 | Calculate labels based on the triple barrier method. Return a series of 96 | event labels indexed by event start date, and return a series of event end 97 | dates indexed by event start date.
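In this pipeline the vertical barrier sits 90 days out and the horizontal barriers at +/-1.8 z-scores of exponentially weighted volatility, per the arguments passed below.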
98 | """ 99 | 100 | # Remove events that don't have a proper chance to materialize 101 | time_delta_days = 90 102 | max_date = price_series.index.max() 103 | cutoff = max_date - pd.Timedelta(days=time_delta_days) 104 | event_index = event_index[event_index <= cutoff] 105 | 106 | # Use triple barrier method 107 | event_labels, event_spans = labels.compute_triple_barrier_labels( 108 | price_series, 109 | event_index, 110 | time_delta_days=time_delta_days, 111 | upper_z=1.8, 112 | lower_z=-1.8, 113 | ) 114 | 115 | return event_labels, event_spans 116 | -------------------------------------------------------------------------------- /listings/chapter_7/7_3_computing_average_uniqueness.py: -------------------------------------------------------------------------------- 1 | # See pypm.weights 2 | import numpy as np 3 | import pandas as pd 4 | from scipy.stats import hmean 5 | 6 | def calculate_uniqueness(event_spans: pd.Series, 7 | price_index: pd.Series) -> pd.Series: 8 | """ 9 | event_spans is a series with an index of start dates and values of end dates 10 | of a label. 11 | 12 | price_index is an index of underlying dates for the event 13 | 14 | Returns a series of uniqueness values that can be used as weights, indexed 15 | by the event start dates. Weights may need to be standardized again before 16 | training. 17 | """ 18 | 19 | # Create a binary data frame 20 | # value is 1 during event span and 0 otherwise 21 | columns = range(event_spans.shape[0]) 22 | df = pd.DataFrame(0, index=price_index, columns=columns) 23 | 24 | for i, (event_start, event_end) in enumerate(event_spans.items()): 25 | df[i].loc[event_start:event_end] += 1 26 | 27 | # Compute concurrency over event span then calculate uniqueness 28 | uniquenesses = list() 29 | for i, (event_start, event_end) in enumerate(event_spans.items()): 30 | concurrency: pd.Series = df.loc[event_start:event_end].sum(axis=1) 31 | uniqueness = 1 / hmean(concurrency) 32 | uniquenesses.append(uniqueness) 33 | 34 | return pd.Series(uniquenesses, index=event_spans.index) 35 | 36 | # See pypm.ml_model.weights 37 | import numpy as np 38 | import pandas as pd 39 | 40 | from pypm.weights import calculate_uniqueness 41 | 42 | def calculate_weights(event_spans: pd.Series, 43 | price_index: pd.Series) -> pd.Series: 44 | return calculate_uniqueness(event_spans, price_index) 45 | -------------------------------------------------------------------------------- /listings/chapter_7/7_4_computing_features.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | from pypm import indicators, filters, metrics 5 | 6 | _calc_delta = filters.calculate_non_uniform_lagged_change 7 | _calc_ma = indicators.calculate_simple_moving_average 8 | _calc_log_return = metrics.calculate_log_return_series 9 | 10 | def _calc_rolling_vol(series, n): 11 | return series.rolling(n).std() * np.sqrt(252 / n) 12 | 13 | def calculate_features(price_series, revenue_series) -> pd.DataFrame: 14 | """ 15 | Calculate any and all potentially useful features. Return as a dataframe.
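Three feature families (revenue deltas, price returns, and rolling volatilities) are each computed over 7-, 30-, 90-, 180-, and 360-day windows, so the frame has 15 feature columns.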
16 | """ 17 | 18 | log_revenue = np.log(revenue_series) 19 | log_prices = np.log(price_series) 20 | 21 | log_revenue_ma = _calc_ma(log_revenue, 10) 22 | log_prices_ma = _calc_ma(log_prices, 10) 23 | 24 | log_returns = _calc_log_return(price_series) 25 | 26 | features_by_name = dict() 27 | 28 | for i in [7, 30, 90, 180, 360]: 29 | 30 | rev_feature = _calc_delta(log_revenue_ma, i) 31 | price_feature = _calc_delta(log_prices_ma, i) 32 | vol_feature = _calc_rolling_vol(log_returns, i) 33 | 34 | features_by_name.update({ 35 | f'{i}_day_revenue_delta': rev_feature, 36 | f'{i}_day_return': price_feature, 37 | f'{i}_day_vol': vol_feature, 38 | }) 39 | 40 | features_df = pd.DataFrame(features_by_name) 41 | return features_df 42 | -------------------------------------------------------------------------------- /listings/chapter_7/7_5_modeling_and_cross_validation.py: -------------------------------------------------------------------------------- 1 | # See pypm.ml_model.model 2 | import numpy as np 3 | import pandas as pd 4 | 5 | from sklearn.ensemble import RandomForestClassifier 6 | from sklearn.model_selection import RepeatedKFold 7 | from sklearn.base import clone 8 | 9 | from joblib import Parallel, delayed 10 | 11 | # Number of jobs to run in parallel 12 | # Set to number of computer cores to use 13 | N_JOBS = 10 14 | N_SPLITS = 5 15 | N_REPEATS = 4 16 | 17 | def _fit_and_score(classifier, X, y, w, train_index, test_index, i) -> float: 18 | """ 19 | The function used by joblib to split, train, and score cross-validations 20 | """ 21 | X_train = X.iloc[train_index] 22 | X_test = X.iloc[test_index] 23 | 24 | y_train = y.iloc[train_index] 25 | y_test = y.iloc[test_index] 26 | 27 | w_train = w.iloc[train_index] 28 | w_test = w.iloc[test_index] 29 | 30 | classifier.fit(X_train, y_train, w_train) 31 | score = classifier.score(X_test, y_test, w_test) 32 | 33 | print(f'Finished {i} ({100*score:.1f}%)') 34 | 35 | return score 36 | 37 | def repeated_k_fold(classifier, X, y, w) -> np.ndarray: 38 | """ 39 | Perform repeated k-fold cross validation on a classifier. Spread the fitting 40 | jobs over multiple computer cores. 41 | """ 42 | n_jobs = N_JOBS 43 | 44 | n_splits = N_SPLITS 45 | n_repeats = N_REPEATS 46 | 47 | total_fits = n_splits * n_repeats 48 | 49 | _k_fold = RepeatedKFold(n_splits=n_splits, n_repeats=n_repeats) 50 | 51 | print(f'Fitting {total_fits} models {n_jobs} at a time ...') 52 | print() 53 | 54 | parallel = Parallel(n_jobs=n_jobs) 55 | scores = parallel( 56 | delayed(_fit_and_score)( 57 | clone(classifier), X, y, w, train_index, test_index, i 58 | ) for i, (train_index, test_index) in enumerate(_k_fold.split(X)) 59 | ) 60 | 61 | return np.array(scores) 62 | 63 | 64 | def calculate_model(df: pd.DataFrame) -> RandomForestClassifier: 65 | """ 66 | Given a dataframe with a y column, weights column, and predictor columns 67 | with arbitrary names, cross-validate and fit a classifier. Print 68 | diagnostics.
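The improvement figure compares the mean cross-validation accuracy, plus or minus two standard deviations, against the best constant-guess (baseline) accuracy.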
69 | """ 70 | classifier = RandomForestClassifier(n_estimators=100) 71 | 72 | # Separate data 73 | predictor_columns = [ 74 | c for c in df.columns.values if c not in ('y', 'weights') 75 | ] 76 | X = df[predictor_columns] 77 | y = df['y'] 78 | w = df['weights'] 79 | 80 | # Fit cross-validation 81 | scores = repeated_k_fold(classifier, X, y, w) 82 | 83 | # Get a full dataset fit for importance scores 84 | classifier.fit(X, y, w) 85 | 86 | # Compute diagnostics 87 | _imp = classifier.feature_importances_ 88 | importance_series = pd.Series(_imp, index=predictor_columns) 89 | importance_series = importance_series.sort_values(ascending=False) 90 | 91 | # baseline accuracy is the best value achievable with a constant guess 92 | baseline = np.max(y.value_counts() / y.shape[0]) 93 | 94 | # Compute a rough confidence interval for the improvement 95 | mean_score = scores.mean() 96 | std_score = scores.std() 97 | 98 | upper_bound = mean_score + 2 * std_score 99 | lower_bound = mean_score - 2 * std_score 100 | ibounds = (lower_bound - baseline, upper_bound - baseline) 101 | 102 | print('Feature importances') 103 | for col, imp in importance_series.items(): 104 | print(f'{col:24} {imp:>.3f}') 105 | print() 106 | 107 | print('Cross validation scores') 108 | print(np.round(100 * scores, 1)) 109 | print() 110 | 111 | print(f'Baseline accuracy {100*baseline:.1f}%') 112 | print(f'OOS accuracy {100*mean_score:.1f}% +/- {200 * scores.std():.1f}%') 113 | print(f'Improvement {100*(ibounds[0]):.1f} to {100*(ibounds[1]):.1f}%') 114 | print() 115 | 116 | return classifier 117 | 118 | -------------------------------------------------------------------------------- /listings/chapter_7/7_6_machine_learning_pipeline.py: -------------------------------------------------------------------------------- 1 | # See fit_alternative_data_model.py 2 | import os 3 | import pandas as pd 4 | import numpy as np 5 | from typing import Dict 6 | 7 | from joblib import dump 8 | 9 | from pypm.ml_model.data_io import load_data 10 | from pypm.ml_model.events import calculate_events 11 | from pypm.ml_model.labels import calculate_labels 12 | from pypm.ml_model.features import calculate_features 13 | from pypm.ml_model.model import calculate_model 14 | from pypm.ml_model.weights import calculate_weights 15 | 16 | SRC_DIR = os.path.dirname(os.path.abspath(__file__)) 17 | 18 | if __name__ == '__main__': 19 | 20 | # All the data we have to work with 21 | symbols, eod_data, alt_data = load_data() 22 | 23 | # The ML dataframe for each symbol, to be combined later 24 | df_by_symbol: Dict[str, pd.DataFrame] = dict() 25 | 26 | # Build ML dataframe for each symbol 27 | for symbol in symbols: 28 | 29 | # Get revenue and price series 30 | revenue_series = alt_data[symbol].dropna() 31 | price_series = eod_data[symbol].dropna() 32 | price_index = price_series.index 33 | 34 | # Get events, labels, weights, and features 35 | event_index = calculate_events(revenue_series) 36 | event_labels, event_spans = calculate_labels(price_series, event_index) 37 | weights = calculate_weights(event_spans, price_index) 38 | features_df = calculate_features(price_series, revenue_series) 39 | 40 | # Subset features by event dates 41 | features_on_events = features_df.loc[event_index] 42 | 43 | # Convert labels and events to a data frame 44 | labels_df = pd.DataFrame(event_labels) 45 | labels_df.columns = ['y'] 46 | 47 | # Convert weights to a data frame 48 | weights_df = pd.DataFrame(weights) 49 | weights_df.columns = ['weights'] 50 | 51 | # Concatenate features to
labels 52 | df = pd.concat([features_on_events, weights_df, labels_df], axis=1) 53 | df_by_symbol[symbol] = df 54 | 55 | # Create final ML dataframe 56 | df = pd.concat(df_by_symbol.values(), axis=0) 57 | df.sort_index(inplace=True) 58 | df.dropna(inplace=True) 59 | print(df) 60 | 61 | # Fit the model 62 | classifier = calculate_model(df) 63 | 64 | # Save the model 65 | dump(classifier, os.path.join(SRC_DIR, 'ml_model.joblib')) 66 | 67 | # Returns ... 68 | # 7_day_revenue_delta 7_day_return 7_day_vol ... 69 | # 2016-06-07 -0.000721 0.019520 0.096002 ... 70 | # 2016-06-08 0.029827 0.025005 0.113246 ... 71 | # 2016-06-08 -0.046427 0.013868 0.051878 ... 72 | # 2016-06-09 0.001558 0.032410 0.064574 ... 73 | # 2016-06-10 0.004933 0.011751 0.045105 ... 74 | # ... ... ... ... ... 75 | # 2019-09-30 -0.031956 -0.008562 0.072845 ... 76 | # 2019-10-01 -0.074244 -0.018469 0.053665 ... 77 | # 2019-10-01 0.009513 -0.015659 0.094087 ... 78 | # 2019-10-02 0.012819 -0.008300 0.062938 ... 79 | # 2019-10-02 0.003023 0.015749 0.043320 ... 80 | # 81 | # [1563 rows x 17 columns] 82 | # Fitting 20 models 10 at a time ... 83 | # 84 | # ... 85 | # ... 86 | # ... 87 | # 88 | # Feature importances 89 | # 30_day_return 0.099 90 | # 7_day_return 0.097 91 | # 30_day_vol 0.073 92 | # 90_day_return 0.068 93 | # 360_day_vol 0.066 94 | # 360_day_revenue_delta 0.064 95 | # 360_day_return 0.063 96 | # 180_day_return 0.063 97 | # 180_day_revenue_delta 0.060 98 | # 90_day_vol 0.060 99 | # 180_day_vol 0.060 100 | # 7_day_vol 0.059 101 | # 7_day_revenue_delta 0.057 102 | # 90_day_revenue_delta 0.057 103 | # 30_day_revenue_delta 0.055 104 | # 105 | # Cross validation scores 106 | # ... 107 | # 108 | # Baseline accuracy 42.2% 109 | # OOS accuracy 52.4% +/- 5.3% 110 | # Improvement 4.9 to 15.6% 111 | # -------------------------------------------------------------------------------- /listings/chapter_7/7_7_simulation_with_machine_learning_model.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | import os 5 | from joblib import load 6 | 7 | from pypm.ml_model.data_io import load_data 8 | from pypm.ml_model.signals import calculate_signals 9 | 10 | from pypm import metrics, simulation 11 | 12 | SRC_DIR = os.path.dirname(os.path.abspath(__file__)) 13 | 14 | def simulate_portfolio(): 15 | 16 | # All the data we have to work with 17 | symbols, eod_data, alt_data = load_data() 18 | 19 | # Load classifier from file 20 | classifier = load(os.path.join(SRC_DIR, 'ml_model.joblib')) 21 | 22 | # Generate signals from classifier 23 | print('Calculating signals ...') 24 | signal = calculate_signals(classifier, symbols, eod_data, alt_data) 25 | 26 | # Get rid of eod_data before valid signals 27 | first_signal_date = signal.first_valid_index() 28 | eod_data = eod_data[eod_data.index > first_signal_date] 29 | 30 | # Set the preference to uniform random noise, so new trades are chosen arbitrarily 31 | print('Calculating preference matrix ...') 32 | preference = pd.DataFrame( 33 | np.random.random(eod_data.shape), 34 | columns=eod_data.columns, 35 | index=eod_data.index, 36 | ) 37 | 38 | # Run the simulator 39 | simulator = simulation.SimpleSimulator( 40 | initial_cash=10000, 41 | max_active_positions=10, 42 | percent_slippage=0.0005, 43 | trade_fee=1, 44 | ) 45 | simulator.simulate(eod_data, signal, preference) 46 | 47 | # Print results 48 | simulator.portfolio_history.print_position_summaries() 49 | simulator.print_initial_parameters() 50 |
simulator.portfolio_history.print_summary() 51 | simulator.portfolio_history.plot() 52 | simulator.portfolio_history.plot_benchmark_comparison() 53 | 54 | if __name__ == '__main__': 55 | simulate_portfolio() 56 | 57 | # Returns ... 58 | # Initial Cash: $10000 59 | # Maximum Number of Assets: 10 60 | # 61 | # Equity: $45455.68 62 | # Percent Return: 354.56% 63 | # S&P 500 Return: 33.80% 64 | # 65 | # Number of trades: 291 66 | # Average active trades: 9.89 67 | # 68 | # CAGR: 83.75% 69 | # S&P 500 CAGR: 12.43% 70 | # Excess CAGR: 71.32% 71 | # 72 | # Annualized Volatility: 14.44% 73 | # Sharpe Ratio: 5.80 74 | # Jensen's Alpha: 0.002018 75 | # 76 | # Dollar Max Drawdown: $1892.59 77 | # Percent Max Drawdown: 8.60% 78 | # Log Max Drawdown Ratio: 1.42 79 | # -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # Algorithmic Trading with Python 2 | Source code for Algorithmic Trading with Python (2020) by Chris Conlan. 3 | 4 | Paperback available for purchase [on Amazon](https://amzn.to/2UZbHuA). 5 | 6 | --------------- 7 | 8 | #### Useful resources 9 | 10 | These stand-alone resources can be useful to researchers with or without the accompanying book. The rest of the material in this repository depends on explanation and context given in the book. 11 | 12 | + Performance metrics used to evaluate trading strategies: [metrics.py](src/pypm/metrics.py) 13 | + Common technical indicators in pure Pandas: [indicators.py](src/pypm/indicators.py) 14 | + Converting common technical indicators into ternary signals: [signals.py](src/pypm/signals.py) 15 | + Generic grid search wrapper for numeric optimization: [optimization.py](src/pypm/optimization.py) 16 | + Object-oriented building blocks for portfolio simulation: [portfolio.py](src/pypm/portfolio.py) 17 | + Generic wrapper for multi-core repeated K fold cross-validation: [model.py](src/pypm/ml_model/model.py) 18 | + Free-to-use simulated EOD stock data and alternative data streams: [data](data) 19 | 20 | ---- 21 | 22 | ![](cover.png) 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisconlan/algorithmic-trading-with-python/ebe01087c7d9172db72bc3c9adc1eee5e882ac49/src/__init__.py -------------------------------------------------------------------------------- /src/bootstrap_portfolio.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | from pypm import metrics, signals, data_io, simulation, optimization 5 | from pypm.optimization import GridSearchOptimizer 6 | 7 | from typing import List, Dict, Tuple, Callable 8 | 9 | Performance = simulation.PortfolioHistory.PerformancePayload # Dict[str, float] 10 | 11 | def bind_simulator(**sim_kwargs) -> Callable: 12 | """ 13 | Create a simulator that uses a bootstrapped rolling Sharpe ratio for the preference matrix 14 | """ 15 | symbols: List[str] = data_io.get_all_symbols() 16 | prices: pd.DataFrame = data_io.load_eod_matrix(symbols) 17 | 18 | _bollinger: Callable = signals.create_bollinger_band_signal 19 | bollinger_n = 20 20 | 21 | returns = metrics.calculate_return_series(prices) 22 | sharpe_n = 20 23 | 24 | def bootstrap_rolling_sharpe_ratio(return_series: pd.Series) -> pd.Series: 25 | _series = return_series.iloc[1:] 26 | _series =
_series.sample(n=return_series.shape[0], replace=True) 27 | _series.iloc[:1] = [np.nan] 28 | _series = pd.Series(_series.values, index=return_series.index) 29 | _windowed_series = _series.rolling(sharpe_n) 30 | return _windowed_series.mean() / _windowed_series.std() 31 | 32 | _sharpe: Callable = bootstrap_rolling_sharpe_ratio 33 | 34 | def _simulate(bootstrap_test_id: int) -> Performance: 35 | 36 | signal = prices.apply(_bollinger, args=(bollinger_n,), axis=0) 37 | preference = returns.apply(_sharpe, axis=0) 38 | 39 | simulator = simulation.SimpleSimulator(**sim_kwargs) 40 | simulator.simulate(prices, signal, preference) 41 | 42 | return simulator.portfolio_history.get_performance_metric_data() 43 | 44 | return _simulate 45 | 46 | if __name__ == '__main__': 47 | 48 | simulate = bind_simulator(initial_cash=10000, max_active_positions=5) 49 | 50 | optimizer = GridSearchOptimizer(simulate) 51 | optimizer.optimize(bootstrap_test_id=range(1000)) 52 | 53 | print(optimizer.get_best('excess_cagr')) 54 | optimizer.print_summary() 55 | optimizer.plot('excess_cagr') 56 | -------------------------------------------------------------------------------- /src/fit_alternative_data_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | import numpy as np 4 | from typing import Dict 5 | 6 | from joblib import dump 7 | 8 | from pypm.ml_model.data_io import load_data 9 | from pypm.ml_model.events import calculate_events 10 | from pypm.ml_model.labels import calculate_labels 11 | from pypm.ml_model.features import calculate_features 12 | from pypm.ml_model.model import calculate_model 13 | from pypm.ml_model.weights import calculate_weights 14 | 15 | SRC_DIR = os.path.dirname(os.path.abspath(__file__)) 16 | 17 | if __name__ == '__main__': 18 | 19 | # All the data we have to work with 20 | symbols, eod_data, alt_data = load_data() 21 | 22 | # The ML dataframe for each symbol, to be combined later 23 | df_by_symbol: Dict[str, pd.DataFrame] = dict() 24 | 25 | # Build ML dataframe for each symbol 26 | for symbol in symbols: 27 | 28 | # Get revenue and price series 29 | revenue_series = alt_data[symbol].dropna() 30 | price_series = eod_data[symbol].dropna() 31 | price_index = price_series.index 32 | 33 | # Get events, labels, weights, and features 34 | event_index = calculate_events(revenue_series) 35 | event_labels, event_spans = calculate_labels(price_series, event_index) 36 | weights = calculate_weights(event_spans, price_index) 37 | features_df = calculate_features(price_series, revenue_series) 38 | 39 | # Subset features by event dates 40 | features_on_events = features_df.loc[event_index] 41 | 42 | # Convert labels and events to a dataframe 43 | labels_df = pd.DataFrame(event_labels) 44 | labels_df.columns = ['y'] 45 | 46 | # Convert weights to a dataframe 47 | weights_df = pd.DataFrame(weights) 48 | weights_df.columns = ['weights'] 49 | 50 | # Concatenate features to labels 51 | df = pd.concat([features_on_events, weights_df, labels_df], axis=1) 52 | df_by_symbol[symbol] = df 53 | 54 | # Create final ML dataframe 55 | df = pd.concat(df_by_symbol.values(), axis=0) 56 | df.sort_index(inplace=True) 57 | df.dropna(inplace=True) 58 | print(df) 59 | 60 | # Fit the model 61 | classifier = calculate_model(df) 62 | 63 | # Save the model 64 | dump(classifier, os.path.join(SRC_DIR, 'ml_model.joblib')) 65 | 66 | 67 | -------------------------------------------------------------------------------- /src/optimize_portfolio.py:
-------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | from pypm import metrics, signals, data_io, simulation, optimization 4 | from pypm.optimization import GridSearchOptimizer 5 | 6 | from typing import List, Dict, Tuple, Callable 7 | 8 | Performance = simulation.PortfolioHistory.PerformancePayload # Dict[str, float] 9 | 10 | def bind_simulator(**sim_kwargs) -> Callable: 11 | """ 12 | Create a function with all static simulation data bound to it, where the 13 | arguments are simulation parameters 14 | """ 15 | 16 | symbols: List[str] = data_io.get_all_symbols() 17 | prices: pd.DataFrame = data_io.load_eod_matrix(symbols) 18 | 19 | _bollinger: Callable = signals.create_bollinger_band_signal 20 | _sharpe: Callable = metrics.calculate_rolling_sharpe_ratio 21 | 22 | def _simulate(bollinger_n: int, sharpe_n: int) -> Performance: 23 | 24 | signal = prices.apply(_bollinger, args=(bollinger_n,), axis=0) 25 | preference = prices.apply(_sharpe, args=(sharpe_n, ), axis=0) 26 | 27 | simulator = simulation.SimpleSimulator(**sim_kwargs) 28 | simulator.simulate(prices, signal, preference) 29 | 30 | return simulator.portfolio_history.get_performance_metric_data() 31 | 32 | return _simulate 33 | 34 | if __name__ == '__main__': 35 | 36 | simulate = bind_simulator(initial_cash=10000, max_active_positions=5) 37 | 38 | optimizer = GridSearchOptimizer(simulate) 39 | optimizer.optimize( 40 | bollinger_n=range(10, 110, 10), 41 | sharpe_n=range(10, 110, 10), 42 | ) 43 | 44 | print(optimizer.get_best('excess_cagr')) 45 | optimizer.print_summary() 46 | optimizer.plot('excess_cagr') 47 | optimizer.plot('bollinger_n', 'excess_cagr', sharpe_n=20) 48 | optimizer.plot('bollinger_n', 'sharpe_n', 'excess_cagr') 49 | 50 | -------------------------------------------------------------------------------- /src/pypm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisconlan/algorithmic-trading-with-python/ebe01087c7d9172db72bc3c9adc1eee5e882ac49/src/pypm/__init__.py -------------------------------------------------------------------------------- /src/pypm/data_io.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | from pandas import DataFrame 4 | from typing import Dict, List, Tuple 5 | 6 | DATA_DIR = os.path.join( 7 | os.path.dirname(os.path.abspath(__file__)), 8 | '..', 9 | '..', 10 | 'data', 11 | ) 12 | EOD_DATA_DIR = os.path.join(DATA_DIR, 'eod') 13 | ALTERNATIVE_DATA_DIR = os.path.join(DATA_DIR, 'alternative_data') 14 | 15 | def load_eod_data(ticker: str, data_dir: str=EOD_DATA_DIR) -> DataFrame: 16 | f_path = os.path.join(data_dir, f'{ticker}.csv') 17 | assert os.path.isfile(f_path), f'No data available for {ticker}' 18 | return pd.read_csv(f_path, parse_dates=['date'], index_col='date') 19 | 20 | def load_spy_data() -> DataFrame: 21 | """ 22 | Convenience function to load S&P 500 ETF EOD data 23 | """ 24 | return load_eod_data('SPY', DATA_DIR) 25 | 26 | def _combine_columns(filepaths_by_symbol: Dict[str, str], 27 | attr: str='close') -> pd.DataFrame: 28 | 29 | data_frames = [ 30 | pd.read_csv( 31 | filepath, 32 | index_col='date', 33 | usecols=['date', attr], 34 | parse_dates=['date'], 35 | ).rename( 36 | columns={ 37 | 'date': 'date', 38 | attr: symbol, 39 | } 40 | ) for symbol, filepath in filepaths_by_symbol.items() 41 | ] 42 | return pd.concat(data_frames, sort=True, axis=1) 43 | 44 | 45 | def 
load_eod_matrix(tickers: List[str], attr: str='close') -> pd.DataFrame: 46 | filepaths_by_symbol = { 47 | t: os.path.join(EOD_DATA_DIR, f'{t}.csv') for t in tickers 48 | } 49 | return _combine_columns(filepaths_by_symbol, attr) 50 | 51 | def load_alternative_data_matrix(tickers: List[str]) -> pd.DataFrame: 52 | filepaths_by_symbol = { 53 | t: os.path.join(ALTERNATIVE_DATA_DIR, f'{t}.csv') for t in tickers 54 | } 55 | return _combine_columns(filepaths_by_symbol, 'value') 56 | 57 | 58 | def get_all_symbols() -> List[str]: 59 | return [os.path.splitext(v)[0] for v in os.listdir(EOD_DATA_DIR)] 60 | 61 | 62 | def build_eod_closes() -> None: 63 | filenames = os.listdir(EOD_DATA_DIR) 64 | filepaths_by_symbol = { 65 | os.path.splitext(v)[0]: os.path.join(EOD_DATA_DIR, v) for v in filenames 66 | } 67 | result = _combine_columns(filepaths_by_symbol) 68 | result.to_csv(os.path.join(DATA_DIR, 'eod_closes.csv')) 69 | 70 | 71 | def concatenate_metrics(df_by_metric: Dict[str, pd.DataFrame]) -> pd.DataFrame: 72 | """ 73 | Concatenates different dataframes that have the same columns into a 74 | hierarchical dataframe. 75 | 76 | The input df_by_metric should be of the form 77 | 78 | { 79 | 'metric_1': pd.DataFrame(), 80 | 'metric_2': pd.DataFrame(), 81 | } 82 | where each dataframe should have the same columns, i.e. symbols. 83 | """ 84 | 85 | to_concatenate = [] 86 | tuples = [] 87 | for key, df in df_by_metric.items(): 88 | to_concatenate.append(df) 89 | tuples += [(s, key) for s in df.columns.values] 90 | 91 | df = pd.concat(to_concatenate, sort=True, axis=1) 92 | df.columns = pd.MultiIndex.from_tuples(tuples, names=['symbol', 'metric']) 93 | 94 | return df 95 | 96 | 97 | if __name__ == '__main__': 98 | build_eod_closes() 99 | 100 | 101 | -------------------------------------------------------------------------------- /src/pypm/filters.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | def calculate_non_uniform_lagged_change(series: pd.Series, n_days: int): 5 | """ 6 | Use pd.Series.searchsorted to measure the lagged change in a non-uniformly 7 | spaced time series over n_days of calendar time.
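Roughly, each value is differenced against the earliest observation dated at or after n_days before it, so the lag adapts to irregular spacing in the index.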
8 | """ 9 | 10 | # Get mapping from now to n_days ago at every point 11 | _timedelta: pd.Timedelta = pd.Timedelta(days=n_days) 12 | _idx: pd.Series = series.index.searchsorted(series.index - _timedelta) 13 | _idx = _idx[_idx > 0] 14 | 15 | # Get the last len(series) - n_days values 16 | _series = series.iloc[-_idx.shape[0]:] 17 | 18 | # Build a padding of NA values 19 | _pad_length = series.shape[0] - _idx.shape[0] 20 | _na_pad = pd.Series(None, index=series.index[:_pad_length]) 21 | 22 | # Get the corresponding lagged values 23 | _lagged_series = series.iloc[_idx] 24 | 25 | # Measure the difference 26 | _diff = pd.Series(_series.values-_lagged_series.values, index=_series.index) 27 | 28 | return pd.concat([_na_pad, _diff]) 29 | 30 | 31 | def calculate_cusum_events(series: pd.Series, 32 | filter_threshold: float) -> pd.DatetimeIndex: 33 | """ 34 | Calculate symmetric cusum filter and corresponding events 35 | """ 36 | 37 | event_dates = list() 38 | s_up = 0 39 | s_down = 0 40 | 41 | for date, price in series.items(): 42 | s_up = max(0, s_up + price) 43 | s_down = min(0, s_down + price) 44 | 45 | if s_up > filter_threshold: 46 | s_up = 0 47 | event_dates.append(date) 48 | 49 | elif s_down < -filter_threshold: 50 | s_down = 0 51 | event_dates.append(date) 52 | 53 | return pd.DatetimeIndex(event_dates) 54 | 55 | 56 | -------------------------------------------------------------------------------- /src/pypm/indicators.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from pypm.data_io import load_eod_data 3 | 4 | 5 | def calculate_simple_moving_average(series: pd.Series, n: int=20) -> pd.Series: 6 | """Calculates the simple moving average""" 7 | return series.rolling(n).mean() 8 | 9 | 10 | def calculate_simple_moving_sample_stdev(series: pd.Series, n: int=20) -> pd.Series: 11 | """Calculates the simple moving sample standard deviation""" 12 | return series.rolling(n).std() 13 | 14 | 15 | def calculate_macd_oscillator(series: pd.Series, 16 | n1: int=5, n2: int=34) -> pd.Series: 17 | """ 18 | Calculate the moving average convergence divergence oscillator, given a 19 | short moving average of length n1 and a long moving average of length n2 20 | """ 21 | assert n1 < n2, 'n1 must be less than n2' 22 | return calculate_simple_moving_average(series, n1) - \ 23 | calculate_simple_moving_average(series, n2) 24 | 25 | 26 | def calculate_bollinger_bands(series: pd.Series, n: int=20) -> pd.DataFrame: 27 | """ 28 | Calculates the Bollinger Bands and returns them as a dataframe 29 | """ 30 | 31 | sma = calculate_simple_moving_average(series, n) 32 | stdev = calculate_simple_moving_sample_stdev(series, n) 33 | 34 | return pd.DataFrame({ 35 | 'middle': sma, 36 | 'upper': sma + 2 * stdev, 37 | 'lower': sma - 2 * stdev 38 | }) 39 | 40 | 41 | def calculate_money_flow_volume_series(df: pd.DataFrame) -> pd.Series: 42 | """ 43 | Calculates money flow series 44 | """ 45 | mfv = df['volume'] * (2*df['close'] - df['high'] - df['low']) / \ 46 | (df['high'] - df['low']) 47 | return mfv 48 | 49 | def calculate_money_flow_volume(df: pd.DataFrame, n: int=20) -> pd.Series: 50 | """ 51 | Calculates money flow volume, or q_t in our formula 52 | """ 53 | return calculate_money_flow_volume_series(df).rolling(n).sum() 54 | 55 | def calculate_chaikin_money_flow(df: pd.DataFrame, n: int=20) -> pd.Series: 56 | """ 57 | Calculates the Chaikin money flow 58 | """ 59 | return calculate_money_flow_volume(df, n) / df['volume'].rolling(n).sum() 60 | 61 | 62 | if __name__ == '__main__': 63 |
data = load_eod_data('AWU') 64 | closes = data['close'] 65 | sma = calculate_simple_moving_average(closes, 10) 66 | macd = calculate_macd_oscillator(closes, 5, 50) 67 | 68 | bollinger_bands = calculate_bollinger_bands(closes, 100) 69 | bollinger_bands = bollinger_bands.assign(closes=closes) 70 | bollinger_bands.plot() 71 | 72 | cmf = calculate_chaikin_money_flow(data) 73 | # cmf.plot() 74 | 75 | 76 | import matplotlib.pyplot as plt 77 | plt.show() -------------------------------------------------------------------------------- /src/pypm/labels.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from typing import List, Tuple 4 | 5 | def compute_triple_barrier_labels( 6 | price_series: pd.Series, 7 | event_index: pd.Series, 8 | time_delta_days: int, 9 | upper_delta: float=None, 10 | lower_delta: float=None, 11 | vol_span: int=20, 12 | upper_z: float=None, 13 | lower_z: float=None, 14 | upper_label: int=1, 15 | lower_label: int=-1) -> Tuple[pd.Series, pd.Series]: 16 | """ 17 | Calculate event labels according to the triple-barrier method. 18 | 19 | Return a series with both the original events and the labels. Labels 1, 0, 20 | and -1 correspond to upper barrier breach, vertical barrier breach, and 21 | lower barrier breach, respectively. 22 | 23 | Also return a series where the index is the start date of the label and the 24 | values are the end dates of the label. 25 | """ 26 | 27 | timedelta = pd.Timedelta(days=time_delta_days) 28 | series = pd.Series(np.log(price_series.values), index=price_series.index) 29 | 30 | # A list with elements of {-1, 0, 1} indicating the outcome of the events 31 | labels = list() 32 | label_dates = list() 33 | 34 | if upper_z or lower_z: 35 | volatility = series.ewm(span=vol_span).std() 36 | volatility *= np.sqrt(time_delta_days / vol_span) 37 | 38 | for event_date in event_index: 39 | date_barrier = event_date + timedelta 40 | 41 | start_price = series.loc[event_date] 42 | log_returns = series.loc[event_date:date_barrier] - start_price 43 | 44 | # First element of tuple is 1 or -1 indicating upper or lower barrier 45 | # Second element of tuple is first date when barrier was crossed 46 | candidates: List[Tuple[int, pd.Timestamp]] = list() 47 | 48 | # Add the first upper or lower date to candidates 49 | if upper_delta: 50 | _date = log_returns[log_returns > upper_delta].first_valid_index() 51 | if _date: 52 | candidates.append((upper_label, _date)) 53 | 54 | if lower_delta: 55 | _date = log_returns[log_returns < lower_delta].first_valid_index() 56 | if _date: 57 | candidates.append((lower_label, _date)) 58 | 59 | # Add the first upper_z and lower_z to candidates 60 | if upper_z: 61 | upper_barrier = upper_z * volatility[event_date] 62 | _date = log_returns[log_returns > upper_barrier].first_valid_index() 63 | if _date: 64 | candidates.append((upper_label, _date)) 65 | 66 | if lower_z: 67 | lower_barrier = lower_z * volatility[event_date] 68 | _date = log_returns[log_returns < lower_barrier].first_valid_index() 69 | if _date: 70 | candidates.append((lower_label, _date)) 71 | 72 | if candidates: 73 | # If any candidates, return label for first date 74 | label, label_date = min(candidates, key=lambda x: x[1]) 75 | else: 76 | # If there were no candidates, time barrier was touched 77 | label, label_date = 0, date_barrier 78 | 79 | labels.append(label) 80 | label_dates.append(label_date) 81 | 82 | label_series = pd.Series(labels, index=event_index) 83 | event_spans = pd.Series(label_dates,
index=event_index) 84 | 85 | return label_series, event_spans 86 | 87 | 88 | 89 | 90 | 91 | -------------------------------------------------------------------------------- /src/pypm/metrics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from pypm.data_io import load_eod_data, load_spy_data 4 | from sklearn.linear_model import LinearRegression 5 | from typing import Dict, Any, Callable 6 | 7 | def calculate_return_series(series: pd.Series) -> pd.Series: 8 | """ 9 | Calculates the return series of a given time series. 10 | 11 | >>> data = load_eod_data('VBB') 12 | >>> close_series = data['close'] 13 | >>> return_series = calculate_return_series(close_series) 14 | 15 | The first value will always be NaN. 16 | """ 17 | 18 | shifted_series = series.shift(1, axis=0) 19 | return series / shifted_series - 1 20 | 21 | 22 | def calculate_log_return_series(series: pd.Series) -> pd.Series: 23 | """ 24 | Same as calculate_return_series but with log returns 25 | """ 26 | shifted_series = series.shift(1, axis=0) 27 | return pd.Series(np.log(series / shifted_series)) 28 | 29 | 30 | def calculate_percent_return(series: pd.Series) -> float: 31 | """ 32 | Takes the first and last value in a series to determine the percent return, 33 | assuming the series is in date-ascending order 34 | """ 35 | return series.iloc[-1] / series.iloc[0] - 1 36 | 37 | 38 | def get_years_past(series: pd.Series) -> float: 39 | """ 40 | Calculate the years past according to the index of the series for use with 41 | functions that require annualization 42 | """ 43 | start_date = series.index[0] 44 | end_date = series.index[-1] 45 | return (end_date - start_date).days / 365.25 46 | 47 | 48 | def calculate_cagr(series: pd.Series) -> float: 49 | """ 50 | Calculate compounded annual growth rate 51 | """ 52 | start_price = series.iloc[0] 53 | end_price = series.iloc[-1] 54 | value_factor = end_price / start_price 55 | year_past = get_years_past(series) 56 | return (value_factor ** (1 / year_past)) - 1 57 | 58 | 59 | def calculate_annualized_volatility(return_series: pd.Series) -> float: 60 | """ 61 | Calculates annualized volatility for a date-indexed return series. 62 | Works for any interval of date-indexed prices and returns. 63 | """ 64 | years_past = get_years_past(return_series) 65 | entries_per_year = return_series.shape[0] / years_past 66 | return return_series.std() * np.sqrt(entries_per_year) 67 | 68 | 69 | def calculate_sharpe_ratio(price_series: pd.Series, 70 | benchmark_rate: float=0) -> float: 71 | """ 72 | Calculates the Sharpe ratio given a price series. Defaults to benchmark_rate 73 | of zero. 74 | """ 75 | cagr = calculate_cagr(price_series) 76 | return_series = calculate_return_series(price_series) 77 | volatility = calculate_annualized_volatility(return_series) 78 | return (cagr - benchmark_rate) / volatility 79 | 80 | 81 | def calculate_rolling_sharpe_ratio(price_series: pd.Series, 82 | n: float=20) -> pd.Series: 83 | """ 84 | Compute an approximation of the Sharpe ratio on a rolling basis. 85 | Intended for use as a preference value. 86 | """ 87 | rolling_return_series = calculate_return_series(price_series).rolling(n) 88 | return rolling_return_series.mean() / rolling_return_series.std() 89 | 90 | 91 | def calculate_annualized_downside_deviation(return_series: pd.Series, 92 | benchmark_rate: float=0) -> float: 93 | """ 94 | Calculates the downside deviation for use in the Sortino ratio.
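Computed as sqrt(sum(max(r_b - r_i, 0) ** 2) / (n - 1)) over the return series, annualized by the square root of the number of entries per year.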
95 | 96 | Benchmark rate is assumed to be annualized. It will be adjusted according 97 | to the number of periods per year seen in the data. 98 | """ 99 | 100 | # For both de-annualizing the benchmark rate and annualizing result 101 | years_past = get_years_past(return_series) 102 | entries_per_year = return_series.shape[0] / years_past 103 | 104 | adjusted_benchmark_rate = ((1+benchmark_rate) ** (1/entries_per_year)) - 1 105 | 106 | downside_series = adjusted_benchmark_rate - return_series 107 | downside_sum_of_squares = (downside_series[downside_series > 0] ** 2).sum() 108 | denominator = return_series.shape[0] - 1 109 | downside_deviation = np.sqrt(downside_sum_of_squares / denominator) 110 | 111 | return downside_deviation * np.sqrt(entries_per_year) 112 | 113 | 114 | def calculate_sortino_ratio(price_series: pd.Series, 115 | benchmark_rate: float=0) -> float: 116 | """ 117 | Calculates the Sortino ratio. 118 | """ 119 | cagr = calculate_cagr(price_series) 120 | return_series = calculate_return_series(price_series) 121 | downside_deviation = calculate_annualized_downside_deviation(return_series) 122 | return (cagr - benchmark_rate) / downside_deviation 123 | 124 | 125 | def calculate_pure_profit_score(price_series: pd.Series) -> float: 126 | """ 127 | Calculates the pure profit score 128 | """ 129 | cagr = calculate_cagr(price_series) 130 | 131 | # Build a single column for a predictor, t 132 | t: np.ndarray = np.arange(0, price_series.shape[0]).reshape(-1, 1) 133 | 134 | # Fit the regression 135 | regression = LinearRegression().fit(t, price_series) 136 | 137 | # Get the r-squared value 138 | r_squared = regression.score(t, price_series) 139 | 140 | return cagr * r_squared 141 | 142 | def calculate_jensens_alpha(return_series: pd.Series, 143 | benchmark_return_series: pd.Series) -> float: 144 | """ 145 | Calculates Jensen's alpha. Prefers that the input series share the same 146 | index. Handles NAs. 147 | """ 148 | 149 | # Join series along date index and purge NAs 150 | df = pd.concat([return_series, benchmark_return_series], sort=True, axis=1) 151 | df = df.dropna() 152 | 153 | # Get the appropriate data structure for scikit-learn 154 | clean_returns: pd.Series = df[df.columns.values[0]] 155 | clean_benchmarks = pd.DataFrame(df[df.columns.values[1]]) 156 | 157 | # Fit a linear regression and return the alpha 158 | regression = LinearRegression().fit(clean_benchmarks, y=clean_returns) 159 | return regression.intercept_ 160 | 161 | def calculate_jensens_alpha_v2(return_series: pd.Series) -> float: 162 | """ 163 | Calculates Jensen's alpha, but loads in SPY prices as the benchmark series 164 | for you. Can be slow if run repeatedly.
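When calling repeatedly, compute the SPY benchmark return series once and pass it to calculate_jensens_alpha directly.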
165 | """ 166 | spy_data = load_spy_data() 167 | benchmark_return_series = calculate_log_return_series(spy_data['close']) 168 | return calculate_jensens_alpha(return_series, benchmark_return_series) 169 | 170 | 171 | DRAWDOWN_EVALUATORS: Dict[str, Callable] = { 172 | 'dollar': lambda price, peak: peak - price, 173 | 'percent': lambda price, peak: -((price / peak) - 1), 174 | 'log': lambda price, peak: np.log(peak) - np.log(price), 175 | } 176 | 177 | def calculate_drawdown_series(series: pd.Series, method: str='log') -> pd.Series: 178 | """ 179 | Returns the drawdown series 180 | """ 181 | assert method in DRAWDOWN_EVALUATORS, \ 182 | f'Method "{method}" must be one of {list(DRAWDOWN_EVALUATORS.keys())}' 183 | 184 | evaluator = DRAWDOWN_EVALUATORS[method] 185 | return evaluator(series, series.cummax()) 186 | 187 | def calculate_max_drawdown(series: pd.Series, method: str='log') -> float: 188 | """ 189 | Simply returns the max drawdown as a float 190 | """ 191 | return calculate_drawdown_series(series, method).max() 192 | 193 | def calculate_max_drawdown_with_metadata(series: pd.Series, 194 | method: str='log') -> Dict[str, Any]: 195 | """ 196 | Calculates the max drawdown and stores metadata about when and where. Returns 197 | a dictionary of the form 198 | { 199 | 'max_drawdown': float, 200 | 'peak_date': pd.Timestamp, 201 | 'peak_price': float, 202 | 'trough_date': pd.Timestamp, 203 | 'trough_price': float, 204 | } 205 | """ 206 | 207 | assert method in DRAWDOWN_EVALUATORS, \ 208 | f'Method "{method}" must be one of {list(DRAWDOWN_EVALUATORS.keys())}' 209 | 210 | evaluator = DRAWDOWN_EVALUATORS[method] 211 | 212 | max_drawdown = 0 213 | local_peak_date = peak_date = trough_date = series.index[0] 214 | local_peak_price = peak_price = trough_price = series.iloc[0] 215 | 216 | for date, price in series.items(): 217 | 218 | # Keep track of the rolling max 219 | if price > local_peak_price: 220 | local_peak_date = date 221 | local_peak_price = price 222 | 223 | # Compute the drawdown 224 | drawdown = evaluator(price, local_peak_price) 225 | 226 | # Store new max drawdown values 227 | if drawdown > max_drawdown: 228 | max_drawdown = drawdown 229 | 230 | peak_date = local_peak_date 231 | peak_price = local_peak_price 232 | 233 | trough_date = date 234 | trough_price = price 235 | 236 | return { 237 | 'max_drawdown': max_drawdown, 238 | 'peak_date': peak_date, 239 | 'peak_price': peak_price, 240 | 'trough_date': trough_date, 241 | 'trough_price': trough_price 242 | } 243 | 244 | def calculate_log_max_drawdown_ratio(series: pd.Series) -> float: 245 | log_drawdown = calculate_max_drawdown(series, method='log') 246 | log_return = np.log(series.iloc[-1]) - np.log(series.iloc[0]) 247 | return log_return - log_drawdown 248 | 249 | def calculate_calmar_ratio(series: pd.Series, years_past: int=3) -> float: 250 | """ 251 | Return the percent max drawdown ratio over the past three years, otherwise 252 | known as the Calmar Ratio 253 | """ 254 | 255 | # Filter series on past three years 256 | last_date = series.index[-1] 257 | three_years_ago = last_date - pd.Timedelta(days=years_past*365.25) 258 | series = series[series.index > three_years_ago] 259 | 260 | # Compute annualized percent max drawdown ratio 261 | percent_drawdown = calculate_max_drawdown(series, method='percent') 262 | cagr = calculate_cagr(series) 263 | return cagr / percent_drawdown 264 | 265 | 266 | -------------------------------------------------------------------------------- /src/pypm/ml_model/__init__.py:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisconlan/algorithmic-trading-with-python/ebe01087c7d9172db72bc3c9adc1eee5e882ac49/src/pypm/ml_model/__init__.py -------------------------------------------------------------------------------- /src/pypm/ml_model/data_io.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from typing import Dict, Any, Callable, Tuple, List 4 | 5 | from pypm import data_io 6 | 7 | def load_data() -> Tuple[List[str], pd.DataFrame, pd.DataFrame]: 8 | """ 9 | Load the data as it will be used in the alternative data model 10 | """ 11 | symbols: List[str] = data_io.get_all_symbols() 12 | alt_data = data_io.load_alternative_data_matrix(symbols) 13 | eod_data = data_io.load_eod_matrix(symbols) 14 | eod_data = eod_data[eod_data.index >= alt_data.index.min()] 15 | 16 | return symbols, eod_data, alt_data 17 | -------------------------------------------------------------------------------- /src/pypm/ml_model/events.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from pypm import filters 4 | 5 | def calculate_events_for_revenue_series(series: pd.Series, 6 | filter_threshold: float, lookback: int=365) -> pd.DatetimeIndex: 7 | """ 8 | Calculate the symmetric cusum filter to generate events on YoY changes in 9 | the log revenue series 10 | """ 11 | series = np.log(series) 12 | series = filters.calculate_non_uniform_lagged_change(series, lookback) 13 | return filters.calculate_cusum_events(series, filter_threshold) 14 | 15 | 16 | def calculate_events(revenue_series: pd.Series): 17 | return calculate_events_for_revenue_series( 18 | revenue_series, 19 | filter_threshold=5, 20 | lookback=365, 21 | ) 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /src/pypm/ml_model/features.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | from pypm import indicators, filters, metrics 5 | 6 | _calc_delta = filters.calculate_non_uniform_lagged_change 7 | _calc_ma = indicators.calculate_simple_moving_average 8 | _calc_log_return = metrics.calculate_log_return_series 9 | 10 | def _calc_rolling_vol(series, n): 11 | return series.rolling(n).std() * np.sqrt(252 / n) 12 | 13 | def calculate_features(price_series, revenue_series) -> pd.DataFrame: 14 | """ 15 | Calculate any and all potentially useful features. Return as a dataframe.
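(Each of the five lookback windows contributes a revenue delta, a price return, and a volatility column, for 15 columns in total.)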
16 | """ 17 | 18 | log_revenue = np.log(revenue_series) 19 | log_prices = np.log(price_series) 20 | 21 | log_revenue_ma = _calc_ma(log_revenue, 10) 22 | log_prices_ma = _calc_ma(log_prices, 10) 23 | 24 | log_returns = _calc_log_return(price_series) 25 | 26 | features_by_name = dict() 27 | 28 | for i in [7, 30, 90, 180, 360]: 29 | 30 | rev_feature = _calc_delta(log_revenue_ma, i) 31 | price_feature = _calc_delta(log_prices_ma, i) 32 | vol_feature = _calc_rolling_vol(log_returns, i) 33 | 34 | features_by_name.update({ 35 | f'{i}_day_revenue_delta': rev_feature, 36 | f'{i}_day_return': price_feature, 37 | f'{i}_day_vol': vol_feature, 38 | }) 39 | 40 | features_df = pd.DataFrame(features_by_name) 41 | return features_df 42 | -------------------------------------------------------------------------------- /src/pypm/ml_model/labels.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from typing import Tuple 4 | 5 | from pypm import labels 6 | 7 | def calculate_labels(price_series, event_index) -> Tuple[pd.Series, pd.Series]: 8 | """ 9 | Calculate labels based on the triple barrier method. Return a series of 10 | event labels indexed by event start date, and return a series of event end 11 | dates indexed by event start date. 12 | """ 13 | 14 | # Remove events that don't have a proper chance to materialize 15 | time_delta_days = 90 16 | max_date = price_series.index.max() 17 | cutoff = max_date - pd.Timedelta(days=time_delta_days) 18 | event_index = event_index[event_index <= cutoff] 19 | 20 | # Use triple barrier method 21 | event_labels, event_spans = labels.compute_triple_barrier_labels( 22 | price_series, 23 | event_index, 24 | time_delta_days=time_delta_days, 25 | # upper_delta=0.10, 26 | # lower_delta=-0.10, 27 | upper_z=1.8, 28 | lower_z=-1.8, 29 | lower_label=-1, 30 | ) 31 | 32 | return event_labels, event_spans 33 | -------------------------------------------------------------------------------- /src/pypm/ml_model/model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | from sklearn.ensemble import RandomForestClassifier 5 | from sklearn.model_selection import RepeatedKFold 6 | from sklearn.base import clone 7 | 8 | from joblib import Parallel, delayed 9 | 10 | # Number of jobs to run in parallel 11 | # Set to number of computer cores to use 12 | N_JOBS = 10 13 | N_SPLITS = 5 14 | N_REPEATS = 4 15 | 16 | def _fit_and_score(classifier, X, y, w, train_index, test_index, i) -> float: 17 | """ 18 | The function used by joblib to split, train, and score cross-validations 19 | """ 20 | X_train = X.iloc[train_index] 21 | X_test = X.iloc[test_index] 22 | 23 | y_train = y.iloc[train_index] 24 | y_test = y.iloc[test_index] 25 | 26 | w_train = w.iloc[train_index] 27 | w_test = w.iloc[test_index] 28 | 29 | classifier.fit(X_train, y_train, w_train) 30 | score = classifier.score(X_test, y_test, w_test) 31 | 32 | print(f'Finished {i} ({100*score:.1f}%)') 33 | 34 | return score 35 | 36 | def repeated_k_fold(classifier, X, y, w) -> np.ndarray: 37 | """ 38 | Perform repeated k-fold cross-validation on a classifier. Spread the fitting 39 | jobs over multiple computer cores.
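With the module-level defaults (N_SPLITS=5, N_REPEATS=4), this fits 20 models, N_JOBS at a time.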
40 | """ 41 | n_jobs = N_JOBS 42 | 43 | n_splits = N_SPLITS 44 | n_repeats = N_REPEATS 45 | 46 | total_fits = n_splits * n_repeats 47 | 48 | _k_fold = RepeatedKFold(n_splits=n_splits, n_repeats=n_repeats) 49 | 50 | print(f'Fitting {total_fits} models {n_jobs} at a time ...') 51 | print() 52 | 53 | parallel = Parallel(n_jobs=n_jobs) 54 | scores = parallel( 55 | delayed(_fit_and_score)( 56 | clone(classifier), X, y, w, train_index, test_index, i 57 | ) for i, (train_index, test_index) in enumerate(_k_fold.split(X)) 58 | ) 59 | 60 | return np.array(scores) 61 | 62 | 63 | def calculate_model(df: pd.DataFrame) -> RandomForestClassifier: 64 | """ 65 | Given a dataframe with a y column, weights column, and predictor columns 66 | with arbitrary names, cross-validate and fit a classifier. Print 67 | diagnostics. 68 | """ 69 | classifier = RandomForestClassifier(n_estimators=100) 70 | 71 | # Separate data 72 | predictor_columns = [ 73 | c for c in df.columns.values if c not in ('y', 'weights') 74 | ] 75 | X = df[predictor_columns] 76 | y = df['y'] 77 | w = df['weights'] 78 | 79 | # Fit cross-validation 80 | scores = repeated_k_fold(classifier, X, y, w) 81 | 82 | # Get a full dataset fit for importance scores 83 | classifier.fit(X, y, w) 84 | 85 | # Compute diagnostics 86 | _imp = classifier.feature_importances_ 87 | importance_series = pd.Series(_imp, index=predictor_columns) 88 | importance_series = importance_series.sort_values(ascending=False) 89 | 90 | # baseline accuracy is the best value achievable with a constant guess 91 | baseline = np.max(y.value_counts() / y.shape[0]) 92 | 93 | # Compute a rough confidence interval for the improvement 94 | mean_score = scores.mean() 95 | std_score = scores.std() 96 | 97 | upper_bound = mean_score + 2 * std_score 98 | lower_bound = mean_score - 2 * std_score 99 | ibounds = (lower_bound - baseline, upper_bound - baseline) 100 | 101 | print() 102 | print('Feature importances') 103 | for col, imp in importance_series.items(): 104 | print(f'{col:24} {imp:>.3f}') 105 | print() 106 | 107 | print('Cross validation scores') 108 | print(np.round(100 * scores, 1)) 109 | print() 110 | 111 | print(f'Baseline accuracy {100*baseline:.1f}%') 112 | print(f'OOS accuracy {100*mean_score:.1f}% +/- {200 * scores.std():.1f}%') 113 | print(f'Improvement {100*(ibounds[0]):.1f} to {100*(ibounds[1]):.1f}%') 114 | print() 115 | 116 | return classifier 117 | -------------------------------------------------------------------------------- /src/pypm/ml_model/signals.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | from pypm.ml_model.events import calculate_events 5 | from pypm.ml_model.features import calculate_features 6 | 7 | from typing import List 8 | 9 | def calculate_signals(classifier, symbols: List[str], eod_data: pd.DataFrame, 10 | alt_data: pd.DataFrame) -> pd.DataFrame: 11 | """ 12 | Calculate signal dataframes for use in the simulator 13 | """ 14 | 15 | # For storing the signals 16 | signal_by_symbol = dict() 17 | 18 | # Build events and features for each symbol 19 | for symbol in symbols: 20 | 21 | # Get revenue and price series 22 | revenue_series = alt_data[symbol].dropna() 23 | price_series = eod_data[symbol].dropna() 24 | 25 | # Build output template 26 | signal_series = pd.Series(0, index=price_series.index) 27 | 28 | # Get events and features 29 | event_index = calculate_events(revenue_series) 30 | features_df = calculate_features(price_series, revenue_series) 31 | 32 |
32 | features_on_events = features_df.loc[event_index] 33 | features_on_events.dropna(inplace=True) 34 | event_index = features_on_events.index 35 | 36 | if features_on_events.empty: 37 | predictions = pd.Series(dtype=float) 38 | else: 39 | _predictions = classifier.predict(features_on_events) 40 | predictions = pd.Series(_predictions, index=event_index) 41 | 42 | # Add predictions into the output template 43 | signal_series = signal_series.add(predictions, fill_value=0) 44 | 45 | signal_by_symbol[symbol] = signal_series 46 | 47 | signal = pd.DataFrame(signal_by_symbol) 48 | signal.sort_index(inplace=True) 49 | signal.dropna(inplace=True) 50 | 51 | return signal 52 | 53 | -------------------------------------------------------------------------------- /src/pypm/ml_model/weights.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | from pypm.weights import calculate_uniqueness 5 | 6 | def calculate_weights(event_spans: pd.Series, 7 | price_index: pd.Series) -> pd.Series: 8 | return calculate_uniqueness(event_spans, price_index) 9 | -------------------------------------------------------------------------------- /src/pypm/optimization.py: -------------------------------------------------------------------------------- 1 | from pypm import metrics, signals, data_io, simulation 2 | 3 | import pandas as pd 4 | import numpy as np 5 | from collections import defaultdict, OrderedDict 6 | from itertools import product 7 | from timeit import default_timer 8 | from typing import Dict, Tuple, List, Callable, Iterable, Any, NewType, Mapping 9 | 10 | import matplotlib.pyplot as plt 11 | from matplotlib import cm 12 | from mpl_toolkits.mplot3d import Axes3D 13 | 14 | # Performance data and parameter inputs are dictionaries 15 | Parameters = NewType('Parameters', Dict[str, float]) 16 | Performance = simulation.PortfolioHistory.PerformancePayload # Dict[str, float] 17 | 18 | # Simulation function must take parameters as keyword arguments pointing to 19 | # iterables and return a performance metric dictionary 20 | SimKwargs = NewType('SimKwargs', Mapping[str, Iterable[Any]]) 21 | SimFunction = NewType('SimFunction', Callable[[SimKwargs], Performance]) 22 | 23 | class OptimizationResult(object): 24 | """Simple container class for optimization data""" 25 | 26 | def __init__(self, parameters: Parameters, performance: Performance): 27 | 28 | # Make sure no collisions between performance metrics and params 29 | assert len(parameters.keys() & performance.keys()) == 0, \ 30 | 'parameter name matches performance metric name' 31 | 32 | self.parameters = parameters 33 | self.performance = performance 34 | 35 | @property 36 | def as_dict(self) -> Dict[str, float]: 37 | """Combines the dictionaries after we are sure of no collisions""" 38 | return {**self.parameters, **self.performance} 39 | 40 | 41 | class GridSearchOptimizer(object): 42 | """ 43 | A generic grid search optimizer that requires only a simulation function and 44 | a series of parameter ranges. Provides timing, summary, and plotting 45 | utilities with return data. 46 | """
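# Usage sketch (hypothetical simulation function, illustration only):
#
#     def simulate(n1: int, n2: int) -> dict:
#         # ... run a backtest and return a performance metric dictionary
#         return {'sharpe_ratio': float(n2 - n1)}
#
#     optimizer = GridSearchOptimizer(simulate)
#     optimizer.optimize(n1=range(5, 15), n2=range(20, 40, 5))
#     optimizer.print_summary()
#     print(optimizer.get_best('sharpe_ratio'))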
46 | """ 47 | 48 | def __init__(self, simulation_function: SimFunction): 49 | 50 | self.simulate = simulation_function 51 | self._results_list: List[OptimizationResult] = list() 52 | self._results_df = pd.DataFrame() 53 | 54 | self._optimization_finished = False 55 | 56 | def add_results(self, parameters: Parameters, performance: Performance): 57 | _results = OptimizationResult(parameters, performance) 58 | self._results_list.append(_results) 59 | 60 | def optimize(self, **optimization_ranges: SimKwargs): 61 | 62 | assert optimization_ranges, 'Must provide non-empty parameters.' 63 | 64 | # Convert all iterables to lists 65 | param_ranges = {k: list(v) for k, v in optimization_ranges.items()} 66 | self.param_names = param_names = list(param_ranges.keys()) 67 | 68 | # Count total simulation 69 | n = total_simulations = np.prod([len(r) for r in param_ranges.values()]) 70 | 71 | total_time_elapsed = 0 72 | 73 | print(f'Starting simulation ...') 74 | print(f'Simulating 1 / {n} ...', end='\r') 75 | for i, params in enumerate(product(*param_ranges.values())): 76 | if i > 0: 77 | _avg = avg_time = total_time_elapsed / i 78 | _rem = remaining_time = (n - (i + 1)) * avg_time 79 | s = f'Simulating {i+1} / {n} ... ' 80 | s += f'{_rem:.0f}s remaining ({_avg:.1f}s avg)' 81 | s += ' '*8 82 | print(s, end='\r') 83 | 84 | timer_start = default_timer() 85 | 86 | parameters = {n: param for n, param in zip(param_names, params)} 87 | results = self.simulate(**parameters) 88 | self.add_results(parameters, results) 89 | 90 | timer_end = default_timer() 91 | total_time_elapsed += timer_end - timer_start 92 | 93 | print(f'Simulated {total_simulations} / {total_simulations} ...') 94 | print(f'Elapsed time: {total_time_elapsed:.0f}s') 95 | print(f'Done.') 96 | 97 | self._optimization_finished = True 98 | 99 | def _assert_finished(self): 100 | assert self._optimization_finished, \ 101 | 'Run self.optimize before accessing this method.' 
102 | 103 | @property 104 | def results(self) -> pd.DataFrame: 105 | self._assert_finished() 106 | if self._results_df.empty: 107 | 108 | _results_list = self._results_list 109 | self._results_df = pd.DataFrame([r.as_dict for r in _results_list]) 110 | 111 | _columns = set(list(self._results_df.columns.values)) 112 | _params = set(self.param_names) 113 | self.metric_names = list(_columns - _params) 114 | 115 | return self._results_df 116 | 117 | def print_summary(self): 118 | df = self.results 119 | metric_names = self.metric_names 120 | 121 | print('Summary statistics') 122 | print(df[metric_names].describe().T) 123 | 124 | def get_best(self, metric_name: str) -> pd.DataFrame: 125 | """ 126 | Sort the results by a specific performance metric 127 | """ 128 | self._assert_finished() 129 | 130 | results = self.results 131 | param_names = self.param_names 132 | metric_names = self.metric_names 133 | 134 | assert metric_name in metric_names, 'Not a performance metric' 135 | partial_df = self.results[param_names+[metric_name]] 136 | 137 | return partial_df.sort_values(metric_name, ascending=False) 138 | 139 | def plot_1d_hist(self, x, show=True): 140 | self.results.hist(x) 141 | if show: 142 | plt.show() 143 | 144 | def plot_2d_line(self, x, y, show=True, **filter_kwargs): 145 | _results = self.results 146 | for k, v in filter_kwargs.items(): 147 | _results = _results[getattr(_results, k) == v] 148 | 149 | ax = _results.plot(x, y) 150 | if filter_kwargs: 151 | k_str = ', '.join([f'{k}={v}' for k,v in filter_kwargs.items()]) 152 | ax.legend([f'{x} ({k_str})']) 153 | 154 | if show: 155 | plt.show() 156 | 157 | def plot_2d_violin(self, x, y, show=True): 158 | """ 159 | Group y along x then plot violin charts 160 | """ 161 | x_values = self.results[x].unique() 162 | x_values.sort() 163 | 164 | y_by_x = OrderedDict([(v, []) for v in x_values]) 165 | for _, row in self.results.iterrows(): 166 | y_by_x[row[x]].append(row[y]) 167 | 168 | fig, ax = plt.subplots() 169 | 170 | ax.violinplot(dataset=list(y_by_x.values()), showmedians=True) 171 | ax.set_xlabel(x) 172 | ax.set_ylabel(y) 173 | ax.set_xticks(range(0, len(y_by_x)+1)) 174 | ax.set_xticklabels([''] + list(y_by_x.keys())) 175 | if show: 176 | plt.show() 177 | 178 | def plot_3d_mesh(self, x, y, z, show=True, **filter_kwargs): 179 | """ 180 | Plot interactive 3d mesh. z axis should typically be performance metric 181 | """ 182 | _results = self.results 183 | fig = plt.figure() 184 | ax = Axes3D(fig) 185 | 186 | for k, v in filter_kwargs.items(): 187 | _results = _results[getattr(_results, k) == v] 188 | 189 | X, Y, Z = [getattr(_results, attr) for attr in (x, y, z)] 190 | ax.plot_trisurf(X, Y, Z, cmap=cm.jet, linewidth=0.2) 191 | ax.set_xlabel(x) 192 | ax.set_ylabel(y) 193 | ax.set_zlabel(z) 194 | if show: 195 | plt.show() 196 | 197 | def plot(self, *attrs: Tuple[str], show=True, 198 | **filter_kwargs: Dict[str, Any]): 199 | """ 200 | Attempt to intelligently dispatch plotting functions based on the number 201 | and type of attributes. Last argument should typically be the 202 | performance metric. 203 | """ 204 | self._assert_finished() 205 | param_names = self.param_names 206 | metric_names = self.metric_names 207 | 208 | if len(attrs) == 3: 209 | assert attrs[0] in param_names and attrs[1] in param_names, \ 210 | 'First two positional arguments must be parameter names.' 211 | 212 | assert attrs[2] in metric_names, \ 213 | 'Last positional argument must be a metric name.' 
214 | 215 | assert len(filter_kwargs) + 2 == len(param_names), \ 216 | 'Must filter remaining parameters. e.g. p_three=some_number.' 217 | 218 | self.plot_3d_mesh(*attrs, show=show, **filter_kwargs) 219 | 220 | elif len(attrs) == 2: 221 | if len(param_names) == 1 or filter_kwargs: 222 | self.plot_2d_line(*attrs, show=show, **filter_kwargs) 223 | 224 | elif len(param_names) > 1: 225 | self.plot_2d_violin(*attrs, show=show) 226 | 227 | elif len(attrs) == 1: 228 | self.plot_1d_hist(*attrs, show=show) 229 | 230 | else: 231 | raise ValueError('Must pass between one and three column names.') 232 | 233 | 234 | 235 | 236 | -------------------------------------------------------------------------------- /src/pypm/portfolio.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | 4 | from typing import Tuple, List, Dict, Set, Callable, NewType, Any 5 | from collections import OrderedDict, defaultdict 6 | 7 | from pypm import metrics, signals, data_io 8 | 9 | Symbol = NewType('Symbol', str) 10 | Dollars = NewType('Dollars', float) 11 | 12 | DATE_FORMAT_STR = '%a %b %d, %Y' 13 | 14 | 15 | def _pdate(date: pd.Timestamp): 16 | """Pretty-print a datetime with just the date""" 17 | return date.strftime(DATE_FORMAT_STR) 18 | 19 | 20 | class Position(object): 21 | """ 22 | A simple object to hold and manipulate data related to long stock trades. 23 | Allows a single buy and sell operation on an asset for a constant number of 24 | shares. 25 | The __init__ method is equivalent to a buy operation. The exit 26 | method is a sell operation. 27 | """ 28 | 29 | def __init__(self, symbol: Symbol, entry_date: pd.Timestamp, 30 | entry_price: Dollars, shares: int): 31 | """ 32 | Equivalent to buying a certain number of shares of the asset 33 | """ 34 | 35 | # Recorded on initialization 36 | self.entry_date = entry_date 37 | 38 | assert entry_price > 0, 'Cannot buy asset with zero or negative price.' 39 | self.entry_price = entry_price 40 | 41 | assert shares > 0, 'Cannot buy zero or negative shares.' 42 | self.shares = shares 43 | 44 | self.symbol = symbol 45 | 46 | # Recorded on position exit 47 | self.exit_date: pd.Timestamp = None 48 | self.exit_price: Dollars = None 49 | 50 | # For easily getting current portfolio value 51 | self.last_date: pd.Timestamp = None 52 | self.last_price: Dollars = None 53 | 54 | # Updated intermediately 55 | self._dict_series: Dict[pd.Timestamp, Dollars] = OrderedDict() 56 | self.record_price_update(entry_date, entry_price) 57 | 58 | # Cache control for pd.Series representation 59 | self._price_series: pd.Series = None 60 | self._needs_update_pd_series: bool = True 61 | 62 | def exit(self, exit_date, exit_price): 63 | """ 64 | Equivalent to selling a stock holding 65 | """ 66 | assert self.entry_date != exit_date, 'Churned a position same-day.' 67 | assert not self.exit_date, 'Position already closed.' 68 | self.record_price_update(exit_date, exit_price) 69 | self.exit_date = exit_date 70 | self.exit_price = exit_price 71 |
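# Usage sketch (hypothetical dates and prices, illustration only):
#
#     position = Position('AWU', pd.Timestamp('2020-01-02'), 100.0, 10)
#     position.record_price_update(pd.Timestamp('2020-01-03'), 101.5)
#     position.exit(pd.Timestamp('2020-01-06'), 105.0)
#     position.percent_return    # (105.0 / 100.0) - 1 = 0.05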
72 | def record_price_update(self, date, price): 73 | """ 74 | Record the intermediate price and date of an open position 75 | """ 76 | self.last_date = date 77 | self.last_price = price 78 | self._dict_series[date] = price 79 | 80 | # Invalidate cache on self.price_series 81 | self._needs_update_pd_series = True 82 | 83 | @property 84 | def price_series(self) -> pd.Series: 85 | """ 86 | Returns cached readonly pd.Series 87 | """ 88 | if self._needs_update_pd_series or self._price_series is None: 89 | self._price_series = pd.Series(self._dict_series) 90 | self._needs_update_pd_series = False 91 | return self._price_series 92 | 93 | @property 94 | def last_value(self) -> Dollars: 95 | return self.last_price * self.shares 96 | 97 | @property 98 | def is_active(self) -> bool: 99 | return self.exit_date is None 100 | 101 | @property 102 | def is_closed(self) -> bool: 103 | return not self.is_active 104 | 105 | @property 106 | def value_series(self) -> pd.Series: 107 | """ 108 | Returns the value of the position over time. Ignores self.exit_date. 109 | Used in calculating the equity curve. 110 | """ 111 | assert self.is_closed, 'Position must be closed to access this property' 112 | return self.shares * self.price_series[:-1] 113 | 114 | @property 115 | def percent_return(self) -> float: 116 | return (self.exit_price / self.entry_price) - 1 117 | 118 | @property 119 | def entry_value(self) -> Dollars: 120 | return self.shares * self.entry_price 121 | 122 | @property 123 | def exit_value(self) -> Dollars: 124 | return self.shares * self.exit_price 125 | 126 | @property 127 | def change_in_value(self) -> Dollars: 128 | return self.exit_value - self.entry_value 129 | 130 | @property 131 | def trade_length(self): 132 | return len(self._dict_series) - 1 133 | 134 | def print_position_summary(self): 135 | _entry_date = _pdate(self.entry_date) 136 | _exit_date = _pdate(self.exit_date) 137 | _days = self.trade_length 138 | 139 | _entry_price = round(self.entry_price, 2) 140 | _exit_price = round(self.exit_price, 2) 141 | 142 | _entry_value = round(self.entry_value, 2) 143 | _exit_value = round(self.exit_value, 2) 144 | 145 | _return = round(100 * self.percent_return, 1) 146 | _diff = round(self.change_in_value, 2) 147 | 148 | print(f'{self.symbol:<5} Trade summary') 149 | print(f'Date: {_entry_date} -> {_exit_date} [{_days} days]') 150 | print(f'Price: ${_entry_price} -> ${_exit_price} [{_return}%]') 151 | print(f'Value: ${_entry_value} -> ${_exit_value} [${_diff}]') 152 | print() 153 | 154 | def __hash__(self): 155 | """ 156 | A unique position will be defined by a unique combination of an 157 | entry_date and symbol, in accordance with our constraints regarding 158 | duplicate, variable, and compound positions 159 | """ 160 | return hash((self.entry_date, self.symbol)) 161 | 162 | 163 | class PortfolioHistory(object): 164 | """ 165 | Holds Position objects and keeps track of portfolio variables. 166 | Produces summary statistics. 167 | """
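# Note: the simulator drives this class roughly as follows (hypothetical
# objects, illustration only):
#
#     history = PortfolioHistory()
#     history.record_cash(some_date, 10000.0)       # repeatedly, as cash changes
#     history.add_to_history(some_closed_position)  # once per closed trade
#     history.finish()                              # freeze and compute series
#     history.print_summary()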
168 | 169 | def __init__(self): 170 | # Keep track of positions, recorded in this list after close 171 | self.position_history: List[Position] = [] 172 | self._logged_positions: Set[Position] = set() 173 | 174 | # Keep track of the last seen date 175 | self.last_date: pd.Timestamp = pd.Timestamp.min 176 | 177 | # Readonly fields 178 | self._cash_history: Dict[pd.Timestamp, Dollars] = dict() 179 | self._simulation_finished = False 180 | self._spy: pd.DataFrame = pd.DataFrame() 181 | self._spy_log_returns: pd.Series = pd.Series(dtype=float) 182 | 183 | def add_to_history(self, position: Position): 184 | _log = self._logged_positions 185 | assert position not in _log, 'Recorded the same position twice.' 186 | assert position.is_closed, 'Position is not closed.' 187 | self._logged_positions.add(position) 188 | self.position_history.append(position) 189 | self.last_date = max(self.last_date, position.last_date) 190 | 191 | def record_cash(self, date, cash): 192 | self._cash_history[date] = cash 193 | self.last_date = max(self.last_date, date) 194 | 195 | @staticmethod 196 | def _as_oseries(d: Dict[pd.Timestamp, Any]) -> pd.Series: 197 | return pd.Series(d).sort_index() 198 | 199 | def _compute_cash_series(self): 200 | self._cash_series = self._as_oseries(self._cash_history) 201 | 202 | @property 203 | def cash_series(self) -> pd.Series: 204 | return self._cash_series 205 | 206 | def _compute_portfolio_value_series(self): 207 | value_by_date = defaultdict(float) 208 | last_date = self.last_date 209 | 210 | # Add up value of assets 211 | for position in self.position_history: 212 | for date, value in position.value_series.items(): 213 | value_by_date[date] += value 214 | 215 | # Make sure all dates in cash_series are present 216 | for date in self.cash_series.index: 217 | value_by_date[date] += 0 218 | 219 | self._portfolio_value_series = self._as_oseries(value_by_date) 220 | 221 | @property 222 | def portfolio_value_series(self): 223 | return self._portfolio_value_series 224 | 225 | def _compute_equity_series(self): 226 | c_series = self.cash_series 227 | p_series = self.portfolio_value_series 228 | assert all(c_series.index == p_series.index), \ 229 | 'portfolio_series has dates not in cash_series' 230 | self._equity_series = c_series + p_series 231 | 232 | @property 233 | def equity_series(self): 234 | return self._equity_series 235 | 236 | def _compute_log_return_series(self): 237 | self._log_return_series = \ 238 | metrics.calculate_log_return_series(self.equity_series) 239 | 240 | @property 241 | def log_return_series(self): 242 | return self._log_return_series 243 | 244 | def _assert_finished(self): 245 | assert self._simulation_finished, \ 246 | 'Simulation must be finished by running self.finish() in order ' + \ 247 | 'to access this method or property.'
248 | 249 | def finish(self): 250 | """ 251 | Mark the simulation as finished and compute readonly values 252 | """ 253 | self._simulation_finished = True 254 | self._compute_cash_series() 255 | self._compute_portfolio_value_series() 256 | self._compute_equity_series() 257 | self._compute_log_return_series() 258 | self._assert_finished() 259 | 260 | def compute_portfolio_size_series(self) -> pd.Series: 261 | size_by_date = defaultdict(int) 262 | for position in self.position_history: 263 | for date in position.value_series.index: 264 | size_by_date[date] += 1 265 | return self._as_oseries(size_by_date) 266 | 267 | @property 268 | def spy(self): 269 | if self._spy.empty: 270 | first_date = self.cash_series.index[0] 271 | _spy = data_io.load_spy_data() 272 | self._spy = _spy[_spy.index > first_date] 273 | return self._spy 274 | 275 | @property 276 | def spy_log_returns(self): 277 | if self._spy_log_returns.empty: 278 | close = self.spy['close'] 279 | self._spy_log_returns = metrics.calculate_log_return_series(close) 280 | return self._spy_log_returns 281 | 282 | @property 283 | def percent_return(self): 284 | return metrics.calculate_percent_return(self.equity_series) 285 | 286 | @property 287 | def spy_percent_return(self): 288 | return metrics.calculate_percent_return(self.spy['close']) 289 | 290 | @property 291 | def cagr(self): 292 | return metrics.calculate_cagr(self.equity_series) 293 | 294 | @property 295 | def volatility(self): 296 | return metrics.calculate_annualized_volatility(self.log_return_series) 297 | 298 | @property 299 | def sharpe_ratio(self): 300 | return metrics.calculate_sharpe_ratio(self.equity_series) 301 | 302 | @property 303 | def spy_cagr(self): 304 | return metrics.calculate_cagr(self.spy['close']) 305 | 306 | @property 307 | def excess_cagr(self): 308 | return self.cagr - self.spy_cagr 309 | 310 | @property 311 | def jensens_alpha(self): 312 | return metrics.calculate_jensens_alpha( 313 | self.log_return_series, 314 | self.spy_log_returns, 315 | ) 316 | 317 | @property 318 | def dollar_max_drawdown(self): 319 | return metrics.calculate_max_drawdown(self.equity_series, 'dollar') 320 | 321 | @property 322 | def percent_max_drawdown(self): 323 | return metrics.calculate_max_drawdown(self.equity_series, 'percent') 324 | 325 | @property 326 | def log_max_drawdown_ratio(self): 327 | return metrics.calculate_log_max_drawdown_ratio(self.equity_series) 328 | 329 | @property 330 | def number_of_trades(self): 331 | return len(self.position_history) 332 | 333 | @property 334 | def average_active_trades(self): 335 | return self.compute_portfolio_size_series().mean() 336 | 337 | @property 338 | def final_cash(self): 339 | self._assert_finished() 340 | return self.cash_series.iloc[-1] 341 | 342 | @property 343 | def final_equity(self): 344 | self._assert_finished() 345 | return self.equity_series.iloc[-1] 346 | 347 | _PERFORMANCE_METRICS_PROPS = [ 348 | 'percent_return', 349 | 'spy_percent_return', 350 | 'cagr', 351 | 'volatility', 352 | 'sharpe_ratio', 353 | 'spy_cagr', 354 | 'excess_cagr', 355 | 'jensens_alpha', 356 | 'dollar_max_drawdown', 357 | 'percent_max_drawdown', 358 | 'log_max_drawdown_ratio', 359 | 'number_of_trades', 360 | 'average_active_trades', 361 | 'final_cash', 362 | 'final_equity', 363 | ] 364 | 365 | PerformancePayload = NewType('PerformancePayload', Dict[str, float]) 366 | 367 | def get_performance_metric_data(self) -> PerformancePayload: 368 | props = self._PERFORMANCE_METRICS_PROPS 369 | return {prop: getattr(self, prop) for prop in props} 370 |
371 | def print_position_summaries(self): 372 | for position in self.position_history: 373 | position.print_position_summary() 374 | 375 | def print_summary(self): 376 | self._assert_finished() 377 | s = f'Equity: ${self.final_equity:.2f}\n' \ 378 | f'Percent Return: {100 * self.percent_return:.2f}%\n' \ 379 | f'S&P 500 Return: {100 * self.spy_percent_return:.2f}%\n\n' \ 380 | f'Number of trades: {self.number_of_trades}\n' \ 381 | f'Average active trades: {self.average_active_trades:.2f}\n\n' \ 382 | f'CAGR: {100 * self.cagr:.2f}%\n' \ 383 | f'S&P 500 CAGR: {100 * self.spy_cagr:.2f}%\n' \ 384 | f'Excess CAGR: {100 * self.excess_cagr:.2f}%\n\n' \ 385 | f'Annualized Volatility: {100 * self.volatility:.2f}%\n' \ 386 | f'Sharpe Ratio: {self.sharpe_ratio:.2f}\n' \ 387 | f'Jensen\'s Alpha: {self.jensens_alpha:.6f}\n\n' \ 388 | f'Dollar Max Drawdown: ${self.dollar_max_drawdown:.2f}\n' \ 389 | f'Percent Max Drawdown: {100 * self.percent_max_drawdown:.2f}%\n' \ 390 | f'Log Max Drawdown Ratio: {self.log_max_drawdown_ratio:.2f}\n' 391 | 392 | print(s) 393 | 394 | def plot(self, show=True) -> plt.Figure: 395 | """ 396 | Plots equity, cash and portfolio value curves. 397 | """ 398 | self._assert_finished() 399 | 400 | figure, axes = plt.subplots(nrows=3, ncols=1) 401 | figure.tight_layout(pad=3.0) 402 | axes[0].plot(self.equity_series) 403 | axes[0].set_title('Equity') 404 | axes[0].grid() 405 | 406 | axes[1].plot(self.cash_series) 407 | axes[1].set_title('Cash') 408 | axes[1].grid() 409 | 410 | axes[2].plot(self.portfolio_value_series) 411 | axes[2].set_title('Portfolio Value') 412 | axes[2].grid() 413 | 414 | if show: 415 | plt.show() 416 | 417 | return figure 418 | 419 | def plot_benchmark_comparison(self, show=True) -> plt.Figure: 420 | """ 421 | Plot comparable investment in the S&P 500. 422 | """ 423 | self._assert_finished() 424 | 425 | equity_curve = self.equity_series 426 | ax = equity_curve.plot() 427 | 428 | spy_closes = self.spy['close'] 429 | initial_cash = self.cash_series.iloc[0] 430 | initial_spy = spy_closes.iloc[0] 431 | 432 | scaled_spy = spy_closes * (initial_cash / initial_spy) 433 | scaled_spy.plot() 434 | 435 | baseline = pd.Series(initial_cash, index=equity_curve.index) 436 | ax = baseline.plot(color='black') 437 | ax.grid() 438 | 439 | ax.legend(['Equity curve', 'S&P 500 portfolio']) 440 | 441 | if show: 442 | plt.show() 443 | -------------------------------------------------------------------------------- /src/pypm/signals.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from pypm.indicators import calculate_macd_oscillator, \ 4 | calculate_bollinger_bands 5 | from pypm.data_io import load_eod_data 6 | 7 | 8 | def create_macd_signal(series: pd.Series, n1: int=5, n2: int=34) -> pd.Series: 9 | """ 10 | Create a momentum-based signal based on the MACD crossover principle. 11 | Generate a buy signal when the MACD crosses above zero, and a sell signal when 12 | it crosses below zero. 13 | """ 14 | 15 | # Calculate the macd and get the signs of the values. 16 | macd = calculate_macd_oscillator(series, n1, n2) 17 | macd_sign = np.sign(macd) 18 | 19 | # Create a copy shifted by one period. 20 | macd_shifted_sign = macd_sign.shift(1) 21 | 22 | # Multiply the sign by the boolean. This will have the effect of casting 23 | # the boolean to an integer (either 0 or 1) and then multiplying by the 24 | # sign (either -1, 0 or 1). 25 | return macd_sign * (macd_sign != macd_shifted_sign)
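# Example: the crossover arithmetic above on a toy series (illustrative
# values; the first bar compares against NaN, so it also registers as a
# crossing):
#
#     toy = pd.Series([-1.0, -0.5, 0.5, 0.8, -0.2])
#     sign = np.sign(toy)              # -1, -1, 1, 1, -1
#     crossed = sign != sign.shift(1)  # True at each sign flip
#     sign * crossed                   # -1, 0, 1, 0, -1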
26 | 27 | 28 | def create_bollinger_band_signal(series: pd.Series, n: int=20) -> pd.Series: 29 | """ 30 | Create a reversal-based signal based on the upper and lower bands of the 31 | Bollinger bands. Generate a buy signal when the price is below the lower 32 | band, and a sell signal when the price is above the upper band. 33 | """ 34 | bollinger_bands = calculate_bollinger_bands(series, n) 35 | sell = series > bollinger_bands['upper'] 36 | buy = series < bollinger_bands['lower'] 37 | return (1*buy - 1*sell) 38 | 39 | -------------------------------------------------------------------------------- /src/pypm/simulation.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple, List, Dict, Callable, NewType, Any, Iterable 2 | 3 | import pandas as pd 4 | import matplotlib.pyplot as plt 5 | 6 | from pypm import metrics, signals, data_io 7 | from pypm.portfolio import PortfolioHistory, Position, Symbol, Dollars 8 | 9 | from collections import OrderedDict, defaultdict 10 | 11 | class SimpleSimulator(object): 12 | """ 13 | A simple trading simulator to work with the PortfolioHistory class 14 | """ 15 | 16 | def __init__(self, initial_cash: float=10000, max_active_positions: int=5, 17 | percent_slippage: float=0.0005, trade_fee: float=1): 18 | 19 | ### Set simulation parameters 20 | 21 | # Initial cash in portfolio 22 | # self.cash will fluctuate 23 | self.initial_cash = self.cash = initial_cash 24 | 25 | # Maximum number of different assets that can be held simultaneously 26 | self.max_active_positions: int = max_active_positions 27 | 28 | # The percentage difference between closing price and fill price for the 29 | # position, to simulate adverse effects of market orders 30 | self.percent_slippage = percent_slippage 31 | 32 | # The fixed fee in order to open a position in dollar terms 33 | self.trade_fee = trade_fee 34 | 35 | # Keep track of live trades 36 | self.active_positions_by_symbol: Dict[Symbol, Position] = OrderedDict() 37 | 38 | # Keep track of portfolio history like cash, equity, and positions 39 | self.portfolio_history = PortfolioHistory() 40 | 41 | @property 42 | def active_positions_count(self): 43 | return len(self.active_positions_by_symbol) 44 | 45 | @property 46 | def free_position_slots(self): 47 | return self.max_active_positions - self.active_positions_count 48 | 49 | @property 50 | def active_symbols(self) -> List[Symbol]: 51 | return list(self.active_positions_by_symbol.keys()) 52 | 53 | def print_initial_parameters(self): 54 | s = f'Initial Cash: ${self.initial_cash} \n' \ 55 | f'Maximum Number of Assets: {self.max_active_positions}\n' 56 | print(s) 57 | return s 58 | 59 | @staticmethod 60 | def make_tuple_lookup(columns) -> Callable[[str, str], int]: 61 | """ 62 | Map a multi-index dataframe to an itertuples-like object. 63 | 64 | The index of the dataframe is always the zero-th element. 65 | """
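# Note: itertuples() yields plain tuples whose element 0 is the row index,
# so dataframe column k lands at tuple position k + 1. A toy sketch
# (hypothetical columns, illustration only):
#
#     columns = [('AWU', 'price'), ('AWU', 'signal')]
#     _idx = SimpleSimulator.make_tuple_lookup(columns)
#     _idx('AWU', 'signal')    # -> 2, the third element of each row tuple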
66 | 67 | # col is a hierarchical column index represented by a tuple of strings 68 | tuple_lookup: Dict[Tuple[str, str], int] = { 69 | col: i + 1 for i, col in enumerate(columns) 70 | } 71 | 72 | return lambda symbol, metric: tuple_lookup[(symbol, metric)] 73 | 74 | @staticmethod 75 | def make_all_valid_lookup(_idx: Callable): 76 | """ 77 | Return a function that checks for valid data, given a lookup function 78 | """ 79 | return lambda row, symbol: ( 80 | not pd.isna(row[_idx(symbol, 'pref')]) and \ 81 | not pd.isna(row[_idx(symbol, 'signal')]) and \ 82 | not pd.isna(row[_idx(symbol, 'price')]) 83 | ) 84 | 85 | def buy_to_open(self, symbol, date, price): 86 | """ 87 | Keep track of a new position, making sure it isn't an existing position. 88 | Verify you have cash. 89 | """ 90 | 91 | # Figure out how much we are willing to spend 92 | cash_available = self.cash - self.trade_fee 93 | cash_to_spend = cash_available / self.free_position_slots 94 | 95 | # Calculate the purchase price and number of shares. Fractional shares allowed. 96 | purchase_price = (1 + self.percent_slippage) * price 97 | shares = cash_to_spend / purchase_price 98 | 99 | # Spend the cash 100 | self.cash -= cash_to_spend + self.trade_fee 101 | assert self.cash >= 0, 'Spent cash you do not have.' 102 | self.portfolio_history.record_cash(date, self.cash) 103 | 104 | # Record the position 105 | positions_by_symbol = self.active_positions_by_symbol 106 | assert symbol not in positions_by_symbol, 'Symbol already in portfolio.' 107 | position = Position(symbol, date, purchase_price, shares) 108 | positions_by_symbol[symbol] = position 109 | 110 | def sell_to_close(self, symbol, date, price): 111 | """ 112 | Keep track of exit price, recover cash, close position, and record it in 113 | portfolio history. 114 | 115 | Will raise a KeyError if symbol isn't an active position. 116 | """ 117 | 118 | # Exit the position 119 | positions_by_symbol = self.active_positions_by_symbol 120 | position = positions_by_symbol[symbol] 121 | position.exit(date, price) 122 | 123 | # Receive the cash 124 | sale_value = position.last_value * (1 - self.percent_slippage) 125 | self.cash += sale_value 126 | self.portfolio_history.record_cash(date, self.cash) 127 | 128 | # Record in portfolio history 129 | self.portfolio_history.add_to_history(position) 130 | del positions_by_symbol[symbol] 131 | 132 | @staticmethod 133 | def _assert_equal_columns(*args: Iterable[pd.DataFrame]): 134 | column_names = set(args[0].columns.values) 135 | for arg in args[1:]: 136 | assert set(arg.columns.values) == column_names, \ 137 | 'Found unequal column names in input dataframes.' 138 | 139 | def simulate(self, price: pd.DataFrame, signal: pd.DataFrame, 140 | preference: pd.DataFrame): 141 | """ 142 | Runs the simulation. 143 | 144 | price, signal, and preference are dataframes with the column names 145 | represented by the same set of stock symbols. 146 | """
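# Note: the three inputs share one date index and one set of symbol
# columns; signal holds -1/0/1 (sell/hold/buy) and preference ranks buy
# candidates. A toy sketch of compatible inputs (illustration only):
#
#     dates = pd.date_range('2020-01-01', periods=3)
#     price = pd.DataFrame({'AWU': [10.0, 10.5, 10.4]}, index=dates)
#     signal = pd.DataFrame({'AWU': [1, 0, -1]}, index=dates)
#     preference = pd.DataFrame({'AWU': [0.5, 0.5, 0.5]}, index=dates)
#     SimpleSimulator().simulate(price, signal, preference)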
147 | 148 | # Create a hierarchical dataframe to loop through 149 | self._assert_equal_columns(price, signal, preference) 150 | df = data_io.concatenate_metrics({ 151 | 'price': price, 152 | 'signal': signal, 153 | 'pref': preference, 154 | }) 155 | 156 | # Get list of symbols 157 | all_symbols = list(set(price.columns.values)) 158 | 159 | # Get lookup functions 160 | _idx = self.make_tuple_lookup(df.columns) 161 | _all_valid = self.make_all_valid_lookup(_idx) 162 | 163 | # Store some variables 164 | active_positions_by_symbol = self.active_positions_by_symbol 165 | max_active_positions = self.max_active_positions 166 | 167 | # Iterate over all dates. 168 | # itertuples() is significantly faster than iterrows(), but it comes at 169 | # the cost of not being able to index easily. To get around this we use 170 | # the tuple lookup function "_idx" 171 | for row in df.itertuples(): 172 | 173 | # date index is always the first element of the tuple row 174 | date = row[0] 175 | 176 | # Get symbols with valid and tradable data 177 | symbols: List[str] = [s for s in all_symbols if _all_valid(row, s)] 178 | 179 | # Iterate over active positions and sell stocks with a sell signal. 180 | _active = self.active_symbols 181 | to_exit = [s for s in _active if row[_idx(s, 'signal')] == -1] 182 | for s in to_exit: 183 | sell_price = row[_idx(s, 'price')] 184 | self.sell_to_close(s, date, sell_price) 185 | 186 | # Get up to max_active_positions symbols with a buy signal in 187 | # decreasing order of preference 188 | to_buy = [ 189 | s for s in symbols if \ 190 | row[_idx(s, 'signal')] == 1 and \ 191 | s not in active_positions_by_symbol 192 | ] 193 | to_buy.sort(key=lambda s: row[_idx(s, 'pref')], reverse=True) 194 | to_buy = to_buy[:max_active_positions] 195 | 196 | for s in to_buy: 197 | buy_price = row[_idx(s, 'price')] 198 | buy_preference = row[_idx(s, 'pref')] 199 | 200 | # If we have some empty slots, just buy the asset outright 201 | if self.active_positions_count < max_active_positions: 202 | self.buy_to_open(s, date, buy_price) 203 | continue 204 | 205 | # If we are holding max_active_positions, evaluate a swap based on 206 | # preference 207 | _active = self.active_symbols 208 | active_prefs = [(s, row[_idx(s, 'pref')]) for s in _active] 209 | 210 | _min = min(active_prefs, key=lambda k: k[1]) 211 | min_active_symbol, min_active_preference = _min 212 | 213 | # If a more preferable symbol exists, then sell an old one 214 | if min_active_preference < buy_preference: 215 | sell_price = row[_idx(min_active_symbol, 'price')] 216 | self.sell_to_close(min_active_symbol, date, sell_price) 217 | self.buy_to_open(s, date, buy_price) 218 | 219 | # Update price data everywhere 220 | for s in self.active_symbols: 221 | price = row[_idx(s, 'price')] 222 | position = active_positions_by_symbol[s] 223 | position.record_price_update(date, price) 224 | 225 | self.portfolio_history.record_cash(date, self.cash) 226 | 227 | # Sell all positions and mark simulation as complete 228 | for s in self.active_symbols: 229 | self.sell_to_close(s, date, row[_idx(s, 'price')]) 230 | self.portfolio_history.finish() 231 | 232 | 233 | 234 |
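# Worked example of the fill arithmetic in buy_to_open above (illustrative
# numbers, using the default parameters):
#
#     cash, trade_fee, free_slots = 10_000.0, 1.0, 5
#     cash_to_spend = (cash - trade_fee) / free_slots    # 1999.80
#     purchase_price = (1 + 0.0005) * 100.00             # 100.05 with slippage
#     shares = cash_to_spend / purchase_price            # ~19.99 (fractional)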
-------------------------------------------------------------------------------- /src/pypm/weights.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from scipy.stats import hmean 4 | 5 | def calculate_uniqueness(event_spans: pd.Series, 6 | price_index: pd.Series) -> pd.Series: 7 | """ 8 | event_spans is a series with an index of start dates and values of end dates 9 | of a label. 10 | 11 | price_index is an index of underlying dates for the events 12 | 13 | Returns a series of uniqueness values that can be used as weights, indexed 14 | by the event start dates. Weights may need to be standardized again before 15 | training. 16 | """ 17 | 18 | # Create a binary dataframe 19 | # value is 1 during event span and 0 otherwise 20 | columns = range(event_spans.shape[0]) 21 | df = pd.DataFrame(0, index=price_index, columns=columns) 22 | 23 | for i, (event_start, event_end) in enumerate(event_spans.items()): 24 | df.loc[event_start:event_end, i] = 1 25 | 26 | # Compute concurrency over each event span, then calculate uniqueness 27 | avg_uniquenesses = list() 28 | for i, (event_start, event_end) in enumerate(event_spans.items()): 29 | concurrency: pd.Series = df.loc[event_start:event_end].sum(axis=1) 30 | avg_uniqueness = 1 / hmean(concurrency) 31 | avg_uniquenesses.append(avg_uniqueness) 32 | 33 | return pd.Series(avg_uniquenesses, index=event_spans.index) 34 | -------------------------------------------------------------------------------- /src/simulate_alternative_data_portfolio.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | import os 5 | from joblib import load 6 | 7 | from pypm.ml_model.data_io import load_data 8 | from pypm.ml_model.signals import calculate_signals 9 | 10 | from pypm import metrics, simulation 11 | 12 | SRC_DIR = os.path.dirname(os.path.abspath(__file__)) 13 | 14 | def simulate_portfolio(): 15 | 16 | # All the data we have to work with 17 | symbols, eod_data, alt_data = load_data() 18 | 19 | # Load classifier from file 20 | classifier = load(os.path.join(SRC_DIR, 'ml_model.joblib')) 21 | 22 | # Generate signals from classifier 23 | print('Calculating signals ...') 24 | signal = calculate_signals(classifier, symbols, eod_data, alt_data) 25 | 26 | # Get rid of eod_data before valid signals 27 | first_signal_date = signal.first_valid_index() 28 | eod_data = eod_data[eod_data.index > first_signal_date] 29 | 30 | # Use a random preference matrix to arbitrarily rank competing buy signals 31 | print('Calculating preference matrix ...') 32 | preference = pd.DataFrame( 33 | np.random.random(eod_data.shape), 34 | columns=eod_data.columns, 35 | index=eod_data.index, 36 | ) 37 | 38 | # Run the simulator 39 | simulator = simulation.SimpleSimulator( 40 | initial_cash=10000, 41 | max_active_positions=10, 42 | percent_slippage=0.0005, 43 | trade_fee=1, 44 | ) 45 | simulator.simulate(eod_data, signal, preference) 46 | 47 | # Print results 48 | simulator.portfolio_history.print_position_summaries() 49 | simulator.print_initial_parameters() 50 | simulator.portfolio_history.print_summary() 51 | simulator.portfolio_history.plot() 52 | simulator.portfolio_history.plot_benchmark_comparison() 53 | 54 | if __name__ == '__main__': 55 | simulate_portfolio() 56 | 57 | -------------------------------------------------------------------------------- /src/simulate_portfolio.py: -------------------------------------------------------------------------------- 1 | from pypm import metrics, signals, data_io, simulation 2 | import pandas as pd 3 | from typing import List 4 | def simulate_portfolio(): 5 | 6 | bollinger_n = 20 7 | sharpe_n = 100 8 | 9 | # Load in data 10 | symbols: List[str] = data_io.get_all_symbols() 11 | prices: pd.DataFrame = data_io.load_eod_matrix(symbols) 12 | 13 | # Use the Bollinger Band outer band crossover as a
signal 14 | _bollinger = signals.create_bollinger_band_signal 15 | signal = prices.apply(_bollinger, args=(bollinger_n,), axis=0) 16 | 17 | # Use a rolling sharpe ratio approximation as a preference matrix 18 | _sharpe = metrics.calculate_rolling_sharpe_ratio 19 | preference = prices.apply(_sharpe, args=(sharpe_n, ), axis=0) 20 | 21 | # Run the simulator 22 | simulator = simulation.SimpleSimulator( 23 | initial_cash=10000, 24 | max_active_positions=5, 25 | percent_slippage=0.0005, 26 | trade_fee=1, 27 | ) 28 | simulator.simulate(prices, signal, preference) 29 | 30 | # Print results 31 | simulator.portfolio_history.print_position_summaries() 32 | simulator.print_initial_parameters() 33 | simulator.portfolio_history.print_summary() 34 | simulator.portfolio_history.plot() 35 | simulator.portfolio_history.plot_benchmark_comparison() 36 | 37 | if __name__ == '__main__': 38 | simulate_portfolio() 39 | -------------------------------------------------------------------------------- /src/white_noise_portfolio.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | from pypm import metrics, signals, data_io, simulation, optimization 5 | from pypm.optimization import GridSearchOptimizer 6 | 7 | from typing import List, Dict, Tuple, Callable 8 | 9 | Performance = simulation.PortfolioHistory.PerformancePayload # Dict[str, float] 10 | 11 | def bind_simulator(**sim_kwargs) -> Callable: 12 | """ 13 | Create a simulator that uses white noise for the preference matrix 14 | """ 15 | symbols: List[str] = data_io.get_all_symbols() 16 | prices: pd.DataFrame = data_io.load_eod_matrix(symbols) 17 | 18 | _bollinger: Callable = signals.create_bollinger_band_signal 19 | 20 | # Bollinger n is constant throughout 21 | bollinger_n = 20 22 | 23 | def _simulate(white_noise_test_id: int) -> Performance: 24 | 25 | signal = prices.apply(_bollinger, args=(bollinger_n,), axis=0) 26 | 27 | # Build a pile of noise in the same shape as the price data 28 | _noise = np.random.normal(loc=0, scale=1, size=prices.shape) 29 | _cols = prices.columns 30 | _index = prices.index 31 | preference = pd.DataFrame(_noise, columns=_cols, index=_index) 32 | 33 | simulator = simulation.SimpleSimulator(**sim_kwargs) 34 | simulator.simulate(prices, signal, preference) 35 | 36 | return simulator.portfolio_history.get_performance_metric_data() 37 | 38 | return _simulate 39 | 40 | if __name__ == '__main__': 41 | 42 | simulate = bind_simulator(initial_cash=10000, max_active_positions=5) 43 | 44 | optimizer = GridSearchOptimizer(simulate) 45 | optimizer.optimize(white_noise_test_id=range(1000)) 46 | 47 | print(optimizer.get_best('excess_cagr')) 48 | optimizer.print_summary() 49 | optimizer.plot('excess_cagr') 50 | --------------------------------------------------------------------------------