├── .gitignore ├── contributing.md ├── cover.png ├── data ├── SPY.csv ├── alternative_data │ ├── AWU.csv │ ├── AXC.csv │ ├── BGN.csv │ ├── BMG.csv │ ├── CHWK.csv │ ├── CUU.csv │ ├── DLVY.csv │ ├── DVRL.csv │ ├── EHH.csv │ ├── EUZ.csv │ ├── EXY.csv │ ├── EZVY.csv │ ├── FJKV.csv │ ├── FJZC.csv │ ├── FRCE.csv │ ├── FSVO.csv │ ├── HECP.csv │ ├── HER.csv │ ├── HESC.csv │ ├── HRVC.csv │ ├── HXX.csv │ ├── IEZI.csv │ ├── IOV.csv │ ├── IRX.csv │ ├── IYPP.csv │ ├── JMS.csv │ ├── JWDV.csv │ ├── JXAN.csv │ ├── KEQ.csv │ ├── KER.csv │ ├── KGO.csv │ ├── KSR.csv │ ├── KTVL.csv │ ├── KUAQ.csv │ ├── LJGB.csv │ ├── LMLK.csv │ ├── MEF.csv │ ├── MHNG.csv │ ├── MTZB.csv │ ├── NDQC.csv │ ├── NEGH.csv │ ├── NKP.csv │ ├── NSLG.csv │ ├── NZSR.csv │ ├── OBAK.csv │ ├── OCG.csv │ ├── OKK.csv │ ├── OLE.csv │ ├── OWKQ.csv │ ├── OZI.csv │ ├── OZMT.csv │ ├── PDX.csv │ ├── PQCE.csv │ ├── PSWA.csv │ ├── PUO.csv │ ├── PYTC.csv │ ├── QEBK.csv │ ├── RALO.csv │ ├── RLZA.csv │ ├── RPEZ.csv │ ├── RQG.csv │ ├── RVWV.csv │ ├── RZW.csv │ ├── SEBI.csv │ ├── SGVQ.csv │ ├── SIP.csv │ ├── SLRR.csv │ ├── SXQ.csv │ ├── TGXX.csv │ ├── TQKA.csv │ ├── TRE.csv │ ├── TUMH.csv │ ├── UAIG.csv │ ├── UFT.csv │ ├── UTQ.csv │ ├── UZS.csv │ ├── VBB.csv │ ├── VCXW.csv │ ├── VDHJ.csv │ ├── VHE.csv │ ├── VLOZ.csv │ ├── WFS.csv │ ├── WHMG.csv │ ├── WNE.csv │ ├── WOXI.csv │ ├── XAU.csv │ ├── XBN.csv │ ├── XJJI.csv │ ├── XSOQ.csv │ ├── XWR.csv │ ├── XYCJ.csv │ ├── XZFM.csv │ ├── YPDA.csv │ ├── ZEA.csv │ ├── ZGL.csv │ ├── ZOE.csv │ ├── ZWH.csv │ ├── ZWNG.csv │ ├── ZXGV.csv │ └── ZZQB.csv └── eod │ ├── AWU.csv │ ├── AXC.csv │ ├── BGN.csv │ ├── BMG.csv │ ├── CHWK.csv │ ├── CUU.csv │ ├── DLVY.csv │ ├── DVRL.csv │ ├── EHH.csv │ ├── EUZ.csv │ ├── EXY.csv │ ├── EZVY.csv │ ├── FJKV.csv │ ├── FJZC.csv │ ├── FRCE.csv │ ├── FSVO.csv │ ├── HECP.csv │ ├── HER.csv │ ├── HESC.csv │ ├── HRVC.csv │ ├── HXX.csv │ ├── IEZI.csv │ ├── IOV.csv │ ├── IRX.csv │ ├── IYPP.csv │ ├── JMS.csv │ ├── JWDV.csv │ ├── JXAN.csv │ ├── KEQ.csv │ ├── KER.csv │ ├── KGO.csv │ ├── KSR.csv │ ├── KTVL.csv │ ├── KUAQ.csv │ ├── LJGB.csv │ ├── LMLK.csv │ ├── MEF.csv │ ├── MHNG.csv │ ├── MTZB.csv │ ├── NDQC.csv │ ├── NEGH.csv │ ├── NKP.csv │ ├── NSLG.csv │ ├── NZSR.csv │ ├── OBAK.csv │ ├── OCG.csv │ ├── OKK.csv │ ├── OLE.csv │ ├── OWKQ.csv │ ├── OZI.csv │ ├── OZMT.csv │ ├── PDX.csv │ ├── PQCE.csv │ ├── PSWA.csv │ ├── PUO.csv │ ├── PYTC.csv │ ├── QEBK.csv │ ├── RALO.csv │ ├── RLZA.csv │ ├── RPEZ.csv │ ├── RQG.csv │ ├── RVWV.csv │ ├── RZW.csv │ ├── SEBI.csv │ ├── SGVQ.csv │ ├── SIP.csv │ ├── SLRR.csv │ ├── SXQ.csv │ ├── TGXX.csv │ ├── TQKA.csv │ ├── TRE.csv │ ├── TUMH.csv │ ├── UAIG.csv │ ├── UFT.csv │ ├── UTQ.csv │ ├── UZS.csv │ ├── VBB.csv │ ├── VCXW.csv │ ├── VDHJ.csv │ ├── VHE.csv │ ├── VLOZ.csv │ ├── WFS.csv │ ├── WHMG.csv │ ├── WNE.csv │ ├── WOXI.csv │ ├── XAU.csv │ ├── XBN.csv │ ├── XJJI.csv │ ├── XSOQ.csv │ ├── XWR.csv │ ├── XYCJ.csv │ ├── XZFM.csv │ ├── YPDA.csv │ ├── ZEA.csv │ ├── ZGL.csv │ ├── ZOE.csv │ ├── ZWH.csv │ ├── ZWNG.csv │ ├── ZXGV.csv │ └── ZZQB.csv ├── license.txt ├── listings ├── chapter_1 │ ├── 1_1_type_hinting_examples.py │ ├── 1_2_pandas_data_types.py │ ├── 1_3_pandas_data_types_part_2.py │ └── 1_4_pandas_indexes.py ├── chapter_2 │ ├── 2_10_maximum_drawdown.py │ ├── 2_11_maximum_drawdown_with_metadata.py │ ├── 2_12_log_max_drawdown_ratio.py │ ├── 2_13_calmar_ratio.py │ ├── 2_14_pure_profit_score.py │ ├── 2_15_jensens_alpha.py │ ├── 2_1_return_series_pure_python.py │ ├── 2_2_return_series_pandas.py │ ├── 2_3_log_return_series.py │ ├── 2_4_annualized_volatility.py │ ├── 
2_5_annualized_volatility_on_awu.py │ ├── 2_6_calculating_cagr.py │ ├── 2_7_calculating_cagr_on_awu.py │ ├── 2_8_calculating_sharpe_ratio.py │ └── 2_9_calculating_downside_volatility.py ├── chapter_3 │ ├── 3_1_calculate_simple_moving_average.py │ ├── 3_2_slow_simple_moving_average.py │ ├── 3_3_fast_simple_moving_average.py │ ├── 3_4_calculating_macd.py │ ├── 3_5_calculate_bollinger_bands.py │ ├── 3_6_calculate_chaikin_money_flow.py │ └── 3_7_example_signals.py ├── chapter_4 │ ├── 4_1_assertions_example.py │ ├── 4_2_position_class.py │ ├── 4_3_position_object_usage.py │ ├── 4_4_portfolio_history_class.py │ ├── 4_5_portfolio_history_usage.py │ ├── 4_6_simple_simulator_class.py │ └── 4_7_simple_simulator_usage.py ├── chapter_5 │ ├── 5_1_grid_search_optimizer.py │ ├── 5_2_grid_search_example.py │ ├── 5_3_white_noise_preference_matrix.py │ └── 5_4_bootstrap_simulated_preference_matrix.py ├── chapter_6 │ ├── 6_1_loading_alternative_data.py │ └── 6_2_exploratory_analysis.py └── chapter_7 │ ├── 7_1_symmetric_cusum_filter_on_revenue.py │ ├── 7_2_computing_triple_barrier_labels.py │ ├── 7_3_computing_average_uniqueness.py │ ├── 7_4_computing_features.py │ ├── 7_5_modeling_and_cross_validation.py │ ├── 7_6_machine_learning_pipeline.py │ └── 7_7_simulation_with_machine_learning_model.py ├── readme.md └── src ├── __init__.py ├── bootstrap_portfolio.py ├── fit_alternative_data_model.py ├── optimize_portfolio.py ├── pypm ├── __init__.py ├── data_io.py ├── filters.py ├── indicators.py ├── labels.py ├── metrics.py ├── ml_model │ ├── __init__.py │ ├── data_io.py │ ├── events.py │ ├── features.py │ ├── labels.py │ ├── model.py │ ├── signals.py │ └── weights.py ├── optimization.py ├── portfolio.py ├── signals.py ├── simulation.py └── weights.py ├── simulate_alternative_data_portfolio.py ├── simulate_portfolio.py └── white_noise_portfolio.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | .Rproj.user 131 | -------------------------------------------------------------------------------- /contributing.md: -------------------------------------------------------------------------------- 1 | 2 | ## Contributing Guidelines 3 | 4 | Contributions are encouraged through pull requests for minor changes, fixes, and improvements that do not materially change the content as it corresponds to the book. 5 | 6 | Major modifications and improvements are encouraged via forks, but will not be pulled into this repository. 7 | 8 | -------------------------------------------------------------------------------- /cover.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisconlan/algorithmic-trading-with-python/ebe01087c7d9172db72bc3c9adc1eee5e882ac49/cover.png -------------------------------------------------------------------------------- /data/alternative_data/HRVC.csv: -------------------------------------------------------------------------------- 1 | date,value 2 | 2017-10-02,199742.0 3 | 2017-10-03,212548.0 4 | 2017-10-04,226800.0 5 | 2017-10-05,228232.0 6 | 2017-10-06,198208.0 7 | 2017-10-09,215753.0 8 | 2017-10-10,194550.0 9 | 2017-10-11,189900.0 10 | 2017-10-12,217522.0 11 | 2017-10-13,205112.0 12 | 2017-10-16,193099.0 13 | 2017-10-17,219615.0 14 | 2017-10-18,209341.0 15 | 2017-10-19,190206.0 16 | 2017-10-20,183482.0 17 | 2017-10-23,169123.0 18 | 2017-10-24,210535.0 19 | 2017-10-25,193737.0 20 | 2017-10-26,185285.0 21 | 2017-10-27,179315.0 22 | 2017-10-30,214524.0 23 | 2017-10-31,219727.0 24 | 2017-11-01,206819.0 25 | 2017-11-02,208283.0 26 | 2017-11-03,153447.0 27 | 2017-11-06,221032.0 28 | 2017-11-07,208531.0 29 | 2017-11-08,208685.0 30 | 2017-11-09,212152.0 31 | 2017-11-10,191342.0 32 | 2017-11-13,250969.0 33 | 2017-11-14,179738.0 34 | 2017-11-15,193917.0 35 | 2017-11-16,173358.0 36 | 2017-11-17,184790.0 37 | 2017-11-20,195448.0 38 | 2017-11-21,226394.0 39 | 2017-11-22,207083.0 40 | 2017-11-24,209734.0 41 | 2017-11-27,230518.0 42 | 2017-11-28,198218.0 43 | 2017-11-29,198546.0 44 | 2017-11-30,179337.0 45 | 2017-12-01,196046.0 46 | 2017-12-04,239066.0 47 | 2017-12-05,216157.0 48 | 2017-12-06,227464.0 49 | 2017-12-07,215148.0 50 | 2017-12-08,212035.0 51 | 2017-12-11,216586.0 52 | 2017-12-12,202155.0 53 | 2017-12-13,238222.0 54 | 2017-12-14,206124.0 55 | 2017-12-15,204381.0 56 | 2017-12-18,157521.0 57 | 2017-12-19,167363.0 58 | 2017-12-20,219725.0 59 | 2017-12-21,238015.0 60 | 2017-12-22,225680.0 61 | 2017-12-26,206946.0 62 | 2017-12-27,198663.0 63 | 2017-12-28,220129.0 64 | 2017-12-29,203207.0 65 | 2018-01-02,243368.0 66 | 2018-01-03,214976.0 67 | 
2018-01-04,178153.0 68 | 2018-01-05,196032.0 69 | 2018-01-08,205801.0 70 | 2018-01-09,179226.0 71 | 2018-01-10,217716.0 72 | 2018-01-11,220956.0 73 | 2018-01-12,214345.0 74 | 2018-01-16,211830.0 75 | 2018-01-17,211417.0 76 | 2018-01-18,189561.0 77 | 2018-01-19,221302.0 78 | 2018-01-22,197326.0 79 | 2018-01-23,200280.0 80 | 2018-01-24,175559.0 81 | 2018-01-25,220274.0 82 | 2018-01-26,178883.0 83 | 2018-01-29,186071.0 84 | 2018-01-30,230154.0 85 | 2018-01-31,187110.0 86 | 2018-02-01,225306.0 87 | 2018-02-02,204518.0 88 | 2018-02-05,186802.0 89 | 2018-02-06,217700.0 90 | 2018-02-07,204664.0 91 | 2018-02-08,223852.0 92 | 2018-02-09,230212.0 93 | 2018-02-12,199052.0 94 | 2018-02-13,178829.0 95 | 2018-02-14,183818.0 96 | 2018-02-15,201350.0 97 | 2018-02-16,213021.0 98 | 2018-02-20,211204.0 99 | 2018-02-21,200563.0 100 | 2018-02-22,225925.0 101 | 2018-02-23,207164.0 102 | 2018-02-26,196511.0 103 | 2018-02-27,227149.0 104 | 2018-02-28,208754.0 105 | 2018-03-01,189602.0 106 | 2018-03-02,176743.0 107 | 2018-03-05,179293.0 108 | 2018-03-06,150733.0 109 | 2018-03-07,205004.0 110 | 2018-03-08,204276.0 111 | 2018-03-09,212610.0 112 | 2018-03-12,204968.0 113 | 2018-03-13,173584.0 114 | 2018-03-14,151350.0 115 | 2018-03-15,209024.0 116 | 2018-03-16,208075.0 117 | 2018-03-19,205328.0 118 | 2018-03-20,202297.0 119 | 2018-03-21,196766.0 120 | 2018-03-22,212564.0 121 | 2018-03-23,178928.0 122 | 2018-03-26,201761.0 123 | 2018-03-27,194611.0 124 | 2018-03-28,207117.0 125 | 2018-03-29,174583.0 126 | 2018-04-02,191255.0 127 | 2018-04-03,181984.0 128 | 2018-04-04,227337.0 129 | 2018-04-05,185858.0 130 | 2018-04-06,222383.0 131 | 2018-04-09,195830.0 132 | 2018-04-10,207175.0 133 | 2018-04-11,197501.0 134 | 2018-04-12,202835.0 135 | 2018-04-13,207782.0 136 | 2018-04-16,204197.0 137 | 2018-04-17,200469.0 138 | 2018-04-18,208110.0 139 | 2018-04-19,157750.0 140 | 2018-04-20,190151.0 141 | 2018-04-23,190046.0 142 | 2018-04-24,198268.0 143 | 2018-04-25,204081.0 144 | 2018-04-26,206102.0 145 | 2018-04-27,211717.0 146 | 2018-04-30,201615.0 147 | 2018-05-01,196476.0 148 | 2018-05-02,215011.0 149 | 2018-05-03,204623.0 150 | 2018-05-04,186403.0 151 | 2018-05-07,196138.0 152 | 2018-05-08,211013.0 153 | 2018-05-09,210175.0 154 | 2018-05-10,200855.0 155 | 2018-05-11,207251.0 156 | 2018-05-14,172186.0 157 | 2018-05-15,194308.0 158 | 2018-05-16,198479.0 159 | 2018-05-17,188118.0 160 | 2018-05-18,223507.0 161 | 2018-05-21,197886.0 162 | 2018-05-22,221335.0 163 | 2018-05-23,181840.0 164 | 2018-05-24,226995.0 165 | 2018-05-25,168377.0 166 | 2018-05-29,224651.0 167 | 2018-05-30,191330.0 168 | 2018-05-31,202417.0 169 | 2018-06-01,199794.0 170 | 2018-06-04,196877.0 171 | 2018-06-05,214168.0 172 | 2018-06-06,180081.0 173 | 2018-06-07,172991.0 174 | 2018-06-08,200362.0 175 | 2018-06-11,182746.0 176 | 2018-06-12,172449.0 177 | 2018-06-13,196593.0 178 | 2018-06-14,235390.0 179 | 2018-06-15,212229.0 180 | 2018-06-18,197484.0 181 | 2018-06-19,196204.0 182 | 2018-06-20,223585.0 183 | 2018-06-21,199418.0 184 | 2018-06-22,200493.0 185 | 2018-06-25,230461.0 186 | 2018-06-26,188471.0 187 | 2018-06-27,187363.0 188 | 2018-06-28,187970.0 189 | 2018-06-29,211436.0 190 | 2018-07-02,215892.0 191 | 2018-07-03,164917.0 192 | 2018-07-05,172871.0 193 | 2018-07-06,179970.0 194 | 2018-07-09,167339.0 195 | 2018-07-10,181429.0 196 | 2018-07-11,199076.0 197 | 2018-07-12,215335.0 198 | 2018-07-13,157689.0 199 | 2018-07-16,208933.0 200 | 2018-07-17,183536.0 201 | 2018-07-18,186206.0 202 | 2018-07-19,184851.0 203 | 2018-07-20,228828.0 204 | 2018-07-23,196867.0 205 
| 2018-07-24,203933.0 206 | 2018-07-25,182385.0 207 | 2018-07-26,188227.0 208 | 2018-07-27,205302.0 209 | 2018-07-30,159383.0 210 | 2018-07-31,161000.0 211 | 2018-08-01,219709.0 212 | 2018-08-02,197273.0 213 | 2018-08-03,179268.0 214 | 2018-08-06,210374.0 215 | 2018-08-07,183179.0 216 | 2018-08-08,216563.0 217 | 2018-08-09,226791.0 218 | 2018-08-10,207543.0 219 | 2018-08-13,187719.0 220 | 2018-08-14,222928.0 221 | 2018-08-15,230737.0 222 | 2018-08-16,196648.0 223 | 2018-08-17,197041.0 224 | 2018-08-20,228850.0 225 | 2018-08-21,224698.0 226 | 2018-08-22,228461.0 227 | 2018-08-23,230094.0 228 | 2018-08-24,178207.0 229 | 2018-08-27,244873.0 230 | 2018-08-28,209088.0 231 | 2018-08-29,230723.0 232 | 2018-08-30,212288.0 233 | 2018-08-31,214426.0 234 | 2018-09-04,207754.0 235 | 2018-09-05,213409.0 236 | 2018-09-06,170465.0 237 | 2018-09-07,182935.0 238 | 2018-09-10,216339.0 239 | 2018-09-11,216836.0 240 | 2018-09-12,208466.0 241 | 2018-09-13,193341.0 242 | 2018-09-14,242216.0 243 | 2018-09-17,196847.0 244 | 2018-09-18,202280.0 245 | 2018-09-19,223302.0 246 | 2018-09-20,203343.0 247 | 2018-09-21,227132.0 248 | 2018-09-24,215823.0 249 | 2018-09-25,169395.0 250 | 2018-09-26,207679.0 251 | 2018-09-27,186355.0 252 | 2018-09-28,210530.0 253 | 2018-10-01,188884.0 254 | 2018-10-02,191762.0 255 | 2018-10-03,232030.0 256 | 2018-10-04,214706.0 257 | 2018-10-05,213685.0 258 | 2018-10-08,205509.0 259 | 2018-10-09,229840.0 260 | 2018-10-10,203251.0 261 | 2018-10-11,217037.0 262 | 2018-10-12,252063.0 263 | 2018-10-15,186625.0 264 | 2018-10-16,214448.0 265 | 2018-10-17,174949.0 266 | 2018-10-18,174781.0 267 | 2018-10-19,213331.0 268 | 2018-10-22,225811.0 269 | 2018-10-23,210716.0 270 | 2018-10-24,226919.0 271 | 2018-10-25,239843.0 272 | 2018-10-26,196001.0 273 | 2018-10-29,190627.0 274 | 2018-10-30,196620.0 275 | 2018-10-31,190284.0 276 | 2018-11-01,232609.0 277 | 2018-11-02,200541.0 278 | 2018-11-05,190302.0 279 | 2018-11-06,195035.0 280 | 2018-11-07,205464.0 281 | 2018-11-08,238503.0 282 | 2018-11-09,233655.0 283 | 2018-11-12,247601.0 284 | 2018-11-13,221816.0 285 | 2018-11-14,203877.0 286 | 2018-11-15,182143.0 287 | 2018-11-16,227373.0 288 | 2018-11-19,219731.0 289 | 2018-11-20,178592.0 290 | 2018-11-21,213573.0 291 | 2018-11-23,200974.0 292 | 2018-11-26,227110.0 293 | 2018-11-27,234092.0 294 | 2018-11-28,233096.0 295 | 2018-11-29,255990.0 296 | 2018-11-30,240890.0 297 | 2018-12-03,233088.0 298 | 2018-12-04,239283.0 299 | 2018-12-06,264711.0 300 | 2018-12-07,234189.0 301 | 2018-12-10,199918.0 302 | 2018-12-11,222631.0 303 | 2018-12-12,207044.0 304 | 2018-12-13,200392.0 305 | 2018-12-14,234567.0 306 | 2018-12-17,256247.0 307 | 2018-12-18,215274.0 308 | 2018-12-19,199063.0 309 | 2018-12-20,224864.0 310 | 2018-12-21,235553.0 311 | 2018-12-24,263157.0 312 | 2018-12-26,222228.0 313 | 2018-12-27,243436.0 314 | 2018-12-28,213643.0 315 | 2018-12-31,225986.0 316 | 2019-01-02,226179.0 317 | 2019-01-03,270371.0 318 | 2019-01-04,196207.0 319 | 2019-01-07,208663.0 320 | 2019-01-08,248797.0 321 | 2019-01-09,227881.0 322 | 2019-01-10,193694.0 323 | 2019-01-11,221644.0 324 | 2019-01-14,226586.0 325 | 2019-01-15,204034.0 326 | 2019-01-16,204868.0 327 | 2019-01-17,204047.0 328 | 2019-01-18,205983.0 329 | 2019-01-22,215795.0 330 | 2019-01-23,177478.0 331 | 2019-01-24,247324.0 332 | 2019-01-25,196729.0 333 | 2019-01-28,233227.0 334 | 2019-01-29,196652.0 335 | 2019-01-30,192321.0 336 | 2019-01-31,226490.0 337 | 2019-02-01,199647.0 338 | 2019-02-04,193555.0 339 | 2019-02-05,217980.0 340 | 2019-02-06,214931.0 341 | 
2019-02-07,233756.0 342 | 2019-02-08,215714.0 343 | 2019-02-11,284872.0 344 | 2019-02-12,212406.0 345 | 2019-02-13,204691.0 346 | 2019-02-14,184231.0 347 | 2019-02-15,219456.0 348 | 2019-02-19,204000.0 349 | 2019-02-20,235486.0 350 | 2019-02-21,212348.0 351 | 2019-02-22,207053.0 352 | 2019-02-25,206841.0 353 | 2019-02-26,178836.0 354 | 2019-02-27,186185.0 355 | 2019-02-28,215154.0 356 | 2019-03-01,203328.0 357 | 2019-03-04,178952.0 358 | 2019-03-05,175307.0 359 | 2019-03-06,208299.0 360 | 2019-03-07,212062.0 361 | 2019-03-08,206425.0 362 | 2019-03-11,164523.0 363 | 2019-03-12,208042.0 364 | 2019-03-13,235979.0 365 | 2019-03-14,191438.0 366 | 2019-03-15,264815.0 367 | 2019-03-18,234634.0 368 | 2019-03-19,215023.0 369 | 2019-03-20,164279.0 370 | 2019-03-21,205571.0 371 | 2019-03-22,204580.0 372 | 2019-03-25,192785.0 373 | 2019-03-26,197547.0 374 | 2019-03-27,210031.0 375 | 2019-03-28,212812.0 376 | 2019-03-29,172052.0 377 | 2019-04-01,236313.0 378 | 2019-04-02,179146.0 379 | 2019-04-03,156313.0 380 | 2019-04-04,201327.0 381 | 2019-04-05,179218.0 382 | 2019-04-08,208690.0 383 | 2019-04-09,201889.0 384 | 2019-04-10,232640.0 385 | 2019-04-11,249136.0 386 | 2019-04-12,216847.0 387 | 2019-04-15,211921.0 388 | 2019-04-16,228320.0 389 | 2019-04-17,195772.0 390 | 2019-04-18,193698.0 391 | 2019-04-22,179383.0 392 | 2019-04-23,218582.0 393 | 2019-04-24,187045.0 394 | 2019-04-25,167232.0 395 | 2019-04-26,169960.0 396 | 2019-04-29,185783.0 397 | 2019-04-30,218060.0 398 | 2019-05-01,222705.0 399 | 2019-05-02,221047.0 400 | 2019-05-03,208116.0 401 | 2019-05-06,221227.0 402 | 2019-05-07,227839.0 403 | 2019-05-08,208982.0 404 | 2019-05-09,241380.0 405 | 2019-05-10,224420.0 406 | 2019-05-13,218874.0 407 | 2019-05-14,233051.0 408 | 2019-05-15,209363.0 409 | 2019-05-16,204598.0 410 | 2019-05-17,191761.0 411 | 2019-05-20,194191.0 412 | 2019-05-21,255307.0 413 | 2019-05-22,222709.0 414 | 2019-05-23,184269.0 415 | 2019-05-24,220323.0 416 | 2019-05-28,207569.0 417 | 2019-05-29,189049.0 418 | 2019-05-30,177797.0 419 | 2019-05-31,215139.0 420 | 2019-06-03,228532.0 421 | 2019-06-04,203860.0 422 | 2019-06-05,212475.0 423 | 2019-06-06,189273.0 424 | 2019-06-07,181573.0 425 | 2019-06-10,216926.0 426 | 2019-06-11,210069.0 427 | 2019-06-12,196536.0 428 | 2019-06-13,232360.0 429 | 2019-06-14,215498.0 430 | 2019-06-17,239570.0 431 | 2019-06-18,213664.0 432 | 2019-06-19,195720.0 433 | 2019-06-20,206962.0 434 | 2019-06-21,208074.0 435 | 2019-06-24,241329.0 436 | 2019-06-25,176407.0 437 | 2019-06-26,262237.0 438 | 2019-06-27,204418.0 439 | 2019-06-28,233336.0 440 | 2019-07-01,213031.0 441 | 2019-07-02,227425.0 442 | 2019-07-03,241044.0 443 | 2019-07-05,202065.0 444 | 2019-07-08,193485.0 445 | 2019-07-09,235103.0 446 | 2019-07-10,184052.0 447 | 2019-07-11,202539.0 448 | 2019-07-12,199740.0 449 | 2019-07-15,230836.0 450 | 2019-07-16,222362.0 451 | 2019-07-17,265203.0 452 | 2019-07-18,207975.0 453 | 2019-07-19,223468.0 454 | 2019-07-22,183314.0 455 | 2019-07-23,238553.0 456 | 2019-07-24,208459.0 457 | 2019-07-25,213064.0 458 | 2019-07-26,209056.0 459 | 2019-07-29,208424.0 460 | 2019-07-30,226658.0 461 | 2019-07-31,221202.0 462 | 2019-08-01,211144.0 463 | 2019-08-02,234430.0 464 | 2019-08-05,201581.0 465 | 2019-08-06,169277.0 466 | 2019-08-07,227123.0 467 | 2019-08-08,244151.0 468 | 2019-08-09,182744.0 469 | 2019-08-12,242729.0 470 | 2019-08-13,208951.0 471 | 2019-08-14,227435.0 472 | 2019-08-15,224195.0 473 | 2019-08-16,197012.0 474 | 2019-08-19,264390.0 475 | 2019-08-20,206969.0 476 | 2019-08-21,207534.0 477 | 
2019-08-22,175696.0 478 | 2019-08-23,234533.0 479 | 2019-08-26,215620.0 480 | 2019-08-27,199266.0 481 | 2019-08-28,174298.0 482 | 2019-08-29,240823.0 483 | 2019-08-30,208684.0 484 | 2019-09-03,264284.0 485 | 2019-09-04,201644.0 486 | 2019-09-05,228044.0 487 | 2019-09-06,201028.0 488 | 2019-09-09,202218.0 489 | 2019-09-10,245567.0 490 | 2019-09-11,240227.0 491 | 2019-09-12,182705.0 492 | 2019-09-13,236634.0 493 | 2019-09-16,205976.0 494 | 2019-09-17,195457.0 495 | 2019-09-18,184070.0 496 | 2019-09-19,206251.0 497 | 2019-09-20,203837.0 498 | 2019-09-23,190928.0 499 | 2019-09-24,232935.0 500 | 2019-09-25,211451.0 501 | 2019-09-26,200314.0 502 | 2019-09-27,195926.0 503 | 2019-09-30,219788.0 504 | 2019-10-01,236744.0 505 | 2019-10-02,222219.0 506 | 2019-10-03,204931.0 507 | 2019-10-04,211875.0 508 | 2019-10-07,233955.0 509 | 2019-10-08,196324.0 510 | 2019-10-09,202477.0 511 | 2019-10-10,228503.0 512 | 2019-10-11,199737.0 513 | 2019-10-14,253851.0 514 | 2019-10-15,228601.0 515 | 2019-10-16,180920.0 516 | 2019-10-17,221938.0 517 | 2019-10-18,215957.0 518 | 2019-10-21,173637.0 519 | 2019-10-22,209806.0 520 | 2019-10-23,205010.0 521 | 2019-10-24,210664.0 522 | 2019-10-25,211773.0 523 | 2019-10-28,211631.0 524 | 2019-10-29,194738.0 525 | 2019-10-30,205982.0 526 | 2019-10-31,191063.0 527 | 2019-11-01,179137.0 528 | 2019-11-04,211371.0 529 | 2019-11-05,223270.0 530 | 2019-11-06,276605.0 531 | 2019-11-07,242609.0 532 | 2019-11-08,204443.0 533 | 2019-11-11,245952.0 534 | 2019-11-12,238041.0 535 | 2019-11-13,222058.0 536 | 2019-11-14,221566.0 537 | 2019-11-15,203806.0 538 | 2019-11-18,219534.0 539 | 2019-11-19,236376.0 540 | 2019-11-20,199045.0 541 | 2019-11-21,246170.0 542 | 2019-11-22,183462.0 543 | 2019-11-25,263499.0 544 | 2019-11-26,202534.0 545 | 2019-11-27,237125.0 546 | 2019-11-29,213578.0 547 | 2019-12-02,199935.0 548 | 2019-12-03,247914.0 549 | 2019-12-04,259856.0 550 | 2019-12-05,227246.0 551 | 2019-12-06,230061.0 552 | 2019-12-09,208392.0 553 | 2019-12-10,194513.0 554 | 2019-12-11,229407.0 555 | 2019-12-12,239324.0 556 | 2019-12-13,237440.0 557 | 2019-12-16,258681.0 558 | 2019-12-17,203218.0 559 | 2019-12-18,261029.0 560 | 2019-12-19,252389.0 561 | 2019-12-20,253307.0 562 | 2019-12-23,221920.0 563 | 2019-12-24,257740.0 564 | 2019-12-26,246320.0 565 | 2019-12-27,210558.0 566 | 2019-12-30,229775.0 567 | 2019-12-31,221618.0 568 | -------------------------------------------------------------------------------- /data/alternative_data/IYPP.csv: -------------------------------------------------------------------------------- 1 | date,value 2 | -------------------------------------------------------------------------------- /data/alternative_data/KER.csv: -------------------------------------------------------------------------------- 1 | date,value 2 | -------------------------------------------------------------------------------- /data/alternative_data/PQCE.csv: -------------------------------------------------------------------------------- 1 | date,value 2 | 2015-03-31,8918.0 3 | 2015-04-01,9746.0 4 | 2015-04-02,8543.0 5 | 2015-04-06,9901.0 6 | 2015-04-07,10013.0 7 | 2015-04-08,8243.0 8 | 2015-04-09,8334.0 9 | 2015-04-10,6892.0 10 | 2015-04-13,8285.0 11 | 2015-04-14,10270.0 12 | 2015-04-15,8838.0 13 | 2015-04-16,9067.0 14 | 2015-04-17,9486.0 15 | 2015-04-20,9324.0 16 | 2015-04-21,8214.0 17 | 2015-04-22,9275.0 18 | 2015-04-23,9267.0 19 | 2015-04-24,10398.0 20 | 2015-04-27,9832.0 21 | 2015-04-28,9846.0 22 | 2015-04-29,11295.0 23 | 2015-04-30,7890.0 24 | 2015-05-01,8118.0 25 | 2015-05-04,8549.0 26 
| 2015-05-05,10004.0 27 | 2015-05-06,9860.0 28 | 2015-05-07,7657.0 29 | 2015-05-08,8967.0 30 | 2015-05-11,8090.0 31 | 2015-05-12,10383.0 32 | 2015-05-13,9607.0 33 | 2015-05-14,8404.0 34 | 2015-05-15,9034.0 35 | 2015-05-18,8596.0 36 | 2015-05-19,9567.0 37 | 2015-05-20,8477.0 38 | 2015-05-21,8980.0 39 | 2015-05-22,8828.0 40 | 2015-05-26,8022.0 41 | 2015-05-27,8833.0 42 | 2015-05-28,8357.0 43 | 2015-05-29,10015.0 44 | 2015-06-01,8437.0 45 | 2015-06-02,7578.0 46 | 2015-06-03,8812.0 47 | 2015-06-04,9669.0 48 | 2015-06-05,10868.0 49 | 2015-06-08,9041.0 50 | 2015-06-09,9050.0 51 | 2015-06-10,9185.0 52 | 2015-06-11,8911.0 53 | 2015-06-12,8739.0 54 | 2015-06-15,8646.0 55 | 2015-06-16,8036.0 56 | 2015-06-17,9720.0 57 | 2015-06-18,9165.0 58 | 2015-06-19,8740.0 59 | 2015-06-22,10133.0 60 | 2015-06-23,8449.0 61 | 2015-06-24,8879.0 62 | 2015-06-25,8881.0 63 | 2015-06-26,8440.0 64 | 2015-06-29,10320.0 65 | 2015-06-30,9085.0 66 | 2015-07-01,9999.0 67 | 2015-07-02,10090.0 68 | 2015-07-06,7232.0 69 | 2015-07-07,8191.0 70 | 2015-07-08,8133.0 71 | 2015-07-09,10156.0 72 | 2015-07-10,8200.0 73 | 2015-07-13,8008.0 74 | 2015-07-14,8462.0 75 | 2015-07-15,8989.0 76 | 2015-07-16,8271.0 77 | 2015-07-17,9009.0 78 | 2015-07-20,8176.0 79 | 2015-07-21,9641.0 80 | 2015-07-22,7780.0 81 | 2015-07-23,9833.0 82 | 2015-07-24,8358.0 83 | 2015-07-27,8301.0 84 | 2015-07-28,10360.0 85 | 2015-07-29,7010.0 86 | 2015-07-30,8872.0 87 | 2015-07-31,8430.0 88 | 2015-08-03,8722.0 89 | 2015-08-04,8833.0 90 | 2015-08-05,7567.0 91 | 2015-08-06,8640.0 92 | 2015-08-07,9284.0 93 | 2015-08-10,8445.0 94 | 2015-08-11,8022.0 95 | 2015-08-12,9009.0 96 | 2015-08-13,8206.0 97 | 2015-08-14,10153.0 98 | 2015-08-17,8287.0 99 | 2015-08-18,8320.0 100 | 2015-08-19,8509.0 101 | 2015-08-20,9595.0 102 | 2015-08-21,8614.0 103 | 2015-08-24,8514.0 104 | 2015-08-25,8834.0 105 | 2015-08-26,9059.0 106 | 2015-08-27,7528.0 107 | 2015-08-28,9942.0 108 | 2015-08-31,10222.0 109 | 2015-09-01,9797.0 110 | 2015-09-02,8442.0 111 | 2015-09-03,9770.0 112 | 2015-09-04,8672.0 113 | 2015-09-08,9403.0 114 | 2015-09-09,7840.0 115 | 2015-09-10,9147.0 116 | 2015-09-11,8743.0 117 | 2015-09-14,9642.0 118 | 2015-09-15,7827.0 119 | 2015-09-16,8484.0 120 | 2015-09-17,9245.0 121 | 2015-09-18,9267.0 122 | 2015-09-21,9700.0 123 | 2015-09-22,10335.0 124 | 2015-09-23,10076.0 125 | 2015-09-24,10096.0 126 | 2015-09-25,8119.0 127 | 2015-09-28,11123.0 128 | 2015-09-29,10667.0 129 | 2015-09-30,10310.0 130 | 2015-10-01,10204.0 131 | 2015-10-02,10888.0 132 | 2015-10-05,9935.0 133 | 2015-10-06,7948.0 134 | 2015-10-07,9655.0 135 | 2015-10-08,9166.0 136 | 2015-10-09,8512.0 137 | 2015-10-12,10348.0 138 | 2015-10-13,9708.0 139 | 2015-10-14,10212.0 140 | 2015-10-15,8877.0 141 | 2015-10-16,10092.0 142 | 2015-10-19,12914.0 143 | 2015-10-20,8727.0 144 | 2015-10-21,8902.0 145 | 2015-10-22,10464.0 146 | 2015-10-23,8915.0 147 | 2015-10-26,12274.0 148 | 2015-10-27,10076.0 149 | 2015-10-28,10792.0 150 | 2015-10-29,9992.0 151 | 2015-10-30,11303.0 152 | 2015-11-02,11491.0 153 | 2015-11-03,11532.0 154 | 2015-11-04,9877.0 155 | 2015-11-05,11495.0 156 | 2015-11-06,10898.0 157 | 2015-11-09,11430.0 158 | 2015-11-10,11608.0 159 | 2015-11-11,11989.0 160 | 2015-11-12,11084.0 161 | 2015-11-13,11942.0 162 | 2015-11-16,9477.0 163 | 2015-11-17,12945.0 164 | 2015-11-18,10983.0 165 | 2015-11-19,10233.0 166 | 2015-11-20,12536.0 167 | 2015-11-23,11742.0 168 | 2015-11-24,13600.0 169 | 2015-11-25,11208.0 170 | 2015-11-27,11378.0 171 | 2015-11-30,12560.0 172 | 2015-12-01,13421.0 173 | 2015-12-02,12945.0 174 | 2015-12-03,13359.0 175 | 
2015-12-04,11871.0 176 | 2015-12-07,11533.0 177 | 2015-12-08,10700.0 178 | 2015-12-09,14087.0 179 | 2015-12-10,11614.0 180 | 2015-12-11,11178.0 181 | 2015-12-14,14839.0 182 | 2015-12-15,13065.0 183 | 2015-12-16,13337.0 184 | 2015-12-17,14097.0 185 | 2015-12-18,14338.0 186 | 2015-12-21,12793.0 187 | 2015-12-22,12544.0 188 | 2015-12-23,12305.0 189 | 2015-12-24,11504.0 190 | 2015-12-28,13444.0 191 | 2015-12-29,12792.0 192 | 2015-12-30,13051.0 193 | 2015-12-31,14256.0 194 | 2016-01-04,13832.0 195 | 2016-01-05,10138.0 196 | 2016-01-06,10782.0 197 | 2016-01-07,11557.0 198 | 2016-01-08,13559.0 199 | 2016-01-11,14624.0 200 | 2016-01-12,11616.0 201 | 2016-01-13,11422.0 202 | 2016-01-14,11328.0 203 | 2016-01-15,13386.0 204 | 2016-01-19,11589.0 205 | 2016-01-20,12970.0 206 | 2016-01-21,12015.0 207 | 2016-01-22,12248.0 208 | 2016-01-25,12767.0 209 | 2016-01-26,14373.0 210 | 2016-01-27,11200.0 211 | 2016-01-28,12198.0 212 | 2016-01-29,11007.0 213 | 2016-02-01,11535.0 214 | 2016-02-02,14260.0 215 | 2016-02-03,11457.0 216 | 2016-02-04,11616.0 217 | 2016-02-05,13165.0 218 | 2016-02-08,13501.0 219 | 2016-02-09,12379.0 220 | 2016-02-10,11022.0 221 | 2016-02-11,13027.0 222 | 2016-02-12,13032.0 223 | 2016-02-16,11451.0 224 | 2016-02-17,13441.0 225 | 2016-02-18,12405.0 226 | 2016-02-19,9190.0 227 | 2016-02-22,12006.0 228 | 2016-02-23,10856.0 229 | 2016-02-24,10540.0 230 | 2016-02-25,12862.0 231 | 2016-02-26,10275.0 232 | 2016-02-29,12544.0 233 | 2016-03-01,11518.0 234 | 2016-03-02,12804.0 235 | 2016-03-03,12009.0 236 | 2016-03-04,12008.0 237 | 2016-03-07,10560.0 238 | 2016-03-08,9732.0 239 | 2016-03-09,12145.0 240 | 2016-03-10,11035.0 241 | 2016-03-11,9080.0 242 | 2016-03-14,9967.0 243 | 2016-03-15,10073.0 244 | 2016-03-16,12505.0 245 | 2016-03-17,10435.0 246 | 2016-03-18,9876.0 247 | 2016-03-21,9666.0 248 | 2016-03-22,11044.0 249 | 2016-03-23,11468.0 250 | 2016-03-24,10880.0 251 | 2016-03-28,10438.0 252 | 2016-03-29,13232.0 253 | 2016-03-30,10670.0 254 | 2016-03-31,12115.0 255 | 2016-04-01,12385.0 256 | 2016-04-04,10121.0 257 | 2016-04-05,12013.0 258 | 2016-04-06,9348.0 259 | 2016-04-07,12190.0 260 | 2016-04-08,12092.0 261 | 2016-04-11,10343.0 262 | 2016-04-12,10474.0 263 | 2016-04-13,10554.0 264 | 2016-04-14,10544.0 265 | 2016-04-15,10192.0 266 | 2016-04-18,9868.0 267 | 2016-04-19,11927.0 268 | 2016-04-20,12020.0 269 | 2016-04-21,11610.0 270 | 2016-04-22,12211.0 271 | 2016-04-25,12230.0 272 | 2016-04-26,11875.0 273 | 2016-04-27,10125.0 274 | 2016-04-28,12775.0 275 | 2016-04-29,11721.0 276 | 2016-05-02,9605.0 277 | 2016-05-03,9769.0 278 | 2016-05-04,11286.0 279 | 2016-05-05,10808.0 280 | 2016-05-06,10818.0 281 | 2016-05-09,9601.0 282 | 2016-05-10,11757.0 283 | 2016-05-11,10976.0 284 | 2016-05-12,10254.0 285 | 2016-05-13,10414.0 286 | 2016-05-16,12058.0 287 | 2016-05-17,10020.0 288 | 2016-05-18,8782.0 289 | 2016-05-19,11814.0 290 | 2016-05-20,10458.0 291 | 2016-05-23,9208.0 292 | 2016-05-24,11170.0 293 | 2016-05-25,10461.0 294 | 2016-05-26,11216.0 295 | 2016-05-27,10700.0 296 | 2016-05-31,11518.0 297 | 2016-06-01,10046.0 298 | 2016-06-02,10643.0 299 | 2016-06-03,10844.0 300 | 2016-06-06,10615.0 301 | 2016-06-07,10279.0 302 | 2016-06-08,10788.0 303 | 2016-06-09,10265.0 304 | 2016-06-10,12697.0 305 | 2016-06-13,10222.0 306 | 2016-06-14,11055.0 307 | 2016-06-15,10362.0 308 | 2016-06-16,9782.0 309 | 2016-06-17,10600.0 310 | 2016-06-20,11854.0 311 | 2016-06-21,10092.0 312 | 2016-06-22,9481.0 313 | 2016-06-23,10054.0 314 | 2016-06-24,13028.0 315 | 2016-06-27,9222.0 316 | 2016-06-28,10142.0 317 | 2016-06-29,10619.0 
318 | 2016-06-30,13576.0 319 | 2016-07-01,10906.0 320 | 2016-07-05,9204.0 321 | 2016-07-06,9588.0 322 | 2016-07-07,11269.0 323 | 2016-07-08,10749.0 324 | 2016-07-11,10189.0 325 | 2016-07-12,11209.0 326 | 2016-07-13,11606.0 327 | 2016-07-14,11379.0 328 | 2016-07-15,10028.0 329 | 2016-07-18,11085.0 330 | 2016-07-19,12476.0 331 | 2016-07-20,14173.0 332 | 2016-07-21,10284.0 333 | 2016-07-22,11413.0 334 | 2016-07-25,11026.0 335 | 2016-07-26,10705.0 336 | 2016-07-27,9962.0 337 | 2016-07-28,13357.0 338 | 2016-07-29,11657.0 339 | 2016-08-01,9360.0 340 | 2016-08-02,11426.0 341 | 2016-08-03,10974.0 342 | 2016-08-04,9522.0 343 | 2016-08-05,10038.0 344 | 2016-08-08,10837.0 345 | 2016-08-09,10326.0 346 | 2016-08-10,10913.0 347 | 2016-08-11,12127.0 348 | 2016-08-12,13590.0 349 | 2016-08-15,9823.0 350 | 2016-08-16,11505.0 351 | 2016-08-17,10018.0 352 | 2016-08-18,9811.0 353 | 2016-08-19,12634.0 354 | 2016-08-22,10934.0 355 | 2016-08-23,10395.0 356 | 2016-08-24,10630.0 357 | 2016-08-25,9019.0 358 | 2016-08-26,11895.0 359 | 2016-08-29,12016.0 360 | 2016-08-30,11500.0 361 | 2016-08-31,9885.0 362 | 2016-09-01,12960.0 363 | 2016-09-02,11081.0 364 | 2016-09-06,10797.0 365 | 2016-09-07,10899.0 366 | 2016-09-08,11746.0 367 | 2016-09-09,12265.0 368 | 2016-09-12,12043.0 369 | 2016-09-13,12541.0 370 | 2016-09-14,7790.0 371 | 2016-09-15,10435.0 372 | 2016-09-16,11709.0 373 | 2016-09-19,11544.0 374 | 2016-09-20,11176.0 375 | 2016-09-21,11707.0 376 | 2016-09-22,11087.0 377 | 2016-09-23,10167.0 378 | 2016-09-26,11724.0 379 | 2016-09-27,11883.0 380 | 2016-09-28,11104.0 381 | 2016-09-29,11960.0 382 | 2016-09-30,11937.0 383 | 2016-10-03,10686.0 384 | 2016-10-04,10560.0 385 | 2016-10-05,12314.0 386 | 2016-10-06,11377.0 387 | 2016-10-07,11053.0 388 | 2016-10-10,12246.0 389 | 2016-10-11,13209.0 390 | 2016-10-12,13468.0 391 | 2016-10-13,12998.0 392 | 2016-10-14,12432.0 393 | 2016-10-17,13028.0 394 | 2016-10-18,11321.0 395 | 2016-10-19,13731.0 396 | 2016-10-20,11505.0 397 | 2016-10-21,12658.0 398 | 2016-10-24,11551.0 399 | 2016-10-25,13157.0 400 | 2016-10-26,12240.0 401 | 2016-10-27,13877.0 402 | 2016-10-28,14056.0 403 | 2016-10-31,13117.0 404 | 2016-11-01,14859.0 405 | 2016-11-02,12957.0 406 | 2016-11-03,13657.0 407 | 2016-11-04,12797.0 408 | 2016-11-07,14438.0 409 | 2016-11-08,14068.0 410 | 2016-11-09,13307.0 411 | 2016-11-10,13186.0 412 | 2016-11-11,11788.0 413 | 2016-11-14,14129.0 414 | 2016-11-15,11714.0 415 | 2016-11-16,13211.0 416 | 2016-11-17,10984.0 417 | 2016-11-18,14109.0 418 | 2016-11-21,10747.0 419 | 2016-11-22,13480.0 420 | 2016-11-23,14349.0 421 | 2016-11-25,16598.0 422 | 2016-11-28,16091.0 423 | 2016-11-29,14914.0 424 | 2016-11-30,13758.0 425 | 2016-12-01,16103.0 426 | 2016-12-02,14532.0 427 | 2016-12-05,14278.0 428 | 2016-12-06,12427.0 429 | 2016-12-07,16579.0 430 | 2016-12-08,14468.0 431 | 2016-12-09,13615.0 432 | 2016-12-12,16672.0 433 | 2016-12-13,15704.0 434 | 2016-12-14,12202.0 435 | 2016-12-15,15582.0 436 | 2016-12-16,14141.0 437 | 2016-12-19,13261.0 438 | 2016-12-20,17105.0 439 | 2016-12-21,12445.0 440 | 2016-12-22,15922.0 441 | 2016-12-23,17369.0 442 | 2016-12-27,17048.0 443 | 2016-12-28,14067.0 444 | 2016-12-29,13305.0 445 | 2016-12-30,15680.0 446 | 2017-01-03,14779.0 447 | 2017-01-04,15696.0 448 | 2017-01-05,16183.0 449 | 2017-01-06,18695.0 450 | 2017-01-09,15383.0 451 | 2017-01-10,16679.0 452 | 2017-01-11,17020.0 453 | 2017-01-12,15720.0 454 | 2017-01-13,15732.0 455 | 2017-01-17,16032.0 456 | 2017-01-18,15810.0 457 | 2017-01-19,14771.0 458 | 2017-01-20,15061.0 459 | 2017-01-23,18517.0 460 | 
2017-01-24,16692.0 461 | 2017-01-25,15253.0 462 | 2017-01-26,12949.0 463 | 2017-01-27,13884.0 464 | 2017-01-30,13233.0 465 | 2017-01-31,14878.0 466 | 2017-02-01,13718.0 467 | 2017-02-02,14725.0 468 | 2017-02-03,14340.0 469 | 2017-02-06,15292.0 470 | 2017-02-07,13666.0 471 | 2017-02-08,16296.0 472 | 2017-02-09,14075.0 473 | 2017-02-10,15432.0 474 | 2017-02-13,13794.0 475 | 2017-02-14,15489.0 476 | 2017-02-15,10478.0 477 | 2017-02-16,15155.0 478 | 2017-02-17,14021.0 479 | 2017-02-21,12115.0 480 | 2017-02-22,17267.0 481 | 2017-02-23,15483.0 482 | 2017-02-24,16357.0 483 | 2017-02-27,16895.0 484 | 2017-02-28,14119.0 485 | 2017-03-01,11546.0 486 | 2017-03-02,12516.0 487 | 2017-03-03,12881.0 488 | 2017-03-06,13578.0 489 | 2017-03-07,12536.0 490 | 2017-03-08,16161.0 491 | 2017-03-09,14955.0 492 | 2017-03-10,13301.0 493 | 2017-03-13,14516.0 494 | 2017-03-14,14274.0 495 | 2017-03-15,13427.0 496 | 2017-03-16,15435.0 497 | 2017-03-17,13874.0 498 | 2017-03-20,15284.0 499 | 2017-03-21,12868.0 500 | 2017-03-22,11912.0 501 | 2017-03-23,13480.0 502 | 2017-03-24,11589.0 503 | 2017-03-27,14357.0 504 | 2017-03-28,11648.0 505 | 2017-03-29,12360.0 506 | 2017-03-30,15072.0 507 | 2017-03-31,12834.0 508 | 2017-04-03,12932.0 509 | 2017-04-04,12522.0 510 | 2017-04-05,12050.0 511 | 2017-04-06,11951.0 512 | 2017-04-07,10518.0 513 | 2017-04-10,13493.0 514 | 2017-04-11,12341.0 515 | 2017-04-12,14569.0 516 | 2017-04-13,13305.0 517 | 2017-04-17,14152.0 518 | 2017-04-18,14492.0 519 | 2017-04-19,13545.0 520 | 2017-04-20,12065.0 521 | 2017-04-21,15071.0 522 | 2017-04-24,12903.0 523 | 2017-04-25,13548.0 524 | 2017-04-26,13676.0 525 | 2017-04-27,14215.0 526 | 2017-04-28,15646.0 527 | 2017-05-01,10927.0 528 | 2017-05-02,12565.0 529 | 2017-05-03,12826.0 530 | 2017-05-04,13520.0 531 | 2017-05-05,12934.0 532 | 2017-05-08,14857.0 533 | 2017-05-09,16156.0 534 | 2017-05-10,14410.0 535 | 2017-05-11,13446.0 536 | 2017-05-12,13161.0 537 | 2017-05-15,13481.0 538 | 2017-05-16,13339.0 539 | 2017-05-17,14934.0 540 | 2017-05-18,11514.0 541 | 2017-05-19,14478.0 542 | 2017-05-22,12585.0 543 | 2017-05-23,12452.0 544 | 2017-05-24,10864.0 545 | 2017-05-25,13407.0 546 | 2017-05-26,13740.0 547 | 2017-05-30,13367.0 548 | 2017-05-31,15078.0 549 | 2017-06-01,15193.0 550 | 2017-06-02,13078.0 551 | 2017-06-05,12520.0 552 | 2017-06-06,15765.0 553 | 2017-06-07,13253.0 554 | 2017-06-08,13306.0 555 | 2017-06-09,13907.0 556 | 2017-06-12,13746.0 557 | 2017-06-13,12905.0 558 | 2017-06-14,13495.0 559 | 2017-06-15,13585.0 560 | 2017-06-16,12762.0 561 | 2017-06-19,13477.0 562 | 2017-06-20,10222.0 563 | 2017-06-21,10314.0 564 | 2017-06-22,12416.0 565 | 2017-06-23,10876.0 566 | 2017-06-26,13908.0 567 | 2017-06-27,11827.0 568 | 2017-06-28,12545.0 569 | 2017-06-29,10144.0 570 | 2017-06-30,10828.0 571 | 2017-07-03,10467.0 572 | 2017-07-05,13210.0 573 | 2017-07-06,10452.0 574 | 2017-07-07,11905.0 575 | 2017-07-10,13350.0 576 | 2017-07-11,14854.0 577 | 2017-07-12,11133.0 578 | 2017-07-13,12699.0 579 | 2017-07-14,13170.0 580 | 2017-07-17,14195.0 581 | 2017-07-18,12583.0 582 | 2017-07-19,17080.0 583 | 2017-07-20,9868.0 584 | 2017-07-21,10412.0 585 | 2017-07-24,11475.0 586 | 2017-07-25,11910.0 587 | 2017-07-26,12758.0 588 | 2017-07-27,12389.0 589 | 2017-07-28,10785.0 590 | 2017-07-31,16414.0 591 | 2017-08-01,13251.0 592 | 2017-08-02,12488.0 593 | 2017-08-03,13885.0 594 | 2017-08-04,12476.0 595 | 2017-08-07,14538.0 596 | 2017-08-08,12000.0 597 | 2017-08-09,12320.0 598 | 2017-08-10,10218.0 599 | 2017-08-11,12823.0 600 | 2017-08-14,12460.0 601 | 2017-08-15,12892.0 602 | 
2017-08-16,13037.0 603 | 2017-08-17,14741.0 604 | 2017-08-18,14738.0 605 | 2017-08-21,15753.0 606 | 2017-08-22,14684.0 607 | 2017-08-23,12186.0 608 | 2017-08-24,12950.0 609 | 2017-08-25,12427.0 610 | 2017-08-28,14034.0 611 | 2017-08-29,15596.0 612 | 2017-08-30,14127.0 613 | 2017-08-31,13232.0 614 | 2017-09-01,15738.0 615 | 2017-09-05,14841.0 616 | 2017-09-06,16079.0 617 | 2017-09-07,10594.0 618 | 2017-09-08,12761.0 619 | 2017-09-11,16136.0 620 | 2017-09-12,11914.0 621 | 2017-09-13,13068.0 622 | 2017-09-14,14854.0 623 | 2017-09-15,15565.0 624 | 2017-09-18,13322.0 625 | 2017-09-19,16797.0 626 | 2017-09-20,12901.0 627 | 2017-09-21,12779.0 628 | 2017-09-22,14607.0 629 | 2017-09-25,15146.0 630 | 2017-09-26,15115.0 631 | 2017-09-27,14358.0 632 | 2017-09-28,13013.0 633 | 2017-09-29,13494.0 634 | 2017-10-02,14292.0 635 | 2017-10-03,16532.0 636 | 2017-10-04,13458.0 637 | 2017-10-05,15794.0 638 | 2017-10-06,13628.0 639 | 2017-10-09,16417.0 640 | 2017-10-10,12574.0 641 | 2017-10-11,16220.0 642 | 2017-10-12,14587.0 643 | 2017-10-13,13998.0 644 | 2017-10-16,15545.0 645 | 2017-10-17,17877.0 646 | 2017-10-18,12571.0 647 | 2017-10-19,14204.0 648 | 2017-10-20,14683.0 649 | 2017-10-23,14256.0 650 | 2017-10-24,17355.0 651 | 2017-10-25,15906.0 652 | 2017-10-26,15540.0 653 | 2017-10-27,13346.0 654 | 2017-10-30,10305.0 655 | 2017-10-31,16487.0 656 | 2017-11-01,14488.0 657 | 2017-11-02,14345.0 658 | 2017-11-03,13343.0 659 | 2017-11-06,17535.0 660 | 2017-11-07,16161.0 661 | 2017-11-08,15014.0 662 | 2017-11-09,12015.0 663 | 2017-11-10,15091.0 664 | 2017-11-13,14079.0 665 | 2017-11-14,14489.0 666 | 2017-11-15,15356.0 667 | 2017-11-16,14271.0 668 | 2017-11-17,15018.0 669 | 2017-11-20,14536.0 670 | 2017-11-21,13259.0 671 | 2017-11-22,14333.0 672 | 2017-11-24,13668.0 673 | 2017-11-27,14514.0 674 | 2017-11-28,15767.0 675 | 2017-11-29,14401.0 676 | 2017-11-30,15443.0 677 | 2017-12-01,15519.0 678 | 2017-12-04,14076.0 679 | 2017-12-05,12515.0 680 | 2017-12-06,15434.0 681 | 2017-12-07,11983.0 682 | 2017-12-08,15244.0 683 | 2017-12-11,15644.0 684 | 2017-12-12,14080.0 685 | 2017-12-13,11868.0 686 | 2017-12-14,15017.0 687 | 2017-12-15,13793.0 688 | 2017-12-18,14330.0 689 | 2017-12-19,12517.0 690 | 2017-12-20,14911.0 691 | 2017-12-21,14078.0 692 | 2017-12-22,15468.0 693 | 2017-12-26,11526.0 694 | 2017-12-27,16095.0 695 | 2017-12-28,17974.0 696 | 2017-12-29,16082.0 697 | 2018-01-02,14649.0 698 | 2018-01-03,15571.0 699 | 2018-01-04,13797.0 700 | 2018-01-05,11547.0 701 | 2018-01-08,13881.0 702 | 2018-01-09,14722.0 703 | 2018-01-10,12580.0 704 | 2018-01-11,17736.0 705 | 2018-01-12,15248.0 706 | 2018-01-16,14378.0 707 | 2018-01-17,16757.0 708 | 2018-01-18,15051.0 709 | 2018-01-19,15882.0 710 | 2018-01-22,15448.0 711 | 2018-01-23,15381.0 712 | 2018-01-24,19326.0 713 | 2018-01-25,16623.0 714 | 2018-01-26,15992.0 715 | 2018-01-29,14019.0 716 | 2018-01-30,13455.0 717 | 2018-01-31,14843.0 718 | 2018-02-01,14431.0 719 | 2018-02-02,14325.0 720 | 2018-02-05,14104.0 721 | 2018-02-06,15815.0 722 | 2018-02-07,14252.0 723 | 2018-02-08,15726.0 724 | 2018-02-09,15578.0 725 | 2018-02-12,14160.0 726 | 2018-02-13,17494.0 727 | 2018-02-14,14778.0 728 | 2018-02-15,16146.0 729 | 2018-02-16,16607.0 730 | 2018-02-20,14174.0 731 | 2018-02-21,16024.0 732 | 2018-02-22,14372.0 733 | 2018-02-23,15545.0 734 | 2018-02-26,16605.0 735 | 2018-02-27,17195.0 736 | 2018-02-28,16831.0 737 | 2018-03-01,14474.0 738 | 2018-03-02,13998.0 739 | 2018-03-05,15446.0 740 | 2018-03-06,13164.0 741 | 2018-03-07,16793.0 742 | 2018-03-08,14721.0 743 | 2018-03-09,12699.0 744 | 
2018-03-12,16085.0 745 | 2018-03-13,13569.0 746 | 2018-03-14,17992.0 747 | 2018-03-15,15485.0 748 | 2018-03-16,15041.0 749 | 2018-03-19,14529.0 750 | 2018-03-20,13751.0 751 | 2018-03-21,15747.0 752 | 2018-03-22,18491.0 753 | 2018-03-23,17875.0 754 | 2018-03-26,14910.0 755 | 2018-03-27,17507.0 756 | 2018-03-28,16663.0 757 | 2018-03-29,18341.0 758 | 2018-04-02,12375.0 759 | 2018-04-03,15121.0 760 | 2018-04-04,13036.0 761 | 2018-04-05,15340.0 762 | 2018-04-06,15234.0 763 | 2018-04-09,14386.0 764 | 2018-04-10,14289.0 765 | 2018-04-11,16782.0 766 | 2018-04-12,17325.0 767 | 2018-04-13,16548.0 768 | 2018-04-16,18073.0 769 | 2018-04-17,16431.0 770 | 2018-04-18,16468.0 771 | 2018-04-19,18137.0 772 | 2018-04-20,13796.0 773 | 2018-04-23,12432.0 774 | 2018-04-24,16881.0 775 | 2018-04-25,18414.0 776 | 2018-04-26,15931.0 777 | 2018-04-27,14471.0 778 | 2018-04-30,14865.0 779 | 2018-05-01,12999.0 780 | 2018-05-02,16903.0 781 | 2018-05-03,12569.0 782 | 2018-05-04,16067.0 783 | 2018-05-07,16152.0 784 | 2018-05-08,11627.0 785 | 2018-05-09,15279.0 786 | 2018-05-10,14828.0 787 | 2018-05-11,16304.0 788 | 2018-05-14,15012.0 789 | 2018-05-15,16069.0 790 | 2018-05-16,14576.0 791 | 2018-05-17,15594.0 792 | 2018-05-18,16375.0 793 | 2018-05-21,14209.0 794 | 2018-05-22,15637.0 795 | 2018-05-23,17881.0 796 | 2018-05-24,13460.0 797 | 2018-05-25,13179.0 798 | 2018-05-29,14338.0 799 | 2018-05-30,12745.0 800 | 2018-05-31,17385.0 801 | 2018-06-01,15859.0 802 | 2018-06-04,13300.0 803 | 2018-06-05,15684.0 804 | 2018-06-06,15425.0 805 | 2018-06-07,14483.0 806 | 2018-06-08,15787.0 807 | 2018-06-11,14562.0 808 | 2018-06-12,14104.0 809 | 2018-06-13,15821.0 810 | 2018-06-14,12716.0 811 | 2018-06-15,16353.0 812 | 2018-06-18,15679.0 813 | 2018-06-19,13990.0 814 | 2018-06-20,14332.0 815 | 2018-06-21,16764.0 816 | 2018-06-22,14741.0 817 | 2018-06-25,15596.0 818 | 2018-06-26,15663.0 819 | 2018-06-27,15113.0 820 | 2018-06-28,16211.0 821 | 2018-06-29,13559.0 822 | 2018-07-02,16316.0 823 | 2018-07-03,15683.0 824 | 2018-07-05,16260.0 825 | 2018-07-06,16225.0 826 | 2018-07-09,13334.0 827 | 2018-07-10,13341.0 828 | 2018-07-11,14705.0 829 | 2018-07-12,15709.0 830 | 2018-07-13,14795.0 831 | 2018-07-16,16337.0 832 | 2018-07-17,14885.0 833 | 2018-07-18,18256.0 834 | 2018-07-19,17893.0 835 | 2018-07-20,16872.0 836 | 2018-07-23,14253.0 837 | 2018-07-24,14227.0 838 | 2018-07-25,16888.0 839 | 2018-07-26,16656.0 840 | 2018-07-27,16974.0 841 | 2018-07-30,19337.0 842 | 2018-07-31,13452.0 843 | 2018-08-01,15350.0 844 | 2018-08-02,16629.0 845 | 2018-08-03,15249.0 846 | 2018-08-06,12811.0 847 | 2018-08-07,15768.0 848 | 2018-08-08,14583.0 849 | 2018-08-09,15534.0 850 | 2018-08-10,14680.0 851 | 2018-08-13,15163.0 852 | 2018-08-14,16373.0 853 | 2018-08-15,16091.0 854 | 2018-08-16,17603.0 855 | 2018-08-17,16467.0 856 | 2018-08-20,13982.0 857 | 2018-08-21,12553.0 858 | 2018-08-22,13796.0 859 | 2018-08-23,14759.0 860 | 2018-08-24,16791.0 861 | 2018-08-27,15747.0 862 | 2018-08-28,16596.0 863 | 2018-08-29,17264.0 864 | 2018-08-30,13083.0 865 | 2018-08-31,17253.0 866 | 2018-09-04,16530.0 867 | 2018-09-05,16146.0 868 | 2018-09-06,16937.0 869 | 2018-09-07,16391.0 870 | 2018-09-10,16532.0 871 | 2018-09-11,12983.0 872 | 2018-09-12,15174.0 873 | 2018-09-13,14345.0 874 | 2018-09-14,17648.0 875 | 2018-09-17,13917.0 876 | 2018-09-18,15958.0 877 | 2018-09-19,15165.0 878 | 2018-09-20,14942.0 879 | 2018-09-21,17308.0 880 | 2018-09-24,13241.0 881 | 2018-09-25,15344.0 882 | 2018-09-26,16047.0 883 | 2018-09-27,15761.0 884 | 2018-09-28,16268.0 885 | 2018-10-01,16573.0 886 | 
2018-10-02,16835.0 887 | 2018-10-03,17679.0 888 | 2018-10-04,14481.0 889 | 2018-10-05,17013.0 890 | 2018-10-08,15994.0 891 | 2018-10-09,16756.0 892 | 2018-10-10,17403.0 893 | 2018-10-11,17336.0 894 | 2018-10-12,13129.0 895 | 2018-10-15,13623.0 896 | 2018-10-16,17312.0 897 | 2018-10-17,15322.0 898 | 2018-10-18,14285.0 899 | 2018-10-19,14896.0 900 | 2018-10-22,16709.0 901 | 2018-10-23,15658.0 902 | 2018-10-24,16332.0 903 | 2018-10-25,16662.0 904 | 2018-10-26,18921.0 905 | 2018-10-29,14958.0 906 | 2018-10-30,13541.0 907 | 2018-10-31,14305.0 908 | 2018-11-01,14241.0 909 | 2018-11-02,15586.0 910 | 2018-11-05,13706.0 911 | 2018-11-06,15996.0 912 | 2018-11-07,16676.0 913 | 2018-11-08,15077.0 914 | 2018-11-09,16878.0 915 | 2018-11-12,16603.0 916 | 2018-11-13,15386.0 917 | 2018-11-14,17210.0 918 | 2018-11-15,15168.0 919 | 2018-11-16,15594.0 920 | 2018-11-19,20269.0 921 | 2018-11-20,20194.0 922 | 2018-11-21,16594.0 923 | 2018-11-23,14663.0 924 | 2018-11-26,16308.0 925 | 2018-11-27,17350.0 926 | 2018-11-28,13819.0 927 | 2018-11-29,18834.0 928 | 2018-11-30,15459.0 929 | 2018-12-03,17413.0 930 | 2018-12-04,15139.0 931 | 2018-12-06,14588.0 932 | 2018-12-07,14397.0 933 | 2018-12-10,14542.0 934 | 2018-12-11,16233.0 935 | 2018-12-12,14931.0 936 | 2018-12-13,15821.0 937 | 2018-12-14,16495.0 938 | 2018-12-17,16390.0 939 | 2018-12-18,20413.0 940 | 2018-12-19,19487.0 941 | 2018-12-20,16694.0 942 | 2018-12-21,14686.0 943 | 2018-12-24,15727.0 944 | 2018-12-26,18060.0 945 | 2018-12-27,18655.0 946 | 2018-12-28,16389.0 947 | 2018-12-31,17031.0 948 | 2019-01-02,13976.0 949 | 2019-01-03,17017.0 950 | 2019-01-04,17286.0 951 | 2019-01-07,16817.0 952 | 2019-01-08,14732.0 953 | 2019-01-09,15285.0 954 | 2019-01-10,18300.0 955 | 2019-01-11,17303.0 956 | 2019-01-14,17571.0 957 | 2019-01-15,16826.0 958 | 2019-01-16,17681.0 959 | 2019-01-17,17547.0 960 | 2019-01-18,17095.0 961 | 2019-01-22,17377.0 962 | 2019-01-23,15812.0 963 | 2019-01-24,15785.0 964 | 2019-01-25,15595.0 965 | 2019-01-28,17413.0 966 | 2019-01-29,13841.0 967 | 2019-01-30,16567.0 968 | 2019-01-31,18302.0 969 | 2019-02-01,17436.0 970 | 2019-02-04,14631.0 971 | 2019-02-05,17581.0 972 | 2019-02-06,18643.0 973 | 2019-02-07,14744.0 974 | 2019-02-08,16296.0 975 | 2019-02-11,15528.0 976 | 2019-02-12,14049.0 977 | 2019-02-13,17126.0 978 | 2019-02-14,19012.0 979 | 2019-02-15,17915.0 980 | 2019-02-19,18106.0 981 | 2019-02-20,20800.0 982 | 2019-02-21,16314.0 983 | 2019-02-22,17912.0 984 | 2019-02-25,17452.0 985 | 2019-02-26,19890.0 986 | 2019-02-27,20312.0 987 | 2019-02-28,19787.0 988 | 2019-03-01,17527.0 989 | 2019-03-04,20882.0 990 | 2019-03-05,20029.0 991 | 2019-03-06,15156.0 992 | 2019-03-07,13578.0 993 | 2019-03-08,14128.0 994 | 2019-03-11,18272.0 995 | 2019-03-12,18826.0 996 | 2019-03-13,15663.0 997 | 2019-03-14,18524.0 998 | 2019-03-15,19051.0 999 | 2019-03-18,14402.0 1000 | 2019-03-19,17954.0 1001 | 2019-03-20,16280.0 1002 | 2019-03-21,16243.0 1003 | 2019-03-22,18688.0 1004 | 2019-03-25,17222.0 1005 | 2019-03-26,18562.0 1006 | 2019-03-27,14478.0 1007 | 2019-03-28,17325.0 1008 | 2019-03-29,16188.0 1009 | 2019-04-01,15432.0 1010 | 2019-04-02,18448.0 1011 | 2019-04-03,19900.0 1012 | 2019-04-04,17792.0 1013 | 2019-04-05,18196.0 1014 | 2019-04-08,19564.0 1015 | 2019-04-09,14419.0 1016 | 2019-04-10,20259.0 1017 | 2019-04-11,17077.0 1018 | 2019-04-12,17315.0 1019 | 2019-04-15,17393.0 1020 | 2019-04-16,19280.0 1021 | 2019-04-17,19128.0 1022 | 2019-04-18,17824.0 1023 | 2019-04-22,20474.0 1024 | 2019-04-23,16924.0 1025 | 2019-04-24,17541.0 1026 | 2019-04-25,18646.0 1027 | 
2019-04-26,17893.0 1028 | 2019-04-29,17568.0 1029 | 2019-04-30,17072.0 1030 | 2019-05-01,19664.0 1031 | 2019-05-02,18367.0 1032 | 2019-05-03,18545.0 1033 | 2019-05-06,14510.0 1034 | 2019-05-07,19047.0 1035 | 2019-05-08,18096.0 1036 | 2019-05-09,19420.0 1037 | 2019-05-10,19795.0 1038 | 2019-05-13,21213.0 1039 | 2019-05-14,17385.0 1040 | 2019-05-15,14789.0 1041 | 2019-05-16,16001.0 1042 | 2019-05-17,16761.0 1043 | 2019-05-20,18635.0 1044 | 2019-05-21,15281.0 1045 | 2019-05-22,20066.0 1046 | 2019-05-23,19112.0 1047 | 2019-05-24,15950.0 1048 | 2019-05-28,18557.0 1049 | 2019-05-29,19066.0 1050 | 2019-05-30,18650.0 1051 | 2019-05-31,17130.0 1052 | 2019-06-03,16609.0 1053 | 2019-06-04,16667.0 1054 | 2019-06-05,17212.0 1055 | 2019-06-06,15483.0 1056 | 2019-06-07,20362.0 1057 | 2019-06-10,15799.0 1058 | 2019-06-11,18053.0 1059 | 2019-06-12,15444.0 1060 | 2019-06-13,16993.0 1061 | 2019-06-14,17468.0 1062 | 2019-06-17,18240.0 1063 | 2019-06-18,19427.0 1064 | 2019-06-19,16622.0 1065 | 2019-06-20,13806.0 1066 | 2019-06-21,16922.0 1067 | 2019-06-24,13448.0 1068 | 2019-06-25,14924.0 1069 | 2019-06-26,16398.0 1070 | 2019-06-27,16054.0 1071 | 2019-06-28,17165.0 1072 | 2019-07-01,17034.0 1073 | 2019-07-02,16055.0 1074 | 2019-07-03,15327.0 1075 | 2019-07-05,14879.0 1076 | 2019-07-08,14642.0 1077 | 2019-07-09,18163.0 1078 | 2019-07-10,19239.0 1079 | 2019-07-11,19080.0 1080 | 2019-07-12,14961.0 1081 | 2019-07-15,17994.0 1082 | 2019-07-16,17274.0 1083 | 2019-07-17,16727.0 1084 | 2019-07-18,16752.0 1085 | 2019-07-19,16894.0 1086 | 2019-07-22,17227.0 1087 | 2019-07-23,13987.0 1088 | 2019-07-24,15561.0 1089 | 2019-07-25,18167.0 1090 | 2019-07-26,16221.0 1091 | 2019-07-29,18683.0 1092 | 2019-07-30,16294.0 1093 | 2019-07-31,14915.0 1094 | 2019-08-01,16009.0 1095 | 2019-08-02,18484.0 1096 | 2019-08-05,17198.0 1097 | 2019-08-06,20213.0 1098 | 2019-08-07,15215.0 1099 | 2019-08-08,17098.0 1100 | 2019-08-09,17230.0 1101 | 2019-08-12,18102.0 1102 | 2019-08-13,18808.0 1103 | 2019-08-14,16780.0 1104 | 2019-08-15,17783.0 1105 | 2019-08-16,13671.0 1106 | 2019-08-19,19047.0 1107 | 2019-08-20,13353.0 1108 | 2019-08-21,15331.0 1109 | 2019-08-22,19669.0 1110 | 2019-08-23,18546.0 1111 | 2019-08-26,16937.0 1112 | 2019-08-27,18241.0 1113 | 2019-08-28,18586.0 1114 | 2019-08-29,17104.0 1115 | 2019-08-30,18199.0 1116 | 2019-09-03,18123.0 1117 | 2019-09-04,16453.0 1118 | 2019-09-05,16983.0 1119 | 2019-09-06,15865.0 1120 | 2019-09-09,16028.0 1121 | 2019-09-10,15518.0 1122 | 2019-09-11,18438.0 1123 | 2019-09-12,18016.0 1124 | 2019-09-13,17516.0 1125 | 2019-09-16,16501.0 1126 | 2019-09-17,16381.0 1127 | 2019-09-18,17174.0 1128 | 2019-09-19,20349.0 1129 | 2019-09-20,15495.0 1130 | 2019-09-23,16590.0 1131 | 2019-09-24,15548.0 1132 | 2019-09-25,17079.0 1133 | 2019-09-26,16676.0 1134 | 2019-09-27,15320.0 1135 | 2019-09-30,18933.0 1136 | -------------------------------------------------------------------------------- /license.txt: -------------------------------------------------------------------------------- 1 | Freeware License, some rights reserved 2 | 3 | Copyright (c) 2020 Christopher Conlan 4 | 5 | Permission is hereby granted, free of charge, to anyone obtaining a copy 6 | of this software and associated documentation files (the "Software"), 7 | to work with the Software within the limits of freeware distribution and fair use. 8 | This includes the rights to use, copy, and modify the Software for personal use. 
9 | Users are also allowed and encouraged to submit corrections and modifications 10 | to the Software for the benefit of other users. 11 | 12 | It is not allowed to reuse, modify, or redistribute the Software for 13 | commercial use in any way, or for a user’s educational materials such as books 14 | or blog articles without prior permission from the copyright holder. 15 | 16 | The above copyright notice and this permission notice need to be included 17 | in all copies or substantial portions of the software. 18 | 19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | AUTHORS OR COPYRIGHT HOLDERS OR APRESS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 25 | SOFTWARE. 26 |
--------------------------------------------------------------------------------
/listings/chapter_1/1_1_type_hinting_examples.py:
--------------------------------------------------------------------------------
from typing import List, Dict, Tuple, Any
import datetime

# A list of floating point numbers
v: List[float] = [i * 1.23 for i in range(10)]

# A list of mixed type values
v: List[Any] = ['apple', 123, 'banana', None]

# A dictionary of floats indexed by dates
v: Dict[datetime.date, float] = {
    datetime.date.today(): 123.456,
    datetime.date(2000, 1, 1): 234.567,
}

# A dictionary of lists of strings indexed by tuples of integers
v: Dict[Tuple[int, int], List[str]] = {
    (2, 3): [
        'apple',
        'banana',
    ],
    (4, 7): [
        'orange',
        'pineapple',
    ]
}

# An incorrect type hint
# Your compiler or IDE might complain about this
v: List[str] = [1, 2, 3]

# A possibly incorrect type hint
# There is no consensus on whether or not this is correct
v: List[float] = [1, None, 3, None, 5]

# This is non-descript but correct
v: List = [(1, 2, 'a'), (4, 5, 'b')]

# This is more descriptive
v: List[Tuple[int, int, str]] = [(1, 2, 'a'), (4, 5, 'b')]

# Custom types are supported
from typing import NewType
StockTicker = NewType('StockTicker', str)
ticker: StockTicker = StockTicker('AAPL')

# Functions can define input and return types
def convert_to_string(value: Any) -> str:
    return str(value)
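
A note on the "no consensus" hint above: typing.Optional states the intent explicitly. A minimal sketch, not part of the book's listing:

from typing import List, Optional

# A list of floats that may contain missing values, annotated explicitly.
# Optional[float] is shorthand for Union[float, None].
v: List[Optional[float]] = [1.0, None, 3.0, None, 5.0]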
--------------------------------------------------------------------------------
/listings/chapter_1/1_2_pandas_data_types.py:
--------------------------------------------------------------------------------
import pandas as pd
import datetime

data = {
    'SPY': {
        datetime.date(2000, 1, 4): 100,
        datetime.date(2000, 1, 5): 101,
    },
    'AAPL': {
        datetime.date(2000, 1, 4): 300,
        datetime.date(2000, 1, 5): 303,
    },
}
df: pd.DataFrame = pd.DataFrame(data=data)
print(df)
# Returns ...
#             SPY  AAPL
# 2000-01-04  100   300
# 2000-01-05  101   303

# Index by column
aapl_series: pd.Series = df['AAPL']
print(aapl_series)
# Returns ...
# 2000-01-04    300
# 2000-01-05    303
# Name: AAPL, dtype: int64

# Index by row
start_of_year_row: pd.Series = df.loc[datetime.date(2000, 1, 4)]
print(start_of_year_row)
# Returns ...
# SPY     100
# AAPL    300
# Name: 2000-01-04, dtype: int64

# Index by both (yields a scalar, not a pd.Series)
start_of_year_price: int = df['AAPL'][datetime.date(2000, 1, 4)]
print(start_of_year_price)
# Returns ...
# 300
--------------------------------------------------------------------------------
/listings/chapter_1/1_3_pandas_data_types_part_2.py:
--------------------------------------------------------------------------------
import pandas as pd
import datetime

data = {
    'SPY': {
        datetime.date(2000, 1, 4): 100,
        datetime.date(2000, 1, 5): 101,
    },
    'AAPL': {
        datetime.date(2000, 1, 4): 300,
        datetime.date(2000, 1, 5): 303,
    },
}

### Begin listing

# Create a series
series = pd.Series(data=data['SPY'])
print(series)
# Returns ...
# 2000-01-04    100
# 2000-01-05    101
# dtype: int64
--------------------------------------------------------------------------------
/listings/chapter_1/1_4_pandas_indexes.py:
--------------------------------------------------------------------------------
import pandas as pd
import datetime

### Begin listing

dates = [datetime.date(2000, 1, i) for i in range(1, 11)]
values = [i**2 for i in range(1, 11)]
series = pd.Series(data=values, index=dates)

# O(n) time complexity search through a list
print(datetime.date(2000, 1, 5) in dates)
# Returns ...
# True

# O(1) time complexity search through an index
print(datetime.date(2000, 1, 5) in series.index)
# Returns ...
# True
--------------------------------------------------------------------------------
/listings/chapter_2/2_10_maximum_drawdown.py:
--------------------------------------------------------------------------------
from typing import Dict, Any, Callable

import numpy as np
import pandas as pd

DRAWDOWN_EVALUATORS: Dict[str, Callable] = {
    'dollar': lambda price, peak: peak - price,
    'percent': lambda price, peak: -((price / peak) - 1),
    'log': lambda price, peak: np.log(peak) - np.log(price),
}

def calculate_drawdown_series(series: pd.Series, method: str='log') -> pd.Series:
    """
    Returns the drawdown series
    """
    assert method in DRAWDOWN_EVALUATORS, \
        f'Method "{method}" must be one of {list(DRAWDOWN_EVALUATORS.keys())}'

    evaluator = DRAWDOWN_EVALUATORS[method]
    return evaluator(series, series.cummax())

def calculate_max_drawdown(series: pd.Series, method: str='log') -> float:
    """
    Simply returns the max drawdown as a float
    """
    return calculate_drawdown_series(series, method).max()
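
A minimal usage sketch of the drawdown functions above, assuming they are in scope; the prices are made up for illustration and are not part of the book's listings:

import pandas as pd

# Hypothetical date-indexed price series
prices = pd.Series(
    [100.0, 110.0, 95.0, 105.0, 90.0],
    index=pd.date_range('2020-01-01', periods=5),
)

# Worst peak-to-trough decline: (110 - 90) / 110, about 18.2 percent
print(calculate_max_drawdown(prices, method='percent'))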
-------------------------------------------------------------------------------- /listings/chapter_2/2_11_maximum_drawdown_with_metadata.py: -------------------------------------------------------------------------------- 1 | def calculate_max_drawdown_with_metadata(series: pd.Series, 2 | method: str='log') -> Dict[str, Any]: 3 | """ 4 | Calculates max_drawdown and stores metadata about when and where. Returns 5 | a dictionary of the form 6 | { 7 | 'max_drawdown': float, 8 | 'peak_date': pd.Timestamp, 9 | 'peak_price': float, 10 | 'trough_date': pd.Timestamp, 11 | 'trough_price': float, 12 | } 13 | """ 14 | 15 | assert method in DRAWDOWN_EVALUATORS, \ 16 | f'Method "{method}" must be one of {list(DRAWDOWN_EVALUATORS.keys())}' 17 | 18 | evaluator = DRAWDOWN_EVALUATORS[method] 19 | 20 | max_drawdown = 0 21 | local_peak_date = peak_date = trough_date = series.index[0] 22 | local_peak_price = peak_price = trough_price = series.iloc[0] 23 | 24 | for date, price in series.items(): 25 | 26 | # Keep track of the rolling max 27 | if price > local_peak_price: 28 | local_peak_date = date 29 | local_peak_price = price 30 | 31 | # Compute the drawdown 32 | drawdown = evaluator(price, local_peak_price) 33 | 34 | # Store new max drawdown values 35 | if drawdown > max_drawdown: 36 | max_drawdown = drawdown 37 | 38 | peak_date = local_peak_date 39 | peak_price = local_peak_price 40 | 41 | trough_date = date 42 | trough_price = price 43 | 44 | return { 45 | 'max_drawdown': max_drawdown, 46 | 'peak_date': peak_date, 47 | 'peak_price': peak_price, 48 | 'trough_date': trough_date, 49 | 'trough_price': trough_price 50 | } -------------------------------------------------------------------------------- /listings/chapter_2/2_12_log_max_drawdown_ratio.py: -------------------------------------------------------------------------------- 1 | def calculate_log_max_drawdown_ratio(series: pd.Series) -> float: 2 | log_drawdown = calculate_max_drawdown(series, method='log') 3 | log_return = np.log(series.iloc[-1]) - np.log(series.iloc[0]) 4 | return log_return - log_drawdown -------------------------------------------------------------------------------- /listings/chapter_2/2_13_calmar_ratio.py: -------------------------------------------------------------------------------- 1 | def calculate_calmar_ratio(series: pd.Series, years_past: int=3) -> float: 2 | """ 3 | Return the percent max drawdown ratio over the trailing years_past years 4 | (three by default) using CAGR as the numerator, otherwise known as the Calmar Ratio 5 | """ 6 | 7 | # Filter series to the trailing years_past years 8 | last_date = series.index[-1] 9 | three_years_ago = last_date - pd.Timedelta(days=years_past*365.25) 10 | series = series[series.index > three_years_ago] 11 | 12 | # Compute annualized percent max drawdown ratio 13 | percent_drawdown = calculate_max_drawdown(series, method='percent') 14 | cagr = calculate_cagr(series) 15 | return cagr / percent_drawdown -------------------------------------------------------------------------------- /listings/chapter_2/2_14_pure_profit_score.py: -------------------------------------------------------------------------------- 1 | from sklearn.linear_model import LinearRegression 2 | 3 | def calculate_pure_profit_score(price_series: pd.Series) -> float: 4 | """ 5 | Calculates the pure profit score 6 | """ 7 | cagr = calculate_cagr(price_series) 8 | 9 | # Build a single column for a predictor, t 10 | t: np.ndarray = np.arange(0, price_series.shape[0]).reshape(-1, 1) 11 | 12 | # Fit the regression 13 | regression = LinearRegression().fit(t, price_series) 14 | 15 | # Get the r-squared value 16 | r_squared = regression.score(t, price_series) 17 | 18 | return cagr * r_squared 19 |
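20 | # Intuition check: a perfectly straight-line price series fits the regression 21 | # exactly (r_squared == 1), so the score collapses to the CAGR, and noise can 22 | # only lower it. Illustrative only, assuming numpy as np and pandas as pd: 23 | # 24 | # dates = pd.date_range('2018-01-01', periods=366) 25 | # series = pd.Series(np.linspace(100, 110, 366), index=dates) 26 | # calculate_pure_profit_score(series) # approximately the CAGR, about 0.10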
-------------------------------------------------------------------------------- /listings/chapter_2/2_15_jensens_alpha.py: -------------------------------------------------------------------------------- 1 | def calculate_jensens_alpha(return_series: pd.Series, 2 | benchmark_return_series: pd.Series) -> float: 3 | """ 4 | Calculates Jensen's alpha. Prefers input series to have the same index. 5 | Handles NAs. 6 | """ 7 | 8 | # Join series along date index and purge NAs 9 | df = pd.concat([return_series, benchmark_return_series], sort=True, axis=1) 10 | df = df.dropna() 11 | 12 | # Get the appropriate data structure for scikit learn 13 | clean_returns: pd.Series = df[return_series.name] 14 | clean_benchmarks = pd.DataFrame(df[benchmark_return_series.name]) 15 | 16 | # Fit a linear regression and return the alpha 17 | regression = LinearRegression().fit(clean_benchmarks, y=clean_returns) 18 | return regression.intercept_ -------------------------------------------------------------------------------- /listings/chapter_2/2_1_return_series_pure_python.py: -------------------------------------------------------------------------------- 1 | def calculate_return_series(prices: List[float]) -> List[float]: 2 | """ 3 | Calculates return series as a parallel list of returns on prices 4 | """ 5 | return_series = [None] 6 | for i in range(1, len(prices)): 7 | return_series.append((prices[i] / prices[i-1]) - 1) 8 | 9 | return return_series -------------------------------------------------------------------------------- /listings/chapter_2/2_2_return_series_pandas.py: -------------------------------------------------------------------------------- 1 | def calculate_return_series(series: pd.Series) -> pd.Series: 2 | """ 3 | Calculates the return series of a time series. 4 | The first value will always be NaN. 5 | Output series retains the index of the input series. 6 | """ 7 | shifted_series = series.shift(1, axis=0) 8 | return series / shifted_series - 1 -------------------------------------------------------------------------------- /listings/chapter_2/2_3_log_return_series.py: -------------------------------------------------------------------------------- 1 | def calculate_log_return_series(series: pd.Series) -> pd.Series: 2 | """ 3 | Same as calculate_return_series but with log returns 4 | """ 5 | shifted_series = series.shift(1, axis=0) 6 | return pd.Series(np.log(series / shifted_series))
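7 | 8 | # The two are linked by log(1 + r): log returns nearly equal simple returns 9 | # for small moves, and they sum across time. A quick check, assuming numpy 10 | # as np and pandas as pd: 11 | # 12 | # series = pd.Series([100.0, 101.0, 102.0]) 13 | # calculate_return_series(series) # NaN, 0.0100, 0.0099... 14 | # calculate_log_return_series(series) # NaN, 0.00995..., 0.00985...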
14 | """ 15 | years_past = get_years_past(return_series) 16 | entries_per_year = return_series.shape[0] / years_past 17 | return return_series.std() * np.sqrt(entries_per_year) -------------------------------------------------------------------------------- /listings/chapter_2/2_5_annualized_volatility_on_awu.py: -------------------------------------------------------------------------------- 1 | from pypm import data_io, metrics 2 | 3 | df = data_io.load_eod_data('AWU') 4 | return_series = metrics.calculate_log_return_series(df['close']) 5 | print(metrics.calculate_annualized_volatility(return_series)) 6 | -------------------------------------------------------------------------------- /listings/chapter_2/2_6_calculating_cagr.py: -------------------------------------------------------------------------------- 1 | def calculate_cagr(series: pd.Series) -> float: 2 | """ 3 | Calculate compounded annual growth rate 4 | """ 5 | value_factor = series.iloc[-1] / series.iloc[0] 6 | year_past = get_years_past(series) 7 | return (value_factor ** (1 / year_past)) - 1 -------------------------------------------------------------------------------- /listings/chapter_2/2_7_calculating_cagr_on_awu.py: -------------------------------------------------------------------------------- 1 | from pypm import data_io, metrics 2 | 3 | df = data_io.load_eod_data('AWU') 4 | print(metrics.calculate_cagr(df['close'])) -------------------------------------------------------------------------------- /listings/chapter_2/2_8_calculating_sharpe_ratio.py: -------------------------------------------------------------------------------- 1 | def calculate_sharpe_ratio(price_series: pd.Series, 2 | benchmark_rate: float=0) -> float: 3 | """ 4 | Calculates the sharpe ratio given a price series. Defaults to benchmark_rate 5 | of zero. 6 | """ 7 | cagr = calculate_cagr(price_series) 8 | return_series = calculate_return_series(price_series) 9 | volatility = calculate_annualized_volatility(return_series) 10 | return (cagr - benchmark_rate) / volatility -------------------------------------------------------------------------------- /listings/chapter_2/2_9_calculating_downside_volatility.py: -------------------------------------------------------------------------------- 1 | def calculate_annualized_downside_deviation(return_series: pd.Series, 2 | benchmark_rate: float=0) -> float: 3 | """ 4 | Calculates the downside deviation for use in the sortino ratio. 5 | 6 | Benchmark rate is assumed to be annualized. It will be adjusted according 7 | to the number of periods per year seen in the data. 8 | """ 9 | 10 | # For both de-annualizing the benchmark rate and annualizing result 11 | years_past = get_years_past(return_series) 12 | entries_per_year = return_series.shape[0] / years_past 13 | 14 | adjusted_benchmark_rate = ((1+benchmark_rate) ** (1/entries_per_year)) - 1 15 | 16 | downside_series = adjusted_benchmark_rate - return_series 17 | downside_sum_of_squares = (downside_series[downside_series > 0] ** 2).sum() 18 | denominator = return_series.shape[0] - 1 19 | downside_deviation = np.sqrt(downside_sum_of_squares / denominator) 20 | 21 | return downside_deviation * np.sqrt(entries_per_year) 22 | 23 | def calculate_sortino_ratio(price_series: pd.Series, 24 | benchmark_rate: float=0) -> float: 25 | """ 26 | Calculates the sortino ratio. 
27 | """ 28 | cagr = calculate_cagr(price_series) 29 | return_series = calculate_return_series(price_series) 30 | downside_deviation = calculate_annualized_downside_deviation(return_series) 31 | return (cagr - benchmark_rate) / downside_deviation -------------------------------------------------------------------------------- /listings/chapter_3/3_1_calculate_simple_moving_average.py: -------------------------------------------------------------------------------- 1 | def calculate_simple_moving_average(series: pd.Series, n: int=20) -> pd.Series: 2 | """Calculates the simple moving average""" 3 | return series.rolling(n).mean() -------------------------------------------------------------------------------- /listings/chapter_3/3_2_slow_simple_moving_average.py: -------------------------------------------------------------------------------- 1 | def slow_moving_average(values: List[float], m: int=20): 2 | """ 3 | This is O(nm) time, because it re-computes the sum at every step 4 | 1 + 2 + 3 + 4 + ... / m 5 | 2 + 3 + 4 + 5 + ... / m 6 | 3 + 4 + 5 + 6 + ... / m 7 | 4 + 5 + 6 + 7 + ... / m 8 | and so on ... 9 | Leading to approx (m-1) * n individual additions. 10 | """ 11 | 12 | # Initial values 13 | moving_average = [None] * (m-1) 14 | 15 | for i in range(m-1, len(values)): 16 | the_average = np.mean(values[(i-m+1):i+1]) 17 | moving_average.append(the_average) 18 | 19 | return moving_average -------------------------------------------------------------------------------- /listings/chapter_3/3_3_fast_simple_moving_average.py: -------------------------------------------------------------------------------- 1 | def fast_moving_average(values: List[float], m: int=20): 2 | """ 3 | This is O(n) time, because it keeps track of the intermediate sum. 4 | Leading to approx 2n individual additions. 
5 | """ 6 | 7 | # Initial values 8 | moving_average = [None] * (m-1) 9 | accumulator = sum(values[:m]) 10 | moving_average.append(accumulator / m) 11 | 12 | for i in range(m, len(values)): 13 | accumulator -= values[i-m] 14 | accumulator += values[i] 15 | moving_average.append(accumulator / m) 16 | 17 | return moving_average -------------------------------------------------------------------------------- /listings/chapter_3/3_4_calculating_macd.py: -------------------------------------------------------------------------------- 1 | def calculate_macd_oscillator(series: pd.Series, 2 | n1: int=5, n2: int=34) -> pd.Series: 3 | """ 4 | Calculate the moving average convergence divergence oscillator, given a 5 | short moving average of length n1 and a long moving average of length n2 6 | """ 7 | assert n1 < n2, f'n1 must be less than n2' 8 | return calculate_simple_moving_average(series, n1) - \ 9 | calculate_simple_moving_average(series, n2) -------------------------------------------------------------------------------- /listings/chapter_3/3_5_calculate_bollinger_bands.py: -------------------------------------------------------------------------------- 1 | def calculate_bollinger_bands(series: pd.Series, n: int=20) -> pd.DataFrame: 2 | """ 3 | Calculates the bollinger bands and returns them as a dataframe 4 | """ 5 | 6 | sma = calculate_simple_moving_average(series, n) 7 | stdev = calculate_simple_moving_sample_stdev(series, n) 8 | 9 | return pd.DataFrame({ 10 | 'middle': sma, 11 | 'upper': sma + 2 * stdev, 12 | 'lower': sma - 2 * stdev 13 | }) -------------------------------------------------------------------------------- /listings/chapter_3/3_6_calculate_chaikin_money_flow.py: -------------------------------------------------------------------------------- 1 | def calculate_money_flow_volume_series(df: pd.DataFrame) -> pd.Series: 2 | """ 3 | Calculates money flow series 4 | """ 5 | mfv = df['volume'] * (2*df['close'] - df['high'] - df['low']) / \ 6 | (df['high'] - df['low']) 7 | return mfv 8 | 9 | def calculate_money_flow_volume(df: pd.DataFrame, n: int=20) -> pd.Series: 10 | """ 11 | Calculates money flow volume, or q_t in our formula 12 | """ 13 | return calculate_money_flow_volume_series(df).rolling(n).sum() 14 | 15 | def calculate_chaikin_money_flow(df: pd.DataFrame, n: int=20) -> pd.Series: 16 | """ 17 | Calculates the Chaikin money flow 18 | """ 19 | return calculate_money_flow_volume(df, n) / df['volume'].rolling(n).sum() -------------------------------------------------------------------------------- /listings/chapter_3/3_7_example_signals.py: -------------------------------------------------------------------------------- 1 | def create_macd_signal(series: pd.Series, n1: int=5, n2: int=34) -> pd.Series: 2 | """ 3 | Create a momentum-based signal based on the MACD crossover principle. 4 | Generate a buy signal when the MACD cross above zero, and a sell signal when 5 | it crosses below zero. 6 | """ 7 | 8 | # Calculate the macd and get the signs of the values. 9 | macd = calculate_macd_oscillator(series, n1, n2) 10 | macd_sign = np.sign(macd) 11 | 12 | # Create a copy shifted by some amount. 13 | macd_shifted_sign = macd_sign.shift(1, axis=0) 14 | 15 | # Multiply by the sign by the boolean. This will have the effect of casting 16 | # the boolean to an integer (either 0 or 1) and then multiply by the sign 17 | # (either -1, 0 or 1). 
-------------------------------------------------------------------------------- /listings/chapter_3/3_7_example_signals.py: -------------------------------------------------------------------------------- 1 | def create_macd_signal(series: pd.Series, n1: int=5, n2: int=34) -> pd.Series: 2 | """ 3 | Create a momentum-based signal based on the MACD crossover principle. 4 | Generate a buy signal when the MACD crosses above zero, and a sell signal 5 | when it crosses below zero. 6 | """ 7 | 8 | # Calculate the MACD and get the signs of the values. 9 | macd = calculate_macd_oscillator(series, n1, n2) 10 | macd_sign = np.sign(macd) 11 | 12 | # Create a copy shifted by some amount. 13 | macd_shifted_sign = macd_sign.shift(1, axis=0) 14 | 15 | # Multiply the sign by the boolean. This has the effect of casting the 16 | # boolean to an integer (either 0 or 1) and then multiplying it by the sign 17 | # (either -1, 0 or 1). 18 | return macd_sign * (macd_sign != macd_shifted_sign) 19 | 20 | 21 | def create_bollinger_band_signal(series: pd.Series, n: int=20) -> pd.Series: 22 | """ 23 | Create a reversal-based signal based on the upper and lower bands of the 24 | Bollinger bands. Generate a buy signal when the price is below the lower 25 | band, and a sell signal when the price is above the upper band. 26 | """ 27 | bollinger_bands = calculate_bollinger_bands(series, n) 28 | sell = series > bollinger_bands['upper'] 29 | buy = series < bollinger_bands['lower'] 30 | return (1*buy - 1*sell) -------------------------------------------------------------------------------- /listings/chapter_4/4_1_assertions_example.py: -------------------------------------------------------------------------------- 1 | assert 2 + 2 == 4, 'The laws of mathematics are crumbling.' 2 | assert 2 + 2 == 5, 'You will see this message in an AssertionError.' -------------------------------------------------------------------------------- /listings/chapter_4/4_2_position_class.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | 4 | from typing import Tuple, List, Dict, Callable, NewType, Any 5 | from collections import OrderedDict, defaultdict 6 | 7 | from pypm import metrics, signals, data_io 8 | 9 | Symbol = NewType('Symbol', str) 10 | Dollars = NewType('Dollars', float) 11 | 12 | DATE_FORMAT_STR = '%a %b %d, %Y' 13 | def _pdate(date: pd.Timestamp): 14 | """Pretty-print a datetime with just the date""" 15 | return date.strftime(DATE_FORMAT_STR) 16 | 17 | class Position(object): 18 | """ 19 | A simple object to hold and manipulate data related to long stock trades. 20 | 21 | Allows a single buy and sell operation on an asset for a constant number of 22 | shares. 23 | 24 | The __init__ method is equivalent to a buy operation. The exit 25 | method is a sell operation. 26 | """ 27 | 28 | def __init__(self, symbol: Symbol, entry_date: pd.Timestamp, 29 | entry_price: Dollars, shares: int): 30 | """ 31 | Equivalent to buying a certain number of shares of the asset 32 | """ 33 | 34 | # Recorded on initialization 35 | self.entry_date = entry_date 36 | self.entry_price = entry_price 37 | self.shares = shares 38 | self.symbol = symbol 39 | 40 | # Recorded on position exit 41 | self.exit_date: pd.Timestamp = None 42 | self.exit_price: Dollars = None 43 | 44 | # For easily getting current portfolio value 45 | self.last_date: pd.Timestamp = None 46 | self.last_price: Dollars = None 47 | 48 | # Updated intermediately 49 | self._dict_series: Dict[pd.Timestamp, Dollars] = OrderedDict() 50 | self.record_price_update(entry_date, entry_price) 51 | 52 | # Cache control for pd.Series representation 53 | self._price_series: pd.Series = None 54 | self._needs_update_pd_series: bool = True 55 | 56 | def exit(self, exit_date, exit_price): 57 | """ 58 | Equivalent to selling a stock holding 59 | """ 60 | assert self.entry_date != exit_date, 'Churned a position same-day.' 61 | assert not self.exit_date, 'Position already closed.'
62 | self.record_price_update(exit_date, exit_price) 63 | self.exit_date = exit_date 64 | self.exit_price = exit_price 65 | 66 | def record_price_update(self, date, price): 67 | """ 68 | Record an intermediate price update on the open position 69 | """ 70 | self.last_date = date 71 | self.last_price = price 72 | self._dict_series[date] = price 73 | 74 | # Invalidate cache on self.price_series 75 | self._needs_update_pd_series = True 76 | 77 | @property 78 | def price_series(self) -> pd.Series: 79 | """ 80 | Returns cached readonly pd.Series 81 | """ 82 | if self._needs_update_pd_series or self._price_series is None: 83 | self._price_series = pd.Series(self._dict_series) 84 | self._needs_update_pd_series = False 85 | return self._price_series 86 | 87 | @property 88 | def last_value(self) -> Dollars: 89 | return self.last_price * self.shares 90 | 91 | @property 92 | def is_active(self) -> bool: 93 | return self.exit_date is None 94 | 95 | @property 96 | def is_closed(self) -> bool: 97 | return not self.is_active 98 | 99 | @property 100 | def value_series(self) -> pd.Series: 101 | """ 102 | Returns the value of the position over time. Ignores self.exit_date. 103 | Used in calculating the equity curve. 104 | """ 105 | assert self.is_closed, 'Position must be closed to access this property' 106 | return self.shares * self.price_series[:-1] 107 | 108 | @property 109 | def percent_return(self) -> float: 110 | return (self.exit_price / self.entry_price) - 1 111 | 112 | @property 113 | def entry_value(self) -> Dollars: 114 | return self.shares * self.entry_price 115 | 116 | @property 117 | def exit_value(self) -> Dollars: 118 | return self.shares * self.exit_price 119 | 120 | @property 121 | def change_in_value(self) -> Dollars: 122 | return self.exit_value - self.entry_value 123 | 124 | @property 125 | def trade_length(self): 126 | return len(self._dict_series) - 1 127 | 128 | def print_position_summary(self): 129 | _entry_date = _pdate(self.entry_date) 130 | _exit_date = _pdate(self.exit_date) 131 | _days = self.trade_length 132 | 133 | _entry_price = round(self.entry_price, 2) 134 | _exit_price = round(self.exit_price, 2) 135 | 136 | _entry_value = round(self.entry_value, 2) 137 | _exit_value = round(self.exit_value, 2) 138 | 139 | _return = round(100 * self.percent_return, 1) 140 | _diff = round(self.change_in_value, 2) 141 | 142 | print(f'{self.symbol:<5} Trade summary') 143 | print(f'Date: {_entry_date} -> {_exit_date} [{_days} days]') 144 | print(f'Price: ${_entry_price} -> ${_exit_price} [{_return}%]') 145 | print(f'Value: ${_entry_value} -> ${_exit_value} [${_diff}]') 146 | print() 147 | 148 | def __hash__(self): 149 | """ 150 | A unique position will be defined by a unique combination of an 151 | entry_date and symbol, in accordance with our constraints regarding 152 | duplicate, variable, and compound positions 153 | """ 154 | return hash((self.entry_date, self.symbol))
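155 | 156 | # Illustrative check of the hashing contract (hypothetical values): two 157 | # positions opened on the same date in the same symbol hash identically. 158 | # 159 | # a = Position('AWU', pd.Timestamp('2020-01-02'), 100.0, 10) 160 | # b = Position('AWU', pd.Timestamp('2020-01-02'), 105.0, 20) 161 | # assert hash(a) == hash(b)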
-------------------------------------------------------------------------------- /listings/chapter_4/4_3_position_object_usage.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from pypm import data_io, portfolio 3 | 4 | symbol = 'AWU' 5 | df = data_io.load_eod_data(symbol) 6 | shares_to_buy = 50 7 | 8 | for i, row in enumerate(df.itertuples()): 9 | date = row.Index 10 | price = row.close 11 | 12 | if i == 123: 13 | position = portfolio.Position(symbol, date, price, shares_to_buy) 14 | elif 123 < i < 234: 15 | position.record_price_update(date, price) 16 | elif i == 234: 17 | position.exit(date, price) 18 | 19 | position.print_position_summary() 20 | 21 | # Returns ... 22 | # AWU Trade summary 23 | # Date: Wed Jun 30, 2010 -> Tue Dec 07, 2010 [111 days] 24 | # Price: $220.34 -> $305.98 [38.9%] 25 | # Value: $11017.0 -> $15299.0 [$4282.0] -------------------------------------------------------------------------------- /listings/chapter_4/4_4_portfolio_history_class.py: -------------------------------------------------------------------------------- 1 | class PortfolioHistory(object): 2 | """ 3 | Holds Position objects and keeps track of portfolio variables. 4 | Produces summary statistics. 5 | """ 6 | 7 | def __init__(self): 8 | # Keep track of positions, recorded in this list after close 9 | self.position_history: List[Position] = [] 10 | self._logged_positions: Set[Position] = set() 11 | 12 | # Keep track of the last seen date 13 | self.last_date: pd.Timestamp = pd.Timestamp.min 14 | 15 | # Readonly fields 16 | self._cash_history: Dict[pd.Timestamp, Dollars] = dict() 17 | self._simulation_finished = False 18 | self._spy: pd.DataFrame = pd.DataFrame() 19 | self._spy_log_returns: pd.Series = pd.Series() 20 | 21 | def add_to_history(self, position: Position): 22 | _log = self._logged_positions 23 | assert not position in _log, 'Recorded the same position twice.' 24 | assert position.is_closed, 'Position is not closed.' 25 | self._logged_positions.add(position) 26 | self.position_history.append(position) 27 | self.last_date = max(self.last_date, position.last_date) 28 | 29 | def record_cash(self, date, cash): 30 | self._cash_history[date] = cash 31 | self.last_date = max(self.last_date, date) 32 | 33 | @staticmethod 34 | def _as_oseries(d: Dict[pd.Timestamp, Any]) -> pd.Series: 35 | return pd.Series(d).sort_index() 36 | 37 | def _compute_cash_series(self): 38 | self._cash_series = self._as_oseries(self._cash_history) 39 | 40 | @property 41 | def cash_series(self) -> pd.Series: 42 | return self._cash_series 43 | 44 | def _compute_portfolio_value_series(self): 45 | value_by_date = defaultdict(float) 46 | last_date = self.last_date 47 | 48 | # Add up value of assets 49 | for position in self.position_history: 50 | for date, value in position.value_series.items(): 51 | value_by_date[date] += value 52 | 53 | # Make sure all dates in cash_series are present 54 | for date in self.cash_series.index: 55 | value_by_date[date] += 0 56 | 57 | self._portfolio_value_series = self._as_oseries(value_by_date) 58 | 59 | @property 60 | def portfolio_value_series(self): 61 | return self._portfolio_value_series 62 | 63 | def _compute_equity_series(self): 64 | c_series = self.cash_series 65 | p_series = self.portfolio_value_series 66 | assert all(c_series.index == p_series.index), \ 67 | 'portfolio_series has dates not in cash_series' 68 | self._equity_series = c_series + p_series 69 | 70 | @property 71 | def equity_series(self): 72 | return self._equity_series 73 | 74 | def _compute_log_return_series(self): 75 | self._log_return_series = \ 76 | metrics.calculate_log_return_series(self.equity_series) 77 | 78 | @property 79 | def log_return_series(self): 80 | return self._log_return_series 81 | 82 | def _assert_finished(self): 83 | assert self._simulation_finished, \ 84 | 'Simulation must be finished by running self.finish() in order ' + \ 85 | 'to access this method or property.'
86 | 87 | def finish(self): 88 | """ 89 | Notate that the simulation is finished and compute readonly values 90 | """ 91 | self._simulation_finished = True 92 | self._compute_cash_series() 93 | self._compute_portfolio_value_series() 94 | self._compute_equity_series() 95 | self._compute_log_return_series() 96 | self._assert_finished() 97 | 98 | def compute_portfolio_size_series(self) -> pd.Series: 99 | size_by_date = defaultdict(int) 100 | for position in self.position_history: 101 | for date in position.value_series.index: 102 | size_by_date[date] += 1 103 | return self._as_oseries(size_by_date) 104 | 105 | @property 106 | def spy(self): 107 | if self._spy.empty: 108 | self._spy = data_io.load_spy_data() 109 | return self._spy 110 | 111 | @property 112 | def spy_log_returns(self): 113 | if self._spy_log_returns.empty: 114 | close = self.spy['close'] 115 | self._spy_log_returns = metrics.calculate_log_return_series(close) 116 | return self._spy_log_returns 117 | 118 | @property 119 | def percent_return(self): 120 | return metrics.calculate_percent_return(self.equity_series) 121 | 122 | @property 123 | def spy_percent_return(self): 124 | return metrics.calculate_percent_return(self.spy['close']) 125 | 126 | @property 127 | def cagr(self): 128 | return metrics.calculate_cagr(self.equity_series) 129 | 130 | @property 131 | def volatility(self): 132 | return metrics.calculate_annualized_volatility(self.log_return_series) 133 | 134 | @property 135 | def sharpe_ratio(self): 136 | return metrics.calculate_sharpe_ratio(self.equity_series) 137 | 138 | @property 139 | def spy_cagr(self): 140 | return metrics.calculate_cagr(self.spy['close']) 141 | 142 | @property 143 | def excess_cagr(self): 144 | return self.cagr - self.spy_cagr 145 | 146 | @property 147 | def jensens_alpha(self): 148 | return metrics.calculate_jensens_alpha( 149 | self.log_return_series, 150 | self.spy_log_returns, 151 | ) 152 | 153 | @property 154 | def dollar_max_drawdown(self): 155 | return metrics.calculate_max_drawdown(self.equity_series, 'dollar') 156 | 157 | @property 158 | def percent_max_drawdown(self): 159 | return metrics.calculate_max_drawdown(self.equity_series, 'percent') 160 | 161 | @property 162 | def log_max_drawdown_ratio(self): 163 | return metrics.calculate_log_max_drawdown_ratio(self.equity_series) 164 | 165 | @property 166 | def number_of_trades(self): 167 | return len(self.position_history) 168 | 169 | @property 170 | def average_active_trades(self): 171 | return self.compute_portfolio_size_series().mean() 172 | 173 | @property 174 | def final_cash(self): 175 | self._assert_finished() 176 | return self.cash_series[-1] 177 | 178 | @property 179 | def final_equity(self): 180 | self._assert_finished() 181 | return self.equity_series[-1] 182 | 183 | def print_position_summaries(self): 184 | for position in self.position_history: 185 | position.print_position_summary() 186 | 187 | def print_summary(self): 188 | self._assert_finished() 189 | s = f'Equity: ${self.final_equity:.2f}\n' \ 190 | f'Percent Return: {100*self.percent_return:.2f}%\n' \ 191 | f'S&P 500 Return: {100*self.spy_percent_return:.2f}%\n\n' \ 192 | f'Number of trades: {self.number_of_trades}\n' \ 193 | f'Average active trades: {self.average_active_trades:.2f}\n\n' \ 194 | f'CAGR: {100*self.cagr:.2f}%\n' \ 195 | f'S&P 500 CAGR: {100*self.spy_cagr:.2f}%\n' \ 196 | f'Excess CAGR: {100*self.excess_cagr:.2f}%\n\n' \ 197 | f'Annualized Volatility: {100*self.volatility:.2f}%\n' \ 198 | f'Sharpe Ratio: {self.sharpe_ratio:.2f}\n' \ 199 | f'Jensen\'s Alpha: 
{self.jensens_alpha:.6f}\n\n' \ 200 | f'Dollar Max Drawdown: ${self.dollar_max_drawdown:.2f}\n' \ 201 | f'Percent Max Drawdown: {100*self.percent_max_drawdown:.2f}%\n' \ 202 | f'Log Max Drawdown Ratio: {self.log_max_drawdown_ratio:.2f}\n' 203 | 204 | print(s) 205 | 206 | def plot(self, show=True) -> plt.Figure: 207 | """ 208 | Plots equity, cash and portfolio value curves. 209 | """ 210 | self._assert_finished() 211 | 212 | figure, axes = plt.subplots(nrows=3, ncols=1) 213 | figure.tight_layout(pad=3.0) 214 | axes[0].plot(self.equity_series) 215 | axes[0].set_title('Equity') 216 | axes[0].grid() 217 | 218 | axes[1].plot(self.cash_series) 219 | axes[1].set_title('Cash') 220 | axes[1].grid() 221 | 222 | axes[2].plot(self.portfolio_value_series) 223 | axes[2].set_title('Portfolio Value') 224 | axes[2].grid() 225 | 226 | if show: 227 | plt.show() 228 | 229 | return figure 230 | -------------------------------------------------------------------------------- /listings/chapter_4/4_5_portfolio_history_usage.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from pypm import data_io 3 | from pypm.portfolio import Position, PortfolioHistory 4 | 5 | symbol = 'AWU' 6 | df = data_io.load_eod_data(symbol) 7 | 8 | portfolio_history = PortfolioHistory() 9 | initial_cash = cash = 10000 10 | 11 | for i, row in enumerate(df.itertuples()): 12 | date = row.Index 13 | price = row.close 14 | 15 | if i == 123: 16 | # Figure out how many shares to buy 17 | shares_to_buy = initial_cash / price 18 | 19 | # Record the position 20 | position = Position(symbol, date, price, shares_to_buy) 21 | 22 | # Spend all of your cash 23 | cash -= initial_cash 24 | 25 | elif 123 < i < 2345: 26 | position.record_price_update(date, price) 27 | 28 | elif i == 2345: 29 | # Sell the asset 30 | position.exit(date, price) 31 | 32 | # Get your cash back 33 | cash += price * shares_to_buy 34 | 35 | # Record the position 36 | portfolio_history.add_to_history(position) 37 | 38 | # Record cash at every step 39 | portfolio_history.record_cash(date, cash) 40 | 41 | portfolio_history.finish() 42 | 43 | portfolio_history.print_position_summaries() 44 | # Returns ... 45 | # AWU Trade summary 46 | # Date: Wed Jun 30, 2010 -> Tue Apr 30, 2019 [2222 days] 47 | # Price: $220.34 -> $386.26 [75.3%] 48 | # Value: $10000.0 -> $17530.18 [$7530.18] 49 | 50 | portfolio_history.print_summary() 51 | # Returns ... 
52 | # Equity: $17530.18 53 | # Percent Return: 75.30% 54 | # S&P 500 Return: 184.00% 55 | # 56 | # Number of trades: 1 57 | # Average active trades: 1.00 58 | # 59 | # CAGR: 5.78% 60 | # S&P 500 CAGR: 11.02% 61 | # Excess CAGR: -5.24% 62 | # 63 | # Annualized Volatility: 29.97% 64 | # Sharpe Ratio: 0.19 65 | # Jensen's Alpha: -0.000198 66 | # 67 | # Dollar Max Drawdown: $9006.08 68 | # Percent Max Drawdown: 60.08% 69 | # Log Max Drawdown Ratio: -0.36 70 | 71 | portfolio_history.plot() -------------------------------------------------------------------------------- /listings/chapter_4/4_6_simple_simulator_class.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple, List, Dict, Callable, NewType, Any, Iterable 2 | 3 | import pandas as pd 4 | import matplotlib.pyplot as plt 5 | 6 | from pypm import metrics, signals, data_io 7 | from pypm.portfolio import PortfolioHistory, Position, Symbol, Dollars 8 | 9 | from collections import OrderedDict, defaultdict 10 | 11 | class SimpleSimulator(object): 12 | """ 13 | A simple trading simulator to work with the PortfolioHistory class 14 | """ 15 | 16 | def __init__(self, initial_cash: float=10000, max_active_positions: int=5, 17 | percent_slippage: float=0.0005, trade_fee: float=1): 18 | 19 | ### Set simulation parameters 20 | 21 | # Initial cash in portfolio 22 | # self.cash will fluctuate 23 | self.initial_cash = self.cash = initial_cash 24 | 25 | # Maximum number of different assets that can be held simultaneously 26 | self.max_active_positions: int = max_active_positions 27 | 28 | # The percentage difference between closing price and fill price for the 29 | # position, to simulate adverse effects of market orders 30 | self.percent_slippage = percent_slippage 31 | 32 | # The fixed fee in order to open a position in dollar terms 33 | self.trade_fee = trade_fee 34 | 35 | # Keep track of live trades 36 | self.active_positions_by_symbol: Dict[Symbol, Position] = OrderedDict() 37 | 38 | # Keep track of portfolio history like cash, equity, and positions 39 | self.portfolio_history = PortfolioHistory() 40 | 41 | @property 42 | def active_positions_count(self): 43 | return len(self.active_positions_by_symbol) 44 | 45 | @property 46 | def free_position_slots(self): 47 | return self.max_active_positions - self.active_positions_count 48 | 49 | @property 50 | def active_symbols(self) -> List[Symbol]: 51 | return list(self.active_positions_by_symbol.keys()) 52 | 53 | def print_initial_parameters(self): 54 | s = f'Initial Cash: ${self.initial_cash} \n' \ 55 | f'Maximum Number of Assets: {self.max_active_positions}\n' 56 | print(s) 57 | return s 58 | 59 | @staticmethod 60 | def make_tuple_lookup(columns) -> Callable[[str, str], int]: 61 | """ 62 | Map a multi-index dataframe to an itertuples-like object. 63 | 64 | The index of the dataframe is always the zero-th element.
65 | """ 66 | 67 | # col is a hierarchical column index represented by a tuple of strings 68 | tuple_lookup: Dict[Tuple[str, str], int] = { 69 | col: i + 1 for i, col in enumerate(columns) 70 | } 71 | 72 | return lambda symbol, metric: tuple_lookup[(symbol, metric)] 73 | 74 | @staticmethod 75 | def make_all_valid_lookup(_idx: Callable): 76 | """ 77 | Return a function that checks for valid data, given a lookup function 78 | """ 79 | return lambda row, symbol: ( 80 | not pd.isna(row[_idx(symbol, 'pref')]) and \ 81 | not pd.isna(row[_idx(symbol, 'signal')]) and \ 82 | not pd.isna(row[_idx(symbol, 'price')]) 83 | ) 84 | 85 | def buy_to_open(self, symbol, date, price): 86 | """ 87 | Keep track of new position, make sure it isn't an existing position. 88 | Verify you have cash. 89 | """ 90 | 91 | # Figure out how much we are willing to spend 92 | cash_to_spend = self.cash / self.free_position_slots 93 | cash_to_spend -= self.trade_fee 94 | 95 | # Calculate buy_price and number of shares. Fractional shares allowed. 96 | purchase_price = (1 + self.percent_slippage) * price 97 | shares = cash_to_spend / purchase_price 98 | 99 | # Spend the cash 100 | self.cash -= cash_to_spend + self.trade_fee 101 | assert self.cash >= 0, 'Spent cash you do not have.' 102 | self.portfolio_history.record_cash(date, self.cash) 103 | 104 | # Record the position 105 | positions_by_symbol = self.active_positions_by_symbol 106 | assert not symbol in positions_by_symbol, 'Symbol already in portfolio.' 107 | position = Position(symbol, date, purchase_price, shares) 108 | positions_by_symbol[symbol] = position 109 | 110 | def sell_to_close(self, symbol, date, price): 111 | """ 112 | Keep track of exit price, recover cash, close position, and record it in 113 | portfolio history. 114 | 115 | Will raise a KeyError if symbol isn't an active position 116 | """ 117 | 118 | # Exit the position 119 | positions_by_symbol = self.active_positions_by_symbol 120 | position = positions_by_symbol[symbol] 121 | position.exit(date, price) 122 | 123 | # Receive the cash 124 | sale_value = position.last_value * (1 - self.percent_slippage) 125 | self.cash += sale_value 126 | self.portfolio_history.record_cash(date, self.cash) 127 | 128 | # Record in portfolio history 129 | self.portfolio_history.add_to_history(position) 130 | del positions_by_symbol[symbol] 131 | 132 | @staticmethod 133 | def _assert_equal_columns(*args: Iterable[pd.DataFrame]): 134 | column_names = set(args[0].columns.values) 135 | for arg in args[1:]: 136 | assert set(arg.columns.values) == column_names, \ 137 | 'Found unequal column names in input data frames.' 138 | 139 | def simulate(self, price: pd.DataFrame, signal: pd.DataFrame, 140 | preference: pd.DataFrame): 141 | """ 142 | Runs the simulation. 143 | 144 | price, signal, and preference are data frames with the column names 145 | represented by the same set of stock symbols. 
146 | """ 147 | 148 | # Create a hierarchical data frame to loop through 149 | self._assert_equal_columns(price, signal, preference) 150 | df = data_io.concatenate_metrics({ 151 | 'price': price, 152 | 'signal': signal, 153 | 'pref': preference, 154 | }) 155 | 156 | # Get list of symbols 157 | all_symbols = list(set(price.columns.values)) 158 | 159 | # Get lookup functions 160 | _idx = self.make_tuple_lookup(df.columns) 161 | _all_valid = self.make_all_valid_lookup(_idx) 162 | 163 | # Store some variables 164 | active_positions_by_symbol = self.active_positions_by_symbol 165 | max_active_positions = self.max_active_positions 166 | 167 | # Iterating over all dates. 168 | # itertuples() is significantly faster than iterrows(), it however comes 169 | # at the cost of being able index easily. In order to get around this 170 | # we use an tuple lookup function: "_idx" 171 | for row in df.itertuples(): 172 | 173 | # date index is always first element of tuple row 174 | date = row[0] 175 | 176 | # Get symbols with valid and tradable data 177 | symbols: List[str] = [s for s in all_symbols if _all_valid(row, s)] 178 | 179 | # Iterate over active positions and sell stocks with a sell signal. 180 | _active = self.active_symbols 181 | to_exit = [s for s in _active if row[_idx(s, 'signal')] == -1] 182 | for s in to_exit: 183 | sell_price = row[_idx(s, 'price')] 184 | self.sell_to_close(s, date, sell_price) 185 | 186 | # Get up to max_active_positions symbols with a buy signal in 187 | # decreasing order of preference 188 | to_buy = [ 189 | s for s in symbols if \ 190 | row[_idx(s, 'signal')] == 1 and \ 191 | not s in active_positions_by_symbol 192 | ] 193 | to_buy.sort(key=lambda s: row[_idx(s, 'pref')], reverse=True) 194 | to_buy = to_buy[:max_active_positions] 195 | 196 | for s in to_buy: 197 | buy_price = row[_idx(s, 'price')] 198 | buy_preference = row[_idx(s, 'pref')] 199 | 200 | # If we have some empty slots, just buy the asset outright 201 | if self.active_positions_count < max_active_positions: 202 | self.buy_to_open(s, date, buy_price) 203 | continue 204 | 205 | # If are holding max_active_positions, evaluate a swap based on 206 | # preference 207 | _active = self.active_symbols 208 | active_prefs = [(s, row[_idx(s, 'pref')]) for s in _active] 209 | 210 | _min = min(active_prefs, key=lambda k: k[1]) 211 | min_active_symbol, min_active_preference = _min 212 | 213 | # If a more preferable symbol exists, then sell an old one 214 | if min_active_preference < buy_preference: 215 | sell_price = row[_idx(min_active_symbol, 'price')] 216 | self.sell_to_close(min_active_symbol, date, sell_price) 217 | self.buy_to_open(s, date, buy_price) 218 | 219 | # Update price data everywhere 220 | for s in self.active_symbols: 221 | price = row[_idx(s, 'price')] 222 | position = active_positions_by_symbol[s] 223 | position.record_price_update(date, price) 224 | 225 | # Sell all positions and mark simulation as complete 226 | for s in self.active_symbols: 227 | self.sell_to_close(s, date, row[_idx(s, 'price')]) 228 | self.portfolio_history.finish() -------------------------------------------------------------------------------- /listings/chapter_4/4_7_simple_simulator_usage.py: -------------------------------------------------------------------------------- 1 | ### pypm/simulate_portfolio.py 2 | from pypm import metrics, signals, data_io, simulation 3 | import pandas as pd 4 | 5 | def simulate_portfolio(): 6 | 7 | bollinger_n = 20 8 | sharpe_n = 20 9 | 10 | # Load in data 11 | symbols: List[str] = 
-------------------------------------------------------------------------------- /listings/chapter_4/4_7_simple_simulator_usage.py: -------------------------------------------------------------------------------- 1 | ### pypm/simulate_portfolio.py 2 | from pypm import metrics, signals, data_io, simulation 3 | import pandas as pd 4 | 5 | def simulate_portfolio(): 6 | 7 | bollinger_n = 20 8 | sharpe_n = 20 9 | 10 | # Load in data 11 | symbols: List[str] = data_io.get_all_symbols() 12 | prices: pd.DataFrame = data_io.load_eod_matrix(symbols) 13 | 14 | # Use the Bollinger band outer-band crossover as a signal 15 | _bollinger = signals.create_bollinger_band_signal 16 | signal = prices.apply(_bollinger, args=(bollinger_n,), axis=0) 17 | 18 | # Use a rolling Sharpe ratio approximation as a preference matrix 19 | _sharpe = metrics.calculate_rolling_sharpe_ratio 20 | preference = prices.apply(_sharpe, args=(sharpe_n, ), axis=0) 21 | 22 | # Run the simulator 23 | simulator = simulation.SimpleSimulator( 24 | initial_cash=10000, 25 | max_active_positions=5, 26 | percent_slippage=0.0005, 27 | trade_fee=1, 28 | ) 29 | simulator.simulate(prices, signal, preference) 30 | 31 | # Print results 32 | simulator.portfolio_history.print_position_summaries() 33 | simulator.print_initial_parameters() 34 | simulator.portfolio_history.print_summary() 35 | simulator.portfolio_history.plot() 36 | 37 | if __name__ == '__main__': 38 | simulate_portfolio() 39 | 40 | # Returns ... 41 | # Initial Cash: $10000 42 | # Maximum Number of Assets: 5 43 | # 44 | # Equity: $39758.61 45 | # Percent Return: 297.59% 46 | # S&P 500 Return: 184.00% 47 | # 48 | # Number of trades: 1835 49 | # Average active trades: 4.83 50 | # 51 | # CAGR: 14.82% 52 | # S&P 500 CAGR: 11.02% 53 | # Excess CAGR: 3.80% 54 | # 55 | # Annualized Volatility: 17.93% 56 | # Sharpe Ratio: 0.83 57 | # Jensen's Alpha: 0.000147 58 | # 59 | # Dollar Max Drawdown: $10594.83 60 | # Percent Max Drawdown: 30.03% 61 | # Log Max Drawdown Ratio: 1.02 -------------------------------------------------------------------------------- /listings/chapter_5/5_1_grid_search_optimizer.py: -------------------------------------------------------------------------------- 1 | from pypm import metrics, signals, data_io, simulation 2 | 3 | import pandas as pd 4 | import numpy as np 5 | from collections import defaultdict, OrderedDict 6 | from itertools import product 7 | from timeit import default_timer 8 | from typing import Dict, Tuple, List, Callable, Iterable, Any, NewType, Mapping 9 | 10 | import matplotlib.pyplot as plt 11 | from matplotlib import cm 12 | from mpl_toolkits.mplot3d import Axes3D 13 | 14 | # Performance data and parameter inputs are dictionaries 15 | Parameters = NewType('Parameters', Dict[str, float]) 16 | Performance = simulation.PortfolioHistory.PerformancePayload # Dict[str, float] 17 | 18 | # Simulation function must take parameters as keyword arguments pointing to 19 | # iterables and return a performance metric dictionary 20 | SimKwargs = NewType('Kwargs', Mapping[str, Iterable[Any]]) 21 | SimFunction = NewType('SimFunction', Callable[[SimKwargs], Performance]) 22 | 23 | class OptimizationResult(object): 24 | """Simple container class for optimization data""" 25 | 26 | def __init__(self, parameters: Parameters, performance: Performance): 27 | 28 | # Make sure no collisions between performance metrics and params 29 | assert len(parameters.keys() & performance.keys()) == 0, \ 30 | 'parameter name matches performance metric name' 31 | 32 | self.parameters = parameters 33 | self.performance = performance 34 | 35 | @property 36 | def as_dict(self) -> Dict[str, float]: 37 | """Combines the dictionaries after we are sure of no collisions""" 38 | return {**self.parameters, **self.performance} 39 | 40 | 41 | class GridSearchOptimizer(object): 42 | """ 43 | A generic grid search optimizer that requires only a simulation function and 44 | a series of parameter ranges.
Provides timing, summary, and plotting 45 | utilities with return data. 46 | """ 47 | 48 | def __init__(self, simulation_function: SimFunction): 49 | 50 | self.simulate = simulation_function 51 | self._results_list: List[OptimizationResult] = list() 52 | self._results_df = pd.DataFrame() 53 | 54 | self._optimization_finished = False 55 | 56 | def add_results(self, parameters: Parameters, performance: Performance): 57 | _results = OptimizationResult(parameters, performance) 58 | self._results_list.append(_results) 59 | 60 | def optimize(self, **optimization_ranges: SimKwargs): 61 | 62 | assert optimization_ranges, 'Must provide non-empty parameters.' 63 | 64 | # Convert all iterables to lists 65 | param_ranges = {k: list(v) for k, v in optimization_ranges.items()} 66 | self.param_names = param_names = list(param_ranges.keys()) 67 | 68 | # Count total simulations 69 | n = total_simulations = np.prod([len(r) for r in param_ranges.values()]) 70 | 71 | total_time_elapsed = 0 72 | 73 | print(f'Starting simulation ...') 74 | print(f'Simulating 1 / {n} ...', end='\r') 75 | for i, params in enumerate(product(*param_ranges.values())): 76 | if i > 0: 77 | _avg = avg_time = total_time_elapsed / i 78 | _rem = remaining_time = (n - (i + 1)) * avg_time 79 | s = f'Simulating {i+1} / {n} ... ' 80 | s += f'{_rem:.0f}s remaining ({_avg:.1f}s avg)' 81 | s += ' '*8 82 | print(s, end='\r') 83 | 84 | timer_start = default_timer() 85 | 86 | parameters = {n: param for n, param in zip(param_names, params)} 87 | results = self.simulate(**parameters) 88 | self.add_results(parameters, results) 89 | 90 | timer_end = default_timer() 91 | total_time_elapsed += timer_end - timer_start 92 | 93 | print(f'Simulated {total_simulations} / {total_simulations} ...') 94 | print(f'Elapsed time: {total_time_elapsed:.0f}s') 95 | print(f'Done.') 96 | 97 | self._optimization_finished = True 98 | 99 | def _assert_finished(self): 100 | assert self._optimization_finished, \ 101 | 'Run self.optimize before accessing this method.'
102 | 103 | @property 104 | def results(self) -> pd.DataFrame: 105 | self._assert_finished() 106 | if self._results_df.empty: 107 | 108 | _results_list = self._results_list 109 | self._results_df = pd.DataFrame([r.as_dict for r in _results_list]) 110 | 111 | _columns = set(list(self._results_df.columns.values)) 112 | _params = set(self.param_names) 113 | self.metric_names = list(_columns - _params) 114 | 115 | return self._results_df 116 | 117 | def print_summary(self): 118 | df = self.results 119 | metric_names = self.metric_names 120 | 121 | print('Summary statistics') 122 | print(df[metric_names].describe().T) 123 | 124 | def get_best(self, metric_name: str) -> pd.DataFrame: 125 | """ 126 | Sort the results by a specific performance metric 127 | """ 128 | self._assert_finished() 129 | 130 | results = self.results 131 | param_names = self.param_names 132 | metric_names = self.metric_names 133 | 134 | assert metric_name in metric_names, 'Not a performance metric' 135 | partial_df = self.results[param_names+[metric_name]] 136 | 137 | return partial_df.sort_values(metric_name, ascending=False) 138 | 139 | def plot_1d_hist(self, x, show=True): 140 | self.results.hist(x) 141 | if show: 142 | plt.show() 143 | 144 | def plot_2d_line(self, x, y, show=True, **filter_kwargs): 145 | _results = self.results 146 | for k, v in filter_kwargs.items(): 147 | _results = _results[getattr(_results, k) == v] 148 | 149 | ax = _results.plot(x, y) 150 | if filter_kwargs: 151 | k_str = ', '.join([f'{k}={v}' for k,v in filter_kwargs.items()]) 152 | ax.legend([f'{x} ({k_str})']) 153 | 154 | if show: 155 | plt.show() 156 | 157 | def plot_2d_violin(self, x, y, show=True): 158 | """ 159 | Group y along x then plot violin charts 160 | """ 161 | x_values = self.results[x].unique() 162 | x_values.sort() 163 | 164 | y_by_x = OrderedDict([(v, []) for v in x_values]) 165 | for _, row in self.results.iterrows(): 166 | y_by_x[row[x]].append(row[y]) 167 | 168 | fig, ax = plt.subplots() 169 | 170 | ax.violinplot(dataset=list(y_by_x.values()), showmedians=True) 171 | ax.set_xlabel(x) 172 | ax.set_ylabel(y) 173 | ax.set_xticks(range(0, len(y_by_x)+1)) 174 | ax.set_xticklabels([''] + list(y_by_x.keys())) 175 | if show: 176 | plt.show() 177 | 178 | def plot_3d_mesh(self, x, y, z, show=True, **filter_kwargs): 179 | """ 180 | Plot interactive 3d mesh. z axis should typically be performance metric 181 | """ 182 | _results = self.results 183 | fig = plt.figure() 184 | ax = Axes3D(fig) 185 | 186 | for k, v in filter_kwargs.items(): 187 | _results = _results[getattr(_results, k) == v] 188 | 189 | X, Y, Z = [getattr(_results, attr) for attr in (x, y, z)] 190 | ax.plot_trisurf(X, Y, Z, cmap=cm.jet, linewidth=0.2) 191 | ax.set_xlabel(x) 192 | ax.set_ylabel(y) 193 | ax.set_zlabel(z) 194 | if show: 195 | plt.show() 196 | 197 | def plot(self, *attrs: Tuple[str], show=True, 198 | **filter_kwargs: Dict[str, Any]): 199 | """ 200 | Attempt to intelligently dispatch plotting functions based on the number 201 | and type of attributes. Last argument should typically be the 202 | performance metric. 203 | """ 204 | self._assert_finished() 205 | param_names = self.param_names 206 | metric_names = self.metric_names 207 | 208 | if len(attrs) == 3: 209 | assert attrs[0] in param_names and attrs[1] in param_names, \ 210 | 'First two positional arguments must be parameter names.' 211 | 212 | assert attrs[2] in metric_names, \ 213 | 'Last positional argument must be a metric name.' 
214 | 215 | assert len(filter_kwargs) + 2 == len(param_names), \ 216 | 'Must filter remaining parameters. e.g. p_three=some_number.' 217 | 218 | self.plot_3d_mesh(*attrs, show=show, **filter_kwargs) 219 | 220 | elif len(attrs) == 2: 221 | if len(param_names) == 1 or filter_kwargs: 222 | self.plot_2d_line(*attrs, show=show, **filter_kwargs) 223 | 224 | elif len(param_names) > 1: 225 | self.plot_2d_violin(*attrs, show=show) 226 | 227 | elif len(attrs) == 1: 228 | self.plot_1d_hist(*attrs, show=show) 229 | 230 | else: 231 | raise ValueError('Must pass between one and three column names.') -------------------------------------------------------------------------------- /listings/chapter_5/5_2_grid_search_example.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | from pypm import metrics, signals, data_io, simulation, optimization 4 | from pypm.optimization import GridSearchOptimizer 5 | 6 | from typing import List, Dict, Tuple, Callable 7 | 8 | Performance = simulation.PortfolioHistory.PerformancePayload # Dict[str, float] 9 | 10 | def bind_simulator(**sim_kwargs) -> Callable: 11 | """ 12 | Create a function with all static simulation data bound to it, where the 13 | arguments are simulation parameters 14 | """ 15 | 16 | symbols: List[str] = data_io.get_all_symbols() 17 | prices: pd.DataFrame = data_io.load_eod_matrix(symbols) 18 | 19 | _bollinger: Callable = signals.create_bollinger_band_signal 20 | _sharpe: Callable = metrics.calculate_rolling_sharpe_ratio 21 | 22 | def _simulate(bollinger_n: int, sharpe_n: int) -> Performance: 23 | 24 | signal = prices.apply(_bollinger, args=(bollinger_n,), axis=0) 25 | preference = prices.apply(_sharpe, args=(sharpe_n, ), axis=0) 26 | 27 | simulator = simulation.SimpleSimulator(**sim_kwargs) 28 | simulator.simulate(prices, signal, preference) 29 | 30 | return simulator.portfolio_history.get_performance_metric_data() 31 | 32 | return _simulate 33 | 34 | if __name__ == '__main__': 35 | 36 | simulate = bind_simulator(initial_cash=10000, max_active_positions=5) 37 | 38 | optimizer = GridSearchOptimizer(simulate) 39 | optimizer.optimize( 40 | bollinger_n=range(10, 110, 10), 41 | sharpe_n=range(10, 110, 10), 42 | ) 43 | 44 | print(optimizer.get_best('excess_cagr')) 45 | optimizer.plot('excess_cagr') 46 | optimizer.plot('bollinger_n', 'excess_cagr') 47 | optimizer.plot('bollinger_n', 'sharpe_n', 'excess_cagr') 48 | 49 | # Returns ... 50 | # bollinger_n sharpe_n excess_cagr 51 | # 17 20 80 0.092841 52 | # 16 20 70 0.062477 53 | # 98 100 90 0.055047 54 | # 19 20 100 0.050255 55 | # 1 10 20 0.043642 56 | # .. ... ... ... 
57 | # 89 90 100 -0.054080 58 | # 69 70 100 -0.054404 59 | # 63 70 40 -0.061105 60 | # 61 70 20 -0.063276 61 | # 50 60 10 -0.065433 -------------------------------------------------------------------------------- /listings/chapter_5/5_3_white_noise_preference_matrix.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | from pypm import metrics, signals, data_io, simulation, optimization 5 | from pypm.optimization import GridSearchOptimizer 6 | 7 | from typing import List, Dict, Tuple, Callable 8 | 9 | Performance = simulation.PortfolioHistory.PerformancePayload # Dict[str, float] 10 | 11 | def bind_simulator(**sim_kwargs) -> Callable: 12 | """ 13 | Create a simulator that uses white noise for the preference matrix 14 | """ 15 | symbols: List[str] = data_io.get_all_symbols() 16 | prices: pd.DataFrame = data_io.load_eod_matrix(symbols) 17 | 18 | _bollinger: Callable = signals.create_bollinger_band_signal 19 | 20 | # Bollinger n is constant throughout 21 | bollinger_n = 20 22 | 23 | def _simulate(white_noise_test_id: int) -> Performance: 24 | 25 | signal = prices.apply(_bollinger, args=(bollinger_n,), axis=0) 26 | 27 | # Build a pile of noise in the same shape as the price data 28 | _noise = np.random.normal(loc=0, scale=1, size=prices.shape) 29 | _cols = prices.columns 30 | _index = prices.index 31 | preference = pd.DataFrame(_noise, columns=_cols, index=_index) 32 | 33 | simulator = simulation.SimpleSimulator(**sim_kwargs) 34 | simulator.simulate(prices, signal, preference) 35 | 36 | return simulator.portfolio_history.get_performance_metric_data() 37 | 38 | return _simulate 39 | 40 | if __name__ == '__main__': 41 | 42 | simulate = bind_simulator(initial_cash=10000, max_active_positions=5) 43 | 44 | optimizer = GridSearchOptimizer(simulate) 45 | optimizer.optimize(white_noise_test_id=range(1000)) 46 | 47 | print(optimizer.get_best('excess_cagr')) 48 | optimizer.print_summary() 49 | optimizer.plot('excess_cagr') -------------------------------------------------------------------------------- /listings/chapter_5/5_4_bootstrap_simulated_preference_matrix.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | from pypm import metrics, signals, data_io, simulation, optimization 5 | from pypm.optimization import GridSearchOptimizer 6 | 7 | from typing import List, Dict, Tuple, Callable 8 | 9 | Performance = simulation.PortfolioHistory.PerformancePayload # Dict[str, float] 10 | 11 | def bind_simulator(**sim_kwargs) -> Callable: 12 | """ 13 | Create a simulator that uses a bootstrapped rolling Sharpe ratio for the 14 | preference matrix 15 | """ 15 | symbols: List[str] = data_io.get_all_symbols() 16 | prices: pd.DataFrame = data_io.load_eod_matrix(symbols) 17 | 18 | _bollinger: Callable = signals.create_bollinger_band_signal 19 | bollinger_n = 20 20 | 21 | returns = metrics.calculate_return_series(prices) 22 | sharpe_n = 20 23 | 24 | def bootstrap_rolling_sharpe_ratio(return_series: pd.Series) -> pd.Series: 25 | _series = return_series.iloc[1:] 26 | _series = _series.sample(n=return_series.shape[0], replace=True) 27 | _series.iloc[:1] = [np.nan] 28 | _series = pd.Series(_series.values, index=return_series.index) 29 | _windowed_series = _series.rolling(sharpe_n) 30 | return _windowed_series.mean() / _windowed_series.std() 31 | 32 | _sharpe: Callable = bootstrap_rolling_sharpe_ratio 33 | 34 | def _simulate(bootstrap_test_id: int) -> Performance: 35 | 36 | signal
= prices.apply(_bollinger, args=(bollinger_n,), axis=0) 37 | preference = returns.apply(_sharpe, axis=0) 38 | 39 | simulator = simulation.SimpleSimulator(**sim_kwargs) 40 | simulator.simulate(prices, signal, preference) 41 | 42 | return simulator.portfolio_history.get_performance_metric_data() 43 | 44 | return _simulate 45 | 46 | if __name__ == '__main__': 47 | 48 | simulate = bind_simulator(initial_cash=10000, max_active_positions=5) 49 | 50 | optimizer = GridSearchOptimizer(simulate) 51 | optimizer.optimize(bootstrap_test_id=range(1000)) 52 | 53 | print(optimizer.get_best('excess_cagr')) 54 | optimizer.print_summary() 55 | optimizer.plot('excess_cagr') -------------------------------------------------------------------------------- /listings/chapter_6/6_1_loading_alternative_data.py: -------------------------------------------------------------------------------- 1 | from pypm import data_io 2 | import numpy as np 3 | import pandas as pd 4 | from typing import List 5 | 6 | # Load in everything 7 | symbols: List[str] = data_io.get_all_symbols() 8 | eod_data: pd.DataFrame = data_io.load_eod_matrix(symbols) 9 | alt_data: pd.DataFrame = data_io.load_alternative_data_matrix(symbols) 10 | 11 | # Our eod_data goes back 10 years, but our alt_data goes back 5 years 12 | eod_data = eod_data[eod_data.index >= alt_data.index.min()] 13 | assert np.all(eod_data.index == alt_data.index) 14 | assert np.all(eod_data.columns == alt_data.columns) -------------------------------------------------------------------------------- /listings/chapter_6/6_2_exploratory_analysis.py: -------------------------------------------------------------------------------- 1 | from pypm import data_io, metrics 2 | import numpy as np 3 | import pandas as pd 4 | from typing import List 5 | 6 | # Load in everything 7 | symbols: List[str] = data_io.get_all_symbols() 8 | eod_data: pd.DataFrame = data_io.load_eod_matrix(symbols) 9 | alt_data: pd.DataFrame = data_io.load_alternative_data_matrix(symbols) 10 | eod_data = eod_data[eod_data.index >= alt_data.index.min()] 11 | 12 | _calc_returns = metrics.calculate_log_return_series 13 | _corr_by_symbol = dict() 14 | 15 | for symbol in symbols: 16 | 17 | alt_series = alt_data[symbol].dropna() 18 | price_series = eod_data[symbol] 19 | 20 | if alt_series.empty: 21 | continue 22 | 23 | # Calculate returns, ensuring each series has the same index 24 | price_return_series = _calc_returns(price_series.loc[alt_series.index]) 25 | alt_return_series = _calc_returns(alt_series) 26 | 27 | # Remove the NA at the front 28 | price_return_series = price_return_series.iloc[1:] 29 | alt_return_series = alt_return_series.iloc[1:] 30 | 31 | # Calculate the correlation 32 | _corr = np.corrcoef(price_return_series, alt_return_series) 33 | 34 | # This element of the correlation matrix is the number we want 35 | _corr_by_symbol[symbol] = _corr[1,0] 36 | 37 | # Describe results 38 | results = pd.Series(_corr_by_symbol) 39 | print(pd.DataFrame(results.describe()).T) 40 | # Returns ...
41 | # count mean std min 25% 50% 75% max 42 | # 97.0 -0.002539 0.032456 -0.065556 -0.024983 -0.003735 0.0174 0.099085 -------------------------------------------------------------------------------- /listings/chapter_7/7_1_symmetric_cusum_filter_on_revenue.py: -------------------------------------------------------------------------------- 1 | # In pypm.filters 2 | import numpy as np 3 | import pandas as pd 4 | 5 | def calculate_non_uniform_lagged_change(series: pd.Series, n_days: int): 6 | """ 7 | Use pd.Series.searchsorted to measure the lagged change in a non-uniformly 8 | spaced time series over n_days of calendar time. 9 | """ 10 | 11 | # Get mapping from now to n_days ago at every point 12 | _timedelta: pd.Timedelta = pd.Timedelta(days=n_days) 13 | _idx: pd.Series = series.index.searchsorted(series.index - _timedelta) 14 | _idx = _idx[_idx > 0] 15 | 16 | # Get the last len(series) - n_days values 17 | _series = series.iloc[-_idx.shape[0]:] 18 | 19 | # Build a padding of NA values 20 | _pad_length = series.shape[0] - _idx.shape[0] 21 | _na_pad = pd.Series(None, index=series.index[:_pad_length]) 22 | 23 | # Get the corresponding lagged values 24 | _lagged_series = series.iloc[_idx] 25 | 26 | # Measure the difference 27 | _diff = pd.Series(_series.values-_lagged_series.values, index=_series.index) 28 | 29 | return pd.concat([_na_pad, _diff]) 30 | 31 | 32 | def calculate_cusum_events(series: pd.Series, 33 | filter_threshold: float) -> pd.DatetimeIndex: 34 | """ 35 | Calculate symmetric cusum filter and corresponding events 36 | """ 37 | 38 | event_dates = list() 39 | s_up = 0 40 | s_down = 0 41 | 42 | for date, price in series.items(): 43 | s_up = max(0, s_up + price) 44 | s_down = min(0, s_down + price) 45 | 46 | if s_up > filter_threshold: 47 | s_up = 0 48 | event_dates.append(date) 49 | 50 | elif s_down < -filter_threshold: 51 | s_down = 0 52 | event_dates.append(date) 53 | 54 | return pd.DatetimeIndex(event_dates) 55 | 56 | # In pypm.ml_model.events 57 | from pypm import filters 58 | 59 | def calculate_events_for_revenue_series(series: pd.Series, 60 | filter_threshold: float, lookback: int=365) -> pd.DatetimeIndex: 61 | """ 62 | Calculate the symmetric cusum filter to generate events on YoY changes in 63 | the log revenue series 64 | """ 65 | series = np.log(series) 66 | series = filters.calculate_non_uniform_lagged_change(series, lookback) 67 | return filters.calculate_cusum_events(series, filter_threshold) 68 | 69 | 70 | def calculate_events(revenue_series: pd.Series): 71 | return calculate_events_for_revenue_series( 72 | revenue_series, 73 | filter_threshold=5, 74 | lookback=365, 75 | ) -------------------------------------------------------------------------------- /listings/chapter_7/7_2_computing_triple_barrier_labels.py: -------------------------------------------------------------------------------- 1 | # See pypm.labels 2 | import numpy as np 3 | import pandas as pd 4 | from typing import List, Tuple 5 | 6 | def compute_triple_barrier_labels( 7 | price_series: pd.Series, 8 | event_index: pd.Series, 9 | time_delta_days: int, 10 | upper_delta: float=None, 11 | lower_delta: float=None, 12 | vol_span: int=20, 13 | upper_z: float=None, 14 | lower_z: float=None, 15 | upper_label: int=1, 16 | lower_label: int=-1) -> Tuple[pd.Series, pd.Series]: 17 | """ 18 | Calculate event labels according to the triple-barrier method. 19 | 20 | Return a series with both the original events and the labels.
Labels 1, 0, 21 | and -1 correspond to upper barrier breach, vertical barrier breach, and 22 | lower barrier breach, respectively. 23 | 24 | Also return a series where the index is the start date of the label and the 25 | values are the end dates of the label. 26 | """ 27 | 28 | timedelta = pd.Timedelta(days=time_delta_days) 29 | series = pd.Series(np.log(price_series.values), index=price_series.index) 30 | 31 | # A list with elements of {-1, 0, 1} indicating the outcome of the events 32 | labels = list() 33 | label_dates = list() 34 | 35 | if upper_z or lower_z: 36 | volatility = series.ewm(span=vol_span).std() 37 | volatility *= np.sqrt(time_delta_days / vol_span) 38 | 39 | for event_date in event_index: 40 | date_barrier = event_date + timedelta 41 | 42 | start_price = series.loc[event_date] 43 | log_returns = series.loc[event_date:date_barrier] - start_price 44 | 45 | # First element of tuple is 1 or -1 indicating upper or lower barrier 46 | # Second element of tuple is first date when barrier was crossed 47 | candidates: List[Tuple[int, pd.Timestamp]] = list() 48 | 49 | # Add the first upper or lower delta crosses to candidates 50 | if upper_delta: 51 | _date = log_returns[log_returns > upper_delta].first_valid_index() 52 | if _date: 53 | candidates.append((upper_label, _date)) 54 | 55 | if lower_delta: 56 | _date = log_returns[log_returns < lower_delta].first_valid_index() 57 | if _date: 58 | candidates.append((lower_label, _date)) 59 | 60 | # Add the first upper_z and lower_z crosses to candidates 61 | if upper_z: 62 | upper_barrier = upper_z * volatility[event_date] 63 | _date = log_returns[log_returns > upper_barrier].first_valid_index() 64 | if _date: 65 | candidates.append((upper_label, _date)) 66 | 67 | if lower_z: 68 | lower_barrier = lower_z * volatility[event_date] 69 | _date = log_returns[log_returns < lower_barrier].first_valid_index() 70 | if _date: 71 | candidates.append((lower_label, _date)) 72 | 73 | if candidates: 74 | # If any candidates, return label for first date 75 | label, label_date = min(candidates, key=lambda x: x[1]) 76 | else: 77 | # If there were no candidates, time barrier was touched 78 | label, label_date = 0, date_barrier 79 | 80 | labels.append(label) 81 | label_dates.append(label_date) 82 | 83 | label_series = pd.Series(labels, index=event_index) 84 | event_spans = pd.Series(label_dates, index=event_index) 85 | 86 | return label_series, event_spans 87 | 88 | 89 | # See pypm.ml_model.labels 90 | from typing import Tuple 91 | from pypm import labels 92 | 93 | def calculate_labels(price_series, event_index) -> Tuple[pd.Series, pd.Series]: 94 | """ 95 | Calculate labels based on the triple barrier method. Return a series of 96 | event labels indexed by event start date, and return a series of event end 97 | dates indexed by event start date.
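In this pipeline the vertical barrier sits 90 days out and the horizontal barriers at +/-1.8 z-scores of exponentially weighted volatility, per the arguments passed below.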
98 | """ 99 | 100 | # Remove events that don't have a proper chance to materialize 101 | time_delta_days = 90 102 | max_date = price_series.index.max() 103 | cutoff = max_date - pd.Timedelta(days=time_delta_days) 104 | event_index = event_index[event_index <= cutoff] 105 | 106 | # Use triple barrier method 107 | event_labels, event_spans = labels.compute_triple_barrier_labels( 108 | price_series, 109 | event_index, 110 | time_delta_days=time_delta_days, 111 | upper_z=1.8, 112 | lower_z=-1.8, 113 | ) 114 | 115 | return event_labels, event_spans 116 | -------------------------------------------------------------------------------- /listings/chapter_7/7_3_computing_average_uniqueness.py: -------------------------------------------------------------------------------- 1 | # See pypm.weights 2 | import numpy as np 3 | import pandas as pd 4 | from scipy.stats import hmean 5 | 6 | def calculate_uniqueness(event_spans: pd.Series, 7 | price_index: pd.Series) -> pd.Series: 8 | """ 9 | event_spans is a series with an index of start dates and values of end dates 10 | of a label. 11 | 12 | price_index is an index of underlying dates for the event 13 | 14 | Returns a series of uniqueness values that can be used as weights, indexed 15 | by the event start dates. Weights may need to be standardized again before 16 | training. 17 | """ 18 | 19 | # Create a binary data frame 20 | # value is 1 during event span and 0 otherwise 21 | columns = range(event_spans.shape[0]) 22 | df = pd.DataFrame(0, index=price_index, columns=columns) 23 | 24 | for i, (event_start, event_end) in enumerate(event_spans.items()): 25 | df[i].loc[event_start:event_end] += 1 26 | 27 | # Compute concurrency over event span then calculate uniqueness 28 | uniquenesses = list() 29 | for i, (event_start, event_end) in enumerate(event_spans.items()): 30 | concurrency: pd.Series = df.loc[event_start:event_end].sum(axis=1) 31 | uniqueness = 1 / hmean(concurrency) 32 | uniquenesses.append(uniqueness) 33 | 34 | return pd.Series(uniquenesses, index=event_spans.index) 35 | 36 | # See pypm.ml_model.weights 37 | import numpy as np 38 | import pandas as pd 39 | 40 | from pypm.weights import calculate_uniqueness 41 | 42 | def calculate_weights(event_spans: pd.Series, 43 | price_index: pd.Series) -> pd.Series: 44 | return calculate_uniqueness(event_spans, price_index) 45 | -------------------------------------------------------------------------------- /listings/chapter_7/7_4_computing_features.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | from pypm import indicators, filters, metrics 5 | 6 | _calc_delta = filters.calculate_non_uniform_lagged_change 7 | _calc_ma = indicators.calculate_simple_moving_average 8 | _calc_log_return = metrics.calculate_log_return_series 9 | 10 | def _calc_rolling_vol(series, n): 11 | return series.rolling(n).std() * np.sqrt(252 / n) 12 | 13 | def calculate_features(price_series, revenue_series) -> pd.DataFrame: 14 | """ 15 | Calculate any and all potentially useful features. Return as a dataframe.
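Three feature families (revenue deltas, price returns, and rolling volatilities) are each computed over 7-, 30-, 90-, 180-, and 360-day windows, so the frame has 15 feature columns.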
16 | """ 17 | 18 | log_revenue = np.log(revenue_series) 19 | log_prices = np.log(price_series) 20 | 21 | log_revenue_ma = _calc_ma(log_revenue, 10) 22 | log_prices_ma = _calc_ma(log_prices, 10) 23 | 24 | log_returns = _calc_log_return(price_series) 25 | 26 | features_by_name = dict() 27 | 28 | for i in [7, 30, 90, 180, 360]: 29 | 30 | rev_feature = _calc_delta(log_revenue_ma, i) 31 | price_feature = _calc_delta(log_prices_ma, i) 32 | vol_feature = _calc_rolling_vol(log_returns, i) 33 | 34 | features_by_name.update({ 35 | f'{i}_day_revenue_delta': rev_feature, 36 | f'{i}_day_return': price_feature, 37 | f'{i}_day_vol': vol_feature, 38 | }) 39 | 40 | features_df = pd.DataFrame(features_by_name) 41 | return features_df 42 | -------------------------------------------------------------------------------- /listings/chapter_7/7_5_modeling_and_cross_validation.py: -------------------------------------------------------------------------------- 1 | # See pypm.ml_model.model 2 | import numpy as np 3 | import pandas as pd 4 | 5 | from sklearn.ensemble import RandomForestClassifier 6 | from sklearn.model_selection import RepeatedKFold 7 | from sklearn.base import clone 8 | 9 | from joblib import Parallel, delayed 10 | 11 | # Number of jobs to run in parallel 12 | # Set to number of computer cores to use 13 | N_JOBS = 10 14 | N_SPLITS = 5 15 | N_REPEATS = 4 16 | 17 | def _fit_and_score(classifier, X, y, w, train_index, test_index, i) -> float: 18 | """ 19 | The function used by joblib to split, train, and score cross-validations 20 | """ 21 | X_train = X.iloc[train_index] 22 | X_test = X.iloc[test_index] 23 | 24 | y_train = y.iloc[train_index] 25 | y_test = y.iloc[test_index] 26 | 27 | w_train = w.iloc[train_index] 28 | w_test = w.iloc[test_index] 29 | 30 | classifier.fit(X_train, y_train, w_train) 31 | score = classifier.score(X_test, y_test, w_test) 32 | 33 | print(f'Finished {i} ({100*score:.1f}%)') 34 | 35 | return score 36 | 37 | def repeated_k_fold(classifier, X, y, w) -> np.ndarray: 38 | """ 39 | Perform repeated k-fold cross validation on a classifier. Spread the fitting 40 | jobs over multiple computer cores. 41 | """ 42 | n_jobs = N_JOBS 43 | 44 | n_splits = N_SPLITS 45 | n_repeats = N_REPEATS 46 | 47 | total_fits = n_splits * n_repeats 48 | 49 | _k_fold = RepeatedKFold(n_splits=n_splits, n_repeats=n_repeats) 50 | 51 | print(f'Fitting {total_fits} models {n_jobs} at a time ...') 52 | print() 53 | 54 | parallel = Parallel(n_jobs=n_jobs) 55 | scores = parallel( 56 | delayed(_fit_and_score)( 57 | clone(classifier), X, y, w, train_index, test_index, i 58 | ) for i, (train_index, test_index) in enumerate(_k_fold.split(X)) 59 | ) 60 | 61 | return np.array(scores) 62 | 63 | 64 | def calculate_model(df: pd.DataFrame) -> RandomForestClassifier: 65 | """ 66 | Given a dataframe with a y column, weights column, and predictor columns 67 | with arbitrary names, cross-validate and fit a classifier. Print 68 | diagnostics.
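The improvement figure compares the mean cross-validation accuracy, plus or minus two standard deviations, against the best constant-guess (baseline) accuracy.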
69 | """ 70 | classifier = RandomForestClassifier(n_estimators=100) 71 | 72 | # Separate data 73 | predictor_columns = [ 74 | c for c in df.columns.values if c not in ('y', 'weights') 75 | ] 76 | X = df[predictor_columns] 77 | y = df['y'] 78 | w = df['weights'] 79 | 80 | # Fit cross-validation 81 | scores = repeated_k_fold(classifier, X, y, w) 82 | 83 | # Get a full dataset fit for importance scores 84 | classifier.fit(X, y, w) 85 | 86 | # Compute diagnostics 87 | _imp = classifier.feature_importances_ 88 | importance_series = pd.Series(_imp, index=predictor_columns) 89 | importance_series = importance_series.sort_values(ascending=False) 90 | 91 | # baseline accuracy is the best value achievable with a constant guess 92 | baseline = np.max(y.value_counts() / y.shape[0]) 93 | 94 | # Compute a rough confidence interval for the improvement 95 | mean_score = scores.mean() 96 | std_score = scores.std() 97 | 98 | upper_bound = mean_score + 2 * std_score 99 | lower_bound = mean_score - 2 * std_score 100 | ibounds = (lower_bound - baseline, upper_bound - baseline) 101 | 102 | print('Feature importances') 103 | for col, imp in importance_series.items(): 104 | print(f'{col:24} {imp:>.3f}') 105 | print() 106 | 107 | print('Cross validation scores') 108 | print(np.round(100 * scores, 1)) 109 | print() 110 | 111 | print(f'Baseline accuracy {100*baseline:.1f}%') 112 | print(f'OOS accuracy {100*mean_score:.1f}% +/- {200 * scores.std():.1f}%') 113 | print(f'Improvement {100*(ibounds[0]):.1f} to {100*(ibounds[1]):.1f}%') 114 | print() 115 | 116 | return classifier 117 | 118 | -------------------------------------------------------------------------------- /listings/chapter_7/7_6_machine_learning_pipeline.py: -------------------------------------------------------------------------------- 1 | # See fit_alternative_data_model.py 2 | import os 3 | import pandas as pd 4 | import numpy as np 5 | from typing import Dict 6 | 7 | from joblib import dump 8 | 9 | from pypm.ml_model.data_io import load_data 10 | from pypm.ml_model.events import calculate_events 11 | from pypm.ml_model.labels import calculate_labels 12 | from pypm.ml_model.features import calculate_features 13 | from pypm.ml_model.model import calculate_model 14 | from pypm.ml_model.weights import calculate_weights 15 | 16 | SRC_DIR = os.path.dirname(os.path.abspath(__file__)) 17 | 18 | if __name__ == '__main__': 19 | 20 | # All the data we have to work with 21 | symbols, eod_data, alt_data = load_data() 22 | 23 | # The ML dataframe for each symbol, to be combined later 24 | df_by_symbol: Dict[str, pd.DataFrame] = dict() 25 | 26 | # Build ML dataframe for each symbol 27 | for symbol in symbols: 28 | 29 | # Get revenue and price series 30 | revenue_series = alt_data[symbol].dropna() 31 | price_series = eod_data[symbol].dropna() 32 | price_index = price_series.index 33 | 34 | # Get events, labels, weights, and features 35 | event_index = calculate_events(revenue_series) 36 | event_labels, event_spans = calculate_labels(price_series, event_index) 37 | weights = calculate_weights(event_spans, price_index) 38 | features_df = calculate_features(price_series, revenue_series) 39 | 40 | # Subset features by event dates 41 | features_on_events = features_df.loc[event_index] 42 | 43 | # Convert labels and events to a data frame 44 | labels_df = pd.DataFrame(event_labels) 45 | labels_df.columns = ['y'] 46 | 47 | # Convert weights to a data frame 48 | weights_df = pd.DataFrame(weights) 49 | weights_df.columns = ['weights'] 50 | 51 | # Concatenate features to
labels 52 | df = pd.concat([features_on_events, weights_df, labels_df], axis=1) 53 | df_by_symbol[symbol] = df 54 | 55 | # Create final ML dataframe 56 | df = pd.concat(df_by_symbol.values(), axis=0) 57 | df.sort_index(inplace=True) 58 | df.dropna(inplace=True) 59 | print(df) 60 | 61 | # Fit the model 62 | classifier = calculate_model(df) 63 | 64 | # Save the model 65 | dump(classifier, os.path.join(SRC_DIR, 'ml_model.joblib')) 66 | 67 | # Returns ... 68 | # 7_day_revenue_delta 7_day_return 7_day_vol ... 69 | # 2016-06-07 -0.000721 0.019520 0.096002 ... 70 | # 2016-06-08 0.029827 0.025005 0.113246 ... 71 | # 2016-06-08 -0.046427 0.013868 0.051878 ... 72 | # 2016-06-09 0.001558 0.032410 0.064574 ... 73 | # 2016-06-10 0.004933 0.011751 0.045105 ... 74 | # ... ... ... ... ... 75 | # 2019-09-30 -0.031956 -0.008562 0.072845 ... 76 | # 2019-10-01 -0.074244 -0.018469 0.053665 ... 77 | # 2019-10-01 0.009513 -0.015659 0.094087 ... 78 | # 2019-10-02 0.012819 -0.008300 0.062938 ... 79 | # 2019-10-02 0.003023 0.015749 0.043320 ... 80 | # 81 | # [1563 rows x 17 columns] 82 | # Fitting 20 models 10 at a time ... 83 | # 84 | # ... 85 | # ... 86 | # ... 87 | # 88 | # Feature importances 89 | # 30_day_return 0.099 90 | # 7_day_return 0.097 91 | # 30_day_vol 0.073 92 | # 90_day_return 0.068 93 | # 360_day_vol 0.066 94 | # 360_day_revenue_delta 0.064 95 | # 360_day_return 0.063 96 | # 180_day_return 0.063 97 | # 180_day_revenue_delta 0.060 98 | # 90_day_vol 0.060 99 | # 180_day_vol 0.060 100 | # 7_day_vol 0.059 101 | # 7_day_revenue_delta 0.057 102 | # 90_day_revenue_delta 0.057 103 | # 30_day_revenue_delta 0.055 104 | # 105 | # Cross validation scores 106 | # ... 107 | # 108 | # Baseline accuracy 42.2% 109 | # OOS accuracy 52.4% +/- 5.3% 110 | # Improvement 4.9 to 15.6% 111 | # -------------------------------------------------------------------------------- /listings/chapter_7/7_7_simulation_with_machine_learning_model.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | import os 5 | from joblib import load 6 | 7 | from pypm.ml_model.data_io import load_data 8 | from pypm.ml_model.signals import calculate_signals 9 | 10 | from pypm import metrics, simulation 11 | 12 | SRC_DIR = os.path.dirname(os.path.abspath(__file__)) 13 | 14 | def simulate_portfolio(): 15 | 16 | # All the data we have to work with 17 | symbols, eod_data, alt_data = load_data() 18 | 19 | # Load classifier from file 20 | classifier = load(os.path.join(SRC_DIR, 'ml_model.joblib')) 21 | 22 | # Generate signals from classifier 23 | print('Calculating signals ...') 24 | signal = calculate_signals(classifier, symbols, eod_data, alt_data) 25 | 26 | # Get rid of eod_data before valid signals 27 | first_signal_date = signal.first_valid_index() 28 | eod_data = eod_data[eod_data.index > first_signal_date] 29 | 30 | # Set the preference to uniform random noise, so new trades are chosen arbitrarily 31 | print('Calculating preference matrix ...') 32 | preference = pd.DataFrame( 33 | np.random.random(eod_data.shape), 34 | columns=eod_data.columns, 35 | index=eod_data.index, 36 | ) 37 | 38 | # Run the simulator 39 | simulator = simulation.SimpleSimulator( 40 | initial_cash=10000, 41 | max_active_positions=10, 42 | percent_slippage=0.0005, 43 | trade_fee=1, 44 | ) 45 | simulator.simulate(eod_data, signal, preference) 46 | 47 | # Print results 48 | simulator.portfolio_history.print_position_summaries() 49 | simulator.print_initial_parameters() 50 |
simulator.portfolio_history.print_summary() 51 | simulator.portfolio_history.plot() 52 | simulator.portfolio_history.plot_benchmark_comparison() 53 | 54 | if __name__ == '__main__': 55 | simulate_portfolio() 56 | 57 | # Returns ... 58 | # Initial Cash: $10000 59 | # Maximum Number of Assets: 10 60 | # 61 | # Equity: $45455.68 62 | # Percent Return: 354.56% 63 | # S&P 500 Return: 33.80% 64 | # 65 | # Number of trades: 291 66 | # Average active trades: 9.89 67 | # 68 | # CAGR: 83.75% 69 | # S&P 500 CAGR: 12.43% 70 | # Excess CAGR: 71.32% 71 | # 72 | # Annualized Volatility: 14.44% 73 | # Sharpe Ratio: 5.80 74 | # Jensen's Alpha: 0.002018 75 | # 76 | # Dollar Max Drawdown: $1892.59 77 | # Percent Max Drawdown: 8.60% 78 | # Log Max Drawdown Ratio: 1.42 79 | # -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # Algorithmic Trading with Python 2 | Source code for Algorithmic Trading with Python (2020) by Chris Conlan. 3 | 4 | Paperback available for purchase [on Amazon](https://amzn.to/2UZbHuA). 5 | 6 | --------------- 7 | 8 | #### Useful resources 9 | 10 | These stand-alone resources can be useful to researchers with or without the accompanying book. The rest of the material in this repository depends on explanation and context given in the book. 11 | 12 | + Performance metrics used to evaluate trading strategies: [metrics.py](src/pypm/metrics.py) 13 | + Common technical indicators in pure Pandas: [indicators.py](src/pypm/indicators.py) 14 | + Converting common technical indicators into ternary signals: [signals.py](src/pypm/signals.py) 15 | + Generic grid search wrapper for numeric optimization: [optimization.py](src/pypm/optimization.py) 16 | + Object-oriented building blocks for portfolio simulation: [portfolio.py](src/pypm/portfolio.py) 17 | + Generic wrapper for multi-core repeated K fold cross-validation: [model.py](src/pypm/ml_model/model.py) 18 | + Free-to-use simulated EOD stock data and alternative data streams: [data](data) 19 | 20 | ---- 21 | 22 | ![](cover.png) 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisconlan/algorithmic-trading-with-python/ebe01087c7d9172db72bc3c9adc1eee5e882ac49/src/__init__.py -------------------------------------------------------------------------------- /src/bootstrap_portfolio.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | from pypm import metrics, signals, data_io, simulation, optimization 5 | from pypm.optimization import GridSearchOptimizer 6 | 7 | from typing import List, Dict, Tuple, Callable 8 | 9 | Performance = simulation.PortfolioHistory.PerformancePayload # Dict[str, float] 10 | 11 | def bind_simulator(**sim_kwargs) -> Callable: 12 | """ 13 | Create a simulator that uses a bootstrapped rolling Sharpe ratio for the preference matrix 14 | """ 15 | symbols: List[str] = data_io.get_all_symbols() 16 | prices: pd.DataFrame = data_io.load_eod_matrix(symbols) 17 | 18 | _bollinger: Callable = signals.create_bollinger_band_signal 19 | bollinger_n = 20 20 | 21 | returns = metrics.calculate_return_series(prices) 22 | sharpe_n = 20 23 | 24 | def bootstrap_rolling_sharpe_ratio(return_series: pd.Series) -> pd.Series: 25 | _series = return_series.iloc[1:] 26 | _series =
_series.sample(n=return_series.shape[0], replace=True) 27 | _series.iloc[:1] = [np.nan] 28 | _series = pd.Series(_series.values, index=return_series.index) 29 | _windowed_series = _series.rolling(sharpe_n) 30 | return _windowed_series.mean() / _windowed_series.std() 31 | 32 | _sharpe: Callable = bootstrap_rolling_sharpe_ratio 33 | 34 | def _simulate(bootstrap_test_id: int) -> Performance: 35 | 36 | signal = prices.apply(_bollinger, args=(bollinger_n,), axis=0) 37 | preference = returns.apply(_sharpe, axis=0) 38 | 39 | simulator = simulation.SimpleSimulator(**sim_kwargs) 40 | simulator.simulate(prices, signal, preference) 41 | 42 | return simulator.portfolio_history.get_performance_metric_data() 43 | 44 | return _simulate 45 | 46 | if __name__ == '__main__': 47 | 48 | simulate = bind_simulator(initial_cash=10000, max_active_positions=5) 49 | 50 | optimizer = GridSearchOptimizer(simulate) 51 | optimizer.optimize(bootstrap_test_id=range(1000)) 52 | 53 | print(optimizer.get_best('excess_cagr')) 54 | optimizer.print_summary() 55 | optimizer.plot('excess_cagr') 56 | -------------------------------------------------------------------------------- /src/fit_alternative_data_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | import numpy as np 4 | from typing import Dict 5 | 6 | from joblib import dump 7 | 8 | from pypm.ml_model.data_io import load_data 9 | from pypm.ml_model.events import calculate_events 10 | from pypm.ml_model.labels import calculate_labels 11 | from pypm.ml_model.features import calculate_features 12 | from pypm.ml_model.model import calculate_model 13 | from pypm.ml_model.weights import calculate_weights 14 | 15 | SRC_DIR = os.path.dirname(os.path.abspath(__file__)) 16 | 17 | if __name__ == '__main__': 18 | 19 | # All the data we have to work with 20 | symbols, eod_data, alt_data = load_data() 21 | 22 | # The ML dataframe for each symbol, to be combined later 23 | df_by_symbol: Dict[str, pd.DataFrame] = dict() 24 | 25 | # Build ML dataframe for each symbol 26 | for symbol in symbols: 27 | 28 | # Get revenue and price series 29 | revenue_series = alt_data[symbol].dropna() 30 | price_series = eod_data[symbol].dropna() 31 | price_index = price_series.index 32 | 33 | # Get events, labels, weights, and features 34 | event_index = calculate_events(revenue_series) 35 | event_labels, event_spans = calculate_labels(price_series, event_index) 36 | weights = calculate_weights(event_spans, price_index) 37 | features_df = calculate_features(price_series, revenue_series) 38 | 39 | # Subset features by event dates 40 | features_on_events = features_df.loc[event_index] 41 | 42 | # Convert labels and events to a dataframe 43 | labels_df = pd.DataFrame(event_labels) 44 | labels_df.columns = ['y'] 45 | 46 | # Convert weights to a dataframe 47 | weights_df = pd.DataFrame(weights) 48 | weights_df.columns = ['weights'] 49 | 50 | # Concatenate features to labels 51 | df = pd.concat([features_on_events, weights_df, labels_df], axis=1) 52 | df_by_symbol[symbol] = df 53 | 54 | # Create final ML dataframe 55 | df = pd.concat(df_by_symbol.values(), axis=0) 56 | df.sort_index(inplace=True) 57 | df.dropna(inplace=True) 58 | print(df) 59 | 60 | # Fit the model 61 | classifier = calculate_model(df) 62 | 63 | # Save the model 64 | dump(classifier, os.path.join(SRC_DIR, 'ml_model.joblib')) 65 | 66 | 67 | -------------------------------------------------------------------------------- /src/optimize_portfolio.py:
-------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | from pypm import metrics, signals, data_io, simulation, optimization 4 | from pypm.optimization import GridSearchOptimizer 5 | 6 | from typing import List, Dict, Tuple, Callable 7 | 8 | Performance = simulation.PortfolioHistory.PerformancePayload # Dict[str, float] 9 | 10 | def bind_simulator(**sim_kwargs) -> Callable: 11 | """ 12 | Create a function with all static simulation data bound to it, where the 13 | arguments are simulation parameters 14 | """ 15 | 16 | symbols: List[str] = data_io.get_all_symbols() 17 | prices: pd.DataFrame = data_io.load_eod_matrix(symbols) 18 | 19 | _bollinger: Callable = signals.create_bollinger_band_signal 20 | _sharpe: Callable = metrics.calculate_rolling_sharpe_ratio 21 | 22 | def _simulate(bollinger_n: int, sharpe_n: int) -> Performance: 23 | 24 | signal = prices.apply(_bollinger, args=(bollinger_n,), axis=0) 25 | preference = prices.apply(_sharpe, args=(sharpe_n, ), axis=0) 26 | 27 | simulator = simulation.SimpleSimulator(**sim_kwargs) 28 | simulator.simulate(prices, signal, preference) 29 | 30 | return simulator.portfolio_history.get_performance_metric_data() 31 | 32 | return _simulate 33 | 34 | if __name__ == '__main__': 35 | 36 | simulate = bind_simulator(initial_cash=10000, max_active_positions=5) 37 | 38 | optimizer = GridSearchOptimizer(simulate) 39 | optimizer.optimize( 40 | bollinger_n=range(10, 110, 10), 41 | sharpe_n=range(10, 110, 10), 42 | ) 43 | 44 | print(optimizer.get_best('excess_cagr')) 45 | optimizer.print_summary() 46 | optimizer.plot('excess_cagr') 47 | optimizer.plot('bollinger_n', 'excess_cagr', sharpe_n=20) 48 | optimizer.plot('bollinger_n', 'sharpe_n', 'excess_cagr') 49 | 50 | -------------------------------------------------------------------------------- /src/pypm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisconlan/algorithmic-trading-with-python/ebe01087c7d9172db72bc3c9adc1eee5e882ac49/src/pypm/__init__.py -------------------------------------------------------------------------------- /src/pypm/data_io.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | from pandas import DataFrame 4 | from typing import Dict, List, Tuple 5 | 6 | DATA_DIR = os.path.join( 7 | os.path.dirname(os.path.abspath(__file__)), 8 | '..', 9 | '..', 10 | 'data', 11 | ) 12 | EOD_DATA_DIR = os.path.join(DATA_DIR, 'eod') 13 | ALTERNATIVE_DATA_DIR = os.path.join(DATA_DIR, 'alternative_data') 14 | 15 | def load_eod_data(ticker: str, data_dir: str=EOD_DATA_DIR) -> DataFrame: 16 | f_path = os.path.join(data_dir, f'{ticker}.csv') 17 | assert os.path.isfile(f_path), f'No data available for {ticker}' 18 | return pd.read_csv(f_path, parse_dates=['date'], index_col='date') 19 | 20 | def load_spy_data() -> DataFrame: 21 | """ 22 | Convenience function to load S&P 500 ETF EOD data 23 | """ 24 | return load_eod_data('SPY', DATA_DIR) 25 | 26 | def _combine_columns(filepaths_by_symbol: Dict[str, str], 27 | attr: str='close') -> pd.DataFrame: 28 | 29 | data_frames = [ 30 | pd.read_csv( 31 | filepath, 32 | index_col='date', 33 | usecols=['date', attr], 34 | parse_dates=['date'], 35 | ).rename( 36 | columns={ 37 | 'date': 'date', 38 | attr: symbol, 39 | } 40 | ) for symbol, filepath in filepaths_by_symbol.items() 41 | ] 42 | return pd.concat(data_frames, sort=True, axis=1) 43 | 44 | 45 | def 
load_eod_matrix(tickers: List[str], attr: str='close') -> pd.DataFrame: 46 | filepaths_by_symbol = { 47 | t: os.path.join(EOD_DATA_DIR, f'{t}.csv') for t in tickers 48 | } 49 | return _combine_columns(filepaths_by_symbol, attr) 50 | 51 | def load_alternative_data_matrix(tickers: List[str]) -> pd.DataFrame: 52 | filepaths_by_symbol = { 53 | t: os.path.join(ALTERNATIVE_DATA_DIR, f'{t}.csv') for t in tickers 54 | } 55 | return _combine_columns(filepaths_by_symbol, 'value') 56 | 57 | 58 | def get_all_symbols() -> List[str]: 59 | return [os.path.splitext(v)[0] for v in os.listdir(EOD_DATA_DIR)] 60 | 61 | 62 | def build_eod_closes() -> None: 63 | filenames = os.listdir(EOD_DATA_DIR) 64 | filepaths_by_symbol = { 65 | os.path.splitext(v)[0]: os.path.join(EOD_DATA_DIR, v) for v in filenames 66 | } 67 | result = _combine_columns(filepaths_by_symbol) 68 | result.to_csv(os.path.join(DATA_DIR, 'eod_closes.csv')) 69 | 70 | 71 | def concatenate_metrics(df_by_metric: Dict[str, pd.DataFrame]) -> pd.DataFrame: 72 | """ 73 | Concatenates different dataframes that have the same columns into a 74 | hierarchical dataframe. 75 | 76 | The input df_by_metric should be of the form 77 | 78 | { 79 | 'metric_1': pd.DataFrame(), 80 | 'metric_2': pd.DataFrame(), 81 | } 82 | where each dataframe should have the same columns, i.e. symbols. 83 | """ 84 | 85 | to_concatenate = [] 86 | tuples = [] 87 | for key, df in df_by_metric.items(): 88 | to_concatenate.append(df) 89 | tuples += [(s, key) for s in df.columns.values] 90 | 91 | df = pd.concat(to_concatenate, sort=True, axis=1) 92 | df.columns = pd.MultiIndex.from_tuples(tuples, names=['symbol', 'metric']) 93 | 94 | return df 95 | 96 | 97 | if __name__ == '__main__': 98 | build_eod_closes() 99 | 100 | 101 | -------------------------------------------------------------------------------- /src/pypm/filters.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | def calculate_non_uniform_lagged_change(series: pd.Series, n_days: int): 5 | """ 6 | Use pd.Series.searchsorted to measure the lagged change in a non-uniformly 7 | spaced time series over n_days of calendar time.
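Roughly, each value is differenced against the earliest observation dated at or after n_days before it, so the lag adapts to irregular spacing in the index.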
8 | """ 9 | 10 | # Get mapping from now to n_days ago at every point 11 | _timedelta: pd.Timedelta = pd.Timedelta(days=n_days) 12 | _idx: pd.Series = series.index.searchsorted(series.index - _timedelta) 13 | _idx = _idx[_idx > 0] 14 | 15 | # Get the last len(series) - n_days values 16 | _series = series.iloc[-_idx.shape[0]:] 17 | 18 | # Build a padding of NA values 19 | _pad_length = series.shape[0] - _idx.shape[0] 20 | _na_pad = pd.Series(None, index=series.index[:_pad_length]) 21 | 22 | # Get the corresponding lagged values 23 | _lagged_series = series.iloc[_idx] 24 | 25 | # Measure the difference 26 | _diff = pd.Series(_series.values-_lagged_series.values, index=_series.index) 27 | 28 | return pd.concat([_na_pad, _diff]) 29 | 30 | 31 | def calculate_cusum_events(series: pd.Series, 32 | filter_threshold: float) -> pd.DatetimeIndex: 33 | """ 34 | Calculate symmetric cusum filter and corresponding events 35 | """ 36 | 37 | event_dates = list() 38 | s_up = 0 39 | s_down = 0 40 | 41 | for date, price in series.items(): 42 | s_up = max(0, s_up + price) 43 | s_down = min(0, s_down + price) 44 | 45 | if s_up > filter_threshold: 46 | s_up = 0 47 | event_dates.append(date) 48 | 49 | elif s_down < -filter_threshold: 50 | s_down = 0 51 | event_dates.append(date) 52 | 53 | return pd.DatetimeIndex(event_dates) 54 | 55 | 56 | -------------------------------------------------------------------------------- /src/pypm/indicators.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from pypm.data_io import load_eod_data 3 | 4 | 5 | def calculate_simple_moving_average(series: pd.Series, n: int=20) -> pd.Series: 6 | """Calculates the simple moving average""" 7 | return series.rolling(n).mean() 8 | 9 | 10 | def calculate_simple_moving_sample_stdev(series: pd.Series, n: int=20) -> pd.Series: 11 | """Calculates the simple moving sample standard deviation""" 12 | return series.rolling(n).std() 13 | 14 | 15 | def calculate_macd_oscillator(series: pd.Series, 16 | n1: int=5, n2: int=34) -> pd.Series: 17 | """ 18 | Calculate the moving average convergence divergence oscillator, given a 19 | short moving average of length n1 and a long moving average of length n2 20 | """ 21 | assert n1 < n2, 'n1 must be less than n2' 22 | return calculate_simple_moving_average(series, n1) - \ 23 | calculate_simple_moving_average(series, n2) 24 | 25 | 26 | def calculate_bollinger_bands(series: pd.Series, n: int=20) -> pd.DataFrame: 27 | """ 28 | Calculates the Bollinger Bands and returns them as a dataframe 29 | """ 30 | 31 | sma = calculate_simple_moving_average(series, n) 32 | stdev = calculate_simple_moving_sample_stdev(series, n) 33 | 34 | return pd.DataFrame({ 35 | 'middle': sma, 36 | 'upper': sma + 2 * stdev, 37 | 'lower': sma - 2 * stdev 38 | }) 39 | 40 | 41 | def calculate_money_flow_volume_series(df: pd.DataFrame) -> pd.Series: 42 | """ 43 | Calculates money flow series 44 | """ 45 | mfv = df['volume'] * (2*df['close'] - df['high'] - df['low']) / \ 46 | (df['high'] - df['low']) 47 | return mfv 48 | 49 | def calculate_money_flow_volume(df: pd.DataFrame, n: int=20) -> pd.Series: 50 | """ 51 | Calculates money flow volume, or q_t in our formula 52 | """ 53 | return calculate_money_flow_volume_series(df).rolling(n).sum() 54 | 55 | def calculate_chaikin_money_flow(df: pd.DataFrame, n: int=20) -> pd.Series: 56 | """ 57 | Calculates the Chaikin money flow 58 | """ 59 | return calculate_money_flow_volume(df, n) / df['volume'].rolling(n).sum() 60 | 61 | 62 | if __name__ == '__main__': 63 |
data = load_eod_data('AWU') 64 | closes = data['close'] 65 | sma = calculate_simple_moving_average(closes, 10) 66 | macd = calculate_macd_oscillator(closes, 5, 50) 67 | 68 | bollinger_bands = calculate_bollinger_bands(closes, 100) 69 | bollinger_bands = bollinger_bands.assign(closes=closes) 70 | bollinger_bands.plot() 71 | 72 | cmf = calculate_chaikin_money_flow(data) 73 | # cmf.plot() 74 | 75 | 76 | import matplotlib.pyplot as plt 77 | plt.show() -------------------------------------------------------------------------------- /src/pypm/labels.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from typing import List, Tuple 4 | 5 | def compute_triple_barrier_labels( 6 | price_series: pd.Series, 7 | event_index: pd.Series, 8 | time_delta_days: int, 9 | upper_delta: float=None, 10 | lower_delta: float=None, 11 | vol_span: int=20, 12 | upper_z: float=None, 13 | lower_z: float=None, 14 | upper_label: int=1, 15 | lower_label: int=-1) -> Tuple[pd.Series, pd.Series]: 16 | """ 17 | Calculate event labels according to the triple-barrier method. 18 | 19 | Return a series with both the original events and the labels. Labels 1, 0, 20 | and -1 correspond to upper barrier breach, vertical barrier breach, and 21 | lower barrier breach, respectively. 22 | 23 | Also return a series where the index is the start date of the label and the 24 | values are the end dates of the label. 25 | """ 26 | 27 | timedelta = pd.Timedelta(days=time_delta_days) 28 | series = pd.Series(np.log(price_series.values), index=price_series.index) 29 | 30 | # A list with elements of {-1, 0, 1} indicating the outcome of the events 31 | labels = list() 32 | label_dates = list() 33 | 34 | if upper_z or lower_z: 35 | volatility = series.ewm(span=vol_span).std() 36 | volatility *= np.sqrt(time_delta_days / vol_span) 37 | 38 | for event_date in event_index: 39 | date_barrier = event_date + timedelta 40 | 41 | start_price = series.loc[event_date] 42 | log_returns = series.loc[event_date:date_barrier] - start_price 43 | 44 | # First element of tuple is 1 or -1 indicating upper or lower barrier 45 | # Second element of tuple is first date when barrier was crossed 46 | candidates: List[Tuple[int, pd.Timestamp]] = list() 47 | 48 | # Add the first upper or lower date to candidates 49 | if upper_delta: 50 | _date = log_returns[log_returns > upper_delta].first_valid_index() 51 | if _date: 52 | candidates.append((upper_label, _date)) 53 | 54 | if lower_delta: 55 | _date = log_returns[log_returns < lower_delta].first_valid_index() 56 | if _date: 57 | candidates.append((lower_label, _date)) 58 | 59 | # Add the first upper_z and lower_z to candidates 60 | if upper_z: 61 | upper_barrier = upper_z * volatility[event_date] 62 | _date = log_returns[log_returns > upper_barrier].first_valid_index() 63 | if _date: 64 | candidates.append((upper_label, _date)) 65 | 66 | if lower_z: 67 | lower_barrier = lower_z * volatility[event_date] 68 | _date = log_returns[log_returns < lower_barrier].first_valid_index() 69 | if _date: 70 | candidates.append((lower_label, _date)) 71 | 72 | if candidates: 73 | # If any candidates, return label for first date 74 | label, label_date = min(candidates, key=lambda x: x[1]) 75 | else: 76 | # If there were no candidates, time barrier was touched 77 | label, label_date = 0, date_barrier 78 | 79 | labels.append(label) 80 | label_dates.append(label_date) 81 | 82 | label_series = pd.Series(labels, index=event_index) 83 | event_spans = pd.Series(label_dates,
index=event_index) 84 | 85 | return label_series, event_spans 86 | 87 | 88 | 89 | 90 | 91 | -------------------------------------------------------------------------------- /src/pypm/metrics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from pypm.data_io import load_eod_data, load_spy_data 4 | from sklearn.linear_model import LinearRegression 5 | from typing import Dict, Any, Callable 6 | 7 | def calculate_return_series(series: pd.Series) -> pd.Series: 8 | """ 9 | Calculates the return series of a given time series. 10 | 11 | >>> data = load_eod_data('VBB') 12 | >>> close_series = data['close'] 13 | >>> return_series = calculate_return_series(close_series) 14 | 15 | The first value will always be NaN. 16 | """ 17 | 18 | shifted_series = series.shift(1, axis=0) 19 | return series / shifted_series - 1 20 | 21 | 22 | def calculate_log_return_series(series: pd.Series) -> pd.Series: 23 | """ 24 | Same as calculate_return_series but with log returns 25 | """ 26 | shifted_series = series.shift(1, axis=0) 27 | return pd.Series(np.log(series / shifted_series)) 28 | 29 | 30 | def calculate_percent_return(series: pd.Series) -> float: 31 | """ 32 | Takes the first and last value in a series to determine the percent return, 33 | assuming the series is in date-ascending order 34 | """ 35 | return series.iloc[-1] / series.iloc[0] - 1 36 | 37 | 38 | def get_years_past(series: pd.Series) -> float: 39 | """ 40 | Calculate the years past according to the index of the series for use with 41 | functions that require annualization 42 | """ 43 | start_date = series.index[0] 44 | end_date = series.index[-1] 45 | return (end_date - start_date).days / 365.25 46 | 47 | 48 | def calculate_cagr(series: pd.Series) -> float: 49 | """ 50 | Calculate compounded annual growth rate 51 | """ 52 | start_price = series.iloc[0] 53 | end_price = series.iloc[-1] 54 | value_factor = end_price / start_price 55 | year_past = get_years_past(series) 56 | return (value_factor ** (1 / year_past)) - 1 57 | 58 | 59 | def calculate_annualized_volatility(return_series: pd.Series) -> float: 60 | """ 61 | Calculates annualized volatility for a date-indexed return series. 62 | Works for any interval of date-indexed prices and returns. 63 | """ 64 | years_past = get_years_past(return_series) 65 | entries_per_year = return_series.shape[0] / years_past 66 | return return_series.std() * np.sqrt(entries_per_year) 67 | 68 | 69 | def calculate_sharpe_ratio(price_series: pd.Series, 70 | benchmark_rate: float=0) -> float: 71 | """ 72 | Calculates the Sharpe ratio given a price series. Defaults to benchmark_rate 73 | of zero. 74 | """ 75 | cagr = calculate_cagr(price_series) 76 | return_series = calculate_return_series(price_series) 77 | volatility = calculate_annualized_volatility(return_series) 78 | return (cagr - benchmark_rate) / volatility 79 | 80 | 81 | def calculate_rolling_sharpe_ratio(price_series: pd.Series, 82 | n: float=20) -> pd.Series: 83 | """ 84 | Compute an approximation of the Sharpe ratio on a rolling basis. 85 | Intended for use as a preference value. 86 | """ 87 | rolling_return_series = calculate_return_series(price_series).rolling(n) 88 | return rolling_return_series.mean() / rolling_return_series.std() 89 | 90 | 91 | def calculate_annualized_downside_deviation(return_series: pd.Series, 92 | benchmark_rate: float=0) -> float: 93 | """ 94 | Calculates the downside deviation for use in the Sortino ratio.
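Computed as sqrt(sum(max(r_b - r_i, 0) ** 2) / (n - 1)) over the return series, annualized by the square root of the number of entries per year.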
95 | 96 | Benchmark rate is assumed to be annualized. It will be adjusted according 97 | to the number of periods per year seen in the data. 98 | """ 99 | 100 | # For both de-annualizing the benchmark rate and annualizing result 101 | years_past = get_years_past(return_series) 102 | entries_per_year = return_series.shape[0] / years_past 103 | 104 | adjusted_benchmark_rate = ((1+benchmark_rate) ** (1/entries_per_year)) - 1 105 | 106 | downside_series = adjusted_benchmark_rate - return_series 107 | downside_sum_of_squares = (downside_series[downside_series > 0] ** 2).sum() 108 | denominator = return_series.shape[0] - 1 109 | downside_deviation = np.sqrt(downside_sum_of_squares / denominator) 110 | 111 | return downside_deviation * np.sqrt(entries_per_year) 112 | 113 | 114 | def calculate_sortino_ratio(price_series: pd.Series, 115 | benchmark_rate: float=0) -> float: 116 | """ 117 | Calculates the Sortino ratio. 118 | """ 119 | cagr = calculate_cagr(price_series) 120 | return_series = calculate_return_series(price_series) 121 | downside_deviation = calculate_annualized_downside_deviation(return_series) 122 | return (cagr - benchmark_rate) / downside_deviation 123 | 124 | 125 | def calculate_pure_profit_score(price_series: pd.Series) -> float: 126 | """ 127 | Calculates the pure profit score 128 | """ 129 | cagr = calculate_cagr(price_series) 130 | 131 | # Build a single column for a predictor, t 132 | t: np.ndarray = np.arange(0, price_series.shape[0]).reshape(-1, 1) 133 | 134 | # Fit the regression 135 | regression = LinearRegression().fit(t, price_series) 136 | 137 | # Get the r-squared value 138 | r_squared = regression.score(t, price_series) 139 | 140 | return cagr * r_squared 141 | 142 | def calculate_jensens_alpha(return_series: pd.Series, 143 | benchmark_return_series: pd.Series) -> float: 144 | """ 145 | Calculates Jensen's alpha. Prefers that the input series share the same 146 | index. Handles NAs. 147 | """ 148 | 149 | # Join series along date index and purge NAs 150 | df = pd.concat([return_series, benchmark_return_series], sort=True, axis=1) 151 | df = df.dropna() 152 | 153 | # Get the appropriate data structure for scikit-learn 154 | clean_returns: pd.Series = df[df.columns.values[0]] 155 | clean_benchmarks = pd.DataFrame(df[df.columns.values[1]]) 156 | 157 | # Fit a linear regression and return the alpha 158 | regression = LinearRegression().fit(clean_benchmarks, y=clean_returns) 159 | return regression.intercept_ 160 | 161 | def calculate_jensens_alpha_v2(return_series: pd.Series) -> float: 162 | """ 163 | Calculates Jensen's alpha, but loads in SPY prices as the benchmark series 164 | for you. Can be slow if run repeatedly.
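When calling repeatedly, compute the SPY benchmark return series once and pass it to calculate_jensens_alpha directly.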
165 | """ 166 | spy_data = load_spy_data() 167 | benchmark_return_series = calculate_log_return_series(spy_data['close']) 168 | return calculate_jensens_alpha(return_series, benchmark_return_series) 169 | 170 | 171 | DRAWDOWN_EVALUATORS: Dict[str, Callable] = { 172 | 'dollar': lambda price, peak: peak - price, 173 | 'percent': lambda price, peak: -((price / peak) - 1), 174 | 'log': lambda price, peak: np.log(peak) - np.log(price), 175 | } 176 | 177 | def calculate_drawdown_series(series: pd.Series, method: str='log') -> pd.Series: 178 | """ 179 | Returns the drawdown series 180 | """ 181 | assert method in DRAWDOWN_EVALUATORS, \ 182 | f'Method "{method}" must be one of {list(DRAWDOWN_EVALUATORS.keys())}' 183 | 184 | evaluator = DRAWDOWN_EVALUATORS[method] 185 | return evaluator(series, series.cummax()) 186 | 187 | def calculate_max_drawdown(series: pd.Series, method: str='log') -> float: 188 | """ 189 | Simply returns the max drawdown as a float 190 | """ 191 | return calculate_drawdown_series(series, method).max() 192 | 193 | def calculate_max_drawdown_with_metadata(series: pd.Series, 194 | method: str='log') -> Dict[str, Any]: 195 | """ 196 | Calculates the max drawdown and stores metadata about when and where. Returns 197 | a dictionary of the form 198 | { 199 | 'max_drawdown': float, 200 | 'peak_date': pd.Timestamp, 201 | 'peak_price': float, 202 | 'trough_date': pd.Timestamp, 203 | 'trough_price': float, 204 | } 205 | """ 206 | 207 | assert method in DRAWDOWN_EVALUATORS, \ 208 | f'Method "{method}" must be one of {list(DRAWDOWN_EVALUATORS.keys())}' 209 | 210 | evaluator = DRAWDOWN_EVALUATORS[method] 211 | 212 | max_drawdown = 0 213 | local_peak_date = peak_date = trough_date = series.index[0] 214 | local_peak_price = peak_price = trough_price = series.iloc[0] 215 | 216 | for date, price in series.items(): 217 | 218 | # Keep track of the rolling max 219 | if price > local_peak_price: 220 | local_peak_date = date 221 | local_peak_price = price 222 | 223 | # Compute the drawdown 224 | drawdown = evaluator(price, local_peak_price) 225 | 226 | # Store new max drawdown values 227 | if drawdown > max_drawdown: 228 | max_drawdown = drawdown 229 | 230 | peak_date = local_peak_date 231 | peak_price = local_peak_price 232 | 233 | trough_date = date 234 | trough_price = price 235 | 236 | return { 237 | 'max_drawdown': max_drawdown, 238 | 'peak_date': peak_date, 239 | 'peak_price': peak_price, 240 | 'trough_date': trough_date, 241 | 'trough_price': trough_price 242 | } 243 | 244 | def calculate_log_max_drawdown_ratio(series: pd.Series) -> float: 245 | log_drawdown = calculate_max_drawdown(series, method='log') 246 | log_return = np.log(series.iloc[-1]) - np.log(series.iloc[0]) 247 | return log_return - log_drawdown 248 | 249 | def calculate_calmar_ratio(series: pd.Series, years_past: int=3) -> float: 250 | """ 251 | Return the percent max drawdown ratio over the past three years, otherwise 252 | known as the Calmar Ratio 253 | """ 254 | 255 | # Filter series on past three years 256 | last_date = series.index[-1] 257 | three_years_ago = last_date - pd.Timedelta(days=years_past*365.25) 258 | series = series[series.index > three_years_ago] 259 | 260 | # Compute annualized percent max drawdown ratio 261 | percent_drawdown = calculate_max_drawdown(series, method='percent') 262 | cagr = calculate_cagr(series) 263 | return cagr / percent_drawdown 264 | 265 | 266 | -------------------------------------------------------------------------------- /src/pypm/ml_model/__init__.py:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisconlan/algorithmic-trading-with-python/ebe01087c7d9172db72bc3c9adc1eee5e882ac49/src/pypm/ml_model/__init__.py -------------------------------------------------------------------------------- /src/pypm/ml_model/data_io.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from typing import Dict, Any, Callable, Tuple, List 4 | 5 | from pypm import data_io 6 | 7 | def load_data() -> Tuple[List[str], pd.DataFrame, pd.DataFrame]: 8 | """ 9 | Load the data as it will be used in the alternative data model 10 | """ 11 | symbols: List[str] = data_io.get_all_symbols() 12 | alt_data = data_io.load_alternative_data_matrix(symbols) 13 | eod_data = data_io.load_eod_matrix(symbols) 14 | eod_data = eod_data[eod_data.index >= alt_data.index.min()] 15 | 16 | return symbols, eod_data, alt_data 17 | -------------------------------------------------------------------------------- /src/pypm/ml_model/events.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from pypm import filters 4 | 5 | def calculate_events_for_revenue_series(series: pd.Series, 6 | filter_threshold: float, lookback: int=365) -> pd.DatetimeIndex: 7 | """ 8 | Calculate the symmetric cusum filter to generate events on YoY changes in 9 | the log revenue series 10 | """ 11 | series = np.log(series) 12 | series = filters.calculate_non_uniform_lagged_change(series, lookback) 13 | return filters.calculate_cusum_events(series, filter_threshold) 14 | 15 | 16 | def calculate_events(revenue_series: pd.Series): 17 | return calculate_events_for_revenue_series( 18 | revenue_series, 19 | filter_threshold=5, 20 | lookback=365, 21 | ) 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /src/pypm/ml_model/features.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | from pypm import indicators, filters, metrics 5 | 6 | _calc_delta = filters.calculate_non_uniform_lagged_change 7 | _calc_ma = indicators.calculate_simple_moving_average 8 | _calc_log_return = metrics.calculate_log_return_series 9 | 10 | def _calc_rolling_vol(series, n): 11 | return series.rolling(n).std() * np.sqrt(252 / n) 12 | 13 | def calculate_features(price_series, revenue_series) -> pd.DataFrame: 14 | """ 15 | Calculate any and all potentially useful features. Return as a dataframe.
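(Each of the five lookback windows contributes a revenue delta, a price return, and a volatility column, for 15 columns in total.)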
16 | """ 17 | 18 | log_revenue = np.log(revenue_series) 19 | log_prices = np.log(price_series) 20 | 21 | log_revenue_ma = _calc_ma(log_revenue, 10) 22 | log_prices_ma = _calc_ma(log_prices, 10) 23 | 24 | log_returns = _calc_log_return(price_series) 25 | 26 | features_by_name = dict() 27 | 28 | for i in [7, 30, 90, 180, 360]: 29 | 30 | rev_feature = _calc_delta(log_revenue_ma, i) 31 | price_feature = _calc_delta(log_prices_ma, i) 32 | vol_feature = _calc_rolling_vol(log_returns, i) 33 | 34 | features_by_name.update({ 35 | f'{i}_day_revenue_delta': rev_feature, 36 | f'{i}_day_return': price_feature, 37 | f'{i}_day_vol': vol_feature, 38 | }) 39 | 40 | features_df = pd.DataFrame(features_by_name) 41 | return features_df 42 | -------------------------------------------------------------------------------- /src/pypm/ml_model/labels.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from typing import Tuple 4 | 5 | from pypm import labels 6 | 7 | def calculate_labels(price_series, event_index) -> Tuple[pd.Series, pd.Series]: 8 | """ 9 | Calculate labels based on the triple barrier method. Return a series of 10 | event labels indexed by event start date, and return a series of event end 11 | dates indexed by event start date. 12 | """ 13 | 14 | # Remove events that don't have a proper chance to materialize 15 | time_delta_days = 90 16 | max_date = price_series.index.max() 17 | cutoff = max_date - pd.Timedelta(days=time_delta_days) 18 | event_index = event_index[event_index <= cutoff] 19 | 20 | # Use triple barrier method 21 | event_labels, event_spans = labels.compute_triple_barrier_labels( 22 | price_series, 23 | event_index, 24 | time_delta_days=time_delta_days, 25 | # upper_delta=0.10, 26 | # lower_delta=-0.10, 27 | upper_z=1.8, 28 | lower_z=-1.8, 29 | lower_label=-1, 30 | ) 31 | 32 | return event_labels, event_spans 33 | -------------------------------------------------------------------------------- /src/pypm/ml_model/model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | from sklearn.ensemble import RandomForestClassifier 5 | from sklearn.model_selection import RepeatedKFold 6 | from sklearn.base import clone 7 | 8 | from joblib import Parallel, delayed 9 | 10 | # Number of jobs to run in parallel 11 | # Set to number of computer cores to use 12 | N_JOBS = 10 13 | N_SPLITS = 5 14 | N_REPEATS = 4 15 | 16 | def _fit_and_score(classifier, X, y, w, train_index, test_index, i) -> float: 17 | """ 18 | The function used by joblib to split, train, and score cross-validations 19 | """ 20 | X_train = X.iloc[train_index] 21 | X_test = X.iloc[test_index] 22 | 23 | y_train = y.iloc[train_index] 24 | y_test = y.iloc[test_index] 25 | 26 | w_train = w.iloc[train_index] 27 | w_test = w.iloc[test_index] 28 | 29 | classifier.fit(X_train, y_train, w_train) 30 | score = classifier.score(X_test, y_test, w_test) 31 | 32 | print(f'Finished {i} ({100*score:.1f}%)') 33 | 34 | return score 35 | 36 | def repeated_k_fold(classifier, X, y, w) -> np.ndarray: 37 | """ 38 | Perform repeated k-fold cross-validation on a classifier. Spread the fitting 39 | jobs over multiple computer cores.
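With the module-level defaults (N_SPLITS=5, N_REPEATS=4), this fits 20 models, N_JOBS at a time.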
40 | """ 41 | n_jobs = N_JOBS 42 | 43 | n_splits = N_SPLITS 44 | n_repeats = N_REPEATS 45 | 46 | total_fits = n_splits * n_repeats 47 | 48 | _k_fold = RepeatedKFold(n_splits=n_splits, n_repeats=n_repeats) 49 | 50 | print(f'Fitting {total_fits} models {n_jobs} at a time ...') 51 | print() 52 | 53 | parallel = Parallel(n_jobs=n_jobs) 54 | scores = parallel( 55 | delayed(_fit_and_score)( 56 | clone(classifier), X, y, w, train_index, test_index, i 57 | ) for i, (train_index, test_index) in enumerate(_k_fold.split(X)) 58 | ) 59 | 60 | return np.array(scores) 61 | 62 | 63 | def calculate_model(df: pd.DataFrame) -> RandomForestClassifier: 64 | """ 65 | Given a dataframe with a y column, weights column, and predictor columns 66 | with arbitrary names, cross-validate and fit a classifier. Print 67 | diagnostics. 68 | """ 69 | classifier = RandomForestClassifier(n_estimators=100) 70 | 71 | # Separate data 72 | predictor_columns = [ 73 | c for c in df.columns.values if c not in ('y', 'weights') 74 | ] 75 | X = df[predictor_columns] 76 | y = df['y'] 77 | w = df['weights'] 78 | 79 | # Fit cross-validation 80 | scores = repeated_k_fold(classifier, X, y, w) 81 | 82 | # Get a full dataset fit for importance scores 83 | classifier.fit(X, y, w) 84 | 85 | # Compute diagnostics 86 | _imp = classifier.feature_importances_ 87 | importance_series = pd.Series(_imp, index=predictor_columns) 88 | importance_series = importance_series.sort_values(ascending=False) 89 | 90 | # baseline accuracy is the best value achievable with a constant guess 91 | baseline = np.max(y.value_counts() / y.shape[0]) 92 | 93 | # Compute a rough confidence interval for the improvement 94 | mean_score = scores.mean() 95 | std_score = scores.std() 96 | 97 | upper_bound = mean_score + 2 * std_score 98 | lower_bound = mean_score - 2 * std_score 99 | ibounds = (lower_bound - baseline, upper_bound - baseline) 100 | 101 | print() 102 | print('Feature importances') 103 | for col, imp in importance_series.items(): 104 | print(f'{col:24} {imp:>.3f}') 105 | print() 106 | 107 | print('Cross validation scores') 108 | print(np.round(100 * scores, 1)) 109 | print() 110 | 111 | print(f'Baseline accuracy {100*baseline:.1f}%') 112 | print(f'OOS accuracy {100*mean_score:.1f}% +/- {200 * scores.std():.1f}%') 113 | print(f'Improvement {100*(ibounds[0]):.1f} to {100*(ibounds[1]):.1f}%') 114 | print() 115 | 116 | return classifier 117 | -------------------------------------------------------------------------------- /src/pypm/ml_model/signals.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | from pypm.ml_model.events import calculate_events 5 | from pypm.ml_model.features import calculate_features 6 | 7 | from typing import List 8 | 9 | def calculate_signals(classifier, symbols: List[str], eod_data: pd.DataFrame, 10 | alt_data: pd.DataFrame) -> pd.DataFrame: 11 | """ 12 | Calculate signal dataframes for use in the simulator 13 | """ 14 | 15 | # For storing the signals 16 | signal_by_symbol = dict() 17 | 18 | # Build events and features for each symbol 19 | for symbol in symbols: 20 | 21 | # Get revenue and price series 22 | revenue_series = alt_data[symbol].dropna() 23 | price_series = eod_data[symbol].dropna() 24 | 25 | # Build output template 26 | signal_series = pd.Series(0, index=price_series.index) 27 | 28 | # Get events and features 29 | event_index = calculate_events(revenue_series) 30 | features_df = calculate_features(price_series, revenue_series) 31 | 32 |
32 | features_on_events = features_df.loc[event_index] 33 | features_on_events.dropna(inplace=True) 34 | event_index = features_on_events.index 35 | 36 | if features_on_events.empty: 37 | predictions = pd.Series(dtype=float) 38 | else: 39 | _predictions = classifier.predict(features_on_events) 40 | predictions = pd.Series(_predictions, index=event_index) 41 | 42 | # Add predictions into the output template 43 | signal_series = signal_series.add(predictions, fill_value=0) 44 | 45 | signal_by_symbol[symbol] = signal_series 46 | 47 | signal = pd.DataFrame(signal_by_symbol) 48 | signal.sort_index(inplace=True) 49 | signal.dropna(inplace=True) 50 | 51 | return signal 52 | 53 | -------------------------------------------------------------------------------- /src/pypm/ml_model/weights.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | from pypm.weights import calculate_uniqueness 5 | 6 | def calculate_weights(event_spans: pd.Series, 7 | price_index: pd.Series) -> pd.Series: 8 | return calculate_uniqueness(event_spans, price_index) 9 | -------------------------------------------------------------------------------- /src/pypm/optimization.py: -------------------------------------------------------------------------------- 1 | from pypm import metrics, signals, data_io, simulation 2 | 3 | import pandas as pd 4 | import numpy as np 5 | from collections import defaultdict, OrderedDict 6 | from itertools import product 7 | from timeit import default_timer 8 | from typing import Dict, Tuple, List, Callable, Iterable, Any, NewType, Mapping 9 | 10 | import matplotlib.pyplot as plt 11 | from matplotlib import cm 12 | from mpl_toolkits.mplot3d import Axes3D 13 | 14 | # Performance data and parameter inputs are dictionaries 15 | Parameters = NewType('Parameters', Dict[str, float]) 16 | Performance = simulation.PortfolioHistory.PerformancePayload # Dict[str, float] 17 | 18 | # Simulation function must take parameters as keyword arguments pointing to 19 | # iterables and return a performance metric dictionary 20 | SimKwargs = NewType('SimKwargs', Mapping[str, Iterable[Any]]) 21 | SimFunction = NewType('SimFunction', Callable[[SimKwargs], Performance]) 22 | 23 | class OptimizationResult(object): 24 | """Simple container class for optimization data""" 25 | 26 | def __init__(self, parameters: Parameters, performance: Performance): 27 | 28 | # Make sure no collisions between performance metrics and params 29 | assert len(parameters.keys() & performance.keys()) == 0, \ 30 | 'parameter name matches performance metric name' 31 | 32 | self.parameters = parameters 33 | self.performance = performance 34 | 35 | @property 36 | def as_dict(self) -> Dict[str, float]: 37 | """Combines the dictionaries after we are sure of no collisions""" 38 | return {**self.parameters, **self.performance} 39 | 40 | 41 | class GridSearchOptimizer(object): 42 | """ 43 | A generic grid search optimizer that requires only a simulation function and 44 | a series of parameter ranges. Provides timing, summary, and plotting 45 | utilities with return data. 46 | """
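# Usage sketch (hypothetical simulation function, illustration only):
#
#     def simulate(n1: int, n2: int) -> dict:
#         # ... run a backtest and return a performance metric dictionary
#         return {'sharpe_ratio': float(n2 - n1)}
#
#     optimizer = GridSearchOptimizer(simulate)
#     optimizer.optimize(n1=range(5, 15), n2=range(20, 40, 5))
#     optimizer.print_summary()
#     print(optimizer.get_best('sharpe_ratio'))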
46 | """ 47 | 48 | def __init__(self, simulation_function: SimFunction): 49 | 50 | self.simulate = simulation_function 51 | self._results_list: List[OptimizationResult] = list() 52 | self._results_df = pd.DataFrame() 53 | 54 | self._optimization_finished = False 55 | 56 | def add_results(self, parameters: Parameters, performance: Performance): 57 | _results = OptimizationResult(parameters, performance) 58 | self._results_list.append(_results) 59 | 60 | def optimize(self, **optimization_ranges: SimKwargs): 61 | 62 | assert optimization_ranges, 'Must provide non-empty parameters.' 63 | 64 | # Convert all iterables to lists 65 | param_ranges = {k: list(v) for k, v in optimization_ranges.items()} 66 | self.param_names = param_names = list(param_ranges.keys()) 67 | 68 | # Count total simulation 69 | n = total_simulations = np.prod([len(r) for r in param_ranges.values()]) 70 | 71 | total_time_elapsed = 0 72 | 73 | print(f'Starting simulation ...') 74 | print(f'Simulating 1 / {n} ...', end='\r') 75 | for i, params in enumerate(product(*param_ranges.values())): 76 | if i > 0: 77 | _avg = avg_time = total_time_elapsed / i 78 | _rem = remaining_time = (n - (i + 1)) * avg_time 79 | s = f'Simulating {i+1} / {n} ... ' 80 | s += f'{_rem:.0f}s remaining ({_avg:.1f}s avg)' 81 | s += ' '*8 82 | print(s, end='\r') 83 | 84 | timer_start = default_timer() 85 | 86 | parameters = {n: param for n, param in zip(param_names, params)} 87 | results = self.simulate(**parameters) 88 | self.add_results(parameters, results) 89 | 90 | timer_end = default_timer() 91 | total_time_elapsed += timer_end - timer_start 92 | 93 | print(f'Simulated {total_simulations} / {total_simulations} ...') 94 | print(f'Elapsed time: {total_time_elapsed:.0f}s') 95 | print(f'Done.') 96 | 97 | self._optimization_finished = True 98 | 99 | def _assert_finished(self): 100 | assert self._optimization_finished, \ 101 | 'Run self.optimize before accessing this method.' 
102 | 103 | @property 104 | def results(self) -> pd.DataFrame: 105 | self._assert_finished() 106 | if self._results_df.empty: 107 | 108 | _results_list = self._results_list 109 | self._results_df = pd.DataFrame([r.as_dict for r in _results_list]) 110 | 111 | _columns = set(list(self._results_df.columns.values)) 112 | _params = set(self.param_names) 113 | self.metric_names = list(_columns - _params) 114 | 115 | return self._results_df 116 | 117 | def print_summary(self): 118 | df = self.results 119 | metric_names = self.metric_names 120 | 121 | print('Summary statistics') 122 | print(df[metric_names].describe().T) 123 | 124 | def get_best(self, metric_name: str) -> pd.DataFrame: 125 | """ 126 | Sort the results by a specific performance metric 127 | """ 128 | self._assert_finished() 129 | 130 | results = self.results 131 | param_names = self.param_names 132 | metric_names = self.metric_names 133 | 134 | assert metric_name in metric_names, 'Not a performance metric' 135 | partial_df = self.results[param_names+[metric_name]] 136 | 137 | return partial_df.sort_values(metric_name, ascending=False) 138 | 139 | def plot_1d_hist(self, x, show=True): 140 | self.results.hist(x) 141 | if show: 142 | plt.show() 143 | 144 | def plot_2d_line(self, x, y, show=True, **filter_kwargs): 145 | _results = self.results 146 | for k, v in filter_kwargs.items(): 147 | _results = _results[getattr(_results, k) == v] 148 | 149 | ax = _results.plot(x, y) 150 | if filter_kwargs: 151 | k_str = ', '.join([f'{k}={v}' for k,v in filter_kwargs.items()]) 152 | ax.legend([f'{x} ({k_str})']) 153 | 154 | if show: 155 | plt.show() 156 | 157 | def plot_2d_violin(self, x, y, show=True): 158 | """ 159 | Group y along x then plot violin charts 160 | """ 161 | x_values = self.results[x].unique() 162 | x_values.sort() 163 | 164 | y_by_x = OrderedDict([(v, []) for v in x_values]) 165 | for _, row in self.results.iterrows(): 166 | y_by_x[row[x]].append(row[y]) 167 | 168 | fig, ax = plt.subplots() 169 | 170 | ax.violinplot(dataset=list(y_by_x.values()), showmedians=True) 171 | ax.set_xlabel(x) 172 | ax.set_ylabel(y) 173 | ax.set_xticks(range(0, len(y_by_x)+1)) 174 | ax.set_xticklabels([''] + list(y_by_x.keys())) 175 | if show: 176 | plt.show() 177 | 178 | def plot_3d_mesh(self, x, y, z, show=True, **filter_kwargs): 179 | """ 180 | Plot interactive 3d mesh. z axis should typically be performance metric 181 | """ 182 | _results = self.results 183 | fig = plt.figure() 184 | ax = Axes3D(fig) 185 | 186 | for k, v in filter_kwargs.items(): 187 | _results = _results[getattr(_results, k) == v] 188 | 189 | X, Y, Z = [getattr(_results, attr) for attr in (x, y, z)] 190 | ax.plot_trisurf(X, Y, Z, cmap=cm.jet, linewidth=0.2) 191 | ax.set_xlabel(x) 192 | ax.set_ylabel(y) 193 | ax.set_zlabel(z) 194 | if show: 195 | plt.show() 196 | 197 | def plot(self, *attrs: Tuple[str], show=True, 198 | **filter_kwargs: Dict[str, Any]): 199 | """ 200 | Attempt to intelligently dispatch plotting functions based on the number 201 | and type of attributes. Last argument should typically be the 202 | performance metric. 203 | """ 204 | self._assert_finished() 205 | param_names = self.param_names 206 | metric_names = self.metric_names 207 | 208 | if len(attrs) == 3: 209 | assert attrs[0] in param_names and attrs[1] in param_names, \ 210 | 'First two positional arguments must be parameter names.' 211 | 212 | assert attrs[2] in metric_names, \ 213 | 'Last positional argument must be a metric name.' 
214 | 215 | assert len(filter_kwargs) + 2 == len(param_names), \ 216 | 'Must filter remaining parameters. e.g. p_three=some_number.' 217 | 218 | self.plot_3d_mesh(*attrs, show=show, **filter_kwargs) 219 | 220 | elif len(attrs) == 2: 221 | if len(param_names) == 1 or filter_kwargs: 222 | self.plot_2d_line(*attrs, show=show, **filter_kwargs) 223 | 224 | elif len(param_names) > 1: 225 | self.plot_2d_violin(*attrs, show=show) 226 | 227 | elif len(attrs) == 1: 228 | self.plot_1d_hist(*attrs, show=show) 229 | 230 | else: 231 | raise ValueError('Must pass between one and three column names.') 232 | 233 | 234 | 235 | 236 | -------------------------------------------------------------------------------- /src/pypm/portfolio.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | 4 | from typing import Tuple, List, Dict, Set, Callable, NewType, Any 5 | from collections import OrderedDict, defaultdict 6 | 7 | from pypm import metrics, signals, data_io 8 | 9 | Symbol = NewType('Symbol', str) 10 | Dollars = NewType('Dollars', float) 11 | 12 | DATE_FORMAT_STR = '%a %b %d, %Y' 13 | 14 | 15 | def _pdate(date: pd.Timestamp): 16 | """Pretty-print a datetime with just the date""" 17 | return date.strftime(DATE_FORMAT_STR) 18 | 19 | 20 | class Position(object): 21 | """ 22 | A simple object to hold and manipulate data related to long stock trades. 23 | Allows a single buy and sell operation on an asset for a constant number of 24 | shares. 25 | The __init__ method is equivalent to a buy operation. The exit 26 | method is a sell operation. 27 | """ 28 | 29 | def __init__(self, symbol: Symbol, entry_date: pd.Timestamp, 30 | entry_price: Dollars, shares: int): 31 | """ 32 | Equivalent to buying a certain number of shares of the asset 33 | """ 34 | 35 | # Recorded on initialization 36 | self.entry_date = entry_date 37 | 38 | assert entry_price > 0, 'Cannot buy asset with zero or negative price.' 39 | self.entry_price = entry_price 40 | 41 | assert shares > 0, 'Cannot buy zero or negative shares.' 42 | self.shares = shares 43 | 44 | self.symbol = symbol 45 | 46 | # Recorded on position exit 47 | self.exit_date: pd.Timestamp = None 48 | self.exit_price: Dollars = None 49 | 50 | # For easily getting current portfolio value 51 | self.last_date: pd.Timestamp = None 52 | self.last_price: Dollars = None 53 | 54 | # Updated intermediately 55 | self._dict_series: Dict[pd.Timestamp, Dollars] = OrderedDict() 56 | self.record_price_update(entry_date, entry_price) 57 | 58 | # Cache control for pd.Series representation 59 | self._price_series: pd.Series = None 60 | self._needs_update_pd_series: bool = True 61 | 62 | def exit(self, exit_date, exit_price): 63 | """ 64 | Equivalent to selling a stock holding 65 | """ 66 | assert self.entry_date != exit_date, 'Churned a position same-day.' 67 | assert not self.exit_date, 'Position already closed.' 68 | self.record_price_update(exit_date, exit_price) 69 | self.exit_date = exit_date 70 | self.exit_price = exit_price 71 |
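# Usage sketch (hypothetical dates and prices, illustration only):
#
#     position = Position('AWU', pd.Timestamp('2020-01-02'), 100.0, 10)
#     position.record_price_update(pd.Timestamp('2020-01-03'), 101.5)
#     position.exit(pd.Timestamp('2020-01-06'), 105.0)
#     position.percent_return    # (105.0 / 100.0) - 1 = 0.05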
72 | def record_price_update(self, date, price): 73 | """ 74 | Record the intermediate price and date of an open position 75 | """ 76 | self.last_date = date 77 | self.last_price = price 78 | self._dict_series[date] = price 79 | 80 | # Invalidate cache on self.price_series 81 | self._needs_update_pd_series = True 82 | 83 | @property 84 | def price_series(self) -> pd.Series: 85 | """ 86 | Returns cached readonly pd.Series 87 | """ 88 | if self._needs_update_pd_series or self._price_series is None: 89 | self._price_series = pd.Series(self._dict_series) 90 | self._needs_update_pd_series = False 91 | return self._price_series 92 | 93 | @property 94 | def last_value(self) -> Dollars: 95 | return self.last_price * self.shares 96 | 97 | @property 98 | def is_active(self) -> bool: 99 | return self.exit_date is None 100 | 101 | @property 102 | def is_closed(self) -> bool: 103 | return not self.is_active 104 | 105 | @property 106 | def value_series(self) -> pd.Series: 107 | """ 108 | Returns the value of the position over time. Ignores self.exit_date. 109 | Used in calculating the equity curve. 110 | """ 111 | assert self.is_closed, 'Position must be closed to access this property' 112 | return self.shares * self.price_series[:-1] 113 | 114 | @property 115 | def percent_return(self) -> float: 116 | return (self.exit_price / self.entry_price) - 1 117 | 118 | @property 119 | def entry_value(self) -> Dollars: 120 | return self.shares * self.entry_price 121 | 122 | @property 123 | def exit_value(self) -> Dollars: 124 | return self.shares * self.exit_price 125 | 126 | @property 127 | def change_in_value(self) -> Dollars: 128 | return self.exit_value - self.entry_value 129 | 130 | @property 131 | def trade_length(self): 132 | return len(self._dict_series) - 1 133 | 134 | def print_position_summary(self): 135 | _entry_date = _pdate(self.entry_date) 136 | _exit_date = _pdate(self.exit_date) 137 | _days = self.trade_length 138 | 139 | _entry_price = round(self.entry_price, 2) 140 | _exit_price = round(self.exit_price, 2) 141 | 142 | _entry_value = round(self.entry_value, 2) 143 | _exit_value = round(self.exit_value, 2) 144 | 145 | _return = round(100 * self.percent_return, 1) 146 | _diff = round(self.change_in_value, 2) 147 | 148 | print(f'{self.symbol:<5} Trade summary') 149 | print(f'Date: {_entry_date} -> {_exit_date} [{_days} days]') 150 | print(f'Price: ${_entry_price} -> ${_exit_price} [{_return}%]') 151 | print(f'Value: ${_entry_value} -> ${_exit_value} [${_diff}]') 152 | print() 153 | 154 | def __hash__(self): 155 | """ 156 | A unique position will be defined by a unique combination of an 157 | entry_date and symbol, in accordance with our constraints regarding 158 | duplicate, variable, and compound positions 159 | """ 160 | return hash((self.entry_date, self.symbol)) 161 | 162 | 163 | class PortfolioHistory(object): 164 | """ 165 | Holds Position objects and keeps track of portfolio variables. 166 | Produces summary statistics. 167 | """
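# Note: the simulator drives this class roughly as follows (hypothetical
# objects, illustration only):
#
#     history = PortfolioHistory()
#     history.record_cash(some_date, 10000.0)       # repeatedly, as cash changes
#     history.add_to_history(some_closed_position)  # once per closed trade
#     history.finish()                              # freeze and compute series
#     history.print_summary()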
168 | 169 | def __init__(self): 170 | # Keep track of positions, recorded in this list after close 171 | self.position_history: List[Position] = [] 172 | self._logged_positions: Set[Position] = set() 173 | 174 | # Keep track of the last seen date 175 | self.last_date: pd.Timestamp = pd.Timestamp.min 176 | 177 | # Readonly fields 178 | self._cash_history: Dict[pd.Timestamp, Dollars] = dict() 179 | self._simulation_finished = False 180 | self._spy: pd.DataFrame = pd.DataFrame() 181 | self._spy_log_returns: pd.Series = pd.Series(dtype=float) 182 | 183 | def add_to_history(self, position: Position): 184 | _log = self._logged_positions 185 | assert position not in _log, 'Recorded the same position twice.' 186 | assert position.is_closed, 'Position is not closed.' 187 | self._logged_positions.add(position) 188 | self.position_history.append(position) 189 | self.last_date = max(self.last_date, position.last_date) 190 | 191 | def record_cash(self, date, cash): 192 | self._cash_history[date] = cash 193 | self.last_date = max(self.last_date, date) 194 | 195 | @staticmethod 196 | def _as_oseries(d: Dict[pd.Timestamp, Any]) -> pd.Series: 197 | return pd.Series(d).sort_index() 198 | 199 | def _compute_cash_series(self): 200 | self._cash_series = self._as_oseries(self._cash_history) 201 | 202 | @property 203 | def cash_series(self) -> pd.Series: 204 | return self._cash_series 205 | 206 | def _compute_portfolio_value_series(self): 207 | value_by_date = defaultdict(float) 208 | last_date = self.last_date 209 | 210 | # Add up value of assets 211 | for position in self.position_history: 212 | for date, value in position.value_series.items(): 213 | value_by_date[date] += value 214 | 215 | # Make sure all dates in cash_series are present 216 | for date in self.cash_series.index: 217 | value_by_date[date] += 0 218 | 219 | self._portfolio_value_series = self._as_oseries(value_by_date) 220 | 221 | @property 222 | def portfolio_value_series(self): 223 | return self._portfolio_value_series 224 | 225 | def _compute_equity_series(self): 226 | c_series = self.cash_series 227 | p_series = self.portfolio_value_series 228 | assert all(c_series.index == p_series.index), \ 229 | 'portfolio_series has dates not in cash_series' 230 | self._equity_series = c_series + p_series 231 | 232 | @property 233 | def equity_series(self): 234 | return self._equity_series 235 | 236 | def _compute_log_return_series(self): 237 | self._log_return_series = \ 238 | metrics.calculate_log_return_series(self.equity_series) 239 | 240 | @property 241 | def log_return_series(self): 242 | return self._log_return_series 243 | 244 | def _assert_finished(self): 245 | assert self._simulation_finished, \ 246 | 'Simulation must be finished by running self.finish() in order ' + \ 247 | 'to access this method or property.'
248 | 249 | def finish(self): 250 | """ 251 | Mark the simulation as finished and compute readonly values 252 | """ 253 | self._simulation_finished = True 254 | self._compute_cash_series() 255 | self._compute_portfolio_value_series() 256 | self._compute_equity_series() 257 | self._compute_log_return_series() 258 | self._assert_finished() 259 | 260 | def compute_portfolio_size_series(self) -> pd.Series: 261 | size_by_date = defaultdict(int) 262 | for position in self.position_history: 263 | for date in position.value_series.index: 264 | size_by_date[date] += 1 265 | return self._as_oseries(size_by_date) 266 | 267 | @property 268 | def spy(self): 269 | if self._spy.empty: 270 | first_date = self.cash_series.index[0] 271 | _spy = data_io.load_spy_data() 272 | self._spy = _spy[_spy.index > first_date] 273 | return self._spy 274 | 275 | @property 276 | def spy_log_returns(self): 277 | if self._spy_log_returns.empty: 278 | close = self.spy['close'] 279 | self._spy_log_returns = metrics.calculate_log_return_series(close) 280 | return self._spy_log_returns 281 | 282 | @property 283 | def percent_return(self): 284 | return metrics.calculate_percent_return(self.equity_series) 285 | 286 | @property 287 | def spy_percent_return(self): 288 | return metrics.calculate_percent_return(self.spy['close']) 289 | 290 | @property 291 | def cagr(self): 292 | return metrics.calculate_cagr(self.equity_series) 293 | 294 | @property 295 | def volatility(self): 296 | return metrics.calculate_annualized_volatility(self.log_return_series) 297 | 298 | @property 299 | def sharpe_ratio(self): 300 | return metrics.calculate_sharpe_ratio(self.equity_series) 301 | 302 | @property 303 | def spy_cagr(self): 304 | return metrics.calculate_cagr(self.spy['close']) 305 | 306 | @property 307 | def excess_cagr(self): 308 | return self.cagr - self.spy_cagr 309 | 310 | @property 311 | def jensens_alpha(self): 312 | return metrics.calculate_jensens_alpha( 313 | self.log_return_series, 314 | self.spy_log_returns, 315 | ) 316 | 317 | @property 318 | def dollar_max_drawdown(self): 319 | return metrics.calculate_max_drawdown(self.equity_series, 'dollar') 320 | 321 | @property 322 | def percent_max_drawdown(self): 323 | return metrics.calculate_max_drawdown(self.equity_series, 'percent') 324 | 325 | @property 326 | def log_max_drawdown_ratio(self): 327 | return metrics.calculate_log_max_drawdown_ratio(self.equity_series) 328 | 329 | @property 330 | def number_of_trades(self): 331 | return len(self.position_history) 332 | 333 | @property 334 | def average_active_trades(self): 335 | return self.compute_portfolio_size_series().mean() 336 | 337 | @property 338 | def final_cash(self): 339 | self._assert_finished() 340 | return self.cash_series.iloc[-1] 341 | 342 | @property 343 | def final_equity(self): 344 | self._assert_finished() 345 | return self.equity_series.iloc[-1] 346 | 347 | _PERFORMANCE_METRICS_PROPS = [ 348 | 'percent_return', 349 | 'spy_percent_return', 350 | 'cagr', 351 | 'volatility', 352 | 'sharpe_ratio', 353 | 'spy_cagr', 354 | 'excess_cagr', 355 | 'jensens_alpha', 356 | 'dollar_max_drawdown', 357 | 'percent_max_drawdown', 358 | 'log_max_drawdown_ratio', 359 | 'number_of_trades', 360 | 'average_active_trades', 361 | 'final_cash', 362 | 'final_equity', 363 | ] 364 | 365 | PerformancePayload = NewType('PerformancePayload', Dict[str, float]) 366 | 367 | def get_performance_metric_data(self) -> PerformancePayload: 368 | props = self._PERFORMANCE_METRICS_PROPS 369 | return {prop: getattr(self, prop) for prop in props} 370 |
371 | def print_position_summaries(self): 372 | for position in self.position_history: 373 | position.print_position_summary() 374 | 375 | def print_summary(self): 376 | self._assert_finished() 377 | s = f'Equity: ${self.final_equity:.2f}\n' \ 378 | f'Percent Return: {100 * self.percent_return:.2f}%\n' \ 379 | f'S&P 500 Return: {100 * self.spy_percent_return:.2f}%\n\n' \ 380 | f'Number of trades: {self.number_of_trades}\n' \ 381 | f'Average active trades: {self.average_active_trades:.2f}\n\n' \ 382 | f'CAGR: {100 * self.cagr:.2f}%\n' \ 383 | f'S&P 500 CAGR: {100 * self.spy_cagr:.2f}%\n' \ 384 | f'Excess CAGR: {100 * self.excess_cagr:.2f}%\n\n' \ 385 | f'Annualized Volatility: {100 * self.volatility:.2f}%\n' \ 386 | f'Sharpe Ratio: {self.sharpe_ratio:.2f}\n' \ 387 | f'Jensen\'s Alpha: {self.jensens_alpha:.6f}\n\n' \ 388 | f'Dollar Max Drawdown: ${self.dollar_max_drawdown:.2f}\n' \ 389 | f'Percent Max Drawdown: {100 * self.percent_max_drawdown:.2f}%\n' \ 390 | f'Log Max Drawdown Ratio: {self.log_max_drawdown_ratio:.2f}\n' 391 | 392 | print(s) 393 | 394 | def plot(self, show=True) -> plt.Figure: 395 | """ 396 | Plots equity, cash and portfolio value curves. 397 | """ 398 | self._assert_finished() 399 | 400 | figure, axes = plt.subplots(nrows=3, ncols=1) 401 | figure.tight_layout(pad=3.0) 402 | axes[0].plot(self.equity_series) 403 | axes[0].set_title('Equity') 404 | axes[0].grid() 405 | 406 | axes[1].plot(self.cash_series) 407 | axes[1].set_title('Cash') 408 | axes[1].grid() 409 | 410 | axes[2].plot(self.portfolio_value_series) 411 | axes[2].set_title('Portfolio Value') 412 | axes[2].grid() 413 | 414 | if show: 415 | plt.show() 416 | 417 | return figure 418 | 419 | def plot_benchmark_comparison(self, show=True) -> plt.Figure: 420 | """ 421 | Plot comparable investment in the S&P 500. 422 | """ 423 | self._assert_finished() 424 | 425 | equity_curve = self.equity_series 426 | ax = equity_curve.plot() 427 | 428 | spy_closes = self.spy['close'] 429 | initial_cash = self.cash_series.iloc[0] 430 | initial_spy = spy_closes.iloc[0] 431 | 432 | scaled_spy = spy_closes * (initial_cash / initial_spy) 433 | scaled_spy.plot() 434 | 435 | baseline = pd.Series(initial_cash, index=equity_curve.index) 436 | ax = baseline.plot(color='black') 437 | ax.grid() 438 | 439 | ax.legend(['Equity curve', 'S&P 500 portfolio']) 440 | 441 | if show: 442 | plt.show() 443 | -------------------------------------------------------------------------------- /src/pypm/signals.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from pypm.indicators import calculate_macd_oscillator, \ 4 | calculate_bollinger_bands 5 | from pypm.data_io import load_eod_data 6 | 7 | 8 | def create_macd_signal(series: pd.Series, n1: int=5, n2: int=34) -> pd.Series: 9 | """ 10 | Create a momentum-based signal based on the MACD crossover principle. 11 | Generate a buy signal when the MACD crosses above zero, and a sell signal when 12 | it crosses below zero. 13 | """ 14 | 15 | # Calculate the macd and get the signs of the values. 16 | macd = calculate_macd_oscillator(series, n1, n2) 17 | macd_sign = np.sign(macd) 18 | 19 | # Create a copy shifted by one period. 20 | macd_shifted_sign = macd_sign.shift(1) 21 | 22 | # Multiply the sign by the boolean. This will have the effect of casting 23 | # the boolean to an integer (either 0 or 1) and then multiplying by the 24 | # sign (either -1, 0 or 1). 25 | return macd_sign * (macd_sign != macd_shifted_sign)
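# Example: the crossover arithmetic above on a toy series (illustrative
# values; the first bar compares against NaN, so it also registers as a
# crossing):
#
#     toy = pd.Series([-1.0, -0.5, 0.5, 0.8, -0.2])
#     sign = np.sign(toy)              # -1, -1, 1, 1, -1
#     crossed = sign != sign.shift(1)  # True at each sign flip
#     sign * crossed                   # -1, 0, 1, 0, -1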
26 | 27 | 28 | def create_bollinger_band_signal(series: pd.Series, n: int=20) -> pd.Series: 29 | """ 30 | Create a reversal-based signal based on the upper and lower bands of the 31 | Bollinger bands. Generate a buy signal when the price is below the lower 32 | band, and a sell signal when the price is above the upper band. 33 | """ 34 | bollinger_bands = calculate_bollinger_bands(series, n) 35 | sell = series > bollinger_bands['upper'] 36 | buy = series < bollinger_bands['lower'] 37 | return (1*buy - 1*sell) 38 | 39 | -------------------------------------------------------------------------------- /src/pypm/simulation.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple, List, Dict, Callable, NewType, Any, Iterable 2 | 3 | import pandas as pd 4 | import matplotlib.pyplot as plt 5 | 6 | from pypm import metrics, signals, data_io 7 | from pypm.portfolio import PortfolioHistory, Position, Symbol, Dollars 8 | 9 | from collections import OrderedDict, defaultdict 10 | 11 | class SimpleSimulator(object): 12 | """ 13 | A simple trading simulator to work with the PortfolioHistory class 14 | """ 15 | 16 | def __init__(self, initial_cash: float=10000, max_active_positions: int=5, 17 | percent_slippage: float=0.0005, trade_fee: float=1): 18 | 19 | ### Set simulation parameters 20 | 21 | # Initial cash in portfolio 22 | # self.cash will fluctuate 23 | self.initial_cash = self.cash = initial_cash 24 | 25 | # Maximum number of different assets that can be held simultaneously 26 | self.max_active_positions: int = max_active_positions 27 | 28 | # The percentage difference between closing price and fill price for the 29 | # position, to simulate adverse effects of market orders 30 | self.percent_slippage = percent_slippage 31 | 32 | # The fixed fee in order to open a position in dollar terms 33 | self.trade_fee = trade_fee 34 | 35 | # Keep track of live trades 36 | self.active_positions_by_symbol: Dict[Symbol, Position] = OrderedDict() 37 | 38 | # Keep track of portfolio history like cash, equity, and positions 39 | self.portfolio_history = PortfolioHistory() 40 | 41 | @property 42 | def active_positions_count(self): 43 | return len(self.active_positions_by_symbol) 44 | 45 | @property 46 | def free_position_slots(self): 47 | return self.max_active_positions - self.active_positions_count 48 | 49 | @property 50 | def active_symbols(self) -> List[Symbol]: 51 | return list(self.active_positions_by_symbol.keys()) 52 | 53 | def print_initial_parameters(self): 54 | s = f'Initial Cash: ${self.initial_cash} \n' \ 55 | f'Maximum Number of Assets: {self.max_active_positions}\n' 56 | print(s) 57 | return s 58 | 59 | @staticmethod 60 | def make_tuple_lookup(columns) -> Callable[[str, str], int]: 61 | """ 62 | Map a multi-index dataframe to an itertuples-like object. 63 | 64 | The index of the dataframe is always the zero-th element. 65 | """
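# Note: itertuples() yields plain tuples whose element 0 is the row index,
# so dataframe column k lands at tuple position k + 1. A toy sketch
# (hypothetical columns, illustration only):
#
#     columns = [('AWU', 'price'), ('AWU', 'signal')]
#     _idx = SimpleSimulator.make_tuple_lookup(columns)
#     _idx('AWU', 'signal')    # -> 2, the third element of each row tuple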
66 | 67 | # col is a hierarchical column index represented by a tuple of strings 68 | tuple_lookup: Dict[Tuple[str, str], int] = { 69 | col: i + 1 for i, col in enumerate(columns) 70 | } 71 | 72 | return lambda symbol, metric: tuple_lookup[(symbol, metric)] 73 | 74 | @staticmethod 75 | def make_all_valid_lookup(_idx: Callable): 76 | """ 77 | Return a function that checks for valid data, given a lookup function 78 | """ 79 | return lambda row, symbol: ( 80 | not pd.isna(row[_idx(symbol, 'pref')]) and \ 81 | not pd.isna(row[_idx(symbol, 'signal')]) and \ 82 | not pd.isna(row[_idx(symbol, 'price')]) 83 | ) 84 | 85 | def buy_to_open(self, symbol, date, price): 86 | """ 87 | Keep track of a new position, making sure it isn't an existing position. 88 | Verify you have cash. 89 | """ 90 | 91 | # Figure out how much we are willing to spend 92 | cash_available = self.cash - self.trade_fee 93 | cash_to_spend = cash_available / self.free_position_slots 94 | 95 | # Calculate the purchase price and number of shares. Fractional shares allowed. 96 | purchase_price = (1 + self.percent_slippage) * price 97 | shares = cash_to_spend / purchase_price 98 | 99 | # Spend the cash 100 | self.cash -= cash_to_spend + self.trade_fee 101 | assert self.cash >= 0, 'Spent cash you do not have.' 102 | self.portfolio_history.record_cash(date, self.cash) 103 | 104 | # Record the position 105 | positions_by_symbol = self.active_positions_by_symbol 106 | assert symbol not in positions_by_symbol, 'Symbol already in portfolio.' 107 | position = Position(symbol, date, purchase_price, shares) 108 | positions_by_symbol[symbol] = position 109 | 110 | def sell_to_close(self, symbol, date, price): 111 | """ 112 | Keep track of exit price, recover cash, close position, and record it in 113 | portfolio history. 114 | 115 | Will raise a KeyError if symbol isn't an active position. 116 | """ 117 | 118 | # Exit the position 119 | positions_by_symbol = self.active_positions_by_symbol 120 | position = positions_by_symbol[symbol] 121 | position.exit(date, price) 122 | 123 | # Receive the cash 124 | sale_value = position.last_value * (1 - self.percent_slippage) 125 | self.cash += sale_value 126 | self.portfolio_history.record_cash(date, self.cash) 127 | 128 | # Record in portfolio history 129 | self.portfolio_history.add_to_history(position) 130 | del positions_by_symbol[symbol] 131 | 132 | @staticmethod 133 | def _assert_equal_columns(*args: Iterable[pd.DataFrame]): 134 | column_names = set(args[0].columns.values) 135 | for arg in args[1:]: 136 | assert set(arg.columns.values) == column_names, \ 137 | 'Found unequal column names in input dataframes.' 138 | 139 | def simulate(self, price: pd.DataFrame, signal: pd.DataFrame, 140 | preference: pd.DataFrame): 141 | """ 142 | Runs the simulation. 143 | 144 | price, signal, and preference are dataframes with the column names 145 | represented by the same set of stock symbols. 146 | """
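# Note: the three inputs share one date index and one set of symbol
# columns; signal holds -1/0/1 (sell/hold/buy) and preference ranks buy
# candidates. A toy sketch of compatible inputs (illustration only):
#
#     dates = pd.date_range('2020-01-01', periods=3)
#     price = pd.DataFrame({'AWU': [10.0, 10.5, 10.4]}, index=dates)
#     signal = pd.DataFrame({'AWU': [1, 0, -1]}, index=dates)
#     preference = pd.DataFrame({'AWU': [0.5, 0.5, 0.5]}, index=dates)
#     SimpleSimulator().simulate(price, signal, preference)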
147 | 148 | # Create a hierarchical dataframe to loop through 149 | self._assert_equal_columns(price, signal, preference) 150 | df = data_io.concatenate_metrics({ 151 | 'price': price, 152 | 'signal': signal, 153 | 'pref': preference, 154 | }) 155 | 156 | # Get list of symbols 157 | all_symbols = list(set(price.columns.values)) 158 | 159 | # Get lookup functions 160 | _idx = self.make_tuple_lookup(df.columns) 161 | _all_valid = self.make_all_valid_lookup(_idx) 162 | 163 | # Store some variables 164 | active_positions_by_symbol = self.active_positions_by_symbol 165 | max_active_positions = self.max_active_positions 166 | 167 | # Iterate over all dates. 168 | # itertuples() is significantly faster than iterrows(), but it comes at 169 | # the cost of not being able to index easily. To get around this we use 170 | # the tuple lookup function "_idx" 171 | for row in df.itertuples(): 172 | 173 | # date index is always the first element of the tuple row 174 | date = row[0] 175 | 176 | # Get symbols with valid and tradable data 177 | symbols: List[str] = [s for s in all_symbols if _all_valid(row, s)] 178 | 179 | # Iterate over active positions and sell stocks with a sell signal. 180 | _active = self.active_symbols 181 | to_exit = [s for s in _active if row[_idx(s, 'signal')] == -1] 182 | for s in to_exit: 183 | sell_price = row[_idx(s, 'price')] 184 | self.sell_to_close(s, date, sell_price) 185 | 186 | # Get up to max_active_positions symbols with a buy signal in 187 | # decreasing order of preference 188 | to_buy = [ 189 | s for s in symbols if \ 190 | row[_idx(s, 'signal')] == 1 and \ 191 | s not in active_positions_by_symbol 192 | ] 193 | to_buy.sort(key=lambda s: row[_idx(s, 'pref')], reverse=True) 194 | to_buy = to_buy[:max_active_positions] 195 | 196 | for s in to_buy: 197 | buy_price = row[_idx(s, 'price')] 198 | buy_preference = row[_idx(s, 'pref')] 199 | 200 | # If we have some empty slots, just buy the asset outright 201 | if self.active_positions_count < max_active_positions: 202 | self.buy_to_open(s, date, buy_price) 203 | continue 204 | 205 | # If we are holding max_active_positions, evaluate a swap based on 206 | # preference 207 | _active = self.active_symbols 208 | active_prefs = [(s, row[_idx(s, 'pref')]) for s in _active] 209 | 210 | _min = min(active_prefs, key=lambda k: k[1]) 211 | min_active_symbol, min_active_preference = _min 212 | 213 | # If a more preferable symbol exists, then sell an old one 214 | if min_active_preference < buy_preference: 215 | sell_price = row[_idx(min_active_symbol, 'price')] 216 | self.sell_to_close(min_active_symbol, date, sell_price) 217 | self.buy_to_open(s, date, buy_price) 218 | 219 | # Update price data everywhere 220 | for s in self.active_symbols: 221 | price = row[_idx(s, 'price')] 222 | position = active_positions_by_symbol[s] 223 | position.record_price_update(date, price) 224 | 225 | self.portfolio_history.record_cash(date, self.cash) 226 | 227 | # Sell all positions and mark simulation as complete 228 | for s in self.active_symbols: 229 | self.sell_to_close(s, date, row[_idx(s, 'price')]) 230 | self.portfolio_history.finish() 231 | 232 | 233 | 234 |
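# Worked example of the fill arithmetic in buy_to_open above (illustrative
# numbers, using the default parameters):
#
#     cash, trade_fee, free_slots = 10_000.0, 1.0, 5
#     cash_to_spend = (cash - trade_fee) / free_slots    # 1999.80
#     purchase_price = (1 + 0.0005) * 100.00             # 100.05 with slippage
#     shares = cash_to_spend / purchase_price            # ~19.99 (fractional)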
-------------------------------------------------------------------------------- /src/pypm/weights.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from scipy.stats import hmean 4 | 5 | def calculate_uniqueness(event_spans: pd.Series, 6 | price_index: pd.Series) -> pd.Series: 7 | """ 8 | event_spans is a series with an index of start dates and values of end dates 9 | of a label. 10 | 11 | price_index is an index of underlying dates for the events 12 | 13 | Returns a series of uniqueness values that can be used as weights, indexed 14 | by the event start dates. Weights may need to be standardized again before 15 | training. 16 | """ 17 | 18 | # Create a binary dataframe 19 | # value is 1 during event span and 0 otherwise 20 | columns = range(event_spans.shape[0]) 21 | df = pd.DataFrame(0, index=price_index, columns=columns) 22 | 23 | for i, (event_start, event_end) in enumerate(event_spans.items()): 24 | df.loc[event_start:event_end, i] = 1 25 | 26 | # Compute concurrency over each event span, then calculate uniqueness 27 | avg_uniquenesses = list() 28 | for i, (event_start, event_end) in enumerate(event_spans.items()): 29 | concurrency: pd.Series = df.loc[event_start:event_end].sum(axis=1) 30 | avg_uniqueness = 1 / hmean(concurrency) 31 | avg_uniquenesses.append(avg_uniqueness) 32 | 33 | return pd.Series(avg_uniquenesses, index=event_spans.index) 34 | -------------------------------------------------------------------------------- /src/simulate_alternative_data_portfolio.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | import os 5 | from joblib import load 6 | 7 | from pypm.ml_model.data_io import load_data 8 | from pypm.ml_model.signals import calculate_signals 9 | 10 | from pypm import metrics, simulation 11 | 12 | SRC_DIR = os.path.dirname(os.path.abspath(__file__)) 13 | 14 | def simulate_portfolio(): 15 | 16 | # All the data we have to work with 17 | symbols, eod_data, alt_data = load_data() 18 | 19 | # Load classifier from file 20 | classifier = load(os.path.join(SRC_DIR, 'ml_model.joblib')) 21 | 22 | # Generate signals from classifier 23 | print('Calculating signals ...') 24 | signal = calculate_signals(classifier, symbols, eod_data, alt_data) 25 | 26 | # Get rid of eod_data before valid signals 27 | first_signal_date = signal.first_valid_index() 28 | eod_data = eod_data[eod_data.index > first_signal_date] 29 | 30 | # Use a random preference matrix to arbitrarily rank competing buy signals 31 | print('Calculating preference matrix ...') 32 | preference = pd.DataFrame( 33 | np.random.random(eod_data.shape), 34 | columns=eod_data.columns, 35 | index=eod_data.index, 36 | ) 37 | 38 | # Run the simulator 39 | simulator = simulation.SimpleSimulator( 40 | initial_cash=10000, 41 | max_active_positions=10, 42 | percent_slippage=0.0005, 43 | trade_fee=1, 44 | ) 45 | simulator.simulate(eod_data, signal, preference) 46 | 47 | # Print results 48 | simulator.portfolio_history.print_position_summaries() 49 | simulator.print_initial_parameters() 50 | simulator.portfolio_history.print_summary() 51 | simulator.portfolio_history.plot() 52 | simulator.portfolio_history.plot_benchmark_comparison() 53 | 54 | if __name__ == '__main__': 55 | simulate_portfolio() 56 | 57 | -------------------------------------------------------------------------------- /src/simulate_portfolio.py: -------------------------------------------------------------------------------- 1 | from pypm import metrics, signals, data_io, simulation 2 | import pandas as pd 3 | from typing import List 4 | def simulate_portfolio(): 5 | 6 | bollinger_n = 20 7 | sharpe_n = 100 8 | 9 | # Load in data 10 | symbols: List[str] = data_io.get_all_symbols() 11 | prices: pd.DataFrame = data_io.load_eod_matrix(symbols) 12 | 13 | # Use the Bollinger Band outer band crossover as a
signal 14 | _bollinger = signals.create_bollinger_band_signal 15 | signal = prices.apply(_bollinger, args=(bollinger_n,), axis=0) 16 | 17 | # Use a rolling sharpe ratio approximation as a preference matrix 18 | _sharpe = metrics.calculate_rolling_sharpe_ratio 19 | preference = prices.apply(_sharpe, args=(sharpe_n, ), axis=0) 20 | 21 | # Run the simulator 22 | simulator = simulation.SimpleSimulator( 23 | initial_cash=10000, 24 | max_active_positions=5, 25 | percent_slippage=0.0005, 26 | trade_fee=1, 27 | ) 28 | simulator.simulate(prices, signal, preference) 29 | 30 | # Print results 31 | simulator.portfolio_history.print_position_summaries() 32 | simulator.print_initial_parameters() 33 | simulator.portfolio_history.print_summary() 34 | simulator.portfolio_history.plot() 35 | simulator.portfolio_history.plot_benchmark_comparison() 36 | 37 | if __name__ == '__main__': 38 | simulate_portfolio() 39 | -------------------------------------------------------------------------------- /src/white_noise_portfolio.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | from pypm import metrics, signals, data_io, simulation, optimization 5 | from pypm.optimization import GridSearchOptimizer 6 | 7 | from typing import List, Dict, Tuple, Callable 8 | 9 | Performance = simulation.PortfolioHistory.PerformancePayload # Dict[str, float] 10 | 11 | def bind_simulator(**sim_kwargs) -> Callable: 12 | """ 13 | Create a simulator that uses white noise for the preference matrix 14 | """ 15 | symbols: List[str] = data_io.get_all_symbols() 16 | prices: pd.DataFrame = data_io.load_eod_matrix(symbols) 17 | 18 | _bollinger: Callable = signals.create_bollinger_band_signal 19 | 20 | # Bollinger n is constant throughout 21 | bollinger_n = 20 22 | 23 | def _simulate(white_noise_test_id: int) -> Performance: 24 | 25 | signal = prices.apply(_bollinger, args=(bollinger_n,), axis=0) 26 | 27 | # Build a pile of noise in the same shape as the price data 28 | _noise = np.random.normal(loc=0, scale=1, size=prices.shape) 29 | _cols = prices.columns 30 | _index = prices.index 31 | preference = pd.DataFrame(_noise, columns=_cols, index=_index) 32 | 33 | simulator = simulation.SimpleSimulator(**sim_kwargs) 34 | simulator.simulate(prices, signal, preference) 35 | 36 | return simulator.portfolio_history.get_performance_metric_data() 37 | 38 | return _simulate 39 | 40 | if __name__ == '__main__': 41 | 42 | simulate = bind_simulator(initial_cash=10000, max_active_positions=5) 43 | 44 | optimizer = GridSearchOptimizer(simulate) 45 | optimizer.optimize(white_noise_test_id=range(1000)) 46 | 47 | print(optimizer.get_best('excess_cagr')) 48 | optimizer.print_summary() 49 | optimizer.plot('excess_cagr') 50 | --------------------------------------------------------------------------------