├── .bumpversion.cfg ├── .coveragerc ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── docs_suggestion.md │ └── feature_request.md ├── pull_request_template.md └── workflows │ ├── publish-final-dist.yaml │ ├── publish-test-dist.yaml │ └── python-tests.yaml ├── .gitignore ├── .pylintrc ├── .readthedocs.yml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── DEV_NOTE ├── LICENSE.txt ├── MANIFEST.in ├── README.md ├── arbitragelab ├── __init__.py ├── codependence │ ├── __init__.py │ ├── codependence_matrix.py │ ├── correlation.py │ ├── gnpr_distance.py │ ├── information.py │ └── optimal_transport.py ├── cointegration_approach │ ├── __init__.py │ ├── base.py │ ├── coint_sim.py │ ├── engle_granger.py │ ├── johansen.py │ ├── minimum_profit.py │ ├── multi_coint.py │ ├── sparse_mr_portfolio.py │ └── utils.py ├── copula_approach │ ├── __init__.py │ ├── archimedean │ │ ├── __init__.py │ │ ├── clayton.py │ │ ├── frank.py │ │ ├── gumbel.py │ │ ├── joe.py │ │ ├── n13.py │ │ └── n14.py │ ├── base.py │ ├── copula_calculation.py │ ├── elliptical │ │ ├── __init__.py │ │ ├── gaussian.py │ │ └── student.py │ ├── mixed_copulas │ │ ├── __init__.py │ │ ├── base.py │ │ ├── cfg_mix_copula.py │ │ └── ctg_mix_copula.py │ ├── pairs_selection.py │ ├── vine_copula_partner_selection.py │ ├── vine_copula_partner_selection_utils.py │ ├── vinecop_generate.py │ └── vinecop_strategy.py ├── distance_approach │ ├── __init__.py │ ├── basic_distance_approach.py │ └── pearson_distance_approach.py ├── hedge_ratios │ ├── __init__.py │ ├── adf_optimal.py │ ├── box_tiao.py │ ├── half_life.py │ ├── johansen.py │ ├── linear.py │ └── spread_construction.py ├── ml_approach │ ├── __init__.py │ ├── feature_expander.py │ ├── filters.py │ ├── neural_networks.py │ ├── optics_dbscan_pairs_clustering.py │ ├── regressor_committee.py │ └── tar.py ├── optimal_mean_reversion │ ├── __init__.py │ ├── cir_model.py │ ├── heat_potentials.py │ ├── ou_model.py │ └── xou_model.py ├── other_approaches │ ├── __init__.py │ ├── 
kalman_filter.py │ └── pca_approach.py ├── spread_selection │ ├── __init__.py │ ├── base.py │ └── cointegration.py ├── stochastic_control_approach │ ├── __init__.py │ ├── optimal_convergence.py │ ├── ou_model_jurek.py │ └── ou_model_mudchanatongsuk.py ├── tearsheet │ ├── __init__.py │ └── tearsheet.py ├── time_series_approach │ ├── __init__.py │ ├── arima_predict.py │ ├── h_strategy.py │ ├── ou_optimal_threshold.py │ ├── ou_optimal_threshold_bertram.py │ ├── ou_optimal_threshold_zeng.py │ ├── quantile_time_series.py │ └── regime_switching_arbitrage_rule.py ├── trading │ ├── __init__.py │ ├── basic_copula.py │ ├── copula_strategy_mpi.py │ ├── minimum_profit.py │ ├── multi_coint.py │ └── z_score.py └── util │ ├── __init__.py │ ├── base_futures_roller.py │ ├── data_cursor.py │ ├── data_importer.py │ ├── generate_dataset.py │ ├── indexed_highlight.py │ ├── rollers.py │ └── spread_modeling_helper.py ├── coverage ├── docs ├── Makefile ├── make.bat └── source │ ├── _static │ ├── .gitkeep │ ├── favicon_arbitragelab.png │ ├── ht_logo_black.png │ ├── ht_logo_white.png │ ├── logo_black.png │ └── logo_white.png │ ├── _templates │ └── breadcrumbs.html │ ├── additional_information │ └── license.rst │ ├── changelog.rst │ ├── codependence │ ├── codependence_marti.rst │ ├── codependence_matrix.rst │ ├── correlation_based_metrics.rst │ ├── images │ │ ├── abs.png │ │ ├── angular_distance.png │ │ ├── codep_slides.png │ │ ├── codependence_slides.png │ │ ├── dependence_copulas.png │ │ ├── distance_correlation.png │ │ ├── entropy_relation_diagram.png │ │ ├── independent.png │ │ ├── linear.png │ │ ├── modified_angular_distance.png │ │ ├── optimal_transport_distance.png │ │ ├── squared.png │ │ └── target_copulas.png │ ├── information_theory_metrics.rst │ ├── introduction.rst │ └── optimal_transport.rst │ ├── cointegration_approach │ ├── cointegration_tests.rst │ ├── half_life.rst │ ├── images │ │ ├── AME-DOV.png │ │ ├── MR_strength_box_tiao.png │ │ ├── cluster.gif │ │ ├── coint_sim.png │ │ 
├── column_lasso_demo-opt.gif │ │ ├── cov_select_demo-opt.gif │ │ ├── engle-granger_portfolio.png │ │ ├── greedy_demo.gif │ │ ├── johansen_portfolio.png │ │ ├── minimum_profit_slides.png │ │ ├── multitask_lasso_demo-opt.gif │ │ ├── nile_river_level.png │ │ └── sparse_mr_slides.png │ ├── introduction.rst │ ├── minimum_profit.rst │ ├── minimum_profit_simulation.rst │ ├── multivariate_cointegration.rst │ └── sparse_mr_portfolio.rst │ ├── conf.py │ ├── copula_approach │ ├── copula_brief_intro.rst │ ├── copula_deeper_intro.rst │ ├── cvine_copula_strategy.rst │ ├── images │ │ ├── 3d_vinecop_decomposition.png │ │ ├── AMGN_HD_MixCop.png │ │ ├── Bollinger_band_example.png │ │ ├── CMPI_vs_log_prices.png │ │ ├── C_vine_D_vine_structure.png │ │ ├── CumDenN13.png │ │ ├── Cvine_tuple.png │ │ ├── Equity_curve_cvinecop.png │ │ ├── R_vine_structure.png │ │ ├── Rvine_Cvine_Dvine.png │ │ ├── copula_marginal_dist_demo.png │ │ ├── densityGaussian.png │ │ ├── densityGumbel.png │ │ ├── ecdf_vs_ecdflin.png │ │ ├── eucdis_ranked_rho_tau.png.png │ │ ├── formation_copulas.png │ │ ├── individual_ranked_rho_tau.png │ │ ├── positions_log_prices.png │ │ ├── rho_ranked_rho_tau.png │ │ ├── tau_ranked_rho_tau.png │ │ ├── top_euc.png │ │ ├── top_tau.png │ │ ├── top_tau_quantile.png │ │ ├── trading_opportunities.png │ │ ├── workflow_getdata.png │ │ ├── workflow_select_structure.png │ │ └── workflow_vinecop_density.png │ ├── introduction.rst │ ├── partner_selection.rst │ ├── utility_functions.rst │ └── vine_copula_intro.rst │ ├── data │ ├── data_importer.rst │ ├── futures_rollover.rst │ └── images │ │ ├── back_cont.jpeg │ │ ├── prices.png │ │ ├── returns.png │ │ ├── rolling_intuition.png │ │ └── ticker_collection.png │ ├── developer │ └── debugging.rst │ ├── distance_approach │ ├── distance_approach.rst │ ├── images │ │ ├── SSD_distance_example.png │ │ ├── distance_approach_pair.png │ │ ├── distance_approach_portfolio.png │ │ ├── distance_approach_results_portfolio.png │ │ └── 
pearson_approach_beta_stocks.png │ ├── introduction.rst │ └── pearson_approach.rst │ ├── getting_started │ ├── equity_curve_convention.rst │ ├── getting_started_images │ │ ├── OLS_vs_TLS.png │ │ ├── derivative.png │ │ ├── graph.png │ │ └── prior.png │ ├── installation.rst │ └── research_tools.rst │ ├── hedge_ratios │ └── hedge_ratios.rst │ ├── index.rst │ ├── ml_approach │ ├── filters.rst │ ├── images │ │ ├── 2nd_order_honn.png │ │ ├── 3d_cluster_optics_plot.png │ │ ├── confirmation_filter.png │ │ ├── correlation_filter.png │ │ ├── correlation_filter_example.png │ │ ├── crack_spread.png │ │ ├── example_ml_pair.png │ │ ├── honn_decision_region_xor.png │ │ ├── honn_loss_xor.png │ │ ├── honn_types.png │ │ ├── knee_plot.png │ │ ├── leverage_structure.png │ │ ├── mlp_decision_region_xor.png │ │ ├── mlp_loss_xor.png │ │ ├── pairs_selection_rules_diagram.png │ │ ├── paper_results.png │ │ ├── pi_sigma_nn.png │ │ ├── prposed_framework_diagram.png │ │ ├── rnn_lstm_example.png │ │ ├── rpnn.png │ │ ├── threshold_filter_example.png │ │ ├── vol_filter.png │ │ └── xor_boundaries.png │ ├── introduction.rst │ ├── ml_based_pairs_selection.rst │ ├── neural_networks.rst │ ├── spread_modeling.rst │ └── threshold_ar.rst │ ├── optimal_mean_reversion │ ├── cir_model.rst │ ├── heat_potentials.rst │ ├── images │ │ ├── cir_description.png │ │ ├── cir_optimal_switching.png │ │ ├── description_function.png │ │ ├── fit_check_function.png │ │ ├── optimal_levels_plot.png │ │ ├── optimal_switching.png │ │ └── xou_vs_ou.png │ ├── introduction.rst │ ├── ou_model.rst │ └── xou_model.rst │ ├── other_approaches │ ├── images │ │ ├── kalman_cumulative_returns.png │ │ ├── kalman_intercept.png │ │ ├── kalman_slope.png │ │ ├── pca_approach_portfolio.png │ │ └── pca_approach_s_score.png │ ├── kalman_filter.rst │ └── pca_approach.rst │ ├── spread_selection │ ├── cointegration_spread_selection.rst │ └── images │ │ └── pairs_selection_rules_diagram.png │ ├── stochastic_control_approach │ ├── images │ │ ├── 
jurek_describe.png │ │ ├── mudchana_describe.png │ │ ├── oc_delta_neutral_first.png │ │ ├── oc_delta_neutral_second.png │ │ ├── oc_describe.png │ │ ├── oc_optimal_first.png │ │ ├── oc_optimal_second.png │ │ ├── oc_spread.png │ │ ├── oc_wealth_delta_neutral.png │ │ ├── oc_wealth_optimal.png │ │ ├── optimal_weights.png │ │ ├── optimal_weights_fund_flows.png │ │ └── stabilization_bound.png │ ├── introduction.rst │ ├── optimal_convergence.rst │ ├── ou_model_jurek.rst │ └── ou_model_mudchanatongsuk.rst │ ├── time_series_approach │ ├── h_strategy.rst │ ├── images │ │ ├── auto_arima_prediction.png │ │ ├── model_diagram.png │ │ ├── quantile_thresholds.png │ │ └── trading_example.png │ ├── introduction.rst │ ├── ou_optimal_threshold_bertram.rst │ ├── ou_optimal_threshold_zeng.rst │ ├── quantile_time_series_strategy.rst │ └── regime_switching_arbitrage_rule.rst │ ├── trading │ ├── basic_copula.rst │ ├── images │ │ ├── AME-DOV.png │ │ ├── formation_copulas.png │ │ ├── mpi_flags_positions.png │ │ ├── mpi_normalized_prices.png │ │ ├── mpi_units.png │ │ ├── positions_log_prices.png │ │ ├── returns_and_samples.png │ │ └── trading_opportunities.png │ ├── minimum_profit.rst │ ├── mispricing_index_strategy.rst │ ├── multi_coint.rst │ └── z_score.rst │ └── visualization │ ├── images │ ├── coint_eg.png │ ├── coint_jh.png │ └── ou_tearsheet.png │ └── tearsheet.rst ├── pylint ├── pyproject.toml └── tests ├── __init__.py ├── test_auto_arima.py ├── test_basic_distance_approach.py ├── test_cir_model.py ├── test_codependence.py ├── test_coint_sim.py ├── test_copula_generate_mixedcopula.py ├── test_copula_pairs_selection.py ├── test_copulas.py ├── test_data ├── ANZ-ADB.csv ├── BKD_ESC_2009_2011.csv ├── BKD_ESC_unittest_positions.csv ├── CL=F_NG=F_data.csv ├── Country_ETF.csv ├── NonNegative_CL_forward_roll.csv ├── NonNegative_nRB_forward_roll.csv ├── XLF-XLK.csv ├── b0.csv ├── cl.csv ├── eh1.csv ├── eh2.csv ├── gld_gdx_data.csv ├── incorrect_data.csv ├── multi_coint.csv ├── nbp.csv ├── 
prices_10y_SP500.csv ├── rb.csv ├── s.csv ├── shell-rdp-close_USD.csv ├── sp100_prices.csv ├── sp500_2016_test.csv ├── sp500_constituents-detailed.csv └── stock_prices.csv ├── test_data_importer.py ├── test_feature_expander.py ├── test_filters.py ├── test_futures_roller.py ├── test_h_strategy.py ├── test_heat_potentials.py ├── test_hedge_ratios.py ├── test_hedge_ratios_spread_construction.py ├── test_indexed_highlight.py ├── test_kalman_filter.py ├── test_mean_reversion.py ├── test_minimum_profit.py ├── test_mixed_copula.py ├── test_multi_coint.py ├── test_neural_networks.py ├── test_optics_dbscan_pairs_clustering.py ├── test_optimal_convergence.py ├── test_ou_model.py ├── test_ou_model_jurek.py ├── test_ou_model_mudchanatongsuk.py ├── test_ou_optimal_threshold.py ├── test_ou_optimal_threshold_bertram.py ├── test_ou_optimal_threshold_zeng.py ├── test_partner_selection.py ├── test_pca_approach.py ├── test_pearson_distance_approach.py ├── test_quantile_time_series.py ├── test_regime_switching_arbitrage_rule.py ├── test_regressor_committee.py ├── test_sparse_mr_portfolio.py ├── test_spread_modeling_helper.py ├── test_spread_selection_cointegration.py ├── test_tar.py ├── test_tearsheet.py ├── test_trading_basic_copula.py ├── test_trading_copula_strategy_mpi.py ├── test_trading_minimum_profit.py ├── test_trading_multi_coint.py ├── test_trading_z_score.py ├── test_vinecop_generate_strategy.py └── test_xou_model.py /.bumpversion.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 1.0.0 3 | commit = False 4 | tag = False 5 | tag_name = {new_version} 6 | 7 | [bumpversion:file:pyproject.toml] 8 | search = version = "{current_version}" 9 | replace = version = "{new_version}" 10 | 11 | [bumpversion:file:docs/source/conf.py] 12 | search = release = "{current_version}" 13 | replace = release = "{new_version}" 14 | -------------------------------------------------------------------------------- 
/.coveragerc: -------------------------------------------------------------------------------- 1 | [html] 2 | directory = build/coverage/html 3 | 4 | [run] 5 | branch = True 6 | parallel = True 7 | omit = 8 | *__init__* 9 | arbitragelab/network/imports.py 10 | arbitragelab/util/segment.py 11 | tests/* 12 | /opt/conda/* 13 | venv/* 14 | # Ensure we exclude any files in .local 15 | */.local/* 16 | */tmp/* 17 | # Temporary fix for YFinance issues 18 | arbitragelab/util/data_importer.py 19 | # External part of package 20 | arbitragelab/util/data_cursor.py 21 | 22 | disable_warnings = no-data-collected 23 | 24 | [report] 25 | partial_branches = True 26 | show_missing = True 27 | exclude_lines = 28 | pragma: no cover 29 | raise NotImplementedError 30 | @abc.abstractmethod 31 | @abc.abstractproperty 32 | def _dependencies(self): 33 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | 5 | --- 6 | 7 | * ArbitrageLab: `python -m arbitragelab --version` 8 | * Python version: `python --version` 9 | * Operating System: `uname -a` 10 | 11 | ### Description 12 | 13 | Describe what you were trying to get done. 14 | Tell us what happened, what went wrong, and what you expected to happen. 15 | 16 | ### What I Did 17 | 18 | ``` 19 | Paste the command(s) you ran and the output. 20 | If there was a crash, please include the traceback here. 21 | ``` 22 | 23 | ### Expected behavior 24 | A clear and concise description of what you expected to happen. 25 | 26 | ### Additional context 27 | Add any other context about the problem here. 
28 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/docs_suggestion.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Docs suggestion 3 | about: Request an improvement to the docs 4 | 5 | --- 6 | 7 | ### Description 8 | Describe what you would like to see better documented. 9 | 10 | ### Intended audience 11 | Who is the audience for the documentation improvement? Developers, users, maintainers? 12 | 13 | ### Additional context 14 | Add any other context about the suggestion here. 15 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | 5 | --- 6 | 7 | **Is your feature request related to a problem? Please describe.** 8 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 9 | 10 | **Describe the solution you'd like** 11 | A clear and concise description of what you want to happen. 12 | 13 | **Describe alternatives you've considered** 14 | A clear and concise description of any alternative solutions or features you've considered. 15 | 16 | **Additional context** 17 | Add any other context or screenshots about the feature request here. 
18 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | ## Purpose 2 | _Describe the problem or feature in addition to a link to the issues._ 3 | 4 | ## Approach 5 | _How does this change address the problem?_ 6 | 7 | ## Tests for New Behavior 8 | _What new tests were added to cover new features or behaviors?_ 9 | 10 | ## Checklist 11 | _Make sure you did the following (if applicable):_ 12 | - [ ] Added tests for any new features or behaviors. 13 | - [ ] Ran ``./pylint`` to make sure code style is consistent. 14 | - [ ] Built and reviewed the docs. 15 | - [ ] Added a note to the [changelog](https://github.com/hudson-and-thames/arbitragelab/blob/develop/docs/source/changelog.rst). 16 | 17 | ## Learning 18 | _Describe the research stage_ 19 | 20 | _Links to blog posts, patterns, libraries or addons used to solve this problem_ 21 | -------------------------------------------------------------------------------- /.github/workflows/publish-final-dist.yaml: -------------------------------------------------------------------------------- 1 | name: Publish Distribution to PyPI 2 | 3 | on: 4 | push: 5 | tags: 6 | - '[0-9]+.[0-9]+.[0-9]' 7 | 8 | jobs: 9 | build-and-publish-final-dist: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Check out code 13 | uses: actions/checkout@v4 14 | 15 | - name: Set up Python 16 | uses: actions/setup-python@v5 17 | with: 18 | python-version: '3.8' 19 | 20 | - name: Install Poetry 21 | run: | 22 | pip install poetry 23 | 24 | - name: Install dependencies 25 | run: | 26 | poetry install --without docs,tests 27 | 28 | - name: Build the package 29 | run: | 30 | poetry build 31 | 32 | - name: Publish to TestPyPI 33 | env: 34 | POETRY_PYPI_TOKEN_PYPI: ${{ secrets.PYPI_API_TOKEN }} 35 | run: | 36 | poetry publish 37 | -------------------------------------------------------------------------------- 
/.github/workflows/publish-test-dist.yaml: -------------------------------------------------------------------------------- 1 | name: Publish Distribution to TestPyPI 2 | 3 | on: 4 | push: 5 | tags: 6 | - '[0-9]+.[0-9]+.[0-9]+-dev' 7 | 8 | jobs: 9 | build-and-publish-test-dist: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Check out code 13 | uses: actions/checkout@v4 14 | 15 | - name: Set up Python 16 | uses: actions/setup-python@v4 17 | with: 18 | python-version: '3.8' 19 | 20 | - name: Install Poetry 21 | run: | 22 | pip install poetry 23 | 24 | - name: Install dependencies 25 | run: | 26 | poetry install --without docs,tests 27 | 28 | - name: Build the package 29 | run: | 30 | poetry build 31 | 32 | - name: Publish to TestPyPI 33 | env: 34 | POETRY_PYPI_TOKEN_TESTPYPI: ${{ secrets.TEST_PYPI_API_TOKEN }} 35 | run: | 36 | poetry config repositories.testpypi https://test.pypi.org/legacy/ 37 | poetry publish -r testpypi 38 | -------------------------------------------------------------------------------- /.github/workflows/python-tests.yaml: -------------------------------------------------------------------------------- 1 | name: Test code and documentation 2 | 3 | on: 4 | push: 5 | branches: 6 | - develop 7 | pull_request: 8 | branches: 9 | - develop 10 | 11 | jobs: 12 | test-code-style: 13 | runs-on: ubuntu-latest 14 | strategy: 15 | matrix: 16 | python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] 17 | 18 | steps: 19 | - name: Checkout code 20 | uses: actions/checkout@v4 21 | 22 | - name: Set up Python 23 | uses: actions/setup-python@v5 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | 27 | - name: Install Poetry 28 | run: | 29 | pip install --upgrade pip 30 | pip install poetry 31 | 32 | 33 | - name: Install dependencies 34 | run: | 35 | poetry install --without docs 36 | 37 | - name: Run Pylint 38 | run: | 39 | poetry run pylint arbitragelab tests --rcfile=.pylintrc --output-format=text --output=pylint-report.txt 40 | 41 | - name: 
Upload test results 42 | uses: actions/upload-artifact@v4 43 | with: 44 | name: pylint-report-${{ matrix.python-version }} 45 | path: pylint-report.txt 46 | 47 | 48 | test-coverage: 49 | runs-on: ubuntu-latest 50 | strategy: 51 | matrix: 52 | python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] 53 | 54 | steps: 55 | - name: Checkout code 56 | uses: actions/checkout@v4 57 | 58 | - name: Set up Python 59 | uses: actions/setup-python@v5 60 | with: 61 | python-version: ${{ matrix.python-version }} 62 | 63 | - name: Install Poetry 64 | run: | 65 | pip install --upgrade pip 66 | pip install poetry 67 | 68 | - name: Install dependencies 69 | run: | 70 | poetry install --without docs 71 | 72 | - name: Run tests with coverage 73 | run: | 74 | poetry run pytest tests/ --cov=arbitragelab --cov-report=term --cov-branch --cov-config=.coveragerc 75 | 76 | - name: Generate coverage HTML report 77 | run: poetry run coverage html 78 | 79 | - name: Upload Coverage HTML Report as Artifact 80 | uses: actions/upload-artifact@v4 81 | with: 82 | name: coverage-html-${{ matrix.python-version }} 83 | path: build/coverage/html/index.html 84 | 85 | - name: Check coverage 86 | run: poetry run coverage report --fail-under=100 87 | 88 | test-docs: 89 | runs-on: ubuntu-latest 90 | strategy: 91 | matrix: 92 | python-version: [3.8] 93 | 94 | steps: 95 | - name: Checkout code 96 | uses: actions/checkout@v4 97 | 98 | - name: Set up Python 99 | uses: actions/setup-python@v5 100 | with: 101 | python-version: ${{ matrix.python-version }} 102 | 103 | - name: Install Poetry 104 | run: | 105 | pip install poetry 106 | 107 | - name: Install requirements 108 | run: | 109 | poetry install --without tests 110 | 111 | - name: Build documentation 112 | run: | 113 | cd docs 114 | poetry run make html 115 | 116 | - name: Run doctests 117 | run: | 118 | cd docs 119 | poetry run make doctest 120 | 121 | - name: Upload doctest results as an artifact 122 | uses: actions/upload-artifact@v4 123 | with: 124 | name: 
doctest-results 125 | path: docs/build/doctest/output.txt 126 | 127 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/* 2 | docs/build/ 3 | .local/ 4 | cover/ 5 | build/ 6 | dist/ 7 | .tox/ 8 | __pycache__ 9 | *.pyc 10 | test_reports 11 | .coverage 12 | .coverage.* 13 | coverage.xml 14 | .DS_Store 15 | *.pickle 16 | */.ipynb_checkpoints/* 17 | arbitragelab.egg-info/* 18 | .eggs 19 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yml 2 | # Read the Docs configuration file 3 | 4 | version: 2 5 | 6 | build: 7 | os: "ubuntu-22.04" 8 | tools: 9 | python: "3.8" 10 | jobs: 11 | post_create_environment: 12 | # Install poetry 13 | - pip install poetry 14 | post_install: 15 | # Install dependencies 16 | - VIRTUAL_ENV=$READTHEDOCS_VIRTUALENV_PATH poetry install --only docs 17 | 18 | sphinx: 19 | configuration: docs/source/conf.py 20 | 21 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | ## 1. Our Commitment 4 | 5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to make 6 | participation in our project and our community a harassment-free experience for everyone. 7 | 8 | ## 2. 
Our Standards 9 | 10 | Examples of behavior that contributes to creating a positive environment include: 11 | 12 | - Demonstrating empathy and kindness toward other people 13 | - Being respectful of differing opinions, viewpoints, and experiences 14 | - Giving and gracefully accepting constructive feedback 15 | - Accepting responsibility and apologizing to those affected by our mistakes, and learning from the experience 16 | - Focusing on what is best not just for us as individuals, but for the overall community 17 | 18 | Examples of unacceptable behavior by participants include: 19 | 20 | - The use of sexualized language or imagery and unwelcome sexual attention or advances 21 | - Trolling, insulting or derogatory comments, and personal or political attacks 22 | - Public or private harassment 23 | - Publishing others' private information, such as a physical or email address, without explicit permission 24 | - Other conduct which could reasonably be considered inappropriate in a professional setting 25 | 26 | ## 3. Our Responsibilities 27 | 28 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take 29 | appropriate and fair corrective action in response to any instances of unacceptable behavior. 30 | 31 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, 32 | issues, and other contributions that are not aligned with this Code of Conduct, or to ban temporarily or permanently 33 | any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 34 | 35 | ## 4. Scope 36 | 37 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the 38 | project or its community. 
Examples of representing a project or community include using an official project e-mail 39 | address, posting via an official social media account, or acting as an appointed representative at an online or 40 | offline event. 41 | 42 | ## 5. Enforcement 43 | 44 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project 45 | team at opensource[at]hudsonthames.org. All complaints will be reviewed and investigated promptly and fairly. 46 | 47 | All project maintainers are obligated to respect the privacy and security of the reporter of any incident. 48 | 49 | ## 6. Attribution 50 | 51 | This Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org/version/2/0/code_of_conduct.html), version 2.0. 52 | 53 | This policy is adapted from the guidelines provided by the Python Software Foundation available at [https://www.python.org/psf/conduct/](https://www.python.org/psf/conduct/). 54 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contribution Guidelines for Hudson and Thames ArbitrageLab 2 | 3 | Thank you for considering contributing to Hudson and Thames ArbitrageLab! We value your time and effort in helping 4 | improve our project. Please follow these guidelines to ensure a smooth contribution process for everyone involved. 5 | 6 | ## Getting Started 7 | 8 | Before you begin: 9 | - Make sure you have a GitHub account. 10 | - Familiarize yourself with our project by reading the documentation [here](https://hudson-and-thames-arbitragelab.readthedocs-hosted.com/en/latest/). 11 | - Check the issues page for outstanding work and discussions to see if someone else is already working on your idea. 12 | 13 | ## Making Contributions 14 | 15 | To contribute to our project, follow these steps: 16 | 17 | 1. 
**Fork the Repository** 18 | - Go to the GitHub page of Hudson and Thames ArbitrageLab and click the "Fork" button at the top right corner. 19 | 20 | 2. **Clone Your Fork** 21 | - After forking, clone the repository to your local machine to start making changes: 22 | ```bash 23 | git clone https://github.com/your_username/arbitragelab.git 24 | cd arbitragelab 25 | ``` 26 | 27 | 3. **Create a Branch** 28 | - Create a new branch for your changes: 29 | ```bash 30 | git checkout -b feature/your_feature_name 31 | ``` 32 | 33 | 4. **Make Changes Locally** 34 | - Implement your feature or bug fix. 35 | - Write clear, comprehensible commit messages. 36 | - Make sure your code adheres to the existing style of the project to maintain its readability. 37 | 38 | 5. **Coverage and Unit Test** 39 | - Run all unit tests to confirm your changes don't break existing functionality. We require 100% coverage. 40 | - Check code coverage and improve it if possible. Coverage reports should be part of the project's test suite. 41 | 42 | 6. **Follow PR Template** 43 | - When creating a pull request, ensure you follow one of our PR templates provided in the repository. This helps maintain the project's consistency and facilitates review. 44 | 45 | 7. **Submit a Pull Request** 46 | - Push your changes to your fork: 47 | ```bash 48 | git push origin feature/your_feature_name 49 | ``` 50 | - Go to the repository on GitHub, and you'll see a "Compare & pull request" button. Click it and fill in the details according to the chosen template. 51 | - Submit the pull request for review, to the develop branch - never master/main. 52 | 53 | ## Code Review 54 | 55 | Your pull request will be reviewed by maintainers who may provide feedback or request changes. Keep an eye on your GitHub notifications and respond promptly to comments. 56 | 57 | ## Acceptance Criteria 58 | 59 | Before a pull request is accepted, it must: 60 | - Pass all automated build checks. 
61 | - Achieve successful results on all unit tests. 62 | - Maintain or improve existing code coverage. 63 | - Adhere to the coding standards and documentation style of the project. 64 | 65 | ## Final Steps 66 | 67 | Once your pull request is approved and merged, you are officially a contributor. Congratulations! We encourage you to 68 | continue participating in our community and consider tackling other issues or improving documentation. 69 | 70 | For any questions or help with getting started, don't hesitate to reach out through our community forums or issue tracker. 71 | 72 | Thank you for contributing to Hudson and Thames ArbitrageLab! 73 | -------------------------------------------------------------------------------- /DEV_NOTE: -------------------------------------------------------------------------------- 1 | # Development 2 | 3 | ## Creating a release 4 | 5 | - Create `release/` branch 6 | - Bump versions throughout source files (we use `bump2version` to automate this process, TODO: Add instructions) 7 | - Update customer install instructions in documentation source files 8 | - Update release information in changelog in documentation source files 9 | - Open PR from `release` branch into `develop` 10 | - Merge PR once approved 11 | - Test you can install the wheel from a fresh environment 12 | - Merge `develop` into `master` 13 | - Upload the wheel to PyPI. 14 | - Tag the commit with the version number 15 | - Write a blog post announcing the release 16 | - Send a newsletter email 17 | - Post on social media 18 | 19 | ## Bumping version numbers using `bump2version` 20 | 21 | We use `bump2version` to automatically bump versions throughout source files. 22 | 23 | Configuration lives in the `.bumpversion.cfg` file. 
To run `bump2version`, first install it via `pip`: 24 | 25 | ``` sh 26 | pip install --upgrade bump2version 27 | ``` 28 | 29 | And then bump the version: 30 | 31 | ``` sh 32 | bump2version <part> 33 | ``` 34 | 35 | where `<part>` tells you which part of the version to bump. The acceptable 36 | values are `major`, `minor` or `patch`, conforming to the semantic versioning 37 | pattern: `major.minor.patch`. For example, `3.2.7` has a major version of 3, a 38 | minor version of 2 and a patch version of 7. -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2007-2024 The Hudson and Thames developers. 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.md 2 | include *.txt 3 | include Licence.txt 4 | recursive-include arbitragelab *.csv 5 | recursive-include scripts *.py 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 5 | 6 |
7 |
8 | 9 | # Welcome to the Arbitrage Laboratory! 10 | 11 | **What was only possible with the help of huge R&D teams is now at your disposal, anywhere, anytime.** 12 | 13 | [Documentation Click Here](https://hudson-and-thames-arbitragelab.readthedocs-hosted.com/en/latest/index.html). 14 | 15 | ArbitrageLab is a python library that includes both end-to-end strategies and strategy creation tools that cover the 16 | whole range of strategies defined by [Krauss' taxonomy](https://www.econstor.eu/bitstream/10419/116783/1/833997289.pdf) for pairs trading strategies. 17 | 18 | ## What is ArbitrageLab? 19 | 20 | ArbitrageLab is an open-source python library that helps traders who want to exploit mean-reverting portfolios 21 | by providing a complete set of algorithms from the best academic journals. 22 | 23 | View the documentation to [get started](https://hudson-and-thames-arbitragelab.readthedocs-hosted.com/en/latest/index.html). 24 | 25 | ## Special Thank You: 26 | A lot of passion and love went into the creation of this library, and we would like to say a special thank you to: 27 | 28 | Original Team: 29 | * [Jacques Francois Joubert](https://www.linkedin.com/in/jacquesjoubert/) 30 | * [Illya Barziy](https://www.linkedin.com/in/illyabarziy/) 31 | * [Valeriia Pervushyna](https://www.linkedin.com/in/valeriia-pervushyna/) 32 | * [Dirk Frees](https://www.linkedin.com/in/dirkfreese/) 33 | 34 | A heartfelt thank you to Illya and Valeriia for your exceptional contributions to ArbitrageLab. Your dedication and 35 | talent have been instrumental in enhancing the library and company as a whole. Your technical ingenuity, 36 | and meticulous attention to detail, have not only enriched our project but also set a high standard for excellence. We deeply 37 | appreciate your hard work and commitment to making ArbitrageLab a success. 38 | 39 | A special thank you to Dirk for the quality time and deep insights you have dedicated to enhancing our business.
40 | Your expertise and motivational efforts were, and continue to be, invaluable. We greatly appreciate your 41 | commitment and enthusiastic support. We couldn't have asked for a better Start-Up Advisor! 42 | 43 | Core Contributions 44 | * [Hansen Pei](https://www.linkedin.com/in/hansen-pei-0949691b3/) 45 | * Yefeng Wang 46 | * [Vijay Nadimpalli](https://www.linkedin.com/in/vijay-nadimpalli/) 47 | * [Joohwan Ko](https://www.linkedin.com/in/joohwan-ko-638748174/) 48 | 49 | 50 | ## Dedicated to WorldQuant University (WQU) 51 | 52 |
53 | 54 | 56 | 57 |
58 |
59 | 60 | We are thrilled to highlight an exceptional educational opportunity for those passionate about financial 61 | engineering — WorldQuant University’s Master of Science in Financial Engineering (MSFE) program. This groundbreaking 62 | initiative is completely online and tuition-free, democratizing advanced education in a way that's accessible to 63 | individuals around the globe. 64 | 65 | The MSFE program at WorldQuant University is designed to equip students with the quantitative skills essential for a 66 | competitive edge in today's tech-driven finance sectors. With a curriculum that balances theory and practical 67 | application, students not only gain deep insights but also practical skills that can be immediately applied in various 68 | financial roles. 69 | 70 | If you're looking to elevate your expertise or pivot your career towards quantitative finance, I encourage you to 71 | explore this opportunity. WorldQuant University is not just about education; it’s about empowering future financial 72 | leaders. Learn more about their [MSFE program](https://www.wqu.edu/) and take a significant step towards transforming 73 | your professional life. 74 | 75 | ## Learn To Build Production Ready Python Libraries 76 | We have released a course on Udemy that you can follow to produce your own open-source projects for finance. 77 | * [Writing Production-Grade Python Code for Quant Developers](https://www.udemy.com/course/writing-production-grade-code-for-quantitative-developers/) 78 | -------------------------------------------------------------------------------- /arbitragelab/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | ArbitrageLab helps portfolio managers and traders who want to leverage the power of Statistical Arbitrage by providing 3 | reproducible, interpretable, and easy to use tools. 
4 | """ 5 | # pylint: disable=consider-using-from-import 6 | 7 | import arbitragelab.codependence as codependence 8 | import arbitragelab.cointegration_approach as cointegration_approach 9 | import arbitragelab.copula_approach as copula_approach 10 | import arbitragelab.distance_approach as distance_approach 11 | import arbitragelab.hedge_ratios as hedge_ratios 12 | import arbitragelab.ml_approach as ml_approach 13 | import arbitragelab.optimal_mean_reversion as optimal_mean_reversion 14 | import arbitragelab.other_approaches as other_approaches 15 | import arbitragelab.spread_selection as spread_selection 16 | import arbitragelab.stochastic_control_approach as stochastic_control_approach 17 | import arbitragelab.tearsheet as tearsheet 18 | import arbitragelab.time_series_approach as time_series_approach 19 | import arbitragelab.trading as trading 20 | import arbitragelab.util as util 21 | -------------------------------------------------------------------------------- /arbitragelab/codependence/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module implements various codependence measures. 
3 | """ 4 | 5 | from arbitragelab.codependence.correlation import (angular_distance, absolute_angular_distance, squared_angular_distance, 6 | distance_correlation) 7 | from arbitragelab.codependence.information import (get_mutual_info, get_optimal_number_of_bins, 8 | variation_of_information_score) 9 | from arbitragelab.codependence.codependence_matrix import (get_dependence_matrix, get_distance_matrix) 10 | from arbitragelab.codependence.gnpr_distance import (spearmans_rho, gpr_distance, gnpr_distance) 11 | from arbitragelab.codependence.optimal_transport import (optimal_transport_dependence) 12 | -------------------------------------------------------------------------------- /arbitragelab/cointegration_approach/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module implements Cointegration-based Statistical Arbitrage strategies. 3 | """ 4 | 5 | from arbitragelab.cointegration_approach.johansen import JohansenPortfolio 6 | from arbitragelab.cointegration_approach.engle_granger import EngleGrangerPortfolio 7 | from arbitragelab.cointegration_approach.minimum_profit import MinimumProfit 8 | from arbitragelab.cointegration_approach.coint_sim import CointegrationSimulation 9 | from arbitragelab.cointegration_approach.multi_coint import MultivariateCointegration 10 | from arbitragelab.cointegration_approach.sparse_mr_portfolio import SparseMeanReversionPortfolio 11 | from arbitragelab.cointegration_approach.utils import (get_half_life_of_mean_reversion, get_hurst_exponent) 12 | -------------------------------------------------------------------------------- /arbitragelab/cointegration_approach/base.py: -------------------------------------------------------------------------------- 1 | """ 2 | Base class for cointegration approach in statistical arbitrage. 
class CointegratedPortfolio(ABC):
    """
    Common base for portfolios built from cointegration vectors (Johansen test, Engle-Granger test).

    Both methods read ``self.cointegration_vectors`` when no explicit vector is supplied; this class does not
    set that attribute itself.
    """

    def construct_mean_reverting_portfolio(self, price_data: pd.DataFrame,
                                           cointegration_vector: pd.Series = None) -> pd.Series:
        """
        Weight asset prices by a cointegration vector and sum them row-wise to obtain the portfolio value.

        :param price_data: (pd.DataFrame) Price data with columns containing asset prices.
        :param cointegration_vector: (pd.Series) Cointegration vector used to form a mean-reverting portfolio.
            If None, the first row of ``self.cointegration_vectors`` is used.
        :return: (pd.Series) Cointegrated portfolio dollar value.
        """

        weights = cointegration_vector
        if weights is None:
            # Fall back to the first stored vector.
            weights = self.cointegration_vectors.iloc[0]

        weighted_prices = weights * price_data

        return weighted_prices.sum(axis=1)

    def get_scaled_cointegration_vector(self, cointegration_vector: pd.Series = None) -> pd.Series:
        """
        Rescale a cointegration vector so that its first element equals one, i.e. express the weights as units
        of the other assets per one unit of the first asset.

        :param cointegration_vector: (pd.Series) Cointegration vector used to form a mean-reverting portfolio.
            If None, the first row of ``self.cointegration_vectors`` is used.
        :return: (pd.Series) The scaled cointegration vector values.
        """

        if cointegration_vector is None:
            vector = self.cointegration_vectors.iloc[0]
        else:
            vector = cointegration_vector

        # Multiply by the reciprocal of the first weight.
        reciprocal_of_first = 1 / vector.iloc[0]

        return vector * reciprocal_of_first
35 | 36 | :param residuals: (pd.Series) OLS residuals. 37 | """ 38 | test_result = adfuller(residuals) 39 | critical_values = test_result[4] 40 | self.adf_statistics = pd.DataFrame(index=['99%', '95%', '90%'], data=critical_values.values()) 41 | self.adf_statistics.loc['statistic_value', 0] = test_result[0] 42 | 43 | def fit(self, price_data: pd.DataFrame, add_constant: bool = False): 44 | """ 45 | Finds hedge-ratios using a two-step Engle-Granger method to form a mean-reverting portfolio. 46 | By default, the first column of price data is used as a dependent variable in OLS estimation. 47 | 48 | This method was originally described in `"Co-integration and Error Correction: Representation, 49 | Estimation, and Testing," Econometrica, Econometric Society, vol. 55(2), pages 251-276, March 1987 50 | `_ by Engle, Robert F and Granger, Clive W J. 51 | 52 | :param price_data: (pd.DataFrame) Price data with columns containing asset prices. 53 | :param add_constant: (bool) A flag to add a constant term in linear regression. 
54 | """ 55 | 56 | self.price_data = price_data 57 | self.dependent_variable = price_data.columns[0] 58 | 59 | # Fit the regression 60 | hedge_ratios, _, _, residuals = self.get_ols_hedge_ratio(price_data=price_data, 61 | dependent_variable=self.dependent_variable, 62 | add_constant=add_constant) 63 | self.cointegration_vectors = pd.DataFrame([np.append(1, -1 * np.array( 64 | [hedge for ticker, hedge in hedge_ratios.items() if ticker != self.dependent_variable]))], 65 | columns=price_data.columns) 66 | 67 | self.hedge_ratios = pd.DataFrame([np.append(1, np.array( 68 | [hedge for ticker, hedge in hedge_ratios.items() if ticker != self.dependent_variable]))], 69 | columns=price_data.columns) 70 | 71 | # Get model residuals 72 | self.residuals = residuals 73 | self.perform_eg_test(self.residuals) 74 | 75 | @staticmethod 76 | def get_ols_hedge_ratio(price_data: pd.DataFrame, dependent_variable: str, add_constant: bool = False) -> \ 77 | Tuple[dict, pd.DataFrame, pd.Series, pd.Series]: 78 | """ 79 | Get OLS hedge ratio: y = beta*X. 80 | 81 | :param price_data: (pd.DataFrame) Data Frame with security prices. 82 | :param dependent_variable: (str) Column name which represents the dependent variable (y). 83 | :param add_constant: (bool) Boolean flag to add constant in regression setting. 84 | :return: (Tuple) Hedge ratios, X, and y and OLS fit residuals. 
def get_hurst_exponent(data: np.array, max_lags: int = 100) -> float:
    """
    Hurst Exponent Calculation.

    For every lag in ``range(2, max_lags)`` the square root of the standard deviation of the lagged differences
    is computed; the exponent is twice the slope of a straight line fitted to these values in log-log space.

    :param data: (np.array) Time Series that is going to be analyzed. Array-likes such as pd.Series are
        accepted and are converted to a NumPy array first.
    :param max_lags: (int) Maximum amount of lags to be used calculating tau.
    :return: (float) Hurst exponent.
    """

    # Strip any pandas index before differencing: subtracting two slices of a pd.Series aligns them
    # by label instead of by position, which would turn every lagged difference into 0 or NaN.
    values = np.asarray(data)

    lags = range(2, max_lags)
    tau = [np.sqrt(np.std(values[lag:] - values[:-lag])) for lag in lags]
    poly = np.polyfit(np.log(lags), np.log(tau), 1)

    return poly[0] * 2.0
3 | """ 4 | 5 | from arbitragelab.copula_approach.archimedean.gumbel import Gumbel 6 | from arbitragelab.copula_approach.archimedean.clayton import Clayton 7 | from arbitragelab.copula_approach.archimedean.frank import Frank 8 | from arbitragelab.copula_approach.archimedean.joe import Joe 9 | from arbitragelab.copula_approach.archimedean.n13 import N13 10 | from arbitragelab.copula_approach.archimedean.n14 import N14 11 | -------------------------------------------------------------------------------- /arbitragelab/copula_approach/elliptical/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module implements Elliptical Copulas. 3 | """ 4 | 5 | from arbitragelab.copula_approach.elliptical.student import StudentCopula, fit_nu_for_t_copula 6 | from arbitragelab.copula_approach.elliptical.gaussian import GaussianCopula 7 | -------------------------------------------------------------------------------- /arbitragelab/copula_approach/mixed_copulas/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module implements Mixed Copulas. 3 | """ 4 | 5 | from arbitragelab.copula_approach.mixed_copulas.base import MixedCopula 6 | from arbitragelab.copula_approach.mixed_copulas.cfg_mix_copula import CFGMixCop 7 | from arbitragelab.copula_approach.mixed_copulas.ctg_mix_copula import CTGMixCop 8 | -------------------------------------------------------------------------------- /arbitragelab/distance_approach/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The module implements Distance-based Statistical Arbitrage strategies. 
def _min_adf_stat(beta: np.array, X: pd.DataFrame, y: pd.Series) -> float:
    """
    Fitness function to minimize in ADF test statistic algorithm.

    Builds the spread ``y - sum(beta * X)``, runs the Engle-Granger test on it and returns the resulting
    ADF test statistic.

    :param beta: (np.array) Array of hedge ratios.
    :param X: (pd.DataFrame) DataFrame of independent variables. We hold `beta` units of X assets.
    :param y: (pd.Series) Series of target variable. For this asset we hold 1 unit.
    :return: (float) ADF test statistic of the spread.
    """

    # Performing Engle-Granger test on spread
    portfolio = EngleGrangerPortfolio()
    spread = y - (beta * X).sum(axis=1)
    portfolio.perform_eg_test(spread)

    return portfolio.adf_statistics.loc['statistic_value'].iloc[0]


def get_adf_optimal_hedge_ratio(price_data: pd.DataFrame, dependent_variable: str) -> \
        Tuple[dict, pd.DataFrame, pd.Series, pd.Series, object]:
    """
    Get hedge ratio by minimizing ADF test statistic.

    :param price_data: (pd.DataFrame) DataFrame with security prices.
    :param dependent_variable: (str) Column name which represents the dependent variable (y).
    :return: (Tuple) Hedge ratios, X, and y, spread residuals and optimization object.
    """

    X = price_data.drop(columns=dependent_variable)
    y = price_data[dependent_variable].copy()

    # Starting point: first price of y divided by each X price, averaged over time.
    # `.iloc[0]` is used because `y[0]` is a label lookup, which is deprecated on non-integer
    # indexes (e.g. dates) and raises KeyError on integer indexes that do not contain 0.
    initial_guess = (y.iloc[0] / X).mean().values
    result = minimize(_min_adf_stat, x0=initial_guess, method='BFGS', tol=1e-5, args=(X, y))
    residuals = y - (result.x * X).sum(axis=1)

    hedge_ratios = result.x
    # The dependent variable always gets a hedge ratio of 1 and is listed first.
    hedge_ratios_dict = dict(zip([dependent_variable] + X.columns.tolist(), np.insert(hedge_ratios, 0, 1.0)))
    if result.status != 0:
        warnings.warn('Optimization failed to converge. Please check output hedge ratio! The result can be unstable!')

    return hedge_ratios_dict, X, y, residuals, result
def _least_square_VAR_fit(demeaned_price_data: pd.DataFrame) -> np.array:
    """
    Fit a VAR(1) model to the data and return its coefficient matrix.

    :param demeaned_price_data: (pd.DataFrame) Demeaned price data.
    :return: (np.array) Least square estimate of VAR(1) matrix.
    """

    # statsmodels returns the coefficients with a leading lag axis of length one; drop it
    fitted_var = sm.tsa.VAR(demeaned_price_data).fit(1)

    return np.squeeze(fitted_var.coefs, axis=0)


def get_box_tiao_hedge_ratio(price_data: pd.DataFrame, dependent_variable: str) -> \
        Tuple[dict, pd.DataFrame, None, pd.Series]:
    """
    Perform Box-Tiao canonical decomposition on the assets dataframe.

    The eigenvectors of the Box-Tiao matrix are ordered by descending eigenvalue and the last one is taken
    as the portfolio weights. The weights are then normalized so that the dependent variable has a hedge
    ratio of 1.

    :param price_data: (pd.DataFrame) DataFrame with security prices.
    :param dependent_variable: (str) Column name which represents the dependent variable (y).
    :return: (Tuple) Hedge ratios, X (prices with the dependent variable as first column), None,
        and fit residuals.
    """

    # Put the dependent variable in the first column
    ordered_columns = [dependent_variable] + [col for col in price_data.columns if col != dependent_variable]
    X = price_data.copy()[ordered_columns]

    centered = X - X.mean()

    # VAR(1) least square estimate on the demeaned prices
    var_coefs = _least_square_VAR_fit(centered)

    # Matrix whose eigenvectors define the canonical portfolios
    covariance = centered.cov()
    box_tiao_matrix = np.linalg.inv(covariance) @ var_coefs @ covariance @ var_coefs.T

    eigenvalues, eigenvectors = np.linalg.eig(box_tiao_matrix)

    # Order eigenvectors by descending eigenvalue and keep the last one
    descending_order = np.argsort(eigenvalues)[::-1]
    weights = eigenvectors[:, descending_order][:, -1]
    raw_ratios = dict(zip(X.columns, weights))

    # Format expected by `construct_spread`: dependent variable has ratio 1, the rest are sign-flipped
    dependent_weight = raw_ratios[dependent_variable]
    hedge_ratios = {ticker: (1.0 if ticker == dependent_variable else -weight / dependent_weight)
                    for ticker, weight in raw_ratios.items()}

    residuals = construct_spread(price_data, hedge_ratios=hedge_ratios, dependent_variable=dependent_variable)

    return hedge_ratios, X, None, residuals
def _min_hl_function(beta: np.array, X: pd.DataFrame, y: pd.Series) -> float:
    """
    Fitness function to minimize in Minimum Half-Life Hedge Ratio algorithm.

    :param beta: (np.array) Array of hedge ratios.
    :param X: (pd.DataFrame) DataFrame of independent variables. We hold `beta` units of X assets.
    :param y: (pd.Series) Series of target variable. For this asset we hold 1 unit.
    :return: (float) Absolute value of the half-life of mean-reversion of the spread.
    """

    spread = y - (beta * X).sum(axis=1)

    return abs(get_half_life_of_mean_reversion(spread))


def get_minimum_hl_hedge_ratio(price_data: pd.DataFrame, dependent_variable: str) -> \
        Tuple[dict, pd.DataFrame, pd.Series, pd.Series, object]:
    """
    Get hedge ratio by minimizing spread half-life of mean reversion.

    :param price_data: (pd.DataFrame) DataFrame with security prices.
    :param dependent_variable: (str) Column name which represents the dependent variable (y).
    :return: (Tuple) Hedge ratios, X, and y, spread residuals and optimization object.
    """

    X = price_data.drop(columns=dependent_variable)
    y = price_data[dependent_variable].copy()

    # Starting point: first price of y divided by each X price, averaged over time.
    # `.iloc[0]` is used because `y[0]` is a label lookup, which is deprecated on non-integer
    # indexes (e.g. dates) and raises KeyError on integer indexes that do not contain 0.
    initial_guess = (y.iloc[0] / X).mean().values
    result = minimize(_min_hl_function, x0=initial_guess, method='BFGS', tol=1e-5, args=(X, y))
    residuals = y - (result.x * X).sum(axis=1)

    hedge_ratios = result.x
    # The dependent variable always gets a hedge ratio of 1 and is listed first.
    hedge_ratios_dict = dict(zip([dependent_variable] + X.columns.tolist(), np.insert(hedge_ratios, 0, 1.0)))
    if result.status != 0:
        warnings.warn('Optimization failed to converge. Please check output hedge ratio! The result can be unstable!')

    return hedge_ratios_dict, X, y, residuals, result
def get_ols_hedge_ratio(price_data: pd.DataFrame, dependent_variable: str, add_constant: bool = False) -> \
        Tuple[dict, pd.DataFrame, pd.Series, pd.Series]:
    """
    Get OLS hedge ratio: y = beta*X.

    :param price_data: (pd.DataFrame) Data Frame with security prices.
    :param dependent_variable: (str) Column name which represents the dependent variable (y).
    :param add_constant: (bool) Boolean flag to add constant in regression setting.
    :return: (Tuple) Hedge ratios dict, X, y and OLS fit residuals. Note that X is returned as
        a 2-D numpy array (not a data frame) when there is exactly one explanatory variable.
    """

    ols_model = LinearRegression(fit_intercept=add_constant)

    # `drop` returns a new frame, so the caller's `price_data` is never modified.
    X = price_data.drop(columns=dependent_variable)
    exogenous_variables = X.columns.tolist()
    if X.shape[1] == 1:
        # Kept for backward compatibility: a single regressor is handed over as an (n, 1) array.
        X = X.values.reshape(-1, 1)

    y = price_data[dependent_variable].copy()

    ols_model.fit(X, y)
    residuals = y - ols_model.predict(X)

    # The dependent variable always enters the spread with a hedge ratio of 1.
    hedge_ratios = ols_model.coef_
    hedge_ratios_dict = dict(zip([dependent_variable] + exogenous_variables, np.insert(hedge_ratios, 0, 1.0)))

    return hedge_ratios_dict, X, y, residuals


def _linear_f_no_constant(beta: np.ndarray, x_variable: np.ndarray) -> np.ndarray:
    """
    This is the helper linear model (without intercept) that is used in the Orthogonal Regression.

    :param beta: (np.array) Model beta coefficients. The first element is a placeholder for the
        constant and is ignored, so that both helper models share the same parameter layout.
    :param x_variable: (np.array) Model X matrix with one row per explanatory variable.
    :return: (np.array) Vector result of equation calculation.
    """

    # `reshape` gives a column vector without assigning to the `.shape` attribute in place.
    slopes = np.asarray(beta)[1:].reshape(-1, 1)

    return (x_variable * slopes).sum(axis=0)


def _linear_f_constant(beta: np.ndarray, x_variable: np.ndarray) -> np.ndarray:
    """
    This is the helper linear model (with intercept) that is used in the Orthogonal Regression.

    :param beta: (np.array) Model beta coefficients: the constant followed by the slopes.
    :param x_variable: (np.array) Model X matrix with one row per explanatory variable.
    :return: (np.array) Vector result of equation calculation.
    """

    beta = np.asarray(beta)
    intercept = beta[0]
    slopes = beta[1:].reshape(-1, 1)

    return intercept + (x_variable * slopes).sum(axis=0)


def get_tls_hedge_ratio(price_data: pd.DataFrame, dependent_variable: str, add_constant: bool = False) -> \
        Tuple[dict, pd.DataFrame, pd.Series, pd.Series]:
    """
    Get Total Least Squares (TLS) hedge ratio using Orthogonal Regression.

    :param price_data: (pd.DataFrame) Data Frame with security prices.
    :param dependent_variable: (str) Column name which represents the dependent variable (y).
    :param add_constant: (bool) Boolean flag to add constant in regression setting. Any truthy
        value (e.g. ``np.True_``) enables the constant.
    :return: (Tuple) Hedge ratios dict, X, and y and fit residuals.
    """

    X = price_data.drop(columns=dependent_variable)
    y = price_data[dependent_variable].copy()

    # Truthiness instead of `is True`, so numpy booleans do not silently pick the no-constant model.
    linear = Model(_linear_f_constant if add_constant else _linear_f_no_constant)
    mydata = RealData(X.T, y)
    # One parameter per explanatory variable plus the (possibly unused) constant slot.
    myodr = ODR(mydata, linear, beta0=np.ones(X.shape[1] + 1))
    res_co = myodr.run()

    hedge_ratios = res_co.beta[1:]  # We don't need constant
    fitted = (X * hedge_ratios).sum(axis=1)
    residuals = y - res_co.beta[0] - fitted if add_constant else y - fitted
    hedge_ratios_dict = dict(zip([dependent_variable] + X.columns.tolist(), np.insert(hedge_ratios, 0, 1.0)))

    return hedge_ratios_dict, X, y, residuals
class TAR:
    """
    The Threshold AutoRegressive Model is an extension provided by Enders and
    Granger to the standard Dickey-Fuller Test. It considers the upside and
    downside moves separately, thus allowing for the possibility of asymmetric adjustment.
    """

    def __init__(self, price_data: pd.DataFrame):
        """
        Init function.

        :param price_data: (pd.DataFrame) Spread time series the model is fitted on.
            ``fit`` labels its values with a single 'spread' column name, so the input
            must hold exactly one series.
        """

        self.spread = price_data
        self.results = None

    @staticmethod
    def _tag_regime(series: pd.Series) -> pd.DataFrame:
        """
        Tags up/down swings in different vectors.

        :param series: (pd.Series) Time series to tag.
        :return: (pd.DataFrame) Original series with two new columns
            with values [0,1] indicating down/up swings. Rows with a missing
            series value are dropped.
        """

        tagged_df = series.copy().to_frame()
        tagged_df.columns = ['y_{t-1}']
        tagged_df['I_{1}'] = 0
        tagged_df['I_{0}'] = 0
        # Non-negative values belong to regime one, strictly negative values to regime two.
        tagged_df.loc[tagged_df['y_{t-1}'] >= 0, 'I_{1}'] = 1
        tagged_df.loc[tagged_df['y_{t-1}'] < 0, 'I_{0}'] = 1

        return tagged_df.dropna()

    def fit(self) -> "RegressionResults":
        """
        Fits the OLS model of spread changes on the regime-split lagged spread.

        :return: (RegressionResults) Fitted statsmodels OLS results, also stored in ``self.results``.
        """

        # Convert price spread into first differences and lag the spread by 1 period.
        jspread = pd.DataFrame(self.spread.values)
        jspread.columns = ['spread']
        jspread['rets'] = jspread['spread'].diff()
        jspread['spread_lag1'] = jspread['spread'].shift(1)
        jspread.dropna(inplace=True)

        returns = jspread['rets']
        lagged_spread = jspread['spread_lag1']

        # Get up/down swings tagged as 0/1 indicator columns.
        tagged_spread = self._tag_regime(lagged_spread)

        # Multiply the lagged spread with the corresponding indicator.
        regime_one = tagged_spread['y_{t-1}'] * tagged_spread['I_{1}']
        regime_two = tagged_spread['y_{t-1}'] * tagged_spread['I_{0}']

        regime_tagged_spread = pd.concat([regime_one, regime_two], axis=1)
        regime_tagged_spread.columns = ['p_1', 'p_2']

        model = sm.OLS(returns.values, regime_tagged_spread)
        results = model.fit()
        self.results = results

        return results

    def summary(self) -> pd.DataFrame:
        """
        Returns summary as in paper. Uses the Wald Test to check for
        significance of the following hypotheses;
        - p_1 = 0
        - p_2 = 0
        - p_1 = p_2

        ``fit`` has to be called beforehand.

        :return: (pd.DataFrame) Summary of results.
        """

        params = self.results.params

        coefficient_1 = params.loc['p_1']
        pvalue_1 = self.results.wald_test('p_1 = 0').pvalue

        coefficient_2 = params.loc['p_2']
        pvalue_2 = self.results.wald_test('p_2 = 0').pvalue

        # Run the equality test once and read both statistics from the same result.
        equiv_test = self.results.wald_test('p_1 = p_2')
        equiv_pvalue = equiv_test.pvalue
        equiv_fvalue = equiv_test.fvalue
        # Depending on the statsmodels version the statistic is a (1, 1) array or a scalar;
        # `.item()` collapses any size-one array or numpy scalar to a plain float.
        if hasattr(equiv_fvalue, 'item'):
            equiv_fvalue = equiv_fvalue.item()

        tuple_frame = [(coefficient_1, None, pvalue_1),
                       (coefficient_2, None, pvalue_2),
                       (None, equiv_fvalue, equiv_pvalue)]

        result_frame = pd.DataFrame(tuple_frame).T
        result_frame.columns = ['p_1', 'p_2', 'p_1 = p_2']
        result_frame['index'] = ['Coefficient', 'F-stat', 'p-value']
        result_frame.set_index('index', inplace=True)

        return result_frame.astype(float)
class AbstractPairsSelector(ABC):
    """
    This is an abstract class for pairs selectors objects.
    It has abstract method select_spreads(), which needs to be implemented.
    """

    @abstractmethod
    def select_spreads(self):
        """
        Method which selects spreads based on some predefined criteria.
        """

        raise NotImplementedError('Must implement select_spreads() method.')

    @staticmethod
    def _print_progress(iteration, max_iterations, prefix='', suffix='', decimals=1, bar_length=50):
        """
        Calls in a loop to create a terminal progress bar.
        https://gist.github.com/aubricus/f91fb55dc6ba5557fbab06119420dd6a

        :param iteration: (int) Current iteration.
        :param max_iterations: (int) Maximum number of iterations.
        :param prefix: (str) Prefix string.
        :param suffix: (str) Suffix string.
        :param decimals: (int) Positive number of decimals in percent completed.
        :param bar_length: (int) Character length of the bar.
        """

        str_format = "{0:." + str(decimals) + "f}"
        # Calculate the percent completed.
        percents = str_format.format(100 * (iteration / float(max_iterations)))
        # Calculate the length of bar.
        filled_length = int(round(bar_length * iteration / float(max_iterations)))
        # Fill the bar.
        block = '█' * filled_length + '-' * (bar_length - filled_length)
        # The leading carriage return redraws the bar on the same terminal line.
        sys.stdout.write('\r%s |%s| %s%s %s' % (prefix, block, percents, '%', suffix))

        # Finish the line once the last iteration has been reported.
        if iteration == max_iterations:
            sys.stdout.write('\n')
        sys.stdout.flush()
3 | """ 4 | 5 | from arbitragelab.stochastic_control_approach.ou_model_jurek import OUModelJurek 6 | from arbitragelab.stochastic_control_approach.ou_model_mudchanatongsuk import OUModelMudchanatongsuk 7 | from arbitragelab.stochastic_control_approach.optimal_convergence import OptimalConvergence 8 | -------------------------------------------------------------------------------- /arbitragelab/tearsheet/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Classes derived from ArbitrageLab TearSheet module. 3 | """ 4 | 5 | from arbitragelab.tearsheet.tearsheet import TearSheet 6 | -------------------------------------------------------------------------------- /arbitragelab/time_series_approach/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module implements Time Series-based Statistical Arbitrage strategies. 3 | """ 4 | 5 | from arbitragelab.time_series_approach.arima_predict import AutoARIMAForecast, get_trend_order 6 | from arbitragelab.time_series_approach.quantile_time_series import QuantileTimeSeriesTradingStrategy 7 | from arbitragelab.time_series_approach.ou_optimal_threshold_bertram import OUModelOptimalThresholdBertram 8 | from arbitragelab.time_series_approach.ou_optimal_threshold_zeng import OUModelOptimalThresholdZeng 9 | -------------------------------------------------------------------------------- /arbitragelab/time_series_approach/quantile_time_series.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module implements the quantile time series approach described in 3 | `"A Machine Learning based Pairs Trading Investment Strategy" `__ 4 | (pages 37-43) by Simão Moraes Sarmento and Nuno Horta. 
class QuantileTimeSeriesTradingStrategy:
    """
    Quantile-based time series approach to mean-reversion trading. The entry thresholds are the
    chosen quantiles of the positive and of the negative one-step spread differences. A spread
    forecast `y_hat` (user-specified prediction, ARIMA, ANN, RNN, etc.) is then compared with the
    current spread `y`: a position is entered if y_hat - y <= bottom quantile or
    y_hat - y >= top quantile.

    This strategy is described in "A Machine Learning based Pairs Trading Investment Strategy"
    (pages 37-43) by Simão Moraes Sarmento and Nuno Horta.
    """

    def __init__(self, long_quantile: float = 0.9, short_quantile: float = 0.1):
        """
        Class constructor.

        :param long_quantile: (float) Positive spread differences quantile used as long entry threshold.
        :param short_quantile: (float) Negative spread differences quantile used as short entry threshold.
        """

        self.long_quantile = long_quantile
        self.short_quantile = short_quantile

        # Filled in by `fit_thresholds`.
        self.long_diff_threshold = None
        self.short_diff_threshold = None
        self.positive_differences = None
        self.negative_differences = None

        self.positions = []  # Positions (-1, 0, 1) logs

    def fit_thresholds(self, spread_series: pd.Series):
        """
        Define quantile-based long/short difference thresholds from spread series.

        :param spread_series: (pd.Series) Spread series used to fit thresholds.
        """

        deltas = spread_series.diff()
        upward_moves = deltas[deltas > 0]
        downward_moves = deltas[deltas < 0]

        self.positive_differences = upward_moves
        self.negative_differences = downward_moves

        self.long_diff_threshold = upward_moves.quantile(self.long_quantile)
        self.short_diff_threshold = downward_moves.quantile(self.short_quantile)

    def plot_thresholds(self):
        """
        Plot KDE-plots of positive and negative differences vs long/short thresholds.

        :return: (plt.axes) The KDE plot.
        """

        _, axes = plt.subplots(1, 2, figsize=(10, 8), sharey=True)

        # One panel per side: (axis, sample, threshold, fill colour, title).
        panels = (
            (axes[0], self.positive_differences, self.long_diff_threshold, "green", 'Positive differences KDE'),
            (axes[1], self.negative_differences, self.short_diff_threshold, "red", 'Negative differences KDE'),
        )

        for axis, sample, threshold, colour, title in panels:
            sns.kdeplot(sample, fill=True, color=colour, ax=axis,
                        label='threshold: {}'.format(threshold.round(4)))
            axis.axvline(threshold, linestyle='--', color='black')
            axis.set_title(title)

        return axes

    def get_allocation(self, predicted_difference: float, exit_threshold: float = 0) -> int:
        """
        Get target allocation (-1, 0, 1) from the predicted spread change and the exit threshold.
        1/-1 means either to open a new long/short position or to stay in the long/short trade
        that is already open. 0 means exit the position (or stay in cash). Every returned signal
        is appended to `self.positions`.

        :param predicted_difference: (float) Spread predicted value - current spread value
        :param exit_threshold: (float) Difference between predicted and current value threshold to close the trade.
        :return: (int) Trade signal: -1 (short), 0 (exit current position/stay in cash), 1(long).
        """

        # No logged position is equivalent to being flat.
        previous = self.positions[-1] if self.positions else 0

        if predicted_difference >= self.long_diff_threshold:
            # New long entry.
            signal = 1
        elif predicted_difference <= self.short_diff_threshold:
            # New short entry.
            signal = -1
        elif previous == 1 and predicted_difference > exit_threshold:
            # Keep the open long trade.
            signal = 1
        elif previous == -1 and predicted_difference <= exit_threshold:
            # Keep the open short trade.
            signal = -1
        else:
            signal = 0

        self.positions.append(signal)

        return signal
# pylint: disable=invalid-name
def get_classification_data(n_features=100, n_informative=25, n_redundant=25, n_samples=10000, random_state=0, sigma=.0):
    """
    A function to generate synthetic classification data sets.

    :param n_features: (int) Total number of features to be generated (i.e. informative + redundant + noisy).
    :param n_informative: (int) Number of informative features.
    :param n_redundant: (int) Number of redundant features.
    :param n_samples: (int) Number of samples (rows) to be generate.
    :param random_state: (int) Random seed.
    :param sigma: (float) This argument is used to introduce substitution effect to the redundant features in
        the dataset by adding gaussian noise. The lower the value of sigma, the greater the
        substitution effect.
    :return: (pd.DataFrame, pd.Series) X and y as features and labels respectively.
    """

    # A local legacy RandomState yields the same draws as `np.random.seed(random_state)` followed by
    # `np.random.*` calls, but does not reseed the process-wide NumPy generator as a side effect.
    rng = np.random.RandomState(random_state)

    # Informative and noise features come from scikit-learn; redundant ones are added below.
    X, y = make_classification(n_samples=n_samples, n_features=n_features - n_redundant,
                               n_informative=n_informative, n_redundant=0, shuffle=False,
                               random_state=random_state)

    cols = ['I_' + str(i) for i in range(n_informative)]
    cols += ['N_' + str(i) for i in range(n_features - n_informative - n_redundant)]
    X, y = pd.DataFrame(X, columns=cols), pd.Series(y)

    # Each redundant feature copies a randomly chosen informative feature plus gaussian noise.
    source_features = rng.choice(range(n_informative), size=n_redundant)
    for k, j in enumerate(source_features):
        X['R_' + str(k)] = X['I_' + str(j)] + rng.normal(size=X.shape[0]) * sigma

    return X, y
34 | """ 35 | 36 | # Hide all other annotations 37 | plt.setp(self.highlights, visible=False) 38 | 39 | for i, artst in enumerate(self.artists): 40 | if event.artist is artst: 41 | self.highlights[i].set(visible=True) 42 | 43 | DataCursor.update(self, event, annotation) 44 | -------------------------------------------------------------------------------- /coverage: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "----Running Code Coverage----" 4 | 5 | # Remove multiprocessing coverage files in case a previous combine wasn't performed 6 | rm -fR cover/ 7 | # Remove the main coverage file (.coverage) 8 | . venv/bin/activate 9 | coverage erase 10 | 11 | # Discover and run all tests, check unit tests results 12 | . venv/bin/activate 13 | coverage run --concurrency=multiprocessing -m pytest tests/ 14 | res_test=$? 15 | if [ $res_test -ne 0 ] 16 | then 17 | echo -e "Build FAILURE: Unit tests failed" 18 | exit 1 19 | fi 20 | 21 | # Check coverage results 22 | . venv/bin/activate 23 | coverage combine 24 | res_combine=$? 25 | if [ $res_combine -ne 0 ] 26 | then 27 | echo -e "Build FAILURE: Coverage combine failed" 28 | exit 1 29 | fi 30 | 31 | . venv/bin/activate 32 | coverage report --fail-under=100 33 | coverage_report=$? 34 | if [ $coverage_report -ne 0 ] 35 | then 36 | echo -e "Build FAILURE: Coverage percentage failed" 37 | exit 1 38 | fi 39 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 
12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 
23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/source/_static/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/_static/.gitkeep -------------------------------------------------------------------------------- /docs/source/_static/favicon_arbitragelab.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/_static/favicon_arbitragelab.png -------------------------------------------------------------------------------- /docs/source/_static/ht_logo_black.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/_static/ht_logo_black.png -------------------------------------------------------------------------------- /docs/source/_static/ht_logo_white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/_static/ht_logo_white.png -------------------------------------------------------------------------------- /docs/source/_static/logo_black.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/_static/logo_black.png -------------------------------------------------------------------------------- /docs/source/_static/logo_white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/_static/logo_white.png -------------------------------------------------------------------------------- /docs/source/_templates/breadcrumbs.html: -------------------------------------------------------------------------------- 1 | {%- extends "sphinx_rtd_theme/breadcrumbs.html" %} 2 | 3 | {% block breadcrumbs_aside %} 4 | {% endblock %} -------------------------------------------------------------------------------- /docs/source/additional_information/license.rst: -------------------------------------------------------------------------------- 1 | .. _additional_information-license: 2 | 3 | ======= 4 | License 5 | ======= 6 | 7 | BSD 3-Clause License 8 | 9 | Copyright (c) 2007-2024 The Hudson and Thames Quantitative Research developers. 10 | All rights reserved. 11 | 12 | Redistribution and use in source and binary forms, with or without 13 | modification, are permitted provided that the following conditions are met: 14 | 15 | * Redistributions of source code must retain the above copyright notice, this 16 | list of conditions and the following disclaimer. 17 | 18 | * Redistributions in binary form must reproduce the above copyright notice, 19 | this list of conditions and the following disclaimer in the documentation 20 | and/or other materials provided with the distribution. 21 | 22 | * Neither the name of the copyright holder nor the names of its 23 | contributors may be used to endorse or promote products derived from 24 | this software without specific prior written permission. 
25 | 26 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 27 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 29 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 30 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 32 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 33 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 34 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 35 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 36 | -------------------------------------------------------------------------------- /docs/source/codependence/codependence_matrix.rst: -------------------------------------------------------------------------------- 1 | .. _codependence-codependence_matrix: 2 | 3 | =================== 4 | Codependence Matrix 5 | =================== 6 | 7 | .. raw:: html 8 | 9 |
16 | 17 | 26 |
27 |
28 | 29 | | 30 | 31 | The functions in this part of the module are used to generate dependence and distance matrices using the codependency and 32 | distance metrics described previously. 33 | 34 | 1. **Dependence Matrix** function is used to compute codependences between elements in a given dataframe of elements 35 | using various codependence metrics like Mutual Information, Variation of Information, Distance Correlation, 36 | Spearman's Rho, GPR distance, and GNPR distance. 37 | 38 | 2. **Distance Matrix** function can be used to compute a distance matrix from a given codependency matrix using 39 | distance metrics like angular, squared angular and absolute angular. 40 | 41 | .. Note:: 42 | **Underlying Literature** 43 | 44 | The following sources elaborate extensively on the topic: 45 | 46 | - `Codependence (Presentation Slides) `__ *by* Marcos Lopez de Prado. 47 | 48 | Implementation 49 | ############## 50 | 51 | .. py:currentmodule:: arbitragelab.codependence.codependence_matrix 52 | .. autofunction:: get_dependence_matrix 53 | .. autofunction:: get_distance_matrix 54 | 55 | 56 | Example 57 | ####### 58 | 59 | .. code-block:: 60 | 61 | import pandas as pd 62 | from arbitragelab.codependence import (get_dependence_matrix, get_distance_matrix) 63 | 64 | # Import dataframe of returns for assets in a portfolio 65 | asset_returns = pd.read_csv(DATA_PATH, index_col='Date', parse_dates=True) 66 | 67 | # Calculate distance correlation matrix 68 | distance_corr = get_dependence_matrix(asset_returns, dependence_method='distance_correlation') 69 | 70 | # Calculate Pearson correlation matrix 71 | pearson_corr = asset_returns.corr() 72 | 73 | # Calculate absolute angular distance from a Pearson correlation matrix 74 | abs_angular_dist = get_distance_matrix(pearson_corr, distance_metric='abs_angular') 75 | 76 | Presentation Slides 77 | ################### 78 | 79 | .. 
image:: images/codependence_slides.png 80 | :scale: 40 % 81 | :align: center 82 | :target: https://drive.google.com/file/d/1pamteuYyc06r1q-BR3VFsxwa3c7-7oeK/view 83 | 84 | References 85 | ########## 86 | 87 | * `de Prado, M.L., 2020. Codependence (Presentation Slides). Available at SSRN 3512994. `_ 88 | -------------------------------------------------------------------------------- /docs/source/codependence/images/abs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/codependence/images/abs.png -------------------------------------------------------------------------------- /docs/source/codependence/images/angular_distance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/codependence/images/angular_distance.png -------------------------------------------------------------------------------- /docs/source/codependence/images/codep_slides.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/codependence/images/codep_slides.png -------------------------------------------------------------------------------- /docs/source/codependence/images/codependence_slides.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/codependence/images/codependence_slides.png -------------------------------------------------------------------------------- /docs/source/codependence/images/dependence_copulas.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/codependence/images/dependence_copulas.png -------------------------------------------------------------------------------- /docs/source/codependence/images/distance_correlation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/codependence/images/distance_correlation.png -------------------------------------------------------------------------------- /docs/source/codependence/images/entropy_relation_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/codependence/images/entropy_relation_diagram.png -------------------------------------------------------------------------------- /docs/source/codependence/images/independent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/codependence/images/independent.png -------------------------------------------------------------------------------- /docs/source/codependence/images/linear.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/codependence/images/linear.png -------------------------------------------------------------------------------- /docs/source/codependence/images/modified_angular_distance.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/codependence/images/modified_angular_distance.png -------------------------------------------------------------------------------- /docs/source/codependence/images/optimal_transport_distance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/codependence/images/optimal_transport_distance.png -------------------------------------------------------------------------------- /docs/source/codependence/images/squared.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/codependence/images/squared.png -------------------------------------------------------------------------------- /docs/source/codependence/images/target_copulas.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/codependence/images/target_copulas.png -------------------------------------------------------------------------------- /docs/source/codependence/introduction.rst: -------------------------------------------------------------------------------- 1 | .. _codependence-introduction: 2 | 3 | ============ 4 | Introduction 5 | ============ 6 | 7 | .. raw:: html 8 | 9 |
16 | 17 | 26 |
27 |
28 | 29 | | 30 | 31 | This module includes implementations of codependence metrics. According to Lopez de Prado: 32 | 33 | "Two random variables are codependent when knowing the value of one helps us determine the value of the other. 34 | This should not be confounded with the notion of causality." 35 | 36 | Pearson correlation coefficient is the most famous and widely used measure of codependence, however, it has some drawbacks. 37 | 38 | .. warning:: 39 | 40 | Pearson correlation suffers from 3 major drawbacks: 41 | 42 | 1) It captures linear effects, but if two variables have strong non-linear dependency (squared or abs for example) Pearson correlation won't find any pattern between them. 43 | 2) Correlation is not a distance metric: it does not satisfy non-negativity and subadditivity conditions. 44 | 3) Financial markets have non-linear patterns, which Pearson correlation fails to capture. 45 | 46 | Pearson correlation is not the only way of measuring codependence. There are alternative and more modern measures of codependence, 47 | which are described in the parts of this module. 48 | 49 | .. note:: 50 | For some methods in this module, it’s discussed whether they are true metrics. 51 | According to Arkhangel'skii, A. V. and Pontryagin, L. S. (1990), **General Topology I**: 52 | A metric on a set :math:`X` is a function (called a distance): 53 | 54 | .. math:: 55 | d: X \times X \rightarrow [0,+ \infty) ; x, y, z \in X 56 | 57 | for which the following three axioms are satisfied: 58 | 59 | 1. :math:`d(x, y) = 0 \iff x = y` — identity of indiscernibles; 60 | 61 | 2. :math:`d(x,y) = d(y,x)` — symmetry; 62 | 63 | 3. :math:`d(x,y) \le d(x,z) + d(z,y)` — triangle inequality; 64 | 65 | and these imply :math:`d(x,y) \ge 0` — non-negativity. 66 | 67 | Presentation Slides 68 | ################### 69 | 70 | .. image:: images/codep_slides.png 71 | :scale: 70 % 72 | :align: center 73 | :target: https://papers.ssrn.com/sol3/papers.cfm?abstract_id=3512994 74 | 75 | .. 
image:: images/codependence_slides.png 76 | :scale: 40 % 77 | :align: center 78 | :target: https://drive.google.com/file/d/1pamteuYyc06r1q-BR3VFsxwa3c7-7oeK/view 79 | 80 | References 81 | ########## 82 | 83 | * `Lopez de Prado, M., 2020. Codependence (Presentation Slides). Available at SSRN 3512994. <https://papers.ssrn.com/sol3/papers.cfm?abstract_id=3512994>`_ 84 | -------------------------------------------------------------------------------- /docs/source/cointegration_approach/half_life.rst: -------------------------------------------------------------------------------- 1 | .. _cointegration_approach-half_life: 2 | 3 | =========================== 4 | Half-life of Mean-Reversion 5 | =========================== 6 | 7 | This module contains a function that allows calculating a half-life of the mean-reversion process 8 | under the assumption that data follows the Ornstein-Uhlenbeck process. 9 | 10 | The Ornstein-Uhlenbeck process can be described using the formula: 11 | 12 | .. math:: 13 | 14 | dy(t) = ( \lambda y(t-1) + \mu ) dt + d \varepsilon 15 | 16 | where :math:`d \varepsilon` is some Gaussian noise. 17 | 18 | Implementation 19 | ############## 20 | 21 | .. py:currentmodule:: arbitragelab.cointegration_approach.utils 22 | 23 | .. autofunction:: get_half_life_of_mean_reversion 24 | 25 | Examples 26 | ######## 27 | 28 | .. 
code-block:: 29 | 30 | # Importing the function 31 | from arbitragelab.cointegration_approach.utils import get_half_life_of_mean_reversion 32 | 33 | # Finding the half-life of mean-reversion 34 | half_life = get_half_life_of_mean_reversion(spread) 35 | -------------------------------------------------------------------------------- /docs/source/cointegration_approach/images/AME-DOV.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/cointegration_approach/images/AME-DOV.png -------------------------------------------------------------------------------- /docs/source/cointegration_approach/images/MR_strength_box_tiao.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/cointegration_approach/images/MR_strength_box_tiao.png -------------------------------------------------------------------------------- /docs/source/cointegration_approach/images/cluster.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/cointegration_approach/images/cluster.gif -------------------------------------------------------------------------------- /docs/source/cointegration_approach/images/coint_sim.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/cointegration_approach/images/coint_sim.png -------------------------------------------------------------------------------- /docs/source/cointegration_approach/images/column_lasso_demo-opt.gif: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/cointegration_approach/images/column_lasso_demo-opt.gif -------------------------------------------------------------------------------- /docs/source/cointegration_approach/images/cov_select_demo-opt.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/cointegration_approach/images/cov_select_demo-opt.gif -------------------------------------------------------------------------------- /docs/source/cointegration_approach/images/engle-granger_portfolio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/cointegration_approach/images/engle-granger_portfolio.png -------------------------------------------------------------------------------- /docs/source/cointegration_approach/images/greedy_demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/cointegration_approach/images/greedy_demo.gif -------------------------------------------------------------------------------- /docs/source/cointegration_approach/images/johansen_portfolio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/cointegration_approach/images/johansen_portfolio.png -------------------------------------------------------------------------------- 
/docs/source/cointegration_approach/images/minimum_profit_slides.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/cointegration_approach/images/minimum_profit_slides.png -------------------------------------------------------------------------------- /docs/source/cointegration_approach/images/multitask_lasso_demo-opt.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/cointegration_approach/images/multitask_lasso_demo-opt.gif -------------------------------------------------------------------------------- /docs/source/cointegration_approach/images/nile_river_level.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/cointegration_approach/images/nile_river_level.png -------------------------------------------------------------------------------- /docs/source/cointegration_approach/images/sparse_mr_slides.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/cointegration_approach/images/sparse_mr_slides.png -------------------------------------------------------------------------------- /docs/source/cointegration_approach/introduction.rst: -------------------------------------------------------------------------------- 1 | .. _cointegration_approach-introduction: 2 | 3 | .. Note:: 4 | 5 | These descriptions closely follow the book by Ernest P. Chan: 6 | `Algorithmic Trading: Winning Strategies and Their Rationale `__. 
7 | 8 | ============ 9 | Introduction 10 | ============ 11 | 12 | .. raw:: html 13 | 14 |
21 | 22 | 31 |
32 |
33 | 34 | | 35 | 36 | Mean-reverting processes and events often occur in nature. Observations of the processes that have a 37 | mean-reverting nature tend to move to their average value over time. However, as mentioned in 38 | the work of E.P. Chan, most financial price series are not mean-reverting. 39 | 40 | The upside is that we can construct advanced financial instruments from multiple simple ones, thus 41 | obtaining the desired property. Observation series (stock, commodity prices, etc.) that can be combined 42 | to achieve a mean-reverting process are called *cointegrating*. The approach described above allows us 43 | to use the properties of mean-reverting processes to generate profit. 44 | 45 | .. figure:: images/nile_river_level.png 46 | :scale: 70 % 47 | :align: center 48 | 49 | A naturally occurring mean-reverting process: level of the Nile from 622 AD to 1284 AD. 50 | An example from `"Algorithmic Trading: Winning Strategies and Their Rationale" `__ 51 | by Ernest P. Chan. 52 | 53 | Tools presented in this module help test whether it is possible to create a mean-reverting portfolio 54 | from a given set of elements, and what combination of these elements is needed for portfolio construction. 55 | The module also includes simple trading strategies that generate trading signals based on a given series of 56 | portfolio prices. These strategies also explain how the mean-reverting property can be used. 57 | 58 | Another approach based on mean reversion, cross-sectional mean reversion, where the 59 | cumulative returns of the instruments in a basket revert to the cumulative return of the basket, is 60 | not covered in this module. 61 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. 
For a full 4 | # list see the documentation: 5 | # http://www.sphinx-doc.org/en/master/config 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | import os 14 | import sys 15 | sys.path.insert(0, os.path.abspath('./../..')) 16 | 17 | 18 | # -- Project information ----------------------------------------------------- 19 | 20 | project = 'arbitragelab' 21 | author = 'Hudson & Thames Quantitative Research' 22 | 23 | # The full version, including alpha/beta/rc tags 24 | release = "1.0.0" 25 | 26 | 27 | # -- General configuration --------------------------------------------------- 28 | 29 | # Add any Sphinx extension module names here, as strings. They can be 30 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 31 | # ones. 32 | extensions = [ 33 | 'sphinx.ext.autodoc', 34 | 'sphinx.ext.coverage', 35 | 'sphinx.ext.intersphinx', 36 | 'sphinx.ext.doctest', 37 | 'sphinx_copybutton', 38 | 'myst_parser', 39 | 'autoapi.extension', 40 | 'releases', 41 | ] 42 | 43 | # AUTOAPI SETTINGS 44 | autoapi_type = 'python' 45 | autoapi_dirs = ["../../arbitragelab"] 46 | autoapi_root = "technical/api" 47 | autoapi_add_toctree_entry = False 48 | autoapi_ignore = ["*arbitragelab/network/imports*", "*arbitragelab/util/segment*"] 49 | autoapi_options = [ 50 | "members", 51 | "undoc-members", 52 | "inherited-members", 53 | "special-members", 54 | "show-inheritance", 55 | "show-module-summary", 56 | ] 57 | 58 | suppress_warnings = ["autoapi.python_import_resolution" ] 59 | 60 | # Add any paths that contain templates here, relative to this directory. 
61 | templates_path = ['_templates'] 62 | 63 | master_doc = 'index' 64 | 65 | # List of patterns, relative to source directory, that match files and 66 | # directories to ignore when looking for source files. 67 | # This pattern also affects html_static_path and html_extra_path. 68 | exclude_patterns = [] 69 | 70 | 71 | # -- Options for HTML output ------------------------------------------------- 72 | 73 | # The theme to use for HTML and HTML Help pages. See the documentation for 74 | # a list of builtin themes. 75 | # 76 | html_theme = 'hudsonthames_sphinx_theme' 77 | add_module_names = False 78 | 79 | # Theme options are theme-specific and customize the look and feel of a theme 80 | # further. For a list of options available for each theme, see the 81 | # documentation. 82 | # 83 | # html_theme_options = {} 84 | 85 | html_logo = '_static/logo_white.png' 86 | html_theme_options = { 87 | 'logo_only': True, 88 | 'display_version': True, 89 | } 90 | html_favicon = '_static/favicon_arbitragelab.png' 91 | 92 | # Add any paths that contain custom static files (such as style sheets) here, 93 | # relative to this directory. They are copied after the builtin static files, 94 | # so a file named "default.css" will overwrite the builtin "default.css". 
95 | html_static_path = ['_static'] 96 | html_copy_source = True 97 | 98 | # 'releases' (changelog) settings 99 | releases_github_path = 'hudson-and-thames/arbitragelab' 100 | releases_unstable_prehistory = True 101 | -------------------------------------------------------------------------------- /docs/source/copula_approach/images/3d_vinecop_decomposition.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/copula_approach/images/3d_vinecop_decomposition.png -------------------------------------------------------------------------------- /docs/source/copula_approach/images/AMGN_HD_MixCop.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/copula_approach/images/AMGN_HD_MixCop.png -------------------------------------------------------------------------------- /docs/source/copula_approach/images/Bollinger_band_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/copula_approach/images/Bollinger_band_example.png -------------------------------------------------------------------------------- /docs/source/copula_approach/images/CMPI_vs_log_prices.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/copula_approach/images/CMPI_vs_log_prices.png -------------------------------------------------------------------------------- /docs/source/copula_approach/images/C_vine_D_vine_structure.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/copula_approach/images/C_vine_D_vine_structure.png -------------------------------------------------------------------------------- /docs/source/copula_approach/images/CumDenN13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/copula_approach/images/CumDenN13.png -------------------------------------------------------------------------------- /docs/source/copula_approach/images/Cvine_tuple.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/copula_approach/images/Cvine_tuple.png -------------------------------------------------------------------------------- /docs/source/copula_approach/images/Equity_curve_cvinecop.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/copula_approach/images/Equity_curve_cvinecop.png -------------------------------------------------------------------------------- /docs/source/copula_approach/images/R_vine_structure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/copula_approach/images/R_vine_structure.png -------------------------------------------------------------------------------- /docs/source/copula_approach/images/Rvine_Cvine_Dvine.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/copula_approach/images/Rvine_Cvine_Dvine.png -------------------------------------------------------------------------------- /docs/source/copula_approach/images/copula_marginal_dist_demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/copula_approach/images/copula_marginal_dist_demo.png -------------------------------------------------------------------------------- /docs/source/copula_approach/images/densityGaussian.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/copula_approach/images/densityGaussian.png -------------------------------------------------------------------------------- /docs/source/copula_approach/images/densityGumbel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/copula_approach/images/densityGumbel.png -------------------------------------------------------------------------------- /docs/source/copula_approach/images/ecdf_vs_ecdflin.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/copula_approach/images/ecdf_vs_ecdflin.png -------------------------------------------------------------------------------- /docs/source/copula_approach/images/eucdis_ranked_rho_tau.png.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/copula_approach/images/eucdis_ranked_rho_tau.png.png -------------------------------------------------------------------------------- /docs/source/copula_approach/images/formation_copulas.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/copula_approach/images/formation_copulas.png -------------------------------------------------------------------------------- /docs/source/copula_approach/images/individual_ranked_rho_tau.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/copula_approach/images/individual_ranked_rho_tau.png -------------------------------------------------------------------------------- /docs/source/copula_approach/images/positions_log_prices.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/copula_approach/images/positions_log_prices.png -------------------------------------------------------------------------------- /docs/source/copula_approach/images/rho_ranked_rho_tau.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/copula_approach/images/rho_ranked_rho_tau.png -------------------------------------------------------------------------------- /docs/source/copula_approach/images/tau_ranked_rho_tau.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/copula_approach/images/tau_ranked_rho_tau.png -------------------------------------------------------------------------------- /docs/source/copula_approach/images/top_euc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/copula_approach/images/top_euc.png -------------------------------------------------------------------------------- /docs/source/copula_approach/images/top_tau.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/copula_approach/images/top_tau.png -------------------------------------------------------------------------------- /docs/source/copula_approach/images/top_tau_quantile.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/copula_approach/images/top_tau_quantile.png -------------------------------------------------------------------------------- /docs/source/copula_approach/images/trading_opportunities.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/copula_approach/images/trading_opportunities.png -------------------------------------------------------------------------------- /docs/source/copula_approach/images/workflow_getdata.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/copula_approach/images/workflow_getdata.png -------------------------------------------------------------------------------- /docs/source/copula_approach/images/workflow_select_structure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/copula_approach/images/workflow_select_structure.png -------------------------------------------------------------------------------- /docs/source/copula_approach/images/workflow_vinecop_density.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/copula_approach/images/workflow_vinecop_density.png -------------------------------------------------------------------------------- /docs/source/copula_approach/introduction.rst: -------------------------------------------------------------------------------- 1 | .. _copula_approach-introduction: 2 | 3 | .. Note:: 4 | These descriptions closely follow the following two papers: 5 | 6 | `Pairs trading: a copula approach. (2013) `__ by Liew, Rong Qi, and Yuan Wu. 7 | 8 | `Trading strategies with copulas. (2013) `__ by Stander, Yolanda, Daniël Marais, and Ilse Botha. 9 | 10 | ============ 11 | Introduction 12 | ============ 13 | 14 | Copula is a relatively new analysis tool for pairs trading, compared to more traditional approaches such 15 | as distance and cointegration. Since pairs trading can be considered one of the long/short equity strategies, 16 | copula enables a more nuanced and detailed understanding of the traded pair when compared to, say, Euclidean distance 17 | approaches, thereby generating more reasonable trading opportunities for capturing relative mispricing. 
18 | 19 | Consider having a pair of cointegrated stocks. By analyzing their time series, one can calculate their standardized 20 | price gap as part of a distance approach, or project their long-run mean as in a cointegrated system as part of a 21 | cointegration approach. However, neither of the two methods is built with the distributions from their time series. 22 | The copula model naturally incorporates their marginal distributions, together with other interesting properties from 23 | each copula, e.g., tail dependency for capturing rare and/or extreme moments like large, cointegrated swings in the 24 | market. 25 | 26 | Briefly speaking, copula is a tool to capture details of how two random variables are "correlated". By having a more 27 | detailed modeling framework, we expect the pairs trading strategy followed to be more realistic and robust and possibly 28 | to bring more trading opportunities. 29 | 30 | .. figure:: images/copula_marginal_dist_demo.png 31 | :scale: 30 % 32 | :align: center 33 | 34 | An illustration of the conditional distribution function of V for a given value of U and the conditional 35 | distribution function of U for a given value of V using the N14 copula dependence structure. 36 | An example from 37 | "Trading strategies with copulas." 38 | by Stander, Yolanda, Daniël Marais, and Ilse Botha. 39 | 40 | Tools presented in this module enable the user to: 41 | 42 | * Transform and fit pair's price data to a given type of copula; 43 | 44 | * Sample and plot from a given copula; 45 | 46 | * Generate trading positions given the pair's data using a copula: 47 | 48 | - Feed in training lists (e.g., data from 2016-2019) and thus generate a position list. 49 | 50 | - Feed in a single pair's data point (e.g., EOD data from just today) and thus generate a single position. 
51 | 52 | There are 8 commonly used pure copulas that are now available: :code:`Gumbel`, :code:`Frank`, :code:`Clayton`, :code:`Joe`, 53 | :code:`N13`, :code:`N14`, :code:`Gaussian` and :code:`Student` (Student-t) under :code:`Copula`. 54 | Also there are 2 mixed copulas :code:`CTGMixCop` (Clayton-Student-Gumbel) and :code:`CFGMixCop` (Clayton-Frank-Gumbel) under 55 | :code:`MixedCopula`. 56 | They share some common repertoire of methods and attributes. 57 | 58 | Users can create and fit copulas to data and use them directly. Also, the fitted copulas can be used in trading 59 | strategies such as :code:`BasicCopulaTradingRule` and :code:`MispricingIndexCopulaTradingRule` class described in the 60 | :ref:`Copula Trading Strategies ` section of the documentation. 61 | 62 | The user may choose to fit the pair's data to all provided copulas, then compare the information criterion scores (AIC, 63 | SIC, HQIC, Log-likelihood) to decide the best copula. One can further use the fitted copula to generate trading positions 64 | by giving thresholds from data. 65 | -------------------------------------------------------------------------------- /docs/source/data/data_importer.rst: -------------------------------------------------------------------------------- 1 | .. _data-data_importer: 2 | 3 | ============= 4 | Data Importer 5 | ============= 6 | 7 | This module features helpers to fetch pricing data commonly used by the quant community to benchmark algorithms on data that comes from the ‘real world’. 8 | 9 | Asset Universes 10 | ############### 11 | 12 | Get ticker collections of a specific asset universe. 13 | 14 | 15 | .. figure:: images/ticker_collection.png 16 | :align: center 17 | 18 | Example showing the pulling of all of the DOW component tickers. 19 | 20 | .. automodule:: arbitragelab.util 21 | 22 | .. autoclass:: DataImporter 23 | :members: __init__ 24 | 25 | .. automethod:: DataImporter.get_sp500_tickers 26 | .. 
automethod:: DataImporter.get_dow_tickers 27 | 28 | 29 | Price/Fundamental Data Fetcher 30 | ############################## 31 | 32 | Pull data about a specific symbol/symbol list using the yfinance library. 33 | 34 | 35 | .. figure:: images/prices.png 36 | :align: center 37 | 38 | Example showing the requested asset prices. 39 | 40 | .. automethod:: DataImporter.get_price_data 41 | .. automethod:: DataImporter.get_ticker_sector_info 42 | 43 | Pre/Post Processing Pricing Data 44 | ################################ 45 | 46 | After pulling/loading the pricing data, it has to be processed before being used in models. 47 | 48 | 49 | .. figure:: images/returns.png 50 | :align: center 51 | 52 | Example showing plotted returns. 53 | 54 | 55 | .. automethod:: DataImporter.get_returns_data 56 | .. automethod:: DataImporter.remove_nuns 57 | -------------------------------------------------------------------------------- /docs/source/data/images/back_cont.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/data/images/back_cont.jpeg -------------------------------------------------------------------------------- /docs/source/data/images/prices.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/data/images/prices.png -------------------------------------------------------------------------------- /docs/source/data/images/returns.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/data/images/returns.png -------------------------------------------------------------------------------- 
/docs/source/data/images/rolling_intuition.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/data/images/rolling_intuition.png -------------------------------------------------------------------------------- /docs/source/data/images/ticker_collection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/data/images/ticker_collection.png -------------------------------------------------------------------------------- /docs/source/developer/debugging.rst: -------------------------------------------------------------------------------- 1 | .. _developer-debugging: 2 | 3 | =========================== 4 | Documentation for Debugging 5 | =========================== 6 | 7 | This section of the documentation contains tools to simplify the process of debugging programs 8 | that use the ArbitrageLab package. 9 | 10 | Raw Documentation 11 | ################# 12 | 13 | Using this link, one can download a PDF file containing raw version of the documentation, which 14 | includes descriptions of all public and private functions, classes, and methods available in each 15 | module of the ArbitrageLab package. 16 | 17 | .. raw:: html 18 | 19 | 58 | 59 | 62 | 63 | | 64 | 65 | ---- 66 | 67 | UML Diagram 68 | ########### 69 | 70 | The link below leads to a UML diagram of the ArbitrageLab package classes in .dot format. 71 | To use the .dot file, one may want to download the `Graphviz `__ open-source software. 72 | 73 | .. 
raw:: html 74 | 75 | 114 | 115 | 118 | -------------------------------------------------------------------------------- /docs/source/distance_approach/images/SSD_distance_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/distance_approach/images/SSD_distance_example.png -------------------------------------------------------------------------------- /docs/source/distance_approach/images/distance_approach_pair.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/distance_approach/images/distance_approach_pair.png -------------------------------------------------------------------------------- /docs/source/distance_approach/images/distance_approach_portfolio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/distance_approach/images/distance_approach_portfolio.png -------------------------------------------------------------------------------- /docs/source/distance_approach/images/distance_approach_results_portfolio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/distance_approach/images/distance_approach_results_portfolio.png -------------------------------------------------------------------------------- /docs/source/distance_approach/images/pearson_approach_beta_stocks.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/distance_approach/images/pearson_approach_beta_stocks.png -------------------------------------------------------------------------------- /docs/source/distance_approach/introduction.rst: -------------------------------------------------------------------------------- 1 | .. _distance_approach-introduction: 2 | 3 | ============ 4 | Introduction 5 | ============ 6 | 7 | Distance Approach 8 | ################# 9 | 10 | This approach is based on picking two financial instruments whose price series moved together over a 11 | set historical period. Then, during a testing period, if the difference (spread) in their prices 12 | surpasses a set threshold, we enter a long position for the asset with a lower price and a short 13 | position for the asset with a higher price. 14 | 15 | .. figure:: images/SSD_distance_example.png 16 | :scale: 80 % 17 | :align: center 18 | 19 | An example showing squared distances between points of two price series. The sum of these distances 20 | (or Sum of Squared Differences) is used to determine pairs of assets that moved together in the 21 | original approach described by Gatev et al. (2006) in 22 | `Pairs trading: Performance of a relative-value arbitrage rule `__. 23 | 24 | Following this approach, we expect that after diverging, prices of chosen instruments will revert 25 | to levels previously observed. Our position can be closed once the prices of the instruments cross. 26 | 27 | This approach can also be expanded to trading one element against a weighted portfolio of co-moving 28 | elements (a quasi-multivariate framework) or trading one portfolio against another portfolio 29 | (a fully multivariate framework). 30 | 31 | Each strategy that follows this approach can be split into two steps. 
In the first one, the historical 32 | period is defined and the distance measures are used to identify the co-moving assets to form pairs. 33 | In the second step, the threshold rules are defined and trading signals are generated for a testing 34 | dataset. 35 | -------------------------------------------------------------------------------- /docs/source/getting_started/getting_started_images/OLS_vs_TLS.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/getting_started/getting_started_images/OLS_vs_TLS.png -------------------------------------------------------------------------------- /docs/source/getting_started/getting_started_images/derivative.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/getting_started/getting_started_images/derivative.png -------------------------------------------------------------------------------- /docs/source/getting_started/getting_started_images/graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/getting_started/getting_started_images/graph.png -------------------------------------------------------------------------------- /docs/source/getting_started/getting_started_images/prior.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/getting_started/getting_started_images/prior.png -------------------------------------------------------------------------------- /docs/source/getting_started/installation.rst: 
-------------------------------------------------------------------------------- 1 | .. _getting_started-installation: 2 | 3 | ============ 4 | Installation 5 | ============ 6 | 7 | Recommended Setup on Windows 8 | ############################ 9 | 10 | #. Download and install the latest version of `Anaconda 3 `__ 11 | #. Launch Anaconda Navigator 12 | #. Click Environments, choose an environment name, select Python 3.8, and click Create 13 | #. Click Home, browse to your new environment, and click Install under Jupyter Notebook 14 | #. Launch the Anaconda Prompt and activate the environment: 15 | 16 | .. code-block:: 17 | 18 | conda activate <env name> 19 | 20 | #. Install ArbitrageLab using ``pip``: 21 | 22 | .. code-block:: 23 | 24 | pip install arbitragelab 25 | 26 | #. You are now ready to use ArbitrageLab. 27 | 28 | 29 | Recommended Setup on Linux / MacOS 30 | ################################## 31 | 32 | .. note:: 33 | 34 | If you are running on Apple Silicon, you will need to make sure `Homebrew 35 | `__ is installed, and that you have installed ``cmake``: 36 | 37 | .. code-block:: 38 | 39 | brew install cmake 40 | 41 | 42 | 43 | #. Install some variant of the ``conda`` environment manager (we recommend Anaconda or Miniconda) for your platform. 44 | #. Launch a new terminal and create a new ``conda`` environment using your environment manager: 45 | 46 | .. code-block:: 47 | 48 | conda create -n <env name> python=3.8 49 | 50 | #. Make sure the environment is activated: 51 | 52 | .. code-block:: 53 | 54 | conda activate <env name> 55 | 56 | #. Install ArbitrageLab using ``pip``: 57 | 58 | .. code-block:: 59 | 60 | pip install arbitragelab 61 | 62 | #. You are now ready to use ArbitrageLab. 63 | 64 | 65 | Google Colab 66 | ############ 67 | 68 | .. note:: 69 | 70 | Google Colab frequently updates the version of Python it uses. You might need to 71 | explore additional methods of using a Python 3.8 kernel for full support of 72 | ArbitrageLab. 73 | 74 | #. 
Open a new Terminal, and install ArbitrageLab using ``pip``: 75 | 76 | .. code-block:: 77 | 78 | pip install arbitragelab 79 | -------------------------------------------------------------------------------- /docs/source/getting_started/research_tools.rst: -------------------------------------------------------------------------------- 1 | 2 | ============== 3 | Research Tools 4 | ============== 5 | 6 | As researchers, we often neglect finding the right tools to streamline 7 | our progress. Financial machine learning is no different in that a lot of the papers are scattered 8 | across different journals and different fields. With sources ranging from journals on econometrics to machine 9 | learning, researchers often struggle to find the best academic papers to begin their studies. 10 | 11 | At Hudson & Thames, we primarily use two resources: `Connected Papers`_ and `EThOS`_. These two 12 | free sites have been invaluable and offer an advantage in searching through the most cutting-edge 13 | resources available for our ArbitrageLab package. 14 | 15 | .. _Connected Papers: https://www.connectedpapers.com/ 16 | .. _EThOS: https://ethos.bl.uk/Home.do 17 | 18 | 19 | Connected Papers 20 | ################ 21 | 22 | Connected Papers is unique in that it is not a citation tree. A citation from a paper does not 23 | necessarily lead the reader to another paper. The two topics might be completely different, and the cited one might be 24 | unimportant to the researcher. 25 | 26 | It uniquely identifies the related papers by looking at co-citation and bibliographic coupling. 27 | More about the website is available in the Connected Papers founder’s `medium`_ post. 28 | 29 | To give a brief demonstration, we will examine a `paper`_ by Li and Hoi that started our Online Portfolio Selection module. 30 | 31 | If you type in the name of the paper, you will see a graph like the one below. 32 | 33 | .. 
image:: getting_started_images/graph.png 34 | :width: 50% 35 | :align: center 36 | 37 | It immediately shows which are the most associated papers. The darker circles indicate that they are 38 | more recent, so we can easily follow from the older papers to the newer ones. Connected Papers also 39 | has an amazing feature for prior works and derivative works. 40 | 41 | Prior works is available for researchers to see which are the most famous and cited papers in this field 42 | to recognize the importance and start with the baseline material. If we click the button for prior works, 43 | for our current search, we see an image like this: 44 | 45 | .. image:: getting_started_images/prior.png 46 | :width: 50% 47 | :align: center 48 | 49 | We can easily see which were the most cited papers. It is not surprising that the number one paper 50 | associated with Online Portfolio Selection is Thomas Cover's Universal Portfolio, the original paper 51 | that began the studies in Portfolio Selection based on information theory. 52 | 53 | Once the researcher gets more familiar with the topic by going through a literature review with prior 54 | works, they can move on to the derivative works, which cover the most recent papers associated with 55 | the paper of interest. 56 | 57 | .. image:: getting_started_images/derivative.png 58 | :width: 50% 59 | :align: center 60 | 61 | .. _medium: https://medium.com/connectedpapers/announcing-connected-papers-a-visual-tool-for-researchers-to-find-and-explore-academic-papers-89146a54c7d4 62 | .. _paper: https://arxiv.org/abs/1212.2129 63 | 64 | EThOS 65 | ##### 66 | 67 | `EThOS`_ is an online library sponsored by the United Kingdom to make publicly-funded research available 68 | to all researchers. 69 | 70 | The best feature of EThOS is the availability of all doctoral theses in the UK. 
If your topic of 71 | interest does not have too many sources from journals, there is a high chance that you can find 72 | good works in EThOS as it is not limited to published journals but rather all doctoral theses as well. 73 | -------------------------------------------------------------------------------- /docs/source/ml_approach/images/2nd_order_honn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/ml_approach/images/2nd_order_honn.png -------------------------------------------------------------------------------- /docs/source/ml_approach/images/3d_cluster_optics_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/ml_approach/images/3d_cluster_optics_plot.png -------------------------------------------------------------------------------- /docs/source/ml_approach/images/confirmation_filter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/ml_approach/images/confirmation_filter.png -------------------------------------------------------------------------------- /docs/source/ml_approach/images/correlation_filter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/ml_approach/images/correlation_filter.png -------------------------------------------------------------------------------- /docs/source/ml_approach/images/correlation_filter_example.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/ml_approach/images/correlation_filter_example.png -------------------------------------------------------------------------------- /docs/source/ml_approach/images/crack_spread.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/ml_approach/images/crack_spread.png -------------------------------------------------------------------------------- /docs/source/ml_approach/images/example_ml_pair.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/ml_approach/images/example_ml_pair.png -------------------------------------------------------------------------------- /docs/source/ml_approach/images/honn_decision_region_xor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/ml_approach/images/honn_decision_region_xor.png -------------------------------------------------------------------------------- /docs/source/ml_approach/images/honn_loss_xor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/ml_approach/images/honn_loss_xor.png -------------------------------------------------------------------------------- /docs/source/ml_approach/images/honn_types.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/ml_approach/images/honn_types.png -------------------------------------------------------------------------------- /docs/source/ml_approach/images/knee_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/ml_approach/images/knee_plot.png -------------------------------------------------------------------------------- /docs/source/ml_approach/images/leverage_structure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/ml_approach/images/leverage_structure.png -------------------------------------------------------------------------------- /docs/source/ml_approach/images/mlp_decision_region_xor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/ml_approach/images/mlp_decision_region_xor.png -------------------------------------------------------------------------------- /docs/source/ml_approach/images/mlp_loss_xor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/ml_approach/images/mlp_loss_xor.png -------------------------------------------------------------------------------- /docs/source/ml_approach/images/pairs_selection_rules_diagram.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/ml_approach/images/pairs_selection_rules_diagram.png -------------------------------------------------------------------------------- /docs/source/ml_approach/images/paper_results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/ml_approach/images/paper_results.png -------------------------------------------------------------------------------- /docs/source/ml_approach/images/pi_sigma_nn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/ml_approach/images/pi_sigma_nn.png -------------------------------------------------------------------------------- /docs/source/ml_approach/images/prposed_framework_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/ml_approach/images/prposed_framework_diagram.png -------------------------------------------------------------------------------- /docs/source/ml_approach/images/rnn_lstm_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/ml_approach/images/rnn_lstm_example.png -------------------------------------------------------------------------------- /docs/source/ml_approach/images/rpnn.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/ml_approach/images/rpnn.png -------------------------------------------------------------------------------- /docs/source/ml_approach/images/threshold_filter_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/ml_approach/images/threshold_filter_example.png -------------------------------------------------------------------------------- /docs/source/ml_approach/images/vol_filter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/ml_approach/images/vol_filter.png -------------------------------------------------------------------------------- /docs/source/ml_approach/images/xor_boundaries.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/ml_approach/images/xor_boundaries.png -------------------------------------------------------------------------------- /docs/source/ml_approach/introduction.rst: -------------------------------------------------------------------------------- 1 | .. _ml_approach-introduction: 2 | 3 | .. note:: 4 | The following description closely follows the book by Simão Moraes Sarmento and Nuno Horta: 5 | `A Machine Learning based Pairs Trading Investment Strategy `__. 6 | 7 | .. warning:: 8 | In order to use this module, you should additionally install *TensorFlow v2.8.0.* and *Keras v2.3.1.* 9 | For more details, please visit our :ref:`ArbitrageLab installation guide `. 10 | 11 | 12 | ============ 13 | Introduction 14 | ============ 15 | 16 | .. raw:: html 17 | 18 |
25 | 26 | 35 |
36 |
37 | 38 | | 39 | 40 | The success of a Pairs Trading strategy depends heavily on finding the right pairs. 41 | But with the increasing availability of data, more traders manage to spot interesting 42 | pairs and quickly profit from the correction of price discrepancies, leaving no margin 43 | for the latecomers. 44 | 45 | To find opportunities, the investor will employ methods that are either too restrictive, 46 | like restricting the search to intra-sector relationships, or too open, like removing 47 | any limit on the search space. 48 | 49 | In this module, a Machine Learning based framework is applied to provide a balanced 50 | solution to this problem: firstly, with the application of Unsupervised Learning to 51 | define the search space; secondly, in the grouping of relevant securities (not necessarily 52 | from the same sector) in clusters; and finally, in the detection of rewarding pairs within 53 | them that would otherwise be hard to identify, even for the experienced investor. 54 | 55 | 56 | .. figure:: images/example_ml_pair.png 57 | :align: center 58 | 59 | An example pair found using the pair selector implemented in this module. 
60 | 61 | -------------------------------------------------------------------------------- /docs/source/optimal_mean_reversion/images/cir_description.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/optimal_mean_reversion/images/cir_description.png -------------------------------------------------------------------------------- /docs/source/optimal_mean_reversion/images/cir_optimal_switching.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/optimal_mean_reversion/images/cir_optimal_switching.png -------------------------------------------------------------------------------- /docs/source/optimal_mean_reversion/images/description_function.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/optimal_mean_reversion/images/description_function.png -------------------------------------------------------------------------------- /docs/source/optimal_mean_reversion/images/fit_check_function.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/optimal_mean_reversion/images/fit_check_function.png -------------------------------------------------------------------------------- /docs/source/optimal_mean_reversion/images/optimal_levels_plot.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/optimal_mean_reversion/images/optimal_levels_plot.png -------------------------------------------------------------------------------- /docs/source/optimal_mean_reversion/images/optimal_switching.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/optimal_mean_reversion/images/optimal_switching.png -------------------------------------------------------------------------------- /docs/source/optimal_mean_reversion/images/xou_vs_ou.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/optimal_mean_reversion/images/xou_vs_ou.png -------------------------------------------------------------------------------- /docs/source/optimal_mean_reversion/introduction.rst: -------------------------------------------------------------------------------- 1 | .. _optimal_mean_reverting_strategies-introduction: 2 | 3 | ============ 4 | Introduction 5 | ============ 6 | 7 | Optimal Mean-Reverting Strategies 8 | ################################# 9 | 10 | Various asset prices such as commodities, 11 | volatility indices, foreign exchange rates, etc. are known to exhibit mean reversion. But the most popular method 12 | to use such characteristic is to construct mean-reverting portfolio prices by simultaneously taking positions in two highly correlated or co-moving 13 | assets - an approach that is more widely known as *pairs trading*. As creating a spread gives the opportunity for the 14 | statistical arbitrage, it doesn't come as a surprise that working with mean-reverting portfolios is quite popular 15 | with hedge fund managers and investors. 
16 | 17 | However, the problem of how to determine when to open or close the position still holds. Do you, as an 18 | investor, need to enter the market immediately or wait for a future opportunity? When should you liquidate the position 19 | after making the first trade? All these questions lead us to the investigation of the optimal sequential timing 20 | of trades. 21 | 22 | In this module, we will be formalizing the optimal stopping problem for assets or portfolios that have mean-reverting 23 | dynamics and providing the solutions based on three mean-reverting models: 24 | 25 | * Ornstein-Uhlenbeck (OU) 26 | * Exponential Ornstein-Uhlenbeck (XOU) 27 | * Cox-Ingersoll-Ross (CIR) 28 | 29 | 30 | Naturally, the module is divided into three submodules for approaches to creating an optimal mean-reverting 31 | strategy: ``OrnsteinUhlenbeck``, ``ExponentialOrnsteinUhlenbeck`` and ``CoxIngersollRoss``. 32 | 33 | .. note:: 34 | We are solving the optimal stopping problem for a mean-reverting portfolio that is constructed by holding :math:`\alpha` 35 | shares of a risky asset :math:`S^{(1)}` and shorting :math:`\beta` shares of another risky asset :math:`S^{(2)}`, 36 | yielding a portfolio value: 37 | 38 | .. math:: 39 | X_t^{\alpha,\beta} = \alpha S_t^{(1)} - \beta S_t^{(2)}, \quad t \geq 0 40 | 41 | More information regarding this problem can be found in the following publication: 42 | 43 | `Optimal Mean Reversion Trading: Mathematical Analysis and Practical Applications by Tim Leung and Xin Li `_ (p. 
16) 44 | 45 | 46 | -------------------------------------------------------------------------------- /docs/source/other_approaches/images/kalman_cumulative_returns.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/other_approaches/images/kalman_cumulative_returns.png -------------------------------------------------------------------------------- /docs/source/other_approaches/images/kalman_intercept.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/other_approaches/images/kalman_intercept.png -------------------------------------------------------------------------------- /docs/source/other_approaches/images/kalman_slope.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/other_approaches/images/kalman_slope.png -------------------------------------------------------------------------------- /docs/source/other_approaches/images/pca_approach_portfolio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/other_approaches/images/pca_approach_portfolio.png -------------------------------------------------------------------------------- /docs/source/other_approaches/images/pca_approach_s_score.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/other_approaches/images/pca_approach_s_score.png 
-------------------------------------------------------------------------------- /docs/source/spread_selection/images/pairs_selection_rules_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/spread_selection/images/pairs_selection_rules_diagram.png -------------------------------------------------------------------------------- /docs/source/stochastic_control_approach/images/jurek_describe.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/stochastic_control_approach/images/jurek_describe.png -------------------------------------------------------------------------------- /docs/source/stochastic_control_approach/images/mudchana_describe.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/stochastic_control_approach/images/mudchana_describe.png -------------------------------------------------------------------------------- /docs/source/stochastic_control_approach/images/oc_delta_neutral_first.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/stochastic_control_approach/images/oc_delta_neutral_first.png -------------------------------------------------------------------------------- /docs/source/stochastic_control_approach/images/oc_delta_neutral_second.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/stochastic_control_approach/images/oc_delta_neutral_second.png -------------------------------------------------------------------------------- /docs/source/stochastic_control_approach/images/oc_describe.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/stochastic_control_approach/images/oc_describe.png -------------------------------------------------------------------------------- /docs/source/stochastic_control_approach/images/oc_optimal_first.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/stochastic_control_approach/images/oc_optimal_first.png -------------------------------------------------------------------------------- /docs/source/stochastic_control_approach/images/oc_optimal_second.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/stochastic_control_approach/images/oc_optimal_second.png -------------------------------------------------------------------------------- /docs/source/stochastic_control_approach/images/oc_spread.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/stochastic_control_approach/images/oc_spread.png -------------------------------------------------------------------------------- /docs/source/stochastic_control_approach/images/oc_wealth_delta_neutral.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/stochastic_control_approach/images/oc_wealth_delta_neutral.png -------------------------------------------------------------------------------- /docs/source/stochastic_control_approach/images/oc_wealth_optimal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/stochastic_control_approach/images/oc_wealth_optimal.png -------------------------------------------------------------------------------- /docs/source/stochastic_control_approach/images/optimal_weights.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/stochastic_control_approach/images/optimal_weights.png -------------------------------------------------------------------------------- /docs/source/stochastic_control_approach/images/optimal_weights_fund_flows.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/stochastic_control_approach/images/optimal_weights_fund_flows.png -------------------------------------------------------------------------------- /docs/source/stochastic_control_approach/images/stabilization_bound.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/stochastic_control_approach/images/stabilization_bound.png -------------------------------------------------------------------------------- 
/docs/source/stochastic_control_approach/introduction.rst: -------------------------------------------------------------------------------- 1 | .. _stochastic_control_approach-introduction: 2 | 3 | .. note:: 4 | The following documentation appears in Section 5 of the following work: 5 | 6 | `Krauss (2015), Statistical arbitrage pairs trading strategies: Review and outlook `__ 7 | 8 | 9 | ============ 10 | Introduction 11 | ============ 12 | 13 | .. raw:: html 14 | 15 |
22 | 23 | 32 |
33 |
34 | 35 | | 36 | 37 | Modeling asset pricing dynamics with the Ornstein-Uhlenbeck process 38 | ################################################################### 39 | 40 | OU Model Jurek 41 | ************** 42 | 43 | `Jurek and Yang (2007) `__ provide the paper with the 44 | highest impact in this domain. In their setup, they allow non-myopic arbitrageurs to allocate their capital 45 | to a mean-reverting spread or to a risk-free asset. The former evolves according to an Ornstein-Uhlenbeck process, 46 | and the latter is compounded continuously with the risk-free rate. Two scenarios for investor preferences are considered 47 | over a finite time horizon: constant relative risk aversion and the recursive Epstein-Zin utility function. 48 | Utilizing the asset price dynamics, Jurek and Yang develop the budget constraints 49 | and the wealth dynamics of the arbitrageurs’ assets. 50 | 51 | 52 | Applying stochastic control theory, the authors are able to derive the Hamilton-Jacobi-Bellman (HJB) equation and 53 | subsequently find closed-form solutions for the value and policy functions for both scenarios. Jurek and Yang provide 54 | the most comprehensive discussion of the stochastic control approach applied to an Ornstein-Uhlenbeck framework. 55 | 56 | OU Model Mudchanatongsuk 57 | ************************ 58 | 59 | `Mudchanatongsuk et al. (2008) `__ also solve 60 | the stochastic control problem for pairs trading under power utility for terminal wealth. 61 | Their ansatz mostly differs in the assumed asset pricing dynamics, but the spread also relies on an OU-process. 62 | 63 | 64 | References 65 | ########## 66 | 67 | * `Krauss, Christopher (2015) : Statistical arbitrage pairs trading strategies: Review and outlook, IWQW Discussion Papers, No. 
09/2015, `__ 68 | -------------------------------------------------------------------------------- /docs/source/time_series_approach/images/auto_arima_prediction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/time_series_approach/images/auto_arima_prediction.png -------------------------------------------------------------------------------- /docs/source/time_series_approach/images/model_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/time_series_approach/images/model_diagram.png -------------------------------------------------------------------------------- /docs/source/time_series_approach/images/quantile_thresholds.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/time_series_approach/images/quantile_thresholds.png -------------------------------------------------------------------------------- /docs/source/time_series_approach/images/trading_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/time_series_approach/images/trading_example.png -------------------------------------------------------------------------------- /docs/source/time_series_approach/introduction.rst: -------------------------------------------------------------------------------- 1 | .. 
_time_series_approach-introduction: 2 | 3 | ============ 4 | Introduction 5 | ============ 6 | 7 | Time Series Approach 8 | #################### 9 | 10 | This approach does not focus on picking a set of comoving securities, but assumes that they have already 11 | been chosen in the prior stage using either a standard cointegration test or one of the alternative methods. 12 | The core idea of the time series approach is to model the spread of the pair or a set of assets, and 13 | thereby generate optimized trading signals. 14 | 15 | .. figure:: images/auto_arima_prediction.png 16 | :scale: 80 % 17 | :align: center 18 | 19 | An example showing predicted spread values using the Auto ARIMA approach and real spread values. 20 | The Auto ARIMA model is used in the Quantile Time Series Strategy, described in the book by 21 | Simão Moraes Sarmento and Nuno Horta 22 | `"A Machine Learning based Pairs Trading Investment Strategy" `__. 23 | 24 | Tools that can be used to model the spread include, but are not limited to: a time series model, 25 | a state-space model, a Bayesian approach, models based on the OU processes, a nonparametric approach 26 | with Renko and Kagi. 
27 | -------------------------------------------------------------------------------- /docs/source/trading/images/AME-DOV.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/trading/images/AME-DOV.png -------------------------------------------------------------------------------- /docs/source/trading/images/formation_copulas.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/trading/images/formation_copulas.png -------------------------------------------------------------------------------- /docs/source/trading/images/mpi_flags_positions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/trading/images/mpi_flags_positions.png -------------------------------------------------------------------------------- /docs/source/trading/images/mpi_normalized_prices.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/trading/images/mpi_normalized_prices.png -------------------------------------------------------------------------------- /docs/source/trading/images/mpi_units.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/trading/images/mpi_units.png -------------------------------------------------------------------------------- /docs/source/trading/images/positions_log_prices.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/trading/images/positions_log_prices.png -------------------------------------------------------------------------------- /docs/source/trading/images/returns_and_samples.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/trading/images/returns_and_samples.png -------------------------------------------------------------------------------- /docs/source/trading/images/trading_opportunities.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/trading/images/trading_opportunities.png -------------------------------------------------------------------------------- /docs/source/visualization/images/coint_eg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/visualization/images/coint_eg.png -------------------------------------------------------------------------------- /docs/source/visualization/images/coint_jh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/visualization/images/coint_jh.png -------------------------------------------------------------------------------- /docs/source/visualization/images/ou_tearsheet.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/docs/source/visualization/images/ou_tearsheet.png -------------------------------------------------------------------------------- /pylint: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | pylint arbitragelab --rcfile=.pylintrc -f text 3 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hudson-and-thames/arbitragelab/32ccd567e8541965a35293a67944945f6d377f65/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_auto_arima.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests AUTO ARIMA prediction functions. 3 | """ 4 | 5 | import warnings 6 | import unittest 7 | import os 8 | import numpy as np 9 | import pandas as pd 10 | 11 | from arbitragelab.time_series_approach import get_trend_order, AutoARIMAForecast 12 | 13 | 14 | class TestAutoARIMA(unittest.TestCase): 15 | """ 16 | Tests Auto ARIMA predictions. 17 | """ 18 | 19 | def setUp(self): 20 | """ 21 | Set the file path for the tick data csv 22 | """ 23 | 24 | np.random.seed(0) 25 | project_path = os.path.dirname(__file__) 26 | path = project_path + '/test_data/stock_prices.csv' 27 | 28 | stock_prices = pd.read_csv(path, index_col=0, parse_dates=[0]) 29 | self.non_stationary_series = stock_prices['XLF'].iloc[200:300] # Non-stationary part 30 | returns = np.random.normal(0, 1, size=self.non_stationary_series.shape[0]) 31 | self.stationary_series = pd.Series(index=self.non_stationary_series.index, data=returns) 32 | 33 | def test_trend_order(self): 34 | """ 35 | Tests get_trend_order function. 
36 | """ 37 | 38 | stationary_trend_order = get_trend_order(self.stationary_series) 39 | non_stationary_trend_order = get_trend_order(self.non_stationary_series) 40 | self.assertEqual(stationary_trend_order, 0) 41 | self.assertEqual(non_stationary_trend_order, 1) 42 | 43 | # Testing the extit from loop 44 | high_trend_order = get_trend_order(self.non_stationary_series, 0) 45 | self.assertEqual(high_trend_order, 0) 46 | 47 | def test_auto_arima(self): 48 | """ 49 | Test Auto ARIMA prediction function. 50 | """ 51 | 52 | y_train = self.non_stationary_series.iloc[:70] 53 | y_test = self.non_stationary_series.iloc[70:] 54 | 55 | auto_arima_model = AutoARIMAForecast(start_p=3, start_q=3, max_p=10, max_q=10) 56 | 57 | with warnings.catch_warnings(): # Testing with warnings 58 | warnings.filterwarnings('ignore', r'Maximum Likelihood optimization failed to converge.') 59 | 60 | auto_arima_model.get_best_arima_model(y_train, verbose=False, silence_warnings=False) 61 | 62 | # And without warnings 63 | auto_arima_model.get_best_arima_model(y_train, verbose=False, silence_warnings=True) 64 | 65 | recursive_arima_prediction = auto_arima_model.predict(y=y_test, retrain_freq=1, train_window=None, 66 | silence_warnings=False) 67 | non_recursive_arima_prediction = auto_arima_model.predict(y=y_test, retrain_freq=1, train_window=30) 68 | 69 | self.assertAlmostEqual(recursive_arima_prediction.mean(), 6.72, delta=1e-2) 70 | self.assertAlmostEqual(recursive_arima_prediction.iloc[10], 8.04, delta=1e-2) 71 | self.assertAlmostEqual((recursive_arima_prediction - non_recursive_arima_prediction).mean(), 0.08, delta=1e-2) 72 | self.assertAlmostEqual(recursive_arima_prediction.iloc[1], 7.42, delta=1e-2) 73 | self.assertAlmostEqual(non_recursive_arima_prediction.iloc[1], 5.08, delta=1e-2) 74 | -------------------------------------------------------------------------------- /tests/test_copula_pairs_selection.py: -------------------------------------------------------------------------------- 1 | 
""" 2 | Unit tests for pairs selection module under copula_approach. 3 | """ 4 | # pylint: disable = invalid-name, protected-access 5 | import os 6 | import unittest 7 | 8 | import numpy as np 9 | import pandas as pd 10 | 11 | from arbitragelab.copula_approach import pairs_selection 12 | 13 | 14 | class TestPairsSelector(unittest.TestCase): 15 | """ 16 | Testing methods in PairsSelector. 17 | """ 18 | 19 | def setUp(self): 20 | # Using saved ETF price series for testing and trading 21 | project_path = os.path.dirname(__file__) 22 | data_path = project_path + "/test_data/stock_prices.csv" 23 | self.stocks = pd.read_csv(data_path, parse_dates=True, index_col="Date") 24 | 25 | def test_rank_pairs(self): 26 | """ 27 | Testing for rank_pairs method. 28 | """ 29 | PS = pairs_selection.PairsSelector() 30 | 31 | # Default options 32 | scores_dis = PS.rank_pairs(self.stocks, 'euc distance') 33 | scores_rho = PS.rank_pairs(self.stocks, 'spearman rho') 34 | scores_tau = PS.rank_pairs(self.stocks, 'kendall tau') 35 | # Check length 36 | self.assertEqual(len(scores_dis), 253) 37 | self.assertEqual(len(scores_rho), 253) 38 | self.assertEqual(len(scores_tau), 253) 39 | # Sample a few to check 40 | self.assertAlmostEqual(scores_dis['BND']['CSJ'], -1.3367629104092054, delta=1e-5) 41 | self.assertAlmostEqual(scores_dis['TIP']['CSJ'], -2.322229913467921, delta=1e-5) 42 | self.assertAlmostEqual(scores_dis['EFA']['SPY'], -16.65037813410612, delta=1e-5) 43 | 44 | self.assertAlmostEqual(scores_rho['BND']['CSJ'], 0.6877356800107545, delta=1e-5) 45 | self.assertAlmostEqual(scores_rho['TIP']['CSJ'], 0.7302595008128704, delta=1e-5) 46 | self.assertAlmostEqual(scores_rho['EFA']['SPY'], 0.6678733772844093, delta=1e-5) 47 | 48 | self.assertAlmostEqual(scores_tau['BND']['CSJ'], 0.5235355150320703, delta=1e-5) 49 | self.assertAlmostEqual(scores_tau['TIP']['CSJ'], 0.5575605525931701, delta=1e-5) 50 | self.assertAlmostEqual(scores_tau['EFA']['SPY'], 0.4980204822767728, delta=1e-5) 51 | 52 | # Given 
the number of pairs to keep 53 | scores_dis_cut = PS.rank_pairs(self.stocks, 'euc distance', keep_num_pairs=100) 54 | pd.testing.assert_series_equal(scores_dis_cut, scores_dis[:100]) 55 | 56 | @staticmethod 57 | def test_pre_processing_nan(): 58 | """ 59 | Testing for _pre_processing_nan method. 60 | """ 61 | 62 | # Initiate data and selctor 63 | PS = pairs_selection.PairsSelector() 64 | toy_data = {'A': [1, 2, 3, 4, np.NaN, 6], 65 | 'B': [np.NaN, 2, 3, 4, 5, 6], 66 | 'C': [1, 2, 3, 4, 5, np.NaN], 67 | 'D': [np.NaN, np.NaN, 3, 4, 5, 6], 68 | 'E': [1, 2, 3, 4, np.NaN, np.NaN], 69 | 'F': [1, 2, np.NaN, np.NaN, 5, 6]} 70 | toy_df = pd.DataFrame(data=toy_data, dtype=float) 71 | 72 | # Fill NaN 73 | forward_fill_df = PS._pre_processing_nan(toy_df, 'forward fill') 74 | linear_interp_df = PS._pre_processing_nan(toy_df, 'linear interp') 75 | none_df = PS._pre_processing_nan(toy_df, None) 76 | 77 | # Expected data for forward fill 78 | ff_expect_data = {'A': [1, 2, 3, 4, 4, 6], 79 | 'B': [np.NaN, 2, 3, 4, 5, 6], 80 | 'C': [1, 2, 3, 4, 5, 5], 81 | 'D': [np.NaN, np.NaN, 3, 4, 5, 6], 82 | 'E': [1, 2, 3, 4, 4, 4], 83 | 'F': [1, 2, 2, 2, 5, 6]} 84 | 85 | # Expected data for linear interp 86 | li_expect_data = {'A': [1, 2, 3, 4, 5, 6], 87 | 'B': [np.NaN, 2, 3, 4, 5, 6], 88 | 'C': [1, 2, 3, 4, 5, 5], 89 | 'D': [np.NaN, np.NaN, 3, 4, 5, 6], 90 | 'E': [1, 2, 3, 4, 4, 4], 91 | 'F': [1, 2, 3, 4, 5, 6]} 92 | 93 | ff_expect = pd.DataFrame(data=ff_expect_data, dtype=float) 94 | li_expect = pd.DataFrame(data=li_expect_data, dtype=float) 95 | 96 | # Checking with the result 97 | pd.testing.assert_frame_equal(forward_fill_df, ff_expect, check_dtype=False) 98 | pd.testing.assert_frame_equal(linear_interp_df, li_expect, check_dtype=False) 99 | pd.testing.assert_frame_equal(none_df, toy_df, check_dtype=False) 100 | -------------------------------------------------------------------------------- /tests/test_data/gld_gdx_data.csv: 
-------------------------------------------------------------------------------- 1 | Date,GDX,GLD 2 | 8/24/2015,13.68765831,110.5299988 3 | 8/25/2015,13.24205971,109.1600037 4 | 8/26/2015,12.63178253,107.6699982 5 | 8/27/2015,13.35830307,107.7300034 6 | 8/28/2015,13.80390358,108.6999969 7 | 8/31/2015,13.66828632,108.8199997 8 | 9/1/2015,13.24205971,109.1999969 9 | 9/2/2015,13.20331287,108.6200027 10 | 9/3/2015,12.96113873,107.8399963 11 | 9/4/2015,12.97082615,107.4899979 12 | 9/8/2015,13.1258173,107.5199966 13 | 9/9/2015,12.71896553,106.1299973 14 | 9/10/2015,12.70927715,106.3799973 15 | 9/11/2015,12.7964592,106.1600037 16 | 9/14/2015,12.6608429,106.2200012 17 | 9/15/2015,12.69959068,105.9000015 18 | 9/16/2015,13.45517349,107.3099976 19 | 9/17/2015,13.82327747,108.4100037 20 | 9/18/2015,14.02670193,109.2099991 21 | 9/21/2015,13.56172943,108.5299988 22 | 9/22/2015,12.94176483,107.7900009 23 | 9/23/2015,12.84489441,108.2200012 24 | 9/24/2015,13.77484226,110.4899979 25 | 9/25/2015,13.54235554,109.8099976 26 | 9/28/2015,12.90301609,108.4199982 27 | 9/29/2015,12.99019909,107.9800034 28 | 9/30/2015,13.30986786,106.8600006 29 | 10/1/2015,12.99019909,106.7300034 30 | 10/2/2015,14.0363884,108.9899979 31 | 10/5/2015,14.64666748,108.7699966 32 | 10/6/2015,15.18913651,109.8600006 33 | 10/7/2015,15.1213274,109.6999969 34 | 10/8/2015,14.92758751,109.1399994 35 | 10/9/2015,15.75097942,110.8700027 36 | 10/12/2015,15.28600693,111.3099976 37 | 10/13/2015,15.37318897,111.8600006 38 | 10/14/2015,16.37094498,113.8099976 39 | 10/15/2015,16.36125755,113.2900009 40 | 10/16/2015,15.8769083,112.4899979 41 | 10/19/2015,15.20851135,112.0199966 42 | 10/20/2015,15.81878757,112.7300034 43 | 10/21/2015,15.35381508,111.7300034 44 | 10/22/2015,15.61536503,111.6900024 45 | 10/23/2015,16.00284004,111.5 46 | 10/26/2015,15.50880718,111.4300003 47 | 10/27/2015,15.5378685,111.6800003 48 | 10/28/2015,15.39256191,110.7799988 49 | 10/29/2015,14.64666748,109.7200012 50 | 10/30/2015,14.49167728,109.3000031 51 
| 11/2/2015,14.54979801,108.5899963 52 | 11/3/2015,14.47230148,106.9800034 53 | 11/4/2015,14.16232014,105.9700012 54 | 11/5/2015,13.63922405,105.6399994 55 | 11/6/2015,13.03863335,104.0999985 56 | 11/9/2015,13.39705181,104.4000015 57 | 11/10/2015,13.04832172,104.1800003 58 | 11/11/2015,13.22268581,103.8300018 59 | 11/12/2015,13.04832172,103.8499985 60 | 11/13/2015,13.19362545,103.5599976 61 | 11/16/2015,13.30986786,103.7099991 62 | 11/17/2015,12.67052937,102.3399963 63 | 11/18/2015,13.06769371,102.4300003 64 | 11/19/2015,13.53266907,103.5599976 65 | 11/20/2015,12.98051167,103.0899963 66 | 11/23/2015,12.91270351,102.2600021 67 | 11/24/2015,13.40673828,102.9400024 68 | 11/25/2015,13.28080845,102.4599991 69 | 11/27/2015,13.02894783,101.25 70 | 11/30/2015,13.32924271,101.9199982 71 | 12/1/2015,13.71672058,102.2799988 72 | 12/2/2015,13.35830307,100.6900024 73 | 12/3/2015,13.63922405,101.7600021 74 | 12/4/2015,14.36574554,104.0199966 75 | 12/7/2015,13.75546837,102.6699982 76 | 12/8/2015,13.63922405,102.8399963 77 | -------------------------------------------------------------------------------- /tests/test_data_importer.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests functionality of Data Importer: 3 | util/data_importer.py 4 | """ 5 | 6 | import os 7 | import unittest 8 | import pandas as pd 9 | import numpy as np 10 | from arbitragelab.util import DataImporter 11 | 12 | 13 | class TestDataImporter(unittest.TestCase): 14 | """ 15 | Tests Data Importer class. 16 | """ 17 | # pylint: disable=pointless-string-statement 18 | 19 | def setUp(self): 20 | """ 21 | Loads price universe. 
22 | """ 23 | 24 | np.random.seed(0) 25 | project_path = os.path.dirname(__file__) 26 | data_path = project_path + '/test_data/sp100_prices.csv' 27 | self.data = pd.read_csv(data_path, parse_dates=True, index_col="Date") 28 | self.data.dropna(inplace=True) 29 | 30 | #These tests are breaking due to yahoo_fin issue now: https://github.com/atreadw1492/yahoo_fin/issues/36 31 | ''' 32 | def test_ticker_collectors(self): 33 | """ 34 | Tests ticker collection collectors. 35 | """ 36 | 37 | self.assertTrue(len(DataImporter.get_sp500_tickers()) > 400) 38 | self.assertTrue(len(DataImporter.get_dow_tickers()) > 20) 39 | ''' 40 | 41 | def test_preprocessing_methods(self): 42 | """ 43 | Tests preprocessing methods. 44 | """ 45 | 46 | # Generate a valid DataFrame full of ones. 47 | sample_df = pd.DataFrame(data=np.ones((200, 20))) 48 | # Append a column with contents of nan values. 49 | sample_df[20] = np.nan 50 | 51 | # Assert that the remove_nuns method removes the last column. 52 | self.assertEqual(len(DataImporter.remove_nuns(sample_df).columns), 20) 53 | 54 | # Get the first column from the dataset, process to returns and check 55 | # its mean. 56 | returns_mean = DataImporter.get_returns_data(self.data.iloc[:, 1]).mean() 57 | 58 | self.assertAlmostEqual(returns_mean, 0, places=1) 59 | 60 | def test_price_retriever(self): 61 | """ 62 | Tests asset prices retriever. 63 | """ 64 | 65 | # Download a year worth of GOOG daily price data and check it's length. 66 | price_df = DataImporter.get_price_data('GOOG', '2015-01-01', '2016-01-01', '1d') 67 | self.assertTrue(len(price_df) > 200) 68 | 69 | @staticmethod 70 | def test_ticker_sector_info(): 71 | """ 72 | Tests ticker information augmentor. 73 | """ 74 | 75 | data_importer = DataImporter() 76 | 77 | # Make the expected sector info DataFrame, that has the basic structure of columns 78 | # as follows: [ticker_symbol, industry, sector]. 
79 | expected_result = pd.DataFrame(data=[ 80 | ('GOOG', 'Internet Content & Information', 'Communication Services'), 81 | ('META', 'Internet Content & Information', 'Communication Services') 82 | ]) 83 | expected_result.columns = ['ticker', 'industry', 'sector'] 84 | 85 | # Call the get_ticker_sector_info method to request the necessary data. 86 | augmented_ticker_df = data_importer.get_ticker_sector_info(['GOOG', 'META'], 1) 87 | pd.testing.assert_frame_equal(augmented_ticker_df, expected_result) 88 | -------------------------------------------------------------------------------- /tests/test_feature_expander.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests Spread Modeling Feature Expander Implementation. 3 | """ 4 | 5 | import unittest 6 | import numpy as np 7 | 8 | from arbitragelab.ml_approach.feature_expander import FeatureExpander 9 | 10 | class TestFeatureExpander(unittest.TestCase): 11 | """ 12 | Tests feature expansion class. 13 | """ 14 | 15 | def test_feature_expander(self): 16 | """ 17 | Tests higher order term generation. 18 | """ 19 | 20 | # Set the input data, which in this case is the standard XOR. 21 | data = np.array([[0, 0], [0, 1], [1, 0], [1, 1]]) 22 | 23 | expanded_data = FeatureExpander(methods=['laguerre', 'power', 'chebyshev', 'legendre'], 24 | n_orders=2).fit(data).transform() 25 | 26 | # Check that it returned the right values. 27 | self.assertAlmostEqual(expanded_data.iloc[-1].mean(), 0.807, 2) 28 | self.assertAlmostEqual(expanded_data.iloc[:, 6].mean(), 0.5) 29 | 30 | expanded_data = FeatureExpander(methods=['product'], 31 | n_orders=2).fit(data).transform() 32 | 33 | # Check that it returned the right values. 
34 | self.assertAlmostEqual(expanded_data.iloc[-1].mean(), 1) 35 | self.assertAlmostEqual(expanded_data.iloc[2].mean(), 0.33, 2) 36 | -------------------------------------------------------------------------------- /tests/test_filters.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests Spread Modeling filter functionality. 3 | """ 4 | import os 5 | import unittest 6 | 7 | import pandas as pd 8 | 9 | from arbitragelab.ml_approach.filters import ThresholdFilter, CorrelationFilter, VolatilityFilter 10 | 11 | 12 | class TestFilters(unittest.TestCase): 13 | """ 14 | Tests Filtering Classes. 15 | """ 16 | 17 | def setUp(self): 18 | """ 19 | Loads futures price data. 20 | """ 21 | 22 | project_path = os.path.dirname(__file__) 23 | 24 | # Load the needed contracts and calculate spread. 25 | cl_df = pd.read_csv(project_path + '/test_data/cl.csv', 26 | parse_dates=True, index_col="Dates")['PX_LAST'] 27 | rb_df = pd.read_csv(project_path + '/test_data/rb.csv', 28 | parse_dates=True, index_col="Dates")['PX_LAST'] 29 | df_spread = cl_df - rb_df 30 | 31 | # Concatenate everything for use cases that need all the data at the same time. 32 | working_df = pd.concat([cl_df, rb_df, df_spread], axis=1) 33 | working_df.columns = ["wti", "gasoline", "spread"] 34 | working_df.dropna(inplace=True) 35 | self.working_df = working_df 36 | 37 | # Calculate spread returns and std dev. 38 | spread_series = working_df['spread'] 39 | self.spread_diff_series = spread_series.diff() 40 | self.spread_diff_std = self.spread_diff_series.std() 41 | 42 | def test_threshold_filter(self): 43 | """ 44 | Tests the Threshold filter 45 | """ 46 | 47 | # Initialize ThresholdFilter with 2 std dev band for buying and selling triggers. 
48 | thres_filter = ThresholdFilter(buy_threshold=-self.spread_diff_std*2, 49 | sell_threshold=self.spread_diff_std*2) 50 | 51 | std_events = thres_filter.fit_transform(self.spread_diff_series) 52 | 53 | # Check that the correct amount of triggers have been set. 54 | self.assertEqual(std_events['side'].value_counts().values.tolist(), 55 | [3817, 76, 58]) 56 | 57 | thres_filter.plot() 58 | 59 | def test_correlation_filter(self): 60 | """ 61 | Tests the Correlation filter. 62 | """ 63 | 64 | # Initialize CorrelationFilter with +-0.05 correlation change to trigger buy/sell. 65 | corr_filter = CorrelationFilter(buy_threshold=0.05, sell_threshold=-0.05, 66 | lookback=30) 67 | corr_filter.fit(self.working_df[['wti', 'gasoline']]) 68 | corr_events = corr_filter.transform(self.working_df[['wti', 'gasoline']]) 69 | 70 | # Check that the correct amount of triggers have been set. 71 | self.assertEqual(corr_events['side'].value_counts().values.tolist(), 72 | [3693, 130, 128]) 73 | 74 | corr_filter.plot() 75 | 76 | def test_volatility_filter(self): 77 | """ 78 | Tests the Volatility filter. 79 | """ 80 | 81 | vol_filter = VolatilityFilter(lookback=80) 82 | 83 | vol_events = vol_filter.fit_transform(self.spread_diff_series) 84 | 85 | self.assertEqual(vol_events['regime'].value_counts().values.tolist(), 86 | [1846, 1840, 80, 79, 13, 13]) 87 | 88 | vol_filter.plot() 89 | -------------------------------------------------------------------------------- /tests/test_heat_potentials.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests the heat potentials approach from the HeatPotentials module of ArbitrageLab. 3 | """ 4 | import unittest 5 | import numpy as np 6 | import pandas as pd 7 | 8 | from arbitragelab.optimal_mean_reversion.heat_potentials import HeatPotentials 9 | 10 | 11 | class TestHeatPotentials(unittest.TestCase): 12 | """ 13 | Tests the HeatPotentials module. 
14 | """ 15 | 16 | def setUp(self): 17 | """ 18 | Sets up the universal testing values. 19 | """ 20 | 21 | self.params = (1.8557, 0.00653, 0.15) 22 | 23 | def test_fit(self): 24 | """ 25 | Tests the correctness of the fit to a steady-state distribution. 26 | """ 27 | 28 | # Setting up the model 29 | test = HeatPotentials() 30 | 31 | test.fit(self.params, 0.01, 300) 32 | 33 | # Test the fitted parameters 34 | self.assertAlmostEqual(test.theta, 1, delta=1e-2) 35 | 36 | self.assertAlmostEqual(test.max_trade_duration, 1.959, delta=1e-3) 37 | 38 | # Tests calling the description function 39 | descr = test.description() 40 | self.assertIsInstance(descr, pd.Series) 41 | 42 | def test_helper_functions(self): 43 | """ 44 | Tests the helper functions. 45 | """ 46 | 47 | # Setting up the instance of the class 48 | test = HeatPotentials() 49 | 50 | test.fit(self.params, 0.1, 300) 51 | 52 | # Setting up the grid 53 | grid = test.v(test.max_trade_duration) 54 | 55 | # Calculating helper values 56 | upsilon = test.upsilon(test.max_trade_duration) 57 | 58 | omega = test.omega(test.max_trade_duration) 59 | 60 | # Testing helper functions calculation 61 | self.assertAlmostEqual(grid[-1], upsilon, delta=1e-4) 62 | 63 | self.assertAlmostEqual(omega, -0.14095, delta=1e-4) 64 | 65 | # Tests if the description function returns the instance of the correct class 66 | self.assertIsInstance(test.description(), pd.Series) 67 | 68 | def test_core_functionality(self): 69 | """ 70 | Tests the core functionality. 
71 | """ 72 | 73 | # Setting up the instance of the class 74 | test = HeatPotentials() 75 | 76 | test.fit(self.params, 0.1, 300) 77 | 78 | # Setting the expected output 79 | expected_output = (5.2423, -3.243, 1.2267) 80 | 81 | # Testing the optimal levels and sharpe calculation 82 | np.testing.assert_almost_equal(test.optimal_levels(), expected_output, decimal=4) 83 | 84 | self.assertAlmostEqual(test.sharpe_calculation(test.max_trade_duration, 5.2423, -3.243), 85 | expected_output[2], delta=1e-3) 86 | -------------------------------------------------------------------------------- /tests/test_hedge_ratios_spread_construction.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests spread construction of Hedge Ratios - used in Pairs Selection module. 3 | """ 4 | # pylint: disable=protected-access 5 | 6 | import os 7 | import unittest 8 | import pandas as pd 9 | import numpy as np 10 | 11 | from arbitragelab.hedge_ratios import construct_spread 12 | 13 | 14 | class TestSpreadConstruction(unittest.TestCase): 15 | """ 16 | Tests construct_spread class. 17 | """ 18 | 19 | def setUp(self): 20 | """ 21 | Loads price universe and instantiates the pairs selection class. 22 | """ 23 | 24 | np.random.seed(0) 25 | 26 | project_path = os.path.dirname(__file__) 27 | data_path = project_path + '/test_data/sp100_prices.csv' 28 | self.data = pd.read_csv(data_path, parse_dates=True, index_col="Date") 29 | self.data.dropna(inplace=True) 30 | 31 | def test_spread_construction(self): 32 | """ 33 | Verifies spread construction function. 
34 | """ 35 | 36 | hedge_ratios = pd.Series({'A': 1, 'AVB': 0.832406370860649}) 37 | spread = construct_spread(self.data[['AVB', 'A']], hedge_ratios=hedge_ratios) 38 | inverted_spread = construct_spread(self.data[['AVB', 'A']], hedge_ratios=hedge_ratios, dependent_variable='A') 39 | self.assertAlmostEqual(spread.mean(), -81.853, delta=1e-4) 40 | self.assertEqual((spread - inverted_spread).sum(), 0) 41 | -------------------------------------------------------------------------------- /tests/test_indexed_highlight.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests functionality of Indexed Highlighter: 3 | utils/indexed_highlight.py 4 | """ 5 | 6 | import warnings 7 | import unittest 8 | from unittest.mock import Mock, MagicMock 9 | 10 | from arbitragelab.util.indexed_highlight import IndexedHighlight 11 | 12 | class TestIndexedHighlight(unittest.TestCase): 13 | """ 14 | Tests Indexed Highlighter class. 15 | """ 16 | 17 | def setUp(self): 18 | """ 19 | Tests Initial instantiation of the IndexedHighlighter class. 20 | """ 21 | 22 | # This is here to hide 'deprecated in Matplotlib 3.1' warnings. The 23 | # functions mentioned here are needed for backward compatibility. 24 | warnings.simplefilter("ignore") 25 | 26 | placeholder_annotation = Mock() 27 | placeholder_annotation.xyann = [0, 0] 28 | 29 | artist = MagicMock(return_value=[]) 30 | artist.color = "White" 31 | artist.visible = False 32 | artist.axes.annotate.return_value = placeholder_annotation 33 | artist.axes.figure.canvas.callbacks.callbacks = {'button_press_event': {}} 34 | 35 | indexed_highlighter = IndexedHighlight([artist]) 36 | 37 | self.artist = artist 38 | self.indexed_highlighter = indexed_highlighter 39 | 40 | def test_assigned_highlights(self): 41 | """ 42 | Tests size of the highlighter list. 
43 | """ 44 | 45 | self.assertEqual(len(self.indexed_highlighter.highlights), 1) 46 | 47 | def test_update_known_artist(self): 48 | """ 49 | Tests the update function with a known artist inside the event object. 50 | """ 51 | 52 | event = Mock() 53 | event.ind = [1] 54 | event.artist = self.artist 55 | event.artist.axes.xaxis.get_view_interval.return_value = [0, 0] 56 | event.artist.axes.yaxis.get_view_interval.return_value = [0, 0] 57 | event.mouseevent.xdata = 1 58 | event.mouseevent.ydata = 1 59 | 60 | bbox = Mock() 61 | bbox.corners.return_value = [] 62 | 63 | annotation = Mock() 64 | annotation.get_window_extent.return_value = bbox 65 | 66 | self.indexed_highlighter.update(event, annotation) 67 | self.assertTrue(isinstance(self.indexed_highlighter.highlights[0], Mock)) 68 | 69 | def test_update_unknown_artist(self): 70 | """ 71 | Tests the update function with a unknown artist inside the event object. 72 | """ 73 | 74 | bbox = Mock() 75 | bbox.corners.return_value = [] 76 | 77 | annotation = Mock() 78 | annotation.get_window_extent.return_value = bbox 79 | 80 | new_event = Mock() 81 | new_event.ind = [1] 82 | new_event.artist.axes.xaxis.get_view_interval.return_value = [0, 0] 83 | new_event.artist.axes.yaxis.get_view_interval.return_value = [0, 0] 84 | new_event.mouseevent.xdata = 1 85 | new_event.mouseevent.ydata = 1 86 | 87 | self.indexed_highlighter.update(new_event, annotation) 88 | self.assertTrue(isinstance(self.indexed_highlighter.highlights[0], Mock)) 89 | -------------------------------------------------------------------------------- /tests/test_kalman_filter.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests the Kalman Filter Strategy from the Other Approaches module. 
3 | """ 4 | 5 | import unittest 6 | import os 7 | 8 | import numpy as np 9 | import pandas as pd 10 | from sklearn.linear_model import LinearRegression 11 | 12 | from arbitragelab.other_approaches import KalmanFilterStrategy 13 | 14 | 15 | class TestKalmanFilter(unittest.TestCase): 16 | """ 17 | Test Kalman Filter Strategy functions. 18 | """ 19 | 20 | def setUp(self): 21 | """ 22 | Creates pairs data set from TIP and IEF tickers. 23 | """ 24 | 25 | project_path = os.path.dirname(__file__) 26 | data_path = project_path + '/test_data/stock_prices.csv' 27 | self.data = pd.read_csv(data_path, parse_dates=True, index_col="Date") 28 | self.kalman_filter = KalmanFilterStrategy() 29 | 30 | def test_kalman_filter_update(self): 31 | """ 32 | Tests the update method of Kalman filter. 33 | """ 34 | 35 | price_data_subset = self.data[['TIP', 'IEF']] 36 | linear_reg = LinearRegression(fit_intercept=True) # Fit regression on the whole dataset 37 | linear_reg.fit(price_data_subset['TIP'].values.reshape(-1, 1), price_data_subset['IEF']) 38 | 39 | for _, row in price_data_subset.iterrows(): 40 | self.kalman_filter.update(row['TIP'], row['IEF']) 41 | 42 | self.assertAlmostEqual(linear_reg.coef_[0], np.mean(self.kalman_filter.hedge_ratios), delta=0.02) 43 | self.assertAlmostEqual(np.mean(self.kalman_filter.spread_series), 0.089165, delta=1e-5) 44 | self.assertAlmostEqual(np.mean(self.kalman_filter.spread_std_series), 1.109202, delta=1e-5) 45 | 46 | def test_kalman_filter_trading_signals(self): 47 | """ 48 | Tests the generation of trading signals from Kalman filter module. 
49 | """ 50 | 51 | price_data_subset = self.data[['TIP', 'IEF']] 52 | linear_reg = LinearRegression(fit_intercept=True) # Fit regression on the whole dataset 53 | linear_reg.fit(price_data_subset['TIP'].values.reshape(-1, 1), price_data_subset['IEF']) 54 | 55 | for _, row in price_data_subset.iterrows(): 56 | self.kalman_filter.update(row['TIP'], row['IEF']) 57 | 58 | signals = self.kalman_filter.trading_signals(entry_std_score=1, exit_std_score=1) 59 | 60 | self.assertAlmostEqual(signals['errors'].mean(), np.mean(self.kalman_filter.spread_series), delta=1e-5) 61 | self.assertAlmostEqual(signals['target_quantity'].sum(), -5, delta=0.1) 62 | self.assertAlmostEqual(abs(signals['target_quantity']).sum(), 27, delta=0.1) 63 | -------------------------------------------------------------------------------- /tests/test_mixed_copula.py: -------------------------------------------------------------------------------- 1 | """ 2 | Unit tests for mixed copulas - CFG and CTG. 3 | """ 4 | # pylint: disable = 5 | 6 | import os 7 | import unittest 8 | 9 | import pandas as pd 10 | 11 | from arbitragelab.copula_approach.mixed_copulas import CFGMixCop, CTGMixCop 12 | 13 | 14 | class TestBasicCopulaStrategy(unittest.TestCase): 15 | """ 16 | Test the BasicCopulaStrategy class. 17 | """ 18 | 19 | def setUp(self): 20 | """ 21 | Get the correct directory and data. 22 | """ 23 | 24 | project_path = os.path.dirname(__file__) 25 | data_path = project_path + "/test_data/BKD_ESC_2009_2011.csv" 26 | self.stocks = pd.read_csv(data_path, parse_dates=True, index_col="Date") 27 | 28 | def test_cfgmixcop_fit(self): 29 | """ 30 | Test CFGMixCop copula class. 
31 | """ 32 | 33 | # Init without parameters 34 | _ = CFGMixCop() 35 | 36 | # Init with parameters 37 | cop = CFGMixCop([2, 2, 2]) 38 | 39 | # Fit to data 40 | cop.fit(self.stocks) 41 | 42 | # Check describe 43 | descr = cop.describe() 44 | self.assertEqual(descr['Descriptive Name'], 'Bivariate Clayton-Frank-Gumbel Mixed Copula') 45 | self.assertEqual(descr['Class Name'], 'CFGMixCop') 46 | self.assertAlmostEqual(descr['Clayton theta'], 6.6958756, delta=0.1) 47 | self.assertAlmostEqual(descr['Frank theta'], 4.0003041, 1) 48 | self.assertAlmostEqual(descr['Gumbel theta'], 4.7874674, 1) 49 | self.assertAlmostEqual(descr['Clayton weight'], 0.503564, 1) 50 | self.assertAlmostEqual(descr['Frank weight'], 0.0, 1) 51 | self.assertAlmostEqual(descr['Gumbel weight'], 0.4964350, 1) 52 | 53 | # Check side-loading pairs generation 54 | sample_pairs = cop.sample(num=100) 55 | self.assertEqual(str(type(sample_pairs)), "") 56 | self.assertEqual(sample_pairs.shape, (100, 2)) 57 | 58 | def test_ctgmixcop_fit(self): 59 | """ 60 | Test CTGMixCop copula class. 
61 | """ 62 | 63 | # Init without parameters 64 | _ = CTGMixCop() 65 | 66 | # Init with parameters 67 | cop = CTGMixCop([4, 0.9, 4, 4]) 68 | 69 | # Fit to data, lower sample size to improve unit test speed 70 | cop.fit(self.stocks.iloc[:20]) 71 | 72 | # Check describe 73 | descr = cop.describe() 74 | self.assertEqual(descr['Descriptive Name'], 'Bivariate Clayton-Student-Gumbel Mixed Copula') 75 | self.assertEqual(descr['Class Name'], 'CTGMixCop') 76 | self.assertAlmostEqual(descr['Clayton theta'], 2.1268764, 1) 77 | self.assertAlmostEqual(descr['Student rho'], 0.001, 1) 78 | self.assertAlmostEqual(descr['Student nu'], 4.00676, 1) 79 | self.assertAlmostEqual(descr['Gumbel theta'], 5, 1) 80 | self.assertAlmostEqual(descr['Clayton weight'], 1, 1) 81 | self.assertAlmostEqual(descr['Student weight'], 0, 1) 82 | self.assertAlmostEqual(descr['Gumbel weight'], 0, 1) 83 | 84 | # Check side-loading pairs generation 85 | sample_pairs = cop.sample(num=100) 86 | self.assertEqual(str(type(sample_pairs)), "") 87 | self.assertEqual(sample_pairs.shape, (100, 2)) 88 | -------------------------------------------------------------------------------- /tests/test_ou_model_mudchanatongsuk.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test functions for the Mudchanatongsuk OU model in the Stochastic Control Approach module. 3 | """ 4 | 5 | import unittest 6 | import os 7 | import numpy as np 8 | import pandas as pd 9 | 10 | from arbitragelab.stochastic_control_approach.ou_model_mudchanatongsuk import OUModelMudchanatongsuk 11 | 12 | 13 | class TestOUModelMudchanatongsuk(unittest.TestCase): 14 | """ 15 | Tests the Mudchanatongsuk OU model in the Stochastic Control Approach module. 16 | """ 17 | 18 | @classmethod 19 | def setUpClass(cls) -> None: 20 | """ 21 | Setup data and params. 
22 | """ 23 | 24 | np.random.seed(0) 25 | 26 | project_path = os.path.dirname(__file__) 27 | 28 | path = project_path + '/test_data/gld_gdx_data.csv' 29 | data = pd.read_csv(path) 30 | data = data.set_index('Date') 31 | cls.dataframe = data[['GLD', 'GDX']] 32 | 33 | 34 | def test_fit(self): 35 | """ 36 | Tests the fit method in the class. 37 | """ 38 | 39 | # Creating an object of the class 40 | sc_mudchana = OUModelMudchanatongsuk() 41 | 42 | sc_mudchana.fit(self.dataframe) 43 | 44 | # Checking parameter values for spread calculation 45 | self.assertAlmostEqual(np.mean(sc_mudchana.spread), 2.0465361303, delta=1e-7) 46 | self.assertAlmostEqual(sc_mudchana.spread[7], 2.1073878043, delta=1e-7) 47 | self.assertAlmostEqual(sc_mudchana.spread[28], 2.0496029865, delta=1e-7) 48 | self.assertAlmostEqual(sc_mudchana.spread[-1], 2.0202245834, delta=1e-7) 49 | 50 | # Checking other parameter values. 51 | self.assertAlmostEqual(sc_mudchana.sigma, 0.503695, delta=1e-3) 52 | self.assertAlmostEqual(sc_mudchana.mu, 0.114877, delta=1e-3) 53 | self.assertAlmostEqual(sc_mudchana.k, 3.99205, delta=1e-3) 54 | self.assertAlmostEqual(sc_mudchana.theta, 1.98816, delta=1e-3) 55 | self.assertAlmostEqual(sc_mudchana.eta, 0.404292, delta=1e-3) 56 | self.assertAlmostEqual(sc_mudchana.rho, 0.96202, delta=1e-3) 57 | 58 | 59 | def test_describe(self): 60 | """ 61 | Tests the describe method in the class. 
62 | """ 63 | 64 | # Creating an object of the class 65 | sc_mudchana = OUModelMudchanatongsuk() 66 | 67 | # Testing for the run fit before this method exception 68 | with self.assertRaises(Exception): 69 | sc_mudchana.describe() 70 | 71 | sc_mudchana.fit(self.dataframe) 72 | 73 | index = ['Ticker of first stock', 'Ticker of second stock', 74 | 'long-term mean of spread', 'rate of mean reversion of spread', 'standard deviation of spread', 'half-life of spread', 75 | 'Drift of stock B', 'standard deviation of stock B'] 76 | 77 | data = ['GLD', 'GDX', 1.98816, 3.99205, 0.404292, 0.173632, 0.114877, 0.503695] 78 | 79 | # Testing the output of describe method 80 | pd.testing.assert_series_equal(pd.Series(index=index,data=data), sc_mudchana.describe(), check_exact=False, atol=1e-3) 81 | 82 | 83 | def test_optimal_weights(self): 84 | """ 85 | Tests the optimal portfolio weights method in the class. 86 | """ 87 | 88 | # Creating an object of the class 89 | sc_mudchana = OUModelMudchanatongsuk() 90 | 91 | # Testing for the run fit before this method exception 92 | with self.assertRaises(Exception): 93 | sc_mudchana.optimal_portfolio_weights(self.dataframe, gamma=-10) 94 | 95 | sc_mudchana.fit(self.dataframe) 96 | 97 | # Testing for invalid value of gamma exception 98 | with self.assertRaises(Exception): 99 | sc_mudchana.optimal_portfolio_weights(self.dataframe, gamma=10) 100 | 101 | weights = sc_mudchana.optimal_portfolio_weights(self.dataframe, gamma=-10) 102 | # Checking the values of weights 103 | self.assertAlmostEqual(np.mean(weights), 0.4986890920, delta=1e-3) 104 | self.assertAlmostEqual(weights[7], 0.5117099817, delta=1e-3) 105 | self.assertAlmostEqual(weights[28], 0.5246204647, delta=1e-3) 106 | self.assertAlmostEqual(weights[-1], 0.4043368460, delta=1e-3) 107 | -------------------------------------------------------------------------------- /tests/test_ou_optimal_threshold.py: -------------------------------------------------------------------------------- 1 | 
""" 2 | Tests functions from O-U Model Optimal Threshold module. 3 | """ 4 | 5 | # pylint: disable=protected-access 6 | import unittest 7 | import os 8 | import numpy as np 9 | import pandas as pd 10 | 11 | from arbitragelab.time_series_approach.ou_optimal_threshold import OUModelOptimalThreshold 12 | 13 | 14 | class TestOUModelOptimalThreshold(unittest.TestCase): 15 | """ 16 | Tests the base class of O-U Model Optimal Threshold module. 17 | """ 18 | 19 | def setUp(self): 20 | """ 21 | Set the file path for the data and testing variables. 22 | """ 23 | 24 | project_path = os.path.dirname(__file__) 25 | self.path = project_path + '/test_data/gld_gdx_data.csv' # Data Path 26 | 27 | data = pd.read_csv(self.path) 28 | data = data.set_index('Date') 29 | self.dataframe = data[['GLD', 'GDX']] 30 | 31 | self.assets = np.array(self.dataframe) 32 | self.assets_incorrect = np.zeros((4, 3)) # Data with incorrect dimensions 33 | 34 | asset_trans = self.assets.transpose() 35 | self.spread_series = (asset_trans[0][:] - asset_trans[0][0]) - 0.2 * \ 36 | (asset_trans[1][:] - asset_trans[1][0]) 37 | self.spread_series = np.exp(self.spread_series) 38 | 39 | # List with testing values for data frequency 40 | self.test_data_frequency = ["D", "M", "Y", "N"] 41 | 42 | def test_construct(self): 43 | """ 44 | Tests functions for O-U process construction. 
45 | """ 46 | 47 | # Creating an object of class 48 | test = OUModelOptimalThreshold() 49 | 50 | # Testing normal usage 51 | test.construct_ou_model_from_given_parameters(0, 0, 0) 52 | self.assertEqual(test.theta, 0) 53 | self.assertEqual(test.mu, 0) 54 | self.assertEqual(test.sigma, 0) 55 | 56 | # Testing different types of data input 57 | test.fit_ou_model_to_data(self.dataframe, self.test_data_frequency[0]) 58 | test.fit_ou_model_to_data(self.assets, self.test_data_frequency[0]) 59 | test.fit_ou_model_to_data(self.spread_series, self.test_data_frequency[0]) 60 | 61 | # Testing different types of data frequency 62 | test.fit_ou_model_to_data(self.dataframe, self.test_data_frequency[0]) 63 | test.fit_ou_model_to_data(self.dataframe, self.test_data_frequency[1]) 64 | test.fit_ou_model_to_data(self.dataframe, self.test_data_frequency[2]) 65 | 66 | def test_exeptions(self): 67 | """ 68 | Tests exceptions in the module. 69 | """ 70 | 71 | # Creating an object of class 72 | test = OUModelOptimalThreshold() 73 | 74 | # Testing for wrong data dimensions 75 | with self.assertRaises(Exception): 76 | test.fit_ou_model_to_data(self.assets_incorrect, self.test_data_frequency[0]) 77 | 78 | # Testing for wrong data frequency 79 | with self.assertRaises(Exception): 80 | test.fit_ou_model_to_data(self.dataframe, self.test_data_frequency[3]) 81 | 82 | def test_numerical(self): 83 | """ 84 | Tests functions for numerical calculation. 
85 | """ 86 | 87 | # Creating an object of class 88 | test = OUModelOptimalThreshold() 89 | 90 | # Testing whether the output value is correct 91 | self.assertAlmostEqual(test._w1(0), 0.0, places=1) 92 | self.assertAlmostEqual(test._w1(0.1), 0.00251386, places=5) 93 | self.assertAlmostEqual(test._w1(-0.1), -test._w1(0.1), places=5) 94 | self.assertAlmostEqual(test._w1(1), 3.566894, places=3) 95 | self.assertAlmostEqual(test._w1(-1), -test._w1(1), places=3) 96 | self.assertAlmostEqual(test._w1(100), 131.229, places=1) 97 | self.assertAlmostEqual(test._w1(-100), -test._w1(100), places=1) 98 | 99 | self.assertAlmostEqual(test._w2(0), 0.0, places=1) 100 | self.assertAlmostEqual(test._w2(0.1), -0.34718291, places=5) 101 | self.assertAlmostEqual(test._w2(-0.1), -test._w2(0.1), places=5) 102 | self.assertAlmostEqual(test._w2(1), -3.123865, places=3) 103 | self.assertAlmostEqual(test._w2(-1), -test._w2(1), places=3) 104 | self.assertAlmostEqual(test._w2(100), -11643.138, places=1) 105 | self.assertAlmostEqual(test._w2(-100), -test._w2(100), places=1) 106 | -------------------------------------------------------------------------------- /tests/test_quantile_time_series.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests AUTO ARIMA prediction functions. 3 | """ 4 | 5 | import unittest 6 | import numpy as np 7 | import pandas as pd 8 | 9 | from arbitragelab.time_series_approach import QuantileTimeSeriesTradingStrategy 10 | 11 | 12 | class TestQuantileTimeSeries(unittest.TestCase): 13 | """ 14 | Tests Auto ARIMA predictions. 
15 | """ 16 | 17 | def setUp(self): 18 | """ 19 | Builds the in-memory spread and forecast series used by the tests. 20 | """ 21 | 22 | spread_data = [-0.2, 0.3, 0.5, 1.7, 1.0, 0.0, -5, -6, -9, 23 | -7, -2, 1, 1.1, 1.2, 1.3, 1.4, 1.8, 3, 0.2] 24 | 25 | forecast_data = [-0.21, 0.35, 0.55, 1.6, 1.0, 0.0, -5.5, -6, 26 | -9.1, -7.1, -2.1, 1, 1.1, 1.3, 1.5, 1.9, 2, 27 | 0.2, 5] 28 | self.spread_series = pd.Series(spread_data) 29 | self.forecast_series = pd.Series(forecast_data) 30 | 31 | def test_time_series_strategy(self): 32 | """ 33 | Tests threshold fitting and position allocation of the quantile time series trading strategy. 34 | """ 35 | 36 | trading_strategy = QuantileTimeSeriesTradingStrategy() 37 | trading_strategy.fit_thresholds(self.spread_series) 38 | trading_strategy.plot_thresholds() 39 | self.assertAlmostEqual(trading_strategy.short_diff_threshold, -4, delta=1e-2) 40 | self.assertAlmostEqual(trading_strategy.long_diff_threshold, 2.9, delta=1e-2) 41 | 42 | # Test predictions 43 | for pred, actual in zip(self.forecast_series.shift(-1), self.spread_series): 44 | trading_strategy.get_allocation(pred-actual, exit_threshold=0) 45 | 46 | self.assertEqual(trading_strategy.positions[5], -1) 47 | self.assertEqual(trading_strategy.positions[9], 1) 48 | self.assertEqual(trading_strategy.positions[18], 0) 49 | self.assertAlmostEqual(np.mean(trading_strategy.positions), 0.21, delta=1e-2) 50 | -------------------------------------------------------------------------------- /tests/test_regressor_committee.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests Regressor Committee Class. 3 | """ 4 | 5 | import unittest 6 | from sklearn.datasets import make_regression 7 | from sklearn.model_selection import train_test_split 8 | from arbitragelab.ml_approach.regressor_committee import RegressorCommittee 9 | 10 | # pylint: disable=unbalanced-tuple-unpacking 11 | 12 | class TestRegressorCommittee(unittest.TestCase): 13 | """ 14 | Test Regressor Committee Implementation.
15 | """ 16 | 17 | def test_mlp_committee(self): 18 | """ 19 | Tests the Multi Layer Perceptron implementation. 20 | """ 21 | 22 | # Generate regression data. 23 | features, target = make_regression(500) 24 | 25 | _, frame_size = features.shape 26 | 27 | mlp_params = {'frame_size': frame_size, 'hidden_size': 8, 'num_outputs': 1, 'loss_fn': "mean_squared_error", 28 | 'optmizer': "adam", 'metrics': [], 'hidden_layer_activation_function': "sigmoid", 29 | 'output_layer_act_func': "linear"} 30 | 31 | # Initialize mlp committee. 32 | committee = RegressorCommittee(mlp_params, num_committee=2, epochs=100, verbose=False) 33 | 34 | feat_train, feat_test, trgt_train, trgt_test = train_test_split( 35 | features, target, test_size=0.3, shuffle=False) 36 | 37 | result = committee.fit(feat_train, trgt_train, feat_test, trgt_test) 38 | 39 | # Check if fit return is a valid RegressorCommittee model. 40 | self.assertTrue(type(result), RegressorCommittee) 41 | 42 | # Check if amount of predicted values match the input values. 43 | self.assertTrue(len(committee.predict(feat_test)) > 0) 44 | -------------------------------------------------------------------------------- /tests/test_tar.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests Spread modeling Threshold AutoRegression model implementation. 3 | """ 4 | import os 5 | import unittest 6 | 7 | import numpy as np 8 | import pandas as pd 9 | from statsmodels.regression.linear_model import RegressionResults 10 | from statsmodels.iolib.summary import Summary 11 | 12 | from arbitragelab.ml_approach.tar import TAR 13 | 14 | class TestTAR(unittest.TestCase): 15 | """ 16 | Test Threshold AutoRegressive Implementation. 17 | """ 18 | 19 | def setUp(self): 20 | """ 21 | Loads data needed for model fitting. 22 | """ 23 | 24 | # Set working seed. 25 | np.random.seed(0) 26 | 27 | project_path = os.path.dirname(__file__) 28 | 29 | # Load non negative versions of CL and RB contracts. 
30 | wti_contract_df = pd.read_csv( 31 | project_path + '/test_data/NonNegative_CL_forward_roll.csv').set_index('Dates') 32 | rbob_contract_df = pd.read_csv( 33 | project_path + '/test_data/NonNegative_nRB_forward_roll.csv').set_index('Dates') 34 | 35 | # Concatenate both contracts into one dataframe. 36 | working_df = pd.concat([wti_contract_df, rbob_contract_df], axis=1) 37 | working_df.index = pd.to_datetime(working_df.index) 38 | working_df.columns = ['wti', 'gasoline'] 39 | working_df.dropna(inplace=True) 40 | 41 | self.working_df = working_df 42 | 43 | def test_tar(self): 44 | """ 45 | Test TAR model using standard unprocessed spread as input value. 46 | """ 47 | #pylint: disable=too-many-function-args 48 | 49 | # Initialize TAR model with the standard [leg1 - leg2] spread as input value. 50 | model = TAR((self.working_df['gasoline'] - self.working_df['wti'])) 51 | 52 | # Check if returned a valid object. 53 | self.assertTrue(type(model), TAR) 54 | 55 | tar_results = model.fit() 56 | 57 | # Check that it returned valid regression results. 58 | self.assertTrue(type(tar_results), RegressionResults) 59 | 60 | # Check fitted values characteristics. 61 | self.assertAlmostEqual(tar_results.fittedvalues.mean(), 0, 0) 62 | self.assertAlmostEqual(tar_results.fittedvalues.max(), 0.011, 3) 63 | self.assertTrue(np.sign(tar_results.fittedvalues.min()), np.sign(-1)) 64 | 65 | self.assertTrue(type(tar_results.summary()), Summary) 66 | 67 | # Check that it returned valid custom model results. 
68 | self.assertTrue(type(model.summary()), pd.DataFrame) 69 | -------------------------------------------------------------------------------- /tests/test_trading_minimum_profit.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests class of the Spread Trading module: 3 | trading/cointegration_approach/minimum_profit.py 4 | """ 5 | 6 | import os 7 | import unittest 8 | 9 | import pandas as pd 10 | import numpy as np 11 | 12 | from arbitragelab.trading.minimum_profit import MinimumProfitTradingRule 13 | 14 | 15 | class TestMinimumProfitTradingRule(unittest.TestCase): 16 | """ 17 | Test MinimumProfitTradingRule functions. 18 | """ 19 | 20 | def setUp(self): 21 | """ 22 | Creates spread and variables to use the minimum profit trading rule on. 23 | """ 24 | 25 | project_path = os.path.dirname(__file__) 26 | data_path = project_path + '/test_data/stock_prices.csv' 27 | 28 | price_data = pd.read_csv(data_path, parse_dates=True, index_col="Date")[['EEM', 'EWG']] 29 | beta = -1.6235743 30 | 31 | self.spread_series = price_data['EEM'] + beta * price_data['EWG'] 32 | self.shares = np.array([10, 15]) 33 | self.optimal_levels = np.array([-5.63296941, -4.77296941, -3.91296941]) 34 | 35 | def test_strategy_normal_use(self): 36 | """ 37 | Tests the normal use of the strategy, feeding spread value by value. 
38 | """ 39 | 40 | strategy = MinimumProfitTradingRule(self.shares, self.optimal_levels) 41 | 42 | # Add initial spread value 43 | strategy.update_spread_value(self.spread_series[0]) 44 | 45 | # Run over next 46 | for ind in range(1, len(self.spread_series[1:])): 47 | strategy.update_spread_value(self.spread_series[ind]) 48 | trade, side = strategy.check_entry_signal() 49 | 50 | if trade: 51 | strategy.add_trade(start_timestamp=self.spread_series.index[ind], side_prediction=side) 52 | strategy.update_trades(update_timestamp=self.spread_series.index[ind]) 53 | 54 | self.assertEqual(len(strategy.open_trades), 0) 55 | self.assertEqual(len(strategy.closed_trades), 30) 56 | 57 | self.assertEqual(list(strategy.closed_trades.keys())[0].to_datetime64(), 58 | pd.Timestamp('2008-01-03 00:00+00:00').to_datetime64()) 59 | self.assertEqual(list(strategy.closed_trades.keys())[10].to_datetime64(), 60 | pd.Timestamp('2008-09-15 00:00+00:00').to_datetime64()) 61 | self.assertEqual(list(strategy.closed_trades.keys())[20].to_datetime64(), 62 | pd.Timestamp('2008-12-19 00:00+00:00').to_datetime64()) 63 | -------------------------------------------------------------------------------- /tests/test_trading_multi_coint.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests class of the Spread Trading module: 3 | trading/cointegration_approach/multi_coint.py 4 | """ 5 | 6 | import os 7 | import unittest 8 | 9 | import pandas as pd 10 | 11 | from arbitragelab.trading.multi_coint import MultivariateCointegrationTradingRule 12 | 13 | 14 | class TestMultivariateCointegrationTradingRule(unittest.TestCase): 15 | """ 16 | Test MultivariateCointegrationTradingRule functions. 17 | """ 18 | 19 | def setUp(self): 20 | """ 21 | Creates spread and variables to use the multivariate cointegration profit trading rule on. 
22 | """ 23 | 24 | project_path = os.path.dirname(__file__) 25 | data_path = project_path + '/test_data/stock_prices.csv' 26 | 27 | self.price_values = pd.read_csv(data_path, parse_dates=True, index_col="Date")[['EEM', 'EWG', 'IEF']].iloc[:200] 28 | 29 | self.coint_vec = pd.Series({'EEM': 0.778721, 30 | 'EWG': 4.545739, 31 | 'IEF': -6.459130}) 32 | 33 | def test_strategy_normal_use(self): 34 | """ 35 | Tests the normal use of the strategy, feeding spread value by value. 36 | """ 37 | 38 | strategy = MultivariateCointegrationTradingRule(self.coint_vec) 39 | 40 | # Add initial spread value 41 | strategy.update_price_values(self.price_values.iloc[0]) 42 | 43 | # Run over next 44 | for ind in range(1, len(self.price_values[1:])): 45 | 46 | time = self.price_values.index[ind] 47 | value = self.price_values.iloc[ind] 48 | 49 | strategy.update_price_values(value) 50 | 51 | # Getting signal 52 | pos_shares, neg_shares, pos_notional, neg_notional = strategy.get_signal() 53 | 54 | strategy.add_trade(start_timestamp=time, pos_shares=pos_shares, neg_shares=neg_shares) 55 | 56 | strategy.update_trades(update_timestamp=time) 57 | 58 | self.assertEqual(len(strategy.open_trades), 0) 59 | self.assertEqual(len(strategy.closed_trades), 198) 60 | 61 | self.assertEqual(pos_shares['EEM'], 51067.0) 62 | self.assertEqual(pos_shares['EWG'], 420773.0) 63 | self.assertEqual(neg_shares['IEF'], -115381.0) 64 | 65 | self.assertAlmostEqual(pos_notional['EEM'], 1462558.848, 2) 66 | self.assertAlmostEqual(pos_notional['EWG'], 8537484.555, 2) 67 | self.assertAlmostEqual(neg_notional['IEF'], -10000071.058, 2) 68 | 69 | self.assertEqual(list(strategy.closed_trades.keys())[0].to_datetime64(), 70 | pd.Timestamp('2008-01-03 00:00+00:00').to_datetime64()) 71 | self.assertEqual(list(strategy.closed_trades.keys())[50].to_datetime64(), 72 | pd.Timestamp('2008-03-17 00:00+00:00').to_datetime64()) 73 | self.assertEqual(list(strategy.closed_trades.keys())[100].to_datetime64(), 74 | pd.Timestamp('2008-05-28 
class TestBollingerBandsTradingRule(unittest.TestCase):
    """
    Test BollingerBandsTradingRule functions.
    """

    def setUp(self):
        """
        Creates spread to use the bollinger bands trading rule on.

        Reads the EEM and EWG columns of ``test_data/stock_prices.csv`` (located
        next to this test module), scales them by fixed hedge ratios and stores
        ``EEM - sum(other weighted columns)`` in ``self.spread_series``.
        """

        project_path = os.path.dirname(__file__)
        # os.path.join keeps the path relative when dirname() is empty, instead of
        # producing '/test_data/...' at the filesystem root.
        data_path = os.path.join(project_path, 'test_data', 'stock_prices.csv')

        price_data = pd.read_csv(data_path, parse_dates=True, index_col="Date")[['EEM', 'EWG']]
        hedge_ratios = {'EEM': 1.0, 'EWG': 1.5766259695851286}

        weighted_prices = price_data * hedge_ratios
        non_dependent_variables = [x for x in weighted_prices.columns if x != 'EEM']

        self.spread_series = weighted_prices['EEM'] - weighted_prices[non_dependent_variables].sum(axis=1)

    def test_strategy_normal_use(self):
        """
        Tests the normal use of the strategy, feeding spread value by value.
        """

        strategy = BollingerBandsTradingRule(10, 10, entry_z_score=2.5, exit_z_score_delta=3)

        # Add initial spread value.
        # The series is indexed by date, so positional access must go through
        # .iloc; plain integer keys on such a Series are deprecated in pandas.
        strategy.update_spread_value(self.spread_series.iloc[0])

        # Run over next values.
        # The bound equals the original `len(self.spread_series[1:])`, so the last
        # observation is never fed; the expected trade count and dates below are
        # pinned to that behaviour.
        for ind in range(1, len(self.spread_series) - 1):
            timestamp = self.spread_series.index[ind]

            strategy.update_spread_value(self.spread_series.iloc[ind])
            trade, side = strategy.check_entry_signal()

            if trade:
                strategy.add_trade(start_timestamp=timestamp, side_prediction=side)

            # NOTE(review): nesting reconstructed from a whitespace-collapsed source;
            # update_trades is taken to run on every step - confirm.
            strategy.update_trades(update_timestamp=timestamp)

        self.assertEqual(len(strategy.open_trades), 0)
        self.assertEqual(len(strategy.closed_trades), 3)

        closed_timestamps = list(strategy.closed_trades.keys())

        self.assertEqual(closed_timestamps[0].to_datetime64(),
                         pd.Timestamp('2008-06-25 00:00+00:00').to_datetime64())
        self.assertEqual(closed_timestamps[1].to_datetime64(),
                         pd.Timestamp('2011-02-09 00:00+00:00').to_datetime64())
        self.assertEqual(closed_timestamps[2].to_datetime64(),
                         pd.Timestamp('2013-01-25 00:00+00:00').to_datetime64())

    def test_get_z_score(self):
        """
        Tests the use of the get_z_score method.
        """

        # Create a deque holding the first five spread values
        spread_slice = deque(self.spread_series.iloc[:5], maxlen=5)

        z_score = BollingerBandsTradingRule.get_z_score(spread_slice, 5, 5)

        self.assertAlmostEqual(z_score, 0.56609, delta=1e-5)

    def test_check_entry_signal_zero_std(self):
        """
        Tests the generation of a negative signal if std of spread is zero.
        """

        strategy = BollingerBandsTradingRule(5, 5, entry_z_score=2.5, exit_z_score_delta=3)

        # Feed same values
        for _ in range(5):
            strategy.update_spread_value(0.5)

        # NOTE(review): nesting reconstructed from a whitespace-collapsed source;
        # the signal is taken to be checked once, after all values are fed - confirm.
        signal, _ = strategy.check_entry_signal()

        self.assertFalse(signal)