├── qlib ├── cli │ ├── __init__.py │ └── data.py ├── contrib │ ├── __init__.py │ ├── eva │ │ └── __init__.py │ ├── ops │ │ └── __init__.py │ ├── data │ │ ├── __init__.py │ │ ├── utils │ │ │ └── __init__.py │ │ └── data.py │ ├── report │ │ ├── report │ │ │ ├── __init__.py │ │ │ └── template │ │ │ │ └── general_report_template.html │ │ ├── data │ │ │ ├── __init__.py │ │ │ └── base.py │ │ ├── analysis_model │ │ │ └── __init__.py │ │ ├── __init__.py │ │ └── analysis_position │ │ │ └── __init__.py │ ├── tuner │ │ ├── __init__.py │ │ ├── space.py │ │ └── launcher.py │ ├── workflow │ │ └── __init__.py │ ├── meta │ │ ├── __init__.py │ │ └── data_selection │ │ │ └── __init__.py │ ├── strategy │ │ ├── optimizer │ │ │ ├── __init__.py │ │ │ └── base.py │ │ └── __init__.py │ ├── rolling │ │ ├── __init__.py │ │ └── __main__.py │ ├── online │ │ ├── __init__.py │ │ └── online_model.py │ ├── torch.py │ └── model │ │ ├── pytorch_utils.py │ │ ├── __init__.py │ │ └── svm.py ├── model │ ├── ens │ │ └── __init__.py │ ├── interpret │ │ ├── __init__.py │ │ └── base.py │ ├── __init__.py │ ├── meta │ │ ├── __init__.py │ │ └── task.py │ ├── riskmodel │ │ └── __init__.py │ └── utils.py ├── rl │ ├── contrib │ │ ├── __init__.py │ │ └── utils.py │ ├── strategy │ │ ├── __init__.py │ │ └── single_order.py │ ├── data │ │ ├── __init__.py │ │ └── base.py │ ├── __init__.py │ ├── seed.py │ ├── trainer │ │ └── __init__.py │ ├── utils │ │ └── __init__.py │ ├── order_execution │ │ ├── __init__.py │ │ └── utils.py │ └── aux_info.py ├── walkforward │ ├── __init__.py │ └── walkforward_handler.py ├── workflow │ ├── online │ │ └── __init__.py │ ├── task │ │ └── __init__.py │ └── utils.py ├── data │ ├── _libs │ │ └── __init__.py │ ├── storage │ │ ├── arctic_storage │ │ │ ├── __init__.py │ │ │ ├── arctic_storage_structure.md │ │ │ └── base.py │ │ └── __init__.py │ ├── inst_processor.py │ ├── inst_info.py │ ├── dataset │ │ └── weight.py │ └── __init__.py ├── strategy │ └── __init__.py ├── utils │ └── exceptions.py ├── constant.py └── typehint.py ├── examples ├── model_rolling │ └── requirements.txt ├── benchmarks │ ├── Localformer │ │ ├── README.md │ │ └── requirements.txt │ ├── MLP │ │ ├── README.md │ │ └── requirements.txt │ ├── KRNN │ │ ├── requirements.txt │ │ └── README.md │ ├── Sandwich │ │ ├── requirements.txt │ │ └── README.md │ ├── TFT │ │ ├── requirements.txt │ │ ├── libs │ │ │ └── __init__.py │ │ ├── data_formatters │ │ │ └── __init__.py │ │ ├── expt_settings │ │ │ └── __init__.py │ │ ├── README.md │ │ └── workflow_config_tft_Alpha158.yaml │ ├── LightGBM │ │ ├── requirements.txt │ │ ├── README.md │ │ ├── features_resample_N.py │ │ ├── features_sample.py │ │ └── workflow_config_lightgbm_Alpha158.yaml │ ├── Transformer │ │ ├── requirements.txt │ │ └── README.md │ ├── XGBoost │ │ ├── requirements.txt │ │ ├── README.md │ │ └── workflow_config_xgboost_Alpha158.yaml │ ├── CatBoost │ │ ├── requirements.txt │ │ ├── README.md │ │ ├── workflow_config_catboost_Alpha158.yaml │ │ └── workflow_config_catboost_Alpha158_csi500.yaml │ ├── DoubleEnsemble │ │ ├── requirements.txt │ │ └── README.md │ ├── Linear │ │ └── requirements.txt │ ├── ADD │ │ ├── requirements.txt │ │ └── README.md │ ├── GRU │ │ ├── requirements.txt │ │ ├── csi300_gru_ts.pkl │ │ ├── model_gru_csi300.pkl │ │ └── README.md │ ├── HIST │ │ ├── requirements.txt │ │ ├── qlib_csi300_stock_index.npy │ │ └── README.md │ ├── SFM │ │ ├── requirements.txt │ │ └── README.md │ ├── TCN │ │ ├── requirements.txt │ │ └── README.md │ ├── TCTS │ │ ├── requirements.txt │ │ └── workflow.png │ 
├── TabNet │ │ ├── requirements.txt │ │ └── README.md │ ├── ADARNN │ │ ├── requirements.txt │ │ └── README.md │ ├── ALSTM │ │ ├── requirements.txt │ │ └── README.md │ ├── GATs │ │ ├── requirements.txt │ │ └── README.md │ ├── IGMTF │ │ ├── requirements.txt │ │ └── README.md │ ├── LSTM │ │ ├── requirements.txt │ │ ├── csi300_lstm_ts.pkl │ │ ├── model_lstm_csi300.pkl │ │ └── README.md │ ├── TRA │ │ ├── requirements.txt │ │ ├── data │ │ │ └── README.md │ │ ├── run.sh │ │ ├── example.py │ │ └── configs │ │ │ ├── config_alstm.yaml │ │ │ ├── config_alstm_tra_init.yaml │ │ │ ├── config_transformer.yaml │ │ │ ├── config_transformer_tra_init.yaml │ │ │ ├── config_alstm_tra.yaml │ │ │ └── config_transformer_tra.yaml │ └── GeneralPtNN │ │ └── README.md ├── benchmarks_dynamic │ ├── DDG-DA │ │ ├── requirements.txt │ │ ├── Makefile │ │ └── workflow.py │ ├── baseline │ │ ├── README.md │ │ └── rolling_benchmark.py │ └── README.md ├── data_demo │ ├── README.md │ └── data_cache_demo.py ├── hyperparameter │ └── LightGBM │ │ ├── requirements.txt │ │ ├── Readme.md │ │ ├── hyperparameter_158.py │ │ └── hyperparameter_360.py ├── README.md ├── rl_order_execution │ ├── scripts │ │ ├── merge_orders.py │ │ ├── gen_pickle_data.py │ │ └── gen_training_orders.py │ └── exp_configs │ │ ├── backtest_twap.yml │ │ ├── backtest_ppo.yml │ │ ├── backtest_opds.yml │ │ ├── train_opds.yml │ │ └── train_ppo.yml ├── rolling_process_data │ ├── README.md │ └── rolling_handler.py ├── model_interpreter │ └── feature.py ├── nested_decision_execution │ └── README.md ├── portfolio │ ├── README.md │ └── prepare_riskdata.py ├── highfreq │ ├── README.md │ └── workflow_config_High_Freq_Tree_Alpha158.yaml ├── orderbook_data │ └── README.md └── online_srv │ └── update_online_pred.py ├── docs ├── changelog │ └── changelog.rst ├── _static │ ├── img │ │ ├── logo │ │ │ ├── 1.png │ │ │ ├── 2.png │ │ │ ├── 3.png │ │ │ ├── yellow_bg_rec.png │ │ │ ├── yel_bg_rec+word.png │ │ │ ├── white_bg_rec+word.png │ │ │ └── yellow_bg_rec+word .png │ │ ├── change doc.gif │ │ ├── framework.png │ │ ├── topk_drop.png │ │ ├── RL_framework.png │ │ ├── rdagent_logo.png │ │ ├── QlibRL_framework.png │ │ ├── analysis │ │ │ ├── report.png │ │ │ ├── score_ic.png │ │ │ ├── rank_label_buy.png │ │ │ ├── analysis_model_IC.png │ │ │ ├── rank_label_hold.png │ │ │ ├── rank_label_sell.png │ │ │ ├── risk_analysis_bar.png │ │ │ ├── risk_analysis_std.png │ │ │ ├── analysis_model_NDQ.png │ │ │ ├── cumulative_return_buy.png │ │ │ ├── cumulative_return_hold.png │ │ │ ├── cumulative_return_sell.png │ │ │ ├── analysis_model_long_short.png │ │ │ ├── analysis_model_monthly_IC.png │ │ │ ├── risk_analysis_max_drawdown.png │ │ │ ├── analysis_model_auto_correlation.png │ │ │ ├── analysis_model_cumulative_return.png │ │ │ ├── cumulative_return_buy_minus_sell.png │ │ │ ├── risk_analysis_annualized_return.png │ │ │ └── risk_analysis_information_ratio.png │ │ ├── online_serving.png │ │ ├── qrcode │ │ │ └── gitter_qr.png │ │ └── framework-abstract.jpg │ └── demo.sh ├── requirements.txt ├── component │ ├── rl │ │ └── toctree.rst │ └── online.rst ├── Makefile ├── make.bat ├── advanced │ ├── server.rst │ └── serial.rst └── start │ └── installation.rst ├── .gitattributes ├── .dockerignore ├── scripts ├── data_collector │ ├── us_index │ │ ├── requirements.txt │ │ └── README.md │ ├── contrib │ │ ├── fill_cn_1min_data │ │ │ ├── requirements.txt │ │ │ └── README.md │ │ └── future_trading_date_collector │ │ │ ├── requirements.txt │ │ │ └── README.md │ ├── crypto │ │ ├── requirement.txt │ │ └── README.md │ ├── 
fund │ │ ├── requirements.txt │ │ └── README.md │ ├── cn_index │ │ ├── requirements.txt │ │ └── README.md │ ├── pit │ │ ├── requirements.txt │ │ └── README.md │ ├── yahoo │ │ └── requirements.txt │ ├── baostock_5min │ │ └── requirements.txt │ ├── br_index │ │ └── requirements.txt │ └── crowd_source │ │ └── README.md ├── get_data.py ├── collect_info.py └── README.md ├── tests ├── dataset_tests │ ├── README.md │ └── test_datalayer.py ├── data_mid_layer_tests │ ├── README.md │ └── test_handler.py ├── dependency_tests │ ├── README.md │ └── test_mlflow.py ├── pytest.ini ├── conftest.py ├── test_contrib_model.py ├── test_workflow.py ├── misc │ ├── test_get_multi_proc.py │ └── test_sepdf.py ├── test_get_data.py └── test_register_ops.py ├── MANIFEST.in ├── .pylintrc ├── .deepsource.toml ├── .github ├── ISSUE_TEMPLATE │ ├── documentation.md │ ├── question.md │ ├── feature-request.md │ └── bug-report.md ├── release-drafter.yml ├── PULL_REQUEST_TEMPLATE.md └── workflows │ └── test_qlib_from_source_slow.yml ├── .pre-commit-config.yaml ├── .mypy.ini ├── CODE_OF_CONDUCT.md ├── .readthedocs.yaml ├── .gitignore ├── Dockerfile ├── setup.py └── LICENSE /qlib/cli/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /qlib/contrib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /qlib/contrib/eva/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /qlib/contrib/ops/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /qlib/model/ens/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /qlib/rl/contrib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /qlib/walkforward/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /qlib/contrib/data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /qlib/model/interpret/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /qlib/workflow/online/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /qlib/contrib/data/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /qlib/contrib/report/report/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
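Before the individual files are listed below, it may help to see how the main pieces of the `qlib` package tree above fit together at runtime. The following is a minimal, illustrative sketch rather than code from the repository: it assumes the standard public API (`qlib.init`, the `D` data accessor under `qlib/data`, and the region constants in `qlib/constant.py`), and it assumes a data snapshot has already been downloaded to `~/.qlib/qlib_data/cn_data` (e.g. via `scripts/get_data.py`).

```python
import qlib
from qlib.constant import REG_CN  # region constants live in qlib/constant.py (see the tree above)
from qlib.data import D           # high-level data accessor backed by the qlib/data modules

# Point qlib at a locally prepared data snapshot (this path is an assumption).
qlib.init(provider_uri="~/.qlib/qlib_data/cn_data", region=REG_CN)

# Fetch two price fields for one instrument over a short window.
df = D.features(
    instruments=["SH600000"],
    fields=["$close", "$volume"],
    start_time="2020-01-02",
    end_time="2020-01-10",
)
print(df.head())
```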
/examples/model_rolling/requirements.txt: -------------------------------------------------------------------------------- 1 | xgboost 2 | -------------------------------------------------------------------------------- /docs/changelog/changelog.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../../CHANGES.rst 2 | -------------------------------------------------------------------------------- /examples/benchmarks/Localformer/README.md: -------------------------------------------------------------------------------- 1 | # Localformer 2 | -------------------------------------------------------------------------------- /examples/benchmarks/MLP/README.md: -------------------------------------------------------------------------------- 1 | # Multi-Layer Perceptron (MLP) 2 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | .github/workflows merge=ours 2 | .gitattributes merge=ours -------------------------------------------------------------------------------- /examples/benchmarks_dynamic/DDG-DA/requirements.txt: -------------------------------------------------------------------------------- 1 | torch==1.10.0 2 | -------------------------------------------------------------------------------- /qlib/contrib/tuner/__init__.py: -------------------------------------------------------------------------------- 1 | # pylint: skip-file 2 | # flake8: noqa 3 | -------------------------------------------------------------------------------- /examples/benchmarks/KRNN/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.23.4 2 | pandas==1.5.2 3 | -------------------------------------------------------------------------------- /examples/benchmarks/Sandwich/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.23.4 2 | pandas==1.5.2 3 | -------------------------------------------------------------------------------- /examples/benchmarks/TFT/requirements.txt: -------------------------------------------------------------------------------- 1 | tensorflow-gpu==1.15.0 2 | pandas==1.1.0 3 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | *.pyc 3 | *.pyo 4 | *.pyd 5 | .Python 6 | .env 7 | .git 8 | 9 | -------------------------------------------------------------------------------- /examples/benchmarks/LightGBM/requirements.txt: -------------------------------------------------------------------------------- 1 | pandas==1.1.2 2 | numpy==1.21.0 3 | lightgbm 4 | -------------------------------------------------------------------------------- /examples/benchmarks/Localformer/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.21.0 2 | pandas==1.1.2 3 | torch==1.2.0 -------------------------------------------------------------------------------- /examples/benchmarks/Transformer/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.21.0 2 | pandas==1.1.2 3 | torch==1.2.0 -------------------------------------------------------------------------------- /examples/benchmarks/XGBoost/requirements.txt: 
-------------------------------------------------------------------------------- 1 | numpy==1.21.0 2 | pandas==1.1.2 3 | xgboost==1.2.1 -------------------------------------------------------------------------------- /examples/benchmarks/CatBoost/requirements.txt: -------------------------------------------------------------------------------- 1 | pandas==1.1.2 2 | numpy==1.21.0 3 | catboost==0.24.3 4 | -------------------------------------------------------------------------------- /scripts/data_collector/us_index/requirements.txt: -------------------------------------------------------------------------------- 1 | fire 2 | requests 3 | pandas 4 | lxml 5 | loguru 6 | -------------------------------------------------------------------------------- /docs/_static/img/logo/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/logo/1.png -------------------------------------------------------------------------------- /docs/_static/img/logo/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/logo/2.png -------------------------------------------------------------------------------- /docs/_static/img/logo/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/logo/3.png -------------------------------------------------------------------------------- /examples/benchmarks/DoubleEnsemble/requirements.txt: -------------------------------------------------------------------------------- 1 | pandas==1.1.2 2 | numpy==1.21.0 3 | lightgbm==3.1.0 -------------------------------------------------------------------------------- /examples/benchmarks/Linear/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.17.4 2 | pandas>=1.0.1 3 | scikit-learn>=0.23.1 4 | -------------------------------------------------------------------------------- /qlib/data/_libs/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | -------------------------------------------------------------------------------- /qlib/strategy/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
3 | -------------------------------------------------------------------------------- /examples/benchmarks_dynamic/DDG-DA/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean 2 | 3 | clean: 4 | -rm -r *.pkl mlruns || true 5 | -------------------------------------------------------------------------------- /scripts/data_collector/contrib/fill_cn_1min_data/requirements.txt: -------------------------------------------------------------------------------- 1 | fire 2 | pandas 3 | loguru 4 | tqdm 5 | pyqlib -------------------------------------------------------------------------------- /docs/_static/img/change doc.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/change doc.gif -------------------------------------------------------------------------------- /docs/_static/img/framework.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/framework.png -------------------------------------------------------------------------------- /docs/_static/img/topk_drop.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/topk_drop.png -------------------------------------------------------------------------------- /examples/benchmarks/ADD/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.21.0 2 | pandas==1.1.2 3 | scikit_learn==0.23.2 4 | torch==1.7.0 5 | -------------------------------------------------------------------------------- /examples/benchmarks/GRU/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.21.0 2 | pandas==1.1.2 3 | scikit_learn==0.23.2 4 | torch==1.7.0 5 | -------------------------------------------------------------------------------- /examples/benchmarks/HIST/requirements.txt: -------------------------------------------------------------------------------- 1 | pandas==1.1.2 2 | numpy==1.21.0 3 | scikit_learn==0.23.2 4 | torch==1.7.0 -------------------------------------------------------------------------------- /examples/benchmarks/MLP/requirements.txt: -------------------------------------------------------------------------------- 1 | pandas==1.1.2 2 | numpy==1.21.0 3 | scikit_learn==0.23.2 4 | torch==1.7.0 5 | -------------------------------------------------------------------------------- /examples/benchmarks/SFM/requirements.txt: -------------------------------------------------------------------------------- 1 | pandas==1.1.2 2 | numpy==1.21.0 3 | scikit_learn==0.23.2 4 | torch==1.7.0 5 | -------------------------------------------------------------------------------- /examples/benchmarks/TCN/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.21.0 2 | pandas==1.1.2 3 | scikit_learn==0.23.2 4 | torch==1.7.0 5 | -------------------------------------------------------------------------------- /examples/benchmarks/TCTS/requirements.txt: -------------------------------------------------------------------------------- 1 | pandas==1.1.2 2 | numpy==1.21.0 3 | scikit_learn==0.23.2 4 | torch==1.7.0 -------------------------------------------------------------------------------- /examples/benchmarks/TabNet/requirements.txt: 
-------------------------------------------------------------------------------- 1 | pandas==1.1.2 2 | numpy==1.21.0 3 | scikit_learn==0.23.2 4 | torch==1.7.0 -------------------------------------------------------------------------------- /docs/_static/img/RL_framework.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/RL_framework.png -------------------------------------------------------------------------------- /docs/_static/img/rdagent_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/rdagent_logo.png -------------------------------------------------------------------------------- /examples/benchmarks/ADARNN/requirements.txt: -------------------------------------------------------------------------------- 1 | pandas==1.1.2 2 | numpy==1.21.0 3 | scikit_learn==0.23.2 4 | torch==1.7.0 5 | -------------------------------------------------------------------------------- /examples/benchmarks/ALSTM/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.21.0 2 | pandas==1.1.2 3 | scikit_learn==0.23.2 4 | torch==1.7.0 5 | -------------------------------------------------------------------------------- /examples/benchmarks/GATs/requirements.txt: -------------------------------------------------------------------------------- 1 | pandas==1.1.2 2 | numpy==1.21.0 3 | scikit_learn==0.23.2 4 | torch==1.7.0 5 | -------------------------------------------------------------------------------- /examples/benchmarks/IGMTF/requirements.txt: -------------------------------------------------------------------------------- 1 | pandas==1.1.2 2 | numpy==1.21.0 3 | scikit_learn==0.23.2 4 | torch==1.7.0 5 | -------------------------------------------------------------------------------- /examples/benchmarks/LSTM/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.21.0 2 | pandas==1.1.2 3 | scikit_learn==0.23.2 4 | torch==1.7.0 5 | -------------------------------------------------------------------------------- /docs/_static/img/QlibRL_framework.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/QlibRL_framework.png -------------------------------------------------------------------------------- /docs/_static/img/analysis/report.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/analysis/report.png -------------------------------------------------------------------------------- /docs/_static/img/online_serving.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/online_serving.png -------------------------------------------------------------------------------- /docs/_static/img/qrcode/gitter_qr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/qrcode/gitter_qr.png -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 
1 | Cython 2 | cmake 3 | numpy 4 | scipy 5 | scikit-learn 6 | pandas 7 | tianshou 8 | sphinx_rtd_theme 9 | -------------------------------------------------------------------------------- /examples/benchmarks/TCTS/workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/examples/benchmarks/TCTS/workflow.png -------------------------------------------------------------------------------- /scripts/data_collector/crypto/requirement.txt: -------------------------------------------------------------------------------- 1 | loguru 2 | fire 3 | requests 4 | numpy 5 | pandas 6 | tqdm 7 | lxml 8 | pycoingecko -------------------------------------------------------------------------------- /tests/dataset_tests/README.md: -------------------------------------------------------------------------------- 1 | # About dataset tests 2 | Tests in this folder are for testing the prepared dataset from Yahoo 3 | -------------------------------------------------------------------------------- /docs/_static/img/analysis/score_ic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/analysis/score_ic.png -------------------------------------------------------------------------------- /docs/_static/img/framework-abstract.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/framework-abstract.jpg -------------------------------------------------------------------------------- /docs/_static/img/logo/yellow_bg_rec.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/logo/yellow_bg_rec.png -------------------------------------------------------------------------------- /examples/benchmarks/TRA/requirements.txt: -------------------------------------------------------------------------------- 1 | pandas==1.1.2 2 | numpy==1.21.0 3 | scikit_learn==0.23.2 4 | torch==1.7.0 5 | seaborn 6 | -------------------------------------------------------------------------------- /scripts/data_collector/contrib/future_trading_date_collector/requirements.txt: -------------------------------------------------------------------------------- 1 | baostock 2 | fire 3 | numpy 4 | pandas 5 | loguru 6 | -------------------------------------------------------------------------------- /docs/_static/img/logo/yel_bg_rec+word.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/logo/yel_bg_rec+word.png -------------------------------------------------------------------------------- /examples/benchmarks/GRU/csi300_gru_ts.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/examples/benchmarks/GRU/csi300_gru_ts.pkl -------------------------------------------------------------------------------- /examples/benchmarks/TRA/data/README.md: -------------------------------------------------------------------------------- 1 | Data Link: https://drive.google.com/drive/folders/1fMqZYSeLyrHiWmVzygeI4sw3vp5Gt8cY?usp=sharing 2 | -------------------------------------------------------------------------------- /docs/_static/img/analysis/rank_label_buy.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/analysis/rank_label_buy.png -------------------------------------------------------------------------------- /docs/_static/img/logo/white_bg_rec+word.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/logo/white_bg_rec+word.png -------------------------------------------------------------------------------- /examples/benchmarks/GRU/model_gru_csi300.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/examples/benchmarks/GRU/model_gru_csi300.pkl -------------------------------------------------------------------------------- /examples/benchmarks/LSTM/csi300_lstm_ts.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/examples/benchmarks/LSTM/csi300_lstm_ts.pkl -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | exclude tests/* 2 | include qlib/* 3 | include qlib/*/* 4 | include qlib/*/*/* 5 | include qlib/*/*/*/* 6 | include qlib/*/*/*/*/* 7 | -------------------------------------------------------------------------------- /docs/_static/img/analysis/analysis_model_IC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/analysis/analysis_model_IC.png -------------------------------------------------------------------------------- /docs/_static/img/analysis/rank_label_hold.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/analysis/rank_label_hold.png -------------------------------------------------------------------------------- /docs/_static/img/analysis/rank_label_sell.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/analysis/rank_label_sell.png -------------------------------------------------------------------------------- /docs/_static/img/analysis/risk_analysis_bar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/analysis/risk_analysis_bar.png -------------------------------------------------------------------------------- /docs/_static/img/analysis/risk_analysis_std.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/analysis/risk_analysis_std.png -------------------------------------------------------------------------------- /docs/_static/img/logo/yellow_bg_rec+word .png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/logo/yellow_bg_rec+word .png -------------------------------------------------------------------------------- /examples/benchmarks/LSTM/model_lstm_csi300.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/examples/benchmarks/LSTM/model_lstm_csi300.pkl -------------------------------------------------------------------------------- /examples/data_demo/README.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | The examples in this folder demonstrate some common usages of Qlib's data-related modules. 3 | -------------------------------------------------------------------------------- /docs/_static/img/analysis/analysis_model_NDQ.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/analysis/analysis_model_NDQ.png -------------------------------------------------------------------------------- /examples/hyperparameter/LightGBM/requirements.txt: -------------------------------------------------------------------------------- 1 | pandas==1.1.2 2 | numpy==1.21.0 3 | lightgbm==3.1.0 4 | optuna==2.7.0 5 | optuna-dashboard==0.4.1 6 | -------------------------------------------------------------------------------- /scripts/data_collector/fund/requirements.txt: -------------------------------------------------------------------------------- 1 | loguru 2 | fire 3 | requests 4 | numpy 5 | pandas 6 | tqdm 7 | lxml 8 | yahooquery 9 | -------------------------------------------------------------------------------- /docs/_static/img/analysis/cumulative_return_buy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/analysis/cumulative_return_buy.png -------------------------------------------------------------------------------- /docs/_static/img/analysis/cumulative_return_hold.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/analysis/cumulative_return_hold.png -------------------------------------------------------------------------------- /docs/_static/img/analysis/cumulative_return_sell.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/analysis/cumulative_return_sell.png -------------------------------------------------------------------------------- /examples/benchmarks/HIST/qlib_csi300_stock_index.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/examples/benchmarks/HIST/qlib_csi300_stock_index.npy -------------------------------------------------------------------------------- /tests/data_mid_layer_tests/README.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | The middle layers of data mainly include: 3 | - Handlers 4 | - Processors 5 | - Datasets 6 | -------------------------------------------------------------------------------- /scripts/data_collector/cn_index/requirements.txt: -------------------------------------------------------------------------------- 1 | baostock 2 | fire 3 | requests 4 | pandas 5 | lxml 6 | loguru 7 | tqdm 8 | yahooquery 9 | openpyxl 10 | -------------------------------------------------------------------------------- /docs/_static/img/analysis/analysis_model_long_short.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/analysis/analysis_model_long_short.png -------------------------------------------------------------------------------- /docs/_static/img/analysis/analysis_model_monthly_IC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/analysis/analysis_model_monthly_IC.png -------------------------------------------------------------------------------- /docs/_static/img/analysis/risk_analysis_max_drawdown.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/analysis/risk_analysis_max_drawdown.png -------------------------------------------------------------------------------- /docs/_static/img/analysis/analysis_model_auto_correlation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/analysis/analysis_model_auto_correlation.png -------------------------------------------------------------------------------- /docs/_static/img/analysis/analysis_model_cumulative_return.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/analysis/analysis_model_cumulative_return.png -------------------------------------------------------------------------------- /docs/_static/img/analysis/cumulative_return_buy_minus_sell.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/analysis/cumulative_return_buy_minus_sell.png -------------------------------------------------------------------------------- /docs/_static/img/analysis/risk_analysis_annualized_return.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/analysis/risk_analysis_annualized_return.png -------------------------------------------------------------------------------- /docs/_static/img/analysis/risk_analysis_information_ratio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/analysis/risk_analysis_information_ratio.png -------------------------------------------------------------------------------- /scripts/data_collector/pit/requirements.txt: -------------------------------------------------------------------------------- 1 | loguru 2 | fire 3 | tqdm 4 | requests 5 | pandas 6 | lxml 7 | baostock 8 | yahooquery 9 | beautifulsoup4 10 | -------------------------------------------------------------------------------- /qlib/contrib/report/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License.
3 | 4 | """ 5 | This module is designed to analyze data 6 | 7 | """ 8 | -------------------------------------------------------------------------------- /examples/benchmarks/ADD/README.md: -------------------------------------------------------------------------------- 1 | # ADD 2 | * Paper: [ADD: Augmented Disentanglement Distillation Framework for Improving Stock Trend Forecasting](https://arxiv.org/abs/2012.06289). 3 | 4 | -------------------------------------------------------------------------------- /qlib/data/storage/arctic_storage/__init__.py: -------------------------------------------------------------------------------- 1 | from .instruments import ArcticInstrumentStorage 2 | from .feature import ArcticFeatureStorage 3 | from .calendar import ArcticCalendarStorage 4 | -------------------------------------------------------------------------------- /scripts/data_collector/yahoo/requirements.txt: -------------------------------------------------------------------------------- 1 | loguru 2 | fire 3 | requests 4 | numpy 5 | pandas 6 | tqdm 7 | lxml 8 | yahooquery 9 | joblib 10 | beautifulsoup4 11 | bs4 12 | soupsieve -------------------------------------------------------------------------------- /tests/dependency_tests/README.md: -------------------------------------------------------------------------------- 1 | Some implementations in Qlib depend on certain assumptions about its dependencies. 2 | 3 | So some tests are required to ensure that these assumptions remain valid. 4 | -------------------------------------------------------------------------------- /qlib/model/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | import warnings 5 | 6 | from .base import Model 7 | 8 | __all__ = ["Model", "warnings"] 9 | -------------------------------------------------------------------------------- /qlib/rl/strategy/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | from .single_order import SingleOrderStrategy 4 | 5 | __all__ = ["SingleOrderStrategy"] 6 | -------------------------------------------------------------------------------- /scripts/data_collector/baostock_5min/requirements.txt: -------------------------------------------------------------------------------- 1 | loguru 2 | fire 3 | requests 4 | numpy 5 | pandas 6 | tqdm 7 | lxml 8 | yahooquery 9 | joblib 10 | beautifulsoup4 11 | bs4 12 | soupsieve 13 | baostock -------------------------------------------------------------------------------- /examples/benchmarks/GRU/README.md: -------------------------------------------------------------------------------- 1 | # Gated Recurrent Unit (GRU) 2 | * Paper: [Learning Phrase Representations using RNN Encoder–Decoder for Statistical Machine Translation](https://aclanthology.org/D14-1179.pdf). 3 | -------------------------------------------------------------------------------- /examples/benchmarks/LSTM/README.md: -------------------------------------------------------------------------------- 1 | # Long Short-Term Memory (LSTM) 2 | * Paper: [Long Short-Term Memory](https://direct.mit.edu/neco/article-abstract/9/8/1735/6109/Long-Short-Term-Memory?redirectedFrom=fulltext).
3 | -------------------------------------------------------------------------------- /qlib/cli/data.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | import fire 5 | 6 | from qlib.tests.data import GetData 7 | 8 | if __name__ == "__main__": 9 | fire.Fire(GetData) 10 | -------------------------------------------------------------------------------- /qlib/contrib/workflow/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | from .record_temp import MultiSegRecord, SignalMseRecord 4 | 5 | __all__ = ["MultiSegRecord", "SignalMseRecord"] 6 | -------------------------------------------------------------------------------- /qlib/model/meta/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | from .dataset import MetaTaskDataset 5 | from .task import MetaTask 6 | 7 | __all__ = ["MetaTask", "MetaTaskDataset"] 8 | -------------------------------------------------------------------------------- /tests/pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | markers = 3 | slow: marks tests as slow (deselect with '-m "not slow"') 4 | filterwarnings = 5 | ignore:.*rng.randint:DeprecationWarning 6 | ignore:.*Casting input x to numpy array:UserWarning 7 | -------------------------------------------------------------------------------- /scripts/get_data.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | import fire 5 | 6 | from qlib.tests.data import GetData 7 | 8 | if __name__ == "__main__": 9 | fire.Fire(GetData) 10 | -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | [TYPECHECK] 2 | # https://stackoverflow.com/a/53572939 3 | # List of members which are set dynamically and missed by Pylint inference 4 | # system, and so shouldn't trigger E1101 when accessed. 5 | generated-members=numpy.*, torch.* 6 | -------------------------------------------------------------------------------- /examples/benchmarks/TabNet/README.md: -------------------------------------------------------------------------------- 1 | # TabNet 2 | * Code: [https://github.com/dreamquark-ai/tabnet](https://github.com/dreamquark-ai/tabnet) 3 | * Paper: [TabNet: Attentive Interpretable Tabular Learning](https://arxiv.org/pdf/1908.07442.pdf). 4 | -------------------------------------------------------------------------------- /qlib/contrib/meta/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
3 | 4 | from .data_selection import MetaDatasetDS, MetaModelDS, MetaTaskDS 5 | 6 | __all__ = ["MetaTaskDS", "MetaDatasetDS", "MetaModelDS"] 7 | -------------------------------------------------------------------------------- /.deepsource.toml: -------------------------------------------------------------------------------- 1 | version = 1 2 | 3 | test_patterns = ["tests/test_*.py"] 4 | 5 | exclude_patterns = ["examples/**"] 6 | 7 | [[analyzers]] 8 | name = "python" 9 | enabled = true 10 | 11 | [analyzers.meta] 12 | runtime_version = "3.x.x" 13 | -------------------------------------------------------------------------------- /examples/benchmarks/IGMTF/README.md: -------------------------------------------------------------------------------- 1 | # IGMTF 2 | * Code: [https://github.com/Wentao-Xu/IGMTF](https://github.com/Wentao-Xu/IGMTF) 3 | * Paper: [IGMTF: An Instance-wise Graph-based Framework for 4 | Multivariate Time Series Forecasting](https://arxiv.org/abs/2109.06489). -------------------------------------------------------------------------------- /examples/benchmarks/TCN/README.md: -------------------------------------------------------------------------------- 1 | # TCN 2 | * Code: [https://github.com/locuslab/TCN](https://github.com/locuslab/TCN) 3 | * Paper: [An Empirical Evaluation of Generic Convolutional and Recurrent Networks for Sequence Modeling](https://arxiv.org/abs/1803.01271). 4 | 5 | -------------------------------------------------------------------------------- /qlib/contrib/meta/data_selection/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | from .dataset import MetaDatasetDS, MetaTaskDS 5 | from .model import MetaModelDS 6 | 7 | __all__ = ["MetaDatasetDS", "MetaTaskDS", "MetaModelDS"] 8 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/documentation.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F4D6 Documentation" 3 | about: Report an issue related to documentation 4 | 5 | --- 6 | 7 | ## 📖 Documentation 8 | 9 | 10 | -------------------------------------------------------------------------------- /examples/benchmarks/XGBoost/README.md: -------------------------------------------------------------------------------- 1 | # XGBoost 2 | * Code: [https://github.com/dmlc/xgboost](https://github.com/dmlc/xgboost) 3 | * Paper: XGBoost: A Scalable Tree Boosting System. [https://dl.acm.org/doi/pdf/10.1145/2939672.2939785](https://dl.acm.org/doi/pdf/10.1145/2939672.2939785). -------------------------------------------------------------------------------- /examples/benchmarks/Transformer/README.md: -------------------------------------------------------------------------------- 1 | # Transformer 2 | * Code: [https://github.com/tensorflow/tensor2tensor](https://github.com/tensorflow/tensor2tensor) 3 | * Paper: [Attention is All you Need](https://proceedings.neurips.cc/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf). 4 | -------------------------------------------------------------------------------- /qlib/rl/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | """Common utilities to handle ad-hoc-styled data. 5 | 6 | Most of these snippets come from research projects (paper code).
7 | Please exercise caution when using them in production. 8 | """ 9 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | """Ignore RL tests on non-Linux platforms.""" 5 | collect_ignore = [] 6 | 7 | if sys.platform != "linux": 8 | for root, dirs, files in os.walk("rl"): 9 | for file in files: 10 | collect_ignore.append(os.path.join(root, file)) 11 | -------------------------------------------------------------------------------- /qlib/data/storage/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | from .storage import CalendarStorage, CalVT, FeatureStorage, InstKT, InstrumentStorage, InstVT 5 | 6 | __all__ = ["CalendarStorage", "InstrumentStorage", "FeatureStorage", "CalVT", "InstVT", "InstKT"] 7 | -------------------------------------------------------------------------------- /docs/_static/demo.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | git clone https://github.com/microsoft/qlib.git 3 | cd qlib 4 | ls 5 | pip install pyqlib 6 | # or 7 | # pip install numpy 8 | # pip install --upgrade cython 9 | # python setup.py install 10 | cd examples 11 | ls 12 | qrun benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml -------------------------------------------------------------------------------- /examples/benchmarks/ADARNN/README.md: -------------------------------------------------------------------------------- 1 | # AdaRNN 2 | * Code: [https://github.com/jindongwang/transferlearning/tree/master/code/deep/adarnn](https://github.com/jindongwang/transferlearning/tree/master/code/deep/adarnn) 3 | * Paper: [AdaRNN: Adaptive Learning and Forecasting for Time Series](https://arxiv.org/pdf/2108.04443.pdf). 4 | 5 | -------------------------------------------------------------------------------- /examples/benchmarks/HIST/README.md: -------------------------------------------------------------------------------- 1 | # HIST 2 | * Code: [https://github.com/Wentao-Xu/HIST](https://github.com/Wentao-Xu/HIST) 3 | * Paper: [HIST: A Graph-based Framework for Stock Trend Forecasting via Mining Concept-Oriented Shared Information](https://arxiv.org/abs/2110.13716). -------------------------------------------------------------------------------- /qlib/contrib/report/analysis_model/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License.
3 | 4 | from .analysis_model_performance import model_performance_graph 5 | from .factor_model_performance import factor_performance_graph 6 | 7 | __all__ = ["model_performance_graph", "factor_performance_graph"] 8 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/psf/black 3 | rev: 23.7.0 4 | hooks: 5 | - id: black 6 | args: ["qlib", "-l 120"] 7 | 8 | - repo: https://github.com/PyCQA/flake8 9 | rev: 4.0.1 10 | hooks: 11 | - id: flake8 12 | args: ["--ignore=E501,F541,E266,E402,W503,E731,E203"] 13 | -------------------------------------------------------------------------------- /qlib/contrib/strategy/optimizer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | from .base import BaseOptimizer 5 | from .enhanced_indexing import EnhancedIndexingOptimizer 6 | from .optimizer import PortfolioOptimizer 7 | 8 | __all__ = ["BaseOptimizer", "PortfolioOptimizer", "EnhancedIndexingOptimizer"] 9 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # Requirements 2 | 3 | Here are the minimal hardware requirements to run the `workflow_by_code` example. 4 | - Memory: 16G 5 | - Free Disk: 5G 6 | 7 | 8 | # NOTE 9 | The results will vary slightly across different OSs (the variance of the annualized return will be less than 2%). 10 | The evaluation results on the `README.md` page are from a Linux OS. 11 | -------------------------------------------------------------------------------- /qlib/contrib/rolling/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | """ 4 | The difference between this module and the scripts in examples/benchmarks_dynamic: 5 | - This module only focuses on providing a general rolling implementation. 6 | Anything specific to a particular benchmark is placed in examples/benchmarks_dynamic. 7 | """ 8 | -------------------------------------------------------------------------------- /examples/benchmarks/CatBoost/README.md: -------------------------------------------------------------------------------- 1 | # CatBoost 2 | * Code: [https://github.com/catboost/catboost](https://github.com/catboost/catboost) 3 | * Paper: CatBoost: unbiased boosting with categorical features. [https://proceedings.neurips.cc/paper/2018/file/14491b756b3a51daac41c24863285549-Paper.pdf](https://proceedings.neurips.cc/paper/2018/file/14491b756b3a51daac41c24863285549-Paper.pdf). -------------------------------------------------------------------------------- /qlib/rl/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License.
3 | 4 | from .interpreter import ActionInterpreter, Interpreter, StateInterpreter 5 | from .reward import Reward, RewardCombination 6 | from .simulator import Simulator 7 | 8 | __all__ = ["Interpreter", "StateInterpreter", "ActionInterpreter", "Reward", "RewardCombination", "Simulator"] 9 | -------------------------------------------------------------------------------- /docs/component/rl/toctree.rst: -------------------------------------------------------------------------------- 1 | .. _rl: 2 | 3 | ======================================================================== 4 | Reinforcement Learning in Quantitative Trading 5 | ======================================================================== 6 | 7 | .. toctree:: 8 | Guidance 9 | Overall 10 | Quick Start 11 | Framework 12 | -------------------------------------------------------------------------------- /qlib/contrib/strategy/optimizer/base.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | import abc 5 | 6 | 7 | class BaseOptimizer(abc.ABC): 8 | """Construct a portfolio with an optimization-related method""" 9 | 10 | @abc.abstractmethod 11 | def __call__(self, *args, **kwargs) -> object: 12 | """Generate an optimized portfolio allocation""" 13 | -------------------------------------------------------------------------------- /qlib/rl/seed.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | """Defines a set of initial state definitions and state-set definitions. 5 | 6 | With single-asset order execution only, the only seed is the order. 7 | """ 8 | 9 | from typing import TypeVar 10 | 11 | InitialStateType = TypeVar("InitialStateType") 12 | """Type of data that creates the simulator.""" 13 | -------------------------------------------------------------------------------- /qlib/model/riskmodel/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | from .base import RiskModel 5 | from .poet import POETCovEstimator 6 | from .shrink import ShrinkCovEstimator 7 | from .structured import StructuredCovEstimator 8 | 9 | __all__ = [ 10 | "RiskModel", 11 | "POETCovEstimator", 12 | "ShrinkCovEstimator", 13 | "StructuredCovEstimator", 14 | ] 15 | -------------------------------------------------------------------------------- /qlib/contrib/report/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License.
3 | 4 | GRAPH_NAME_LIST = [ 5 | "analysis_position.report_graph", 6 | "analysis_position.score_ic_graph", 7 | "analysis_position.cumulative_return_graph", 8 | "analysis_position.risk_analysis_graph", 9 | "analysis_position.rank_label_graph", 10 | "analysis_model.model_performance_graph", 11 | ] 12 | -------------------------------------------------------------------------------- /.mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | exclude = (?x)( 3 | ^qlib/backtest/high_performance_ds\.py$ 4 | | ^qlib/contrib 5 | | ^qlib/data 6 | | ^qlib/model 7 | | ^qlib/strategy 8 | | ^qlib/tests 9 | | ^qlib/utils 10 | | ^qlib/workflow 11 | | ^qlib/config\.py$ 12 | | ^qlib/log\.py$ 13 | | ^qlib/__init__\.py$ 14 | ) 15 | ignore_missing_imports = true 16 | disallow_incomplete_defs = true 17 | follow_imports = skip 18 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/question.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "❓Questions & Help" 3 | about: Have some questions? We can offer help. 4 | labels: question 5 | 6 | --- 7 | 8 | ## ❓ Questions and Help 9 | 10 | We sincerely suggest that you carefully read the [documentation](http://qlib.readthedocs.io/) of our library as well as the official [paper](https://arxiv.org/abs/2009.11189). After that, if you still feel puzzled, please describe your question clearly under this issue. -------------------------------------------------------------------------------- /examples/benchmarks/KRNN/README.md: -------------------------------------------------------------------------------- 1 | # KRNN 2 | * Code: [https://github.com/microsoft/FOST/blob/main/fostool/model/krnn.py](https://github.com/microsoft/FOST/blob/main/fostool/model/krnn.py) 3 | 4 | 5 | # Introduction to the settings/configs 6 | * Torch_geometric is used in the original FOST model, but we did not use it here. 7 | * Make sure your CUDA version matches the torch version so that the GPU can be used; we use CUDA==10.2 and torch.__version__==1.12.1. 8 | 9 | -------------------------------------------------------------------------------- /examples/benchmarks/ALSTM/README.md: -------------------------------------------------------------------------------- 1 | # ALSTM 2 | 3 | - ALSTM contains a temporal attentive aggregation layer based on a normal LSTM. 4 | 5 | - Paper: A dual-stage attention-based recurrent neural network for time series prediction. 6 | 7 | [https://www.ijcai.org/Proceedings/2017/0366.pdf](https://www.ijcai.org/Proceedings/2017/0366.pdf) 8 | 9 | - NOTE: The current implementation is just a simplified version of ALSTM: an LSTM with attention. 10 | -------------------------------------------------------------------------------- /examples/benchmarks/Sandwich/README.md: -------------------------------------------------------------------------------- 1 | # Sandwich 2 | * Code: [https://github.com/microsoft/FOST/blob/main/fostool/model/sandwich.py](https://github.com/microsoft/FOST/blob/main/fostool/model/sandwich.py) 3 | 4 | 5 | # Introduction to the settings/configs 6 | * Torch_geometric is used in the original FOST model, but we did not use it here.
7 | * Make sure your CUDA version matches the torch version so that the GPU can be used; we use CUDA==10.2 and torch.__version__==1.12.1 8 | 9 | -------------------------------------------------------------------------------- /qlib/contrib/report/analysis_position/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | from .cumulative_return import cumulative_return_graph 5 | from .rank_label import rank_label_graph 6 | from .report import report_graph 7 | from .risk_analysis import risk_analysis_graph 8 | from .score_ic import score_ic_graph 9 | 10 | __all__ = ["cumulative_return_graph", "score_ic_graph", "report_graph", "rank_label_graph", "risk_analysis_graph"] 11 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | -------------------------------------------------------------------------------- /examples/benchmarks/SFM/README.md: -------------------------------------------------------------------------------- 1 | # State-Frequency-Memory 2 | - State Frequency Memory (SFM) is a novel recurrent network that uses the Discrete Fourier Transform to decompose the hidden states of memory cells and capture multi-frequency trading patterns from past market data to make stock price predictions. 3 | - Paper: Stock Price Prediction via Discovering Multi-Frequency Trading Patterns. [http://www.eecs.ucf.edu/~gqi/publications/kdd2017_stock.pdf](http://www.eecs.ucf.edu/~gqi/publications/kdd2017_stock.pdf) -------------------------------------------------------------------------------- /examples/benchmarks/GATs/README.md: -------------------------------------------------------------------------------- 1 | # GATs 2 | * Graph Attention Networks (GATs) leverage masked self-attentional layers on graph-structured data. The nodes in stacked layers have different weights and are able to attend over their 3 | neighborhoods’ features without requiring any kind of costly matrix operation (such as inversion) or depending on knowing the graph structure upfront. 4 | * The code used in Qlib is our own PyTorch implementation. 5 | * Paper: Graph Attention Networks https://arxiv.org/pdf/1710.10903.pdf -------------------------------------------------------------------------------- /qlib/contrib/tuner/space.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License.
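# The dictionaries below are hyperopt search spaces. A minimal sketch of how
# such a space is typically consumed (`my_objective` is a placeholder, not part
# of this module):
#
#     from hyperopt import fmin, tpe
#     best = fmin(fn=my_objective, space=TopkAmountStrategySpace,
#                 algo=tpe.suggest, max_evals=10)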
3 | 4 | # pylint: skip-file 5 | # flake8: noqa 6 | 7 | from hyperopt import hp 8 | 9 | TopkAmountStrategySpace = { 10 | "topk": hp.choice("topk", [30, 35, 40]), 11 | "buffer_margin": hp.choice("buffer_margin", [200, 250, 300]), 12 | } 13 | 14 | QLibDataLabelSpace = { 15 | "labels": hp.choice( 16 | "labels", 17 | [["Ref($vwap, -2)/Ref($vwap, -1) - 1"], ["Ref($close, -5)/$close - 1"]], 18 | ) 19 | } 20 | -------------------------------------------------------------------------------- /qlib/utils/exceptions.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | 5 | # Base exception class 6 | class QlibException(Exception): 7 | pass 8 | 9 | 10 | class RecorderInitializationError(QlibException): 11 | """Error type for re-initialization when starting an experiment""" 12 | 13 | 14 | class LoadObjectError(QlibException): 15 | """Error type for Recorder when it cannot load an object""" 16 | 17 | 18 | class ExpAlreadyExistError(Exception): 19 | """Experiment already exists""" 20 | -------------------------------------------------------------------------------- /examples/rl_order_execution/scripts/merge_orders.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | 4 | import pandas as pd 5 | from tqdm import tqdm 6 | 7 | for tag in ["test", "valid"]: 8 | files = os.listdir(os.path.join("data/orders/", tag)) 9 | dfs = [] 10 | for f in tqdm(files): 11 | df = pickle.load(open(os.path.join("data/orders/", tag, f), "rb")) 12 | df = df.drop(["$close0"], axis=1) 13 | dfs.append(df) 14 | 15 | total_df = pd.concat(dfs) 16 | pickle.dump(total_df, open(os.path.join("data", "orders", f"{tag}_orders.pkl"), "wb")) 17 | -------------------------------------------------------------------------------- /qlib/rl/trainer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | """Train, test, inference utilities.""" 5 | 6 | from .api import backtest, train 7 | from .callbacks import Checkpoint, EarlyStopping, MetricsWriter 8 | from .trainer import Trainer 9 | from .vessel import TrainingVessel, TrainingVesselBase 10 | 11 | __all__ = [ 12 | "Trainer", 13 | "TrainingVessel", 14 | "TrainingVesselBase", 15 | "Checkpoint", 16 | "EarlyStopping", 17 | "MetricsWriter", 18 | "train", 19 | "backtest", 20 | ] 21 | -------------------------------------------------------------------------------- /scripts/data_collector/contrib/future_trading_date_collector/README.md: -------------------------------------------------------------------------------- 1 | # Get future trading days 2 | 3 | > `D.calendar(future=True)` will be used 4 | 5 | ## Requirements 6 | 7 | ```bash 8 | pip install -r requirements.txt 9 | ``` 10 | 11 | ## Collect Data 12 | 13 | ```bash 14 | # collect future trading days, used in qlib calendars. 15 | python future_trading_date_collector.py --qlib_dir ~/.qlib/qlib_data/cn_data --freq day 16 | ``` 17 | 18 | ## Parameters 19 | 20 | - qlib_dir: qlib data directory 21 | - freq: value from [`day`, `1min`], default `day` 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /qlib/contrib/strategy/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License.
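# A usage sketch for one of the strategies exported below (the parameter values
# are illustrative, not recommendations; `pred_score` is a placeholder signal):
#
#     from qlib.contrib.strategy import TopkDropoutStrategy
#     strategy = TopkDropoutStrategy(signal=pred_score, topk=50, n_drop=5)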
3 | 4 | 5 | from .cost_control import SoftTopkStrategy 6 | from .rule_strategy import SBBStrategyBase, SBBStrategyEMA, TWAPStrategy 7 | from .signal_strategy import EnhancedIndexingStrategy, TopkDropoutStrategy, WeightStrategyBase 8 | 9 | __all__ = [ 10 | "TopkDropoutStrategy", 11 | "WeightStrategyBase", 12 | "EnhancedIndexingStrategy", 13 | "TWAPStrategy", 14 | "SBBStrategyBase", 15 | "SBBStrategyEMA", 16 | "SoftTopkStrategy", 17 | ] 18 | -------------------------------------------------------------------------------- /examples/hyperparameter/LightGBM/Readme.md: -------------------------------------------------------------------------------- 1 | # LightGBM hyperparameter 2 | 3 | ## Alpha158 4 | First terminal 5 | ``` 6 | optuna create-study --study LGBM_158 --storage sqlite:///db.sqlite3 7 | optuna-dashboard --port 5000 --host 0.0.0.0 sqlite:///db.sqlite3 8 | ``` 9 | Second terminal 10 | ``` 11 | python hyperparameter_158.py 12 | ``` 13 | 14 | ## Alpha360 15 | First terminal 16 | ``` 17 | optuna create-study --study LGBM_360 --storage sqlite:///db.sqlite3 18 | optuna-dashboard --port 5000 --host 0.0.0.0 sqlite:///db.sqlite3 19 | ``` 20 | Second terminal 21 | ``` 22 | python hyperparameter_360.py 23 | ``` 24 | -------------------------------------------------------------------------------- /qlib/constant.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | # REGION CONST 5 | from typing import TypeVar 6 | 7 | import numpy as np 8 | import pandas as pd 9 | 10 | REG_CN = "cn" 11 | REG_US = "us" 12 | REG_TW = "tw" 13 | 14 | # Epsilon for avoiding division by zero. 15 | EPS = 1e-12 16 | 17 | # Infinity as an integer 18 | INF = int(1e18) 19 | ONE_DAY = pd.Timedelta("1day") 20 | ONE_MIN = pd.Timedelta("1min") 21 | EPS_T = pd.Timedelta("1s") # use 1 second to exclude the right interval point 22 | float_or_ndarray = TypeVar("float_or_ndarray", float, np.ndarray) 23 | -------------------------------------------------------------------------------- /examples/benchmarks/LightGBM/README.md: -------------------------------------------------------------------------------- 1 | # LightGBM 2 | * Code: [https://github.com/microsoft/LightGBM](https://github.com/microsoft/LightGBM) 3 | * Paper: LightGBM: A Highly Efficient Gradient Boosting 4 | Decision Tree. [https://proceedings.neurips.cc/paper/2017/file/6449f44a102fde848669bdd9eb6b76fa-Paper.pdf](https://proceedings.neurips.cc/paper/2017/file/6449f44a102fde848669bdd9eb6b76fa-Paper.pdf). 5 | 6 | 7 | # Introduction to the settings/configs. 8 | 9 | `workflow_config_lightgbm_multi_freq.yaml` 10 | - It uses data sources of different frequencies (i.e. multiple frequencies) for daily prediction. 11 | -------------------------------------------------------------------------------- /qlib/workflow/task/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | """ 4 | Task-related workflows are implemented in this folder 5 | 6 | A typical task workflow 7 | 8 | | Step | Description | 9 | |-----------------------+------------------------------------------------| 10 | | TaskGen | Generating tasks. | 11 | | TaskManager(optional) | Manage generated tasks | 12 | | run task | retrieve tasks from TaskManager and run tasks.
| 13 | """ 14 | -------------------------------------------------------------------------------- /examples/rl_order_execution/exp_configs/backtest_twap.yml: -------------------------------------------------------------------------------- 1 | order_file: ./data/orders/test_orders.pkl 2 | start_time: "9:30" 3 | end_time: "14:54" 4 | data_granularity: "5min" 5 | qlib: 6 | provider_uri_5min: ./data/bin/ 7 | exchange: 8 | limit_threshold: null 9 | deal_price: ["$close", "$close"] 10 | volume_threshold: null 11 | strategies: 12 | 1day: 13 | class: TWAPStrategy 14 | kwargs: {} 15 | module_path: qlib.contrib.strategy.rule_strategy 16 | 30min: 17 | class: TWAPStrategy 18 | kwargs: {} 19 | module_path: qlib.contrib.strategy.rule_strategy 20 | concurrency: 16 21 | output_dir: outputs/twap/ 22 | -------------------------------------------------------------------------------- /qlib/rl/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | from .data_queue import DataQueue 5 | from .env_wrapper import EnvWrapper, EnvWrapperStatus 6 | from .finite_env import FiniteEnvType, vectorize_env 7 | from .log import ConsoleWriter, CsvWriter, LogBuffer, LogCollector, LogLevel, LogWriter 8 | 9 | __all__ = [ 10 | "LogLevel", 11 | "DataQueue", 12 | "EnvWrapper", 13 | "FiniteEnvType", 14 | "LogCollector", 15 | "LogWriter", 16 | "vectorize_env", 17 | "ConsoleWriter", 18 | "CsvWriter", 19 | "EnvWrapperStatus", 20 | "LogBuffer", 21 | ] 22 | -------------------------------------------------------------------------------- /scripts/data_collector/cn_index/README.md: -------------------------------------------------------------------------------- 1 | # CSI300/CSI100/CSI500 History Companies Collection 2 | 3 | ## Requirements 4 | 5 | ```bash 6 | pip install -r requirements.txt 7 | ``` 8 | 9 | ## Collect Data 10 | 11 | ```bash 12 | # parse instruments, used in qlib/instruments. 13 | python collector.py --index_name CSI300 --qlib_dir ~/.qlib/qlib_data/cn_data --method parse_instruments 14 | 15 | # parse new companies 16 | python collector.py --index_name CSI300 --qlib_dir ~/.qlib/qlib_data/cn_data --method save_new_companies 17 | 18 | # index_name support: CSI300, CSI100, CSI500 19 | # help 20 | python collector.py --help 21 | ``` 22 | 23 | -------------------------------------------------------------------------------- /scripts/data_collector/us_index/README.md: -------------------------------------------------------------------------------- 1 | # NASDAQ100/SP500/SP400/DJIA History Companies Collection 2 | 3 | ## Requirements 4 | 5 | ```bash 6 | pip install -r requirements.txt 7 | ``` 8 | 9 | ## Collect Data 10 | 11 | ```bash 12 | # parse instruments, used in qlib/instruments.
13 | python collector.py --index_name SP500 --qlib_dir ~/.qlib/qlib_data/us_data --method parse_instruments 14 | 15 | # parse new companies 16 | python collector.py --index_name SP500 --qlib_dir ~/.qlib/qlib_data/us_data --method save_new_companies 17 | 18 | # index_name support: SP500, NASDAQ100, DJIA, SP400 19 | # help 20 | python collector.py --help 21 | ``` 22 | 23 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | # Set the version of Python and other tools you might need 9 | build: 10 | os: ubuntu-22.04 11 | tools: 12 | python: "3.8" 13 | 14 | # Build documentation in the docs/ directory with Sphinx 15 | sphinx: 16 | configuration: docs/conf.py 17 | 18 | # Build all formats 19 | formats: all 20 | 21 | # Optionally set the version of Python and requirements required to build your docs 22 | python: 23 | install: 24 | - requirements: docs/requirements.txt 25 | - method: pip 26 | path: . 27 | -------------------------------------------------------------------------------- /qlib/contrib/online/__init__.py: -------------------------------------------------------------------------------- 1 | # pylint: skip-file 2 | # flake8: noqa 3 | 4 | ''' 5 | TODO: 6 | 7 | - The online module requires the model to have a method like: 8 | def get_data_with_date(self, date, **kwargs): 9 | """ 10 | Will be called in the online module; 11 | needs to return the data used to predict the label (score) of stocks at `date`. 12 | 13 | :param 14 | date: pd.Timestamp 15 | predict date 16 | :return: 17 | data: the input data used to predict the label (score) of stocks at the predict date. 18 | """ 19 | raise NotImplementedError("get_data_with_date for this model is not implemented.") 20 | 21 | ''' 22 | -------------------------------------------------------------------------------- /examples/benchmarks/LightGBM/features_resample_N.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License.
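# Sketch of how the processor below is meant to be used (the input DataFrame
# and target frequency are illustrative):
#
#     proc = ResampleNProcessor(target_frq="1d")
#     daily_df = proc(minute_df)  # resamples 1min bars onto the target calendar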
3 | 4 | import pandas as pd 5 | 6 | from qlib.data.inst_processor import InstProcessor 7 | from qlib.utils.resam import resam_calendar 8 | 9 | 10 | class ResampleNProcessor(InstProcessor): 11 | def __init__(self, target_frq: str, **kwargs): 12 | self.target_frq = target_frq 13 | 14 | def __call__(self, df: pd.DataFrame, *args, **kwargs): 15 | df.index = pd.to_datetime(df.index) 16 | res_index = resam_calendar(df.index, "1min", self.target_frq) 17 | df = df.resample(self.target_frq).last().reindex(res_index) 18 | return df 19 | -------------------------------------------------------------------------------- /scripts/data_collector/contrib/fill_cn_1min_data/README.md: -------------------------------------------------------------------------------- 1 | # Use 1d data to fill in symbols that are missing from the 1min data 2 | 3 | 4 | ## Requirements 5 | 6 | ```bash 7 | pip install -r requirements.txt 8 | ``` 9 | 10 | ## Fill 1min data 11 | 12 | ```bash 13 | python fill_cn_1min_data.py --data_1min_dir ~/.qlib/csv_data/cn_data_1min --qlib_data_1d_dir ~/.qlib/qlib_data/cn_data 14 | ``` 15 | 16 | ## Parameters 17 | 18 | - data_1min_dir: csv data 19 | - qlib_data_1d_dir: qlib data directory 20 | - max_workers: `ThreadPoolExecutor(max_workers=max_workers)`, by default *16* 21 | - date_field_name: date field name, by default *date* 22 | - symbol_field_name: symbol field name, by default *symbol* 23 | 24 | -------------------------------------------------------------------------------- /examples/benchmarks/TFT/libs/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Google Research Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | -------------------------------------------------------------------------------- /examples/benchmarks_dynamic/baseline/README.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | 3 | This is the framework for periodically Rolling Retrain (RR) forecasting models. RR adapts to market dynamics by periodically retraining on up-to-date data. 4 | 5 | ## Run the Code 6 | Users can try RR by running the following command: 7 | ```bash 8 | python rolling_benchmark.py run 9 | ``` 10 | 11 | The default forecasting model is `Linear`. Users can choose other forecasting models by changing the `model_type` parameter. 12 | For example, users can try the `LightGBM` forecasting model by running the following command: 13 | ```bash 14 | python rolling_benchmark.py --conf_path=workflow_config_lightgbm_Alpha158.yaml run 15 | ``` 16 | 17 | -------------------------------------------------------------------------------- /qlib/model/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License.
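# ConcatDataset below zips several torch datasets so that ds[i] returns a tuple
# (d1[i], d2[i], ...); its length is that of the shortest member. Illustrative
# use (the dataset names are placeholders):
#
#     ds = ConcatDataset(feature_dataset, label_dataset)
#     x, y = ds[0]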
3 | 4 | from torch.utils.data import Dataset 5 | 6 | 7 | class ConcatDataset(Dataset): 8 | def __init__(self, *datasets): 9 | self.datasets = datasets 10 | 11 | def __getitem__(self, i): 12 | return tuple(d[i] for d in self.datasets) 13 | 14 | def __len__(self): 15 | return min(len(d) for d in self.datasets) 16 | 17 | 18 | class IndexSampler: 19 | def __init__(self, sampler): 20 | self.sampler = sampler 21 | 22 | def __getitem__(self, i: int): 23 | return self.sampler[i], i 24 | 25 | def __len__(self): 26 | return len(self.sampler) 27 | -------------------------------------------------------------------------------- /examples/benchmarks/TFT/data_formatters/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Google Research Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | -------------------------------------------------------------------------------- /examples/benchmarks/TFT/expt_settings/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Google Research Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | -------------------------------------------------------------------------------- /qlib/data/inst_processor.py: -------------------------------------------------------------------------------- 1 | import abc 2 | import json 3 | 4 | import pandas as pd 5 | 6 | 7 | class InstProcessor: 8 | @abc.abstractmethod 9 | def __call__(self, df: pd.DataFrame, instrument, *args, **kwargs): 10 | """ 11 | Process the data. 12 | 13 | NOTE: **The processor may change the content of `df` in place!** 14 | Users should keep a copy of the data outside. 15 | 16 | Parameters 17 | ---------- 18 | df : pd.DataFrame 19 | The raw_df of the handler or the result from a previous processor. 20 | """ 21 | 22 | def __str__(self): 23 | return f"{self.__class__.__name__}:{json.dumps(self.__dict__, sort_keys=True, default=str)}" 24 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F31FFeature Request" 3 | about: Request for a new Qlib feature 4 | labels: enhancement 5 | 6 | --- 7 | 8 | ## 🌟 Feature Description 9 | 10 | 11 | ## Motivation 12 | 13 | 1. Application scenario 14 | 2.
Related works (Papers, Github repos etc.): 15 | 3. Any other relevant and important information: 16 | 17 | 18 | 19 | ## Alternatives 20 | 21 | 22 | 23 | ## Additional Notes 24 | 25 | -------------------------------------------------------------------------------- /qlib/contrib/rolling/__main__.py: -------------------------------------------------------------------------------- 1 | import fire 2 | from qlib import auto_init 3 | from qlib.contrib.rolling.base import Rolling 4 | from qlib.utils.mod import find_all_classes 5 | 6 | if __name__ == "__main__": 7 | sub_commands = {} 8 | for cls in find_all_classes("qlib.contrib.rolling", Rolling): 9 | sub_commands[cls.__module__.split(".")[-1]] = cls 10 | # The sub_commands will be like 11 | # {'base': <class 'qlib.contrib.rolling.base.Rolling'>, ...} 12 | # So you can run it with commands like the ones below 13 | # - `python -m qlib.contrib.rolling base --conf_path <yaml config path> run` 14 | # - `base` can be replaced with other module names 15 | auto_init() 16 | fire.Fire(sub_commands) 17 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = python3 -msphinx 7 | SPHINXPROJ = Quantlab 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | pip install -r requirements.txt 21 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 22 | -------------------------------------------------------------------------------- /examples/benchmarks/LightGBM/features_sample.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | import pandas as pd 4 | 5 | from qlib.data.inst_processor import InstProcessor 6 | 7 | 8 | class Resample1minProcessor(InstProcessor): 9 | """This processor tries to resample the data.
It will resample the data from 1min freq to day freq by selecting a specific minute""" 10 | 11 | def __init__(self, hour: int, minute: int, **kwargs): 12 | self.hour = hour 13 | self.minute = minute 14 | 15 | def __call__(self, df: pd.DataFrame, *args, **kwargs): 16 | df.index = pd.to_datetime(df.index) 17 | df = df.loc[df.index.time == datetime.time(self.hour, self.minute)] 18 | df.index = df.index.normalize() 19 | return df 20 | -------------------------------------------------------------------------------- /scripts/data_collector/br_index/requirements.txt: -------------------------------------------------------------------------------- 1 | async-generator==1.10 2 | attrs==21.4.0 3 | certifi==2022.12.7 4 | cffi==1.15.0 5 | charset-normalizer==2.0.12 6 | cryptography==36.0.1 7 | fire==0.4.0 8 | h11==0.13.0 9 | idna==3.3 10 | loguru==0.6.0 11 | lxml==4.9.1 12 | multitasking==0.0.10 13 | numpy==1.22.2 14 | outcome==1.1.0 15 | pandas==1.4.1 16 | pycoingecko==2.2.0 17 | pycparser==2.21 18 | pyOpenSSL==22.0.0 19 | PySocks==1.7.1 20 | python-dateutil==2.8.2 21 | pytz==2021.3 22 | requests==2.27.1 23 | requests-futures==1.0.0 24 | six==1.16.0 25 | sniffio==1.2.0 26 | sortedcontainers==2.4.0 27 | termcolor==1.1.0 28 | tqdm==4.63.0 29 | trio==0.20.0 30 | trio-websocket==0.9.2 31 | urllib3==1.26.19 32 | wget==3.2 33 | wsproto==1.1.0 34 | yahooquery==2.2.15 35 | -------------------------------------------------------------------------------- /qlib/data/storage/arctic_storage/arctic_storage_structure.md: -------------------------------------------------------------------------------- 1 | 2 | ## Libraries 3 | 4 | 1. Metadata and contract specifications of instruments: 5 | - convert_meta 6 | - stock_meta 7 | - index_meta 8 | - future_meta 9 | 10 | 11 | 2. OHLC price data 12 | - bar_data 13 | 14 | 15 | 3. Auxiliary data for convertible bonds 16 | - convert_convert_price 17 | - convert_cash_flow 18 | - convert_coupon 19 | - convert_derived 20 | - convert_indicator 21 | - convert_high_freq_factor 22 | - convert_stoploss_return 23 | 24 | 25 | 4. Auxiliary data for stocks 26 | - ex_factor 27 | - split 28 | - limit_up_down 29 | 30 | 31 | 5. Index-related data 32 | - vn_lib_metadata -> index_weights 33 | - market_meta -> index_component 34 | -------------------------------------------------------------------------------- /examples/rolling_process_data/README.md: -------------------------------------------------------------------------------- 1 | # Rolling Process Data 2 | 3 | This workflow is an example for `Rolling Process Data`. 4 | 5 | ## Background 6 | 7 | When rolling-training the models, data also needs to be generated for the different rolling windows. When the rolling window moves, the training data will change, and the processor's learnable state (such as standard deviation, mean, etc.) will also change. 8 | 9 | In order to avoid regenerating data, this example uses the `DataHandler-based DataLoader` to load the raw features that are not related to the rolling window, and then uses Processors to generate processed features related to the rolling window. 10 | 11 | 12 | ## Run the Code 13 | 14 | Run the example by running the following command: 15 | ```bash 16 | python workflow.py rolling_process 17 | ``` -------------------------------------------------------------------------------- /tests/test_contrib_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License.
3 | 4 | import unittest 5 | 6 | from qlib.contrib.model import all_model_classes 7 | 8 | 9 | class TestAllFlow(unittest.TestCase): 10 | def test_0_initialize(self): 11 | num = 0 12 | for model_class in all_model_classes: 13 | if model_class is not None: 14 | model = model_class() 15 | num += 1 16 | print("There are {:}/{:} valid models in total.".format(num, len(all_model_classes))) 17 | 18 | 19 | def suite(): 20 | _suite = unittest.TestSuite() 21 | _suite.addTest(TestAllFlow("test_0_initialize")) 22 | return _suite 23 | 24 | 25 | if __name__ == "__main__": 26 | runner = unittest.TextTestRunner() 27 | runner.run(suite()) 28 | -------------------------------------------------------------------------------- /qlib/data/inst_info.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | from ..utils.serial import Serializable 4 | 5 | 6 | class BaseInstrumentInfo(Serializable): 7 | def __init__(self, **kwargs): 8 | pass 9 | 10 | 11 | class ConvertInstrumentInfo(BaseInstrumentInfo): 12 | def __init__( 13 | self, 14 | cash_flow_schedule, 15 | coupon_schedule, 16 | maturity_date, 17 | call_date=datetime(2200, 1, 1), 18 | principle=100, 19 | stop_trading_date=datetime(2200, 1, 1), 20 | ): 21 | self.cash_flow_schedule = cash_flow_schedule 22 | self.maturity_date = maturity_date 23 | self.call_date = call_date 24 | self.coupon_schedule = coupon_schedule 25 | self.principle = principle 26 | self.stop_trading_date = stop_trading_date 27 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # https://github.com/github/gitignore/blob/master/Python.gitignore 2 | __pycache__/ 3 | 4 | *.pyc 5 | *.pyd 6 | *.so 7 | *.ipynb 8 | .ipynb_checkpoints 9 | _build 10 | build/ 11 | dist/ 12 | 13 | *.log 14 | *.pkl 15 | *.hd5 16 | *.csv 17 | 18 | .env 19 | .vim 20 | .nvimrc 21 | .vscode 22 | 23 | qlib/VERSION.txt 24 | qlib/data/_libs/expanding.cpp 25 | qlib/data/_libs/rolling.cpp 26 | examples/estimator/estimator_example/ 27 | examples/rl/data/ 28 | examples/rl/checkpoints/ 29 | examples/rl/outputs/ 30 | examples/rl_order_execution/data/ 31 | examples/rl_order_execution/outputs/ 32 | 33 | *.egg-info/ 34 | 35 | # test related 36 | test-output.xml 37 | .output 38 | .data 39 | 40 | # special software 41 | mlruns/ 42 | 43 | tags 44 | 45 | .pytest_cache/ 46 | .mypy_cache/ 47 | .vscode/ 48 | 49 | *.swp 50 | 51 | ./pretrain 52 | .idea/ 53 | .aider* 54 | -------------------------------------------------------------------------------- /examples/benchmarks/DoubleEnsemble/README.md: -------------------------------------------------------------------------------- 1 | # DoubleEnsemble 2 | * DoubleEnsemble is an ensemble framework that leverages learning-trajectory-based sample reweighting and shuffling-based feature selection to address both the low signal-to-noise ratio and the growing number of features. It identifies key samples based on the training dynamics of each sample and elicits key features based on the ablation impact of each feature via shuffling. The framework is applicable to a wide range of base models, is capable of extracting complex patterns, and mitigates the overfitting and instability issues of financial market prediction. 3 | * The code used in Qlib is our own implementation.
4 | * Paper: DoubleEnsemble: A New Ensemble Method Based on Sample Reweighting and Feature Selection for Financial Data Analysis [https://arxiv.org/pdf/2010.01265.pdf](https://arxiv.org/pdf/2010.01265.pdf). -------------------------------------------------------------------------------- /qlib/data/dataset/weight.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | 5 | class Reweighter: 6 | def __init__(self, *args, **kwargs): 7 | """ 8 | To initialize the Reweighter, users should provide specific methods to let the reweighter do the reweighting (e.g. sample-wise, rule-based). 9 | """ 10 | raise NotImplementedError() 11 | 12 | def reweight(self, data: object) -> object: 13 | """ 14 | Get weights for data 15 | 16 | Parameters 17 | ---------- 18 | data : object 19 | The input data. 20 | The first dimension is the index of samples 21 | 22 | Returns 23 | ------- 24 | object: 25 | the weights info for the data 26 | """ 27 | raise NotImplementedError("This type of input is not supported") 28 | -------------------------------------------------------------------------------- /examples/benchmarks/TFT/README.md: -------------------------------------------------------------------------------- 1 | # Temporal Fusion Transformers Benchmark 2 | ## Source 3 | **Reference**: Lim, Bryan, et al. "Temporal fusion transformers for interpretable multi-horizon time series forecasting." arXiv preprint arXiv:1912.09363 (2019). 4 | 5 | **GitHub**: https://github.com/google-research/google-research/tree/master/tft 6 | 7 | ## Run the Workflow 8 | Users can follow ``workflow_by_code_tft.py`` to run the benchmark. 9 | 10 | ### Notes 11 | 1. Please be **aware** that this script can only support `Python 3.6 - 3.7`. 12 | 2. If the CUDA version on your machine is not 10.0, please remember to run the following commands `conda install anaconda cudatoolkit=10.0` and `conda install cudnn` on your machine. 13 | 3. The model must run on GPU, or an error will be raised. 14 | 4. New datasets should be registered in ``data_formatters``; for details please visit the source. 15 | -------------------------------------------------------------------------------- /.github/release-drafter.yml: -------------------------------------------------------------------------------- 1 | name-template: 'v$RESOLVED_VERSION 🌈' 2 | tag-template: 'v$RESOLVED_VERSION' 3 | categories: 4 | - title: '🌟 Features' 5 | labels: 6 | - 'feature' 7 | - 'enhancement' 8 | - title: '🐛 Bug Fixes' 9 | labels: 10 | - 'fix' 11 | - 'bugfix' 12 | - 'bug' 13 | - title: '📚 Documentation' 14 | labels: 15 | - 'doc' 16 | - 'documentation' 17 | - title: '🧹 Maintenance' 18 | labels: 19 | - 'maintenance' 20 | change-template: '- $TITLE @$AUTHOR (#$NUMBER)' 21 | change-title-escapes: '\<*_&' # You can add # and @ to disable mentions, and add ` to disable code blocks. 22 | version-resolver: 23 | major: 24 | labels: 25 | - 'major' 26 | minor: 27 | labels: 28 | - 'minor' 29 | patch: 30 | labels: 31 | - 'patch' 32 | default: patch 33 | template: | 34 | ## Changes 35 | 36 | $CHANGES 37 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM continuumio/miniconda3:latest 2 | 3 | WORKDIR /qlib 4 | 5 | COPY . .
6 | 7 | RUN apt-get update && \ 8 | apt-get install -y build-essential 9 | 10 | RUN conda create --name qlib_env python=3.8 -y 11 | RUN echo "conda activate qlib_env" >> ~/.bashrc 12 | ENV PATH /opt/conda/envs/qlib_env/bin:$PATH 13 | 14 | RUN python -m pip install --upgrade pip 15 | 16 | RUN python -m pip install numpy==1.23.5 17 | RUN python -m pip install pandas==1.5.3 18 | RUN python -m pip install importlib-metadata==5.2.0 19 | RUN python -m pip install "cloudpickle<3" 20 | RUN python -m pip install scikit-learn==1.3.2 21 | 22 | RUN python -m pip install cython packaging tables matplotlib statsmodels 23 | RUN python -m pip install pybind11 cvxpy 24 | 25 | ARG IS_STABLE="yes" 26 | 27 | RUN if [ "$IS_STABLE" = "yes" ]; then \ 28 | python -m pip install pyqlib; \ 29 | else \ 30 | python setup.py install; \ 31 | fi 32 | -------------------------------------------------------------------------------- /examples/benchmarks/TRA/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # we used random seed(1 1000 2000 3000 4000 5000) in our experiments 4 | 5 | # Directly run from Qlib command `qrun` 6 | qrun configs/config_alstm.yaml 7 | 8 | qrun configs/config_transformer.yaml 9 | 10 | qrun configs/config_transformer_tra_init.yaml 11 | qrun configs/config_transformer_tra.yaml 12 | 13 | qrun configs/config_alstm_tra_init.yaml 14 | qrun configs/config_alstm_tra.yaml 15 | 16 | 17 | # Or setting different parameters with example.py 18 | python example.py --config_file configs/config_alstm.yaml 19 | 20 | python example.py --config_file configs/config_transformer.yaml 21 | 22 | python example.py --config_file configs/config_transformer_tra_init.yaml 23 | python example.py --config_file configs/config_transformer_tra.yaml 24 | 25 | python example.py --config_file configs/config_alstm_tra_init.yaml 26 | python example.py --config_file configs/config_alstm_tra.yaml 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /qlib/rl/contrib/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
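# read_order_file below normalizes an order file (.pkl or .csv, or an
# already-loaded DataFrame) into a DataFrame with `datetime`/`direction`
# columns. Illustrative call (the path is a placeholder):
#
#     orders = read_order_file(Path("data/orders/test_orders.pkl"))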
3 | 4 | from __future__ import annotations 5 | 6 | from pathlib import Path 7 | 8 | import pandas as pd 9 | 10 | 11 | def read_order_file(order_file: Path | pd.DataFrame) -> pd.DataFrame: 12 | if isinstance(order_file, pd.DataFrame): 13 | return order_file 14 | 15 | order_file = Path(order_file) 16 | 17 | if order_file.suffix == ".pkl": 18 | order_df = pd.read_pickle(order_file).reset_index() 19 | elif order_file.suffix == ".csv": 20 | order_df = pd.read_csv(order_file) 21 | else: 22 | raise TypeError(f"Unsupported order file type: {order_file}") 23 | 24 | if "date" in order_df.columns: 25 | # legacy dataframe columns 26 | order_df = order_df.rename(columns={"date": "datetime", "order_type": "direction"}) 27 | order_df["datetime"] = order_df["datetime"].astype(str) 28 | 29 | return order_df 30 | -------------------------------------------------------------------------------- /qlib/contrib/tuner/launcher.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | # pylint: skip-file 5 | # flake8: noqa 6 | 7 | # coding=utf-8 8 | 9 | import argparse 10 | import importlib 11 | import os 12 | 13 | import yaml 14 | 15 | from .config import TunerConfigManager 16 | 17 | args_parser = argparse.ArgumentParser(prog="tuner") 18 | args_parser.add_argument( 19 | "-c", 20 | "--config_path", 21 | required=True, 22 | type=str, 23 | help="config path indicates where to load yaml config.", 24 | ) 25 | 26 | args = args_parser.parse_args() 27 | 28 | TUNER_CONFIG_MANAGER = TunerConfigManager(args.config_path) 29 | 30 | 31 | def run(): 32 | # 1. Get pipeline class. 33 | tuner_pipeline_class = getattr(importlib.import_module(".pipeline", package="qlib.contrib.tuner"), "Pipeline") 34 | # 2. Init tuner pipeline. 35 | tuner_pipeline = tuner_pipeline_class(TUNER_CONFIG_MANAGER) 36 | # 3. Begin to tune 37 | tuner_pipeline.run() 38 | -------------------------------------------------------------------------------- /tests/test_workflow.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | import shutil 4 | import unittest 5 | from pathlib import Path 6 | 7 | from qlib.tests import TestAutoData 8 | from qlib.workflow import R 9 | 10 | 11 | class WorkflowTest(TestAutoData): 12 | # Creating the directory manually doesn't work with mlflow, 13 | # so we add a subfolder named .trash when we create the directory. 14 | TMP_PATH = Path("./.mlruns_tmp/.trash") 15 | 16 | def tearDown(self) -> None: 17 | if self.TMP_PATH.exists(): 18 | shutil.rmtree(self.TMP_PATH) 19 | 20 | def test_get_local_dir(self): 21 | """ """ 22 | self.TMP_PATH.mkdir(parents=True, exist_ok=True) 23 | 24 | with R.start(uri=str(self.TMP_PATH)): 25 | pass 26 | 27 | with R.uri_context(uri=str(self.TMP_PATH)): 28 | resume_recorder = R.get_recorder() 29 | resume_recorder.get_local_dir() 30 | 31 | 32 | if __name__ == "__main__": 33 | unittest.main() 34 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug-report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F41B Bug Report" 3 | about: Submit a bug report to help us improve Qlib 4 | labels: bug 5 | 6 | --- 7 | 8 | ## 🐛 Bug Description 9 | 10 | 11 | 12 | ## To Reproduce 13 | 14 | Steps to reproduce the behavior: 15 | 16 | 1. 17 | 1. 18 | 1. 
19 | 20 | 21 | ## Expected Behavior 22 | 23 | 24 | 25 | ## Screenshot 26 | 27 | 28 | 29 | ## Environment 30 | 31 | **Note**: User could run `cd scripts && python collect_info.py all` under project directory to get system information 32 | and paste them here directly. 33 | 34 | - Qlib version: 35 | - Python version: 36 | - OS (`Windows`, `Linux`, `MacOS`): 37 | - Commit number (optional, please provide it if you are using the dev version): 38 | 39 | ## Additional Notes 40 | 41 | 42 | -------------------------------------------------------------------------------- /examples/model_interpreter/feature.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | 5 | import qlib 6 | from qlib.constant import REG_CN 7 | from qlib.tests.config import CSI300_GBDT_TASK 8 | from qlib.tests.data import GetData 9 | from qlib.utils import init_instance_by_config 10 | 11 | if __name__ == "__main__": 12 | # use default data 13 | provider_uri = "~/.qlib/qlib_data/cn_data" # target_dir 14 | GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True) 15 | 16 | qlib.init(provider_uri=provider_uri, region=REG_CN) 17 | 18 | ################################### 19 | # train model 20 | ################################### 21 | # model initialization 22 | model = init_instance_by_config(CSI300_GBDT_TASK["model"]) 23 | dataset = init_instance_by_config(CSI300_GBDT_TASK["dataset"]) 24 | model.fit(dataset) 25 | 26 | # get model feature importance 27 | feature_importance = model.get_feature_importance() 28 | print("feature importance:") 29 | print(feature_importance) 30 | -------------------------------------------------------------------------------- /qlib/walkforward/walkforward_handler.py: -------------------------------------------------------------------------------- 1 | from qlib.contrib.data.handler import check_transform_proc 2 | from qlib.data.dataset.handler import DataHandlerLP 3 | from qlib.data.dataset.loader import DataLoaderDH 4 | 5 | 6 | class WFDataHandler(DataHandlerLP): 7 | def __init__( 8 | self, 9 | start_time=None, 10 | end_time=None, 11 | infer_processors=[], 12 | learn_processors=[], 13 | fit_start_time=None, 14 | fit_end_time=None, 15 | data_loader_kwargs={}, 16 | ): 17 | infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time) 18 | learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time) 19 | 20 | data_loader = DataLoaderDH(**data_loader_kwargs) 21 | 22 | super().__init__( 23 | instruments=None, 24 | start_time=start_time, 25 | end_time=end_time, 26 | data_loader=data_loader, 27 | infer_processors=infer_processors, 28 | learn_processors=learn_processors, 29 | ) 30 | -------------------------------------------------------------------------------- /tests/misc/test_get_multi_proc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
3 | 4 | import unittest 5 | from multiprocessing import Pool 6 | 7 | import qlib 8 | from qlib.data import D 9 | from qlib.tests import TestAutoData 10 | 11 | 12 | def get_features(fields): 13 | qlib.init(provider_uri=TestAutoData.provider_uri, expression_cache=None, dataset_cache=None, joblib_backend="loky") 14 | return D.features(D.instruments("csi300"), fields) 15 | 16 | 17 | class TestGetData(TestAutoData): 18 | FIELDS = "$open,$close,$high,$low,$volume,$factor,$change".split(",") 19 | 20 | def test_multi_proc(self): 21 | """ 22 | For testing if it will raise error 23 | """ 24 | iter_n = 2 25 | pool = Pool(iter_n) 26 | 27 | res = [] 28 | for _ in range(iter_n): 29 | res.append(pool.apply_async(get_features, (self.FIELDS,), {})) 30 | 31 | for r in res: 32 | print(r.get()) 33 | 34 | pool.close() 35 | pool.join() 36 | 37 | 38 | if __name__ == "__main__": 39 | unittest.main() 40 | -------------------------------------------------------------------------------- /examples/benchmarks/GeneralPtNN/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Introduction 4 | 5 | What is GeneralPtNN 6 | - Fixes the previous design, which failed to support both time-series and tabular data 7 | - Now you can simply replace the PyTorch model structure to run a NN model (see the sketch at the end of this README). 8 | 9 | We provide an example to demonstrate the effectiveness of the current design. 10 | - `workflow_config_gru.yaml` aligns with previous results [GRU(Kyunghyun Cho, et al.)](../README.md#Alpha158-dataset) 11 | - `workflow_config_gru2mlp.yaml` demonstrates that we can convert a config from time-series to tabular data with minimal changes 12 | - You only have to change the net & dataset class to make the conversion. 13 | - `workflow_config_mlp.yaml` achieves similar functionality to [MLP](../README.md#Alpha158-dataset) 14 | 15 | # TODO 16 | 17 | - We will align existing models to the current design. 18 | 19 | - The result of `workflow_config_mlp.yaml` is different from the result of [MLP](../README.md#Alpha158-dataset) since GeneralPtNN has a different stopping method from previous implementations. Specifically, GeneralPtNN controls training by epochs, whereas previous methods were controlled by max_steps.
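As a hedged illustration of "just replace the PyTorch model structure", here is a minimal sketch; the class name, layer sizes, and feature dimension are invented for this example and are not the benchmark's actual config:

```python
import torch
import torch.nn as nn


class TinyNet(nn.Module):
    """A drop-in tabular network: flat Alpha158-style features -> one score per sample."""

    def __init__(self, d_feat: int = 158):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(d_feat, 64),
            nn.ReLU(),
            nn.Linear(64, 1),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # x: [batch, d_feat] -> [batch]
        return self.net(x).squeeze(-1)
```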
20 | -------------------------------------------------------------------------------- /examples/rolling_process_data/rolling_handler.py: -------------------------------------------------------------------------------- 1 | from qlib.contrib.data.handler import check_transform_proc 2 | from qlib.data.dataset.handler import DataHandlerLP 3 | from qlib.data.dataset.loader import DataLoaderDH 4 | 5 | 6 | class RollingDataHandler(DataHandlerLP): 7 | def __init__( 8 | self, 9 | start_time=None, 10 | end_time=None, 11 | infer_processors=[], 12 | learn_processors=[], 13 | fit_start_time=None, 14 | fit_end_time=None, 15 | data_loader_kwargs={}, 16 | ): 17 | infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time) 18 | learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time) 19 | 20 | data_loader = { 21 | "class": "DataLoaderDH", 22 | "kwargs": {**data_loader_kwargs}, 23 | } 24 | 25 | super().__init__( 26 | instruments=None, 27 | start_time=start_time, 28 | end_time=end_time, 29 | data_loader=data_loader, 30 | infer_processors=infer_processors, 31 | learn_processors=learn_processors, 32 | ) 33 | -------------------------------------------------------------------------------- /qlib/contrib/report/report/template/general_report_template.html: -------------------------------------------------------------------------------- 1 | <!DOCTYPE html> 2 | <html lang="en"> 3 | <head> 4 | <meta charset="UTF-8"> 5 | <title>{{title}}</title> 6 | </head> 7 | <body> 8 | {% macro print_section(m_level, m_content_items) -%} 9 | {% for content_item in m_content_items %} 10 | {% if 'header' in content_item %} 11 | <{{"h%d"|format(m_level)}}>{{content_item.header}}</{{"h%d"|format(m_level)}}> 12 | {%- endif %} 13 | {% if content_item.type == "html" %} 14 | {{content_item.content}} 15 | {% elif content_item.type == "base64image" %} 16 | <img src="data:image/png;base64,{{content_item.content}}" alt="Red dot" /> 17 | {% elif content_item.type == "base64imagelist" %} 18 | {% for base64image in content_item.content %} 19 | <img src="data:image/png;base64,{{base64image}}" alt="Red dot" /> 20 | {%- endfor %} 21 | {% elif content_item.type == "subsections" %} 22 | {{print_section(m_level+1, content_item.content)}} 23 | {% elif content_item.type == "itemlist" %} 24 | <ul> 25 | {% for dk, dv in content_item.content.items() %} 26 | <li>{{dk}}: {{dv}}</li> 27 | {%- endfor %} 28 | </ul>
29 | {%- endif %} 30 | {%- endfor %} 31 | {%- endmacro -%} 32 | 33 | {{print_section(1, component_list)}} 34 | </body> 35 | </html> 36 | -------------------------------------------------------------------------------- /qlib/rl/order_execution/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | """ 5 | Currently it supports single-asset order execution. 6 | Multi-asset is on the way. 7 | """ 8 | 9 | from .interpreter import ( 10 | CategoricalActionInterpreter, 11 | CurrentStepStateInterpreter, 12 | FullHistoryStateInterpreter, 13 | TwapRelativeActionInterpreter, 14 | ) 15 | from .network import Recurrent 16 | from .policy import PPO, AllOne 17 | from .reward import PAPenaltyReward 18 | from .simulator_simple import SingleAssetOrderExecutionSimple 19 | from .state import SAOEMetrics, SAOEState 20 | from .strategy import ProxySAOEStrategy, SAOEIntStrategy, SAOEStateAdapter, SAOEStrategy 21 | 22 | __all__ = [ 23 | "FullHistoryStateInterpreter", 24 | "CurrentStepStateInterpreter", 25 | "CategoricalActionInterpreter", 26 | "TwapRelativeActionInterpreter", 27 | "Recurrent", 28 | "AllOne", 29 | "PPO", 30 | "PAPenaltyReward", 31 | "SingleAssetOrderExecutionSimple", 32 | "SAOEStateAdapter", 33 | "SAOEMetrics", 34 | "SAOEState", 35 | "SAOEStrategy", 36 | "ProxySAOEStrategy", 37 | "SAOEIntStrategy", 38 | ] 39 | -------------------------------------------------------------------------------- /qlib/contrib/torch.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | """ 4 | This module is not a necessary part of Qlib. 5 | It just provides some convenience tools. 6 | It should not be imported into the core part of qlib 7 | """ 8 | import numpy as np 9 | import pandas as pd 10 | import torch 11 | 12 | 13 | def data_to_tensor(data, device="cpu", raise_error=False): 14 | if isinstance(data, torch.Tensor): 15 | if device == "cpu": 16 | return data.cpu() 17 | else: 18 | return data.to(device) 19 | if isinstance(data, (pd.DataFrame, pd.Series)): 20 | return data_to_tensor(torch.from_numpy(data.values).float(), device) 21 | elif isinstance(data, np.ndarray): 22 | return data_to_tensor(torch.from_numpy(data).float(), device) 23 | elif isinstance(data, (tuple, list)): 24 | return [data_to_tensor(i, device) for i in data] 25 | elif isinstance(data, dict): 26 | return {k: data_to_tensor(v, device) for k, v in data.items()} 27 | else: 28 | if raise_error: 29 | raise ValueError(f"Unsupported data type: {type(data)}.") 30 | else: 31 | return data 32 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, Extension 2 | import numpy 3 | import os 4 | 5 | 6 | def read(rel_path: str) -> str: 7 | here = os.path.abspath(os.path.dirname(__file__)) 8 | with open(os.path.join(here, rel_path), encoding="utf-8") as fp: 9 | return fp.read() 10 | 11 | 12 | def get_version(rel_path: str) -> str: 13 | for line in read(rel_path).splitlines(): 14 | if line.startswith("__version__"): 15 | delim = '"' if '"' in line else "'" 16 | return line.split(delim)[1] 17 | raise RuntimeError("Unable to find version string.") 18 | 19 | 20 | NUMPY_INCLUDE = numpy.get_include() 21 | 22 | VERSION = get_version("qlib/__init__.py") 23 | 24 | 25 | setup( 26 | version=VERSION, 27
| ext_modules=[ 28 | Extension( 29 | "qlib.data._libs.rolling", 30 | ["qlib/data/_libs/rolling.pyx"], 31 | language="c++", 32 | include_dirs=[NUMPY_INCLUDE], 33 | ), 34 | Extension( 35 | "qlib.data._libs.expanding", 36 | ["qlib/data/_libs/expanding.pyx"], 37 | language="c++", 38 | include_dirs=[NUMPY_INCLUDE], 39 | ), 40 | ], 41 | ) 42 | -------------------------------------------------------------------------------- /qlib/rl/strategy/single_order.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | from __future__ import annotations 5 | 6 | from qlib.backtest import Order 7 | from qlib.backtest.decision import OrderHelper, TradeDecisionWO, TradeRange 8 | from qlib.strategy.base import BaseStrategy 9 | 10 | 11 | class SingleOrderStrategy(BaseStrategy): 12 | """Strategy used to generate a trade decision with exactly one order.""" 13 | 14 | def __init__( 15 | self, 16 | order: Order, 17 | trade_range: TradeRange | None = None, 18 | ) -> None: 19 | super().__init__() 20 | 21 | self._order = order 22 | self._trade_range = trade_range 23 | 24 | def generate_trade_decision(self, execute_result: list | None = None) -> TradeDecisionWO: 25 | oh: OrderHelper = self.common_infra.get("trade_exchange").get_order_helper() 26 | order_list = [ 27 | oh.create( 28 | code=self._order.stock_id, 29 | amount=self._order.amount, 30 | direction=self._order.direction, 31 | ), 32 | ] 33 | return TradeDecisionWO(order_list, self, self._trade_range) 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE 22 | -------------------------------------------------------------------------------- /docs/advanced/server.rst: -------------------------------------------------------------------------------- 1 | .. _server: 2 | 3 | ============================= 4 | ``Online`` & ``Offline`` mode 5 | ============================= 6 | .. currentmodule:: qlib 7 | 8 | 9 | Introduction 10 | ============ 11 | 12 | ``Qlib`` supports ``Online`` mode and ``Offline`` mode. Only the ``Offline`` mode is introduced in this document. 13 | 14 | The ``Online`` mode is designed to solve the following problems: 15 | 16 | - Manage the data in a centralized way. 
Users don't have to manage data of different versions. 17 | - Reduce the amount of cache to be generated. 18 | - Make the data accessible in a remote way. 19 | 20 | Qlib-Server 21 | =========== 22 | 23 | ``Qlib-Server`` is the assorted server system for ``Qlib``, which utilizes ``Qlib`` for basic calculations and provides an extensive server system and cache mechanism. With ``Qlib-Server``, the data provided for ``Qlib`` can be managed in a centralized manner, and users can use ``Qlib`` in ``Online`` mode. 24 | 25 | 26 | 27 | Reference 28 | ========= 29 | If users are interested in ``Qlib-Server`` and ``Online`` mode, please refer to `Qlib-Server Project <https://github.com/microsoft/qlib-server>`_ and `Qlib-Server Document <https://qlib-server.readthedocs.io/en/latest/>`_. 30 | -------------------------------------------------------------------------------- /qlib/contrib/online/online_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | # pylint: skip-file 5 | # flake8: noqa 6 | 7 | import random 8 | 9 | import pandas as pd 10 | 11 | from ...data import D 12 | from ..model.base import Model 13 | 14 | 15 | class ScoreFileModel(Model): 16 | """ 17 | This model will load a score file and return the scores at dates that exist in the score file. 18 | """ 19 | 20 | def __init__(self, score_path): 21 | pred_test = pd.read_csv(score_path, index_col=[0, 1], parse_dates=True, infer_datetime_format=True) 22 | self.pred = pred_test 23 | 24 | def get_data_with_date(self, date, **kwargs): 25 | score = self.pred.loc(axis=0)[:, date] # (stock_id, trade_date) multi_index, score at predict date 26 | score_series = score.reset_index(level="datetime", drop=True)[ 27 | "score" 28 | ] # pd.Series ; index:stock_id, data: score 29 | return score_series 30 | 31 | def predict(self, x_test, **kwargs): 32 | return x_test 33 | 34 | def score(self, x_test, **kwargs): 35 | return 36 | 37 | def fit(self, x_train, y_train, x_valid, y_valid, w_train=None, w_valid=None, **kwargs): 38 | return 39 | 40 | def save(self, fname, **kwargs): 41 | return 42 | -------------------------------------------------------------------------------- /tests/dependency_tests/test_mlflow.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License.
3 | import unittest 4 | import platform 5 | import mlflow 6 | import time 7 | from pathlib import Path 8 | import shutil 9 | 10 | 11 | class MLflowTest(unittest.TestCase): 12 | TMP_PATH = Path("./.mlruns_tmp/") 13 | 14 | def tearDown(self) -> None: 15 | if self.TMP_PATH.exists(): 16 | shutil.rmtree(self.TMP_PATH) 17 | 18 | def test_creating_client(self): 19 | """ 20 | Please refer to qlib/workflow/expm.py:MLflowExpManager._client 21 | we don't cache _client (this is helpful to reduce maintenance work when MLflowExpManager's uri is changed) 22 | 23 | This implementation is based on the assumption that creating a client is fast 24 | """ 25 | start = time.time() 26 | for i in range(10): 27 | _ = mlflow.tracking.MlflowClient(tracking_uri=str(self.TMP_PATH)) 28 | end = time.time() 29 | elapsed = end - start 30 | if platform.system() == "Linux": 31 | self.assertLess(elapsed, 1e-2) # it can be done in less than 10ms 32 | else: 33 | self.assertLess(elapsed, 2e-2) 34 | print(elapsed) 35 | 36 | 37 | if __name__ == "__main__": 38 | unittest.main() 39 | -------------------------------------------------------------------------------- /qlib/rl/aux_info.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | from __future__ import annotations 5 | 6 | from typing import TYPE_CHECKING, Generic, Optional, TypeVar 7 | 8 | from qlib.typehint import final 9 | 10 | from .simulator import StateType 11 | 12 | if TYPE_CHECKING: 13 | from .utils.env_wrapper import EnvWrapper 14 | 15 | 16 | __all__ = ["AuxiliaryInfoCollector"] 17 | 18 | AuxInfoType = TypeVar("AuxInfoType") 19 | 20 | 21 | class AuxiliaryInfoCollector(Generic[StateType, AuxInfoType]): 22 | """Override this class to collect customized auxiliary information from environment.""" 23 | 24 | env: Optional[EnvWrapper] = None 25 | 26 | @final 27 | def __call__(self, simulator_state: StateType) -> AuxInfoType: 28 | return self.collect(simulator_state) 29 | 30 | def collect(self, simulator_state: StateType) -> AuxInfoType: 31 | """Override this for customized auxiliary info. 32 | Usually useful in Multi-agent RL. 33 | 34 | Parameters 35 | ---------- 36 | simulator_state 37 | Retrieved with ``simulator.get_state()``. 38 | 39 | Returns 40 | ------- 41 | Auxiliary information. 42 | """ 43 | raise NotImplementedError("collect is not implemented!") 44 | -------------------------------------------------------------------------------- /examples/nested_decision_execution/README.md: -------------------------------------------------------------------------------- 1 | # Nested Decision Execution 2 | 3 | This workflow is an example of nested decision execution in backtesting: Qlib allows users to use different strategies to make trade decisions at different frequencies. 4 | 5 | ## Weekly Portfolio Generation and Daily Order Execution 6 | 7 | This workflow provides an example that uses a DropoutTopkStrategy (a strategy based on the daily-frequency LightGBM model) at weekly frequency for portfolio generation and uses SBBStrategyEMA (a rule-based strategy that uses EMA for decision-making) to execute orders at daily frequency; a minimal sketch of this nesting is given below. 
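The inner/outer split is expressed with qlib's nested executor. The snippet below is a minimal, hypothetical sketch of that wiring in qlib's dict-config style; the class names (`NestedExecutor`, `SimulatorExecutor`, `SBBStrategyEMA`) exist in qlib, but the exact kwargs here are assumptions, so treat `workflow.py` in this folder as the authoritative setup.

```python
# Hypothetical sketch; the kwargs are assumptions, see workflow.py for the real config.
executor_config = {
    "class": "NestedExecutor",  # outer executor, steps at the outer (weekly) frequency
    "module_path": "qlib.backtest.executor",
    "kwargs": {
        "time_per_step": "week",  # outer (portfolio) decision frequency (assumed value)
        "inner_strategy": {  # splits each weekly decision into finer-grained orders
            "class": "SBBStrategyEMA",
            "module_path": "qlib.contrib.strategy.rule_strategy",
            "kwargs": {},
        },
        "inner_executor": {  # actually executes orders at the finer (daily) frequency
            "class": "SimulatorExecutor",
            "module_path": "qlib.backtest.executor",
            "kwargs": {"time_per_step": "day"},
        },
    },
}
```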
8 | 9 | ### Usage 10 | 11 | Start backtesting by running the following command: 12 | ```bash 13 | python workflow.py backtest 14 | ``` 15 | 16 | Start collecting data by running the following command: 17 | ```bash 18 | python workflow.py collect_data 19 | ``` 20 | 21 | ## Daily Portfolio Generation and Minutely Order Execution 22 | 23 | This workflow also provides a high-frequency example that uses a DropoutTopkStrategy for portfolio generation at daily frequency and uses SBBStrategyEMA to execute orders at minutely frequency. 24 | 25 | ### Usage 26 | 27 | Start backtesting by running the following command: 28 | ```bash 29 | python workflow.py backtest_highfreq 30 | ``` -------------------------------------------------------------------------------- /tests/data_mid_layer_tests/test_handler.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import shutil 4 | import unittest 5 | from qlib.tests import TestAutoData 6 | from qlib.data import D 7 | from qlib.data.dataset.handler import DataHandlerLP 8 | 9 | 10 | class HandlerTests(TestAutoData): 11 | def to_str(self, obj): 12 | return "".join(str(obj).split()) 13 | 14 | def test_handler_df(self): 15 | df = D.features(["sh600519"], start_time="20190101", end_time="20190201", fields=["$close"]) 16 | dh = DataHandlerLP.from_df(df) 17 | print(dh.fetch()) 18 | self.assertTrue(dh._data.equals(df)) 19 | self.assertTrue(dh._infer is dh._data) 20 | self.assertTrue(dh._learn is dh._data) 21 | self.assertTrue(dh.data_loader._data is dh._data) 22 | fname = "_handler_test.pkl" 23 | dh.to_pickle(fname, dump_all=True) 24 | 25 | with open(fname, "rb") as f: 26 | dh_d = pickle.load(f) 27 | 28 | self.assertTrue(dh_d._data.equals(df)) 29 | self.assertTrue(dh_d._infer is dh_d._data) 30 | self.assertTrue(dh_d._learn is dh_d._data) 31 | # Data loader will no longer be useful 32 | self.assertTrue("_data" not in dh_d.data_loader.__dict__.keys()) 33 | os.remove(fname) 34 | 35 | 36 | if __name__ == "__main__": 37 | unittest.main() 38 | -------------------------------------------------------------------------------- /qlib/model/interpret/base.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | """ 5 | Interfaces to interpret models 6 | """ 7 | 8 | from abc import abstractmethod 9 | 10 | import pandas as pd 11 | 12 | 13 | class FeatureInt: 14 | """Feature (Int)erpreter""" 15 | 16 | @abstractmethod 17 | def get_feature_importance(self) -> pd.Series: 18 | """get feature importance 19 | 20 | Returns 21 | ------- 22 | The index is the feature name. 23 | 24 | The greater the value, the higher the importance. 
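For example, a hypothetical result could be ``pd.Series({"$volume": 0.30, "$close": 0.12})``, meaning ``$volume`` contributes more than ``$close`` (illustrative values only).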
25 | """ 26 | 27 | 28 | class LightGBMFInt(FeatureInt): 29 | """LightGBM (F)eature (Int)erpreter""" 30 | 31 | def __init__(self): 32 | self.model = None 33 | 34 | def get_feature_importance(self, *args, **kwargs) -> pd.Series: 35 | """get feature importance 36 | 37 | Notes 38 | ----- 39 | parameters reference: 40 | https://lightgbm.readthedocs.io/en/latest/pythonapi/lightgbm.Booster.html?highlight=feature_importance#lightgbm.Booster.feature_importance 41 | """ 42 | return pd.Series( 43 | self.model.feature_importance(*args, **kwargs), index=self.model.feature_name() 44 | ).sort_values( # pylint: disable=E1101 45 | ascending=False 46 | ) 47 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Description 4 | 5 | 6 | ## Motivation and Context 7 | 8 | 9 | 10 | ## How Has This Been Tested? 11 | 12 | - [ ] Pass the test by running: `pytest qlib/tests/test_all_pipeline.py` under upper directory of `qlib`. 13 | - [ ] If you are adding a new feature, test on your own test scripts. 14 | 15 | 16 | 17 | ## Screenshots of Test Results (if appropriate): 18 | 1. Pipeline test: 19 | 2. Your own tests: 20 | 21 | ## Types of changes 22 | 23 | - [ ] Fix bugs 24 | - [ ] Add new feature 25 | - [ ] Update documentation 26 | -------------------------------------------------------------------------------- /qlib/contrib/model/pytorch_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | import torch.nn as nn 5 | 6 | 7 | def count_parameters(models_or_parameters, unit="m"): 8 | """ 9 | This function is to obtain the storage size unit of a (or multiple) models. 10 | 11 | Parameters 12 | ---------- 13 | models_or_parameters : PyTorch model(s) or a list of parameters. 14 | unit : the storage size unit. 15 | 16 | Returns 17 | ------- 18 | The number of parameters of the given model(s) or parameters. 
19 | """ 20 | if isinstance(models_or_parameters, nn.Module): 21 | counts = sum(v.numel() for v in models_or_parameters.parameters()) 22 | elif isinstance(models_or_parameters, nn.Parameter): 23 | counts = models_or_parameters.numel() 24 | elif isinstance(models_or_parameters, (list, tuple)): 25 | return sum(count_parameters(x, unit) for x in models_or_parameters) 26 | else: 27 | counts = sum(v.numel() for v in models_or_parameters) 28 | unit = unit.lower() 29 | if unit in ("kb", "k"): 30 | counts /= 2**10 31 | elif unit in ("mb", "m"): 32 | counts /= 2**20 33 | elif unit in ("gb", "g"): 34 | counts /= 2**30 35 | elif unit is not None: 36 | raise ValueError("Unknown unit: {:}".format(unit)) 37 | return counts 38 | -------------------------------------------------------------------------------- /examples/benchmarks/TRA/example.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import qlib 4 | from ruamel.yaml import YAML 5 | from qlib.utils import init_instance_by_config 6 | 7 | 8 | def main(seed, config_file="configs/config_alstm.yaml"): 9 | # set random seed 10 | with open(config_file) as f: 11 | yaml = YAML(typ="safe", pure=True) 12 | config = yaml.load(f) 13 | 14 | # seed_suffix = "/seed1000" if "init" in config_file else f"/seed{seed}" 15 | seed_suffix = "" 16 | config["task"]["model"]["kwargs"].update( 17 | {"seed": seed, "logdir": config["task"]["model"]["kwargs"]["logdir"] + seed_suffix} 18 | ) 19 | 20 | # initialize workflow 21 | qlib.init( 22 | provider_uri=config["qlib_init"]["provider_uri"], 23 | region=config["qlib_init"]["region"], 24 | ) 25 | dataset = init_instance_by_config(config["task"]["dataset"]) 26 | model = init_instance_by_config(config["task"]["model"]) 27 | 28 | # train model 29 | model.fit(dataset) 30 | 31 | 32 | if __name__ == "__main__": 33 | # set params from cmd 34 | parser = argparse.ArgumentParser(allow_abbrev=False) 35 | parser.add_argument("--seed", type=int, default=1000, help="random seed") 36 | parser.add_argument("--config_file", type=str, default="configs/config_alstm.yaml", help="config file") 37 | args = parser.parse_args() 38 | main(**vars(args)) 39 | -------------------------------------------------------------------------------- /docs/start/installation.rst: -------------------------------------------------------------------------------- 1 | .. _installation: 2 | 3 | ============ 4 | Installation 5 | ============ 6 | 7 | .. currentmodule:: qlib 8 | 9 | 10 | ``Qlib`` Installation 11 | ===================== 12 | .. note:: 13 | 14 | `Qlib` supports both `Windows` and `Linux`. It's recommended to use `Qlib` in `Linux`. ``Qlib`` supports Python3, which is up to Python3.8. 15 | 16 | Users can easily install ``Qlib`` by pip according to the following command: 17 | 18 | .. code-block:: bash 19 | 20 | pip install pyqlib 21 | 22 | 23 | Also, Users can install ``Qlib`` by the source code according to the following steps: 24 | 25 | - Enter the root directory of ``Qlib``, in which the file ``setup.py`` exists. 26 | - Then, please execute the following command to install the environment dependencies and install ``Qlib``: 27 | 28 | .. code-block:: bash 29 | 30 | $ pip install numpy 31 | $ pip install --upgrade cython 32 | $ git clone https://github.com/microsoft/qlib.git && cd qlib 33 | $ python setup.py install 34 | 35 | .. note:: 36 | It's recommended to use anaconda/miniconda to setup the environment. ``Qlib`` needs lightgbm and pytorch packages, use pip to install them. 
37 | 38 | 39 | 40 | Use the following code to make sure the installation is successful: 41 | 42 | .. code-block:: python 43 | 44 | >>> import qlib 45 | >>> qlib.__version__ 46 | 47 | -------------------------------------------------------------------------------- /examples/benchmarks_dynamic/baseline/rolling_benchmark.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | import os 4 | from pathlib import Path 5 | from typing import Union 6 | 7 | import fire 8 | 9 | from qlib import auto_init 10 | from qlib.contrib.rolling.base import Rolling 11 | from qlib.tests.data import GetData 12 | 13 | DIRNAME = Path(__file__).absolute().resolve().parent 14 | 15 | 16 | class RollingBenchmark(Rolling): 17 | # The config in the README.md 18 | CONF_LIST = [DIRNAME / "workflow_config_linear_Alpha158.yaml", DIRNAME / "workflow_config_lightgbm_Alpha158.yaml"] 19 | 20 | DEFAULT_CONF = CONF_LIST[0] 21 | 22 | def __init__(self, conf_path: Union[str, Path] = DEFAULT_CONF, horizon=20, **kwargs) -> None: 23 | # This code is for compatibility with the previous old code 24 | conf_path = Path(conf_path) 25 | super().__init__(conf_path=conf_path, horizon=horizon, **kwargs) 26 | 27 | for f in self.CONF_LIST: 28 | if conf_path.samefile(f): 29 | break 30 | else: 31 | self.logger.warning("Model type is not in the benchmark!") 32 | 33 | 34 | if __name__ == "__main__": 35 | kwargs = {} 36 | if os.environ.get("PROVIDER_URI", "") == "": 37 | GetData().qlib_data(exists_skip=True) 38 | else: 39 | kwargs["provider_uri"] = os.environ["PROVIDER_URI"] 40 | auto_init(**kwargs) 41 | fire.Fire(RollingBenchmark) 42 | -------------------------------------------------------------------------------- /examples/benchmarks_dynamic/DDG-DA/workflow.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
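"""Entry point for the DDG-DA rolling benchmark: it wraps ``qlib.contrib.rolling.ddgda.DDGDA`` with the baseline benchmark configs and exposes it on the command line via ``fire``. This module docstring is an added summary of the script below."""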
3 | import os 4 | from pathlib import Path 5 | from typing import Union 6 | 7 | import fire 8 | 9 | from qlib import auto_init 10 | from qlib.contrib.rolling.ddgda import DDGDA 11 | from qlib.tests.data import GetData 12 | 13 | DIRNAME = Path(__file__).absolute().resolve().parent 14 | BENCH_DIR = DIRNAME.parent / "baseline" 15 | 16 | 17 | class DDGDABench(DDGDA): 18 | # The config in the README.md 19 | CONF_LIST = [ 20 | BENCH_DIR / "workflow_config_linear_Alpha158.yaml", 21 | BENCH_DIR / "workflow_config_lightgbm_Alpha158.yaml", 22 | ] 23 | 24 | DEFAULT_CONF = CONF_LIST[0] # Linear by default due to efficiency 25 | 26 | def __init__(self, conf_path: Union[str, Path] = DEFAULT_CONF, horizon=20, **kwargs) -> None: 27 | # This code is for compatibility with the previous old code 28 | conf_path = Path(conf_path) 29 | super().__init__(conf_path=conf_path, horizon=horizon, working_dir=DIRNAME, **kwargs) 30 | 31 | for f in self.CONF_LIST: 32 | if conf_path.samefile(f): 33 | break 34 | else: 35 | self.logger.warning("Model type is not in the benchmark!") 36 | 37 | 38 | if __name__ == "__main__": 39 | kwargs = {} 40 | if os.environ.get("PROVIDER_URI", "") == "": 41 | GetData().qlib_data(exists_skip=True) 42 | else: 43 | kwargs["provider_uri"] = os.environ["PROVIDER_URI"] 44 | auto_init(**kwargs) 45 | fire.Fire(DDGDABench) 46 | -------------------------------------------------------------------------------- /.github/workflows/test_qlib_from_source_slow.yml: -------------------------------------------------------------------------------- 1 | name: Test qlib from source slow 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | build: 11 | timeout-minutes: 720 12 | # we may retry 3 times for `Unit tests with Pytest` 13 | 14 | runs-on: ${{ matrix.os }} 15 | strategy: 16 | matrix: 17 | os: [windows-latest, ubuntu-24.04, ubuntu-22.04] 18 | # In GitHub Actions, with Python 3.7, pip install will not match the latest versions of the packages. 19 | # Also, Python 3.7 is no longer supported on macos-14, and will be phased out of macos-13 in the near future. 20 | # All things considered, we have removed Python 3.7. 21 | python-version: ["3.11", "3.12", "3.13"] 22 | 23 | steps: 24 | - name: Test qlib from source slow 25 | uses: actions/checkout@v3 26 | 27 | - name: Set up Python ${{ matrix.python-version }} 28 | uses: actions/setup-python@v4 29 | with: 30 | python-version: ${{ matrix.python-version }} 31 | 32 | - name: Set up Python tools 33 | run: | 34 | make dev 35 | 36 | - name: Download dependencies data 37 | run: | 38 | python scripts/get_data.py qlib_data --name qlib_data_simple --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn 39 | 40 | - name: Unit tests with Pytest 41 | uses: nick-fields/retry@v2 42 | with: 43 | timeout_minutes: 240 44 | max_attempts: 3 45 | command: | 46 | cd tests 47 | python -m pytest . -m "slow" --durations=0 48 | -------------------------------------------------------------------------------- /scripts/data_collector/pit/README.md: -------------------------------------------------------------------------------- 1 | # Collect Point-in-Time Data 2 | 3 | > *Please pay **ATTENTION** that the data is collected from [baostock](http://baostock.com) and the data might not be perfect. We recommend that users prepare their own data if they have a high-quality dataset. 
For more information, users can refer to the [related document](https://qlib.readthedocs.io/en/latest/component/data.html#converting-csv-format-into-qlib-format)* 4 | 5 | ## Requirements 6 | 7 | ```bash 8 | pip install -r requirements.txt 9 | ``` 10 | 11 | ## Collect Data 12 | 13 | 14 | ### Download Quarterly CN Data 15 | 16 | ```bash 17 | cd qlib/scripts/data_collector/pit/ 18 | # download from baostock.com 19 | python collector.py download_data --source_dir ~/.qlib/stock_data/source/pit --start 2000-01-01 --end 2020-01-01 --interval quarterly 20 | ``` 21 | 22 | Downloading the data for all stocks is very time-consuming. If you just want to run a quick test on a few stocks, you can run the command below: 23 | ```bash 24 | python collector.py download_data --source_dir ~/.qlib/stock_data/source/pit --start 2000-01-01 --end 2020-01-01 --interval quarterly --symbol_regex "^(600519|000725).*" 25 | ``` 26 | 27 | 28 | ### Normalize Data 29 | ```bash 30 | python collector.py normalize_data --interval quarterly --source_dir ~/.qlib/stock_data/source/pit --normalize_dir ~/.qlib/stock_data/source/pit_normalized 31 | ``` 32 | 33 | 34 | 35 | ### Dump Data into PIT Format 36 | 37 | ```bash 38 | cd qlib/scripts 39 | python dump_pit.py dump --data_path ~/.qlib/stock_data/source/pit_normalized --qlib_dir ~/.qlib/qlib_data/cn_data --interval quarterly 40 | ``` 41 | -------------------------------------------------------------------------------- /examples/benchmarks/TRA/configs/config_alstm.yaml: -------------------------------------------------------------------------------- 1 | qlib_init: 2 | provider_uri: "~/.qlib/qlib_data/cn_data" 3 | region: cn 4 | 5 | data_loader_config: &data_loader_config 6 | class: StaticDataLoader 7 | module_path: qlib.data.dataset.loader 8 | kwargs: 9 | config: 10 | feature: data/feature.pkl 11 | label: data/label.pkl 12 | 13 | model_config: &model_config 14 | input_size: 16 15 | hidden_size: 256 16 | num_layers: 2 17 | num_heads: 2 18 | use_attn: True 19 | dropout: 0.1 20 | 21 | num_states: &num_states 1 22 | 23 | tra_config: &tra_config 24 | num_states: *num_states 25 | hidden_size: 16 26 | tau: 1.0 27 | src_info: LR_TPE 28 | 29 | task: 30 | model: 31 | class: TRAModel 32 | module_path: src/model.py 33 | kwargs: 34 | lr: 0.0002 35 | n_epochs: 500 36 | max_steps_per_epoch: 100 37 | early_stop: 20 38 | seed: 1000 39 | logdir: output/test/alstm 40 | model_type: LSTM 41 | model_config: *model_config 42 | tra_config: *tra_config 43 | lamb: 1.0 44 | rho: 0.99 45 | freeze_model: False 46 | model_init_state: 47 | dataset: 48 | class: MTSDatasetH 49 | module_path: src/dataset.py 50 | kwargs: 51 | handler: 52 | class: DataHandler 53 | module_path: qlib.data.dataset.handler 54 | kwargs: 55 | data_loader: *data_loader_config 56 | segments: 57 | train: [2007-10-30, 2016-05-27] 58 | valid: [2016-09-26, 2018-05-29] 59 | test: [2018-09-21, 2020-06-30] 60 | seq_len: 60 61 | horizon: 21 62 | num_states: *num_states 63 | batch_size: 1024 -------------------------------------------------------------------------------- /examples/benchmarks/TRA/configs/config_alstm_tra_init.yaml: -------------------------------------------------------------------------------- 1 | qlib_init: 2 | provider_uri: "~/.qlib/qlib_data/cn_data" 3 | region: cn 4 | 5 | data_loader_config: &data_loader_config 6 | class: StaticDataLoader 7 | module_path: qlib.data.dataset.loader 8 | kwargs: 9 | config: 10 | feature: data/feature.pkl 11 | label: data/label.pkl 12 | 13 | model_config: &model_config 14 | input_size: 16 15 | 
hidden_size: 256 16 | num_layers: 2 17 | num_heads: 2 18 | use_attn: True 19 | dropout: 0.1 20 | 21 | num_states: &num_states 3 22 | 23 | tra_config: &tra_config 24 | num_states: *num_states 25 | hidden_size: 16 26 | tau: 1.0 27 | src_info: LR_TPE 28 | 29 | task: 30 | model: 31 | class: TRAModel 32 | module_path: src/model.py 33 | kwargs: 34 | lr: 0.0002 35 | n_epochs: 500 36 | max_steps_per_epoch: 100 37 | early_stop: 20 38 | seed: 1000 39 | logdir: output/test/alstm_tra_init 40 | model_type: LSTM 41 | model_config: *model_config 42 | tra_config: *tra_config 43 | lamb: 1.0 44 | rho: 0.99 45 | freeze_model: False 46 | model_init_state: 47 | dataset: 48 | class: MTSDatasetH 49 | module_path: src/dataset.py 50 | kwargs: 51 | handler: 52 | class: DataHandler 53 | module_path: qlib.data.dataset.handler 54 | kwargs: 55 | data_loader: *data_loader_config 56 | segments: 57 | train: [2007-10-30, 2016-05-27] 58 | valid: [2016-09-26, 2018-05-29] 59 | test: [2018-09-21, 2020-06-30] 60 | seq_len: 60 61 | horizon: 21 62 | num_states: *num_states 63 | batch_size: 512 -------------------------------------------------------------------------------- /examples/benchmarks/TRA/configs/config_transformer.yaml: -------------------------------------------------------------------------------- 1 | qlib_init: 2 | provider_uri: "~/.qlib/qlib_data/cn_data" 3 | region: cn 4 | 5 | data_loader_config: &data_loader_config 6 | class: StaticDataLoader 7 | module_path: qlib.data.dataset.loader 8 | kwargs: 9 | config: 10 | feature: data/feature.pkl 11 | label: data/label.pkl 12 | 13 | model_config: &model_config 14 | input_size: 16 15 | hidden_size: 64 16 | num_layers: 2 17 | num_heads: 4 18 | use_attn: False 19 | dropout: 0.1 20 | 21 | num_states: &num_states 1 22 | 23 | tra_config: &tra_config 24 | num_states: *num_states 25 | hidden_size: 16 26 | tau: 1.0 27 | src_info: LR_TPE 28 | 29 | task: 30 | model: 31 | class: TRAModel 32 | module_path: src/model.py 33 | kwargs: 34 | lr: 0.0002 35 | n_epochs: 500 36 | max_steps_per_epoch: 100 37 | early_stop: 20 38 | seed: 1000 39 | logdir: output/test/transformer 40 | model_type: Transformer 41 | model_config: *model_config 42 | tra_config: *tra_config 43 | lamb: 1.0 44 | rho: 0.99 45 | freeze_model: False 46 | model_init_state: 47 | dataset: 48 | class: MTSDatasetH 49 | module_path: src/dataset.py 50 | kwargs: 51 | handler: 52 | class: DataHandler 53 | module_path: qlib.data.dataset.handler 54 | kwargs: 55 | data_loader: *data_loader_config 56 | segments: 57 | train: [2007-10-30, 2016-05-27] 58 | valid: [2016-09-26, 2018-05-29] 59 | test: [2018-09-21, 2020-06-30] 60 | seq_len: 60 61 | horizon: 21 62 | num_states: *num_states 63 | batch_size: 1024 -------------------------------------------------------------------------------- /examples/benchmarks/TRA/configs/config_transformer_tra_init.yaml: -------------------------------------------------------------------------------- 1 | qlib_init: 2 | provider_uri: "~/.qlib/qlib_data/cn_data" 3 | region: cn 4 | 5 | data_loader_config: &data_loader_config 6 | class: StaticDataLoader 7 | module_path: qlib.data.dataset.loader 8 | kwargs: 9 | config: 10 | feature: data/feature.pkl 11 | label: data/label.pkl 12 | 13 | model_config: &model_config 14 | input_size: 16 15 | hidden_size: 64 16 | num_layers: 2 17 | num_heads: 4 18 | use_attn: False 19 | dropout: 0.1 20 | 21 | num_states: &num_states 3 22 | 23 | tra_config: &tra_config 24 | num_states: *num_states 25 | hidden_size: 16 26 | tau: 1.0 27 | src_info: LR_TPE 28 | 29 | task: 30 | model: 31 | 
class: TRAModel 32 | module_path: src/model.py 33 | kwargs: 34 | lr: 0.0002 35 | n_epochs: 500 36 | max_steps_per_epoch: 100 37 | early_stop: 20 38 | seed: 1000 39 | logdir: output/test/transformer_tra_init 40 | model_type: Transformer 41 | model_config: *model_config 42 | tra_config: *tra_config 43 | lamb: 1.0 44 | rho: 0.99 45 | freeze_model: False 46 | model_init_state: 47 | dataset: 48 | class: MTSDatasetH 49 | module_path: src/dataset.py 50 | kwargs: 51 | handler: 52 | class: DataHandler 53 | module_path: qlib.data.dataset.handler 54 | kwargs: 55 | data_loader: *data_loader_config 56 | segments: 57 | train: [2007-10-30, 2016-05-27] 58 | valid: [2016-09-26, 2018-05-29] 59 | test: [2018-09-21, 2020-06-30] 60 | seq_len: 60 61 | horizon: 21 62 | num_states: *num_states 63 | batch_size: 512 -------------------------------------------------------------------------------- /examples/benchmarks/TRA/configs/config_alstm_tra.yaml: -------------------------------------------------------------------------------- 1 | qlib_init: 2 | provider_uri: "~/.qlib/qlib_data/cn_data" 3 | region: cn 4 | 5 | data_loader_config: &data_loader_config 6 | class: StaticDataLoader 7 | module_path: qlib.data.dataset.loader 8 | kwargs: 9 | config: 10 | feature: data/feature.pkl 11 | label: data/label.pkl 12 | 13 | model_config: &model_config 14 | input_size: 16 15 | hidden_size: 256 16 | num_layers: 2 17 | num_heads: 2 18 | use_attn: True 19 | dropout: 0.1 20 | 21 | num_states: &num_states 10 22 | 23 | tra_config: &tra_config 24 | num_states: *num_states 25 | hidden_size: 16 26 | tau: 1.0 27 | src_info: LR_TPE 28 | 29 | task: 30 | model: 31 | class: TRAModel 32 | module_path: src/model.py 33 | kwargs: 34 | lr: 0.0001 35 | n_epochs: 500 36 | max_steps_per_epoch: 100 37 | early_stop: 20 38 | seed: 1000 39 | logdir: output/test/alstm_tra 40 | model_type: LSTM 41 | model_config: *model_config 42 | tra_config: *tra_config 43 | lamb: 2.0 44 | rho: 0.99 45 | freeze_model: True 46 | model_init_state: output/test/alstm_tra_init/model.bin 47 | dataset: 48 | class: MTSDatasetH 49 | module_path: src/dataset.py 50 | kwargs: 51 | handler: 52 | class: DataHandler 53 | module_path: qlib.data.dataset.handler 54 | kwargs: 55 | data_loader: *data_loader_config 56 | segments: 57 | train: [2007-10-30, 2016-05-27] 58 | valid: [2016-09-26, 2018-05-29] 59 | test: [2018-09-21, 2020-06-30] 60 | seq_len: 60 61 | horizon: 21 62 | num_states: *num_states 63 | batch_size: 1024 -------------------------------------------------------------------------------- /examples/benchmarks/TRA/configs/config_transformer_tra.yaml: -------------------------------------------------------------------------------- 1 | qlib_init: 2 | provider_uri: "~/.qlib/qlib_data/cn_data" 3 | region: cn 4 | 5 | data_loader_config: &data_loader_config 6 | class: StaticDataLoader 7 | module_path: qlib.data.dataset.loader 8 | kwargs: 9 | config: 10 | feature: data/feature.pkl 11 | label: data/label.pkl 12 | 13 | model_config: &model_config 14 | input_size: 16 15 | hidden_size: 64 16 | num_layers: 2 17 | num_heads: 4 18 | use_attn: False 19 | dropout: 0.1 20 | 21 | num_states: &num_states 3 22 | 23 | tra_config: &tra_config 24 | num_states: *num_states 25 | hidden_size: 16 26 | tau: 1.0 27 | src_info: LR_TPE 28 | 29 | task: 30 | model: 31 | class: TRAModel 32 | module_path: src/model.py 33 | kwargs: 34 | lr: 0.0005 35 | n_epochs: 500 36 | max_steps_per_epoch: 100 37 | early_stop: 20 38 | seed: 1000 39 | logdir: output/test/transformer_tra 40 | model_type: Transformer 41 | 
model_config: *model_config 42 | tra_config: *tra_config 43 | lamb: 1.0 44 | rho: 0.99 45 | freeze_model: True 46 | model_init_state: output/test/transformer_tra_init/model.bin 47 | dataset: 48 | class: MTSDatasetH 49 | module_path: src/dataset.py 50 | kwargs: 51 | handler: 52 | class: DataHandler 53 | module_path: qlib.data.dataset.handler 54 | kwargs: 55 | data_loader: *data_loader_config 56 | segments: 57 | train: [2007-10-30, 2016-05-27] 58 | valid: [2016-09-26, 2018-05-29] 59 | test: [2018-09-21, 2020-06-30] 60 | seq_len: 60 61 | horizon: 21 62 | num_states: *num_states 63 | batch_size: 512 -------------------------------------------------------------------------------- /scripts/data_collector/crowd_source/README.md: -------------------------------------------------------------------------------- 1 | # Crowd Source Data 2 | 3 | ## Initiative 4 | Public data sources like Yahoo are flawed: they might miss data for stocks that have been delisted, and they might contain data that is wrong. This can introduce survivorship bias into our training process. 5 | 6 | The crowd-source data is introduced to merge data from multiple data sources and cross-validate them against each other, so that: 7 | 1. We will have a more complete history record. 8 | 2. We can identify anomalous data and apply corrections when necessary. 9 | 10 | ## Related Repo 11 | The raw data is hosted on the dolthub repo: https://www.dolthub.com/repositories/chenditc/investment_data 12 | 13 | The processing script and SQL are hosted on the github repo: https://github.com/chenditc/investment_data 14 | 15 | The packaged docker runtime is hosted on dockerhub: https://hub.docker.com/repository/docker/chenditc/investment_data 16 | 17 | ## How to use it in qlib 18 | ### Option 1: Download release bin data 19 | Users can download data in qlib bin format and use it directly: https://github.com/chenditc/investment_data/releases/latest 20 | ```bash 21 | wget https://github.com/chenditc/investment_data/releases/latest/download/qlib_bin.tar.gz 22 | tar -zxvf qlib_bin.tar.gz -C ~/.qlib/qlib_data/cn_data --strip-components=2 23 | ``` 24 | 25 | ### Option 2: Generate qlib data from dolthub 26 | The Dolthub data is updated daily, so if users want up-to-date data, they can dump the qlib bin data using docker: 27 | ``` 28 | docker run -v /:/output -it --rm chenditc/investment_data bash dump_qlib_bin.sh && cp ./qlib_bin.tar.gz /output/ 29 | ``` 30 | 31 | ## FAQ and other info 32 | See: https://github.com/chenditc/investment_data/blob/main/README.md 33 | -------------------------------------------------------------------------------- /qlib/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
3 | 4 | 5 | from __future__ import division, print_function 6 | 7 | from .cache import ( 8 | DatasetCache, 9 | DatasetURICache, 10 | DiskDatasetCache, 11 | DiskExpressionCache, 12 | ArcticExpressionCache, 13 | ExpressionCache, 14 | MemoryCalendarCache, 15 | SimpleDatasetCache, 16 | ) 17 | from .data import ( 18 | BaseProvider, 19 | CalendarProvider, 20 | ClientCalendarProvider, 21 | ClientDatasetProvider, 22 | ClientInstrumentProvider, 23 | ClientProvider, 24 | D, 25 | DatasetProvider, 26 | ExpressionProvider, 27 | FeatureProvider, 28 | InstrumentProvider, 29 | LocalCalendarProvider, 30 | LocalDatasetProvider, 31 | LocalExpressionProvider, 32 | LocalFeatureProvider, 33 | LocalInstrumentProvider, 34 | LocalPITProvider, 35 | LocalProvider, 36 | ) 37 | 38 | 39 | __all__ = [ 40 | "D", 41 | "CalendarProvider", 42 | "InstrumentProvider", 43 | "FeatureProvider", 44 | "ExpressionProvider", 45 | "DatasetProvider", 46 | "LocalCalendarProvider", 47 | "LocalInstrumentProvider", 48 | "LocalFeatureProvider", 49 | "LocalPITProvider", 50 | "LocalExpressionProvider", 51 | "LocalDatasetProvider", 52 | "ClientCalendarProvider", 53 | "ClientInstrumentProvider", 54 | "ClientDatasetProvider", 55 | "BaseProvider", 56 | "LocalProvider", 57 | "ClientProvider", 58 | "ExpressionCache", 59 | "DatasetCache", 60 | "DiskExpressionCache", 61 | "DiskDatasetCache", 62 | "SimpleDatasetCache", 63 | "DatasetURICache", 64 | "MemoryCalendarCache", 65 | "ArcticExpressionCache", 66 | ] 67 | -------------------------------------------------------------------------------- /scripts/data_collector/fund/README.md: -------------------------------------------------------------------------------- 1 | # Collect Fund Data 2 | 3 | > *Please pay **ATTENTION** that the data is collected from [天天基金网](https://fund.eastmoney.com/) and the data might not be perfect. We recommend that users prepare their own data if they have a high-quality dataset. 
For more information, users can refer to the [related document](https://qlib.readthedocs.io/en/latest/component/data.html#converting-csv-format-into-qlib-format)* 4 | 5 | ## Requirements 6 | 7 | ```bash 8 | pip install -r requirements.txt 9 | ``` 10 | 11 | ## Collect Data 12 | 13 | 14 | ### CN Data 15 | 16 | #### 1d from East Money 17 | 18 | ```bash 19 | 20 | # download from eastmoney.com 21 | python collector.py download_data --source_dir ~/.qlib/fund_data/source/cn_data --region CN --start 2020-11-01 --end 2020-11-10 --delay 0.1 --interval 1d 22 | 23 | # normalize 24 | python collector.py normalize_data --source_dir ~/.qlib/fund_data/source/cn_data --normalize_dir ~/.qlib/fund_data/source/cn_1d_nor --region CN --interval 1d --date_field_name FSRQ 25 | 26 | # dump data 27 | cd qlib/scripts 28 | python dump_bin.py dump_all --data_path ~/.qlib/fund_data/source/cn_1d_nor --qlib_dir ~/.qlib/qlib_data/cn_fund_data --freq day --date_field_name FSRQ --include_fields DWJZ,LJJZ 29 | 30 | ``` 31 | 32 | ### Using data 33 | 34 | ```python 35 | import qlib 36 | from qlib.data import D 37 | 38 | qlib.init(provider_uri="~/.qlib/qlib_data/cn_fund_data") 39 | df = D.features(D.instruments(market="all"), ["$DWJZ", "$LJJZ"], freq="day") 40 | ``` 41 | 42 | 43 | ### Help 44 | ```bash 45 | python collector.py collector_data --help 46 | ``` 47 | 48 | ## Parameters 49 | 50 | - interval: 1d 51 | - region: CN 52 | 53 | ## Disclaimer 54 | 55 | This project is for learning and research purposes only; it does not constitute guidance or advice for any action. Any disputes arising from it have no relation to this project. 56 | -------------------------------------------------------------------------------- /qlib/contrib/report/data/base.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | """ 4 | This module is responsible for analysing data 5 | 6 | Assumptions 7 | - Analyse each feature individually 8 | 9 | """ 10 | import pandas as pd 11 | 12 | from qlib.contrib.report.utils import sub_fig_generator 13 | from qlib.log import TimeInspector 14 | 15 | 16 | class FeaAnalyser: 17 | def __init__(self, dataset: pd.DataFrame): 18 | """ 19 | 20 | Parameters 21 | ---------- 22 | dataset : pd.DataFrame 23 | 24 | We often have multiple columns in the dataset. Each column corresponds to one sub figure. 25 | There will be a datetime column in the index levels. 26 | Aggregation will be used for more summarized metrics over time. 27 | Here is an example of data: 28 | 29 | .. code-block:: 30 | 31 | return 32 | datetime instrument 33 | 2007-02-06 equity_tpx 0.010087 34 | equity_spx 0.000786 35 | """ 36 | self._dataset = dataset 37 | with TimeInspector.logt("calc_stat_values"): 38 | self.calc_stat_values() 39 | 40 | def calc_stat_values(self): 41 | pass 42 | 43 | def plot_single(self, col, ax): 44 | raise NotImplementedError("This type of input is not supported") 45 | 46 | def skip(self, col): 47 | return False 48 | 49 | def plot_all(self, *args, **kwargs): 50 | ax_gen = iter(sub_fig_generator(*args, **kwargs)) 51 | for col in self._dataset: 52 | if not self.skip(col): 53 | ax = next(ax_gen) 54 | self.plot_single(col, ax) 55 | -------------------------------------------------------------------------------- /qlib/workflow/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | import atexit 5 | import logging 6 | import sys 7 | import traceback 8 | 9 | from ..log import get_module_logger 10 | from . 
import R 11 | from .recorder import Recorder 12 | 13 | logger = get_module_logger("workflow", logging.INFO) 14 | 15 | 16 | # function to handle the experiment when unusual program ending occurs 17 | def experiment_exit_handler(): 18 | """ 19 | Method for handling the experiment when any unusual program ending occurs. 20 | The `atexit` handler should be registered last, since it will be called as soon as the program ends. 21 | Thus, if any exception or user interruption occurs beforehand, we should handle them first. Once `R` is 22 | ended, another call of `R.end_exp` will not take effect. 23 | 24 | Limitations: 25 | - If pdb is used in your program, excepthook will not be triggered when it ends; the status will be marked as finished 26 | """ 27 | sys.excepthook = experiment_exception_hook # handle uncaught exception 28 | atexit.register(R.end_exp, recorder_status=Recorder.STATUS_FI) # will not take effect if experiment ends 29 | 30 | 31 | def experiment_exception_hook(exc_type, value, tb): 32 | """ 33 | End an experiment with status "FAILED". This hook tries to catch uncaught exceptions 34 | and end the experiment automatically. 35 | 36 | Parameters 37 | exc_type: Exception type 38 | value: Exception's value 39 | tb: Exception's traceback 40 | """ 41 | logger.error(f"An exception has been raised[{exc_type.__name__}: {value}].") 42 | 43 | # Same as original format 44 | traceback.print_tb(tb) 45 | print(f"{exc_type.__name__}: {value}") 46 | 47 | R.end_exp(recorder_status=Recorder.STATUS_FA) 48 | -------------------------------------------------------------------------------- /tests/test_get_data.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
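"""Tests for ``qlib.tests.data.GetData``: downloading the prepared qlib data and the raw csv source data, then sanity-checking what was fetched. This module docstring is an added summary of the tests below."""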
3 | 4 | import shutil 5 | import unittest 6 | from pathlib import Path 7 | 8 | import qlib 9 | from qlib.data import D 10 | from qlib.tests.data import GetData 11 | 12 | DATA_DIR = Path(__file__).parent.joinpath("test_get_data") 13 | SOURCE_DIR = DATA_DIR.joinpath("source") 14 | SOURCE_DIR.mkdir(exist_ok=True, parents=True) 15 | QLIB_DIR = DATA_DIR.joinpath("qlib") 16 | QLIB_DIR.mkdir(exist_ok=True, parents=True) 17 | 18 | 19 | class TestGetData(unittest.TestCase): 20 | FIELDS = "$open,$close,$high,$low,$volume,$factor,$change".split(",") 21 | 22 | @classmethod 23 | def setUpClass(cls) -> None: 24 | provider_uri = str(QLIB_DIR.resolve()) 25 | qlib.init( 26 | provider_uri=provider_uri, 27 | expression_cache=None, 28 | dataset_cache=None, 29 | ) 30 | 31 | @classmethod 32 | def tearDownClass(cls) -> None: 33 | shutil.rmtree(str(DATA_DIR.resolve())) 34 | 35 | def test_0_qlib_data(self): 36 | GetData().qlib_data( 37 | name="qlib_data_simple", target_dir=QLIB_DIR, region="cn", interval="1d", delete_old=False, exists_skip=True 38 | ) 39 | df = D.features(D.instruments("csi300"), self.FIELDS) 40 | self.assertListEqual(list(df.columns), self.FIELDS, "get qlib data failed") 41 | self.assertFalse(df.dropna().empty, "get qlib data failed") 42 | 43 | def test_1_csv_data(self): 44 | GetData().download_data(file_name="csv_data_cn.zip", target_dir=SOURCE_DIR) 45 | stock_name = set(map(lambda x: x.name[:-4].upper(), SOURCE_DIR.glob("*.csv"))) 46 | self.assertEqual(len(stock_name), 85, "get csv data failed") 47 | 48 | 49 | if __name__ == "__main__": 50 | unittest.main() 51 | -------------------------------------------------------------------------------- /examples/portfolio/README.md: -------------------------------------------------------------------------------- 1 | # Portfolio Optimization Strategy 2 | 3 | ## Introduction 4 | 5 | In `qlib/examples/benchmarks` we have various **alpha** models that predict 6 | the stock returns. We also use a simple rule-based `TopkDropoutStrategy` to 7 | evaluate the investing performance of these models. However, such a strategy 8 | is too simple to control portfolio risks such as correlation and volatility. 9 | 10 | To this end, an optimization-based strategy should be used to manage the 11 | trade-off between return and risk. In this doc, we will show how to use 12 | `EnhancedIndexingStrategy` to maximize portfolio return while minimizing 13 | the tracking error relative to a benchmark. 14 | 15 | 16 | ## Preparation 17 | 18 | We use China stock market data for our example. 19 | 20 | 1. Prepare CSI300 weight: 21 | 22 | ```bash 23 | wget https://github.com/SunsetWolf/qlib_dataset/releases/download/v0/csi300_weight.zip 24 | unzip -d ~/.qlib/qlib_data/cn_data csi300_weight.zip 25 | rm -f csi300_weight.zip 26 | ``` 27 | NOTE: We didn't find any free public resource providing the weights in the benchmark. To run the example, we manually created this weight data. 28 | 29 | 2. Prepare risk model data: 30 | 31 | ```bash 32 | python prepare_riskdata.py 33 | ``` 34 | 35 | Here we use a **Statistical Risk Model** implemented in `qlib.model.riskmodel`. 36 | However, users are strongly recommended to use other risk models for better quality: 37 | * **Fundamental Risk Model** like MSCI BARRA 38 | * [Deep Risk Model](https://arxiv.org/abs/2107.05201) 39 | 40 | 41 | ## End-to-End Workflow 42 | 43 | You can run the whole workflow with `EnhancedIndexingStrategy` by running 44 | `qrun config_enhanced_indexing.yaml`; a sketch of its strategy section is given below. 
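The following is a minimal, hypothetical sketch of what that strategy section may look like, expressed as a qlib-style Python config dict. The class name exists in `qlib.contrib.strategy`, but the kwargs shown here are assumptions; `config_enhanced_indexing.yaml` is authoritative.

```python
# Hypothetical sketch; the kwargs are assumptions, see config_enhanced_indexing.yaml.
strategy_config = {
    "class": "EnhancedIndexingStrategy",
    "module_path": "qlib.contrib.strategy",
    "kwargs": {
        # directory produced by prepare_riskdata.py above
        "riskmodel_root": "./riskdata",
    },
}
```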
45 | 46 | In this config, we mainly changed the strategy section compared to 47 | `qlib/examples/benchmarks/workflow_config_lightgbm_Alpha158.yaml`. 48 | -------------------------------------------------------------------------------- /qlib/rl/order_execution/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | from __future__ import annotations 5 | 6 | from typing import Any, cast 7 | 8 | import numpy as np 9 | import pandas as pd 10 | 11 | from qlib.backtest.decision import OrderDir 12 | from qlib.backtest.executor import BaseExecutor, NestedExecutor, SimulatorExecutor 13 | from qlib.constant import float_or_ndarray 14 | 15 | 16 | def dataframe_append(df: pd.DataFrame, other: Any) -> pd.DataFrame: 17 | # dataframe.append is deprecated 18 | other_df = pd.DataFrame(other).set_index("datetime") 19 | other_df.index.name = "datetime" 20 | 21 | res = pd.concat([df, other_df], axis=0) 22 | return res 23 | 24 | 25 | def price_advantage( 26 | exec_price: float_or_ndarray, 27 | baseline_price: float, 28 | direction: OrderDir | int, 29 | ) -> float_or_ndarray: 30 | if baseline_price == 0: # something is wrong with the data; it should be nan here 31 | if isinstance(exec_price, float): 32 | return 0.0 33 | else: 34 | return np.zeros_like(exec_price) 35 | if direction == OrderDir.BUY: 36 | res = (1 - exec_price / baseline_price) * 10000 37 | elif direction == OrderDir.SELL: 38 | res = (exec_price / baseline_price - 1) * 10000 39 | else: 40 | raise ValueError(f"Unexpected order direction: {direction}") 41 | res_wo_nan: np.ndarray = np.nan_to_num(res, nan=0.0) 42 | if res_wo_nan.size == 1: 43 | return res_wo_nan.item() 44 | else: 45 | return cast(float_or_ndarray, res_wo_nan) 46 | 47 | 48 | def get_simulator_executor(executor: BaseExecutor) -> SimulatorExecutor: 49 | while isinstance(executor, NestedExecutor): 50 | executor = executor.inner_executor 51 | assert isinstance(executor, SimulatorExecutor) 52 | return executor 53 | -------------------------------------------------------------------------------- /scripts/data_collector/crypto/README.md: -------------------------------------------------------------------------------- 1 | # Collect Crypto Data 2 | 3 | > *Please pay **ATTENTION** that the data is collected from [Coingecko](https://www.coingecko.com/en/api) and the data might not be perfect. We recommend that users prepare their own data if they have a high-quality dataset. 
For more information, users can refer to the [related document](https://qlib.readthedocs.io/en/latest/component/data.html#converting-csv-format-into-qlib-format)* 4 | 5 | ## Requirements 6 | 7 | ```bash 8 | pip install -r requirements.txt 9 | ``` 10 | 11 | ## Usage of the dataset 12 | > *The crypto dataset only supports the data retrieval function, not the backtest function, due to the lack of OHLC data.* 13 | 14 | ## Collect Data 15 | 16 | 17 | ### Crypto Data 18 | 19 | #### 1d from Coingecko 20 | 21 | ```bash 22 | 23 | # download from https://api.coingecko.com/api/v3/ 24 | python collector.py download_data --source_dir ~/.qlib/crypto_data/source/1d --start 2015-01-01 --end 2021-11-30 --delay 1 --interval 1d 25 | 26 | # normalize 27 | python collector.py normalize_data --source_dir ~/.qlib/crypto_data/source/1d --normalize_dir ~/.qlib/crypto_data/source/1d_nor --interval 1d --date_field_name date 28 | 29 | # dump data 30 | cd qlib/scripts 31 | python dump_bin.py dump_all --data_path ~/.qlib/crypto_data/source/1d_nor --qlib_dir ~/.qlib/qlib_data/crypto_data --freq day --date_field_name date --include_fields prices,total_volumes,market_caps 32 | 33 | ``` 34 | 35 | ### Using data 36 | 37 | ```python 38 | import qlib 39 | from qlib.data import D 40 | 41 | qlib.init(provider_uri="~/.qlib/qlib_data/crypto_data") 42 | df = D.features(D.instruments(market="all"), ["$prices", "$total_volumes","$market_caps"], freq="day") 43 | ``` 44 | 45 | 46 | ### Help 47 | ```bash 48 | python collector.py collector_data --help 49 | ``` 50 | 51 | ## Parameters 52 | 53 | - interval: 1d 54 | - delay: 1 55 | -------------------------------------------------------------------------------- /qlib/contrib/model/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | try: 4 | from .catboost_model import CatBoostModel 5 | except ModuleNotFoundError: 6 | CatBoostModel = None 7 | print("ModuleNotFoundError. CatBoostModel is skipped. (optional: maybe installing catboost can fix it.)") 8 | try: 9 | from .double_ensemble import DEnsembleModel 10 | from .gbdt import LGBModel 11 | except ModuleNotFoundError: 12 | DEnsembleModel, LGBModel = None, None 13 | print( 14 | "ModuleNotFoundError. DEnsembleModel and LGBModel are skipped. (optional: maybe installing lightgbm can fix it.)" 15 | ) 16 | try: 17 | from .xgboost import XGBModel 18 | except ModuleNotFoundError: 19 | XGBModel = None 20 | print("ModuleNotFoundError. XGBModel is skipped (optional: maybe installing xgboost can fix it).") 21 | try: 22 | from .linear import LinearModel 23 | except ModuleNotFoundError: 24 | LinearModel = None 25 | print("ModuleNotFoundError. LinearModel is skipped (optional: maybe installing scipy and sklearn can fix it).") 26 | # import pytorch models 27 | try: 28 | from .pytorch_add import ADD 29 | from .pytorch_alstm import ALSTM 30 | from .pytorch_gats import GATs 31 | from .pytorch_gru import GRU 32 | from .pytorch_lstm import LSTM 33 | from .pytorch_nn import DNNModelPytorch 34 | from .pytorch_sfm import SFM_Model 35 | from .pytorch_tabnet import TabnetModel 36 | from .pytorch_tcn import TCN 37 | 38 | pytorch_classes = (ALSTM, GATs, GRU, LSTM, DNNModelPytorch, TabnetModel, SFM_Model, TCN, ADD) 39 | except ModuleNotFoundError: 40 | pytorch_classes = () 41 | print("ModuleNotFoundError. 
PyTorch models are skipped (optional: maybe installing pytorch can fix it).") 42 | 43 | all_model_classes = (CatBoostModel, DEnsembleModel, LGBModel, XGBModel, LinearModel) + pytorch_classes 44 | -------------------------------------------------------------------------------- /examples/highfreq/README.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | This folder contains 2 examples 3 | - A high-frequency dataset example 4 | - An example of predicting the price trend in high-frequency data 5 | 6 | ## High-Frequency Dataset 7 | 8 | This dataset is an example for RL high-frequency trading. 9 | 10 | ### Get High-Frequency Data 11 | 12 | Get high-frequency data by running the following command: 13 | ```bash 14 | python workflow.py get_data 15 | ``` 16 | 17 | ### Dump & Reload & Reinitialize the Dataset 18 | 19 | 20 | The High-Frequency Dataset is implemented as `qlib.data.dataset.DatasetH` in `workflow.py`. `DatasetH` is a subclass of [`qlib.utils.serial.Serializable`](https://qlib.readthedocs.io/en/latest/advanced/serial.html), whose state can be dumped to or loaded from disk in `pickle` format. 21 | 22 | ### About Reinitialization 23 | 24 | After reloading `Dataset` from disk, `Qlib` also supports reinitializing the dataset. It means that users can reset some states of `Dataset` or `DataHandler` such as `instruments`, `start_time`, `end_time` and `segments`, etc., and generate new data according to the states. 25 | 26 | The example is given in `workflow.py`; users can run the code as follows. 27 | 28 | ### Run the Code 29 | 30 | Run the example by running the following command: 31 | ```bash 32 | python workflow.py dump_and_load_dataset 33 | ``` 34 | 35 | ## Benchmarks Performance (predicting the price trend in high-frequency data) 36 | 37 | Here are the results of models for predicting the price trend in high-frequency data. We will keep updating benchmark models in the future. 38 | 39 | | Model Name | Dataset | IC | ICIR | Rank IC | Rank ICIR | Long precision| Short Precision | Long-Short Average Return | Long-Short Average Sharpe | 40 | |---|---|---|---|---|---|---|---|---|---| 41 | | LightGBM | Alpha158 | 0.0349±0.00 | 0.3805±0.00| 0.0435±0.00 | 0.4724±0.00 | 0.5111±0.00 | 0.5428±0.00 | 0.000074±0.00 | 0.2677±0.00 | 42 | -------------------------------------------------------------------------------- /tests/dataset_tests/test_datalayer.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | 5 | from qlib.data import D 6 | from qlib.tests import TestAutoData 7 | 8 | 9 | class TestDataset(TestAutoData): 10 | def testCSI300(self): 11 | close_p = D.features(D.instruments("csi300"), ["$close"]) 12 | size = close_p.groupby("datetime", group_keys=False).size() 13 | cnt = close_p.groupby("datetime", group_keys=False).count()["$close"] 14 | size_desc = size.describe(percentiles=np.arange(0.1, 1.0, 0.1)) 15 | cnt_desc = cnt.describe(percentiles=np.arange(0.1, 1.0, 0.1)) 16 | 17 | print(size_desc) 18 | print(cnt_desc) 19 | 20 | self.assertLessEqual(size_desc.loc["max"], 305, "Excessive number of CSI300 constituent stocks") 21 | self.assertGreaterEqual(size_desc.loc["80%"], 290, "Insufficient number of CSI300 constituent stocks") 22 | 23 | self.assertLessEqual(cnt_desc.loc["max"], 305, "Excessive number of CSI300 constituent stocks") 24 | # FIXME: Due to the low quality of the data, 
it is hard to make sure there is enough data 25 | # self.assertEqual(cnt_desc.loc["80%"], 300, "Insufficient number of CSI300 constituent stocks") 26 | 27 | def testClose(self): 28 | close_p = D.features(D.instruments("csi300"), ["Ref($close, 1)/$close - 1"]) 29 | close_desc = close_p.describe(percentiles=np.arange(0.1, 1.0, 0.1)) 30 | print(close_desc) 31 | self.assertLessEqual(abs(close_desc.loc["90%"][0]), 0.1, "Close value is abnormal") 32 | self.assertLessEqual(abs(close_desc.loc["10%"][0]), 0.1, "Close value is abnormal") 33 | # FIXME: The yahoo data is not perfect. We have to 34 | # self.assertLessEqual(abs(close_desc.loc["max"][0]), 0.2, "Close value is abnormal") 35 | # self.assertGreaterEqual(close_desc.loc["min"][0], -0.2, "Close value is abnormal") 36 | 37 | 38 | if __name__ == "__main__": 39 | unittest.main() 40 | -------------------------------------------------------------------------------- /examples/portfolio/prepare_riskdata.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | import os 4 | 5 | import numpy as np 6 | import pandas as pd 7 | 8 | from qlib.data import D 9 | from qlib.model.riskmodel import StructuredCovEstimator 10 | 11 | 12 | def prepare_data(riskdata_root="./riskdata", T=240, start_time="2016-01-01"): 13 | universe = D.features(D.instruments("csi300"), ["$close"], start_time=start_time).swaplevel().sort_index() 14 | 15 | price_all = ( 16 | D.features(D.instruments("all"), ["$close"], start_time=start_time).squeeze().unstack(level="instrument") 17 | ) 18 | 19 | # StructuredCovEstimator is a statistical risk model 20 | riskmodel = StructuredCovEstimator() 21 | 22 | for i in range(T - 1, len(price_all)): 23 | date = price_all.index[i] 24 | ref_date = price_all.index[i - T + 1] 25 | 26 | print(date) 27 | 28 | codes = universe.loc[date].index 29 | price = price_all.loc[ref_date:date, codes] 30 | 31 | # calculate return and remove extreme return 32 | ret = price.pct_change() 33 | ret.clip(ret.quantile(0.025), ret.quantile(0.975), axis=1, inplace=True) 34 | 35 | # run risk model 36 | F, cov_b, var_u = riskmodel.predict(ret, is_price=False, return_decomposed_components=True) 37 | 38 | # save risk data 39 | root = riskdata_root + "/" + date.strftime("%Y%m%d") 40 | os.makedirs(root, exist_ok=True) 41 | 42 | pd.DataFrame(F, index=codes).to_pickle(root + "/factor_exp.pkl") 43 | pd.DataFrame(cov_b).to_pickle(root + "/factor_cov.pkl") 44 | # for specific_risk we follow the convention to save volatility 45 | pd.Series(np.sqrt(var_u), index=codes).to_pickle(root + "/specific_risk.pkl") 46 | 47 | 48 | if __name__ == "__main__": 49 | import qlib 50 | 51 | qlib.init(provider_uri="~/.qlib/qlib_data/cn_data") 52 | 53 | prepare_data() 54 | -------------------------------------------------------------------------------- /examples/rl_order_execution/exp_configs/backtest_twap.yml: -------------------------------------------------------------------------------- 1 | order_file: ./data/orders/test_orders.pkl 2 | start_time: "9:30" 3 | end_time: "14:54" 4 | data_granularity: "5min" 5 | qlib: 6 | provider_uri_5min: ./data/bin/ 7 | exchange: 8 | limit_threshold: null 9 | deal_price: ["$close", "$close"] 10 | volume_threshold: null 11 | strategies: 12 | 1day: 13 | class: SAOEIntStrategy 14 | kwargs: 15 | data_granularity: 5 16 | action_interpreter: 17 | class: CategoricalActionInterpreter 18 | kwargs: 19 | max_step: 8 20 | values: 4 21 | module_path: 
qlib.rl.order_execution.interpreter 22 | network: 23 | class: Recurrent 24 | kwargs: {} 25 | module_path: qlib.rl.order_execution.network 26 | policy: 27 | class: PPO # PPO, DQN 28 | kwargs: 29 | lr: 0.0001 30 | # Uncomment `weight_file` once the training workflow finishes. You can change the checkpoint file you want to use. 31 | # weight_file: outputs/ppo/checkpoints/latest.pth 32 | module_path: qlib.rl.order_execution.policy 33 | state_interpreter: 34 | class: FullHistoryStateInterpreter 35 | kwargs: 36 | data_dim: 5 37 | data_ticks: 48 38 | max_step: 8 39 | processed_data_provider: 40 | class: HandlerProcessedDataProvider 41 | kwargs: 42 | data_dir: ./data/pickle/ 43 | feature_columns_today: ["$high", "$low", "$open", "$close", "$volume"] 44 | feature_columns_yesterday: ["$high_1", "$low_1", "$open_1", "$close_1", "$volume_1"] 45 | module_path: qlib.rl.data.native 46 | module_path: qlib.rl.order_execution.interpreter 47 | module_path: qlib.rl.order_execution.strategy 48 | 30min: 49 | class: TWAPStrategy 50 | kwargs: {} 51 | module_path: qlib.contrib.strategy.rule_strategy 52 | concurrency: 16 53 | output_dir: outputs/ppo/ 54 | -------------------------------------------------------------------------------- /examples/rl_order_execution/exp_configs/backtest_opds.yml: -------------------------------------------------------------------------------- 1 | order_file: ./data/orders/test_orders.pkl 2 | start_time: "9:30" 3 | end_time: "14:54" 4 | data_granularity: "5min" 5 | qlib: 6 | provider_uri_5min: ./data/bin/ 7 | exchange: 8 | limit_threshold: null 9 | deal_price: ["$close", "$close"] 10 | volume_threshold: null 11 | strategies: 12 | 1day: 13 | class: SAOEIntStrategy 14 | kwargs: 15 | data_granularity: 5 16 | action_interpreter: 17 | class: CategoricalActionInterpreter 18 | kwargs: 19 | max_step: 8 20 | values: 4 21 | module_path: qlib.rl.order_execution.interpreter 22 | network: 23 | class: Recurrent 24 | kwargs: {} 25 | module_path: qlib.rl.order_execution.network 26 | policy: 27 | class: PPO # PPO, DQN 28 | kwargs: 29 | lr: 0.0001 30 | # Uncomment `weight_file` once the training workflow finishes. You can change the checkpoint file you want to use. 
31 | # weight_file: outputs/opds/checkpoints/latest.pth 32 | module_path: qlib.rl.order_execution.policy 33 | state_interpreter: 34 | class: FullHistoryStateInterpreter 35 | kwargs: 36 | data_dim: 5 37 | data_ticks: 48 38 | max_step: 8 39 | processed_data_provider: 40 | class: HandlerProcessedDataProvider 41 | kwargs: 42 | data_dir: ./data/pickle/ 43 | feature_columns_today: ["$high", "$low", "$open", "$close", "$volume"] 44 | feature_columns_yesterday: ["$high_1", "$low_1", "$open_1", "$close_1", "$volume_1"] 45 | module_path: qlib.rl.data.native 46 | module_path: qlib.rl.order_execution.interpreter 47 | module_path: qlib.rl.order_execution.strategy 48 | 30min: 49 | class: TWAPStrategy 50 | kwargs: {} 51 | module_path: qlib.contrib.strategy.rule_strategy 52 | concurrency: 16 53 | output_dir: outputs/opds/ 54 | -------------------------------------------------------------------------------- /examples/rl_order_execution/exp_configs/train_opds.yml: -------------------------------------------------------------------------------- 1 | simulator: 2 | data_granularity: 5 3 | time_per_step: 30 4 | vol_limit: null 5 | env: 6 | concurrency: 32 7 | parallel_mode: dummy 8 | action_interpreter: 9 | class: CategoricalActionInterpreter 10 | kwargs: 11 | values: 4 12 | max_step: 8 13 | module_path: qlib.rl.order_execution.interpreter 14 | state_interpreter: 15 | class: FullHistoryStateInterpreter 16 | kwargs: 17 | data_dim: 5 18 | data_ticks: 48 # 48 = 240 min / 5 min 19 | max_step: 8 20 | processed_data_provider: 21 | class: HandlerProcessedDataProvider 22 | kwargs: 23 | data_dir: ./data/pickle/ 24 | feature_columns_today: ["$high", "$low", "$open", "$close", "$volume"] 25 | feature_columns_yesterday: ["$high_1", "$low_1", "$open_1", "$close_1", "$volume_1"] 26 | backtest: false 27 | module_path: qlib.rl.data.native 28 | module_path: qlib.rl.order_execution.interpreter 29 | reward: 30 | class: PAPenaltyReward 31 | kwargs: 32 | penalty: 4.0 33 | scale: 0.01 34 | module_path: qlib.rl.order_execution.reward 35 | data: 36 | source: 37 | order_dir: ./data/orders 38 | feature_root_dir: ./data/pickle/ 39 | feature_columns_today: ["$close0", "$volume0"] 40 | feature_columns_yesterday: [] 41 | total_time: 240 42 | default_start_time_index: 0 43 | default_end_time_index: 235 44 | proc_data_dim: 5 45 | num_workers: 0 46 | queue_size: 20 47 | network: 48 | class: Recurrent 49 | module_path: qlib.rl.order_execution.network 50 | policy: 51 | class: PPO # PPO, DQN 52 | kwargs: 53 | lr: 0.0001 54 | module_path: qlib.rl.order_execution.policy 55 | runtime: 56 | seed: 42 57 | use_cuda: false 58 | trainer: 59 | max_epoch: 500 60 | repeat_per_collect: 25 61 | earlystop_patience: 50 62 | episode_per_collect: 10000 63 | batch_size: 1024 64 | val_every_n_epoch: 4 65 | checkpoint_path: ./outputs/opds 66 | checkpoint_every_n_iters: 1 67 | -------------------------------------------------------------------------------- /examples/rl_order_execution/scripts/gen_pickle_data.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
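"""Generate pickled high-frequency data for the RL order-execution examples: read a YAML config, build feature/backtest dataframes with ``HighFreqProvider``, and optionally split them by date and/or by stock. This module docstring is an added summary of the script below."""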
3 | 4 | import argparse 5 | import os 6 | import shutil 7 | from copy import deepcopy 8 | 9 | import yaml 10 | 11 | from qlib.contrib.data.highfreq_provider import HighFreqProvider 12 | 13 | loader = yaml.FullLoader 14 | 15 | if __name__ == "__main__": 16 | parser = argparse.ArgumentParser() 17 | parser.add_argument("-c", "--config", type=str, default="config.yml") 18 | parser.add_argument("-d", "--dest", type=str, default=".") 19 | parser.add_argument("-s", "--split", type=str, choices=["none", "date", "stock", "both"], default="stock") 20 | args = parser.parse_args() 21 | 22 | with open(args.config) as f: conf = yaml.load(f, Loader=loader) 23 | 24 | for k, v in conf.items(): 25 | if isinstance(v, dict) and "path" in v: 26 | v["path"] = os.path.join(args.dest, v["path"]) 27 | provider = HighFreqProvider(**conf) 28 | 29 | # Generate the dataframes 30 | if "feature_conf" in conf: 31 | feature = provider._gen_dataframe(deepcopy(provider.feature_conf)) 32 | if "backtest_conf" in conf: 33 | backtest = provider._gen_dataframe(deepcopy(provider.backtest_conf)) 34 | 35 | provider.feature_conf["path"] = os.path.splitext(provider.feature_conf["path"])[0] + "/" 36 | provider.backtest_conf["path"] = os.path.splitext(provider.backtest_conf["path"])[0] + "/" 37 | # Split by date 38 | if args.split == "date" or args.split == "both": 39 | provider._gen_day_dataset(deepcopy(provider.feature_conf), "feature") 40 | provider._gen_day_dataset(deepcopy(provider.backtest_conf), "backtest") 41 | 42 | # Split by stock 43 | if args.split == "stock" or args.split == "both": 44 | provider._gen_stock_dataset(deepcopy(provider.feature_conf), "feature") 45 | provider._gen_stock_dataset(deepcopy(provider.backtest_conf), "backtest") 46 | 47 | shutil.rmtree("stat/", ignore_errors=True) 48 | -------------------------------------------------------------------------------- /qlib/model/meta/task.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | from qlib.data.dataset import Dataset 5 | 6 | from ...utils import init_instance_by_config 7 | 8 | 9 | class MetaTask: 10 | """ 11 | A single meta-task; a meta-dataset contains a list of them. 12 | It serves as a component in MetaDatasetDS. 13 | 14 | The data processing differs across modes: 15 | 16 | - the processed input may differ between training and testing 17 | 18 | - When training, X, y, X_test and y_test are necessary in training tasks (# PROC_MODE_FULL #) but not in test tasks.
(# PROC_MODE_TEST #) 20 | - When the meta-model can be transferred to other datasets, only meta_info is necessary (# PROC_MODE_TRANSFER #) 21 | """ 22 | 23 | PROC_MODE_FULL = "full" 24 | PROC_MODE_TEST = "test" 25 | PROC_MODE_TRANSFER = "transfer" 26 | 27 | def __init__(self, task: dict, meta_info: object, mode: str = PROC_MODE_FULL): 28 | """ 29 | The `__init__` func is responsible for 30 | 31 | - storing the task 32 | - storing the original input data 33 | - processing the input data into meta data 34 | 35 | Parameters 36 | ---------- 37 | task : dict 38 | the task to be enhanced by the meta-model 39 | 40 | meta_info : object 41 | the input for the meta-model 42 | """ 43 | self.task = task 44 | self.meta_info = meta_info # the original meta input information; it will be processed later 45 | self.mode = mode 46 | 47 | def get_dataset(self) -> Dataset: 48 | return init_instance_by_config(self.task["dataset"], accept_types=Dataset) 49 | 50 | def get_meta_input(self) -> object: 51 | """ 52 | Return the **processed** meta_info 53 | """ 54 | return self.meta_info 55 | 56 | def __repr__(self): 57 | return f"MetaTask(task={self.task}, meta_info={self.meta_info})" 58 | -------------------------------------------------------------------------------- /qlib/rl/data/base.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | from __future__ import annotations 4 | 5 | from abc import abstractmethod 6 | 7 | import pandas as pd 8 | 9 | 10 | class BaseIntradayBacktestData: 11 | """ 12 | Raw market data that is often used in backtesting (thus called BacktestData). 13 | 14 | Base class for all types of backtest data. Currently, each type of simulator has its corresponding backtest 15 | data type. 16 | """ 17 | 18 | @abstractmethod 19 | def __repr__(self) -> str: 20 | raise NotImplementedError 21 | 22 | @abstractmethod 23 | def __len__(self) -> int: 24 | raise NotImplementedError 25 | 26 | @abstractmethod 27 | def get_deal_price(self) -> pd.Series: 28 | raise NotImplementedError 29 | 30 | @abstractmethod 31 | def get_volume(self) -> pd.Series: 32 | raise NotImplementedError 33 | 34 | @abstractmethod 35 | def get_time_index(self) -> pd.DatetimeIndex: 36 | raise NotImplementedError 37 | 38 | 39 | class BaseIntradayProcessedData: 40 | """Processed market data after data cleanup and feature engineering. 41 | 42 | It contains both processed data for "today" and "yesterday", as some algorithms 43 | might use the market information of the previous day to assist decision making. 44 | """ 45 | 46 | today: pd.DataFrame 47 | """Processed data for "today". 48 | Number of records must be ``time_length``, and columns must be ``feature_dim``.""" 49 | 50 | yesterday: pd.DataFrame 51 | """Processed data for "yesterday".
52 | Number of records must be ``time_length``, and columns must be ``feature_dim``.""" 53 | 54 | 55 | class ProcessedDataProvider: 56 | """Provider of processed data""" 57 | 58 | def get_data( 59 | self, 60 | stock_id: str, 61 | date: pd.Timestamp, 62 | feature_dim: int, 63 | time_index: pd.Index, 64 | ) -> BaseIntradayProcessedData: 65 | raise NotImplementedError 66 | -------------------------------------------------------------------------------- /examples/rl_order_execution/exp_configs/train_ppo.yml: -------------------------------------------------------------------------------- 1 | simulator: 2 | data_granularity: 5 3 | time_per_step: 30 4 | vol_limit: null 5 | env: 6 | concurrency: 32 7 | parallel_mode: dummy 8 | action_interpreter: 9 | class: CategoricalActionInterpreter 10 | kwargs: 11 | values: 4 12 | max_step: 8 13 | module_path: qlib.rl.order_execution.interpreter 14 | state_interpreter: 15 | class: FullHistoryStateInterpreter 16 | kwargs: 17 | data_dim: 5 18 | data_ticks: 48 # 48 = 240 min / 5 min 19 | max_step: 8 20 | processed_data_provider: 21 | class: HandlerProcessedDataProvider 22 | kwargs: 23 | data_dir: ./data/pickle/ 24 | feature_columns_today: ["$high", "$low", "$open", "$close", "$volume"] 25 | feature_columns_yesterday: ["$high_1", "$low_1", "$open_1", "$close_1", "$volume_1"] 26 | backtest: false 27 | module_path: qlib.rl.data.native 28 | module_path: qlib.rl.order_execution.interpreter 29 | reward: 30 | class: PPOReward 31 | kwargs: 32 | max_step: 8 33 | start_time_index: 0 34 | end_time_index: 46 # 46 = (240 - 5) min / 5 min - 1 35 | module_path: qlib.rl.order_execution.reward 36 | data: 37 | source: 38 | order_dir: ./data/orders 39 | feature_root_dir: ./data/pickle/ 40 | feature_columns_today: ["$close0", "$volume0"] 41 | feature_columns_yesterday: [] 42 | total_time: 240 43 | default_start_time_index: 0 44 | default_end_time_index: 235 45 | proc_data_dim: 5 46 | num_workers: 0 47 | queue_size: 20 48 | network: 49 | class: Recurrent 50 | module_path: qlib.rl.order_execution.network 51 | policy: 52 | class: PPO # PPO, DQN 53 | kwargs: 54 | lr: 0.0001 55 | module_path: qlib.rl.order_execution.policy 56 | runtime: 57 | seed: 42 58 | use_cuda: false 59 | trainer: 60 | max_epoch: 500 61 | repeat_per_collect: 25 62 | earlystop_patience: 50 63 | episode_per_collect: 10000 64 | batch_size: 1024 65 | val_every_n_epoch: 4 66 | checkpoint_path: ./outputs/ppo 67 | checkpoint_every_n_iters: 1 68 | -------------------------------------------------------------------------------- /tests/test_register_ops.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
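# This test demonstrates how to register custom expression operators:
# subclass ElemOperator (one input) or PairOperator (two inputs), implement
# _load_internal (plus get_extended_window_size when the operator needs extra
# history, as Diff does), and pass the classes through the `custom_ops`
# argument of qlib's initialization (here via TestAutoData._setup_kwargs).
# Registered operators can then be used by name inside expressions such as
# "Diff($close)" or "Distance($close, Ref($close, 1))".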
3 | 4 | import unittest 5 | 6 | import numpy as np 7 | 8 | from qlib.data import D 9 | from qlib.data.ops import ElemOperator, PairOperator 10 | from qlib.tests import TestAutoData 11 | 12 | 13 | class Diff(ElemOperator): 14 | """Feature First Difference 15 | Parameters 16 | ---------- 17 | feature : Expression 18 | feature instance 19 | Returns 20 | ------- 21 | Expression 22 | a feature instance with the first difference 23 | """ 24 | 25 | def _load_internal(self, instrument, start_index, end_index, freq): 26 | series = self.feature.load(instrument, start_index, end_index, freq) 27 | return series.diff() 28 | 29 | def get_extended_window_size(self): 30 | lft_etd, rght_etd = self.feature.get_extended_window_size() 31 | return lft_etd + 1, rght_etd 32 | 33 | 34 | class Distance(PairOperator): 35 | """Feature Distance 36 | Parameters 37 | ---------- 38 | feature_left, feature_right : Expression 39 | feature instances 40 | Returns 41 | ------- 42 | Expression 43 | a feature instance with the distance 44 | """ 45 | 46 | def _load_internal(self, instrument, start_index, end_index, freq): 47 | series_left = self.feature_left.load(instrument, start_index, end_index, freq) 48 | series_right = self.feature_right.load(instrument, start_index, end_index, freq) 49 | return np.abs(series_left - series_right) 50 | 51 | 52 | class TestRegisterCustomOps(TestAutoData): 53 | @classmethod 54 | def setUpClass(cls) -> None: 55 | cls._setup_kwargs.update({"custom_ops": [Diff, Distance]}) 56 | super().setUpClass() 57 | 58 | def test_register_custom_ops(self): 59 | instruments = ["SH600000"] 60 | fields = ["Diff($close)", "Distance($close, Ref($close, 1))"] 61 | print(D.features(instruments, fields, start_time="2010-01-01", end_time="2017-12-31", freq="day")) 62 | 63 | 64 | if __name__ == "__main__": 65 | unittest.main() 66 | -------------------------------------------------------------------------------- /examples/benchmarks/TFT/workflow_config_tft_Alpha158.yaml: -------------------------------------------------------------------------------- 1 | sys: 2 | rel_path: .
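# NOTE: the `sys.rel_path` entry above is resolved relative to this config
# file and added to Python's sys.path, so that the local `tft` package
# referenced by `module_path: tft` in the task section below can be imported.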
3 | qlib_init: 4 | provider_uri: "~/.qlib/qlib_data/cn_data" 5 | region: cn 6 | market: &market csi300 7 | benchmark: &benchmark SH000300 8 | data_handler_config: &data_handler_config 9 | start_time: 2008-01-01 10 | end_time: 2020-08-01 11 | fit_start_time: 2008-01-01 12 | fit_end_time: 2014-12-31 13 | instruments: *market 14 | port_analysis_config: &port_analysis_config 15 | strategy: 16 | class: TopkDropoutStrategy 17 | module_path: qlib.contrib.strategy 18 | kwargs: 19 | signal: 20 | topk: 50 21 | n_drop: 5 22 | backtest: 23 | start_time: 2017-01-01 24 | end_time: 2020-08-01 25 | account: 100000000 26 | benchmark: *benchmark 27 | exchange_kwargs: 28 | limit_threshold: 0.095 29 | deal_price: close 30 | open_cost: 0.0005 31 | close_cost: 0.0015 32 | min_cost: 5 33 | task: 34 | model: 35 | class: TFTModel 36 | module_path: tft 37 | dataset: 38 | class: DatasetH 39 | module_path: qlib.data.dataset 40 | kwargs: 41 | handler: 42 | class: Alpha158 43 | module_path: qlib.contrib.data.handler 44 | kwargs: *data_handler_config 45 | segments: 46 | train: [2008-01-01, 2014-12-31] 47 | valid: [2015-01-01, 2016-12-31] 48 | test: [2017-01-01, 2020-08-01] 49 | record: 50 | - class: SignalRecord 51 | module_path: qlib.workflow.record_temp 52 | kwargs: 53 | model: 54 | dataset: 55 | - class: SigAnaRecord 56 | module_path: qlib.workflow.record_temp 57 | kwargs: 58 | ana_long_short: False 59 | ann_scaler: 252 60 | - class: PortAnaRecord 61 | module_path: qlib.workflow.record_temp 62 | kwargs: 63 | config: *port_analysis_config 64 | -------------------------------------------------------------------------------- /examples/hyperparameter/LightGBM/hyperparameter_158.py: -------------------------------------------------------------------------------- 1 | import optuna 2 | 3 | import qlib 4 | from qlib.constant import REG_CN 5 | from qlib.tests.config import CSI300_DATASET_CONFIG 6 | from qlib.tests.data import GetData 7 | from qlib.utils import init_instance_by_config 8 | 9 | 10 | def objective(trial): 11 | task = { 12 | "model": { 13 | "class": "LGBModel", 14 | "module_path": "qlib.contrib.model.gbdt", 15 | "kwargs": { 16 | "loss": "mse", 17 | "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1), 18 | "learning_rate": trial.suggest_uniform("learning_rate", 0, 1), 19 | "subsample": trial.suggest_uniform("subsample", 0, 1), 20 | "lambda_l1": trial.suggest_loguniform("lambda_l1", 1e-8, 1e4), 21 | "lambda_l2": trial.suggest_loguniform("lambda_l2", 1e-8, 1e4), 22 | "max_depth": 10, 23 | "num_leaves": trial.suggest_int("num_leaves", 1, 1024), 24 | "feature_fraction": trial.suggest_uniform("feature_fraction", 0.4, 1.0), 25 | "bagging_fraction": trial.suggest_uniform("bagging_fraction", 0.4, 1.0), 26 | "bagging_freq": trial.suggest_int("bagging_freq", 1, 7), 27 | "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 1, 50), 28 | "min_child_samples": trial.suggest_int("min_child_samples", 5, 100), 29 | }, 30 | }, 31 | } 32 | evals_result = dict() 33 | model = init_instance_by_config(task["model"]) 34 | model.fit(dataset, evals_result=evals_result) 35 | return min(evals_result["valid"]) 36 | 37 | 38 | if __name__ == "__main__": 39 | provider_uri = "~/.qlib/qlib_data/cn_data" 40 | GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True) 41 | qlib.init(provider_uri=provider_uri, region=REG_CN) 42 | 43 | dataset = init_instance_by_config(CSI300_DATASET_CONFIG) 44 | 45 | study = optuna.create_study(study_name="LGBM_158", storage="sqlite:///db.sqlite3", load_if_exists=True) 46 | study.optimize(objective,
n_jobs=6) 47 | -------------------------------------------------------------------------------- /scripts/collect_info.py: -------------------------------------------------------------------------------- 1 | import platform 2 | import sys 3 | from pathlib import Path 4 | 5 | import fire 6 | import pkg_resources 7 | 8 | import qlib 9 | 10 | QLIB_PATH = Path(__file__).resolve().parent.parent 11 | 12 | 13 | class InfoCollector: 14 | """ 15 | Users can collect system info by running the following command: 16 | `cd scripts && python collect_info.py all` 17 | - NOTE: please avoid running this script in a project folder which contains `qlib` 18 | """ 19 | 20 | def sys(self): 21 | """collect system-related info""" 22 | for method in ["system", "machine", "platform", "version"]: 23 | print(getattr(platform, method)()) 24 | 25 | def py(self): 26 | """collect Python-related info""" 27 | print("Python version: {}".format(sys.version.replace("\n", " "))) 28 | 29 | def qlib(self): 30 | """collect qlib-related info""" 31 | print("Qlib version: {}".format(qlib.__version__)) 32 | REQUIRED = [ 33 | "numpy", 34 | "pandas", 35 | "scipy", 36 | "requests", 37 | "sacred", 38 | "python-socketio", 39 | "redis", 40 | "python-redis-lock", 41 | "schedule", 42 | "cvxpy", 43 | "hyperopt", 44 | "fire", 45 | "statsmodels", 46 | "xlrd", 47 | "plotly", 48 | "matplotlib", 49 | "tables", 50 | "pyyaml", 51 | "mlflow", 52 | "tqdm", 53 | "loguru", 54 | "lightgbm", 55 | "tornado", 56 | "joblib", 57 | "ruamel.yaml", 58 | ] 59 | 60 | for package in REQUIRED: 61 | version = pkg_resources.get_distribution(package).version 62 | print(f"{package}=={version}") 63 | 64 | def all(self): 65 | """collect all info""" 66 | for method in ["sys", "py", "qlib"]: 67 | getattr(self, method)() 68 | print() 69 | 70 | 71 | if __name__ == "__main__": 72 | fire.Fire(InfoCollector) 73 | -------------------------------------------------------------------------------- /examples/data_demo/data_cache_demo.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License.
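# The demo below runs `qrun` on the LightGBM Alpha158 benchmark config twice:
# once from scratch, and once after dumping the fitted DataHandlerLP to
# handler.pkl and pointing the task at it via a `file://` URI, so that the two
# reported timings show the speedup gained from the handler cache.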
3 | """ 4 | The motivation of this demo 5 | - To show the data modules of Qlib is Serializable, users can dump processed data to disk to avoid duplicated data preprocessing 6 | """ 7 | 8 | from copy import deepcopy 9 | from pathlib import Path 10 | import pickle 11 | from pprint import pprint 12 | from ruamel.yaml import YAML 13 | import subprocess 14 | from qlib.log import TimeInspector 15 | 16 | from qlib import init 17 | from qlib.data.dataset.handler import DataHandlerLP 18 | from qlib.utils import init_instance_by_config 19 | 20 | # For general purpose, we use relative path 21 | DIRNAME = Path(__file__).absolute().resolve().parent 22 | 23 | if __name__ == "__main__": 24 | init() 25 | 26 | config_path = DIRNAME.parent / "benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml" 27 | 28 | # 1) show original time 29 | with TimeInspector.logt("The original time without handler cache:"): 30 | subprocess.run(f"qrun {config_path}", shell=True) 31 | 32 | # 2) dump handler 33 | yaml = YAML(typ="safe", pure=True) 34 | task_config = yaml.load(config_path.open()) 35 | hd_conf = task_config["task"]["dataset"]["kwargs"]["handler"] 36 | pprint(hd_conf) 37 | hd: DataHandlerLP = init_instance_by_config(hd_conf) 38 | hd_path = DIRNAME / "handler.pkl" 39 | hd.to_pickle(hd_path, dump_all=True) 40 | 41 | # 3) create new task with handler cache 42 | new_task_config = deepcopy(task_config) 43 | new_task_config["task"]["dataset"]["kwargs"]["handler"] = f"file://{hd_path}" 44 | new_task_config["sys"] = {"path": [str(config_path.parent.resolve())]} 45 | new_task_path = DIRNAME / "new_task.yaml" 46 | print("The location of the new task", new_task_path) 47 | 48 | # save new task 49 | with new_task_path.open("w") as f: 50 | yaml.safe_dump(new_task_config, f, indent=4, sort_keys=False) 51 | 52 | # 4) train model with new task 53 | with TimeInspector.logt("The time for task with handler cache:"): 54 | subprocess.run(f"qrun {new_task_path}", shell=True) 55 | -------------------------------------------------------------------------------- /tests/misc/test_sepdf.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
3 | import unittest 4 | 5 | import numpy as np 6 | import pandas as pd 7 | 8 | from qlib.contrib.data.utils.sepdf import SepDataFrame 9 | 10 | 11 | class SepDF(unittest.TestCase): 12 | def to_str(self, obj): 13 | return "".join(str(obj).split()) 14 | 15 | def test_index_data(self): 16 | np.random.seed(42) 17 | 18 | index = [ 19 | np.array(["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"]), 20 | np.array(["one", "two", "one", "two", "one", "two", "one", "two"]), 21 | ] 22 | 23 | cols = [ 24 | np.repeat(np.array(["g1", "g2"]), 2), 25 | np.arange(4), 26 | ] 27 | df = pd.DataFrame(np.random.randn(8, 4), index=index, columns=cols) 28 | sdf = SepDataFrame(df_dict={"g2": df["g2"]}, join=None) 29 | sdf[("g2", 4)] = 3 30 | sdf["g1"] = df["g1"] 31 | exp = """ 32 | {'g2': 2 3 4 33 | bar one 0.647689 1.523030 3 34 | two 1.579213 0.767435 3 35 | baz one -0.463418 -0.465730 3 36 | two -1.724918 -0.562288 3 37 | foo one -0.908024 -1.412304 3 38 | two 0.067528 -1.424748 3 39 | qux one -1.150994 0.375698 3 40 | two -0.601707 1.852278 3, 'g1': 0 1 41 | bar one 0.496714 -0.138264 42 | two -0.234153 -0.234137 43 | baz one -0.469474 0.542560 44 | two 0.241962 -1.913280 45 | foo one -1.012831 0.314247 46 | two 1.465649 -0.225776 47 | qux one -0.544383 0.110923 48 | two -0.600639 -0.291694} 49 | """ 50 | self.assertEqual(self.to_str(sdf._df_dict), self.to_str(exp)) 51 | 52 | del df["g1"] 53 | del df["g2"] 54 | # this will not raise an error; df will become an empty dataframe 55 | 56 | del sdf["g1"] 57 | del sdf["g2"] 58 | # sdf should support deleting all of its column groups 59 | 60 | 61 | if __name__ == "__main__": 62 | unittest.main() 63 | -------------------------------------------------------------------------------- /examples/benchmarks_dynamic/README.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | Due to the non-stationary nature of the financial market environment, the data distribution may change across periods, which makes the performance of models built on training data decay on future test data. 3 | Adapting forecasting models/strategies to market dynamics is therefore very important to their performance. 4 | 5 | The table below shows the performances of different solutions on different forecasting models. 6 | 7 | ## Alpha158 Dataset 8 | Here is the [crowd sourced version of qlib data](data_collector/crowd_source/README.md): https://github.com/chenditc/investment_data/releases 9 | ```bash 10 | wget https://github.com/chenditc/investment_data/releases/latest/download/qlib_bin.tar.gz 11 | mkdir -p ~/.qlib/qlib_data/cn_data 12 | tar -zxvf qlib_bin.tar.gz -C ~/.qlib/qlib_data/cn_data --strip-components=2 13 | rm -f qlib_bin.tar.gz 14 | ``` 15 | 16 | | Model Name | Dataset | IC | ICIR | Rank IC | Rank ICIR | Annualized Return | Information Ratio | Max Drawdown | 17 | |------------------|---------|------|------|---------|-----------|-------------------|-------------------|--------------| 18 | | RR[Linear] |Alpha158 |0.0945|0.5989|0.1069 |0.6495 |0.0857 |1.3682 |-0.0986 | 19 | | DDG-DA[Linear] |Alpha158 |0.0983|0.6157|0.1108 |0.6646 |0.0764 |1.1904 |-0.0769 | 20 | | RR[LightGBM] |Alpha158 |0.0816|0.5887|0.0912 |0.6263 |0.0771 |1.3196 |-0.0909 | 21 | | DDG-DA[LightGBM] |Alpha158 |0.0878|0.6185|0.0975 |0.6524 |0.1261 |2.0096 |-0.0744 | 22 | 23 | - The label horizon of the `Alpha158` dataset is set to 20. 24 | - The rolling time intervals are set to 20 trading days.
25 | - The test rolling periods are from January 2017 to August 2020. 26 | - The results are based on the crowd-sourced version. The Yahoo version of qlib data does not contain `VWAP`, so all related factors are missing and filled with 0, which leads to a rank-deficient matrix (one that does not have full rank) and makes the lower-level optimization of DDG-DA unsolvable. 27 | -------------------------------------------------------------------------------- /examples/orderbook_data/README.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | 3 | This example demonstrates how Qlib supports data without a fixed shared frequency. 4 | 5 | For example, 6 | - Daily price/volume data are fixed-frequency data: the data comes at a fixed frequency (i.e. daily). 7 | - Orders are not fixed-frequency data; they may come at any time point. 8 | 9 | To support such non-fixed-frequency data, Qlib implements an Arctic-based backend. 10 | Here is an example of importing and querying data based on this backend. 11 | 12 | # Installation 13 | 14 | Please refer to [the installation docs](https://docs.mongodb.com/manual/installation/) of mongodb. 15 | The current version of the script, with default values, tries to connect to localhost **via the default port without authentication**. 16 | 17 | Run the following commands to install the necessary libraries 18 | ``` 19 | pip install pytest coverage gdown 20 | pip install arctic # NOTE: pip may fail to resolve the right package dependencies!!! Please make sure the dependencies are satisfied. 21 | ``` 22 | 23 | # Importing example data 24 | 25 | 26 | 1. (Optional) Please follow the first part of [this section](https://github.com/microsoft/qlib#data-preparation) to **get 1min data** of Qlib. 27 | 2. Please follow the steps below to download the example data 28 | ```bash 29 | cd examples/orderbook_data/ 30 | gdown https://drive.google.com/uc?id=15nZF7tFT_eKVZAcMFL1qPS4jGyJflH7e # Proxies may be necessary here. 31 | python ../../scripts/get_data.py _unzip --file_path highfreq_orderbook_example_data.zip --target_dir . 32 | ``` 33 | 34 | 3. Please import the example data into your mongo db 35 | ```bash 36 | python create_dataset.py initialize_library # Initialize the libraries 37 | python create_dataset.py import_data # Import the data 38 | ``` 39 | 40 | # Query Examples 41 | 42 | After importing the data, you can run `example.py` to create some high-frequency features. 43 | ```bash 44 | pytest -s --disable-warnings example.py # If you want to run all examples 45 | pytest -s --disable-warnings example.py::TestClass::test_exp_10 # If you want to run a specific example 46 | ``` 47 | 48 | 49 | # Known limitations 50 | Expression computation across different frequencies is not supported yet 51 | -------------------------------------------------------------------------------- /examples/online_srv/update_online_pred.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | """ 5 | This example shows how OnlineTool works when we need to update predictions. 6 | There are two parts: first_train and update_online_pred. 7 | First, we finish the training and set the trained model as the `online` model. 8 | Next, we update the online predictions.
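A typical deployment runs `first_train` once and then schedules `update_online_pred` daily; see the example commands at the bottom of this file.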
9 | """ 10 | import copy 11 | 12 | import fire 13 | 14 | import qlib 15 | from qlib.constant import REG_CN 16 | from qlib.model.trainer import task_train 17 | from qlib.tests.config import CSI300_GBDT_TASK 18 | from qlib.workflow.online.utils import OnlineToolR 19 | 20 | task = copy.deepcopy(CSI300_GBDT_TASK) 21 | 22 | task["record"] = { 23 | "class": "SignalRecord", 24 | "module_path": "qlib.workflow.record_temp", 25 | } 26 | 27 | 28 | class UpdatePredExample: 29 | def __init__( 30 | self, provider_uri="~/.qlib/qlib_data/cn_data", region=REG_CN, experiment_name="online_srv", task_config=task 31 | ): 32 | qlib.init(provider_uri=provider_uri, region=region) 33 | self.experiment_name = experiment_name 34 | self.online_tool = OnlineToolR(self.experiment_name) 35 | self.task_config = task_config 36 | 37 | def first_train(self): 38 | rec = task_train(self.task_config, experiment_name=self.experiment_name) 39 | self.online_tool.reset_online_tag(rec) # set to online model 40 | 41 | def update_online_pred(self): 42 | self.online_tool.update_online_pred() 43 | 44 | def main(self): 45 | self.first_train() 46 | self.update_online_pred() 47 | 48 | 49 | if __name__ == "__main__": 50 | ## to train a model and set it to online model, use the command below 51 | # python update_online_pred.py first_train 52 | ## to update online predictions once a day, use the command below 53 | # python update_online_pred.py update_online_pred 54 | ## to see the whole process with your own parameters, use the command below 55 | # python update_online_pred.py main --experiment_name="your_exp_name" 56 | fire.Fire(UpdatePredExample) 57 | -------------------------------------------------------------------------------- /scripts/README.md: -------------------------------------------------------------------------------- 1 | 2 | - [Download Qlib Data](#Download-Qlib-Data) 3 | - [Download CN Data](#Download-CN-Data) 4 | - [Download US Data](#Download-US-Data) 5 | - [Download CN Simple Data](#Download-CN-Simple-Data) 6 | - [Help](#Help) 7 | - [Using in Qlib](#Using-in-Qlib) 8 | - [US data](#US-data) 9 | - [CN data](#CN-data) 10 | 11 | 12 | ## Download Qlib Data 13 | 14 | 15 | ### Download CN Data 16 | 17 | ```bash 18 | # daily data 19 | python get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --region cn 20 | 21 | # 1min data (Optional for running non-high-frequency strategies) 22 | python get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data_1min --region cn --interval 1min 23 | ``` 24 | 25 | ### Download US Data 26 | 27 | 28 | ```bash 29 | python get_data.py qlib_data --target_dir ~/.qlib/qlib_data/us_data --region us 30 | ``` 31 | 32 | ### Download CN Simple Data 33 | 34 | ```bash 35 | python get_data.py qlib_data --name qlib_data_simple --target_dir ~/.qlib/qlib_data/cn_data --region cn 36 | ``` 37 | 38 | ### Help 39 | 40 | ```bash 41 | python get_data.py qlib_data --help 42 | ``` 43 | 44 | ## Using in Qlib 45 | > For more information: https://qlib.readthedocs.io/en/latest/start/initialization.html 46 | 47 | 48 | ### US data 49 | 50 | > Need to download data first: [Download US Data](#Download-US-Data) 51 | 52 | ```python 53 | import qlib 54 | from qlib.config import REG_US 55 | provider_uri = "~/.qlib/qlib_data/us_data" # target_dir 56 | qlib.init(provider_uri=provider_uri, region=REG_US) 57 | ``` 58 | 59 | ### CN data 60 | 61 | > Need to download data first: [Download CN Data](#Download-CN-Data) 62 | 63 | ```python 64 | import qlib 65 | from qlib.constant import REG_CN 66 | 67 | provider_uri = 
"~/.qlib/qlib_data/cn_data" # target_dir 68 | qlib.init(provider_uri=provider_uri, region=REG_CN) 69 | ``` 70 | 71 | ## Use Crowd Sourced Data 72 | The is also a [crowd sourced version of qlib data](data_collector/crowd_source/README.md): https://github.com/chenditc/investment_data/releases 73 | ```bash 74 | wget https://github.com/chenditc/investment_data/releases/latest/download/qlib_bin.tar.gz 75 | tar -zxvf qlib_bin.tar.gz -C ~/.qlib/qlib_data/cn_data --strip-components=2 76 | ``` 77 | -------------------------------------------------------------------------------- /qlib/data/storage/arctic_storage/base.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | if sys.version_info >= (3, 9): 4 | from zoneinfo import ZoneInfo # noqa 5 | else: 6 | from backports.zoneinfo import ZoneInfo # noqa 7 | from qlib.log import get_module_logger 8 | from arctic import Arctic 9 | from arctic.auth import Credential 10 | from arctic.hooks import register_get_auth_hook 11 | 12 | try: 13 | from vnpy.trader.database import SETTINGS 14 | except ImportError: 15 | SETTINGS = {} 16 | 17 | 18 | logger = get_module_logger("arctic_storage") 19 | 20 | 21 | def db_symbol_to_qlib(db_symbol: str) -> str: 22 | """convert db_symbol to qlib symbol 23 | 24 | Parameters 25 | ---------- 26 | db_symbol : str 27 | db_symbol 28 | 29 | Returns 30 | ------- 31 | str 32 | qlib symbol 33 | """ 34 | code, exch = db_symbol.split("_") 35 | exch = "SH" if exch == "SSE" else "SZ" 36 | return f"{exch}{code}" 37 | 38 | 39 | def qlib_symbol_to_db(qlib_symbol: str) -> str: 40 | """convert db_symbol to qlib symbol 41 | 42 | Parameters 43 | ---------- 44 | qlib_symbol : str 45 | qlib style symbol 46 | 47 | Returns 48 | ------- 49 | str 50 | qlib symbol 51 | """ 52 | exch = "SSE" if qlib_symbol[:2].lower() == "sh" else "SZSE" 53 | return f"{qlib_symbol[2:]}_{exch}" 54 | 55 | 56 | def arctic_auth_hook(*_): 57 | if bool(SETTINGS.get("database.password", "")) and bool(SETTINGS.get("database.user", "")): 58 | return Credential( 59 | database="admin", 60 | user=SETTINGS["database.user"], 61 | password=SETTINGS["database.password"], 62 | ) 63 | return None 64 | 65 | 66 | register_get_auth_hook(arctic_auth_hook) 67 | 68 | 69 | class ArcticStorageMixin: 70 | """ArcticStorageMixin, applicable to ArcticXXXStorage 71 | Subclasses need 72 | """ 73 | 74 | def _get_arctic_store(self): 75 | """get arctic store""" 76 | if not hasattr(self, "arctic_store"): 77 | self.arctic_store = Arctic( 78 | SETTINGS["database.host"], tz_aware=True, tzinfo=ZoneInfo(SETTINGS["database.timezone"]) 79 | ) 80 | return self.arctic_store 81 | -------------------------------------------------------------------------------- /examples/hyperparameter/LightGBM/hyperparameter_360.py: -------------------------------------------------------------------------------- 1 | import optuna 2 | 3 | import qlib 4 | from qlib.constant import REG_CN 5 | from qlib.tests.config import CSI300_MARKET, DATASET_ALPHA360_CLASS, get_dataset_config 6 | from qlib.tests.data import GetData 7 | from qlib.utils import init_instance_by_config 8 | 9 | DATASET_CONFIG = get_dataset_config(market=CSI300_MARKET, dataset_class=DATASET_ALPHA360_CLASS) 10 | 11 | 12 | def objective(trial): 13 | task = { 14 | "model": { 15 | "class": "LGBModel", 16 | "module_path": "qlib.contrib.model.gbdt", 17 | "kwargs": { 18 | "loss": "mse", 19 | "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1), 20 | "learning_rate": trial.suggest_uniform("learning_rate", 0, 1), 
21 | "subsample": trial.suggest_uniform("subsample", 0, 1), 22 | "lambda_l1": trial.suggest_loguniform("lambda_l1", 1e-8, 1e4), 23 | "lambda_l2": trial.suggest_loguniform("lambda_l2", 1e-8, 1e4), 24 | "max_depth": 10, 25 | "num_leaves": trial.suggest_int("num_leaves", 1, 1024), 26 | "feature_fraction": trial.suggest_uniform("feature_fraction", 0.4, 1.0), 27 | "bagging_fraction": trial.suggest_uniform("bagging_fraction", 0.4, 1.0), 28 | "bagging_freq": trial.suggest_int("bagging_freq", 1, 7), 29 | "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 1, 50), 30 | "min_child_samples": trial.suggest_int("min_child_samples", 5, 100), 31 | }, 32 | }, 33 | } 34 | 35 | evals_result = dict() 36 | model = init_instance_by_config(task["model"]) 37 | model.fit(dataset, evals_result=evals_result) 38 | return min(evals_result["valid"]) 39 | 40 | 41 | if __name__ == "__main__": 42 | provider_uri = "~/.qlib/qlib_data/cn_data" 43 | GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True) 44 | qlib.init(provider_uri=provider_uri, region=REG_CN) 45 | 46 | dataset = init_instance_by_config(DATASET_CONFIG) 47 | 48 | study = optuna.Study(study_name="LGBM_360", storage="sqlite:///db.sqlite3") 49 | study.optimize(objective, n_jobs=6) 50 | -------------------------------------------------------------------------------- /qlib/typehint.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | """Commonly used types.""" 5 | 6 | import sys 7 | from pathlib import Path 8 | from typing import Union 9 | 10 | __all__ = ["Literal", "TypedDict", "final", "InstConf"] 11 | 12 | if sys.version_info >= (3, 8): 13 | from typing import Literal, TypedDict, final # type: ignore # pylint: disable=no-name-in-module 14 | else: 15 | from typing_extensions import Literal, TypedDict, final 16 | 17 | 18 | class InstDictConf(TypedDict): 19 | """ 20 | InstDictConf is a Dict-based config to describe an instance 21 | 22 | case 1) 23 | { 24 | 'class': 'ClassName', 25 | 'kwargs': dict, # It is optional. {} will be used if not given 26 | 'model_path': path, # It is optional if module is given in the class 27 | } 28 | case 2) 29 | { 30 | 'class': , 31 | 'kwargs': dict, # It is optional. {} will be used if not given 32 | } 33 | """ 34 | 35 | # class: str # because class is a keyword of Python. We have to comment it 36 | kwargs: dict # It is optional. {} will be used if not given 37 | module_path: str # It is optional if module is given in the class 38 | 39 | 40 | InstConf = Union[InstDictConf, str, object, Path] 41 | """ 42 | InstConf is a type to describe an instance; it will be passed into init_instance_by_config for Qlib 43 | 44 | config : Union[str, dict, object, Path] 45 | 46 | InstDictConf example. 47 | please refer to the docs of InstDictConf 48 | 49 | str example. 50 | 1) specify a pickle object 51 | - path like 'file:////obj.pkl' 52 | 2) specify a class name 53 | - "ClassName": getattr(module, "ClassName")() will be used. 54 | 3) specify module path with class name 55 | - "a.b.c.ClassName" getattr(, "ClassName")() will be used. 
56 | 57 | object example: 58 | instance of accept_types 59 | 60 | Path example: 61 | specify a pickle object 62 | - it will be treated like 'file:///<path to pickle file>/obj.pkl' 63 | """ 64 | -------------------------------------------------------------------------------- /examples/rl_order_execution/scripts/gen_training_orders.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | import os 5 | import numpy as np 6 | import pandas as pd 7 | 8 | from pathlib import Path 9 | 10 | DATA_PATH = Path(os.path.join("data", "pickle", "backtest")) 11 | OUTPUT_PATH = Path(os.path.join("data", "orders")) 12 | 13 | 14 | def generate_order(stock: str, start_idx: int, end_idx: int) -> bool: 15 | dataset = pd.read_pickle(DATA_PATH / f"{stock}.pkl") 16 | df = dataset.handler.fetch(level=None).reset_index() 17 | if len(df) == 0 or df.isnull().values.any() or min(df["$volume0"]) < 1e-5: 18 | return False 19 | 20 | df["date"] = pd.to_datetime(df["datetime"].dt.date) 21 | df = df.set_index(["instrument", "datetime", "date"]) 22 | df = df.groupby("date", group_keys=False).take(range(start_idx, end_idx)).droplevel(level=0) 23 | 24 | order_all = pd.DataFrame(df.groupby(level=(2, 0), group_keys=False).mean().dropna()) 25 | order_all["amount"] = np.random.lognormal(-3.28, 1.14) * order_all["$volume0"] 26 | order_all = order_all[order_all["amount"] > 0.0] 27 | order_all["order_type"] = 0 28 | order_all = order_all.drop(columns=["$volume0"]) 29 | 30 | order_train = order_all[order_all.index.get_level_values(0) <= pd.Timestamp("2021-06-30")] 31 | order_test = order_all[order_all.index.get_level_values(0) > pd.Timestamp("2021-06-30")] 32 | order_valid = order_test[order_test.index.get_level_values(0) <= pd.Timestamp("2021-09-30")] 33 | order_test = order_test[order_test.index.get_level_values(0) > pd.Timestamp("2021-09-30")] 34 | 35 | for order, tag in zip((order_train, order_valid, order_test, order_all), ("train", "valid", "test", "all")): 36 | path = OUTPUT_PATH / tag 37 | os.makedirs(path, exist_ok=True) 38 | if len(order) > 0: 39 | order.to_pickle(path / f"{stock}.pkl.target") 40 | return True 41 | 42 | 43 | np.random.seed(1234) 44 | file_list = sorted(os.listdir(DATA_PATH)) 45 | stocks = [f.replace(".pkl", "") for f in file_list] 46 | np.random.shuffle(stocks) 47 | 48 | cnt = 0 49 | for stock in stocks: 50 | if generate_order(stock, 0, 240 // 5 - 1): 51 | cnt += 1 52 | if cnt == 100: 53 | break 54 | -------------------------------------------------------------------------------- /qlib/contrib/model/svm.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License.
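# SVMRegression below is a thin qlib Model wrapper around sklearn.svm.SVR:
# fit() pulls the "train" segment (features plus label, optionally reweighted)
# from a DatasetH, and predict() returns a pd.Series of scores aligned with
# the feature index. A hypothetical usage sketch (names are illustrative):
#
#     model = SVMRegression(kernel="rbf", C=1.0)
#     model.fit(dataset)                      # dataset: a prepared qlib DatasetH
#     pred = model.predict(dataset, "test")   # pd.Series indexed like the features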
3 | 4 | from typing import Text, Union, cast 5 | 6 | import numpy as np 7 | import pandas as pd 8 | from sklearn.svm import SVR 9 | 10 | from qlib.data.dataset.weight import Reweighter 11 | 12 | from ...data.dataset import DatasetH 13 | from ...data.dataset.handler import DataHandlerLP 14 | from ...model.base import Model 15 | 16 | 17 | class SVMRegression(Model): 18 | """SVM Regression Model""" 19 | 20 | def __init__( 21 | self, 22 | kernel="rbf", 23 | degree=3, 24 | gamma="scale", 25 | coef0=0.0, 26 | tol=1e-3, 27 | C=1.0, 28 | epsilon=0.1, 29 | shrinking=True, 30 | cache_size=200, 31 | verbose=False, 32 | max_iter=-1, 33 | ): 34 | self.predictor = SVR( 35 | kernel=kernel, 36 | degree=degree, 37 | gamma=gamma, 38 | coef0=coef0, 39 | tol=tol, 40 | C=C, 41 | epsilon=epsilon, 42 | shrinking=shrinking, 43 | cache_size=cache_size, 44 | verbose=verbose, 45 | max_iter=max_iter, 46 | ) 47 | self.factor_names_ = None 48 | 49 | def fit(self, dataset: DatasetH, reweighter: Reweighter = None): 50 | df_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) 51 | if df_train.empty: 52 | raise ValueError("Empty data from dataset, please check your dataset config.") 53 | X, y = df_train["feature"].values, np.squeeze(df_train["label"].values) 54 | w = None if reweighter is None else cast(pd.Series, reweighter.reweight(df_train)).values 55 | self.factor_names_ = df_train["feature"].columns 56 | self.predictor.fit(X, y, sample_weight=w) 57 | return self 58 | 59 | def predict(self, dataset: DatasetH, segment: Union[Text, slice] = "test"): 60 | if getattr(self.predictor, "fit_status_", None) != 0: 61 | raise ValueError("model is not fitted yet!") 62 | x_test = dataset.prepare(segment, col_set="feature", data_key=DataHandlerLP.DK_I) 63 | x_test = x_test[self.factor_names_] 64 | return pd.Series(self.predictor.predict(x_test), index=x_test.index) 65 | -------------------------------------------------------------------------------- /docs/component/online.rst: -------------------------------------------------------------------------------- 1 | .. _online_serving: 2 | 3 | ============== 4 | Online Serving 5 | ============== 6 | .. currentmodule:: qlib 7 | 8 | 9 | Introduction 10 | ============ 11 | 12 | .. image:: ../_static/img/online_serving.png 13 | :align: center 14 | 15 | 16 | In addition to backtesting, one way to test whether a model is effective is to make predictions in real market conditions, or even to do real trading based on those predictions. 17 | ``Online Serving`` is a set of modules for serving online models with the latest data, 18 | which includes `Online Manager <#Online Manager>`_, `Online Strategy <#Online Strategy>`_, `Online Tool <#Online Tool>`_, `Updater <#Updater>`_. 19 | 20 | The examples under ``examples/online_srv`` demonstrate different features of ``Online Serving`` and can be used for reference. 21 | If you have many models or tasks that need to be managed, please consider `Task Management <../advanced/task_management.html>`_. 22 | These examples are based on some components in `Task Management <../advanced/task_management.html>`_ such as ``TrainerRM`` or ``Collector``. 23 | 24 | **NOTE**: Users should keep their data source updated to support online serving. For example, Qlib provides a batch of scripts (under ``scripts/data_collector``) to help users update Yahoo daily data. 25 | 26 | Known limitations currently: 27 | - Currently, daily updating of the prediction for the next trading day is supported.
But generating orders for the next trading day is not supported, due to limitations of the public data. 28 | 29 | 30 | Online Manager 31 | ============== 32 | 33 | .. automodule:: qlib.workflow.online.manager 34 | :members: 35 | :noindex: 36 | 37 | Online Strategy 38 | =============== 39 | 40 | .. automodule:: qlib.workflow.online.strategy 41 | :members: 42 | :noindex: 43 | 44 | Online Tool 45 | =========== 46 | 47 | .. automodule:: qlib.workflow.online.utils 48 | :members: 49 | :noindex: 50 | 51 | Updater 52 | ======= 53 | 54 | .. automodule:: qlib.workflow.online.update 55 | :members: 56 | :noindex: 57 | -------------------------------------------------------------------------------- /examples/benchmarks/XGBoost/workflow_config_xgboost_Alpha158.yaml: -------------------------------------------------------------------------------- 1 | qlib_init: 2 | provider_uri: "~/.qlib/qlib_data/cn_data" 3 | region: cn 4 | market: &market csi300 5 | benchmark: &benchmark SH000300 6 | data_handler_config: &data_handler_config 7 | start_time: 2008-01-01 8 | end_time: 2020-08-01 9 | fit_start_time: 2008-01-01 10 | fit_end_time: 2014-12-31 11 | instruments: *market 12 | port_analysis_config: &port_analysis_config 13 | strategy: 14 | class: TopkDropoutStrategy 15 | module_path: qlib.contrib.strategy 16 | kwargs: 17 | signal: 18 | topk: 50 19 | n_drop: 5 20 | backtest: 21 | start_time: 2017-01-01 22 | end_time: 2020-08-01 23 | account: 100000000 24 | benchmark: *benchmark 25 | exchange_kwargs: 26 | limit_threshold: 0.095 27 | deal_price: close 28 | open_cost: 0.0005 29 | close_cost: 0.0015 30 | min_cost: 5 31 | task: 32 | model: 33 | class: XGBModel 34 | module_path: qlib.contrib.model.xgboost 35 | kwargs: 36 | eval_metric: rmse 37 | colsample_bytree: 0.8879 38 | eta: 0.0421 39 | max_depth: 8 40 | n_estimators: 647 41 | subsample: 0.8789 42 | nthread: 20 43 | dataset: 44 | class: DatasetH 45 | module_path: qlib.data.dataset 46 | kwargs: 47 | handler: 48 | class: Alpha158 49 | module_path: qlib.contrib.data.handler 50 | kwargs: *data_handler_config 51 | segments: 52 | train: [2008-01-01, 2014-12-31] 53 | valid: [2015-01-01, 2016-12-31] 54 | test: [2017-01-01, 2020-08-01] 55 | record: 56 | - class: SignalRecord 57 | module_path: qlib.workflow.record_temp 58 | kwargs: 59 | model: 60 | dataset: 61 | - class: SigAnaRecord 62 | module_path: qlib.workflow.record_temp 63 | kwargs: 64 | ana_long_short: False 65 | ann_scaler: 252 66 | - class: PortAnaRecord 67 | module_path: qlib.workflow.record_temp 68 | kwargs: 69 | config: *port_analysis_config 70 | -------------------------------------------------------------------------------- /qlib/contrib/data/data.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | # We moved arctic from the core framework of Qlib to contrib because: 5 | # - Arctic has very strict limitations on pandas and numpy versions 6 | # - https://github.com/man-group/arctic/pull/908 7 | # - pip fails to compute the right version numbers! 8 | # - Maybe we can solve this problem with poetry 9 | 10 | import pandas as pd 11 | import pymongo 12 | 13 | # FIXME: so if you want to use the arctic-based provider, please install arctic manually; 14 | # `pip install arctic` may not be enough.
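# A hypothetical direct-usage sketch (in practice the provider is wired in
# through qlib's provider configuration rather than constructed by hand):
#
#     provider = ArcticFeatureProvider(uri="127.0.0.1")
#     s = provider.feature("SH600000", "$close", start_index, end_index, "1min")
#
# `feature` strips the leading "$" from the field name, reads that column from
# the arctic library named after `freq`, and keeps only the rows that fall
# inside `market_transaction_time_list`.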
15 | from arctic import Arctic 16 | 17 | from qlib.data.data import FeatureProvider 18 | 19 | 20 | class ArcticFeatureProvider(FeatureProvider): 21 | def __init__( 22 | self, uri="127.0.0.1", retry_time=0, market_transaction_time_list=[("09:15", "11:30"), ("13:00", "15:00")] 23 | ): 24 | super().__init__() 25 | self.uri = uri 26 | # TODO: 27 | # retry connecting if an error occurs 28 | # does it really matter? 29 | self.retry_time = retry_time 30 | # NOTE: this is especially important for the TResample operator 31 | self.market_transaction_time_list = market_transaction_time_list 32 | 33 | def feature(self, instrument, field, start_index, end_index, freq): 34 | field = str(field)[1:] 35 | with pymongo.MongoClient(self.uri) as client: 36 | # TODO: this reconnects to the server on every call, which is a performance issue 37 | arctic = Arctic(client) 38 | 39 | if freq not in arctic.list_libraries(): 40 | raise ValueError("lib {} not in arctic".format(freq)) 41 | 42 | if instrument not in arctic[freq].list_symbols(): 43 | # the instrument does not exist 44 | return pd.Series() 45 | else: 46 | df = arctic[freq].read(instrument, columns=[field], chunk_range=(start_index, end_index)) 47 | s = df[field] 48 | 49 | if not s.empty: 50 | s = pd.concat( 51 | [ 52 | s.between_time(time_tuple[0], time_tuple[1]) 53 | for time_tuple in self.market_transaction_time_list 54 | ] 55 | ) 56 | return s 57 | -------------------------------------------------------------------------------- /docs/advanced/serial.rst: -------------------------------------------------------------------------------- 1 | .. _serial: 2 | 3 | ============= 4 | Serialization 5 | ============= 6 | .. currentmodule:: qlib 7 | 8 | Introduction 9 | ============ 10 | ``Qlib`` supports dumping the state of ``DataHandler``, ``DataSet``, ``Processor`` and ``Model``, etc. to disk and reloading them. 11 | 12 | Serializable Class 13 | ================== 14 | 15 | ``Qlib`` provides a base class ``qlib.utils.serial.Serializable``, whose state can be dumped into or loaded from disk in `pickle` format. 16 | When users dump the state of a ``Serializable`` instance, the attributes of the instance whose name **does not** start with `_` will be saved on the disk. 17 | However, users can use the ``config`` method or override the ``default_dump_all`` attribute to change this behavior. 18 | 19 | Users can also override the ``pickle_backend`` attribute to choose a pickle backend. The supported values are "pickle" (the default, and most common) and "dill" (which can dump more things, such as functions; see the ``dill`` documentation for more information). 20 | 21 | Example 22 | ======= 23 | ``Qlib``'s serializable classes include ``DataHandler``, ``DataSet``, ``Processor`` and ``Model``, etc., which are subclasses of ``qlib.utils.serial.Serializable``. 24 | Specifically, ``qlib.data.dataset.DatasetH`` is one of them. Users can serialize ``DatasetH`` as follows. 25 | 26 | .. code-block:: Python 27 | 28 | ##=============dump dataset============= 29 | dataset.to_pickle(path="dataset.pkl") # dataset is an instance of qlib.data.dataset.DatasetH 30 | 31 | ##=============reload dataset============= 32 | with open("dataset.pkl", "rb") as file_dataset: 33 | dataset = pickle.load(file_dataset) 34 | 35 | .. note:: 36 | Only the state of ``DatasetH`` should be saved to disk, such as the `mean` and `variance` used for data normalization, etc. 37 | 38 | After reloading the ``DatasetH``, users need to reinitialize it.
It means that users can reset some states of ``DatasetH`` or ``QlibDataHandler``, such as `instruments`, `start_time`, `end_time` and `segments`, etc., and generate new data according to those states (data is not state and should not be saved on the disk). 39 | 40 | A more detailed example is ``examples/data_demo/data_cache_demo.py`` in the Qlib repository. 41 | 42 | 43 | API 44 | === 45 | Please refer to `Serializable API <../reference/api.html#module-qlib.utils.serial.Serializable>`_. 46 | -------------------------------------------------------------------------------- /examples/highfreq/workflow_config_High_Freq_Tree_Alpha158.yaml: -------------------------------------------------------------------------------- 1 | qlib_init: 2 | provider_uri: "~/.qlib/qlib_data/cn_data_1min" 3 | region: cn 4 | market: &market 'csi300' 5 | start_time: &start_time "2020-09-15 00:00:00" 6 | end_time: &end_time "2021-01-18 16:00:00" 7 | train_end_time: &train_end_time "2020-11-15 16:00:00" 8 | valid_start_time: &valid_start_time "2020-11-16 00:00:00" 9 | valid_end_time: &valid_end_time "2020-11-30 16:00:00" 10 | test_start_time: &test_start_time "2020-12-01 00:00:00" 11 | data_handler_config: &data_handler_config 12 | start_time: *start_time 13 | end_time: *end_time 14 | fit_start_time: *start_time 15 | fit_end_time: *train_end_time 16 | instruments: *market 17 | freq: '1min' 18 | infer_processors: 19 | - class: 'RobustZScoreNorm' 20 | kwargs: 21 | fields_group: 'feature' 22 | clip_outlier: false 23 | - class: "Fillna" 24 | kwargs: 25 | fields_group: 'feature' 26 | learn_processors: 27 | - class: 'DropnaLabel' 28 | - class: 'CSRankNorm' 29 | kwargs: 30 | fields_group: 'label' 31 | label: ["Ref($close, -2) / Ref($close, -1) - 1"] 32 | 33 | task: 34 | model: 35 | class: "HFLGBModel" 36 | module_path: "qlib.contrib.model.highfreq_gdbt_model" 37 | kwargs: 38 | objective: 'binary' 39 | metric: ['binary_logloss','auc'] 40 | verbosity: -1 41 | learning_rate: 0.01 42 | max_depth: 8 43 | num_leaves: 150 44 | lambda_l1: 1.5 45 | lambda_l2: 1 46 | num_threads: 20 47 | dataset: 48 | class: "DatasetH" 49 | module_path: "qlib.data.dataset" 50 | kwargs: 51 | handler: 52 | class: "Alpha158" 53 | module_path: "qlib.contrib.data.handler" 54 | kwargs: *data_handler_config 55 | segments: 56 | train: [*start_time, *train_end_time] 57 | valid: [*train_end_time, *valid_end_time] 58 | test: [*test_start_time, *end_time] 59 | record: 60 | - class: "SignalRecord" 61 | module_path: "qlib.workflow.record_temp" 62 | kwargs: {} 63 | - class: "HFSignalRecord" 64 | module_path: "qlib.workflow.record_temp" 65 | kwargs: {} -------------------------------------------------------------------------------- /examples/benchmarks/CatBoost/workflow_config_catboost_Alpha158.yaml: -------------------------------------------------------------------------------- 1 | qlib_init: 2 | provider_uri: "~/.qlib/qlib_data/cn_data" 3 | region: cn 4 | market: &market csi300 5 | benchmark: &benchmark SH000300 6 | data_handler_config: &data_handler_config 7 | start_time: 2008-01-01 8 | end_time: 2020-08-01 9 | fit_start_time: 2008-01-01 10 | fit_end_time: 2014-12-31 11 | instruments: *market 12 | port_analysis_config: &port_analysis_config 13 | strategy: 14 | class: TopkDropoutStrategy 15 | module_path: qlib.contrib.strategy 16 | kwargs: 17 | signal: 18 | topk: 50 19 | n_drop: 5 20 | backtest: 21 | start_time: 2017-01-01 22 | end_time: 2020-08-01 23 | account: 100000000 24 | benchmark: *benchmark 25 | exchange_kwargs: 26 | limit_threshold: 0.095 27 | deal_price: close 28 | open_cost: 0.0005 29 | close_cost: 0.0015
30 | min_cost: 5 31 | task: 32 | model: 33 | class: CatBoostModel 34 | module_path: qlib.contrib.model.catboost_model 35 | kwargs: 36 | loss: RMSE 37 | learning_rate: 0.0421 38 | subsample: 0.8789 39 | max_depth: 6 40 | num_leaves: 100 41 | thread_count: 20 42 | grow_policy: Lossguide 43 | bootstrap_type: Poisson 44 | dataset: 45 | class: DatasetH 46 | module_path: qlib.data.dataset 47 | kwargs: 48 | handler: 49 | class: Alpha158 50 | module_path: qlib.contrib.data.handler 51 | kwargs: *data_handler_config 52 | segments: 53 | train: [2008-01-01, 2014-12-31] 54 | valid: [2015-01-01, 2016-12-31] 55 | test: [2017-01-01, 2020-08-01] 56 | record: 57 | - class: SignalRecord 58 | module_path: qlib.workflow.record_temp 59 | kwargs: 60 | model: 61 | dataset: 62 | - class: SigAnaRecord 63 | module_path: qlib.workflow.record_temp 64 | kwargs: 65 | ana_long_short: False 66 | ann_scaler: 252 67 | - class: PortAnaRecord 68 | module_path: qlib.workflow.record_temp 69 | kwargs: 70 | config: *port_analysis_config 71 | -------------------------------------------------------------------------------- /examples/benchmarks/CatBoost/workflow_config_catboost_Alpha158_csi500.yaml: -------------------------------------------------------------------------------- 1 | qlib_init: 2 | provider_uri: "~/.qlib/qlib_data/cn_data" 3 | region: cn 4 | market: &market csi500 5 | benchmark: &benchmark SH000905 6 | data_handler_config: &data_handler_config 7 | start_time: 2008-01-01 8 | end_time: 2020-08-01 9 | fit_start_time: 2008-01-01 10 | fit_end_time: 2014-12-31 11 | instruments: *market 12 | port_analysis_config: &port_analysis_config 13 | strategy: 14 | class: TopkDropoutStrategy 15 | module_path: qlib.contrib.strategy 16 | kwargs: 17 | signal: 18 | topk: 50 19 | n_drop: 5 20 | backtest: 21 | start_time: 2017-01-01 22 | end_time: 2020-08-01 23 | account: 100000000 24 | benchmark: *benchmark 25 | exchange_kwargs: 26 | limit_threshold: 0.095 27 | deal_price: close 28 | open_cost: 0.0005 29 | close_cost: 0.0015 30 | min_cost: 5 31 | task: 32 | model: 33 | class: CatBoostModel 34 | module_path: qlib.contrib.model.catboost_model 35 | kwargs: 36 | loss: RMSE 37 | learning_rate: 0.0421 38 | subsample: 0.8789 39 | max_depth: 6 40 | num_leaves: 100 41 | thread_count: 20 42 | grow_policy: Lossguide 43 | bootstrap_type: Poisson 44 | dataset: 45 | class: DatasetH 46 | module_path: qlib.data.dataset 47 | kwargs: 48 | handler: 49 | class: Alpha158 50 | module_path: qlib.contrib.data.handler 51 | kwargs: *data_handler_config 52 | segments: 53 | train: [2008-01-01, 2014-12-31] 54 | valid: [2015-01-01, 2016-12-31] 55 | test: [2017-01-01, 2020-08-01] 56 | record: 57 | - class: SignalRecord 58 | module_path: qlib.workflow.record_temp 59 | kwargs: 60 | model: 61 | dataset: 62 | - class: SigAnaRecord 63 | module_path: qlib.workflow.record_temp 64 | kwargs: 65 | ana_long_short: False 66 | ann_scaler: 252 67 | - class: PortAnaRecord 68 | module_path: qlib.workflow.record_temp 69 | kwargs: 70 | config: *port_analysis_config 71 | -------------------------------------------------------------------------------- /examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml: -------------------------------------------------------------------------------- 1 | qlib_init: 2 | provider_uri: "~/.qlib/qlib_data/cn_data" 3 | region: cn 4 | market: &market csi300 5 | benchmark: &benchmark SH000300 6 | data_handler_config: &data_handler_config 7 | start_time: 2008-01-01 8 | end_time: 2020-08-01 9 | fit_start_time: 2008-01-01 10 | fit_end_time: 
2014-12-31 11 | instruments: *market 12 | port_analysis_config: &port_analysis_config 13 | strategy: 14 | class: TopkDropoutStrategy 15 | module_path: qlib.contrib.strategy 16 | kwargs: 17 | signal: 18 | topk: 50 19 | n_drop: 5 20 | backtest: 21 | start_time: 2017-01-01 22 | end_time: 2020-08-01 23 | account: 100000000 24 | benchmark: *benchmark 25 | exchange_kwargs: 26 | limit_threshold: 0.095 27 | deal_price: close 28 | open_cost: 0.0005 29 | close_cost: 0.0015 30 | min_cost: 5 31 | task: 32 | model: 33 | class: LGBModel 34 | module_path: qlib.contrib.model.gbdt 35 | kwargs: 36 | loss: mse 37 | colsample_bytree: 0.8879 38 | learning_rate: 0.2 39 | subsample: 0.8789 40 | lambda_l1: 205.6999 41 | lambda_l2: 580.9768 42 | max_depth: 8 43 | num_leaves: 210 44 | num_threads: 20 45 | dataset: 46 | class: DatasetH 47 | module_path: qlib.data.dataset 48 | kwargs: 49 | handler: 50 | class: Alpha158 51 | module_path: qlib.contrib.data.handler 52 | kwargs: *data_handler_config 53 | segments: 54 | train: [2008-01-01, 2014-12-31] 55 | valid: [2015-01-01, 2016-12-31] 56 | test: [2017-01-01, 2020-08-01] 57 | record: 58 | - class: SignalRecord 59 | module_path: qlib.workflow.record_temp 60 | kwargs: 61 | model: 62 | dataset: 63 | - class: SigAnaRecord 64 | module_path: qlib.workflow.record_temp 65 | kwargs: 66 | ana_long_short: False 67 | ann_scaler: 252 68 | - class: PortAnaRecord 69 | module_path: qlib.workflow.record_temp 70 | kwargs: 71 | config: *port_analysis_config 72 | --------------------------------------------------------------------------------
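Workflow configs like the LightGBM one above are executed with Qlib's `qrun` entry point (the same command the data-cache demo invokes through `subprocess`). A typical invocation, assuming the default CN data has been downloaded, is:

```bash
cd examples/benchmarks/LightGBM
qrun workflow_config_lightgbm_Alpha158.yaml
```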