├── qlib ├── cli │ ├── __init__.py │ └── data.py ├── contrib │ ├── __init__.py │ ├── eva │ │ └── __init__.py │ ├── ops │ │ └── __init__.py │ ├── data │ │ ├── __init__.py │ │ ├── utils │ │ │ └── __init__.py │ │ └── data.py │ ├── report │ │ ├── report │ │ │ ├── __init__.py │ │ │ └── template │ │ │ │ └── general_report_template.html │ │ ├── data │ │ │ ├── __init__.py │ │ │ └── base.py │ │ ├── analysis_model │ │ │ └── __init__.py │ │ ├── __init__.py │ │ └── analysis_position │ │ │ └── __init__.py │ ├── tuner │ │ ├── __init__.py │ │ ├── space.py │ │ └── launcher.py │ ├── workflow │ │ └── __init__.py │ ├── meta │ │ ├── __init__.py │ │ └── data_selection │ │ │ └── __init__.py │ ├── strategy │ │ ├── optimizer │ │ │ ├── __init__.py │ │ │ └── base.py │ │ └── __init__.py │ ├── rolling │ │ ├── __init__.py │ │ └── __main__.py │ ├── online │ │ ├── __init__.py │ │ └── online_model.py │ ├── torch.py │ └── model │ │ ├── pytorch_utils.py │ │ ├── __init__.py │ │ └── svm.py ├── model │ ├── ens │ │ └── __init__.py │ ├── interpret │ │ ├── __init__.py │ │ └── base.py │ ├── __init__.py │ ├── meta │ │ ├── __init__.py │ │ └── task.py │ ├── riskmodel │ │ └── __init__.py │ └── utils.py ├── rl │ ├── contrib │ │ ├── __init__.py │ │ └── utils.py │ ├── strategy │ │ ├── __init__.py │ │ └── single_order.py │ ├── data │ │ ├── __init__.py │ │ └── base.py │ ├── __init__.py │ ├── seed.py │ ├── trainer │ │ └── __init__.py │ ├── utils │ │ └── __init__.py │ ├── order_execution │ │ ├── __init__.py │ │ └── utils.py │ └── aux_info.py ├── walkforward │ ├── __init__.py │ └── walkforward_handler.py ├── workflow │ ├── online │ │ └── __init__.py │ ├── task │ │ └── __init__.py │ └── utils.py ├── data │ ├── _libs │ │ └── __init__.py │ ├── storage │ │ ├── arctic_storage │ │ │ ├── __init__.py │ │ │ ├── arctic_storage_structure.md │ │ │ └── base.py │ │ └── __init__.py │ ├── inst_processor.py │ ├── inst_info.py │ ├── dataset │ │ └── weight.py │ └── __init__.py ├── strategy │ └── __init__.py ├── utils │ └── exceptions.py ├── constant.py └── typehint.py ├── examples ├── model_rolling │ └── requirements.txt ├── benchmarks │ ├── Localformer │ │ ├── README.md │ │ └── requirements.txt │ ├── MLP │ │ ├── README.md │ │ └── requirements.txt │ ├── KRNN │ │ ├── requirements.txt │ │ └── README.md │ ├── Sandwich │ │ ├── requirements.txt │ │ └── README.md │ ├── TFT │ │ ├── requirements.txt │ │ ├── libs │ │ │ └── __init__.py │ │ ├── data_formatters │ │ │ └── __init__.py │ │ ├── expt_settings │ │ │ └── __init__.py │ │ ├── README.md │ │ └── workflow_config_tft_Alpha158.yaml │ ├── LightGBM │ │ ├── requirements.txt │ │ ├── README.md │ │ ├── features_resample_N.py │ │ ├── features_sample.py │ │ └── workflow_config_lightgbm_Alpha158.yaml │ ├── Transformer │ │ ├── requirements.txt │ │ └── README.md │ ├── XGBoost │ │ ├── requirements.txt │ │ ├── README.md │ │ └── workflow_config_xgboost_Alpha158.yaml │ ├── CatBoost │ │ ├── requirements.txt │ │ ├── README.md │ │ ├── workflow_config_catboost_Alpha158.yaml │ │ └── workflow_config_catboost_Alpha158_csi500.yaml │ ├── DoubleEnsemble │ │ ├── requirements.txt │ │ └── README.md │ ├── Linear │ │ └── requirements.txt │ ├── ADD │ │ ├── requirements.txt │ │ └── README.md │ ├── GRU │ │ ├── requirements.txt │ │ ├── csi300_gru_ts.pkl │ │ ├── model_gru_csi300.pkl │ │ └── README.md │ ├── HIST │ │ ├── requirements.txt │ │ ├── qlib_csi300_stock_index.npy │ │ └── README.md │ ├── SFM │ │ ├── requirements.txt │ │ └── README.md │ ├── TCN │ │ ├── requirements.txt │ │ └── README.md │ ├── TCTS │ │ ├── requirements.txt │ │ └── workflow.png │ 
├── TabNet │ │ ├── requirements.txt │ │ └── README.md │ ├── ADARNN │ │ ├── requirements.txt │ │ └── README.md │ ├── ALSTM │ │ ├── requirements.txt │ │ └── README.md │ ├── GATs │ │ ├── requirements.txt │ │ └── README.md │ ├── IGMTF │ │ ├── requirements.txt │ │ └── README.md │ ├── LSTM │ │ ├── requirements.txt │ │ ├── csi300_lstm_ts.pkl │ │ ├── model_lstm_csi300.pkl │ │ └── README.md │ ├── TRA │ │ ├── requirements.txt │ │ ├── data │ │ │ └── README.md │ │ ├── run.sh │ │ ├── example.py │ │ └── configs │ │ │ ├── config_alstm.yaml │ │ │ ├── config_alstm_tra_init.yaml │ │ │ ├── config_transformer.yaml │ │ │ ├── config_transformer_tra_init.yaml │ │ │ ├── config_alstm_tra.yaml │ │ │ └── config_transformer_tra.yaml │ └── GeneralPtNN │ │ └── README.md ├── benchmarks_dynamic │ ├── DDG-DA │ │ ├── requirements.txt │ │ ├── Makefile │ │ └── workflow.py │ ├── baseline │ │ ├── README.md │ │ └── rolling_benchmark.py │ └── README.md ├── data_demo │ ├── README.md │ └── data_cache_demo.py ├── hyperparameter │ └── LightGBM │ │ ├── requirements.txt │ │ ├── Readme.md │ │ ├── hyperparameter_158.py │ │ └── hyperparameter_360.py ├── README.md ├── rl_order_execution │ ├── scripts │ │ ├── merge_orders.py │ │ ├── gen_pickle_data.py │ │ └── gen_training_orders.py │ └── exp_configs │ │ ├── backtest_twap.yml │ │ ├── backtest_ppo.yml │ │ ├── backtest_opds.yml │ │ ├── train_opds.yml │ │ └── train_ppo.yml ├── rolling_process_data │ ├── README.md │ └── rolling_handler.py ├── model_interpreter │ └── feature.py ├── nested_decision_execution │ └── README.md ├── portfolio │ ├── README.md │ └── prepare_riskdata.py ├── highfreq │ ├── README.md │ └── workflow_config_High_Freq_Tree_Alpha158.yaml ├── orderbook_data │ └── README.md └── online_srv │ └── update_online_pred.py ├── docs ├── changelog │ └── changelog.rst ├── _static │ ├── img │ │ ├── logo │ │ │ ├── 1.png │ │ │ ├── 2.png │ │ │ ├── 3.png │ │ │ ├── yellow_bg_rec.png │ │ │ ├── yel_bg_rec+word.png │ │ │ ├── white_bg_rec+word.png │ │ │ └── yellow_bg_rec+word .png │ │ ├── change doc.gif │ │ ├── framework.png │ │ ├── topk_drop.png │ │ ├── RL_framework.png │ │ ├── rdagent_logo.png │ │ ├── QlibRL_framework.png │ │ ├── analysis │ │ │ ├── report.png │ │ │ ├── score_ic.png │ │ │ ├── rank_label_buy.png │ │ │ ├── analysis_model_IC.png │ │ │ ├── rank_label_hold.png │ │ │ ├── rank_label_sell.png │ │ │ ├── risk_analysis_bar.png │ │ │ ├── risk_analysis_std.png │ │ │ ├── analysis_model_NDQ.png │ │ │ ├── cumulative_return_buy.png │ │ │ ├── cumulative_return_hold.png │ │ │ ├── cumulative_return_sell.png │ │ │ ├── analysis_model_long_short.png │ │ │ ├── analysis_model_monthly_IC.png │ │ │ ├── risk_analysis_max_drawdown.png │ │ │ ├── analysis_model_auto_correlation.png │ │ │ ├── analysis_model_cumulative_return.png │ │ │ ├── cumulative_return_buy_minus_sell.png │ │ │ ├── risk_analysis_annualized_return.png │ │ │ └── risk_analysis_information_ratio.png │ │ ├── online_serving.png │ │ ├── qrcode │ │ │ └── gitter_qr.png │ │ └── framework-abstract.jpg │ └── demo.sh ├── requirements.txt ├── component │ ├── rl │ │ └── toctree.rst │ └── online.rst ├── Makefile ├── make.bat ├── advanced │ ├── server.rst │ └── serial.rst └── start │ └── installation.rst ├── .gitattributes ├── .dockerignore ├── scripts ├── data_collector │ ├── us_index │ │ ├── requirements.txt │ │ └── README.md │ ├── contrib │ │ ├── fill_cn_1min_data │ │ │ ├── requirements.txt │ │ │ └── README.md │ │ └── future_trading_date_collector │ │ │ ├── requirements.txt │ │ │ └── README.md │ ├── crypto │ │ ├── requirement.txt │ │ └── README.md │ ├── 
fund │ │ ├── requirements.txt │ │ └── README.md │ ├── cn_index │ │ ├── requirements.txt │ │ └── README.md │ ├── pit │ │ ├── requirements.txt │ │ └── README.md │ ├── yahoo │ │ └── requirements.txt │ ├── baostock_5min │ │ └── requirements.txt │ ├── br_index │ │ └── requirements.txt │ └── crowd_source │ │ └── README.md ├── get_data.py ├── collect_info.py └── README.md ├── tests ├── dataset_tests │ ├── README.md │ └── test_datalayer.py ├── data_mid_layer_tests │ ├── README.md │ └── test_handler.py ├── dependency_tests │ ├── README.md │ └── test_mlflow.py ├── pytest.ini ├── conftest.py ├── test_contrib_model.py ├── test_workflow.py ├── misc │ ├── test_get_multi_proc.py │ └── test_sepdf.py ├── test_get_data.py └── test_register_ops.py ├── MANIFEST.in ├── .pylintrc ├── .deepsource.toml ├── .github ├── ISSUE_TEMPLATE │ ├── documentation.md │ ├── question.md │ ├── feature-request.md │ └── bug-report.md ├── release-drafter.yml ├── PULL_REQUEST_TEMPLATE.md └── workflows │ └── test_qlib_from_source_slow.yml ├── .pre-commit-config.yaml ├── .mypy.ini ├── CODE_OF_CONDUCT.md ├── .readthedocs.yaml ├── .gitignore ├── Dockerfile ├── setup.py └── LICENSE /qlib/cli/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /qlib/contrib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /qlib/contrib/eva/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /qlib/contrib/ops/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /qlib/model/ens/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /qlib/rl/contrib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /qlib/walkforward/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /qlib/contrib/data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /qlib/model/interpret/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /qlib/workflow/online/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /qlib/contrib/data/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /qlib/contrib/report/report/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
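Before the individual files are listed below, it may help to see how the main pieces of the `qlib` package tree above fit together at runtime. The following is a minimal, illustrative sketch rather than code from the repository: it assumes the standard public API (`qlib.init`, the `D` data accessor under `qlib/data`, and the region constants in `qlib/constant.py`), and it assumes a data snapshot has already been downloaded to `~/.qlib/qlib_data/cn_data` (e.g. via `scripts/get_data.py`).

```python
import qlib
from qlib.constant import REG_CN  # region constants live in qlib/constant.py (see the tree above)
from qlib.data import D           # high-level data accessor backed by the qlib/data modules

# Point qlib at a locally prepared data snapshot (this path is an assumption).
qlib.init(provider_uri="~/.qlib/qlib_data/cn_data", region=REG_CN)

# Fetch two price fields for one instrument over a short window.
df = D.features(
    instruments=["SH600000"],
    fields=["$close", "$volume"],
    start_time="2020-01-02",
    end_time="2020-01-10",
)
print(df.head())
```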
/examples/model_rolling/requirements.txt: -------------------------------------------------------------------------------- 1 | xgboost 2 | -------------------------------------------------------------------------------- /docs/changelog/changelog.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../../CHANGES.rst 2 | -------------------------------------------------------------------------------- /examples/benchmarks/Localformer/README.md: -------------------------------------------------------------------------------- 1 | # Localformer 2 | -------------------------------------------------------------------------------- /examples/benchmarks/MLP/README.md: -------------------------------------------------------------------------------- 1 | # Multi-Layer Perceptron (MLP) 2 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | .github/workflows merge=ours 2 | .gitattributes merge=ours -------------------------------------------------------------------------------- /examples/benchmarks_dynamic/DDG-DA/requirements.txt: -------------------------------------------------------------------------------- 1 | torch==1.10.0 2 | -------------------------------------------------------------------------------- /qlib/contrib/tuner/__init__.py: -------------------------------------------------------------------------------- 1 | # pylint: skip-file 2 | # flake8: noqa 3 | -------------------------------------------------------------------------------- /examples/benchmarks/KRNN/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.23.4 2 | pandas==1.5.2 3 | -------------------------------------------------------------------------------- /examples/benchmarks/Sandwich/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.23.4 2 | pandas==1.5.2 3 | -------------------------------------------------------------------------------- /examples/benchmarks/TFT/requirements.txt: -------------------------------------------------------------------------------- 1 | tensorflow-gpu==1.15.0 2 | pandas==1.1.0 3 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | *.pyc 3 | *.pyo 4 | *.pyd 5 | .Python 6 | .env 7 | .git 8 | 9 | -------------------------------------------------------------------------------- /examples/benchmarks/LightGBM/requirements.txt: -------------------------------------------------------------------------------- 1 | pandas==1.1.2 2 | numpy==1.21.0 3 | lightgbm 4 | -------------------------------------------------------------------------------- /examples/benchmarks/Localformer/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.21.0 2 | pandas==1.1.2 3 | torch==1.2.0 -------------------------------------------------------------------------------- /examples/benchmarks/Transformer/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.21.0 2 | pandas==1.1.2 3 | torch==1.2.0 -------------------------------------------------------------------------------- /examples/benchmarks/XGBoost/requirements.txt: 
-------------------------------------------------------------------------------- 1 | numpy==1.21.0 2 | pandas==1.1.2 3 | xgboost==1.2.1 -------------------------------------------------------------------------------- /examples/benchmarks/CatBoost/requirements.txt: -------------------------------------------------------------------------------- 1 | pandas==1.1.2 2 | numpy==1.21.0 3 | catboost==0.24.3 4 | -------------------------------------------------------------------------------- /scripts/data_collector/us_index/requirements.txt: -------------------------------------------------------------------------------- 1 | fire 2 | requests 3 | pandas 4 | lxml 5 | loguru 6 | -------------------------------------------------------------------------------- /docs/_static/img/logo/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/logo/1.png -------------------------------------------------------------------------------- /docs/_static/img/logo/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/logo/2.png -------------------------------------------------------------------------------- /docs/_static/img/logo/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/logo/3.png -------------------------------------------------------------------------------- /examples/benchmarks/DoubleEnsemble/requirements.txt: -------------------------------------------------------------------------------- 1 | pandas==1.1.2 2 | numpy==1.21.0 3 | lightgbm==3.1.0 -------------------------------------------------------------------------------- /examples/benchmarks/Linear/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.17.4 2 | pandas>=1.0.1 3 | scikit-learn>=0.23.1 4 | -------------------------------------------------------------------------------- /qlib/data/_libs/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | -------------------------------------------------------------------------------- /qlib/strategy/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
3 | -------------------------------------------------------------------------------- /examples/benchmarks_dynamic/DDG-DA/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean 2 | 3 | clean: 4 | -rm -r *.pkl mlruns || true 5 | -------------------------------------------------------------------------------- /scripts/data_collector/contrib/fill_cn_1min_data/requirements.txt: -------------------------------------------------------------------------------- 1 | fire 2 | pandas 3 | loguru 4 | tqdm 5 | pyqlib -------------------------------------------------------------------------------- /docs/_static/img/change doc.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/change doc.gif -------------------------------------------------------------------------------- /docs/_static/img/framework.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/framework.png -------------------------------------------------------------------------------- /docs/_static/img/topk_drop.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/topk_drop.png -------------------------------------------------------------------------------- /examples/benchmarks/ADD/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.21.0 2 | pandas==1.1.2 3 | scikit_learn==0.23.2 4 | torch==1.7.0 5 | -------------------------------------------------------------------------------- /examples/benchmarks/GRU/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.21.0 2 | pandas==1.1.2 3 | scikit_learn==0.23.2 4 | torch==1.7.0 5 | -------------------------------------------------------------------------------- /examples/benchmarks/HIST/requirements.txt: -------------------------------------------------------------------------------- 1 | pandas==1.1.2 2 | numpy==1.21.0 3 | scikit_learn==0.23.2 4 | torch==1.7.0 -------------------------------------------------------------------------------- /examples/benchmarks/MLP/requirements.txt: -------------------------------------------------------------------------------- 1 | pandas==1.1.2 2 | numpy==1.21.0 3 | scikit_learn==0.23.2 4 | torch==1.7.0 5 | -------------------------------------------------------------------------------- /examples/benchmarks/SFM/requirements.txt: -------------------------------------------------------------------------------- 1 | pandas==1.1.2 2 | numpy==1.21.0 3 | scikit_learn==0.23.2 4 | torch==1.7.0 5 | -------------------------------------------------------------------------------- /examples/benchmarks/TCN/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.21.0 2 | pandas==1.1.2 3 | scikit_learn==0.23.2 4 | torch==1.7.0 5 | -------------------------------------------------------------------------------- /examples/benchmarks/TCTS/requirements.txt: -------------------------------------------------------------------------------- 1 | pandas==1.1.2 2 | numpy==1.21.0 3 | scikit_learn==0.23.2 4 | torch==1.7.0 -------------------------------------------------------------------------------- /examples/benchmarks/TabNet/requirements.txt: 
-------------------------------------------------------------------------------- 1 | pandas==1.1.2 2 | numpy==1.21.0 3 | scikit_learn==0.23.2 4 | torch==1.7.0 -------------------------------------------------------------------------------- /docs/_static/img/RL_framework.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/RL_framework.png -------------------------------------------------------------------------------- /docs/_static/img/rdagent_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/rdagent_logo.png -------------------------------------------------------------------------------- /examples/benchmarks/ADARNN/requirements.txt: -------------------------------------------------------------------------------- 1 | pandas==1.1.2 2 | numpy==1.21.0 3 | scikit_learn==0.23.2 4 | torch==1.7.0 5 | -------------------------------------------------------------------------------- /examples/benchmarks/ALSTM/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.21.0 2 | pandas==1.1.2 3 | scikit_learn==0.23.2 4 | torch==1.7.0 5 | -------------------------------------------------------------------------------- /examples/benchmarks/GATs/requirements.txt: -------------------------------------------------------------------------------- 1 | pandas==1.1.2 2 | numpy==1.21.0 3 | scikit_learn==0.23.2 4 | torch==1.7.0 5 | -------------------------------------------------------------------------------- /examples/benchmarks/IGMTF/requirements.txt: -------------------------------------------------------------------------------- 1 | pandas==1.1.2 2 | numpy==1.21.0 3 | scikit_learn==0.23.2 4 | torch==1.7.0 5 | -------------------------------------------------------------------------------- /examples/benchmarks/LSTM/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.21.0 2 | pandas==1.1.2 3 | scikit_learn==0.23.2 4 | torch==1.7.0 5 | -------------------------------------------------------------------------------- /docs/_static/img/QlibRL_framework.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/QlibRL_framework.png -------------------------------------------------------------------------------- /docs/_static/img/analysis/report.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/analysis/report.png -------------------------------------------------------------------------------- /docs/_static/img/online_serving.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/online_serving.png -------------------------------------------------------------------------------- /docs/_static/img/qrcode/gitter_qr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/qrcode/gitter_qr.png -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 
1 | Cython 2 | cmake 3 | numpy 4 | scipy 5 | scikit-learn 6 | pandas 7 | tianshou 8 | sphinx_rtd_theme 9 | -------------------------------------------------------------------------------- /examples/benchmarks/TCTS/workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/examples/benchmarks/TCTS/workflow.png -------------------------------------------------------------------------------- /scripts/data_collector/crypto/requirement.txt: -------------------------------------------------------------------------------- 1 | loguru 2 | fire 3 | requests 4 | numpy 5 | pandas 6 | tqdm 7 | lxml 8 | pycoingecko -------------------------------------------------------------------------------- /tests/dataset_tests/README.md: -------------------------------------------------------------------------------- 1 | # About dataset tests 2 | Tests in this folder are for testing the prepared dataset from Yahoo 3 | -------------------------------------------------------------------------------- /docs/_static/img/analysis/score_ic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/analysis/score_ic.png -------------------------------------------------------------------------------- /docs/_static/img/framework-abstract.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/framework-abstract.jpg -------------------------------------------------------------------------------- /docs/_static/img/logo/yellow_bg_rec.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/logo/yellow_bg_rec.png -------------------------------------------------------------------------------- /examples/benchmarks/TRA/requirements.txt: -------------------------------------------------------------------------------- 1 | pandas==1.1.2 2 | numpy==1.21.0 3 | scikit_learn==0.23.2 4 | torch==1.7.0 5 | seaborn 6 | -------------------------------------------------------------------------------- /scripts/data_collector/contrib/future_trading_date_collector/requirements.txt: -------------------------------------------------------------------------------- 1 | baostock 2 | fire 3 | numpy 4 | pandas 5 | loguru 6 | -------------------------------------------------------------------------------- /docs/_static/img/logo/yel_bg_rec+word.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/logo/yel_bg_rec+word.png -------------------------------------------------------------------------------- /examples/benchmarks/GRU/csi300_gru_ts.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/examples/benchmarks/GRU/csi300_gru_ts.pkl -------------------------------------------------------------------------------- /examples/benchmarks/TRA/data/README.md: -------------------------------------------------------------------------------- 1 | Data Link: https://drive.google.com/drive/folders/1fMqZYSeLyrHiWmVzygeI4sw3vp5Gt8cY?usp=sharing 2 | -------------------------------------------------------------------------------- /docs/_static/img/analysis/rank_label_buy.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/analysis/rank_label_buy.png -------------------------------------------------------------------------------- /docs/_static/img/logo/white_bg_rec+word.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/logo/white_bg_rec+word.png -------------------------------------------------------------------------------- /examples/benchmarks/GRU/model_gru_csi300.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/examples/benchmarks/GRU/model_gru_csi300.pkl -------------------------------------------------------------------------------- /examples/benchmarks/LSTM/csi300_lstm_ts.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/examples/benchmarks/LSTM/csi300_lstm_ts.pkl -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | exclude tests/* 2 | include qlib/* 3 | include qlib/*/* 4 | include qlib/*/*/* 5 | include qlib/*/*/*/* 6 | include qlib/*/*/*/*/* 7 | -------------------------------------------------------------------------------- /docs/_static/img/analysis/analysis_model_IC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/analysis/analysis_model_IC.png -------------------------------------------------------------------------------- /docs/_static/img/analysis/rank_label_hold.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/analysis/rank_label_hold.png -------------------------------------------------------------------------------- /docs/_static/img/analysis/rank_label_sell.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/analysis/rank_label_sell.png -------------------------------------------------------------------------------- /docs/_static/img/analysis/risk_analysis_bar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/analysis/risk_analysis_bar.png -------------------------------------------------------------------------------- /docs/_static/img/analysis/risk_analysis_std.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/analysis/risk_analysis_std.png -------------------------------------------------------------------------------- /docs/_static/img/logo/yellow_bg_rec+word .png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/logo/yellow_bg_rec+word .png -------------------------------------------------------------------------------- /examples/benchmarks/LSTM/model_lstm_csi300.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/examples/benchmarks/LSTM/model_lstm_csi300.pkl -------------------------------------------------------------------------------- /examples/data_demo/README.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | The examples in this folder demonstrate some common usages of Qlib's data-related modules. 3 | -------------------------------------------------------------------------------- /docs/_static/img/analysis/analysis_model_NDQ.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/analysis/analysis_model_NDQ.png -------------------------------------------------------------------------------- /examples/hyperparameter/LightGBM/requirements.txt: -------------------------------------------------------------------------------- 1 | pandas==1.1.2 2 | numpy==1.21.0 3 | lightgbm==3.1.0 4 | optuna==2.7.0 5 | optuna-dashboard==0.4.1 6 | -------------------------------------------------------------------------------- /scripts/data_collector/fund/requirements.txt: -------------------------------------------------------------------------------- 1 | loguru 2 | fire 3 | requests 4 | numpy 5 | pandas 6 | tqdm 7 | lxml 8 | yahooquery 9 | -------------------------------------------------------------------------------- /docs/_static/img/analysis/cumulative_return_buy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/analysis/cumulative_return_buy.png -------------------------------------------------------------------------------- /docs/_static/img/analysis/cumulative_return_hold.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/analysis/cumulative_return_hold.png -------------------------------------------------------------------------------- /docs/_static/img/analysis/cumulative_return_sell.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/analysis/cumulative_return_sell.png -------------------------------------------------------------------------------- /examples/benchmarks/HIST/qlib_csi300_stock_index.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/examples/benchmarks/HIST/qlib_csi300_stock_index.npy -------------------------------------------------------------------------------- /tests/data_mid_layer_tests/README.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | The middle layers of data mainly include: 3 | - Handlers 4 | - Processors 5 | - Datasets 6 | -------------------------------------------------------------------------------- /scripts/data_collector/cn_index/requirements.txt: -------------------------------------------------------------------------------- 1 | baostock 2 | fire 3 | requests 4 | pandas 5 | lxml 6 | loguru 7 | tqdm 8 | yahooquery 9 | openpyxl 10 | -------------------------------------------------------------------------------- /docs/_static/img/analysis/analysis_model_long_short.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/analysis/analysis_model_long_short.png -------------------------------------------------------------------------------- /docs/_static/img/analysis/analysis_model_monthly_IC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/analysis/analysis_model_monthly_IC.png -------------------------------------------------------------------------------- /docs/_static/img/analysis/risk_analysis_max_drawdown.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/analysis/risk_analysis_max_drawdown.png -------------------------------------------------------------------------------- /docs/_static/img/analysis/analysis_model_auto_correlation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/analysis/analysis_model_auto_correlation.png -------------------------------------------------------------------------------- /docs/_static/img/analysis/analysis_model_cumulative_return.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/analysis/analysis_model_cumulative_return.png -------------------------------------------------------------------------------- /docs/_static/img/analysis/cumulative_return_buy_minus_sell.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/analysis/cumulative_return_buy_minus_sell.png -------------------------------------------------------------------------------- /docs/_static/img/analysis/risk_analysis_annualized_return.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/analysis/risk_analysis_annualized_return.png -------------------------------------------------------------------------------- /docs/_static/img/analysis/risk_analysis_information_ratio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qianyun210603/qlib/HEAD/docs/_static/img/analysis/risk_analysis_information_ratio.png -------------------------------------------------------------------------------- /scripts/data_collector/pit/requirements.txt: -------------------------------------------------------------------------------- 1 | loguru 2 | fire 3 | tqdm 4 | requests 5 | pandas 6 | lxml 7 | baostock 8 | yahooquery 9 | beautifulsoup4 10 | -------------------------------------------------------------------------------- /qlib/contrib/report/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License.
3 | 4 | """ 5 | This module is designed to analyze data 6 | 7 | """ 8 | -------------------------------------------------------------------------------- /examples/benchmarks/ADD/README.md: -------------------------------------------------------------------------------- 1 | # ADD 2 | * Paper: [ADD: Augmented Disentanglement Distillation Framework for Improving Stock Trend Forecasting](https://arxiv.org/abs/2012.06289). 3 | 4 | -------------------------------------------------------------------------------- /qlib/data/storage/arctic_storage/__init__.py: -------------------------------------------------------------------------------- 1 | from .instruments import ArcticInstrumentStorage 2 | from .feature import ArcticFeatureStorage 3 | from .calendar import ArcticCalendarStorage 4 | -------------------------------------------------------------------------------- /scripts/data_collector/yahoo/requirements.txt: -------------------------------------------------------------------------------- 1 | loguru 2 | fire 3 | requests 4 | numpy 5 | pandas 6 | tqdm 7 | lxml 8 | yahooquery 9 | joblib 10 | beautifulsoup4 11 | bs4 12 | soupsieve -------------------------------------------------------------------------------- /tests/dependency_tests/README.md: -------------------------------------------------------------------------------- 1 | Some implementations in Qlib depend on certain assumptions about its dependencies. 2 | 3 | So some tests are required to ensure that these assumptions remain valid. 4 | -------------------------------------------------------------------------------- /qlib/model/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | import warnings 5 | 6 | from .base import Model 7 | 8 | __all__ = ["Model", "warnings"] 9 | -------------------------------------------------------------------------------- /qlib/rl/strategy/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | from .single_order import SingleOrderStrategy 4 | 5 | __all__ = ["SingleOrderStrategy"] 6 | -------------------------------------------------------------------------------- /scripts/data_collector/baostock_5min/requirements.txt: -------------------------------------------------------------------------------- 1 | loguru 2 | fire 3 | requests 4 | numpy 5 | pandas 6 | tqdm 7 | lxml 8 | yahooquery 9 | joblib 10 | beautifulsoup4 11 | bs4 12 | soupsieve 13 | baostock -------------------------------------------------------------------------------- /examples/benchmarks/GRU/README.md: -------------------------------------------------------------------------------- 1 | # Gated Recurrent Unit (GRU) 2 | * Paper: [Learning Phrase Representations using RNN Encoder–Decoder for Statistical Machine Translation](https://aclanthology.org/D14-1179.pdf). 3 | -------------------------------------------------------------------------------- /examples/benchmarks/LSTM/README.md: -------------------------------------------------------------------------------- 1 | # Long Short-Term Memory (LSTM) 2 | * Paper: [Long Short-Term Memory](https://direct.mit.edu/neco/article-abstract/9/8/1735/6109/Long-Short-Term-Memory?redirectedFrom=fulltext).
3 | -------------------------------------------------------------------------------- /qlib/cli/data.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | import fire 5 | 6 | from qlib.tests.data import GetData 7 | 8 | if __name__ == "__main__": 9 | fire.Fire(GetData) 10 | -------------------------------------------------------------------------------- /qlib/contrib/workflow/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | from .record_temp import MultiSegRecord, SignalMseRecord 4 | 5 | __all__ = ["MultiSegRecord", "SignalMseRecord"] 6 | -------------------------------------------------------------------------------- /qlib/model/meta/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | from .dataset import MetaTaskDataset 5 | from .task import MetaTask 6 | 7 | __all__ = ["MetaTask", "MetaTaskDataset"] 8 | -------------------------------------------------------------------------------- /tests/pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | markers = 3 | slow: marks tests as slow (deselect with '-m "not slow"') 4 | filterwarnings = 5 | ignore:.*rng.randint:DeprecationWarning 6 | ignore:.*Casting input x to numpy array:UserWarning 7 | -------------------------------------------------------------------------------- /scripts/get_data.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | import fire 5 | 6 | from qlib.tests.data import GetData 7 | 8 | if __name__ == "__main__": 9 | fire.Fire(GetData) 10 | -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | [TYPECHECK] 2 | # https://stackoverflow.com/a/53572939 3 | # List of members which are set dynamically and missed by Pylint inference 4 | # system, and so shouldn't trigger E1101 when accessed. 5 | generated-members=numpy.*, torch.* 6 | -------------------------------------------------------------------------------- /examples/benchmarks/TabNet/README.md: -------------------------------------------------------------------------------- 1 | # TabNet 2 | * Code: [https://github.com/dreamquark-ai/tabnet](https://github.com/dreamquark-ai/tabnet) 3 | * Paper: [TabNet: Attentive Interpretable Tabular Learning](https://arxiv.org/pdf/1908.07442.pdf). 4 | -------------------------------------------------------------------------------- /qlib/contrib/meta/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
3 | 4 | from .data_selection import MetaDatasetDS, MetaModelDS, MetaTaskDS 5 | 6 | __all__ = ["MetaTaskDS", "MetaDatasetDS", "MetaModelDS"] 7 | -------------------------------------------------------------------------------- /.deepsource.toml: -------------------------------------------------------------------------------- 1 | version = 1 2 | 3 | test_patterns = ["tests/test_*.py"] 4 | 5 | exclude_patterns = ["examples/**"] 6 | 7 | [[analyzers]] 8 | name = "python" 9 | enabled = true 10 | 11 | [analyzers.meta] 12 | runtime_version = "3.x.x" 13 | -------------------------------------------------------------------------------- /examples/benchmarks/IGMTF/README.md: -------------------------------------------------------------------------------- 1 | # IGMTF 2 | * Code: [https://github.com/Wentao-Xu/IGMTF](https://github.com/Wentao-Xu/IGMTF) 3 | * Paper: [IGMTF: An Instance-wise Graph-based Framework for 4 | Multivariate Time Series Forecasting](https://arxiv.org/abs/2109.06489). -------------------------------------------------------------------------------- /examples/benchmarks/TCN/README.md: -------------------------------------------------------------------------------- 1 | # TCN 2 | * Code: [https://github.com/locuslab/TCN](https://github.com/locuslab/TCN) 3 | * Paper: [An Empirical Evaluation of Generic Convolutional and Recurrent Networks for Sequence Modeling](https://arxiv.org/abs/1803.01271). 4 | 5 | -------------------------------------------------------------------------------- /qlib/contrib/meta/data_selection/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | from .dataset import MetaDatasetDS, MetaTaskDS 5 | from .model import MetaModelDS 6 | 7 | __all__ = ["MetaDatasetDS", "MetaTaskDS", "MetaModelDS"] 8 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/documentation.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F4D6 Documentation" 3 | about: Report an issue related to documentation 4 | 5 | --- 6 | 7 | ## 📖 Documentation 8 | 9 | 10 | -------------------------------------------------------------------------------- /examples/benchmarks/XGBoost/README.md: -------------------------------------------------------------------------------- 1 | # XGBoost 2 | * Code: [https://github.com/dmlc/xgboost](https://github.com/dmlc/xgboost) 3 | * Paper: XGBoost: A Scalable Tree Boosting System. [https://dl.acm.org/doi/pdf/10.1145/2939672.2939785](https://dl.acm.org/doi/pdf/10.1145/2939672.2939785). -------------------------------------------------------------------------------- /examples/benchmarks/Transformer/README.md: -------------------------------------------------------------------------------- 1 | # Transformer 2 | * Code: [https://github.com/tensorflow/tensor2tensor](https://github.com/tensorflow/tensor2tensor) 3 | * Paper: [Attention is All you Need](https://proceedings.neurips.cc/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf). 4 | -------------------------------------------------------------------------------- /qlib/rl/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | """Common utilities to handle ad-hoc-styled data. 5 | 6 | Most of these snippets come from research projects (paper code).
7 | Please exercise caution when using them in production. 8 | """ 9 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | """Ignore RL tests on non-Linux platforms.""" 5 | collect_ignore = [] 6 | 7 | if sys.platform != "linux": 8 | for root, dirs, files in os.walk("rl"): 9 | for file in files: 10 | collect_ignore.append(os.path.join(root, file)) 11 | -------------------------------------------------------------------------------- /qlib/data/storage/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | from .storage import CalendarStorage, CalVT, FeatureStorage, InstKT, InstrumentStorage, InstVT 5 | 6 | __all__ = ["CalendarStorage", "InstrumentStorage", "FeatureStorage", "CalVT", "InstVT", "InstKT"] 7 | -------------------------------------------------------------------------------- /docs/_static/demo.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | git clone https://github.com/microsoft/qlib.git 3 | cd qlib 4 | ls 5 | pip install pyqlib 6 | # or 7 | # pip install numpy 8 | # pip install --upgrade cython 9 | # python setup.py install 10 | cd examples 11 | ls 12 | qrun benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml -------------------------------------------------------------------------------- /examples/benchmarks/ADARNN/README.md: -------------------------------------------------------------------------------- 1 | # AdaRNN 2 | * Code: [https://github.com/jindongwang/transferlearning/tree/master/code/deep/adarnn](https://github.com/jindongwang/transferlearning/tree/master/code/deep/adarnn) 3 | * Paper: [AdaRNN: Adaptive Learning and Forecasting for Time Series](https://arxiv.org/pdf/2108.04443.pdf). 4 | 5 | -------------------------------------------------------------------------------- /examples/benchmarks/HIST/README.md: -------------------------------------------------------------------------------- 1 | # HIST 2 | * Code: [https://github.com/Wentao-Xu/HIST](https://github.com/Wentao-Xu/HIST) 3 | * Paper: [HIST: A Graph-based Framework for Stock Trend Forecasting via Mining Concept-Oriented Shared Information](https://arxiv.org/abs/2110.13716). -------------------------------------------------------------------------------- /qlib/contrib/report/analysis_model/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License.
3 | 4 | from .analysis_model_performance import model_performance_graph 5 | from .factor_model_performance import factor_performance_graph 6 | 7 | __all__ = ["model_performance_graph", "factor_performance_graph"] 8 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/psf/black 3 | rev: 23.7.0 4 | hooks: 5 | - id: black 6 | args: ["qlib", "-l 120"] 7 | 8 | - repo: https://github.com/PyCQA/flake8 9 | rev: 4.0.1 10 | hooks: 11 | - id: flake8 12 | args: ["--ignore=E501,F541,E266,E402,W503,E731,E203"] 13 | -------------------------------------------------------------------------------- /qlib/contrib/strategy/optimizer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | from .base import BaseOptimizer 5 | from .enhanced_indexing import EnhancedIndexingOptimizer 6 | from .optimizer import PortfolioOptimizer 7 | 8 | __all__ = ["BaseOptimizer", "PortfolioOptimizer", "EnhancedIndexingOptimizer"] 9 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # Requirements 2 | 3 | Here are the minimal hardware requirements to run the `workflow_by_code` example. 4 | - Memory: 16G 5 | - Free Disk: 5G 6 | 7 | 8 | # NOTE 9 | The results will vary slightly across different OSs (the variance of the annualized return will be less than 2%). 10 | The evaluation results on the `README.md` page are from a Linux OS. 11 | -------------------------------------------------------------------------------- /qlib/contrib/rolling/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | """ 4 | The difference between this module and the scripts in examples/benchmarks_dynamic: 5 | - This module only focuses on providing a general rolling implementation. 6 | Anything specific to a particular benchmark is placed in examples/benchmarks_dynamic. 7 | """ 8 | -------------------------------------------------------------------------------- /examples/benchmarks/CatBoost/README.md: -------------------------------------------------------------------------------- 1 | # CatBoost 2 | * Code: [https://github.com/catboost/catboost](https://github.com/catboost/catboost) 3 | * Paper: CatBoost: unbiased boosting with categorical features. [https://proceedings.neurips.cc/paper/2018/file/14491b756b3a51daac41c24863285549-Paper.pdf](https://proceedings.neurips.cc/paper/2018/file/14491b756b3a51daac41c24863285549-Paper.pdf). -------------------------------------------------------------------------------- /qlib/rl/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License.
3 | 4 | from .interpreter import ActionInterpreter, Interpreter, StateInterpreter 5 | from .reward import Reward, RewardCombination 6 | from .simulator import Simulator 7 | 8 | __all__ = ["Interpreter", "StateInterpreter", "ActionInterpreter", "Reward", "RewardCombination", "Simulator"] 9 | -------------------------------------------------------------------------------- /docs/component/rl/toctree.rst: -------------------------------------------------------------------------------- 1 | .. _rl: 2 | 3 | ======================================================================== 4 | Reinforcement Learning in Quantitative Trading 5 | ======================================================================== 6 | 7 | .. toctree:: 8 | Guidance 9 | Overall 10 | Quick Start 11 | Framework 12 | -------------------------------------------------------------------------------- /qlib/contrib/strategy/optimizer/base.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | import abc 5 | 6 | 7 | class BaseOptimizer(abc.ABC): 8 | """Construct a portfolio with an optimization-related method""" 9 | 10 | @abc.abstractmethod 11 | def __call__(self, *args, **kwargs) -> object: 12 | """Generate an optimized portfolio allocation""" 13 | -------------------------------------------------------------------------------- /qlib/rl/seed.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | """Defines a set of initial state definitions and state-set definitions. 5 | 6 | With single-asset order execution only, the only seed is the order. 7 | """ 8 | 9 | from typing import TypeVar 10 | 11 | InitialStateType = TypeVar("InitialStateType") 12 | """Type of data that creates the simulator.""" 13 | -------------------------------------------------------------------------------- /qlib/model/riskmodel/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | from .base import RiskModel 5 | from .poet import POETCovEstimator 6 | from .shrink import ShrinkCovEstimator 7 | from .structured import StructuredCovEstimator 8 | 9 | __all__ = [ 10 | "RiskModel", 11 | "POETCovEstimator", 12 | "ShrinkCovEstimator", 13 | "StructuredCovEstimator", 14 | ] 15 | -------------------------------------------------------------------------------- /qlib/contrib/report/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License.
3 | 4 | GRAPH_NAME_LIST = [ 5 | "analysis_position.report_graph", 6 | "analysis_position.score_ic_graph", 7 | "analysis_position.cumulative_return_graph", 8 | "analysis_position.risk_analysis_graph", 9 | "analysis_position.rank_label_graph", 10 | "analysis_model.model_performance_graph", 11 | ] 12 | -------------------------------------------------------------------------------- /.mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | exclude = (?x)( 3 | ^qlib/backtest/high_performance_ds\.py$ 4 | | ^qlib/contrib 5 | | ^qlib/data 6 | | ^qlib/model 7 | | ^qlib/strategy 8 | | ^qlib/tests 9 | | ^qlib/utils 10 | | ^qlib/workflow 11 | | ^qlib/config\.py$ 12 | | ^qlib/log\.py$ 13 | | ^qlib/__init__\.py$ 14 | ) 15 | ignore_missing_imports = true 16 | disallow_incomplete_defs = true 17 | follow_imports = skip 18 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/question.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "❓Questions & Help" 3 | about: Have some questions? We can offer help. 4 | labels: question 5 | 6 | --- 7 | 8 | ## ❓ Questions and Help 9 | 10 | We sincerely suggest that you carefully read the [documentation](http://qlib.readthedocs.io/) of our library as well as the official [paper](https://arxiv.org/abs/2009.11189). After that, if you still feel puzzled, please describe your question clearly under this issue. -------------------------------------------------------------------------------- /examples/benchmarks/KRNN/README.md: -------------------------------------------------------------------------------- 1 | # KRNN 2 | * Code: [https://github.com/microsoft/FOST/blob/main/fostool/model/krnn.py](https://github.com/microsoft/FOST/blob/main/fostool/model/krnn.py) 3 | 4 | 5 | # Introduction to the settings/configs 6 | * Torch_geometric is used in the original FOST model, but we did not use it here. 7 | * Make sure your CUDA version matches the torch version so that the GPU can be used; we use CUDA==10.2 and torch.__version__==1.12.1. 8 | 9 | -------------------------------------------------------------------------------- /examples/benchmarks/ALSTM/README.md: -------------------------------------------------------------------------------- 1 | # ALSTM 2 | 3 | - ALSTM contains a temporal attentive aggregation layer based on a normal LSTM. 4 | 5 | - Paper: A dual-stage attention-based recurrent neural network for time series prediction. 6 | 7 | [https://www.ijcai.org/Proceedings/2017/0366.pdf](https://www.ijcai.org/Proceedings/2017/0366.pdf) 8 | 9 | - NOTE: The current implementation is just a simplified version of ALSTM: an LSTM with attention. 10 | -------------------------------------------------------------------------------- /examples/benchmarks/Sandwich/README.md: -------------------------------------------------------------------------------- 1 | # Sandwich 2 | * Code: [https://github.com/microsoft/FOST/blob/main/fostool/model/sandwich.py](https://github.com/microsoft/FOST/blob/main/fostool/model/sandwich.py) 3 | 4 | 5 | # Introduction to the settings/configs 6 | * Torch_geometric is used in the original FOST model, but we did not use it here.
7 | * Make sure your CUDA version matches the torch version so that the GPU can be used; we use CUDA==10.2 and torch.__version__==1.12.1 8 | 9 | -------------------------------------------------------------------------------- /qlib/contrib/report/analysis_position/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | from .cumulative_return import cumulative_return_graph 5 | from .rank_label import rank_label_graph 6 | from .report import report_graph 7 | from .risk_analysis import risk_analysis_graph 8 | from .score_ic import score_ic_graph 9 | 10 | __all__ = ["cumulative_return_graph", "score_ic_graph", "report_graph", "rank_label_graph", "risk_analysis_graph"] 11 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | -------------------------------------------------------------------------------- /examples/benchmarks/SFM/README.md: -------------------------------------------------------------------------------- 1 | # State-Frequency-Memory 2 | - State Frequency Memory (SFM) is a novel recurrent network that uses the Discrete Fourier Transform to decompose the hidden states of memory cells and capture multi-frequency trading patterns from past market data to make stock price predictions. 3 | - Paper: Stock Price Prediction via Discovering Multi-Frequency Trading Patterns. [http://www.eecs.ucf.edu/~gqi/publications/kdd2017_stock.pdf](http://www.eecs.ucf.edu/~gqi/publications/kdd2017_stock.pdf) -------------------------------------------------------------------------------- /examples/benchmarks/GATs/README.md: -------------------------------------------------------------------------------- 1 | # GATs 2 | * Graph Attention Networks (GATs) leverage masked self-attentional layers on graph-structured data. The nodes in stacked layers have different weights and are able to attend over their 3 | neighborhoods’ features without requiring any kind of costly matrix operation (such as inversion) or depending on knowing the graph structure upfront. 4 | * The code used in Qlib is our own PyTorch implementation. 5 | * Paper: Graph Attention Networks https://arxiv.org/pdf/1710.10903.pdf -------------------------------------------------------------------------------- /qlib/contrib/tuner/space.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License.
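# The dictionaries below are hyperopt search spaces. A minimal sketch of how
# such a space is typically consumed (`my_objective` is a placeholder, not part
# of this module):
#
#     from hyperopt import fmin, tpe
#     best = fmin(fn=my_objective, space=TopkAmountStrategySpace,
#                 algo=tpe.suggest, max_evals=10)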
3 | 4 | # pylint: skip-file 5 | # flake8: noqa 6 | 7 | from hyperopt import hp 8 | 9 | TopkAmountStrategySpace = { 10 | "topk": hp.choice("topk", [30, 35, 40]), 11 | "buffer_margin": hp.choice("buffer_margin", [200, 250, 300]), 12 | } 13 | 14 | QLibDataLabelSpace = { 15 | "labels": hp.choice( 16 | "labels", 17 | [["Ref($vwap, -2)/Ref($vwap, -1) - 1"], ["Ref($close, -5)/$close - 1"]], 18 | ) 19 | } 20 | -------------------------------------------------------------------------------- /qlib/utils/exceptions.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | 5 | # Base exception class 6 | class QlibException(Exception): 7 | pass 8 | 9 | 10 | class RecorderInitializationError(QlibException): 11 | """Error type for re-initialization when starting an experiment""" 12 | 13 | 14 | class LoadObjectError(QlibException): 15 | """Error type for Recorder when it cannot load an object""" 16 | 17 | 18 | class ExpAlreadyExistError(Exception): 19 | """Experiment already exists""" 20 | -------------------------------------------------------------------------------- /examples/rl_order_execution/scripts/merge_orders.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | 4 | import pandas as pd 5 | from tqdm import tqdm 6 | 7 | for tag in ["test", "valid"]: 8 | files = os.listdir(os.path.join("data/orders/", tag)) 9 | dfs = [] 10 | for f in tqdm(files): 11 | df = pickle.load(open(os.path.join("data/orders/", tag, f), "rb")) 12 | df = df.drop(["$close0"], axis=1) 13 | dfs.append(df) 14 | 15 | total_df = pd.concat(dfs) 16 | pickle.dump(total_df, open(os.path.join("data", "orders", f"{tag}_orders.pkl"), "wb")) 17 | -------------------------------------------------------------------------------- /qlib/rl/trainer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | """Train, test, inference utilities.""" 5 | 6 | from .api import backtest, train 7 | from .callbacks import Checkpoint, EarlyStopping, MetricsWriter 8 | from .trainer import Trainer 9 | from .vessel import TrainingVessel, TrainingVesselBase 10 | 11 | __all__ = [ 12 | "Trainer", 13 | "TrainingVessel", 14 | "TrainingVesselBase", 15 | "Checkpoint", 16 | "EarlyStopping", 17 | "MetricsWriter", 18 | "train", 19 | "backtest", 20 | ] 21 | -------------------------------------------------------------------------------- /scripts/data_collector/contrib/future_trading_date_collector/README.md: -------------------------------------------------------------------------------- 1 | # Get future trading days 2 | 3 | > `D.calendar(future=True)` will be used 4 | 5 | ## Requirements 6 | 7 | ```bash 8 | pip install -r requirements.txt 9 | ``` 10 | 11 | ## Collect Data 12 | 13 | ```bash 14 | # collect future trading days, used in qlib calendars. 15 | python future_trading_date_collector.py --qlib_dir ~/.qlib/qlib_data/cn_data --freq day 16 | ``` 17 | 18 | ## Parameters 19 | 20 | - qlib_dir: qlib data directory 21 | - freq: value from [`day`, `1min`], default `day` 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /qlib/contrib/strategy/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License.
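# A usage sketch for one of the strategies exported below (the parameter values
# are illustrative, not recommendations; `pred_score` is a placeholder signal):
#
#     from qlib.contrib.strategy import TopkDropoutStrategy
#     strategy = TopkDropoutStrategy(signal=pred_score, topk=50, n_drop=5)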
3 | 4 | 5 | from .cost_control import SoftTopkStrategy 6 | from .rule_strategy import SBBStrategyBase, SBBStrategyEMA, TWAPStrategy 7 | from .signal_strategy import EnhancedIndexingStrategy, TopkDropoutStrategy, WeightStrategyBase 8 | 9 | __all__ = [ 10 | "TopkDropoutStrategy", 11 | "WeightStrategyBase", 12 | "EnhancedIndexingStrategy", 13 | "TWAPStrategy", 14 | "SBBStrategyBase", 15 | "SBBStrategyEMA", 16 | "SoftTopkStrategy", 17 | ] 18 | -------------------------------------------------------------------------------- /examples/hyperparameter/LightGBM/Readme.md: -------------------------------------------------------------------------------- 1 | # LightGBM hyperparameter 2 | 3 | ## Alpha158 4 | First terminal 5 | ``` 6 | optuna create-study --study LGBM_158 --storage sqlite:///db.sqlite3 7 | optuna-dashboard --port 5000 --host 0.0.0.0 sqlite:///db.sqlite3 8 | ``` 9 | Second terminal 10 | ``` 11 | python hyperparameter_158.py 12 | ``` 13 | 14 | ## Alpha360 15 | First terminal 16 | ``` 17 | optuna create-study --study LGBM_360 --storage sqlite:///db.sqlite3 18 | optuna-dashboard --port 5000 --host 0.0.0.0 sqlite:///db.sqlite3 19 | ``` 20 | Second terminal 21 | ``` 22 | python hyperparameter_360.py 23 | ``` 24 | -------------------------------------------------------------------------------- /qlib/constant.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | # REGION CONST 5 | from typing import TypeVar 6 | 7 | import numpy as np 8 | import pandas as pd 9 | 10 | REG_CN = "cn" 11 | REG_US = "us" 12 | REG_TW = "tw" 13 | 14 | # Epsilon for avoiding division by zero. 15 | EPS = 1e-12 16 | 17 | # Infinity as an integer 18 | INF = int(1e18) 19 | ONE_DAY = pd.Timedelta("1day") 20 | ONE_MIN = pd.Timedelta("1min") 21 | EPS_T = pd.Timedelta("1s") # use 1 second to exclude the right interval point 22 | float_or_ndarray = TypeVar("float_or_ndarray", float, np.ndarray) 23 | -------------------------------------------------------------------------------- /examples/benchmarks/LightGBM/README.md: -------------------------------------------------------------------------------- 1 | # LightGBM 2 | * Code: [https://github.com/microsoft/LightGBM](https://github.com/microsoft/LightGBM) 3 | * Paper: LightGBM: A Highly Efficient Gradient Boosting 4 | Decision Tree. [https://proceedings.neurips.cc/paper/2017/file/6449f44a102fde848669bdd9eb6b76fa-Paper.pdf](https://proceedings.neurips.cc/paper/2017/file/6449f44a102fde848669bdd9eb6b76fa-Paper.pdf). 5 | 6 | 7 | # Introduction to the settings/configs. 8 | 9 | `workflow_config_lightgbm_multi_freq.yaml` 10 | - It uses data sources of different frequencies (i.e. multiple frequencies) for daily prediction. 11 | -------------------------------------------------------------------------------- /qlib/workflow/task/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | """ 4 | Task-related workflows are implemented in this folder 5 | 6 | A typical task workflow 7 | 8 | | Step | Description | 9 | |-----------------------+------------------------------------------------| 10 | | TaskGen | Generating tasks. | 11 | | TaskManager(optional) | Manage generated tasks | 12 | | run task | retrieve tasks from TaskManager and run tasks.
| 13 | """ 14 | -------------------------------------------------------------------------------- /examples/rl_order_execution/exp_configs/backtest_twap.yml: -------------------------------------------------------------------------------- 1 | order_file: ./data/orders/test_orders.pkl 2 | start_time: "9:30" 3 | end_time: "14:54" 4 | data_granularity: "5min" 5 | qlib: 6 | provider_uri_5min: ./data/bin/ 7 | exchange: 8 | limit_threshold: null 9 | deal_price: ["$close", "$close"] 10 | volume_threshold: null 11 | strategies: 12 | 1day: 13 | class: TWAPStrategy 14 | kwargs: {} 15 | module_path: qlib.contrib.strategy.rule_strategy 16 | 30min: 17 | class: TWAPStrategy 18 | kwargs: {} 19 | module_path: qlib.contrib.strategy.rule_strategy 20 | concurrency: 16 21 | output_dir: outputs/twap/ 22 | -------------------------------------------------------------------------------- /qlib/rl/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | from .data_queue import DataQueue 5 | from .env_wrapper import EnvWrapper, EnvWrapperStatus 6 | from .finite_env import FiniteEnvType, vectorize_env 7 | from .log import ConsoleWriter, CsvWriter, LogBuffer, LogCollector, LogLevel, LogWriter 8 | 9 | __all__ = [ 10 | "LogLevel", 11 | "DataQueue", 12 | "EnvWrapper", 13 | "FiniteEnvType", 14 | "LogCollector", 15 | "LogWriter", 16 | "vectorize_env", 17 | "ConsoleWriter", 18 | "CsvWriter", 19 | "EnvWrapperStatus", 20 | "LogBuffer", 21 | ] 22 | -------------------------------------------------------------------------------- /scripts/data_collector/cn_index/README.md: -------------------------------------------------------------------------------- 1 | # CSI300/CSI100/CSI500 History Companies Collection 2 | 3 | ## Requirements 4 | 5 | ```bash 6 | pip install -r requirements.txt 7 | ``` 8 | 9 | ## Collect Data 10 | 11 | ```bash 12 | # parse instruments, used in qlib/instruments. 13 | python collector.py --index_name CSI300 --qlib_dir ~/.qlib/qlib_data/cn_data --method parse_instruments 14 | 15 | # parse new companies 16 | python collector.py --index_name CSI300 --qlib_dir ~/.qlib/qlib_data/cn_data --method save_new_companies 17 | 18 | # index_name support: CSI300, CSI100, CSI500 19 | # help 20 | python collector.py --help 21 | ``` 22 | 23 | -------------------------------------------------------------------------------- /scripts/data_collector/us_index/README.md: -------------------------------------------------------------------------------- 1 | # NASDAQ100/SP500/SP400/DJIA History Companies Collection 2 | 3 | ## Requirements 4 | 5 | ```bash 6 | pip install -r requirements.txt 7 | ``` 8 | 9 | ## Collect Data 10 | 11 | ```bash 12 | # parse instruments, used in qlib/instruments.
13 | python collector.py --index_name SP500 --qlib_dir ~/.qlib/qlib_data/us_data --method parse_instruments 14 | 15 | # parse new companies 16 | python collector.py --index_name SP500 --qlib_dir ~/.qlib/qlib_data/us_data --method save_new_companies 17 | 18 | # index_name support: SP500, NASDAQ100, DJIA, SP400 19 | # help 20 | python collector.py --help 21 | ``` 22 | 23 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | # Set the version of Python and other tools you might need 9 | build: 10 | os: ubuntu-22.04 11 | tools: 12 | python: "3.8" 13 | 14 | # Build documentation in the docs/ directory with Sphinx 15 | sphinx: 16 | configuration: docs/conf.py 17 | 18 | # Build all formats 19 | formats: all 20 | 21 | # Optionally set the version of Python and requirements required to build your docs 22 | python: 23 | install: 24 | - requirements: docs/requirements.txt 25 | - method: pip 26 | path: . 27 | -------------------------------------------------------------------------------- /qlib/contrib/online/__init__.py: -------------------------------------------------------------------------------- 1 | # pylint: skip-file 2 | # flake8: noqa 3 | 4 | ''' 5 | TODO: 6 | 7 | - The online module requires the model to have a method like: 8 | def get_data_with_date(self, date, **kwargs): 9 | """ 10 | Will be called in the online module; 11 | needs to return the data used to predict the label (score) of stocks at `date`. 12 | 13 | :param 14 | date: pd.Timestamp 15 | predict date 16 | :return: 17 | data: the input data used to predict the label (score) of stocks at the predict date. 18 | """ 19 | raise NotImplementedError("get_data_with_date for this model is not implemented.") 20 | 21 | ''' 22 | -------------------------------------------------------------------------------- /examples/benchmarks/LightGBM/features_resample_N.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License.
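# Sketch of how the processor below is meant to be used (the input DataFrame
# and target frequency are illustrative):
#
#     proc = ResampleNProcessor(target_frq="1d")
#     daily_df = proc(minute_df)  # resamples 1min bars onto the target calendar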
3 | 4 | import pandas as pd 5 | 6 | from qlib.data.inst_processor import InstProcessor 7 | from qlib.utils.resam import resam_calendar 8 | 9 | 10 | class ResampleNProcessor(InstProcessor): 11 | def __init__(self, target_frq: str, **kwargs): 12 | self.target_frq = target_frq 13 | 14 | def __call__(self, df: pd.DataFrame, *args, **kwargs): 15 | df.index = pd.to_datetime(df.index) 16 | res_index = resam_calendar(df.index, "1min", self.target_frq) 17 | df = df.resample(self.target_frq).last().reindex(res_index) 18 | return df 19 | -------------------------------------------------------------------------------- /scripts/data_collector/contrib/fill_cn_1min_data/README.md: -------------------------------------------------------------------------------- 1 | # Use 1d data to fill in symbols that are missing from the 1min data 2 | 3 | 4 | ## Requirements 5 | 6 | ```bash 7 | pip install -r requirements.txt 8 | ``` 9 | 10 | ## Fill 1min data 11 | 12 | ```bash 13 | python fill_cn_1min_data.py --data_1min_dir ~/.qlib/csv_data/cn_data_1min --qlib_data_1d_dir ~/.qlib/qlib_data/cn_data 14 | ``` 15 | 16 | ## Parameters 17 | 18 | - data_1min_dir: csv data 19 | - qlib_data_1d_dir: qlib data directory 20 | - max_workers: `ThreadPoolExecutor(max_workers=max_workers)`, by default *16* 21 | - date_field_name: date field name, by default *date* 22 | - symbol_field_name: symbol field name, by default *symbol* 23 | 24 | -------------------------------------------------------------------------------- /examples/benchmarks/TFT/libs/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Google Research Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | -------------------------------------------------------------------------------- /examples/benchmarks_dynamic/baseline/README.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | 3 | This is the framework for periodically Rolling Retrain (RR) forecasting models. RR adapts to market dynamics by periodically retraining on up-to-date data. 4 | 5 | ## Run the Code 6 | Users can try RR by running the following command: 7 | ```bash 8 | python rolling_benchmark.py run 9 | ``` 10 | 11 | The default forecasting model is `Linear`. Users can choose other forecasting models by changing the `model_type` parameter. 12 | For example, users can try the `LightGBM` forecasting model by running the following command: 13 | ```bash 14 | python rolling_benchmark.py --conf_path=workflow_config_lightgbm_Alpha158.yaml run 15 | ``` 16 | 17 | -------------------------------------------------------------------------------- /qlib/model/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License.
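# ConcatDataset below zips several torch datasets so that ds[i] returns a tuple
# (d1[i], d2[i], ...); its length is that of the shortest member. Illustrative
# use (the dataset names are placeholders):
#
#     ds = ConcatDataset(feature_dataset, label_dataset)
#     x, y = ds[0]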
3 | 4 | from torch.utils.data import Dataset 5 | 6 | 7 | class ConcatDataset(Dataset): 8 | def __init__(self, *datasets): 9 | self.datasets = datasets 10 | 11 | def __getitem__(self, i): 12 | return tuple(d[i] for d in self.datasets) 13 | 14 | def __len__(self): 15 | return min(len(d) for d in self.datasets) 16 | 17 | 18 | class IndexSampler: 19 | def __init__(self, sampler): 20 | self.sampler = sampler 21 | 22 | def __getitem__(self, i: int): 23 | return self.sampler[i], i 24 | 25 | def __len__(self): 26 | return len(self.sampler) 27 | -------------------------------------------------------------------------------- /examples/benchmarks/TFT/data_formatters/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Google Research Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | -------------------------------------------------------------------------------- /examples/benchmarks/TFT/expt_settings/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Google Research Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | -------------------------------------------------------------------------------- /qlib/data/inst_processor.py: -------------------------------------------------------------------------------- 1 | import abc 2 | import json 3 | 4 | import pandas as pd 5 | 6 | 7 | class InstProcessor: 8 | @abc.abstractmethod 9 | def __call__(self, df: pd.DataFrame, instrument, *args, **kwargs): 10 | """ 11 | Process the data. 12 | 13 | NOTE: **The processor may change the content of `df` in place!** 14 | Users should keep a copy of the data outside. 15 | 16 | Parameters 17 | ---------- 18 | df : pd.DataFrame 19 | The raw_df of the handler or the result from a previous processor. 20 | """ 21 | 22 | def __str__(self): 23 | return f"{self.__class__.__name__}:{json.dumps(self.__dict__, sort_keys=True, default=str)}" 24 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F31FFeature Request" 3 | about: Request for a new Qlib feature 4 | labels: enhancement 5 | 6 | --- 7 | 8 | ## 🌟 Feature Description 9 | 10 | 11 | ## Motivation 12 | 13 | 1. Application scenario 14 | 2.
Related works (Papers, Github repos etc.): 15 | 3. Any other relevant and important information: 16 | 17 | 18 | 19 | ## Alternatives 20 | 21 | 22 | 23 | ## Additional Notes 24 | 25 | -------------------------------------------------------------------------------- /qlib/contrib/rolling/__main__.py: -------------------------------------------------------------------------------- 1 | import fire 2 | from qlib import auto_init 3 | from qlib.contrib.rolling.base import Rolling 4 | from qlib.utils.mod import find_all_classes 5 | 6 | if __name__ == "__main__": 7 | sub_commands = {} 8 | for cls in find_all_classes("qlib.contrib.rolling", Rolling): 9 | sub_commands[cls.__module__.split(".")[-1]] = cls 10 | # The sub_commands will be like 11 | # {'base': <class 'qlib.contrib.rolling.base.Rolling'>, ...} 12 | # So you can run it with commands like the ones below 13 | # - `python -m qlib.contrib.rolling base --conf_path <yaml config path> run` 14 | # - `base` can be replaced with other module names 15 | auto_init() 16 | fire.Fire(sub_commands) 17 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = python3 -msphinx 7 | SPHINXPROJ = Quantlab 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | pip install -r requirements.txt 21 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 22 | -------------------------------------------------------------------------------- /examples/benchmarks/LightGBM/features_sample.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | import pandas as pd 4 | 5 | from qlib.data.inst_processor import InstProcessor 6 | 7 | 8 | class Resample1minProcessor(InstProcessor): 9 | """This processor tries to resample the data.
It will resample the data from 1min freq to day freq by selecting a specific minute""" 10 | 11 | def __init__(self, hour: int, minute: int, **kwargs): 12 | self.hour = hour 13 | self.minute = minute 14 | 15 | def __call__(self, df: pd.DataFrame, *args, **kwargs): 16 | df.index = pd.to_datetime(df.index) 17 | df = df.loc[df.index.time == datetime.time(self.hour, self.minute)] 18 | df.index = df.index.normalize() 19 | return df 20 | -------------------------------------------------------------------------------- /scripts/data_collector/br_index/requirements.txt: -------------------------------------------------------------------------------- 1 | async-generator==1.10 2 | attrs==21.4.0 3 | certifi==2022.12.7 4 | cffi==1.15.0 5 | charset-normalizer==2.0.12 6 | cryptography==36.0.1 7 | fire==0.4.0 8 | h11==0.13.0 9 | idna==3.3 10 | loguru==0.6.0 11 | lxml==4.9.1 12 | multitasking==0.0.10 13 | numpy==1.22.2 14 | outcome==1.1.0 15 | pandas==1.4.1 16 | pycoingecko==2.2.0 17 | pycparser==2.21 18 | pyOpenSSL==22.0.0 19 | PySocks==1.7.1 20 | python-dateutil==2.8.2 21 | pytz==2021.3 22 | requests==2.27.1 23 | requests-futures==1.0.0 24 | six==1.16.0 25 | sniffio==1.2.0 26 | sortedcontainers==2.4.0 27 | termcolor==1.1.0 28 | tqdm==4.63.0 29 | trio==0.20.0 30 | trio-websocket==0.9.2 31 | urllib3==1.26.19 32 | wget==3.2 33 | wsproto==1.1.0 34 | yahooquery==2.2.15 35 | -------------------------------------------------------------------------------- /qlib/data/storage/arctic_storage/arctic_storage_structure.md: -------------------------------------------------------------------------------- 1 | 2 | ## Libraries 3 | 4 | 1. Metadata and contract specifications of instruments: 5 | - convert_meta 6 | - stock_meta 7 | - index_meta 8 | - future_meta 9 | 10 | 11 | 2. OHLC price data 12 | - bar_data 13 | 14 | 15 | 3. Auxiliary data for convertible bonds 16 | - convert_convert_price 17 | - convert_cash_flow 18 | - convert_coupon 19 | - convert_derived 20 | - convert_indicator 21 | - convert_high_freq_factor 22 | - convert_stoploss_return 23 | 24 | 25 | 4. Auxiliary data for stocks 26 | - ex_factor 27 | - split 28 | - limit_up_down 29 | 30 | 31 | 5. Index-related data 32 | - vn_lib_metadata -> index_weights 33 | - market_meta -> index_component 34 | -------------------------------------------------------------------------------- /examples/rolling_process_data/README.md: -------------------------------------------------------------------------------- 1 | # Rolling Process Data 2 | 3 | This workflow is an example for `Rolling Process Data`. 4 | 5 | ## Background 6 | 7 | When rolling-training the models, data also needs to be generated for the different rolling windows. When the rolling window moves, the training data will change, and the processor's learnable state (such as standard deviation, mean, etc.) will also change. 8 | 9 | In order to avoid regenerating data, this example uses the `DataHandler-based DataLoader` to load the raw features that are not related to the rolling window, and then uses Processors to generate processed features related to the rolling window. 10 | 11 | 12 | ## Run the Code 13 | 14 | Run the example by running the following command: 15 | ```bash 16 | python workflow.py rolling_process 17 | ``` -------------------------------------------------------------------------------- /tests/test_contrib_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License.
3 | 4 | import unittest 5 | 6 | from qlib.contrib.model import all_model_classes 7 | 8 | 9 | class TestAllFlow(unittest.TestCase): 10 | def test_0_initialize(self): 11 | num = 0 12 | for model_class in all_model_classes: 13 | if model_class is not None: 14 | model = model_class() 15 | num += 1 16 | print("There are {:}/{:} valid models in total.".format(num, len(all_model_classes))) 17 | 18 | 19 | def suite(): 20 | _suite = unittest.TestSuite() 21 | _suite.addTest(TestAllFlow("test_0_initialize")) 22 | return _suite 23 | 24 | 25 | if __name__ == "__main__": 26 | runner = unittest.TextTestRunner() 27 | runner.run(suite()) 28 | -------------------------------------------------------------------------------- /qlib/data/inst_info.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | from ..utils.serial import Serializable 4 | 5 | 6 | class BaseInstrumentInfo(Serializable): 7 | def __init__(self, **kwargs): 8 | pass 9 | 10 | 11 | class ConvertInstrumentInfo(BaseInstrumentInfo): 12 | def __init__( 13 | self, 14 | cash_flow_schedule, 15 | coupon_schedule, 16 | maturity_date, 17 | call_date=datetime(2200, 1, 1), 18 | principle=100, 19 | stop_trading_date=datetime(2200, 1, 1), 20 | ): 21 | self.cash_flow_schedule = cash_flow_schedule 22 | self.maturity_date = maturity_date 23 | self.call_date = call_date 24 | self.coupon_schedule = coupon_schedule 25 | self.principle = principle 26 | self.stop_trading_date = stop_trading_date 27 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # https://github.com/github/gitignore/blob/master/Python.gitignore 2 | __pycache__/ 3 | 4 | *.pyc 5 | *.pyd 6 | *.so 7 | *.ipynb 8 | .ipynb_checkpoints 9 | _build 10 | build/ 11 | dist/ 12 | 13 | *.log 14 | *.pkl 15 | *.hd5 16 | *.csv 17 | 18 | .env 19 | .vim 20 | .nvimrc 21 | .vscode 22 | 23 | qlib/VERSION.txt 24 | qlib/data/_libs/expanding.cpp 25 | qlib/data/_libs/rolling.cpp 26 | examples/estimator/estimator_example/ 27 | examples/rl/data/ 28 | examples/rl/checkpoints/ 29 | examples/rl/outputs/ 30 | examples/rl_order_execution/data/ 31 | examples/rl_order_execution/outputs/ 32 | 33 | *.egg-info/ 34 | 35 | # test related 36 | test-output.xml 37 | .output 38 | .data 39 | 40 | # special software 41 | mlruns/ 42 | 43 | tags 44 | 45 | .pytest_cache/ 46 | .mypy_cache/ 47 | .vscode/ 48 | 49 | *.swp 50 | 51 | ./pretrain 52 | .idea/ 53 | .aider* 54 | -------------------------------------------------------------------------------- /examples/benchmarks/DoubleEnsemble/README.md: -------------------------------------------------------------------------------- 1 | # DoubleEnsemble 2 | * DoubleEnsemble is an ensemble framework that leverages learning-trajectory-based sample reweighting and shuffling-based feature selection to address both the low signal-to-noise ratio and the growing number of features. It identifies key samples based on the training dynamics of each sample and elicits key features based on the ablation impact of each feature via shuffling. The framework is applicable to a wide range of base models, is capable of extracting complex patterns, and mitigates the overfitting and instability issues of financial market prediction. 3 | * The code used in Qlib is our own implementation.
4 | * Paper: DoubleEnsemble: A New Ensemble Method Based on Sample Reweighting and Feature Selection for Financial Data Analysis [https://arxiv.org/pdf/2010.01265.pdf](https://arxiv.org/pdf/2010.01265.pdf). -------------------------------------------------------------------------------- /qlib/data/dataset/weight.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | 5 | class Reweighter: 6 | def __init__(self, *args, **kwargs): 7 | """ 8 | To initialize the Reweighter, users should provide specific methods to let the reweighter do the reweighting (e.g. sample-wise, rule-based). 9 | """ 10 | raise NotImplementedError() 11 | 12 | def reweight(self, data: object) -> object: 13 | """ 14 | Get weights for data 15 | 16 | Parameters 17 | ---------- 18 | data : object 19 | The input data. 20 | The first dimension is the index of samples 21 | 22 | Returns 23 | ------- 24 | object: 25 | the weights info for the data 26 | """ 27 | raise NotImplementedError("This type of input is not supported") 28 | -------------------------------------------------------------------------------- /examples/benchmarks/TFT/README.md: -------------------------------------------------------------------------------- 1 | # Temporal Fusion Transformers Benchmark 2 | ## Source 3 | **Reference**: Lim, Bryan, et al. "Temporal fusion transformers for interpretable multi-horizon time series forecasting." arXiv preprint arXiv:1912.09363 (2019). 4 | 5 | **GitHub**: https://github.com/google-research/google-research/tree/master/tft 6 | 7 | ## Run the Workflow 8 | Users can follow ``workflow_by_code_tft.py`` to run the benchmark. 9 | 10 | ### Notes 11 | 1. Please be **aware** that this script can only support `Python 3.6 - 3.7`. 12 | 2. If the CUDA version on your machine is not 10.0, please remember to run the following commands `conda install anaconda cudatoolkit=10.0` and `conda install cudnn` on your machine. 13 | 3. The model must run on GPU, or an error will be raised. 14 | 4. New datasets should be registered in ``data_formatters``; for details please visit the source. 15 | -------------------------------------------------------------------------------- /.github/release-drafter.yml: -------------------------------------------------------------------------------- 1 | name-template: 'v$RESOLVED_VERSION 🌈' 2 | tag-template: 'v$RESOLVED_VERSION' 3 | categories: 4 | - title: '🌟 Features' 5 | labels: 6 | - 'feature' 7 | - 'enhancement' 8 | - title: '🐛 Bug Fixes' 9 | labels: 10 | - 'fix' 11 | - 'bugfix' 12 | - 'bug' 13 | - title: '📚 Documentation' 14 | labels: 15 | - 'doc' 16 | - 'documentation' 17 | - title: '🧹 Maintenance' 18 | labels: 19 | - 'maintenance' 20 | change-template: '- $TITLE @$AUTHOR (#$NUMBER)' 21 | change-title-escapes: '\<*_&' # You can add # and @ to disable mentions, and add ` to disable code blocks. 22 | version-resolver: 23 | major: 24 | labels: 25 | - 'major' 26 | minor: 27 | labels: 28 | - 'minor' 29 | patch: 30 | labels: 31 | - 'patch' 32 | default: patch 33 | template: | 34 | ## Changes 35 | 36 | $CHANGES 37 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM continuumio/miniconda3:latest 2 | 3 | WORKDIR /qlib 4 | 5 | COPY . .
6 | 7 | RUN apt-get update && \ 8 | apt-get install -y build-essential 9 | 10 | RUN conda create --name qlib_env python=3.8 -y 11 | RUN echo "conda activate qlib_env" >> ~/.bashrc 12 | ENV PATH /opt/conda/envs/qlib_env/bin:$PATH 13 | 14 | RUN python -m pip install --upgrade pip 15 | 16 | RUN python -m pip install numpy==1.23.5 17 | RUN python -m pip install pandas==1.5.3 18 | RUN python -m pip install importlib-metadata==5.2.0 19 | RUN python -m pip install "cloudpickle<3" 20 | RUN python -m pip install scikit-learn==1.3.2 21 | 22 | RUN python -m pip install cython packaging tables matplotlib statsmodels 23 | RUN python -m pip install pybind11 cvxpy 24 | 25 | ARG IS_STABLE="yes" 26 | 27 | RUN if [ "$IS_STABLE" = "yes" ]; then \ 28 | python -m pip install pyqlib; \ 29 | else \ 30 | python setup.py install; \ 31 | fi 32 | -------------------------------------------------------------------------------- /examples/benchmarks/TRA/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # we used random seed(1 1000 2000 3000 4000 5000) in our experiments 4 | 5 | # Directly run from Qlib command `qrun` 6 | qrun configs/config_alstm.yaml 7 | 8 | qrun configs/config_transformer.yaml 9 | 10 | qrun configs/config_transformer_tra_init.yaml 11 | qrun configs/config_transformer_tra.yaml 12 | 13 | qrun configs/config_alstm_tra_init.yaml 14 | qrun configs/config_alstm_tra.yaml 15 | 16 | 17 | # Or setting different parameters with example.py 18 | python example.py --config_file configs/config_alstm.yaml 19 | 20 | python example.py --config_file configs/config_transformer.yaml 21 | 22 | python example.py --config_file configs/config_transformer_tra_init.yaml 23 | python example.py --config_file configs/config_transformer_tra.yaml 24 | 25 | python example.py --config_file configs/config_alstm_tra_init.yaml 26 | python example.py --config_file configs/config_alstm_tra.yaml 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /qlib/rl/contrib/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
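# read_order_file below normalizes an order file (.pkl or .csv, or an
# already-loaded DataFrame) into a DataFrame with `datetime`/`direction`
# columns. Illustrative call (the path is a placeholder):
#
#     orders = read_order_file(Path("data/orders/test_orders.pkl"))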
3 | 4 | from __future__ import annotations 5 | 6 | from pathlib import Path 7 | 8 | import pandas as pd 9 | 10 | 11 | def read_order_file(order_file: Path | pd.DataFrame) -> pd.DataFrame: 12 | if isinstance(order_file, pd.DataFrame): 13 | return order_file 14 | 15 | order_file = Path(order_file) 16 | 17 | if order_file.suffix == ".pkl": 18 | order_df = pd.read_pickle(order_file).reset_index() 19 | elif order_file.suffix == ".csv": 20 | order_df = pd.read_csv(order_file) 21 | else: 22 | raise TypeError(f"Unsupported order file type: {order_file}") 23 | 24 | if "date" in order_df.columns: 25 | # legacy dataframe columns 26 | order_df = order_df.rename(columns={"date": "datetime", "order_type": "direction"}) 27 | order_df["datetime"] = order_df["datetime"].astype(str) 28 | 29 | return order_df 30 | -------------------------------------------------------------------------------- /qlib/contrib/tuner/launcher.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | # pylint: skip-file 5 | # flake8: noqa 6 | 7 | # coding=utf-8 8 | 9 | import argparse 10 | import importlib 11 | import os 12 | 13 | import yaml 14 | 15 | from .config import TunerConfigManager 16 | 17 | args_parser = argparse.ArgumentParser(prog="tuner") 18 | args_parser.add_argument( 19 | "-c", 20 | "--config_path", 21 | required=True, 22 | type=str, 23 | help="config path indicates where to load yaml config.", 24 | ) 25 | 26 | args = args_parser.parse_args() 27 | 28 | TUNER_CONFIG_MANAGER = TunerConfigManager(args.config_path) 29 | 30 | 31 | def run(): 32 | # 1. Get pipeline class. 33 | tuner_pipeline_class = getattr(importlib.import_module(".pipeline", package="qlib.contrib.tuner"), "Pipeline") 34 | # 2. Init tuner pipeline. 35 | tuner_pipeline = tuner_pipeline_class(TUNER_CONFIG_MANAGER) 36 | # 3. Begin to tune 37 | tuner_pipeline.run() 38 | -------------------------------------------------------------------------------- /tests/test_workflow.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | import shutil 4 | import unittest 5 | from pathlib import Path 6 | 7 | from qlib.tests import TestAutoData 8 | from qlib.workflow import R 9 | 10 | 11 | class WorkflowTest(TestAutoData): 12 | # Creating the directory manually doesn't work with mlflow, 13 | # so we add a subfolder named .trash when we create the directory. 14 | TMP_PATH = Path("./.mlruns_tmp/.trash") 15 | 16 | def tearDown(self) -> None: 17 | if self.TMP_PATH.exists(): 18 | shutil.rmtree(self.TMP_PATH) 19 | 20 | def test_get_local_dir(self): 21 | """ """ 22 | self.TMP_PATH.mkdir(parents=True, exist_ok=True) 23 | 24 | with R.start(uri=str(self.TMP_PATH)): 25 | pass 26 | 27 | with R.uri_context(uri=str(self.TMP_PATH)): 28 | resume_recorder = R.get_recorder() 29 | resume_recorder.get_local_dir() 30 | 31 | 32 | if __name__ == "__main__": 33 | unittest.main() 34 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug-report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F41B Bug Report" 3 | about: Submit a bug report to help us improve Qlib 4 | labels: bug 5 | 6 | --- 7 | 8 | ## 🐛 Bug Description 9 | 10 | 11 | 12 | ## To Reproduce 13 | 14 | Steps to reproduce the behavior: 15 | 16 | 1. 17 | 1. 18 | 1. 
19 | 20 | 21 | ## Expected Behavior 22 | 23 | 24 | 25 | ## Screenshot 26 | 27 | 28 | 29 | ## Environment 30 | 31 | **Note**: User could run `cd scripts && python collect_info.py all` under project directory to get system information 32 | and paste them here directly. 33 | 34 | - Qlib version: 35 | - Python version: 36 | - OS (`Windows`, `Linux`, `MacOS`): 37 | - Commit number (optional, please provide it if you are using the dev version): 38 | 39 | ## Additional Notes 40 | 41 | 42 | -------------------------------------------------------------------------------- /examples/model_interpreter/feature.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | 5 | import qlib 6 | from qlib.constant import REG_CN 7 | from qlib.tests.config import CSI300_GBDT_TASK 8 | from qlib.tests.data import GetData 9 | from qlib.utils import init_instance_by_config 10 | 11 | if __name__ == "__main__": 12 | # use default data 13 | provider_uri = "~/.qlib/qlib_data/cn_data" # target_dir 14 | GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True) 15 | 16 | qlib.init(provider_uri=provider_uri, region=REG_CN) 17 | 18 | ################################### 19 | # train model 20 | ################################### 21 | # model initialization 22 | model = init_instance_by_config(CSI300_GBDT_TASK["model"]) 23 | dataset = init_instance_by_config(CSI300_GBDT_TASK["dataset"]) 24 | model.fit(dataset) 25 | 26 | # get model feature importance 27 | feature_importance = model.get_feature_importance() 28 | print("feature importance:") 29 | print(feature_importance) 30 | -------------------------------------------------------------------------------- /qlib/walkforward/walkforward_handler.py: -------------------------------------------------------------------------------- 1 | from qlib.contrib.data.handler import check_transform_proc 2 | from qlib.data.dataset.handler import DataHandlerLP 3 | from qlib.data.dataset.loader import DataLoaderDH 4 | 5 | 6 | class WFDataHandler(DataHandlerLP): 7 | def __init__( 8 | self, 9 | start_time=None, 10 | end_time=None, 11 | infer_processors=[], 12 | learn_processors=[], 13 | fit_start_time=None, 14 | fit_end_time=None, 15 | data_loader_kwargs={}, 16 | ): 17 | infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time) 18 | learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time) 19 | 20 | data_loader = DataLoaderDH(**data_loader_kwargs) 21 | 22 | super().__init__( 23 | instruments=None, 24 | start_time=start_time, 25 | end_time=end_time, 26 | data_loader=data_loader, 27 | infer_processors=infer_processors, 28 | learn_processors=learn_processors, 29 | ) 30 | -------------------------------------------------------------------------------- /tests/misc/test_get_multi_proc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
3 | 4 | import unittest 5 | from multiprocessing import Pool 6 | 7 | import qlib 8 | from qlib.data import D 9 | from qlib.tests import TestAutoData 10 | 11 | 12 | def get_features(fields): 13 | qlib.init(provider_uri=TestAutoData.provider_uri, expression_cache=None, dataset_cache=None, joblib_backend="loky") 14 | return D.features(D.instruments("csi300"), fields) 15 | 16 | 17 | class TestGetData(TestAutoData): 18 | FIELDS = "$open,$close,$high,$low,$volume,$factor,$change".split(",") 19 | 20 | def test_multi_proc(self): 21 | """ 22 | For testing if it will raise error 23 | """ 24 | iter_n = 2 25 | pool = Pool(iter_n) 26 | 27 | res = [] 28 | for _ in range(iter_n): 29 | res.append(pool.apply_async(get_features, (self.FIELDS,), {})) 30 | 31 | for r in res: 32 | print(r.get()) 33 | 34 | pool.close() 35 | pool.join() 36 | 37 | 38 | if __name__ == "__main__": 39 | unittest.main() 40 | -------------------------------------------------------------------------------- /examples/benchmarks/GeneralPtNN/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Introduction 4 | 5 | What is GeneralPtNN 6 | - Fixes the previous design, which failed to support both time-series and tabular data 7 | - Now you can simply replace the PyTorch model structure to run a NN model (see the sketch at the end of this README). 8 | 9 | We provide an example to demonstrate the effectiveness of the current design. 10 | - `workflow_config_gru.yaml` aligns with previous results [GRU(Kyunghyun Cho, et al.)](../README.md#Alpha158-dataset) 11 | - `workflow_config_gru2mlp.yaml` demonstrates that we can convert a config from time-series to tabular data with minimal changes 12 | - You only have to change the net & dataset class to make the conversion. 13 | - `workflow_config_mlp.yaml` achieves similar functionality to [MLP](../README.md#Alpha158-dataset) 14 | 15 | # TODO 16 | 17 | - We will align existing models to the current design. 18 | 19 | - The result of `workflow_config_mlp.yaml` is different from the result of [MLP](../README.md#Alpha158-dataset) since GeneralPtNN has a different stopping method from previous implementations. Specifically, GeneralPtNN controls training by epochs, whereas previous methods were controlled by max_steps.
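As a hedged illustration of "just replace the PyTorch model structure", here is a minimal sketch; the class name, layer sizes, and feature dimension are invented for this example and are not the benchmark's actual config:

```python
import torch
import torch.nn as nn


class TinyNet(nn.Module):
    """A drop-in tabular network: flat Alpha158-style features -> one score per sample."""

    def __init__(self, d_feat: int = 158):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(d_feat, 64),
            nn.ReLU(),
            nn.Linear(64, 1),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # x: [batch, d_feat] -> [batch]
        return self.net(x).squeeze(-1)
```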
20 | -------------------------------------------------------------------------------- /examples/rolling_process_data/rolling_handler.py: -------------------------------------------------------------------------------- 1 | from qlib.contrib.data.handler import check_transform_proc 2 | from qlib.data.dataset.handler import DataHandlerLP 3 | from qlib.data.dataset.loader import DataLoaderDH 4 | 5 | 6 | class RollingDataHandler(DataHandlerLP): 7 | def __init__( 8 | self, 9 | start_time=None, 10 | end_time=None, 11 | infer_processors=[], 12 | learn_processors=[], 13 | fit_start_time=None, 14 | fit_end_time=None, 15 | data_loader_kwargs={}, 16 | ): 17 | infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time) 18 | learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time) 19 | 20 | data_loader = { 21 | "class": "DataLoaderDH", 22 | "kwargs": {**data_loader_kwargs}, 23 | } 24 | 25 | super().__init__( 26 | instruments=None, 27 | start_time=start_time, 28 | end_time=end_time, 29 | data_loader=data_loader, 30 | infer_processors=infer_processors, 31 | learn_processors=learn_processors, 32 | ) 33 | -------------------------------------------------------------------------------- /qlib/contrib/report/report/template/general_report_template.html: -------------------------------------------------------------------------------- 1 | <!DOCTYPE html> 2 | <html lang="en"> 3 | <head> 4 | <meta charset="UTF-8"> 5 | <title>{{title}}</title> 6 | </head> 7 | <body> 8 | {% macro print_section(m_level, m_content_items) -%} 9 | {% for content_item in m_content_items %} 10 | {% if 'header' in content_item %} 11 | <{{"h%d"|format(m_level)}}>{{content_item.header}}</{{"h%d"|format(m_level)}}> 12 | {%- endif %} 13 | {% if content_item.type == "html" %} 14 | {{content_item.content}} 15 | {% elif content_item.type == "base64image" %} 16 | <img src="data:image/png;base64,{{content_item.content}}" alt="Red dot" /> 17 | {% elif content_item.type == "base64imagelist" %} 18 | {% for base64image in content_item.content %} 19 | <img src="data:image/png;base64,{{base64image}}" alt="Red dot" /> 20 | {%- endfor %} 21 | {% elif content_item.type == "subsections" %} 22 | {{print_section(m_level+1, content_item.content)}} 23 | {% elif content_item.type == "itemlist" %} 24 | <ul> 25 | {% for dk, dv in content_item.content.items() %} 26 | <li>{{dk}}: {{dv}}</li> 27 | {%- endfor %} 28 | </ul>
29 | {%- endif %} 30 | {%- endfor %} 31 | {%- endmacro -%} 32 | 33 | {{print_section(1, component_list)}} 34 | </body> 35 | </html> 36 | -------------------------------------------------------------------------------- /qlib/rl/order_execution/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | """ 5 | Currently it supports single-asset order execution. 6 | Multi-asset is on the way. 7 | """ 8 | 9 | from .interpreter import ( 10 | CategoricalActionInterpreter, 11 | CurrentStepStateInterpreter, 12 | FullHistoryStateInterpreter, 13 | TwapRelativeActionInterpreter, 14 | ) 15 | from .network import Recurrent 16 | from .policy import PPO, AllOne 17 | from .reward import PAPenaltyReward 18 | from .simulator_simple import SingleAssetOrderExecutionSimple 19 | from .state import SAOEMetrics, SAOEState 20 | from .strategy import ProxySAOEStrategy, SAOEIntStrategy, SAOEStateAdapter, SAOEStrategy 21 | 22 | __all__ = [ 23 | "FullHistoryStateInterpreter", 24 | "CurrentStepStateInterpreter", 25 | "CategoricalActionInterpreter", 26 | "TwapRelativeActionInterpreter", 27 | "Recurrent", 28 | "AllOne", 29 | "PPO", 30 | "PAPenaltyReward", 31 | "SingleAssetOrderExecutionSimple", 32 | "SAOEStateAdapter", 33 | "SAOEMetrics", 34 | "SAOEState", 35 | "SAOEStrategy", 36 | "ProxySAOEStrategy", 37 | "SAOEIntStrategy", 38 | ] 39 | -------------------------------------------------------------------------------- /qlib/contrib/torch.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | """ 4 | This module is not a necessary part of Qlib. 5 | It just provides some convenience tools. 6 | It should not be imported into the core part of qlib 7 | """ 8 | import numpy as np 9 | import pandas as pd 10 | import torch 11 | 12 | 13 | def data_to_tensor(data, device="cpu", raise_error=False): 14 | if isinstance(data, torch.Tensor): 15 | if device == "cpu": 16 | return data.cpu() 17 | else: 18 | return data.to(device) 19 | if isinstance(data, (pd.DataFrame, pd.Series)): 20 | return data_to_tensor(torch.from_numpy(data.values).float(), device) 21 | elif isinstance(data, np.ndarray): 22 | return data_to_tensor(torch.from_numpy(data).float(), device) 23 | elif isinstance(data, (tuple, list)): 24 | return [data_to_tensor(i, device) for i in data] 25 | elif isinstance(data, dict): 26 | return {k: data_to_tensor(v, device) for k, v in data.items()} 27 | else: 28 | if raise_error: 29 | raise ValueError(f"Unsupported data type: {type(data)}.") 30 | else: 31 | return data 32 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, Extension 2 | import numpy 3 | import os 4 | 5 | 6 | def read(rel_path: str) -> str: 7 | here = os.path.abspath(os.path.dirname(__file__)) 8 | with open(os.path.join(here, rel_path), encoding="utf-8") as fp: 9 | return fp.read() 10 | 11 | 12 | def get_version(rel_path: str) -> str: 13 | for line in read(rel_path).splitlines(): 14 | if line.startswith("__version__"): 15 | delim = '"' if '"' in line else "'" 16 | return line.split(delim)[1] 17 | raise RuntimeError("Unable to find version string.") 18 | 19 | 20 | NUMPY_INCLUDE = numpy.get_include() 21 | 22 | VERSION = get_version("qlib/__init__.py") 23 | 24 | 25 | setup( 26 | version=VERSION, 27
| ext_modules=[ 28 | Extension( 29 | "qlib.data._libs.rolling", 30 | ["qlib/data/_libs/rolling.pyx"], 31 | language="c++", 32 | include_dirs=[NUMPY_INCLUDE], 33 | ), 34 | Extension( 35 | "qlib.data._libs.expanding", 36 | ["qlib/data/_libs/expanding.pyx"], 37 | language="c++", 38 | include_dirs=[NUMPY_INCLUDE], 39 | ), 40 | ], 41 | ) 42 | -------------------------------------------------------------------------------- /qlib/rl/strategy/single_order.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | from __future__ import annotations 5 | 6 | from qlib.backtest import Order 7 | from qlib.backtest.decision import OrderHelper, TradeDecisionWO, TradeRange 8 | from qlib.strategy.base import BaseStrategy 9 | 10 | 11 | class SingleOrderStrategy(BaseStrategy): 12 | """Strategy used to generate a trade decision with exactly one order.""" 13 | 14 | def __init__( 15 | self, 16 | order: Order, 17 | trade_range: TradeRange | None = None, 18 | ) -> None: 19 | super().__init__() 20 | 21 | self._order = order 22 | self._trade_range = trade_range 23 | 24 | def generate_trade_decision(self, execute_result: list | None = None) -> TradeDecisionWO: 25 | oh: OrderHelper = self.common_infra.get("trade_exchange").get_order_helper() 26 | order_list = [ 27 | oh.create( 28 | code=self._order.stock_id, 29 | amount=self._order.amount, 30 | direction=self._order.direction, 31 | ), 32 | ] 33 | return TradeDecisionWO(order_list, self, self._trade_range) 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE 22 | -------------------------------------------------------------------------------- /docs/advanced/server.rst: -------------------------------------------------------------------------------- 1 | .. _server: 2 | 3 | ============================= 4 | ``Online`` & ``Offline`` mode 5 | ============================= 6 | .. currentmodule:: qlib 7 | 8 | 9 | Introduction 10 | ============ 11 | 12 | ``Qlib`` supports ``Online`` mode and ``Offline`` mode. Only the ``Offline`` mode is introduced in this document. 13 | 14 | The ``Online`` mode is designed to solve the following problems: 15 | 16 | - Manage the data in a centralized way. 
Users don't have to manage data of different versions. 17 | - Reduce the amount of cache to be generated. 18 | - Make the data accessible in a remote way. 19 | 20 | Qlib-Server 21 | =========== 22 | 23 | ``Qlib-Server`` is the assorted server system for ``Qlib``, which utilizes ``Qlib`` for basic calculations and provides an extensive server system and cache mechanism. With ``Qlib-Server``, the data provided for ``Qlib`` can be managed in a centralized manner, and users can use ``Qlib`` in ``Online`` mode. 24 | 25 | 26 | 27 | Reference 28 | ========= 29 | If users are interested in ``Qlib-Server`` and ``Online`` mode, please refer to `Qlib-Server Project <https://github.com/microsoft/qlib-server>`_ and `Qlib-Server Document <https://qlib-server.readthedocs.io/en/latest/>`_. 30 | -------------------------------------------------------------------------------- /qlib/contrib/online/online_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | # pylint: skip-file 5 | # flake8: noqa 6 | 7 | import random 8 | 9 | import pandas as pd 10 | 11 | from ...data import D 12 | from ..model.base import Model 13 | 14 | 15 | class ScoreFileModel(Model): 16 | """ 17 | This model will load a score file and return the scores at dates that exist in the score file. 18 | """ 19 | 20 | def __init__(self, score_path): 21 | pred_test = pd.read_csv(score_path, index_col=[0, 1], parse_dates=True, infer_datetime_format=True) 22 | self.pred = pred_test 23 | 24 | def get_data_with_date(self, date, **kwargs): 25 | score = self.pred.loc(axis=0)[:, date] # (stock_id, trade_date) multi_index, score at predict date 26 | score_series = score.reset_index(level="datetime", drop=True)[ 27 | "score" 28 | ] # pd.Series ; index:stock_id, data: score 29 | return score_series 30 | 31 | def predict(self, x_test, **kwargs): 32 | return x_test 33 | 34 | def score(self, x_test, **kwargs): 35 | return 36 | 37 | def fit(self, x_train, y_train, x_valid, y_valid, w_train=None, w_valid=None, **kwargs): 38 | return 39 | 40 | def save(self, fname, **kwargs): 41 | return 42 | -------------------------------------------------------------------------------- /tests/dependency_tests/test_mlflow.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License.
3 | import unittest 4 | import platform 5 | import mlflow 6 | import time 7 | from pathlib import Path 8 | import shutil 9 | 10 | 11 | class MLflowTest(unittest.TestCase): 12 | TMP_PATH = Path("./.mlruns_tmp/") 13 | 14 | def tearDown(self) -> None: 15 | if self.TMP_PATH.exists(): 16 | shutil.rmtree(self.TMP_PATH) 17 | 18 | def test_creating_client(self): 19 | """ 20 | Please refer to qlib/workflow/expm.py:MLflowExpManager._client 21 | we don't cache _client (this is helpful to reduce maintenance work when MLflowExpManager's uri is changed) 22 | 23 | This implementation is based on the assumption that creating a client is fast 24 | """ 25 | start = time.time() 26 | for i in range(10): 27 | _ = mlflow.tracking.MlflowClient(tracking_uri=str(self.TMP_PATH)) 28 | end = time.time() 29 | elapsed = end - start 30 | if platform.system() == "Linux": 31 | self.assertLess(elapsed, 1e-2) # it can be done in less than 10ms 32 | else: 33 | self.assertLess(elapsed, 2e-2) 34 | print(elapsed) 35 | 36 | 37 | if __name__ == "__main__": 38 | unittest.main() 39 | -------------------------------------------------------------------------------- /qlib/rl/aux_info.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | from __future__ import annotations 5 | 6 | from typing import TYPE_CHECKING, Generic, Optional, TypeVar 7 | 8 | from qlib.typehint import final 9 | 10 | from .simulator import StateType 11 | 12 | if TYPE_CHECKING: 13 | from .utils.env_wrapper import EnvWrapper 14 | 15 | 16 | __all__ = ["AuxiliaryInfoCollector"] 17 | 18 | AuxInfoType = TypeVar("AuxInfoType") 19 | 20 | 21 | class AuxiliaryInfoCollector(Generic[StateType, AuxInfoType]): 22 | """Override this class to collect customized auxiliary information from environment.""" 23 | 24 | env: Optional[EnvWrapper] = None 25 | 26 | @final 27 | def __call__(self, simulator_state: StateType) -> AuxInfoType: 28 | return self.collect(simulator_state) 29 | 30 | def collect(self, simulator_state: StateType) -> AuxInfoType: 31 | """Override this for customized auxiliary info. 32 | Usually useful in Multi-agent RL. 33 | 34 | Parameters 35 | ---------- 36 | simulator_state 37 | Retrieved with ``simulator.get_state()``. 38 | 39 | Returns 40 | ------- 41 | Auxiliary information. 42 | """ 43 | raise NotImplementedError("collect is not implemented!") 44 | -------------------------------------------------------------------------------- /examples/nested_decision_execution/README.md: -------------------------------------------------------------------------------- 1 | # Nested Decision Execution 2 | 3 | This workflow is an example of nested decision execution in backtesting: Qlib allows users to use different strategies to make trade decisions at different frequencies. 4 | 5 | ## Weekly Portfolio Generation and Daily Order Execution 6 | 7 | This workflow provides an example that uses a DropoutTopkStrategy (a strategy based on the daily-frequency LightGBM model) at weekly frequency for portfolio generation and uses SBBStrategyEMA (a rule-based strategy that uses EMA for decision-making) to execute orders at daily frequency; a minimal sketch of this nesting is given below. 
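The inner/outer split is expressed with qlib's nested executor. The snippet below is a minimal, hypothetical sketch of that wiring in qlib's dict-config style; the class names (`NestedExecutor`, `SimulatorExecutor`, `SBBStrategyEMA`) exist in qlib, but the exact kwargs here are assumptions, so treat `workflow.py` in this folder as the authoritative setup.

```python
# Hypothetical sketch; the kwargs are assumptions, see workflow.py for the real config.
executor_config = {
    "class": "NestedExecutor",  # outer executor, steps at the outer (weekly) frequency
    "module_path": "qlib.backtest.executor",
    "kwargs": {
        "time_per_step": "week",  # outer (portfolio) decision frequency (assumed value)
        "inner_strategy": {  # splits each weekly decision into finer-grained orders
            "class": "SBBStrategyEMA",
            "module_path": "qlib.contrib.strategy.rule_strategy",
            "kwargs": {},
        },
        "inner_executor": {  # actually executes orders at the finer (daily) frequency
            "class": "SimulatorExecutor",
            "module_path": "qlib.backtest.executor",
            "kwargs": {"time_per_step": "day"},
        },
    },
}
```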
8 | 9 | ### Usage 10 | 11 | Start backtesting by running the following command: 12 | ```bash 13 | python workflow.py backtest 14 | ``` 15 | 16 | Start collecting data by running the following command: 17 | ```bash 18 | python workflow.py collect_data 19 | ``` 20 | 21 | ## Daily Portfolio Generation and Minutely Order Execution 22 | 23 | This workflow also provides a high-frequency example that uses a DropoutTopkStrategy for portfolio generation at daily frequency and uses SBBStrategyEMA to execute orders at minutely frequency. 24 | 25 | ### Usage 26 | 27 | Start backtesting by running the following command: 28 | ```bash 29 | python workflow.py backtest_highfreq 30 | ``` -------------------------------------------------------------------------------- /tests/data_mid_layer_tests/test_handler.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import shutil 4 | import unittest 5 | from qlib.tests import TestAutoData 6 | from qlib.data import D 7 | from qlib.data.dataset.handler import DataHandlerLP 8 | 9 | 10 | class HandlerTests(TestAutoData): 11 | def to_str(self, obj): 12 | return "".join(str(obj).split()) 13 | 14 | def test_handler_df(self): 15 | df = D.features(["sh600519"], start_time="20190101", end_time="20190201", fields=["$close"]) 16 | dh = DataHandlerLP.from_df(df) 17 | print(dh.fetch()) 18 | self.assertTrue(dh._data.equals(df)) 19 | self.assertTrue(dh._infer is dh._data) 20 | self.assertTrue(dh._learn is dh._data) 21 | self.assertTrue(dh.data_loader._data is dh._data) 22 | fname = "_handler_test.pkl" 23 | dh.to_pickle(fname, dump_all=True) 24 | 25 | with open(fname, "rb") as f: 26 | dh_d = pickle.load(f) 27 | 28 | self.assertTrue(dh_d._data.equals(df)) 29 | self.assertTrue(dh_d._infer is dh_d._data) 30 | self.assertTrue(dh_d._learn is dh_d._data) 31 | # Data loader will no longer be useful 32 | self.assertTrue("_data" not in dh_d.data_loader.__dict__.keys()) 33 | os.remove(fname) 34 | 35 | 36 | if __name__ == "__main__": 37 | unittest.main() 38 | -------------------------------------------------------------------------------- /qlib/model/interpret/base.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | """ 5 | Interfaces to interpret models 6 | """ 7 | 8 | from abc import abstractmethod 9 | 10 | import pandas as pd 11 | 12 | 13 | class FeatureInt: 14 | """Feature (Int)erpreter""" 15 | 16 | @abstractmethod 17 | def get_feature_importance(self) -> pd.Series: 18 | """get feature importance 19 | 20 | Returns 21 | ------- 22 | The index is the feature name. 23 | 24 | The greater the value, the higher the importance. 
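For example, a hypothetical result could be ``pd.Series({"$volume": 0.30, "$close": 0.12})``, meaning ``$volume`` contributes more than ``$close`` (illustrative values only).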
25 | """ 26 | 27 | 28 | class LightGBMFInt(FeatureInt): 29 | """LightGBM (F)eature (Int)erpreter""" 30 | 31 | def __init__(self): 32 | self.model = None 33 | 34 | def get_feature_importance(self, *args, **kwargs) -> pd.Series: 35 | """get feature importance 36 | 37 | Notes 38 | ----- 39 | parameters reference: 40 | https://lightgbm.readthedocs.io/en/latest/pythonapi/lightgbm.Booster.html?highlight=feature_importance#lightgbm.Booster.feature_importance 41 | """ 42 | return pd.Series( 43 | self.model.feature_importance(*args, **kwargs), index=self.model.feature_name() 44 | ).sort_values( # pylint: disable=E1101 45 | ascending=False 46 | ) 47 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Description 4 | 5 | 6 | ## Motivation and Context 7 | 8 | 9 | 10 | ## How Has This Been Tested? 11 | 12 | - [ ] Pass the test by running: `pytest qlib/tests/test_all_pipeline.py` under upper directory of `qlib`. 13 | - [ ] If you are adding a new feature, test on your own test scripts. 14 | 15 | 16 | 17 | ## Screenshots of Test Results (if appropriate): 18 | 1. Pipeline test: 19 | 2. Your own tests: 20 | 21 | ## Types of changes 22 | 23 | - [ ] Fix bugs 24 | - [ ] Add new feature 25 | - [ ] Update documentation 26 | -------------------------------------------------------------------------------- /qlib/contrib/model/pytorch_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | import torch.nn as nn 5 | 6 | 7 | def count_parameters(models_or_parameters, unit="m"): 8 | """ 9 | This function is to obtain the storage size unit of a (or multiple) models. 10 | 11 | Parameters 12 | ---------- 13 | models_or_parameters : PyTorch model(s) or a list of parameters. 14 | unit : the storage size unit. 15 | 16 | Returns 17 | ------- 18 | The number of parameters of the given model(s) or parameters. 
19 | """ 20 | if isinstance(models_or_parameters, nn.Module): 21 | counts = sum(v.numel() for v in models_or_parameters.parameters()) 22 | elif isinstance(models_or_parameters, nn.Parameter): 23 | counts = models_or_parameters.numel() 24 | elif isinstance(models_or_parameters, (list, tuple)): 25 | return sum(count_parameters(x, unit) for x in models_or_parameters) 26 | else: 27 | counts = sum(v.numel() for v in models_or_parameters) 28 | unit = unit.lower() 29 | if unit in ("kb", "k"): 30 | counts /= 2**10 31 | elif unit in ("mb", "m"): 32 | counts /= 2**20 33 | elif unit in ("gb", "g"): 34 | counts /= 2**30 35 | elif unit is not None: 36 | raise ValueError("Unknown unit: {:}".format(unit)) 37 | return counts 38 | -------------------------------------------------------------------------------- /examples/benchmarks/TRA/example.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import qlib 4 | from ruamel.yaml import YAML 5 | from qlib.utils import init_instance_by_config 6 | 7 | 8 | def main(seed, config_file="configs/config_alstm.yaml"): 9 | # set random seed 10 | with open(config_file) as f: 11 | yaml = YAML(typ="safe", pure=True) 12 | config = yaml.load(f) 13 | 14 | # seed_suffix = "/seed1000" if "init" in config_file else f"/seed{seed}" 15 | seed_suffix = "" 16 | config["task"]["model"]["kwargs"].update( 17 | {"seed": seed, "logdir": config["task"]["model"]["kwargs"]["logdir"] + seed_suffix} 18 | ) 19 | 20 | # initialize workflow 21 | qlib.init( 22 | provider_uri=config["qlib_init"]["provider_uri"], 23 | region=config["qlib_init"]["region"], 24 | ) 25 | dataset = init_instance_by_config(config["task"]["dataset"]) 26 | model = init_instance_by_config(config["task"]["model"]) 27 | 28 | # train model 29 | model.fit(dataset) 30 | 31 | 32 | if __name__ == "__main__": 33 | # set params from cmd 34 | parser = argparse.ArgumentParser(allow_abbrev=False) 35 | parser.add_argument("--seed", type=int, default=1000, help="random seed") 36 | parser.add_argument("--config_file", type=str, default="configs/config_alstm.yaml", help="config file") 37 | args = parser.parse_args() 38 | main(**vars(args)) 39 | -------------------------------------------------------------------------------- /docs/start/installation.rst: -------------------------------------------------------------------------------- 1 | .. _installation: 2 | 3 | ============ 4 | Installation 5 | ============ 6 | 7 | .. currentmodule:: qlib 8 | 9 | 10 | ``Qlib`` Installation 11 | ===================== 12 | .. note:: 13 | 14 | `Qlib` supports both `Windows` and `Linux`. It's recommended to use `Qlib` in `Linux`. ``Qlib`` supports Python3, which is up to Python3.8. 15 | 16 | Users can easily install ``Qlib`` by pip according to the following command: 17 | 18 | .. code-block:: bash 19 | 20 | pip install pyqlib 21 | 22 | 23 | Also, Users can install ``Qlib`` by the source code according to the following steps: 24 | 25 | - Enter the root directory of ``Qlib``, in which the file ``setup.py`` exists. 26 | - Then, please execute the following command to install the environment dependencies and install ``Qlib``: 27 | 28 | .. code-block:: bash 29 | 30 | $ pip install numpy 31 | $ pip install --upgrade cython 32 | $ git clone https://github.com/microsoft/qlib.git && cd qlib 33 | $ python setup.py install 34 | 35 | .. note:: 36 | It's recommended to use anaconda/miniconda to setup the environment. ``Qlib`` needs lightgbm and pytorch packages, use pip to install them. 
37 | 38 | 39 | 40 | Use the following code to make sure the installation is successful: 41 | 42 | .. code-block:: python 43 | 44 | >>> import qlib 45 | >>> qlib.__version__ 46 | 47 | -------------------------------------------------------------------------------- /examples/benchmarks_dynamic/baseline/rolling_benchmark.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | import os 4 | from pathlib import Path 5 | from typing import Union 6 | 7 | import fire 8 | 9 | from qlib import auto_init 10 | from qlib.contrib.rolling.base import Rolling 11 | from qlib.tests.data import GetData 12 | 13 | DIRNAME = Path(__file__).absolute().resolve().parent 14 | 15 | 16 | class RollingBenchmark(Rolling): 17 | # The config in the README.md 18 | CONF_LIST = [DIRNAME / "workflow_config_linear_Alpha158.yaml", DIRNAME / "workflow_config_lightgbm_Alpha158.yaml"] 19 | 20 | DEFAULT_CONF = CONF_LIST[0] 21 | 22 | def __init__(self, conf_path: Union[str, Path] = DEFAULT_CONF, horizon=20, **kwargs) -> None: 23 | # This code is for compatibility with the previous old code 24 | conf_path = Path(conf_path) 25 | super().__init__(conf_path=conf_path, horizon=horizon, **kwargs) 26 | 27 | for f in self.CONF_LIST: 28 | if conf_path.samefile(f): 29 | break 30 | else: 31 | self.logger.warning("Model type is not in the benchmark!") 32 | 33 | 34 | if __name__ == "__main__": 35 | kwargs = {} 36 | if os.environ.get("PROVIDER_URI", "") == "": 37 | GetData().qlib_data(exists_skip=True) 38 | else: 39 | kwargs["provider_uri"] = os.environ["PROVIDER_URI"] 40 | auto_init(**kwargs) 41 | fire.Fire(RollingBenchmark) 42 | -------------------------------------------------------------------------------- /examples/benchmarks_dynamic/DDG-DA/workflow.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
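"""Entry point for the DDG-DA rolling benchmark: it wraps ``qlib.contrib.rolling.ddgda.DDGDA`` with the baseline benchmark configs and exposes it on the command line via ``fire``. This module docstring is an added summary of the script below."""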
3 | import os 4 | from pathlib import Path 5 | from typing import Union 6 | 7 | import fire 8 | 9 | from qlib import auto_init 10 | from qlib.contrib.rolling.ddgda import DDGDA 11 | from qlib.tests.data import GetData 12 | 13 | DIRNAME = Path(__file__).absolute().resolve().parent 14 | BENCH_DIR = DIRNAME.parent / "baseline" 15 | 16 | 17 | class DDGDABench(DDGDA): 18 | # The config in the README.md 19 | CONF_LIST = [ 20 | BENCH_DIR / "workflow_config_linear_Alpha158.yaml", 21 | BENCH_DIR / "workflow_config_lightgbm_Alpha158.yaml", 22 | ] 23 | 24 | DEFAULT_CONF = CONF_LIST[0] # Linear by default due to efficiency 25 | 26 | def __init__(self, conf_path: Union[str, Path] = DEFAULT_CONF, horizon=20, **kwargs) -> None: 27 | # This code is for compatibility with the previous old code 28 | conf_path = Path(conf_path) 29 | super().__init__(conf_path=conf_path, horizon=horizon, working_dir=DIRNAME, **kwargs) 30 | 31 | for f in self.CONF_LIST: 32 | if conf_path.samefile(f): 33 | break 34 | else: 35 | self.logger.warning("Model type is not in the benchmark!") 36 | 37 | 38 | if __name__ == "__main__": 39 | kwargs = {} 40 | if os.environ.get("PROVIDER_URI", "") == "": 41 | GetData().qlib_data(exists_skip=True) 42 | else: 43 | kwargs["provider_uri"] = os.environ["PROVIDER_URI"] 44 | auto_init(**kwargs) 45 | fire.Fire(DDGDABench) 46 | -------------------------------------------------------------------------------- /.github/workflows/test_qlib_from_source_slow.yml: -------------------------------------------------------------------------------- 1 | name: Test qlib from source slow 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | build: 11 | timeout-minutes: 720 12 | # we may retry 3 times for `Unit tests with Pytest` 13 | 14 | runs-on: ${{ matrix.os }} 15 | strategy: 16 | matrix: 17 | os: [windows-latest, ubuntu-24.04, ubuntu-22.04] 18 | # In GitHub Actions, with Python 3.7, pip install will not match the latest versions of the packages. 19 | # Also, Python 3.7 is no longer supported on macos-14, and will be phased out of macos-13 in the near future. 20 | # All things considered, we have removed Python 3.7. 21 | python-version: ["3.11", "3.12", "3.13"] 22 | 23 | steps: 24 | - name: Test qlib from source slow 25 | uses: actions/checkout@v3 26 | 27 | - name: Set up Python ${{ matrix.python-version }} 28 | uses: actions/setup-python@v4 29 | with: 30 | python-version: ${{ matrix.python-version }} 31 | 32 | - name: Set up Python tools 33 | run: | 34 | make dev 35 | 36 | - name: Download dependencies data 37 | run: | 38 | python scripts/get_data.py qlib_data --name qlib_data_simple --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn 39 | 40 | - name: Unit tests with Pytest 41 | uses: nick-fields/retry@v2 42 | with: 43 | timeout_minutes: 240 44 | max_attempts: 3 45 | command: | 46 | cd tests 47 | python -m pytest . -m "slow" --durations=0 48 | -------------------------------------------------------------------------------- /scripts/data_collector/pit/README.md: -------------------------------------------------------------------------------- 1 | # Collect Point-in-Time Data 2 | 3 | > *Please pay **ATTENTION** that the data is collected from [baostock](http://baostock.com) and the data might not be perfect. We recommend that users prepare their own data if they have a high-quality dataset. 
For more information, users can refer to the [related document](https://qlib.readthedocs.io/en/latest/component/data.html#converting-csv-format-into-qlib-format)* 4 | 5 | ## Requirements 6 | 7 | ```bash 8 | pip install -r requirements.txt 9 | ``` 10 | 11 | ## Collect Data 12 | 13 | 14 | ### Download Quarterly CN Data 15 | 16 | ```bash 17 | cd qlib/scripts/data_collector/pit/ 18 | # download from baostock.com 19 | python collector.py download_data --source_dir ~/.qlib/stock_data/source/pit --start 2000-01-01 --end 2020-01-01 --interval quarterly 20 | ``` 21 | 22 | Downloading the data for all stocks is very time-consuming. If you just want to run a quick test on a few stocks, you can run the command below: 23 | ```bash 24 | python collector.py download_data --source_dir ~/.qlib/stock_data/source/pit --start 2000-01-01 --end 2020-01-01 --interval quarterly --symbol_regex "^(600519|000725).*" 25 | ``` 26 | 27 | 28 | ### Normalize Data 29 | ```bash 30 | python collector.py normalize_data --interval quarterly --source_dir ~/.qlib/stock_data/source/pit --normalize_dir ~/.qlib/stock_data/source/pit_normalized 31 | ``` 32 | 33 | 34 | 35 | ### Dump Data into PIT Format 36 | 37 | ```bash 38 | cd qlib/scripts 39 | python dump_pit.py dump --data_path ~/.qlib/stock_data/source/pit_normalized --qlib_dir ~/.qlib/qlib_data/cn_data --interval quarterly 40 | ``` 41 | -------------------------------------------------------------------------------- /examples/benchmarks/TRA/configs/config_alstm.yaml: -------------------------------------------------------------------------------- 1 | qlib_init: 2 | provider_uri: "~/.qlib/qlib_data/cn_data" 3 | region: cn 4 | 5 | data_loader_config: &data_loader_config 6 | class: StaticDataLoader 7 | module_path: qlib.data.dataset.loader 8 | kwargs: 9 | config: 10 | feature: data/feature.pkl 11 | label: data/label.pkl 12 | 13 | model_config: &model_config 14 | input_size: 16 15 | hidden_size: 256 16 | num_layers: 2 17 | num_heads: 2 18 | use_attn: True 19 | dropout: 0.1 20 | 21 | num_states: &num_states 1 22 | 23 | tra_config: &tra_config 24 | num_states: *num_states 25 | hidden_size: 16 26 | tau: 1.0 27 | src_info: LR_TPE 28 | 29 | task: 30 | model: 31 | class: TRAModel 32 | module_path: src/model.py 33 | kwargs: 34 | lr: 0.0002 35 | n_epochs: 500 36 | max_steps_per_epoch: 100 37 | early_stop: 20 38 | seed: 1000 39 | logdir: output/test/alstm 40 | model_type: LSTM 41 | model_config: *model_config 42 | tra_config: *tra_config 43 | lamb: 1.0 44 | rho: 0.99 45 | freeze_model: False 46 | model_init_state: 47 | dataset: 48 | class: MTSDatasetH 49 | module_path: src/dataset.py 50 | kwargs: 51 | handler: 52 | class: DataHandler 53 | module_path: qlib.data.dataset.handler 54 | kwargs: 55 | data_loader: *data_loader_config 56 | segments: 57 | train: [2007-10-30, 2016-05-27] 58 | valid: [2016-09-26, 2018-05-29] 59 | test: [2018-09-21, 2020-06-30] 60 | seq_len: 60 61 | horizon: 21 62 | num_states: *num_states 63 | batch_size: 1024 -------------------------------------------------------------------------------- /examples/benchmarks/TRA/configs/config_alstm_tra_init.yaml: -------------------------------------------------------------------------------- 1 | qlib_init: 2 | provider_uri: "~/.qlib/qlib_data/cn_data" 3 | region: cn 4 | 5 | data_loader_config: &data_loader_config 6 | class: StaticDataLoader 7 | module_path: qlib.data.dataset.loader 8 | kwargs: 9 | config: 10 | feature: data/feature.pkl 11 | label: data/label.pkl 12 | 13 | model_config: &model_config 14 | input_size: 16 15 | 
hidden_size: 256 16 | num_layers: 2 17 | num_heads: 2 18 | use_attn: True 19 | dropout: 0.1 20 | 21 | num_states: &num_states 3 22 | 23 | tra_config: &tra_config 24 | num_states: *num_states 25 | hidden_size: 16 26 | tau: 1.0 27 | src_info: LR_TPE 28 | 29 | task: 30 | model: 31 | class: TRAModel 32 | module_path: src/model.py 33 | kwargs: 34 | lr: 0.0002 35 | n_epochs: 500 36 | max_steps_per_epoch: 100 37 | early_stop: 20 38 | seed: 1000 39 | logdir: output/test/alstm_tra_init 40 | model_type: LSTM 41 | model_config: *model_config 42 | tra_config: *tra_config 43 | lamb: 1.0 44 | rho: 0.99 45 | freeze_model: False 46 | model_init_state: 47 | dataset: 48 | class: MTSDatasetH 49 | module_path: src/dataset.py 50 | kwargs: 51 | handler: 52 | class: DataHandler 53 | module_path: qlib.data.dataset.handler 54 | kwargs: 55 | data_loader: *data_loader_config 56 | segments: 57 | train: [2007-10-30, 2016-05-27] 58 | valid: [2016-09-26, 2018-05-29] 59 | test: [2018-09-21, 2020-06-30] 60 | seq_len: 60 61 | horizon: 21 62 | num_states: *num_states 63 | batch_size: 512 -------------------------------------------------------------------------------- /examples/benchmarks/TRA/configs/config_transformer.yaml: -------------------------------------------------------------------------------- 1 | qlib_init: 2 | provider_uri: "~/.qlib/qlib_data/cn_data" 3 | region: cn 4 | 5 | data_loader_config: &data_loader_config 6 | class: StaticDataLoader 7 | module_path: qlib.data.dataset.loader 8 | kwargs: 9 | config: 10 | feature: data/feature.pkl 11 | label: data/label.pkl 12 | 13 | model_config: &model_config 14 | input_size: 16 15 | hidden_size: 64 16 | num_layers: 2 17 | num_heads: 4 18 | use_attn: False 19 | dropout: 0.1 20 | 21 | num_states: &num_states 1 22 | 23 | tra_config: &tra_config 24 | num_states: *num_states 25 | hidden_size: 16 26 | tau: 1.0 27 | src_info: LR_TPE 28 | 29 | task: 30 | model: 31 | class: TRAModel 32 | module_path: src/model.py 33 | kwargs: 34 | lr: 0.0002 35 | n_epochs: 500 36 | max_steps_per_epoch: 100 37 | early_stop: 20 38 | seed: 1000 39 | logdir: output/test/transformer 40 | model_type: Transformer 41 | model_config: *model_config 42 | tra_config: *tra_config 43 | lamb: 1.0 44 | rho: 0.99 45 | freeze_model: False 46 | model_init_state: 47 | dataset: 48 | class: MTSDatasetH 49 | module_path: src/dataset.py 50 | kwargs: 51 | handler: 52 | class: DataHandler 53 | module_path: qlib.data.dataset.handler 54 | kwargs: 55 | data_loader: *data_loader_config 56 | segments: 57 | train: [2007-10-30, 2016-05-27] 58 | valid: [2016-09-26, 2018-05-29] 59 | test: [2018-09-21, 2020-06-30] 60 | seq_len: 60 61 | horizon: 21 62 | num_states: *num_states 63 | batch_size: 1024 -------------------------------------------------------------------------------- /examples/benchmarks/TRA/configs/config_transformer_tra_init.yaml: -------------------------------------------------------------------------------- 1 | qlib_init: 2 | provider_uri: "~/.qlib/qlib_data/cn_data" 3 | region: cn 4 | 5 | data_loader_config: &data_loader_config 6 | class: StaticDataLoader 7 | module_path: qlib.data.dataset.loader 8 | kwargs: 9 | config: 10 | feature: data/feature.pkl 11 | label: data/label.pkl 12 | 13 | model_config: &model_config 14 | input_size: 16 15 | hidden_size: 64 16 | num_layers: 2 17 | num_heads: 4 18 | use_attn: False 19 | dropout: 0.1 20 | 21 | num_states: &num_states 3 22 | 23 | tra_config: &tra_config 24 | num_states: *num_states 25 | hidden_size: 16 26 | tau: 1.0 27 | src_info: LR_TPE 28 | 29 | task: 30 | model: 31 | 
class: TRAModel 32 | module_path: src/model.py 33 | kwargs: 34 | lr: 0.0002 35 | n_epochs: 500 36 | max_steps_per_epoch: 100 37 | early_stop: 20 38 | seed: 1000 39 | logdir: output/test/transformer_tra_init 40 | model_type: Transformer 41 | model_config: *model_config 42 | tra_config: *tra_config 43 | lamb: 1.0 44 | rho: 0.99 45 | freeze_model: False 46 | model_init_state: 47 | dataset: 48 | class: MTSDatasetH 49 | module_path: src/dataset.py 50 | kwargs: 51 | handler: 52 | class: DataHandler 53 | module_path: qlib.data.dataset.handler 54 | kwargs: 55 | data_loader: *data_loader_config 56 | segments: 57 | train: [2007-10-30, 2016-05-27] 58 | valid: [2016-09-26, 2018-05-29] 59 | test: [2018-09-21, 2020-06-30] 60 | seq_len: 60 61 | horizon: 21 62 | num_states: *num_states 63 | batch_size: 512 -------------------------------------------------------------------------------- /examples/benchmarks/TRA/configs/config_alstm_tra.yaml: -------------------------------------------------------------------------------- 1 | qlib_init: 2 | provider_uri: "~/.qlib/qlib_data/cn_data" 3 | region: cn 4 | 5 | data_loader_config: &data_loader_config 6 | class: StaticDataLoader 7 | module_path: qlib.data.dataset.loader 8 | kwargs: 9 | config: 10 | feature: data/feature.pkl 11 | label: data/label.pkl 12 | 13 | model_config: &model_config 14 | input_size: 16 15 | hidden_size: 256 16 | num_layers: 2 17 | num_heads: 2 18 | use_attn: True 19 | dropout: 0.1 20 | 21 | num_states: &num_states 10 22 | 23 | tra_config: &tra_config 24 | num_states: *num_states 25 | hidden_size: 16 26 | tau: 1.0 27 | src_info: LR_TPE 28 | 29 | task: 30 | model: 31 | class: TRAModel 32 | module_path: src/model.py 33 | kwargs: 34 | lr: 0.0001 35 | n_epochs: 500 36 | max_steps_per_epoch: 100 37 | early_stop: 20 38 | seed: 1000 39 | logdir: output/test/alstm_tra 40 | model_type: LSTM 41 | model_config: *model_config 42 | tra_config: *tra_config 43 | lamb: 2.0 44 | rho: 0.99 45 | freeze_model: True 46 | model_init_state: output/test/alstm_tra_init/model.bin 47 | dataset: 48 | class: MTSDatasetH 49 | module_path: src/dataset.py 50 | kwargs: 51 | handler: 52 | class: DataHandler 53 | module_path: qlib.data.dataset.handler 54 | kwargs: 55 | data_loader: *data_loader_config 56 | segments: 57 | train: [2007-10-30, 2016-05-27] 58 | valid: [2016-09-26, 2018-05-29] 59 | test: [2018-09-21, 2020-06-30] 60 | seq_len: 60 61 | horizon: 21 62 | num_states: *num_states 63 | batch_size: 1024 -------------------------------------------------------------------------------- /examples/benchmarks/TRA/configs/config_transformer_tra.yaml: -------------------------------------------------------------------------------- 1 | qlib_init: 2 | provider_uri: "~/.qlib/qlib_data/cn_data" 3 | region: cn 4 | 5 | data_loader_config: &data_loader_config 6 | class: StaticDataLoader 7 | module_path: qlib.data.dataset.loader 8 | kwargs: 9 | config: 10 | feature: data/feature.pkl 11 | label: data/label.pkl 12 | 13 | model_config: &model_config 14 | input_size: 16 15 | hidden_size: 64 16 | num_layers: 2 17 | num_heads: 4 18 | use_attn: False 19 | dropout: 0.1 20 | 21 | num_states: &num_states 3 22 | 23 | tra_config: &tra_config 24 | num_states: *num_states 25 | hidden_size: 16 26 | tau: 1.0 27 | src_info: LR_TPE 28 | 29 | task: 30 | model: 31 | class: TRAModel 32 | module_path: src/model.py 33 | kwargs: 34 | lr: 0.0005 35 | n_epochs: 500 36 | max_steps_per_epoch: 100 37 | early_stop: 20 38 | seed: 1000 39 | logdir: output/test/transformer_tra 40 | model_type: Transformer 41 | 
model_config: *model_config 42 | tra_config: *tra_config 43 | lamb: 1.0 44 | rho: 0.99 45 | freeze_model: True 46 | model_init_state: output/test/transformer_tra_init/model.bin 47 | dataset: 48 | class: MTSDatasetH 49 | module_path: src/dataset.py 50 | kwargs: 51 | handler: 52 | class: DataHandler 53 | module_path: qlib.data.dataset.handler 54 | kwargs: 55 | data_loader: *data_loader_config 56 | segments: 57 | train: [2007-10-30, 2016-05-27] 58 | valid: [2016-09-26, 2018-05-29] 59 | test: [2018-09-21, 2020-06-30] 60 | seq_len: 60 61 | horizon: 21 62 | num_states: *num_states 63 | batch_size: 512 -------------------------------------------------------------------------------- /scripts/data_collector/crowd_source/README.md: -------------------------------------------------------------------------------- 1 | # Crowd Source Data 2 | 3 | ## Initiative 4 | Public data sources like Yahoo are flawed: they might miss data for stocks that have been delisted, and they might contain data that is wrong. This can introduce survivorship bias into our training process. 5 | 6 | The crowd-source data is introduced to merge data from multiple data sources and cross-validate them against each other, so that: 7 | 1. We will have a more complete history record. 8 | 2. We can identify anomalous data and apply corrections when necessary. 9 | 10 | ## Related Repo 11 | The raw data is hosted on the dolthub repo: https://www.dolthub.com/repositories/chenditc/investment_data 12 | 13 | The processing script and SQL are hosted on the github repo: https://github.com/chenditc/investment_data 14 | 15 | The packaged docker runtime is hosted on dockerhub: https://hub.docker.com/repository/docker/chenditc/investment_data 16 | 17 | ## How to use it in qlib 18 | ### Option 1: Download release bin data 19 | Users can download data in qlib bin format and use it directly: https://github.com/chenditc/investment_data/releases/latest 20 | ```bash 21 | wget https://github.com/chenditc/investment_data/releases/latest/download/qlib_bin.tar.gz 22 | tar -zxvf qlib_bin.tar.gz -C ~/.qlib/qlib_data/cn_data --strip-components=2 23 | ``` 24 | 25 | ### Option 2: Generate qlib data from dolthub 26 | The Dolthub data is updated daily, so if users want up-to-date data, they can dump the qlib bin data using docker: 27 | ``` 28 | docker run -v /:/output -it --rm chenditc/investment_data bash dump_qlib_bin.sh && cp ./qlib_bin.tar.gz /output/ 29 | ``` 30 | 31 | ## FAQ and other info 32 | See: https://github.com/chenditc/investment_data/blob/main/README.md 33 | -------------------------------------------------------------------------------- /qlib/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
3 | 4 | 5 | from __future__ import division, print_function 6 | 7 | from .cache import ( 8 | DatasetCache, 9 | DatasetURICache, 10 | DiskDatasetCache, 11 | DiskExpressionCache, 12 | ArcticExpressionCache, 13 | ExpressionCache, 14 | MemoryCalendarCache, 15 | SimpleDatasetCache, 16 | ) 17 | from .data import ( 18 | BaseProvider, 19 | CalendarProvider, 20 | ClientCalendarProvider, 21 | ClientDatasetProvider, 22 | ClientInstrumentProvider, 23 | ClientProvider, 24 | D, 25 | DatasetProvider, 26 | ExpressionProvider, 27 | FeatureProvider, 28 | InstrumentProvider, 29 | LocalCalendarProvider, 30 | LocalDatasetProvider, 31 | LocalExpressionProvider, 32 | LocalFeatureProvider, 33 | LocalInstrumentProvider, 34 | LocalPITProvider, 35 | LocalProvider, 36 | ) 37 | 38 | 39 | __all__ = [ 40 | "D", 41 | "CalendarProvider", 42 | "InstrumentProvider", 43 | "FeatureProvider", 44 | "ExpressionProvider", 45 | "DatasetProvider", 46 | "LocalCalendarProvider", 47 | "LocalInstrumentProvider", 48 | "LocalFeatureProvider", 49 | "LocalPITProvider", 50 | "LocalExpressionProvider", 51 | "LocalDatasetProvider", 52 | "ClientCalendarProvider", 53 | "ClientInstrumentProvider", 54 | "ClientDatasetProvider", 55 | "BaseProvider", 56 | "LocalProvider", 57 | "ClientProvider", 58 | "ExpressionCache", 59 | "DatasetCache", 60 | "DiskExpressionCache", 61 | "DiskDatasetCache", 62 | "SimpleDatasetCache", 63 | "DatasetURICache", 64 | "MemoryCalendarCache", 65 | "ArcticExpressionCache", 66 | ] 67 | -------------------------------------------------------------------------------- /scripts/data_collector/fund/README.md: -------------------------------------------------------------------------------- 1 | # Collect Fund Data 2 | 3 | > *Please pay **ATTENTION** that the data is collected from [天天基金网](https://fund.eastmoney.com/) and the data might not be perfect. We recommend that users prepare their own data if they have a high-quality dataset. 
For more information, users can refer to the [related document](https://qlib.readthedocs.io/en/latest/component/data.html#converting-csv-format-into-qlib-format)* 4 | 5 | ## Requirements 6 | 7 | ```bash 8 | pip install -r requirements.txt 9 | ``` 10 | 11 | ## Collect Data 12 | 13 | 14 | ### CN Data 15 | 16 | #### 1d from East Money 17 | 18 | ```bash 19 | 20 | # download from eastmoney.com 21 | python collector.py download_data --source_dir ~/.qlib/fund_data/source/cn_data --region CN --start 2020-11-01 --end 2020-11-10 --delay 0.1 --interval 1d 22 | 23 | # normalize 24 | python collector.py normalize_data --source_dir ~/.qlib/fund_data/source/cn_data --normalize_dir ~/.qlib/fund_data/source/cn_1d_nor --region CN --interval 1d --date_field_name FSRQ 25 | 26 | # dump data 27 | cd qlib/scripts 28 | python dump_bin.py dump_all --data_path ~/.qlib/fund_data/source/cn_1d_nor --qlib_dir ~/.qlib/qlib_data/cn_fund_data --freq day --date_field_name FSRQ --include_fields DWJZ,LJJZ 29 | 30 | ``` 31 | 32 | ### Using data 33 | 34 | ```python 35 | import qlib 36 | from qlib.data import D 37 | 38 | qlib.init(provider_uri="~/.qlib/qlib_data/cn_fund_data") 39 | df = D.features(D.instruments(market="all"), ["$DWJZ", "$LJJZ"], freq="day") 40 | ``` 41 | 42 | 43 | ### Help 44 | ```bash 45 | python collector.py collector_data --help 46 | ``` 47 | 48 | ## Parameters 49 | 50 | - interval: 1d 51 | - region: CN 52 | 53 | ## Disclaimer 54 | 55 | This project is for learning and research purposes only; it does not constitute guidance or advice for any action. Any disputes arising from it have no relation to this project. 56 | -------------------------------------------------------------------------------- /qlib/contrib/report/data/base.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | """ 4 | This module is responsible for analysing data 5 | 6 | Assumptions 7 | - Analyse each feature individually 8 | 9 | """ 10 | import pandas as pd 11 | 12 | from qlib.contrib.report.utils import sub_fig_generator 13 | from qlib.log import TimeInspector 14 | 15 | 16 | class FeaAnalyser: 17 | def __init__(self, dataset: pd.DataFrame): 18 | """ 19 | 20 | Parameters 21 | ---------- 22 | dataset : pd.DataFrame 23 | 24 | We often have multiple columns in the dataset. Each column corresponds to one sub figure. 25 | There will be a datetime column in the index levels. 26 | Aggregation will be used for more summarized metrics over time. 27 | Here is an example of data: 28 | 29 | .. code-block:: 30 | 31 | return 32 | datetime instrument 33 | 2007-02-06 equity_tpx 0.010087 34 | equity_spx 0.000786 35 | """ 36 | self._dataset = dataset 37 | with TimeInspector.logt("calc_stat_values"): 38 | self.calc_stat_values() 39 | 40 | def calc_stat_values(self): 41 | pass 42 | 43 | def plot_single(self, col, ax): 44 | raise NotImplementedError("This type of input is not supported") 45 | 46 | def skip(self, col): 47 | return False 48 | 49 | def plot_all(self, *args, **kwargs): 50 | ax_gen = iter(sub_fig_generator(*args, **kwargs)) 51 | for col in self._dataset: 52 | if not self.skip(col): 53 | ax = next(ax_gen) 54 | self.plot_single(col, ax) 55 | -------------------------------------------------------------------------------- /qlib/workflow/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | import atexit 5 | import logging 6 | import sys 7 | import traceback 8 | 9 | from ..log import get_module_logger 10 | from . 
import R 11 | from .recorder import Recorder 12 | 13 | logger = get_module_logger("workflow", logging.INFO) 14 | 15 | 16 | # function to handle the experiment when unusual program ending occurs 17 | def experiment_exit_handler(): 18 | """ 19 | Method for handling the experiment when any unusual program ending occurs. 20 | The `atexit` handler should be registered last, since it will be called as soon as the program ends. 21 | Thus, if any exception or user interruption occurs beforehand, we should handle them first. Once `R` is 22 | ended, another call of `R.end_exp` will not take effect. 23 | 24 | Limitations: 25 | - If pdb is used in your program, excepthook will not be triggered when it ends; the status will be marked as finished 26 | """ 27 | sys.excepthook = experiment_exception_hook # handle uncaught exception 28 | atexit.register(R.end_exp, recorder_status=Recorder.STATUS_FI) # will not take effect if experiment ends 29 | 30 | 31 | def experiment_exception_hook(exc_type, value, tb): 32 | """ 33 | End an experiment with status "FAILED". This hook tries to catch uncaught exceptions 34 | and end the experiment automatically. 35 | 36 | Parameters 37 | exc_type: Exception type 38 | value: Exception's value 39 | tb: Exception's traceback 40 | """ 41 | logger.error(f"An exception has been raised[{exc_type.__name__}: {value}].") 42 | 43 | # Same as original format 44 | traceback.print_tb(tb) 45 | print(f"{exc_type.__name__}: {value}") 46 | 47 | R.end_exp(recorder_status=Recorder.STATUS_FA) 48 | -------------------------------------------------------------------------------- /tests/test_get_data.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
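"""Tests for ``qlib.tests.data.GetData``: downloading the prepared qlib data and the raw csv source data, then sanity-checking what was fetched. This module docstring is an added summary of the tests below."""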
3 | 4 | import shutil 5 | import unittest 6 | from pathlib import Path 7 | 8 | import qlib 9 | from qlib.data import D 10 | from qlib.tests.data import GetData 11 | 12 | DATA_DIR = Path(__file__).parent.joinpath("test_get_data") 13 | SOURCE_DIR = DATA_DIR.joinpath("source") 14 | SOURCE_DIR.mkdir(exist_ok=True, parents=True) 15 | QLIB_DIR = DATA_DIR.joinpath("qlib") 16 | QLIB_DIR.mkdir(exist_ok=True, parents=True) 17 | 18 | 19 | class TestGetData(unittest.TestCase): 20 | FIELDS = "$open,$close,$high,$low,$volume,$factor,$change".split(",") 21 | 22 | @classmethod 23 | def setUpClass(cls) -> None: 24 | provider_uri = str(QLIB_DIR.resolve()) 25 | qlib.init( 26 | provider_uri=provider_uri, 27 | expression_cache=None, 28 | dataset_cache=None, 29 | ) 30 | 31 | @classmethod 32 | def tearDownClass(cls) -> None: 33 | shutil.rmtree(str(DATA_DIR.resolve())) 34 | 35 | def test_0_qlib_data(self): 36 | GetData().qlib_data( 37 | name="qlib_data_simple", target_dir=QLIB_DIR, region="cn", interval="1d", delete_old=False, exists_skip=True 38 | ) 39 | df = D.features(D.instruments("csi300"), self.FIELDS) 40 | self.assertListEqual(list(df.columns), self.FIELDS, "get qlib data failed") 41 | self.assertFalse(df.dropna().empty, "get qlib data failed") 42 | 43 | def test_1_csv_data(self): 44 | GetData().download_data(file_name="csv_data_cn.zip", target_dir=SOURCE_DIR) 45 | stock_name = set(map(lambda x: x.name[:-4].upper(), SOURCE_DIR.glob("*.csv"))) 46 | self.assertEqual(len(stock_name), 85, "get csv data failed") 47 | 48 | 49 | if __name__ == "__main__": 50 | unittest.main() 51 | -------------------------------------------------------------------------------- /examples/portfolio/README.md: -------------------------------------------------------------------------------- 1 | # Portfolio Optimization Strategy 2 | 3 | ## Introduction 4 | 5 | In `qlib/examples/benchmarks` we have various **alpha** models that predict 6 | the stock returns. We also use a simple rule-based `TopkDropoutStrategy` to 7 | evaluate the investing performance of these models. However, such a strategy 8 | is too simple to control portfolio risks such as correlation and volatility. 9 | 10 | To this end, an optimization-based strategy should be used to manage the 11 | trade-off between return and risk. In this doc, we will show how to use 12 | `EnhancedIndexingStrategy` to maximize portfolio return while minimizing 13 | the tracking error relative to a benchmark. 14 | 15 | 16 | ## Preparation 17 | 18 | We use China stock market data for our example. 19 | 20 | 1. Prepare CSI300 weight: 21 | 22 | ```bash 23 | wget https://github.com/SunsetWolf/qlib_dataset/releases/download/v0/csi300_weight.zip 24 | unzip -d ~/.qlib/qlib_data/cn_data csi300_weight.zip 25 | rm -f csi300_weight.zip 26 | ``` 27 | NOTE: We didn't find any free public resource providing the weights in the benchmark. To run the example, we manually created this weight data. 28 | 29 | 2. Prepare risk model data: 30 | 31 | ```bash 32 | python prepare_riskdata.py 33 | ``` 34 | 35 | Here we use a **Statistical Risk Model** implemented in `qlib.model.riskmodel`. 36 | However, users are strongly recommended to use other risk models for better quality: 37 | * **Fundamental Risk Model** like MSCI BARRA 38 | * [Deep Risk Model](https://arxiv.org/abs/2107.05201) 39 | 40 | 41 | ## End-to-End Workflow 42 | 43 | You can run the whole workflow with `EnhancedIndexingStrategy` by running 44 | `qrun config_enhanced_indexing.yaml`; a sketch of its strategy section is given below. 
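The following is a minimal, hypothetical sketch of what that strategy section may look like, expressed as a qlib-style Python config dict. The class name exists in `qlib.contrib.strategy`, but the kwargs shown here are assumptions; `config_enhanced_indexing.yaml` is authoritative.

```python
# Hypothetical sketch; the kwargs are assumptions, see config_enhanced_indexing.yaml.
strategy_config = {
    "class": "EnhancedIndexingStrategy",
    "module_path": "qlib.contrib.strategy",
    "kwargs": {
        # directory produced by prepare_riskdata.py above
        "riskmodel_root": "./riskdata",
    },
}
```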
45 | 46 | In this config, we mainly changed the strategy section compared to 47 | `qlib/examples/benchmarks/workflow_config_lightgbm_Alpha158.yaml`. 48 | -------------------------------------------------------------------------------- /qlib/rl/order_execution/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | from __future__ import annotations 5 | 6 | from typing import Any, cast 7 | 8 | import numpy as np 9 | import pandas as pd 10 | 11 | from qlib.backtest.decision import OrderDir 12 | from qlib.backtest.executor import BaseExecutor, NestedExecutor, SimulatorExecutor 13 | from qlib.constant import float_or_ndarray 14 | 15 | 16 | def dataframe_append(df: pd.DataFrame, other: Any) -> pd.DataFrame: 17 | # dataframe.append is deprecated 18 | other_df = pd.DataFrame(other).set_index("datetime") 19 | other_df.index.name = "datetime" 20 | 21 | res = pd.concat([df, other_df], axis=0) 22 | return res 23 | 24 | 25 | def price_advantage( 26 | exec_price: float_or_ndarray, 27 | baseline_price: float, 28 | direction: OrderDir | int, 29 | ) -> float_or_ndarray: 30 | if baseline_price == 0: # something is wrong with the data; it should be nan here 31 | if isinstance(exec_price, float): 32 | return 0.0 33 | else: 34 | return np.zeros_like(exec_price) 35 | if direction == OrderDir.BUY: 36 | res = (1 - exec_price / baseline_price) * 10000 37 | elif direction == OrderDir.SELL: 38 | res = (exec_price / baseline_price - 1) * 10000 39 | else: 40 | raise ValueError(f"Unexpected order direction: {direction}") 41 | res_wo_nan: np.ndarray = np.nan_to_num(res, nan=0.0) 42 | if res_wo_nan.size == 1: 43 | return res_wo_nan.item() 44 | else: 45 | return cast(float_or_ndarray, res_wo_nan) 46 | 47 | 48 | def get_simulator_executor(executor: BaseExecutor) -> SimulatorExecutor: 49 | while isinstance(executor, NestedExecutor): 50 | executor = executor.inner_executor 51 | assert isinstance(executor, SimulatorExecutor) 52 | return executor 53 | -------------------------------------------------------------------------------- /scripts/data_collector/crypto/README.md: -------------------------------------------------------------------------------- 1 | # Collect Crypto Data 2 | 3 | > *Please pay **ATTENTION** that the data is collected from [Coingecko](https://www.coingecko.com/en/api) and the data might not be perfect. We recommend that users prepare their own data if they have a high-quality dataset. 
For more information, users can refer to the [related document](https://qlib.readthedocs.io/en/latest/component/data.html#converting-csv-format-into-qlib-format)* 4 | 5 | ## Requirements 6 | 7 | ```bash 8 | pip install -r requirements.txt 9 | ``` 10 | 11 | ## Usage of the dataset 12 | > *The crypto dataset only supports the data retrieval function, not the backtest function, due to the lack of OHLC data.* 13 | 14 | ## Collect Data 15 | 16 | 17 | ### Crypto Data 18 | 19 | #### 1d from Coingecko 20 | 21 | ```bash 22 | 23 | # download from https://api.coingecko.com/api/v3/ 24 | python collector.py download_data --source_dir ~/.qlib/crypto_data/source/1d --start 2015-01-01 --end 2021-11-30 --delay 1 --interval 1d 25 | 26 | # normalize 27 | python collector.py normalize_data --source_dir ~/.qlib/crypto_data/source/1d --normalize_dir ~/.qlib/crypto_data/source/1d_nor --interval 1d --date_field_name date 28 | 29 | # dump data 30 | cd qlib/scripts 31 | python dump_bin.py dump_all --data_path ~/.qlib/crypto_data/source/1d_nor --qlib_dir ~/.qlib/qlib_data/crypto_data --freq day --date_field_name date --include_fields prices,total_volumes,market_caps 32 | 33 | ``` 34 | 35 | ### Using data 36 | 37 | ```python 38 | import qlib 39 | from qlib.data import D 40 | 41 | qlib.init(provider_uri="~/.qlib/qlib_data/crypto_data") 42 | df = D.features(D.instruments(market="all"), ["$prices", "$total_volumes","$market_caps"], freq="day") 43 | ``` 44 | 45 | 46 | ### Help 47 | ```bash 48 | python collector.py collector_data --help 49 | ``` 50 | 51 | ## Parameters 52 | 53 | - interval: 1d 54 | - delay: 1 55 | -------------------------------------------------------------------------------- /qlib/contrib/model/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | try: 4 | from .catboost_model import CatBoostModel 5 | except ModuleNotFoundError: 6 | CatBoostModel = None 7 | print("ModuleNotFoundError. CatBoostModel is skipped. (optional: maybe installing catboost can fix it.)") 8 | try: 9 | from .double_ensemble import DEnsembleModel 10 | from .gbdt import LGBModel 11 | except ModuleNotFoundError: 12 | DEnsembleModel, LGBModel = None, None 13 | print( 14 | "ModuleNotFoundError. DEnsembleModel and LGBModel are skipped. (optional: maybe installing lightgbm can fix it.)" 15 | ) 16 | try: 17 | from .xgboost import XGBModel 18 | except ModuleNotFoundError: 19 | XGBModel = None 20 | print("ModuleNotFoundError. XGBModel is skipped (optional: maybe installing xgboost can fix it).") 21 | try: 22 | from .linear import LinearModel 23 | except ModuleNotFoundError: 24 | LinearModel = None 25 | print("ModuleNotFoundError. LinearModel is skipped (optional: maybe installing scipy and sklearn can fix it).") 26 | # import pytorch models 27 | try: 28 | from .pytorch_add import ADD 29 | from .pytorch_alstm import ALSTM 30 | from .pytorch_gats import GATs 31 | from .pytorch_gru import GRU 32 | from .pytorch_lstm import LSTM 33 | from .pytorch_nn import DNNModelPytorch 34 | from .pytorch_sfm import SFM_Model 35 | from .pytorch_tabnet import TabnetModel 36 | from .pytorch_tcn import TCN 37 | 38 | pytorch_classes = (ALSTM, GATs, GRU, LSTM, DNNModelPytorch, TabnetModel, SFM_Model, TCN, ADD) 39 | except ModuleNotFoundError: 40 | pytorch_classes = () 41 | print("ModuleNotFoundError. 
PyTorch models are skipped (optional: maybe installing pytorch can fix it).") 42 | 43 | all_model_classes = (CatBoostModel, DEnsembleModel, LGBModel, XGBModel, LinearModel) + pytorch_classes 44 | -------------------------------------------------------------------------------- /examples/highfreq/README.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | This folder contains 2 examples 3 | - A high-frequency dataset example 4 | - An example of predicting the price trend in high-frequency data 5 | 6 | ## High-Frequency Dataset 7 | 8 | This dataset is an example for RL high-frequency trading. 9 | 10 | ### Get High-Frequency Data 11 | 12 | Get high-frequency data by running the following command: 13 | ```bash 14 | python workflow.py get_data 15 | ``` 16 | 17 | ### Dump & Reload & Reinitialize the Dataset 18 | 19 | 20 | The High-Frequency Dataset is implemented as `qlib.data.dataset.DatasetH` in `workflow.py`. `DatasetH` is a subclass of [`qlib.utils.serial.Serializable`](https://qlib.readthedocs.io/en/latest/advanced/serial.html), whose state can be dumped to or loaded from disk in `pickle` format. 21 | 22 | ### About Reinitialization 23 | 24 | After reloading `Dataset` from disk, `Qlib` also supports reinitializing the dataset. It means that users can reset some states of `Dataset` or `DataHandler` such as `instruments`, `start_time`, `end_time` and `segments`, etc., and generate new data according to the states. 25 | 26 | The example is given in `workflow.py`; users can run the code as follows. 27 | 28 | ### Run the Code 29 | 30 | Run the example by running the following command: 31 | ```bash 32 | python workflow.py dump_and_load_dataset 33 | ``` 34 | 35 | ## Benchmarks Performance (predicting the price trend in high-frequency data) 36 | 37 | Here are the results of models for predicting the price trend in high-frequency data. We will keep updating benchmark models in the future. 38 | 39 | | Model Name | Dataset | IC | ICIR | Rank IC | Rank ICIR | Long precision| Short Precision | Long-Short Average Return | Long-Short Average Sharpe | 40 | |---|---|---|---|---|---|---|---|---|---| 41 | | LightGBM | Alpha158 | 0.0349±0.00 | 0.3805±0.00| 0.0435±0.00 | 0.4724±0.00 | 0.5111±0.00 | 0.5428±0.00 | 0.000074±0.00 | 0.2677±0.00 | 42 | -------------------------------------------------------------------------------- /tests/dataset_tests/test_datalayer.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | 5 | from qlib.data import D 6 | from qlib.tests import TestAutoData 7 | 8 | 9 | class TestDataset(TestAutoData): 10 | def testCSI300(self): 11 | close_p = D.features(D.instruments("csi300"), ["$close"]) 12 | size = close_p.groupby("datetime", group_keys=False).size() 13 | cnt = close_p.groupby("datetime", group_keys=False).count()["$close"] 14 | size_desc = size.describe(percentiles=np.arange(0.1, 1.0, 0.1)) 15 | cnt_desc = cnt.describe(percentiles=np.arange(0.1, 1.0, 0.1)) 16 | 17 | print(size_desc) 18 | print(cnt_desc) 19 | 20 | self.assertLessEqual(size_desc.loc["max"], 305, "Excessive number of CSI300 constituent stocks") 21 | self.assertGreaterEqual(size_desc.loc["80%"], 290, "Insufficient number of CSI300 constituent stocks") 22 | 23 | self.assertLessEqual(cnt_desc.loc["max"], 305, "Excessive number of CSI300 constituent stocks") 24 | # FIXME: Due to the low quality of the data, 
it is hard to make sure there is enough data 25 | # self.assertEqual(cnt_desc.loc["80%"], 300, "Insufficient number of CSI300 constituent stocks") 26 | 27 | def testClose(self): 28 | close_p = D.features(D.instruments("csi300"), ["Ref($close, 1)/$close - 1"]) 29 | close_desc = close_p.describe(percentiles=np.arange(0.1, 1.0, 0.1)) 30 | print(close_desc) 31 | self.assertLessEqual(abs(close_desc.loc["90%"][0]), 0.1, "Close value is abnormal") 32 | self.assertLessEqual(abs(close_desc.loc["10%"][0]), 0.1, "Close value is abnormal") 33 | # FIXME: The yahoo data is not perfect. We have to 34 | # self.assertLessEqual(abs(close_desc.loc["max"][0]), 0.2, "Close value is abnormal") 35 | # self.assertGreaterEqual(close_desc.loc["min"][0], -0.2, "Close value is abnormal") 36 | 37 | 38 | if __name__ == "__main__": 39 | unittest.main() 40 | -------------------------------------------------------------------------------- /examples/portfolio/prepare_riskdata.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | import os 4 | 5 | import numpy as np 6 | import pandas as pd 7 | 8 | from qlib.data import D 9 | from qlib.model.riskmodel import StructuredCovEstimator 10 | 11 | 12 | def prepare_data(riskdata_root="./riskdata", T=240, start_time="2016-01-01"): 13 | universe = D.features(D.instruments("csi300"), ["$close"], start_time=start_time).swaplevel().sort_index() 14 | 15 | price_all = ( 16 | D.features(D.instruments("all"), ["$close"], start_time=start_time).squeeze().unstack(level="instrument") 17 | ) 18 | 19 | # StructuredCovEstimator is a statistical risk model 20 | riskmodel = StructuredCovEstimator() 21 | 22 | for i in range(T - 1, len(price_all)): 23 | date = price_all.index[i] 24 | ref_date = price_all.index[i - T + 1] 25 | 26 | print(date) 27 | 28 | codes = universe.loc[date].index 29 | price = price_all.loc[ref_date:date, codes] 30 | 31 | # calculate return and remove extreme return 32 | ret = price.pct_change() 33 | ret.clip(ret.quantile(0.025), ret.quantile(0.975), axis=1, inplace=True) 34 | 35 | # run risk model 36 | F, cov_b, var_u = riskmodel.predict(ret, is_price=False, return_decomposed_components=True) 37 | 38 | # save risk data 39 | root = riskdata_root + "/" + date.strftime("%Y%m%d") 40 | os.makedirs(root, exist_ok=True) 41 | 42 | pd.DataFrame(F, index=codes).to_pickle(root + "/factor_exp.pkl") 43 | pd.DataFrame(cov_b).to_pickle(root + "/factor_cov.pkl") 44 | # for specific_risk we follow the convention to save volatility 45 | pd.Series(np.sqrt(var_u), index=codes).to_pickle(root + "/specific_risk.pkl") 46 | 47 | 48 | if __name__ == "__main__": 49 | import qlib 50 | 51 | qlib.init(provider_uri="~/.qlib/qlib_data/cn_data") 52 | 53 | prepare_data() 54 | -------------------------------------------------------------------------------- /examples/rl_order_execution/exp_configs/backtest_twap.yml: -------------------------------------------------------------------------------- 1 | order_file: ./data/orders/test_orders.pkl 2 | start_time: "9:30" 3 | end_time: "14:54" 4 | data_granularity: "5min" 5 | qlib: 6 | provider_uri_5min: ./data/bin/ 7 | exchange: 8 | limit_threshold: null 9 | deal_price: ["$close", "$close"] 10 | volume_threshold: null 11 | strategies: 12 | 1day: 13 | class: SAOEIntStrategy 14 | kwargs: 15 | data_granularity: 5 16 | action_interpreter: 17 | class: CategoricalActionInterpreter 18 | kwargs: 19 | max_step: 8 20 | values: 4 21 | module_path: 
qlib.rl.order_execution.interpreter 22 | network: 23 | class: Recurrent 24 | kwargs: {} 25 | module_path: qlib.rl.order_execution.network 26 | policy: 27 | class: PPO # PPO, DQN 28 | kwargs: 29 | lr: 0.0001 30 | # Uncomment `weight_file` once the training workflow finishes. You can change the checkpoint file you want to use. 31 | # weight_file: outputs/ppo/checkpoints/latest.pth 32 | module_path: qlib.rl.order_execution.policy 33 | state_interpreter: 34 | class: FullHistoryStateInterpreter 35 | kwargs: 36 | data_dim: 5 37 | data_ticks: 48 38 | max_step: 8 39 | processed_data_provider: 40 | class: HandlerProcessedDataProvider 41 | kwargs: 42 | data_dir: ./data/pickle/ 43 | feature_columns_today: ["$high", "$low", "$open", "$close", "$volume"] 44 | feature_columns_yesterday: ["$high_1", "$low_1", "$open_1", "$close_1", "$volume_1"] 45 | module_path: qlib.rl.data.native 46 | module_path: qlib.rl.order_execution.interpreter 47 | module_path: qlib.rl.order_execution.strategy 48 | 30min: 49 | class: TWAPStrategy 50 | kwargs: {} 51 | module_path: qlib.contrib.strategy.rule_strategy 52 | concurrency: 16 53 | output_dir: outputs/ppo/ 54 | -------------------------------------------------------------------------------- /examples/rl_order_execution/exp_configs/backtest_opds.yml: -------------------------------------------------------------------------------- 1 | order_file: ./data/orders/test_orders.pkl 2 | start_time: "9:30" 3 | end_time: "14:54" 4 | data_granularity: "5min" 5 | qlib: 6 | provider_uri_5min: ./data/bin/ 7 | exchange: 8 | limit_threshold: null 9 | deal_price: ["$close", "$close"] 10 | volume_threshold: null 11 | strategies: 12 | 1day: 13 | class: SAOEIntStrategy 14 | kwargs: 15 | data_granularity: 5 16 | action_interpreter: 17 | class: CategoricalActionInterpreter 18 | kwargs: 19 | max_step: 8 20 | values: 4 21 | module_path: qlib.rl.order_execution.interpreter 22 | network: 23 | class: Recurrent 24 | kwargs: {} 25 | module_path: qlib.rl.order_execution.network 26 | policy: 27 | class: PPO # PPO, DQN 28 | kwargs: 29 | lr: 0.0001 30 | # Uncomment `weight_file` once the training workflow finishes. You can change the checkpoint file you want to use. 
31 | # weight_file: outputs/opds/checkpoints/latest.pth 32 | module_path: qlib.rl.order_execution.policy 33 | state_interpreter: 34 | class: FullHistoryStateInterpreter 35 | kwargs: 36 | data_dim: 5 37 | data_ticks: 48 38 | max_step: 8 39 | processed_data_provider: 40 | class: HandlerProcessedDataProvider 41 | kwargs: 42 | data_dir: ./data/pickle/ 43 | feature_columns_today: ["$high", "$low", "$open", "$close", "$volume"] 44 | feature_columns_yesterday: ["$high_1", "$low_1", "$open_1", "$close_1", "$volume_1"] 45 | module_path: qlib.rl.data.native 46 | module_path: qlib.rl.order_execution.interpreter 47 | module_path: qlib.rl.order_execution.strategy 48 | 30min: 49 | class: TWAPStrategy 50 | kwargs: {} 51 | module_path: qlib.contrib.strategy.rule_strategy 52 | concurrency: 16 53 | output_dir: outputs/opds/ 54 | -------------------------------------------------------------------------------- /examples/rl_order_execution/exp_configs/train_opds.yml: -------------------------------------------------------------------------------- 1 | simulator: 2 | data_granularity: 5 3 | time_per_step: 30 4 | vol_limit: null 5 | env: 6 | concurrency: 32 7 | parallel_mode: dummy 8 | action_interpreter: 9 | class: CategoricalActionInterpreter 10 | kwargs: 11 | values: 4 12 | max_step: 8 13 | module_path: qlib.rl.order_execution.interpreter 14 | state_interpreter: 15 | class: FullHistoryStateInterpreter 16 | kwargs: 17 | data_dim: 5 18 | data_ticks: 48 # 48 = 240 min / 5 min 19 | max_step: 8 20 | processed_data_provider: 21 | class: HandlerProcessedDataProvider 22 | kwargs: 23 | data_dir: ./data/pickle/ 24 | feature_columns_today: ["$high", "$low", "$open", "$close", "$volume"] 25 | feature_columns_yesterday: ["$high_1", "$low_1", "$open_1", "$close_1", "$volume_1"] 26 | backtest: false 27 | module_path: qlib.rl.data.native 28 | module_path: qlib.rl.order_execution.interpreter 29 | reward: 30 | class: PAPenaltyReward 31 | kwargs: 32 | penalty: 4.0 33 | scale: 0.01 34 | module_path: qlib.rl.order_execution.reward 35 | data: 36 | source: 37 | order_dir: ./data/orders 38 | feature_root_dir: ./data/pickle/ 39 | feature_columns_today: ["$close0", "$volume0"] 40 | feature_columns_yesterday: [] 41 | total_time: 240 42 | default_start_time_index: 0 43 | default_end_time_index: 235 44 | proc_data_dim: 5 45 | num_workers: 0 46 | queue_size: 20 47 | network: 48 | class: Recurrent 49 | module_path: qlib.rl.order_execution.network 50 | policy: 51 | class: PPO # PPO, DQN 52 | kwargs: 53 | lr: 0.0001 54 | module_path: qlib.rl.order_execution.policy 55 | runtime: 56 | seed: 42 57 | use_cuda: false 58 | trainer: 59 | max_epoch: 500 60 | repeat_per_collect: 25 61 | earlystop_patience: 50 62 | episode_per_collect: 10000 63 | batch_size: 1024 64 | val_every_n_epoch: 4 65 | checkpoint_path: ./outputs/opds 66 | checkpoint_every_n_iters: 1 67 | -------------------------------------------------------------------------------- /examples/rl_order_execution/scripts/gen_pickle_data.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
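"""Generate pickled high-frequency data for the RL order-execution examples: read a YAML config, build feature/backtest dataframes with ``HighFreqProvider``, and optionally split them by date and/or by stock. This module docstring is an added summary of the script below."""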
3 | 4 | import argparse 5 | import os 6 | import shutil 7 | from copy import deepcopy 8 | 9 | import yaml 10 | 11 | from qlib.contrib.data.highfreq_provider import HighFreqProvider 12 | 13 | loader = yaml.FullLoader 14 | 15 | if __name__ == "__main__": 16 | parser = argparse.ArgumentParser() 17 | parser.add_argument("-c", "--config", type=str, default="config.yml") 18 | parser.add_argument("-d", "--dest", type=str, default=".") 19 | parser.add_argument("-s", "--split", type=str, choices=["none", "date", "stock", "both"], default="stock") 20 | args = parser.parse_args() 21 | 22 | with open(args.config) as f: conf = yaml.load(f, Loader=loader) 23 | 24 | for k, v in conf.items(): 25 | if isinstance(v, dict) and "path" in v: 26 | v["path"] = os.path.join(args.dest, v["path"]) 27 | provider = HighFreqProvider(**conf) 28 | 29 | # Generate the dataframes 30 | if "feature_conf" in conf: 31 | feature = provider._gen_dataframe(deepcopy(provider.feature_conf)) 32 | if "backtest_conf" in conf: 33 | backtest = provider._gen_dataframe(deepcopy(provider.backtest_conf)) 34 | 35 | provider.feature_conf["path"] = os.path.splitext(provider.feature_conf["path"])[0] + "/" 36 | provider.backtest_conf["path"] = os.path.splitext(provider.backtest_conf["path"])[0] + "/" 37 | # Split by date 38 | if args.split == "date" or args.split == "both": 39 | provider._gen_day_dataset(deepcopy(provider.feature_conf), "feature") 40 | provider._gen_day_dataset(deepcopy(provider.backtest_conf), "backtest") 41 | 42 | # Split by stock 43 | if args.split == "stock" or args.split == "both": 44 | provider._gen_stock_dataset(deepcopy(provider.feature_conf), "feature") 45 | provider._gen_stock_dataset(deepcopy(provider.backtest_conf), "backtest") 46 | 47 | shutil.rmtree("stat/", ignore_errors=True) 48 | -------------------------------------------------------------------------------- /qlib/model/meta/task.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | from qlib.data.dataset import Dataset 5 | 6 | from ...utils import init_instance_by_config 7 | 8 | 9 | class MetaTask: 10 | """ 11 | A single meta-task; a meta-dataset contains a list of them. 12 | It serves as a component in MetaDatasetDS. 13 | 14 | The data processing differs across modes: 15 | 16 | - the processed input may differ between training and testing 17 | 18 | - When training, X, y, X_test and y_test are necessary in training tasks (# PROC_MODE_FULL #) but not in test tasks.
(# PROC_MODE_TEST #) 20 | - When the meta-model can be transferred to other datasets, only meta_info is necessary (# PROC_MODE_TRANSFER #) 21 | """ 22 | 23 | PROC_MODE_FULL = "full" 24 | PROC_MODE_TEST = "test" 25 | PROC_MODE_TRANSFER = "transfer" 26 | 27 | def __init__(self, task: dict, meta_info: object, mode: str = PROC_MODE_FULL): 28 | """ 29 | The `__init__` func is responsible for 30 | 31 | - storing the task 32 | - storing the original input data 33 | - processing the input data into meta data 34 | 35 | Parameters 36 | ---------- 37 | task : dict 38 | the task to be enhanced by the meta-model 39 | 40 | meta_info : object 41 | the input for the meta-model 42 | """ 43 | self.task = task 44 | self.meta_info = meta_info # the original meta input information; it will be processed later 45 | self.mode = mode 46 | 47 | def get_dataset(self) -> Dataset: 48 | return init_instance_by_config(self.task["dataset"], accept_types=Dataset) 49 | 50 | def get_meta_input(self) -> object: 51 | """ 52 | Return the **processed** meta_info 53 | """ 54 | return self.meta_info 55 | 56 | def __repr__(self): 57 | return f"MetaTask(task={self.task}, meta_info={self.meta_info})" 58 | -------------------------------------------------------------------------------- /qlib/rl/data/base.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | from __future__ import annotations 4 | 5 | from abc import abstractmethod 6 | 7 | import pandas as pd 8 | 9 | 10 | class BaseIntradayBacktestData: 11 | """ 12 | Raw market data that is often used in backtesting (thus called BacktestData). 13 | 14 | Base class for all types of backtest data. Currently, each type of simulator has its corresponding backtest 15 | data type. 16 | """ 17 | 18 | @abstractmethod 19 | def __repr__(self) -> str: 20 | raise NotImplementedError 21 | 22 | @abstractmethod 23 | def __len__(self) -> int: 24 | raise NotImplementedError 25 | 26 | @abstractmethod 27 | def get_deal_price(self) -> pd.Series: 28 | raise NotImplementedError 29 | 30 | @abstractmethod 31 | def get_volume(self) -> pd.Series: 32 | raise NotImplementedError 33 | 34 | @abstractmethod 35 | def get_time_index(self) -> pd.DatetimeIndex: 36 | raise NotImplementedError 37 | 38 | 39 | class BaseIntradayProcessedData: 40 | """Processed market data after data cleanup and feature engineering. 41 | 42 | It contains both processed data for "today" and "yesterday", as some algorithms 43 | might use the market information of the previous day to assist decision making. 44 | """ 45 | 46 | today: pd.DataFrame 47 | """Processed data for "today". 48 | Number of records must be ``time_length``, and columns must be ``feature_dim``.""" 49 | 50 | yesterday: pd.DataFrame 51 | """Processed data for "yesterday".
52 | Number of records must be ``time_length``, and columns must be ``feature_dim``.""" 53 | 54 | 55 | class ProcessedDataProvider: 56 | """Provider of processed data""" 57 | 58 | def get_data( 59 | self, 60 | stock_id: str, 61 | date: pd.Timestamp, 62 | feature_dim: int, 63 | time_index: pd.Index, 64 | ) -> BaseIntradayProcessedData: 65 | raise NotImplementedError 66 | -------------------------------------------------------------------------------- /examples/rl_order_execution/exp_configs/train_ppo.yml: -------------------------------------------------------------------------------- 1 | simulator: 2 | data_granularity: 5 3 | time_per_step: 30 4 | vol_limit: null 5 | env: 6 | concurrency: 32 7 | parallel_mode: dummy 8 | action_interpreter: 9 | class: CategoricalActionInterpreter 10 | kwargs: 11 | values: 4 12 | max_step: 8 13 | module_path: qlib.rl.order_execution.interpreter 14 | state_interpreter: 15 | class: FullHistoryStateInterpreter 16 | kwargs: 17 | data_dim: 5 18 | data_ticks: 48 # 48 = 240 min / 5 min 19 | max_step: 8 20 | processed_data_provider: 21 | class: HandlerProcessedDataProvider 22 | kwargs: 23 | data_dir: ./data/pickle/ 24 | feature_columns_today: ["$high", "$low", "$open", "$close", "$volume"] 25 | feature_columns_yesterday: ["$high_1", "$low_1", "$open_1", "$close_1", "$volume_1"] 26 | backtest: false 27 | module_path: qlib.rl.data.native 28 | module_path: qlib.rl.order_execution.interpreter 29 | reward: 30 | class: PPOReward 31 | kwargs: 32 | max_step: 8 33 | start_time_index: 0 34 | end_time_index: 46 # 46 = (240 - 5) min / 5 min - 1 35 | module_path: qlib.rl.order_execution.reward 36 | data: 37 | source: 38 | order_dir: ./data/orders 39 | feature_root_dir: ./data/pickle/ 40 | feature_columns_today: ["$close0", "$volume0"] 41 | feature_columns_yesterday: [] 42 | total_time: 240 43 | default_start_time_index: 0 44 | default_end_time_index: 235 45 | proc_data_dim: 5 46 | num_workers: 0 47 | queue_size: 20 48 | network: 49 | class: Recurrent 50 | module_path: qlib.rl.order_execution.network 51 | policy: 52 | class: PPO # PPO, DQN 53 | kwargs: 54 | lr: 0.0001 55 | module_path: qlib.rl.order_execution.policy 56 | runtime: 57 | seed: 42 58 | use_cuda: false 59 | trainer: 60 | max_epoch: 500 61 | repeat_per_collect: 25 62 | earlystop_patience: 50 63 | episode_per_collect: 10000 64 | batch_size: 1024 65 | val_every_n_epoch: 4 66 | checkpoint_path: ./outputs/ppo 67 | checkpoint_every_n_iters: 1 68 | -------------------------------------------------------------------------------- /tests/test_register_ops.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
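# This test demonstrates how to register custom expression operators:
# subclass ElemOperator (one input) or PairOperator (two inputs), implement
# _load_internal (plus get_extended_window_size when the operator needs extra
# history, as Diff does), and pass the classes through the `custom_ops`
# argument of qlib's initialization (here via TestAutoData._setup_kwargs).
# Registered operators can then be used by name inside expressions such as
# "Diff($close)" or "Distance($close, Ref($close, 1))".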
3 | 4 | import unittest 5 | 6 | import numpy as np 7 | 8 | from qlib.data import D 9 | from qlib.data.ops import ElemOperator, PairOperator 10 | from qlib.tests import TestAutoData 11 | 12 | 13 | class Diff(ElemOperator): 14 | """Feature First Difference 15 | Parameters 16 | ---------- 17 | feature : Expression 18 | feature instance 19 | Returns 20 | ------- 21 | Expression 22 | a feature instance with the first difference 23 | """ 24 | 25 | def _load_internal(self, instrument, start_index, end_index, freq): 26 | series = self.feature.load(instrument, start_index, end_index, freq) 27 | return series.diff() 28 | 29 | def get_extended_window_size(self): 30 | lft_etd, rght_etd = self.feature.get_extended_window_size() 31 | return lft_etd + 1, rght_etd 32 | 33 | 34 | class Distance(PairOperator): 35 | """Feature Distance 36 | Parameters 37 | ---------- 38 | feature_left, feature_right : Expression 39 | feature instances 40 | Returns 41 | ------- 42 | Expression 43 | a feature instance with the distance 44 | """ 45 | 46 | def _load_internal(self, instrument, start_index, end_index, freq): 47 | series_left = self.feature_left.load(instrument, start_index, end_index, freq) 48 | series_right = self.feature_right.load(instrument, start_index, end_index, freq) 49 | return np.abs(series_left - series_right) 50 | 51 | 52 | class TestRegisterCustomOps(TestAutoData): 53 | @classmethod 54 | def setUpClass(cls) -> None: 55 | cls._setup_kwargs.update({"custom_ops": [Diff, Distance]}) 56 | super().setUpClass() 57 | 58 | def test_register_custom_ops(self): 59 | instruments = ["SH600000"] 60 | fields = ["Diff($close)", "Distance($close, Ref($close, 1))"] 61 | print(D.features(instruments, fields, start_time="2010-01-01", end_time="2017-12-31", freq="day")) 62 | 63 | 64 | if __name__ == "__main__": 65 | unittest.main() 66 | -------------------------------------------------------------------------------- /examples/benchmarks/TFT/workflow_config_tft_Alpha158.yaml: -------------------------------------------------------------------------------- 1 | sys: 2 | rel_path: .
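# NOTE: the `sys.rel_path` entry above is resolved relative to this config
# file and added to Python's sys.path, so that the local `tft` package
# referenced by `module_path: tft` in the task section below can be imported.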
3 | qlib_init: 4 | provider_uri: "~/.qlib/qlib_data/cn_data" 5 | region: cn 6 | market: &market csi300 7 | benchmark: &benchmark SH000300 8 | data_handler_config: &data_handler_config 9 | start_time: 2008-01-01 10 | end_time: 2020-08-01 11 | fit_start_time: 2008-01-01 12 | fit_end_time: 2014-12-31 13 | instruments: *market 14 | port_analysis_config: &port_analysis_config 15 | strategy: 16 | class: TopkDropoutStrategy 17 | module_path: qlib.contrib.strategy 18 | kwargs: 19 | signal: 20 | topk: 50 21 | n_drop: 5 22 | backtest: 23 | start_time: 2017-01-01 24 | end_time: 2020-08-01 25 | account: 100000000 26 | benchmark: *benchmark 27 | exchange_kwargs: 28 | limit_threshold: 0.095 29 | deal_price: close 30 | open_cost: 0.0005 31 | close_cost: 0.0015 32 | min_cost: 5 33 | task: 34 | model: 35 | class: TFTModel 36 | module_path: tft 37 | dataset: 38 | class: DatasetH 39 | module_path: qlib.data.dataset 40 | kwargs: 41 | handler: 42 | class: Alpha158 43 | module_path: qlib.contrib.data.handler 44 | kwargs: *data_handler_config 45 | segments: 46 | train: [2008-01-01, 2014-12-31] 47 | valid: [2015-01-01, 2016-12-31] 48 | test: [2017-01-01, 2020-08-01] 49 | record: 50 | - class: SignalRecord 51 | module_path: qlib.workflow.record_temp 52 | kwargs: 53 | model: 54 | dataset: 55 | - class: SigAnaRecord 56 | module_path: qlib.workflow.record_temp 57 | kwargs: 58 | ana_long_short: False 59 | ann_scaler: 252 60 | - class: PortAnaRecord 61 | module_path: qlib.workflow.record_temp 62 | kwargs: 63 | config: *port_analysis_config 64 | -------------------------------------------------------------------------------- /examples/hyperparameter/LightGBM/hyperparameter_158.py: -------------------------------------------------------------------------------- 1 | import optuna 2 | 3 | import qlib 4 | from qlib.constant import REG_CN 5 | from qlib.tests.config import CSI300_DATASET_CONFIG 6 | from qlib.tests.data import GetData 7 | from qlib.utils import init_instance_by_config 8 | 9 | 10 | def objective(trial): 11 | task = { 12 | "model": { 13 | "class": "LGBModel", 14 | "module_path": "qlib.contrib.model.gbdt", 15 | "kwargs": { 16 | "loss": "mse", 17 | "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1), 18 | "learning_rate": trial.suggest_uniform("learning_rate", 0, 1), 19 | "subsample": trial.suggest_uniform("subsample", 0, 1), 20 | "lambda_l1": trial.suggest_loguniform("lambda_l1", 1e-8, 1e4), 21 | "lambda_l2": trial.suggest_loguniform("lambda_l2", 1e-8, 1e4), 22 | "max_depth": 10, 23 | "num_leaves": trial.suggest_int("num_leaves", 1, 1024), 24 | "feature_fraction": trial.suggest_uniform("feature_fraction", 0.4, 1.0), 25 | "bagging_fraction": trial.suggest_uniform("bagging_fraction", 0.4, 1.0), 26 | "bagging_freq": trial.suggest_int("bagging_freq", 1, 7), 27 | "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 1, 50), 28 | "min_child_samples": trial.suggest_int("min_child_samples", 5, 100), 29 | }, 30 | }, 31 | } 32 | evals_result = dict() 33 | model = init_instance_by_config(task["model"]) 34 | model.fit(dataset, evals_result=evals_result) 35 | return min(evals_result["valid"]) 36 | 37 | 38 | if __name__ == "__main__": 39 | provider_uri = "~/.qlib/qlib_data/cn_data" 40 | GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True) 41 | qlib.init(provider_uri=provider_uri, region=REG_CN) 42 | 43 | dataset = init_instance_by_config(CSI300_DATASET_CONFIG) 44 | 45 | study = optuna.create_study(study_name="LGBM_158", storage="sqlite:///db.sqlite3", load_if_exists=True) 46 | study.optimize(objective,
n_jobs=6) 47 | -------------------------------------------------------------------------------- /scripts/collect_info.py: -------------------------------------------------------------------------------- 1 | import platform 2 | import sys 3 | from pathlib import Path 4 | 5 | import fire 6 | import pkg_resources 7 | 8 | import qlib 9 | 10 | QLIB_PATH = Path(__file__).resolve().parent.parent 11 | 12 | 13 | class InfoCollector: 14 | """ 15 | Users can collect system info by running the following command: 16 | `cd scripts && python collect_info.py all` 17 | - NOTE: please avoid running this script in a project folder which contains `qlib` 18 | """ 19 | 20 | def sys(self): 21 | """collect system-related info""" 22 | for method in ["system", "machine", "platform", "version"]: 23 | print(getattr(platform, method)()) 24 | 25 | def py(self): 26 | """collect Python-related info""" 27 | print("Python version: {}".format(sys.version.replace("\n", " "))) 28 | 29 | def qlib(self): 30 | """collect qlib-related info""" 31 | print("Qlib version: {}".format(qlib.__version__)) 32 | REQUIRED = [ 33 | "numpy", 34 | "pandas", 35 | "scipy", 36 | "requests", 37 | "sacred", 38 | "python-socketio", 39 | "redis", 40 | "python-redis-lock", 41 | "schedule", 42 | "cvxpy", 43 | "hyperopt", 44 | "fire", 45 | "statsmodels", 46 | "xlrd", 47 | "plotly", 48 | "matplotlib", 49 | "tables", 50 | "pyyaml", 51 | "mlflow", 52 | "tqdm", 53 | "loguru", 54 | "lightgbm", 55 | "tornado", 56 | "joblib", 57 | "ruamel.yaml", 58 | ] 59 | 60 | for package in REQUIRED: 61 | version = pkg_resources.get_distribution(package).version 62 | print(f"{package}=={version}") 63 | 64 | def all(self): 65 | """collect all info""" 66 | for method in ["sys", "py", "qlib"]: 67 | getattr(self, method)() 68 | print() 69 | 70 | 71 | if __name__ == "__main__": 72 | fire.Fire(InfoCollector) 73 | -------------------------------------------------------------------------------- /examples/data_demo/data_cache_demo.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License.
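# The demo below runs `qrun` on the LightGBM Alpha158 benchmark config twice:
# once from scratch, and once after dumping the fitted DataHandlerLP to
# handler.pkl and pointing the task at it via a `file://` URI, so that the two
# reported timings show the speedup gained from the handler cache.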
3 | """ 4 | The motivation of this demo 5 | - To show the data modules of Qlib is Serializable, users can dump processed data to disk to avoid duplicated data preprocessing 6 | """ 7 | 8 | from copy import deepcopy 9 | from pathlib import Path 10 | import pickle 11 | from pprint import pprint 12 | from ruamel.yaml import YAML 13 | import subprocess 14 | from qlib.log import TimeInspector 15 | 16 | from qlib import init 17 | from qlib.data.dataset.handler import DataHandlerLP 18 | from qlib.utils import init_instance_by_config 19 | 20 | # For general purpose, we use relative path 21 | DIRNAME = Path(__file__).absolute().resolve().parent 22 | 23 | if __name__ == "__main__": 24 | init() 25 | 26 | config_path = DIRNAME.parent / "benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml" 27 | 28 | # 1) show original time 29 | with TimeInspector.logt("The original time without handler cache:"): 30 | subprocess.run(f"qrun {config_path}", shell=True) 31 | 32 | # 2) dump handler 33 | yaml = YAML(typ="safe", pure=True) 34 | task_config = yaml.load(config_path.open()) 35 | hd_conf = task_config["task"]["dataset"]["kwargs"]["handler"] 36 | pprint(hd_conf) 37 | hd: DataHandlerLP = init_instance_by_config(hd_conf) 38 | hd_path = DIRNAME / "handler.pkl" 39 | hd.to_pickle(hd_path, dump_all=True) 40 | 41 | # 3) create new task with handler cache 42 | new_task_config = deepcopy(task_config) 43 | new_task_config["task"]["dataset"]["kwargs"]["handler"] = f"file://{hd_path}" 44 | new_task_config["sys"] = {"path": [str(config_path.parent.resolve())]} 45 | new_task_path = DIRNAME / "new_task.yaml" 46 | print("The location of the new task", new_task_path) 47 | 48 | # save new task 49 | with new_task_path.open("w") as f: 50 | yaml.safe_dump(new_task_config, f, indent=4, sort_keys=False) 51 | 52 | # 4) train model with new task 53 | with TimeInspector.logt("The time for task with handler cache:"): 54 | subprocess.run(f"qrun {new_task_path}", shell=True) 55 | -------------------------------------------------------------------------------- /tests/misc/test_sepdf.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
3 | import unittest 4 | 5 | import numpy as np 6 | import pandas as pd 7 | 8 | from qlib.contrib.data.utils.sepdf import SepDataFrame 9 | 10 | 11 | class SepDF(unittest.TestCase): 12 | def to_str(self, obj): 13 | return "".join(str(obj).split()) 14 | 15 | def test_index_data(self): 16 | np.random.seed(42) 17 | 18 | index = [ 19 | np.array(["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"]), 20 | np.array(["one", "two", "one", "two", "one", "two", "one", "two"]), 21 | ] 22 | 23 | cols = [ 24 | np.repeat(np.array(["g1", "g2"]), 2), 25 | np.arange(4), 26 | ] 27 | df = pd.DataFrame(np.random.randn(8, 4), index=index, columns=cols) 28 | sdf = SepDataFrame(df_dict={"g2": df["g2"]}, join=None) 29 | sdf[("g2", 4)] = 3 30 | sdf["g1"] = df["g1"] 31 | exp = """ 32 | {'g2': 2 3 4 33 | bar one 0.647689 1.523030 3 34 | two 1.579213 0.767435 3 35 | baz one -0.463418 -0.465730 3 36 | two -1.724918 -0.562288 3 37 | foo one -0.908024 -1.412304 3 38 | two 0.067528 -1.424748 3 39 | qux one -1.150994 0.375698 3 40 | two -0.601707 1.852278 3, 'g1': 0 1 41 | bar one 0.496714 -0.138264 42 | two -0.234153 -0.234137 43 | baz one -0.469474 0.542560 44 | two 0.241962 -1.913280 45 | foo one -1.012831 0.314247 46 | two 1.465649 -0.225776 47 | qux one -0.544383 0.110923 48 | two -0.600639 -0.291694} 49 | """ 50 | self.assertEqual(self.to_str(sdf._df_dict), self.to_str(exp)) 51 | 52 | del df["g1"] 53 | del df["g2"] 54 | # this will not raise an error; df will become an empty dataframe 55 | 56 | del sdf["g1"] 57 | del sdf["g2"] 58 | # sdf should support deleting all of its column groups 59 | 60 | 61 | if __name__ == "__main__": 62 | unittest.main() 63 | -------------------------------------------------------------------------------- /examples/benchmarks_dynamic/README.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | Due to the non-stationary nature of the financial market environment, the data distribution may change across periods, which makes the performance of models built on training data decay on future test data. 3 | Adapting forecasting models/strategies to market dynamics is therefore very important to their performance. 4 | 5 | The table below shows the performances of different solutions on different forecasting models. 6 | 7 | ## Alpha158 Dataset 8 | Here is the [crowd sourced version of qlib data](data_collector/crowd_source/README.md): https://github.com/chenditc/investment_data/releases 9 | ```bash 10 | wget https://github.com/chenditc/investment_data/releases/latest/download/qlib_bin.tar.gz 11 | mkdir -p ~/.qlib/qlib_data/cn_data 12 | tar -zxvf qlib_bin.tar.gz -C ~/.qlib/qlib_data/cn_data --strip-components=2 13 | rm -f qlib_bin.tar.gz 14 | ``` 15 | 16 | | Model Name | Dataset | IC | ICIR | Rank IC | Rank ICIR | Annualized Return | Information Ratio | Max Drawdown | 17 | |------------------|---------|------|------|---------|-----------|-------------------|-------------------|--------------| 18 | | RR[Linear] |Alpha158 |0.0945|0.5989|0.1069 |0.6495 |0.0857 |1.3682 |-0.0986 | 19 | | DDG-DA[Linear] |Alpha158 |0.0983|0.6157|0.1108 |0.6646 |0.0764 |1.1904 |-0.0769 | 20 | | RR[LightGBM] |Alpha158 |0.0816|0.5887|0.0912 |0.6263 |0.0771 |1.3196 |-0.0909 | 21 | | DDG-DA[LightGBM] |Alpha158 |0.0878|0.6185|0.0975 |0.6524 |0.1261 |2.0096 |-0.0744 | 22 | 23 | - The label horizon of the `Alpha158` dataset is set to 20. 24 | - The rolling time intervals are set to 20 trading days.
25 | - The test rolling periods are from January 2017 to August 2020. 26 | - The results are based on the crowd-sourced version. The Yahoo version of qlib data does not contain `VWAP`, so all related factors are missing and filled with 0, which leads to a rank-deficient matrix (one that does not have full rank) and makes the lower-level optimization of DDG-DA unsolvable. 27 | -------------------------------------------------------------------------------- /examples/orderbook_data/README.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | 3 | This example demonstrates how Qlib supports data without a fixed shared frequency. 4 | 5 | For example, 6 | - Daily price/volume data are fixed-frequency data: the data comes at a fixed frequency (i.e. daily). 7 | - Orders are not fixed-frequency data; they may come at any time point. 8 | 9 | To support such non-fixed-frequency data, Qlib implements an Arctic-based backend. 10 | Here is an example of importing and querying data based on this backend. 11 | 12 | # Installation 13 | 14 | Please refer to [the installation docs](https://docs.mongodb.com/manual/installation/) of mongodb. 15 | The current version of the script, with default values, tries to connect to localhost **via the default port without authentication**. 16 | 17 | Run the following commands to install the necessary libraries 18 | ``` 19 | pip install pytest coverage gdown 20 | pip install arctic # NOTE: pip may fail to resolve the right package dependencies!!! Please make sure the dependencies are satisfied. 21 | ``` 22 | 23 | # Importing example data 24 | 25 | 26 | 1. (Optional) Please follow the first part of [this section](https://github.com/microsoft/qlib#data-preparation) to **get 1min data** of Qlib. 27 | 2. Please follow the steps below to download the example data 28 | ```bash 29 | cd examples/orderbook_data/ 30 | gdown https://drive.google.com/uc?id=15nZF7tFT_eKVZAcMFL1qPS4jGyJflH7e # Proxies may be necessary here. 31 | python ../../scripts/get_data.py _unzip --file_path highfreq_orderbook_example_data.zip --target_dir . 32 | ``` 33 | 34 | 3. Please import the example data into your mongo db 35 | ```bash 36 | python create_dataset.py initialize_library # Initialize the libraries 37 | python create_dataset.py import_data # Import the data 38 | ``` 39 | 40 | # Query Examples 41 | 42 | After importing the data, you can run `example.py` to create some high-frequency features. 43 | ```bash 44 | pytest -s --disable-warnings example.py # If you want to run all examples 45 | pytest -s --disable-warnings example.py::TestClass::test_exp_10 # If you want to run a specific example 46 | ``` 47 | 48 | 49 | # Known limitations 50 | Expression computation across different frequencies is not supported yet 51 | -------------------------------------------------------------------------------- /examples/online_srv/update_online_pred.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | """ 5 | This example shows how OnlineTool works when we need to update predictions. 6 | There are two parts: first_train and update_online_pred. 7 | First, we finish the training and set the trained model as the `online` model. 8 | Next, we update the online predictions.
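A typical deployment runs `first_train` once and then schedules `update_online_pred` daily; see the example commands at the bottom of this file.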
9 | """ 10 | import copy 11 | 12 | import fire 13 | 14 | import qlib 15 | from qlib.constant import REG_CN 16 | from qlib.model.trainer import task_train 17 | from qlib.tests.config import CSI300_GBDT_TASK 18 | from qlib.workflow.online.utils import OnlineToolR 19 | 20 | task = copy.deepcopy(CSI300_GBDT_TASK) 21 | 22 | task["record"] = { 23 | "class": "SignalRecord", 24 | "module_path": "qlib.workflow.record_temp", 25 | } 26 | 27 | 28 | class UpdatePredExample: 29 | def __init__( 30 | self, provider_uri="~/.qlib/qlib_data/cn_data", region=REG_CN, experiment_name="online_srv", task_config=task 31 | ): 32 | qlib.init(provider_uri=provider_uri, region=region) 33 | self.experiment_name = experiment_name 34 | self.online_tool = OnlineToolR(self.experiment_name) 35 | self.task_config = task_config 36 | 37 | def first_train(self): 38 | rec = task_train(self.task_config, experiment_name=self.experiment_name) 39 | self.online_tool.reset_online_tag(rec) # set to online model 40 | 41 | def update_online_pred(self): 42 | self.online_tool.update_online_pred() 43 | 44 | def main(self): 45 | self.first_train() 46 | self.update_online_pred() 47 | 48 | 49 | if __name__ == "__main__": 50 | ## to train a model and set it to online model, use the command below 51 | # python update_online_pred.py first_train 52 | ## to update online predictions once a day, use the command below 53 | # python update_online_pred.py update_online_pred 54 | ## to see the whole process with your own parameters, use the command below 55 | # python update_online_pred.py main --experiment_name="your_exp_name" 56 | fire.Fire(UpdatePredExample) 57 | -------------------------------------------------------------------------------- /scripts/README.md: -------------------------------------------------------------------------------- 1 | 2 | - [Download Qlib Data](#Download-Qlib-Data) 3 | - [Download CN Data](#Download-CN-Data) 4 | - [Download US Data](#Download-US-Data) 5 | - [Download CN Simple Data](#Download-CN-Simple-Data) 6 | - [Help](#Help) 7 | - [Using in Qlib](#Using-in-Qlib) 8 | - [US data](#US-data) 9 | - [CN data](#CN-data) 10 | 11 | 12 | ## Download Qlib Data 13 | 14 | 15 | ### Download CN Data 16 | 17 | ```bash 18 | # daily data 19 | python get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --region cn 20 | 21 | # 1min data (Optional for running non-high-frequency strategies) 22 | python get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data_1min --region cn --interval 1min 23 | ``` 24 | 25 | ### Download US Data 26 | 27 | 28 | ```bash 29 | python get_data.py qlib_data --target_dir ~/.qlib/qlib_data/us_data --region us 30 | ``` 31 | 32 | ### Download CN Simple Data 33 | 34 | ```bash 35 | python get_data.py qlib_data --name qlib_data_simple --target_dir ~/.qlib/qlib_data/cn_data --region cn 36 | ``` 37 | 38 | ### Help 39 | 40 | ```bash 41 | python get_data.py qlib_data --help 42 | ``` 43 | 44 | ## Using in Qlib 45 | > For more information: https://qlib.readthedocs.io/en/latest/start/initialization.html 46 | 47 | 48 | ### US data 49 | 50 | > Need to download data first: [Download US Data](#Download-US-Data) 51 | 52 | ```python 53 | import qlib 54 | from qlib.config import REG_US 55 | provider_uri = "~/.qlib/qlib_data/us_data" # target_dir 56 | qlib.init(provider_uri=provider_uri, region=REG_US) 57 | ``` 58 | 59 | ### CN data 60 | 61 | > Need to download data first: [Download CN Data](#Download-CN-Data) 62 | 63 | ```python 64 | import qlib 65 | from qlib.constant import REG_CN 66 | 67 | provider_uri = 
"~/.qlib/qlib_data/cn_data" # target_dir 68 | qlib.init(provider_uri=provider_uri, region=REG_CN) 69 | ``` 70 | 71 | ## Use Crowd Sourced Data 72 | The is also a [crowd sourced version of qlib data](data_collector/crowd_source/README.md): https://github.com/chenditc/investment_data/releases 73 | ```bash 74 | wget https://github.com/chenditc/investment_data/releases/latest/download/qlib_bin.tar.gz 75 | tar -zxvf qlib_bin.tar.gz -C ~/.qlib/qlib_data/cn_data --strip-components=2 76 | ``` 77 | -------------------------------------------------------------------------------- /qlib/data/storage/arctic_storage/base.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | if sys.version_info >= (3, 9): 4 | from zoneinfo import ZoneInfo # noqa 5 | else: 6 | from backports.zoneinfo import ZoneInfo # noqa 7 | from qlib.log import get_module_logger 8 | from arctic import Arctic 9 | from arctic.auth import Credential 10 | from arctic.hooks import register_get_auth_hook 11 | 12 | try: 13 | from vnpy.trader.database import SETTINGS 14 | except ImportError: 15 | SETTINGS = {} 16 | 17 | 18 | logger = get_module_logger("arctic_storage") 19 | 20 | 21 | def db_symbol_to_qlib(db_symbol: str) -> str: 22 | """convert db_symbol to qlib symbol 23 | 24 | Parameters 25 | ---------- 26 | db_symbol : str 27 | db_symbol 28 | 29 | Returns 30 | ------- 31 | str 32 | qlib symbol 33 | """ 34 | code, exch = db_symbol.split("_") 35 | exch = "SH" if exch == "SSE" else "SZ" 36 | return f"{exch}{code}" 37 | 38 | 39 | def qlib_symbol_to_db(qlib_symbol: str) -> str: 40 | """convert db_symbol to qlib symbol 41 | 42 | Parameters 43 | ---------- 44 | qlib_symbol : str 45 | qlib style symbol 46 | 47 | Returns 48 | ------- 49 | str 50 | qlib symbol 51 | """ 52 | exch = "SSE" if qlib_symbol[:2].lower() == "sh" else "SZSE" 53 | return f"{qlib_symbol[2:]}_{exch}" 54 | 55 | 56 | def arctic_auth_hook(*_): 57 | if bool(SETTINGS.get("database.password", "")) and bool(SETTINGS.get("database.user", "")): 58 | return Credential( 59 | database="admin", 60 | user=SETTINGS["database.user"], 61 | password=SETTINGS["database.password"], 62 | ) 63 | return None 64 | 65 | 66 | register_get_auth_hook(arctic_auth_hook) 67 | 68 | 69 | class ArcticStorageMixin: 70 | """ArcticStorageMixin, applicable to ArcticXXXStorage 71 | Subclasses need 72 | """ 73 | 74 | def _get_arctic_store(self): 75 | """get arctic store""" 76 | if not hasattr(self, "arctic_store"): 77 | self.arctic_store = Arctic( 78 | SETTINGS["database.host"], tz_aware=True, tzinfo=ZoneInfo(SETTINGS["database.timezone"]) 79 | ) 80 | return self.arctic_store 81 | -------------------------------------------------------------------------------- /examples/hyperparameter/LightGBM/hyperparameter_360.py: -------------------------------------------------------------------------------- 1 | import optuna 2 | 3 | import qlib 4 | from qlib.constant import REG_CN 5 | from qlib.tests.config import CSI300_MARKET, DATASET_ALPHA360_CLASS, get_dataset_config 6 | from qlib.tests.data import GetData 7 | from qlib.utils import init_instance_by_config 8 | 9 | DATASET_CONFIG = get_dataset_config(market=CSI300_MARKET, dataset_class=DATASET_ALPHA360_CLASS) 10 | 11 | 12 | def objective(trial): 13 | task = { 14 | "model": { 15 | "class": "LGBModel", 16 | "module_path": "qlib.contrib.model.gbdt", 17 | "kwargs": { 18 | "loss": "mse", 19 | "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1), 20 | "learning_rate": trial.suggest_uniform("learning_rate", 0, 1), 
21 | "subsample": trial.suggest_uniform("subsample", 0, 1), 22 | "lambda_l1": trial.suggest_loguniform("lambda_l1", 1e-8, 1e4), 23 | "lambda_l2": trial.suggest_loguniform("lambda_l2", 1e-8, 1e4), 24 | "max_depth": 10, 25 | "num_leaves": trial.suggest_int("num_leaves", 1, 1024), 26 | "feature_fraction": trial.suggest_uniform("feature_fraction", 0.4, 1.0), 27 | "bagging_fraction": trial.suggest_uniform("bagging_fraction", 0.4, 1.0), 28 | "bagging_freq": trial.suggest_int("bagging_freq", 1, 7), 29 | "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 1, 50), 30 | "min_child_samples": trial.suggest_int("min_child_samples", 5, 100), 31 | }, 32 | }, 33 | } 34 | 35 | evals_result = dict() 36 | model = init_instance_by_config(task["model"]) 37 | model.fit(dataset, evals_result=evals_result) 38 | return min(evals_result["valid"]) 39 | 40 | 41 | if __name__ == "__main__": 42 | provider_uri = "~/.qlib/qlib_data/cn_data" 43 | GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True) 44 | qlib.init(provider_uri=provider_uri, region=REG_CN) 45 | 46 | dataset = init_instance_by_config(DATASET_CONFIG) 47 | 48 | study = optuna.Study(study_name="LGBM_360", storage="sqlite:///db.sqlite3") 49 | study.optimize(objective, n_jobs=6) 50 | -------------------------------------------------------------------------------- /qlib/typehint.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | """Commonly used types.""" 5 | 6 | import sys 7 | from pathlib import Path 8 | from typing import Union 9 | 10 | __all__ = ["Literal", "TypedDict", "final", "InstConf"] 11 | 12 | if sys.version_info >= (3, 8): 13 | from typing import Literal, TypedDict, final # type: ignore # pylint: disable=no-name-in-module 14 | else: 15 | from typing_extensions import Literal, TypedDict, final 16 | 17 | 18 | class InstDictConf(TypedDict): 19 | """ 20 | InstDictConf is a Dict-based config to describe an instance 21 | 22 | case 1) 23 | { 24 | 'class': 'ClassName', 25 | 'kwargs': dict, # It is optional. {} will be used if not given 26 | 'model_path': path, # It is optional if module is given in the class 27 | } 28 | case 2) 29 | { 30 | 'class': , 31 | 'kwargs': dict, # It is optional. {} will be used if not given 32 | } 33 | """ 34 | 35 | # class: str # because class is a keyword of Python. We have to comment it 36 | kwargs: dict # It is optional. {} will be used if not given 37 | module_path: str # It is optional if module is given in the class 38 | 39 | 40 | InstConf = Union[InstDictConf, str, object, Path] 41 | """ 42 | InstConf is a type to describe an instance; it will be passed into init_instance_by_config for Qlib 43 | 44 | config : Union[str, dict, object, Path] 45 | 46 | InstDictConf example. 47 | please refer to the docs of InstDictConf 48 | 49 | str example. 50 | 1) specify a pickle object 51 | - path like 'file:////obj.pkl' 52 | 2) specify a class name 53 | - "ClassName": getattr(module, "ClassName")() will be used. 54 | 3) specify module path with class name 55 | - "a.b.c.ClassName" getattr(, "ClassName")() will be used. 
56 | 57 | object example: 58 | instance of accept_types 59 | 60 | Path example: 61 | specify a pickle object 62 | - it will be treated like 'file:///<path to pickle file>/obj.pkl' 63 | """ 64 | -------------------------------------------------------------------------------- /examples/rl_order_execution/scripts/gen_training_orders.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | import os 5 | import numpy as np 6 | import pandas as pd 7 | 8 | from pathlib import Path 9 | 10 | DATA_PATH = Path(os.path.join("data", "pickle", "backtest")) 11 | OUTPUT_PATH = Path(os.path.join("data", "orders")) 12 | 13 | 14 | def generate_order(stock: str, start_idx: int, end_idx: int) -> bool: 15 | dataset = pd.read_pickle(DATA_PATH / f"{stock}.pkl") 16 | df = dataset.handler.fetch(level=None).reset_index() 17 | if len(df) == 0 or df.isnull().values.any() or min(df["$volume0"]) < 1e-5: 18 | return False 19 | 20 | df["date"] = pd.to_datetime(df["datetime"].dt.date) 21 | df = df.set_index(["instrument", "datetime", "date"]) 22 | df = df.groupby("date", group_keys=False).take(range(start_idx, end_idx)).droplevel(level=0) 23 | 24 | order_all = pd.DataFrame(df.groupby(level=(2, 0), group_keys=False).mean().dropna()) 25 | order_all["amount"] = np.random.lognormal(-3.28, 1.14) * order_all["$volume0"] 26 | order_all = order_all[order_all["amount"] > 0.0] 27 | order_all["order_type"] = 0 28 | order_all = order_all.drop(columns=["$volume0"]) 29 | 30 | order_train = order_all[order_all.index.get_level_values(0) <= pd.Timestamp("2021-06-30")] 31 | order_test = order_all[order_all.index.get_level_values(0) > pd.Timestamp("2021-06-30")] 32 | order_valid = order_test[order_test.index.get_level_values(0) <= pd.Timestamp("2021-09-30")] 33 | order_test = order_test[order_test.index.get_level_values(0) > pd.Timestamp("2021-09-30")] 34 | 35 | for order, tag in zip((order_train, order_valid, order_test, order_all), ("train", "valid", "test", "all")): 36 | path = OUTPUT_PATH / tag 37 | os.makedirs(path, exist_ok=True) 38 | if len(order) > 0: 39 | order.to_pickle(path / f"{stock}.pkl.target") 40 | return True 41 | 42 | 43 | np.random.seed(1234) 44 | file_list = sorted(os.listdir(DATA_PATH)) 45 | stocks = [f.replace(".pkl", "") for f in file_list] 46 | np.random.shuffle(stocks) 47 | 48 | cnt = 0 49 | for stock in stocks: 50 | if generate_order(stock, 0, 240 // 5 - 1): 51 | cnt += 1 52 | if cnt == 100: 53 | break 54 | -------------------------------------------------------------------------------- /qlib/contrib/model/svm.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License.
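# SVMRegression below is a thin qlib Model wrapper around sklearn.svm.SVR:
# fit() pulls the "train" segment (features plus label, optionally reweighted)
# from a DatasetH, and predict() returns a pd.Series of scores aligned with
# the feature index. A hypothetical usage sketch (names are illustrative):
#
#     model = SVMRegression(kernel="rbf", C=1.0)
#     model.fit(dataset)                      # dataset: a prepared qlib DatasetH
#     pred = model.predict(dataset, "test")   # pd.Series indexed like the features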
3 | 4 | from typing import Text, Union, cast 5 | 6 | import numpy as np 7 | import pandas as pd 8 | from sklearn.svm import SVR 9 | 10 | from qlib.data.dataset.weight import Reweighter 11 | 12 | from ...data.dataset import DatasetH 13 | from ...data.dataset.handler import DataHandlerLP 14 | from ...model.base import Model 15 | 16 | 17 | class SVMRegression(Model): 18 | """SVM Regression Model""" 19 | 20 | def __init__( 21 | self, 22 | kernel="rbf", 23 | degree=3, 24 | gamma="scale", 25 | coef0=0.0, 26 | tol=1e-3, 27 | C=1.0, 28 | epsilon=0.1, 29 | shrinking=True, 30 | cache_size=200, 31 | verbose=False, 32 | max_iter=-1, 33 | ): 34 | self.predictor = SVR( 35 | kernel=kernel, 36 | degree=degree, 37 | gamma=gamma, 38 | coef0=coef0, 39 | tol=tol, 40 | C=C, 41 | epsilon=epsilon, 42 | shrinking=shrinking, 43 | cache_size=cache_size, 44 | verbose=verbose, 45 | max_iter=max_iter, 46 | ) 47 | self.factor_names_ = None 48 | 49 | def fit(self, dataset: DatasetH, reweighter: Reweighter = None): 50 | df_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) 51 | if df_train.empty: 52 | raise ValueError("Empty data from dataset, please check your dataset config.") 53 | X, y = df_train["feature"].values, np.squeeze(df_train["label"].values) 54 | w = None if reweighter is None else cast(pd.Series, reweighter.reweight(df_train)).values 55 | self.factor_names_ = df_train["feature"].columns 56 | self.predictor.fit(X, y, sample_weight=w) 57 | return self 58 | 59 | def predict(self, dataset: DatasetH, segment: Union[Text, slice] = "test"): 60 | if getattr(self.predictor, "fit_status_", None) != 0: 61 | raise ValueError("model is not fitted yet!") 62 | x_test = dataset.prepare(segment, col_set="feature", data_key=DataHandlerLP.DK_I) 63 | x_test = x_test[self.factor_names_] 64 | return pd.Series(self.predictor.predict(x_test), index=x_test.index) 65 | -------------------------------------------------------------------------------- /docs/component/online.rst: -------------------------------------------------------------------------------- 1 | .. _online_serving: 2 | 3 | ============== 4 | Online Serving 5 | ============== 6 | .. currentmodule:: qlib 7 | 8 | 9 | Introduction 10 | ============ 11 | 12 | .. image:: ../_static/img/online_serving.png 13 | :align: center 14 | 15 | 16 | In addition to backtesting, one way to test whether a model is effective is to make predictions in real market conditions, or even to do real trading based on those predictions. 17 | ``Online Serving`` is a set of modules for serving online models with the latest data, 18 | which includes `Online Manager <#Online Manager>`_, `Online Strategy <#Online Strategy>`_, `Online Tool <#Online Tool>`_, `Updater <#Updater>`_. 19 | 20 | The examples under ``examples/online_srv`` demonstrate different features of ``Online Serving`` and can be used for reference. 21 | If you have many models or tasks that need to be managed, please consider `Task Management <../advanced/task_management.html>`_. 22 | These examples are based on some components in `Task Management <../advanced/task_management.html>`_ such as ``TrainerRM`` or ``Collector``. 23 | 24 | **NOTE**: Users should keep their data source updated to support online serving. For example, Qlib provides a batch of scripts (under ``scripts/data_collector``) to help users update Yahoo daily data. 25 | 26 | Known limitations currently: 27 | - Currently, daily updating of the prediction for the next trading day is supported.
But generating orders for the next trading day is not supported, due to limitations of the public data. 28 | 29 | 30 | Online Manager 31 | ============== 32 | 33 | .. automodule:: qlib.workflow.online.manager 34 | :members: 35 | :noindex: 36 | 37 | Online Strategy 38 | =============== 39 | 40 | .. automodule:: qlib.workflow.online.strategy 41 | :members: 42 | :noindex: 43 | 44 | Online Tool 45 | =========== 46 | 47 | .. automodule:: qlib.workflow.online.utils 48 | :members: 49 | :noindex: 50 | 51 | Updater 52 | ======= 53 | 54 | .. automodule:: qlib.workflow.online.update 55 | :members: 56 | :noindex: 57 | -------------------------------------------------------------------------------- /examples/benchmarks/XGBoost/workflow_config_xgboost_Alpha158.yaml: -------------------------------------------------------------------------------- 1 | qlib_init: 2 | provider_uri: "~/.qlib/qlib_data/cn_data" 3 | region: cn 4 | market: &market csi300 5 | benchmark: &benchmark SH000300 6 | data_handler_config: &data_handler_config 7 | start_time: 2008-01-01 8 | end_time: 2020-08-01 9 | fit_start_time: 2008-01-01 10 | fit_end_time: 2014-12-31 11 | instruments: *market 12 | port_analysis_config: &port_analysis_config 13 | strategy: 14 | class: TopkDropoutStrategy 15 | module_path: qlib.contrib.strategy 16 | kwargs: 17 | signal: 18 | topk: 50 19 | n_drop: 5 20 | backtest: 21 | start_time: 2017-01-01 22 | end_time: 2020-08-01 23 | account: 100000000 24 | benchmark: *benchmark 25 | exchange_kwargs: 26 | limit_threshold: 0.095 27 | deal_price: close 28 | open_cost: 0.0005 29 | close_cost: 0.0015 30 | min_cost: 5 31 | task: 32 | model: 33 | class: XGBModel 34 | module_path: qlib.contrib.model.xgboost 35 | kwargs: 36 | eval_metric: rmse 37 | colsample_bytree: 0.8879 38 | eta: 0.0421 39 | max_depth: 8 40 | n_estimators: 647 41 | subsample: 0.8789 42 | nthread: 20 43 | dataset: 44 | class: DatasetH 45 | module_path: qlib.data.dataset 46 | kwargs: 47 | handler: 48 | class: Alpha158 49 | module_path: qlib.contrib.data.handler 50 | kwargs: *data_handler_config 51 | segments: 52 | train: [2008-01-01, 2014-12-31] 53 | valid: [2015-01-01, 2016-12-31] 54 | test: [2017-01-01, 2020-08-01] 55 | record: 56 | - class: SignalRecord 57 | module_path: qlib.workflow.record_temp 58 | kwargs: 59 | model: 60 | dataset: 61 | - class: SigAnaRecord 62 | module_path: qlib.workflow.record_temp 63 | kwargs: 64 | ana_long_short: False 65 | ann_scaler: 252 66 | - class: PortAnaRecord 67 | module_path: qlib.workflow.record_temp 68 | kwargs: 69 | config: *port_analysis_config 70 | -------------------------------------------------------------------------------- /qlib/contrib/data/data.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | # We moved arctic from the core framework of Qlib to contrib because: 5 | # - Arctic has very strict limitations on pandas and numpy versions 6 | # - https://github.com/man-group/arctic/pull/908 7 | # - pip fails to compute the right version numbers! 8 | # - Maybe we can solve this problem with poetry 9 | 10 | import pandas as pd 11 | import pymongo 12 | 13 | # FIXME: so if you want to use the arctic-based provider, please install arctic manually; 14 | # `pip install arctic` may not be enough.
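# A hypothetical direct-usage sketch (in practice the provider is wired in
# through qlib's provider configuration rather than constructed by hand):
#
#     provider = ArcticFeatureProvider(uri="127.0.0.1")
#     s = provider.feature("SH600000", "$close", start_index, end_index, "1min")
#
# `feature` strips the leading "$" from the field name, reads that column from
# the arctic library named after `freq`, and keeps only the rows that fall
# inside `market_transaction_time_list`.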
15 | from arctic import Arctic 16 | 17 | from qlib.data.data import FeatureProvider 18 | 19 | 20 | class ArcticFeatureProvider(FeatureProvider): 21 | def __init__( 22 | self, uri="127.0.0.1", retry_time=0, market_transaction_time_list=[("09:15", "11:30"), ("13:00", "15:00")] 23 | ): 24 | super().__init__() 25 | self.uri = uri 26 | # TODO: 27 | # retry connecting if an error occurs 28 | # does it really matter? 29 | self.retry_time = retry_time 30 | # NOTE: this is especially important for the TResample operator 31 | self.market_transaction_time_list = market_transaction_time_list 32 | 33 | def feature(self, instrument, field, start_index, end_index, freq): 34 | field = str(field)[1:] 35 | with pymongo.MongoClient(self.uri) as client: 36 | # TODO: this reconnects to the server on every call, which is a performance issue 37 | arctic = Arctic(client) 38 | 39 | if freq not in arctic.list_libraries(): 40 | raise ValueError("lib {} not in arctic".format(freq)) 41 | 42 | if instrument not in arctic[freq].list_symbols(): 43 | # the instrument does not exist 44 | return pd.Series() 45 | else: 46 | df = arctic[freq].read(instrument, columns=[field], chunk_range=(start_index, end_index)) 47 | s = df[field] 48 | 49 | if not s.empty: 50 | s = pd.concat( 51 | [ 52 | s.between_time(time_tuple[0], time_tuple[1]) 53 | for time_tuple in self.market_transaction_time_list 54 | ] 55 | ) 56 | return s 57 | -------------------------------------------------------------------------------- /docs/advanced/serial.rst: -------------------------------------------------------------------------------- 1 | .. _serial: 2 | 3 | ============= 4 | Serialization 5 | ============= 6 | .. currentmodule:: qlib 7 | 8 | Introduction 9 | ============ 10 | ``Qlib`` supports dumping the state of ``DataHandler``, ``DataSet``, ``Processor`` and ``Model``, etc. to disk and reloading them. 11 | 12 | Serializable Class 13 | ================== 14 | 15 | ``Qlib`` provides a base class ``qlib.utils.serial.Serializable``, whose state can be dumped into or loaded from disk in `pickle` format. 16 | When users dump the state of a ``Serializable`` instance, the attributes of the instance whose name **does not** start with `_` will be saved on the disk. 17 | However, users can use the ``config`` method or override the ``default_dump_all`` attribute to change this behavior. 18 | 19 | Users can also override the ``pickle_backend`` attribute to choose a pickle backend. The supported values are "pickle" (the default, and most common) and "dill" (which can dump more things, such as functions; see the ``dill`` documentation for more information). 20 | 21 | Example 22 | ======= 23 | ``Qlib``'s serializable classes include ``DataHandler``, ``DataSet``, ``Processor`` and ``Model``, etc., which are subclasses of ``qlib.utils.serial.Serializable``. 24 | Specifically, ``qlib.data.dataset.DatasetH`` is one of them. Users can serialize ``DatasetH`` as follows. 25 | 26 | .. code-block:: Python 27 | 28 | ##=============dump dataset============= 29 | dataset.to_pickle(path="dataset.pkl") # dataset is an instance of qlib.data.dataset.DatasetH 30 | 31 | ##=============reload dataset============= 32 | with open("dataset.pkl", "rb") as file_dataset: 33 | dataset = pickle.load(file_dataset) 34 | 35 | .. note:: 36 | Only the state of ``DatasetH`` should be saved to disk, such as the `mean` and `variance` used for data normalization, etc. 37 | 38 | After reloading the ``DatasetH``, users need to reinitialize it.
It means that users can reset some states of ``DatasetH`` or ``QlibDataHandler``, such as `instruments`, `start_time`, `end_time` and `segments`, etc., and generate new data according to those states (data is not state and should not be saved on the disk). 39 | 40 | A more detailed example is ``examples/data_demo/data_cache_demo.py`` in the Qlib repository. 41 | 42 | 43 | API 44 | === 45 | Please refer to `Serializable API <../reference/api.html#module-qlib.utils.serial.Serializable>`_. 46 | -------------------------------------------------------------------------------- /examples/highfreq/workflow_config_High_Freq_Tree_Alpha158.yaml: -------------------------------------------------------------------------------- 1 | qlib_init: 2 | provider_uri: "~/.qlib/qlib_data/cn_data_1min" 3 | region: cn 4 | market: &market 'csi300' 5 | start_time: &start_time "2020-09-15 00:00:00" 6 | end_time: &end_time "2021-01-18 16:00:00" 7 | train_end_time: &train_end_time "2020-11-15 16:00:00" 8 | valid_start_time: &valid_start_time "2020-11-16 00:00:00" 9 | valid_end_time: &valid_end_time "2020-11-30 16:00:00" 10 | test_start_time: &test_start_time "2020-12-01 00:00:00" 11 | data_handler_config: &data_handler_config 12 | start_time: *start_time 13 | end_time: *end_time 14 | fit_start_time: *start_time 15 | fit_end_time: *train_end_time 16 | instruments: *market 17 | freq: '1min' 18 | infer_processors: 19 | - class: 'RobustZScoreNorm' 20 | kwargs: 21 | fields_group: 'feature' 22 | clip_outlier: false 23 | - class: "Fillna" 24 | kwargs: 25 | fields_group: 'feature' 26 | learn_processors: 27 | - class: 'DropnaLabel' 28 | - class: 'CSRankNorm' 29 | kwargs: 30 | fields_group: 'label' 31 | label: ["Ref($close, -2) / Ref($close, -1) - 1"] 32 | 33 | task: 34 | model: 35 | class: "HFLGBModel" 36 | module_path: "qlib.contrib.model.highfreq_gdbt_model" 37 | kwargs: 38 | objective: 'binary' 39 | metric: ['binary_logloss','auc'] 40 | verbosity: -1 41 | learning_rate: 0.01 42 | max_depth: 8 43 | num_leaves: 150 44 | lambda_l1: 1.5 45 | lambda_l2: 1 46 | num_threads: 20 47 | dataset: 48 | class: "DatasetH" 49 | module_path: "qlib.data.dataset" 50 | kwargs: 51 | handler: 52 | class: "Alpha158" 53 | module_path: "qlib.contrib.data.handler" 54 | kwargs: *data_handler_config 55 | segments: 56 | train: [*start_time, *train_end_time] 57 | valid: [*train_end_time, *valid_end_time] 58 | test: [*test_start_time, *end_time] 59 | record: 60 | - class: "SignalRecord" 61 | module_path: "qlib.workflow.record_temp" 62 | kwargs: {} 63 | - class: "HFSignalRecord" 64 | module_path: "qlib.workflow.record_temp" 65 | kwargs: {} -------------------------------------------------------------------------------- /examples/benchmarks/CatBoost/workflow_config_catboost_Alpha158.yaml: -------------------------------------------------------------------------------- 1 | qlib_init: 2 | provider_uri: "~/.qlib/qlib_data/cn_data" 3 | region: cn 4 | market: &market csi300 5 | benchmark: &benchmark SH000300 6 | data_handler_config: &data_handler_config 7 | start_time: 2008-01-01 8 | end_time: 2020-08-01 9 | fit_start_time: 2008-01-01 10 | fit_end_time: 2014-12-31 11 | instruments: *market 12 | port_analysis_config: &port_analysis_config 13 | strategy: 14 | class: TopkDropoutStrategy 15 | module_path: qlib.contrib.strategy 16 | kwargs: 17 | signal: 18 | topk: 50 19 | n_drop: 5 20 | backtest: 21 | start_time: 2017-01-01 22 | end_time: 2020-08-01 23 | account: 100000000 24 | benchmark: *benchmark 25 | exchange_kwargs: 26 | limit_threshold: 0.095 27 | deal_price: close 28 | open_cost: 0.0005 29 | close_cost: 0.0015
30 | min_cost: 5 31 | task: 32 | model: 33 | class: CatBoostModel 34 | module_path: qlib.contrib.model.catboost_model 35 | kwargs: 36 | loss: RMSE 37 | learning_rate: 0.0421 38 | subsample: 0.8789 39 | max_depth: 6 40 | num_leaves: 100 41 | thread_count: 20 42 | grow_policy: Lossguide 43 | bootstrap_type: Poisson 44 | dataset: 45 | class: DatasetH 46 | module_path: qlib.data.dataset 47 | kwargs: 48 | handler: 49 | class: Alpha158 50 | module_path: qlib.contrib.data.handler 51 | kwargs: *data_handler_config 52 | segments: 53 | train: [2008-01-01, 2014-12-31] 54 | valid: [2015-01-01, 2016-12-31] 55 | test: [2017-01-01, 2020-08-01] 56 | record: 57 | - class: SignalRecord 58 | module_path: qlib.workflow.record_temp 59 | kwargs: 60 | model: 61 | dataset: 62 | - class: SigAnaRecord 63 | module_path: qlib.workflow.record_temp 64 | kwargs: 65 | ana_long_short: False 66 | ann_scaler: 252 67 | - class: PortAnaRecord 68 | module_path: qlib.workflow.record_temp 69 | kwargs: 70 | config: *port_analysis_config 71 | -------------------------------------------------------------------------------- /examples/benchmarks/CatBoost/workflow_config_catboost_Alpha158_csi500.yaml: -------------------------------------------------------------------------------- 1 | qlib_init: 2 | provider_uri: "~/.qlib/qlib_data/cn_data" 3 | region: cn 4 | market: &market csi500 5 | benchmark: &benchmark SH000905 6 | data_handler_config: &data_handler_config 7 | start_time: 2008-01-01 8 | end_time: 2020-08-01 9 | fit_start_time: 2008-01-01 10 | fit_end_time: 2014-12-31 11 | instruments: *market 12 | port_analysis_config: &port_analysis_config 13 | strategy: 14 | class: TopkDropoutStrategy 15 | module_path: qlib.contrib.strategy 16 | kwargs: 17 | signal: 18 | topk: 50 19 | n_drop: 5 20 | backtest: 21 | start_time: 2017-01-01 22 | end_time: 2020-08-01 23 | account: 100000000 24 | benchmark: *benchmark 25 | exchange_kwargs: 26 | limit_threshold: 0.095 27 | deal_price: close 28 | open_cost: 0.0005 29 | close_cost: 0.0015 30 | min_cost: 5 31 | task: 32 | model: 33 | class: CatBoostModel 34 | module_path: qlib.contrib.model.catboost_model 35 | kwargs: 36 | loss: RMSE 37 | learning_rate: 0.0421 38 | subsample: 0.8789 39 | max_depth: 6 40 | num_leaves: 100 41 | thread_count: 20 42 | grow_policy: Lossguide 43 | bootstrap_type: Poisson 44 | dataset: 45 | class: DatasetH 46 | module_path: qlib.data.dataset 47 | kwargs: 48 | handler: 49 | class: Alpha158 50 | module_path: qlib.contrib.data.handler 51 | kwargs: *data_handler_config 52 | segments: 53 | train: [2008-01-01, 2014-12-31] 54 | valid: [2015-01-01, 2016-12-31] 55 | test: [2017-01-01, 2020-08-01] 56 | record: 57 | - class: SignalRecord 58 | module_path: qlib.workflow.record_temp 59 | kwargs: 60 | model: 61 | dataset: 62 | - class: SigAnaRecord 63 | module_path: qlib.workflow.record_temp 64 | kwargs: 65 | ana_long_short: False 66 | ann_scaler: 252 67 | - class: PortAnaRecord 68 | module_path: qlib.workflow.record_temp 69 | kwargs: 70 | config: *port_analysis_config 71 | -------------------------------------------------------------------------------- /examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml: -------------------------------------------------------------------------------- 1 | qlib_init: 2 | provider_uri: "~/.qlib/qlib_data/cn_data" 3 | region: cn 4 | market: &market csi300 5 | benchmark: &benchmark SH000300 6 | data_handler_config: &data_handler_config 7 | start_time: 2008-01-01 8 | end_time: 2020-08-01 9 | fit_start_time: 2008-01-01 10 | fit_end_time: 
2014-12-31 11 | instruments: *market 12 | port_analysis_config: &port_analysis_config 13 | strategy: 14 | class: TopkDropoutStrategy 15 | module_path: qlib.contrib.strategy 16 | kwargs: 17 | signal: 18 | topk: 50 19 | n_drop: 5 20 | backtest: 21 | start_time: 2017-01-01 22 | end_time: 2020-08-01 23 | account: 100000000 24 | benchmark: *benchmark 25 | exchange_kwargs: 26 | limit_threshold: 0.095 27 | deal_price: close 28 | open_cost: 0.0005 29 | close_cost: 0.0015 30 | min_cost: 5 31 | task: 32 | model: 33 | class: LGBModel 34 | module_path: qlib.contrib.model.gbdt 35 | kwargs: 36 | loss: mse 37 | colsample_bytree: 0.8879 38 | learning_rate: 0.2 39 | subsample: 0.8789 40 | lambda_l1: 205.6999 41 | lambda_l2: 580.9768 42 | max_depth: 8 43 | num_leaves: 210 44 | num_threads: 20 45 | dataset: 46 | class: DatasetH 47 | module_path: qlib.data.dataset 48 | kwargs: 49 | handler: 50 | class: Alpha158 51 | module_path: qlib.contrib.data.handler 52 | kwargs: *data_handler_config 53 | segments: 54 | train: [2008-01-01, 2014-12-31] 55 | valid: [2015-01-01, 2016-12-31] 56 | test: [2017-01-01, 2020-08-01] 57 | record: 58 | - class: SignalRecord 59 | module_path: qlib.workflow.record_temp 60 | kwargs: 61 | model: 62 | dataset: 63 | - class: SigAnaRecord 64 | module_path: qlib.workflow.record_temp 65 | kwargs: 66 | ana_long_short: False 67 | ann_scaler: 252 68 | - class: PortAnaRecord 69 | module_path: qlib.workflow.record_temp 70 | kwargs: 71 | config: *port_analysis_config 72 | --------------------------------------------------------------------------------
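Workflow configs like the LightGBM one above are executed with Qlib's `qrun` entry point (the same command the data-cache demo invokes through `subprocess`). A typical invocation, assuming the default CN data has been downloaded, is:

```bash
cd examples/benchmarks/LightGBM
qrun workflow_config_lightgbm_Alpha158.yaml
```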