├── CHANGELOG.md
├── cli
│   ├── __init__.py
│   └── data.py
├── contrib
│   ├── __init__.py
│   ├── eva
│   │   └── __init__.py
│   ├── ops
│   │   └── __init__.py
│   ├── data
│   │   ├── __init__.py
│   │   ├── utils
│   │   │   └── __init__.py
│   │   └── data.py
│   ├── tuner
│   │   ├── __init__.py
│   │   ├── space.py
│   │   └── launcher.py
│   ├── report
│   │   ├── data
│   │   │   ├── __init__.py
│   │   │   └── base.py
│   │   ├── analysis_model
│   │   │   └── __init__.py
│   │   ├── __init__.py
│   │   └── analysis_position
│   │       └── __init__.py
│   ├── meta
│   │   ├── __init__.py
│   │   └── data_selection
│   │       └── __init__.py
│   ├── workflow
│   │   └── __init__.py
│   ├── strategy
│   │   ├── optimizer
│   │   │   ├── __init__.py
│   │   │   └── base.py
│   │   └── __init__.py
│   ├── rolling
│   │   ├── __init__.py
│   │   └── __main__.py
│   ├── online
│   │   ├── __init__.py
│   │   └── online_model.py
│   ├── torch.py
│   └── model
│       ├── pytorch_utils.py
│       └── __init__.py
├── model
│   ├── ens
│   │   └── __init__.py
│   ├── interpret
│   │   ├── __init__.py
│   │   └── base.py
│   ├── __init__.py
│   ├── meta
│   │   ├── __init__.py
│   │   └── task.py
│   ├── riskmodel
│   │   └── __init__.py
│   └── utils.py
├── rl
│   ├── contrib
│   │   ├── __init__.py
│   │   └── utils.py
│   ├── strategy
│   │   ├── __init__.py
│   │   └── single_order.py
│   ├── data
│   │   ├── __init__.py
│   │   └── base.py
│   ├── __init__.py
│   ├── seed.py
│   ├── trainer
│   │   └── __init__.py
│   ├── utils
│   │   └── __init__.py
│   ├── order_execution
│   │   ├── __init__.py
│   │   └── utils.py
│   └── aux_info.py
├── workflow
│   ├── online
│   │   └── __init__.py
│   ├── task
│   │   └── __init__.py
│   └── utils.py
├── examples
│   ├── model_rolling
│   │   └── requirements.txt
│   ├── benchmarks
│   │   ├── Localformer
│   │   │   ├── README.md
│   │   │   └── requirements.txt
│   │   ├── MLP
│   │   │   ├── README.md
│   │   │   └── requirements.txt
│   │   ├── KRNN
│   │   │   ├── requirements.txt
│   │   │   └── README.md
│   │   ├── Sandwich
│   │   │   ├── requirements.txt
│   │   │   └── README.md
│   │   ├── TFT
│   │   │   ├── requirements.txt
│   │   │   ├── libs
│   │   │   │   └── __init__.py
│   │   │   ├── data_formatters
│   │   │   │   └── __init__.py
│   │   │   ├── expt_settings
│   │   │   │   └── __init__.py
│   │   │   ├── README.md
│   │   │   └── workflow_config_tft_Alpha158.yaml
│   │   ├── LightGBM
│   │   │   ├── requirements.txt
│   │   │   ├── README.md
│   │   │   ├── features_resample_N.py
│   │   │   ├── features_sample.py
│   │   │   ├── workflow_config_lightgbm_Alpha158.yaml
│   │   │   └── workflow_config_lightgbm_Alpha158_csi500.yaml
│   │   ├── Transformer
│   │   │   ├── requirements.txt
│   │   │   └── README.md
│   │   ├── XGBoost
│   │   │   ├── requirements.txt
│   │   │   ├── README.md
│   │   │   └── workflow_config_xgboost_Alpha158.yaml
│   │   ├── CatBoost
│   │   │   ├── requirements.txt
│   │   │   ├── README.md
│   │   │   ├── workflow_config_catboost_Alpha158.yaml
│   │   │   └── workflow_config_catboost_Alpha158_csi500.yaml
│   │   ├── DoubleEnsemble
│   │   │   ├── requirements.txt
│   │   │   └── README.md
│   │   ├── Linear
│   │   │   ├── requirements.txt
│   │   │   └── workflow_config_linear_Alpha158.yaml
│   │   ├── ADD
│   │   │   ├── requirements.txt
│   │   │   └── README.md
│   │   ├── GRU
│   │   │   ├── requirements.txt
│   │   │   ├── csi300_gru_ts.pkl
│   │   │   ├── model_gru_csi300.pkl
│   │   │   └── README.md
│   │   ├── HIST
│   │   │   ├── requirements.txt
│   │   │   ├── qlib_csi300_stock_index.npy
│   │   │   └── README.md
│   │   ├── SFM
│   │   │   ├── requirements.txt
│   │   │   └── README.md
│   │   ├── TCN
│   │   │   ├── requirements.txt
│   │   │   └── README.md
│   │   ├── TCTS
│   │   │   ├── requirements.txt
│   │   │   └── workflow.png
│   │   ├── TabNet
│   │   │   ├── requirements.txt
│   │   │   └── README.md
│   │   ├── ADARNN
│   │   │   ├── requirements.txt
│   │   │   └── README.md
│   │   ├── ALSTM
│   │   │   ├── requirements.txt
│   │   │   └── README.md
│   │   ├── GATs
│   │   │   ├── requirements.txt
│   │   │   └── README.md
│   │   ├── IGMTF
│   │   │   ├── requirements.txt
│   │   │   └── README.md
│   │   ├── LSTM
│   │   │   ├── requirements.txt
│   │   │   ├── csi300_lstm_ts.pkl
│   │   │   ├── model_lstm_csi300.pkl
│   │   │   └── README.md
│   │   ├── TRA
│   │   │   ├── requirements.txt
│   │   │   ├── data
│   │   │   │   └── README.md
│   │   │   ├── run.sh
│   │   │   ├── example.py
│   │   │   └── configs
│   │   │       ├── config_alstm.yaml
│   │   │       ├── config_alstm_tra_init.yaml
│   │   │       ├── config_transformer.yaml
│   │   │       ├── config_transformer_tra_init.yaml
│   │   │       ├── config_alstm_tra.yaml
│   │   │       └── config_transformer_tra.yaml
│   │   └── GeneralPtNN
│   │       └── README.md
│   ├── benchmarks_dynamic
│   │   ├── DDG-DA
│   │   │   ├── requirements.txt
│   │   │   ├── Makefile
│   │   │   ├── workflow.py
│   │   │   └── README.md
│   │   ├── baseline
│   │   │   ├── README.md
│   │   │   ├── rolling_benchmark.py
│   │   │   └── workflow_config_lightgbm_Alpha158.yaml
│   │   └── README.md
│   ├── data_demo
│   │   ├── README.md
│   │   ├── data_cache_demo.py
│   │   └── data_mem_resuse_demo.py
│   ├── hyperparameter
│   │   └── LightGBM
│   │       ├── requirements.txt
│   │       ├── Readme.md
│   │       ├── hyperparameter_158.py
│   │       └── hyperparameter_360.py
│   ├── README.md
│   ├── rl_order_execution
│   │   ├── scripts
│   │   │   ├── merge_orders.py
│   │   │   ├── gen_pickle_data.py
│   │   │   └── gen_training_orders.py
│   │   └── exp_configs
│   │       ├── backtest_twap.yml
│   │       ├── backtest_ppo.yml
│   │       ├── backtest_opds.yml
│   │       ├── train_opds.yml
│   │       └── train_ppo.yml
│   ├── rolling_process_data
│   │   ├── README.md
│   │   └── rolling_handler.py
│   ├── model_interpreter
│   │   └── feature.py
│   ├── nested_decision_execution
│   │   └── README.md
│   ├── portfolio
│   │   ├── README.md
│   │   ├── prepare_riskdata.py
│   │   └── config_enhanced_indexing.yaml
│   ├── highfreq
│   │   ├── README.md
│   │   └── workflow_config_High_Freq_Tree_Alpha158.yaml
│   ├── orderbook_data
│   │   └── README.md
│   └── online_srv
│       └── update_online_pred.py
├── docs
│   ├── changelog
│   │   └── changelog.rst
│   ├── _static
│   │   ├── img
│   │   │   ├── logo
│   │   │   │   ├── 1.png
│   │   │   │   ├── 2.png
│   │   │   │   ├── 3.png
│   │   │   │   ├── yellow_bg_rec.png
│   │   │   │   ├── yel_bg_rec+word.png
│   │   │   │   ├── white_bg_rec+word.png
│   │   │   │   └── yellow_bg_rec+word .png
│   │   │   ├── change doc.gif
│   │   │   ├── framework.png
│   │   │   ├── topk_drop.png
│   │   │   ├── RL_framework.png
│   │   │   ├── rdagent_logo.png
│   │   │   ├── QlibRL_framework.png
│   │   │   ├── analysis
│   │   │   │   ├── report.png
│   │   │   │   ├── score_ic.png
│   │   │   │   ├── rank_label_buy.png
│   │   │   │   ├── analysis_model_IC.png
│   │   │   │   ├── rank_label_hold.png
│   │   │   │   ├── rank_label_sell.png
│   │   │   │   ├── risk_analysis_bar.png
│   │   │   │   ├── risk_analysis_std.png
│   │   │   │   ├── analysis_model_NDQ.png
│   │   │   │   ├── cumulative_return_buy.png
│   │   │   │   ├── cumulative_return_hold.png
│   │   │   │   ├── cumulative_return_sell.png
│   │   │   │   ├── analysis_model_long_short.png
│   │   │   │   ├── analysis_model_monthly_IC.png
│   │   │   │   ├── risk_analysis_max_drawdown.png
│   │   │   │   ├── analysis_model_auto_correlation.png
│   │   │   │   ├── analysis_model_cumulative_return.png
│   │   │   │   ├── cumulative_return_buy_minus_sell.png
│   │   │   │   ├── risk_analysis_annualized_return.png
│   │   │   │   └── risk_analysis_information_ratio.png
│   │   │   ├── online_serving.png
│   │   │   ├── qrcode
│   │   │   │   └── gitter_qr.png
│   │   │   └── framework-abstract.jpg
│   │   └── demo.sh
│   ├── requirements.txt
│   ├── component
│   │   ├── rl
│   │   │   └── toctree.rst
│   │   └── online.rst
│   ├── Makefile
│   ├── make.bat
│   ├── advanced
│   │   ├── server.rst
│   │   └── serial.rst
│   ├── start
│   │   └── installation.rst
│   └── index.rst
├── data
│   ├── _libs
│   │   └── __init__.py
│   ├── storage
│   │   └── __init__.py
│   ├── inst_processor.py
│   ├── dataset
│   │   └── weight.py
│   └── __init__.py
├── strategy
│   └── __init__.py
├── scripts
│   ├── data_collector
│   │   ├── contrib
│   │   │   ├── fill_cn_1min_data
│   │   │   │   ├── requirements.txt
│   │   │   │   └── README.md
│   │   │   └── future_trading_date_collector
│   │   │       ├── requirements.txt
│   │   │       └── README.md
│   │   ├── us_index
│   │   │   ├── requirements.txt
│   │   │   └── README.md
│   │   ├── crypto
│   │   │   ├── requirement.txt
│   │   │   └── README.md
│   │   ├── fund
│   │   │   ├── requirements.txt
│   │   │   └── README.md
│   │   ├── cn_index
│   │   │   ├── requirements.txt
│   │   │   └── README.md
│   │   ├── pit
│   │   │   ├── requirements.txt
│   │   │   └── README.md
│   │   ├── yahoo
│   │   │   └── requirements.txt
│   │   ├── baostock_5min
│   │   │   └── requirements.txt
│   │   ├── br_index
│   │   │   └── requirements.txt
│   │   └── crowd_source
│   │       └── README.md
│   ├── get_data.py
│   ├── collect_info.py
│   └── README.md
├── tests
│   ├── dataset_tests
│   │   ├── README.md
│   │   └── test_datalayer.py
│   ├── data_mid_layer_tests
│   │   ├── README.md
│   │   └── test_handler.py
│   ├── dependency_tests
│   │   ├── README.md
│   │   └── test_mlflow.py
│   ├── pytest.ini
│   ├── conftest.py
│   ├── test_contrib_model.py
│   ├── test_workflow.py
│   ├── misc
│   │   ├── test_get_multi_proc.py
│   │   └── test_sepdf.py
│   ├── test_get_data.py
│   ├── test_register_ops.py
│   └── ops
│       └── test_elem_operator.py
├── MANIFEST.in
├── CODE_OF_CONDUCT.md
├── utils
│   └── exceptions.py
├── constant.py
├── setup.py
├── Dockerfile
├── LICENSE
└── typehint.py

--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/cli/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/contrib/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/contrib/eva/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/contrib/ops/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/model/ens/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/rl/contrib/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/contrib/data/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/model/interpret/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/workflow/online/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/contrib/data/utils/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/examples/model_rolling/requirements.txt:
--------------------------------------------------------------------------------
xgboost

--------------------------------------------------------------------------------
/docs/changelog/changelog.rst:
--------------------------------------------------------------------------------
.. include:: ../../CHANGES.rst

--------------------------------------------------------------------------------
/examples/benchmarks/Localformer/README.md:
--------------------------------------------------------------------------------
# Localformer

--------------------------------------------------------------------------------
/contrib/tuner/__init__.py:
--------------------------------------------------------------------------------
# pylint: skip-file
# flake8: noqa

--------------------------------------------------------------------------------
/examples/benchmarks/MLP/README.md:
--------------------------------------------------------------------------------
# Multi-Layer Perceptron (MLP)

--------------------------------------------------------------------------------
/examples/benchmarks_dynamic/DDG-DA/requirements.txt:
--------------------------------------------------------------------------------
torch==1.10.0

--------------------------------------------------------------------------------
/examples/benchmarks/KRNN/requirements.txt:
--------------------------------------------------------------------------------
numpy==1.23.4
pandas==1.5.2

--------------------------------------------------------------------------------
/examples/benchmarks/Sandwich/requirements.txt:
--------------------------------------------------------------------------------
numpy==1.23.4
pandas==1.5.2

--------------------------------------------------------------------------------
/examples/benchmarks/TFT/requirements.txt:
--------------------------------------------------------------------------------
tensorflow-gpu==1.15.0
pandas==1.1.0

--------------------------------------------------------------------------------
/examples/benchmarks/LightGBM/requirements.txt:
--------------------------------------------------------------------------------
pandas==1.1.2
numpy==1.21.0
lightgbm

--------------------------------------------------------------------------------
/examples/benchmarks/Localformer/requirements.txt:
--------------------------------------------------------------------------------
numpy==1.21.0
pandas==1.1.2
torch==1.2.0

--------------------------------------------------------------------------------
/examples/benchmarks/Transformer/requirements.txt:
--------------------------------------------------------------------------------
numpy==1.21.0
pandas==1.1.2
torch==1.2.0

--------------------------------------------------------------------------------
/examples/benchmarks/XGBoost/requirements.txt:
--------------------------------------------------------------------------------
numpy==1.21.0
pandas==1.1.2
xgboost==1.2.1

--------------------------------------------------------------------------------
/data/_libs/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

--------------------------------------------------------------------------------
/docs/_static/img/logo/1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/logo/1.png

--------------------------------------------------------------------------------
/docs/_static/img/logo/2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/logo/2.png

--------------------------------------------------------------------------------
/docs/_static/img/logo/3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/logo/3.png

--------------------------------------------------------------------------------
/examples/benchmarks/CatBoost/requirements.txt:
--------------------------------------------------------------------------------
pandas==1.1.2
numpy==1.21.0
catboost==0.24.3

--------------------------------------------------------------------------------
/strategy/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

--------------------------------------------------------------------------------
/examples/benchmarks/DoubleEnsemble/requirements.txt:
--------------------------------------------------------------------------------
pandas==1.1.2
numpy==1.21.0
lightgbm==3.1.0

--------------------------------------------------------------------------------
/examples/benchmarks/Linear/requirements.txt:
--------------------------------------------------------------------------------
numpy>=1.17.4
pandas>=1.0.1
scikit-learn>=0.23.1

--------------------------------------------------------------------------------
/docs/_static/img/change doc.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/change doc.gif

--------------------------------------------------------------------------------
/docs/_static/img/framework.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/framework.png

--------------------------------------------------------------------------------
/docs/_static/img/topk_drop.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/topk_drop.png

--------------------------------------------------------------------------------
/examples/benchmarks_dynamic/DDG-DA/Makefile:
--------------------------------------------------------------------------------
.PHONY: clean

clean:
	-rm -r *.pkl mlruns || true

--------------------------------------------------------------------------------
/scripts/data_collector/contrib/fill_cn_1min_data/requirements.txt:
--------------------------------------------------------------------------------
fire
pandas
loguru
tqdm
pyqlib

--------------------------------------------------------------------------------
/docs/_static/img/RL_framework.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/RL_framework.png

--------------------------------------------------------------------------------
/docs/_static/img/rdagent_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/rdagent_logo.png

--------------------------------------------------------------------------------
/examples/benchmarks/ADD/requirements.txt:
--------------------------------------------------------------------------------
numpy==1.21.0
pandas==1.1.2
scikit_learn==0.23.2
torch==1.7.0

--------------------------------------------------------------------------------
/examples/benchmarks/GRU/requirements.txt:
--------------------------------------------------------------------------------
numpy==1.21.0
pandas==1.1.2
scikit_learn==0.23.2
torch==1.7.0

--------------------------------------------------------------------------------
/examples/benchmarks/HIST/requirements.txt:
--------------------------------------------------------------------------------
pandas==1.1.2
numpy==1.21.0
scikit_learn==0.23.2
torch==1.7.0

--------------------------------------------------------------------------------
/examples/benchmarks/MLP/requirements.txt:
--------------------------------------------------------------------------------
pandas==1.1.2
numpy==1.21.0
scikit_learn==0.23.2
torch==1.7.0

--------------------------------------------------------------------------------
/examples/benchmarks/SFM/requirements.txt:
--------------------------------------------------------------------------------
pandas==1.1.2
numpy==1.21.0
scikit_learn==0.23.2
torch==1.7.0

--------------------------------------------------------------------------------
/examples/benchmarks/TCN/requirements.txt:
--------------------------------------------------------------------------------
numpy==1.21.0
pandas==1.1.2
scikit_learn==0.23.2
torch==1.7.0

--------------------------------------------------------------------------------
/examples/benchmarks/TCTS/requirements.txt:
--------------------------------------------------------------------------------
pandas==1.1.2
numpy==1.21.0
scikit_learn==0.23.2
torch==1.7.0

--------------------------------------------------------------------------------
/examples/benchmarks/TabNet/requirements.txt:
--------------------------------------------------------------------------------
pandas==1.1.2
numpy==1.21.0
scikit_learn==0.23.2
torch==1.7.0

--------------------------------------------------------------------------------
/docs/_static/img/QlibRL_framework.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/QlibRL_framework.png

--------------------------------------------------------------------------------
/docs/_static/img/analysis/report.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/analysis/report.png

--------------------------------------------------------------------------------
/docs/_static/img/online_serving.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/online_serving.png

--------------------------------------------------------------------------------
/docs/_static/img/qrcode/gitter_qr.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/qrcode/gitter_qr.png

--------------------------------------------------------------------------------
/examples/benchmarks/ADARNN/requirements.txt:
--------------------------------------------------------------------------------
pandas==1.1.2
numpy==1.21.0
scikit_learn==0.23.2
torch==1.7.0

--------------------------------------------------------------------------------
/examples/benchmarks/ALSTM/requirements.txt:
--------------------------------------------------------------------------------
numpy==1.21.0
pandas==1.1.2
scikit_learn==0.23.2
torch==1.7.0

--------------------------------------------------------------------------------
/examples/benchmarks/GATs/requirements.txt:
--------------------------------------------------------------------------------
pandas==1.1.2
numpy==1.21.0
scikit_learn==0.23.2
torch==1.7.0

--------------------------------------------------------------------------------
/examples/benchmarks/IGMTF/requirements.txt:
--------------------------------------------------------------------------------
pandas==1.1.2
numpy==1.21.0
scikit_learn==0.23.2
torch==1.7.0

--------------------------------------------------------------------------------
/examples/benchmarks/LSTM/requirements.txt:
--------------------------------------------------------------------------------
numpy==1.21.0
pandas==1.1.2
scikit_learn==0.23.2
torch==1.7.0

--------------------------------------------------------------------------------
/examples/benchmarks/TCTS/workflow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/examples/benchmarks/TCTS/workflow.png

--------------------------------------------------------------------------------
/scripts/data_collector/us_index/requirements.txt:
--------------------------------------------------------------------------------
fire
requests
pandas
lxml
loguru
fake-useragent

--------------------------------------------------------------------------------
/docs/_static/img/analysis/score_ic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/analysis/score_ic.png

--------------------------------------------------------------------------------
/docs/_static/img/framework-abstract.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/framework-abstract.jpg

--------------------------------------------------------------------------------
/docs/_static/img/logo/yellow_bg_rec.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/logo/yellow_bg_rec.png

--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
Cython
cmake
numpy
scipy
scikit-learn
pandas
tianshou
sphinx_rtd_theme

--------------------------------------------------------------------------------
/scripts/data_collector/crypto/requirement.txt:
--------------------------------------------------------------------------------
loguru
fire
requests
numpy
pandas
tqdm
lxml
pycoingecko

--------------------------------------------------------------------------------
/tests/dataset_tests/README.md:
--------------------------------------------------------------------------------
# About dataset tests
Tests in this folder verify the prepared dataset collected from Yahoo.

--------------------------------------------------------------------------------
/docs/_static/img/logo/yel_bg_rec+word.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/logo/yel_bg_rec+word.png

--------------------------------------------------------------------------------
/examples/benchmarks/GRU/csi300_gru_ts.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/examples/benchmarks/GRU/csi300_gru_ts.pkl

--------------------------------------------------------------------------------
/examples/benchmarks/TRA/requirements.txt:
--------------------------------------------------------------------------------
pandas==1.1.2
numpy==1.21.0
scikit_learn==0.23.2
torch==1.7.0
seaborn

--------------------------------------------------------------------------------
/scripts/data_collector/contrib/future_trading_date_collector/requirements.txt:
--------------------------------------------------------------------------------
baostock
fire
numpy
pandas
loguru

--------------------------------------------------------------------------------
/docs/_static/img/analysis/rank_label_buy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/analysis/rank_label_buy.png

--------------------------------------------------------------------------------
/docs/_static/img/logo/white_bg_rec+word.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/logo/white_bg_rec+word.png

--------------------------------------------------------------------------------
/examples/benchmarks/GRU/model_gru_csi300.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/examples/benchmarks/GRU/model_gru_csi300.pkl

--------------------------------------------------------------------------------
/examples/benchmarks/LSTM/csi300_lstm_ts.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/examples/benchmarks/LSTM/csi300_lstm_ts.pkl

--------------------------------------------------------------------------------
/examples/benchmarks/TRA/data/README.md:
--------------------------------------------------------------------------------
Data Link: https://drive.google.com/drive/folders/1fMqZYSeLyrHiWmVzygeI4sw3vp5Gt8cY?usp=sharing

--------------------------------------------------------------------------------
/docs/_static/img/analysis/analysis_model_IC.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/analysis/analysis_model_IC.png

--------------------------------------------------------------------------------
/docs/_static/img/analysis/rank_label_hold.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/analysis/rank_label_hold.png

--------------------------------------------------------------------------------
/docs/_static/img/analysis/rank_label_sell.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/analysis/rank_label_sell.png

--------------------------------------------------------------------------------
/docs/_static/img/analysis/risk_analysis_bar.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/analysis/risk_analysis_bar.png

--------------------------------------------------------------------------------
/docs/_static/img/analysis/risk_analysis_std.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/analysis/risk_analysis_std.png

--------------------------------------------------------------------------------
/docs/_static/img/logo/yellow_bg_rec+word .png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/logo/yellow_bg_rec+word .png

--------------------------------------------------------------------------------
/examples/benchmarks/LSTM/model_lstm_csi300.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/examples/benchmarks/LSTM/model_lstm_csi300.pkl

--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
exclude tests/*
include qlib/*
include qlib/*/*
include qlib/*/*/*
include qlib/*/*/*/*
include qlib/*/*/*/*/*

--------------------------------------------------------------------------------
/docs/_static/img/analysis/analysis_model_NDQ.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/analysis/analysis_model_NDQ.png

--------------------------------------------------------------------------------
/examples/data_demo/README.md:
--------------------------------------------------------------------------------
# Introduction
The examples in this folder demonstrate common usages of Qlib's data-related modules.

--------------------------------------------------------------------------------
/docs/_static/img/analysis/cumulative_return_buy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/analysis/cumulative_return_buy.png

--------------------------------------------------------------------------------
/docs/_static/img/analysis/cumulative_return_hold.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/analysis/cumulative_return_hold.png

--------------------------------------------------------------------------------
/docs/_static/img/analysis/cumulative_return_sell.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/analysis/cumulative_return_sell.png

--------------------------------------------------------------------------------
/examples/benchmarks/HIST/qlib_csi300_stock_index.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/examples/benchmarks/HIST/qlib_csi300_stock_index.npy

--------------------------------------------------------------------------------
/examples/hyperparameter/LightGBM/requirements.txt:
--------------------------------------------------------------------------------
pandas==1.1.2
numpy==1.21.0
lightgbm==3.1.0
optuna==2.7.0
optuna-dashboard==0.4.1

--------------------------------------------------------------------------------
/scripts/data_collector/fund/requirements.txt:
--------------------------------------------------------------------------------
loguru
fire
requests
numpy
pandas
tqdm
lxml
loguru
yahooquery

--------------------------------------------------------------------------------
/tests/data_mid_layer_tests/README.md:
--------------------------------------------------------------------------------
# Introduction
The middle layers of data, which mainly include:
- Handlers
- Processors
- Datasets

--------------------------------------------------------------------------------
/docs/_static/img/analysis/analysis_model_long_short.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/analysis/analysis_model_long_short.png

--------------------------------------------------------------------------------
/docs/_static/img/analysis/analysis_model_monthly_IC.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/analysis/analysis_model_monthly_IC.png

--------------------------------------------------------------------------------
/docs/_static/img/analysis/risk_analysis_max_drawdown.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/analysis/risk_analysis_max_drawdown.png

--------------------------------------------------------------------------------
/scripts/data_collector/cn_index/requirements.txt:
--------------------------------------------------------------------------------
baostock
fire
requests
pandas
lxml
loguru
tqdm
yahooquery
openpyxl

--------------------------------------------------------------------------------
/docs/_static/img/analysis/analysis_model_auto_correlation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/analysis/analysis_model_auto_correlation.png

--------------------------------------------------------------------------------
/docs/_static/img/analysis/analysis_model_cumulative_return.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/analysis/analysis_model_cumulative_return.png

--------------------------------------------------------------------------------
/docs/_static/img/analysis/cumulative_return_buy_minus_sell.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/analysis/cumulative_return_buy_minus_sell.png

--------------------------------------------------------------------------------
/docs/_static/img/analysis/risk_analysis_annualized_return.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/analysis/risk_analysis_annualized_return.png

--------------------------------------------------------------------------------
/docs/_static/img/analysis/risk_analysis_information_ratio.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/analysis/risk_analysis_information_ratio.png

--------------------------------------------------------------------------------
/scripts/data_collector/pit/requirements.txt:
--------------------------------------------------------------------------------
loguru
fire
tqdm
requests
pandas
lxml
loguru
baostock
yahooquery
beautifulsoup4

--------------------------------------------------------------------------------
/contrib/report/data/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""
This module is designed to analyze data.
"""

--------------------------------------------------------------------------------
/examples/benchmarks/ADD/README.md:
--------------------------------------------------------------------------------
# ADD
* Paper: [ADD: Augmented Disentanglement Distillation Framework for Improving Stock Trend Forecasting](https://arxiv.org/abs/2012.06289).

--------------------------------------------------------------------------------
/scripts/data_collector/yahoo/requirements.txt:
--------------------------------------------------------------------------------
loguru
fire
requests
numpy
pandas
tqdm
lxml
yahooquery
joblib
beautifulsoup4
bs4
soupsieve

--------------------------------------------------------------------------------
/tests/dependency_tests/README.md:
--------------------------------------------------------------------------------
Some implementations of Qlib depend on assumptions about its dependencies.

So some tests are required to ensure that these assumptions are valid.

--------------------------------------------------------------------------------
/rl/strategy/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
from .single_order import SingleOrderStrategy

__all__ = ["SingleOrderStrategy"]

--------------------------------------------------------------------------------
/model/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

import warnings

from .base import Model


__all__ = ["Model", "warnings"]

--------------------------------------------------------------------------------
/scripts/data_collector/baostock_5min/requirements.txt:
--------------------------------------------------------------------------------
loguru
fire
requests
numpy
pandas
tqdm
lxml
yahooquery
joblib
beautifulsoup4
bs4
soupsieve
baostock

--------------------------------------------------------------------------------
/examples/benchmarks/GRU/README.md:
--------------------------------------------------------------------------------
# Gated Recurrent Unit (GRU)
* Paper: [Learning Phrase Representations using RNN Encoder–Decoder for Statistical Machine Translation](https://aclanthology.org/D14-1179.pdf).

--------------------------------------------------------------------------------
/examples/benchmarks/LSTM/README.md:
--------------------------------------------------------------------------------
# Long Short-Term Memory (LSTM)
* Paper: [Long Short-Term Memory](https://direct.mit.edu/neco/article-abstract/9/8/1735/6109/Long-Short-Term-Memory?redirectedFrom=fulltext).

--------------------------------------------------------------------------------
/cli/data.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

import fire
from qlib.tests.data import GetData


if __name__ == "__main__":
    fire.Fire(GetData)

--------------------------------------------------------------------------------
/model/meta/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from .task import MetaTask
from .dataset import MetaTaskDataset


__all__ = ["MetaTask", "MetaTaskDataset"]

--------------------------------------------------------------------------------
/tests/pytest.ini:
--------------------------------------------------------------------------------
[pytest]
markers =
    slow: marks tests as slow (deselect with '-m "not slow"')
filterwarnings =
    ignore:.*rng.randint:DeprecationWarning
    ignore:.*Casting input x to numpy array:UserWarning

--------------------------------------------------------------------------------
/scripts/get_data.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

import fire
from qlib.tests.data import GetData


if __name__ == "__main__":
    fire.Fire(GetData)
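Both /cli/data.py and /scripts/get_data.py above are thin fire wrappers around `qlib.tests.data.GetData`. As a minimal sketch of the equivalent direct call, the `qlib_data` method and its `target_dir`/`region` arguments follow Qlib's public data API; the values shown are conventional defaults, not something mandated by this repository:

```python
# Roughly equivalent to:
#   python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --region cn
from qlib.tests.data import GetData

# Download the prepared CN daily dataset into a conventional location.
GetData().qlib_data(target_dir="~/.qlib/qlib_data/cn_data", region="cn")
```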
--------------------------------------------------------------------------------
/contrib/report/analysis_model/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from .analysis_model_performance import model_performance_graph


__all__ = ["model_performance_graph"]

--------------------------------------------------------------------------------
/examples/benchmarks/TabNet/README.md:
--------------------------------------------------------------------------------
# TabNet
* Code: [https://github.com/dreamquark-ai/tabnet](https://github.com/dreamquark-ai/tabnet)
* Paper: [TabNet: Attentive Interpretable Tabular Learning](https://arxiv.org/pdf/1908.07442.pdf).

--------------------------------------------------------------------------------
/contrib/meta/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from .data_selection import MetaTaskDS, MetaDatasetDS, MetaModelDS


__all__ = ["MetaTaskDS", "MetaDatasetDS", "MetaModelDS"]

--------------------------------------------------------------------------------
/contrib/workflow/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
from .record_temp import MultiSegRecord
from .record_temp import SignalMseRecord


__all__ = ["MultiSegRecord", "SignalMseRecord"]

--------------------------------------------------------------------------------
/examples/benchmarks/IGMTF/README.md:
--------------------------------------------------------------------------------
# IGMTF
* Code: [https://github.com/Wentao-Xu/IGMTF](https://github.com/Wentao-Xu/IGMTF)
* Paper: [IGMTF: An Instance-wise Graph-based Framework for Multivariate Time Series Forecasting](https://arxiv.org/abs/2109.06489).

--------------------------------------------------------------------------------
/examples/benchmarks/TCN/README.md:
--------------------------------------------------------------------------------
# TCN
* Code: [https://github.com/locuslab/TCN](https://github.com/locuslab/TCN)
* Paper: [An Empirical Evaluation of Generic Convolutional and Recurrent Networks for Sequence Modeling](https://arxiv.org/abs/1803.01271).

--------------------------------------------------------------------------------
/contrib/meta/data_selection/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from .dataset import MetaDatasetDS, MetaTaskDS
from .model import MetaModelDS


__all__ = ["MetaDatasetDS", "MetaTaskDS", "MetaModelDS"]

--------------------------------------------------------------------------------
/examples/benchmarks/XGBoost/README.md:
--------------------------------------------------------------------------------
# XGBoost
* Code: [https://github.com/dmlc/xgboost](https://github.com/dmlc/xgboost)
* Paper: XGBoost: A Scalable Tree Boosting System. [https://dl.acm.org/doi/pdf/10.1145/2939672.2939785](https://dl.acm.org/doi/pdf/10.1145/2939672.2939785).

--------------------------------------------------------------------------------
/examples/benchmarks/Transformer/README.md:
--------------------------------------------------------------------------------
# Transformer
* Code: [https://github.com/tensorflow/tensor2tensor](https://github.com/tensorflow/tensor2tensor)
* Paper: [Attention is All you Need](https://proceedings.neurips.cc/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf).

--------------------------------------------------------------------------------
/rl/data/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""Common utilities to handle ad-hoc-styled data.

Most of these snippets come from research projects (paper code).
Please be cautious when using them in production.
"""

--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
import os
import sys

# Ignore RL tests on non-Linux platforms.
collect_ignore = []

if sys.platform != "linux":
    for root, dirs, files in os.walk("rl"):
        for file in files:
            collect_ignore.append(os.path.join(root, file))

--------------------------------------------------------------------------------
/data/storage/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from .storage import CalendarStorage, InstrumentStorage, FeatureStorage, CalVT, InstVT, InstKT


__all__ = ["CalendarStorage", "InstrumentStorage", "FeatureStorage", "CalVT", "InstVT", "InstKT"]

--------------------------------------------------------------------------------
/docs/_static/demo.sh:
--------------------------------------------------------------------------------
#!/bin/sh
git clone https://github.com/microsoft/qlib.git
cd qlib
ls
pip install pyqlib
# or
# pip install numpy
# pip install --upgrade cython
# python setup.py install
cd examples
ls
qrun benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml

--------------------------------------------------------------------------------
/examples/benchmarks/ADARNN/README.md:
--------------------------------------------------------------------------------
# AdaRNN
* Code: [https://github.com/jindongwang/transferlearning/tree/master/code/deep/adarnn](https://github.com/jindongwang/transferlearning/tree/master/code/deep/adarnn)
* Paper: [AdaRNN: Adaptive Learning and Forecasting for Time Series](https://arxiv.org/pdf/2108.04443.pdf).

--------------------------------------------------------------------------------
/examples/benchmarks/HIST/README.md:
--------------------------------------------------------------------------------
# HIST
* Code: [https://github.com/Wentao-Xu/HIST](https://github.com/Wentao-Xu/HIST)
* Paper: [HIST: A Graph-based Framework for Stock Trend Forecasting via Mining Concept-Oriented Shared Information](https://arxiv.org/abs/2110.13716).

--------------------------------------------------------------------------------
/contrib/strategy/optimizer/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from .base import BaseOptimizer
from .optimizer import PortfolioOptimizer
from .enhanced_indexing import EnhancedIndexingOptimizer


__all__ = ["BaseOptimizer", "PortfolioOptimizer", "EnhancedIndexingOptimizer"]

--------------------------------------------------------------------------------
/contrib/rolling/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""
The difference between this module and the scripts in examples/benchmarks_dynamic:
- This module only focuses on providing a general rolling implementation.
  Anything specific to a benchmark is placed in examples/benchmarks_dynamic.
"""

--------------------------------------------------------------------------------
/examples/README.md:
--------------------------------------------------------------------------------
# Requirements

Here are the minimal hardware requirements to run the `workflow_by_code` example.
- Memory: 16G
- Free Disk: 5G


# NOTE
The results will vary slightly across different OSs (the variance of annualized return will be less than 2%).
The evaluation results on the `README.md` page are from Linux OS.

--------------------------------------------------------------------------------
/rl/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from .interpreter import Interpreter, StateInterpreter, ActionInterpreter
from .reward import Reward, RewardCombination
from .simulator import Simulator

__all__ = ["Interpreter", "StateInterpreter", "ActionInterpreter", "Reward", "RewardCombination", "Simulator"]

--------------------------------------------------------------------------------
/examples/benchmarks/CatBoost/README.md:
--------------------------------------------------------------------------------
# CatBoost
* Code: [https://github.com/catboost/catboost](https://github.com/catboost/catboost)
* Paper: CatBoost: unbiased boosting with categorical features. [https://proceedings.neurips.cc/paper/2018/file/14491b756b3a51daac41c24863285549-Paper.pdf](https://proceedings.neurips.cc/paper/2018/file/14491b756b3a51daac41c24863285549-Paper.pdf).

--------------------------------------------------------------------------------
/docs/component/rl/toctree.rst:
--------------------------------------------------------------------------------
.. _rl:

========================================================================
Reinforcement Learning in Quantitative Trading
========================================================================

.. toctree::
    Guidance
    Overall
    Quick Start
    Framework

--------------------------------------------------------------------------------
/contrib/strategy/optimizer/base.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

import abc


class BaseOptimizer(abc.ABC):
    """Construct a portfolio with an optimization-related method"""

    @abc.abstractmethod
    def __call__(self, *args, **kwargs) -> object:
        """Generate an optimized portfolio allocation"""
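`BaseOptimizer` above only fixes a `__call__` contract. As a hedged illustration of how a concrete optimizer could plug into it, here is a toy inverse-volatility subclass; `InverseVolOptimizer` is hypothetical and not part of this repository, and the import assumes the installed `qlib.contrib` package layout:

```python
import numpy as np

from qlib.contrib.strategy.optimizer import BaseOptimizer  # assumed install path


class InverseVolOptimizer(BaseOptimizer):
    """Toy example (not in the repo): weight assets inversely to volatility."""

    def __call__(self, vol: np.ndarray) -> np.ndarray:
        w = 1.0 / np.clip(vol, 1e-8, None)  # guard against zero volatility
        return w / w.sum()                  # normalize weights to sum to 1
```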
--------------------------------------------------------------------------------
/rl/seed.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""Defines a set of initial state definitions and state-set definitions.

With single-asset order execution only, the only seed is the order.
"""

from typing import TypeVar

InitialStateType = TypeVar("InitialStateType")
"""Type of data that creates the simulator."""

--------------------------------------------------------------------------------
/contrib/report/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

GRAPH_NAME_LIST = [
    "analysis_position.report_graph",
    "analysis_position.score_ic_graph",
    "analysis_position.cumulative_return_graph",
    "analysis_position.risk_analysis_graph",
    "analysis_position.rank_label_graph",
    "analysis_model.model_performance_graph",
]

--------------------------------------------------------------------------------
/model/riskmodel/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from .base import RiskModel
from .poet import POETCovEstimator
from .shrink import ShrinkCovEstimator
from .structured import StructuredCovEstimator


__all__ = [
    "RiskModel",
    "POETCovEstimator",
    "ShrinkCovEstimator",
    "StructuredCovEstimator",
]

--------------------------------------------------------------------------------
/examples/benchmarks/KRNN/README.md:
--------------------------------------------------------------------------------
# KRNN
* Code: [https://github.com/microsoft/FOST/blob/main/fostool/model/krnn.py](https://github.com/microsoft/FOST/blob/main/fostool/model/krnn.py)


# Introduction to the settings/configs.
* torch_geometric is used in the original FOST model, but we do not use it here.
* Make sure your CUDA version matches your torch version so the GPU can be used; we use CUDA==10.2 and torch.__version__==1.12.1.

--------------------------------------------------------------------------------
/examples/benchmarks/ALSTM/README.md:
--------------------------------------------------------------------------------
# ALSTM

- ALSTM adds a temporal attentive aggregation layer on top of a standard LSTM.

- Paper: A dual-stage attention-based recurrent neural network for time series prediction.

  [https://www.ijcai.org/Proceedings/2017/0366.pdf](https://www.ijcai.org/Proceedings/2017/0366.pdf)

- NOTE: The current implementation is a simplified version of ALSTM: an LSTM with attention.
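To make "an LSTM with attention" concrete, a minimal sketch of the idea follows; it only illustrates temporal attentive aggregation and is not Qlib's `pytorch_alstm` implementation (layer sizes are arbitrary):

```python
import torch
import torch.nn as nn


class ToyALSTM(nn.Module):
    """LSTM whose hidden states are aggregated by a learned attention score."""

    def __init__(self, d_feat: int = 6, hidden_size: int = 64):
        super().__init__()
        self.rnn = nn.LSTM(d_feat, hidden_size, batch_first=True)
        self.att = nn.Linear(hidden_size, 1)        # one score per time step
        self.fc_out = nn.Linear(hidden_size * 2, 1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # x: [batch, time, d_feat]
        out, _ = self.rnn(x)                         # [batch, time, hidden]
        score = torch.softmax(self.att(out), dim=1)  # attention over time steps
        agg = (out * score).sum(dim=1)               # attentive aggregation
        last = out[:, -1, :]                         # plain LSTM summary
        return self.fc_out(torch.cat([agg, last], dim=1)).squeeze(-1)
```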
--------------------------------------------------------------------------------
/examples/benchmarks/Sandwich/README.md:
--------------------------------------------------------------------------------
# Sandwich
* Code: [https://github.com/microsoft/FOST/blob/main/fostool/model/sandwich.py](https://github.com/microsoft/FOST/blob/main/fostool/model/sandwich.py)


# Introduction to the settings/configs.
* torch_geometric is used in the original FOST model, but we do not use it here.
* Make sure your CUDA version matches your torch version so the GPU can be used; we use CUDA==10.2 and torch.__version__==1.12.1.

--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
# Microsoft Open Source Code of Conduct

This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).

Resources:

- [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/)
- [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/)
- Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns

--------------------------------------------------------------------------------
/contrib/report/analysis_position/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from .cumulative_return import cumulative_return_graph
from .score_ic import score_ic_graph
from .report import report_graph
from .rank_label import rank_label_graph
from .risk_analysis import risk_analysis_graph


__all__ = ["cumulative_return_graph", "score_ic_graph", "report_graph", "rank_label_graph", "risk_analysis_graph"]

--------------------------------------------------------------------------------
/examples/benchmarks/SFM/README.md:
--------------------------------------------------------------------------------
# State-Frequency-Memory
- State Frequency Memory (SFM) is a novel recurrent network that uses the Discrete Fourier Transform to decompose the hidden states of memory cells and capture multi-frequency trading patterns from past market data to make stock price predictions.
- Paper: Stock Price Prediction via Discovering Multi-Frequency Trading Patterns. [http://www.eecs.ucf.edu/~gqi/publications/kdd2017_stock.pdf](http://www.eecs.ucf.edu/~gqi/publications/kdd2017_stock.pdf).

--------------------------------------------------------------------------------
/examples/benchmarks/GATs/README.md:
--------------------------------------------------------------------------------
# GATs
* Graph Attention Networks (GATs) apply masked self-attention layers to graph-structured data. Nodes in stacked layers carry different weights and can attend over their neighborhoods' features, without requiring any costly matrix operation (such as inversion) or prior knowledge of the graph structure.
* The code used in Qlib is our own PyTorch implementation.
* Paper: Graph Attention Networks https://arxiv.org/pdf/1710.10903.pdf
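As a hedged sketch of the masked self-attention described above (in the spirit of the GAT paper, not Qlib's `pytorch_gats` implementation), a single dense attention layer can be written as follows; it assumes `adj` includes self-loops so every row has at least one neighbor:

```python
import torch
import torch.nn as nn


class ToyGATLayer(nn.Module):
    """One graph-attention layer over a dense 0/1 adjacency matrix."""

    def __init__(self, in_dim: int, out_dim: int):
        super().__init__()
        self.W = nn.Linear(in_dim, out_dim, bias=False)
        self.a = nn.Linear(2 * out_dim, 1, bias=False)
        self.leaky_relu = nn.LeakyReLU(0.2)

    def forward(self, h: torch.Tensor, adj: torch.Tensor) -> torch.Tensor:
        # h: [N, in_dim], adj: [N, N] 0/1 mask with self-loops
        z = self.W(h)                           # [N, out_dim]
        n = z.size(0)
        zi = z.unsqueeze(1).expand(n, n, -1)    # features of node i
        zj = z.unsqueeze(0).expand(n, n, -1)    # features of node j
        e = self.leaky_relu(self.a(torch.cat([zi, zj], dim=-1))).squeeze(-1)
        e = e.masked_fill(adj == 0, float("-inf"))  # mask non-neighbors
        alpha = torch.softmax(e, dim=1)         # attention within each neighborhood
        return alpha @ z                        # weighted aggregation of neighbors
```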
5 | * Paper: Graph Attention Networks https://arxiv.org/pdf/1710.10903.pdf -------------------------------------------------------------------------------- /utils/exceptions.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | 5 | # Base exception class 6 | class QlibException(Exception): 7 | pass 8 | 9 | 10 | class RecorderInitializationError(QlibException): 11 | """Error type for re-initialization when starting an experiment""" 12 | 13 | 14 | class LoadObjectError(QlibException): 15 | """Error type for Recorder when it cannot load an object""" 16 | 17 | 18 | class ExpAlreadyExistError(Exception): 19 | """Experiment already exists""" 20 | -------------------------------------------------------------------------------- /contrib/tuner/space.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | # pylint: skip-file 5 | # flake8: noqa 6 | 7 | from hyperopt import hp 8 | 9 | 10 | TopkAmountStrategySpace = { 11 | "topk": hp.choice("topk", [30, 35, 40]), 12 | "buffer_margin": hp.choice("buffer_margin", [200, 250, 300]), 13 | } 14 | 15 | QLibDataLabelSpace = { 16 | "labels": hp.choice( 17 | "labels", 18 | [["Ref($vwap, -2)/Ref($vwap, -1) - 1"], ["Ref($close, -5)/$close - 1"]], 19 | ) 20 | } 21 | -------------------------------------------------------------------------------- /examples/rl_order_execution/scripts/merge_orders.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import os 3 | import pandas as pd 4 | from tqdm import tqdm 5 | 6 | for tag in ["test", "valid"]: 7 | files = os.listdir(os.path.join("data/orders/", tag)) 8 | dfs = [] 9 | for f in tqdm(files): 10 | df = pickle.load(open(os.path.join("data/orders/", tag, f), "rb")) 11 | df = df.drop(["$close0"], axis=1) 12 | dfs.append(df) 13 | 14 | total_df = pd.concat(dfs) 15 | pickle.dump(total_df, open(os.path.join("data", "orders", f"{tag}_orders.pkl"), "wb")) 16 | -------------------------------------------------------------------------------- /rl/trainer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | """Train, test, inference utilities.""" 5 | 6 | from .api import backtest, train 7 | from .callbacks import Checkpoint, EarlyStopping, MetricsWriter 8 | from .trainer import Trainer 9 | from .vessel import TrainingVessel, TrainingVesselBase 10 | 11 | __all__ = [ 12 | "Trainer", 13 | "TrainingVessel", 14 | "TrainingVesselBase", 15 | "Checkpoint", 16 | "EarlyStopping", 17 | "MetricsWriter", 18 | "train", 19 | "backtest", 20 | ] 21 | -------------------------------------------------------------------------------- /scripts/data_collector/contrib/future_trading_date_collector/README.md: -------------------------------------------------------------------------------- 1 | # Get future trading days 2 | 3 | > `D.calendar(future=True)` will be used 4 | 5 | ## Collector Data 6 | 7 | ```bash 8 | pip install -r requirements.txt 9 | ``` 10 | 11 | ## Collector Data 12 | 13 | ```bash 14 | # collect future trading days, used in the qlib calendar.
15 | python future_trading_date_collector.py --qlib_dir ~/.qlib/qlib_data/cn_data --freq day 16 | ``` 17 | 18 | ## Parameters 19 | 20 | - qlib_dir: qlib data directory 21 | - freq: value from [`day`, `1min`], default `day` 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /constant.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | # REGION CONST 5 | from typing import TypeVar 6 | 7 | import numpy as np 8 | import pandas as pd 9 | 10 | REG_CN = "cn" 11 | REG_US = "us" 12 | REG_TW = "tw" 13 | 14 | # Epsilon for avoiding division by zero. 15 | EPS = 1e-12 16 | 17 | # Infinity as an integer 18 | INF = int(1e18) 19 | ONE_DAY = pd.Timedelta("1day") 20 | ONE_MIN = pd.Timedelta("1min") 21 | EPS_T = pd.Timedelta("1s") # use 1 second to exclude the right interval point 22 | float_or_ndarray = TypeVar("float_or_ndarray", float, np.ndarray) 23 | -------------------------------------------------------------------------------- /examples/hyperparameter/LightGBM/Readme.md: -------------------------------------------------------------------------------- 1 | # LightGBM hyperparameter 2 | 3 | ## Alpha158 4 | First terminal 5 | ``` 6 | optuna create-study --study LGBM_158 --storage sqlite:///db.sqlite3 7 | optuna-dashboard --port 5000 --host 0.0.0.0 sqlite:///db.sqlite3 8 | ``` 9 | Second terminal 10 | ``` 11 | python hyperparameter_158.py 12 | ``` 13 | 14 | ## Alpha360 15 | First terminal 16 | ``` 17 | optuna create-study --study LGBM_360 --storage sqlite:///db.sqlite3 18 | optuna-dashboard --port 5000 --host 0.0.0.0 sqlite:///db.sqlite3 19 | ``` 20 | Second terminal 21 | ``` 22 | python hyperparameter_360.py 23 | ``` 24 | -------------------------------------------------------------------------------- /workflow/task/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | """ 4 | Task-related workflow is implemented in this folder 5 | 6 | A typical task workflow 7 | 8 | | Step | Description | 9 | |-----------------------+------------------------------------------------| 10 | | TaskGen | Generating tasks. | 11 | | TaskManager(optional) | Manage generated tasks | 12 | | run task | retrieve tasks from TaskManager and run tasks. | 13 | """ 14 | -------------------------------------------------------------------------------- /examples/benchmarks/LightGBM/README.md: -------------------------------------------------------------------------------- 1 | # LightGBM 2 | * Code: [https://github.com/microsoft/LightGBM](https://github.com/microsoft/LightGBM) 3 | * Paper: LightGBM: A Highly Efficient Gradient Boosting 4 | Decision Tree. [https://proceedings.neurips.cc/paper/2017/file/6449f44a102fde848669bdd9eb6b76fa-Paper.pdf](https://proceedings.neurips.cc/paper/2017/file/6449f44a102fde848669bdd9eb6b76fa-Paper.pdf). 5 | 6 | 7 | # Introductions about the settings/configs. 8 | 9 | `workflow_config_lightgbm_multi_freq.yaml` 10 | - It uses data sources of different frequencies (i.e. multiple frequencies) for daily prediction.
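To make the multi-frequency idea concrete, here is a tiny pandas-only sketch with made-up data (illustrative only; the real config wires this up through data handlers and instrument processors such as `features_sample.py` / `features_resample_N.py`):

```python
import pandas as pd

day = pd.DataFrame(
    {"close_day": [10.0, 10.5]},
    index=pd.to_datetime(["2020-01-02", "2020-01-03"]),
)
minute = pd.DataFrame(
    {"close_1min": [10.1, 10.2, 10.6, 10.7]},
    index=pd.to_datetime(
        ["2020-01-02 14:59", "2020-01-02 15:00", "2020-01-03 14:59", "2020-01-03 15:00"]
    ),
)

# collapse the 1min series to one row per day (last bar), then join with the daily data
minute_daily = minute.resample("1D").last()
features = day.join(minute_daily)  # a single daily table mixing both frequencies
print(features)
```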
11 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy 4 | from setuptools import Extension, setup 5 | 6 | 7 | NUMPY_INCLUDE = numpy.get_include() 8 | 9 | 10 | setup( 11 | ext_modules=[ 12 | Extension( 13 | "qlib.data._libs.rolling", 14 | ["qlib/data/_libs/rolling.pyx"], 15 | language="c++", 16 | include_dirs=[NUMPY_INCLUDE], 17 | ), 18 | Extension( 19 | "qlib.data._libs.expanding", 20 | ["qlib/data/_libs/expanding.pyx"], 21 | language="c++", 22 | include_dirs=[NUMPY_INCLUDE], 23 | ), 24 | ], 25 | ) 26 | -------------------------------------------------------------------------------- /rl/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | from .data_queue import DataQueue 5 | from .env_wrapper import EnvWrapper, EnvWrapperStatus 6 | from .finite_env import FiniteEnvType, vectorize_env 7 | from .log import ConsoleWriter, CsvWriter, LogBuffer, LogCollector, LogLevel, LogWriter 8 | 9 | __all__ = [ 10 | "LogLevel", 11 | "DataQueue", 12 | "EnvWrapper", 13 | "FiniteEnvType", 14 | "LogCollector", 15 | "LogWriter", 16 | "vectorize_env", 17 | "ConsoleWriter", 18 | "CsvWriter", 19 | "EnvWrapperStatus", 20 | "LogBuffer", 21 | ] 22 | -------------------------------------------------------------------------------- /examples/rl_order_execution/exp_configs/backtest_twap.yml: -------------------------------------------------------------------------------- 1 | order_file: ./data/orders/test_orders.pkl 2 | start_time: "9:30" 3 | end_time: "14:54" 4 | data_granularity: "5min" 5 | qlib: 6 | provider_uri_5min: ./data/bin/ 7 | exchange: 8 | limit_threshold: null 9 | deal_price: ["$close", "$close"] 10 | volume_threshold: null 11 | strategies: 12 | 1day: 13 | class: TWAPStrategy 14 | kwargs: {} 15 | module_path: qlib.contrib.strategy.rule_strategy 16 | 30min: 17 | class: TWAPStrategy 18 | kwargs: {} 19 | module_path: qlib.contrib.strategy.rule_strategy 20 | concurrency: 16 21 | output_dir: outputs/twap/ 22 | -------------------------------------------------------------------------------- /scripts/data_collector/cn_index/README.md: -------------------------------------------------------------------------------- 1 | # CSI300/CSI100/CSI500 History Companies Collection 2 | 3 | ## Requirements 4 | 5 | ```bash 6 | pip install -r requirements.txt 7 | ``` 8 | 9 | ## Collector Data 10 | 11 | ```bash 12 | # parse instruments, used in qlib/instruments. 13 | python collector.py --index_name CSI300 --qlib_dir ~/.qlib/qlib_data/cn_data --method parse_instruments 14 | 15 | # parse new companies 16 | python collector.py --index_name CSI300 --qlib_dir ~/.qlib/qlib_data/cn_data --method save_new_companies 17 | 18 | # supported index_name values: CSI300, CSI100, CSI500 19 | # help 20 | python collector.py --help 21 | ``` 22 | 23 | -------------------------------------------------------------------------------- /scripts/data_collector/us_index/README.md: -------------------------------------------------------------------------------- 1 | # NASDAQ100/SP500/SP400/DJIA History Companies Collection 2 | 3 | ## Requirements 4 | 5 | ```bash 6 | pip install -r requirements.txt 7 | ``` 8 | 9 | ## Collector Data 10 | 11 | ```bash 12 | # parse instruments, used in qlib/instruments.
13 | python collector.py --index_name SP500 --qlib_dir ~/.qlib/qlib_data/us_data --method parse_instruments 14 | 15 | # parse new companies 16 | python collector.py --index_name SP500 --qlib_dir ~/.qlib/qlib_data/us_data --method save_new_companies 17 | 18 | # supported index_name values: SP500, NASDAQ100, DJIA, SP400 19 | # help 20 | python collector.py --help 21 | ``` 22 | 23 | -------------------------------------------------------------------------------- /contrib/strategy/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | 5 | from .signal_strategy import ( 6 | TopkDropoutStrategy, 7 | WeightStrategyBase, 8 | EnhancedIndexingStrategy, 9 | ) 10 | 11 | from .rule_strategy import ( 12 | TWAPStrategy, 13 | SBBStrategyBase, 14 | SBBStrategyEMA, 15 | ) 16 | 17 | from .cost_control import SoftTopkStrategy 18 | 19 | 20 | __all__ = [ 21 | "TopkDropoutStrategy", 22 | "WeightStrategyBase", 23 | "EnhancedIndexingStrategy", 24 | "TWAPStrategy", 25 | "SBBStrategyBase", 26 | "SBBStrategyEMA", 27 | "SoftTopkStrategy", 28 | ] 29 | -------------------------------------------------------------------------------- /contrib/online/__init__.py: -------------------------------------------------------------------------------- 1 | # pylint: skip-file 2 | # flake8: noqa 3 | 4 | ''' 5 | TODO: 6 | 7 | - Online requires the model to have the following method 8 | def get_data_with_date(self, date, **kwargs): 9 | """ 10 | Will be called in the online module; 11 | needs to return the data that is used to predict the label (score) of stocks at date. 12 | 13 | :param 14 | date: pd.Timestamp 15 | predict date 16 | :return: 17 | data: the input data that is used to predict the label (score) of stocks at the predict date. 18 | """ 19 | raise NotImplementedError("get_data_with_date for this model is not implemented.") 20 | 21 | ''' 22 | -------------------------------------------------------------------------------- /examples/benchmarks/LightGBM/features_resample_N.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | import pandas as pd 5 | 6 | from qlib.data.inst_processor import InstProcessor 7 | from qlib.utils.resam import resam_calendar 8 | 9 | 10 | class ResampleNProcessor(InstProcessor): 11 | def __init__(self, target_frq: str, **kwargs): 12 | self.target_frq = target_frq 13 | 14 | def __call__(self, df: pd.DataFrame, *args, **kwargs): 15 | df.index = pd.to_datetime(df.index) 16 | res_index = resam_calendar(df.index, "1min", self.target_frq) 17 | df = df.resample(self.target_frq).last().reindex(res_index) 18 | return df 19 | -------------------------------------------------------------------------------- /model/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License.
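# Usage sketch (hypothetical, for illustration only): ConcatDataset below zips
# several datasets index-by-index, e.g.
#     pairs = ConcatDataset(feature_ds, label_ds)
#     x, y = pairs[0]  # one sample drawn from each dataset
# while IndexSampler attaches the integer index to each sampled item.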
3 | 4 | from torch.utils.data import Dataset 5 | 6 | 7 | class ConcatDataset(Dataset): 8 | def __init__(self, *datasets): 9 | self.datasets = datasets 10 | 11 | def __getitem__(self, i): 12 | return tuple(d[i] for d in self.datasets) 13 | 14 | def __len__(self): 15 | return min(len(d) for d in self.datasets) 16 | 17 | 18 | class IndexSampler: 19 | def __init__(self, sampler): 20 | self.sampler = sampler 21 | 22 | def __getitem__(self, i: int): 23 | return self.sampler[i], i 24 | 25 | def __len__(self): 26 | return len(self.sampler) 27 | -------------------------------------------------------------------------------- /scripts/data_collector/contrib/fill_cn_1min_data/README.md: -------------------------------------------------------------------------------- 1 | # Use 1d data to fill in the missing symbols relative to 1min 2 | 3 | 4 | ## Requirements 5 | 6 | ```bash 7 | pip install -r requirements.txt 8 | ``` 9 | 10 | ## Fill 1min data 11 | 12 | ```bash 13 | python fill_cn_1min_data.py --data_1min_dir ~/.qlib/csv_data/cn_data_1min --qlib_data_1d_dir ~/.qlib/qlib_data/cn_data 14 | ``` 15 | 16 | ## Parameters 17 | 18 | - data_1min_dir: csv data 19 | - qlib_data_1d_dir: qlib data directory 20 | - max_workers: `ThreadPoolExecutor(max_workers=max_workers)`, by default *16* 21 | - date_field_name: date field name, by default *date* 22 | - symbol_field_name: symbol field name, by default *symbol* 23 | 24 | -------------------------------------------------------------------------------- /data/inst_processor.py: -------------------------------------------------------------------------------- 1 | import abc 2 | import json 3 | import pandas as pd 4 | 5 | 6 | class InstProcessor: 7 | @abc.abstractmethod 8 | def __call__(self, df: pd.DataFrame, instrument, *args, **kwargs): 9 | """ 10 | process the data 11 | 12 | NOTE: **the processor may change the content of `df` in place!** 13 | Users should keep a copy of the data outside 14 | 15 | Parameters 16 | ---------- 17 | df : pd.DataFrame 18 | The raw_df of handler or result from previous processor. 19 | """ 20 | 21 | def __str__(self): 22 | return f"{self.__class__.__name__}:{json.dumps(self.__dict__, sort_keys=True, default=str)}" 23 | -------------------------------------------------------------------------------- /examples/benchmarks/TFT/libs/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Google Research Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | -------------------------------------------------------------------------------- /examples/benchmarks_dynamic/baseline/README.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | 3 | This is the framework for periodically Rolling Retrain (RR) forecasting models. RR adapts to market dynamics by periodically retraining on up-to-date data; a minimal sketch of this loop is shown below.
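A toy walk-forward sketch of the RR loop (illustrative only: `fit`/`predict` are trivial stand-ins and the window sizes are made up; the real workflow delegates all of this to qlib's rolling utilities):

```python
import pandas as pd

# toy stand-ins for a real model; the benchmark uses qlib model classes instead
def fit(train_df):
    return train_df["feature"].mean()  # "model" = the mean of one feature

def predict(model, test_df):
    return pd.Series(model, index=test_df.index, name="score")

def rolling_retrain(data, train_len=500, step=60):
    """Walk forward: retrain on a trailing window, then score the next `step` rows."""
    preds = []
    for end in range(train_len, len(data), step):
        model = fit(data.iloc[end - train_len:end])              # refit on up-to-date window
        preds.append(predict(model, data.iloc[end:end + step]))  # score the next period
    return pd.concat(preds)

data = pd.DataFrame({"feature": range(1000)})
print(rolling_retrain(data).tail())
```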
4 | 5 | ## Run the Code 6 | Users can try RR by running the following command: 7 | ```bash 8 | python rolling_benchmark.py run 9 | ``` 10 | 11 | The default forecasting model is `Linear`. Users can choose other forecasting models by changing the `model_type` parameter. 12 | For example, users can try the `LightGBM` forecasting model by running the following command: 13 | ```bash 14 | python rolling_benchmark.py --conf_path=workflow_config_lightgbm_Alpha158.yaml run 15 | 16 | ``` 17 | -------------------------------------------------------------------------------- /examples/benchmarks/TFT/data_formatters/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Google Research Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | -------------------------------------------------------------------------------- /examples/benchmarks/TFT/expt_settings/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Google Research Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | -------------------------------------------------------------------------------- /contrib/rolling/__main__.py: -------------------------------------------------------------------------------- 1 | import fire 2 | from qlib import auto_init 3 | from qlib.contrib.rolling.base import Rolling 4 | from qlib.utils.mod import find_all_classes 5 | 6 | if __name__ == "__main__": 7 | sub_commands = {} 8 | for cls in find_all_classes("qlib.contrib.rolling", Rolling): 9 | sub_commands[cls.__module__.split(".")[-1]] = cls 10 | # The sub_commands will be like 11 | # {'base': <class 'qlib.contrib.rolling.base.Rolling'>, ...} 12 | # So you can run it with commands like the ones below 13 | # - `python -m qlib.contrib.rolling base --conf_path <yaml path> run` 14 | # - `base` can be replaced with other module names 15 | auto_init() 16 | fire.Fire(sub_commands) 17 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = python3 -msphinx 7 | SPHINXPROJ = Quantlab 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help".
12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | pip install -r requirements.txt 21 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 22 | -------------------------------------------------------------------------------- /examples/benchmarks/LightGBM/features_sample.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import pandas as pd 3 | 4 | from qlib.data.inst_processor import InstProcessor 5 | 6 | 7 | class Resample1minProcessor(InstProcessor): 8 | """This processor resamples the data from 1min frequency to daily frequency by selecting a specific minute""" 9 | 10 | def __init__(self, hour: int, minute: int, **kwargs): 11 | self.hour = hour 12 | self.minute = minute 13 | 14 | def __call__(self, df: pd.DataFrame, *args, **kwargs): 15 | df.index = pd.to_datetime(df.index) 16 | df = df.loc[df.index.time == datetime.time(self.hour, self.minute)] 17 | df.index = df.index.normalize() 18 | return df 19 | -------------------------------------------------------------------------------- /scripts/data_collector/br_index/requirements.txt: -------------------------------------------------------------------------------- 1 | async-generator==1.10 2 | attrs==21.4.0 3 | certifi==2022.12.7 4 | cffi==1.15.0 5 | charset-normalizer==2.0.12 6 | cryptography==36.0.1 7 | fire==0.4.0 8 | h11==0.13.0 9 | idna==3.3 10 | loguru==0.6.0 11 | lxml==4.9.1 12 | multitasking==0.0.10 13 | numpy==1.22.2 14 | outcome==1.1.0 15 | pandas==1.4.1 16 | pycoingecko==2.2.0 17 | pycparser==2.21 18 | pyOpenSSL==22.0.0 19 | PySocks==1.7.1 20 | python-dateutil==2.8.2 21 | pytz==2021.3 22 | requests==2.27.1 23 | requests-futures==1.0.0 24 | six==1.16.0 25 | sniffio==1.2.0 26 | sortedcontainers==2.4.0 27 | termcolor==1.1.0 28 | tqdm==4.63.0 29 | trio==0.20.0 30 | trio-websocket==0.9.2 31 | urllib3==1.26.19 32 | wget==3.2 33 | wsproto==1.1.0 34 | yahooquery==2.2.15 35 | -------------------------------------------------------------------------------- /examples/rolling_process_data/README.md: -------------------------------------------------------------------------------- 1 | # Rolling Process Data 2 | 3 | This workflow is an example of `Rolling Process Data`. 4 | 5 | ## Background 6 | 7 | When rolling-training the models, data also needs to be generated for the different rolling windows. When the rolling window moves, the training data will change, and the processor's learnable state (such as standard deviation, mean, etc.) will also change. 8 | 9 | In order to avoid regenerating data, this example uses the `DataHandler-based DataLoader` to load the raw features that are not related to the rolling window, and then uses Processors to generate processed features related to the rolling window. 10 | 11 | 12 | ## Run the Code 13 | 14 | Run the example by running the following command: 15 | ```bash 16 | python workflow.py rolling_process 17 | ``` -------------------------------------------------------------------------------- /tests/test_contrib_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License.
3 | 4 | import unittest 5 | 6 | from qlib.contrib.model import all_model_classes 7 | 8 | 9 | class TestAllFlow(unittest.TestCase): 10 | def test_0_initialize(self): 11 | num = 0 12 | for model_class in all_model_classes: 13 | if model_class is not None: 14 | model = model_class() 15 | num += 1 16 | print("There are {:}/{:} valid models in total.".format(num, len(all_model_classes))) 17 | 18 | 19 | def suite(): 20 | _suite = unittest.TestSuite() 21 | _suite.addTest(TestAllFlow("test_0_initialize")) 22 | return _suite 23 | 24 | 25 | if __name__ == "__main__": 26 | runner = unittest.TextTestRunner() 27 | runner.run(suite()) 28 | -------------------------------------------------------------------------------- /examples/benchmarks/DoubleEnsemble/README.md: -------------------------------------------------------------------------------- 1 | # DoubleEnsemble 2 | * DoubleEnsemble is an ensemble framework leveraging learning-trajectory-based sample reweighting and shuffling-based feature selection to address both the low signal-to-noise ratio and the increasing number of features. It identifies the key samples based on the training dynamics of each sample and elicits key features based on the ablation impact of each feature via shuffling. The model is applicable to a wide range of base models, capable of extracting complex patterns, while mitigating the overfitting and instability issues for financial market prediction. 3 | * This code used in Qlib is implemented by ourselves. 4 | * Paper: DoubleEnsemble: A New Ensemble Method Based on Sample Reweighting and Feature Selection for Financial Data Analysis [https://arxiv.org/pdf/2010.01265.pdf](https://arxiv.org/pdf/2010.01265.pdf). -------------------------------------------------------------------------------- /data/dataset/weight.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | 5 | class Reweighter: 6 | def __init__(self, *args, **kwargs): 7 | """ 8 | To initialize the Reweighter, users should provide specific methods to let the reweighter do the reweighting (such as sample-wise, rule-based). 9 | """ 10 | raise NotImplementedError() 11 | 12 | def reweight(self, data: object) -> object: 13 | """ 14 | Get weights for data 15 | 16 | Parameters 17 | ---------- 18 | data : object 19 | The input data. 20 | The first dimension is the index of samples 21 | 22 | Returns 23 | ------- 24 | object: 25 | the weights info for the data 26 | """ 27 | raise NotImplementedError("This type of input is not supported") 28 | -------------------------------------------------------------------------------- /examples/benchmarks/TFT/README.md: -------------------------------------------------------------------------------- 1 | # Temporal Fusion Transformers Benchmark 2 | ## Source 3 | **Reference**: Lim, Bryan, et al. "Temporal fusion transformers for interpretable multi-horizon time series forecasting." arXiv preprint arXiv:1912.09363 (2019). 4 | 5 | **GitHub**: https://github.com/google-research/google-research/tree/master/tft 6 | 7 | ## Run the Workflow 8 | Users can follow ``workflow_by_code_tft.py`` to run the benchmark. 9 | 10 | ### Notes 11 | 1. Please be **aware** that this script only supports `Python 3.6 - 3.7`. 12 | 2. If the CUDA version on your machine is not 10.0, please remember to run the following commands `conda install anaconda cudatoolkit=10.0` and `conda install cudnn` on your machine. 13 | 3. 
The model must run on a GPU, or an error will be raised. 14 | 4. New datasets should be registered in ``data_formatters``; for details, please visit the source. 15 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM continuumio/miniconda3:latest 2 | 3 | WORKDIR /qlib 4 | 5 | COPY . . 6 | 7 | RUN apt-get update && \ 8 | apt-get install -y build-essential 9 | 10 | RUN conda create --name qlib_env python=3.8 -y 11 | RUN echo "conda activate qlib_env" >> ~/.bashrc 12 | ENV PATH /opt/conda/envs/qlib_env/bin:$PATH 13 | 14 | RUN python -m pip install --upgrade pip 15 | 16 | RUN python -m pip install numpy==1.23.5 17 | RUN python -m pip install pandas==1.5.3 18 | RUN python -m pip install importlib-metadata==5.2.0 19 | RUN python -m pip install "cloudpickle<3" 20 | RUN python -m pip install scikit-learn==1.3.2 21 | 22 | RUN python -m pip install cython packaging tables matplotlib statsmodels 23 | RUN python -m pip install pybind11 cvxpy 24 | 25 | ARG IS_STABLE="yes" 26 | 27 | RUN if [ "$IS_STABLE" = "yes" ]; then \ 28 | python -m pip install pyqlib; \ 29 | else \ 30 | python setup.py install; \ 31 | fi 32 | -------------------------------------------------------------------------------- /examples/benchmarks/TRA/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # we used random seeds (1 1000 2000 3000 4000 5000) in our experiments 4 | 5 | # Directly run from Qlib command `qrun` 6 | qrun configs/config_alstm.yaml 7 | 8 | qrun configs/config_transformer.yaml 9 | 10 | qrun configs/config_transformer_tra_init.yaml 11 | qrun configs/config_transformer_tra.yaml 12 | 13 | qrun configs/config_alstm_tra_init.yaml 14 | qrun configs/config_alstm_tra.yaml 15 | 16 | 17 | # Or setting different parameters with example.py 18 | python example.py --config_file configs/config_alstm.yaml 19 | 20 | python example.py --config_file configs/config_transformer.yaml 21 | 22 | python example.py --config_file configs/config_transformer_tra_init.yaml 23 | python example.py --config_file configs/config_transformer_tra.yaml 24 | 25 | python example.py --config_file configs/config_alstm_tra_init.yaml 26 | python example.py --config_file configs/config_alstm_tra.yaml 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo.
21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /rl/contrib/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | from __future__ import annotations 5 | 6 | from pathlib import Path 7 | 8 | import pandas as pd 9 | 10 | 11 | def read_order_file(order_file: Path | pd.DataFrame) -> pd.DataFrame: 12 | if isinstance(order_file, pd.DataFrame): 13 | return order_file 14 | 15 | order_file = Path(order_file) 16 | 17 | if order_file.suffix == ".pkl": 18 | order_df = pd.read_pickle(order_file).reset_index() 19 | elif order_file.suffix == ".csv": 20 | order_df = pd.read_csv(order_file) 21 | else: 22 | raise TypeError(f"Unsupported order file type: {order_file}") 23 | 24 | if "date" in order_df.columns: 25 | # legacy dataframe columns 26 | order_df = order_df.rename(columns={"date": "datetime", "order_type": "direction"}) 27 | order_df["datetime"] = order_df["datetime"].astype(str) 28 | 29 | return order_df 30 | -------------------------------------------------------------------------------- /contrib/tuner/launcher.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | # pylint: skip-file 5 | # flake8: noqa 6 | 7 | # coding=utf-8 8 | 9 | import argparse 10 | import importlib 11 | import os 12 | import yaml 13 | 14 | from .config import TunerConfigManager 15 | 16 | 17 | args_parser = argparse.ArgumentParser(prog="tuner") 18 | args_parser.add_argument( 19 | "-c", 20 | "--config_path", 21 | required=True, 22 | type=str, 23 | help="config path indicates where to load yaml config.", 24 | ) 25 | 26 | args = args_parser.parse_args() 27 | 28 | TUNER_CONFIG_MANAGER = TunerConfigManager(args.config_path) 29 | 30 | 31 | def run(): 32 | # 1. Get pipeline class. 33 | tuner_pipeline_class = getattr(importlib.import_module(".pipeline", package="qlib.contrib.tuner"), "Pipeline") 34 | # 2. Init tuner pipeline. 35 | tuner_pipeline = tuner_pipeline_class(TUNER_CONFIG_MANAGER) 36 | # 3. Begin to tune 37 | tuner_pipeline.run() 38 | -------------------------------------------------------------------------------- /tests/test_workflow.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | import unittest 4 | from pathlib import Path 5 | import shutil 6 | 7 | from qlib.workflow import R 8 | from qlib.tests import TestAutoData 9 | 10 | 11 | class WorkflowTest(TestAutoData): 12 | # Creating the directory manually doesn't work with mlflow, 13 | # so we add a subfolder named .trash when we create the directory. 
14 | TMP_PATH = Path("./.mlruns_tmp/.trash") 15 | 16 | def tearDown(self) -> None: 17 | if self.TMP_PATH.exists(): 18 | shutil.rmtree(self.TMP_PATH) 19 | 20 | def test_get_local_dir(self): 21 | """ """ 22 | self.TMP_PATH.mkdir(parents=True, exist_ok=True) 23 | 24 | with R.start(uri=str(self.TMP_PATH)): 25 | pass 26 | 27 | with R.uri_context(uri=str(self.TMP_PATH)): 28 | resume_recorder = R.get_recorder() 29 | resume_recorder.get_local_dir() 30 | 31 | 32 | if __name__ == "__main__": 33 | unittest.main() 34 | -------------------------------------------------------------------------------- /examples/model_interpreter/feature.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | 5 | import qlib 6 | from qlib.constant import REG_CN 7 | 8 | from qlib.utils import init_instance_by_config 9 | from qlib.tests.data import GetData 10 | from qlib.tests.config import CSI300_GBDT_TASK 11 | 12 | 13 | if __name__ == "__main__": 14 | # use default data 15 | provider_uri = "~/.qlib/qlib_data/cn_data" # target_dir 16 | GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True) 17 | 18 | qlib.init(provider_uri=provider_uri, region=REG_CN) 19 | 20 | ################################### 21 | # train model 22 | ################################### 23 | # model initialization 24 | model = init_instance_by_config(CSI300_GBDT_TASK["model"]) 25 | dataset = init_instance_by_config(CSI300_GBDT_TASK["dataset"]) 26 | model.fit(dataset) 27 | 28 | # get model feature importance 29 | feature_importance = model.get_feature_importance() 30 | print("feature importance:") 31 | print(feature_importance) 32 | -------------------------------------------------------------------------------- /tests/misc/test_get_multi_proc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | import unittest 5 | 6 | import qlib 7 | from qlib.data import D 8 | from qlib.tests import TestAutoData 9 | from multiprocessing import Pool 10 | 11 | 12 | def get_features(fields): 13 | qlib.init(provider_uri=TestAutoData.provider_uri, expression_cache=None, dataset_cache=None, joblib_backend="loky") 14 | return D.features(D.instruments("csi300"), fields) 15 | 16 | 17 | class TestGetData(TestAutoData): 18 | FIELDS = "$open,$close,$high,$low,$volume,$factor,$change".split(",") 19 | 20 | def test_multi_proc(self): 21 | """ 22 | For testing whether it will raise an error 23 | """ 24 | iter_n = 2 25 | pool = Pool(iter_n) 26 | 27 | res = [] 28 | for _ in range(iter_n): 29 | res.append(pool.apply_async(get_features, (self.FIELDS,), {})) 30 | 31 | for r in res: 32 | print(r.get()) 33 | 34 | pool.close() 35 | pool.join() 36 | 37 | 38 | if __name__ == "__main__": 39 | unittest.main() 40 | -------------------------------------------------------------------------------- /examples/benchmarks/GeneralPtNN/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Introduction 4 | 5 | What is GeneralPtNN: 6 | - It fixes the previous design that failed to support both time-series and tabular data. 7 | - Now you can just replace the PyTorch model structure to run a NN model. 8 | 9 | We provide an example to demonstrate the effectiveness of the current design.
10 | - `workflow_config_gru.yaml` aligns with the previous results of [GRU(Kyunghyun Cho, et al.)](../README.md#Alpha158-dataset) 11 | - `workflow_config_gru2mlp.yaml` demonstrates that we can convert a config from time-series to tabular data with minimal changes 12 | - You only have to change the net & dataset class to make the conversion. 13 | - `workflow_config_mlp.yaml` achieves similar functionality to [MLP](../README.md#Alpha158-dataset) 14 | 15 | # TODO 16 | 17 | - We will align existing models to the current design. 18 | 19 | - The result of `workflow_config_mlp.yaml` differs from the result of [MLP](../README.md#Alpha158-dataset) since GeneralPtNN has a different stopping method compared to previous implementations. Specifically, GeneralPtNN controls training by epochs, whereas previous methods were controlled by max_steps. 20 | -------------------------------------------------------------------------------- /examples/rolling_process_data/rolling_handler.py: -------------------------------------------------------------------------------- 1 | from qlib.data.dataset.handler import DataHandlerLP 2 | from qlib.data.dataset.loader import DataLoaderDH 3 | from qlib.contrib.data.handler import check_transform_proc 4 | 5 | 6 | class RollingDataHandler(DataHandlerLP): 7 | def __init__( 8 | self, 9 | start_time=None, 10 | end_time=None, 11 | infer_processors=[], 12 | learn_processors=[], 13 | fit_start_time=None, 14 | fit_end_time=None, 15 | data_loader_kwargs={}, 16 | ): 17 | infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time) 18 | learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time) 19 | 20 | data_loader = { 21 | "class": "DataLoaderDH", 22 | "kwargs": {**data_loader_kwargs}, 23 | } 24 | 25 | super().__init__( 26 | instruments=None, 27 | start_time=start_time, 28 | end_time=end_time, 29 | data_loader=data_loader, 30 | infer_processors=infer_processors, 31 | learn_processors=learn_processors, 32 | ) 33 | -------------------------------------------------------------------------------- /rl/order_execution/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | """ 5 | Currently it supports single-asset order execution. 6 | Multi-asset is on the way. 7 | """ 8 | 9 | from .interpreter import ( 10 | FullHistoryStateInterpreter, 11 | CurrentStepStateInterpreter, 12 | CategoricalActionInterpreter, 13 | TwapRelativeActionInterpreter, 14 | ) 15 | from .network import Recurrent 16 | from .policy import AllOne, PPO 17 | from .reward import PAPenaltyReward 18 | from .simulator_simple import SingleAssetOrderExecutionSimple 19 | from .state import SAOEMetrics, SAOEState 20 | from .strategy import SAOEStateAdapter, SAOEStrategy, ProxySAOEStrategy, SAOEIntStrategy 21 | 22 | __all__ = [ 23 | "FullHistoryStateInterpreter", 24 | "CurrentStepStateInterpreter", 25 | "CategoricalActionInterpreter", 26 | "TwapRelativeActionInterpreter", 27 | "Recurrent", 28 | "AllOne", 29 | "PPO", 30 | "PAPenaltyReward", 31 | "SingleAssetOrderExecutionSimple", 32 | "SAOEStateAdapter", 33 | "SAOEMetrics", 34 | "SAOEState", 35 | "SAOEStrategy", 36 | "ProxySAOEStrategy", 37 | "SAOEIntStrategy", 38 | ] 39 | -------------------------------------------------------------------------------- /contrib/torch.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation.
2 | # Licensed under the MIT License. 3 | """ 4 | This module is not a necessary part of Qlib. 5 | It just provides some tools for convenience. 6 | It should not be imported into the core part of qlib. 7 | """ 8 | import torch 9 | import numpy as np 10 | import pandas as pd 11 | 12 | 13 | def data_to_tensor(data, device="cpu", raise_error=False): 14 | if isinstance(data, torch.Tensor): 15 | if device == "cpu": 16 | return data.cpu() 17 | else: 18 | return data.to(device) 19 | if isinstance(data, (pd.DataFrame, pd.Series)): 20 | return data_to_tensor(torch.from_numpy(data.values).float(), device) 21 | elif isinstance(data, np.ndarray): 22 | return data_to_tensor(torch.from_numpy(data).float(), device) 23 | elif isinstance(data, (tuple, list)): 24 | return [data_to_tensor(i, device) for i in data] 25 | elif isinstance(data, dict): 26 | return {k: data_to_tensor(v, device) for k, v in data.items()} 27 | else: 28 | if raise_error: 29 | raise ValueError(f"Unsupported data type: {type(data)}.") 30 | else: 31 | return data 32 | -------------------------------------------------------------------------------- /rl/strategy/single_order.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | from __future__ import annotations 5 | 6 | from qlib.backtest import Order 7 | from qlib.backtest.decision import OrderHelper, TradeDecisionWO, TradeRange 8 | from qlib.strategy.base import BaseStrategy 9 | 10 | 11 | class SingleOrderStrategy(BaseStrategy): 12 | """Strategy used to generate a trade decision with exactly one order.""" 13 | 14 | def __init__( 15 | self, 16 | order: Order, 17 | trade_range: TradeRange | None = None, 18 | ) -> None: 19 | super().__init__() 20 | 21 | self._order = order 22 | self._trade_range = trade_range 23 | 24 | def generate_trade_decision(self, execute_result: list | None = None) -> TradeDecisionWO: 25 | oh: OrderHelper = self.common_infra.get("trade_exchange").get_order_helper() 26 | order_list = [ 27 | oh.create( 28 | code=self._order.stock_id, 29 | amount=self._order.amount, 30 | direction=self._order.direction, 31 | ), 32 | ] 33 | return TradeDecisionWO(order_list, self, self._trade_range) 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /docs/advanced/server.rst: -------------------------------------------------------------------------------- 1 | .. _server: 2 | 3 | ============================= 4 | ``Online`` & ``Offline`` mode 5 | ============================= 6 | .. currentmodule:: qlib 7 | 8 | 9 | Introduction 10 | ============ 11 | 12 | ``Qlib`` supports ``Online`` mode and ``Offline`` mode. Only the ``Offline`` mode is introduced in this document. 13 | 14 | The ``Online`` mode is designed to solve the following problems: 15 | 16 | - Manage the data in a centralized way. Users don't have to manage data of different versions. 17 | - Reduce the amount of cache to be generated. 18 | - Make the data accessible remotely. 19 | 20 | Qlib-Server 21 | =========== 22 | 23 | ``Qlib-Server`` is the assorted server system for ``Qlib``, which utilizes ``Qlib`` for basic calculations and provides an extensive server system and cache mechanism. With ``Qlib-Server``, the data provided for ``Qlib`` can be managed in a centralized manner. With ``Qlib-Server``, users can use ``Qlib`` in ``Online`` mode. 24 | 25 | 26 | 27 | Reference 28 | ========= 29 | If users are interested in ``Qlib-Server`` and ``Online`` mode, please refer to the `Qlib-Server Project <https://github.com/microsoft/qlib-server>`_ and the `Qlib-Server Document <https://qlib-server.readthedocs.io/en/latest/>`_. 30 | -------------------------------------------------------------------------------- /contrib/online/online_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | # pylint: skip-file 5 | # flake8: noqa 6 | 7 | import random 8 | import pandas as pd 9 | from ...data import D 10 | from ..model.base import Model 11 | 12 | 13 | class ScoreFileModel(Model): 14 | """ 15 | This model will load a score file and return the score at dates that exist in the score file. 16 | """ 17 | 18 | def __init__(self, score_path): 19 | pred_test = pd.read_csv(score_path, index_col=[0, 1], parse_dates=True, infer_datetime_format=True) 20 | self.pred = pred_test 21 | 22 | def get_data_with_date(self, date, **kwargs): 23 | score = self.pred.loc(axis=0)[:, date] # (stock_id, trade_date) multi_index, score at the predict date 24 | score_series = score.reset_index(level="datetime", drop=True)[ 25 | "score" 26 | ] # pd.Series ; index:stock_id, data: score 27 | return score_series 28 | 29 | def predict(self, x_test, **kwargs): 30 | return x_test 31 | 32 | def score(self, x_test, **kwargs): 33 | return 34 | 35 | def fit(self, x_train, y_train, x_valid, y_valid, w_train=None, w_valid=None, **kwargs): 36 | return 37 | 38 | def save(self, fname, **kwargs): 39 | return 40 | -------------------------------------------------------------------------------- /rl/aux_info.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License.
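# Usage sketch (hypothetical): subclass AuxiliaryInfoCollector (defined below) and
# override `collect` to expose extra simulator state, e.g. for multi-agent RL:
#     class PositionCollector(AuxiliaryInfoCollector[SomeState, float]):
#         def collect(self, simulator_state: SomeState) -> float:
#             return float(simulator_state.position)  # `position` is an assumed attribute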
3 | 4 | from __future__ import annotations 5 | 6 | from typing import TYPE_CHECKING, Generic, Optional, TypeVar 7 | 8 | from qlib.typehint import final 9 | 10 | from .simulator import StateType 11 | 12 | if TYPE_CHECKING: 13 | from .utils.env_wrapper import EnvWrapper 14 | 15 | 16 | __all__ = ["AuxiliaryInfoCollector"] 17 | 18 | AuxInfoType = TypeVar("AuxInfoType") 19 | 20 | 21 | class AuxiliaryInfoCollector(Generic[StateType, AuxInfoType]): 22 | """Override this class to collect customized auxiliary information from the environment.""" 23 | 24 | env: Optional[EnvWrapper] = None 25 | 26 | @final 27 | def __call__(self, simulator_state: StateType) -> AuxInfoType: 28 | return self.collect(simulator_state) 29 | 30 | def collect(self, simulator_state: StateType) -> AuxInfoType: 31 | """Override this for customized auxiliary info. 32 | Usually useful in Multi-agent RL. 33 | 34 | Parameters 35 | ---------- 36 | simulator_state 37 | Retrieved with ``simulator.get_state()``. 38 | 39 | Returns 40 | ------- 41 | Auxiliary information. 42 | """ 43 | raise NotImplementedError("collect is not implemented!") 44 | -------------------------------------------------------------------------------- /tests/dependency_tests/test_mlflow.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | import unittest 4 | import platform 5 | import mlflow 6 | import time 7 | from pathlib import Path 8 | import shutil 9 | 10 | 11 | class MLflowTest(unittest.TestCase): 12 | TMP_PATH = Path("./.mlruns_tmp/") 13 | 14 | def tearDown(self) -> None: 15 | if self.TMP_PATH.exists(): 16 | shutil.rmtree(self.TMP_PATH) 17 | 18 | def test_creating_client(self): 19 | """ 20 | Please refer to qlib/workflow/expm.py:MLflowExpManager._client 21 | we don't cache _client (this is helpful to reduce maintenance work when MLflowExpManager's uri is changed) 22 | 23 | This implementation is based on the assumption that creating a client is fast 24 | """ 25 | start = time.time() 26 | for i in range(10): 27 | _ = mlflow.tracking.MlflowClient(tracking_uri=str(self.TMP_PATH)) 28 | end = time.time() 29 | elapsed = end - start 30 | if platform.system() == "Linux": 31 | self.assertLess(elapsed, 1e-2) # it can be done in less than 10ms 32 | else: 33 | self.assertLess(elapsed, 2e-2) 34 | print(elapsed) 35 | 36 | 37 | if __name__ == "__main__": 38 | unittest.main() 39 | -------------------------------------------------------------------------------- /examples/nested_decision_execution/README.md: -------------------------------------------------------------------------------- 1 | # Nested Decision Execution 2 | 3 | This workflow is an example of nested decision execution in backtesting. Qlib supports nested decision execution, which means that users can use different strategies to make trade decisions at different frequencies. 4 | 5 | ## Weekly Portfolio Generation and Daily Order Execution 6 | 7 | This workflow provides an example that uses a DropoutTopkStrategy (a strategy based on a daily-frequency LightGBM model) at weekly frequency for portfolio generation and uses SBBStrategyEMA (a rule-based strategy that uses EMA for decision-making) to execute orders at daily frequency.
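As a rough illustration of how the two levels nest, the backtest wires an outer (weekly) executor around an inner (daily) one. The class names below exist in qlib (`qlib.backtest.executor`, `qlib.contrib.strategy.rule_strategy`), but the exact kwargs are an illustrative assumption, not the verified config from `workflow.py`:

```python
# hedged sketch of a nested executor config; kwargs are illustrative assumptions
executor_config = {
    "class": "NestedExecutor",
    "module_path": "qlib.backtest.executor",
    "kwargs": {
        "time_per_step": "week",            # outer decision frequency
        "inner_strategy": {                 # re-decides orders within each week
            "class": "SBBStrategyEMA",
            "module_path": "qlib.contrib.strategy.rule_strategy",
        },
        "inner_executor": {                 # actually executes at daily frequency
            "class": "SimulatorExecutor",
            "module_path": "qlib.backtest.executor",
            "kwargs": {"time_per_step": "day"},
        },
    },
}
```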
8 | 9 | ### Usage 10 | 11 | Start backtesting by running the following command: 12 | ```bash 13 | python workflow.py backtest 14 | ``` 15 | 16 | Start collecting data by running the following command: 17 | ```bash 18 | python workflow.py collect_data 19 | ``` 20 | 21 | ## Daily Portfolio Generation and Minutely Order Execution 22 | 23 | This workflow also provides a high-frequency example that uses a DropoutTopkStrategy for portfolio generation at daily frequency and uses SBBStrategyEMA to execute orders at minute frequency. 24 | 25 | ### Usage 26 | 27 | Start backtesting by running the following command: 28 | ```bash 29 | python workflow.py backtest_highfreq 30 | ``` -------------------------------------------------------------------------------- /model/interpret/base.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | """ 5 | Interfaces to interpret models 6 | """ 7 | 8 | import pandas as pd 9 | from abc import abstractmethod 10 | 11 | 12 | class FeatureInt: 13 | """Feature (Int)erpreter""" 14 | 15 | @abc.abstractmethod 16 | def get_feature_importance(self) -> pd.Series: 17 | """get feature importance 18 | 19 | Returns 20 | ------- 21 | The index is the feature name. 22 | 23 | The greater the value, the higher the importance. 24 | """ 25 | 26 | 27 | class LightGBMFInt(FeatureInt): 28 | """LightGBM (F)eature (Int)erpreter""" 29 | 30 | def __init__(self): 31 | self.model = None 32 | 33 | def get_feature_importance(self, *args, **kwargs) -> pd.Series: 34 | """get feature importance 35 | 36 | Notes 37 | ----- 38 | parameters reference: 39 | https://lightgbm.readthedocs.io/en/latest/pythonapi/lightgbm.Booster.html?highlight=feature_importance#lightgbm.Booster.feature_importance 40 | """ 41 | return pd.Series( 42 | self.model.feature_importance(*args, **kwargs), index=self.model.feature_name() 43 | ).sort_values( # pylint: disable=E1101 44 | ascending=False 45 | ) 46 | -------------------------------------------------------------------------------- /tests/data_mid_layer_tests/test_handler.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import shutil 4 | import unittest 5 | from qlib.tests import TestAutoData 6 | from qlib.data import D 7 | from qlib.data.dataset.handler import DataHandlerLP 8 | 9 | 10 | class HandlerTests(TestAutoData): 11 | def to_str(self, obj): 12 | return "".join(str(obj).split()) 13 | 14 | def test_handler_df(self): 15 | df = D.features(["sh600519"], start_time="20190101", end_time="20190201", fields=["$close"]) 16 | dh = DataHandlerLP.from_df(df) 17 | print(dh.fetch()) 18 | self.assertTrue(dh._data.equals(df)) 19 | self.assertTrue(dh._infer is dh._data) 20 | self.assertTrue(dh._learn is dh._data) 21 | self.assertTrue(dh.data_loader._data is dh._data) 22 | fname = "_handler_test.pkl" 23 | dh.to_pickle(fname, dump_all=True) 24 | 25 | with open(fname, "rb") as f: 26 | dh_d = pickle.load(f) 27 | 28 | self.assertTrue(dh_d._data.equals(df)) 29 | self.assertTrue(dh_d._infer is dh_d._data) 30 | self.assertTrue(dh_d._learn is dh_d._data) 31 | # Data loader will no longer be useful 32 | self.assertTrue("_data" not in dh_d.data_loader.__dict__.keys()) 33 | os.remove(fname) 34 | 35 | 36 | if __name__ == "__main__": 37 | unittest.main() 38 | -------------------------------------------------------------------------------- /contrib/model/pytorch_utils.py: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | import torch.nn as nn 5 | 6 | 7 | def count_parameters(models_or_parameters, unit="m"): 8 | """ 9 | This function counts the number of parameters of one (or multiple) models, optionally scaled by a storage size unit. 10 | 11 | Parameters 12 | ---------- 13 | models_or_parameters : PyTorch model(s) or a list of parameters. 14 | unit : the storage size unit. 15 | 16 | Returns 17 | ------- 18 | The number of parameters of the given model(s) or parameters, scaled by `unit`. 19 | """ 20 | if isinstance(models_or_parameters, nn.Module): 21 | counts = sum(v.numel() for v in models_or_parameters.parameters()) 22 | elif isinstance(models_or_parameters, nn.Parameter): 23 | counts = models_or_parameters.numel() 24 | elif isinstance(models_or_parameters, (list, tuple)): 25 | return sum(count_parameters(x, unit) for x in models_or_parameters) 26 | else: 27 | counts = sum(v.numel() for v in models_or_parameters) 28 | unit = unit.lower() 29 | if unit in ("kb", "k"): 30 | counts /= 2**10 31 | elif unit in ("mb", "m"): 32 | counts /= 2**20 33 | elif unit in ("gb", "g"): 34 | counts /= 2**30 35 | elif unit is not None: 36 | raise ValueError("Unknown unit: {:}".format(unit)) 37 | return counts 38 | -------------------------------------------------------------------------------- /examples/benchmarks/TRA/example.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import qlib 4 | from ruamel.yaml import YAML 5 | from qlib.utils import init_instance_by_config 6 | 7 | 8 | def main(seed, config_file="configs/config_alstm.yaml"): 9 | # load config 10 | with open(config_file) as f: 11 | yaml = YAML(typ="safe", pure=True) 12 | config = yaml.load(f) 13 | 14 | # seed_suffix = "/seed1000" if "init" in config_file else f"/seed{seed}" 15 | seed_suffix = "" 16 | config["task"]["model"]["kwargs"].update( 17 | {"seed": seed, "logdir": config["task"]["model"]["kwargs"]["logdir"] + seed_suffix} 18 | ) 19 | 20 | # initialize workflow 21 | qlib.init( 22 | provider_uri=config["qlib_init"]["provider_uri"], 23 | region=config["qlib_init"]["region"], 24 | ) 25 | dataset = init_instance_by_config(config["task"]["dataset"]) 26 | model = init_instance_by_config(config["task"]["model"]) 27 | 28 | # train model 29 | model.fit(dataset) 30 | 31 | 32 | if __name__ == "__main__": 33 | # set params from cmd 34 | parser = argparse.ArgumentParser(allow_abbrev=False) 35 | parser.add_argument("--seed", type=int, default=1000, help="random seed") 36 | parser.add_argument("--config_file", type=str, default="configs/config_alstm.yaml", help="config file") 37 | args = parser.parse_args() 38 | main(**vars(args)) 39 | -------------------------------------------------------------------------------- /docs/start/installation.rst: -------------------------------------------------------------------------------- 1 | .. _installation: 2 | 3 | ============ 4 | Installation 5 | ============ 6 | 7 | .. currentmodule:: qlib 8 | 9 | 10 | ``Qlib`` Installation 11 | ===================== 12 | .. note:: 13 | 14 | `Qlib` supports both `Windows` and `Linux`. It's recommended to use `Qlib` in `Linux`. ``Qlib`` supports Python 3, up to Python 3.8. 15 | 16 | Users can easily install ``Qlib`` by pip according to the following command: 17 | 18 | .. 
/docs/start/installation.rst: -------------------------------------------------------------------------------- 1 | .. _installation: 2 | 3 | ============ 4 | Installation 5 | ============ 6 | 7 | .. currentmodule:: qlib 8 | 9 | 10 | ``Qlib`` Installation 11 | ===================== 12 | .. note:: 13 | 14 | `Qlib` supports both `Windows` and `Linux`; it's recommended to use `Qlib` on `Linux`. ``Qlib`` supports Python 3 up to Python 3.8. 15 | 16 | Users can easily install ``Qlib`` with pip using the following command: 17 | 18 | .. code-block:: bash 19 | 20 | pip install pyqlib 21 | 22 | 23 | Users can also install ``Qlib`` from source code with the following steps: 24 | 25 | - Enter the root directory of ``Qlib``, in which the file ``setup.py`` exists. 26 | - Then, please execute the following command to install the environment dependencies and install ``Qlib``: 27 | 28 | .. code-block:: bash 29 | 30 | $ pip install numpy 31 | $ pip install --upgrade cython 32 | $ git clone https://github.com/microsoft/qlib.git && cd qlib 33 | $ python setup.py install 34 | 35 | .. note:: 36 | It's recommended to use anaconda/miniconda to set up the environment. ``Qlib`` needs the lightgbm and pytorch packages; use pip to install them. 37 | 38 | 39 | 40 | Use the following code to make sure the installation succeeded: 41 | 42 | .. code-block:: python 43 | 44 | >>> import qlib 45 | >>> qlib.__version__ 46 | 47 | -------------------------------------------------------------------------------- /examples/benchmarks_dynamic/baseline/rolling_benchmark.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | import os 4 | from pathlib import Path 5 | from typing import Union 6 | 7 | import fire 8 | 9 | from qlib import auto_init 10 | from qlib.contrib.rolling.base import Rolling 11 | from qlib.tests.data import GetData 12 | 13 | DIRNAME = Path(__file__).absolute().resolve().parent 14 | 15 | 16 | class RollingBenchmark(Rolling): 17 | # The configs in the README.md 18 | CONF_LIST = [DIRNAME / "workflow_config_linear_Alpha158.yaml", DIRNAME / "workflow_config_lightgbm_Alpha158.yaml"] 19 | 20 | DEFAULT_CONF = CONF_LIST[0] 21 | 22 | def __init__(self, conf_path: Union[str, Path] = DEFAULT_CONF, horizon=20, **kwargs) -> None: 23 | # This code is kept for compatibility with the previous old code 24 | conf_path = Path(conf_path) 25 | super().__init__(conf_path=conf_path, horizon=horizon, **kwargs) 26 | 27 | for f in self.CONF_LIST: 28 | if conf_path.samefile(f): 29 | break 30 | else: 31 | self.logger.warning("Model type is not in the benchmark!") 32 | 33 | 34 | if __name__ == "__main__": 35 | kwargs = {} 36 | if os.environ.get("PROVIDER_URI", "") == "": 37 | GetData().qlib_data(exists_skip=True) 38 | else: 39 | kwargs["provider_uri"] = os.environ["PROVIDER_URI"] 40 | auto_init(**kwargs) 41 | fire.Fire(RollingBenchmark) 42 | --------------------------------------------------------------------------------
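`RollingBenchmark` above is normally driven through `fire` from the command line; the sketch below shows the equivalent programmatic use, assuming the qlib data is already prepared and that the `Rolling` base class exposes the `run()` entry point invoked by the CLI.

```python
from qlib import auto_init

auto_init(provider_uri="~/.qlib/qlib_data/cn_data")
# equivalent to: python rolling_benchmark.py --horizon 20 run
bench = RollingBenchmark(horizon=20)  # defaults to the Linear config
bench.run()
```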
/examples/benchmarks_dynamic/DDG-DA/workflow.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | import os 4 | from pathlib import Path 5 | from typing import Union 6 | 7 | import fire 8 | 9 | from qlib import auto_init 10 | from qlib.contrib.rolling.ddgda import DDGDA 11 | from qlib.tests.data import GetData 12 | 13 | DIRNAME = Path(__file__).absolute().resolve().parent 14 | BENCH_DIR = DIRNAME.parent / "baseline" 15 | 16 | 17 | class DDGDABench(DDGDA): 18 | # The configs in the README.md 19 | CONF_LIST = [ 20 | BENCH_DIR / "workflow_config_linear_Alpha158.yaml", 21 | BENCH_DIR / "workflow_config_lightgbm_Alpha158.yaml", 22 | ] 23 | 24 | DEFAULT_CONF = CONF_LIST[0]  # Linear by default due to efficiency 25 | 26 | def __init__(self, conf_path: Union[str, Path] = DEFAULT_CONF, horizon=20, **kwargs) -> None: 27 | # This code is kept for compatibility with the previous old code 28 | conf_path = Path(conf_path) 29 | super().__init__(conf_path=conf_path, horizon=horizon, working_dir=DIRNAME, **kwargs) 30 | 31 | for f in self.CONF_LIST: 32 | if conf_path.samefile(f): 33 | break 34 | else: 35 | self.logger.warning("Model type is not in the benchmark!") 36 | 37 | 38 | if __name__ == "__main__": 39 | kwargs = {} 40 | if os.environ.get("PROVIDER_URI", "") == "": 41 | GetData().qlib_data(exists_skip=True) 42 | else: 43 | kwargs["provider_uri"] = os.environ["PROVIDER_URI"] 44 | auto_init(**kwargs) 45 | fire.Fire(DDGDABench) 46 | -------------------------------------------------------------------------------- /scripts/data_collector/pit/README.md: -------------------------------------------------------------------------------- 1 | # Collect Point-in-Time Data 2 | 3 | > *Please pay **ATTENTION** that the data is collected from [baostock](http://baostock.com) and the data might not be perfect. We recommend that users prepare their own data if they have a high-quality dataset. For more information, users can refer to the [related document](https://qlib.readthedocs.io/en/latest/component/data.html#converting-csv-format-into-qlib-format)* 4 | 5 | ## Requirements 6 | 7 | ```bash 8 | pip install -r requirements.txt 9 | ``` 10 | 11 | ## Collect Data 12 | 13 | 14 | ### Download Quarterly CN Data 15 | 16 | ```bash 17 | cd qlib/scripts/data_collector/pit/ 18 | # download from baostock.com 19 | python collector.py download_data --source_dir ~/.qlib/stock_data/source/pit --start 2000-01-01 --end 2020-01-01 --interval quarterly 20 | ``` 21 | 22 | Downloading the data for all stocks is very time-consuming.
If you just want to run a quick test on a few stocks, you can run the command below 23 | ```bash 24 | python collector.py download_data --source_dir ~/.qlib/stock_data/source/pit --start 2000-01-01 --end 2020-01-01 --interval quarterly --symbol_regex "^(600519|000725).*" 25 | ``` 26 | 27 | 28 | ### Normalize Data 29 | ```bash 30 | python collector.py normalize_data --interval quarterly --source_dir ~/.qlib/stock_data/source/pit --normalize_dir ~/.qlib/stock_data/source/pit_normalized 31 | ``` 32 | 33 | 34 | 35 | ### Dump Data into PIT Format 36 | 37 | ```bash 38 | cd qlib/scripts 39 | python dump_pit.py dump --data_path ~/.qlib/stock_data/source/pit_normalized --qlib_dir ~/.qlib/qlib_data/cn_data --interval quarterly 40 | ``` 41 | -------------------------------------------------------------------------------- /examples/benchmarks/TRA/configs/config_alstm.yaml: -------------------------------------------------------------------------------- 1 | qlib_init: 2 | provider_uri: "~/.qlib/qlib_data/cn_data" 3 | region: cn 4 | 5 | data_loader_config: &data_loader_config 6 | class: StaticDataLoader 7 | module_path: qlib.data.dataset.loader 8 | kwargs: 9 | config: 10 | feature: data/feature.pkl 11 | label: data/label.pkl 12 | 13 | model_config: &model_config 14 | input_size: 16 15 | hidden_size: 256 16 | num_layers: 2 17 | num_heads: 2 18 | use_attn: True 19 | dropout: 0.1 20 | 21 | num_states: &num_states 1 22 | 23 | tra_config: &tra_config 24 | num_states: *num_states 25 | hidden_size: 16 26 | tau: 1.0 27 | src_info: LR_TPE 28 | 29 | task: 30 | model: 31 | class: TRAModel 32 | module_path: src/model.py 33 | kwargs: 34 | lr: 0.0002 35 | n_epochs: 500 36 | max_steps_per_epoch: 100 37 | early_stop: 20 38 | seed: 1000 39 | logdir: output/test/alstm 40 | model_type: LSTM 41 | model_config: *model_config 42 | tra_config: *tra_config 43 | lamb: 1.0 44 | rho: 0.99 45 | freeze_model: False 46 | model_init_state: 47 | dataset: 48 | class: MTSDatasetH 49 | module_path: src/dataset.py 50 | kwargs: 51 | handler: 52 | class: DataHandler 53 | module_path: qlib.data.dataset.handler 54 | kwargs: 55 | data_loader: *data_loader_config 56 | segments: 57 | train: [2007-10-30, 2016-05-27] 58 | valid: [2016-09-26, 2018-05-29] 59 | test: [2018-09-21, 2020-06-30] 60 | seq_len: 60 61 | horizon: 21 62 | num_states: *num_states 63 | batch_size: 1024 -------------------------------------------------------------------------------- /examples/benchmarks/TRA/configs/config_alstm_tra_init.yaml: -------------------------------------------------------------------------------- 1 | qlib_init: 2 | provider_uri: "~/.qlib/qlib_data/cn_data" 3 | region: cn 4 | 5 | data_loader_config: &data_loader_config 6 | class: StaticDataLoader 7 | module_path: qlib.data.dataset.loader 8 | kwargs: 9 | config: 10 | feature: data/feature.pkl 11 | label: data/label.pkl 12 | 13 | model_config: &model_config 14 | input_size: 16 15 | hidden_size: 256 16 | num_layers: 2 17 | num_heads: 2 18 | use_attn: True 19 | dropout: 0.1 20 | 21 | num_states: &num_states 3 22 | 23 | tra_config: &tra_config 24 | num_states: *num_states 25 | hidden_size: 16 26 | tau: 1.0 27 | src_info: LR_TPE 28 | 29 | task: 30 | model: 31 | class: TRAModel 32 | module_path: src/model.py 33 | kwargs: 34 | lr: 0.0002 35 | n_epochs: 500 36 | max_steps_per_epoch: 100 37 | early_stop: 20 38 | seed: 1000 39 | logdir: output/test/alstm_tra_init 40 | model_type: LSTM 41 | model_config: *model_config 42 | tra_config: *tra_config 43 | lamb: 1.0 44 | rho: 0.99 45 | freeze_model: False 46 | 
model_init_state: 47 | dataset: 48 | class: MTSDatasetH 49 | module_path: src/dataset.py 50 | kwargs: 51 | handler: 52 | class: DataHandler 53 | module_path: qlib.data.dataset.handler 54 | kwargs: 55 | data_loader: *data_loader_config 56 | segments: 57 | train: [2007-10-30, 2016-05-27] 58 | valid: [2016-09-26, 2018-05-29] 59 | test: [2018-09-21, 2020-06-30] 60 | seq_len: 60 61 | horizon: 21 62 | num_states: *num_states 63 | batch_size: 512 -------------------------------------------------------------------------------- /examples/benchmarks/TRA/configs/config_transformer.yaml: -------------------------------------------------------------------------------- 1 | qlib_init: 2 | provider_uri: "~/.qlib/qlib_data/cn_data" 3 | region: cn 4 | 5 | data_loader_config: &data_loader_config 6 | class: StaticDataLoader 7 | module_path: qlib.data.dataset.loader 8 | kwargs: 9 | config: 10 | feature: data/feature.pkl 11 | label: data/label.pkl 12 | 13 | model_config: &model_config 14 | input_size: 16 15 | hidden_size: 64 16 | num_layers: 2 17 | num_heads: 4 18 | use_attn: False 19 | dropout: 0.1 20 | 21 | num_states: &num_states 1 22 | 23 | tra_config: &tra_config 24 | num_states: *num_states 25 | hidden_size: 16 26 | tau: 1.0 27 | src_info: LR_TPE 28 | 29 | task: 30 | model: 31 | class: TRAModel 32 | module_path: src/model.py 33 | kwargs: 34 | lr: 0.0002 35 | n_epochs: 500 36 | max_steps_per_epoch: 100 37 | early_stop: 20 38 | seed: 1000 39 | logdir: output/test/transformer 40 | model_type: Transformer 41 | model_config: *model_config 42 | tra_config: *tra_config 43 | lamb: 1.0 44 | rho: 0.99 45 | freeze_model: False 46 | model_init_state: 47 | dataset: 48 | class: MTSDatasetH 49 | module_path: src/dataset.py 50 | kwargs: 51 | handler: 52 | class: DataHandler 53 | module_path: qlib.data.dataset.handler 54 | kwargs: 55 | data_loader: *data_loader_config 56 | segments: 57 | train: [2007-10-30, 2016-05-27] 58 | valid: [2016-09-26, 2018-05-29] 59 | test: [2018-09-21, 2020-06-30] 60 | seq_len: 60 61 | horizon: 21 62 | num_states: *num_states 63 | batch_size: 1024 -------------------------------------------------------------------------------- /examples/benchmarks/TRA/configs/config_transformer_tra_init.yaml: -------------------------------------------------------------------------------- 1 | qlib_init: 2 | provider_uri: "~/.qlib/qlib_data/cn_data" 3 | region: cn 4 | 5 | data_loader_config: &data_loader_config 6 | class: StaticDataLoader 7 | module_path: qlib.data.dataset.loader 8 | kwargs: 9 | config: 10 | feature: data/feature.pkl 11 | label: data/label.pkl 12 | 13 | model_config: &model_config 14 | input_size: 16 15 | hidden_size: 64 16 | num_layers: 2 17 | num_heads: 4 18 | use_attn: False 19 | dropout: 0.1 20 | 21 | num_states: &num_states 3 22 | 23 | tra_config: &tra_config 24 | num_states: *num_states 25 | hidden_size: 16 26 | tau: 1.0 27 | src_info: LR_TPE 28 | 29 | task: 30 | model: 31 | class: TRAModel 32 | module_path: src/model.py 33 | kwargs: 34 | lr: 0.0002 35 | n_epochs: 500 36 | max_steps_per_epoch: 100 37 | early_stop: 20 38 | seed: 1000 39 | logdir: output/test/transformer_tra_init 40 | model_type: Transformer 41 | model_config: *model_config 42 | tra_config: *tra_config 43 | lamb: 1.0 44 | rho: 0.99 45 | freeze_model: False 46 | model_init_state: 47 | dataset: 48 | class: MTSDatasetH 49 | module_path: src/dataset.py 50 | kwargs: 51 | handler: 52 | class: DataHandler 53 | module_path: qlib.data.dataset.handler 54 | kwargs: 55 | data_loader: *data_loader_config 56 | segments: 57 | train: 
[2007-10-30, 2016-05-27] 58 | valid: [2016-09-26, 2018-05-29] 59 | test: [2018-09-21, 2020-06-30] 60 | seq_len: 60 61 | horizon: 21 62 | num_states: *num_states 63 | batch_size: 512 -------------------------------------------------------------------------------- /examples/benchmarks/TRA/configs/config_alstm_tra.yaml: -------------------------------------------------------------------------------- 1 | qlib_init: 2 | provider_uri: "~/.qlib/qlib_data/cn_data" 3 | region: cn 4 | 5 | data_loader_config: &data_loader_config 6 | class: StaticDataLoader 7 | module_path: qlib.data.dataset.loader 8 | kwargs: 9 | config: 10 | feature: data/feature.pkl 11 | label: data/label.pkl 12 | 13 | model_config: &model_config 14 | input_size: 16 15 | hidden_size: 256 16 | num_layers: 2 17 | num_heads: 2 18 | use_attn: True 19 | dropout: 0.1 20 | 21 | num_states: &num_states 10 22 | 23 | tra_config: &tra_config 24 | num_states: *num_states 25 | hidden_size: 16 26 | tau: 1.0 27 | src_info: LR_TPE 28 | 29 | task: 30 | model: 31 | class: TRAModel 32 | module_path: src/model.py 33 | kwargs: 34 | lr: 0.0001 35 | n_epochs: 500 36 | max_steps_per_epoch: 100 37 | early_stop: 20 38 | seed: 1000 39 | logdir: output/test/alstm_tra 40 | model_type: LSTM 41 | model_config: *model_config 42 | tra_config: *tra_config 43 | lamb: 2.0 44 | rho: 0.99 45 | freeze_model: True 46 | model_init_state: output/test/alstm_tra_init/model.bin 47 | dataset: 48 | class: MTSDatasetH 49 | module_path: src/dataset.py 50 | kwargs: 51 | handler: 52 | class: DataHandler 53 | module_path: qlib.data.dataset.handler 54 | kwargs: 55 | data_loader: *data_loader_config 56 | segments: 57 | train: [2007-10-30, 2016-05-27] 58 | valid: [2016-09-26, 2018-05-29] 59 | test: [2018-09-21, 2020-06-30] 60 | seq_len: 60 61 | horizon: 21 62 | num_states: *num_states 63 | batch_size: 1024 -------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
3 | 4 | 5 | from __future__ import division 6 | from __future__ import print_function 7 | 8 | from .data import ( 9 | D, 10 | CalendarProvider, 11 | InstrumentProvider, 12 | FeatureProvider, 13 | ExpressionProvider, 14 | DatasetProvider, 15 | LocalCalendarProvider, 16 | LocalInstrumentProvider, 17 | LocalFeatureProvider, 18 | LocalPITProvider, 19 | LocalExpressionProvider, 20 | LocalDatasetProvider, 21 | ClientCalendarProvider, 22 | ClientInstrumentProvider, 23 | ClientDatasetProvider, 24 | BaseProvider, 25 | LocalProvider, 26 | ClientProvider, 27 | ) 28 | 29 | from .cache import ( 30 | ExpressionCache, 31 | DatasetCache, 32 | DiskExpressionCache, 33 | DiskDatasetCache, 34 | SimpleDatasetCache, 35 | DatasetURICache, 36 | MemoryCalendarCache, 37 | ) 38 | 39 | 40 | __all__ = [ 41 | "D", 42 | "CalendarProvider", 43 | "InstrumentProvider", 44 | "FeatureProvider", 45 | "ExpressionProvider", 46 | "DatasetProvider", 47 | "LocalCalendarProvider", 48 | "LocalInstrumentProvider", 49 | "LocalFeatureProvider", 50 | "LocalPITProvider", 51 | "LocalExpressionProvider", 52 | "LocalDatasetProvider", 53 | "ClientCalendarProvider", 54 | "ClientInstrumentProvider", 55 | "ClientDatasetProvider", 56 | "BaseProvider", 57 | "LocalProvider", 58 | "ClientProvider", 59 | "ExpressionCache", 60 | "DatasetCache", 61 | "DiskExpressionCache", 62 | "DiskDatasetCache", 63 | "SimpleDatasetCache", 64 | "DatasetURICache", 65 | "MemoryCalendarCache", 66 | ] 67 | -------------------------------------------------------------------------------- /examples/benchmarks/TRA/configs/config_transformer_tra.yaml: -------------------------------------------------------------------------------- 1 | qlib_init: 2 | provider_uri: "~/.qlib/qlib_data/cn_data" 3 | region: cn 4 | 5 | data_loader_config: &data_loader_config 6 | class: StaticDataLoader 7 | module_path: qlib.data.dataset.loader 8 | kwargs: 9 | config: 10 | feature: data/feature.pkl 11 | label: data/label.pkl 12 | 13 | model_config: &model_config 14 | input_size: 16 15 | hidden_size: 64 16 | num_layers: 2 17 | num_heads: 4 18 | use_attn: False 19 | dropout: 0.1 20 | 21 | num_states: &num_states 3 22 | 23 | tra_config: &tra_config 24 | num_states: *num_states 25 | hidden_size: 16 26 | tau: 1.0 27 | src_info: LR_TPE 28 | 29 | task: 30 | model: 31 | class: TRAModel 32 | module_path: src/model.py 33 | kwargs: 34 | lr: 0.0005 35 | n_epochs: 500 36 | max_steps_per_epoch: 100 37 | early_stop: 20 38 | seed: 1000 39 | logdir: output/test/transformer_tra 40 | model_type: Transformer 41 | model_config: *model_config 42 | tra_config: *tra_config 43 | lamb: 1.0 44 | rho: 0.99 45 | freeze_model: True 46 | model_init_state: output/test/transformer_tra_init/model.bin 47 | dataset: 48 | class: MTSDatasetH 49 | module_path: src/dataset.py 50 | kwargs: 51 | handler: 52 | class: DataHandler 53 | module_path: qlib.data.dataset.handler 54 | kwargs: 55 | data_loader: *data_loader_config 56 | segments: 57 | train: [2007-10-30, 2016-05-27] 58 | valid: [2016-09-26, 2018-05-29] 59 | test: [2018-09-21, 2020-06-30] 60 | seq_len: 60 61 | horizon: 21 62 | num_states: *num_states 63 | batch_size: 512 -------------------------------------------------------------------------------- /scripts/data_collector/crowd_source/README.md: -------------------------------------------------------------------------------- 1 | # Crowd Source Data 2 | 3 | ## Initiative 4 | Public data sources like Yahoo are flawed: they may miss data for stocks that have been delisted, and some of their data may simply be wrong.
This can introduce survivorship bias into our training process. 5 | 6 | The crowd-source data is introduced to merge data from multiple data sources and cross-validate them against each other, so that: 7 | 1. We will have a more complete historical record. 8 | 2. We can identify anomalous data and apply corrections when necessary. 9 | 10 | ## Related Repo 11 | The raw data is hosted in a dolthub repo: https://www.dolthub.com/repositories/chenditc/investment_data 12 | 13 | The processing scripts and SQL are hosted in a github repo: https://github.com/chenditc/investment_data 14 | 15 | The packaged docker runtime is hosted on dockerhub: https://hub.docker.com/repository/docker/chenditc/investment_data 16 | 17 | ## How to use it in qlib 18 | ### Option 1: Download release bin data 19 | Users can download the data in qlib bin format and use it directly: https://github.com/chenditc/investment_data/releases/latest 20 | ```bash 21 | wget https://github.com/chenditc/investment_data/releases/latest/download/qlib_bin.tar.gz 22 | tar -zxvf qlib_bin.tar.gz -C ~/.qlib/qlib_data/cn_data --strip-components=2 23 | ``` 24 | 25 | ### Option 2: Generate qlib data from dolthub 26 | The dolthub data is updated daily, so users who want up-to-date data can dump the qlib bin data using docker: 27 | ```bash 28 | docker run -v /:/output -it --rm chenditc/investment_data bash dump_qlib_bin.sh && cp ./qlib_bin.tar.gz /output/ 29 | ``` 30 | 31 | ## FAQ and other info 32 | See: https://github.com/chenditc/investment_data/blob/main/README.md 33 | -------------------------------------------------------------------------------- /scripts/data_collector/fund/README.md: -------------------------------------------------------------------------------- 1 | # Collect Fund Data 2 | 3 | > *Please pay **ATTENTION** that the data is collected from [天天基金网](https://fund.eastmoney.com/) and the data might not be perfect. We recommend that users prepare their own data if they have a high-quality dataset.
For more information, users can refer to the [related document](https://qlib.readthedocs.io/en/latest/component/data.html#converting-csv-format-into-qlib-format)* 4 | 5 | ## Requirements 6 | 7 | ```bash 8 | pip install -r requirements.txt 9 | ``` 10 | 11 | ## Collect Data 12 | 13 | 14 | ### CN Data 15 | 16 | #### 1d from East Money 17 | 18 | ```bash 19 | 20 | # download from eastmoney.com 21 | python collector.py download_data --source_dir ~/.qlib/fund_data/source/cn_data --region CN --start 2020-11-01 --end 2020-11-10 --delay 0.1 --interval 1d 22 | 23 | # normalize 24 | python collector.py normalize_data --source_dir ~/.qlib/fund_data/source/cn_data --normalize_dir ~/.qlib/fund_data/source/cn_1d_nor --region CN --interval 1d --date_field_name FSRQ 25 | 26 | # dump data 27 | cd qlib/scripts 28 | python dump_bin.py dump_all --data_path ~/.qlib/fund_data/source/cn_1d_nor --qlib_dir ~/.qlib/qlib_data/cn_fund_data --freq day --date_field_name FSRQ --include_fields DWJZ,LJJZ 29 | 30 | ``` 31 | 32 | ### Using data 33 | 34 | ```python 35 | import qlib 36 | from qlib.data import D 37 | 38 | qlib.init(provider_uri="~/.qlib/qlib_data/cn_fund_data") 39 | df = D.features(D.instruments(market="all"), ["$DWJZ", "$LJJZ"], freq="day") 40 | ``` 41 | 42 | 43 | ### Help 44 | ```bash 45 | python collector.py collector_data --help 46 | ``` 47 | 48 | ## Parameters 49 | 50 | - interval: 1d 51 | - region: CN 52 | 53 | ## Disclaimer 54 | 55 | This project is for learning and research purposes only; it does not constitute guidance or advice for any action, and any disputes arising from it are unrelated to this project. 56 | -------------------------------------------------------------------------------- /contrib/report/data/base.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | """ 4 | This module is responsible for analysing data 5 | 6 | Assumptions 7 | - It analyses each feature individually 8 | 9 | """ 10 | import pandas as pd 11 | from qlib.log import TimeInspector 12 | from qlib.contrib.report.utils import sub_fig_generator 13 | 14 | 15 | class FeaAnalyser: 16 | def __init__(self, dataset: pd.DataFrame): 17 | """ 18 | 19 | Parameters 20 | ---------- 21 | dataset : pd.DataFrame 22 | 23 | We often have multiple columns in the dataset. Each column corresponds to one sub figure. 24 | There will be a datetime column in the index levels. 25 | Aggregation will be used for more summarized metrics over time. 26 | Here is an example of data: 27 | 28 | .. code-block:: 29 | 30 | return 31 | datetime instrument 32 | 2007-02-06 equity_tpx 0.010087 33 | equity_spx 0.000786 34 | """ 35 | self._dataset = dataset 36 | with TimeInspector.logt("calc_stat_values"): 37 | self.calc_stat_values() 38 | 39 | def calc_stat_values(self): 40 | pass 41 | 42 | def plot_single(self, col, ax): 43 | raise NotImplementedError("This type of input is not supported") 44 | 45 | def skip(self, col): 46 | return False 47 | 48 | def plot_all(self, *args, **kwargs): 49 | ax_gen = iter(sub_fig_generator(*args, **kwargs)) 50 | for col in self._dataset: 51 | if not self.skip(col): 52 | ax = next(ax_gen) 53 | self.plot_single(col, ax) 54 | --------------------------------------------------------------------------------
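The `FeaAnalyser` base class above expects subclasses to precompute statistics in `calc_stat_values` and draw one sub-figure per column in `plot_single`. A minimal sketch of a custom analyser follows; the statistic and the class name are illustrative assumptions, and `plot_all` supplies the matplotlib axes.

```python
from qlib.contrib.report.data.base import FeaAnalyser


class MeanAnalyser(FeaAnalyser):
    """Hypothetical analyser: plots each feature's cross-sectional mean over time."""

    def calc_stat_values(self):
        # aggregate over instruments for each datetime (one index level is "datetime")
        self._mean = self._dataset.groupby(level="datetime").mean()

    def plot_single(self, col, ax):
        self._mean[col].plot(ax=ax, title=col)
```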
/workflow/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | import atexit 5 | import logging 6 | import sys 7 | import traceback 8 | 9 | from ..log import get_module_logger 10 | from . import R 11 | from .recorder import Recorder 12 | 13 | logger = get_module_logger("workflow", logging.INFO) 14 | 15 | 16 | # functions to handle the experiment when an unusual program ending occurs 17 | def experiment_exit_handler(): 18 | """ 19 | Method for handling the experiment when any unusual program ending occurs. 20 | The `atexit` handler should be registered last, since it is called whenever the program ends. 21 | Thus, if any exception or user interruption occurs beforehand, we should handle them first. Once `R` has 22 | ended, another call of `R.end_exp` will not take effect. 23 | 24 | Limitations: 25 | - If pdb is used in your program, excepthook will not be triggered when it ends, and the status will be marked as finished 26 | """ 27 | sys.excepthook = experiment_exception_hook  # handle uncaught exceptions 28 | atexit.register(R.end_exp, recorder_status=Recorder.STATUS_FI)  # will not take effect if the experiment has already ended 29 | 30 | 31 | def experiment_exception_hook(exc_type, value, tb): 32 | """ 33 | End the experiment with status "FAILED". This hook tries to catch uncaught exceptions 34 | and end the experiment automatically. 35 | 36 | Parameters 37 | exc_type: Exception type 38 | value: Exception's value 39 | tb: Exception's traceback 40 | """ 41 | logger.error(f"An exception has been raised[{exc_type.__name__}: {value}].") 42 | 43 | # Same as the original format 44 | traceback.print_tb(tb) 45 | print(f"{exc_type.__name__}: {value}") 46 | 47 | R.end_exp(recorder_status=Recorder.STATUS_FA) 48 | -------------------------------------------------------------------------------- /tests/test_get_data.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License.
3 | 4 | import shutil 5 | import unittest 6 | from pathlib import Path 7 | 8 | import qlib 9 | from qlib.data import D 10 | from qlib.tests.data import GetData 11 | 12 | DATA_DIR = Path(__file__).parent.joinpath("test_get_data") 13 | SOURCE_DIR = DATA_DIR.joinpath("source") 14 | SOURCE_DIR.mkdir(exist_ok=True, parents=True) 15 | QLIB_DIR = DATA_DIR.joinpath("qlib") 16 | QLIB_DIR.mkdir(exist_ok=True, parents=True) 17 | 18 | 19 | class TestGetData(unittest.TestCase): 20 | FIELDS = "$open,$close,$high,$low,$volume,$factor,$change".split(",") 21 | 22 | @classmethod 23 | def setUpClass(cls) -> None: 24 | provider_uri = str(QLIB_DIR.resolve()) 25 | qlib.init( 26 | provider_uri=provider_uri, 27 | expression_cache=None, 28 | dataset_cache=None, 29 | ) 30 | 31 | @classmethod 32 | def tearDownClass(cls) -> None: 33 | shutil.rmtree(str(DATA_DIR.resolve())) 34 | 35 | def test_0_qlib_data(self): 36 | GetData().qlib_data( 37 | name="qlib_data_simple", target_dir=QLIB_DIR, region="cn", interval="1d", delete_old=False, exists_skip=True 38 | ) 39 | df = D.features(D.instruments("csi300"), self.FIELDS) 40 | self.assertListEqual(list(df.columns), self.FIELDS, "get qlib data failed") 41 | self.assertFalse(df.dropna().empty, "get qlib data failed") 42 | 43 | def test_1_csv_data(self): 44 | GetData().download_data(file_name="csv_data_cn.zip", target_dir=SOURCE_DIR) 45 | stock_name = set(map(lambda x: x.name[:-4].upper(), SOURCE_DIR.glob("*.csv"))) 46 | self.assertEqual(len(stock_name), 85, "get csv data failed") 47 | 48 | 49 | if __name__ == "__main__": 50 | unittest.main() 51 | -------------------------------------------------------------------------------- /examples/portfolio/README.md: -------------------------------------------------------------------------------- 1 | # Portfolio Optimization Strategy 2 | 3 | ## Introduction 4 | 5 | In `qlib/examples/benchmarks` we have various **alpha** models that predict 6 | the stock returns. We also use a simple rule-based `TopkDropoutStrategy` to 7 | evaluate the investing performance of these models. However, such a strategy 8 | is too simple to control portfolio risks such as correlation and volatility. 9 | 10 | To this end, an optimization-based strategy should be used for the 11 | trade-off between return and risk. In this doc, we will show how to use 12 | `EnhancedIndexingStrategy` to maximize portfolio return while minimizing 13 | tracking error relative to a benchmark. 14 | 15 | 16 | ## Preparation 17 | 18 | We use China stock market data for our example. 19 | 20 | 1. Prepare CSI300 weight: 21 | 22 | ```bash 23 | wget https://github.com/SunsetWolf/qlib_dataset/releases/download/v0/csi300_weight.zip 24 | unzip -d ~/.qlib/qlib_data/cn_data csi300_weight.zip 25 | rm -f csi300_weight.zip 26 | ``` 27 | NOTE: We did not find any free public resource for the benchmark weights. To run the example, we manually created this weight data. 28 | 29 | 2. Prepare risk model data: 30 | 31 | ```bash 32 | python prepare_riskdata.py 33 | ``` 34 | 35 | Here we use a **Statistical Risk Model** implemented in `qlib.model.riskmodel`. 36 | However, users are strongly recommended to use other risk models for better quality: 37 | * **Fundamental Risk Model** like MSCI BARRA 38 | * [Deep Risk Model](https://arxiv.org/abs/2107.05201) 39 | 40 | 41 | ## End-to-End Workflow 42 | 43 | You can finish the workflow with `EnhancedIndexingStrategy` by running 44 | `qrun config_enhanced_indexing.yaml`. 
45 | 46 | In this config, we mainly changed the strategy section compared to 47 | `qlib/examples/benchmarks/workflow_config_lightgbm_Alpha158.yaml`. 48 | -------------------------------------------------------------------------------- /rl/order_execution/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | from __future__ import annotations 5 | 6 | from typing import Any, cast 7 | 8 | import numpy as np 9 | import pandas as pd 10 | 11 | from qlib.backtest.decision import OrderDir 12 | from qlib.backtest.executor import BaseExecutor, NestedExecutor, SimulatorExecutor 13 | from qlib.constant import float_or_ndarray 14 | 15 | 16 | def dataframe_append(df: pd.DataFrame, other: Any) -> pd.DataFrame: 17 | # dataframe.append is deprecated 18 | other_df = pd.DataFrame(other).set_index("datetime") 19 | other_df.index.name = "datetime" 20 | 21 | res = pd.concat([df, other_df], axis=0) 22 | return res 23 | 24 | 25 | def price_advantage( 26 | exec_price: float_or_ndarray, 27 | baseline_price: float, 28 | direction: OrderDir | int, 29 | ) -> float_or_ndarray: 30 | if baseline_price == 0:  # something is wrong with the data; should be nan here 31 | if isinstance(exec_price, float): 32 | return 0.0 33 | else: 34 | return np.zeros_like(exec_price) 35 | if direction == OrderDir.BUY: 36 | res = (1 - exec_price / baseline_price) * 10000 37 | elif direction == OrderDir.SELL: 38 | res = (exec_price / baseline_price - 1) * 10000 39 | else: 40 | raise ValueError(f"Unexpected order direction: {direction}") 41 | res_wo_nan: np.ndarray = np.nan_to_num(res, nan=0.0) 42 | if res_wo_nan.size == 1: 43 | return res_wo_nan.item() 44 | else: 45 | return cast(float_or_ndarray, res_wo_nan) 46 | 47 | 48 | def get_simulator_executor(executor: BaseExecutor) -> SimulatorExecutor: 49 | while isinstance(executor, NestedExecutor): 50 | executor = executor.inner_executor 51 | assert isinstance(executor, SimulatorExecutor) 52 | return executor 53 | --------------------------------------------------------------------------------
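A usage sketch for `price_advantage` above; the prices are invented for illustration. Buying below the baseline counts as an advantage, reported in basis points.

```python
import numpy as np

from qlib.backtest.decision import OrderDir
from qlib.rl.order_execution.utils import price_advantage

pa = price_advantage(np.array([9.9, 10.1]), baseline_price=10.0, direction=OrderDir.BUY)
print(pa)  # [ 100. -100.]
```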
/scripts/data_collector/crypto/README.md: -------------------------------------------------------------------------------- 1 | # Collect Crypto Data 2 | 3 | > *Please pay **ATTENTION** that the data is collected from [Coingecko](https://www.coingecko.com/en/api) and the data might not be perfect. We recommend that users prepare their own data if they have a high-quality dataset. For more information, users can refer to the [related document](https://qlib.readthedocs.io/en/latest/component/data.html#converting-csv-format-into-qlib-format)* 4 | 5 | ## Requirements 6 | 7 | ```bash 8 | pip install -r requirements.txt 9 | ``` 10 | 11 | ## Usage of the dataset 12 | > *The crypto dataset only supports the data retrieval function; it does not support the backtest function, due to the lack of OHLC data.* 13 | 14 | ## Collect Data 15 | 16 | 17 | ### Crypto Data 18 | 19 | #### 1d from Coingecko 20 | 21 | ```bash 22 | 23 | # download from https://api.coingecko.com/api/v3/ 24 | python collector.py download_data --source_dir ~/.qlib/crypto_data/source/1d --start 2015-01-01 --end 2021-11-30 --delay 1 --interval 1d 25 | 26 | # normalize 27 | python collector.py normalize_data --source_dir ~/.qlib/crypto_data/source/1d --normalize_dir ~/.qlib/crypto_data/source/1d_nor --interval 1d --date_field_name date 28 | 29 | # dump data 30 | cd qlib/scripts 31 | python dump_bin.py dump_all --data_path ~/.qlib/crypto_data/source/1d_nor --qlib_dir ~/.qlib/qlib_data/crypto_data --freq day --date_field_name date --include_fields prices,total_volumes,market_caps 32 | 33 | ``` 34 | 35 | ### Using data 36 | 37 | ```python 38 | import qlib 39 | from qlib.data import D 40 | 41 | qlib.init(provider_uri="~/.qlib/qlib_data/crypto_data") 42 | df = D.features(D.instruments(market="all"), ["$prices", "$total_volumes","$market_caps"], freq="day") 43 | ``` 44 | 45 | 46 | ### Help 47 | ```bash 48 | python collector.py collector_data --help 49 | ``` 50 | 51 | ## Parameters 52 | 53 | - interval: 1d 54 | - delay: 1 55 | -------------------------------------------------------------------------------- /contrib/model/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | try: 4 | from .catboost_model import CatBoostModel 5 | except ModuleNotFoundError: 6 | CatBoostModel = None 7 | print("ModuleNotFoundError. CatBoostModel is skipped. (optional: maybe installing catboost can fix it.)") 8 | try: 9 | from .double_ensemble import DEnsembleModel 10 | from .gbdt import LGBModel 11 | except ModuleNotFoundError: 12 | DEnsembleModel, LGBModel = None, None 13 | print( 14 | "ModuleNotFoundError. DEnsembleModel and LGBModel are skipped. (optional: maybe installing lightgbm can fix it.)" 15 | ) 16 | try: 17 | from .xgboost import XGBModel 18 | except ModuleNotFoundError: 19 | XGBModel = None 20 | print("ModuleNotFoundError. XGBModel is skipped (optional: maybe installing xgboost can fix it).") 21 | try: 22 | from .linear import LinearModel 23 | except ModuleNotFoundError: 24 | LinearModel = None 25 | print("ModuleNotFoundError. LinearModel is skipped (optional: maybe installing scipy and sklearn can fix it).") 26 | # import pytorch models 27 | try: 28 | from .pytorch_alstm import ALSTM 29 | from .pytorch_gats import GATs 30 | from .pytorch_gru import GRU 31 | from .pytorch_lstm import LSTM 32 | from .pytorch_nn import DNNModelPytorch 33 | from .pytorch_tabnet import TabnetModel 34 | from .pytorch_sfm import SFM_Model 35 | from .pytorch_tcn import TCN 36 | from .pytorch_add import ADD 37 | 38 | pytorch_classes = (ALSTM, GATs, GRU, LSTM, DNNModelPytorch, TabnetModel, SFM_Model, TCN, ADD) 39 | except ModuleNotFoundError: 40 | pytorch_classes = () 41 | print("ModuleNotFoundError. PyTorch models are skipped (optional: maybe installing pytorch can fix it).")
42 | 43 | all_model_classes = (CatBoostModel, DEnsembleModel, LGBModel, XGBModel, LinearModel) + pytorch_classes 44 | -------------------------------------------------------------------------------- /examples/highfreq/README.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | This folder contains two examples: 3 | - A high-frequency dataset example 4 | - An example of predicting the price trend in high-frequency data 5 | 6 | ## High-Frequency Dataset 7 | 8 | This dataset is an example for RL-based high-frequency trading. 9 | 10 | ### Get High-Frequency Data 11 | 12 | Get high-frequency data by running the following command: 13 | ```bash 14 | python workflow.py get_data 15 | ``` 16 | 17 | ### Dump & Reload & Reinitialize the Dataset 18 | 19 | 20 | The High-Frequency Dataset is implemented as `qlib.data.dataset.DatasetH` in the `workflow.py`. `DatasetH` is a subclass of [`qlib.utils.serial.Serializable`](https://qlib.readthedocs.io/en/latest/advanced/serial.html), whose state can be dumped to or loaded from disk in `pickle` format. 21 | 22 | ### About Reinitialization 23 | 24 | After reloading a `Dataset` from disk, `Qlib` also supports reinitializing the dataset. This means that users can reset some states of the `Dataset` or `DataHandler` such as `instruments`, `start_time`, `end_time` and `segments`, etc., and generate new data according to these states. 25 | 26 | The example is given in `workflow.py`; users can run the code as follows. 27 | 28 | ### Run the Code 29 | 30 | Run the example by running the following command: 31 | ```bash 32 | python workflow.py dump_and_load_dataset 33 | ``` 34 | 35 | ## Benchmarks Performance (predicting the price trend in high-frequency data) 36 | 37 | Here are the results of models for predicting the price trend in high-frequency data. We will keep updating benchmark models in the future. 38 | 39 | | Model Name | Dataset | IC | ICIR | Rank IC | Rank ICIR | Long Precision | Short Precision | Long-Short Average Return | Long-Short Average Sharpe | 40 | |---|---|---|---|---|---|---|---|---|---| 41 | | LightGBM | Alpha158 | 0.0349±0.00 | 0.3805±0.00 | 0.0435±0.00 | 0.4724±0.00 | 0.5111±0.00 | 0.5428±0.00 | 0.000074±0.00 | 0.2677±0.00 | 42 | --------------------------------------------------------------------------------
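A minimal sketch of the dump / reload / reinitialize cycle described above, assuming `dataset` is the `DatasetH` instance built in `workflow.py`; the file name and handler kwargs are illustrative.

```python
import pickle

from qlib.data.dataset.handler import DataHandlerLP

# dump: DatasetH inherits Serializable, so its state can be pickled to disk
dataset.to_pickle("highfreq_dataset.pkl")

# reload
with open("highfreq_dataset.pkl", "rb") as f:
    dataset = pickle.load(f)

# reinitialize: reset states such as the time range, then regenerate the data
dataset.config(handler_kwargs={"start_time": "2021-01-04", "end_time": "2021-01-29"})
dataset.setup_data(handler_kwargs={"init_type": DataHandlerLP.IT_FIT_SEQ})
```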
/tests/dataset_tests/test_datalayer.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from qlib.data import D 4 | from qlib.tests import TestAutoData 5 | 6 | 7 | class TestDataset(TestAutoData): 8 | def testCSI300(self): 9 | close_p = D.features(D.instruments("csi300"), ["$close"]) 10 | size = close_p.groupby("datetime", group_keys=False).size() 11 | cnt = close_p.groupby("datetime", group_keys=False).count()["$close"] 12 | size_desc = size.describe(percentiles=np.arange(0.1, 1.0, 0.1)) 13 | cnt_desc = cnt.describe(percentiles=np.arange(0.1, 1.0, 0.1)) 14 | 15 | print(size_desc) 16 | print(cnt_desc) 17 | 18 | self.assertLessEqual(size_desc.loc["max"], 305, "Excessive number of CSI300 constituent stocks") 19 | self.assertGreaterEqual(size_desc.loc["80%"], 290, "Insufficient number of CSI300 constituent stocks") 20 | 21 | self.assertLessEqual(cnt_desc.loc["max"], 305, "Excessive number of CSI300 constituent stocks") 22 | # FIXME: Due to the low quality of the data, it is hard to make sure there are enough data points 23 | # self.assertEqual(cnt_desc.loc["80%"], 300, "Insufficient number of CSI300 constituent stocks") 24 | 25 | def testClose(self): 26 | close_p = D.features(D.instruments("csi300"), ["Ref($close, 1)/$close - 1"]) 27 | close_desc = close_p.describe(percentiles=np.arange(0.1, 1.0, 0.1)) 28 | print(close_desc) 29 | self.assertLessEqual(abs(close_desc.loc["90%"][0]), 0.1, "Close value is abnormal") 30 | self.assertLessEqual(abs(close_desc.loc["10%"][0]), 0.1, "Close value is abnormal") 31 | # FIXME: The yahoo data is not perfect, so we have to relax these checks: 32 | # self.assertLessEqual(abs(close_desc.loc["max"][0]), 0.2, "Close value is abnormal") 33 | # self.assertGreaterEqual(close_desc.loc["min"][0], -0.2, "Close value is abnormal") 34 | 35 | 36 | if __name__ == "__main__": 37 | unittest.main() 38 | -------------------------------------------------------------------------------- /examples/portfolio/prepare_riskdata.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | import os 4 | import numpy as np 5 | import pandas as pd 6 | 7 | from qlib.data import D 8 | from qlib.model.riskmodel import StructuredCovEstimator 9 | 10 | 11 | def prepare_data(riskdata_root="./riskdata", T=240, start_time="2016-01-01"): 12 | universe = D.features(D.instruments("csi300"), ["$close"], start_time=start_time).swaplevel().sort_index() 13 | 14 | price_all = ( 15 | D.features(D.instruments("all"), ["$close"], start_time=start_time).squeeze().unstack(level="instrument") 16 | ) 17 | 18 | # StructuredCovEstimator is a statistical risk model 19 | riskmodel = StructuredCovEstimator() 20 | 21 | for i in range(T - 1, len(price_all)): 22 | date = price_all.index[i] 23 | ref_date = price_all.index[i - T + 1] 24 | 25 | print(date) 26 | 27 | codes = universe.loc[date].index 28 | price = price_all.loc[ref_date:date, codes] 29 | 30 | # calculate returns and clip extreme returns 31 | ret = price.pct_change() 32 | ret.clip(ret.quantile(0.025), ret.quantile(0.975), axis=1, inplace=True) 33 | 34 | # run the risk model 35 | F, cov_b, var_u = riskmodel.predict(ret, is_price=False, return_decomposed_components=True) 36 | 37 | # save the risk data 38 | root = riskdata_root + "/" + date.strftime("%Y%m%d") 39 | os.makedirs(root, exist_ok=True) 40 | 41 | pd.DataFrame(F, index=codes).to_pickle(root + "/factor_exp.pkl") 42 | pd.DataFrame(cov_b).to_pickle(root + "/factor_cov.pkl") 43 | # for specific_risk we follow the convention of saving volatility 44 | pd.Series(np.sqrt(var_u), index=codes).to_pickle(root + "/specific_risk.pkl") 45 | 46 | 47 | if __name__ == "__main__": 48 | import qlib 49 | 50 | qlib.init(provider_uri="~/.qlib/qlib_data/cn_data") 51 | 52 | prepare_data() 53 | -------------------------------------------------------------------------------- /examples/rl_order_execution/exp_configs/backtest_ppo.yml: -------------------------------------------------------------------------------- 1 | order_file: ./data/orders/test_orders.pkl 2 | start_time: "9:30" 3 | end_time: "14:54" 4 | data_granularity: "5min" 5 | qlib: 6 | provider_uri_5min: ./data/bin/ 7 | exchange: 8 | limit_threshold: null 9 | deal_price: ["$close", "$close"] 10 | volume_threshold: null 11 | strategies: 12 | 1day: 13 | class: SAOEIntStrategy 14 | kwargs: 15 | data_granularity: 5 16 | action_interpreter: 17 | class: CategoricalActionInterpreter 18 | kwargs: 19 | max_step: 8 20 | values: 4 21 | module_path: 
qlib.rl.order_execution.interpreter 22 | network: 23 | class: Recurrent 24 | kwargs: {} 25 | module_path: qlib.rl.order_execution.network 26 | policy: 27 | class: PPO # PPO, DQN 28 | kwargs: 29 | lr: 0.0001 30 | # Restore `weight_file` once the training workflow finishes. You can change the checkpoint file you want to use. 31 | # weight_file: outputs/ppo/checkpoints/latest.pth 32 | module_path: qlib.rl.order_execution.policy 33 | state_interpreter: 34 | class: FullHistoryStateInterpreter 35 | kwargs: 36 | data_dim: 5 37 | data_ticks: 48 38 | max_step: 8 39 | processed_data_provider: 40 | class: HandlerProcessedDataProvider 41 | kwargs: 42 | data_dir: ./data/pickle/ 43 | feature_columns_today: ["$high", "$low", "$open", "$close", "$volume"] 44 | feature_columns_yesterday: ["$high_1", "$low_1", "$open_1", "$close_1", "$volume_1"] 45 | module_path: qlib.rl.data.native 46 | module_path: qlib.rl.order_execution.interpreter 47 | module_path: qlib.rl.order_execution.strategy 48 | 30min: 49 | class: TWAPStrategy 50 | kwargs: {} 51 | module_path: qlib.contrib.strategy.rule_strategy 52 | concurrency: 16 53 | output_dir: outputs/ppo/ 54 | -------------------------------------------------------------------------------- /examples/rl_order_execution/exp_configs/backtest_opds.yml: -------------------------------------------------------------------------------- 1 | order_file: ./data/orders/test_orders.pkl 2 | start_time: "9:30" 3 | end_time: "14:54" 4 | data_granularity: "5min" 5 | qlib: 6 | provider_uri_5min: ./data/bin/ 7 | exchange: 8 | limit_threshold: null 9 | deal_price: ["$close", "$close"] 10 | volume_threshold: null 11 | strategies: 12 | 1day: 13 | class: SAOEIntStrategy 14 | kwargs: 15 | data_granularity: 5 16 | action_interpreter: 17 | class: CategoricalActionInterpreter 18 | kwargs: 19 | max_step: 8 20 | values: 4 21 | module_path: qlib.rl.order_execution.interpreter 22 | network: 23 | class: Recurrent 24 | kwargs: {} 25 | module_path: qlib.rl.order_execution.network 26 | policy: 27 | class: PPO # PPO, DQN 28 | kwargs: 29 | lr: 0.0001 30 | # Restore `weight_file` once the training workflow finishes. You can change the checkpoint file you want to use. 
31 | # weight_file: outputs/opds/checkpoints/latest.pth 32 | module_path: qlib.rl.order_execution.policy 33 | state_interpreter: 34 | class: FullHistoryStateInterpreter 35 | kwargs: 36 | data_dim: 5 37 | data_ticks: 48 38 | max_step: 8 39 | processed_data_provider: 40 | class: HandlerProcessedDataProvider 41 | kwargs: 42 | data_dir: ./data/pickle/ 43 | feature_columns_today: ["$high", "$low", "$open", "$close", "$volume"] 44 | feature_columns_yesterday: ["$high_1", "$low_1", "$open_1", "$close_1", "$volume_1"] 45 | module_path: qlib.rl.data.native 46 | module_path: qlib.rl.order_execution.interpreter 47 | module_path: qlib.rl.order_execution.strategy 48 | 30min: 49 | class: TWAPStrategy 50 | kwargs: {} 51 | module_path: qlib.contrib.strategy.rule_strategy 52 | concurrency: 16 53 | output_dir: outputs/opds/ 54 | -------------------------------------------------------------------------------- /examples/rl_order_execution/exp_configs/train_opds.yml: -------------------------------------------------------------------------------- 1 | simulator: 2 | data_granularity: 5 3 | time_per_step: 30 4 | vol_limit: null 5 | env: 6 | concurrency: 32 7 | parallel_mode: dummy 8 | action_interpreter: 9 | class: CategoricalActionInterpreter 10 | kwargs: 11 | values: 4 12 | max_step: 8 13 | module_path: qlib.rl.order_execution.interpreter 14 | state_interpreter: 15 | class: FullHistoryStateInterpreter 16 | kwargs: 17 | data_dim: 5 18 | data_ticks: 48 # 48 = 240 min / 5 min 19 | max_step: 8 20 | processed_data_provider: 21 | class: HandlerProcessedDataProvider 22 | kwargs: 23 | data_dir: ./data/pickle/ 24 | feature_columns_today: ["$high", "$low", "$open", "$close", "$volume"] 25 | feature_columns_yesterday: ["$high_1", "$low_1", "$open_1", "$close_1", "$volume_1"] 26 | backtest: false 27 | module_path: qlib.rl.data.native 28 | module_path: qlib.rl.order_execution.interpreter 29 | reward: 30 | class: PAPenaltyReward 31 | kwargs: 32 | penalty: 4.0 33 | scale: 0.01 34 | module_path: qlib.rl.order_execution.reward 35 | data: 36 | source: 37 | order_dir: ./data/orders 38 | feature_root_dir: ./data/pickle/ 39 | feature_columns_today: ["$close0", "$volume0"] 40 | feature_columns_yesterday: [] 41 | total_time: 240 42 | default_start_time_index: 0 43 | default_end_time_index: 235 44 | proc_data_dim: 5 45 | num_workers: 0 46 | queue_size: 20 47 | network: 48 | class: Recurrent 49 | module_path: qlib.rl.order_execution.network 50 | policy: 51 | class: PPO # PPO, DQN 52 | kwargs: 53 | lr: 0.0001 54 | module_path: qlib.rl.order_execution.policy 55 | runtime: 56 | seed: 42 57 | use_cuda: false 58 | trainer: 59 | max_epoch: 500 60 | repeat_per_collect: 25 61 | earlystop_patience: 50 62 | episode_per_collect: 10000 63 | batch_size: 1024 64 | val_every_n_epoch: 4 65 | checkpoint_path: ./outputs/opds 66 | checkpoint_every_n_iters: 1 67 | -------------------------------------------------------------------------------- /examples/rl_order_execution/scripts/gen_pickle_data.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
3 | 4 | import yaml 5 | import argparse 6 | import os 7 | import shutil 8 | from copy import deepcopy 9 | 10 | from qlib.contrib.data.highfreq_provider import HighFreqProvider 11 | 12 | loader = yaml.FullLoader 13 | 14 | if __name__ == "__main__": 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument("-c", "--config", type=str, default="config.yml") 17 | parser.add_argument("-d", "--dest", type=str, default=".") 18 | parser.add_argument("-s", "--split", type=str, choices=["none", "date", "stock", "both"], default="stock") 19 | args = parser.parse_args() 20 | 21 | conf = yaml.load(open(args.config), Loader=loader) 22 | 23 | for k, v in conf.items(): 24 | if isinstance(v, dict) and "path" in v: 25 | v["path"] = os.path.join(args.dest, v["path"]) 26 | provider = HighFreqProvider(**conf) 27 | 28 | # Gen dataframe 29 | if "feature_conf" in conf: 30 | feature = provider._gen_dataframe(deepcopy(provider.feature_conf)) 31 | if "backtest_conf" in conf: 32 | backtest = provider._gen_dataframe(deepcopy(provider.backtest_conf)) 33 | 34 | provider.feature_conf["path"] = os.path.splitext(provider.feature_conf["path"])[0] + "/" 35 | provider.backtest_conf["path"] = os.path.splitext(provider.backtest_conf["path"])[0] + "/" 36 | # Split by date 37 | if args.split == "date" or args.split == "both": 38 | provider._gen_day_dataset(deepcopy(provider.feature_conf), "feature") 39 | provider._gen_day_dataset(deepcopy(provider.backtest_conf), "backtest") 40 | 41 | # Split by stock 42 | if args.split == "stock" or args.split == "both": 43 | provider._gen_stock_dataset(deepcopy(provider.feature_conf), "feature") 44 | provider._gen_stock_dataset(deepcopy(provider.backtest_conf), "backtest") 45 | 46 | shutil.rmtree("stat/", ignore_errors=True) 47 | -------------------------------------------------------------------------------- /model/meta/task.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | from qlib.data.dataset import Dataset 5 | from ...utils import init_instance_by_config 6 | 7 | 8 | class MetaTask: 9 | """ 10 | A single meta-task, a meta-dataset contains a list of them. 11 | It serves as a component as in MetaDatasetDS 12 | 13 | The data processing is different 14 | 15 | - the processed input may be different between training and testing 16 | 17 | - When training, the X, y, X_test, y_test in training tasks are necessary (# PROC_MODE_FULL #) 18 | but not necessary in test tasks. 
(# PROC_MODE_TEST #) 19 | - When the meta model can be transferred to another dataset, only meta_info is necessary (# PROC_MODE_TRANSFER #) 20 | """ 21 | 22 | PROC_MODE_FULL = "full" 23 | PROC_MODE_TEST = "test" 24 | PROC_MODE_TRANSFER = "transfer" 25 | 26 | def __init__(self, task: dict, meta_info: object, mode: str = PROC_MODE_FULL): 27 | """ 28 | The `__init__` func is responsible for 29 | 30 | - store the task 31 | - store the original input data 32 | - process the input data into the meta information 33 | 34 | Parameters 35 | ---------- 36 | task : dict 37 | the task to be enhanced by the meta model 38 | 39 | meta_info : object 40 | the input for the meta model 41 | """ 42 | self.task = task 43 | self.meta_info = meta_info  # the original meta input information; it will be processed later 44 | self.mode = mode 45 | 46 | def get_dataset(self) -> Dataset: 47 | return init_instance_by_config(self.task["dataset"], accept_types=Dataset) 48 | 49 | def get_meta_input(self) -> object: 50 | """ 51 | Return the **processed** meta_info 52 | """ 53 | return self.meta_info 54 | 55 | def __repr__(self): 56 | return f"MetaTask(task={self.task}, meta_info={self.meta_info})" 57 | -------------------------------------------------------------------------------- /rl/data/base.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | from __future__ import annotations 4 | 5 | from abc import abstractmethod 6 | 7 | import pandas as pd 8 | 9 | 10 | class BaseIntradayBacktestData: 11 | """ 12 | Raw market data that is often used in backtesting (thus called BacktestData). 13 | 14 | Base class for all types of backtest data. Currently, each type of simulator has its corresponding backtest 15 | data type. 16 | """ 17 | 18 | @abstractmethod 19 | def __repr__(self) -> str: 20 | raise NotImplementedError 21 | 22 | @abstractmethod 23 | def __len__(self) -> int: 24 | raise NotImplementedError 25 | 26 | @abstractmethod 27 | def get_deal_price(self) -> pd.Series: 28 | raise NotImplementedError 29 | 30 | @abstractmethod 31 | def get_volume(self) -> pd.Series: 32 | raise NotImplementedError 33 | 34 | @abstractmethod 35 | def get_time_index(self) -> pd.DatetimeIndex: 36 | raise NotImplementedError 37 | 38 | 39 | class BaseIntradayProcessedData: 40 | """Processed market data after data cleanup and feature engineering. 41 | 42 | It contains both processed data for "today" and "yesterday", as some algorithms 43 | might use the market information of the previous day to assist decision making. 44 | """ 45 | 46 | today: pd.DataFrame 47 | """Processed data for "today". 48 | Number of records must be ``time_length``, and columns must be ``feature_dim``.""" 49 | 50 | yesterday: pd.DataFrame 51 | """Processed data for "yesterday". 
52 | Number of records must be ``time_length``, and columns must be ``feature_dim``.""" 53 | 54 | 55 | class ProcessedDataProvider: 56 | """Provider of processed data""" 57 | 58 | def get_data( 59 | self, 60 | stock_id: str, 61 | date: pd.Timestamp, 62 | feature_dim: int, 63 | time_index: pd.Index, 64 | ) -> BaseIntradayProcessedData: 65 | raise NotImplementedError 66 | -------------------------------------------------------------------------------- /examples/rl_order_execution/exp_configs/train_ppo.yml: -------------------------------------------------------------------------------- 1 | simulator: 2 | data_granularity: 5 3 | time_per_step: 30 4 | vol_limit: null 5 | env: 6 | concurrency: 32 7 | parallel_mode: dummy 8 | action_interpreter: 9 | class: CategoricalActionInterpreter 10 | kwargs: 11 | values: 4 12 | max_step: 8 13 | module_path: qlib.rl.order_execution.interpreter 14 | state_interpreter: 15 | class: FullHistoryStateInterpreter 16 | kwargs: 17 | data_dim: 5 18 | data_ticks: 48 # 48 = 240 min / 5 min 19 | max_step: 8 20 | processed_data_provider: 21 | class: HandlerProcessedDataProvider 22 | kwargs: 23 | data_dir: ./data/pickle/ 24 | feature_columns_today: ["$high", "$low", "$open", "$close", "$volume"] 25 | feature_columns_yesterday: ["$high_1", "$low_1", "$open_1", "$close_1", "$volume_1"] 26 | backtest: false 27 | module_path: qlib.rl.data.native 28 | module_path: qlib.rl.order_execution.interpreter 29 | reward: 30 | class: PPOReward 31 | kwargs: 32 | max_step: 8 33 | start_time_index: 0 34 | end_time_index: 46 # 46 = (240 - 5) min / 5 min - 1 35 | module_path: qlib.rl.order_execution.reward 36 | data: 37 | source: 38 | order_dir: ./data/orders 39 | feature_root_dir: ./data/pickle/ 40 | feature_columns_today: ["$close0", "$volume0"] 41 | feature_columns_yesterday: [] 42 | total_time: 240 43 | default_start_time_index: 0 44 | default_end_time_index: 235 45 | proc_data_dim: 5 46 | num_workers: 0 47 | queue_size: 20 48 | network: 49 | class: Recurrent 50 | module_path: qlib.rl.order_execution.network 51 | policy: 52 | class: PPO # PPO, DQN 53 | kwargs: 54 | lr: 0.0001 55 | module_path: qlib.rl.order_execution.policy 56 | runtime: 57 | seed: 42 58 | use_cuda: false 59 | trainer: 60 | max_epoch: 500 61 | repeat_per_collect: 25 62 | earlystop_patience: 50 63 | episode_per_collect: 10000 64 | batch_size: 1024 65 | val_every_n_epoch: 4 66 | checkpoint_path: ./outputs/ppo 67 | checkpoint_every_n_iters: 1 68 | -------------------------------------------------------------------------------- /tests/test_register_ops.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
3 | 4 | import unittest 5 | import numpy as np 6 | 7 | from qlib.data import D 8 | from qlib.data.ops import ElemOperator, PairOperator 9 | from qlib.tests import TestAutoData 10 | 11 | 12 | class Diff(ElemOperator): 13 | """Feature First Difference 14 | Parameters 15 | ---------- 16 | feature : Expression 17 | feature instance 18 | Returns 19 | ---------- 20 | Expression 21 | a feature instance with first difference 22 | """ 23 | 24 | def _load_internal(self, instrument, start_index, end_index, freq): 25 | series = self.feature.load(instrument, start_index, end_index, freq) 26 | return series.diff() 27 | 28 | def get_extended_window_size(self): 29 | lft_etd, rght_etd = self.feature.get_extended_window_size() 30 | return lft_etd + 1, rght_etd 31 | 32 | 33 | class Distance(PairOperator): 34 | """Feature Distance 35 | Parameters 36 | ---------- 37 | feature : Expression 38 | feature instance 39 | Returns 40 | ---------- 41 | Expression 42 | a feature instance with the element-wise absolute distance 43 | """ 44 | 45 | def _load_internal(self, instrument, start_index, end_index, freq): 46 | series_left = self.feature_left.load(instrument, start_index, end_index, freq) 47 | series_right = self.feature_right.load(instrument, start_index, end_index, freq) 48 | return np.abs(series_left - series_right) 49 | 50 | 51 | class TestRegisterCustomOps(TestAutoData): 52 | @classmethod 53 | def setUpClass(cls) -> None: 54 | cls._setup_kwargs.update({"custom_ops": [Diff, Distance]}) 55 | super().setUpClass() 56 | 57 | def test_register_custom_ops(self): 58 | instruments = ["SH600000"] 59 | fields = ["Diff($close)", "Distance($close, Ref($close, 1))"] 60 | print(D.features(instruments, fields, start_time="2010-01-01", end_time="2017-12-31", freq="day")) 61 | 62 | 63 | if __name__ == "__main__": 64 | unittest.main() 65 | -------------------------------------------------------------------------------- /examples/benchmarks/TFT/workflow_config_tft_Alpha158.yaml: -------------------------------------------------------------------------------- 1 | sys: 2 | rel_path: . 
3 | qlib_init: 4 | provider_uri: "~/.qlib/qlib_data/cn_data" 5 | region: cn 6 | market: &market csi300 7 | benchmark: &benchmark SH000300 8 | data_handler_config: &data_handler_config 9 | start_time: 2008-01-01 10 | end_time: 2020-08-01 11 | fit_start_time: 2008-01-01 12 | fit_end_time: 2014-12-31 13 | instruments: *market 14 | port_analysis_config: &port_analysis_config 15 | strategy: 16 | class: TopkDropoutStrategy 17 | module_path: qlib.contrib.strategy 18 | kwargs: 19 | signal: <PRED> 20 | topk: 50 21 | n_drop: 5 22 | backtest: 23 | start_time: 2017-01-01 24 | end_time: 2020-08-01 25 | account: 100000000 26 | benchmark: *benchmark 27 | exchange_kwargs: 28 | limit_threshold: 0.095 29 | deal_price: close 30 | open_cost: 0.0005 31 | close_cost: 0.0015 32 | min_cost: 5 33 | task: 34 | model: 35 | class: TFTModel 36 | module_path: tft 37 | dataset: 38 | class: DatasetH 39 | module_path: qlib.data.dataset 40 | kwargs: 41 | handler: 42 | class: Alpha158 43 | module_path: qlib.contrib.data.handler 44 | kwargs: *data_handler_config 45 | segments: 46 | train: [2008-01-01, 2014-12-31] 47 | valid: [2015-01-01, 2016-12-31] 48 | test: [2017-01-01, 2020-08-01] 49 | record: 50 | - class: SignalRecord 51 | module_path: qlib.workflow.record_temp 52 | kwargs: 53 | model: <MODEL> 54 | dataset: <DATASET> 55 | - class: SigAnaRecord 56 | module_path: qlib.workflow.record_temp 57 | kwargs: 58 | ana_long_short: False 59 | ann_scaler: 252 60 | - class: PortAnaRecord 61 | module_path: qlib.workflow.record_temp 62 | kwargs: 63 | config: *port_analysis_config 64 | -------------------------------------------------------------------------------- /examples/hyperparameter/LightGBM/hyperparameter_158.py: -------------------------------------------------------------------------------- 1 | import qlib 2 | import optuna 3 | from qlib.constant import REG_CN 4 | from qlib.utils import init_instance_by_config 5 | from qlib.tests.config import CSI300_DATASET_CONFIG 6 | from qlib.tests.data import GetData 7 | 8 | 9 | def objective(trial): 10 | task = { 11 | "model": { 12 | "class": "LGBModel", 13 | "module_path": "qlib.contrib.model.gbdt", 14 | "kwargs": { 15 | "loss": "mse", 16 | "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1), 17 | "learning_rate": trial.suggest_uniform("learning_rate", 0, 1), 18 | "subsample": trial.suggest_uniform("subsample", 0, 1), 19 | "lambda_l1": trial.suggest_loguniform("lambda_l1", 1e-8, 1e4), 20 | "lambda_l2": trial.suggest_loguniform("lambda_l2", 1e-8, 1e4), 21 | "max_depth": 10, 22 | "num_leaves": trial.suggest_int("num_leaves", 1, 1024), 23 | "feature_fraction": trial.suggest_uniform("feature_fraction", 0.4, 1.0), 24 | "bagging_fraction": trial.suggest_uniform("bagging_fraction", 0.4, 1.0), 25 | "bagging_freq": trial.suggest_int("bagging_freq", 1, 7), 26 | "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 1, 50), 27 | "min_child_samples": trial.suggest_int("min_child_samples", 5, 100), 28 | }, 29 | }, 30 | } 31 | evals_result = dict() 32 | model = init_instance_by_config(task["model"]) 33 | model.fit(dataset, evals_result=evals_result) 34 | return min(evals_result["valid"]) 35 | 36 | 37 | if __name__ == "__main__": 38 | provider_uri = "~/.qlib/qlib_data/cn_data" 39 | GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True) 40 | qlib.init(provider_uri=provider_uri, region="cn") 41 | 42 | dataset = init_instance_by_config(CSI300_DATASET_CONFIG) 43 | 44 | study = optuna.create_study(study_name="LGBM_158", storage="sqlite:///db.sqlite3", load_if_exists=True) # create_study (rather than instantiating Study directly) creates the study in storage on first use 45 | study.optimize(objective, n_jobs=6) 46 |
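Once the search above has been running, the tuned parameters can be read back from the same SQLite storage. A minimal sketch (it assumes the `LGBM_158` study has completed at least one trial):

```python
import optuna

# Reload the persisted study and inspect the best trial found so far.
study = optuna.load_study(study_name="LGBM_158", storage="sqlite:///db.sqlite3")
print(study.best_value)   # the lowest validation loss observed
print(study.best_params)  # LightGBM kwargs that can be plugged back into the task config
```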
-------------------------------------------------------------------------------- /tests/misc/test_sepdf.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | import unittest 4 | import numpy as np 5 | import pandas as pd 6 | from qlib.contrib.data.utils.sepdf import SepDataFrame 7 | 8 | 9 | class SepDF(unittest.TestCase): 10 | def to_str(self, obj): 11 | return "".join(str(obj).split()) 12 | 13 | def test_index_data(self): 14 | np.random.seed(42) 15 | 16 | index = [ 17 | np.array(["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"]), 18 | np.array(["one", "two", "one", "two", "one", "two", "one", "two"]), 19 | ] 20 | 21 | cols = [ 22 | np.repeat(np.array(["g1", "g2"]), 2), 23 | np.arange(4), 24 | ] 25 | df = pd.DataFrame(np.random.randn(8, 4), index=index, columns=cols) 26 | sdf = SepDataFrame(df_dict={"g2": df["g2"]}, join=None) 27 | sdf[("g2", 4)] = 3 28 | sdf["g1"] = df["g1"] 29 | exp = """ 30 | {'g2': 2 3 4 31 | bar one 0.647689 1.523030 3 32 | two 1.579213 0.767435 3 33 | baz one -0.463418 -0.465730 3 34 | two -1.724918 -0.562288 3 35 | foo one -0.908024 -1.412304 3 36 | two 0.067528 -1.424748 3 37 | qux one -1.150994 0.375698 3 38 | two -0.601707 1.852278 3, 'g1': 0 1 39 | bar one 0.496714 -0.138264 40 | two -0.234153 -0.234137 41 | baz one -0.469474 0.542560 42 | two 0.241962 -1.913280 43 | foo one -1.012831 0.314247 44 | two 1.465649 -0.225776 45 | qux one -0.544383 0.110923 46 | two -0.600639 -0.291694} 47 | """ 48 | self.assertEqual(self.to_str(sdf._df_dict), self.to_str(exp)) 49 | 50 | del df["g1"] 51 | del df["g2"] 52 | # this will not raise an error, and df will become an empty dataframe 53 | 54 | del sdf["g1"] 55 | del sdf["g2"] 56 | # sdf should support deleting all of its columns 57 | 58 | 59 | if __name__ == "__main__": 60 | unittest.main() 61 | -------------------------------------------------------------------------------- /scripts/collect_info.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import platform 3 | import qlib 4 | import fire 5 | import pkg_resources 6 | from pathlib import Path 7 | 8 | QLIB_PATH = Path(__file__).absolute().resolve().parent.parent 9 | 10 | 11 | class InfoCollector: 12 | """ 13 | Users can collect system info by running the following command 14 | `cd scripts && python collect_info.py all` 15 | - NOTE: please avoid running this script in a project folder that contains `qlib` 16 | """ 17 | 18 | def sys(self): 19 | """collect system-related info""" 20 | for method in ["system", "machine", "platform", "version"]: 21 | print(getattr(platform, method)()) 22 | 23 | def py(self): 24 | """collect Python-related info""" 25 | print("Python version: {}".format(sys.version.replace("\n", " "))) 26 | 27 | def qlib(self): 28 | """collect qlib-related info""" 29 | print("Qlib version: {}".format(qlib.__version__)) 30 | REQUIRED = [ 31 | "setuptools", 32 | "wheel", 33 | "cython", 34 | "pyyaml", 35 | "numpy", 36 | "pandas", 37 | "mlflow", 38 | "filelock", 39 | "redis", 40 | "dill", 41 | "fire", 42 | "ruamel.yaml", 43 | "python-redis-lock", 44 | "tqdm", 45 | "pymongo", 46 | "loguru", 47 | "lightgbm", 48 | "gym", 49 | "cvxpy", 50 | "joblib", 51 | "matplotlib", 52 | "jupyter", 53 | "nbconvert", 54 | "pyarrow", 55 | "pydantic-settings", 56 | "setuptools-scm", 57 | ] 58 | 59 | for package in REQUIRED: 60 | version = pkg_resources.get_distribution(package).version 61 | print(f"{package}=={version}") 62 | 63 |
def all(self): 64 | """collect all info""" 65 | for method in ["sys", "py", "qlib"]: 66 | getattr(self, method)() 67 | print() 68 | 69 | 70 | if __name__ == "__main__": 71 | fire.Fire(InfoCollector) 72 | -------------------------------------------------------------------------------- /examples/data_demo/data_cache_demo.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | """ 4 | The motivation of this demo 5 | - To show that the data modules of Qlib are Serializable, so users can dump processed data to disk to avoid duplicated data preprocessing 6 | """ 7 | 8 | from copy import deepcopy 9 | from pathlib import Path 10 | import pickle 11 | from pprint import pprint 12 | from ruamel.yaml import YAML 13 | import subprocess 14 | from qlib.log import TimeInspector 15 | 16 | from qlib import init 17 | from qlib.data.dataset.handler import DataHandlerLP 18 | from qlib.utils import init_instance_by_config 19 | 20 | # For general purposes, we use a relative path 21 | DIRNAME = Path(__file__).absolute().resolve().parent 22 | 23 | if __name__ == "__main__": 24 | init() 25 | 26 | config_path = DIRNAME.parent / "benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml" 27 | 28 | # 1) show the original time 29 | with TimeInspector.logt("The original time without handler cache:"): 30 | subprocess.run(f"qrun {config_path}", shell=True) 31 | 32 | # 2) dump the handler 33 | yaml = YAML(typ="safe", pure=True) 34 | task_config = yaml.load(config_path.open()) 35 | hd_conf = task_config["task"]["dataset"]["kwargs"]["handler"] 36 | pprint(hd_conf) 37 | hd: DataHandlerLP = init_instance_by_config(hd_conf) 38 | hd_path = DIRNAME / "handler.pkl" 39 | hd.to_pickle(hd_path, dump_all=True) 40 | 41 | # 3) create a new task with the handler cache 42 | new_task_config = deepcopy(task_config) 43 | new_task_config["task"]["dataset"]["kwargs"]["handler"] = f"file://{hd_path}" 44 | new_task_config["sys"] = {"path": [str(config_path.parent.resolve())]} 45 | new_task_path = DIRNAME / "new_task.yaml" 46 | print("The location of the new task", new_task_path) 47 | 48 | # save the new task 49 | with new_task_path.open("w") as f: 50 | yaml.dump(new_task_config, f) # ruamel.yaml's YAML instances expose dump(), not safe_dump() 51 | 52 | # 4) train the model with the new task 53 | with TimeInspector.logt("The time for the task with handler cache:"): 54 | subprocess.run(f"qrun {new_task_path}", shell=True) 55 | -------------------------------------------------------------------------------- /examples/benchmarks_dynamic/README.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | Due to the non-stationary nature of the financial market environment, the data distribution may change across different periods, which makes the performance of models built on the training data decay on future test data. 3 | Adapting the forecasting models/strategies to market dynamics is therefore very important to their performance. 4 | 5 | The table below shows the performances of different solutions on different forecasting models.
6 | 7 | ## Alpha158 Dataset 8 | Here is the [crowd sourced version of qlib data](data_collector/crowd_source/README.md): https://github.com/chenditc/investment_data/releases 9 | ```bash 10 | wget https://github.com/chenditc/investment_data/releases/latest/download/qlib_bin.tar.gz 11 | mkdir -p ~/.qlib/qlib_data/cn_data 12 | tar -zxvf qlib_bin.tar.gz -C ~/.qlib/qlib_data/cn_data --strip-components=2 13 | rm -f qlib_bin.tar.gz 14 | ``` 15 | 16 | | Model Name | Dataset | IC | ICIR | Rank IC | Rank ICIR | Annualized Return | Information Ratio | Max Drawdown | 17 | |------------------|---------|------|------|---------|-----------|-------------------|-------------------|--------------| 18 | | RR[Linear] |Alpha158 |0.0945|0.5989|0.1069 |0.6495 |0.0857 |1.3682 |-0.0986 | 19 | | DDG-DA[Linear] |Alpha158 |0.0983|0.6157|0.1108 |0.6646 |0.0764 |1.1904 |-0.0769 | 20 | | RR[LightGBM] |Alpha158 |0.0816|0.5887|0.0912 |0.6263 |0.0771 |1.3196 |-0.0909 | 21 | | DDG-DA[LightGBM] |Alpha158 |0.0878|0.6185|0.0975 |0.6524 |0.1261 |2.0096 |-0.0744 | 22 | 23 | - The label horizon of the `Alpha158` dataset is set to 20. 24 | - The rolling time intervals are set to 20 trading days. 25 | - The test rolling periods are from January 2017 to August 2020. 26 | - The results are based on the crowd-sourced version. The Yahoo version of qlib data does not contain `VWAP`, so all related factors are missing and filled with 0, which leads to a rank-deficient matrix (a matrix that does not have full rank) and makes the lower-level optimization of DDG-DA unsolvable. 27 | -------------------------------------------------------------------------------- /examples/orderbook_data/README.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | 3 | This example demonstrates how Qlib supports data without a fixed, shared frequency. 4 | 5 | For example, 6 | - Daily price-volume data are fixed-frequency data: they come at a fixed frequency (i.e. daily) 7 | - Orders are not fixed-frequency data; they may come at any time point 8 | 9 | To support such non-fixed-frequency data, Qlib implements an Arctic-based backend. 10 | Here is an example to import and query data based on this backend. 11 | 12 | # Installation 13 | 14 | Please refer to [the installation docs](https://docs.mongodb.com/manual/installation/) of mongodb. 15 | The current version of the script uses default values and tries to connect to localhost **via the default port without authentication**. 16 | 17 | Run the following commands to install the necessary libraries 18 | ``` 19 | pip install pytest coverage gdown 20 | pip install arctic # NOTE: pip may fail to resolve the right package dependency !!! Please make sure the dependencies are satisfied. 21 | ``` 22 | 23 | # Importing example data 24 | 25 | 26 | 1. (Optional) Please follow the first part of [this section](https://github.com/microsoft/qlib#data-preparation) to **get 1min data** of Qlib. 27 | 2. Please follow the steps below to download the example data 28 | ```bash 29 | cd examples/orderbook_data/ 30 | gdown https://drive.google.com/uc?id=15FuUqWn2rkCi8uhJYGEQWKakcEqLJNDG # Proxies may be necessary here. 31 | python ../../scripts/get_data.py _unzip --file_path highfreq_orderbook_example_data.zip --target_dir . 32 | ``` 33 | 34 | 3.
Please import the example data into your MongoDB 35 | ```bash 36 | python create_dataset.py initialize_library # Initialize the libraries 37 | python create_dataset.py import_data # Import the data 38 | ``` 39 | 40 | # Query Examples 41 | 42 | After importing the data, you can run `example.py` to create some high-frequency features. 43 | ```bash 44 | pytest -s --disable-warnings example.py # If you want to run all examples 45 | pytest -s --disable-warnings example.py::TestClass::test_exp_10 # If you want to run a specific example 46 | ``` 47 | 48 | 49 | # Known limitations 50 | Computing expressions across different frequencies is not supported yet 51 | -------------------------------------------------------------------------------- /examples/online_srv/update_online_pred.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | """ 5 | This example shows how OnlineTool works when we need to update predictions. 6 | There are two parts: first_train and update_online_pred. 7 | First, we finish the training and set the trained models as the `online` models. 8 | Next, we update the online predictions. 9 | """ 10 | import copy 11 | import fire 12 | import qlib 13 | from qlib.constant import REG_CN 14 | from qlib.model.trainer import task_train 15 | from qlib.workflow.online.utils import OnlineToolR 16 | from qlib.tests.config import CSI300_GBDT_TASK 17 | 18 | task = copy.deepcopy(CSI300_GBDT_TASK) 19 | 20 | task["record"] = { 21 | "class": "SignalRecord", 22 | "module_path": "qlib.workflow.record_temp", 23 | } 24 | 25 | 26 | class UpdatePredExample: 27 | def __init__( 28 | self, provider_uri="~/.qlib/qlib_data/cn_data", region=REG_CN, experiment_name="online_srv", task_config=task 29 | ): 30 | qlib.init(provider_uri=provider_uri, region=region) 31 | self.experiment_name = experiment_name 32 | self.online_tool = OnlineToolR(self.experiment_name) 33 | self.task_config = task_config 34 | 35 | def first_train(self): 36 | rec = task_train(self.task_config, experiment_name=self.experiment_name) 37 | self.online_tool.reset_online_tag(rec) # set it as the online model 38 | 39 | def update_online_pred(self): 40 | self.online_tool.update_online_pred() 41 | 42 | def main(self): 43 | self.first_train() 44 | self.update_online_pred() 45 | 46 | 47 | if __name__ == "__main__": 48 | ## to train a model and set it as the online model, use the command below 49 | # python update_online_pred.py first_train 50 | ## to update online predictions once a day, use the command below 51 | # python update_online_pred.py update_online_pred 52 | ## to see the whole process with your own parameters, use the command below 53 | # python update_online_pred.py main --experiment_name="your_exp_name" 54 | fire.Fire(UpdatePredExample) 55 | -------------------------------------------------------------------------------- /scripts/README.md: -------------------------------------------------------------------------------- 1 | 2 | - [Download Qlib Data](#Download-Qlib-Data) 3 | - [Download CN Data](#Download-CN-Data) 4 | - [Download US Data](#Download-US-Data) 5 | - [Download CN Simple Data](#Download-CN-Simple-Data) 6 | - [Help](#Help) 7 | - [Using in Qlib](#Using-in-Qlib) 8 | - [US data](#US-data) 9 | - [CN data](#CN-data) 10 | 11 | 12 | ## Download Qlib Data 13 | 14 | 15 | ### Download CN Data 16 | 17 | ```bash 18 | # daily data 19 | python get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data
--region cn 20 | 21 | # 1min data (optional; only needed for high-frequency strategies) 22 | python get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data_1min --region cn --interval 1min 23 | ``` 24 | 25 | ### Download US Data 26 | 27 | 28 | ```bash 29 | python get_data.py qlib_data --target_dir ~/.qlib/qlib_data/us_data --region us 30 | ``` 31 | 32 | ### Download CN Simple Data 33 | 34 | ```bash 35 | python get_data.py qlib_data --name qlib_data_simple --target_dir ~/.qlib/qlib_data/cn_data --region cn 36 | ``` 37 | 38 | ### Help 39 | 40 | ```bash 41 | python get_data.py qlib_data --help 42 | ``` 43 | 44 | ## Using in Qlib 45 | > For more information: https://qlib.readthedocs.io/en/latest/start/initialization.html 46 | 47 | 48 | ### US data 49 | 50 | > Need to download data first: [Download US Data](#Download-US-Data) 51 | 52 | ```python 53 | import qlib 54 | from qlib.config import REG_US 55 | provider_uri = "~/.qlib/qlib_data/us_data" # target_dir 56 | qlib.init(provider_uri=provider_uri, region=REG_US) 57 | ``` 58 | 59 | ### CN data 60 | 61 | > Need to download data first: [Download CN Data](#Download-CN-Data) 62 | 63 | ```python 64 | import qlib 65 | from qlib.constant import REG_CN 66 | 67 | provider_uri = "~/.qlib/qlib_data/cn_data" # target_dir 68 | qlib.init(provider_uri=provider_uri, region=REG_CN) 69 | ``` 70 | 71 | ## Use Crowd Sourced Data 72 | There is also a [crowd sourced version of qlib data](data_collector/crowd_source/README.md): https://github.com/chenditc/investment_data/releases 73 | ```bash 74 | wget https://github.com/chenditc/investment_data/releases/latest/download/qlib_bin.tar.gz 75 | tar -zxvf qlib_bin.tar.gz -C ~/.qlib/qlib_data/cn_data --strip-components=2 76 | ``` 77 | -------------------------------------------------------------------------------- /typehint.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | """Commonly used types.""" 5 | 6 | import sys 7 | from typing import Union 8 | from pathlib import Path 9 | 10 | __all__ = ["Literal", "TypedDict", "final"] 11 | 12 | if sys.version_info >= (3, 8): 13 | from typing import Literal, TypedDict, final # type: ignore # pylint: disable=no-name-in-module 14 | else: 15 | from typing_extensions import Literal, TypedDict, final 16 | 17 | 18 | class InstDictConf(TypedDict): 19 | """ 20 | InstDictConf is a Dict-based config to describe an instance 21 | 22 | case 1) 23 | { 24 | 'class': 'ClassName', 25 | 'kwargs': dict, # It is optional. {} will be used if not given 26 | 'module_path': path, # It is optional if the module is given in the class 27 | } 28 | case 2) 29 | { 30 | 'class': <the class itself>, 31 | 'kwargs': dict, # It is optional. {} will be used if not given 32 | } 33 | """ 34 | 35 | # class: str # because class is a keyword of Python. We have to comment it 36 | kwargs: dict # It is optional. {} will be used if not given 37 | module_path: str # It is optional if the module is given in the class 38 | 39 | 40 | InstConf = Union[InstDictConf, str, object, Path] 41 | """ 42 | InstConf is a type to describe an instance; it will be passed into init_instance_by_config for Qlib 43 | 44 | config : Union[str, dict, object, Path] 45 | 46 | InstDictConf example. 47 | please refer to the docs of InstDictConf 48 | 49 | str example. 50 | 1) specify a pickle object 51 | - path like 'file:///<path to pickle file>/obj.pkl' 52 | 2) specify a class name 53 | - "ClassName": getattr(module, "ClassName")() will be used.
54 | 3) specify module path with class name 55 | - "a.b.c.ClassName": getattr(<module "a.b.c">, "ClassName")() will be used. 56 | 57 | object example: 58 | instance of accept_types 59 | 60 | Path example: 61 | specify a pickle object 62 | - it will be treated like 'file:///<path to pickle file>/obj.pkl' 63 | """ 64 | -------------------------------------------------------------------------------- /examples/hyperparameter/LightGBM/hyperparameter_360.py: -------------------------------------------------------------------------------- 1 | import qlib 2 | import optuna 3 | from qlib.constant import REG_CN 4 | from qlib.utils import init_instance_by_config 5 | from qlib.tests.data import GetData 6 | from qlib.tests.config import get_dataset_config, CSI300_MARKET, DATASET_ALPHA360_CLASS 7 | 8 | DATASET_CONFIG = get_dataset_config(market=CSI300_MARKET, dataset_class=DATASET_ALPHA360_CLASS) 9 | 10 | 11 | def objective(trial): 12 | task = { 13 | "model": { 14 | "class": "LGBModel", 15 | "module_path": "qlib.contrib.model.gbdt", 16 | "kwargs": { 17 | "loss": "mse", 18 | "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1), 19 | "learning_rate": trial.suggest_uniform("learning_rate", 0, 1), 20 | "subsample": trial.suggest_uniform("subsample", 0, 1), 21 | "lambda_l1": trial.suggest_loguniform("lambda_l1", 1e-8, 1e4), 22 | "lambda_l2": trial.suggest_loguniform("lambda_l2", 1e-8, 1e4), 23 | "max_depth": 10, 24 | "num_leaves": trial.suggest_int("num_leaves", 1, 1024), 25 | "feature_fraction": trial.suggest_uniform("feature_fraction", 0.4, 1.0), 26 | "bagging_fraction": trial.suggest_uniform("bagging_fraction", 0.4, 1.0), 27 | "bagging_freq": trial.suggest_int("bagging_freq", 1, 7), 28 | "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 1, 50), 29 | "min_child_samples": trial.suggest_int("min_child_samples", 5, 100), 30 | }, 31 | }, 32 | } 33 | 34 | evals_result = dict() 35 | model = init_instance_by_config(task["model"]) 36 | model.fit(dataset, evals_result=evals_result) 37 | return min(evals_result["valid"]) 38 | 39 | 40 | if __name__ == "__main__": 41 | provider_uri = "~/.qlib/qlib_data/cn_data" 42 | GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True) 43 | qlib.init(provider_uri=provider_uri, region=REG_CN) 44 | 45 | dataset = init_instance_by_config(DATASET_CONFIG) 46 | 47 | study = optuna.create_study(study_name="LGBM_360", storage="sqlite:///db.sqlite3", load_if_exists=True) # create_study (rather than instantiating Study directly) creates the study in storage on first use 48 | study.optimize(objective, n_jobs=6) 49 | -------------------------------------------------------------------------------- /examples/rl_order_execution/scripts/gen_training_orders.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License.
3 | 4 | import os 5 | import numpy as np 6 | import pandas as pd 7 | 8 | from pathlib import Path 9 | 10 | DATA_PATH = Path(os.path.join("data", "pickle", "backtest")) 11 | OUTPUT_PATH = Path(os.path.join("data", "orders")) 12 | 13 | 14 | def generate_order(stock: str, start_idx: int, end_idx: int) -> bool: 15 | dataset = pd.read_pickle(DATA_PATH / f"{stock}.pkl") 16 | df = dataset.handler.fetch(level=None).reset_index() 17 | if len(df) == 0 or df.isnull().values.any() or min(df["$volume0"]) < 1e-5: 18 | return False 19 | 20 | df["date"] = df["datetime"].dt.date.astype("datetime64") 21 | df = df.set_index(["instrument", "datetime", "date"]) 22 | df = df.groupby("date", group_keys=False).take(range(start_idx, end_idx)).droplevel(level=0) 23 | 24 | order_all = pd.DataFrame(df.groupby(level=(2, 0), group_keys=False).mean().dropna()) 25 | order_all["amount"] = np.random.lognormal(-3.28, 1.14) * order_all["$volume0"] # order size: a lognormal fraction of daily volume 26 | order_all = order_all[order_all["amount"] > 0.0] 27 | order_all["order_type"] = 0 28 | order_all = order_all.drop(columns=["$volume0"]) 29 | 30 | order_train = order_all[order_all.index.get_level_values(0) <= pd.Timestamp("2021-06-30")] 31 | order_test = order_all[order_all.index.get_level_values(0) > pd.Timestamp("2021-06-30")] 32 | order_valid = order_test[order_test.index.get_level_values(0) <= pd.Timestamp("2021-09-30")] 33 | order_test = order_test[order_test.index.get_level_values(0) > pd.Timestamp("2021-09-30")] 34 | 35 | for order, tag in zip((order_train, order_valid, order_test, order_all), ("train", "valid", "test", "all")): 36 | path = OUTPUT_PATH / tag 37 | os.makedirs(path, exist_ok=True) 38 | if len(order) > 0: 39 | order.to_pickle(path / f"{stock}.pkl.target") 40 | return True 41 | 42 | 43 | np.random.seed(1234) 44 | file_list = sorted(os.listdir(DATA_PATH)) 45 | stocks = [f.replace(".pkl", "") for f in file_list] 46 | np.random.shuffle(stocks) 47 | 48 | cnt = 0 49 | for stock in stocks: 50 | if generate_order(stock, 0, 240 // 5 - 1): 51 | cnt += 1 52 | if cnt == 100: 53 | break 54 | -------------------------------------------------------------------------------- /docs/component/online.rst: -------------------------------------------------------------------------------- 1 | .. _online_serving: 2 | 3 | ============== 4 | Online Serving 5 | ============== 6 | .. currentmodule:: qlib 7 | 8 | 9 | Introduction 10 | ============ 11 | 12 | .. image:: ../_static/img/online_serving.png 13 | :align: center 14 | 15 | 16 | In addition to backtesting, one way to test whether a model is effective is to make predictions in real market conditions, or even to do real trading based on those predictions. 17 | ``Online Serving`` is a set of modules for online models using the latest data, 18 | which includes `Online Manager <#Online Manager>`_, `Online Strategy <#Online Strategy>`_, `Online Tool <#Online Tool>`_ and `Updater <#Updater>`_. 19 | 20 | Several examples are available under ``examples/online_srv`` for reference, which demonstrate different features of ``Online Serving``. 21 | If you have many models or tasks that need to be managed, please consider `Task Management <../advanced/task_management.html>`_. 22 | These examples are based on some components in `Task Management <../advanced/task_management.html>`_ such as ``TrainerRM`` or ``Collector``. 23 | 24 | **NOTE**: Users should keep their data source updated to support online serving. For example, Qlib provides a batch of scripts to help users update Yahoo daily data.
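As a concrete illustration of the modules listed above, the daily routine can be as small as two calls. Below is a minimal sketch distilled from the ``update_online_pred.py`` example earlier in this repository (it assumes ``qlib.init`` has already been called and that the experiment name matches the one used at training time):

.. code-block:: Python

    from qlib.workflow.online.utils import OnlineToolR

    online_tool = OnlineToolR("online_srv")  # the experiment that holds the online model
    online_tool.update_online_pred()         # refresh predictions with the latest data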
25 | 26 | Current known limitations 27 | - Daily updating of the prediction for the next trading day is supported, but generating orders for the next trading day is not, due to the limitations of public data 28 | 29 | 30 | Online Manager 31 | ============== 32 | 33 | .. automodule:: qlib.workflow.online.manager 34 | :members: 35 | :noindex: 36 | 37 | Online Strategy 38 | =============== 39 | 40 | .. automodule:: qlib.workflow.online.strategy 41 | :members: 42 | :noindex: 43 | 44 | Online Tool 45 | =========== 46 | 47 | .. automodule:: qlib.workflow.online.utils 48 | :members: 49 | :noindex: 50 | 51 | Updater 52 | ======= 53 | 54 | .. automodule:: qlib.workflow.online.update 55 | :members: 56 | :noindex: 57 | -------------------------------------------------------------------------------- /contrib/data/data.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | # We moved arctic from the core framework of Qlib to contrib because 5 | # - Arctic has very strict limitations on pandas and numpy versions 6 | # - https://github.com/man-group/arctic/pull/908 7 | # - pip fails to compute the right version number 8 | # - Maybe we can solve this problem with poetry 9 | 10 | # FIXME: So if you want to use the arctic-based provider, please install arctic manually 11 | # `pip install arctic` may not be enough. 12 | from arctic import Arctic 13 | import pandas as pd 14 | import pymongo 15 | 16 | from qlib.data.data import FeatureProvider 17 | 18 | 19 | class ArcticFeatureProvider(FeatureProvider): 20 | def __init__( 21 | self, uri="127.0.0.1", retry_time=0, market_transaction_time_list=[("09:15", "11:30"), ("13:00", "15:00")] 22 | ): 23 | super().__init__() 24 | self.uri = uri 25 | # TODO: 26 | # retry connecting if an error occurs 27 | # does it really matter?
28 | self.retry_time = retry_time 29 | # NOTE: this is especially important for the TResample operator 30 | self.market_transaction_time_list = market_transaction_time_list 31 | 32 | def feature(self, instrument, field, start_index, end_index, freq): 33 | field = str(field)[1:] 34 | with pymongo.MongoClient(self.uri) as client: 35 | # TODO: this results in frequent connections to the server and a performance issue 36 | arctic = Arctic(client) 37 | 38 | if freq not in arctic.list_libraries(): 39 | raise ValueError("lib {} not in arctic".format(freq)) 40 | 41 | if instrument not in arctic[freq].list_symbols(): 42 | # the instrument does not exist 43 | return pd.Series() 44 | else: 45 | df = arctic[freq].read(instrument, columns=[field], chunk_range=(start_index, end_index)) 46 | s = df[field] 47 | 48 | if not s.empty: 49 | s = pd.concat( 50 | [ 51 | s.between_time(time_tuple[0], time_tuple[1]) 52 | for time_tuple in self.market_transaction_time_list 53 | ] 54 | ) 55 | return s 56 | -------------------------------------------------------------------------------- /examples/benchmarks/XGBoost/workflow_config_xgboost_Alpha158.yaml: -------------------------------------------------------------------------------- 1 | qlib_init: 2 | provider_uri: "~/.qlib/qlib_data/cn_data" 3 | region: cn 4 | market: &market csi300 5 | benchmark: &benchmark SH000300 6 | data_handler_config: &data_handler_config 7 | start_time: 2008-01-01 8 | end_time: 2020-08-01 9 | fit_start_time: 2008-01-01 10 | fit_end_time: 2014-12-31 11 | instruments: *market 12 | port_analysis_config: &port_analysis_config 13 | strategy: 14 | class: TopkDropoutStrategy 15 | module_path: qlib.contrib.strategy 16 | kwargs: 17 | signal: <PRED> 18 | topk: 50 19 | n_drop: 5 20 | backtest: 21 | start_time: 2017-01-01 22 | end_time: 2020-08-01 23 | account: 100000000 24 | benchmark: *benchmark 25 | exchange_kwargs: 26 | limit_threshold: 0.095 27 | deal_price: close 28 | open_cost: 0.0005 29 | close_cost: 0.0015 30 | min_cost: 5 31 | task: 32 | model: 33 | class: XGBModel 34 | module_path: qlib.contrib.model.xgboost 35 | kwargs: 36 | eval_metric: rmse 37 | colsample_bytree: 0.8879 38 | eta: 0.0421 39 | max_depth: 8 40 | n_estimators: 647 41 | subsample: 0.8789 42 | nthread: 20 43 | dataset: 44 | class: DatasetH 45 | module_path: qlib.data.dataset 46 | kwargs: 47 | handler: 48 | class: Alpha158 49 | module_path: qlib.contrib.data.handler 50 | kwargs: *data_handler_config 51 | segments: 52 | train: [2008-01-01, 2014-12-31] 53 | valid: [2015-01-01, 2016-12-31] 54 | test: [2017-01-01, 2020-08-01] 55 | record: 56 | - class: SignalRecord 57 | module_path: qlib.workflow.record_temp 58 | kwargs: 59 | model: <MODEL> 60 | dataset: <DATASET> 61 | - class: SigAnaRecord 62 | module_path: qlib.workflow.record_temp 63 | kwargs: 64 | ana_long_short: False 65 | ann_scaler: 252 66 | - class: PortAnaRecord 67 | module_path: qlib.workflow.record_temp 68 | kwargs: 69 | config: *port_analysis_config 70 | -------------------------------------------------------------------------------- /docs/advanced/serial.rst: -------------------------------------------------------------------------------- 1 | .. _serial: 2 | 3 | ============= 4 | Serialization 5 | ============= 6 | .. currentmodule:: qlib 7 | 8 | Introduction 9 | ============ 10 | ``Qlib`` supports dumping the state of ``DataHandler``, ``DataSet``, ``Processor``, ``Model``, etc. to disk and reloading them.
11 | 12 | Serializable Class 13 | ================== 14 | 15 | ``Qlib`` provides a base class ``qlib.utils.serial.Serializable``, whose state can be dumped to or loaded from disk in `pickle` format. 16 | When users dump the state of a ``Serializable`` instance, the attributes of the instance whose names **do not** start with `_` will be saved to disk. 17 | However, users can use the ``config`` method or override the ``default_dump_all`` attribute to change this behavior. 18 | 19 | Users can also override the ``pickle_backend`` attribute to choose a pickle backend. The supported values are "pickle" (the default and most common) and "dill" (which can dump more things, such as functions; see the `dill` documentation for more information). 20 | 21 | Example 22 | ======= 23 | ``Qlib``'s serializable classes include ``DataHandler``, ``DataSet``, ``Processor`` and ``Model``, etc., which are subclasses of ``qlib.utils.serial.Serializable``. 24 | Specifically, ``qlib.data.dataset.DatasetH`` is one of them. Users can serialize ``DatasetH`` as follows. 25 | 26 | .. code-block:: Python 27 | 28 | ##=============dump dataset============= 29 | dataset.to_pickle(path="dataset.pkl") # dataset is an instance of qlib.data.dataset.DatasetH 30 | 31 | ##=============reload dataset============= 32 | with open("dataset.pkl", "rb") as file_dataset: 33 | dataset = pickle.load(file_dataset) 34 | 35 | .. note:: 36 | Only the state of ``DatasetH`` should be saved to disk, such as some `mean` and `variance` used for data normalization, etc. 37 | 38 | After reloading the ``DatasetH``, users need to reinitialize it. This means that users can reset some states of ``DatasetH`` or ``QlibDataHandler``, such as `instruments`, `start_time`, `end_time` and `segments`, etc., and generate new data according to those states (data is not state and should not be saved to disk). 39 | 40 | A more detailed example can be found in Qlib's examples. 41 | 42 | 43 | API 44 | === 45 | Please refer to `Serializable API <../reference/api.html#module-qlib.utils.serial.Serializable>`_.
46 | -------------------------------------------------------------------------------- /examples/highfreq/workflow_config_High_Freq_Tree_Alpha158.yaml: -------------------------------------------------------------------------------- 1 | qlib_init: 2 | provider_uri: "~/.qlib/qlib_data/cn_data_1min" 3 | region: cn 4 | market: &market 'csi300' 5 | start_time: &start_time "2020-09-15 00:00:00" 6 | end_time: &end_time "2021-01-18 16:00:00" 7 | train_end_time: &train_end_time "2020-11-15 16:00:00" 8 | valid_start_time: &valid_start_time "2020-11-16 00:00:00" 9 | valid_end_time: &valid_end_time "2020-11-30 16:00:00" 10 | test_start_time: &test_start_time "2020-12-01 00:00:00" 11 | data_handler_config: &data_handler_config 12 | start_time: *start_time 13 | end_time: *end_time 14 | fit_start_time: *start_time 15 | fit_end_time: *train_end_time 16 | instruments: *market 17 | freq: '1min' 18 | infer_processors: 19 | - class: 'RobustZScoreNorm' 20 | kwargs: 21 | fields_group: 'feature' 22 | clip_outlier: false 23 | - class: "Fillna" 24 | kwargs: 25 | fields_group: 'feature' 26 | learn_processors: 27 | - class: 'DropnaLabel' 28 | - class: 'CSRankNorm' 29 | kwargs: 30 | fields_group: 'label' 31 | label: ["Ref($close, -2) / Ref($close, -1) - 1"] 32 | 33 | task: 34 | model: 35 | class: "HFLGBModel" 36 | module_path: "qlib.contrib.model.highfreq_gdbt_model" 37 | kwargs: 38 | objective: 'binary' 39 | metric: ['binary_logloss','auc'] 40 | verbosity: -1 41 | learning_rate: 0.01 42 | max_depth: 8 43 | num_leaves: 150 44 | lambda_l1: 1.5 45 | lambda_l2: 1 46 | num_threads: 20 47 | dataset: 48 | class: "DatasetH" 49 | module_path: "qlib.data.dataset" 50 | kwargs: 51 | handler: 52 | class: "Alpha158" 53 | module_path: "qlib.contrib.data.handler" 54 | kwargs: *data_handler_config 55 | segments: 56 | train: [*start_time, *train_end_time] 57 | valid: [*valid_start_time, *valid_end_time] 58 | test: [*test_start_time, *end_time] 59 | record: 60 | - class: "SignalRecord" 61 | module_path: "qlib.workflow.record_temp" 62 | kwargs: {} 63 | - class: "HFSignalRecord" 64 | module_path: "qlib.workflow.record_temp" 65 | kwargs: {} -------------------------------------------------------------------------------- /examples/benchmarks/CatBoost/workflow_config_catboost_Alpha158.yaml: -------------------------------------------------------------------------------- 1 | qlib_init: 2 | provider_uri: "~/.qlib/qlib_data/cn_data" 3 | region: cn 4 | market: &market csi300 5 | benchmark: &benchmark SH000300 6 | data_handler_config: &data_handler_config 7 | start_time: 2008-01-01 8 | end_time: 2020-08-01 9 | fit_start_time: 2008-01-01 10 | fit_end_time: 2014-12-31 11 | instruments: *market 12 | port_analysis_config: &port_analysis_config 13 | strategy: 14 | class: TopkDropoutStrategy 15 | module_path: qlib.contrib.strategy 16 | kwargs: 17 | signal: <PRED> 18 | topk: 50 19 | n_drop: 5 20 | backtest: 21 | start_time: 2017-01-01 22 | end_time: 2020-08-01 23 | account: 100000000 24 | benchmark: *benchmark 25 | exchange_kwargs: 26 | limit_threshold: 0.095 27 | deal_price: close 28 | open_cost: 0.0005 29 | close_cost: 0.0015 30 | min_cost: 5 31 | task: 32 | model: 33 | class: CatBoostModel 34 | module_path: qlib.contrib.model.catboost_model 35 | kwargs: 36 | loss: RMSE 37 | learning_rate: 0.0421 38 | subsample: 0.8789 39 | max_depth: 6 40 | num_leaves: 100 41 | thread_count: 20 42 | grow_policy: Lossguide 43 | bootstrap_type: Poisson 44 | dataset: 45 | class: DatasetH 46 | module_path: qlib.data.dataset 47 | kwargs: 48 | handler: 49 | class: Alpha158 50 |
module_path: qlib.contrib.data.handler 51 | kwargs: *data_handler_config 52 | segments: 53 | train: [2008-01-01, 2014-12-31] 54 | valid: [2015-01-01, 2016-12-31] 55 | test: [2017-01-01, 2020-08-01] 56 | record: 57 | - class: SignalRecord 58 | module_path: qlib.workflow.record_temp 59 | kwargs: 60 | model: <MODEL> 61 | dataset: <DATASET> 62 | - class: SigAnaRecord 63 | module_path: qlib.workflow.record_temp 64 | kwargs: 65 | ana_long_short: False 66 | ann_scaler: 252 67 | - class: PortAnaRecord 68 | module_path: qlib.workflow.record_temp 69 | kwargs: 70 | config: *port_analysis_config 71 | -------------------------------------------------------------------------------- /examples/benchmarks/CatBoost/workflow_config_catboost_Alpha158_csi500.yaml: -------------------------------------------------------------------------------- 1 | qlib_init: 2 | provider_uri: "~/.qlib/qlib_data/cn_data" 3 | region: cn 4 | market: &market csi500 5 | benchmark: &benchmark SH000905 6 | data_handler_config: &data_handler_config 7 | start_time: 2008-01-01 8 | end_time: 2020-08-01 9 | fit_start_time: 2008-01-01 10 | fit_end_time: 2014-12-31 11 | instruments: *market 12 | port_analysis_config: &port_analysis_config 13 | strategy: 14 | class: TopkDropoutStrategy 15 | module_path: qlib.contrib.strategy 16 | kwargs: 17 | signal: <PRED> 18 | topk: 50 19 | n_drop: 5 20 | backtest: 21 | start_time: 2017-01-01 22 | end_time: 2020-08-01 23 | account: 100000000 24 | benchmark: *benchmark 25 | exchange_kwargs: 26 | limit_threshold: 0.095 27 | deal_price: close 28 | open_cost: 0.0005 29 | close_cost: 0.0015 30 | min_cost: 5 31 | task: 32 | model: 33 | class: CatBoostModel 34 | module_path: qlib.contrib.model.catboost_model 35 | kwargs: 36 | loss: RMSE 37 | learning_rate: 0.0421 38 | subsample: 0.8789 39 | max_depth: 6 40 | num_leaves: 100 41 | thread_count: 20 42 | grow_policy: Lossguide 43 | bootstrap_type: Poisson 44 | dataset: 45 | class: DatasetH 46 | module_path: qlib.data.dataset 47 | kwargs: 48 | handler: 49 | class: Alpha158 50 | module_path: qlib.contrib.data.handler 51 | kwargs: *data_handler_config 52 | segments: 53 | train: [2008-01-01, 2014-12-31] 54 | valid: [2015-01-01, 2016-12-31] 55 | test: [2017-01-01, 2020-08-01] 56 | record: 57 | - class: SignalRecord 58 | module_path: qlib.workflow.record_temp 59 | kwargs: 60 | model: <MODEL> 61 | dataset: <DATASET> 62 | - class: SigAnaRecord 63 | module_path: qlib.workflow.record_temp 64 | kwargs: 65 | ana_long_short: False 66 | ann_scaler: 252 67 | - class: PortAnaRecord 68 | module_path: qlib.workflow.record_temp 69 | kwargs: 70 | config: *port_analysis_config 71 | -------------------------------------------------------------------------------- /examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml: -------------------------------------------------------------------------------- 1 | qlib_init: 2 | provider_uri: "~/.qlib/qlib_data/cn_data" 3 | region: cn 4 | market: &market csi300 5 | benchmark: &benchmark SH000300 6 | data_handler_config: &data_handler_config 7 | start_time: 2008-01-01 8 | end_time: 2020-08-01 9 | fit_start_time: 2008-01-01 10 | fit_end_time: 2014-12-31 11 | instruments: *market 12 | port_analysis_config: &port_analysis_config 13 | strategy: 14 | class: TopkDropoutStrategy 15 | module_path: qlib.contrib.strategy 16 | kwargs: 17 | signal: <PRED> 18 | topk: 50 19 | n_drop: 5 20 | backtest: 21 | start_time: 2017-01-01 22 | end_time: 2020-08-01 23 | account: 100000000 24 | benchmark: *benchmark 25 | exchange_kwargs: 26 | limit_threshold: 0.095 27 | deal_price: close 28 | open_cost: 0.0005 29 |
close_cost: 0.0015 30 | min_cost: 5 31 | task: 32 | model: 33 | class: LGBModel 34 | module_path: qlib.contrib.model.gbdt 35 | kwargs: 36 | loss: mse 37 | colsample_bytree: 0.8879 38 | learning_rate: 0.2 39 | subsample: 0.8789 40 | lambda_l1: 205.6999 41 | lambda_l2: 580.9768 42 | max_depth: 8 43 | num_leaves: 210 44 | num_threads: 20 45 | dataset: 46 | class: DatasetH 47 | module_path: qlib.data.dataset 48 | kwargs: 49 | handler: 50 | class: Alpha158 51 | module_path: qlib.contrib.data.handler 52 | kwargs: *data_handler_config 53 | segments: 54 | train: [2008-01-01, 2014-12-31] 55 | valid: [2015-01-01, 2016-12-31] 56 | test: [2017-01-01, 2020-08-01] 57 | record: 58 | - class: SignalRecord 59 | module_path: qlib.workflow.record_temp 60 | kwargs: 61 | model: <MODEL> 62 | dataset: <DATASET> 63 | - class: SigAnaRecord 64 | module_path: qlib.workflow.record_temp 65 | kwargs: 66 | ana_long_short: False 67 | ann_scaler: 252 68 | - class: PortAnaRecord 69 | module_path: qlib.workflow.record_temp 70 | kwargs: 71 | config: *port_analysis_config 72 | -------------------------------------------------------------------------------- /examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158_csi500.yaml: -------------------------------------------------------------------------------- 1 | qlib_init: 2 | provider_uri: "~/.qlib/qlib_data/cn_data" 3 | region: cn 4 | market: &market csi500 5 | benchmark: &benchmark SH000905 6 | data_handler_config: &data_handler_config 7 | start_time: 2008-01-01 8 | end_time: 2020-08-01 9 | fit_start_time: 2008-01-01 10 | fit_end_time: 2014-12-31 11 | instruments: *market 12 | port_analysis_config: &port_analysis_config 13 | strategy: 14 | class: TopkDropoutStrategy 15 | module_path: qlib.contrib.strategy 16 | kwargs: 17 | signal: <PRED> 18 | topk: 50 19 | n_drop: 5 20 | backtest: 21 | start_time: 2017-01-01 22 | end_time: 2020-08-01 23 | account: 100000000 24 | benchmark: *benchmark 25 | exchange_kwargs: 26 | limit_threshold: 0.095 27 | deal_price: close 28 | open_cost: 0.0005 29 | close_cost: 0.0015 30 | min_cost: 5 31 | task: 32 | model: 33 | class: LGBModel 34 | module_path: qlib.contrib.model.gbdt 35 | kwargs: 36 | loss: mse 37 | colsample_bytree: 0.9 38 | learning_rate: 0.1 39 | subsample: 0.9 40 | lambda_l1: 205.6999 41 | lambda_l2: 580.9768 42 | max_depth: 8 43 | num_leaves: 250 44 | num_threads: 20 45 | dataset: 46 | class: DatasetH 47 | module_path: qlib.data.dataset 48 | kwargs: 49 | handler: 50 | class: Alpha158 51 | module_path: qlib.contrib.data.handler 52 | kwargs: *data_handler_config 53 | segments: 54 | train: [2008-01-01, 2014-12-31] 55 | valid: [2015-01-01, 2016-12-31] 56 | test: [2017-01-01, 2020-08-01] 57 | record: 58 | - class: SignalRecord 59 | module_path: qlib.workflow.record_temp 60 | kwargs: 61 | model: <MODEL> 62 | dataset: <DATASET> 63 | - class: SigAnaRecord 64 | module_path: qlib.workflow.record_temp 65 | kwargs: 66 | ana_long_short: False 67 | ann_scaler: 252 68 | - class: PortAnaRecord 69 | module_path: qlib.workflow.record_temp 70 | kwargs: 71 | config: *port_analysis_config 72 | -------------------------------------------------------------------------------- /examples/benchmarks_dynamic/baseline/workflow_config_lightgbm_Alpha158.yaml: -------------------------------------------------------------------------------- 1 | qlib_init: 2 | provider_uri: "~/.qlib/qlib_data/cn_data" 3 | region: cn 4 | market: &market csi300 5 | benchmark: &benchmark SH000300 6 | data_handler_config: &data_handler_config 7 | start_time: 2008-01-01 8 | end_time: 2020-08-01 9 | fit_start_time: 2008-01-01
10 | fit_end_time: 2014-12-31 11 | instruments: *market 12 | port_analysis_config: &port_analysis_config 13 | strategy: 14 | class: TopkDropoutStrategy 15 | module_path: qlib.contrib.strategy 16 | kwargs: 17 | signal: <PRED> 18 | topk: 50 19 | n_drop: 5 20 | backtest: 21 | start_time: 2017-01-01 22 | end_time: 2020-08-01 23 | account: 100000000 24 | benchmark: *benchmark 25 | exchange_kwargs: 26 | limit_threshold: 0.095 27 | deal_price: close 28 | open_cost: 0.0005 29 | close_cost: 0.0015 30 | min_cost: 5 31 | task: 32 | model: 33 | class: LGBModel 34 | module_path: qlib.contrib.model.gbdt 35 | kwargs: 36 | loss: mse 37 | colsample_bytree: 0.8879 38 | learning_rate: 0.2 39 | subsample: 0.8789 40 | lambda_l1: 205.6999 41 | lambda_l2: 580.9768 42 | max_depth: 8 43 | num_leaves: 210 44 | num_threads: 20 45 | dataset: 46 | class: DatasetH 47 | module_path: qlib.data.dataset 48 | kwargs: 49 | handler: 50 | class: Alpha158 51 | module_path: qlib.contrib.data.handler 52 | kwargs: *data_handler_config 53 | segments: 54 | train: [2008-01-01, 2014-12-31] 55 | valid: [2015-01-01, 2016-12-31] 56 | test: [2017-01-01, 2020-08-01] 57 | record: 58 | - class: SignalRecord 59 | module_path: qlib.workflow.record_temp 60 | kwargs: 61 | model: <MODEL> 62 | dataset: <DATASET> 63 | - class: SigAnaRecord 64 | module_path: qlib.workflow.record_temp 65 | kwargs: 66 | ana_long_short: False 67 | ann_scaler: 252 68 | - class: PortAnaRecord 69 | module_path: qlib.workflow.record_temp 70 | kwargs: 71 | config: *port_analysis_config 72 | -------------------------------------------------------------------------------- /examples/portfolio/config_enhanced_indexing.yaml: -------------------------------------------------------------------------------- 1 | qlib_init: 2 | provider_uri: "~/.qlib/qlib_data/cn_data" 3 | region: cn 4 | market: &market csi300 5 | benchmark: &benchmark SH000300 6 | data_handler_config: &data_handler_config 7 | start_time: 2008-01-01 8 | end_time: 2020-08-01 9 | fit_start_time: 2008-01-01 10 | fit_end_time: 2014-12-31 11 | instruments: *market 12 | port_analysis_config: &port_analysis_config 13 | strategy: 14 | class: EnhancedIndexingStrategy 15 | module_path: qlib.contrib.strategy 16 | kwargs: 17 | model: <MODEL> 18 | dataset: <DATASET> 19 | riskmodel_root: ./riskdata 20 | backtest: 21 | start_time: 2017-01-01 22 | end_time: 2020-08-01 23 | account: 100000000 24 | benchmark: *benchmark 25 | exchange_kwargs: 26 | limit_threshold: 0.095 27 | deal_price: close 28 | open_cost: 0.0005 29 | close_cost: 0.0015 30 | min_cost: 5 31 | task: 32 | model: 33 | class: LGBModel 34 | module_path: qlib.contrib.model.gbdt 35 | kwargs: 36 | loss: mse 37 | colsample_bytree: 0.8879 38 | learning_rate: 0.2 39 | subsample: 0.8789 40 | lambda_l1: 205.6999 41 | lambda_l2: 580.9768 42 | max_depth: 8 43 | num_leaves: 210 44 | num_threads: 20 45 | dataset: 46 | class: DatasetH 47 | module_path: qlib.data.dataset 48 | kwargs: 49 | handler: 50 | class: Alpha158 51 | module_path: qlib.contrib.data.handler 52 | kwargs: *data_handler_config 53 | segments: 54 | train: [2008-01-01, 2014-12-31] 55 | valid: [2015-01-01, 2016-12-31] 56 | test: [2017-01-01, 2020-08-01] 57 | record: 58 | - class: SignalRecord 59 | module_path: qlib.workflow.record_temp 60 | kwargs: 61 | model: <MODEL> 62 | dataset: <DATASET> 63 | - class: SigAnaRecord 64 | module_path: qlib.workflow.record_temp 65 | kwargs: 66 | ana_long_short: False 67 | ann_scaler: 252 68 | - class: PortAnaRecord 69 | module_path: qlib.workflow.record_temp 70 | kwargs: 71 | config: *port_analysis_config 72 |
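For completeness, this enhanced-indexing config can be launched the same way the data demos elsewhere in this repository launch their configs, i.e. through the ``qrun`` entry point. A minimal sketch (it assumes the working directory is ``examples/portfolio`` and that the risk data referenced by ``riskmodel_root: ./riskdata`` has already been generated, e.g. by the accompanying ``prepare_riskdata.py`` script):

```python
import subprocess
from pathlib import Path

# Run the workflow config with qrun, mirroring the subprocess usage in data_cache_demo.py.
config_path = Path("config_enhanced_indexing.yaml")
subprocess.run(f"qrun {config_path}", shell=True, check=True)
```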
-------------------------------------------------------------------------------- /examples/benchmarks_dynamic/DDG-DA/README.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | This is the implementation of `DDG-DA` based on the `Meta Controller` component provided by `Qlib`. 3 | 4 | Please refer to the paper for more details: *DDG-DA: Data Distribution Generation for Predictable Concept Drift Adaptation* [[arXiv](https://arxiv.org/abs/2201.04038)] 5 | 6 | 7 | # Background 8 | In many real-world scenarios, we often deal with streaming data that is sequentially collected over time. Due to the non-stationary nature of the environment, the streaming data distribution may change in unpredictable ways, which is known as concept drift. To handle concept drift, previous methods first detect when/where the concept drift happens and then adapt models to fit the distribution of the latest data. However, there are still many cases in which some underlying factors of environment evolution are predictable, making it possible to model the future concept drift trend of the streaming data; such cases are not fully explored in previous work. 9 | 10 | Therefore, we propose a novel method, `DDG-DA`, that can effectively forecast the evolution of the data distribution and improve the performance of models. Specifically, we first train a predictor to estimate the future data distribution, then leverage it to generate training samples, and finally train models on the generated data. 11 | 12 | # Dataset 13 | The data used in the paper are private, so we conduct experiments on Qlib's public dataset. 14 | Though the dataset is different, the conclusion remains the same. By applying `DDG-DA`, users can see rising trends in the test phase, both in the proxy model's IC and in the performance of the forecasting models. 15 | 16 | # Run the Code 17 | Users can try `DDG-DA` by running the following command: 18 | ```bash 19 | python workflow.py run 20 | ``` 21 | 22 | The default forecasting model is `Linear`. Users can choose other forecasting models by changing the `forecast_model` parameter when `DDG-DA` initializes. For example, users can try the `LightGBM` forecasting model by running the following command: 23 | ```bash 24 | python workflow.py --conf_path=../workflow_config_lightgbm_Alpha158.yaml run 25 | ``` 26 | 27 | # Results 28 | The results of related methods on Qlib's public dataset can be found [here](../) 29 | 30 | # Requirements 31 | Here are the minimal hardware requirements to run the ``workflow.py`` of DDG-DA. 32 | * Memory: 45G 33 | * Disk: 4G 34 | 35 | PyTorch on CPU with the RAM above will be enough for this example. 36 | -------------------------------------------------------------------------------- /examples/data_demo/data_mem_resuse_demo.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License.
3 | """ 4 | The motivation of this demo 5 | - To show the data modules of Qlib is Serializable, users can dump processed data to disk to avoid duplicated data preprocessing 6 | """ 7 | 8 | from copy import deepcopy 9 | from pathlib import Path 10 | import pickle 11 | from pprint import pprint 12 | from ruamel.yaml import YAML 13 | import subprocess 14 | 15 | from qlib import init 16 | from qlib.data.dataset.handler import DataHandlerLP 17 | from qlib.log import TimeInspector 18 | from qlib.model.trainer import task_train 19 | from qlib.utils import init_instance_by_config 20 | 21 | # For general purpose, we use relative path 22 | DIRNAME = Path(__file__).absolute().resolve().parent 23 | 24 | if __name__ == "__main__": 25 | init() 26 | 27 | repeat = 2 28 | exp_name = "data_mem_reuse_demo" 29 | 30 | config_path = DIRNAME.parent / "benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml" 31 | yaml = YAML(typ="safe", pure=True) 32 | task_config = yaml.load(config_path.open()) 33 | 34 | # 1) without using processed data in memory 35 | with TimeInspector.logt("The original time without reusing processed data in memory:"): 36 | for i in range(repeat): 37 | task_train(task_config["task"], experiment_name=exp_name) 38 | 39 | # 2) prepare processed data in memory. 40 | hd_conf = task_config["task"]["dataset"]["kwargs"]["handler"] 41 | pprint(hd_conf) 42 | hd: DataHandlerLP = init_instance_by_config(hd_conf) 43 | 44 | # 3) with reusing processed data in memory 45 | new_task = deepcopy(task_config["task"]) 46 | new_task["dataset"]["kwargs"]["handler"] = hd 47 | print(new_task) 48 | 49 | with TimeInspector.logt("The time with reusing processed data in memory:"): 50 | # this will save the time to reload and process data from disk(in `DataHandlerLP`) 51 | # It still takes a lot of time in the backtest phase 52 | for i in range(repeat): 53 | task_train(new_task, experiment_name=exp_name) 54 | 55 | # 4) User can change other parts exclude processed data in memory(handler) 56 | new_task = deepcopy(task_config["task"]) 57 | new_task["dataset"]["kwargs"]["segments"]["train"] = ("20100101", "20131231") 58 | with TimeInspector.logt("The time with reusing processed data in memory:"): 59 | task_train(new_task, experiment_name=exp_name) 60 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | ====================== 2 | ``Qlib`` Documentation 3 | ====================== 4 | 5 | ``Qlib`` is an AI-oriented quantitative investment platform, which aims to realize the potential, empower the research, and create the value of AI technologies in quantitative investment. 6 | 7 | .. _user_guide: 8 | 9 | Document Structure 10 | ==================== 11 | 12 | .. toctree:: 13 | :hidden: 14 | 15 | Home 16 | 17 | .. toctree:: 18 | :maxdepth: 3 19 | :caption: GETTING STARTED: 20 | 21 | Introduction 22 | Quick Start 23 | 24 | .. toctree:: 25 | :maxdepth: 3 26 | :caption: FIRST STEPS: 27 | 28 | Installation 29 | Initialization 30 | Data Retrieval 31 | Custom Model Integration 32 | 33 | 34 | .. 
35 | :maxdepth: 3 36 | :caption: MAIN COMPONENTS: 37 | 38 | Workflow: Workflow Management 39 | Data Layer: Data Framework & Usage 40 | Forecast Model: Model Training & Prediction 41 | Portfolio Management and Backtest 42 | Nested Decision Execution: High-Frequency Trading 43 | Meta Controller: Meta-Task & Meta-Dataset & Meta-Model 44 | Qlib Recorder: Experiment Management 45 | Analysis: Evaluation & Results Analysis 46 | Online Serving: Online Management & Strategy & Tool 47 | Reinforcement Learning 48 | 49 | .. toctree:: 50 | :maxdepth: 3 51 | :caption: OTHER COMPONENTS/FEATURES/TOPICS: 52 | 53 | Building Formulaic Alphas 54 | Online & Offline mode 55 | Serialization 56 | Task Management 57 | Point-In-Time database 58 | 59 | .. toctree:: 60 | :maxdepth: 3 61 | :caption: FOR DEVELOPERS: 62 | 63 | Code Standard & Development Guidance 64 | How to build image 65 | 66 | .. toctree:: 67 | :maxdepth: 3 68 | :caption: REFERENCE: 69 | 70 | API 71 | 72 | .. toctree:: 73 | :maxdepth: 3 74 | 75 | FAQ 76 | 77 | .. toctree:: 78 | :maxdepth: 3 79 | :caption: Change Log: 80 | 81 | Change Log 82 | -------------------------------------------------------------------------------- /examples/benchmarks/Linear/workflow_config_linear_Alpha158.yaml: -------------------------------------------------------------------------------- 1 | qlib_init: 2 | provider_uri: "~/.qlib/qlib_data/cn_data" 3 | region: cn 4 | market: &market csi300 5 | benchmark: &benchmark SH000300 6 | data_handler_config: &data_handler_config 7 | start_time: 2008-01-01 8 | end_time: 2020-08-01 9 | fit_start_time: 2008-01-01 10 | fit_end_time: 2014-12-31 11 | instruments: *market 12 | infer_processors: 13 | - class: RobustZScoreNorm 14 | kwargs: 15 | fields_group: feature 16 | clip_outlier: true 17 | - class: Fillna 18 | kwargs: 19 | fields_group: feature 20 | learn_processors: 21 | - class: DropnaLabel 22 | - class: CSRankNorm 23 | kwargs: 24 | fields_group: label 25 | port_analysis_config: &port_analysis_config 26 | strategy: 27 | class: TopkDropoutStrategy 28 | module_path: qlib.contrib.strategy 29 | kwargs: 30 | signal: <PRED> 31 | topk: 50 32 | n_drop: 5 33 | backtest: 34 | start_time: 2017-01-01 35 | end_time: 2020-08-01 36 | account: 100000000 37 | benchmark: *benchmark 38 | exchange_kwargs: 39 | limit_threshold: 0.095 40 | deal_price: close 41 | open_cost: 0.0005 42 | close_cost: 0.0015 43 | min_cost: 5 44 | task: 45 | model: 46 | class: LinearModel 47 | module_path: qlib.contrib.model.linear 48 | kwargs: 49 | estimator: ols 50 | dataset: 51 | class: DatasetH 52 | module_path: qlib.data.dataset 53 | kwargs: 54 | handler: 55 | class: Alpha158 56 | module_path: qlib.contrib.data.handler 57 | kwargs: *data_handler_config 58 | segments: 59 | train: [2008-01-01, 2014-12-31] 60 | valid: [2015-01-01, 2016-12-31] 61 | test: [2017-01-01, 2020-08-01] 62 | record: 63 | - class: SignalRecord 64 | module_path: qlib.workflow.record_temp 65 | kwargs: 66 | model: <MODEL> 67 | dataset: <DATASET> 68 | - class: SigAnaRecord 69 | module_path: qlib.workflow.record_temp 70 | kwargs: 71 | ana_long_short: True 72 | ann_scaler: 252 73 | - class: PortAnaRecord 74 | module_path: qlib.workflow.record_temp 75 | kwargs: 76 | config: *port_analysis_config 77 | -------------------------------------------------------------------------------- /tests/ops/test_elem_operator.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | import pytest 4 | 5 | from qlib.data import DatasetProvider 6 | from qlib.data.data
import ExpressionD 7 | from qlib.tests import TestOperatorData, TestMockData, MOCK_DF 8 | from qlib.config import C 9 | 10 | 11 | class TestElementOperator(TestMockData): 12 | def setUp(self) -> None: 13 | self.instrument = "0050" 14 | self.start_time = "2022-01-01" 15 | self.end_time = "2022-02-01" 16 | self.freq = "day" 17 | self.mock_df = MOCK_DF[MOCK_DF["symbol"] == self.instrument] 18 | 19 | def test_Abs(self): 20 | field = "Abs($close-Ref($close, 1))" 21 | result = ExpressionD.expression(self.instrument, field, self.start_time, self.end_time, self.freq) 22 | self.assertGreaterEqual(result.min(), 0) 23 | result = result.to_numpy() 24 | prev_close = self.mock_df["close"].shift(1) 25 | close = self.mock_df["close"] 26 | change = prev_close - close 27 | golden = change.abs().to_numpy() 28 | self.assertIsNone(np.testing.assert_allclose(result, golden)) 29 | 30 | def test_Sign(self): 31 | field = "Sign($close-Ref($close, 1))" 32 | result = ExpressionD.expression(self.instrument, field, self.start_time, self.end_time, self.freq) 33 | result = result.to_numpy() 34 | prev_close = self.mock_df["close"].shift(1) 35 | close = self.mock_df["close"] 36 | change = close - prev_close 37 | change[change > 0] = 1.0 38 | change[change < 0] = -1.0 39 | golden = change.to_numpy() 40 | self.assertIsNone(np.testing.assert_allclose(result, golden)) 41 | 42 | 43 | class TestOperatorDataSetting(TestOperatorData): 44 | def test_setting(self): 45 | self.assertEqual(len(self.instruments_d), 1) 46 | self.assertGreater(len(self.cal), 0) 47 | 48 | 49 | class TestInstElementOperator(TestOperatorData): 50 | def setUp(self) -> None: 51 | freq = "day" 52 | expressions = [ 53 | "$change", 54 | "Abs($change)", 55 | ] 56 | columns = ["change", "abs"] 57 | self.data = DatasetProvider.inst_calculator( 58 | self.inst, self.start_time, self.end_time, freq, expressions, self.spans, C, [] 59 | ) 60 | self.data.columns = columns 61 | 62 | @pytest.mark.slow 63 | def test_abs(self): 64 | abs_values = self.data["abs"] 65 | self.assertGreater(abs_values[2], 0) 66 | 67 | 68 | if __name__ == "__main__": 69 | unittest.main() 70 | --------------------------------------------------------------------------------
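As a closing note on the operator tests above: outside the unit-test harness, custom operators such as ``Diff`` and ``Distance`` from ``tests/test_register_ops.py`` can be registered when initializing Qlib, since ``TestAutoData`` simply forwards its ``custom_ops`` setup kwarg to ``qlib.init``. A minimal sketch (the provider URI and the importability of the test module are assumptions):

```python
import qlib
from qlib.data import D
from tests.test_register_ops import Diff, Distance  # assumption: the test module is on the import path

# Register the custom operators up front, as TestAutoData does via _setup_kwargs.
qlib.init(provider_uri="~/.qlib/qlib_data/cn_data", custom_ops=[Diff, Distance])
print(D.features(["SH600000"], ["Diff($close)", "Distance($close, Ref($close, 1))"], freq="day"))
```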