├── CHANGELOG.md
├── cli
│   ├── __init__.py
│   └── data.py
├── contrib
│   ├── __init__.py
│   ├── eva
│   │   └── __init__.py
│   ├── ops
│   │   └── __init__.py
│   ├── data
│   │   ├── __init__.py
│   │   ├── utils
│   │   │   └── __init__.py
│   │   └── data.py
│   ├── tuner
│   │   ├── __init__.py
│   │   ├── space.py
│   │   └── launcher.py
│   ├── report
│   │   ├── data
│   │   │   ├── __init__.py
│   │   │   └── base.py
│   │   ├── analysis_model
│   │   │   └── __init__.py
│   │   ├── __init__.py
│   │   └── analysis_position
│   │       └── __init__.py
│   ├── meta
│   │   ├── __init__.py
│   │   └── data_selection
│   │       └── __init__.py
│   ├── workflow
│   │   └── __init__.py
│   ├── strategy
│   │   ├── optimizer
│   │   │   ├── __init__.py
│   │   │   └── base.py
│   │   └── __init__.py
│   ├── rolling
│   │   ├── __init__.py
│   │   └── __main__.py
│   ├── online
│   │   ├── __init__.py
│   │   └── online_model.py
│   ├── torch.py
│   └── model
│       ├── pytorch_utils.py
│       └── __init__.py
├── model
│   ├── ens
│   │   └── __init__.py
│   ├── interpret
│   │   ├── __init__.py
│   │   └── base.py
│   ├── __init__.py
│   ├── meta
│   │   ├── __init__.py
│   │   └── task.py
│   ├── riskmodel
│   │   └── __init__.py
│   └── utils.py
├── rl
│   ├── contrib
│   │   ├── __init__.py
│   │   └── utils.py
│   ├── strategy
│   │   ├── __init__.py
│   │   └── single_order.py
│   ├── data
│   │   ├── __init__.py
│   │   └── base.py
│   ├── __init__.py
│   ├── seed.py
│   ├── trainer
│   │   └── __init__.py
│   ├── utils
│   │   └── __init__.py
│   ├── order_execution
│   │   ├── __init__.py
│   │   └── utils.py
│   └── aux_info.py
├── workflow
│   ├── online
│   │   └── __init__.py
│   ├── task
│   │   └── __init__.py
│   └── utils.py
├── examples
│   ├── model_rolling
│   │   └── requirements.txt
│   ├── benchmarks
│   │   ├── Localformer
│   │   │   ├── README.md
│   │   │   └── requirements.txt
│   │   ├── MLP
│   │   │   ├── README.md
│   │   │   └── requirements.txt
│   │   ├── KRNN
│   │   │   ├── requirements.txt
│   │   │   └── README.md
│   │   ├── Sandwich
│   │   │   ├── requirements.txt
│   │   │   └── README.md
│   │   ├── TFT
│   │   │   ├── requirements.txt
│   │   │   ├── libs
│   │   │   │   └── __init__.py
│   │   │   ├── data_formatters
│   │   │   │   └── __init__.py
│   │   │   ├── expt_settings
│   │   │   │   └── __init__.py
│   │   │   ├── README.md
│   │   │   └── workflow_config_tft_Alpha158.yaml
│   │   ├── LightGBM
│   │   │   ├── requirements.txt
│   │   │   ├── README.md
│   │   │   ├── features_resample_N.py
│   │   │   ├── features_sample.py
│   │   │   ├── workflow_config_lightgbm_Alpha158.yaml
│   │   │   └── workflow_config_lightgbm_Alpha158_csi500.yaml
│   │   ├── Transformer
│   │   │   ├── requirements.txt
│   │   │   └── README.md
│   │   ├── XGBoost
│   │   │   ├── requirements.txt
│   │   │   ├── README.md
│   │   │   └── workflow_config_xgboost_Alpha158.yaml
│   │   ├── CatBoost
│   │   │   ├── requirements.txt
│   │   │   ├── README.md
│   │   │   ├── workflow_config_catboost_Alpha158.yaml
│   │   │   └── workflow_config_catboost_Alpha158_csi500.yaml
│   │   ├── DoubleEnsemble
│   │   │   ├── requirements.txt
│   │   │   └── README.md
│   │   ├── Linear
│   │   │   ├── requirements.txt
│   │   │   └── workflow_config_linear_Alpha158.yaml
│   │   ├── ADD
│   │   │   ├── requirements.txt
│   │   │   └── README.md
│   │   ├── GRU
│   │   │   ├── requirements.txt
│   │   │   ├── csi300_gru_ts.pkl
│   │   │   ├── model_gru_csi300.pkl
│   │   │   └── README.md
│   │   ├── HIST
│   │   │   ├── requirements.txt
│   │   │   ├── qlib_csi300_stock_index.npy
│   │   │   └── README.md
│   │   ├── SFM
│   │   │   ├── requirements.txt
│   │   │   └── README.md
│   │   ├── TCN
│   │   │   ├── requirements.txt
│   │   │   └── README.md
│   │   ├── TCTS
│   │   │   ├── requirements.txt
│   │   │   └── workflow.png
│   │   ├── TabNet
│   │   │   ├── requirements.txt
│   │   │   └── README.md
│   │   ├── ADARNN
│   │   │   ├── requirements.txt
│   │   │   └── README.md
│   │   ├── ALSTM
│   │   │   ├── requirements.txt
│   │   │   └── README.md
│   │   ├── GATs
│   │   │   ├── requirements.txt
│   │   │   └── README.md
│   │   ├── IGMTF
│   │   │   ├── requirements.txt
│   │   │   └── README.md
│   │   ├── LSTM
│   │   │   ├── requirements.txt
│   │   │   ├── csi300_lstm_ts.pkl
│   │   │   ├── model_lstm_csi300.pkl
│   │   │   └── README.md
│   │   ├── TRA
│   │   │   ├── requirements.txt
│   │   │   ├── data
│   │   │   │   └── README.md
│   │   │   ├── run.sh
│   │   │   ├── example.py
│   │   │   └── configs
│   │   │       ├── config_alstm.yaml
│   │   │       ├── config_alstm_tra_init.yaml
│   │   │       ├── config_transformer.yaml
│   │   │       ├── config_transformer_tra_init.yaml
│   │   │       ├── config_alstm_tra.yaml
│   │   │       └── config_transformer_tra.yaml
│   │   └── GeneralPtNN
│   │       └── README.md
│   ├── benchmarks_dynamic
│   │   ├── DDG-DA
│   │   │   ├── requirements.txt
│   │   │   ├── Makefile
│   │   │   ├── workflow.py
│   │   │   └── README.md
│   │   ├── baseline
│   │   │   ├── README.md
│   │   │   ├── rolling_benchmark.py
│   │   │   └── workflow_config_lightgbm_Alpha158.yaml
│   │   └── README.md
│   ├── data_demo
│   │   ├── README.md
│   │   ├── data_cache_demo.py
│   │   └── data_mem_resuse_demo.py
│   ├── hyperparameter
│   │   └── LightGBM
│   │       ├── requirements.txt
│   │       ├── Readme.md
│   │       ├── hyperparameter_158.py
│   │       └── hyperparameter_360.py
│   ├── README.md
│   ├── rl_order_execution
│   │   ├── scripts
│   │   │   ├── merge_orders.py
│   │   │   ├── gen_pickle_data.py
│   │   │   └── gen_training_orders.py
│   │   └── exp_configs
│   │       ├── backtest_twap.yml
│   │       ├── backtest_ppo.yml
│   │       ├── backtest_opds.yml
│   │       ├── train_opds.yml
│   │       └── train_ppo.yml
│   ├── rolling_process_data
│   │   ├── README.md
│   │   └── rolling_handler.py
│   ├── model_interpreter
│   │   └── feature.py
│   ├── nested_decision_execution
│   │   └── README.md
│   ├── portfolio
│   │   ├── README.md
│   │   ├── prepare_riskdata.py
│   │   └── config_enhanced_indexing.yaml
│   ├── highfreq
│   │   ├── README.md
│   │   └── workflow_config_High_Freq_Tree_Alpha158.yaml
│   ├── orderbook_data
│   │   └── README.md
│   └── online_srv
│       └── update_online_pred.py
├── docs
│   ├── changelog
│   │   └── changelog.rst
│   ├── _static
│   │   ├── img
│   │   │   ├── logo
│   │   │   │   ├── 1.png
│   │   │   │   ├── 2.png
│   │   │   │   ├── 3.png
│   │   │   │   ├── yellow_bg_rec.png
│   │   │   │   ├── yel_bg_rec+word.png
│   │   │   │   ├── white_bg_rec+word.png
│   │   │   │   └── yellow_bg_rec+word .png
│   │   │   ├── change doc.gif
│   │   │   ├── framework.png
│   │   │   ├── topk_drop.png
│   │   │   ├── RL_framework.png
│   │   │   ├── rdagent_logo.png
│   │   │   ├── QlibRL_framework.png
│   │   │   ├── analysis
│   │   │   │   ├── report.png
│   │   │   │   ├── score_ic.png
│   │   │   │   ├── rank_label_buy.png
│   │   │   │   ├── analysis_model_IC.png
│   │   │   │   ├── rank_label_hold.png
│   │   │   │   ├── rank_label_sell.png
│   │   │   │   ├── risk_analysis_bar.png
│   │   │   │   ├── risk_analysis_std.png
│   │   │   │   ├── analysis_model_NDQ.png
│   │   │   │   ├── cumulative_return_buy.png
│   │   │   │   ├── cumulative_return_hold.png
│   │   │   │   ├── cumulative_return_sell.png
│   │   │   │   ├── analysis_model_long_short.png
│   │   │   │   ├── analysis_model_monthly_IC.png
│   │   │   │   ├── risk_analysis_max_drawdown.png
│   │   │   │   ├── analysis_model_auto_correlation.png
│   │   │   │   ├── analysis_model_cumulative_return.png
│   │   │   │   ├── cumulative_return_buy_minus_sell.png
│   │   │   │   ├── risk_analysis_annualized_return.png
│   │   │   │   └── risk_analysis_information_ratio.png
│   │   │   ├── online_serving.png
│   │   │   ├── qrcode
│   │   │   │   └── gitter_qr.png
│   │   │   └── framework-abstract.jpg
│   │   └── demo.sh
│   ├── requirements.txt
│   ├── component
│   │   ├── rl
│   │   │   └── toctree.rst
│   │   └── online.rst
│   ├── Makefile
│   ├── make.bat
│   ├── advanced
│   │   ├── server.rst
│   │   └── serial.rst
│   ├── start
│   │   └── installation.rst
│   └── index.rst
├── data
│   ├── _libs
│   │   └── __init__.py
│   ├── storage
│   │   └── __init__.py
│   ├── inst_processor.py
│   ├── dataset
│   │   └── weight.py
│   └── __init__.py
├── strategy
│   └── __init__.py
├── scripts
│   ├── data_collector
│   │   ├── contrib
│   │   │   ├── fill_cn_1min_data
│   │   │   │   ├── requirements.txt
│   │   │   │   └── README.md
│   │   │   └── future_trading_date_collector
│   │   │       ├── requirements.txt
│   │   │       └── README.md
│   │   ├── us_index
│   │   │   ├── requirements.txt
│   │   │   └── README.md
│   │   ├── crypto
│   │   │   ├── requirement.txt
│   │   │   └── README.md
│   │   ├── fund
│   │   │   ├── requirements.txt
│   │   │   └── README.md
│   │   ├── cn_index
│   │   │   ├── requirements.txt
│   │   │   └── README.md
│   │   ├── pit
│   │   │   ├── requirements.txt
│   │   │   └── README.md
│   │   ├── yahoo
│   │   │   └── requirements.txt
│   │   ├── baostock_5min
│   │   │   └── requirements.txt
│   │   ├── br_index
│   │   │   └── requirements.txt
│   │   └── crowd_source
│   │       └── README.md
│   ├── get_data.py
│   ├── collect_info.py
│   └── README.md
├── tests
│   ├── dataset_tests
│   │   ├── README.md
│   │   └── test_datalayer.py
│   ├── data_mid_layer_tests
│   │   ├── README.md
│   │   └── test_handler.py
│   ├── dependency_tests
│   │   ├── README.md
│   │   └── test_mlflow.py
│   ├── pytest.ini
│   ├── conftest.py
│   ├── test_contrib_model.py
│   ├── test_workflow.py
│   ├── misc
│   │   ├── test_get_multi_proc.py
│   │   └── test_sepdf.py
│   ├── test_get_data.py
│   ├── test_register_ops.py
│   └── ops
│       └── test_elem_operator.py
├── MANIFEST.in
├── CODE_OF_CONDUCT.md
├── utils
│   └── exceptions.py
├── constant.py
├── setup.py
├── Dockerfile
├── LICENSE
└── typehint.py

--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/cli/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/contrib/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/contrib/eva/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/contrib/ops/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/model/ens/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/rl/contrib/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/contrib/data/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/model/interpret/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/workflow/online/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/contrib/data/utils/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/examples/model_rolling/requirements.txt:
--------------------------------------------------------------------------------
xgboost

--------------------------------------------------------------------------------
/docs/changelog/changelog.rst:
--------------------------------------------------------------------------------
.. include:: ../../CHANGES.rst

--------------------------------------------------------------------------------
/examples/benchmarks/Localformer/README.md:
--------------------------------------------------------------------------------
# Localformer

--------------------------------------------------------------------------------
/contrib/tuner/__init__.py:
--------------------------------------------------------------------------------
# pylint: skip-file
# flake8: noqa

--------------------------------------------------------------------------------
/examples/benchmarks/MLP/README.md:
--------------------------------------------------------------------------------
# Multi-Layer Perceptron (MLP)

--------------------------------------------------------------------------------
/examples/benchmarks_dynamic/DDG-DA/requirements.txt:
--------------------------------------------------------------------------------
torch==1.10.0

--------------------------------------------------------------------------------
/examples/benchmarks/KRNN/requirements.txt:
--------------------------------------------------------------------------------
numpy==1.23.4
pandas==1.5.2

--------------------------------------------------------------------------------
/examples/benchmarks/Sandwich/requirements.txt:
--------------------------------------------------------------------------------
numpy==1.23.4
pandas==1.5.2

--------------------------------------------------------------------------------
/examples/benchmarks/TFT/requirements.txt:
--------------------------------------------------------------------------------
tensorflow-gpu==1.15.0
pandas==1.1.0

--------------------------------------------------------------------------------
/examples/benchmarks/LightGBM/requirements.txt:
--------------------------------------------------------------------------------
pandas==1.1.2
numpy==1.21.0
lightgbm

--------------------------------------------------------------------------------
/examples/benchmarks/Localformer/requirements.txt:
--------------------------------------------------------------------------------
numpy==1.21.0
pandas==1.1.2
torch==1.2.0

--------------------------------------------------------------------------------
/examples/benchmarks/Transformer/requirements.txt:
--------------------------------------------------------------------------------
numpy==1.21.0
pandas==1.1.2
torch==1.2.0

--------------------------------------------------------------------------------
/examples/benchmarks/XGBoost/requirements.txt:
--------------------------------------------------------------------------------
numpy==1.21.0
pandas==1.1.2
xgboost==1.2.1

--------------------------------------------------------------------------------
/data/_libs/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

--------------------------------------------------------------------------------
/docs/_static/img/logo/1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/logo/1.png

--------------------------------------------------------------------------------
/docs/_static/img/logo/2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/logo/2.png

--------------------------------------------------------------------------------
/docs/_static/img/logo/3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/logo/3.png

--------------------------------------------------------------------------------
/examples/benchmarks/CatBoost/requirements.txt:
--------------------------------------------------------------------------------
pandas==1.1.2
numpy==1.21.0
catboost==0.24.3

--------------------------------------------------------------------------------
/strategy/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

--------------------------------------------------------------------------------
/examples/benchmarks/DoubleEnsemble/requirements.txt:
--------------------------------------------------------------------------------
pandas==1.1.2
numpy==1.21.0
lightgbm==3.1.0

--------------------------------------------------------------------------------
/examples/benchmarks/Linear/requirements.txt:
--------------------------------------------------------------------------------
numpy>=1.17.4
pandas>=1.0.1
scikit-learn>=0.23.1

--------------------------------------------------------------------------------
/docs/_static/img/change doc.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/change doc.gif

--------------------------------------------------------------------------------
/docs/_static/img/framework.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/framework.png

--------------------------------------------------------------------------------
/docs/_static/img/topk_drop.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/topk_drop.png

--------------------------------------------------------------------------------
/examples/benchmarks_dynamic/DDG-DA/Makefile:
--------------------------------------------------------------------------------
.PHONY: clean

clean:
	-rm -r *.pkl mlruns || true

--------------------------------------------------------------------------------
/scripts/data_collector/contrib/fill_cn_1min_data/requirements.txt:
--------------------------------------------------------------------------------
fire
pandas
loguru
tqdm
pyqlib

--------------------------------------------------------------------------------
/docs/_static/img/RL_framework.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/RL_framework.png

--------------------------------------------------------------------------------
/docs/_static/img/rdagent_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/rdagent_logo.png

--------------------------------------------------------------------------------
/examples/benchmarks/ADD/requirements.txt:
--------------------------------------------------------------------------------
numpy==1.21.0
pandas==1.1.2
scikit_learn==0.23.2
torch==1.7.0

--------------------------------------------------------------------------------
/examples/benchmarks/GRU/requirements.txt:
--------------------------------------------------------------------------------
numpy==1.21.0
pandas==1.1.2
scikit_learn==0.23.2
torch==1.7.0

--------------------------------------------------------------------------------
/examples/benchmarks/HIST/requirements.txt:
--------------------------------------------------------------------------------
pandas==1.1.2
numpy==1.21.0
scikit_learn==0.23.2
torch==1.7.0

--------------------------------------------------------------------------------
/examples/benchmarks/MLP/requirements.txt:
--------------------------------------------------------------------------------
pandas==1.1.2
numpy==1.21.0
scikit_learn==0.23.2
torch==1.7.0

--------------------------------------------------------------------------------
/examples/benchmarks/SFM/requirements.txt:
--------------------------------------------------------------------------------
pandas==1.1.2
numpy==1.21.0
scikit_learn==0.23.2
torch==1.7.0

--------------------------------------------------------------------------------
/examples/benchmarks/TCN/requirements.txt:
--------------------------------------------------------------------------------
numpy==1.21.0
pandas==1.1.2
scikit_learn==0.23.2
torch==1.7.0

--------------------------------------------------------------------------------
/examples/benchmarks/TCTS/requirements.txt:
--------------------------------------------------------------------------------
pandas==1.1.2
numpy==1.21.0
scikit_learn==0.23.2
torch==1.7.0

--------------------------------------------------------------------------------
/examples/benchmarks/TabNet/requirements.txt:
--------------------------------------------------------------------------------
pandas==1.1.2
numpy==1.21.0
scikit_learn==0.23.2
torch==1.7.0

--------------------------------------------------------------------------------
/docs/_static/img/QlibRL_framework.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/QlibRL_framework.png

--------------------------------------------------------------------------------
/docs/_static/img/analysis/report.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/analysis/report.png

--------------------------------------------------------------------------------
/docs/_static/img/online_serving.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/online_serving.png

--------------------------------------------------------------------------------
/docs/_static/img/qrcode/gitter_qr.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/qrcode/gitter_qr.png

--------------------------------------------------------------------------------
/examples/benchmarks/ADARNN/requirements.txt:
--------------------------------------------------------------------------------
pandas==1.1.2
numpy==1.21.0
scikit_learn==0.23.2
torch==1.7.0

--------------------------------------------------------------------------------
/examples/benchmarks/ALSTM/requirements.txt:
--------------------------------------------------------------------------------
numpy==1.21.0
pandas==1.1.2
scikit_learn==0.23.2
torch==1.7.0

--------------------------------------------------------------------------------
/examples/benchmarks/GATs/requirements.txt:
--------------------------------------------------------------------------------
pandas==1.1.2
numpy==1.21.0
scikit_learn==0.23.2
torch==1.7.0

--------------------------------------------------------------------------------
/examples/benchmarks/IGMTF/requirements.txt:
--------------------------------------------------------------------------------
pandas==1.1.2
numpy==1.21.0
scikit_learn==0.23.2
torch==1.7.0

--------------------------------------------------------------------------------
/examples/benchmarks/LSTM/requirements.txt:
--------------------------------------------------------------------------------
numpy==1.21.0
pandas==1.1.2
scikit_learn==0.23.2
torch==1.7.0

--------------------------------------------------------------------------------
/examples/benchmarks/TCTS/workflow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/examples/benchmarks/TCTS/workflow.png

--------------------------------------------------------------------------------
/scripts/data_collector/us_index/requirements.txt:
--------------------------------------------------------------------------------
fire
requests
pandas
lxml
loguru
fake-useragent

--------------------------------------------------------------------------------
/docs/_static/img/analysis/score_ic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/analysis/score_ic.png

--------------------------------------------------------------------------------
/docs/_static/img/framework-abstract.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/framework-abstract.jpg

--------------------------------------------------------------------------------
/docs/_static/img/logo/yellow_bg_rec.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/logo/yellow_bg_rec.png

--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
Cython
cmake
numpy
scipy
scikit-learn
pandas
tianshou
sphinx_rtd_theme

--------------------------------------------------------------------------------
/scripts/data_collector/crypto/requirement.txt:
--------------------------------------------------------------------------------
loguru
fire
requests
numpy
pandas
tqdm
lxml
pycoingecko

--------------------------------------------------------------------------------
/tests/dataset_tests/README.md:
--------------------------------------------------------------------------------
# About dataset tests
Tests in this folder verify the prepared dataset collected from Yahoo.

--------------------------------------------------------------------------------
/docs/_static/img/logo/yel_bg_rec+word.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/logo/yel_bg_rec+word.png

--------------------------------------------------------------------------------
/examples/benchmarks/GRU/csi300_gru_ts.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/examples/benchmarks/GRU/csi300_gru_ts.pkl

--------------------------------------------------------------------------------
/examples/benchmarks/TRA/requirements.txt:
--------------------------------------------------------------------------------
pandas==1.1.2
numpy==1.21.0
scikit_learn==0.23.2
torch==1.7.0
seaborn

--------------------------------------------------------------------------------
/scripts/data_collector/contrib/future_trading_date_collector/requirements.txt:
--------------------------------------------------------------------------------
baostock
fire
numpy
pandas
loguru

--------------------------------------------------------------------------------
/docs/_static/img/analysis/rank_label_buy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/analysis/rank_label_buy.png

--------------------------------------------------------------------------------
/docs/_static/img/logo/white_bg_rec+word.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/logo/white_bg_rec+word.png

--------------------------------------------------------------------------------
/examples/benchmarks/GRU/model_gru_csi300.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/examples/benchmarks/GRU/model_gru_csi300.pkl

--------------------------------------------------------------------------------
/examples/benchmarks/LSTM/csi300_lstm_ts.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/examples/benchmarks/LSTM/csi300_lstm_ts.pkl

--------------------------------------------------------------------------------
/examples/benchmarks/TRA/data/README.md:
--------------------------------------------------------------------------------
Data Link: https://drive.google.com/drive/folders/1fMqZYSeLyrHiWmVzygeI4sw3vp5Gt8cY?usp=sharing

--------------------------------------------------------------------------------
/docs/_static/img/analysis/analysis_model_IC.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/analysis/analysis_model_IC.png

--------------------------------------------------------------------------------
/docs/_static/img/analysis/rank_label_hold.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/analysis/rank_label_hold.png

--------------------------------------------------------------------------------
/docs/_static/img/analysis/rank_label_sell.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/analysis/rank_label_sell.png

--------------------------------------------------------------------------------
/docs/_static/img/analysis/risk_analysis_bar.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/analysis/risk_analysis_bar.png

--------------------------------------------------------------------------------
/docs/_static/img/analysis/risk_analysis_std.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/analysis/risk_analysis_std.png

--------------------------------------------------------------------------------
/docs/_static/img/logo/yellow_bg_rec+word .png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/logo/yellow_bg_rec+word .png

--------------------------------------------------------------------------------
/examples/benchmarks/LSTM/model_lstm_csi300.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/examples/benchmarks/LSTM/model_lstm_csi300.pkl

--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
exclude tests/*
include qlib/*
include qlib/*/*
include qlib/*/*/*
include qlib/*/*/*/*
include qlib/*/*/*/*/*

--------------------------------------------------------------------------------
/docs/_static/img/analysis/analysis_model_NDQ.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/analysis/analysis_model_NDQ.png

--------------------------------------------------------------------------------
/examples/data_demo/README.md:
--------------------------------------------------------------------------------
# Introduction
The examples in this folder demonstrate common usages of Qlib's data-related modules.

--------------------------------------------------------------------------------
/docs/_static/img/analysis/cumulative_return_buy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/analysis/cumulative_return_buy.png

--------------------------------------------------------------------------------
/docs/_static/img/analysis/cumulative_return_hold.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/analysis/cumulative_return_hold.png

--------------------------------------------------------------------------------
/docs/_static/img/analysis/cumulative_return_sell.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/analysis/cumulative_return_sell.png

--------------------------------------------------------------------------------
/examples/benchmarks/HIST/qlib_csi300_stock_index.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/examples/benchmarks/HIST/qlib_csi300_stock_index.npy

--------------------------------------------------------------------------------
/examples/hyperparameter/LightGBM/requirements.txt:
--------------------------------------------------------------------------------
pandas==1.1.2
numpy==1.21.0
lightgbm==3.1.0
optuna==2.7.0
optuna-dashboard==0.4.1

--------------------------------------------------------------------------------
/scripts/data_collector/fund/requirements.txt:
--------------------------------------------------------------------------------
loguru
fire
requests
numpy
pandas
tqdm
lxml
loguru
yahooquery

--------------------------------------------------------------------------------
/tests/data_mid_layer_tests/README.md:
--------------------------------------------------------------------------------
# Introduction
The middle layers of data, which mainly include:
- Handlers
- Processors
- Datasets

--------------------------------------------------------------------------------
/docs/_static/img/analysis/analysis_model_long_short.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/analysis/analysis_model_long_short.png

--------------------------------------------------------------------------------
/docs/_static/img/analysis/analysis_model_monthly_IC.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/analysis/analysis_model_monthly_IC.png

--------------------------------------------------------------------------------
/docs/_static/img/analysis/risk_analysis_max_drawdown.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/analysis/risk_analysis_max_drawdown.png

--------------------------------------------------------------------------------
/scripts/data_collector/cn_index/requirements.txt:
--------------------------------------------------------------------------------
baostock
fire
requests
pandas
lxml
loguru
tqdm
yahooquery
openpyxl

--------------------------------------------------------------------------------
/docs/_static/img/analysis/analysis_model_auto_correlation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/analysis/analysis_model_auto_correlation.png

--------------------------------------------------------------------------------
/docs/_static/img/analysis/analysis_model_cumulative_return.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/analysis/analysis_model_cumulative_return.png

--------------------------------------------------------------------------------
/docs/_static/img/analysis/cumulative_return_buy_minus_sell.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/analysis/cumulative_return_buy_minus_sell.png

--------------------------------------------------------------------------------
/docs/_static/img/analysis/risk_analysis_annualized_return.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/analysis/risk_analysis_annualized_return.png

--------------------------------------------------------------------------------
/docs/_static/img/analysis/risk_analysis_information_ratio.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhostev/sqlib/HEAD/docs/_static/img/analysis/risk_analysis_information_ratio.png

--------------------------------------------------------------------------------
/scripts/data_collector/pit/requirements.txt:
--------------------------------------------------------------------------------
loguru
fire
tqdm
requests
pandas
lxml
loguru
baostock
yahooquery
beautifulsoup4

--------------------------------------------------------------------------------
/contrib/report/data/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""
This module is designed to analyze data.
"""

--------------------------------------------------------------------------------
/examples/benchmarks/ADD/README.md:
--------------------------------------------------------------------------------
# ADD
* Paper: [ADD: Augmented Disentanglement Distillation Framework for Improving Stock Trend Forecasting](https://arxiv.org/abs/2012.06289).

--------------------------------------------------------------------------------
/scripts/data_collector/yahoo/requirements.txt:
--------------------------------------------------------------------------------
loguru
fire
requests
numpy
pandas
tqdm
lxml
yahooquery
joblib
beautifulsoup4
bs4
soupsieve

--------------------------------------------------------------------------------
/tests/dependency_tests/README.md:
--------------------------------------------------------------------------------
Some implementations of Qlib depend on assumptions about its dependencies.

So some tests are required to ensure that these assumptions are valid.

--------------------------------------------------------------------------------
/rl/strategy/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
from .single_order import SingleOrderStrategy

__all__ = ["SingleOrderStrategy"]

--------------------------------------------------------------------------------
/model/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

import warnings

from .base import Model


__all__ = ["Model", "warnings"]

--------------------------------------------------------------------------------
/scripts/data_collector/baostock_5min/requirements.txt:
--------------------------------------------------------------------------------
loguru
fire
requests
numpy
pandas
tqdm
lxml
yahooquery
joblib
beautifulsoup4
bs4
soupsieve
baostock

--------------------------------------------------------------------------------
/examples/benchmarks/GRU/README.md:
--------------------------------------------------------------------------------
# Gated Recurrent Unit (GRU)
* Paper: [Learning Phrase Representations using RNN Encoder–Decoder for Statistical Machine Translation](https://aclanthology.org/D14-1179.pdf).

--------------------------------------------------------------------------------
/examples/benchmarks/LSTM/README.md:
--------------------------------------------------------------------------------
# Long Short-Term Memory (LSTM)
* Paper: [Long Short-Term Memory](https://direct.mit.edu/neco/article-abstract/9/8/1735/6109/Long-Short-Term-Memory?redirectedFrom=fulltext).

--------------------------------------------------------------------------------
/cli/data.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

import fire
from qlib.tests.data import GetData


if __name__ == "__main__":
    fire.Fire(GetData)

--------------------------------------------------------------------------------
/model/meta/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from .task import MetaTask
from .dataset import MetaTaskDataset


__all__ = ["MetaTask", "MetaTaskDataset"]

--------------------------------------------------------------------------------
/tests/pytest.ini:
--------------------------------------------------------------------------------
[pytest]
markers =
    slow: marks tests as slow (deselect with '-m "not slow"')
filterwarnings =
    ignore:.*rng.randint:DeprecationWarning
    ignore:.*Casting input x to numpy array:UserWarning

--------------------------------------------------------------------------------
/scripts/get_data.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

import fire
from qlib.tests.data import GetData


if __name__ == "__main__":
    fire.Fire(GetData)
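Both /cli/data.py and /scripts/get_data.py above are thin fire wrappers around `qlib.tests.data.GetData`. As a minimal sketch of the equivalent direct call, the `qlib_data` method and its `target_dir`/`region` arguments follow Qlib's public data API; the values shown are conventional defaults, not something mandated by this repository:

```python
# Roughly equivalent to:
#   python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --region cn
from qlib.tests.data import GetData

# Download the prepared CN daily dataset into a conventional location.
GetData().qlib_data(target_dir="~/.qlib/qlib_data/cn_data", region="cn")
```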
--------------------------------------------------------------------------------
/contrib/report/analysis_model/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from .analysis_model_performance import model_performance_graph


__all__ = ["model_performance_graph"]

--------------------------------------------------------------------------------
/examples/benchmarks/TabNet/README.md:
--------------------------------------------------------------------------------
# TabNet
* Code: [https://github.com/dreamquark-ai/tabnet](https://github.com/dreamquark-ai/tabnet)
* Paper: [TabNet: Attentive Interpretable Tabular Learning](https://arxiv.org/pdf/1908.07442.pdf).

--------------------------------------------------------------------------------
/contrib/meta/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from .data_selection import MetaTaskDS, MetaDatasetDS, MetaModelDS


__all__ = ["MetaTaskDS", "MetaDatasetDS", "MetaModelDS"]

--------------------------------------------------------------------------------
/contrib/workflow/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
from .record_temp import MultiSegRecord
from .record_temp import SignalMseRecord


__all__ = ["MultiSegRecord", "SignalMseRecord"]

--------------------------------------------------------------------------------
/examples/benchmarks/IGMTF/README.md:
--------------------------------------------------------------------------------
# IGMTF
* Code: [https://github.com/Wentao-Xu/IGMTF](https://github.com/Wentao-Xu/IGMTF)
* Paper: [IGMTF: An Instance-wise Graph-based Framework for Multivariate Time Series Forecasting](https://arxiv.org/abs/2109.06489).

--------------------------------------------------------------------------------
/examples/benchmarks/TCN/README.md:
--------------------------------------------------------------------------------
# TCN
* Code: [https://github.com/locuslab/TCN](https://github.com/locuslab/TCN)
* Paper: [An Empirical Evaluation of Generic Convolutional and Recurrent Networks for Sequence Modeling](https://arxiv.org/abs/1803.01271).

--------------------------------------------------------------------------------
/contrib/meta/data_selection/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from .dataset import MetaDatasetDS, MetaTaskDS
from .model import MetaModelDS


__all__ = ["MetaDatasetDS", "MetaTaskDS", "MetaModelDS"]

--------------------------------------------------------------------------------
/examples/benchmarks/XGBoost/README.md:
--------------------------------------------------------------------------------
# XGBoost
* Code: [https://github.com/dmlc/xgboost](https://github.com/dmlc/xgboost)
* Paper: XGBoost: A Scalable Tree Boosting System. [https://dl.acm.org/doi/pdf/10.1145/2939672.2939785](https://dl.acm.org/doi/pdf/10.1145/2939672.2939785).

--------------------------------------------------------------------------------
/examples/benchmarks/Transformer/README.md:
--------------------------------------------------------------------------------
# Transformer
* Code: [https://github.com/tensorflow/tensor2tensor](https://github.com/tensorflow/tensor2tensor)
* Paper: [Attention is All you Need](https://proceedings.neurips.cc/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf).

--------------------------------------------------------------------------------
/rl/data/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""Common utilities to handle ad-hoc-styled data.

Most of these snippets come from research projects (paper code).
Please be cautious when using them in production.
"""

--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
import os
import sys

# Ignore RL tests on non-Linux platforms.
collect_ignore = []

if sys.platform != "linux":
    for root, dirs, files in os.walk("rl"):
        for file in files:
            collect_ignore.append(os.path.join(root, file))

--------------------------------------------------------------------------------
/data/storage/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from .storage import CalendarStorage, InstrumentStorage, FeatureStorage, CalVT, InstVT, InstKT


__all__ = ["CalendarStorage", "InstrumentStorage", "FeatureStorage", "CalVT", "InstVT", "InstKT"]

--------------------------------------------------------------------------------
/docs/_static/demo.sh:
--------------------------------------------------------------------------------
#!/bin/sh
git clone https://github.com/microsoft/qlib.git
cd qlib
ls
pip install pyqlib
# or
# pip install numpy
# pip install --upgrade cython
# python setup.py install
cd examples
ls
qrun benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml

--------------------------------------------------------------------------------
/examples/benchmarks/ADARNN/README.md:
--------------------------------------------------------------------------------
# AdaRNN
* Code: [https://github.com/jindongwang/transferlearning/tree/master/code/deep/adarnn](https://github.com/jindongwang/transferlearning/tree/master/code/deep/adarnn)
* Paper: [AdaRNN: Adaptive Learning and Forecasting for Time Series](https://arxiv.org/pdf/2108.04443.pdf).

--------------------------------------------------------------------------------
/examples/benchmarks/HIST/README.md:
--------------------------------------------------------------------------------
# HIST
* Code: [https://github.com/Wentao-Xu/HIST](https://github.com/Wentao-Xu/HIST)
* Paper: [HIST: A Graph-based Framework for Stock Trend Forecasting via Mining Concept-Oriented Shared Information](https://arxiv.org/abs/2110.13716).

--------------------------------------------------------------------------------
/contrib/strategy/optimizer/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from .base import BaseOptimizer
from .optimizer import PortfolioOptimizer
from .enhanced_indexing import EnhancedIndexingOptimizer


__all__ = ["BaseOptimizer", "PortfolioOptimizer", "EnhancedIndexingOptimizer"]

--------------------------------------------------------------------------------
/contrib/rolling/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""
The difference between this module and the scripts in examples/benchmarks_dynamic:
- This module only focuses on providing a general rolling implementation.
  Anything specific to a benchmark is placed in examples/benchmarks_dynamic.
"""

--------------------------------------------------------------------------------
/examples/README.md:
--------------------------------------------------------------------------------
# Requirements

Here are the minimal hardware requirements to run the `workflow_by_code` example.
- Memory: 16G
- Free Disk: 5G


# NOTE
The results will vary slightly across different OSs (the variance of annualized return will be less than 2%).
The evaluation results on the `README.md` page are from Linux OS.

--------------------------------------------------------------------------------
/rl/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from .interpreter import Interpreter, StateInterpreter, ActionInterpreter
from .reward import Reward, RewardCombination
from .simulator import Simulator

__all__ = ["Interpreter", "StateInterpreter", "ActionInterpreter", "Reward", "RewardCombination", "Simulator"]

--------------------------------------------------------------------------------
/examples/benchmarks/CatBoost/README.md:
--------------------------------------------------------------------------------
# CatBoost
* Code: [https://github.com/catboost/catboost](https://github.com/catboost/catboost)
* Paper: CatBoost: unbiased boosting with categorical features. [https://proceedings.neurips.cc/paper/2018/file/14491b756b3a51daac41c24863285549-Paper.pdf](https://proceedings.neurips.cc/paper/2018/file/14491b756b3a51daac41c24863285549-Paper.pdf).

--------------------------------------------------------------------------------
/docs/component/rl/toctree.rst:
--------------------------------------------------------------------------------
.. _rl:

========================================================================
Reinforcement Learning in Quantitative Trading
========================================================================

.. toctree::
    Guidance
    Overall
    Quick Start
    Framework

--------------------------------------------------------------------------------
/contrib/strategy/optimizer/base.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

import abc


class BaseOptimizer(abc.ABC):
    """Construct a portfolio with an optimization-related method"""

    @abc.abstractmethod
    def __call__(self, *args, **kwargs) -> object:
        """Generate an optimized portfolio allocation"""
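`BaseOptimizer` above only fixes a `__call__` contract. As a hedged illustration of how a concrete optimizer could plug into it, here is a toy inverse-volatility subclass; `InverseVolOptimizer` is hypothetical and not part of this repository, and the import assumes the installed `qlib.contrib` package layout:

```python
import numpy as np

from qlib.contrib.strategy.optimizer import BaseOptimizer  # assumed install path


class InverseVolOptimizer(BaseOptimizer):
    """Toy example (not in the repo): weight assets inversely to volatility."""

    def __call__(self, vol: np.ndarray) -> np.ndarray:
        w = 1.0 / np.clip(vol, 1e-8, None)  # guard against zero volatility
        return w / w.sum()                  # normalize weights to sum to 1
```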
--------------------------------------------------------------------------------
/rl/seed.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""Defines a set of initial state definitions and state-set definitions.

With single-asset order execution only, the only seed is the order.
"""

from typing import TypeVar

InitialStateType = TypeVar("InitialStateType")
"""Type of data that creates the simulator."""

--------------------------------------------------------------------------------
/contrib/report/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

GRAPH_NAME_LIST = [
    "analysis_position.report_graph",
    "analysis_position.score_ic_graph",
    "analysis_position.cumulative_return_graph",
    "analysis_position.risk_analysis_graph",
    "analysis_position.rank_label_graph",
    "analysis_model.model_performance_graph",
]

--------------------------------------------------------------------------------
/model/riskmodel/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from .base import RiskModel
from .poet import POETCovEstimator
from .shrink import ShrinkCovEstimator
from .structured import StructuredCovEstimator


__all__ = [
    "RiskModel",
    "POETCovEstimator",
    "ShrinkCovEstimator",
    "StructuredCovEstimator",
]

--------------------------------------------------------------------------------
/examples/benchmarks/KRNN/README.md:
--------------------------------------------------------------------------------
# KRNN
* Code: [https://github.com/microsoft/FOST/blob/main/fostool/model/krnn.py](https://github.com/microsoft/FOST/blob/main/fostool/model/krnn.py)


# Introduction to the settings/configs.
* torch_geometric is used in the original FOST model, but we do not use it here.
* Make sure your CUDA version matches your torch version so the GPU can be used; we use CUDA==10.2 and torch.__version__==1.12.1.

--------------------------------------------------------------------------------
/examples/benchmarks/ALSTM/README.md:
--------------------------------------------------------------------------------
# ALSTM

- ALSTM adds a temporal attentive aggregation layer on top of a standard LSTM.

- Paper: A dual-stage attention-based recurrent neural network for time series prediction.

  [https://www.ijcai.org/Proceedings/2017/0366.pdf](https://www.ijcai.org/Proceedings/2017/0366.pdf)

- NOTE: The current implementation is a simplified version of ALSTM: an LSTM with attention.
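To make "an LSTM with attention" concrete, a minimal sketch of the idea follows; it only illustrates temporal attentive aggregation and is not Qlib's `pytorch_alstm` implementation (layer sizes are arbitrary):

```python
import torch
import torch.nn as nn


class ToyALSTM(nn.Module):
    """LSTM whose hidden states are aggregated by a learned attention score."""

    def __init__(self, d_feat: int = 6, hidden_size: int = 64):
        super().__init__()
        self.rnn = nn.LSTM(d_feat, hidden_size, batch_first=True)
        self.att = nn.Linear(hidden_size, 1)        # one score per time step
        self.fc_out = nn.Linear(hidden_size * 2, 1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # x: [batch, time, d_feat]
        out, _ = self.rnn(x)                         # [batch, time, hidden]
        score = torch.softmax(self.att(out), dim=1)  # attention over time steps
        agg = (out * score).sum(dim=1)               # attentive aggregation
        last = out[:, -1, :]                         # plain LSTM summary
        return self.fc_out(torch.cat([agg, last], dim=1)).squeeze(-1)
```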
--------------------------------------------------------------------------------
/examples/benchmarks/Sandwich/README.md:
--------------------------------------------------------------------------------
# Sandwich
* Code: [https://github.com/microsoft/FOST/blob/main/fostool/model/sandwich.py](https://github.com/microsoft/FOST/blob/main/fostool/model/sandwich.py)


# Introduction to the settings/configs.
* torch_geometric is used in the original FOST model, but we do not use it here.
* Make sure your CUDA version matches your torch version so the GPU can be used; we use CUDA==10.2 and torch.__version__==1.12.1.

--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
# Microsoft Open Source Code of Conduct

This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).

Resources:

- [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/)
- [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/)
- Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns

--------------------------------------------------------------------------------
/contrib/report/analysis_position/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from .cumulative_return import cumulative_return_graph
from .score_ic import score_ic_graph
from .report import report_graph
from .rank_label import rank_label_graph
from .risk_analysis import risk_analysis_graph


__all__ = ["cumulative_return_graph", "score_ic_graph", "report_graph", "rank_label_graph", "risk_analysis_graph"]

--------------------------------------------------------------------------------
/examples/benchmarks/SFM/README.md:
--------------------------------------------------------------------------------
# State-Frequency-Memory
- State Frequency Memory (SFM) is a novel recurrent network that uses the Discrete Fourier Transform to decompose the hidden states of memory cells and capture multi-frequency trading patterns from past market data to make stock price predictions.
- Paper: Stock Price Prediction via Discovering Multi-Frequency Trading Patterns. [http://www.eecs.ucf.edu/~gqi/publications/kdd2017_stock.pdf](http://www.eecs.ucf.edu/~gqi/publications/kdd2017_stock.pdf).

--------------------------------------------------------------------------------
/examples/benchmarks/GATs/README.md:
--------------------------------------------------------------------------------
# GATs
* Graph Attention Networks (GATs) apply masked self-attention layers to graph-structured data. Nodes in stacked layers carry different weights and can attend over their neighborhoods' features, without requiring any costly matrix operation (such as inversion) or prior knowledge of the graph structure.
* The code used in Qlib is our own PyTorch implementation.
* Paper: Graph Attention Networks https://arxiv.org/pdf/1710.10903.pdf
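As a hedged sketch of the masked self-attention described above (in the spirit of the GAT paper, not Qlib's `pytorch_gats` implementation), a single dense attention layer can be written as follows; it assumes `adj` includes self-loops so every row has at least one neighbor:

```python
import torch
import torch.nn as nn


class ToyGATLayer(nn.Module):
    """One graph-attention layer over a dense 0/1 adjacency matrix."""

    def __init__(self, in_dim: int, out_dim: int):
        super().__init__()
        self.W = nn.Linear(in_dim, out_dim, bias=False)
        self.a = nn.Linear(2 * out_dim, 1, bias=False)
        self.leaky_relu = nn.LeakyReLU(0.2)

    def forward(self, h: torch.Tensor, adj: torch.Tensor) -> torch.Tensor:
        # h: [N, in_dim], adj: [N, N] 0/1 mask with self-loops
        z = self.W(h)                           # [N, out_dim]
        n = z.size(0)
        zi = z.unsqueeze(1).expand(n, n, -1)    # features of node i
        zj = z.unsqueeze(0).expand(n, n, -1)    # features of node j
        e = self.leaky_relu(self.a(torch.cat([zi, zj], dim=-1))).squeeze(-1)
        e = e.masked_fill(adj == 0, float("-inf"))  # mask non-neighbors
        alpha = torch.softmax(e, dim=1)         # attention within each neighborhood
        return alpha @ z                        # weighted aggregation of neighbors
```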
5 | * Paper: Graph Attention Networks https://arxiv.org/pdf/1710.10903.pdf -------------------------------------------------------------------------------- /utils/exceptions.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | 5 | # Base exception class 6 | class QlibException(Exception): 7 | pass 8 | 9 | 10 | class RecorderInitializationError(QlibException): 11 | """Error type for re-initialization when starting an experiment""" 12 | 13 | 14 | class LoadObjectError(QlibException): 15 | """Error type for Recorder when it cannot load an object""" 16 | 17 | 18 | class ExpAlreadyExistError(Exception): 19 | """Experiment already exists""" 20 | -------------------------------------------------------------------------------- /contrib/tuner/space.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | # pylint: skip-file 5 | # flake8: noqa 6 | 7 | from hyperopt import hp 8 | 9 | 10 | TopkAmountStrategySpace = { 11 | "topk": hp.choice("topk", [30, 35, 40]), 12 | "buffer_margin": hp.choice("buffer_margin", [200, 250, 300]), 13 | } 14 | 15 | QLibDataLabelSpace = { 16 | "labels": hp.choice( 17 | "labels", 18 | [["Ref($vwap, -2)/Ref($vwap, -1) - 1"], ["Ref($close, -5)/$close - 1"]], 19 | ) 20 | } 21 | -------------------------------------------------------------------------------- /examples/rl_order_execution/scripts/merge_orders.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import os 3 | import pandas as pd 4 | from tqdm import tqdm 5 | 6 | for tag in ["test", "valid"]: 7 | files = os.listdir(os.path.join("data/orders/", tag)) 8 | dfs = [] 9 | for f in tqdm(files): 10 | df = pickle.load(open(os.path.join("data/orders/", tag, f), "rb")) 11 | df = df.drop(["$close0"], axis=1) 12 | dfs.append(df) 13 | 14 | total_df = pd.concat(dfs) 15 | pickle.dump(total_df, open(os.path.join("data", "orders", f"{tag}_orders.pkl"), "wb")) 16 | -------------------------------------------------------------------------------- /rl/trainer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | """Train, test, inference utilities.""" 5 | 6 | from .api import backtest, train 7 | from .callbacks import Checkpoint, EarlyStopping, MetricsWriter 8 | from .trainer import Trainer 9 | from .vessel import TrainingVessel, TrainingVesselBase 10 | 11 | __all__ = [ 12 | "Trainer", 13 | "TrainingVessel", 14 | "TrainingVesselBase", 15 | "Checkpoint", 16 | "EarlyStopping", 17 | "MetricsWriter", 18 | "train", 19 | "backtest", 20 | ] 21 | -------------------------------------------------------------------------------- /scripts/data_collector/contrib/future_trading_date_collector/README.md: -------------------------------------------------------------------------------- 1 | # Get future trading days 2 | 3 | > `D.calendar(future=True)` will be used 4 | 5 | ## Collector Data 6 | 7 | ```bash 8 | pip install -r requirements.txt 9 | ``` 10 | 11 | ## Collector Data 12 | 13 | ```bash 14 | # collect future trading days, used in the qlib calendar.
15 | python future_trading_date_collector.py --qlib_dir ~/.qlib/qlib_data/cn_data --freq day 16 | ``` 17 | 18 | ## Parameters 19 | 20 | - qlib_dir: qlib data directory 21 | - freq: value from [`day`, `1min`], default `day` 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /constant.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | # REGION CONST 5 | from typing import TypeVar 6 | 7 | import numpy as np 8 | import pandas as pd 9 | 10 | REG_CN = "cn" 11 | REG_US = "us" 12 | REG_TW = "tw" 13 | 14 | # Epsilon for avoiding division by zero. 15 | EPS = 1e-12 16 | 17 | # Infinity as an integer 18 | INF = int(1e18) 19 | ONE_DAY = pd.Timedelta("1day") 20 | ONE_MIN = pd.Timedelta("1min") 21 | EPS_T = pd.Timedelta("1s") # use 1 second to exclude the right interval point 22 | float_or_ndarray = TypeVar("float_or_ndarray", float, np.ndarray) 23 | -------------------------------------------------------------------------------- /examples/hyperparameter/LightGBM/Readme.md: -------------------------------------------------------------------------------- 1 | # LightGBM hyperparameter 2 | 3 | ## Alpha158 4 | First terminal 5 | ``` 6 | optuna create-study --study LGBM_158 --storage sqlite:///db.sqlite3 7 | optuna-dashboard --port 5000 --host 0.0.0.0 sqlite:///db.sqlite3 8 | ``` 9 | Second terminal 10 | ``` 11 | python hyperparameter_158.py 12 | ``` 13 | 14 | ## Alpha360 15 | First terminal 16 | ``` 17 | optuna create-study --study LGBM_360 --storage sqlite:///db.sqlite3 18 | optuna-dashboard --port 5000 --host 0.0.0.0 sqlite:///db.sqlite3 19 | ``` 20 | Second terminal 21 | ``` 22 | python hyperparameter_360.py 23 | ``` 24 | -------------------------------------------------------------------------------- /workflow/task/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | """ 4 | Task-related workflow is implemented in this folder 5 | 6 | A typical task workflow 7 | 8 | | Step | Description | 9 | |-----------------------+------------------------------------------------| 10 | | TaskGen | Generating tasks. | 11 | | TaskManager(optional) | Manage generated tasks | 12 | | run task | retrieve tasks from TaskManager and run tasks. | 13 | """ 14 | -------------------------------------------------------------------------------- /examples/benchmarks/LightGBM/README.md: -------------------------------------------------------------------------------- 1 | # LightGBM 2 | * Code: [https://github.com/microsoft/LightGBM](https://github.com/microsoft/LightGBM) 3 | * Paper: LightGBM: A Highly Efficient Gradient Boosting 4 | Decision Tree. [https://proceedings.neurips.cc/paper/2017/file/6449f44a102fde848669bdd9eb6b76fa-Paper.pdf](https://proceedings.neurips.cc/paper/2017/file/6449f44a102fde848669bdd9eb6b76fa-Paper.pdf). 5 | 6 | 7 | # Introductions about the settings/configs. 8 | 9 | `workflow_config_lightgbm_multi_freq.yaml` 10 | - It uses data sources of different frequencies (i.e. multiple frequencies) for daily prediction.
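To make the multi-frequency idea concrete, here is a tiny pandas-only sketch with made-up data (illustrative only; the real config wires this up through data handlers and instrument processors such as `features_sample.py` / `features_resample_N.py`):

```python
import pandas as pd

day = pd.DataFrame(
    {"close_day": [10.0, 10.5]},
    index=pd.to_datetime(["2020-01-02", "2020-01-03"]),
)
minute = pd.DataFrame(
    {"close_1min": [10.1, 10.2, 10.6, 10.7]},
    index=pd.to_datetime(
        ["2020-01-02 14:59", "2020-01-02 15:00", "2020-01-03 14:59", "2020-01-03 15:00"]
    ),
)

# collapse the 1min series to one row per day (last bar), then join with the daily data
minute_daily = minute.resample("1D").last()
features = day.join(minute_daily)  # a single daily table mixing both frequencies
print(features)
```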
11 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy 4 | from setuptools import Extension, setup 5 | 6 | 7 | NUMPY_INCLUDE = numpy.get_include() 8 | 9 | 10 | setup( 11 | ext_modules=[ 12 | Extension( 13 | "qlib.data._libs.rolling", 14 | ["qlib/data/_libs/rolling.pyx"], 15 | language="c++", 16 | include_dirs=[NUMPY_INCLUDE], 17 | ), 18 | Extension( 19 | "qlib.data._libs.expanding", 20 | ["qlib/data/_libs/expanding.pyx"], 21 | language="c++", 22 | include_dirs=[NUMPY_INCLUDE], 23 | ), 24 | ], 25 | ) 26 | -------------------------------------------------------------------------------- /rl/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | from .data_queue import DataQueue 5 | from .env_wrapper import EnvWrapper, EnvWrapperStatus 6 | from .finite_env import FiniteEnvType, vectorize_env 7 | from .log import ConsoleWriter, CsvWriter, LogBuffer, LogCollector, LogLevel, LogWriter 8 | 9 | __all__ = [ 10 | "LogLevel", 11 | "DataQueue", 12 | "EnvWrapper", 13 | "FiniteEnvType", 14 | "LogCollector", 15 | "LogWriter", 16 | "vectorize_env", 17 | "ConsoleWriter", 18 | "CsvWriter", 19 | "EnvWrapperStatus", 20 | "LogBuffer", 21 | ] 22 | -------------------------------------------------------------------------------- /examples/rl_order_execution/exp_configs/backtest_twap.yml: -------------------------------------------------------------------------------- 1 | order_file: ./data/orders/test_orders.pkl 2 | start_time: "9:30" 3 | end_time: "14:54" 4 | data_granularity: "5min" 5 | qlib: 6 | provider_uri_5min: ./data/bin/ 7 | exchange: 8 | limit_threshold: null 9 | deal_price: ["$close", "$close"] 10 | volume_threshold: null 11 | strategies: 12 | 1day: 13 | class: TWAPStrategy 14 | kwargs: {} 15 | module_path: qlib.contrib.strategy.rule_strategy 16 | 30min: 17 | class: TWAPStrategy 18 | kwargs: {} 19 | module_path: qlib.contrib.strategy.rule_strategy 20 | concurrency: 16 21 | output_dir: outputs/twap/ 22 | -------------------------------------------------------------------------------- /scripts/data_collector/cn_index/README.md: -------------------------------------------------------------------------------- 1 | # CSI300/CSI100/CSI500 History Companies Collection 2 | 3 | ## Requirements 4 | 5 | ```bash 6 | pip install -r requirements.txt 7 | ``` 8 | 9 | ## Collector Data 10 | 11 | ```bash 12 | # parse instruments, used in qlib/instruments. 13 | python collector.py --index_name CSI300 --qlib_dir ~/.qlib/qlib_data/cn_data --method parse_instruments 14 | 15 | # parse new companies 16 | python collector.py --index_name CSI300 --qlib_dir ~/.qlib/qlib_data/cn_data --method save_new_companies 17 | 18 | # supported index_name values: CSI300, CSI100, CSI500 19 | # help 20 | python collector.py --help 21 | ``` 22 | 23 | -------------------------------------------------------------------------------- /scripts/data_collector/us_index/README.md: -------------------------------------------------------------------------------- 1 | # NASDAQ100/SP500/SP400/DJIA History Companies Collection 2 | 3 | ## Requirements 4 | 5 | ```bash 6 | pip install -r requirements.txt 7 | ``` 8 | 9 | ## Collector Data 10 | 11 | ```bash 12 | # parse instruments, used in qlib/instruments.
13 | python collector.py --index_name SP500 --qlib_dir ~/.qlib/qlib_data/us_data --method parse_instruments 14 | 15 | # parse new companies 16 | python collector.py --index_name SP500 --qlib_dir ~/.qlib/qlib_data/us_data --method save_new_companies 17 | 18 | # supported index_name values: SP500, NASDAQ100, DJIA, SP400 19 | # help 20 | python collector.py --help 21 | ``` 22 | 23 | -------------------------------------------------------------------------------- /contrib/strategy/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | 5 | from .signal_strategy import ( 6 | TopkDropoutStrategy, 7 | WeightStrategyBase, 8 | EnhancedIndexingStrategy, 9 | ) 10 | 11 | from .rule_strategy import ( 12 | TWAPStrategy, 13 | SBBStrategyBase, 14 | SBBStrategyEMA, 15 | ) 16 | 17 | from .cost_control import SoftTopkStrategy 18 | 19 | 20 | __all__ = [ 21 | "TopkDropoutStrategy", 22 | "WeightStrategyBase", 23 | "EnhancedIndexingStrategy", 24 | "TWAPStrategy", 25 | "SBBStrategyBase", 26 | "SBBStrategyEMA", 27 | "SoftTopkStrategy", 28 | ] 29 | -------------------------------------------------------------------------------- /contrib/online/__init__.py: -------------------------------------------------------------------------------- 1 | # pylint: skip-file 2 | # flake8: noqa 3 | 4 | ''' 5 | TODO: 6 | 7 | - Online requires the model to have the following method 8 | def get_data_with_date(self, date, **kwargs): 9 | """ 10 | Will be called in the online module; 11 | needs to return the data that is used to predict the label (score) of stocks at date. 12 | 13 | :param 14 | date: pd.Timestamp 15 | predict date 16 | :return: 17 | data: the input data that is used to predict the label (score) of stocks at the predict date. 18 | """ 19 | raise NotImplementedError("get_data_with_date for this model is not implemented.") 20 | 21 | ''' 22 | -------------------------------------------------------------------------------- /examples/benchmarks/LightGBM/features_resample_N.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | import pandas as pd 5 | 6 | from qlib.data.inst_processor import InstProcessor 7 | from qlib.utils.resam import resam_calendar 8 | 9 | 10 | class ResampleNProcessor(InstProcessor): 11 | def __init__(self, target_frq: str, **kwargs): 12 | self.target_frq = target_frq 13 | 14 | def __call__(self, df: pd.DataFrame, *args, **kwargs): 15 | df.index = pd.to_datetime(df.index) 16 | res_index = resam_calendar(df.index, "1min", self.target_frq) 17 | df = df.resample(self.target_frq).last().reindex(res_index) 18 | return df 19 | -------------------------------------------------------------------------------- /model/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License.
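# Usage sketch (hypothetical, for illustration only): ConcatDataset below zips
# several datasets index-by-index, e.g.
#     pairs = ConcatDataset(feature_ds, label_ds)
#     x, y = pairs[0]  # one sample drawn from each dataset
# while IndexSampler attaches the integer index to each sampled item.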
3 | 4 | from torch.utils.data import Dataset 5 | 6 | 7 | class ConcatDataset(Dataset): 8 | def __init__(self, *datasets): 9 | self.datasets = datasets 10 | 11 | def __getitem__(self, i): 12 | return tuple(d[i] for d in self.datasets) 13 | 14 | def __len__(self): 15 | return min(len(d) for d in self.datasets) 16 | 17 | 18 | class IndexSampler: 19 | def __init__(self, sampler): 20 | self.sampler = sampler 21 | 22 | def __getitem__(self, i: int): 23 | return self.sampler[i], i 24 | 25 | def __len__(self): 26 | return len(self.sampler) 27 | -------------------------------------------------------------------------------- /scripts/data_collector/contrib/fill_cn_1min_data/README.md: -------------------------------------------------------------------------------- 1 | # Use 1d data to fill in the missing symbols relative to 1min 2 | 3 | 4 | ## Requirements 5 | 6 | ```bash 7 | pip install -r requirements.txt 8 | ``` 9 | 10 | ## Fill 1min data 11 | 12 | ```bash 13 | python fill_cn_1min_data.py --data_1min_dir ~/.qlib/csv_data/cn_data_1min --qlib_data_1d_dir ~/.qlib/qlib_data/cn_data 14 | ``` 15 | 16 | ## Parameters 17 | 18 | - data_1min_dir: csv data 19 | - qlib_data_1d_dir: qlib data directory 20 | - max_workers: `ThreadPoolExecutor(max_workers=max_workers)`, by default *16* 21 | - date_field_name: date field name, by default *date* 22 | - symbol_field_name: symbol field name, by default *symbol* 23 | 24 | -------------------------------------------------------------------------------- /data/inst_processor.py: -------------------------------------------------------------------------------- 1 | import abc 2 | import json 3 | import pandas as pd 4 | 5 | 6 | class InstProcessor: 7 | @abc.abstractmethod 8 | def __call__(self, df: pd.DataFrame, instrument, *args, **kwargs): 9 | """ 10 | process the data 11 | 12 | NOTE: **the processor may change the content of `df` in place!** 13 | Users should keep a copy of the data outside 14 | 15 | Parameters 16 | ---------- 17 | df : pd.DataFrame 18 | The raw_df of handler or result from previous processor. 19 | """ 20 | 21 | def __str__(self): 22 | return f"{self.__class__.__name__}:{json.dumps(self.__dict__, sort_keys=True, default=str)}" 23 | -------------------------------------------------------------------------------- /examples/benchmarks/TFT/libs/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Google Research Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | -------------------------------------------------------------------------------- /examples/benchmarks_dynamic/baseline/README.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | 3 | This is the framework for periodically Rolling Retrain (RR) forecasting models. RR adapts to market dynamics by periodically retraining on up-to-date data; a minimal sketch of this loop is shown below.
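A toy walk-forward sketch of the RR loop (illustrative only: `fit`/`predict` are trivial stand-ins and the window sizes are made up; the real workflow delegates all of this to qlib's rolling utilities):

```python
import pandas as pd

# toy stand-ins for a real model; the benchmark uses qlib model classes instead
def fit(train_df):
    return train_df["feature"].mean()  # "model" = the mean of one feature

def predict(model, test_df):
    return pd.Series(model, index=test_df.index, name="score")

def rolling_retrain(data, train_len=500, step=60):
    """Walk forward: retrain on a trailing window, then score the next `step` rows."""
    preds = []
    for end in range(train_len, len(data), step):
        model = fit(data.iloc[end - train_len:end])              # refit on up-to-date window
        preds.append(predict(model, data.iloc[end:end + step]))  # score the next period
    return pd.concat(preds)

data = pd.DataFrame({"feature": range(1000)})
print(rolling_retrain(data).tail())
```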
4 | 5 | ## Run the Code 6 | Users can try RR by running the following command: 7 | ```bash 8 | python rolling_benchmark.py run 9 | ``` 10 | 11 | The default forecasting model is `Linear`. Users can choose other forecasting models by changing the `model_type` parameter. 12 | For example, users can try the `LightGBM` forecasting model by running the following command: 13 | ```bash 14 | python rolling_benchmark.py --conf_path=workflow_config_lightgbm_Alpha158.yaml run 15 | 16 | ``` 17 | -------------------------------------------------------------------------------- /examples/benchmarks/TFT/data_formatters/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Google Research Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | -------------------------------------------------------------------------------- /examples/benchmarks/TFT/expt_settings/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Google Research Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | -------------------------------------------------------------------------------- /contrib/rolling/__main__.py: -------------------------------------------------------------------------------- 1 | import fire 2 | from qlib import auto_init 3 | from qlib.contrib.rolling.base import Rolling 4 | from qlib.utils.mod import find_all_classes 5 | 6 | if __name__ == "__main__": 7 | sub_commands = {} 8 | for cls in find_all_classes("qlib.contrib.rolling", Rolling): 9 | sub_commands[cls.__module__.split(".")[-1]] = cls 10 | # The sub_commands will be like 11 | # {'base': <class 'qlib.contrib.rolling.base.Rolling'>, ...} 12 | # So you can run it with commands like the ones below 13 | # - `python -m qlib.contrib.rolling base --conf_path <yaml path> run` 14 | # - `base` can be replaced with other module names 15 | auto_init() 16 | fire.Fire(sub_commands) 17 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = python3 -msphinx 7 | SPHINXPROJ = Quantlab 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help".
12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | pip install -r requirements.txt 21 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 22 | -------------------------------------------------------------------------------- /examples/benchmarks/LightGBM/features_sample.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import pandas as pd 3 | 4 | from qlib.data.inst_processor import InstProcessor 5 | 6 | 7 | class Resample1minProcessor(InstProcessor): 8 | """This processor resamples the data from 1min frequency to daily frequency by selecting a specific minute""" 9 | 10 | def __init__(self, hour: int, minute: int, **kwargs): 11 | self.hour = hour 12 | self.minute = minute 13 | 14 | def __call__(self, df: pd.DataFrame, *args, **kwargs): 15 | df.index = pd.to_datetime(df.index) 16 | df = df.loc[df.index.time == datetime.time(self.hour, self.minute)] 17 | df.index = df.index.normalize() 18 | return df 19 | -------------------------------------------------------------------------------- /scripts/data_collector/br_index/requirements.txt: -------------------------------------------------------------------------------- 1 | async-generator==1.10 2 | attrs==21.4.0 3 | certifi==2022.12.7 4 | cffi==1.15.0 5 | charset-normalizer==2.0.12 6 | cryptography==36.0.1 7 | fire==0.4.0 8 | h11==0.13.0 9 | idna==3.3 10 | loguru==0.6.0 11 | lxml==4.9.1 12 | multitasking==0.0.10 13 | numpy==1.22.2 14 | outcome==1.1.0 15 | pandas==1.4.1 16 | pycoingecko==2.2.0 17 | pycparser==2.21 18 | pyOpenSSL==22.0.0 19 | PySocks==1.7.1 20 | python-dateutil==2.8.2 21 | pytz==2021.3 22 | requests==2.27.1 23 | requests-futures==1.0.0 24 | six==1.16.0 25 | sniffio==1.2.0 26 | sortedcontainers==2.4.0 27 | termcolor==1.1.0 28 | tqdm==4.63.0 29 | trio==0.20.0 30 | trio-websocket==0.9.2 31 | urllib3==1.26.19 32 | wget==3.2 33 | wsproto==1.1.0 34 | yahooquery==2.2.15 35 | -------------------------------------------------------------------------------- /examples/rolling_process_data/README.md: -------------------------------------------------------------------------------- 1 | # Rolling Process Data 2 | 3 | This workflow is an example of `Rolling Process Data`. 4 | 5 | ## Background 6 | 7 | When rolling-training the models, data also needs to be generated for the different rolling windows. When the rolling window moves, the training data will change, and the processor's learnable state (such as standard deviation, mean, etc.) will also change. 8 | 9 | In order to avoid regenerating data, this example uses the `DataHandler-based DataLoader` to load the raw features that are not related to the rolling window, and then uses Processors to generate processed features related to the rolling window. 10 | 11 | 12 | ## Run the Code 13 | 14 | Run the example by running the following command: 15 | ```bash 16 | python workflow.py rolling_process 17 | ``` -------------------------------------------------------------------------------- /tests/test_contrib_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License.
3 | 4 | import unittest 5 | 6 | from qlib.contrib.model import all_model_classes 7 | 8 | 9 | class TestAllFlow(unittest.TestCase): 10 | def test_0_initialize(self): 11 | num = 0 12 | for model_class in all_model_classes: 13 | if model_class is not None: 14 | model = model_class() 15 | num += 1 16 | print("There are {:}/{:} valid models in total.".format(num, len(all_model_classes))) 17 | 18 | 19 | def suite(): 20 | _suite = unittest.TestSuite() 21 | _suite.addTest(TestAllFlow("test_0_initialize")) 22 | return _suite 23 | 24 | 25 | if __name__ == "__main__": 26 | runner = unittest.TextTestRunner() 27 | runner.run(suite()) 28 | -------------------------------------------------------------------------------- /examples/benchmarks/DoubleEnsemble/README.md: -------------------------------------------------------------------------------- 1 | # DoubleEnsemble 2 | * DoubleEnsemble is an ensemble framework leveraging learning-trajectory-based sample reweighting and shuffling-based feature selection to address both the low signal-to-noise ratio and the increasing number of features. It identifies the key samples based on the training dynamics of each sample and elicits key features based on the ablation impact of each feature via shuffling. The model is applicable to a wide range of base models, capable of extracting complex patterns, while mitigating the overfitting and instability issues for financial market prediction. 3 | * This code used in Qlib is implemented by ourselves. 4 | * Paper: DoubleEnsemble: A New Ensemble Method Based on Sample Reweighting and Feature Selection for Financial Data Analysis [https://arxiv.org/pdf/2010.01265.pdf](https://arxiv.org/pdf/2010.01265.pdf). -------------------------------------------------------------------------------- /data/dataset/weight.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | 5 | class Reweighter: 6 | def __init__(self, *args, **kwargs): 7 | """ 8 | To initialize the Reweighter, users should provide specific methods to let the reweighter do the reweighting (such as sample-wise, rule-based). 9 | """ 10 | raise NotImplementedError() 11 | 12 | def reweight(self, data: object) -> object: 13 | """ 14 | Get weights for data 15 | 16 | Parameters 17 | ---------- 18 | data : object 19 | The input data. 20 | The first dimension is the index of samples 21 | 22 | Returns 23 | ------- 24 | object: 25 | the weights info for the data 26 | """ 27 | raise NotImplementedError("This type of input is not supported") 28 | -------------------------------------------------------------------------------- /examples/benchmarks/TFT/README.md: -------------------------------------------------------------------------------- 1 | # Temporal Fusion Transformers Benchmark 2 | ## Source 3 | **Reference**: Lim, Bryan, et al. "Temporal fusion transformers for interpretable multi-horizon time series forecasting." arXiv preprint arXiv:1912.09363 (2019). 4 | 5 | **GitHub**: https://github.com/google-research/google-research/tree/master/tft 6 | 7 | ## Run the Workflow 8 | Users can follow ``workflow_by_code_tft.py`` to run the benchmark. 9 | 10 | ### Notes 11 | 1. Please be **aware** that this script only supports `Python 3.6 - 3.7`. 12 | 2. If the CUDA version on your machine is not 10.0, please remember to run the following commands `conda install anaconda cudatoolkit=10.0` and `conda install cudnn` on your machine. 13 | 3. 
The model must run on a GPU, or an error will be raised. 14 | 4. New datasets should be registered in ``data_formatters``; for details, please visit the source. 15 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM continuumio/miniconda3:latest 2 | 3 | WORKDIR /qlib 4 | 5 | COPY . . 6 | 7 | RUN apt-get update && \ 8 | apt-get install -y build-essential 9 | 10 | RUN conda create --name qlib_env python=3.8 -y 11 | RUN echo "conda activate qlib_env" >> ~/.bashrc 12 | ENV PATH /opt/conda/envs/qlib_env/bin:$PATH 13 | 14 | RUN python -m pip install --upgrade pip 15 | 16 | RUN python -m pip install numpy==1.23.5 17 | RUN python -m pip install pandas==1.5.3 18 | RUN python -m pip install importlib-metadata==5.2.0 19 | RUN python -m pip install "cloudpickle<3" 20 | RUN python -m pip install scikit-learn==1.3.2 21 | 22 | RUN python -m pip install cython packaging tables matplotlib statsmodels 23 | RUN python -m pip install pybind11 cvxpy 24 | 25 | ARG IS_STABLE="yes" 26 | 27 | RUN if [ "$IS_STABLE" = "yes" ]; then \ 28 | python -m pip install pyqlib; \ 29 | else \ 30 | python setup.py install; \ 31 | fi 32 | -------------------------------------------------------------------------------- /examples/benchmarks/TRA/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # we used random seeds (1 1000 2000 3000 4000 5000) in our experiments 4 | 5 | # Directly run from Qlib command `qrun` 6 | qrun configs/config_alstm.yaml 7 | 8 | qrun configs/config_transformer.yaml 9 | 10 | qrun configs/config_transformer_tra_init.yaml 11 | qrun configs/config_transformer_tra.yaml 12 | 13 | qrun configs/config_alstm_tra_init.yaml 14 | qrun configs/config_alstm_tra.yaml 15 | 16 | 17 | # Or setting different parameters with example.py 18 | python example.py --config_file configs/config_alstm.yaml 19 | 20 | python example.py --config_file configs/config_transformer.yaml 21 | 22 | python example.py --config_file configs/config_transformer_tra_init.yaml 23 | python example.py --config_file configs/config_transformer_tra.yaml 24 | 25 | python example.py --config_file configs/config_alstm_tra_init.yaml 26 | python example.py --config_file configs/config_alstm_tra.yaml 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo.
21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /rl/contrib/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | from __future__ import annotations 5 | 6 | from pathlib import Path 7 | 8 | import pandas as pd 9 | 10 | 11 | def read_order_file(order_file: Path | pd.DataFrame) -> pd.DataFrame: 12 | if isinstance(order_file, pd.DataFrame): 13 | return order_file 14 | 15 | order_file = Path(order_file) 16 | 17 | if order_file.suffix == ".pkl": 18 | order_df = pd.read_pickle(order_file).reset_index() 19 | elif order_file.suffix == ".csv": 20 | order_df = pd.read_csv(order_file) 21 | else: 22 | raise TypeError(f"Unsupported order file type: {order_file}") 23 | 24 | if "date" in order_df.columns: 25 | # legacy dataframe columns 26 | order_df = order_df.rename(columns={"date": "datetime", "order_type": "direction"}) 27 | order_df["datetime"] = order_df["datetime"].astype(str) 28 | 29 | return order_df 30 | -------------------------------------------------------------------------------- /contrib/tuner/launcher.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | # pylint: skip-file 5 | # flake8: noqa 6 | 7 | # coding=utf-8 8 | 9 | import argparse 10 | import importlib 11 | import os 12 | import yaml 13 | 14 | from .config import TunerConfigManager 15 | 16 | 17 | args_parser = argparse.ArgumentParser(prog="tuner") 18 | args_parser.add_argument( 19 | "-c", 20 | "--config_path", 21 | required=True, 22 | type=str, 23 | help="config path indicates where to load yaml config.", 24 | ) 25 | 26 | args = args_parser.parse_args() 27 | 28 | TUNER_CONFIG_MANAGER = TunerConfigManager(args.config_path) 29 | 30 | 31 | def run(): 32 | # 1. Get pipeline class. 33 | tuner_pipeline_class = getattr(importlib.import_module(".pipeline", package="qlib.contrib.tuner"), "Pipeline") 34 | # 2. Init tuner pipeline. 35 | tuner_pipeline = tuner_pipeline_class(TUNER_CONFIG_MANAGER) 36 | # 3. Begin to tune 37 | tuner_pipeline.run() 38 | -------------------------------------------------------------------------------- /tests/test_workflow.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | import unittest 4 | from pathlib import Path 5 | import shutil 6 | 7 | from qlib.workflow import R 8 | from qlib.tests import TestAutoData 9 | 10 | 11 | class WorkflowTest(TestAutoData): 12 | # Creating the directory manually doesn't work with mlflow, 13 | # so we add a subfolder named .trash when we create the directory. 
14 | TMP_PATH = Path("./.mlruns_tmp/.trash") 15 | 16 | def tearDown(self) -> None: 17 | if self.TMP_PATH.exists(): 18 | shutil.rmtree(self.TMP_PATH) 19 | 20 | def test_get_local_dir(self): 21 | """ """ 22 | self.TMP_PATH.mkdir(parents=True, exist_ok=True) 23 | 24 | with R.start(uri=str(self.TMP_PATH)): 25 | pass 26 | 27 | with R.uri_context(uri=str(self.TMP_PATH)): 28 | resume_recorder = R.get_recorder() 29 | resume_recorder.get_local_dir() 30 | 31 | 32 | if __name__ == "__main__": 33 | unittest.main() 34 | -------------------------------------------------------------------------------- /examples/model_interpreter/feature.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | 5 | import qlib 6 | from qlib.constant import REG_CN 7 | 8 | from qlib.utils import init_instance_by_config 9 | from qlib.tests.data import GetData 10 | from qlib.tests.config import CSI300_GBDT_TASK 11 | 12 | 13 | if __name__ == "__main__": 14 | # use default data 15 | provider_uri = "~/.qlib/qlib_data/cn_data" # target_dir 16 | GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True) 17 | 18 | qlib.init(provider_uri=provider_uri, region=REG_CN) 19 | 20 | ################################### 21 | # train model 22 | ################################### 23 | # model initialization 24 | model = init_instance_by_config(CSI300_GBDT_TASK["model"]) 25 | dataset = init_instance_by_config(CSI300_GBDT_TASK["dataset"]) 26 | model.fit(dataset) 27 | 28 | # get model feature importance 29 | feature_importance = model.get_feature_importance() 30 | print("feature importance:") 31 | print(feature_importance) 32 | -------------------------------------------------------------------------------- /tests/misc/test_get_multi_proc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | import unittest 5 | 6 | import qlib 7 | from qlib.data import D 8 | from qlib.tests import TestAutoData 9 | from multiprocessing import Pool 10 | 11 | 12 | def get_features(fields): 13 | qlib.init(provider_uri=TestAutoData.provider_uri, expression_cache=None, dataset_cache=None, joblib_backend="loky") 14 | return D.features(D.instruments("csi300"), fields) 15 | 16 | 17 | class TestGetData(TestAutoData): 18 | FIELDS = "$open,$close,$high,$low,$volume,$factor,$change".split(",") 19 | 20 | def test_multi_proc(self): 21 | """ 22 | For testing whether it will raise an error 23 | """ 24 | iter_n = 2 25 | pool = Pool(iter_n) 26 | 27 | res = [] 28 | for _ in range(iter_n): 29 | res.append(pool.apply_async(get_features, (self.FIELDS,), {})) 30 | 31 | for r in res: 32 | print(r.get()) 33 | 34 | pool.close() 35 | pool.join() 36 | 37 | 38 | if __name__ == "__main__": 39 | unittest.main() 40 | -------------------------------------------------------------------------------- /examples/benchmarks/GeneralPtNN/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Introduction 4 | 5 | What is GeneralPtNN: 6 | - It fixes the previous design that failed to support both time-series and tabular data. 7 | - Now you can just replace the PyTorch model structure to run a NN model. 8 | 9 | We provide an example to demonstrate the effectiveness of the current design.
10 | - `workflow_config_gru.yaml` aligns with the previous results of [GRU(Kyunghyun Cho, et al.)](../README.md#Alpha158-dataset) 11 | - `workflow_config_gru2mlp.yaml` demonstrates that we can convert a config from time-series to tabular data with minimal changes 12 | - You only have to change the net & dataset class to make the conversion. 13 | - `workflow_config_mlp.yaml` achieves similar functionality to [MLP](../README.md#Alpha158-dataset) 14 | 15 | # TODO 16 | 17 | - We will align existing models to the current design. 18 | 19 | - The result of `workflow_config_mlp.yaml` differs from the result of [MLP](../README.md#Alpha158-dataset) since GeneralPtNN has a different stopping method compared to previous implementations. Specifically, GeneralPtNN controls training by epochs, whereas previous methods were controlled by max_steps. 20 | -------------------------------------------------------------------------------- /examples/rolling_process_data/rolling_handler.py: -------------------------------------------------------------------------------- 1 | from qlib.data.dataset.handler import DataHandlerLP 2 | from qlib.data.dataset.loader import DataLoaderDH 3 | from qlib.contrib.data.handler import check_transform_proc 4 | 5 | 6 | class RollingDataHandler(DataHandlerLP): 7 | def __init__( 8 | self, 9 | start_time=None, 10 | end_time=None, 11 | infer_processors=[], 12 | learn_processors=[], 13 | fit_start_time=None, 14 | fit_end_time=None, 15 | data_loader_kwargs={}, 16 | ): 17 | infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time) 18 | learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time) 19 | 20 | data_loader = { 21 | "class": "DataLoaderDH", 22 | "kwargs": {**data_loader_kwargs}, 23 | } 24 | 25 | super().__init__( 26 | instruments=None, 27 | start_time=start_time, 28 | end_time=end_time, 29 | data_loader=data_loader, 30 | infer_processors=infer_processors, 31 | learn_processors=learn_processors, 32 | ) 33 | -------------------------------------------------------------------------------- /rl/order_execution/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | """ 5 | Currently it supports single-asset order execution. 6 | Multi-asset is on the way. 7 | """ 8 | 9 | from .interpreter import ( 10 | FullHistoryStateInterpreter, 11 | CurrentStepStateInterpreter, 12 | CategoricalActionInterpreter, 13 | TwapRelativeActionInterpreter, 14 | ) 15 | from .network import Recurrent 16 | from .policy import AllOne, PPO 17 | from .reward import PAPenaltyReward 18 | from .simulator_simple import SingleAssetOrderExecutionSimple 19 | from .state import SAOEMetrics, SAOEState 20 | from .strategy import SAOEStateAdapter, SAOEStrategy, ProxySAOEStrategy, SAOEIntStrategy 21 | 22 | __all__ = [ 23 | "FullHistoryStateInterpreter", 24 | "CurrentStepStateInterpreter", 25 | "CategoricalActionInterpreter", 26 | "TwapRelativeActionInterpreter", 27 | "Recurrent", 28 | "AllOne", 29 | "PPO", 30 | "PAPenaltyReward", 31 | "SingleAssetOrderExecutionSimple", 32 | "SAOEStateAdapter", 33 | "SAOEMetrics", 34 | "SAOEState", 35 | "SAOEStrategy", 36 | "ProxySAOEStrategy", 37 | "SAOEIntStrategy", 38 | ] 39 | -------------------------------------------------------------------------------- /contrib/torch.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation.
2 | # Licensed under the MIT License. 3 | """ 4 | This module is not a necessary part of Qlib. 5 | It just provides some tools for convenience. 6 | It should not be imported into the core part of qlib. 7 | """ 8 | import torch 9 | import numpy as np 10 | import pandas as pd 11 | 12 | 13 | def data_to_tensor(data, device="cpu", raise_error=False): 14 | if isinstance(data, torch.Tensor): 15 | if device == "cpu": 16 | return data.cpu() 17 | else: 18 | return data.to(device) 19 | if isinstance(data, (pd.DataFrame, pd.Series)): 20 | return data_to_tensor(torch.from_numpy(data.values).float(), device) 21 | elif isinstance(data, np.ndarray): 22 | return data_to_tensor(torch.from_numpy(data).float(), device) 23 | elif isinstance(data, (tuple, list)): 24 | return [data_to_tensor(i, device) for i in data] 25 | elif isinstance(data, dict): 26 | return {k: data_to_tensor(v, device) for k, v in data.items()} 27 | else: 28 | if raise_error: 29 | raise ValueError(f"Unsupported data type: {type(data)}.") 30 | else: 31 | return data 32 | -------------------------------------------------------------------------------- /rl/strategy/single_order.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | from __future__ import annotations 5 | 6 | from qlib.backtest import Order 7 | from qlib.backtest.decision import OrderHelper, TradeDecisionWO, TradeRange 8 | from qlib.strategy.base import BaseStrategy 9 | 10 | 11 | class SingleOrderStrategy(BaseStrategy): 12 | """Strategy used to generate a trade decision with exactly one order.""" 13 | 14 | def __init__( 15 | self, 16 | order: Order, 17 | trade_range: TradeRange | None = None, 18 | ) -> None: 19 | super().__init__() 20 | 21 | self._order = order 22 | self._trade_range = trade_range 23 | 24 | def generate_trade_decision(self, execute_result: list | None = None) -> TradeDecisionWO: 25 | oh: OrderHelper = self.common_infra.get("trade_exchange").get_order_helper() 26 | order_list = [ 27 | oh.create( 28 | code=self._order.stock_id, 29 | amount=self._order.amount, 30 | direction=self._order.direction, 31 | ), 32 | ] 33 | return TradeDecisionWO(order_list, self, self._trade_range) 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /docs/advanced/server.rst: -------------------------------------------------------------------------------- 1 | .. _server: 2 | 3 | ============================= 4 | ``Online`` & ``Offline`` mode 5 | ============================= 6 | .. currentmodule:: qlib 7 | 8 | 9 | Introduction 10 | ============ 11 | 12 | ``Qlib`` supports ``Online`` mode and ``Offline`` mode. Only the ``Offline`` mode is introduced in this document. 13 | 14 | The ``Online`` mode is designed to solve the following problems: 15 | 16 | - Manage the data in a centralized way. Users don't have to manage data of different versions. 17 | - Reduce the amount of cache to be generated. 18 | - Make the data accessible remotely. 19 | 20 | Qlib-Server 21 | =========== 22 | 23 | ``Qlib-Server`` is the assorted server system for ``Qlib``, which utilizes ``Qlib`` for basic calculations and provides an extensive server system and cache mechanism. With ``Qlib-Server``, the data provided for ``Qlib`` can be managed in a centralized manner. With ``Qlib-Server``, users can use ``Qlib`` in ``Online`` mode. 24 | 25 | 26 | 27 | Reference 28 | ========= 29 | If users are interested in ``Qlib-Server`` and ``Online`` mode, please refer to the `Qlib-Server Project <https://github.com/microsoft/qlib-server>`_ and the `Qlib-Server Document <https://qlib-server.readthedocs.io/en/latest/>`_. 30 | -------------------------------------------------------------------------------- /contrib/online/online_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | # pylint: skip-file 5 | # flake8: noqa 6 | 7 | import random 8 | import pandas as pd 9 | from ...data import D 10 | from ..model.base import Model 11 | 12 | 13 | class ScoreFileModel(Model): 14 | """ 15 | This model will load a score file and return the score at dates that exist in the score file. 16 | """ 17 | 18 | def __init__(self, score_path): 19 | pred_test = pd.read_csv(score_path, index_col=[0, 1], parse_dates=True, infer_datetime_format=True) 20 | self.pred = pred_test 21 | 22 | def get_data_with_date(self, date, **kwargs): 23 | score = self.pred.loc(axis=0)[:, date] # (stock_id, trade_date) multi_index, score at the predict date 24 | score_series = score.reset_index(level="datetime", drop=True)[ 25 | "score" 26 | ] # pd.Series ; index:stock_id, data: score 27 | return score_series 28 | 29 | def predict(self, x_test, **kwargs): 30 | return x_test 31 | 32 | def score(self, x_test, **kwargs): 33 | return 34 | 35 | def fit(self, x_train, y_train, x_valid, y_valid, w_train=None, w_valid=None, **kwargs): 36 | return 37 | 38 | def save(self, fname, **kwargs): 39 | return 40 | -------------------------------------------------------------------------------- /rl/aux_info.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License.
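# Usage sketch (hypothetical): subclass AuxiliaryInfoCollector (defined below) and
# override `collect` to expose extra simulator state, e.g. for multi-agent RL:
#     class PositionCollector(AuxiliaryInfoCollector[SomeState, float]):
#         def collect(self, simulator_state: SomeState) -> float:
#             return float(simulator_state.position)  # `position` is an assumed attribute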
3 | 4 | from __future__ import annotations 5 | 6 | from typing import TYPE_CHECKING, Generic, Optional, TypeVar 7 | 8 | from qlib.typehint import final 9 | 10 | from .simulator import StateType 11 | 12 | if TYPE_CHECKING: 13 | from .utils.env_wrapper import EnvWrapper 14 | 15 | 16 | __all__ = ["AuxiliaryInfoCollector"] 17 | 18 | AuxInfoType = TypeVar("AuxInfoType") 19 | 20 | 21 | class AuxiliaryInfoCollector(Generic[StateType, AuxInfoType]): 22 | """Override this class to collect customized auxiliary information from the environment.""" 23 | 24 | env: Optional[EnvWrapper] = None 25 | 26 | @final 27 | def __call__(self, simulator_state: StateType) -> AuxInfoType: 28 | return self.collect(simulator_state) 29 | 30 | def collect(self, simulator_state: StateType) -> AuxInfoType: 31 | """Override this for customized auxiliary info. 32 | Usually useful in Multi-agent RL. 33 | 34 | Parameters 35 | ---------- 36 | simulator_state 37 | Retrieved with ``simulator.get_state()``. 38 | 39 | Returns 40 | ------- 41 | Auxiliary information. 42 | """ 43 | raise NotImplementedError("collect is not implemented!") 44 | -------------------------------------------------------------------------------- /tests/dependency_tests/test_mlflow.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | import unittest 4 | import platform 5 | import mlflow 6 | import time 7 | from pathlib import Path 8 | import shutil 9 | 10 | 11 | class MLflowTest(unittest.TestCase): 12 | TMP_PATH = Path("./.mlruns_tmp/") 13 | 14 | def tearDown(self) -> None: 15 | if self.TMP_PATH.exists(): 16 | shutil.rmtree(self.TMP_PATH) 17 | 18 | def test_creating_client(self): 19 | """ 20 | Please refer to qlib/workflow/expm.py:MLflowExpManager._client 21 | we don't cache _client (this is helpful to reduce maintenance work when MLflowExpManager's uri is changed) 22 | 23 | This implementation is based on the assumption that creating a client is fast 24 | """ 25 | start = time.time() 26 | for i in range(10): 27 | _ = mlflow.tracking.MlflowClient(tracking_uri=str(self.TMP_PATH)) 28 | end = time.time() 29 | elapsed = end - start 30 | if platform.system() == "Linux": 31 | self.assertLess(elapsed, 1e-2) # it can be done in less than 10ms 32 | else: 33 | self.assertLess(elapsed, 2e-2) 34 | print(elapsed) 35 | 36 | 37 | if __name__ == "__main__": 38 | unittest.main() 39 | -------------------------------------------------------------------------------- /examples/nested_decision_execution/README.md: -------------------------------------------------------------------------------- 1 | # Nested Decision Execution 2 | 3 | This workflow is an example of nested decision execution in backtesting. Qlib supports nested decision execution, which means that users can use different strategies to make trade decisions at different frequencies. 4 | 5 | ## Weekly Portfolio Generation and Daily Order Execution 6 | 7 | This workflow provides an example that uses a DropoutTopkStrategy (a strategy based on a daily-frequency LightGBM model) at weekly frequency for portfolio generation and uses SBBStrategyEMA (a rule-based strategy that uses EMA for decision-making) to execute orders at daily frequency.
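As a rough illustration of how the two levels nest, the backtest wires an outer (weekly) executor around an inner (daily) one. The class names below exist in qlib (`qlib.backtest.executor`, `qlib.contrib.strategy.rule_strategy`), but the exact kwargs are an illustrative assumption, not the verified config from `workflow.py`:

```python
# hedged sketch of a nested executor config; kwargs are illustrative assumptions
executor_config = {
    "class": "NestedExecutor",
    "module_path": "qlib.backtest.executor",
    "kwargs": {
        "time_per_step": "week",            # outer decision frequency
        "inner_strategy": {                 # re-decides orders within each week
            "class": "SBBStrategyEMA",
            "module_path": "qlib.contrib.strategy.rule_strategy",
        },
        "inner_executor": {                 # actually executes at daily frequency
            "class": "SimulatorExecutor",
            "module_path": "qlib.backtest.executor",
            "kwargs": {"time_per_step": "day"},
        },
    },
}
```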
8 | 9 | ### Usage 10 | 11 | Start backtesting by running the following command: 12 | ```bash 13 | python workflow.py backtest 14 | ``` 15 | 16 | Start collecting data by running the following command: 17 | ```bash 18 | python workflow.py collect_data 19 | ``` 20 | 21 | ## Daily Portfolio Generation and Minutely Order Execution 22 | 23 | This workflow also provides a high-frequency example that uses a DropoutTopkStrategy for portfolio generation at daily frequency and uses SBBStrategyEMA to execute orders at minute frequency. 24 | 25 | ### Usage 26 | 27 | Start backtesting by running the following command: 28 | ```bash 29 | python workflow.py backtest_highfreq 30 | ``` -------------------------------------------------------------------------------- /model/interpret/base.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | """ 5 | Interfaces to interpret models 6 | """ 7 | 8 | import pandas as pd 9 | from abc import abstractmethod 10 | 11 | 12 | class FeatureInt: 13 | """Feature (Int)erpreter""" 14 | 15 | @abc.abstractmethod 16 | def get_feature_importance(self) -> pd.Series: 17 | """get feature importance 18 | 19 | Returns 20 | ------- 21 | The index is the feature name. 22 | 23 | The greater the value, the higher the importance. 24 | """ 25 | 26 | 27 | class LightGBMFInt(FeatureInt): 28 | """LightGBM (F)eature (Int)erpreter""" 29 | 30 | def __init__(self): 31 | self.model = None 32 | 33 | def get_feature_importance(self, *args, **kwargs) -> pd.Series: 34 | """get feature importance 35 | 36 | Notes 37 | ----- 38 | parameters reference: 39 | https://lightgbm.readthedocs.io/en/latest/pythonapi/lightgbm.Booster.html?highlight=feature_importance#lightgbm.Booster.feature_importance 40 | """ 41 | return pd.Series( 42 | self.model.feature_importance(*args, **kwargs), index=self.model.feature_name() 43 | ).sort_values( # pylint: disable=E1101 44 | ascending=False 45 | ) 46 | -------------------------------------------------------------------------------- /tests/data_mid_layer_tests/test_handler.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import shutil 4 | import unittest 5 | from qlib.tests import TestAutoData 6 | from qlib.data import D 7 | from qlib.data.dataset.handler import DataHandlerLP 8 | 9 | 10 | class HandlerTests(TestAutoData): 11 | def to_str(self, obj): 12 | return "".join(str(obj).split()) 13 | 14 | def test_handler_df(self): 15 | df = D.features(["sh600519"], start_time="20190101", end_time="20190201", fields=["$close"]) 16 | dh = DataHandlerLP.from_df(df) 17 | print(dh.fetch()) 18 | self.assertTrue(dh._data.equals(df)) 19 | self.assertTrue(dh._infer is dh._data) 20 | self.assertTrue(dh._learn is dh._data) 21 | self.assertTrue(dh.data_loader._data is dh._data) 22 | fname = "_handler_test.pkl" 23 | dh.to_pickle(fname, dump_all=True) 24 | 25 | with open(fname, "rb") as f: 26 | dh_d = pickle.load(f) 27 | 28 | self.assertTrue(dh_d._data.equals(df)) 29 | self.assertTrue(dh_d._infer is dh_d._data) 30 | self.assertTrue(dh_d._learn is dh_d._data) 31 | # Data loader will no longer be useful 32 | self.assertTrue("_data" not in dh_d.data_loader.__dict__.keys()) 33 | os.remove(fname) 34 | 35 | 36 | if __name__ == "__main__": 37 | unittest.main() 38 | -------------------------------------------------------------------------------- /contrib/model/pytorch_utils.py: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | import torch.nn as nn 5 | 6 | 7 | def count_parameters(models_or_parameters, unit="m"): 8 | """ 9 | This function counts the number of parameters of one (or multiple) models, optionally scaled by a storage size unit. 10 | 11 | Parameters 12 | ---------- 13 | models_or_parameters : PyTorch model(s) or a list of parameters. 14 | unit : the storage size unit. 15 | 16 | Returns 17 | ------- 18 | The number of parameters of the given model(s) or parameters, scaled by `unit`. 19 | """ 20 | if isinstance(models_or_parameters, nn.Module): 21 | counts = sum(v.numel() for v in models_or_parameters.parameters()) 22 | elif isinstance(models_or_parameters, nn.Parameter): 23 | counts = models_or_parameters.numel() 24 | elif isinstance(models_or_parameters, (list, tuple)): 25 | return sum(count_parameters(x, unit) for x in models_or_parameters) 26 | else: 27 | counts = sum(v.numel() for v in models_or_parameters) 28 | unit = unit.lower() 29 | if unit in ("kb", "k"): 30 | counts /= 2**10 31 | elif unit in ("mb", "m"): 32 | counts /= 2**20 33 | elif unit in ("gb", "g"): 34 | counts /= 2**30 35 | elif unit is not None: 36 | raise ValueError("Unknown unit: {:}".format(unit)) 37 | return counts 38 | -------------------------------------------------------------------------------- /examples/benchmarks/TRA/example.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import qlib 4 | from ruamel.yaml import YAML 5 | from qlib.utils import init_instance_by_config 6 | 7 | 8 | def main(seed, config_file="configs/config_alstm.yaml"): 9 | # load config 10 | with open(config_file) as f: 11 | yaml = YAML(typ="safe", pure=True) 12 | config = yaml.load(f) 13 | 14 | # seed_suffix = "/seed1000" if "init" in config_file else f"/seed{seed}" 15 | seed_suffix = "" 16 | config["task"]["model"]["kwargs"].update( 17 | {"seed": seed, "logdir": config["task"]["model"]["kwargs"]["logdir"] + seed_suffix} 18 | ) 19 | 20 | # initialize workflow 21 | qlib.init( 22 | provider_uri=config["qlib_init"]["provider_uri"], 23 | region=config["qlib_init"]["region"], 24 | ) 25 | dataset = init_instance_by_config(config["task"]["dataset"]) 26 | model = init_instance_by_config(config["task"]["model"]) 27 | 28 | # train model 29 | model.fit(dataset) 30 | 31 | 32 | if __name__ == "__main__": 33 | # set params from cmd 34 | parser = argparse.ArgumentParser(allow_abbrev=False) 35 | parser.add_argument("--seed", type=int, default=1000, help="random seed") 36 | parser.add_argument("--config_file", type=str, default="configs/config_alstm.yaml", help="config file") 37 | args = parser.parse_args() 38 | main(**vars(args)) 39 | -------------------------------------------------------------------------------- /docs/start/installation.rst: -------------------------------------------------------------------------------- 1 | .. _installation: 2 | 3 | ============ 4 | Installation 5 | ============ 6 | 7 | .. currentmodule:: qlib 8 | 9 | 10 | ``Qlib`` Installation 11 | ===================== 12 | .. note:: 13 | 14 | `Qlib` supports both `Windows` and `Linux`. It's recommended to use `Qlib` in `Linux`. ``Qlib`` supports Python 3, up to Python 3.8. 15 | 16 | Users can easily install ``Qlib`` by pip according to the following command: 17 | 18 | .. 
/docs/start/installation.rst: -------------------------------------------------------------------------------- 1 | .. _installation: 2 | 3 | ============ 4 | Installation 5 | ============ 6 | 7 | .. currentmodule:: qlib 8 | 9 | 10 | ``Qlib`` Installation 11 | ===================== 12 | .. note:: 13 | 14 | `Qlib` supports both `Windows` and `Linux`; it's recommended to use `Qlib` on `Linux`. ``Qlib`` supports Python 3 up to Python 3.8. 15 | 16 | Users can easily install ``Qlib`` with pip using the following command: 17 | 18 | .. code-block:: bash 19 | 20 | pip install pyqlib 21 | 22 | 23 | Users can also install ``Qlib`` from source code with the following steps: 24 | 25 | - Enter the root directory of ``Qlib``, in which the file ``setup.py`` exists. 26 | - Then, please execute the following command to install the environment dependencies and install ``Qlib``: 27 | 28 | .. code-block:: bash 29 | 30 | $ pip install numpy 31 | $ pip install --upgrade cython 32 | $ git clone https://github.com/microsoft/qlib.git && cd qlib 33 | $ python setup.py install 34 | 35 | .. note:: 36 | It's recommended to use anaconda/miniconda to set up the environment. ``Qlib`` needs the lightgbm and pytorch packages; use pip to install them. 37 | 38 | 39 | 40 | Use the following code to make sure the installation succeeded: 41 | 42 | .. code-block:: python 43 | 44 | >>> import qlib 45 | >>> qlib.__version__ 46 | 47 | -------------------------------------------------------------------------------- /examples/benchmarks_dynamic/baseline/rolling_benchmark.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | import os 4 | from pathlib import Path 5 | from typing import Union 6 | 7 | import fire 8 | 9 | from qlib import auto_init 10 | from qlib.contrib.rolling.base import Rolling 11 | from qlib.tests.data import GetData 12 | 13 | DIRNAME = Path(__file__).absolute().resolve().parent 14 | 15 | 16 | class RollingBenchmark(Rolling): 17 | # The configs in the README.md 18 | CONF_LIST = [DIRNAME / "workflow_config_linear_Alpha158.yaml", DIRNAME / "workflow_config_lightgbm_Alpha158.yaml"] 19 | 20 | DEFAULT_CONF = CONF_LIST[0] 21 | 22 | def __init__(self, conf_path: Union[str, Path] = DEFAULT_CONF, horizon=20, **kwargs) -> None: 23 | # This code is kept for compatibility with the previous old code 24 | conf_path = Path(conf_path) 25 | super().__init__(conf_path=conf_path, horizon=horizon, **kwargs) 26 | 27 | for f in self.CONF_LIST: 28 | if conf_path.samefile(f): 29 | break 30 | else: 31 | self.logger.warning("Model type is not in the benchmark!") 32 | 33 | 34 | if __name__ == "__main__": 35 | kwargs = {} 36 | if os.environ.get("PROVIDER_URI", "") == "": 37 | GetData().qlib_data(exists_skip=True) 38 | else: 39 | kwargs["provider_uri"] = os.environ["PROVIDER_URI"] 40 | auto_init(**kwargs) 41 | fire.Fire(RollingBenchmark) 42 | --------------------------------------------------------------------------------
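`RollingBenchmark` above is normally driven through `fire` from the command line; the sketch below shows the equivalent programmatic use, assuming the qlib data is already prepared and that the `Rolling` base class exposes the `run()` entry point invoked by the CLI.

```python
from qlib import auto_init

auto_init(provider_uri="~/.qlib/qlib_data/cn_data")
# equivalent to: python rolling_benchmark.py --horizon 20 run
bench = RollingBenchmark(horizon=20)  # defaults to the Linear config
bench.run()
```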
/examples/benchmarks_dynamic/DDG-DA/workflow.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | import os 4 | from pathlib import Path 5 | from typing import Union 6 | 7 | import fire 8 | 9 | from qlib import auto_init 10 | from qlib.contrib.rolling.ddgda import DDGDA 11 | from qlib.tests.data import GetData 12 | 13 | DIRNAME = Path(__file__).absolute().resolve().parent 14 | BENCH_DIR = DIRNAME.parent / "baseline" 15 | 16 | 17 | class DDGDABench(DDGDA): 18 | # The configs in the README.md 19 | CONF_LIST = [ 20 | BENCH_DIR / "workflow_config_linear_Alpha158.yaml", 21 | BENCH_DIR / "workflow_config_lightgbm_Alpha158.yaml", 22 | ] 23 | 24 | DEFAULT_CONF = CONF_LIST[0]  # Linear by default due to efficiency 25 | 26 | def __init__(self, conf_path: Union[str, Path] = DEFAULT_CONF, horizon=20, **kwargs) -> None: 27 | # This code is kept for compatibility with the previous old code 28 | conf_path = Path(conf_path) 29 | super().__init__(conf_path=conf_path, horizon=horizon, working_dir=DIRNAME, **kwargs) 30 | 31 | for f in self.CONF_LIST: 32 | if conf_path.samefile(f): 33 | break 34 | else: 35 | self.logger.warning("Model type is not in the benchmark!") 36 | 37 | 38 | if __name__ == "__main__": 39 | kwargs = {} 40 | if os.environ.get("PROVIDER_URI", "") == "": 41 | GetData().qlib_data(exists_skip=True) 42 | else: 43 | kwargs["provider_uri"] = os.environ["PROVIDER_URI"] 44 | auto_init(**kwargs) 45 | fire.Fire(DDGDABench) 46 | -------------------------------------------------------------------------------- /scripts/data_collector/pit/README.md: -------------------------------------------------------------------------------- 1 | # Collect Point-in-Time Data 2 | 3 | > *Please pay **ATTENTION** that the data is collected from [baostock](http://baostock.com) and the data might not be perfect. We recommend that users prepare their own data if they have a high-quality dataset. For more information, users can refer to the [related document](https://qlib.readthedocs.io/en/latest/component/data.html#converting-csv-format-into-qlib-format)* 4 | 5 | ## Requirements 6 | 7 | ```bash 8 | pip install -r requirements.txt 9 | ``` 10 | 11 | ## Collect Data 12 | 13 | 14 | ### Download Quarterly CN Data 15 | 16 | ```bash 17 | cd qlib/scripts/data_collector/pit/ 18 | # download from baostock.com 19 | python collector.py download_data --source_dir ~/.qlib/stock_data/source/pit --start 2000-01-01 --end 2020-01-01 --interval quarterly 20 | ``` 21 | 22 | Downloading the data for all stocks is very time-consuming.
If you just want to run a quick test on a few stocks, you can run the command below 23 | ```bash 24 | python collector.py download_data --source_dir ~/.qlib/stock_data/source/pit --start 2000-01-01 --end 2020-01-01 --interval quarterly --symbol_regex "^(600519|000725).*" 25 | ``` 26 | 27 | 28 | ### Normalize Data 29 | ```bash 30 | python collector.py normalize_data --interval quarterly --source_dir ~/.qlib/stock_data/source/pit --normalize_dir ~/.qlib/stock_data/source/pit_normalized 31 | ``` 32 | 33 | 34 | 35 | ### Dump Data into PIT Format 36 | 37 | ```bash 38 | cd qlib/scripts 39 | python dump_pit.py dump --data_path ~/.qlib/stock_data/source/pit_normalized --qlib_dir ~/.qlib/qlib_data/cn_data --interval quarterly 40 | ``` 41 | -------------------------------------------------------------------------------- /examples/benchmarks/TRA/configs/config_alstm.yaml: -------------------------------------------------------------------------------- 1 | qlib_init: 2 | provider_uri: "~/.qlib/qlib_data/cn_data" 3 | region: cn 4 | 5 | data_loader_config: &data_loader_config 6 | class: StaticDataLoader 7 | module_path: qlib.data.dataset.loader 8 | kwargs: 9 | config: 10 | feature: data/feature.pkl 11 | label: data/label.pkl 12 | 13 | model_config: &model_config 14 | input_size: 16 15 | hidden_size: 256 16 | num_layers: 2 17 | num_heads: 2 18 | use_attn: True 19 | dropout: 0.1 20 | 21 | num_states: &num_states 1 22 | 23 | tra_config: &tra_config 24 | num_states: *num_states 25 | hidden_size: 16 26 | tau: 1.0 27 | src_info: LR_TPE 28 | 29 | task: 30 | model: 31 | class: TRAModel 32 | module_path: src/model.py 33 | kwargs: 34 | lr: 0.0002 35 | n_epochs: 500 36 | max_steps_per_epoch: 100 37 | early_stop: 20 38 | seed: 1000 39 | logdir: output/test/alstm 40 | model_type: LSTM 41 | model_config: *model_config 42 | tra_config: *tra_config 43 | lamb: 1.0 44 | rho: 0.99 45 | freeze_model: False 46 | model_init_state: 47 | dataset: 48 | class: MTSDatasetH 49 | module_path: src/dataset.py 50 | kwargs: 51 | handler: 52 | class: DataHandler 53 | module_path: qlib.data.dataset.handler 54 | kwargs: 55 | data_loader: *data_loader_config 56 | segments: 57 | train: [2007-10-30, 2016-05-27] 58 | valid: [2016-09-26, 2018-05-29] 59 | test: [2018-09-21, 2020-06-30] 60 | seq_len: 60 61 | horizon: 21 62 | num_states: *num_states 63 | batch_size: 1024 -------------------------------------------------------------------------------- /examples/benchmarks/TRA/configs/config_alstm_tra_init.yaml: -------------------------------------------------------------------------------- 1 | qlib_init: 2 | provider_uri: "~/.qlib/qlib_data/cn_data" 3 | region: cn 4 | 5 | data_loader_config: &data_loader_config 6 | class: StaticDataLoader 7 | module_path: qlib.data.dataset.loader 8 | kwargs: 9 | config: 10 | feature: data/feature.pkl 11 | label: data/label.pkl 12 | 13 | model_config: &model_config 14 | input_size: 16 15 | hidden_size: 256 16 | num_layers: 2 17 | num_heads: 2 18 | use_attn: True 19 | dropout: 0.1 20 | 21 | num_states: &num_states 3 22 | 23 | tra_config: &tra_config 24 | num_states: *num_states 25 | hidden_size: 16 26 | tau: 1.0 27 | src_info: LR_TPE 28 | 29 | task: 30 | model: 31 | class: TRAModel 32 | module_path: src/model.py 33 | kwargs: 34 | lr: 0.0002 35 | n_epochs: 500 36 | max_steps_per_epoch: 100 37 | early_stop: 20 38 | seed: 1000 39 | logdir: output/test/alstm_tra_init 40 | model_type: LSTM 41 | model_config: *model_config 42 | tra_config: *tra_config 43 | lamb: 1.0 44 | rho: 0.99 45 | freeze_model: False 46 | 
model_init_state: 47 | dataset: 48 | class: MTSDatasetH 49 | module_path: src/dataset.py 50 | kwargs: 51 | handler: 52 | class: DataHandler 53 | module_path: qlib.data.dataset.handler 54 | kwargs: 55 | data_loader: *data_loader_config 56 | segments: 57 | train: [2007-10-30, 2016-05-27] 58 | valid: [2016-09-26, 2018-05-29] 59 | test: [2018-09-21, 2020-06-30] 60 | seq_len: 60 61 | horizon: 21 62 | num_states: *num_states 63 | batch_size: 512 -------------------------------------------------------------------------------- /examples/benchmarks/TRA/configs/config_transformer.yaml: -------------------------------------------------------------------------------- 1 | qlib_init: 2 | provider_uri: "~/.qlib/qlib_data/cn_data" 3 | region: cn 4 | 5 | data_loader_config: &data_loader_config 6 | class: StaticDataLoader 7 | module_path: qlib.data.dataset.loader 8 | kwargs: 9 | config: 10 | feature: data/feature.pkl 11 | label: data/label.pkl 12 | 13 | model_config: &model_config 14 | input_size: 16 15 | hidden_size: 64 16 | num_layers: 2 17 | num_heads: 4 18 | use_attn: False 19 | dropout: 0.1 20 | 21 | num_states: &num_states 1 22 | 23 | tra_config: &tra_config 24 | num_states: *num_states 25 | hidden_size: 16 26 | tau: 1.0 27 | src_info: LR_TPE 28 | 29 | task: 30 | model: 31 | class: TRAModel 32 | module_path: src/model.py 33 | kwargs: 34 | lr: 0.0002 35 | n_epochs: 500 36 | max_steps_per_epoch: 100 37 | early_stop: 20 38 | seed: 1000 39 | logdir: output/test/transformer 40 | model_type: Transformer 41 | model_config: *model_config 42 | tra_config: *tra_config 43 | lamb: 1.0 44 | rho: 0.99 45 | freeze_model: False 46 | model_init_state: 47 | dataset: 48 | class: MTSDatasetH 49 | module_path: src/dataset.py 50 | kwargs: 51 | handler: 52 | class: DataHandler 53 | module_path: qlib.data.dataset.handler 54 | kwargs: 55 | data_loader: *data_loader_config 56 | segments: 57 | train: [2007-10-30, 2016-05-27] 58 | valid: [2016-09-26, 2018-05-29] 59 | test: [2018-09-21, 2020-06-30] 60 | seq_len: 60 61 | horizon: 21 62 | num_states: *num_states 63 | batch_size: 1024 -------------------------------------------------------------------------------- /examples/benchmarks/TRA/configs/config_transformer_tra_init.yaml: -------------------------------------------------------------------------------- 1 | qlib_init: 2 | provider_uri: "~/.qlib/qlib_data/cn_data" 3 | region: cn 4 | 5 | data_loader_config: &data_loader_config 6 | class: StaticDataLoader 7 | module_path: qlib.data.dataset.loader 8 | kwargs: 9 | config: 10 | feature: data/feature.pkl 11 | label: data/label.pkl 12 | 13 | model_config: &model_config 14 | input_size: 16 15 | hidden_size: 64 16 | num_layers: 2 17 | num_heads: 4 18 | use_attn: False 19 | dropout: 0.1 20 | 21 | num_states: &num_states 3 22 | 23 | tra_config: &tra_config 24 | num_states: *num_states 25 | hidden_size: 16 26 | tau: 1.0 27 | src_info: LR_TPE 28 | 29 | task: 30 | model: 31 | class: TRAModel 32 | module_path: src/model.py 33 | kwargs: 34 | lr: 0.0002 35 | n_epochs: 500 36 | max_steps_per_epoch: 100 37 | early_stop: 20 38 | seed: 1000 39 | logdir: output/test/transformer_tra_init 40 | model_type: Transformer 41 | model_config: *model_config 42 | tra_config: *tra_config 43 | lamb: 1.0 44 | rho: 0.99 45 | freeze_model: False 46 | model_init_state: 47 | dataset: 48 | class: MTSDatasetH 49 | module_path: src/dataset.py 50 | kwargs: 51 | handler: 52 | class: DataHandler 53 | module_path: qlib.data.dataset.handler 54 | kwargs: 55 | data_loader: *data_loader_config 56 | segments: 57 | train: 
[2007-10-30, 2016-05-27] 58 | valid: [2016-09-26, 2018-05-29] 59 | test: [2018-09-21, 2020-06-30] 60 | seq_len: 60 61 | horizon: 21 62 | num_states: *num_states 63 | batch_size: 512 -------------------------------------------------------------------------------- /examples/benchmarks/TRA/configs/config_alstm_tra.yaml: -------------------------------------------------------------------------------- 1 | qlib_init: 2 | provider_uri: "~/.qlib/qlib_data/cn_data" 3 | region: cn 4 | 5 | data_loader_config: &data_loader_config 6 | class: StaticDataLoader 7 | module_path: qlib.data.dataset.loader 8 | kwargs: 9 | config: 10 | feature: data/feature.pkl 11 | label: data/label.pkl 12 | 13 | model_config: &model_config 14 | input_size: 16 15 | hidden_size: 256 16 | num_layers: 2 17 | num_heads: 2 18 | use_attn: True 19 | dropout: 0.1 20 | 21 | num_states: &num_states 10 22 | 23 | tra_config: &tra_config 24 | num_states: *num_states 25 | hidden_size: 16 26 | tau: 1.0 27 | src_info: LR_TPE 28 | 29 | task: 30 | model: 31 | class: TRAModel 32 | module_path: src/model.py 33 | kwargs: 34 | lr: 0.0001 35 | n_epochs: 500 36 | max_steps_per_epoch: 100 37 | early_stop: 20 38 | seed: 1000 39 | logdir: output/test/alstm_tra 40 | model_type: LSTM 41 | model_config: *model_config 42 | tra_config: *tra_config 43 | lamb: 2.0 44 | rho: 0.99 45 | freeze_model: True 46 | model_init_state: output/test/alstm_tra_init/model.bin 47 | dataset: 48 | class: MTSDatasetH 49 | module_path: src/dataset.py 50 | kwargs: 51 | handler: 52 | class: DataHandler 53 | module_path: qlib.data.dataset.handler 54 | kwargs: 55 | data_loader: *data_loader_config 56 | segments: 57 | train: [2007-10-30, 2016-05-27] 58 | valid: [2016-09-26, 2018-05-29] 59 | test: [2018-09-21, 2020-06-30] 60 | seq_len: 60 61 | horizon: 21 62 | num_states: *num_states 63 | batch_size: 1024 -------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
3 | 4 | 5 | from __future__ import division 6 | from __future__ import print_function 7 | 8 | from .data import ( 9 | D, 10 | CalendarProvider, 11 | InstrumentProvider, 12 | FeatureProvider, 13 | ExpressionProvider, 14 | DatasetProvider, 15 | LocalCalendarProvider, 16 | LocalInstrumentProvider, 17 | LocalFeatureProvider, 18 | LocalPITProvider, 19 | LocalExpressionProvider, 20 | LocalDatasetProvider, 21 | ClientCalendarProvider, 22 | ClientInstrumentProvider, 23 | ClientDatasetProvider, 24 | BaseProvider, 25 | LocalProvider, 26 | ClientProvider, 27 | ) 28 | 29 | from .cache import ( 30 | ExpressionCache, 31 | DatasetCache, 32 | DiskExpressionCache, 33 | DiskDatasetCache, 34 | SimpleDatasetCache, 35 | DatasetURICache, 36 | MemoryCalendarCache, 37 | ) 38 | 39 | 40 | __all__ = [ 41 | "D", 42 | "CalendarProvider", 43 | "InstrumentProvider", 44 | "FeatureProvider", 45 | "ExpressionProvider", 46 | "DatasetProvider", 47 | "LocalCalendarProvider", 48 | "LocalInstrumentProvider", 49 | "LocalFeatureProvider", 50 | "LocalPITProvider", 51 | "LocalExpressionProvider", 52 | "LocalDatasetProvider", 53 | "ClientCalendarProvider", 54 | "ClientInstrumentProvider", 55 | "ClientDatasetProvider", 56 | "BaseProvider", 57 | "LocalProvider", 58 | "ClientProvider", 59 | "ExpressionCache", 60 | "DatasetCache", 61 | "DiskExpressionCache", 62 | "DiskDatasetCache", 63 | "SimpleDatasetCache", 64 | "DatasetURICache", 65 | "MemoryCalendarCache", 66 | ] 67 | -------------------------------------------------------------------------------- /examples/benchmarks/TRA/configs/config_transformer_tra.yaml: -------------------------------------------------------------------------------- 1 | qlib_init: 2 | provider_uri: "~/.qlib/qlib_data/cn_data" 3 | region: cn 4 | 5 | data_loader_config: &data_loader_config 6 | class: StaticDataLoader 7 | module_path: qlib.data.dataset.loader 8 | kwargs: 9 | config: 10 | feature: data/feature.pkl 11 | label: data/label.pkl 12 | 13 | model_config: &model_config 14 | input_size: 16 15 | hidden_size: 64 16 | num_layers: 2 17 | num_heads: 4 18 | use_attn: False 19 | dropout: 0.1 20 | 21 | num_states: &num_states 3 22 | 23 | tra_config: &tra_config 24 | num_states: *num_states 25 | hidden_size: 16 26 | tau: 1.0 27 | src_info: LR_TPE 28 | 29 | task: 30 | model: 31 | class: TRAModel 32 | module_path: src/model.py 33 | kwargs: 34 | lr: 0.0005 35 | n_epochs: 500 36 | max_steps_per_epoch: 100 37 | early_stop: 20 38 | seed: 1000 39 | logdir: output/test/transformer_tra 40 | model_type: Transformer 41 | model_config: *model_config 42 | tra_config: *tra_config 43 | lamb: 1.0 44 | rho: 0.99 45 | freeze_model: True 46 | model_init_state: output/test/transformer_tra_init/model.bin 47 | dataset: 48 | class: MTSDatasetH 49 | module_path: src/dataset.py 50 | kwargs: 51 | handler: 52 | class: DataHandler 53 | module_path: qlib.data.dataset.handler 54 | kwargs: 55 | data_loader: *data_loader_config 56 | segments: 57 | train: [2007-10-30, 2016-05-27] 58 | valid: [2016-09-26, 2018-05-29] 59 | test: [2018-09-21, 2020-06-30] 60 | seq_len: 60 61 | horizon: 21 62 | num_states: *num_states 63 | batch_size: 512 -------------------------------------------------------------------------------- /scripts/data_collector/crowd_source/README.md: -------------------------------------------------------------------------------- 1 | # Crowd Source Data 2 | 3 | ## Initiative 4 | Public data sources like Yahoo are flawed: they may miss data for stocks that have been delisted, and some of their data may simply be wrong.
This can introduce survivorship bias into our training process. 5 | 6 | The crowd-source data is introduced to merge data from multiple data sources and cross-validate them against each other, so that: 7 | 1. We will have a more complete historical record. 8 | 2. We can identify anomalous data and apply corrections when necessary. 9 | 10 | ## Related Repo 11 | The raw data is hosted in a dolthub repo: https://www.dolthub.com/repositories/chenditc/investment_data 12 | 13 | The processing scripts and SQL are hosted in a github repo: https://github.com/chenditc/investment_data 14 | 15 | The packaged docker runtime is hosted on dockerhub: https://hub.docker.com/repository/docker/chenditc/investment_data 16 | 17 | ## How to use it in qlib 18 | ### Option 1: Download release bin data 19 | Users can download the data in qlib bin format and use it directly: https://github.com/chenditc/investment_data/releases/latest 20 | ```bash 21 | wget https://github.com/chenditc/investment_data/releases/latest/download/qlib_bin.tar.gz 22 | tar -zxvf qlib_bin.tar.gz -C ~/.qlib/qlib_data/cn_data --strip-components=2 23 | ``` 24 | 25 | ### Option 2: Generate qlib data from dolthub 26 | The dolthub data is updated daily, so users who want up-to-date data can dump the qlib bin data using docker: 27 | ```bash 28 | docker run -v /:/output -it --rm chenditc/investment_data bash dump_qlib_bin.sh && cp ./qlib_bin.tar.gz /output/ 29 | ``` 30 | 31 | ## FAQ and other info 32 | See: https://github.com/chenditc/investment_data/blob/main/README.md 33 | -------------------------------------------------------------------------------- /scripts/data_collector/fund/README.md: -------------------------------------------------------------------------------- 1 | # Collect Fund Data 2 | 3 | > *Please pay **ATTENTION** that the data is collected from [天天基金网](https://fund.eastmoney.com/) and the data might not be perfect. We recommend that users prepare their own data if they have a high-quality dataset.
For more information, users can refer to the [related document](https://qlib.readthedocs.io/en/latest/component/data.html#converting-csv-format-into-qlib-format)* 4 | 5 | ## Requirements 6 | 7 | ```bash 8 | pip install -r requirements.txt 9 | ``` 10 | 11 | ## Collect Data 12 | 13 | 14 | ### CN Data 15 | 16 | #### 1d from East Money 17 | 18 | ```bash 19 | 20 | # download from eastmoney.com 21 | python collector.py download_data --source_dir ~/.qlib/fund_data/source/cn_data --region CN --start 2020-11-01 --end 2020-11-10 --delay 0.1 --interval 1d 22 | 23 | # normalize 24 | python collector.py normalize_data --source_dir ~/.qlib/fund_data/source/cn_data --normalize_dir ~/.qlib/fund_data/source/cn_1d_nor --region CN --interval 1d --date_field_name FSRQ 25 | 26 | # dump data 27 | cd qlib/scripts 28 | python dump_bin.py dump_all --data_path ~/.qlib/fund_data/source/cn_1d_nor --qlib_dir ~/.qlib/qlib_data/cn_fund_data --freq day --date_field_name FSRQ --include_fields DWJZ,LJJZ 29 | 30 | ``` 31 | 32 | ### Using data 33 | 34 | ```python 35 | import qlib 36 | from qlib.data import D 37 | 38 | qlib.init(provider_uri="~/.qlib/qlib_data/cn_fund_data") 39 | df = D.features(D.instruments(market="all"), ["$DWJZ", "$LJJZ"], freq="day") 40 | ``` 41 | 42 | 43 | ### Help 44 | ```bash 45 | python collector.py collector_data --help 46 | ``` 47 | 48 | ## Parameters 49 | 50 | - interval: 1d 51 | - region: CN 52 | 53 | ## Disclaimer 54 | 55 | This project is for learning and research purposes only; it does not constitute guidance or advice for any action, and any disputes arising from it are unrelated to this project. 56 | -------------------------------------------------------------------------------- /contrib/report/data/base.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | """ 4 | This module is responsible for analysing data 5 | 6 | Assumptions 7 | - It analyses each feature individually 8 | 9 | """ 10 | import pandas as pd 11 | from qlib.log import TimeInspector 12 | from qlib.contrib.report.utils import sub_fig_generator 13 | 14 | 15 | class FeaAnalyser: 16 | def __init__(self, dataset: pd.DataFrame): 17 | """ 18 | 19 | Parameters 20 | ---------- 21 | dataset : pd.DataFrame 22 | 23 | We often have multiple columns in the dataset. Each column corresponds to one sub figure. 24 | There will be a datetime column in the index levels. 25 | Aggregation will be used for more summarized metrics over time. 26 | Here is an example of data: 27 | 28 | .. code-block:: 29 | 30 | return 31 | datetime instrument 32 | 2007-02-06 equity_tpx 0.010087 33 | equity_spx 0.000786 34 | """ 35 | self._dataset = dataset 36 | with TimeInspector.logt("calc_stat_values"): 37 | self.calc_stat_values() 38 | 39 | def calc_stat_values(self): 40 | pass 41 | 42 | def plot_single(self, col, ax): 43 | raise NotImplementedError("This type of input is not supported") 44 | 45 | def skip(self, col): 46 | return False 47 | 48 | def plot_all(self, *args, **kwargs): 49 | ax_gen = iter(sub_fig_generator(*args, **kwargs)) 50 | for col in self._dataset: 51 | if not self.skip(col): 52 | ax = next(ax_gen) 53 | self.plot_single(col, ax) 54 | --------------------------------------------------------------------------------
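The `FeaAnalyser` base class above expects subclasses to precompute statistics in `calc_stat_values` and draw one sub-figure per column in `plot_single`. A minimal sketch of a custom analyser follows; the statistic and the class name are illustrative assumptions, and `plot_all` supplies the matplotlib axes.

```python
from qlib.contrib.report.data.base import FeaAnalyser


class MeanAnalyser(FeaAnalyser):
    """Hypothetical analyser: plots each feature's cross-sectional mean over time."""

    def calc_stat_values(self):
        # aggregate over instruments for each datetime (one index level is "datetime")
        self._mean = self._dataset.groupby(level="datetime").mean()

    def plot_single(self, col, ax):
        self._mean[col].plot(ax=ax, title=col)
```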
/workflow/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | import atexit 5 | import logging 6 | import sys 7 | import traceback 8 | 9 | from ..log import get_module_logger 10 | from . import R 11 | from .recorder import Recorder 12 | 13 | logger = get_module_logger("workflow", logging.INFO) 14 | 15 | 16 | # functions to handle the experiment when an unusual program ending occurs 17 | def experiment_exit_handler(): 18 | """ 19 | Method for handling the experiment when any unusual program ending occurs. 20 | The `atexit` handler should be registered last, since it is called whenever the program ends. 21 | Thus, if any exception or user interruption occurs beforehand, we should handle them first. Once `R` has 22 | ended, another call of `R.end_exp` will not take effect. 23 | 24 | Limitations: 25 | - If pdb is used in your program, excepthook will not be triggered when it ends, and the status will be marked as finished 26 | """ 27 | sys.excepthook = experiment_exception_hook  # handle uncaught exceptions 28 | atexit.register(R.end_exp, recorder_status=Recorder.STATUS_FI)  # will not take effect if the experiment has already ended 29 | 30 | 31 | def experiment_exception_hook(exc_type, value, tb): 32 | """ 33 | End the experiment with status "FAILED". This hook tries to catch uncaught exceptions 34 | and end the experiment automatically. 35 | 36 | Parameters 37 | exc_type: Exception type 38 | value: Exception's value 39 | tb: Exception's traceback 40 | """ 41 | logger.error(f"An exception has been raised[{exc_type.__name__}: {value}].") 42 | 43 | # Same as the original format 44 | traceback.print_tb(tb) 45 | print(f"{exc_type.__name__}: {value}") 46 | 47 | R.end_exp(recorder_status=Recorder.STATUS_FA) 48 | -------------------------------------------------------------------------------- /tests/test_get_data.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License.
3 | 4 | import shutil 5 | import unittest 6 | from pathlib import Path 7 | 8 | import qlib 9 | from qlib.data import D 10 | from qlib.tests.data import GetData 11 | 12 | DATA_DIR = Path(__file__).parent.joinpath("test_get_data") 13 | SOURCE_DIR = DATA_DIR.joinpath("source") 14 | SOURCE_DIR.mkdir(exist_ok=True, parents=True) 15 | QLIB_DIR = DATA_DIR.joinpath("qlib") 16 | QLIB_DIR.mkdir(exist_ok=True, parents=True) 17 | 18 | 19 | class TestGetData(unittest.TestCase): 20 | FIELDS = "$open,$close,$high,$low,$volume,$factor,$change".split(",") 21 | 22 | @classmethod 23 | def setUpClass(cls) -> None: 24 | provider_uri = str(QLIB_DIR.resolve()) 25 | qlib.init( 26 | provider_uri=provider_uri, 27 | expression_cache=None, 28 | dataset_cache=None, 29 | ) 30 | 31 | @classmethod 32 | def tearDownClass(cls) -> None: 33 | shutil.rmtree(str(DATA_DIR.resolve())) 34 | 35 | def test_0_qlib_data(self): 36 | GetData().qlib_data( 37 | name="qlib_data_simple", target_dir=QLIB_DIR, region="cn", interval="1d", delete_old=False, exists_skip=True 38 | ) 39 | df = D.features(D.instruments("csi300"), self.FIELDS) 40 | self.assertListEqual(list(df.columns), self.FIELDS, "get qlib data failed") 41 | self.assertFalse(df.dropna().empty, "get qlib data failed") 42 | 43 | def test_1_csv_data(self): 44 | GetData().download_data(file_name="csv_data_cn.zip", target_dir=SOURCE_DIR) 45 | stock_name = set(map(lambda x: x.name[:-4].upper(), SOURCE_DIR.glob("*.csv"))) 46 | self.assertEqual(len(stock_name), 85, "get csv data failed") 47 | 48 | 49 | if __name__ == "__main__": 50 | unittest.main() 51 | -------------------------------------------------------------------------------- /examples/portfolio/README.md: -------------------------------------------------------------------------------- 1 | # Portfolio Optimization Strategy 2 | 3 | ## Introduction 4 | 5 | In `qlib/examples/benchmarks` we have various **alpha** models that predict 6 | the stock returns. We also use a simple rule-based `TopkDropoutStrategy` to 7 | evaluate the investing performance of these models. However, such a strategy 8 | is too simple to control portfolio risks such as correlation and volatility. 9 | 10 | To this end, an optimization-based strategy should be used for the 11 | trade-off between return and risk. In this doc, we will show how to use 12 | `EnhancedIndexingStrategy` to maximize portfolio return while minimizing 13 | tracking error relative to a benchmark. 14 | 15 | 16 | ## Preparation 17 | 18 | We use China stock market data for our example. 19 | 20 | 1. Prepare CSI300 weight: 21 | 22 | ```bash 23 | wget https://github.com/SunsetWolf/qlib_dataset/releases/download/v0/csi300_weight.zip 24 | unzip -d ~/.qlib/qlib_data/cn_data csi300_weight.zip 25 | rm -f csi300_weight.zip 26 | ``` 27 | NOTE: We did not find any free public resource for the benchmark weights. To run the example, we manually created this weight data. 28 | 29 | 2. Prepare risk model data: 30 | 31 | ```bash 32 | python prepare_riskdata.py 33 | ``` 34 | 35 | Here we use a **Statistical Risk Model** implemented in `qlib.model.riskmodel`. 36 | However, users are strongly recommended to use other risk models for better quality: 37 | * **Fundamental Risk Model** like MSCI BARRA 38 | * [Deep Risk Model](https://arxiv.org/abs/2107.05201) 39 | 40 | 41 | ## End-to-End Workflow 42 | 43 | You can finish the workflow with `EnhancedIndexingStrategy` by running 44 | `qrun config_enhanced_indexing.yaml`. 
45 | 46 | In this config, we mainly changed the strategy section compared to 47 | `qlib/examples/benchmarks/workflow_config_lightgbm_Alpha158.yaml`. 48 | -------------------------------------------------------------------------------- /rl/order_execution/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | from __future__ import annotations 5 | 6 | from typing import Any, cast 7 | 8 | import numpy as np 9 | import pandas as pd 10 | 11 | from qlib.backtest.decision import OrderDir 12 | from qlib.backtest.executor import BaseExecutor, NestedExecutor, SimulatorExecutor 13 | from qlib.constant import float_or_ndarray 14 | 15 | 16 | def dataframe_append(df: pd.DataFrame, other: Any) -> pd.DataFrame: 17 | # dataframe.append is deprecated 18 | other_df = pd.DataFrame(other).set_index("datetime") 19 | other_df.index.name = "datetime" 20 | 21 | res = pd.concat([df, other_df], axis=0) 22 | return res 23 | 24 | 25 | def price_advantage( 26 | exec_price: float_or_ndarray, 27 | baseline_price: float, 28 | direction: OrderDir | int, 29 | ) -> float_or_ndarray: 30 | if baseline_price == 0:  # something is wrong with the data; should be nan here 31 | if isinstance(exec_price, float): 32 | return 0.0 33 | else: 34 | return np.zeros_like(exec_price) 35 | if direction == OrderDir.BUY: 36 | res = (1 - exec_price / baseline_price) * 10000 37 | elif direction == OrderDir.SELL: 38 | res = (exec_price / baseline_price - 1) * 10000 39 | else: 40 | raise ValueError(f"Unexpected order direction: {direction}") 41 | res_wo_nan: np.ndarray = np.nan_to_num(res, nan=0.0) 42 | if res_wo_nan.size == 1: 43 | return res_wo_nan.item() 44 | else: 45 | return cast(float_or_ndarray, res_wo_nan) 46 | 47 | 48 | def get_simulator_executor(executor: BaseExecutor) -> SimulatorExecutor: 49 | while isinstance(executor, NestedExecutor): 50 | executor = executor.inner_executor 51 | assert isinstance(executor, SimulatorExecutor) 52 | return executor 53 | --------------------------------------------------------------------------------
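A usage sketch for `price_advantage` above; the prices are invented for illustration. Buying below the baseline counts as an advantage, reported in basis points.

```python
import numpy as np

from qlib.backtest.decision import OrderDir
from qlib.rl.order_execution.utils import price_advantage

pa = price_advantage(np.array([9.9, 10.1]), baseline_price=10.0, direction=OrderDir.BUY)
print(pa)  # [ 100. -100.]
```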
/scripts/data_collector/crypto/README.md: -------------------------------------------------------------------------------- 1 | # Collect Crypto Data 2 | 3 | > *Please pay **ATTENTION** that the data is collected from [Coingecko](https://www.coingecko.com/en/api) and the data might not be perfect. We recommend that users prepare their own data if they have a high-quality dataset. For more information, users can refer to the [related document](https://qlib.readthedocs.io/en/latest/component/data.html#converting-csv-format-into-qlib-format)* 4 | 5 | ## Requirements 6 | 7 | ```bash 8 | pip install -r requirements.txt 9 | ``` 10 | 11 | ## Usage of the dataset 12 | > *The crypto dataset only supports the data retrieval function; it does not support the backtest function, due to the lack of OHLC data.* 13 | 14 | ## Collect Data 15 | 16 | 17 | ### Crypto Data 18 | 19 | #### 1d from Coingecko 20 | 21 | ```bash 22 | 23 | # download from https://api.coingecko.com/api/v3/ 24 | python collector.py download_data --source_dir ~/.qlib/crypto_data/source/1d --start 2015-01-01 --end 2021-11-30 --delay 1 --interval 1d 25 | 26 | # normalize 27 | python collector.py normalize_data --source_dir ~/.qlib/crypto_data/source/1d --normalize_dir ~/.qlib/crypto_data/source/1d_nor --interval 1d --date_field_name date 28 | 29 | # dump data 30 | cd qlib/scripts 31 | python dump_bin.py dump_all --data_path ~/.qlib/crypto_data/source/1d_nor --qlib_dir ~/.qlib/qlib_data/crypto_data --freq day --date_field_name date --include_fields prices,total_volumes,market_caps 32 | 33 | ``` 34 | 35 | ### Using data 36 | 37 | ```python 38 | import qlib 39 | from qlib.data import D 40 | 41 | qlib.init(provider_uri="~/.qlib/qlib_data/crypto_data") 42 | df = D.features(D.instruments(market="all"), ["$prices", "$total_volumes","$market_caps"], freq="day") 43 | ``` 44 | 45 | 46 | ### Help 47 | ```bash 48 | python collector.py collector_data --help 49 | ``` 50 | 51 | ## Parameters 52 | 53 | - interval: 1d 54 | - delay: 1 55 | -------------------------------------------------------------------------------- /contrib/model/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | try: 4 | from .catboost_model import CatBoostModel 5 | except ModuleNotFoundError: 6 | CatBoostModel = None 7 | print("ModuleNotFoundError. CatBoostModel is skipped. (optional: maybe installing catboost can fix it.)") 8 | try: 9 | from .double_ensemble import DEnsembleModel 10 | from .gbdt import LGBModel 11 | except ModuleNotFoundError: 12 | DEnsembleModel, LGBModel = None, None 13 | print( 14 | "ModuleNotFoundError. DEnsembleModel and LGBModel are skipped. (optional: maybe installing lightgbm can fix it.)" 15 | ) 16 | try: 17 | from .xgboost import XGBModel 18 | except ModuleNotFoundError: 19 | XGBModel = None 20 | print("ModuleNotFoundError. XGBModel is skipped (optional: maybe installing xgboost can fix it).") 21 | try: 22 | from .linear import LinearModel 23 | except ModuleNotFoundError: 24 | LinearModel = None 25 | print("ModuleNotFoundError. LinearModel is skipped (optional: maybe installing scipy and sklearn can fix it).") 26 | # import pytorch models 27 | try: 28 | from .pytorch_alstm import ALSTM 29 | from .pytorch_gats import GATs 30 | from .pytorch_gru import GRU 31 | from .pytorch_lstm import LSTM 32 | from .pytorch_nn import DNNModelPytorch 33 | from .pytorch_tabnet import TabnetModel 34 | from .pytorch_sfm import SFM_Model 35 | from .pytorch_tcn import TCN 36 | from .pytorch_add import ADD 37 | 38 | pytorch_classes = (ALSTM, GATs, GRU, LSTM, DNNModelPytorch, TabnetModel, SFM_Model, TCN, ADD) 39 | except ModuleNotFoundError: 40 | pytorch_classes = () 41 | print("ModuleNotFoundError. PyTorch models are skipped (optional: maybe installing pytorch can fix it).")
42 | 43 | all_model_classes = (CatBoostModel, DEnsembleModel, LGBModel, XGBModel, LinearModel) + pytorch_classes 44 | -------------------------------------------------------------------------------- /examples/highfreq/README.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | This folder contains two examples: 3 | - A high-frequency dataset example 4 | - An example of predicting the price trend in high-frequency data 5 | 6 | ## High-Frequency Dataset 7 | 8 | This dataset is an example for RL-based high-frequency trading. 9 | 10 | ### Get High-Frequency Data 11 | 12 | Get high-frequency data by running the following command: 13 | ```bash 14 | python workflow.py get_data 15 | ``` 16 | 17 | ### Dump & Reload & Reinitialize the Dataset 18 | 19 | 20 | The High-Frequency Dataset is implemented as `qlib.data.dataset.DatasetH` in the `workflow.py`. `DatasetH` is a subclass of [`qlib.utils.serial.Serializable`](https://qlib.readthedocs.io/en/latest/advanced/serial.html), whose state can be dumped to or loaded from disk in `pickle` format. 21 | 22 | ### About Reinitialization 23 | 24 | After reloading a `Dataset` from disk, `Qlib` also supports reinitializing the dataset. This means that users can reset some states of the `Dataset` or `DataHandler` such as `instruments`, `start_time`, `end_time` and `segments`, etc., and generate new data according to these states. 25 | 26 | The example is given in `workflow.py`; users can run the code as follows. 27 | 28 | ### Run the Code 29 | 30 | Run the example by running the following command: 31 | ```bash 32 | python workflow.py dump_and_load_dataset 33 | ``` 34 | 35 | ## Benchmarks Performance (predicting the price trend in high-frequency data) 36 | 37 | Here are the results of models for predicting the price trend in high-frequency data. We will keep updating benchmark models in the future. 38 | 39 | | Model Name | Dataset | IC | ICIR | Rank IC | Rank ICIR | Long Precision | Short Precision | Long-Short Average Return | Long-Short Average Sharpe | 40 | |---|---|---|---|---|---|---|---|---|---| 41 | | LightGBM | Alpha158 | 0.0349±0.00 | 0.3805±0.00 | 0.0435±0.00 | 0.4724±0.00 | 0.5111±0.00 | 0.5428±0.00 | 0.000074±0.00 | 0.2677±0.00 | 42 | --------------------------------------------------------------------------------
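A minimal sketch of the dump / reload / reinitialize cycle described above, assuming `dataset` is the `DatasetH` instance built in `workflow.py`; the file name and handler kwargs are illustrative.

```python
import pickle

from qlib.data.dataset.handler import DataHandlerLP

# dump: DatasetH inherits Serializable, so its state can be pickled to disk
dataset.to_pickle("highfreq_dataset.pkl")

# reload
with open("highfreq_dataset.pkl", "rb") as f:
    dataset = pickle.load(f)

# reinitialize: reset states such as the time range, then regenerate the data
dataset.config(handler_kwargs={"start_time": "2021-01-04", "end_time": "2021-01-29"})
dataset.setup_data(handler_kwargs={"init_type": DataHandlerLP.IT_FIT_SEQ})
```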
/tests/dataset_tests/test_datalayer.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from qlib.data import D 4 | from qlib.tests import TestAutoData 5 | 6 | 7 | class TestDataset(TestAutoData): 8 | def testCSI300(self): 9 | close_p = D.features(D.instruments("csi300"), ["$close"]) 10 | size = close_p.groupby("datetime", group_keys=False).size() 11 | cnt = close_p.groupby("datetime", group_keys=False).count()["$close"] 12 | size_desc = size.describe(percentiles=np.arange(0.1, 1.0, 0.1)) 13 | cnt_desc = cnt.describe(percentiles=np.arange(0.1, 1.0, 0.1)) 14 | 15 | print(size_desc) 16 | print(cnt_desc) 17 | 18 | self.assertLessEqual(size_desc.loc["max"], 305, "Excessive number of CSI300 constituent stocks") 19 | self.assertGreaterEqual(size_desc.loc["80%"], 290, "Insufficient number of CSI300 constituent stocks") 20 | 21 | self.assertLessEqual(cnt_desc.loc["max"], 305, "Excessive number of CSI300 constituent stocks") 22 | # FIXME: Due to the low quality of the data, it is hard to make sure there are enough data points 23 | # self.assertEqual(cnt_desc.loc["80%"], 300, "Insufficient number of CSI300 constituent stocks") 24 | 25 | def testClose(self): 26 | close_p = D.features(D.instruments("csi300"), ["Ref($close, 1)/$close - 1"]) 27 | close_desc = close_p.describe(percentiles=np.arange(0.1, 1.0, 0.1)) 28 | print(close_desc) 29 | self.assertLessEqual(abs(close_desc.loc["90%"][0]), 0.1, "Close value is abnormal") 30 | self.assertLessEqual(abs(close_desc.loc["10%"][0]), 0.1, "Close value is abnormal") 31 | # FIXME: The yahoo data is not perfect, so we have to relax these checks: 32 | # self.assertLessEqual(abs(close_desc.loc["max"][0]), 0.2, "Close value is abnormal") 33 | # self.assertGreaterEqual(close_desc.loc["min"][0], -0.2, "Close value is abnormal") 34 | 35 | 36 | if __name__ == "__main__": 37 | unittest.main() 38 | -------------------------------------------------------------------------------- /examples/portfolio/prepare_riskdata.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | import os 4 | import numpy as np 5 | import pandas as pd 6 | 7 | from qlib.data import D 8 | from qlib.model.riskmodel import StructuredCovEstimator 9 | 10 | 11 | def prepare_data(riskdata_root="./riskdata", T=240, start_time="2016-01-01"): 12 | universe = D.features(D.instruments("csi300"), ["$close"], start_time=start_time).swaplevel().sort_index() 13 | 14 | price_all = ( 15 | D.features(D.instruments("all"), ["$close"], start_time=start_time).squeeze().unstack(level="instrument") 16 | ) 17 | 18 | # StructuredCovEstimator is a statistical risk model 19 | riskmodel = StructuredCovEstimator() 20 | 21 | for i in range(T - 1, len(price_all)): 22 | date = price_all.index[i] 23 | ref_date = price_all.index[i - T + 1] 24 | 25 | print(date) 26 | 27 | codes = universe.loc[date].index 28 | price = price_all.loc[ref_date:date, codes] 29 | 30 | # calculate returns and clip extreme returns 31 | ret = price.pct_change() 32 | ret.clip(ret.quantile(0.025), ret.quantile(0.975), axis=1, inplace=True) 33 | 34 | # run the risk model 35 | F, cov_b, var_u = riskmodel.predict(ret, is_price=False, return_decomposed_components=True) 36 | 37 | # save the risk data 38 | root = riskdata_root + "/" + date.strftime("%Y%m%d") 39 | os.makedirs(root, exist_ok=True) 40 | 41 | pd.DataFrame(F, index=codes).to_pickle(root + "/factor_exp.pkl") 42 | pd.DataFrame(cov_b).to_pickle(root + "/factor_cov.pkl") 43 | # for specific_risk we follow the convention of saving volatility 44 | pd.Series(np.sqrt(var_u), index=codes).to_pickle(root + "/specific_risk.pkl") 45 | 46 | 47 | if __name__ == "__main__": 48 | import qlib 49 | 50 | qlib.init(provider_uri="~/.qlib/qlib_data/cn_data") 51 | 52 | prepare_data() 53 | -------------------------------------------------------------------------------- /examples/rl_order_execution/exp_configs/backtest_ppo.yml: -------------------------------------------------------------------------------- 1 | order_file: ./data/orders/test_orders.pkl 2 | start_time: "9:30" 3 | end_time: "14:54" 4 | data_granularity: "5min" 5 | qlib: 6 | provider_uri_5min: ./data/bin/ 7 | exchange: 8 | limit_threshold: null 9 | deal_price: ["$close", "$close"] 10 | volume_threshold: null 11 | strategies: 12 | 1day: 13 | class: SAOEIntStrategy 14 | kwargs: 15 | data_granularity: 5 16 | action_interpreter: 17 | class: CategoricalActionInterpreter 18 | kwargs: 19 | max_step: 8 20 | values: 4 21 | module_path: 
qlib.rl.order_execution.interpreter 22 | network: 23 | class: Recurrent 24 | kwargs: {} 25 | module_path: qlib.rl.order_execution.network 26 | policy: 27 | class: PPO # PPO, DQN 28 | kwargs: 29 | lr: 0.0001 30 | # Restore `weight_file` once the training workflow finishes. You can change the checkpoint file you want to use. 31 | # weight_file: outputs/ppo/checkpoints/latest.pth 32 | module_path: qlib.rl.order_execution.policy 33 | state_interpreter: 34 | class: FullHistoryStateInterpreter 35 | kwargs: 36 | data_dim: 5 37 | data_ticks: 48 38 | max_step: 8 39 | processed_data_provider: 40 | class: HandlerProcessedDataProvider 41 | kwargs: 42 | data_dir: ./data/pickle/ 43 | feature_columns_today: ["$high", "$low", "$open", "$close", "$volume"] 44 | feature_columns_yesterday: ["$high_1", "$low_1", "$open_1", "$close_1", "$volume_1"] 45 | module_path: qlib.rl.data.native 46 | module_path: qlib.rl.order_execution.interpreter 47 | module_path: qlib.rl.order_execution.strategy 48 | 30min: 49 | class: TWAPStrategy 50 | kwargs: {} 51 | module_path: qlib.contrib.strategy.rule_strategy 52 | concurrency: 16 53 | output_dir: outputs/ppo/ 54 | -------------------------------------------------------------------------------- /examples/rl_order_execution/exp_configs/backtest_opds.yml: -------------------------------------------------------------------------------- 1 | order_file: ./data/orders/test_orders.pkl 2 | start_time: "9:30" 3 | end_time: "14:54" 4 | data_granularity: "5min" 5 | qlib: 6 | provider_uri_5min: ./data/bin/ 7 | exchange: 8 | limit_threshold: null 9 | deal_price: ["$close", "$close"] 10 | volume_threshold: null 11 | strategies: 12 | 1day: 13 | class: SAOEIntStrategy 14 | kwargs: 15 | data_granularity: 5 16 | action_interpreter: 17 | class: CategoricalActionInterpreter 18 | kwargs: 19 | max_step: 8 20 | values: 4 21 | module_path: qlib.rl.order_execution.interpreter 22 | network: 23 | class: Recurrent 24 | kwargs: {} 25 | module_path: qlib.rl.order_execution.network 26 | policy: 27 | class: PPO # PPO, DQN 28 | kwargs: 29 | lr: 0.0001 30 | # Restore `weight_file` once the training workflow finishes. You can change the checkpoint file you want to use. 
31 | # weight_file: outputs/opds/checkpoints/latest.pth 32 | module_path: qlib.rl.order_execution.policy 33 | state_interpreter: 34 | class: FullHistoryStateInterpreter 35 | kwargs: 36 | data_dim: 5 37 | data_ticks: 48 38 | max_step: 8 39 | processed_data_provider: 40 | class: HandlerProcessedDataProvider 41 | kwargs: 42 | data_dir: ./data/pickle/ 43 | feature_columns_today: ["$high", "$low", "$open", "$close", "$volume"] 44 | feature_columns_yesterday: ["$high_1", "$low_1", "$open_1", "$close_1", "$volume_1"] 45 | module_path: qlib.rl.data.native 46 | module_path: qlib.rl.order_execution.interpreter 47 | module_path: qlib.rl.order_execution.strategy 48 | 30min: 49 | class: TWAPStrategy 50 | kwargs: {} 51 | module_path: qlib.contrib.strategy.rule_strategy 52 | concurrency: 16 53 | output_dir: outputs/opds/ 54 | -------------------------------------------------------------------------------- /examples/rl_order_execution/exp_configs/train_opds.yml: -------------------------------------------------------------------------------- 1 | simulator: 2 | data_granularity: 5 3 | time_per_step: 30 4 | vol_limit: null 5 | env: 6 | concurrency: 32 7 | parallel_mode: dummy 8 | action_interpreter: 9 | class: CategoricalActionInterpreter 10 | kwargs: 11 | values: 4 12 | max_step: 8 13 | module_path: qlib.rl.order_execution.interpreter 14 | state_interpreter: 15 | class: FullHistoryStateInterpreter 16 | kwargs: 17 | data_dim: 5 18 | data_ticks: 48 # 48 = 240 min / 5 min 19 | max_step: 8 20 | processed_data_provider: 21 | class: HandlerProcessedDataProvider 22 | kwargs: 23 | data_dir: ./data/pickle/ 24 | feature_columns_today: ["$high", "$low", "$open", "$close", "$volume"] 25 | feature_columns_yesterday: ["$high_1", "$low_1", "$open_1", "$close_1", "$volume_1"] 26 | backtest: false 27 | module_path: qlib.rl.data.native 28 | module_path: qlib.rl.order_execution.interpreter 29 | reward: 30 | class: PAPenaltyReward 31 | kwargs: 32 | penalty: 4.0 33 | scale: 0.01 34 | module_path: qlib.rl.order_execution.reward 35 | data: 36 | source: 37 | order_dir: ./data/orders 38 | feature_root_dir: ./data/pickle/ 39 | feature_columns_today: ["$close0", "$volume0"] 40 | feature_columns_yesterday: [] 41 | total_time: 240 42 | default_start_time_index: 0 43 | default_end_time_index: 235 44 | proc_data_dim: 5 45 | num_workers: 0 46 | queue_size: 20 47 | network: 48 | class: Recurrent 49 | module_path: qlib.rl.order_execution.network 50 | policy: 51 | class: PPO # PPO, DQN 52 | kwargs: 53 | lr: 0.0001 54 | module_path: qlib.rl.order_execution.policy 55 | runtime: 56 | seed: 42 57 | use_cuda: false 58 | trainer: 59 | max_epoch: 500 60 | repeat_per_collect: 25 61 | earlystop_patience: 50 62 | episode_per_collect: 10000 63 | batch_size: 1024 64 | val_every_n_epoch: 4 65 | checkpoint_path: ./outputs/opds 66 | checkpoint_every_n_iters: 1 67 | -------------------------------------------------------------------------------- /examples/rl_order_execution/scripts/gen_pickle_data.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
3 | 4 | import yaml 5 | import argparse 6 | import os 7 | import shutil 8 | from copy import deepcopy 9 | 10 | from qlib.contrib.data.highfreq_provider import HighFreqProvider 11 | 12 | loader = yaml.FullLoader 13 | 14 | if __name__ == "__main__": 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument("-c", "--config", type=str, default="config.yml") 17 | parser.add_argument("-d", "--dest", type=str, default=".") 18 | parser.add_argument("-s", "--split", type=str, choices=["none", "date", "stock", "both"], default="stock") 19 | args = parser.parse_args() 20 | 21 | conf = yaml.load(open(args.config), Loader=loader) 22 | 23 | for k, v in conf.items(): 24 | if isinstance(v, dict) and "path" in v: 25 | v["path"] = os.path.join(args.dest, v["path"]) 26 | provider = HighFreqProvider(**conf) 27 | 28 | # Gen dataframe 29 | if "feature_conf" in conf: 30 | feature = provider._gen_dataframe(deepcopy(provider.feature_conf)) 31 | if "backtest_conf" in conf: 32 | backtest = provider._gen_dataframe(deepcopy(provider.backtest_conf)) 33 | 34 | provider.feature_conf["path"] = os.path.splitext(provider.feature_conf["path"])[0] + "/" 35 | provider.backtest_conf["path"] = os.path.splitext(provider.backtest_conf["path"])[0] + "/" 36 | # Split by date 37 | if args.split == "date" or args.split == "both": 38 | provider._gen_day_dataset(deepcopy(provider.feature_conf), "feature") 39 | provider._gen_day_dataset(deepcopy(provider.backtest_conf), "backtest") 40 | 41 | # Split by stock 42 | if args.split == "stock" or args.split == "both": 43 | provider._gen_stock_dataset(deepcopy(provider.feature_conf), "feature") 44 | provider._gen_stock_dataset(deepcopy(provider.backtest_conf), "backtest") 45 | 46 | shutil.rmtree("stat/", ignore_errors=True) 47 | -------------------------------------------------------------------------------- /model/meta/task.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | from qlib.data.dataset import Dataset 5 | from ...utils import init_instance_by_config 6 | 7 | 8 | class MetaTask: 9 | """ 10 | A single meta-task, a meta-dataset contains a list of them. 11 | It serves as a component as in MetaDatasetDS 12 | 13 | The data processing is different 14 | 15 | - the processed input may be different between training and testing 16 | 17 | - When training, the X, y, X_test, y_test in training tasks are necessary (# PROC_MODE_FULL #) 18 | but not necessary in test tasks. 
(# PROC_MODE_TEST #) 19 | - When the meta model can be transferred to another dataset, only meta_info is necessary (# PROC_MODE_TRANSFER #) 20 | """ 21 | 22 | PROC_MODE_FULL = "full" 23 | PROC_MODE_TEST = "test" 24 | PROC_MODE_TRANSFER = "transfer" 25 | 26 | def __init__(self, task: dict, meta_info: object, mode: str = PROC_MODE_FULL): 27 | """ 28 | The `__init__` func is responsible for 29 | 30 | - store the task 31 | - store the original input data 32 | - process the input data into the meta information 33 | 34 | Parameters 35 | ---------- 36 | task : dict 37 | the task to be enhanced by the meta model 38 | 39 | meta_info : object 40 | the input for the meta model 41 | """ 42 | self.task = task 43 | self.meta_info = meta_info  # the original meta input information; it will be processed later 44 | self.mode = mode 45 | 46 | def get_dataset(self) -> Dataset: 47 | return init_instance_by_config(self.task["dataset"], accept_types=Dataset) 48 | 49 | def get_meta_input(self) -> object: 50 | """ 51 | Return the **processed** meta_info 52 | """ 53 | return self.meta_info 54 | 55 | def __repr__(self): 56 | return f"MetaTask(task={self.task}, meta_info={self.meta_info})" 57 | -------------------------------------------------------------------------------- /rl/data/base.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | from __future__ import annotations 4 | 5 | from abc import abstractmethod 6 | 7 | import pandas as pd 8 | 9 | 10 | class BaseIntradayBacktestData: 11 | """ 12 | Raw market data that is often used in backtesting (thus called BacktestData). 13 | 14 | Base class for all types of backtest data. Currently, each type of simulator has its corresponding backtest 15 | data type. 16 | """ 17 | 18 | @abstractmethod 19 | def __repr__(self) -> str: 20 | raise NotImplementedError 21 | 22 | @abstractmethod 23 | def __len__(self) -> int: 24 | raise NotImplementedError 25 | 26 | @abstractmethod 27 | def get_deal_price(self) -> pd.Series: 28 | raise NotImplementedError 29 | 30 | @abstractmethod 31 | def get_volume(self) -> pd.Series: 32 | raise NotImplementedError 33 | 34 | @abstractmethod 35 | def get_time_index(self) -> pd.DatetimeIndex: 36 | raise NotImplementedError 37 | 38 | 39 | class BaseIntradayProcessedData: 40 | """Processed market data after data cleanup and feature engineering. 41 | 42 | It contains both processed data for "today" and "yesterday", as some algorithms 43 | might use the market information of the previous day to assist decision making. 44 | """ 45 | 46 | today: pd.DataFrame 47 | """Processed data for "today". 48 | Number of records must be ``time_length``, and columns must be ``feature_dim``.""" 49 | 50 | yesterday: pd.DataFrame 51 | """Processed data for "yesterday". 
52 | Number of records must be ``time_length``, and columns must be ``feature_dim``.""" 53 | 54 | 55 | class ProcessedDataProvider: 56 | """Provider of processed data""" 57 | 58 | def get_data( 59 | self, 60 | stock_id: str, 61 | date: pd.Timestamp, 62 | feature_dim: int, 63 | time_index: pd.Index, 64 | ) -> BaseIntradayProcessedData: 65 | raise NotImplementedError 66 | -------------------------------------------------------------------------------- /examples/rl_order_execution/exp_configs/train_ppo.yml: -------------------------------------------------------------------------------- 1 | simulator: 2 | data_granularity: 5 3 | time_per_step: 30 4 | vol_limit: null 5 | env: 6 | concurrency: 32 7 | parallel_mode: dummy 8 | action_interpreter: 9 | class: CategoricalActionInterpreter 10 | kwargs: 11 | values: 4 12 | max_step: 8 13 | module_path: qlib.rl.order_execution.interpreter 14 | state_interpreter: 15 | class: FullHistoryStateInterpreter 16 | kwargs: 17 | data_dim: 5 18 | data_ticks: 48 # 48 = 240 min / 5 min 19 | max_step: 8 20 | processed_data_provider: 21 | class: HandlerProcessedDataProvider 22 | kwargs: 23 | data_dir: ./data/pickle/ 24 | feature_columns_today: ["$high", "$low", "$open", "$close", "$volume"] 25 | feature_columns_yesterday: ["$high_1", "$low_1", "$open_1", "$close_1", "$volume_1"] 26 | backtest: false 27 | module_path: qlib.rl.data.native 28 | module_path: qlib.rl.order_execution.interpreter 29 | reward: 30 | class: PPOReward 31 | kwargs: 32 | max_step: 8 33 | start_time_index: 0 34 | end_time_index: 46 # 46 = (240 - 5) min / 5 min - 1 35 | module_path: qlib.rl.order_execution.reward 36 | data: 37 | source: 38 | order_dir: ./data/orders 39 | feature_root_dir: ./data/pickle/ 40 | feature_columns_today: ["$close0", "$volume0"] 41 | feature_columns_yesterday: [] 42 | total_time: 240 43 | default_start_time_index: 0 44 | default_end_time_index: 235 45 | proc_data_dim: 5 46 | num_workers: 0 47 | queue_size: 20 48 | network: 49 | class: Recurrent 50 | module_path: qlib.rl.order_execution.network 51 | policy: 52 | class: PPO # PPO, DQN 53 | kwargs: 54 | lr: 0.0001 55 | module_path: qlib.rl.order_execution.policy 56 | runtime: 57 | seed: 42 58 | use_cuda: false 59 | trainer: 60 | max_epoch: 500 61 | repeat_per_collect: 25 62 | earlystop_patience: 50 63 | episode_per_collect: 10000 64 | batch_size: 1024 65 | val_every_n_epoch: 4 66 | checkpoint_path: ./outputs/ppo 67 | checkpoint_every_n_iters: 1 68 | -------------------------------------------------------------------------------- /tests/test_register_ops.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
3 | 4 | import unittest 5 | import numpy as np 6 | 7 | from qlib.data import D 8 | from qlib.data.ops import ElemOperator, PairOperator 9 | from qlib.tests import TestAutoData 10 | 11 | 12 | class Diff(ElemOperator): 13 | """Feature First Difference 14 | Parameters 15 | ---------- 16 | feature : Expression 17 | feature instance 18 | Returns 19 | ---------- 20 | Expression 21 | a feature instance with first difference 22 | """ 23 | 24 | def _load_internal(self, instrument, start_index, end_index, freq): 25 | series = self.feature.load(instrument, start_index, end_index, freq) 26 | return series.diff() 27 | 28 | def get_extended_window_size(self): 29 | lft_etd, rght_etd = self.feature.get_extended_window_size() 30 | return lft_etd + 1, rght_etd 31 | 32 | 33 | class Distance(PairOperator): 34 | """Feature Distance 35 | Parameters 36 | ---------- 37 | feature : Expression 38 | feature instance 39 | Returns 40 | ---------- 41 | Expression 42 | a feature instance with the element-wise absolute distance 43 | """ 44 | 45 | def _load_internal(self, instrument, start_index, end_index, freq): 46 | series_left = self.feature_left.load(instrument, start_index, end_index, freq) 47 | series_right = self.feature_right.load(instrument, start_index, end_index, freq) 48 | return np.abs(series_left - series_right) 49 | 50 | 51 | class TestRegisterCustomOps(TestAutoData): 52 | @classmethod 53 | def setUpClass(cls) -> None: 54 | cls._setup_kwargs.update({"custom_ops": [Diff, Distance]}) 55 | super().setUpClass() 56 | 57 | def test_register_custom_ops(self): 58 | instruments = ["SH600000"] 59 | fields = ["Diff($close)", "Distance($close, Ref($close, 1))"] 60 | print(D.features(instruments, fields, start_time="2010-01-01", end_time="2017-12-31", freq="day")) 61 | 62 | 63 | if __name__ == "__main__": 64 | unittest.main() 65 | -------------------------------------------------------------------------------- /examples/benchmarks/TFT/workflow_config_tft_Alpha158.yaml: -------------------------------------------------------------------------------- 1 | sys: 2 | rel_path: . 
3 | qlib_init: 4 | provider_uri: "~/.qlib/qlib_data/cn_data" 5 | region: cn 6 | market: &market csi300 7 | benchmark: &benchmark SH000300 8 | data_handler_config: &data_handler_config 9 | start_time: 2008-01-01 10 | end_time: 2020-08-01 11 | fit_start_time: 2008-01-01 12 | fit_end_time: 2014-12-31 13 | instruments: *market 14 | port_analysis_config: &port_analysis_config 15 | strategy: 16 | class: TopkDropoutStrategy 17 | module_path: qlib.contrib.strategy 18 | kwargs: 19 | signal: <PRED> 20 | topk: 50 21 | n_drop: 5 22 | backtest: 23 | start_time: 2017-01-01 24 | end_time: 2020-08-01 25 | account: 100000000 26 | benchmark: *benchmark 27 | exchange_kwargs: 28 | limit_threshold: 0.095 29 | deal_price: close 30 | open_cost: 0.0005 31 | close_cost: 0.0015 32 | min_cost: 5 33 | task: 34 | model: 35 | class: TFTModel 36 | module_path: tft 37 | dataset: 38 | class: DatasetH 39 | module_path: qlib.data.dataset 40 | kwargs: 41 | handler: 42 | class: Alpha158 43 | module_path: qlib.contrib.data.handler 44 | kwargs: *data_handler_config 45 | segments: 46 | train: [2008-01-01, 2014-12-31] 47 | valid: [2015-01-01, 2016-12-31] 48 | test: [2017-01-01, 2020-08-01] 49 | record: 50 | - class: SignalRecord 51 | module_path: qlib.workflow.record_temp 52 | kwargs: 53 | model: <MODEL> 54 | dataset: <DATASET> 55 | - class: SigAnaRecord 56 | module_path: qlib.workflow.record_temp 57 | kwargs: 58 | ana_long_short: False 59 | ann_scaler: 252 60 | - class: PortAnaRecord 61 | module_path: qlib.workflow.record_temp 62 | kwargs: 63 | config: *port_analysis_config 64 | -------------------------------------------------------------------------------- /examples/hyperparameter/LightGBM/hyperparameter_158.py: -------------------------------------------------------------------------------- 1 | import qlib 2 | import optuna 3 | from qlib.constant import REG_CN 4 | from qlib.utils import init_instance_by_config 5 | from qlib.tests.config import CSI300_DATASET_CONFIG 6 | from qlib.tests.data import GetData 7 | 8 | 9 | def objective(trial): 10 | task = { 11 | "model": { 12 | "class": "LGBModel", 13 | "module_path": "qlib.contrib.model.gbdt", 14 | "kwargs": { 15 | "loss": "mse", 16 | "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1), 17 | "learning_rate": trial.suggest_uniform("learning_rate", 0, 1), 18 | "subsample": trial.suggest_uniform("subsample", 0, 1), 19 | "lambda_l1": trial.suggest_loguniform("lambda_l1", 1e-8, 1e4), 20 | "lambda_l2": trial.suggest_loguniform("lambda_l2", 1e-8, 1e4), 21 | "max_depth": 10, 22 | "num_leaves": trial.suggest_int("num_leaves", 1, 1024), 23 | "feature_fraction": trial.suggest_uniform("feature_fraction", 0.4, 1.0), 24 | "bagging_fraction": trial.suggest_uniform("bagging_fraction", 0.4, 1.0), 25 | "bagging_freq": trial.suggest_int("bagging_freq", 1, 7), 26 | "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 1, 50), 27 | "min_child_samples": trial.suggest_int("min_child_samples", 5, 100), 28 | }, 29 | }, 30 | } 31 | evals_result = dict() 32 | model = init_instance_by_config(task["model"]) 33 | model.fit(dataset, evals_result=evals_result) 34 | return min(evals_result["valid"]) 35 | 36 | 37 | if __name__ == "__main__": 38 | provider_uri = "~/.qlib/qlib_data/cn_data" 39 | GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True) 40 | qlib.init(provider_uri=provider_uri, region="cn") 41 | 42 | dataset = init_instance_by_config(CSI300_DATASET_CONFIG) 43 | 44 | study = optuna.create_study(study_name="LGBM_158", storage="sqlite:///db.sqlite3", load_if_exists=True) # create_study (rather than instantiating Study directly) creates the study in storage on first use 45 | study.optimize(objective, n_jobs=6) 46 |
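Once the search above has been running, the tuned parameters can be read back from the same SQLite storage. A minimal sketch (it assumes the `LGBM_158` study has completed at least one trial):

```python
import optuna

# Reload the persisted study and inspect the best trial found so far.
study = optuna.load_study(study_name="LGBM_158", storage="sqlite:///db.sqlite3")
print(study.best_value)   # the lowest validation loss observed
print(study.best_params)  # LightGBM kwargs that can be plugged back into the task config
```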
-------------------------------------------------------------------------------- /tests/misc/test_sepdf.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | import unittest 4 | import numpy as np 5 | import pandas as pd 6 | from qlib.contrib.data.utils.sepdf import SepDataFrame 7 | 8 | 9 | class SepDF(unittest.TestCase): 10 | def to_str(self, obj): 11 | return "".join(str(obj).split()) 12 | 13 | def test_index_data(self): 14 | np.random.seed(42) 15 | 16 | index = [ 17 | np.array(["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"]), 18 | np.array(["one", "two", "one", "two", "one", "two", "one", "two"]), 19 | ] 20 | 21 | cols = [ 22 | np.repeat(np.array(["g1", "g2"]), 2), 23 | np.arange(4), 24 | ] 25 | df = pd.DataFrame(np.random.randn(8, 4), index=index, columns=cols) 26 | sdf = SepDataFrame(df_dict={"g2": df["g2"]}, join=None) 27 | sdf[("g2", 4)] = 3 28 | sdf["g1"] = df["g1"] 29 | exp = """ 30 | {'g2': 2 3 4 31 | bar one 0.647689 1.523030 3 32 | two 1.579213 0.767435 3 33 | baz one -0.463418 -0.465730 3 34 | two -1.724918 -0.562288 3 35 | foo one -0.908024 -1.412304 3 36 | two 0.067528 -1.424748 3 37 | qux one -1.150994 0.375698 3 38 | two -0.601707 1.852278 3, 'g1': 0 1 39 | bar one 0.496714 -0.138264 40 | two -0.234153 -0.234137 41 | baz one -0.469474 0.542560 42 | two 0.241962 -1.913280 43 | foo one -1.012831 0.314247 44 | two 1.465649 -0.225776 45 | qux one -0.544383 0.110923 46 | two -0.600639 -0.291694} 47 | """ 48 | self.assertEqual(self.to_str(sdf._df_dict), self.to_str(exp)) 49 | 50 | del df["g1"] 51 | del df["g2"] 52 | # this will not raise an error, and df will become an empty dataframe 53 | 54 | del sdf["g1"] 55 | del sdf["g2"] 56 | # sdf should support deleting all of its columns 57 | 58 | 59 | if __name__ == "__main__": 60 | unittest.main() 61 | -------------------------------------------------------------------------------- /scripts/collect_info.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import platform 3 | import qlib 4 | import fire 5 | import pkg_resources 6 | from pathlib import Path 7 | 8 | QLIB_PATH = Path(__file__).absolute().resolve().parent.parent 9 | 10 | 11 | class InfoCollector: 12 | """ 13 | Users can collect system info by running the following command 14 | `cd scripts && python collect_info.py all` 15 | - NOTE: please avoid running this script in a project folder that contains `qlib` 16 | """ 17 | 18 | def sys(self): 19 | """collect system-related info""" 20 | for method in ["system", "machine", "platform", "version"]: 21 | print(getattr(platform, method)()) 22 | 23 | def py(self): 24 | """collect Python-related info""" 25 | print("Python version: {}".format(sys.version.replace("\n", " "))) 26 | 27 | def qlib(self): 28 | """collect qlib-related info""" 29 | print("Qlib version: {}".format(qlib.__version__)) 30 | REQUIRED = [ 31 | "setuptools", 32 | "wheel", 33 | "cython", 34 | "pyyaml", 35 | "numpy", 36 | "pandas", 37 | "mlflow", 38 | "filelock", 39 | "redis", 40 | "dill", 41 | "fire", 42 | "ruamel.yaml", 43 | "python-redis-lock", 44 | "tqdm", 45 | "pymongo", 46 | "loguru", 47 | "lightgbm", 48 | "gym", 49 | "cvxpy", 50 | "joblib", 51 | "matplotlib", 52 | "jupyter", 53 | "nbconvert", 54 | "pyarrow", 55 | "pydantic-settings", 56 | "setuptools-scm", 57 | ] 58 | 59 | for package in REQUIRED: 60 | version = pkg_resources.get_distribution(package).version 61 | print(f"{package}=={version}") 62 | 63 |
def all(self): 64 | """collect all info""" 65 | for method in ["sys", "py", "qlib"]: 66 | getattr(self, method)() 67 | print() 68 | 69 | 70 | if __name__ == "__main__": 71 | fire.Fire(InfoCollector) 72 | -------------------------------------------------------------------------------- /examples/data_demo/data_cache_demo.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | """ 4 | The motivation of this demo 5 | - To show that the data modules of Qlib are Serializable, so users can dump processed data to disk to avoid duplicated data preprocessing 6 | """ 7 | 8 | from copy import deepcopy 9 | from pathlib import Path 10 | import pickle 11 | from pprint import pprint 12 | from ruamel.yaml import YAML 13 | import subprocess 14 | from qlib.log import TimeInspector 15 | 16 | from qlib import init 17 | from qlib.data.dataset.handler import DataHandlerLP 18 | from qlib.utils import init_instance_by_config 19 | 20 | # For general purposes, we use a relative path 21 | DIRNAME = Path(__file__).absolute().resolve().parent 22 | 23 | if __name__ == "__main__": 24 | init() 25 | 26 | config_path = DIRNAME.parent / "benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml" 27 | 28 | # 1) show the original time 29 | with TimeInspector.logt("The original time without handler cache:"): 30 | subprocess.run(f"qrun {config_path}", shell=True) 31 | 32 | # 2) dump the handler 33 | yaml = YAML(typ="safe", pure=True) 34 | task_config = yaml.load(config_path.open()) 35 | hd_conf = task_config["task"]["dataset"]["kwargs"]["handler"] 36 | pprint(hd_conf) 37 | hd: DataHandlerLP = init_instance_by_config(hd_conf) 38 | hd_path = DIRNAME / "handler.pkl" 39 | hd.to_pickle(hd_path, dump_all=True) 40 | 41 | # 3) create a new task with the handler cache 42 | new_task_config = deepcopy(task_config) 43 | new_task_config["task"]["dataset"]["kwargs"]["handler"] = f"file://{hd_path}" 44 | new_task_config["sys"] = {"path": [str(config_path.parent.resolve())]} 45 | new_task_path = DIRNAME / "new_task.yaml" 46 | print("The location of the new task", new_task_path) 47 | 48 | # save the new task 49 | with new_task_path.open("w") as f: 50 | yaml.dump(new_task_config, f) # ruamel.yaml's YAML instances expose dump(), not safe_dump() 51 | 52 | # 4) train the model with the new task 53 | with TimeInspector.logt("The time for the task with handler cache:"): 54 | subprocess.run(f"qrun {new_task_path}", shell=True) 55 | -------------------------------------------------------------------------------- /examples/benchmarks_dynamic/README.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | Due to the non-stationary nature of the financial market environment, the data distribution may change across different periods, which makes the performance of models built on the training data decay on future test data. 3 | Adapting the forecasting models/strategies to market dynamics is therefore very important to their performance. 4 | 5 | The table below shows the performances of different solutions on different forecasting models.
6 | 7 | ## Alpha158 Dataset 8 | Here is the [crowd sourced version of qlib data](data_collector/crowd_source/README.md): https://github.com/chenditc/investment_data/releases 9 | ```bash 10 | wget https://github.com/chenditc/investment_data/releases/latest/download/qlib_bin.tar.gz 11 | mkdir -p ~/.qlib/qlib_data/cn_data 12 | tar -zxvf qlib_bin.tar.gz -C ~/.qlib/qlib_data/cn_data --strip-components=2 13 | rm -f qlib_bin.tar.gz 14 | ``` 15 | 16 | | Model Name | Dataset | IC | ICIR | Rank IC | Rank ICIR | Annualized Return | Information Ratio | Max Drawdown | 17 | |------------------|---------|------|------|---------|-----------|-------------------|-------------------|--------------| 18 | | RR[Linear] |Alpha158 |0.0945|0.5989|0.1069 |0.6495 |0.0857 |1.3682 |-0.0986 | 19 | | DDG-DA[Linear] |Alpha158 |0.0983|0.6157|0.1108 |0.6646 |0.0764 |1.1904 |-0.0769 | 20 | | RR[LightGBM] |Alpha158 |0.0816|0.5887|0.0912 |0.6263 |0.0771 |1.3196 |-0.0909 | 21 | | DDG-DA[LightGBM] |Alpha158 |0.0878|0.6185|0.0975 |0.6524 |0.1261 |2.0096 |-0.0744 | 22 | 23 | - The label horizon of the `Alpha158` dataset is set to 20. 24 | - The rolling time intervals are set to 20 trading days. 25 | - The test rolling periods are from January 2017 to August 2020. 26 | - The results are based on the crowd-sourced version. The Yahoo version of qlib data does not contain `VWAP`, so all related factors are missing and filled with 0, which leads to a rank-deficient matrix (a matrix that does not have full rank) and makes the lower-level optimization of DDG-DA unsolvable. 27 | -------------------------------------------------------------------------------- /examples/orderbook_data/README.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | 3 | This example demonstrates how Qlib supports data without a fixed, shared frequency. 4 | 5 | For example, 6 | - Daily price-volume data are fixed-frequency data: they come at a fixed frequency (i.e. daily) 7 | - Orders are not fixed-frequency data; they may come at any time point 8 | 9 | To support such non-fixed-frequency data, Qlib implements an Arctic-based backend. 10 | Here is an example to import and query data based on this backend. 11 | 12 | # Installation 13 | 14 | Please refer to [the installation docs](https://docs.mongodb.com/manual/installation/) of mongodb. 15 | The current version of the script uses default values and tries to connect to localhost **via the default port without authentication**. 16 | 17 | Run the following commands to install the necessary libraries 18 | ``` 19 | pip install pytest coverage gdown 20 | pip install arctic # NOTE: pip may fail to resolve the right package dependency !!! Please make sure the dependencies are satisfied. 21 | ``` 22 | 23 | # Importing example data 24 | 25 | 26 | 1. (Optional) Please follow the first part of [this section](https://github.com/microsoft/qlib#data-preparation) to **get 1min data** of Qlib. 27 | 2. Please follow the steps below to download the example data 28 | ```bash 29 | cd examples/orderbook_data/ 30 | gdown https://drive.google.com/uc?id=15FuUqWn2rkCi8uhJYGEQWKakcEqLJNDG # Proxies may be necessary here. 31 | python ../../scripts/get_data.py _unzip --file_path highfreq_orderbook_example_data.zip --target_dir . 32 | ``` 33 | 34 | 3.
Please import the example data into your MongoDB 35 | ```bash 36 | python create_dataset.py initialize_library # Initialize the libraries 37 | python create_dataset.py import_data # Import the data 38 | ``` 39 | 40 | # Query Examples 41 | 42 | After importing the data, you can run `example.py` to create some high-frequency features. 43 | ```bash 44 | pytest -s --disable-warnings example.py # If you want to run all examples 45 | pytest -s --disable-warnings example.py::TestClass::test_exp_10 # If you want to run a specific example 46 | ``` 47 | 48 | 49 | # Known limitations 50 | Computing expressions across different frequencies is not supported yet 51 | -------------------------------------------------------------------------------- /examples/online_srv/update_online_pred.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | """ 5 | This example shows how OnlineTool works when we need to update predictions. 6 | There are two parts: first_train and update_online_pred. 7 | First, we finish the training and set the trained models as the `online` models. 8 | Next, we update the online predictions. 9 | """ 10 | import copy 11 | import fire 12 | import qlib 13 | from qlib.constant import REG_CN 14 | from qlib.model.trainer import task_train 15 | from qlib.workflow.online.utils import OnlineToolR 16 | from qlib.tests.config import CSI300_GBDT_TASK 17 | 18 | task = copy.deepcopy(CSI300_GBDT_TASK) 19 | 20 | task["record"] = { 21 | "class": "SignalRecord", 22 | "module_path": "qlib.workflow.record_temp", 23 | } 24 | 25 | 26 | class UpdatePredExample: 27 | def __init__( 28 | self, provider_uri="~/.qlib/qlib_data/cn_data", region=REG_CN, experiment_name="online_srv", task_config=task 29 | ): 30 | qlib.init(provider_uri=provider_uri, region=region) 31 | self.experiment_name = experiment_name 32 | self.online_tool = OnlineToolR(self.experiment_name) 33 | self.task_config = task_config 34 | 35 | def first_train(self): 36 | rec = task_train(self.task_config, experiment_name=self.experiment_name) 37 | self.online_tool.reset_online_tag(rec) # set it as the online model 38 | 39 | def update_online_pred(self): 40 | self.online_tool.update_online_pred() 41 | 42 | def main(self): 43 | self.first_train() 44 | self.update_online_pred() 45 | 46 | 47 | if __name__ == "__main__": 48 | ## to train a model and set it as the online model, use the command below 49 | # python update_online_pred.py first_train 50 | ## to update online predictions once a day, use the command below 51 | # python update_online_pred.py update_online_pred 52 | ## to see the whole process with your own parameters, use the command below 53 | # python update_online_pred.py main --experiment_name="your_exp_name" 54 | fire.Fire(UpdatePredExample) 55 | -------------------------------------------------------------------------------- /scripts/README.md: -------------------------------------------------------------------------------- 1 | 2 | - [Download Qlib Data](#Download-Qlib-Data) 3 | - [Download CN Data](#Download-CN-Data) 4 | - [Download US Data](#Download-US-Data) 5 | - [Download CN Simple Data](#Download-CN-Simple-Data) 6 | - [Help](#Help) 7 | - [Using in Qlib](#Using-in-Qlib) 8 | - [US data](#US-data) 9 | - [CN data](#CN-data) 10 | 11 | 12 | ## Download Qlib Data 13 | 14 | 15 | ### Download CN Data 16 | 17 | ```bash 18 | # daily data 19 | python get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data
--region cn 20 | 21 | # 1min data (optional; only needed for high-frequency strategies) 22 | python get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data_1min --region cn --interval 1min 23 | ``` 24 | 25 | ### Download US Data 26 | 27 | 28 | ```bash 29 | python get_data.py qlib_data --target_dir ~/.qlib/qlib_data/us_data --region us 30 | ``` 31 | 32 | ### Download CN Simple Data 33 | 34 | ```bash 35 | python get_data.py qlib_data --name qlib_data_simple --target_dir ~/.qlib/qlib_data/cn_data --region cn 36 | ``` 37 | 38 | ### Help 39 | 40 | ```bash 41 | python get_data.py qlib_data --help 42 | ``` 43 | 44 | ## Using in Qlib 45 | > For more information: https://qlib.readthedocs.io/en/latest/start/initialization.html 46 | 47 | 48 | ### US data 49 | 50 | > Need to download data first: [Download US Data](#Download-US-Data) 51 | 52 | ```python 53 | import qlib 54 | from qlib.config import REG_US 55 | provider_uri = "~/.qlib/qlib_data/us_data" # target_dir 56 | qlib.init(provider_uri=provider_uri, region=REG_US) 57 | ``` 58 | 59 | ### CN data 60 | 61 | > Need to download data first: [Download CN Data](#Download-CN-Data) 62 | 63 | ```python 64 | import qlib 65 | from qlib.constant import REG_CN 66 | 67 | provider_uri = "~/.qlib/qlib_data/cn_data" # target_dir 68 | qlib.init(provider_uri=provider_uri, region=REG_CN) 69 | ``` 70 | 71 | ## Use Crowd Sourced Data 72 | There is also a [crowd sourced version of qlib data](data_collector/crowd_source/README.md): https://github.com/chenditc/investment_data/releases 73 | ```bash 74 | wget https://github.com/chenditc/investment_data/releases/latest/download/qlib_bin.tar.gz 75 | tar -zxvf qlib_bin.tar.gz -C ~/.qlib/qlib_data/cn_data --strip-components=2 76 | ``` 77 | -------------------------------------------------------------------------------- /typehint.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | """Commonly used types.""" 5 | 6 | import sys 7 | from typing import Union 8 | from pathlib import Path 9 | 10 | __all__ = ["Literal", "TypedDict", "final"] 11 | 12 | if sys.version_info >= (3, 8): 13 | from typing import Literal, TypedDict, final # type: ignore # pylint: disable=no-name-in-module 14 | else: 15 | from typing_extensions import Literal, TypedDict, final 16 | 17 | 18 | class InstDictConf(TypedDict): 19 | """ 20 | InstDictConf is a Dict-based config to describe an instance 21 | 22 | case 1) 23 | { 24 | 'class': 'ClassName', 25 | 'kwargs': dict, # It is optional. {} will be used if not given 26 | 'module_path': path, # It is optional if the module is given in the class 27 | } 28 | case 2) 29 | { 30 | 'class': <the class itself>, 31 | 'kwargs': dict, # It is optional. {} will be used if not given 32 | } 33 | """ 34 | 35 | # class: str # because class is a keyword of Python. We have to comment it 36 | kwargs: dict # It is optional. {} will be used if not given 37 | module_path: str # It is optional if the module is given in the class 38 | 39 | 40 | InstConf = Union[InstDictConf, str, object, Path] 41 | """ 42 | InstConf is a type to describe an instance; it will be passed into init_instance_by_config for Qlib 43 | 44 | config : Union[str, dict, object, Path] 45 | 46 | InstDictConf example. 47 | please refer to the docs of InstDictConf 48 | 49 | str example. 50 | 1) specify a pickle object 51 | - path like 'file:///<path to pickle file>/obj.pkl' 52 | 2) specify a class name 53 | - "ClassName": getattr(module, "ClassName")() will be used.
54 | 3) specify module path with class name 55 | - "a.b.c.ClassName": getattr(<module "a.b.c">, "ClassName")() will be used. 56 | 57 | object example: 58 | instance of accept_types 59 | 60 | Path example: 61 | specify a pickle object 62 | - it will be treated like 'file:///<path to pickle file>/obj.pkl' 63 | """ 64 | -------------------------------------------------------------------------------- /examples/hyperparameter/LightGBM/hyperparameter_360.py: -------------------------------------------------------------------------------- 1 | import qlib 2 | import optuna 3 | from qlib.constant import REG_CN 4 | from qlib.utils import init_instance_by_config 5 | from qlib.tests.data import GetData 6 | from qlib.tests.config import get_dataset_config, CSI300_MARKET, DATASET_ALPHA360_CLASS 7 | 8 | DATASET_CONFIG = get_dataset_config(market=CSI300_MARKET, dataset_class=DATASET_ALPHA360_CLASS) 9 | 10 | 11 | def objective(trial): 12 | task = { 13 | "model": { 14 | "class": "LGBModel", 15 | "module_path": "qlib.contrib.model.gbdt", 16 | "kwargs": { 17 | "loss": "mse", 18 | "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1), 19 | "learning_rate": trial.suggest_uniform("learning_rate", 0, 1), 20 | "subsample": trial.suggest_uniform("subsample", 0, 1), 21 | "lambda_l1": trial.suggest_loguniform("lambda_l1", 1e-8, 1e4), 22 | "lambda_l2": trial.suggest_loguniform("lambda_l2", 1e-8, 1e4), 23 | "max_depth": 10, 24 | "num_leaves": trial.suggest_int("num_leaves", 1, 1024), 25 | "feature_fraction": trial.suggest_uniform("feature_fraction", 0.4, 1.0), 26 | "bagging_fraction": trial.suggest_uniform("bagging_fraction", 0.4, 1.0), 27 | "bagging_freq": trial.suggest_int("bagging_freq", 1, 7), 28 | "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 1, 50), 29 | "min_child_samples": trial.suggest_int("min_child_samples", 5, 100), 30 | }, 31 | }, 32 | } 33 | 34 | evals_result = dict() 35 | model = init_instance_by_config(task["model"]) 36 | model.fit(dataset, evals_result=evals_result) 37 | return min(evals_result["valid"]) 38 | 39 | 40 | if __name__ == "__main__": 41 | provider_uri = "~/.qlib/qlib_data/cn_data" 42 | GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True) 43 | qlib.init(provider_uri=provider_uri, region=REG_CN) 44 | 45 | dataset = init_instance_by_config(DATASET_CONFIG) 46 | 47 | study = optuna.create_study(study_name="LGBM_360", storage="sqlite:///db.sqlite3", load_if_exists=True) # create_study (rather than instantiating Study directly) creates the study in storage on first use 48 | study.optimize(objective, n_jobs=6) 49 | -------------------------------------------------------------------------------- /examples/rl_order_execution/scripts/gen_training_orders.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License.
3 | 4 | import os 5 | import numpy as np 6 | import pandas as pd 7 | 8 | from pathlib import Path 9 | 10 | DATA_PATH = Path(os.path.join("data", "pickle", "backtest")) 11 | OUTPUT_PATH = Path(os.path.join("data", "orders")) 12 | 13 | 14 | def generate_order(stock: str, start_idx: int, end_idx: int) -> bool: 15 | dataset = pd.read_pickle(DATA_PATH / f"{stock}.pkl") 16 | df = dataset.handler.fetch(level=None).reset_index() 17 | if len(df) == 0 or df.isnull().values.any() or min(df["$volume0"]) < 1e-5: 18 | return False 19 | 20 | df["date"] = df["datetime"].dt.date.astype("datetime64") 21 | df = df.set_index(["instrument", "datetime", "date"]) 22 | df = df.groupby("date", group_keys=False).take(range(start_idx, end_idx)).droplevel(level=0) 23 | 24 | order_all = pd.DataFrame(df.groupby(level=(2, 0), group_keys=False).mean().dropna()) 25 | order_all["amount"] = np.random.lognormal(-3.28, 1.14) * order_all["$volume0"] # order size: a lognormal fraction of daily volume 26 | order_all = order_all[order_all["amount"] > 0.0] 27 | order_all["order_type"] = 0 28 | order_all = order_all.drop(columns=["$volume0"]) 29 | 30 | order_train = order_all[order_all.index.get_level_values(0) <= pd.Timestamp("2021-06-30")] 31 | order_test = order_all[order_all.index.get_level_values(0) > pd.Timestamp("2021-06-30")] 32 | order_valid = order_test[order_test.index.get_level_values(0) <= pd.Timestamp("2021-09-30")] 33 | order_test = order_test[order_test.index.get_level_values(0) > pd.Timestamp("2021-09-30")] 34 | 35 | for order, tag in zip((order_train, order_valid, order_test, order_all), ("train", "valid", "test", "all")): 36 | path = OUTPUT_PATH / tag 37 | os.makedirs(path, exist_ok=True) 38 | if len(order) > 0: 39 | order.to_pickle(path / f"{stock}.pkl.target") 40 | return True 41 | 42 | 43 | np.random.seed(1234) 44 | file_list = sorted(os.listdir(DATA_PATH)) 45 | stocks = [f.replace(".pkl", "") for f in file_list] 46 | np.random.shuffle(stocks) 47 | 48 | cnt = 0 49 | for stock in stocks: 50 | if generate_order(stock, 0, 240 // 5 - 1): 51 | cnt += 1 52 | if cnt == 100: 53 | break 54 | -------------------------------------------------------------------------------- /docs/component/online.rst: -------------------------------------------------------------------------------- 1 | .. _online_serving: 2 | 3 | ============== 4 | Online Serving 5 | ============== 6 | .. currentmodule:: qlib 7 | 8 | 9 | Introduction 10 | ============ 11 | 12 | .. image:: ../_static/img/online_serving.png 13 | :align: center 14 | 15 | 16 | In addition to backtesting, one way to test whether a model is effective is to make predictions in real market conditions, or even to do real trading based on those predictions. 17 | ``Online Serving`` is a set of modules for online models using the latest data, 18 | which includes `Online Manager <#Online Manager>`_, `Online Strategy <#Online Strategy>`_, `Online Tool <#Online Tool>`_ and `Updater <#Updater>`_. 19 | 20 | Several examples are available under ``examples/online_srv`` for reference, which demonstrate different features of ``Online Serving``. 21 | If you have many models or tasks that need to be managed, please consider `Task Management <../advanced/task_management.html>`_. 22 | These examples are based on some components in `Task Management <../advanced/task_management.html>`_ such as ``TrainerRM`` or ``Collector``. 23 | 24 | **NOTE**: Users should keep their data source updated to support online serving. For example, Qlib provides a batch of scripts to help users update Yahoo daily data.
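As a concrete illustration of the modules listed above, the daily routine can be as small as two calls. Below is a minimal sketch distilled from the ``update_online_pred.py`` example earlier in this repository (it assumes ``qlib.init`` has already been called and that the experiment name matches the one used at training time):

.. code-block:: Python

    from qlib.workflow.online.utils import OnlineToolR

    online_tool = OnlineToolR("online_srv")  # the experiment that holds the online model
    online_tool.update_online_pred()         # refresh predictions with the latest data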
25 | 26 | Current known limitations 27 | - Daily updating of the prediction for the next trading day is supported, but generating orders for the next trading day is not, due to the limitations of public data 28 | 29 | 30 | Online Manager 31 | ============== 32 | 33 | .. automodule:: qlib.workflow.online.manager 34 | :members: 35 | :noindex: 36 | 37 | Online Strategy 38 | =============== 39 | 40 | .. automodule:: qlib.workflow.online.strategy 41 | :members: 42 | :noindex: 43 | 44 | Online Tool 45 | =========== 46 | 47 | .. automodule:: qlib.workflow.online.utils 48 | :members: 49 | :noindex: 50 | 51 | Updater 52 | ======= 53 | 54 | .. automodule:: qlib.workflow.online.update 55 | :members: 56 | :noindex: 57 | -------------------------------------------------------------------------------- /contrib/data/data.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | # We moved arctic from the core framework of Qlib to contrib because 5 | # - Arctic has very strict limitations on pandas and numpy versions 6 | # - https://github.com/man-group/arctic/pull/908 7 | # - pip fails to compute the right version number 8 | # - Maybe we can solve this problem with poetry 9 | 10 | # FIXME: So if you want to use the arctic-based provider, please install arctic manually 11 | # `pip install arctic` may not be enough. 12 | from arctic import Arctic 13 | import pandas as pd 14 | import pymongo 15 | 16 | from qlib.data.data import FeatureProvider 17 | 18 | 19 | class ArcticFeatureProvider(FeatureProvider): 20 | def __init__( 21 | self, uri="127.0.0.1", retry_time=0, market_transaction_time_list=[("09:15", "11:30"), ("13:00", "15:00")] 22 | ): 23 | super().__init__() 24 | self.uri = uri 25 | # TODO: 26 | # retry connecting if an error occurs 27 | # does it really matter?
28 | self.retry_time = retry_time 29 | # NOTE: this is especially important for the TResample operator 30 | self.market_transaction_time_list = market_transaction_time_list 31 | 32 | def feature(self, instrument, field, start_index, end_index, freq): 33 | field = str(field)[1:] 34 | with pymongo.MongoClient(self.uri) as client: 35 | # TODO: this results in frequent connections to the server and a performance issue 36 | arctic = Arctic(client) 37 | 38 | if freq not in arctic.list_libraries(): 39 | raise ValueError("lib {} not in arctic".format(freq)) 40 | 41 | if instrument not in arctic[freq].list_symbols(): 42 | # the instrument does not exist 43 | return pd.Series() 44 | else: 45 | df = arctic[freq].read(instrument, columns=[field], chunk_range=(start_index, end_index)) 46 | s = df[field] 47 | 48 | if not s.empty: 49 | s = pd.concat( 50 | [ 51 | s.between_time(time_tuple[0], time_tuple[1]) 52 | for time_tuple in self.market_transaction_time_list 53 | ] 54 | ) 55 | return s 56 | -------------------------------------------------------------------------------- /examples/benchmarks/XGBoost/workflow_config_xgboost_Alpha158.yaml: -------------------------------------------------------------------------------- 1 | qlib_init: 2 | provider_uri: "~/.qlib/qlib_data/cn_data" 3 | region: cn 4 | market: &market csi300 5 | benchmark: &benchmark SH000300 6 | data_handler_config: &data_handler_config 7 | start_time: 2008-01-01 8 | end_time: 2020-08-01 9 | fit_start_time: 2008-01-01 10 | fit_end_time: 2014-12-31 11 | instruments: *market 12 | port_analysis_config: &port_analysis_config 13 | strategy: 14 | class: TopkDropoutStrategy 15 | module_path: qlib.contrib.strategy 16 | kwargs: 17 | signal: <PRED> 18 | topk: 50 19 | n_drop: 5 20 | backtest: 21 | start_time: 2017-01-01 22 | end_time: 2020-08-01 23 | account: 100000000 24 | benchmark: *benchmark 25 | exchange_kwargs: 26 | limit_threshold: 0.095 27 | deal_price: close 28 | open_cost: 0.0005 29 | close_cost: 0.0015 30 | min_cost: 5 31 | task: 32 | model: 33 | class: XGBModel 34 | module_path: qlib.contrib.model.xgboost 35 | kwargs: 36 | eval_metric: rmse 37 | colsample_bytree: 0.8879 38 | eta: 0.0421 39 | max_depth: 8 40 | n_estimators: 647 41 | subsample: 0.8789 42 | nthread: 20 43 | dataset: 44 | class: DatasetH 45 | module_path: qlib.data.dataset 46 | kwargs: 47 | handler: 48 | class: Alpha158 49 | module_path: qlib.contrib.data.handler 50 | kwargs: *data_handler_config 51 | segments: 52 | train: [2008-01-01, 2014-12-31] 53 | valid: [2015-01-01, 2016-12-31] 54 | test: [2017-01-01, 2020-08-01] 55 | record: 56 | - class: SignalRecord 57 | module_path: qlib.workflow.record_temp 58 | kwargs: 59 | model: <MODEL> 60 | dataset: <DATASET> 61 | - class: SigAnaRecord 62 | module_path: qlib.workflow.record_temp 63 | kwargs: 64 | ana_long_short: False 65 | ann_scaler: 252 66 | - class: PortAnaRecord 67 | module_path: qlib.workflow.record_temp 68 | kwargs: 69 | config: *port_analysis_config 70 | -------------------------------------------------------------------------------- /docs/advanced/serial.rst: -------------------------------------------------------------------------------- 1 | .. _serial: 2 | 3 | ============= 4 | Serialization 5 | ============= 6 | .. currentmodule:: qlib 7 | 8 | Introduction 9 | ============ 10 | ``Qlib`` supports dumping the state of ``DataHandler``, ``DataSet``, ``Processor``, ``Model``, etc. to disk and reloading them.
11 | 12 | Serializable Class 13 | ================== 14 | 15 | ``Qlib`` provides a base class ``qlib.utils.serial.Serializable``, whose state can be dumped to or loaded from disk in `pickle` format. 16 | When users dump the state of a ``Serializable`` instance, the attributes of the instance whose names **do not** start with `_` will be saved to disk. 17 | However, users can use the ``config`` method or override the ``default_dump_all`` attribute to change this behavior. 18 | 19 | Users can also override the ``pickle_backend`` attribute to choose a pickle backend. The supported values are "pickle" (the default and most common) and "dill" (which can dump more things, such as functions; see the `dill` documentation for more information). 20 | 21 | Example 22 | ======= 23 | ``Qlib``'s serializable classes include ``DataHandler``, ``DataSet``, ``Processor`` and ``Model``, etc., which are subclasses of ``qlib.utils.serial.Serializable``. 24 | Specifically, ``qlib.data.dataset.DatasetH`` is one of them. Users can serialize ``DatasetH`` as follows. 25 | 26 | .. code-block:: Python 27 | 28 | ##=============dump dataset============= 29 | dataset.to_pickle(path="dataset.pkl") # dataset is an instance of qlib.data.dataset.DatasetH 30 | 31 | ##=============reload dataset============= 32 | with open("dataset.pkl", "rb") as file_dataset: 33 | dataset = pickle.load(file_dataset) 34 | 35 | .. note:: 36 | Only the state of ``DatasetH`` should be saved to disk, such as some `mean` and `variance` used for data normalization, etc. 37 | 38 | After reloading the ``DatasetH``, users need to reinitialize it. This means that users can reset some states of ``DatasetH`` or ``QlibDataHandler``, such as `instruments`, `start_time`, `end_time` and `segments`, etc., and generate new data according to those states (data is not state and should not be saved to disk). 39 | 40 | A more detailed example can be found in Qlib's examples. 41 | 42 | 43 | API 44 | === 45 | Please refer to `Serializable API <../reference/api.html#module-qlib.utils.serial.Serializable>`_.
46 | -------------------------------------------------------------------------------- /examples/highfreq/workflow_config_High_Freq_Tree_Alpha158.yaml: -------------------------------------------------------------------------------- 1 | qlib_init: 2 | provider_uri: "~/.qlib/qlib_data/cn_data_1min" 3 | region: cn 4 | market: &market 'csi300' 5 | start_time: &start_time "2020-09-15 00:00:00" 6 | end_time: &end_time "2021-01-18 16:00:00" 7 | train_end_time: &train_end_time "2020-11-15 16:00:00" 8 | valid_start_time: &valid_start_time "2020-11-16 00:00:00" 9 | valid_end_time: &valid_end_time "2020-11-30 16:00:00" 10 | test_start_time: &test_start_time "2020-12-01 00:00:00" 11 | data_handler_config: &data_handler_config 12 | start_time: *start_time 13 | end_time: *end_time 14 | fit_start_time: *start_time 15 | fit_end_time: *train_end_time 16 | instruments: *market 17 | freq: '1min' 18 | infer_processors: 19 | - class: 'RobustZScoreNorm' 20 | kwargs: 21 | fields_group: 'feature' 22 | clip_outlier: false 23 | - class: "Fillna" 24 | kwargs: 25 | fields_group: 'feature' 26 | learn_processors: 27 | - class: 'DropnaLabel' 28 | - class: 'CSRankNorm' 29 | kwargs: 30 | fields_group: 'label' 31 | label: ["Ref($close, -2) / Ref($close, -1) - 1"] 32 | 33 | task: 34 | model: 35 | class: "HFLGBModel" 36 | module_path: "qlib.contrib.model.highfreq_gdbt_model" 37 | kwargs: 38 | objective: 'binary' 39 | metric: ['binary_logloss','auc'] 40 | verbosity: -1 41 | learning_rate: 0.01 42 | max_depth: 8 43 | num_leaves: 150 44 | lambda_l1: 1.5 45 | lambda_l2: 1 46 | num_threads: 20 47 | dataset: 48 | class: "DatasetH" 49 | module_path: "qlib.data.dataset" 50 | kwargs: 51 | handler: 52 | class: "Alpha158" 53 | module_path: "qlib.contrib.data.handler" 54 | kwargs: *data_handler_config 55 | segments: 56 | train: [*start_time, *train_end_time] 57 | valid: [*valid_start_time, *valid_end_time] 58 | test: [*test_start_time, *end_time] 59 | record: 60 | - class: "SignalRecord" 61 | module_path: "qlib.workflow.record_temp" 62 | kwargs: {} 63 | - class: "HFSignalRecord" 64 | module_path: "qlib.workflow.record_temp" 65 | kwargs: {} -------------------------------------------------------------------------------- /examples/benchmarks/CatBoost/workflow_config_catboost_Alpha158.yaml: -------------------------------------------------------------------------------- 1 | qlib_init: 2 | provider_uri: "~/.qlib/qlib_data/cn_data" 3 | region: cn 4 | market: &market csi300 5 | benchmark: &benchmark SH000300 6 | data_handler_config: &data_handler_config 7 | start_time: 2008-01-01 8 | end_time: 2020-08-01 9 | fit_start_time: 2008-01-01 10 | fit_end_time: 2014-12-31 11 | instruments: *market 12 | port_analysis_config: &port_analysis_config 13 | strategy: 14 | class: TopkDropoutStrategy 15 | module_path: qlib.contrib.strategy 16 | kwargs: 17 | signal: <PRED> 18 | topk: 50 19 | n_drop: 5 20 | backtest: 21 | start_time: 2017-01-01 22 | end_time: 2020-08-01 23 | account: 100000000 24 | benchmark: *benchmark 25 | exchange_kwargs: 26 | limit_threshold: 0.095 27 | deal_price: close 28 | open_cost: 0.0005 29 | close_cost: 0.0015 30 | min_cost: 5 31 | task: 32 | model: 33 | class: CatBoostModel 34 | module_path: qlib.contrib.model.catboost_model 35 | kwargs: 36 | loss: RMSE 37 | learning_rate: 0.0421 38 | subsample: 0.8789 39 | max_depth: 6 40 | num_leaves: 100 41 | thread_count: 20 42 | grow_policy: Lossguide 43 | bootstrap_type: Poisson 44 | dataset: 45 | class: DatasetH 46 | module_path: qlib.data.dataset 47 | kwargs: 48 | handler: 49 | class: Alpha158 50 |
module_path: qlib.contrib.data.handler 51 | kwargs: *data_handler_config 52 | segments: 53 | train: [2008-01-01, 2014-12-31] 54 | valid: [2015-01-01, 2016-12-31] 55 | test: [2017-01-01, 2020-08-01] 56 | record: 57 | - class: SignalRecord 58 | module_path: qlib.workflow.record_temp 59 | kwargs: 60 | model: <MODEL> 61 | dataset: <DATASET> 62 | - class: SigAnaRecord 63 | module_path: qlib.workflow.record_temp 64 | kwargs: 65 | ana_long_short: False 66 | ann_scaler: 252 67 | - class: PortAnaRecord 68 | module_path: qlib.workflow.record_temp 69 | kwargs: 70 | config: *port_analysis_config 71 | -------------------------------------------------------------------------------- /examples/benchmarks/CatBoost/workflow_config_catboost_Alpha158_csi500.yaml: -------------------------------------------------------------------------------- 1 | qlib_init: 2 | provider_uri: "~/.qlib/qlib_data/cn_data" 3 | region: cn 4 | market: &market csi500 5 | benchmark: &benchmark SH000905 6 | data_handler_config: &data_handler_config 7 | start_time: 2008-01-01 8 | end_time: 2020-08-01 9 | fit_start_time: 2008-01-01 10 | fit_end_time: 2014-12-31 11 | instruments: *market 12 | port_analysis_config: &port_analysis_config 13 | strategy: 14 | class: TopkDropoutStrategy 15 | module_path: qlib.contrib.strategy 16 | kwargs: 17 | signal: <PRED> 18 | topk: 50 19 | n_drop: 5 20 | backtest: 21 | start_time: 2017-01-01 22 | end_time: 2020-08-01 23 | account: 100000000 24 | benchmark: *benchmark 25 | exchange_kwargs: 26 | limit_threshold: 0.095 27 | deal_price: close 28 | open_cost: 0.0005 29 | close_cost: 0.0015 30 | min_cost: 5 31 | task: 32 | model: 33 | class: CatBoostModel 34 | module_path: qlib.contrib.model.catboost_model 35 | kwargs: 36 | loss: RMSE 37 | learning_rate: 0.0421 38 | subsample: 0.8789 39 | max_depth: 6 40 | num_leaves: 100 41 | thread_count: 20 42 | grow_policy: Lossguide 43 | bootstrap_type: Poisson 44 | dataset: 45 | class: DatasetH 46 | module_path: qlib.data.dataset 47 | kwargs: 48 | handler: 49 | class: Alpha158 50 | module_path: qlib.contrib.data.handler 51 | kwargs: *data_handler_config 52 | segments: 53 | train: [2008-01-01, 2014-12-31] 54 | valid: [2015-01-01, 2016-12-31] 55 | test: [2017-01-01, 2020-08-01] 56 | record: 57 | - class: SignalRecord 58 | module_path: qlib.workflow.record_temp 59 | kwargs: 60 | model: <MODEL> 61 | dataset: <DATASET> 62 | - class: SigAnaRecord 63 | module_path: qlib.workflow.record_temp 64 | kwargs: 65 | ana_long_short: False 66 | ann_scaler: 252 67 | - class: PortAnaRecord 68 | module_path: qlib.workflow.record_temp 69 | kwargs: 70 | config: *port_analysis_config 71 | -------------------------------------------------------------------------------- /examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml: -------------------------------------------------------------------------------- 1 | qlib_init: 2 | provider_uri: "~/.qlib/qlib_data/cn_data" 3 | region: cn 4 | market: &market csi300 5 | benchmark: &benchmark SH000300 6 | data_handler_config: &data_handler_config 7 | start_time: 2008-01-01 8 | end_time: 2020-08-01 9 | fit_start_time: 2008-01-01 10 | fit_end_time: 2014-12-31 11 | instruments: *market 12 | port_analysis_config: &port_analysis_config 13 | strategy: 14 | class: TopkDropoutStrategy 15 | module_path: qlib.contrib.strategy 16 | kwargs: 17 | signal: <PRED> 18 | topk: 50 19 | n_drop: 5 20 | backtest: 21 | start_time: 2017-01-01 22 | end_time: 2020-08-01 23 | account: 100000000 24 | benchmark: *benchmark 25 | exchange_kwargs: 26 | limit_threshold: 0.095 27 | deal_price: close 28 | open_cost: 0.0005 29 |
close_cost: 0.0015 30 | min_cost: 5 31 | task: 32 | model: 33 | class: LGBModel 34 | module_path: qlib.contrib.model.gbdt 35 | kwargs: 36 | loss: mse 37 | colsample_bytree: 0.8879 38 | learning_rate: 0.2 39 | subsample: 0.8789 40 | lambda_l1: 205.6999 41 | lambda_l2: 580.9768 42 | max_depth: 8 43 | num_leaves: 210 44 | num_threads: 20 45 | dataset: 46 | class: DatasetH 47 | module_path: qlib.data.dataset 48 | kwargs: 49 | handler: 50 | class: Alpha158 51 | module_path: qlib.contrib.data.handler 52 | kwargs: *data_handler_config 53 | segments: 54 | train: [2008-01-01, 2014-12-31] 55 | valid: [2015-01-01, 2016-12-31] 56 | test: [2017-01-01, 2020-08-01] 57 | record: 58 | - class: SignalRecord 59 | module_path: qlib.workflow.record_temp 60 | kwargs: 61 | model: <MODEL> 62 | dataset: <DATASET> 63 | - class: SigAnaRecord 64 | module_path: qlib.workflow.record_temp 65 | kwargs: 66 | ana_long_short: False 67 | ann_scaler: 252 68 | - class: PortAnaRecord 69 | module_path: qlib.workflow.record_temp 70 | kwargs: 71 | config: *port_analysis_config 72 | -------------------------------------------------------------------------------- /examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158_csi500.yaml: -------------------------------------------------------------------------------- 1 | qlib_init: 2 | provider_uri: "~/.qlib/qlib_data/cn_data" 3 | region: cn 4 | market: &market csi500 5 | benchmark: &benchmark SH000905 6 | data_handler_config: &data_handler_config 7 | start_time: 2008-01-01 8 | end_time: 2020-08-01 9 | fit_start_time: 2008-01-01 10 | fit_end_time: 2014-12-31 11 | instruments: *market 12 | port_analysis_config: &port_analysis_config 13 | strategy: 14 | class: TopkDropoutStrategy 15 | module_path: qlib.contrib.strategy 16 | kwargs: 17 | signal: <PRED> 18 | topk: 50 19 | n_drop: 5 20 | backtest: 21 | start_time: 2017-01-01 22 | end_time: 2020-08-01 23 | account: 100000000 24 | benchmark: *benchmark 25 | exchange_kwargs: 26 | limit_threshold: 0.095 27 | deal_price: close 28 | open_cost: 0.0005 29 | close_cost: 0.0015 30 | min_cost: 5 31 | task: 32 | model: 33 | class: LGBModel 34 | module_path: qlib.contrib.model.gbdt 35 | kwargs: 36 | loss: mse 37 | colsample_bytree: 0.9 38 | learning_rate: 0.1 39 | subsample: 0.9 40 | lambda_l1: 205.6999 41 | lambda_l2: 580.9768 42 | max_depth: 8 43 | num_leaves: 250 44 | num_threads: 20 45 | dataset: 46 | class: DatasetH 47 | module_path: qlib.data.dataset 48 | kwargs: 49 | handler: 50 | class: Alpha158 51 | module_path: qlib.contrib.data.handler 52 | kwargs: *data_handler_config 53 | segments: 54 | train: [2008-01-01, 2014-12-31] 55 | valid: [2015-01-01, 2016-12-31] 56 | test: [2017-01-01, 2020-08-01] 57 | record: 58 | - class: SignalRecord 59 | module_path: qlib.workflow.record_temp 60 | kwargs: 61 | model: <MODEL> 62 | dataset: <DATASET> 63 | - class: SigAnaRecord 64 | module_path: qlib.workflow.record_temp 65 | kwargs: 66 | ana_long_short: False 67 | ann_scaler: 252 68 | - class: PortAnaRecord 69 | module_path: qlib.workflow.record_temp 70 | kwargs: 71 | config: *port_analysis_config 72 | -------------------------------------------------------------------------------- /examples/benchmarks_dynamic/baseline/workflow_config_lightgbm_Alpha158.yaml: -------------------------------------------------------------------------------- 1 | qlib_init: 2 | provider_uri: "~/.qlib/qlib_data/cn_data" 3 | region: cn 4 | market: &market csi300 5 | benchmark: &benchmark SH000300 6 | data_handler_config: &data_handler_config 7 | start_time: 2008-01-01 8 | end_time: 2020-08-01 9 | fit_start_time: 2008-01-01
10 | fit_end_time: 2014-12-31 11 | instruments: *market 12 | port_analysis_config: &port_analysis_config 13 | strategy: 14 | class: TopkDropoutStrategy 15 | module_path: qlib.contrib.strategy 16 | kwargs: 17 | signal: <PRED> 18 | topk: 50 19 | n_drop: 5 20 | backtest: 21 | start_time: 2017-01-01 22 | end_time: 2020-08-01 23 | account: 100000000 24 | benchmark: *benchmark 25 | exchange_kwargs: 26 | limit_threshold: 0.095 27 | deal_price: close 28 | open_cost: 0.0005 29 | close_cost: 0.0015 30 | min_cost: 5 31 | task: 32 | model: 33 | class: LGBModel 34 | module_path: qlib.contrib.model.gbdt 35 | kwargs: 36 | loss: mse 37 | colsample_bytree: 0.8879 38 | learning_rate: 0.2 39 | subsample: 0.8789 40 | lambda_l1: 205.6999 41 | lambda_l2: 580.9768 42 | max_depth: 8 43 | num_leaves: 210 44 | num_threads: 20 45 | dataset: 46 | class: DatasetH 47 | module_path: qlib.data.dataset 48 | kwargs: 49 | handler: 50 | class: Alpha158 51 | module_path: qlib.contrib.data.handler 52 | kwargs: *data_handler_config 53 | segments: 54 | train: [2008-01-01, 2014-12-31] 55 | valid: [2015-01-01, 2016-12-31] 56 | test: [2017-01-01, 2020-08-01] 57 | record: 58 | - class: SignalRecord 59 | module_path: qlib.workflow.record_temp 60 | kwargs: 61 | model: <MODEL> 62 | dataset: <DATASET> 63 | - class: SigAnaRecord 64 | module_path: qlib.workflow.record_temp 65 | kwargs: 66 | ana_long_short: False 67 | ann_scaler: 252 68 | - class: PortAnaRecord 69 | module_path: qlib.workflow.record_temp 70 | kwargs: 71 | config: *port_analysis_config 72 | -------------------------------------------------------------------------------- /examples/portfolio/config_enhanced_indexing.yaml: -------------------------------------------------------------------------------- 1 | qlib_init: 2 | provider_uri: "~/.qlib/qlib_data/cn_data" 3 | region: cn 4 | market: &market csi300 5 | benchmark: &benchmark SH000300 6 | data_handler_config: &data_handler_config 7 | start_time: 2008-01-01 8 | end_time: 2020-08-01 9 | fit_start_time: 2008-01-01 10 | fit_end_time: 2014-12-31 11 | instruments: *market 12 | port_analysis_config: &port_analysis_config 13 | strategy: 14 | class: EnhancedIndexingStrategy 15 | module_path: qlib.contrib.strategy 16 | kwargs: 17 | model: <MODEL> 18 | dataset: <DATASET> 19 | riskmodel_root: ./riskdata 20 | backtest: 21 | start_time: 2017-01-01 22 | end_time: 2020-08-01 23 | account: 100000000 24 | benchmark: *benchmark 25 | exchange_kwargs: 26 | limit_threshold: 0.095 27 | deal_price: close 28 | open_cost: 0.0005 29 | close_cost: 0.0015 30 | min_cost: 5 31 | task: 32 | model: 33 | class: LGBModel 34 | module_path: qlib.contrib.model.gbdt 35 | kwargs: 36 | loss: mse 37 | colsample_bytree: 0.8879 38 | learning_rate: 0.2 39 | subsample: 0.8789 40 | lambda_l1: 205.6999 41 | lambda_l2: 580.9768 42 | max_depth: 8 43 | num_leaves: 210 44 | num_threads: 20 45 | dataset: 46 | class: DatasetH 47 | module_path: qlib.data.dataset 48 | kwargs: 49 | handler: 50 | class: Alpha158 51 | module_path: qlib.contrib.data.handler 52 | kwargs: *data_handler_config 53 | segments: 54 | train: [2008-01-01, 2014-12-31] 55 | valid: [2015-01-01, 2016-12-31] 56 | test: [2017-01-01, 2020-08-01] 57 | record: 58 | - class: SignalRecord 59 | module_path: qlib.workflow.record_temp 60 | kwargs: 61 | model: <MODEL> 62 | dataset: <DATASET> 63 | - class: SigAnaRecord 64 | module_path: qlib.workflow.record_temp 65 | kwargs: 66 | ana_long_short: False 67 | ann_scaler: 252 68 | - class: PortAnaRecord 69 | module_path: qlib.workflow.record_temp 70 | kwargs: 71 | config: *port_analysis_config 72 |
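For completeness, this enhanced-indexing config can be launched the same way the data demos elsewhere in this repository launch their configs, i.e. through the ``qrun`` entry point. A minimal sketch (it assumes the working directory is ``examples/portfolio`` and that the risk data referenced by ``riskmodel_root: ./riskdata`` has already been generated, e.g. by the accompanying ``prepare_riskdata.py`` script):

```python
import subprocess
from pathlib import Path

# Run the workflow config with qrun, mirroring the subprocess usage in data_cache_demo.py.
config_path = Path("config_enhanced_indexing.yaml")
subprocess.run(f"qrun {config_path}", shell=True, check=True)
```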
-------------------------------------------------------------------------------- /examples/benchmarks_dynamic/DDG-DA/README.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | This is the implementation of `DDG-DA` based on the `Meta Controller` component provided by `Qlib`. 3 | 4 | Please refer to the paper for more details: *DDG-DA: Data Distribution Generation for Predictable Concept Drift Adaptation* [[arXiv](https://arxiv.org/abs/2201.04038)] 5 | 6 | 7 | # Background 8 | In many real-world scenarios, we often deal with streaming data that is sequentially collected over time. Due to the non-stationary nature of the environment, the streaming data distribution may change in unpredictable ways, which is known as concept drift. To handle concept drift, previous methods first detect when/where the concept drift happens and then adapt models to fit the distribution of the latest data. However, there are still many cases in which some underlying factors of environment evolution are predictable, making it possible to model the future concept drift trend of the streaming data; such cases are not fully explored in previous work. 9 | 10 | Therefore, we propose a novel method, `DDG-DA`, that can effectively forecast the evolution of the data distribution and improve the performance of models. Specifically, we first train a predictor to estimate the future data distribution, then leverage it to generate training samples, and finally train models on the generated data. 11 | 12 | # Dataset 13 | The data used in the paper are private, so we conduct experiments on Qlib's public dataset. 14 | Though the dataset is different, the conclusion remains the same. By applying `DDG-DA`, users can see rising trends in the test phase, both in the proxy model's IC and in the performance of the forecasting models. 15 | 16 | # Run the Code 17 | Users can try `DDG-DA` by running the following command: 18 | ```bash 19 | python workflow.py run 20 | ``` 21 | 22 | The default forecasting model is `Linear`. Users can choose other forecasting models by changing the `forecast_model` parameter when `DDG-DA` initializes. For example, users can try the `LightGBM` forecasting model by running the following command: 23 | ```bash 24 | python workflow.py --conf_path=../workflow_config_lightgbm_Alpha158.yaml run 25 | ``` 26 | 27 | # Results 28 | The results of related methods on Qlib's public dataset can be found [here](../) 29 | 30 | # Requirements 31 | Here are the minimal hardware requirements to run the ``workflow.py`` of DDG-DA. 32 | * Memory: 45G 33 | * Disk: 4G 34 | 35 | PyTorch on CPU with the RAM above will be enough for this example. 36 | -------------------------------------------------------------------------------- /examples/data_demo/data_mem_resuse_demo.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License.
3 | """ 4 | The motivation of this demo 5 | - To show the data modules of Qlib is Serializable, users can dump processed data to disk to avoid duplicated data preprocessing 6 | """ 7 | 8 | from copy import deepcopy 9 | from pathlib import Path 10 | import pickle 11 | from pprint import pprint 12 | from ruamel.yaml import YAML 13 | import subprocess 14 | 15 | from qlib import init 16 | from qlib.data.dataset.handler import DataHandlerLP 17 | from qlib.log import TimeInspector 18 | from qlib.model.trainer import task_train 19 | from qlib.utils import init_instance_by_config 20 | 21 | # For general purpose, we use relative path 22 | DIRNAME = Path(__file__).absolute().resolve().parent 23 | 24 | if __name__ == "__main__": 25 | init() 26 | 27 | repeat = 2 28 | exp_name = "data_mem_reuse_demo" 29 | 30 | config_path = DIRNAME.parent / "benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml" 31 | yaml = YAML(typ="safe", pure=True) 32 | task_config = yaml.load(config_path.open()) 33 | 34 | # 1) without using processed data in memory 35 | with TimeInspector.logt("The original time without reusing processed data in memory:"): 36 | for i in range(repeat): 37 | task_train(task_config["task"], experiment_name=exp_name) 38 | 39 | # 2) prepare processed data in memory. 40 | hd_conf = task_config["task"]["dataset"]["kwargs"]["handler"] 41 | pprint(hd_conf) 42 | hd: DataHandlerLP = init_instance_by_config(hd_conf) 43 | 44 | # 3) with reusing processed data in memory 45 | new_task = deepcopy(task_config["task"]) 46 | new_task["dataset"]["kwargs"]["handler"] = hd 47 | print(new_task) 48 | 49 | with TimeInspector.logt("The time with reusing processed data in memory:"): 50 | # this will save the time to reload and process data from disk(in `DataHandlerLP`) 51 | # It still takes a lot of time in the backtest phase 52 | for i in range(repeat): 53 | task_train(new_task, experiment_name=exp_name) 54 | 55 | # 4) User can change other parts exclude processed data in memory(handler) 56 | new_task = deepcopy(task_config["task"]) 57 | new_task["dataset"]["kwargs"]["segments"]["train"] = ("20100101", "20131231") 58 | with TimeInspector.logt("The time with reusing processed data in memory:"): 59 | task_train(new_task, experiment_name=exp_name) 60 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | ====================== 2 | ``Qlib`` Documentation 3 | ====================== 4 | 5 | ``Qlib`` is an AI-oriented quantitative investment platform, which aims to realize the potential, empower the research, and create the value of AI technologies in quantitative investment. 6 | 7 | .. _user_guide: 8 | 9 | Document Structure 10 | ==================== 11 | 12 | .. toctree:: 13 | :hidden: 14 | 15 | Home 16 | 17 | .. toctree:: 18 | :maxdepth: 3 19 | :caption: GETTING STARTED: 20 | 21 | Introduction 22 | Quick Start 23 | 24 | .. toctree:: 25 | :maxdepth: 3 26 | :caption: FIRST STEPS: 27 | 28 | Installation 29 | Initialization 30 | Data Retrieval 31 | Custom Model Integration 32 | 33 | 34 | .. 
35 | :maxdepth: 3 36 | :caption: MAIN COMPONENTS: 37 | 38 | Workflow: Workflow Management 39 | Data Layer: Data Framework & Usage 40 | Forecast Model: Model Training & Prediction 41 | Portfolio Management and Backtest 42 | Nested Decision Execution: High-Frequency Trading 43 | Meta Controller: Meta-Task & Meta-Dataset & Meta-Model 44 | Qlib Recorder: Experiment Management 45 | Analysis: Evaluation & Results Analysis 46 | Online Serving: Online Management & Strategy & Tool 47 | Reinforcement Learning 48 | 49 | .. toctree:: 50 | :maxdepth: 3 51 | :caption: OTHER COMPONENTS/FEATURES/TOPICS: 52 | 53 | Building Formulaic Alphas 54 | Online & Offline mode 55 | Serialization 56 | Task Management 57 | Point-In-Time database 58 | 59 | .. toctree:: 60 | :maxdepth: 3 61 | :caption: FOR DEVELOPERS: 62 | 63 | Code Standard & Development Guidance 64 | How to build image 65 | 66 | .. toctree:: 67 | :maxdepth: 3 68 | :caption: REFERENCE: 69 | 70 | API 71 | 72 | .. toctree:: 73 | :maxdepth: 3 74 | 75 | FAQ 76 | 77 | .. toctree:: 78 | :maxdepth: 3 79 | :caption: Change Log: 80 | 81 | Change Log 82 | -------------------------------------------------------------------------------- /examples/benchmarks/Linear/workflow_config_linear_Alpha158.yaml: -------------------------------------------------------------------------------- 1 | qlib_init: 2 | provider_uri: "~/.qlib/qlib_data/cn_data" 3 | region: cn 4 | market: &market csi300 5 | benchmark: &benchmark SH000300 6 | data_handler_config: &data_handler_config 7 | start_time: 2008-01-01 8 | end_time: 2020-08-01 9 | fit_start_time: 2008-01-01 10 | fit_end_time: 2014-12-31 11 | instruments: *market 12 | infer_processors: 13 | - class: RobustZScoreNorm 14 | kwargs: 15 | fields_group: feature 16 | clip_outlier: true 17 | - class: Fillna 18 | kwargs: 19 | fields_group: feature 20 | learn_processors: 21 | - class: DropnaLabel 22 | - class: CSRankNorm 23 | kwargs: 24 | fields_group: label 25 | port_analysis_config: &port_analysis_config 26 | strategy: 27 | class: TopkDropoutStrategy 28 | module_path: qlib.contrib.strategy 29 | kwargs: 30 | signal: <PRED> 31 | topk: 50 32 | n_drop: 5 33 | backtest: 34 | start_time: 2017-01-01 35 | end_time: 2020-08-01 36 | account: 100000000 37 | benchmark: *benchmark 38 | exchange_kwargs: 39 | limit_threshold: 0.095 40 | deal_price: close 41 | open_cost: 0.0005 42 | close_cost: 0.0015 43 | min_cost: 5 44 | task: 45 | model: 46 | class: LinearModel 47 | module_path: qlib.contrib.model.linear 48 | kwargs: 49 | estimator: ols 50 | dataset: 51 | class: DatasetH 52 | module_path: qlib.data.dataset 53 | kwargs: 54 | handler: 55 | class: Alpha158 56 | module_path: qlib.contrib.data.handler 57 | kwargs: *data_handler_config 58 | segments: 59 | train: [2008-01-01, 2014-12-31] 60 | valid: [2015-01-01, 2016-12-31] 61 | test: [2017-01-01, 2020-08-01] 62 | record: 63 | - class: SignalRecord 64 | module_path: qlib.workflow.record_temp 65 | kwargs: 66 | model: <MODEL> 67 | dataset: <DATASET> 68 | - class: SigAnaRecord 69 | module_path: qlib.workflow.record_temp 70 | kwargs: 71 | ana_long_short: True 72 | ann_scaler: 252 73 | - class: PortAnaRecord 74 | module_path: qlib.workflow.record_temp 75 | kwargs: 76 | config: *port_analysis_config 77 | -------------------------------------------------------------------------------- /tests/ops/test_elem_operator.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | import pytest 4 | 5 | from qlib.data import DatasetProvider 6 | from qlib.data.data
import ExpressionD 7 | from qlib.tests import TestOperatorData, TestMockData, MOCK_DF 8 | from qlib.config import C 9 | 10 | 11 | class TestElementOperator(TestMockData): 12 | def setUp(self) -> None: 13 | self.instrument = "0050" 14 | self.start_time = "2022-01-01" 15 | self.end_time = "2022-02-01" 16 | self.freq = "day" 17 | self.mock_df = MOCK_DF[MOCK_DF["symbol"] == self.instrument] 18 | 19 | def test_Abs(self): 20 | field = "Abs($close-Ref($close, 1))" 21 | result = ExpressionD.expression(self.instrument, field, self.start_time, self.end_time, self.freq) 22 | self.assertGreaterEqual(result.min(), 0) 23 | result = result.to_numpy() 24 | prev_close = self.mock_df["close"].shift(1) 25 | close = self.mock_df["close"] 26 | change = prev_close - close 27 | golden = change.abs().to_numpy() 28 | self.assertIsNone(np.testing.assert_allclose(result, golden)) 29 | 30 | def test_Sign(self): 31 | field = "Sign($close-Ref($close, 1))" 32 | result = ExpressionD.expression(self.instrument, field, self.start_time, self.end_time, self.freq) 33 | result = result.to_numpy() 34 | prev_close = self.mock_df["close"].shift(1) 35 | close = self.mock_df["close"] 36 | change = close - prev_close 37 | change[change > 0] = 1.0 38 | change[change < 0] = -1.0 39 | golden = change.to_numpy() 40 | self.assertIsNone(np.testing.assert_allclose(result, golden)) 41 | 42 | 43 | class TestOperatorDataSetting(TestOperatorData): 44 | def test_setting(self): 45 | self.assertEqual(len(self.instruments_d), 1) 46 | self.assertGreater(len(self.cal), 0) 47 | 48 | 49 | class TestInstElementOperator(TestOperatorData): 50 | def setUp(self) -> None: 51 | freq = "day" 52 | expressions = [ 53 | "$change", 54 | "Abs($change)", 55 | ] 56 | columns = ["change", "abs"] 57 | self.data = DatasetProvider.inst_calculator( 58 | self.inst, self.start_time, self.end_time, freq, expressions, self.spans, C, [] 59 | ) 60 | self.data.columns = columns 61 | 62 | @pytest.mark.slow 63 | def test_abs(self): 64 | abs_values = self.data["abs"] 65 | self.assertGreater(abs_values[2], 0) 66 | 67 | 68 | if __name__ == "__main__": 69 | unittest.main() 70 | --------------------------------------------------------------------------------
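As a closing note on the operator tests above: outside the unit-test harness, custom operators such as ``Diff`` and ``Distance`` from ``tests/test_register_ops.py`` can be registered when initializing Qlib, since ``TestAutoData`` simply forwards its ``custom_ops`` setup kwarg to ``qlib.init``. A minimal sketch (the provider URI and the importability of the test module are assumptions):

```python
import qlib
from qlib.data import D
from tests.test_register_ops import Diff, Distance  # assumption: the test module is on the import path

# Register the custom operators up front, as TestAutoData does via _setup_kwargs.
qlib.init(provider_uri="~/.qlib/qlib_data/cn_data", custom_ops=[Diff, Distance])
print(D.features(["SH600000"], ["Diff($close)", "Distance($close, Ref($close, 1))"], freq="day"))
```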