├── .coveragerc ├── .dockerignore ├── .gitignore ├── .travis.yml ├── Dockerfile ├── LICENSE.txt ├── MANIFEST.in ├── README.md ├── alphamind ├── __init__.py ├── analysis │ ├── __init__.py │ ├── calculators.py │ ├── crosssetctions.py │ ├── factoranalysis.py │ ├── perfanalysis.py │ ├── quantileanalysis.py │ └── riskanalysis.py ├── api.py ├── benchmarks │ ├── __init__.py │ ├── benchmarks.py │ ├── data │ │ ├── __init__.py │ │ ├── neutralize.py │ │ ├── standardize.py │ │ └── winsorize.py │ ├── portfolio │ │ ├── __init__.py │ │ ├── linearbuild.py │ │ ├── longshortbuild.py │ │ ├── percentbuild.py │ │ └── rankbuild.py │ └── settlement │ │ ├── __init__.py │ │ └── simplesettle.py ├── bin │ ├── __init__.py │ ├── alphamind │ └── cli.py ├── data │ ├── __init__.py │ ├── dbmodel │ │ ├── __init__.py │ │ └── models │ │ │ ├── __init__.py │ │ │ ├── mysql.py │ │ │ └── postgres.py │ ├── engines │ │ ├── __init__.py │ │ ├── industries.py │ │ ├── sqlengine │ │ │ ├── __init__.py │ │ │ ├── mysql.py │ │ │ └── postgres.py │ │ ├── universe.py │ │ └── utilities.py │ ├── neutralize.py │ ├── processing.py │ ├── quantile.py │ ├── rank.py │ ├── standardize.py │ ├── transformer.py │ └── winsorize.py ├── exceptions │ ├── __init__.py │ └── exceptions.py ├── execution │ ├── __init__.py │ ├── baseexecutor.py │ ├── naiveexecutor.py │ ├── pipeline.py │ ├── targetvolexecutor.py │ └── thresholdexecutor.py ├── formula │ ├── __init__.py │ └── utilities.py ├── model │ ├── __init__.py │ ├── composer.py │ ├── data_preparing.py │ ├── linearmodel.py │ ├── loader.py │ ├── modelbase.py │ ├── svm.py │ └── treemodel.py ├── portfolio │ ├── __init__.py │ ├── constraints.py │ ├── evolver.py │ ├── linearbuilder.py │ ├── longshortbulder.py │ ├── meanvariancebuilder.py │ ├── optimizers.py │ ├── percentbuilder.py │ ├── rankbuilder.py │ └── riskmodel.py ├── settlement │ ├── __init__.py │ └── simplesettle.py ├── strategy │ ├── __init__.py │ ├── sample_strategy.json │ └── strategy.py ├── tests │ ├── __init__.py │ ├── analysis │ │ ├── __init__.py │ │ ├── test_factoranalysis.py │ │ ├── test_perfanalysis.py │ │ ├── test_quantilieanalysis.py │ │ └── test_riskanalysis.py │ ├── cython │ │ └── __init__.py │ ├── data │ │ ├── __init__.py │ │ ├── engines │ │ │ ├── __init__.py │ │ │ ├── test_sql_engine.py │ │ │ └── test_universe.py │ │ ├── test_neutralize.py │ │ ├── test_quantile.py │ │ ├── test_rank.py │ │ ├── test_standardize.py │ │ └── test_winsorize.py │ ├── execution │ │ ├── __init__.py │ │ ├── test_naiveexecutor.py │ │ ├── test_pipeline.py │ │ ├── test_targetvolexecutor.py │ │ └── test_thresholdexecutor.py │ ├── model │ │ ├── __init__.py │ │ ├── test_composer.py │ │ ├── test_linearmodel.py │ │ ├── test_loader.py │ │ ├── test_modelbase.py │ │ └── test_treemodel.py │ ├── portfolio │ │ ├── __init__.py │ │ ├── test_constraints.py │ │ ├── test_evolver.py │ │ ├── test_linearbuild.py │ │ ├── test_longshortbuild.py │ │ ├── test_meanvariancebuild.py │ │ ├── test_optimizers.py │ │ ├── test_percentbuild.py │ │ ├── test_rankbuild.py │ │ └── test_riskmodel.py │ ├── settlement │ │ ├── __init__.py │ │ └── test_simplesettle.py │ └── test_suite.py └── utilities.py ├── doc ├── Makefile ├── conf.py ├── docx │ └── style.docx ├── index.rst ├── make.bat └── src │ ├── changelog.rst │ ├── img │ └── alpha-mind.png │ ├── introduction.rst │ └── whatisit.rst ├── entrypoint.sh ├── install ├── 001.png ├── 002.png └── install.md ├── notebooks ├── Example 1 - Factor IC analysis.ipynb ├── Example 10 - Quadratic Optimizer Comparison with CVXOPT.ipynb ├── Example 11 - Long Short Strategy 
Model.ipynb ├── Example 12 - Machine Learning Model Prediction.ipynb ├── Example 13 - Evaluation within Industry Groups.ipynb ├── Example 2 - Strategy Analysis.ipynb ├── Example 3 - Multi Weight Gap Comparison.ipynb ├── Example 4 - Single Factor Analysis.ipynb ├── Example 5 - Style Factor Analysis.ipynb ├── Example 6 - Target Volatility Builder.ipynb ├── Example 7 - Portfolio Optimizer Performance.ipynb ├── Example 9 - Linear Optimizer Comparison with CVXOPT.ipynb ├── Quick Start 1 - Factor Preprocess.ipynb ├── Quick Start 2 - Factor Rank and Quantile.ipynb ├── Quick Start 3 - Portfolio Builder.ipynb ├── Quick Start 5 - Alpha Factor Quantile Analysis.ipynb ├── Quick Start 6 - Formula Based Stocks Screening.ipynb ├── Quick Start 7 - Single Factor IC Analysis.ipynb ├── Quick Start 8 - IC Decay Calculation.ipynb ├── Quick Start 9 - Back Testing Stock Screening.ipynb ├── Step By Step 01 - 入门.ipynb ├── Step By Step 02 - 数据获取.ipynb ├── Step By Step 03 - 因子设计.ipynb ├── Step By Step 04 - 因子预处理.ipynb ├── Step By Step 05 - 组合优化.ipynb ├── Step By Step 06 - 完整的回测.ipynb └── Step By Step 07 - 风险因子模型.ipynb ├── requirements.txt ├── requirements_docker.txt ├── scripts └── update_uqer_data_postgres.py ├── setup.cfg └── setup.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | source=alphamind 3 | omit=alphamind/tests/* -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | ./build 2 | ./dist 3 | ./Alpha_Mind.egg-info 4 | ./report 5 | ./script 6 | .git 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .idea/* 3 | build/* 4 | dist/* 5 | Alpha_Mind.egg-info/* 6 | *.pyd 7 | *.c 8 | *.cpp 9 | *.html 10 | *.nbc 11 | *.nbi 12 | /notebooks/.ipynb_checkpoints/* 13 | /notebooks/machine learning/.ipynb_checkpoints/* 14 | alphamind/cython/*.so 15 | alphamind/examples/*.xlsx 16 | alphamind/examples/*.csv 17 | doc/_build 18 | doc/build 19 | settings.json 20 | doc/source/_build -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.6" 4 | - "3.7" 5 | - "3.8" 6 | # command to install dependencies 7 | sudo: enabled 8 | dist: bionic 9 | addons: 10 | apt: 11 | packages: 12 | - g++ 13 | - coinor-cbc 14 | - coinor-libcbc-dev 15 | install: 16 | - pip install cython numpy 17 | - pip install -r requirements.txt 18 | - pip install coverage 19 | - pip install coveralls --ignore-installed 20 | script: 21 | - export NUMBA_DISABLE_JIT=1 22 | - coverage run --rcfile=./.coveragerc alphamind/tests/test_suite.py 23 | - coverage report --rcfile=./.coveragerc -i 24 | - coverage html --rcfile=./.coveragerc -i 25 | after_success: 26 | - coveralls 27 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM continuumio/anaconda3:2021.05 2 | 3 | LABEL maintainer = "scrappedprince.li@gmail.com" 4 | RUN apt-get update && apt-get install build-essential default-libmysqlclient-dev coinor-cbc coinor-libcbc-dev -y 5 | ENV COIN_INSTALL_DIR /usr 6 | 7 | WORKDIR / 8 | COPY ./requirements_docker.txt /requirements.txt 9 | RUN pip install -r 
/requirements.txt -i https://pypi.douban.com/simple 10 | RUN pip install finance-python>=0.8.1 -i https://pypi.douban.com/simple 11 | 12 | WORKDIR / 13 | COPY ./alphamind /alphamind 14 | COPY ./notebooks /notebooks 15 | 16 | COPY ./setup.py /setup.py 17 | COPY ./setup.cfg /setup.cfg 18 | 19 | EXPOSE 8080 20 | COPY ./entrypoint.sh /entrypoint.sh 21 | RUN chmod +x /entrypoint.sh 22 | 23 | WORKDIR /notebooks 24 | ENTRYPOINT ["/entrypoint.sh"] 25 | CMD [] -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2017 Cheng Li 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include MANIFEST.in 2 | include .coveragerc 3 | include requirements.txt 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Alpha - Mind 2 | 3 | [badges: Python version · travis build status · coverage]
20 | 21 | **Alpha - Mind** is a multi-factor equity research framework developed in **Python**. 22 | 23 | ## TODO list 24 | 25 | After a long pause, development of **alpha-mind** is being restarted. The list below gives the features and improvements currently planned: 26 | 27 | - [x] Add support for MySQL as a data backend; 28 | - [ ] Add support for CSV files as a data backend, with a sample file provided for user testing; 29 | - [x] Remove all C++ related code to make alpha-mind easier to install; 30 | - [x] Provide packages on Windows and Linux that can be installed directly via pip; 31 | - [ ] Complete documentation; 32 | - [ ] Add hyper-parameter tuning to the alpha models; 33 | - [ ] Add multi-period forecasting to the alpha models; 34 | - [ ] Add multi-period optimization to the optimizer. 35 | 36 | ## Dependencies 37 | 38 | The project has two main external GitHub dependencies: 39 | 40 | * [Finance-Python](https://github.com/alpha-miner/finance-python) 41 | 42 | * [portfolio - optimizer](https://github.com/alpha-miner/portfolio-optimizer): an optimizer toolkit for portfolio allocation, written by the same author; 43 | 44 | Both libraries can be installed directly with pip. 45 | 46 | ## Features 47 | 48 | alpha - mind provides the tool chain commonly used in multi-factor research, including: 49 | 50 | * data cleaning 51 | * alpha models 52 | * risk models 53 | * portfolio optimization 54 | * executors 55 | 56 | Every module ships with a complete set of test cases to guard correctness as far as possible. Performance has also received close attention throughout the development of all numerical models, drawing on excellent third-party tools: 57 | 58 | * numpy 59 | * numba 60 | * cvxopt 61 | * cvxpy 62 | * pandas 63 | * scipy 64 | 65 | ## Installation 66 | 67 | A detailed installation guide is available in the `install` directory. 68 | 69 | Installation is done from source, by cloning or downloading the code. The steps are: 70 | 71 | Clone the project locally 72 | 73 | ```shell 74 | $ git clone https://github.com/rongliang-tech/alpha-mind.git 75 | ``` 76 | 77 | Then install with the following command 78 | 79 | ```shell 80 | $ python setup.py install 81 | ``` 82 | 83 | ### Running with Docker 84 | 85 | 1. `docker build -t alpha-mind:latest -f Dockerfile .` 86 | 87 | 2. `docker run -it -p 8080:8080 --name alpha-mind alpha-mind` 88 | 89 | By default, after opening a browser, go to 127.0.0.1/lab and enter the login password: `rongliang2021` 90 | 91 | The initial password can be customized; see: [generating a Jupyter password](https://jupyter-notebook.readthedocs.io/en/stable/public_server.html#preparing-a-hashed-password) 92 | 93 | #### Notes 94 | 95 | The environment variables are configured in `./entrypoint.sh`, including: 96 | 97 | * `DB_VENDOR`: set this to `rl` if MySQL is used; 98 | * `DB_URI`: the database connection string. 99 | * `FACTOR_TABLES`: the factor tables to use 100 | -------------------------------------------------------------------------------- /alphamind/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-4-25 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | __version__ = "0.3.1" 9 | -------------------------------------------------------------------------------- /alphamind/analysis/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-5-6 4 | 5 | @author: cheng.li 6 | """ 7 | -------------------------------------------------------------------------------- /alphamind/analysis/calculators.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-5-18 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import pandas as pd 9 | 10 | 11 | def calculate_turn_over(pos_table: pd.DataFrame) -> pd.DataFrame: 12 | turn_over_table = {} 13 | total_factors = pos_table.columns.difference(['code']) 14 | pos_table.reset_index() 15 | 16 | for name in total_factors: 17 | pivot_position = pos_table.pivot(values=name, columns='code').fillna(0.)
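        # `pivot_position` now has one row per date (the frame's index) and one column
        # per instrument code; the turnover per period is then the L1 norm of the
        # period-over-period position changes computed below.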
18 | turn_over_series = pivot_position.diff().abs().sum(axis=1) 19 | turn_over_table[name] = turn_over_series.values 20 | 21 | turn_over_table = pd.DataFrame(turn_over_table, index=pos_table.trade_date.unique()) 22 | return turn_over_table[total_factors] 23 | -------------------------------------------------------------------------------- /alphamind/analysis/crosssetctions.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2018-3-5 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import numpy as np 9 | import pandas as pd 10 | import statsmodels.api as sm 11 | 12 | from alphamind.data.processing import factor_processing 13 | from alphamind.data.standardize import standardize 14 | from alphamind.data.winsorize import winsorize_normal 15 | from alphamind.utilities import alpha_logger 16 | 17 | 18 | def cs_impl(ref_date, 19 | factor_data, 20 | factor_name, 21 | risk_exposure, 22 | constraint_risk, 23 | industry_matrix, 24 | dx_returns): 25 | total_data = pd.merge(factor_data, risk_exposure, on='code') 26 | total_data = pd.merge(total_data, industry_matrix, on='code') 27 | total_data = total_data.replace([np.inf, -np.inf], np.nan).dropna() 28 | 29 | if len(total_data) < 0.33 * len(factor_data): 30 | alpha_logger.warning(f"valid data point({len(total_data)}) " 31 | f"is less than 33% of the total sample ({len(factor_data)}). Omit this run") 32 | return np.nan, np.nan, np.nan 33 | 34 | total_risk_exp = total_data[constraint_risk] 35 | 36 | er = total_data[[factor_name]].values.astype(float) 37 | er = factor_processing(er, [winsorize_normal, standardize], total_risk_exp.values, 38 | [standardize]).flatten() 39 | industry = total_data.industry_name.values 40 | 41 | codes = total_data.code.tolist() 42 | target_pos = pd.DataFrame({'code': codes, 43 | 'weight': er, 44 | 'industry': industry}) 45 | target_pos['weight'] = target_pos['weight'] / target_pos['weight'].abs().sum() 46 | target_pos = pd.merge(target_pos, dx_returns, on=['code']) 47 | target_pos = pd.merge(target_pos, total_data[['code'] + constraint_risk], on=['code']) 48 | total_risk_exp = target_pos[constraint_risk] 49 | activate_weight = target_pos['weight'].values 50 | excess_return = np.exp(target_pos[['dx']].values) - 1. 51 | excess_return = factor_processing(excess_return, 52 | [winsorize_normal, standardize], 53 | total_risk_exp.values, 54 | [winsorize_normal, standardize]).flatten() 55 | port_ret = np.log(activate_weight @ excess_return + 1.) 
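    # the IC is measured as the cross-sectional correlation between the processed
    # excess returns and the factor-implied active weights; the t-stat below comes
    # from an OLS fit of those excess returns on the active weights.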
56 | ic = np.corrcoef(excess_return, activate_weight)[0, 1] 57 | x = sm.add_constant(activate_weight) 58 | results = sm.OLS(excess_return, x).fit() 59 | t_stats = results.tvalues[1] 60 | 61 | alpha_logger.info(f"{ref_date} is finished with {len(target_pos)} stocks for {factor_name}") 62 | alpha_logger.info(f"{ref_date} risk_exposure: " 63 | f"{np.sum(np.square(target_pos.weight.values @ target_pos[constraint_risk].values))}") 64 | return port_ret, ic, t_stats 65 | 66 | 67 | def cross_section_analysis(ref_date, 68 | factor_name, 69 | universe, 70 | horizon, 71 | constraint_risk, 72 | engine): 73 | codes = engine.fetch_codes(ref_date, universe) 74 | 75 | risk_exposure = engine.fetch_risk_model(ref_date, codes)[1][['code'] + constraint_risk] 76 | factor_data = engine.fetch_factor(ref_date, factor_name, codes) 77 | industry_matrix = engine.fetch_industry_matrix(ref_date, codes, 'sw_adj', 1) 78 | dx_returns = engine.fetch_dx_return(ref_date, codes, horizon=horizon, offset=1) 79 | 80 | return cs_impl(ref_date, factor_data, factor_name, risk_exposure, constraint_risk, 81 | industry_matrix, dx_returns) 82 | 83 | 84 | if __name__ == '__main__': 85 | from alphamind.api import SqlEngine, Universe, risk_styles, industry_styles 86 | 87 | factor_name = 'SIZE' 88 | data_source = 'postgres+psycopg2://postgres:A12345678!@10.63.6.220/alpha' 89 | engine = SqlEngine(data_source) 90 | risk_names = list(set(risk_styles).difference({factor_name})) 91 | industry_names = list(set(industry_styles).difference({factor_name})) 92 | constraint_risk = risk_names + industry_names 93 | universe = Universe('custom', ['ashare_ex']) 94 | horizon = 9 95 | 96 | x = cross_section_analysis('2018-02-08', 97 | factor_name, 98 | universe, 99 | horizon, 100 | constraint_risk, 101 | engine=engine) 102 | print(x) 103 | -------------------------------------------------------------------------------- /alphamind/analysis/perfanalysis.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-5-12 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import pandas as pd 9 | 10 | from alphamind.analysis.riskanalysis import risk_analysis 11 | 12 | 13 | def perf_attribution_by_pos(net_weight_series: pd.Series, 14 | next_bar_return_series: pd.Series, 15 | benchmark_table: pd.DataFrame) -> pd.DataFrame: 16 | explained_table, _ = risk_analysis(net_weight_series, 17 | next_bar_return_series, 18 | benchmark_table) 19 | return explained_table.groupby(level=0).sum() 20 | -------------------------------------------------------------------------------- /alphamind/analysis/quantileanalysis.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-8-16 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | from typing import Optional 9 | 10 | import numpy as np 11 | import pandas as pd 12 | 13 | from alphamind.data.processing import factor_processing 14 | from alphamind.data.quantile import quantile 15 | from alphamind.data.standardize import standardize 16 | from alphamind.data.winsorize import winsorize_normal 17 | from alphamind.utilities import agg_mean 18 | 19 | 20 | def quantile_analysis(factors: pd.DataFrame, 21 | factor_weights: np.ndarray, 22 | dx_return: np.ndarray, 23 | n_bins: int = 5, 24 | risk_exp: Optional[np.ndarray] = None, 25 | **kwargs): 26 | if 'pre_process' in kwargs: 27 | pre_process = kwargs['pre_process'] 28 | del kwargs['pre_process'] 29 | else: 30 | pre_process = [winsorize_normal, 
standardize] 31 | 32 | if 'post_process' in kwargs: 33 | post_process = kwargs['post_process'] 34 | del kwargs['post_process'] 35 | else: 36 | post_process = [standardize] 37 | 38 | er = factor_processing(factors.values, pre_process, risk_exp, post_process) @ factor_weights 39 | return er_quantile_analysis(er, n_bins, dx_return, **kwargs) 40 | 41 | 42 | def er_quantile_analysis(er: np.ndarray, 43 | n_bins: int, 44 | dx_return: np.ndarray, 45 | de_trend=False) -> np.ndarray: 46 | er = er.flatten() 47 | q_groups = quantile(er, n_bins) 48 | 49 | if dx_return.ndim < 2: 50 | dx_return.shape = -1, 1 51 | 52 | group_return = agg_mean(q_groups, dx_return).flatten() 53 | total_return = group_return.sum() 54 | ret = group_return.copy() 55 | 56 | if de_trend: 57 | resid = n_bins - 1 58 | res_weight = 1. / resid 59 | for i, value in enumerate(ret): 60 | ret[i] = (1. + res_weight) * value - res_weight * total_return 61 | 62 | return ret 63 | 64 | 65 | if __name__ == '__main__': 66 | n = 5000 67 | n_f = 5 68 | n_bins = 5 69 | 70 | x = np.random.randn(n, 5) 71 | risk_exp = np.random.randn(n, 3) 72 | x_w = np.random.randn(n_f) 73 | r = np.random.randn(n) 74 | 75 | f_df = pd.DataFrame(x) 76 | calculated = quantile_analysis(f_df, 77 | x_w, 78 | r, 79 | risk_exp=None, 80 | n_bins=n_bins, 81 | pre_process=[], # [winsorize_normal, standardize], 82 | post_process=[]) # [standardize]) 83 | 84 | er = x_w @ f_df.values.T 85 | expected = er_quantile_analysis(er, n_bins, r) 86 | 87 | print(calculated) 88 | print(expected) 89 | -------------------------------------------------------------------------------- /alphamind/analysis/riskanalysis.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-5-6 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | from typing import Tuple 9 | 10 | import numpy as np 11 | import pandas as pd 12 | 13 | from alphamind.data.neutralize import neutralize 14 | 15 | 16 | def risk_analysis(net_weight_series: pd.Series, 17 | next_bar_return_series: pd.Series, 18 | risk_table: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]: 19 | group_idx = net_weight_series.index.values.astype(int) 20 | net_pos = net_weight_series.values.reshape((-1, 1)) 21 | risk_factor_cols = risk_table.columns 22 | 23 | idiosyncratic, other_stats = neutralize(risk_table.values, 24 | next_bar_return_series.values, 25 | group_idx, 26 | detail=True) 27 | 28 | systematic = other_stats['explained'] 29 | exposure = other_stats['exposure'] 30 | 31 | explained_table = np.hstack((idiosyncratic, systematic[:, :, 0])) 32 | cols = ['idiosyncratic'] 33 | cols.extend(risk_factor_cols) 34 | 35 | explained_table = pd.DataFrame(explained_table * net_pos, columns=cols, 36 | index=net_weight_series.index) 37 | exposure_table = pd.DataFrame(exposure[:, :, 0] * net_pos, columns=risk_factor_cols, 38 | index=net_weight_series.index) 39 | return explained_table, exposure_table.groupby(level=0).first() 40 | -------------------------------------------------------------------------------- /alphamind/api.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-8-16 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | from alphamind.data.engines.sqlengine import SqlEngine 9 | from alphamind.data.engines.sqlengine import risk_styles 10 | from alphamind.data.engines.sqlengine import industry_styles 11 | from alphamind.data.engines.sqlengine import macro_styles 12 | from alphamind.analysis.factoranalysis 
import er_portfolio_analysis 13 | from alphamind.analysis.factoranalysis import factor_analysis 14 | from alphamind.analysis.quantileanalysis import er_quantile_analysis 15 | from alphamind.analysis.quantileanalysis import quantile_analysis 16 | from alphamind.data.engines.universe import Universe 17 | from alphamind.data.engines.utilities import industry_list 18 | from alphamind.data.neutralize import neutralize 19 | from alphamind.data.processing import factor_processing 20 | from alphamind.data.rank import percentile 21 | from alphamind.data.rank import rank 22 | from alphamind.data.standardize import Standardizer 23 | from alphamind.data.standardize import projection 24 | from alphamind.data.standardize import standardize 25 | from alphamind.data.winsorize import NormalWinsorizer 26 | from alphamind.data.winsorize import winsorize_normal 27 | from alphamind.execution.naiveexecutor import NaiveExecutor 28 | from alphamind.execution.pipeline import ExecutionPipeline 29 | from alphamind.execution.targetvolexecutor import TargetVolExecutor 30 | from alphamind.execution.thresholdexecutor import ThresholdExecutor 31 | from alphamind.model import ConstLinearModel 32 | from alphamind.model import LassoRegression 33 | from alphamind.model import LinearRegression 34 | from alphamind.model import LogisticRegression 35 | from alphamind.model import NvSVRModel 36 | from alphamind.model import RandomForestClassifier 37 | from alphamind.model import RandomForestRegressor 38 | from alphamind.model import XGBClassifier 39 | from alphamind.model import XGBRegressor 40 | from alphamind.model import XGBTrainer 41 | from alphamind.model import load_model 42 | from alphamind.model.composer import Composer 43 | from alphamind.model.composer import DataMeta 44 | from alphamind.model.composer import predict_by_model 45 | from alphamind.model.composer import train_model 46 | from alphamind.model.data_preparing import fetch_data_package 47 | from alphamind.model.data_preparing import fetch_predict_phase 48 | from alphamind.model.data_preparing import fetch_train_phase 49 | from alphamind.portfolio.constraints import BoundaryDirection 50 | from alphamind.portfolio.constraints import BoundaryType 51 | from alphamind.portfolio.constraints import Constraints 52 | from alphamind.portfolio.constraints import LinearConstraints 53 | from alphamind.portfolio.constraints import create_box_bounds 54 | from alphamind.portfolio.evolver import evolve_positions 55 | from alphamind.utilities import alpha_logger 56 | from alphamind.utilities import map_freq 57 | 58 | __all__ = [ 59 | 'SqlEngine', 60 | 'factor_analysis', 61 | 'er_portfolio_analysis', 62 | 'quantile_analysis', 63 | 'er_quantile_analysis', 64 | 'Universe', 65 | 'factor_processing', 66 | 'Constraints', 67 | 'LinearConstraints', 68 | 'BoundaryType', 69 | 'BoundaryDirection', 70 | 'create_box_bounds', 71 | 'evolve_positions', 72 | 'risk_styles', 73 | 'industry_styles', 74 | 'macro_styles', 75 | 'winsorize_normal', 76 | 'NormalWinsorizer', 77 | 'standardize', 78 | 'Standardizer', 79 | 'projection', 80 | 'neutralize', 81 | 'rank', 82 | 'percentile', 83 | 'industry_list', 84 | 'fetch_data_package', 85 | 'fetch_train_phase', 86 | 'fetch_predict_phase', 87 | 'Composer', 88 | 'DataMeta', 89 | 'train_model', 90 | 'predict_by_model', 91 | 'LinearRegression', 92 | 'LassoRegression', 93 | 'ConstLinearModel', 94 | 'LogisticRegression', 95 | 'RandomForestRegressor', 96 | 'RandomForestClassifier', 97 | 'XGBRegressor', 98 | 'XGBClassifier', 99 | 'XGBTrainer', 100 | 'NvSVRModel', 
101 | 'load_model', 102 | 'NaiveExecutor', 103 | 'ThresholdExecutor', 104 | 'TargetVolExecutor', 105 | 'ExecutionPipeline', 106 | 'alpha_logger', 107 | 'map_freq' 108 | ] 109 | -------------------------------------------------------------------------------- /alphamind/benchmarks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-miner/alpha-mind/023fca01d2cea7cd50328396c60b06c99706c426/alphamind/benchmarks/__init__.py -------------------------------------------------------------------------------- /alphamind/benchmarks/benchmarks.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-4-25 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | from alphamind.benchmarks.data.neutralize import benchmark_neutralize 9 | from alphamind.benchmarks.data.neutralize import benchmark_neutralize_with_groups 10 | from alphamind.benchmarks.data.standardize import benchmark_standardize 11 | from alphamind.benchmarks.data.standardize import benchmark_standardize_with_group 12 | from alphamind.benchmarks.data.winsorize import benchmark_winsorize_normal 13 | from alphamind.benchmarks.data.winsorize import benchmark_winsorize_normal_with_group 14 | from alphamind.benchmarks.portfolio.linearbuild import benchmark_build_linear 15 | from alphamind.benchmarks.portfolio.percentbuild import benchmark_build_percent 16 | from alphamind.benchmarks.portfolio.percentbuild import benchmark_build_percent_with_group 17 | from alphamind.benchmarks.portfolio.rankbuild import benchmark_build_rank 18 | from alphamind.benchmarks.portfolio.rankbuild import benchmark_build_rank_with_group 19 | from alphamind.benchmarks.settlement.simplesettle import benchmark_simple_settle 20 | from alphamind.benchmarks.settlement.simplesettle import benchmark_simple_settle_with_group 21 | 22 | if __name__ == '__main__': 23 | benchmark_neutralize(3000, 10, 1000) 24 | benchmark_neutralize_with_groups(3000, 10, 1000, 30) 25 | benchmark_neutralize(30, 3, 50000) 26 | benchmark_neutralize_with_groups(30, 3, 50000, 3) 27 | benchmark_neutralize(50000, 50, 20) 28 | benchmark_neutralize_with_groups(50000, 50, 20, 50) 29 | benchmark_standardize(3000, 10, 1000) 30 | benchmark_standardize_with_group(3000, 10, 1000, 30) 31 | benchmark_standardize(100, 10, 50000) 32 | benchmark_standardize_with_group(100, 10, 5000, 4) 33 | benchmark_standardize(50000, 50, 20) 34 | benchmark_standardize_with_group(50000, 50, 20, 50) 35 | benchmark_winsorize_normal(3000, 10, 1000) 36 | benchmark_winsorize_normal_with_group(3000, 10, 1000, 30) 37 | benchmark_winsorize_normal(30, 10, 50000) 38 | benchmark_winsorize_normal_with_group(30, 10, 5000, 5) 39 | benchmark_winsorize_normal(50000, 50, 20) 40 | benchmark_winsorize_normal_with_group(50000, 50, 20, 50) 41 | benchmark_build_rank(3000, 1000, 300) 42 | benchmark_build_rank_with_group(3000, 1000, 10, 30) 43 | benchmark_build_rank(30, 50000, 3) 44 | benchmark_build_rank_with_group(30, 50000, 1, 3) 45 | benchmark_build_rank(50000, 20, 3000) 46 | benchmark_build_rank_with_group(50000, 20, 10, 300) 47 | benchmark_build_percent(3000, 1000, 0.1) 48 | benchmark_build_percent_with_group(3000, 1000, 0.1, 30) 49 | benchmark_build_percent(30, 50000, 0.1) 50 | benchmark_build_percent_with_group(30, 50000, 0.1, 3) 51 | benchmark_build_percent(50000, 20, 0.1) 52 | benchmark_build_percent_with_group(50000, 20, 0.1, 300) 53 | benchmark_build_linear(100, 3, 100) 54 | 
benchmark_build_linear(1000, 30, 10) 55 | benchmark_simple_settle(3000, 10, 1000) 56 | benchmark_simple_settle_with_group(3000, 10, 1000, 30) 57 | benchmark_simple_settle(30, 10, 50000) 58 | benchmark_simple_settle_with_group(30, 10, 50000, 5) 59 | benchmark_simple_settle(50000, 50, 20) 60 | benchmark_simple_settle_with_group(50000, 50, 20, 50) 61 | -------------------------------------------------------------------------------- /alphamind/benchmarks/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-miner/alpha-mind/023fca01d2cea7cd50328396c60b06c99706c426/alphamind/benchmarks/data/__init__.py -------------------------------------------------------------------------------- /alphamind/benchmarks/data/neutralize.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-4-25 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import datetime as dt 9 | 10 | import numpy as np 11 | from sklearn.linear_model import LinearRegression 12 | 13 | from alphamind.data.neutralize import neutralize 14 | 15 | 16 | def benchmark_neutralize(n_samples: int, n_features: int, n_loops: int) -> None: 17 | print("-" * 60) 18 | print("Starting least square fitting benchmarking") 19 | print("Parameters(n_samples: {0}, n_features: {1}, n_loops: {2})".format(n_samples, n_features, 20 | n_loops)) 21 | 22 | y = np.random.randn(n_samples, 5) 23 | x = np.random.randn(n_samples, n_features) 24 | 25 | start = dt.datetime.now() 26 | for _ in range(n_loops): 27 | calc_res = neutralize(x, y) 28 | impl_model_time = dt.datetime.now() - start 29 | 30 | print('{0:20s}: {1}'.format('Implemented model', impl_model_time)) 31 | 32 | start = dt.datetime.now() 33 | for _ in range(n_loops): 34 | benchmark_model = LinearRegression(fit_intercept=False) 35 | benchmark_model.fit(x, y) 36 | exp_res = y - x @ benchmark_model.coef_.T 37 | benchmark_model_time = dt.datetime.now() - start 38 | 39 | print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time)) 40 | 41 | np.testing.assert_array_almost_equal(calc_res, exp_res) 42 | 43 | 44 | def benchmark_neutralize_with_groups(n_samples: int, n_features: int, n_loops: int, 45 | n_groups: int) -> None: 46 | print("-" * 60) 47 | print("Starting least square fitting with group benchmarking") 48 | print( 49 | "Parameters(n_samples: {0}, n_features: {1}, n_loops: {2}, n_groups: {3})".format(n_samples, 50 | n_features, 51 | n_loops, 52 | n_groups)) 53 | y = np.random.randn(n_samples, 5) 54 | x = np.random.randn(n_samples, n_features) 55 | groups = np.random.randint(n_groups, size=n_samples) 56 | 57 | start = dt.datetime.now() 58 | for _ in range(n_loops): 59 | _ = neutralize(x, y, groups) 60 | impl_model_time = dt.datetime.now() - start 61 | 62 | print('{0:20s}: {1}'.format('Implemented model', impl_model_time)) 63 | 64 | start = dt.datetime.now() 65 | 66 | model = LinearRegression(fit_intercept=False) 67 | for _ in range(n_loops): 68 | for i in range(n_groups): 69 | curr_x = x[groups == i] 70 | curr_y = y[groups == i] 71 | model.fit(curr_x, curr_y) 72 | _ = curr_y - curr_x @ model.coef_.T 73 | benchmark_model_time = dt.datetime.now() - start 74 | 75 | print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time)) 76 | 77 | 78 | if __name__ == '__main__': 79 | benchmark_neutralize(3000, 10, 1000) 80 | benchmark_neutralize_with_groups(3000, 10, 1000, 30) 81 | 
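As a companion to the benchmark above, here is a minimal usage sketch (with illustrative shapes) of the property it verifies: `neutralize` returns the least-squares residuals of `y` regressed on `x`, matching an explicit scikit-learn fit without intercept:

```python
import numpy as np
from sklearn.linear_model import LinearRegression

from alphamind.data.neutralize import neutralize

x = np.random.randn(1000, 10)  # risk factor exposures
y = np.random.randn(1000, 5)   # raw values to be neutralized

# residuals of y after removing its linear exposure to x
res = neutralize(x, y)

# the same residuals computed explicitly, as the benchmark does
model = LinearRegression(fit_intercept=False)
model.fit(x, y)
np.testing.assert_array_almost_equal(res, y - x @ model.coef_.T)
```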
-------------------------------------------------------------------------------- /alphamind/benchmarks/data/standardize.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-4-25 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import datetime as dt 9 | 10 | import numpy as np 11 | import pandas as pd 12 | from scipy.stats import zscore 13 | 14 | from alphamind.data.standardize import standardize 15 | 16 | 17 | def benchmark_standardize(n_samples: int, n_features: int, n_loops: int) -> None: 18 | print("-" * 60) 19 | print("Starting standardizing benchmarking") 20 | print("Parameters(n_samples: {0}, n_features: {1}, n_loops: {2})".format(n_samples, n_features, 21 | n_loops)) 22 | 23 | x = np.random.randn(n_samples, n_features) 24 | 25 | start = dt.datetime.now() 26 | for _ in range(n_loops): 27 | _ = standardize(x) 28 | impl_model_time = dt.datetime.now() - start 29 | 30 | print('{0:20s}: {1}'.format('Implemented model', impl_model_time)) 31 | 32 | start = dt.datetime.now() 33 | for _ in range(n_loops): 34 | _ = zscore(x) 35 | benchmark_model_time = dt.datetime.now() - start 36 | 37 | print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time)) 38 | 39 | 40 | def benchmark_standardize_with_group(n_samples: int, n_features: int, n_loops: int, 41 | n_groups: int) -> None: 42 | print("-" * 60) 43 | print("Starting standardizing with group-by values benchmarking") 44 | print( 45 | "Parameters(n_samples: {0}, n_features: {1}, n_loops: {2}, n_groups: {3})".format(n_samples, 46 | n_features, 47 | n_loops, 48 | n_groups)) 49 | 50 | x = np.random.randn(n_samples, n_features) 51 | groups = np.random.randint(n_groups, size=n_samples) 52 | 53 | start = dt.datetime.now() 54 | for _ in range(n_loops): 55 | _ = standardize(x, groups=groups) 56 | impl_model_time = dt.datetime.now() - start 57 | 58 | print('{0:20s}: {1}'.format('Implemented model', impl_model_time)) 59 | 60 | start = dt.datetime.now() 61 | for _ in range(n_loops): 62 | _ = pd.DataFrame(x).groupby(groups).transform( 63 | lambda s: (s - s.mean(axis=0)) / s.std(axis=0)) 64 | benchmark_model_time = dt.datetime.now() - start 65 | 66 | print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time)) 67 | 68 | 69 | if __name__ == '__main__': 70 | benchmark_standardize(3000, 10, 1000) 71 | benchmark_standardize_with_group(3000, 10, 1000, 30) 72 | -------------------------------------------------------------------------------- /alphamind/benchmarks/data/winsorize.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-4-25 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import datetime as dt 9 | 10 | import numpy as np 11 | import pandas as pd 12 | 13 | from alphamind.data.winsorize import winsorize_normal 14 | 15 | 16 | def benchmark_winsorize_normal(n_samples: int, n_features: int, n_loops: int) -> None: 17 | print("-" * 60) 18 | print("Starting winsorize normal benchmarking") 19 | print("Parameters(n_samples: {0}, n_features: {1}, n_loops: {2})".format(n_samples, n_features, 20 | n_loops)) 21 | 22 | num_stds = 2 23 | 24 | x = np.random.randn(n_samples, n_features) 25 | 26 | start = dt.datetime.now() 27 | for _ in range(n_loops): 28 | _ = winsorize_normal(x, num_stds) 29 | impl_model_time = dt.datetime.now() - start 30 | 31 | print('{0:20s}: {1}'.format('Implemented model', impl_model_time)) 32 | 33 | def impl(x): 34 | std_values = x.std(axis=0) 35 | mean_value = 
x.mean(axis=0) 36 | 37 | lower_bound = mean_value - num_stds * std_values 38 | upper_bound = mean_value + num_stds * std_values 39 | 40 | res = np.where(x > upper_bound, upper_bound, x) 41 | res = np.where(res < lower_bound, lower_bound, res) 42 | return res 43 | 44 | start = dt.datetime.now() 45 | for _ in range(n_loops): 46 | _ = impl(x) 47 | benchmark_model_time = dt.datetime.now() - start 48 | 49 | print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time)) 50 | 51 | 52 | def benchmark_winsorize_normal_with_group(n_samples: int, n_features: int, n_loops: int, 53 | n_groups: int) -> None: 54 | print("-" * 60) 55 | print("Starting winsorize normal with group-by values benchmarking") 56 | print( 57 | "Parameters(n_samples: {0}, n_features: {1}, n_loops: {2}, n_groups: {3})".format(n_samples, 58 | n_features, 59 | n_loops, 60 | n_groups)) 61 | 62 | num_stds = 2 63 | 64 | x = np.random.randn(n_samples, n_features) 65 | groups = np.random.randint(n_groups, size=n_samples) 66 | 67 | start = dt.datetime.now() 68 | for _ in range(n_loops): 69 | _ = winsorize_normal(x, num_stds, groups=groups) 70 | impl_model_time = dt.datetime.now() - start 71 | 72 | print('{0:20s}: {1}'.format('Implemented model', impl_model_time)) 73 | 74 | def impl(x): 75 | std_values = x.std(axis=0) 76 | mean_value = x.mean(axis=0) 77 | 78 | lower_bound = mean_value - num_stds * std_values 79 | upper_bound = mean_value + num_stds * std_values 80 | 81 | res = np.where(x > upper_bound, upper_bound, x) 82 | res = np.where(res < lower_bound, lower_bound, res) 83 | return res 84 | 85 | start = dt.datetime.now() 86 | for _ in range(n_loops): 87 | _ = pd.DataFrame(x).groupby(groups).transform(impl) 88 | benchmark_model_time = dt.datetime.now() - start 89 | 90 | print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time)) 91 | 92 | 93 | if __name__ == '__main__': 94 | benchmark_winsorize_normal(3000, 10, 1000) 95 | benchmark_winsorize_normal_with_group(3000, 10, 1000, 30) 96 | -------------------------------------------------------------------------------- /alphamind/benchmarks/portfolio/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-4-27 4 | 5 | @author: cheng.li 6 | """ 7 | -------------------------------------------------------------------------------- /alphamind/benchmarks/portfolio/linearbuild.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-5-5 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import datetime as dt 9 | 10 | import numpy as np 11 | from cvxopt import matrix 12 | from cvxopt import solvers 13 | from scipy.optimize import linprog 14 | 15 | from alphamind.portfolio.linearbuilder import linear_builder 16 | 17 | solvers.options['show_progress'] = False 18 | 19 | 20 | def benchmark_build_linear(n_samples: int, n_risks: int, n_loop: int) -> None: 21 | print("-" * 60) 22 | print("Starting portfolio construction by linear programming") 23 | print( 24 | "Parameters(n_samples: {0}, n_risks: {1}, n_loop: {2})".format(n_samples, n_risks, n_loop)) 25 | 26 | er = np.random.randn(n_samples) 27 | risk_exp = np.random.randn(n_samples, n_risks) 28 | bm = np.random.rand(n_samples) 29 | bm /= bm.sum() 30 | 31 | lbound = -0.04 32 | ubound = 0.05 33 | 34 | risk_lbound = bm @ risk_exp 35 | risk_ubound = bm @ risk_exp 36 | 37 | start = dt.datetime.now() 38 | for _ in range(n_loop): 39 | status, v, x = linear_builder(er, 40 | lbound, 
41 | ubound, 42 | risk_exp, 43 | risk_target=(risk_lbound, 44 | risk_ubound)) 45 | impl_model_time = dt.datetime.now() - start 46 | print('{0:20s}: {1}'.format('Implemented model (ECOS)', impl_model_time)) 47 | 48 | c = - er 49 | bounds = [(lbound, ubound) for _ in range(n_samples)] 50 | a_eq = np.ones((1, n_samples)) 51 | a_eq = np.vstack((a_eq, risk_exp.T)) 52 | b_eq = np.hstack((np.array([1.]), risk_exp.T @ bm)) 53 | start = dt.datetime.now() 54 | for _ in range(n_loop): 55 | res = linprog(c, A_eq=a_eq, b_eq=b_eq, bounds=bounds, options={'maxiter': 10000}) 56 | benchmark_model_time = dt.datetime.now() - start 57 | print('{0:20s}: {1}'.format('Benchmark model (scipy)', benchmark_model_time)) 58 | np.testing.assert_array_almost_equal(x, res['x']) 59 | 60 | c = matrix(-er) 61 | aneq = matrix(a_eq) 62 | b = matrix(b_eq) 63 | g = matrix(np.vstack((np.diag(np.ones(n_samples)), -np.diag(np.ones(n_samples))))) 64 | h = matrix(np.hstack((ubound * np.ones(n_samples), -lbound * np.ones(n_samples)))) 65 | 66 | solvers.lp(c, g, h, solver='glpk') 67 | start = dt.datetime.now() 68 | for _ in range(n_loop): 69 | res2 = solvers.lp(c, g, h, aneq, b, solver='glpk') 70 | benchmark_model_time = dt.datetime.now() - start 71 | print('{0:20s}: {1}'.format('Benchmark model (glpk)', benchmark_model_time)) 72 | np.testing.assert_array_almost_equal(x, np.array(res2['x']).flatten()) 73 | 74 | 75 | if __name__ == '__main__': 76 | benchmark_build_linear(2000, 30, 10) 77 | -------------------------------------------------------------------------------- /alphamind/benchmarks/portfolio/longshortbuild.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-5-9 4 | 5 | @author: cheng.li 6 | """ 7 | -------------------------------------------------------------------------------- /alphamind/benchmarks/portfolio/percentbuild.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-5-4 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import datetime as dt 9 | 10 | import numpy as np 11 | import pandas as pd 12 | 13 | from alphamind.portfolio.percentbuilder import percent_build 14 | 15 | 16 | def benchmark_build_percent(n_samples: int, n_loops: int, p_included: float) -> None: 17 | print("-" * 60) 18 | print("Starting portfolio construction by percent benchmarking") 19 | print("Parameters(n_samples: {0}, p_included: {1}, n_loops: {2})".format(n_samples, p_included, 20 | n_loops)) 21 | 22 | n_portfolio = 10 23 | 24 | x = np.random.randn(n_samples, n_portfolio) 25 | 26 | start = dt.datetime.now() 27 | for _ in range(n_loops): 28 | calc_weights = percent_build(x, p_included) 29 | impl_model_time = dt.datetime.now() - start 30 | 31 | print('{0:20s}: {1}'.format('Implemented model', impl_model_time)) 32 | 33 | start = dt.datetime.now() 34 | for _ in range(n_loops): 35 | exp_weights = np.zeros((len(x), n_portfolio)) 36 | n_included = int(p_included * len(x)) 37 | choosed_index = (-x).argsort(axis=0).argsort(axis=0) < n_included 38 | for j in range(n_portfolio): 39 | exp_weights[choosed_index[:, j], j] = 1.
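        # the double argsort computes each entry's descending rank per column, so the
        # mask above keeps exactly the n_included largest values in every portfolio.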
40 | benchmark_model_time = dt.datetime.now() - start 41 | 42 | np.testing.assert_array_almost_equal(calc_weights, exp_weights) 43 | 44 | print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time)) 45 | 46 | 47 | def benchmark_build_percent_with_group(n_samples: int, n_loops: int, p_included: float, 48 | n_groups: int) -> None: 49 | print("-" * 60) 50 | print("Starting portfolio construction by percent with group-by values benchmarking") 51 | print( 52 | "Parameters(n_samples: {0}, p_included: {1}, n_loops: {2}, n_groups: {3})".format(n_samples, 53 | p_included, 54 | n_loops, 55 | n_groups)) 56 | 57 | n_portfolio = 10 58 | 59 | x = np.random.randn(n_samples, n_portfolio) 60 | groups = np.random.randint(n_groups, size=n_samples) 61 | 62 | start = dt.datetime.now() 63 | for _ in range(n_loops): 64 | calc_weights = percent_build(x, p_included, groups=groups) 65 | impl_model_time = dt.datetime.now() - start 66 | 67 | print('{0:20s}: {1}'.format('Implemented model', impl_model_time)) 68 | 69 | start = dt.datetime.now() 70 | for _ in range(n_loops): 71 | grouped_ordering = pd.DataFrame(-x).groupby(groups).rank() 72 | grouped_count = pd.DataFrame(-x).groupby(groups).transform(lambda x: x.count()) 73 | exp_weights = np.zeros((len(x), n_portfolio)) 74 | n_included = (grouped_count * p_included).astype(int) 75 | masks = (grouped_ordering <= n_included).values 76 | for j in range(n_portfolio): 77 | exp_weights[masks[:, j], j] = 1. 78 | benchmark_model_time = dt.datetime.now() - start 79 | 80 | np.testing.assert_array_almost_equal(calc_weights, exp_weights) 81 | 82 | print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time)) 83 | 84 | 85 | if __name__ == '__main__': 86 | benchmark_build_percent(3000, 1000, 0.1) 87 | benchmark_build_percent_with_group(3000, 1000, 0.1, 30) 88 | -------------------------------------------------------------------------------- /alphamind/benchmarks/portfolio/rankbuild.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-4-27 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import datetime as dt 9 | 10 | import numpy as np 11 | import pandas as pd 12 | 13 | from alphamind.portfolio.rankbuilder import rank_build 14 | 15 | 16 | def benchmark_build_rank(n_samples: int, n_loops: int, n_included: int) -> None: 17 | print("-" * 60) 18 | print("Starting portfolio construction by rank benchmarking") 19 | print("Parameters(n_samples: {0}, n_included: {1}, n_loops: {2})".format(n_samples, n_included, 20 | n_loops)) 21 | 22 | n_portfolio = 10 23 | 24 | x = np.random.randn(n_samples, n_portfolio) 25 | 26 | start = dt.datetime.now() 27 | for _ in range(n_loops): 28 | calc_weights = rank_build(x, n_included) 29 | impl_model_time = dt.datetime.now() - start 30 | 31 | print('{0:20s}: {1}'.format('Implemented model', impl_model_time)) 32 | 33 | start = dt.datetime.now() 34 | for _ in range(n_loops): 35 | exp_weights = np.zeros((len(x), n_portfolio)) 36 | choosed_index = (-x).argsort(axis=0).argsort(axis=0) < n_included 37 | for j in range(n_portfolio): 38 | exp_weights[choosed_index[:, j], j] = 1. 
39 | benchmark_model_time = dt.datetime.now() - start 40 | 41 | np.testing.assert_array_almost_equal(calc_weights, exp_weights) 42 | 43 | print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time)) 44 | 45 | 46 | def benchmark_build_rank_with_group(n_samples: int, n_loops: int, n_included: int, 47 | n_groups: int) -> None: 48 | print("-" * 60) 49 | print("Starting portfolio construction by rank with group-by values benchmarking") 50 | print( 51 | "Parameters(n_samples: {0}, n_included: {1}, n_loops: {2}, n_groups: {3})".format(n_samples, 52 | n_included, 53 | n_loops, 54 | n_groups)) 55 | 56 | n_portfolio = 10 57 | 58 | x = np.random.randn(n_samples, n_portfolio) 59 | groups = np.random.randint(n_groups, size=n_samples) 60 | 61 | start = dt.datetime.now() 62 | for _ in range(n_loops): 63 | calc_weights = rank_build(x, n_included, groups=groups) 64 | impl_model_time = dt.datetime.now() - start 65 | 66 | print('{0:20s}: {1}'.format('Implemented model', impl_model_time)) 67 | 68 | start = dt.datetime.now() 69 | for _ in range(n_loops): 70 | grouped_ordering = pd.DataFrame(-x).groupby(groups).rank() 71 | exp_weights = np.zeros((len(x), n_portfolio)) 72 | masks = (grouped_ordering <= n_included).values 73 | for j in range(n_portfolio): 74 | exp_weights[masks[:, j], j] = 1. 75 | benchmark_model_time = dt.datetime.now() - start 76 | 77 | np.testing.assert_array_almost_equal(calc_weights, exp_weights) 78 | 79 | print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time)) 80 | 81 | 82 | if __name__ == '__main__': 83 | benchmark_build_rank(3000, 1000, 300) 84 | benchmark_build_rank_with_group(3000, 1000, 10, 30) 85 | -------------------------------------------------------------------------------- /alphamind/benchmarks/settlement/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-4-28 4 | 5 | @author: cheng.li 6 | """ 7 | -------------------------------------------------------------------------------- /alphamind/benchmarks/settlement/simplesettle.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-4-28 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import datetime as dt 9 | 10 | import numpy as np 11 | import pandas as pd 12 | 13 | from alphamind.settlement.simplesettle import simple_settle 14 | 15 | 16 | def benchmark_simple_settle(n_samples: int, n_portfolios: int, n_loops: int) -> None: 17 | print("-" * 60) 18 | print("Starting simple settle benchmarking") 19 | print("Parameters(n_samples: {0}, n_portfolios: {1}, n_loops: {2})".format(n_samples, 20 | n_portfolios, 21 | n_loops)) 22 | 23 | weights = np.random.randn(n_samples, n_portfolios) 24 | ret_series = np.random.randn(n_samples) 25 | 26 | start = dt.datetime.now() 27 | for _ in range(n_loops): 28 | calc_ret = simple_settle(weights, ret_series) 29 | impl_model_time = dt.datetime.now() - start 30 | 31 | print('{0:20s}: {1}'.format('Implemented model', impl_model_time)) 32 | 33 | start = dt.datetime.now() 34 | ret_series.shape = -1, 1 35 | for _ in range(n_loops): 36 | exp_ret = (weights * ret_series).sum(axis=0) 37 | benchmark_model_time = dt.datetime.now() - start 38 | 39 | np.testing.assert_array_almost_equal(calc_ret, exp_ret) 40 | 41 | print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time)) 42 | 43 | 44 | def benchmark_simple_settle_with_group(n_samples: int, n_portfolios: int, n_loops: int, 45 | n_groups: int) -> None: 46 | 
print("-" * 60) 47 | print("Starting simple settle with group-by values benchmarking") 48 | print("Parameters(n_samples: {0}, n_portfolios: {1}, n_loops: {2}, n_groups: {3})".format( 49 | n_samples, n_portfolios, n_loops, n_groups)) 50 | 51 | weights = np.random.randn(n_samples, n_portfolios) 52 | ret_series = np.random.randn(n_samples) 53 | groups = np.random.randint(n_groups, size=n_samples) 54 | 55 | start = dt.datetime.now() 56 | for _ in range(n_loops): 57 | calc_ret = simple_settle(weights, ret_series, groups=groups) 58 | impl_model_time = dt.datetime.now() - start 59 | 60 | print('{0:20s}: {1}'.format('Implemented model', impl_model_time)) 61 | 62 | start = dt.datetime.now() 63 | ret_series.shape = -1, 1 64 | for _ in range(n_loops): 65 | ret_mat = weights * ret_series 66 | exp_ret = pd.DataFrame(ret_mat).groupby(groups).sum().values 67 | benchmark_model_time = dt.datetime.now() - start 68 | 69 | np.testing.assert_array_almost_equal(calc_ret, exp_ret) 70 | 71 | print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time)) 72 | 73 | 74 | if __name__ == '__main__': 75 | benchmark_simple_settle(3000, 3, 1000) 76 | benchmark_simple_settle_with_group(3000, 3, 1000, 30) 77 | -------------------------------------------------------------------------------- /alphamind/bin/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-miner/alpha-mind/023fca01d2cea7cd50328396c60b06c99706c426/alphamind/bin/__init__.py -------------------------------------------------------------------------------- /alphamind/bin/alphamind: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-6-29 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | 9 | from alphamind.bin.cli import CLIFactory 10 | 11 | 12 | if __name__ == '__main__': 13 | 14 | parser = CLIFactory.get_parser() 15 | args = parser.parse_args() 16 | args.func(args) -------------------------------------------------------------------------------- /alphamind/bin/cli.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-6-29 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import argparse 9 | from collections import namedtuple 10 | 11 | from sqlalchemy import create_engine 12 | 13 | from alphamind.data.dbmodel import models 14 | from alphamind.utilities import alpha_logger 15 | 16 | 17 | def initdb(args): 18 | alpha_logger.info('DB: ' + args.url) 19 | engine = create_engine(args.url) 20 | models.Base.metadata.create_all(engine) 21 | alpha_logger.info('DB: initialization finished.') 22 | 23 | 24 | Arg = namedtuple( 25 | 'Arg', ['flags', 'help', 'action', 'default', 'nargs', 'type', 'choices', 'metavar']) 26 | Arg.__new__.__defaults__ = (None, None, None, None, None, None, None) 27 | 28 | 29 | class CLIFactory(object): 30 | args = { 31 | 'url': Arg( 32 | ('-u', '--url'), 33 | help='set the url for the db', 34 | type=str) 35 | } 36 | 37 | subparsers = ( 38 | { 39 | 'func': initdb, 40 | 'help': 'Initialize the metadata database', 41 | 'args': ('url',) 42 | }, 43 | ) 44 | 45 | subparsers_dict = {sp['func'].__name__: sp for sp in subparsers} 46 | 47 | @classmethod 48 | def get_parser(cls): 49 | parser = argparse.ArgumentParser() 50 | subparsers = parser.add_subparsers( 51 | help='sub-command help', dest='subcommand') 52 | subparsers.required = True 53 | 54 | subparser_list = cls.subparsers_dict.keys() 55 | for sub in 
subparser_list: 56 | sub = cls.subparsers_dict[sub] 57 | sp = subparsers.add_parser(sub['func'].__name__, help=sub['help']) 58 | for arg in sub['args']: 59 | arg = cls.args[arg] 60 | kwargs = { 61 | f: getattr(arg, f) 62 | for f in arg._fields if f != 'flags' and getattr(arg, f)} 63 | sp.add_argument(*arg.flags, **kwargs) 64 | sp.set_defaults(func=sub['func']) 65 | return parser 66 | 67 | 68 | def get_parser(): 69 | return CLIFactory.get_parser() 70 | -------------------------------------------------------------------------------- /alphamind/data/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-4-25 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | from alphamind.data.neutralize import neutralize 9 | from alphamind.data.rank import rank 10 | from alphamind.data.standardize import standardize 11 | from alphamind.data.winsorize import winsorize_normal as winsorize 12 | 13 | __all__ = ['standardize', 14 | 'winsorize', 15 | 'neutralize', 16 | 'rank'] 17 | -------------------------------------------------------------------------------- /alphamind/data/dbmodel/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-6-29 4 | 5 | @author: cheng.li 6 | """ 7 | -------------------------------------------------------------------------------- /alphamind/data/dbmodel/models/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2020-11-14 4 | 5 | @author: cheng.li 6 | """ 7 | import os 8 | 9 | if "DB_VENDOR" in os.environ and os.environ["DB_VENDOR"].lower() == "mysql": 10 | from alphamind.data.dbmodel.models.mysql import Market 11 | from alphamind.data.dbmodel.models.mysql import IndexMarket 12 | from alphamind.data.dbmodel.models.mysql import Universe 13 | from alphamind.data.dbmodel.models.mysql import Industry 14 | from alphamind.data.dbmodel.models.mysql import RiskExposure 15 | from alphamind.data.dbmodel.models.mysql import RiskCovDay 16 | from alphamind.data.dbmodel.models.mysql import RiskCovShort 17 | from alphamind.data.dbmodel.models.mysql import RiskCovLong 18 | from alphamind.data.dbmodel.models.mysql import SpecificRiskDay 19 | from alphamind.data.dbmodel.models.mysql import SpecificRiskShort 20 | from alphamind.data.dbmodel.models.mysql import SpecificRiskLong 21 | from alphamind.data.dbmodel.models.mysql import IndexComponent 22 | from alphamind.data.dbmodel.models.mysql import IndexWeight 23 | else: 24 | from alphamind.data.dbmodel.models.postgres import Market 25 | from alphamind.data.dbmodel.models.postgres import IndexMarket 26 | from alphamind.data.dbmodel.models.postgres import Universe 27 | from alphamind.data.dbmodel.models.postgres import Industry 28 | from alphamind.data.dbmodel.models.postgres import RiskExposure 29 | from alphamind.data.dbmodel.models.postgres import RiskCovDay 30 | from alphamind.data.dbmodel.models.postgres import RiskCovShort 31 | from alphamind.data.dbmodel.models.postgres import RiskCovLong 32 | from alphamind.data.dbmodel.models.postgres import SpecificRiskDay 33 | from alphamind.data.dbmodel.models.postgres import SpecificRiskShort 34 | from alphamind.data.dbmodel.models.postgres import SpecificRiskLong 35 | from alphamind.data.dbmodel.models.postgres import FactorMaster 36 | from alphamind.data.dbmodel.models.postgres import IndexComponent 37 | from 
alphamind.data.dbmodel.models.postgres import RiskMaster -------------------------------------------------------------------------------- /alphamind/data/engines/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-miner/alpha-mind/023fca01d2cea7cd50328396c60b06c99706c426/alphamind/data/engines/__init__.py -------------------------------------------------------------------------------- /alphamind/data/engines/sqlengine/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2020-11-14 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import os 9 | 10 | if "DB_VENDOR" in os.environ and os.environ["DB_VENDOR"].lower() == "mysql": 11 | from alphamind.data.engines.sqlengine.mysql import SqlEngine 12 | from alphamind.data.engines.sqlengine.mysql import total_risk_factors 13 | from alphamind.data.engines.sqlengine.mysql import industry_styles 14 | from alphamind.data.engines.sqlengine.mysql import risk_styles 15 | from alphamind.data.engines.sqlengine.mysql import macro_styles 16 | else: 17 | from alphamind.data.engines.sqlengine.postgres import SqlEngine 18 | from alphamind.data.engines.sqlengine.postgres import total_risk_factors 19 | from alphamind.data.engines.sqlengine.postgres import industry_styles 20 | from alphamind.data.engines.sqlengine.postgres import risk_styles 21 | from alphamind.data.engines.sqlengine.postgres import macro_styles 22 | -------------------------------------------------------------------------------- /alphamind/data/engines/utilities.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-12-25 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import os 9 | from typing import Dict 10 | from typing import Iterable 11 | 12 | from alphamind.data.dbmodel.models import Market 13 | from alphamind.data.dbmodel.models import RiskCovDay 14 | from alphamind.data.dbmodel.models import RiskCovLong 15 | from alphamind.data.dbmodel.models import RiskCovShort 16 | from alphamind.data.dbmodel.models import RiskExposure 17 | from alphamind.data.dbmodel.models import SpecificRiskDay 18 | from alphamind.data.dbmodel.models import SpecificRiskLong 19 | from alphamind.data.dbmodel.models import SpecificRiskShort 20 | from alphamind.data.engines.industries import INDUSTRY_MAPPING 21 | 22 | 23 | def _map_risk_model_table(risk_model: str) -> tuple: 24 | if risk_model == 'day': 25 | return RiskCovDay, SpecificRiskDay 26 | elif risk_model == 'short': 27 | return RiskCovShort, SpecificRiskShort 28 | elif risk_model == 'long': 29 | return RiskCovLong, SpecificRiskLong 30 | else: 31 | raise ValueError("risk model name {0} is not recognized".format(risk_model)) 32 | 33 | 34 | def _map_factors(factors: Iterable[str], used_factor_tables) -> Dict: 35 | factor_cols = {} 36 | factors = set(factors).difference({'trade_date', 'code', 'isOpen'}) 37 | to_keep = factors.copy() 38 | for f in factors: 39 | for t in used_factor_tables: 40 | if f in t.columns: 41 | factor_cols[t.columns[f].name] = t 42 | to_keep.remove(f) 43 | break 44 | 45 | if to_keep: 46 | raise ValueError("factors in <{0}> can't be found".format(to_keep)) 47 | 48 | return factor_cols 49 | 50 | 51 | if "DB_VENDOR" in os.environ and os.environ["DB_VENDOR"].lower() == "mysql": 52 | def _map_industry_category(category: str) -> str: 53 | if category == 'sw': 54 | return '申万行业分类(2014)' 55 | elif category ==
'zz': 56 | return '中证行业分类' 57 | elif category == 'zx': 58 | return '中信标普行业分类' 59 | elif category == 'zjh': 60 | return '证监会行业分类(2012)-证监会' 61 | else: 62 | raise ValueError("No other industry is supported at the current time") 63 | else: 64 | def _map_industry_category(category: str) -> str: 65 | if category == 'sw': 66 | return '申万行业分类' 67 | elif category == 'sw_adj': 68 | return '申万行业分类修订' 69 | elif category == 'zz': 70 | return '中证行业分类' 71 | elif category == 'dx': 72 | return '东兴行业分类' 73 | elif category == 'zjh': 74 | return '证监会行业V2012' 75 | else: 76 | raise ValueError("No other industry is supported at the current time") 77 | 78 | 79 | def industry_list(category: str, level: int = 1) -> list: 80 | return INDUSTRY_MAPPING[category][level] 81 | -------------------------------------------------------------------------------- /alphamind/data/neutralize.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-4-25 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | from typing import Dict 9 | from typing import Tuple 10 | from typing import Union 11 | 12 | import numba as nb 13 | import numpy as np 14 | 15 | import alphamind.utilities as utils 16 | 17 | 18 | def neutralize(x: np.ndarray, 19 | y: np.ndarray, 20 | groups: np.ndarray = None, 21 | detail: bool = False, 22 | weights: np.ndarray = None) \ 23 | -> Union[np.ndarray, Tuple[np.ndarray, Dict]]: 24 | if y.ndim == 1: 25 | y = y.reshape((-1, 1)) 26 | 27 | if weights is None: 28 | weights = np.ones(len(y), dtype=float) 29 | 30 | output_dict = {} 31 | 32 | if detail: 33 | exposure = np.zeros(x.shape + (y.shape[1],)) 34 | explained = np.zeros(x.shape + (y.shape[1],)) 35 | output_dict['exposure'] = exposure 36 | output_dict['explained'] = explained 37 | 38 | if groups is not None: 39 | res = np.zeros(y.shape) 40 | index_diff, order = utils.groupby(groups) 41 | start = 0 42 | if detail: 43 | for diff_loc in index_diff: 44 | curr_idx = order[start:diff_loc + 1] 45 | curr_x, b = _sub_step(x, y, weights, curr_idx, res) 46 | exposure[curr_idx, :, :] = b 47 | explained[curr_idx] = ls_explain(curr_x, b) 48 | start = diff_loc + 1 49 | else: 50 | for diff_loc in index_diff: 51 | curr_idx = order[start:diff_loc + 1] 52 | _sub_step(x, y, weights, curr_idx, res) 53 | start = diff_loc + 1 54 | else: 55 | try: 56 | b = ls_fit(x, y, weights) 57 | except np.linalg.linalg.LinAlgError: 58 | b = ls_fit_pinv(x, y, weights) 59 | 60 | res = ls_res(x, y, b) 61 | 62 | if detail: 63 | explained[:, :, :] = ls_explain(x, b) 64 | exposure[:] = b 65 | 66 | if output_dict: 67 | return res, output_dict 68 | else: 69 | return res 70 | 71 | 72 | def _sub_step(x, y, w, curr_idx, res) -> Tuple[np.ndarray, np.ndarray]: 73 | curr_x, curr_y, curr_w = x[curr_idx], y[curr_idx], w[curr_idx] 74 | try: 75 | b = ls_fit(curr_x, curr_y, curr_w) 76 | except np.linalg.linalg.LinAlgError: 77 | b = ls_fit_pinv(curr_x, curr_y, curr_w) 78 | res[curr_idx] = ls_res(curr_x, curr_y, b) 79 | return curr_x, b 80 | 81 | 82 | @nb.njit(nogil=True, cache=True) 83 | def ls_fit(x: np.ndarray, y: np.ndarray, w: np.ndarray) -> np.ndarray: 84 | x_bar = x.T * w 85 | b = np.linalg.solve(x_bar @ x, x_bar @ y) 86 | return b 87 | 88 | 89 | @nb.njit(nogil=True, cache=True) 90 | def ls_fit_pinv(x: np.ndarray, y: np.ndarray, w: np.ndarray) -> np.ndarray: 91 | x_bar = x.T * w 92 | b = np.linalg.pinv(x_bar @ x) @ x_bar @ y 93 | return b 94 | 95 | 96 | @nb.njit(nogil=True, cache=True) 97 | def ls_res(x: np.ndarray, y: np.ndarray, b: np.ndarray) 
-> np.ndarray: 98 | return y - x @ b 99 | 100 | 101 | @nb.njit(nogil=True, cache=True) 102 | def ls_explain(x: np.ndarray, b: np.ndarray) -> np.ndarray: 103 | m, n = b.shape 104 | return b.reshape((1, m, n)) * x.reshape((-1, m, 1)) 105 | 106 | 107 | if __name__ == '__main__': 108 | x = np.random.randn(50000, 10) 109 | y = np.random.randn(50000, 1) 110 | w = np.ones(50000) 111 | 112 | import datetime as dt 113 | 114 | start = dt.datetime.now() 115 | for _ in range(1000): 116 | ls_fit(x, y, w) 117 | print(dt.datetime.now() - start) 118 | -------------------------------------------------------------------------------- /alphamind/data/processing.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-8-21 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | from typing import List 9 | from typing import Optional 10 | 11 | import numpy as np 12 | 13 | from alphamind.data.neutralize import neutralize 14 | from alphamind.utilities import alpha_logger 15 | 16 | 17 | def factor_processing(raw_factors: np.ndarray, 18 | pre_process: Optional[List] = None, 19 | risk_factors: Optional[np.ndarray] = None, 20 | post_process: Optional[List] = None, 21 | groups=None) -> np.ndarray: 22 | new_factors = raw_factors 23 | 24 | if pre_process: 25 | for p in pre_process: 26 | new_factors = p(new_factors, groups=groups) 27 | 28 | if risk_factors is not None: 29 | risk_factors = risk_factors[:, risk_factors.sum(axis=0) != 0] 30 | new_factors = neutralize(risk_factors, new_factors, groups=groups) 31 | 32 | if post_process: 33 | for p in post_process: 34 | if p.__name__ == 'winsorize_normal': 35 | alpha_logger.warning("winsorize_normal " 36 | "normally should not be done after neutralize") 37 | new_factors = p(new_factors, groups=groups) 38 | 39 | return new_factors 40 | -------------------------------------------------------------------------------- /alphamind/data/quantile.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-8-16 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import numpy as np 9 | 10 | 11 | def quantile(x: np.ndarray, n_bins: int) -> np.ndarray: 12 | n = x.size 13 | sorter = x.argsort() 14 | inv = np.empty(n, dtype=int) 15 | inv[sorter] = np.arange(n, dtype=int) 16 | 17 | bin_size = float(n) / n_bins 18 | 19 | pillars = [int(i * bin_size) for i in range(1, n_bins + 1)] 20 | 21 | q_groups = np.empty(n, dtype=int) 22 | 23 | starter = 0 24 | for i, r in enumerate(pillars): 25 | q_groups[(inv >= starter) & (inv < r)] = i 26 | starter = r 27 | 28 | return q_groups 29 | -------------------------------------------------------------------------------- /alphamind/data/rank.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-8-8 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | from typing import Optional 9 | 10 | import numpy as np 11 | from scipy.stats import rankdata 12 | 13 | import alphamind.utilities as utils 14 | 15 | 16 | def rank(x: np.ndarray, groups: Optional[np.ndarray] = None) -> np.ndarray: 17 | if x.ndim == 1: 18 | x = x.reshape((-1, 1)) 19 | 20 | if groups is not None: 21 | res = np.zeros(x.shape, dtype=int) 22 | index_diff, order = utils.groupby(groups) 23 | 24 | start = 0 25 | for diff_loc in index_diff: 26 | curr_idx = order[start:diff_loc + 1] 27 | res[curr_idx] = (rankdata(x[curr_idx]).astype(float) - 1.).reshape((-1, 1)) 28 | start = diff_loc + 1 29 | 
return res 30 | else: 31 | return (rankdata(x).astype(float) - 1.).reshape((-1, 1)) 32 | 33 | 34 | def percentile(x: np.ndarray, groups: Optional[np.ndarray] = None) -> np.ndarray: 35 | if x.ndim == 1: 36 | x = x.reshape((-1, 1)) 37 | 38 | if groups is not None: 39 | res = np.zeros(x.shape, dtype=int) 40 | index_diff, order = utils.groupby(groups) 41 | 42 | start = 0 43 | for diff_loc in index_diff: 44 | curr_idx = order[start:diff_loc + 1] 45 | curr_values = x[curr_idx] 46 | length = len(curr_values) - 1. if len(curr_values) > 1 else 1. 47 | res[curr_idx] = (rankdata(curr_values).astype(float) - 1.) / length 48 | start = diff_loc + 1 49 | return res 50 | else: 51 | length = len(x) - 1. if len(x) > 1 else 1. 52 | return ((rankdata(x).astype(float) - 1.) / length).reshape((-1, 1)) 53 | -------------------------------------------------------------------------------- /alphamind/data/standardize.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-4-25 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import numpy as np 9 | 10 | from alphamind.utilities import aggregate 11 | from alphamind.utilities import array_index 12 | from alphamind.utilities import group_mapping 13 | from alphamind.utilities import simple_mean 14 | from alphamind.utilities import simple_sqrsum 15 | from alphamind.utilities import simple_std 16 | from alphamind.utilities import transform 17 | 18 | 19 | def standardize(x: np.ndarray, groups: np.ndarray = None, ddof=1) -> np.ndarray: 20 | if groups is not None: 21 | groups = group_mapping(groups) 22 | mean_values = transform(groups, x, 'mean') 23 | std_values = transform(groups, x, 'std', ddof) 24 | 25 | return (x - mean_values) / np.maximum(std_values, 1e-8) 26 | else: 27 | return (x - simple_mean(x, axis=0)) / np.maximum(simple_std(x, axis=0, ddof=ddof), 1e-8) 28 | 29 | 30 | def projection(x: np.ndarray, groups: np.ndarray = None, axis=1) -> np.ndarray: 31 | if groups is not None and axis == 0: 32 | groups = group_mapping(groups) 33 | projected = transform(groups, x, 'project') 34 | return projected 35 | else: 36 | return x / simple_sqrsum(x, axis=axis).reshape((-1, 1)) 37 | 38 | 39 | class Standardizer(object): 40 | 41 | def __init__(self, ddof: int = 1): 42 | self.ddof = ddof 43 | self.mean = None 44 | self.std = None 45 | self.labels = None 46 | 47 | def fit(self, x: np.ndarray, groups: np.ndarray = None): 48 | if groups is not None: 49 | group_index = group_mapping(groups) 50 | self.mean = aggregate(group_index, x, 'mean') 51 | self.std = aggregate(group_index, x, 'std', self.ddof) 52 | self.labels = np.unique(groups) 53 | else: 54 | self.mean = simple_mean(x, axis=0) 55 | self.std = simple_std(x, axis=0, ddof=self.ddof) 56 | 57 | def transform(self, x: np.ndarray, groups: np.ndarray = None) -> np.ndarray: 58 | if groups is not None: 59 | index = array_index(self.labels, groups) 60 | return (x - self.mean[index]) / np.maximum(self.std[index], 1e-8) 61 | else: 62 | return (x - self.mean) / np.maximum(self.std, 1e-8) 63 | 64 | def __call__(self, x: np.ndarray, groups: np.ndarray = None) -> np.ndarray: 65 | return standardize(x, groups, self.ddof) 66 | -------------------------------------------------------------------------------- /alphamind/data/transformer.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-8-23 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import copy 9 | 10 | import pandas as pd 11 | 
from simpleutils.asserts import require 12 | from PyFin.Analysis.SecurityValueHolders import SecurityValueHolder 13 | from PyFin.api import transform as transform_impl 14 | 15 | 16 | def factor_translator(factor_pool): 17 | if not factor_pool: 18 | return None, None 19 | 20 | if isinstance(factor_pool, str): 21 | return {factor_pool: factor_pool}, [factor_pool] 22 | elif isinstance(factor_pool, SecurityValueHolder): 23 | return {str(factor_pool): factor_pool}, sorted(factor_pool.fields) 24 | elif isinstance(factor_pool, dict): 25 | dependency = set() 26 | for k, v in factor_pool.items(): 27 | require(isinstance(k, str), ValueError, 28 | 'factor_name {0} should be a string.'.format(k)) 29 | require(isinstance(v, SecurityValueHolder) or isinstance(v, str), 30 | ValueError, 31 | 'expression {0} should be a value holder or a string.'.format(v)) 32 | 33 | if isinstance(v, str): 34 | dependency = dependency.union([v]) 35 | else: 36 | dependency = dependency.union(v.fields) 37 | return factor_pool, sorted(dependency) 38 | elif isinstance(factor_pool, list): 39 | factor_dict = {} 40 | dependency = set() 41 | k = 1 42 | for i, f in enumerate(factor_pool): 43 | if isinstance(f, str): 44 | factor_dict[f] = f 45 | dependency = dependency.union([f]) 46 | elif isinstance(f, SecurityValueHolder): 47 | factor_dict[str(f)] = f 48 | dependency = dependency.union(f.fields) 49 | k += 1 50 | return factor_dict, sorted(dependency) 51 | else: 52 | raise ValueError('{0} is not in valid format as factors'.format(factor_pool)) 53 | 54 | 55 | class Transformer: 56 | 57 | def __init__(self, 58 | expressions): 59 | expression_dict, expression_dependency = \ 60 | factor_translator(copy.deepcopy(expressions)) 61 | 62 | if expression_dict: 63 | self.names = sorted(expression_dict.keys()) 64 | self.expressions = [expression_dict[n] for n in self.names] 65 | self.dependency = expression_dependency 66 | else: 67 | self.names = [] 68 | self.expressions = [] 69 | self.dependency = [] 70 | 71 | def transform(self, group_name, data): 72 | if len(data) > 0: 73 | transformed_data = transform_impl(data, 74 | self.expressions, 75 | self.names, 76 | group_name, 77 | dropna=False) 78 | return transformed_data 79 | else: 80 | return pd.DataFrame() 81 | 82 | 83 | if __name__ == '__main__': 84 | transformer = Transformer(['c', 'a']) 85 | -------------------------------------------------------------------------------- /alphamind/exceptions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-miner/alpha-mind/023fca01d2cea7cd50328396c60b06c99706c426/alphamind/exceptions/__init__.py -------------------------------------------------------------------------------- /alphamind/exceptions/exceptions.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2018-6-12 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | 9 | class PortfolioBuilderException(Exception): 10 | 11 | def __init__(self, msg): 12 | self.msg = msg 13 | 14 | def __str__(self): 15 | return str(self.msg) 16 | -------------------------------------------------------------------------------- /alphamind/execution/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-miner/alpha-mind/023fca01d2cea7cd50328396c60b06c99706c426/alphamind/execution/__init__.py --------------------------------------------------------------------------------
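Note: the data modules above (winsorize, standardize, neutralize, processing) are designed to be chained through factor_processing. The following is a minimal sketch of the typical pipeline, not part of the package itself; the random inputs are placeholders that mirror the shapes used in the project's own tests:

import numpy as np

from alphamind.data.processing import factor_processing
from alphamind.data.standardize import standardize
from alphamind.data.winsorize import winsorize_normal

raw_factors = np.random.randn(1000, 1)      # placeholder factor values
risk_exp = np.random.randn(1000, 3)         # placeholder risk exposures
groups = np.random.randint(30, size=1000)   # placeholder industry codes

# pre_process callables run left to right within each group; the result is
# then neutralized against risk_exp, and post_process is applied last
processed = factor_processing(raw_factors,
                              pre_process=[winsorize_normal, standardize],
                              risk_factors=risk_exp,
                              post_process=[standardize],
                              groups=groups)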
/alphamind/execution/baseexecutor.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-9-22 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import abc 9 | 10 | import numpy as np 11 | import pandas as pd 12 | 13 | 14 | class ExecutorBase(metaclass=abc.ABCMeta): 15 | 16 | def __init__(self): 17 | self.current_pos = pd.DataFrame() 18 | 19 | @abc.abstractmethod 20 | def execute(self, target_pos: pd.DataFrame) -> pd.DataFrame: 21 | pass 22 | 23 | @staticmethod 24 | def calc_turn_over(target_pos: pd.DataFrame, current_pos: pd.DataFrame) -> float: 25 | pos_merged = pd.merge(target_pos, current_pos, on=['code'], how='outer') 26 | pos_merged.fillna(0, inplace=True) 27 | turn_over = np.abs(pos_merged.weight_x - pos_merged.weight_y).sum() 28 | return turn_over 29 | 30 | def set_current(self, current_pos: pd.DataFrame): 31 | self.current_pos = current_pos.copy() 32 | 33 | def update(self, data_dict: dict): 34 | pass 35 | -------------------------------------------------------------------------------- /alphamind/execution/naiveexecutor.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-9-22 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | from typing import Tuple 9 | 10 | import pandas as pd 11 | 12 | from alphamind.execution.baseexecutor import ExecutorBase 13 | 14 | 15 | class NaiveExecutor(ExecutorBase): 16 | 17 | def __init__(self): 18 | super().__init__() 19 | 20 | def execute(self, target_pos: pd.DataFrame) -> Tuple[float, pd.DataFrame]: 21 | if self.current_pos.empty: 22 | turn_over = target_pos.weight.abs().sum() 23 | else: 24 | turn_over = self.calc_turn_over(target_pos, self.current_pos) 25 | self.current_pos = target_pos.copy() 26 | return turn_over, target_pos 27 | -------------------------------------------------------------------------------- /alphamind/execution/pipeline.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-9-25 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | from typing import List 9 | from typing import Tuple 10 | 11 | import pandas as pd 12 | 13 | from alphamind.execution.baseexecutor import ExecutorBase 14 | 15 | 16 | class ExecutionPipeline(object): 17 | 18 | def __init__(self, executors: List[ExecutorBase]): 19 | self.executors = executors 20 | 21 | def execute(self, target_pos) -> Tuple[float, pd.DataFrame]: 22 | 23 | turn_over, planed_pos = 0., target_pos 24 | 25 | for executor in self.executors: 26 | turn_over, planed_pos = executor.execute(planed_pos) 27 | 28 | executed_pos = planed_pos 29 | 30 | for executor in self.executors: 31 | executor.set_current(executed_pos) 32 | 33 | return turn_over, executed_pos 34 | 35 | def update(self, data_dict): 36 | 37 | for executor in self.executors: 38 | executor.update(data_dict=data_dict) 39 | -------------------------------------------------------------------------------- /alphamind/execution/targetvolexecutor.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-9-22 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | from typing import Tuple 9 | 10 | import pandas as pd 11 | from PyFin.Math.Accumulators import MovingAverage 12 | from PyFin.Math.Accumulators import MovingStandardDeviation 13 | 14 | from alphamind.execution.baseexecutor import ExecutorBase 15 | 16 | 17 | class 
TargetVolExecutor(ExecutorBase): 18 | 19 | def __init__(self, window=30, target_vol=0.01): 20 | super().__init__() 21 | self.m_vol = MovingStandardDeviation(window, 'return') 22 | self.m_leverage = MovingAverage(window, 'leverage') 23 | self.target_vol = target_vol 24 | self.multiplier = 1. 25 | 26 | def execute(self, target_pos: pd.DataFrame) -> Tuple[float, pd.DataFrame]: 27 | if not self.m_vol.isFull(): 28 | if self.current_pos.empty: 29 | turn_over = target_pos.weight.abs().sum() 30 | else: 31 | turn_over = self.calc_turn_over(target_pos, self.current_pos) 32 | return turn_over, target_pos 33 | else: 34 | c_vol = self.m_vol.result() 35 | c_leverage = self.m_leverage.result() 36 | self.multiplier = self.target_vol / c_vol * c_leverage 37 | candidate_pos = target_pos.copy() 38 | candidate_pos['weight'] = candidate_pos.weight.values * self.multiplier 39 | turn_over = self.calc_turn_over(candidate_pos, self.current_pos) 40 | return turn_over, candidate_pos 41 | 42 | def set_current(self, current_pos: pd.DataFrame): 43 | super().set_current(current_pos) 44 | self.m_leverage.push({'leverage': current_pos.weight.abs().sum()}) 45 | 46 | def update(self, data_dict: dict): 47 | self.m_vol.push(data_dict) 48 | -------------------------------------------------------------------------------- /alphamind/execution/thresholdexecutor.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-9-22 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | from typing import Tuple 9 | 10 | import pandas as pd 11 | 12 | from alphamind.execution.baseexecutor import ExecutorBase 13 | 14 | 15 | class ThresholdExecutor(ExecutorBase): 16 | 17 | def __init__(self, turn_over_threshold: float, is_relative=True): 18 | super().__init__() 19 | self.threshold = turn_over_threshold 20 | self.is_relative = is_relative 21 | 22 | def execute(self, target_pos: pd.DataFrame) -> Tuple[float, pd.DataFrame]: 23 | 24 | if self.current_pos.empty: 25 | return target_pos.weight.abs().sum(), target_pos 26 | else: 27 | turn_over = self.calc_turn_over(target_pos, self.current_pos) 28 | 29 | is_break = turn_over >= self.threshold * self.current_pos.weight.sum() if self.is_relative else turn_over >= self.threshold 30 | 31 | if is_break: 32 | return turn_over, target_pos 33 | else: 34 | return 0., self.current_pos.copy() 35 | -------------------------------------------------------------------------------- /alphamind/formula/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-miner/alpha-mind/023fca01d2cea7cd50328396c60b06c99706c426/alphamind/formula/__init__.py -------------------------------------------------------------------------------- /alphamind/formula/utilities.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-11-27 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | from alphamind.utilities import decode 9 | from alphamind.utilities import encode 10 | 11 | 12 | def encode_formula(formula): 13 | str_repr = encode(formula) 14 | return {'desc': str_repr, 15 | 'formula_type': formula.__class__.__module__ + "." 
+ formula.__class__.__name__, 16 | 'dependency': formula.fields, 17 | 'window': formula.window} 18 | 19 | 20 | def decode_formula(str_repr): 21 | formula = decode(str_repr) 22 | return formula 23 | 24 | 25 | if __name__ == '__main__': 26 | from PyFin.api import * 27 | 28 | eps_q_res = RES(20, LAST('eps_q') ^ LAST('roe_q')) 29 | print(eps_q_res) 30 | 31 | str_repr = encode_formula(eps_q_res) 32 | decoded_formula = decode_formula(str_repr) 33 | print(decoded_formula) 34 | -------------------------------------------------------------------------------- /alphamind/model/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-5-2 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | from alphamind.model.linearmodel import ConstLinearModel 9 | from alphamind.model.linearmodel import LassoRegression 10 | from alphamind.model.linearmodel import LinearRegression 11 | from alphamind.model.linearmodel import LogisticRegression 12 | from alphamind.model.loader import load_model 13 | from alphamind.model.svm import NvSVRModel 14 | from alphamind.model.treemodel import RandomForestClassifier 15 | from alphamind.model.treemodel import RandomForestRegressor 16 | from alphamind.model.treemodel import XGBClassifier 17 | from alphamind.model.treemodel import XGBRegressor 18 | from alphamind.model.treemodel import XGBTrainer 19 | 20 | __all__ = ['LinearRegression', 21 | 'LassoRegression', 22 | 'ConstLinearModel', 23 | 'LogisticRegression', 24 | 'RandomForestRegressor', 25 | 'RandomForestClassifier', 26 | 'XGBRegressor', 27 | 'XGBClassifier', 28 | 'XGBTrainer', 29 | 'NvSVRModel', 30 | 'load_model'] 31 | -------------------------------------------------------------------------------- /alphamind/model/linearmodel.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-5-10 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import numpy as np 9 | from simpleutils.asserts import require 10 | from sklearn.linear_model import Lasso 11 | from sklearn.linear_model import LinearRegression as LinearRegressionImpl 12 | from sklearn.linear_model import LogisticRegression as LogisticRegressionImpl 13 | 14 | from alphamind.model.modelbase import create_model_base 15 | 16 | 17 | class ConstLinearModelImpl(object): 18 | 19 | def __init__(self, weights: np.ndarray = None): 20 | self.weights = weights.flatten() 21 | 22 | def fit(self, x: np.ndarray, y: np.ndarray): 23 | raise NotImplementedError("Const linear model doesn't offer fit methodology") 24 | 25 | def predict(self, x: np.ndarray): 26 | return x @ self.weights 27 | 28 | def score(self, x: np.ndarray, y: np.ndarray) -> float: 29 | y_hat = self.predict(x) 30 | y_bar = y.mean() 31 | ssto = ((y - y_bar) ** 2).sum() 32 | sse = ((y - y_hat) ** 2).sum() 33 | return 1. 
- sse / ssto 34 | 35 | 36 | class ConstLinearModel(create_model_base()): 37 | 38 | def __init__(self, 39 | features=None, 40 | weights: dict = None, 41 | fit_target=None): 42 | super().__init__(features=features, fit_target=fit_target) 43 | if features is not None and weights is not None: 44 | require(len(features) == len(weights), 45 | ValueError, 46 | "length of features is not equal to length of weights") 47 | if weights: 48 | self.impl = ConstLinearModelImpl(np.array([weights[name] for name in self.features])) 49 | 50 | def save(self): 51 | model_desc = super().save() 52 | model_desc['weight'] = list(self.impl.weights) 53 | return model_desc 54 | 55 | @classmethod 56 | def load(cls, model_desc: dict): 57 | return super().load(model_desc) 58 | 59 | @property 60 | def weights(self): 61 | return self.impl.weights.tolist() 62 | 63 | 64 | class LinearRegression(create_model_base('sklearn')): 65 | 66 | def __init__(self, features=None, fit_intercept: bool = False, fit_target=None, **kwargs): 67 | super().__init__(features=features, fit_target=fit_target) 68 | self.impl = LinearRegressionImpl(fit_intercept=fit_intercept, **kwargs) 69 | 70 | def save(self) -> dict: 71 | model_desc = super().save() 72 | model_desc['weight'] = self.impl.coef_.tolist() 73 | return model_desc 74 | 75 | @property 76 | def weights(self): 77 | return self.impl.coef_.tolist() 78 | 79 | 80 | class LassoRegression(create_model_base('sklearn')): 81 | 82 | def __init__(self, alpha=0.01, features=None, fit_intercept: bool = False, fit_target=None, 83 | **kwargs): 84 | super().__init__(features=features, fit_target=fit_target) 85 | self.impl = Lasso(alpha=alpha, fit_intercept=fit_intercept, **kwargs) 86 | 87 | def save(self) -> dict: 88 | model_desc = super().save() 89 | model_desc['weight'] = self.impl.coef_.tolist() 90 | return model_desc 91 | 92 | @property 93 | def weights(self): 94 | return self.impl.coef_.tolist() 95 | 96 | 97 | class LogisticRegression(create_model_base('sklearn')): 98 | 99 | def __init__(self, features=None, fit_intercept: bool = False, fit_target=None, **kwargs): 100 | super().__init__(features=features, fit_target=fit_target) 101 | self.impl = LogisticRegressionImpl(fit_intercept=fit_intercept, **kwargs) 102 | 103 | def save(self) -> dict: 104 | model_desc = super().save() 105 | model_desc['weight'] = self.impl.coef_.tolist() 106 | return model_desc 107 | 108 | @property 109 | def weights(self): 110 | return self.impl.coef_.tolist() 111 | 112 | 113 | if __name__ == '__main__': 114 | import pprint 115 | 116 | ls = ConstLinearModel(['a', 'b'], np.array([0.5, 0.5])) 117 | 118 | x = np.array([[0.2, 0.2], 119 | [0.1, 0.1], 120 | [0.3, 0.1]]) 121 | 122 | ls.predict(x) 123 | 124 | desc = ls.save() 125 | new_model = ConstLinearModel.load(desc) 126 | 127 | pprint.pprint(new_model.save()) 128 | -------------------------------------------------------------------------------- /alphamind/model/loader.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-9-5 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | from alphamind.model.linearmodel import ConstLinearModel 9 | from alphamind.model.linearmodel import LassoRegression 10 | from alphamind.model.linearmodel import LinearRegression 11 | from alphamind.model.linearmodel import LogisticRegression 12 | from alphamind.model.modelbase import ModelBase 13 | from alphamind.model.svm import NvSVRModel 14 | from alphamind.model.treemodel import RandomForestClassifier 15 | from 
alphamind.model.treemodel import RandomForestRegressor 16 | from alphamind.model.treemodel import XGBClassifier 17 | from alphamind.model.treemodel import XGBRegressor 18 | from alphamind.model.treemodel import XGBTrainer 19 | 20 | 21 | def load_model(model_desc: dict) -> ModelBase: 22 | model_name = model_desc['model_name'] 23 | model_name_parts = set(model_name.split('.')) 24 | 25 | if 'ConstLinearModel' in model_name_parts: 26 | return ConstLinearModel.load(model_desc) 27 | elif 'LinearRegression' in model_name_parts: 28 | return LinearRegression.load(model_desc) 29 | elif 'LassoRegression' in model_name_parts: 30 | return LassoRegression.load(model_desc) 31 | elif 'LogisticRegression' in model_name_parts: 32 | return LogisticRegression.load(model_desc) 33 | elif 'RandomForestRegressor' in model_name_parts: 34 | return RandomForestRegressor.load(model_desc) 35 | elif 'RandomForestClassifier' in model_name_parts: 36 | return RandomForestClassifier.load(model_desc) 37 | elif 'XGBRegressor' in model_name_parts: 38 | return XGBRegressor.load(model_desc) 39 | elif 'XGBClassifier' in model_name_parts: 40 | return XGBClassifier.load(model_desc) 41 | elif 'XGBTrainer' in model_name_parts: 42 | return XGBTrainer.load(model_desc) 43 | elif 'NvSVR' in model_name_parts: 44 | return NvSVRModel.load(model_desc) 45 | else: 46 | raise ValueError('{0} is not currently supported in model loader.'.format(model_name)) 47 | -------------------------------------------------------------------------------- /alphamind/model/modelbase.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-9-4 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import abc 9 | from distutils.version import LooseVersion 10 | 11 | import arrow 12 | import numpy as np 13 | import pandas as pd 14 | from simpleutils.miscellaneous import list_eq 15 | from sklearn import __version__ as sklearn_version 16 | from xgboost import __version__ as xgbboot_version 17 | 18 | from alphamind.data.transformer import Transformer 19 | from alphamind.utilities import alpha_logger 20 | from alphamind.utilities import decode 21 | from alphamind.utilities import encode 22 | 23 | 24 | class ModelBase(metaclass=abc.ABCMeta): 25 | 26 | def __init__(self, features=None, fit_target=None): 27 | if features is not None: 28 | self.formulas = Transformer(features) 29 | self.features = self.formulas.names 30 | else: 31 | self.features = None 32 | 33 | if fit_target is not None: 34 | self.fit_target = Transformer(fit_target) 35 | else: 36 | self.fit_target = None 37 | self.impl = None 38 | self.trained_time = None 39 | 40 | def model_encode(self): 41 | return encode(self.impl) 42 | 43 | @classmethod 44 | def model_decode(cls, model_desc): 45 | return decode(model_desc) 46 | 47 | def __eq__(self, rhs): 48 | return self.model_encode() == rhs.model_encode() \ 49 | and self.trained_time == rhs.trained_time \ 50 | and list_eq(self.features, rhs.features) \ 51 | and encode(self.formulas) == encode(rhs.formulas) \ 52 | and encode(self.fit_target) == encode(rhs.fit_target) 53 | 54 | def fit(self, x: pd.DataFrame, y: np.ndarray): 55 | self.impl.fit(x[self.features].values, y.flatten()) 56 | self.trained_time = arrow.now().format("YYYY-MM-DD HH:mm:ss") 57 | 58 | def predict(self, x: pd.DataFrame) -> np.ndarray: 59 | return self.impl.predict(x[self.features].values) 60 | 61 | def score(self, x: pd.DataFrame, y: np.ndarray) -> float: 62 | return self.impl.score(x[self.features].values, y) 63 | 64 | def 
ic(self, x: pd.DataFrame, y: np.ndarray) -> float: 65 | predict_y = self.impl.predict(x[self.features].values) 66 | return np.corrcoef(predict_y, y)[0, 1] 67 | 68 | @abc.abstractmethod 69 | def save(self) -> dict: 70 | 71 | if self.__class__.__module__ == '__main__': 72 | alpha_logger.warning( 73 | "model is defined in a main module. The model_name may not be correct.") 74 | 75 | model_desc = dict(model_name=self.__class__.__module__ + "." + self.__class__.__name__, 76 | language='python', 77 | saved_time=arrow.now().format("YYYY-MM-DD HH:mm:ss"), 78 | features=list(self.features), 79 | trained_time=self.trained_time, 80 | desc=self.model_encode(), 81 | formulas=encode(self.formulas), 82 | fit_target=encode(self.fit_target), 83 | internal_model=self.impl.__class__.__module__ + "." + self.impl.__class__.__name__) 84 | return model_desc 85 | 86 | @classmethod 87 | @abc.abstractmethod 88 | def load(cls, model_desc: dict): 89 | obj_layout = cls() 90 | obj_layout.features = model_desc['features'] 91 | obj_layout.formulas = decode(model_desc['formulas']) 92 | obj_layout.trained_time = model_desc['trained_time'] 93 | obj_layout.impl = cls.model_decode(model_desc['desc']) 94 | if 'fit_target' in model_desc: 95 | obj_layout.fit_target = decode(model_desc['fit_target']) 96 | else: 97 | obj_layout.fit_target = None 98 | return obj_layout 99 | 100 | 101 | def create_model_base(party_name=None): 102 | if not party_name: 103 | return ModelBase 104 | else: 105 | class ExternalLibBase(ModelBase): 106 | _lib_name = party_name 107 | 108 | def save(self) -> dict: 109 | model_desc = super().save() 110 | if self._lib_name == 'sklearn': 111 | model_desc[self._lib_name + "_version"] = sklearn_version 112 | elif self._lib_name == 'xgboost': 113 | model_desc[self._lib_name + "_version"] = xgbboot_version 114 | else: 115 | raise ValueError( 116 | "3rd party lib name ({0}) is not recognized".format(self._lib_name)) 117 | return model_desc 118 | 119 | @classmethod 120 | def load(cls, model_desc: dict): 121 | obj_layout = super().load(model_desc) 122 | 123 | if cls._lib_name == 'sklearn': 124 | current_version = sklearn_version 125 | elif cls._lib_name == 'xgboost': 126 | current_version = xgbboot_version 127 | else: 128 | raise ValueError( 129 | "3rd party lib name ({0}) is not recognized".format(cls._lib_name)) 130 | 131 | if LooseVersion(current_version) < LooseVersion( 132 | model_desc[cls._lib_name + "_version"]): 133 | alpha_logger.warning( 134 | 'Current {2} version {0} is lower than the model version {1}. 
' 135 | 'Loaded model may work incorrectly.'.format(current_version, 136 | model_desc[cls._lib_name + "_version"], 137 | cls._lib_name)) 138 | return obj_layout 139 | 140 | return ExternalLibBase 141 | -------------------------------------------------------------------------------- /alphamind/model/svm.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2018-7-9 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | from sklearn.svm import NuSVR 9 | 10 | from alphamind.model.modelbase import create_model_base 11 | 12 | 13 | class NvSVRModel(create_model_base('sklearn')): 14 | 15 | def __init__(self, 16 | features=None, 17 | fit_target=None, 18 | **kwargs): 19 | super().__init__(features=features, fit_target=fit_target) 20 | self.impl = NuSVR(**kwargs) 21 | -------------------------------------------------------------------------------- /alphamind/portfolio/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-4-26 4 | 5 | @author: cheng.li 6 | """ 7 | -------------------------------------------------------------------------------- /alphamind/portfolio/evolver.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-11-23 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import numpy as np 9 | 10 | 11 | def evolve_positions(positions: np.ndarray, dx_ret: np.ndarray) -> np.ndarray: 12 | # assume return is log return 13 | 14 | simple_return = np.exp(dx_ret) 15 | evolved_positions = positions * simple_return 16 | leverage = np.abs(positions).sum() 17 | evolved_positions = evolved_positions * leverage / np.abs(evolved_positions).sum() 18 | return evolved_positions 19 | -------------------------------------------------------------------------------- /alphamind/portfolio/linearbuilder.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-5-5 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | from typing import Tuple 9 | from typing import Union 10 | 11 | import numpy as np 12 | from alphamind.portfolio.optimizers import LPOptimizer 13 | from alphamind.portfolio.optimizers import L1LPOptimizer 14 | from alphamind.exceptions.exceptions import PortfolioBuilderException 15 | 16 | 17 | def linear_builder(er: np.ndarray, 18 | lbound: Union[np.ndarray, float] = None, 19 | ubound: Union[np.ndarray, float] = None, 20 | risk_constraints: np.ndarray = None, 21 | risk_target: Tuple[np.ndarray, np.ndarray] = None, 22 | turn_over_target: float = None, 23 | current_position: np.ndarray = None, 24 | method: str = "deprecated") -> Tuple[str, np.ndarray, np.ndarray]: 25 | er = er.flatten() 26 | 27 | if risk_constraints is not None: 28 | risk_lbound = risk_target[0].reshape((-1, 1)) 29 | risk_ubound = risk_target[1].reshape((-1, 1)) 30 | cons_matrix = np.concatenate((risk_constraints.T, risk_lbound, risk_ubound), axis=1) 31 | else: 32 | cons_matrix = None 33 | 34 | if not turn_over_target or current_position is None: 35 | prob = LPOptimizer(-er, cons_matrix, lbound, ubound) 36 | 37 | if prob.status() == "optimal" or prob.status() == 'optimal_inaccurate': 38 | return prob.status(), prob.feval(), prob.x_value() 39 | else: 40 | raise PortfolioBuilderException(prob.status()) 41 | elif turn_over_target: 42 | prob = L1LPOptimizer(objective=-er, 43 | cons_matrix=cons_matrix, 44 | current_pos=current_position, 45 |
target_turn_over=turn_over_target, 46 | lbound=lbound, 47 | ubound=ubound) 48 | 49 | if prob.status() == 'optimal' or prob.status() == 'optimal_inaccurate': 50 | return prob.status(), prob.feval(), prob.x_value() 51 | else: 52 | raise PortfolioBuilderException(prob.status()) 53 | 54 | 55 | if __name__ == '__main__': 56 | n = 5 57 | lb = np.zeros(n) 58 | ub = 4. / n * np.ones(n) 59 | er = np.random.randn(n) 60 | current_pos = np.random.randint(0, n, size=n) 61 | current_pos = current_pos / current_pos.sum() 62 | turn_over_target = 0.1 63 | 64 | cons = np.ones((n, 1)) 65 | risk_lbound = np.ones(1) 66 | risk_ubound = np.ones(1) 67 | 68 | status, fvalue, x_values = linear_builder(er, 69 | lb, 70 | ub, 71 | cons, 72 | (risk_lbound, risk_ubound), 73 | turn_over_target, 74 | current_pos, 75 | method='ecos') 76 | 77 | print(status) 78 | print(fvalue) 79 | print(x_values) 80 | print(current_pos) 81 | 82 | print(np.abs(x_values - current_pos).sum()) 83 | -------------------------------------------------------------------------------- /alphamind/portfolio/longshortbulder.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-5-9 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import numpy as np 9 | 10 | from alphamind.utilities import group_mapping 11 | from alphamind.utilities import simple_abssum 12 | from alphamind.utilities import transform 13 | 14 | 15 | def long_short_builder(er: np.ndarray, 16 | leverage: float = 1., 17 | groups: np.ndarray = None, 18 | masks: np.ndarray = None) -> np.ndarray: 19 | er = er.copy() 20 | 21 | if masks is not None: 22 | er[masks] = 0. 23 | er[~masks] = er[~masks] - er[~masks].mean() 24 | 25 | if er.ndim == 1: 26 | er = er.reshape((-1, 1)) 27 | 28 | if groups is None: 29 | return er / simple_abssum(er, axis=0) * leverage 30 | else: 31 | groups = group_mapping(groups) 32 | return transform(groups, er, 'scale', scale=leverage) 33 | -------------------------------------------------------------------------------- /alphamind/portfolio/meanvariancebuilder.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-6-27 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | from typing import Dict 9 | from typing import Optional 10 | from typing import Tuple 11 | from typing import Union 12 | import numpy as np 13 | from alphamind.portfolio.optimizers import ( 14 | QuadraticOptimizer, 15 | TargetVolOptimizer 16 | ) 17 | 18 | from alphamind.exceptions.exceptions import PortfolioBuilderException 19 | 20 | 21 | def _create_bounds(lbound, 22 | ubound, 23 | bm, 24 | risk_exposure, 25 | risk_target): 26 | if lbound is not None: 27 | lbound = lbound - bm 28 | if ubound is not None: 29 | ubound = ubound - bm 30 | 31 | if risk_exposure is not None: 32 | cons_mat = risk_exposure.T 33 | bm_risk = cons_mat @ bm 34 | 35 | clbound = (risk_target[0] - bm_risk).reshape((-1, 1)) 36 | cubound = (risk_target[1] - bm_risk).reshape((-1, 1)) 37 | else: 38 | cons_mat = None 39 | clbound = None 40 | cubound = None 41 | 42 | return lbound, ubound, cons_mat, clbound, cubound 43 | 44 | 45 | def _create_result(optimizer, bm): 46 | if optimizer.status() == "optimal" or optimizer.status() == "optimal_inaccurate": 47 | return optimizer.status(), optimizer.feval(), optimizer.x_value() + bm 48 | else: 49 | raise PortfolioBuilderException(optimizer.status()) 50 | 51 | 52 | def mean_variance_builder(er: np.ndarray, 53 | risk_model: Dict[str, Union[None, 
np.ndarray]], 54 | bm: np.ndarray, 55 | lbound: Union[np.ndarray, float, None], 56 | ubound: Union[np.ndarray, float, None], 57 | risk_exposure: Optional[np.ndarray], 58 | risk_target: Optional[Tuple[np.ndarray, np.ndarray]], 59 | lam: float = 1., 60 | linear_solver: str = 'deprecated') -> Tuple[str, float, np.ndarray]: 61 | lbound, ubound, cons_mat, clbound, cubound = _create_bounds(lbound, ubound, bm, risk_exposure, 62 | risk_target) 63 | if cons_mat is not None: 64 | cons_matrix = np.concatenate([cons_mat, clbound, cubound], axis=1) 65 | else: 66 | cons_matrix = None 67 | 68 | cov = risk_model['cov'] 69 | special_risk = risk_model['idsync'] 70 | risk_cov = risk_model['factor_cov'] 71 | risk_exposure = risk_model['factor_loading'] 72 | 73 | prob = QuadraticOptimizer(objective=-er, 74 | cons_matrix=cons_matrix, 75 | lbound=lbound, 76 | ubound=ubound, 77 | penalty=lam, 78 | cov=cov, 79 | factor_cov=risk_cov, 80 | factor_load=risk_exposure, 81 | factor_special=special_risk) 82 | 83 | if prob.status() == "optimal" or prob.status() == 'optimal_inaccurate': 84 | return prob.status(), prob.feval(), prob.x_value() + bm 85 | else: 86 | raise PortfolioBuilderException(prob.status()) 87 | 88 | 89 | def target_vol_builder(er: np.ndarray, 90 | risk_model: Dict[str, Union[None, np.ndarray]], 91 | bm: np.ndarray, 92 | lbound: Union[np.ndarray, float], 93 | ubound: Union[np.ndarray, float], 94 | risk_exposure: Optional[np.ndarray], 95 | risk_target: Optional[Tuple[np.ndarray, np.ndarray]], 96 | vol_target: float = 1., 97 | linear_solver: str = 'ma27') -> Tuple[str, float, np.ndarray]: 98 | lbound, ubound, cons_mat, clbound, cubound = _create_bounds(lbound, ubound, bm, risk_exposure, 99 | risk_target) 100 | 101 | if cons_mat is not None: 102 | cons_matrix = np.concatenate([cons_mat, clbound, cubound], axis=1) 103 | else: 104 | cons_matrix = None 105 | 106 | cov = risk_model['cov'] 107 | special_risk = risk_model['idsync'] 108 | risk_cov = risk_model['factor_cov'] 109 | risk_exposure = risk_model['factor_loading'] 110 | 111 | prob = TargetVolOptimizer(objective=-er, 112 | cons_matrix=cons_matrix, 113 | lbound=lbound, 114 | ubound=ubound, 115 | target_vol=vol_target, 116 | factor_cov=risk_cov, 117 | factor_load=risk_exposure, 118 | factor_special=special_risk, 119 | cov=cov) 120 | if prob.status() == "optimal" or prob.status() == 'optimal_inaccurate': 121 | return prob.status(), prob.feval(), prob.x_value() + bm 122 | else: 123 | raise PortfolioBuilderException(prob.status()) 124 | -------------------------------------------------------------------------------- /alphamind/portfolio/percentbuilder.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-5-4 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import numpy as np 9 | from numpy import zeros 10 | from numpy import zeros_like 11 | 12 | from alphamind.utilities import groupby 13 | from alphamind.utilities import set_value 14 | 15 | 16 | def percent_build(er: np.ndarray, percent: float, groups: np.ndarray = None, 17 | masks: np.ndarray = None) -> np.ndarray: 18 | er = er.copy() 19 | 20 | if masks is not None: 21 | er[~masks] = -np.inf 22 | 23 | if er.ndim == 1 or (er.shape[0] == 1 or er.shape[1] == 1): 24 | # fast path methods for single column er 25 | neg_er = -er.flatten() 26 | length = len(neg_er) 27 | weights = zeros((length, 1)) 28 | if groups is not None: 29 | index_diff, order = groupby(groups) 30 | start = 0 31 | for diff_loc in index_diff: 32 | current_index = 
order[start:diff_loc + 1] 33 | current_ordering = neg_er[current_index].argsort() 34 | current_ordering.shape = -1, 1 35 | use_rank = int(percent * len(current_index)) 36 | set_value(weights, current_index[current_ordering[:use_rank]], 1.) 37 | start = diff_loc + 1 38 | else: 39 | ordering = neg_er.argsort() 40 | use_rank = int(percent * len(neg_er)) 41 | weights[ordering[:use_rank]] = 1. 42 | return weights.reshape(er.shape) 43 | else: 44 | neg_er = -er 45 | weights = zeros_like(er) 46 | 47 | if groups is not None: 48 | index_diff, order = groupby(groups) 49 | start = 0 50 | for diff_loc in index_diff: 51 | current_index = order[start:diff_loc + 1] 52 | current_ordering = neg_er[current_index].argsort(axis=0) 53 | use_rank = int(percent * len(current_index)) 54 | set_value(weights, current_index[current_ordering[:use_rank]], 1) 55 | start = diff_loc + 1 56 | else: 57 | ordering = neg_er.argsort(axis=0) 58 | use_rank = int(percent * len(neg_er)) 59 | set_value(weights, ordering[:use_rank], 1.) 60 | return weights 61 | -------------------------------------------------------------------------------- /alphamind/portfolio/rankbuilder.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-4-26 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import numpy as np 9 | from numpy import zeros 10 | from numpy import zeros_like 11 | 12 | from alphamind.utilities import groupby 13 | from alphamind.utilities import set_value 14 | 15 | 16 | def rank_build(er: np.ndarray, use_rank: int, groups: np.ndarray = None, 17 | masks: np.ndarray = None) -> np.ndarray: 18 | er = er.copy() 19 | 20 | if masks is not None: 21 | er[~masks] = -np.inf 22 | 23 | if er.ndim == 1 or (er.shape[0] == 1 or er.shape[1] == 1): 24 | # fast path methods for single column er 25 | neg_er = -er.flatten() 26 | length = len(neg_er) 27 | weights = zeros((length, 1)) 28 | if groups is not None: 29 | index_diff, order = groupby(groups) 30 | start = 0 31 | for diff_loc in index_diff: 32 | current_index = order[start:diff_loc + 1] 33 | current_ordering = neg_er[current_index].argsort() 34 | current_ordering.shape = -1, 1 35 | set_value(weights, current_index[current_ordering[:use_rank]], 1.) 36 | start = diff_loc + 1 37 | else: 38 | ordering = neg_er.argsort() 39 | weights[ordering[:use_rank]] = 1. 40 | return weights.reshape(er.shape) 41 | else: 42 | neg_er = -er 43 | weights = zeros_like(er) 44 | 45 | if groups is not None: 46 | index_diff, order = groupby(groups) 47 | start = 0 48 | for diff_loc in index_diff: 49 | current_index = order[start:diff_loc + 1] 50 | current_ordering = neg_er[current_index].argsort(axis=0) 51 | set_value(weights, current_index[current_ordering[:use_rank]], 1) 52 | start = diff_loc + 1 53 | else: 54 | ordering = neg_er.argsort(axis=0) 55 | set_value(weights, ordering[:use_rank], 1.) 
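# At this point weights holds 1. for the top `use_rank` names ranked by er
# (per group when groups is given) and 0. elsewhere; callers normalize the
# result themselves. A minimal usage sketch with hypothetical numbers:
#
#     er = np.array([3., 1., 2.])
#     rank_build(er, use_rank=2)    # -> array([1., 0., 1.])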
56 | return weights 57 | -------------------------------------------------------------------------------- /alphamind/portfolio/riskmodel.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2018-5-29 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import abc 9 | from typing import List 10 | 11 | import pandas as pd 12 | 13 | 14 | class RiskModel(metaclass=abc.ABCMeta): 15 | 16 | def get_risk_profile(self): 17 | pass 18 | 19 | 20 | class FullRiskModel(RiskModel): 21 | 22 | def __init__(self, sec_cov: pd.DataFrame): 23 | self.codes = sec_cov.index.tolist() 24 | self.sec_cov = sec_cov.loc[self.codes, self.codes] 25 | 26 | def get_cov(self, codes: List[int] = None): 27 | if codes: 28 | return self.sec_cov.loc[codes, codes].values 29 | else: 30 | return self.sec_cov.values 31 | 32 | def get_risk_profile(self, codes: List[int] = None): 33 | return dict( 34 | cov=self.get_cov(codes), 35 | factor_cov=None, 36 | factor_loading=None, 37 | idsync=None 38 | ) 39 | 40 | 41 | class FactorRiskModel(RiskModel): 42 | 43 | def __init__(self, 44 | factor_cov: pd.DataFrame, 45 | risk_exp: pd.DataFrame, 46 | idsync: pd.Series): 47 | self.factor_cov = factor_cov 48 | self.idsync = idsync 49 | self.codes = self.idsync.index.tolist() 50 | self.factor_names = sorted(self.factor_cov.index) 51 | self.risk_exp = risk_exp.loc[self.codes, self.factor_names] 52 | self.factor_cov = self.factor_cov.loc[self.factor_names, self.factor_names] 53 | self.idsync = self.idsync[self.codes] 54 | 55 | def get_risk_exp(self, codes: List[int] = None): 56 | if codes: 57 | return self.risk_exp.loc[codes, :].values 58 | else: 59 | return self.risk_exp.values 60 | 61 | def get_factor_cov(self): 62 | return self.factor_cov.values 63 | 64 | def get_idsync(self, codes: List[int] = None): 65 | if codes: 66 | return self.idsync[codes].values 67 | else: 68 | return self.idsync.values 69 | 70 | def get_risk_profile(self, codes: List[int] = None): 71 | return dict( 72 | cov=None, 73 | factor_cov=self.get_factor_cov(), 74 | factor_loading=self.get_risk_exp(codes), 75 | idsync=self.get_idsync(codes) 76 | ) 77 | -------------------------------------------------------------------------------- /alphamind/settlement/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-4-28 4 | 5 | @author: cheng.li 6 | """ 7 | -------------------------------------------------------------------------------- /alphamind/settlement/simplesettle.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-4-28 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import numpy as np 9 | import pandas as pd 10 | 11 | 12 | def simple_settle(weights: np.ndarray, 13 | dx_return: np.ndarray, 14 | groups: np.ndarray = None, 15 | benchmark: np.ndarray = None) -> pd.DataFrame: 16 | weights = weights.flatten() 17 | dx_return = dx_return.flatten() 18 | 19 | if benchmark is not None: 20 | net_pos = weights - benchmark 21 | else: 22 | net_pos = weights 23 | 24 | ret_arr = net_pos * dx_return 25 | 26 | if groups is not None: 27 | ret_agg = pd.Series(ret_arr).groupby(groups).sum() 28 | ret_agg.loc['total'] = ret_agg.sum() 29 | else: 30 | ret_agg = pd.Series(ret_arr.sum(), index=['total']) 31 | 32 | ret_agg.index.name = 'industry' 33 | ret_agg.name = 'er' 34 | 35 | pos_table = pd.DataFrame(net_pos, columns=['weight']) 36 | pos_table['ret'] = dx_return 
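# The block below computes a cross-sectional IC: per group, the Pearson
# correlation between net position weights and realized returns.
# groupby(groups).corr()['ret'] indexed at (group, 'weight') picks the
# off-diagonal entry of each group's 2x2 correlation matrix.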
37 | 38 | if groups is not None: 39 | ic_table = pos_table.groupby(groups).corr()['ret'].loc[(slice(None), 'weight')] 40 | ic_table.loc['total'] = pos_table.corr().iloc[0, 1] 41 | else: 42 | ic_table = pd.Series(pos_table.corr().iloc[0, 1], index=['total']) 43 | 44 | return pd.DataFrame({'er': ret_agg.values, 45 | 'ic': ic_table.values}, 46 | index=ret_agg.index) 47 | -------------------------------------------------------------------------------- /alphamind/strategy/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-miner/alpha-mind/023fca01d2cea7cd50328396c60b06c99706c426/alphamind/strategy/__init__.py -------------------------------------------------------------------------------- /alphamind/strategy/sample_strategy.json: -------------------------------------------------------------------------------- 1 | { 2 | "strategy_name": "sample_strategy", 3 | "data_process": { 4 | "pre_process": [ 5 | "winsorize", 6 | "standardize" 7 | ], 8 | "neutralize_risk": [ 9 | "SIZE", 10 | "industry_styles" 11 | ], 12 | "post_process": [ 13 | "winsorize", 14 | "standardize" 15 | ] 16 | }, 17 | "risk_model": { 18 | "type": "short", 19 | "neutralize_risk": [ 20 | "SIZE", 21 | "industry_styles" 22 | ] 23 | }, 24 | "alpha_model": { 25 | "model_type": "LinearRegression", 26 | "features": [ 27 | "EPS", 28 | "ROEDiluted" 29 | ], 30 | "parameters": { 31 | "fit_intercept": false 32 | } 33 | }, 34 | "freq": "1d", 35 | "batch": 4, 36 | "warm_start": 0, 37 | "universe": [ 38 | "zz500", 39 | [ 40 | "zz500" 41 | ] 42 | ], 43 | "benchmark": 905, 44 | "optimizer": { 45 | "type": "risk_neutral", 46 | "neutralize_risk": [ 47 | "SIZE", 48 | "industry_styles" 49 | ] 50 | }, 51 | "executor": { 52 | "type": "naive" 53 | } 54 | } -------------------------------------------------------------------------------- /alphamind/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-4-25 4 | 5 | @author: cheng.li 6 | """ 7 | -------------------------------------------------------------------------------- /alphamind/tests/analysis/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-5-8 4 | 5 | @author: cheng.li 6 | """ 7 | -------------------------------------------------------------------------------- /alphamind/tests/analysis/test_factoranalysis.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-5-25 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | 10 | import numpy as np 11 | import pandas as pd 12 | 13 | from alphamind.analysis.factoranalysis import factor_analysis 14 | from alphamind.data.neutralize import neutralize 15 | from alphamind.data.processing import factor_processing 16 | from alphamind.data.standardize import standardize 17 | from alphamind.data.winsorize import winsorize_normal 18 | from alphamind.portfolio.constraints import ( 19 | Constraints, 20 | ) 21 | 22 | 23 | class TestFactorAnalysis(unittest.TestCase): 24 | def setUp(self): 25 | self.raw_factor = np.random.randn(1000, 1) 26 | self.risk_factor = np.random.randn(1000, 3) 27 | self.d1returns = np.random.randn(1000, 1) 28 | 29 | def test_factor_processing(self): 30 | new_factor = factor_processing(self.raw_factor) 31 | np.testing.assert_array_almost_equal(new_factor, self.raw_factor) 32 | 
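# pre_process callables are applied left to right, so the pipeline below
# should reproduce winsorize_normal(standardize(raw_factor)) exactly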
33 | new_factor = factor_processing(self.raw_factor, 34 | pre_process=[standardize, winsorize_normal]) 35 | 36 | np.testing.assert_array_almost_equal(new_factor, 37 | winsorize_normal(standardize(self.raw_factor))) 38 | 39 | new_factor = factor_processing(self.raw_factor, 40 | pre_process=[standardize, winsorize_normal], 41 | risk_factors=self.risk_factor) 42 | 43 | np.testing.assert_array_almost_equal(new_factor, neutralize(self.risk_factor, 44 | winsorize_normal(standardize( 45 | self.raw_factor)))) 46 | 47 | def test_factor_analysis(self): 48 | benchmark = np.random.randint(50, size=1000) 49 | benchmark = benchmark / benchmark.sum() 50 | industry = np.random.randint(30, size=1000) 51 | 52 | factor_df = pd.DataFrame(self.raw_factor.flatten(), index=range(len(self.raw_factor))) 53 | factor_weights = np.array([1.]) 54 | 55 | constraints = Constraints() 56 | names = np.array(['a', 'b', 'c']) 57 | constraints.add_exposure(names, self.risk_factor) 58 | targets = self.risk_factor.T @ benchmark 59 | for i, name in enumerate(names): 60 | constraints.set_constraints(name, targets[i], targets[i]) 61 | 62 | weight_table, analysis_table = factor_analysis(factor_df, 63 | factor_weights, 64 | d1returns=self.d1returns, 65 | industry=industry, 66 | benchmark=benchmark, 67 | risk_exp=self.risk_factor, 68 | constraints=constraints) 69 | 70 | weight = weight_table.weight 71 | 72 | self.assertEqual(analysis_table['er'].sum() / analysis_table['er'].iloc[-1], 2.0) 73 | np.testing.assert_array_almost_equal(weight @ self.risk_factor, 74 | benchmark @ self.risk_factor) 75 | self.assertTrue(weight @ factor_df.values > benchmark @ factor_df.values) 76 | 77 | def test_factor_analysis_with_several_factors(self): 78 | benchmark = np.random.randint(50, size=1000) 79 | benchmark = benchmark / benchmark.sum() 80 | industry = np.random.randint(30, size=1000) 81 | 82 | factor_df = pd.DataFrame(np.random.randn(1000, 2), index=range(len(self.raw_factor))) 83 | factor_weights = np.array([0.2, 0.8]) 84 | 85 | constraints = Constraints() 86 | names = np.array(['a', 'b', 'c']) 87 | constraints.add_exposure(names, self.risk_factor) 88 | targets = self.risk_factor.T @ benchmark 89 | for i, name in enumerate(names): 90 | constraints.set_constraints(name, targets[i], targets[i]) 91 | 92 | weight_table, analysis_table = factor_analysis(factor_df, 93 | factor_weights, 94 | d1returns=self.d1returns, 95 | industry=industry, 96 | benchmark=benchmark, 97 | risk_exp=self.risk_factor, 98 | constraints=constraints) 99 | 100 | weight = weight_table.weight 101 | self.assertEqual(analysis_table['er'].sum() / analysis_table['er'].iloc[-1], 2.0) 102 | np.testing.assert_array_almost_equal(weight @ self.risk_factor, 103 | benchmark @ self.risk_factor) 104 | 105 | 106 | if __name__ == '__main__': 107 | unittest.main() 108 | -------------------------------------------------------------------------------- /alphamind/tests/analysis/test_perfanalysis.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-5-12 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | 10 | import numpy as np 11 | import pandas as pd 12 | 13 | from alphamind.analysis.perfanalysis import perf_attribution_by_pos 14 | 15 | 16 | class TestPerformanceAnalysis(unittest.TestCase): 17 | 18 | @classmethod 19 | def test_perf_attribution_by_pos(cls): 20 | n_samples = 36000 21 | n_dates = 20 22 | n_risk_factors = 35 23 | 24 | dates = np.sort(np.random.randint(n_dates, size=n_samples)) 25 | 
weights_series = pd.Series(data=np.random.randn(n_samples), index=dates) 26 | bm_series = pd.Series(data=np.random.randn(n_samples), index=dates) 27 | next_bar_return_series = pd.Series(data=np.random.randn(n_samples), index=dates) 28 | risk_table = pd.DataFrame(data=np.random.randn(n_samples, n_risk_factors), 29 | columns=list(range(n_risk_factors)), 30 | index=dates) 31 | 32 | explained_table = perf_attribution_by_pos(weights_series - bm_series, 33 | next_bar_return_series, 34 | risk_table) 35 | 36 | to_explain = (weights_series - bm_series).multiply(next_bar_return_series, axis=0) 37 | aggregated_to_explain = pd.Series(to_explain).groupby(dates).sum() 38 | aggregated_explained = explained_table.sum(axis=1) 39 | 40 | np.testing.assert_array_almost_equal(aggregated_to_explain.values, 41 | aggregated_explained.values) 42 | 43 | 44 | if __name__ == '__main__': 45 | unittest.main() 46 | -------------------------------------------------------------------------------- /alphamind/tests/analysis/test_quantilieanalysis.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-8-16 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | 10 | import numpy as np 11 | import pandas as pd 12 | 13 | from alphamind.analysis.quantileanalysis import er_quantile_analysis 14 | from alphamind.analysis.quantileanalysis import quantile_analysis 15 | from alphamind.data.processing import factor_processing 16 | from alphamind.data.quantile import quantile 17 | from alphamind.data.standardize import standardize 18 | from alphamind.data.winsorize import winsorize_normal 19 | 20 | 21 | class TestQuantileAnalysis(unittest.TestCase): 22 | def setUp(self): 23 | n = 5000 24 | n_f = 5 25 | 26 | self.x = np.random.randn(n, 5) 27 | self.x_w = np.random.randn(n_f) 28 | self.r = np.random.randn(n) 29 | self.b_w = np.random.randint(0, 10, n) 30 | self.b_w = self.b_w / float(self.b_w.sum()) 31 | self.risk_exp = np.random.randn(n, 3) 32 | self.n_bins = 10 33 | 34 | def test_q_anl_impl(self): 35 | n_bins = 5 36 | 37 | x = self.x[:, 0] 38 | q_groups = quantile(x, n_bins) 39 | 40 | s = pd.Series(self.r, index=q_groups) 41 | grouped_return = s.groupby(level=0).mean().values.flatten() 42 | 43 | expected_res = grouped_return.copy() 44 | res = n_bins - 1 45 | res_weight = 1. / res 46 | 47 | for i, value in enumerate(expected_res): 48 | expected_res[i] = (1. 
+ res_weight) * value - res_weight * grouped_return.sum() 49 | 50 | calculated_res = er_quantile_analysis(x, n_bins, self.r, de_trend=True) 51 | 52 | np.testing.assert_array_almost_equal(expected_res, calculated_res) 53 | 54 | def test_quantile_analysis_simple(self): 55 | f_df = pd.DataFrame(self.x) 56 | calculated = quantile_analysis(f_df, 57 | self.x_w, 58 | self.r, 59 | n_bins=self.n_bins, 60 | pre_process=[], 61 | post_process=[]) 62 | 63 | er = self.x_w @ self.x.T 64 | expected = er_quantile_analysis(er, self.n_bins, self.r) 65 | np.testing.assert_array_almost_equal(calculated, expected) 66 | 67 | def test_quantile_analysis_with_factor_processing(self): 68 | f_df = pd.DataFrame(self.x) 69 | calculated = quantile_analysis(f_df, 70 | self.x_w, 71 | self.r, 72 | n_bins=self.n_bins, 73 | risk_exp=self.risk_exp, 74 | pre_process=[winsorize_normal, standardize], 75 | post_process=[standardize]) 76 | 77 | er = self.x_w @ factor_processing(self.x, 78 | [winsorize_normal, standardize], 79 | self.risk_exp, 80 | [standardize]).T 81 | expected = er_quantile_analysis(er, self.n_bins, self.r) 82 | np.testing.assert_array_almost_equal(calculated, expected) 83 | 84 | 85 | if __name__ == '__main__': 86 | unittest.main() 87 | -------------------------------------------------------------------------------- /alphamind/tests/analysis/test_riskanalysis.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-5-8 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | 10 | import numpy as np 11 | import pandas as pd 12 | 13 | from alphamind.analysis.riskanalysis import risk_analysis 14 | 15 | 16 | class TestRiskAnalysis(unittest.TestCase): 17 | 18 | @staticmethod 19 | def test_risk_analysis(): 20 | n_samples = 36000 21 | n_dates = 20 22 | n_risk_factors = 35 23 | 24 | dates = np.sort(np.random.randint(n_dates, size=n_samples)) 25 | weights_series = pd.Series(data=np.random.randn(n_samples), index=dates) 26 | bm_series = pd.Series(data=np.random.randn(n_samples), index=dates) 27 | next_bar_return_series = pd.Series(data=np.random.randn(n_samples), index=dates) 28 | risk_table = pd.DataFrame(data=np.random.randn(n_samples, n_risk_factors), 29 | columns=list(range(n_risk_factors)), 30 | index=dates) 31 | 32 | explained_table, _ = risk_analysis(weights_series - bm_series, 33 | next_bar_return_series, 34 | risk_table) 35 | 36 | to_explain = (weights_series - bm_series).multiply(next_bar_return_series, axis=0) 37 | aggregated = explained_table.sum(axis=1) 38 | 39 | np.testing.assert_array_almost_equal(to_explain.values, aggregated.values) 40 | 41 | 42 | if __name__ == '__main__': 43 | unittest.main() 44 | -------------------------------------------------------------------------------- /alphamind/tests/cython/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-miner/alpha-mind/023fca01d2cea7cd50328396c60b06c99706c426/alphamind/tests/cython/__init__.py -------------------------------------------------------------------------------- /alphamind/tests/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-miner/alpha-mind/023fca01d2cea7cd50328396c60b06c99706c426/alphamind/tests/data/__init__.py -------------------------------------------------------------------------------- /alphamind/tests/data/engines/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-miner/alpha-mind/023fca01d2cea7cd50328396c60b06c99706c426/alphamind/tests/data/engines/__init__.py -------------------------------------------------------------------------------- /alphamind/tests/data/engines/test_universe.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2018-2-9 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | 10 | from alphamind.data.engines.universe import Universe 11 | from alphamind.data.engines.universe import load_universe 12 | 13 | 14 | class TestUniverse(unittest.TestCase): 15 | 16 | def test_universe_equal(self): 17 | universe1 = Universe('zz500') 18 | universe2 = Universe('zz500') 19 | self.assertEqual(universe1, universe2) 20 | 21 | universe1 = Universe('zz500') 22 | universe2 = Universe('zz800') 23 | self.assertNotEqual(universe1, universe2) 24 | 25 | def test_universe_persistence(self): 26 | universe = Universe('zz500') 27 | univ_desc = universe.save() 28 | loaded_universe = load_universe(univ_desc) 29 | self.assertEqual(universe, loaded_universe) 30 | 31 | def test_universe_arithmetic(self): 32 | universe = Universe('zz500') + Universe('hs300') 33 | univ_desc = universe.save() 34 | loaded_universe = load_universe(univ_desc) 35 | self.assertEqual(universe, loaded_universe) 36 | 37 | universe = Universe('zz500') - Universe('hs300') 38 | univ_desc = universe.save() 39 | loaded_universe = load_universe(univ_desc) 40 | self.assertEqual(universe, loaded_universe) 41 | 42 | universe = Universe('zz500') & Universe('hs300') 43 | univ_desc = universe.save() 44 | loaded_universe = load_universe(univ_desc) 45 | self.assertEqual(universe, loaded_universe) 46 | -------------------------------------------------------------------------------- /alphamind/tests/data/test_neutralize.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-4-25 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | 10 | import numpy as np 11 | from sklearn.linear_model import LinearRegression 12 | 13 | from alphamind.data.neutralize import neutralize 14 | 15 | 16 | class TestNeutralize(unittest.TestCase): 17 | 18 | def setUp(self): 19 | self.y = np.random.randn(3000, 4) 20 | self.x = np.random.randn(3000, 10) 21 | self.groups = np.random.randint(30, size=3000) 22 | 23 | def test_neutralize(self): 24 | calc_res = neutralize(self.x, self.y) 25 | 26 | model = LinearRegression(fit_intercept=False) 27 | model.fit(self.x, self.y) 28 | 29 | exp_res = self.y - self.x @ model.coef_.T 30 | 31 | np.testing.assert_array_almost_equal(calc_res, exp_res) 32 | 33 | def test_neutralize_with_group(self): 34 | 35 | calc_res = neutralize(self.x, self.y, self.groups) 36 | 37 | model = LinearRegression(fit_intercept=False) 38 | for i in range(30): 39 | curr_x = self.x[self.groups == i] 40 | curr_y = self.y[self.groups == i] 41 | model.fit(curr_x, curr_y) 42 | exp_res = curr_y - curr_x @ model.coef_.T 43 | np.testing.assert_array_almost_equal(calc_res[self.groups == i], exp_res) 44 | 45 | def test_neutralize_explain_output(self): 46 | y = self.y[:, 0].flatten() 47 | 48 | calc_res, other_stats = neutralize(self.x, y, detail=True) 49 | 50 | model = LinearRegression(fit_intercept=False) 51 | model.fit(self.x, y) 52 | 53 | exp_res = y - self.x @ model.coef_.T 54 | exp_explained = self.x * model.coef_.T 55 | 56 |
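        # For a 1-d target, the neutralized residual comes back as a single column, hence the reshape below.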
np.testing.assert_array_almost_equal(calc_res, exp_res.reshape(-1, 1)) 57 | np.testing.assert_array_almost_equal(other_stats['explained'][:, :, 0], exp_explained) 58 | 59 | calc_res, other_stats = neutralize(self.x, self.y, detail=True) 60 | 61 | model = LinearRegression(fit_intercept=False) 62 | model.fit(self.x, self.y) 63 | 64 | exp_res = self.y - self.x @ model.coef_.T 65 | np.testing.assert_array_almost_equal(calc_res, exp_res) 66 | 67 | for i in range(self.y.shape[1]): 68 | exp_explained = self.x * model.coef_.T[:, i] 69 | np.testing.assert_array_almost_equal(other_stats['explained'][:, :, i], exp_explained) 70 | 71 | def test_neutralize_explain_output_with_group(self): 72 | y = self.y[:, 0].flatten() 73 | 74 | calc_res, other_stats = neutralize(self.x, y, self.groups, detail=True) 75 | 76 | model = LinearRegression(fit_intercept=False) 77 | for i in range(30): 78 | curr_x = self.x[self.groups == i] 79 | curr_y = y[self.groups == i] 80 | model.fit(curr_x, curr_y) 81 | exp_res = curr_y - curr_x @ model.coef_.T 82 | exp_explained = curr_x * model.coef_.T 83 | np.testing.assert_array_almost_equal(calc_res[self.groups == i], exp_res.reshape(-1, 1)) 84 | np.testing.assert_array_almost_equal(other_stats['explained'][self.groups == i, :, 0], 85 | exp_explained) 86 | 87 | calc_res, other_stats = neutralize(self.x, self.y, self.groups, detail=True) 88 | 89 | model = LinearRegression(fit_intercept=False) 90 | for i in range(30): 91 | curr_x = self.x[self.groups == i] 92 | curr_y = self.y[self.groups == i] 93 | model.fit(curr_x, curr_y) 94 | exp_res = curr_y - curr_x @ model.coef_.T 95 | np.testing.assert_array_almost_equal(calc_res[self.groups == i], exp_res) 96 | 97 | for j in range(self.y.shape[1]): 98 | exp_explained = curr_x * model.coef_.T[:, j] 99 | np.testing.assert_array_almost_equal( 100 | other_stats['explained'][self.groups == i, :, j], exp_explained) 101 | 102 | 103 | if __name__ == '__main__': 104 | unittest.main() 105 | -------------------------------------------------------------------------------- /alphamind/tests/data/test_quantile.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-8-16 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | 10 | import numpy as np 11 | 12 | from alphamind.data.quantile import quantile 13 | 14 | 15 | class TestQuantile(unittest.TestCase): 16 | 17 | def test_quantile(self): 18 | n = 5000 19 | bins = 10 20 | s = np.random.randn(n) 21 | calculated = quantile(s, bins) 22 | 23 | rank = s.argsort().argsort() 24 | 25 | bin_size = float(n) / bins 26 | pillars = [int(i * bin_size) for i in range(1, bins + 1)] 27 | 28 | starter = 0 29 | for i, r in enumerate(pillars): 30 | self.assertTrue(np.all(calculated[(rank >= starter) & (rank < r)] == i)) 31 | starter = r 32 | 33 | 34 | if __name__ == "__main__": 35 | unittest.main() 36 | -------------------------------------------------------------------------------- /alphamind/tests/data/test_rank.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-8-8 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | 10 | import numpy as np 11 | import pandas as pd 12 | 13 | from alphamind.data.rank import rank 14 | 15 | 16 | class TestRank(unittest.TestCase): 17 | 18 | def setUp(self): 19 | self.x = np.random.randn(1000, 1) 20 | self.groups = np.random.randint(0, 10, 1000) 21 | 22 | def test_rank(self): 23 | data_rank = rank(self.x) 
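        # rank() yields zero-based ordinal positions, so scattering each value into its rank slot should produce strictly increasing columns.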
24 | 25 | sorted_array = np.zeros_like(self.x) 26 | for i in range(self.x.shape[0]): 27 | for j in range(self.x.shape[1]): 28 | sorted_array[int(data_rank[i, j]), j] = self.x[i, j] 29 | 30 | arr_diff = np.diff(sorted_array, axis=0) 31 | np.testing.assert_array_less(0, arr_diff) 32 | 33 | def test_rank_with_groups(self): 34 | data = pd.DataFrame(data={'raw': self.x.tolist()}, index=self.groups) 35 | data['rank'] = rank(data['raw'].values, groups=data.index) 36 | groups = dict(list(data['rank'].groupby(level=0))) 37 | ret = [] 38 | for index in range(10): 39 | ret.append(groups[index].values) 40 | ret = np.concatenate(ret).reshape(-1, 1) 41 | 42 | expected_rank = data['raw'].groupby(level=0).apply( 43 | lambda x: x.values.argsort(axis=0).argsort(axis=0)) 44 | expected_rank = np.concatenate(expected_rank).reshape(-1, 1) 45 | np.testing.assert_array_equal(ret, expected_rank) 46 | -------------------------------------------------------------------------------- /alphamind/tests/data/test_standardize.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-4-25 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | 10 | import numpy as np 11 | import pandas as pd 12 | from scipy.stats import zscore 13 | 14 | from alphamind.data.standardize import Standardizer 15 | from alphamind.data.standardize import projection 16 | from alphamind.data.standardize import standardize 17 | 18 | 19 | class TestStandardize(unittest.TestCase): 20 | 21 | def setUp(self): 22 | self.x = np.random.randn(3000, 10) 23 | self.groups = np.random.randint(10, 30, size=3000) 24 | 25 | def test_standardize(self): 26 | calc_zscore = standardize(self.x) 27 | exp_zscore = zscore(self.x, ddof=1) 28 | 29 | np.testing.assert_array_almost_equal(calc_zscore, exp_zscore) 30 | 31 | def test_projection(self): 32 | calc_projected = projection(self.x) 33 | exp_projected = self.x / np.sqrt(np.sum(np.square(self.x), axis=1).reshape((-1, 1))) 34 | 35 | np.testing.assert_array_almost_equal(calc_projected, exp_projected) 36 | 37 | def test_projection_with_groups(self): 38 | calc_projected = projection(self.x, self.groups, axis=0) 39 | exp_projected = pd.DataFrame(self.x).groupby( 40 | self.groups 41 | ).transform(lambda s: s / np.sqrt(np.square(s).sum(axis=0))) 42 | 43 | np.testing.assert_array_almost_equal(calc_projected, exp_projected) 44 | 45 | def test_standardize_with_group(self): 46 | calc_zscore = standardize(self.x, self.groups) 47 | exp_zscore = pd.DataFrame(self.x). \ 48 | groupby(self.groups). 
\ 49 | transform(lambda s: (s - s.mean(axis=0)) / s.std(axis=0, ddof=1)) 50 | np.testing.assert_array_almost_equal(calc_zscore, exp_zscore) 51 | 52 | def test_standardizer(self): 53 | s = Standardizer() 54 | s.fit(self.x) 55 | calc_zscore = s.transform(self.x) 56 | 57 | exp_zscore = standardize(self.x) 58 | np.testing.assert_array_almost_equal(calc_zscore, exp_zscore) 59 | np.testing.assert_array_almost_equal(s(self.x), exp_zscore) 60 | 61 | def test_grouped_standardizer(self): 62 | s = Standardizer() 63 | s.fit(self.x, self.groups) 64 | calc_zscore = s.transform(self.x, self.groups) 65 | 66 | exp_zscore = standardize(self.x, self.groups) 67 | np.testing.assert_array_almost_equal(calc_zscore, exp_zscore) 68 | np.testing.assert_array_almost_equal(s(self.x, self.groups), exp_zscore) 69 | 70 | 71 | if __name__ == '__main__': 72 | import datetime as dt 73 | from sklearn.preprocessing import StandardScaler 74 | 75 | x = np.random.randn(1000, 2) 76 | y = np.random.randn(50, 2) 77 | 78 | start = dt.datetime.now() 79 | for i in range(10000): 80 | s1 = StandardScaler() 81 | s1.fit(x) 82 | x1 = s1.transform(y) 83 | print(dt.datetime.now() - start) 84 | 85 | start = dt.datetime.now() 86 | for i in range(10000): 87 | s2 = Standardizer(ddof=0) 88 | s2.fit(x) 89 | x2 = s2.transform(y) 90 | print(dt.datetime.now() - start) 91 | 92 | np.testing.assert_array_almost_equal(x1, x2) 93 | -------------------------------------------------------------------------------- /alphamind/tests/data/test_winsorize.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-4-25 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | 10 | import numpy as np 11 | import pandas as pd 12 | 13 | from alphamind.data.winsorize import NormalWinsorizer 14 | from alphamind.data.winsorize import winsorize_normal 15 | 16 | 17 | class TestWinsorize(unittest.TestCase): 18 | 19 | def setUp(self): 20 | np.random.seed(10) 21 | self.x = np.random.randn(3000, 10) 22 | self.groups = np.random.randint(10, 30, size=3000) 23 | self.num_stds = 2 24 | 25 | def test_winsorize_normal(self): 26 | calc_winsorized = winsorize_normal(self.x, self.num_stds) 27 | 28 | std_values = self.x.std(axis=0, ddof=1) 29 | mean_value = self.x.mean(axis=0) 30 | 31 | lower_bound = mean_value - self.num_stds * std_values 32 | upper_bound = mean_value + self.num_stds * std_values 33 | 34 | for i in range(np.size(calc_winsorized, 1)): 35 | col_data = self.x[:, i] 36 | col_data[col_data > upper_bound[i]] = upper_bound[i] 37 | col_data[col_data < lower_bound[i]] = lower_bound[i] 38 | 39 | calculated_col = calc_winsorized[:, i] 40 | np.testing.assert_array_almost_equal(col_data, calculated_col) 41 | 42 | def test_winsorize_normal_with_interp(self): 43 | calc_winsorized = winsorize_normal(self.x, self.num_stds, method='interp') 44 | 45 | std_values = self.x.std(axis=0, ddof=1) 46 | mean_value = self.x.mean(axis=0) 47 | 48 | lower_bound = mean_value - self.num_stds * std_values 49 | upper_bound = mean_value + self.num_stds * std_values 50 | 51 | for i in range(np.size(calc_winsorized, 1)): 52 | col_data = self.x[:, i].copy() 53 | 54 | idx = col_data > upper_bound[i] 55 | u_values = col_data[idx] 56 | q_values = u_values.argsort().argsort() 57 | if len(q_values) > 0: 58 | col_data[idx] = upper_bound[i] + q_values / len(q_values) * 0.5 * std_values[i] 59 | 60 | idx = col_data < lower_bound[i] 61 | l_values = col_data[idx] 62 | q_values = (-l_values).argsort().argsort() 63 | if len(q_values) 
> 0: 64 | col_data[idx] = lower_bound[i] - q_values / len(q_values) * 0.5 * std_values[i] 65 | 66 | calculated_col = calc_winsorized[:, i] 67 | np.testing.assert_array_almost_equal(col_data, calculated_col) 68 | 69 | def test_winsorize_normal_with_group(self): 70 | cal_winsorized = winsorize_normal(self.x, self.num_stds, groups=self.groups) 71 | 72 | def impl(x): 73 | std_values = x.std(axis=0, ddof=1) 74 | mean_value = x.mean(axis=0) 75 | 76 | lower_bound = mean_value - self.num_stds * std_values 77 | upper_bound = mean_value + self.num_stds * std_values 78 | 79 | res = np.where(x > upper_bound, upper_bound, x) 80 | res = np.where(res < lower_bound, lower_bound, res) 81 | return res 82 | 83 | exp_winsorized = pd.DataFrame(self.x).groupby(self.groups).transform(impl).values 84 | np.testing.assert_array_almost_equal(cal_winsorized, exp_winsorized) 85 | 86 | def test_winsorize_normal_with_group_and_interp(self): 87 | cal_winsorized = winsorize_normal(self.x, self.num_stds, groups=self.groups, 88 | method='interp') 89 | 90 | def impl(x): 91 | x = x.values 92 | std_values = x.std(axis=0, ddof=1) 93 | mean_value = x.mean(axis=0) 94 | 95 | lower_bound = mean_value - self.num_stds * std_values 96 | upper_bound = mean_value + self.num_stds * std_values 97 | 98 | col_data = x.copy() 99 | 100 | idx = col_data > upper_bound 101 | u_values = col_data[idx] 102 | q_values = u_values.argsort().argsort() 103 | if len(q_values) > 0: 104 | col_data[idx] = upper_bound + q_values / len(q_values) * 0.5 * std_values 105 | 106 | idx = col_data < lower_bound 107 | l_values = col_data[idx] 108 | q_values = (-l_values).argsort().argsort() 109 | if len(q_values) > 0: 110 | col_data[idx] = lower_bound - q_values / len(q_values) * 0.5 * std_values 111 | return col_data 112 | 113 | exp_winsorized = pd.DataFrame(self.x).groupby(self.groups).transform(impl).values 114 | np.testing.assert_array_almost_equal(cal_winsorized, exp_winsorized) 115 | 116 | def test_normal_winsorizer(self): 117 | s = NormalWinsorizer(num_stds=self.num_stds) 118 | s.fit(self.x) 119 | calc_winsorized1 = s.transform(self.x) 120 | calc_winsorized2 = s(self.x) 121 | 122 | std_values = self.x.std(axis=0, ddof=1) 123 | mean_value = self.x.mean(axis=0) 124 | 125 | lower_bound = mean_value - self.num_stds * std_values 126 | upper_bound = mean_value + self.num_stds * std_values 127 | 128 | for i in range(np.size(calc_winsorized1, 1)): 129 | col_data = self.x[:, i] 130 | col_data[col_data > upper_bound[i]] = upper_bound[i] 131 | col_data[col_data < lower_bound[i]] = lower_bound[i] 132 | 133 | calculated_col = calc_winsorized1[:, i] 134 | np.testing.assert_array_almost_equal(col_data, calculated_col) 135 | calculated_col = calc_winsorized2[:, i] 136 | np.testing.assert_array_almost_equal(col_data, calculated_col) 137 | 138 | 139 | if __name__ == "__main__": 140 | unittest.main() 141 | -------------------------------------------------------------------------------- /alphamind/tests/execution/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-miner/alpha-mind/023fca01d2cea7cd50328396c60b06c99706c426/alphamind/tests/execution/__init__.py -------------------------------------------------------------------------------- /alphamind/tests/execution/test_naiveexecutor.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-9-22 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | 10 | import 
pandas as pd 11 | 12 | from alphamind.execution.naiveexecutor import NaiveExecutor 13 | 14 | 15 | class TestNaiveExecutor(unittest.TestCase): 16 | 17 | def test_naive_executor(self): 18 | target_pos = pd.DataFrame({'code': [1, 2, 3], 19 | 'weight': [0.2, 0.3, 0.5], 20 | 'industry': ['a', 'b', 'c']}) 21 | 22 | # 1st round 23 | executor = NaiveExecutor() 24 | turn_over, executed_pos = executor.execute(target_pos) 25 | executor.set_current(executed_pos) 26 | self.assertAlmostEqual(turn_over, 1.0) 27 | 28 | # 2nd round 29 | target_pos = pd.DataFrame({'code': [1, 2, 4], 30 | 'weight': [0.3, 0.2, 0.5], 31 | 'industry': ['a', 'b', 'd']}) 32 | 33 | turn_over, executed_pos = executor.execute(target_pos) 34 | executor.set_current(executed_pos) 35 | self.assertAlmostEqual(turn_over, 1.2) 36 | 37 | # 3rd round 38 | target_pos = pd.DataFrame({'code': [1, 3, 4], 39 | 'weight': [0.3, 0.2, 0.5], 40 | 'industry': ['a', 'c', 'd']}) 41 | turn_over, executed_pos = executor.execute(target_pos) 42 | executor.set_current(executed_pos) 43 | self.assertAlmostEqual(turn_over, 0.4) 44 | 45 | 46 | if __name__ == '__main__': 47 | unittest.main() 48 | -------------------------------------------------------------------------------- /alphamind/tests/execution/test_pipeline.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-9-25 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | from collections import deque 10 | 11 | import numpy as np 12 | import pandas as pd 13 | 14 | from alphamind.execution.pipeline import ExecutionPipeline 15 | from alphamind.execution.targetvolexecutor import TargetVolExecutor 16 | from alphamind.execution.thresholdexecutor import ThresholdExecutor 17 | 18 | 19 | class TestExecutionPipeline(unittest.TestCase): 20 | 21 | def test_execution_pipeline(self): 22 | n = 100 23 | window = 60 24 | target_vol = 0.01 25 | turn_over_threshold = 0.5 26 | 27 | executor1 = TargetVolExecutor(window=window, target_vol=target_vol) 28 | executor2 = ThresholdExecutor(turn_over_threshold=turn_over_threshold) 29 | 30 | execution_pipeline = ExecutionPipeline(executors=[executor1, executor2]) 31 | 32 | return_1 = np.random.randn(2000, n) * 0.05 33 | return_2 = np.random.randn(2000, n) * 0.2 34 | return_total = np.concatenate((return_1, return_2)) 35 | codes = np.array(list(range(n))) 36 | 37 | ret_deq = deque(maxlen=window) 38 | 39 | for i, row in enumerate(return_total): 40 | weights = np.random.randint(0, 100, n) 41 | weights = weights / weights.sum() 42 | pos = pd.DataFrame({'code': codes, 'weight': weights}) 43 | turn_over, executed_pos = execution_pipeline.execute(pos) 44 | daily_return = row @ executed_pos.weight.values.flatten() 45 | data_dict = {'return': daily_return} 46 | execution_pipeline.update(data_dict=data_dict) 47 | ret_deq.append(daily_return) 48 | -------------------------------------------------------------------------------- /alphamind/tests/execution/test_targetvolexecutor.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-9-22 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | from collections import deque 10 | 11 | import numpy as np 12 | import pandas as pd 13 | 14 | from alphamind.execution.targetvolexecutor import TargetVolExecutor 15 | 16 | 17 | class TestTargetVolExecutor(unittest.TestCase): 18 | 19 | def test_target_vol_executor(self): 20 | n = 100 21 | window = 30 22 | target_vol = 0.01 23 | 
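        # The sample has a low-vol regime followed by a high-vol regime, so the executor must rescale positions as realized volatility moves away from the target.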
24 | executor = TargetVolExecutor(window=window, target_vol=target_vol) 25 | 26 | return_1 = np.random.randn(2000, n) * 0.05 27 | return_2 = np.random.randn(2000, n) * 0.2 28 | return_total = np.concatenate((return_1, return_2)) 29 | 30 | weights = np.ones(n) / n 31 | codes = np.array(list(range(n))) 32 | 33 | ret_deq = deque(maxlen=window) 34 | 35 | for i, row in enumerate(return_total): 36 | pos = pd.DataFrame({'code': codes, 'weight': weights}) 37 | turn_over, executed_pos = executor.execute(pos) 38 | 39 | if i >= window: 40 | c_vol = np.std(ret_deq, ddof=1) 41 | np.testing.assert_array_almost_equal(executed_pos.weight, pos.weight * target_vol / c_vol) 42 | else: 43 | np.testing.assert_array_almost_equal(executed_pos.weight, pos.weight) 44 | 45 | executor.set_current(executed_pos) 46 | daily_return = row @ weights 47 | data_dict = {'return': daily_return} 48 | executor.update(data_dict=data_dict) 49 | ret_deq.append(daily_return) 50 | 51 | 52 | if __name__ == '__main__': 53 | unittest.main() 54 | -------------------------------------------------------------------------------- /alphamind/tests/execution/test_thresholdexecutor.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-9-22 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | 10 | import pandas as pd 11 | 12 | from alphamind.execution.thresholdexecutor import ThresholdExecutor 13 | 14 | 15 | class TestThresholdExecutor(unittest.TestCase): 16 | 17 | def test_threshold_executor(self): 18 | target_pos = pd.DataFrame({'code': [1, 2, 3], 19 | 'weight': [0.2, 0.3, 0.5], 20 | 'industry': ['a', 'b', 'c']}) 21 | 22 | executor = ThresholdExecutor(turn_over_threshold=0.5) 23 | 24 | # 1st round 25 | turn_over, executed_pos = executor.execute(target_pos) 26 | executor.set_current(executed_pos) 27 | self.assertTrue(target_pos.equals(executed_pos)) 28 | self.assertAlmostEqual(turn_over, target_pos.weight.sum()) 29 | 30 | # 2nd round 31 | target_pos = pd.DataFrame({'code': [1, 2, 4], 32 | 'weight': [0.3, 0.2, 0.5], 33 | 'industry': ['a', 'b', 'd']}) 34 | 35 | turn_over, executed_pos = executor.execute(target_pos) 36 | executor.set_current(executed_pos) 37 | self.assertTrue(target_pos.equals(executed_pos)) 38 | self.assertTrue(executed_pos.equals(executor.current_pos)) 39 | self.assertAlmostEqual(turn_over, 1.2) 40 | 41 | # 3rd round 42 | target_pos = pd.DataFrame({'code': [1, 3, 4], 43 | 'weight': [0.3, 0.2, 0.5], 44 | 'industry': ['a', 'c', 'd']}) 45 | turn_over, executed_pos2 = executor.execute(target_pos) 46 | executor.set_current(executed_pos2) 47 | self.assertTrue(executed_pos.equals(executed_pos2)) 48 | self.assertAlmostEqual(turn_over, 0.)
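        # The third-round change (|0. - 0.2| + |0.2 - 0.| = 0.4) stays below the 0.5 threshold,
        # so the executor keeps the previous positions and reports zero turnover.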
49 | 50 | 51 | if __name__ == '__main__': 52 | unittest.main() 53 | -------------------------------------------------------------------------------- /alphamind/tests/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-miner/alpha-mind/023fca01d2cea7cd50328396c60b06c99706c426/alphamind/tests/model/__init__.py -------------------------------------------------------------------------------- /alphamind/tests/model/test_composer.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2018-2-9 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | 10 | from alphamind.data.engines.universe import Universe 11 | from alphamind.model.composer import Composer 12 | from alphamind.model.composer import DataMeta 13 | from alphamind.model.treemodel import XGBClassifier 14 | 15 | 16 | class TestComposer(unittest.TestCase): 17 | 18 | def _assert_composer_equal(self, lhs: Composer, rhs: Composer): 19 | self.assertEqual(type(lhs.alpha_model), type(rhs.alpha_model)) 20 | self.assertEqual(lhs.data_meta, rhs.data_meta) 21 | 22 | def test_data_meta_persistence(self): 23 | freq = '5b' 24 | universe = Universe('zz800') 25 | batch = 4 26 | neutralized_risk = ['SIZE'] 27 | risk_model = 'long' 28 | pre_process = ['standardize', 'winsorize_normal'] 29 | post_process = ['standardize', 'winsorize_normal'] 30 | warm_start = 2 31 | data_source = 'postgresql://user:pwd@server/dummy' 32 | 33 | data_meta = DataMeta(freq=freq, 34 | universe=universe, 35 | batch=batch, 36 | neutralized_risk=neutralized_risk, 37 | risk_model=risk_model, 38 | pre_process=pre_process, 39 | post_process=post_process, 40 | warm_start=warm_start, 41 | data_source=data_source) 42 | 43 | data_desc = data_meta.save() 44 | 45 | loaded_data = DataMeta.load(data_desc) 46 | self.assertEqual(data_meta.freq, loaded_data.freq) 47 | self.assertEqual(data_meta.universe, loaded_data.universe) 48 | self.assertEqual(data_meta.batch, loaded_data.batch) 49 | self.assertEqual(data_meta.neutralized_risk, loaded_data.neutralized_risk) 50 | self.assertEqual(data_meta.risk_model, loaded_data.risk_model) 51 | self.assertEqual(data_meta.pre_process, loaded_data.pre_process) 52 | self.assertEqual(data_meta.post_process, loaded_data.post_process) 53 | self.assertEqual(data_meta.warm_start, loaded_data.warm_start) 54 | self.assertEqual(data_meta.data_source, loaded_data.data_source) 55 | 56 | def test_composer_persistence(self): 57 | freq = '5b' 58 | universe = Universe('zz800') 59 | batch = 4 60 | neutralized_risk = ['SIZE'] 61 | risk_model = 'long' 62 | pre_process = ['standardize', 'winsorize_normal'] 63 | post_process = ['standardize', 'winsorize_normal'] 64 | warm_start = 2 65 | data_source = 'postgresql://user:pwd@server/dummy' 66 | 67 | data_meta = DataMeta(freq=freq, 68 | universe=universe, 69 | batch=batch, 70 | neutralized_risk=neutralized_risk, 71 | risk_model=risk_model, 72 | pre_process=pre_process, 73 | post_process=post_process, 74 | warm_start=warm_start, 75 | data_source=data_source) 76 | 77 | features = {'f1': 'closePrice', 'f2': 'openPrice'} 78 | alpha_model = XGBClassifier(features=features) 79 | 80 | composer = Composer(alpha_model=alpha_model, 81 | data_meta=data_meta) 82 | 83 | comp_desc = composer.save() 84 | loaded_comp = Composer.load(comp_desc) 85 | self._assert_composer_equal(composer, loaded_comp) 86 | 
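For reference, a minimal sketch (not part of the repository) of the save/load round-trip that test_composer.py above exercises; the feature mapping and the data-source URI mirror the dummy placeholder values used in the tests:

from alphamind.data.engines.universe import Universe
from alphamind.model.composer import Composer, DataMeta
from alphamind.model.treemodel import XGBClassifier

# describe where and how the training data is assembled
data_meta = DataMeta(freq='5b',
                     universe=Universe('zz800'),
                     batch=4,
                     neutralized_risk=['SIZE'],
                     risk_model='long',
                     pre_process=['standardize', 'winsorize_normal'],
                     post_process=['standardize', 'winsorize_normal'],
                     warm_start=2,
                     data_source='postgresql://user:pwd@server/dummy')

# bundle an alpha model with its data description
composer = Composer(alpha_model=XGBClassifier(features={'f1': 'closePrice', 'f2': 'openPrice'}),
                    data_meta=data_meta)

desc = composer.save()          # plain description object, safe to persist
restored = Composer.load(desc)  # rebuilds an equivalent composer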
-------------------------------------------------------------------------------- /alphamind/tests/model/test_linearmodel.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-9-4 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | 10 | import numpy as np 11 | import pandas as pd 12 | from sklearn.linear_model import LinearRegression as LinearRegression2 13 | from sklearn.linear_model import LogisticRegression as LogisticRegression2 14 | 15 | from alphamind.model.linearmodel import ConstLinearModel 16 | from alphamind.model.linearmodel import LinearRegression 17 | from alphamind.model.linearmodel import LogisticRegression 18 | from alphamind.model.loader import load_model 19 | 20 | 21 | class TestLinearModel(unittest.TestCase): 22 | 23 | def setUp(self): 24 | self.n = 3 25 | self.features = ['a', 'b', 'c'] 26 | self.train_x = pd.DataFrame(np.random.randn(1000, self.n), columns=['a', 'b', 'c']) 27 | self.train_y = np.random.randn(1000) 28 | self.train_y_label = np.where(self.train_y > 0., 1, 0) 29 | self.predict_x = pd.DataFrame(np.random.randn(10, self.n), columns=['a', 'b', 'c']) 30 | 31 | def test_const_linear_model(self): 32 | features = ['c', 'b', 'a'] 33 | weights = dict(c=3., b=2., a=1.) 34 | model = ConstLinearModel(features=features, 35 | weights=weights) 36 | 37 | calculated_y = model.predict(self.predict_x) 38 | expected_y = self.predict_x[features] @ np.array([weights[f] for f in features]) 39 | np.testing.assert_array_almost_equal(calculated_y, expected_y) 40 | 41 | def test_const_linear_model_persistence(self): 42 | weights = dict(c=3., b=2., a=1.) 43 | model = ConstLinearModel(features=['a', 'b', 'c'], 44 | weights=weights) 45 | 46 | desc = model.save() 47 | new_model = load_model(desc) 48 | 49 | self.assertEqual(model.features, new_model.features) 50 | np.testing.assert_array_almost_equal(model.weights, new_model.weights) 51 | 52 | def test_const_linear_model_score(self): 53 | model = LinearRegression(['a', 'b', 'c'], fit_intercept=False) 54 | model.fit(self.train_x, self.train_y) 55 | 56 | expected_score = model.score(self.train_x, self.train_y) 57 | 58 | const_model = ConstLinearModel(features=['a', 'b', 'c'], 59 | weights=dict(zip(model.features, model.weights))) 60 | calculated_score = const_model.score(self.train_x, self.train_y) 61 | 62 | self.assertAlmostEqual(expected_score, calculated_score) 63 | 64 | def test_linear_regression(self): 65 | model = LinearRegression(['a', 'b', 'c'], fit_intercept=False) 66 | model.fit(self.train_x, self.train_y) 67 | 68 | calculated_y = model.predict(self.predict_x) 69 | 70 | expected_model = LinearRegression2(fit_intercept=False) 71 | expected_model.fit(self.train_x, self.train_y) 72 | expected_y = expected_model.predict(self.predict_x) 73 | 74 | np.testing.assert_array_almost_equal(calculated_y, expected_y) 75 | np.testing.assert_array_almost_equal(expected_model.coef_, model.weights) 76 | 77 | def test_linear_regression_persistence(self): 78 | model = LinearRegression(['a', 'b', 'c'], fit_intercept=False) 79 | model.fit(self.train_x, self.train_y) 80 | 81 | desc = model.save() 82 | new_model = load_model(desc) 83 | 84 | calculated_y = new_model.predict(self.predict_x) 85 | expected_y = model.predict(self.predict_x) 86 | 87 | np.testing.assert_array_almost_equal(calculated_y, expected_y) 88 | np.testing.assert_array_almost_equal(new_model.weights, model.weights) 89 | 90 | def test_logistic_regression(self): 91 | model = 
LogisticRegression(['a', 'b', 'c'], fit_intercept=False) 92 | model.fit(self.train_x, self.train_y_label) 93 | 94 | calculated_y = model.predict(self.predict_x) 95 | 96 | expected_model = LogisticRegression2(fit_intercept=False) 97 | expected_model.fit(self.train_x, self.train_y_label) 98 | expected_y = expected_model.predict(self.predict_x) 99 | 100 | np.testing.assert_array_equal(calculated_y, expected_y) 101 | np.testing.assert_array_almost_equal(expected_model.coef_, model.weights) 102 | 103 | def test_logistic_regression_persistence(self): 104 | model = LogisticRegression(['a', 'b', 'c'], fit_intercept=False) 105 | model.fit(self.train_x, self.train_y_label) 106 | 107 | desc = model.save() 108 | new_model = load_model(desc) 109 | 110 | calculated_y = new_model.predict(self.predict_x) 111 | expected_y = model.predict(self.predict_x) 112 | 113 | np.testing.assert_array_almost_equal(calculated_y, expected_y) 114 | np.testing.assert_array_almost_equal(new_model.weights, model.weights) 115 | -------------------------------------------------------------------------------- /alphamind/tests/model/test_loader.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-9-5 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | 10 | import numpy as np 11 | import pandas as pd 12 | 13 | from alphamind.model.linearmodel import LinearRegression 14 | from alphamind.model.loader import load_model 15 | 16 | 17 | class TestLoader(unittest.TestCase): 18 | 19 | def setUp(self): 20 | self.n = 3 21 | self.trained_x = pd.DataFrame(np.random.randn(1000, self.n), columns=['a', 'b', 'c']) 22 | self.trained_y = np.random.randn(1000, 1) 23 | 24 | self.predict_x = pd.DataFrame(np.random.randn(100, self.n), columns=['a', 'b', 'c']) 25 | 26 | def test_load_model(self): 27 | model = LinearRegression(['a', 'b', 'c']) 28 | model.fit(self.trained_x, self.trained_y) 29 | 30 | model_desc = model.save() 31 | new_model = load_model(model_desc) 32 | 33 | np.testing.assert_array_almost_equal(model.predict(self.predict_x), 34 | new_model.predict(self.predict_x)) 35 | 36 | self.assertEqual(model.features, new_model.features) 37 | self.assertEqual(model.trained_time, new_model.trained_time) 38 | -------------------------------------------------------------------------------- /alphamind/tests/model/test_modelbase.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2018-2-8 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | 10 | from alphamind.model.linearmodel import ConstLinearModel 11 | 12 | 13 | class TestModelBase(unittest.TestCase): 14 | 15 | def test_simple_model_features(self): 16 | model = ConstLinearModel(features=['c', 'b', 'a']) 17 | self.assertListEqual(['a', 'b', 'c'], model.features) 18 | -------------------------------------------------------------------------------- /alphamind/tests/model/test_treemodel.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2018-1-5 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | 10 | import numpy as np 11 | import pandas as pd 12 | 13 | from alphamind.model.loader import load_model 14 | from alphamind.model.treemodel import RandomForestClassifier 15 | from alphamind.model.treemodel import RandomForestRegressor 16 | from alphamind.model.treemodel import XGBClassifier 17 | from alphamind.model.treemodel import
XGBRegressor 18 | from alphamind.model.treemodel import XGBTrainer 19 | 20 | 21 | class TestTreeModel(unittest.TestCase): 22 | 23 | def setUp(self): 24 | self.features = list('0123456789') 25 | self.x = pd.DataFrame(np.random.randn(1000, 10), columns=self.features) 26 | self.y = np.random.randn(1000) 27 | self.sample_x = pd.DataFrame(np.random.randn(100, 10), columns=self.features) 28 | 29 | def test_random_forest_regress_persistence(self): 30 | model = RandomForestRegressor(features=self.features) 31 | model.fit(self.x, self.y) 32 | 33 | desc = model.save() 34 | new_model = load_model(desc) 35 | self.assertEqual(model.features, new_model.features) 36 | 37 | np.testing.assert_array_almost_equal(model.predict(self.sample_x), 38 | new_model.predict(self.sample_x)) 39 | np.testing.assert_array_almost_equal(model.importances, new_model.importances) 40 | 41 | def test_random_forest_classify_persistence(self): 42 | model = RandomForestClassifier(features=self.features) 43 | y = np.where(self.y > 0, 1, 0) 44 | model.fit(self.x, y) 45 | 46 | desc = model.save() 47 | new_model = load_model(desc) 48 | self.assertEqual(model.features, new_model.features) 49 | 50 | np.testing.assert_array_almost_equal(model.predict(self.sample_x), 51 | new_model.predict(self.sample_x)) 52 | np.testing.assert_array_almost_equal(model.importances, new_model.importances) 53 | 54 | def test_xgb_regress_persistence(self): 55 | model = XGBRegressor(features=self.features) 56 | model.fit(self.x, self.y) 57 | 58 | desc = model.save() 59 | new_model = load_model(desc) 60 | self.assertEqual(model.features, new_model.features) 61 | 62 | np.testing.assert_array_almost_equal(model.predict(self.sample_x), 63 | new_model.predict(self.sample_x)) 64 | np.testing.assert_array_almost_equal(model.importances, new_model.importances) 65 | 66 | def test_xgb_classify_persistence(self): 67 | model = XGBClassifier(features=self.features) 68 | y = np.where(self.y > 0, 1, 0) 69 | model.fit(self.x, y) 70 | 71 | desc = model.save() 72 | new_model = load_model(desc) 73 | self.assertEqual(model.features, new_model.features) 74 | 75 | np.testing.assert_array_almost_equal(model.predict(self.sample_x), 76 | new_model.predict(self.sample_x)) 77 | np.testing.assert_array_almost_equal(model.importances, new_model.importances) 78 | 79 | def test_xgb_trainer_equal_classifier(self): 80 | model1 = XGBClassifier(n_estimators=100, 81 | learning_rate=0.1, 82 | max_depth=3, 83 | features=self.features, 84 | random_state=42) 85 | 86 | model2 = XGBTrainer(features=self.features, 87 | objective='reg:logistic', 88 | booster='gbtree', 89 | tree_method='exact', 90 | n_estimators=100, 91 | learning_rate=0.1, 92 | max_depth=3, 93 | random_state=42) 94 | 95 | y = np.where(self.y > 0, 1, 0) 96 | model1.fit(self.x, y) 97 | model2.fit(self.x, y) 98 | 99 | predict1 = model1.predict(self.sample_x) 100 | predict2 = model2.predict(self.sample_x) 101 | predict2 = np.where(predict2 > 0.5, 1., 0.) 
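        # XGBTrainer emits raw probabilities here; thresholding at 0.5 converts them into the
        # same hard labels that XGBClassifier.predict produces, so the two can be compared directly.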
102 | np.testing.assert_array_almost_equal(predict1, predict2) 103 | 104 | def test_xgb_trainer_persistence(self): 105 | model = XGBTrainer(features=self.features, 106 | objective='binary:logistic', 107 | booster='gbtree', 108 | tree_method='hist', 109 | n_estimators=200) 110 | y = np.where(self.y > 0, 1, 0) 111 | model.fit(self.x, y) 112 | 113 | desc = model.save() 114 | new_model = load_model(desc) 115 | self.assertEqual(model.features, new_model.features) 116 | 117 | np.testing.assert_array_almost_equal(model.predict(self.sample_x), 118 | new_model.predict(self.sample_x)) 119 | np.testing.assert_array_almost_equal(model.importances, new_model.importances) 120 | -------------------------------------------------------------------------------- /alphamind/tests/portfolio/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-4-27 4 | 5 | @author: cheng.li 6 | """ 7 | -------------------------------------------------------------------------------- /alphamind/tests/portfolio/test_evolver.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-11-23 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | 10 | import numpy as np 11 | 12 | from alphamind.portfolio.evolver import evolve_positions 13 | 14 | 15 | class TestEvolver(unittest.TestCase): 16 | 17 | def test_evolve_positions_with_all_positive_position(self): 18 | positions = np.array([0.2, 0.2, 0.8]) 19 | dx_returns = np.array([0.06, 0.04, -0.10]) 20 | 21 | simple_return = np.exp(dx_returns) 22 | curr_pos = positions * simple_return 23 | expected_pos = curr_pos / curr_pos.sum() * positions.sum() 24 | 25 | calculated_pos = evolve_positions(positions, dx_returns) 26 | 27 | np.testing.assert_array_almost_equal(expected_pos, calculated_pos) 28 | 29 | def test_evolve_positions_with_negative_position(self): 30 | positions = np.array([0.2, 0.3, -0.8]) 31 | dx_returns = np.array([0.06, 0.04, -0.10]) 32 | 33 | simple_return = np.exp(dx_returns) 34 | curr_pos = positions * simple_return 35 | expected_pos = curr_pos / np.abs(curr_pos).sum() * np.abs(positions).sum() 36 | 37 | calculated_pos = evolve_positions(positions, dx_returns) 38 | 39 | np.testing.assert_array_almost_equal(expected_pos, calculated_pos) 40 | -------------------------------------------------------------------------------- /alphamind/tests/portfolio/test_linearbuild.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-5-5 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | 10 | import numpy as np 11 | 12 | from alphamind.portfolio.linearbuilder import linear_builder 13 | 14 | 15 | class TestLinearBuild(unittest.TestCase): 16 | def setUp(self): 17 | self.er = np.random.randn(3000) 18 | self.risk_exp = np.random.randn(3000, 30) 19 | self.risk_exp = np.concatenate([self.risk_exp, np.ones((3000, 1))], axis=1) 20 | self.bm = np.random.randint(100, size=3000).astype(float) 21 | self.current_pos = np.random.randint(0, 100, size=3000) 22 | self.current_pos = self.current_pos / self.current_pos.sum() 23 | 24 | def test_linear_build(self): 25 | bm = self.bm / self.bm.sum() 26 | eplson = 1e-6 27 | 28 | status, _, w = linear_builder(self.er, 29 | 0., 30 | 0.01, 31 | self.risk_exp, 32 | (bm @ self.risk_exp, bm @ self.risk_exp)) 33 | self.assertEqual(status, 'optimal') 34 | self.assertAlmostEqual(np.sum(w), 1.) 
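        # Besides full investment, the weights must stay inside the [0, 0.01] per-name bounds
        # and reproduce the benchmark's risk exposures, as checked below.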
35 | self.assertTrue(np.all(w <= 0.01 + eplson)) 36 | self.assertTrue(np.all(w >= -eplson)) 37 | 38 | calc_risk = (w - bm) @ self.risk_exp 39 | expected_risk = np.zeros(self.risk_exp.shape[1]) 40 | np.testing.assert_array_almost_equal(calc_risk, expected_risk) 41 | 42 | def test_linear_build_with_interior(self): 43 | bm = self.bm / self.bm.sum() 44 | eplson = 1e-6 45 | 46 | status, _, w = linear_builder(self.er, 47 | 0., 48 | 0.01, 49 | self.risk_exp, 50 | (bm @ self.risk_exp, bm @ self.risk_exp), 51 | method='interior') 52 | self.assertEqual(status, 'optimal') 53 | self.assertAlmostEqual(np.sum(w), 1.) 54 | self.assertTrue(np.all(w <= 0.01 + eplson)) 55 | self.assertTrue(np.all(w >= -eplson)) 56 | 57 | calc_risk = (w - bm) @ self.risk_exp 58 | expected_risk = np.zeros(self.risk_exp.shape[1]) 59 | np.testing.assert_array_almost_equal(calc_risk, expected_risk) 60 | 61 | def test_linear_build_with_inequality_constraints(self): 62 | bm = self.bm / self.bm.sum() 63 | eplson = 1e-6 64 | 65 | risk_lbound = bm @ self.risk_exp 66 | risk_ubound = bm @ self.risk_exp 67 | 68 | risk_tolerance = 0.01 * np.abs(risk_lbound[:-1]) 69 | 70 | risk_lbound[:-1] = risk_lbound[:-1] - risk_tolerance 71 | risk_ubound[:-1] = risk_ubound[:-1] + risk_tolerance 72 | 73 | status, _, w = linear_builder(self.er, 74 | 0., 75 | 0.01, 76 | self.risk_exp, 77 | risk_target=(risk_lbound, risk_ubound)) 78 | self.assertEqual(status, 'optimal') 79 | self.assertAlmostEqual(np.sum(w), 1.) 80 | self.assertTrue(np.all(w <= 0.01 + eplson)) 81 | self.assertTrue(np.all(w >= -eplson)) 82 | 83 | calc_risk = (w - bm) @ self.risk_exp / np.abs(bm @ self.risk_exp) 84 | self.assertTrue(np.all(np.abs(calc_risk) <= 1.01e-2)) 85 | 86 | def test_linear_build_with_to_constraint(self): 87 | bm = self.bm / self.bm.sum() 88 | eplson = 1e-6 89 | turn_over_target = 0.1 90 | 91 | risk_lbound = bm @ self.risk_exp 92 | risk_ubound = bm @ self.risk_exp 93 | 94 | risk_tolerance = 0.01 * np.abs(risk_lbound[:-1]) 95 | 96 | risk_lbound[:-1] = risk_lbound[:-1] - risk_tolerance 97 | risk_ubound[:-1] = risk_ubound[:-1] + risk_tolerance 98 | 99 | status, _, w = linear_builder(self.er, 100 | 0., 101 | 0.01, 102 | self.risk_exp, 103 | risk_target=(risk_lbound, risk_ubound), 104 | turn_over_target=turn_over_target, 105 | current_position=self.current_pos) 106 | self.assertEqual(status, 'optimal') 107 | self.assertAlmostEqual(np.sum(w), 1.) 108 | self.assertTrue(np.all(w <= 0.01 + eplson)) 109 | self.assertTrue(np.all(w >= -eplson)) 110 | self.assertAlmostEqual(np.abs(w - self.current_pos).sum(), turn_over_target) 111 | 112 | calc_risk = (w - bm) @ self.risk_exp / np.abs(bm @ self.risk_exp) 113 | self.assertTrue(np.all(np.abs(calc_risk) <= 1.0001e-2)) 114 | 115 | def test_linear_build_with_to_constraint_with_ecos(self): 116 | bm = self.bm / self.bm.sum() 117 | eplson = 1e-6 118 | turn_over_target = 0.1 119 | 120 | risk_lbound = bm @ self.risk_exp 121 | risk_ubound = bm @ self.risk_exp 122 | 123 | risk_tolerance = 0.01 * np.abs(risk_lbound[:-1]) 124 | 125 | risk_lbound[:-1] = risk_lbound[:-1] - risk_tolerance 126 | risk_ubound[:-1] = risk_ubound[:-1] + risk_tolerance 127 | 128 | status, _, w = linear_builder(self.er, 129 | 0., 130 | 0.01, 131 | self.risk_exp, 132 | risk_target=(risk_lbound, risk_ubound), 133 | turn_over_target=turn_over_target, 134 | current_position=self.current_pos, 135 | method='ecos') 136 | self.assertEqual(status, 'optimal') 137 | self.assertAlmostEqual(np.sum(w), 1.) 
138 | self.assertTrue(np.all(w <= 0.01 + eplson)) 139 | self.assertTrue(np.all(w >= -eplson)) 140 | self.assertAlmostEqual(np.abs(w - self.current_pos).sum(), turn_over_target) 141 | 142 | calc_risk = (w - bm) @ self.risk_exp / np.abs(bm @ self.risk_exp) 143 | self.assertTrue(np.all(np.abs(calc_risk) <= 1.0001e-2)) 144 | 145 | 146 | if __name__ == '__main__': 147 | unittest.main() 148 | -------------------------------------------------------------------------------- /alphamind/tests/portfolio/test_longshortbuild.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-5-9 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | 10 | import numpy as np 11 | import pandas as pd 12 | 13 | from alphamind.portfolio.longshortbulder import long_short_builder 14 | 15 | 16 | class TestLongShortBuild(unittest.TestCase): 17 | 18 | def setUp(self): 19 | self.x = np.random.randn(3000, 10) 20 | self.groups = np.random.randint(10, 40, size=3000) 21 | choices = np.random.choice(3000, 100, replace=False) 22 | self.masks = np.full(3000, False, dtype=bool) 23 | self.masks[choices] = True 24 | 25 | def test_long_short_build(self): 26 | x = self.x[:, 0].flatten() 27 | calc_weights = long_short_builder(x).flatten() 28 | expected_weights = x / np.abs(x).sum() 29 | np.testing.assert_array_almost_equal(calc_weights, expected_weights) 30 | 31 | calc_weights = long_short_builder(self.x, leverage=2) 32 | expected_weights = self.x / np.abs(self.x).sum(axis=0) * 2 33 | np.testing.assert_array_almost_equal(calc_weights, expected_weights) 34 | 35 | def test_long_short_build_with_group(self): 36 | x = self.x[:, 0].flatten() 37 | calc_weights = long_short_builder(x, groups=self.groups).flatten() 38 | expected_weights = pd.Series(x).groupby(self.groups).apply(lambda s: s / np.abs(s).sum()) 39 | np.testing.assert_array_almost_equal(calc_weights, expected_weights) 40 | 41 | calc_weights = long_short_builder(self.x, groups=self.groups) 42 | expected_weights = pd.DataFrame(self.x).groupby(self.groups).apply( 43 | lambda s: s / np.abs(s).sum(axis=0)) 44 | np.testing.assert_array_almost_equal(calc_weights, expected_weights) 45 | 46 | def test_long_short_build_with_masks(self): 47 | x = self.x[:, 0].flatten() 48 | calc_weights = long_short_builder(x, masks=self.masks, leverage=1.).flatten() 49 | self.assertAlmostEqual(calc_weights.sum(), 0.) 50 | 51 | masked_x = x.copy() 52 | masked_x[self.masks] = 0. 
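        # Masked names get zero weight; the remaining scores are demeaned so longs and shorts
        # net to zero before normalizing by gross exposure.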
53 | masked_x[~self.masks] = masked_x[~self.masks] - masked_x[~self.masks].mean() 54 | expected_weights = masked_x / np.abs(masked_x).sum() 55 | np.testing.assert_array_almost_equal(calc_weights, expected_weights) 56 | 57 | 58 | if __name__ == '__main__': 59 | unittest.main() 60 | -------------------------------------------------------------------------------- /alphamind/tests/portfolio/test_meanvariancebuild.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-6-27 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | 10 | import numpy as np 11 | 12 | from alphamind.portfolio.meanvariancebuilder import mean_variance_builder 13 | from alphamind.portfolio.meanvariancebuilder import target_vol_builder 14 | 15 | 16 | class TestMeanVarianceBuild(unittest.TestCase): 17 | 18 | def test_mean_variance_builder(self): 19 | er = np.array([0.01, 0.02, 0.03]) 20 | cov = np.array([[0.02, 0.01, 0.02], 21 | [0.01, 0.02, 0.03], 22 | [0.02, 0.03, 0.02]]) 23 | ids_var = np.diag([0.01, 0.02, 0.03]) 24 | cov += ids_var 25 | 26 | bm = np.array([0.3, 0.3, 0.4]) 27 | lbound = np.array([0., 0., 0.]) 28 | ubound = np.array([0.4, 0.4, 0.5]) 29 | 30 | risk_exposure = np.array([[1., 1., 1.], 31 | [1., 0., 1.]]).T 32 | risk_target = (np.array([bm.sum(), 0.3]), np.array([bm.sum(), 0.7])) 33 | 34 | model = dict(cov=cov, factor_cov=None, factor_loading=None, idsync=None) 35 | status, _, x = mean_variance_builder(er, model, bm, lbound, ubound, risk_exposure, 36 | risk_target) 37 | 38 | self.assertTrue(status == 'optimal') 39 | self.assertAlmostEqual(x.sum(), bm.sum()) 40 | self.assertTrue(np.all(x <= ubound + 1.e-6)) 41 | self.assertTrue(np.all(x >= lbound - 1.e-6)) 42 | self.assertTrue(np.all(x @ risk_exposure <= risk_target[1] + 1.e-6)) 43 | self.assertTrue(np.all(x @ risk_exposure >= risk_target[0] - 1.e-6)) 44 | np.testing.assert_array_almost_equal(x, [0.1, 0.4, 0.5]) 45 | 46 | def test_mean_variance_builder_without_constraints(self): 47 | er = np.array([0.01, 0.02, 0.03]) 48 | cov = np.array([[0.02, 0.01, 0.02], 49 | [0.01, 0.02, 0.03], 50 | [0.02, 0.03, 0.02]]) 51 | ids_var = np.diag([0.01, 0.02, 0.03]) 52 | cov += ids_var 53 | 54 | bm = np.array([0., 0., 0.]) 55 | 56 | model = dict(cov=cov, factor_cov=None, factor_loading=None, idsync=None) 57 | status, _, x = mean_variance_builder(er, model, bm, None, None, None, None, lam=1) 58 | np.testing.assert_array_almost_equal(x, np.linalg.inv(cov) @ er) 59 | 60 | def test_mean_variance_builder_without_constraints_with_factor_model(self): 61 | pass 62 | 63 | def test_mean_variance_builder_with_none_unity_lambda(self): 64 | er = np.array([0.01, 0.02, 0.03]) 65 | cov = np.array([[0.02, 0.01, 0.02], 66 | [0.01, 0.02, 0.03], 67 | [0.02, 0.03, 0.02]]) 68 | ids_var = np.diag([0.01, 0.02, 0.03]) 69 | cov += ids_var 70 | 71 | bm = np.array([0.3, 0.3, 0.4]) 72 | lbound = np.array([0., 0., 0.]) 73 | ubound = np.array([0.4, 0.4, 0.5]) 74 | 75 | risk_exposure = np.array([[1., 1., 1.], 76 | [1., 0., 1.]]).T 77 | risk_target = (np.array([bm.sum(), 0.3]), np.array([bm.sum(), 0.7])) 78 | 79 | model = dict(cov=cov, factor_cov=None, factor_loading=None, idsync=None) 80 | status, _, x = mean_variance_builder(er, model, bm, lbound, ubound, risk_exposure, 81 | risk_target, lam=100) 82 | 83 | self.assertTrue(status == 'optimal') 84 | self.assertAlmostEqual(x.sum(), bm.sum()) 85 | self.assertTrue(np.all(x <= ubound + 1.e-6)) 86 | self.assertTrue(np.all(x >= lbound - 1.e-6)) 87 | self.assertTrue(np.all(x
@ risk_exposure <= risk_target[1] + 1.e-6)) 88 | self.assertTrue(np.all(x @ risk_exposure >= risk_target[0] - 1.e-6)) 89 | np.testing.assert_array_almost_equal(x, [0.2950, 0.3000, 0.4050]) 90 | 91 | def test_target_vol_builder(self): 92 | er = np.array([0.1, 0.2, 0.3]) 93 | cov = np.array([[0.05, 0.01, 0.02], 94 | [0.01, 0.06, 0.03], 95 | [0.02, 0.03, 0.07]]) 96 | 97 | lbound = np.array([0., 0., 0.]) 98 | ubound = np.array([0.8, 0.8, 0.8]) 99 | 100 | bm = np.array([0.3, 0.3, 0.3]) 101 | 102 | risk_exposure = np.array([[1., 1., 1.]]).T 103 | risk_target = (np.array([bm.sum()]), np.array([bm.sum()])) 104 | model = dict(cov=cov, factor_cov=None, factor_loading=None, idsync=None) 105 | status, _, x = target_vol_builder(er, model, bm, lbound, ubound, risk_exposure, risk_target, 106 | 0.1) 107 | self.assertTrue(status == 'optimal') 108 | self.assertTrue(np.all(x <= ubound + 1.e-6)) 109 | self.assertTrue(np.all(x >= lbound - 1.e-6)) 110 | self.assertTrue(np.all(x @ risk_exposure <= risk_target[1] + 1.e-6)) 111 | self.assertTrue(np.all(x @ risk_exposure >= risk_target[0] - 1.e-6)) 112 | np.testing.assert_array_almost_equal(x, [-0.3, -0.10919033, 0.40919033] + bm) 113 | -------------------------------------------------------------------------------- /alphamind/tests/portfolio/test_percentbuild.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-5-4 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | 10 | import numpy as np 11 | import pandas as pd 12 | 13 | from alphamind.portfolio.percentbuilder import percent_build 14 | 15 | 16 | class TestPercentBuild(unittest.TestCase): 17 | 18 | def setUp(self): 19 | self.n_samples = 3000 20 | self.p_included = 0.1 21 | self.n_groups = 30 22 | self.n_portfolios = range(1, 10) 23 | self.n_mask = 100 24 | 25 | def test_percent_build(self): 26 | n_include = int(self.n_samples * self.p_included) 27 | 28 | for n_portfolio in self.n_portfolios: 29 | x = np.random.randn(self.n_samples, n_portfolio) 30 | 31 | calc_weights = percent_build(x, self.p_included) 32 | 33 | expected_weights = np.zeros((len(x), n_portfolio)) 34 | 35 | masks = (-x).argsort(axis=0).argsort(axis=0) < n_include 36 | 37 | for j in range(x.shape[1]): 38 | expected_weights[masks[:, j], j] = 1. 39 | 40 | np.testing.assert_array_almost_equal(calc_weights, expected_weights) 41 | 42 | def test_percent_build_with_group(self): 43 | for n_portfolio in self.n_portfolios: 44 | 45 | x = np.random.randn(self.n_samples, n_portfolio) 46 | groups = np.random.randint(self.n_groups, size=self.n_samples) 47 | 48 | calc_weights = percent_build(x, self.p_included, groups) 49 | 50 | grouped_ordering = pd.DataFrame(-x).groupby(groups).rank() 51 | grouped_count = pd.DataFrame(-x).groupby(groups).transform(lambda x: x.count()) 52 | expected_weights = np.zeros((len(x), n_portfolio)) 53 | 54 | n_include = (grouped_count * self.p_included).astype(int) 55 | masks = (grouped_ordering <= n_include).values 56 | for j in range(x.shape[1]): 57 | expected_weights[masks[:, j], j] = 1.
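            # Within each group, the top p_included fraction by score gets unit weight and everything else zero.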
-------------------------------------------------------------------------------- /alphamind/tests/portfolio/test_percentbuild.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-5-4 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | 10 | import numpy as np 11 | import pandas as pd 12 | 13 | from alphamind.portfolio.percentbuilder import percent_build 14 | 15 | 16 | class TestPercentBuild(unittest.TestCase): 17 | 18 | def setUp(self): 19 | self.n_samples = 3000 20 | self.p_included = 0.1 21 | self.n_groups = 30 22 | self.n_portfolios = range(1, 10) 23 | self.n_mask = 100 24 | 25 | def test_percent_build(self): 26 | n_include = int(self.n_samples * self.p_included) 27 | 28 | for n_portfolio in self.n_portfolios: 29 | x = np.random.randn(self.n_samples, n_portfolio) 30 | 31 | calc_weights = percent_build(x, self.p_included) 32 | 33 | expected_weights = np.zeros((len(x), n_portfolio)) 34 | 35 | masks = (-x).argsort(axis=0).argsort(axis=0) < n_include 36 | 37 | for j in range(x.shape[1]): 38 | expected_weights[masks[:, j], j] = 1. 39 | 40 | np.testing.assert_array_almost_equal(calc_weights, expected_weights) 41 | 42 | def test_percent_build_with_group(self): 43 | for n_portfolio in self.n_portfolios: 44 | 45 | x = np.random.randn(self.n_samples, n_portfolio) 46 | groups = np.random.randint(self.n_groups, size=self.n_samples) 47 | 48 | calc_weights = percent_build(x, self.p_included, groups) 49 | 50 | grouped_ordering = pd.DataFrame(-x).groupby(groups).rank() 51 | grouped_count = pd.DataFrame(-x).groupby(groups).transform(lambda x: x.count()) 52 | expected_weights = np.zeros((len(x), n_portfolio)) 53 | 54 | n_include = (grouped_count * self.p_included).astype(int) 55 | masks = (grouped_ordering <= n_include).values 56 | for j in range(x.shape[1]): 57 | expected_weights[masks[:, j], j] = 1. 58 | 59 | np.testing.assert_array_almost_equal(calc_weights, expected_weights) 60 | 61 | def test_percent_build_with_masks(self): 62 | for n_portfolio in self.n_portfolios: 63 | x = np.random.randn(self.n_samples, n_portfolio) 64 | choices = np.random.choice(self.n_samples, self.n_mask, replace=False) 65 | masks = np.full(self.n_samples, True, dtype=bool) 66 | masks[choices] = False 67 | 68 | calc_weights = percent_build(x, self.p_included, masks=masks) 69 | 70 | expected_weights = np.zeros((len(x), n_portfolio)) 71 | 72 | filtered_index = np.arange(len(x))[masks] 73 | filtered_x = x[masks] 74 | big_boolean = np.full(x.shape, False, dtype=bool) 75 | 76 | n_included = int(self.p_included * len(x)) 77 | chosen = (-filtered_x).argsort(axis=0).argsort(axis=0) < n_included 78 | big_boolean[filtered_index] = chosen 79 | 80 | for j in range(x.shape[1]): 81 | expected_weights[big_boolean[:, j], j] = 1. 82 | 83 | np.testing.assert_array_almost_equal(calc_weights, expected_weights) 84 | 85 | 86 | if __name__ == '__main__': 87 | unittest.main() 88 | -------------------------------------------------------------------------------- /alphamind/tests/portfolio/test_rankbuild.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-4-27 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | 10 | import numpy as np 11 | import pandas as pd 12 | 13 | from alphamind.portfolio.rankbuilder import rank_build 14 | 15 | 16 | class TestRankBuild(unittest.TestCase): 17 | 18 | def setUp(self): 19 | self.n_samples = 3000 20 | self.n_included = 300 21 | self.n_groups = 30 22 | self.n_portfolio = range(1, 10) 23 | self.n_mask = 100 24 | 25 | def test_rank_build(self): 26 | for n_portfolio in self.n_portfolio: 27 | x = np.random.randn(self.n_samples, n_portfolio) 28 | 29 | calc_weights = rank_build(x, self.n_included) 30 | 31 | expected_weights = np.zeros((len(x), n_portfolio)) 32 | chosen = (-x).argsort(axis=0).argsort(axis=0) < self.n_included 33 | 34 | for j in range(x.shape[1]): 35 | expected_weights[chosen[:, j], j] = 1. 36 | 37 | np.testing.assert_array_almost_equal(calc_weights, expected_weights) 38 | 39 | def test_rank_build_with_group(self): 40 | n_include = int(self.n_included / self.n_groups) 41 | 42 | for n_portfolio in self.n_portfolio: 43 | 44 | x = np.random.randn(self.n_samples, n_portfolio) 45 | groups = np.random.randint(self.n_groups, size=self.n_samples) 46 | 47 | calc_weights = rank_build(x, n_include, groups) 48 | 49 | grouped_ordering = pd.DataFrame(-x).groupby(groups).rank() 50 | expected_weights = np.zeros((len(x), n_portfolio)) 51 | chosen = (grouped_ordering <= n_include).values 52 | for j in range(x.shape[1]): 53 | expected_weights[chosen[:, j], j] = 1. 
54 | 55 | np.testing.assert_array_almost_equal(calc_weights, expected_weights) 56 | 57 | def test_rank_build_with_masks(self): 58 | for n_portfolio in self.n_portfolio: 59 | x = np.random.randn(self.n_samples, n_portfolio) 60 | choices = np.random.choice(self.n_samples, self.n_mask, replace=False) 61 | masks = np.full(self.n_samples, True, dtype=bool) 62 | masks[choices] = False 63 | 64 | calc_weights = rank_build(x, self.n_included, masks=masks) 65 | 66 | expected_weights = np.zeros((len(x), n_portfolio)) 67 | 68 | filtered_index = np.arange(len(x))[masks] 69 | filtered_x = x[masks] 70 | big_boolean = np.full(x.shape, False, dtype=bool) 71 | 72 | chosen = (-filtered_x).argsort(axis=0).argsort(axis=0) < self.n_included 73 | big_boolean[filtered_index] = chosen 74 | 75 | for j in range(x.shape[1]): 76 | expected_weights[big_boolean[:, j], j] = 1. 77 | 78 | np.testing.assert_array_almost_equal(calc_weights, expected_weights) 79 | 80 | 81 | if __name__ == '__main__': 82 | unittest.main() 83 | -------------------------------------------------------------------------------- /alphamind/tests/portfolio/test_riskmodel.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2018-5-29 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | 10 | import numpy as np 11 | import pandas as pd 12 | 13 | from alphamind.portfolio.riskmodel import FactorRiskModel 14 | from alphamind.portfolio.riskmodel import FullRiskModel 15 | 16 | 17 | class TestRiskModel(unittest.TestCase): 18 | 19 | def setUp(self): 20 | self.factor_cov = pd.DataFrame([[0.5, -0.3], [-0.3, 0.7]], columns=['a', 'b'], 21 | index=['a', 'b']) 22 | self.risk_exp = pd.DataFrame([[0.8, 0.2], [0.5, 0.5], [0.2, 0.8]], columns=['a', 'b'], 23 | index=[1, 2, 3]) 24 | self.idsync = pd.Series([0.1, 0.3, 0.2], index=[1, 2, 3]) 25 | self.sec_cov = self.risk_exp.values @ self.factor_cov.values @ self.risk_exp.values.T \ 26 | + np.diag(self.idsync.values) 27 | self.sec_cov = pd.DataFrame(self.sec_cov, columns=[1, 2, 3], index=[1, 2, 3]) 28 | 29 | def test_full_risk_model(self): 30 | self.assertEqual(self.sec_cov.shape, (3, 3)) 31 | model = FullRiskModel(self.sec_cov) 32 | 33 | codes = [1, 2] 34 | res = model.get_cov(codes) 35 | np.testing.assert_array_almost_equal(res, self.sec_cov.loc[codes, codes].values) 36 | 37 | res = model.get_cov() 38 | np.testing.assert_array_almost_equal(res, self.sec_cov.values) 39 | 40 | def test_factor_risk_model(self): 41 | self.assertEqual(self.factor_cov.shape, (2, 2)) 42 | self.assertEqual(self.risk_exp.shape, (3, 2)) 43 | self.assertEqual(self.idsync.shape, (3,)) 44 | 45 | model = FactorRiskModel(self.factor_cov, 46 | self.risk_exp, 47 | self.idsync) 48 | 49 | res = model.get_factor_cov() 50 | np.testing.assert_array_almost_equal(res, self.factor_cov.values) 51 | 52 | codes = [1, 3] 53 | res = model.get_risk_exp(codes) 54 | np.testing.assert_array_almost_equal(res, self.risk_exp.loc[codes, :]) 55 | res = model.get_idsync(codes) 56 | np.testing.assert_array_almost_equal(res, self.idsync[codes]) 57 | 58 | res = model.get_risk_exp() 59 | np.testing.assert_array_almost_equal(res, self.risk_exp) 60 | res = model.get_idsync() 61 | np.testing.assert_array_almost_equal(res, self.idsync)
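 62 | 63 | # The full covariance built in setUp follows the standard factor model identity: 64 | # sec_cov = risk_exp @ factor_cov @ risk_exp.T + diag(idsync). 65 | if __name__ == '__main__': 66 | unittest.main() 67 | 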
-------------------------------------------------------------------------------- /alphamind/tests/settlement/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-4-28 4 | 5 | @author: cheng.li 6 | """ 7 | -------------------------------------------------------------------------------- /alphamind/tests/settlement/test_simplesettle.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-4-28 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | 10 | import numpy as np 11 | import pandas as pd 12 | 13 | from alphamind.settlement.simplesettle import simple_settle 14 | 15 | 16 | class TestSimpleSettle(unittest.TestCase): 17 | 18 | def setUp(self): 19 | self.n_samples = 3000 20 | self.n_groups = 30 21 | self.weights = np.random.randn(self.n_samples) 22 | self.ret_series = np.random.randn(self.n_samples) 23 | self.groups = np.random.randint(self.n_groups, size=self.n_samples) 24 | 25 | def test_simple_settle(self): 26 | calc_ret = simple_settle(self.weights, self.ret_series) 27 | 28 | ret_series = self.ret_series.reshape((-1, 1)) 29 | expected_ret = self.weights @ ret_series 30 | 31 | self.assertAlmostEqual(calc_ret['er'][0], expected_ret[0]) 32 | 33 | def test_simple_settle_with_group(self): 34 | calc_ret = simple_settle(self.weights, self.ret_series, self.groups) 35 | 36 | ret_series = self.weights * self.ret_series 37 | expected_ret = pd.Series(ret_series).groupby(self.groups).sum().values 38 | 39 | np.testing.assert_array_almost_equal(calc_ret['er'].values[:-1], expected_ret) 40 | self.assertAlmostEqual(calc_ret['er'].values[-1], expected_ret.sum()) 41 | 42 | 43 | if __name__ == '__main__': 44 | unittest.main() 45 | -------------------------------------------------------------------------------- /alphamind/tests/test_suite.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-4-25 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import os 9 | 10 | SKIP_ENGINE_TESTS = True 11 | 12 | if not SKIP_ENGINE_TESTS: 13 | try: 14 | DATA_ENGINE_URI = os.environ['DB_URI'] 15 | except KeyError: 16 | DATA_ENGINE_URI = "mysql+mysqldb://reader:Reader#2020@121.37.138.1:13317/vision?charset=utf8" 17 | else: 18 | DATA_ENGINE_URI = None 19 | 20 | 21 | if __name__ == '__main__': 22 | from simpleutils import add_parent_path 23 | 24 | add_parent_path(__file__, 3) 25 | 26 | from simpleutils import TestRunner 27 | from alphamind.utilities import alpha_logger 28 | from alphamind.tests.data.test_neutralize import TestNeutralize 29 | from alphamind.tests.data.test_standardize import TestStandardize 30 | from alphamind.tests.data.test_winsorize import TestWinsorize 31 | from alphamind.tests.data.test_quantile import TestQuantile 32 | from alphamind.tests.data.engines.test_sql_engine import TestSqlEngine 33 | from alphamind.tests.data.engines.test_universe import TestUniverse 34 | from alphamind.tests.portfolio.test_constraints import TestConstraints 35 | from alphamind.tests.portfolio.test_evolver import TestEvolver 36 | from alphamind.tests.portfolio.test_longshortbuild import TestLongShortBuild 37 | from alphamind.tests.portfolio.test_rankbuild import TestRankBuild 38 | from alphamind.tests.portfolio.test_percentbuild import TestPercentBuild 39 | from alphamind.tests.portfolio.test_linearbuild import TestLinearBuild 40 | from alphamind.tests.portfolio.test_meanvariancebuild import TestMeanVarianceBuild 41 | from alphamind.tests.portfolio.test_riskmodel import TestRiskModel 42 | from alphamind.tests.settlement.test_simplesettle import TestSimpleSettle 43 | from alphamind.tests.analysis.test_riskanalysis 
import TestRiskAnalysis 44 | from alphamind.tests.analysis.test_perfanalysis import TestPerformanceAnalysis 45 | from alphamind.tests.analysis.test_factoranalysis import TestFactorAnalysis 46 | from alphamind.tests.analysis.test_quantilieanalysis import TestQuantileAnalysis 47 | from alphamind.tests.model.test_modelbase import TestModelBase 48 | from alphamind.tests.model.test_linearmodel import TestLinearModel 49 | from alphamind.tests.model.test_treemodel import TestTreeModel 50 | from alphamind.tests.model.test_loader import TestLoader 51 | from alphamind.tests.model.test_composer import TestComposer 52 | from alphamind.tests.execution.test_naiveexecutor import TestNaiveExecutor 53 | from alphamind.tests.execution.test_thresholdexecutor import TestThresholdExecutor 54 | from alphamind.tests.execution.test_targetvolexecutor import TestTargetVolExecutor 55 | from alphamind.tests.execution.test_pipeline import TestExecutionPipeline 56 | from alphamind.tests.portfolio.test_optimizers import TestOptimizers 57 | 58 | runner = TestRunner([TestNeutralize, 59 | TestStandardize, 60 | TestWinsorize, 61 | TestQuantile, 62 | TestSqlEngine, 63 | TestUniverse, 64 | TestConstraints, 65 | TestEvolver, 66 | TestLongShortBuild, 67 | TestRankBuild, 68 | TestPercentBuild, 69 | TestLinearBuild, 70 | TestMeanVarianceBuild, 71 | TestRiskModel, 72 | TestSimpleSettle, 73 | TestRiskAnalysis, 74 | TestPerformanceAnalysis, 75 | TestFactorAnalysis, 76 | TestQuantileAnalysis, 77 | TestModelBase, 78 | TestLinearModel, 79 | TestTreeModel, 80 | TestLoader, 81 | TestComposer, 82 | TestNaiveExecutor, 83 | TestThresholdExecutor, 84 | TestTargetVolExecutor, 85 | TestExecutionPipeline, 86 | TestOptimizers], 87 | alpha_logger) 88 | runner.run() 89 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = alpha-mind 8 | SOURCEDIR = . 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /doc/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
12 | # 13 | # import os 14 | # import sys 15 | # sys.path.insert(0, os.path.abspath('.')) 16 | 17 | import sphinx_rtd_theme 18 | 19 | # -- Project information ----------------------------------------------------- 20 | 21 | project = '多因子回测框架' 22 | title = '多因子回测框架文档' 23 | copyright = '2020, 融量' 24 | author = '融量' 25 | 26 | master_doc = 'index' 27 | 28 | # The full version, including alpha/beta/rc tags 29 | version = '0.1.0' 30 | release = version 31 | numfig = True 32 | 33 | # -- General configuration --------------------------------------------------- 34 | 35 | # Add any Sphinx extension module names here, as strings. They can be 36 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 37 | # ones. 38 | 39 | extensions = [ 40 | "sphinx_rtd_theme", 41 | "docxbuilder" 42 | ] 43 | 44 | # Add any paths that contain templates here, relative to this directory. 45 | templates_path = ['_templates'] 46 | 47 | # The language for content autogenerated by Sphinx. Refer to documentation 48 | # for a list of supported languages. 49 | # 50 | # This is also used if you do content translation via gettext catalogs. 51 | # Usually you set "language" from the command line for these cases. 52 | language = 'zh_CN' 53 | 54 | # List of patterns, relative to source directory, that match files and 55 | # directories to ignore when looking for source files. 56 | # This pattern also affects html_static_path and html_extra_path. 57 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 58 | 59 | 60 | # -- Options for HTML output ------------------------------------------------- 61 | 62 | # The theme to use for HTML and HTML Help pages. See the documentation for 63 | # a list of builtin themes. 64 | # 65 | html_theme = "sphinx_rtd_theme" 66 | html_show_sourcelink = False 67 | 68 | html_theme_options = { 69 | 'logo_only': False, 70 | 'display_version': True, 71 | 'prev_next_buttons_location': 'bottom', 72 | 'style_external_links': False, 73 | # 'style_nav_header_background': 'blue', 74 | # Toc options 75 | 'collapse_navigation': True, 76 | 'sticky_navigation': True, 77 | 'navigation_depth': 4, 78 | 'includehidden': True, 79 | 'titles_only': False 80 | } 81 | 82 | # Add any paths that contain custom static files (such as style sheets) here, 83 | # relative to this directory. They are copied after the builtin static files, 84 | # so a file named "default.css" will overwrite the builtin "default.css". 
85 | html_static_path = ['_static'] 86 | 87 | 88 | # -- Options for latex output ---------------------------------------------- 89 | 90 | latex_engine = "xelatex" 91 | 92 | latex_elements = { 93 | "papersize": "a4paper", 94 | 'fncychap': "\\usepackage[Sonny]{fncychap}", 95 | 'inputenc': "", 96 | 'utf8extra': "", 97 | 'fontpkg': '\\usepackage{amsmath,amsfonts,amssymb,amsthm}', 98 | 'preamble': r""" 99 | \setcounter{secnumdepth}{2} 100 | \setcounter{tocdepth}{2} 101 | \usepackage{fontspec} 102 | """, 103 | "figure_align": "H" 104 | } 105 | 106 | latex_documents = [ 107 | ("index", 'main.tex', '多因子回测框架文档', 108 | '融量量化团队', 'manual') 109 | ] 110 | 111 | 112 | # -- Options for docx output ----------------------------------------------- 113 | docx_documents = [ 114 | ('index', 'quantitative_research.docx', { 115 | 'title': '标准化模型服务文档', 116 | 'created': '融量量化团队', 117 | 'subject': '融量量化团队', 118 | 'keywords': ['sphinx'] 119 | }, True), 120 | ] 121 | 122 | docx_style = "docx/style.docx" 123 | docx_pagebreak_before_section = 1 124 | -------------------------------------------------------------------------------- /doc/docx/style.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-miner/alpha-mind/023fca01d2cea7cd50328396c60b06c99706c426/doc/docx/style.docx -------------------------------------------------------------------------------- /doc/index.rst: -------------------------------------------------------------------------------- 1 | .. alpha-mind documentation master file, created by 2 | sphinx-quickstart on Tue May 29 16:58:56 2018. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Multi-Factor Backtesting Framework 7 | ==================================== 8 | 9 | .. toctree:: 10 | :maxdepth: 2 11 | :caption: Contents 12 | 13 | src/changelog 14 | src/whatisit 15 | src/introduction 16 | 17 | -------------------------------------------------------------------------------- /doc/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=build 12 | set SPHINXPROJ=alpha-mind 13 | 14 | if "%1" == "" goto help 15 | 16 | %SPHINXBUILD% >NUL 2>NUL 17 | if errorlevel 9009 ( 18 | echo. 19 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 20 | echo.installed, then set the SPHINXBUILD environment variable to point 21 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 22 | echo.may add the Sphinx directory to PATH. 23 | echo. 
24 | echo.If you don't have Sphinx installed, grab it from 25 | echo.http://sphinx-doc.org/ 26 | exit /b 1 27 | ) 28 | 29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 30 | goto end 31 | 32 | :help 33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 34 | 35 | :end 36 | popd 37 | -------------------------------------------------------------------------------- /doc/src/changelog.rst: -------------------------------------------------------------------------------- 1 | ************ 2 | Changelog 3 | ************ 4 | 5 | Release 0.1.0 6 | ============================== 7 | 8 | Features added 9 | -------------- 10 | 11 | * Added usage documentation. -------------------------------------------------------------------------------- /doc/src/img/alpha-mind.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-miner/alpha-mind/023fca01d2cea7cd50328396c60b06c99706c426/doc/src/img/alpha-mind.png -------------------------------------------------------------------------------- /doc/src/introduction.rst: -------------------------------------------------------------------------------- 1 | ************ 2 | Introduction 3 | ************ 4 | 5 | This short introduction walks you through the full workflow of the multi-factor modelling framework: 6 | data loading, factor mining, factor combination, portfolio optimization and backtesting. 7 | 8 | 9 | Workflow Diagram 10 | ================ 11 | 12 | Omitted. 13 | 14 | Data Access 15 | =============== 16 | 17 | The multi-factor framework ships with its own database schema and can currently connect to mysql and postgresql (more 18 | databases, such as sqlserver, will be supported in the future). The table layout is transparent to users; to specify a data source, only the following is needed: 19 | 20 | .. code-block:: py 21 | :linenos: 22 | 23 | from alphamind.api import * 24 | 25 | data_source = "url_for_some_database" 26 | engine = SqlEngine(data_source) 27 | 28 | Backtest Settings 29 | ================= 30 | 31 | Many options can be configured for a backtest, for example: 32 | 33 | * start and end dates 34 | * rebalancing frequency 35 | * stock universe, usually the constituents of some index 36 | * industry classification; the ShenWan scheme, with code "sw", is currently supported 37 | * benchmark index 38 | * portfolio construction method 39 | 40 | In code, this can be written as: 41 | 42 | .. code-block:: py 43 | :linenos: 44 | 45 | start_date = '2020-01-01' 46 | end_date = '2020-02-21' 47 | 48 | freq = '10b' 49 | industry_name = 'sw' 50 | universe = Universe('hs300') 51 | benchmark_code = 300 52 | method = 'risk_neutral' 53 | 54 | Factor Pool 55 | ==================== 56 | 57 | Users can define a complete factor pool; the multi-factor framework supports backtesting any number of factors: 58 | 59 | .. code-block:: py 60 | :linenos: 61 | 62 | alpha_factors = { 63 | 'f01': CSQuantiles(LAST('EMA5D')), 64 | 'f02': CSQuantiles(LAST('EMV6D')), 65 | } 66 | 67 | Here we use two factors, EMA5D and EMV6D, and map both to cross-sectional quantiles. 68 | 69 | Machine Learning Models 70 | ======================= 71 | 72 | To combine the factors, we attach an alpha model: 73 | 74 | .. code-block:: py 75 | :linenos: 76 | 77 | weights = dict(f01=1., f02=1.) 78 | alpha_model = ConstLinearModel(features=alpha_factors, weights=weights) 79 | 80 | Here we use an equal-weight model. The framework supports several different machine learning models, and users can also plug in custom models of their own. 81 | 
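82 | As a sketch of a trainable alternative (assuming the LinearRegression wrapper exported by alphamind.api, and an already-prepared feature matrix ``x`` with forward returns ``y``), a fitted linear model can be dropped in for the constant-weight one: 83 | 84 | .. code-block:: py 85 | :linenos: 86 | 87 | alpha_model = LinearRegression(features=alpha_factors, fit_intercept=False) 88 | alpha_model.fit(x, y)  # learn the factor weights instead of fixing them 89 | 90 | 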
91 | Portfolio Optimization 92 | ====================== 93 | 94 | Portfolio optimization in the framework is, at its core, mean-variance optimization, but it supports many features: 95 | 96 | * total leverage constraints; 97 | * industry weight constraints; 98 | * style factor constraints; 99 | * turnover constraints; 100 | * constituent weight limits; 101 | 102 | .. code-block:: py 103 | :linenos: 104 | 105 | # Constraints settings 106 | 107 | industry_names = industry_list(industry_name, industry_level) 108 | constraint_risk = ['SIZE', 'SIZENL', 'BETA'] 109 | total_risk_names = constraint_risk + ['benchmark', 'total'] 110 | all_styles = risk_styles + industry_styles + macro_styles 111 | 112 | b_type = [] 113 | l_val = [] 114 | u_val = [] 115 | 116 | previous_pos = pd.DataFrame() 117 | rets = [] 118 | turn_overs = [] 119 | leverages = [] 120 | 121 | for name in total_risk_names: 122 | if name == 'benchmark': 123 | b_type.append(BoundaryType.RELATIVE) 124 | l_val.append(0.8) 125 | u_val.append(1.0) 126 | else: 127 | b_type.append(BoundaryType.ABSOLUTE) 128 | l_val.append(0.0) 129 | u_val.append(0.0) 130 | 131 | bounds = create_box_bounds(total_risk_names, b_type, l_val, u_val) 132 | turn_over_target = 0.4 133 | 134 | The code above ensures that: 135 | 136 | * constituent weights make up no less than 80% of the portfolio; 137 | * the total weight is 100% (no leverage and no cash reserve); 138 | * there is no exposure, relative to the benchmark, on the SIZE, SIZENL and BETA style factors; 139 | * turnover per rebalance does not exceed 40% (counted on both sides). 140 | 141 | Putting It All Together... 142 | =========================== 143 | 144 | A complete backtest then only takes a few calls to the built-in functions: 145 | 146 | .. code-block:: py 147 | :linenos: 148 | 149 | running_setting = RunningSetting(weights_bandwidth=weights_bandwidth, 150 | rebalance_method=method, 151 | bounds=bounds, 152 | turn_over_target=turn_over_target) 153 | 154 | # Strategy 155 | strategy = Strategy(alpha_model, 156 | data_meta, 157 | universe=universe, 158 | start_date=start_date, 159 | end_date=end_date, 160 | freq=freq, 161 | benchmark=benchmark_code) 162 | 163 | strategy.prepare_backtest_data() 164 | ret_df, positions = strategy.run(running_setting=running_setting) 165 | 166 | 167 | Plotting 168 | =============== 169 | 170 | The `ret_df` returned by the step above contains the detailed return series, and `positions` holds the full position history. Users can plot the 171 | results themselves, for example: 172 | 173 | .. code-block:: py 174 | :linenos: 175 | 176 | ret_df[['turn_over', 'excess_return']].cumsum().plot(figsize=(14, 7), secondary_y='turn_over') 177 | 178 | which plots cumulative excess return together with cumulative turnover. 179 | 180 | A Complete Example 181 | ================== 182 | 183 | The complete code lives in the notebooks folder; see Example 2 - Strategy Analysis.ipynb. 184 | -------------------------------------------------------------------------------- /doc/src/whatisit.rst: -------------------------------------------------------------------------------- 1 | ********************* 2 | What is Alpha - Mind? 3 | ********************* 4 | 5 | Alpha - Mind Features 6 | ===================== 7 | 8 | **Alpha - Mind** is an end-to-end alpha modelling toolkit built on the multi-factor analysis methodology. It consists of the following four major functional modules: 9 | 10 | Factor Transformation 11 | --------------------- 12 | 13 | Fill, winsorize, neutralize and arithmetically combine raw data, with both time-series and cross-sectional 14 | operations, helping users turn raw data into standardized, model-ready factors. 15 | 16 | Factor Combination 17 | ------------------ 18 | 19 | A single factor only goes so far; in real scenarios we usually need to combine several factors. This combination can be done with standard arithmetic operations, 20 | or with machine-learning methods. Alpha - Mind supports both. 21 | 22 | 23 | Portfolio Optimization 24 | ---------------------- 25 | 26 | Factor values (alpha) are an expected ranking of stock attractiveness, but in real investing we cannot allocate purely by 27 | a stock's own alpha; the constraints involved may include industry allocation limits, style exposure limits, per-stock compliance limits, tracking error limits and turnover limits. 28 | 29 | Strategy Backtesting 30 | -------------------- 31 | 32 | Once all the steps above are done, the full pipeline from data to stock portfolio is in place; we then rebalance at each rebalancing period and finally obtain the 33 | strategy's performance over the whole backtest horizon (return, volatility, drawdown, etc.). 34 | 35 | Workflow Diagram 36 | ---------------- 37 | 38 | .. 
image:: img/alpha-mind.png 39 | 40 | 41 | -------------------------------------------------------------------------------- /entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | export PYTHONPATH=$PYTHONPATH:/ 4 | export DB_VENDOR="mysql" 5 | # export DB_URI="mysql+mysqldb://dxrw:dxRW20_2@121.37.138.1:13317/dxtest?charset=utf8" 6 | export DB_URI="mysql+mysqldb://reader:Reader#2020@121.37.138.1:13316/vision_product?charset=utf8" 7 | export FACTOR_TABLES="factor_momentum" 8 | jupyter lab --ip="0.0.0.0" --port=8080 --allow-root --ServerApp.token='' --ServerApp.password='sha1:f7761f682bc4:1aba35e73699fe62570573de373bf95b491022a7' -------------------------------------------------------------------------------- /install/001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-miner/alpha-mind/023fca01d2cea7cd50328396c60b06c99706c426/install/001.png -------------------------------------------------------------------------------- /install/002.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-miner/alpha-mind/023fca01d2cea7cd50328396c60b06c99706c426/install/002.png -------------------------------------------------------------------------------- /install/install.md: -------------------------------------------------------------------------------- 1 | ## Alpha-Mind Installation Guide 2 | 3 | ### 1. Environment 4 | 5 | #### The mysql driver 6 | 7 | The factor data used by `Alpha-Mind` is stored in a database, so the environment that runs `Alpha-Mind` needs a suitable database driver installed. Here we use `mysql`. 8 | 9 | The official download page for the `mysql` driver: 10 | 11 | https://dev.mysql.com/downloads/ 12 | 13 | After opening the download page, you will see the following: 14 | 15 | ![](001.png) 16 | 17 | Click `Connector/Python` to open the `Python` driver download page: 18 | 19 | ![002](002.png) 20 | 21 | On that page: 22 | 23 | - choose the correct operating system 24 | - choose the correct operating system version 25 | - choose a suitable driver version to download 26 | 27 | Then download and install the database driver. 28 | 29 | #### Compiler 30 | 31 | Some of the packages `Alpha-Mind` depends on, such as `ecos`, are compiled during installation, so the environment needs a C compiler. 32 | 33 | - Windows 34 | install Visual Studio 2015 or later 35 | 36 | - Linux 37 | 38 | ```bash 39 | yum -y install gcc 40 | yum -y install gcc-c++ 41 | ``` 42 | 43 | or 44 | 45 | ```bash 46 | apt-get install gcc 47 | apt-get install g++ 48 | ``` 49 | 50 | 51 | 52 | ### 2. Installation 53 | 54 | Once the environment is ready, run the following command to install the `Alpha-Mind` package: 55 | 56 | ```bash 57 | pip install Alpha-Mind 58 | ``` 59 | 60 | If your network connection is poor, a mirror inside China can be used instead, e.g. the Aliyun mirror: 61 | 62 | ```bash 63 | pip install Alpha-Mind -i https://mirrors.aliyun.com/pypi/simple 64 | ``` 65 | 66 | #### Environment variables 67 | 68 | After installation, the following environment variables must be set correctly for `Alpha-Mind` to work: 69 | 70 | - **DB_VENDOR**: the database type, here set to `mysql`: 71 | 72 | `DB_VENDOR=mysql` 73 | 74 | - **DB_URI**: the database connection string 75 | 76 | `DB_URI=mysql+mysqldb://rlUser:123456@10.16.50.12:3306/rl?charset=utf8` 77 | 78 | - **FACTOR_TABLES**: the names of the factor tables, separated by `,`, for example: 79 | 80 | `FACTOR_TABLES=factor_momentum,factor_power_volume,factor_basic_derivation`
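 81 | 82 | On Linux, for example, these can simply be exported in the shell before launching Python; a minimal sketch, reusing the example values above (substitute your own connection details): 83 | 84 | ```bash 85 | export DB_VENDOR=mysql 86 | export DB_URI="mysql+mysqldb://rlUser:123456@10.16.50.12:3306/rl?charset=utf8" 87 | export FACTOR_TABLES=factor_momentum,factor_power_volume,factor_basic_derivation 88 | ```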
-------------------------------------------------------------------------------- /notebooks/Quick Start 6 - Formula Based Stocks Screening.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Alphamind Quick Start 6: Formula Based Stock Screening" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 2, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "%matplotlib inline\n", 17 | "\n", 18 | "import os\n", 19 | "from PyFin.api import *\n", 20 | "from alphamind.api import *" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "## 1. Writing the formula\n", 28 | "-------------------------" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 3, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "# CSTopN picks the top N values in the cross-section\n", 38 | "universe_name = 'hs300'\n", 39 | "# pick the 3 securities with the highest EMA5D factor values\n", 40 | "# see the official documentation for the available formula functions\n", 41 | "formula = CSTopN(LAST('EMA5D'), 3) \n", 42 | "ref_date = '2020-01-02'" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "## 2. Fetching the data\n", 50 | "---------------" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 4, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "depends = formula.fields\n", 60 | "engine = SqlEngine(os.environ['DB_URI'])\n", 61 | "universe = Universe(universe_name)  # set the stock universe\n", 62 | "codes = universe.query(engine, dates=[ref_date])" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 5, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "factors = engine.fetch_factor(ref_date, depends, codes.code.tolist()).dropna()\n", 72 | "factors.index = [1] * len(factors)\n", 73 | "factors = factors[['code'] + depends]" 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": {}, 79 | "source": [ 80 | "## 3. Stock Screening\n", 81 | "---------------------" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 6, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "res = formula.transform(factors, name='factor', category_field='code')" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 7, 96 | "metadata": {}, 97 | "outputs": [ 98 | { 99 | "data": { 100 | "text/html": [ 101 | "
\n", 102 | "\n", 115 | "\n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | "
factorcode
11.02010000438
11.02010001184
11.02010019213
\n", 141 | "
" 142 | ], 143 | "text/plain": [ 144 | " factor code\n", 145 | "1 1.0 2010000438\n", 146 | "1 1.0 2010001184\n", 147 | "1 1.0 2010019213" 148 | ] 149 | }, 150 | "execution_count": 7, 151 | "metadata": {}, 152 | "output_type": "execute_result" 153 | } 154 | ], 155 | "source": [ 156 | "res[res.factor == 1]" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": null, 162 | "metadata": {}, 163 | "outputs": [], 164 | "source": [] 165 | } 166 | ], 167 | "metadata": { 168 | "kernelspec": { 169 | "display_name": "Python 3", 170 | "language": "python", 171 | "name": "python3" 172 | }, 173 | "language_info": { 174 | "codemirror_mode": { 175 | "name": "ipython", 176 | "version": 3 177 | }, 178 | "file_extension": ".py", 179 | "mimetype": "text/x-python", 180 | "name": "python", 181 | "nbconvert_exporter": "python", 182 | "pygments_lexer": "ipython3", 183 | "version": "3.8.8" 184 | } 185 | }, 186 | "nbformat": 4, 187 | "nbformat_minor": 4 188 | } 189 | -------------------------------------------------------------------------------- /notebooks/Step By Step 01 - 入门.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Step By Step 01 - 入门\n", 8 | "----------" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "## 一、常用工具包\n", 16 | "\n", 17 | "在**alpha-mind**的使用中,经常会用到的工具包括:\n", 18 | "\n", 19 | "* *pandas*:主要用于表格数据处理;\n", 20 | "* *numpy*:高性能的向量计算工具;\n", 21 | "* *matplotlib*:作图工具。\n", 22 | "\n", 23 | "除此之外,我们还会用到一些比较专门的工具:\n", 24 | "\n", 25 | "* *scipy*:一些常用的数值算法;\n", 26 | "* *cvxpy*:优化工具包\n", 27 | "\n", 28 | "下面的代码验证以下包已经正确按照:" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 1, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "import numpy as np\n", 38 | "import pandas as pd\n", 39 | "from matplotlib import pyplot as plt\n", 40 | "import scipy\n", 41 | "import cvxpy" 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "metadata": {}, 47 | "source": [ 48 | "### 二、**alpha-mind**\n", 49 | "\n", 50 | "**alpha-mind**是标准的python工具包,可以直接从**pip**安装:\n", 51 | "\n", 52 | "```bash\n", 53 | "$ pip install alpha-mind\n", 54 | "```" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 4, 60 | "metadata": {}, 61 | "outputs": [ 62 | { 63 | "data": { 64 | "text/plain": [ 65 | "'0.3.1'" 66 | ] 67 | }, 68 | "execution_count": 4, 69 | "metadata": {}, 70 | "output_type": "execute_result" 71 | } 72 | ], 73 | "source": [ 74 | "import alphamind as ad\n", 75 | "from alphamind.api import *\n", 76 | "ad.__version__" 77 | ] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": {}, 82 | "source": [ 83 | "可以验证alpha-mind已经安装成功,下面我们可以正式开始探索它的各种功能!" 
84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": null, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [] 92 | } 93 | ], 94 | "metadata": { 95 | "kernelspec": { 96 | "display_name": "Python 3", 97 | "language": "python", 98 | "name": "python3" 99 | }, 100 | "language_info": { 101 | "codemirror_mode": { 102 | "name": "ipython", 103 | "version": 3 104 | }, 105 | "file_extension": ".py", 106 | "mimetype": "text/x-python", 107 | "name": "python", 108 | "nbconvert_exporter": "python", 109 | "pygments_lexer": "ipython3", 110 | "version": "3.8.8" 111 | } 112 | }, 113 | "nbformat": 4, 114 | "nbformat_minor": 4 115 | } 116 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | arrow 2 | cvxpy 3 | cvxopt 4 | cython 5 | deprecated 6 | ecos 7 | finance-python 8 | jupyter 9 | jupyterlab 10 | matplotlib 11 | mysqlclient 12 | numba 13 | numpy 14 | pandas 15 | portfolio-optimizer 16 | psycopg2-binary 17 | scikit-learn 18 | scipy 19 | simpleutils 20 | sqlalchemy 21 | statsmodels 22 | xgboost 23 | xlsxwriter 24 | -------------------------------------------------------------------------------- /requirements_docker.txt: -------------------------------------------------------------------------------- 1 | arrow 2 | cvxpy 3 | cvxopt 4 | deprecated 5 | ecos 6 | mysqlclient 7 | portfolio-optimizer 8 | psycopg2-binary 9 | simpleutils 10 | xgboost 11 | xlsxwriter 12 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-4-25 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import io 9 | from setuptools import setup 10 | from setuptools import find_packages 11 | 12 | VERSION = "0.3.1" 13 | 14 | setup( 15 | name='Alpha-Mind', 16 | version=VERSION, 17 | packages=find_packages(), 18 | url='', 19 | license='MIT', 20 | author='wegamekinglc', 21 | author_email='', 22 | scripts=['alphamind/bin/alphamind'], 23 | install_requires=io.open('requirements.txt', encoding='utf8').read(), 24 | description='', 25 | include_package_data=True 26 | ) --------------------------------------------------------------------------------