├── .coveragerc
├── .dockerignore
├── .gitignore
├── .travis.yml
├── Dockerfile
├── LICENSE.txt
├── MANIFEST.in
├── README.md
├── alphamind
│   ├── __init__.py
│   ├── analysis
│   │   ├── __init__.py
│   │   ├── calculators.py
│   │   ├── crosssetctions.py
│   │   ├── factoranalysis.py
│   │   ├── perfanalysis.py
│   │   ├── quantileanalysis.py
│   │   └── riskanalysis.py
│   ├── api.py
│   ├── benchmarks
│   │   ├── __init__.py
│   │   ├── benchmarks.py
│   │   ├── data
│   │   │   ├── __init__.py
│   │   │   ├── neutralize.py
│   │   │   ├── standardize.py
│   │   │   └── winsorize.py
│   │   ├── portfolio
│   │   │   ├── __init__.py
│   │   │   ├── linearbuild.py
│   │   │   ├── longshortbuild.py
│   │   │   ├── percentbuild.py
│   │   │   └── rankbuild.py
│   │   └── settlement
│   │       ├── __init__.py
│   │       └── simplesettle.py
│   ├── bin
│   │   ├── __init__.py
│   │   ├── alphamind
│   │   └── cli.py
│   ├── data
│   │   ├── __init__.py
│   │   ├── dbmodel
│   │   │   ├── __init__.py
│   │   │   └── models
│   │   │       ├── __init__.py
│   │   │       ├── mysql.py
│   │   │       └── postgres.py
│   │   ├── engines
│   │   │   ├── __init__.py
│   │   │   ├── industries.py
│   │   │   ├── sqlengine
│   │   │   │   ├── __init__.py
│   │   │   │   ├── mysql.py
│   │   │   │   └── postgres.py
│   │   │   ├── universe.py
│   │   │   └── utilities.py
│   │   ├── neutralize.py
│   │   ├── processing.py
│   │   ├── quantile.py
│   │   ├── rank.py
│   │   ├── standardize.py
│   │   ├── transformer.py
│   │   └── winsorize.py
│   ├── exceptions
│   │   ├── __init__.py
│   │   └── exceptions.py
│   ├── execution
│   │   ├── __init__.py
│   │   ├── baseexecutor.py
│   │   ├── naiveexecutor.py
│   │   ├── pipeline.py
│   │   ├── targetvolexecutor.py
│   │   └── thresholdexecutor.py
│   ├── formula
│   │   ├── __init__.py
│   │   └── utilities.py
│   ├── model
│   │   ├── __init__.py
│   │   ├── composer.py
│   │   ├── data_preparing.py
│   │   ├── linearmodel.py
│   │   ├── loader.py
│   │   ├── modelbase.py
│   │   ├── svm.py
│   │   └── treemodel.py
│   ├── portfolio
│   │   ├── __init__.py
│   │   ├── constraints.py
│   │   ├── evolver.py
│   │   ├── linearbuilder.py
│   │   ├── longshortbulder.py
│   │   ├── meanvariancebuilder.py
│   │   ├── optimizers.py
│   │   ├── percentbuilder.py
│   │   ├── rankbuilder.py
│   │   └── riskmodel.py
│   ├── settlement
│   │   ├── __init__.py
│   │   └── simplesettle.py
│   ├── strategy
│   │   ├── __init__.py
│   │   ├── sample_strategy.json
│   │   └── strategy.py
│   ├── tests
│   │   ├── __init__.py
│   │   ├── analysis
│   │   │   ├── __init__.py
│   │   │   ├── test_factoranalysis.py
│   │   │   ├── test_perfanalysis.py
│   │   │   ├── test_quantilieanalysis.py
│   │   │   └── test_riskanalysis.py
│   │   ├── cython
│   │   │   └── __init__.py
│   │   ├── data
│   │   │   ├── __init__.py
│   │   │   ├── engines
│   │   │   │   ├── __init__.py
│   │   │   │   ├── test_sql_engine.py
│   │   │   │   └── test_universe.py
│   │   │   ├── test_neutralize.py
│   │   │   ├── test_quantile.py
│   │   │   ├── test_rank.py
│   │   │   ├── test_standardize.py
│   │   │   └── test_winsorize.py
│   │   ├── execution
│   │   │   ├── __init__.py
│   │   │   ├── test_naiveexecutor.py
│   │   │   ├── test_pipeline.py
│   │   │   ├── test_targetvolexecutor.py
│   │   │   └── test_thresholdexecutor.py
│   │   ├── model
│   │   │   ├── __init__.py
│   │   │   ├── test_composer.py
│   │   │   ├── test_linearmodel.py
│   │   │   ├── test_loader.py
│   │   │   ├── test_modelbase.py
│   │   │   └── test_treemodel.py
│   │   ├── portfolio
│   │   │   ├── __init__.py
│   │   │   ├── test_constraints.py
│   │   │   ├── test_evolver.py
│   │   │   ├── test_linearbuild.py
│   │   │   ├── test_longshortbuild.py
│   │   │   ├── test_meanvariancebuild.py
│   │   │   ├── test_optimizers.py
│   │   │   ├── test_percentbuild.py
│   │   │   ├── test_rankbuild.py
│   │   │   └── test_riskmodel.py
│   │   ├── settlement
│   │   │   ├── __init__.py
│   │   │   └── test_simplesettle.py
│   │   └── test_suite.py
│   └── utilities.py
├── doc
│   ├── Makefile
│   ├── conf.py
│   ├── docx
│   │   └── style.docx
│   ├── index.rst
│   ├── make.bat
│   └── src
│       ├── changelog.rst
│       ├── img
│       │   └── alpha-mind.png
│       ├── introduction.rst
│       └── whatisit.rst
├── entrypoint.sh
├── install
│   ├── 001.png
│   ├── 002.png
│   └── install.md
├── notebooks
│   ├── Example 1 - Factor IC analysis.ipynb
│   ├── Example 10 - Quadratic Optimizer Comparison with CVXOPT.ipynb
│   ├── Example 11 - Long Short Strategy Model.ipynb
│   ├── Example 12 - Machine Learning Model Prediction.ipynb
│   ├── Example 13 - Evaluation within Industry Groups.ipynb
│   ├── Example 2 - Strategy Analysis.ipynb
│   ├── Example 3 - Multi Weight Gap Comparison.ipynb
│   ├── Example 4 - Single Factor Analysis.ipynb
│   ├── Example 5 - Style Factor Analysis.ipynb
│   ├── Example 6 - Target Volatility Builder.ipynb
│   ├── Example 7 - Portfolio Optimizer Performance.ipynb
│   ├── Example 9 - Linear Optimizer Comparison with CVXOPT.ipynb
│   ├── Quick Start 1 - Factor Preprocess.ipynb
│   ├── Quick Start 2 - Factor Rank and Quantile.ipynb
│   ├── Quick Start 3 - Portfolio Builder.ipynb
│   ├── Quick Start 5 - Alpha Factor Quantile Analysis.ipynb
│   ├── Quick Start 6 - Formula Based Stocks Screening.ipynb
│   ├── Quick Start 7 - Single Factor IC Analysis.ipynb
│   ├── Quick Start 8 - IC Decay Calculation.ipynb
│   ├── Quick Start 9 - Back Testing Stock Screening.ipynb
│   ├── Step By Step 01 - 入门.ipynb
│   ├── Step By Step 02 - 数据获取.ipynb
│   ├── Step By Step 03 - 因子设计.ipynb
│   ├── Step By Step 04 - 因子预处理.ipynb
│   ├── Step By Step 05 - 组合优化.ipynb
│   ├── Step By Step 06 - 完整的回测.ipynb
│   └── Step By Step 07 - 风险因子模型.ipynb
├── requirements.txt
├── requirements_docker.txt
├── scripts
│   └── update_uqer_data_postgres.py
├── setup.cfg
└── setup.py
/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | source=alphamind
3 | omit=alphamind/tests/*
--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
1 | ./build
2 | ./dist
3 | ./Alpha_Mind.egg-info
4 | ./report
5 | ./script
6 | .git
7 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | .idea/*
3 | build/*
4 | dist/*
5 | Alpha_Mind.egg-info/*
6 | *.pyd
7 | *.c
8 | *.cpp
9 | *.html
10 | *.nbc
11 | *.nbi
12 | /notebooks/.ipynb_checkpoints/*
13 | /notebooks/machine learning/.ipynb_checkpoints/*
14 | alphamind/cython/*.so
15 | alphamind/examples/*.xlsx
16 | alphamind/examples/*.csv
17 | doc/_build
18 | doc/build
19 | settings.json
20 | doc/source/_build
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 | python:
3 | - "3.6"
4 | - "3.7"
5 | - "3.8"
6 | # command to install dependencies
7 | sudo: enabled
8 | dist: bionic
9 | addons:
10 | apt:
11 | packages:
12 | - g++
13 | - coinor-cbc
14 | - coinor-libcbc-dev
15 | install:
16 | - pip install cython numpy
17 | - pip install -r requirements.txt
18 | - pip install coverage
19 | - pip install coveralls --ignore-installed
20 | script:
21 | - export NUMBA_DISABLE_JIT=1
22 | - coverage run --rcfile=./.coveragerc alphamind/tests/test_suite.py
23 | - coverage report --rcfile=./.coveragerc -i
24 | - coverage html --rcfile=./.coveragerc -i
25 | after_success:
26 | - coveralls
27 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM continuumio/anaconda3:2021.05
2 |
3 | LABEL maintainer="scrappedprince.li@gmail.com"
4 | RUN apt-get update && apt-get install build-essential default-libmysqlclient-dev coinor-cbc coinor-libcbc-dev -y
5 | ENV COIN_INSTALL_DIR /usr
6 |
7 | WORKDIR /
8 | COPY ./requirements_docker.txt /requirements.txt
9 | RUN pip install -r /requirements.txt -i https://pypi.douban.com/simple
10 | RUN pip install "finance-python>=0.8.1" -i https://pypi.douban.com/simple
11 |
12 | WORKDIR /
13 | COPY ./alphamind /alphamind
14 | COPY ./notebooks /notebooks
15 |
16 | COPY ./setup.py /setup.py
17 | COPY ./setup.cfg /setup.cfg
18 |
19 | EXPOSE 8080
20 | COPY ./entrypoint.sh /entrypoint.sh
21 | RUN chmod +x /entrypoint.sh
22 |
23 | WORKDIR /notebooks
24 | ENTRYPOINT ["/entrypoint.sh"]
25 | CMD []
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2017 Cheng Li
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include MANIFEST.in
2 | include .coveragerc
3 | include requirements.txt
4 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Alpha - Mind
2 |
3 | *(badges: Python version | Build Status | Coverage)*
4 |
5 | **Alpha - Mind** is a multi-factor equity research framework developed in **Python**.
6 |
7 | ## TODO list
8 |
9 | After a long pause, development of **alpha-mind** is restarting. The list below gives the features and improvements currently planned:
10 |
11 | - [x] Support MySQL as a data backend;
12 | - [ ] Support CSV files as a data backend, with a sample file provided for users to test against;
13 | - [x] Remove all C++ code to make alpha-mind easier to install;
14 | - [x] Provide packages installable directly via pip on Windows and Linux;
15 | - [ ] Complete documentation;
16 | - [ ] Hyper-parameter tuning for alpha models;
17 | - [ ] Multi-period prediction for alpha models;
18 | - [ ] Multi-period optimization in the optimizer.
19 |
20 | ## Dependencies
21 |
22 | The project has two main external GitHub dependencies:
23 |
24 | * [Finance-Python](https://github.com/alpha-miner/finance-python)
25 |
26 | * [portfolio - optimizer](https://github.com/alpha-miner/portfolio-optimizer): an optimizer toolkit for asset allocation, written by the same author.
27 |
28 | Both libraries can be installed directly with pip.
29 |
30 | ## Features
31 |
32 | alpha - mind provides the tool chain commonly used in multi-factor research, including:
33 |
34 | * data cleaning
35 | * alpha models
36 | * risk models
37 | * portfolio optimization
38 | * execution
39 |
40 | Every module comes with a full set of test cases to keep it as correct as possible. Performance also received close attention throughout the development of the numerical models, which lean on well-established third-party packages:
41 |
42 | * numpy
43 | * numba
44 | * cvxopt
45 | * cvxpy
46 | * pandas
47 | * scipy
48 |
49 | ## Installation
50 |
51 | A more detailed installation guide can be found in the install directory.
52 |
53 | Installation is from source, by cloning or downloading the repository:
54 |
55 | Clone the project locally
56 |
57 | ```shell
58 | $ git clone https://github.com/rongliang-tech/alpha-mind.git
59 | ```
60 |
61 | Then install it with the following command
62 |
63 | ```shell
64 | $ python setup.py install
65 | ```
66 |
67 | ### Running with Docker
68 |
69 | 1. `docker build -t alpha-mind:latest -f Dockerfile .`
70 |
71 | 2. `docker run -it -p 8080:8080 --name alpha-mind alpha-mind`
72 |
73 | Then open a browser at 127.0.0.1:8080/lab and log in with the password: `rongliang2021`
74 |
75 | A custom initial password can be configured; see [generating a Jupyter password](https://jupyter-notebook.readthedocs.io/en/stable/public_server.html#preparing-a-hashed-password)
76 |
77 | #### Notes
78 |
79 | The environment variables are configured in `./entrypoint.sh`, including:
80 |
81 | * `DB_VENDOR`: set to `rl` if using mysql;
82 | * `DB_URI`: the database connection string.
83 | * `FACTOR_TABLES`: the factor tables to use
84 |
--------------------------------------------------------------------------------
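
The quickest way to get a feel for the analysis API is a synthetic run that needs no database; a minimal sketch (array sizes and the quintile count are arbitrary illustrations):

```python
import numpy as np

from alphamind.api import er_quantile_analysis

# Hypothetical expected returns and realized next-period returns for 500 stocks.
er = np.random.randn(500)
dx_return = np.random.randn(500)

# Average realized return within each expected-return quintile.
print(er_quantile_analysis(er, n_bins=5, dx_return=dx_return))
```
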
/alphamind/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-4-25
4 |
5 | @author: cheng.li
6 | """
7 |
8 | __version__ = "0.3.1"
9 |
--------------------------------------------------------------------------------
/alphamind/analysis/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-5-6
4 |
5 | @author: cheng.li
6 | """
7 |
--------------------------------------------------------------------------------
/alphamind/analysis/calculators.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-5-18
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import pandas as pd
9 |
10 |
11 | def calculate_turn_over(pos_table: pd.DataFrame) -> pd.DataFrame:
12 |     turn_over_table = {}
13 |     # exclude the key columns so only the factor columns are treated as portfolios
14 |     total_factors = pos_table.columns.difference(['code', 'trade_date'])
15 |
16 |     for name in total_factors:
17 |         pivot_position = pos_table.pivot(index='trade_date', values=name, columns='code').fillna(0.)
18 |         turn_over_series = pivot_position.diff().abs().sum(axis=1)
19 |         turn_over_table[name] = turn_over_series.values
20 |
21 |     turn_over_table = pd.DataFrame(turn_over_table, index=pos_table.trade_date.unique())
22 |     return turn_over_table[total_factors]
23 |
--------------------------------------------------------------------------------
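
A minimal usage sketch for `calculate_turn_over`; the expected layout of the position table (a `trade_date` column, a `code` column, and one column per factor) is inferred from the pivot above rather than documented:

```python
import pandas as pd

from alphamind.analysis.calculators import calculate_turn_over

# Hypothetical two-day, two-stock position table with one factor column 'f1'.
pos_table = pd.DataFrame({'trade_date': ['2017-05-18', '2017-05-18', '2017-05-19', '2017-05-19'],
                          'code': [1, 2, 1, 2],
                          'f1': [0.6, 0.4, 0.5, 0.5]})

# Day one diffs against nothing (0.0); day two turns over |0.5-0.6| + |0.5-0.4| = 0.2.
print(calculate_turn_over(pos_table))
```
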
/alphamind/analysis/crosssetctions.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2018-3-5
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import numpy as np
9 | import pandas as pd
10 | import statsmodels.api as sm
11 |
12 | from alphamind.data.processing import factor_processing
13 | from alphamind.data.standardize import standardize
14 | from alphamind.data.winsorize import winsorize_normal
15 | from alphamind.utilities import alpha_logger
16 |
17 |
18 | def cs_impl(ref_date,
19 | factor_data,
20 | factor_name,
21 | risk_exposure,
22 | constraint_risk,
23 | industry_matrix,
24 | dx_returns):
25 | total_data = pd.merge(factor_data, risk_exposure, on='code')
26 | total_data = pd.merge(total_data, industry_matrix, on='code')
27 | total_data = total_data.replace([np.inf, -np.inf], np.nan).dropna()
28 |
29 | if len(total_data) < 0.33 * len(factor_data):
30 |         alpha_logger.warning(f"valid data points ({len(total_data)}) "
31 |                              f"are fewer than 33% of the total sample ({len(factor_data)}). Omitting this run")
32 | return np.nan, np.nan, np.nan
33 |
34 | total_risk_exp = total_data[constraint_risk]
35 |
36 | er = total_data[[factor_name]].values.astype(float)
37 | er = factor_processing(er, [winsorize_normal, standardize], total_risk_exp.values,
38 | [standardize]).flatten()
39 | industry = total_data.industry_name.values
40 |
41 | codes = total_data.code.tolist()
42 | target_pos = pd.DataFrame({'code': codes,
43 | 'weight': er,
44 | 'industry': industry})
45 | target_pos['weight'] = target_pos['weight'] / target_pos['weight'].abs().sum()
46 | target_pos = pd.merge(target_pos, dx_returns, on=['code'])
47 | target_pos = pd.merge(target_pos, total_data[['code'] + constraint_risk], on=['code'])
48 | total_risk_exp = target_pos[constraint_risk]
49 | activate_weight = target_pos['weight'].values
50 | excess_return = np.exp(target_pos[['dx']].values) - 1.
51 | excess_return = factor_processing(excess_return,
52 | [winsorize_normal, standardize],
53 | total_risk_exp.values,
54 | [winsorize_normal, standardize]).flatten()
55 | port_ret = np.log(activate_weight @ excess_return + 1.)
56 | ic = np.corrcoef(excess_return, activate_weight)[0, 1]
57 | x = sm.add_constant(activate_weight)
58 | results = sm.OLS(excess_return, x).fit()
59 | t_stats = results.tvalues[1]
60 |
61 | alpha_logger.info(f"{ref_date} is finished with {len(target_pos)} stocks for {factor_name}")
62 | alpha_logger.info(f"{ref_date} risk_exposure: "
63 | f"{np.sum(np.square(target_pos.weight.values @ target_pos[constraint_risk].values))}")
64 | return port_ret, ic, t_stats
65 |
66 |
67 | def cross_section_analysis(ref_date,
68 | factor_name,
69 | universe,
70 | horizon,
71 | constraint_risk,
72 | engine):
73 | codes = engine.fetch_codes(ref_date, universe)
74 |
75 | risk_exposure = engine.fetch_risk_model(ref_date, codes)[1][['code'] + constraint_risk]
76 | factor_data = engine.fetch_factor(ref_date, factor_name, codes)
77 | industry_matrix = engine.fetch_industry_matrix(ref_date, codes, 'sw_adj', 1)
78 | dx_returns = engine.fetch_dx_return(ref_date, codes, horizon=horizon, offset=1)
79 |
80 | return cs_impl(ref_date, factor_data, factor_name, risk_exposure, constraint_risk,
81 | industry_matrix, dx_returns)
82 |
83 |
84 | if __name__ == '__main__':
85 | from alphamind.api import SqlEngine, Universe, risk_styles, industry_styles
86 |
87 | factor_name = 'SIZE'
88 | data_source = 'postgres+psycopg2://postgres:A12345678!@10.63.6.220/alpha'
89 | engine = SqlEngine(data_source)
90 | risk_names = list(set(risk_styles).difference({factor_name}))
91 | industry_names = list(set(industry_styles).difference({factor_name}))
92 | constraint_risk = risk_names + industry_names
93 | universe = Universe('custom', ['ashare_ex'])
94 | horizon = 9
95 |
96 | x = cross_section_analysis('2018-02-08',
97 | factor_name,
98 | universe,
99 | horizon,
100 | constraint_risk,
101 | engine=engine)
102 | print(x)
103 |
--------------------------------------------------------------------------------
/alphamind/analysis/perfanalysis.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-5-12
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import pandas as pd
9 |
10 | from alphamind.analysis.riskanalysis import risk_analysis
11 |
12 |
13 | def perf_attribution_by_pos(net_weight_series: pd.Series,
14 | next_bar_return_series: pd.Series,
15 | benchmark_table: pd.DataFrame) -> pd.DataFrame:
16 | explained_table, _ = risk_analysis(net_weight_series,
17 | next_bar_return_series,
18 | benchmark_table)
19 | return explained_table.groupby(level=0).sum()
20 |
--------------------------------------------------------------------------------
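
A randomized sketch of calling `perf_attribution_by_pos`; treating a plain integer index as the level-0 grouping key (e.g. a date-bucket id) is an assumption made for illustration:

```python
import numpy as np
import pandas as pd

from alphamind.analysis.perfanalysis import perf_attribution_by_pos

n, n_factors = 100, 3
idx = np.repeat(np.arange(5), n // 5)  # 5 groups of 20 positions each

weights = pd.Series(np.random.randn(n) / n, index=idx)
returns = pd.Series(np.random.randn(n) * 0.01, index=idx)
benchmark = pd.DataFrame(np.random.randn(n, n_factors),
                         columns=['f1', 'f2', 'f3'], index=idx)

# One attribution row per group: idiosyncratic P&L plus each benchmark factor.
print(perf_attribution_by_pos(weights, returns, benchmark))
```
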
/alphamind/analysis/quantileanalysis.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-8-16
4 |
5 | @author: cheng.li
6 | """
7 |
8 | from typing import Optional
9 |
10 | import numpy as np
11 | import pandas as pd
12 |
13 | from alphamind.data.processing import factor_processing
14 | from alphamind.data.quantile import quantile
15 | from alphamind.data.standardize import standardize
16 | from alphamind.data.winsorize import winsorize_normal
17 | from alphamind.utilities import agg_mean
18 |
19 |
20 | def quantile_analysis(factors: pd.DataFrame,
21 | factor_weights: np.ndarray,
22 | dx_return: np.ndarray,
23 | n_bins: int = 5,
24 | risk_exp: Optional[np.ndarray] = None,
25 | **kwargs):
26 | if 'pre_process' in kwargs:
27 | pre_process = kwargs['pre_process']
28 | del kwargs['pre_process']
29 | else:
30 | pre_process = [winsorize_normal, standardize]
31 |
32 | if 'post_process' in kwargs:
33 | post_process = kwargs['post_process']
34 | del kwargs['post_process']
35 | else:
36 | post_process = [standardize]
37 |
38 | er = factor_processing(factors.values, pre_process, risk_exp, post_process) @ factor_weights
39 | return er_quantile_analysis(er, n_bins, dx_return, **kwargs)
40 |
41 |
42 | def er_quantile_analysis(er: np.ndarray,
43 | n_bins: int,
44 | dx_return: np.ndarray,
45 | de_trend=False) -> np.ndarray:
46 | er = er.flatten()
47 | q_groups = quantile(er, n_bins)
48 |
49 | if dx_return.ndim < 2:
50 | dx_return.shape = -1, 1
51 |
52 | group_return = agg_mean(q_groups, dx_return).flatten()
53 | total_return = group_return.sum()
54 | ret = group_return.copy()
55 |
56 | if de_trend:
57 | resid = n_bins - 1
58 | res_weight = 1. / resid
59 | for i, value in enumerate(ret):
60 | ret[i] = (1. + res_weight) * value - res_weight * total_return
61 |
62 | return ret
63 |
64 |
65 | if __name__ == '__main__':
66 | n = 5000
67 | n_f = 5
68 | n_bins = 5
69 |
70 | x = np.random.randn(n, 5)
71 | risk_exp = np.random.randn(n, 3)
72 | x_w = np.random.randn(n_f)
73 | r = np.random.randn(n)
74 |
75 | f_df = pd.DataFrame(x)
76 | calculated = quantile_analysis(f_df,
77 | x_w,
78 | r,
79 | risk_exp=None,
80 | n_bins=n_bins,
81 | pre_process=[], # [winsorize_normal, standardize],
82 | post_process=[]) # [standardize])
83 |
84 | er = x_w @ f_df.values.T
85 | expected = er_quantile_analysis(er, n_bins, r)
86 |
87 | print(calculated)
88 | print(expected)
89 |
--------------------------------------------------------------------------------
/alphamind/analysis/riskanalysis.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-5-6
4 |
5 | @author: cheng.li
6 | """
7 |
8 | from typing import Tuple
9 |
10 | import numpy as np
11 | import pandas as pd
12 |
13 | from alphamind.data.neutralize import neutralize
14 |
15 |
16 | def risk_analysis(net_weight_series: pd.Series,
17 | next_bar_return_series: pd.Series,
18 | risk_table: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]:
19 | group_idx = net_weight_series.index.values.astype(int)
20 | net_pos = net_weight_series.values.reshape((-1, 1))
21 | risk_factor_cols = risk_table.columns
22 |
23 | idiosyncratic, other_stats = neutralize(risk_table.values,
24 | next_bar_return_series.values,
25 | group_idx,
26 | detail=True)
27 |
28 | systematic = other_stats['explained']
29 | exposure = other_stats['exposure']
30 |
31 | explained_table = np.hstack((idiosyncratic, systematic[:, :, 0]))
32 | cols = ['idiosyncratic']
33 | cols.extend(risk_factor_cols)
34 |
35 | explained_table = pd.DataFrame(explained_table * net_pos, columns=cols,
36 | index=net_weight_series.index)
37 | exposure_table = pd.DataFrame(exposure[:, :, 0] * net_pos, columns=risk_factor_cols,
38 | index=net_weight_series.index)
39 | return explained_table, exposure_table.groupby(level=0).first()
40 |
--------------------------------------------------------------------------------
/alphamind/api.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-8-16
4 |
5 | @author: cheng.li
6 | """
7 |
8 | from alphamind.data.engines.sqlengine import SqlEngine
9 | from alphamind.data.engines.sqlengine import risk_styles
10 | from alphamind.data.engines.sqlengine import industry_styles
11 | from alphamind.data.engines.sqlengine import macro_styles
12 | from alphamind.analysis.factoranalysis import er_portfolio_analysis
13 | from alphamind.analysis.factoranalysis import factor_analysis
14 | from alphamind.analysis.quantileanalysis import er_quantile_analysis
15 | from alphamind.analysis.quantileanalysis import quantile_analysis
16 | from alphamind.data.engines.universe import Universe
17 | from alphamind.data.engines.utilities import industry_list
18 | from alphamind.data.neutralize import neutralize
19 | from alphamind.data.processing import factor_processing
20 | from alphamind.data.rank import percentile
21 | from alphamind.data.rank import rank
22 | from alphamind.data.standardize import Standardizer
23 | from alphamind.data.standardize import projection
24 | from alphamind.data.standardize import standardize
25 | from alphamind.data.winsorize import NormalWinsorizer
26 | from alphamind.data.winsorize import winsorize_normal
27 | from alphamind.execution.naiveexecutor import NaiveExecutor
28 | from alphamind.execution.pipeline import ExecutionPipeline
29 | from alphamind.execution.targetvolexecutor import TargetVolExecutor
30 | from alphamind.execution.thresholdexecutor import ThresholdExecutor
31 | from alphamind.model import ConstLinearModel
32 | from alphamind.model import LassoRegression
33 | from alphamind.model import LinearRegression
34 | from alphamind.model import LogisticRegression
35 | from alphamind.model import NvSVRModel
36 | from alphamind.model import RandomForestClassifier
37 | from alphamind.model import RandomForestRegressor
38 | from alphamind.model import XGBClassifier
39 | from alphamind.model import XGBRegressor
40 | from alphamind.model import XGBTrainer
41 | from alphamind.model import load_model
42 | from alphamind.model.composer import Composer
43 | from alphamind.model.composer import DataMeta
44 | from alphamind.model.composer import predict_by_model
45 | from alphamind.model.composer import train_model
46 | from alphamind.model.data_preparing import fetch_data_package
47 | from alphamind.model.data_preparing import fetch_predict_phase
48 | from alphamind.model.data_preparing import fetch_train_phase
49 | from alphamind.portfolio.constraints import BoundaryDirection
50 | from alphamind.portfolio.constraints import BoundaryType
51 | from alphamind.portfolio.constraints import Constraints
52 | from alphamind.portfolio.constraints import LinearConstraints
53 | from alphamind.portfolio.constraints import create_box_bounds
54 | from alphamind.portfolio.evolver import evolve_positions
55 | from alphamind.utilities import alpha_logger
56 | from alphamind.utilities import map_freq
57 |
58 | __all__ = [
59 | 'SqlEngine',
60 | 'factor_analysis',
61 | 'er_portfolio_analysis',
62 | 'quantile_analysis',
63 | 'er_quantile_analysis',
64 | 'Universe',
65 | 'factor_processing',
66 | 'Constraints',
67 | 'LinearConstraints',
68 | 'BoundaryType',
69 | 'BoundaryDirection',
70 | 'create_box_bounds',
71 | 'evolve_positions',
72 | 'risk_styles',
73 | 'industry_styles',
74 | 'macro_styles',
75 | 'winsorize_normal',
76 | 'NormalWinsorizer',
77 | 'standardize',
78 | 'Standardizer',
79 | 'projection',
80 | 'neutralize',
81 | 'rank',
82 | 'percentile',
83 | 'industry_list',
84 | 'fetch_data_package',
85 | 'fetch_train_phase',
86 | 'fetch_predict_phase',
87 | 'Composer',
88 | 'DataMeta',
89 | 'train_model',
90 | 'predict_by_model',
91 | 'LinearRegression',
92 | 'LassoRegression',
93 | 'ConstLinearModel',
94 | 'LogisticRegression',
95 | 'RandomForestRegressor',
96 | 'RandomForestClassifier',
97 | 'XGBRegressor',
98 | 'XGBClassifier',
99 | 'XGBTrainer',
100 | 'NvSVRModel',
101 | 'load_model',
102 | 'NaiveExecutor',
103 | 'ThresholdExecutor',
104 | 'TargetVolExecutor',
105 | 'ExecutionPipeline',
106 | 'alpha_logger',
107 | 'map_freq'
108 | ]
109 |
--------------------------------------------------------------------------------
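
Much of what `alphamind.api` re-exports composes into the preprocessing chain used throughout the analysis modules; a minimal sketch on synthetic data (the positional call pattern mirrors `alphamind.analysis.quantileanalysis`; array sizes are arbitrary):

```python
import numpy as np

from alphamind.api import factor_processing, standardize, winsorize_normal

raw_factors = np.random.randn(500, 3)  # 500 stocks x 3 raw factors
risk_exp = np.random.randn(500, 5)     # risk exposures to neutralize against

# Winsorize and standardize, neutralize against risk_exp, then standardize again.
processed = factor_processing(raw_factors,
                              [winsorize_normal, standardize],
                              risk_exp,
                              [standardize])
print(processed.shape)  # (500, 3)
```
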
/alphamind/benchmarks/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alpha-miner/alpha-mind/023fca01d2cea7cd50328396c60b06c99706c426/alphamind/benchmarks/__init__.py
--------------------------------------------------------------------------------
/alphamind/benchmarks/benchmarks.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-4-25
4 |
5 | @author: cheng.li
6 | """
7 |
8 | from alphamind.benchmarks.data.neutralize import benchmark_neutralize
9 | from alphamind.benchmarks.data.neutralize import benchmark_neutralize_with_groups
10 | from alphamind.benchmarks.data.standardize import benchmark_standardize
11 | from alphamind.benchmarks.data.standardize import benchmark_standardize_with_group
12 | from alphamind.benchmarks.data.winsorize import benchmark_winsorize_normal
13 | from alphamind.benchmarks.data.winsorize import benchmark_winsorize_normal_with_group
14 | from alphamind.benchmarks.portfolio.linearbuild import benchmark_build_linear
15 | from alphamind.benchmarks.portfolio.percentbuild import benchmark_build_percent
16 | from alphamind.benchmarks.portfolio.percentbuild import benchmark_build_percent_with_group
17 | from alphamind.benchmarks.portfolio.rankbuild import benchmark_build_rank
18 | from alphamind.benchmarks.portfolio.rankbuild import benchmark_build_rank_with_group
19 | from alphamind.benchmarks.settlement.simplesettle import benchmark_simple_settle
20 | from alphamind.benchmarks.settlement.simplesettle import benchmark_simple_settle_with_group
21 |
22 | if __name__ == '__main__':
23 | benchmark_neutralize(3000, 10, 1000)
24 | benchmark_neutralize_with_groups(3000, 10, 1000, 30)
25 | benchmark_neutralize(30, 3, 50000)
26 | benchmark_neutralize_with_groups(30, 3, 50000, 3)
27 | benchmark_neutralize(50000, 50, 20)
28 | benchmark_neutralize_with_groups(50000, 50, 20, 50)
29 | benchmark_standardize(3000, 10, 1000)
30 | benchmark_standardize_with_group(3000, 10, 1000, 30)
31 | benchmark_standardize(100, 10, 50000)
32 | benchmark_standardize_with_group(100, 10, 5000, 4)
33 | benchmark_standardize(50000, 50, 20)
34 | benchmark_standardize_with_group(50000, 50, 20, 50)
35 | benchmark_winsorize_normal(3000, 10, 1000)
36 | benchmark_winsorize_normal_with_group(3000, 10, 1000, 30)
37 | benchmark_winsorize_normal(30, 10, 50000)
38 | benchmark_winsorize_normal_with_group(30, 10, 5000, 5)
39 | benchmark_winsorize_normal(50000, 50, 20)
40 | benchmark_winsorize_normal_with_group(50000, 50, 20, 50)
41 | benchmark_build_rank(3000, 1000, 300)
42 | benchmark_build_rank_with_group(3000, 1000, 10, 30)
43 | benchmark_build_rank(30, 50000, 3)
44 | benchmark_build_rank_with_group(30, 50000, 1, 3)
45 | benchmark_build_rank(50000, 20, 3000)
46 | benchmark_build_rank_with_group(50000, 20, 10, 300)
47 | benchmark_build_percent(3000, 1000, 0.1)
48 | benchmark_build_percent_with_group(3000, 1000, 0.1, 30)
49 | benchmark_build_percent(30, 50000, 0.1)
50 | benchmark_build_percent_with_group(30, 50000, 0.1, 3)
51 | benchmark_build_percent(50000, 20, 0.1)
52 | benchmark_build_percent_with_group(50000, 20, 0.1, 300)
53 | benchmark_build_linear(100, 3, 100)
54 | benchmark_build_linear(1000, 30, 10)
55 | benchmark_simple_settle(3000, 10, 1000)
56 | benchmark_simple_settle_with_group(3000, 10, 1000, 30)
57 | benchmark_simple_settle(30, 10, 50000)
58 | benchmark_simple_settle_with_group(30, 10, 50000, 5)
59 | benchmark_simple_settle(50000, 50, 20)
60 | benchmark_simple_settle_with_group(50000, 50, 20, 50)
61 |
--------------------------------------------------------------------------------
/alphamind/benchmarks/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alpha-miner/alpha-mind/023fca01d2cea7cd50328396c60b06c99706c426/alphamind/benchmarks/data/__init__.py
--------------------------------------------------------------------------------
/alphamind/benchmarks/data/neutralize.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-4-25
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import datetime as dt
9 |
10 | import numpy as np
11 | from sklearn.linear_model import LinearRegression
12 |
13 | from alphamind.data.neutralize import neutralize
14 |
15 |
16 | def benchmark_neutralize(n_samples: int, n_features: int, n_loops: int) -> None:
17 | print("-" * 60)
18 | print("Starting least square fitting benchmarking")
19 | print("Parameters(n_samples: {0}, n_features: {1}, n_loops: {2})".format(n_samples, n_features,
20 | n_loops))
21 |
22 | y = np.random.randn(n_samples, 5)
23 | x = np.random.randn(n_samples, n_features)
24 |
25 | start = dt.datetime.now()
26 | for _ in range(n_loops):
27 | calc_res = neutralize(x, y)
28 | impl_model_time = dt.datetime.now() - start
29 |
30 | print('{0:20s}: {1}'.format('Implemented model', impl_model_time))
31 |
32 | start = dt.datetime.now()
33 | for _ in range(n_loops):
34 | benchmark_model = LinearRegression(fit_intercept=False)
35 | benchmark_model.fit(x, y)
36 | exp_res = y - x @ benchmark_model.coef_.T
37 | benchmark_model_time = dt.datetime.now() - start
38 |
39 | print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))
40 |
41 | np.testing.assert_array_almost_equal(calc_res, exp_res)
42 |
43 |
44 | def benchmark_neutralize_with_groups(n_samples: int, n_features: int, n_loops: int,
45 | n_groups: int) -> None:
46 | print("-" * 60)
47 | print("Starting least square fitting with group benchmarking")
48 | print(
49 | "Parameters(n_samples: {0}, n_features: {1}, n_loops: {2}, n_groups: {3})".format(n_samples,
50 | n_features,
51 | n_loops,
52 | n_groups))
53 | y = np.random.randn(n_samples, 5)
54 | x = np.random.randn(n_samples, n_features)
55 | groups = np.random.randint(n_groups, size=n_samples)
56 |
57 | start = dt.datetime.now()
58 | for _ in range(n_loops):
59 | _ = neutralize(x, y, groups)
60 | impl_model_time = dt.datetime.now() - start
61 |
62 | print('{0:20s}: {1}'.format('Implemented model', impl_model_time))
63 |
64 | start = dt.datetime.now()
65 |
66 | model = LinearRegression(fit_intercept=False)
67 | for _ in range(n_loops):
68 | for i in range(n_groups):
69 | curr_x = x[groups == i]
70 | curr_y = y[groups == i]
71 | model.fit(curr_x, curr_y)
72 | _ = curr_y - curr_x @ model.coef_.T
73 | benchmark_model_time = dt.datetime.now() - start
74 |
75 | print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))
76 |
77 |
78 | if __name__ == '__main__':
79 | benchmark_neutralize(3000, 10, 1000)
80 | benchmark_neutralize_with_groups(3000, 10, 1000, 30)
81 |
--------------------------------------------------------------------------------
/alphamind/benchmarks/data/standardize.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-4-25
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import datetime as dt
9 |
10 | import numpy as np
11 | import pandas as pd
12 | from scipy.stats import zscore
13 |
14 | from alphamind.data.standardize import standardize
15 |
16 |
17 | def benchmark_standardize(n_samples: int, n_features: int, n_loops: int) -> None:
18 | print("-" * 60)
19 | print("Starting standardizing benchmarking")
20 | print("Parameters(n_samples: {0}, n_features: {1}, n_loops: {2})".format(n_samples, n_features,
21 | n_loops))
22 |
23 | x = np.random.randn(n_samples, n_features)
24 |
25 | start = dt.datetime.now()
26 | for _ in range(n_loops):
27 | _ = standardize(x)
28 | impl_model_time = dt.datetime.now() - start
29 |
30 | print('{0:20s}: {1}'.format('Implemented model', impl_model_time))
31 |
32 | start = dt.datetime.now()
33 | for _ in range(n_loops):
34 | _ = zscore(x)
35 | benchmark_model_time = dt.datetime.now() - start
36 |
37 | print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))
38 |
39 |
40 | def benchmark_standardize_with_group(n_samples: int, n_features: int, n_loops: int,
41 | n_groups: int) -> None:
42 | print("-" * 60)
43 | print("Starting standardizing with group-by values benchmarking")
44 | print(
45 | "Parameters(n_samples: {0}, n_features: {1}, n_loops: {2}, n_groups: {3})".format(n_samples,
46 | n_features,
47 | n_loops,
48 | n_groups))
49 |
50 | x = np.random.randn(n_samples, n_features)
51 | groups = np.random.randint(n_groups, size=n_samples)
52 |
53 | start = dt.datetime.now()
54 | for _ in range(n_loops):
55 | _ = standardize(x, groups=groups)
56 | impl_model_time = dt.datetime.now() - start
57 |
58 | print('{0:20s}: {1}'.format('Implemented model', impl_model_time))
59 |
60 | start = dt.datetime.now()
61 | for _ in range(n_loops):
62 | _ = pd.DataFrame(x).groupby(groups).transform(
63 | lambda s: (s - s.mean(axis=0)) / s.std(axis=0))
64 | benchmark_model_time = dt.datetime.now() - start
65 |
66 | print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))
67 |
68 |
69 | if __name__ == '__main__':
70 | benchmark_standardize(3000, 10, 1000)
71 | benchmark_standardize_with_group(3000, 10, 1000, 30)
72 |
--------------------------------------------------------------------------------
/alphamind/benchmarks/data/winsorize.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-4-25
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import datetime as dt
9 |
10 | import numpy as np
11 | import pandas as pd
12 |
13 | from alphamind.data.winsorize import winsorize_normal
14 |
15 |
16 | def benchmark_winsorize_normal(n_samples: int, n_features: int, n_loops: int) -> None:
17 | print("-" * 60)
18 | print("Starting winsorize normal benchmarking")
19 | print("Parameters(n_samples: {0}, n_features: {1}, n_loops: {2})".format(n_samples, n_features,
20 | n_loops))
21 |
22 | num_stds = 2
23 |
24 | x = np.random.randn(n_samples, n_features)
25 |
26 | start = dt.datetime.now()
27 | for _ in range(n_loops):
28 | _ = winsorize_normal(x, num_stds)
29 | impl_model_time = dt.datetime.now() - start
30 |
31 | print('{0:20s}: {1}'.format('Implemented model', impl_model_time))
32 |
33 | def impl(x):
34 | std_values = x.std(axis=0)
35 | mean_value = x.mean(axis=0)
36 |
37 | lower_bound = mean_value - num_stds * std_values
38 | upper_bound = mean_value + num_stds * std_values
39 |
40 | res = np.where(x > upper_bound, upper_bound, x)
41 | res = np.where(res < lower_bound, lower_bound, res)
42 | return res
43 |
44 | start = dt.datetime.now()
45 | for _ in range(n_loops):
46 | _ = impl(x)
47 | benchmark_model_time = dt.datetime.now() - start
48 |
49 | print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))
50 |
51 |
52 | def benchmark_winsorize_normal_with_group(n_samples: int, n_features: int, n_loops: int,
53 | n_groups: int) -> None:
54 | print("-" * 60)
55 | print("Starting winsorize normal with group-by values benchmarking")
56 | print(
57 | "Parameters(n_samples: {0}, n_features: {1}, n_loops: {2}, n_groups: {3})".format(n_samples,
58 | n_features,
59 | n_loops,
60 | n_groups))
61 |
62 | num_stds = 2
63 |
64 | x = np.random.randn(n_samples, n_features)
65 | groups = np.random.randint(n_groups, size=n_samples)
66 |
67 | start = dt.datetime.now()
68 | for _ in range(n_loops):
69 | _ = winsorize_normal(x, num_stds, groups=groups)
70 | impl_model_time = dt.datetime.now() - start
71 |
72 | print('{0:20s}: {1}'.format('Implemented model', impl_model_time))
73 |
74 | def impl(x):
75 | std_values = x.std(axis=0)
76 | mean_value = x.mean(axis=0)
77 |
78 | lower_bound = mean_value - num_stds * std_values
79 | upper_bound = mean_value + num_stds * std_values
80 |
81 | res = np.where(x > upper_bound, upper_bound, x)
82 | res = np.where(res < lower_bound, lower_bound, res)
83 | return res
84 |
85 | start = dt.datetime.now()
86 | for _ in range(n_loops):
87 | _ = pd.DataFrame(x).groupby(groups).transform(impl)
88 | benchmark_model_time = dt.datetime.now() - start
89 |
90 | print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))
91 |
92 |
93 | if __name__ == '__main__':
94 | benchmark_winsorize_normal(3000, 10, 1000)
95 | benchmark_winsorize_normal_with_group(3000, 10, 1000, 30)
96 |
--------------------------------------------------------------------------------
/alphamind/benchmarks/portfolio/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-4-27
4 |
5 | @author: cheng.li
6 | """
7 |
--------------------------------------------------------------------------------
/alphamind/benchmarks/portfolio/linearbuild.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-5-5
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import datetime as dt
9 |
10 | import numpy as np
11 | from cvxopt import matrix
12 | from cvxopt import solvers
13 | from scipy.optimize import linprog
14 |
15 | from alphamind.portfolio.linearbuilder import linear_builder
16 |
17 | solvers.options['show_progress'] = False
18 |
19 |
20 | def benchmark_build_linear(n_samples: int, n_risks: int, n_loop: int) -> None:
21 | print("-" * 60)
22 | print("Starting portfolio construction by linear programming")
23 | print(
24 | "Parameters(n_samples: {0}, n_risks: {1}, n_loop: {2})".format(n_samples, n_risks, n_loop))
25 |
26 | er = np.random.randn(n_samples)
27 | risk_exp = np.random.randn(n_samples, n_risks)
28 | bm = np.random.rand(n_samples)
29 | bm /= bm.sum()
30 |
31 | lbound = -0.04
32 | ubound = 0.05
33 |
34 | risk_lbound = bm @ risk_exp
35 | risk_ubound = bm @ risk_exp
36 |
37 | start = dt.datetime.now()
38 | for _ in range(n_loop):
39 | status, v, x = linear_builder(er,
40 | lbound,
41 | ubound,
42 | risk_exp,
43 | risk_target=(risk_lbound,
44 | risk_ubound))
45 | impl_model_time = dt.datetime.now() - start
46 | print('{0:20s}: {1}'.format('Implemented model (ECOS)', impl_model_time))
47 |
48 | c = - er
49 | bounds = [(lbound, ubound) for _ in range(n_samples)]
50 | a_eq = np.ones((1, n_samples))
51 | a_eq = np.vstack((a_eq, risk_exp.T))
52 | b_eq = np.hstack((np.array([1.]), risk_exp.T @ bm))
53 | start = dt.datetime.now()
54 | for _ in range(n_loop):
55 | res = linprog(c, A_eq=a_eq, b_eq=b_eq, bounds=bounds, options={'maxiter': 10000})
56 | benchmark_model_time = dt.datetime.now() - start
57 | print('{0:20s}: {1}'.format('Benchmark model (scipy)', benchmark_model_time))
58 | np.testing.assert_array_almost_equal(x, res['x'])
59 |
60 | c = matrix(-er)
61 | aneq = matrix(a_eq)
62 | b = matrix(b_eq)
63 | g = matrix(np.vstack((np.diag(np.ones(n_samples)), -np.diag(np.ones(n_samples)))))
64 | h = matrix(np.hstack((ubound * np.ones(n_samples), -lbound * np.ones(n_samples))))
65 |
66 | solvers.lp(c, g, h, solver='glpk')
67 | start = dt.datetime.now()
68 | for _ in range(n_loop):
69 | res2 = solvers.lp(c, g, h, aneq, b, solver='glpk')
70 | benchmark_model_time = dt.datetime.now() - start
71 | print('{0:20s}: {1}'.format('Benchmark model (glpk)', benchmark_model_time))
72 | np.testing.assert_array_almost_equal(x, np.array(res2['x']).flatten())
73 |
74 |
75 | if __name__ == '__main__':
76 | benchmark_build_linear(2000, 30, 10)
77 |
--------------------------------------------------------------------------------
/alphamind/benchmarks/portfolio/longshortbuild.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-5-9
4 |
5 | @author: cheng.li
6 | """
7 |
--------------------------------------------------------------------------------
/alphamind/benchmarks/portfolio/percentbuild.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-5-4
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import datetime as dt
9 |
10 | import numpy as np
11 | import pandas as pd
12 |
13 | from alphamind.portfolio.percentbuilder import percent_build
14 |
15 |
16 | def benchmark_build_percent(n_samples: int, n_loops: int, p_included: float) -> None:
17 | print("-" * 60)
18 | print("Starting portfolio construction by percent benchmarking")
19 | print("Parameters(n_samples: {0}, p_included: {1}, n_loops: {2})".format(n_samples, p_included,
20 | n_loops))
21 |
22 | n_portfolio = 10
23 |
24 | x = np.random.randn(n_samples, n_portfolio)
25 |
26 | start = dt.datetime.now()
27 | for _ in range(n_loops):
28 | calc_weights = percent_build(x, p_included)
29 | impl_model_time = dt.datetime.now() - start
30 |
31 | print('{0:20s}: {1}'.format('Implemented model', impl_model_time))
32 |
33 | start = dt.datetime.now()
34 | for _ in range(n_loops):
35 | exp_weights = np.zeros((len(x), n_portfolio))
36 |         n_included = int(p_included * len(x))
37 |         chosen_index = (-x).argsort(axis=0).argsort(axis=0) < n_included
38 |         for j in range(n_portfolio):
39 |             exp_weights[chosen_index[:, j], j] = 1.
40 | benchmark_model_time = dt.datetime.now() - start
41 |
42 | np.testing.assert_array_almost_equal(calc_weights, exp_weights)
43 |
44 | print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))
45 |
46 |
47 | def benchmark_build_percent_with_group(n_samples: int, n_loops: int, p_included: float,
48 | n_groups: int) -> None:
49 | print("-" * 60)
50 | print("Starting portfolio construction by percent with group-by values benchmarking")
51 | print(
52 | "Parameters(n_samples: {0}, p_included: {1}, n_loops: {2}, n_groups: {3})".format(n_samples,
53 | p_included,
54 | n_loops,
55 | n_groups))
56 |
57 | n_portfolio = 10
58 |
59 | x = np.random.randn(n_samples, n_portfolio)
60 | groups = np.random.randint(n_groups, size=n_samples)
61 |
62 | start = dt.datetime.now()
63 | for _ in range(n_loops):
64 | calc_weights = percent_build(x, p_included, groups=groups)
65 | impl_model_time = dt.datetime.now() - start
66 |
67 | print('{0:20s}: {1}'.format('Implemented model', impl_model_time))
68 |
69 | start = dt.datetime.now()
70 | for _ in range(n_loops):
71 | grouped_ordering = pd.DataFrame(-x).groupby(groups).rank()
72 | grouped_count = pd.DataFrame(-x).groupby(groups).transform(lambda x: x.count())
73 | exp_weights = np.zeros((len(x), n_portfolio))
74 | n_included = (grouped_count * p_included).astype(int)
75 | masks = (grouped_ordering <= n_included).values
76 | for j in range(n_portfolio):
77 | exp_weights[masks[:, j], j] = 1.
78 | benchmark_model_time = dt.datetime.now() - start
79 |
80 | np.testing.assert_array_almost_equal(calc_weights, exp_weights)
81 |
82 | print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))
83 |
84 |
85 | if __name__ == '__main__':
86 | benchmark_build_percent(3000, 1000, 0.1)
87 | benchmark_build_percent_with_group(3000, 1000, 0.1, 30)
88 |
--------------------------------------------------------------------------------
/alphamind/benchmarks/portfolio/rankbuild.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-4-27
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import datetime as dt
9 |
10 | import numpy as np
11 | import pandas as pd
12 |
13 | from alphamind.portfolio.rankbuilder import rank_build
14 |
15 |
16 | def benchmark_build_rank(n_samples: int, n_loops: int, n_included: int) -> None:
17 | print("-" * 60)
18 | print("Starting portfolio construction by rank benchmarking")
19 | print("Parameters(n_samples: {0}, n_included: {1}, n_loops: {2})".format(n_samples, n_included,
20 | n_loops))
21 |
22 | n_portfolio = 10
23 |
24 | x = np.random.randn(n_samples, n_portfolio)
25 |
26 | start = dt.datetime.now()
27 | for _ in range(n_loops):
28 | calc_weights = rank_build(x, n_included)
29 | impl_model_time = dt.datetime.now() - start
30 |
31 | print('{0:20s}: {1}'.format('Implemented model', impl_model_time))
32 |
33 | start = dt.datetime.now()
34 | for _ in range(n_loops):
35 | exp_weights = np.zeros((len(x), n_portfolio))
36 |         chosen_index = (-x).argsort(axis=0).argsort(axis=0) < n_included
37 |         for j in range(n_portfolio):
38 |             exp_weights[chosen_index[:, j], j] = 1.
39 | benchmark_model_time = dt.datetime.now() - start
40 |
41 | np.testing.assert_array_almost_equal(calc_weights, exp_weights)
42 |
43 | print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))
44 |
45 |
46 | def benchmark_build_rank_with_group(n_samples: int, n_loops: int, n_included: int,
47 | n_groups: int) -> None:
48 | print("-" * 60)
49 | print("Starting portfolio construction by rank with group-by values benchmarking")
50 | print(
51 | "Parameters(n_samples: {0}, n_included: {1}, n_loops: {2}, n_groups: {3})".format(n_samples,
52 | n_included,
53 | n_loops,
54 | n_groups))
55 |
56 | n_portfolio = 10
57 |
58 | x = np.random.randn(n_samples, n_portfolio)
59 | groups = np.random.randint(n_groups, size=n_samples)
60 |
61 | start = dt.datetime.now()
62 | for _ in range(n_loops):
63 | calc_weights = rank_build(x, n_included, groups=groups)
64 | impl_model_time = dt.datetime.now() - start
65 |
66 | print('{0:20s}: {1}'.format('Implemented model', impl_model_time))
67 |
68 | start = dt.datetime.now()
69 | for _ in range(n_loops):
70 | grouped_ordering = pd.DataFrame(-x).groupby(groups).rank()
71 | exp_weights = np.zeros((len(x), n_portfolio))
72 | masks = (grouped_ordering <= n_included).values
73 | for j in range(n_portfolio):
74 | exp_weights[masks[:, j], j] = 1.
75 | benchmark_model_time = dt.datetime.now() - start
76 |
77 | np.testing.assert_array_almost_equal(calc_weights, exp_weights)
78 |
79 | print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))
80 |
81 |
82 | if __name__ == '__main__':
83 | benchmark_build_rank(3000, 1000, 300)
84 | benchmark_build_rank_with_group(3000, 1000, 10, 30)
85 |
--------------------------------------------------------------------------------
/alphamind/benchmarks/settlement/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-4-28
4 |
5 | @author: cheng.li
6 | """
7 |
--------------------------------------------------------------------------------
/alphamind/benchmarks/settlement/simplesettle.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-4-28
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import datetime as dt
9 |
10 | import numpy as np
11 | import pandas as pd
12 |
13 | from alphamind.settlement.simplesettle import simple_settle
14 |
15 |
16 | def benchmark_simple_settle(n_samples: int, n_portfolios: int, n_loops: int) -> None:
17 | print("-" * 60)
18 | print("Starting simple settle benchmarking")
19 | print("Parameters(n_samples: {0}, n_portfolios: {1}, n_loops: {2})".format(n_samples,
20 | n_portfolios,
21 | n_loops))
22 |
23 | weights = np.random.randn(n_samples, n_portfolios)
24 | ret_series = np.random.randn(n_samples)
25 |
26 | start = dt.datetime.now()
27 | for _ in range(n_loops):
28 | calc_ret = simple_settle(weights, ret_series)
29 | impl_model_time = dt.datetime.now() - start
30 |
31 | print('{0:20s}: {1}'.format('Implemented model', impl_model_time))
32 |
33 | start = dt.datetime.now()
34 | ret_series.shape = -1, 1
35 | for _ in range(n_loops):
36 | exp_ret = (weights * ret_series).sum(axis=0)
37 | benchmark_model_time = dt.datetime.now() - start
38 |
39 | np.testing.assert_array_almost_equal(calc_ret, exp_ret)
40 |
41 | print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))
42 |
43 |
44 | def benchmark_simple_settle_with_group(n_samples: int, n_portfolios: int, n_loops: int,
45 | n_groups: int) -> None:
46 | print("-" * 60)
47 | print("Starting simple settle with group-by values benchmarking")
48 | print("Parameters(n_samples: {0}, n_portfolios: {1}, n_loops: {2}, n_groups: {3})".format(
49 | n_samples, n_portfolios, n_loops, n_groups))
50 |
51 | weights = np.random.randn(n_samples, n_portfolios)
52 | ret_series = np.random.randn(n_samples)
53 | groups = np.random.randint(n_groups, size=n_samples)
54 |
55 | start = dt.datetime.now()
56 | for _ in range(n_loops):
57 | calc_ret = simple_settle(weights, ret_series, groups=groups)
58 | impl_model_time = dt.datetime.now() - start
59 |
60 | print('{0:20s}: {1}'.format('Implemented model', impl_model_time))
61 |
62 | start = dt.datetime.now()
63 | ret_series.shape = -1, 1
64 | for _ in range(n_loops):
65 | ret_mat = weights * ret_series
66 | exp_ret = pd.DataFrame(ret_mat).groupby(groups).sum().values
67 | benchmark_model_time = dt.datetime.now() - start
68 |
69 | np.testing.assert_array_almost_equal(calc_ret, exp_ret)
70 |
71 | print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))
72 |
73 |
74 | if __name__ == '__main__':
75 | benchmark_simple_settle(3000, 3, 1000)
76 | benchmark_simple_settle_with_group(3000, 3, 1000, 30)
77 |
--------------------------------------------------------------------------------
/alphamind/bin/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alpha-miner/alpha-mind/023fca01d2cea7cd50328396c60b06c99706c426/alphamind/bin/__init__.py
--------------------------------------------------------------------------------
/alphamind/bin/alphamind:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-6-29
4 |
5 | @author: cheng.li
6 | """
7 |
8 |
9 | from alphamind.bin.cli import CLIFactory
10 |
11 |
12 | if __name__ == '__main__':
13 |
14 | parser = CLIFactory.get_parser()
15 | args = parser.parse_args()
16 | args.func(args)
--------------------------------------------------------------------------------
/alphamind/bin/cli.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-6-29
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import argparse
9 | from collections import namedtuple
10 |
11 | from sqlalchemy import create_engine
12 |
13 | from alphamind.data.dbmodel import models
14 | from alphamind.utilities import alpha_logger
15 |
16 |
17 | def initdb(args):
18 | alpha_logger.info('DB: ' + args.url)
19 | engine = create_engine(args.url)
20 | models.Base.metadata.create_all(engine)
21 | alpha_logger.info('DB: initialization finished.')
22 |
23 |
24 | Arg = namedtuple(
25 | 'Arg', ['flags', 'help', 'action', 'default', 'nargs', 'type', 'choices', 'metavar'])
26 | Arg.__new__.__defaults__ = (None, None, None, None, None, None, None)
27 |
28 |
29 | class CLIFactory(object):
30 | args = {
31 | 'url': Arg(
32 | ('-u', '--url'),
33 | help='set the url for the db',
34 | type=str)
35 | }
36 |
37 | subparsers = (
38 | {
39 | 'func': initdb,
40 | 'help': 'Initialize the metadata database',
41 | 'args': ('url',)
42 | },
43 | )
44 |
45 | subparsers_dict = {sp['func'].__name__: sp for sp in subparsers}
46 |
47 | @classmethod
48 | def get_parser(cls):
49 | parser = argparse.ArgumentParser()
50 | subparsers = parser.add_subparsers(
51 | help='sub-command help', dest='subcommand')
52 | subparsers.required = True
53 |
54 | subparser_list = cls.subparsers_dict.keys()
55 | for sub in subparser_list:
56 | sub = cls.subparsers_dict[sub]
57 | sp = subparsers.add_parser(sub['func'].__name__, help=sub['help'])
58 | for arg in sub['args']:
59 | arg = cls.args[arg]
60 | kwargs = {
61 | f: getattr(arg, f)
62 | for f in arg._fields if f != 'flags' and getattr(arg, f)}
63 | sp.add_argument(*arg.flags, **kwargs)
64 | sp.set_defaults(func=sub['func'])
65 | return parser
66 |
67 |
68 | def get_parser():
69 | return CLIFactory.get_parser()
70 |
--------------------------------------------------------------------------------
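
The dispatch that the `alphamind` bin script performs can also be driven programmatically; a short sketch (the sqlite URL is only a stand-in for whatever SQLAlchemy connection string the deployment uses):

```python
from alphamind.bin.cli import get_parser

# Equivalent to running: alphamind initdb --url sqlite:///alpha.db
parser = get_parser()
args = parser.parse_args(['initdb', '--url', 'sqlite:///alpha.db'])
args.func(args)  # dispatches to initdb, which creates all model tables
```
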
/alphamind/data/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-4-25
4 |
5 | @author: cheng.li
6 | """
7 |
8 | from alphamind.data.neutralize import neutralize
9 | from alphamind.data.rank import rank
10 | from alphamind.data.standardize import standardize
11 | from alphamind.data.winsorize import winsorize_normal as winsorize
12 |
13 | __all__ = ['standardize',
14 | 'winsorize',
15 | 'neutralize',
16 | 'rank']
17 |
--------------------------------------------------------------------------------
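
A sketch chaining the four helpers exported above on synthetic data; that `rank` accepts a bare 2-d array like the other three is an assumption here:

```python
import numpy as np

from alphamind.data import neutralize, rank, standardize, winsorize

x = np.random.randn(1000, 3)     # raw factor values
risk = np.random.randn(1000, 5)  # risk exposures

clipped = winsorize(x, 2)            # clip at +/- 2 standard deviations
scored = standardize(clipped)        # cross-sectional z-score
residual = neutralize(risk, scored)  # regress scored factors on risk, keep residuals
print(rank(residual).shape)
```
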
/alphamind/data/dbmodel/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-6-29
4 |
5 | @author: cheng.li
6 | """
7 |
--------------------------------------------------------------------------------
/alphamind/data/dbmodel/models/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2020-11-14
4 |
5 | @author: cheng.li
6 | """
7 | import os
8 |
9 | if "DB_VENDOR" in os.environ and os.environ["DB_VENDOR"].lower() == "mysql":
10 | from alphamind.data.dbmodel.models.mysql import Market
11 | from alphamind.data.dbmodel.models.mysql import IndexMarket
12 | from alphamind.data.dbmodel.models.mysql import Universe
13 | from alphamind.data.dbmodel.models.mysql import Industry
14 | from alphamind.data.dbmodel.models.mysql import RiskExposure
15 | from alphamind.data.dbmodel.models.mysql import RiskCovDay
16 | from alphamind.data.dbmodel.models.mysql import RiskCovShort
17 | from alphamind.data.dbmodel.models.mysql import RiskCovLong
18 | from alphamind.data.dbmodel.models.mysql import SpecificRiskDay
19 | from alphamind.data.dbmodel.models.mysql import SpecificRiskShort
20 | from alphamind.data.dbmodel.models.mysql import SpecificRiskLong
21 | from alphamind.data.dbmodel.models.mysql import IndexComponent
22 | from alphamind.data.dbmodel.models.mysql import IndexWeight
23 | else:
24 | from alphamind.data.dbmodel.models.postgres import Market
25 | from alphamind.data.dbmodel.models.postgres import IndexMarket
26 | from alphamind.data.dbmodel.models.postgres import Universe
27 | from alphamind.data.dbmodel.models.postgres import Industry
28 | from alphamind.data.dbmodel.models.postgres import RiskExposure
29 | from alphamind.data.dbmodel.models.postgres import RiskCovDay
30 | from alphamind.data.dbmodel.models.postgres import RiskCovShort
31 | from alphamind.data.dbmodel.models.postgres import RiskCovLong
32 | from alphamind.data.dbmodel.models.postgres import SpecificRiskDay
33 | from alphamind.data.dbmodel.models.postgres import SpecificRiskShort
34 | from alphamind.data.dbmodel.models.postgres import SpecificRiskLong
35 | from alphamind.data.dbmodel.models.postgres import FactorMaster
36 | from alphamind.data.dbmodel.models.postgres import IndexComponent
37 | from alphamind.data.dbmodel.models.postgres import RiskMaster
--------------------------------------------------------------------------------
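
Because the branch above runs once, at import time, the vendor has to be chosen before anything under `alphamind.data.dbmodel` is first imported; a minimal sketch:

```python
import os

# Must be set before the first import of alphamind.data.dbmodel.models.
os.environ["DB_VENDOR"] = "mysql"

from alphamind.data.dbmodel.models import Market  # resolves to models.mysql.Market
```
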
/alphamind/data/engines/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alpha-miner/alpha-mind/023fca01d2cea7cd50328396c60b06c99706c426/alphamind/data/engines/__init__.py
--------------------------------------------------------------------------------
/alphamind/data/engines/sqlengine/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2020-11-14
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import os
9 |
10 | if "DB_VENDOR" in os.environ and os.environ["DB_VENDOR"].lower() == "mysql":
11 | from alphamind.data.engines.sqlengine.mysql import SqlEngine
12 | from alphamind.data.engines.sqlengine.mysql import total_risk_factors
13 | from alphamind.data.engines.sqlengine.mysql import industry_styles
14 | from alphamind.data.engines.sqlengine.mysql import risk_styles
15 | from alphamind.data.engines.sqlengine.mysql import macro_styles
16 | else:
17 | from alphamind.data.engines.sqlengine.postgres import SqlEngine
18 | from alphamind.data.engines.sqlengine.postgres import total_risk_factors
19 | from alphamind.data.engines.sqlengine.postgres import industry_styles
20 | from alphamind.data.engines.sqlengine.postgres import risk_styles
21 | from alphamind.data.engines.sqlengine.postgres import macro_styles
22 |
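Because the vendor switch above runs at import time, DB_VENDOR must be set before the first alphamind import; changing it afterwards has no effect. A minimal sketch of selecting the MySQL backend (assuming only that the package is installed):

    import os

    # Choose the backend before anything from alphamind is imported;
    # any other value (or no value) falls back to the postgres modules.
    os.environ["DB_VENDOR"] = "mysql"

    from alphamind.data.engines.sqlengine import SqlEngine  # MySQL implementation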
--------------------------------------------------------------------------------
/alphamind/data/engines/utilities.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-12-25
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import os
9 | from typing import Dict
10 | from typing import Iterable
11 |
12 | from alphamind.data.dbmodel.models import Market
13 | from alphamind.data.dbmodel.models import RiskCovDay
14 | from alphamind.data.dbmodel.models import RiskCovLong
15 | from alphamind.data.dbmodel.models import RiskCovShort
16 | from alphamind.data.dbmodel.models import RiskExposure
17 | from alphamind.data.dbmodel.models import SpecificRiskDay
18 | from alphamind.data.dbmodel.models import SpecificRiskLong
19 | from alphamind.data.dbmodel.models import SpecificRiskShort
20 | from alphamind.data.engines.industries import INDUSTRY_MAPPING
21 |
22 |
23 | def _map_risk_model_table(risk_model: str) -> tuple:
24 | if risk_model == 'day':
25 | return RiskCovDay, SpecificRiskDay
26 | elif risk_model == 'short':
27 | return RiskCovShort, SpecificRiskShort
28 | elif risk_model == 'long':
29 | return RiskCovLong, SpecificRiskLong
30 | else:
31 | raise ValueError("risk model name {0} is not recognized".format(risk_model))
32 |
33 |
34 | def _map_factors(factors: Iterable[str], used_factor_tables) -> Dict:
35 | factor_cols = {}
36 | factors = set(factors).difference({'trade_date', 'code', 'isOpen'})
37 | to_keep = factors.copy()
38 | for f in factors:
39 | for t in used_factor_tables:
40 | if f in t.columns:
41 | factor_cols[t.columns[f].name] = t
42 | to_keep.remove(f)
43 | break
44 |
45 | if to_keep:
46 |         raise ValueError("factors in <{0}> can't be found".format(to_keep))
47 |
48 | return factor_cols
49 |
50 |
51 | if "DB_VENDOR" in os.environ and os.environ["DB_VENDOR"].lower() == "mysql":
52 | def _map_industry_category(category: str) -> str:
53 | if category == 'sw':
54 | return '申万行业分类(2014)'
55 | elif category == 'zz':
56 | return '中证行业分类'
57 | elif category == 'zx':
58 | return '中信标普行业分类'
59 | elif category == 'zjh':
60 | return '证监会行业分类(2012)-证监会'
61 | else:
62 |             raise ValueError("industry category {0} is not supported".format(category))
63 | else:
64 | def _map_industry_category(category: str) -> str:
65 | if category == 'sw':
66 | return '申万行业分类'
67 | elif category == 'sw_adj':
68 | return '申万行业分类修订'
69 | elif category == 'zz':
70 | return '中证行业分类'
71 | elif category == 'dx':
72 | return '东兴行业分类'
73 | elif category == 'zjh':
74 | return '证监会行业V2012'
75 | else:
76 |             raise ValueError("industry category {0} is not supported".format(category))
77 |
78 |
79 | def industry_list(category: str, level: int = 1) -> list:
80 | return INDUSTRY_MAPPING[category][level]
81 |
--------------------------------------------------------------------------------
/alphamind/data/neutralize.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-4-25
4 |
5 | @author: cheng.li
6 | """
7 |
8 | from typing import Dict
9 | from typing import Tuple
10 | from typing import Union
11 |
12 | import numba as nb
13 | import numpy as np
14 |
15 | import alphamind.utilities as utils
16 |
17 |
18 | def neutralize(x: np.ndarray,
19 | y: np.ndarray,
20 | groups: np.ndarray = None,
21 | detail: bool = False,
22 | weights: np.ndarray = None) \
23 | -> Union[np.ndarray, Tuple[np.ndarray, Dict]]:
24 | if y.ndim == 1:
25 | y = y.reshape((-1, 1))
26 |
27 | if weights is None:
28 | weights = np.ones(len(y), dtype=float)
29 |
30 | output_dict = {}
31 |
32 | if detail:
33 | exposure = np.zeros(x.shape + (y.shape[1],))
34 | explained = np.zeros(x.shape + (y.shape[1],))
35 | output_dict['exposure'] = exposure
36 | output_dict['explained'] = explained
37 |
38 | if groups is not None:
39 | res = np.zeros(y.shape)
40 | index_diff, order = utils.groupby(groups)
41 | start = 0
42 | if detail:
43 | for diff_loc in index_diff:
44 | curr_idx = order[start:diff_loc + 1]
45 | curr_x, b = _sub_step(x, y, weights, curr_idx, res)
46 | exposure[curr_idx, :, :] = b
47 | explained[curr_idx] = ls_explain(curr_x, b)
48 | start = diff_loc + 1
49 | else:
50 | for diff_loc in index_diff:
51 | curr_idx = order[start:diff_loc + 1]
52 | _sub_step(x, y, weights, curr_idx, res)
53 | start = diff_loc + 1
54 | else:
55 | try:
56 | b = ls_fit(x, y, weights)
57 |         except np.linalg.LinAlgError:
58 | b = ls_fit_pinv(x, y, weights)
59 |
60 | res = ls_res(x, y, b)
61 |
62 | if detail:
63 | explained[:, :, :] = ls_explain(x, b)
64 | exposure[:] = b
65 |
66 | if output_dict:
67 | return res, output_dict
68 | else:
69 | return res
70 |
71 |
72 | def _sub_step(x, y, w, curr_idx, res) -> Tuple[np.ndarray, np.ndarray]:
73 | curr_x, curr_y, curr_w = x[curr_idx], y[curr_idx], w[curr_idx]
74 | try:
75 | b = ls_fit(curr_x, curr_y, curr_w)
76 |     except np.linalg.LinAlgError:
77 | b = ls_fit_pinv(curr_x, curr_y, curr_w)
78 | res[curr_idx] = ls_res(curr_x, curr_y, b)
79 | return curr_x, b
80 |
81 |
82 | @nb.njit(nogil=True, cache=True)
83 | def ls_fit(x: np.ndarray, y: np.ndarray, w: np.ndarray) -> np.ndarray:
84 | x_bar = x.T * w
85 | b = np.linalg.solve(x_bar @ x, x_bar @ y)
86 | return b
87 |
88 |
89 | @nb.njit(nogil=True, cache=True)
90 | def ls_fit_pinv(x: np.ndarray, y: np.ndarray, w: np.ndarray) -> np.ndarray:
91 | x_bar = x.T * w
92 | b = np.linalg.pinv(x_bar @ x) @ x_bar @ y
93 | return b
94 |
95 |
96 | @nb.njit(nogil=True, cache=True)
97 | def ls_res(x: np.ndarray, y: np.ndarray, b: np.ndarray) -> np.ndarray:
98 | return y - x @ b
99 |
100 |
101 | @nb.njit(nogil=True, cache=True)
102 | def ls_explain(x: np.ndarray, b: np.ndarray) -> np.ndarray:
103 | m, n = b.shape
104 | return b.reshape((1, m, n)) * x.reshape((-1, m, 1))
105 |
106 |
107 | if __name__ == '__main__':
108 | x = np.random.randn(50000, 10)
109 | y = np.random.randn(50000, 1)
110 | w = np.ones(50000)
111 |
112 | import datetime as dt
113 |
114 | start = dt.datetime.now()
115 | for _ in range(1000):
116 | ls_fit(x, y, w)
117 | print(dt.datetime.now() - start)
118 |
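For reference, a minimal call of neutralize on synthetic data, including the grouped, detailed variant (the shapes follow directly from the code above):

    import numpy as np
    from alphamind.data.neutralize import neutralize

    x = np.random.randn(1000, 3)               # risk exposures
    y = np.random.randn(1000, 1)               # raw factor
    groups = np.random.randint(5, size=1000)   # e.g. industry codes

    # residual of y after regressing out x within each group
    res = neutralize(x, y, groups=groups)

    # detail=True also returns per-row exposures and explained parts
    res, info = neutralize(x, y, groups=groups, detail=True)
    print(info['explained'].shape)             # (1000, 3, 1)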
--------------------------------------------------------------------------------
/alphamind/data/processing.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-8-21
4 |
5 | @author: cheng.li
6 | """
7 |
8 | from typing import List
9 | from typing import Optional
10 |
11 | import numpy as np
12 |
13 | from alphamind.data.neutralize import neutralize
14 | from alphamind.utilities import alpha_logger
15 |
16 |
17 | def factor_processing(raw_factors: np.ndarray,
18 | pre_process: Optional[List] = None,
19 | risk_factors: Optional[np.ndarray] = None,
20 | post_process: Optional[List] = None,
21 | groups=None) -> np.ndarray:
22 | new_factors = raw_factors
23 |
24 | if pre_process:
25 | for p in pre_process:
26 | new_factors = p(new_factors, groups=groups)
27 |
28 | if risk_factors is not None:
29 | risk_factors = risk_factors[:, risk_factors.sum(axis=0) != 0]
30 | new_factors = neutralize(risk_factors, new_factors, groups=groups)
31 |
32 | if post_process:
33 | for p in post_process:
34 | if p.__name__ == 'winsorize_normal':
35 | alpha_logger.warning("winsorize_normal "
36 | "normally should not be done after neutralize")
37 | new_factors = p(new_factors, groups=groups)
38 |
39 | return new_factors
40 |
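A sketch of the intended composition, using the package's own winsorize_normal and standardize as pre-processors: winsorize and standardize per group, neutralize against the risk exposures, then re-standardize.

    import numpy as np
    from alphamind.data.processing import factor_processing
    from alphamind.data.standardize import standardize
    from alphamind.data.winsorize import winsorize_normal

    raw = np.random.randn(500, 1)
    risk = np.random.randn(500, 3)
    groups = np.random.randint(10, size=500)

    clean = factor_processing(raw,
                              pre_process=[winsorize_normal, standardize],
                              risk_factors=risk,
                              post_process=[standardize],
                              groups=groups)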
--------------------------------------------------------------------------------
/alphamind/data/quantile.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-8-16
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import numpy as np
9 |
10 |
11 | def quantile(x: np.ndarray, n_bins: int) -> np.ndarray:
12 | n = x.size
13 | sorter = x.argsort()
14 | inv = np.empty(n, dtype=int)
15 | inv[sorter] = np.arange(n, dtype=int)
16 |
17 | bin_size = float(n) / n_bins
18 |
19 | pillars = [int(i * bin_size) for i in range(1, n_bins + 1)]
20 |
21 | q_groups = np.empty(n, dtype=int)
22 |
23 | starter = 0
24 | for i, r in enumerate(pillars):
25 | q_groups[(inv >= starter) & (inv < r)] = i
26 | starter = r
27 |
28 | return q_groups
29 |
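quantile maps each observation to one of n_bins rank buckets, with 0 for the smallest values. A small worked example:

    import numpy as np
    from alphamind.data.quantile import quantile

    x = np.array([0.3, -1.2, 0.8, 0.1, -0.5, 1.5])
    print(quantile(x, n_bins=3))   # [1 0 2 1 0 2]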
--------------------------------------------------------------------------------
/alphamind/data/rank.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-8-8
4 |
5 | @author: cheng.li
6 | """
7 |
8 | from typing import Optional
9 |
10 | import numpy as np
11 | from scipy.stats import rankdata
12 |
13 | import alphamind.utilities as utils
14 |
15 |
16 | def rank(x: np.ndarray, groups: Optional[np.ndarray] = None) -> np.ndarray:
17 | if x.ndim == 1:
18 | x = x.reshape((-1, 1))
19 |
20 | if groups is not None:
21 | res = np.zeros(x.shape, dtype=int)
22 | index_diff, order = utils.groupby(groups)
23 |
24 | start = 0
25 | for diff_loc in index_diff:
26 | curr_idx = order[start:diff_loc + 1]
27 | res[curr_idx] = (rankdata(x[curr_idx]).astype(float) - 1.).reshape((-1, 1))
28 | start = diff_loc + 1
29 | return res
30 | else:
31 | return (rankdata(x).astype(float) - 1.).reshape((-1, 1))
32 |
33 |
34 | def percentile(x: np.ndarray, groups: Optional[np.ndarray] = None) -> np.ndarray:
35 | if x.ndim == 1:
36 | x = x.reshape((-1, 1))
37 |
38 | if groups is not None:
39 |         res = np.zeros(x.shape, dtype=float)
40 | index_diff, order = utils.groupby(groups)
41 |
42 | start = 0
43 | for diff_loc in index_diff:
44 | curr_idx = order[start:diff_loc + 1]
45 | curr_values = x[curr_idx]
46 | length = len(curr_values) - 1. if len(curr_values) > 1 else 1.
47 | res[curr_idx] = (rankdata(curr_values).astype(float) - 1.) / length
48 | start = diff_loc + 1
49 | return res
50 | else:
51 | length = len(x) - 1. if len(x) > 1 else 1.
52 | return ((rankdata(x).astype(float) - 1.) / length).reshape((-1, 1))
53 |
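Both functions return zero-based results as column vectors, computed per group when groups is given. A quick check of the ungrouped case:

    import numpy as np
    from alphamind.data.rank import percentile, rank

    x = np.array([3., 1., 2., 5., 4.])
    print(rank(x).flatten())         # [2. 0. 1. 4. 3.]
    print(percentile(x).flatten())   # [0.5  0.   0.25 1.   0.75]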
--------------------------------------------------------------------------------
/alphamind/data/standardize.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-4-25
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import numpy as np
9 |
10 | from alphamind.utilities import aggregate
11 | from alphamind.utilities import array_index
12 | from alphamind.utilities import group_mapping
13 | from alphamind.utilities import simple_mean
14 | from alphamind.utilities import simple_sqrsum
15 | from alphamind.utilities import simple_std
16 | from alphamind.utilities import transform
17 |
18 |
19 | def standardize(x: np.ndarray, groups: np.ndarray = None, ddof=1) -> np.ndarray:
20 | if groups is not None:
21 | groups = group_mapping(groups)
22 | mean_values = transform(groups, x, 'mean')
23 | std_values = transform(groups, x, 'std', ddof)
24 |
25 | return (x - mean_values) / np.maximum(std_values, 1e-8)
26 | else:
27 | return (x - simple_mean(x, axis=0)) / np.maximum(simple_std(x, axis=0, ddof=ddof), 1e-8)
28 |
29 |
30 | def projection(x: np.ndarray, groups: np.ndarray = None, axis=1) -> np.ndarray:
31 | if groups is not None and axis == 0:
32 | groups = group_mapping(groups)
33 | projected = transform(groups, x, 'project')
34 | return projected
35 | else:
36 | return x / simple_sqrsum(x, axis=axis).reshape((-1, 1))
37 |
38 |
39 | class Standardizer(object):
40 |
41 | def __init__(self, ddof: int = 1):
42 | self.ddof = ddof
43 | self.mean = None
44 | self.std = None
45 | self.labels = None
46 |
47 | def fit(self, x: np.ndarray, groups: np.ndarray = None):
48 | if groups is not None:
49 | group_index = group_mapping(groups)
50 | self.mean = aggregate(group_index, x, 'mean')
51 | self.std = aggregate(group_index, x, 'std', self.ddof)
52 | self.labels = np.unique(groups)
53 | else:
54 | self.mean = simple_mean(x, axis=0)
55 | self.std = simple_std(x, axis=0, ddof=self.ddof)
56 |
57 | def transform(self, x: np.ndarray, groups: np.ndarray = None) -> np.ndarray:
58 | if groups is not None:
59 | index = array_index(self.labels, groups)
60 | return (x - self.mean[index]) / np.maximum(self.std[index], 1e-8)
61 | else:
62 | return (x - self.mean) / np.maximum(self.std, 1e-8)
63 |
64 | def __call__(self, x: np.ndarray, groups: np.ndarray = None) -> np.ndarray:
65 | return standardize(x, groups, self.ddof)
66 |
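Standardizer separates fitting from application, which matters when train-time cross-sectional statistics must be reused on new data; note that calling the instance directly re-standardizes the input with its own statistics rather than the fitted ones. A minimal sketch:

    import numpy as np
    from alphamind.data.standardize import Standardizer

    train = np.random.randn(1000, 4)
    test = np.random.randn(200, 4)

    s = Standardizer(ddof=1)
    s.fit(train)             # remember train means / stds
    z = s.transform(test)    # apply them to unseen rows
    print(z.mean(axis=0))    # close to, but not exactly, zero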
--------------------------------------------------------------------------------
/alphamind/data/transformer.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-8-23
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import copy
9 |
10 | import pandas as pd
11 | from simpleutils.asserts import require
12 | from PyFin.Analysis.SecurityValueHolders import SecurityValueHolder
13 | from PyFin.api import transform as transform_impl
14 |
15 |
16 | def factor_translator(factor_pool):
17 | if not factor_pool:
18 | return None, None
19 |
20 | if isinstance(factor_pool, str):
21 | return {factor_pool: factor_pool}, [factor_pool]
22 | elif isinstance(factor_pool, SecurityValueHolder):
23 | return {str(factor_pool): factor_pool}, sorted(factor_pool.fields)
24 | elif isinstance(factor_pool, dict):
25 | dependency = set()
26 | for k, v in factor_pool.items():
27 | require(isinstance(k, str), ValueError,
28 | 'factor_name {0} should be string.'.format(k))
29 | require(isinstance(v, SecurityValueHolder) or isinstance(v, str),
30 | ValueError,
31 |                     'expression {0} should be a value holder or a string.'.format(v))
32 |
33 | if isinstance(v, str):
34 | dependency = dependency.union([v])
35 | else:
36 | dependency = dependency.union(v.fields)
37 | return factor_pool, sorted(dependency)
38 | elif isinstance(factor_pool, list):
39 | factor_dict = {}
40 | dependency = set()
41 |
42 |         for f in factor_pool:
43 |             if isinstance(f, str):
44 |                 factor_dict[f] = f
45 |                 dependency = dependency.union([f])
46 |             elif isinstance(f, SecurityValueHolder):
47 |                 factor_dict[str(f)] = f
48 |                 dependency = dependency.union(f.fields)
49 |
50 | return factor_dict, sorted(dependency)
51 | else:
52 | raise ValueError('{0} is not in valid format as factors'.format(factor_pool))
53 |
54 |
55 | class Transformer:
56 |
57 | def __init__(self,
58 | expressions):
59 | expression_dict, expression_dependency = \
60 | factor_translator(copy.deepcopy(expressions))
61 |
62 | if expression_dict:
63 | self.names = sorted(expression_dict.keys())
64 | self.expressions = [expression_dict[n] for n in self.names]
65 | self.dependency = expression_dependency
66 | else:
67 | self.names = []
68 | self.expressions = []
69 | self.dependency = []
70 |
71 | def transform(self, group_name, data):
72 | if len(data) > 0:
73 | transformed_data = transform_impl(data,
74 | self.expressions,
75 | self.names,
76 | group_name,
77 | dropna=False)
78 | return transformed_data
79 | else:
80 | return pd.DataFrame()
81 |
82 |
83 | if __name__ == '__main__':
84 | transformer = Transformer(['c', 'a'])
85 |
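Plain strings pass through factor_translator unchanged, while PyFin expressions are keyed by their string form; the second return value is the sorted set of raw fields that must be fetched. For the string-only case:

    from alphamind.data.transformer import factor_translator

    names, fields = factor_translator(['roe_q', 'eps_q'])
    print(names)    # {'roe_q': 'roe_q', 'eps_q': 'eps_q'}
    print(fields)   # ['eps_q', 'roe_q']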
--------------------------------------------------------------------------------
/alphamind/exceptions/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alpha-miner/alpha-mind/023fca01d2cea7cd50328396c60b06c99706c426/alphamind/exceptions/__init__.py
--------------------------------------------------------------------------------
/alphamind/exceptions/exceptions.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2018-6-12
4 |
5 | @author: cheng.li
6 | """
7 |
8 |
9 | class PortfolioBuilderException(Exception):
10 |
11 | def __init__(self, msg):
12 | self.msg = msg
13 |
14 | def __str__(self):
15 | return str(self.msg)
16 |
--------------------------------------------------------------------------------
/alphamind/execution/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alpha-miner/alpha-mind/023fca01d2cea7cd50328396c60b06c99706c426/alphamind/execution/__init__.py
--------------------------------------------------------------------------------
/alphamind/execution/baseexecutor.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-9-22
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import abc
9 |
10 | import numpy as np
11 | import pandas as pd
12 |
13 |
14 | class ExecutorBase(metaclass=abc.ABCMeta):
15 |
16 | def __init__(self):
17 | self.current_pos = pd.DataFrame()
18 |
19 | @abc.abstractmethod
20 | def execute(self, target_pos: pd.DataFrame) -> pd.DataFrame:
21 | pass
22 |
23 | @staticmethod
24 | def calc_turn_over(target_pos: pd.DataFrame, current_pos: pd.DataFrame) -> float:
25 | pos_merged = pd.merge(target_pos, current_pos, on=['code'], how='outer')
26 | pos_merged.fillna(0, inplace=True)
27 | turn_over = np.abs(pos_merged.weight_x - pos_merged.weight_y).sum()
28 | return turn_over
29 |
30 | def set_current(self, current_pos: pd.DataFrame):
31 | self.current_pos = current_pos.copy()
32 |
33 | def update(self, data_dict: dict):
34 | pass
35 |
--------------------------------------------------------------------------------
/alphamind/execution/naiveexecutor.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-9-22
4 |
5 | @author: cheng.li
6 | """
7 |
8 | from typing import Tuple
9 |
10 | import pandas as pd
11 |
12 | from alphamind.execution.baseexecutor import ExecutorBase
13 |
14 |
15 | class NaiveExecutor(ExecutorBase):
16 |
17 | def __init__(self):
18 | super().__init__()
19 |
20 | def execute(self, target_pos: pd.DataFrame) -> Tuple[float, pd.DataFrame]:
21 | if self.current_pos.empty:
22 | turn_over = target_pos.weight.abs().sum()
23 | else:
24 | turn_over = self.calc_turn_over(target_pos, self.current_pos)
25 | self.current_pos = target_pos.copy()
26 | return turn_over, target_pos
27 |
--------------------------------------------------------------------------------
/alphamind/execution/pipeline.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-9-25
4 |
5 | @author: cheng.li
6 | """
7 |
8 | from typing import List
9 | from typing import Tuple
10 |
11 | import pandas as pd
12 |
13 | from alphamind.execution.baseexecutor import ExecutorBase
14 |
15 |
16 | class ExecutionPipeline(object):
17 |
18 | def __init__(self, executors: List[ExecutorBase]):
19 | self.executors = executors
20 |
21 | def execute(self, target_pos) -> Tuple[float, pd.DataFrame]:
22 |
23 |         turn_over, planned_pos = 0., target_pos
24 |
25 |         for executor in self.executors:
26 |             turn_over, planned_pos = executor.execute(planned_pos)
27 |
28 |         executed_pos = planned_pos
29 |
30 | for executor in self.executors:
31 | executor.set_current(executed_pos)
32 |
33 | return turn_over, executed_pos
34 |
35 | def update(self, data_dict):
36 |
37 | for executor in self.executors:
38 | executor.update(data_dict=data_dict)
39 |
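Executors compose left to right: each receives the position the previous one produced, and the final position is pushed back into every executor as the new current state. A runnable sketch with two executors from this package:

    import pandas as pd
    from alphamind.execution.naiveexecutor import NaiveExecutor
    from alphamind.execution.pipeline import ExecutionPipeline
    from alphamind.execution.thresholdexecutor import ThresholdExecutor

    pipeline = ExecutionPipeline([ThresholdExecutor(turn_over_threshold=0.2),
                                  NaiveExecutor()])

    target = pd.DataFrame({'code': [1, 2, 3],
                           'weight': [0.4, 0.4, 0.2]})
    turn_over, executed = pipeline.execute(target)
    print(turn_over, executed.weight.tolist())   # 1.0 [0.4, 0.4, 0.2]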
--------------------------------------------------------------------------------
/alphamind/execution/targetvolexecutor.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-9-22
4 |
5 | @author: cheng.li
6 | """
7 |
8 | from typing import Tuple
9 |
10 | import pandas as pd
11 | from PyFin.Math.Accumulators import MovingAverage
12 | from PyFin.Math.Accumulators import MovingStandardDeviation
13 |
14 | from alphamind.execution.baseexecutor import ExecutorBase
15 |
16 |
17 | class TargetVolExecutor(ExecutorBase):
18 |
19 | def __init__(self, window=30, target_vol=0.01):
20 | super().__init__()
21 | self.m_vol = MovingStandardDeviation(window, 'return')
22 | self.m_leverage = MovingAverage(window, 'leverage')
23 | self.target_vol = target_vol
24 | self.multiplier = 1.
25 |
26 | def execute(self, target_pos: pd.DataFrame) -> Tuple[float, pd.DataFrame]:
27 | if not self.m_vol.isFull():
28 | if self.current_pos.empty:
29 | turn_over = target_pos.weight.abs().sum()
30 | else:
31 | turn_over = self.calc_turn_over(target_pos, self.current_pos)
32 | return turn_over, target_pos
33 | else:
34 | c_vol = self.m_vol.result()
35 | c_leverage = self.m_leverage.result()
36 | self.multiplier = self.target_vol / c_vol * c_leverage
37 | candidate_pos = target_pos.copy()
38 | candidate_pos['weight'] = candidate_pos.weight.values * self.multiplier
39 | turn_over = self.calc_turn_over(candidate_pos, self.current_pos)
40 | return turn_over, candidate_pos
41 |
42 | def set_current(self, current_pos: pd.DataFrame):
43 | super().set_current(current_pos)
44 | self.m_leverage.push({'leverage': current_pos.weight.abs().sum()})
45 |
46 | def update(self, data_dict: dict):
47 | self.m_vol.push(data_dict)
48 |
--------------------------------------------------------------------------------
/alphamind/execution/thresholdexecutor.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-9-22
4 |
5 | @author: cheng.li
6 | """
7 |
8 | from typing import Tuple
9 |
10 | import pandas as pd
11 |
12 | from alphamind.execution.baseexecutor import ExecutorBase
13 |
14 |
15 | class ThresholdExecutor(ExecutorBase):
16 |
17 | def __init__(self, turn_over_threshold: float, is_relative=True):
18 | super().__init__()
19 | self.threshold = turn_over_threshold
20 | self.is_relative = is_relative
21 |
22 | def execute(self, target_pos: pd.DataFrame) -> Tuple[float, pd.DataFrame]:
23 |
24 | if self.current_pos.empty:
25 | return target_pos.weight.abs().sum(), target_pos
26 | else:
27 | turn_over = self.calc_turn_over(target_pos, self.current_pos)
28 |
29 | is_break = turn_over >= self.threshold * self.current_pos.weight.sum() if self.is_relative else turn_over >= self.threshold
30 |
31 | if is_break:
32 | return turn_over, target_pos
33 | else:
34 | return 0., self.current_pos.copy()
35 |
--------------------------------------------------------------------------------
/alphamind/formula/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alpha-miner/alpha-mind/023fca01d2cea7cd50328396c60b06c99706c426/alphamind/formula/__init__.py
--------------------------------------------------------------------------------
/alphamind/formula/utilities.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-11-27
4 |
5 | @author: cheng.li
6 | """
7 |
8 | from alphamind.utilities import decode
9 | from alphamind.utilities import encode
10 |
11 |
12 | def encode_formula(formula):
13 | str_repr = encode(formula)
14 | return {'desc': str_repr,
15 | 'formula_type': formula.__class__.__module__ + "." + formula.__class__.__name__,
16 | 'dependency': formula.fields,
17 | 'window': formula.window}
18 |
19 |
20 | def decode_formula(str_repr):
21 | formula = decode(str_repr)
22 | return formula
23 |
24 |
25 | if __name__ == '__main__':
26 | from PyFin.api import *
27 |
28 | eps_q_res = RES(20, LAST('eps_q') ^ LAST('roe_q'))
29 | print(eps_q_res)
30 |
31 | str_repr = encode_formula(eps_q_res)
32 | decoded_formula = decode_formula(str_repr)
33 | print(decoded_formula)
34 |
--------------------------------------------------------------------------------
/alphamind/model/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-5-2
4 |
5 | @author: cheng.li
6 | """
7 |
8 | from alphamind.model.linearmodel import ConstLinearModel
9 | from alphamind.model.linearmodel import LassoRegression
10 | from alphamind.model.linearmodel import LinearRegression
11 | from alphamind.model.linearmodel import LogisticRegression
12 | from alphamind.model.loader import load_model
13 | from alphamind.model.svm import NvSVRModel
14 | from alphamind.model.treemodel import RandomForestClassifier
15 | from alphamind.model.treemodel import RandomForestRegressor
16 | from alphamind.model.treemodel import XGBClassifier
17 | from alphamind.model.treemodel import XGBRegressor
18 | from alphamind.model.treemodel import XGBTrainer
19 |
20 | __all__ = ['LinearRegression',
21 | 'LassoRegression',
22 | 'ConstLinearModel',
23 | 'LogisticRegression',
24 | 'RandomForestRegressor',
25 | 'RandomForestClassifier',
26 | 'XGBRegressor',
27 | 'XGBClassifier',
28 | 'XGBTrainer',
29 | 'NvSVRModel',
30 | 'load_model']
31 |
--------------------------------------------------------------------------------
/alphamind/model/linearmodel.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-5-10
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import numpy as np
9 | from simpleutils.asserts import require
10 | from sklearn.linear_model import Lasso
11 | from sklearn.linear_model import LinearRegression as LinearRegressionImpl
12 | from sklearn.linear_model import LogisticRegression as LogisticRegressionImpl
13 |
14 | from alphamind.model.modelbase import create_model_base
15 |
16 |
17 | class ConstLinearModelImpl(object):
18 |
19 |     def __init__(self, weights: np.ndarray = None):
20 |         self.weights = weights.flatten() if weights is not None else None
21 |
22 | def fit(self, x: np.ndarray, y: np.ndarray):
23 | raise NotImplementedError("Const linear model doesn't offer fit methodology")
24 |
25 | def predict(self, x: np.ndarray):
26 | return x @ self.weights
27 |
28 | def score(self, x: np.ndarray, y: np.ndarray) -> float:
29 | y_hat = self.predict(x)
30 | y_bar = y.mean()
31 | ssto = ((y - y_bar) ** 2).sum()
32 | sse = ((y - y_hat) ** 2).sum()
33 | return 1. - sse / ssto
34 |
35 |
36 | class ConstLinearModel(create_model_base()):
37 |
38 | def __init__(self,
39 | features=None,
40 | weights: dict = None,
41 | fit_target=None):
42 | super().__init__(features=features, fit_target=fit_target)
43 | if features is not None and weights is not None:
44 | require(len(features) == len(weights),
45 | ValueError,
46 | "length of features is not equal to length of weights")
47 | if weights:
48 | self.impl = ConstLinearModelImpl(np.array([weights[name] for name in self.features]))
49 |
50 | def save(self):
51 | model_desc = super().save()
52 | model_desc['weight'] = list(self.impl.weights)
53 | return model_desc
54 |
55 | @classmethod
56 | def load(cls, model_desc: dict):
57 | return super().load(model_desc)
58 |
59 | @property
60 | def weights(self):
61 | return self.impl.weights.tolist()
62 |
63 |
64 | class LinearRegression(create_model_base('sklearn')):
65 |
66 | def __init__(self, features=None, fit_intercept: bool = False, fit_target=None, **kwargs):
67 | super().__init__(features=features, fit_target=fit_target)
68 | self.impl = LinearRegressionImpl(fit_intercept=fit_intercept, **kwargs)
69 |
70 | def save(self) -> dict:
71 | model_desc = super().save()
72 | model_desc['weight'] = self.impl.coef_.tolist()
73 | return model_desc
74 |
75 | @property
76 | def weights(self):
77 | return self.impl.coef_.tolist()
78 |
79 |
80 | class LassoRegression(create_model_base('sklearn')):
81 |
82 | def __init__(self, alpha=0.01, features=None, fit_intercept: bool = False, fit_target=None,
83 | **kwargs):
84 | super().__init__(features=features, fit_target=fit_target)
85 | self.impl = Lasso(alpha=alpha, fit_intercept=fit_intercept, **kwargs)
86 |
87 | def save(self) -> dict:
88 | model_desc = super().save()
89 | model_desc['weight'] = self.impl.coef_.tolist()
90 | return model_desc
91 |
92 | @property
93 | def weights(self):
94 | return self.impl.coef_.tolist()
95 |
96 |
97 | class LogisticRegression(create_model_base('sklearn')):
98 |
99 | def __init__(self, features=None, fit_intercept: bool = False, fit_target=None, **kwargs):
100 | super().__init__(features=features, fit_target=fit_target)
101 | self.impl = LogisticRegressionImpl(fit_intercept=fit_intercept, **kwargs)
102 |
103 | def save(self) -> dict:
104 | model_desc = super().save()
105 | model_desc['weight'] = self.impl.coef_.tolist()
106 | return model_desc
107 |
108 | @property
109 | def weights(self):
110 | return self.impl.coef_.tolist()
111 |
112 |
113 | if __name__ == '__main__':
114 |     import pprint
115 |     import pandas as pd
116 |
117 |     ls = ConstLinearModel(features=['a', 'b'], weights={'a': 0.5, 'b': 0.5})
118 |
119 |     x = pd.DataFrame([[0.2, 0.2],
120 |                       [0.1, 0.1],
121 |                       [0.3, 0.1]], columns=['a', 'b'])
122 |     print(ls.predict(x))
123 |
124 | desc = ls.save()
125 | new_model = ConstLinearModel.load(desc)
126 |
127 | pprint.pprint(new_model.save())
128 |
--------------------------------------------------------------------------------
/alphamind/model/loader.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-9-5
4 |
5 | @author: cheng.li
6 | """
7 |
8 | from alphamind.model.linearmodel import ConstLinearModel
9 | from alphamind.model.linearmodel import LassoRegression
10 | from alphamind.model.linearmodel import LinearRegression
11 | from alphamind.model.linearmodel import LogisticRegression
12 | from alphamind.model.modelbase import ModelBase
13 | from alphamind.model.svm import NvSVRModel
14 | from alphamind.model.treemodel import RandomForestClassifier
15 | from alphamind.model.treemodel import RandomForestRegressor
16 | from alphamind.model.treemodel import XGBClassifier
17 | from alphamind.model.treemodel import XGBRegressor
18 | from alphamind.model.treemodel import XGBTrainer
19 |
20 |
21 | def load_model(model_desc: dict) -> ModelBase:
22 | model_name = model_desc['model_name']
23 | model_name_parts = set(model_name.split('.'))
24 |
25 | if 'ConstLinearModel' in model_name_parts:
26 | return ConstLinearModel.load(model_desc)
27 | elif 'LinearRegression' in model_name_parts:
28 | return LinearRegression.load(model_desc)
29 | elif 'LassoRegression' in model_name_parts:
30 | return LassoRegression.load(model_desc)
31 | elif 'LogisticRegression' in model_name_parts:
32 | return LogisticRegression.load(model_desc)
33 | elif 'RandomForestRegressor' in model_name_parts:
34 | return RandomForestRegressor.load(model_desc)
35 | elif 'RandomForestClassifier' in model_name_parts:
36 | return RandomForestClassifier.load(model_desc)
37 | elif 'XGBRegressor' in model_name_parts:
38 | return XGBRegressor.load(model_desc)
39 | elif 'XGBClassifier' in model_name_parts:
40 | return XGBClassifier.load(model_desc)
41 | elif 'XGBTrainer' in model_name_parts:
42 | return XGBTrainer.load(model_desc)
43 | elif 'NvSVR' in model_name_parts:
44 | return NvSVRModel.load(model_desc)
45 | else:
46 | raise ValueError('{0} is not currently supported in model loader.'.format(model_name))
47 |
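load_model dispatches on the trailing class name inside model_desc['model_name'], so a model saved in one process can be rebuilt in another. A round-trip sketch with LinearRegression on random data:

    import numpy as np
    import pandas as pd
    from alphamind.model import LinearRegression
    from alphamind.model.loader import load_model

    model = LinearRegression(features=['f1', 'f2'])
    x = pd.DataFrame(np.random.randn(100, 2), columns=['f1', 'f2'])
    y = np.random.randn(100)
    model.fit(x, y)

    desc = model.save()            # plain dict description
    restored = load_model(desc)
    assert np.allclose(model.predict(x), restored.predict(x))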
--------------------------------------------------------------------------------
/alphamind/model/modelbase.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-9-4
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import abc
9 | from distutils.version import LooseVersion
10 |
11 | import arrow
12 | import numpy as np
13 | import pandas as pd
14 | from simpleutils.miscellaneous import list_eq
15 | from sklearn import __version__ as sklearn_version
16 | from xgboost import __version__ as xgboost_version
17 |
18 | from alphamind.data.transformer import Transformer
19 | from alphamind.utilities import alpha_logger
20 | from alphamind.utilities import decode
21 | from alphamind.utilities import encode
22 |
23 |
24 | class ModelBase(metaclass=abc.ABCMeta):
25 |
26 | def __init__(self, features=None, fit_target=None):
27 | if features is not None:
28 | self.formulas = Transformer(features)
29 | self.features = self.formulas.names
30 | else:
31 | self.features = None
32 |
33 | if fit_target is not None:
34 | self.fit_target = Transformer(fit_target)
35 | else:
36 | self.fit_target = None
37 | self.impl = None
38 | self.trained_time = None
39 |
40 | def model_encode(self):
41 | return encode(self.impl)
42 |
43 | @classmethod
44 | def model_decode(cls, model_desc):
45 | return decode(model_desc)
46 |
47 | def __eq__(self, rhs):
48 | return self.model_encode() == rhs.model_encode() \
49 | and self.trained_time == rhs.trained_time \
50 | and list_eq(self.features, rhs.features) \
51 | and encode(self.formulas) == encode(rhs.formulas) \
52 | and encode(self.fit_target) == encode(rhs.fit_target)
53 |
54 | def fit(self, x: pd.DataFrame, y: np.ndarray):
55 | self.impl.fit(x[self.features].values, y.flatten())
56 | self.trained_time = arrow.now().format("YYYY-MM-DD HH:mm:ss")
57 |
58 | def predict(self, x: pd.DataFrame) -> np.ndarray:
59 | return self.impl.predict(x[self.features].values)
60 |
61 | def score(self, x: pd.DataFrame, y: np.ndarray) -> float:
62 | return self.impl.score(x[self.features].values, y)
63 |
64 | def ic(self, x: pd.DataFrame, y: np.ndarray) -> float:
65 | predict_y = self.impl.predict(x[self.features].values)
66 | return np.corrcoef(predict_y, y)[0, 1]
67 |
68 | @abc.abstractmethod
69 | def save(self) -> dict:
70 |
71 | if self.__class__.__module__ == '__main__':
72 | alpha_logger.warning(
73 | "model is defined in a main module. The model_name may not be correct.")
74 |
75 | model_desc = dict(model_name=self.__class__.__module__ + "." + self.__class__.__name__,
76 | language='python',
77 | saved_time=arrow.now().format("YYYY-MM-DD HH:mm:ss"),
78 | features=list(self.features),
79 | trained_time=self.trained_time,
80 | desc=self.model_encode(),
81 | formulas=encode(self.formulas),
82 | fit_target=encode(self.fit_target),
83 | internal_model=self.impl.__class__.__module__ + "." + self.impl.__class__.__name__)
84 | return model_desc
85 |
86 | @classmethod
87 | @abc.abstractmethod
88 | def load(cls, model_desc: dict):
89 | obj_layout = cls()
90 | obj_layout.features = model_desc['features']
91 | obj_layout.formulas = decode(model_desc['formulas'])
92 | obj_layout.trained_time = model_desc['trained_time']
93 | obj_layout.impl = cls.model_decode(model_desc['desc'])
94 | if 'fit_target' in model_desc:
95 | obj_layout.fit_target = decode(model_desc['fit_target'])
96 | else:
97 | obj_layout.fit_target = None
98 | return obj_layout
99 |
100 |
101 | def create_model_base(party_name=None):
102 | if not party_name:
103 | return ModelBase
104 | else:
105 | class ExternalLibBase(ModelBase):
106 | _lib_name = party_name
107 |
108 | def save(self) -> dict:
109 | model_desc = super().save()
110 | if self._lib_name == 'sklearn':
111 | model_desc[self._lib_name + "_version"] = sklearn_version
112 | elif self._lib_name == 'xgboost':
113 |                     model_desc[self._lib_name + "_version"] = xgboost_version
114 | else:
115 | raise ValueError(
116 | "3rd party lib name ({0}) is not recognized".format(self._lib_name))
117 | return model_desc
118 |
119 | @classmethod
120 | def load(cls, model_desc: dict):
121 | obj_layout = super().load(model_desc)
122 |
123 | if cls._lib_name == 'sklearn':
124 | current_version = sklearn_version
125 | elif cls._lib_name == 'xgboost':
126 |                     current_version = xgboost_version
127 | else:
128 | raise ValueError(
129 | "3rd party lib name ({0}) is not recognized".format(cls._lib_name))
130 |
131 | if LooseVersion(current_version) < LooseVersion(
132 | model_desc[cls._lib_name + "_version"]):
133 | alpha_logger.warning(
134 | 'Current {2} version {0} is lower than the model version {1}. '
135 |                         'Loaded model may work incorrectly.'.format(current_version,
136 |                                                                     model_desc[cls._lib_name + "_version"],
137 | cls._lib_name))
138 | return obj_layout
139 |
140 | return ExternalLibBase
141 |
--------------------------------------------------------------------------------
/alphamind/model/svm.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2018-7-9
4 |
5 | @author: cheng.li
6 | """
7 |
8 | from sklearn.svm import NuSVR
9 |
10 | from alphamind.model.modelbase import create_model_base
11 |
12 |
13 | class NvSVRModel(create_model_base('sklearn')):
14 |
15 | def __init__(self,
16 | features=None,
17 | fit_target=None,
18 | **kwargs):
19 | super().__init__(features=features, fit_target=fit_target)
20 | self.impl = NuSVR(**kwargs)
21 |
--------------------------------------------------------------------------------
/alphamind/portfolio/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-4-26
4 |
5 | @author: cheng.li
6 | """
7 |
--------------------------------------------------------------------------------
/alphamind/portfolio/evolver.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-11-23
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import numpy as np
9 |
10 |
11 | def evolve_positions(positions: np.ndarray, dx_ret: np.ndarray) -> np.ndarray:
12 | # assume return is log return
13 |
14 | simple_return = np.exp(dx_ret)
15 | evolved_positions = positions * simple_return
16 | leverage = np.abs(positions).sum()
17 | evolved_positions = evolved_positions * leverage / np.abs(evolved_positions).sum()
18 | return evolved_positions
19 |
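Since dx_ret is a log return, positions drift with exp(dx_ret) and are then rescaled so gross leverage matches the pre-evolution book. A worked example:

    import numpy as np
    from alphamind.portfolio.evolver import evolve_positions

    positions = np.array([0.5, -0.3, 0.2])    # gross leverage 1.0
    dx_ret = np.array([0.02, -0.01, 0.00])    # log returns

    evolved = evolve_positions(positions, dx_ret)
    print(evolved)
    print(np.abs(evolved).sum())              # 1.0, leverage preserved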
--------------------------------------------------------------------------------
/alphamind/portfolio/linearbuilder.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-5-5
4 |
5 | @author: cheng.li
6 | """
7 |
8 | from typing import Tuple
9 | from typing import Union
10 |
11 | import numpy as np
12 | from alphamind.portfolio.optimizers import LPOptimizer
13 | from alphamind.portfolio.optimizers import L1LPOptimizer
14 | from alphamind.exceptions.exceptions import PortfolioBuilderException
15 |
16 |
17 | def linear_builder(er: np.ndarray,
18 | lbound: Union[np.ndarray, float] = None,
19 | ubound: Union[np.ndarray, float] = None,
20 | risk_constraints: np.ndarray = None,
21 | risk_target: Tuple[np.ndarray, np.ndarray] = None,
22 | turn_over_target: float = None,
23 | current_position: np.ndarray = None,
24 | method: str = "deprecated") -> Tuple[str, np.ndarray, np.ndarray]:
25 | er = er.flatten()
26 |
27 | if risk_constraints is not None:
28 | risk_lbound = risk_target[0].reshape((-1, 1))
29 | risk_ubound = risk_target[1].reshape((-1, 1))
30 | cons_matrix = np.concatenate((risk_constraints.T, risk_lbound, risk_ubound), axis=1)
31 | else:
32 | cons_matrix = None
33 |
34 | if not turn_over_target or current_position is None:
35 | prob = LPOptimizer(-er, cons_matrix, lbound, ubound)
36 |
37 | if prob.status() == "optimal" or prob.status() == 'optimal_inaccurate':
38 | return prob.status(), prob.feval(), prob.x_value()
39 | else:
40 | raise PortfolioBuilderException(prob.status())
41 | elif turn_over_target:
42 | prob = L1LPOptimizer(objective=-er,
43 | cons_matrix=cons_matrix,
44 | current_pos=current_position,
45 | target_turn_over=turn_over_target,
46 | lbound=lbound,
47 | ubound=ubound)
48 |
49 | if prob.status() == 'optimal' or prob.status() == 'optimal_inaccurate':
50 | return prob.status(), prob.feval(), prob.x_value()
51 | else:
52 | raise PortfolioBuilderException(prob.status())
53 |
54 |
55 | if __name__ == '__main__':
56 | n = 5
57 | lb = np.zeros(n)
58 | ub = 4. / n * np.ones(n)
59 | er = np.random.randn(n)
60 | current_pos = np.random.randint(0, n, size=n)
61 | current_pos = current_pos / current_pos.sum()
62 | turn_over_target = 0.1
63 |
64 | cons = np.ones((n, 1))
65 | risk_lbound = np.ones(1)
66 | risk_ubound = np.ones(1)
67 |
68 | status, fvalue, x_values = linear_builder(er,
69 | lb,
70 | ub,
71 | cons,
72 | (risk_lbound, risk_ubound),
73 | turn_over_target,
74 | current_pos,
75 | method='ecos')
76 |
77 | print(status)
78 | print(fvalue)
79 | print(x_values)
80 | print(current_pos)
81 |
82 | print(np.abs(x_values - current_pos).sum())
83 |
--------------------------------------------------------------------------------
/alphamind/portfolio/longshortbulder.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-5-9
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import numpy as np
9 |
10 | from alphamind.utilities import group_mapping
11 | from alphamind.utilities import simple_abssum
12 | from alphamind.utilities import transform
13 |
14 |
15 | def long_short_builder(er: np.ndarray,
16 | leverage: float = 1.,
17 | groups: np.ndarray = None,
18 | masks: np.ndarray = None) -> np.ndarray:
19 | er = er.copy()
20 |
21 | if masks is not None:
22 | er[masks] = 0.
23 | er[~masks] = er[~masks] - er[~masks].mean()
24 |
25 | if er.ndim == 1:
26 | er = er.reshape((-1, 1))
27 |
28 | if groups is None:
29 | return er / simple_abssum(er, axis=0) * leverage
30 | else:
31 | groups = group_mapping(groups)
32 | return transform(groups, er, 'scale', scale=leverage)
33 |
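Without groups, the builder simply rescales scores so the gross (absolute) weight equals the requested leverage; with groups, the same scaling is applied per group. A quick check:

    import numpy as np
    from alphamind.portfolio.longshortbulder import long_short_builder

    er = np.array([0.6, -0.2, 0.1, -0.5])
    w = long_short_builder(er, leverage=1.)
    print(w.flatten())            # er / 1.4
    print(np.abs(w).sum())        # 1.0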
--------------------------------------------------------------------------------
/alphamind/portfolio/meanvariancebuilder.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-6-27
4 |
5 | @author: cheng.li
6 | """
7 |
8 | from typing import Dict
9 | from typing import Optional
10 | from typing import Tuple
11 | from typing import Union
12 | import numpy as np
13 | from alphamind.portfolio.optimizers import (
14 | QuadraticOptimizer,
15 | TargetVolOptimizer
16 | )
17 |
18 | from alphamind.exceptions.exceptions import PortfolioBuilderException
19 |
20 |
21 | def _create_bounds(lbound,
22 | ubound,
23 | bm,
24 | risk_exposure,
25 | risk_target):
26 | if lbound is not None:
27 | lbound = lbound - bm
28 | if ubound is not None:
29 | ubound = ubound - bm
30 |
31 | if risk_exposure is not None:
32 | cons_mat = risk_exposure.T
33 | bm_risk = cons_mat @ bm
34 |
35 | clbound = (risk_target[0] - bm_risk).reshape((-1, 1))
36 | cubound = (risk_target[1] - bm_risk).reshape((-1, 1))
37 | else:
38 | cons_mat = None
39 | clbound = None
40 | cubound = None
41 |
42 | return lbound, ubound, cons_mat, clbound, cubound
43 |
44 |
45 | def _create_result(optimizer, bm):
46 | if optimizer.status() == "optimal" or optimizer.status() == "optimal_inaccurate":
47 | return optimizer.status(), optimizer.feval(), optimizer.x_value() + bm
48 | else:
49 | raise PortfolioBuilderException(optimizer.status())
50 |
51 |
52 | def mean_variance_builder(er: np.ndarray,
53 | risk_model: Dict[str, Union[None, np.ndarray]],
54 | bm: np.ndarray,
55 | lbound: Union[np.ndarray, float, None],
56 | ubound: Union[np.ndarray, float, None],
57 | risk_exposure: Optional[np.ndarray],
58 | risk_target: Optional[Tuple[np.ndarray, np.ndarray]],
59 | lam: float = 1.,
60 | linear_solver: str = 'deprecated') -> Tuple[str, float, np.ndarray]:
61 | lbound, ubound, cons_mat, clbound, cubound = _create_bounds(lbound, ubound, bm, risk_exposure,
62 | risk_target)
63 | if cons_mat is not None:
64 | cons_matrix = np.concatenate([cons_mat, clbound, cubound], axis=1)
65 | else:
66 | cons_matrix = None
67 |
68 | cov = risk_model['cov']
69 | special_risk = risk_model['idsync']
70 | risk_cov = risk_model['factor_cov']
71 | risk_exposure = risk_model['factor_loading']
72 |
73 | prob = QuadraticOptimizer(objective=-er,
74 | cons_matrix=cons_matrix,
75 | lbound=lbound,
76 | ubound=ubound,
77 | penalty=lam,
78 | cov=cov,
79 | factor_cov=risk_cov,
80 | factor_load=risk_exposure,
81 | factor_special=special_risk)
82 |
83 | if prob.status() == "optimal" or prob.status() == 'optimal_inaccurate':
84 | return prob.status(), prob.feval(), prob.x_value() + bm
85 | else:
86 | raise PortfolioBuilderException(prob.status())
87 |
88 |
89 | def target_vol_builder(er: np.ndarray,
90 | risk_model: Dict[str, Union[None, np.ndarray]],
91 | bm: np.ndarray,
92 | lbound: Union[np.ndarray, float],
93 | ubound: Union[np.ndarray, float],
94 | risk_exposure: Optional[np.ndarray],
95 | risk_target: Optional[Tuple[np.ndarray, np.ndarray]],
96 | vol_target: float = 1.,
97 | linear_solver: str = 'ma27') -> Tuple[str, float, np.ndarray]:
98 | lbound, ubound, cons_mat, clbound, cubound = _create_bounds(lbound, ubound, bm, risk_exposure,
99 | risk_target)
100 |
101 | if cons_mat is not None:
102 | cons_matrix = np.concatenate([cons_mat, clbound, cubound], axis=1)
103 | else:
104 | cons_matrix = None
105 |
106 | cov = risk_model['cov']
107 | special_risk = risk_model['idsync']
108 | risk_cov = risk_model['factor_cov']
109 | risk_exposure = risk_model['factor_loading']
110 |
111 | prob = TargetVolOptimizer(objective=-er,
112 | cons_matrix=cons_matrix,
113 | lbound=lbound,
114 | ubound=ubound,
115 | target_vol=vol_target,
116 | factor_cov=risk_cov,
117 | factor_load=risk_exposure,
118 | factor_special=special_risk,
119 | cov=cov)
120 | if prob.status() == "optimal" or prob.status() == 'optimal_inaccurate':
121 | return prob.status(), prob.feval(), prob.x_value() + bm
122 | else:
123 | raise PortfolioBuilderException(prob.status())
124 |
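A sketch of mean_variance_builder with a toy full-covariance risk model and a single fully-invested constraint, assuming the bundled QuadraticOptimizer accepts this setup (the numbers are illustrative only):

    import numpy as np
    from alphamind.portfolio.meanvariancebuilder import mean_variance_builder

    n = 4
    er = np.random.randn(n)
    risk_model = dict(cov=np.diag(np.full(n, 0.04)),   # toy covariance
                      factor_cov=None, factor_loading=None, idsync=None)
    bm = np.full(n, 1. / n)

    status, value, w = mean_variance_builder(er, risk_model, bm,
                                             lbound=0., ubound=1.,
                                             risk_exposure=np.ones((n, 1)),
                                             risk_target=(np.array([1.]),
                                                          np.array([1.])),
                                             lam=1.)
    print(status, w.sum())        # 'optimal' 1.0 (fully invested)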
--------------------------------------------------------------------------------
/alphamind/portfolio/percentbuilder.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-5-4
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import numpy as np
9 | from numpy import zeros
10 | from numpy import zeros_like
11 |
12 | from alphamind.utilities import groupby
13 | from alphamind.utilities import set_value
14 |
15 |
16 | def percent_build(er: np.ndarray, percent: float, groups: np.ndarray = None,
17 | masks: np.ndarray = None) -> np.ndarray:
18 | er = er.copy()
19 |
20 | if masks is not None:
21 | er[~masks] = -np.inf
22 |
23 | if er.ndim == 1 or (er.shape[0] == 1 or er.shape[1] == 1):
24 | # fast path methods for single column er
25 | neg_er = -er.flatten()
26 | length = len(neg_er)
27 | weights = zeros((length, 1))
28 | if groups is not None:
29 | index_diff, order = groupby(groups)
30 | start = 0
31 | for diff_loc in index_diff:
32 | current_index = order[start:diff_loc + 1]
33 | current_ordering = neg_er[current_index].argsort()
34 | current_ordering.shape = -1, 1
35 | use_rank = int(percent * len(current_index))
36 | set_value(weights, current_index[current_ordering[:use_rank]], 1.)
37 | start = diff_loc + 1
38 | else:
39 | ordering = neg_er.argsort()
40 | use_rank = int(percent * len(neg_er))
41 | weights[ordering[:use_rank]] = 1.
42 | return weights.reshape(er.shape)
43 | else:
44 | neg_er = -er
45 | weights = zeros_like(er)
46 |
47 | if groups is not None:
48 | index_diff, order = groupby(groups)
49 | start = 0
50 | for diff_loc in index_diff:
51 | current_index = order[start:diff_loc + 1]
52 | current_ordering = neg_er[current_index].argsort(axis=0)
53 | use_rank = int(percent * len(current_index))
54 | set_value(weights, current_index[current_ordering[:use_rank]], 1)
55 | start = diff_loc + 1
56 | else:
57 | ordering = neg_er.argsort(axis=0)
58 | use_rank = int(percent * len(neg_er))
59 | set_value(weights, ordering[:use_rank], 1.)
60 | return weights
61 |
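percent_build goes long, with unit weight, the top percent of names by score (per group when groups is given). For example:

    import numpy as np
    from alphamind.portfolio.percentbuilder import percent_build

    er = np.array([0.3, -0.1, 0.8, 0.2, -0.6])
    w = percent_build(er, percent=0.4)    # top 40% -> 2 names
    print(w.flatten())                    # [1. 0. 1. 0. 0.]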
--------------------------------------------------------------------------------
/alphamind/portfolio/rankbuilder.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-4-26
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import numpy as np
9 | from numpy import zeros
10 | from numpy import zeros_like
11 |
12 | from alphamind.utilities import groupby
13 | from alphamind.utilities import set_value
14 |
15 |
16 | def rank_build(er: np.ndarray, use_rank: int, groups: np.ndarray = None,
17 | masks: np.ndarray = None) -> np.ndarray:
18 | er = er.copy()
19 |
20 | if masks is not None:
21 | er[~masks] = -np.inf
22 |
23 | if er.ndim == 1 or (er.shape[0] == 1 or er.shape[1] == 1):
24 | # fast path methods for single column er
25 | neg_er = -er.flatten()
26 | length = len(neg_er)
27 | weights = zeros((length, 1))
28 | if groups is not None:
29 | index_diff, order = groupby(groups)
30 | start = 0
31 | for diff_loc in index_diff:
32 | current_index = order[start:diff_loc + 1]
33 | current_ordering = neg_er[current_index].argsort()
34 | current_ordering.shape = -1, 1
35 | set_value(weights, current_index[current_ordering[:use_rank]], 1.)
36 | start = diff_loc + 1
37 | else:
38 | ordering = neg_er.argsort()
39 | weights[ordering[:use_rank]] = 1.
40 | return weights.reshape(er.shape)
41 | else:
42 | neg_er = -er
43 | weights = zeros_like(er)
44 |
45 | if groups is not None:
46 | index_diff, order = groupby(groups)
47 | start = 0
48 | for diff_loc in index_diff:
49 | current_index = order[start:diff_loc + 1]
50 | current_ordering = neg_er[current_index].argsort(axis=0)
51 | set_value(weights, current_index[current_ordering[:use_rank]], 1)
52 | start = diff_loc + 1
53 | else:
54 | ordering = neg_er.argsort(axis=0)
55 | set_value(weights, ordering[:use_rank], 1.)
56 | return weights
57 |
--------------------------------------------------------------------------------
/alphamind/portfolio/riskmodel.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2018-5-29
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import abc
9 | from typing import List
10 |
11 | import pandas as pd
12 |
13 |
14 | class RiskModel(metaclass=abc.ABCMeta):
15 |
16 | def get_risk_profile(self):
17 | pass
18 |
19 |
20 | class FullRiskModel(RiskModel):
21 |
22 | def __init__(self, sec_cov: pd.DataFrame):
23 | self.codes = sec_cov.index.tolist()
24 | self.sec_cov = sec_cov.loc[self.codes, self.codes]
25 |
26 | def get_cov(self, codes: List[int] = None):
27 | if codes:
28 | return self.sec_cov.loc[codes, codes].values
29 | else:
30 | return self.sec_cov.values
31 |
32 | def get_risk_profile(self, codes: List[int] = None):
33 | return dict(
34 | cov=self.get_cov(codes),
35 | factor_cov=None,
36 | factor_loading=None,
37 | idsync=None
38 | )
39 |
40 |
41 | class FactorRiskModel(RiskModel):
42 |
43 | def __init__(self,
44 | factor_cov: pd.DataFrame,
45 | risk_exp: pd.DataFrame,
46 | idsync: pd.Series):
47 | self.factor_cov = factor_cov
48 | self.idsync = idsync
49 | self.codes = self.idsync.index.tolist()
50 | self.factor_names = sorted(self.factor_cov.index)
51 | self.risk_exp = risk_exp.loc[self.codes, self.factor_names]
52 | self.factor_cov = self.factor_cov.loc[self.factor_names, self.factor_names]
53 | self.idsync = self.idsync[self.codes]
54 |
55 | def get_risk_exp(self, codes: List[int] = None):
56 | if codes:
57 | return self.risk_exp.loc[codes, :].values
58 | else:
59 | return self.risk_exp.values
60 |
61 | def get_factor_cov(self):
62 | return self.factor_cov.values
63 |
64 | def get_idsync(self, codes: List[int] = None):
65 | if codes:
66 | return self.idsync[codes].values
67 | else:
68 | return self.idsync.values
69 |
70 | def get_risk_profile(self, codes: List[int] = None):
71 | return dict(
72 | cov=None,
73 | factor_cov=self.get_factor_cov(),
74 | factor_loading=self.get_risk_exp(codes),
75 | idsync=self.get_idsync(codes)
76 | )
77 |
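FactorRiskModel aligns everything on the codes of the idiosyncratic-risk series and on the sorted factor names, so the inputs may arrive in any order. A minimal construction:

    import numpy as np
    import pandas as pd
    from alphamind.portfolio.riskmodel import FactorRiskModel

    codes = [1, 2, 3]
    factors = ['f1', 'f2']
    factor_cov = pd.DataFrame(np.eye(2) * 0.04, index=factors, columns=factors)
    risk_exp = pd.DataFrame(np.random.randn(3, 2), index=codes, columns=factors)
    idsync = pd.Series([0.010, 0.020, 0.015], index=codes)

    model = FactorRiskModel(factor_cov, risk_exp, idsync)
    profile = model.get_risk_profile(codes)
    print(profile['factor_loading'].shape)   # (3, 2)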
--------------------------------------------------------------------------------
/alphamind/settlement/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-4-28
4 |
5 | @author: cheng.li
6 | """
7 |
--------------------------------------------------------------------------------
/alphamind/settlement/simplesettle.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-4-28
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import numpy as np
9 | import pandas as pd
10 |
11 |
12 | def simple_settle(weights: np.ndarray,
13 | dx_return: np.ndarray,
14 | groups: np.ndarray = None,
15 | benchmark: np.ndarray = None) -> pd.DataFrame:
16 | weights = weights.flatten()
17 | dx_return = dx_return.flatten()
18 |
19 | if benchmark is not None:
20 | net_pos = weights - benchmark
21 | else:
22 | net_pos = weights
23 |
24 | ret_arr = net_pos * dx_return
25 |
26 | if groups is not None:
27 | ret_agg = pd.Series(ret_arr).groupby(groups).sum()
28 | ret_agg.loc['total'] = ret_agg.sum()
29 | else:
30 | ret_agg = pd.Series(ret_arr.sum(), index=['total'])
31 |
32 | ret_agg.index.name = 'industry'
33 | ret_agg.name = 'er'
34 |
35 | pos_table = pd.DataFrame(net_pos, columns=['weight'])
36 | pos_table['ret'] = dx_return
37 |
38 | if groups is not None:
39 | ic_table = pos_table.groupby(groups).corr()['ret'].loc[(slice(None), 'weight')]
40 | ic_table.loc['total'] = pos_table.corr().iloc[0, 1]
41 | else:
42 | ic_table = pd.Series(pos_table.corr().iloc[0, 1], index=['total'])
43 |
44 | return pd.DataFrame({'er': ret_agg.values,
45 | 'ic': ic_table.values},
46 | index=ret_agg.index)
47 |
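simple_settle nets the weights against an optional benchmark, then reports the per-group attributed return ('er') and the weight/return correlation ('ic'), with a 'total' row appended. A small example:

    import numpy as np
    from alphamind.settlement.simplesettle import simple_settle

    weights = np.array([0.35, 0.25, 0.25, 0.15])
    dx_return = np.array([0.010, -0.020, 0.015, 0.005])
    groups = np.array(['bank', 'bank', 'tech', 'tech'])

    print(simple_settle(weights, dx_return, groups=groups))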
--------------------------------------------------------------------------------
/alphamind/strategy/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alpha-miner/alpha-mind/023fca01d2cea7cd50328396c60b06c99706c426/alphamind/strategy/__init__.py
--------------------------------------------------------------------------------
/alphamind/strategy/sample_strategy.json:
--------------------------------------------------------------------------------
1 | {
2 | "strategy_name": "sample_strategy",
3 | "data_process": {
4 | "pre_process": [
5 | "winsorize",
6 | "standardize"
7 | ],
8 | "neutralize_risk": [
9 | "SIZE",
10 | "industry_styles"
11 | ],
12 | "post_process": [
13 | "winsorize",
14 | "standardize"
15 | ]
16 | },
17 | "risk_model": {
18 | "type": "short",
19 | "neutralize_risk": [
20 | "SIZE",
21 | "industry_styles"
22 | ]
23 | },
24 | "alpha_model": {
25 | "model_type": "LinearRegression",
26 | "features": [
27 | "EPS",
28 | "ROEDiluted"
29 | ],
30 | "parameters": {
31 | "fit_intercept": false
32 | }
33 | },
34 | "freq": "1d",
35 | "batch": 4,
36 | "warm_start": 0,
37 | "universe": [
38 | "zz500",
39 | [
40 | "zz500"
41 | ]
42 | ],
43 | "benchmark": 905,
44 | "optimizer": {
45 | "type": "risk_neutral",
46 | "neutralize_risk": [
47 | "SIZE",
48 | "industry_styles"
49 | ]
50 | },
51 | "executor": {
52 | "type": "naive"
53 | }
54 | }
--------------------------------------------------------------------------------
/alphamind/tests/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-4-25
4 |
5 | @author: cheng.li
6 | """
7 |
--------------------------------------------------------------------------------
/alphamind/tests/analysis/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-5-8
4 |
5 | @author: cheng.li
6 | """
7 |
--------------------------------------------------------------------------------
/alphamind/tests/analysis/test_factoranalysis.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-5-25
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import unittest
9 |
10 | import numpy as np
11 | import pandas as pd
12 |
13 | from alphamind.analysis.factoranalysis import factor_analysis
14 | from alphamind.data.neutralize import neutralize
15 | from alphamind.data.processing import factor_processing
16 | from alphamind.data.standardize import standardize
17 | from alphamind.data.winsorize import winsorize_normal
18 | from alphamind.portfolio.constraints import (
19 | Constraints,
20 | )
21 |
22 |
23 | class TestFactorAnalysis(unittest.TestCase):
24 | def setUp(self):
25 | self.raw_factor = np.random.randn(1000, 1)
26 | self.risk_factor = np.random.randn(1000, 3)
27 | self.d1returns = np.random.randn(1000, 1)
28 |
29 | def test_factor_processing(self):
30 | new_factor = factor_processing(self.raw_factor)
31 | np.testing.assert_array_almost_equal(new_factor, self.raw_factor)
32 |
33 | new_factor = factor_processing(self.raw_factor,
34 | pre_process=[standardize, winsorize_normal])
35 |
36 | np.testing.assert_array_almost_equal(new_factor,
37 | winsorize_normal(standardize(self.raw_factor)))
38 |
39 | new_factor = factor_processing(self.raw_factor,
40 | pre_process=[standardize, winsorize_normal],
41 | risk_factors=self.risk_factor)
42 |
43 | np.testing.assert_array_almost_equal(new_factor, neutralize(self.risk_factor,
44 | winsorize_normal(standardize(
45 | self.raw_factor))))
46 |
47 | def test_factor_analysis(self):
48 | benchmark = np.random.randint(50, size=1000)
49 | benchmark = benchmark / benchmark.sum()
50 | industry = np.random.randint(30, size=1000)
51 |
52 | factor_df = pd.DataFrame(self.raw_factor.flatten(), index=range(len(self.raw_factor)))
53 | factor_weights = np.array([1.])
54 |
55 | constraints = Constraints()
56 | names = np.array(['a', 'b', 'c'])
57 | constraints.add_exposure(names, self.risk_factor)
58 | targets = self.risk_factor.T @ benchmark
59 | for i, name in enumerate(names):
60 | constraints.set_constraints(name, targets[i], targets[i])
61 |
62 | weight_table, analysis_table = factor_analysis(factor_df,
63 | factor_weights,
64 | d1returns=self.d1returns,
65 | industry=industry,
66 | benchmark=benchmark,
67 | risk_exp=self.risk_factor,
68 | constraints=constraints)
69 |
70 | weight = weight_table.weight
71 |
72 | self.assertEqual(analysis_table['er'].sum() / analysis_table['er'].iloc[-1], 2.0)
73 | np.testing.assert_array_almost_equal(weight @ self.risk_factor,
74 | benchmark @ self.risk_factor)
75 | self.assertTrue(weight @ factor_df.values > benchmark @ factor_df.values)
76 |
77 | def test_factor_analysis_with_several_factors(self):
78 | benchmark = np.random.randint(50, size=1000)
79 | benchmark = benchmark / benchmark.sum()
80 | industry = np.random.randint(30, size=1000)
81 |
82 | factor_df = pd.DataFrame(np.random.randn(1000, 2), index=range(len(self.raw_factor)))
83 | factor_weights = np.array([0.2, 0.8])
84 |
85 | constraints = Constraints()
86 | names = np.array(['a', 'b', 'c'])
87 | constraints.add_exposure(names, self.risk_factor)
88 | targets = self.risk_factor.T @ benchmark
89 | for i, name in enumerate(names):
90 | constraints.set_constraints(name, targets[i], targets[i])
91 |
92 | weight_table, analysis_table = factor_analysis(factor_df,
93 | factor_weights,
94 | d1returns=self.d1returns,
95 | industry=industry,
96 | benchmark=benchmark,
97 | risk_exp=self.risk_factor,
98 | constraints=constraints)
99 |
100 | weight = weight_table.weight
101 | self.assertEqual(analysis_table['er'].sum() / analysis_table['er'].iloc[-1], 2.0)
102 | np.testing.assert_array_almost_equal(weight @ self.risk_factor,
103 | benchmark @ self.risk_factor)
104 |
105 |
106 | if __name__ == '__main__':
107 | unittest.main()
108 |
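
The otherwise cryptic ratio assertion `analysis_table['er'].sum() / analysis_table['er'].iloc[-1] == 2.0` holds because the table's last row is the 'total' row and it equals the sum of the per-industry rows, so summing the whole column counts the total twice. A toy check:

    import pandas as pd

    er = pd.Series({'ind_1': 0.25, 'ind_2': 0.75, 'total': 1.0})
    assert er.sum() / er.iloc[-1] == 2.0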
--------------------------------------------------------------------------------
/alphamind/tests/analysis/test_perfanalysis.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-5-12
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import unittest
9 |
10 | import numpy as np
11 | import pandas as pd
12 |
13 | from alphamind.analysis.perfanalysis import perf_attribution_by_pos
14 |
15 |
16 | class TestPerformanceAnalysis(unittest.TestCase):
17 |
18 | @staticmethod
19 | def test_perf_attribution_by_pos():
20 | n_samples = 36000
21 | n_dates = 20
22 | n_risk_factors = 35
23 |
24 | dates = np.sort(np.random.randint(n_dates, size=n_samples))
25 | weights_series = pd.Series(data=np.random.randn(n_samples), index=dates)
26 | bm_series = pd.Series(data=np.random.randn(n_samples), index=dates)
27 | next_bar_return_series = pd.Series(data=np.random.randn(n_samples), index=dates)
28 | risk_table = pd.DataFrame(data=np.random.randn(n_samples, n_risk_factors),
29 | columns=list(range(n_risk_factors)),
30 | index=dates)
31 |
32 | explained_table = perf_attribution_by_pos(weights_series - bm_series,
33 | next_bar_return_series,
34 | risk_table)
35 |
36 | to_explain = (weights_series - bm_series).multiply(next_bar_return_series, axis=0)
37 | aggregated_to_explain = to_explain.groupby(dates).sum()
38 | aggregated_explained = explained_table.sum(axis=1)
39 |
40 | np.testing.assert_array_almost_equal(aggregated_to_explain.values,
41 | aggregated_explained.values)
42 |
43 |
44 | if __name__ == '__main__':
45 | unittest.main()
46 |
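
The final assertion is an accounting identity rather than a statistical claim: however perf_attribution_by_pos splits the active P&L across risk factors, the pieces (including a residual) must add back to the P&L itself. A self-contained sketch of that identity with plain per-date least squares (the library's internal attribution is assumed to be of this flavour):

    import numpy as np

    dates = np.repeat([0, 1], 50)
    pnl = np.random.randn(100) * np.random.randn(100)   # active weight times return
    risk = np.random.randn(100, 3)

    explained = np.empty((100, 4))
    for d in (0, 1):
        m = dates == d
        beta, *_ = np.linalg.lstsq(risk[m], pnl[m], rcond=None)
        contrib = risk[m] * beta                        # per-factor contributions
        explained[m] = np.column_stack([contrib, pnl[m] - contrib.sum(axis=1)])

    np.testing.assert_array_almost_equal(explained.sum(axis=1), pnl)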
--------------------------------------------------------------------------------
/alphamind/tests/analysis/test_quantilieanalysis.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-8-16
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import unittest
9 |
10 | import numpy as np
11 | import pandas as pd
12 |
13 | from alphamind.analysis.quantileanalysis import er_quantile_analysis
14 | from alphamind.analysis.quantileanalysis import quantile_analysis
15 | from alphamind.data.processing import factor_processing
16 | from alphamind.data.quantile import quantile
17 | from alphamind.data.standardize import standardize
18 | from alphamind.data.winsorize import winsorize_normal
19 |
20 |
21 | class TestQuantileAnalysis(unittest.TestCase):
22 | def setUp(self):
23 | n = 5000
24 | n_f = 5
25 |
26 | self.x = np.random.randn(n, 5)
27 | self.x_w = np.random.randn(n_f)
28 | self.r = np.random.randn(n)
29 | self.b_w = np.random.randint(0, 10, n)
30 | self.b_w = self.b_w / float(self.b_w.sum())
31 | self.risk_exp = np.random.randn(n, 3)
32 | self.n_bins = 10
33 |
34 | def test_q_anl_impl(self):
35 | n_bins = 5
36 |
37 | x = self.x[:, 0]
38 | q_groups = quantile(x, n_bins)
39 |
40 | s = pd.Series(self.r, index=q_groups)
41 | grouped_return = s.groupby(level=0).mean().values.flatten()
42 |
43 | expected_res = grouped_return.copy()
44 | res = n_bins - 1
45 | res_weight = 1. / res
46 |
47 | for i, value in enumerate(expected_res):
48 | expected_res[i] = (1. + res_weight) * value - res_weight * grouped_return.sum()
49 |
50 | calculated_res = er_quantile_analysis(x, n_bins, self.r, de_trend=True)
51 |
52 | np.testing.assert_array_almost_equal(expected_res, calculated_res)
53 |
54 | def test_quantile_analysis_simple(self):
55 | f_df = pd.DataFrame(self.x)
56 | calculated = quantile_analysis(f_df,
57 | self.x_w,
58 | self.r,
59 | n_bins=self.n_bins,
60 | pre_process=[],
61 | post_process=[])
62 |
63 | er = self.x_w @ self.x.T
64 | expected = er_quantile_analysis(er, self.n_bins, self.r)
65 | np.testing.assert_array_almost_equal(calculated, expected)
66 |
67 | def test_quantile_analysis_with_factor_processing(self):
68 | f_df = pd.DataFrame(self.x)
69 | calculated = quantile_analysis(f_df,
70 | self.x_w,
71 | self.r,
72 | n_bins=self.n_bins,
73 | risk_exp=self.risk_exp,
74 | pre_process=[winsorize_normal, standardize],
75 | post_process=[standardize])
76 |
77 | er = self.x_w @ factor_processing(self.x,
78 | [winsorize_normal, standardize],
79 | self.risk_exp,
80 | [standardize]).T
81 | expected = er_quantile_analysis(er, self.n_bins, self.r)
82 | np.testing.assert_array_almost_equal(calculated, expected)
83 |
84 |
85 | if __name__ == '__main__':
86 | unittest.main()
87 |
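
The de-trending loop in test_q_anl_impl encodes a small identity: with w = 1/(n_bins - 1), the transform (1 + w) * v_i - w * sum_j(v_j) is exactly v_i minus the average return of the other quantiles. A quick numerical check:

    import numpy as np

    v = np.array([0.5, -0.2, 0.1, 0.3, -0.7])   # per-quantile mean returns
    w = 1. / (len(v) - 1)

    lhs = (1. + w) * v - w * v.sum()
    rhs = v - (v.sum() - v) / (len(v) - 1)      # minus the mean of the others
    np.testing.assert_array_almost_equal(lhs, rhs)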
--------------------------------------------------------------------------------
/alphamind/tests/analysis/test_riskanalysis.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-5-8
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import unittest
9 |
10 | import numpy as np
11 | import pandas as pd
12 |
13 | from alphamind.analysis.riskanalysis import risk_analysis
14 |
15 |
16 | class TestRiskAnalysis(unittest.TestCase):
17 |
18 | @staticmethod
19 | def test_risk_analysis():
20 | n_samples = 36000
21 | n_dates = 20
22 | n_risk_factors = 35
23 |
24 | dates = np.sort(np.random.randint(n_dates, size=n_samples))
25 | weights_series = pd.Series(data=np.random.randn(n_samples), index=dates)
26 | bm_series = pd.Series(data=np.random.randn(n_samples), index=dates)
27 | next_bar_return_series = pd.Series(data=np.random.randn(n_samples), index=dates)
28 | risk_table = pd.DataFrame(data=np.random.randn(n_samples, n_risk_factors),
29 | columns=list(range(n_risk_factors)),
30 | index=dates)
31 |
32 | explained_table, _ = risk_analysis(weights_series - bm_series,
33 | next_bar_return_series,
34 | risk_table)
35 |
36 | to_explain = (weights_series - bm_series).multiply(next_bar_return_series, axis=0)
37 | aggregated = explained_table.sum(axis=1)
38 |
39 | np.testing.assert_array_almost_equal(to_explain.values, aggregated.values)
40 |
41 |
42 | if __name__ == '__main__':
43 | unittest.main()
44 |
--------------------------------------------------------------------------------
/alphamind/tests/cython/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alpha-miner/alpha-mind/023fca01d2cea7cd50328396c60b06c99706c426/alphamind/tests/cython/__init__.py
--------------------------------------------------------------------------------
/alphamind/tests/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alpha-miner/alpha-mind/023fca01d2cea7cd50328396c60b06c99706c426/alphamind/tests/data/__init__.py
--------------------------------------------------------------------------------
/alphamind/tests/data/engines/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alpha-miner/alpha-mind/023fca01d2cea7cd50328396c60b06c99706c426/alphamind/tests/data/engines/__init__.py
--------------------------------------------------------------------------------
/alphamind/tests/data/engines/test_universe.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2018-2-9
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import unittest
9 |
10 | from alphamind.data.engines.universe import Universe
11 | from alphamind.data.engines.universe import load_universe
12 |
13 |
14 | class TestUniverse(unittest.TestCase):
15 |
16 | def test_universe_equal(self):
17 | universe1 = Universe('zz500')
18 | universe2 = Universe('zz500')
19 | self.assertEqual(universe1, universe2)
20 |
21 | universe1 = Universe('zz500')
22 | universe2 = Universe('zz800')
23 | self.assertNotEqual(universe1, universe2)
24 |
25 | def test_universe_persistence(self):
26 | universe = Universe('zz500')
27 | univ_desc = universe.save()
28 | loaded_universe = load_universe(univ_desc)
29 | self.assertEqual(universe, loaded_universe)
30 |
31 | def test_universe_arithmetic(self):
32 | universe = Universe('zz500') + Universe('hs300')
33 | univ_desc = universe.save()
34 | loaded_universe = load_universe(univ_desc)
35 | self.assertEqual(universe, loaded_universe)
36 |
37 | universe = Universe('zz500') - Universe('hs300')
38 | univ_desc = universe.save()
39 | loaded_universe = load_universe(univ_desc)
40 | self.assertEqual(universe, loaded_universe)
41 |
42 | universe = Universe('zz500') & Universe('hs300')
43 | univ_desc = universe.save()
44 | loaded_universe = load_universe(univ_desc)
45 | self.assertEqual(universe, loaded_universe)
46 |
--------------------------------------------------------------------------------
/alphamind/tests/data/test_neutralize.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-4-25
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import unittest
9 |
10 | import numpy as np
11 | from sklearn.linear_model import LinearRegression
12 |
13 | from alphamind.data.neutralize import neutralize
14 |
15 |
16 | class TestNeutralize(unittest.TestCase):
17 |
18 | def setUp(self):
19 | self.y = np.random.randn(3000, 4)
20 | self.x = np.random.randn(3000, 10)
21 | self.groups = np.random.randint(30, size=3000)
22 |
23 | def test_neutralize(self):
24 | calc_res = neutralize(self.x, self.y)
25 |
26 | model = LinearRegression(fit_intercept=False)
27 | model.fit(self.x, self.y)
28 |
29 | exp_res = self.y - self.x @ model.coef_.T
30 |
31 | np.testing.assert_array_almost_equal(calc_res, exp_res)
32 |
33 | def test_neutralize_with_group(self):
34 |
35 | calc_res = neutralize(self.x, self.y, self.groups)
36 |
37 | model = LinearRegression(fit_intercept=False)
38 | for i in range(30):
39 | curr_x = self.x[self.groups == i]
40 | curr_y = self.y[self.groups == i]
41 | model.fit(curr_x, curr_y)
42 | exp_res = curr_y - curr_x @ model.coef_.T
43 | np.testing.assert_array_almost_equal(calc_res[self.groups == i], exp_res)
44 |
45 | def test_neutralize_explain_output(self):
46 | y = self.y[:, 0].flatten()
47 |
48 | calc_res, other_stats = neutralize(self.x, y, detail=True)
49 |
50 | model = LinearRegression(fit_intercept=False)
51 | model.fit(self.x, y)
52 |
53 | exp_res = y - self.x @ model.coef_.T
54 | exp_explained = self.x * model.coef_.T
55 |
56 | np.testing.assert_array_almost_equal(calc_res, exp_res.reshape(-1, 1))
57 | np.testing.assert_array_almost_equal(other_stats['explained'][:, :, 0], exp_explained)
58 |
59 | calc_res, other_stats = neutralize(self.x, self.y, detail=True)
60 |
61 | model = LinearRegression(fit_intercept=False)
62 | model.fit(self.x, self.y)
63 |
64 | exp_res = self.y - self.x @ model.coef_.T
65 | np.testing.assert_array_almost_equal(calc_res, exp_res)
66 |
67 | for i in range(self.y.shape[1]):
68 | exp_explained = self.x * model.coef_.T[:, i]
69 | np.testing.assert_array_almost_equal(other_stats['explained'][:, :, i], exp_explained)
70 |
71 | def test_neutralize_explain_output_with_group(self):
72 | y = self.y[:, 0].flatten()
73 |
74 | calc_res, other_stats = neutralize(self.x, y, self.groups, detail=True)
75 |
76 | model = LinearRegression(fit_intercept=False)
77 | for i in range(30):
78 | curr_x = self.x[self.groups == i]
79 | curr_y = y[self.groups == i]
80 | model.fit(curr_x, curr_y)
81 | exp_res = curr_y - curr_x @ model.coef_.T
82 | exp_explained = curr_x * model.coef_.T
83 | np.testing.assert_array_almost_equal(calc_res[self.groups == i], exp_res.reshape(-1, 1))
84 | np.testing.assert_array_almost_equal(other_stats['explained'][self.groups == i, :, 0],
85 | exp_explained)
86 |
87 | calc_res, other_stats = neutralize(self.x, self.y, self.groups, detail=True)
88 |
89 | model = LinearRegression(fit_intercept=False)
90 | for i in range(30):
91 | curr_x = self.x[self.groups == i]
92 | curr_y = self.y[self.groups == i]
93 | model.fit(curr_x, curr_y)
94 | exp_res = curr_y - curr_x @ model.coef_.T
95 | np.testing.assert_array_almost_equal(calc_res[self.groups == i], exp_res)
96 |
97 | for j in range(self.y.shape[1]):
98 | exp_explained = curr_x * model.coef_.T[:, j]
99 | np.testing.assert_array_almost_equal(
100 | other_stats['explained'][self.groups == i, :, j], exp_explained)
101 |
102 |
103 | if __name__ == '__main__':
104 | unittest.main()
105 |
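
As the expected values spell out, neutralize is cross-sectional residualization: regress the target on the risk exposures without an intercept and keep the residuals, optionally per group; the 'explained' array holds the per-regressor contributions. A minimal NumPy equivalent of the ungrouped case:

    import numpy as np

    x = np.random.randn(500, 10)                  # risk exposures
    y = np.random.randn(500, 4)                   # raw factor values

    beta, *_ = np.linalg.lstsq(x, y, rcond=None)  # OLS, no intercept
    residual = y - x @ beta                       # the neutralized values
    explained = x[:, :, None] * beta[None, :, :]  # shape (500, 10, 4)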
--------------------------------------------------------------------------------
/alphamind/tests/data/test_quantile.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-8-16
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import unittest
9 |
10 | import numpy as np
11 |
12 | from alphamind.data.quantile import quantile
13 |
14 |
15 | class TestQuantile(unittest.TestCase):
16 |
17 | def test_quantile(self):
18 | n = 5000
19 | bins = 10
20 | s = np.random.randn(n)
21 | calculated = quantile(s, bins)
22 |
23 | rank = s.argsort().argsort()
24 |
25 | bin_size = float(n) / bins
26 | pillars = [int(i * bin_size) for i in range(1, bins + 1)]
27 |
28 | starter = 0
29 | for i, r in enumerate(pillars):
30 | self.assertTrue(np.all(calculated[(rank >= starter) & (rank < r)] == i))
31 | starter = r
32 |
33 |
34 | if __name__ == "__main__":
35 | unittest.main()
36 |
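
The loop pins down the bucketing convention: the observation with 0-based rank r falls into bucket floor(r * bins / n). When n is an exact multiple of bins, as in this test, that collapses to integer division by the bin size:

    import numpy as np

    n, bins = 5000, 10
    s = np.random.randn(n)
    rank = s.argsort().argsort()   # 0-based rank of each observation
    buckets = rank * bins // n     # same pillar scheme as the loop above
    assert buckets.min() == 0 and buckets.max() == bins - 1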
--------------------------------------------------------------------------------
/alphamind/tests/data/test_rank.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-8-8
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import unittest
9 |
10 | import numpy as np
11 | import pandas as pd
12 |
13 | from alphamind.data.rank import rank
14 |
15 |
16 | class TestRank(unittest.TestCase):
17 |
18 | def setUp(self):
19 | self.x = np.random.randn(1000, 1)
20 | self.groups = np.random.randint(0, 10, 1000)
21 |
22 | def test_rank(self):
23 | data_rank = rank(self.x)
24 |
25 | sorted_array = np.zeros_like(self.x)
26 | for i in range(self.x.shape[0]):
27 | for j in range(self.x.shape[1]):
28 | sorted_array[int(data_rank[i, j]), j] = self.x[i, j]
29 |
30 | arr_diff = np.diff(sorted_array, axis=0)
31 | np.testing.assert_array_less(0, arr_diff)
32 |
33 | def test_rank_with_groups(self):
34 | data = pd.DataFrame(data={'raw': self.x.tolist()}, index=self.groups)
35 | data['rank'] = rank(data['raw'].values, groups=data.index)
36 | groups = dict(list(data['rank'].groupby(level=0)))
37 | ret = []
38 | for index in range(10):
39 | ret.append(groups[index].values)
40 | ret = np.concatenate(ret).reshape(-1, 1)
41 |
42 | expected_rank = data['raw'].groupby(level=0).apply(
43 | lambda x: x.values.argsort(axis=0).argsort(axis=0))
44 | expected_rank = np.concatenate(expected_rank).reshape(-1, 1)
45 | np.testing.assert_array_equal(ret, expected_rank)
46 |
--------------------------------------------------------------------------------
/alphamind/tests/data/test_standardize.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-4-25
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import unittest
9 |
10 | import numpy as np
11 | import pandas as pd
12 | from scipy.stats import zscore
13 |
14 | from alphamind.data.standardize import Standardizer
15 | from alphamind.data.standardize import projection
16 | from alphamind.data.standardize import standardize
17 |
18 |
19 | class TestStandardize(unittest.TestCase):
20 |
21 | def setUp(self):
22 | self.x = np.random.randn(3000, 10)
23 | self.groups = np.random.randint(10, 30, size=3000)
24 |
25 | def test_standardize(self):
26 | calc_zscore = standardize(self.x)
27 | exp_zscore = zscore(self.x, ddof=1)
28 |
29 | np.testing.assert_array_almost_equal(calc_zscore, exp_zscore)
30 |
31 | def test_projection(self):
32 | calc_projected = projection(self.x)
33 | exp_projected = self.x / np.sqrt(np.sum(np.square(self.x), axis=1).reshape((-1, 1)))
34 |
35 | np.testing.assert_array_almost_equal(calc_projected, exp_projected)
36 |
37 | def test_projection_with_groups(self):
38 | calc_projected = projection(self.x, self.groups, axis=0)
39 | exp_projected = pd.DataFrame(self.x).groupby(
40 | self.groups
41 | ).transform(lambda s: s / np.sqrt(np.square(s).sum(axis=0)))
42 |
43 | np.testing.assert_array_almost_equal(calc_projected, exp_projected)
44 |
45 | def test_standardize_with_group(self):
46 | calc_zscore = standardize(self.x, self.groups)
47 | exp_zscore = pd.DataFrame(self.x). \
48 | groupby(self.groups). \
49 | transform(lambda s: (s - s.mean(axis=0)) / s.std(axis=0, ddof=1))
50 | np.testing.assert_array_almost_equal(calc_zscore, exp_zscore)
51 |
52 | def test_standardizer(self):
53 | s = Standardizer()
54 | s.fit(self.x)
55 | calc_zscore = s.transform(self.x)
56 |
57 | exp_zscore = standardize(self.x)
58 | np.testing.assert_array_almost_equal(calc_zscore, exp_zscore)
59 | np.testing.assert_array_almost_equal(s(self.x), exp_zscore)
60 |
61 | def test_grouped_standardizer(self):
62 | s = Standardizer()
63 | s.fit(self.x, self.groups)
64 | calc_zscore = s.transform(self.x, self.groups)
65 |
66 | exp_zscore = standardize(self.x, self.groups)
67 | np.testing.assert_array_almost_equal(calc_zscore, exp_zscore)
68 | np.testing.assert_array_almost_equal(s(self.x, self.groups), exp_zscore)
69 |
70 |
71 | if __name__ == '__main__':
72 | import datetime as dt
73 | from sklearn.preprocessing import StandardScaler
74 |
75 | x = np.random.randn(1000, 2)
76 | y = np.random.randn(50, 2)
77 |
78 | start = dt.datetime.now()
79 | for i in range(10000):
80 | s1 = StandardScaler()
81 | s1.fit(x)
82 | x1 = s1.transform(y)
83 | print(dt.datetime.now() - start)
84 |
85 | start = dt.datetime.now()
86 | for i in range(10000):
87 | s2 = Standardizer(ddof=0)
88 | s2.fit(x)
89 | x2 = s2.transform(y)
90 | print(dt.datetime.now() - start)
91 |
92 | np.testing.assert_array_almost_equal(x1, x2)
93 |
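
One detail in the timing block is easy to miss: sklearn's StandardScaler normalizes with the population standard deviation (ddof=0), while the tests above pin standardize to the sample deviation (ddof=1); passing ddof=0 to Standardizer is what makes the two outputs agree. In plain NumPy the two conventions are:

    import numpy as np

    x = np.random.randn(1000, 2)
    z_sample = (x - x.mean(axis=0)) / x.std(axis=0, ddof=1)  # standardize's convention
    z_pop = (x - x.mean(axis=0)) / x.std(axis=0, ddof=0)     # StandardScaler's convention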
--------------------------------------------------------------------------------
/alphamind/tests/data/test_winsorize.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-4-25
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import unittest
9 |
10 | import numpy as np
11 | import pandas as pd
12 |
13 | from alphamind.data.winsorize import NormalWinsorizer
14 | from alphamind.data.winsorize import winsorize_normal
15 |
16 |
17 | class TestWinsorize(unittest.TestCase):
18 |
19 | def setUp(self):
20 | np.random.seed(10)
21 | self.x = np.random.randn(3000, 10)
22 | self.groups = np.random.randint(10, 30, size=3000)
23 | self.num_stds = 2
24 |
25 | def test_winsorize_normal(self):
26 | calc_winsorized = winsorize_normal(self.x, self.num_stds)
27 |
28 | std_values = self.x.std(axis=0, ddof=1)
29 | mean_value = self.x.mean(axis=0)
30 |
31 | lower_bound = mean_value - self.num_stds * std_values
32 | upper_bound = mean_value + self.num_stds * std_values
33 |
34 | for i in range(np.size(calc_winsorized, 1)):
35 | col_data = self.x[:, i]
36 | col_data[col_data > upper_bound[i]] = upper_bound[i]
37 | col_data[col_data < lower_bound[i]] = lower_bound[i]
38 |
39 | calculated_col = calc_winsorized[:, i]
40 | np.testing.assert_array_almost_equal(col_data, calculated_col)
41 |
42 | def test_winsorize_normal_with_interp(self):
43 | calc_winsorized = winsorize_normal(self.x, self.num_stds, method='interp')
44 |
45 | std_values = self.x.std(axis=0, ddof=1)
46 | mean_value = self.x.mean(axis=0)
47 |
48 | lower_bound = mean_value - self.num_stds * std_values
49 | upper_bound = mean_value + self.num_stds * std_values
50 |
51 | for i in range(np.size(calc_winsorized, 1)):
52 | col_data = self.x[:, i].copy()
53 |
54 | idx = col_data > upper_bound[i]
55 | u_values = col_data[idx]
56 | q_values = u_values.argsort().argsort()
57 | if len(q_values) > 0:
58 | col_data[idx] = upper_bound[i] + q_values / len(q_values) * 0.5 * std_values[i]
59 |
60 | idx = col_data < lower_bound[i]
61 | l_values = col_data[idx]
62 | q_values = (-l_values).argsort().argsort()
63 | if len(q_values) > 0:
64 | col_data[idx] = lower_bound[i] - q_values / len(q_values) * 0.5 * std_values[i]
65 |
66 | calculated_col = calc_winsorized[:, i]
67 | np.testing.assert_array_almost_equal(col_data, calculated_col)
68 |
69 | def test_winsorize_normal_with_group(self):
70 | cal_winsorized = winsorize_normal(self.x, self.num_stds, groups=self.groups)
71 |
72 | def impl(x):
73 | std_values = x.std(axis=0, ddof=1)
74 | mean_value = x.mean(axis=0)
75 |
76 | lower_bound = mean_value - self.num_stds * std_values
77 | upper_bound = mean_value + self.num_stds * std_values
78 |
79 | res = np.where(x > upper_bound, upper_bound, x)
80 | res = np.where(res < lower_bound, lower_bound, res)
81 | return res
82 |
83 | exp_winsorized = pd.DataFrame(self.x).groupby(self.groups).transform(impl).values
84 | np.testing.assert_array_almost_equal(cal_winsorized, exp_winsorized)
85 |
86 | def test_winsorize_normal_with_group_and_interp(self):
87 | cal_winsorized = winsorize_normal(self.x, self.num_stds, groups=self.groups,
88 | method='interp')
89 |
90 | def impl(x):
91 | x = x.values
92 | std_values = x.std(axis=0, ddof=1)
93 | mean_value = x.mean(axis=0)
94 |
95 | lower_bound = mean_value - self.num_stds * std_values
96 | upper_bound = mean_value + self.num_stds * std_values
97 |
98 | col_data = x.copy()
99 |
100 | idx = col_data > upper_bound
101 | u_values = col_data[idx]
102 | q_values = u_values.argsort().argsort()
103 | if len(q_values) > 0:
104 | col_data[idx] = upper_bound + q_values / len(q_values) * 0.5 * std_values
105 |
106 | idx = col_data < lower_bound
107 | l_values = col_data[idx]
108 | q_values = (-l_values).argsort().argsort()
109 | if len(q_values) > 0:
110 | col_data[idx] = lower_bound - q_values / len(q_values) * 0.5 * std_values
111 | return col_data
112 |
113 | exp_winsorized = pd.DataFrame(self.x).groupby(self.groups).transform(impl).values
114 | np.testing.assert_array_almost_equal(cal_winsorized, exp_winsorized)
115 |
116 | def test_normal_winsorizer(self):
117 | s = NormalWinsorizer(num_stds=self.num_stds)
118 | s.fit(self.x)
119 | calc_winsorized1 = s.transform(self.x)
120 | calc_winsorized2 = s(self.x)
121 |
122 | std_values = self.x.std(axis=0, ddof=1)
123 | mean_value = self.x.mean(axis=0)
124 |
125 | lower_bound = mean_value - self.num_stds * std_values
126 | upper_bound = mean_value + self.num_stds * std_values
127 |
128 | for i in range(np.size(calc_winsorized1, 1)):
129 | col_data = self.x[:, i]
130 | col_data[col_data > upper_bound[i]] = upper_bound[i]
131 | col_data[col_data < lower_bound[i]] = lower_bound[i]
132 |
133 | calculated_col = calc_winsorized1[:, i]
134 | np.testing.assert_array_almost_equal(col_data, calculated_col)
135 | calculated_col = calc_winsorized2[:, i]
136 | np.testing.assert_array_almost_equal(col_data, calculated_col)
137 |
138 |
139 | if __name__ == "__main__":
140 | unittest.main()
141 |
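
The 'interp' method does not clip offenders to the bound: it re-spreads them, in rank order, across a band half a standard deviation wide beyond the bound, preserving their ordering. A toy illustration of the upper tail, mirroring the per-column loop above:

    import numpy as np

    upper_bound, std = 2.0, 1.0
    u_values = np.array([2.3, 5.0, 2.9])          # values above the bound
    q_values = u_values.argsort().argsort()       # ranks among the offenders
    respread = upper_bound + q_values / len(q_values) * 0.5 * std
    print(respread)                               # [2.0, 2.3333..., 2.1666...]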
--------------------------------------------------------------------------------
/alphamind/tests/execution/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alpha-miner/alpha-mind/023fca01d2cea7cd50328396c60b06c99706c426/alphamind/tests/execution/__init__.py
--------------------------------------------------------------------------------
/alphamind/tests/execution/test_naiveexecutor.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-9-22
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import unittest
9 |
10 | import pandas as pd
11 |
12 | from alphamind.execution.naiveexecutor import NaiveExecutor
13 |
14 |
15 | class TestNaiveExecutor(unittest.TestCase):
16 |
17 | def test_naive_executor(self):
18 | target_pos = pd.DataFrame({'code': [1, 2, 3],
19 | 'weight': [0.2, 0.3, 0.5],
20 | 'industry': ['a', 'b', 'c']})
21 |
22 | # 1st round
23 | executor = NaiveExecutor()
24 | turn_over, executed_pos = executor.execute(target_pos)
25 | executor.set_current(executed_pos)
26 | self.assertAlmostEqual(turn_over, 1.0)
27 |
28 | # 2nd round
29 | target_pos = pd.DataFrame({'code': [1, 2, 4],
30 | 'weight': [0.3, 0.2, 0.5],
31 | 'industry': ['a', 'b', 'd']})
32 |
33 | turn_over, executed_pos = executor.execute(target_pos)
34 | executor.set_current(executed_pos)
35 | self.assertAlmostEqual(turn_over, 1.2)
36 |
37 | # 3rd round
38 | target_pos = pd.DataFrame({'code': [1, 3, 4],
39 | 'weight': [0.3, 0.2, 0.5],
40 | 'industry': ['a', 'c', 'd']})
41 | turn_over, executed_pos = executor.execute(target_pos)
42 | executor.set_current(executed_pos)
43 | self.assertAlmostEqual(turn_over, 0.4)
44 |
45 |
46 | if __name__ == '__main__':
47 | unittest.main()
48 |
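
The expected turnovers are just summed absolute weight changes per code, with a code that appears on only one side treated as zero weight on the other. Working the second round by hand:

    prev = {1: 0.2, 2: 0.3, 3: 0.5}
    target = {1: 0.3, 2: 0.2, 4: 0.5}

    codes = set(prev) | set(target)
    turn_over = sum(abs(target.get(c, 0.) - prev.get(c, 0.)) for c in codes)
    print(turn_over)   # 0.1 + 0.1 + 0.5 + 0.5 = 1.2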
--------------------------------------------------------------------------------
/alphamind/tests/execution/test_pipeline.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-9-25
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import unittest
9 | from collections import deque
10 |
11 | import numpy as np
12 | import pandas as pd
13 |
14 | from alphamind.execution.pipeline import ExecutionPipeline
15 | from alphamind.execution.targetvolexecutor import TargetVolExecutor
16 | from alphamind.execution.thresholdexecutor import ThresholdExecutor
17 |
18 |
19 | class TestExecutionPipeline(unittest.TestCase):
20 |
21 | def test_execution_pipeline(self):
22 | n = 100
23 | window = 60
24 | target_vol = 0.01
25 | turn_over_threshold = 0.5
26 |
27 | executor1 = TargetVolExecutor(window=window, target_vol=target_vol)
28 | executor2 = ThresholdExecutor(turn_over_threshold=turn_over_threshold)
29 |
30 | execution_pipeline = ExecutionPipeline(executors=[executor1, executor2])
31 |
32 | return_1 = np.random.randn(2000, n) * 0.05
33 | return_2 = np.random.randn(2000, n) * 0.2
34 | return_total = np.concatenate((return_1, return_2))
35 | codes = np.array(list(range(n)))
36 |
37 | ret_deq = deque(maxlen=window)
38 |
39 | for i, row in enumerate(return_total):
40 | weights = np.random.randint(0, 100, n)
41 | weights = weights / weights.sum()
42 | pos = pd.DataFrame({'code': codes, 'weight': weights})
43 | turn_over, executed_pos = execution_pipeline.execute(pos)
44 | daily_return = row @ executed_pos.weight.values.flatten()
45 | data_dict = {'return': daily_return}
46 | execution_pipeline.update(data_dict=data_dict)
47 | ret_deq.append(daily_return)
48 |
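
The pipeline chains its executors: each target position passes through the target-vol executor first and the threshold executor second, and update fans the new return out to every stage. A rough sketch of that chaining, assuming every executor exposes execute/set_current/update (the actual ExecutionPipeline may differ in detail):

    class ChainedPipeline:
        def __init__(self, executors):
            self.executors = executors

        def execute(self, pos):
            turn_over = 0.
            for executor in self.executors:
                turn_over, pos = executor.execute(pos)  # feed each stage forward
                executor.set_current(pos)
            return turn_over, pos

        def update(self, data_dict):
            for executor in self.executors:
                executor.update(data_dict=data_dict)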
--------------------------------------------------------------------------------
/alphamind/tests/execution/test_targetvolexecutor.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-9-22
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import unittest
9 | from collections import deque
10 |
11 | import numpy as np
12 | import pandas as pd
13 |
14 | from alphamind.execution.targetvolexecutor import TargetVolExecutor
15 |
16 |
17 | class TestTargetVolExecutor(unittest.TestCase):
18 |
19 | def test_target_vol_executor(self):
20 | n = 100
21 | window = 30
22 | target_vol = 0.01
23 |
24 | executor = TargetVolExecutor(window=window, target_vol=target_vol)
25 |
26 | return_1 = np.random.randn(2000, n) * 0.05
27 | return_2 = np.random.randn(2000, n) * 0.2
28 | return_total = np.concatenate((return_1, return_2))
29 |
30 | weights = np.ones(n) / n
31 | codes = np.array(list(range(n)))
32 |
33 | ret_deq = deque(maxlen=window)
34 |
35 | for i, row in enumerate(return_total):
36 | pos = pd.DataFrame({'code': codes, 'weight': weights})
37 | turn_over, executed_pos = executor.execute(pos)
38 |
39 | if i >= window:
40 | c_vol = np.std(ret_deq, ddof=1)
41 | self.assertTrue(np.allclose(executed_pos.weight, pos.weight * target_vol / c_vol))
42 | else:
43 | self.assertTrue(executed_pos.weight.equals(pos.weight))
44 |
45 | executor.set_current(executed_pos)
46 | daily_return = row @ weights
47 | data_dict = {'return': daily_return}
48 | executor.update(data_dict=data_dict)
49 | ret_deq.append(daily_return)
50 |
51 |
52 | if __name__ == '__main__':
53 | unittest.main()
54 |
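
The rule being exercised: until the return window fills, positions pass through untouched; afterwards every weight is multiplied by target_vol over the realized volatility of the recent returns. In numbers, a sketch of the scale factor:

    import numpy as np

    target_vol = 0.01
    recent_returns = np.random.randn(30) * 0.05   # realized vol around 5%
    c_vol = np.std(recent_returns, ddof=1)
    scale = target_vol / c_vol                    # roughly 0.2: leverage is cut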
--------------------------------------------------------------------------------
/alphamind/tests/execution/test_thresholdexecutor.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-9-22
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import unittest
9 |
10 | import pandas as pd
11 |
12 | from alphamind.execution.thresholdexecutor import ThresholdExecutor
13 |
14 |
15 | class TestThresholdExecutor(unittest.TestCase):
16 |
17 | def test_threshold_executor(self):
18 | target_pos = pd.DataFrame({'code': [1, 2, 3],
19 | 'weight': [0.2, 0.3, 0.5],
20 | 'industry': ['a', 'b', 'c']})
21 |
22 | executor = ThresholdExecutor(turn_over_threshold=0.5)
23 |
24 | # 1st round
25 | turn_over, executed_pos = executor.execute(target_pos)
26 | executor.set_current(executed_pos)
27 | self.assertTrue(target_pos.equals(executed_pos))
28 | self.assertAlmostEqual(turn_over, target_pos.weight.sum())
29 |
30 | # 2nd round
31 | target_pos = pd.DataFrame({'code': [1, 2, 4],
32 | 'weight': [0.3, 0.2, 0.5],
33 | 'industry': ['a', 'b', 'd']})
34 |
35 | turn_over, executed_pos = executor.execute(target_pos)
36 | executor.set_current(executed_pos)
37 | self.assertTrue(target_pos.equals(executed_pos))
38 | self.assertTrue(executed_pos.equals(executor.current_pos))
39 | self.assertAlmostEqual(turn_over, 1.2)
40 |
41 | # 3rd round
42 | target_pos = pd.DataFrame({'code': [1, 3, 4],
43 | 'weight': [0.3, 0.2, 0.5],
44 | 'industry': ['a', 'c', 'd']})
45 | turn_over, executed_pos2 = executor.execute(target_pos)
46 | executor.set_current(executed_pos2)
47 | self.assertTrue(executed_pos.equals(executed_pos2))
48 | self.assertAlmostEqual(turn_over, 0.)
49 |
50 |
51 | if __name__ == '__main__':
52 | unittest.main()
53 |
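
The third round's expectation follows from the threshold rule: the would-be turnover is below 0.5, so the executor keeps the previous position and reports zero turnover. By hand:

    prev = {1: 0.3, 2: 0.2, 4: 0.5}    # position after the second round
    target = {1: 0.3, 3: 0.2, 4: 0.5}

    codes = set(prev) | set(target)
    would_be = sum(abs(target.get(c, 0.) - prev.get(c, 0.)) for c in codes)
    print(would_be)   # 0.4 < 0.5, so no trade is done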
--------------------------------------------------------------------------------
/alphamind/tests/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alpha-miner/alpha-mind/023fca01d2cea7cd50328396c60b06c99706c426/alphamind/tests/model/__init__.py
--------------------------------------------------------------------------------
/alphamind/tests/model/test_composer.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2018-2-9
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import unittest
9 |
10 | from alphamind.data.engines.universe import Universe
11 | from alphamind.model.composer import Composer
12 | from alphamind.model.composer import DataMeta
13 | from alphamind.model.treemodel import XGBClassifier
14 |
15 |
16 | class TestComposer(unittest.TestCase):
17 |
18 | def _assert_composer_equal(self, lhs: Composer, rhs: Composer):
19 | self.assertEqual(type(lhs.alpha_model), type(rhs.alpha_model))
20 | self.assertEqual(lhs.data_meta, rhs.data_meta)
21 |
22 | def test_data_meta_persistence(self):
23 | freq = '5b'
24 | universe = Universe('zz800')
25 | batch = 4
26 | neutralized_risk = ['SIZE']
27 | risk_model = 'long'
28 | pre_process = ['standardize', 'winsorize_normal']
29 | post_process = ['standardize', 'winsorize_normal']
30 | warm_start = 2
31 | data_source = 'postgresql://user:pwd@server/dummy'
32 |
33 | data_meta = DataMeta(freq=freq,
34 | universe=universe,
35 | batch=batch,
36 | neutralized_risk=neutralized_risk,
37 | risk_model=risk_model,
38 | pre_process=pre_process,
39 | post_process=post_process,
40 | warm_start=warm_start,
41 | data_source=data_source)
42 |
43 | data_desc = data_meta.save()
44 |
45 | loaded_data = DataMeta.load(data_desc)
46 | self.assertEqual(data_meta.freq, loaded_data.freq)
47 | self.assertEqual(data_meta.universe, loaded_data.universe)
48 | self.assertEqual(data_meta.batch, loaded_data.batch)
49 | self.assertEqual(data_meta.neutralized_risk, loaded_data.neutralized_risk)
50 | self.assertEqual(data_meta.risk_model, loaded_data.risk_model)
51 | self.assertEqual(data_meta.pre_process, loaded_data.pre_process)
52 | self.assertEqual(data_meta.post_process, loaded_data.post_process)
53 | self.assertEqual(data_meta.warm_start, loaded_data.warm_start)
54 | self.assertEqual(data_meta.data_source, loaded_data.data_source)
55 |
56 | def test_composer_persistence(self):
57 | freq = '5b'
58 | universe = Universe('zz800')
59 | batch = 4
60 | neutralized_risk = ['SIZE']
61 | risk_model = 'long'
62 | pre_process = ['standardize', 'winsorize_normal']
63 | post_process = ['standardize', 'winsorize_normal']
64 | warm_start = 2
65 | data_source = 'postgresql://user:pwd@server/dummy'
66 |
67 | data_meta = DataMeta(freq=freq,
68 | universe=universe,
69 | batch=batch,
70 | neutralized_risk=neutralized_risk,
71 | risk_model=risk_model,
72 | pre_process=pre_process,
73 | post_process=post_process,
74 | warm_start=warm_start,
75 | data_source=data_source)
76 |
77 | features = {'f1': 'closePrice', 'f2': 'openPrice'}
78 | alpha_model = XGBClassifier(features=features)
79 |
80 | composer = Composer(alpha_model=alpha_model,
81 | data_meta=data_meta)
82 |
83 | comp_desc = composer.save()
84 | loaded_comp = Composer.load(comp_desc)
85 | self._assert_composer_equal(composer, loaded_comp)
86 |
--------------------------------------------------------------------------------
/alphamind/tests/model/test_linearmodel.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-9-4
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import unittest
9 |
10 | import numpy as np
11 | import pandas as pd
12 | from sklearn.linear_model import LinearRegression as LinearRegression2
13 | from sklearn.linear_model import LogisticRegression as LogisticRegression2
14 |
15 | from alphamind.model.linearmodel import ConstLinearModel
16 | from alphamind.model.linearmodel import LinearRegression
17 | from alphamind.model.linearmodel import LogisticRegression
18 | from alphamind.model.loader import load_model
19 |
20 |
21 | class TestLinearModel(unittest.TestCase):
22 |
23 | def setUp(self):
24 | self.n = 3
25 | self.features = ['a', 'b', 'c']
26 | self.train_x = pd.DataFrame(np.random.randn(1000, self.n), columns=['a', 'b', 'c'])
27 | self.train_y = np.random.randn(1000)
28 | self.train_y_label = np.where(self.train_y > 0., 1, 0)
29 | self.predict_x = pd.DataFrame(np.random.randn(10, self.n), columns=['a', 'b', 'c'])
30 |
31 | def test_const_linear_model(self):
32 | features = ['c', 'b', 'a']
33 | weights = dict(c=3., b=2., a=1.)
34 | model = ConstLinearModel(features=features,
35 | weights=weights)
36 |
37 | calculated_y = model.predict(self.predict_x)
38 | expected_y = self.predict_x[features] @ np.array([weights[f] for f in features])
39 | np.testing.assert_array_almost_equal(calculated_y, expected_y)
40 |
41 | def test_const_linear_model_persistence(self):
42 | weights = dict(c=3., b=2., a=1.)
43 | model = ConstLinearModel(features=['a', 'b', 'c'],
44 | weights=weights)
45 |
46 | desc = model.save()
47 | new_model = load_model(desc)
48 |
49 | self.assertEqual(model.features, new_model.features)
50 | np.testing.assert_array_almost_equal(model.weights, new_model.weights)
51 |
52 | def test_const_linear_model_score(self):
53 | model = LinearRegression(['a', 'b', 'c'], fit_intercept=False)
54 | model.fit(self.train_x, self.train_y)
55 |
56 | expected_score = model.score(self.train_x, self.train_y)
57 |
58 | const_model = ConstLinearModel(features=['a', 'b', 'c'],
59 | weights=dict(zip(model.features, model.weights)))
60 | calculated_score = const_model.score(self.train_x, self.train_y)
61 |
62 | self.assertAlmostEqual(expected_score, calculated_score)
63 |
64 | def test_linear_regression(self):
65 | model = LinearRegression(['a', 'b', 'c'], fit_intercept=False)
66 | model.fit(self.train_x, self.train_y)
67 |
68 | calculated_y = model.predict(self.predict_x)
69 |
70 | expected_model = LinearRegression2(fit_intercept=False)
71 | expected_model.fit(self.train_x, self.train_y)
72 | expected_y = expected_model.predict(self.predict_x)
73 |
74 | np.testing.assert_array_almost_equal(calculated_y, expected_y)
75 | np.testing.assert_array_almost_equal(expected_model.coef_, model.weights)
76 |
77 | def test_linear_regression_persistence(self):
78 | model = LinearRegression(['a', 'b', 'c'], fit_intercept=False)
79 | model.fit(self.train_x, self.train_y)
80 |
81 | desc = model.save()
82 | new_model = load_model(desc)
83 |
84 | calculated_y = new_model.predict(self.predict_x)
85 | expected_y = model.predict(self.predict_x)
86 |
87 | np.testing.assert_array_almost_equal(calculated_y, expected_y)
88 | np.testing.assert_array_almost_equal(new_model.weights, model.weights)
89 |
90 | def test_logistic_regression(self):
91 | model = LogisticRegression(['a', 'b', 'c'], fit_intercept=False)
92 | model.fit(self.train_x, self.train_y_label)
93 |
94 | calculated_y = model.predict(self.predict_x)
95 |
96 | expected_model = LogisticRegression2(fit_intercept=False)
97 | expected_model.fit(self.train_x, self.train_y_label)
98 | expected_y = expected_model.predict(self.predict_x)
99 |
100 | np.testing.assert_array_equal(calculated_y, expected_y)
101 | np.testing.assert_array_almost_equal(expected_model.coef_, model.weights)
102 |
103 | def test_logistic_regression_persistence(self):
104 | model = LogisticRegression(['a', 'b', 'c'], fit_intercept=False)
105 | model.fit(self.train_x, self.train_y_label)
106 |
107 | desc = model.save()
108 | new_model = load_model(desc)
109 |
110 | calculated_y = new_model.predict(self.predict_x)
111 | expected_y = model.predict(self.predict_x)
112 |
113 | np.testing.assert_array_almost_equal(calculated_y, expected_y)
114 | np.testing.assert_array_almost_equal(new_model.weights, model.weights)
115 |
--------------------------------------------------------------------------------
/alphamind/tests/model/test_loader.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-9-5
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import unittest
9 |
10 | import numpy as np
11 | import pandas as pd
12 |
13 | from alphamind.model.linearmodel import LinearRegression
14 | from alphamind.model.loader import load_model
15 |
16 |
17 | class TestLoader(unittest.TestCase):
18 |
19 | def setUp(self):
20 | self.n = 3
21 | self.trained_x = pd.DataFrame(np.random.randn(1000, self.n), columns=['a', 'b', 'c'])
22 | self.trained_y = np.random.randn(1000, 1)
23 |
24 | self.predict_x = pd.DataFrame(np.random.randn(100, self.n), columns=['a', 'b', 'c'])
25 |
26 | def test_load_model(self):
27 | model = LinearRegression(['a', 'b', 'c'])
28 | model.fit(self.trained_x, self.trained_y)
29 |
30 | model_desc = model.save()
31 | new_model = load_model(model_desc)
32 |
33 | np.testing.assert_array_almost_equal(model.predict(self.predict_x),
34 | new_model.predict(self.predict_x))
35 |
36 | self.assertEqual(model.features, new_model.features)
37 | self.assertEqual(model.trained_time, new_model.trained_time)
38 |
--------------------------------------------------------------------------------
/alphamind/tests/model/test_modelbase.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2018-2-8
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import unittest
9 |
10 | from alphamind.model.linearmodel import ConstLinearModel
11 |
12 |
13 | class TestModelBase(unittest.TestCase):
14 |
15 | def test_simple_model_features(self):
16 | model = ConstLinearModel(features=['c', 'b', 'a'])
17 | self.assertListEqual(['a', 'b', 'c'], model.features)
18 |
--------------------------------------------------------------------------------
/alphamind/tests/model/test_treemodel.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2018-1-5
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import unittest
9 |
10 | import numpy as np
11 | import pandas as pd
12 |
13 | from alphamind.model.loader import load_model
14 | from alphamind.model.treemodel import RandomForestClassifier
15 | from alphamind.model.treemodel import RandomForestRegressor
16 | from alphamind.model.treemodel import XGBClassifier
17 | from alphamind.model.treemodel import XGBRegressor
18 | from alphamind.model.treemodel import XGBTrainer
19 |
20 |
21 | class TestTreeModel(unittest.TestCase):
22 |
23 | def setUp(self):
24 | self.features = list('0123456789')
25 | self.x = pd.DataFrame(np.random.randn(1000, 10), columns=self.features)
26 | self.y = np.random.randn(1000)
27 | self.sample_x = pd.DataFrame(np.random.randn(100, 10), columns=self.features)
28 |
29 | def test_random_forest_regress_persistence(self):
30 | model = RandomForestRegressor(features=self.features)
31 | model.fit(self.x, self.y)
32 |
33 | desc = model.save()
34 | new_model = load_model(desc)
35 | self.assertEqual(model.features, new_model.features)
36 |
37 | np.testing.assert_array_almost_equal(model.predict(self.sample_x),
38 | new_model.predict(self.sample_x))
39 | np.testing.assert_array_almost_equal(model.importances, new_model.importances)
40 |
41 | def test_random_forest_classify_persistence(self):
42 | model = RandomForestClassifier(features=self.features)
43 | y = np.where(self.y > 0, 1, 0)
44 | model.fit(self.x, y)
45 |
46 | desc = model.save()
47 | new_model = load_model(desc)
48 | self.assertEqual(model.features, new_model.features)
49 |
50 | np.testing.assert_array_almost_equal(model.predict(self.sample_x),
51 | new_model.predict(self.sample_x))
52 | np.testing.assert_array_almost_equal(model.importances, new_model.importances)
53 |
54 | def test_xgb_regress_persistence(self):
55 | model = XGBRegressor(features=self.features)
56 | model.fit(self.x, self.y)
57 |
58 | desc = model.save()
59 | new_model = load_model(desc)
60 | self.assertEqual(model.features, new_model.features)
61 |
62 | np.testing.assert_array_almost_equal(model.predict(self.sample_x),
63 | new_model.predict(self.sample_x))
64 | np.testing.assert_array_almost_equal(model.importances, new_model.importances)
65 |
66 | def test_xgb_classify_persistence(self):
67 | model = XGBClassifier(features=self.features)
68 | y = np.where(self.y > 0, 1, 0)
69 | model.fit(self.x, y)
70 |
71 | desc = model.save()
72 | new_model = load_model(desc)
73 | self.assertEqual(model.features, new_model.features)
74 |
75 | np.testing.assert_array_almost_equal(model.predict(self.sample_x),
76 | new_model.predict(self.sample_x))
77 | np.testing.assert_array_almost_equal(model.importances, new_model.importances)
78 |
79 | def test_xgb_trainer_equal_classifier(self):
80 | model1 = XGBClassifier(n_estimators=100,
81 | learning_rate=0.1,
82 | max_depth=3,
83 | features=self.features,
84 | random_state=42)
85 |
86 | model2 = XGBTrainer(features=self.features,
87 | objective='reg:logistic',
88 | booster='gbtree',
89 | tree_method='exact',
90 | n_estimators=100,
91 | learning_rate=0.1,
92 | max_depth=3,
93 | random_state=42)
94 |
95 | y = np.where(self.y > 0, 1, 0)
96 | model1.fit(self.x, y)
97 | model2.fit(self.x, y)
98 |
99 | predict1 = model1.predict(self.sample_x)
100 | predict2 = model2.predict(self.sample_x)
101 | predict2 = np.where(predict2 > 0.5, 1., 0.)
102 | np.testing.assert_array_almost_equal(predict1, predict2)
103 |
104 | def test_xgb_trainer_persistence(self):
105 | model = XGBTrainer(features=self.features,
106 | objective='binary:logistic',
107 | booster='gbtree',
108 | tree_method='hist',
109 | n_estimators=200)
110 | y = np.where(self.y > 0, 1, 0)
111 | model.fit(self.x, y)
112 |
113 | desc = model.save()
114 | new_model = load_model(desc)
115 | self.assertEqual(model.features, new_model.features)
116 |
117 | np.testing.assert_array_almost_equal(model.predict(self.sample_x),
118 | new_model.predict(self.sample_x))
119 | np.testing.assert_array_almost_equal(model.importances, new_model.importances)
120 |
--------------------------------------------------------------------------------
/alphamind/tests/portfolio/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-4-27
4 |
5 | @author: cheng.li
6 | """
7 |
--------------------------------------------------------------------------------
/alphamind/tests/portfolio/test_evolver.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-11-23
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import unittest
9 |
10 | import numpy as np
11 |
12 | from alphamind.portfolio.evolver import evolve_positions
13 |
14 |
15 | class TestEvolver(unittest.TestCase):
16 |
17 | def test_evolve_positions_with_all_positive_position(self):
18 | positions = np.array([0.2, 0.2, 0.8])
19 | dx_returns = np.array([0.06, 0.04, -0.10])
20 |
21 | simple_return = np.exp(dx_returns)
22 | curr_pos = positions * simple_return
23 | expected_pos = curr_pos / curr_pos.sum() * positions.sum()
24 |
25 | calculated_pos = evolve_positions(positions, dx_returns)
26 |
27 | np.testing.assert_array_almost_equal(expected_pos, calculated_pos)
28 |
29 | def test_evolve_positions_with_negative_position(self):
30 | positions = np.array([0.2, 0.3, -0.8])
31 | dx_returns = np.array([0.06, 0.04, -0.10])
32 |
33 | simple_return = np.exp(dx_returns)
34 | curr_pos = positions * simple_return
35 | expected_pos = curr_pos / np.abs(curr_pos).sum() * np.abs(positions).sum()
36 |
37 | calculated_pos = evolve_positions(positions, dx_returns)
38 |
39 | np.testing.assert_array_almost_equal(expected_pos, calculated_pos)
40 |
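
Both tests read dx_returns as log returns: positions are compounded with exp and then rescaled so that gross exposure (the sum of absolute positions) is preserved. The whole rule in two lines:

    import numpy as np

    positions = np.array([0.2, 0.3, -0.8])
    dx_returns = np.array([0.06, 0.04, -0.10])

    grown = positions * np.exp(dx_returns)
    evolved = grown / np.abs(grown).sum() * np.abs(positions).sum()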
--------------------------------------------------------------------------------
/alphamind/tests/portfolio/test_linearbuild.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-5-5
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import unittest
9 |
10 | import numpy as np
11 |
12 | from alphamind.portfolio.linearbuilder import linear_builder
13 |
14 |
15 | class TestLinearBuild(unittest.TestCase):
16 | def setUp(self):
17 | self.er = np.random.randn(3000)
18 | self.risk_exp = np.random.randn(3000, 30)
19 | self.risk_exp = np.concatenate([self.risk_exp, np.ones((3000, 1))], axis=1)
20 | self.bm = np.random.randint(100, size=3000).astype(float)
21 | self.current_pos = np.random.randint(0, 100, size=3000)
22 | self.current_pos = self.current_pos / self.current_pos.sum()
23 |
24 | def test_linear_build(self):
25 | bm = self.bm / self.bm.sum()
26 | epsilon = 1e-6
27 |
28 | status, _, w = linear_builder(self.er,
29 | 0.,
30 | 0.01,
31 | self.risk_exp,
32 | (bm @ self.risk_exp, bm @ self.risk_exp))
33 | self.assertEqual(status, 'optimal')
34 | self.assertAlmostEqual(np.sum(w), 1.)
35 | self.assertTrue(np.all(w <= 0.01 + epsilon))
36 | self.assertTrue(np.all(w >= -epsilon))
37 |
38 | calc_risk = (w - bm) @ self.risk_exp
39 | expected_risk = np.zeros(self.risk_exp.shape[1])
40 | np.testing.assert_array_almost_equal(calc_risk, expected_risk)
41 |
42 | def test_linear_build_with_interior(self):
43 | bm = self.bm / self.bm.sum()
44 | epsilon = 1e-6
45 |
46 | status, _, w = linear_builder(self.er,
47 | 0.,
48 | 0.01,
49 | self.risk_exp,
50 | (bm @ self.risk_exp, bm @ self.risk_exp),
51 | method='interior')
52 | self.assertEqual(status, 'optimal')
53 | self.assertAlmostEqual(np.sum(w), 1.)
54 | self.assertTrue(np.all(w <= 0.01 + epsilon))
55 | self.assertTrue(np.all(w >= -epsilon))
56 |
57 | calc_risk = (w - bm) @ self.risk_exp
58 | expected_risk = np.zeros(self.risk_exp.shape[1])
59 | np.testing.assert_array_almost_equal(calc_risk, expected_risk)
60 |
61 | def test_linear_build_with_inequality_constraints(self):
62 | bm = self.bm / self.bm.sum()
63 | epsilon = 1e-6
64 |
65 | risk_lbound = bm @ self.risk_exp
66 | risk_ubound = bm @ self.risk_exp
67 |
68 | risk_tolerance = 0.01 * np.abs(risk_lbound[:-1])
69 |
70 | risk_lbound[:-1] = risk_lbound[:-1] - risk_tolerance
71 | risk_ubound[:-1] = risk_ubound[:-1] + risk_tolerance
72 |
73 | status, _, w = linear_builder(self.er,
74 | 0.,
75 | 0.01,
76 | self.risk_exp,
77 | risk_target=(risk_lbound, risk_ubound))
78 | self.assertEqual(status, 'optimal')
79 | self.assertAlmostEqual(np.sum(w), 1.)
80 | self.assertTrue(np.all(w <= 0.01 + epsilon))
81 | self.assertTrue(np.all(w >= -epsilon))
82 |
83 | calc_risk = (w - bm) @ self.risk_exp / np.abs(bm @ self.risk_exp)
84 | self.assertTrue(np.all(np.abs(calc_risk) <= 1.01e-2))
85 |
86 | def test_linear_build_with_to_constraint(self):
87 | bm = self.bm / self.bm.sum()
88 | epsilon = 1e-6
89 | turn_over_target = 0.1
90 |
91 | risk_lbound = bm @ self.risk_exp
92 | risk_ubound = bm @ self.risk_exp
93 |
94 | risk_tolerance = 0.01 * np.abs(risk_lbound[:-1])
95 |
96 | risk_lbound[:-1] = risk_lbound[:-1] - risk_tolerance
97 | risk_ubound[:-1] = risk_ubound[:-1] + risk_tolerance
98 |
99 | status, _, w = linear_builder(self.er,
100 | 0.,
101 | 0.01,
102 | self.risk_exp,
103 | risk_target=(risk_lbound, risk_ubound),
104 | turn_over_target=turn_over_target,
105 | current_position=self.current_pos)
106 | self.assertEqual(status, 'optimal')
107 | self.assertAlmostEqual(np.sum(w), 1.)
108 | self.assertTrue(np.all(w <= 0.01 + epsilon))
109 | self.assertTrue(np.all(w >= -epsilon))
110 | self.assertAlmostEqual(np.abs(w - self.current_pos).sum(), turn_over_target)
111 |
112 | calc_risk = (w - bm) @ self.risk_exp / np.abs(bm @ self.risk_exp)
113 | self.assertTrue(np.all(np.abs(calc_risk) <= 1.0001e-2))
114 |
115 | def test_linear_build_with_to_constraint_with_ecos(self):
116 | bm = self.bm / self.bm.sum()
117 | epsilon = 1e-6
118 | turn_over_target = 0.1
119 |
120 | risk_lbound = bm @ self.risk_exp
121 | risk_ubound = bm @ self.risk_exp
122 |
123 | risk_tolerance = 0.01 * np.abs(risk_lbound[:-1])
124 |
125 | risk_lbound[:-1] = risk_lbound[:-1] - risk_tolerance
126 | risk_ubound[:-1] = risk_ubound[:-1] + risk_tolerance
127 |
128 | status, _, w = linear_builder(self.er,
129 | 0.,
130 | 0.01,
131 | self.risk_exp,
132 | risk_target=(risk_lbound, risk_ubound),
133 | turn_over_target=turn_over_target,
134 | current_position=self.current_pos,
135 | method='ecos')
136 | self.assertEqual(status, 'optimal')
137 | self.assertAlmostEqual(np.sum(w), 1.)
138 | self.assertTrue(np.all(w <= 0.01 + epsilon))
139 | self.assertTrue(np.all(w >= -epsilon))
140 | self.assertAlmostEqual(np.abs(w - self.current_pos).sum(), turn_over_target)
141 |
142 | calc_risk = (w - bm) @ self.risk_exp / np.abs(bm @ self.risk_exp)
143 | self.assertTrue(np.all(np.abs(calc_risk) <= 1.0001e-2))
144 |
145 |
146 | if __name__ == '__main__':
147 | unittest.main()
148 |
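
Everything these tests exercise is a linear program: maximize expected return subject to box bounds on the weights, bands on the risk exposures, and optionally an L1 turnover budget (the trailing column of ones in risk_exp doubles as the full-investment constraint). A scipy sketch of the equality-constrained core, assuming scipy is available (the library's own builder may use a different backend):

    import numpy as np
    from scipy.optimize import linprog

    n = 200
    er = np.random.randn(n)
    risk_exp = np.concatenate([np.random.randn(n, 5), np.ones((n, 1))], axis=1)
    bm = np.random.rand(n)
    bm /= bm.sum()

    res = linprog(c=-er,                                # linprog minimizes, so negate
                  A_eq=risk_exp.T, b_eq=bm @ risk_exp,  # match benchmark exposures
                  bounds=[(0., 0.02)] * n,
                  method='highs')
    assert res.status == 0
    w = res.x                                           # sums to 1 via the ones column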
--------------------------------------------------------------------------------
/alphamind/tests/portfolio/test_longshortbuild.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-5-9
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import unittest
9 |
10 | import numpy as np
11 | import pandas as pd
12 |
13 | from alphamind.portfolio.longshortbulder import long_short_builder
14 |
15 |
16 | class TestLongShortBuild(unittest.TestCase):
17 |
18 | def setUp(self):
19 | self.x = np.random.randn(3000, 10)
20 | self.groups = np.random.randint(10, 40, size=3000)
21 | choices = np.random.choice(3000, 100, replace=False)
22 | self.masks = np.full(3000, False, dtype=bool)
23 | self.masks[choices] = True
24 |
25 | def test_long_short_build(self):
26 | x = self.x[:, 0].flatten()
27 | calc_weights = long_short_builder(x).flatten()
28 | expected_weights = x / np.abs(x).sum()
29 | np.testing.assert_array_almost_equal(calc_weights, expected_weights)
30 |
31 | calc_weights = long_short_builder(self.x, leverage=2)
32 | expected_weights = self.x / np.abs(self.x).sum(axis=0) * 2
33 | np.testing.assert_array_almost_equal(calc_weights, expected_weights)
34 |
35 | def test_long_short_build_with_group(self):
36 | x = self.x[:, 0].flatten()
37 | calc_weights = long_short_builder(x, groups=self.groups).flatten()
38 | expected_weights = pd.Series(x).groupby(self.groups).apply(lambda s: s / np.abs(s).sum())
39 | np.testing.assert_array_almost_equal(calc_weights, expected_weights)
40 |
41 | calc_weights = long_short_builder(self.x, groups=self.groups)
42 | expected_weights = pd.DataFrame(self.x).groupby(self.groups).apply(
43 | lambda s: s / np.abs(s).sum(axis=0))
44 | np.testing.assert_array_almost_equal(calc_weights, expected_weights)
45 |
46 | def test_long_short_build_with_masks(self):
47 | x = self.x[:, 0].flatten()
48 | calc_weights = long_short_builder(x, masks=self.masks, leverage=1.).flatten()
49 | self.assertAlmostEqual(calc_weights.sum(), 0.)
50 |
51 | masked_x = x.copy()
52 | masked_x[self.masks] = 0.
53 | masked_x[~self.masks] = masked_x[~self.masks] - masked_x[~self.masks].mean()
54 | expected_weights = masked_x / np.abs(masked_x).sum()
55 | np.testing.assert_array_almost_equal(calc_weights, expected_weights)
56 |
57 |
58 | if __name__ == '__main__':
59 | unittest.main()
60 |
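
The builder's normalization is leverage-scaled L1: each weight is x_i / sum|x| times the leverage, computed per group when groups are given, and after de-meaning the unmasked names when masks are given. A worked one-group example:

    import numpy as np

    x = np.array([1.0, -3.0, 2.0])
    w = x / np.abs(x).sum()          # [1/6, -1/2, 1/3]
    assert np.isclose(np.abs(w).sum(), 1.0)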
--------------------------------------------------------------------------------
/alphamind/tests/portfolio/test_meanvariancebuild.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-6-27
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import unittest
9 |
10 | import numpy as np
11 |
12 | from alphamind.portfolio.meanvariancebuilder import mean_variance_builder
13 | from alphamind.portfolio.meanvariancebuilder import target_vol_builder
14 |
15 |
16 | class TestMeanVarianceBuild(unittest.TestCase):
17 |
18 | def test_mean_variance_builder(self):
19 | er = np.array([0.01, 0.02, 0.03])
20 | cov = np.array([[0.02, 0.01, 0.02],
21 | [0.01, 0.02, 0.03],
22 | [0.02, 0.03, 0.02]])
23 | ids_var = np.diag([0.01, 0.02, 0.03])
24 | cov += ids_var
25 |
26 | bm = np.array([0.3, 0.3, 0.4])
27 | lbound = np.array([0., 0., 0.])
28 | ubound = np.array([0.4, 0.4, 0.5])
29 |
30 | risk_exposure = np.array([[1., 1., 1.],
31 | [1., 0., 1.]]).T
32 | risk_target = (np.array([bm.sum(), 0.3]), np.array([bm.sum(), 0.7]))
33 |
34 | model = dict(cov=cov, factor_cov=None, factor_loading=None, idsync=None)
35 | status, _, x = mean_variance_builder(er, model, bm, lbound, ubound, risk_exposure,
36 | risk_target)
37 |
38 | self.assertTrue(status == 'optimal')
39 | self.assertAlmostEqual(x.sum(), bm.sum())
40 | self.assertTrue(np.all(x <= ubound + 1.e-6))
 41 |         self.assertTrue(np.all(x >= lbound - 1.e-6))
42 | self.assertTrue(np.all(x @ risk_exposure <= risk_target[1] + 1.e-6))
43 | self.assertTrue(np.all(x @ risk_exposure >= risk_target[0] - 1.e-6))
44 | np.testing.assert_array_almost_equal(x, [0.1, 0.4, 0.5])
45 |
46 | def test_mean_variance_builder_without_constraints(self):
47 | er = np.array([0.01, 0.02, 0.03])
48 | cov = np.array([[0.02, 0.01, 0.02],
49 | [0.01, 0.02, 0.03],
50 | [0.02, 0.03, 0.02]])
51 | ids_var = np.diag([0.01, 0.02, 0.03])
52 | cov += ids_var
53 |
54 | bm = np.array([0., 0., 0.])
55 |
56 | model = dict(cov=cov, factor_cov=None, factor_loading=None, idsync=None)
57 | status, _, x = mean_variance_builder(er, model, bm, None, None, None, None, lam=1)
58 | np.testing.assert_array_almost_equal(x, np.linalg.inv(cov) @ er)
59 |
60 | def test_mean_variance_builder_without_constraints_with_factor_model(self):
61 | pass
62 |
63 | def test_mean_variance_builder_with_none_unity_lambda(self):
64 | er = np.array([0.01, 0.02, 0.03])
65 | cov = np.array([[0.02, 0.01, 0.02],
66 | [0.01, 0.02, 0.03],
67 | [0.02, 0.03, 0.02]])
68 | ids_var = np.diag([0.01, 0.02, 0.03])
69 | cov += ids_var
70 |
71 | bm = np.array([0.3, 0.3, 0.4])
72 | lbound = np.array([0., 0., 0.])
73 | ubound = np.array([0.4, 0.4, 0.5])
74 |
75 | risk_exposure = np.array([[1., 1., 1.],
76 | [1., 0., 1.]]).T
77 | risk_target = (np.array([bm.sum(), 0.3]), np.array([bm.sum(), 0.7]))
78 |
79 | model = dict(cov=cov, factor_cov=None, factor_loading=None, idsync=None)
80 | status, _, x = mean_variance_builder(er, model, bm, lbound, ubound, risk_exposure,
81 | risk_target, lam=100)
82 |
83 | self.assertTrue(status == 'optimal')
84 | self.assertAlmostEqual(x.sum(), bm.sum())
85 | self.assertTrue(np.all(x <= ubound + 1.e-6))
 86 |         self.assertTrue(np.all(x >= lbound - 1.e-6))
87 | self.assertTrue(np.all(x @ risk_exposure <= risk_target[1] + 1.e-6))
88 | self.assertTrue(np.all(x @ risk_exposure >= risk_target[0] - 1.e-6))
89 | np.testing.assert_array_almost_equal(x, [0.2950, 0.3000, 0.4050])
90 |
91 | def test_target_vol_builder(self):
92 | er = np.array([0.1, 0.2, 0.3])
93 | cov = np.array([[0.05, 0.01, 0.02],
94 | [0.01, 0.06, 0.03],
95 | [0.02, 0.03, 0.07]])
96 |
97 | lbound = np.array([0., 0., 0.])
98 | ubound = np.array([0.8, 0.8, 0.8])
99 |
100 | bm = np.array([0.3, 0.3, 0.3])
101 |
102 | risk_exposure = np.array([[1., 1., 1.]]).T
103 | risk_target = (np.array([bm.sum()]), np.array([bm.sum()]))
104 | model = dict(cov=cov, factor_cov=None, factor_loading=None, idsync=None)
105 | status, _, x = target_vol_builder(er, model, bm, lbound, ubound, risk_exposure, risk_target,
106 | 0.1)
107 | self.assertTrue(status == 'optimal')
108 | self.assertTrue(np.all(x <= ubound + 1.e-6))
109 |         self.assertTrue(np.all(x >= lbound - 1.e-6))
110 | self.assertTrue(np.all(x @ risk_exposure <= risk_target[1] + 1.e-6))
111 | self.assertTrue(np.all(x @ risk_exposure >= risk_target[0] - 1.e-6))
112 | np.testing.assert_array_almost_equal(x, [-0.3, -0.10919033, 0.40919033] + bm)
113 |
--------------------------------------------------------------------------------
/alphamind/tests/portfolio/test_percentbuild.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-5-4
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import unittest
9 |
10 | import numpy as np
11 | import pandas as pd
12 |
13 | from alphamind.portfolio.percentbuilder import percent_build
14 |
15 |
16 | class TestPercentBuild(unittest.TestCase):
17 |
18 | def setUp(self):
19 | self.n_samples = 3000
20 | self.p_included = 0.1
21 | self.n_groups = 30
22 | self.n_portfolios = range(1, 10)
23 | self.n_mask = 100
24 |
25 | def test_percent_build(self):
26 | n_include = int(self.n_samples * self.p_included)
27 |
28 | for n_portfolio in self.n_portfolios:
29 | x = np.random.randn(self.n_samples, n_portfolio)
30 |
31 | calc_weights = percent_build(x, self.p_included)
32 |
33 | expected_weights = np.zeros((len(x), n_portfolio))
34 |
35 | masks = (-x).argsort(axis=0).argsort(axis=0) < n_include
36 |
37 | for j in range(x.shape[1]):
38 | expected_weights[masks[:, j], j] = 1.
39 |
40 | np.testing.assert_array_almost_equal(calc_weights, expected_weights)
41 |
42 | def test_percent_build_with_group(self):
43 | for n_portfolio in self.n_portfolios:
44 |
45 | x = np.random.randn(self.n_samples, n_portfolio)
46 | groups = np.random.randint(self.n_groups, size=self.n_samples)
47 |
48 | calc_weights = percent_build(x, self.p_included, groups)
49 |
50 | grouped_ordering = pd.DataFrame(-x).groupby(groups).rank()
51 | grouped_count = pd.DataFrame(-x).groupby(groups).transform(lambda x: x.count())
52 | expected_weights = np.zeros((len(x), n_portfolio))
53 |
54 | n_include = (grouped_count * self.p_included).astype(int)
55 | masks = (grouped_ordering <= n_include).values
56 | for j in range(x.shape[1]):
57 | expected_weights[masks[:, j], j] = 1.
58 |
59 | np.testing.assert_array_almost_equal(calc_weights, expected_weights)
60 |
61 | def test_percent_build_with_masks(self):
62 | for n_portfolio in self.n_portfolios:
63 | x = np.random.randn(self.n_samples, n_portfolio)
64 | choices = np.random.choice(self.n_samples, self.n_mask, replace=False)
65 | masks = np.full(self.n_samples, True, dtype=bool)
66 | masks[choices] = False
67 |
68 | calc_weights = percent_build(x, self.p_included, masks=masks)
69 |
70 | expected_weights = np.zeros((len(x), n_portfolio))
71 |
72 | filtered_index = np.arange(len(x))[masks]
73 | filtered_x = x[masks]
 74 |             big_boolean = np.full(x.shape, False, dtype=bool)
75 |
76 | n_included = int(self.p_included * len(x))
77 | chosen = (-filtered_x).argsort(axis=0).argsort(axis=0) < n_included
 78 |             big_boolean[filtered_index] = chosen
79 |
80 | for j in range(x.shape[1]):
 81 |                 expected_weights[big_boolean[:, j], j] = 1.
82 |
83 | np.testing.assert_array_almost_equal(calc_weights, expected_weights)
84 |
85 |
86 | if __name__ == '__main__':
87 | unittest.main()
88 |
--------------------------------------------------------------------------------
/alphamind/tests/portfolio/test_rankbuild.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-4-27
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import unittest
9 |
10 | import numpy as np
11 | import pandas as pd
12 |
13 | from alphamind.portfolio.rankbuilder import rank_build
14 |
15 |
16 | class TestRankBuild(unittest.TestCase):
17 |
18 | def setUp(self):
19 | self.n_samples = 3000
20 | self.n_included = 300
21 | self.n_groups = 30
22 | self.n_portfolio = range(1, 10)
23 | self.n_mask = 100
24 |
25 | def test_rank_build(self):
26 | for n_portfolio in self.n_portfolio:
27 | x = np.random.randn(self.n_samples, n_portfolio)
28 |
29 | calc_weights = rank_build(x, self.n_included)
30 |
31 | expected_weights = np.zeros((len(x), n_portfolio))
32 | chosen = (-x).argsort(axis=0).argsort(axis=0) < self.n_included
33 |
34 | for j in range(x.shape[1]):
35 | expected_weights[chosen[:, j], j] = 1.
36 |
37 | np.testing.assert_array_almost_equal(calc_weights, expected_weights)
38 |
39 | def test_rank_build_with_group(self):
40 | n_include = int(self.n_included / self.n_groups)
41 |
42 | for n_portfolio in self.n_portfolio:
43 |
44 | x = np.random.randn(self.n_samples, n_portfolio)
45 | groups = np.random.randint(self.n_groups, size=self.n_samples)
46 |
47 | calc_weights = rank_build(x, n_include, groups)
48 |
49 | grouped_ordering = pd.DataFrame(-x).groupby(groups).rank()
50 | expected_weights = np.zeros((len(x), n_portfolio))
51 | chosen = (grouped_ordering <= n_include).values
52 | for j in range(x.shape[1]):
53 | expected_weights[chosen[:, j], j] = 1.
54 |
55 | np.testing.assert_array_almost_equal(calc_weights, expected_weights)
56 |
57 | def test_rank_build_with_masks(self):
58 | for n_portfolio in self.n_portfolio:
59 | x = np.random.randn(self.n_samples, n_portfolio)
60 | choices = np.random.choice(self.n_samples, self.n_mask, replace=False)
61 | masks = np.full(self.n_samples, True, dtype=bool)
62 | masks[choices] = False
63 |
64 | calc_weights = rank_build(x, self.n_included, masks=masks)
65 |
66 | expected_weights = np.zeros((len(x), n_portfolio))
67 |
68 | filtered_index = np.arange(len(x))[masks]
69 | filtered_x = x[masks]
 70 |             big_boolean = np.full(x.shape, False, dtype=bool)
71 |
72 | chosen = (-filtered_x).argsort(axis=0).argsort(axis=0) < self.n_included
 73 |             big_boolean[filtered_index] = chosen
74 |
75 | for j in range(x.shape[1]):
 76 |                 expected_weights[big_boolean[:, j], j] = 1.
77 |
78 | np.testing.assert_array_almost_equal(calc_weights, expected_weights)
79 |
80 |
81 | if __name__ == '__main__':
82 | unittest.main()
83 |
--------------------------------------------------------------------------------
/alphamind/tests/portfolio/test_riskmodel.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2018-5-29
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import unittest
9 |
10 | import numpy as np
11 | import pandas as pd
12 |
13 | from alphamind.portfolio.riskmodel import FactorRiskModel
14 | from alphamind.portfolio.riskmodel import FullRiskModel
15 |
16 |
17 | class TestRiskModel(unittest.TestCase):
18 |
19 | def setUp(self):
20 | self.factor_cov = pd.DataFrame([[0.5, -0.3], [-0.3, 0.7]], columns=['a', 'b'],
21 | index=['a', 'b'])
22 | self.risk_exp = pd.DataFrame([[0.8, 0.2], [0.5, 0.5], [0.2, 0.8]], columns=['a', 'b'],
23 | index=[1, 2, 3])
24 | self.idsync = pd.Series([0.1, 0.3, 0.2], index=[1, 2, 3])
25 | self.sec_cov = self.risk_exp.values @ self.factor_cov.values @ self.risk_exp.values.T \
26 | + np.diag(self.idsync.values)
27 | self.sec_cov = pd.DataFrame(self.sec_cov, columns=[1, 2, 3], index=[1, 2, 3])
28 |
29 | def test_full_risk_model(self):
30 | self.assertEqual(self.sec_cov.shape, (3, 3))
31 | model = FullRiskModel(self.sec_cov)
32 |
33 | codes = [1, 2]
34 | res = model.get_cov(codes)
35 | np.testing.assert_array_almost_equal(res, self.sec_cov.loc[codes, codes].values)
36 |
37 | res = model.get_cov()
38 | np.testing.assert_array_almost_equal(res, self.sec_cov.values)
39 |
40 | def test_factor_risk_model(self):
41 | self.assertEqual(self.factor_cov.shape, (2, 2))
42 | self.assertEqual(self.risk_exp.shape, (3, 2))
43 | self.assertEqual(self.idsync.shape, (3,))
44 |
45 | model = FactorRiskModel(self.factor_cov,
46 | self.risk_exp,
47 | self.idsync)
48 |
49 | res = model.get_factor_cov()
50 | np.testing.assert_array_almost_equal(res, self.factor_cov.values)
51 |
52 | codes = [1, 3]
53 | res = model.get_risk_exp(codes)
54 | np.testing.assert_array_almost_equal(res, self.risk_exp.loc[codes, :])
55 | res = model.get_idsync(codes)
56 | np.testing.assert_array_almost_equal(res, self.idsync[codes])
57 |
58 | res = model.get_risk_exp()
59 | np.testing.assert_array_almost_equal(res, self.risk_exp)
60 | res = model.get_idsync()
61 | np.testing.assert_array_almost_equal(res, self.idsync)
62 |
--------------------------------------------------------------------------------
/alphamind/tests/settlement/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-4-28
4 |
5 | @author: cheng.li
6 | """
7 |
--------------------------------------------------------------------------------
/alphamind/tests/settlement/test_simplesettle.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-4-28
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import unittest
9 |
10 | import numpy as np
11 | import pandas as pd
12 |
13 | from alphamind.settlement.simplesettle import simple_settle
14 |
15 |
16 | class TestSimpleSettle(unittest.TestCase):
17 |
18 | def setUp(self):
19 | self.n_samples = 3000
20 | self.n_groups = 30
21 | self.weights = np.random.randn(self.n_samples)
22 | self.ret_series = np.random.randn(self.n_samples)
23 | self.groups = np.random.randint(self.n_groups, size=self.n_samples)
24 |
 25 |     def test_simple_settle(self):
26 | calc_ret = simple_settle(self.weights, self.ret_series)
27 |
28 | ret_series = self.ret_series.reshape((-1, 1))
29 | expected_ret = self.weights @ ret_series
30 |
31 | self.assertAlmostEqual(calc_ret['er'][0], expected_ret[0])
32 |
33 | def test_simple_settle_with_group(self):
34 | calc_ret = simple_settle(self.weights, self.ret_series, self.groups)
35 |
36 | ret_series = self.weights * self.ret_series
37 | expected_ret = pd.Series(ret_series).groupby(self.groups).sum().values
38 |
39 | np.testing.assert_array_almost_equal(calc_ret['er'].values[:-1], expected_ret)
40 | self.assertAlmostEqual(calc_ret['er'].values[-1], expected_ret.sum())
41 |
42 |
43 | if __name__ == '__main__':
44 | unittest.main()
45 |
--------------------------------------------------------------------------------
/alphamind/tests/test_suite.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-4-25
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import os
9 |
10 | SKIP_ENGINE_TESTS = True
11 |
12 | if not SKIP_ENGINE_TESTS:
13 | try:
14 | DATA_ENGINE_URI = os.environ['DB_URI']
15 | except KeyError:
16 | DATA_ENGINE_URI = "mysql+mysqldb://reader:Reader#2020@121.37.138.1:13317/vision?charset=utf8"
17 | else:
18 | DATA_ENGINE_URI = None
19 |
20 |
21 | if __name__ == '__main__':
22 | from simpleutils import add_parent_path
23 |
24 | add_parent_path(__file__, 3)
25 |
26 | from simpleutils import TestRunner
27 | from alphamind.utilities import alpha_logger
28 | from alphamind.tests.data.test_neutralize import TestNeutralize
29 | from alphamind.tests.data.test_standardize import TestStandardize
30 | from alphamind.tests.data.test_winsorize import TestWinsorize
31 | from alphamind.tests.data.test_quantile import TestQuantile
32 | from alphamind.tests.data.engines.test_sql_engine import TestSqlEngine
33 | from alphamind.tests.data.engines.test_universe import TestUniverse
34 | from alphamind.tests.portfolio.test_constraints import TestConstraints
35 | from alphamind.tests.portfolio.test_evolver import TestEvolver
36 | from alphamind.tests.portfolio.test_longshortbuild import TestLongShortBuild
37 | from alphamind.tests.portfolio.test_rankbuild import TestRankBuild
38 | from alphamind.tests.portfolio.test_percentbuild import TestPercentBuild
39 | from alphamind.tests.portfolio.test_linearbuild import TestLinearBuild
40 | from alphamind.tests.portfolio.test_meanvariancebuild import TestMeanVarianceBuild
41 | from alphamind.tests.portfolio.test_riskmodel import TestRiskModel
42 | from alphamind.tests.settlement.test_simplesettle import TestSimpleSettle
43 | from alphamind.tests.analysis.test_riskanalysis import TestRiskAnalysis
44 | from alphamind.tests.analysis.test_perfanalysis import TestPerformanceAnalysis
45 | from alphamind.tests.analysis.test_factoranalysis import TestFactorAnalysis
46 | from alphamind.tests.analysis.test_quantilieanalysis import TestQuantileAnalysis
47 | from alphamind.tests.model.test_modelbase import TestModelBase
48 | from alphamind.tests.model.test_linearmodel import TestLinearModel
49 | from alphamind.tests.model.test_treemodel import TestTreeModel
50 | from alphamind.tests.model.test_loader import TestLoader
51 | from alphamind.tests.model.test_composer import TestComposer
52 | from alphamind.tests.execution.test_naiveexecutor import TestNaiveExecutor
53 | from alphamind.tests.execution.test_thresholdexecutor import TestThresholdExecutor
54 | from alphamind.tests.execution.test_targetvolexecutor import TestTargetVolExecutor
55 | from alphamind.tests.execution.test_pipeline import TestExecutionPipeline
56 | from alphamind.tests.portfolio.test_optimizers import TestOptimizers
57 |
58 | runner = TestRunner([TestNeutralize,
59 | TestStandardize,
60 | TestWinsorize,
61 | TestQuantile,
62 | TestSqlEngine,
63 | TestUniverse,
64 | TestConstraints,
65 | TestEvolver,
66 | TestLongShortBuild,
67 | TestRankBuild,
68 | TestPercentBuild,
69 | TestLinearBuild,
70 | TestMeanVarianceBuild,
71 | TestRiskModel,
72 | TestSimpleSettle,
73 | TestRiskAnalysis,
74 | TestPerformanceAnalysis,
75 | TestFactorAnalysis,
76 | TestQuantileAnalysis,
77 | TestModelBase,
78 | TestLinearModel,
79 | TestTreeModel,
80 | TestLoader,
81 | TestComposer,
82 | TestNaiveExecutor,
83 | TestThresholdExecutor,
84 | TestTargetVolExecutor,
85 | TestExecutionPipeline,
86 | TestOptimizers],
87 | alpha_logger)
88 | runner.run()
89 |
--------------------------------------------------------------------------------
/doc/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | SPHINXPROJ = alpha-mind
8 | SOURCEDIR = .
9 | BUILDDIR = build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--------------------------------------------------------------------------------
/doc/conf.py:
--------------------------------------------------------------------------------
1 | # Configuration file for the Sphinx documentation builder.
2 | #
3 | # This file only contains a selection of the most common options. For a full
4 | # list see the documentation:
5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
6 |
7 | # -- Path setup --------------------------------------------------------------
8 |
9 | # If extensions (or modules to document with autodoc) are in another directory,
10 | # add these directories to sys.path here. If the directory is relative to the
11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
12 | #
13 | # import os
14 | # import sys
15 | # sys.path.insert(0, os.path.abspath('.'))
16 |
17 | import sphinx_rtd_theme
18 |
19 | # -- Project information -----------------------------------------------------
20 |
21 | project = '多因子回测框架'
22 | title = '多因子回测框架文档'
23 | copyright = '2020, 融量'
24 | author = '融量'
25 |
26 | master_doc = 'index'
27 |
28 | # The full version, including alpha/beta/rc tags
29 | version = '0.1.0'
30 | release = version
31 | numfig = True
32 |
33 | # -- General configuration ---------------------------------------------------
34 |
35 | # Add any Sphinx extension module names here, as strings. They can be
36 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
37 | # ones.
38 |
39 | extensions = [
40 | "sphinx_rtd_theme",
41 | "docxbuilder"
42 | ]
43 |
44 | # Add any paths that contain templates here, relative to this directory.
45 | templates_path = ['_templates']
46 |
47 | # The language for content autogenerated by Sphinx. Refer to documentation
48 | # for a list of supported languages.
49 | #
50 | # This is also used if you do content translation via gettext catalogs.
51 | # Usually you set "language" from the command line for these cases.
52 | language = 'zh_CN'
53 |
54 | # List of patterns, relative to source directory, that match files and
55 | # directories to ignore when looking for source files.
56 | # This pattern also affects html_static_path and html_extra_path.
57 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
58 |
59 |
60 | # -- Options for HTML output -------------------------------------------------
61 |
62 | # The theme to use for HTML and HTML Help pages. See the documentation for
63 | # a list of builtin themes.
64 | #
65 | html_theme = "sphinx_rtd_theme"
66 | html_show_sourcelink = False
67 |
68 | html_theme_options = {
69 | 'logo_only': False,
70 | 'display_version': True,
71 | 'prev_next_buttons_location': 'bottom',
72 | 'style_external_links': False,
73 | # 'style_nav_header_background': 'blue',
74 | # Toc options
75 | 'collapse_navigation': True,
76 | 'sticky_navigation': True,
77 | 'navigation_depth': 4,
78 | 'includehidden': True,
79 | 'titles_only': False
80 | }
81 |
82 | # Add any paths that contain custom static files (such as style sheets) here,
83 | # relative to this directory. They are copied after the builtin static files,
84 | # so a file named "default.css" will overwrite the builtin "default.css".
85 | html_static_path = ['_static']
86 |
87 |
88 | # -- Options for latex output ----------------------------------------------
89 |
90 | latex_engine = "xelatex"
91 |
92 | latex_elements = {
93 | "papersize": "a4paper",
94 | 'fncychap': "\\usepackage[Sonny]{fncychap}",
95 | 'inputenc': "",
96 | 'utf8extra': "",
97 | 'fontpkg': '\\usepackage{amsmath,amsfonts,amssymb,amsthm}',
98 | 'preamble': r"""
99 | \setcounter{secnumdepth}{2}
100 | \setcounter{tocdepth}{2}
101 | \usepackage{fontspec}
102 | """,
103 | "figure_align": "H"
104 | }
105 |
106 | latex_documents = [
107 | ("index", 'main.tex', '多因子回测框架文档',
108 | '融量量化团队', 'manual')
109 | ]
110 |
111 |
112 | # -- Options for docx output -----------------------------------------------
113 | docx_documents = [
114 | ('index', 'quantitative_research.docx', {
115 | 'title': '标准化模型服务文档',
116 | 'created': '融量量化团队',
117 | 'subject': '融量量化团队',
118 | 'keywords': ['sphinx']
119 | }, True),
120 | ]
121 |
122 | docx_style = "docx/style.docx"
123 | docx_pagebreak_before_section = 1
124 |
--------------------------------------------------------------------------------
/doc/docx/style.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alpha-miner/alpha-mind/023fca01d2cea7cd50328396c60b06c99706c426/doc/docx/style.docx
--------------------------------------------------------------------------------
/doc/index.rst:
--------------------------------------------------------------------------------
1 | .. alpha-mind documentation master file, created by
2 | sphinx-quickstart on Tue May 29 16:58:56 2018.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
 6 | Multi-Factor Backtesting Framework
 7 | ====================================
8 |
9 | .. toctree::
10 | :maxdepth: 2
11 |    :caption: Contents
12 |
13 | src/changelog
14 | src/whatisit
15 | src/introduction
16 |
17 |
--------------------------------------------------------------------------------
/doc/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=build
12 | set SPHINXPROJ=alpha-mind
13 |
14 | if "%1" == "" goto help
15 |
16 | %SPHINXBUILD% >NUL 2>NUL
17 | if errorlevel 9009 (
18 | echo.
19 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
20 | echo.installed, then set the SPHINXBUILD environment variable to point
21 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
22 | echo.may add the Sphinx directory to PATH.
23 | echo.
24 | echo.If you don't have Sphinx installed, grab it from
25 | echo.http://sphinx-doc.org/
26 | exit /b 1
27 | )
28 |
29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
30 | goto end
31 |
32 | :help
33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
34 |
35 | :end
36 | popd
37 |
--------------------------------------------------------------------------------
/doc/src/changelog.rst:
--------------------------------------------------------------------------------
 1 | *************
 2 | Release Notes
 3 | *************
4 |
5 | Release 0.1.0
6 | ==============================
7 |
8 | Features added
9 | --------------
10 |
11 | * Added usage documentation.
--------------------------------------------------------------------------------
/doc/src/img/alpha-mind.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alpha-miner/alpha-mind/023fca01d2cea7cd50328396c60b06c99706c426/doc/src/img/alpha-mind.png
--------------------------------------------------------------------------------
/doc/src/introduction.rst:
--------------------------------------------------------------------------------
 1 | ***************
 2 | Getting Started
 3 | ***************
4 |
 5 | In this short introduction, we walk you through the full workflow of the multi-factor modelling
 6 | framework: data loading, factor mining, factor combination, portfolio optimization, and backtesting.
7 |
8 |
 9 | Flow Chart
10 | ===============
11 | 
12 | Omitted.
13 |
14 | Data Access
15 | ===============
16 | 
17 | The framework ships with its own database schema requirements and currently supports mysql and postgresql
18 | (more databases, e.g. sqlserver, will follow). The schema is transparent to the user; to specify a data source, only the following statements are needed:
19 |
20 | .. code-block:: py
21 | :linenos:
22 |
23 | from alphamind.api import *
24 |
25 | data_source = "url_for_some_database"
26 | engine = SqlEngine(data_source)
27 |
28 | Backtest Settings
29 | ===================
30 | 
31 | Various aspects of the backtest can be configured, for example:
32 | 
33 | * start and end dates
34 | * rebalance frequency
35 | * stock universe, typically the constituents of an index
36 | * industry classification; currently the ShenWan scheme is supported, with code "sw"
37 | * benchmark index
38 | * portfolio construction method
39 | 
40 | In code, this reads:
41 |
42 | .. code-block:: py
43 | :linenos:
44 |
45 | start_date = '2020-01-01'
46 | end_date = '2020-02-21'
47 |
48 | freq = '10b'
49 | industry_name = 'sw'
50 | universe = Universe('hs300')
51 | benchmark_code = 300
52 | method = 'risk_neutral'
53 |
54 | Factor Pool
55 | ====================
56 | 
57 | Users can define a complete factor pool; the framework supports backtesting any number of factors:
58 |
59 | .. code-block:: py
60 | :linenos:
61 |
62 | alpha_factors = {
63 | 'f01': CSQuantiles(LAST('EMA5D')),
64 | 'f02': CSQuantiles(LAST('EMV6D')),
65 | }
66 |
67 | Here we use two factors, EMA5D and EMV6D, and map each to its cross-sectional quantiles.
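
As a conceptual illustration only (plain pandas rather than the PyFin formula engine used above),
the cross-sectional quantile mapping amounts to a percentile rank:

.. code-block:: py
   :linenos:

    import pandas as pd

    # raw factor values for a single cross section (one date)
    raw = pd.Series([0.5, 1.2, -0.3, 0.8])

    # percentile of each value within the cross section
    quantiles = raw.rank(pct=True)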
68 |
69 | Machine Learning Models
70 | =========================
71 | 
72 | To combine the factors, we attach an alpha model:
73 |
74 | .. code-block:: py
75 | :linenos:
76 |
77 | weights = dict(f01=1., f02=1.)
78 | alpha_model = ConstLinearModel(features=alpha_factors, weights=weights)
79 |
80 | Here we use an equal-weight linear model. The framework supports several different machine learning models, and users can also plug in custom models of their own.
81 |
82 | Portfolio Optimization
83 | ========================
84 | 
85 | Portfolio optimization in the framework is based on mean-variance optimization, with support for many features:
86 | 
87 | * total leverage constraints;
88 | * industry weight constraints;
89 | * style factor constraints;
90 | * turnover constraints;
91 | * index-constituent weight limits.
92 |
93 | .. code-block:: py
94 | :linenos:
95 |
96 |     # Constraints settings
97 |
98 | industry_names = industry_list(industry_name, industry_level)
99 | constraint_risk = ['SIZE', 'SIZENL', 'BETA']
100 | total_risk_names = constraint_risk + ['benchmark', 'total']
101 | all_styles = risk_styles + industry_styles + macro_styles
102 |
103 | b_type = []
104 | l_val = []
105 | u_val = []
106 |
107 | previous_pos = pd.DataFrame()
108 | rets = []
109 | turn_overs = []
110 |     leverages = []
111 |
112 | for name in total_risk_names:
113 | if name == 'benchmark':
114 | b_type.append(BoundaryType.RELATIVE)
115 | l_val.append(0.8)
116 | u_val.append(1.0)
117 | else:
118 | b_type.append(BoundaryType.ABSOLUTE)
119 | l_val.append(0.0)
120 | u_val.append(0.0)
121 |
122 | bounds = create_box_bounds(total_risk_names, b_type, l_val, u_val)
123 | turn_over_target = 0.4
124 |
125 | The code above enforces that:
126 | 
127 | * at least 80% of the weight sits in benchmark constituents;
128 | * the total weight is 100% (no leverage and no cash buffer);
129 | * there is no exposure relative to the benchmark on the SIZE, SIZENL and BETA style factors;
130 | * turnover per rebalance is at most 40% (two-sided).
131 |
132 | Putting It All Together...
133 | ===========================
134 | 
135 | A complete backtest then takes only a few calls to the built-in functions:
136 |
137 | .. code-block:: py
138 | :linenos:
139 |
140 | running_setting = RunningSetting(weights_bandwidth=weights_bandwidth,
141 | rebalance_method=method,
142 | bounds=bounds,
143 | turn_over_target=turn_over_target)
144 |
145 | # Strategy
146 | strategy = Strategy(alpha_model,
147 | data_meta,
148 | universe=universe,
149 | start_date=start_date,
150 | end_date=end_date,
151 | freq=freq,
152 | benchmark=benchmark_code)
153 |
154 | strategy.prepare_backtest_data()
155 | ret_df, positions = strategy.run(running_setting=running_setting)
156 |
157 |
158 | Plotting
159 | ===============
160 | 
161 | The `ret_df` returned above holds the detailed return information, and `positions` the full
162 | position history. Users can plot the results themselves, for example:
163 |
164 | .. code-block:: py
165 | :linenos:
166 |
167 | ret_df[['turn_over', 'excess_return']].cumsum().plot(figsize=(14, 7), secondary_y='turn_over')
168 |
169 | which plots the cumulative excess return together with the cumulative turnover.
170 |
171 | A Complete Example
172 | ===================
173 | 
174 | The complete code lives in the notebooks folder; see: Example 2 - Strategy Analysis.ipynb
175 |
--------------------------------------------------------------------------------
/doc/src/whatisit.rst:
--------------------------------------------------------------------------------
 1 | *********************
 2 | What is Alpha - Mind?
 3 | *********************
4 |
 5 | Alpha - Mind Features
 6 | ======================
 7 | 
 8 | **Alpha - Mind** is an end-to-end alpha modelling toolkit built on the multi-factor analysis methodology. It consists of four major functional modules:
9 |
10 | Factor Transformation
11 | ----------------------
12 | 
13 | Fills missing values, winsorizes outliers, neutralizes against risk factors, and applies arithmetic
14 | as well as time-series and cross-sectional operations, turning raw data into standardized factors ready for modelling.
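
A minimal numpy sketch of the three basic transforms (an illustration of the idea only, not the
library's own API):

.. code-block:: py
   :linenos:

    import numpy as np

    np.random.seed(0)
    raw = np.random.randn(500) * 3      # raw factor values
    risk = np.random.randn(500, 5)      # risk factor exposures

    # winsorize: clip values beyond +/- 3 standard deviations
    lower, upper = raw.mean() - 3 * raw.std(), raw.mean() + 3 * raw.std()
    clipped = np.clip(raw, lower, upper)

    # standardize: zero mean, unit variance
    standardized = (clipped - clipped.mean()) / clipped.std()

    # neutralize: keep the residual of a regression on the risk exposures
    beta, *_ = np.linalg.lstsq(risk, standardized, rcond=None)
    neutralized = standardized - risk @ beta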
15 |
16 | Factor Combination
17 | -------------------
18 | 
19 | A single factor only goes so far; in practice we usually need to combine several factors. The
20 | combination can use standard arithmetic operations or machine-learning methods; Alpha - Mind supports both. A sketch of the arithmetic route follows below.
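
For instance (a numpy illustration only), a fixed-weight arithmetic combination of two
standardized factors; a machine-learning combination would instead fit these weights from
realized returns:

.. code-block:: py
   :linenos:

    import numpy as np

    np.random.seed(1)
    f01 = np.random.randn(500)          # standardized factor 1
    f02 = np.random.randn(500)          # standardized factor 2

    weights = {"f01": 1.0, "f02": 1.0}  # equal-weight scheme
    combined = weights["f01"] * f01 + weights["f02"] * f02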
21 |
22 |
23 | Portfolio Optimization
24 | -----------------------
25 | 
26 | Factor values (alpha) rank stocks by expected attractiveness, but a real portfolio cannot be
27 | allocated purely by alpha. Constraints that come into play include industry allocation limits, style exposure limits, per-stock compliance limits, tracking-error limits, and turnover limits; a bare-bones sketch follows below.
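
A bare-bones mean-variance sketch using cvxpy (one of the project's dependencies), with only a
budget constraint and box bounds; the framework's own builders layer the richer constraint types
listed above on top of the same idea:

.. code-block:: py
   :linenos:

    import cvxpy as cp
    import numpy as np

    er = np.array([0.1, 0.2, 0.3])        # expected returns
    cov = np.array([[0.05, 0.01, 0.02],
                    [0.01, 0.06, 0.03],
                    [0.02, 0.03, 0.07]])  # covariance matrix

    w = cp.Variable(3)
    objective = cp.Maximize(er @ w - 0.5 * cp.quad_form(w, cov))
    constraints = [cp.sum(w) == 1.0, w >= 0.0, w <= 0.8]
    cp.Problem(objective, constraints).solve()
    print(w.value)                        # optimal weights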
28 |
29 | Strategy Backtesting
30 | ---------------------
31 | 
32 | With all of the steps above in place, the full pipeline from data to stock portfolio is complete.
33 | The portfolio is then rebalanced at each rebalancing date, yielding the strategy's performance over the whole backtest horizon (return, volatility, drawdown, etc.); see the sketch below.
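
Illustrative only: given a series of per-period strategy returns, the headline statistics can be
computed along these lines (assuming 250 trading periods per year):

.. code-block:: py
   :linenos:

    import numpy as np

    np.random.seed(2)
    rets = np.random.randn(250) * 0.01   # per-period returns
    cum = np.cumsum(rets)                # cumulative (arithmetic) return

    ann_ret = rets.mean() * 250          # annualized return
    ann_vol = rets.std() * np.sqrt(250)  # annualized volatility
    max_dd = (np.maximum.accumulate(cum) - cum).max()  # maximum drawdown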
34 |
35 | Flow Chart
36 | ---------------
37 |
38 | .. image:: img/alpha-mind.png
39 |
40 |
41 |
--------------------------------------------------------------------------------
/entrypoint.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | export PYTHONPATH=$PYTHONPATH:/
4 | export DB_VENDOR="mysql"
5 | # export DB_URI="mysql+mysqldb://dxrw:dxRW20_2@121.37.138.1:13317/dxtest?charset=utf8"
6 | export DB_URI="mysql+mysqldb://reader:Reader#2020@121.37.138.1:13316/vision_product?charset=utf8"
7 | export FACTOR_TABLES="factor_momentum"
8 | jupyter lab --ip="0.0.0.0" --port=8080 --allow-root --ServerApp.token='' --ServerApp.password='sha1:f7761f682bc4:1aba35e73699fe62570573de373bf95b491022a7'
--------------------------------------------------------------------------------
/install/001.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alpha-miner/alpha-mind/023fca01d2cea7cd50328396c60b06c99706c426/install/001.png
--------------------------------------------------------------------------------
/install/002.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alpha-miner/alpha-mind/023fca01d2cea7cd50328396c60b06c99706c426/install/002.png
--------------------------------------------------------------------------------
/install/install.md:
--------------------------------------------------------------------------------
 1 | ## Alpha-Mind Installation Guide
2 |
 3 | ### 1. Environment Setup
4 |
 5 | #### The mysql driver
 6 | 
 7 | The factor data that `Alpha-Mind` consumes is stored in a database, so the runtime environment needs a suitable database driver installed. Here we use `mysql`.
 8 | 
 9 | The official download page for the `mysql` driver:
10 | 
11 | https://dev.mysql.com/downloads/
12 | 
13 | After opening the download page, you will see the following:
14 |
15 | 
16 |
17 | Click `Connector/Python` to open the `Python` driver download page:
18 |
19 | ![](002.png)
20 |
21 | On that page:
22 | 
23 | - choose the correct operating system
24 | - choose the correct operating system version
25 | - choose a suitable driver version to download
26 | 
27 | Download and install the database driver.
28 |
29 | #### Compiler
30 | 
31 | Some of `Alpha-Mind`'s dependencies, such as `ecos`, are compiled from source during installation, so the environment needs a C/C++ compiler.
32 | 
33 | - Windows
34 |   Install Visual Studio 2015 or later.
35 | 
36 | - Linux
37 |
38 | ```bash
39 | yum -y install gcc
40 | yum -y install gcc-c++
41 | ```
42 |
43 | or, on Debian/Ubuntu:
44 | 
45 | ```bash
46 | apt-get install gcc
47 | apt-get install g++
48 | ```
49 |
50 |
51 |
52 | ### 2. Installation
53 | 
54 | Once the environment is ready, run the following command to install the `Alpha-Mind` package:
55 |
56 | ```bash
57 | pip install Alpha-Mind
58 | ```
59 |
60 | If your network connection is poor, you can use a mirror inside China, e.g. the Aliyun mirror:
61 |
62 | ```bash
63 | pip install Alpha-Mind -i https://mirrors.aliyun.com/pypi/simple
64 | ```
65 |
66 | #### Environment variables
67 | 
68 | After installation, `Alpha-Mind` needs the following environment variables set correctly in order to work (a quick sanity check follows the list):
69 | 
70 | - **DB_VENDOR**: the database type, here set to `mysql`:
71 | 
72 |   `DB_VENDOR=mysql`
73 | 
74 | - **DB_URI**: the database connection string
75 | 
76 |   `DB_URI=mysql+mysqldb://rlUser:123456@10.16.50.12:3306/rl?charset=utf8`
77 | 
78 | - **FACTOR_TABLES**: the names of the factor tables, separated by `,`, for example:
79 |
80 | `FACTOR_TABLES=factor_momentum,factor_power_volume,factor_basic_derivation`
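
A minimal sanity check (illustrative only) that the variables are visible to Python before importing `alphamind`:

```python
import os

# The variable names come from the list above; the values are just examples.
for key in ("DB_VENDOR", "DB_URI", "FACTOR_TABLES"):
    print(key, "=", os.environ.get(key, "<not set>"))
```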
--------------------------------------------------------------------------------
/notebooks/Quick Start 6 - Formula Based Stocks Screening.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
  7 |         "# Alphamind Quick Start 6: Formula-Based Stock Screening"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 2,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "%matplotlib inline\n",
17 | "\n",
18 | "import os\n",
19 | "from PyFin.api import *\n",
20 | "from alphamind.api import *"
21 | ]
22 | },
23 | {
24 | "cell_type": "markdown",
25 | "metadata": {},
26 | "source": [
 27 |         "## 1. Writing the Formula\n",
28 | "-------------------------"
29 | ]
30 | },
31 | {
32 | "cell_type": "code",
33 | "execution_count": 3,
34 | "metadata": {},
35 | "outputs": [],
36 | "source": [
 37 |         "# CSTopN picks the top N values in the cross section\n",
 38 |         "universe_name = 'hs300'\n",
 39 |         "# Select the 3 securities with the highest EMA5D factor values\n",
 40 |         "# See the official 融量 documentation for the full list of formula functions\n",
 41 |         "formula = CSTopN(LAST('EMA5D'), 3)\n",
42 | "ref_date = '2020-01-02'"
43 | ]
44 | },
45 | {
46 | "cell_type": "markdown",
47 | "metadata": {},
48 | "source": [
 49 |         "## 2. Fetching the Data\n",
50 | "---------------"
51 | ]
52 | },
53 | {
54 | "cell_type": "code",
55 | "execution_count": 4,
56 | "metadata": {},
57 | "outputs": [],
58 | "source": [
59 | "depends = formula.fields\n",
60 | "engine = SqlEngine(os.environ['DB_URI'])\n",
 61 |         "universe = Universe(universe_name)  # set the stock universe\n",
62 | "codes = universe.query(engine, dates=[ref_date])"
63 | ]
64 | },
65 | {
66 | "cell_type": "code",
67 | "execution_count": 5,
68 | "metadata": {},
69 | "outputs": [],
70 | "source": [
71 | "factors = engine.fetch_factor(ref_date, depends, codes.code.tolist()).dropna()\n",
72 | "factors.index = [1] * len(factors)\n",
73 | "factors = factors[['code'] + depends]"
74 | ]
75 | },
76 | {
77 | "cell_type": "markdown",
78 | "metadata": {},
79 | "source": [
80 | "## 3. Stock Screening\n",
81 | "---------------------"
82 | ]
83 | },
84 | {
85 | "cell_type": "code",
86 | "execution_count": 6,
87 | "metadata": {},
88 | "outputs": [],
89 | "source": [
90 | "res = formula.transform(factors, name='factor', category_field='code')"
91 | ]
92 | },
93 | {
94 | "cell_type": "code",
95 | "execution_count": 7,
96 | "metadata": {},
97 | "outputs": [
98 | {
99 | "data": {
143 | "text/plain": [
144 | " factor code\n",
145 | "1 1.0 2010000438\n",
146 | "1 1.0 2010001184\n",
147 | "1 1.0 2010019213"
148 | ]
149 | },
150 | "execution_count": 7,
151 | "metadata": {},
152 | "output_type": "execute_result"
153 | }
154 | ],
155 | "source": [
156 | "res[res.factor == 1]"
157 | ]
158 | },
159 | {
160 | "cell_type": "code",
161 | "execution_count": null,
162 | "metadata": {},
163 | "outputs": [],
164 | "source": []
165 | }
166 | ],
167 | "metadata": {
168 | "kernelspec": {
169 | "display_name": "Python 3",
170 | "language": "python",
171 | "name": "python3"
172 | },
173 | "language_info": {
174 | "codemirror_mode": {
175 | "name": "ipython",
176 | "version": 3
177 | },
178 | "file_extension": ".py",
179 | "mimetype": "text/x-python",
180 | "name": "python",
181 | "nbconvert_exporter": "python",
182 | "pygments_lexer": "ipython3",
183 | "version": "3.8.8"
184 | }
185 | },
186 | "nbformat": 4,
187 | "nbformat_minor": 4
188 | }
189 |
--------------------------------------------------------------------------------
/notebooks/Step By Step 01 - 入门.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
  7 |         "# Step By Step 01 - Getting Started\n",
8 | "----------"
9 | ]
10 | },
11 | {
12 | "cell_type": "markdown",
13 | "metadata": {},
14 | "source": [
 15 |         "## 1. Common Toolkits\n",
 16 |         "\n",
 17 |         "Tools that come up frequently when working with **alpha-mind** include:\n",
 18 |         "\n",
 19 |         "* *pandas*: mainly for tabular data processing;\n",
 20 |         "* *numpy*: high-performance vector computation;\n",
 21 |         "* *matplotlib*: plotting.\n",
 22 |         "\n",
 23 |         "Beyond these, we also use some more specialised tools:\n",
 24 |         "\n",
 25 |         "* *scipy*: common numerical algorithms;\n",
 26 |         "* *cvxpy*: an optimization toolkit.\n",
 27 |         "\n",
 28 |         "The following code verifies that these packages have been installed correctly:"
29 | ]
30 | },
31 | {
32 | "cell_type": "code",
33 | "execution_count": 1,
34 | "metadata": {},
35 | "outputs": [],
36 | "source": [
37 | "import numpy as np\n",
38 | "import pandas as pd\n",
39 | "from matplotlib import pyplot as plt\n",
40 | "import scipy\n",
41 | "import cvxpy"
42 | ]
43 | },
44 | {
45 | "cell_type": "markdown",
46 | "metadata": {},
47 | "source": [
 48 |         "### 2. **alpha-mind**\n",
 49 |         "\n",
 50 |         "**alpha-mind** is a standard python package and can be installed directly with **pip**:\n",
51 | "\n",
52 | "```bash\n",
53 | "$ pip install alpha-mind\n",
54 | "```"
55 | ]
56 | },
57 | {
58 | "cell_type": "code",
59 | "execution_count": 4,
60 | "metadata": {},
61 | "outputs": [
62 | {
63 | "data": {
64 | "text/plain": [
65 | "'0.3.1'"
66 | ]
67 | },
68 | "execution_count": 4,
69 | "metadata": {},
70 | "output_type": "execute_result"
71 | }
72 | ],
73 | "source": [
74 | "import alphamind as ad\n",
75 | "from alphamind.api import *\n",
76 | "ad.__version__"
77 | ]
78 | },
79 | {
80 | "cell_type": "markdown",
81 | "metadata": {},
82 | "source": [
 83 |         "This confirms that alpha-mind has been installed successfully; now we can start exploring its features!"
84 | ]
85 | },
86 | {
87 | "cell_type": "code",
88 | "execution_count": null,
89 | "metadata": {},
90 | "outputs": [],
91 | "source": []
92 | }
93 | ],
94 | "metadata": {
95 | "kernelspec": {
96 | "display_name": "Python 3",
97 | "language": "python",
98 | "name": "python3"
99 | },
100 | "language_info": {
101 | "codemirror_mode": {
102 | "name": "ipython",
103 | "version": 3
104 | },
105 | "file_extension": ".py",
106 | "mimetype": "text/x-python",
107 | "name": "python",
108 | "nbconvert_exporter": "python",
109 | "pygments_lexer": "ipython3",
110 | "version": "3.8.8"
111 | }
112 | },
113 | "nbformat": 4,
114 | "nbformat_minor": 4
115 | }
116 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | arrow
2 | cvxpy
3 | cvxopt
4 | cython
5 | deprecated
6 | ecos
7 | finance-python
8 | jupyter
9 | jupyterlab
10 | matplotlib
11 | mysqlclient
12 | numba
13 | numpy
14 | pandas
15 | portfolio-optimizer
16 | psycopg2-binary
17 | scikit-learn
18 | scipy
19 | simpleutils
20 | sqlalchemy
21 | statsmodels
22 | xgboost
23 | xlsxwriter
24 |
--------------------------------------------------------------------------------
/requirements_docker.txt:
--------------------------------------------------------------------------------
1 | arrow
2 | cvxpy
3 | cvxopt
4 | deprecated
5 | ecos
6 | mysqlclient
7 | portfolio-optimizer
8 | psycopg2-binary
9 | simpleutils
10 | xgboost
11 | xlsxwriter
12 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | description-file = README.md
3 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on 2017-4-25
4 |
5 | @author: cheng.li
6 | """
7 |
8 | import io
9 | from setuptools import setup
10 | from setuptools import find_packages
11 |
12 | VERSION = "0.3.1"
13 |
14 | setup(
15 | name='Alpha-Mind',
16 | version=VERSION,
17 | packages=find_packages(),
18 | url='',
19 | license='MIT',
20 | author='wegamekinglc',
21 | author_email='',
22 | scripts=['alphamind/bin/alphamind'],
23 | install_requires=io.open('requirements.txt', encoding='utf8').read(),
24 | description='',
25 | include_package_data=True
26 | )
--------------------------------------------------------------------------------