├── .coveragerc ├── .dockerignore ├── .gitignore ├── .travis.yml ├── Dockerfile ├── LICENSE.txt ├── MANIFEST.in ├── README.md ├── alphamind ├── __init__.py ├── analysis │ ├── __init__.py │ ├── calculators.py │ ├── crosssetctions.py │ ├── factoranalysis.py │ ├── perfanalysis.py │ ├── quantileanalysis.py │ └── riskanalysis.py ├── api.py ├── benchmarks │ ├── __init__.py │ ├── benchmarks.py │ ├── data │ │ ├── __init__.py │ │ ├── neutralize.py │ │ ├── standardize.py │ │ └── winsorize.py │ ├── portfolio │ │ ├── __init__.py │ │ ├── linearbuild.py │ │ ├── longshortbuild.py │ │ ├── percentbuild.py │ │ └── rankbuild.py │ └── settlement │ │ ├── __init__.py │ │ └── simplesettle.py ├── bin │ ├── __init__.py │ ├── alphamind │ └── cli.py ├── data │ ├── __init__.py │ ├── dbmodel │ │ ├── __init__.py │ │ └── models │ │ │ ├── __init__.py │ │ │ ├── mysql.py │ │ │ └── postgres.py │ ├── engines │ │ ├── __init__.py │ │ ├── industries.py │ │ ├── sqlengine │ │ │ ├── __init__.py │ │ │ ├── mysql.py │ │ │ └── postgres.py │ │ ├── universe.py │ │ └── utilities.py │ ├── neutralize.py │ ├── processing.py │ ├── quantile.py │ ├── rank.py │ ├── standardize.py │ ├── transformer.py │ └── winsorize.py ├── exceptions │ ├── __init__.py │ └── exceptions.py ├── execution │ ├── __init__.py │ ├── baseexecutor.py │ ├── naiveexecutor.py │ ├── pipeline.py │ ├── targetvolexecutor.py │ └── thresholdexecutor.py ├── formula │ ├── __init__.py │ └── utilities.py ├── model │ ├── __init__.py │ ├── composer.py │ ├── data_preparing.py │ ├── linearmodel.py │ ├── loader.py │ ├── modelbase.py │ ├── svm.py │ └── treemodel.py ├── portfolio │ ├── __init__.py │ ├── constraints.py │ ├── evolver.py │ ├── linearbuilder.py │ ├── longshortbulder.py │ ├── meanvariancebuilder.py │ ├── optimizers.py │ ├── percentbuilder.py │ ├── rankbuilder.py │ └── riskmodel.py ├── settlement │ ├── __init__.py │ └── simplesettle.py ├── strategy │ ├── __init__.py │ ├── sample_strategy.json │ └── strategy.py ├── tests │ ├── __init__.py │ ├── analysis │ │ ├── __init__.py │ │ ├── test_factoranalysis.py │ │ ├── test_perfanalysis.py │ │ ├── test_quantilieanalysis.py │ │ └── test_riskanalysis.py │ ├── cython │ │ └── __init__.py │ ├── data │ │ ├── __init__.py │ │ ├── engines │ │ │ ├── __init__.py │ │ │ ├── test_sql_engine.py │ │ │ └── test_universe.py │ │ ├── test_neutralize.py │ │ ├── test_quantile.py │ │ ├── test_rank.py │ │ ├── test_standardize.py │ │ └── test_winsorize.py │ ├── execution │ │ ├── __init__.py │ │ ├── test_naiveexecutor.py │ │ ├── test_pipeline.py │ │ ├── test_targetvolexecutor.py │ │ └── test_thresholdexecutor.py │ ├── model │ │ ├── __init__.py │ │ ├── test_composer.py │ │ ├── test_linearmodel.py │ │ ├── test_loader.py │ │ ├── test_modelbase.py │ │ └── test_treemodel.py │ ├── portfolio │ │ ├── __init__.py │ │ ├── test_constraints.py │ │ ├── test_evolver.py │ │ ├── test_linearbuild.py │ │ ├── test_longshortbuild.py │ │ ├── test_meanvariancebuild.py │ │ ├── test_optimizers.py │ │ ├── test_percentbuild.py │ │ ├── test_rankbuild.py │ │ └── test_riskmodel.py │ ├── settlement │ │ ├── __init__.py │ │ └── test_simplesettle.py │ └── test_suite.py └── utilities.py ├── doc ├── Makefile ├── conf.py ├── docx │ └── style.docx ├── index.rst ├── make.bat └── src │ ├── changelog.rst │ ├── img │ └── alpha-mind.png │ ├── introduction.rst │ └── whatisit.rst ├── entrypoint.sh ├── install ├── 001.png ├── 002.png └── install.md ├── notebooks ├── Example 1 - Factor IC analysis.ipynb ├── Example 10 - Quadratic Optimizer Comparison with CVXOPT.ipynb ├── Example 11 - Long Short Strategy 
Model.ipynb ├── Example 12 - Machine Learning Model Prediction.ipynb ├── Example 13 - Evaluation within Industry Groups.ipynb ├── Example 2 - Strategy Analysis.ipynb ├── Example 3 - Multi Weight Gap Comparison.ipynb ├── Example 4 - Single Factor Analysis.ipynb ├── Example 5 - Style Factor Analysis.ipynb ├── Example 6 - Target Volatility Builder.ipynb ├── Example 7 - Portfolio Optimizer Performance.ipynb ├── Example 9 - Linear Optimizer Comparison with CVXOPT.ipynb ├── Quick Start 1 - Factor Preprocess.ipynb ├── Quick Start 2 - Factor Rank and Quantile.ipynb ├── Quick Start 3 - Portfolio Builder.ipynb ├── Quick Start 5 - Alpha Factor Quantile Analysis.ipynb ├── Quick Start 6 - Formula Based Stocks Screening.ipynb ├── Quick Start 7 - Single Factor IC Analysis.ipynb ├── Quick Start 8 - IC Decay Calculation.ipynb ├── Quick Start 9 - Back Testing Stock Screening.ipynb ├── Step By Step 01 - 入门.ipynb ├── Step By Step 02 - 数据获取.ipynb ├── Step By Step 03 - 因子设计.ipynb ├── Step By Step 04 - 因子预处理.ipynb ├── Step By Step 05 - 组合优化.ipynb ├── Step By Step 06 - 完整的回测.ipynb └── Step By Step 07 - 风险因子模型.ipynb ├── requirements.txt ├── requirements_docker.txt ├── scripts └── update_uqer_data_postgres.py ├── setup.cfg └── setup.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | source=alphamind 3 | omit=alphamind/tests/* -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | ./build 2 | ./dist 3 | ./Alpha_Mind.egg-info 4 | ./report 5 | ./script 6 | .git 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .idea/* 3 | build/* 4 | dist/* 5 | Alpha_Mind.egg-info/* 6 | *.pyd 7 | *.c 8 | *.cpp 9 | *.html 10 | *.nbc 11 | *.nbi 12 | /notebooks/.ipynb_checkpoints/* 13 | /notebooks/machine learning/.ipynb_checkpoints/* 14 | alphamind/cython/*.so 15 | alphamind/examples/*.xlsx 16 | alphamind/examples/*.csv 17 | doc/_build 18 | doc/build 19 | settings.json 20 | doc/source/_build -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.6" 4 | - "3.7" 5 | - "3.8" 6 | # command to install dependencies 7 | sudo: enabled 8 | dist: bionic 9 | addons: 10 | apt: 11 | packages: 12 | - g++ 13 | - coinor-cbc 14 | - coinor-libcbc-dev 15 | install: 16 | - pip install cython numpy 17 | - pip install -r requirements.txt 18 | - pip install coverage 19 | - pip install coveralls --ignore-installed 20 | script: 21 | - export NUMBA_DISABLE_JIT=1 22 | - coverage run --rcfile=./.coveragerc alphamind/tests/test_suite.py 23 | - coverage report --rcfile=./.coveragerc -i 24 | - coverage html --rcfile=./.coveragerc -i 25 | after_success: 26 | - coveralls 27 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM continuumio/anaconda3:2021.05 2 | 3 | LABEL maintainer = "scrappedprince.li@gmail.com" 4 | RUN apt-get update && apt-get install build-essential default-libmysqlclient-dev coinor-cbc coinor-libcbc-dev -y 5 | ENV COIN_INSTALL_DIR /usr 6 | 7 | WORKDIR / 8 | COPY ./requirements_docker.txt /requirements.txt 9 | RUN pip install -r 
/requirements.txt -i https://pypi.douban.com/simple 10 | RUN pip install finance-python>=0.8.1 -i https://pypi.douban.com/simple 11 | 12 | WORKDIR / 13 | COPY ./alphamind /alphamind 14 | COPY ./notebooks /notebooks 15 | 16 | COPY ./setup.py /setup.py 17 | COPY ./setup.cfg /setup.cfg 18 | 19 | EXPOSE 8080 20 | COPY ./entrypoint.sh /entrypoint.sh 21 | RUN chmod +x /entrypoint.sh 22 | 23 | WORKDIR /notebooks 24 | ENTRYPOINT ["/entrypoint.sh"] 25 | CMD [] -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2017 Cheng Li 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include MANIFEST.in 2 | include .coveragerc 3 | include requirements.txt 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Alpha - Mind 2 | 3 | [badges: Python version · travis build status · coverage]
20 | 21 | **Alpha - Mind** is a multi-factor equity research framework developed in **Python**. 22 | 23 | ## TODO list 24 | 25 | After a long pause, development of **alpha-mind** is being restarted. The list below gives the features and improvements currently planned: 26 | 27 | - [x] Add support for MySQL as a data backend; 28 | - [ ] Add support for CSV files as a data backend, with a sample file provided for user testing; 29 | - [x] Remove all C++ related code to make alpha-mind easier to install; 30 | - [x] Provide packages on Windows and Linux that can be installed directly via pip; 31 | - [ ] Complete documentation; 32 | - [ ] Add hyper-parameter tuning to the alpha models; 33 | - [ ] Add multi-period forecasting to the alpha models; 34 | - [ ] Add multi-period optimization to the optimizer. 35 | 36 | ## Dependencies 37 | 38 | The project has two main external GitHub dependencies: 39 | 40 | * [Finance-Python](https://github.com/alpha-miner/finance-python) 41 | 42 | * [portfolio - optimizer](https://github.com/alpha-miner/portfolio-optimizer): an optimizer toolkit for portfolio allocation, written by the same author; 43 | 44 | Both libraries can be installed directly with pip. 45 | 46 | ## Features 47 | 48 | alpha - mind provides the tool chain commonly used in multi-factor research, including: 49 | 50 | * data cleaning 51 | * alpha models 52 | * risk models 53 | * portfolio optimization 54 | * executors 55 | 56 | Every module ships with a complete set of test cases to guard correctness as far as possible. Performance has also received close attention throughout the development of all numerical models, drawing on excellent third-party tools: 57 | 58 | * numpy 59 | * numba 60 | * cvxopt 61 | * cvxpy 62 | * pandas 63 | * scipy 64 | 65 | ## Installation 66 | 67 | A detailed installation guide is available in the `install` directory. 68 | 69 | Installation is done from source, by cloning or downloading the code. The steps are: 70 | 71 | Clone the project locally 72 | 73 | ```shell 74 | $ git clone https://github.com/rongliang-tech/alpha-mind.git 75 | ``` 76 | 77 | Then install with the following command 78 | 79 | ```shell 80 | $ python setup.py install 81 | ``` 82 | 83 | ### Running with Docker 84 | 85 | 1. `docker build -t alpha-mind:latest -f Dockerfile .` 86 | 87 | 2. `docker run -it -p 8080:8080 --name alpha-mind alpha-mind` 88 | 89 | By default, after opening a browser, go to 127.0.0.1/lab and enter the login password: `rongliang2021` 90 | 91 | The initial password can be customized; see: [generating a Jupyter password](https://jupyter-notebook.readthedocs.io/en/stable/public_server.html#preparing-a-hashed-password) 92 | 93 | #### Notes 94 | 95 | The environment variables are configured in `./entrypoint.sh`, including: 96 | 97 | * `DB_VENDOR`: set this to `rl` if MySQL is used; 98 | * `DB_URI`: the database connection string. 99 | * `FACTOR_TABLES`: the factor tables to use 100 | -------------------------------------------------------------------------------- /alphamind/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-4-25 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | __version__ = "0.3.1" 9 | -------------------------------------------------------------------------------- /alphamind/analysis/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-5-6 4 | 5 | @author: cheng.li 6 | """ 7 | -------------------------------------------------------------------------------- /alphamind/analysis/calculators.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-5-18 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import pandas as pd 9 | 10 | 11 | def calculate_turn_over(pos_table: pd.DataFrame) -> pd.DataFrame: 12 | turn_over_table = {} 13 | total_factors = pos_table.columns.difference(['code']) 14 | pos_table.reset_index() 15 | 16 | for name in total_factors: 17 | pivot_position = pos_table.pivot(values=name, columns='code').fillna(0.)
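        # `pivot_position` now has one row per date (the frame's index) and one column
        # per instrument code; the turnover per period is then the L1 norm of the
        # period-over-period position changes computed below.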
18 | turn_over_series = pivot_position.diff().abs().sum(axis=1) 19 | turn_over_table[name] = turn_over_series.values 20 | 21 | turn_over_table = pd.DataFrame(turn_over_table, index=pos_table.trade_date.unique()) 22 | return turn_over_table[total_factors] 23 | -------------------------------------------------------------------------------- /alphamind/analysis/crosssetctions.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2018-3-5 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import numpy as np 9 | import pandas as pd 10 | import statsmodels.api as sm 11 | 12 | from alphamind.data.processing import factor_processing 13 | from alphamind.data.standardize import standardize 14 | from alphamind.data.winsorize import winsorize_normal 15 | from alphamind.utilities import alpha_logger 16 | 17 | 18 | def cs_impl(ref_date, 19 | factor_data, 20 | factor_name, 21 | risk_exposure, 22 | constraint_risk, 23 | industry_matrix, 24 | dx_returns): 25 | total_data = pd.merge(factor_data, risk_exposure, on='code') 26 | total_data = pd.merge(total_data, industry_matrix, on='code') 27 | total_data = total_data.replace([np.inf, -np.inf], np.nan).dropna() 28 | 29 | if len(total_data) < 0.33 * len(factor_data): 30 | alpha_logger.warning(f"valid data point({len(total_data)}) " 31 | f"is less than 33% of the total sample ({len(factor_data)}). Omit this run") 32 | return np.nan, np.nan, np.nan 33 | 34 | total_risk_exp = total_data[constraint_risk] 35 | 36 | er = total_data[[factor_name]].values.astype(float) 37 | er = factor_processing(er, [winsorize_normal, standardize], total_risk_exp.values, 38 | [standardize]).flatten() 39 | industry = total_data.industry_name.values 40 | 41 | codes = total_data.code.tolist() 42 | target_pos = pd.DataFrame({'code': codes, 43 | 'weight': er, 44 | 'industry': industry}) 45 | target_pos['weight'] = target_pos['weight'] / target_pos['weight'].abs().sum() 46 | target_pos = pd.merge(target_pos, dx_returns, on=['code']) 47 | target_pos = pd.merge(target_pos, total_data[['code'] + constraint_risk], on=['code']) 48 | total_risk_exp = target_pos[constraint_risk] 49 | activate_weight = target_pos['weight'].values 50 | excess_return = np.exp(target_pos[['dx']].values) - 1. 51 | excess_return = factor_processing(excess_return, 52 | [winsorize_normal, standardize], 53 | total_risk_exp.values, 54 | [winsorize_normal, standardize]).flatten() 55 | port_ret = np.log(activate_weight @ excess_return + 1.) 
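    # the IC is measured as the cross-sectional correlation between the processed
    # excess returns and the factor-implied active weights; the t-stat below comes
    # from an OLS fit of those excess returns on the active weights.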
56 | ic = np.corrcoef(excess_return, activate_weight)[0, 1] 57 | x = sm.add_constant(activate_weight) 58 | results = sm.OLS(excess_return, x).fit() 59 | t_stats = results.tvalues[1] 60 | 61 | alpha_logger.info(f"{ref_date} is finished with {len(target_pos)} stocks for {factor_name}") 62 | alpha_logger.info(f"{ref_date} risk_exposure: " 63 | f"{np.sum(np.square(target_pos.weight.values @ target_pos[constraint_risk].values))}") 64 | return port_ret, ic, t_stats 65 | 66 | 67 | def cross_section_analysis(ref_date, 68 | factor_name, 69 | universe, 70 | horizon, 71 | constraint_risk, 72 | engine): 73 | codes = engine.fetch_codes(ref_date, universe) 74 | 75 | risk_exposure = engine.fetch_risk_model(ref_date, codes)[1][['code'] + constraint_risk] 76 | factor_data = engine.fetch_factor(ref_date, factor_name, codes) 77 | industry_matrix = engine.fetch_industry_matrix(ref_date, codes, 'sw_adj', 1) 78 | dx_returns = engine.fetch_dx_return(ref_date, codes, horizon=horizon, offset=1) 79 | 80 | return cs_impl(ref_date, factor_data, factor_name, risk_exposure, constraint_risk, 81 | industry_matrix, dx_returns) 82 | 83 | 84 | if __name__ == '__main__': 85 | from alphamind.api import SqlEngine, Universe, risk_styles, industry_styles 86 | 87 | factor_name = 'SIZE' 88 | data_source = 'postgres+psycopg2://postgres:A12345678!@10.63.6.220/alpha' 89 | engine = SqlEngine(data_source) 90 | risk_names = list(set(risk_styles).difference({factor_name})) 91 | industry_names = list(set(industry_styles).difference({factor_name})) 92 | constraint_risk = risk_names + industry_names 93 | universe = Universe('custom', ['ashare_ex']) 94 | horizon = 9 95 | 96 | x = cross_section_analysis('2018-02-08', 97 | factor_name, 98 | universe, 99 | horizon, 100 | constraint_risk, 101 | engine=engine) 102 | print(x) 103 | -------------------------------------------------------------------------------- /alphamind/analysis/perfanalysis.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-5-12 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import pandas as pd 9 | 10 | from alphamind.analysis.riskanalysis import risk_analysis 11 | 12 | 13 | def perf_attribution_by_pos(net_weight_series: pd.Series, 14 | next_bar_return_series: pd.Series, 15 | benchmark_table: pd.DataFrame) -> pd.DataFrame: 16 | explained_table, _ = risk_analysis(net_weight_series, 17 | next_bar_return_series, 18 | benchmark_table) 19 | return explained_table.groupby(level=0).sum() 20 | -------------------------------------------------------------------------------- /alphamind/analysis/quantileanalysis.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-8-16 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | from typing import Optional 9 | 10 | import numpy as np 11 | import pandas as pd 12 | 13 | from alphamind.data.processing import factor_processing 14 | from alphamind.data.quantile import quantile 15 | from alphamind.data.standardize import standardize 16 | from alphamind.data.winsorize import winsorize_normal 17 | from alphamind.utilities import agg_mean 18 | 19 | 20 | def quantile_analysis(factors: pd.DataFrame, 21 | factor_weights: np.ndarray, 22 | dx_return: np.ndarray, 23 | n_bins: int = 5, 24 | risk_exp: Optional[np.ndarray] = None, 25 | **kwargs): 26 | if 'pre_process' in kwargs: 27 | pre_process = kwargs['pre_process'] 28 | del kwargs['pre_process'] 29 | else: 30 | pre_process = [winsorize_normal, 
standardize] 31 | 32 | if 'post_process' in kwargs: 33 | post_process = kwargs['post_process'] 34 | del kwargs['post_process'] 35 | else: 36 | post_process = [standardize] 37 | 38 | er = factor_processing(factors.values, pre_process, risk_exp, post_process) @ factor_weights 39 | return er_quantile_analysis(er, n_bins, dx_return, **kwargs) 40 | 41 | 42 | def er_quantile_analysis(er: np.ndarray, 43 | n_bins: int, 44 | dx_return: np.ndarray, 45 | de_trend=False) -> np.ndarray: 46 | er = er.flatten() 47 | q_groups = quantile(er, n_bins) 48 | 49 | if dx_return.ndim < 2: 50 | dx_return.shape = -1, 1 51 | 52 | group_return = agg_mean(q_groups, dx_return).flatten() 53 | total_return = group_return.sum() 54 | ret = group_return.copy() 55 | 56 | if de_trend: 57 | resid = n_bins - 1 58 | res_weight = 1. / resid 59 | for i, value in enumerate(ret): 60 | ret[i] = (1. + res_weight) * value - res_weight * total_return 61 | 62 | return ret 63 | 64 | 65 | if __name__ == '__main__': 66 | n = 5000 67 | n_f = 5 68 | n_bins = 5 69 | 70 | x = np.random.randn(n, 5) 71 | risk_exp = np.random.randn(n, 3) 72 | x_w = np.random.randn(n_f) 73 | r = np.random.randn(n) 74 | 75 | f_df = pd.DataFrame(x) 76 | calculated = quantile_analysis(f_df, 77 | x_w, 78 | r, 79 | risk_exp=None, 80 | n_bins=n_bins, 81 | pre_process=[], # [winsorize_normal, standardize], 82 | post_process=[]) # [standardize]) 83 | 84 | er = x_w @ f_df.values.T 85 | expected = er_quantile_analysis(er, n_bins, r) 86 | 87 | print(calculated) 88 | print(expected) 89 | -------------------------------------------------------------------------------- /alphamind/analysis/riskanalysis.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-5-6 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | from typing import Tuple 9 | 10 | import numpy as np 11 | import pandas as pd 12 | 13 | from alphamind.data.neutralize import neutralize 14 | 15 | 16 | def risk_analysis(net_weight_series: pd.Series, 17 | next_bar_return_series: pd.Series, 18 | risk_table: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]: 19 | group_idx = net_weight_series.index.values.astype(int) 20 | net_pos = net_weight_series.values.reshape((-1, 1)) 21 | risk_factor_cols = risk_table.columns 22 | 23 | idiosyncratic, other_stats = neutralize(risk_table.values, 24 | next_bar_return_series.values, 25 | group_idx, 26 | detail=True) 27 | 28 | systematic = other_stats['explained'] 29 | exposure = other_stats['exposure'] 30 | 31 | explained_table = np.hstack((idiosyncratic, systematic[:, :, 0])) 32 | cols = ['idiosyncratic'] 33 | cols.extend(risk_factor_cols) 34 | 35 | explained_table = pd.DataFrame(explained_table * net_pos, columns=cols, 36 | index=net_weight_series.index) 37 | exposure_table = pd.DataFrame(exposure[:, :, 0] * net_pos, columns=risk_factor_cols, 38 | index=net_weight_series.index) 39 | return explained_table, exposure_table.groupby(level=0).first() 40 | -------------------------------------------------------------------------------- /alphamind/api.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-8-16 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | from alphamind.data.engines.sqlengine import SqlEngine 9 | from alphamind.data.engines.sqlengine import risk_styles 10 | from alphamind.data.engines.sqlengine import industry_styles 11 | from alphamind.data.engines.sqlengine import macro_styles 12 | from alphamind.analysis.factoranalysis 
import er_portfolio_analysis 13 | from alphamind.analysis.factoranalysis import factor_analysis 14 | from alphamind.analysis.quantileanalysis import er_quantile_analysis 15 | from alphamind.analysis.quantileanalysis import quantile_analysis 16 | from alphamind.data.engines.universe import Universe 17 | from alphamind.data.engines.utilities import industry_list 18 | from alphamind.data.neutralize import neutralize 19 | from alphamind.data.processing import factor_processing 20 | from alphamind.data.rank import percentile 21 | from alphamind.data.rank import rank 22 | from alphamind.data.standardize import Standardizer 23 | from alphamind.data.standardize import projection 24 | from alphamind.data.standardize import standardize 25 | from alphamind.data.winsorize import NormalWinsorizer 26 | from alphamind.data.winsorize import winsorize_normal 27 | from alphamind.execution.naiveexecutor import NaiveExecutor 28 | from alphamind.execution.pipeline import ExecutionPipeline 29 | from alphamind.execution.targetvolexecutor import TargetVolExecutor 30 | from alphamind.execution.thresholdexecutor import ThresholdExecutor 31 | from alphamind.model import ConstLinearModel 32 | from alphamind.model import LassoRegression 33 | from alphamind.model import LinearRegression 34 | from alphamind.model import LogisticRegression 35 | from alphamind.model import NvSVRModel 36 | from alphamind.model import RandomForestClassifier 37 | from alphamind.model import RandomForestRegressor 38 | from alphamind.model import XGBClassifier 39 | from alphamind.model import XGBRegressor 40 | from alphamind.model import XGBTrainer 41 | from alphamind.model import load_model 42 | from alphamind.model.composer import Composer 43 | from alphamind.model.composer import DataMeta 44 | from alphamind.model.composer import predict_by_model 45 | from alphamind.model.composer import train_model 46 | from alphamind.model.data_preparing import fetch_data_package 47 | from alphamind.model.data_preparing import fetch_predict_phase 48 | from alphamind.model.data_preparing import fetch_train_phase 49 | from alphamind.portfolio.constraints import BoundaryDirection 50 | from alphamind.portfolio.constraints import BoundaryType 51 | from alphamind.portfolio.constraints import Constraints 52 | from alphamind.portfolio.constraints import LinearConstraints 53 | from alphamind.portfolio.constraints import create_box_bounds 54 | from alphamind.portfolio.evolver import evolve_positions 55 | from alphamind.utilities import alpha_logger 56 | from alphamind.utilities import map_freq 57 | 58 | __all__ = [ 59 | 'SqlEngine', 60 | 'factor_analysis', 61 | 'er_portfolio_analysis', 62 | 'quantile_analysis', 63 | 'er_quantile_analysis', 64 | 'Universe', 65 | 'factor_processing', 66 | 'Constraints', 67 | 'LinearConstraints', 68 | 'BoundaryType', 69 | 'BoundaryDirection', 70 | 'create_box_bounds', 71 | 'evolve_positions', 72 | 'risk_styles', 73 | 'industry_styles', 74 | 'macro_styles', 75 | 'winsorize_normal', 76 | 'NormalWinsorizer', 77 | 'standardize', 78 | 'Standardizer', 79 | 'projection', 80 | 'neutralize', 81 | 'rank', 82 | 'percentile', 83 | 'industry_list', 84 | 'fetch_data_package', 85 | 'fetch_train_phase', 86 | 'fetch_predict_phase', 87 | 'Composer', 88 | 'DataMeta', 89 | 'train_model', 90 | 'predict_by_model', 91 | 'LinearRegression', 92 | 'LassoRegression', 93 | 'ConstLinearModel', 94 | 'LogisticRegression', 95 | 'RandomForestRegressor', 96 | 'RandomForestClassifier', 97 | 'XGBRegressor', 98 | 'XGBClassifier', 99 | 'XGBTrainer', 100 | 'NvSVRModel', 
101 | 'load_model', 102 | 'NaiveExecutor', 103 | 'ThresholdExecutor', 104 | 'TargetVolExecutor', 105 | 'ExecutionPipeline', 106 | 'alpha_logger', 107 | 'map_freq' 108 | ] 109 | -------------------------------------------------------------------------------- /alphamind/benchmarks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-miner/alpha-mind/023fca01d2cea7cd50328396c60b06c99706c426/alphamind/benchmarks/__init__.py -------------------------------------------------------------------------------- /alphamind/benchmarks/benchmarks.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-4-25 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | from alphamind.benchmarks.data.neutralize import benchmark_neutralize 9 | from alphamind.benchmarks.data.neutralize import benchmark_neutralize_with_groups 10 | from alphamind.benchmarks.data.standardize import benchmark_standardize 11 | from alphamind.benchmarks.data.standardize import benchmark_standardize_with_group 12 | from alphamind.benchmarks.data.winsorize import benchmark_winsorize_normal 13 | from alphamind.benchmarks.data.winsorize import benchmark_winsorize_normal_with_group 14 | from alphamind.benchmarks.portfolio.linearbuild import benchmark_build_linear 15 | from alphamind.benchmarks.portfolio.percentbuild import benchmark_build_percent 16 | from alphamind.benchmarks.portfolio.percentbuild import benchmark_build_percent_with_group 17 | from alphamind.benchmarks.portfolio.rankbuild import benchmark_build_rank 18 | from alphamind.benchmarks.portfolio.rankbuild import benchmark_build_rank_with_group 19 | from alphamind.benchmarks.settlement.simplesettle import benchmark_simple_settle 20 | from alphamind.benchmarks.settlement.simplesettle import benchmark_simple_settle_with_group 21 | 22 | if __name__ == '__main__': 23 | benchmark_neutralize(3000, 10, 1000) 24 | benchmark_neutralize_with_groups(3000, 10, 1000, 30) 25 | benchmark_neutralize(30, 3, 50000) 26 | benchmark_neutralize_with_groups(30, 3, 50000, 3) 27 | benchmark_neutralize(50000, 50, 20) 28 | benchmark_neutralize_with_groups(50000, 50, 20, 50) 29 | benchmark_standardize(3000, 10, 1000) 30 | benchmark_standardize_with_group(3000, 10, 1000, 30) 31 | benchmark_standardize(100, 10, 50000) 32 | benchmark_standardize_with_group(100, 10, 5000, 4) 33 | benchmark_standardize(50000, 50, 20) 34 | benchmark_standardize_with_group(50000, 50, 20, 50) 35 | benchmark_winsorize_normal(3000, 10, 1000) 36 | benchmark_winsorize_normal_with_group(3000, 10, 1000, 30) 37 | benchmark_winsorize_normal(30, 10, 50000) 38 | benchmark_winsorize_normal_with_group(30, 10, 5000, 5) 39 | benchmark_winsorize_normal(50000, 50, 20) 40 | benchmark_winsorize_normal_with_group(50000, 50, 20, 50) 41 | benchmark_build_rank(3000, 1000, 300) 42 | benchmark_build_rank_with_group(3000, 1000, 10, 30) 43 | benchmark_build_rank(30, 50000, 3) 44 | benchmark_build_rank_with_group(30, 50000, 1, 3) 45 | benchmark_build_rank(50000, 20, 3000) 46 | benchmark_build_rank_with_group(50000, 20, 10, 300) 47 | benchmark_build_percent(3000, 1000, 0.1) 48 | benchmark_build_percent_with_group(3000, 1000, 0.1, 30) 49 | benchmark_build_percent(30, 50000, 0.1) 50 | benchmark_build_percent_with_group(30, 50000, 0.1, 3) 51 | benchmark_build_percent(50000, 20, 0.1) 52 | benchmark_build_percent_with_group(50000, 20, 0.1, 300) 53 | benchmark_build_linear(100, 3, 100) 54 | 
benchmark_build_linear(1000, 30, 10) 55 | benchmark_simple_settle(3000, 10, 1000) 56 | benchmark_simple_settle_with_group(3000, 10, 1000, 30) 57 | benchmark_simple_settle(30, 10, 50000) 58 | benchmark_simple_settle_with_group(30, 10, 50000, 5) 59 | benchmark_simple_settle(50000, 50, 20) 60 | benchmark_simple_settle_with_group(50000, 50, 20, 50) 61 | -------------------------------------------------------------------------------- /alphamind/benchmarks/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-miner/alpha-mind/023fca01d2cea7cd50328396c60b06c99706c426/alphamind/benchmarks/data/__init__.py -------------------------------------------------------------------------------- /alphamind/benchmarks/data/neutralize.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-4-25 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import datetime as dt 9 | 10 | import numpy as np 11 | from sklearn.linear_model import LinearRegression 12 | 13 | from alphamind.data.neutralize import neutralize 14 | 15 | 16 | def benchmark_neutralize(n_samples: int, n_features: int, n_loops: int) -> None: 17 | print("-" * 60) 18 | print("Starting least square fitting benchmarking") 19 | print("Parameters(n_samples: {0}, n_features: {1}, n_loops: {2})".format(n_samples, n_features, 20 | n_loops)) 21 | 22 | y = np.random.randn(n_samples, 5) 23 | x = np.random.randn(n_samples, n_features) 24 | 25 | start = dt.datetime.now() 26 | for _ in range(n_loops): 27 | calc_res = neutralize(x, y) 28 | impl_model_time = dt.datetime.now() - start 29 | 30 | print('{0:20s}: {1}'.format('Implemented model', impl_model_time)) 31 | 32 | start = dt.datetime.now() 33 | for _ in range(n_loops): 34 | benchmark_model = LinearRegression(fit_intercept=False) 35 | benchmark_model.fit(x, y) 36 | exp_res = y - x @ benchmark_model.coef_.T 37 | benchmark_model_time = dt.datetime.now() - start 38 | 39 | print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time)) 40 | 41 | np.testing.assert_array_almost_equal(calc_res, exp_res) 42 | 43 | 44 | def benchmark_neutralize_with_groups(n_samples: int, n_features: int, n_loops: int, 45 | n_groups: int) -> None: 46 | print("-" * 60) 47 | print("Starting least square fitting with group benchmarking") 48 | print( 49 | "Parameters(n_samples: {0}, n_features: {1}, n_loops: {2}, n_groups: {3})".format(n_samples, 50 | n_features, 51 | n_loops, 52 | n_groups)) 53 | y = np.random.randn(n_samples, 5) 54 | x = np.random.randn(n_samples, n_features) 55 | groups = np.random.randint(n_groups, size=n_samples) 56 | 57 | start = dt.datetime.now() 58 | for _ in range(n_loops): 59 | _ = neutralize(x, y, groups) 60 | impl_model_time = dt.datetime.now() - start 61 | 62 | print('{0:20s}: {1}'.format('Implemented model', impl_model_time)) 63 | 64 | start = dt.datetime.now() 65 | 66 | model = LinearRegression(fit_intercept=False) 67 | for _ in range(n_loops): 68 | for i in range(n_groups): 69 | curr_x = x[groups == i] 70 | curr_y = y[groups == i] 71 | model.fit(curr_x, curr_y) 72 | _ = curr_y - curr_x @ model.coef_.T 73 | benchmark_model_time = dt.datetime.now() - start 74 | 75 | print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time)) 76 | 77 | 78 | if __name__ == '__main__': 79 | benchmark_neutralize(3000, 10, 1000) 80 | benchmark_neutralize_with_groups(3000, 10, 1000, 30) 81 | 
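As a companion to the benchmark above, here is a minimal usage sketch (with illustrative shapes) of the property it verifies: `neutralize` returns the least-squares residuals of `y` regressed on `x`, matching an explicit scikit-learn fit without intercept:

```python
import numpy as np
from sklearn.linear_model import LinearRegression

from alphamind.data.neutralize import neutralize

x = np.random.randn(1000, 10)  # risk factor exposures
y = np.random.randn(1000, 5)   # raw values to be neutralized

# residuals of y after removing its linear exposure to x
res = neutralize(x, y)

# the same residuals computed explicitly, as the benchmark does
model = LinearRegression(fit_intercept=False)
model.fit(x, y)
np.testing.assert_array_almost_equal(res, y - x @ model.coef_.T)
```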
-------------------------------------------------------------------------------- /alphamind/benchmarks/data/standardize.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-4-25 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import datetime as dt 9 | 10 | import numpy as np 11 | import pandas as pd 12 | from scipy.stats import zscore 13 | 14 | from alphamind.data.standardize import standardize 15 | 16 | 17 | def benchmark_standardize(n_samples: int, n_features: int, n_loops: int) -> None: 18 | print("-" * 60) 19 | print("Starting standardizing benchmarking") 20 | print("Parameters(n_samples: {0}, n_features: {1}, n_loops: {2})".format(n_samples, n_features, 21 | n_loops)) 22 | 23 | x = np.random.randn(n_samples, n_features) 24 | 25 | start = dt.datetime.now() 26 | for _ in range(n_loops): 27 | _ = standardize(x) 28 | impl_model_time = dt.datetime.now() - start 29 | 30 | print('{0:20s}: {1}'.format('Implemented model', impl_model_time)) 31 | 32 | start = dt.datetime.now() 33 | for _ in range(n_loops): 34 | _ = zscore(x) 35 | benchmark_model_time = dt.datetime.now() - start 36 | 37 | print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time)) 38 | 39 | 40 | def benchmark_standardize_with_group(n_samples: int, n_features: int, n_loops: int, 41 | n_groups: int) -> None: 42 | print("-" * 60) 43 | print("Starting standardizing with group-by values benchmarking") 44 | print( 45 | "Parameters(n_samples: {0}, n_features: {1}, n_loops: {2}, n_groups: {3})".format(n_samples, 46 | n_features, 47 | n_loops, 48 | n_groups)) 49 | 50 | x = np.random.randn(n_samples, n_features) 51 | groups = np.random.randint(n_groups, size=n_samples) 52 | 53 | start = dt.datetime.now() 54 | for _ in range(n_loops): 55 | _ = standardize(x, groups=groups) 56 | impl_model_time = dt.datetime.now() - start 57 | 58 | print('{0:20s}: {1}'.format('Implemented model', impl_model_time)) 59 | 60 | start = dt.datetime.now() 61 | for _ in range(n_loops): 62 | _ = pd.DataFrame(x).groupby(groups).transform( 63 | lambda s: (s - s.mean(axis=0)) / s.std(axis=0)) 64 | benchmark_model_time = dt.datetime.now() - start 65 | 66 | print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time)) 67 | 68 | 69 | if __name__ == '__main__': 70 | benchmark_standardize(3000, 10, 1000) 71 | benchmark_standardize_with_group(3000, 10, 1000, 30) 72 | -------------------------------------------------------------------------------- /alphamind/benchmarks/data/winsorize.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-4-25 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import datetime as dt 9 | 10 | import numpy as np 11 | import pandas as pd 12 | 13 | from alphamind.data.winsorize import winsorize_normal 14 | 15 | 16 | def benchmark_winsorize_normal(n_samples: int, n_features: int, n_loops: int) -> None: 17 | print("-" * 60) 18 | print("Starting winsorize normal benchmarking") 19 | print("Parameters(n_samples: {0}, n_features: {1}, n_loops: {2})".format(n_samples, n_features, 20 | n_loops)) 21 | 22 | num_stds = 2 23 | 24 | x = np.random.randn(n_samples, n_features) 25 | 26 | start = dt.datetime.now() 27 | for _ in range(n_loops): 28 | _ = winsorize_normal(x, num_stds) 29 | impl_model_time = dt.datetime.now() - start 30 | 31 | print('{0:20s}: {1}'.format('Implemented model', impl_model_time)) 32 | 33 | def impl(x): 34 | std_values = x.std(axis=0) 35 | mean_value = 
x.mean(axis=0) 36 | 37 | lower_bound = mean_value - num_stds * std_values 38 | upper_bound = mean_value + num_stds * std_values 39 | 40 | res = np.where(x > upper_bound, upper_bound, x) 41 | res = np.where(res < lower_bound, lower_bound, res) 42 | return res 43 | 44 | start = dt.datetime.now() 45 | for _ in range(n_loops): 46 | _ = impl(x) 47 | benchmark_model_time = dt.datetime.now() - start 48 | 49 | print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time)) 50 | 51 | 52 | def benchmark_winsorize_normal_with_group(n_samples: int, n_features: int, n_loops: int, 53 | n_groups: int) -> None: 54 | print("-" * 60) 55 | print("Starting winsorize normal with group-by values benchmarking") 56 | print( 57 | "Parameters(n_samples: {0}, n_features: {1}, n_loops: {2}, n_groups: {3})".format(n_samples, 58 | n_features, 59 | n_loops, 60 | n_groups)) 61 | 62 | num_stds = 2 63 | 64 | x = np.random.randn(n_samples, n_features) 65 | groups = np.random.randint(n_groups, size=n_samples) 66 | 67 | start = dt.datetime.now() 68 | for _ in range(n_loops): 69 | _ = winsorize_normal(x, num_stds, groups=groups) 70 | impl_model_time = dt.datetime.now() - start 71 | 72 | print('{0:20s}: {1}'.format('Implemented model', impl_model_time)) 73 | 74 | def impl(x): 75 | std_values = x.std(axis=0) 76 | mean_value = x.mean(axis=0) 77 | 78 | lower_bound = mean_value - num_stds * std_values 79 | upper_bound = mean_value + num_stds * std_values 80 | 81 | res = np.where(x > upper_bound, upper_bound, x) 82 | res = np.where(res < lower_bound, lower_bound, res) 83 | return res 84 | 85 | start = dt.datetime.now() 86 | for _ in range(n_loops): 87 | _ = pd.DataFrame(x).groupby(groups).transform(impl) 88 | benchmark_model_time = dt.datetime.now() - start 89 | 90 | print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time)) 91 | 92 | 93 | if __name__ == '__main__': 94 | benchmark_winsorize_normal(3000, 10, 1000) 95 | benchmark_winsorize_normal_with_group(3000, 10, 1000, 30) 96 | -------------------------------------------------------------------------------- /alphamind/benchmarks/portfolio/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-4-27 4 | 5 | @author: cheng.li 6 | """ 7 | -------------------------------------------------------------------------------- /alphamind/benchmarks/portfolio/linearbuild.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-5-5 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import datetime as dt 9 | 10 | import numpy as np 11 | from cvxopt import matrix 12 | from cvxopt import solvers 13 | from scipy.optimize import linprog 14 | 15 | from alphamind.portfolio.linearbuilder import linear_builder 16 | 17 | solvers.options['show_progress'] = False 18 | 19 | 20 | def benchmark_build_linear(n_samples: int, n_risks: int, n_loop: int) -> None: 21 | print("-" * 60) 22 | print("Starting portfolio construction by linear programming") 23 | print( 24 | "Parameters(n_samples: {0}, n_risks: {1}, n_loop: {2})".format(n_samples, n_risks, n_loop)) 25 | 26 | er = np.random.randn(n_samples) 27 | risk_exp = np.random.randn(n_samples, n_risks) 28 | bm = np.random.rand(n_samples) 29 | bm /= bm.sum() 30 | 31 | lbound = -0.04 32 | ubound = 0.05 33 | 34 | risk_lbound = bm @ risk_exp 35 | risk_ubound = bm @ risk_exp 36 | 37 | start = dt.datetime.now() 38 | for _ in range(n_loop): 39 | status, v, x = linear_builder(er, 40 | lbound, 
41 | ubound, 42 | risk_exp, 43 | risk_target=(risk_lbound, 44 | risk_ubound)) 45 | impl_model_time = dt.datetime.now() - start 46 | print('{0:20s}: {1}'.format('Implemented model (ECOS)', impl_model_time)) 47 | 48 | c = - er 49 | bounds = [(lbound, ubound) for _ in range(n_samples)] 50 | a_eq = np.ones((1, n_samples)) 51 | a_eq = np.vstack((a_eq, risk_exp.T)) 52 | b_eq = np.hstack((np.array([1.]), risk_exp.T @ bm)) 53 | start = dt.datetime.now() 54 | for _ in range(n_loop): 55 | res = linprog(c, A_eq=a_eq, b_eq=b_eq, bounds=bounds, options={'maxiter': 10000}) 56 | benchmark_model_time = dt.datetime.now() - start 57 | print('{0:20s}: {1}'.format('Benchmark model (scipy)', benchmark_model_time)) 58 | np.testing.assert_array_almost_equal(x, res['x']) 59 | 60 | c = matrix(-er) 61 | aneq = matrix(a_eq) 62 | b = matrix(b_eq) 63 | g = matrix(np.vstack((np.diag(np.ones(n_samples)), -np.diag(np.ones(n_samples))))) 64 | h = matrix(np.hstack((ubound * np.ones(n_samples), -lbound * np.ones(n_samples)))) 65 | 66 | solvers.lp(c, g, h, solver='glpk') 67 | start = dt.datetime.now() 68 | for _ in range(n_loop): 69 | res2 = solvers.lp(c, g, h, aneq, b, solver='glpk') 70 | benchmark_model_time = dt.datetime.now() - start 71 | print('{0:20s}: {1}'.format('Benchmark model (glpk)', benchmark_model_time)) 72 | np.testing.assert_array_almost_equal(x, np.array(res2['x']).flatten()) 73 | 74 | 75 | if __name__ == '__main__': 76 | benchmark_build_linear(2000, 30, 10) 77 | -------------------------------------------------------------------------------- /alphamind/benchmarks/portfolio/longshortbuild.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-5-9 4 | 5 | @author: cheng.li 6 | """ 7 | -------------------------------------------------------------------------------- /alphamind/benchmarks/portfolio/percentbuild.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-5-4 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import datetime as dt 9 | 10 | import numpy as np 11 | import pandas as pd 12 | 13 | from alphamind.portfolio.percentbuilder import percent_build 14 | 15 | 16 | def benchmark_build_percent(n_samples: int, n_loops: int, p_included: float) -> None: 17 | print("-" * 60) 18 | print("Starting portfolio construction by percent benchmarking") 19 | print("Parameters(n_samples: {0}, p_included: {1}, n_loops: {2})".format(n_samples, p_included, 20 | n_loops)) 21 | 22 | n_portfolio = 10 23 | 24 | x = np.random.randn(n_samples, n_portfolio) 25 | 26 | start = dt.datetime.now() 27 | for _ in range(n_loops): 28 | calc_weights = percent_build(x, p_included) 29 | impl_model_time = dt.datetime.now() - start 30 | 31 | print('{0:20s}: {1}'.format('Implemented model', impl_model_time)) 32 | 33 | start = dt.datetime.now() 34 | for _ in range(n_loops): 35 | exp_weights = np.zeros((len(x), n_portfolio)) 36 | n_included = int(p_included * len(x)) 37 | choosed_index = (-x).argsort(axis=0).argsort(axis=0) < n_included 38 | for j in range(n_portfolio): 39 | exp_weights[choosed_index[:, j], j] = 1.
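        # the double argsort computes each entry's descending rank per column, so the
        # mask above keeps exactly the n_included largest values in every portfolio.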
40 | benchmark_model_time = dt.datetime.now() - start 41 | 42 | np.testing.assert_array_almost_equal(calc_weights, exp_weights) 43 | 44 | print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time)) 45 | 46 | 47 | def benchmark_build_percent_with_group(n_samples: int, n_loops: int, p_included: float, 48 | n_groups: int) -> None: 49 | print("-" * 60) 50 | print("Starting portfolio construction by percent with group-by values benchmarking") 51 | print( 52 | "Parameters(n_samples: {0}, p_included: {1}, n_loops: {2}, n_groups: {3})".format(n_samples, 53 | p_included, 54 | n_loops, 55 | n_groups)) 56 | 57 | n_portfolio = 10 58 | 59 | x = np.random.randn(n_samples, n_portfolio) 60 | groups = np.random.randint(n_groups, size=n_samples) 61 | 62 | start = dt.datetime.now() 63 | for _ in range(n_loops): 64 | calc_weights = percent_build(x, p_included, groups=groups) 65 | impl_model_time = dt.datetime.now() - start 66 | 67 | print('{0:20s}: {1}'.format('Implemented model', impl_model_time)) 68 | 69 | start = dt.datetime.now() 70 | for _ in range(n_loops): 71 | grouped_ordering = pd.DataFrame(-x).groupby(groups).rank() 72 | grouped_count = pd.DataFrame(-x).groupby(groups).transform(lambda x: x.count()) 73 | exp_weights = np.zeros((len(x), n_portfolio)) 74 | n_included = (grouped_count * p_included).astype(int) 75 | masks = (grouped_ordering <= n_included).values 76 | for j in range(n_portfolio): 77 | exp_weights[masks[:, j], j] = 1. 78 | benchmark_model_time = dt.datetime.now() - start 79 | 80 | np.testing.assert_array_almost_equal(calc_weights, exp_weights) 81 | 82 | print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time)) 83 | 84 | 85 | if __name__ == '__main__': 86 | benchmark_build_percent(3000, 1000, 0.1) 87 | benchmark_build_percent_with_group(3000, 1000, 0.1, 30) 88 | -------------------------------------------------------------------------------- /alphamind/benchmarks/portfolio/rankbuild.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-4-27 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import datetime as dt 9 | 10 | import numpy as np 11 | import pandas as pd 12 | 13 | from alphamind.portfolio.rankbuilder import rank_build 14 | 15 | 16 | def benchmark_build_rank(n_samples: int, n_loops: int, n_included: int) -> None: 17 | print("-" * 60) 18 | print("Starting portfolio construction by rank benchmarking") 19 | print("Parameters(n_samples: {0}, n_included: {1}, n_loops: {2})".format(n_samples, n_included, 20 | n_loops)) 21 | 22 | n_portfolio = 10 23 | 24 | x = np.random.randn(n_samples, n_portfolio) 25 | 26 | start = dt.datetime.now() 27 | for _ in range(n_loops): 28 | calc_weights = rank_build(x, n_included) 29 | impl_model_time = dt.datetime.now() - start 30 | 31 | print('{0:20s}: {1}'.format('Implemented model', impl_model_time)) 32 | 33 | start = dt.datetime.now() 34 | for _ in range(n_loops): 35 | exp_weights = np.zeros((len(x), n_portfolio)) 36 | choosed_index = (-x).argsort(axis=0).argsort(axis=0) < n_included 37 | for j in range(n_portfolio): 38 | exp_weights[choosed_index[:, j], j] = 1. 
39 | benchmark_model_time = dt.datetime.now() - start 40 | 41 | np.testing.assert_array_almost_equal(calc_weights, exp_weights) 42 | 43 | print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time)) 44 | 45 | 46 | def benchmark_build_rank_with_group(n_samples: int, n_loops: int, n_included: int, 47 | n_groups: int) -> None: 48 | print("-" * 60) 49 | print("Starting portfolio construction by rank with group-by values benchmarking") 50 | print( 51 | "Parameters(n_samples: {0}, n_included: {1}, n_loops: {2}, n_groups: {3})".format(n_samples, 52 | n_included, 53 | n_loops, 54 | n_groups)) 55 | 56 | n_portfolio = 10 57 | 58 | x = np.random.randn(n_samples, n_portfolio) 59 | groups = np.random.randint(n_groups, size=n_samples) 60 | 61 | start = dt.datetime.now() 62 | for _ in range(n_loops): 63 | calc_weights = rank_build(x, n_included, groups=groups) 64 | impl_model_time = dt.datetime.now() - start 65 | 66 | print('{0:20s}: {1}'.format('Implemented model', impl_model_time)) 67 | 68 | start = dt.datetime.now() 69 | for _ in range(n_loops): 70 | grouped_ordering = pd.DataFrame(-x).groupby(groups).rank() 71 | exp_weights = np.zeros((len(x), n_portfolio)) 72 | masks = (grouped_ordering <= n_included).values 73 | for j in range(n_portfolio): 74 | exp_weights[masks[:, j], j] = 1. 75 | benchmark_model_time = dt.datetime.now() - start 76 | 77 | np.testing.assert_array_almost_equal(calc_weights, exp_weights) 78 | 79 | print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time)) 80 | 81 | 82 | if __name__ == '__main__': 83 | benchmark_build_rank(3000, 1000, 300) 84 | benchmark_build_rank_with_group(3000, 1000, 10, 30) 85 | -------------------------------------------------------------------------------- /alphamind/benchmarks/settlement/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-4-28 4 | 5 | @author: cheng.li 6 | """ 7 | -------------------------------------------------------------------------------- /alphamind/benchmarks/settlement/simplesettle.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-4-28 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import datetime as dt 9 | 10 | import numpy as np 11 | import pandas as pd 12 | 13 | from alphamind.settlement.simplesettle import simple_settle 14 | 15 | 16 | def benchmark_simple_settle(n_samples: int, n_portfolios: int, n_loops: int) -> None: 17 | print("-" * 60) 18 | print("Starting simple settle benchmarking") 19 | print("Parameters(n_samples: {0}, n_portfolios: {1}, n_loops: {2})".format(n_samples, 20 | n_portfolios, 21 | n_loops)) 22 | 23 | weights = np.random.randn(n_samples, n_portfolios) 24 | ret_series = np.random.randn(n_samples) 25 | 26 | start = dt.datetime.now() 27 | for _ in range(n_loops): 28 | calc_ret = simple_settle(weights, ret_series) 29 | impl_model_time = dt.datetime.now() - start 30 | 31 | print('{0:20s}: {1}'.format('Implemented model', impl_model_time)) 32 | 33 | start = dt.datetime.now() 34 | ret_series.shape = -1, 1 35 | for _ in range(n_loops): 36 | exp_ret = (weights * ret_series).sum(axis=0) 37 | benchmark_model_time = dt.datetime.now() - start 38 | 39 | np.testing.assert_array_almost_equal(calc_ret, exp_ret) 40 | 41 | print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time)) 42 | 43 | 44 | def benchmark_simple_settle_with_group(n_samples: int, n_portfolios: int, n_loops: int, 45 | n_groups: int) -> None: 46 | 
print("-" * 60) 47 | print("Starting simple settle with group-by values benchmarking") 48 | print("Parameters(n_samples: {0}, n_portfolios: {1}, n_loops: {2}, n_groups: {3})".format( 49 | n_samples, n_portfolios, n_loops, n_groups)) 50 | 51 | weights = np.random.randn(n_samples, n_portfolios) 52 | ret_series = np.random.randn(n_samples) 53 | groups = np.random.randint(n_groups, size=n_samples) 54 | 55 | start = dt.datetime.now() 56 | for _ in range(n_loops): 57 | calc_ret = simple_settle(weights, ret_series, groups=groups) 58 | impl_model_time = dt.datetime.now() - start 59 | 60 | print('{0:20s}: {1}'.format('Implemented model', impl_model_time)) 61 | 62 | start = dt.datetime.now() 63 | ret_series.shape = -1, 1 64 | for _ in range(n_loops): 65 | ret_mat = weights * ret_series 66 | exp_ret = pd.DataFrame(ret_mat).groupby(groups).sum().values 67 | benchmark_model_time = dt.datetime.now() - start 68 | 69 | np.testing.assert_array_almost_equal(calc_ret, exp_ret) 70 | 71 | print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time)) 72 | 73 | 74 | if __name__ == '__main__': 75 | benchmark_simple_settle(3000, 3, 1000) 76 | benchmark_simple_settle_with_group(3000, 3, 1000, 30) 77 | -------------------------------------------------------------------------------- /alphamind/bin/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-miner/alpha-mind/023fca01d2cea7cd50328396c60b06c99706c426/alphamind/bin/__init__.py -------------------------------------------------------------------------------- /alphamind/bin/alphamind: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-6-29 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | 9 | from alphamind.bin.cli import CLIFactory 10 | 11 | 12 | if __name__ == '__main__': 13 | 14 | parser = CLIFactory.get_parser() 15 | args = parser.parse_args() 16 | args.func(args) -------------------------------------------------------------------------------- /alphamind/bin/cli.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-6-29 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import argparse 9 | from collections import namedtuple 10 | 11 | from sqlalchemy import create_engine 12 | 13 | from alphamind.data.dbmodel import models 14 | from alphamind.utilities import alpha_logger 15 | 16 | 17 | def initdb(args): 18 | alpha_logger.info('DB: ' + args.url) 19 | engine = create_engine(args.url) 20 | models.Base.metadata.create_all(engine) 21 | alpha_logger.info('DB: initialization finished.') 22 | 23 | 24 | Arg = namedtuple( 25 | 'Arg', ['flags', 'help', 'action', 'default', 'nargs', 'type', 'choices', 'metavar']) 26 | Arg.__new__.__defaults__ = (None, None, None, None, None, None, None) 27 | 28 | 29 | class CLIFactory(object): 30 | args = { 31 | 'url': Arg( 32 | ('-u', '--url'), 33 | help='set the url for the db', 34 | type=str) 35 | } 36 | 37 | subparsers = ( 38 | { 39 | 'func': initdb, 40 | 'help': 'Initialize the metadata database', 41 | 'args': ('url',) 42 | }, 43 | ) 44 | 45 | subparsers_dict = {sp['func'].__name__: sp for sp in subparsers} 46 | 47 | @classmethod 48 | def get_parser(cls): 49 | parser = argparse.ArgumentParser() 50 | subparsers = parser.add_subparsers( 51 | help='sub-command help', dest='subcommand') 52 | subparsers.required = True 53 | 54 | subparser_list = cls.subparsers_dict.keys() 55 | for sub in 
subparser_list: 56 | sub = cls.subparsers_dict[sub] 57 | sp = subparsers.add_parser(sub['func'].__name__, help=sub['help']) 58 | for arg in sub['args']: 59 | arg = cls.args[arg] 60 | kwargs = { 61 | f: getattr(arg, f) 62 | for f in arg._fields if f != 'flags' and getattr(arg, f)} 63 | sp.add_argument(*arg.flags, **kwargs) 64 | sp.set_defaults(func=sub['func']) 65 | return parser 66 | 67 | 68 | def get_parser(): 69 | return CLIFactory.get_parser() 70 | -------------------------------------------------------------------------------- /alphamind/data/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-4-25 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | from alphamind.data.neutralize import neutralize 9 | from alphamind.data.rank import rank 10 | from alphamind.data.standardize import standardize 11 | from alphamind.data.winsorize import winsorize_normal as winsorize 12 | 13 | __all__ = ['standardize', 14 | 'winsorize', 15 | 'neutralize', 16 | 'rank'] 17 | -------------------------------------------------------------------------------- /alphamind/data/dbmodel/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-6-29 4 | 5 | @author: cheng.li 6 | """ 7 | -------------------------------------------------------------------------------- /alphamind/data/dbmodel/models/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2020-11-14 4 | 5 | @author: cheng.li 6 | """ 7 | import os 8 | 9 | if "DB_VENDOR" in os.environ and os.environ["DB_VENDOR"].lower() == "mysql": 10 | from alphamind.data.dbmodel.models.mysql import Market 11 | from alphamind.data.dbmodel.models.mysql import IndexMarket 12 | from alphamind.data.dbmodel.models.mysql import Universe 13 | from alphamind.data.dbmodel.models.mysql import Industry 14 | from alphamind.data.dbmodel.models.mysql import RiskExposure 15 | from alphamind.data.dbmodel.models.mysql import RiskCovDay 16 | from alphamind.data.dbmodel.models.mysql import RiskCovShort 17 | from alphamind.data.dbmodel.models.mysql import RiskCovLong 18 | from alphamind.data.dbmodel.models.mysql import SpecificRiskDay 19 | from alphamind.data.dbmodel.models.mysql import SpecificRiskShort 20 | from alphamind.data.dbmodel.models.mysql import SpecificRiskLong 21 | from alphamind.data.dbmodel.models.mysql import IndexComponent 22 | from alphamind.data.dbmodel.models.mysql import IndexWeight 23 | else: 24 | from alphamind.data.dbmodel.models.postgres import Market 25 | from alphamind.data.dbmodel.models.postgres import IndexMarket 26 | from alphamind.data.dbmodel.models.postgres import Universe 27 | from alphamind.data.dbmodel.models.postgres import Industry 28 | from alphamind.data.dbmodel.models.postgres import RiskExposure 29 | from alphamind.data.dbmodel.models.postgres import RiskCovDay 30 | from alphamind.data.dbmodel.models.postgres import RiskCovShort 31 | from alphamind.data.dbmodel.models.postgres import RiskCovLong 32 | from alphamind.data.dbmodel.models.postgres import SpecificRiskDay 33 | from alphamind.data.dbmodel.models.postgres import SpecificRiskShort 34 | from alphamind.data.dbmodel.models.postgres import SpecificRiskLong 35 | from alphamind.data.dbmodel.models.postgres import FactorMaster 36 | from alphamind.data.dbmodel.models.postgres import IndexComponent 37 | from 
alphamind.data.dbmodel.models.postgres import RiskMaster -------------------------------------------------------------------------------- /alphamind/data/engines/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-miner/alpha-mind/023fca01d2cea7cd50328396c60b06c99706c426/alphamind/data/engines/__init__.py -------------------------------------------------------------------------------- /alphamind/data/engines/sqlengine/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2020-11-14 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import os 9 | 10 | if "DB_VENDOR" in os.environ and os.environ["DB_VENDOR"].lower() == "mysql": 11 | from alphamind.data.engines.sqlengine.mysql import SqlEngine 12 | from alphamind.data.engines.sqlengine.mysql import total_risk_factors 13 | from alphamind.data.engines.sqlengine.mysql import industry_styles 14 | from alphamind.data.engines.sqlengine.mysql import risk_styles 15 | from alphamind.data.engines.sqlengine.mysql import macro_styles 16 | else: 17 | from alphamind.data.engines.sqlengine.postgres import SqlEngine 18 | from alphamind.data.engines.sqlengine.postgres import total_risk_factors 19 | from alphamind.data.engines.sqlengine.postgres import industry_styles 20 | from alphamind.data.engines.sqlengine.postgres import risk_styles 21 | from alphamind.data.engines.sqlengine.postgres import macro_styles 22 | -------------------------------------------------------------------------------- /alphamind/data/engines/utilities.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-12-25 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import os 9 | from typing import Dict 10 | from typing import Iterable 11 | 12 | from alphamind.data.dbmodel.models import Market 13 | from alphamind.data.dbmodel.models import RiskCovDay 14 | from alphamind.data.dbmodel.models import RiskCovLong 15 | from alphamind.data.dbmodel.models import RiskCovShort 16 | from alphamind.data.dbmodel.models import RiskExposure 17 | from alphamind.data.dbmodel.models import SpecificRiskDay 18 | from alphamind.data.dbmodel.models import SpecificRiskLong 19 | from alphamind.data.dbmodel.models import SpecificRiskShort 20 | from alphamind.data.engines.industries import INDUSTRY_MAPPING 21 | 22 | 23 | def _map_risk_model_table(risk_model: str) -> tuple: 24 | if risk_model == 'day': 25 | return RiskCovDay, SpecificRiskDay 26 | elif risk_model == 'short': 27 | return RiskCovShort, SpecificRiskShort 28 | elif risk_model == 'long': 29 | return RiskCovLong, SpecificRiskLong 30 | else: 31 | raise ValueError("risk model name {0} is not recognized".format(risk_model)) 32 | 33 | 34 | def _map_factors(factors: Iterable[str], used_factor_tables) -> Dict: 35 | factor_cols = {} 36 | factors = set(factors).difference({'trade_date', 'code', 'isOpen'}) 37 | to_keep = factors.copy() 38 | for f in factors: 39 | for t in used_factor_tables: 40 | if f in t.columns: 41 | factor_cols[t.columns[f].name] = t 42 | to_keep.remove(f) 43 | break 44 | 45 | if to_keep: 46 | raise ValueError("factors in <{0}> can't be found".format(to_keep)) 47 | 48 | return factor_cols 49 | 50 | 51 | if "DB_VENDOR" in os.environ and os.environ["DB_VENDOR"].lower() == "mysql": 52 | def _map_industry_category(category: str) -> str: 53 | if category == 'sw': 54 | return '申万行业分类(2014)' 55 | elif category ==
'zz': 56 | return '中证行业分类' 57 | elif category == 'zx': 58 | return '中信标普行业分类' 59 | elif category == 'zjh': 60 | return '证监会行业分类(2012)-证监会' 61 | else: 62 | raise ValueError("No other industry is supported at the current time") 63 | else: 64 | def _map_industry_category(category: str) -> str: 65 | if category == 'sw': 66 | return '申万行业分类' 67 | elif category == 'sw_adj': 68 | return '申万行业分类修订' 69 | elif category == 'zz': 70 | return '中证行业分类' 71 | elif category == 'dx': 72 | return '东兴行业分类' 73 | elif category == 'zjh': 74 | return '证监会行业V2012' 75 | else: 76 | raise ValueError("No other industry is supported at the current time") 77 | 78 | 79 | def industry_list(category: str, level: int = 1) -> list: 80 | return INDUSTRY_MAPPING[category][level] 81 | -------------------------------------------------------------------------------- /alphamind/data/neutralize.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-4-25 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | from typing import Dict 9 | from typing import Tuple 10 | from typing import Union 11 | 12 | import numba as nb 13 | import numpy as np 14 | 15 | import alphamind.utilities as utils 16 | 17 | 18 | def neutralize(x: np.ndarray, 19 | y: np.ndarray, 20 | groups: np.ndarray = None, 21 | detail: bool = False, 22 | weights: np.ndarray = None) \ 23 | -> Union[np.ndarray, Tuple[np.ndarray, Dict]]: 24 | if y.ndim == 1: 25 | y = y.reshape((-1, 1)) 26 | 27 | if weights is None: 28 | weights = np.ones(len(y), dtype=float) 29 | 30 | output_dict = {} 31 | 32 | if detail: 33 | exposure = np.zeros(x.shape + (y.shape[1],)) 34 | explained = np.zeros(x.shape + (y.shape[1],)) 35 | output_dict['exposure'] = exposure 36 | output_dict['explained'] = explained 37 | 38 | if groups is not None: 39 | res = np.zeros(y.shape) 40 | index_diff, order = utils.groupby(groups) 41 | start = 0 42 | if detail: 43 | for diff_loc in index_diff: 44 | curr_idx = order[start:diff_loc + 1] 45 | curr_x, b = _sub_step(x, y, weights, curr_idx, res) 46 | exposure[curr_idx, :, :] = b 47 | explained[curr_idx] = ls_explain(curr_x, b) 48 | start = diff_loc + 1 49 | else: 50 | for diff_loc in index_diff: 51 | curr_idx = order[start:diff_loc + 1] 52 | _sub_step(x, y, weights, curr_idx, res) 53 | start = diff_loc + 1 54 | else: 55 | try: 56 | b = ls_fit(x, y, weights) 57 | except np.linalg.linalg.LinAlgError: 58 | b = ls_fit_pinv(x, y, weights) 59 | 60 | res = ls_res(x, y, b) 61 | 62 | if detail: 63 | explained[:, :, :] = ls_explain(x, b) 64 | exposure[:] = b 65 | 66 | if output_dict: 67 | return res, output_dict 68 | else: 69 | return res 70 | 71 | 72 | def _sub_step(x, y, w, curr_idx, res) -> Tuple[np.ndarray, np.ndarray]: 73 | curr_x, curr_y, curr_w = x[curr_idx], y[curr_idx], w[curr_idx] 74 | try: 75 | b = ls_fit(curr_x, curr_y, curr_w) 76 | except np.linalg.linalg.LinAlgError: 77 | b = ls_fit_pinv(curr_x, curr_y, curr_w) 78 | res[curr_idx] = ls_res(curr_x, curr_y, b) 79 | return curr_x, b 80 | 81 | 82 | @nb.njit(nogil=True, cache=True) 83 | def ls_fit(x: np.ndarray, y: np.ndarray, w: np.ndarray) -> np.ndarray: 84 | x_bar = x.T * w 85 | b = np.linalg.solve(x_bar @ x, x_bar @ y) 86 | return b 87 | 88 | 89 | @nb.njit(nogil=True, cache=True) 90 | def ls_fit_pinv(x: np.ndarray, y: np.ndarray, w: np.ndarray) -> np.ndarray: 91 | x_bar = x.T * w 92 | b = np.linalg.pinv(x_bar @ x) @ x_bar @ y 93 | return b 94 | 95 | 96 | @nb.njit(nogil=True, cache=True) 97 | def ls_res(x: np.ndarray, y: np.ndarray, b: np.ndarray) 
-> np.ndarray: 98 | return y - x @ b 99 | 100 | 101 | @nb.njit(nogil=True, cache=True) 102 | def ls_explain(x: np.ndarray, b: np.ndarray) -> np.ndarray: 103 | m, n = b.shape 104 | return b.reshape((1, m, n)) * x.reshape((-1, m, 1)) 105 | 106 | 107 | if __name__ == '__main__': 108 | x = np.random.randn(50000, 10) 109 | y = np.random.randn(50000, 1) 110 | w = np.ones(50000) 111 | 112 | import datetime as dt 113 | 114 | start = dt.datetime.now() 115 | for _ in range(1000): 116 | ls_fit(x, y, w) 117 | print(dt.datetime.now() - start) 118 | -------------------------------------------------------------------------------- /alphamind/data/processing.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-8-21 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | from typing import List 9 | from typing import Optional 10 | 11 | import numpy as np 12 | 13 | from alphamind.data.neutralize import neutralize 14 | from alphamind.utilities import alpha_logger 15 | 16 | 17 | def factor_processing(raw_factors: np.ndarray, 18 | pre_process: Optional[List] = None, 19 | risk_factors: Optional[np.ndarray] = None, 20 | post_process: Optional[List] = None, 21 | groups=None) -> np.ndarray: 22 | new_factors = raw_factors 23 | 24 | if pre_process: 25 | for p in pre_process: 26 | new_factors = p(new_factors, groups=groups) 27 | 28 | if risk_factors is not None: 29 | risk_factors = risk_factors[:, risk_factors.sum(axis=0) != 0] 30 | new_factors = neutralize(risk_factors, new_factors, groups=groups) 31 | 32 | if post_process: 33 | for p in post_process: 34 | if p.__name__ == 'winsorize_normal': 35 | alpha_logger.warning("winsorize_normal " 36 | "normally should not be done after neutralize") 37 | new_factors = p(new_factors, groups=groups) 38 | 39 | return new_factors 40 | -------------------------------------------------------------------------------- /alphamind/data/quantile.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-8-16 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import numpy as np 9 | 10 | 11 | def quantile(x: np.ndarray, n_bins: int) -> np.ndarray: 12 | n = x.size 13 | sorter = x.argsort() 14 | inv = np.empty(n, dtype=int) 15 | inv[sorter] = np.arange(n, dtype=int) 16 | 17 | bin_size = float(n) / n_bins 18 | 19 | pillars = [int(i * bin_size) for i in range(1, n_bins + 1)] 20 | 21 | q_groups = np.empty(n, dtype=int) 22 | 23 | starter = 0 24 | for i, r in enumerate(pillars): 25 | q_groups[(inv >= starter) & (inv < r)] = i 26 | starter = r 27 | 28 | return q_groups 29 | -------------------------------------------------------------------------------- /alphamind/data/rank.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-8-8 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | from typing import Optional 9 | 10 | import numpy as np 11 | from scipy.stats import rankdata 12 | 13 | import alphamind.utilities as utils 14 | 15 | 16 | def rank(x: np.ndarray, groups: Optional[np.ndarray] = None) -> np.ndarray: 17 | if x.ndim == 1: 18 | x = x.reshape((-1, 1)) 19 | 20 | if groups is not None: 21 | res = np.zeros(x.shape, dtype=int) 22 | index_diff, order = utils.groupby(groups) 23 | 24 | start = 0 25 | for diff_loc in index_diff: 26 | curr_idx = order[start:diff_loc + 1] 27 | res[curr_idx] = (rankdata(x[curr_idx]).astype(float) - 1.).reshape((-1, 1)) 28 | start = diff_loc + 1 29 | 
return res 30 | else: 31 | return (rankdata(x).astype(float) - 1.).reshape((-1, 1)) 32 | 33 | 34 | def percentile(x: np.ndarray, groups: Optional[np.ndarray] = None) -> np.ndarray: 35 | if x.ndim == 1: 36 | x = x.reshape((-1, 1)) 37 | 38 | if groups is not None: 39 | res = np.zeros(x.shape, dtype=int) 40 | index_diff, order = utils.groupby(groups) 41 | 42 | start = 0 43 | for diff_loc in index_diff: 44 | curr_idx = order[start:diff_loc + 1] 45 | curr_values = x[curr_idx] 46 | length = len(curr_values) - 1. if len(curr_values) > 1 else 1. 47 | res[curr_idx] = (rankdata(curr_values).astype(float) - 1.) / length 48 | start = diff_loc + 1 49 | return res 50 | else: 51 | length = len(x) - 1. if len(x) > 1 else 1. 52 | return ((rankdata(x).astype(float) - 1.) / length).reshape((-1, 1)) 53 | -------------------------------------------------------------------------------- /alphamind/data/standardize.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-4-25 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import numpy as np 9 | 10 | from alphamind.utilities import aggregate 11 | from alphamind.utilities import array_index 12 | from alphamind.utilities import group_mapping 13 | from alphamind.utilities import simple_mean 14 | from alphamind.utilities import simple_sqrsum 15 | from alphamind.utilities import simple_std 16 | from alphamind.utilities import transform 17 | 18 | 19 | def standardize(x: np.ndarray, groups: np.ndarray = None, ddof=1) -> np.ndarray: 20 | if groups is not None: 21 | groups = group_mapping(groups) 22 | mean_values = transform(groups, x, 'mean') 23 | std_values = transform(groups, x, 'std', ddof) 24 | 25 | return (x - mean_values) / np.maximum(std_values, 1e-8) 26 | else: 27 | return (x - simple_mean(x, axis=0)) / np.maximum(simple_std(x, axis=0, ddof=ddof), 1e-8) 28 | 29 | 30 | def projection(x: np.ndarray, groups: np.ndarray = None, axis=1) -> np.ndarray: 31 | if groups is not None and axis == 0: 32 | groups = group_mapping(groups) 33 | projected = transform(groups, x, 'project') 34 | return projected 35 | else: 36 | return x / simple_sqrsum(x, axis=axis).reshape((-1, 1)) 37 | 38 | 39 | class Standardizer(object): 40 | 41 | def __init__(self, ddof: int = 1): 42 | self.ddof = ddof 43 | self.mean = None 44 | self.std = None 45 | self.labels = None 46 | 47 | def fit(self, x: np.ndarray, groups: np.ndarray = None): 48 | if groups is not None: 49 | group_index = group_mapping(groups) 50 | self.mean = aggregate(group_index, x, 'mean') 51 | self.std = aggregate(group_index, x, 'std', self.ddof) 52 | self.labels = np.unique(groups) 53 | else: 54 | self.mean = simple_mean(x, axis=0) 55 | self.std = simple_std(x, axis=0, ddof=self.ddof) 56 | 57 | def transform(self, x: np.ndarray, groups: np.ndarray = None) -> np.ndarray: 58 | if groups is not None: 59 | index = array_index(self.labels, groups) 60 | return (x - self.mean[index]) / np.maximum(self.std[index], 1e-8) 61 | else: 62 | return (x - self.mean) / np.maximum(self.std, 1e-8) 63 | 64 | def __call__(self, x: np.ndarray, groups: np.ndarray = None) -> np.ndarray: 65 | return standardize(x, groups, self.ddof) 66 | -------------------------------------------------------------------------------- /alphamind/data/transformer.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-8-23 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import copy 9 | 10 | import pandas as pd 11 | 
from simpleutils.asserts import require 12 | from PyFin.Analysis.SecurityValueHolders import SecurityValueHolder 13 | from PyFin.api import transform as transform_impl 14 | 15 | 16 | def factor_translator(factor_pool): 17 | if not factor_pool: 18 | return None, None 19 | 20 | if isinstance(factor_pool, str): 21 | return {factor_pool: factor_pool}, [factor_pool] 22 | elif isinstance(factor_pool, SecurityValueHolder): 23 | return {str(factor_pool): factor_pool}, sorted(factor_pool.fields) 24 | elif isinstance(factor_pool, dict): 25 | dependency = set() 26 | for k, v in factor_pool.items(): 27 | require(isinstance(k, str), ValueError, 28 | 'factor_name {0} should be a string.'.format(k)) 29 | require(isinstance(v, SecurityValueHolder) or isinstance(v, str), 30 | ValueError, 31 | 'expression {0} should be a value holder or a string.'.format(v)) 32 | 33 | if isinstance(v, str): 34 | dependency = dependency.union([v]) 35 | else: 36 | dependency = dependency.union(v.fields) 37 | return factor_pool, sorted(dependency) 38 | elif isinstance(factor_pool, list): 39 | factor_dict = {} 40 | dependency = set() 41 | k = 1 42 | for i, f in enumerate(factor_pool): 43 | if isinstance(f, str): 44 | factor_dict[f] = f 45 | dependency = dependency.union([f]) 46 | elif isinstance(f, SecurityValueHolder): 47 | factor_dict[str(f)] = f 48 | dependency = dependency.union(f.fields) 49 | k += 1 50 | return factor_dict, sorted(dependency) 51 | else: 52 | raise ValueError('{0} is not in valid format as factors'.format(factor_pool)) 53 | 54 | 55 | class Transformer: 56 | 57 | def __init__(self, 58 | expressions): 59 | expression_dict, expression_dependency = \ 60 | factor_translator(copy.deepcopy(expressions)) 61 | 62 | if expression_dict: 63 | self.names = sorted(expression_dict.keys()) 64 | self.expressions = [expression_dict[n] for n in self.names] 65 | self.dependency = expression_dependency 66 | else: 67 | self.names = [] 68 | self.expressions = [] 69 | self.dependency = [] 70 | 71 | def transform(self, group_name, data): 72 | if len(data) > 0: 73 | transformed_data = transform_impl(data, 74 | self.expressions, 75 | self.names, 76 | group_name, 77 | dropna=False) 78 | return transformed_data 79 | else: 80 | return pd.DataFrame() 81 | 82 | 83 | if __name__ == '__main__': 84 | transformer = Transformer(['c', 'a']) 85 | -------------------------------------------------------------------------------- /alphamind/exceptions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-miner/alpha-mind/023fca01d2cea7cd50328396c60b06c99706c426/alphamind/exceptions/__init__.py -------------------------------------------------------------------------------- /alphamind/exceptions/exceptions.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2018-6-12 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | 9 | class PortfolioBuilderException(Exception): 10 | 11 | def __init__(self, msg): 12 | self.msg = msg 13 | 14 | def __str__(self): 15 | return str(self.msg) 16 | -------------------------------------------------------------------------------- /alphamind/execution/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-miner/alpha-mind/023fca01d2cea7cd50328396c60b06c99706c426/alphamind/execution/__init__.py --------------------------------------------------------------------------------
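Note: the data modules above (winsorize, standardize, neutralize, processing) are designed to be chained through factor_processing. The following is a minimal sketch of the typical pipeline, not part of the package itself; the random inputs are placeholders that mirror the shapes used in the project's own tests:

import numpy as np

from alphamind.data.processing import factor_processing
from alphamind.data.standardize import standardize
from alphamind.data.winsorize import winsorize_normal

raw_factors = np.random.randn(1000, 1)      # placeholder factor values
risk_exp = np.random.randn(1000, 3)         # placeholder risk exposures
groups = np.random.randint(30, size=1000)   # placeholder industry codes

# pre_process callables run left to right within each group; the result is
# then neutralized against risk_exp, and post_process is applied last
processed = factor_processing(raw_factors,
                              pre_process=[winsorize_normal, standardize],
                              risk_factors=risk_exp,
                              post_process=[standardize],
                              groups=groups)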
/alphamind/execution/baseexecutor.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-9-22 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import abc 9 | 10 | import numpy as np 11 | import pandas as pd 12 | 13 | 14 | class ExecutorBase(metaclass=abc.ABCMeta): 15 | 16 | def __init__(self): 17 | self.current_pos = pd.DataFrame() 18 | 19 | @abc.abstractmethod 20 | def execute(self, target_pos: pd.DataFrame) -> pd.DataFrame: 21 | pass 22 | 23 | @staticmethod 24 | def calc_turn_over(target_pos: pd.DataFrame, current_pos: pd.DataFrame) -> float: 25 | pos_merged = pd.merge(target_pos, current_pos, on=['code'], how='outer') 26 | pos_merged.fillna(0, inplace=True) 27 | turn_over = np.abs(pos_merged.weight_x - pos_merged.weight_y).sum() 28 | return turn_over 29 | 30 | def set_current(self, current_pos: pd.DataFrame): 31 | self.current_pos = current_pos.copy() 32 | 33 | def update(self, data_dict: dict): 34 | pass 35 | -------------------------------------------------------------------------------- /alphamind/execution/naiveexecutor.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-9-22 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | from typing import Tuple 9 | 10 | import pandas as pd 11 | 12 | from alphamind.execution.baseexecutor import ExecutorBase 13 | 14 | 15 | class NaiveExecutor(ExecutorBase): 16 | 17 | def __init__(self): 18 | super().__init__() 19 | 20 | def execute(self, target_pos: pd.DataFrame) -> Tuple[float, pd.DataFrame]: 21 | if self.current_pos.empty: 22 | turn_over = target_pos.weight.abs().sum() 23 | else: 24 | turn_over = self.calc_turn_over(target_pos, self.current_pos) 25 | self.current_pos = target_pos.copy() 26 | return turn_over, target_pos 27 | -------------------------------------------------------------------------------- /alphamind/execution/pipeline.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-9-25 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | from typing import List 9 | from typing import Tuple 10 | 11 | import pandas as pd 12 | 13 | from alphamind.execution.baseexecutor import ExecutorBase 14 | 15 | 16 | class ExecutionPipeline(object): 17 | 18 | def __init__(self, executors: List[ExecutorBase]): 19 | self.executors = executors 20 | 21 | def execute(self, target_pos) -> Tuple[float, pd.DataFrame]: 22 | 23 | turn_over, planed_pos = 0., target_pos 24 | 25 | for executor in self.executors: 26 | turn_over, planed_pos = executor.execute(planed_pos) 27 | 28 | executed_pos = planed_pos 29 | 30 | for executor in self.executors: 31 | executor.set_current(executed_pos) 32 | 33 | return turn_over, executed_pos 34 | 35 | def update(self, data_dict): 36 | 37 | for executor in self.executors: 38 | executor.update(data_dict=data_dict) 39 | -------------------------------------------------------------------------------- /alphamind/execution/targetvolexecutor.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-9-22 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | from typing import Tuple 9 | 10 | import pandas as pd 11 | from PyFin.Math.Accumulators import MovingAverage 12 | from PyFin.Math.Accumulators import MovingStandardDeviation 13 | 14 | from alphamind.execution.baseexecutor import ExecutorBase 15 | 16 | 17 | class 
TargetVolExecutor(ExecutorBase): 18 | 19 | def __init__(self, window=30, target_vol=0.01): 20 | super().__init__() 21 | self.m_vol = MovingStandardDeviation(window, 'return') 22 | self.m_leverage = MovingAverage(window, 'leverage') 23 | self.target_vol = target_vol 24 | self.multiplier = 1. 25 | 26 | def execute(self, target_pos: pd.DataFrame) -> Tuple[float, pd.DataFrame]: 27 | if not self.m_vol.isFull(): 28 | if self.current_pos.empty: 29 | turn_over = target_pos.weight.abs().sum() 30 | else: 31 | turn_over = self.calc_turn_over(target_pos, self.current_pos) 32 | return turn_over, target_pos 33 | else: 34 | c_vol = self.m_vol.result() 35 | c_leverage = self.m_leverage.result() 36 | self.multiplier = self.target_vol / c_vol * c_leverage 37 | candidate_pos = target_pos.copy() 38 | candidate_pos['weight'] = candidate_pos.weight.values * self.multiplier 39 | turn_over = self.calc_turn_over(candidate_pos, self.current_pos) 40 | return turn_over, candidate_pos 41 | 42 | def set_current(self, current_pos: pd.DataFrame): 43 | super().set_current(current_pos) 44 | self.m_leverage.push({'leverage': current_pos.weight.abs().sum()}) 45 | 46 | def update(self, data_dict: dict): 47 | self.m_vol.push(data_dict) 48 | -------------------------------------------------------------------------------- /alphamind/execution/thresholdexecutor.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-9-22 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | from typing import Tuple 9 | 10 | import pandas as pd 11 | 12 | from alphamind.execution.baseexecutor import ExecutorBase 13 | 14 | 15 | class ThresholdExecutor(ExecutorBase): 16 | 17 | def __init__(self, turn_over_threshold: float, is_relative=True): 18 | super().__init__() 19 | self.threshold = turn_over_threshold 20 | self.is_relative = is_relative 21 | 22 | def execute(self, target_pos: pd.DataFrame) -> Tuple[float, pd.DataFrame]: 23 | 24 | if self.current_pos.empty: 25 | return target_pos.weight.abs().sum(), target_pos 26 | else: 27 | turn_over = self.calc_turn_over(target_pos, self.current_pos) 28 | 29 | is_break = turn_over >= self.threshold * self.current_pos.weight.sum() if self.is_relative else turn_over >= self.threshold 30 | 31 | if is_break: 32 | return turn_over, target_pos 33 | else: 34 | return 0., self.current_pos.copy() 35 | -------------------------------------------------------------------------------- /alphamind/formula/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-miner/alpha-mind/023fca01d2cea7cd50328396c60b06c99706c426/alphamind/formula/__init__.py -------------------------------------------------------------------------------- /alphamind/formula/utilities.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-11-27 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | from alphamind.utilities import decode 9 | from alphamind.utilities import encode 10 | 11 | 12 | def encode_formula(formula): 13 | str_repr = encode(formula) 14 | return {'desc': str_repr, 15 | 'formula_type': formula.__class__.__module__ + "." 
+ formula.__class__.__name__, 16 | 'dependency': formula.fields, 17 | 'window': formula.window} 18 | 19 | 20 | def decode_formula(str_repr): 21 | formula = decode(str_repr) 22 | return formula 23 | 24 | 25 | if __name__ == '__main__': 26 | from PyFin.api import * 27 | 28 | eps_q_res = RES(20, LAST('eps_q') ^ LAST('roe_q')) 29 | print(eps_q_res) 30 | 31 | str_repr = encode_formula(eps_q_res) 32 | decoded_formula = decode_formula(str_repr) 33 | print(decoded_formula) 34 | -------------------------------------------------------------------------------- /alphamind/model/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-5-2 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | from alphamind.model.linearmodel import ConstLinearModel 9 | from alphamind.model.linearmodel import LassoRegression 10 | from alphamind.model.linearmodel import LinearRegression 11 | from alphamind.model.linearmodel import LogisticRegression 12 | from alphamind.model.loader import load_model 13 | from alphamind.model.svm import NvSVRModel 14 | from alphamind.model.treemodel import RandomForestClassifier 15 | from alphamind.model.treemodel import RandomForestRegressor 16 | from alphamind.model.treemodel import XGBClassifier 17 | from alphamind.model.treemodel import XGBRegressor 18 | from alphamind.model.treemodel import XGBTrainer 19 | 20 | __all__ = ['LinearRegression', 21 | 'LassoRegression', 22 | 'ConstLinearModel', 23 | 'LogisticRegression', 24 | 'RandomForestRegressor', 25 | 'RandomForestClassifier', 26 | 'XGBRegressor', 27 | 'XGBClassifier', 28 | 'XGBTrainer', 29 | 'NvSVRModel', 30 | 'load_model'] 31 | -------------------------------------------------------------------------------- /alphamind/model/linearmodel.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-5-10 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import numpy as np 9 | from simpleutils.asserts import require 10 | from sklearn.linear_model import Lasso 11 | from sklearn.linear_model import LinearRegression as LinearRegressionImpl 12 | from sklearn.linear_model import LogisticRegression as LogisticRegressionImpl 13 | 14 | from alphamind.model.modelbase import create_model_base 15 | 16 | 17 | class ConstLinearModelImpl(object): 18 | 19 | def __init__(self, weights: np.ndarray = None): 20 | self.weights = weights.flatten() 21 | 22 | def fit(self, x: np.ndarray, y: np.ndarray): 23 | raise NotImplementedError("Const linear model doesn't offer fit methodology") 24 | 25 | def predict(self, x: np.ndarray): 26 | return x @ self.weights 27 | 28 | def score(self, x: np.ndarray, y: np.ndarray) -> float: 29 | y_hat = self.predict(x) 30 | y_bar = y.mean() 31 | ssto = ((y - y_bar) ** 2).sum() 32 | sse = ((y - y_hat) ** 2).sum() 33 | return 1. 
- sse / ssto 34 | 35 | 36 | class ConstLinearModel(create_model_base()): 37 | 38 | def __init__(self, 39 | features=None, 40 | weights: dict = None, 41 | fit_target=None): 42 | super().__init__(features=features, fit_target=fit_target) 43 | if features is not None and weights is not None: 44 | require(len(features) == len(weights), 45 | ValueError, 46 | "length of features is not equal to length of weights") 47 | if weights: 48 | self.impl = ConstLinearModelImpl(np.array([weights[name] for name in self.features])) 49 | 50 | def save(self): 51 | model_desc = super().save() 52 | model_desc['weight'] = list(self.impl.weights) 53 | return model_desc 54 | 55 | @classmethod 56 | def load(cls, model_desc: dict): 57 | return super().load(model_desc) 58 | 59 | @property 60 | def weights(self): 61 | return self.impl.weights.tolist() 62 | 63 | 64 | class LinearRegression(create_model_base('sklearn')): 65 | 66 | def __init__(self, features=None, fit_intercept: bool = False, fit_target=None, **kwargs): 67 | super().__init__(features=features, fit_target=fit_target) 68 | self.impl = LinearRegressionImpl(fit_intercept=fit_intercept, **kwargs) 69 | 70 | def save(self) -> dict: 71 | model_desc = super().save() 72 | model_desc['weight'] = self.impl.coef_.tolist() 73 | return model_desc 74 | 75 | @property 76 | def weights(self): 77 | return self.impl.coef_.tolist() 78 | 79 | 80 | class LassoRegression(create_model_base('sklearn')): 81 | 82 | def __init__(self, alpha=0.01, features=None, fit_intercept: bool = False, fit_target=None, 83 | **kwargs): 84 | super().__init__(features=features, fit_target=fit_target) 85 | self.impl = Lasso(alpha=alpha, fit_intercept=fit_intercept, **kwargs) 86 | 87 | def save(self) -> dict: 88 | model_desc = super().save() 89 | model_desc['weight'] = self.impl.coef_.tolist() 90 | return model_desc 91 | 92 | @property 93 | def weights(self): 94 | return self.impl.coef_.tolist() 95 | 96 | 97 | class LogisticRegression(create_model_base('sklearn')): 98 | 99 | def __init__(self, features=None, fit_intercept: bool = False, fit_target=None, **kwargs): 100 | super().__init__(features=features, fit_target=fit_target) 101 | self.impl = LogisticRegressionImpl(fit_intercept=fit_intercept, **kwargs) 102 | 103 | def save(self) -> dict: 104 | model_desc = super().save() 105 | model_desc['weight'] = self.impl.coef_.tolist() 106 | return model_desc 107 | 108 | @property 109 | def weights(self): 110 | return self.impl.coef_.tolist() 111 | 112 | 113 | if __name__ == '__main__': 114 | import pprint 115 | 116 | ls = ConstLinearModel(['a', 'b'], np.array([0.5, 0.5])) 117 | 118 | x = np.array([[0.2, 0.2], 119 | [0.1, 0.1], 120 | [0.3, 0.1]]) 121 | 122 | ls.predict(x) 123 | 124 | desc = ls.save() 125 | new_model = ConstLinearModel.load(desc) 126 | 127 | pprint.pprint(new_model.save()) 128 | -------------------------------------------------------------------------------- /alphamind/model/loader.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-9-5 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | from alphamind.model.linearmodel import ConstLinearModel 9 | from alphamind.model.linearmodel import LassoRegression 10 | from alphamind.model.linearmodel import LinearRegression 11 | from alphamind.model.linearmodel import LogisticRegression 12 | from alphamind.model.modelbase import ModelBase 13 | from alphamind.model.svm import NvSVRModel 14 | from alphamind.model.treemodel import RandomForestClassifier 15 | from 
alphamind.model.treemodel import RandomForestRegressor 16 | from alphamind.model.treemodel import XGBClassifier 17 | from alphamind.model.treemodel import XGBRegressor 18 | from alphamind.model.treemodel import XGBTrainer 19 | 20 | 21 | def load_model(model_desc: dict) -> ModelBase: 22 | model_name = model_desc['model_name'] 23 | model_name_parts = set(model_name.split('.')) 24 | 25 | if 'ConstLinearModel' in model_name_parts: 26 | return ConstLinearModel.load(model_desc) 27 | elif 'LinearRegression' in model_name_parts: 28 | return LinearRegression.load(model_desc) 29 | elif 'LassoRegression' in model_name_parts: 30 | return LassoRegression.load(model_desc) 31 | elif 'LogisticRegression' in model_name_parts: 32 | return LogisticRegression.load(model_desc) 33 | elif 'RandomForestRegressor' in model_name_parts: 34 | return RandomForestRegressor.load(model_desc) 35 | elif 'RandomForestClassifier' in model_name_parts: 36 | return RandomForestClassifier.load(model_desc) 37 | elif 'XGBRegressor' in model_name_parts: 38 | return XGBRegressor.load(model_desc) 39 | elif 'XGBClassifier' in model_name_parts: 40 | return XGBClassifier.load(model_desc) 41 | elif 'XGBTrainer' in model_name_parts: 42 | return XGBTrainer.load(model_desc) 43 | elif 'NvSVR' in model_name_parts: 44 | return NvSVRModel.load(model_desc) 45 | else: 46 | raise ValueError('{0} is not currently supported in model loader.'.format(model_name)) 47 | -------------------------------------------------------------------------------- /alphamind/model/modelbase.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-9-4 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import abc 9 | from distutils.version import LooseVersion 10 | 11 | import arrow 12 | import numpy as np 13 | import pandas as pd 14 | from simpleutils.miscellaneous import list_eq 15 | from sklearn import __version__ as sklearn_version 16 | from xgboost import __version__ as xgbboot_version 17 | 18 | from alphamind.data.transformer import Transformer 19 | from alphamind.utilities import alpha_logger 20 | from alphamind.utilities import decode 21 | from alphamind.utilities import encode 22 | 23 | 24 | class ModelBase(metaclass=abc.ABCMeta): 25 | 26 | def __init__(self, features=None, fit_target=None): 27 | if features is not None: 28 | self.formulas = Transformer(features) 29 | self.features = self.formulas.names 30 | else: 31 | self.features = None 32 | 33 | if fit_target is not None: 34 | self.fit_target = Transformer(fit_target) 35 | else: 36 | self.fit_target = None 37 | self.impl = None 38 | self.trained_time = None 39 | 40 | def model_encode(self): 41 | return encode(self.impl) 42 | 43 | @classmethod 44 | def model_decode(cls, model_desc): 45 | return decode(model_desc) 46 | 47 | def __eq__(self, rhs): 48 | return self.model_encode() == rhs.model_encode() \ 49 | and self.trained_time == rhs.trained_time \ 50 | and list_eq(self.features, rhs.features) \ 51 | and encode(self.formulas) == encode(rhs.formulas) \ 52 | and encode(self.fit_target) == encode(rhs.fit_target) 53 | 54 | def fit(self, x: pd.DataFrame, y: np.ndarray): 55 | self.impl.fit(x[self.features].values, y.flatten()) 56 | self.trained_time = arrow.now().format("YYYY-MM-DD HH:mm:ss") 57 | 58 | def predict(self, x: pd.DataFrame) -> np.ndarray: 59 | return self.impl.predict(x[self.features].values) 60 | 61 | def score(self, x: pd.DataFrame, y: np.ndarray) -> float: 62 | return self.impl.score(x[self.features].values, y) 63 | 64 | def 
ic(self, x: pd.DataFrame, y: np.ndarray) -> float: 65 | predict_y = self.impl.predict(x[self.features].values) 66 | return np.corrcoef(predict_y, y)[0, 1] 67 | 68 | @abc.abstractmethod 69 | def save(self) -> dict: 70 | 71 | if self.__class__.__module__ == '__main__': 72 | alpha_logger.warning( 73 | "model is defined in a main module. The model_name may not be correct.") 74 | 75 | model_desc = dict(model_name=self.__class__.__module__ + "." + self.__class__.__name__, 76 | language='python', 77 | saved_time=arrow.now().format("YYYY-MM-DD HH:mm:ss"), 78 | features=list(self.features), 79 | trained_time=self.trained_time, 80 | desc=self.model_encode(), 81 | formulas=encode(self.formulas), 82 | fit_target=encode(self.fit_target), 83 | internal_model=self.impl.__class__.__module__ + "." + self.impl.__class__.__name__) 84 | return model_desc 85 | 86 | @classmethod 87 | @abc.abstractmethod 88 | def load(cls, model_desc: dict): 89 | obj_layout = cls() 90 | obj_layout.features = model_desc['features'] 91 | obj_layout.formulas = decode(model_desc['formulas']) 92 | obj_layout.trained_time = model_desc['trained_time'] 93 | obj_layout.impl = cls.model_decode(model_desc['desc']) 94 | if 'fit_target' in model_desc: 95 | obj_layout.fit_target = decode(model_desc['fit_target']) 96 | else: 97 | obj_layout.fit_target = None 98 | return obj_layout 99 | 100 | 101 | def create_model_base(party_name=None): 102 | if not party_name: 103 | return ModelBase 104 | else: 105 | class ExternalLibBase(ModelBase): 106 | _lib_name = party_name 107 | 108 | def save(self) -> dict: 109 | model_desc = super().save() 110 | if self._lib_name == 'sklearn': 111 | model_desc[self._lib_name + "_version"] = sklearn_version 112 | elif self._lib_name == 'xgboost': 113 | model_desc[self._lib_name + "_version"] = xgbboot_version 114 | else: 115 | raise ValueError( 116 | "3rd party lib name ({0}) is not recognized".format(self._lib_name)) 117 | return model_desc 118 | 119 | @classmethod 120 | def load(cls, model_desc: dict): 121 | obj_layout = super().load(model_desc) 122 | 123 | if cls._lib_name == 'sklearn': 124 | current_version = sklearn_version 125 | elif cls._lib_name == 'xgboost': 126 | current_version = xgbboot_version 127 | else: 128 | raise ValueError( 129 | "3rd party lib name ({0}) is not recognized".format(cls._lib_name)) 130 | 131 | if LooseVersion(current_version) < LooseVersion( 132 | model_desc[cls._lib_name + "_version"]): 133 | alpha_logger.warning( 134 | 'Current {2} version {0} is lower than the model version {1}. 
' 135 | 'Loaded model may work incorrectly.'.format(current_version, 136 | model_desc[cls._lib_name + "_version"], 137 | cls._lib_name)) 138 | return obj_layout 139 | 140 | return ExternalLibBase 141 | -------------------------------------------------------------------------------- /alphamind/model/svm.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2018-7-9 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | from sklearn.svm import NuSVR 9 | 10 | from alphamind.model.modelbase import create_model_base 11 | 12 | 13 | class NvSVRModel(create_model_base('sklearn')): 14 | 15 | def __init__(self, 16 | features=None, 17 | fit_target=None, 18 | **kwargs): 19 | super().__init__(features=features, fit_target=fit_target) 20 | self.impl = NuSVR(**kwargs) 21 | -------------------------------------------------------------------------------- /alphamind/portfolio/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-4-26 4 | 5 | @author: cheng.li 6 | """ 7 | -------------------------------------------------------------------------------- /alphamind/portfolio/evolver.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-11-23 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import numpy as np 9 | 10 | 11 | def evolve_positions(positions: np.ndarray, dx_ret: np.ndarray) -> np.ndarray: 12 | # assume return is log return 13 | 14 | simple_return = np.exp(dx_ret) 15 | evolved_positions = positions * simple_return 16 | leverage = np.abs(positions).sum() 17 | evolved_positions = evolved_positions * leverage / np.abs(evolved_positions).sum() 18 | return evolved_positions 19 | -------------------------------------------------------------------------------- /alphamind/portfolio/linearbuilder.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-5-5 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | from typing import Tuple 9 | from typing import Union 10 | 11 | import numpy as np 12 | from alphamind.portfolio.optimizers import LPOptimizer 13 | from alphamind.portfolio.optimizers import L1LPOptimizer 14 | from alphamind.exceptions.exceptions import PortfolioBuilderException 15 | 16 | 17 | def linear_builder(er: np.ndarray, 18 | lbound: Union[np.ndarray, float] = None, 19 | ubound: Union[np.ndarray, float] = None, 20 | risk_constraints: np.ndarray = None, 21 | risk_target: Tuple[np.ndarray, np.ndarray] = None, 22 | turn_over_target: float = None, 23 | current_position: np.ndarray = None, 24 | method: str = "deprecated") -> Tuple[str, np.ndarray, np.ndarray]: 25 | er = er.flatten() 26 | 27 | if risk_constraints is not None: 28 | risk_lbound = risk_target[0].reshape((-1, 1)) 29 | risk_ubound = risk_target[1].reshape((-1, 1)) 30 | cons_matrix = np.concatenate((risk_constraints.T, risk_lbound, risk_ubound), axis=1) 31 | else: 32 | cons_matrix = None 33 | 34 | if not turn_over_target or current_position is None: 35 | prob = LPOptimizer(-er, cons_matrix, lbound, ubound) 36 | 37 | if prob.status() == "optimal" or prob.status() == 'optimal_inaccurate': 38 | return prob.status(), prob.feval(), prob.x_value() 39 | else: 40 | raise PortfolioBuilderException(prob.status()) 41 | elif turn_over_target: 42 | prob = L1LPOptimizer(objective=-er, 43 | cons_matrix=cons_matrix, 44 | current_pos=current_position, 45 |
target_turn_over=turn_over_target, 46 | lbound=lbound, 47 | ubound=ubound) 48 | 49 | if prob.status() == 'optimal' or prob.status() == 'optimal_inaccurate': 50 | return prob.status(), prob.feval(), prob.x_value() 51 | else: 52 | raise PortfolioBuilderException(prob.status()) 53 | 54 | 55 | if __name__ == '__main__': 56 | n = 5 57 | lb = np.zeros(n) 58 | ub = 4. / n * np.ones(n) 59 | er = np.random.randn(n) 60 | current_pos = np.random.randint(0, n, size=n) 61 | current_pos = current_pos / current_pos.sum() 62 | turn_over_target = 0.1 63 | 64 | cons = np.ones((n, 1)) 65 | risk_lbound = np.ones(1) 66 | risk_ubound = np.ones(1) 67 | 68 | status, fvalue, x_values = linear_builder(er, 69 | lb, 70 | ub, 71 | cons, 72 | (risk_lbound, risk_ubound), 73 | turn_over_target, 74 | current_pos, 75 | method='ecos') 76 | 77 | print(status) 78 | print(fvalue) 79 | print(x_values) 80 | print(current_pos) 81 | 82 | print(np.abs(x_values - current_pos).sum()) 83 | -------------------------------------------------------------------------------- /alphamind/portfolio/longshortbulder.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-5-9 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import numpy as np 9 | 10 | from alphamind.utilities import group_mapping 11 | from alphamind.utilities import simple_abssum 12 | from alphamind.utilities import transform 13 | 14 | 15 | def long_short_builder(er: np.ndarray, 16 | leverage: float = 1., 17 | groups: np.ndarray = None, 18 | masks: np.ndarray = None) -> np.ndarray: 19 | er = er.copy() 20 | 21 | if masks is not None: 22 | er[masks] = 0. 23 | er[~masks] = er[~masks] - er[~masks].mean() 24 | 25 | if er.ndim == 1: 26 | er = er.reshape((-1, 1)) 27 | 28 | if groups is None: 29 | return er / simple_abssum(er, axis=0) * leverage 30 | else: 31 | groups = group_mapping(groups) 32 | return transform(groups, er, 'scale', scale=leverage) 33 | -------------------------------------------------------------------------------- /alphamind/portfolio/meanvariancebuilder.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-6-27 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | from typing import Dict 9 | from typing import Optional 10 | from typing import Tuple 11 | from typing import Union 12 | import numpy as np 13 | from alphamind.portfolio.optimizers import ( 14 | QuadraticOptimizer, 15 | TargetVolOptimizer 16 | ) 17 | 18 | from alphamind.exceptions.exceptions import PortfolioBuilderException 19 | 20 | 21 | def _create_bounds(lbound, 22 | ubound, 23 | bm, 24 | risk_exposure, 25 | risk_target): 26 | if lbound is not None: 27 | lbound = lbound - bm 28 | if ubound is not None: 29 | ubound = ubound - bm 30 | 31 | if risk_exposure is not None: 32 | cons_mat = risk_exposure.T 33 | bm_risk = cons_mat @ bm 34 | 35 | clbound = (risk_target[0] - bm_risk).reshape((-1, 1)) 36 | cubound = (risk_target[1] - bm_risk).reshape((-1, 1)) 37 | else: 38 | cons_mat = None 39 | clbound = None 40 | cubound = None 41 | 42 | return lbound, ubound, cons_mat, clbound, cubound 43 | 44 | 45 | def _create_result(optimizer, bm): 46 | if optimizer.status() == "optimal" or optimizer.status() == "optimal_inaccurate": 47 | return optimizer.status(), optimizer.feval(), optimizer.x_value() + bm 48 | else: 49 | raise PortfolioBuilderException(optimizer.status()) 50 | 51 | 52 | def mean_variance_builder(er: np.ndarray, 53 | risk_model: Dict[str, Union[None, 
np.ndarray]], 54 | bm: np.ndarray, 55 | lbound: Union[np.ndarray, float, None], 56 | ubound: Union[np.ndarray, float, None], 57 | risk_exposure: Optional[np.ndarray], 58 | risk_target: Optional[Tuple[np.ndarray, np.ndarray]], 59 | lam: float = 1., 60 | linear_solver: str = 'deprecated') -> Tuple[str, float, np.ndarray]: 61 | lbound, ubound, cons_mat, clbound, cubound = _create_bounds(lbound, ubound, bm, risk_exposure, 62 | risk_target) 63 | if cons_mat is not None: 64 | cons_matrix = np.concatenate([cons_mat, clbound, cubound], axis=1) 65 | else: 66 | cons_matrix = None 67 | 68 | cov = risk_model['cov'] 69 | special_risk = risk_model['idsync'] 70 | risk_cov = risk_model['factor_cov'] 71 | risk_exposure = risk_model['factor_loading'] 72 | 73 | prob = QuadraticOptimizer(objective=-er, 74 | cons_matrix=cons_matrix, 75 | lbound=lbound, 76 | ubound=ubound, 77 | penalty=lam, 78 | cov=cov, 79 | factor_cov=risk_cov, 80 | factor_load=risk_exposure, 81 | factor_special=special_risk) 82 | 83 | if prob.status() == "optimal" or prob.status() == 'optimal_inaccurate': 84 | return prob.status(), prob.feval(), prob.x_value() + bm 85 | else: 86 | raise PortfolioBuilderException(prob.status()) 87 | 88 | 89 | def target_vol_builder(er: np.ndarray, 90 | risk_model: Dict[str, Union[None, np.ndarray]], 91 | bm: np.ndarray, 92 | lbound: Union[np.ndarray, float], 93 | ubound: Union[np.ndarray, float], 94 | risk_exposure: Optional[np.ndarray], 95 | risk_target: Optional[Tuple[np.ndarray, np.ndarray]], 96 | vol_target: float = 1., 97 | linear_solver: str = 'ma27') -> Tuple[str, float, np.ndarray]: 98 | lbound, ubound, cons_mat, clbound, cubound = _create_bounds(lbound, ubound, bm, risk_exposure, 99 | risk_target) 100 | 101 | if cons_mat is not None: 102 | cons_matrix = np.concatenate([cons_mat, clbound, cubound], axis=1) 103 | else: 104 | cons_matrix = None 105 | 106 | cov = risk_model['cov'] 107 | special_risk = risk_model['idsync'] 108 | risk_cov = risk_model['factor_cov'] 109 | risk_exposure = risk_model['factor_loading'] 110 | 111 | prob = TargetVolOptimizer(objective=-er, 112 | cons_matrix=cons_matrix, 113 | lbound=lbound, 114 | ubound=ubound, 115 | target_vol=vol_target, 116 | factor_cov=risk_cov, 117 | factor_load=risk_exposure, 118 | factor_special=special_risk, 119 | cov=cov) 120 | if prob.status() == "optimal" or prob.status() == 'optimal_inaccurate': 121 | return prob.status(), prob.feval(), prob.x_value() + bm 122 | else: 123 | raise PortfolioBuilderException(prob.status()) 124 | -------------------------------------------------------------------------------- /alphamind/portfolio/percentbuilder.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-5-4 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import numpy as np 9 | from numpy import zeros 10 | from numpy import zeros_like 11 | 12 | from alphamind.utilities import groupby 13 | from alphamind.utilities import set_value 14 | 15 | 16 | def percent_build(er: np.ndarray, percent: float, groups: np.ndarray = None, 17 | masks: np.ndarray = None) -> np.ndarray: 18 | er = er.copy() 19 | 20 | if masks is not None: 21 | er[~masks] = -np.inf 22 | 23 | if er.ndim == 1 or (er.shape[0] == 1 or er.shape[1] == 1): 24 | # fast path methods for single column er 25 | neg_er = -er.flatten() 26 | length = len(neg_er) 27 | weights = zeros((length, 1)) 28 | if groups is not None: 29 | index_diff, order = groupby(groups) 30 | start = 0 31 | for diff_loc in index_diff: 32 | current_index = 
order[start:diff_loc + 1] 33 | current_ordering = neg_er[current_index].argsort() 34 | current_ordering.shape = -1, 1 35 | use_rank = int(percent * len(current_index)) 36 | set_value(weights, current_index[current_ordering[:use_rank]], 1.) 37 | start = diff_loc + 1 38 | else: 39 | ordering = neg_er.argsort() 40 | use_rank = int(percent * len(neg_er)) 41 | weights[ordering[:use_rank]] = 1. 42 | return weights.reshape(er.shape) 43 | else: 44 | neg_er = -er 45 | weights = zeros_like(er) 46 | 47 | if groups is not None: 48 | index_diff, order = groupby(groups) 49 | start = 0 50 | for diff_loc in index_diff: 51 | current_index = order[start:diff_loc + 1] 52 | current_ordering = neg_er[current_index].argsort(axis=0) 53 | use_rank = int(percent * len(current_index)) 54 | set_value(weights, current_index[current_ordering[:use_rank]], 1) 55 | start = diff_loc + 1 56 | else: 57 | ordering = neg_er.argsort(axis=0) 58 | use_rank = int(percent * len(neg_er)) 59 | set_value(weights, ordering[:use_rank], 1.) 60 | return weights 61 | -------------------------------------------------------------------------------- /alphamind/portfolio/rankbuilder.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-4-26 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import numpy as np 9 | from numpy import zeros 10 | from numpy import zeros_like 11 | 12 | from alphamind.utilities import groupby 13 | from alphamind.utilities import set_value 14 | 15 | 16 | def rank_build(er: np.ndarray, use_rank: int, groups: np.ndarray = None, 17 | masks: np.ndarray = None) -> np.ndarray: 18 | er = er.copy() 19 | 20 | if masks is not None: 21 | er[~masks] = -np.inf 22 | 23 | if er.ndim == 1 or (er.shape[0] == 1 or er.shape[1] == 1): 24 | # fast path methods for single column er 25 | neg_er = -er.flatten() 26 | length = len(neg_er) 27 | weights = zeros((length, 1)) 28 | if groups is not None: 29 | index_diff, order = groupby(groups) 30 | start = 0 31 | for diff_loc in index_diff: 32 | current_index = order[start:diff_loc + 1] 33 | current_ordering = neg_er[current_index].argsort() 34 | current_ordering.shape = -1, 1 35 | set_value(weights, current_index[current_ordering[:use_rank]], 1.) 36 | start = diff_loc + 1 37 | else: 38 | ordering = neg_er.argsort() 39 | weights[ordering[:use_rank]] = 1. 40 | return weights.reshape(er.shape) 41 | else: 42 | neg_er = -er 43 | weights = zeros_like(er) 44 | 45 | if groups is not None: 46 | index_diff, order = groupby(groups) 47 | start = 0 48 | for diff_loc in index_diff: 49 | current_index = order[start:diff_loc + 1] 50 | current_ordering = neg_er[current_index].argsort(axis=0) 51 | set_value(weights, current_index[current_ordering[:use_rank]], 1) 52 | start = diff_loc + 1 53 | else: 54 | ordering = neg_er.argsort(axis=0) 55 | set_value(weights, ordering[:use_rank], 1.) 
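# At this point weights holds 1. for the top `use_rank` names ranked by er
# (per group when groups is given) and 0. elsewhere; callers normalize the
# result themselves. A minimal usage sketch with hypothetical numbers:
#
#     er = np.array([3., 1., 2.])
#     rank_build(er, use_rank=2)    # -> array([1., 0., 1.])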
56 | return weights 57 | -------------------------------------------------------------------------------- /alphamind/portfolio/riskmodel.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2018-5-29 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import abc 9 | from typing import List 10 | 11 | import pandas as pd 12 | 13 | 14 | class RiskModel(metaclass=abc.ABCMeta): 15 | 16 | def get_risk_profile(self): 17 | pass 18 | 19 | 20 | class FullRiskModel(RiskModel): 21 | 22 | def __init__(self, sec_cov: pd.DataFrame): 23 | self.codes = sec_cov.index.tolist() 24 | self.sec_cov = sec_cov.loc[self.codes, self.codes] 25 | 26 | def get_cov(self, codes: List[int] = None): 27 | if codes: 28 | return self.sec_cov.loc[codes, codes].values 29 | else: 30 | return self.sec_cov.values 31 | 32 | def get_risk_profile(self, codes: List[int] = None): 33 | return dict( 34 | cov=self.get_cov(codes), 35 | factor_cov=None, 36 | factor_loading=None, 37 | idsync=None 38 | ) 39 | 40 | 41 | class FactorRiskModel(RiskModel): 42 | 43 | def __init__(self, 44 | factor_cov: pd.DataFrame, 45 | risk_exp: pd.DataFrame, 46 | idsync: pd.Series): 47 | self.factor_cov = factor_cov 48 | self.idsync = idsync 49 | self.codes = self.idsync.index.tolist() 50 | self.factor_names = sorted(self.factor_cov.index) 51 | self.risk_exp = risk_exp.loc[self.codes, self.factor_names] 52 | self.factor_cov = self.factor_cov.loc[self.factor_names, self.factor_names] 53 | self.idsync = self.idsync[self.codes] 54 | 55 | def get_risk_exp(self, codes: List[int] = None): 56 | if codes: 57 | return self.risk_exp.loc[codes, :].values 58 | else: 59 | return self.risk_exp.values 60 | 61 | def get_factor_cov(self): 62 | return self.factor_cov.values 63 | 64 | def get_idsync(self, codes: List[int] = None): 65 | if codes: 66 | return self.idsync[codes].values 67 | else: 68 | return self.idsync.values 69 | 70 | def get_risk_profile(self, codes: List[int] = None): 71 | return dict( 72 | cov=None, 73 | factor_cov=self.get_factor_cov(), 74 | factor_loading=self.get_risk_exp(codes), 75 | idsync=self.get_idsync(codes) 76 | ) 77 | -------------------------------------------------------------------------------- /alphamind/settlement/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-4-28 4 | 5 | @author: cheng.li 6 | """ 7 | -------------------------------------------------------------------------------- /alphamind/settlement/simplesettle.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-4-28 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import numpy as np 9 | import pandas as pd 10 | 11 | 12 | def simple_settle(weights: np.ndarray, 13 | dx_return: np.ndarray, 14 | groups: np.ndarray = None, 15 | benchmark: np.ndarray = None) -> pd.DataFrame: 16 | weights = weights.flatten() 17 | dx_return = dx_return.flatten() 18 | 19 | if benchmark is not None: 20 | net_pos = weights - benchmark 21 | else: 22 | net_pos = weights 23 | 24 | ret_arr = net_pos * dx_return 25 | 26 | if groups is not None: 27 | ret_agg = pd.Series(ret_arr).groupby(groups).sum() 28 | ret_agg.loc['total'] = ret_agg.sum() 29 | else: 30 | ret_agg = pd.Series(ret_arr.sum(), index=['total']) 31 | 32 | ret_agg.index.name = 'industry' 33 | ret_agg.name = 'er' 34 | 35 | pos_table = pd.DataFrame(net_pos, columns=['weight']) 36 | pos_table['ret'] = dx_return 
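# The block below computes a cross-sectional IC: per group, the Pearson
# correlation between net position weights and realized returns.
# groupby(groups).corr()['ret'] indexed at (group, 'weight') picks the
# off-diagonal entry of each group's 2x2 correlation matrix.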
37 | 38 | if groups is not None: 39 | ic_table = pos_table.groupby(groups).corr()['ret'].loc[(slice(None), 'weight')] 40 | ic_table.loc['total'] = pos_table.corr().iloc[0, 1] 41 | else: 42 | ic_table = pd.Series(pos_table.corr().iloc[0, 1], index=['total']) 43 | 44 | return pd.DataFrame({'er': ret_agg.values, 45 | 'ic': ic_table.values}, 46 | index=ret_agg.index) 47 | -------------------------------------------------------------------------------- /alphamind/strategy/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-miner/alpha-mind/023fca01d2cea7cd50328396c60b06c99706c426/alphamind/strategy/__init__.py -------------------------------------------------------------------------------- /alphamind/strategy/sample_strategy.json: -------------------------------------------------------------------------------- 1 | { 2 | "strategy_name": "sample_strategy", 3 | "data_process": { 4 | "pre_process": [ 5 | "winsorize", 6 | "standardize" 7 | ], 8 | "neutralize_risk": [ 9 | "SIZE", 10 | "industry_styles" 11 | ], 12 | "post_process": [ 13 | "winsorize", 14 | "standardize" 15 | ] 16 | }, 17 | "risk_model": { 18 | "type": "short", 19 | "neutralize_risk": [ 20 | "SIZE", 21 | "industry_styles" 22 | ] 23 | }, 24 | "alpha_model": { 25 | "model_type": "LinearRegression", 26 | "features": [ 27 | "EPS", 28 | "ROEDiluted" 29 | ], 30 | "parameters": { 31 | "fit_intercept": false 32 | } 33 | }, 34 | "freq": "1d", 35 | "batch": 4, 36 | "warm_start": 0, 37 | "universe": [ 38 | "zz500", 39 | [ 40 | "zz500" 41 | ] 42 | ], 43 | "benchmark": 905, 44 | "optimizer": { 45 | "type": "risk_neutral", 46 | "neutralize_risk": [ 47 | "SIZE", 48 | "industry_styles" 49 | ] 50 | }, 51 | "executor": { 52 | "type": "naive" 53 | } 54 | } -------------------------------------------------------------------------------- /alphamind/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-4-25 4 | 5 | @author: cheng.li 6 | """ 7 | -------------------------------------------------------------------------------- /alphamind/tests/analysis/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-5-8 4 | 5 | @author: cheng.li 6 | """ 7 | -------------------------------------------------------------------------------- /alphamind/tests/analysis/test_factoranalysis.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-5-25 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | 10 | import numpy as np 11 | import pandas as pd 12 | 13 | from alphamind.analysis.factoranalysis import factor_analysis 14 | from alphamind.data.neutralize import neutralize 15 | from alphamind.data.processing import factor_processing 16 | from alphamind.data.standardize import standardize 17 | from alphamind.data.winsorize import winsorize_normal 18 | from alphamind.portfolio.constraints import ( 19 | Constraints, 20 | ) 21 | 22 | 23 | class TestFactorAnalysis(unittest.TestCase): 24 | def setUp(self): 25 | self.raw_factor = np.random.randn(1000, 1) 26 | self.risk_factor = np.random.randn(1000, 3) 27 | self.d1returns = np.random.randn(1000, 1) 28 | 29 | def test_factor_processing(self): 30 | new_factor = factor_processing(self.raw_factor) 31 | np.testing.assert_array_almost_equal(new_factor, self.raw_factor) 32 | 
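# pre_process callables are applied left to right, so the pipeline below
# should reproduce winsorize_normal(standardize(raw_factor)) exactly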
33 | new_factor = factor_processing(self.raw_factor, 34 | pre_process=[standardize, winsorize_normal]) 35 | 36 | np.testing.assert_array_almost_equal(new_factor, 37 | winsorize_normal(standardize(self.raw_factor))) 38 | 39 | new_factor = factor_processing(self.raw_factor, 40 | pre_process=[standardize, winsorize_normal], 41 | risk_factors=self.risk_factor) 42 | 43 | np.testing.assert_array_almost_equal(new_factor, neutralize(self.risk_factor, 44 | winsorize_normal(standardize( 45 | self.raw_factor)))) 46 | 47 | def test_factor_analysis(self): 48 | benchmark = np.random.randint(50, size=1000) 49 | benchmark = benchmark / benchmark.sum() 50 | industry = np.random.randint(30, size=1000) 51 | 52 | factor_df = pd.DataFrame(self.raw_factor.flatten(), index=range(len(self.raw_factor))) 53 | factor_weights = np.array([1.]) 54 | 55 | constraints = Constraints() 56 | names = np.array(['a', 'b', 'c']) 57 | constraints.add_exposure(names, self.risk_factor) 58 | targets = self.risk_factor.T @ benchmark 59 | for i, name in enumerate(names): 60 | constraints.set_constraints(name, targets[i], targets[i]) 61 | 62 | weight_table, analysis_table = factor_analysis(factor_df, 63 | factor_weights, 64 | d1returns=self.d1returns, 65 | industry=industry, 66 | benchmark=benchmark, 67 | risk_exp=self.risk_factor, 68 | constraints=constraints) 69 | 70 | weight = weight_table.weight 71 | 72 | self.assertEqual(analysis_table['er'].sum() / analysis_table['er'].iloc[-1], 2.0) 73 | np.testing.assert_array_almost_equal(weight @ self.risk_factor, 74 | benchmark @ self.risk_factor) 75 | self.assertTrue(weight @ factor_df.values > benchmark @ factor_df.values) 76 | 77 | def test_factor_analysis_with_several_factors(self): 78 | benchmark = np.random.randint(50, size=1000) 79 | benchmark = benchmark / benchmark.sum() 80 | industry = np.random.randint(30, size=1000) 81 | 82 | factor_df = pd.DataFrame(np.random.randn(1000, 2), index=range(len(self.raw_factor))) 83 | factor_weights = np.array([0.2, 0.8]) 84 | 85 | constraints = Constraints() 86 | names = np.array(['a', 'b', 'c']) 87 | constraints.add_exposure(names, self.risk_factor) 88 | targets = self.risk_factor.T @ benchmark 89 | for i, name in enumerate(names): 90 | constraints.set_constraints(name, targets[i], targets[i]) 91 | 92 | weight_table, analysis_table = factor_analysis(factor_df, 93 | factor_weights, 94 | d1returns=self.d1returns, 95 | industry=industry, 96 | benchmark=benchmark, 97 | risk_exp=self.risk_factor, 98 | constraints=constraints) 99 | 100 | weight = weight_table.weight 101 | self.assertEqual(analysis_table['er'].sum() / analysis_table['er'].iloc[-1], 2.0) 102 | np.testing.assert_array_almost_equal(weight @ self.risk_factor, 103 | benchmark @ self.risk_factor) 104 | 105 | 106 | if __name__ == '__main__': 107 | unittest.main() 108 | -------------------------------------------------------------------------------- /alphamind/tests/analysis/test_perfanalysis.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-5-12 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | 10 | import numpy as np 11 | import pandas as pd 12 | 13 | from alphamind.analysis.perfanalysis import perf_attribution_by_pos 14 | 15 | 16 | class TestPerformanceAnalysis(unittest.TestCase): 17 | 18 | @classmethod 19 | def test_perf_attribution_by_pos(cls): 20 | n_samples = 36000 21 | n_dates = 20 22 | n_risk_factors = 35 23 | 24 | dates = np.sort(np.random.randint(n_dates, size=n_samples)) 25 | 
weights_series = pd.Series(data=np.random.randn(n_samples), index=dates) 26 | bm_series = pd.Series(data=np.random.randn(n_samples), index=dates) 27 | next_bar_return_series = pd.Series(data=np.random.randn(n_samples), index=dates) 28 | risk_table = pd.DataFrame(data=np.random.randn(n_samples, n_risk_factors), 29 | columns=list(range(n_risk_factors)), 30 | index=dates) 31 | 32 | explained_table = perf_attribution_by_pos(weights_series - bm_series, 33 | next_bar_return_series, 34 | risk_table) 35 | 36 | to_explain = (weights_series - bm_series).multiply(next_bar_return_series, axis=0) 37 | aggregated_to_explain = pd.Series(to_explain).groupby(dates).sum() 38 | aggregated_explained = explained_table.sum(axis=1) 39 | 40 | np.testing.assert_array_almost_equal(aggregated_to_explain.values, 41 | aggregated_explained.values) 42 | 43 | 44 | if __name__ == '__main__': 45 | unittest.main() 46 | -------------------------------------------------------------------------------- /alphamind/tests/analysis/test_quantilieanalysis.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-8-16 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | 10 | import numpy as np 11 | import pandas as pd 12 | 13 | from alphamind.analysis.quantileanalysis import er_quantile_analysis 14 | from alphamind.analysis.quantileanalysis import quantile_analysis 15 | from alphamind.data.processing import factor_processing 16 | from alphamind.data.quantile import quantile 17 | from alphamind.data.standardize import standardize 18 | from alphamind.data.winsorize import winsorize_normal 19 | 20 | 21 | class TestQuantileAnalysis(unittest.TestCase): 22 | def setUp(self): 23 | n = 5000 24 | n_f = 5 25 | 26 | self.x = np.random.randn(n, 5) 27 | self.x_w = np.random.randn(n_f) 28 | self.r = np.random.randn(n) 29 | self.b_w = np.random.randint(0, 10, n) 30 | self.b_w = self.b_w / float(self.b_w.sum()) 31 | self.risk_exp = np.random.randn(n, 3) 32 | self.n_bins = 10 33 | 34 | def test_q_anl_impl(self): 35 | n_bins = 5 36 | 37 | x = self.x[:, 0] 38 | q_groups = quantile(x, n_bins) 39 | 40 | s = pd.Series(self.r, index=q_groups) 41 | grouped_return = s.groupby(level=0).mean().values.flatten() 42 | 43 | expected_res = grouped_return.copy() 44 | res = n_bins - 1 45 | res_weight = 1. / res 46 | 47 | for i, value in enumerate(expected_res): 48 | expected_res[i] = (1. 
+ res_weight) * value - res_weight * grouped_return.sum() 49 | 50 | calculated_res = er_quantile_analysis(x, n_bins, self.r, de_trend=True) 51 | 52 | np.testing.assert_array_almost_equal(expected_res, calculated_res) 53 | 54 | def test_quantile_analysis_simple(self): 55 | f_df = pd.DataFrame(self.x) 56 | calculated = quantile_analysis(f_df, 57 | self.x_w, 58 | self.r, 59 | n_bins=self.n_bins, 60 | pre_process=[], 61 | post_process=[]) 62 | 63 | er = self.x_w @ self.x.T 64 | expected = er_quantile_analysis(er, self.n_bins, self.r) 65 | np.testing.assert_array_almost_equal(calculated, expected) 66 | 67 | def test_quantile_analysis_with_factor_processing(self): 68 | f_df = pd.DataFrame(self.x) 69 | calculated = quantile_analysis(f_df, 70 | self.x_w, 71 | self.r, 72 | n_bins=self.n_bins, 73 | risk_exp=self.risk_exp, 74 | pre_process=[winsorize_normal, standardize], 75 | post_process=[standardize]) 76 | 77 | er = self.x_w @ factor_processing(self.x, 78 | [winsorize_normal, standardize], 79 | self.risk_exp, 80 | [standardize]).T 81 | expected = er_quantile_analysis(er, self.n_bins, self.r) 82 | np.testing.assert_array_almost_equal(calculated, expected) 83 | 84 | 85 | if __name__ == '__main__': 86 | unittest.main() 87 | -------------------------------------------------------------------------------- /alphamind/tests/analysis/test_riskanalysis.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-5-8 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | 10 | import numpy as np 11 | import pandas as pd 12 | 13 | from alphamind.analysis.riskanalysis import risk_analysis 14 | 15 | 16 | class TestRiskAnalysis(unittest.TestCase): 17 | 18 | @staticmethod 19 | def test_risk_analysis(): 20 | n_samples = 36000 21 | n_dates = 20 22 | n_risk_factors = 35 23 | 24 | dates = np.sort(np.random.randint(n_dates, size=n_samples)) 25 | weights_series = pd.Series(data=np.random.randn(n_samples), index=dates) 26 | bm_series = pd.Series(data=np.random.randn(n_samples), index=dates) 27 | next_bar_return_series = pd.Series(data=np.random.randn(n_samples), index=dates) 28 | risk_table = pd.DataFrame(data=np.random.randn(n_samples, n_risk_factors), 29 | columns=list(range(n_risk_factors)), 30 | index=dates) 31 | 32 | explained_table, _ = risk_analysis(weights_series - bm_series, 33 | next_bar_return_series, 34 | risk_table) 35 | 36 | to_explain = (weights_series - bm_series).multiply(next_bar_return_series, axis=0) 37 | aggregated = explained_table.sum(axis=1) 38 | 39 | np.testing.assert_array_almost_equal(to_explain.values, aggregated.values) 40 | 41 | 42 | if __name__ == '__main__': 43 | unittest.main() 44 | -------------------------------------------------------------------------------- /alphamind/tests/cython/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-miner/alpha-mind/023fca01d2cea7cd50328396c60b06c99706c426/alphamind/tests/cython/__init__.py -------------------------------------------------------------------------------- /alphamind/tests/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-miner/alpha-mind/023fca01d2cea7cd50328396c60b06c99706c426/alphamind/tests/data/__init__.py -------------------------------------------------------------------------------- /alphamind/tests/data/engines/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-miner/alpha-mind/023fca01d2cea7cd50328396c60b06c99706c426/alphamind/tests/data/engines/__init__.py -------------------------------------------------------------------------------- /alphamind/tests/data/engines/test_universe.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2018-2-9 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | 10 | from alphamind.data.engines.universe import Universe 11 | from alphamind.data.engines.universe import load_universe 12 | 13 | 14 | class TestUniverse(unittest.TestCase): 15 | 16 | def test_universe_equal(self): 17 | universe1 = Universe('zz500') 18 | universe2 = Universe('zz500') 19 | self.assertEqual(universe1, universe2) 20 | 21 | universe1 = Universe('zz500') 22 | universe2 = Universe('zz800') 23 | self.assertNotEqual(universe1, universe2) 24 | 25 | def test_universe_persistence(self): 26 | universe = Universe('zz500') 27 | univ_desc = universe.save() 28 | loaded_universe = load_universe(univ_desc) 29 | self.assertEqual(universe, loaded_universe) 30 | 31 | def test_universe_arithmetic(self): 32 | universe = Universe('zz500') + Universe('hs300') 33 | univ_desc = universe.save() 34 | loaded_universe = load_universe(univ_desc) 35 | self.assertEqual(universe, loaded_universe) 36 | 37 | universe = Universe('zz500') - Universe('hs300') 38 | univ_desc = universe.save() 39 | loaded_universe = load_universe(univ_desc) 40 | self.assertEqual(universe, loaded_universe) 41 | 42 | universe = Universe('zz500') & Universe('hs300') 43 | univ_desc = universe.save() 44 | loaded_universe = load_universe(univ_desc) 45 | self.assertEqual(universe, loaded_universe) 46 | -------------------------------------------------------------------------------- /alphamind/tests/data/test_neutralize.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-4-25 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | 10 | import numpy as np 11 | from sklearn.linear_model import LinearRegression 12 | 13 | from alphamind.data.neutralize import neutralize 14 | 15 | 16 | class TestNeutralize(unittest.TestCase): 17 | 18 | def setUp(self): 19 | self.y = np.random.randn(3000, 4) 20 | self.x = np.random.randn(3000, 10) 21 | self.groups = np.random.randint(30, size=3000) 22 | 23 | def test_neutralize(self): 24 | calc_res = neutralize(self.x, self.y) 25 | 26 | model = LinearRegression(fit_intercept=False) 27 | model.fit(self.x, self.y) 28 | 29 | exp_res = self.y - self.x @ model.coef_.T 30 | 31 | np.testing.assert_array_almost_equal(calc_res, exp_res) 32 | 33 | def test_neutralize_with_group(self): 34 | 35 | calc_res = neutralize(self.x, self.y, self.groups) 36 | 37 | model = LinearRegression(fit_intercept=False) 38 | for i in range(30): 39 | curr_x = self.x[self.groups == i] 40 | curr_y = self.y[self.groups == i] 41 | model.fit(curr_x, curr_y) 42 | exp_res = curr_y - curr_x @ model.coef_.T 43 | np.testing.assert_array_almost_equal(calc_res[self.groups == i], exp_res) 44 | 45 | def test_neutralize_explain_output(self): 46 | y = self.y[:, 0].flatten() 47 | 48 | calc_res, other_stats = neutralize(self.x, y, detail=True) 49 | 50 | model = LinearRegression(fit_intercept=False) 51 | model.fit(self.x, y) 52 | 53 | exp_res = y - self.x @ model.coef_.T 54 | exp_explained = self.x * model.coef_.T 55 | 56 |
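        # For a 1-d target, the neutralized residual comes back as a single column, hence the reshape below.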
np.testing.assert_array_almost_equal(calc_res, exp_res.reshape(-1, 1)) 57 | np.testing.assert_array_almost_equal(other_stats['explained'][:, :, 0], exp_explained) 58 | 59 | calc_res, other_stats = neutralize(self.x, self.y, detail=True) 60 | 61 | model = LinearRegression(fit_intercept=False) 62 | model.fit(self.x, self.y) 63 | 64 | exp_res = self.y - self.x @ model.coef_.T 65 | np.testing.assert_array_almost_equal(calc_res, exp_res) 66 | 67 | for i in range(self.y.shape[1]): 68 | exp_explained = self.x * model.coef_.T[:, i] 69 | np.testing.assert_array_almost_equal(other_stats['explained'][:, :, i], exp_explained) 70 | 71 | def test_neutralize_explain_output_with_group(self): 72 | y = self.y[:, 0].flatten() 73 | 74 | calc_res, other_stats = neutralize(self.x, y, self.groups, detail=True) 75 | 76 | model = LinearRegression(fit_intercept=False) 77 | for i in range(30): 78 | curr_x = self.x[self.groups == i] 79 | curr_y = y[self.groups == i] 80 | model.fit(curr_x, curr_y) 81 | exp_res = curr_y - curr_x @ model.coef_.T 82 | exp_explained = curr_x * model.coef_.T 83 | np.testing.assert_array_almost_equal(calc_res[self.groups == i], exp_res.reshape(-1, 1)) 84 | np.testing.assert_array_almost_equal(other_stats['explained'][self.groups == i, :, 0], 85 | exp_explained) 86 | 87 | calc_res, other_stats = neutralize(self.x, self.y, self.groups, detail=True) 88 | 89 | model = LinearRegression(fit_intercept=False) 90 | for i in range(30): 91 | curr_x = self.x[self.groups == i] 92 | curr_y = self.y[self.groups == i] 93 | model.fit(curr_x, curr_y) 94 | exp_res = curr_y - curr_x @ model.coef_.T 95 | np.testing.assert_array_almost_equal(calc_res[self.groups == i], exp_res) 96 | 97 | for j in range(self.y.shape[1]): 98 | exp_explained = curr_x * model.coef_.T[:, j] 99 | np.testing.assert_array_almost_equal( 100 | other_stats['explained'][self.groups == i, :, j], exp_explained) 101 | 102 | 103 | if __name__ == '__main__': 104 | unittest.main() 105 | -------------------------------------------------------------------------------- /alphamind/tests/data/test_quantile.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-8-16 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | 10 | import numpy as np 11 | 12 | from alphamind.data.quantile import quantile 13 | 14 | 15 | class TestQuantile(unittest.TestCase): 16 | 17 | def test_quantile(self): 18 | n = 5000 19 | bins = 10 20 | s = np.random.randn(n) 21 | calculated = quantile(s, bins) 22 | 23 | rank = s.argsort().argsort() 24 | 25 | bin_size = float(n) / bins 26 | pillars = [int(i * bin_size) for i in range(1, bins + 1)] 27 | 28 | starter = 0 29 | for i, r in enumerate(pillars): 30 | self.assertTrue(np.all(calculated[(rank >= starter) & (rank < r)] == i)) 31 | starter = r 32 | 33 | 34 | if __name__ == "__main__": 35 | unittest.main() 36 | -------------------------------------------------------------------------------- /alphamind/tests/data/test_rank.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-8-8 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | 10 | import numpy as np 11 | import pandas as pd 12 | 13 | from alphamind.data.rank import rank 14 | 15 | 16 | class TestRank(unittest.TestCase): 17 | 18 | def setUp(self): 19 | self.x = np.random.randn(1000, 1) 20 | self.groups = np.random.randint(0, 10, 1000) 21 | 22 | def test_rank(self): 23 | data_rank = rank(self.x) 
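        # rank() yields zero-based ordinal positions, so scattering each value into its rank slot should produce strictly increasing columns.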
24 | 25 | sorted_array = np.zeros_like(self.x) 26 | for i in range(self.x.shape[0]): 27 | for j in range(self.x.shape[1]): 28 | sorted_array[int(data_rank[i, j]), j] = self.x[i, j] 29 | 30 | arr_diff = np.diff(sorted_array, axis=0) 31 | np.testing.assert_array_less(0, arr_diff) 32 | 33 | def test_rank_with_groups(self): 34 | data = pd.DataFrame(data={'raw': self.x.tolist()}, index=self.groups) 35 | data['rank'] = rank(data['raw'].values, groups=data.index) 36 | groups = dict(list(data['rank'].groupby(level=0))) 37 | ret = [] 38 | for index in range(10): 39 | ret.append(groups[index].values) 40 | ret = np.concatenate(ret).reshape(-1, 1) 41 | 42 | expected_rank = data['raw'].groupby(level=0).apply( 43 | lambda x: x.values.argsort(axis=0).argsort(axis=0)) 44 | expected_rank = np.concatenate(expected_rank).reshape(-1, 1) 45 | np.testing.assert_array_equal(ret, expected_rank) 46 | -------------------------------------------------------------------------------- /alphamind/tests/data/test_standardize.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-4-25 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | 10 | import numpy as np 11 | import pandas as pd 12 | from scipy.stats import zscore 13 | 14 | from alphamind.data.standardize import Standardizer 15 | from alphamind.data.standardize import projection 16 | from alphamind.data.standardize import standardize 17 | 18 | 19 | class TestStandardize(unittest.TestCase): 20 | 21 | def setUp(self): 22 | self.x = np.random.randn(3000, 10) 23 | self.groups = np.random.randint(10, 30, size=3000) 24 | 25 | def test_standardize(self): 26 | calc_zscore = standardize(self.x) 27 | exp_zscore = zscore(self.x, ddof=1) 28 | 29 | np.testing.assert_array_almost_equal(calc_zscore, exp_zscore) 30 | 31 | def test_projection(self): 32 | calc_projected = projection(self.x) 33 | exp_projected = self.x / np.sqrt(np.sum(np.square(self.x), axis=1).reshape((-1, 1))) 34 | 35 | np.testing.assert_array_almost_equal(calc_projected, exp_projected) 36 | 37 | def test_projection_with_groups(self): 38 | calc_projected = projection(self.x, self.groups, axis=0) 39 | exp_projected = pd.DataFrame(self.x).groupby( 40 | self.groups 41 | ).transform(lambda s: s / np.sqrt(np.square(s).sum(axis=0))) 42 | 43 | np.testing.assert_array_almost_equal(calc_projected, exp_projected) 44 | 45 | def test_standardize_with_group(self): 46 | calc_zscore = standardize(self.x, self.groups) 47 | exp_zscore = pd.DataFrame(self.x). \ 48 | groupby(self.groups). 
\ 49 | transform(lambda s: (s - s.mean(axis=0)) / s.std(axis=0, ddof=1)) 50 | np.testing.assert_array_almost_equal(calc_zscore, exp_zscore) 51 | 52 | def test_standardizer(self): 53 | s = Standardizer() 54 | s.fit(self.x) 55 | calc_zscore = s.transform(self.x) 56 | 57 | exp_zscore = standardize(self.x) 58 | np.testing.assert_array_almost_equal(calc_zscore, exp_zscore) 59 | np.testing.assert_array_almost_equal(s(self.x), exp_zscore) 60 | 61 | def test_grouped_standardizer(self): 62 | s = Standardizer() 63 | s.fit(self.x, self.groups) 64 | calc_zscore = s.transform(self.x, self.groups) 65 | 66 | exp_zscore = standardize(self.x, self.groups) 67 | np.testing.assert_array_almost_equal(calc_zscore, exp_zscore) 68 | np.testing.assert_array_almost_equal(s(self.x, self.groups), exp_zscore) 69 | 70 | 71 | if __name__ == '__main__': 72 | import datetime as dt 73 | from sklearn.preprocessing import StandardScaler 74 | 75 | x = np.random.randn(1000, 2) 76 | y = np.random.randn(50, 2) 77 | 78 | start = dt.datetime.now() 79 | for i in range(10000): 80 | s1 = StandardScaler() 81 | s1.fit(x) 82 | x1 = s1.transform(y) 83 | print(dt.datetime.now() - start) 84 | 85 | start = dt.datetime.now() 86 | for i in range(10000): 87 | s2 = Standardizer(ddof=0) 88 | s2.fit(x) 89 | x2 = s2.transform(y) 90 | print(dt.datetime.now() - start) 91 | 92 | np.testing.assert_array_almost_equal(x1, x2) 93 | -------------------------------------------------------------------------------- /alphamind/tests/data/test_winsorize.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-4-25 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | 10 | import numpy as np 11 | import pandas as pd 12 | 13 | from alphamind.data.winsorize import NormalWinsorizer 14 | from alphamind.data.winsorize import winsorize_normal 15 | 16 | 17 | class TestWinsorize(unittest.TestCase): 18 | 19 | def setUp(self): 20 | np.random.seed(10) 21 | self.x = np.random.randn(3000, 10) 22 | self.groups = np.random.randint(10, 30, size=3000) 23 | self.num_stds = 2 24 | 25 | def test_winsorize_normal(self): 26 | calc_winsorized = winsorize_normal(self.x, self.num_stds) 27 | 28 | std_values = self.x.std(axis=0, ddof=1) 29 | mean_value = self.x.mean(axis=0) 30 | 31 | lower_bound = mean_value - self.num_stds * std_values 32 | upper_bound = mean_value + self.num_stds * std_values 33 | 34 | for i in range(np.size(calc_winsorized, 1)): 35 | col_data = self.x[:, i] 36 | col_data[col_data > upper_bound[i]] = upper_bound[i] 37 | col_data[col_data < lower_bound[i]] = lower_bound[i] 38 | 39 | calculated_col = calc_winsorized[:, i] 40 | np.testing.assert_array_almost_equal(col_data, calculated_col) 41 | 42 | def test_winsorize_normal_with_interp(self): 43 | calc_winsorized = winsorize_normal(self.x, self.num_stds, method='interp') 44 | 45 | std_values = self.x.std(axis=0, ddof=1) 46 | mean_value = self.x.mean(axis=0) 47 | 48 | lower_bound = mean_value - self.num_stds * std_values 49 | upper_bound = mean_value + self.num_stds * std_values 50 | 51 | for i in range(np.size(calc_winsorized, 1)): 52 | col_data = self.x[:, i].copy() 53 | 54 | idx = col_data > upper_bound[i] 55 | u_values = col_data[idx] 56 | q_values = u_values.argsort().argsort() 57 | if len(q_values) > 0: 58 | col_data[idx] = upper_bound[i] + q_values / len(q_values) * 0.5 * std_values[i] 59 | 60 | idx = col_data < lower_bound[i] 61 | l_values = col_data[idx] 62 | q_values = (-l_values).argsort().argsort() 63 | if len(q_values) 
> 0: 64 | col_data[idx] = lower_bound[i] - q_values / len(q_values) * 0.5 * std_values[i] 65 | 66 | calculated_col = calc_winsorized[:, i] 67 | np.testing.assert_array_almost_equal(col_data, calculated_col) 68 | 69 | def test_winsorize_normal_with_group(self): 70 | cal_winsorized = winsorize_normal(self.x, self.num_stds, groups=self.groups) 71 | 72 | def impl(x): 73 | std_values = x.std(axis=0, ddof=1) 74 | mean_value = x.mean(axis=0) 75 | 76 | lower_bound = mean_value - self.num_stds * std_values 77 | upper_bound = mean_value + self.num_stds * std_values 78 | 79 | res = np.where(x > upper_bound, upper_bound, x) 80 | res = np.where(res < lower_bound, lower_bound, res) 81 | return res 82 | 83 | exp_winsorized = pd.DataFrame(self.x).groupby(self.groups).transform(impl).values 84 | np.testing.assert_array_almost_equal(cal_winsorized, exp_winsorized) 85 | 86 | def test_winsorize_normal_with_group_and_interp(self): 87 | cal_winsorized = winsorize_normal(self.x, self.num_stds, groups=self.groups, 88 | method='interp') 89 | 90 | def impl(x): 91 | x = x.values 92 | std_values = x.std(axis=0, ddof=1) 93 | mean_value = x.mean(axis=0) 94 | 95 | lower_bound = mean_value - self.num_stds * std_values 96 | upper_bound = mean_value + self.num_stds * std_values 97 | 98 | col_data = x.copy() 99 | 100 | idx = col_data > upper_bound 101 | u_values = col_data[idx] 102 | q_values = u_values.argsort().argsort() 103 | if len(q_values) > 0: 104 | col_data[idx] = upper_bound + q_values / len(q_values) * 0.5 * std_values 105 | 106 | idx = col_data < lower_bound 107 | l_values = col_data[idx] 108 | q_values = (-l_values).argsort().argsort() 109 | if len(q_values) > 0: 110 | col_data[idx] = lower_bound - q_values / len(q_values) * 0.5 * std_values 111 | return col_data 112 | 113 | exp_winsorized = pd.DataFrame(self.x).groupby(self.groups).transform(impl).values 114 | np.testing.assert_array_almost_equal(cal_winsorized, exp_winsorized) 115 | 116 | def test_normal_winsorizer(self): 117 | s = NormalWinsorizer(num_stds=self.num_stds) 118 | s.fit(self.x) 119 | calc_winsorized1 = s.transform(self.x) 120 | calc_winsorized2 = s(self.x) 121 | 122 | std_values = self.x.std(axis=0, ddof=1) 123 | mean_value = self.x.mean(axis=0) 124 | 125 | lower_bound = mean_value - self.num_stds * std_values 126 | upper_bound = mean_value + self.num_stds * std_values 127 | 128 | for i in range(np.size(calc_winsorized1, 1)): 129 | col_data = self.x[:, i] 130 | col_data[col_data > upper_bound[i]] = upper_bound[i] 131 | col_data[col_data < lower_bound[i]] = lower_bound[i] 132 | 133 | calculated_col = calc_winsorized1[:, i] 134 | np.testing.assert_array_almost_equal(col_data, calculated_col) 135 | calculated_col = calc_winsorized2[:, i] 136 | np.testing.assert_array_almost_equal(col_data, calculated_col) 137 | 138 | 139 | if __name__ == "__main__": 140 | unittest.main() 141 | -------------------------------------------------------------------------------- /alphamind/tests/execution/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-miner/alpha-mind/023fca01d2cea7cd50328396c60b06c99706c426/alphamind/tests/execution/__init__.py -------------------------------------------------------------------------------- /alphamind/tests/execution/test_naiveexecutor.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-9-22 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | 10 | import 
pandas as pd 11 | 12 | from alphamind.execution.naiveexecutor import NaiveExecutor 13 | 14 | 15 | class TestNaiveExecutor(unittest.TestCase): 16 | 17 | def test_naive_executor(self): 18 | target_pos = pd.DataFrame({'code': [1, 2, 3], 19 | 'weight': [0.2, 0.3, 0.5], 20 | 'industry': ['a', 'b', 'c']}) 21 | 22 | # 1st round 23 | executor = NaiveExecutor() 24 | turn_over, executed_pos = executor.execute(target_pos) 25 | executor.set_current(executed_pos) 26 | self.assertAlmostEqual(turn_over, 1.0) 27 | 28 | # 2nd round 29 | target_pos = pd.DataFrame({'code': [1, 2, 4], 30 | 'weight': [0.3, 0.2, 0.5], 31 | 'industry': ['a', 'b', 'd']}) 32 | 33 | turn_over, executed_pos = executor.execute(target_pos) 34 | executor.set_current(executed_pos) 35 | self.assertAlmostEqual(turn_over, 1.2) 36 | 37 | # 3rd round 38 | target_pos = pd.DataFrame({'code': [1, 3, 4], 39 | 'weight': [0.3, 0.2, 0.5], 40 | 'industry': ['a', 'c', 'd']}) 41 | turn_over, executed_pos = executor.execute(target_pos) 42 | executor.set_current(executed_pos) 43 | self.assertAlmostEqual(turn_over, 0.4) 44 | 45 | 46 | if __name__ == '__main__': 47 | unittest.main() 48 | -------------------------------------------------------------------------------- /alphamind/tests/execution/test_pipeline.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-9-25 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | from collections import deque 10 | 11 | import numpy as np 12 | import pandas as pd 13 | 14 | from alphamind.execution.pipeline import ExecutionPipeline 15 | from alphamind.execution.targetvolexecutor import TargetVolExecutor 16 | from alphamind.execution.thresholdexecutor import ThresholdExecutor 17 | 18 | 19 | class TestExecutionPipeline(unittest.TestCase): 20 | 21 | def test_execution_pipeline(self): 22 | n = 100 23 | window = 60 24 | target_vol = 0.01 25 | turn_over_threshold = 0.5 26 | 27 | executor1 = TargetVolExecutor(window=window, target_vol=target_vol) 28 | executor2 = ThresholdExecutor(turn_over_threshold=turn_over_threshold) 29 | 30 | execution_pipeline = ExecutionPipeline(executors=[executor1, executor2]) 31 | 32 | return_1 = np.random.randn(2000, n) * 0.05 33 | return_2 = np.random.randn(2000, n) * 0.2 34 | return_total = np.concatenate((return_1, return_2)) 35 | codes = np.array(list(range(n))) 36 | 37 | ret_deq = deque(maxlen=window) 38 | 39 | for i, row in enumerate(return_total): 40 | weights = np.random.randint(0, 100, n) 41 | weights = weights / weights.sum() 42 | pos = pd.DataFrame({'code': codes, 'weight': weights}) 43 | turn_over, executed_pos = execution_pipeline.execute(pos) 44 | daily_return = row @ executed_pos.weight.values.flatten() 45 | data_dict = {'return': daily_return} 46 | execution_pipeline.update(data_dict=data_dict) 47 | ret_deq.append(daily_return) 48 | -------------------------------------------------------------------------------- /alphamind/tests/execution/test_targetvolexecutor.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-9-22 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | from collections import deque 10 | 11 | import numpy as np 12 | import pandas as pd 13 | 14 | from alphamind.execution.targetvolexecutor import TargetVolExecutor 15 | 16 | 17 | class TestTargetVolExecutor(unittest.TestCase): 18 | 19 | def test_target_vol_executor(self): 20 | n = 100 21 | window = 30 22 | target_vol = 0.01 23 | 
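        # The sample has a low-vol regime followed by a high-vol regime, so the executor must rescale positions as realized volatility moves away from the target.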
24 | executor = TargetVolExecutor(window=window, target_vol=target_vol) 25 | 26 | return_1 = np.random.randn(2000, n) * 0.05 27 | return_2 = np.random.randn(2000, n) * 0.2 28 | return_total = np.concatenate((return_1, return_2)) 29 | 30 | weights = np.ones(n) / n 31 | codes = np.array(list(range(n))) 32 | 33 | ret_deq = deque(maxlen=window) 34 | 35 | for i, row in enumerate(return_total): 36 | pos = pd.DataFrame({'code': codes, 'weight': weights}) 37 | turn_over, executed_pos = executor.execute(pos) 38 | 39 | if i >= window: 40 | c_vol = np.std(ret_deq, ddof=1) 41 | np.testing.assert_array_almost_equal(executed_pos.weight, pos.weight * target_vol / c_vol) 42 | else: 43 | np.testing.assert_array_almost_equal(executed_pos.weight, pos.weight) 44 | 45 | executor.set_current(executed_pos) 46 | daily_return = row @ weights 47 | data_dict = {'return': daily_return} 48 | executor.update(data_dict=data_dict) 49 | ret_deq.append(daily_return) 50 | 51 | 52 | if __name__ == '__main__': 53 | unittest.main() 54 | -------------------------------------------------------------------------------- /alphamind/tests/execution/test_thresholdexecutor.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-9-22 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | 10 | import pandas as pd 11 | 12 | from alphamind.execution.thresholdexecutor import ThresholdExecutor 13 | 14 | 15 | class TestThresholdExecutor(unittest.TestCase): 16 | 17 | def test_threshold_executor(self): 18 | target_pos = pd.DataFrame({'code': [1, 2, 3], 19 | 'weight': [0.2, 0.3, 0.5], 20 | 'industry': ['a', 'b', 'c']}) 21 | 22 | executor = ThresholdExecutor(turn_over_threshold=0.5) 23 | 24 | # 1st round 25 | turn_over, executed_pos = executor.execute(target_pos) 26 | executor.set_current(executed_pos) 27 | self.assertTrue(target_pos.equals(executed_pos)) 28 | self.assertAlmostEqual(turn_over, target_pos.weight.sum()) 29 | 30 | # 2nd round 31 | target_pos = pd.DataFrame({'code': [1, 2, 4], 32 | 'weight': [0.3, 0.2, 0.5], 33 | 'industry': ['a', 'b', 'd']}) 34 | 35 | turn_over, executed_pos = executor.execute(target_pos) 36 | executor.set_current(executed_pos) 37 | self.assertTrue(target_pos.equals(executed_pos)) 38 | self.assertTrue(executed_pos.equals(executor.current_pos)) 39 | self.assertAlmostEqual(turn_over, 1.2) 40 | 41 | # 3rd round 42 | target_pos = pd.DataFrame({'code': [1, 3, 4], 43 | 'weight': [0.3, 0.2, 0.5], 44 | 'industry': ['a', 'c', 'd']}) 45 | turn_over, executed_pos2 = executor.execute(target_pos) 46 | executor.set_current(executed_pos2) 47 | self.assertTrue(executed_pos.equals(executed_pos2)) 48 | self.assertAlmostEqual(turn_over, 0.)
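        # The third-round change (|0. - 0.2| + |0.2 - 0.| = 0.4) stays below the 0.5 threshold,
        # so the executor keeps the previous positions and reports zero turnover.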
49 | 50 | 51 | if __name__ == '__main__': 52 | unittest.main() 53 | -------------------------------------------------------------------------------- /alphamind/tests/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-miner/alpha-mind/023fca01d2cea7cd50328396c60b06c99706c426/alphamind/tests/model/__init__.py -------------------------------------------------------------------------------- /alphamind/tests/model/test_composer.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2018-2-9 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | 10 | from alphamind.data.engines.universe import Universe 11 | from alphamind.model.composer import Composer 12 | from alphamind.model.composer import DataMeta 13 | from alphamind.model.treemodel import XGBClassifier 14 | 15 | 16 | class TestComposer(unittest.TestCase): 17 | 18 | def _assert_composer_equal(self, lhs: Composer, rhs: Composer): 19 | self.assertEqual(type(lhs.alpha_model), type(rhs.alpha_model)) 20 | self.assertEqual(lhs.data_meta, rhs.data_meta) 21 | 22 | def test_data_meta_persistence(self): 23 | freq = '5b' 24 | universe = Universe('zz800') 25 | batch = 4 26 | neutralized_risk = ['SIZE'] 27 | risk_model = 'long' 28 | pre_process = ['standardize', 'winsorize_normal'] 29 | post_process = ['standardize', 'winsorize_normal'] 30 | warm_start = 2 31 | data_source = 'postgresql://user:pwd@server/dummy' 32 | 33 | data_meta = DataMeta(freq=freq, 34 | universe=universe, 35 | batch=batch, 36 | neutralized_risk=neutralized_risk, 37 | risk_model=risk_model, 38 | pre_process=pre_process, 39 | post_process=post_process, 40 | warm_start=warm_start, 41 | data_source=data_source) 42 | 43 | data_desc = data_meta.save() 44 | 45 | loaded_data = DataMeta.load(data_desc) 46 | self.assertEqual(data_meta.freq, loaded_data.freq) 47 | self.assertEqual(data_meta.universe, loaded_data.universe) 48 | self.assertEqual(data_meta.batch, loaded_data.batch) 49 | self.assertEqual(data_meta.neutralized_risk, loaded_data.neutralized_risk) 50 | self.assertEqual(data_meta.risk_model, loaded_data.risk_model) 51 | self.assertEqual(data_meta.pre_process, loaded_data.pre_process) 52 | self.assertEqual(data_meta.post_process, loaded_data.post_process) 53 | self.assertEqual(data_meta.warm_start, loaded_data.warm_start) 54 | self.assertEqual(data_meta.data_source, loaded_data.data_source) 55 | 56 | def test_composer_persistence(self): 57 | freq = '5b' 58 | universe = Universe('zz800') 59 | batch = 4 60 | neutralized_risk = ['SIZE'] 61 | risk_model = 'long' 62 | pre_process = ['standardize', 'winsorize_normal'] 63 | post_process = ['standardize', 'winsorize_normal'] 64 | warm_start = 2 65 | data_source = 'postgresql://user:pwd@server/dummy' 66 | 67 | data_meta = DataMeta(freq=freq, 68 | universe=universe, 69 | batch=batch, 70 | neutralized_risk=neutralized_risk, 71 | risk_model=risk_model, 72 | pre_process=pre_process, 73 | post_process=post_process, 74 | warm_start=warm_start, 75 | data_source=data_source) 76 | 77 | features = {'f1': 'closePrice', 'f2': 'openPrice'} 78 | alpha_model = XGBClassifier(features=features) 79 | 80 | composer = Composer(alpha_model=alpha_model, 81 | data_meta=data_meta) 82 | 83 | comp_desc = composer.save() 84 | loaded_comp = Composer.load(comp_desc) 85 | self._assert_composer_equal(composer, loaded_comp) 86 | 
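For reference, a minimal sketch (not part of the repository) of the save/load round-trip that test_composer.py above exercises; the feature mapping and the data-source URI mirror the dummy placeholder values used in the tests:

from alphamind.data.engines.universe import Universe
from alphamind.model.composer import Composer, DataMeta
from alphamind.model.treemodel import XGBClassifier

# describe where and how the training data is assembled
data_meta = DataMeta(freq='5b',
                     universe=Universe('zz800'),
                     batch=4,
                     neutralized_risk=['SIZE'],
                     risk_model='long',
                     pre_process=['standardize', 'winsorize_normal'],
                     post_process=['standardize', 'winsorize_normal'],
                     warm_start=2,
                     data_source='postgresql://user:pwd@server/dummy')

# bundle an alpha model with its data description
composer = Composer(alpha_model=XGBClassifier(features={'f1': 'closePrice', 'f2': 'openPrice'}),
                    data_meta=data_meta)

desc = composer.save()          # plain description object, safe to persist
restored = Composer.load(desc)  # rebuilds an equivalent composer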
-------------------------------------------------------------------------------- /alphamind/tests/model/test_linearmodel.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-9-4 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | 10 | import numpy as np 11 | import pandas as pd 12 | from sklearn.linear_model import LinearRegression as LinearRegression2 13 | from sklearn.linear_model import LogisticRegression as LogisticRegression2 14 | 15 | from alphamind.model.linearmodel import ConstLinearModel 16 | from alphamind.model.linearmodel import LinearRegression 17 | from alphamind.model.linearmodel import LogisticRegression 18 | from alphamind.model.loader import load_model 19 | 20 | 21 | class TestLinearModel(unittest.TestCase): 22 | 23 | def setUp(self): 24 | self.n = 3 25 | self.features = ['a', 'b', 'c'] 26 | self.train_x = pd.DataFrame(np.random.randn(1000, self.n), columns=['a', 'b', 'c']) 27 | self.train_y = np.random.randn(1000) 28 | self.train_y_label = np.where(self.train_y > 0., 1, 0) 29 | self.predict_x = pd.DataFrame(np.random.randn(10, self.n), columns=['a', 'b', 'c']) 30 | 31 | def test_const_linear_model(self): 32 | features = ['c', 'b', 'a'] 33 | weights = dict(c=3., b=2., a=1.) 34 | model = ConstLinearModel(features=features, 35 | weights=weights) 36 | 37 | calculated_y = model.predict(self.predict_x) 38 | expected_y = self.predict_x[features] @ np.array([weights[f] for f in features]) 39 | np.testing.assert_array_almost_equal(calculated_y, expected_y) 40 | 41 | def test_const_linear_model_persistence(self): 42 | weights = dict(c=3., b=2., a=1.) 43 | model = ConstLinearModel(features=['a', 'b', 'c'], 44 | weights=weights) 45 | 46 | desc = model.save() 47 | new_model = load_model(desc) 48 | 49 | self.assertEqual(model.features, new_model.features) 50 | np.testing.assert_array_almost_equal(model.weights, new_model.weights) 51 | 52 | def test_const_linear_model_score(self): 53 | model = LinearRegression(['a', 'b', 'c'], fit_intercept=False) 54 | model.fit(self.train_x, self.train_y) 55 | 56 | expected_score = model.score(self.train_x, self.train_y) 57 | 58 | const_model = ConstLinearModel(features=['a', 'b', 'c'], 59 | weights=dict(zip(model.features, model.weights))) 60 | calculated_score = const_model.score(self.train_x, self.train_y) 61 | 62 | self.assertAlmostEqual(expected_score, calculated_score) 63 | 64 | def test_linear_regression(self): 65 | model = LinearRegression(['a', 'b', 'c'], fit_intercept=False) 66 | model.fit(self.train_x, self.train_y) 67 | 68 | calculated_y = model.predict(self.predict_x) 69 | 70 | expected_model = LinearRegression2(fit_intercept=False) 71 | expected_model.fit(self.train_x, self.train_y) 72 | expected_y = expected_model.predict(self.predict_x) 73 | 74 | np.testing.assert_array_almost_equal(calculated_y, expected_y) 75 | np.testing.assert_array_almost_equal(expected_model.coef_, model.weights) 76 | 77 | def test_linear_regression_persistence(self): 78 | model = LinearRegression(['a', 'b', 'c'], fit_intercept=False) 79 | model.fit(self.train_x, self.train_y) 80 | 81 | desc = model.save() 82 | new_model = load_model(desc) 83 | 84 | calculated_y = new_model.predict(self.predict_x) 85 | expected_y = model.predict(self.predict_x) 86 | 87 | np.testing.assert_array_almost_equal(calculated_y, expected_y) 88 | np.testing.assert_array_almost_equal(new_model.weights, model.weights) 89 | 90 | def test_logistic_regression(self): 91 | model = 
LogisticRegression(['a', 'b', 'c'], fit_intercept=False) 92 | model.fit(self.train_x, self.train_y_label) 93 | 94 | calculated_y = model.predict(self.predict_x) 95 | 96 | expected_model = LogisticRegression2(fit_intercept=False) 97 | expected_model.fit(self.train_x, self.train_y_label) 98 | expected_y = expected_model.predict(self.predict_x) 99 | 100 | np.testing.assert_array_equal(calculated_y, expected_y) 101 | np.testing.assert_array_almost_equal(expected_model.coef_, model.weights) 102 | 103 | def test_logistic_regression_persistence(self): 104 | model = LogisticRegression(['a', 'b', 'c'], fit_intercept=False) 105 | model.fit(self.train_x, self.train_y_label) 106 | 107 | desc = model.save() 108 | new_model = load_model(desc) 109 | 110 | calculated_y = new_model.predict(self.predict_x) 111 | expected_y = model.predict(self.predict_x) 112 | 113 | np.testing.assert_array_almost_equal(calculated_y, expected_y) 114 | np.testing.assert_array_almost_equal(new_model.weights, model.weights) 115 | -------------------------------------------------------------------------------- /alphamind/tests/model/test_loader.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-9-5 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | 10 | import numpy as np 11 | import pandas as pd 12 | 13 | from alphamind.model.linearmodel import LinearRegression 14 | from alphamind.model.loader import load_model 15 | 16 | 17 | class TestLoader(unittest.TestCase): 18 | 19 | def setUp(self): 20 | self.n = 3 21 | self.trained_x = pd.DataFrame(np.random.randn(1000, self.n), columns=['a', 'b', 'c']) 22 | self.trained_y = np.random.randn(1000, 1) 23 | 24 | self.predict_x = pd.DataFrame(np.random.randn(100, self.n), columns=['a', 'b', 'c']) 25 | 26 | def test_load_model(self): 27 | model = LinearRegression(['a', 'b', 'c']) 28 | model.fit(self.trained_x, self.trained_y) 29 | 30 | model_desc = model.save() 31 | new_model = load_model(model_desc) 32 | 33 | np.testing.assert_array_almost_equal(model.predict(self.predict_x), 34 | new_model.predict(self.predict_x)) 35 | 36 | self.assertEqual(model.features, new_model.features) 37 | self.assertEqual(model.trained_time, new_model.trained_time) 38 | -------------------------------------------------------------------------------- /alphamind/tests/model/test_modelbase.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2018-2-8 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | 10 | from alphamind.model.linearmodel import ConstLinearModel 11 | 12 | 13 | class TestModelBase(unittest.TestCase): 14 | 15 | def test_simple_model_features(self): 16 | model = ConstLinearModel(features=['c', 'b', 'a']) 17 | self.assertListEqual(['a', 'b', 'c'], model.features) 18 | -------------------------------------------------------------------------------- /alphamind/tests/model/test_treemodel.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2018-1-5 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | 10 | import numpy as np 11 | import pandas as pd 12 | 13 | from alphamind.model.loader import load_model 14 | from alphamind.model.treemodel import RandomForestClassifier 15 | from alphamind.model.treemodel import RandomForestRegressor 16 | from alphamind.model.treemodel import XGBClassifier 17 | from alphamind.model.treemodel import
XGBRegressor 18 | from alphamind.model.treemodel import XGBTrainer 19 | 20 | 21 | class TestTreeModel(unittest.TestCase): 22 | 23 | def setUp(self): 24 | self.features = list('0123456789') 25 | self.x = pd.DataFrame(np.random.randn(1000, 10), columns=self.features) 26 | self.y = np.random.randn(1000) 27 | self.sample_x = pd.DataFrame(np.random.randn(100, 10), columns=self.features) 28 | 29 | def test_random_forest_regress_persistence(self): 30 | model = RandomForestRegressor(features=self.features) 31 | model.fit(self.x, self.y) 32 | 33 | desc = model.save() 34 | new_model = load_model(desc) 35 | self.assertEqual(model.features, new_model.features) 36 | 37 | np.testing.assert_array_almost_equal(model.predict(self.sample_x), 38 | new_model.predict(self.sample_x)) 39 | np.testing.assert_array_almost_equal(model.importances, new_model.importances) 40 | 41 | def test_random_forest_classify_persistence(self): 42 | model = RandomForestClassifier(features=self.features) 43 | y = np.where(self.y > 0, 1, 0) 44 | model.fit(self.x, y) 45 | 46 | desc = model.save() 47 | new_model = load_model(desc) 48 | self.assertEqual(model.features, new_model.features) 49 | 50 | np.testing.assert_array_almost_equal(model.predict(self.sample_x), 51 | new_model.predict(self.sample_x)) 52 | np.testing.assert_array_almost_equal(model.importances, new_model.importances) 53 | 54 | def test_xgb_regress_persistence(self): 55 | model = XGBRegressor(features=self.features) 56 | model.fit(self.x, self.y) 57 | 58 | desc = model.save() 59 | new_model = load_model(desc) 60 | self.assertEqual(model.features, new_model.features) 61 | 62 | np.testing.assert_array_almost_equal(model.predict(self.sample_x), 63 | new_model.predict(self.sample_x)) 64 | np.testing.assert_array_almost_equal(model.importances, new_model.importances) 65 | 66 | def test_xgb_classify_persistence(self): 67 | model = XGBClassifier(features=self.features) 68 | y = np.where(self.y > 0, 1, 0) 69 | model.fit(self.x, y) 70 | 71 | desc = model.save() 72 | new_model = load_model(desc) 73 | self.assertEqual(model.features, new_model.features) 74 | 75 | np.testing.assert_array_almost_equal(model.predict(self.sample_x), 76 | new_model.predict(self.sample_x)) 77 | np.testing.assert_array_almost_equal(model.importances, new_model.importances) 78 | 79 | def test_xgb_trainer_equal_classifier(self): 80 | model1 = XGBClassifier(n_estimators=100, 81 | learning_rate=0.1, 82 | max_depth=3, 83 | features=self.features, 84 | random_state=42) 85 | 86 | model2 = XGBTrainer(features=self.features, 87 | objective='reg:logistic', 88 | booster='gbtree', 89 | tree_method='exact', 90 | n_estimators=100, 91 | learning_rate=0.1, 92 | max_depth=3, 93 | random_state=42) 94 | 95 | y = np.where(self.y > 0, 1, 0) 96 | model1.fit(self.x, y) 97 | model2.fit(self.x, y) 98 | 99 | predict1 = model1.predict(self.sample_x) 100 | predict2 = model2.predict(self.sample_x) 101 | predict2 = np.where(predict2 > 0.5, 1., 0.) 
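        # XGBTrainer emits raw probabilities here; thresholding at 0.5 converts them into the
        # same hard labels that XGBClassifier.predict produces, so the two can be compared directly.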
102 | np.testing.assert_array_almost_equal(predict1, predict2) 103 | 104 | def test_xgb_trainer_persistence(self): 105 | model = XGBTrainer(features=self.features, 106 | objective='binary:logistic', 107 | booster='gbtree', 108 | tree_method='hist', 109 | n_estimators=200) 110 | y = np.where(self.y > 0, 1, 0) 111 | model.fit(self.x, y) 112 | 113 | desc = model.save() 114 | new_model = load_model(desc) 115 | self.assertEqual(model.features, new_model.features) 116 | 117 | np.testing.assert_array_almost_equal(model.predict(self.sample_x), 118 | new_model.predict(self.sample_x)) 119 | np.testing.assert_array_almost_equal(model.importances, new_model.importances) 120 | -------------------------------------------------------------------------------- /alphamind/tests/portfolio/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-4-27 4 | 5 | @author: cheng.li 6 | """ 7 | -------------------------------------------------------------------------------- /alphamind/tests/portfolio/test_evolver.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-11-23 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | 10 | import numpy as np 11 | 12 | from alphamind.portfolio.evolver import evolve_positions 13 | 14 | 15 | class TestEvolver(unittest.TestCase): 16 | 17 | def test_evolve_positions_with_all_positive_position(self): 18 | positions = np.array([0.2, 0.2, 0.8]) 19 | dx_returns = np.array([0.06, 0.04, -0.10]) 20 | 21 | simple_return = np.exp(dx_returns) 22 | curr_pos = positions * simple_return 23 | expected_pos = curr_pos / curr_pos.sum() * positions.sum() 24 | 25 | calculated_pos = evolve_positions(positions, dx_returns) 26 | 27 | np.testing.assert_array_almost_equal(expected_pos, calculated_pos) 28 | 29 | def test_evolve_positions_with_negative_position(self): 30 | positions = np.array([0.2, 0.3, -0.8]) 31 | dx_returns = np.array([0.06, 0.04, -0.10]) 32 | 33 | simple_return = np.exp(dx_returns) 34 | curr_pos = positions * simple_return 35 | expected_pos = curr_pos / np.abs(curr_pos).sum() * np.abs(positions).sum() 36 | 37 | calculated_pos = evolve_positions(positions, dx_returns) 38 | 39 | np.testing.assert_array_almost_equal(expected_pos, calculated_pos) 40 | -------------------------------------------------------------------------------- /alphamind/tests/portfolio/test_linearbuild.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-5-5 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | 10 | import numpy as np 11 | 12 | from alphamind.portfolio.linearbuilder import linear_builder 13 | 14 | 15 | class TestLinearBuild(unittest.TestCase): 16 | def setUp(self): 17 | self.er = np.random.randn(3000) 18 | self.risk_exp = np.random.randn(3000, 30) 19 | self.risk_exp = np.concatenate([self.risk_exp, np.ones((3000, 1))], axis=1) 20 | self.bm = np.random.randint(100, size=3000).astype(float) 21 | self.current_pos = np.random.randint(0, 100, size=3000) 22 | self.current_pos = self.current_pos / self.current_pos.sum() 23 | 24 | def test_linear_build(self): 25 | bm = self.bm / self.bm.sum() 26 | eplson = 1e-6 27 | 28 | status, _, w = linear_builder(self.er, 29 | 0., 30 | 0.01, 31 | self.risk_exp, 32 | (bm @ self.risk_exp, bm @ self.risk_exp)) 33 | self.assertEqual(status, 'optimal') 34 | self.assertAlmostEqual(np.sum(w), 1.) 
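        # Besides full investment, the weights must stay inside the [0, 0.01] per-name bounds
        # and reproduce the benchmark's risk exposures, as checked below.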
35 | self.assertTrue(np.all(w <= 0.01 + eplson)) 36 | self.assertTrue(np.all(w >= -eplson)) 37 | 38 | calc_risk = (w - bm) @ self.risk_exp 39 | expected_risk = np.zeros(self.risk_exp.shape[1]) 40 | np.testing.assert_array_almost_equal(calc_risk, expected_risk) 41 | 42 | def test_linear_build_with_interior(self): 43 | bm = self.bm / self.bm.sum() 44 | eplson = 1e-6 45 | 46 | status, _, w = linear_builder(self.er, 47 | 0., 48 | 0.01, 49 | self.risk_exp, 50 | (bm @ self.risk_exp, bm @ self.risk_exp), 51 | method='interior') 52 | self.assertEqual(status, 'optimal') 53 | self.assertAlmostEqual(np.sum(w), 1.) 54 | self.assertTrue(np.all(w <= 0.01 + eplson)) 55 | self.assertTrue(np.all(w >= -eplson)) 56 | 57 | calc_risk = (w - bm) @ self.risk_exp 58 | expected_risk = np.zeros(self.risk_exp.shape[1]) 59 | np.testing.assert_array_almost_equal(calc_risk, expected_risk) 60 | 61 | def test_linear_build_with_inequality_constraints(self): 62 | bm = self.bm / self.bm.sum() 63 | eplson = 1e-6 64 | 65 | risk_lbound = bm @ self.risk_exp 66 | risk_ubound = bm @ self.risk_exp 67 | 68 | risk_tolerance = 0.01 * np.abs(risk_lbound[:-1]) 69 | 70 | risk_lbound[:-1] = risk_lbound[:-1] - risk_tolerance 71 | risk_ubound[:-1] = risk_ubound[:-1] + risk_tolerance 72 | 73 | status, _, w = linear_builder(self.er, 74 | 0., 75 | 0.01, 76 | self.risk_exp, 77 | risk_target=(risk_lbound, risk_ubound)) 78 | self.assertEqual(status, 'optimal') 79 | self.assertAlmostEqual(np.sum(w), 1.) 80 | self.assertTrue(np.all(w <= 0.01 + eplson)) 81 | self.assertTrue(np.all(w >= -eplson)) 82 | 83 | calc_risk = (w - bm) @ self.risk_exp / np.abs(bm @ self.risk_exp) 84 | self.assertTrue(np.all(np.abs(calc_risk) <= 1.01e-2)) 85 | 86 | def test_linear_build_with_to_constraint(self): 87 | bm = self.bm / self.bm.sum() 88 | eplson = 1e-6 89 | turn_over_target = 0.1 90 | 91 | risk_lbound = bm @ self.risk_exp 92 | risk_ubound = bm @ self.risk_exp 93 | 94 | risk_tolerance = 0.01 * np.abs(risk_lbound[:-1]) 95 | 96 | risk_lbound[:-1] = risk_lbound[:-1] - risk_tolerance 97 | risk_ubound[:-1] = risk_ubound[:-1] + risk_tolerance 98 | 99 | status, _, w = linear_builder(self.er, 100 | 0., 101 | 0.01, 102 | self.risk_exp, 103 | risk_target=(risk_lbound, risk_ubound), 104 | turn_over_target=turn_over_target, 105 | current_position=self.current_pos) 106 | self.assertEqual(status, 'optimal') 107 | self.assertAlmostEqual(np.sum(w), 1.) 108 | self.assertTrue(np.all(w <= 0.01 + eplson)) 109 | self.assertTrue(np.all(w >= -eplson)) 110 | self.assertAlmostEqual(np.abs(w - self.current_pos).sum(), turn_over_target) 111 | 112 | calc_risk = (w - bm) @ self.risk_exp / np.abs(bm @ self.risk_exp) 113 | self.assertTrue(np.all(np.abs(calc_risk) <= 1.0001e-2)) 114 | 115 | def test_linear_build_with_to_constraint_with_ecos(self): 116 | bm = self.bm / self.bm.sum() 117 | eplson = 1e-6 118 | turn_over_target = 0.1 119 | 120 | risk_lbound = bm @ self.risk_exp 121 | risk_ubound = bm @ self.risk_exp 122 | 123 | risk_tolerance = 0.01 * np.abs(risk_lbound[:-1]) 124 | 125 | risk_lbound[:-1] = risk_lbound[:-1] - risk_tolerance 126 | risk_ubound[:-1] = risk_ubound[:-1] + risk_tolerance 127 | 128 | status, _, w = linear_builder(self.er, 129 | 0., 130 | 0.01, 131 | self.risk_exp, 132 | risk_target=(risk_lbound, risk_ubound), 133 | turn_over_target=turn_over_target, 134 | current_position=self.current_pos, 135 | method='ecos') 136 | self.assertEqual(status, 'optimal') 137 | self.assertAlmostEqual(np.sum(w), 1.) 
138 | self.assertTrue(np.all(w <= 0.01 + eplson)) 139 | self.assertTrue(np.all(w >= -eplson)) 140 | self.assertAlmostEqual(np.abs(w - self.current_pos).sum(), turn_over_target) 141 | 142 | calc_risk = (w - bm) @ self.risk_exp / np.abs(bm @ self.risk_exp) 143 | self.assertTrue(np.all(np.abs(calc_risk) <= 1.0001e-2)) 144 | 145 | 146 | if __name__ == '__main__': 147 | unittest.main() 148 | -------------------------------------------------------------------------------- /alphamind/tests/portfolio/test_longshortbuild.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-5-9 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | 10 | import numpy as np 11 | import pandas as pd 12 | 13 | from alphamind.portfolio.longshortbulder import long_short_builder 14 | 15 | 16 | class TestLongShortBuild(unittest.TestCase): 17 | 18 | def setUp(self): 19 | self.x = np.random.randn(3000, 10) 20 | self.groups = np.random.randint(10, 40, size=3000) 21 | choices = np.random.choice(3000, 100, replace=False) 22 | self.masks = np.full(3000, False, dtype=bool) 23 | self.masks[choices] = True 24 | 25 | def test_long_short_build(self): 26 | x = self.x[:, 0].flatten() 27 | calc_weights = long_short_builder(x).flatten() 28 | expected_weights = x / np.abs(x).sum() 29 | np.testing.assert_array_almost_equal(calc_weights, expected_weights) 30 | 31 | calc_weights = long_short_builder(self.x, leverage=2) 32 | expected_weights = self.x / np.abs(self.x).sum(axis=0) * 2 33 | np.testing.assert_array_almost_equal(calc_weights, expected_weights) 34 | 35 | def test_long_short_build_with_group(self): 36 | x = self.x[:, 0].flatten() 37 | calc_weights = long_short_builder(x, groups=self.groups).flatten() 38 | expected_weights = pd.Series(x).groupby(self.groups).apply(lambda s: s / np.abs(s).sum()) 39 | np.testing.assert_array_almost_equal(calc_weights, expected_weights) 40 | 41 | calc_weights = long_short_builder(self.x, groups=self.groups) 42 | expected_weights = pd.DataFrame(self.x).groupby(self.groups).apply( 43 | lambda s: s / np.abs(s).sum(axis=0)) 44 | np.testing.assert_array_almost_equal(calc_weights, expected_weights) 45 | 46 | def test_long_short_build_with_masks(self): 47 | x = self.x[:, 0].flatten() 48 | calc_weights = long_short_builder(x, masks=self.masks, leverage=1.).flatten() 49 | self.assertAlmostEqual(calc_weights.sum(), 0.) 50 | 51 | masked_x = x.copy() 52 | masked_x[self.masks] = 0. 
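        # Masked names get zero weight; the remaining scores are demeaned so longs and shorts
        # net to zero before normalizing by gross exposure.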
53 | masked_x[~self.masks] = masked_x[~self.masks] - masked_x[~self.masks].mean() 54 | expected_weights = masked_x / np.abs(masked_x).sum() 55 | np.testing.assert_array_almost_equal(calc_weights, expected_weights) 56 | 57 | 58 | if __name__ == '__main__': 59 | unittest.main() 60 | -------------------------------------------------------------------------------- /alphamind/tests/portfolio/test_meanvariancebuild.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-6-27 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | 10 | import numpy as np 11 | 12 | from alphamind.portfolio.meanvariancebuilder import mean_variance_builder 13 | from alphamind.portfolio.meanvariancebuilder import target_vol_builder 14 | 15 | 16 | class TestMeanVarianceBuild(unittest.TestCase): 17 | 18 | def test_mean_variance_builder(self): 19 | er = np.array([0.01, 0.02, 0.03]) 20 | cov = np.array([[0.02, 0.01, 0.02], 21 | [0.01, 0.02, 0.03], 22 | [0.02, 0.03, 0.02]]) 23 | ids_var = np.diag([0.01, 0.02, 0.03]) 24 | cov += ids_var 25 | 26 | bm = np.array([0.3, 0.3, 0.4]) 27 | lbound = np.array([0., 0., 0.]) 28 | ubound = np.array([0.4, 0.4, 0.5]) 29 | 30 | risk_exposure = np.array([[1., 1., 1.], 31 | [1., 0., 1.]]).T 32 | risk_target = (np.array([bm.sum(), 0.3]), np.array([bm.sum(), 0.7])) 33 | 34 | model = dict(cov=cov, factor_cov=None, factor_loading=None, idsync=None) 35 | status, _, x = mean_variance_builder(er, model, bm, lbound, ubound, risk_exposure, 36 | risk_target) 37 | 38 | self.assertTrue(status == 'optimal') 39 | self.assertAlmostEqual(x.sum(), bm.sum()) 40 | self.assertTrue(np.all(x <= ubound + 1.e-6)) 41 | self.assertTrue(np.all(x >= lbound - 1.e-6)) 42 | self.assertTrue(np.all(x @ risk_exposure <= risk_target[1] + 1.e-6)) 43 | self.assertTrue(np.all(x @ risk_exposure >= risk_target[0] - 1.e-6)) 44 | np.testing.assert_array_almost_equal(x, [0.1, 0.4, 0.5]) 45 | 46 | def test_mean_variance_builder_without_constraints(self): 47 | er = np.array([0.01, 0.02, 0.03]) 48 | cov = np.array([[0.02, 0.01, 0.02], 49 | [0.01, 0.02, 0.03], 50 | [0.02, 0.03, 0.02]]) 51 | ids_var = np.diag([0.01, 0.02, 0.03]) 52 | cov += ids_var 53 | 54 | bm = np.array([0., 0., 0.]) 55 | 56 | model = dict(cov=cov, factor_cov=None, factor_loading=None, idsync=None) 57 | status, _, x = mean_variance_builder(er, model, bm, None, None, None, None, lam=1) 58 | np.testing.assert_array_almost_equal(x, np.linalg.inv(cov) @ er) 59 | 60 | def test_mean_variance_builder_without_constraints_with_factor_model(self): 61 | pass 62 | 63 | def test_mean_variance_builder_with_none_unity_lambda(self): 64 | er = np.array([0.01, 0.02, 0.03]) 65 | cov = np.array([[0.02, 0.01, 0.02], 66 | [0.01, 0.02, 0.03], 67 | [0.02, 0.03, 0.02]]) 68 | ids_var = np.diag([0.01, 0.02, 0.03]) 69 | cov += ids_var 70 | 71 | bm = np.array([0.3, 0.3, 0.4]) 72 | lbound = np.array([0., 0., 0.]) 73 | ubound = np.array([0.4, 0.4, 0.5]) 74 | 75 | risk_exposure = np.array([[1., 1., 1.], 76 | [1., 0., 1.]]).T 77 | risk_target = (np.array([bm.sum(), 0.3]), np.array([bm.sum(), 0.7])) 78 | 79 | model = dict(cov=cov, factor_cov=None, factor_loading=None, idsync=None) 80 | status, _, x = mean_variance_builder(er, model, bm, lbound, ubound, risk_exposure, 81 | risk_target, lam=100) 82 | 83 | self.assertTrue(status == 'optimal') 84 | self.assertAlmostEqual(x.sum(), bm.sum()) 85 | self.assertTrue(np.all(x <= ubound + 1.e-6)) 86 | self.assertTrue(np.all(x >= lbound - 1.e-6)) 87 | self.assertTrue(np.all(x
@ risk_exposure <= risk_target[1] + 1.e-6)) 88 | self.assertTrue(np.all(x @ risk_exposure >= risk_target[0] - 1.e-6)) 89 | np.testing.assert_array_almost_equal(x, [0.2950, 0.3000, 0.4050]) 90 | 91 | def test_target_vol_builder(self): 92 | er = np.array([0.1, 0.2, 0.3]) 93 | cov = np.array([[0.05, 0.01, 0.02], 94 | [0.01, 0.06, 0.03], 95 | [0.02, 0.03, 0.07]]) 96 | 97 | lbound = np.array([0., 0., 0.]) 98 | ubound = np.array([0.8, 0.8, 0.8]) 99 | 100 | bm = np.array([0.3, 0.3, 0.3]) 101 | 102 | risk_exposure = np.array([[1., 1., 1.]]).T 103 | risk_target = (np.array([bm.sum()]), np.array([bm.sum()])) 104 | model = dict(cov=cov, factor_cov=None, factor_loading=None, idsync=None) 105 | status, _, x = target_vol_builder(er, model, bm, lbound, ubound, risk_exposure, risk_target, 106 | 0.1) 107 | self.assertTrue(status == 'optimal') 108 | self.assertTrue(np.all(x <= ubound + 1.e-6)) 109 | self.assertTrue(np.all(x >= lbound - 1.e-6)) 110 | self.assertTrue(np.all(x @ risk_exposure <= risk_target[1] + 1.e-6)) 111 | self.assertTrue(np.all(x @ risk_exposure >= risk_target[0] - 1.e-6)) 112 | np.testing.assert_array_almost_equal(x, [-0.3, -0.10919033, 0.40919033] + bm) 113 | -------------------------------------------------------------------------------- /alphamind/tests/portfolio/test_percentbuild.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-5-4 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | 10 | import numpy as np 11 | import pandas as pd 12 | 13 | from alphamind.portfolio.percentbuilder import percent_build 14 | 15 | 16 | class TestPercentBuild(unittest.TestCase): 17 | 18 | def setUp(self): 19 | self.n_samples = 3000 20 | self.p_included = 0.1 21 | self.n_groups = 30 22 | self.n_portfolios = range(1, 10) 23 | self.n_mask = 100 24 | 25 | def test_percent_build(self): 26 | n_include = int(self.n_samples * self.p_included) 27 | 28 | for n_portfolio in self.n_portfolios: 29 | x = np.random.randn(self.n_samples, n_portfolio) 30 | 31 | calc_weights = percent_build(x, self.p_included) 32 | 33 | expected_weights = np.zeros((len(x), n_portfolio)) 34 | 35 | masks = (-x).argsort(axis=0).argsort(axis=0) < n_include 36 | 37 | for j in range(x.shape[1]): 38 | expected_weights[masks[:, j], j] = 1. 39 | 40 | np.testing.assert_array_almost_equal(calc_weights, expected_weights) 41 | 42 | def test_percent_build_with_group(self): 43 | for n_portfolio in self.n_portfolios: 44 | 45 | x = np.random.randn(self.n_samples, n_portfolio) 46 | groups = np.random.randint(self.n_groups, size=self.n_samples) 47 | 48 | calc_weights = percent_build(x, self.p_included, groups) 49 | 50 | grouped_ordering = pd.DataFrame(-x).groupby(groups).rank() 51 | grouped_count = pd.DataFrame(-x).groupby(groups).transform(lambda x: x.count()) 52 | expected_weights = np.zeros((len(x), n_portfolio)) 53 | 54 | n_include = (grouped_count * self.p_included).astype(int) 55 | masks = (grouped_ordering <= n_include).values 56 | for j in range(x.shape[1]): 57 | expected_weights[masks[:, j], j] = 1.
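            # Within each group, the top p_included fraction by score gets unit weight and everything else zero.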
-------------------------------------------------------------------------------- /alphamind/tests/portfolio/test_percentbuild.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-5-4 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | 10 | import numpy as np 11 | import pandas as pd 12 | 13 | from alphamind.portfolio.percentbuilder import percent_build 14 | 15 | 16 | class TestPercentBuild(unittest.TestCase): 17 | 18 | def setUp(self): 19 | self.n_samples = 3000 20 | self.p_included = 0.1 21 | self.n_groups = 30 22 | self.n_portfolios = range(1, 10) 23 | self.n_mask = 100 24 | 25 | def test_percent_build(self): 26 | n_include = int(self.n_samples * self.p_included) 27 | 28 | for n_portfolio in self.n_portfolios: 29 | x = np.random.randn(self.n_samples, n_portfolio) 30 | 31 | calc_weights = percent_build(x, self.p_included) 32 | 33 | expected_weights = np.zeros((len(x), n_portfolio)) 34 | 35 | masks = (-x).argsort(axis=0).argsort(axis=0) < n_include 36 | 37 | for j in range(x.shape[1]): 38 | expected_weights[masks[:, j], j] = 1. 39 | 40 | np.testing.assert_array_almost_equal(calc_weights, expected_weights) 41 | 42 | def test_percent_build_with_group(self): 43 | for n_portfolio in self.n_portfolios: 44 | 45 | x = np.random.randn(self.n_samples, n_portfolio) 46 | groups = np.random.randint(self.n_groups, size=self.n_samples) 47 | 48 | calc_weights = percent_build(x, self.p_included, groups) 49 | 50 | grouped_ordering = pd.DataFrame(-x).groupby(groups).rank() 51 | grouped_count = pd.DataFrame(-x).groupby(groups).transform(lambda x: x.count()) 52 | expected_weights = np.zeros((len(x), n_portfolio)) 53 | 54 | n_include = (grouped_count * self.p_included).astype(int) 55 | masks = (grouped_ordering <= n_include).values 56 | for j in range(x.shape[1]): 57 | expected_weights[masks[:, j], j] = 1. 58 | 59 | np.testing.assert_array_almost_equal(calc_weights, expected_weights) 60 | 61 | def test_percent_build_with_masks(self): 62 | for n_portfolio in self.n_portfolios: 63 | x = np.random.randn(self.n_samples, n_portfolio) 64 | choices = np.random.choice(self.n_samples, self.n_mask, replace=False) 65 | masks = np.full(self.n_samples, True, dtype=bool) 66 | masks[choices] = False 67 | 68 | calc_weights = percent_build(x, self.p_included, masks=masks) 69 | 70 | expected_weights = np.zeros((len(x), n_portfolio)) 71 | 72 | filtered_index = np.arange(len(x))[masks] 73 | filtered_x = x[masks] 74 | big_boolean = np.full(x.shape, False, dtype=bool) 75 | 76 | n_included = int(self.p_included * len(x)) 77 | chosen = (-filtered_x).argsort(axis=0).argsort(axis=0) < n_included 78 | big_boolean[filtered_index] = chosen 79 | 80 | for j in range(x.shape[1]): 81 | expected_weights[big_boolean[:, j], j] = 1. 82 | 83 | np.testing.assert_array_almost_equal(calc_weights, expected_weights) 84 | 85 | 86 | if __name__ == '__main__': 87 | unittest.main() 88 | -------------------------------------------------------------------------------- /alphamind/tests/portfolio/test_rankbuild.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-4-27 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | 10 | import numpy as np 11 | import pandas as pd 12 | 13 | from alphamind.portfolio.rankbuilder import rank_build 14 | 15 | 16 | class TestRankBuild(unittest.TestCase): 17 | 18 | def setUp(self): 19 | self.n_samples = 3000 20 | self.n_included = 300 21 | self.n_groups = 30 22 | self.n_portfolio = range(1, 10) 23 | self.n_mask = 100 24 | 25 | def test_rank_build(self): 26 | for n_portfolio in self.n_portfolio: 27 | x = np.random.randn(self.n_samples, n_portfolio) 28 | 29 | calc_weights = rank_build(x, self.n_included) 30 | 31 | expected_weights = np.zeros((len(x), n_portfolio)) 32 | chosen = (-x).argsort(axis=0).argsort(axis=0) < self.n_included 33 | 34 | for j in range(x.shape[1]): 35 | expected_weights[chosen[:, j], j] = 1. 36 | 37 | np.testing.assert_array_almost_equal(calc_weights, expected_weights) 38 | 39 | def test_rank_build_with_group(self): 40 | n_include = int(self.n_included / self.n_groups) 41 | 42 | for n_portfolio in self.n_portfolio: 43 | 44 | x = np.random.randn(self.n_samples, n_portfolio) 45 | groups = np.random.randint(self.n_groups, size=self.n_samples) 46 | 47 | calc_weights = rank_build(x, n_include, groups) 48 | 49 | grouped_ordering = pd.DataFrame(-x).groupby(groups).rank() 50 | expected_weights = np.zeros((len(x), n_portfolio)) 51 | chosen = (grouped_ordering <= n_include).values 52 | for j in range(x.shape[1]): 53 | expected_weights[chosen[:, j], j] = 1. 
54 | 55 | np.testing.assert_array_almost_equal(calc_weights, expected_weights) 56 | 57 | def test_rank_build_with_masks(self): 58 | for n_portfolio in self.n_portfolio: 59 | x = np.random.randn(self.n_samples, n_portfolio) 60 | choices = np.random.choice(self.n_samples, self.n_mask, replace=False) 61 | masks = np.full(self.n_samples, True, dtype=bool) 62 | masks[choices] = False 63 | 64 | calc_weights = rank_build(x, self.n_included, masks=masks) 65 | 66 | expected_weights = np.zeros((len(x), n_portfolio)) 67 | 68 | filtered_index = np.arange(len(x))[masks] 69 | filtered_x = x[masks] 70 | big_boolean = np.full(x.shape, False, dtype=bool) 71 | 72 | chosen = (-filtered_x).argsort(axis=0).argsort(axis=0) < self.n_included 73 | big_boolean[filtered_index] = chosen 74 | 75 | for j in range(x.shape[1]): 76 | expected_weights[big_boolean[:, j], j] = 1. 77 | 78 | np.testing.assert_array_almost_equal(calc_weights, expected_weights) 79 | 80 | 81 | if __name__ == '__main__': 82 | unittest.main() 83 | -------------------------------------------------------------------------------- /alphamind/tests/portfolio/test_riskmodel.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2018-5-29 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | 10 | import numpy as np 11 | import pandas as pd 12 | 13 | from alphamind.portfolio.riskmodel import FactorRiskModel 14 | from alphamind.portfolio.riskmodel import FullRiskModel 15 | 16 | 17 | class TestRiskModel(unittest.TestCase): 18 | 19 | def setUp(self): 20 | self.factor_cov = pd.DataFrame([[0.5, -0.3], [-0.3, 0.7]], columns=['a', 'b'], 21 | index=['a', 'b']) 22 | self.risk_exp = pd.DataFrame([[0.8, 0.2], [0.5, 0.5], [0.2, 0.8]], columns=['a', 'b'], 23 | index=[1, 2, 3]) 24 | self.idsync = pd.Series([0.1, 0.3, 0.2], index=[1, 2, 3]) 25 | self.sec_cov = self.risk_exp.values @ self.factor_cov.values @ self.risk_exp.values.T \ 26 | + np.diag(self.idsync.values) 27 | self.sec_cov = pd.DataFrame(self.sec_cov, columns=[1, 2, 3], index=[1, 2, 3]) 28 | 29 | def test_full_risk_model(self): 30 | self.assertEqual(self.sec_cov.shape, (3, 3)) 31 | model = FullRiskModel(self.sec_cov) 32 | 33 | codes = [1, 2] 34 | res = model.get_cov(codes) 35 | np.testing.assert_array_almost_equal(res, self.sec_cov.loc[codes, codes].values) 36 | 37 | res = model.get_cov() 38 | np.testing.assert_array_almost_equal(res, self.sec_cov.values) 39 | 40 | def test_factor_risk_model(self): 41 | self.assertEqual(self.factor_cov.shape, (2, 2)) 42 | self.assertEqual(self.risk_exp.shape, (3, 2)) 43 | self.assertEqual(self.idsync.shape, (3,)) 44 | 45 | model = FactorRiskModel(self.factor_cov, 46 | self.risk_exp, 47 | self.idsync) 48 | 49 | res = model.get_factor_cov() 50 | np.testing.assert_array_almost_equal(res, self.factor_cov.values) 51 | 52 | codes = [1, 3] 53 | res = model.get_risk_exp(codes) 54 | np.testing.assert_array_almost_equal(res, self.risk_exp.loc[codes, :]) 55 | res = model.get_idsync(codes) 56 | np.testing.assert_array_almost_equal(res, self.idsync[codes]) 57 | 58 | res = model.get_risk_exp() 59 | np.testing.assert_array_almost_equal(res, self.risk_exp) 60 | res = model.get_idsync() 61 | np.testing.assert_array_almost_equal(res, self.idsync)
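 62 | 63 | # The full covariance built in setUp follows the standard factor model identity: 64 | # sec_cov = risk_exp @ factor_cov @ risk_exp.T + diag(idsync). 65 | if __name__ == '__main__': 66 | unittest.main() 67 | 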
-------------------------------------------------------------------------------- /alphamind/tests/settlement/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-4-28 4 | 5 | @author: cheng.li 6 | """ 7 | -------------------------------------------------------------------------------- /alphamind/tests/settlement/test_simplesettle.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-4-28 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import unittest 9 | 10 | import numpy as np 11 | import pandas as pd 12 | 13 | from alphamind.settlement.simplesettle import simple_settle 14 | 15 | 16 | class TestSimpleSettle(unittest.TestCase): 17 | 18 | def setUp(self): 19 | self.n_samples = 3000 20 | self.n_groups = 30 21 | self.weights = np.random.randn(self.n_samples) 22 | self.ret_series = np.random.randn(self.n_samples) 23 | self.groups = np.random.randint(self.n_groups, size=self.n_samples) 24 | 25 | def test_simple_settle(self): 26 | calc_ret = simple_settle(self.weights, self.ret_series) 27 | 28 | ret_series = self.ret_series.reshape((-1, 1)) 29 | expected_ret = self.weights @ ret_series 30 | 31 | self.assertAlmostEqual(calc_ret['er'][0], expected_ret[0]) 32 | 33 | def test_simple_settle_with_group(self): 34 | calc_ret = simple_settle(self.weights, self.ret_series, self.groups) 35 | 36 | ret_series = self.weights * self.ret_series 37 | expected_ret = pd.Series(ret_series).groupby(self.groups).sum().values 38 | 39 | np.testing.assert_array_almost_equal(calc_ret['er'].values[:-1], expected_ret) 40 | self.assertAlmostEqual(calc_ret['er'].values[-1], expected_ret.sum()) 41 | 42 | 43 | if __name__ == '__main__': 44 | unittest.main() 45 | -------------------------------------------------------------------------------- /alphamind/tests/test_suite.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-4-25 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import os 9 | 10 | SKIP_ENGINE_TESTS = True 11 | 12 | if not SKIP_ENGINE_TESTS: 13 | try: 14 | DATA_ENGINE_URI = os.environ['DB_URI'] 15 | except KeyError: 16 | DATA_ENGINE_URI = "mysql+mysqldb://reader:Reader#2020@121.37.138.1:13317/vision?charset=utf8" 17 | else: 18 | DATA_ENGINE_URI = None 19 | 20 | 21 | if __name__ == '__main__': 22 | from simpleutils import add_parent_path 23 | 24 | add_parent_path(__file__, 3) 25 | 26 | from simpleutils import TestRunner 27 | from alphamind.utilities import alpha_logger 28 | from alphamind.tests.data.test_neutralize import TestNeutralize 29 | from alphamind.tests.data.test_standardize import TestStandardize 30 | from alphamind.tests.data.test_winsorize import TestWinsorize 31 | from alphamind.tests.data.test_quantile import TestQuantile 32 | from alphamind.tests.data.engines.test_sql_engine import TestSqlEngine 33 | from alphamind.tests.data.engines.test_universe import TestUniverse 34 | from alphamind.tests.portfolio.test_constraints import TestConstraints 35 | from alphamind.tests.portfolio.test_evolver import TestEvolver 36 | from alphamind.tests.portfolio.test_longshortbuild import TestLongShortBuild 37 | from alphamind.tests.portfolio.test_rankbuild import TestRankBuild 38 | from alphamind.tests.portfolio.test_percentbuild import TestPercentBuild 39 | from alphamind.tests.portfolio.test_linearbuild import TestLinearBuild 40 | from alphamind.tests.portfolio.test_meanvariancebuild import TestMeanVarianceBuild 41 | from alphamind.tests.portfolio.test_riskmodel import TestRiskModel 42 | from alphamind.tests.settlement.test_simplesettle import TestSimpleSettle 43 | from alphamind.tests.analysis.test_riskanalysis 
import TestRiskAnalysis 44 | from alphamind.tests.analysis.test_perfanalysis import TestPerformanceAnalysis 45 | from alphamind.tests.analysis.test_factoranalysis import TestFactorAnalysis 46 | from alphamind.tests.analysis.test_quantilieanalysis import TestQuantileAnalysis 47 | from alphamind.tests.model.test_modelbase import TestModelBase 48 | from alphamind.tests.model.test_linearmodel import TestLinearModel 49 | from alphamind.tests.model.test_treemodel import TestTreeModel 50 | from alphamind.tests.model.test_loader import TestLoader 51 | from alphamind.tests.model.test_composer import TestComposer 52 | from alphamind.tests.execution.test_naiveexecutor import TestNaiveExecutor 53 | from alphamind.tests.execution.test_thresholdexecutor import TestThresholdExecutor 54 | from alphamind.tests.execution.test_targetvolexecutor import TestTargetVolExecutor 55 | from alphamind.tests.execution.test_pipeline import TestExecutionPipeline 56 | from alphamind.tests.portfolio.test_optimizers import TestOptimizers 57 | 58 | runner = TestRunner([TestNeutralize, 59 | TestStandardize, 60 | TestWinsorize, 61 | TestQuantile, 62 | TestSqlEngine, 63 | TestUniverse, 64 | TestConstraints, 65 | TestEvolver, 66 | TestLongShortBuild, 67 | TestRankBuild, 68 | TestPercentBuild, 69 | TestLinearBuild, 70 | TestMeanVarianceBuild, 71 | TestRiskModel, 72 | TestSimpleSettle, 73 | TestRiskAnalysis, 74 | TestPerformanceAnalysis, 75 | TestFactorAnalysis, 76 | TestQuantileAnalysis, 77 | TestModelBase, 78 | TestLinearModel, 79 | TestTreeModel, 80 | TestLoader, 81 | TestComposer, 82 | TestNaiveExecutor, 83 | TestThresholdExecutor, 84 | TestTargetVolExecutor, 85 | TestExecutionPipeline, 86 | TestOptimizers], 87 | alpha_logger) 88 | runner.run() 89 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = alpha-mind 8 | SOURCEDIR = . 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /doc/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
12 | # 13 | # import os 14 | # import sys 15 | # sys.path.insert(0, os.path.abspath('.')) 16 | 17 | import sphinx_rtd_theme 18 | 19 | # -- Project information ----------------------------------------------------- 20 | 21 | project = '多因子回测框架' 22 | title = '多因子回测框架文档' 23 | copyright = '2020, 融量' 24 | author = '融量' 25 | 26 | master_doc = 'index' 27 | 28 | # The full version, including alpha/beta/rc tags 29 | version = '0.1.0' 30 | release = version 31 | numfig = True 32 | 33 | # -- General configuration --------------------------------------------------- 34 | 35 | # Add any Sphinx extension module names here, as strings. They can be 36 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 37 | # ones. 38 | 39 | extensions = [ 40 | "sphinx_rtd_theme", 41 | "docxbuilder" 42 | ] 43 | 44 | # Add any paths that contain templates here, relative to this directory. 45 | templates_path = ['_templates'] 46 | 47 | # The language for content autogenerated by Sphinx. Refer to documentation 48 | # for a list of supported languages. 49 | # 50 | # This is also used if you do content translation via gettext catalogs. 51 | # Usually you set "language" from the command line for these cases. 52 | language = 'zh_CN' 53 | 54 | # List of patterns, relative to source directory, that match files and 55 | # directories to ignore when looking for source files. 56 | # This pattern also affects html_static_path and html_extra_path. 57 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 58 | 59 | 60 | # -- Options for HTML output ------------------------------------------------- 61 | 62 | # The theme to use for HTML and HTML Help pages. See the documentation for 63 | # a list of builtin themes. 64 | # 65 | html_theme = "sphinx_rtd_theme" 66 | html_show_sourcelink = False 67 | 68 | html_theme_options = { 69 | 'logo_only': False, 70 | 'display_version': True, 71 | 'prev_next_buttons_location': 'bottom', 72 | 'style_external_links': False, 73 | # 'style_nav_header_background': 'blue', 74 | # Toc options 75 | 'collapse_navigation': True, 76 | 'sticky_navigation': True, 77 | 'navigation_depth': 4, 78 | 'includehidden': True, 79 | 'titles_only': False 80 | } 81 | 82 | # Add any paths that contain custom static files (such as style sheets) here, 83 | # relative to this directory. They are copied after the builtin static files, 84 | # so a file named "default.css" will overwrite the builtin "default.css". 
85 | html_static_path = ['_static'] 86 | 87 | 88 | # -- Options for latex output ---------------------------------------------- 89 | 90 | latex_engine = "xelatex" 91 | 92 | latex_elements = { 93 | "papersize": "a4paper", 94 | 'fncychap': "\\usepackage[Sonny]{fncychap}", 95 | 'inputenc': "", 96 | 'utf8extra': "", 97 | 'fontpkg': '\\usepackage{amsmath,amsfonts,amssymb,amsthm}', 98 | 'preamble': r""" 99 | \setcounter{secnumdepth}{2} 100 | \setcounter{tocdepth}{2} 101 | \usepackage{fontspec} 102 | """, 103 | "figure_align": "H" 104 | } 105 | 106 | latex_documents = [ 107 | ("index", 'main.tex', '多因子回测框架文档', 108 | '融量量化团队', 'manual') 109 | ] 110 | 111 | 112 | # -- Options for docx output ----------------------------------------------- 113 | docx_documents = [ 114 | ('index', 'quantitative_research.docx', { 115 | 'title': '标准化模型服务文档', 116 | 'created': '融量量化团队', 117 | 'subject': '融量量化团队', 118 | 'keywords': ['sphinx'] 119 | }, True), 120 | ] 121 | 122 | docx_style = "docx/style.docx" 123 | docx_pagebreak_before_section = 1 124 | -------------------------------------------------------------------------------- /doc/docx/style.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-miner/alpha-mind/023fca01d2cea7cd50328396c60b06c99706c426/doc/docx/style.docx -------------------------------------------------------------------------------- /doc/index.rst: -------------------------------------------------------------------------------- 1 | .. alpha-mind documentation master file, created by 2 | sphinx-quickstart on Tue May 29 16:58:56 2018. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Multi-Factor Backtesting Framework 7 | ==================================== 8 | 9 | .. toctree:: 10 | :maxdepth: 2 11 | :caption: Contents 12 | 13 | src/changelog 14 | src/whatisit 15 | src/introduction 16 | 17 | -------------------------------------------------------------------------------- /doc/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=build 12 | set SPHINXPROJ=alpha-mind 13 | 14 | if "%1" == "" goto help 15 | 16 | %SPHINXBUILD% >NUL 2>NUL 17 | if errorlevel 9009 ( 18 | echo. 19 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 20 | echo.installed, then set the SPHINXBUILD environment variable to point 21 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 22 | echo.may add the Sphinx directory to PATH. 23 | echo. 
24 | echo.If you don't have Sphinx installed, grab it from 25 | echo.http://sphinx-doc.org/ 26 | exit /b 1 27 | ) 28 | 29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 30 | goto end 31 | 32 | :help 33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 34 | 35 | :end 36 | popd 37 | -------------------------------------------------------------------------------- /doc/src/changelog.rst: -------------------------------------------------------------------------------- 1 | ************ 2 | Changelog 3 | ************ 4 | 5 | Release 0.1.0 6 | ============================== 7 | 8 | Features added 9 | -------------- 10 | 11 | * Added usage documentation. -------------------------------------------------------------------------------- /doc/src/img/alpha-mind.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-miner/alpha-mind/023fca01d2cea7cd50328396c60b06c99706c426/doc/src/img/alpha-mind.png -------------------------------------------------------------------------------- /doc/src/introduction.rst: -------------------------------------------------------------------------------- 1 | ************ 2 | Introduction 3 | ************ 4 | 5 | This short introduction walks you through the full workflow of the multi-factor modelling framework: 6 | data loading, factor mining, factor combination, portfolio optimization and backtesting. 7 | 8 | 9 | Workflow Diagram 10 | ================ 11 | 12 | Omitted. 13 | 14 | Data Access 15 | =============== 16 | 17 | The multi-factor framework ships with its own database schema and can currently connect to mysql and postgresql (more 18 | databases, such as sqlserver, will be supported in the future). The table layout is transparent to users; to specify a data source, only the following is needed: 19 | 20 | .. code-block:: py 21 | :linenos: 22 | 23 | from alphamind.api import * 24 | 25 | data_source = "url_for_some_database" 26 | engine = SqlEngine(data_source) 27 | 28 | Backtest Settings 29 | ================= 30 | 31 | Many options can be configured for a backtest, for example: 32 | 33 | * start and end dates 34 | * rebalancing frequency 35 | * stock universe, usually the constituents of some index 36 | * industry classification; the ShenWan scheme, with code "sw", is currently supported 37 | * benchmark index 38 | * portfolio construction method 39 | 40 | In code, this can be written as: 41 | 42 | .. code-block:: py 43 | :linenos: 44 | 45 | start_date = '2020-01-01' 46 | end_date = '2020-02-21' 47 | 48 | freq = '10b' 49 | industry_name = 'sw' 50 | universe = Universe('hs300') 51 | benchmark_code = 300 52 | method = 'risk_neutral' 53 | 54 | Factor Pool 55 | ==================== 56 | 57 | Users can define a complete factor pool; the multi-factor framework supports backtesting any number of factors: 58 | 59 | .. code-block:: py 60 | :linenos: 61 | 62 | alpha_factors = { 63 | 'f01': CSQuantiles(LAST('EMA5D')), 64 | 'f02': CSQuantiles(LAST('EMV6D')), 65 | } 66 | 67 | Here we use two factors, EMA5D and EMV6D, and map both to cross-sectional quantiles. 68 | 69 | Machine Learning Models 70 | ======================= 71 | 72 | To combine the factors, we attach an alpha model: 73 | 74 | .. code-block:: py 75 | :linenos: 76 | 77 | weights = dict(f01=1., f02=1.) 78 | alpha_model = ConstLinearModel(features=alpha_factors, weights=weights) 79 | 80 | Here we use an equal-weight model. The framework supports several different machine learning models, and users can also plug in custom models of their own. 81 | 
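82 | As a sketch of a trainable alternative (assuming the LinearRegression wrapper exported by alphamind.api, and an already-prepared feature matrix ``x`` with forward returns ``y``), a fitted linear model can be dropped in for the constant-weight one: 83 | 84 | .. code-block:: py 85 | :linenos: 86 | 87 | alpha_model = LinearRegression(features=alpha_factors, fit_intercept=False) 88 | alpha_model.fit(x, y)  # learn the factor weights instead of fixing them 89 | 90 | 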
91 | Portfolio Optimization 92 | ====================== 93 | 94 | Portfolio optimization in the framework is, at its core, mean-variance optimization, but it supports many features: 95 | 96 | * total leverage constraints; 97 | * industry weight constraints; 98 | * style factor constraints; 99 | * turnover constraints; 100 | * constituent weight limits; 101 | 102 | .. code-block:: py 103 | :linenos: 104 | 105 | # Constraints settings 106 | 107 | industry_names = industry_list(industry_name, industry_level) 108 | constraint_risk = ['SIZE', 'SIZENL', 'BETA'] 109 | total_risk_names = constraint_risk + ['benchmark', 'total'] 110 | all_styles = risk_styles + industry_styles + macro_styles 111 | 112 | b_type = [] 113 | l_val = [] 114 | u_val = [] 115 | 116 | previous_pos = pd.DataFrame() 117 | rets = [] 118 | turn_overs = [] 119 | leverages = [] 120 | 121 | for name in total_risk_names: 122 | if name == 'benchmark': 123 | b_type.append(BoundaryType.RELATIVE) 124 | l_val.append(0.8) 125 | u_val.append(1.0) 126 | else: 127 | b_type.append(BoundaryType.ABSOLUTE) 128 | l_val.append(0.0) 129 | u_val.append(0.0) 130 | 131 | bounds = create_box_bounds(total_risk_names, b_type, l_val, u_val) 132 | turn_over_target = 0.4 133 | 134 | The code above ensures that: 135 | 136 | * constituent weights make up no less than 80% of the portfolio; 137 | * the total weight is 100% (no leverage and no cash reserve); 138 | * there is no exposure, relative to the benchmark, on the SIZE, SIZENL and BETA style factors; 139 | * turnover per rebalance does not exceed 40% (counted on both sides). 140 | 141 | Putting It All Together... 142 | =========================== 143 | 144 | A complete backtest then only takes a few calls to the built-in functions: 145 | 146 | .. code-block:: py 147 | :linenos: 148 | 149 | running_setting = RunningSetting(weights_bandwidth=weights_bandwidth, 150 | rebalance_method=method, 151 | bounds=bounds, 152 | turn_over_target=turn_over_target) 153 | 154 | # Strategy 155 | strategy = Strategy(alpha_model, 156 | data_meta, 157 | universe=universe, 158 | start_date=start_date, 159 | end_date=end_date, 160 | freq=freq, 161 | benchmark=benchmark_code) 162 | 163 | strategy.prepare_backtest_data() 164 | ret_df, positions = strategy.run(running_setting=running_setting) 165 | 166 | 167 | Plotting 168 | =============== 169 | 170 | The `ret_df` returned by the step above contains the detailed return series, and `positions` holds the full position history. Users can plot the 171 | results themselves, for example: 172 | 173 | .. code-block:: py 174 | :linenos: 175 | 176 | ret_df[['turn_over', 'excess_return']].cumsum().plot(figsize=(14, 7), secondary_y='turn_over') 177 | 178 | which plots cumulative excess return together with cumulative turnover. 179 | 180 | A Complete Example 181 | ================== 182 | 183 | The complete code lives in the notebooks folder; see Example 2 - Strategy Analysis.ipynb. 184 | -------------------------------------------------------------------------------- /doc/src/whatisit.rst: -------------------------------------------------------------------------------- 1 | ********************* 2 | What is Alpha - Mind? 3 | ********************* 4 | 5 | Alpha - Mind Features 6 | ===================== 7 | 8 | **Alpha - Mind** is an end-to-end alpha modelling toolkit built on the multi-factor analysis methodology. It consists of the following four major functional modules: 9 | 10 | Factor Transformation 11 | --------------------- 12 | 13 | Fill, winsorize, neutralize and arithmetically combine raw data, with both time-series and cross-sectional 14 | operations, helping users turn raw data into standardized, model-ready factors. 15 | 16 | Factor Combination 17 | ------------------ 18 | 19 | A single factor only goes so far; in real scenarios we usually need to combine several factors. This combination can be done with standard arithmetic operations, 20 | or with machine-learning methods. Alpha - Mind supports both. 21 | 22 | 23 | Portfolio Optimization 24 | ---------------------- 25 | 26 | Factor values (alpha) are an expected ranking of stock attractiveness, but in real investing we cannot allocate purely by 27 | a stock's own alpha; the constraints involved may include industry allocation limits, style exposure limits, per-stock compliance limits, tracking error limits and turnover limits. 28 | 29 | Strategy Backtesting 30 | -------------------- 31 | 32 | Once all the steps above are done, the full pipeline from data to stock portfolio is in place; we then rebalance at each rebalancing period and finally obtain the 33 | strategy's performance over the whole backtest horizon (return, volatility, drawdown, etc.). 34 | 35 | Workflow Diagram 36 | ---------------- 37 | 38 | .. 
image:: img/alpha-mind.png 39 | 40 | 41 | -------------------------------------------------------------------------------- /entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | export PYTHONPATH=$PYTHONPATH:/ 4 | export DB_VENDOR="mysql" 5 | # export DB_URI="mysql+mysqldb://dxrw:dxRW20_2@121.37.138.1:13317/dxtest?charset=utf8" 6 | export DB_URI="mysql+mysqldb://reader:Reader#2020@121.37.138.1:13316/vision_product?charset=utf8" 7 | export FACTOR_TABLES="factor_momentum" 8 | jupyter lab --ip="0.0.0.0" --port=8080 --allow-root --ServerApp.token='' --ServerApp.password='sha1:f7761f682bc4:1aba35e73699fe62570573de373bf95b491022a7' -------------------------------------------------------------------------------- /install/001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-miner/alpha-mind/023fca01d2cea7cd50328396c60b06c99706c426/install/001.png -------------------------------------------------------------------------------- /install/002.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-miner/alpha-mind/023fca01d2cea7cd50328396c60b06c99706c426/install/002.png -------------------------------------------------------------------------------- /install/install.md: -------------------------------------------------------------------------------- 1 | ## Alpha-Mind Installation Guide 2 | 3 | ### 1. Environment 4 | 5 | #### The mysql driver 6 | 7 | The factor data used by `Alpha-Mind` is stored in a database, so the environment that runs `Alpha-Mind` needs a suitable database driver installed. Here we use `mysql`. 8 | 9 | The official download page for the `mysql` driver: 10 | 11 | https://dev.mysql.com/downloads/ 12 | 13 | After opening the download page, you will see the following: 14 | 15 | ![](001.png) 16 | 17 | Click `Connector/Python` to open the `Python` driver download page: 18 | 19 | ![002](002.png) 20 | 21 | On that page: 22 | 23 | - choose the correct operating system 24 | - choose the correct operating system version 25 | - choose a suitable driver version to download 26 | 27 | Then download and install the database driver. 28 | 29 | #### Compiler 30 | 31 | Some of the packages `Alpha-Mind` depends on, such as `ecos`, are compiled during installation, so the environment needs a C compiler. 32 | 33 | - Windows 34 | install Visual Studio 2015 or later 35 | 36 | - Linux 37 | 38 | ```bash 39 | yum -y install gcc 40 | yum -y install gcc-c++ 41 | ``` 42 | 43 | or 44 | 45 | ```bash 46 | apt-get install gcc 47 | apt-get install g++ 48 | ``` 49 | 50 | 51 | 52 | ### 2. Installation 53 | 54 | Once the environment is ready, run the following command to install the `Alpha-Mind` package: 55 | 56 | ```bash 57 | pip install Alpha-Mind 58 | ``` 59 | 60 | If your network connection is poor, a mirror inside China can be used instead, e.g. the Aliyun mirror: 61 | 62 | ```bash 63 | pip install Alpha-Mind -i https://mirrors.aliyun.com/pypi/simple 64 | ``` 65 | 66 | #### Environment variables 67 | 68 | After installation, the following environment variables must be set correctly for `Alpha-Mind` to work: 69 | 70 | - **DB_VENDOR**: the database type, here set to `mysql`: 71 | 72 | `DB_VENDOR=mysql` 73 | 74 | - **DB_URI**: the database connection string 75 | 76 | `DB_URI=mysql+mysqldb://rlUser:123456@10.16.50.12:3306/rl?charset=utf8` 77 | 78 | - **FACTOR_TABLES**: the names of the factor tables, separated by `,`, for example: 79 | 80 | `FACTOR_TABLES=factor_momentum,factor_power_volume,factor_basic_derivation`
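 81 | 82 | On Linux, for example, these can simply be exported in the shell before launching Python; a minimal sketch, reusing the example values above (substitute your own connection details): 83 | 84 | ```bash 85 | export DB_VENDOR=mysql 86 | export DB_URI="mysql+mysqldb://rlUser:123456@10.16.50.12:3306/rl?charset=utf8" 87 | export FACTOR_TABLES=factor_momentum,factor_power_volume,factor_basic_derivation 88 | ```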
-------------------------------------------------------------------------------- /notebooks/Quick Start 6 - Formula Based Stocks Screening.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Alphamind Quick Start 6: Formula Based Stock Screening" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 2, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "%matplotlib inline\n", 17 | "\n", 18 | "import os\n", 19 | "from PyFin.api import *\n", 20 | "from alphamind.api import *" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "## 1. Writing the formula\n", 28 | "-------------------------" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 3, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "# CSTopN picks the top N values in the cross-section\n", 38 | "universe_name = 'hs300'\n", 39 | "# pick the 3 securities with the highest EMA5D factor values\n", 40 | "# see the official documentation for the available formula functions\n", 41 | "formula = CSTopN(LAST('EMA5D'), 3) \n", 42 | "ref_date = '2020-01-02'" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "## 2. Fetching the data\n", 50 | "---------------" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 4, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "depends = formula.fields\n", 60 | "engine = SqlEngine(os.environ['DB_URI'])\n", 61 | "universe = Universe(universe_name)  # set the stock universe\n", 62 | "codes = universe.query(engine, dates=[ref_date])" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 5, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "factors = engine.fetch_factor(ref_date, depends, codes.code.tolist()).dropna()\n", 72 | "factors.index = [1] * len(factors)\n", 73 | "factors = factors[['code'] + depends]" 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": {}, 79 | "source": [ 80 | "## 3. Stock Screening\n", 81 | "---------------------" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 6, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "res = formula.transform(factors, name='factor', category_field='code')" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 7, 96 | "metadata": {}, 97 | "outputs": [ 98 | { 99 | "data": { 100 | "text/html": [ 101 | "
\n", 102 | "\n", 115 | "\n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | "
factorcode
11.02010000438
11.02010001184
11.02010019213
\n", 141 | "
" 142 | ], 143 | "text/plain": [ 144 | " factor code\n", 145 | "1 1.0 2010000438\n", 146 | "1 1.0 2010001184\n", 147 | "1 1.0 2010019213" 148 | ] 149 | }, 150 | "execution_count": 7, 151 | "metadata": {}, 152 | "output_type": "execute_result" 153 | } 154 | ], 155 | "source": [ 156 | "res[res.factor == 1]" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": null, 162 | "metadata": {}, 163 | "outputs": [], 164 | "source": [] 165 | } 166 | ], 167 | "metadata": { 168 | "kernelspec": { 169 | "display_name": "Python 3", 170 | "language": "python", 171 | "name": "python3" 172 | }, 173 | "language_info": { 174 | "codemirror_mode": { 175 | "name": "ipython", 176 | "version": 3 177 | }, 178 | "file_extension": ".py", 179 | "mimetype": "text/x-python", 180 | "name": "python", 181 | "nbconvert_exporter": "python", 182 | "pygments_lexer": "ipython3", 183 | "version": "3.8.8" 184 | } 185 | }, 186 | "nbformat": 4, 187 | "nbformat_minor": 4 188 | } 189 | -------------------------------------------------------------------------------- /notebooks/Step By Step 01 - 入门.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Step By Step 01 - 入门\n", 8 | "----------" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "## 一、常用工具包\n", 16 | "\n", 17 | "在**alpha-mind**的使用中,经常会用到的工具包括:\n", 18 | "\n", 19 | "* *pandas*:主要用于表格数据处理;\n", 20 | "* *numpy*:高性能的向量计算工具;\n", 21 | "* *matplotlib*:作图工具。\n", 22 | "\n", 23 | "除此之外,我们还会用到一些比较专门的工具:\n", 24 | "\n", 25 | "* *scipy*:一些常用的数值算法;\n", 26 | "* *cvxpy*:优化工具包\n", 27 | "\n", 28 | "下面的代码验证以下包已经正确按照:" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 1, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "import numpy as np\n", 38 | "import pandas as pd\n", 39 | "from matplotlib import pyplot as plt\n", 40 | "import scipy\n", 41 | "import cvxpy" 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "metadata": {}, 47 | "source": [ 48 | "### 二、**alpha-mind**\n", 49 | "\n", 50 | "**alpha-mind**是标准的python工具包,可以直接从**pip**安装:\n", 51 | "\n", 52 | "```bash\n", 53 | "$ pip install alpha-mind\n", 54 | "```" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 4, 60 | "metadata": {}, 61 | "outputs": [ 62 | { 63 | "data": { 64 | "text/plain": [ 65 | "'0.3.1'" 66 | ] 67 | }, 68 | "execution_count": 4, 69 | "metadata": {}, 70 | "output_type": "execute_result" 71 | } 72 | ], 73 | "source": [ 74 | "import alphamind as ad\n", 75 | "from alphamind.api import *\n", 76 | "ad.__version__" 77 | ] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": {}, 82 | "source": [ 83 | "可以验证alpha-mind已经安装成功,下面我们可以正式开始探索它的各种功能!" 
84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": null, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [] 92 | } 93 | ], 94 | "metadata": { 95 | "kernelspec": { 96 | "display_name": "Python 3", 97 | "language": "python", 98 | "name": "python3" 99 | }, 100 | "language_info": { 101 | "codemirror_mode": { 102 | "name": "ipython", 103 | "version": 3 104 | }, 105 | "file_extension": ".py", 106 | "mimetype": "text/x-python", 107 | "name": "python", 108 | "nbconvert_exporter": "python", 109 | "pygments_lexer": "ipython3", 110 | "version": "3.8.8" 111 | } 112 | }, 113 | "nbformat": 4, 114 | "nbformat_minor": 4 115 | } 116 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | arrow 2 | cvxpy 3 | cvxopt 4 | cython 5 | deprecated 6 | ecos 7 | finance-python 8 | jupyter 9 | jupyterlab 10 | matplotlib 11 | mysqlclient 12 | numba 13 | numpy 14 | pandas 15 | portfolio-optimizer 16 | psycopg2-binary 17 | scikit-learn 18 | scipy 19 | simpleutils 20 | sqlalchemy 21 | statsmodels 22 | xgboost 23 | xlsxwriter 24 | -------------------------------------------------------------------------------- /requirements_docker.txt: -------------------------------------------------------------------------------- 1 | arrow 2 | cvxpy 3 | cvxopt 4 | deprecated 5 | ecos 6 | mysqlclient 7 | portfolio-optimizer 8 | psycopg2-binary 9 | simpleutils 10 | xgboost 11 | xlsxwriter 12 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 2017-4-25 4 | 5 | @author: cheng.li 6 | """ 7 | 8 | import io 9 | from setuptools import setup 10 | from setuptools import find_packages 11 | 12 | VERSION = "0.3.1" 13 | 14 | setup( 15 | name='Alpha-Mind', 16 | version=VERSION, 17 | packages=find_packages(), 18 | url='', 19 | license='MIT', 20 | author='wegamekinglc', 21 | author_email='', 22 | scripts=['alphamind/bin/alphamind'], 23 | install_requires=io.open('requirements.txt', encoding='utf8').read(), 24 | description='', 25 | include_package_data=True 26 | ) --------------------------------------------------------------------------------