├── tests ├── __init__.py ├── test_ploting.py ├── data_config.py ├── test_patch_all.py ├── test_dataview.py ├── test_py_expression_eval.py └── test_research.py ├── jaqs_fxdayu ├── VERSION.txt ├── data │ ├── align.py │ ├── dataapi │ │ ├── utils.py │ │ ├── jrpc_py.py │ │ ├── data_api.py │ │ ├── __init__.py │ │ ├── README.md │ │ └── LICENSE │ ├── __init__.py │ ├── search_doc.py │ ├── signal_function_mod.py │ ├── built_in_funcs_docs.csv │ └── py_expression_eval.py ├── util │ ├── dtutil.py │ ├── fileio.py │ ├── pdutil.py │ ├── numeric.py │ ├── profile.py │ ├── sequence.py │ ├── __init__.py │ ├── concat.py │ └── dp.py ├── research │ ├── timingdigger │ │ ├── __init__.py │ │ ├── performance.py │ │ └── plotting.py │ ├── signaldigger │ │ ├── __init__.py │ │ ├── plotting.py │ │ ├── performance.py │ │ ├── process.py │ │ ├── signal_creator.py │ │ ├── optimizer.py │ │ └── analysis.py │ └── __init__.py ├── __init__.py └── patch_util │ ├── postimport.py │ └── __init__.py ├── requirements_doc.txt ├── requirements.txt ├── docs ├── digger │ ├── output_14_2.png │ └── output_18_2.png ├── hf_dataview │ └── output_17_0.png ├── timingdigger │ ├── output_21_2.png │ └── output_25_2.png ├── index.md ├── optimizer.md ├── dp.md └── analysis.md ├── publish └── publish_pypi.sh ├── mkdocs.yml ├── .gitignore ├── setup.py └── README.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /jaqs_fxdayu/VERSION.txt: -------------------------------------------------------------------------------- 1 | 0.2.2 -------------------------------------------------------------------------------- /requirements_doc.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /jaqs_fxdayu/data/align.py: -------------------------------------------------------------------------------- 1 | # noinspection PyUnresolvedReferences 2 | from jaqs.data.align import * 3 | -------------------------------------------------------------------------------- /jaqs_fxdayu/util/dtutil.py: -------------------------------------------------------------------------------- 1 | # noinspection PyUnresolvedReferences 2 | from jaqs.util.dtutil import * 3 | -------------------------------------------------------------------------------- /jaqs_fxdayu/util/fileio.py: -------------------------------------------------------------------------------- 1 | # noinspection PyUnresolvedReferences 2 | from jaqs.util.fileio import * 3 | -------------------------------------------------------------------------------- /jaqs_fxdayu/util/pdutil.py: -------------------------------------------------------------------------------- 1 | # noinspection PyUnresolvedReferences 2 | from jaqs.util.pdutil import * 3 | -------------------------------------------------------------------------------- /jaqs_fxdayu/util/numeric.py: -------------------------------------------------------------------------------- 1 | # noinspection PyUnresolvedReferences 2 | from jaqs.util.numeric import * 3 | -------------------------------------------------------------------------------- /jaqs_fxdayu/util/profile.py: -------------------------------------------------------------------------------- 1 | # noinspection PyUnresolvedReferences 2 | from jaqs.util.profile import * 3 | -------------------------------------------------------------------------------- 
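Most of the modules listed so far are one-line shims that re-export the matching `jaqs` module. A minimal sketch (assuming `jaqs` is installed) of the guarantee this pattern gives:

```python
# Every public name of the upstream module is reachable through the shim,
# and refers to the very same object.
import jaqs.util.numeric as upstream
import jaqs_fxdayu.util.numeric as shim

names = getattr(upstream, "__all__",
                [n for n in dir(upstream) if not n.startswith("_")])
assert all(getattr(shim, n) is getattr(upstream, n) for n in names)
```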
/jaqs_fxdayu/util/sequence.py: -------------------------------------------------------------------------------- 1 | # noinspection PyUnresolvedReferences 2 | from jaqs.util.sequence import * 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | jaqs==0.6.11 2 | pandas>=0.20.0 3 | scipy>=1.0.1 4 | sklearn>=0.0 5 | statsmodels>=0.8.0 -------------------------------------------------------------------------------- /docs/digger/output_14_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingetouzi/jaqs-fxdayu/HEAD/docs/digger/output_14_2.png -------------------------------------------------------------------------------- /docs/digger/output_18_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingetouzi/jaqs-fxdayu/HEAD/docs/digger/output_18_2.png -------------------------------------------------------------------------------- /jaqs_fxdayu/data/dataapi/utils.py: -------------------------------------------------------------------------------- 1 | # noinspection PyUnresolvedReferences 2 | from jaqs.data.dataapi.utils import * 3 | -------------------------------------------------------------------------------- /jaqs_fxdayu/research/timingdigger/__init__.py: -------------------------------------------------------------------------------- 1 | from .digger import TimingDigger 2 | 3 | __all__ = ['TimingDigger'] 4 | -------------------------------------------------------------------------------- /docs/hf_dataview/output_17_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingetouzi/jaqs-fxdayu/HEAD/docs/hf_dataview/output_17_0.png -------------------------------------------------------------------------------- /jaqs_fxdayu/data/dataapi/jrpc_py.py: -------------------------------------------------------------------------------- 1 | # noinspection PyUnresolvedReferences 2 | from jaqs.data.dataapi.jrpc_py import * 3 | -------------------------------------------------------------------------------- /docs/timingdigger/output_21_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingetouzi/jaqs-fxdayu/HEAD/docs/timingdigger/output_21_2.png -------------------------------------------------------------------------------- /docs/timingdigger/output_25_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingetouzi/jaqs-fxdayu/HEAD/docs/timingdigger/output_25_2.png -------------------------------------------------------------------------------- /jaqs_fxdayu/data/dataapi/data_api.py: -------------------------------------------------------------------------------- 1 | # noinspection PyUnresolvedReferences 2 | from jaqs.data.dataapi.data_api import * 3 | 4 | -------------------------------------------------------------------------------- /publish/publish_pypi.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python setup.py sdist 3 | python setup.py bdist_wheel --universal 4 | twine upload dist/* -------------------------------------------------------------------------------- /jaqs_fxdayu/util/__init__.py: 
--------------------------------------------------------------------------------
1 | from .dtutil import *
2 | from .fileio import *
3 | from .numeric import *
4 | from .pdutil import *
5 | from .profile import *
6 | from .sequence import *
7 | from .dp import *
8 |
--------------------------------------------------------------------------------
/jaqs_fxdayu/research/signaldigger/__init__.py:
--------------------------------------------------------------------------------
1 | from .digger import SignalDigger
2 | from .optimizer import Optimizer
3 | from .signal_creator import SignalCreator
4 |
5 | __all__ = ['SignalDigger', "Optimizer", "SignalCreator"]
6 |
--------------------------------------------------------------------------------
/jaqs_fxdayu/research/__init__.py:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 |
3 | from .signaldigger import Optimizer, SignalDigger, SignalCreator
4 | from .timingdigger import TimingDigger
5 |
6 | __all__ = ['SignalDigger', "TimingDigger", "Optimizer", "SignalCreator"]
7 |
--------------------------------------------------------------------------------
/jaqs_fxdayu/__init__.py:
--------------------------------------------------------------------------------
1 | from os.path import join, dirname
2 |
3 | import matplotlib
4 | import matplotlib.pyplot  # imported eagerly so JAQS cannot reload it and reset the backend
5 |
6 | from .patch_util import patch_all
7 |
8 | with open(join(dirname(__file__), 'VERSION.txt'), 'rb') as f:
9 |     __version__ = f.read().decode('ascii').strip()
10 |
--------------------------------------------------------------------------------
/tests/test_ploting.py:
--------------------------------------------------------------------------------
1 | import matplotlib as mpl
2 |
3 | _old = mpl.get_backend()
4 |
5 | from jaqs_fxdayu.data import DataView
6 |
7 | assert mpl.get_backend() == _old
8 |
9 | import importlib
10 | importlib.reload(mpl)
11 |
12 | from jaqs_fxdayu import patch_all
13 |
14 | patch_all()
15 | assert mpl.get_backend() == _old
16 |
--------------------------------------------------------------------------------
/jaqs_fxdayu/data/dataapi/__init__.py:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 | """
3 | dataapi defines standard APIs for communicating with data service.
4 |
5 | """
6 | from __future__ import absolute_import
7 | from __future__ import division
8 | from __future__ import print_function
9 | from __future__ import unicode_literals
10 |
11 | from .data_api import DataApi
12 |
13 | __all__ = ['DataApi']
14 |
--------------------------------------------------------------------------------
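`DataApi`, re-exported above, is the raw transport client. A hedged login sketch (the address and credentials are placeholders; the real flow appears in the dataapi README further below):

```python
from jaqs_fxdayu.data.dataapi import DataApi

api = DataApi(addr="tcp://data.quantos.org:8910")  # placeholder address
result, msg = api.login("phone", "token")          # your quantos.org account
print(result, msg)
```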
/jaqs_fxdayu/data/__init__.py:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 |
3 | """
4 | Modules relevant to data.
5 |
6 | """
7 |
8 | from .dataapi import DataApi
9 | from .dataservice import RemoteDataService, DataService, LocalDataService
10 | from .dataview import DataView, EventDataView
11 | from .hf_dataview import HFDataView
12 | from .py_expression_eval import Parser
13 |
14 | # we do not expose align and basic
15 | __all__ = ['DataApi', 'DataService', 'RemoteDataService', 'LocalDataService', 'DataView', 'HFDataView', 'Parser', 'EventDataView']
16 |
--------------------------------------------------------------------------------
/tests/data_config.py:
--------------------------------------------------------------------------------
1 | data_config = {
2 |     "remote.data.address": "tcp://192.168.0.101:23000",
3 |     "remote.data.username": "18566262672",
4 |     "remote.data.password": "eyJhbGciOiJIUzI1NiJ9.eyJjcmVhdGVfdGltZSI6IjE1MTI3MDI3NTAyMTIiLCJpc3MiOiJhdXRoMCIsImlkIjoiMTg1NjYyNjI2NzIifQ.O_-yR0zYagrLRvPbggnru1Rapk4kiyAzcwYt2a3vlpM",
5 | }
6 | # data_config = {
7 | #     "remote.data.address": "tcp://data.quantos.org:8910",
8 | #     "remote.data.username": "18566262672",
9 | #     "remote.data.password": "eyJhbGciOiJIUzI1NiJ9.eyJjcmVhdGVfdGltZSI6IjE1MTI3MDI3NTAyMTIiLCJpc3MiOiJhdXRoMCIsImlkIjoiMTg1NjYyNjI2NzIifQ.O_-yR0zYagrLRvPbggnru1Rapk4kiyAzcwYt2a3vlpM",
10 | #     "timeout": 180
11 | # }
12 |
--------------------------------------------------------------------------------
/mkdocs.yml:
--------------------------------------------------------------------------------
1 | site_name: jaqs-fxdayu
2 | site_author: xingetouzi
3 | repo_url: https://github.com/xingetouzi/jaqs-fxdayu/
4 | pages:
5 | - Home: index.md
6 | - API docs (basics):
7 |   - dataservice: dataservice.md
8 |   - dataview: dataview.md
9 |   - digger: digger/digger.md
10 |   - performance: performance.md
11 | - API docs (extensions):
12 |   - analysis: analysis.md
13 |   - process: process.md
14 |   - optimizer: optimizer.md
15 |   - multi_factor: multi_factor.md
16 |   - dp: dp.md
17 |   - timingdigger: timingdigger/timingdigger.md
18 |   - hf_dataview: hf_dataview/hf_dataview.md
19 |
20 | theme: readthedocs
21 |
22 | markdown_extensions:
23 | - toc:
24 |     permalink: 
25 | - admonition
26 | - def_list
27 |
28 | copyright: Copyright © 2014
29 |
--------------------------------------------------------------------------------
/jaqs_fxdayu/research/signaldigger/plotting.py:
--------------------------------------------------------------------------------
1 | from jaqs_fxdayu.patch_util import auto_register_patch
2 | from jaqs.research.signaldigger.plotting import *
3 |
4 |
5 | @auto_register_patch()
6 | def plot_ic_by_group(ic_group, ax=None):
7 |     """
8 |     Plots the Spearman rank information coefficient of a factor
9 |     against provided forward returns, separated by group.
10 |
11 |
12 |     Parameters
13 |     ----------
14 |     ic_group : pd.DataFrame
15 |         Group-wise mean information coefficient.
16 |     ax : matplotlib.Axes, optional
17 |         Axes upon which to plot.
18 |
19 |     Returns
20 |     -------
21 |     ax : matplotlib.Axes
22 |         The axes that were plotted on.
23 |     """
24 |     if ax is None:
25 |         f, ax = plt.subplots(1, 1, figsize=(18, 6))
26 |     ic_group.plot(kind='bar', ax=ax)
27 |
28 |     ax.set(title="Information Coefficient By Group", xlabel="")
29 |     ax.set_xticklabels(ic_group.index, rotation=45)
30 |
31 |     return ax
32 |
--------------------------------------------------------------------------------
/jaqs_fxdayu/data/dataapi/README.md:
--------------------------------------------------------------------------------
1 | # DataApi
2 |
3 | Standard data API definitions.
4 |
5 | # Installation steps
6 |
7 | ## 1. Install a Python environment
8 |
9 | If you do not yet have a local Python environment, we strongly recommend Anaconda (an integrated Python distribution that bundles the common packages and is easy to install, avoiding unnecessary trouble). Open the [Anaconda website](http://www.continuum.io/downloads), choose your operating system and the Python version you want, and download it.
10 |
11 | After the download completes, follow the graphical installer. By default, Anaconda sets the PATH environment automatically.
12 |
13 | ***Note***: if you run into problems or need more detailed steps, see the [Anaconda installation guide](https://github.com/quantOS-org/JAQS/blob/master/doc/install.md#1安装python环境)
14 |
15 | ## 2. Install dependencies
16 |
17 | If your Python environment is not an integrated distribution like Anaconda, the dependencies must be installed separately; given that pandas/numpy are already present, you also need:
18 | - `pyzmq`
19 | - `msgpack_python`
20 | - `python-snappy`
21 |
22 | Each can be installed on its own, e.g.: `pip install pyzmq`
23 |
24 | Note that `python-snappy` needs quite a few build dependencies; install it as described in [how to install python-snappy](https://github.com/quantOS-org/JAQS/blob/master/doc/install.md#如何安装python-snappy包).
25 |
26 |
27 | ## 3. Using DataApi
28 |
29 | ```python
30 | from DataApi import DataApi  # assumes the project directory is named DataApi and lives in the working directory
31 |
32 | api = DataApi(addr="tcp://data.tushare.org:8910")
33 | result, msg = api.login("phone", "token")  # sample account; replace with the account you registered at www.quantos.org
34 | print(result)
35 | print(msg)
36 | ```
37 |
38 |
--------------------------------------------------------------------------------
/jaqs_fxdayu/data/search_doc.py:
--------------------------------------------------------------------------------
1 | # encoding=utf-8
2 | from pathlib import Path
3 |
4 | import pandas as pd
5 |
6 | _path = Path(__file__).absolute().parent / "built_in_funcs_docs.csv"
7 |
8 |
9 | def get_doc():
10 |     with open(str(_path), "rb") as f:
11 |         data = pd.read_csv(f)
12 |     return data
13 |
14 |
15 | class FuncDoc(object):
16 |     def __init__(self):
17 |         self.doc = get_doc()
18 |         self.types = self.doc["分类"].drop_duplicates().values
19 |         self.funcs = self.doc["公式"].values
20 |         self.descriptions = self.doc["说明"].values
21 |
22 |     def search_by_type(self, _type):
23 |         result = self.doc["分类"].apply(lambda x: x.find(_type) > -1)
24 |         return self.doc[result]
25 |
26 |     def search_by_func(self, func, precise=False):
27 |         if precise:
28 |             result = self.doc["公式"].apply(lambda x: x.find(func) == 0)
29 |         else:
30 |             result = self.doc["公式"].apply(lambda x: x.lower().find(func.lower()) > -1)
31 |         return self.doc[result]
32 |
33 |     def search_by_description(self, description):
34 |         result = self.doc["说明"].apply(lambda x: x.find(description) > -1)
35 |         return self.doc[result]
36 |
37 |
38 | if __name__ == "__main__":
39 |     print(get_doc().to_dict())
40 |
--------------------------------------------------------------------------------
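A hedged usage sketch for `FuncDoc` above; the CSV keeps its Chinese column headers, which double as identifiers: 分类 (category), 公式 (formula), 说明 (description).

```python
from jaqs_fxdayu.data.search_doc import FuncDoc

docs = FuncDoc()
print(docs.types)                      # distinct categories
hits = docs.search_by_func("Ts_Rank")  # case-insensitive substring match
print(hits[["公式", "说明"]])
```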
/jaqs_fxdayu/patch_util/postimport.py:
--------------------------------------------------------------------------------
1 | import importlib
2 | import logging
3 | import sys
4 | from collections import defaultdict
5 |
6 | _post_import_hooks = defaultdict(list)
7 |
8 |
9 | class PostImportFinder(object):
10 |     def __init__(self):
11 |         self._skip = set()  # presumably tracks names already being handled, to avoid recursive lookups
12 |
13 |     def find_module(self, fullname, path=None):
14 |         if fullname in self._skip:
15 |             return None
16 |         self._skip.add(fullname)
17 |         return PostImportLoader(self)
18 |
19 |
20 | class PostImportLoader(object):
21 |     def __init__(self, finder):
22 |         self._finder = finder
23 |
24 |     def load_module(self, fullname):
25 |         # if fullname.startswith("jaqs"):
26 |         #     logging.debug(fullname)
27 |         importlib.import_module(fullname)
28 |         module = sys.modules[fullname]
29 |         for func in _post_import_hooks[fullname]:
30 |             func(module)
31 |         self._finder._skip.remove(fullname)
32 |         return module
33 |
34 |
35 | def when_imported(fullname):
36 |     def decorate(func):
37 |         if fullname in sys.modules:
38 |             func(sys.modules[fullname])
39 |         else:
40 |             _post_import_hooks[fullname].append(func)
41 |         return func
42 |
43 |     return decorate
44 |
45 |
46 | sys.meta_path.insert(0, PostImportFinder())
47 |
--------------------------------------------------------------------------------
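A minimal sketch of the post-import hook defined above: the callback fires as soon as the named module is imported, or immediately if it is already loaded.

```python
from jaqs_fxdayu.patch_util.postimport import when_imported

@when_imported("jaqs.data.dataview")
def on_dataview(module):
    # runs once jaqs.data.dataview has been imported
    print("loaded:", module.__name__)
```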
/tests/test_patch_all.py:
--------------------------------------------------------------------------------
1 | import unittest
2 |
3 |
4 | class TestPatchAll(unittest.TestCase):
5 |     prefix = "jaqs_fxdayu."
6 |
7 |     @classmethod
8 |     def setUpClass(cls):
9 |         from jaqs_fxdayu import patch_all
10 |         patch_all()
11 |
12 |     def test_dataview(self):
13 |         from jaqs.data import DataView
14 |         assert DataView.__module__.startswith(self.prefix)
15 |         from jaqs.data.dataview import DataView
16 |         assert DataView.__module__.startswith(self.prefix)
17 |
18 |     def test_parser(self):
19 |         from jaqs.data import Parser
20 |         assert Parser.__module__.startswith(self.prefix)
21 |         from jaqs.data.py_expression_eval import Parser
22 |         assert Parser.__module__.startswith(self.prefix)
23 |
24 |     def test_signaldigger(self):
25 |         from jaqs.research import SignalDigger
26 |         assert SignalDigger.__module__.startswith(self.prefix)
27 |         from jaqs.research.signaldigger import SignalDigger
28 |         assert SignalDigger.__module__.startswith(self.prefix)
29 |
30 |     def test_performance(self):
31 |         from jaqs.research.signaldigger import performance
32 |         assert performance.calc_signal_ic.__module__.startswith(self.prefix)
33 |         assert performance.calc_quantile_return_mean_std.__module__.startswith(self.prefix)
34 |         assert performance.mean_information_coefficient.__module__.startswith(self.prefix)
35 |         assert performance.price2ret.__module__.startswith(self.prefix)
36 |
37 |     def test_plotting(self):
38 |         from jaqs.research.signaldigger import plotting
39 |         assert hasattr(plotting, "plot_ic_by_group")
40 |
41 |
42 | if __name__ == "__main__":
43 |     unittest.main()
44 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Created by .ignore support plugin (hsz.mobi)
2 | ### VirtualEnv template
3 | # Virtualenv
4 | # http://iamzed.com/2009/05/07/a-primer-on-virtualenv/
5 | .Python
6 | [Bb]in
7 | [Ii]nclude
8 | [Ll]ib
9 | [Ll]ib64
10 | [Ll]ocal
11 | [Ss]cripts
12 | pyvenv.cfg
13 | .venv
14 | pip-selfcheck.json
15 | ### Python template
16 | # Byte-compiled / optimized / DLL files
17 | __pycache__/
18 | *.py[cod]
19 | *$py.class
20 |
21 | # C extensions
22 | *.so
23 |
24 | # Distribution / packaging
25 | env/
26 | build/
27 | develop-eggs/
28 | dist/
29 | downloads/
30 | eggs/
31 | .eggs/
32 | lib/
33 | lib64/
34 | parts/
35 | sdist/
36 | var/
37 | wheels/
38 | *.egg-info/
39 | .installed.cfg
40 | *.egg
41 |
42 | # PyInstaller
43 | # Usually these files are written by a python script from a template
44 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
45 | *.manifest
46 | *.spec
47 |
48 | # Installer logs
49 | pip-log.txt
50 | pip-delete-this-directory.txt
51 |
52 | # Unit test / coverage reports
53 | htmlcov/
54 | .tox/
55 | .coverage
56 | .coverage.*
57 | .cache
58 | nosetests.xml
59 | coverage.xml
60 | *,cover
61 | .hypothesis/
62 |
63 | # Translations
64 | *.mo
65 | *.pot
66 |
67 | # Django stuff:
68 | *.log
69 | local_settings.py
70 |
71 | # Flask stuff:
72 | instance/
73 | .webassets-cache
74 |
75 | # Scrapy stuff:
76 | .scrapy
77 |
78 | # Sphinx documentation
79 | docs/_build/
80 |
81 | # PyBuilder
82 | target/
83 |
84 | # Jupyter Notebook
85 | .ipynb_checkpoints
86 |
87 | # pyenv
88 | .python-version
89 |
90 | # celery beat schedule file
91 | celerybeat-schedule
92 |
93 | # SageMath parsed files
94 | *.sage.py
95 |
96 | # dotenv
97 | .env
98 |
99 | # virtualenv
100 | venv/
101 | ENV/
102 |
103 | # Spyder project settings
104 | .spyderproject
105 |
106 | # Rope project settings
107 | .ropeproject
108 |
109 | .idea/
110 | .vscode/
111 | output
112 | .persist
113 | docs/_source/data
114 | *.pdf
--------------------------------------------------------------------------------
/jaqs_fxdayu/util/concat.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | from pandas.core.internals import BlockManager, BlockPlacement
3 | import numpy as np
4 |
5 |
6 | # rebuild a DataFrame from the given blocks and return it
7 | def block_concat(dfs, idx, columns):
8 |     manager = BlockManager(iter_blocks(dfs), [columns, idx])
9 |     return pd.DataFrame(manager).copy()
10 |
11 |
12 | # yield the input DataFrames' data block by block
13 | def iter_blocks(dfs):
14 |     l = 0
15 |     for df in dfs:
16 |         for block in df._data.blocks:
17 |             # yield Block(block.values, block._mgr_locs.add(l))
18 |             yield block.__class__(block.values, placement=block._mgr_locs.add(l))
19 |         l += len(df.columns)
20 |
21 |
22 | # Horizontal concatenation; only supports DataFrames whose columns are a MultiIndex (the merged-data layout used by DataView).
23 | def quick_concat(dfs, level, index_name="trade_date", how="outer"):
24 |     """
25 |     dfs: list of DataFrame
26 |     level: names of the MultiIndex column levels
27 |     index_name: name of the output DataFrame's index
28 |     how: how the indexes are merged:
29 |         outer: union
30 |         inner: intersection
31 |     """
32 |     columns = join_columns(dfs, level)
33 |     if how == "outer":
34 |         index = join_indexes([df.index for df in dfs], index_name)
35 |     else:
36 |         index = intersect1d_indexes([df.index for df in dfs], index_name)
37 |     return block_concat(
38 |         [pd.DataFrame(df, index) for df in dfs],
39 |         index, columns
40 |     )
41 |
42 |
43 | # union-merge indexes
44 | def join_indexes(idxes, name=None):
45 |     return pd.Index(np.concatenate([index.values for index in idxes]), name=name).sort_values().drop_duplicates()
46 |
47 |
48 | # intersection-merge indexes
49 | def intersect1d_indexes(idxes, name=None):
50 |     return pd.Index(intersect1d(idxes), name=name).sort_values().drop_duplicates()
51 |
52 |
53 | def intersect1d(idxes):
54 |     if len(idxes) == 2:
55 |         return np.intersect1d(*idxes)
56 |     elif len(idxes) > 2:
57 |         return np.intersect1d(intersect1d(idxes[:-1]), idxes[-1])
58 |
59 |
60 | # build the new MultiIndex columns
61 | def join_columns(dfs, level=None):
62 |     """
63 |     dfs: list of DataFrame
64 |     level: names of the MultiIndex column levels
65 |     """
66 |     return pd.MultiIndex.from_tuples(np.concatenate([df.columns.values for df in dfs]), names=level)
--------------------------------------------------------------------------------
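A hedged example for `quick_concat` above. The inputs are illustrative, and note that `block_concat` builds on pandas `BlockManager` internals, so this is tied to the pandas 0.2x generation that requirements.txt pins; newer pandas may reject it.

```python
import pandas as pd
from jaqs_fxdayu.util.concat import quick_concat

idx = pd.Index([20180102, 20180103], name="trade_date")
cols = lambda field: pd.MultiIndex.from_product(
    [["000001.SZ", "600000.SH"], [field]])
close = pd.DataFrame([[10.0, 20.0], [10.5, 19.8]], idx, cols("close"))
open_ = pd.DataFrame([[9.9, 19.9], [10.4, 20.1]], idx, cols("open"))

wide = quick_concat([close, open_], level=["symbol", "field"])
# columns: MultiIndex of (symbol, field); index: union of the trade dates
```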
/jaqs_fxdayu/patch_util/__init__.py:
--------------------------------------------------------------------------------
1 | import importlib
2 | import logging
3 | import warnings
4 | import sys
5 | from collections import defaultdict
6 |
7 | from jaqs_fxdayu.patch_util.postimport import when_imported
8 |
9 | _patch_hooks = defaultdict(list)
10 | _module = "jaqs"
11 | _patched = False
12 |
13 | logger = logging.getLogger(__name__)
14 |
15 |
16 | def register_patch(fullname=_module):
17 |     def decorator(func):
18 |         if _patched:
19 |             raise RuntimeWarning("Patch %s was registered after jaqs_fxdayu.patch_all() was called." % func)
20 |         _patch_hooks[fullname].append(func)
21 |         return func
22 |
23 |     return decorator
24 |
25 |
26 | def reload_jaqs():
27 |     reload_lst = []
28 |     for m in list(sys.modules.keys()):
29 |         if m.startswith("jaqs."):
30 |             del sys.modules[m]
31 |             reload_lst.append(m)
32 |     for m in reload_lst:
33 |         importlib.import_module(m)
34 |
35 |
36 | def patch_all():
37 |     import matplotlib
38 |     import matplotlib.pyplot
39 |     global _patched
40 |     if _patched:
41 |         warnings.warn("jaqs_fxdayu.patch_all() should be called only once!")
42 |         return
43 |     importlib.import_module("jaqs_fxdayu.data")
44 |     importlib.import_module("jaqs_fxdayu.research.signaldigger")
45 |     for fullname, hooks in _patch_hooks.items():
46 |         for func in hooks:
47 |             when_imported(fullname)(func)
48 |     _patched = True
49 |     logger.debug("Finish Patch.")
50 |
51 |
52 | def auto_register_patch(fullname=None, name=None, parent_level=0):
53 |     def decorator(obj):
54 |         def _patch_module(m):
55 |             attr = obj.__name__ if name is None else name
56 |             logger.debug("Patch %s:%s ." % (m.__name__, attr))
57 |             setattr(m, attr, obj)
58 |
59 |         module_name = obj.__module__.replace("jaqs_fxdayu", "jaqs") if fullname is None else fullname
60 |         module_path = module_name.split(".") + [""]
61 |         for level in range(parent_level + 1):
62 |             register_patch(".".join(module_path[:-(level + 1)]))(_patch_module)
63 |         return obj
64 |
65 |     return decorator
66 |
--------------------------------------------------------------------------------
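How the pieces above fit together, as a hedged sketch: anything decorated with `auto_register_patch` is recorded against a `jaqs.*` module name (derived from `__module__`, or given explicitly via `fullname`) and is assigned onto that module once `patch_all()` runs. Registration has to happen before `patch_all()` is called.

```python
from jaqs_fxdayu.patch_util import auto_register_patch

# Illustrative helper, not part of the library: after patch_all(), it would
# appear as jaqs.research.signaldigger.plotting.plot_hello.
@auto_register_patch(fullname="jaqs.research.signaldigger.plotting")
def plot_hello(ax=None):
    return ax
```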
/setup.py:
--------------------------------------------------------------------------------
1 | import codecs
2 |
3 | try:
4 |     from pip._internal.req import parse_requirements  # for pip >= 10
5 | except ImportError:
6 |     from pip.req import parse_requirements
7 |
8 | from os.path import dirname, join
9 | from setuptools import (
10 |     find_packages,
11 |     setup,
12 | )
13 |
14 |
15 | def readme():
16 |     with codecs.open('README.md', 'r', encoding='utf-8') as f:
17 |         return f.read()
18 |
19 |
20 | def version():
21 |     with open(join(dirname(__file__), 'jaqs_fxdayu', 'VERSION.txt'), 'rb') as f:
22 |         return f.read().decode('ascii').strip()
23 |
24 |
25 | requirements = [str(getattr(ir, "req", None) or ir.requirement) for ir in parse_requirements("requirements.txt", session=False)]  # .req on pip < 20, .requirement on pip >= 20
26 | setup(
27 |     name='jaqs_fxdayu',
28 |     version=version(),
29 |     packages=find_packages(exclude=["examples", "tests", "tests.*", "docs"]),
30 |     author='xingetouzi',
31 |     author_email='public@fxdayu.com',
32 |     license='Apache License v2',
33 |     package_data={'': ['*.csv', '*.txt']},
34 |     url='https://github.com/xingetouzi/jaqs_fxdayu',
35 |     keywords="quantitative trading research finance",
36 |     install_requires=requirements,
37 |     description='Open source quantitative research&trading framework, based on https://github.com/quantOS-org/JAQS',
38 |     long_description=readme(),
39 |     zip_safe=False,
40 |     classifiers=[
41 |         'Programming Language :: Python',
42 |         "Intended Audience :: Developers",
43 |         "Intended Audience :: Education",
44 |         "Intended Audience :: End Users/Desktop",
45 |         "Intended Audience :: Financial and Insurance Industry",
46 |         "Intended Audience :: Information Technology",
47 |         "Intended Audience :: Science/Research",
48 |         "License :: OSI Approved :: Apache Software License",
49 |         "Natural Language :: Chinese (Simplified)",
50 |         "Natural Language :: English",
51 |         'Operating System :: Microsoft :: Windows',
52 |         'Operating System :: Unix',
53 |         'Programming Language :: Python :: 3.5',
54 |         'Programming Language :: Python :: 3.6',
55 |     ],
56 | )
57 |
--------------------------------------------------------------------------------
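The `getattr` guard in setup.py is needed because pip moved the parsed-requirement attribute (`.req` before pip 20, the plain string `.requirement` afterwards). If you would rather not depend on pip internals at all, a self-contained fallback looks like this (a sketch, not what the repo ships):

```python
def read_requirements(path="requirements.txt"):
    # plain-text parse: one requirement per line, comments and blanks skipped
    with open(path) as f:
        return [line.strip() for line in f
                if line.strip() and not line.lstrip().startswith("#")]
```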
/jaqs_fxdayu/research/timingdigger/performance.py:
--------------------------------------------------------------------------------
1 | from jaqs_fxdayu.research.signaldigger.performance import *
2 |
3 |
4 | def cal_return_stats(ret):
5 |     if isinstance(ret, pd.Series) or isinstance(ret, pd.DataFrame):
6 |         ret = ret.values
7 |     ret = ret.reshape(-1, 1)
8 |     summary_table = pd.DataFrame()
9 |     if len(ret) == 0:
10 |         return pd.DataFrame(data=np.nan,
11 |                             columns=['t-stat', 'p-value', "mean", "std", "info_ratio",
12 |                                      "skewness", "kurtosis", "pct5", "pct25", "pct50",
13 |                                      "pct75", "pct95", "occurance"],
14 |                             index=[0])
15 |     t_stats, p_values = scst.ttest_1samp(ret, np.zeros(ret.shape[1]), axis=0)
16 |
17 |     summary_table['t-stat'] = t_stats
18 |     summary_table['p-value'] = np.round(p_values, 5)
19 |     summary_table["mean"] = ret.mean()
20 |     summary_table["std"] = ret.std()
21 |     summary_table["info_ratio"] = summary_table["mean"] / summary_table["std"]
22 |     summary_table["skewness"] = scst.skew(ret, axis=0)
23 |     summary_table["kurtosis"] = scst.kurtosis(ret, axis=0)
24 |     for percent in [5, 25, 50, 75, 95]:
25 |         summary_table["pct" + str(percent)] = np.percentile(ret, percent)
26 |     summary_table["occurance"] = len(ret)
27 |
28 |     return summary_table
29 |
30 |
31 | def calc_performance_metrics(ser, cum_return=False, compound=False):
32 |     """
33 |     Calculate total return, volatility, mean return and information ratio.
34 |     Data frequency is assumed to be daily.
35 |
36 |     Parameters
37 |     ----------
38 |     ser : pd.DataFrame or pd.Series
39 |         Index is int date, values are floats.
40 |         ser should start from 0.
41 |     cum_return : bool
42 |         Whether ser is cumulative or daily return.
43 |     compound : bool
44 |         Whether calculation of return is compound.
45 |
46 |     Returns
47 |     -------
48 |     res : dict
49 |
50 |     """
51 |     if isinstance(ser, pd.DataFrame):
52 |         ser = ser.iloc[:, 0]
53 |     if cum_return:
54 |         cum_ret = ser
55 |         ret = cum2ret(cum_ret, period=1, compound=compound)
56 |     else:
57 |         ret = ser
58 |         cum_ret = ret2cum(ret, compound=compound)
59 |
60 |     total_ret = cum_ret.iat[-1]
61 |     std = np.std(ret)
62 |     mean = np.mean(ret)
63 |     res = {'total_ret': total_ret,
64 |            'std(ret)': std,
65 |            'mean(ret)': mean,
66 |            'ir': mean / std}
67 |     return res
68 |
69 |
--------------------------------------------------------------------------------
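A hedged usage sketch for `cal_return_stats` above (the returns are made up; the column spelling `occurance` follows the file):

```python
import pandas as pd
from jaqs_fxdayu.research.timingdigger.performance import cal_return_stats

trade_returns = pd.Series([0.012, -0.004, 0.030, -0.011, 0.007])
stats = cal_return_stats(trade_returns)
print(stats[["mean", "std", "info_ratio", "occurance"]])
```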
/jaqs_fxdayu/data/signal_function_mod.py:
--------------------------------------------------------------------------------
1 | # encoding=utf-8
2 |
3 | import pandas as pd
4 | import numpy as np
5 | import warnings
6 |
7 | # talib wrapper that automatically drops all-NaN dates; used for signal computation
8 | def ta(ta_method='MA',
9 |        ta_column=0,
10 |       Open=None,
11 |       High=None,
12 |       Low=None,
13 |       Close=None,
14 |       Volume=None,
15 |       *args,
16 |       **kwargs):
17 |     try:
18 |         from talib import abstract
19 |     except ImportError:
20 |         raise RuntimeError("Install talib before using talib-based functions in formulas.")
21 |     if not isinstance(ta_method, str):
22 |         raise ValueError("Invalid format: ta_method must be the name (str) of the talib function to call; got %s, expected str" % (type(ta_method)))
23 |     else:
24 |         if not (ta_method in abstract.__dict__):
25 |             raise ValueError("Unknown talib function name: got %s; the wrapped talib library only supports %s" % (ta_method, str(abstract.__dict__.keys())))
26 |
27 |     candle_dict = {"open": Open,
28 |                    "high": High,
29 |                    "low": Low,
30 |                    "close": Close,
31 |                    "volume": Volume}
32 |
33 |     waiting_for_pop = []
34 |     for candle_type in candle_dict.keys():
35 |         if not isinstance(candle_dict[candle_type], pd.DataFrame):
36 |             waiting_for_pop.append(candle_type)
37 |             continue
38 |         if candle_dict[candle_type].size == 0:
39 |             raise ValueError("%s is empty; please check the corresponding input data." % (candle_type, ))
40 |     # drop candle fields that were not provided
41 |     for i in waiting_for_pop:
42 |         candle_dict.pop(i)
43 |
44 |     results = []
45 |     candle_pannel = pd.Panel.from_dict(candle_dict)
46 |
47 |     for sec in candle_pannel.minor_axis:
48 |         df = candle_pannel.minor_xs(sec).dropna()
49 |         if len(df) == 0:
50 |             warnings.warn("Data for %s is too sparse to compute the indicator; please check for data problems." % (sec,))
51 |             continue
52 |         result = pd.DataFrame(getattr(abstract, ta_method)(df, *args, **kwargs))
53 |
54 |         if isinstance(ta_column, int):
55 |             if ta_column >= len(result.columns) or ta_column < 0:
56 |                 raise ValueError("Invalid ta_column: the column index must be between 0 and %s; got %s" % (len(result.columns) - 1, ta_column))
57 |             result = pd.DataFrame(result.iloc[:, ta_column])
58 |         elif isinstance(ta_column, str):
59 |             if not (ta_column in result.columns):
60 |                 raise ValueError("Invalid ta_column: available column names are %s; got %s" % (str(result.columns), ta_column))
61 |             result = pd.DataFrame(result.loc[:, ta_column])
62 |         else:
63 |             raise ValueError("Invalid ta_column type %s; pass a valid column index (int) or column name (str)" % (type(ta_column)))
64 |
65 |         result.columns = [sec, ]
66 |         results.append(result)
67 |
68 |     if len(results) == 0:
69 |         return None
70 |     else:
71 |         tmp = pd.concat(results, axis=1)
72 |         tmp = tmp.reindex(columns=candle_pannel.minor_axis, index=candle_pannel.major_axis)
73 |         return tmp
74 |
75 |
76 | # position of the rolling maximum
77 | def ts_argmax(df, window=10):
78 |     return df.rolling(window).apply(np.argmax) + 1
79 |
80 |
81 | # position of the rolling minimum
82 | def ts_argmin(df, window=10):
83 |     return df.rolling(window).apply(np.argmin) + 1
84 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # jaqs-fxdayu: a stock multi-factor research extension for the jaqs framework
2 |
3 | ## Introduction
4 |
5 | Building on the official jaqs release, FxDaYu Finance (大鱼金融) has focused on improving and optimizing the stock multi-factor research workflow, making it easier to design, evaluate and analyze factor performance, tune factor effectiveness, and study factor combinations.
6 |
7 | It mainly includes:
8 |
9 | ### Basics:
10 | - dataservice
11 |
12 | A high-level wrapper over the low-level jaqs dataapi, with shortcut queries for common data such as bars, the trading calendar, index constituents and industry classifications.
13 |
14 | - dataview
15 |
16 | Can be seen as a pandas-based database tailored to factor research that simplifies factor design and implementation. jaqs_fxdayu improves on the official version with more convenient and flexible factor-data queries and operations.
17 |
18 | - digger
19 |
20 | Single-factor analysis and performance visualization. Improved over the official version.
21 |
22 | - performance
23 |
24 | Performance calculations commonly used in factor-based stock selection research. Improved over the official version.
25 |
26 | ### Extensions:
27 | - analysis
28 |
29 | Multi-dimensional single-factor analysis, rating a factor along three dimensions: factor IC, factor return, and the potential return space of the selection. New module.
30 |
31 | - process
32 |
33 | Common factor-processing operations such as winsorization and neutralization. New module.
34 |
35 | - optimizer
36 |
37 | Factor parameter optimization. New module.
38 |
39 | - multi_factor
40 |
41 | Multi-factor processing and combination. New module.
42 |
43 | - dp
44 |
45 | Small utilities for A-share factor research and trade analysis, such as querying historical trading days and historical industry classification tables. New module.
46 |
47 | - timingdigger
48 |
49 | Timing-signal research. After designing selection factors and events, TimingDigger lets you test them further with simple timed exit conditions, including take-profit and stop-loss exits. New module.
50 |
51 | - hf_dataview
52 |
53 | A data-manipulation and signal-computation module designed for high-frequency (bar-level) factors and events, functionally identical to DataView. New module.
54 |
55 | ## Installation and upgrade
56 | ### Dependencies
57 | This module extends JAQS, supports `python3` only, and requires `jaqs>=0.6.11`.
58 |
59 | For installing jaqs, see the [official JAQS documentation](http://jaqs.readthedocs.io/zh_CN/latest/install.html).
60 |
61 | - If jaqs is not installed yet, install it from pip:
62 | ```bash
63 | $ pip install jaqs
64 | ```
65 |
66 | - If jaqs is already installed, upgrade it:
67 | ```bash
68 | $ pip install -U --no-deps jaqs
69 | ```
70 |
71 | ### Installation
72 | ```bash
73 | $ pip install jaqs_fxdayu
74 | ```
75 |
76 | ### Upgrade
77 | When a new version is released, upgrade with:
78 | ```bash
79 | $ pip uninstall jaqs_fxdayu
80 | $ pip install jaqs_fxdayu
81 | ```
82 |
83 | ## Usage
84 | The module has two main parts:
85 |
86 | ### Basic API:
87 | Replaces and extends the original jaqs modules.
88 | Supports monkey_patch or importing directly from jaqs_fxdayu.
89 |
90 | Taking DataView as an example:
91 |
92 | - monkey_patch:
93 | ```
94 | import jaqs_fxdayu
95 | jaqs_fxdayu.patch_all()  # must run before any import jaqs.*
96 |
97 | from jaqs.data import DataView
98 |
99 | dv = DataView()
100 |
101 | ...
102 | ```
103 |
104 | !!! Note
105 |     This approach stays maximally compatible with native JAQS code, which eases migration.
106 |
107 | - Direct import:
108 | ```
109 | from jaqs_fxdayu.data import DataView
110 |
111 | dv = DataView()
112 |
113 | ...
114 | ```
115 |
116 | !!! Note
117 |     This approach is more explicit and lets IDEs provide static code completion.
118 |
119 | ### Extended API:
120 | Mostly developed independently; provides extensions that are common in factor analysis but not implemented in jaqs.
121 | These are imported from the jaqs_fxdayu module itself.
122 | For example:
123 | ```python
124 | from jaqs_fxdayu.research import Optimizer
125 | ```
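For instance, a short end-to-end sketch drawn from this repo's own tests (the folder path is illustrative):

```python
from jaqs_fxdayu.data import DataView

dv = DataView()
dv.load_dataview(folder_path='./output/prepared/my_dataview')  # illustrative path
dv.add_formula('myvar1', 'Delta(high - close, 1)', is_quarterly=False, add_data=True)
factor = dv.get_ts('myvar1')
```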
126 |
127 |
128 | ### Documentation
129 | [Full documentation](http://jaqs-fxdayu.readthedocs.io/zh_CN/latest/)
130 |
131 | ## Latest features
132 | ### 2018/7/15
133 |
134 | When adding financial data, dataview now allows specifying the financial report type
135 |
136 | ### 2018/7/2
137 |
138 | TimingDigger/SignalDigger support per-group quantiles
139 |
140 | When preprocessing factor/signal data, if a group argument is passed, quantiles are computed within each group rather than over the whole dataset.
141 |
142 | ### 2018/6/9
143 |
144 | TimingDigger's create_event_report method can now plot entry and exit points
145 |
146 | Added hf_dataview, a data-manipulation and signal-computation module designed for high-frequency (bar-level) factors and events, functionally identical to DataView.
147 |
148 | ### 2018/6/5
149 | New selection-plus-timing research (TimingDigger): after designing selection factors and events, you can test them further with simple timed exit conditions, including take-profit and stop-loss exits.
150 |
151 | ### 2018/4/19
152 | The parameter optimizer now supports calling user-defined methods inside the formula being optimized.
153 |
154 | ### 2018/4/19
155 | Added process-mad for factor winsorization. Improved the efficiency of industry and market-cap neutralization.
156 |
157 | ### 2018/4/16
158 | Added multi_factor-get_factors_ret_df for obtaining a matrix of factor return series. combine_factors also gained a weighting scheme based on recent factor returns.
159 |
160 | ### 2018/4/11
161 | Added the dataview-refresh_data method for updating a dataset.
162 |
163 | ### 2018/3/26
164 | Added dataservice documentation. dataservice is a high-level wrapper over the low-level jaqs dataapi, with shortcut queries for common data such as bars, the trading calendar, index constituents and industry classifications.
165 |
166 | ### 2018/3/26
167 |
168 | Added the dp module: small utilities for A-share factor research and trade analysis, such as querying historical trading days and historical industry classification tables
169 |
170 | Added documentation for the performance module: performance calculations commonly used in factor-based stock selection research
171 |
172 |
173 | ### 2018/3/20
174 |
175 | Released as a standalone module; documentation updated
176 |
177 | ### 2018/3/19 update
178 |
179 | Added the dataview-fields optional-field query; see the dataview-fields documentation
180 |
181 | ## Support
182 |
183 | - [GitHub](https://github.com/xingetouzi/jaqs-fxdayu/tree/master)
184 | - [Visit the FxDaYu academy for more examples and quant-finance material](http://www.fxdayu.com)
185 | - Join QQ group 372592121 for discussion
186 |
--------------------------------------------------------------------------------
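docs/index.md below mirrors README.md verbatim for the documentation site. As a quick check of what `patch_all()` actually rewires (taken from tests/test_patch_all.py):

```python
import jaqs_fxdayu
jaqs_fxdayu.patch_all()           # must run before any `import jaqs.*`

from jaqs.data import DataView    # now resolves to the jaqs_fxdayu class
assert DataView.__module__.startswith("jaqs_fxdayu.")
```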
/docs/index.md:
--------------------------------------------------------------------------------
1 | # jaqs-fxdayu: a stock multi-factor research extension for the jaqs framework
2 |
3 | ## Introduction
4 |
5 | Building on the official jaqs release, FxDaYu Finance (大鱼金融) has focused on improving and optimizing the stock multi-factor research workflow, making it easier to design, evaluate and analyze factor performance, tune factor effectiveness, and study factor combinations.
6 |
7 | It mainly includes:
8 |
9 | ### Basics:
10 | - dataservice
11 |
12 | A high-level wrapper over the low-level jaqs dataapi, with shortcut queries for common data such as bars, the trading calendar, index constituents and industry classifications.
13 |
14 | - dataview
15 |
16 | Can be seen as a pandas-based database tailored to factor research that simplifies factor design and implementation. jaqs_fxdayu improves on the official version with more convenient and flexible factor-data queries and operations.
17 |
18 | - digger
19 |
20 | Single-factor analysis and performance visualization. Improved over the official version.
21 |
22 | - performance
23 |
24 | Performance calculations commonly used in factor-based stock selection research. Improved over the official version.
25 |
26 | ### Extensions:
27 | - analysis
28 |
29 | Multi-dimensional single-factor analysis, rating a factor along three dimensions: factor IC, factor return, and the potential return space of the selection. New module.
30 |
31 | - process
32 |
33 | Common factor-processing operations such as winsorization and neutralization. New module.
34 |
35 | - optimizer
36 |
37 | Factor parameter optimization. New module.
38 |
39 | - multi_factor
40 |
41 | Multi-factor processing and combination. New module.
42 |
43 | - dp
44 |
45 | Small utilities for A-share factor research and trade analysis, such as querying historical trading days and historical industry classification tables. New module.
46 |
47 | - timingdigger
48 |
49 | Timing-signal research. After designing selection factors and events, TimingDigger lets you test them further with simple timed exit conditions, including take-profit and stop-loss exits. New module.
50 |
51 | - hf_dataview
52 |
53 | A data-manipulation and signal-computation module designed for high-frequency (bar-level) factors and events, functionally identical to DataView. New module.
54 |
55 | ## Installation and upgrade
56 | ### Dependencies
57 | This module extends JAQS, supports `python3` only, and requires `jaqs>=0.6.11`.
58 |
59 | For installing jaqs, see the [official JAQS documentation](http://jaqs.readthedocs.io/zh_CN/latest/install.html).
60 |
61 | - If jaqs is not installed yet, install it from pip:
62 | ```bash
63 | $ pip install jaqs
64 | ```
65 |
66 | - If jaqs is already installed, upgrade it:
67 | ```bash
68 | $ pip install -U --no-deps jaqs
69 | ```
70 |
71 | ### Installation
72 | ```bash
73 | $ pip install jaqs_fxdayu
74 | ```
75 |
76 | ### Upgrade
77 | When a new version is released, upgrade with:
78 | ```bash
79 | $ pip uninstall jaqs_fxdayu
80 | $ pip install jaqs_fxdayu
81 | ```
82 |
83 | ## Usage
84 | The module has two main parts:
85 |
86 | ### Basic API:
87 | Replaces and extends the original jaqs modules.
88 | Supports monkey_patch or importing directly from jaqs_fxdayu.
89 |
90 | Taking DataView as an example:
91 |
92 | - monkey_patch:
93 | ```
94 | import jaqs_fxdayu
95 | jaqs_fxdayu.patch_all()  # must run before any import jaqs.*
96 |
97 | from jaqs.data import DataView
98 |
99 | dv = DataView()
100 |
101 | ...
102 | ```
103 |
104 | !!! Note
105 |     This approach stays maximally compatible with native JAQS code, which eases migration.
106 |
107 | - Direct import:
108 | ```
109 | from jaqs_fxdayu.data import DataView
110 |
111 | dv = DataView()
112 |
113 | ...
114 | ```
115 |
116 | !!! Note
117 |     This approach is more explicit and lets IDEs provide static code completion.
118 |
119 | ### Extended API:
120 | Mostly developed independently; provides extensions that are common in factor analysis but not implemented in jaqs.
121 | These are imported from the jaqs_fxdayu module itself.
122 | For example:
123 | ```python
124 | from jaqs_fxdayu.research import Optimizer
125 | ```
126 |
127 |
128 | ### Documentation
129 | [Full documentation](http://jaqs-fxdayu.readthedocs.io/zh_CN/latest/)
130 |
131 | ## Latest features
132 | ### 2018/7/15
133 |
134 | When adding financial data, dataview now allows specifying the financial report type
135 |
136 | ### 2018/7/2
137 |
138 | TimingDigger/SignalDigger support per-group quantiles
139 |
140 | When preprocessing factor/signal data, if a group argument is passed, quantiles are computed within each group rather than over the whole dataset.
141 |
142 | ### 2018/6/9
143 |
144 | TimingDigger's create_event_report method can now plot entry and exit points
145 |
146 | Added hf_dataview, a data-manipulation and signal-computation module designed for high-frequency (bar-level) factors and events, functionally identical to DataView.
147 |
148 | ### 2018/6/5
149 | New selection-plus-timing research (TimingDigger): after designing selection factors and events, you can test them further with simple timed exit conditions, including take-profit and stop-loss exits.
150 |
151 | ### 2018/4/19
152 | The parameter optimizer now supports calling user-defined methods inside the formula being optimized.
153 |
154 | ### 2018/4/19
155 | Added process-mad for factor winsorization. Improved the efficiency of industry and market-cap neutralization.
156 |
157 | ### 2018/4/16
158 | Added multi_factor-get_factors_ret_df for obtaining a matrix of factor return series. combine_factors also gained a weighting scheme based on recent factor returns.
159 |
160 | ### 2018/4/11
161 | Added the dataview-refresh_data method for updating a dataset.
162 |
163 | ### 2018/3/26
164 | Added dataservice documentation. dataservice is a high-level wrapper over the low-level jaqs dataapi, with shortcut queries for common data such as bars, the trading calendar, index constituents and industry classifications.
165 |
166 | ### 2018/3/26
167 |
168 | Added the dp module: small utilities for A-share factor research and trade analysis, such as querying historical trading days and historical industry classification tables
169 |
170 | Added documentation for the performance module: performance calculations commonly used in factor-based stock selection research
171 |
172 |
173 | ### 2018/3/20
174 |
175 | Released as a standalone module; documentation updated
176 |
177 | ### 2018/3/19 update
178 |
179 | Added the dataview-fields optional-field query; see the dataview-fields documentation
180 |
181 | ## Support
182 |
183 | - [GitHub](https://github.com/xingetouzi/jaqs-fxdayu/tree/master)
184 | - [Visit the FxDaYu academy for more examples and quant-finance material](http://www.fxdayu.com)
185 | - Join QQ group 372592121 for discussion
186 |
--------------------------------------------------------------------------------
/jaqs_fxdayu/data/built_in_funcs_docs.csv:
--------------------------------------------------------------------------------
1 | 分类,说明,公式,示例
2 | 四则运算,加法运算,+,close + open
3 | 四则运算,减法运算,-,close - open
4 | 四则运算,乘法运算,*,vwap * volume
5 | 四则运算,除法运算,/,close / open
6 | 基本数学函数,"符号函数,返回值为{-1, 0, 1}",Sign(x),Sign(close-open)
7 | 基本数学函数,绝对值函数,Abs(x),Abs(close-open)
8 | 基本数学函数,自然对数,Log(x),Log(close/open)
9 | 基本数学函数,对x取负,-x,-close
10 | 基本数学函数,幂函数,^,close ^ 2
11 | 基本数学函数,幂函数x^y,"Pow(x,y)","Pow(close,2)"
12 | 基本数学函数,保持符号的幂函数,等价于Sign(x) * (Abs(x)^e),"SignedPower(x,e)","SignedPower(close-open, 0.5)"
13 | 基本数学函数,取余函数,%,oi % 10
14 | 逻辑运算,判断是否相等,==,close == open
15 | 逻辑运算,判断是否不等,!=,close != open
16 | 逻辑运算,大于,>,close > open
17 | 逻辑运算,小于,<,close < open
18 | 逻辑运算,大于等于,>=,close >= open
19 | 逻辑运算,小于等于,<=,close <= open
20 | 逻辑运算,逻辑与,&&,(close > open) && (close > vwap)
21 | 逻辑运算,逻辑或,||,(close > open) || (close > vwap)
22 | 逻辑运算,逻辑非,!,!(close>open)
23 | 逻辑运算,判断值是否为NaN,IsNan(x),IsNan(net_profit)
24 | 三角函数,正弦函数,Sin(x),Sin(close/open)
25 | 三角函数,余弦函数,Cos(x),Cos(close/open)
26 | 三角函数,正切函数,Tan(x),Tan(close/open)
27 | 三角函数,开平方函数,Sqrt(x),Sqrt(close^2 + open^2)
28 | 取整函数,向上取整,Ceil(x),Ceil(high)
29 | 取整函数,向下取整,Floor(x),Floor(low)
30 | 取整函数,四舍五入,Round(x),Round(close)
31 | 选择函数,取 x 和 y 同位置上的较大值组成新的DataFrame返回,"Max(x,y)","Max(close, open)"
32 | 选择函数,取 x 和 y 同位置上的较小值组成新的DataFrame返回,"Min(x,y)","Min(close,open)"
33 | 选择函数,cond为True取x的值,反之取y的值,"If(cond,x,y)","If(close > open, close, open) 表示取open和close的较大值"
34 | 时间序列函数 - 基本数学运算,指标n个周期前的值,"Delay(x,n)","Delay(close,1) 表示前一天收盘价"
35 | 时间序列函数 - 基本数学运算,指标在过去n天的和,"Ts_Sum(x,n)","Ts_Sum(volume,5) 表示一周成交量"
36 | 时间序列函数 - 基本数学运算,指标在过去 n 天的积,"Ts_Product(x,n)","Ts_Product(close/Delay(close,1),5) - 1 表示过去5天累计收益"
37 | 时间序列函数 - 基本数学运算,指标当前值与n天前的值的差,"Delta(x,n)","Delta(close,5)"
38 | 时间序列函数 - 基本数学运算,计算指标相比n天前的变化率,默认计算百分比变化率;当log为1时,计算对数变化率;为0时计算普通变化率,"Return(x,n,log)","Return(close,5,True)计算一周对数收益"
39 | 时间序列函数 - 基本数学运算,计算指标在过去n天的平均值,Ts_Mean(x,n),"Ts_Mean(close,5)"
40 | 时间序列函数 - 统计,指标在过去n天的标准差,"StdDev(x,n)","StdDev(close/Delay(close,1)-1, 10)"
41 | 时间序列函数 - 统计,两个指标在过去n天的协方差,"Covariance(x,y,n)","Covariance(close, open, 10)"
42 | 时间序列函数 - 统计,两个指标在过去n天的相关系数,"Correlation(x,y,n)","Correlation(close,open, 10)"
43 | 时间序列函数 - 统计,计算指标在过去n天的最小值,Ts_Min(x,n),Ts_Min(close,5)
44 | 时间序列函数 - 统计,计算指标在过去n天的最大值,Ts_Max(x,n),Ts_Max(close,5)
45 | 时间序列函数 - 统计,计算指标在过去n天的偏度,Ts_Skewness(x,n),Ts_Skewness(close,20)
46 | 时间序列函数 - 统计,计算指标在过去n天的峰度,Ts_Kurtosis(x,n),Ts_Kurtosis(close,20)
47 | 时间序列函数 - 排名,计算指标在过去n天的排名,返回值为名次,"Ts_Rank(x, n)","Ts_Rank(close, 5)"
48 | 时间序列函数 - 排名,"计算指标在过去n天的百分比,返回值为[0.0, 1.0]","Ts_Percentile(x, n)","Ts_Percentile(close, 5)"
49 | 时间序列函数 - 排名,计算指标在过去n天所属的quantile,返回值为表示quantile的整数,"Ts_Quantile(x, n)","Ts_Quantile(close, 5)"
50 | 时间序列函数 - 排名,指数移动平均,以halflife的衰减对x进行指数移动平均,"Ewma(x, halflife)","Ewma(x, 3)"
51 | 横截面函数 - 排名,将指标值在横截面方向排名,返回值为名次,Rank(x),"Rank( close/Delay(close,1)-1 ) 表示按日收益率进行排名"
52 | 横截面函数 - 排名,按分组数据g在每组内将指标值在横截面方向排名,返回值为名次,"GroupRank(x,g)","GroupRank(close/Delay(close,1)-1, g) 表示按分组g根据日收益率进行分组排名"
53 | 横截面函数 - 排名,将指标值在横截面方向排名,返回值为排名百分比,Percentile(x),Percentile(close)
54 | 横截面函数 - 排名,按分组数据g在每组内将指标值在横截面方向排名,返回值为排名百分比,"GroupPercentile(x, g, n)","GroupPercentile(close, sw1) 按申万1级行业"
55 | 横截面函数 - 排名,和Rank函数相同,但只有 cond 中值为True的标的参与排名,"ConditionRank(x, cond)","GroupRank(close/Delay(close,1)-1, cond) 表示按条件cond根据日收益率进行分组排名"
56 | 横截面函数 - 排名,根据指标值在横截面方向将标的分成n个quantile,返回值为所属quantile,"Quantile(x, n)","Quantile( close/Delay(close,1)-1,5)表示按日收益率分为5档"
57 | 横截面函数 - 排名,按分组数据g在每组内根据指标值在横截面方向将标的分成n个quantile,返回值为所属quantile,"GroupQuantile(x, g, n)","GroupQuantile(close/Delay(close,1)-1,g,5) 表示按日收益率和分组g进行分档,每组分为5档"
58 | 横截面函数 - 数据处理,将指标标准化,即在横截面上减去平均值后再除以标准差,Standardize(x),"Standardize(close/Delay(close,1)-1) 表示日收益率的标准化"
59 | 横截面函数 - 数据处理,"将指标横截面上去极值,用MAD (Maximum Absolute Deviation)方法, z_score为极值判断标准","Cutoff(x, z_score)","Cutoff(close,3) 表示去掉z_score大于3的极值"
60 | 财报函数,将累计财务数据转换为单季财务数据,CumToSingle(x),CumToSingle(net_profit)
61 | 财报函数,从累计财务数据计算TTM的财务数据,TTM(x),TTM(net_profit)
62 | 其他,"过去 n 天的指数衰减函数,其中 f 是平滑因子。这里 f 是平滑因子,可以赋一个小于 1 的值。Decay_exp(x, f, n) = (x[date] + x[date - 1] * f + … +x[date – n - 1] * (f (n – 1))) / (1 + f + … + f ^ (n - 1))","Decay_exp(x,f,n)","Decay_exp(close,0.9,10)"
63 | 其他,"过去n天的线性衰减函数。Decay_linear(x, n) = (x[date] * n + x[date - 1] * (n - 1) + … + x[date – n - 1]) / (n + (n - 1) + … + 1)","Decay_linear(x,n)","Decay_linear(close,15)"
64 | 其他,如果 x 的值介于 lower 和 upper,则将其设定为 newval,"Tail(x, lower, upper, newval)","Tail(close/open, 0.99, 1.01, 1.0)"
65 | 其他,Step(n) 为每个标的创建一个向量,向量中 n 代表最新日期,n-1 代表前一天,以此类推。,Step(n),Step(30)
66 | 其他,时间序列函数,计算 x 中的值在过去 n 天中为 nan (非数字)的次数,"CountNans(x,n)","CountNans((close-open)^0.5, 10) 表示过去10天内有几天close小于open"
67 | 时间序列函数 - 统计,计算指标在过去n天最大值的坐标,"Ts_Argmax(x,n)","Ts_Argmax(high,10)"
68 | 时间序列函数 - 统计,计算指标在过去n天最小值的坐标,"Ts_Argmin(x,n)","Ts_Argmin(low,10)"
69 | 技术指标,根据talib技术指标库计算x中每只股票的技术指标,"Ta(ta_method,ta_column,open,high,low,close,volume,*args)","Ta('MACD','macdsignal',open,high,low,close,volume) 表示对每只股票进行talib.macd计算并返回macdsignal的值(自动剔除停牌期数据).
70 |
--------------------------------------------------------------------------------
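The functions tabulated above are what DataView formulas can call. A hedged one-liner combining a few documented entries (assumes `dv` is a prepared DataView, as in the README example):

```python
# 20-day mean daily return, cross-sectionally ranked and negated (reversal)
dv.add_formula('rev20', '-Rank(Ts_Mean(close / Delay(close, 1) - 1, 20))',
               is_quarterly=False, add_data=True)
```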
/jaqs_fxdayu/util/dp.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | from collections import defaultdict
3 |
4 |
5 | English_classify = {'480000': 'Bank',
6 |                     '430000': 'Real_Estate',
7 |                     '460000': 'Leisure_Service',
8 |                     '640000': 'Mechanical_Equipment',
9 |                     '240000': 'Nonferrous_Metals',
10 |                     '510000': 'Synthesis',
11 |                     '410000': 'Public_Utility',
12 |                     '450000': 'Commercial_Trade',
13 |                     '730000': 'Communication',
14 |                     '330000': 'Household_Appliances',
15 |                     '720000': 'Media',
16 |                     '630000': 'Electrical_Equipment',
17 |                     '270000': 'Electronic_Engineering',
18 |                     '490000': 'Non_Bank_Finance',
19 |                     '370000': 'Medical_Biology',
20 |                     '710000': 'Computer',
21 |                     '280000': 'Car',
22 |                     '340000': 'Food_Beverage',
23 |                     '220000': 'Chemical_Engineering',
24 |                     '210000': 'Digging',
25 |                     '230000': 'Steel',
26 |                     '650000': 'Military',
27 |                     '110000': 'Agriculture_Fishing',
28 |                     '420000': 'Transportation',
29 |                     '620000': 'Architectural_Ornament',
30 |                     '350000': 'Textile_Garment',
31 |                     '610000': 'Building_Materials',
32 |                     '360000': 'Light_Manufacturing'}
33 |
34 |
35 | # trading-day list (returns pandas.Index)
36 | def trade_days(api, start, end):
37 |     """
38 |
39 |     :param api: jaqs.data.DataApi
40 |     :param start: int, sample: 20170101
41 |     :param end: int, sample: 20180101
42 |     :return:
43 |     """
44 |     data, msg = api.query("jz.secTradeCal", "start_date={}&end_date={}".format(start, end))
45 |     if msg == "0,":
46 |         return data.set_index("trade_date").rename_axis(int).index
47 |     else:
48 |         raise Exception(msg)
49 |
50 |
51 | def st_status(api, symbol, start, end):
52 |     """
53 |     :param api: jaqs.data.DataApi
54 |     :param symbol: str, sample: 600000.SH,000001.SZ
55 |     :param start: int, sample: 20170101
56 |     :param end: int, sample: 20180101
57 |     :return:
58 |     """
59 |     dates = trade_days(api, start, end)
60 |     data, msg = api.query("lb.sState", "symbol={}".format(symbol))
61 |     if len(data) == 0:
62 |         return None
63 |     data["in_date"] = data["effDate"].apply(int)
64 |     data["out_date"] = 99999999
65 |     data = data.sort_values(by=["in_date"])
66 |     if msg != "0,":
67 |         raise Exception(msg)
68 |
69 |     return expand(data, dates, None, value="state").fillna(0)
70 |
71 |
72 | # index constituents (returns pandas.DataFrame)
73 | def index_cons(api, index_code, start, end):
74 |     """
75 |
76 |     :param api: jaqs.data.DataApi
77 |     :param index_code: str, sample: 000300.SH
78 |     :param start: int, sample: 20170101
79 |     :param end: int, sample: 20180101
80 |     :return:
81 |     """
82 |     data, msg = api.query("lb.indexCons", "index_code={}&start_date={}&end_date={}".format(index_code, start, end))
83 |     if msg == "0,":
84 |         data["in_date"] = data["in_date"].apply(int)
85 |         data["out_date"] = data["out_date"].replace("", "99999999").apply(int)
86 |         return data
87 |     else:
88 |         raise Exception(msg)
89 |
90 |
91 | # expand date ranges into a daily table
92 | def expand(data, index, default=False, prefix=True, key="symbol", start="in_date", end="out_date", value=None):
93 |     """
94 |
95 |     :param data: pd.DataFrame
96 |     :param index: pd.Index, used as the output table's index
97 |     :param default: default value for the new table
98 |     :param prefix: value assigned where the range condition holds
99 |     :param key: column of data used as the output table's columns
100 |     :param start: column holding the start of the value range
101 |     :param end: column holding the end of the value range
102 |     :param value: take preset values from this column of data
103 |     :return:
104 |
105 |     Examples
106 |     --------
107 |     > dates
108 |     Int64Index([20170626, 20170627, 20170628, 20170629, 20170630, 20170703, 20170704, 20170705],
109 |                dtype='int64',
name='trade_date')
110 |
111 |     > industry
112 |        in_date  out_date     symbol industry1_name industry1_code
113 |     0  20140101  99999999  000001.SZ             银行         480000
114 |     1  20140101  20151001  000006.SZ            房地产         430000
115 |     2  20151001  20170629  000006.SZ             采掘         210000
116 |     3  20170629  99999999  000006.SZ            房地产         430000
117 |     4  20140101  99999999  000651.SZ           家用电器         330000
118 |
119 |     > expand(industry, dates, None, value="industry1_name")
120 |                000001.SZ 000006.SZ 000651.SZ
121 |     trade_date
122 |     20170626          银行        采掘      家用电器
123 |     20170627          银行        采掘      家用电器
124 |     20170628          银行        采掘      家用电器
125 |     20170629          银行       房地产      家用电器
126 |     20170630          银行       房地产      家用电器
127 |     20170703          银行       房地产      家用电器
128 |     20170704          银行       房地产      家用电器
129 |     20170705          银行       房地产      家用电器
130 |
131 |     """
132 |     if isinstance(data, pd.DataFrame) and isinstance(index, pd.Index):
133 |         dct = defaultdict(lambda: pd.Series(default, index))
134 |         for name, row in data.iterrows():
135 |             s = dct[row[key]]
136 |             s.loc[row[start]:row[end]] = prefix if value is None else row[value]
137 |         return pd.DataFrame(dct)
138 |
139 |
140 | # daily index-constituent table
141 | def daily_index_cons(api, index_code, start, end):
142 |     """
143 |
144 |     :param api: jaqs.data.DataApi
145 |     :param index_code: str, sample: 000300.SH
146 |     :param start: int, sample: 20170101
147 |     :param end: int, sample: 20180101
148 |     :return:
149 |     """
150 |     dates = trade_days(api, start, end)
151 |     codes = index_cons(api, index_code, start, end)
152 |     return expand(codes, dates)
153 |
154 |
155 | # daily industry-classification table
156 | def daily_sec_industry(api, symbol, start, end, source="sw", value="industry1_code"):
157 |     """
158 |
159 |     :param api: jaqs.data.DataApi
160 |     :param symbol: str, sample: 600000.SH,000001.SZ
161 |     :param start: int, sample: 20170101
162 |     :param end: int, sample: 20180101
163 |     :param source: str, sample: sw
164 |     :param value: str, sample: industry1_code
165 |     :return:
166 |     """
167 |     dates = trade_days(api, start, end)
168 |     data, msg = api.query("lb.secIndustry", "symbol={}&industry_src={}".format(symbol, source))
169 |     data["in_date"] = data["in_date"].apply(int)
170 |     data["out_date"] = data["out_date"].replace("", "99999999").apply(int)
171 |     if msg != "0,":
172 |         raise Exception(msg)
173 |
174 |     return expand(data, dates, None, value=value)
175 |
--------------------------------------------------------------------------------
/tests/test_dataview.py:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 | from __future__ import print_function
3 |
4 | # import jaqs_fxdayu
5 | # jaqs_fxdayu.patch_all()
6 | from jaqs_fxdayu.data import RemoteDataService
7 | from jaqs_fxdayu.data import DataView
8 | from .data_config import data_config
9 |
10 | # from config_path import DATA_CONFIG_PATH
11 |
12 | daily_path = '../output/tests/test_dataview_d'
13 | quarterly_path = '../output/tests/test_dataview_q'
14 |
15 |
16 | def test_write():
17 |     ds = RemoteDataService()
18 |     ds.init_from_config(data_config)
19 |     dv = DataView()
20 |
21 |     secs = '600030.SH,000063.SZ,000001.SZ'
22 |     props = {'start_date': 20160601, 'end_date': 20170601, 'symbol': secs,
23 |              'fields': 'open,close,high,low,volume,pb,net_assets,pcf_ncf',
24 |              'freq': 1}
25 |
26 |     dv.init_from_config(props, data_api=ds)
27 |     dv.prepare_data()
28 |     assert dv.data_d.shape == (281, 48)
29 |     assert dv.dates.shape == (281, )
30 |     # TODO
31 |     """
32 |     PerformanceWarning:
33 | your performance may suffer as PyTables will pickle object types that it cannot
34 | map directly to c-types [inferred_type->mixed,key->block1_values] [items->[('000001.SZ', 'int_income'), ('000001.SZ', 
'less_handling_chrg_comm_exp'), ('000001.SZ', 'net_int_income'), ('000001.SZ', 'oper_exp'), ('000001.SZ', 'symbol'), ('000063.SZ', 'int_income'), ('000063.SZ', 'less_handling_chrg_comm_exp'), ('000063.SZ', 'net_int_income'), ('000063.SZ', 'oper_exp'), ('000063.SZ', 'symbol'), ('600030.SH', 'int_income'), ('600030.SH', 'less_handling_chrg_comm_exp'), ('600030.SH', 'net_int_income'), ('600030.SH', 'oper_exp'), ('600030.SH', 'symbol')]] 35 | """ 36 | 37 | dv.save_dataview(folder_path=daily_path) 38 | 39 | 40 | def test_load(): 41 | dv = DataView() 42 | dv.load_dataview(folder_path=daily_path) 43 | 44 | assert dv.start_date == 20160601 and set(dv.symbol) == set('000001.SZ,600030.SH,000063.SZ'.split(',')) 45 | 46 | # test get_snapshot 47 | snap1 = dv.get_snapshot(20170504, symbol='600030.SH,000063.SZ', fields='close,pb') 48 | assert snap1.shape == (2, 2) 49 | assert set(snap1.columns.values) == {'close', 'pb'} 50 | assert set(snap1.index.values) == {'600030.SH', '000063.SZ'} 51 | 52 | # test get_ts 53 | ts1 = dv.get_ts('close', symbol='600030.SH,000063.SZ', start_date=20170101, end_date=20170302) 54 | assert ts1.shape == (38, 2) 55 | assert set(ts1.columns.values) == {'600030.SH', '000063.SZ'} 56 | assert ts1.index.values[-1] == 20170302 57 | 58 | 59 | def test_add_field(): 60 | dv = DataView() 61 | dv.load_dataview(folder_path=daily_path) 62 | nrows, ncols = dv.data_d.shape 63 | n_securities = len(dv.data_d.columns.levels[0]) 64 | 65 | ds = RemoteDataService() 66 | ds.init_from_config(data_config) 67 | dv.add_field('total_share', ds) 68 | assert dv.data_d.shape == (nrows, ncols + 1 * n_securities) 69 | 70 | 71 | def test_add_formula_directly(): 72 | ds = RemoteDataService() 73 | ds.init_from_config(data_config) 74 | dv = DataView() 75 | 76 | secs = '600030.SH,000063.SZ,000001.SZ' 77 | props = {'start_date': 20160601, 'end_date': 20170601, 'symbol': secs, 78 | 'fields': 'open,close', 79 | 'freq': 1} 80 | dv.init_from_config(props, data_api=ds) 81 | dv.prepare_data() 82 | 83 | dv.add_formula("myfactor", 'close / open', is_quarterly=False) 84 | assert dv.data_d.shape == (281, 36) 85 | 86 | 87 | def test_add_formula(): 88 | dv = DataView() 89 | dv.load_dataview(folder_path=daily_path) 90 | nrows, ncols = dv.data_d.shape 91 | n_securities = len(dv.data_d.columns.levels[0]) 92 | 93 | formula = 'Delta(high - close, 1)' 94 | dv.add_formula('myvar1', formula, is_quarterly=False, add_data=True) 95 | assert dv.data_d.shape == (nrows, ncols + 1 * n_securities) 96 | 97 | formula2 = 'myvar1 - close' 98 | dv.add_formula('myvar2', formula2, is_quarterly=False, add_data=True) 99 | assert dv.data_d.shape == (nrows, ncols + 2 * n_securities) 100 | 101 | 102 | def test_dataview_universe(): 103 | ds = RemoteDataService() 104 | ds.init_from_config(data_config) 105 | dv = DataView() 106 | 107 | props = {'start_date': 20170227, 'end_date': 20170327, 'universe': '000016.SH', 108 | # 'symbol': 'rb1710.SHF,rb1801.SHF', 109 | 'fields': ('open,high,low,close,vwap,volume,turnover,' 110 | + 'sw1,zz2,' 111 | + 'roe,net_assets,' 112 | + 'total_oper_rev,oper_exp,tot_profit,int_income' 113 | ), 114 | 'freq': 1} 115 | 116 | dv.init_from_config(props, ds) 117 | dv.prepare_data() 118 | 119 | data_bench = dv.data_benchmark.copy() 120 | dv.data_benchmark = data_bench 121 | 122 | try: 123 | dv.data_benchmark = data_bench.iloc[3:] 124 | except ValueError: 125 | pass 126 | 127 | dv.remove_field('roe,net_assets') 128 | dv.remove_field('close') 129 | 130 | 131 | # quarterly 132 | def test_q(): 133 | ds = RemoteDataService() 134 | 
ds.init_from_config(data_config) 135 | dv = DataView() 136 | 137 | secs = '600030.SH,000063.SZ,000001.SZ' 138 | props = {'start_date': 20160609, 'end_date': 20170601, 'symbol': secs, 139 | 'fields': ('open,close,' 140 | + 'pb,net_assets,' 141 | + 'total_oper_rev,oper_exp,' 142 | + 'cash_paid_invest,' 143 | + 'capital_stk,' 144 | + 'roe'), 'freq': 1} 145 | 146 | dv.init_from_config(props, data_api=ds) 147 | dv.prepare_data() 148 | dv.save_dataview(folder_path=quarterly_path) 149 | 150 | 151 | def test_q_get(): 152 | dv = DataView() 153 | dv.load_dataview(folder_path=quarterly_path) 154 | res = dv.get("", 0, 0, 'total_oper_rev') 155 | assert set(res.index.values) == set(dv.dates[dv.dates >= dv.start_date]) 156 | 157 | 158 | def test_q_add_field(): 159 | dv = DataView() 160 | dv.load_dataview(folder_path=quarterly_path) 161 | nrows, ncols = dv.data_q.shape 162 | n_securities = len(dv.data_d.columns.levels[0]) 163 | 164 | ds = RemoteDataService() 165 | ds.init_from_config(data_config) 166 | dv.add_field("debttoassets", ds) 167 | assert dv.data_q.shape == (nrows, ncols + 1 * n_securities) 168 | 169 | 170 | def test_q_add_formula(): 171 | dv = DataView() 172 | folder_path = '../output/prepared/20160609_20170601_freq=1D' 173 | dv.load_dataview(folder_path=quarterly_path) 174 | nrows, ncols = dv.data_d.shape 175 | n_securities = len(dv.data_d.columns.levels[0]) 176 | 177 | formula = 'total_oper_rev / close' 178 | dv.add_formula('myvar1', formula, is_quarterly=False, add_data=True) 179 | df1 = dv.get_ts('myvar1') 180 | assert not df1.empty 181 | 182 | formula2 = 'Delta(oper_exp * myvar1 - open, 3)' 183 | dv.add_formula('myvar2', formula2, is_quarterly=False, add_data=True) 184 | df2 = dv.get_ts('myvar2') 185 | assert not df2.empty 186 | 187 | 188 | if __name__ == "__main__": 189 | g = globals() 190 | g = {k: v for k, v in g.items() if k.startswith('test_') and callable(v)} 191 | 192 | # for test_name, test_func in g.items(): 193 | for test_name in ['test_write', 'test_load', 'test_add_field', 'test_add_formula_directly', 194 | 'test_add_formula', 'test_dataview_universe', 195 | 'test_q', 'test_q_get', 'test_q_add_field', 'test_q_add_formula', 196 | ]: 197 | test_func = g[test_name] 198 | print("\n==========\nTesting {:s}...".format(test_name)) 199 | test_func() 200 | print("Test Complete.") 201 | -------------------------------------------------------------------------------- /jaqs_fxdayu/data/py_expression_eval.py: -------------------------------------------------------------------------------- 1 | from jaqs.data.py_expression_eval import * 2 | from jaqs.data.py_expression_eval import Parser as OriginParser 3 | 4 | from jaqs_fxdayu.patch_util import auto_register_patch 5 | from jaqs_fxdayu.util import fillinf 6 | from . import signal_function_mod as sfm 7 | 8 | 9 | @auto_register_patch(parent_level=1) 10 | class Parser(OriginParser): 11 | def __init__(self): 12 | super(Parser, self).__init__() 13 | self.functions.update({ 14 | 'Ta': self.ta, 15 | 'Ts_Argmax': self.ts_argmax, 16 | 'Ts_Argmin': self.ts_argmin 17 | }) 18 | 19 | def evaluate(self, values, ann_dts=None, trade_dts=None, index_member=None): 20 | """ 21 | Evaluate the value of expression using. Data of different frequency will be automatically expanded. 22 | 23 | Parameters 24 | ---------- 25 | values : dict 26 | Key is variable name, value is pd.DataFrame (index is date, column is symbol) 27 | ann_dts : pd.DataFrame 28 | Announcement dates of financial statements of securities. 29 | trade_dts : np.ndarray 30 | The date index of result. 
/jaqs_fxdayu/data/py_expression_eval.py:
--------------------------------------------------------------------------------
1 | from jaqs.data.py_expression_eval import *
2 | from jaqs.data.py_expression_eval import Parser as OriginParser
3 |
4 | from jaqs_fxdayu.patch_util import auto_register_patch
5 | from jaqs_fxdayu.util import fillinf
6 | from . import signal_function_mod as sfm
7 |
8 |
9 | @auto_register_patch(parent_level=1)
10 | class Parser(OriginParser):
11 | def __init__(self):
12 | super(Parser, self).__init__()
13 | self.functions.update({
14 | 'Ta': self.ta,
15 | 'Ts_Argmax': self.ts_argmax,
16 | 'Ts_Argmin': self.ts_argmin
17 | })
18 |
19 | def evaluate(self, values, ann_dts=None, trade_dts=None, index_member=None):
20 | """
21 | Evaluate the value of the expression. Data of different frequencies will be aligned and expanded automatically.
22 |
23 | Parameters
24 | ----------
25 | values : dict
26 | Key is variable name, value is pd.DataFrame (index is date, column is symbol)
27 | ann_dts : pd.DataFrame
28 | Announcement dates of financial statements of securities.
29 | trade_dts : np.ndarray
30 | The date index of the result.
31 | index_member : pd.DataFrame
32 |
33 | Returns
34 | -------
35 | pd.DataFrame
36 |
37 | """
38 |
39 | def _fillinf(df):
40 | try:
41 | df = fillinf(df)
42 | except Exception:  # scalars and other non-frame results pass through unchanged
43 | pass
44 | return df
45 |
46 | self.ann_dts = ann_dts
47 | self.trade_dts = trade_dts
48 | self.index_member = index_member
49 |
50 | values = values or {}
51 | nstack = []
52 | L = len(self.tokens)
53 | for i in range(0, L):
54 | item = self.tokens[i]
55 | type_ = item.type_
56 | if type_ == TNUMBER:
57 | nstack.append(item.number_)
58 | elif type_ == TOP2:
59 | n2 = nstack.pop()
60 | n1 = nstack.pop()
61 | f = self.ops2[item.index_]
62 | nstack.append(_fillinf(f(n1, n2)))
63 | elif type_ == TVAR:
64 | if item.index_ in values:
65 | nstack.append(_fillinf(values[item.index_]))
66 | elif item.index_ in self.functions:
67 | nstack.append(self.functions[item.index_])
68 | else:
69 | raise Exception('undefined variable: ' + item.index_)
70 | elif type_ == TOP1:
71 | n1 = nstack.pop()
72 | f = self.ops1[item.index_]
73 | nstack.append(_fillinf(f(n1)))
74 | elif type_ == TFUNCALL:
75 | n1 = nstack.pop()
76 | f = nstack.pop()
77 | if callable(f):
78 | if type(n1) is list:
79 | nstack.append(_fillinf(f(*n1)))
80 | else:
81 | nstack.append(_fillinf(f(n1)))  # call(f, n1)
82 | else:
83 | raise Exception(str(f) + ' is not a function')
84 | else:
85 | raise Exception('invalid Expression')
86 | if len(nstack) > 1:
87 | raise Exception('invalid Expression (parity)')
88 | return _fillinf(nstack[0])
89 |
90 | # -----------------------------------------------------
91 | def reindex_df(self, df):
92 | # repair missing index entries in the factor frame
93 | if isinstance(df, pd.DataFrame):
94 | if self.ann_dts is not None:
95 | if len(set(list(df.index)) - set(list(self.ann_dts))) == 0:
96 | return df.reindex(self.ann_dts)
97 | if self.trade_dts is not None:
98 | if len(set(list(df.index)) - set(list(self.trade_dts))) == 0:
99 | return df.reindex(self.trade_dts)
100 | return df
101 |
102 | # align functions
103 | def _align_bivariate(self, df1, df2, force_align=False):
104 | df1 = self.reindex_df(df1)
105 | df2 = self.reindex_df(df2)
106 | if isinstance(df1, pd.DataFrame) and isinstance(df2, pd.DataFrame):
107 | len1 = len(df1.index)
108 | len2 = len(df2.index)
109 | if (self.ann_dts is not None) and (self.trade_dts is not None):
110 | if len1 > len2:
111 | df2 = align(df2, self.ann_dts, self.trade_dts)
112 | elif len1 < len2:
113 | df1 = align(df1, self.ann_dts, self.trade_dts)
114 | elif force_align:
115 | df1 = align(df1, self.ann_dts, self.trade_dts)
116 | df2 = align(df2, self.ann_dts, self.trade_dts)
117 | return (df1, df2)
118 |
119 | def _align_univariate(self, df1):
120 | df1 = self.reindex_df(df1)
121 | if isinstance(df1, pd.DataFrame):
122 | if (self.ann_dts is not None) and (self.trade_dts is not None):
123 | len1 = len(df1.index)
124 | len2 = len(self.trade_dts)
125 | if len1 != len2:
126 | return align(df1, self.ann_dts, self.trade_dts)
127 | return df1
128 |
129 | # -----------------------------------------------------
130 | # functions
131 | # ta function
132 | def ta(self,
133 | ta_method,
134 | ta_column,
135 | Open,
136 | High,
137 | Low,
138 | Close,
139 | Volume,
140 | *args,
141 | **kwargs):
142 | return sfm.ta(ta_method,
143 | ta_column,
144 | Open,
145 | High,
146 | Low,
147 | Close,
148 | Volume,
149 | *args,
150 | **kwargs)
151 |
152 | def ts_argmax(self, *args,
153 | **kwargs):
154 | return sfm.ts_argmax(*args, **kwargs)
155 |
156 | def ts_argmin(self, *args,
157 | **kwargs):
158 | return sfm.ts_argmin(*args, **kwargs)
159 |
160 | def std_dev(self, df,
n):
161 | return df.apply(lambda x: x.dropna().rolling(n).std()).reindex(df.index)
162 |
163 | def ts_sum(self, df, n):
164 | return df.apply(lambda x: x.dropna().rolling(n).sum()).reindex(df.index)
165 |
166 | def count_nans(self, df, n):
167 | return n - df.rolling(n).count()
168 |
169 | def ts_mean(self, df, n):
170 | return df.apply(lambda x: x.dropna().rolling(n).mean()).reindex(df.index)
171 |
172 | def ts_min(self, df, n):
173 | return df.apply(lambda x: x.dropna().rolling(n).min()).reindex(df.index)
174 |
175 | def ts_max(self, df, n):
176 | return df.apply(lambda x: x.dropna().rolling(n).max()).reindex(df.index)
177 |
178 | def ts_kurt(self, df, n):
179 | return df.apply(lambda x: x.dropna().rolling(n).kurt()).reindex(df.index)
180 |
181 | def ts_skew(self, df, n):
182 | return df.apply(lambda x: x.dropna().rolling(n).skew()).reindex(df.index)
183 |
184 | def ts_product(self, df, n):
185 | return df.apply(lambda x: x.dropna().rolling(n).apply(np.product)).reindex(df.index)
186 |
187 | def corr(self, x, y, n):
188 | (x, y) = self._align_bivariate(x, y)
189 | return x.rolling(n, min_periods=1).corr(y)
190 |
191 | def cov(self, x, y, n):
192 | (x, y) = self._align_bivariate(x, y)
193 | return x.rolling(n, min_periods=1).cov(y)
194 |
195 | def decay_linear(self, df, n):
196 | return df.apply(lambda x: x.dropna().rolling(n).apply(self.decay_linear_array)).reindex(df.index)
197 |
198 | def decay_exp(self, df, f, n):
199 | return df.apply(lambda x: x.dropna().rolling(n).apply(self.decay_exp_array, args=[f])).reindex(df.index)
200 |
--------------------------------------------------------------------------------
/jaqs_fxdayu/research/signaldigger/performance.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | from jaqs_fxdayu.patch_util import auto_register_patch
3 | # NOTE: np and scst used below are supplied by the star import from jaqs at the bottom of this module
4 | from jaqs.research.signaldigger.performance import calc_ic_stats_table as __calc_ic_stats_stable
5 |
6 |
7 | @auto_register_patch()
8 | def calc_signal_ic(signal_data, by_group=False):
9 | """
10 | Computes the Spearman Rank Correlation based Information Coefficient (IC)
11 | between signal values and N period forward returns for each period in
12 | the signal index.
13 |
14 | Parameters
15 | ----------
16 | signal_data : pd.DataFrame - MultiIndex
17 | Index is pd.MultiIndex ['trade_date', 'symbol'], columns = ['signal', 'return', 'quantile']
18 | by_group : bool
19 | If True, compute period wise IC separately for each group.
20 | Returns
21 | -------
22 | ic : pd.DataFrame
23 | Spearman Rank correlation between signal and provided forward returns.
24 |
25 | """
26 |
27 | def src_ic(df):
28 | _ic = scst.spearmanr(df['signal'], df['return'])[0]
29 | return _ic
30 |
31 | signal_data = signal_data.copy()
32 |
33 | grouper = ['trade_date']
34 | if by_group:
35 | grouper.append('group')
36 |
37 | ic = signal_data.groupby(grouper).apply(src_ic)
38 | ic = pd.DataFrame(ic)
39 | ic.columns = ['ic']
40 |
41 | return ic
42 |
43 |
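# A self-contained toy check of the per-date Spearman IC above (made-up numbers,
# not part of the original module; scst is scipy.stats, as in the code above).
import pandas as pd
import scipy.stats as scst

toy = pd.DataFrame({
    'trade_date': [20170103] * 4,
    'signal': [0.1, 0.4, 0.2, 0.3],
    'return': [0.00, 0.03, 0.01, 0.02],
})
ic = toy.groupby('trade_date').apply(lambda g: scst.spearmanr(g['signal'], g['return'])[0])
assert abs(ic.loc[20170103] - 1.0) < 1e-12  # identical rank order => IC == 1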
44 | @auto_register_patch()
45 | def mean_information_coefficient(ic, by_time=None, by_group=False):
46 | """
47 | Get the mean information coefficient of specified groups.
48 | Answers questions like:
49 | What is the mean IC for each month?
50 | What is the mean IC for each group over the whole time range?
51 | What is the mean IC for each group, each week?
52 |
53 | Parameters
54 | ----------
55 | by_time : str (pd time_rule), optional
56 | Time window to use when taking mean IC.
57 | See http://pandas.pydata.org/pandas-docs/stable/timeseries.html
58 | for available options.
59 | by_group : bool
60 | If True, compute period wise IC separately for each group.
61 | Returns
62 | -------
63 | ic : pd.DataFrame
64 | Mean Spearman Rank correlation between signal and provided
65 | forward price movement windows.
66 | """
67 | grouper = []
68 | if by_time is not None:
69 | grouper.append(pd.TimeGrouper(by_time))  # pd.Grouper(freq=by_time) in newer pandas
70 | if by_group:
71 | grouper.append('group')
72 |
73 | if len(grouper) == 0:
74 | ic = ic.mean()
75 | else:
76 | if isinstance(ic.index, pd.MultiIndex):
77 | ic.index = pd.MultiIndex(levels=[pd.to_datetime(ic.index.levels[0],
78 | format="%Y%m%d"),
79 | ic.index.levels[1]],
80 | labels=ic.index.labels,  # .codes in newer pandas
81 | names=ic.index.names)
82 | else:
83 | ic.index = pd.to_datetime(ic.index, format="%Y%m%d")
84 | ic = (ic.reset_index().set_index('trade_date').groupby(grouper).mean())
85 |
86 | return ic
87 |
88 |
89 | @auto_register_patch()
90 | def calc_quantile_return_mean_std(signal_data, time_series=False):
91 | """
92 | Computes mean returns for signal quantiles across
93 | provided forward returns columns.
94 |
95 | Parameters
96 | ----------
97 | signal_data : pd.DataFrame - MultiIndex
98 | Index is pd.MultiIndex ['trade_date', 'symbol'], columns = ['signal', 'return', 'quantile']
99 | Returns
100 | -------
101 | res : pd.DataFrame or dict
102 |
103 | """
104 | signal_data = signal_data.copy()
105 | grouper = ['quantile']
106 | if time_series:
107 | grouper.append('trade_date')
108 |
109 | group_mean_std = signal_data.groupby(grouper)['return'].agg(['mean', 'std', 'count'])
110 | # TODO: why?
111 | '''
112 | std_error_ret = group_mean_std.loc[:, 'std'].copy() / np.sqrt(group_mean_std.loc[:, 'count'].copy())
113 | '''
114 | indexes = []
115 | if time_series:
116 | quantile_daily_mean_std_dic = dict()
117 | quantiles = np.unique(group_mean_std.index.get_level_values(level='quantile'))
118 | for q in quantiles:  # loop for different quantiles
119 | df_q = group_mean_std.loc[pd.IndexSlice[q, :], :]  # bug
120 | df_q.index = df_q.index.droplevel(level='quantile')
121 | indexes.append(pd.Series(df_q.index))
122 | quantile_daily_mean_std_dic[q] = df_q
123 | new_index = sorted(pd.concat(indexes).unique())
124 | for q in quantiles:
125 | quantile_daily_mean_std_dic[q] = quantile_daily_mean_std_dic[q].reindex(new_index).fillna(0)
126 | return quantile_daily_mean_std_dic
127 | else:
128 | return group_mean_std
129 |
130 |
131 | @auto_register_patch()
132 | def daily_ret_to_cum(df_ret, axis=0):
133 | cum = df_ret.add(1.0).cumprod(axis=axis)
134 | return cum
135 |
136 |
137 | @auto_register_patch()
138 | def daily_ret_to_ret(daily_ret, period=5, axis=0):
139 | ret = daily_ret.add(1).rolling(period, axis=axis).apply(np.product).sub(1)
140 | return ret
141 |
142 |
143 | @auto_register_patch()
144 | def calc_ic_stats_table(ic_data):
145 | ic_data = ic_data.dropna()
146 | return __calc_ic_stats_stable(ic_data)
147 |
148 |
149 | @auto_register_patch()
150 | def price2ret(prices, period=5, axis=None, compound=True):
151 | """
152 |
153 | Parameters
154 | ----------
155 | prices : pd.DataFrame or pd.Series
156 | Index is datetime.
157 | period : int
158 | axis : {0, 1, None}
159 | compound : bool
160 | Returns
161 | -------
162 | ret : pd.DataFrame or pd.Series
163 |
164 | """
165 | if axis is None:
166 | kwargs = dict()
167 | else:
168 | kwargs = {'axis': axis}
169 |
170 | if compound:
171 | ret = prices.pct_change(periods=period, **kwargs)
172 | else:
173 | ret = prices.diff(periods=period, **kwargs) / prices.iloc[0]
174 | return ret
175 |
176 |
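# Worked numbers (hypothetical, not part of the original module) for the two
# modes of price2ret above: compound uses pct_change over `period` rows, while
# the simple mode divides the price difference by the first price.
import pandas as pd

prices = pd.Series([100.0, 110.0, 121.0, 133.1])
compound = prices.pct_change(periods=2)           # 133.1 / 110 - 1 = 0.21
simple = prices.diff(periods=2) / prices.iloc[0]  # (133.1 - 110) / 100 = 0.231
assert abs(compound.iloc[-1] - 0.21) < 1e-9
assert abs(simple.iloc[-1] - 0.231) < 1e-9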
177 | @auto_register_patch()
178 | def period_wise_ret_to_cum(ret, period, compound=True):
179 | """
180 | Calculate cumulative returns from N-period returns.
181 | When 'period' N is greater than 1 the cumulative returns plot is computed
182 | building and averaging the cumulative returns of N interleaved portfolios
183 | (started at subsequent periods 1,2,3,...,N) each one rebalancing every N
184 | periods.
185 |
186 | Parameters
187 | ----------
188 | ret: pd.Series or pd.DataFrame
189 | pd.Series containing N-period returns
190 | period: integer
191 | Period for which the returns are computed
192 | compound : bool
193 | Whether to compound returns when cumulating.
194 |
195 | Returns
196 | -------
197 | pd.Series
198 | Cumulative returns series starting from zero.
199 |
200 | """
201 | if isinstance(ret, pd.DataFrame):
202 | # deal with each column recursively, forwarding the compound flag
203 | return ret.apply(period_wise_ret_to_cum, axis=0, args=(period, compound))
204 | elif isinstance(ret, pd.Series):
205 | if period == 1:
206 | return ret.add(1).cumprod().sub(1.0)
207 |
208 | # invest in each portfolio separately
209 | # np.diag spreads each period's returns onto its own column, one column per interleaved portfolio
210 | periods_index = np.arange(len(ret.index)) // period
211 | period_portfolios = ret.groupby(by=periods_index, axis=0).apply(lambda ser: pd.DataFrame(np.diag(ser))).fillna(0)
212 | period_portfolios.index = ret.index
213 |
214 |
215 | # cumulate returns separately
216 | if compound:
217 | cum_returns = period_portfolios.add(1).cumprod().sub(1.0)
218 | else:
219 | cum_returns = period_portfolios.cumsum()
220 |
221 | # since the capital of every portfolio is the same, the combined return equals the average return
222 | res = cum_returns.mean(axis=1)
223 |
224 | return res
225 | else:
226 | raise NotImplementedError("ret must be Series or DataFrame.")
227 |
228 |
229 | _calc_signal_ic = calc_signal_ic
230 | _mean_information_coefficient = mean_information_coefficient
231 | _calc_ic_stats_table = calc_ic_stats_table
232 | _calc_quantile_return_mean_std = calc_quantile_return_mean_std
233 | _daily_ret_to_cum = daily_ret_to_cum
234 | _daily_ret_to_ret = daily_ret_to_ret
235 | _price2ret = price2ret
236 | _period_wise_ret_to_cum = period_wise_ret_to_cum
237 |
238 | from jaqs.research.signaldigger.performance import *
239 |
240 | calc_signal_ic = _calc_signal_ic
241 | mean_information_coefficient = _mean_information_coefficient
242 | calc_quantile_return_mean_std = _calc_quantile_return_mean_std
243 | daily_ret_to_cum = _daily_ret_to_cum
244 | daily_ret_to_ret = _daily_ret_to_ret
245 | price2ret = _price2ret
246 | calc_ic_stats_table = _calc_ic_stats_table
247 | period_wise_ret_to_cum = _period_wise_ret_to_cum
248 |
--------------------------------------------------------------------------------
/jaqs_fxdayu/research/signaldigger/process.py:
--------------------------------------------------------------------------------
1 | # encoding=utf-8
2 | # data-processing helpers
3 |
4 | import jaqs.util as jutil
5 | import pandas as pd
6 | import numpy as np
7 |
8 |
9 | def _mask_df(df, mask):
10 | mask = mask.astype(bool)
11 | df[mask] = np.nan
12 | return df
13 |
14 |
15 | def _mask_non_index_member(df, index_member=None):
16 | if index_member is not None:
17 | index_member = index_member.astype(bool)
18 | return _mask_df(df, ~index_member)
19 | return df
20 |
21 |
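# A tiny sketch (hypothetical membership matrix, not part of the original
# module) of the masking helpers above: positions where a stock is not an
# index member become NaN before any cross-sectional statistic is computed.
import numpy as np
import pandas as pd

factor = pd.DataFrame([[1.0, 2.0], [3.0, 4.0]], columns=['A', 'B'])
member = pd.DataFrame([[1, 0], [1, 1]], columns=['A', 'B'])

masked = _mask_non_index_member(factor.copy(), index_member=member)
assert np.isnan(masked.loc[0, 'B']) and masked.loc[1, 'B'] == 4.0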
22 | # cross-sectional standardization for DataFrame data
23 | def standardize(factor_df, index_member=None):
24 | """
25 | Cross-sectional z-score standardization of factor values (sample std, ddof = n - 1).
26 | :param index_member:
27 | :param factor_df: factor values (pandas.DataFrame), index is datetime, columns are symbols,
28 | e.g.:
29 | AAPL BA CMG DAL LULU
30 | date
31 | 2016-06-24 0.165260 0.002198 0.085632 -0.078074 0.173832
32 | 2016-06-27 0.165537 0.003583 0.063299 -0.048674 0.180890
33 | 2016-06-28 0.135215 0.010403 0.059038 -0.034879 0.111691
34 | 2016-06-29 0.068774 0.019848 0.058476 -0.049971 0.042805
35 | 2016-06-30 0.039431 0.012271 0.037432 -0.027272 0.010902
36 | :return: z-scored factor values (pandas.DataFrame), index is datetime, columns are symbols.
37 | """
38 |
39 | factor_df = jutil.fillinf(factor_df)
40 | factor_df = _mask_non_index_member(factor_df, index_member)
41 | return factor_df.sub(factor_df.mean(axis=1), axis=0).div(factor_df.std(axis=1), axis=0)
42 |
43 |
44 | # cross-sectional winsorization for DataFrame data
45 | def winsorize(factor_df, alpha=0.05, index_member=None):
46 | """
47 | Winsorize factor values cross-sectionally.
48 | :param index_member:
49 | :param alpha: total tail fraction to clip (alpha/2 in each tail)
50 | :param factor_df: factor values (pandas.DataFrame), index is datetime, columns are symbols,
51 | e.g.:
52 | AAPL BA CMG DAL LULU
53 | date
54 | 2016-06-24 0.165260 0.002198 0.085632 -0.078074 0.173832
55 | 2016-06-27 0.165537 0.003583 0.063299 -0.048674 0.180890
56 | 2016-06-28 0.135215 0.010403 0.059038 -0.034879 0.111691
57 | 2016-06-29 0.068774 0.019848 0.058476 -0.049971 0.042805
58 | 2016-06-30 0.039431 0.012271 0.037432 -0.027272 0.010902
59 | :return: winsorized factor values (pandas.DataFrame), index is datetime, columns are symbols.
60 | """
61 |
62 | def winsorize_series(se):
63 | q = se.quantile([alpha / 2, 1 - alpha / 2])
64 | se[se < q.iloc[0]] = q.iloc[0]
65 | se[se > q.iloc[1]] = q.iloc[1]
66 | return se
67 |
68 | factor_df = jutil.fillinf(factor_df)
69 | factor_df = _mask_non_index_member(factor_df, index_member)
70 | return factor_df.apply(lambda x: winsorize_series(x), axis=1)
71 |
72 |
73 | # cross-sectional de-extreming via the median absolute deviation
74 | def mad(factor_df, index_member=None):
75 | """
76 | Clip extreme factor values to median +/- 5 * MAD within each cross-section.
77 | :param index_member:
78 | :param factor_df: factor values (pandas.DataFrame), index is datetime, columns are symbols,
79 | e.g.:
80 | AAPL BA CMG DAL LULU
81 | date
82 | 2016-06-24 0.165260 0.002198 0.085632 -0.078074 0.173832
83 | 2016-06-27 0.165537 0.003583 0.063299 -0.048674 0.180890
84 | 2016-06-28 0.135215 0.010403 0.059038 -0.034879 0.111691
85 | 2016-06-29 0.068774 0.019848 0.058476 -0.049971 0.042805
86 | 2016-06-30 0.039431 0.012271 0.037432 -0.027272 0.010902
87 | :return: clipped factor values (pandas.DataFrame), index is datetime, columns are symbols.
88 | """
89 |
90 | def _mad(series):
91 | if series.dropna().size == 0:
92 | return series
93 | median = series.median()
94 | tmp = (series - median).abs().median()
95 | return series.clip(median - 5 * tmp, median + 5 * tmp)
96 |
97 | factor_df = jutil.fillinf(factor_df)
98 | factor_df = _mask_non_index_member(factor_df, index_member)
99 | return factor_df.apply(lambda x: _mad(x), axis=1)
100 |
101 |
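# Worked numbers (hypothetical, not part of the original module) for the MAD
# clipping rule above: values are clipped to median +/- 5 * MAD per cross-section.
import pandas as pd

row = pd.Series([1.0, 2.0, 3.0, 100.0])
median = row.median()                      # 2.5
mad_scale = (row - median).abs().median()  # 1.0
clipped = row.clip(median - 5 * mad_scale, median + 5 * mad_scale)
assert clipped.iloc[-1] == 7.5  # 100.0 is clipped to 2.5 + 5 * 1.0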
102 | # cross-sectional ranking, normalized
103 | def rank_standardize(factor_df, index_member=None):
104 | """
105 | Rebuild the factor as cross-sectional rank scores scaled to [0, 1] (ascending by default: a larger factor value gets a larger, i.e. better, rank score).
106 | :param index_member:
107 | :param factor_df: factor values (pandas.DataFrame), index is datetime, columns are symbols,
108 | e.g.:
109 | AAPL BA CMG DAL LULU
110 | date
111 | 2016-06-24 0.165260 0.002198 0.085632 -0.078074 0.173832
112 | 2016-06-27 0.165537 0.003583 0.063299 -0.048674 0.180890
113 | 2016-06-28 0.135215 0.010403 0.059038 -0.034879 0.111691
114 | 2016-06-29 0.068774 0.019848 0.058476 -0.049971 0.042805
115 | 2016-06-30 0.039431 0.012271 0.037432 -0.027272 0.010902
116 |
117 | :return: rank-rebuilt factor values, ranging between 0 and 1.
118 | """
119 | factor_df = jutil.fillinf(factor_df)
120 | factor_df = _mask_non_index_member(factor_df, index_member)
121 | return jutil.rank_with_mask(factor_df, axis=1, normalize=True)
122 |
123 |
124 | # add a tiny perturbation to the factor values so that quantile cuts can break ties
125 | def get_disturbed_factor(factor_df):
126 | """
127 | Add a tiny random perturbation to the factor values to make quantiles distinguishable.
128 | :param factor_df: factor values (pandas.DataFrame), index is datetime, columns are symbols,
129 | e.g.:
130 | AAPL BA CMG DAL LULU
131 | date
132 | 2016-06-24 0.165260 0.002198 0.085632 -0.078074 0.173832
133 | 2016-06-27 0.165537 0.003583 0.063299 -0.048674 0.180890
134 | 2016-06-28 0.135215 0.010403 0.059038 -0.034879 0.111691
135 | 2016-06-29 0.068774 0.019848 0.058476 -0.049971 0.042805
136 | 2016-06-30 0.039431 0.012271 0.037432 -0.027272 0.010902
137 |
138 | :return: the rebuilt factor values, each with a tiny perturbation added.
139 | """
140 | return factor_df + np.random.random(factor_df.shape) / 1000000000
141 |
142 |
143 | # industry and market-cap neutralization for DataFrame data
144 | def neutralize(factor_df,
145 | group,
146 | float_mv=None,
147 | index_member=None):
148 | """
149 | Neutralize the factor against industry and market cap.
150 | :param index_member:
151 | :param group: industry classification (pandas.DataFrame), index is datetime, columns are symbols
152 | :param factor_df: factor values (pandas.DataFrame), index is datetime, columns are symbols,
153 | e.g.:
154 | AAPL BA CMG DAL LULU
155 | date
156 | 2016-06-24 0.165260 0.002198 0.085632 -0.078074 0.173832
157 | 2016-06-27 0.165537 0.003583 0.063299 -0.048674 0.180890
158 | 2016-06-28 0.135215 0.010403 0.059038 -0.034879 0.111691
159 | 2016-06-29 0.068774 0.019848 0.058476 -0.049971 0.042805
160 | 2016-06-30 0.039431 0.012271 0.037432 -0.027272 0.010902
161 | :param float_mv: float market cap (pandas.DataFrame), index is datetime, columns are symbols; if None, no market-cap neutralization is performed
162 | :return: neutralized factor values (pandas.DataFrame), index is datetime, columns are symbols.
163 | """
164 | def drop_nan(s):
165 | return s[s != "nan"]
166 |
167 | def _ols_by_numpy(x, y):
168 | m = np.linalg.lstsq(x, y)[0]
169 | resid = y - (x @ m)
170 | return resid
171 |
172 | def _generate_cross_sectional_residual(data):
173 | for _, X in data.groupby(level=0):
174 | signal = X.pop("signal")
175 | X = pd.concat([X, pd.get_dummies(X.pop("industry"))], axis=1)
176 | signal = pd.Series(_ols_by_numpy(X.values, signal), index=signal.index, name=signal.name)
177 | yield signal
178 |
179 | data = []
180 |
181 | # keep the original index and columns so they can be restored later
182 | origin_factor_columns = factor_df.columns
183 | origin_factor_index = factor_df.index
184 |
185 | factor_df = jutil.fillinf(factor_df)  # clean invalid (inf) values
186 | factor_df = _mask_non_index_member(factor_df, index_member)  # drop non-index members
187 | factor_df = factor_df.dropna(how="all").stack().rename("signal")  # drop all-NaN cross-sections
188 | data.append(factor_df)
189 |
190 | # log float market cap, de-extremed and standardized; skip this step for size factors themselves
191 | if float_mv is not None:
192 | float_mv = standardize(mad(np.log(float_mv), index_member=index_member), index_member).stack().rename("style")
193 | data.append(float_mv)
194 |
195 | # industry
196 | industry_standard = drop_nan(group.stack()).rename("industry")
197 | data.append(industry_standard)
198 |
199 | data = pd.concat(data, axis=1).dropna()
200 | residuals = pd.concat(_generate_cross_sectional_residual(data)).unstack()
201 |
202 | # restore the rows and columns dropped during neutralization
203 | residuals = residuals.reindex(index=origin_factor_index, columns=origin_factor_columns)
204 | return residuals
205 |
--------------------------------------------------------------------------------
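# A minimal sketch (hypothetical inputs, not part of the original module) of the
# cross-sectional regression inside neutralize() above: the factor is regressed
# on industry dummies (plus size, if given) and the residual is kept as the
# neutralized signal.
import numpy as np
import pandas as pd

signal = np.array([1.0, 2.0, 3.0, 4.0])
industry = pd.Series(['bank', 'bank', 'tech', 'tech'])
X = pd.get_dummies(industry).astype(float).values
beta = np.linalg.lstsq(X, signal, rcond=None)[0]  # rcond=None silences newer numpy's warning
resid = signal - X @ beta
assert np.allclose(resid, [-0.5, 0.5, -0.5, 0.5])  # demeaned within each industry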
/tests/test_py_expression_eval.py:
--------------------------------------------------------------------------------
1 | # encoding: UTF-8
2 |
3 | from __future__ import print_function
4 |
5 | from jaqs_fxdayu import patch_all
6 |
7 | patch_all()
8 | import pandas as pd
9 | import numpy as np
10 |
11 | try:
12 | import pytest
13 | except ImportError as e:
14 | if __name__ == "__main__":
15 | pass
16 | else:
17 | raise e
18 | from jaqs.data import RemoteDataService
19 | from jaqs.data import Parser
20 | from tests.data_config import data_config
21 |
22 |
23 | def test_group_rank():
24 | shape = (500, 3000)
25 | df_val = pd.DataFrame(np.random.rand(*shape))
26 | df_group = pd.DataFrame(np.random.randint(1, 5, size=shape[0] * shape[1]).reshape(*shape))
27 | expr = parser.parse('GroupRank(val, mygroup)')
28 | res = parser.evaluate({'val': df_val, 'mygroup': df_group})
29 |
30 |
31 | def test_group_quantile():
32 | shape = (500, 3000)
33 | df_val = pd.DataFrame(np.random.rand(*shape))
34 | df_group = pd.DataFrame(np.random.randint(1, 5, size=shape[0] * shape[1]).reshape(*shape))
35 | expr = parser.parse('GroupQuantile(val, mygroup, 23)')
36 | res = parser.evaluate({'val': df_val, 'mygroup': df_group})
37 | n = 100
38 | df_val = pd.DataFrame(np.arange(n).reshape(2, -1))
39 | df_group = pd.DataFrame(np.array([1] * 25 + [2] * 25 + [2] * 20 + [3] * 20 + [9] * 10).reshape(2, -1))
40 | expr = parser.parse('GroupQuantile(val, mygroup, 5)')
41 | res = parser.evaluate({'val': df_val, 'mygroup': df_group})
42 | n1 = 5
43 | n2 = 4
44 | n3 = 2
45 | res_correct = np.array([0.] * n1 + [1.] * n1 + [2.] * n1 + [3.] * n1 + [4.] * n1
46 | + [0.] * n1 + [1.] * n1 + [2.] * n1 + [3.] * n1 + [4.] * n1
47 | + [0.] * n2 + [1.] * n2 + [2.] * n2 + [3.] * n2 + [4.] * n2
48 | + [0.] * n2 + [1.] * n2 + [2.] * n2 + [3.] * n2 + [4.] * n2
49 | + [0.] * n3 + [1.] * n3 + [2.] * n3 + [3.] * n3 + [4.]
* n3).reshape(2, -1) + 1.0 50 | assert np.abs(res.values - res_correct).flatten().sum() < 1e-6 51 | 52 | 53 | def test_quantile(): 54 | val = pd.DataFrame(np.random.rand(500, 3000)) 55 | expr = parser.parse('Quantile(val, 12)') 56 | res = parser.evaluate({'val': val}) 57 | assert np.nanmean(val[res == 1].values.flatten()) < 0.11 58 | 59 | val = pd.DataFrame(np.random.rand(1000, 100)) 60 | expr = parser.parse('Ts_Quantile(val, 500, 12)') 61 | res = parser.evaluate({'val': val}) 62 | assert np.nanmean(val[res == 1].values.flatten()) < 0.11 63 | 64 | 65 | def test_ttm(): 66 | from jaqs.data import DataView 67 | 68 | ds = RemoteDataService() 69 | ds.init_from_config(data_config) 70 | dv = DataView() 71 | props = {'start_date': 20120101, 'end_date': 20170601, 'universe': '000016.SH', 72 | 'fields': ('net_profit_incl_min_int_inc'), 'freq': 1} 73 | dv.init_from_config(props, ds) 74 | dv.prepare_data() 75 | 76 | dv.add_formula('single', 'TTM(net_profit_incl_min_int_inc)', is_quarterly=True) 77 | 78 | 79 | def test_logical_and_or(): 80 | parser.parse('open + 3 && 1') 81 | res = parser.evaluate({'open': dfx}) 82 | assert np.all(res.values.flatten()) 83 | 84 | parser.parse('open + 3 && 0.0') 85 | res = parser.evaluate({'open': dfx}) 86 | assert not np.all(res.values.flatten()) 87 | 88 | 89 | def test_plus_minus_mul_div(): 90 | expression = parser.parse('close * open + close / open - close^3 % open') 91 | res = parser.evaluate({'close': dfy, 'open': dfx}) 92 | 93 | 94 | def test_eq_neq(): 95 | expression = parser.parse('(close == open) && (close != open) && (!close)') 96 | res = parser.evaluate({'close': dfy, 'open': dfx}) 97 | 98 | expression = parser.parse('(close > open)') 99 | res = parser.evaluate({'close': dfy, 'open': dfx}) 100 | 101 | expression = parser.parse('(close >= open)') 102 | res = parser.evaluate({'close': dfy, 'open': dfx}) 103 | 104 | expression = parser.parse('(close < open)') 105 | res = parser.evaluate({'close': dfy, 'open': dfx}) 106 | 107 | expression = parser.parse('(close <= open)') 108 | res = parser.evaluate({'close': dfy, 'open': dfx}) 109 | 110 | 111 | def test_cutoff_standardize(): 112 | expression = parser.parse('Standardize(Cutoff(close, 2.8))') 113 | res = parser.evaluate({'close': dfy, 'open': dfx}) 114 | 115 | 116 | def test_moving_avg(): 117 | expression = parser.parse('Ewma(close, 5)') 118 | res = parser.evaluate({'close': dfy}) 119 | expression = parser.parse('Ts_Mean(close, 5)') 120 | res = parser.evaluate({'close': dfy}) 121 | expression = parser.parse('Ts_Min(close, 5)') 122 | res = parser.evaluate({'close': dfy}) 123 | expression = parser.parse('Ts_Max(close, 5)') 124 | res = parser.evaluate({'close': dfy}) 125 | 126 | 127 | def test_cov_corr(): 128 | expression = parser.parse('Correlation(close, open, 5)') 129 | res = parser.evaluate({'close': dfy, 'open': dfx}) 130 | expression = parser.parse('Covariance(close, open, 5)') 131 | res = parser.evaluate({'close': dfy, 'open': dfx}) 132 | 133 | 134 | def test_return_delay_delta(): 135 | expression = parser.parse('Delta(close, 5)') 136 | res = parser.evaluate({'close': dfy}) 137 | expression = parser.parse('Delay(close, 5)') 138 | res = parser.evaluate({'close': dfy}) 139 | expression = parser.parse('Return(close, 5)') 140 | res = parser.evaluate({'close': dfy}) 141 | 142 | 143 | def test_skew(): 144 | expression = parser.parse('Ts_Skewness(close,4)') 145 | res = parser.evaluate({'close': dfy}) 146 | expression = parser.parse('Ts_Kurtosis(close,4)') 147 | res = parser.evaluate({'close': dfy}) 148 | 149 | 150 | 
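# A small usage sketch (hypothetical expression; parser comes from the fixture
# at the bottom of this file): Expression.variables() lists the input fields a
# formula needs, which is how DataView decides which data to fetch, e.g.
#
#     expr = parser.parse('Ts_Mean(close, 5) / open')
#     set(expr.variables())  # -> {'close', 'open'}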
def test_variables():
151 | expression = parser.parse('Ts_Skewness(open,4)+close / what')
152 | assert set(expression.variables()) == {'open', 'close', 'what'}
153 |
154 |
155 | def test_product():
156 | # parser.set_capital('lower')
157 | expression = parser.parse('Ts_Product(open,2)')
158 | res = parser.evaluate({'close': dfy, 'open': dfx})
159 | # parser.set_capital('upper')
160 |
161 |
162 | def test_rank():
163 | expression = parser.parse('Rank(close)')
164 | res = parser.evaluate({'close': dfy, 'open': dfx})
165 |
166 | expression = parser.parse('Ts_Rank(close, 8)')
167 | res = parser.evaluate({'close': dfy, 'open': dfx})
168 |
169 |
170 | def test_tail():
171 | expression = parser.parse('Tail(close/open,0.99,1.01,1.0)')
172 | res = parser.evaluate({'close': dfy, 'open': dfx})
173 |
174 |
175 | def test_step():
176 | expression = parser.parse('Step(close,10)')
177 | res = parser.evaluate({'close': dfy, 'open': dfx})
178 |
179 |
180 | def test_decay_linear():
181 | expression = parser.parse('Decay_linear(open,2)')
182 | res = parser.evaluate({'close': dfy, 'open': dfx})
183 |
184 |
185 | def test_decay_exp():
186 | expression = parser.parse('Decay_exp(open, 0.5, 2)')
187 | res = parser.evaluate({'close': dfy, 'open': dfx})
188 |
189 |
190 | def test_signed_power():
191 | expression = parser.parse('SignedPower(close-open, 2)')
192 | res = parser.evaluate({'close': dfx, 'open': dfy})
193 |
194 |
195 | def test_ewma():
196 | expr = parser.parse('Ewma(close, 3)')
197 | res = parser.evaluate({'close': dfx})
198 | assert abs(res.loc[20170801, '000001.SH'] - 3292.6) < 1e-1
199 |
200 |
201 | def test_if():
202 | expr = parser.parse('If(close > 20, 3, -3)')
203 | res = parser.evaluate({'close': dfx})
204 | assert res.iloc[0, 0] == 3.
205 | assert res.iloc[0, 2] == -3.
206 | 207 | 208 | ''' 209 | def test_group_apply(): 210 | import numpy as np 211 | np.random.seed(369) 212 | 213 | n = 20 214 | 215 | dic = {c: np.random.rand(n) for c in 'abcdefghijklmnopqrstuvwxyz'[:n]} 216 | df_value = pd.DataFrame(index=range(n), data=dic) 217 | 218 | r = np.random.randint(0, 5, n * df_value.shape[0]).reshape(df_value.shape[0], n) 219 | cols = df_value.columns.values.copy() 220 | np.random.shuffle(cols) 221 | 222 | df_group = pd.DataFrame(index=df_value.index, columns=cols, data=r) 223 | 224 | parser = Parser() 225 | expr = parser.parse('GroupApply(Standardize, GroupApply(Cutoff, close, 2.8))') 226 | res = parser.evaluate({'close': df_value}, df_group=df_group) 227 | 228 | assert abs(res.iloc[3, 6] - (-1.53432)) < 1e-5 229 | assert abs(res.iloc[19, 18] - (-1.17779)) < 1e-5 230 | 231 | 232 | ''' 233 | 234 | 235 | def test_calc_return(): 236 | expr = parser.parse('Return(close, 2, 0)') 237 | res = parser.evaluate({'close': dfx}) 238 | assert abs(res.loc[20170808, '000001.SH'] - 0.006067) < 1e-6 239 | 240 | expr = parser.parse('Return(close, 2, 1)') 241 | res = parser.evaluate({'close': dfx}) 242 | 243 | 244 | @pytest.fixture(autouse=True) 245 | def my_globals(request): 246 | ds = RemoteDataService() 247 | ds.init_from_config(data_config) 248 | 249 | df, msg = ds.daily("000001.SH, 600030.SH, 000300.SH", start_date=20170801, end_date=20170820, 250 | fields="open,high,low,close,vwap,preclose") 251 | 252 | multi_index_names = ['trade_date', 'symbol'] 253 | df_multi = df.set_index(multi_index_names, drop=False) 254 | df_multi.sort_index(axis=0, level=multi_index_names, inplace=True) 255 | 256 | dfx = df_multi.loc[pd.IndexSlice[:, :], pd.IndexSlice['close']].unstack() 257 | dfy = df_multi.loc[pd.IndexSlice[:, :], pd.IndexSlice['open']].unstack() 258 | 259 | parser = Parser() 260 | request.function.__globals__.update({'parser': parser, 'dfx': dfx, 'dfy': dfy}) 261 | 262 | 263 | if __name__ == "__main__": 264 | pytest.main([__file__]) 265 | -------------------------------------------------------------------------------- /jaqs_fxdayu/research/signaldigger/signal_creator.py: -------------------------------------------------------------------------------- 1 | # encoding=utf-8 2 | 3 | from .analysis import compute_downside_returns, compute_upside_returns 4 | from . 
import performance as pfm
5 | import pandas as pd
6 | import numpy as np
7 | import jaqs.util as jutil
8 |
9 |
10 | def _process_filter(_filter):
11 | if _filter is not None:
12 | _filter = jutil.fillinf(_filter)
13 | _filter = _filter.fillna(0).astype(bool)  # treat missing values as False before casting to bool
14 | return _filter
15 |
16 |
17 | def _assert(standard, tmp):
18 | if tmp is not None:
19 | assert np.all(standard.index == tmp.index)
20 | assert np.all(standard.columns == tmp.columns)
21 |
22 |
23 | def _get_df(index, columns, value):
24 | return pd.DataFrame(index=index, columns=columns, data=value)
25 |
26 |
27 | class SignalCreator(object):
28 | def __init__(self,
29 | price=None, daily_ret=None,
30 | benchmark_price=None, daily_benchmark_ret=None,
31 | high=None, low=None,
32 | group=None,
33 | period=5, n_quantiles=5,
34 | mask=None,
35 | can_enter=None,
36 | can_exit=None,
37 | forward=True,
38 | commission=0.0008):
39 |
40 | if price is None and daily_ret is None:
41 | raise ValueError("One of price / daily_ret must be provided.")
42 | if price is not None and daily_ret is not None:
43 | raise ValueError("Only one of price / daily_ret should be provided.")
44 | if benchmark_price is not None and daily_benchmark_ret is not None:
45 | raise ValueError("Only one of benchmark_price / daily_benchmark_ret should be provided.")
46 | if not (n_quantiles > 0 and isinstance(n_quantiles, int)):
47 | raise ValueError("n_quantiles must be a positive integer. Input is: {}".format(n_quantiles))
48 |
49 | self.price = price
50 | self.daily_ret = daily_ret
51 | self.high = high
52 | self.low = low
53 | self.group = group
54 | self.n_quantiles = n_quantiles
55 | self.mask = _process_filter(mask)
56 | self.can_enter = _process_filter(can_enter)
57 | self.can_exit = _process_filter(can_exit)
58 |
59 | self.period = period
60 | self.benchmark_price = benchmark_price
61 | self.daily_benchmark_ret = daily_benchmark_ret
62 | self.forward = forward
63 | self.commission = commission
64 |
65 | self.benchmark_ret = None
66 | self.signal_data = None
67 | self.signal_ret = None
68 |
69 | def _judge(self, signal):
70 | # build default filter frames
71 | self.mask = _get_df(signal.index, signal.columns, False) if self.mask is None else self.mask
72 | self.can_enter = _get_df(signal.index, signal.columns, True) if self.can_enter is None else self.can_enter
73 | self.can_exit = _get_df(signal.index, signal.columns, True) if self.can_exit is None else self.can_exit
74 |
75 | # confirm the frame shapes are consistent
76 | _assert(signal, self.mask)
77 | _assert(signal, self.can_enter)
78 | _assert(signal, self.can_exit)
79 | _assert(signal, self.group)
80 |
81 | if self.signal_ret is not None:
82 | for ret_type in self.signal_ret.keys():
83 | _assert(signal, self.signal_ret[ret_type])
84 | else:
85 | _assert(signal, self.price)
86 | _assert(signal, self.daily_ret)
87 | _assert(signal, self.high)
88 | _assert(signal, self.low)
89 |
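    # Typical usage, as a sketch (hypothetical wide DataFrames indexed by
    # trade_date with one column per symbol; the names are illustrative only):
    #
    #     sc = SignalCreator(price=close_df, high=high_df, low=low_df,
    #                        period=5, n_quantiles=5)
    #     signal_data = sc.get_signal_data(signal_df)  # MultiIndex result, see below
    #
    # All frames must share the same index and columns, which is what _judge
    # above enforces before any return is computed.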
90 | def _cal_ret(self):
91 | if self.signal_ret is not None:
92 | return
93 | else:
94 | # benchmark returns
95 | if self.benchmark_price is not None:
96 | self.benchmark_ret = pfm.price2ret(self.benchmark_price, self.period, axis=0, compound=True)
97 | elif self.daily_benchmark_ret is not None:
98 | self.benchmark_ret = pfm.daily_ret_to_ret(self.daily_benchmark_ret, self.period)
99 |
100 | # holding-period returns
101 | isRealPrice = False
102 | if self.daily_ret is not None:
103 | self.daily_ret = jutil.fillinf(self.daily_ret).fillna(0)
104 | self.price = pfm.daily_ret_to_cum(self.daily_ret)
105 | else:
106 | # real prices were provided
107 | isRealPrice = True
108 | self.price = jutil.fillinf(self.price)
109 |
110 | self.can_enter = np.logical_and(self.price.notnull(), self.can_enter)  # bars with NaN prices cannot be entered
111 | df_ret = pfm.price2ret(self.price, period=self.period, axis=0, compound=True)
112 | price_can_exit = self.price.copy()
113 | price_can_exit[~self.can_exit] = np.NaN
114 | price_can_exit = price_can_exit.fillna(method="bfill")
115 | ret_can_exit = pfm.price2ret(price_can_exit, period=self.period, axis=0, compound=True)
116 | df_ret[~self.can_exit] = ret_can_exit[~self.can_exit]
117 |
118 | if self.benchmark_ret is not None:
119 | # holding-period returns relative to the benchmark
120 | self.benchmark_ret = self.benchmark_ret.reindex(df_ret.index)
121 | residual_ret = df_ret.sub(self.benchmark_ret.values.flatten(), axis=0)
122 | else:
123 | residual_ret = df_ret
124 | residual_ret = jutil.fillinf(residual_ret)
125 | residual_ret -= self.commission
126 |
127 | # potential upside and downside room
128 | if self.high is not None and isRealPrice:
129 | self.high = jutil.fillinf(self.high)
130 | else:
131 | self.high = self.price
132 | upside_ret = compute_upside_returns(self.price, self.high, self.can_exit, self.period, compound=True)
133 | upside_ret = jutil.fillinf(upside_ret)
134 | upside_ret -= self.commission
135 |
136 | if self.low is not None and isRealPrice:
137 | self.low = jutil.fillinf(self.low)
138 | else:
139 | self.low = self.price
140 | downside_ret = compute_downside_returns(self.price, self.low, self.can_exit, self.period, compound=True)
141 | downside_ret = jutil.fillinf(downside_ret)
142 | downside_ret -= self.commission
143 |
144 | self.signal_ret = {
145 | "return": residual_ret,
146 | "upside_ret": upside_ret,
147 | "downside_ret": downside_ret
148 | }
149 | if self.forward:
150 | for ret_type in self.signal_ret.keys():
151 | if self.signal_ret[ret_type] is not None:
152 | # point-in-time signal and forward return
153 | self.signal_ret[ret_type] = self.signal_ret[ret_type].shift(-self.period)
154 | else:
155 | self.can_enter = self.can_enter.shift(self.period)
156 | self.mask = self.mask.shift(self.period)
157 |
158 | # combine the masks: masked, or not enterable
159 | self.mask = np.logical_or(self.mask.fillna(True), ~(self.can_enter.fillna(False)))
160 |
161 | def get_signal_data(self, signal):
162 | """
163 | Returns
164 | -------
165 | res : pd.DataFrame
166 | Index is pd.MultiIndex ['trade_date', 'symbol'], columns = ['signal', 'return', 'upside_ret(N)','downside_ret(N)','quantile']
167 | """
168 | self._judge(signal)  # check that signal is consistent with the other key inputs
169 | self._cal_ret()  # compute signal returns
170 | signal = jutil.fillinf(signal)
171 | signal = signal.shift(1)  # avoid forward-looking bias
172 |
173 | # forward or not
174 | if not self.forward:
175 | signal = signal.shift(self.period)
176 |
177 | # combine masks
178 | mask = np.logical_or(self.mask, signal.isnull())
179 |
180 | # calculate quantile
181 | signal_masked = signal.copy()
182 | signal_masked = signal_masked[~mask]
183 | if self.n_quantiles == 1:
184 | df_quantile = signal_masked.copy()
185 | df_quantile.loc[:, :] = 1.0
186 | else:
187 | df_quantile = jutil.to_quantile(signal_masked, n_quantiles=self.n_quantiles)
188 |
189 | # ----------------------------------------------------------------------
190 | # stack
191 | def stack_td_symbol(df):
192 | df = pd.DataFrame(df.stack(dropna=False))  # do not dropna
193 | df.index.names = ['trade_date', 'symbol']
194 | df.sort_index(axis=0, level=['trade_date', 'symbol'], inplace=True)
195 | return df
196 |
197 | # ----------------------------------------------------------------------
198 | # concat signal value
199 | res = stack_td_symbol(signal)  # signal values
200 | res.columns =
['signal'] 201 | 202 | for ret_type in self.signal_ret.keys(): 203 | if self.signal_ret[ret_type] is not None: 204 | res[ret_type] = stack_td_symbol(self.signal_ret[ret_type]).fillna(0) # 收益 205 | 206 | if self.group is not None: 207 | res["group"] = stack_td_symbol(self.group) 208 | 209 | res['quantile'] = stack_td_symbol(df_quantile) # quantile 210 | mask = stack_td_symbol(mask) 211 | res = res.loc[~(mask.iloc[:, 0]), :] 212 | 213 | if len(res) > 0: 214 | print("Nan Data Count (should be zero) : {:d}; " \ 215 | "Percentage of effective data: {:.0f}%".format(res.isnull().sum(axis=0).sum(), 216 | len(res) * 100. / signal.size)) 217 | else: 218 | print("No signal available.") 219 | res = res.astype({'signal': float, 'return': float, 'quantile': int}) 220 | return res 221 | -------------------------------------------------------------------------------- /jaqs_fxdayu/data/dataapi/LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 |
--------------------------------------------------------------------------------
/jaqs_fxdayu/research/signaldigger/optimizer.py:
--------------------------------------------------------------------------------
1 | # encoding=utf-8
2 | # parameter optimizer
3 |
4 |
5 | from itertools import product
6 | from .analysis import analysis
7 | from .signal_creator import SignalCreator
8 | import warnings
9 | import pandas as pd
10 |
11 | target_types = {
12 | 'factor': {
13 | "ic": [
14 | "return_ic",
15 | "upside_ret_ic",
16 | "downside_ret_ic"
17 | ],
18 | "ret": [
19 | "long_ret",
20 | "short_ret",
21 | "long_short_ret",
22 | 'top_quantile_ret',
23 | 'bottom_quantile_ret',
24 | "tmb_ret",
25 | "all_sample_ret"],
26 | "space": [
27 | 'long_space',
28 | 'short_space',
29 | 'long_short_space',
30 | "top_quantile_space",
31 | "bottom_quantile_space",
32 | "tmb_space",
33 | "all_sample_space"
34 | ]
35 | },
36 | "event": {
37 | "ret": [
38 | "long_ret",
39 | "short_ret",
40 | "long_short_ret",
41 | ],
42 | "space": [
43 | 'long_space',
44 | 'short_space',
45 | 'long_short_space',
46 | ]
47 | }
48 | }
49 |
50 | targets = {
51 | "ic": ["IC Mean", "IC Std.", "t-stat(IC)", "p-value(IC)", "IC Skew", "IC Kurtosis", "Ann. IR"],
52 | "ret": ['t-stat', "p-value", "skewness", "kurtosis", "Ann. Ret", "Ann. Vol", "Ann. IR", "occurance"],
53 | "space": [
54 | 'Up_sp Mean',
55 | 'Up_sp Std',
56 | 'Up_sp IR',
57 | 'Up_sp Pct5',
58 | 'Up_sp Pct25 ',
59 | 'Up_sp Pct50 ',
60 | 'Up_sp Pct75',
61 | 'Up_sp Pct95',
62 | 'Up_sp Occur',
63 | 'Down_sp Mean',
64 | 'Down_sp Std',
65 | 'Down_sp IR',
66 | 'Down_sp Pct5',
67 | 'Down_sp Pct25 ',
68 | 'Down_sp Pct50 ',
69 | 'Down_sp Pct75',
70 | 'Down_sp Pct95',
71 | 'Down_sp Occur',
72 | ]
73 | }
74 |
75 |
76 | class Optimizer(object):
77 | '''
78 | :param dataview: jaqs.data.DataView object holding all data the formula needs
79 | :param formula: str (optional) the formula to optimize, e.g. '(open - Delay(close, l1)) / Delay(close, l2)'
80 | :param params: dict (optional) parameter space to search, e.g. {"LEN1":range(1,10,1),"LEN2":range(1,10,1)}
81 | :param name: str (optional) name of the signal
82 | :param price: DataFrame (optional) prices; mutually exclusive with daily_ret
83 | :param daily_ret: DataFrame (optional) daily returns
84 | :param high: DataFrame (optional) high prices, used to compute upside return space
85 | :param low: DataFrame (optional) low prices, used to compute downside return space
86 | :param benchmark_price: DataFrame (optional) benchmark prices; if given, returns are computed relative to the benchmark; mutually exclusive with daily_benchmark_ret
87 | :param daily_benchmark_ret: DataFrame (optional) benchmark daily returns; if given, returns are computed relative to the benchmark
88 | :param period: int (5) holding period for selection
89 | :param n_quantiles: int (5)
90 | :param mask: filter condition, DataFrame (optional)
91 | :param can_enter: DataFrame (optional) whether entry is allowed
92 | :param can_exit: DataFrame (optional) whether exit is allowed
93 | :param forward: bool (True) whether to use forward returns
94 | :param commission: float (0.0008) commission rate
95 | :param is_event: bool (False) whether the signal is an event (0/1 factor)
96 | :param is_quarterly: bool (False) whether the factor is quarterly
97 | '''
98 |
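    # Construction sketch (hypothetical dataview `dv`; LEN1 is a placeholder that
    # gets substituted into the formula, hence the upper-case naming rule below):
    #
    #     opt = Optimizer(dataview=dv,
    #                     formula='- Correlation(vwap_adj, volume, LEN1)',
    #                     params={'LEN1': range(5, 15, 2)},
    #                     period=5, n_quantiles=5, is_event=False)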
99 | def __init__(self,
100 | dataview=None,
101 | formula=None,
102 | params=None,
103 | name=None,
104 | price=None,
105 | daily_ret=None,
106 | high=None,
107 | low=None,
108 | benchmark_price=None,
109 | daily_benchmark_ret=None,
110 | period=5,
111 | n_quantiles=5,
112 | mask=None,
113 | can_enter=None,
114 | can_exit=None,
115 | forward=True,
116 | commission=0.0008,
117 | is_event=False,
118 | is_quarterly=False,
119 | register_funcs=None,
120 | ):
121 | self.dataview = dataview
122 | self.formula = formula
123 | self.params = params
124 | if self.formula is not None:
125 | self._judge_params()
126 | self.name = name if name else formula
127 | if price is None and daily_ret is None:
128 | try:
129 | price = dataview.get_ts('close_adj')
130 | except Exception:
131 | raise ValueError("One of price / ret must be provided.")
132 | self.period = period
133 | if is_event:
134 | n_quantiles = 1
135 | self.is_event = is_event
136 | self.is_quarterly = is_quarterly
137 | self.register_funcs = register_funcs
138 | self.signal_creator = SignalCreator(
139 | price=price, daily_ret=daily_ret,
140 | benchmark_price=benchmark_price, daily_benchmark_ret=daily_benchmark_ret,
141 | high=high, low=low,
142 | period=period, n_quantiles=n_quantiles,
143 | mask=mask,
144 | can_enter=can_enter,
145 | can_exit=can_exit,
146 | forward=forward,
147 | commission=commission
148 | )
149 | self.all_signals = None
150 | self.all_signals_perf = None
151 | self.in_sample_range = None
152 |
153 | # validate the parameter naming convention
154 | def _judge_params(self):
155 | if self.params is None:
156 | raise ValueError("No search space was given to the optimizer (the params argument is required)")
157 | if not isinstance(self.params, dict):
158 | raise ValueError("params must be a dict such as {'LEN1': range(1, 10, 1), 'LEN2': range(1, 10, 1)}")
159 | for para in self.params.keys():
160 | if len(para) < 2 or not para.isupper():
161 | raise ValueError("Invalid formula parameter name %s: names must be upper-case and at least 2 characters long" % (para,))
162 |
163 | # validate the optimization target
164 | def _judge_target(self, target_type, target):
165 | legal = True
166 | # check whether the provided inputs support return-space analysis
167 | if self.signal_creator.high is None or self.signal_creator.low is None:
168 | if (target_type in target_types["factor"]["space"]) or \
169 | (target_type in ["upside_ret_ic", "downside_ret_ic"]) or \
170 | (target in targets["space"]):
171 | legal = False
172 | print("Pass [high] & [low] to Optimizer to enable return-space analysis and optimization")
173 | # check that target / target_type are among the allowed options
174 | if self.is_event:
175 | if target_type in target_types["event"]["ret"]:
176 | if not (target in targets["ret"]):
177 | legal = False
178 | print("The optimization target must be one of %s" % (str(targets["ret"])))
179 | elif target_type in target_types["event"]["space"]:
180 | if not (target in targets["space"]):
181 | legal = False
182 | print("The optimization target must be one of %s" % (str(targets["space"])))
183 | else:
184 | legal = False
185 | print("The optimization target type must be one of %s" % (str(target_types["event"]["ret"] + target_types["event"]["space"])))
186 | else:
187 | if target_type in target_types["factor"]["ret"]:
188 | if not (target in targets["ret"]):
189 | legal = False
190 | print("The optimization target must be one of %s" % (str(targets["ret"])))
191 | elif target_type in target_types["factor"]["ic"]:
192 | if not (target in targets["ic"]):
193 | legal = False
194 | print("The optimization target must be one of %s" % (str(targets["ic"])))
195 | elif target_type in target_types["factor"]["space"]:
196 | if not (target in targets["space"]):
197 | legal = False
198 | print("The optimization target must be one of %s" % (str(targets["space"])))
199 | else:
200 | legal = False; print("The optimization target type must be one of %s" % (
201 | str(target_types["factor"]["ret"] + target_types["factor"]["ic"] + target_types["factor"][
202 | "space"])))
203 | return legal
204 |
205 | def enumerate_optimizer(self,
206 | target_type="long_ret",
207 | target="Ann. IR",
208 | ascending=False,
209 | in_sample_range=None):
210 | '''
211 | :param target_type: kind of optimization target
212 | :param target: the optimization objective
213 | :param ascending: bool (False) sort ascending instead of descending
214 | :param in_sample_range: [date_start(int), date_end(int)] (optional) in-sample window for the optimization.
215 | :return:
216 | '''
217 |
218 | if self._judge_target(target_type, target):  # validate the target
219 | self.get_all_signals_perf(in_sample_range)
220 | if len(self.all_signals_perf) == 0:
221 | return []
222 | if target_type in (target_types["factor"]["ic"]):
223 | order_index = "ic"
224 | elif target_type in (target_types["factor"]["ret"]):
225 | order_index = "ret"
226 | else:
227 | order_index = "space"
228 | ordered_perf = self.all_signals_perf.values()
229 | return sorted(ordered_perf,
230 | key=lambda x: x[order_index].loc[target, target_type],
231 | reverse=not ascending)
232 | return []
233 |
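    # Ranking sketch (hypothetical dates): enumerate_optimizer returns the
    # per-signal performance dicts sorted by the chosen metric, best first
    # when ascending=False:
    #
    #     ranked = opt.enumerate_optimizer(target_type='long_ret', target='Ann. IR',
    #                                      ascending=False,
    #                                      in_sample_range=[20170101, 20170601])
    #     best = ranked[0]['signal_name'] if ranked else None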
IR", 280 | "condition":}] 281 | :return: 282 | ''' 283 | perf = None 284 | if signal_data is not None: 285 | if in_sample_range is not None: 286 | signal_data = signal_data.loc[in_sample_range[0]:in_sample_range[1]] 287 | if len(signal_data) > 0: 288 | perf = analysis(signal_data, self.is_event, self.period) 289 | return perf 290 | -------------------------------------------------------------------------------- /tests/test_research.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | from jaqs_fxdayu import patch_all 5 | 6 | patch_all() 7 | 8 | import numpy as np 9 | import os 10 | from pathlib import Path 11 | from jaqs.data import DataView 12 | from jaqs.data import RemoteDataService 13 | from jaqs.research import SignalDigger 14 | from jaqs.research.signaldigger import performance as pfm 15 | from jaqs.research.signaldigger import plotting 16 | from jaqs_fxdayu.research.signaldigger.analysis import analysis 17 | from tests.data_config import data_config 18 | 19 | output_root = Path(__file__).absolute().parent 20 | 21 | dataview_folder = str(output_root / ".persist" / "test_signal") 22 | if not (os.path.isdir(dataview_folder)): 23 | os.makedirs(dataview_folder) 24 | 25 | 26 | # -------------------------------------------------------------------------------- 27 | # 定义信号过滤条件-非指数成分 28 | def mask_index_member(dv): 29 | df_index_member = dv.get_ts('index_member') 30 | mask_index_member = df_index_member == 0 31 | return mask_index_member 32 | 33 | 34 | # 定义可买卖条件——未停牌、未涨跌停 35 | def limit_up_down(dv): 36 | trade_status = dv.get_ts('trade_status') 37 | mask_sus = trade_status != 1 # 不可交易 38 | # 涨停 39 | dv.add_formula('up_limit', '(close - Delay(close, 1)) / Delay(close, 1) > 0.095', is_quarterly=False, 40 | add_data=True) 41 | # 跌停 42 | dv.add_formula('down_limit', '(close - Delay(close, 1)) / Delay(close, 1) < -0.095', is_quarterly=False, 43 | add_data=True) 44 | can_enter = np.logical_and(dv.get_ts('up_limit') < 1, ~mask_sus) # 未涨停未停牌 45 | can_exit = np.logical_and(dv.get_ts('down_limit') < 1, ~mask_sus) # 未跌停未停牌 46 | return can_enter, can_exit 47 | 48 | 49 | def test_save_dataview(): 50 | ds = RemoteDataService() 51 | ds.init_from_config(data_config) 52 | dv = DataView() 53 | print(DataView) 54 | props = {'start_date': 20170501, 'end_date': 20171001, 'universe': '000016.SH', 55 | 'fields': 'volume,pb,pe,ps,float_mv,sw1', 56 | 'freq': 1} 57 | 58 | dv.init_from_config(props, ds) 59 | dv.prepare_data() 60 | 61 | dv.save_dataview(dataview_folder) 62 | 63 | 64 | def test_analyze_signal(): 65 | # -------------------------------------------------------------------------------- 66 | # Step.1 load dataview 67 | dv = DataView() 68 | dv.load_dataview(dataview_folder) 69 | 70 | mask = mask_index_member(dv) 71 | can_enter, can_exit = limit_up_down(dv) 72 | 73 | # -------------------------------------------------------------------------------- 74 | # Step.3 get signal, benchmark and price data 75 | dv.add_formula('divert', '- Correlation(vwap_adj, volume, 10)', is_quarterly=False, add_data=True) 76 | 77 | signal = dv.get_ts('divert') 78 | price = dv.get_ts('close_adj') 79 | price_bench = dv.data_benchmark 80 | 81 | # Step.4 analyze! 
82 | my_period = 5 83 | obj = SignalDigger(output_folder='../output/test_signal', output_format='pdf') 84 | obj.process_signal_before_analysis(signal=signal, 85 | price=price, 86 | high=dv.get_ts("high_adj"), # 可为空 87 | low=dv.get_ts("low_adj"), # 可为空 88 | group=dv.get_ts("sw1"), 89 | n_quantiles=5, # quantile分类数 90 | mask=mask, # 过滤条件 91 | can_enter=can_enter, # 是否能进场 92 | can_exit=can_exit, # 是否能出场 93 | period=my_period, # 持有期 94 | benchmark_price=price_bench, # 基准价格 可不传入,持有期收益(return)计算为绝对收益 95 | commission=0.0008, 96 | ) 97 | signal_data = obj.signal_data 98 | result = analysis(signal_data, is_event=False, period=my_period) 99 | ic = pfm.calc_signal_ic(signal_data, by_group=True) 100 | mean_ic_by_group = pfm.mean_information_coefficient(ic, by_group=True) 101 | plotting.plot_ic_by_group(mean_ic_by_group) 102 | res = obj.create_full_report() 103 | 104 | 105 | def test_DIY_signal(): 106 | # -------------------------------------------------------------------------------- 107 | # Step.1 load dataview 108 | dv = DataView() 109 | dv.load_dataview(dataview_folder) 110 | # 方法1:add_formula 基于dataview里已有的字段,通过表达式定义因子 111 | dv.add_formula("momentum", "Return(close_adj, 20)", is_quarterly=False, add_data=True) 112 | # 方法2: append_df 构造一个因子表格(pandas.Dataframe),直接添加到dataview当中 113 | import pandas as pd 114 | import talib as ta 115 | 116 | close = dv.get_ts("close_adj").dropna(how='all', axis=1) 117 | slope_df = pd.DataFrame( 118 | {sec_symbol: -ta.LINEARREG_SLOPE(value.values, 10) for sec_symbol, value in close.iteritems()}, 119 | index=close.index) 120 | dv.append_df(slope_df, 'slope') 121 | dv.get_ts("slope") 122 | 123 | # 定义事件 124 | from jaqs_fxdayu.research.signaldigger import process 125 | 126 | Open = dv.get_ts("open_adj") 127 | High = dv.get_ts("high_adj") 128 | Low = dv.get_ts("low_adj") 129 | Close = dv.get_ts("close_adj") 130 | trade_status = dv.get_ts('trade_status') 131 | mask_sus = trade_status!=1 132 | # 剔除掉停牌期的数据 再计算指标 133 | open_masked = process._mask_df(Open, mask=mask_sus) 134 | high_masked = process._mask_df(High, mask=mask_sus) 135 | low_masked = process._mask_df(Low, mask=mask_sus) 136 | close_masked = process._mask_df(Close, mask=mask_sus) 137 | from jaqs_fxdayu.data import signal_function_mod as sfm 138 | MA5 = sfm.ta(ta_method='MA', 139 | ta_column=0, 140 | Open=open_masked, 141 | High=high_masked, 142 | Low=low_masked, 143 | Close=close_masked, 144 | Volume=None, 145 | timeperiod=10) 146 | MA10 = sfm.ta('MA', Close=close_masked, timeperiod=10) 147 | dv.append_df(MA5, 'MA5') 148 | dv.append_df(MA10, 'MA10') 149 | dv.add_formula("Cross", "(MA5>=MA10)&&(Delay(MA5 0: 54 | ax.scatter(entry_long["time"].values, entry_long["entry"].values, label="long", c='r', marker='>', linewidths=1) 55 | if entry_short.size > 0: 56 | ax.scatter(entry_short["time"].values, entry_short["entry"].values, label="short", c='b', marker='>', linewidths=1) 57 | ax.scatter(exit["time"].values, exit["exit"].values, label="exit", c='y', marker='<', linewidths=1) 58 | 59 | # 进出场连线 60 | for _,row in tmp.iterrows(): 61 | x = [trans_t(row["trade_date"]),trans_t(row["exit_time"])] 62 | y = [row["entry"],row["exit"]] 63 | if row["return"]>0: 64 | line_type = "r--" 65 | else: 66 | line_type = "g--" 67 | ax.plot(x,y,line_type,linewidth=1) 68 | 69 | ax.legend(loc='best') 70 | ax.set(title="Entry Exit Position of %s"%(symbol,), 71 | xlabel='Datetime') 72 | 73 | ax.yaxis.set_major_formatter(ScalarFormatter()) 74 | 75 | return ax,symbol 76 | 77 | 78 | def plot_mean_ic_heatmap(mean_ic, period, format="M",ax=None): 79 | 
""" 80 | Plots a heatmap of the information coefficient or returns by month. 81 | 82 | Parameters 83 | ---------- 84 | mean_monthly_ic : pd.DataFrame 85 | The mean monthly IC for N periods forward. 86 | 87 | Returns 88 | ------- 89 | ax : matplotlib.Axes 90 | The axes that were plotted on. 91 | """ 92 | MONTH_MAP = {1: 'Jan', 93 | 2: 'Feb', 94 | 3: 'Mar', 95 | 4: 'Apr', 96 | 5: 'May', 97 | 6: 'Jun', 98 | 7: 'Jul', 99 | 8: 'Aug', 100 | 9: 'Sep', 101 | 10: 'Oct', 102 | 11: 'Nov', 103 | 12: 'Dec'} 104 | 105 | num_plots = 1.0 106 | 107 | v_spaces = ((num_plots - 1) // 3) + 1 108 | 109 | if ax is None: 110 | f, ax = plt.subplots(v_spaces, 3, figsize=(18, v_spaces * 6)) 111 | ax = ax.flatten() 112 | 113 | new_index_y = [] 114 | new_index_x = [] 115 | if format == "D": 116 | for date in mean_ic.index: 117 | new_index_x.append(date.day) 118 | new_index_y.append(str(date.year)+" "+MONTH_MAP[date.month]) 119 | names = ["month","day"] 120 | else: 121 | for date in mean_ic.index: 122 | new_index_y.append(date.year) 123 | new_index_x.append(MONTH_MAP[date.month]) 124 | names = ["year", "month"] 125 | 126 | mean_ic.index = pd.MultiIndex.from_arrays( 127 | [new_index_y, new_index_x], 128 | names=names) 129 | 130 | ic_ = mean_ic['ic'].unstack() 131 | sns.heatmap( 132 | ic_, 133 | annot=True, 134 | alpha=1.0, 135 | center=0.0, 136 | annot_kws={"size": 7}, 137 | linewidths=0.01, 138 | linecolor='white', 139 | cmap=cm.get_cmap('RdBu'), 140 | cbar=False, 141 | ax=ax) 142 | ax.set(ylabel='', xlabel='') 143 | 144 | ax.set_title("IC Mean HeatMap".format(period)) 145 | 146 | return ax 147 | 148 | 149 | def plot_quantile_returns_ts(mean_ret_by_q, ax=None): 150 | """ 151 | Plots mean period wise returns for signal quantiles. 152 | 153 | Parameters 154 | ---------- 155 | mean_ret_by_q : pd.DataFrame 156 | DataFrame with quantile, (group) and mean period wise return values. 157 | ax : matplotlib.Axes, optional 158 | Axes upon which to plot. 159 | 160 | Returns 161 | ------- 162 | ax : matplotlib.Axes 163 | The axes that were plotted on. 164 | 165 | """ 166 | if ax is None: 167 | f, ax = plt.subplots(1, 1, figsize=(18, 6)) 168 | 169 | ret_wide = pd.concat({k: v['mean'] for k, v in mean_ret_by_q.items()}, axis=1) 170 | format = '%Y%m%d' if len(str(ret_wide.index[0])) == 8 else '%Y%m%d%H%M%S' 171 | ret_wide.index = pd.to_datetime(ret_wide.index, format=format) 172 | ret_wide = ret_wide.mul(DECIMAL_TO_PCT) 173 | 174 | ret_wide.plot(lw=1.2, ax=ax, cmap=COLOR_MAP) 175 | ax.legend(loc='upper left') 176 | ymin, ymax = ret_wide.min().min(), ret_wide.max().max() 177 | ax.set(ylabel='Return (%)', 178 | title="Quantile Return (equal weight within quantile)", 179 | xlabel='DateTime', 180 | ylim=(ymin, ymax)) 181 | 182 | ax.yaxis.set_major_formatter(ScalarFormatter()) 183 | ax.axhline(1.0, linestyle='-', color='black', lw=1) 184 | 185 | return ax 186 | 187 | 188 | def plot_cumulative_returns_by_quantile(quantile_ret, ax=None): 189 | """ 190 | Plots the cumulative returns of various signal quantiles. 191 | 192 | Parameters 193 | ---------- 194 | quantile_ret : int: pd.DataFrame 195 | Cumulative returns by signal quantile. 196 | ax : matplotlib.Axes, optional 197 | Axes upon which to plot. 
198 | 199 | Returns 200 | ------- 201 | ax : matplotlib.Axes 202 | """ 203 | 204 | if ax is None: 205 | f, ax = plt.subplots(1, 1, figsize=(18, 6)) 206 | 207 | cum_ret = quantile_ret 208 | format = '%Y%m%d' if len(str(cum_ret.index[0])) == 8 else '%Y%m%d%H%M%S' 209 | cum_ret.index = pd.to_datetime(cum_ret.index, format=format) 210 | cum_ret = cum_ret.mul(DECIMAL_TO_PCT) 211 | 212 | cum_ret.plot(lw=2, ax=ax, cmap=COLOR_MAP) 213 | ax.axhline(0.0, linestyle='-', color='black', lw=1) 214 | 215 | ax.legend(loc='upper left') 216 | ymin, ymax = cum_ret.min().min(), cum_ret.max().max() 217 | ax.set(ylabel='Cumulative Returns (%)', 218 | title='Cumulative Return of Each Quantile (equal weight within quantile)', 219 | xlabel='DateTime', 220 | ylim=(ymin, ymax)) 221 | perfs = ["total_ret_{:d} = {:.1f}%".format(col, performance.calc_performance_metrics(ser, cum_return=True, 222 | compound=False)['total_ret']) 223 | for col, ser in cum_ret.iteritems()] 224 | ax.text(.02, .30, 225 | '\n'.join(perfs), 226 | fontsize=12, 227 | bbox={'facecolor': 'white', 'alpha': 1, 'pad': 5}, 228 | transform=ax.transAxes, 229 | verticalalignment='top') 230 | 231 | ax.yaxis.set_major_formatter(ScalarFormatter()) 232 | 233 | return ax 234 | 235 | 236 | def plot_mean_quantile_returns_spread_time_series(mean_returns_spread, 237 | period, 238 | ax=None): 239 | """ 240 | Plots mean period wise returns for signal quantiles. 241 | 242 | Parameters 243 | ---------- 244 | mean_returns_spread : pd.Series 245 | Series with difference between quantile mean returns by period. 246 | std_err : pd.Series 247 | Series with standard error of difference between quantile 248 | mean returns each period. 249 | bandwidth : float 250 | Width of displayed error bands in standard deviations. 251 | ax : matplotlib.Axes, optional 252 | Axes upon which to plot. 253 | 254 | Returns 255 | ------- 256 | ax : matplotlib.Axes 257 | The axes that were plotted on. 258 | """ 259 | 260 | periods = period 261 | title = ('Top Minus Bottom Quantile Return' 262 | .format(periods if periods is not None else "")) 263 | 264 | if ax is None: 265 | f, ax = plt.subplots(figsize=(18, 6)) 266 | format = '%Y%m%d' if len(str(mean_returns_spread.index[0])) == 8 else '%Y%m%d%H%M%S' 267 | mean_returns_spread.index = pd.to_datetime(mean_returns_spread.index, format=format) 268 | mean_returns_spread_bps = mean_returns_spread['mean_diff'] * DECIMAL_TO_PCT 269 | 270 | mean_returns_spread_bps.plot(alpha=0.4, ax=ax, lw=0.7, color='navy') 271 | mean_returns_spread_bps.rolling(30).mean().plot(color='green', 272 | alpha=0.7, 273 | ax=ax) 274 | ax.axhline(0.0, linestyle='-', color='black', lw=1, alpha=0.8) 275 | 276 | ax.legend(['mean returns spread', '30 moving avg'], loc='upper right') 277 | ylim = np.nanpercentile(abs(mean_returns_spread_bps.values), 95) 278 | ax.set(ylabel='Difference In Quantile Mean Return (%)', 279 | xlabel='', 280 | title=title, 281 | ylim=(-ylim, ylim)) 282 | 283 | return ax 284 | 285 | 286 | def plot_cumulative_return(ret, ax=None, title=None): 287 | """ 288 | Plots the cumulative returns of the returns series passed in. 289 | 290 | Parameters 291 | ---------- 292 | ret : pd.Series 293 | Period wise returns of dollar neutral portfolio weighted by signal 294 | value. 295 | ax : matplotlib.Axes, optional 296 | Axes upon which to plot. 297 | 298 | Returns 299 | ------- 300 | ax : matplotlib.Axes 301 | The axes that were plotted on. 
302 | """ 303 | if ax is None: 304 | f, ax = plt.subplots(1, 1, figsize=(18, 6)) 305 | 306 | ret = ret.copy() 307 | 308 | cum = ret # pfm.daily_ret_to_cum(ret) 309 | format = '%Y%m%d' if len(str(cum.index[0])) == 8 else '%Y%m%d%H%M%S' 310 | cum.index = pd.to_datetime(cum.index, format=format) 311 | cum = cum.mul(DECIMAL_TO_PCT) 312 | 313 | cum.plot(ax=ax, lw=3, color='indianred', alpha=1.0) 314 | ax.axhline(0.0, linestyle='-', color='black', lw=1) 315 | 316 | metrics = performance.calc_performance_metrics(cum, cum_return=True, compound=False) 317 | ax.text(.85, .30, 318 | "total_ret = {:.1f}%\nmean(ret). = {:.4f}%\nstd(ret) = {:.4f}\nir = {:.4f}".format(metrics['total_ret'], 319 | metrics['mean(ret)'], 320 | metrics['std(ret)'], 321 | metrics['ir']), 322 | fontsize=12, 323 | bbox={'facecolor': 'white', 'alpha': 1, 'pad': 5}, 324 | transform=ax.transAxes, 325 | verticalalignment='top') 326 | if title is None: 327 | title = "Cumulative Return" 328 | ax.set(ylabel='Cumulative Return (%)', 329 | title=title, 330 | xlabel='DateTime') 331 | 332 | return ax 333 | 334 | -------------------------------------------------------------------------------- /docs/optimizer.md: -------------------------------------------------------------------------------- 1 | 2 | # Optimizer 3 | 4 | ## 介绍 5 | Optimizer是optimizer模块中的一个核心类,提供了因子算法参数优化的功能 6 | 7 | *** 步骤 *** 8 | 1. 实例化Optimizer 9 | 2. 进行因子计算和参数优化 10 | 11 | # step 1 实例化Optimizer 12 | 13 | ## __init__ 14 | 15 | - ` jaqs_fxdayu.research.signaldigger.optimizer.Optimizer.__init__(*args, **kwargs) ` 16 | 17 | **简要描述:** 18 | 19 | - 初始化Optimizer 20 | 21 | **参数:** 22 | 23 | 24 | |参数名|必选|类型|说明| 25 | |:---- |:---|:----- |----- | 26 | |dataview|是 |jaqs.data.Dataview|包含了因子相关的所有标的证券及因子计算所要用到的所有字段的数据集| 27 | |formula|否 |string|需要优化的公式:如'(open - Delay(close, LEN1)) / Delay(close, LEN2)'| 28 | |params|否 |dict|需要优化的参数范围:如{"LEN1":range(1,10,1),"LEN2":range(1,10,1)}| 29 | |name|否 |string|信号的名称| 30 | |price |是,price与daily_ret二选一 |pandas.DataFrame|因子涉及到的股票的价格数据,用于作为进出场价用于计算收益,日期为索引,股票品种为columns| 31 | |daily_ret |是,price与daily_ret二选一 |pandas.DataFrame| 因子涉及到的股票的每日收益,日期为索引,股票品种为columns| 32 | |benchmark_price | 否,benchmark_price与daily_benchmark_ret二选一 |pandas.DataFrame or pandas.Series|基准价格,日期为索引。用于计算因子涉及到的股票的持有期**相对收益**--相对基准。默认为空,为空时计算的收益为**绝对收益**。| 33 | |daily_benchmark_ret | 否,benchmark_price与daily_benchmark_ret二选一 |pandas.DataFrame or pandas.Series|基准日收益,日期为索引。用于计算因子涉及到的股票的持有期**相对收益**--相对基准。默认为空,为空时计算的收益为**绝对收益**。| 34 | |high |否 |pandas.DataFrame|因子涉及到的股票的最高价数据,用于计算持有期潜在最大上涨收益,日期为索引,股票品种为columns,默认为空| 35 | |low |否 |pandas.DataFrame|因子涉及到的股票的最低价数据,用于计算持有期潜在最大下跌收益,日期为索引,股票品种为columns,默认为空| 36 | |period |否 |int|持有周期,默认为5,即持有5天| 37 | |n_quantiles |否 |int|根据每日因子值的大小分成n_quantiles组,默认为5,即将因子每天分成5组| 38 | |mask |否 |pandas.DataFrame|一张由bool值组成的表格,日期为索引,股票品种为columns,表示在做因子分析时是否要对某期的某个品种过滤。对应位置为True则**过滤**(剔除)——不纳入因子分析考虑。默认为空,不执行过滤操作| 39 | |can_enter |否 |pandas.DataFrame|一张由bool值组成的表格,日期为索引,股票品种为columns,表示某期的某个品种是否可以买入(进场)。对应位置为True则可以买入。默认为空,任何时间任何品种均可买入| 40 | |can_exit |否 |pandas.DataFrame|一张由bool值组成的表格,日期为索引,股票品种为columns,表示某期的某个品种是否可以卖出(出场)。对应位置为True则可以卖出。默认为空,任何时间任何品种均可卖出| 41 | |forward |否 |bool|收益对齐方式,forward=True则在当期因子下对齐下一期实现的收益;forward=False则在当期实现收益下对齐上一期的因子值。默认为True| 42 | |commission |否 |float|手续费比例,每次换仓收取的手续费百分比,默认为万分之八0.0008| 43 | |is_event |否 |bool|是否是事件(0/1因子),默认为否| 44 | |is_quarterly |否 |bool|是否是季度因子,默认为否| 45 | |register_funcs |否 |dict of function|待优化公式中用到的自定义方法所组成的dict,如{"name1":func1,"name2":func2}| 46 | 47 | **示例:** 48 | 49 | 50 | ```python 51 | import warnings 52 | 
warnings.filterwarnings('ignore') 53 | ``` 54 | 55 | 56 | ```python 57 | from jaqs_fxdayu.research import Optimizer 58 | from jaqs_fxdayu.data import DataView 59 | 60 | # 加载dataview数据集 61 | dv = DataView() 62 | dataview_folder = './data' 63 | dv.load_dataview(dataview_folder) 64 | 65 | def _cut_negative(df): 66 | return df[df>=0] 67 | 68 | # step 1:实例化Optimizer 69 | optimizer = Optimizer(dataview=dv, 70 | formula='CAL(- Correlation(vwap_adj, high, LEN))', 71 | params={"CAL":["Cut_Neg",""],"LEN":range(2,5,1),}, 72 | name='test', 73 | price = dv.get_ts('close_adj'), 74 | high=dv.get_ts('high_adj'), 75 | low=dv.get_ts('low_adj'), 76 | benchmark_price=None,#=None求绝对收益 #=price_bench求相对收益 77 | period=30, 78 | n_quantiles=5, 79 | commission=0.0008,#手续费 默认0.0008 80 | is_event=False,#是否是事件(0/1因子) 81 | is_quarterly=False, 82 | register_funcs={"Cut_Neg":_cut_negative})#是否是季度因子 默认为False 83 | ``` 84 | 85 | Dataview loaded successfully. 86 | 87 | 88 | # step 2 进行因子计算和参数优化 89 | 90 | ## dataview 91 | 92 | - ` jaqs_fxdayu.research.signaldigger.optimizer.Optimizer.dataview ` 93 | 94 | **简要描述:** 95 | 96 | - 优化器用到的数据集 97 | 98 | **示例:** 99 | 100 | 101 | ```python 102 | optimizer.dataview 103 | ``` 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | ## formula 113 | 114 | - ` jaqs_fxdayu.research.signaldigger.optimizer.Optimizer.formula ` 115 | 116 | **简要描述:** 117 | 118 | - 优化器所优化的因子表达式 119 | 120 | **示例:** 121 | 122 | 123 | ```python 124 | optimizer.formula 125 | ``` 126 | 127 | 128 | 129 | 130 | 'CAL(- Correlation(vwap_adj, high, LEN))' 131 | 132 | 133 | 134 | ## params 135 | 136 | - ` jaqs_fxdayu.research.signaldigger.optimizer.Optimizer.params ` 137 | 138 | **简要描述:** 139 | 140 | - 优化器所优化的参数范围 141 | 142 | **示例:** 143 | 144 | 145 | ```python 146 | optimizer.params 147 | ``` 148 | 149 | 150 | 151 | 152 | {'CAL': ['Cut_Neg', ''], 'LEN': range(2, 5)} 153 | 154 | 155 | 156 | ## name 157 | 158 | - ` jaqs_fxdayu.research.signaldigger.optimizer.Optimizer.name ` 159 | 160 | **简要描述:** 161 | 162 | - 优化器所优化的信号名称 163 | 164 | **示例:** 165 | 166 | 167 | ```python 168 | optimizer.name 169 | ``` 170 | 171 | 172 | 173 | 174 | 'test' 175 | 176 | 177 | 178 | ## period 179 | 180 | - ` jaqs_fxdayu.research.signaldigger.optimizer.Optimizer.period ` 181 | 182 | **简要描述:** 183 | 184 | - 待优化因子所指定的调仓周期 185 | 186 | **示例:** 187 | 188 | 189 | ```python 190 | optimizer.period 191 | ``` 192 | 193 | 194 | 195 | 196 | 30 197 | 198 | 199 | 200 | ## all_signals 201 | 202 | - ` jaqs_fxdayu.research.signaldigger.optimizer.Optimizer.all_signals ` 203 | 204 | **简要描述:** 205 | 206 | - 不同参数下计算得到的signal_data(关于signal_data的定义,详见文档digger部分-signal_data)所组成的字典 207 | - 在初始化Optimizer实例时指定了formula和params后,可以通过Optimizer.get_all_signals()计算不同参数下该公式算得的所有因子值;也可以手动指定 208 | 209 | **示例:** 210 | 211 | 212 | ```python 213 | print(optimizer.all_signals) 214 | ``` 215 | 216 | None 217 | 218 | 219 | ## get_all_signals 220 | 221 | - ` jaqs_fxdayu.research.signaldigger.optimizer.Optimizer.get_all_signals() ` 222 | 223 | **简要描述:** 224 | 225 | - 通过Optimizer.get_all_signals()计算不同参数下该公式算得的所有因子值,并更新Optimizer.all_signals属性 226 | 227 | **示例:** 228 | 229 | 230 | ```python 231 | optimizer.get_all_signals() 232 | ``` 233 | 234 | Nan Data Count (should be zero) : 0; Percentage of effective data: 13% 235 | Nan Data Count (should be zero) : 0; Percentage of effective data: 5% 236 | Nan Data Count (should be zero) : 0; Percentage of effective data: 2% 237 | Nan Data Count (should be zero) : 0; Percentage of effective data: 92% 238 | Nan Data Count (should be zero) : 0; Percentage of 
effective data: 94% 239 | Nan Data Count (should be zero) : 0; Percentage of effective data: 94% 240 | 241 | 242 | 243 | ```python 244 | print(optimizer.all_signals.keys()) 245 | print(optimizer.all_signals["test{'CAL': '', 'LEN': 2}"].head()) 246 | ``` 247 | 248 | dict_keys(["test{'CAL': 'Cut_Neg', 'LEN': 2}", "test{'CAL': 'Cut_Neg', 'LEN': 3}", "test{'CAL': 'Cut_Neg', 'LEN': 4}", "test{'CAL': '', 'LEN': 2}", "test{'CAL': '', 'LEN': 3}", "test{'CAL': '', 'LEN': 4}"]) 249 | signal return upside_ret downside_ret quantile 250 | trade_date symbol 251 | 20170503 000001.SZ -1.0 0.011546 0.031748 -0.038959 2 252 | 000002.SZ -1.0 0.109486 0.165690 -0.021479 2 253 | 000008.SZ -1.0 -0.071442 -0.005851 -0.119487 3 254 | 000009.SZ -1.0 -0.089585 -0.003136 -0.165520 2 255 | 000027.SZ -1.0 -0.016835 0.051678 -0.060567 5 256 | 257 | 258 | ## all_signals_perf 259 | 260 | - ` jaqs_fxdayu.research.signaldigger.optimizer.Optimizer.all_signals_perf ` 261 | 262 | **简要描述:** 263 | 264 | - 不同参数下计算得到的signal_data(关于signal_data的定义,详见文档digger部分-signal_data)的绩效表现所组成的字典 265 | - 在Optimizer.all_signals不为空的情况下,可以通过Optimizer.get_all_signals_perf()计算Optimizer.all_signals中不同因子的表现; 266 | - 在执行过Optimizer.get_all_signals_perf()后才能获取 267 | 268 | **返回:** 269 | 270 | dict of performance - 不同因子表现所组成的字典 271 | 其中每个performance(因子表现)也是一个字典,由ic、ret、space三个key构成,分别对应ic分析表、收益分析表、潜在收益空间分析表(关于这三张表的说明,详见文档-analysis) 272 | 273 | **示例:** 274 | 275 | 276 | ```python 277 | print(optimizer.all_signals_perf) 278 | ``` 279 | 280 | None 281 | 282 | 283 | ## get_all_signals_perf 284 | 285 | - ` jaqs_fxdayu.research.signaldigger.optimizer.Optimizer.get_all_signals_perf(in_sample_range=None) ` 286 | 287 | **简要描述:** 288 | 289 | - 在Optimizer.all_signals不为空的情况下,通过Optimizer.get_all_signals_perf()计算Optimizer.all_signals中不同因子的表现,并更新Optimizer.all_signals_perf属性; 290 | 291 | **参数:** 292 | 293 | |字段|必选|类型|说明| 294 | |:---- |:---|:----- |----- | 295 | |in_sample_range |否|list of int|因子表现计算的时间范围,如[20140101,20160101] 表示计算因子表现所涵盖的数据范围只在20140101到20160101之间。默认为None,在全样本上计算因子表现| 296 | 297 | 298 | **示例:** 299 | 300 | 301 | ```python 302 | optimizer.get_all_signals_perf() 303 | ``` 304 | 305 | 306 | ```python 307 | print(optimizer.all_signals_perf.keys()) 308 | print(optimizer.all_signals_perf["test{'CAL': '', 'LEN': 2}"].keys()) 309 | optimizer.all_signals_perf["test{'CAL': '', 'LEN': 2}"]["ic"] 310 | ``` 311 | 312 | dict_keys(["test{'CAL': 'Cut_Neg', 'LEN': 2}", "test{'CAL': 'Cut_Neg', 'LEN': 3}", "test{'CAL': 'Cut_Neg', 'LEN': 4}", "test{'CAL': '', 'LEN': 2}", "test{'CAL': '', 'LEN': 3}", "test{'CAL': '', 'LEN': 4}"]) 313 | dict_keys(['ic', 'ret', 'space', 'signal_name']) 314 | 315 | 316 | 317 | 318 | 319 |
320 | 333 | 334 | 335 | 336 | 337 | 338 | 339 | 340 | 341 | 342 | 343 | 344 | 345 | 346 | 347 | 348 | 349 | 350 | 351 | 352 | 353 | 354 | 355 | 356 | 357 | 358 | 359 | 360 | 361 | 362 | 363 | 364 | 365 | 366 | 367 | 368 | 369 | 370 | 371 | 372 | 373 | 374 | 375 | 376 | 377 | 378 | 379 | 380 | 381 | 382 | 383 | 384 | 385 | 386 |
return_icupside_ret_icdownside_ret_ic
IC Mean-0.025674-0.0386120.001965
IC Std.0.0587030.0631770.058535
t-stat(IC)-3.812846-5.3281010.292652
p-value(IC)0.0002800.0000010.770596
IC Skew0.6257320.6893230.226355
IC Kurtosis0.4340470.4958040.149208
Ann. IR-0.437363-0.6111750.033570
387 |
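Since `all_signals_perf` is a plain dict keyed by signal name, candidates can also be ranked by hand before turning to `enumerate_optimizer`. A minimal sketch, assuming `get_all_signals_perf()` has already run and using the `ic` table shown above; it mirrors the `x[order_index].loc[target, target_type]` lookup that `enumerate_optimizer` performs internally:


```python
# Rank parameter sets by the annualized IR of the return IC.
ranked = sorted(optimizer.all_signals_perf.values(),
                key=lambda perf: perf["ic"].loc["Ann. IR", "return_ic"],
                reverse=True)  # larger IR first
for perf in ranked[:3]:
    print(perf["signal_name"], perf["ic"].loc["Ann. IR", "return_ic"])
```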
388 | 389 | 390 | 391 | ## enumerate_optimizer 392 | 393 | - ` jaqs_fxdayu.research.signaldigger.optimizer.Optimizer.enumerate_optimizer(target_type="long_ret",target="Ann. IR",ascending=False,in_sample_range=None) ` 394 | 395 | **简要描述:** 396 | 397 | - 枚举优化。按照指定的参数优化范围遍历每一种可能性,并给出最佳绩效下的排序结果 398 | 399 | **参数:** 400 | 401 | |字段|必选|类型|说明| 402 | |:---- |:---|:----- |----- | 403 | |target_type |是|string|待优化的目标类型,有ic类、持有收益类、收益空间类三个大类,下分小类,具体类型见下| 404 | |target |是|string|待优化的目标绩效指标,有ic类、持有收益类、收益空间类三个大类,下分小类,具体类型见下| 405 | |ascending |否|bool|输出结果是否升序排列,默认为False--降序排列(指标越大排名越前)| 406 | |in_sample_range |否|list of int|样本内优化范围 默认为None,在全样本上优化| 407 | 408 | #### 优化目标的详细介绍 409 | 目前,所有可优化的目标均围绕analysis模块中提供的三张绩效表——ic分析表、收益分析表、潜在收益空间分析表(关于这三张表的详细定义,参考文档-analysis)。 410 | 411 | #### target_type: 412 | * ic类: 413 | return_ic/upside_ret_ic/downside_ret_ic 414 | * 持有收益类 415 | long_ret/short_ret/long_short_ret/top_quantile_ret/bottom_quantile_ret/tmb_ret 416 | * 收益空间类 417 | long_space/short_space/long_short_space/top_quantile_space/bottom_quantile_space/tmb_space 418 | 419 | #### target: 420 | * ic类 421 | "IC Mean", "IC Std.", "t-stat(IC)", "p-value(IC)", "IC Skew", "IC Kurtosis", "Ann. IR" 422 | * 持有收益类 423 | 't-stat', "p-value", "skewness", "kurtosis", "Ann. Ret", "Ann. Vol", "Ann. IR", "occurance" 424 | * 收益空间类 425 | 'Up_sp Mean','Up_sp Std','Up_sp IR','Up_sp Pct5', 'Up_sp Pct25 ','Up_sp Pct50 ', 'Up_sp Pct75','Up_sp Pct95','Up_sp Occur','Down_sp Mean','Down_sp Std', 'Down_sp IR', 'Down_sp Pct5','Down_sp Pct25 ','Down_sp Pct50 ','Down_sp Pct75', 'Down_sp Pct95','Down_sp Occur' 426 | 427 | 428 | **返回:** 429 | 430 | list of performance - 绩效的排序结果(只计算了样本内的绩效) 431 | 其中每个performance(因子表现)是一个字典,由ic、ret、space三个key构成,分别对应ic分析表、收益分析表、潜在收益空间分析表(关于这三张表的说明,详见文档-analysis) 432 | 433 | 434 | **示例:** 435 | 436 | 437 | ```python 438 | ret_best = optimizer.enumerate_optimizer(target_type="top_quantile_ret",#优化目标类型 439 | target="Ann. IR",#优化目标 440 | in_sample_range=[20170501,20170801],#样本内范围 441 | ascending=False) 442 | ``` 443 | 444 | 445 | ```python 446 | print(len(ret_best)) 447 | print(ret_best[0].keys()) 448 | print(ret_best[0]["signal_name"]) 449 | ret_best[0]["ret"] 450 | ``` 451 | 452 | 6 453 | dict_keys(['ic', 'ret', 'space', 'signal_name']) 454 | test{'CAL': 'Cut_Neg', 'LEN': 4} 455 | 456 | 457 | 458 | 459 | 460 |
461 | 474 | 475 | 476 | 477 | 478 | 479 | 480 | 481 | 482 | 483 | 484 | 485 | 486 | 487 | 488 | 489 | 490 | 491 | 492 | 493 | 494 | 495 | 496 | 497 | 498 | 499 | 500 | 501 | 502 | 503 | 504 | 505 | 506 | 507 | 508 | 509 | 510 | 511 | 512 | 513 | 514 | 515 | 516 | 517 | 518 | 519 | 520 | 521 | 522 | 523 | 524 | 525 | 526 | 527 | 528 | 529 | 530 | 531 | 532 | 533 | 534 | 535 | 536 | 537 | 538 | 539 | 540 | 541 | 542 | 543 | 544 | 545 | 546 | 547 | 548 | 549 | 550 | 551 | 552 | 553 | 554 | 555 | 556 | 557 | 558 | 559 | 560 |
long_retlong_short_rettop_quantile_retbottom_quantile_rettmb_retall_sample_ret
t-stat6.483452-0.1014075.3922924.371209-0.5891159.697444
p-value0.0000000.9195800.0000000.0000300.5579600.000000
skewness0.293834-0.2233910.5719580.7897550.1395800.937413
kurtosis-0.3582580.0818830.3683761.2277290.2371432.696370
Ann. Ret0.301933-0.0044700.4404320.314685-0.0537470.329067
Ann. Vol0.1280630.1181900.2093610.2534710.2508850.238054
Ann. IR2.357699-0.0378182.1036941.241505-0.2142311.382322
occurance62.00000059.00000054.000000101.00000062.000000398.000000
561 |
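Each candidate is named `name + str(param_dict)` (see `Optimizer.get_all_signals`), so the winning parameters can be recovered from `signal_name`. A small sketch, reusing the `ret_best` result from the example above:


```python
import ast

best = ret_best[0]
# Strip the signal-name prefix ('test'), leaving the str(dict) part.
param_str = best["signal_name"][len(optimizer.name):]
best_params = ast.literal_eval(param_str)  # e.g. {'CAL': 'Cut_Neg', 'LEN': 4}
print(best_params)
```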
562 | 563 | 564 | -------------------------------------------------------------------------------- /docs/dp.md: -------------------------------------------------------------------------------- 1 | 2 | # dp 3 | 4 | ## 介绍 5 | 针对A股因子研究和交易分析场景,提供了常用的小工具,如查询历史的交易日,历史的行业分类表等 6 | 7 | 8 | ```python 9 | import warnings 10 | warnings.filterwarnings('ignore') 11 | ``` 12 | 13 | 14 | ```python 15 | data_config = { 16 | "remote.data.address": "tcp://data.quantos.org:8910", 17 | "remote.data.username": "18566262672", 18 | "remote.data.password": "eyJhbGciOiJIUzI1NiJ9.eyJjcmVhdGVfdGltZSI6IjE1MTI3MDI3NTAyMTIiLCJpc3MiOiJhdXRoMCIsImlkIjoiMTg1NjYyNjI2NzIifQ.O_-yR0zYagrLRvPbggnru1Rapk4kiyAzcwYt2a3vlpM" 19 | } 20 | 21 | from jaqs_fxdayu.data import DataApi 22 | 23 | api = DataApi(data_config["remote.data.address"]) # 传入连接到的远端数据服务器的tcp地址 24 | api.login(username=data_config["remote.data.username"], 25 | password=data_config["remote.data.password"]) 26 | ``` 27 | 28 | 29 | 30 | 31 | ('username: 18566262672', '0,') 32 | 33 | 34 | 35 | ## trade_days 36 | - ` jaqs_fxdayu.util.dp.trade_days(api, start, end) ` 37 | 38 | **简要描述:** 39 | 40 | - 返回起止日期间的交易日 41 | 42 | **参数:** 43 | 44 | |字段|必选|类型|说明| 45 | |:---- |:---|:----- |----- | 46 | |api |是| jaqs.data.DataApi |jaqs.data.DataApi| 47 | |start |是|int |开始日期| 48 | |end |是|int |结束日期| 49 | 50 | **返回:** 51 | 52 | 起止日期间的交易日 53 | 54 | **示例:** 55 | 56 | 57 | ```python 58 | from jaqs_fxdayu.util.dp import trade_days 59 | trade_days(api, 20170101, 20180101) 60 | ``` 61 | 62 | 63 | 64 | 65 | Int64Index([20170103, 20170104, 20170105, 20170106, 20170109, 20170110, 66 | 20170111, 20170112, 20170113, 20170116, 67 | ... 68 | 20171218, 20171219, 20171220, 20171221, 20171222, 20171225, 69 | 20171226, 20171227, 20171228, 20171229], 70 | dtype='int64', name='trade_date', length=244) 71 | 72 | 73 | 74 | ## index_cons 75 | - ` jaqs_fxdayu.util.dp.index_cons(api, index_code, start, end) ` 76 | 77 | **简要描述:** 78 | 79 | - 获得某个指数起止时间段的历史成分股信息 80 | 81 | **参数:** 82 | 83 | |字段|必选|类型|说明| 84 | |:---- |:---|:----- |----- | 85 | |api |是| jaqs.data.DataApi |jaqs.data.DataApi| 86 | |index_code |是| str |指数代码| 87 | |start |是|int |开始日期| 88 | |end |是|int |结束日期| 89 | 90 | **返回:** 91 | 92 | 某个指数起止时间段的历史成分股信息 93 | 94 | - 其中 in_date:纳入该指数的时间;out_date:从该指数移除的时间 95 | 96 | **示例:** 97 | 98 | 99 | ```python 100 | from jaqs_fxdayu.util.dp import index_cons 101 | index_cons(api, "000300.SH", 20170101, 20170501).head() 102 | ``` 103 | 104 | 105 | 106 | 107 |
108 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 |
in_dateindex_codeout_datesymbol
020050408000300.SH99999999000001.SZ
120050408000300.SH99999999000002.SZ
220161212000300.SH99999999000008.SZ
320050408000300.SH20171208000009.SZ
420140616000300.SH20170609000027.SZ
169 |
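An `out_date` of 99999999 marks a constituent that has not yet been removed (visible in the sample output above), so current members can be filtered directly. A minimal sketch; the dtype of `out_date` is not guaranteed, hence the string comparison:


```python
cons = index_cons(api, "000300.SH", 20170101, 20170501)
# 99999999 is the sentinel out_date for names still in the index.
current = cons[cons["out_date"].astype(str) == "99999999"]["symbol"]
print(current.head())
```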
170 | 171 | 172 | 173 | ## daily_index_cons 174 | - ` jaqs_fxdayu.util.dp.daily_index_cons(api, index_code, start, end) ` 175 | 176 | **简要描述:** 177 | 178 | - 指定起止时间段,成分股是否还在某指数当中 179 | 180 | **参数:** 181 | 182 | |字段|必选|类型|说明| 183 | |:---- |:---|:----- |----- | 184 | |api |是| jaqs.data.DataApi |jaqs.data.DataApi| 185 | |index_code |是| str |指数代码| 186 | |start |是|int |开始日期| 187 | |end |是|int |结束日期| 188 | 189 | **示例:** 190 | 191 | 192 | ```python 193 | from jaqs_fxdayu.util.dp import daily_index_cons 194 | daily_index_cons(api, "000300.SH", 20170101, 20170501).head() 195 | ``` 196 | 197 | 198 | 199 | 200 |
201 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | 306 | 307 | 308 | 309 | 310 | 311 | 312 | 313 | 314 | 315 | 316 | 317 | 318 | 319 | 320 | 321 | 322 | 323 | 324 | 325 | 326 | 327 | 328 | 329 | 330 | 331 | 332 | 333 | 334 | 335 | 336 | 337 | 338 | 339 | 340 | 341 | 342 | 343 | 344 | 345 | 346 | 347 | 348 | 349 | 350 | 351 | 352 | 353 | 354 | 355 | 356 | 357 | 358 | 359 | 360 | 361 | 362 | 363 | 364 | 365 | 366 | 367 | 368 | 369 | 370 | 371 | 372 | 373 | 374 | 375 | 376 | 377 | 378 | 379 | 380 | 381 | 382 | 383 | 384 | 385 | 386 | 387 |
000001.SZ000002.SZ000008.SZ000009.SZ000027.SZ000039.SZ000060.SZ000061.SZ000063.SZ000069.SZ...601933.SH601939.SH601958.SH601985.SH601988.SH601989.SH601998.SH603000.SH603885.SH603993.SH
trade_date
20170103TrueTrueTrueTrueTrueTrueTrueTrueTrueTrue...TrueTrueTrueTrueTrueTrueTrueTrueTrueTrue
20170104TrueTrueTrueTrueTrueTrueTrueTrueTrueTrue...TrueTrueTrueTrueTrueTrueTrueTrueTrueTrue
20170105TrueTrueTrueTrueTrueTrueTrueTrueTrueTrue...TrueTrueTrueTrueTrueTrueTrueTrueTrueTrue
20170106TrueTrueTrueTrueTrueTrueTrueTrueTrueTrue...TrueTrueTrueTrueTrueTrueTrueTrueTrueTrue
20170109TrueTrueTrueTrueTrueTrueTrueTrueTrueTrue...TrueTrueTrueTrueTrueTrueTrueTrueTrueTrue
388 |

5 rows × 301 columns

389 |
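The boolean frame returned above matches the mask convention used elsewhere in this library (True means "filter out" during factor analysis), so a non-constituent mask is just the negation. A minimal sketch:


```python
member = daily_index_cons(api, "000300.SH", 20170101, 20170501)
# Mask convention: True = exclude the symbol on that date.
mask_not_member = ~member.astype(bool)
```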
390 | 391 | 392 | 393 | ## st_status 394 | - ` jaqs_fxdayu.util.dp.st_status(api, symbol, start, end) ` 395 | 396 | **简要描述:** 397 | 398 | - 指定起止时间段,股票是否进入异常状态(含st,*st,退市等) 399 | - 注:从未进入过异常状态的股票不会在返回值里 400 | - 注:该方法目前只有在fxdayu数据源下才可访问到(非公开接口) 401 | 402 | **参数:** 403 | 404 | |字段|必选|类型|说明| 405 | |:---- |:---|:----- |----- | 406 | |api |是| jaqs.data.DataApi |jaqs.data.DataApi| 407 | |symbol |是| str |股票代码,以","隔开| 408 | |start |是|int |开始日期| 409 | |end |是|int |结束日期| 410 | 411 | **示例:** 412 | 413 | 414 | ```python 415 | from jaqs_fxdayu.util.dp import st_status 416 | st_status(api, "000001.SZ,000003.SZ,000008.SZ", 20170101, 20170501).head() 417 | ``` 418 | 419 | 420 | 421 | 422 |
423 | 436 | 437 | 438 | 439 | 440 | 441 | 442 | 443 | 444 | 445 | 446 | 447 | 448 | 449 | 450 | 451 | 452 | 453 | 454 | 455 | 456 | 457 | 458 | 459 | 460 | 461 | 462 | 463 | 464 | 465 | 466 | 467 | 468 | 469 | 470 | 471 | 472 | 473 | 474 | 475 | 476 |
000003.SZ000008.SZ
trade_date
201701031.00.0
201701041.00.0
201701051.00.0
201701061.00.0
201701091.00.0
477 |
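Because names that never entered an abnormal state are missing from the result (see the note above), a full-universe ST mask should reindex the columns and treat the gaps as "not ST". A minimal sketch, using the three-symbol universe from the example:


```python
st = st_status(api, "000001.SZ,000003.SZ,000008.SZ", 20170101, 20170501)
universe = ["000001.SZ", "000003.SZ", "000008.SZ"]
# Absent columns mean the stock was never ST; fill them with 0.
st_mask = st.reindex(columns=universe).fillna(0) > 0
```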
478 | 479 | 480 | 481 | ## daily_sec_industry 482 | - ` jaqs_fxdayu.util.dp.daily_sec_industry(api, symbol, start, end, source="sw", value="industry1_code") ` 483 | 484 | **简要描述:** 485 | 486 | - 指定起始时间段,查询某一系列股票在该时间段下的行业分类信息 487 | 488 | **参数:** 489 | 490 | |字段|必选|类型|说明| 491 | |:---- |:---|:----- |----- | 492 | |api |是| jaqs.data.DataApi |jaqs.data.DataApi| 493 | |symbol |是| str |股票代码,用","隔开。如"600000.SH,000001.SZ"| 494 | |start |是|int |开始日期| 495 | |end |是|int |结束日期| 496 | |source |否|str |行业分类标准,目前仅支持"sw"(申万),"zz"(中证),"zjh"(证监会),默认"sw"| 497 | |value |否|str |行业等级,形式可为"industry?_code"(行业编码)/"industry?_name"(行业名称)。其中"?"可为1,2,3,4,分别代表1-4个行业等级。申万支持1-4,中证支持1-2。默认为industry1_code| 498 | 499 | **示例:** 500 | 501 | 502 | ```python 503 | from jaqs_fxdayu.util.dp import daily_sec_industry 504 | symbol_id = index_cons(api, "000300.SH", 20170501, 20171001,)["symbol"].dropna() 505 | symbols = ",".join(symbol_id) 506 | group = daily_sec_industry(api, symbols, 20170501, 20171001, source='zjh', value="industry1_name") 507 | group.tail() 508 | ``` 509 | 510 | 511 | 512 | 513 |
514 | 527 | 528 | 529 | 530 | 531 | 532 | 533 | 534 | 535 | 536 | 537 | 538 | 539 | 540 | 541 | 542 | 543 | 544 | 545 | 546 | 547 | 548 | 549 | 550 | 551 | 552 | 553 | 554 | 555 | 556 | 557 | 558 | 559 | 560 | 561 | 562 | 563 | 564 | 565 | 566 | 567 | 568 | 569 | 570 | 571 | 572 | 573 | 574 | 575 | 576 | 577 | 578 | 579 | 580 | 581 | 582 | 583 | 584 | 585 | 586 | 587 | 588 | 589 | 590 | 591 | 592 | 593 | 594 | 595 | 596 | 597 | 598 | 599 | 600 | 601 | 602 | 603 | 604 | 605 | 606 | 607 | 608 | 609 | 610 | 611 | 612 | 613 | 614 | 615 | 616 | 617 | 618 | 619 | 620 | 621 | 622 | 623 | 624 | 625 | 626 | 627 | 628 | 629 | 630 | 631 | 632 | 633 | 634 | 635 | 636 | 637 | 638 | 639 | 640 | 641 | 642 | 643 | 644 | 645 | 646 | 647 | 648 | 649 | 650 | 651 | 652 | 653 | 654 | 655 | 656 | 657 | 658 | 659 | 660 | 661 | 662 | 663 | 664 | 665 | 666 | 667 | 668 | 669 | 670 | 671 | 672 | 673 | 674 | 675 | 676 | 677 | 678 | 679 | 680 | 681 | 682 | 683 | 684 | 685 | 686 | 687 | 688 | 689 | 690 | 691 | 692 | 693 | 694 | 695 | 696 | 697 | 698 | 699 | 700 |
000001.SZ000002.SZ000008.SZ000009.SZ000027.SZ000039.SZ000060.SZ000061.SZ000063.SZ000069.SZ...601988.SH601989.SH601992.SH601997.SH601998.SH603000.SH603160.SH603858.SH603885.SH603993.SH
trade_date
20170925金融业房地产业制造业综合电力、热力、燃气及水生产和供应业制造业制造业租赁和商务服务业制造业水利、环境和公共设施管理业...金融业制造业制造业金融业金融业信息传输、软件和信息技术服务业制造业制造业交通运输、仓储和邮政业采矿业
20170926金融业房地产业制造业综合电力、热力、燃气及水生产和供应业制造业制造业租赁和商务服务业制造业水利、环境和公共设施管理业...金融业制造业制造业金融业金融业信息传输、软件和信息技术服务业制造业制造业交通运输、仓储和邮政业采矿业
20170927金融业房地产业制造业综合电力、热力、燃气及水生产和供应业制造业制造业租赁和商务服务业制造业水利、环境和公共设施管理业...金融业制造业制造业金融业金融业信息传输、软件和信息技术服务业制造业制造业交通运输、仓储和邮政业采矿业
20170928金融业房地产业制造业综合电力、热力、燃气及水生产和供应业制造业制造业租赁和商务服务业制造业水利、环境和公共设施管理业...金融业制造业制造业金融业金融业信息传输、软件和信息技术服务业制造业制造业交通运输、仓储和邮政业采矿业
20170929金融业房地产业制造业综合电力、热力、燃气及水生产和供应业制造业制造业租赁和商务服务业制造业水利、环境和公共设施管理业...金融业制造业制造业金融业金融业信息传输、软件和信息技术服务业制造业制造业交通运输、仓储和邮政业采矿业
701 |

5 rows × 330 columns

702 |
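The frame returned by `daily_sec_industry` has the same date-by-symbol layout as the `group` argument of `SignalDigger.process_signal_before_analysis` (used with `sw1` in the analysis docs), so it can drive industry-grouped IC analysis directly. A sketch under the assumption that `signal` and `price` frames have already been prepared elsewhere:


```python
from jaqs_fxdayu.research import SignalDigger

sd = SignalDigger()
sd.process_signal_before_analysis(signal=signal,   # assumed prepared
                                  price=price,     # assumed prepared
                                  group=group,     # from daily_sec_industry above
                                  n_quantiles=5,
                                  period=5)
```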
703 | 704 | 705 | -------------------------------------------------------------------------------- /jaqs_fxdayu/research/signaldigger/analysis.py: -------------------------------------------------------------------------------- 1 | # encoding = utf-8 2 | 3 | import numpy as np 4 | import pandas as pd 5 | import scipy.stats as scst 6 | from jaqs.trade import common 7 | 8 | from . import performance as pfm 9 | 10 | 11 | def compute_downside_returns(price, 12 | low, 13 | can_exit=None, 14 | period=5, 15 | compound=True): 16 | """ 17 | Finds the N period downside_returns for each asset provided. 18 | 19 | Parameters 20 | ---------- 21 | price : pd.DataFrame 22 | Pricing data to use in forward price calculation. 23 | Assets as columns, dates as index. Pricing data must 24 | span the factor analysis time period plus an additional buffer window 25 | that is greater than the maximum number of expected periods 26 | in the forward returns calculations. 27 | low : pd.DataFrame 28 | Low pricing data to use in forward price calculation. 29 | Assets as columns, dates as index. Pricing data must 30 | span the factor analysis time period plus an additional buffer window 31 | that is greater than the maximum number of expected periods 32 | in the forward returns calculations. 33 | can_exit:bool 34 | shape like price&low 35 | period : int 36 | periods to compute returns on. 37 | compound : bool 38 | 39 | 40 | Returns 41 | ------- 42 | downside_returns : pd.DataFrame 43 | downside_returns in indexed by date 44 | """ 45 | if compound: 46 | downside_ret = (low.rolling(period).min() - price.shift(period)) / price.shift(period) 47 | else: 48 | downside_ret = (low.rolling(period).min() - price.shift(period)) / price.iloc[0] 49 | if can_exit is not None: 50 | low_can_exit = low.copy() 51 | low_can_exit[~can_exit] = np.NaN 52 | low_can_exit = low_can_exit.fillna(method="bfill") 53 | if compound: 54 | downside_ret_can_exit = (low_can_exit.rolling(period).min() - price.shift(period)) / price.shift(period) 55 | else: 56 | downside_ret_can_exit = (low_can_exit.rolling(period).min() - price.shift(period)) / price.iloc[0] 57 | downside_ret[~can_exit] = (downside_ret[downside_ret <= downside_ret_can_exit].fillna(0) + \ 58 | downside_ret_can_exit[downside_ret_can_exit < downside_ret].fillna(0))[~can_exit] 59 | 60 | return downside_ret 61 | 62 | 63 | def compute_upside_returns(price, 64 | high, 65 | can_exit=None, 66 | period=5, 67 | compound=True): 68 | """ 69 | Finds the N period upside_returns for each asset provided. 70 | 71 | Parameters 72 | ---------- 73 | price : pd.DataFrame 74 | Pricing data to use in forward price calculation. 75 | Assets as columns, dates as index. Pricing data must 76 | span the factor analysis time period plus an additional buffer window 77 | that is greater than the maximum number of expected periods 78 | in the forward returns calculations. 79 | high : pd.DataFrame 80 | High pricing data to use in forward price calculation. 81 | Assets as columns, dates as index. Pricing data must 82 | span the factor analysis time period plus an additional buffer window 83 | that is greater than the maximum number of expected periods 84 | in the forward returns calculations. 85 | can_exit:bool 86 | shape like price&low 87 | period : int 88 | periods to compute returns on. 
89 | compound : bool 90 | 91 | 92 | Returns 93 | ------- 94 | upside_returns : pd.DataFrame 95 | upside_returns in indexed by date 96 | """ 97 | if compound: 98 | upside_ret = (high.rolling(period).max() - price.shift(period)) / price.shift(period) 99 | else: 100 | upside_ret = (high.rolling(period).max() - price.shift(period)) / price.iloc[0] 101 | if can_exit is not None: 102 | high_can_exit = high.copy() 103 | high_can_exit[~can_exit] = np.NaN 104 | high_can_exit = high_can_exit.fillna(method="bfill") 105 | if compound: 106 | upside_ret_can_exit = (high_can_exit.rolling(period).max() - price.shift(period)) / price.shift(period) 107 | else: 108 | upside_ret_can_exit = (high_can_exit.rolling(period).max() - price.shift(period)) / price.iloc[0] 109 | upside_ret[~can_exit] = (upside_ret[upside_ret >= upside_ret_can_exit].fillna(0) + \ 110 | upside_ret_can_exit[upside_ret_can_exit > upside_ret].fillna(0))[~can_exit] 111 | 112 | return upside_ret 113 | 114 | 115 | def cal_rets_stats(rets, period): 116 | ret_summary_table = pd.DataFrame() 117 | ratio = (1.0 * common.CALENDAR_CONST.TRADE_DAYS_PER_YEAR / period) 118 | mean = rets.mean() 119 | std = rets.std() 120 | annual_ret, annual_vol = mean * ratio, std * np.sqrt(ratio) 121 | t_stats, p_values = scst.ttest_1samp(rets, np.zeros(rets.shape[1]), axis=0) 122 | ret_summary_table['t-stat'] = t_stats 123 | ret_summary_table['p-value'] = np.round(p_values, 5) 124 | ret_summary_table["skewness"] = scst.skew(rets, axis=0) 125 | ret_summary_table["kurtosis"] = scst.kurtosis(rets, axis=0) 126 | ret_summary_table['Ann. Ret'] = annual_ret 127 | ret_summary_table['Ann. Vol'] = annual_vol 128 | ret_summary_table['Ann. IR'] = annual_ret / annual_vol 129 | ret_summary_table['occurance'] = len(rets) 130 | return ret_summary_table.T 131 | 132 | 133 | def ic_stats(signal_data): 134 | ICs = get_ics(signal_data) 135 | stats = [] 136 | for item in ICs.keys(): 137 | ic = ICs[item] 138 | ic.index = pd.to_datetime(ic.index, format="%Y%m%d") 139 | ic_summary_table = pfm.calc_ic_stats_table(ic).T 140 | ic_summary_table.columns = [item] 141 | stats.append(ic_summary_table) 142 | if len(stats) > 0: 143 | stats = pd.concat(stats, axis=1) 144 | return stats 145 | 146 | 147 | def get_ics(signal_data): 148 | ICs = dict() 149 | if not ("upside_ret" in signal_data.columns) or \ 150 | not ("downside_ret" in signal_data.columns): 151 | items = ["return"] 152 | else: 153 | items = ["return", "upside_ret", "downside_ret"] 154 | for item in items: 155 | data = signal_data[["signal", item]] 156 | data.columns = ["signal", "return"] 157 | ICs[item + "_ic"] = pfm.calc_signal_ic(data).dropna() 158 | 159 | return ICs 160 | 161 | 162 | def return_stats(signal_data, is_event, period): 163 | rets = get_rets(signal_data, is_event) 164 | stats = [] 165 | for ret_type in rets.keys(): 166 | if len(rets[ret_type]) > 0: 167 | ret_stats = cal_rets_stats(rets[ret_type].values.reshape((-1, 1)), period) 168 | ret_stats.columns = [ret_type] 169 | stats.append(ret_stats) 170 | if len(stats) > 0: 171 | stats = pd.concat(stats, axis=1) 172 | return stats 173 | 174 | 175 | def get_rets(signal_data, is_event): 176 | rets = dict() 177 | signal_data = signal_data.copy() 178 | n_quantiles = signal_data['quantile'].max() 179 | 180 | if is_event: 181 | rets["long_ret"] = signal_data[signal_data['signal'] == 1]["return"].dropna() 182 | rets['short_ret'] = signal_data[signal_data['signal'] == -1]["return"].dropna() * -1 183 | else: 184 | rets['long_ret'] = \ 185 | 
pfm.calc_period_wise_weighted_signal_return(signal_data, weight_method='long_only').dropna() 186 | rets['short_ret'] = \ 187 | pfm.calc_period_wise_weighted_signal_return(signal_data, weight_method='short_only').dropna() 188 | rets['long_short_ret'] = \ 189 | pfm.calc_period_wise_weighted_signal_return(signal_data, weight_method='long_short').dropna() 190 | # quantile return 191 | if not is_event: 192 | rets['top_quantile_ret'] = signal_data[signal_data['quantile'] == n_quantiles]["return"].dropna() 193 | rets['bottom_quantile_ret'] = signal_data[signal_data['quantile'] == 1]["return"].dropna() 194 | period_wise_quantile_ret_stats = pfm.calc_quantile_return_mean_std(signal_data, time_series=True) 195 | rets['tmb_ret'] = pfm.calc_return_diff_mean_std(period_wise_quantile_ret_stats[n_quantiles], 196 | period_wise_quantile_ret_stats[1])['mean_diff'].dropna() 197 | rets['all_sample_ret'] = signal_data["return"].dropna() 198 | return rets 199 | 200 | 201 | def weighted_signal_ret_space(signal_data): 202 | """ 203 | Computes period wise period_wise_returns for portfolio weighted by signal 204 | values. Weights are computed by demeaning signals and dividing 205 | by the sum of their absolute value (achieving gross leverage of 1). 206 | 207 | Parameters 208 | ---------- 209 | signal_data : pd.DataFrame - MultiIndex 210 | Index is pd.MultiIndex ['trade_date', 'symbol'], columns = ['signal', 'return', "upside_ret","downside_ret", 'quantile'] 211 | 212 | Returns 213 | ------- 214 | space : pd.DataFrame of dict 215 | weighted_signal_ret_space 216 | """ 217 | 218 | def calc_norm_weights(ser, method): 219 | if method == 'long_only': 220 | ser = (ser + ser.abs()) / 2.0 221 | elif method == 'short_only': 222 | ser = (ser - ser.abs()) / 2.0 223 | else: 224 | raise ValueError("method can only be long_only or short_only," 225 | "but [{}] is provided".format(method)) 226 | return ser / ser.abs().sum() 227 | 228 | grouper = ['trade_date'] 229 | 230 | long_weights = signal_data.groupby(grouper)['signal'].apply(calc_norm_weights, "long_only") 231 | short_weights = signal_data.groupby(grouper)['signal'].apply(calc_norm_weights, "short_only") 232 | 233 | space = dict() 234 | space["long_space"] = dict() 235 | space["long_space"]["upside_space"] = signal_data['upside_ret'].multiply(long_weights, axis=0) 236 | space["long_space"]["downside_space"] = signal_data['downside_ret'].multiply(long_weights, axis=0) 237 | space["short_space"] = dict() 238 | space["short_space"]["upside_space"] = signal_data['downside_ret'].multiply(short_weights, axis=0) 239 | space["short_space"]["downside_space"] = signal_data['upside_ret'].multiply(short_weights, axis=0) 240 | space["long_short_space"] = dict() 241 | space["long_short_space"]["upside_space"] = space["long_space"]["upside_space"] + space["short_space"][ 242 | "upside_space"] 243 | space["long_short_space"]["downside_space"] = space["long_space"]["downside_space"] + space["short_space"][ 244 | "downside_space"] 245 | 246 | for dir_type in ["long_space", "short_space", "long_short_space"]: 247 | for space_type in ["upside_space", "downside_space"]: 248 | space[dir_type][space_type] = space[dir_type][space_type].groupby(level='trade_date').sum() 249 | space[dir_type][space_type] = pd.DataFrame(space[dir_type][space_type]).dropna() 250 | 251 | return space 252 | 253 | 254 | def calc_tb_quantile_ret_space_mean_std(signal_data, 255 | space_type="upside"): 256 | """ 257 | Computes mean space for signal top & bottom quantiles across 258 | provided upside_ret or downside_ret. 
259 | 260 | Parameters 261 | ---------- 262 | signal_data : pd.DataFrame - MultiIndex 263 | Index is pd.MultiIndex ['trade_date', 'symbol'], columns = ['signal', 'return', 'upside_ret', "downside_ret", 'quantile'] 264 | 265 | Returns 266 | ------- 267 | quantile_space : pd.DataFrame of dict 268 | 269 | """ 270 | signal_data = signal_data.copy() 271 | n_quantiles = signal_data['quantile'].max() 272 | grouper = ['quantile'] 273 | grouper.append('trade_date') 274 | 275 | group_mean_std = signal_data.groupby(grouper)[space_type + "_ret"].agg(['mean', 'std', 'count']) 276 | indexes = [] 277 | quantile_daily_mean_std_dic = dict() 278 | for q in [1, n_quantiles]: # loop for different quantiles 279 | df_q = group_mean_std.loc[pd.IndexSlice[q, :], :] # bug 280 | df_q.index = df_q.index.droplevel(level='quantile') 281 | indexes.append(pd.Series(df_q.index)) 282 | quantile_daily_mean_std_dic[q] = df_q 283 | new_index = sorted(pd.concat(indexes).unique()) 284 | for q in [1, n_quantiles]: 285 | quantile_daily_mean_std_dic[q] = quantile_daily_mean_std_dic[q].reindex(new_index).fillna(0) 286 | return quantile_daily_mean_std_dic 287 | 288 | 289 | def cal_spaces_stats(space): 290 | space_summary_table = pd.DataFrame() 291 | if len(space["upside_space"]) > 0: 292 | space["Up_sp"] = space["upside_space"].values.reshape((-1, 1)) 293 | space["Down_sp"] = space["downside_space"].values.reshape((-1, 1)) 294 | for space_type in ["Up_sp", "Down_sp"]: 295 | mean = space[space_type].mean() 296 | std = space[space_type].std() 297 | space_summary_table[space_type + " Mean"] = [mean] 298 | space_summary_table[space_type + " Std"] = [std] 299 | space_summary_table[space_type + " IR"] = [mean / std] 300 | for percent in [5, 25, 50, 75, 95]: 301 | space_summary_table[space_type + " Pct" + str(percent)] = [np.percentile(space[space_type], 302 | percent)] 303 | space_summary_table[space_type + ' Occur'] = [len(space[space_type])] 304 | return space_summary_table.T 305 | 306 | 307 | def space_stats(signal_data, is_event): 308 | spaces = get_spaces(signal_data, is_event) 309 | stats_result = [] 310 | for dir_type in spaces.keys(): 311 | stats = cal_spaces_stats(spaces[dir_type]) 312 | if len(stats) > 0: 313 | stats.columns = [dir_type] 314 | stats_result.append(stats) 315 | if len(stats_result) > 0: 316 | stats_result = pd.concat(stats_result, axis=1) 317 | return stats_result 318 | 319 | 320 | def get_spaces(signal_data, is_event): 321 | spaces = dict() 322 | if not ("upside_ret" in signal_data.columns) or \ 323 | not ("downside_ret" in signal_data.columns): 324 | return spaces 325 | signal_data = signal_data.copy() 326 | n_quantiles = signal_data['quantile'].max() 327 | 328 | spaces = weighted_signal_ret_space(signal_data) 329 | if is_event: 330 | spaces["long_space"]["upside_space"] = signal_data[signal_data['signal'] == 1]["upside_ret"].dropna() 331 | spaces["long_space"]["downside_space"] = signal_data[signal_data['signal'] == 1]["downside_ret"].dropna() 332 | spaces["short_space"]["upside_space"] = signal_data[signal_data['signal'] == -1]["downside_ret"].dropna() * -1 333 | spaces["short_space"]["downside_space"] = signal_data[signal_data['signal'] == -1]["upside_ret"].dropna() * -1 334 | 335 | # quantile return space 336 | if not is_event: 337 | spaces["top_quantile_space"] = dict() 338 | spaces["bottom_quantile_space"] = dict() 339 | spaces["tmb_space"] = dict() 340 | 341 | spaces["top_quantile_space"]["upside_space"] = signal_data[signal_data['quantile'] == n_quantiles][ 342 | "upside_ret"].dropna() 343 | 
spaces["top_quantile_space"]["downside_space"] = signal_data[signal_data['quantile'] == n_quantiles][ 344 | "downside_ret"].dropna() 345 | spaces["bottom_quantile_space"]["upside_space"] = signal_data[signal_data['quantile'] == 1][ 346 | "upside_ret"].dropna() 347 | spaces["bottom_quantile_space"]["downside_space"] = signal_data[signal_data['quantile'] == 1][ 348 | "downside_ret"].dropna() 349 | 350 | tb_upside_mean_space = calc_tb_quantile_ret_space_mean_std(signal_data, 351 | space_type="upside") 352 | tb_downside_mean_space = calc_tb_quantile_ret_space_mean_std(signal_data, 353 | space_type="downside") 354 | spaces['tmb_space']["upside_space"] = pfm.calc_return_diff_mean_std(tb_upside_mean_space[n_quantiles], 355 | tb_downside_mean_space[1])[ 356 | 'mean_diff'].dropna() 357 | spaces['tmb_space']["downside_space"] = pfm.calc_return_diff_mean_std(tb_downside_mean_space[n_quantiles], 358 | tb_upside_mean_space[1])[ 359 | 'mean_diff'].dropna() 360 | 361 | spaces["all_sample_space"] = dict() 362 | spaces["all_sample_space"]["upside_space"] = signal_data["upside_ret"].dropna() 363 | spaces["all_sample_space"]["downside_space"] = signal_data["downside_ret"].dropna() 364 | return spaces 365 | 366 | 367 | def analysis(signal_data, is_event, period): 368 | if is_event: 369 | return { 370 | "ret": return_stats(signal_data, True, period), 371 | "space": space_stats(signal_data, True) 372 | } 373 | else: 374 | return { 375 | "ic": ic_stats(signal_data), 376 | "ret": return_stats(signal_data, False, period), 377 | "space": space_stats(signal_data, False) 378 | } 379 | -------------------------------------------------------------------------------- /docs/analysis.md: -------------------------------------------------------------------------------- 1 | 2 | # analysis 3 | 4 | ## 介绍 5 | 单因子多维度分析.从因子ic,因子收益,选股潜在收益空间三个维度给出因子评价.新增模块 6 | 7 | ## ic_stats 8 | - ` jaqs_fxdayu.research.signaldigger.analysis.ic_stats(signal_data) ` 9 | 10 | **简要描述:** 11 | 12 | - 因子ic分析表 13 | - 对事件因子(数值为0/1/-1的因子)无法使用该方法 14 | 15 | **参数:** 16 | 17 | |字段|必选|类型|说明| 18 | |:---- |:---|:----- |----- | 19 | |signal_data |是|pandas.DataFrame |trade_date+symbol为MultiIndex,columns为signal(因子)、return(持有期相对/绝对收益,必须)、upside_ret(持有期潜在最大上涨收益,非必须)、downside_ret(持有期潜在最大下跌收益,非必须)、group(分组/行业分类,非必须)、quantile(按因子值分组,非必须)| 20 | 21 | **返回:** 22 | 因子ic分析表 23 | * 列: 24 | * return_ic/upside_ret_ic/downside_ret_ic 25 | * 持有期收益的ic/持有期最大向上空间的ic/持有期最大向下空间的ic 26 | 27 | * 行: 28 | * "IC Mean", "IC Std.", "t-stat(IC)", "p-value(IC)", "IC Skew", "IC Kurtosis", "Ann. IR" 29 | * IC均值,IC标准差,IC的t统计量,对IC做0均值假设检验的p-value,IC偏度,IC峰度,iC的年化信息比率-mean/std 30 | 31 | 32 | **示例:** 33 | 34 | 35 | ```python 36 | import warnings 37 | warnings.filterwarnings('ignore') 38 | ``` 39 | 40 | 41 | ```python 42 | from jaqs_fxdayu.data import DataView 43 | from jaqs_fxdayu.research import SignalDigger 44 | 45 | # 加载dataview数据集 46 | dv = DataView() 47 | dataview_folder = './data' 48 | dv.load_dataview(dataview_folder) 49 | 50 | # 计算signal_data(通过jaqs.research.signaldigger.digger.SignalDigger.process_signal_before_analysis(*args, **kwargs)) 51 | sd = SignalDigger() 52 | sd.process_signal_before_analysis(signal=dv.get_ts("pe"), 53 | price=dv.get_ts("close_adj"), 54 | high=dv.get_ts("high_adj"), 55 | low=dv.get_ts("low_adj"), 56 | group=dv.get_ts("sw1"), 57 | n_quantiles=5, 58 | period=5, 59 | benchmark_price=dv.data_benchmark, 60 | ) 61 | signal_data = sd.signal_data 62 | signal_data.head() 63 | ``` 64 | 65 | Dataview loaded successfully. 
66 | Nan Data Count (should be zero) : 0; Percentage of effective data: 99% 67 | 68 | 69 | 70 | 71 | 72 |
73 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 |
signalreturnupside_retdownside_retgroupquantile
trade_datesymbol
20170503000001.SZ6.7925-0.005637-0.003045-0.0423264800001
000002.SZ10.08210.0112250.016697-0.0294324300001
000008.SZ42.9544-0.0494080.000463-0.0929726400004
000009.SZ79.4778-0.0698220.009714-0.0954265100005
000027.SZ20.4542-0.0195170.009404-0.0416164100002
158 |
159 | 160 | 161 | 162 | 163 | ```python 164 | from jaqs_fxdayu.research.signaldigger.analysis import ic_stats 165 | 166 | ic_stats(signal_data) 167 | ``` 168 | 169 | 170 | 171 | 172 |
173 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 |
return_icupside_ret_icdownside_ret_ic
IC Mean-0.0228050.031198-2.035376e-01
IC Std.0.2073250.1593131.692702e-01
t-stat(IC)-1.1054671.968055-1.208439e+01
p-value(IC)0.2716100.0518312.894849e-21
IC Skew0.009493-0.0657154.407910e-01
IC Kurtosis-0.978744-0.639758-5.878823e-01
Ann. IR-0.1099980.195829-1.202442e+00
240 |
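`ic_stats` is a thin wrapper over `get_ics` (see the analysis.py source above), which returns one per-date IC series per available return column; those series can be pulled out for custom smoothing or plotting. A minimal sketch, assuming each series carries an `ic` column as in the library's plotting code:


```python
from jaqs_fxdayu.research.signaldigger.analysis import get_ics

ics = get_ics(signal_data)
# Keys: 'return_ic', plus 'upside_ret_ic'/'downside_ret_ic' when those
# columns are present in signal_data.
daily_ic = ics["return_ic"]
print(daily_ic.rolling(20).mean().tail())  # 20-period smoothing, illustrative
```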
241 | 242 | 243 | 244 | ### return_stats 245 | - ` jaqs_fxdayu.research.signaldigger.analysis.return_stats(signal_data,is_event,period) ` 246 | 247 | **简要描述:** 248 | 249 | - 因子收益分析表--根据因子构建几种投资组合,通过组合表现分析因子的收益能力 250 | 251 | **参数:** 252 | 253 | |字段|必选|类型|说明| 254 | |:---- |:---|:----- |----- | 255 | |signal_data |是|pandas.DataFrame |trade_date+symbol为MultiIndex,columns为signal(因子)、return(持有期相对/绝对收益,必须)、upside_ret(持有期潜在最大上涨收益,非必须)、downside_ret(持有期潜在最大下跌收益,非必须)、group(分组/行业分类,非必须)、quantile(按因子值分组,非必须)| 256 | |is_event |是|bool |是否是事件因子(数值为0/1/-1的因子)| 257 | |period |是|int |换仓周期(天数),**注意:**必须与signal_data中收益的计算周期一致| 258 | 259 | **返回:** 260 | 261 | 收益分析表 262 | * 列: 263 | * long_ret/short_ret/long_short_ret/top_quantile_ret/bottom_quantile_ret/tmb_ret/all_sample_ret 264 | * 多头组合收益/空头组合收益/多空组合收益/因子值最大组合收益/因子值最小组合收益/因子值最大组(构建多头)+因子值最小组(构建空头)收益/全样本(无论信号大小和方向)-基准组合收益 265 | 266 | * 行: 267 | * 't-stat', "p-value", "skewness", "kurtosis", "Ann. Ret", "Ann. Vol", "Ann. IR", "occurance" 268 | * 持有期收益的t统计量,对持有期收益做0均值假设检验的p-value,偏度,峰度,持有期收益年化值,年化波动率,年化信息比率-年化收益/年化波动率,样本数量 269 | 270 | 271 | **示例:** 272 | 273 | 274 | ```python 275 | from jaqs_fxdayu.research.signaldigger.analysis import return_stats 276 | 277 | return_stats(signal_data,is_event=False,period=5) 278 | ``` 279 | 280 | 281 | 282 | 283 |
|  | long_ret | long_short_ret | top_quantile_ret | bottom_quantile_ret | tmb_ret | all_sample_ret |
|:---|---:|---:|---:|---:|---:|---:|
| t-stat | -1.203846 | 0.411628 | -4.728619 | -2.714885 | -0.755901 | -12.043624 |
| p-value | 0.231360 | 0.681450 | 0.000000 | 0.006650 | 0.451400 | 0.000000 |
| skewness | -0.083057 | 0.373680 | 0.495042 | 1.348467 | -0.261998 | 0.546392 |
| kurtosis | -0.555038 | 0.042535 | 6.187667 | 9.207208 | -0.272022 | 6.241350 |
| Ann. Ret | -0.101735 | 0.021452 | -0.129940 | -0.051046 | -0.078894 | -0.120509 |
| Ann. Vol | 0.124471 | 0.076759 | 0.330355 | 0.226040 | 0.153727 | 0.268994 |
| Ann. IR | -0.817333 | 0.279469 | -0.393336 | -0.225829 | -0.513207 | -0.447998 |
| occurance | 106.000000 | 106.000000 | 6996.000000 | 6996.000000 | 106.000000 | 34980.000000 |
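`short_ret` is absent from this output, presumably because the example PE signal is never negative, so no short portfolio can be built. The annualized rows are simple scalings of the per-period sample moments; a minimal sketch of one common convention (assuming 242 trading days per year, and taking the all-sample return series as the example; the library's exact formula may differ):

```python
import numpy as np

TRADE_DAYS_PER_YEAR = 242   # assumption: A-share trading-day convention
period = 5                  # rebalancing period passed to return_stats

# Example series: all-sample holding-period returns; each portfolio column
# applies the same recipe to its own return series.
ret = signal_data["return"].dropna()

periods_per_year = TRADE_DAYS_PER_YEAR / period
ann_ret = ret.mean() * periods_per_year            # "Ann. Ret"
ann_vol = ret.std() * np.sqrt(periods_per_year)    # "Ann. Vol"
ann_ir = ann_ret / ann_vol                         # "Ann. IR"
```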



## space_stats
- ` jaqs_fxdayu.research.signaldigger.analysis.space_stats(signal_data,is_event) `

**Brief description:**

- Factor potential return space analysis table: several portfolios are built from the factor, and the maximum potential upside and downside each portfolio could reach within the rebalancing period is used to analyze how much room the factor's stock-picking return leaves for improvement, as a further aid in designing a timing scheme

**Parameters:**

|Field|Required|Type|Description|
|:---- |:---|:----- |----- |
|signal_data |Yes|pandas.DataFrame |MultiIndex of trade_date+symbol; columns are signal (the factor value), return (holding-period relative/absolute return, required), upside_ret (maximum potential upside return over the holding period, optional), downside_ret (maximum potential downside return over the holding period, optional), group (grouping/industry classification, optional), quantile (bucket by factor value, optional)|
|is_event |Yes|bool |whether the factor is an event factor (values 0/1/-1)|

**Returns:**

Factor potential return space analysis table
* Columns:
    * long_space/short_space/long_short_space/top_quantile_space/bottom_quantile_space/tmb_space/all_sample_space
    * long portfolio space / short portfolio space / long-short portfolio space / space of the top factor-value quantile / space of the bottom factor-value quantile / top-minus-bottom space (long the top quantile, short the bottom quantile) / all-sample space (regardless of signal size and direction) relative to the benchmark

* Rows:
    * 'Up_sp Mean','Up_sp Std','Up_sp IR','Up_sp Pct5', 'Up_sp Pct25 ','Up_sp Pct50 ', 'Up_sp Pct75','Up_sp Pct95','Up_sp Occur','Down_sp Mean','Down_sp Std', 'Down_sp IR', 'Down_sp Pct5','Down_sp Pct25 ','Down_sp Pct50 ','Down_sp Pct75', 'Down_sp Pct95','Down_sp Occur'
    * mean upside space of the stocks held in the portfolio, its standard deviation, its information ratio (mean/std), its 5th, 25th, 50th (median), 75th and 95th percentiles, its sample count, followed by the same statistics for the downside space


**Example:**


```python
from jaqs_fxdayu.research.signaldigger.analysis import space_stats

space_stats(signal_data,is_event=False)
```
|  | long_space | top_quantile_space | bottom_quantile_space | tmb_space | all_sample_space |
|:---|---:|---:|---:|---:|---:|
| Up_sp Mean | -0.091582 | -0.089756 | -0.016239 | -0.013714 | -0.026786 |
| Up_sp Std | 0.033321 | 0.343245 | 0.212997 | 0.017699 | 0.240319 |
| Up_sp IR | -2.748454 | -0.261492 | -0.076242 | -0.774819 | -0.111460 |
| Up_sp Pct5 | -0.127152 | -1.000800 | -0.005893 | -0.040333 | -1.000800 |
| Up_sp Pct25 | -0.117286 | 0.002457 | 0.004533 | -0.028591 | 0.005062 |
| Up_sp Pct50 | -0.101419 | 0.020756 | 0.017939 | -0.013746 | 0.019105 |
| Up_sp Pct75 | -0.076478 | 0.047980 | 0.039831 | -0.000051 | 0.041935 |
| Up_sp Pct95 | -0.031515 | 0.111557 | 0.090402 | 0.013496 | 0.098799 |
| Up_sp Occur | 106.000000 | 6996.000000 | 6996.000000 | 106.000000 | 34980.000000 |
| Down_sp Mean | -0.167327 | -0.171114 | -0.076042 | -0.154875 | -0.092512 |
| Down_sp Std | 0.046346 | 0.340002 | 0.224699 | 0.045501 | 0.245442 |
| Down_sp IR | -3.610429 | -0.503275 | -0.338419 | -3.403795 | -0.376919 |
| Down_sp Pct5 | -0.220840 | -1.000800 | -1.000800 | -0.208216 | -1.000800 |
| Down_sp Pct25 | -0.190647 | -0.067406 | -0.034329 | -0.183180 | -0.042842 |
| Down_sp Pct50 | -0.176590 | -0.029282 | -0.017467 | -0.162556 | -0.021792 |
| Down_sp Pct75 | -0.152016 | -0.012810 | -0.007824 | -0.139399 | -0.009769 |
| Down_sp Pct95 | -0.111972 | 0.000000 | 0.000000 | -0.086766 | 0.000000 |
| Down_sp Occur | 106.000000 | 6996.000000 | 6996.000000 | 106.000000 | 34980.000000 |
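The `upside_ret`/`downside_ret` inputs that these statistics summarize are derived from the extreme prices reached during the holding period. A plausible reconstruction of that idea under stated assumptions (not the library's exact code; `high`, `low` and `close` are date-by-symbol price DataFrames as returned by `dv.get_ts`):

```python
high = dv.get_ts("high_adj")
low = dv.get_ts("low_adj")
close = dv.get_ts("close_adj")
period = 5  # holding period in trading days

# Highest high and lowest low reachable over the next `period` bars.
future_high = high.rolling(period).max().shift(-period)
future_low = low.rolling(period).min().shift(-period)

# Maximum potential upside / downside relative to today's close.
upside_ret = future_high / close - 1.0
downside_ret = future_low / close - 1.0
```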



## analysis
- ` jaqs_fxdayu.research.signaldigger.analysis.analysis(signal_data,is_event,period) `

**Brief description:**

- Obtain the factor IC analysis table, the return analysis table, and the potential return space analysis table in a single call; see the APIs above for computing the three tables individually

**Parameters:**

|Field|Required|Type|Description|
|:---- |:---|:----- |----- |
|signal_data |Yes|pandas.DataFrame |MultiIndex of trade_date+symbol; columns are signal (the factor value), return (holding-period relative/absolute return, required), upside_ret (maximum potential upside return over the holding period, optional), downside_ret (maximum potential downside return over the holding period, optional), group (grouping/industry classification, optional), quantile (bucket by factor value, optional)|
|is_event |Yes|bool |whether the factor is an event factor (values 0/1/-1)|
|period |Yes|int |rebalancing period (in days); **note:** must match the period used to compute the returns in signal_data|

**Returns:**

A dict composed of the factor IC analysis table ("ic"), the return analysis table ("ret"), and the potential return space analysis table ("space"); for event factors the "ic" entry is omitted, as the source above shows

**Example:**


```python
from jaqs_fxdayu.research.signaldigger.analysis import analysis

result = analysis(signal_data,is_event=False,period=5)
print(result.keys())
result["ic"]
```

    dict_keys(['ic', 'ret', 'space'])
|  | return_ic | upside_ret_ic | downside_ret_ic |
|:---|---:|---:|---:|
| IC Mean | -0.022805 | 0.031198 | -2.035376e-01 |
| IC Std. | 0.207325 | 0.159313 | 1.692702e-01 |
| t-stat(IC) | -1.105467 | 1.968055 | -1.208439e+01 |
| p-value(IC) | 0.271610 | 0.051831 | 2.894849e-21 |
| IC Skew | 0.009493 | -0.065715 | 4.407910e-01 |
| IC Kurtosis | -0.978744 | -0.639758 | -5.878823e-01 |
| Ann. IR | -0.109998 | 0.195829 | -1.202442e+00 |
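The other two entries of the dict are exactly the tables that `return_stats` and `space_stats` return, so the combined result can be handled with ordinary pandas calls; a small usage sketch (the file names are hypothetical):

```python
ret_table = result["ret"]      # same table as return_stats(...)
space_table = result["space"]  # same table as space_stats(...)

# Persist all three tables for later cross-factor comparison.
for name, table in result.items():
    table.to_csv("./pe_factor_%s_stats.csv" % name)
```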


--------------------------------------------------------------------------------