├── tests ├── __init__.py ├── test_ploting.py ├── data_config.py ├── test_patch_all.py ├── test_dataview.py ├── test_py_expression_eval.py └── test_research.py ├── jaqs_fxdayu ├── VERSION.txt ├── data │ ├── align.py │ ├── dataapi │ │ ├── utils.py │ │ ├── jrpc_py.py │ │ ├── data_api.py │ │ ├── __init__.py │ │ ├── README.md │ │ └── LICENSE │ ├── __init__.py │ ├── search_doc.py │ ├── signal_function_mod.py │ ├── built_in_funcs_docs.csv │ └── py_expression_eval.py ├── util │ ├── dtutil.py │ ├── fileio.py │ ├── pdutil.py │ ├── numeric.py │ ├── profile.py │ ├── sequence.py │ ├── __init__.py │ ├── concat.py │ └── dp.py ├── research │ ├── timingdigger │ │ ├── __init__.py │ │ ├── performance.py │ │ └── plotting.py │ ├── signaldigger │ │ ├── __init__.py │ │ ├── plotting.py │ │ ├── performance.py │ │ ├── process.py │ │ ├── signal_creator.py │ │ ├── optimizer.py │ │ └── analysis.py │ └── __init__.py ├── __init__.py └── patch_util │ ├── postimport.py │ └── __init__.py ├── requirements_doc.txt ├── requirements.txt ├── docs ├── digger │ ├── output_14_2.png │ └── output_18_2.png ├── hf_dataview │ └── output_17_0.png ├── timingdigger │ ├── output_21_2.png │ └── output_25_2.png ├── index.md ├── optimizer.md ├── dp.md └── analysis.md ├── publish └── publish_pypi.sh ├── mkdocs.yml ├── .gitignore ├── setup.py └── README.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /jaqs_fxdayu/VERSION.txt: -------------------------------------------------------------------------------- 1 | 0.2.2 -------------------------------------------------------------------------------- /requirements_doc.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /jaqs_fxdayu/data/align.py: -------------------------------------------------------------------------------- 1 | # noinspection PyUnresolvedReferences 2 | from jaqs.data.align import * 3 | -------------------------------------------------------------------------------- /jaqs_fxdayu/util/dtutil.py: -------------------------------------------------------------------------------- 1 | # noinspection PyUnresolvedReferences 2 | from jaqs.util.dtutil import * 3 | -------------------------------------------------------------------------------- /jaqs_fxdayu/util/fileio.py: -------------------------------------------------------------------------------- 1 | # noinspection PyUnresolvedReferences 2 | from jaqs.util.fileio import * 3 | -------------------------------------------------------------------------------- /jaqs_fxdayu/util/pdutil.py: -------------------------------------------------------------------------------- 1 | # noinspection PyUnresolvedReferences 2 | from jaqs.util.pdutil import * 3 | -------------------------------------------------------------------------------- /jaqs_fxdayu/util/numeric.py: -------------------------------------------------------------------------------- 1 | # noinspection PyUnresolvedReferences 2 | from jaqs.util.numeric import * 3 | -------------------------------------------------------------------------------- /jaqs_fxdayu/util/profile.py: -------------------------------------------------------------------------------- 1 | # noinspection PyUnresolvedReferences 2 | from jaqs.util.profile import * 3 | -------------------------------------------------------------------------------- 
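Most of the modules listed so far are one-line shims that re-export the matching `jaqs` module. A minimal sketch (assuming `jaqs` is installed) of the guarantee this pattern gives:

```python
# Every public name of the upstream module is reachable through the shim,
# and refers to the very same object.
import jaqs.util.numeric as upstream
import jaqs_fxdayu.util.numeric as shim

names = getattr(upstream, "__all__",
                [n for n in dir(upstream) if not n.startswith("_")])
assert all(getattr(shim, n) is getattr(upstream, n) for n in names)
```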
/jaqs_fxdayu/util/sequence.py: -------------------------------------------------------------------------------- 1 | # noinspection PyUnresolvedReferences 2 | from jaqs.util.sequence import * 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | jaqs==0.6.11 2 | pandas>=0.20.0 3 | scipy>=1.0.1 4 | sklearn>=0.0 5 | statsmodels>=0.8.0 -------------------------------------------------------------------------------- /docs/digger/output_14_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingetouzi/jaqs-fxdayu/HEAD/docs/digger/output_14_2.png -------------------------------------------------------------------------------- /docs/digger/output_18_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingetouzi/jaqs-fxdayu/HEAD/docs/digger/output_18_2.png -------------------------------------------------------------------------------- /jaqs_fxdayu/data/dataapi/utils.py: -------------------------------------------------------------------------------- 1 | # noinspection PyUnresolvedReferences 2 | from jaqs.data.dataapi.utils import * 3 | -------------------------------------------------------------------------------- /jaqs_fxdayu/research/timingdigger/__init__.py: -------------------------------------------------------------------------------- 1 | from .digger import TimingDigger 2 | 3 | __all__ = ['TimingDigger'] 4 | -------------------------------------------------------------------------------- /docs/hf_dataview/output_17_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingetouzi/jaqs-fxdayu/HEAD/docs/hf_dataview/output_17_0.png -------------------------------------------------------------------------------- /jaqs_fxdayu/data/dataapi/jrpc_py.py: -------------------------------------------------------------------------------- 1 | # noinspection PyUnresolvedReferences 2 | from jaqs.data.dataapi.jrpc_py import * 3 | -------------------------------------------------------------------------------- /docs/timingdigger/output_21_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingetouzi/jaqs-fxdayu/HEAD/docs/timingdigger/output_21_2.png -------------------------------------------------------------------------------- /docs/timingdigger/output_25_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingetouzi/jaqs-fxdayu/HEAD/docs/timingdigger/output_25_2.png -------------------------------------------------------------------------------- /jaqs_fxdayu/data/dataapi/data_api.py: -------------------------------------------------------------------------------- 1 | # noinspection PyUnresolvedReferences 2 | from jaqs.data.dataapi.data_api import * 3 | 4 | -------------------------------------------------------------------------------- /publish/publish_pypi.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python setup.py sdist 3 | python setup.py bdist_wheel --universal 4 | twine upload dist/* -------------------------------------------------------------------------------- /jaqs_fxdayu/util/__init__.py: 
--------------------------------------------------------------------------------
1 | from .dtutil import *
2 | from .fileio import *
3 | from .numeric import *
4 | from .pdutil import *
5 | from .profile import *
6 | from .sequence import *
7 | from .dp import *
8 |
--------------------------------------------------------------------------------
/jaqs_fxdayu/research/signaldigger/__init__.py:
--------------------------------------------------------------------------------
1 | from .digger import SignalDigger
2 | from .optimizer import Optimizer
3 | from .signal_creator import SignalCreator
4 |
5 | __all__ = ['SignalDigger', "Optimizer", "SignalCreator"]
6 |
--------------------------------------------------------------------------------
/jaqs_fxdayu/research/__init__.py:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 |
3 | from .signaldigger import Optimizer, SignalDigger, SignalCreator
4 | from .timingdigger import TimingDigger
5 |
6 | __all__ = ['SignalDigger', "TimingDigger", "Optimizer", "SignalCreator"]
7 |
--------------------------------------------------------------------------------
/jaqs_fxdayu/__init__.py:
--------------------------------------------------------------------------------
1 | from os.path import join, dirname
2 |
3 | import matplotlib
4 | import matplotlib.pyplot  # imported eagerly so JAQS cannot reload it and reset the backend
5 |
6 | from .patch_util import patch_all
7 |
8 | with open(join(dirname(__file__), 'VERSION.txt'), 'rb') as f:
9 |     __version__ = f.read().decode('ascii').strip()
10 |
--------------------------------------------------------------------------------
/tests/test_ploting.py:
--------------------------------------------------------------------------------
1 | import matplotlib as mpl
2 |
3 | _old = mpl.get_backend()
4 |
5 | from jaqs_fxdayu.data import DataView
6 |
7 | assert mpl.get_backend() == _old
8 |
9 | import importlib
10 | importlib.reload(mpl)
11 |
12 | from jaqs_fxdayu import patch_all
13 |
14 | patch_all()
15 | assert mpl.get_backend() == _old
16 |
--------------------------------------------------------------------------------
/jaqs_fxdayu/data/dataapi/__init__.py:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 | """
3 | dataapi defines standard APIs for communicating with data service.
4 |
5 | """
6 | from __future__ import absolute_import
7 | from __future__ import division
8 | from __future__ import print_function
9 | from __future__ import unicode_literals
10 |
11 | from .data_api import DataApi
12 |
13 | __all__ = ['DataApi']
14 |
--------------------------------------------------------------------------------
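`DataApi`, re-exported above, is the raw transport client. A hedged login sketch (the address and credentials are placeholders; the real flow appears in the dataapi README further below):

```python
from jaqs_fxdayu.data.dataapi import DataApi

api = DataApi(addr="tcp://data.quantos.org:8910")  # placeholder address
result, msg = api.login("phone", "token")          # your quantos.org account
print(result, msg)
```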
/jaqs_fxdayu/data/__init__.py:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 |
3 | """
4 | Modules relevant to data.
5 |
6 | """
7 |
8 | from .dataapi import DataApi
9 | from .dataservice import RemoteDataService, DataService, LocalDataService
10 | from .dataview import DataView, EventDataView
11 | from .hf_dataview import HFDataView
12 | from .py_expression_eval import Parser
13 |
14 | # we do not expose align and basic
15 | __all__ = ['DataApi', 'DataService', 'RemoteDataService', 'LocalDataService', 'DataView', 'HFDataView', 'Parser', 'EventDataView']
16 |
--------------------------------------------------------------------------------
/tests/data_config.py:
--------------------------------------------------------------------------------
1 | data_config = {
2 |     "remote.data.address": "tcp://192.168.0.101:23000",
3 |     "remote.data.username": "18566262672",
4 |     "remote.data.password": "eyJhbGciOiJIUzI1NiJ9.eyJjcmVhdGVfdGltZSI6IjE1MTI3MDI3NTAyMTIiLCJpc3MiOiJhdXRoMCIsImlkIjoiMTg1NjYyNjI2NzIifQ.O_-yR0zYagrLRvPbggnru1Rapk4kiyAzcwYt2a3vlpM",
5 | }
6 | # data_config = {
7 | #     "remote.data.address": "tcp://data.quantos.org:8910",
8 | #     "remote.data.username": "18566262672",
9 | #     "remote.data.password": "eyJhbGciOiJIUzI1NiJ9.eyJjcmVhdGVfdGltZSI6IjE1MTI3MDI3NTAyMTIiLCJpc3MiOiJhdXRoMCIsImlkIjoiMTg1NjYyNjI2NzIifQ.O_-yR0zYagrLRvPbggnru1Rapk4kiyAzcwYt2a3vlpM",
10 | #     "timeout": 180
11 | # }
12 |
--------------------------------------------------------------------------------
/mkdocs.yml:
--------------------------------------------------------------------------------
1 | site_name: jaqs-fxdayu
2 | site_author: xingetouzi
3 | repo_url: https://github.com/xingetouzi/jaqs-fxdayu/
4 | pages:
5 | - Home: index.md
6 | - API docs (basics):
7 |   - dataservice: dataservice.md
8 |   - dataview: dataview.md
9 |   - digger: digger/digger.md
10 |   - performance: performance.md
11 | - API docs (extensions):
12 |   - analysis: analysis.md
13 |   - process: process.md
14 |   - optimizer: optimizer.md
15 |   - multi_factor: multi_factor.md
16 |   - dp: dp.md
17 |   - timingdigger: timingdigger/timingdigger.md
18 |   - hf_dataview: hf_dataview/hf_dataview.md
19 |
20 | theme: readthedocs
21 |
22 | markdown_extensions:
23 | - toc:
24 |     permalink: 
25 | - admonition
26 | - def_list
27 |
28 | copyright: Copyright © 2014
29 |
--------------------------------------------------------------------------------
/jaqs_fxdayu/research/signaldigger/plotting.py:
--------------------------------------------------------------------------------
1 | from jaqs_fxdayu.patch_util import auto_register_patch
2 | from jaqs.research.signaldigger.plotting import *
3 |
4 |
5 | @auto_register_patch()
6 | def plot_ic_by_group(ic_group, ax=None):
7 |     """
8 |     Plots the Spearman rank information coefficient of a factor
9 |     against provided forward returns, separated by group.
10 |
11 |
12 |     Parameters
13 |     ----------
14 |     ic_group : pd.DataFrame
15 |         Group-wise mean information coefficient.
16 |     ax : matplotlib.Axes, optional
17 |         Axes upon which to plot.
18 |
19 |     Returns
20 |     -------
21 |     ax : matplotlib.Axes
22 |         The axes that were plotted on.
23 |     """
24 |     if ax is None:
25 |         f, ax = plt.subplots(1, 1, figsize=(18, 6))
26 |     ic_group.plot(kind='bar', ax=ax)
27 |
28 |     ax.set(title="Information Coefficient By Group", xlabel="")
29 |     ax.set_xticklabels(ic_group.index, rotation=45)
30 |
31 |     return ax
32 |
--------------------------------------------------------------------------------
/jaqs_fxdayu/data/dataapi/README.md:
--------------------------------------------------------------------------------
1 | # DataApi
2 |
3 | Standard data API definitions.
4 |
5 | # Installation steps
6 |
7 | ## 1. Install a Python environment
8 |
9 | If you do not yet have a local Python environment, we strongly recommend Anaconda (an integrated Python distribution that bundles the common packages and is easy to install, avoiding unnecessary trouble). Open the [Anaconda website](http://www.continuum.io/downloads), choose your operating system and the Python version you want, and download it.
10 |
11 | After the download completes, follow the graphical installer. By default, Anaconda sets the PATH environment automatically.
12 |
13 | ***Note***: if you run into problems or need more detailed steps, see the [Anaconda installation guide](https://github.com/quantOS-org/JAQS/blob/master/doc/install.md#1安装python环境)
14 |
15 | ## 2. Install dependencies
16 |
17 | If your Python environment is not an integrated distribution like Anaconda, the dependencies must be installed separately; given that pandas/numpy are already present, you also need:
18 | - `pyzmq`
19 | - `msgpack_python`
20 | - `python-snappy`
21 |
22 | Each can be installed on its own, e.g.: `pip install pyzmq`
23 |
24 | Note that `python-snappy` needs quite a few build dependencies; install it as described in [how to install python-snappy](https://github.com/quantOS-org/JAQS/blob/master/doc/install.md#如何安装python-snappy包).
25 |
26 |
27 | ## 3. Using DataApi
28 |
29 | ```python
30 | from DataApi import DataApi  # assumes the project directory is named DataApi and lives in the working directory
31 |
32 | api = DataApi(addr="tcp://data.tushare.org:8910")
33 | result, msg = api.login("phone", "token")  # sample account; replace with the account you registered at www.quantos.org
34 | print(result)
35 | print(msg)
36 | ```
37 |
38 |
--------------------------------------------------------------------------------
/jaqs_fxdayu/data/search_doc.py:
--------------------------------------------------------------------------------
1 | # encoding=utf-8
2 | from pathlib import Path
3 |
4 | import pandas as pd
5 |
6 | _path = Path(__file__).absolute().parent / "built_in_funcs_docs.csv"
7 |
8 |
9 | def get_doc():
10 |     with open(str(_path), "rb") as f:
11 |         data = pd.read_csv(f)
12 |     return data
13 |
14 |
15 | class FuncDoc(object):
16 |     def __init__(self):
17 |         self.doc = get_doc()
18 |         self.types = self.doc["分类"].drop_duplicates().values
19 |         self.funcs = self.doc["公式"].values
20 |         self.descriptions = self.doc["说明"].values
21 |
22 |     def search_by_type(self, _type):
23 |         result = self.doc["分类"].apply(lambda x: x.find(_type) > -1)
24 |         return self.doc[result]
25 |
26 |     def search_by_func(self, func, precise=False):
27 |         if precise:
28 |             result = self.doc["公式"].apply(lambda x: x.find(func) == 0)
29 |         else:
30 |             result = self.doc["公式"].apply(lambda x: x.lower().find(func.lower()) > -1)
31 |         return self.doc[result]
32 |
33 |     def search_by_description(self, description):
34 |         result = self.doc["说明"].apply(lambda x: x.find(description) > -1)
35 |         return self.doc[result]
36 |
37 |
38 | if __name__ == "__main__":
39 |     print(get_doc().to_dict())
40 |
--------------------------------------------------------------------------------
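A hedged usage sketch for `FuncDoc` above; the CSV keeps its Chinese column headers, which double as identifiers: 分类 (category), 公式 (formula), 说明 (description).

```python
from jaqs_fxdayu.data.search_doc import FuncDoc

docs = FuncDoc()
print(docs.types)                      # distinct categories
hits = docs.search_by_func("Ts_Rank")  # case-insensitive substring match
print(hits[["公式", "说明"]])
```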
/jaqs_fxdayu/patch_util/postimport.py:
--------------------------------------------------------------------------------
1 | import importlib
2 | import logging
3 | import sys
4 | from collections import defaultdict
5 |
6 | _post_import_hooks = defaultdict(list)
7 |
8 |
9 | class PostImportFinder(object):
10 |     def __init__(self):
11 |         self._skip = set()  # presumably tracks names already being handled, to avoid recursive lookups
12 |
13 |     def find_module(self, fullname, path=None):
14 |         if fullname in self._skip:
15 |             return None
16 |         self._skip.add(fullname)
17 |         return PostImportLoader(self)
18 |
19 |
20 | class PostImportLoader(object):
21 |     def __init__(self, finder):
22 |         self._finder = finder
23 |
24 |     def load_module(self, fullname):
25 |         # if fullname.startswith("jaqs"):
26 |         #     logging.debug(fullname)
27 |         importlib.import_module(fullname)
28 |         module = sys.modules[fullname]
29 |         for func in _post_import_hooks[fullname]:
30 |             func(module)
31 |         self._finder._skip.remove(fullname)
32 |         return module
33 |
34 |
35 | def when_imported(fullname):
36 |     def decorate(func):
37 |         if fullname in sys.modules:
38 |             func(sys.modules[fullname])
39 |         else:
40 |             _post_import_hooks[fullname].append(func)
41 |         return func
42 |
43 |     return decorate
44 |
45 |
46 | sys.meta_path.insert(0, PostImportFinder())
47 |
--------------------------------------------------------------------------------
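A minimal sketch of the post-import hook defined above: the callback fires as soon as the named module is imported, or immediately if it is already loaded.

```python
from jaqs_fxdayu.patch_util.postimport import when_imported

@when_imported("jaqs.data.dataview")
def on_dataview(module):
    # runs once jaqs.data.dataview has been imported
    print("loaded:", module.__name__)
```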
/tests/test_patch_all.py:
--------------------------------------------------------------------------------
1 | import unittest
2 |
3 |
4 | class TestPatchAll(unittest.TestCase):
5 |     prefix = "jaqs_fxdayu."
6 |
7 |     @classmethod
8 |     def setUpClass(cls):
9 |         from jaqs_fxdayu import patch_all
10 |         patch_all()
11 |
12 |     def test_dataview(self):
13 |         from jaqs.data import DataView
14 |         assert DataView.__module__.startswith(self.prefix)
15 |         from jaqs.data.dataview import DataView
16 |         assert DataView.__module__.startswith(self.prefix)
17 |
18 |     def test_parser(self):
19 |         from jaqs.data import Parser
20 |         assert Parser.__module__.startswith(self.prefix)
21 |         from jaqs.data.py_expression_eval import Parser
22 |         assert Parser.__module__.startswith(self.prefix)
23 |
24 |     def test_signaldigger(self):
25 |         from jaqs.research import SignalDigger
26 |         assert SignalDigger.__module__.startswith(self.prefix)
27 |         from jaqs.research.signaldigger import SignalDigger
28 |         assert SignalDigger.__module__.startswith(self.prefix)
29 |
30 |     def test_performance(self):
31 |         from jaqs.research.signaldigger import performance
32 |         assert performance.calc_signal_ic.__module__.startswith(self.prefix)
33 |         assert performance.calc_quantile_return_mean_std.__module__.startswith(self.prefix)
34 |         assert performance.mean_information_coefficient.__module__.startswith(self.prefix)
35 |         assert performance.price2ret.__module__.startswith(self.prefix)
36 |
37 |     def test_plotting(self):
38 |         from jaqs.research.signaldigger import plotting
39 |         assert hasattr(plotting, "plot_ic_by_group")
40 |
41 |
42 | if __name__ == "__main__":
43 |     unittest.main()
44 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Created by .ignore support plugin (hsz.mobi)
2 | ### VirtualEnv template
3 | # Virtualenv
4 | # http://iamzed.com/2009/05/07/a-primer-on-virtualenv/
5 | .Python
6 | [Bb]in
7 | [Ii]nclude
8 | [Ll]ib
9 | [Ll]ib64
10 | [Ll]ocal
11 | [Ss]cripts
12 | pyvenv.cfg
13 | .venv
14 | pip-selfcheck.json
15 | ### Python template
16 | # Byte-compiled / optimized / DLL files
17 | __pycache__/
18 | *.py[cod]
19 | *$py.class
20 |
21 | # C extensions
22 | *.so
23 |
24 | # Distribution / packaging
25 | env/
26 | build/
27 | develop-eggs/
28 | dist/
29 | downloads/
30 | eggs/
31 | .eggs/
32 | lib/
33 | lib64/
34 | parts/
35 | sdist/
36 | var/
37 | wheels/
38 | *.egg-info/
39 | .installed.cfg
40 | *.egg
41 |
42 | # PyInstaller
43 | # Usually these files are written by a python script from a template
44 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
45 | *.manifest
46 | *.spec
47 |
48 | # Installer logs
49 | pip-log.txt
50 | pip-delete-this-directory.txt
51 |
52 | # Unit test / coverage reports
53 | htmlcov/
54 | .tox/
55 | .coverage
56 | .coverage.*
57 | .cache
58 | nosetests.xml
59 | coverage.xml
60 | *,cover
61 | .hypothesis/
62 |
63 | # Translations
64 | *.mo
65 | *.pot
66 |
67 | # Django stuff:
68 | *.log
69 | local_settings.py
70 |
71 | # Flask stuff:
72 | instance/
73 | .webassets-cache
74 |
75 | # Scrapy stuff:
76 | .scrapy
77 |
78 | # Sphinx documentation
79 | docs/_build/
80 |
81 | # PyBuilder
82 | target/
83 |
84 | # Jupyter Notebook
85 | .ipynb_checkpoints
86 |
87 | # pyenv
88 | .python-version
89 |
90 | # celery beat schedule file
91 | celerybeat-schedule
92 |
93 | # SageMath parsed files
94 | *.sage.py
95 |
96 | # dotenv
97 | .env
98 |
99 | # virtualenv
100 | venv/
101 | ENV/
102 |
103 | # Spyder project settings
104 | .spyderproject
105 |
106 | # Rope project settings
107 | .ropeproject
108 |
109 | .idea/
110 | .vscode/
111 | output
112 | .persist
113 | docs/_source/data
114 | *.pdf
--------------------------------------------------------------------------------
/jaqs_fxdayu/util/concat.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | from pandas.core.internals import BlockManager, BlockPlacement
3 | import numpy as np
4 |
5 |
6 | # rebuild a DataFrame from the given blocks and return it
7 | def block_concat(dfs, idx, columns):
8 |     manager = BlockManager(iter_blocks(dfs), [columns, idx])
9 |     return pd.DataFrame(manager).copy()
10 |
11 |
12 | # yield the input DataFrames' data block by block
13 | def iter_blocks(dfs):
14 |     l = 0
15 |     for df in dfs:
16 |         for block in df._data.blocks:
17 |             # yield Block(block.values, block._mgr_locs.add(l))
18 |             yield block.__class__(block.values, placement=block._mgr_locs.add(l))
19 |         l += len(df.columns)
20 |
21 |
22 | # Horizontal concatenation; only supports DataFrames whose columns are a MultiIndex (the merged-data layout used by DataView).
23 | def quick_concat(dfs, level, index_name="trade_date", how="outer"):
24 |     """
25 |     dfs: list of DataFrame
26 |     level: names of the MultiIndex column levels
27 |     index_name: name of the output DataFrame's index
28 |     how: how the indexes are merged:
29 |         outer: union
30 |         inner: intersection
31 |     """
32 |     columns = join_columns(dfs, level)
33 |     if how == "outer":
34 |         index = join_indexes([df.index for df in dfs], index_name)
35 |     else:
36 |         index = intersect1d_indexes([df.index for df in dfs], index_name)
37 |     return block_concat(
38 |         [pd.DataFrame(df, index) for df in dfs],
39 |         index, columns
40 |     )
41 |
42 |
43 | # union-merge indexes
44 | def join_indexes(idxes, name=None):
45 |     return pd.Index(np.concatenate([index.values for index in idxes]), name=name).sort_values().drop_duplicates()
46 |
47 |
48 | # intersection-merge indexes
49 | def intersect1d_indexes(idxes, name=None):
50 |     return pd.Index(intersect1d(idxes), name=name).sort_values().drop_duplicates()
51 |
52 |
53 | def intersect1d(idxes):
54 |     if len(idxes) == 2:
55 |         return np.intersect1d(*idxes)
56 |     elif len(idxes) > 2:
57 |         return np.intersect1d(intersect1d(idxes[:-1]), idxes[-1])
58 |
59 |
60 | # build the new MultiIndex columns
61 | def join_columns(dfs, level=None):
62 |     """
63 |     dfs: list of DataFrame
64 |     level: names of the MultiIndex column levels
65 |     """
66 |     return pd.MultiIndex.from_tuples(np.concatenate([df.columns.values for df in dfs]), names=level)
--------------------------------------------------------------------------------
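A hedged example for `quick_concat` above. The inputs are illustrative, and note that `block_concat` builds on pandas `BlockManager` internals, so this is tied to the pandas 0.2x generation that requirements.txt pins; newer pandas may reject it.

```python
import pandas as pd
from jaqs_fxdayu.util.concat import quick_concat

idx = pd.Index([20180102, 20180103], name="trade_date")
cols = lambda field: pd.MultiIndex.from_product(
    [["000001.SZ", "600000.SH"], [field]])
close = pd.DataFrame([[10.0, 20.0], [10.5, 19.8]], idx, cols("close"))
open_ = pd.DataFrame([[9.9, 19.9], [10.4, 20.1]], idx, cols("open"))

wide = quick_concat([close, open_], level=["symbol", "field"])
# columns: MultiIndex of (symbol, field); index: union of the trade dates
```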
/jaqs_fxdayu/patch_util/__init__.py:
--------------------------------------------------------------------------------
1 | import importlib
2 | import logging
3 | import warnings
4 | import sys
5 | from collections import defaultdict
6 |
7 | from jaqs_fxdayu.patch_util.postimport import when_imported
8 |
9 | _patch_hooks = defaultdict(list)
10 | _module = "jaqs"
11 | _patched = False
12 |
13 | logger = logging.getLogger(__name__)
14 |
15 |
16 | def register_patch(fullname=_module):
17 |     def decorator(func):
18 |         if _patched:
19 |             raise RuntimeWarning("Patch %s was registered after jaqs_fxdayu.patch_all() was called." % func)
20 |         _patch_hooks[fullname].append(func)
21 |         return func
22 |
23 |     return decorator
24 |
25 |
26 | def reload_jaqs():
27 |     reload_lst = []
28 |     for m in list(sys.modules.keys()):
29 |         if m.startswith("jaqs."):
30 |             del sys.modules[m]
31 |             reload_lst.append(m)
32 |     for m in reload_lst:
33 |         importlib.import_module(m)
34 |
35 |
36 | def patch_all():
37 |     import matplotlib
38 |     import matplotlib.pyplot
39 |     global _patched
40 |     if _patched:
41 |         warnings.warn("jaqs_fxdayu.patch_all() should be called only once!")
42 |         return
43 |     importlib.import_module("jaqs_fxdayu.data")
44 |     importlib.import_module("jaqs_fxdayu.research.signaldigger")
45 |     for fullname, hooks in _patch_hooks.items():
46 |         for func in hooks:
47 |             when_imported(fullname)(func)
48 |     _patched = True
49 |     logger.debug("Finish Patch.")
50 |
51 |
52 | def auto_register_patch(fullname=None, name=None, parent_level=0):
53 |     def decorator(obj):
54 |         def _patch_module(m):
55 |             attr = obj.__name__ if name is None else name
56 |             logger.debug("Patch %s:%s ." % (m.__name__, attr))
57 |             setattr(m, attr, obj)
58 |
59 |         module_name = obj.__module__.replace("jaqs_fxdayu", "jaqs") if fullname is None else fullname
60 |         module_path = module_name.split(".") + [""]
61 |         for level in range(parent_level + 1):
62 |             register_patch(".".join(module_path[:-(level + 1)]))(_patch_module)
63 |         return obj
64 |
65 |     return decorator
66 |
--------------------------------------------------------------------------------
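How the pieces above fit together, as a hedged sketch: anything decorated with `auto_register_patch` is recorded against a `jaqs.*` module name (derived from `__module__`, or given explicitly via `fullname`) and is assigned onto that module once `patch_all()` runs. Registration has to happen before `patch_all()` is called.

```python
from jaqs_fxdayu.patch_util import auto_register_patch

# Illustrative helper, not part of the library: after patch_all(), it would
# appear as jaqs.research.signaldigger.plotting.plot_hello.
@auto_register_patch(fullname="jaqs.research.signaldigger.plotting")
def plot_hello(ax=None):
    return ax
```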
/setup.py:
--------------------------------------------------------------------------------
1 | import codecs
2 |
3 | try:
4 |     from pip._internal.req import parse_requirements  # for pip >= 10
5 | except ImportError:
6 |     from pip.req import parse_requirements
7 |
8 | from os.path import dirname, join
9 | from setuptools import (
10 |     find_packages,
11 |     setup,
12 | )
13 |
14 |
15 | def readme():
16 |     with codecs.open('README.md', 'r', encoding='utf-8') as f:
17 |         return f.read()
18 |
19 |
20 | def version():
21 |     with open(join(dirname(__file__), 'jaqs_fxdayu', 'VERSION.txt'), 'rb') as f:
22 |         return f.read().decode('ascii').strip()
23 |
24 |
25 | requirements = [str(getattr(ir, "req", None) or ir.requirement) for ir in parse_requirements("requirements.txt", session=False)]  # .req on pip < 20, .requirement on pip >= 20
26 | setup(
27 |     name='jaqs_fxdayu',
28 |     version=version(),
29 |     packages=find_packages(exclude=["examples", "tests", "tests.*", "docs"]),
30 |     author='xingetouzi',
31 |     author_email='public@fxdayu.com',
32 |     license='Apache License v2',
33 |     package_data={'': ['*.csv', '*.txt']},
34 |     url='https://github.com/xingetouzi/jaqs_fxdayu',
35 |     keywords="quantitative trading research finance",
36 |     install_requires=requirements,
37 |     description='Open source quantitative research&trading framework, based on https://github.com/quantOS-org/JAQS',
38 |     long_description=readme(),
39 |     zip_safe=False,
40 |     classifiers=[
41 |         'Programming Language :: Python',
42 |         "Intended Audience :: Developers",
43 |         "Intended Audience :: Education",
44 |         "Intended Audience :: End Users/Desktop",
45 |         "Intended Audience :: Financial and Insurance Industry",
46 |         "Intended Audience :: Information Technology",
47 |         "Intended Audience :: Science/Research",
48 |         "License :: OSI Approved :: Apache Software License",
49 |         "Natural Language :: Chinese (Simplified)",
50 |         "Natural Language :: English",
51 |         'Operating System :: Microsoft :: Windows',
52 |         'Operating System :: Unix',
53 |         'Programming Language :: Python :: 3.5',
54 |         'Programming Language :: Python :: 3.6',
55 |     ],
56 | )
57 |
--------------------------------------------------------------------------------
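The `getattr` guard in setup.py is needed because pip moved the parsed-requirement attribute (`.req` before pip 20, the plain string `.requirement` afterwards). If you would rather not depend on pip internals at all, a self-contained fallback looks like this (a sketch, not what the repo ships):

```python
def read_requirements(path="requirements.txt"):
    # plain-text parse: one requirement per line, comments and blanks skipped
    with open(path) as f:
        return [line.strip() for line in f
                if line.strip() and not line.lstrip().startswith("#")]
```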
/jaqs_fxdayu/research/timingdigger/performance.py:
--------------------------------------------------------------------------------
1 | from jaqs_fxdayu.research.signaldigger.performance import *
2 |
3 |
4 | def cal_return_stats(ret):
5 |     if isinstance(ret, pd.Series) or isinstance(ret, pd.DataFrame):
6 |         ret = ret.values
7 |     ret = ret.reshape(-1, 1)
8 |     summary_table = pd.DataFrame()
9 |     if len(ret) == 0:
10 |         return pd.DataFrame(data=np.nan,
11 |                             columns=['t-stat', 'p-value', "mean", "std", "info_ratio",
12 |                                      "skewness", "kurtosis", "pct5", "pct25", "pct50",
13 |                                      "pct75", "pct95", "occurance"],
14 |                             index=[0])
15 |     t_stats, p_values = scst.ttest_1samp(ret, np.zeros(ret.shape[1]), axis=0)
16 |
17 |     summary_table['t-stat'] = t_stats
18 |     summary_table['p-value'] = np.round(p_values, 5)
19 |     summary_table["mean"] = ret.mean()
20 |     summary_table["std"] = ret.std()
21 |     summary_table["info_ratio"] = summary_table["mean"] / summary_table["std"]
22 |     summary_table["skewness"] = scst.skew(ret, axis=0)
23 |     summary_table["kurtosis"] = scst.kurtosis(ret, axis=0)
24 |     for percent in [5, 25, 50, 75, 95]:
25 |         summary_table["pct" + str(percent)] = np.percentile(ret, percent)
26 |     summary_table["occurance"] = len(ret)
27 |
28 |     return summary_table
29 |
30 |
31 | def calc_performance_metrics(ser, cum_return=False, compound=False):
32 |     """
33 |     Calculate total return, volatility, mean return and information ratio.
34 |     Data frequency is assumed to be daily.
35 |
36 |     Parameters
37 |     ----------
38 |     ser : pd.DataFrame or pd.Series
39 |         Index is int date, values are floats.
40 |         ser should start from 0.
41 |     cum_return : bool
42 |         Whether ser is cumulative or daily return.
43 |     compound : bool
44 |         Whether calculation of return is compound.
45 |
46 |     Returns
47 |     -------
48 |     res : dict
49 |
50 |     """
51 |     if isinstance(ser, pd.DataFrame):
52 |         ser = ser.iloc[:, 0]
53 |     if cum_return:
54 |         cum_ret = ser
55 |         ret = cum2ret(cum_ret, period=1, compound=compound)
56 |     else:
57 |         ret = ser
58 |         cum_ret = ret2cum(ret, compound=compound)
59 |
60 |     total_ret = cum_ret.iat[-1]
61 |     std = np.std(ret)
62 |     mean = np.mean(ret)
63 |     res = {'total_ret': total_ret,
64 |            'std(ret)': std,
65 |            'mean(ret)': mean,
66 |            'ir': mean / std}
67 |     return res
68 |
69 |
--------------------------------------------------------------------------------
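A hedged usage sketch for `cal_return_stats` above (the returns are made up; the column spelling `occurance` follows the file):

```python
import pandas as pd
from jaqs_fxdayu.research.timingdigger.performance import cal_return_stats

trade_returns = pd.Series([0.012, -0.004, 0.030, -0.011, 0.007])
stats = cal_return_stats(trade_returns)
print(stats[["mean", "std", "info_ratio", "occurance"]])
```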
/jaqs_fxdayu/data/signal_function_mod.py:
--------------------------------------------------------------------------------
1 | # encoding=utf-8
2 |
3 | import pandas as pd
4 | import numpy as np
5 | import warnings
6 |
7 | # talib wrapper that automatically drops all-NaN dates; used for signal computation
8 | def ta(ta_method='MA',
9 |        ta_column=0,
10 |       Open=None,
11 |       High=None,
12 |       Low=None,
13 |       Close=None,
14 |       Volume=None,
15 |       *args,
16 |       **kwargs):
17 |     try:
18 |         from talib import abstract
19 |     except ImportError:
20 |         raise RuntimeError("Install talib before using talib-based functions in formulas.")
21 |     if not isinstance(ta_method, str):
22 |         raise ValueError("Invalid format: ta_method must be the name (str) of the talib function to call; got %s, expected str" % (type(ta_method)))
23 |     else:
24 |         if not (ta_method in abstract.__dict__):
25 |             raise ValueError("Unknown talib function name: got %s; the wrapped talib library only supports %s" % (ta_method, str(abstract.__dict__.keys())))
26 |
27 |     candle_dict = {"open": Open,
28 |                    "high": High,
29 |                    "low": Low,
30 |                    "close": Close,
31 |                    "volume": Volume}
32 |
33 |     waiting_for_pop = []
34 |     for candle_type in candle_dict.keys():
35 |         if not isinstance(candle_dict[candle_type], pd.DataFrame):
36 |             waiting_for_pop.append(candle_type)
37 |             continue
38 |         if candle_dict[candle_type].size == 0:
39 |             raise ValueError("%s is empty; please check the corresponding input data." % (candle_type, ))
40 |     # drop candle fields that were not provided
41 |     for i in waiting_for_pop:
42 |         candle_dict.pop(i)
43 |
44 |     results = []
45 |     candle_pannel = pd.Panel.from_dict(candle_dict)
46 |
47 |     for sec in candle_pannel.minor_axis:
48 |         df = candle_pannel.minor_xs(sec).dropna()
49 |         if len(df) == 0:
50 |             warnings.warn("Data for %s is too sparse to compute the indicator; please check for data problems." % (sec,))
51 |             continue
52 |         result = pd.DataFrame(getattr(abstract, ta_method)(df, *args, **kwargs))
53 |
54 |         if isinstance(ta_column, int):
55 |             if ta_column >= len(result.columns) or ta_column < 0:
56 |                 raise ValueError("Invalid ta_column: the column index must be between 0 and %s; got %s" % (len(result.columns) - 1, ta_column))
57 |             result = pd.DataFrame(result.iloc[:, ta_column])
58 |         elif isinstance(ta_column, str):
59 |             if not (ta_column in result.columns):
60 |                 raise ValueError("Invalid ta_column: available column names are %s; got %s" % (str(result.columns), ta_column))
61 |             result = pd.DataFrame(result.loc[:, ta_column])
62 |         else:
63 |             raise ValueError("Invalid ta_column type %s; pass a valid column index (int) or column name (str)" % (type(ta_column)))
64 |
65 |         result.columns = [sec, ]
66 |         results.append(result)
67 |
68 |     if len(results) == 0:
69 |         return None
70 |     else:
71 |         tmp = pd.concat(results, axis=1)
72 |         tmp = tmp.reindex(columns=candle_pannel.minor_axis, index=candle_pannel.major_axis)
73 |         return tmp
74 |
75 |
76 | # position of the rolling maximum
77 | def ts_argmax(df, window=10):
78 |     return df.rolling(window).apply(np.argmax) + 1
79 |
80 |
81 | # position of the rolling minimum
82 | def ts_argmin(df, window=10):
83 |     return df.rolling(window).apply(np.argmin) + 1
84 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # jaqs-fxdayu: a stock multi-factor research extension for the jaqs framework
2 |
3 | ## Introduction
4 |
5 | Building on the official jaqs release, FxDaYu Finance (大鱼金融) has focused on improving and optimizing the stock multi-factor research workflow, making it easier to design, evaluate and analyze factor performance, tune factor effectiveness, and study factor combinations.
6 |
7 | It mainly includes:
8 |
9 | ### Basics:
10 | - dataservice
11 |
12 | A high-level wrapper over the low-level jaqs dataapi, with shortcut queries for common data such as bars, the trading calendar, index constituents and industry classifications.
13 |
14 | - dataview
15 |
16 | Can be seen as a pandas-based database tailored to factor research that simplifies factor design and implementation. jaqs_fxdayu improves on the official version with more convenient and flexible factor-data queries and operations.
17 |
18 | - digger
19 |
20 | Single-factor analysis and performance visualization. Improved over the official version.
21 |
22 | - performance
23 |
24 | Performance calculations commonly used in factor-based stock selection research. Improved over the official version.
25 |
26 | ### Extensions:
27 | - analysis
28 |
29 | Multi-dimensional single-factor analysis, rating a factor along three dimensions: factor IC, factor return, and the potential return space of the selection. New module.
30 |
31 | - process
32 |
33 | Common factor-processing operations such as winsorization and neutralization. New module.
34 |
35 | - optimizer
36 |
37 | Factor parameter optimization. New module.
38 |
39 | - multi_factor
40 |
41 | Multi-factor processing and combination. New module.
42 |
43 | - dp
44 |
45 | Small utilities for A-share factor research and trade analysis, such as querying historical trading days and historical industry classification tables. New module.
46 |
47 | - timingdigger
48 |
49 | Timing-signal research. After designing selection factors and events, TimingDigger lets you test them further with simple timed exit conditions, including take-profit and stop-loss exits. New module.
50 |
51 | - hf_dataview
52 |
53 | A data-manipulation and signal-computation module designed for high-frequency (bar-level) factors and events, functionally identical to DataView. New module.
54 |
55 | ## Installation and upgrade
56 | ### Dependencies
57 | This module extends JAQS, supports `python3` only, and requires `jaqs>=0.6.11`.
58 |
59 | For installing jaqs, see the [official JAQS documentation](http://jaqs.readthedocs.io/zh_CN/latest/install.html).
60 |
61 | - If jaqs is not installed yet, install it from pip:
62 | ```bash
63 | $ pip install jaqs
64 | ```
65 |
66 | - If jaqs is already installed, upgrade it:
67 | ```bash
68 | $ pip install -U --no-deps jaqs
69 | ```
70 |
71 | ### Installation
72 | ```bash
73 | $ pip install jaqs_fxdayu
74 | ```
75 |
76 | ### Upgrade
77 | When a new version is released, upgrade with:
78 | ```bash
79 | $ pip uninstall jaqs_fxdayu
80 | $ pip install jaqs_fxdayu
81 | ```
82 |
83 | ## Usage
84 | The module has two main parts:
85 |
86 | ### Basic API:
87 | Replaces and extends the original jaqs modules.
88 | Supports monkey_patch or importing directly from jaqs_fxdayu.
89 |
90 | Taking DataView as an example:
91 |
92 | - monkey_patch:
93 | ```
94 | import jaqs_fxdayu
95 | jaqs_fxdayu.patch_all()  # must run before any import jaqs.*
96 |
97 | from jaqs.data import DataView
98 |
99 | dv = DataView()
100 |
101 | ...
102 | ```
103 |
104 | !!! Note
105 |     This approach stays maximally compatible with native JAQS code, which eases migration.
106 |
107 | - Direct import:
108 | ```
109 | from jaqs_fxdayu.data import DataView
110 |
111 | dv = DataView()
112 |
113 | ...
114 | ```
115 |
116 | !!! Note
117 |     This approach is more explicit and lets IDEs provide static code completion.
118 |
119 | ### Extended API:
120 | Mostly developed independently; provides extensions that are common in factor analysis but not implemented in jaqs.
121 | These are imported from the jaqs_fxdayu module itself.
122 | For example:
123 | ```python
124 | from jaqs_fxdayu.research import Optimizer
125 | ```
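For instance, a short end-to-end sketch drawn from this repo's own tests (the folder path is illustrative):

```python
from jaqs_fxdayu.data import DataView

dv = DataView()
dv.load_dataview(folder_path='./output/prepared/my_dataview')  # illustrative path
dv.add_formula('myvar1', 'Delta(high - close, 1)', is_quarterly=False, add_data=True)
factor = dv.get_ts('myvar1')
```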
126 |
127 |
128 | ### Documentation
129 | [Full documentation](http://jaqs-fxdayu.readthedocs.io/zh_CN/latest/)
130 |
131 | ## Latest features
132 | ### 2018/7/15
133 |
134 | When adding financial data, dataview now allows specifying the financial report type
135 |
136 | ### 2018/7/2
137 |
138 | TimingDigger/SignalDigger support per-group quantiles
139 |
140 | When preprocessing factor/signal data, if a group argument is passed, quantiles are computed within each group rather than over the whole dataset.
141 |
142 | ### 2018/6/9
143 |
144 | TimingDigger's create_event_report method can now plot entry and exit points
145 |
146 | Added hf_dataview, a data-manipulation and signal-computation module designed for high-frequency (bar-level) factors and events, functionally identical to DataView.
147 |
148 | ### 2018/6/5
149 | New selection-plus-timing research (TimingDigger): after designing selection factors and events, you can test them further with simple timed exit conditions, including take-profit and stop-loss exits.
150 |
151 | ### 2018/4/19
152 | The parameter optimizer now supports calling user-defined methods inside the formula being optimized.
153 |
154 | ### 2018/4/19
155 | Added process-mad for factor winsorization. Improved the efficiency of industry and market-cap neutralization.
156 |
157 | ### 2018/4/16
158 | Added multi_factor-get_factors_ret_df for obtaining a matrix of factor return series. combine_factors also gained a weighting scheme based on recent factor returns.
159 |
160 | ### 2018/4/11
161 | Added the dataview-refresh_data method for updating a dataset.
162 |
163 | ### 2018/3/26
164 | Added dataservice documentation. dataservice is a high-level wrapper over the low-level jaqs dataapi, with shortcut queries for common data such as bars, the trading calendar, index constituents and industry classifications.
165 |
166 | ### 2018/3/26
167 |
168 | Added the dp module: small utilities for A-share factor research and trade analysis, such as querying historical trading days and historical industry classification tables
169 |
170 | Added documentation for the performance module: performance calculations commonly used in factor-based stock selection research
171 |
172 |
173 | ### 2018/3/20
174 |
175 | Released as a standalone module; documentation updated
176 |
177 | ### 2018/3/19 update
178 |
179 | Added the dataview-fields optional-field query; see the dataview-fields documentation
180 |
181 | ## Support
182 |
183 | - [GitHub](https://github.com/xingetouzi/jaqs-fxdayu/tree/master)
184 | - [Visit the FxDaYu academy for more examples and quant-finance material](http://www.fxdayu.com)
185 | - Join QQ group 372592121 for discussion
186 |
--------------------------------------------------------------------------------
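docs/index.md below mirrors README.md verbatim for the documentation site. As a quick check of what `patch_all()` actually rewires (taken from tests/test_patch_all.py):

```python
import jaqs_fxdayu
jaqs_fxdayu.patch_all()           # must run before any `import jaqs.*`

from jaqs.data import DataView    # now resolves to the jaqs_fxdayu class
assert DataView.__module__.startswith("jaqs_fxdayu.")
```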
/docs/index.md:
--------------------------------------------------------------------------------
1 | # jaqs-fxdayu: a stock multi-factor research extension for the jaqs framework
2 |
3 | ## Introduction
4 |
5 | Building on the official jaqs release, FxDaYu Finance (大鱼金融) has focused on improving and optimizing the stock multi-factor research workflow, making it easier to design, evaluate and analyze factor performance, tune factor effectiveness, and study factor combinations.
6 |
7 | It mainly includes:
8 |
9 | ### Basics:
10 | - dataservice
11 |
12 | A high-level wrapper over the low-level jaqs dataapi, with shortcut queries for common data such as bars, the trading calendar, index constituents and industry classifications.
13 |
14 | - dataview
15 |
16 | Can be seen as a pandas-based database tailored to factor research that simplifies factor design and implementation. jaqs_fxdayu improves on the official version with more convenient and flexible factor-data queries and operations.
17 |
18 | - digger
19 |
20 | Single-factor analysis and performance visualization. Improved over the official version.
21 |
22 | - performance
23 |
24 | Performance calculations commonly used in factor-based stock selection research. Improved over the official version.
25 |
26 | ### Extensions:
27 | - analysis
28 |
29 | Multi-dimensional single-factor analysis, rating a factor along three dimensions: factor IC, factor return, and the potential return space of the selection. New module.
30 |
31 | - process
32 |
33 | Common factor-processing operations such as winsorization and neutralization. New module.
34 |
35 | - optimizer
36 |
37 | Factor parameter optimization. New module.
38 |
39 | - multi_factor
40 |
41 | Multi-factor processing and combination. New module.
42 |
43 | - dp
44 |
45 | Small utilities for A-share factor research and trade analysis, such as querying historical trading days and historical industry classification tables. New module.
46 |
47 | - timingdigger
48 |
49 | Timing-signal research. After designing selection factors and events, TimingDigger lets you test them further with simple timed exit conditions, including take-profit and stop-loss exits. New module.
50 |
51 | - hf_dataview
52 |
53 | A data-manipulation and signal-computation module designed for high-frequency (bar-level) factors and events, functionally identical to DataView. New module.
54 |
55 | ## Installation and upgrade
56 | ### Dependencies
57 | This module extends JAQS, supports `python3` only, and requires `jaqs>=0.6.11`.
58 |
59 | For installing jaqs, see the [official JAQS documentation](http://jaqs.readthedocs.io/zh_CN/latest/install.html).
60 |
61 | - If jaqs is not installed yet, install it from pip:
62 | ```bash
63 | $ pip install jaqs
64 | ```
65 |
66 | - If jaqs is already installed, upgrade it:
67 | ```bash
68 | $ pip install -U --no-deps jaqs
69 | ```
70 |
71 | ### Installation
72 | ```bash
73 | $ pip install jaqs_fxdayu
74 | ```
75 |
76 | ### Upgrade
77 | When a new version is released, upgrade with:
78 | ```bash
79 | $ pip uninstall jaqs_fxdayu
80 | $ pip install jaqs_fxdayu
81 | ```
82 |
83 | ## Usage
84 | The module has two main parts:
85 |
86 | ### Basic API:
87 | Replaces and extends the original jaqs modules.
88 | Supports monkey_patch or importing directly from jaqs_fxdayu.
89 |
90 | Taking DataView as an example:
91 |
92 | - monkey_patch:
93 | ```
94 | import jaqs_fxdayu
95 | jaqs_fxdayu.patch_all()  # must run before any import jaqs.*
96 |
97 | from jaqs.data import DataView
98 |
99 | dv = DataView()
100 |
101 | ...
102 | ```
103 |
104 | !!! Note
105 |     This approach stays maximally compatible with native JAQS code, which eases migration.
106 |
107 | - Direct import:
108 | ```
109 | from jaqs_fxdayu.data import DataView
110 |
111 | dv = DataView()
112 |
113 | ...
114 | ```
115 |
116 | !!! Note
117 |     This approach is more explicit and lets IDEs provide static code completion.
118 |
119 | ### Extended API:
120 | Mostly developed independently; provides extensions that are common in factor analysis but not implemented in jaqs.
121 | These are imported from the jaqs_fxdayu module itself.
122 | For example:
123 | ```python
124 | from jaqs_fxdayu.research import Optimizer
125 | ```
126 |
127 |
128 | ### Documentation
129 | [Full documentation](http://jaqs-fxdayu.readthedocs.io/zh_CN/latest/)
130 |
131 | ## Latest features
132 | ### 2018/7/15
133 |
134 | When adding financial data, dataview now allows specifying the financial report type
135 |
136 | ### 2018/7/2
137 |
138 | TimingDigger/SignalDigger support per-group quantiles
139 |
140 | When preprocessing factor/signal data, if a group argument is passed, quantiles are computed within each group rather than over the whole dataset.
141 |
142 | ### 2018/6/9
143 |
144 | TimingDigger's create_event_report method can now plot entry and exit points
145 |
146 | Added hf_dataview, a data-manipulation and signal-computation module designed for high-frequency (bar-level) factors and events, functionally identical to DataView.
147 |
148 | ### 2018/6/5
149 | New selection-plus-timing research (TimingDigger): after designing selection factors and events, you can test them further with simple timed exit conditions, including take-profit and stop-loss exits.
150 |
151 | ### 2018/4/19
152 | The parameter optimizer now supports calling user-defined methods inside the formula being optimized.
153 |
154 | ### 2018/4/19
155 | Added process-mad for factor winsorization. Improved the efficiency of industry and market-cap neutralization.
156 |
157 | ### 2018/4/16
158 | Added multi_factor-get_factors_ret_df for obtaining a matrix of factor return series. combine_factors also gained a weighting scheme based on recent factor returns.
159 |
160 | ### 2018/4/11
161 | Added the dataview-refresh_data method for updating a dataset.
162 |
163 | ### 2018/3/26
164 | Added dataservice documentation. dataservice is a high-level wrapper over the low-level jaqs dataapi, with shortcut queries for common data such as bars, the trading calendar, index constituents and industry classifications.
165 |
166 | ### 2018/3/26
167 |
168 | Added the dp module: small utilities for A-share factor research and trade analysis, such as querying historical trading days and historical industry classification tables
169 |
170 | Added documentation for the performance module: performance calculations commonly used in factor-based stock selection research
171 |
172 |
173 | ### 2018/3/20
174 |
175 | Released as a standalone module; documentation updated
176 |
177 | ### 2018/3/19 update
178 |
179 | Added the dataview-fields optional-field query; see the dataview-fields documentation
180 |
181 | ## Support
182 |
183 | - [GitHub](https://github.com/xingetouzi/jaqs-fxdayu/tree/master)
184 | - [Visit the FxDaYu academy for more examples and quant-finance material](http://www.fxdayu.com)
185 | - Join QQ group 372592121 for discussion
186 |
--------------------------------------------------------------------------------
/jaqs_fxdayu/data/built_in_funcs_docs.csv:
--------------------------------------------------------------------------------
1 | 分类,说明,公式,示例
2 | 四则运算,加法运算,+,close + open
3 | 四则运算,减法运算,-,close - open
4 | 四则运算,乘法运算,*,vwap * volume
5 | 四则运算,除法运算,/,close / open
6 | 基本数学函数,"符号函数,返回值为{-1, 0, 1}",Sign(x),Sign(close-open)
7 | 基本数学函数,绝对值函数,Abs(x),Abs(close-open)
8 | 基本数学函数,自然对数,Log(x),Log(close/open)
9 | 基本数学函数,对x取负,-x,-close
10 | 基本数学函数,幂函数,^,close ^ 2
11 | 基本数学函数,幂函数x^y,"Pow(x,y)","Pow(close,2)"
12 | 基本数学函数,保持符号的幂函数,等价于Sign(x) * (Abs(x)^e),"SignedPower(x,e)","SignedPower(close-open, 0.5)"
13 | 基本数学函数,取余函数,%,oi % 10
14 | 逻辑运算,判断是否相等,==,close == open
15 | 逻辑运算,判断是否不等,!=,close != open
16 | 逻辑运算,大于,>,close > open
17 | 逻辑运算,小于,<,close < open
18 | 逻辑运算,大于等于,>=,close >= open
19 | 逻辑运算,小于等于,<=,close <= open
20 | 逻辑运算,逻辑与,&&,(close > open) && (close > vwap)
21 | 逻辑运算,逻辑或,||,(close > open) || (close > vwap)
22 | 逻辑运算,逻辑非,!,!(close>open)
23 | 逻辑运算,判断值是否为NaN,IsNan(x),IsNan(net_profit)
24 | 三角函数,正弦函数,Sin(x),Sin(close/open)
25 | 三角函数,余弦函数,Cos(x),Cos(close/open)
26 | 三角函数,正切函数,Tan(x),Tan(close/open)
27 | 三角函数,开平方函数,Sqrt(x),Sqrt(close^2 + open^2)
28 | 取整函数,向上取整,Ceil(x),Ceil(high)
29 | 取整函数,向下取整,Floor(x),Floor(low)
30 | 取整函数,四舍五入,Round(x),Round(close)
31 | 选择函数,取 x 和 y 同位置上的较大值组成新的DataFrame返回,"Max(x,y)","Max(close, open)"
32 | 选择函数,取 x 和 y 同位置上的较小值组成新的DataFrame返回,"Min(x,y)","Min(close,open)"
33 | 选择函数,cond为True取x的值,反之取y的值,"If(cond,x,y)","If(close > open, close, open) 表示取open和close的较大值"
34 | 时间序列函数 - 基本数学运算,指标n个周期前的值,"Delay(x,n)","Delay(close,1) 表示前一天收盘价"
35 | 时间序列函数 - 基本数学运算,指标在过去n天的和,"Ts_Sum(x,n)","Ts_Sum(volume,5) 表示一周成交量"
36 | 时间序列函数 - 基本数学运算,指标在过去 n 天的积,"Ts_Product(x,n)","Ts_Product(close/Delay(close,1),5) - 1 表示过去5天累计收益"
37 | 时间序列函数 - 基本数学运算,指标当前值与n天前的值的差,"Delta(x,n)","Delta(close,5)"
38 | 时间序列函数 - 基本数学运算,计算指标相比n天前的变化率,默认计算百分比变化率;当log为1时,计算对数变化率;为0时计算普通变化率,"Return(x,n,log)","Return(close,5,True)计算一周对数收益"
39 | 时间序列函数 - 基本数学运算,计算指标在过去n天的平均值,Ts_Mean(x,n),"Ts_Mean(close,5)"
40 | 时间序列函数 - 统计,指标在过去n天的标准差,"StdDev(x,n)","StdDev(close/Delay(close,1)-1, 10)"
41 | 时间序列函数 - 统计,两个指标在过去n天的协方差,"Covariance(x,y,n)","Covariance(close, open, 10)"
42 | 时间序列函数 - 统计,两个指标在过去n天的相关系数,"Correlation(x,y,n)","Correlation(close,open, 10)"
43 | 时间序列函数 - 统计,计算指标在过去n天的最小值,Ts_Min(x,n),Ts_Min(close,5)
44 | 时间序列函数 - 统计,计算指标在过去n天的最大值,Ts_Max(x,n),Ts_Max(close,5)
45 | 时间序列函数 - 统计,计算指标在过去n天的偏度,Ts_Skewness(x,n),Ts_Skewness(close,20)
46 | 时间序列函数 - 统计,计算指标在过去n天的峰度,Ts_Kurtosis(x,n),Ts_Kurtosis(close,20)
47 | 时间序列函数 - 排名,计算指标在过去n天的排名,返回值为名次,"Ts_Rank(x, n)","Ts_Rank(close, 5)"
48 | 时间序列函数 - 排名,"计算指标在过去n天的百分比,返回值为[0.0, 1.0]","Ts_Percentile(x, n)","Ts_Percentile(close, 5)"
49 | 时间序列函数 - 排名,计算指标在过去n天所属的quantile,返回值为表示quantile的整数,"Ts_Quantile(x, n)","Ts_Quantile(close, 5)"
50 | 时间序列函数 - 排名,指数移动平均,以halflife的衰减对x进行指数移动平均,"Ewma(x, halflife)","Ewma(x, 3)"
51 | 横截面函数 - 排名,将指标值在横截面方向排名,返回值为名次,Rank(x),"Rank( close/Delay(close,1)-1 ) 表示按日收益率进行排名"
52 | 横截面函数 - 排名,按分组数据g在每组内将指标值在横截面方向排名,返回值为名次,"GroupRank(x,g)","GroupRank(close/Delay(close,1)-1, g) 表示按分组g根据日收益率进行分组排名"
53 | 横截面函数 - 排名,将指标值在横截面方向排名,返回值为排名百分比,Percentile(x),Percentile(close)
54 | 横截面函数 - 排名,按分组数据g在每组内将指标值在横截面方向排名,返回值为排名百分比,"GroupPercentile(x, g, n)","GroupPercentile(close, sw1) 按申万1级行业"
55 | 横截面函数 - 排名,和Rank函数相同,但只有 cond 中值为True的标的参与排名,"ConditionRank(x, cond)","GroupRank(close/Delay(close,1)-1, cond) 表示按条件cond根据日收益率进行分组排名"
56 | 横截面函数 - 排名,根据指标值在横截面方向将标的分成n个quantile,返回值为所属quantile,"Quantile(x, n)","Quantile( close/Delay(close,1)-1,5)表示按日收益率分为5档"
57 | 横截面函数 - 排名,按分组数据g在每组内根据指标值在横截面方向将标的分成n个quantile,返回值为所属quantile,"GroupQuantile(x, g, n)","GroupQuantile(close/Delay(close,1)-1,g,5) 表示按日收益率和分组g进行分档,每组分为5档"
58 | 横截面函数 - 数据处理,将指标标准化,即在横截面上减去平均值后再除以标准差,Standardize(x),"Standardize(close/Delay(close,1)-1) 表示日收益率的标准化"
59 | 横截面函数 - 数据处理,"将指标横截面上去极值,用MAD (Maximum Absolute Deviation)方法, z_score为极值判断标准","Cutoff(x, z_score)","Cutoff(close,3) 表示去掉z_score大于3的极值"
60 | 财报函数,将累计财务数据转换为单季财务数据,CumToSingle(x),CumToSingle(net_profit)
61 | 财报函数,从累计财务数据计算TTM的财务数据,TTM(x),TTM(net_profit)
62 | 其他,"过去 n 天的指数衰减函数,其中 f 是平滑因子。这里 f 是平滑因子,可以赋一个小于 1 的值。Decay_exp(x, f, n) = (x[date] + x[date - 1] * f + … +x[date – n - 1] * (f (n – 1))) / (1 + f + … + f ^ (n - 1))","Decay_exp(x,f,n)","Decay_exp(close,0.9,10)"
63 | 其他,"过去n天的线性衰减函数。Decay_linear(x, n) = (x[date] * n + x[date - 1] * (n - 1) + … + x[date – n - 1]) / (n + (n - 1) + … + 1)","Decay_linear(x,n)","Decay_linear(close,15)"
64 | 其他,如果 x 的值介于 lower 和 upper,则将其设定为 newval,"Tail(x, lower, upper, newval)","Tail(close/open, 0.99, 1.01, 1.0)"
65 | 其他,Step(n) 为每个标的创建一个向量,向量中 n 代表最新日期,n-1 代表前一天,以此类推。,Step(n),Step(30)
66 | 其他,时间序列函数,计算 x 中的值在过去 n 天中为 nan (非数字)的次数,"CountNans(x,n)","CountNans((close-open)^0.5, 10) 表示过去10天内有几天close小于open"
67 | 时间序列函数 - 统计,计算指标在过去n天最大值的坐标,"Ts_Argmax(x,n)","Ts_Argmax(high,10)"
68 | 时间序列函数 - 统计,计算指标在过去n天最小值的坐标,"Ts_Argmin(x,n)","Ts_Argmin(low,10)"
69 | 技术指标,根据talib技术指标库计算x中每只股票的技术指标,"Ta(ta_method,ta_column,open,high,low,close,volume,*args)","Ta('MACD','macdsignal',open,high,low,close,volume) 表示对每只股票进行talib.macd计算并返回macdsignal的值(自动剔除停牌期数据).
70 |
--------------------------------------------------------------------------------
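The functions tabulated above are what DataView formulas can call. A hedged one-liner combining a few documented entries (assumes `dv` is a prepared DataView, as in the README example):

```python
# 20-day mean daily return, cross-sectionally ranked and negated (reversal)
dv.add_formula('rev20', '-Rank(Ts_Mean(close / Delay(close, 1) - 1, 20))',
               is_quarterly=False, add_data=True)
```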
/jaqs_fxdayu/util/dp.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | from collections import defaultdict
3 |
4 |
5 | English_classify = {'480000': 'Bank',
6 |                     '430000': 'Real_Estate',
7 |                     '460000': 'Leisure_Service',
8 |                     '640000': 'Mechanical_Equipment',
9 |                     '240000': 'Nonferrous_Metals',
10 |                     '510000': 'Synthesis',
11 |                     '410000': 'Public_Utility',
12 |                     '450000': 'Commercial_Trade',
13 |                     '730000': 'Communication',
14 |                     '330000': 'Household_Appliances',
15 |                     '720000': 'Media',
16 |                     '630000': 'Electrical_Equipment',
17 |                     '270000': 'Electronic_Engineering',
18 |                     '490000': 'Non_Bank_Finance',
19 |                     '370000': 'Medical_Biology',
20 |                     '710000': 'Computer',
21 |                     '280000': 'Car',
22 |                     '340000': 'Food_Beverage',
23 |                     '220000': 'Chemical_Engineering',
24 |                     '210000': 'Digging',
25 |                     '230000': 'Steel',
26 |                     '650000': 'Military',
27 |                     '110000': 'Agriculture_Fishing',
28 |                     '420000': 'Transportation',
29 |                     '620000': 'Architectural_Ornament',
30 |                     '350000': 'Textile_Garment',
31 |                     '610000': 'Building_Materials',
32 |                     '360000': 'Light_Manufacturing'}
33 |
34 |
35 | # trading-day list (returns pandas.Index)
36 | def trade_days(api, start, end):
37 |     """
38 |
39 |     :param api: jaqs.data.DataApi
40 |     :param start: int, sample: 20170101
41 |     :param end: int, sample: 20180101
42 |     :return:
43 |     """
44 |     data, msg = api.query("jz.secTradeCal", "start_date={}&end_date={}".format(start, end))
45 |     if msg == "0,":
46 |         return data.set_index("trade_date").rename_axis(int).index
47 |     else:
48 |         raise Exception(msg)
49 |
50 |
51 | def st_status(api, symbol, start, end):
52 |     """
53 |     :param api: jaqs.data.DataApi
54 |     :param symbol: str, sample: 600000.SH,000001.SZ
55 |     :param start: int, sample: 20170101
56 |     :param end: int, sample: 20180101
57 |     :return:
58 |     """
59 |     dates = trade_days(api, start, end)
60 |     data, msg = api.query("lb.sState", "symbol={}".format(symbol))
61 |     if len(data) == 0:
62 |         return None
63 |     data["in_date"] = data["effDate"].apply(int)
64 |     data["out_date"] = 99999999
65 |     data = data.sort_values(by=["in_date"])
66 |     if msg != "0,":
67 |         raise Exception(msg)
68 |
69 |     return expand(data, dates, None, value="state").fillna(0)
70 |
71 |
72 | # index constituents (returns pandas.DataFrame)
73 | def index_cons(api, index_code, start, end):
74 |     """
75 |
76 |     :param api: jaqs.data.DataApi
77 |     :param index_code: str, sample: 000300.SH
78 |     :param start: int, sample: 20170101
79 |     :param end: int, sample: 20180101
80 |     :return:
81 |     """
82 |     data, msg = api.query("lb.indexCons", "index_code={}&start_date={}&end_date={}".format(index_code, start, end))
83 |     if msg == "0,":
84 |         data["in_date"] = data["in_date"].apply(int)
85 |         data["out_date"] = data["out_date"].replace("", "99999999").apply(int)
86 |         return data
87 |     else:
88 |         raise Exception(msg)
89 |
90 |
91 | # expand date ranges into a daily table
92 | def expand(data, index, default=False, prefix=True, key="symbol", start="in_date", end="out_date", value=None):
93 |     """
94 |
95 |     :param data: pd.DataFrame
96 |     :param index: pd.Index, used as the output table's index
97 |     :param default: default value for the new table
98 |     :param prefix: value assigned where the range condition holds
99 |     :param key: column of data used as the output table's columns
100 |     :param start: column holding the start of the value range
101 |     :param end: column holding the end of the value range
102 |     :param value: take preset values from this column of data
103 |     :return:
104 |
105 |     Examples
106 |     --------
107 |     > dates
108 |     Int64Index([20170626, 20170627, 20170628, 20170629, 20170630, 20170703, 20170704, 20170705],
109 |                dtype='int64',
name='trade_date')
110 |
111 |     > industry
112 |        in_date  out_date     symbol industry1_name industry1_code
113 |     0  20140101  99999999  000001.SZ             银行         480000
114 |     1  20140101  20151001  000006.SZ            房地产         430000
115 |     2  20151001  20170629  000006.SZ             采掘         210000
116 |     3  20170629  99999999  000006.SZ            房地产         430000
117 |     4  20140101  99999999  000651.SZ           家用电器         330000
118 |
119 |     > expand(industry, dates, None, value="industry1_name")
120 |                000001.SZ 000006.SZ 000651.SZ
121 |     trade_date
122 |     20170626          银行        采掘      家用电器
123 |     20170627          银行        采掘      家用电器
124 |     20170628          银行        采掘      家用电器
125 |     20170629          银行       房地产      家用电器
126 |     20170630          银行       房地产      家用电器
127 |     20170703          银行       房地产      家用电器
128 |     20170704          银行       房地产      家用电器
129 |     20170705          银行       房地产      家用电器
130 |
131 |     """
132 |     if isinstance(data, pd.DataFrame) and isinstance(index, pd.Index):
133 |         dct = defaultdict(lambda: pd.Series(default, index))
134 |         for name, row in data.iterrows():
135 |             s = dct[row[key]]
136 |             s.loc[row[start]:row[end]] = prefix if value is None else row[value]
137 |         return pd.DataFrame(dct)
138 |
139 |
140 | # daily index-constituent table
141 | def daily_index_cons(api, index_code, start, end):
142 |     """
143 |
144 |     :param api: jaqs.data.DataApi
145 |     :param index_code: str, sample: 000300.SH
146 |     :param start: int, sample: 20170101
147 |     :param end: int, sample: 20180101
148 |     :return:
149 |     """
150 |     dates = trade_days(api, start, end)
151 |     codes = index_cons(api, index_code, start, end)
152 |     return expand(codes, dates)
153 |
154 |
155 | # daily industry-classification table
156 | def daily_sec_industry(api, symbol, start, end, source="sw", value="industry1_code"):
157 |     """
158 |
159 |     :param api: jaqs.data.DataApi
160 |     :param symbol: str, sample: 600000.SH,000001.SZ
161 |     :param start: int, sample: 20170101
162 |     :param end: int, sample: 20180101
163 |     :param source: str, sample: sw
164 |     :param value: str, sample: industry1_code
165 |     :return:
166 |     """
167 |     dates = trade_days(api, start, end)
168 |     data, msg = api.query("lb.secIndustry", "symbol={}&industry_src={}".format(symbol, source))
169 |     data["in_date"] = data["in_date"].apply(int)
170 |     data["out_date"] = data["out_date"].replace("", "99999999").apply(int)
171 |     if msg != "0,":
172 |         raise Exception(msg)
173 |
174 |     return expand(data, dates, None, value=value)
175 |
--------------------------------------------------------------------------------
/tests/test_dataview.py:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 | from __future__ import print_function
3 |
4 | # import jaqs_fxdayu
5 | # jaqs_fxdayu.patch_all()
6 | from jaqs_fxdayu.data import RemoteDataService
7 | from jaqs_fxdayu.data import DataView
8 | from .data_config import data_config
9 |
10 | # from config_path import DATA_CONFIG_PATH
11 |
12 | daily_path = '../output/tests/test_dataview_d'
13 | quarterly_path = '../output/tests/test_dataview_q'
14 |
15 |
16 | def test_write():
17 |     ds = RemoteDataService()
18 |     ds.init_from_config(data_config)
19 |     dv = DataView()
20 |
21 |     secs = '600030.SH,000063.SZ,000001.SZ'
22 |     props = {'start_date': 20160601, 'end_date': 20170601, 'symbol': secs,
23 |              'fields': 'open,close,high,low,volume,pb,net_assets,pcf_ncf',
24 |              'freq': 1}
25 |
26 |     dv.init_from_config(props, data_api=ds)
27 |     dv.prepare_data()
28 |     assert dv.data_d.shape == (281, 48)
29 |     assert dv.dates.shape == (281, )
30 |     # TODO
31 |     """
32 |     PerformanceWarning:
33 | your performance may suffer as PyTables will pickle object types that it cannot
34 | map directly to c-types [inferred_type->mixed,key->block1_values] [items->[('000001.SZ', 'int_income'), ('000001.SZ', 
'less_handling_chrg_comm_exp'), ('000001.SZ', 'net_int_income'), ('000001.SZ', 'oper_exp'), ('000001.SZ', 'symbol'), ('000063.SZ', 'int_income'), ('000063.SZ', 'less_handling_chrg_comm_exp'), ('000063.SZ', 'net_int_income'), ('000063.SZ', 'oper_exp'), ('000063.SZ', 'symbol'), ('600030.SH', 'int_income'), ('600030.SH', 'less_handling_chrg_comm_exp'), ('600030.SH', 'net_int_income'), ('600030.SH', 'oper_exp'), ('600030.SH', 'symbol')]] 35 | """ 36 | 37 | dv.save_dataview(folder_path=daily_path) 38 | 39 | 40 | def test_load(): 41 | dv = DataView() 42 | dv.load_dataview(folder_path=daily_path) 43 | 44 | assert dv.start_date == 20160601 and set(dv.symbol) == set('000001.SZ,600030.SH,000063.SZ'.split(',')) 45 | 46 | # test get_snapshot 47 | snap1 = dv.get_snapshot(20170504, symbol='600030.SH,000063.SZ', fields='close,pb') 48 | assert snap1.shape == (2, 2) 49 | assert set(snap1.columns.values) == {'close', 'pb'} 50 | assert set(snap1.index.values) == {'600030.SH', '000063.SZ'} 51 | 52 | # test get_ts 53 | ts1 = dv.get_ts('close', symbol='600030.SH,000063.SZ', start_date=20170101, end_date=20170302) 54 | assert ts1.shape == (38, 2) 55 | assert set(ts1.columns.values) == {'600030.SH', '000063.SZ'} 56 | assert ts1.index.values[-1] == 20170302 57 | 58 | 59 | def test_add_field(): 60 | dv = DataView() 61 | dv.load_dataview(folder_path=daily_path) 62 | nrows, ncols = dv.data_d.shape 63 | n_securities = len(dv.data_d.columns.levels[0]) 64 | 65 | ds = RemoteDataService() 66 | ds.init_from_config(data_config) 67 | dv.add_field('total_share', ds) 68 | assert dv.data_d.shape == (nrows, ncols + 1 * n_securities) 69 | 70 | 71 | def test_add_formula_directly(): 72 | ds = RemoteDataService() 73 | ds.init_from_config(data_config) 74 | dv = DataView() 75 | 76 | secs = '600030.SH,000063.SZ,000001.SZ' 77 | props = {'start_date': 20160601, 'end_date': 20170601, 'symbol': secs, 78 | 'fields': 'open,close', 79 | 'freq': 1} 80 | dv.init_from_config(props, data_api=ds) 81 | dv.prepare_data() 82 | 83 | dv.add_formula("myfactor", 'close / open', is_quarterly=False) 84 | assert dv.data_d.shape == (281, 36) 85 | 86 | 87 | def test_add_formula(): 88 | dv = DataView() 89 | dv.load_dataview(folder_path=daily_path) 90 | nrows, ncols = dv.data_d.shape 91 | n_securities = len(dv.data_d.columns.levels[0]) 92 | 93 | formula = 'Delta(high - close, 1)' 94 | dv.add_formula('myvar1', formula, is_quarterly=False, add_data=True) 95 | assert dv.data_d.shape == (nrows, ncols + 1 * n_securities) 96 | 97 | formula2 = 'myvar1 - close' 98 | dv.add_formula('myvar2', formula2, is_quarterly=False, add_data=True) 99 | assert dv.data_d.shape == (nrows, ncols + 2 * n_securities) 100 | 101 | 102 | def test_dataview_universe(): 103 | ds = RemoteDataService() 104 | ds.init_from_config(data_config) 105 | dv = DataView() 106 | 107 | props = {'start_date': 20170227, 'end_date': 20170327, 'universe': '000016.SH', 108 | # 'symbol': 'rb1710.SHF,rb1801.SHF', 109 | 'fields': ('open,high,low,close,vwap,volume,turnover,' 110 | + 'sw1,zz2,' 111 | + 'roe,net_assets,' 112 | + 'total_oper_rev,oper_exp,tot_profit,int_income' 113 | ), 114 | 'freq': 1} 115 | 116 | dv.init_from_config(props, ds) 117 | dv.prepare_data() 118 | 119 | data_bench = dv.data_benchmark.copy() 120 | dv.data_benchmark = data_bench 121 | 122 | try: 123 | dv.data_benchmark = data_bench.iloc[3:] 124 | except ValueError: 125 | pass 126 | 127 | dv.remove_field('roe,net_assets') 128 | dv.remove_field('close') 129 | 130 | 131 | # quarterly 132 | def test_q(): 133 | ds = RemoteDataService() 134 | 
ds.init_from_config(data_config) 135 | dv = DataView() 136 | 137 | secs = '600030.SH,000063.SZ,000001.SZ' 138 | props = {'start_date': 20160609, 'end_date': 20170601, 'symbol': secs, 139 | 'fields': ('open,close,' 140 | + 'pb,net_assets,' 141 | + 'total_oper_rev,oper_exp,' 142 | + 'cash_paid_invest,' 143 | + 'capital_stk,' 144 | + 'roe'), 'freq': 1} 145 | 146 | dv.init_from_config(props, data_api=ds) 147 | dv.prepare_data() 148 | dv.save_dataview(folder_path=quarterly_path) 149 | 150 | 151 | def test_q_get(): 152 | dv = DataView() 153 | dv.load_dataview(folder_path=quarterly_path) 154 | res = dv.get("", 0, 0, 'total_oper_rev') 155 | assert set(res.index.values) == set(dv.dates[dv.dates >= dv.start_date]) 156 | 157 | 158 | def test_q_add_field(): 159 | dv = DataView() 160 | dv.load_dataview(folder_path=quarterly_path) 161 | nrows, ncols = dv.data_q.shape 162 | n_securities = len(dv.data_d.columns.levels[0]) 163 | 164 | ds = RemoteDataService() 165 | ds.init_from_config(data_config) 166 | dv.add_field("debttoassets", ds) 167 | assert dv.data_q.shape == (nrows, ncols + 1 * n_securities) 168 | 169 | 170 | def test_q_add_formula(): 171 | dv = DataView() 172 | folder_path = '../output/prepared/20160609_20170601_freq=1D' 173 | dv.load_dataview(folder_path=quarterly_path) 174 | nrows, ncols = dv.data_d.shape 175 | n_securities = len(dv.data_d.columns.levels[0]) 176 | 177 | formula = 'total_oper_rev / close' 178 | dv.add_formula('myvar1', formula, is_quarterly=False, add_data=True) 179 | df1 = dv.get_ts('myvar1') 180 | assert not df1.empty 181 | 182 | formula2 = 'Delta(oper_exp * myvar1 - open, 3)' 183 | dv.add_formula('myvar2', formula2, is_quarterly=False, add_data=True) 184 | df2 = dv.get_ts('myvar2') 185 | assert not df2.empty 186 | 187 | 188 | if __name__ == "__main__": 189 | g = globals() 190 | g = {k: v for k, v in g.items() if k.startswith('test_') and callable(v)} 191 | 192 | # for test_name, test_func in g.items(): 193 | for test_name in ['test_write', 'test_load', 'test_add_field', 'test_add_formula_directly', 194 | 'test_add_formula', 'test_dataview_universe', 195 | 'test_q', 'test_q_get', 'test_q_add_field', 'test_q_add_formula', 196 | ]: 197 | test_func = g[test_name] 198 | print("\n==========\nTesting {:s}...".format(test_name)) 199 | test_func() 200 | print("Test Complete.") 201 | -------------------------------------------------------------------------------- /jaqs_fxdayu/data/py_expression_eval.py: -------------------------------------------------------------------------------- 1 | from jaqs.data.py_expression_eval import * 2 | from jaqs.data.py_expression_eval import Parser as OriginParser 3 | 4 | from jaqs_fxdayu.patch_util import auto_register_patch 5 | from jaqs_fxdayu.util import fillinf 6 | from . import signal_function_mod as sfm 7 | 8 | 9 | @auto_register_patch(parent_level=1) 10 | class Parser(OriginParser): 11 | def __init__(self): 12 | super(Parser, self).__init__() 13 | self.functions.update({ 14 | 'Ta': self.ta, 15 | 'Ts_Argmax': self.ts_argmax, 16 | 'Ts_Argmin': self.ts_argmin 17 | }) 18 | 19 | def evaluate(self, values, ann_dts=None, trade_dts=None, index_member=None): 20 | """ 21 | Evaluate the value of expression using. Data of different frequency will be automatically expanded. 22 | 23 | Parameters 24 | ---------- 25 | values : dict 26 | Key is variable name, value is pd.DataFrame (index is date, column is symbol) 27 | ann_dts : pd.DataFrame 28 | Announcement dates of financial statements of securities. 29 | trade_dts : np.ndarray 30 | The date index of result. 
/jaqs_fxdayu/data/py_expression_eval.py:
--------------------------------------------------------------------------------
1 | from jaqs.data.py_expression_eval import *
2 | from jaqs.data.py_expression_eval import Parser as OriginParser
3 |
4 | from jaqs_fxdayu.patch_util import auto_register_patch
5 | from jaqs_fxdayu.util import fillinf
6 | from . import signal_function_mod as sfm
7 |
8 |
9 | @auto_register_patch(parent_level=1)
10 | class Parser(OriginParser):
11 | def __init__(self):
12 | super(Parser, self).__init__()
13 | self.functions.update({
14 | 'Ta': self.ta,
15 | 'Ts_Argmax': self.ts_argmax,
16 | 'Ts_Argmin': self.ts_argmin
17 | })
18 |
19 | def evaluate(self, values, ann_dts=None, trade_dts=None, index_member=None):
20 | """
21 | Evaluate the value of the expression. Data of different frequencies will be aligned and expanded automatically.
22 |
23 | Parameters
24 | ----------
25 | values : dict
26 | Key is variable name, value is pd.DataFrame (index is date, column is symbol)
27 | ann_dts : pd.DataFrame
28 | Announcement dates of financial statements of securities.
29 | trade_dts : np.ndarray
30 | The date index of the result.
31 | index_member : pd.DataFrame
32 |
33 | Returns
34 | -------
35 | pd.DataFrame
36 |
37 | """
38 |
39 | def _fillinf(df):
40 | try:
41 | df = fillinf(df)
42 | except Exception:  # scalars and other non-frame results pass through unchanged
43 | pass
44 | return df
45 |
46 | self.ann_dts = ann_dts
47 | self.trade_dts = trade_dts
48 | self.index_member = index_member
49 |
50 | values = values or {}
51 | nstack = []
52 | L = len(self.tokens)
53 | for i in range(0, L):
54 | item = self.tokens[i]
55 | type_ = item.type_
56 | if type_ == TNUMBER:
57 | nstack.append(item.number_)
58 | elif type_ == TOP2:
59 | n2 = nstack.pop()
60 | n1 = nstack.pop()
61 | f = self.ops2[item.index_]
62 | nstack.append(_fillinf(f(n1, n2)))
63 | elif type_ == TVAR:
64 | if item.index_ in values:
65 | nstack.append(_fillinf(values[item.index_]))
66 | elif item.index_ in self.functions:
67 | nstack.append(self.functions[item.index_])
68 | else:
69 | raise Exception('undefined variable: ' + item.index_)
70 | elif type_ == TOP1:
71 | n1 = nstack.pop()
72 | f = self.ops1[item.index_]
73 | nstack.append(_fillinf(f(n1)))
74 | elif type_ == TFUNCALL:
75 | n1 = nstack.pop()
76 | f = nstack.pop()
77 | if callable(f):
78 | if type(n1) is list:
79 | nstack.append(_fillinf(f(*n1)))
80 | else:
81 | nstack.append(_fillinf(f(n1)))  # call(f, n1)
82 | else:
83 | raise Exception(str(f) + ' is not a function')
84 | else:
85 | raise Exception('invalid Expression')
86 | if len(nstack) > 1:
87 | raise Exception('invalid Expression (parity)')
88 | return _fillinf(nstack[0])
89 |
90 | # -----------------------------------------------------
91 | def reindex_df(self, df):
92 | # repair missing index entries in the factor frame
93 | if isinstance(df, pd.DataFrame):
94 | if self.ann_dts is not None:
95 | if len(set(list(df.index)) - set(list(self.ann_dts))) == 0:
96 | return df.reindex(self.ann_dts)
97 | if self.trade_dts is not None:
98 | if len(set(list(df.index)) - set(list(self.trade_dts))) == 0:
99 | return df.reindex(self.trade_dts)
100 | return df
101 |
102 | # align functions
103 | def _align_bivariate(self, df1, df2, force_align=False):
104 | df1 = self.reindex_df(df1)
105 | df2 = self.reindex_df(df2)
106 | if isinstance(df1, pd.DataFrame) and isinstance(df2, pd.DataFrame):
107 | len1 = len(df1.index)
108 | len2 = len(df2.index)
109 | if (self.ann_dts is not None) and (self.trade_dts is not None):
110 | if len1 > len2:
111 | df2 = align(df2, self.ann_dts, self.trade_dts)
112 | elif len1 < len2:
113 | df1 = align(df1, self.ann_dts, self.trade_dts)
114 | elif force_align:
115 | df1 = align(df1, self.ann_dts, self.trade_dts)
116 | df2 = align(df2, self.ann_dts, self.trade_dts)
117 | return (df1, df2)
118 |
119 | def _align_univariate(self, df1):
120 | df1 = self.reindex_df(df1)
121 | if isinstance(df1, pd.DataFrame):
122 | if (self.ann_dts is not None) and (self.trade_dts is not None):
123 | len1 = len(df1.index)
124 | len2 = len(self.trade_dts)
125 | if len1 != len2:
126 | return align(df1, self.ann_dts, self.trade_dts)
127 | return df1
128 |
129 | # -----------------------------------------------------
130 | # functions
131 | # ta function
132 | def ta(self,
133 | ta_method,
134 | ta_column,
135 | Open,
136 | High,
137 | Low,
138 | Close,
139 | Volume,
140 | *args,
141 | **kwargs):
142 | return sfm.ta(ta_method,
143 | ta_column,
144 | Open,
145 | High,
146 | Low,
147 | Close,
148 | Volume,
149 | *args,
150 | **kwargs)
151 |
152 | def ts_argmax(self, *args,
153 | **kwargs):
154 | return sfm.ts_argmax(*args, **kwargs)
155 |
156 | def ts_argmin(self, *args,
157 | **kwargs):
158 | return sfm.ts_argmin(*args, **kwargs)
159 |
160 | def std_dev(self, df,
n):
161 | return df.apply(lambda x: x.dropna().rolling(n).std()).reindex(df.index)
162 |
163 | def ts_sum(self, df, n):
164 | return df.apply(lambda x: x.dropna().rolling(n).sum()).reindex(df.index)
165 |
166 | def count_nans(self, df, n):
167 | return n - df.rolling(n).count()
168 |
169 | def ts_mean(self, df, n):
170 | return df.apply(lambda x: x.dropna().rolling(n).mean()).reindex(df.index)
171 |
172 | def ts_min(self, df, n):
173 | return df.apply(lambda x: x.dropna().rolling(n).min()).reindex(df.index)
174 |
175 | def ts_max(self, df, n):
176 | return df.apply(lambda x: x.dropna().rolling(n).max()).reindex(df.index)
177 |
178 | def ts_kurt(self, df, n):
179 | return df.apply(lambda x: x.dropna().rolling(n).kurt()).reindex(df.index)
180 |
181 | def ts_skew(self, df, n):
182 | return df.apply(lambda x: x.dropna().rolling(n).skew()).reindex(df.index)
183 |
184 | def ts_product(self, df, n):
185 | return df.apply(lambda x: x.dropna().rolling(n).apply(np.product)).reindex(df.index)
186 |
187 | def corr(self, x, y, n):
188 | (x, y) = self._align_bivariate(x, y)
189 | return x.rolling(n, min_periods=1).corr(y)
190 |
191 | def cov(self, x, y, n):
192 | (x, y) = self._align_bivariate(x, y)
193 | return x.rolling(n, min_periods=1).cov(y)
194 |
195 | def decay_linear(self, df, n):
196 | return df.apply(lambda x: x.dropna().rolling(n).apply(self.decay_linear_array)).reindex(df.index)
197 |
198 | def decay_exp(self, df, f, n):
199 | return df.apply(lambda x: x.dropna().rolling(n).apply(self.decay_exp_array, args=[f])).reindex(df.index)
200 |
--------------------------------------------------------------------------------
/jaqs_fxdayu/research/signaldigger/performance.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | from jaqs_fxdayu.patch_util import auto_register_patch
3 | # NOTE: np and scst used below are supplied by the star import from jaqs at the bottom of this module
4 | from jaqs.research.signaldigger.performance import calc_ic_stats_table as __calc_ic_stats_stable
5 |
6 |
7 | @auto_register_patch()
8 | def calc_signal_ic(signal_data, by_group=False):
9 | """
10 | Computes the Spearman Rank Correlation based Information Coefficient (IC)
11 | between signal values and N period forward returns for each period in
12 | the signal index.
13 |
14 | Parameters
15 | ----------
16 | signal_data : pd.DataFrame - MultiIndex
17 | Index is pd.MultiIndex ['trade_date', 'symbol'], columns = ['signal', 'return', 'quantile']
18 | by_group : bool
19 | If True, compute period wise IC separately for each group.
20 | Returns
21 | -------
22 | ic : pd.DataFrame
23 | Spearman Rank correlation between signal and provided forward returns.
24 |
25 | """
26 |
27 | def src_ic(df):
28 | _ic = scst.spearmanr(df['signal'], df['return'])[0]
29 | return _ic
30 |
31 | signal_data = signal_data.copy()
32 |
33 | grouper = ['trade_date']
34 | if by_group:
35 | grouper.append('group')
36 |
37 | ic = signal_data.groupby(grouper).apply(src_ic)
38 | ic = pd.DataFrame(ic)
39 | ic.columns = ['ic']
40 |
41 | return ic
42 |
43 |
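# A self-contained toy check of the per-date Spearman IC above (made-up numbers,
# not part of the original module; scst is scipy.stats, as in the code above).
import pandas as pd
import scipy.stats as scst

toy = pd.DataFrame({
    'trade_date': [20170103] * 4,
    'signal': [0.1, 0.4, 0.2, 0.3],
    'return': [0.00, 0.03, 0.01, 0.02],
})
ic = toy.groupby('trade_date').apply(lambda g: scst.spearmanr(g['signal'], g['return'])[0])
assert abs(ic.loc[20170103] - 1.0) < 1e-12  # identical rank order => IC == 1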
44 | @auto_register_patch()
45 | def mean_information_coefficient(ic, by_time=None, by_group=False):
46 | """
47 | Get the mean information coefficient of specified groups.
48 | Answers questions like:
49 | What is the mean IC for each month?
50 | What is the mean IC for each group over the whole time range?
51 | What is the mean IC for each group, each week?
52 |
53 | Parameters
54 | ----------
55 | by_time : str (pd time_rule), optional
56 | Time window to use when taking mean IC.
57 | See http://pandas.pydata.org/pandas-docs/stable/timeseries.html
58 | for available options.
59 | by_group : bool
60 | If True, compute period wise IC separately for each group.
61 | Returns
62 | -------
63 | ic : pd.DataFrame
64 | Mean Spearman Rank correlation between signal and provided
65 | forward price movement windows.
66 | """
67 | grouper = []
68 | if by_time is not None:
69 | grouper.append(pd.TimeGrouper(by_time))  # pd.Grouper(freq=by_time) in newer pandas
70 | if by_group:
71 | grouper.append('group')
72 |
73 | if len(grouper) == 0:
74 | ic = ic.mean()
75 | else:
76 | if isinstance(ic.index, pd.MultiIndex):
77 | ic.index = pd.MultiIndex(levels=[pd.to_datetime(ic.index.levels[0],
78 | format="%Y%m%d"),
79 | ic.index.levels[1]],
80 | labels=ic.index.labels,  # .codes in newer pandas
81 | names=ic.index.names)
82 | else:
83 | ic.index = pd.to_datetime(ic.index, format="%Y%m%d")
84 | ic = (ic.reset_index().set_index('trade_date').groupby(grouper).mean())
85 |
86 | return ic
87 |
88 |
89 | @auto_register_patch()
90 | def calc_quantile_return_mean_std(signal_data, time_series=False):
91 | """
92 | Computes mean returns for signal quantiles across
93 | provided forward returns columns.
94 |
95 | Parameters
96 | ----------
97 | signal_data : pd.DataFrame - MultiIndex
98 | Index is pd.MultiIndex ['trade_date', 'symbol'], columns = ['signal', 'return', 'quantile']
99 | Returns
100 | -------
101 | res : pd.DataFrame or dict
102 |
103 | """
104 | signal_data = signal_data.copy()
105 | grouper = ['quantile']
106 | if time_series:
107 | grouper.append('trade_date')
108 |
109 | group_mean_std = signal_data.groupby(grouper)['return'].agg(['mean', 'std', 'count'])
110 | # TODO: why?
111 | '''
112 | std_error_ret = group_mean_std.loc[:, 'std'].copy() / np.sqrt(group_mean_std.loc[:, 'count'].copy())
113 | '''
114 | indexes = []
115 | if time_series:
116 | quantile_daily_mean_std_dic = dict()
117 | quantiles = np.unique(group_mean_std.index.get_level_values(level='quantile'))
118 | for q in quantiles:  # loop for different quantiles
119 | df_q = group_mean_std.loc[pd.IndexSlice[q, :], :]  # bug
120 | df_q.index = df_q.index.droplevel(level='quantile')
121 | indexes.append(pd.Series(df_q.index))
122 | quantile_daily_mean_std_dic[q] = df_q
123 | new_index = sorted(pd.concat(indexes).unique())
124 | for q in quantiles:
125 | quantile_daily_mean_std_dic[q] = quantile_daily_mean_std_dic[q].reindex(new_index).fillna(0)
126 | return quantile_daily_mean_std_dic
127 | else:
128 | return group_mean_std
129 |
130 |
131 | @auto_register_patch()
132 | def daily_ret_to_cum(df_ret, axis=0):
133 | cum = df_ret.add(1.0).cumprod(axis=axis)
134 | return cum
135 |
136 |
137 | @auto_register_patch()
138 | def daily_ret_to_ret(daily_ret, period=5, axis=0):
139 | ret = daily_ret.add(1).rolling(period, axis=axis).apply(np.product).sub(1)
140 | return ret
141 |
142 |
143 | @auto_register_patch()
144 | def calc_ic_stats_table(ic_data):
145 | ic_data = ic_data.dropna()
146 | return __calc_ic_stats_stable(ic_data)
147 |
148 |
149 | @auto_register_patch()
150 | def price2ret(prices, period=5, axis=None, compound=True):
151 | """
152 |
153 | Parameters
154 | ----------
155 | prices : pd.DataFrame or pd.Series
156 | Index is datetime.
157 | period : int
158 | axis : {0, 1, None}
159 | compound : bool
160 | Returns
161 | -------
162 | ret : pd.DataFrame or pd.Series
163 |
164 | """
165 | if axis is None:
166 | kwargs = dict()
167 | else:
168 | kwargs = {'axis': axis}
169 |
170 | if compound:
171 | ret = prices.pct_change(periods=period, **kwargs)
172 | else:
173 | ret = prices.diff(periods=period, **kwargs) / prices.iloc[0]
174 | return ret
175 |
176 |
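# Worked numbers (hypothetical, not part of the original module) for the two
# modes of price2ret above: compound uses pct_change over `period` rows, while
# the simple mode divides the price difference by the first price.
import pandas as pd

prices = pd.Series([100.0, 110.0, 121.0, 133.1])
compound = prices.pct_change(periods=2)           # 133.1 / 110 - 1 = 0.21
simple = prices.diff(periods=2) / prices.iloc[0]  # (133.1 - 110) / 100 = 0.231
assert abs(compound.iloc[-1] - 0.21) < 1e-9
assert abs(simple.iloc[-1] - 0.231) < 1e-9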
177 | @auto_register_patch()
178 | def period_wise_ret_to_cum(ret, period, compound=True):
179 | """
180 | Calculate cumulative returns from N-period returns.
181 | When 'period' N is greater than 1 the cumulative returns plot is computed
182 | building and averaging the cumulative returns of N interleaved portfolios
183 | (started at subsequent periods 1,2,3,...,N) each one rebalancing every N
184 | periods.
185 |
186 | Parameters
187 | ----------
188 | ret: pd.Series or pd.DataFrame
189 | pd.Series containing N-period returns
190 | period: integer
191 | Period for which the returns are computed
192 | compound : bool
193 | Whether to compound returns when cumulating.
194 |
195 | Returns
196 | -------
197 | pd.Series
198 | Cumulative returns series starting from zero.
199 |
200 | """
201 | if isinstance(ret, pd.DataFrame):
202 | # deal with each column recursively, forwarding the compound flag
203 | return ret.apply(period_wise_ret_to_cum, axis=0, args=(period, compound))
204 | elif isinstance(ret, pd.Series):
205 | if period == 1:
206 | return ret.add(1).cumprod().sub(1.0)
207 |
208 | # invest in each portfolio separately
209 | # np.diag spreads each period's returns onto its own column, one column per interleaved portfolio
210 | periods_index = np.arange(len(ret.index)) // period
211 | period_portfolios = ret.groupby(by=periods_index, axis=0).apply(lambda ser: pd.DataFrame(np.diag(ser))).fillna(0)
212 | period_portfolios.index = ret.index
213 |
214 |
215 | # cumulate returns separately
216 | if compound:
217 | cum_returns = period_portfolios.add(1).cumprod().sub(1.0)
218 | else:
219 | cum_returns = period_portfolios.cumsum()
220 |
221 | # since the capital of every portfolio is the same, the combined return equals the average return
222 | res = cum_returns.mean(axis=1)
223 |
224 | return res
225 | else:
226 | raise NotImplementedError("ret must be Series or DataFrame.")
227 |
228 |
229 | _calc_signal_ic = calc_signal_ic
230 | _mean_information_coefficient = mean_information_coefficient
231 | _calc_ic_stats_table = calc_ic_stats_table
232 | _calc_quantile_return_mean_std = calc_quantile_return_mean_std
233 | _daily_ret_to_cum = daily_ret_to_cum
234 | _daily_ret_to_ret = daily_ret_to_ret
235 | _price2ret = price2ret
236 | _period_wise_ret_to_cum = period_wise_ret_to_cum
237 |
238 | from jaqs.research.signaldigger.performance import *
239 |
240 | calc_signal_ic = _calc_signal_ic
241 | mean_information_coefficient = _mean_information_coefficient
242 | calc_quantile_return_mean_std = _calc_quantile_return_mean_std
243 | daily_ret_to_cum = _daily_ret_to_cum
244 | daily_ret_to_ret = _daily_ret_to_ret
245 | price2ret = _price2ret
246 | calc_ic_stats_table = _calc_ic_stats_table
247 | period_wise_ret_to_cum = _period_wise_ret_to_cum
248 |
--------------------------------------------------------------------------------
/jaqs_fxdayu/research/signaldigger/process.py:
--------------------------------------------------------------------------------
1 | # encoding=utf-8
2 | # data-processing helpers
3 |
4 | import jaqs.util as jutil
5 | import pandas as pd
6 | import numpy as np
7 |
8 |
9 | def _mask_df(df, mask):
10 | mask = mask.astype(bool)
11 | df[mask] = np.nan
12 | return df
13 |
14 |
15 | def _mask_non_index_member(df, index_member=None):
16 | if index_member is not None:
17 | index_member = index_member.astype(bool)
18 | return _mask_df(df, ~index_member)
19 | return df
20 |
21 |
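# A tiny sketch (hypothetical membership matrix, not part of the original
# module) of the masking helpers above: positions where a stock is not an
# index member become NaN before any cross-sectional statistic is computed.
import numpy as np
import pandas as pd

factor = pd.DataFrame([[1.0, 2.0], [3.0, 4.0]], columns=['A', 'B'])
member = pd.DataFrame([[1, 0], [1, 1]], columns=['A', 'B'])

masked = _mask_non_index_member(factor.copy(), index_member=member)
assert np.isnan(masked.loc[0, 'B']) and masked.loc[1, 'B'] == 4.0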
22 | # cross-sectional standardization for DataFrame data
23 | def standardize(factor_df, index_member=None):
24 | """
25 | Cross-sectional z-score standardization of factor values (sample std, ddof = n - 1).
26 | :param index_member:
27 | :param factor_df: factor values (pandas.DataFrame), index is datetime, columns are symbols,
28 | e.g.:
29 | AAPL BA CMG DAL LULU
30 | date
31 | 2016-06-24 0.165260 0.002198 0.085632 -0.078074 0.173832
32 | 2016-06-27 0.165537 0.003583 0.063299 -0.048674 0.180890
33 | 2016-06-28 0.135215 0.010403 0.059038 -0.034879 0.111691
34 | 2016-06-29 0.068774 0.019848 0.058476 -0.049971 0.042805
35 | 2016-06-30 0.039431 0.012271 0.037432 -0.027272 0.010902
36 | :return: z-scored factor values (pandas.DataFrame), index is datetime, columns are symbols.
37 | """
38 |
39 | factor_df = jutil.fillinf(factor_df)
40 | factor_df = _mask_non_index_member(factor_df, index_member)
41 | return factor_df.sub(factor_df.mean(axis=1), axis=0).div(factor_df.std(axis=1), axis=0)
42 |
43 |
44 | # cross-sectional winsorization for DataFrame data
45 | def winsorize(factor_df, alpha=0.05, index_member=None):
46 | """
47 | Winsorize factor values cross-sectionally.
48 | :param index_member:
49 | :param alpha: total tail fraction to clip (alpha/2 in each tail)
50 | :param factor_df: factor values (pandas.DataFrame), index is datetime, columns are symbols,
51 | e.g.:
52 | AAPL BA CMG DAL LULU
53 | date
54 | 2016-06-24 0.165260 0.002198 0.085632 -0.078074 0.173832
55 | 2016-06-27 0.165537 0.003583 0.063299 -0.048674 0.180890
56 | 2016-06-28 0.135215 0.010403 0.059038 -0.034879 0.111691
57 | 2016-06-29 0.068774 0.019848 0.058476 -0.049971 0.042805
58 | 2016-06-30 0.039431 0.012271 0.037432 -0.027272 0.010902
59 | :return: winsorized factor values (pandas.DataFrame), index is datetime, columns are symbols.
60 | """
61 |
62 | def winsorize_series(se):
63 | q = se.quantile([alpha / 2, 1 - alpha / 2])
64 | se[se < q.iloc[0]] = q.iloc[0]
65 | se[se > q.iloc[1]] = q.iloc[1]
66 | return se
67 |
68 | factor_df = jutil.fillinf(factor_df)
69 | factor_df = _mask_non_index_member(factor_df, index_member)
70 | return factor_df.apply(lambda x: winsorize_series(x), axis=1)
71 |
72 |
73 | # cross-sectional de-extreming via the median absolute deviation
74 | def mad(factor_df, index_member=None):
75 | """
76 | Clip extreme factor values to median +/- 5 * MAD within each cross-section.
77 | :param index_member:
78 | :param factor_df: factor values (pandas.DataFrame), index is datetime, columns are symbols,
79 | e.g.:
80 | AAPL BA CMG DAL LULU
81 | date
82 | 2016-06-24 0.165260 0.002198 0.085632 -0.078074 0.173832
83 | 2016-06-27 0.165537 0.003583 0.063299 -0.048674 0.180890
84 | 2016-06-28 0.135215 0.010403 0.059038 -0.034879 0.111691
85 | 2016-06-29 0.068774 0.019848 0.058476 -0.049971 0.042805
86 | 2016-06-30 0.039431 0.012271 0.037432 -0.027272 0.010902
87 | :return: clipped factor values (pandas.DataFrame), index is datetime, columns are symbols.
88 | """
89 |
90 | def _mad(series):
91 | if series.dropna().size == 0:
92 | return series
93 | median = series.median()
94 | tmp = (series - median).abs().median()
95 | return series.clip(median - 5 * tmp, median + 5 * tmp)
96 |
97 | factor_df = jutil.fillinf(factor_df)
98 | factor_df = _mask_non_index_member(factor_df, index_member)
99 | return factor_df.apply(lambda x: _mad(x), axis=1)
100 |
101 |
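# Worked numbers (hypothetical, not part of the original module) for the MAD
# clipping rule above: values are clipped to median +/- 5 * MAD per cross-section.
import pandas as pd

row = pd.Series([1.0, 2.0, 3.0, 100.0])
median = row.median()                      # 2.5
mad_scale = (row - median).abs().median()  # 1.0
clipped = row.clip(median - 5 * mad_scale, median + 5 * mad_scale)
assert clipped.iloc[-1] == 7.5  # 100.0 is clipped to 2.5 + 5 * 1.0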
102 | # cross-sectional ranking, normalized
103 | def rank_standardize(factor_df, index_member=None):
104 | """
105 | Rebuild the factor as cross-sectional rank scores scaled to [0, 1] (ascending by default: a larger factor value gets a larger, i.e. better, rank score).
106 | :param index_member:
107 | :param factor_df: factor values (pandas.DataFrame), index is datetime, columns are symbols,
108 | e.g.:
109 | AAPL BA CMG DAL LULU
110 | date
111 | 2016-06-24 0.165260 0.002198 0.085632 -0.078074 0.173832
112 | 2016-06-27 0.165537 0.003583 0.063299 -0.048674 0.180890
113 | 2016-06-28 0.135215 0.010403 0.059038 -0.034879 0.111691
114 | 2016-06-29 0.068774 0.019848 0.058476 -0.049971 0.042805
115 | 2016-06-30 0.039431 0.012271 0.037432 -0.027272 0.010902
116 |
117 | :return: rank-rebuilt factor values, ranging between 0 and 1.
118 | """
119 | factor_df = jutil.fillinf(factor_df)
120 | factor_df = _mask_non_index_member(factor_df, index_member)
121 | return jutil.rank_with_mask(factor_df, axis=1, normalize=True)
122 |
123 |
124 | # add a tiny perturbation to the factor values so that quantile cuts can break ties
125 | def get_disturbed_factor(factor_df):
126 | """
127 | Add a tiny random perturbation to the factor values to make quantiles distinguishable.
128 | :param factor_df: factor values (pandas.DataFrame), index is datetime, columns are symbols,
129 | e.g.:
130 | AAPL BA CMG DAL LULU
131 | date
132 | 2016-06-24 0.165260 0.002198 0.085632 -0.078074 0.173832
133 | 2016-06-27 0.165537 0.003583 0.063299 -0.048674 0.180890
134 | 2016-06-28 0.135215 0.010403 0.059038 -0.034879 0.111691
135 | 2016-06-29 0.068774 0.019848 0.058476 -0.049971 0.042805
136 | 2016-06-30 0.039431 0.012271 0.037432 -0.027272 0.010902
137 |
138 | :return: the rebuilt factor values, each with a tiny perturbation added.
139 | """
140 | return factor_df + np.random.random(factor_df.shape) / 1000000000
141 |
142 |
143 | # industry and market-cap neutralization for DataFrame data
144 | def neutralize(factor_df,
145 | group,
146 | float_mv=None,
147 | index_member=None):
148 | """
149 | Neutralize the factor against industry and market cap.
150 | :param index_member:
151 | :param group: industry classification (pandas.DataFrame), index is datetime, columns are symbols
152 | :param factor_df: factor values (pandas.DataFrame), index is datetime, columns are symbols,
153 | e.g.:
154 | AAPL BA CMG DAL LULU
155 | date
156 | 2016-06-24 0.165260 0.002198 0.085632 -0.078074 0.173832
157 | 2016-06-27 0.165537 0.003583 0.063299 -0.048674 0.180890
158 | 2016-06-28 0.135215 0.010403 0.059038 -0.034879 0.111691
159 | 2016-06-29 0.068774 0.019848 0.058476 -0.049971 0.042805
160 | 2016-06-30 0.039431 0.012271 0.037432 -0.027272 0.010902
161 | :param float_mv: float market cap (pandas.DataFrame), index is datetime, columns are symbols; if None, no market-cap neutralization is performed
162 | :return: neutralized factor values (pandas.DataFrame), index is datetime, columns are symbols.
163 | """
164 | def drop_nan(s):
165 | return s[s != "nan"]
166 |
167 | def _ols_by_numpy(x, y):
168 | m = np.linalg.lstsq(x, y)[0]
169 | resid = y - (x @ m)
170 | return resid
171 |
172 | def _generate_cross_sectional_residual(data):
173 | for _, X in data.groupby(level=0):
174 | signal = X.pop("signal")
175 | X = pd.concat([X, pd.get_dummies(X.pop("industry"))], axis=1)
176 | signal = pd.Series(_ols_by_numpy(X.values, signal), index=signal.index, name=signal.name)
177 | yield signal
178 |
179 | data = []
180 |
181 | # keep the original index and columns so they can be restored later
182 | origin_factor_columns = factor_df.columns
183 | origin_factor_index = factor_df.index
184 |
185 | factor_df = jutil.fillinf(factor_df)  # clean invalid (inf) values
186 | factor_df = _mask_non_index_member(factor_df, index_member)  # drop non-index members
187 | factor_df = factor_df.dropna(how="all").stack().rename("signal")  # drop all-NaN cross-sections
188 | data.append(factor_df)
189 |
190 | # log float market cap, de-extremed and standardized; skip this step for size factors themselves
191 | if float_mv is not None:
192 | float_mv = standardize(mad(np.log(float_mv), index_member=index_member), index_member).stack().rename("style")
193 | data.append(float_mv)
194 |
195 | # industry
196 | industry_standard = drop_nan(group.stack()).rename("industry")
197 | data.append(industry_standard)
198 |
199 | data = pd.concat(data, axis=1).dropna()
200 | residuals = pd.concat(_generate_cross_sectional_residual(data)).unstack()
201 |
202 | # restore the rows and columns dropped during neutralization
203 | residuals = residuals.reindex(index=origin_factor_index, columns=origin_factor_columns)
204 | return residuals
205 |
--------------------------------------------------------------------------------
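# A minimal sketch (hypothetical inputs, not part of the original module) of the
# cross-sectional regression inside neutralize() above: the factor is regressed
# on industry dummies (plus size, if given) and the residual is kept as the
# neutralized signal.
import numpy as np
import pandas as pd

signal = np.array([1.0, 2.0, 3.0, 4.0])
industry = pd.Series(['bank', 'bank', 'tech', 'tech'])
X = pd.get_dummies(industry).astype(float).values
beta = np.linalg.lstsq(X, signal, rcond=None)[0]  # rcond=None silences newer numpy's warning
resid = signal - X @ beta
assert np.allclose(resid, [-0.5, 0.5, -0.5, 0.5])  # demeaned within each industry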
/tests/test_py_expression_eval.py:
--------------------------------------------------------------------------------
1 | # encoding: UTF-8
2 |
3 | from __future__ import print_function
4 |
5 | from jaqs_fxdayu import patch_all
6 |
7 | patch_all()
8 | import pandas as pd
9 | import numpy as np
10 |
11 | try:
12 | import pytest
13 | except ImportError as e:
14 | if __name__ == "__main__":
15 | pass
16 | else:
17 | raise e
18 | from jaqs.data import RemoteDataService
19 | from jaqs.data import Parser
20 | from tests.data_config import data_config
21 |
22 |
23 | def test_group_rank():
24 | shape = (500, 3000)
25 | df_val = pd.DataFrame(np.random.rand(*shape))
26 | df_group = pd.DataFrame(np.random.randint(1, 5, size=shape[0] * shape[1]).reshape(*shape))
27 | expr = parser.parse('GroupRank(val, mygroup)')
28 | res = parser.evaluate({'val': df_val, 'mygroup': df_group})
29 |
30 |
31 | def test_group_quantile():
32 | shape = (500, 3000)
33 | df_val = pd.DataFrame(np.random.rand(*shape))
34 | df_group = pd.DataFrame(np.random.randint(1, 5, size=shape[0] * shape[1]).reshape(*shape))
35 | expr = parser.parse('GroupQuantile(val, mygroup, 23)')
36 | res = parser.evaluate({'val': df_val, 'mygroup': df_group})
37 | n = 100
38 | df_val = pd.DataFrame(np.arange(n).reshape(2, -1))
39 | df_group = pd.DataFrame(np.array([1] * 25 + [2] * 25 + [2] * 20 + [3] * 20 + [9] * 10).reshape(2, -1))
40 | expr = parser.parse('GroupQuantile(val, mygroup, 5)')
41 | res = parser.evaluate({'val': df_val, 'mygroup': df_group})
42 | n1 = 5
43 | n2 = 4
44 | n3 = 2
45 | res_correct = np.array([0.] * n1 + [1.] * n1 + [2.] * n1 + [3.] * n1 + [4.] * n1
46 | + [0.] * n1 + [1.] * n1 + [2.] * n1 + [3.] * n1 + [4.] * n1
47 | + [0.] * n2 + [1.] * n2 + [2.] * n2 + [3.] * n2 + [4.] * n2
48 | + [0.] * n2 + [1.] * n2 + [2.] * n2 + [3.] * n2 + [4.] * n2
49 | + [0.] * n3 + [1.] * n3 + [2.] * n3 + [3.] * n3 + [4.]
* n3).reshape(2, -1) + 1.0 50 | assert np.abs(res.values - res_correct).flatten().sum() < 1e-6 51 | 52 | 53 | def test_quantile(): 54 | val = pd.DataFrame(np.random.rand(500, 3000)) 55 | expr = parser.parse('Quantile(val, 12)') 56 | res = parser.evaluate({'val': val}) 57 | assert np.nanmean(val[res == 1].values.flatten()) < 0.11 58 | 59 | val = pd.DataFrame(np.random.rand(1000, 100)) 60 | expr = parser.parse('Ts_Quantile(val, 500, 12)') 61 | res = parser.evaluate({'val': val}) 62 | assert np.nanmean(val[res == 1].values.flatten()) < 0.11 63 | 64 | 65 | def test_ttm(): 66 | from jaqs.data import DataView 67 | 68 | ds = RemoteDataService() 69 | ds.init_from_config(data_config) 70 | dv = DataView() 71 | props = {'start_date': 20120101, 'end_date': 20170601, 'universe': '000016.SH', 72 | 'fields': ('net_profit_incl_min_int_inc'), 'freq': 1} 73 | dv.init_from_config(props, ds) 74 | dv.prepare_data() 75 | 76 | dv.add_formula('single', 'TTM(net_profit_incl_min_int_inc)', is_quarterly=True) 77 | 78 | 79 | def test_logical_and_or(): 80 | parser.parse('open + 3 && 1') 81 | res = parser.evaluate({'open': dfx}) 82 | assert np.all(res.values.flatten()) 83 | 84 | parser.parse('open + 3 && 0.0') 85 | res = parser.evaluate({'open': dfx}) 86 | assert not np.all(res.values.flatten()) 87 | 88 | 89 | def test_plus_minus_mul_div(): 90 | expression = parser.parse('close * open + close / open - close^3 % open') 91 | res = parser.evaluate({'close': dfy, 'open': dfx}) 92 | 93 | 94 | def test_eq_neq(): 95 | expression = parser.parse('(close == open) && (close != open) && (!close)') 96 | res = parser.evaluate({'close': dfy, 'open': dfx}) 97 | 98 | expression = parser.parse('(close > open)') 99 | res = parser.evaluate({'close': dfy, 'open': dfx}) 100 | 101 | expression = parser.parse('(close >= open)') 102 | res = parser.evaluate({'close': dfy, 'open': dfx}) 103 | 104 | expression = parser.parse('(close < open)') 105 | res = parser.evaluate({'close': dfy, 'open': dfx}) 106 | 107 | expression = parser.parse('(close <= open)') 108 | res = parser.evaluate({'close': dfy, 'open': dfx}) 109 | 110 | 111 | def test_cutoff_standardize(): 112 | expression = parser.parse('Standardize(Cutoff(close, 2.8))') 113 | res = parser.evaluate({'close': dfy, 'open': dfx}) 114 | 115 | 116 | def test_moving_avg(): 117 | expression = parser.parse('Ewma(close, 5)') 118 | res = parser.evaluate({'close': dfy}) 119 | expression = parser.parse('Ts_Mean(close, 5)') 120 | res = parser.evaluate({'close': dfy}) 121 | expression = parser.parse('Ts_Min(close, 5)') 122 | res = parser.evaluate({'close': dfy}) 123 | expression = parser.parse('Ts_Max(close, 5)') 124 | res = parser.evaluate({'close': dfy}) 125 | 126 | 127 | def test_cov_corr(): 128 | expression = parser.parse('Correlation(close, open, 5)') 129 | res = parser.evaluate({'close': dfy, 'open': dfx}) 130 | expression = parser.parse('Covariance(close, open, 5)') 131 | res = parser.evaluate({'close': dfy, 'open': dfx}) 132 | 133 | 134 | def test_return_delay_delta(): 135 | expression = parser.parse('Delta(close, 5)') 136 | res = parser.evaluate({'close': dfy}) 137 | expression = parser.parse('Delay(close, 5)') 138 | res = parser.evaluate({'close': dfy}) 139 | expression = parser.parse('Return(close, 5)') 140 | res = parser.evaluate({'close': dfy}) 141 | 142 | 143 | def test_skew(): 144 | expression = parser.parse('Ts_Skewness(close,4)') 145 | res = parser.evaluate({'close': dfy}) 146 | expression = parser.parse('Ts_Kurtosis(close,4)') 147 | res = parser.evaluate({'close': dfy}) 148 | 149 | 150 | 
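# A small usage sketch (hypothetical expression; parser comes from the fixture
# at the bottom of this file): Expression.variables() lists the input fields a
# formula needs, which is how DataView decides which data to fetch, e.g.
#
#     expr = parser.parse('Ts_Mean(close, 5) / open')
#     set(expr.variables())  # -> {'close', 'open'}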
def test_variables():
151 | expression = parser.parse('Ts_Skewness(open,4)+close / what')
152 | assert set(expression.variables()) == {'open', 'close', 'what'}
153 |
154 |
155 | def test_product():
156 | # parser.set_capital('lower')
157 | expression = parser.parse('Ts_Product(open,2)')
158 | res = parser.evaluate({'close': dfy, 'open': dfx})
159 | # parser.set_capital('upper')
160 |
161 |
162 | def test_rank():
163 | expression = parser.parse('Rank(close)')
164 | res = parser.evaluate({'close': dfy, 'open': dfx})
165 |
166 | expression = parser.parse('Ts_Rank(close, 8)')
167 | res = parser.evaluate({'close': dfy, 'open': dfx})
168 |
169 |
170 | def test_tail():
171 | expression = parser.parse('Tail(close/open,0.99,1.01,1.0)')
172 | res = parser.evaluate({'close': dfy, 'open': dfx})
173 |
174 |
175 | def test_step():
176 | expression = parser.parse('Step(close,10)')
177 | res = parser.evaluate({'close': dfy, 'open': dfx})
178 |
179 |
180 | def test_decay_linear():
181 | expression = parser.parse('Decay_linear(open,2)')
182 | res = parser.evaluate({'close': dfy, 'open': dfx})
183 |
184 |
185 | def test_decay_exp():
186 | expression = parser.parse('Decay_exp(open, 0.5, 2)')
187 | res = parser.evaluate({'close': dfy, 'open': dfx})
188 |
189 |
190 | def test_signed_power():
191 | expression = parser.parse('SignedPower(close-open, 2)')
192 | res = parser.evaluate({'close': dfx, 'open': dfy})
193 |
194 |
195 | def test_ewma():
196 | expr = parser.parse('Ewma(close, 3)')
197 | res = parser.evaluate({'close': dfx})
198 | assert abs(res.loc[20170801, '000001.SH'] - 3292.6) < 1e-1
199 |
200 |
201 | def test_if():
202 | expr = parser.parse('If(close > 20, 3, -3)')
203 | res = parser.evaluate({'close': dfx})
204 | assert res.iloc[0, 0] == 3.
205 | assert res.iloc[0, 2] == -3.
206 | 207 | 208 | ''' 209 | def test_group_apply(): 210 | import numpy as np 211 | np.random.seed(369) 212 | 213 | n = 20 214 | 215 | dic = {c: np.random.rand(n) for c in 'abcdefghijklmnopqrstuvwxyz'[:n]} 216 | df_value = pd.DataFrame(index=range(n), data=dic) 217 | 218 | r = np.random.randint(0, 5, n * df_value.shape[0]).reshape(df_value.shape[0], n) 219 | cols = df_value.columns.values.copy() 220 | np.random.shuffle(cols) 221 | 222 | df_group = pd.DataFrame(index=df_value.index, columns=cols, data=r) 223 | 224 | parser = Parser() 225 | expr = parser.parse('GroupApply(Standardize, GroupApply(Cutoff, close, 2.8))') 226 | res = parser.evaluate({'close': df_value}, df_group=df_group) 227 | 228 | assert abs(res.iloc[3, 6] - (-1.53432)) < 1e-5 229 | assert abs(res.iloc[19, 18] - (-1.17779)) < 1e-5 230 | 231 | 232 | ''' 233 | 234 | 235 | def test_calc_return(): 236 | expr = parser.parse('Return(close, 2, 0)') 237 | res = parser.evaluate({'close': dfx}) 238 | assert abs(res.loc[20170808, '000001.SH'] - 0.006067) < 1e-6 239 | 240 | expr = parser.parse('Return(close, 2, 1)') 241 | res = parser.evaluate({'close': dfx}) 242 | 243 | 244 | @pytest.fixture(autouse=True) 245 | def my_globals(request): 246 | ds = RemoteDataService() 247 | ds.init_from_config(data_config) 248 | 249 | df, msg = ds.daily("000001.SH, 600030.SH, 000300.SH", start_date=20170801, end_date=20170820, 250 | fields="open,high,low,close,vwap,preclose") 251 | 252 | multi_index_names = ['trade_date', 'symbol'] 253 | df_multi = df.set_index(multi_index_names, drop=False) 254 | df_multi.sort_index(axis=0, level=multi_index_names, inplace=True) 255 | 256 | dfx = df_multi.loc[pd.IndexSlice[:, :], pd.IndexSlice['close']].unstack() 257 | dfy = df_multi.loc[pd.IndexSlice[:, :], pd.IndexSlice['open']].unstack() 258 | 259 | parser = Parser() 260 | request.function.__globals__.update({'parser': parser, 'dfx': dfx, 'dfy': dfy}) 261 | 262 | 263 | if __name__ == "__main__": 264 | pytest.main([__file__]) 265 | -------------------------------------------------------------------------------- /jaqs_fxdayu/research/signaldigger/signal_creator.py: -------------------------------------------------------------------------------- 1 | # encoding=utf-8 2 | 3 | from .analysis import compute_downside_returns, compute_upside_returns 4 | from . 
import performance as pfm
5 | import pandas as pd
6 | import numpy as np
7 | import jaqs.util as jutil
8 |
9 |
10 | def _process_filter(_filter):
11 | if _filter is not None:
12 | _filter = jutil.fillinf(_filter)
13 | _filter = _filter.fillna(0).astype(bool)  # treat missing values as False before casting to bool
14 | return _filter
15 |
16 |
17 | def _assert(standard, tmp):
18 | if tmp is not None:
19 | assert np.all(standard.index == tmp.index)
20 | assert np.all(standard.columns == tmp.columns)
21 |
22 |
23 | def _get_df(index, columns, value):
24 | return pd.DataFrame(index=index, columns=columns, data=value)
25 |
26 |
27 | class SignalCreator(object):
28 | def __init__(self,
29 | price=None, daily_ret=None,
30 | benchmark_price=None, daily_benchmark_ret=None,
31 | high=None, low=None,
32 | group=None,
33 | period=5, n_quantiles=5,
34 | mask=None,
35 | can_enter=None,
36 | can_exit=None,
37 | forward=True,
38 | commission=0.0008):
39 |
40 | if price is None and daily_ret is None:
41 | raise ValueError("One of price / daily_ret must be provided.")
42 | if price is not None and daily_ret is not None:
43 | raise ValueError("Only one of price / daily_ret should be provided.")
44 | if benchmark_price is not None and daily_benchmark_ret is not None:
45 | raise ValueError("Only one of benchmark_price / daily_benchmark_ret should be provided.")
46 | if not (n_quantiles > 0 and isinstance(n_quantiles, int)):
47 | raise ValueError("n_quantiles must be a positive integer. Input is: {}".format(n_quantiles))
48 |
49 | self.price = price
50 | self.daily_ret = daily_ret
51 | self.high = high
52 | self.low = low
53 | self.group = group
54 | self.n_quantiles = n_quantiles
55 | self.mask = _process_filter(mask)
56 | self.can_enter = _process_filter(can_enter)
57 | self.can_exit = _process_filter(can_exit)
58 |
59 | self.period = period
60 | self.benchmark_price = benchmark_price
61 | self.daily_benchmark_ret = daily_benchmark_ret
62 | self.forward = forward
63 | self.commission = commission
64 |
65 | self.benchmark_ret = None
66 | self.signal_data = None
67 | self.signal_ret = None
68 |
69 | def _judge(self, signal):
70 | # build default filter frames
71 | self.mask = _get_df(signal.index, signal.columns, False) if self.mask is None else self.mask
72 | self.can_enter = _get_df(signal.index, signal.columns, True) if self.can_enter is None else self.can_enter
73 | self.can_exit = _get_df(signal.index, signal.columns, True) if self.can_exit is None else self.can_exit
74 |
75 | # confirm the frame shapes are consistent
76 | _assert(signal, self.mask)
77 | _assert(signal, self.can_enter)
78 | _assert(signal, self.can_exit)
79 | _assert(signal, self.group)
80 |
81 | if self.signal_ret is not None:
82 | for ret_type in self.signal_ret.keys():
83 | _assert(signal, self.signal_ret[ret_type])
84 | else:
85 | _assert(signal, self.price)
86 | _assert(signal, self.daily_ret)
87 | _assert(signal, self.high)
88 | _assert(signal, self.low)
89 |
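    # Typical usage, as a sketch (hypothetical wide DataFrames indexed by
    # trade_date with one column per symbol; the names are illustrative only):
    #
    #     sc = SignalCreator(price=close_df, high=high_df, low=low_df,
    #                        period=5, n_quantiles=5)
    #     signal_data = sc.get_signal_data(signal_df)  # MultiIndex result, see below
    #
    # All frames must share the same index and columns, which is what _judge
    # above enforces before any return is computed.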
90 | def _cal_ret(self):
91 | if self.signal_ret is not None:
92 | return
93 | else:
94 | # benchmark returns
95 | if self.benchmark_price is not None:
96 | self.benchmark_ret = pfm.price2ret(self.benchmark_price, self.period, axis=0, compound=True)
97 | elif self.daily_benchmark_ret is not None:
98 | self.benchmark_ret = pfm.daily_ret_to_ret(self.daily_benchmark_ret, self.period)
99 |
100 | # holding-period returns
101 | isRealPrice = False
102 | if self.daily_ret is not None:
103 | self.daily_ret = jutil.fillinf(self.daily_ret).fillna(0)
104 | self.price = pfm.daily_ret_to_cum(self.daily_ret)
105 | else:
106 | # real prices were provided
107 | isRealPrice = True
108 | self.price = jutil.fillinf(self.price)
109 |
110 | self.can_enter = np.logical_and(self.price.notnull(), self.can_enter)  # bars with NaN prices cannot be entered
111 | df_ret = pfm.price2ret(self.price, period=self.period, axis=0, compound=True)
112 | price_can_exit = self.price.copy()
113 | price_can_exit[~self.can_exit] = np.NaN
114 | price_can_exit = price_can_exit.fillna(method="bfill")
115 | ret_can_exit = pfm.price2ret(price_can_exit, period=self.period, axis=0, compound=True)
116 | df_ret[~self.can_exit] = ret_can_exit[~self.can_exit]
117 |
118 | if self.benchmark_ret is not None:
119 | # holding-period returns relative to the benchmark
120 | self.benchmark_ret = self.benchmark_ret.reindex(df_ret.index)
121 | residual_ret = df_ret.sub(self.benchmark_ret.values.flatten(), axis=0)
122 | else:
123 | residual_ret = df_ret
124 | residual_ret = jutil.fillinf(residual_ret)
125 | residual_ret -= self.commission
126 |
127 | # potential upside and downside room
128 | if self.high is not None and isRealPrice:
129 | self.high = jutil.fillinf(self.high)
130 | else:
131 | self.high = self.price
132 | upside_ret = compute_upside_returns(self.price, self.high, self.can_exit, self.period, compound=True)
133 | upside_ret = jutil.fillinf(upside_ret)
134 | upside_ret -= self.commission
135 |
136 | if self.low is not None and isRealPrice:
137 | self.low = jutil.fillinf(self.low)
138 | else:
139 | self.low = self.price
140 | downside_ret = compute_downside_returns(self.price, self.low, self.can_exit, self.period, compound=True)
141 | downside_ret = jutil.fillinf(downside_ret)
142 | downside_ret -= self.commission
143 |
144 | self.signal_ret = {
145 | "return": residual_ret,
146 | "upside_ret": upside_ret,
147 | "downside_ret": downside_ret
148 | }
149 | if self.forward:
150 | for ret_type in self.signal_ret.keys():
151 | if self.signal_ret[ret_type] is not None:
152 | # point-in-time signal and forward return
153 | self.signal_ret[ret_type] = self.signal_ret[ret_type].shift(-self.period)
154 | else:
155 | self.can_enter = self.can_enter.shift(self.period)
156 | self.mask = self.mask.shift(self.period)
157 |
158 | # combine the masks: masked, or not enterable
159 | self.mask = np.logical_or(self.mask.fillna(True), ~(self.can_enter.fillna(False)))
160 |
161 | def get_signal_data(self, signal):
162 | """
163 | Returns
164 | -------
165 | res : pd.DataFrame
166 | Index is pd.MultiIndex ['trade_date', 'symbol'], columns = ['signal', 'return', 'upside_ret(N)','downside_ret(N)','quantile']
167 | """
168 | self._judge(signal)  # check that signal is consistent with the other key inputs
169 | self._cal_ret()  # compute signal returns
170 | signal = jutil.fillinf(signal)
171 | signal = signal.shift(1)  # avoid forward-looking bias
172 |
173 | # forward or not
174 | if not self.forward:
175 | signal = signal.shift(self.period)
176 |
177 | # combine masks
178 | mask = np.logical_or(self.mask, signal.isnull())
179 |
180 | # calculate quantile
181 | signal_masked = signal.copy()
182 | signal_masked = signal_masked[~mask]
183 | if self.n_quantiles == 1:
184 | df_quantile = signal_masked.copy()
185 | df_quantile.loc[:, :] = 1.0
186 | else:
187 | df_quantile = jutil.to_quantile(signal_masked, n_quantiles=self.n_quantiles)
188 |
189 | # ----------------------------------------------------------------------
190 | # stack
191 | def stack_td_symbol(df):
192 | df = pd.DataFrame(df.stack(dropna=False))  # do not dropna
193 | df.index.names = ['trade_date', 'symbol']
194 | df.sort_index(axis=0, level=['trade_date', 'symbol'], inplace=True)
195 | return df
196 |
197 | # ----------------------------------------------------------------------
198 | # concat signal value
199 | res = stack_td_symbol(signal)  # signal values
200 | res.columns =
['signal'] 201 | 202 | for ret_type in self.signal_ret.keys(): 203 | if self.signal_ret[ret_type] is not None: 204 | res[ret_type] = stack_td_symbol(self.signal_ret[ret_type]).fillna(0) # 收益 205 | 206 | if self.group is not None: 207 | res["group"] = stack_td_symbol(self.group) 208 | 209 | res['quantile'] = stack_td_symbol(df_quantile) # quantile 210 | mask = stack_td_symbol(mask) 211 | res = res.loc[~(mask.iloc[:, 0]), :] 212 | 213 | if len(res) > 0: 214 | print("Nan Data Count (should be zero) : {:d}; " \ 215 | "Percentage of effective data: {:.0f}%".format(res.isnull().sum(axis=0).sum(), 216 | len(res) * 100. / signal.size)) 217 | else: 218 | print("No signal available.") 219 | res = res.astype({'signal': float, 'return': float, 'quantile': int}) 220 | return res 221 | -------------------------------------------------------------------------------- /jaqs_fxdayu/data/dataapi/LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 |
--------------------------------------------------------------------------------
/jaqs_fxdayu/research/signaldigger/optimizer.py:
--------------------------------------------------------------------------------
1 | # encoding=utf-8
2 | # parameter optimizer
3 |
4 |
5 | from itertools import product
6 | from .analysis import analysis
7 | from .signal_creator import SignalCreator
8 | import warnings
9 | import pandas as pd
10 |
11 | target_types = {
12 | 'factor': {
13 | "ic": [
14 | "return_ic",
15 | "upside_ret_ic",
16 | "downside_ret_ic"
17 | ],
18 | "ret": [
19 | "long_ret",
20 | "short_ret",
21 | "long_short_ret",
22 | 'top_quantile_ret',
23 | 'bottom_quantile_ret',
24 | "tmb_ret",
25 | "all_sample_ret"],
26 | "space": [
27 | 'long_space',
28 | 'short_space',
29 | 'long_short_space',
30 | "top_quantile_space",
31 | "bottom_quantile_space",
32 | "tmb_space",
33 | "all_sample_space"
34 | ]
35 | },
36 | "event": {
37 | "ret": [
38 | "long_ret",
39 | "short_ret",
40 | "long_short_ret",
41 | ],
42 | "space": [
43 | 'long_space',
44 | 'short_space',
45 | 'long_short_space',
46 | ]
47 | }
48 | }
49 |
50 | targets = {
51 | "ic": ["IC Mean", "IC Std.", "t-stat(IC)", "p-value(IC)", "IC Skew", "IC Kurtosis", "Ann. IR"],
52 | "ret": ['t-stat', "p-value", "skewness", "kurtosis", "Ann. Ret", "Ann. Vol", "Ann. IR", "occurance"],
53 | "space": [
54 | 'Up_sp Mean',
55 | 'Up_sp Std',
56 | 'Up_sp IR',
57 | 'Up_sp Pct5',
58 | 'Up_sp Pct25 ',
59 | 'Up_sp Pct50 ',
60 | 'Up_sp Pct75',
61 | 'Up_sp Pct95',
62 | 'Up_sp Occur',
63 | 'Down_sp Mean',
64 | 'Down_sp Std',
65 | 'Down_sp IR',
66 | 'Down_sp Pct5',
67 | 'Down_sp Pct25 ',
68 | 'Down_sp Pct50 ',
69 | 'Down_sp Pct75',
70 | 'Down_sp Pct95',
71 | 'Down_sp Occur',
72 | ]
73 | }
74 |
75 |
76 | class Optimizer(object):
77 | '''
78 | :param dataview: jaqs.data.DataView object holding all data the formula needs
79 | :param formula: str (optional) the formula to optimize, e.g. '(open - Delay(close, l1)) / Delay(close, l2)'
80 | :param params: dict (optional) parameter space to search, e.g. {"LEN1":range(1,10,1),"LEN2":range(1,10,1)}
81 | :param name: str (optional) name of the signal
82 | :param price: DataFrame (optional) prices; mutually exclusive with daily_ret
83 | :param daily_ret: DataFrame (optional) daily returns
84 | :param high: DataFrame (optional) high prices, used to compute upside return space
85 | :param low: DataFrame (optional) low prices, used to compute downside return space
86 | :param benchmark_price: DataFrame (optional) benchmark prices; if given, returns are computed relative to the benchmark; mutually exclusive with daily_benchmark_ret
87 | :param daily_benchmark_ret: DataFrame (optional) benchmark daily returns; if given, returns are computed relative to the benchmark
88 | :param period: int (5) holding period for selection
89 | :param n_quantiles: int (5)
90 | :param mask: filter condition, DataFrame (optional)
91 | :param can_enter: DataFrame (optional) whether entry is allowed
92 | :param can_exit: DataFrame (optional) whether exit is allowed
93 | :param forward: bool (True) whether to use forward returns
94 | :param commission: float (0.0008) commission rate
95 | :param is_event: bool (False) whether the signal is an event (0/1 factor)
96 | :param is_quarterly: bool (False) whether the factor is quarterly
97 | '''
98 |
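    # Construction sketch (hypothetical dataview `dv`; LEN1 is a placeholder that
    # gets substituted into the formula, hence the upper-case naming rule below):
    #
    #     opt = Optimizer(dataview=dv,
    #                     formula='- Correlation(vwap_adj, volume, LEN1)',
    #                     params={'LEN1': range(5, 15, 2)},
    #                     period=5, n_quantiles=5, is_event=False)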
99 | def __init__(self,
100 | dataview=None,
101 | formula=None,
102 | params=None,
103 | name=None,
104 | price=None,
105 | daily_ret=None,
106 | high=None,
107 | low=None,
108 | benchmark_price=None,
109 | daily_benchmark_ret=None,
110 | period=5,
111 | n_quantiles=5,
112 | mask=None,
113 | can_enter=None,
114 | can_exit=None,
115 | forward=True,
116 | commission=0.0008,
117 | is_event=False,
118 | is_quarterly=False,
119 | register_funcs=None,
120 | ):
121 | self.dataview = dataview
122 | self.formula = formula
123 | self.params = params
124 | if self.formula is not None:
125 | self._judge_params()
126 | self.name = name if name else formula
127 | if price is None and daily_ret is None:
128 | try:
129 | price = dataview.get_ts('close_adj')
130 | except Exception:
131 | raise ValueError("One of price / ret must be provided.")
132 | self.period = period
133 | if is_event:
134 | n_quantiles = 1
135 | self.is_event = is_event
136 | self.is_quarterly = is_quarterly
137 | self.register_funcs = register_funcs
138 | self.signal_creator = SignalCreator(
139 | price=price, daily_ret=daily_ret,
140 | benchmark_price=benchmark_price, daily_benchmark_ret=daily_benchmark_ret,
141 | high=high, low=low,
142 | period=period, n_quantiles=n_quantiles,
143 | mask=mask,
144 | can_enter=can_enter,
145 | can_exit=can_exit,
146 | forward=forward,
147 | commission=commission
148 | )
149 | self.all_signals = None
150 | self.all_signals_perf = None
151 | self.in_sample_range = None
152 |
153 | # validate the parameter naming convention
154 | def _judge_params(self):
155 | if self.params is None:
156 | raise ValueError("No search space was given to the optimizer (the params argument is required)")
157 | if not isinstance(self.params, dict):
158 | raise ValueError("params must be a dict such as {'LEN1': range(1, 10, 1), 'LEN2': range(1, 10, 1)}")
159 | for para in self.params.keys():
160 | if len(para) < 2 or not para.isupper():
161 | raise ValueError("Invalid formula parameter name %s: names must be upper-case and at least 2 characters long" % (para,))
162 |
163 | # validate the optimization target
164 | def _judge_target(self, target_type, target):
165 | legal = True
166 | # check whether the provided inputs support return-space analysis
167 | if self.signal_creator.high is None or self.signal_creator.low is None:
168 | if (target_type in target_types["factor"]["space"]) or \
169 | (target_type in ["upside_ret_ic", "downside_ret_ic"]) or \
170 | (target in targets["space"]):
171 | legal = False
172 | print("Pass [high] & [low] to Optimizer to enable return-space analysis and optimization")
173 | # check that target / target_type are among the allowed options
174 | if self.is_event:
175 | if target_type in target_types["event"]["ret"]:
176 | if not (target in targets["ret"]):
177 | legal = False
178 | print("The optimization target must be one of %s" % (str(targets["ret"])))
179 | elif target_type in target_types["event"]["space"]:
180 | if not (target in targets["space"]):
181 | legal = False
182 | print("The optimization target must be one of %s" % (str(targets["space"])))
183 | else:
184 | legal = False
185 | print("The optimization target type must be one of %s" % (str(target_types["event"]["ret"] + target_types["event"]["space"])))
186 | else:
187 | if target_type in target_types["factor"]["ret"]:
188 | if not (target in targets["ret"]):
189 | legal = False
190 | print("The optimization target must be one of %s" % (str(targets["ret"])))
191 | elif target_type in target_types["factor"]["ic"]:
192 | if not (target in targets["ic"]):
193 | legal = False
194 | print("The optimization target must be one of %s" % (str(targets["ic"])))
195 | elif target_type in target_types["factor"]["space"]:
196 | if not (target in targets["space"]):
197 | legal = False
198 | print("The optimization target must be one of %s" % (str(targets["space"])))
199 | else:
200 | legal = False; print("The optimization target type must be one of %s" % (
201 | str(target_types["factor"]["ret"] + target_types["factor"]["ic"] + target_types["factor"][
202 | "space"])))
203 | return legal
204 |
205 | def enumerate_optimizer(self,
206 | target_type="long_ret",
207 | target="Ann. IR",
208 | ascending=False,
209 | in_sample_range=None):
210 | '''
211 | :param target_type: kind of optimization target
212 | :param target: the optimization objective
213 | :param ascending: bool (False) sort ascending instead of descending
214 | :param in_sample_range: [date_start(int), date_end(int)] (optional) in-sample window for the optimization.
215 | :return:
216 | '''
217 |
218 | if self._judge_target(target_type, target):  # validate the target
219 | self.get_all_signals_perf(in_sample_range)
220 | if len(self.all_signals_perf) == 0:
221 | return []
222 | if target_type in (target_types["factor"]["ic"]):
223 | order_index = "ic"
224 | elif target_type in (target_types["factor"]["ret"]):
225 | order_index = "ret"
226 | else:
227 | order_index = "space"
228 | ordered_perf = self.all_signals_perf.values()
229 | return sorted(ordered_perf,
230 | key=lambda x: x[order_index].loc[target, target_type],
231 | reverse=not ascending)
232 | return []
233 |
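    # Ranking sketch (hypothetical dates): enumerate_optimizer returns the
    # per-signal performance dicts sorted by the chosen metric, best first
    # when ascending=False:
    #
    #     ranked = opt.enumerate_optimizer(target_type='long_ret', target='Ann. IR',
    #                                      ascending=False,
    #                                      in_sample_range=[20170101, 20170601])
    #     best = ranked[0]['signal_name'] if ranked else None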
IR", 280 | "condition":}] 281 | :return: 282 | ''' 283 | perf = None 284 | if signal_data is not None: 285 | if in_sample_range is not None: 286 | signal_data = signal_data.loc[in_sample_range[0]:in_sample_range[1]] 287 | if len(signal_data) > 0: 288 | perf = analysis(signal_data, self.is_event, self.period) 289 | return perf 290 | -------------------------------------------------------------------------------- /tests/test_research.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | from jaqs_fxdayu import patch_all 5 | 6 | patch_all() 7 | 8 | import numpy as np 9 | import os 10 | from pathlib import Path 11 | from jaqs.data import DataView 12 | from jaqs.data import RemoteDataService 13 | from jaqs.research import SignalDigger 14 | from jaqs.research.signaldigger import performance as pfm 15 | from jaqs.research.signaldigger import plotting 16 | from jaqs_fxdayu.research.signaldigger.analysis import analysis 17 | from tests.data_config import data_config 18 | 19 | output_root = Path(__file__).absolute().parent 20 | 21 | dataview_folder = str(output_root / ".persist" / "test_signal") 22 | if not (os.path.isdir(dataview_folder)): 23 | os.makedirs(dataview_folder) 24 | 25 | 26 | # -------------------------------------------------------------------------------- 27 | # 定义信号过滤条件-非指数成分 28 | def mask_index_member(dv): 29 | df_index_member = dv.get_ts('index_member') 30 | mask_index_member = df_index_member == 0 31 | return mask_index_member 32 | 33 | 34 | # 定义可买卖条件——未停牌、未涨跌停 35 | def limit_up_down(dv): 36 | trade_status = dv.get_ts('trade_status') 37 | mask_sus = trade_status != 1 # 不可交易 38 | # 涨停 39 | dv.add_formula('up_limit', '(close - Delay(close, 1)) / Delay(close, 1) > 0.095', is_quarterly=False, 40 | add_data=True) 41 | # 跌停 42 | dv.add_formula('down_limit', '(close - Delay(close, 1)) / Delay(close, 1) < -0.095', is_quarterly=False, 43 | add_data=True) 44 | can_enter = np.logical_and(dv.get_ts('up_limit') < 1, ~mask_sus) # 未涨停未停牌 45 | can_exit = np.logical_and(dv.get_ts('down_limit') < 1, ~mask_sus) # 未跌停未停牌 46 | return can_enter, can_exit 47 | 48 | 49 | def test_save_dataview(): 50 | ds = RemoteDataService() 51 | ds.init_from_config(data_config) 52 | dv = DataView() 53 | print(DataView) 54 | props = {'start_date': 20170501, 'end_date': 20171001, 'universe': '000016.SH', 55 | 'fields': 'volume,pb,pe,ps,float_mv,sw1', 56 | 'freq': 1} 57 | 58 | dv.init_from_config(props, ds) 59 | dv.prepare_data() 60 | 61 | dv.save_dataview(dataview_folder) 62 | 63 | 64 | def test_analyze_signal(): 65 | # -------------------------------------------------------------------------------- 66 | # Step.1 load dataview 67 | dv = DataView() 68 | dv.load_dataview(dataview_folder) 69 | 70 | mask = mask_index_member(dv) 71 | can_enter, can_exit = limit_up_down(dv) 72 | 73 | # -------------------------------------------------------------------------------- 74 | # Step.3 get signal, benchmark and price data 75 | dv.add_formula('divert', '- Correlation(vwap_adj, volume, 10)', is_quarterly=False, add_data=True) 76 | 77 | signal = dv.get_ts('divert') 78 | price = dv.get_ts('close_adj') 79 | price_bench = dv.data_benchmark 80 | 81 | # Step.4 analyze! 
82 | my_period = 5 83 | obj = SignalDigger(output_folder='../output/test_signal', output_format='pdf') 84 | obj.process_signal_before_analysis(signal=signal, 85 | price=price, 86 | high=dv.get_ts("high_adj"), # 可为空 87 | low=dv.get_ts("low_adj"), # 可为空 88 | group=dv.get_ts("sw1"), 89 | n_quantiles=5, # quantile分类数 90 | mask=mask, # 过滤条件 91 | can_enter=can_enter, # 是否能进场 92 | can_exit=can_exit, # 是否能出场 93 | period=my_period, # 持有期 94 | benchmark_price=price_bench, # 基准价格 可不传入,持有期收益(return)计算为绝对收益 95 | commission=0.0008, 96 | ) 97 | signal_data = obj.signal_data 98 | result = analysis(signal_data, is_event=False, period=my_period) 99 | ic = pfm.calc_signal_ic(signal_data, by_group=True) 100 | mean_ic_by_group = pfm.mean_information_coefficient(ic, by_group=True) 101 | plotting.plot_ic_by_group(mean_ic_by_group) 102 | res = obj.create_full_report() 103 | 104 | 105 | def test_DIY_signal(): 106 | # -------------------------------------------------------------------------------- 107 | # Step.1 load dataview 108 | dv = DataView() 109 | dv.load_dataview(dataview_folder) 110 | # 方法1:add_formula 基于dataview里已有的字段,通过表达式定义因子 111 | dv.add_formula("momentum", "Return(close_adj, 20)", is_quarterly=False, add_data=True) 112 | # 方法2: append_df 构造一个因子表格(pandas.Dataframe),直接添加到dataview当中 113 | import pandas as pd 114 | import talib as ta 115 | 116 | close = dv.get_ts("close_adj").dropna(how='all', axis=1) 117 | slope_df = pd.DataFrame( 118 | {sec_symbol: -ta.LINEARREG_SLOPE(value.values, 10) for sec_symbol, value in close.iteritems()}, 119 | index=close.index) 120 | dv.append_df(slope_df, 'slope') 121 | dv.get_ts("slope") 122 | 123 | # 定义事件 124 | from jaqs_fxdayu.research.signaldigger import process 125 | 126 | Open = dv.get_ts("open_adj") 127 | High = dv.get_ts("high_adj") 128 | Low = dv.get_ts("low_adj") 129 | Close = dv.get_ts("close_adj") 130 | trade_status = dv.get_ts('trade_status') 131 | mask_sus = trade_status!=1 132 | # 剔除掉停牌期的数据 再计算指标 133 | open_masked = process._mask_df(Open, mask=mask_sus) 134 | high_masked = process._mask_df(High, mask=mask_sus) 135 | low_masked = process._mask_df(Low, mask=mask_sus) 136 | close_masked = process._mask_df(Close, mask=mask_sus) 137 | from jaqs_fxdayu.data import signal_function_mod as sfm 138 | MA5 = sfm.ta(ta_method='MA', 139 | ta_column=0, 140 | Open=open_masked, 141 | High=high_masked, 142 | Low=low_masked, 143 | Close=close_masked, 144 | Volume=None, 145 | timeperiod=10) 146 | MA10 = sfm.ta('MA', Close=close_masked, timeperiod=10) 147 | dv.append_df(MA5, 'MA5') 148 | dv.append_df(MA10, 'MA10') 149 | dv.add_formula("Cross", "(MA5>=MA10)&&(Delay(MA5 0: 54 | ax.scatter(entry_long["time"].values, entry_long["entry"].values, label="long", c='r', marker='>', linewidths=1) 55 | if entry_short.size > 0: 56 | ax.scatter(entry_short["time"].values, entry_short["entry"].values, label="short", c='b', marker='>', linewidths=1) 57 | ax.scatter(exit["time"].values, exit["exit"].values, label="exit", c='y', marker='<', linewidths=1) 58 | 59 | # 进出场连线 60 | for _,row in tmp.iterrows(): 61 | x = [trans_t(row["trade_date"]),trans_t(row["exit_time"])] 62 | y = [row["entry"],row["exit"]] 63 | if row["return"]>0: 64 | line_type = "r--" 65 | else: 66 | line_type = "g--" 67 | ax.plot(x,y,line_type,linewidth=1) 68 | 69 | ax.legend(loc='best') 70 | ax.set(title="Entry Exit Position of %s"%(symbol,), 71 | xlabel='Datetime') 72 | 73 | ax.yaxis.set_major_formatter(ScalarFormatter()) 74 | 75 | return ax,symbol 76 | 77 | 78 | def plot_mean_ic_heatmap(mean_ic, period, format="M",ax=None): 79 | 
""" 80 | Plots a heatmap of the information coefficient or returns by month. 81 | 82 | Parameters 83 | ---------- 84 | mean_monthly_ic : pd.DataFrame 85 | The mean monthly IC for N periods forward. 86 | 87 | Returns 88 | ------- 89 | ax : matplotlib.Axes 90 | The axes that were plotted on. 91 | """ 92 | MONTH_MAP = {1: 'Jan', 93 | 2: 'Feb', 94 | 3: 'Mar', 95 | 4: 'Apr', 96 | 5: 'May', 97 | 6: 'Jun', 98 | 7: 'Jul', 99 | 8: 'Aug', 100 | 9: 'Sep', 101 | 10: 'Oct', 102 | 11: 'Nov', 103 | 12: 'Dec'} 104 | 105 | num_plots = 1.0 106 | 107 | v_spaces = ((num_plots - 1) // 3) + 1 108 | 109 | if ax is None: 110 | f, ax = plt.subplots(v_spaces, 3, figsize=(18, v_spaces * 6)) 111 | ax = ax.flatten() 112 | 113 | new_index_y = [] 114 | new_index_x = [] 115 | if format == "D": 116 | for date in mean_ic.index: 117 | new_index_x.append(date.day) 118 | new_index_y.append(str(date.year)+" "+MONTH_MAP[date.month]) 119 | names = ["month","day"] 120 | else: 121 | for date in mean_ic.index: 122 | new_index_y.append(date.year) 123 | new_index_x.append(MONTH_MAP[date.month]) 124 | names = ["year", "month"] 125 | 126 | mean_ic.index = pd.MultiIndex.from_arrays( 127 | [new_index_y, new_index_x], 128 | names=names) 129 | 130 | ic_ = mean_ic['ic'].unstack() 131 | sns.heatmap( 132 | ic_, 133 | annot=True, 134 | alpha=1.0, 135 | center=0.0, 136 | annot_kws={"size": 7}, 137 | linewidths=0.01, 138 | linecolor='white', 139 | cmap=cm.get_cmap('RdBu'), 140 | cbar=False, 141 | ax=ax) 142 | ax.set(ylabel='', xlabel='') 143 | 144 | ax.set_title("IC Mean HeatMap".format(period)) 145 | 146 | return ax 147 | 148 | 149 | def plot_quantile_returns_ts(mean_ret_by_q, ax=None): 150 | """ 151 | Plots mean period wise returns for signal quantiles. 152 | 153 | Parameters 154 | ---------- 155 | mean_ret_by_q : pd.DataFrame 156 | DataFrame with quantile, (group) and mean period wise return values. 157 | ax : matplotlib.Axes, optional 158 | Axes upon which to plot. 159 | 160 | Returns 161 | ------- 162 | ax : matplotlib.Axes 163 | The axes that were plotted on. 164 | 165 | """ 166 | if ax is None: 167 | f, ax = plt.subplots(1, 1, figsize=(18, 6)) 168 | 169 | ret_wide = pd.concat({k: v['mean'] for k, v in mean_ret_by_q.items()}, axis=1) 170 | format = '%Y%m%d' if len(str(ret_wide.index[0])) == 8 else '%Y%m%d%H%M%S' 171 | ret_wide.index = pd.to_datetime(ret_wide.index, format=format) 172 | ret_wide = ret_wide.mul(DECIMAL_TO_PCT) 173 | 174 | ret_wide.plot(lw=1.2, ax=ax, cmap=COLOR_MAP) 175 | ax.legend(loc='upper left') 176 | ymin, ymax = ret_wide.min().min(), ret_wide.max().max() 177 | ax.set(ylabel='Return (%)', 178 | title="Quantile Return (equal weight within quantile)", 179 | xlabel='DateTime', 180 | ylim=(ymin, ymax)) 181 | 182 | ax.yaxis.set_major_formatter(ScalarFormatter()) 183 | ax.axhline(1.0, linestyle='-', color='black', lw=1) 184 | 185 | return ax 186 | 187 | 188 | def plot_cumulative_returns_by_quantile(quantile_ret, ax=None): 189 | """ 190 | Plots the cumulative returns of various signal quantiles. 191 | 192 | Parameters 193 | ---------- 194 | quantile_ret : int: pd.DataFrame 195 | Cumulative returns by signal quantile. 196 | ax : matplotlib.Axes, optional 197 | Axes upon which to plot. 
198 | 199 | Returns 200 | ------- 201 | ax : matplotlib.Axes 202 | """ 203 | 204 | if ax is None: 205 | f, ax = plt.subplots(1, 1, figsize=(18, 6)) 206 | 207 | cum_ret = quantile_ret 208 | format = '%Y%m%d' if len(str(cum_ret.index[0])) == 8 else '%Y%m%d%H%M%S' 209 | cum_ret.index = pd.to_datetime(cum_ret.index, format=format) 210 | cum_ret = cum_ret.mul(DECIMAL_TO_PCT) 211 | 212 | cum_ret.plot(lw=2, ax=ax, cmap=COLOR_MAP) 213 | ax.axhline(0.0, linestyle='-', color='black', lw=1) 214 | 215 | ax.legend(loc='upper left') 216 | ymin, ymax = cum_ret.min().min(), cum_ret.max().max() 217 | ax.set(ylabel='Cumulative Returns (%)', 218 | title='Cumulative Return of Each Quantile (equal weight within quantile)', 219 | xlabel='DateTime', 220 | ylim=(ymin, ymax)) 221 | perfs = ["total_ret_{:d} = {:.1f}%".format(col, performance.calc_performance_metrics(ser, cum_return=True, 222 | compound=False)['total_ret']) 223 | for col, ser in cum_ret.iteritems()] 224 | ax.text(.02, .30, 225 | '\n'.join(perfs), 226 | fontsize=12, 227 | bbox={'facecolor': 'white', 'alpha': 1, 'pad': 5}, 228 | transform=ax.transAxes, 229 | verticalalignment='top') 230 | 231 | ax.yaxis.set_major_formatter(ScalarFormatter()) 232 | 233 | return ax 234 | 235 | 236 | def plot_mean_quantile_returns_spread_time_series(mean_returns_spread, 237 | period, 238 | ax=None): 239 | """ 240 | Plots mean period wise returns for signal quantiles. 241 | 242 | Parameters 243 | ---------- 244 | mean_returns_spread : pd.Series 245 | Series with difference between quantile mean returns by period. 246 | std_err : pd.Series 247 | Series with standard error of difference between quantile 248 | mean returns each period. 249 | bandwidth : float 250 | Width of displayed error bands in standard deviations. 251 | ax : matplotlib.Axes, optional 252 | Axes upon which to plot. 253 | 254 | Returns 255 | ------- 256 | ax : matplotlib.Axes 257 | The axes that were plotted on. 258 | """ 259 | 260 | periods = period 261 | title = ('Top Minus Bottom Quantile Return' 262 | .format(periods if periods is not None else "")) 263 | 264 | if ax is None: 265 | f, ax = plt.subplots(figsize=(18, 6)) 266 | format = '%Y%m%d' if len(str(mean_returns_spread.index[0])) == 8 else '%Y%m%d%H%M%S' 267 | mean_returns_spread.index = pd.to_datetime(mean_returns_spread.index, format=format) 268 | mean_returns_spread_bps = mean_returns_spread['mean_diff'] * DECIMAL_TO_PCT 269 | 270 | mean_returns_spread_bps.plot(alpha=0.4, ax=ax, lw=0.7, color='navy') 271 | mean_returns_spread_bps.rolling(30).mean().plot(color='green', 272 | alpha=0.7, 273 | ax=ax) 274 | ax.axhline(0.0, linestyle='-', color='black', lw=1, alpha=0.8) 275 | 276 | ax.legend(['mean returns spread', '30 moving avg'], loc='upper right') 277 | ylim = np.nanpercentile(abs(mean_returns_spread_bps.values), 95) 278 | ax.set(ylabel='Difference In Quantile Mean Return (%)', 279 | xlabel='', 280 | title=title, 281 | ylim=(-ylim, ylim)) 282 | 283 | return ax 284 | 285 | 286 | def plot_cumulative_return(ret, ax=None, title=None): 287 | """ 288 | Plots the cumulative returns of the returns series passed in. 289 | 290 | Parameters 291 | ---------- 292 | ret : pd.Series 293 | Period wise returns of dollar neutral portfolio weighted by signal 294 | value. 295 | ax : matplotlib.Axes, optional 296 | Axes upon which to plot. 297 | 298 | Returns 299 | ------- 300 | ax : matplotlib.Axes 301 | The axes that were plotted on. 
302 | """ 303 | if ax is None: 304 | f, ax = plt.subplots(1, 1, figsize=(18, 6)) 305 | 306 | ret = ret.copy() 307 | 308 | cum = ret # pfm.daily_ret_to_cum(ret) 309 | format = '%Y%m%d' if len(str(cum.index[0])) == 8 else '%Y%m%d%H%M%S' 310 | cum.index = pd.to_datetime(cum.index, format=format) 311 | cum = cum.mul(DECIMAL_TO_PCT) 312 | 313 | cum.plot(ax=ax, lw=3, color='indianred', alpha=1.0) 314 | ax.axhline(0.0, linestyle='-', color='black', lw=1) 315 | 316 | metrics = performance.calc_performance_metrics(cum, cum_return=True, compound=False) 317 | ax.text(.85, .30, 318 | "total_ret = {:.1f}%\nmean(ret). = {:.4f}%\nstd(ret) = {:.4f}\nir = {:.4f}".format(metrics['total_ret'], 319 | metrics['mean(ret)'], 320 | metrics['std(ret)'], 321 | metrics['ir']), 322 | fontsize=12, 323 | bbox={'facecolor': 'white', 'alpha': 1, 'pad': 5}, 324 | transform=ax.transAxes, 325 | verticalalignment='top') 326 | if title is None: 327 | title = "Cumulative Return" 328 | ax.set(ylabel='Cumulative Return (%)', 329 | title=title, 330 | xlabel='DateTime') 331 | 332 | return ax 333 | 334 | -------------------------------------------------------------------------------- /docs/optimizer.md: -------------------------------------------------------------------------------- 1 | 2 | # Optimizer 3 | 4 | ## 介绍 5 | Optimizer是optimizer模块中的一个核心类,提供了因子算法参数优化的功能 6 | 7 | *** 步骤 *** 8 | 1. 实例化Optimizer 9 | 2. 进行因子计算和参数优化 10 | 11 | # step 1 实例化Optimizer 12 | 13 | ## __init__ 14 | 15 | - ` jaqs_fxdayu.research.signaldigger.optimizer.Optimizer.__init__(*args, **kwargs) ` 16 | 17 | **简要描述:** 18 | 19 | - 初始化Optimizer 20 | 21 | **参数:** 22 | 23 | 24 | |参数名|必选|类型|说明| 25 | |:---- |:---|:----- |----- | 26 | |dataview|是 |jaqs.data.Dataview|包含了因子相关的所有标的证券及因子计算所要用到的所有字段的数据集| 27 | |formula|否 |string|需要优化的公式:如'(open - Delay(close, LEN1)) / Delay(close, LEN2)'| 28 | |params|否 |dict|需要优化的参数范围:如{"LEN1":range(1,10,1),"LEN2":range(1,10,1)}| 29 | |name|否 |string|信号的名称| 30 | |price |是,price与daily_ret二选一 |pandas.DataFrame|因子涉及到的股票的价格数据,用于作为进出场价用于计算收益,日期为索引,股票品种为columns| 31 | |daily_ret |是,price与daily_ret二选一 |pandas.DataFrame| 因子涉及到的股票的每日收益,日期为索引,股票品种为columns| 32 | |benchmark_price | 否,benchmark_price与daily_benchmark_ret二选一 |pandas.DataFrame or pandas.Series|基准价格,日期为索引。用于计算因子涉及到的股票的持有期**相对收益**--相对基准。默认为空,为空时计算的收益为**绝对收益**。| 33 | |daily_benchmark_ret | 否,benchmark_price与daily_benchmark_ret二选一 |pandas.DataFrame or pandas.Series|基准日收益,日期为索引。用于计算因子涉及到的股票的持有期**相对收益**--相对基准。默认为空,为空时计算的收益为**绝对收益**。| 34 | |high |否 |pandas.DataFrame|因子涉及到的股票的最高价数据,用于计算持有期潜在最大上涨收益,日期为索引,股票品种为columns,默认为空| 35 | |low |否 |pandas.DataFrame|因子涉及到的股票的最低价数据,用于计算持有期潜在最大下跌收益,日期为索引,股票品种为columns,默认为空| 36 | |period |否 |int|持有周期,默认为5,即持有5天| 37 | |n_quantiles |否 |int|根据每日因子值的大小分成n_quantiles组,默认为5,即将因子每天分成5组| 38 | |mask |否 |pandas.DataFrame|一张由bool值组成的表格,日期为索引,股票品种为columns,表示在做因子分析时是否要对某期的某个品种过滤。对应位置为True则**过滤**(剔除)——不纳入因子分析考虑。默认为空,不执行过滤操作| 39 | |can_enter |否 |pandas.DataFrame|一张由bool值组成的表格,日期为索引,股票品种为columns,表示某期的某个品种是否可以买入(进场)。对应位置为True则可以买入。默认为空,任何时间任何品种均可买入| 40 | |can_exit |否 |pandas.DataFrame|一张由bool值组成的表格,日期为索引,股票品种为columns,表示某期的某个品种是否可以卖出(出场)。对应位置为True则可以卖出。默认为空,任何时间任何品种均可卖出| 41 | |forward |否 |bool|收益对齐方式,forward=True则在当期因子下对齐下一期实现的收益;forward=False则在当期实现收益下对齐上一期的因子值。默认为True| 42 | |commission |否 |float|手续费比例,每次换仓收取的手续费百分比,默认为万分之八0.0008| 43 | |is_event |否 |bool|是否是事件(0/1因子),默认为否| 44 | |is_quarterly |否 |bool|是否是季度因子,默认为否| 45 | |register_funcs |否 |dict of function|待优化公式中用到的自定义方法所组成的dict,如{"name1":func1,"name2":func2}| 46 | 47 | **示例:** 48 | 49 | 50 | ```python 51 | import warnings 52 | 
warnings.filterwarnings('ignore') 53 | ``` 54 | 55 | 56 | ```python 57 | from jaqs_fxdayu.research import Optimizer 58 | from jaqs_fxdayu.data import DataView 59 | 60 | # 加载dataview数据集 61 | dv = DataView() 62 | dataview_folder = './data' 63 | dv.load_dataview(dataview_folder) 64 | 65 | def _cut_negative(df): 66 | return df[df>=0] 67 | 68 | # step 1:实例化Optimizer 69 | optimizer = Optimizer(dataview=dv, 70 | formula='CAL(- Correlation(vwap_adj, high, LEN))', 71 | params={"CAL":["Cut_Neg",""],"LEN":range(2,5,1),}, 72 | name='test', 73 | price = dv.get_ts('close_adj'), 74 | high=dv.get_ts('high_adj'), 75 | low=dv.get_ts('low_adj'), 76 | benchmark_price=None,#=None求绝对收益 #=price_bench求相对收益 77 | period=30, 78 | n_quantiles=5, 79 | commission=0.0008,#手续费 默认0.0008 80 | is_event=False,#是否是事件(0/1因子) 81 | is_quarterly=False, 82 | register_funcs={"Cut_Neg":_cut_negative})#是否是季度因子 默认为False 83 | ``` 84 | 85 | Dataview loaded successfully. 86 | 87 | 88 | # step 2 进行因子计算和参数优化 89 | 90 | ## dataview 91 | 92 | - ` jaqs_fxdayu.research.signaldigger.optimizer.Optimizer.dataview ` 93 | 94 | **简要描述:** 95 | 96 | - 优化器用到的数据集 97 | 98 | **示例:** 99 | 100 | 101 | ```python 102 | optimizer.dataview 103 | ``` 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | ## formula 113 | 114 | - ` jaqs_fxdayu.research.signaldigger.optimizer.Optimizer.formula ` 115 | 116 | **简要描述:** 117 | 118 | - 优化器所优化的因子表达式 119 | 120 | **示例:** 121 | 122 | 123 | ```python 124 | optimizer.formula 125 | ``` 126 | 127 | 128 | 129 | 130 | 'CAL(- Correlation(vwap_adj, high, LEN))' 131 | 132 | 133 | 134 | ## params 135 | 136 | - ` jaqs_fxdayu.research.signaldigger.optimizer.Optimizer.params ` 137 | 138 | **简要描述:** 139 | 140 | - 优化器所优化的参数范围 141 | 142 | **示例:** 143 | 144 | 145 | ```python 146 | optimizer.params 147 | ``` 148 | 149 | 150 | 151 | 152 | {'CAL': ['Cut_Neg', ''], 'LEN': range(2, 5)} 153 | 154 | 155 | 156 | ## name 157 | 158 | - ` jaqs_fxdayu.research.signaldigger.optimizer.Optimizer.name ` 159 | 160 | **简要描述:** 161 | 162 | - 优化器所优化的信号名称 163 | 164 | **示例:** 165 | 166 | 167 | ```python 168 | optimizer.name 169 | ``` 170 | 171 | 172 | 173 | 174 | 'test' 175 | 176 | 177 | 178 | ## period 179 | 180 | - ` jaqs_fxdayu.research.signaldigger.optimizer.Optimizer.period ` 181 | 182 | **简要描述:** 183 | 184 | - 待优化因子所指定的调仓周期 185 | 186 | **示例:** 187 | 188 | 189 | ```python 190 | optimizer.period 191 | ``` 192 | 193 | 194 | 195 | 196 | 30 197 | 198 | 199 | 200 | ## all_signals 201 | 202 | - ` jaqs_fxdayu.research.signaldigger.optimizer.Optimizer.all_signals ` 203 | 204 | **简要描述:** 205 | 206 | - 不同参数下计算得到的signal_data(关于signal_data的定义,详见文档digger部分-signal_data)所组成的字典 207 | - 在初始化Optimizer实例时指定了formula和params后,可以通过Optimizer.get_all_signals()计算不同参数下该公式算得的所有因子值;也可以手动指定 208 | 209 | **示例:** 210 | 211 | 212 | ```python 213 | print(optimizer.all_signals) 214 | ``` 215 | 216 | None 217 | 218 | 219 | ## get_all_signals 220 | 221 | - ` jaqs_fxdayu.research.signaldigger.optimizer.Optimizer.get_all_signals() ` 222 | 223 | **简要描述:** 224 | 225 | - 通过Optimizer.get_all_signals()计算不同参数下该公式算得的所有因子值,并更新Optimizer.all_signals属性 226 | 227 | **示例:** 228 | 229 | 230 | ```python 231 | optimizer.get_all_signals() 232 | ``` 233 | 234 | Nan Data Count (should be zero) : 0; Percentage of effective data: 13% 235 | Nan Data Count (should be zero) : 0; Percentage of effective data: 5% 236 | Nan Data Count (should be zero) : 0; Percentage of effective data: 2% 237 | Nan Data Count (should be zero) : 0; Percentage of effective data: 92% 238 | Nan Data Count (should be zero) : 0; Percentage of 
effective data: 94% 239 | Nan Data Count (should be zero) : 0; Percentage of effective data: 94% 240 | 241 | 242 | 243 | ```python 244 | print(optimizer.all_signals.keys()) 245 | print(optimizer.all_signals["test{'CAL': '', 'LEN': 2}"].head()) 246 | ``` 247 | 248 | dict_keys(["test{'CAL': 'Cut_Neg', 'LEN': 2}", "test{'CAL': 'Cut_Neg', 'LEN': 3}", "test{'CAL': 'Cut_Neg', 'LEN': 4}", "test{'CAL': '', 'LEN': 2}", "test{'CAL': '', 'LEN': 3}", "test{'CAL': '', 'LEN': 4}"]) 249 | signal return upside_ret downside_ret quantile 250 | trade_date symbol 251 | 20170503 000001.SZ -1.0 0.011546 0.031748 -0.038959 2 252 | 000002.SZ -1.0 0.109486 0.165690 -0.021479 2 253 | 000008.SZ -1.0 -0.071442 -0.005851 -0.119487 3 254 | 000009.SZ -1.0 -0.089585 -0.003136 -0.165520 2 255 | 000027.SZ -1.0 -0.016835 0.051678 -0.060567 5 256 | 257 | 258 | ## all_signals_perf 259 | 260 | - ` jaqs_fxdayu.research.signaldigger.optimizer.Optimizer.all_signals_perf ` 261 | 262 | **简要描述:** 263 | 264 | - 不同参数下计算得到的signal_data(关于signal_data的定义,详见文档digger部分-signal_data)的绩效表现所组成的字典 265 | - 在Optimizer.all_signals不为空的情况下,可以通过Optimizer.get_all_signals_perf()计算Optimizer.all_signals中不同因子的表现; 266 | - 在执行过Optimizer.get_all_signals_perf()后才能获取 267 | 268 | **返回:** 269 | 270 | dict of performance - 不同因子表现所组成的字典 271 | 其中每个performance(因子表现)也是一个字典,由ic、ret、space三个key构成,分别对应ic分析表、收益分析表、潜在收益空间分析表(关于这三张表的说明,详见文档-analysis) 272 | 273 | **示例:** 274 | 275 | 276 | ```python 277 | print(optimizer.all_signals_perf) 278 | ``` 279 | 280 | None 281 | 282 | 283 | ## get_all_signals_perf 284 | 285 | - ` jaqs_fxdayu.research.signaldigger.optimizer.Optimizer.get_all_signals_perf(in_sample_range=None) ` 286 | 287 | **简要描述:** 288 | 289 | - 在Optimizer.all_signals不为空的情况下,通过Optimizer.get_all_signals_perf()计算Optimizer.all_signals中不同因子的表现,并更新Optimizer.all_signals_perf属性; 290 | 291 | **参数:** 292 | 293 | |字段|必选|类型|说明| 294 | |:---- |:---|:----- |----- | 295 | |in_sample_range |否|list of int|因子表现计算的时间范围,如[20140101,20160101] 表示计算因子表现所涵盖的数据范围只在20140101到20160101之间。默认为None,在全样本上计算因子表现| 296 | 297 | 298 | **示例:** 299 | 300 | 301 | ```python 302 | optimizer.get_all_signals_perf() 303 | ``` 304 | 305 | 306 | ```python 307 | print(optimizer.all_signals_perf.keys()) 308 | print(optimizer.all_signals_perf["test{'CAL': '', 'LEN': 2}"].keys()) 309 | optimizer.all_signals_perf["test{'CAL': '', 'LEN': 2}"]["ic"] 310 | ``` 311 | 312 | dict_keys(["test{'CAL': 'Cut_Neg', 'LEN': 2}", "test{'CAL': 'Cut_Neg', 'LEN': 3}", "test{'CAL': 'Cut_Neg', 'LEN': 4}", "test{'CAL': '', 'LEN': 2}", "test{'CAL': '', 'LEN': 3}", "test{'CAL': '', 'LEN': 4}"]) 313 | dict_keys(['ic', 'ret', 'space', 'signal_name']) 314 | 315 | 316 | 317 | 318 | 319 |
320 | 333 | 334 | 335 | 336 | 337 | 338 | 339 | 340 | 341 | 342 | 343 | 344 | 345 | 346 | 347 | 348 | 349 | 350 | 351 | 352 | 353 | 354 | 355 | 356 | 357 | 358 | 359 | 360 | 361 | 362 | 363 | 364 | 365 | 366 | 367 | 368 | 369 | 370 | 371 | 372 | 373 | 374 | 375 | 376 | 377 | 378 | 379 | 380 | 381 | 382 | 383 | 384 | 385 | 386 |
return_icupside_ret_icdownside_ret_ic
IC Mean-0.025674-0.0386120.001965
IC Std.0.0587030.0631770.058535
t-stat(IC)-3.812846-5.3281010.292652
p-value(IC)0.0002800.0000010.770596
IC Skew0.6257320.6893230.226355
IC Kurtosis0.4340470.4958040.149208
Ann. IR-0.437363-0.6111750.033570
387 |
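Since `all_signals_perf` is a plain dict keyed by signal name, candidates can also be ranked by hand before turning to `enumerate_optimizer`. A minimal sketch, assuming `get_all_signals_perf()` has already run and using the `ic` table shown above; it mirrors the `x[order_index].loc[target, target_type]` lookup that `enumerate_optimizer` performs internally:


```python
# Rank parameter sets by the annualized IR of the return IC.
ranked = sorted(optimizer.all_signals_perf.values(),
                key=lambda perf: perf["ic"].loc["Ann. IR", "return_ic"],
                reverse=True)  # larger IR first
for perf in ranked[:3]:
    print(perf["signal_name"], perf["ic"].loc["Ann. IR", "return_ic"])
```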
388 | 389 | 390 | 391 | ## enumerate_optimizer 392 | 393 | - ` jaqs_fxdayu.research.signaldigger.optimizer.Optimizer.enumerate_optimizer(target_type="long_ret",target="Ann. IR",ascending=False,in_sample_range=None) ` 394 | 395 | **简要描述:** 396 | 397 | - 枚举优化。按照指定的参数优化范围遍历每一种可能性,并给出最佳绩效下的排序结果 398 | 399 | **参数:** 400 | 401 | |字段|必选|类型|说明| 402 | |:---- |:---|:----- |----- | 403 | |target_type |是|string|待优化的目标类型,有ic类、持有收益类、收益空间类三个大类,下分小类,具体类型见下| 404 | |target |是|string|待优化的目标绩效指标,有ic类、持有收益类、收益空间类三个大类,下分小类,具体类型见下| 405 | |ascending |否|bool|输出结果是否升序排列,默认为False--降序排列(指标越大排名越前)| 406 | |in_sample_range |否|list of int|样本内优化范围 默认为None,在全样本上优化| 407 | 408 | #### 优化目标的详细介绍 409 | 目前,所有可优化的目标均围绕analysis模块中提供的三张绩效表——ic分析表、收益分析表、潜在收益空间分析表(关于这三张表的详细定义,参考文档-analysis)。 410 | 411 | #### target_type: 412 | * ic类: 413 | return_ic/upside_ret_ic/downside_ret_ic 414 | * 持有收益类 415 | long_ret/short_ret/long_short_ret/top_quantile_ret/bottom_quantile_ret/tmb_ret 416 | * 收益空间类 417 | long_space/short_space/long_short_space/top_quantile_space/bottom_quantile_space/tmb_space 418 | 419 | #### target: 420 | * ic类 421 | "IC Mean", "IC Std.", "t-stat(IC)", "p-value(IC)", "IC Skew", "IC Kurtosis", "Ann. IR" 422 | * 持有收益类 423 | 't-stat', "p-value", "skewness", "kurtosis", "Ann. Ret", "Ann. Vol", "Ann. IR", "occurance" 424 | * 收益空间类 425 | 'Up_sp Mean','Up_sp Std','Up_sp IR','Up_sp Pct5', 'Up_sp Pct25 ','Up_sp Pct50 ', 'Up_sp Pct75','Up_sp Pct95','Up_sp Occur','Down_sp Mean','Down_sp Std', 'Down_sp IR', 'Down_sp Pct5','Down_sp Pct25 ','Down_sp Pct50 ','Down_sp Pct75', 'Down_sp Pct95','Down_sp Occur' 426 | 427 | 428 | **返回:** 429 | 430 | list of performance - 绩效的排序结果(只计算了样本内的绩效) 431 | 其中每个performance(因子表现)是一个字典,由ic、ret、space三个key构成,分别对应ic分析表、收益分析表、潜在收益空间分析表(关于这三张表的说明,详见文档-analysis) 432 | 433 | 434 | **示例:** 435 | 436 | 437 | ```python 438 | ret_best = optimizer.enumerate_optimizer(target_type="top_quantile_ret",#优化目标类型 439 | target="Ann. IR",#优化目标 440 | in_sample_range=[20170501,20170801],#样本内范围 441 | ascending=False) 442 | ``` 443 | 444 | 445 | ```python 446 | print(len(ret_best)) 447 | print(ret_best[0].keys()) 448 | print(ret_best[0]["signal_name"]) 449 | ret_best[0]["ret"] 450 | ``` 451 | 452 | 6 453 | dict_keys(['ic', 'ret', 'space', 'signal_name']) 454 | test{'CAL': 'Cut_Neg', 'LEN': 4} 455 | 456 | 457 | 458 | 459 | 460 |
461 | 474 | 475 | 476 | 477 | 478 | 479 | 480 | 481 | 482 | 483 | 484 | 485 | 486 | 487 | 488 | 489 | 490 | 491 | 492 | 493 | 494 | 495 | 496 | 497 | 498 | 499 | 500 | 501 | 502 | 503 | 504 | 505 | 506 | 507 | 508 | 509 | 510 | 511 | 512 | 513 | 514 | 515 | 516 | 517 | 518 | 519 | 520 | 521 | 522 | 523 | 524 | 525 | 526 | 527 | 528 | 529 | 530 | 531 | 532 | 533 | 534 | 535 | 536 | 537 | 538 | 539 | 540 | 541 | 542 | 543 | 544 | 545 | 546 | 547 | 548 | 549 | 550 | 551 | 552 | 553 | 554 | 555 | 556 | 557 | 558 | 559 | 560 |
long_retlong_short_rettop_quantile_retbottom_quantile_rettmb_retall_sample_ret
t-stat6.483452-0.1014075.3922924.371209-0.5891159.697444
p-value0.0000000.9195800.0000000.0000300.5579600.000000
skewness0.293834-0.2233910.5719580.7897550.1395800.937413
kurtosis-0.3582580.0818830.3683761.2277290.2371432.696370
Ann. Ret0.301933-0.0044700.4404320.314685-0.0537470.329067
Ann. Vol0.1280630.1181900.2093610.2534710.2508850.238054
Ann. IR2.357699-0.0378182.1036941.241505-0.2142311.382322
occurance62.00000059.00000054.000000101.00000062.000000398.000000
561 |
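Each candidate is named `name + str(param_dict)` (see `Optimizer.get_all_signals`), so the winning parameters can be recovered from `signal_name`. A small sketch, reusing the `ret_best` result from the example above:


```python
import ast

best = ret_best[0]
# Strip the signal-name prefix ('test'), leaving the str(dict) part.
param_str = best["signal_name"][len(optimizer.name):]
best_params = ast.literal_eval(param_str)  # e.g. {'CAL': 'Cut_Neg', 'LEN': 4}
print(best_params)
```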
562 | 563 | 564 | -------------------------------------------------------------------------------- /docs/dp.md: -------------------------------------------------------------------------------- 1 | 2 | # dp 3 | 4 | ## 介绍 5 | 针对A股因子研究和交易分析场景,提供了常用的小工具,如查询历史的交易日,历史的行业分类表等 6 | 7 | 8 | ```python 9 | import warnings 10 | warnings.filterwarnings('ignore') 11 | ``` 12 | 13 | 14 | ```python 15 | data_config = { 16 | "remote.data.address": "tcp://data.quantos.org:8910", 17 | "remote.data.username": "18566262672", 18 | "remote.data.password": "eyJhbGciOiJIUzI1NiJ9.eyJjcmVhdGVfdGltZSI6IjE1MTI3MDI3NTAyMTIiLCJpc3MiOiJhdXRoMCIsImlkIjoiMTg1NjYyNjI2NzIifQ.O_-yR0zYagrLRvPbggnru1Rapk4kiyAzcwYt2a3vlpM" 19 | } 20 | 21 | from jaqs_fxdayu.data import DataApi 22 | 23 | api = DataApi(data_config["remote.data.address"]) # 传入连接到的远端数据服务器的tcp地址 24 | api.login(username=data_config["remote.data.username"], 25 | password=data_config["remote.data.password"]) 26 | ``` 27 | 28 | 29 | 30 | 31 | ('username: 18566262672', '0,') 32 | 33 | 34 | 35 | ## trade_days 36 | - ` jaqs_fxdayu.util.dp.trade_days(api, start, end) ` 37 | 38 | **简要描述:** 39 | 40 | - 返回起止日期间的交易日 41 | 42 | **参数:** 43 | 44 | |字段|必选|类型|说明| 45 | |:---- |:---|:----- |----- | 46 | |api |是| jaqs.data.DataApi |jaqs.data.DataApi| 47 | |start |是|int |开始日期| 48 | |end |是|int |结束日期| 49 | 50 | **返回:** 51 | 52 | 起止日期间的交易日 53 | 54 | **示例:** 55 | 56 | 57 | ```python 58 | from jaqs_fxdayu.util.dp import trade_days 59 | trade_days(api, 20170101, 20180101) 60 | ``` 61 | 62 | 63 | 64 | 65 | Int64Index([20170103, 20170104, 20170105, 20170106, 20170109, 20170110, 66 | 20170111, 20170112, 20170113, 20170116, 67 | ... 68 | 20171218, 20171219, 20171220, 20171221, 20171222, 20171225, 69 | 20171226, 20171227, 20171228, 20171229], 70 | dtype='int64', name='trade_date', length=244) 71 | 72 | 73 | 74 | ## index_cons 75 | - ` jaqs_fxdayu.util.dp.index_cons(api, index_code, start, end) ` 76 | 77 | **简要描述:** 78 | 79 | - 获得某个指数起止时间段的历史成分股信息 80 | 81 | **参数:** 82 | 83 | |字段|必选|类型|说明| 84 | |:---- |:---|:----- |----- | 85 | |api |是| jaqs.data.DataApi |jaqs.data.DataApi| 86 | |index_code |是| str |指数代码| 87 | |start |是|int |开始日期| 88 | |end |是|int |结束日期| 89 | 90 | **返回:** 91 | 92 | 某个指数起止时间段的历史成分股信息 93 | 94 | - 其中 in_date:纳入该指数的时间;out_date:从该指数移除的时间 95 | 96 | **示例:** 97 | 98 | 99 | ```python 100 | from jaqs_fxdayu.util.dp import index_cons 101 | index_cons(api, "000300.SH", 20170101, 20170501).head() 102 | ``` 103 | 104 | 105 | 106 | 107 |
108 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 |
in_dateindex_codeout_datesymbol
020050408000300.SH99999999000001.SZ
120050408000300.SH99999999000002.SZ
220161212000300.SH99999999000008.SZ
320050408000300.SH20171208000009.SZ
420140616000300.SH20170609000027.SZ
169 |
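An `out_date` of 99999999 marks a constituent that has not yet been removed (visible in the sample output above), so current members can be filtered directly. A minimal sketch; the dtype of `out_date` is not guaranteed, hence the string comparison:


```python
cons = index_cons(api, "000300.SH", 20170101, 20170501)
# 99999999 is the sentinel out_date for names still in the index.
current = cons[cons["out_date"].astype(str) == "99999999"]["symbol"]
print(current.head())
```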
170 | 171 | 172 | 173 | ## daily_index_cons 174 | - ` jaqs_fxdayu.util.dp.daily_index_cons(api, index_code, start, end) ` 175 | 176 | **简要描述:** 177 | 178 | - 指定起止时间段,成分股是否还在某指数当中 179 | 180 | **参数:** 181 | 182 | |字段|必选|类型|说明| 183 | |:---- |:---|:----- |----- | 184 | |api |是| jaqs.data.DataApi |jaqs.data.DataApi| 185 | |index_code |是| str |指数代码| 186 | |start |是|int |开始日期| 187 | |end |是|int |结束日期| 188 | 189 | **示例:** 190 | 191 | 192 | ```python 193 | from jaqs_fxdayu.util.dp import daily_index_cons 194 | daily_index_cons(api, "000300.SH", 20170101, 20170501).head() 195 | ``` 196 | 197 | 198 | 199 | 200 |
201 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | 306 | 307 | 308 | 309 | 310 | 311 | 312 | 313 | 314 | 315 | 316 | 317 | 318 | 319 | 320 | 321 | 322 | 323 | 324 | 325 | 326 | 327 | 328 | 329 | 330 | 331 | 332 | 333 | 334 | 335 | 336 | 337 | 338 | 339 | 340 | 341 | 342 | 343 | 344 | 345 | 346 | 347 | 348 | 349 | 350 | 351 | 352 | 353 | 354 | 355 | 356 | 357 | 358 | 359 | 360 | 361 | 362 | 363 | 364 | 365 | 366 | 367 | 368 | 369 | 370 | 371 | 372 | 373 | 374 | 375 | 376 | 377 | 378 | 379 | 380 | 381 | 382 | 383 | 384 | 385 | 386 | 387 |
000001.SZ000002.SZ000008.SZ000009.SZ000027.SZ000039.SZ000060.SZ000061.SZ000063.SZ000069.SZ...601933.SH601939.SH601958.SH601985.SH601988.SH601989.SH601998.SH603000.SH603885.SH603993.SH
trade_date
20170103TrueTrueTrueTrueTrueTrueTrueTrueTrueTrue...TrueTrueTrueTrueTrueTrueTrueTrueTrueTrue
20170104TrueTrueTrueTrueTrueTrueTrueTrueTrueTrue...TrueTrueTrueTrueTrueTrueTrueTrueTrueTrue
20170105TrueTrueTrueTrueTrueTrueTrueTrueTrueTrue...TrueTrueTrueTrueTrueTrueTrueTrueTrueTrue
20170106TrueTrueTrueTrueTrueTrueTrueTrueTrueTrue...TrueTrueTrueTrueTrueTrueTrueTrueTrueTrue
20170109TrueTrueTrueTrueTrueTrueTrueTrueTrueTrue...TrueTrueTrueTrueTrueTrueTrueTrueTrueTrue
388 |

5 rows × 301 columns

389 |
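The boolean frame returned above matches the mask convention used elsewhere in this library (True means "filter out" during factor analysis), so a non-constituent mask is just the negation. A minimal sketch:


```python
member = daily_index_cons(api, "000300.SH", 20170101, 20170501)
# Mask convention: True = exclude the symbol on that date.
mask_not_member = ~member.astype(bool)
```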
390 | 391 | 392 | 393 | ## st_status 394 | - ` jaqs_fxdayu.util.dp.st_status(api, symbol, start, end) ` 395 | 396 | **简要描述:** 397 | 398 | - 指定起止时间段,股票是否进入异常状态(含st,*st,退市等) 399 | - 注:从未进入过异常状态的股票不会在返回值里 400 | - 注:该方法目前只有在fxdayu数据源下才可访问到(非公开接口) 401 | 402 | **参数:** 403 | 404 | |字段|必选|类型|说明| 405 | |:---- |:---|:----- |----- | 406 | |api |是| jaqs.data.DataApi |jaqs.data.DataApi| 407 | |symbol |是| str |股票代码,以","隔开| 408 | |start |是|int |开始日期| 409 | |end |是|int |结束日期| 410 | 411 | **示例:** 412 | 413 | 414 | ```python 415 | from jaqs_fxdayu.util.dp import st_status 416 | st_status(api, "000001.SZ,000003.SZ,000008.SZ", 20170101, 20170501).head() 417 | ``` 418 | 419 | 420 | 421 | 422 |
423 | 436 | 437 | 438 | 439 | 440 | 441 | 442 | 443 | 444 | 445 | 446 | 447 | 448 | 449 | 450 | 451 | 452 | 453 | 454 | 455 | 456 | 457 | 458 | 459 | 460 | 461 | 462 | 463 | 464 | 465 | 466 | 467 | 468 | 469 | 470 | 471 | 472 | 473 | 474 | 475 | 476 |
000003.SZ000008.SZ
trade_date
201701031.00.0
201701041.00.0
201701051.00.0
201701061.00.0
201701091.00.0
477 |
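Because names that never entered an abnormal state are missing from the result (see the note above), a full-universe ST mask should reindex the columns and treat the gaps as "not ST". A minimal sketch, using the three-symbol universe from the example:


```python
st = st_status(api, "000001.SZ,000003.SZ,000008.SZ", 20170101, 20170501)
universe = ["000001.SZ", "000003.SZ", "000008.SZ"]
# Absent columns mean the stock was never ST; fill them with 0.
st_mask = st.reindex(columns=universe).fillna(0) > 0
```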
478 | 479 | 480 | 481 | ## daily_sec_industry 482 | - ` jaqs_fxdayu.util.dp.daily_sec_industry(api, symbol, start, end, source="sw", value="industry1_code") ` 483 | 484 | **简要描述:** 485 | 486 | - 指定起始时间段,查询某一系列股票在该时间段下的行业分类信息 487 | 488 | **参数:** 489 | 490 | |字段|必选|类型|说明| 491 | |:---- |:---|:----- |----- | 492 | |api |是| jaqs.data.DataApi |jaqs.data.DataApi| 493 | |symbol |是| str |股票代码,用","隔开。如"600000.SH,000001.SZ"| 494 | |start |是|int |开始日期| 495 | |end |是|int |结束日期| 496 | |source |否|str |行业分类标准,目前仅支持"sw"(申万),"zz"(中证),"zjh"(证监会),默认"sw"| 497 | |value |否|str |行业等级,形式可为"industry?_code"(行业编码)/"industry?_name"(行业名称)。其中"?"可为1,2,3,4,分别代表1-4个行业等级。申万支持1-4,中证支持1-2。默认为industry1_code| 498 | 499 | **示例:** 500 | 501 | 502 | ```python 503 | from jaqs_fxdayu.util.dp import daily_sec_industry 504 | symbol_id = index_cons(api, "000300.SH", 20170501, 20171001,)["symbol"].dropna() 505 | symbols = ",".join(symbol_id) 506 | group = daily_sec_industry(api, symbols, 20170501, 20171001, source='zjh', value="industry1_name") 507 | group.tail() 508 | ``` 509 | 510 | 511 | 512 | 513 |
514 | 527 | 528 | 529 | 530 | 531 | 532 | 533 | 534 | 535 | 536 | 537 | 538 | 539 | 540 | 541 | 542 | 543 | 544 | 545 | 546 | 547 | 548 | 549 | 550 | 551 | 552 | 553 | 554 | 555 | 556 | 557 | 558 | 559 | 560 | 561 | 562 | 563 | 564 | 565 | 566 | 567 | 568 | 569 | 570 | 571 | 572 | 573 | 574 | 575 | 576 | 577 | 578 | 579 | 580 | 581 | 582 | 583 | 584 | 585 | 586 | 587 | 588 | 589 | 590 | 591 | 592 | 593 | 594 | 595 | 596 | 597 | 598 | 599 | 600 | 601 | 602 | 603 | 604 | 605 | 606 | 607 | 608 | 609 | 610 | 611 | 612 | 613 | 614 | 615 | 616 | 617 | 618 | 619 | 620 | 621 | 622 | 623 | 624 | 625 | 626 | 627 | 628 | 629 | 630 | 631 | 632 | 633 | 634 | 635 | 636 | 637 | 638 | 639 | 640 | 641 | 642 | 643 | 644 | 645 | 646 | 647 | 648 | 649 | 650 | 651 | 652 | 653 | 654 | 655 | 656 | 657 | 658 | 659 | 660 | 661 | 662 | 663 | 664 | 665 | 666 | 667 | 668 | 669 | 670 | 671 | 672 | 673 | 674 | 675 | 676 | 677 | 678 | 679 | 680 | 681 | 682 | 683 | 684 | 685 | 686 | 687 | 688 | 689 | 690 | 691 | 692 | 693 | 694 | 695 | 696 | 697 | 698 | 699 | 700 |
000001.SZ000002.SZ000008.SZ000009.SZ000027.SZ000039.SZ000060.SZ000061.SZ000063.SZ000069.SZ...601988.SH601989.SH601992.SH601997.SH601998.SH603000.SH603160.SH603858.SH603885.SH603993.SH
trade_date
20170925金融业房地产业制造业综合电力、热力、燃气及水生产和供应业制造业制造业租赁和商务服务业制造业水利、环境和公共设施管理业...金融业制造业制造业金融业金融业信息传输、软件和信息技术服务业制造业制造业交通运输、仓储和邮政业采矿业
20170926金融业房地产业制造业综合电力、热力、燃气及水生产和供应业制造业制造业租赁和商务服务业制造业水利、环境和公共设施管理业...金融业制造业制造业金融业金融业信息传输、软件和信息技术服务业制造业制造业交通运输、仓储和邮政业采矿业
20170927金融业房地产业制造业综合电力、热力、燃气及水生产和供应业制造业制造业租赁和商务服务业制造业水利、环境和公共设施管理业...金融业制造业制造业金融业金融业信息传输、软件和信息技术服务业制造业制造业交通运输、仓储和邮政业采矿业
20170928金融业房地产业制造业综合电力、热力、燃气及水生产和供应业制造业制造业租赁和商务服务业制造业水利、环境和公共设施管理业...金融业制造业制造业金融业金融业信息传输、软件和信息技术服务业制造业制造业交通运输、仓储和邮政业采矿业
20170929金融业房地产业制造业综合电力、热力、燃气及水生产和供应业制造业制造业租赁和商务服务业制造业水利、环境和公共设施管理业...金融业制造业制造业金融业金融业信息传输、软件和信息技术服务业制造业制造业交通运输、仓储和邮政业采矿业
701 |

5 rows × 330 columns

702 |
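The frame returned by `daily_sec_industry` has the same date-by-symbol layout as the `group` argument of `SignalDigger.process_signal_before_analysis` (used with `sw1` in the analysis docs), so it can drive industry-grouped IC analysis directly. A sketch under the assumption that `signal` and `price` frames have already been prepared elsewhere:


```python
from jaqs_fxdayu.research import SignalDigger

sd = SignalDigger()
sd.process_signal_before_analysis(signal=signal,   # assumed prepared
                                  price=price,     # assumed prepared
                                  group=group,     # from daily_sec_industry above
                                  n_quantiles=5,
                                  period=5)
```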
703 | 704 | 705 | -------------------------------------------------------------------------------- /jaqs_fxdayu/research/signaldigger/analysis.py: -------------------------------------------------------------------------------- 1 | # encoding = utf-8 2 | 3 | import numpy as np 4 | import pandas as pd 5 | import scipy.stats as scst 6 | from jaqs.trade import common 7 | 8 | from . import performance as pfm 9 | 10 | 11 | def compute_downside_returns(price, 12 | low, 13 | can_exit=None, 14 | period=5, 15 | compound=True): 16 | """ 17 | Finds the N period downside_returns for each asset provided. 18 | 19 | Parameters 20 | ---------- 21 | price : pd.DataFrame 22 | Pricing data to use in forward price calculation. 23 | Assets as columns, dates as index. Pricing data must 24 | span the factor analysis time period plus an additional buffer window 25 | that is greater than the maximum number of expected periods 26 | in the forward returns calculations. 27 | low : pd.DataFrame 28 | Low pricing data to use in forward price calculation. 29 | Assets as columns, dates as index. Pricing data must 30 | span the factor analysis time period plus an additional buffer window 31 | that is greater than the maximum number of expected periods 32 | in the forward returns calculations. 33 | can_exit:bool 34 | shape like price&low 35 | period : int 36 | periods to compute returns on. 37 | compound : bool 38 | 39 | 40 | Returns 41 | ------- 42 | downside_returns : pd.DataFrame 43 | downside_returns in indexed by date 44 | """ 45 | if compound: 46 | downside_ret = (low.rolling(period).min() - price.shift(period)) / price.shift(period) 47 | else: 48 | downside_ret = (low.rolling(period).min() - price.shift(period)) / price.iloc[0] 49 | if can_exit is not None: 50 | low_can_exit = low.copy() 51 | low_can_exit[~can_exit] = np.NaN 52 | low_can_exit = low_can_exit.fillna(method="bfill") 53 | if compound: 54 | downside_ret_can_exit = (low_can_exit.rolling(period).min() - price.shift(period)) / price.shift(period) 55 | else: 56 | downside_ret_can_exit = (low_can_exit.rolling(period).min() - price.shift(period)) / price.iloc[0] 57 | downside_ret[~can_exit] = (downside_ret[downside_ret <= downside_ret_can_exit].fillna(0) + \ 58 | downside_ret_can_exit[downside_ret_can_exit < downside_ret].fillna(0))[~can_exit] 59 | 60 | return downside_ret 61 | 62 | 63 | def compute_upside_returns(price, 64 | high, 65 | can_exit=None, 66 | period=5, 67 | compound=True): 68 | """ 69 | Finds the N period upside_returns for each asset provided. 70 | 71 | Parameters 72 | ---------- 73 | price : pd.DataFrame 74 | Pricing data to use in forward price calculation. 75 | Assets as columns, dates as index. Pricing data must 76 | span the factor analysis time period plus an additional buffer window 77 | that is greater than the maximum number of expected periods 78 | in the forward returns calculations. 79 | high : pd.DataFrame 80 | High pricing data to use in forward price calculation. 81 | Assets as columns, dates as index. Pricing data must 82 | span the factor analysis time period plus an additional buffer window 83 | that is greater than the maximum number of expected periods 84 | in the forward returns calculations. 85 | can_exit:bool 86 | shape like price&low 87 | period : int 88 | periods to compute returns on. 
89 | compound : bool 90 | 91 | 92 | Returns 93 | ------- 94 | upside_returns : pd.DataFrame 95 | upside_returns in indexed by date 96 | """ 97 | if compound: 98 | upside_ret = (high.rolling(period).max() - price.shift(period)) / price.shift(period) 99 | else: 100 | upside_ret = (high.rolling(period).max() - price.shift(period)) / price.iloc[0] 101 | if can_exit is not None: 102 | high_can_exit = high.copy() 103 | high_can_exit[~can_exit] = np.NaN 104 | high_can_exit = high_can_exit.fillna(method="bfill") 105 | if compound: 106 | upside_ret_can_exit = (high_can_exit.rolling(period).max() - price.shift(period)) / price.shift(period) 107 | else: 108 | upside_ret_can_exit = (high_can_exit.rolling(period).max() - price.shift(period)) / price.iloc[0] 109 | upside_ret[~can_exit] = (upside_ret[upside_ret >= upside_ret_can_exit].fillna(0) + \ 110 | upside_ret_can_exit[upside_ret_can_exit > upside_ret].fillna(0))[~can_exit] 111 | 112 | return upside_ret 113 | 114 | 115 | def cal_rets_stats(rets, period): 116 | ret_summary_table = pd.DataFrame() 117 | ratio = (1.0 * common.CALENDAR_CONST.TRADE_DAYS_PER_YEAR / period) 118 | mean = rets.mean() 119 | std = rets.std() 120 | annual_ret, annual_vol = mean * ratio, std * np.sqrt(ratio) 121 | t_stats, p_values = scst.ttest_1samp(rets, np.zeros(rets.shape[1]), axis=0) 122 | ret_summary_table['t-stat'] = t_stats 123 | ret_summary_table['p-value'] = np.round(p_values, 5) 124 | ret_summary_table["skewness"] = scst.skew(rets, axis=0) 125 | ret_summary_table["kurtosis"] = scst.kurtosis(rets, axis=0) 126 | ret_summary_table['Ann. Ret'] = annual_ret 127 | ret_summary_table['Ann. Vol'] = annual_vol 128 | ret_summary_table['Ann. IR'] = annual_ret / annual_vol 129 | ret_summary_table['occurance'] = len(rets) 130 | return ret_summary_table.T 131 | 132 | 133 | def ic_stats(signal_data): 134 | ICs = get_ics(signal_data) 135 | stats = [] 136 | for item in ICs.keys(): 137 | ic = ICs[item] 138 | ic.index = pd.to_datetime(ic.index, format="%Y%m%d") 139 | ic_summary_table = pfm.calc_ic_stats_table(ic).T 140 | ic_summary_table.columns = [item] 141 | stats.append(ic_summary_table) 142 | if len(stats) > 0: 143 | stats = pd.concat(stats, axis=1) 144 | return stats 145 | 146 | 147 | def get_ics(signal_data): 148 | ICs = dict() 149 | if not ("upside_ret" in signal_data.columns) or \ 150 | not ("downside_ret" in signal_data.columns): 151 | items = ["return"] 152 | else: 153 | items = ["return", "upside_ret", "downside_ret"] 154 | for item in items: 155 | data = signal_data[["signal", item]] 156 | data.columns = ["signal", "return"] 157 | ICs[item + "_ic"] = pfm.calc_signal_ic(data).dropna() 158 | 159 | return ICs 160 | 161 | 162 | def return_stats(signal_data, is_event, period): 163 | rets = get_rets(signal_data, is_event) 164 | stats = [] 165 | for ret_type in rets.keys(): 166 | if len(rets[ret_type]) > 0: 167 | ret_stats = cal_rets_stats(rets[ret_type].values.reshape((-1, 1)), period) 168 | ret_stats.columns = [ret_type] 169 | stats.append(ret_stats) 170 | if len(stats) > 0: 171 | stats = pd.concat(stats, axis=1) 172 | return stats 173 | 174 | 175 | def get_rets(signal_data, is_event): 176 | rets = dict() 177 | signal_data = signal_data.copy() 178 | n_quantiles = signal_data['quantile'].max() 179 | 180 | if is_event: 181 | rets["long_ret"] = signal_data[signal_data['signal'] == 1]["return"].dropna() 182 | rets['short_ret'] = signal_data[signal_data['signal'] == -1]["return"].dropna() * -1 183 | else: 184 | rets['long_ret'] = \ 185 | 
pfm.calc_period_wise_weighted_signal_return(signal_data, weight_method='long_only').dropna() 186 | rets['short_ret'] = \ 187 | pfm.calc_period_wise_weighted_signal_return(signal_data, weight_method='short_only').dropna() 188 | rets['long_short_ret'] = \ 189 | pfm.calc_period_wise_weighted_signal_return(signal_data, weight_method='long_short').dropna() 190 | # quantile return 191 | if not is_event: 192 | rets['top_quantile_ret'] = signal_data[signal_data['quantile'] == n_quantiles]["return"].dropna() 193 | rets['bottom_quantile_ret'] = signal_data[signal_data['quantile'] == 1]["return"].dropna() 194 | period_wise_quantile_ret_stats = pfm.calc_quantile_return_mean_std(signal_data, time_series=True) 195 | rets['tmb_ret'] = pfm.calc_return_diff_mean_std(period_wise_quantile_ret_stats[n_quantiles], 196 | period_wise_quantile_ret_stats[1])['mean_diff'].dropna() 197 | rets['all_sample_ret'] = signal_data["return"].dropna() 198 | return rets 199 | 200 | 201 | def weighted_signal_ret_space(signal_data): 202 | """ 203 | Computes period wise period_wise_returns for portfolio weighted by signal 204 | values. Weights are computed by demeaning signals and dividing 205 | by the sum of their absolute value (achieving gross leverage of 1). 206 | 207 | Parameters 208 | ---------- 209 | signal_data : pd.DataFrame - MultiIndex 210 | Index is pd.MultiIndex ['trade_date', 'symbol'], columns = ['signal', 'return', "upside_ret","downside_ret", 'quantile'] 211 | 212 | Returns 213 | ------- 214 | space : pd.DataFrame of dict 215 | weighted_signal_ret_space 216 | """ 217 | 218 | def calc_norm_weights(ser, method): 219 | if method == 'long_only': 220 | ser = (ser + ser.abs()) / 2.0 221 | elif method == 'short_only': 222 | ser = (ser - ser.abs()) / 2.0 223 | else: 224 | raise ValueError("method can only be long_only or short_only," 225 | "but [{}] is provided".format(method)) 226 | return ser / ser.abs().sum() 227 | 228 | grouper = ['trade_date'] 229 | 230 | long_weights = signal_data.groupby(grouper)['signal'].apply(calc_norm_weights, "long_only") 231 | short_weights = signal_data.groupby(grouper)['signal'].apply(calc_norm_weights, "short_only") 232 | 233 | space = dict() 234 | space["long_space"] = dict() 235 | space["long_space"]["upside_space"] = signal_data['upside_ret'].multiply(long_weights, axis=0) 236 | space["long_space"]["downside_space"] = signal_data['downside_ret'].multiply(long_weights, axis=0) 237 | space["short_space"] = dict() 238 | space["short_space"]["upside_space"] = signal_data['downside_ret'].multiply(short_weights, axis=0) 239 | space["short_space"]["downside_space"] = signal_data['upside_ret'].multiply(short_weights, axis=0) 240 | space["long_short_space"] = dict() 241 | space["long_short_space"]["upside_space"] = space["long_space"]["upside_space"] + space["short_space"][ 242 | "upside_space"] 243 | space["long_short_space"]["downside_space"] = space["long_space"]["downside_space"] + space["short_space"][ 244 | "downside_space"] 245 | 246 | for dir_type in ["long_space", "short_space", "long_short_space"]: 247 | for space_type in ["upside_space", "downside_space"]: 248 | space[dir_type][space_type] = space[dir_type][space_type].groupby(level='trade_date').sum() 249 | space[dir_type][space_type] = pd.DataFrame(space[dir_type][space_type]).dropna() 250 | 251 | return space 252 | 253 | 254 | def calc_tb_quantile_ret_space_mean_std(signal_data, 255 | space_type="upside"): 256 | """ 257 | Computes mean space for signal top & bottom quantiles across 258 | provided upside_ret or downside_ret. 
259 | 260 | Parameters 261 | ---------- 262 | signal_data : pd.DataFrame - MultiIndex 263 | Index is pd.MultiIndex ['trade_date', 'symbol'], columns = ['signal', 'return', 'upside_ret', "downside_ret", 'quantile'] 264 | 265 | Returns 266 | ------- 267 | quantile_space : pd.DataFrame of dict 268 | 269 | """ 270 | signal_data = signal_data.copy() 271 | n_quantiles = signal_data['quantile'].max() 272 | grouper = ['quantile'] 273 | grouper.append('trade_date') 274 | 275 | group_mean_std = signal_data.groupby(grouper)[space_type + "_ret"].agg(['mean', 'std', 'count']) 276 | indexes = [] 277 | quantile_daily_mean_std_dic = dict() 278 | for q in [1, n_quantiles]: # loop for different quantiles 279 | df_q = group_mean_std.loc[pd.IndexSlice[q, :], :] # bug 280 | df_q.index = df_q.index.droplevel(level='quantile') 281 | indexes.append(pd.Series(df_q.index)) 282 | quantile_daily_mean_std_dic[q] = df_q 283 | new_index = sorted(pd.concat(indexes).unique()) 284 | for q in [1, n_quantiles]: 285 | quantile_daily_mean_std_dic[q] = quantile_daily_mean_std_dic[q].reindex(new_index).fillna(0) 286 | return quantile_daily_mean_std_dic 287 | 288 | 289 | def cal_spaces_stats(space): 290 | space_summary_table = pd.DataFrame() 291 | if len(space["upside_space"]) > 0: 292 | space["Up_sp"] = space["upside_space"].values.reshape((-1, 1)) 293 | space["Down_sp"] = space["downside_space"].values.reshape((-1, 1)) 294 | for space_type in ["Up_sp", "Down_sp"]: 295 | mean = space[space_type].mean() 296 | std = space[space_type].std() 297 | space_summary_table[space_type + " Mean"] = [mean] 298 | space_summary_table[space_type + " Std"] = [std] 299 | space_summary_table[space_type + " IR"] = [mean / std] 300 | for percent in [5, 25, 50, 75, 95]: 301 | space_summary_table[space_type + " Pct" + str(percent)] = [np.percentile(space[space_type], 302 | percent)] 303 | space_summary_table[space_type + ' Occur'] = [len(space[space_type])] 304 | return space_summary_table.T 305 | 306 | 307 | def space_stats(signal_data, is_event): 308 | spaces = get_spaces(signal_data, is_event) 309 | stats_result = [] 310 | for dir_type in spaces.keys(): 311 | stats = cal_spaces_stats(spaces[dir_type]) 312 | if len(stats) > 0: 313 | stats.columns = [dir_type] 314 | stats_result.append(stats) 315 | if len(stats_result) > 0: 316 | stats_result = pd.concat(stats_result, axis=1) 317 | return stats_result 318 | 319 | 320 | def get_spaces(signal_data, is_event): 321 | spaces = dict() 322 | if not ("upside_ret" in signal_data.columns) or \ 323 | not ("downside_ret" in signal_data.columns): 324 | return spaces 325 | signal_data = signal_data.copy() 326 | n_quantiles = signal_data['quantile'].max() 327 | 328 | spaces = weighted_signal_ret_space(signal_data) 329 | if is_event: 330 | spaces["long_space"]["upside_space"] = signal_data[signal_data['signal'] == 1]["upside_ret"].dropna() 331 | spaces["long_space"]["downside_space"] = signal_data[signal_data['signal'] == 1]["downside_ret"].dropna() 332 | spaces["short_space"]["upside_space"] = signal_data[signal_data['signal'] == -1]["downside_ret"].dropna() * -1 333 | spaces["short_space"]["downside_space"] = signal_data[signal_data['signal'] == -1]["upside_ret"].dropna() * -1 334 | 335 | # quantile return space 336 | if not is_event: 337 | spaces["top_quantile_space"] = dict() 338 | spaces["bottom_quantile_space"] = dict() 339 | spaces["tmb_space"] = dict() 340 | 341 | spaces["top_quantile_space"]["upside_space"] = signal_data[signal_data['quantile'] == n_quantiles][ 342 | "upside_ret"].dropna() 343 | 
spaces["top_quantile_space"]["downside_space"] = signal_data[signal_data['quantile'] == n_quantiles][ 344 | "downside_ret"].dropna() 345 | spaces["bottom_quantile_space"]["upside_space"] = signal_data[signal_data['quantile'] == 1][ 346 | "upside_ret"].dropna() 347 | spaces["bottom_quantile_space"]["downside_space"] = signal_data[signal_data['quantile'] == 1][ 348 | "downside_ret"].dropna() 349 | 350 | tb_upside_mean_space = calc_tb_quantile_ret_space_mean_std(signal_data, 351 | space_type="upside") 352 | tb_downside_mean_space = calc_tb_quantile_ret_space_mean_std(signal_data, 353 | space_type="downside") 354 | spaces['tmb_space']["upside_space"] = pfm.calc_return_diff_mean_std(tb_upside_mean_space[n_quantiles], 355 | tb_downside_mean_space[1])[ 356 | 'mean_diff'].dropna() 357 | spaces['tmb_space']["downside_space"] = pfm.calc_return_diff_mean_std(tb_downside_mean_space[n_quantiles], 358 | tb_upside_mean_space[1])[ 359 | 'mean_diff'].dropna() 360 | 361 | spaces["all_sample_space"] = dict() 362 | spaces["all_sample_space"]["upside_space"] = signal_data["upside_ret"].dropna() 363 | spaces["all_sample_space"]["downside_space"] = signal_data["downside_ret"].dropna() 364 | return spaces 365 | 366 | 367 | def analysis(signal_data, is_event, period): 368 | if is_event: 369 | return { 370 | "ret": return_stats(signal_data, True, period), 371 | "space": space_stats(signal_data, True) 372 | } 373 | else: 374 | return { 375 | "ic": ic_stats(signal_data), 376 | "ret": return_stats(signal_data, False, period), 377 | "space": space_stats(signal_data, False) 378 | } 379 | -------------------------------------------------------------------------------- /docs/analysis.md: -------------------------------------------------------------------------------- 1 | 2 | # analysis 3 | 4 | ## 介绍 5 | 单因子多维度分析.从因子ic,因子收益,选股潜在收益空间三个维度给出因子评价.新增模块 6 | 7 | ## ic_stats 8 | - ` jaqs_fxdayu.research.signaldigger.analysis.ic_stats(signal_data) ` 9 | 10 | **简要描述:** 11 | 12 | - 因子ic分析表 13 | - 对事件因子(数值为0/1/-1的因子)无法使用该方法 14 | 15 | **参数:** 16 | 17 | |字段|必选|类型|说明| 18 | |:---- |:---|:----- |----- | 19 | |signal_data |是|pandas.DataFrame |trade_date+symbol为MultiIndex,columns为signal(因子)、return(持有期相对/绝对收益,必须)、upside_ret(持有期潜在最大上涨收益,非必须)、downside_ret(持有期潜在最大下跌收益,非必须)、group(分组/行业分类,非必须)、quantile(按因子值分组,非必须)| 20 | 21 | **返回:** 22 | 因子ic分析表 23 | * 列: 24 | * return_ic/upside_ret_ic/downside_ret_ic 25 | * 持有期收益的ic/持有期最大向上空间的ic/持有期最大向下空间的ic 26 | 27 | * 行: 28 | * "IC Mean", "IC Std.", "t-stat(IC)", "p-value(IC)", "IC Skew", "IC Kurtosis", "Ann. IR" 29 | * IC均值,IC标准差,IC的t统计量,对IC做0均值假设检验的p-value,IC偏度,IC峰度,iC的年化信息比率-mean/std 30 | 31 | 32 | **示例:** 33 | 34 | 35 | ```python 36 | import warnings 37 | warnings.filterwarnings('ignore') 38 | ``` 39 | 40 | 41 | ```python 42 | from jaqs_fxdayu.data import DataView 43 | from jaqs_fxdayu.research import SignalDigger 44 | 45 | # 加载dataview数据集 46 | dv = DataView() 47 | dataview_folder = './data' 48 | dv.load_dataview(dataview_folder) 49 | 50 | # 计算signal_data(通过jaqs.research.signaldigger.digger.SignalDigger.process_signal_before_analysis(*args, **kwargs)) 51 | sd = SignalDigger() 52 | sd.process_signal_before_analysis(signal=dv.get_ts("pe"), 53 | price=dv.get_ts("close_adj"), 54 | high=dv.get_ts("high_adj"), 55 | low=dv.get_ts("low_adj"), 56 | group=dv.get_ts("sw1"), 57 | n_quantiles=5, 58 | period=5, 59 | benchmark_price=dv.data_benchmark, 60 | ) 61 | signal_data = sd.signal_data 62 | signal_data.head() 63 | ``` 64 | 65 | Dataview loaded successfully. 
66 | Nan Data Count (should be zero) : 0; Percentage of effective data: 99% 67 | 68 | 69 | 70 | 71 | 72 |
73 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 |
signalreturnupside_retdownside_retgroupquantile
trade_datesymbol
20170503000001.SZ6.7925-0.005637-0.003045-0.0423264800001
000002.SZ10.08210.0112250.016697-0.0294324300001
000008.SZ42.9544-0.0494080.000463-0.0929726400004
000009.SZ79.4778-0.0698220.009714-0.0954265100005
000027.SZ20.4542-0.0195170.009404-0.0416164100002
158 |
159 | 160 | 161 | 162 | 163 | ```python 164 | from jaqs_fxdayu.research.signaldigger.analysis import ic_stats 165 | 166 | ic_stats(signal_data) 167 | ``` 168 | 169 | 170 | 171 | 172 |
173 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 |
return_icupside_ret_icdownside_ret_ic
IC Mean-0.0228050.031198-2.035376e-01
IC Std.0.2073250.1593131.692702e-01
t-stat(IC)-1.1054671.968055-1.208439e+01
p-value(IC)0.2716100.0518312.894849e-21
IC Skew0.009493-0.0657154.407910e-01
IC Kurtosis-0.978744-0.639758-5.878823e-01
Ann. IR-0.1099980.195829-1.202442e+00
240 |
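`ic_stats` is a thin wrapper over `get_ics` (see the analysis.py source above), which returns one per-date IC series per available return column; those series can be pulled out for custom smoothing or plotting. A minimal sketch, assuming each series carries an `ic` column as in the library's plotting code:


```python
from jaqs_fxdayu.research.signaldigger.analysis import get_ics

ics = get_ics(signal_data)
# Keys: 'return_ic', plus 'upside_ret_ic'/'downside_ret_ic' when those
# columns are present in signal_data.
daily_ic = ics["return_ic"]
print(daily_ic.rolling(20).mean().tail())  # 20-period smoothing, illustrative
```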
241 | 242 | 243 | 244 | ### return_stats 245 | - ` jaqs_fxdayu.research.signaldigger.analysis.return_stats(signal_data,is_event,period) ` 246 | 247 | **简要描述:** 248 | 249 | - 因子收益分析表--根据因子构建几种投资组合,通过组合表现分析因子的收益能力 250 | 251 | **参数:** 252 | 253 | |字段|必选|类型|说明| 254 | |:---- |:---|:----- |----- | 255 | |signal_data |是|pandas.DataFrame |trade_date+symbol为MultiIndex,columns为signal(因子)、return(持有期相对/绝对收益,必须)、upside_ret(持有期潜在最大上涨收益,非必须)、downside_ret(持有期潜在最大下跌收益,非必须)、group(分组/行业分类,非必须)、quantile(按因子值分组,非必须)| 256 | |is_event |是|bool |是否是事件因子(数值为0/1/-1的因子)| 257 | |period |是|int |换仓周期(天数),**注意:**必须与signal_data中收益的计算周期一致| 258 | 259 | **返回:** 260 | 261 | 收益分析表 262 | * 列: 263 | * long_ret/short_ret/long_short_ret/top_quantile_ret/bottom_quantile_ret/tmb_ret/all_sample_ret 264 | * 多头组合收益/空头组合收益/多空组合收益/因子值最大组合收益/因子值最小组合收益/因子值最大组(构建多头)+因子值最小组(构建空头)收益/全样本(无论信号大小和方向)-基准组合收益 265 | 266 | * 行: 267 | * 't-stat', "p-value", "skewness", "kurtosis", "Ann. Ret", "Ann. Vol", "Ann. IR", "occurance" 268 | * 持有期收益的t统计量,对持有期收益做0均值假设检验的p-value,偏度,峰度,持有期收益年化值,年化波动率,年化信息比率-年化收益/年化波动率,样本数量 269 | 270 | 271 | **示例:** 272 | 273 | 274 | ```python 275 | from jaqs_fxdayu.research.signaldigger.analysis import return_stats 276 | 277 | return_stats(signal_data,is_event=False,period=5) 278 | ``` 279 | 280 | 281 | 282 | 283 |
|  | long_ret | long_short_ret | top_quantile_ret | bottom_quantile_ret | tmb_ret | all_sample_ret |
|:---|---:|---:|---:|---:|---:|---:|
| t-stat | -1.203846 | 0.411628 | -4.728619 | -2.714885 | -0.755901 | -12.043624 |
| p-value | 0.231360 | 0.681450 | 0.000000 | 0.006650 | 0.451400 | 0.000000 |
| skewness | -0.083057 | 0.373680 | 0.495042 | 1.348467 | -0.261998 | 0.546392 |
| kurtosis | -0.555038 | 0.042535 | 6.187667 | 9.207208 | -0.272022 | 6.241350 |
| Ann. Ret | -0.101735 | 0.021452 | -0.129940 | -0.051046 | -0.078894 | -0.120509 |
| Ann. Vol | 0.124471 | 0.076759 | 0.330355 | 0.226040 | 0.153727 | 0.268994 |
| Ann. IR | -0.817333 | 0.279469 | -0.393336 | -0.225829 | -0.513207 | -0.447998 |
| occurance | 106.000000 | 106.000000 | 6996.000000 | 6996.000000 | 106.000000 | 34980.000000 |
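`short_ret` is absent from this output, presumably because the example PE signal is never negative, so no short portfolio can be built. The annualized rows are simple scalings of the per-period sample moments; a minimal sketch of one common convention (assuming 242 trading days per year, and taking the all-sample return series as the example; the library's exact formula may differ):

```python
import numpy as np

TRADE_DAYS_PER_YEAR = 242   # assumption: A-share trading-day convention
period = 5                  # rebalancing period passed to return_stats

# Example series: all-sample holding-period returns; each portfolio column
# applies the same recipe to its own return series.
ret = signal_data["return"].dropna()

periods_per_year = TRADE_DAYS_PER_YEAR / period
ann_ret = ret.mean() * periods_per_year            # "Ann. Ret"
ann_vol = ret.std() * np.sqrt(periods_per_year)    # "Ann. Vol"
ann_ir = ann_ret / ann_vol                         # "Ann. IR"
```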



## space_stats
- ` jaqs_fxdayu.research.signaldigger.analysis.space_stats(signal_data,is_event) `

**Brief description:**

- Factor potential return space analysis table: several portfolios are built from the factor, and the maximum potential upside and downside each portfolio could reach within the rebalancing period is used to analyze how much room the factor's stock-picking return leaves for improvement, as a further aid in designing a timing scheme

**Parameters:**

|Field|Required|Type|Description|
|:---- |:---|:----- |----- |
|signal_data |Yes|pandas.DataFrame |MultiIndex of trade_date+symbol; columns are signal (the factor value), return (holding-period relative/absolute return, required), upside_ret (maximum potential upside return over the holding period, optional), downside_ret (maximum potential downside return over the holding period, optional), group (grouping/industry classification, optional), quantile (bucket by factor value, optional)|
|is_event |Yes|bool |whether the factor is an event factor (values 0/1/-1)|

**Returns:**

Factor potential return space analysis table
* Columns:
    * long_space/short_space/long_short_space/top_quantile_space/bottom_quantile_space/tmb_space/all_sample_space
    * long portfolio space / short portfolio space / long-short portfolio space / space of the top factor-value quantile / space of the bottom factor-value quantile / top-minus-bottom space (long the top quantile, short the bottom quantile) / all-sample space (regardless of signal size and direction) relative to the benchmark

* Rows:
    * 'Up_sp Mean','Up_sp Std','Up_sp IR','Up_sp Pct5', 'Up_sp Pct25 ','Up_sp Pct50 ', 'Up_sp Pct75','Up_sp Pct95','Up_sp Occur','Down_sp Mean','Down_sp Std', 'Down_sp IR', 'Down_sp Pct5','Down_sp Pct25 ','Down_sp Pct50 ','Down_sp Pct75', 'Down_sp Pct95','Down_sp Occur'
    * mean upside space of the stocks held in the portfolio, its standard deviation, its information ratio (mean/std), its 5th, 25th, 50th (median), 75th and 95th percentiles, its sample count, followed by the same statistics for the downside space


**Example:**


```python
from jaqs_fxdayu.research.signaldigger.analysis import space_stats

space_stats(signal_data,is_event=False)
```
|  | long_space | top_quantile_space | bottom_quantile_space | tmb_space | all_sample_space |
|:---|---:|---:|---:|---:|---:|
| Up_sp Mean | -0.091582 | -0.089756 | -0.016239 | -0.013714 | -0.026786 |
| Up_sp Std | 0.033321 | 0.343245 | 0.212997 | 0.017699 | 0.240319 |
| Up_sp IR | -2.748454 | -0.261492 | -0.076242 | -0.774819 | -0.111460 |
| Up_sp Pct5 | -0.127152 | -1.000800 | -0.005893 | -0.040333 | -1.000800 |
| Up_sp Pct25 | -0.117286 | 0.002457 | 0.004533 | -0.028591 | 0.005062 |
| Up_sp Pct50 | -0.101419 | 0.020756 | 0.017939 | -0.013746 | 0.019105 |
| Up_sp Pct75 | -0.076478 | 0.047980 | 0.039831 | -0.000051 | 0.041935 |
| Up_sp Pct95 | -0.031515 | 0.111557 | 0.090402 | 0.013496 | 0.098799 |
| Up_sp Occur | 106.000000 | 6996.000000 | 6996.000000 | 106.000000 | 34980.000000 |
| Down_sp Mean | -0.167327 | -0.171114 | -0.076042 | -0.154875 | -0.092512 |
| Down_sp Std | 0.046346 | 0.340002 | 0.224699 | 0.045501 | 0.245442 |
| Down_sp IR | -3.610429 | -0.503275 | -0.338419 | -3.403795 | -0.376919 |
| Down_sp Pct5 | -0.220840 | -1.000800 | -1.000800 | -0.208216 | -1.000800 |
| Down_sp Pct25 | -0.190647 | -0.067406 | -0.034329 | -0.183180 | -0.042842 |
| Down_sp Pct50 | -0.176590 | -0.029282 | -0.017467 | -0.162556 | -0.021792 |
| Down_sp Pct75 | -0.152016 | -0.012810 | -0.007824 | -0.139399 | -0.009769 |
| Down_sp Pct95 | -0.111972 | 0.000000 | 0.000000 | -0.086766 | 0.000000 |
| Down_sp Occur | 106.000000 | 6996.000000 | 6996.000000 | 106.000000 | 34980.000000 |
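The `upside_ret`/`downside_ret` inputs that these statistics summarize are derived from the extreme prices reached during the holding period. A plausible reconstruction of that idea under stated assumptions (not the library's exact code; `high`, `low` and `close` are date-by-symbol price DataFrames as returned by `dv.get_ts`):

```python
high = dv.get_ts("high_adj")
low = dv.get_ts("low_adj")
close = dv.get_ts("close_adj")
period = 5  # holding period in trading days

# Highest high and lowest low reachable over the next `period` bars.
future_high = high.rolling(period).max().shift(-period)
future_low = low.rolling(period).min().shift(-period)

# Maximum potential upside / downside relative to today's close.
upside_ret = future_high / close - 1.0
downside_ret = future_low / close - 1.0
```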



## analysis
- ` jaqs_fxdayu.research.signaldigger.analysis.analysis(signal_data,is_event,period) `

**Brief description:**

- Obtain the factor IC analysis table, the return analysis table, and the potential return space analysis table in a single call; see the APIs above for computing the three tables individually

**Parameters:**

|Field|Required|Type|Description|
|:---- |:---|:----- |----- |
|signal_data |Yes|pandas.DataFrame |MultiIndex of trade_date+symbol; columns are signal (the factor value), return (holding-period relative/absolute return, required), upside_ret (maximum potential upside return over the holding period, optional), downside_ret (maximum potential downside return over the holding period, optional), group (grouping/industry classification, optional), quantile (bucket by factor value, optional)|
|is_event |Yes|bool |whether the factor is an event factor (values 0/1/-1)|
|period |Yes|int |rebalancing period (in days); **note:** must match the period used to compute the returns in signal_data|

**Returns:**

A dict composed of the factor IC analysis table ("ic"), the return analysis table ("ret"), and the potential return space analysis table ("space"); for event factors the "ic" entry is omitted, as the source above shows

**Example:**


```python
from jaqs_fxdayu.research.signaldigger.analysis import analysis

result = analysis(signal_data,is_event=False,period=5)
print(result.keys())
result["ic"]
```

    dict_keys(['ic', 'ret', 'space'])
|  | return_ic | upside_ret_ic | downside_ret_ic |
|:---|---:|---:|---:|
| IC Mean | -0.022805 | 0.031198 | -2.035376e-01 |
| IC Std. | 0.207325 | 0.159313 | 1.692702e-01 |
| t-stat(IC) | -1.105467 | 1.968055 | -1.208439e+01 |
| p-value(IC) | 0.271610 | 0.051831 | 2.894849e-21 |
| IC Skew | 0.009493 | -0.065715 | 4.407910e-01 |
| IC Kurtosis | -0.978744 | -0.639758 | -5.878823e-01 |
| Ann. IR | -0.109998 | 0.195829 | -1.202442e+00 |
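The other two entries of the dict are exactly the tables that `return_stats` and `space_stats` return, so the combined result can be handled with ordinary pandas calls; a small usage sketch (the file names are hypothetical):

```python
ret_table = result["ret"]      # same table as return_stats(...)
space_table = result["space"]  # same table as space_stats(...)

# Persist all three tables for later cross-factor comparison.
for name, table in result.items():
    table.to_csv("./pe_factor_%s_stats.csv" % name)
```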


--------------------------------------------------------------------------------